def add_duet(self, text, part, percentage=1.0):
    """Overlay a synthesized duet line onto one part of the final audio.

    Args:
        text: Text for the duet voice ("Joey") to speak.
        part: Zero-based index of the beat part where the overlay starts.
        percentage: Mix level for the duet audio (1.0 = full volume).

    Returns:
        Tuple of (final_audio, sample_rate) after mixing in place.
    """
    tts = TTS(Voice(Voice.Language.enUS, Voice.Sex.male, "Joey"))
    aud, dr = sf.read(tts.speak(text))
    # Repeat each sample and double the reported rate — presumably to
    # match the beat's sample rate; TODO confirm against AudioProcessing.
    aud = aud.repeat(2, axis=0)
    dr = dr * 2
    one_part_length = len(self.final_audio) / self.times
    if one_part_length < len(aud):
        print('Warning: Duet is longer than the beat!')
    start = int(one_part_length * part)
    # Clamp the overlay so it never runs past the end of the buffer.
    # The original unclamped slice raised a broadcast ValueError whenever
    # the duet audio overran the final track.
    length = min(len(aud), len(self.final_audio) - start)
    self.final_audio[start:start + length] += aud[:length] * percentage
    return self.final_audio, self.sr
class WindowWidget(QtWidgets.QWidget):
    """Main window: load or drop an image, caption it, and generate a
    GPT-2 question about it (optionally spoken aloud via TTS)."""

    def __init__(self, voice):
        """Build the UI and load the captioning/questioning models.

        Args:
            voice: If truthy, construct a TTS engine so questions are spoken.
        """
        super(WindowWidget, self).__init__()
        self.sess = None
        self.captioner = None
        self.prepare_questioner()
        self.prepare_captioner()
        self.threadpool = QtCore.QThreadPool()
        self.questioner_running = False
        self.applying_output = False
        # Worker threads need the TF graph created on the main thread.
        global graph
        graph = tf.get_default_graph()
        if voice:
            self.tts = TTS()
        else:
            self.tts = None

        # Viewing region
        self.viewing_region = QtWidgets.QLabel(self)
        layout = QtWidgets.QHBoxLayout()
        layout.addWidget(self.viewing_region)

        # Load button
        self.load_button = QtWidgets.QPushButton('Load image')
        self.load_button.clicked.connect(self.load_button_clicked)
        right_sidebar = QtWidgets.QVBoxLayout()
        right_sidebar.addWidget(self.load_button)

        # Extra instructions region
        self.instr_region = QtWidgets.QLabel(self)
        self.instr_region.setText('Or drop an image onto this window.')
        right_sidebar.addWidget(self.instr_region)
        right_sidebar.addStretch()

        # Progress bar (shown only while TTS synthesis is running)
        self.progress = QtWidgets.QProgressBar(self)
        self.progress.setMaximum(100)
        right_sidebar.addWidget(self.progress)
        self.progress.hide()

        # Text region
        self.text_region = QtWidgets.QLabel(self)
        self.text_region.setFrameStyle(
            QtWidgets.QFrame.Panel | QtWidgets.QFrame.Sunken)
        self.text_region.setWordWrap(True)
        self.text_region.setMargin(8)
        self.text_region.setText('...')
        right_sidebar.addWidget(self.text_region)

        layout.addLayout(right_sidebar)

        # Launch
        self.setWindowIcon(QtGui.QIcon(os.path.join('icon', 'question.png')))
        self.setLayout(layout)
        self.setAcceptDrops(True)
        self.show()

    def prepare_questioner(self):
        """Start a TF session and load the GPT-2 question generator."""
        self.sess = gpt2.start_tf_sess()
        gpt2.load_gpt2(self.sess)

    def prepare_captioner(self):
        """Load the image captioner using paths from config.yaml."""
        config = yaml.load(open('config.yaml', 'r'), Loader=yaml.FullLoader)
        checkpoint_path = os.path.join(
            config['project_root_dir'], config['checkpoint_path'])
        vocab_file_path = os.path.join(
            config['project_root_dir'], config['vocab_file_path'])
        self.captioner = Captioner(self.sess, checkpoint_path, vocab_file_path)

    def load_button_clicked(self):
        """Open a file dialog and load the chosen image, unless busy."""
        if self.questioner_running or self.applying_output:
            print("Can't load an image right now. Questioner is busy.")
        else:
            image_path, _ = QtWidgets.QFileDialog.getOpenFileName(
                self, 'Open file')
            if image_path:
                self.load_image(image_path)

    def load_image(self, image_path):
        """Display the image and kick off the questioner on a worker thread."""
        pixmap = QtGui.QPixmap(image_path)
        pixmap = pixmap.scaled(500, 500, QtCore.Qt.KeepAspectRatio)
        self.viewing_region.setPixmap(pixmap)
        self.text_region.setText('Questioner is working.')
        self.adjustSize()
        self.questioner_running = True
        worker = Worker(self.run_questioner, image_path)
        worker.signals.finished.connect(self.questioner_finished)
        worker.signals.error.connect(self.questioner_failed)
        worker.signals.result.connect(self.apply_questioner_output)
        self.threadpool.start(worker)

    def run_questioner(self, image_path):
        """Caption the image and generate one question (runs off-thread)."""
        global graph
        with graph.as_default():
            # this is run on a separate thread
            caption = self.captioner.caption(image_path)
            questions = gpt2_gen_questions(
                self.sess, caption, nsamples=1, temperature=0.7)
            return questions[0] if len(questions) > 0 else ''

    def questioner_finished(self):
        self.questioner_running = False

    def questioner_failed(self, e):
        print(e)

    def apply_questioner_output(self, question):
        """Show the generated question and, if enabled, speak it."""
        self.applying_output = True
        if len(question) > 0:
            self.text_region.setText(question)
            if self.tts:
                self.progress.show()
                self.tts.speak(question, self.tts_callback)
                self.progress.hide()
        self.applying_output = False

    def tts_callback(self, i, seq_len, batch_size, gen_rate):
        """Progress callback from TTS synthesis; updates the progress bar."""
        percentage = i * 100 / seq_len
        # BUG FIX: QProgressBar.setValue requires an int; the original
        # passed a float, which raises TypeError on current PyQt builds.
        self.progress.setValue(int(percentage))

    def dragEnterEvent(self, evt):
        # BUG FIX: hasUrls is a method; the original omitted the call
        # parentheses, so the bound method was always truthy and the
        # ignore() branch was unreachable.
        if evt.mimeData().hasUrls():
            evt.accept()
        else:
            evt.ignore()

    def dragMoveEvent(self, evt):
        # BUG FIX: call hasUrls() — see dragEnterEvent.
        if evt.mimeData().hasUrls():
            evt.accept()
        else:
            evt.ignore()

    def dropEvent(self, evt):
        """Accept a dropped image URL and load it, unless busy."""
        # BUG FIX: call hasUrls() — see dragEnterEvent.
        if evt.mimeData().hasUrls() \
                and not self.questioner_running \
                and not self.applying_output:
            evt.setDropAction(QtCore.Qt.CopyAction)
            evt.accept()
            for url in evt.mimeData().urls():
                if op_sys == 'Darwin':
                    # macOS file-reference URLs must be resolved via NSURL.
                    image_path = str(
                        NSURL.URLWithString_(str(
                            url.toString())).filePathURL().path())
                else:
                    image_path = str(url.toLocalFile())
                self.load_image(image_path)
        else:
            evt.ignore()
from tts import TTS
from asr import ASR

# Listen for speech, then immediately echo the recognized text back via TTS.
TTS.speak(ASR.listen())
# Driver script: pick lyrics (sample artist or crawled topic), synthesize
# each line with TTS, lay it over a beat, add duet ad-libs, and save.
choice = input(
    'Would you like to suggest a topic or should i pick from samples? ')
# BUG FIX: the original used `choice in 'sample'`, which is a substring
# test — '', 's', 'a', 'am', etc. all matched. Compare for equality.
if choice.strip().lower() == 'sample':
    choice = input('Choose from samples: \n 1-) Eminem \n 2-) 50 Cent')
    if choice == '1':
        lyrics = eminem
    elif choice == '2':
        lyrics = fifty_cent
    else:
        # Previously an unrecognized choice crashed later with a
        # NameError on `lyrics`; fail fast with a clear message instead.
        raise SystemExit('Unknown sample choice: ' + choice)
else:
    limit = input("Set a line limit: ")
    lyrics = crawl_lyrics(choice)[:int(limit)]

ap = AudioProcessing()
for text in lyrics:
    text = prepare_text(text)
    tts = TTS(Voice(Voice.Language.enUS, Voice.Sex.male, "Justin"))
    ap.modify(tts.speak(text), 0.1, -1, 1,
              mid_part=0.05, mid_pitch=2, mid_stretch=1, accel=1.0)

aud, br = ap.insert_beat()
aud, br = ap.add_duet('Drop the beat DJ!', 0, 2)
aud, br = ap.add_duet('Aha. Yeah. Aha!', 3, 0.5)
ap.write(aud, br, name=choice)