def load(self):
    """Load the GMM model of the user currently selected in the chooser.

    Resolves the selected user's name from ``self.userdata``, then loads
    ``gmms/<name>.model`` into ``self.backend`` (creating the backend
    lazily on first use).  Shows a status-bar message on success and a
    warning dialog on failure.
    """
    userindex = self.Userchooser.currentIndex() - 1
    if userindex <= -1:
        # index 0 is the placeholder entry, so nothing is selected yet
        self.status("Please select user.")
        return
    file_name = self.userdata[userindex][0]
    fname = "gmms/" + file_name + ".model"
    # Lazily create the backend; the original duplicated the try/except
    # for the created-vs-existing cases, which is collapsed here.
    if self.backend is None:
        self.backend = ModelInterface()
    try:
        self.backend.load(fname, file_name)
    except Exception as e:
        self.warn(str(e))
    # original also reported success unconditionally after the warn
    self.status("loaded model " + file_name)
def task_enroll(input_dirs, output_model): m = ModelInterface() input_dirs = [os.path.expanduser(k) for k in input_dirs.strip().split()] dirs = itertools.chain(*(glob.glob(d) for d in input_dirs)) dirs = [d for d in dirs if os.path.isdir(d)] files = [] if len(dirs) == 0: print "No valid directory found!" sys.exit(1) training_stats = [] for d in dirs: label = os.path.basename(d.rstrip('/')) wavs = glob.glob(d + '/*.wav') if len(wavs) == 0: print "No wav file found in {0}".format(d) continue print "Label '{0}' has files: {1}".format(label, ', '.join(wavs)) total_len = 0 for wav in wavs: fs, signal = read_wav(wav) print " File '{}' has frequency={} and length={}".format( wav, fs, len(signal)) total_len += len(signal) m.enroll(label, fs, signal) training_stats.append((label, total_len)) print "--------------------------------------------" for label, total_len in training_stats: print "Total length of training data for '{}' is {}".format( label, total_len) print "For best accuracy, please make sure all labels have similar amount of training data!" m.train() m.dump(output_model)
def task_enroll(input_directory, output_model): m = ModelInterface() for k in input_directory.strip().split(): input_dirs = [os.path.expanduser(k)] for d in input_directory: dirs = itertools.chain(*(glob.glob(d))) for d in dirs: if os.path.isdir(d): dirs = [d] files = [] if len(dirs) == 0: print "No valid directory found!" sys.exit(1) for d in dirs: label = os.path.basename(d.rstrip('/')) wavs = glob.glob(d + '/*.wav') if len(wavs) == 0: print "No wav file found in {0}".format(d) continue print "Label {0} has files {1}".format(label, ','.join(wavs)) for wav in wavs: fs, signal = read_wav(wav) m.enroll(label, fs, signal) m.train() m.dump(output_model)
def task_enroll(input_dirs, output_model):
    """Enroll speakers where each wav's label is its filename prefix.

    The label of every sample is the text before the first '_' in the wav
    file name (e.g. ``alice_01.wav`` -> ``alice``); the directory name is
    only used for progress output.

    Fixes: dropped the unused ``files`` local and renamed the directory
    label so it no longer shadows / is shadowed by the per-file label.
    """
    m = ModelInterface()
    patterns = [os.path.expanduser(k) for k in input_dirs.strip().split()]
    dirs = itertools.chain(*(glob.glob(d) for d in patterns))
    dirs = [d for d in dirs if os.path.isdir(d)]
    if len(dirs) == 0:
        print("No valid directory found!")
        sys.exit(1)
    for d in dirs:
        dir_label = os.path.basename(d.rstrip('/'))
        wavs = glob.glob(d + '/*.wav')
        if len(wavs) == 0:
            print("No wav file found in {0}".format(d))
            continue
        print("Label {0} has files {1}".format(dir_label, ','.join(wavs)))
        for wav in wavs:
            fs, signal = read_wav(wav)
            # enrollment label comes from the filename, not the directory
            label = wav.split('/')[-1].split('_')[0]
            m.enroll(label, fs, signal)
    m.train()
    m.dump(output_model)
def task_predict(input_file, input_model): # FS = 8000 m = ModelInterface.load(input_model) fs_noise, noise = read_wav("noise.wav") m.init_noise(fs_noise, noise) #for f in [input_files]: try: fs, signal = read_wav(input_file) print("freq " + str(fs)) signal = m.filter(fs, signal) print("len " + str(len(signal))) if len(signal) < 50: return None print("AA") label = m.predict(fs, signal) print input_file, '->', label return label except: print "Unexpected error:", sys.exc_info()[0]
def task_predict(input_files, input_model): m = ModelInterface.load(input_model) for f in glob.glob(os.path.expanduser(input_files)): fs, signal = read_wav(f) label = m.predict(fs, signal) f = os.path.basename(f) print f, '->', label
def task_predict(input_files, input_model):
    """For each directory matching *input_files*, classify every wav inside.

    Prints "<true-label>|->|<predicted-label>", where the true label is the
    wav filename's prefix before the first '_'.
    """
    model = ModelInterface.load(input_model)
    for directory in glob.glob(os.path.expanduser(input_files)):
        for wav in glob.glob(directory + '/*.wav'):
            rate, samples = read_wav(wav)
            predicted = model.predict(rate, samples)
            truth = wav.split('/')[-1].split('_')[0]
            print(truth + '|->|' + predicted)
def task_enroll(input_dirs, output_model): m = ModelInterface() input_dirs = [os.path.expanduser(k) for k in input_dirs.strip().split()] dirs = itertools.chain(*(glob.glob(d) for d in input_dirs)) dirs = [d for d in dirs if os.path.isdir(d)] files = [] if len(dirs) == 0: print "No valid directory found!" sys.exit(1) training_stats = [] for d in dirs: label = os.path.basename(d.rstrip('/')) wavs = glob.glob(d + '/*.wav') if len(wavs) == 0: print "No wav file found in {0}".format(d) continue print "Label '{0}' has files: {1}".format(label, ', '.join(wavs)) total_len = 0 for wav in wavs: fs, signal = read_wav(wav) print " File '{}' has frequency={} and length={}".format(wav, fs, len(signal)) total_len += len(signal) m.enroll(label, fs, signal) training_stats.append((label, total_len)) print "--------------------------------------------" for label, total_len in training_stats: print "Total length of training data for '{}' is {}".format(label, total_len) print "For best accuracy, please make sure all labels have similar amount of training data!" m.train() m.dump(output_model)
def task_predict(input_model): m = ModelInterface.load(input_model) test_data = range(1,4) for f in test_data: wav = "./data/test/"+str(f)+".wav" print wav fs, signal = read_wav(wav) label = m.predict(fs, signal) print f, '->', label
def task_predict(input_model): m = ModelInterface.load(input_model) test_data = range(1, 4) for f in test_data: wav = "./data/test/" + str(f) + ".wav" print wav fs, signal = read_wav(wav) label = m.predict(fs, signal) print f, '->', label
def task_enroll(output_model): m = ModelInterface() train_data = range(1, 4) for i in train_data: wav = "./data/train/" + str(i) + ".wav" print wav label = str(i) fs, signal = read_wav(wav) m.enroll(label, fs, signal) m.train() m.dump(output_model)
def task_predict(input_files, input_model): m = ModelInterface.load(input_model) for f in glob.glob(os.path.expanduser(input_files)): fs, signal = read_wav(f) #label = m.predict(fs, signal) (label, score) = m.predict_with_score(fs, signal) if math.fabs(score) < 0.13: print f, '->', label, ' score:', score else: print f, 'NO SPEAKER RECOGNIZED (', label, ' score:', score, ')'
def task_predict(input_files, input_model, index): m = ModelInterface.load(input_model) for f in glob.glob(os.path.expanduser(input_files)): fs, signal = read_wav(f) label = m.predict(fs, signal) print f, '->', label if label == index: sys.exit(1) else: sys.exit(2)
def load_check_box(self, label):
    """Load the model ``gmms/<label>.model`` into the shared backend.

    Creates the backend lazily on first use; warns (but does not raise)
    on load failure, then reports completion in the status bar.

    Fix: removed the dead ``fname == None`` / ``if fname`` checks --
    ``fname`` is always a non-empty string -- and collapsed the two
    duplicated try/except branches.
    """
    fname = "gmms/" + label + ".model"
    if self.backend is None:
        self.backend = ModelInterface()
    try:
        self.backend.load(fname, label)
    except Exception as e:
        self.warn(str(e))
    self.status("loaded all")
def handle_speaker_rec_greeting_intent(self, message):
    """Identify the speaker of the newest wake-word recording and greet them."""
    wake_word_dir = "/tmp/mycroft_wake_words"
    # most recently created wake-word wav
    self.newest = max(glob.iglob(os.path.join(wake_word_dir, '*.wav')),
                      key=os.path.getctime)
    model_path = os.path.join(os.path.dirname(__file__), "model.out")
    model = ModelInterface.load(model_path)
    fs, signal = read_wav(self.newest)
    label = model.predict(fs, signal)
    self.speak("Yes, I do recognize your voice, %s" % (label))
    print(self.newest)
def task_enroll(output_model): m = ModelInterface() train_data = range(1,4) for i in train_data: wav = "./data/train/"+str(i)+".wav" print wav label = str(i) fs, signal = read_wav(wav) m.enroll(label, fs, signal) m.train() m.dump(output_model)
def task_predict(input_files, input_model): m = ModelInterface.load(input_model) ev = [] for f in glob.glob(os.path.expanduser(input_files)): fs, signal = read_wav(f) label = m.predict(fs, signal) ev.append(label) print f, '->', label size = len(ev) cnt = Counter(ev) mostCommon = cnt.most_common(1)[0][1] print(mostCommon) print(mostCommon * 100 / size)
def task_enroll(input_dirs, output_model): m = ModelInterface() input_dirs = input_dirs.strip().split() dirs = itertools.chain(*(glob.glob(d) for d in input_dirs)) dirs = [d for d in dirs if os.path.isdir(d)] files = [] for d in dirs: label = os.path.basename(d) wavs = glob.glob(d + '/*.wav') if len(wavs) == 0: continue print "Label {0} has files {1}".format(label, ','.join(wavs)) for wav in wavs: fs, signal = wavfile.read(wav) m.enroll(label, fs, signal) m.train() m.dump(output_model)
def task_predict(input_files, input_current, input_model):
    """Score every wav matching *input_files* and write a predictions CSV.

    For each file, only the top-ranked label is written, and only when it
    is one of the four whitelisted speakers.

    input_current: run identifier interpolated into the output path
        (``/host<input_current>/predictions.csv`` -- presumably a mounted
        host volume; TODO confirm against the deployment setup).
    """
    m = ModelInterface.load(input_model)
    with open('/host{0}/predictions.csv'.format(input_current), 'w') as pred:
        ss = csv.writer(pred, delimiter=',')
        ss.writerow(['score', 'label', 'file'])
        for f in glob.glob(os.path.expanduser(input_files)):
            fs, signal = read_wav(f)
            # one score per enrolled speaker, aligned with m.gmmset.y
            scores = m.predict_scores(fs, signal)
            y_scores = dict(zip(m.gmmset.y, scores))
            i = 0
            # highest score first; i == 0 selects the top-ranked label only
            for label, score in sorted(y_scores.items(), key=lambda o: o[1], reverse=True):
                if i == 0 and label in ['Albert', 'Casado', 'Pedro', 'Iglesias']:
                    ss.writerow([score, label, f])
                i += 1
def task_enroll(input_dirs, output_model): m = ModelInterface() input_dirs = [os.path.expanduser(k) for k in input_dirs.strip().split()] dirs = itertools.chain(*(glob.glob(d) for d in input_dirs)) dirs = [d for d in dirs if os.path.isdir(d)] files = [] if len(dirs) == 0: print "No valid directory found!" sys.exit(1) for d in dirs: label = os.path.basename(d.rstrip('/')) wavs = glob.glob(d + '/*.wav') if len(wavs) == 0: print "No wav file found in {0}".format(d) continue print "Label {0} has files {1}".format(label, ','.join(wavs)) for wav in wavs: fs, signal = read_wav(wav) m.enroll(label, fs, signal) m.train() m.dump(output_model)
def sendData(link, data, timestart):
    """POST a chunk of audio to *link*; when the server replies with a
    trigger, run an on-device voice-verification flow (record, predict,
    play result sounds) before resuming capture.

    NOTE(review): control-flow nesting of the trailing cleanup block was
    reconstructed from a collapsed layout -- verify against upstream.
    """
    global pause
    global voiceMode
    global fail
    global rawYData
    global e
    time = datetime.datetime.now()
    #print(time.strftime('%Y-%m-%d %H:%M:%S'))
    # payload schema expected by the server endpoint
    jsonData = {
        'wave': data.tolist(),
        'sr': 16000,
        'time_start': time.strftime('%Y-%m-%d %H:%M:%S'),
        'user_id_id': 1
    }
    jsonData = json.dumps(jsonData, sort_keys=True)
    r = requests.post(url=link, data=jsonData)
    print("The response is:%s" % r.text)
    trigger = json.loads(r.text)["Trigger"]
    labelsound = str(json.loads(r.text)["Labels"])
    #trigger = r.json()
    #trigger = trigger[0]['Trigger']
    print trigger
    if voiceMode and trigger == True:
        # suspend normal capture while the verification dialogue runs
        voiceMode = False
        pause = True
        print 'pause'
        model = ModelInterface.load("m.out")
        #fs,array = read_wav("start.mp3")
        #sd.play(array,fs)
        mixer.init(16000)
        mixer.music.load('start.mp3')
        mixer.music.play()
        ti.sleep(2)
        print "Recording Voice..."
        samplerate = 44100  # Hertz
        duration = 5  #8s
        # blocking mono recording of `duration` seconds
        predictData = sd.rec(int(samplerate * duration),
                             samplerate=samplerate,
                             channels=1,
                             blocking=True)
        signal = np.array(predictData).flatten()
        #VAD filter
        #-------
        label = model.predict(samplerate, signal)
        if label == 'unknown':
            # first attempt failed: play the failure prompt and retry once
            mixer.music.load('fail.mp3')
            mixer.music.play()
            ti.sleep(3)
            print "Recording Voice..."
            predictData = sd.rec(int(samplerate * duration),
                                 samplerate=samplerate,
                                 channels=1,
                                 blocking=True)
            signal = np.array(predictData).flatten()
            label1 = model.predict(samplerate, signal)
            #print label1
            if label1 == "unknown":
                # second failure: flag it and notify with the server's label
                fail = True
                mixer.music.load('fail.mp3')
                mixer.music.play()
                ti.sleep(3)
                notifyFailer(labelsound)
        if (fail == False):
            mixer.music.load('success.mp3')
            mixer.music.play()
            ti.sleep(30)
        #predictData = np.asarray(predictData,dtype="float32")
        #write_wav("test.wav",samplerate,signal)
        #sf.write("test.wav", signal, samplerate)
        print "done"
        # reset shared state so the capture loop starts a fresh cycle
        rawYData = []
        e = 0
        fail = False
        pause = False
        voiceMode = True
from pandas import DataFrame, Series
from emitStack import EmitStack
import os
import numpy as np
import sys
# make the bundled gui package importable before importing from it
sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)), 'gui'))
from gui.interface import ModelInterface

# load a previously dumped model if present, else start with a fresh one
modelName = "model.out"
model = ModelInterface(
) if not os.path.isfile(modelName) else ModelInterface.load(modelName)
modelDict = {}        # groupid -> ModelInterface (see handleAction "AUDIO")
train_second = 2      # seconds of buffered audio per training emit
participantDict = {}  # groupid -> {participantid: label}
bufferDict = {}       # groupid -> EmitStack audio buffer
stack = EmitStack(2)
train_stack = EmitStack(2)
dataPathDict = {}     # groupid -> output paths / meeting metadata
room = []             # users currently registered (socket rooms)
OUTPUTPATH = 'Output/'
def handleAction(action):
    """Dispatch one client action dict on its ``action['type']``.

    Handles enrollment audio, training, prediction, meeting file output,
    and socket-room bookkeeping, mutating the module-level dictionaries
    (modelDict, bufferDict, participantDict, dataPathDict, room).

    NOTE(review): indentation of a few statements (e.g. ``print audioArr``
    and the sound-file block in "PREDICT") was reconstructed from a
    collapsed layout -- verify against upstream.
    """
    if action['type'] == "AUDIO":
        # enrollment audio chunk for one participant of one group
        label = action['label']
        sampleRate = action['sampleRate']
        audio = action['audio']
        length = action['length']
        groupid = action['groupid']
        participantid = action['participantid']
        # lazily create per-group model / buffer / participant map
        if groupid not in modelDict.keys():
            modelDict[groupid] = ModelInterface()
        if groupid not in bufferDict.keys():
            bufferDict[groupid] = EmitStack(train_second)
        if groupid not in participantDict.keys():
            participantDict[groupid] = {}
        if participantid not in participantDict[groupid].keys():
            participantDict[groupid][participantid] = label
        # audio arrives as an {index: sample} mapping; rebuild the array
        audioArr = np.empty(length)
        for key, value in audio.iteritems():
            audioArr[int(key)] = value
        bufferDict[groupid].extend(audioArr, length)
        if bufferDict[groupid].canEmit(sampleRate):
            emitArr = bufferDict[groupid].emitLabel()
            modelDict[groupid].enroll(participantid, sampleRate, emitArr)
        print audioArr
    elif action['type'] == "TRAIN_GROUP":
        groupid = action['groupid']
        # flush any remaining buffered audio before training
        if groupid in bufferDict.keys():
            bufferDict[groupid].emitLabel()
        if groupid in modelDict.keys():
            modelDict[groupid].train()
            print "Finish training"
        else:
            print "No model with group id {0}".format(groupid)
    elif action['type'] == "STARTOVER":
        # discard all state for the group (keeps meeting file paths)
        groupid = action['groupid']
        modelDict.pop(groupid, None)
        bufferDict.pop(groupid, None)
        participantDict.pop(groupid, None)
        print "group id {0} starts over".format(groupid)
    elif action['type'] == "FINISH":
        # discard all state for the group, including file paths
        groupid = action['groupid']
        modelDict.pop(groupid, None)
        bufferDict.pop(groupid, None)
        participantDict.pop(groupid, None)
        dataPathDict.pop(groupid, None)
        print "group id {0} finish".format(groupid)
    elif action['type'] == "PREDICT":
        sampleRate = action['sampleRate']
        audio = action['audio']
        length = action['length']
        groupid = action['groupid']
        audioArr = np.empty(length)
        for key, value in audio.iteritems():
            audioArr[int(key)] = value
        predict_label = "N/A"
        if groupid in bufferDict.keys() and groupid in modelDict.keys(
        ) and groupid in participantDict.keys():
            bufferDict[groupid].extend(audioArr, length)
            if bufferDict[groupid].canEmit(sampleRate):
                emitArr = bufferDict[groupid].emitLabel()
                # simple energy gate before running the classifier
                voiceThreshold = 0.02
                if np.mean(emitArr[emitArr > 0]) > voiceThreshold:
                    label = modelDict[groupid].predict(sampleRate, emitArr)
                    emitSecond = stack.emitHeight
                    if label in participantDict[groupid].keys():
                        print participantDict[groupid][label]
                        predict_label = label
                        emit(
                            'data', {
                                'type': 'DATA',
                                'label': participantDict[groupid][label],
                                'second': emitSecond
                            })
                else:
                    print "No one is speaking"
                # append one row per emitted window to the meeting CSV
                if not "csvFile" in dataPathDict[groupid].keys():
                    dataPathDict[groupid]["csvFile"] = DataFrame(
                        columns=('group id', 'time', 'participant id',
                                 'condition', 'meeting', 'date'))
                tempDict = dataPathDict[groupid]
                tempDict["csvFile"].loc[len(tempDict["csvFile"])] = [
                    groupid, tempDict['time'], predict_label,
                    tempDict['condition'], tempDict['meeting'],
                    tempDict['date']
                ]
                tempDict['time'] = tempDict['time'] + train_second
        else:
            print "Group {0} is not registerd".format(groupid)
        # always append the raw audio to the meeting's wav file
        if not "soundFile" in dataPathDict[groupid].keys():
            sound_format = Format('wav')
            dataPathDict[groupid]["soundFile"] = Sndfile(
                dataPathDict[groupid]["soundPath"], 'w', sound_format, 1,
                sampleRate)
        dataPathDict[groupid]["soundFile"].write_frames(audioArr)
    elif action['type'] == "OPEN_MEETING":
        # create Output/<group>/<meeting>/ and record output file paths
        groupid = action['groupid']
        condition = action['condition']
        meeting = action['meeting']
        nowString = datetime.now().strftime('%Y%m%d%H%M%S')
        nowFormat = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        if not os.path.exists(OUTPUTPATH):
            os.mkdir(OUTPUTPATH)
        groupPath = os.path.join(OUTPUTPATH, groupid + '/')
        if not os.path.exists(groupPath):
            os.mkdir(groupPath)
        meetingPath = os.path.join(groupPath, meeting + '/')
        if not os.path.exists(meetingPath):
            os.mkdir(meetingPath)
        filename = groupid + '-' + nowString
        if groupid not in dataPathDict.keys():
            dataPathDict[groupid] = {}
        dataPathDict[groupid]['soundPath'] = os.path.join(
            meetingPath, filename + '.wav')
        dataPathDict[groupid]['csvPath'] = os.path.join(
            meetingPath, filename + '.csv')
        dataPathDict[groupid]['condition'] = condition
        dataPathDict[groupid]['meeting'] = meeting
        dataPathDict[groupid]['date'] = nowFormat
        dataPathDict[groupid]['time'] = 0
    elif action['type'] == "CLOSE_MEETING":
        # flush and release the group's wav + CSV outputs
        groupid = action['groupid']
        dataPathDict[groupid]['soundFile'].close()
        dataPathDict[groupid].pop('soundFile', None)
        print "Sound file finish recorded"
        dataPathDict[groupid]['csvFile'].to_csv(
            path_or_buf=dataPathDict[groupid]['csvPath'])
        dataPathDict[groupid].pop('csvFile', None)
        print "CSV finish recorded"
    elif action['type'] == "REGISTER_GROUP":
        groupid = action['groupid']
        join_room(groupid)
        print "{0} is register".format(groupid)
    elif action['type'] == "REGISTER":
        user = action['user']
        room.append(user)
        join_room(user)
        print "{0} is register".format(user)
    elif action['type'] == "LEAVE":
        user = action['user']
        leave_room(user)
        room.remove(user)
        print "{0} leaves".format(user)
    else:
        print "This action is not handled yet"
def task_predict(input_files, input_model): m = ModelInterface.load(input_model) for f in glob.glob(input_files): fs, signal = wavfile.read(f) label = m.predict(fs, signal) print f, '->', label
from pandas import DataFrame, Series
from emitStack import EmitStack
import os
import numpy as np
import sys
# make the bundled gui package importable before importing from it
sys.path.append(os.path.join(
    os.path.dirname(os.path.realpath(__file__)), 'gui'))
from gui.interface import ModelInterface

# load a previously dumped model if present, else start with a fresh one
modelName = "model.out"
model = ModelInterface() if not os.path.isfile(modelName) else ModelInterface.load(modelName)
modelDict = {}        # groupid -> ModelInterface
train_second = 2      # seconds of buffered audio per training emit
participantDict = {}  # groupid -> {participantid: label}
bufferDict = {}       # groupid -> EmitStack audio buffer
stack = EmitStack(2)
train_stack = EmitStack(2)
dataPathDict = {}     # groupid -> output paths / meeting metadata
room = []             # users currently registered (socket rooms)
OUTPUTPATH = 'Output/'
class Main(QMainWindow): CONV_INTERVAL = 0.2 CONV_DURATION = 0.5 SMALL_TRESHOLD = 0.6 CONV_FILTER_DURATION = CONV_DURATION FS = 8000 TEST_DURATION = 3 def google_service_producer(self): ###speech to text credentials = GoogleCredentials.get_application_default( ).create_scoped(['https://www.googleapis.com/auth/cloud-platform']) http = httplib2.Http() credentials.authorize(http) return discovery.build('speech', 'v1beta1', http=http, discoveryServiceUrl=DISCOVERY_URL) ### #checkbox def show_checkbox(self): self.available_to_enroll = QGroupBox("users") self.vbox = QVBoxLayout() for user in self.userdata: print(user) checkbox = QtGui.QCheckBox(str(user[0])) checkbox.setGeometry(QtCore.QRect(50, 390, 71, 21)) self.vbox.addWidget(checkbox) self.available_to_enroll.setLayout(self.vbox) def __init__(self, parent=None): QWidget.__init__(self, parent) uic.loadUi("gui/edytor.ui", self) self.last_none_start = 0 self.last_switch_user = 0 self.last_enter_none_handler = False self.last_user_detected = 'None' self.statusBar() self.check_result = False self.timer = QTimer(self) self.timer.timeout.connect(self.timer_callback) self.noiseButton.clicked.connect(self.noise_clicked) self.recording_noise = False self.loadNoise.clicked.connect(self.load_noise) self.AddUser.clicked.connect(self.add_user) self.enrollRecord.clicked.connect(self.start_enroll_record) self.stopEnrollRecord.clicked.connect(self.stop_enroll_record) self.enrollFile.clicked.connect(self.enroll_file) self.enroll.clicked.connect(self.do_enroll) self.startTrain.clicked.connect(self.start_train) self.dumpBtn.clicked.connect(self.dump) self.loadBtn.clicked.connect(self.load) self.recoRecord.clicked.connect(self.start_reco_record) self.stopRecoRecord.clicked.connect(self.stop_reco_record) # self.newReco.clicked.connect(self.new_reco) self.recoFile.clicked.connect(self.reco_file) self.recoInputFiles.clicked.connect(self.reco_files) #UI.init self.userdata = [] self.loadUsers() 
self.Userchooser.currentIndexChanged.connect(self.showUserInfo) self.ClearInfo.clicked.connect(self.clearUserInfo) self.UpdateInfo.clicked.connect(self.updateUserInfo) self.UploadImage.clicked.connect(self.upload_avatar) #movie test self.movie = QMovie(u"gui/image/recording.gif") self.movie.start() self.movie.stop() self.Animation.setMovie(self.movie) self.Animation_2.setMovie(self.movie) self.Animation_3.setMovie(self.movie) self.aladingpic = QPixmap(u"gui/image/a_hello.png") self.Alading.setPixmap(self.aladingpic) self.Alading_conv.setPixmap(self.aladingpic) #default user image setting self.avatarname = "gui/image/nouser.jpg" self.defaultimage = QPixmap(self.avatarname) self.Userimage.setPixmap(self.defaultimage) self.recoUserImage.setPixmap(self.defaultimage) self.convUserImage.setPixmap(self.defaultimage) self.load_avatar('gui/avatar/') #quick enroll self.show_checkbox() self.checkbox.setWidget(self.available_to_enroll) self.LoadAll.clicked.connect(self.enroll_checklist) #Conversation Mode Variables self.conv_record = np.array([], dtype=NPDtype) self.time_init = QTimer(self) self.current_label = None # Graph Window init self.graphwindow = GraphWindow() self.newname = "" self.lastname = "" self.Graph_button.clicked.connect(self.graphwindow.show) self.convRecord.clicked.connect(self.start_conv_record) self.convStop.clicked.connect(self.stop_conv) self.generateTranscript.clicked.connect(self.generate_transcript) self.backend = ModelInterface() # debug QShortcut(QKeySequence("Ctrl+P"), self, self.printDebug) #init try: fs, signal = read_wav("bg.wav") self.backend.init_noise(fs, signal) except: pass # def ##check enroll def enroll_checklist(self): all_check_box = self.available_to_enroll.findChildren(QtGui.QCheckBox) for check_box in all_check_box: # print(str(check_box.text())+" : "+str(check_box.isChecked())) if (check_box.isChecked()): self.load_check_box(str(check_box.text())) def load_check_box(self, label): fname = "gmms/" + label + ".model" if fname == None: 
self.status("no user") return #fname = QFileDialog.getOpenFileName(self, "Open Data File:", "", "") if fname: if self.backend == None: try: self.backend = ModelInterface() self.backend.load(fname, label) except Exception as e: self.warn(str(e)) else: try: self.backend.load(fname, label) except Exception as e: self.warn(str(e)) self.status("loaded all") ############ RECORD def start_record(self): self.pyaudio = pyaudio.PyAudio() self.status("Recording...") self.movie.start() self.Alading.setPixmap(QPixmap(u"gui/image/a_thinking.png")) self.recordData = [] self.stream = self.pyaudio.open(format=FORMAT, channels=1, rate=Main.FS, input=True, frames_per_buffer=1) self.stopped = False self.reco_th = RecorderThread(self) self.reco_th.start() self.timer.start(1000) self.record_time = 0 self.update_all_timer() def add_record_data(self, i): self.recordData.append(i) return self.stopped def timer_callback(self): self.record_time += 1 self.status("Recording..." + time_str(self.record_time)) self.update_all_timer() def stop_record(self): self.movie.stop() self.stopped = True self.reco_th.wait() self.timer.stop() self.stream.stop_stream() self.stream.close() self.pyaudio.terminate() self.status("Record stopped") ############## conversation def start_conv_record(self): if not (self.check_result): path = 'speech/' queue = 'queue/' files = glob.glob(path + queue + '*.wav') for file in files: os.remove(file) progress = 'progress/' files = glob.glob(path + progress + '*.wav') for file in files: os.remove(file) done = 'done/' files = glob.glob(path + done + '*.wav') for file in files: os.remove(file) result = 'result/' files = glob.glob(path + result + '*.wav') for file in files: os.remove(file) self.conv_result_list = [] self.start_record() self.recording_id = 0 self.recording_result = 0 self.conv_now_pos = 0 self.conv_timer = QTimer(self) self.conv_timer.timeout.connect(self.do_conversation) self.conv_timer.start(Main.CONV_INTERVAL * 1000) #reset self.graphwindow.wid.reset() 
self.conv_threading = TranscriptThread(self) self.conv_threading.start() self.check_result = True else: pass def stop_conv(self): self.recording_id = 0 self.stop_record() self.conv_timer.stop() self.check_result = False def do_conversation(self): interval_len = int(Main.CONV_INTERVAL * Main.FS) segment_len = int(Main.CONV_DURATION * Main.FS) self.conv_now_pos += interval_len to_filter = self.recordData[max([self.conv_now_pos - segment_len, 0]):self.conv_now_pos] signal = np.array(to_filter, dtype=NPDtype) label = 'None' try: signal = self.backend.filter(Main.FS, signal) if len(signal) > 50: label = self.backend.predict(Main.FS, signal) # if ((label!=self.current_label)&(label!=None)): # write_wav("try.wav",Main.FS,self.conv_record) # self.conv_record=np.array([], dtype=NPDtype) # self.current_label=label # self.conv_record=np.concatenate((self.conv_record,signal),axis=0) # print(self.conv_record.shape) # else: # self.conv_record=np.concatenate((self.conv_record,signal),axis=0) except Exception as e: print(traceback.format_exc()) print(str(e)) global last_label_to_show label_to_show = label directory = 'speech/queue/' if label and len(self.conv_result_list) != 0: last_label = self.conv_result_list[-1] if last_label and last_label != label: label_to_show = last_label_to_show if (last_label != label) and (last_label != 'None'): file_name = str( self.recording_id) + '_' + last_label + '_' + str( self.last_switch_user / Main.FS) + '_' + str( self.conv_now_pos / Main.FS) + '.wav' print(file_name) write_wav( directory + file_name, Main.FS, self.backend.filter( Main.FS, np.array( self.recordData[self.last_switch_user - segment_len:self.conv_now_pos], dtype=NPDtype))) threading.Thread(target=SpeechToText, args=( file_name, 'speech', 'queue', 'done', 'progress', 'result', self.google_service_producer(), )).start() self.recording_id += 1 if (last_label != label): self.last_switch_user = self.conv_now_pos self.conv_result_list.append(label) print(label_to_show, "label to 
show") last_label_to_show = label_to_show #ADD FOR GRAPH if label_to_show is None: label_to_show = 'Nobody' if len(NAMELIST) and NAMELIST[-1] != label_to_show: NAMELIST.append(label_to_show) self.convUsername.setText(label_to_show) self.Alading_conv.setPixmap(QPixmap(u"gui/image/a_result.png")) self.convUserImage.setPixmap(self.get_avatar(label_to_show)) # print to transcript area # if (last_label!=label) and (label!='None'): # self.TranscriptArea.append(str(label)) def generate_transcript(self): fname = QFileDialog.getSaveFileName(self, "Save Transcript Result", "") if not fname: return f = open(fname, 'w') # print('halo'+self.TranscriptArea.toPlainText()) f.write(self.TranscriptArea.toPlainText()) ###### RECOGNIZE def start_reco_record(self): self.Alading.setPixmap(QPixmap(u"gui/image/a_hello.png")) self.recoRecordData = np.array((), dtype=NPDtype) self.start_record() def stop_reco_record(self): self.stop_record() signal = np.array(self.recordData, dtype=NPDtype) self.reco_remove_update(Main.FS, signal) def reco_do_predict(self, fs, signal): label = self.backend.predict(fs, signal) if not label: label = "Nobody" print(label) self.recoUsername.setText(label) self.Alading.setPixmap(QPixmap(u"gui/image/a_result.png")) self.recoUserImage.setPixmap(self.get_avatar(label)) # TODO To Delete write_wav('reco.wav', fs, signal) def reco_remove_update(self, fs, signal): new_signal = self.backend.filter(fs, signal) print("After removed: {0} -> {1}".format(len(signal), len(new_signal))) self.recoRecordData = np.concatenate((self.recoRecordData, new_signal)) real_len = float(len( self.recoRecordData)) / Main.FS / Main.TEST_DURATION * 100 if real_len > 100: real_len = 100 self.reco_do_predict(fs, self.recoRecordData) def reco_file(self): fname = QFileDialog.getOpenFileName(self, "Open Wav File", "", "Files (*.wav)") print('reco_file') if not fname: return self.status(fname) fs, signal = read_wav(fname) self.reco_do_predict(fs, signal) def reco_files(self): fnames = 
QFileDialog.getOpenFileNames(self, "Select Wav Files", "", "Files (*.wav)") print('reco_files') for f in fnames: fs, sig = read_wav(f) newsig = self.backend.filter(fs, sig) label = self.backend.predict(fs, newsig) print(f, label) ########## ENROLL def start_enroll_record(self): self.enrollWav = None self.enrollFileName.setText("") self.start_record() def enroll_file(self): fname = QFileDialog.getOpenFileName(self, "Open Wav File", "", "Files (*.wav)") if not fname: return self.status(fname) self.enrollFileName.setText(fname) fs, signal = read_wav(fname) signal = monophonic(signal) self.enrollWav = (fs, signal) def stop_enroll_record(self): self.stop_record() print(self.recordData[:300]) signal = np.array(self.recordData, dtype=NPDtype) self.enrollWav = (Main.FS, signal) # TODO To Delete write_wav('enroll.wav', *self.enrollWav) new_signal = self.backend.filter(*self.enrollWav) if (not len(new_signal) == 0): userindex = self.Userchooser.currentIndex() - 1 if (userindex > -1): u = self.userdata[userindex] i = 1 while (os.path.exists('suara/' + u[0] + '/' + str(i) + '.wav')): i = i + 1 shutil.copy2('enroll.wav', 'suara/' + u[0] + '/' + str(i) + '.wav') else: self.status("Please select user.") else: self.status("Input is silent.") def do_enroll(self): name = self.Username.text() if not name: self.warn("Please Input Your Name") return # self.addUserInfo() # new_signal = self.backend.filter(*self.enrollWav) # print ("After removed: {0} -> {1}".format(len(self.enrollWav[1]), len(new_signal))) # print ("Enroll: {:.4f} seconds".format(float(len(new_signal)) / Main.FS)) # if len(new_signal) == 0: # print( "Error! Input is silent! 
Please enroll again") # return path = 'suara/' + str(name) + '/' wavs = glob.glob(path + '*.wav') for wav in wavs: fs, signal = read_wav(wav) new_signal = self.backend.filter(fs, signal) self.backend.enroll(name, fs, new_signal) def start_train(self): self.status("Training...") self.backend.train() self.status("Training Done.") ####### UI related def getWidget(self, splash): t = QtCore.QElapsedTimer() t.start() while (t.elapsed() < 800): showing = "times = " + str(t.elapsed()) splash.showMessage(showing) QtCore.QCoreApplication.processEvents() def upload_avatar(self): fname = QFileDialog.getOpenFileName(self, "Open JPG File", "", "File (*.jpg)") if not fname: return self.avatarname = fname self.Userimage.setPixmap(QPixmap(fname)) userindex = self.Userchooser.currentIndex() - 1 if (userindex > -1): u = self.userdata[userindex] shutil.copy2(fname, 'gui/avatar/' + u[0] + '.jpg') else: self.status("Please select user.") def loadUsers(self): with open("gui/avatar/metainfo.txt") as db: for line in db: tmp = line.split() self.userdata.append(tmp) self.Userchooser.addItem(tmp[0]) newpath = 'suara/' + tmp[0] if not (os.path.exists(newpath)): os.makedirs(newpath) def showUserInfo(self): for user in self.userdata: if self.userdata.index( user) == self.Userchooser.currentIndex() - 1: self.Username.setText(user[0]) self.Userage.setValue(int(user[1])) if user[2] == 'F': self.Usersex.setCurrentIndex(1) else: self.Usersex.setCurrentIndex(0) if (os.path.exists('gui/avatar/' + str(user[0]) + '.jpg')): self.Userimage.setPixmap( QPixmap('gui/avatar/' + str(user[0]) + '.jpg')) else: self.Userimage.setPixmap(QPixmap("gui/image/nouser.jpg")) def updateUserInfo(self): userindex = self.Userchooser.currentIndex() - 1 if (userindex > -1): u = self.userdata[userindex] u[0] = str(self.Username.displayText()) u[1] = self.Userage.value() if self.Usersex.currentIndex(): u[2] = 'F' else: u[2] = 'M' with open("gui/avatar/metainfo.txt", "w") as db: for user in self.userdata: for i in range(3): 
db.write(str(user[i]) + " ") db.write("\n") else: self.status("Please select user.") def writeuserdata(self): with open("gui/avatar/metainfo.txt", "w") as db: for user in self.userdata: print(len(user)) for i in range(0, len(user)): db.write(str(user[i]) + " ") db.write("\n") def clearUserInfo(self): # self.Username.setText("") # self.Userage.setValue(0) # self.Usersex.setCurrentIndex(0) # self.Userimage.setPixmap(self.defaultimage) # try: print(self.Username.displayText()) for user, index in zip(self.userdata, range(0, len(self.userdata))): print(user) if self.userdata[index][0] == self.Username.displayText(): self.userdata.pop(index) self.Userchooser.removeItem(index + 1) self.vbox.removeWidget(index) break self.writeuserdata() # checkbox = QtGui.QCheckBox(str(newuser[0])) # checkbox.setGeometry(QtCore.QRect(50,390,71,21)) # self.vbox.addWidget(checkbox) # self.available_to_enroll.setLayout(self.vbox) self.status('not found user') return # self.userdata.remove(self.Username.displayText()) # except: # self.status('the user is not exist') def addUserInfo(self): for user in self.userdata: if user[0] == str(self.Username.displayText()): return if len(self.userdata) < 10: newuser = [] newuser.append(str(self.Username.displayText())) newuser.append(self.Userage.value()) if self.Usersex.currentIndex(): newuser.append('F') else: newuser.append('M') if self.avatarname: shutil.copy(self.avatarname, 'gui/avatar/' + user[0] + '.jpg') self.userdata.append(newuser) self.writeuserdata() self.Userchooser.addItem(str(self.Username.displayText())) # checkbox = QtGui.QCheckBox(str(newuser[0])) # checkbox.setGeometry(QtCore.QRect(50,390,71,21)) # self.vbox.addWidget(checkbox) # self.available_to_enroll.setLayout(self.vbox) # self.show_checkbox() else: self.status('you cannot add more user') return ############# UTILS def warn(self, s): QMessageBox.warning(self, "Warning", s) def status(self, s=""): self.statusBar().showMessage(s) def update_all_timer(self): s = 
time_str(self.record_time) self.enrollTime.setText(s) self.recoTime.setText(s) self.convTime.setText(s) def dump(self): #fname = QFileDialog.getSaveFileName(self, "Save Data to:", "", "") fname = "gmms/" if fname: try: self.backend.dump(fname) except Exception as e: self.warn(str(e)) else: self.status("Dumped to file: " + fname) def load(self): # for user in self.userdata: # if self.userdata.index(user) == self.Userchooser.currentIndex() - 1: # file_name = user[0] # break userindex = self.Userchooser.currentIndex() - 1 if (userindex > -1): file_name = self.userdata[userindex][0] #self.enroll_checklist() if file_name == None: self.status("no user") return fname = "gmms/" + file_name + ".model" #fname = QFileDialog.getOpenFileName(self, "Open Data File:", "", "") if fname: if self.backend == None: try: self.backend = ModelInterface() self.backend.load(fname, file_name) except Exception as e: self.warn(str(e)) else: try: self.backend.load(fname, file_name) except Exception as e: self.warn(str(e)) self.status("loaded model " + file_name) else: self.status("Please select user.") def noise_clicked(self): self.recording_noise = not self.recording_noise if self.recording_noise: self.noiseButton.setText('Stop Recording Noise') self.start_record() else: self.noiseButton.setText('Recording Background Noise') self.stop_record() signal = np.array(self.recordData, dtype=NPDtype) wavfile.write("bg.wav", Main.FS, signal) self.backend.init_noise(Main.FS, signal) def load_noise(self): fname = QFileDialog.getOpenFileName(self, "Open Data File:", "", "Wav File (*.wav)") if fname: fs, signal = read_wav(fname) self.backend.init_noise(fs, signal) def add_user(self): u = [] u.append(str(self.Username.displayText())) u.append(self.Userage.value()) if self.Usersex.currentIndex(): u.append('F') else: u.append('M') valid = True for user in self.userdata: if (user[0] == u[0]): valid = False if ((u[0] != '') & (u[1] != 0)): if (valid): with open("gui/avatar/metainfo.txt", "a") as db: for i in 
range(3): db.write(str(u[i]) + " ") db.write("\n") newpath = 'suara/' + u[0] if not (os.path.exists(newpath)): os.makedirs(newpath) self.userdata.append(u) self.Userchooser.addItem(u[0]) else: self.status("The username has already been taken.") else: self.status("Please fill the form.") def load_avatar(self, dirname): self.avatars = {} for f in glob.glob(dirname + '/*.jpg'): name = os.path.basename(f).split('.')[0] print(f, name) self.avatars[name] = QPixmap(f) def get_avatar(self, username): fname = 'gui/avatar/' + username + '.jpg' if (fname != None): return QPixmap(fname) else: return QPixmap(self.defaultimage) def printDebug(self): for name, feat in self.backend.features.iteritems(): print(name, len(feat)) print("GMMs") print(len(self.backend.gmmset.gmms)) '''
parser.add_argument('-t', '--task', help='Task to do. Either "enroll" or "predict"', required=True) parser.add_argument('-i', '--input', help='Input Files(to predict) or Directories(to enroll)', required=True) parser.add_argument('-m', '--model', help='Model file to save(in enroll) or use(in predict)', required=True) ret = parser.parse_args() return ret m_enroll = ModelInterface() def task_enroll(input_dirs, output_model): input_dirs = [os.path.expanduser(k) for k in input_dirs.strip().split()] dirs = itertools.chain(*(glob.glob(d) for d in input_dirs)) dirs = [d for d in dirs if os.path.isdir(d)] files = [] if len(dirs) == 0: print "No valid directory found!" sys.exit(1) for d in dirs: label = os.path.basename(d.rstrip('/')) wavs = glob.glob(d + '/*.wav') if len(wavs) == 0:
def __init__(self, parent=None): QWidget.__init__(self, parent) uic.loadUi("gui/edytor.ui", self) self.last_none_start = 0 self.last_switch_user = 0 self.last_enter_none_handler = False self.last_user_detected = 'None' self.statusBar() self.check_result = False self.timer = QTimer(self) self.timer.timeout.connect(self.timer_callback) self.noiseButton.clicked.connect(self.noise_clicked) self.recording_noise = False self.loadNoise.clicked.connect(self.load_noise) self.AddUser.clicked.connect(self.add_user) self.enrollRecord.clicked.connect(self.start_enroll_record) self.stopEnrollRecord.clicked.connect(self.stop_enroll_record) self.enrollFile.clicked.connect(self.enroll_file) self.enroll.clicked.connect(self.do_enroll) self.startTrain.clicked.connect(self.start_train) self.dumpBtn.clicked.connect(self.dump) self.loadBtn.clicked.connect(self.load) self.recoRecord.clicked.connect(self.start_reco_record) self.stopRecoRecord.clicked.connect(self.stop_reco_record) # self.newReco.clicked.connect(self.new_reco) self.recoFile.clicked.connect(self.reco_file) self.recoInputFiles.clicked.connect(self.reco_files) #UI.init self.userdata = [] self.loadUsers() self.Userchooser.currentIndexChanged.connect(self.showUserInfo) self.ClearInfo.clicked.connect(self.clearUserInfo) self.UpdateInfo.clicked.connect(self.updateUserInfo) self.UploadImage.clicked.connect(self.upload_avatar) #movie test self.movie = QMovie(u"gui/image/recording.gif") self.movie.start() self.movie.stop() self.Animation.setMovie(self.movie) self.Animation_2.setMovie(self.movie) self.Animation_3.setMovie(self.movie) self.aladingpic = QPixmap(u"gui/image/a_hello.png") self.Alading.setPixmap(self.aladingpic) self.Alading_conv.setPixmap(self.aladingpic) #default user image setting self.avatarname = "gui/image/nouser.jpg" self.defaultimage = QPixmap(self.avatarname) self.Userimage.setPixmap(self.defaultimage) self.recoUserImage.setPixmap(self.defaultimage) self.convUserImage.setPixmap(self.defaultimage) 
self.load_avatar('gui/avatar/') #quick enroll self.show_checkbox() self.checkbox.setWidget(self.available_to_enroll) self.LoadAll.clicked.connect(self.enroll_checklist) #Conversation Mode Variables self.conv_record = np.array([], dtype=NPDtype) self.time_init = QTimer(self) self.current_label = None # Graph Window init self.graphwindow = GraphWindow() self.newname = "" self.lastname = "" self.Graph_button.clicked.connect(self.graphwindow.show) self.convRecord.clicked.connect(self.start_conv_record) self.convStop.clicked.connect(self.stop_conv) self.generateTranscript.clicked.connect(self.generate_transcript) self.backend = ModelInterface() # debug QShortcut(QKeySequence("Ctrl+P"), self, self.printDebug) #init try: fs, signal = read_wav("bg.wav") self.backend.init_noise(fs, signal) except: pass