def training(data_folder, model_output_folder):
    """Train one diagonal-covariance GMM per speaker and pickle it.

    data_folder         => directory containing one sub-directory per speaker,
                           each holding that speaker's audio files
    model_output_folder => directory where "<speaker>.gmm" pickles are written

    Non-directory entries in data_folder are skipped; speakers with no audio
    files are skipped instead of crashing GMM.fit on an empty array.
    """
    # Extracting features for each speaker (10 files per speakers)
    for spk_id in os.listdir(data_folder):
        spk_path = os.path.join(data_folder, spk_id)
        if not os.path.isdir(spk_path):
            continue
        # Accumulate per-file feature matrices in a list and stack once at the
        # end: repeated np.vstack in the loop is O(n^2) in total rows.
        feature_blocks = []
        for spk_file in os.listdir(spk_path):
            file_path = os.path.join(spk_path, spk_file)
            print(file_path)
            # read the audio
            sr, audio = read(file_path)
            # extract 40 dimensional MFCC & delta MFCC features
            vector = extract_features(audio, sr)
            feature_blocks.append(vector)
        if not feature_blocks:
            # No audio for this speaker -- nothing to train.
            continue
        features = np.vstack(feature_blocks)
        # model training
        gmm = GMM(n_components=16, covariance_type='diag', n_init=3)
        gmm.fit(features)
        # dumping the trained gaussian model
        spk_model = "{}.gmm".format(spk_id)
        with open(os.path.join(model_output_folder, spk_model), 'wb') as f:
            pickle.dump(gmm, f)
        print('+ modeling completed for speaker:', spk_model, " with data point = ", features.shape)
def testDataSet(datasetpath, succes_rate):
    """Score every audio file under datasetpath against all stored GMM models
    and print the recognition success rate.

    datasetpath => root directory with one sub-folder per speaker; each folder
                   name must match the model file name minus its ".gmm" suffix
    succes_rate => starting hit counter (normally 0)

    Side effect: appends to the module-level `models_name` / `models` lists.
    """
    # NOTE(review): `models_name` and `models` are module-level lists -- TODO
    # confirm they are defined (and empty) before this is called.
    for model in os.listdir('GMMModels/'):
        models_name.append(model)
        models.append(Models.retrieveModels('GMMModels/' + model))
    total_files = 0
    for folder in os.listdir(datasetpath):
        for audio in os.listdir(os.path.join(datasetpath, folder)):
            total_files += 1
            samplerate, audiofile = rd(os.path.join(datasetpath, folder, audio))
            mfcc_features = extract_features(audiofile, samplerate)
            # Pick the model with the highest log-likelihood score.
            max_score = -999999
            max_model = None
            for name, model in zip(models_name, models):
                score = model.score(mfcc_features)
                if score > max_score:
                    max_score = score
                    max_model = name
            # Model files are named "<speaker>.gmm": strip the 4-char suffix.
            if max_model is not None and max_model[:len(max_model) - 4] == folder:
                succes_rate += 1
    # Guard against an empty dataset (the original raised ZeroDivisionError).
    if total_files:
        succes_rate = (succes_rate * 1.00) / total_files
        print(str((succes_rate) * 100) + "%")
def click():
    """GUI callback: verify that the user named in the entry widget matches
    the speaker of the audio file named in the second entry widget.

    Scores the file's MFCC features against every loaded GMM, sorts the
    speaker names by likelihood, and authorises the user only if a second
    cross-check (covar) agrees and the user is among the top two speakers.
    """
    # NOTE(review): depends on module-level state: txt, txt2, win2, source,
    # models, gmm_files, sort_list, covar, lock, read, extract_features --
    # TODO confirm these are initialised before the callback can fire.
    username = txt.get()
    path = txt2.get()
    sr, audio = read(source + path)
    vector = extract_features(audio, sr)
    #print("vector=",vector)
    log_likelihood = np.zeros(len(models))
    lbl3 = Label(win2, text=" ")
    lbl3.grid(column=0, row=4)
    for i in range(len(models)):
        gmm = models[i]  #checking with each model one by one
        scores = np.array(gmm.score(vector))
        #print(scores)
        log_likelihood[i] = scores.sum()
    # Speaker names are derived from the model file names ("<name>.gmm").
    speakers = [
        fname.split("/")[-1].split(".gmm")[0] for fname in gmm_files
    ]
    speakers = sort_list(speakers, log_likelihood)
    # NOTE(review): log_likelihood is sorted against the ALREADY re-ordered
    # `speakers` list -- presumably intended to keep the two lists parallel;
    # verify sort_list's contract.
    log_likelihood = sort_list(log_likelihood, speakers)
    print(speakers)
    print(log_likelihood)
    #winner = np.argmax(log_likelihood)
    # Authorise only if covar() agrees (>= 2 of 3 model banks) AND the claimed
    # user ranks in the top two speakers by likelihood.
    if covar(audio, sr, username) >= 2 and (username == speakers[0] or username == speakers[1]):
        lbl3['text'] = "Authorised user detected as - " + username
        lock()
    else:
        lbl3['text'] = "User's voice mismatch"
def trainDataByPerson(personID):
    """Train a GMM speaker model from a person's recording directory.

    personID => speaker id; audio is read from "<source><personID>-001/" and
                the model is dumped to "<dest><prefix>.gmm" where prefix is the
                file name up to the first underscore.

    Trains once every 4 files (counter resets afterwards).
    """
    directory_path = source + personID + "-001/"
    print("---->Training data path :" + directory_path)
    features = np.asarray(())
    count = 1
    file_paths = os.listdir(directory_path)
    for path in file_paths:
        path = path.strip()
        print(path)
        # read the audio
        sr, audio = read(directory_path + path)
        # extract 40 dimensional MFCC & delta MFCC features
        vector = extract_features(audio, sr)
        if features.size == 0:
            features = vector
        else:
            features = np.vstack((features, vector))
        # when features of 4 files of speaker are concatenated, then do model training
        # -> if count == 5: --> edited below
        if count == 4:
            #gmm = GMM(n_components = 16, n_iter = 200, covariance_type='diag',n_init = 3)
            gmm = GaussianMixture(n_components=16, covariance_type='diag', n_init=3)
            gmm.fit(features)
            # dumping the trained gaussian model
            #picklefile = path.split("-")[0]+".gmm" Edu commented
            picklefile = path.split("_")[0] + ".gmm"
            # BUG FIX: pickle requires binary mode -- 'w' raises TypeError on
            # Python 3; also close the handle via a context manager.
            with open(dest + picklefile, 'wb') as f:
                pickle.dump(gmm, f)
            print('+ modeling completed for speaker:', picklefile, " with data point = ", features.shape)
            features = np.asarray(())
            count = 0
        count = count + 1
def rollRoom():
    """Flask endpoint: verify that the client's latest test recording matches
    the client's trained voice model; on success mark them as attending.

    Reads host_id/client_id from the query string, looks up the client's test
    folder in the DB, scores the first .wav found there against every stored
    speaker GMM, and returns the winning speaker name with HTTP 200 (match)
    or 400 (mismatch). The test file is deleted either way.
    """
    # NOTE(review): depends on module-level objects: request, cursor `c`,
    # `database` connection, Response, read, extract_features -- confirm.
    host_id = request.args.get('host_id')
    client_id = request.args.get('client_id')
    print("HOSTID")
    print(host_id)
    print("cLIENTID")
    print(client_id)
    #cur = mysql.connection.cursor()
    # Parameterised query -- client_id comes from the request.
    c.execute("SELECT test_folder FROM user WHERE client_id = %s", [client_id])
    user = c.fetchone()
    testFolder = user['test_folder']
    print("TEST FOLDER ")
    print(testFolder)
    modelpath = "Speakers_models/"
    gmm_files = [
        os.path.join(modelpath, fname) for fname in os.listdir(modelpath)
        if fname.endswith('.gmm')
    ]
    models = [pickle.load(open(fname, 'rb')) for fname in gmm_files]
    # Speaker names come from the model file names ("<name>.gmm").
    speakers = [fname.split("/")[-1].split(".gmm")[0] for fname in gmm_files]
    # Use the first .wav file found in the client's test folder.
    path = ""
    for filename in os.listdir(testFolder):
        if filename.endswith(".wav"):
            print(os.path.join(testFolder, filename))
            path = os.path.join(testFolder, filename)
            break
    print("PATH")
    print(path)
    sr, audio = read(path)
    vector = extract_features(audio, sr)
    log_likelihood = np.zeros(len(models))
    for i in range(len(models)):
        gmm = models[i]  #checking with each model one by one
        scores = np.array(gmm.score(vector))
        log_likelihood[i] = scores.sum()
    winner = np.argmax(log_likelihood)
    print("\tdetected as - ", speakers[winner])
    if speakers[winner] == client_id:
        #mycursor = mydb.cursor()
        sql = "UPDATE host_user SET is_attending = %s WHERE client_id = %s AND host_id = %s"
        val = (True, client_id, host_id)
        c.execute(sql, val)
        database.commit()
        os.remove(path)
        return Response(speakers[winner], status=200, mimetype='application/json')
    os.remove(path)
    return Response(speakers[winner], status=400, mimetype='application/json')
def register(c, d):
    """Register a user: append "<c><d>" to database.txt, train GMM speaker
    models from the files listed in trainingDataPath.txt (one model per 3
    files), and show a tkinter confirmation window.

    c, d => strings concatenated to form the database record.
    """
    csv = c + d + "\n"
    # BUG FIX: the original called `f.close` without parentheses, so the file
    # was never actually closed; a context manager guarantees it.
    with open("database.txt", "a") as f:
        f.write(csv)
    warnings.filterwarnings("ignore")
    source = "trainingData/"
    dest = "Speakers_models/"
    train_file = "trainingDataPath.txt"
    count = 1
    features = np.asarray(())
    with open(train_file, 'r') as file_paths:
        for path in file_paths:
            path = path.strip()
            print(path)
            sr, audio = read(source + path)
            print(sr)
            vector = extract_features(audio, sr)
            if features.size == 0:
                features = vector
                print('count', count)
            else:
                features = np.vstack((features, vector))
                print(features.shape)
                print('count', count)
            print('count', count)
            # Train a model after every 3 files of one speaker.
            if count == 3:
                gmm = mixture.GaussianMixture(n_components=16, covariance_type='diag', n_init=3)
                print('path')
                path1 = 'Speakers_models/'
                gmm = gmm.fit(features)
                count = 0
                picklefile = path.split('.')
                print(str(picklefile))
                picklefile = picklefile[0]
                print(picklefile)
                picklefile = picklefile[0:-1] + ".gmm"
                print(picklefile)
                #if(picklefile=='deepankar/deepankar'):
                # BUG FIX: dump the fitted GMM, not the raw feature matrix --
                # the fitted model was otherwise discarded, and downstream
                # code loads *.gmm files and calls .score() on them.
                with open(dest + picklefile, 'wb') as out:
                    pickle.dump(gmm, out)
                print('+ modeling completed for speaker:', picklefile, " with data point = ", features.shape)
                features = np.asarray(())
            count = count + 1
    top = tkinter.Tk()
    top.geometry('200x50')
    top.configure(background="#5c6268")
    L1 = Label(top, text="user registered")
    L1.pack(side=LEFT)
    L1.place(x=10, y=10)
def buildModel():
    """Flask endpoint: train and persist a GMM voice model for one client.

    Looks up the client's training folder in the DB, lists its files into a
    scratch text file, extracts MFCC features from every listed file, fits a
    9-component diagonal GMM, and pickles it as "<client_id>.gmm".
    Returns a plain success string.
    """
    client_id = request.args.get('client_id')
    #cur = mysql.connection.cursor()
    c.execute("SELECT train_folder FROM user WHERE client_id = %s", [client_id])
    user = c.fetchone()
    trainFolder = user['train_folder']
    print("Train folder")
    print(trainFolder)
    train_file = "newmodeltrain.txt"
    # Write the training file list; context managers replace the original
    # unmanaged open/close pairs.
    with open(train_file, 'w+') as fileopen:
        for filename in os.listdir(trainFolder):
            #if filename.endswith(".wav"):
            print(os.path.join(trainFolder, filename))
            fileopen.write(str(trainFolder + "/" + filename + "\n"))
    dest = "Speakers_models/"
    # Accumulate per-file feature matrices and stack once at the end
    # (repeated np.vstack in the loop is O(n^2) in total rows).
    feature_blocks = []
    with open(train_file, 'r') as file_paths:
        for path in file_paths:
            path = path.strip()
            print(path)
            sr, audio = read(path)
            feature_blocks.append(extract_features(audio, sr))
    features = np.vstack(feature_blocks) if feature_blocks else np.asarray(())
    print("Feature")
    print(features)
    # Truncate the scratch list file now that features are extracted.
    open(train_file, 'w').close()
    gmm = mixture.GaussianMixture(n_components=9,
                                  max_iter=200,
                                  covariance_type='diag',
                                  n_init=3)
    gmm.fit(features)
    # dumping the trained gaussian model
    picklefile = client_id + ".gmm"
    #pickle.dump(gmm,open(dest + picklefile,'wb'))
    with open(dest + picklefile, 'wb') as file:
        pickle.dump(gmm, file)
    print('+ modeling completed for speaker:', picklefile,
          " with data point = ", features.shape)
    return 'Build model successfully'
def singleModelTraining(audiopath, modeldest):
    """Train a single speaker GMM from every audio file in one directory.

    audiopath => directory containing the speaker's audio files
    modeldest => destination passed to Models.saveModels; the model name is
                 taken from audiopath[12:] (path prefix of fixed length --
                 presumably "testdata/..." or similar; confirm with callers).
    """
    # Accumulate per-file feature matrices and stack once (avoids the
    # original's O(n^2) repeated np.vstack).
    feature_blocks = []
    for folder in os.listdir(audiopath):
        samplerate, audiofile = rd(audiopath + '/' + folder)
        feature_blocks.append(extract_features(audiofile, samplerate))
    mfcc_features = np.vstack(feature_blocks) if feature_blocks else np.asarray(())
    gmm = GMM(n_components=16, max_iter=200, covariance_type='diag', n_init=3)
    gmm.fit(mfcc_features)
    Models.saveModels(modeldest, gmm, audiopath[12:])
def testSingleaudio(testpath):
    """Identify the most likely speaker for one audio file.

    Scores the file's MFCC features against every model in GMMModels/ and
    returns the name of the best-scoring model file minus its ".gmm" suffix
    (empty string if no model beats the initial floor score).
    """
    rate, signal = rd(testpath)
    feats = extract_features(signal, rate)
    best_score = -999999
    best_speaker = ""
    for model_file in os.listdir('GMMModels/'):
        candidate = Models.retrieveModels('GMMModels/' + model_file)
        candidate_score = candidate.score(feats)
        if candidate_score > best_score:
            best_score = candidate_score
            best_speaker = model_file.split(".gmm")[0]
    return best_speaker
def recognize_file(gmm_model, spk_list, file_path):
    """Identify the most likely speaker for one audio file.

    gmm_model => list of trained GMMs, parallel to spk_list
    spk_list  => list of speaker ids
    file_path => path to the audio file to classify

    Returns (speaker_id, softmax probability of the winning speaker).
    """
    sr, audio = read(file_path)
    vector = extract_features(audio, sr)
    log_likelihood = np.zeros(len(gmm_model))
    for i, gmm in enumerate(gmm_model):
        scores = np.array(gmm.score(vector))
        log_likelihood[i] = scores.sum()
    spk_vote = np.argmax(log_likelihood)
    spk_id = spk_list[spk_vote]
    # BUG FIX: the original computed softmax with raw np.exp(log_likelihood),
    # which overflows/underflows for realistic log-likelihood magnitudes.
    # Shifting by the maximum before exponentiating leaves the ratio
    # mathematically unchanged but numerically stable.
    shifted = np.exp(log_likelihood - log_likelihood[spk_vote])
    probs = shifted[spk_vote] / shifted.sum()
    # print("Testing audio {} was detected as - {}".format(file_path, spk_id))
    return spk_id, probs
def result1():
    """Score every audio file listed in development.txt against all trained
    speaker GMMs and print the best-matching speaker for each.

    Models are loaded from trainmodels/*.gmm; audio is read relative to
    voice/. Prints results only -- returns nothing.
    """
    import os
    import pickle
    import numpy as np
    from scipy.io.wavfile import read
    from featureextraction import extract_features
    import warnings
    warnings.filterwarnings("ignore")
    import time
    #path to training data
    source = "voice/"
    #path where training speakers will be saved
    modelpath = "trainmodels/"
    test_file = "development.txt"
    gmm_files = [
        os.path.join(modelpath, fname) for fname in os.listdir(modelpath)
        if fname.endswith('.gmm')
    ]
    #Load the Gaussian gender Models
    models = [pickle.load(open(fname, 'rb')) for fname in gmm_files]
    speakers = [fname.split("/")[-1].split(".gmm")[0] for fname in gmm_files]
    # Context manager closes the test list (the original leaked the handle).
    with open(test_file, 'r') as file_paths:
        for path in file_paths:
            path = path.strip()
            print("Testing Audio: ", path)
            print(path)
            sr, audio = read(source + path)
            vector = extract_features(audio, sr)
            log_likelihood = np.zeros(len(models))
            for i in range(len(models)):
                gmm = models[i]  #checking with each model one by one
                scores = np.array(gmm.score(vector))
                log_likelihood[i] = scores.sum()
            winner = np.argmax(log_likelihood)
            print(log_likelihood)
            # BUG FIX: the original guarded this with `if path == path:` -- a
            # tautology whose else-branch ("speaker is not detected" + break)
            # was unreachable dead code; it has been removed.
            print("\tdetected as - ", speakers[winner])
            print(" Speaker identified. ")
def main(ARGS):
    """Train speaker GMMs from a list of audio files.

    ARGS.source => directory of training audio
    ARGS.model  => output directory for *.gmm pickles (created if missing)
    ARGS.text   => text file listing audio paths relative to ARGS.source
    ARGS.number => files per speaker; a model is trained every N files
    ARGS.name   => optional fixed model name; otherwise derived from the
                   file name prefix before the first '-'

    Converted from Python-2 style (print statements, text-mode pickle) to
    match the Python 3 used elsewhere in this file.
    """
    source = ARGS.source + "/"
    if not os.path.isdir(ARGS.model):
        os.mkdir(ARGS.model)
    dest = ARGS.model + "/"  #"TrySpeakers_models/"
    train_file = ARGS.text  #"TrainingDataPath.txt"
    count = 1
    # Extracting features for each speaker
    features = np.asarray(())
    with open(train_file, 'r') as file_paths:
        for path in file_paths:
            path = path.strip()
            print(path)
            # Read the audio
            sr, audio = read(source + path)
            # Extract 40 dimensional MFCC & delta MFCC features
            vector = extract_features(audio, sr)
            if features.size == 0:
                features = vector
            else:
                features = np.vstack((features, vector))
            # When features of N files of speaker are concatenated, then do model training
            if count == int(ARGS.number):
                gmm = GMM(n_components=16, n_iter=200, covariance_type='diag', n_init=3)
                gmm.fit(features)
                # Dumping the trained gaussian model
                if ARGS.name:
                    picklefile = ARGS.name + ".gmm"
                else:
                    picklefile = path.split("-")[0] + ".gmm"
                # BUG FIX: pickle requires binary mode ('wb'), not 'w'.
                with open(dest + picklefile, 'wb') as out:
                    cPickle.dump(gmm, out)
                print('+ modeling completed for speaker:', picklefile, " with data point = ", features.shape)
                features = np.asarray(())
                count = 0
            count = count + 1
def verifyVoiceByPerson(personID):
    """Verify a person's test recording against all stored GMM speaker models.

    personID => speaker id; the test file is "<personID>_test.wav" under the
                module-level sourceTest directory, models under `dest`.

    Returns True iff the top-scoring model matches the name prefix of the
    test file (i.e. 100% accuracy on this single sample).
    Converted from Python-2 style to match the Python 3 used elsewhere here.
    """
    path = personID + "_test.wav"
    print("---->Verify voice file :" + path)
    gmm_files = [os.path.join(dest, fname) for fname in os.listdir(dest)
                 if fname.endswith('.gmm')]
    # Load the Gaussian gender Models.
    # BUG FIX: pickle files must be opened in binary mode ('rb'); text mode
    # raises UnicodeDecodeError on Python 3.
    models = [pickle.load(open(fname, 'rb')) for fname in gmm_files]
    speakers = [fname.split("/")[-1].split(".gmm")[0] for fname in gmm_files]
    error = 0
    total_sample = 0.0
    print("Testing Audio : ", path)
    total_sample += 1.0
    path = path.strip()
    print("Single File Test Audio : ", path)
    sr, audio = read(sourceTest + path)
    vector = extract_features(audio, sr)
    log_likelihood = np.zeros(len(models))
    for i in range(len(models)):
        gmm = models[i]  #checking with each model one by one
        scores = np.array(gmm.score(vector))
        log_likelihood[i] = scores.sum()
    winner = np.argmax(log_likelihood)
    print("\tSingle File detected as - ", speakers[winner])
    # Expected speaker name is the file-name prefix before the underscore.
    checker_name = path.split("_")[0]
    if speakers[winner] != checker_name:
        error += 1
    time.sleep(1.0)
    print(error, total_sample)
    accuracy = ((total_sample - error) / total_sample) * 100
    print("The Accuracy Percentage for the current testing Performance with MFCC + GMM is : ", accuracy, "%")
    return (True if accuracy == 100.0 else False)
def Click():
    """GUI callback: record from the microphone, save the clip, score it
    against every loaded speaker GMM, and unlock only when the voice's top-2
    speakers include the face-recognised user AND the covar cross-check
    agrees on at least 2 of its 3 model banks.
    """
    # NOTE(review): depends on module-level state: win3, src (presumably the
    # speech_recognition module), models, gmm_files, sort_list, face_check,
    # covar, lock, read, extract_features -- TODO confirm.
    lbl = Label(win3, text="Please say something:", anchor=W)
    lbl.grid(column=0, row=1)
    recording = src.Recognizer()
    # Fixed threshold; dynamic adjustment disabled so the floor stays stable.
    recording.energy_threshold = 500
    recording.dynamic_energy_threshold = False
    with src.Microphone() as source:
        recording.adjust_for_ambient_noise(source)
        audio = recording.listen(source, timeout=10)
    filename = "storage.wav"
    name = './mic/' + str(filename)
    #file_obj=open(name,"w+")
    lbl2 = Label(win3, text="recorded")
    lbl2.grid(column=1, row=1)
    # Persist the captured audio as a wav file, then re-read it for features.
    with open(name, "wb") as f:
        f.write(audio.get_wav_data())
    sr, audio = read(name)
    vector = extract_features(audio, sr)
    print(sr)
    log_likelihood = np.zeros(len(models))
    lbl3 = Label(win3, text=" ")
    lbl3.grid(column=0, row=1)
    for i in range(len(models)):
        gmm = models[i]  #checking with each model one by one
        scores = np.array(gmm.score(vector))
        log_likelihood[i] = scores.sum()
    #print(log_likelihood)
    # Speaker names derive from the model file names ("<name>.gmm").
    speakers = [
        fname.split("/")[-1].split(".gmm")[0] for fname in gmm_files
    ]
    speakers = sort_list(speakers, log_likelihood)
    print(speakers)
    #winner = np.argmax(log_likelihood)
    #print ("\tdetected as - ", speakers[winner])
    nam = face_check()
    #print(nam)
    if (speakers[0] == nam or speakers[1] == nam) and covar(audio, sr, nam) >= 2:
        lbl3['text'] = "User detected and identified as - " + nam
        lock()
    else:
        lbl3['text'] = "User's voice and face mismatch"
def covar(audio, sr, name):
    """Cross-check a speaker claim against three GMM model banks (spherical,
    diagonal, full covariance).

    audio, sr => audio samples and sample rate
    name      => claimed speaker name

    Returns the number of banks (0-3) whose top-2 likelihood-ranked speakers
    include `name`.
    """
    #source = "./mic/storage.wav"
    #path where training speakers will be saved
    modelpathes = [
        "Speakers_models_spherical/", "Speakers_models_diag/",
        "Speakers_models_full/"
    ]
    user_valid = 0
    for modelpath in modelpathes:
        gmm_files = [
            os.path.join(modelpath, fname) for fname in os.listdir(modelpath)
            if fname.endswith('.gmm')
        ]
        #Load the Gaussian gender Models
        models = [cPickle.load(open(fname, 'rb')) for fname in gmm_files]
        speakers = [
            fname.split("/")[-1].split(".gmm")[0] for fname in gmm_files
        ]
        #sr,audio = read(source)
        vector = extract_features(audio, sr)
        #print("vector=",vector)
        log_likelihood = np.zeros(len(models))
        for i in range(len(models)):
            gmm = models[i]  #checking with each model one by one
            scores = np.array(gmm.score(vector))
            #print(scores)
            log_likelihood[i] = scores.sum()
        # (Removed a redundant recomputation of `speakers` here -- it was
        # byte-identical to the comprehension above; behavior is unchanged.)
        speakers = sort_list(speakers, log_likelihood)
        print(speakers)
        if speakers[0] == name or speakers[1] == name:
            user_valid += 1
    return (user_valid)
def predict_speaker(self, audio, sr):
    '''
    Predicts the speaker to which audio belog to.
    returns 2-tuple
        in-set-speaker  => boolean (winning GMM beats its paired UBM)
        predicted speaker => string
    '''
    vector = extract_features(audio, sr)
    model_count = len(self.gmm_models)
    gmm_log_likelihood = np.zeros(model_count)
    ubm_log_likelihood = np.zeros(model_count)
    # Score the features against each speaker GMM and its paired UBM.
    for idx, (gmm, ubm) in enumerate(zip(self.gmm_models, self.ubm_models)):
        gmm_log_likelihood[idx] = np.array(gmm.score(vector)).sum()
        ubm_log_likelihood[idx] = np.array(ubm.score(vector)).sum()
    winner = np.argmax(gmm_log_likelihood)
    # In-set iff the winning speaker model out-scores its background model.
    if gmm_log_likelihood[winner] - ubm_log_likelihood[winner] > 0:
        return (True, self.speakers[winner])
    return (False, self.speakers[winner])
def ModelTraining(source="trainingData/", dest="Speakers_models/"):
    '''
    Trains model for Speaker recognition.
    source => path to training data (one sub-directory per speaker)
    dest   => path where trained speaker models are saved
    For each speaker, fits a GMM on that speaker's features and a UBM on the
    pooled features of every OTHER speaker; both are pickled to dest.
    Converted from Python-2 style (print statements, text-mode pickle) to
    match the Python 3 used elsewhere in this file.
    '''
    # Extracting features for each speaker (5 files per speakers)
    features = np.asarray(())
    # Extacting featues for all speakers (UBM pool)
    total_features = np.asarray(())
    spk_start = []   # start row of each speaker's block in total_features
    spk_end = []     # end row (exclusive) of each speaker's block
    model_names = []
    for speaker in os.listdir(source):
        path = os.path.join(source, speaker)
        print(speaker)
        if os.path.isdir(path):
            for filename in os.listdir(path):
                if filename.endswith(".wav"):
                    print(filename)
                    # read the audio
                    sr, audio = read(os.path.join(path, filename))
                    # extract 40 dimensional MFCC & delta MFCC features
                    vector = extract_features(audio, sr)
                    if features.size == 0:
                        features = vector
                        # First file of this speaker: record where their rows
                        # begin in the pooled matrix (before it is extended).
                        spk_start.append(len(total_features))
                    else:
                        features = np.vstack((features, vector))
                    # for UBM
                    if total_features.size == 0:
                        total_features = vector
                    else:
                        total_features = np.vstack((total_features, vector))
            gmm = GMM(n_components=16, n_iter=200, covariance_type='diag', n_init=3)
            gmm.fit(features)
            spk_end.append(len(total_features))
            # dumping the trained gaussian model
            model_names.append(speaker)
            picklefile = speaker + ".gmm"
            # BUG FIX: pickle requires binary mode ('wb'), not 'w'.
            with open(dest + picklefile, 'wb') as f:
                cPickle.dump(gmm, f)
            print('+ modeling completed for speaker:', picklefile, " with data point = ", features.shape)
            features = np.asarray(())
    # UBM Training: each speaker's UBM is fit on everyone else's features.
    for i in range(len(spk_start)):
        ubm = GMM(n_components=16, n_iter=200, covariance_type='diag', n_init=3)
        ubm.fit(
            np.concatenate(
                (total_features[:spk_start[i]], total_features[spk_end[i]:])))
        picklefile = model_names[i] + ".ubm"
        with open(dest + picklefile, 'wb') as f:
            cPickle.dump(ubm, f)
        print('+ UBM modeling completed for speaker:', picklefile)
def model():
    """Build training file lists and train a GMM per audio file path listed
    in development.txt, pickling each as "<folder>.gmm".

    NOTE(review): the per-speaker batching guard (`if count == -1:`) is
    commented out, so a model is (re)trained and dumped after EVERY file --
    each successive dump for the same folder overwrites the previous one with
    a model fit on that folder's accumulated features so far. Also,
    development.txt is opened in append mode, so repeated runs duplicate
    entries. Indentation reconstructed from a collapsed source line --
    confirm against the original file.
    """
    import pickle
    import os
    import numpy as np
    from scipy.io.wavfile import read
    from sklearn.mixture import GaussianMixture
    from featureextraction import extract_features
    import warnings
    import sys
    warnings.filterwarnings("ignore")
    #path to training data
    source = "speakers\\"
    #path where training speakers will be saved
    dest = "trainmodels\\"
    folders = os.listdir(source)
    #print(folders)
    train_file = "development.txt"
    features_file = "features.txt"
    # Append every "<folder>/<file>" path to the training list file.
    for fold in folders:
        files = os.listdir(source + fold)
        for file in files:
            #print(file)
            if os.path.exists(train_file):
                file_paths = open(train_file, 'a+')
                file_paths.write(fold + "/" + file + "\n")
            else:
                file_paths = open(train_file, 'w')
                file_paths.write(fold + "/" + file + "\n")
    file_paths.close()
    file_paths = open(train_file, 'r')
    count = 0
    features = np.asarray(())
    features_file_features = open(features_file, 'a+')
    #print(features)
    for path in file_paths:
        path = path.strip()
        print(path)
        print(features)
        # read the audio
        sr, audio = read(source + path)
        # extract 40 dimensional MFCC & delta MFCC features
        vector = extract_features(audio, sr)
        if features.size == 0:
            features = vector
        else:
            features = np.vstack((features, vector))
        # when features of 5 files of speaker are concatenated, then do model training
        #if count == -1:
        #print("hhhhhhhh")
        print(features)
        # Log each accumulated feature row to features.txt as text.
        for s in features:
            out_arr = np.array_str(s)
            features_file_features.write(out_arr)
        #sys.exit("Error message")
        gmm = GaussianMixture(n_components=16,
                              max_iter=200,
                              covariance_type='diag',
                              n_init=3)
        gmm.fit(features)
        # dumping the trained gaussian model
        picklefile = path.split("/")[0] + ".gmm"
        pickle.dump(gmm, open(dest + picklefile, 'wb'))
        print(' modeling completed for speaker:', picklefile,
              " with data point = ", features.shape)
        features = np.asarray(())
        count = 0
        count = count + 1
# --- top-level script fragment ---
# NOTE(review): this chunk duplicates the interior of model() above and
# appears truncated -- it ends at the GaussianMixture construction without
# the fit/dump steps that follow in the equivalent code. Indentation
# reconstructed from a collapsed source line; confirm against the original.
file_paths.close()
file_paths = open(train_file, 'r')
count = 0
features = np.asarray(())
features_file_features = open(features_file, 'a+')
#print(features)
for path in file_paths:
    path = path.strip()
    print(path)
    print(features)
    # read the audio
    sr, audio = read(source + path)
    # extract 40 dimensional MFCC & delta MFCC features
    vector = extract_features(audio, sr)
    if features.size == 0:
        features = vector
    else:
        features = np.vstack((features, vector))
    # when features of 5 files of speaker are concatenated, then do model training
    #if count == -1:
    #print("hhhhhhhh")
    print(features)
    # Log each accumulated feature row to features.txt as text.
    for s in features:
        out_arr = np.array_str(s)
        features_file_features.write(out_arr)
    #sys.exit("Error message")
    gmm = GaussianMixture(n_components=16, max_iter=200, covariance_type='diag', n_init=3)
from featureextraction import extract_features
import os

import numpy as np  # used below via np.c_ (was referenced but never imported)

# NOTE(review): Sequential and Dense are used in the script below but never
# imported -- this needs `from keras.models import Sequential` and
# `from keras.layers import Dense` (or the tf.keras equivalents); not added
# here since keras is a third-party dependency the file does not import.
# TODO confirm the intended framework.


def get_data_files(searchdirectory, extension=".csv"):
    """Recursively collect paths under searchdirectory whose file name ends
    with `extension`.

    BUG FIX: the original tested `extension in file`, which also matched
    names like "a.csv.bak" or "my.csvx"; endswith() restricts matches to a
    real trailing extension.
    """
    targetfiles = []
    for root, dirs, files in os.walk(searchdirectory):
        for file in files:
            if file.endswith(extension):
                targetfiles.append(root + "/" + file)
    return targetfiles


if __name__ == '__main__':
    datafiles = get_data_files("testdata/traindatamix/Garbage")
    junkfeaturevectors = extract_features(datafiles)
    datafiles = get_data_files("testdata/traindatamix/Useful")
    usefulfeaturevectors = extract_features(datafiles)
    joined = np.c_[junkfeaturevectors, usefulfeaturevectors]
    joined = joined.transpose()
    # for a single-input model with 2 classes (binary):
    model = Sequential()
    model.add(Dense(1, input_dim=len(junkfeaturevectors), activation='sigmoid'))
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    # generate dummy data