class ModelInterface:
    """Collect per-speaker features, train a ``GMMSet`` and classify signals.

    ``features`` maps a speaker name to the list of feature rows enrolled
    for that name; ``gmmset`` is the ``GMMSet`` that holds the trained models.
    """

    def __init__(self):
        self.features = defaultdict(list)
        self.gmmset = GMMSet()

    def enroll(self, name, fs, signal):
        """Append the features extracted from ``signal`` to ``name``'s data.

        name: speaker label, used as the key into ``self.features``
        fs, signal: forwarded unchanged to ``get_feature``
        """
        print("enroll " + name)
        feat = get_feature(fs, signal)
        self.features[name].extend(feat)

    def train(self):
        """Replace ``self.gmmset`` with a fresh set fitted on all enrolled data.

        A speaker whose fit raises is reported (with the reason) and skipped
        so that the remaining speakers are still trained.
        """
        self.gmmset = GMMSet()
        start_time = time.time()
        for name, feats in self.features.items():
            try:
                self.gmmset.fit_new(feats, name)
            except Exception as e:
                # Best effort by design: one bad speaker must not abort the
                # whole run, but the cause is now shown instead of hidden.
                print("%s failed because of %s" % (name, e))
        print(time.time() - start_time, " seconds")

    def CheckEnroll(self):
        """Print the name of every enrolled speaker, one per line."""
        for name in self.features:
            print("%s " % (name))

    def dump(self, fname):
        """Pickle this whole object to ``fname``.

        ``gmmset.before_pickle()`` is called first and
        ``gmmset.after_pickle()`` is always called afterwards, even when
        opening the file or pickling fails.
        """
        self.gmmset.before_pickle()
        try:
            with open(fname, 'wb') as f:
                pickle.dump(self, f, -1)
        finally:
            self.gmmset.after_pickle()

    def predict(self, fs, signal):
        """Return the label ``gmmset.predict_one`` gives for ``signal``.

        Returns ``None`` (after printing the error) when feature extraction
        fails; previously that path crashed on an unbound ``feat``.
        """
        try:
            feat = get_feature(fs, signal)
        except Exception as e:
            print(e)
            return None
        return self.gmmset.predict_one(feat)

    @staticmethod
    def load(fname):
        """Load an object previously written by :meth:`dump`.

        NOTE: unpickling can execute arbitrary code; only load files that
        come from a trusted source.
        """
        print(fname)
        with open(fname, 'rb') as f:
            R = pickle.load(f)
        R.gmmset.after_pickle()
        return R
class ModelInterface(object):
    """Speaker-recognition front end: VAD filtering, enrollment, training.

    ``features`` maps a speaker name to the feature rows enrolled for it,
    ``gmmset`` holds the trained models and ``vad`` is the ``VAD`` instance
    used by :meth:`init_noise` and :meth:`filter`.
    """

    # Optional path to a UBM model file. When it is None or not an existing
    # file, _get_gmm_set() builds a plain GMMSet().
    UBM_MODEL_FILE = None

    def __init__(self):
        self.features = defaultdict(list)
        self.gmmset = GMMSet()
        self.vad = VAD()

    def init_noise(self, fs, signal):
        """ init vad from environment noise """
        self.vad.init_noise(fs, signal)

    def filter(self, fs, signal):
        """
        use VAD (voice activity detection) to filter out silence part of a
        signal

        Returns the filtered signal when more than a third of the original
        length is left, otherwise an empty numpy array.
        """
        ret, _ = self.vad.filter(fs, signal)
        orig_len = len(signal)
        if len(ret) > orig_len / 3:
            # signal is filtered by VAD
            return ret
        return np.array([])

    def enroll(self, name, fs, signal):
        """
        add the signal to this person's training dataset

        name: person's name
        fs, signal: passed to ``mix_feature`` as the tuple ``(fs, signal)``
        """
        feat = mix_feature((fs, signal))
        self.features[name].extend(feat)

    def _get_gmm_set(self):
        """Build the ``GMMSet`` used by :meth:`train`.

        A UBM-backed set is created only when ``UBM_MODEL_FILE`` points at
        an existing file and ``GMMSet`` is ``gmmset.GMMSetPyGMM``; any
        failure on that path prints a warning and falls back to
        ``GMMSet()``.
        """
        if self.UBM_MODEL_FILE and os.path.isfile(self.UBM_MODEL_FILE):
            try:
                from gmmset import GMMSetPyGMM
                if GMMSet is GMMSetPyGMM:
                    return GMMSet(ubm=GMM.load(self.UBM_MODEL_FILE))
            except Exception as e:
                print("Warning: failed to import gmmset. You may forget to compile gmm:")
                print(e)
                print("Try running `make -C src/gmm` to compile gmm module.")
                print("But gmm from sklearn will work as well! Using it now!")
                return GMMSet()
        return GMMSet()

    def train(self):
        """Rebuild ``self.gmmset`` and fit one model per enrolled name."""
        self.gmmset = self._get_gmm_set()
        start = time.time()
        print("Start training...")
        # items() instead of iteritems(): valid on both Python 2 and 3.
        for name, feats in self.features.items():
            self.gmmset.fit_new(feats, name)
        # Single formatted string keeps the Python 2 output ("<t>  seconds")
        # while also being valid Python 3.
        print("%s  seconds" % (time.time() - start))

    def predict(self, fs, signal):
        """
        return a label (name)

        Returns None, after printing the traceback, when feature
        extraction raises.
        """
        try:
            feat = mix_feature((fs, signal))
        except Exception:
            print(tb.format_exc())
            return None
        return self.gmmset.predict_one(feat)

    def dump(self, fname):
        """ dump all models to file

        The file is opened in binary mode because protocol -1 is a binary
        pickle protocol, matching the 'rb' used by :meth:`load`.
        ``gmmset.after_pickle()`` runs even if writing fails.
        """
        self.gmmset.before_pickle()
        try:
            with open(fname, 'wb') as f:
                pickle.dump(self, f, -1)
        finally:
            self.gmmset.after_pickle()

    @staticmethod
    def load(fname):
        """ load from a dumped model file

        NOTE: unpickling can execute arbitrary code; only load files that
        come from a trusted source.
        """
        with open(fname, 'rb') as f:
            R = pickle.load(f)
        R.gmmset.after_pickle()
        return R
class ModelInterface(object):
    """Speaker-recognition front end with optional UBM-based rejection.

    ``features`` maps a speaker name to the feature rows enrolled for it,
    ``gmmset`` holds the trained models and ``vad`` is the ``VAD`` instance
    used by :meth:`init_noise` and :meth:`filter`.
    """

    # Path of the UBM model loaded by _get_gmm_set() when GMMSet is the
    # GMMSetPyGMM implementation.
    UBM_MODEL_FILE = 'model/ubm.mixture-32.utt-300.model'

    def __init__(self):
        self.features = defaultdict(list)
        self.gmmset = GMMSet()
        self.vad = VAD()

    def init_noise(self, fs, signal):
        """Forward an environment-noise sample to ``self.vad.init_noise``."""
        self.vad.init_noise(fs, signal)

    def filter(self, fs, signal):
        """Run ``self.vad.filter`` on ``signal``.

        Returns the filtered signal when more than a third of the original
        length is left, otherwise an empty numpy array.
        """
        ret, _ = self.vad.filter(fs, signal)
        orig_len = len(signal)
        if len(ret) > orig_len / 3:
            return ret
        return np.array([])

    def enroll(self, name, fs, signal):
        """Append ``mix_feature((fs, signal))`` to ``name``'s training data."""
        feat = mix_feature((fs, signal))
        self.features[name].extend(feat)

    def _get_gmm_set(self):
        """Return a UBM-backed ``GMMSet`` when ``GMMSet`` is ``GMMSetPyGMM``.

        Otherwise return a plain ``GMMSet()``.
        """
        from gmmset import GMMSetPyGMM
        if GMMSet is GMMSetPyGMM:
            return GMMSet(ubm=GMM.load(self.UBM_MODEL_FILE))
        return GMMSet()

    def train(self):
        """Rebuild ``self.gmmset`` and fit one model per enrolled name."""
        self.gmmset = self._get_gmm_set()
        start = time.time()
        print("Start training...")
        # items() instead of iteritems(): valid on both Python 2 and 3.
        for name, feats in self.features.items():
            self.gmmset.fit_new(feats, name)
        # Single formatted string keeps the Python 2 output ("<t>  seconds")
        # while also being valid Python 3.
        print("%s  seconds" % (time.time() - start))

    def predict(self, fs, signal, reject=False):
        """Return the predicted label for ``signal``.

        reject: when true and ``GMMSet`` is ``GMMSetPyGMM``, try
            ``predict_one_with_rejection`` first; if that raises, the error
            is printed and plain ``predict_one`` is used instead.

        Returns None, after printing the error, when feature extraction
        raises.
        """
        if reject:
            # The backend only has to be inspected when rejection was
            # actually requested, so the import is deferred to this branch.
            from gmmset import GMMSetPyGMM
            reject = GMMSet is GMMSetPyGMM
        try:
            feat = mix_feature((fs, signal))
        except Exception as e:
            print(str(e))
            return None
        if reject:
            try:
                return self.gmmset.predict_one_with_rejection(feat)
            except Exception as e:
                print(str(e))
        return self.gmmset.predict_one(feat)

    def dump(self, fname):
        """Pickle this whole object to ``fname``.

        The file is opened in binary mode because HIGHEST_PROTOCOL is a
        binary pickle protocol. ``gmmset.after_pickle()`` runs even if
        writing fails.
        """
        self.gmmset.before_pickle()
        try:
            with open(fname, 'wb') as f:
                pickle.dump(self, f, pickle.HIGHEST_PROTOCOL)
        finally:
            self.gmmset.after_pickle()

    @staticmethod
    def load(fname):
        """Load an object previously written by :meth:`dump`.

        NOTE: unpickling can execute arbitrary code; only load files that
        come from a trusted source.
        """
        # Binary mode: text mode corrupts or rejects binary pickle data.
        with open(fname, 'rb') as f:
            R = pickle.load(f)
        R.gmmset.after_pickle()
        return R
class ModelInterface(object):
    """Speaker-recognition front end with optional UBM-based rejection.

    ``features`` maps a speaker name to the feature rows enrolled for it,
    ``gmmset`` holds the trained models and ``vad`` is the ``VAD`` instance
    used by :meth:`init_noise` and :meth:`filter`.
    """

    # Path of the UBM model loaded by _get_gmm_set() when GMMSet is the
    # GMMSetPyGMM implementation.
    UBM_MODEL_FILE = 'model/ubm.mixture-32.utt-300.model'

    def __init__(self):
        self.features = defaultdict(list)
        self.gmmset = GMMSet()
        self.vad = VAD()

    def init_noise(self, fs, signal):
        """Forward an environment-noise sample to ``self.vad.init_noise``."""
        self.vad.init_noise(fs, signal)

    def filter(self, fs, signal):
        """Run ``self.vad.filter`` on ``signal``.

        Returns the filtered signal when more than a third of the original
        length is left, otherwise an empty numpy array.
        """
        ret, _ = self.vad.filter(fs, signal)
        orig_len = len(signal)
        if len(ret) > orig_len / 3:
            # signal is filtered by VAD
            return ret
        return np.array([])

    def enroll(self, name, fs, signal):
        """Append ``mix_feature((fs, signal))`` to ``name``'s training data."""
        feat = mix_feature((fs, signal))
        self.features[name].extend(feat)

    def _get_gmm_set(self):
        """Build the ``GMMSet`` used by :meth:`train`.

        A UBM-backed set is returned when ``GMMSet`` is
        ``gmmset.GMMSetPyGMM``. Any exception on that path prints a warning
        and a plain ``GMMSet()`` is returned instead.
        """
        try:
            from gmmset import GMMSetPyGMM
            if GMMSet is GMMSetPyGMM:
                return GMMSet(ubm=GMM.load(self.UBM_MODEL_FILE))
        except Exception as e:
            print("Warning: failed to import gmmset. You may forget to compile gmm:")
            print(e)
            print("Try running `make -C src/gmm` to compile gmm module.")
            print("But gmm from sklearn will work as well! Using it now!")
        return GMMSet()

    def train(self):
        """Rebuild ``self.gmmset`` and fit one model per enrolled name."""
        self.gmmset = self._get_gmm_set()
        start = time.time()
        print("Start training...")
        # items() instead of iteritems(): valid on both Python 2 and 3.
        for name, feats in self.features.items():
            self.gmmset.fit_new(feats, name)
        # Single formatted string keeps the Python 2 output ("<t>  seconds")
        # while also being valid Python 3.
        print("%s  seconds" % (time.time() - start))

    def predict(self, fs, signal, reject=False):
        """Return the predicted label for ``signal``.

        reject: when true and ``GMMSet`` is ``GMMSetPyGMM``, try
            ``predict_one_with_rejection`` first; if that raises, the
            traceback is printed and plain ``predict_one`` is used instead.

        Returns None, after printing the traceback, when feature
        extraction raises.
        """
        if reject:
            # _get_gmm_set() treats a missing/uncompiled gmmset module as
            # recoverable, so prediction must not crash on it either:
            # without the module rejection is simply unavailable.
            try:
                from gmmset import GMMSetPyGMM
            except Exception:
                reject = False
            else:
                reject = GMMSet is GMMSetPyGMM
        try:
            feat = mix_feature((fs, signal))
        except Exception:
            print(tb.format_exc())
            return None
        if reject:
            try:
                return self.gmmset.predict_one_with_rejection(feat)
            except Exception:
                print(tb.format_exc())
        return self.gmmset.predict_one(feat)

    def dump(self, fname):
        """Pickle this whole object to ``fname``.

        The file is opened in binary mode because protocol -1 is a binary
        pickle protocol. ``gmmset.after_pickle()`` runs even if writing
        fails.
        """
        self.gmmset.before_pickle()
        try:
            with open(fname, 'wb') as f:
                pickle.dump(self, f, -1)
        finally:
            self.gmmset.after_pickle()

    @staticmethod
    def load(fname):
        """Load an object previously written by :meth:`dump`.

        NOTE: unpickling can execute arbitrary code; only load files that
        come from a trusted source.
        """
        # Binary mode: text mode corrupts or rejects binary pickle data.
        with open(fname, 'rb') as f:
            R = pickle.load(f)
        R.gmmset.after_pickle()
        return R
class ModelInterface:
    """Enroll speakers, train one GMM per speaker and store each separately.

    ``features`` maps a speaker name to the feature rows enrolled for it;
    ``gmmset`` is the ``GMMSet`` holding the trained models.

    NOTE: VAD-based silence filtering and signal-level prediction were
    disabled in this variant; :meth:`predict` takes ready-made features.
    """

    def __init__(self):
        self.features = defaultdict(list)
        self.gmmset = GMMSet()

    def enroll(self, name, fs, signal):
        """Append ``get_feature(fs, signal)`` to ``name``'s training data."""
        feat = get_feature(fs, signal)
        self.features[name].extend(feat)

    def mfcc_dump(self, fname):
        """ dump all features to file"""
        with open(fname, 'wb') as f:
            pickle.dump(self.features, f, -1)

    def train(self):
        """Replace ``self.gmmset`` with a fresh set fitted on all enrolled data.

        Each speaker is timed individually. A speaker whose fit raises is
        reported and skipped so the remaining speakers are still trained.
        """
        self.gmmset = GMMSet()
        start_time1 = time.time()
        print("Begin to train")
        for name, feats in self.features.items():
            try:
                start_time2 = time.time()
                self.gmmset.fit_new(feats, name)
                print(name, " trained", time.time() - start_time2, "seconds")
            except Exception as e:
                print("%s failed because of %s" % (name, e))
        print("Train ", time.time() - start_time1, " seconds")

    def dump(self, save_dir):
        """ dump all models to file

        Every trained model is written to its own file
        ``<save_dir>/<label>.m``.
        """
        # Labels (gmmset.y) and models (gmmset.gmms) are parallel lists.
        for label, model in zip(self.gmmset.y, self.gmmset.gmms):
            save_path = os.path.join(save_dir, label + '.m')
            with open(save_path, 'wb') as f:
                # Only the single model object is pickled, not the GMMSet.
                pickle.dump(model, f, -1)

    def predict(self, feat):
        """
        return a label (name)

        feat: feature rows, handed directly to ``self.gmmset.predict_one``
        """
        # This class has no predict_one of its own; the classifier lives on
        # the GMM set, so the call must be delegated there.
        return self.gmmset.predict_one(feat)

    @staticmethod
    def load(fname):
        """ load from a dumped model file

        Returns ``(label, model)`` where ``label`` is the file's base name
        up to its first dot.

        NOTE: unpickling can execute arbitrary code; only load files that
        come from a trusted source.
        """
        label = os.path.basename(fname.rstrip('/')).split('.')[0]
        with open(fname, 'rb') as f:
            R = pickle.load(f)
        return label, R
class GMMRec(object):
    """GMM-based speaker recogniser with per-speaker model files.

    ``classes``, ``features`` and ``models`` are parallel lists: entry ``i``
    holds a speaker name, its enrolled feature matrix (``None`` for
    speakers added via :meth:`enroll_model`) and the optional path of its
    model file. ``gmmset`` holds the trained models.
    """

    def __init__(self):
        self.features = []
        self.gmmset = GMMSet()
        self.classes = []
        self.models = []

    def delete_speaker(self, name):
        """Remove ``name`` from the enrolled lists and from ``self.gmmset``.

        The matching ``features`` entry is removed as well so the three
        parallel lists stay aligned. A speaker that was enrolled but never
        trained is removed without touching the GMM set.
        """
        if name not in self.classes:
            print("%s not in the list!" % name)
            return
        ind = self.classes.index(name)
        del self.classes[ind]
        del self.models[ind]
        del self.features[ind]
        if name in self.gmmset.y:
            ind = self.gmmset.y.index(name)
            del self.gmmset.gmms[ind]
            del self.gmmset.y[ind]

    def enroll_model(self, name, model):
        """Register ``name`` from the model file ``model``.

        The model is loaded with :meth:`load` and added to ``self.gmmset``.
        Nothing happens when ``name`` is already enrolled.
        """
        if name in self.classes:
            return
        self.classes.append(name)
        self.models.append(model)
        # Placeholder keeps features aligned with classes/models.
        self.features.append(None)
        gmm = self.load(model)
        self.gmmset.add_new(gmm, name)

    def enroll(self, name, mfcc_vecs, model=None):
        """Enroll ``name`` with the feature matrix ``mfcc_vecs``.

        mfcc_vecs: array; stored after conversion to float32
        model: optional path where :meth:`train` dumps the fitted model
        """
        if name not in self.classes:
            feature = mfcc_vecs.astype(np.float32)
            self.features.append(feature)
            self.classes.append(name)
            self.models.append(model)
        else:
            print(name + " already enrolled, please delete the old one first!")

    def get_mfcc(self, audio_path):
        """Read a wav file and return MFCCs stacked with their deltas.

        Only the first channel of a multi-channel file is used, and the
        signal goes through ``remove_silence`` before extraction. The
        result has one row per frame, holding the 19 cepstral coefficients
        followed by their first- and second-order deltas.
        """
        (sr, sig) = wav.read(audio_path)
        if len(sig.shape) > 1:
            sig = sig[:, 0]
        cleansig = remove_silence(sr, sig)
        # Work on the transposed matrix, as the original delta calls did.
        coeffs = mfcc(cleansig, sr, numcep=19).T
        mfcc_delta = librosa.feature.delta(coeffs)
        mfcc_delta2 = librosa.feature.delta(coeffs, order=2)
        feats = np.vstack([coeffs, mfcc_delta, mfcc_delta2])
        return feats.T

    def enroll_file(self, name, fn, model=None):
        """Enroll ``name`` using the features of the audio file ``fn``."""
        if name not in self.classes:
            fn_mfcc = np.array(self.get_mfcc(fn))
            self.enroll(name, fn_mfcc, model=model)
        else:
            print(name + " already enrolled, please delete the old one first!")

    def _get_gmm_set(self):
        """Return a fresh, empty ``GMMSet``."""
        return GMMSet()

    def train(self, gmm_order=None):
        """Fit a model for every enrolled speaker not yet in the GMM set.

        gmm_order: forwarded to ``gmmset.fit_new``
        A newly fitted model is also dumped to the speaker's model path
        when one was supplied at enrollment.
        """
        for name, feats, model in zip(self.classes, self.features, self.models):
            if (name not in self.gmmset.y) and (name is not None):
                gmm = self.gmmset.fit_new(feats, name, gmm_order)
                if model is not None:
                    self.dump(model, part=gmm)
            else:
                # %-formatting also copes with a None name, which the old
                # string concatenation did not.
                print("%s already trained, skip!" % name)

    def predict(self, mfcc_vecs):
        """Return ``gmmset.predict_one`` for ``mfcc_vecs`` cast to float32."""
        feature = mfcc_vecs.astype(np.float32)
        return self.gmmset.predict_one(feature)

    def dump(self, fname, part=None):
        """Pickle ``part`` (or this whole object when ``part`` is None).

        The file is opened in binary mode because protocol -1 is a binary
        pickle protocol.
        """
        with open(fname, 'wb') as f:
            if part is None:
                pickle.dump(self, f, -1)
            else:
                pickle.dump(part, f, -1)

    @staticmethod
    def load(fname):
        """Return the object pickled in ``fname``.

        NOTE: unpickling can execute arbitrary code; only load files that
        come from a trusted source.
        """
        with open(fname, 'rb') as f:
            R = pickle.load(f)
        return R
class GMMRec(object):
    """GMM speaker recogniser working on normalised MFCC features.

    ``classes`` and ``features`` are parallel lists of speaker names and
    their enrolled feature matrices; ``gmmset`` holds the trained models.
    """

    def __init__(self, ubmfn=None, reject_threshold=10):
        """
        ubmfn: optional path of a pickled UBM, loaded with :meth:`load`
        reject_threshold: stored on the instance as ``reject_threshold``
        """
        self.features = []
        self.gmmset = GMMSet()
        self.classes = []
        self.reject_threshold = reject_threshold
        # Always define the attribute so later reads cannot hit an
        # AttributeError when no UBM file was given.
        self.ubm = self.load(ubmfn) if ubmfn is not None else None

    def _extract_feature(self, signal, fs):
        """Return per-coefficient normalised MFCCs, one row per frame."""
        voiced = remove_silence(fs, signal)
        # librosa expects an integer hop length; the former np.min([...])
        # always produced a float.
        hop_length = int(min(0.016 * fs, 512))
        mfcc = librosa.feature.mfcc(y=voiced, sr=fs, n_mfcc=15,
                                    hop_length=hop_length).T
        mu = np.mean(mfcc, axis=0)
        sigma = np.std(mfcc, axis=0)
        # A constant coefficient has zero deviation; dividing by it would
        # fill the column with NaN/inf, so such columns are only centred.
        sigma = np.where(sigma == 0, 1.0, sigma)
        return (mfcc - mu) / sigma

    def enroll(self, name, signal, fs=44100):
        """Store the features of ``signal`` as training data for ``name``."""
        self.features.append(self._extract_feature(signal, fs))
        self.classes.append(name)

    def _get_gmm_set(self):
        """Return a fresh, empty ``GMMSet``."""
        return GMMSet()

    def train(self):
        """Rebuild ``self.gmmset`` and fit one model per enrolled speaker."""
        self.gmmset = self._get_gmm_set()
        for name, feats in zip(self.classes, self.features):
            self.gmmset.fit_new(feats, name)

    def predict(self, signal, fs=44100):
        """Return ``gmmset.predict_one`` for the features of ``signal``."""
        return self.gmmset.predict_one(self._extract_feature(signal, fs))

    @staticmethod
    def totime(secs):
        """Split a number of seconds into ``(hours, minutes, seconds)``."""
        m, s = divmod(secs, 60)
        h, m = divmod(m, 60)
        return h, m, s

    def showresult(self, signal, fs, head, disp):
        """Print ``H:MM:SS`` for ``head`` followed by the prediction.

        disp: when true, additionally try to show an audio widget via
            ``display(Audio(...))``.
        """
        print("%d:%02d:%02d" % (self.totime(head)), self.predict(signal, fs))
        try:
            if disp:
                display(Audio(data=signal, rate=fs))
        except Exception:
            # Showing the widget is best effort and must not break
            # recognition; unlike a bare except this no longer swallows
            # KeyboardInterrupt/SystemExit.
            pass

    def recognize(self, signal, step=1, duration=1.5, fs=44100, disp=True):
        """Classify ``signal`` window by window and print each result.

        step: seconds between the starts of consecutive windows (> 0)
        duration: window length in seconds
        fs: sampling rate used to convert seconds into sample indices
        disp: forwarded to :meth:`showresult`

        Raises ValueError when ``step`` is not positive, since the loop
        could otherwise never terminate.
        """
        if step <= 0:
            raise ValueError("step must be positive")
        head = 0
        totallen = int(np.round(signal.shape[0] / fs))
        print('Recognition results:')
        while head < totallen:
            tail = min(head + duration, totallen)
            # Slice bounds must be integers; fs * 1.5 is a float.
            start = int(round(fs * head))
            stop = int(round(fs * tail))
            self.showresult(signal[start:stop], fs, head, disp)
            head += step

    def dump(self, fname, part=None):
        """Pickle ``part`` (or this whole object when ``part`` is None)."""
        with open(fname, 'wb') as f:
            if part is None:
                pickle.dump(self, f, -1)
            else:
                pickle.dump(part, f, -1)

    @staticmethod
    def load(fname):
        """Return the object pickled in ``fname``.

        NOTE: unpickling can execute arbitrary code; only load files that
        come from a trusted source.
        """
        with open(fname, 'rb') as f:
            R = pickle.load(f)
        return R
class ModelInterface:
    """Enroll speakers, train a ``GMMSet`` and classify feature matrices.

    ``features`` maps a speaker name to the feature rows enrolled for it;
    ``gmmset`` is the ``GMMSet`` holding the trained models.

    NOTE: VAD-based silence filtering and signal-level prediction were
    disabled in this variant; :meth:`predict` takes ready-made features.
    """

    def __init__(self):
        self.features = defaultdict(list)
        self.gmmset = GMMSet()

    def enroll(self, name, fs, signal):
        """Append ``get_feature(fs, signal)`` to ``name``'s training data."""
        feat = get_feature(fs, signal)
        self.features[name].extend(feat)

    def train(self):
        """Replace ``self.gmmset`` with a fresh set fitted on all enrolled data.

        A speaker whose fit raises is reported (with the reason) and skipped
        so that the remaining speakers are still trained.
        """
        self.gmmset = GMMSet()
        start_time = time.time()
        for name, feats in self.features.items():
            try:
                self.gmmset.fit_new(feats, name)
            except Exception as e:
                # Best effort by design: one bad speaker must not abort the
                # whole run, but the cause is now shown instead of hidden.
                print("%s failed because of %s" % (name, e))
        print("Train ", time.time() - start_time, " seconds")

    def dump(self, fname):
        """ dump all models to file

        ``gmmset.before_pickle()`` is called first and
        ``gmmset.after_pickle()`` is always called afterwards, even when
        opening the file or pickling fails.
        """
        self.gmmset.before_pickle()
        try:
            with open(fname, 'wb') as f:
                pickle.dump(self, f, -1)
        finally:
            self.gmmset.after_pickle()

    def predict(self, feat):
        """
        return a label (name)

        feat: feature rows, handed directly to ``self.gmmset.predict_one``
        """
        return self.gmmset.predict_one(feat)

    @staticmethod
    def load(fname):
        """ load from a dumped model file

        NOTE: unpickling can execute arbitrary code; only load files that
        come from a trusted source.
        """
        with open(fname, 'rb') as f:
            R = pickle.load(f)
        R.gmmset.after_pickle()
        return R