def predict(self, signal, fs=44100):
    """Classify ``signal`` from its MFCC features.

    Args:
        signal: Sample array. When it has more than one dimension only
            column 0 is used.
        fs: Sampling rate handed to ``remove_silence`` and ``mfcc``.

    Returns:
        Whatever ``self.predict_feat`` returns for the extracted features.
    """
    # Multi-dimensional input: keep only the first column.
    mono = signal[:, 0] if len(signal.shape) > 1 else signal
    trimmed = remove_silence(fs, mono)
    # NOTE: a check that returned "Silence" when the trimmed signal was
    # shorter than a quarter of the input used to live here; it is disabled.
    features = mfcc(trimmed, fs, numcep=15)
    return self.predict_feat(features)
def predict(self, signal, fs=44100):
    """Run the prediction pipeline on a raw sample array.

    The input is reduced to its first column when it has more than one
    dimension, passed through ``remove_silence``, converted with
    ``mfcc(..., numcep=15)`` and finally given to ``self.predict_feat``.

    Args:
        signal: Sample array exposing ``.shape``.
        fs: Sampling rate forwarded to the helpers (default 44100).

    Returns:
        The result of ``self.predict_feat`` on the MFCC vectors.
    """
    first_channel = signal
    if len(signal.shape) > 1:
        first_channel = signal[:, 0]

    # NOTE: the former "Silence" early return (trimmed length below a
    # quarter of the input length) is intentionally not active.
    return self.predict_feat(
        mfcc(remove_silence(fs, first_channel), fs, numcep=15)
    )
def get_mfcc(self, audio_path):
    """Build an MFCC + delta + delta-delta feature matrix for a WAV file.

    Args:
        audio_path: Path passed to ``wav.read``.

    Returns:
        The transpose of the vertical stack of the transposed MFCC matrix,
        its first-order delta and its second-order delta, i.e. one row per
        row of the ``mfcc`` output, with the three feature groups side by
        side.
    """
    rate, samples = wav.read(audio_path)
    if len(samples.shape) > 1:
        # Multi-dimensional data: use column 0 only.
        samples = samples[:, 0]

    voiced = remove_silence(rate, samples)

    # Transpose once; librosa.feature.delta is applied to the transposed
    # matrix, exactly as the static block that gets stacked.
    static = mfcc(voiced, rate, numcep=19).T
    stacked = np.vstack([
        static,
        librosa.feature.delta(static),
        librosa.feature.delta(static, order=2),
    ])
    return stacked.T
def enroll(self, name, signal, fs=44100):
    """Store normalised MFCC features of ``signal`` under the label ``name``.

    The signal is passed through ``remove_silence``, converted to 15 MFCCs
    with librosa, and each coefficient is standardised (zero mean, unit
    standard deviation across frames). The resulting matrix is appended to
    ``self.features`` and ``name`` to ``self.classes``.

    Args:
        name: Label appended to ``self.classes``.
        signal: Sample array handed to ``remove_silence``.
        fs: Sampling rate (default 44100).
    """
    signal_new = remove_silence(fs, signal)

    # librosa documents hop_length as an integer; the former
    # np.min([0.016 * fs, 512]) produced a float. Clamp to at least 1 so a
    # very small fs cannot yield a zero hop.
    hop_length = max(1, int(min(0.016 * fs, 512)))

    coeffs = librosa.feature.mfcc(
        y=signal_new, sr=fs, n_mfcc=15, hop_length=hop_length
    ).T

    mu = np.mean(coeffs, axis=0)
    sigma = np.std(coeffs, axis=0)
    # A coefficient that is constant over all frames has sigma == 0; divide
    # by 1 instead so the feature becomes 0 rather than NaN/inf.
    sigma[sigma == 0] = 1

    feature = (coeffs - mu) / sigma
    self.features.append(feature)
    self.classes.append(name)
def predict(self, signal, fs=44100):
    """Predict the class of ``signal`` from normalised MFCC features.

    The signal is passed through ``remove_silence``, converted to 15 MFCCs
    with librosa, standardised per coefficient across frames, and handed to
    ``self.gmmset.predict_one``.

    Args:
        signal: Sample array handed to ``remove_silence``.
        fs: Sampling rate (default 44100).

    Returns:
        The result of ``self.gmmset.predict_one`` for the feature matrix.
    """
    signal_new = remove_silence(fs, signal)
    # NOTE: an early return of "Silence" when the trimmed signal was shorter
    # than a quarter of the input used to be here; it is disabled.

    # librosa documents hop_length as an integer; the former
    # np.min([0.016 * fs, 512]) produced a float. Clamp to at least 1 so a
    # very small fs cannot yield a zero hop.
    hop_length = max(1, int(min(0.016 * fs, 512)))

    coeffs = librosa.feature.mfcc(
        y=signal_new, sr=fs, n_mfcc=15, hop_length=hop_length
    ).T

    mu = np.mean(coeffs, axis=0)
    sigma = np.std(coeffs, axis=0)
    # A coefficient that is constant over all frames has sigma == 0; divide
    # by 1 instead so the feature becomes 0 rather than NaN/inf.
    sigma[sigma == 0] = 1

    feature = (coeffs - mu) / sigma
    return self.gmmset.predict_one(feature)
def _as_float_array(samples):
    """Return a new ndarray copy of ``samples``; non-float dtypes become float32."""
    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.floating):
        # Already floating point: copy so the caller's buffer is never shared.
        return np.array(samples)
    # Vectorised conversion instead of a per-sample Python loop.
    return samples.astype(np.float32)


def get_mfcc(file_path, noise):
    """Read a WAV file and return the features produced by ``extract``.

    The file is read via ``wavfile.read(to_mono(file_path))``, passed through
    ``remove_silence`` and converted to floating point. When ``noise`` is
    non-empty it is converted the same way and used as the noise clip for
    ``nr.reduce_noise`` before feature extraction.

    Args:
        file_path: Path of the input file, given to ``to_mono``.
        noise: Noise sample sequence; an empty sequence skips noise reduction.

    Returns:
        The result of ``extract(fs, y)``.
    """
    fs, y = wavfile.read(to_mono(file_path))  # read .wav file
    y = _as_float_array(remove_silence(fs, y))

    if len(noise) > 0:
        y = nr.reduce_noise(
            audio_clip=y,
            noise_clip=_as_float_array(noise),
            verbose=False,
        )

    return extract(fs, y)
# NOTE(review): the two statements below are the tail of a definition whose
# header lies outside this view (presumably the noise-profile setup that the
# script calls as init_noise - confirm). They are kept unchanged.
wavfile.write(NOISE_WAV, fs, signal)
os.system("sox {0} -n noiseprof {1}".format(NOISE_WAV, NOISE_MODEL))


def filter(self, fs, signal):
    """Run ``signal`` through sox's ``noisered`` effect and return the result.

    The signal is reduced with ``monophonic``, written to a temporary WAV
    file, processed by ``sox <in> <out> noisered NOISE_MODEL THRES`` and the
    output is read back and passed through ``monophonic`` again.

    Args:
        fs: Sampling rate used when writing the temporary WAV file.
        signal: Sample array to clean.

    Returns:
        The cleaned samples read from sox's output file.

    Raises:
        subprocess.CalledProcessError: If sox exits with a non-zero status.
        OSError: If the sox executable cannot be started.
    """
    import subprocess
    import tempfile

    # mkstemp creates uniquely named files atomically, unlike the previous
    # /tmp/tmp<random int>.wav scheme, which could collide or be pre-created.
    fd_in, fname = tempfile.mkstemp(prefix="tmp", suffix=".wav")
    os.close(fd_in)
    fd_out, fname_clean = tempfile.mkstemp(prefix="tmp", suffix="-clean.wav")
    os.close(fd_out)

    try:
        wavfile.write(fname, fs, monophonic(signal))
        # Argument list instead of a shell string: no word splitting or shell
        # interpretation of the paths, and a failing sox is reported.
        subprocess.check_call(
            ["sox", fname, fname_clean, "noisered", str(NOISE_MODEL), str(THRES)]
        )
        fs, signal = wavfile.read(fname_clean)
        return monophonic(signal)
    finally:
        # Always clean up, even when writing, sox or reading failed.
        for path in (fname, fname_clean):
            try:
                os.remove(path)
            except OSError:
                pass


if __name__ == "__main__":
    # Usage: <script> <background-noise.wav> <input.wav>
    # Writes the noise-reduced signal to vaded.wav and the output of
    # remove_silence on it to removed.wav.
    fs, bg = wavfile.read(sys.argv[1])
    nr = NoiseReduction()
    nr.init_noise(fs, bg)
    fs, sig = wavfile.read(sys.argv[2])
    vaded = nr.filter(fs, sig)
    wavfile.write('vaded.wav', fs, vaded)
    removed = remove_silence(fs, vaded)
    wavfile.write("removed.wav", fs, removed)
def enroll(self, name, signal, fs=44100):
    """Register the MFCC features of ``signal`` under ``name``.

    Args:
        name: Label forwarded to ``self.enroll_feat``.
        signal: Sample array. When it has more than one dimension only
            column 0 is used.
        fs: Sampling rate handed to ``remove_silence`` and ``mfcc``.
    """
    # Multi-dimensional input: keep only the first column.
    mono = signal[:, 0] if len(signal.shape) > 1 else signal
    trimmed = remove_silence(fs, mono)
    features = mfcc(trimmed, fs, numcep=15)
    self.enroll_feat(name, features)
def enroll(self, name, signal, fs=44100):
    """Extract features from a raw sample array and enroll them as ``name``.

    The input is reduced to its first column when it has more than one
    dimension, passed through ``remove_silence``, converted with
    ``mfcc(..., numcep=15)`` and handed to ``self.enroll_feat``.

    Args:
        name: Label to enroll the features under.
        signal: Sample array exposing ``.shape``.
        fs: Sampling rate forwarded to the helpers (default 44100).
    """
    first_channel = signal
    if len(signal.shape) > 1:
        first_channel = signal[:, 0]

    self.enroll_feat(
        name,
        mfcc(remove_silence(fs, first_channel), fs, numcep=15),
    )