def beatExtractionWrapper(wav_file, plot):
    """Run beat extraction on one audio file and print the result.

    Short-term features are extracted from the file and handed to
    ``aF.beat_extraction``; the two values it returns are printed as a
    beats-per-minute figure (truncated to an integer) and a ratio.

    Args:
        wav_file: path of the audio file to analyse.
        plot: forwarded unchanged as the third argument of
            ``aF.beat_extraction``.

    Raises:
        Exception: if ``wav_file`` is not an existing file.
    """
    if not os.path.isfile(wav_file):
        raise Exception("Input audio file not found!")

    # One value serves as short-term window, short-term step (both scaled by
    # the sampling rate) and as the step reported to the beat extractor.
    frame_sec = 0.050

    sampling_rate, samples = audioBasicIO.read_audio_file(wav_file)
    frame_len = frame_sec * sampling_rate
    short_features, _ = sF.feature_extraction(samples, sampling_rate,
                                              frame_len, frame_len)
    bpm, ratio = aF.beat_extraction(short_features, frame_sec, plot)

    print("Beat: {0:d} bpm ".format(int(bpm)))
    print("Ratio: {0:.2f} ".format(ratio))
def file_regression(input_file, model_name, model_type):
    """Apply a set of regression models to a single audio file.

    Args:
        input_file: path of the audio file to analyse.
        model_name: collection of regression-model file paths (no globbing is
            done here). Paths ending in "MEANS" are ignored. A single string
            is treated as a one-element collection.
        model_type: 'svm', 'svm_rbf' or 'randomforest'.

    Returns:
        ``(R, regression_names)`` on success: ``R`` holds one
        ``regression_wrapper`` result per model, ``regression_names`` the part
        of each model path after its last "_".
        ``(-1, -1, -1)`` when the audio file is missing, when no usable model
        path is given or one of them is not a file, or when ``model_type`` is
        not supported.
    """
    failure = (-1, -1, -1)

    if not os.path.isfile(input_file):
        print("fileClassification: wav file not found!")
        return failure

    # Iterating a bare string would yield single characters, so wrap it.
    if isinstance(model_name, str):
        model_name = [model_name]

    regression_models = [r for r in model_name if r[-5:] != "MEANS"]
    regression_names = [r[r.rfind("_") + 1:] for r in regression_models]

    # Validate every model path BEFORE the first one is loaded; otherwise a
    # missing or empty model list crashes instead of returning the error tuple.
    if not regression_models or \
            not all(os.path.isfile(r) for r in regression_models):
        print("fileClassification: input model_name not found!")
        return failure

    # Only these types are loaded below; anything else used to fail later with
    # unbound window variables. (Message prefix matches the existing ones.)
    if model_type not in ('svm', 'svm_rbf', 'randomforest'):
        print("fileClassification: unsupported model_type!")
        return failure

    # FEATURE EXTRACTION
    # Window and step settings are taken from the first model only.
    _, _, _, mid_window, mid_step, short_window, short_step, compute_beat \
        = load_model(regression_models[0], True)

    # read audio file and convert to mono
    sampling_rate, signal = audioBasicIO.read_audio_file(input_file)
    signal = audioBasicIO.stereo_to_mono(signal)

    mid_features, s, _ = \
        aF.mid_feature_extraction(signal, sampling_rate,
                                  mid_window * sampling_rate,
                                  mid_step * sampling_rate,
                                  round(sampling_rate * short_window),
                                  round(sampling_rate * short_step))

    # long term averaging of mid-term statistics
    mid_features = mid_features.mean(axis=1)
    if compute_beat:
        beat, beat_conf = aF.beat_extraction(s, short_step)
        mid_features = np.append(mid_features, beat)
        mid_features = np.append(mid_features, beat_conf)

    # REGRESSION
    R = []
    for r in regression_models:
        model, mean, std, _, _, _, _, _ = load_model(r, True)
        curFV = (mid_features - mean) / std  # per-model normalization
        R.append(regression_wrapper(model, model_type, curFV))
    return R, regression_names
def fileRegression(inputFile, model_name, model_type):
    """Apply every regression model matching a path prefix to one audio file.

    Args:
        inputFile: path of the audio file to analyse.
        model_name: path prefix; models are the matches of
            ``glob.glob(model_name + "_*")`` whose path does not end in
            "MEANS".
        model_type: 'svm', 'svm_rbf' or 'randomforest'.

    Returns:
        ``(R, regression_names)`` on success: ``R`` holds one
        ``regressionWrapper`` result per model, ``regression_names`` the part
        of each model path after its last "_".
        ``(-1, -1, -1)`` when the audio file is missing, when the prefix
        matches no model file, or when ``model_type`` is not supported.
    """
    failure = (-1, -1, -1)

    if not os.path.isfile(inputFile):
        print("fileClassification: wav file not found!")
        return failure

    regression_models = [r for r in glob.glob(model_name + "_*")
                         if r[-5:] != "MEANS"]
    regression_names = [r[r.rfind("_") + 1:] for r in regression_models]

    # An empty match list used to raise IndexError below, and glob can also
    # match directories; check both before anything is loaded.
    if not regression_models or \
            not all(os.path.isfile(r) for r in regression_models):
        print("fileClassification: input model_name not found!")
        return failure

    # Only these types are loaded below; anything else used to fail later with
    # unbound window variables. (Message prefix matches the existing ones.)
    if model_type not in ('svm', 'svm_rbf', 'randomforest'):
        print("fileClassification: unsupported model_type!")
        return failure

    # FEATURE EXTRACTION
    # Window and step settings are taken from the first model only.
    [_, _, _, mt_win, mt_step, st_win, st_step, compute_beat] = \
        load_model(regression_models[0], True)

    # read audio file and convert to mono
    [Fs, x] = audioBasicIO.read_audio_file(inputFile)
    x = audioBasicIO.stereo_to_mono(x)

    [mt_features, s, _] = aF.mid_feature_extraction(x, Fs,
                                                    mt_win * Fs,
                                                    mt_step * Fs,
                                                    round(Fs * st_win),
                                                    round(Fs * st_step))

    # long term averaging of mid-term statistics
    mt_features = mt_features.mean(axis=1)
    if compute_beat:
        [beat, beatConf] = aF.beat_extraction(s, st_step)
        mt_features = np.append(mt_features, beat)
        mt_features = np.append(mt_features, beatConf)

    # REGRESSION
    R = []
    for r in regression_models:
        [model, MEAN, STD, _, _, _, _, _] = load_model(r, True)
        curFV = (mt_features - MEAN) / STD  # per-model normalization
        R.append(regressionWrapper(model, model_type, curFV))
    return R, regression_names
def features(file_path):
    """Build one feature vector (plus matching names) for an audio file.

    The mid-term feature matrix returned by ``aF.mid_feature_extraction`` is
    averaged into a single vector, and the two values returned by
    ``aF.beat_extraction`` are appended to it.

    Args:
        file_path: path handed to ``aIO.read_audio_file``.

    Returns:
        ``(vector, names)``: the averaged features followed by the two beat
        values, and the extractor's name list extended with 'beat' and
        'beat_conf' (the list is extended in place).
    """
    sampling_rate, samples = aIO.read_audio_file(file_path)

    # Window/step settings; each is scaled by the sampling rate below.
    mid_win, mid_step = 1, 1
    short_win, short_step = 0.1, 0.05

    mid_matrix, short_matrix, names = aF.mid_feature_extraction(
        samples,
        sampling_rate,
        round(sampling_rate * mid_win),
        round(sampling_rate * mid_step),
        round(sampling_rate * short_win),
        round(sampling_rate * short_step),
    )

    # Collapse the matrix to one averaged vector.
    vector = np.transpose(mid_matrix).mean(axis=0)

    # Append beat and its confidence, in that order.
    for extra in aF.beat_extraction(short_matrix, short_step):
        vector = np.append(vector, extra)

    names.extend(['beat', 'beat_conf'])
    return vector, names
def fileClassification(inputFile, model_name, model_type):
    """Classify one audio file with a previously stored model.

    Args:
        inputFile: path of the audio file.
        model_name: path of the model file.
        model_type: 'knn' selects ``load_model_knn``; any other value selects
            ``load_model``. Also forwarded to ``classifierWrapper``.

    Returns:
        ``(Result, P, classNames)`` as produced by ``classifierWrapper`` and
        the loaded model, or ``(-1, -1, -1)`` when either file is missing,
        the audio reader reports a sampling rate of 0, or the signal is not
        longer than the model's mid-term window.
    """
    failure = (-1, -1, -1)

    # The model path is validated first, then the audio path.
    if not os.path.isfile(model_name):
        print("fileClassification: input model_name not found!")
        return failure
    if not os.path.isfile(inputFile):
        print("fileClassification: wav file not found!")
        return failure

    loader = load_model_knn if model_type == 'knn' else load_model
    (classifier, feat_mean, feat_std, class_names,
     mt_win, mt_step, st_win, st_step, compute_beat) = loader(model_name)

    # read audio file and convert to mono
    Fs, samples = audioBasicIO.read_audio_file(inputFile)
    samples = audioBasicIO.stereo_to_mono(samples)

    # Fs == 0 signals an audio IO problem; the second test rejects signals
    # that do not exceed one mid-term window.
    if Fs == 0 or samples.shape[0] / float(Fs) <= mt_win:
        return failure

    mt_features, st_features, _ = aF.mid_feature_extraction(
        samples, Fs,
        mt_win * Fs, mt_step * Fs,
        round(Fs * st_win), round(Fs * st_step))

    # long term averaging of mid-term statistics
    mt_features = mt_features.mean(axis=1)
    if compute_beat:
        for extra in aF.beat_extraction(st_features, st_step):
            mt_features = np.append(mt_features, extra)

    normalized = (mt_features - feat_mean) / feat_std
    Result, P = classifierWrapper(classifier, model_type, normalized)
    return Result, P, class_names
def file_classification(input_file, model_name, model_type):
    """Classify a single audio file using a stored classifier model.

    Args:
        input_file: path of the audio file.
        model_name: path of the model file.
        model_type: 'knn' loads the model with ``load_model_knn``; every
            other value uses ``load_model``. Also passed on to
            ``classifier_wrapper``.

    Returns:
        ``(class_id, probability, classes)`` on success. ``(-1, -1, -1)`` if
        the model or audio file does not exist, if the reader returns a
        sampling rate of 0, or if the signal duration does not exceed the
        model's mid-term window.
    """
    # Existence checks: model first, then audio.
    for path, complaint in (
            (model_name, "fileClassification: input model_name not found!"),
            (input_file, "fileClassification: wav file not found!")):
        if not os.path.isfile(path):
            print(complaint)
            return -1, -1, -1

    if model_type == 'knn':
        loaded = load_model_knn(model_name)
    else:
        loaded = load_model(model_name)
    classifier, mean, std, classes, mid_window, mid_step, short_window, \
        short_step, compute_beat = loaded

    # read audio file and convert to mono
    sampling_rate, signal = audioBasicIO.read_audio_file(input_file)
    signal = audioBasicIO.stereo_to_mono(signal)

    if sampling_rate == 0:
        # audio file IO problem
        return -1, -1, -1
    duration = signal.shape[0] / float(sampling_rate)
    if duration <= mid_window:
        return -1, -1, -1

    extracted = aF.mid_feature_extraction(
        signal,
        sampling_rate,
        mid_window * sampling_rate,
        mid_step * sampling_rate,
        round(sampling_rate * short_window),
        round(sampling_rate * short_step))
    mid_features, short_features, _ = extracted

    # long term averaging of mid-term statistics
    mid_features = mid_features.mean(axis=1)
    if compute_beat:
        beat, beat_conf = aF.beat_extraction(short_features, short_step)
        mid_features = np.append(np.append(mid_features, beat), beat_conf)

    feature_vector = (mid_features - mean) / std  # normalization
    class_id, probability = classifier_wrapper(classifier, model_type,
                                               feature_vector)
    return class_id, probability, classes
def _vad_segment_class_id(signal, fs, classifier, mean, std, mid_window,
                          mid_step, short_window, short_step, compute_beat):
    """Return ``classifier.predict``'s class id for one mono audio segment."""
    # Shrink the mid-term window for THIS segment only when the segment is
    # shorter than it, so one short segment cannot change the window used for
    # the segments and files that follow.
    duration = signal.shape[0] / float(fs)
    seg_window = duration if duration < mid_window else mid_window

    mid_features, s, _ = \
        aF.mid_feature_extraction(signal, fs,
                                  seg_window * fs,
                                  mid_step * fs,
                                  round(fs * short_window),
                                  round(fs * short_step))

    # long term averaging of mid-term statistics
    mid_features = mid_features.mean(axis=1)
    if compute_beat:
        beat, beat_conf = aF.beat_extraction(s, short_step)
        mid_features = np.append(mid_features, beat)
        mid_features = np.append(mid_features, beat_conf)

    feature_vector = (mid_features - mean) / std  # normalization
    return classifier.predict(feature_vector.reshape(1, -1))[0]


def vadFolderWrapperMergedByTh(inputFolder, outFolder, smoothingWindow, weight,
                               model_name, threshold):
    """Cut the speech parts out of every audio file found under a prefix.

    For each ``*.wav`` / ``*.mp3`` match of ``inputFolder``, the signal is
    segmented with ``aS.silence_removal`` and every segment is classified with
    the model loaded from ``model_name``. A segment labelled 'speech' that
    directly follows another 'speech' segment is merged into it as long as the
    merged span (end of the new segment minus start of the previous one) does
    not exceed ``threshold``. Each resulting speech segment is written as a
    wav file under ``outFolder + <file base name> + '/'``, alongside two text
    files, ``segments`` and ``segments_details``, with one line per segment.

    Args:
        inputFolder: prefix to which '*.wav' and '*.mp3' are appended directly
            (so it must end with a path separator to name a folder).
        outFolder: prefix to which the file base name is appended directly.
        smoothingWindow: forwarded to ``aS.silence_removal``.
        weight: forwarded to ``aS.silence_removal``.
        model_name: path of the classifier model file.
        threshold: maximum merged span, in the unit of the segment limits
            (these are multiplied by the sampling rate to index samples).

    Returns:
        None. Returns early (after printing a message) when the model file
        does not exist or no audio file is found.

    Raises:
        Exception: if a path returned by the glob is not a regular file.
    """
    if not os.path.isfile(model_name):
        print("fileClassification: input model_name not found!")
        # Without the model nothing can be classified; loading it would fail.
        return

    classifier, mean, std, classes, mid_window, mid_step, short_window, \
        short_step, compute_beat = aT.load_model(model_name)

    wavFilesList = []
    for pattern in ('*.wav', '*.mp3'):
        print(inputFolder + pattern)
        wavFilesList.extend(glob.glob(inputFolder + pattern))
    wavFilesList.sort()
    if not wavFilesList:
        print("No WAV files found!")
        return

    for wavFile in wavFilesList:
        if not os.path.isfile(wavFile):
            raise Exception("Input audio file not found!")

        base = os.path.splitext(os.path.basename(wavFile))[0]
        folder = outFolder + base + '/'
        os.makedirs(folder, exist_ok=True)

        # Context managers close both index files even if processing fails.
        with open(os.path.join(folder, 'segments'), 'w') as segfile, \
                open(os.path.join(folder, 'segments_details'), 'w') as segfile2:
            fs, x = audioBasicIO.read_audio_file(wavFile)
            if fs == 0:
                # audio file IO problem: nothing to segment, leave the two
                # index files empty and move on to the next file
                continue

            segmentLimits = aS.silence_removal(x, fs, 0.05, 0.05,
                                               smoothingWindow, weight, False)

            speech = []   # merged [start, end, label] speech segments
            merge = True  # True while the previous segment was speech
            for i, st in enumerate(segmentLimits):
                signal = audioBasicIO.stereo_to_mono(
                    x[int(fs * st[0]):int(fs * st[1])])
                class_id = _vad_segment_class_id(
                    signal, fs, classifier, mean, std, mid_window, mid_step,
                    short_window, short_step, compute_beat)
                print(class_id, type(class_id))
                label = classes[int(class_id)]
                print(label)

                if label == 'speech':
                    if merge and speech and \
                            not (st[1] - speech[-1][0] > threshold):
                        # extend the previous speech segment up to this end
                        speech[-1][1] = st[1]
                    else:
                        speech.append([st[0], st[1], label])
                    merge = True
                else:
                    # a non-speech segment breaks the run; never merge over it
                    merge = False
                print(i, merge)

            for start, end, seg_label in speech:
                t_start = "{:.3f}".format(start)
                t_end = "{:.3f}".format(end)
                strName = base + "_" + t_start + "_" + t_end
                wavfile.write(folder + strName + ".wav", fs,
                              x[int(fs * start):int(fs * end)])
                segfile.write(strName + ' ' + base + ' ' + t_start + ' '
                              + t_end + "\n")
                segfile2.write(strName + ' ' + t_start + ' ' + t_end + ' '
                               + seg_label + "\n")