def segmentclassifySMFileWrapper(inputWavFile, model_name, model_type):
    """Run fixed-window segment classification of an audio file, with plotting.

    Parameters:
        inputWavFile (str): path to the audio file to classify (.wav or .mp3).
        model_name (str): path to the trained segment-classifier model file.
        model_type (str): classifier type string passed through to
            aS.mid_term_file_classification (e.g. "svm", "svm_rbf").

    Raises:
        Exception: if the model file or the audio file does not exist.
    """
    if not os.path.isfile(model_name):
        raise Exception("Input model_name not found!")
    if not os.path.isfile(inputWavFile):
        raise Exception("Input audio file not found!")
    # Derive the ground-truth ".segments" path from the audio filename.
    # Generalized from two hard-coded slicing checks: splitext handles the
    # extension robustly; any extension other than .wav/.mp3 still yields
    # an empty ground-truth path, matching the original behavior.
    base, ext = os.path.splitext(inputWavFile)
    gtFile = base + ".segments" if ext in (".wav", ".mp3") else ""
    aS.mid_term_file_classification(inputWavFile, model_name, model_type,
                                    True, gtFile)
def test(audiofile_path):
    """Classify an audio file with the male/female SVM model and segment it.

    Parameters:
        audiofile_path (str): path to the audio file to classify.

    Returns:
        tuple: (probs, segments, class_names, c) where
            probs  -- per-segment class probabilities,
            segments -- list of [start, end] times (converted from the
                        numpy array returned by labels_to_segments),
            class_names -- label names from the model,
            c -- per-segment class indices.
    """
    # Removed a large block of commented-out duplicate __main__ code that
    # shadowed this function's logic (dead code).
    model_path = r"models/svm_male_female"
    model_type = "svm_rbf"
    plot_results = False
    labels, class_names, mt_step, class_probabilities = \
        mid_term_file_classification(audiofile_path, model_path,
                                     model_type, plot_results)
    print("labels: ", len(labels))
    # Merge consecutive identical window labels into contiguous segments.
    segs, c, probs = labels_to_segments(labels, class_probabilities, mt_step)
    print("prob test: ", len(probs))
    # segs is a numpy array; tolist() makes the result JSON-serializable.
    return probs, segs.tolist(), class_names, c
"""! @brief Example 31B @details: Speech music discrimination and segmentation (using a trained speech - music segment classifier) Important: Need to run 31A first to extract speech music model (stored in svm_speech_music) @author Theodoros Giannakopoulos {[email protected]} """ from pyAudioAnalysis.audioSegmentation import mid_term_file_classification if __name__ == '__main__': au = "../data/scottish_radio.wav" gt = "../data/scottish_radio.segments" # au = "../data/musical_genres_small/hiphop/run_dmc_peter_riper.wav" mid_term_file_classification(au, "svm_speech_music", "svm_rbf", True, gt)
def main(argv):
    """Benchmark pyAudioAnalysis operations and report speed vs. realtime.

    argv[1] selects the benchmark ("-shortTerm", "-classifyFile",
    "-mtClassify", "-hmmSegmentation", "-silenceRemoval", "-thumbnailing",
    "-diarization-noLDA", "-diarization-LDA").  Each benchmark is repeated
    nExp times (module-level constant) and prints duration / elapsed time,
    i.e. how many times faster than realtime the operation ran.

    Fix: the original mixed Python-2 print statements with the Python-3
    pyAudioAnalysis API, so it could not run under either interpreter;
    converted all output to print() calls.
    """
    if argv[1] == "-shortTerm":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.read_audio_file("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.time()
            # NOTE(review): frame-level extraction usually lives in
            # ShortTermFeatures — confirm this helper exists in MidTermFeatures.
            F = MidTermFeatures.short_term_feature_extraction(
                x, Fs, 0.050 * Fs, 0.050 * Fs)
            t2 = time.time()
            perTime1 = duration / (t2 - t1)
            print("short-term feature extraction: "
                  "{0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-classifyFile":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.read_audio_file("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.time()
            aT.file_classification("diarizationExample.wav", "svmSM", "svm")
            t2 = time.time()
            perTime1 = duration / (t2 - t1)
            print("Mid-term feature extraction + classification \t "
                  "{0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-mtClassify":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.read_audio_file("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.time()
            [flagsInd, classesAll, acc] = \
                aS.mid_term_file_classification("diarizationExample.wav",
                                                "svmSM", "svm", False, '')
            t2 = time.time()
            perTime1 = duration / (t2 - t1)
            print("Fix-sized classification - segmentation \t "
                  "{0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-hmmSegmentation":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.read_audio_file("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.time()
            aS.hmm_segmentation('diarizationExample.wav', 'hmmRadioSM',
                                False, '')
            t2 = time.time()
            perTime1 = duration / (t2 - t1)
            print("HMM-based classification - segmentation \t "
                  "{0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-silenceRemoval":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.read_audio_file("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.time()
            # The file is re-read here on purpose: the timed section of this
            # benchmark includes the I/O cost of loading the audio.
            [Fs, x] = audioBasicIO.read_audio_file("diarizationExample.wav")
            segments = aS.silence_removal(x, Fs, 0.050, 0.050,
                                          smooth_window=1.0, Weight=0.3,
                                          plot=False)
            t2 = time.time()
            perTime1 = duration / (t2 - t1)
            print("Silence removal \t {0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-thumbnailing":
        for i in range(nExp):
            [Fs1, x1] = audioBasicIO.read_audio_file("scottish.wav")
            duration1 = x1.shape[0] / float(Fs1)
            t1 = time.time()
            # find thumbnail endpoints
            [A1, A2, B1, B2, Smatrix] = aS.music_thumbnailing(x1, Fs1, 1.0,
                                                              1.0, 15.0)
            t2 = time.time()
            perTime1 = duration1 / (t2 - t1)
            print("Thumbnail \t {0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-diarization-noLDA":
        for i in range(nExp):
            [Fs1, x1] = audioBasicIO.read_audio_file("diarizationExample.wav")
            duration1 = x1.shape[0] / float(Fs1)
            t1 = time.time()
            # LDAdim=0 disables the LDA dimensionality-reduction step.
            aS.speaker_diarization("diarizationExample.wav", 4, LDAdim=0,
                                   PLOT=False)
            t2 = time.time()
            perTime1 = duration1 / (t2 - t1)
            print("Diarization \t {0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-diarization-LDA":
        for i in range(nExp):
            [Fs1, x1] = audioBasicIO.read_audio_file("diarizationExample.wav")
            duration1 = x1.shape[0] / float(Fs1)
            t1 = time.time()
            aS.speaker_diarization("diarizationExample.wav", 4, PLOT=False)
            t2 = time.time()
            perTime1 = duration1 / (t2 - t1)
            print("Diarization \t {0:.1f} x realtime".format(perTime1))
from pyAudioAnalysis import audioSegmentation as aS

# Fixed-window segment classification of 303.wav against a ground-truth
# segments file, with plotting enabled.
# Fix: the model file is "svm_rbf_sm" but the original passed model_type
# "svm"; aligned to "svm_rbf" for consistency with the model name and with
# the other examples in this codebase.
[flagsInd, classesAll, acc, CM] = aS.mid_term_file_classification(
    "303.wav", "data/models/svm_rbf_sm", "svm_rbf", True,
    'data/scottish.segments')
# (Tail of a ShortTermFeatures call whose opening line precedes this chunk.)
x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)
print("\n\n\n * * * TEST 3 * * * \n\n\n")
# Chromagram of a mono-mixed test file (40 ms window / 40 ms step, plotted).
[Fs, x] = audioBasicIO.read_audio_file(root_data_path + "pyAudioAnalysis/data/doremi.wav")
x = audioBasicIO.stereo_to_mono(x)
specgram, TimeAxis, FreqAxis = ShortTermFeatures.chromagram(
    x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)
print("\n\n\n * * * TEST 4 * * * \n\n\n")
# Train an SVM on two class directories ("1/", "2/") and store it as "temp".
aT.extract_features_and_train([root_data_path + "1/", root_data_path + "2/"],
                              1.0, 1.0, 0.2, 0.2, "svm", "temp", True)
print("\n\n\n * * * TEST 5 * * * \n\n\n")
# Fixed-window segment classification evaluated against ground truth.
[flagsInd, classesAll, acc, CM] = aS.mid_term_file_classification(
    root_data_path + "scottish.wav",
    root_data_path + "models/svm_rbf_sm", "svm_rbf", True,
    root_data_path + 'pyAudioAnalysis/data/scottish.segments')
print("\n\n\n * * * TEST 6 * * * \n\n\n")
# Train two HMM segmenters (from a single annotated file and from a
# directory), then evaluate both on the same annotated recording.
aS.train_hmm_from_file(root_data_path + 'radioFinal/train/bbc4A.wav',
                       root_data_path + 'radioFinal/train/bbc4A.segments',
                       'hmmTemp1', 1.0, 1.0)
aS.train_hmm_from_directory(root_data_path + 'radioFinal/small',
                            'hmmTemp2', 1.0, 1.0)
aS.hmm_segmentation(root_data_path + 'pyAudioAnalysis/data//scottish.wav',
                    'hmmTemp1', True,
                    root_data_path + 'pyAudioAnalysis/data//scottish.segments')  # test 1
aS.hmm_segmentation(root_data_path + 'pyAudioAnalysis/data//scottish.wav',
                    'hmmTemp2', True,
                    root_data_path + 'pyAudioAnalysis/data//scottish.segments')  # test 2
def test_mt_file_classification():
    """Segment-level classification of the reference clip must beat 95%."""
    wav_path = "test_data/scottish.wav"
    model_path = "test_data/svm_rbf_sm"
    gt_path = "test_data/scottish.segments"
    result = aS.mid_term_file_classification(wav_path, model_path, "svm_rbf",
                                             False, gt_path)
    _labels, _class_names, accuracy, _cm = result
    assert accuracy > 0.95, "Segment-level classification accuracy is low"
# "iNNovationMerge DailyCodeHub" # Theme : Audio segmentation week with Python # Fix-sized audio segmentation using pretrained eight class SVM model(svm_rbf_movie8class) from pyAudioAnalysis import audioSegmentation as aS [flagsInd, classesAll, acc, CM] = aS.mid_term_file_classification("data/scottish.wav", "data/models/svm_rbf_movie8class", "svm", True, 'data/scottish.segments')