import os

from pyAudioAnalysis import audioSegmentation as aS


def segmentclassifyFileWrapper(inputWavFile, model_name, model_type):
    # Validate that both the trained model and the input audio file exist
    if not os.path.isfile(model_name):
        raise Exception("Input model_name not found!")
    if not os.path.isfile(inputWavFile):
        raise Exception("Input audio file not found!")

    # Look for a ground-truth .segments file next to the audio file
    gtFile = ""
    if inputWavFile[-4:] == ".wav":
        gtFile = inputWavFile.replace(".wav", ".segments")
    if inputWavFile[-4:] == ".mp3":
        gtFile = inputWavFile.replace(".mp3", ".segments")

    # Fixed-window segmentation-classification, with result plotting enabled
    aS.mtFileClassification(inputWavFile, model_name, model_type, True, gtFile)
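
# A minimal usage sketch for the wrapper above; the file and model names are
# placeholders (any model trained with audioTrainTest.featureAndTrain, passed
# with its matching model_type string, should work):
if __name__ == '__main__':
    segmentclassifyFileWrapper("example.wav", "svmSM", "svm")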

def func():
    # Fixed-window segmentation-classification with a pre-trained SVM model.
    # The PATH_TO_* constants and the getSegments/cutSegments helpers are
    # defined elsewhere in this module.
    flagsInd, classesAll, acc = aS.mtFileClassification(
        PATH_TO_WAV, PATH_TO_SVM, "svm", False, PATH_TO_SEGMENTS_FILE)
    print(flagsInd, classesAll, acc)

    # Collapse the per-window flags into contiguous segments
    segments = getSegments(flagsInd)
    print(segments)

    # Persist the segment list, then cut the audio accordingly
    with open(PATH_TO_DIR + "segments", "w") as f:
        f.write(str(segments))
    cutSegments(segments)
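
# getSegments above is project-specific and not shown. A plausible sketch of
# what such a helper does, assuming per-window class flags at a fixed mid-term
# step (the real helper and its step size may differ):
def getSegmentsSketch(flagsInd, step=1.0):
    # Merge runs of identical flags into (start_sec, end_sec, class) triples
    segments = []
    start = 0
    for i in range(1, len(flagsInd) + 1):
        if i == len(flagsInd) or flagsInd[i] != flagsInd[start]:
            segments.append((start * step, i * step, flagsInd[start]))
            start = i
    return segments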

import csv
import os
import sys
import wave

import numpy as np
from PyQt5 import QtWidgets  # Qt binding assumed; the project may use another
from pyAudioAnalysis import audioSegmentation as aS

# The module-level globals used below (annotations, shadesAndSpeaker,
# GreenShades, greenIndex, ApplicationWindow, ...) are defined elsewhere
# in this module.


def run(wavFileName2, bagFile2):
    global wavFileName
    global bagFile
    global xStart
    global xEnd
    global annotationFlag, annotations, shadesAndSpeaker, greenIndex
    global spf, duration, signal

    time = 0
    segmentDuration = 0
    segments = []

    # >> Open WAV file
    # ----------------------
    wavFileName = wavFileName2
    bagFile = bagFile2
    spf = wave.open(wavFileName, 'r')

    # Extract the raw 16-bit audio samples from the WAV file
    signal = spf.readframes(-1)
    signal = np.frombuffer(signal, dtype=np.int16)

    # Get the WAV file duration
    frames = spf.getnframes()
    rate = spf.getframerate()
    duration = frames / float(rate)

    # >> Open CSV file
    # ----------------------
    # Check if the annotation .csv already exists
    csvFileName = bagFile.replace(".bag", "_audio.csv")
    if os.path.isfile(csvFileName):
        annotationFile = open(csvFileName, 'r')
        read = csv.reader(annotationFile)
        for row in read:
            row[0] = float(row[0])
            row[1] = float(row[1])
            annotations.append([row[0], row[1], row[2]])

        # Give each speaker a unique color for the annotation plot and the
        # Gantt chart, cycling through the available green shades
        for shadeIndex in range(len(annotations)):
            if annotations[shadeIndex][2][:8] == 'Speech::':
                shadesAndSpeaker.append([annotations[shadeIndex][2],
                                         GreenShades[greenIndex]])
                if greenIndex >= len(GreenShades) - 1:
                    greenIndex = 0
                else:
                    greenIndex = greenIndex + 1

    # >> Call the classifier if no CSV file exists
    # ----------------------
    else:
        [flagsInd, classesAll, acc] = aS.mtFileClassification(
            wavFileName, 'svmModelTest', 'svm', False)

        # Convert the per-window flags to segments and declare the classes
        [segs, classes] = aS.flags2segs(flagsInd, 1)
        lengthClass = len(classesAll)
        className = np.arange(lengthClass, dtype=float)
        for j in range(len(segs)):
            # No annotation for silence segments
            for i in range(len(classesAll)):
                if classes[j] == className[i] and classesAll[i] != 'Silence':
                    annotations.append([segs[j][0] * 1000, segs[j][1] * 1000,
                                        classesAll[i]])

    # >> Initialize GUI
    # ----------------------
    qApp = QtWidgets.QApplication(sys.argv)
    aw = ApplicationWindow()
    aw.setWindowTitle("Audio")
    aw.show()

    # >> Terminate GUI
    # ----------------------
    sys.exit(qApp.exec_())
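
# Hypothetical invocation: the function expects the WAV file exported from a
# ROS bag plus the bag's own path (the bag name is only used to derive the
# *_audio.csv annotation file name):
run('session.wav', 'session.bag')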

from pyAudioAnalysis import audioSegmentation as aS


def Classify(wavFileName):
    # Segmentation and classification with a pre-trained SVM model
    [flagsInd, classesAll, acc] = aS.mtFileClassification(
        wavFileName, 'svmModelTest', 'svm', False)
    print(flagsInd, classesAll)
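
# Example call, assuming an 'svmModelTest' model file (plus the MEANS side
# file that pyAudioAnalysis saves alongside it) in the working directory:
Classify("recording.wav")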

import sys
import time  # time.clock() below follows the original; use time.time() on Python 3.8+

from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import audioFeatureExtraction
from pyAudioAnalysis import audioSegmentation as aS
from pyAudioAnalysis import audioTrainTest as aT

nExp = 10  # repetitions per benchmark (assumed value; set as needed)


def main(argv):
    if argv[1] == "-shortTerm":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            F = audioFeatureExtraction.stFeatureExtraction(x, Fs, 0.050 * Fs,
                                                           0.050 * Fs)
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print("short-term feature extraction: "
                  "{0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-classifyFile":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            aT.fileClassification("diarizationExample.wav", "svmSM", "svm")
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print("Mid-term feature extraction + classification \t "
                  "{0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-mtClassify":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            [flagsInd, classesAll, acc] = aS.mtFileClassification(
                "diarizationExample.wav", "svmSM", "svm", False, '')
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print("Fixed-size classification - segmentation \t "
                  "{0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-hmmSegmentation":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            aS.hmmSegmentation('diarizationExample.wav', 'hmmRadioSM', False, '')
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print("HMM-based classification - segmentation \t "
                  "{0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-silenceRemoval":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            segments = aS.silenceRemoval(x, Fs, 0.050, 0.050,
                                         smoothWindow=1.0, Weight=0.3,
                                         plot=False)
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print("Silence removal \t {0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-thumbnailing":
        for i in range(nExp):
            [Fs1, x1] = audioBasicIO.readAudioFile("scottish.wav")
            duration1 = x1.shape[0] / float(Fs1)
            t1 = time.clock()
            # Find thumbnail endpoints
            [A1, A2, B1, B2, Smatrix] = aS.musicThumbnailing(x1, Fs1,
                                                             1.0, 1.0, 15.0)
            t2 = time.clock()
            perTime1 = duration1 / (t2 - t1)
            print("Thumbnail \t {0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-diarization-noLDA":
        for i in range(nExp):
            [Fs1, x1] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration1 = x1.shape[0] / float(Fs1)
            t1 = time.clock()
            aS.speakerDiarization("diarizationExample.wav", 4, LDAdim=0,
                                  PLOT=False)
            t2 = time.clock()
            perTime1 = duration1 / (t2 - t1)
            print("Diarization \t {0:.1f} x realtime".format(perTime1))
    elif argv[1] == "-diarization-LDA":
        for i in range(nExp):
            [Fs1, x1] = audioBasicIO.readAudioFile("diarizationExample.wav")
            duration1 = x1.shape[0] / float(Fs1)
            t1 = time.clock()
            aS.speakerDiarization("diarizationExample.wav", 4, PLOT=False)
            t2 = time.clock()
            perTime1 = duration1 / (t2 - t1)
            print("Diarization \t {0:.1f} x realtime".format(perTime1))
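
# The benchmark to run is chosen by the first command-line argument, e.g.
# "python perfTest.py -shortTerm" or "python perfTest.py -mtClassify".
# A minimal sketch of the entry point (the original script's name and
# argument handling may differ):
if __name__ == '__main__':
    main(sys.argv)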

import csv
import os
import wave

import numpy as np
from pyAudioAnalysis import audioSegmentation as aS

# audioGlobals is this project's shared-state module (wavFileName, bagFile,
# annotations, GreenShades, greenIndex, ...).


def run(wavFileName2, bagFile2):
    time = 0
    segmentDuration = 0
    segments = []

    # >> Open WAV file
    # ----------------------
    audioGlobals.wavFileName = wavFileName2
    audioGlobals.bagFile = bagFile2
    audioGlobals.spf = wave.open(audioGlobals.wavFileName, 'r')

    # Extract the raw 16-bit audio samples from the WAV file
    audioGlobals.signal = audioGlobals.spf.readframes(-1)
    audioGlobals.signal = np.frombuffer(audioGlobals.signal, dtype=np.int16)

    # Get the WAV file duration
    frames = audioGlobals.spf.getnframes()
    rate = audioGlobals.spf.getframerate()
    audioGlobals.duration = frames / float(rate)

    # >> Open CSV file
    # ----------------------
    # Check if the annotation .csv already exists
    csvFileName = audioGlobals.bagFile.replace(".bag", "_audio.csv")
    if os.path.isfile(csvFileName):
        annotationFile = open(csvFileName, 'r')
        read = csv.reader(annotationFile)
        for row in read:
            row[0] = float(row[0])
            row[1] = float(row[1])
            audioGlobals.annotations.append([row[0], row[1], row[2]])

        # Give each speaker a unique color for the annotation plot and the
        # Gantt chart, cycling through the available green shades
        for shadeIndex in range(len(audioGlobals.annotations)):
            if audioGlobals.annotations[shadeIndex][2][:8] == 'Speech::':
                if audioGlobals.greenIndex >= (len(audioGlobals.GreenShades) - 1):
                    audioGlobals.greenIndex = 0
                else:
                    audioGlobals.greenIndex = audioGlobals.greenIndex + 1
                audioGlobals.shadesAndSpeaker.append(
                    [audioGlobals.annotations[shadeIndex][2],
                     audioGlobals.GreenShades[audioGlobals.greenIndex]])

    # >> Call the classifier if no CSV file exists
    # ----------------------
    else:
        [flagsInd, classesAll, acc, CM] = aS.mtFileClassification(
            audioGlobals.wavFileName,
            os.path.abspath('audio/ClassifierMethods/svmModelTest'),
            'svm', False)

        # Convert the per-window flags to segments and declare the classes
        [segs, classes] = aS.flags2segs(flagsInd, 1)
        lengthClass = len(classesAll)
        className = np.arange(lengthClass, dtype=float)
        for j in range(len(segs)):
            # No annotation for silence segments
            for i in range(len(classesAll)):
                if classes[j] == className[i] and classesAll[i] != 'Silence':
                    audioGlobals.annotations.append(
                        [segs[j][0] * 1000, segs[j][1] * 1000, classesAll[i]])

        # >> Write the annotations to the csv file so they are found next time
        csvFileName = audioGlobals.bagFile.replace(".bag", "_audio.csv")
        annotationFile = open(csvFileName, 'w')
        write = csv.writer(annotationFile)
        write.writerows(audioGlobals.annotations)
        annotationFile.close()
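
# The cached annotation file written above can be read back the same way the
# function itself reads it; a small sketch assuming the bag was named
# 'session.bag':
with open('session_audio.csv', 'r') as f:
    for start_ms, end_ms, label in csv.reader(f):
        print(float(start_ms), float(end_ms), label)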

import numpy as np
from pyAudioAnalysis import audioSegmentation as aS


def save_out(test_file):
    # Classify the file with a pre-trained kNN model (args is parsed elsewhere
    # in this script) and save the per-window flags next to the audio file
    [flags_ind, classes_all, acc] = aS.mtFileClassification(test_file,
                                                            args.model,
                                                            "knn", False)
    np.save(test_file, flags_ind)  # np.save appends ".npy" to the name
    return classes_all
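
# Example use, assuming args.model points to a kNN model trained with
# audioTrainTest.featureAndTrain:
classes = save_out("test.wav")  # writes test.wav.npy
print(classes)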
print("\n\n\n * * * TEST 2 * * * \n\n\n") [Fs, x] = audioBasicIO.readAudioFile(root_data_path + "pyAudioAnalysis/data/doremi.wav") x = audioBasicIO.stereo2mono(x) specgram, TimeAxis, FreqAxis = audioFeatureExtraction.stSpectogram(x, Fs, round(Fs * 0.040), round(Fs * 0.040), True) print("\n\n\n * * * TEST 3 * * * \n\n\n") [Fs, x] = audioBasicIO.readAudioFile(root_data_path + "pyAudioAnalysis/data/doremi.wav") x = audioBasicIO.stereo2mono(x) specgram, TimeAxis, FreqAxis = audioFeatureExtraction.stChromagram(x, Fs, round(Fs * 0.040), round(Fs * 0.040), True) print("\n\n\n * * * TEST 4 * * * \n\n\n") aT.featureAndTrain([root_data_path +"SM/speech",root_data_path + "SM/music"], 1.0, 1.0, 0.2, 0.2, "svm", "temp", True) print("\n\n\n * * * TEST 5 * * * \n\n\n") [flagsInd, classesAll, acc, CM] = aS.mtFileClassification(root_data_path + "pyAudioAnalysis/data//scottish.wav", root_data_path + "pyAudioAnalysis/data/svmSM", "svm", True, root_data_path + 'pyAudioAnalysis/data/scottish.segments') print("\n\n\n * * * TEST 6 * * * \n\n\n") aS.trainHMM_fromFile(root_data_path + 'radioFinal/train/bbc4A.wav', root_data_path + 'radioFinal/train/bbc4A.segments', 'hmmTemp1', 1.0, 1.0) aS.trainHMM_fromDir(root_data_path + 'radioFinal/small', 'hmmTemp2', 1.0, 1.0) aS.hmmSegmentation(root_data_path + 'pyAudioAnalysis/data//scottish.wav', 'hmmTemp1', True, root_data_path + 'pyAudioAnalysis/data//scottish.segments') # test 1 aS.hmmSegmentation(root_data_path + 'pyAudioAnalysis/data//scottish.wav', 'hmmTemp2', True, root_data_path + 'pyAudioAnalysis/data//scottish.segments') # test 2 print("\n\n\n * * * TEST 7 * * * \n\n\n") aT.featureAndTrainRegression(root_data_path + "pyAudioAnalysis/data/speechEmotion", 1, 1, 0.050, 0.050, "svm_rbf", "temp.mod", compute_beat=False) print(aT.fileRegression(root_data_path + "pyAudioAnalysis/data/speechEmotion/01.wav", "temp.mod", "svm_rbf")) print("\n\n\n * * * TEST 8 * * * \n\n\n") aT.featureAndTrainRegression(root_data_path + "pyAudioAnalysis/data/speechEmotion", 1, 1, 0.050, 0.050, "svm", "temp.mod", compute_beat=False) print(aT.fileRegression(root_data_path + "pyAudioAnalysis/data/speechEmotion/01.wav", "temp.mod", "svm"))