import os
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import audioFeatureExtraction as aF


def fileChromagramWrapper(wav_file):
    if not os.path.isfile(wav_file):
        raise Exception("Input audio file not found!")
    [fs, x] = audioBasicIO.readAudioFile(wav_file)
    x = audioBasicIO.stereo2mono(x)
    specgram, TimeAxis, FreqAxis = aF.stChromagram(x, fs, round(fs * 0.040),
                                                   round(fs * 0.040), True)
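# Usage sketch for fileChromagramWrapper (a minimal, illustrative example;
# the path below is an assumption pointing at the doremi.wav sample that
# ships with pyAudioAnalysis, and is not part of the original code):
if __name__ == '__main__':
    fileChromagramWrapper("data/doremi.wav")  # plot a 40 ms-window chromagram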
def recordAudioSegments(BLOCKSIZE,
                        Fs=16000,
                        showSpectrogram=False,
                        showChromagram=False,
                        recordActivity=False):
    midTermBufferSize = int(Fs * BLOCKSIZE)

    print("Press Ctrl+C to stop recording")

    startDateTimeStr = datetime.datetime.now().strftime("%Y_%m_%d_%I:%M%p")
    MEAN, STD = loadMEANS("svmMovies8classesMEANS")  # load MEAN feature values

    pa = pyaudio.PyAudio()
    stream = pa.open(format=FORMAT,
                     channels=1,
                     rate=Fs,
                     input=True,
                     frames_per_buffer=midTermBufferSize)

    midTermBuffer = []
    curWindow = []
    count = 0
    errorcount = 0
    global allData
    allData = []
    energy100_buffer_zero = []
    curActiveWindow = numpy.array([])
    timeStart = time.time()

    while 1:
        try:
            block = stream.read(midTermBufferSize)
            countB = len(block) // 2
            fmt = "%dh" % countB
            shorts = struct.unpack(fmt, block)
            curWindow = list(shorts)
            midTermBuffer = midTermBuffer + curWindow  # copy to midTermBuffer
            del curWindow

            # print(len(midTermBuffer), midTermBufferSize)
            # if len(midTermBuffer) == midTermBufferSize:  # if midTermBuffer is full:
            if 1:
                elapsedTime = time.time() - timeStart  # time since recording started
                dataTime = (count + 1) * BLOCKSIZE     # data-driven time
                # TODO
                # mtF, _ = aF.mtFeatureExtraction(midTermBuffer, Fs, BLOCKSIZE * Fs, BLOCKSIZE * Fs, 0.050 * Fs, 0.050 * Fs)
                # curFV = (mtF - MEAN) / STD
                # TODO
                allData += midTermBuffer
                midTermBuffer = numpy.double(midTermBuffer)  # convert current buffer to numpy array

                # Compute spectrogram
                if showSpectrogram:
                    (spectrogram, TimeAxisS, FreqAxisS) = aF.stSpectogram(
                        midTermBuffer, Fs, 0.020 * Fs, 0.02 * Fs)
                    FreqAxisS = numpy.array(FreqAxisS)  # frequency axis
                    # most dominant frequency of each short-term window:
                    DominantFreqs = FreqAxisS[numpy.argmax(spectrogram, axis=1)]
                    maxFreq = numpy.mean(DominantFreqs)  # average dominant freq
                    maxFreqStd = numpy.std(DominantFreqs)

                # Compute chromagram
                if showChromagram:
                    (chromagram, TimeAxisC, FreqAxisC) = aF.stChromagram(
                        midTermBuffer, Fs, 0.020 * Fs, 0.02 * Fs)
                    FreqAxisC = numpy.array(FreqAxisC)  # 12 chroma classes
                    # most dominant chroma class of each short-term window:
                    DominantFreqsC = FreqAxisC[numpy.argmax(chromagram, axis=1)]
                    # most common chroma class among all short-term windows:
                    maxFreqC = most_common(DominantFreqsC)[0]

                # Plot signal window
                signalPlotCV = plotCV(
                    scipy.signal.resample(midTermBuffer + 16000, WidthPlot),
                    WidthPlot, HeightPlot, 32000)
                cv2.imshow('Signal', signalPlotCV)
                cv2.moveWindow('Signal', 50, statusHeight + 50)

                # Show spectrogram
                if showSpectrogram:
                    iSpec = numpy.array(spectrogram.T * 255, dtype=numpy.uint8)
                    iSpec2 = cv2.resize(iSpec, (WidthPlot, HeightPlot),
                                        interpolation=cv2.INTER_CUBIC)
                    iSpec2 = cv2.applyColorMap(iSpec2, cv2.COLORMAP_JET)
                    cv2.putText(iSpec2, "maxFreq: %.0f Hz" % maxFreq, (0, 11),
                                cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                    cv2.imshow('Spectrogram', iSpec2)
                    cv2.moveWindow('Spectrogram', 50,
                                   HeightPlot + statusHeight + 60)

                # Show chromagram
                if showChromagram:
                    iChroma = numpy.array(
                        (chromagram.T / chromagram.max()) * 255,
                        dtype=numpy.uint8)
                    iChroma2 = cv2.resize(iChroma, (WidthPlot, HeightPlot),
                                          interpolation=cv2.INTER_CUBIC)
                    iChroma2 = cv2.applyColorMap(iChroma2, cv2.COLORMAP_JET)
                    cv2.putText(iChroma2, "maxFreqC: %s" % maxFreqC, (0, 11),
                                cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                    cv2.imshow('Chroma', iChroma2)
                    cv2.moveWindow('Chroma', 50,
                                   2 * HeightPlot + statusHeight + 60)

                # Activity detection:
                energy100 = (100 * numpy.sum(midTermBuffer * midTermBuffer) /
                             (midTermBuffer.shape[0] * 32000 * 32000))
                if count < 10:  # TODO: make this a parameter
                    energy100_buffer_zero.append(energy100)
                    mean_energy100_zero = numpy.mean(
                        numpy.array(energy100_buffer_zero))
                else:
                    mean_energy100_zero = numpy.mean(
                        numpy.array(energy100_buffer_zero))
                    if energy100 < 1.2 * mean_energy100_zero:
                        if curActiveWindow.shape[0] > 0:
                            # a sound was detected in the previous segment:
                            activeT2 = elapsedTime - BLOCKSIZE  # end of current active window
                            if activeT2 - activeT1 > minActivityDuration:
                                wavFileName = startDateTimeStr + \
                                    "_activity_{0:.2f}_{1:.2f}.wav".format(
                                        activeT1, activeT2)
                                if recordActivity:
                                    # write current active window to file
                                    wavfile.write(wavFileName, Fs,
                                                  numpy.int16(curActiveWindow))
                            curActiveWindow = numpy.array([])  # delete current active window
                    else:
                        if curActiveWindow.shape[0] == 0:
                            # this is a new active window!
                            activeT1 = elapsedTime - BLOCKSIZE  # start of new active window
                        curActiveWindow = numpy.concatenate(
                            (curActiveWindow, midTermBuffer))

                # Show status messages on the Status cv window:
                textIm = numpy.zeros((statusHeight, WidthPlot, 3))
                statusStrTime = "time: %.1f sec" % elapsedTime + \
                    " - data time: %.1f sec" % dataTime + \
                    " - loss : %.1f sec" % (elapsedTime - dataTime)
                statusStrFeature = "ene1:%.1f" % energy100 + \
                    " eneZero:%.1f" % mean_energy100_zero
                cv2.putText(textIm, statusStrTime, (0, 11),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                cv2.putText(textIm, statusStrFeature, (0, 22),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                if curActiveWindow.shape[0] > 0:
                    cv2.putText(textIm, "sound", (0, 33),
                                cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255))
                else:
                    cv2.putText(textIm, "silence", (0, 33),
                                cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 220))
                cv2.imshow("Status", textIm)
                cv2.moveWindow("Status", 50, 0)
                midTermBuffer = []
                ch = cv2.waitKey(10)
                count += 1
        except IOError as e:
            errorcount += 1  # keep the error counter consistent
            print("(%d) Error recording: %s" % (errorcount, e))
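# Usage sketch (illustrative, not from the original script). The enclosing
# module is assumed to import numpy, pyaudio, cv2, struct, time, datetime,
# scipy.signal and scipy.io.wavfile, and to define the module-level globals
# used above (FORMAT, WidthPlot, HeightPlot, statusHeight,
# minActivityDuration) plus the loadMEANS, plotCV and most_common helpers.
def demo_record_audio_segments():  # hypothetical helper, not in the original
    # record in 1-second blocks, display spectrogram and chromagram, and
    # save every detected activity segment to a timestamped WAV file
    recordAudioSegments(1.0, Fs=16000, showSpectrogram=True,
                        showChromagram=True, recordActivity=True)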
def recordAndCalcHR(BLOCKSIZE,
                    Fs,
                    showSpectrogram=False,
                    showChromagram=False,
                    recordActivity=False):
    print("Press Ctrl+C to stop process")

    startDateTimeStr = datetime.datetime.now().strftime("%Y_%m_%d_%I:%M%p")
    MEAN, STD = loadMEANS("svmMovies8classesMEANS")  # load MEAN feature values

    # open alsaaudio capture
    inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NONBLOCK)
    inp.setchannels(1)                          # 1 channel
    inp.setrate(Fs)                             # set sampling freq
    inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)  # set 2-byte samples
    inp.setperiodsize(512)

    midTermBufferSize = int(Fs * BLOCKSIZE)
    midTermBuffer = []
    curWindow = []
    count = 0
    global allData
    allData = []
    energy100_buffer_zero = []
    curActiveWindow = numpy.array([])
    timeStart = time.time()

    while 1:
        l, data = inp.read()  # read data from buffer
        if l:
            for i in range(len(data) // 2):
                # get audio samples
                curWindow.append(audioop.getsample(data, 2, i))
            if len(curWindow) + len(midTermBuffer) > midTermBufferSize:
                samplesToCopyToMidBuffer = \
                    midTermBufferSize - len(midTermBuffer)
            else:
                samplesToCopyToMidBuffer = len(curWindow)
            # copy to midTermBuffer:
            midTermBuffer = midTermBuffer + \
                curWindow[0:samplesToCopyToMidBuffer]
            del curWindow[0:samplesToCopyToMidBuffer]

            if len(midTermBuffer) == midTermBufferSize:
                # midTermBuffer is full:
                elapsedTime = time.time() - timeStart  # time since recording started
                dataTime = (count + 1) * BLOCKSIZE     # data-driven time
                # TODO
                # mtF, _ = aF.mtFeatureExtraction(midTermBuffer, Fs, BLOCKSIZE * Fs, BLOCKSIZE * Fs, 0.050 * Fs, 0.050 * Fs)
                # curFV = (mtF - MEAN) / STD
                # TODO
                allData += midTermBuffer
                midTermBuffer = numpy.double(midTermBuffer)  # convert current buffer to numpy array

                # Compute spectrogram
                if showSpectrogram:
                    (spectrogram, TimeAxisS, FreqAxisS) = aF.stSpectogram(
                        midTermBuffer, Fs, 0.020 * Fs, 0.02 * Fs)
                    FreqAxisS = numpy.array(FreqAxisS)  # frequency axis
                    # most dominant frequency of each short-term window:
                    DominantFreqs = FreqAxisS[numpy.argmax(spectrogram, axis=1)]
                    maxFreq = numpy.mean(DominantFreqs)  # average dominant freq
                    maxFreqStd = numpy.std(DominantFreqs)

                # Compute chromagram
                if showChromagram:
                    (chromagram, TimeAxisC, FreqAxisC) = aF.stChromagram(
                        midTermBuffer, Fs, 0.020 * Fs, 0.02 * Fs)
                    FreqAxisC = numpy.array(FreqAxisC)  # 12 chroma classes
                    # most dominant chroma class of each short-term window:
                    DominantFreqsC = FreqAxisC[numpy.argmax(chromagram, axis=1)]
                    # most common chroma class among all short-term windows:
                    maxFreqC = most_common(DominantFreqsC)[0]

                # Plot signal window
                signalPlotCV = plotCV(
                    scipy.signal.resample(midTermBuffer + 16000, WidthPlot),
                    WidthPlot, HeightPlot, 32000)
                cv2.imshow('Signal', signalPlotCV)
                cv2.moveWindow('Signal', 50, statusHeight + 50)

                # Show spectrogram
                if showSpectrogram:
                    iSpec = numpy.array(spectrogram.T * 255, dtype=numpy.uint8)
                    iSpec2 = cv2.resize(iSpec, (WidthPlot, HeightPlot),
                                        interpolation=cv2.INTER_CUBIC)
                    iSpec2 = cv2.applyColorMap(iSpec2, cv2.COLORMAP_JET)
                    cv2.putText(iSpec2, "maxFreq: %.0f Hz" % maxFreq, (0, 11),
                                cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                    cv2.imshow('Spectrogram', iSpec2)
                    cv2.moveWindow('Spectrogram', 50,
                                   HeightPlot + statusHeight + 60)

                # Show chromagram
                if showChromagram:
                    iChroma = numpy.array(
                        (chromagram.T / chromagram.max()) * 255,
                        dtype=numpy.uint8)
                    iChroma2 = cv2.resize(iChroma, (WidthPlot, HeightPlot),
                                          interpolation=cv2.INTER_CUBIC)
                    iChroma2 = cv2.applyColorMap(iChroma2, cv2.COLORMAP_JET)
                    cv2.putText(iChroma2, "maxFreqC: %s" % maxFreqC, (0, 11),
                                cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                    cv2.imshow('Chroma', iChroma2)
                    cv2.moveWindow('Chroma', 50,
                                   2 * HeightPlot + statusHeight + 60)

                # Activity detection:
                energy100 = (100 * numpy.sum(midTermBuffer * midTermBuffer) /
                             (midTermBuffer.shape[0] * 32000 * 32000))
                if count < 10:  # TODO: make this a parameter
                    energy100_buffer_zero.append(energy100)
                    mean_energy100_zero = numpy.mean(
                        numpy.array(energy100_buffer_zero))
                else:
                    mean_energy100_zero = numpy.mean(
                        numpy.array(energy100_buffer_zero))
                    if energy100 < 1.2 * mean_energy100_zero:
                        if curActiveWindow.shape[0] > 0:
                            # a sound was detected in the previous segment:
                            activeT2 = elapsedTime - BLOCKSIZE  # end of current active window
                            if activeT2 - activeT1 > minActivityDuration:
                                wavFileName = startDateTimeStr + \
                                    "_activity_{0:.2f}_{1:.2f}.wav".format(
                                        activeT1, activeT2)
                                if recordActivity:
                                    # write current active window to file
                                    wavfile.write(wavFileName, Fs,
                                                  numpy.int16(curActiveWindow))
                            curActiveWindow = numpy.array([])  # delete current active window
                    else:
                        if curActiveWindow.shape[0] == 0:
                            # this is a new active window!
                            activeT1 = elapsedTime - BLOCKSIZE  # start of new active window
                        curActiveWindow = numpy.concatenate(
                            (curActiveWindow, midTermBuffer))

                # Show status messages on the Status cv window:
                textIm = numpy.zeros((statusHeight, WidthPlot, 3))
                statusStrTime = "time: %.1f sec" % elapsedTime + \
                    " - data time: %.1f sec" % dataTime + \
                    " - loss : %.1f sec" % (elapsedTime - dataTime)
                statusStrFeature = "ene1:%.1f" % energy100 + \
                    " eneZero:%.1f" % mean_energy100_zero
                cv2.putText(textIm, statusStrTime, (0, 11),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                cv2.putText(textIm, statusStrFeature, (0, 22),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                if curActiveWindow.shape[0] > 0:
                    cv2.putText(textIm, "sound", (0, 33),
                                cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255))
                else:
                    cv2.putText(textIm, "silence", (0, 33),
                                cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 220))
                cv2.imshow("Status", textIm)
                cv2.moveWindow("Status", 50, 0)
                midTermBuffer = []
                ch = cv2.waitKey(10)
                count += 1
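# Usage sketch (illustrative; assumes a Linux host with ALSA and the
# pyalsaaudio bindings available, plus the same module-level globals and
# helpers listed for recordAudioSegments above):
def demo_record_and_calc_hr():  # hypothetical helper, not in the original
    # read the microphone in 1-second blocks and show only the chromagram
    recordAndCalcHR(1.0, 16000, showSpectrogram=False,
                    showChromagram=True, recordActivity=False)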
"""! @brief Example 11 @details pyAudioAnalysis chromagram example @author Theodoros Giannakopoulos {[email protected]} """ import plotly import plotly.graph_objs as go from pyAudioAnalysis import audioFeatureExtraction as aF from pyAudioAnalysis import audioBasicIO as aIO layout = go.Layout(title='Chromagram example for doremi.wav signal', xaxis=dict(title='time (sec)',), yaxis=dict(title='Chroma Name',)) if __name__ == '__main__': win = 0.04 fp = "../data/doremi.wav" # music sample # read machine sound fs, s = aIO.readAudioFile(fp) fs = float(fs) dur1 = len(s) / float(fs) spec, time, freq = aF.stChromagram(s, fs, int(fs * win), int(fs * win), False) heatmap = go.Heatmap(z=spec.T, y=freq, x=time) plotly.offline.plot(go.Figure(data=[heatmap], layout=layout), filename="temp.html", auto_open=True)
print("\n\n\n * * * TEST 1 * * * \n\n\n") [Fs, x] = audioBasicIO.readAudioFile(root_data_path + "pyAudioAnalysis/data/count.wav"); F, f_names = audioFeatureExtraction.stFeatureExtraction(x, Fs, 0.050*Fs, 0.025*Fs); plt.subplot(2,1,1); plt.plot(F[0,:]); plt.xlabel('Frame no'); plt.ylabel(f_names[0]); plt.subplot(2,1,2); plt.plot(F[1,:]); plt.xlabel('Frame no'); plt.ylabel(f_names[1]); plt.show() print("\n\n\n * * * TEST 2 * * * \n\n\n") [Fs, x] = audioBasicIO.readAudioFile(root_data_path + "pyAudioAnalysis/data/doremi.wav") x = audioBasicIO.stereo2mono(x) specgram, TimeAxis, FreqAxis = audioFeatureExtraction.stSpectogram(x, Fs, round(Fs * 0.040), round(Fs * 0.040), True) print("\n\n\n * * * TEST 3 * * * \n\n\n") [Fs, x] = audioBasicIO.readAudioFile(root_data_path + "pyAudioAnalysis/data/doremi.wav") x = audioBasicIO.stereo2mono(x) specgram, TimeAxis, FreqAxis = audioFeatureExtraction.stChromagram(x, Fs, round(Fs * 0.040), round(Fs * 0.040), True) print("\n\n\n * * * TEST 4 * * * \n\n\n") aT.featureAndTrain([root_data_path +"SM/speech",root_data_path + "SM/music"], 1.0, 1.0, 0.2, 0.2, "svm", "temp", True) print("\n\n\n * * * TEST 5 * * * \n\n\n") [flagsInd, classesAll, acc, CM] = aS.mtFileClassification(root_data_path + "pyAudioAnalysis/data//scottish.wav", root_data_path + "pyAudioAnalysis/data/svmSM", "svm", True, root_data_path + 'pyAudioAnalysis/data/scottish.segments') print("\n\n\n * * * TEST 6 * * * \n\n\n") aS.trainHMM_fromFile(root_data_path + 'radioFinal/train/bbc4A.wav', root_data_path + 'radioFinal/train/bbc4A.segments', 'hmmTemp1', 1.0, 1.0) aS.trainHMM_fromDir(root_data_path + 'radioFinal/small', 'hmmTemp2', 1.0, 1.0) aS.hmmSegmentation(root_data_path + 'pyAudioAnalysis/data//scottish.wav', 'hmmTemp1', True, root_data_path + 'pyAudioAnalysis/data//scottish.segments') # test 1 aS.hmmSegmentation(root_data_path + 'pyAudioAnalysis/data//scottish.wav', 'hmmTemp2', True, root_data_path + 'pyAudioAnalysis/data//scottish.segments') # test 2 print("\n\n\n * * * TEST 7 * * * \n\n\n") aT.featureAndTrainRegression(root_data_path + "pyAudioAnalysis/data/speechEmotion", 1, 1, 0.050, 0.050, "svm_rbf", "temp.mod", compute_beat=False)