def fileSpectrogramWrapper(wav_file):
    """Read an audio file and run aF.stSpectogram on its mono version.

    Window and step are both ``round(fs * 0.040)`` samples and the last
    positional argument is ``True`` (presumably the plot switch of
    stSpectogram -- confirm against the library). The computed spectrogram
    is not returned.

    :param wav_file: path of the audio file to process
    :raises Exception: when ``wav_file`` is not an existing file
    """
    found = os.path.isfile(wav_file)
    if not found:
        raise Exception("Input audio file not found!")

    sampling_rate, samples = audioBasicIO.readAudioFile(wav_file)
    mono = audioBasicIO.stereo2mono(samples)
    frame_len = round(sampling_rate * 0.040)
    aF.stSpectogram(mono, sampling_rate, frame_len, frame_len, True)
def mtCNN_classification(signal, Fs, mtWin, mtStep, SingleFrame_net,
                         channel_mean, input_transformer, classNamesCNN,
                         input_size):
    """Classify successive mid-term segments of a signal via spectrogram images.

    The signal is scanned in steps of ``int(mtStep * Fs)`` samples. At every
    position a segment of ``int(mtWin * Fs) + stStep * Fs`` samples (clipped
    to the end of the signal) is cut out; segments shorter than 8000 samples
    are skipped. For every other segment a spectrogram is computed with
    ``aF.stSpectogram``, resized to ``input_size x input_size``, colour-mapped
    with matplotlib's ``jet`` map, written to a temporary PNG in the current
    working directory and handed to ``singleFrame_classify_video``. The
    temporary file is always removed afterwards, also when the classifier
    raises.

    :param signal: samples to classify; must support ``len`` and slicing, and
        slices must expose ``.shape`` (e.g. a numpy array)
    :param Fs: sampling rate used to convert the window sizes to samples
    :param mtWin: mid-term window size (multiplied by ``Fs``)
    :param mtStep: mid-term step (multiplied by ``Fs``)
    :param SingleFrame_net: network object forwarded to
        ``singleFrame_classify_video``
    :param channel_mean: unused by this function
    :param input_transformer: transformer forwarded to
        ``singleFrame_classify_video``
    :param classNamesCNN: class names, forwarded to the classifier and
        returned unchanged
    :param input_size: side length of the square image fed to the classifier
    :return: ``(flagsInd, classNamesCNN, Ps)``. ``flagsInd`` is always an
        empty array because label collection is disabled in this version;
        ``Ps`` holds a copy of ``outputP[0]`` for every classified segment.
    :raises ValueError: if ``int(mtStep * Fs)`` is not positive (the scan
        position would never advance)
    """
    mtWin2 = int(mtWin * Fs)
    mtStep2 = int(mtStep * Fs)
    if mtStep2 <= 0:
        raise ValueError("mtStep * Fs must be at least one sample")

    stWin = 0.040
    stStep = 0.005
    N = len(signal)
    curPos = 0
    count = 0
    flagsInd = []  # kept for interface compatibility; never filled here
    Ps = []

    # Random prefix so that concurrent runs do not clash on temp file names.
    randomString = ''.join(
        random.SystemRandom().choice(string.ascii_uppercase + string.digits)
        for _ in range(5))

    while curPos < N:
        # Mid-term segment, clipped to the end of the signal.
        N2 = min(curPos + mtWin2 + stStep * Fs, N)
        xtemp = signal[int(curPos):int(N2)]

        if xtemp.shape[0] < 8000:
            # Segment too short to classify; the counter still advances so
            # that file names keep matching segment positions.
            curPos += mtStep2
            count += 1
            continue

        specgram, _, _ = aF.stSpectogram(xtemp, Fs, round(Fs * stWin),
                                         round(Fs * stStep), False)
        # cv2.resize always yields an input_size x input_size array, so no
        # further shape check is needed before building the image.
        specgram = cv2.resize(specgram, (input_size, input_size),
                              interpolation=cv2.INTER_LINEAR)

        curFileName = randomString + "temp_{0:d}.png".format(count)
        imSpec = Image.fromarray(np.uint8(matplotlib.cm.jet(specgram) * 255))
        # Saved through the PIL image itself: scipy.misc.imsave was
        # deprecated in SciPy 1.0 and is absent from SciPy >= 1.3.
        imSpec.save(curFileName)
        try:
            output_classes, outputP = singleFrame_classify_video(
                curFileName, SingleFrame_net, input_transformer, False,
                classNamesCNN, input_size)
        finally:
            # Never leave the temporary image behind, even on failure.
            os.remove(curFileName)

        # Copy so that later classifier calls cannot overwrite stored values.
        Ps.append(np.copy(outputP[0]))
        curPos += mtStep2
        count += 1

    return np.array(flagsInd), classNamesCNN, np.array(Ps)
def createSpectrogramFile(x, Fs, fileName, stWin, stStep, label):
    """Save a median-filtered, 227x227 jet-coloured spectrogram image of x.

    The spectrogram comes from ``aF.stSpectogram`` and is smoothed with
    ``filter.medfilt2d(..., 5)`` (``filter`` is a module-level name here,
    presumably an alias of ``scipy.signal`` -- confirm). The image is written
    to ``"medfilt5_label_" + label + "/" + fileName``, relative to the current
    working directory; the directory is created when missing.

    :param x: audio samples
    :param Fs: sampling rate used to convert window sizes to samples
    :param fileName: file name of the image inside the label directory
    :param stWin: short-term window size (multiplied by ``Fs``)
    :param stStep: short-term step (multiplied by ``Fs``)
    :param label: string appended to the output directory name
    :raises OSError: if the output directory cannot be created
    """
    specgramOr, _, _ = aF.stSpectogram(x, Fs, round(Fs * stWin),
                                       round(Fs * stStep), False)
    specgramOr = filter.medfilt2d(specgramOr, 5)

    save_path = "medfilt5_label_" + label + '/'
    # Create-then-check instead of check-then-create: no failure when another
    # process creates the directory between the two steps.
    try:
        os.mkdir(save_path)
    except OSError:
        if not os.path.isdir(save_path):
            raise

    specgram = cv2.resize(specgramOr, (227, 227),
                          interpolation=cv2.INTER_LINEAR)
    im1 = Image.fromarray(np.uint8(matplotlib.cm.jet(specgram) * 255))
    # Saved through the PIL image itself: scipy.misc.imsave was deprecated in
    # SciPy 1.0 and is absent from SciPy >= 1.3.
    im1.save(save_path + fileName)
def createSpectrogramFile(x, Fs, fileName, stWin, stStep):
    """Store the spectrogram of x either as a raw array or as a 227x227 image.

    The mode is taken from the module-level sequence ``inputs`` (presumably
    the command-line arguments -- confirm against the caller):

    * ``inputs[2] == 'full'``: the spectrogram is printed and saved with
      ``numpy.save`` under ``fileName`` with ``'.png'`` removed and
      ``'_spectrogram'`` appended;
    * otherwise: the spectrogram is resized to 227x227, colour-mapped with
      matplotlib's ``jet`` map and written to ``fileName`` as an image.

    The spectrogram shape is printed in both cases.

    :param x: audio samples
    :param Fs: sampling rate used to convert window sizes to samples
    :param fileName: output path (image path; base name in ``'full'`` mode)
    :param stWin: short-term window size (multiplied by ``Fs``)
    :param stStep: short-term step (multiplied by ``Fs``)
    """
    specgramOr, _, _ = aF.stSpectogram(x, Fs, round(Fs * stWin),
                                       round(Fs * stStep), False)
    # Parenthesised single-argument print: same output on Python 2 and valid
    # syntax on Python 3.
    print(specgramOr.shape)

    if inputs[2] == 'full':
        print(specgramOr)
        numpy.save(fileName.replace('.png', '') + '_spectrogram', specgramOr)
    else:
        specgram = cv2.resize(specgramOr, (227, 227),
                              interpolation=cv2.INTER_LINEAR)
        im1 = Image.fromarray(numpy.uint8(matplotlib.cm.jet(specgram) * 255))
        # Saved through the PIL image itself: scipy.misc.imsave was
        # deprecated in SciPy 1.0 and is absent from SciPy >= 1.3.
        im1.save(fileName)
def mtCNN_classification(signal, Fs, mtWin, mtStep, RGB_singleFrame_net,
                         SOUND_mean_RGB, transformer_RGB, classNamesCNN):
    """Classify successive mid-term segments of a signal via spectrogram images.

    The signal is scanned in steps of ``int(mtStep * Fs)`` samples. At every
    position a segment of ``int(mtWin * Fs) + stStep * Fs`` samples (clipped
    to the end of the signal) is turned into a spectrogram with
    ``aF.stSpectogram``. Scanning stops at the first segment whose
    spectrogram is not square. Square spectrograms are resized with
    ``scipy.misc.imresize`` by the factor ``227 / rows``, colour-mapped with
    matplotlib's ``jet`` map, written to a temporary PNG in the current
    working directory and handed to ``singleFrame_classify_video``. The
    temporary file is removed afterwards, also when the classifier raises.

    NOTE(review): ``scipy.misc.imresize`` was deprecated in SciPy 1.0 and is
    absent from SciPy >= 1.3, so this function needs an older SciPy. It is
    kept because its rescaling semantics are not reproduced by a plain
    resize call.

    :param signal: samples to classify; must support ``len`` and slicing
    :param Fs: sampling rate used to convert the window sizes to samples
    :param mtWin: mid-term window size (multiplied by ``Fs``)
    :param mtStep: mid-term step (multiplied by ``Fs``)
    :param RGB_singleFrame_net: network object forwarded to
        ``singleFrame_classify_video``
    :param SOUND_mean_RGB: unused by this function
    :param transformer_RGB: transformer forwarded to
        ``singleFrame_classify_video``
    :param classNamesCNN: class names; used to map the winning class name to
        its index and returned unchanged
    :return: ``(flagsInd, classNamesCNN, Ps)`` where ``flagsInd`` holds the
        index of ``output_classes[0]`` and ``Ps`` a copy of ``outputP[0]``
        for every classified segment
    :raises ValueError: if ``int(mtStep * Fs)`` is not positive (the scan
        position would never advance)
    """
    # Local import: this block did not previously rely on a module-level
    # ``os`` import.
    import os

    mtWin2 = int(mtWin * Fs)
    mtStep2 = int(mtStep * Fs)
    if mtStep2 <= 0:
        raise ValueError("mtStep * Fs must be at least one sample")

    stWin = 0.020
    stStep = 0.015
    N = len(signal)
    curPos = 0
    count = 0
    flagsInd = []
    Ps = []

    # Random prefix so that concurrent runs do not clash on temp file names.
    randomString = ''.join(
        random.SystemRandom().choice(string.ascii_uppercase + string.digits)
        for _ in range(5))

    while curPos < N:  # for each mid-term segment
        N2 = min(curPos + mtWin2 + stStep * Fs, N)
        xtemp = signal[int(curPos):int(N2)]

        specgram, _, _ = aF.stSpectogram(xtemp, Fs, round(Fs * stWin),
                                         round(Fs * stStep), False)
        if specgram.shape[0] != specgram.shape[1]:
            # TODO (this must be dynamic!): only square spectrograms are
            # handled; anything else ends the scan.
            break

        specgram = scipy.misc.imresize(
            specgram, float(227.0) / float(specgram.shape[0]),
            interp='bilinear')
        imSpec = Image.fromarray(np.uint8(matplotlib.cm.jet(specgram) * 255))

        curFileName = randomString + "temp_{0:d}.png".format(count)
        # Saved through the PIL image itself rather than scipy.misc.imsave.
        imSpec.save(curFileName)
        try:
            output_classes, outputP = singleFrame_classify_video(
                curFileName, RGB_singleFrame_net, transformer_RGB, False,
                classNamesCNN)
        finally:
            # Do not leave one temporary image per segment on disk.
            os.remove(curFileName)

        flagsInd.append(classNamesCNN.index(output_classes[0]))
        # Copy in case the classifier reuses its output buffer between calls
        # (presumably it does -- the copy is harmless otherwise).
        Ps.append(np.copy(outputP[0]))
        curPos += mtStep2
        count += 1

    return np.array(flagsInd), classNamesCNN, np.array(Ps)
def wave_to_spectrogram(wav: WaveData, window_size, window_step) -> np.ndarray:
    """
    Builds a spectrogram from WAV data using pyAudioAnalysis.

    :param wav:         The WAV data to convert.
    :param window_size: The width of the sampling window in samples.
    :param window_step: The step the sampling window takes between FFTs.
    :return:            A spectrogram.
    """
    # pyAudioAnalysis is given a Numpy array built from the WAV samples
    samples: np.ndarray = np.array(wav.data)

    # A sampling rate of 1 is passed, so the window arguments are used as
    # given; only the first element of the result is handed back
    result = aFEx.stSpectogram(samples, 1, window_size, window_step, False)
    return result[0]
def _generate_spectrogram(self, filename):
    """Compute the spectrogram of an audio file.

    The file is read with audioBasicIO.readAudioFile, passed through
    stereo2mono and handed to aF.stSpectogram with window and step both set
    to ``round(Fs * 0.040)`` samples.

    :param filename: path of the audio file
    :return: tuple ``(specgram, TimeAxis, FreqAxis)`` as produced by
        aF.stSpectogram
    """
    sampling_rate, samples = audioBasicIO.readAudioFile(filename)
    mono = audioBasicIO.stereo2mono(samples)
    frame_len = round(sampling_rate * 0.040)
    spectrogram, time_axis, freq_axis = aF.stSpectogram(
        mono, sampling_rate, frame_len, frame_len, False)
    return (spectrogram, time_axis, freq_axis)
x, Fs, 0.050 * Fs, 0.025 * Fs) plt.subplot(2, 1, 1) plt.plot(F[0, :]) plt.xlabel('Frame no') plt.ylabel(f_names[0]) plt.subplot(2, 1, 2) plt.plot(F[1, :]) plt.xlabel('Frame no') plt.ylabel(f_names[1]) plt.show() print("\n\n\n * * * TEST 2 * * * \n\n\n") [Fs, x] = audioBasicIO.read_audio_file(root_data_path + "pyAudioAnalysis/data/doremi.wav") x = audioBasicIO.stereo_to_mono(x) specgram, TimeAxis, FreqAxis = audioFeatureExtraction.stSpectogram( x, Fs, round(Fs * 0.040), round(Fs * 0.040), True) print("\n\n\n * * * TEST 3 * * * \n\n\n") [Fs, x] = audioBasicIO.read_audio_file(root_data_path + "pyAudioAnalysis/data/doremi.wav") x = audioBasicIO.stereo_to_mono(x) specgram, TimeAxis, FreqAxis = audioFeatureExtraction.stChromagram( x, Fs, round(Fs * 0.040), round(Fs * 0.040), True) print("\n\n\n * * * TEST 4 * * * \n\n\n") aT.featureAndTrain([root_data_path + "SM/speech", root_data_path + "SM/music"], 1.0, 1.0, 0.2, 0.2, "svm", "temp", True) print("\n\n\n * * * TEST 5 * * * \n\n\n") [flagsInd, classesAll, acc, CM] = aS.mtFileClassification( root_data_path + "pyAudioAnalysis/data//scottish.wav",
def recordAudioSegments(BLOCKSIZE, Fs=16000, showSpectrogram=False,
                        showChromagram=False, recordActivity=False):
    """Record audio block by block with PyAudio, visualise it, detect activity.

    Runs an endless loop. Each iteration reads one block of
    ``int(Fs * BLOCKSIZE)`` frames from a mono PyAudio input stream and then

    * appends the samples to the module-level list ``allData``;
    * optionally computes a spectrogram and/or chromagram and shows them in
      OpenCV windows, next to the always-shown waveform and status windows;
    * during the first 10 blocks only collects block energies as the
      reference level; afterwards a block whose energy is below 1.2x the
      mean reference energy closes the current active window (written to a
      WAV file when ``recordActivity`` is set and the window lasted longer
      than ``minActivityDuration``), while any other block is appended to
      the active window.

    Relies on module-level names defined elsewhere in the file: ``FORMAT``,
    ``WidthPlot``, ``HeightPlot``, ``statusHeight``, ``minActivityDuration``,
    ``plotCV``, ``most_common``, ``loadMEANS`` and ``errorcount``.

    NOTE(review): the stream is never closed; the loop only ends through an
    exception or interrupt.

    :param BLOCKSIZE: block length, multiplied by ``Fs`` to get frames
    :param Fs: sampling rate passed to PyAudio and the feature extractors
    :param showSpectrogram: show a spectrogram window for every block
    :param showChromagram: show a chromagram window for every block
    :param recordActivity: write detected activity windows to WAV files
    """
    midTermBufferSize = int(Fs * BLOCKSIZE)

    # Parenthesised single-argument print: valid on Python 2 and Python 3.
    print("Press Ctr+C to stop recording")

    startDateTimeStr = datetime.datetime.now().strftime("%Y_%m_%d_%I:%M%p")

    # Loaded for the (currently disabled) feature normalisation below.
    MEAN, STD = loadMEANS("svmMovies8classesMEANS")

    pa = pyaudio.PyAudio()
    stream = pa.open(format=FORMAT,
                     channels=1,
                     rate=Fs,
                     input=True,
                     frames_per_buffer=midTermBufferSize)

    midTermBuffer = []
    count = 0
    global allData
    allData = []
    energy100_buffer_zero = []
    curActiveWindow = numpy.array([])
    timeStart = time.time()

    while 1:
        try:
            block = stream.read(midTermBufferSize)
            # Two bytes per sample; integer division keeps the count an int
            # on Python 3 as well.
            countB = len(block) // 2
            sample_format = "%dh" % (countB)
            shorts = struct.unpack(sample_format, block)
            midTermBuffer = midTermBuffer + list(shorts)

            elapsedTime = (time.time() - timeStart)  # wall-clock time
            dataTime = (count + 1) * BLOCKSIZE       # data-driven time

            # TODO
            # mtF, _ = aF.mtFeatureExtraction(midTermBuffer, Fs, BLOCKSIZE * Fs, BLOCKSIZE * Fs, 0.050 * Fs, 0.050 * Fs)
            # curFV = (mtF - MEAN) / STD
            # TODO
            allData += midTermBuffer
            midTermBuffer = numpy.double(midTermBuffer)

            # Compute spectrogram
            if showSpectrogram:
                (spectrogram, _, FreqAxisS) = aF.stSpectogram(
                    midTermBuffer, Fs, 0.020 * Fs, 0.02 * Fs)
                FreqAxisS = numpy.array(FreqAxisS)
                # Frequency of the strongest bin of every short-term window.
                DominantFreqs = FreqAxisS[numpy.argmax(spectrogram, axis=1)]
                maxFreq = numpy.mean(DominantFreqs)

            # Compute chromagram
            if showChromagram:
                (chromagram, _, FreqAxisC) = aF.stChromagram(
                    midTermBuffer, Fs, 0.020 * Fs, 0.02 * Fs)
                FreqAxisC = numpy.array(FreqAxisC)
                # Strongest chroma entry of every short-term window.
                DominantFreqsC = FreqAxisC[numpy.argmax(chromagram, axis=1)]
                # Most common one among all short-term windows.
                maxFreqC = most_common(DominantFreqsC)[0]

            # Plot signal window
            signalPlotCV = plotCV(
                scipy.signal.resample(midTermBuffer + 16000, WidthPlot),
                WidthPlot, HeightPlot, 32000)
            cv2.imshow('Signal', signalPlotCV)
            cv2.moveWindow('Signal', 50, statusHeight + 50)

            # Show spectrogram
            if showSpectrogram:
                iSpec = numpy.array(spectrogram.T * 255, dtype=numpy.uint8)
                iSpec2 = cv2.resize(iSpec, (WidthPlot, HeightPlot),
                                    interpolation=cv2.INTER_CUBIC)
                iSpec2 = cv2.applyColorMap(iSpec2, cv2.COLORMAP_JET)
                cv2.putText(iSpec2, "maxFreq: %.0f Hz" % maxFreq, (0, 11),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                cv2.imshow('Spectrogram', iSpec2)
                cv2.moveWindow('Spectrogram', 50,
                               HeightPlot + statusHeight + 60)

            # Show chromagram
            if showChromagram:
                iChroma = numpy.array(
                    (chromagram.T / chromagram.max()) * 255,
                    dtype=numpy.uint8)
                iChroma2 = cv2.resize(iChroma, (WidthPlot, HeightPlot),
                                      interpolation=cv2.INTER_CUBIC)
                iChroma2 = cv2.applyColorMap(iChroma2, cv2.COLORMAP_JET)
                cv2.putText(iChroma2, "maxFreqC: %s" % maxFreqC, (0, 11),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
                cv2.imshow('Chroma', iChroma2)
                cv2.moveWindow('Chroma', 50,
                               2 * HeightPlot + statusHeight + 60)

            # Activity Detection:
            energy100 = (100 * numpy.sum(midTermBuffer * midTermBuffer) /
                         (midTermBuffer.shape[0] * 32000 * 32000))
            if count < 10:  # TODO make this param
                # The first blocks only build the reference energy level.
                energy100_buffer_zero.append(energy100)
                mean_energy100_zero = numpy.mean(
                    numpy.array(energy100_buffer_zero))
            else:
                mean_energy100_zero = numpy.mean(
                    numpy.array(energy100_buffer_zero))
                if energy100 < 1.2 * mean_energy100_zero:
                    if curActiveWindow.shape[0] > 0:
                        # A sound was active in the previous block: it ends
                        # here.
                        activeT2 = elapsedTime - BLOCKSIZE
                        if activeT2 - activeT1 > minActivityDuration:
                            wavFileName = (
                                startDateTimeStr +
                                "_activity_{0:.2f}_{1:.2f}.wav".format(
                                    activeT1, activeT2))
                            if recordActivity:
                                # Write current active window to file.
                                wavfile.write(wavFileName, Fs,
                                              numpy.int16(curActiveWindow))
                        # Delete current active window.
                        curActiveWindow = numpy.array([])
                else:
                    if curActiveWindow.shape[0] == 0:
                        # This is a new active window: remember its start.
                        activeT1 = elapsedTime - BLOCKSIZE
                    curActiveWindow = numpy.concatenate(
                        (curActiveWindow, midTermBuffer))

            # Show status messages on the Status cv window:
            textIm = numpy.zeros((statusHeight, WidthPlot, 3))
            statusStrTime = "time: %.1f sec" % elapsedTime + " - data time: %.1f sec" % dataTime + " - loss : %.1f sec" % (
                elapsedTime - dataTime)
            statusStrFeature = "ene1:%.1f" % energy100 + " eneZero:%.1f" % mean_energy100_zero
            cv2.putText(textIm, statusStrTime, (0, 11),
                        cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
            cv2.putText(textIm, statusStrFeature, (0, 22),
                        cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
            if curActiveWindow.shape[0] > 0:
                cv2.putText(textIm, "sound", (0, 33),
                            cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255))
            else:
                cv2.putText(textIm, "silence", (0, 33),
                            cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 220))
            cv2.imshow("Status", textIm)
            cv2.moveWindow("Status", 50, 0)

            midTermBuffer = []
            # Gives OpenCV time to redraw its windows.
            cv2.waitKey(10)
            count += 1
        except IOError as e:
            # NOTE(review): errorcount is not defined in this function;
            # presumably a module-level counter -- confirm it exists.
            print("(%d) Error recording: %s" % (errorcount, e))
def recordAndCalcHR(BLOCKSIZE, Fs, showSpectrogram=False,
                    showChromagram=False, recordActivity=False):
    """Record audio from ALSA in mid-term blocks, visualise it, detect activity.

    Runs an endless loop that polls a non-blocking ALSA capture device
    (mono, 16-bit little-endian, period size 512). Samples are collected
    until a mid-term buffer of ``int(Fs * BLOCKSIZE)`` samples is full; every
    full buffer is then

    * appended to the module-level list ``allData``;
    * optionally turned into a spectrogram and/or chromagram that are shown
      in OpenCV windows, next to the always-shown waveform and status
      windows;
    * used for activity detection: the first 10 buffers only provide the
      reference energy level; afterwards a buffer whose energy is below 1.2x
      the mean reference energy closes the current active window (written to
      a WAV file when ``recordActivity`` is set and the window lasted longer
      than ``minActivityDuration``), while any other buffer is appended to
      the active window.

    Relies on module-level names defined elsewhere in the file:
    ``WidthPlot``, ``HeightPlot``, ``statusHeight``, ``minActivityDuration``,
    ``plotCV``, ``most_common`` and ``loadMEANS``.

    NOTE(review): the capture device is never closed; the loop only ends
    through an exception or interrupt.

    :param BLOCKSIZE: mid-term block length, multiplied by ``Fs`` to get
        samples
    :param Fs: sampling rate set on the capture device and passed to the
        feature extractors
    :param showSpectrogram: show a spectrogram window for every block
    :param showChromagram: show a chromagram window for every block
    :param recordActivity: write detected activity windows to WAV files
    """
    print("Press Ctr+C to stop process")

    startDateTimeStr = datetime.datetime.now().strftime("%Y_%m_%d_%I:%M%p")

    # Loaded for the (currently disabled) feature normalisation below.
    MEAN, STD = loadMEANS("svmMovies8classesMEANS")

    # Open the ALSA capture device.
    inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NONBLOCK)
    inp.setchannels(1)                          # 1 channel
    inp.setrate(Fs)                             # set sampling freq
    inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)  # set 2-byte sample
    inp.setperiodsize(512)

    midTermBufferSize = int(Fs * BLOCKSIZE)
    midTermBuffer = []
    curWindow = []
    count = 0
    global allData
    allData = []
    energy100_buffer_zero = []
    curActiveWindow = numpy.array([])
    timeStart = time.time()

    while 1:
        l, data = inp.read()  # read data from buffer
        if not l:
            # Nothing captured on this poll.
            continue

        # Two bytes per sample; integer division is required because
        # range() rejects the float that ``/`` yields on Python 3.
        curWindow.extend(audioop.getsample(data, 2, i)
                         for i in range(len(data) // 2))

        # Move as many samples as still fit into the mid-term buffer; the
        # remainder stays in curWindow for the next buffer.
        samplesToCopyToMidBuffer = min(
            len(curWindow), midTermBufferSize - len(midTermBuffer))
        midTermBuffer = midTermBuffer + curWindow[0:samplesToCopyToMidBuffer]
        del curWindow[0:samplesToCopyToMidBuffer]

        if len(midTermBuffer) != midTermBufferSize:
            # Mid-term buffer not full yet.
            continue

        elapsedTime = (time.time() - timeStart)  # wall-clock time
        dataTime = (count + 1) * BLOCKSIZE       # data-driven time

        # TODO
        # mtF, _ = aF.mtFeatureExtraction(midTermBuffer, Fs, BLOCKSIZE * Fs, BLOCKSIZE * Fs, 0.050 * Fs, 0.050 * Fs)
        # curFV = (mtF - MEAN) / STD
        # TODO
        allData += midTermBuffer
        midTermBuffer = numpy.double(midTermBuffer)

        # Compute spectrogram
        if showSpectrogram:
            (spectrogram, _, FreqAxisS) = aF.stSpectogram(
                midTermBuffer, Fs, 0.020 * Fs, 0.02 * Fs)
            FreqAxisS = numpy.array(FreqAxisS)
            # Frequency of the strongest bin of every short-term window.
            DominantFreqs = FreqAxisS[numpy.argmax(spectrogram, axis=1)]
            maxFreq = numpy.mean(DominantFreqs)

        # Compute chromagram
        if showChromagram:
            (chromagram, _, FreqAxisC) = aF.stChromagram(
                midTermBuffer, Fs, 0.020 * Fs, 0.02 * Fs)
            FreqAxisC = numpy.array(FreqAxisC)
            # Strongest chroma entry of every short-term window.
            DominantFreqsC = FreqAxisC[numpy.argmax(chromagram, axis=1)]
            # Most common one among all short-term windows.
            maxFreqC = most_common(DominantFreqsC)[0]

        # Plot signal window
        signalPlotCV = plotCV(
            scipy.signal.resample(midTermBuffer + 16000, WidthPlot),
            WidthPlot, HeightPlot, 32000)
        cv2.imshow('Signal', signalPlotCV)
        cv2.moveWindow('Signal', 50, statusHeight + 50)

        # Show spectrogram
        if showSpectrogram:
            iSpec = numpy.array(spectrogram.T * 255, dtype=numpy.uint8)
            iSpec2 = cv2.resize(iSpec, (WidthPlot, HeightPlot),
                                interpolation=cv2.INTER_CUBIC)
            iSpec2 = cv2.applyColorMap(iSpec2, cv2.COLORMAP_JET)
            cv2.putText(iSpec2, "maxFreq: %.0f Hz" % maxFreq, (0, 11),
                        cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
            cv2.imshow('Spectrogram', iSpec2)
            cv2.moveWindow('Spectrogram', 50, HeightPlot + statusHeight + 60)

        # Show chromagram
        if showChromagram:
            iChroma = numpy.array((chromagram.T / chromagram.max()) * 255,
                                  dtype=numpy.uint8)
            iChroma2 = cv2.resize(iChroma, (WidthPlot, HeightPlot),
                                  interpolation=cv2.INTER_CUBIC)
            iChroma2 = cv2.applyColorMap(iChroma2, cv2.COLORMAP_JET)
            cv2.putText(iChroma2, "maxFreqC: %s" % maxFreqC, (0, 11),
                        cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
            cv2.imshow('Chroma', iChroma2)
            cv2.moveWindow('Chroma', 50, 2 * HeightPlot + statusHeight + 60)

        # Activity Detection:
        energy100 = (100 * numpy.sum(midTermBuffer * midTermBuffer) /
                     (midTermBuffer.shape[0] * 32000 * 32000))
        if count < 10:  # TODO make this param
            # The first buffers only build the reference energy level.
            energy100_buffer_zero.append(energy100)
            mean_energy100_zero = numpy.mean(
                numpy.array(energy100_buffer_zero))
        else:
            mean_energy100_zero = numpy.mean(
                numpy.array(energy100_buffer_zero))
            if energy100 < 1.2 * mean_energy100_zero:
                if curActiveWindow.shape[0] > 0:
                    # A sound was active in the previous buffer: it ends
                    # here.
                    activeT2 = elapsedTime - BLOCKSIZE
                    if activeT2 - activeT1 > minActivityDuration:
                        wavFileName = (
                            startDateTimeStr +
                            "_activity_{0:.2f}_{1:.2f}.wav".format(
                                activeT1, activeT2))
                        if recordActivity:
                            # Write current active window to file.
                            wavfile.write(wavFileName, Fs,
                                          numpy.int16(curActiveWindow))
                    # Delete current active window.
                    curActiveWindow = numpy.array([])
            else:
                if curActiveWindow.shape[0] == 0:
                    # This is a new active window: remember its start.
                    activeT1 = elapsedTime - BLOCKSIZE
                curActiveWindow = numpy.concatenate(
                    (curActiveWindow, midTermBuffer))

        # Show status messages on the Status cv window:
        textIm = numpy.zeros((statusHeight, WidthPlot, 3))
        statusStrTime = "time: %.1f sec" % elapsedTime + " - data time: %.1f sec" % dataTime + " - loss : %.1f sec" % (
            elapsedTime - dataTime)
        statusStrFeature = "ene1:%.1f" % energy100 + " eneZero:%.1f" % mean_energy100_zero
        cv2.putText(textIm, statusStrTime, (0, 11),
                    cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
        cv2.putText(textIm, statusStrFeature, (0, 22),
                    cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 200))
        if curActiveWindow.shape[0] > 0:
            cv2.putText(textIm, "sound", (0, 33),
                        cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255))
        else:
            cv2.putText(textIm, "silence", (0, 33),
                        cv2.FONT_HERSHEY_PLAIN, 1, (200, 200, 220))
        cv2.imshow("Status", textIm)
        cv2.moveWindow("Status", 50, 0)

        midTermBuffer = []
        # Gives OpenCV time to redraw its windows.
        cv2.waitKey(10)
        count += 1
"""!
@brief Example 04
@details pyAudioAnalysis spectrogram calculation and visualization example
@author Theodoros Giannakopoulos {[email protected]}
"""
import numpy as np
import scipy.io.wavfile as wavfile
import plotly
import plotly.graph_objs as go
from pyAudioAnalysis import audioFeatureExtraction as aF

# Figure layout shared by the plot generated below.
layout = go.Layout(
    title='Spectrogram Extraction Example using pyAudioAnalysis',
    xaxis=dict(title='time (sec)'),
    yaxis=dict(title='Freqs (Hz)'),
)


def normalize_signal(signal):
    """Scale a signal by 2**15, remove its mean and divide by its peak.

    The peak is the largest absolute value of the scaled signal taken before
    the mean is removed; a tiny constant keeps the division defined for an
    all-zero input.
    """
    scaled = np.double(signal) / (2.0**15)
    peak = np.abs(scaled).max()
    centered = scaled - scaled.mean()
    return centered / (peak + 0.0000000001)


if __name__ == '__main__':
    Fs, raw = wavfile.read("../data/sample_music.wav")
    s = normalize_signal(raw)

    # Window and step are both int(Fs * 0.020) samples.
    frame_len = int(Fs * 0.020)
    S, t, f = aF.stSpectogram(s, Fs, frame_len, frame_len)

    # The spectrogram is transposed for the heatmap, with f on the y axis
    # and t on the x axis.
    heatmap = go.Heatmap(z=S.T, y=f, x=t)
    figure = go.Figure(data=[heatmap], layout=layout)
    plotly.offline.plot(figure, filename="temp.html", auto_open=True)
def main(argv): dirName = argv[1] types = ('*.wav', ) filesList = [] for files in types: filesList.extend(glob.glob(os.path.join(dirName, files))) filesList = sorted(filesList) WIDTH_SEC = 2.4 stWin = 0.020 stStep = 0.015 WIDTH = WIDTH_SEC / stStep for f in filesList: [Fs, x] = audioBasicIO.readAudioFile(f) print(Fs) x = audioBasicIO.stereo2mono(x) specgramOr, TimeAxis, FreqAxis = aF.stSpectogram( x, Fs, round(Fs * stWin), round(Fs * stStep), False) if specgramOr.shape[0] > WIDTH: specgram = specgramOr[int(specgramOr.shape[0] / 2) - WIDTH / 2:int(specgramOr.shape[0] / 2) + WIDTH / 2, :] specgram = scipy.misc.imresize(specgram, float(227.0) / float(specgram.shape[0]), interp='bilinear') print specgram.shape im = Image.fromarray(numpy.uint8( matplotlib.cm.jet(specgram) * 255)) #plt.imshow(im) scipy.misc.imsave(f.replace(".wav", ".jpg"), im) if int(specgramOr.shape[0] / 2) - WIDTH / 2 - int( (0.2) / stStep) > 0: specgram = specgramOr[ int(specgramOr.shape[0] / 2) - WIDTH / 2 - int((0.2) / stStep):int(specgramOr.shape[0] / 2) + WIDTH / 2 - int((0.2) / stStep), :] specgram = scipy.misc.imresize(specgram, float(227.0) / float(specgram.shape[0]), interp='bilinear') im = Image.fromarray( numpy.uint8(matplotlib.cm.jet(specgram) * 255)) print specgram.shape scipy.misc.imsave(f.replace(".wav", "_02A.jpg"), im) specgram = specgramOr[ int(specgramOr.shape[0] / 2) - WIDTH / 2 + int((0.2) / stStep):int(specgramOr.shape[0] / 2) + WIDTH / 2 + int((0.2) / stStep), :] specgram = scipy.misc.imresize(specgram, float(227.0) / float(specgram.shape[0]), interp='bilinear') print specgram.shape im = Image.fromarray( numpy.uint8(matplotlib.cm.jet(specgram) * 255)) scipy.misc.imsave(f.replace(".wav", "_02B.jpg"), im) # ONLY FOR SPEECH (fewer samples). Must comment for music """specgram = specgramOr[int(specgramOr.shape[0]/2) - WIDTH/2 - int((0.1) / stStep):int(specgramOr.shape[0]/2) + WIDTH/2 - int((0.1) / stStep), :]
from pyAudioAnalysis import audioTrainTest as aT
from pyAudioAnalysis import audioSegmentation as aS
import matplotlib.pyplot as plt

# Manual smoke tests for pyAudioAnalysis. Every path below is built from this
# hard-coded data root.
root_data_path = "/Users/tyiannak/ResearchData/Audio Dataset/pyAudioAnalysisData/"

# TEST 1: short-term feature extraction; plots the first two feature rows.
print("\n\n\n * * * TEST 1 * * * \n\n\n")
count_wav = root_data_path + "pyAudioAnalysis/data/count.wav"
[Fs, x] = audioBasicIO.readAudioFile(count_wav)
F, f_names = audioFeatureExtraction.stFeatureExtraction(x, Fs, 0.050*Fs, 0.025*Fs)
for row in (0, 1):
    plt.subplot(2, 1, row + 1)
    plt.plot(F[row, :])
    plt.xlabel('Frame no')
    plt.ylabel(f_names[row])
plt.show()

# TEST 2: spectrogram of the mono signal, last argument True.
print("\n\n\n * * * TEST 2 * * * \n\n\n")
doremi_wav = root_data_path + "pyAudioAnalysis/data/doremi.wav"
[Fs, x] = audioBasicIO.readAudioFile(doremi_wav)
x = audioBasicIO.stereo2mono(x)
frame_len = round(Fs * 0.040)
specgram, TimeAxis, FreqAxis = audioFeatureExtraction.stSpectogram(
    x, Fs, frame_len, frame_len, True)

# TEST 3: chromagram of the same file (read again from disk).
print("\n\n\n * * * TEST 3 * * * \n\n\n")
doremi_wav = root_data_path + "pyAudioAnalysis/data/doremi.wav"
[Fs, x] = audioBasicIO.readAudioFile(doremi_wav)
x = audioBasicIO.stereo2mono(x)
frame_len = round(Fs * 0.040)
specgram, TimeAxis, FreqAxis = audioFeatureExtraction.stChromagram(
    x, Fs, frame_len, frame_len, True)

# TEST 4: train an "svm" model named "temp" on the two class folders.
print("\n\n\n * * * TEST 4 * * * \n\n\n")
class_dirs = [root_data_path + "SM/speech", root_data_path + "SM/music"]
aT.featureAndTrain(class_dirs, 1.0, 1.0, 0.2, 0.2, "svm", "temp", True)

# TEST 5: mid-term classification of a file with a stored svm model.
print("\n\n\n * * * TEST 5 * * * \n\n\n")
scottish_wav = root_data_path + "pyAudioAnalysis/data//scottish.wav"
svm_model = root_data_path + "pyAudioAnalysis/data/svmSM"
scottish_segments = root_data_path + 'pyAudioAnalysis/data/scottish.segments'
[flagsInd, classesAll, acc, CM] = aS.mtFileClassification(
    scottish_wav, svm_model, "svm", True, scottish_segments)

# TEST 6: HMM training from a single annotated file and from a directory.
print("\n\n\n * * * TEST 6 * * * \n\n\n")
bbc_wav = root_data_path + 'radioFinal/train/bbc4A.wav'
bbc_segments = root_data_path + 'radioFinal/train/bbc4A.segments'
aS.trainHMM_fromFile(bbc_wav, bbc_segments, 'hmmTemp1', 1.0, 1.0)
aS.trainHMM_fromDir(root_data_path + 'radioFinal/small', 'hmmTemp2', 1.0, 1.0)