def thumbnailWrapper(inputFile, thumbnailWrapperSize):
    """Extract two music thumbnails from an audio file, save and plot them.

    The signal is read from ``inputFile`` and ``aS.musicThumbnailing`` is
    asked for two segments, (A1, A2) and (B1, B2), expressed in seconds.
    Both segments are written next to the input file as
    ``<name>_thumb1<ext>`` and ``<name>_thumb2<ext>``, their limits are
    printed, and the self-similarity matrix returned by the thumbnailing
    call is displayed with the selected region highlighted.

    Args:
        inputFile: path of the audio file to process.
        thumbnailWrapperSize: thumbnail size forwarded to
            ``aS.musicThumbnailing``; also used to scale the ellipse drawn
            on the plot.

    Returns:
        None. The function returns early, without writing or plotting
        anything, when the reader reports a sampling rate of -1.

    Raises:
        Exception: if ``inputFile`` does not exist.
    """
    st_window = 0.5
    st_step = 0.5
    if not os.path.isfile(inputFile):
        raise Exception("Input audio file not found!")

    [fs, x] = audioBasicIO.readAudioFile(inputFile)
    if fs == -1:
        # could not read file
        return

    [A1, A2, B1, B2, Smatrix] = aS.musicThumbnailing(
        x, fs, st_window, st_step, thumbnailWrapperSize)

    # Build the output names from the real extension. The previous code only
    # assigned them for ".wav"/".mp3" (any other extension crashed with a
    # NameError) and replaced every occurrence of the extension in the path.
    # NOTE(review): the data is written with wavfile.write whatever the
    # extension is; the extension is kept only so existing names do not change.
    base_name, extension = os.path.splitext(inputFile)
    thumbnailWrapperFileName1 = base_name + "_thumb1" + extension
    thumbnailWrapperFileName2 = base_name + "_thumb2" + extension

    # Segment limits are in seconds; multiply by fs to get sample indices.
    wavfile.write(thumbnailWrapperFileName1, fs, x[int(fs * A1):int(fs * A2)])
    wavfile.write(thumbnailWrapperFileName2, fs, x[int(fs * B1):int(fs * B2)])
    print("1st thumbnailWrapper (stored in file {0:s}): {1:4.1f}sec"
          " -- {2:4.1f}sec".format(thumbnailWrapperFileName1, A1, A2))
    print("2nd thumbnailWrapper (stored in file {0:s}): {1:4.1f}sec"
          " -- {2:4.1f}sec".format(thumbnailWrapperFileName2, B1, B2))

    # Plot self-similarity matrix:
    fig = plt.figure()
    ax = fig.add_subplot(111, aspect="auto")
    plt.imshow(Smatrix)

    # Plot best-similarity diagonal (seconds -> frame indices via st_step):
    Xcenter = (A1 / st_step + A2 / st_step) / 2.0
    Ycenter = (B1 / st_step + B2 / st_step) / 2.0
    e1 = matplotlib.patches.Ellipse((Ycenter, Xcenter),
                                    thumbnailWrapperSize * 1.4, 3,
                                    angle=45, linewidth=3, fill=False)
    ax.add_patch(e1)

    n_frames = Smatrix.shape[0]
    dashed = dict(color="k", linestyle="--", linewidth=2)
    # Horizontal guides from each segment limit to the right edge ...
    plt.plot([B1 / st_step, n_frames], [A1 / st_step, A1 / st_step], **dashed)
    plt.plot([B2 / st_step, n_frames], [A2 / st_step, A2 / st_step], **dashed)
    # ... and vertical guides from each segment limit to the bottom edge.
    plt.plot([B1 / st_step, B1 / st_step], [A1 / st_step, n_frames], **dashed)
    plt.plot([B2 / st_step, B2 / st_step], [A2 / st_step, n_frames], **dashed)

    plt.xlim([0, n_frames])
    plt.ylim([Smatrix.shape[1], 0])

    ax.yaxis.set_label_position("right")
    ax.yaxis.tick_right()

    plt.xlabel("frame no")
    plt.ylabel("frame no")
    plt.title("Self-similarity matrix")

    plt.show()
def extractFeatures(filePath, fileName, music_genre=None, window=0.05,
                    step=0.05, thumbnailSize=20, playDuration=20):
    """Load a track, pick an excerpt and compute its short-term features.

    Args:
        filePath: directory prefix; it is concatenated directly with
            ``fileName``, so it must already end with a path separator.
        fileName: name of the audio file.
        music_genre: optional label stored unchanged under ``"genre"``.
        window: short-term window; multiplied by the sampling rate before
            being passed to ``pyaf.stFeatureExtraction``.
        step: short-term step; multiplied by the sampling rate likewise.
        thumbnailSize: thumbnail size handed to ``pyas.musicThumbnailing``.
            When ``None`` no thumbnailing is done and the excerpt starts at 0.
        playDuration: length of the excerpt, in the same unit as the
            thumbnail start (it is scaled by 1000 for the ``AudioSegment``
            slice and by the sampling rate for the sample slice).

    Returns:
        dict with the keys ``fileName``, ``filePath``, ``genre``, ``audio``
        (sliced ``AudioSegment``), ``thumbNail`` (``(begin, end)``),
        ``fileData`` (mono samples of the excerpt), ``stFeatures`` and
        ``FrameRate``.
    """
    music = {}
    music["fileName"] = fileName
    music["filePath"] = filePath
    music["genre"] = music_genre

    full_path = filePath + fileName
    [fs, x] = pyab.readAudioFile(full_path)

    if thumbnailSize is not None:
        # The thumbnail size is the fifth positional argument of
        # musicThumbnailing (signal, fs, window, step, size). The previous
        # call passed it third, which set the short-term window instead.
        # NOTE(review): 1.0 / 0.5 are meant to match the library's default
        # window/step -- confirm against the installed pyAudioAnalysis.
        B = pyas.musicThumbnailing(x, fs, 1.0, 0.5, thumbnailSize)[0]
    else:
        B = 0
    E = B + playDuration

    # AudioSegment slices are indexed in milliseconds.
    audio = AudioSegment.from_file(full_path)
    music["audio"] = audio[int(1000 * B):int(1000 * E)]

    x = pyab.stereo2mono(x)
    x = x[int(fs * B):int(fs * E)]
    music["thumbNail"] = (B, E)
    music["fileData"] = x
    music["stFeatures"] = pyaf.stFeatureExtraction(x, fs, window * fs,
                                                   step * fs)
    music["FrameRate"] = fs
    return music
"""!
@brief Example 30
@details: Music thumbnailing example
@author Theodoros Giannakopoulos {[email protected]}
"""
import os
import readchar
import matplotlib.pyplot as plt
import matplotlib
from pyAudioAnalysis.audioBasicIO import readAudioFile, stereo2mono
from pyAudioAnalysis.audioSegmentation import musicThumbnailing

if __name__ == '__main__':
    # Load the song and reduce it to a single channel.
    input_file = "../data/song2.mp3"
    fs, x = readAudioFile(input_file)
    x = stereo2mono(x)

    # The same value is used for the short-term window and step.
    win = 0.5
    A1, A2, B1, B2, Smatrix = musicThumbnailing(x, fs, win, win, 20)

    # Cut both thumbnails out of the input with avconv, passing each
    # segment's start and its length (end - start).
    cut_first = ("avconv -i {} -ss {} -t {} thumb1.wav "
                 "-loglevel panic -y")
    cut_second = ("avconv -i {} -ss {} -t {} thumb2.wav "
                  "-loglevel panic -y")
    os.system(cut_first.format(input_file, A1, A2 - A1))
    os.system(cut_second.format(input_file, B1, B2 - B1))

    # Play the first excerpt, wait for a key press, then play the second.
    print("Playing thumbnail 1")
    os.system("play thumb1.wav -q")
    readchar.readchar()
    print("Playing thumbnail 2")
    os.system("play thumb2.wav -q")

    # Show the self-similarity matrix.
    fig = plt.figure()
    ax = fig.add_subplot(111, aspect="auto")
    plt.imshow(Smatrix)

    # Plot best-similarity diagonal: centre of each segment in frame units.
    Xcenter = (A1 / win + A2 / win) * 0.5
    Ycenter = (B1 / win + B2 / win) * 0.5
def main(argv):
    """Run the computational benchmark selected by ``argv[1]``.

    For the chosen option the audio file is read, one processing step is
    timed, and the speed is printed as a multiple of real time (audio
    duration divided by elapsed time). This is repeated ``nExp`` times
    (``nExp`` is a module-level value) with one printed line per repetition.

    Supported options: ``-shortTerm``, ``-classifyFile``, ``-mtClassify``,
    ``-hmmSegmentation``, ``-silenceRemoval``, ``-thumbnailing``,
    ``-diarization-noLDA`` and ``-diarization-LDA``. Any other option does
    nothing.

    Args:
        argv: argument list; ``argv[1]`` is the option string.

    Raises:
        IndexError: if ``argv`` has no element at index 1.
    """
    option = argv[1]
    wav = "snakehit.wav"

    # time.clock() was removed in Python 3.8. Keep using it where it still
    # exists so measurements are unchanged there; otherwise use perf_counter.
    timer = getattr(time, "clock", None) or time.perf_counter

    def silence_removal(x, Fs):
        # The original benchmark re-reads the file inside the timed region;
        # this is preserved so timings stay comparable.
        [Fs, x] = audioBasicIO.readAudioFile(wav)
        aS.silenceRemoval(x, Fs, 0.050, 0.050,
                          smoothWindow=1.0, Weight=0.3, plot=False)

    # option -> (audio file, report format, timed workload taking (x, Fs))
    benchmarks = {
        "-shortTerm": (
            wav,
            "short-term feature extraction: {0:.1f} x realtime",
            lambda x, Fs: audioFeatureExtraction.stFeatureExtraction(
                x, Fs, 0.050 * Fs, 0.050 * Fs),
        ),
        "-classifyFile": (
            wav,
            "Mid-term feature extraction + classification \t {0:.1f} x realtime",
            lambda x, Fs: aT.fileClassification(wav, "svmSM", "svm"),
        ),
        "-mtClassify": (
            wav,
            "Fix-sized classification - segmentation \t {0:.1f} x realtime",
            lambda x, Fs: aS.mtFileClassification(
                wav, "svmSM", "svm", False, ''),
        ),
        "-hmmSegmentation": (
            wav,
            "HMM-based classification - segmentation \t {0:.1f} x realtime",
            lambda x, Fs: aS.hmmSegmentation(wav, 'hmmRadioSM', False, ''),
        ),
        "-silenceRemoval": (
            wav,
            "Silence removal \t {0:.1f} x realtime",
            silence_removal,
        ),
        "-thumbnailing": (
            "scottish.wav",
            "Thumbnail \t {0:.1f} x realtime",
            # find thumbnail endpoints
            lambda x, Fs: aS.musicThumbnailing(x, Fs, 1.0, 1.0, 15.0),
        ),
        "-diarization-noLDA": (
            wav,
            "Diarization \t {0:.1f} x realtime",
            lambda x, Fs: aS.speakerDiarization(
                wav, 4, LDAdim=0, PLOT=False),
        ),
        "-diarization-LDA": (
            wav,
            "Diarization \t {0:.1f} x realtime",
            lambda x, Fs: aS.speakerDiarization(wav, 4, PLOT=False),
        ),
    }

    if option not in benchmarks:
        # Unknown options were silently ignored before; keep that behaviour.
        return

    audio_file, report, workload = benchmarks[option]
    for _ in range(nExp):
        [Fs, x] = audioBasicIO.readAudioFile(audio_file)
        duration = x.shape[0] / float(Fs)
        t1 = timer()
        workload(x, Fs)
        t2 = timer()
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(report.format(duration / (t2 - t1)))
def main(argv):
    """Time one pyAudioAnalysis processing step chosen by ``argv[1]``.

    The benchmark reads its audio file, times a single processing call and
    prints how many times faster than real time it ran (audio duration
    divided by elapsed time). The measurement is repeated ``nExp`` times
    (``nExp`` is a module-level value), printing one line each time.

    Recognised options: ``-shortTerm``, ``-classifyFile``, ``-mtClassify``,
    ``-hmmSegmentation``, ``-silenceRemoval``, ``-thumbnailing``,
    ``-diarization-noLDA`` and ``-diarization-LDA``. An unrecognised option
    results in no work and no output.

    Args:
        argv: argument list; the option is taken from ``argv[1]``.

    Raises:
        IndexError: if ``argv`` has fewer than two elements.
    """
    selected = argv[1]
    speech_file = "diarizationExample.wav"

    # time.clock() no longer exists on Python 3.8+. Prefer it when present so
    # results are unchanged there, and fall back to time.perf_counter.
    clock = getattr(time, "clock", None) or time.perf_counter

    def run_short_term(x, Fs):
        audioFeatureExtraction.stFeatureExtraction(
            x, Fs, 0.050 * Fs, 0.050 * Fs)

    def run_classify_file(x, Fs):
        aT.fileClassification(speech_file, "svmSM", "svm")

    def run_mt_classify(x, Fs):
        aS.mtFileClassification(speech_file, "svmSM", "svm", False, '')

    def run_hmm_segmentation(x, Fs):
        aS.hmmSegmentation(speech_file, 'hmmRadioSM', False, '')

    def run_silence_removal(x, Fs):
        # The file is read a second time inside the timed region, exactly as
        # the original benchmark did.
        [Fs, x] = audioBasicIO.readAudioFile(speech_file)
        aS.silenceRemoval(x, Fs, 0.050, 0.050,
                          smoothWindow=1.0, Weight=0.3, plot=False)

    def run_thumbnailing(x, Fs):
        # find thumbnail endpoints
        aS.musicThumbnailing(x, Fs, 1.0, 1.0, 15.0)

    def run_diarization_no_lda(x, Fs):
        aS.speakerDiarization(speech_file, 4, LDAdim=0, PLOT=False)

    def run_diarization_lda(x, Fs):
        aS.speakerDiarization(speech_file, 4, PLOT=False)

    # (option, audio file, report format, timed workload)
    table = (
        ("-shortTerm", speech_file,
         "short-term feature extraction: {0:.1f} x realtime",
         run_short_term),
        ("-classifyFile", speech_file,
         "Mid-term feature extraction + classification \t {0:.1f} x realtime",
         run_classify_file),
        ("-mtClassify", speech_file,
         "Fix-sized classification - segmentation \t {0:.1f} x realtime",
         run_mt_classify),
        ("-hmmSegmentation", speech_file,
         "HMM-based classification - segmentation \t {0:.1f} x realtime",
         run_hmm_segmentation),
        ("-silenceRemoval", speech_file,
         "Silence removal \t {0:.1f} x realtime",
         run_silence_removal),
        ("-thumbnailing", "scottish.wav",
         "Thumbnail \t {0:.1f} x realtime",
         run_thumbnailing),
        ("-diarization-noLDA", speech_file,
         "Diarization \t {0:.1f} x realtime",
         run_diarization_no_lda),
        ("-diarization-LDA", speech_file,
         "Diarization \t {0:.1f} x realtime",
         run_diarization_lda),
    )

    for option, audio_file, report, workload in table:
        if selected != option:
            continue
        for _ in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile(audio_file)
            duration = x.shape[0] / float(Fs)
            t1 = clock()
            workload(x, Fs)
            t2 = clock()
            # print(...) with one argument is valid on Python 2 and 3 alike.
            print(report.format(duration / (t2 - t1)))
        return