# Imports assumed by the functions below (flat pyAudioAnalysis module layout).
# Fs (recording sampling rate) and minDuration (minimum music-segment length)
# are module-level constants defined elsewhere in the original scripts.
import os
import glob
import shutil
import audioop

import numpy
import matplotlib.pyplot as plt
import alsaaudio
from scipy.io import wavfile

import audioBasicIO
import audioTrainTest as aT
import audioSegmentation as aS
import audioFeatureExtraction as aF


def classifyFolderWrapper(inputFolder, modelType, modelName, outputMode=False):
    if not os.path.isfile(modelName):
        raise Exception("Input modelName not found!")

    # load the pre-trained classifier along with its normalization and windowing parameters:
    if modelType == 'svm':
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, computeBEAT] = aT.loadSVModel(modelName)
    elif modelType == 'knn':
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, computeBEAT] = aT.loadKNNModel(modelName)

    PsAll = numpy.zeros((len(classNames), ))

    files = "*.wav"
    if os.path.isdir(inputFolder):
        strFilePattern = os.path.join(inputFolder, files)
    else:
        strFilePattern = inputFolder + files

    wavFilesList = []
    wavFilesList.extend(glob.glob(strFilePattern))
    wavFilesList = sorted(wavFilesList)
    if len(wavFilesList) == 0:
        print "No WAV files found!"
        return

    Results = []
    for wavFile in wavFilesList:
        [Fs, x] = audioBasicIO.readAudioFile(wavFile)
        signalLength = x.shape[0] / float(Fs)
        # classify each file and weight its class probabilities by its duration:
        [Result, P, classNames] = aT.fileClassification(wavFile, modelName, modelType)
        PsAll += (numpy.array(P) * signalLength)
        Result = int(Result)
        Results.append(Result)
        if outputMode:
            print "{0:s}\t{1:s}".format(wavFile, classNames[Result])
    Results = numpy.array(Results)

    # print the distribution of winning classes over all files:
    [Histogram, _] = numpy.histogram(Results, bins=numpy.arange(len(classNames) + 1))
    if outputMode:
        for i, h in enumerate(Histogram):
            print "{0:20s}\t\t{1:d}".format(classNames[i], h)
    PsAll = PsAll / numpy.sum(PsAll)

    if outputMode:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        plt.title("Classes percentage " + inputFolder.replace('Segments', ''))
        ax.axis((0, len(classNames) + 1, 0, 1))
        ax.set_xticks(numpy.array(range(len(classNames) + 1)))
        ax.set_xticklabels([" "] + classNames)
        ax.bar(numpy.array(range(len(classNames))) + 0.5, PsAll)
        plt.show()
    return classNames, PsAll
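# --- Hedged usage sketch (an addition, not part of the original script) ---
# Illustrates one way classifyFolderWrapper() might be called on a folder of WAV
# segments. The folder name "samplesSegments" and the model path
# "data/svmMovies8classes" are assumed example values.
def demoClassifyFolder():
    res = classifyFolderWrapper("samplesSegments", "svm", "data/svmMovies8classes", outputMode=True)
    if res is None:                 # no WAV files were found in the folder
        return
    classNames, PsAll = res
    # PsAll is the duration-weighted class distribution over all files, normalized to sum to 1:
    for c, p in zip(classNames, PsAll):
        print "{0:20s}\t{1:.3f}".format(c, p)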
def getMusicSegmentsFromFile(inputFile):
    modelType = "svm"
    modelName = "data/svmMovies8classes"

    dirOutput = inputFile[0:-4] + "_musicSegments"

    if os.path.exists(dirOutput) and dirOutput != ".":
        shutil.rmtree(dirOutput)
    os.makedirs(dirOutput)

    [Fs, x] = audioBasicIO.readAudioFile(inputFile)

    if modelType == 'svm':
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, computeBEAT] = aT.loadSVModel(modelName)
    elif modelType == 'knn':
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, computeBEAT] = aT.loadKNNModel(modelName)

    # classify the whole stream into fixed-size mid-term windows and merge them into segments:
    flagsInd, classNames, acc = aS.mtFileClassification(inputFile, modelName, modelType, plotResults=False, gtFile="")
    segs, classes = aS.flags2segs(flagsInd, mtStep)

    # keep only "Music" segments longer than minDuration and write each to a separate WAV file:
    for i, s in enumerate(segs):
        if (classNames[int(classes[i])] == "Music") and (s[1] - s[0] >= minDuration):
            strOut = "{0:s}{1:.3f}-{2:.3f}.wav".format(dirOutput + os.sep, s[0], s[1])
            wavfile.write(strOut, Fs, x[int(Fs * s[0]):int(Fs * s[1])])
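# --- Hedged usage sketch (an addition, not part of the original script) ---
# getMusicSegmentsFromFile() reads one module-level name that is not defined in the
# excerpt above: minDuration, the minimum accepted music-segment length in seconds.
# A hypothetical value is set here purely for illustration; "movie.wav" is an example path.
def demoExtractMusicSegments():
    global minDuration
    minDuration = 10.0
    getMusicSegmentsFromFile("movie.wav")
    # The resulting "movie_musicSegments" folder contains one WAV file per detected
    # music segment, named after its start and end times in seconds.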
def mtFileClassification(inputFile, modelName, modelType, plotResults=False, gtFile=""):
    '''
    This function performs mid-term classification of an audio stream.
    Towards this end, supervised knowledge is used, i.e. a pre-trained classifier.
    ARGUMENTS:
        - inputFile:    path of the input WAV file
        - modelName:    name of the classification model
        - modelType:    svm or knn, depending on the classifier type
        - plotResults:  True if results are to be plotted using matplotlib, along with a set of statistics
    RETURNS:
        - flagsInd:     a numpy array with one class index per mid-term window
        - classNames:   the list of class names of the loaded model
        - acc:          classification accuracy against the ground truth (negative if no gtFile is given)
    '''
    if not os.path.isfile(modelName):
        print "mtFileClassificationError: input modelName not found!"
        return (-1, -1, -1)

    # Load classifier:
    if modelType == 'svm':
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, computeBEAT] = aT.loadSVModel(modelName)
    elif modelType == 'knn':
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, computeBEAT] = aT.loadKNNModel(modelName)
    if computeBEAT:
        print "Model " + modelName + " contains long-term music features (beat etc) and cannot be used in segmentation"
        return (-1, -1, -1)

    [Fs, x] = audioBasicIO.readAudioFile(inputFile)         # load input file
    if Fs == -1:                                            # could not read file
        return (-1, -1, -1)
    x = audioBasicIO.stereo2mono(x)                         # convert stereo (if needed) to mono
    Duration = len(x) / Fs

    # mid-term feature extraction:
    [MidTermFeatures, _] = aF.mtFeatureExtraction(x, Fs, mtWin * Fs, mtStep * Fs, round(Fs * stWin), round(Fs * stStep))

    flags = []
    Ps = []
    flagsInd = []
    for i in range(MidTermFeatures.shape[1]):               # for each feature vector (i.e. for each fix-sized segment):
        curFV = (MidTermFeatures[:, i] - MEAN) / STD        # normalize current feature vector
        [Result, P] = aT.classifierWrapper(Classifier, modelType, curFV)    # classify vector
        flagsInd.append(Result)
        flags.append(classNames[int(Result)])               # update class label matrix
        Ps.append(numpy.max(P))                             # update probability matrix
    flagsInd = numpy.array(flagsInd)

    # 1-window smoothing of the class decisions:
    for i in range(1, len(flagsInd) - 1):
        if flagsInd[i - 1] == flagsInd[i + 1]:
            flagsInd[i] = flagsInd[i + 1]
    (segs, classes) = flags2segs(flags, mtStep)             # convert fix-sized flags to segments and classes
    segs[-1] = len(x) / float(Fs)

    # Load ground truth (if available):
    if os.path.isfile(gtFile):
        [segStartGT, segEndGT, segLabelsGT] = readSegmentGT(gtFile)
        flagsGT, classNamesGT = segs2flags(segStartGT, segEndGT, segLabelsGT, mtStep)
        flagsIndGT = []
        for j, fl in enumerate(flagsGT):                    # "align" estimated labels with the GT class names
            if classNamesGT[flagsGT[j]] in classNames:
                flagsIndGT.append(classNames.index(classNamesGT[flagsGT[j]]))
            else:
                flagsIndGT.append(-1)
        flagsIndGT = numpy.array(flagsIndGT)
    else:
        flagsIndGT = numpy.array([])

    acc = plotSegmentationResults(flagsInd, flagsIndGT, classNames, mtStep, not plotResults)
    if acc >= 0:
        print "Overall Accuracy: {0:.3f}".format(acc)
    return (flagsInd, classNames, acc)
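# --- Hedged usage sketch (an addition, not part of the original script) ---
# Shows how the per-window decisions returned by mtFileClassification() might be
# inspected. "speech_music.wav" and the model path "data/svmSM" are assumed example
# values; any pre-trained SVM model of the same format would do.
def demoSegmentFile():
    flagsInd, classNames, acc = mtFileClassification("speech_music.wav", "data/svmSM", "svm",
                                                     plotResults=False, gtFile="")
    if not isinstance(flagsInd, numpy.ndarray):     # (-1, -1, -1) signals an error
        return
    # flagsInd holds one class index per mid-term window; print the label changes only:
    for i, f in enumerate(flagsInd):
        if i == 0 or f != flagsInd[i - 1]:
            print "window {0:d}: {1:s}".format(i, classNames[int(f)])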
def recordAnalyzeAudio(duration, outputWavFile, midTermBufferSizeSec, modelName, modelType):
    '''
    recordAnalyzeAudio(duration, outputWavFile, midTermBufferSizeSec, modelName, modelType)

    This function records and analyzes audio segments on a fixed-window basis.
    ARGUMENTS:
        - duration               total recording duration
        - outputWavFile          path of the output WAV file
        - midTermBufferSizeSec   (fixed) segment length in seconds
        - modelName              classification model name
        - modelType              classification model type
    '''
    if modelType == 'neuralnet':
        neuralNetClassidication(duration, midTermBufferSizeSec, modelName)
    else:
        if modelType == 'svm':
            [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, computeBEAT] = aT.loadSVModel(modelName)
        elif modelType == 'knn':
            [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, computeBEAT] = aT.loadKNNModel(modelName)
        else:
            Classifier = None

        # open the ALSA capture device: mono, 16-bit little-endian, sampling rate Fs
        inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NONBLOCK)
        inp.setchannels(1)
        inp.setrate(Fs)
        inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)
        inp.setperiodsize(512)

        midTermBufferSize = int(midTermBufferSizeSec * Fs)
        allData = []
        midTermBuffer = []
        curWindow = []
        count = 0

        while len(allData) < duration * Fs:
            # Read data from device
            l, data = inp.read()
            if l:
                for i in range(l):
                    curWindow.append(audioop.getsample(data, 2, i))
                if (len(curWindow) + len(midTermBuffer) > midTermBufferSize):
                    samplesToCopyToMidBuffer = midTermBufferSize - len(midTermBuffer)
                else:
                    samplesToCopyToMidBuffer = len(curWindow)
                midTermBuffer = midTermBuffer + curWindow[0:samplesToCopyToMidBuffer]
                del (curWindow[0:samplesToCopyToMidBuffer])
            if len(midTermBuffer) == midTermBufferSize:
                count += 1
                if Classifier is not None:
                    # extract mid-term features from the full buffer and classify it:
                    [mtFeatures, stFeatures] = aF.mtFeatureExtraction(midTermBuffer, Fs, 2.0 * Fs, 2.0 * Fs, 0.020 * Fs, 0.020 * Fs)
                    curFV = (mtFeatures[:, 0] - MEAN) / STD
                    [result, P] = aT.classifierWrapper(Classifier, modelType, curFV)
                    print classNames[int(result)]
                allData = allData + midTermBuffer
                plt.clf()
                plt.plot(midTermBuffer)
                plt.show(block=False)
                plt.draw()
                midTermBuffer = []

        allDataArray = numpy.int16(allData)
        wavfile.write(outputWavFile, Fs, allDataArray)
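# --- Hedged usage sketch (an addition, not part of the original script) ---
# recordAnalyzeAudio() captures from the default ALSA device and relies on a
# module-level sampling rate Fs that is not defined in the excerpt above; a value
# of 16000 Hz is assumed here for illustration, and "recording.wav" and "data/svmSM"
# are example paths.
def demoLiveClassification():
    global Fs
    Fs = 16000
    # Record for 60 seconds, classify every 2-second buffer with an SVM model,
    # and write the full raw recording to recording.wav:
    recordAnalyzeAudio(60, "recording.wav", 2.0, "data/svmSM", "svm")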