def fileClassification(inputFile, modelName, modelType):
    """Classify a WAV file with a pre-trained segment classifier.

    inputFile -- path to the audio file to classify
    modelName -- path of the stored model
    modelType -- 'svm' or 'knn'
    Returns (class_index, probability_vector, class_names), or (-1, -1, -1)
    on any error (missing files, unknown model type, unreadable or
    too-short audio).
    """
    # Load classifier:
    if not os.path.isfile(modelName):
        print("fileClassification: input modelName not found!")
        return (-1, -1, -1)
    if not os.path.isfile(inputFile):
        print("fileClassification: wav file not found!")
        return (-1, -1, -1)
    if modelType == 'svm':
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
         computeBEAT] = loadSVModel(modelName)
    elif modelType == 'knn':
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
         computeBEAT] = loadKNNModel(modelName)
    else:
        # Fail gracefully for unknown model types instead of raising a
        # NameError on the (unbound) mtWin below.
        print("fileClassification: unknown modelType!")
        return (-1, -1, -1)
    # read audio file and convert to mono
    [Fs, x] = audioBasicIO.readAudioFile(inputFile)
    x = audioBasicIO.stereo2mono(x)
    # Same robustness guards as the extended fileClassification variant in
    # this file: readAudioFile signals an IO failure with an int, and a clip
    # shorter than one mid-term window cannot be classified.
    if isinstance(x, int):  # audio file IO problem
        return (-1, -1, -1)
    if x.shape[0] / float(Fs) <= mtWin:
        return (-1, -1, -1)
    # feature extraction:
    [MidTermFeatures, s] = aF.mtFeatureExtraction(x, Fs, mtWin * Fs,
                                                  mtStep * Fs,
                                                  round(Fs * stWin),
                                                  round(Fs * stStep))
    MidTermFeatures = MidTermFeatures.mean(axis=1)  # long term averaging of mid-term statistics
    if computeBEAT:
        [beat, beatConf] = aF.beatExtraction(s, stStep)
        MidTermFeatures = numpy.append(MidTermFeatures, beat)
        MidTermFeatures = numpy.append(MidTermFeatures, beatConf)
    curFV = (MidTermFeatures - MEAN) / STD  # normalization
    [Result, P] = classifierWrapper(Classifier, modelType, curFV)  # classification
    return Result, P, classNames
def loaded_soundClassification(sound, Fs, model_type, *args):
    """Classify an in-memory 16-bit PCM buffer with an already-loaded model.

    sound      -- raw bytes of int16 samples
    Fs         -- sampling frequency of `sound`
    model_type -- classifier type string forwarded to classifierWrapper
    *args      -- the 9-tuple produced by the model loader:
                  (classifier, MEAN, STD, classNames, mt_win, mt_step,
                   st_win, st_step, compute_beat)
    Returns (class_index, probability_vector, class_names), or (-1, -1, -1)
    when the buffer is shorter than one mid-term window.
    """
    # Load classifier:
    classifier, MEAN, STD, classNames, mt_win, mt_step, st_win, st_step, compute_beat = args
    # numpy.fromstring is deprecated for binary input; frombuffer is the
    # drop-in replacement with identical semantics for bytes.
    x = numpy.frombuffer(sound, numpy.int16)
    if x.shape[0] / float(Fs) <= mt_win:
        return (-1, -1, -1)
    # feature extraction:
    [mt_features, s, _] = aF.mtFeatureExtraction(x, Fs, mt_win * Fs,
                                                 mt_step * Fs,
                                                 round(Fs * st_win),
                                                 round(Fs * st_step))
    mt_features = mt_features.mean(axis=1)  # long term averaging of mid-term statistics
    if compute_beat:
        [beat, beatConf] = aF.beatExtraction(s, st_step)
        mt_features = numpy.append(mt_features, beat)
        mt_features = numpy.append(mt_features, beatConf)
    curFV = (mt_features - MEAN) / STD  # normalization
    [Result, P] = classifierWrapper(classifier, model_type, curFV)  # classification
    return Result, P, classNames
def beatExtractionWrapper(wav_file, plot):
    """Estimate and print the tempo (BPM) and beat ratio of a WAV file.

    wav_file -- path to the input audio file
    plot     -- forwarded to aF.beatExtraction to enable visualisation
    Raises Exception when wav_file does not exist.
    """
    if not os.path.isfile(wav_file):
        raise Exception("Input audio file not found!")
    sampling_rate, signal = audioBasicIO.readAudioFile(wav_file)
    window = 0.050 * sampling_rate  # 50 ms short-term window and step
    features, _ = aF.stFeatureExtraction(signal, sampling_rate, window, window)
    tempo, beat_ratio = aF.beatExtraction(features, 0.050, plot)
    print("Beat: {0:d} bpm ".format(int(tempo)))
    print("Ratio: {0:.2f} ".format(beat_ratio))
def beatExtractionWrapper(wavFileName, plot):
    """Estimate and print the tempo (BPM) and beat ratio of a WAV file.

    Python-2 variant (print statements) of the beat-extraction helper.
    Raises Exception when wavFileName does not exist.
    """
    if not os.path.isfile(wavFileName):
        raise Exception("Input audio file not found!")
    [Fs, x] = audioBasicIO.readAudioFile(wavFileName)
    # NOTE(review): the py3 wrapper in this file unpacks two values from
    # stFeatureExtraction; this one assumes a single return value — confirm
    # which aF API version this variant targets.
    F = aF.stFeatureExtraction(x, Fs, 0.050 * Fs, 0.050 * Fs)
    BPM, ratio = aF.beatExtraction(F, 0.050, plot)
    print "Beat: {0:d} bpm ".format(int(BPM))
    print "Ratio: {0:.2f} ".format(ratio)
def fileRegression(inputFile, modelName, modelType):
    """Apply every regression model matching `modelName + "_*"` to a WAV file.

    inputFile -- path of the audio file
    modelName -- prefix of the stored regression model files
    modelType -- 'svm' or 'knn'
    Returns (R, regressionNames): one predicted value per discovered model,
    or (-1, -1, -1) on error.
    """
    # Load classifier:
    if not os.path.isfile(inputFile):
        print "fileClassification: wav file not found!"
        return (-1, -1, -1)
    # Discover per-target model files; skip the "*MEANS" normalization files.
    regressionModels = glob.glob(modelName + "_*")
    regressionModels2 = []
    for r in regressionModels:
        if r[-5::] != "MEANS":
            regressionModels2.append(r)
    regressionModels = regressionModels2
    # The target name is the suffix after the last underscore of each file.
    regressionNames = []
    for r in regressionModels:
        regressionNames.append(r[r.rfind("_") + 1::])
    # FEATURE EXTRACTION
    # LOAD ONLY THE FIRST MODEL (for mtWin, etc)
    if modelType == 'svm':
        [_, _, _, mtWin, mtStep, stWin, stStep,
         computeBEAT] = loadSVModel(regressionModels[0], True)
    elif modelType == 'knn':
        [_, _, _, mtWin, mtStep, stWin, stStep,
         computeBEAT] = loadKNNModel(regressionModels[0], True)
    [Fs, x] = audioBasicIO.readAudioFile(inputFile)  # read audio file and convert to mono
    x = audioBasicIO.stereo2mono(x)
    # feature extraction:
    [MidTermFeatures, s] = aF.mtFeatureExtraction(x, Fs, mtWin * Fs,
                                                  mtStep * Fs,
                                                  round(Fs * stWin),
                                                  round(Fs * stStep))
    MidTermFeatures = MidTermFeatures.mean(axis=1)  # long term averaging of mid-term statistics
    if computeBEAT:
        [beat, beatConf] = aF.beatExtraction(s, stStep)
        MidTermFeatures = numpy.append(MidTermFeatures, beat)
        MidTermFeatures = numpy.append(MidTermFeatures, beatConf)
    # REGRESSION
    R = []
    for ir, r in enumerate(regressionModels):
        if not os.path.isfile(r):
            print "fileClassification: input modelName not found!"
            return (-1, -1, -1)
        # Each model carries its own MEAN/STD used to normalize the features.
        if modelType == 'svm':
            [Model, MEAN, STD, mtWin, mtStep, stWin, stStep,
             computeBEAT] = loadSVModel(r, True)
        elif modelType == 'knn':
            [Model, MEAN, STD, mtWin, mtStep, stWin, stStep,
             computeBEAT] = loadKNNModel(r, True)
        curFV = (MidTermFeatures - MEAN) / STD  # normalization
        R.append(regressionWrapper(Model, modelType, curFV))  # classification
    return R, regressionNames
def fileRegression(inputFile, model_name, model_type):
    """Apply every regression model matching `model_name + "_*"` to a WAV file.

    inputFile  -- path of the audio file
    model_name -- prefix of the stored regression model files
    model_type -- 'svm', 'svm_rbf' or 'randomforest'
    Returns (R, regression_names): one predicted value per discovered model,
    or (-1, -1, -1) on error.

    NOTE(review): any other model_type (e.g. 'knn') leaves mt_win unbound and
    raises NameError below — confirm the supported set against the loaders.
    """
    # Load classifier:
    if not os.path.isfile(inputFile):
        print("fileClassification: wav file not found!")
        return (-1, -1, -1)
    # Discover per-target model files; skip the "*MEANS" normalization files.
    regression_models = glob.glob(model_name + "_*")
    regression_models2 = []
    for r in regression_models:
        if r[-5::] != "MEANS":
            regression_models2.append(r)
    regression_models = regression_models2
    # The target name is the suffix after the last underscore of each file.
    regression_names = []
    for r in regression_models:
        regression_names.append(r[r.rfind("_") + 1::])
    # FEATURE EXTRACTION
    # LOAD ONLY THE FIRST MODEL (for mt_win, etc)
    if model_type == 'svm' or model_type == "svm_rbf" or model_type == 'randomforest':
        [_, _, _, mt_win, mt_step, st_win, st_step,
         compute_beat] = load_model(regression_models[0], True)
    [Fs, x] = audioBasicIO.readAudioFile(inputFile)  # read audio file and convert to mono
    x = audioBasicIO.stereo2mono(x)
    # feature extraction:
    [mt_features, s, _] = aF.mtFeatureExtraction(x, Fs, mt_win * Fs,
                                                 mt_step * Fs,
                                                 round(Fs * st_win),
                                                 round(Fs * st_step))
    mt_features = mt_features.mean(axis=1)  # long term averaging of mid-term statistics
    if compute_beat:
        [beat, beatConf] = aF.beatExtraction(s, st_step)
        mt_features = numpy.append(mt_features, beat)
        mt_features = numpy.append(mt_features, beatConf)
    # REGRESSION
    R = []
    for ir, r in enumerate(regression_models):
        if not os.path.isfile(r):
            print("fileClassification: input model_name not found!")
            return (-1, -1, -1)
        # Each model carries its own MEAN/STD used to normalize the features.
        if model_type == 'svm' or model_type == "svm_rbf" \
                or model_type == 'randomforest':
            [model, MEAN, STD, mt_win, mt_step, st_win, st_step, compute_beat] = \
                load_model(r, True)
        curFV = (mt_features - MEAN) / STD  # normalization
        R.append(regressionWrapper(model, model_type, curFV))  # classification
    return R, regression_names
def featureExtraction(dirName): types = (dirName + os.sep + '*.wav', ) # the tuple of file types filesToProcess = [] for files in types: filesToProcess.extend(glob.glob(files)) for f in filesToProcess: print f [Fs, x] = audioBasicIO.readAudioFile(f) Mt, St = aF.mtFeatureExtraction(x, Fs, 1 * Fs, 1 * Fs, 0.500 * Fs, 0.500 * Fs) F = St BPM, ratio = aF.beatExtraction(F, 0.100, False) print "Beat: {0:d} bpm ".format(int(BPM)) print "Ratio: {0:.2f} ".format(ratio) print("Storing features to monogodb") storeFeaturesToMongoDb(F, BPM, f)
def fileClassification(inputFile, model_name, model_type):
    """Classify a single audio file using a stored segment classifier.

    inputFile  -- path of the WAV file to classify
    model_name -- path of the stored model
    model_type -- 'knn' selects load_model_knn; anything else uses load_model
    Returns (class_index, probability_vector, class_names), or (-1, -1, -1)
    on any error (missing files, unreadable or too-short audio).
    """
    # Guard clauses: both the model and the audio file must exist on disk.
    if not os.path.isfile(model_name):
        print("fileClassification: input model_name not found!")
        return (-1, -1, -1)
    if not os.path.isfile(inputFile):
        print("fileClassification: wav file not found!")
        return (-1, -1, -1)

    # kNN models have a dedicated loader; every other type uses load_model.
    load = load_model_knn if model_type == 'knn' else load_model
    (classifier, MEAN, STD, classNames,
     mt_win, mt_step, st_win, st_step, compute_beat) = load(model_name)

    # Read the signal and collapse it to a single channel.
    Fs, x = audioBasicIO.readAudioFile(inputFile)
    x = audioBasicIO.stereo2mono(x)
    if isinstance(x, int):  # audio file IO problem
        return (-1, -1, -1)
    if x.shape[0] / float(Fs) <= mt_win:  # shorter than one mid-term window
        return (-1, -1, -1)

    # Mid-term features, averaged over time into a single feature vector.
    mt_features, s, _ = aF.mtFeatureExtraction(x, Fs, mt_win * Fs,
                                               mt_step * Fs,
                                               round(Fs * st_win),
                                               round(Fs * st_step))
    feature_vector = mt_features.mean(axis=1)
    if compute_beat:
        beat, beatConf = aF.beatExtraction(s, st_step)
        feature_vector = numpy.append(feature_vector, beat)
        feature_vector = numpy.append(feature_vector, beatConf)

    curFV = (feature_vector - MEAN) / STD  # normalization
    Result, P = classifierWrapper(classifier, model_type, curFV)  # classification
    return Result, P, classNames
def fileRegression(inputFile, modelName, modelType):
    """Apply every regression model matching `modelName + "_*"` to a WAV file.

    NOTE(review): byte-identical duplicate of the other py2 fileRegression in
    this file — consider removing one copy.

    Returns (R, regressionNames): one predicted value per discovered model,
    or (-1, -1, -1) on error.
    """
    # Load classifier:
    if not os.path.isfile(inputFile):
        print "fileClassification: wav file not found!"
        return (-1, -1, -1)
    # Discover per-target model files; skip the "*MEANS" normalization files.
    regressionModels = glob.glob(modelName + "_*")
    regressionModels2 = []
    for r in regressionModels:
        if r[-5::] != "MEANS":
            regressionModels2.append(r)
    regressionModels = regressionModels2
    # The target name is the suffix after the last underscore of each file.
    regressionNames = []
    for r in regressionModels:
        regressionNames.append(r[r.rfind("_") + 1::])
    # FEATURE EXTRACTION
    # LOAD ONLY THE FIRST MODEL (for mtWin, etc)
    if modelType == 'svm':
        [_, _, _, mtWin, mtStep, stWin, stStep,
         computeBEAT] = loadSVModel(regressionModels[0], True)
    elif modelType == 'knn':
        [_, _, _, mtWin, mtStep, stWin, stStep,
         computeBEAT] = loadKNNModel(regressionModels[0], True)
    [Fs, x] = audioBasicIO.readAudioFile(inputFile)  # read audio file and convert to mono
    x = audioBasicIO.stereo2mono(x)
    # feature extraction:
    [MidTermFeatures, s] = aF.mtFeatureExtraction(x, Fs, mtWin * Fs,
                                                  mtStep * Fs,
                                                  round(Fs * stWin),
                                                  round(Fs * stStep))
    MidTermFeatures = MidTermFeatures.mean(axis=1)  # long term averaging of mid-term statistics
    if computeBEAT:
        [beat, beatConf] = aF.beatExtraction(s, stStep)
        MidTermFeatures = numpy.append(MidTermFeatures, beat)
        MidTermFeatures = numpy.append(MidTermFeatures, beatConf)
    # REGRESSION
    R = []
    for ir, r in enumerate(regressionModels):
        if not os.path.isfile(r):
            print "fileClassification: input modelName not found!"
            return (-1, -1, -1)
        # Each model carries its own MEAN/STD used to normalize the features.
        if modelType == 'svm':
            [Model, MEAN, STD, mtWin, mtStep, stWin, stStep,
             computeBEAT] = loadSVModel(r, True)
        elif modelType == 'knn':
            [Model, MEAN, STD, mtWin, mtStep, stWin, stStep,
             computeBEAT] = loadKNNModel(r, True)
        curFV = (MidTermFeatures - MEAN) / STD  # normalization
        R.append(regressionWrapper(Model, modelType, curFV))  # classification
    return R, regressionNames
def fileClassification(inputFile, modelName, modelType):
    """Classify a WAV file with a pre-trained segment classifier (py2 variant
    supporting svm, svm_rbf, knn, randomforest, gradientboosting, extratrees).

    Returns (class_index, probability_vector, class_names), or (-1, -1, -1)
    on any error (missing files, unreadable or too-short audio).
    NOTE(review): an unrecognised modelType falls through all branches and
    raises NameError on mtWin below.
    """
    # Load classifier:
    if not os.path.isfile(modelName):
        print "fileClassification: input modelName not found!"
        return (-1, -1, -1)
    if not os.path.isfile(inputFile):
        print "fileClassification: wav file not found!"
        return (-1, -1, -1)
    # Pick the loader matching the stored model's type.
    if (modelType) == 'svm' or (modelType == 'svm_rbf'):
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
         computeBEAT] = loadSVModel(modelName)
    elif modelType == 'knn':
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
         computeBEAT] = loadKNNModel(modelName)
    elif modelType == 'randomforest':
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
         computeBEAT] = loadRandomForestModel(modelName)
    elif modelType == 'gradientboosting':
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
         computeBEAT] = loadGradientBoostingModel(modelName)
    elif modelType == 'extratrees':
        [Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
         computeBEAT] = loadExtraTreesModel(modelName)
    [Fs, x] = audioBasicIO.readAudioFile(inputFile)  # read audio file and convert to mono
    x = audioBasicIO.stereo2mono(x)
    if isinstance(x, int):  # audio file IO problem
        return (-1, -1, -1)
    if x.shape[0] / float(Fs) <= mtWin:
        # signal shorter than one mid-term window: cannot classify
        return (-1, -1, -1)
    # feature extraction:
    [MidTermFeatures, s] = aF.mtFeatureExtraction(x, Fs, mtWin * Fs,
                                                  mtStep * Fs,
                                                  round(Fs * stWin),
                                                  round(Fs * stStep))
    MidTermFeatures = MidTermFeatures.mean(axis=1)  # long term averaging of mid-term statistics
    if computeBEAT:
        [beat, beatConf] = aF.beatExtraction(s, stStep)
        MidTermFeatures = numpy.append(MidTermFeatures, beat)
        MidTermFeatures = numpy.append(MidTermFeatures, beatConf)
    curFV = (MidTermFeatures - MEAN) / STD  # normalization
    [Result, P] = classifierWrapper(Classifier, modelType, curFV)  # classification
    return Result, P, classNames
def soundClassification(sound, Fs, model_name, model_type):
    """Classify an in-memory 16-bit PCM buffer with a stored model.

    sound      -- raw bytes of int16 samples
    Fs         -- sampling frequency of `sound`
    model_name -- path of the stored classifier
    model_type -- 'knn' selects load_model_knn; anything else uses load_model
    Returns (class_index, probability_vector, class_names), or (-1, -1, -1)
    when the model file is missing or the buffer is shorter than one
    mid-term window.
    """
    # Load classifier:
    if not os.path.isfile(model_name):
        print("fileClassification: input model_name not found!")
        return (-1, -1, -1)
    if model_type == 'knn':
        [classifier, MEAN, STD, classNames, mt_win, mt_step, st_win, st_step,
         compute_beat] = load_model_knn(model_name)
    else:
        [classifier, MEAN, STD, classNames, mt_win, mt_step, st_win, st_step,
         compute_beat] = load_model(model_name)
    # numpy.fromstring is deprecated for binary input; frombuffer is the
    # drop-in replacement with identical semantics for bytes.
    x = numpy.frombuffer(sound, numpy.int16)
    if x.shape[0] / float(Fs) <= mt_win:
        return (-1, -1, -1)
    # feature extraction:
    [mt_features, s, _] = aF.mtFeatureExtraction(x, Fs, mt_win * Fs,
                                                 mt_step * Fs,
                                                 round(Fs * st_win),
                                                 round(Fs * st_step))
    mt_features = mt_features.mean(axis=1)  # long term averaging of mid-term statistics
    if compute_beat:
        [beat, beatConf] = aF.beatExtraction(s, st_step)
        mt_features = numpy.append(mt_features, beat)
        mt_features = numpy.append(mt_features, beatConf)
    curFV = (mt_features - MEAN) / STD  # normalization
    [Result, P] = classifierWrapper(classifier, model_type, curFV)  # classification
    return Result, P, classNames
def main(argv):
    """Command-line dispatcher: argv[1] selects the operation, the remaining
    arguments are operation-specific (see the per-branch usage strings).

    Python-2 variant (print statements). Each branch validates its arguments,
    then delegates to the audioBasicIO / aF / aT / aS / aV helpers.
    """
    if argv[1] == "-dirMp3toWAV":  # convert mp3 to wav (batch)
        if len(argv) == 5:
            path = argv[2]
            if argv[3] not in ["8000", "16000", "32000", "44100"]:
                print "Error. Unsupported sampling rate (must be: 8000, 16000, 32000 or 44100)."
                return
            if argv[4] not in ["1", "2"]:
                print "Error. Number of output channels must be 1 or 2"
                return
            if not os.path.isdir(path):
                raise Exception("Input path not found!")
            useMp3TagsAsNames = True
            audioBasicIO.convertDirMP3ToWav(path, int(argv[3]), int(argv[4]),
                                            useMp3TagsAsNames)
        else:
            print "Error.\nSyntax: " + argv[
                0] + " -dirMp3toWAV <dirName> <sampling Freq> <numOfChannels>"
    if argv[1] == "-dirWAVChangeFs":  # convert mp3 to wav (batch)
        if len(argv) == 5:
            path = argv[2]
            if argv[3] not in ["8000", "16000", "32000", "44100"]:
                print "Error. Unsupported sampling rate (must be: 8000, 16000, 32000 or 44100)."
                return
            if argv[4] not in ["1", "2"]:
                print "Error. Number of output channels must be 1 or 2"
                return
            if not os.path.isdir(path):
                raise Exception("Input path not found!")
            audioBasicIO.convertFsDirWavToWav(path, int(argv[3]), int(argv[4]))
        else:
            print "Error.\nSyntax: " + argv[
                0] + " -dirMp3toWAV <dirName> <sampling Freq> <numOfChannels>"
    elif argv[
            1] == "-featureExtractionFile":  # short-term and mid-term feature extraction to files (csv and numpy)
        if len(argv) == 7:
            wavFileName = argv[2]
            if not os.path.isfile(wavFileName):
                raise Exception("Input audio file not found!")
            if not (uT.isNum(argv[3]) and uT.isNum(argv[4])
                    and uT.isNum(argv[5]) and uT.isNum(argv[6])):
                raise Exception(
                    "Mid-term and short-term window sizes and steps must be numbers!"
                )
            mtWin = float(argv[3])
            mtStep = float(argv[4])
            stWin = float(argv[5])
            stStep = float(argv[6])
            outFile = wavFileName
            aF.mtFeatureExtractionToFile(wavFileName, mtWin, mtStep, stWin,
                                         stStep, outFile, True, True, True)
        else:
            print "Error.\nSyntax: " + argv[
                0] + " -featureExtractionFile <wavFileName> <mtWin> <mtStep> <stWin> <stStep>"
    elif argv[1] == "-beatExtraction":
        if len(argv) == 4:
            wavFileName = argv[2]
            if not os.path.isfile(wavFileName):
                raise Exception("Input audio file not found!")
            if not (uT.isNum(argv[3])):
                raise Exception("PLOT must be either 0 or 1")
            if not ((int(argv[3]) == 0) or (int(argv[3]) == 1)):
                raise Exception("PLOT must be either 0 or 1")
            [Fs, x] = audioBasicIO.readAudioFile(wavFileName)
            F = aF.stFeatureExtraction(x, Fs, 0.050 * Fs, 0.050 * Fs)
            BPM, ratio = aF.beatExtraction(F, 0.050, int(argv[3]) == 1)
            print "Beat: {0:d} bpm ".format(int(BPM))
            print "Ratio: {0:.2f} ".format(ratio)
        else:
            print "Error.\nSyntax: " + argv[
                0] + " -beatExtraction <wavFileName> <PLOT (0 or 1)>"
    elif argv[
            1] == '-featureExtractionDir':  # same as -featureExtractionFile, in a batch mode (i.e. for each WAV file in the provided path)
        if len(argv) == 7:
            path = argv[2]
            if not os.path.isdir(path):
                raise Exception("Input path not found!")
            if not (uT.isNum(argv[3]) and uT.isNum(argv[4])
                    and uT.isNum(argv[5]) and uT.isNum(argv[6])):
                raise Exception(
                    "Mid-term and short-term window sizes and steps must be numbers!"
                )
            mtWin = float(argv[3])
            mtStep = float(argv[4])
            stWin = float(argv[5])
            stStep = float(argv[6])
            aF.mtFeatureExtractionToFileDir(path, mtWin, mtStep, stWin, stStep,
                                            True, True, True)
        else:
            print "Error.\nSyntax: " + argv[
                0] + " -featureExtractionDir <path> <mtWin> <mtStep> <stWin> <stStep>"
    elif argv[
            1] == '-featureVisualizationDir':  # visualize the content relationships between recordings stored in a folder
        if len(argv) == 3:
            if not os.path.isdir(argv[2]):
                raise Exception("Input folder not found!")
            aV.visualizeFeaturesFolder(argv[2], "pca", "")
    elif argv[
            1] == '-fileSpectrogram':  # show spectogram of a sound stored in a file
        if len(argv) == 3:
            wavFileName = argv[2]
            if not os.path.isfile(wavFileName):
                raise Exception("Input audio file not found!")
            [Fs, x] = audioBasicIO.readAudioFile(wavFileName)
            x = audioBasicIO.stereo2mono(x)
            specgram, TimeAxis, FreqAxis = aF.stSpectogram(
                x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)
        else:
            print "Error.\nSyntax: " + argv[0] + " -fileSpectrogram <fileName>"
    elif argv[
            1] == '-fileChromagram':  # show spectogram of a sound stored in a file
        if len(argv) == 3:
            wavFileName = argv[2]
            if not os.path.isfile(wavFileName):
                raise Exception("Input audio file not found!")
            [Fs, x] = audioBasicIO.readAudioFile(wavFileName)
            x = audioBasicIO.stereo2mono(x)
            specgram, TimeAxis, FreqAxis = aF.stChromagram(
                x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)
        else:
            print "Error.\nSyntax: " + argv[0] + " -fileSpectrogram <fileName>"
    elif argv[1] == "-trainClassifier":  # Segment classifier training (OK)
        if len(argv) > 6:
            method = argv[2]
            beatFeatures = (int(argv[3]) == 1)
            # all args between the flag pair and the final model name are class dirs
            listOfDirs = argv[4:len(argv) - 1]
            modelName = argv[-1]
            aT.featureAndTrain(listOfDirs, 1, 1, aT.shortTermWindow,
                               aT.shortTermStep, method.lower(), modelName,
                               computeBEAT=beatFeatures)
        else:
            print "Error.\nSyntax: " + argv[
                0] + " -trainClassifier <method(svm or knn)> <beat features> <directory 1> <directory 2> ... <directory N> <modelName>"
    elif argv[1] == "-trainRegression":  # Segment regression model
        if len(argv) == 6:
            method = argv[2]
            beatFeatures = (int(argv[3]) == 1)
            dirName = argv[4]
            modelName = argv[5]
            aT.featureAndTrainRegression(dirName, 1, 1, aT.shortTermWindow,
                                         aT.shortTermStep, method.lower(),
                                         modelName, computeBEAT=beatFeatures)
        else:
            print "Error.\nSyntax: " + argv[
                0] + " -trainRegression <method(svm or knn)> <beat features> <directory> <modelName>"
    elif argv[1] == "-classifyFile":  # Single File Classification (OK)
        if len(argv) == 5:
            modelType = argv[2]
            modelName = argv[3]
            inputFile = argv[4]
            if modelType not in ["svm", "knn"]:
                raise Exception("ModelType has to be either svm or knn!")
            if not os.path.isfile(modelName):
                raise Exception("Input modelName not found!")
            if not os.path.isfile(inputFile):
                raise Exception("Input audio file not found!")
            [Result, P, classNames] = aT.fileClassification(
                inputFile, modelName, modelType)
            print "{0:s}\t{1:s}".format("Class", "Probability")
            for i, c in enumerate(classNames):
                print "{0:s}\t{1:.2f}".format(c, P[i])
            print "Winner class: " + classNames[int(Result)]
        else:
            print "Error.\nSyntax: " + argv[
                0] + " -classifyFile <method(svm or knn)> <modelName> <fileName>"
    elif argv[1] == "-regressionFile":  # Single File Classification (OK)
        if len(argv) == 5:
            modelType = argv[2]
            modelName = argv[3]
            inputFile = argv[4]
            if modelType not in ["svm", "knn"]:
                raise Exception("ModelType has to be either svm or knn!")
            if not os.path.isfile(inputFile):
                raise Exception("Input audio file not found!")
            R, regressionNames = aT.fileRegression(inputFile, modelName,
                                                   modelType)
            for i in range(len(R)):
                print "{0:s}\t{1:.3f}".format(regressionNames[i], R[i])
                #print "{0:s}\t{1:.2f}".format(c,P[i])
        else:
            print "Error.\nSyntax: " + argv[
                0] + " -regressionFile <method(svm or knn)> <modelName> <fileName>"
    elif argv[1] == "-classifyFolder":  # Directory classification (Ok)
        if len(argv) == 6 or len(argv) == 5:
            modelType = argv[2]
            modelName = argv[3]
            inputFolder = argv[4]
            # optional 6th argument switches per-file output on ("1")
            if len(argv) == 6:
                outputMode = argv[5]
            else:
                outputMode = "0"
            if modelType not in ["svm", "knn"]:
                raise Exception("ModelType has to be either svm or knn!")
            if outputMode not in ["0", "1"]:
                raise Exception("outputMode has to be 0 or 1")
            if not os.path.isfile(modelName):
                raise Exception("Input modelName not found!")
            files = '*.wav'
            if os.path.isdir(inputFolder):
                strFilePattern = os.path.join(inputFolder, files)
            else:
                strFilePattern = inputFolder + files
            wavFilesList = []
            wavFilesList.extend(glob.glob(strFilePattern))
            wavFilesList = sorted(wavFilesList)
            if len(wavFilesList) == 0:
                print "No WAV files found!"
                return
            Results = []
            for wavFile in wavFilesList:
                [Result, P, classNames] = aT.fileClassification(
                    wavFile, modelName, modelType)
                Result = int(Result)
                Results.append(Result)
                if outputMode == "1":
                    print "{0:s}\t{1:s}".format(wavFile, classNames[Result])
            Results = numpy.array(Results)
            # print distribution of classes:
            [Histogram, _] = numpy.histogram(
                Results, bins=numpy.arange(len(classNames) + 1))
            for i, h in enumerate(Histogram):
                print "{0:20s}\t\t{1:d}".format(classNames[i], h)
        else:
            print "Error.\nSyntax: " + argv[
                0] + " -classifyFolder <method(svm or knn)> <modelName> <folderName> <outputMode(0 or 1)"
    elif argv[
            1] == "-regressionFolder":  # Regression applied on the WAV files of a folder
        if len(argv) == 5:
            modelType = argv[2]
            modelName = argv[3]
            inputFolder = argv[4]
            if modelType not in ["svm", "knn"]:
                raise Exception("ModelType has to be either svm or knn!")
            files = '*.wav'
            if os.path.isdir(inputFolder):
                strFilePattern = os.path.join(inputFolder, files)
            else:
                strFilePattern = inputFolder + files
            wavFilesList = []
            wavFilesList.extend(glob.glob(strFilePattern))
            wavFilesList = sorted(wavFilesList)
            if len(wavFilesList) == 0:
                print "No WAV files found!"
                return
            Results = []
            for wavFile in wavFilesList:
                R, regressionNames = aT.fileRegression(wavFile, modelName,
                                                       modelType)
                Results.append(R)
            Results = numpy.array(Results)
            # plot a histogram of the predicted values per regression target
            for i, r in enumerate(regressionNames):
                [Histogram, bins] = numpy.histogram(Results[:, i])
                centers = (bins[0:-1] + bins[1::]) / 2.0
                plt.subplot(len(regressionNames), 1, i)
                plt.plot(centers, Histogram)
                plt.title(r)
            plt.show()
            # for h in Histogram:
            #     print "{0:20d}".format(h),
            # if outputMode=="1":
            #     for i,h in enumerate(Histogram):
            #         print "{0:20s}\t\t{1:d}".format(classNames[i], h)
        else:
            print "Error.\nSyntax: " + argv[
                0] + " -regressionFolder <method(svm or knn)> <modelName> <folderName>"
    elif argv[1] == '-trainHMMsegmenter_fromfile':
        if len(argv) == 7:
            wavFile = argv[2]
            gtFile = argv[3]
            hmmModelName = argv[4]
            if not uT.isNum(argv[5]):
                print "Error: mid-term window size must be float!"
                return
            if not uT.isNum(argv[6]):
                print "Error: mid-term window step must be float!"
                return
            mtWin = float(argv[5])
            mtStep = float(argv[6])
            if not os.path.isfile(wavFile):
                print "Error: wavfile does not exist!"
                return
            if not os.path.isfile(gtFile):
                print "Error: groundtruth does not exist!"
                return
            aS.trainHMM_fromFile(wavFile, gtFile, hmmModelName, mtWin, mtStep)
        else:
            print "Error.\nSyntax: " + argv[
                0] + " -trainHMMsegmenter_fromfile <wavFilePath> <gtSegmentFilePath> <hmmModelFileName> <mtWin> <mtStep>"
    elif argv[1] == '-trainHMMsegmenter_fromdir':
        if len(argv) == 6:
            dirPath = argv[2]
            hmmModelName = argv[3]
            # NOTE(review): unlike the fromfile branch, these checks do not
            # return on failure — float() below will still raise on bad input.
            if not uT.isNum(argv[4]):
                print "Error: mid-term window size must be float!"
            if not uT.isNum(argv[5]):
                print "Error: mid-term window step must be float!"
            mtWin = float(argv[4])
            mtStep = float(argv[5])
            aS.trainHMM_fromDir(dirPath, hmmModelName, mtWin, mtStep)
        else:
            print "Error.\nSyntax: " + argv[
                0] + " -trainHMMsegmenter_fromdir <dirPath> <hmmModelFileName> <mtWin> <mtStep>"
    elif argv[
            1] == "-segmentClassifyFileHMM":  # HMM-based segmentation-classification
        if len(argv) == 4:
            hmmModelName = argv[2]
            wavFile = argv[3]
            gtFile = wavFile.replace('.wav', '.segments')
            aS.hmmSegmentation(wavFile, hmmModelName, PLOT=True,
                               gtFileName=gtFile)
        else:
            print "Error.\nSyntax: " + argv[
                0] + " -segmentClassifyHMM <hmmModelName> <fileName>"
    elif argv[
            1] == '-segmentClassifyFile':  # Segmentation-classification (fix-sized segment using knn or svm)
        if (len(argv) == 5):
            modelType = argv[2]
            modelName = argv[3]
            inputWavFile = argv[4]
            if modelType not in ["svm", "knn"]:
                raise Exception("ModelType has to be either svm or knn!")
            if not os.path.isfile(modelName):
                raise Exception("Input modelName not found!")
            if not os.path.isfile(inputWavFile):
                raise Exception("Input audio file not found!")
            gtFile = inputWavFile.replace('.wav', '.segments')
            aS.mtFileClassification(inputWavFile, modelName, modelType, True,
                                    gtFile)
        else:
            print "Error.\nSyntax: " + argv[
                0] + " -segmentClassifyFile <method(svm or knn)> <modelName> <fileName>"
    elif argv[1] == "-segmentationEvaluation":
        if len(argv) == 5:
            methodName = argv[2]
            modelName = argv[3]
            dirName = argv[4]
            aS.evaluateSegmentationClassificationDir(dirName, modelName,
                                                     methodName)
        else:
            print "Error.\nSyntax: " + argv[
                0] + " -segmentationEvaluation <method(svm or knn)> <modelName> <directoryName>"
    elif argv[1] == "-silenceRemoval":
        if len(argv) == 5:
            inputFile = argv[2]
            if not os.path.isfile(inputFile):
                raise Exception("Input audio file not found!")
            smoothingWindow = float(argv[3])
            weight = float(argv[4])
            [Fs, x] = audioBasicIO.readAudioFile(inputFile)  # read audio signal
            segmentLimits = aS.silenceRemoval(x, Fs, 0.05, 0.05,
                                              smoothingWindow, weight,
                                              False)  # get onsets
            # write each detected non-silent segment to its own WAV file
            for i, s in enumerate(segmentLimits):
                strOut = "{0:s}_{1:.3f}-{2:.3f}.wav".format(
                    inputFile[0:-4], s[0], s[1])
                wavfile.write(strOut, Fs, x[int(Fs * s[0]):int(Fs * s[1])])
        else:
            print "Error.\nSyntax: " + argv[
                0] + " -silenceRemoval <inputFile> <smoothinWindow(secs)> <Threshold Weight>"
    elif argv[
            1] == '-speakerDiarization':  # speaker diarization (from file): TODO
        inputFile = argv[2]
        nSpeakers = int(argv[3])
        useLDA = (int(argv[4]) == 1)
        if useLDA:
            aS.speakerDiarization(inputFile, nSpeakers, PLOT=True)
        else:
            aS.speakerDiarization(inputFile, nSpeakers, LDAdim=0, PLOT=True)
        #print speechLimits
    elif argv[1] == "-speakerDiarizationScriptEval":
        dir = argv[2]
        listOfLDAs = [int(l) for l in argv[3::]]
        aS.speakerDiarizationEvaluateScript(dir, listOfLDAs)
    elif argv[1] == '-thumbnail':  # music thumbnailing (OK)
        if len(argv) == 4:
            inputFile = argv[2]
            stWindow = 1.0
            stStep = 1.0
            if not os.path.isfile(inputFile):
                raise Exception("Input audio file not found!")
            [Fs, x] = audioBasicIO.readAudioFile(inputFile)  # read file
            if Fs == -1:  # could not read file
                return
            try:
                thumbnailSize = float(argv[3])
            except ValueError:
                print "Thumbnail size must be a float (in seconds)"
                return
            [A1, A2, B1, B2, Smatrix] = aS.musicThumbnailing(
                x, Fs, stWindow, stStep, thumbnailSize)  # find thumbnail endpoints
            # write thumbnails to WAV files:
            thumbnailFileName1 = inputFile.replace(".wav", "_thumb1.wav")
            thumbnailFileName2 = inputFile.replace(".wav", "_thumb2.wav")
            wavfile.write(thumbnailFileName1, Fs, x[int(Fs * A1):int(Fs * A2)])
            wavfile.write(thumbnailFileName2, Fs, x[int(Fs * B1):int(Fs * B2)])
            print "1st thumbnail (stored in file {0:s}): {1:4.1f}sec -- {2:4.1f}sec".format(
                thumbnailFileName1, A1, A2)
            print "2nd thumbnail (stored in file {0:s}): {1:4.1f}sec -- {2:4.1f}sec".format(
                thumbnailFileName2, B1, B2)
            # Plot self-similarity matrix:
            fig = plt.figure()
            ax = fig.add_subplot(111, aspect='auto')
            plt.imshow(Smatrix)
            # Plot best-similarity diagonal:
            Xcenter = (A1 / stStep + A2 / stStep) / 2.0
            Ycenter = (B1 / stStep + B2 / stStep) / 2.0
            e1 = matplotlib.patches.Ellipse((Ycenter, Xcenter),
                                            thumbnailSize * 1.4, 3, angle=45,
                                            linewidth=3, fill=False)
            ax.add_patch(e1)
            plt.plot([B1, Smatrix.shape[0]], [A1, A1], color='k',
                     linestyle='--', linewidth=2)
            plt.plot([B2, Smatrix.shape[0]], [A2, A2], color='k',
                     linestyle='--', linewidth=2)
            plt.plot([B1, B1], [A1, Smatrix.shape[0]], color='k',
                     linestyle='--', linewidth=2)
            plt.plot([B2, B2], [A2, Smatrix.shape[0]], color='k',
                     linestyle='--', linewidth=2)
            plt.xlim([0, Smatrix.shape[0]])
            plt.ylim([Smatrix.shape[1], 0])
            ax.yaxis.set_label_position("right")
            ax.yaxis.tick_right()
            plt.xlabel('frame no')
            plt.ylabel('frame no')
            plt.title('Self-similarity matrix')
            plt.show()
        else:
            print "Error.\nSyntax: " + argv[
                0] + " -thumbnail <filename> <thumbnailsize(seconds)>"
def main(argv): if argv[1] == "-dirMp3toWAV": # convert mp3 to wav (batch) if len(argv)==5: path = argv[2] if argv[3] not in ["8000", "16000", "32000", "44100"]: print "Error. Unsupported sampling rate (must be: 8000, 16000, 32000 or 44100)."; return if argv[4] not in ["1","2"]: print "Error. Number of output channels must be 1 or 2"; return if not os.path.isdir(path): raise Exception("Input path not found!") useMp3TagsAsNames = True audioBasicIO.convertDirMP3ToWav(path, int(argv[3]), int(argv[4]), useMp3TagsAsNames) else: print "Error.\nSyntax: " + argv[0] + " -dirMp3toWAV <dirName> <sampling Freq> <numOfChannels>" if argv[1] == "-dirWAVChangeFs": # convert mp3 to wav (batch) if len(argv)==5: path = argv[2] if argv[3] not in ["8000", "16000", "32000", "44100"]: print "Error. Unsupported sampling rate (must be: 8000, 16000, 32000 or 44100)."; return if argv[4] not in ["1","2"]: print "Error. Number of output channels must be 1 or 2"; return if not os.path.isdir(path): raise Exception("Input path not found!") audioBasicIO.convertFsDirWavToWav(path, int(argv[3]), int(argv[4])) else: print "Error.\nSyntax: " + argv[0] + " -dirMp3toWAV <dirName> <sampling Freq> <numOfChannels>" elif argv[1] == "-featureExtractionFile": # short-term and mid-term feature extraction to files (csv and numpy) if len(argv)==7: wavFileName = argv[2] if not os.path.isfile(wavFileName): raise Exception("Input audio file not found!") if not (uT.isNum(argv[3]) and uT.isNum(argv[4]) and uT.isNum(argv[5]) and uT.isNum(argv[6])): raise Exception("Mid-term and short-term window sizes and steps must be numbers!") mtWin = float(argv[3]) mtStep = float(argv[4]) stWin = float(argv[5]) stStep = float(argv[6]) outFile = wavFileName aF.mtFeatureExtractionToFile(wavFileName, mtWin, mtStep, stWin, stStep, outFile, True, True, True) else: print "Error.\nSyntax: " + argv[0] + " -featureExtractionFile <wavFileName> <mtWin> <mtStep> <stWin> <stStep>" elif argv[1] == "-beatExtraction": if len(argv)==4: wavFileName 
= argv[2] if not os.path.isfile(wavFileName): raise Exception("Input audio file not found!") if not (uT.isNum(argv[3])): raise Exception("PLOT must be either 0 or 1") if not ( (int(argv[3]) == 0) or (int(argv[3]) == 1) ): raise Exception("PLOT must be either 0 or 1") [Fs, x] = audioBasicIO.readAudioFile(wavFileName); F = aF.stFeatureExtraction(x, Fs, 0.050*Fs, 0.050*Fs); BPM, ratio = aF.beatExtraction(F, 0.050, int(argv[3])==1) print "Beat: {0:d} bpm ".format(int(BPM)) print "Ratio: {0:.2f} ".format(ratio) else: print "Error.\nSyntax: " + argv[0] + " -beatExtraction <wavFileName> <PLOT (0 or 1)>" elif argv[1] == '-featureExtractionDir': # same as -featureExtractionFile, in a batch mode (i.e. for each WAV file in the provided path) if len(argv)==7: path = argv[2] if not os.path.isdir(path): raise Exception("Input path not found!") if not (uT.isNum(argv[3]) and uT.isNum(argv[4]) and uT.isNum(argv[5]) and uT.isNum(argv[6])): raise Exception("Mid-term and short-term window sizes and steps must be numbers!") mtWin = float(argv[3]) mtStep = float(argv[4]) stWin = float(argv[5]) stStep = float(argv[6]) aF.mtFeatureExtractionToFileDir(path, mtWin, mtStep, stWin, stStep, True, True, True) else: print "Error.\nSyntax: " + argv[0] + " -featureExtractionDir <path> <mtWin> <mtStep> <stWin> <stStep>" elif argv[1] == '-featureVisualizationDir': # visualize the content relationships between recordings stored in a folder if len(argv)==3: if not os.path.isdir(argv[2]): raise Exception("Input folder not found!") aV.visualizeFeaturesFolder(argv[2], "pca", "") elif argv[1] == '-fileSpectrogram': # show spectogram of a sound stored in a file if len(argv)==3: wavFileName = argv[2] if not os.path.isfile(wavFileName): raise Exception("Input audio file not found!") [Fs, x] = audioBasicIO.readAudioFile(wavFileName) x = audioBasicIO.stereo2mono(x) specgram, TimeAxis, FreqAxis = aF.stSpectogram(x, Fs, round(Fs*0.040), round(Fs*0.040), True) else: print "Error.\nSyntax: " + argv[0] + " 
-fileSpectrogram <fileName>" elif argv[1] == '-fileChromagram': # show spectogram of a sound stored in a file if len(argv)==3: wavFileName = argv[2] if not os.path.isfile(wavFileName): raise Exception("Input audio file not found!") [Fs, x] = audioBasicIO.readAudioFile(wavFileName) x = audioBasicIO.stereo2mono(x) specgram, TimeAxis, FreqAxis = aF.stChromagram(x, Fs, round(Fs*0.040), round(Fs*0.040), True) else: print "Error.\nSyntax: " + argv[0] + " -fileSpectrogram <fileName>" elif argv[1] == "-trainClassifier": # Segment classifier training (OK) if len(argv)>6: method = argv[2] beatFeatures = (int(argv[3])==1) listOfDirs = argv[4:len(argv)-1] modelName = argv[-1] aT.featureAndTrain(listOfDirs, 1, 1, aT.shortTermWindow, aT.shortTermStep, method.lower(), modelName, computeBEAT = beatFeatures) else: print "Error.\nSyntax: " + argv[0] + " -trainClassifier <method(svm or knn)> <beat features> <directory 1> <directory 2> ... <directory N> <modelName>" elif argv[1] == "-trainRegression": # Segment regression model if len(argv)==6: method = argv[2] beatFeatures = (int(argv[3])==1) dirName = argv[4] modelName = argv[5] aT.featureAndTrainRegression(dirName, 1, 1, aT.shortTermWindow, aT.shortTermStep, method.lower(), modelName, computeBEAT = beatFeatures) else: print "Error.\nSyntax: " + argv[0] + " -trainRegression <method(svm or knn)> <beat features> <directory> <modelName>" elif argv[1] == "-classifyFile": # Single File Classification (OK) if len(argv)==5: modelType = argv[2] modelName = argv[3] inputFile = argv[4] if modelType not in ["svm", "knn"]: raise Exception("ModelType has to be either svm or knn!") if not os.path.isfile(modelName): raise Exception("Input modelName not found!") if not os.path.isfile(inputFile): raise Exception("Input audio file not found!") [Result, P, classNames] = aT.fileClassification(inputFile, modelName, modelType) print "{0:s}\t{1:s}".format("Class","Probability") for i,c in enumerate(classNames): print "{0:s}\t{1:.2f}".format(c,P[i]) print 
"Winner class: " + classNames[int(Result)] else: print "Error.\nSyntax: " + argv[0] + " -classifyFile <method(svm or knn)> <modelName> <fileName>" elif argv[1] == "-regressionFile": # Single File Classification (OK) if len(argv)==5: modelType = argv[2] modelName = argv[3] inputFile = argv[4] if modelType not in ["svm", "knn"]: raise Exception("ModelType has to be either svm or knn!") if not os.path.isfile(inputFile): raise Exception("Input audio file not found!") R, regressionNames = aT.fileRegression(inputFile, modelName, modelType) for i in range(len(R)): print "{0:s}\t{1:.3f}".format(regressionNames[i], R[i]) #print "{0:s}\t{1:.2f}".format(c,P[i]) else: print "Error.\nSyntax: " + argv[0] + " -regressionFile <method(svm or knn)> <modelName> <fileName>" elif argv[1] == "-classifyFolder": # Directory classification (Ok) if len(argv)==6 or len(argv)==5: modelType = argv[2] modelName = argv[3] inputFolder = argv[4] if len(argv)==6: outputMode = argv[5] else: outputMode = "0" if modelType not in ["svm", "knn"]: raise Exception("ModelType has to be either svm or knn!") if outputMode not in ["0","1"]: raise Exception("outputMode has to be 0 or 1") if not os.path.isfile(modelName): raise Exception("Input modelName not found!") files = '*.wav' if os.path.isdir(inputFolder): strFilePattern = os.path.join(inputFolder, files) else: strFilePattern = inputFolder + files wavFilesList = [] wavFilesList.extend(glob.glob(strFilePattern)) wavFilesList = sorted(wavFilesList) if len(wavFilesList)==0: print "No WAV files found!" 
return Results = [] for wavFile in wavFilesList: [Result, P, classNames] = aT.fileClassification(wavFile, modelName, modelType) Result = int(Result) Results.append(Result) if outputMode=="1": print "{0:s}\t{1:s}".format(wavFile,classNames[Result]) Results = numpy.array(Results) # print distribution of classes: [Histogram, _] = numpy.histogram(Results, bins=numpy.arange(len(classNames)+1)) for i,h in enumerate(Histogram): print "{0:20s}\t\t{1:d}".format(classNames[i], h) else: print "Error.\nSyntax: " + argv[0] + " -classifyFolder <method(svm or knn)> <modelName> <folderName> <outputMode(0 or 1)" elif argv[1] == "-regressionFolder": # Regression applied on the WAV files of a folder if len(argv)==5: modelType = argv[2] modelName = argv[3] inputFolder = argv[4] if modelType not in ["svm", "knn"]: raise Exception("ModelType has to be either svm or knn!") files = '*.wav' if os.path.isdir(inputFolder): strFilePattern = os.path.join(inputFolder, files) else: strFilePattern = inputFolder + files wavFilesList = [] wavFilesList.extend(glob.glob(strFilePattern)) wavFilesList = sorted(wavFilesList) if len(wavFilesList)==0: print "No WAV files found!" 
return Results = [] for wavFile in wavFilesList: R, regressionNames = aT.fileRegression(wavFile, modelName, modelType) Results.append(R) Results = numpy.array(Results) for i, r in enumerate(regressionNames): [Histogram, bins] = numpy.histogram(Results[:, i]) centers = (bins[0:-1] + bins[1::]) / 2.0 plt.subplot(len(regressionNames), 1, i); plt.plot(centers, Histogram) plt.title(r) plt.show() # for h in Histogram: # print "{0:20d}".format(h), # if outputMode=="1": # for i,h in enumerate(Histogram): # print "{0:20s}\t\t{1:d}".format(classNames[i], h) else: print "Error.\nSyntax: " + argv[0] + " -regressionFolder <method(svm or knn)> <modelName> <folderName>" elif argv[1] == '-trainHMMsegmenter_fromfile': if len(argv)==7: wavFile = argv[2] gtFile = argv[3] hmmModelName = argv[4] if not uT.isNum(argv[5]): print "Error: mid-term window size must be float!"; return if not uT.isNum(argv[6]): print "Error: mid-term window step must be float!"; return mtWin = float(argv[5]) mtStep = float(argv[6]) if not os.path.isfile(wavFile): print "Error: wavfile does not exist!"; return if not os.path.isfile(gtFile): print "Error: groundtruth does not exist!"; return aS.trainHMM_fromFile(wavFile, gtFile, hmmModelName, mtWin, mtStep) else: print "Error.\nSyntax: " + argv[0] + " -trainHMMsegmenter_fromfile <wavFilePath> <gtSegmentFilePath> <hmmModelFileName> <mtWin> <mtStep>" elif argv[1] == '-trainHMMsegmenter_fromdir': if len(argv)==6: dirPath = argv[2] hmmModelName = argv[3] if not uT.isNum(argv[4]): print "Error: mid-term window size must be float!" if not uT.isNum(argv[5]): print "Error: mid-term window step must be float!" 
mtWin = float(argv[4]) mtStep = float(argv[5]) aS.trainHMM_fromDir(dirPath, hmmModelName, mtWin, mtStep) else: print "Error.\nSyntax: " + argv[0] + " -trainHMMsegmenter_fromdir <dirPath> <hmmModelFileName> <mtWin> <mtStep>" elif argv[1] == "-segmentClassifyFileHMM": # HMM-based segmentation-classification if len(argv)==4: hmmModelName = argv[2] wavFile = argv[3] gtFile = wavFile.replace('.wav', '.segments'); aS.hmmSegmentation(wavFile, hmmModelName, PLOT = True, gtFileName = gtFile) else: print "Error.\nSyntax: " + argv[0] + " -segmentClassifyHMM <hmmModelName> <fileName>" elif argv[1] == '-segmentClassifyFile': # Segmentation-classification (fix-sized segment using knn or svm) if (len(argv)==5): modelType = argv[2] modelName = argv[3] inputWavFile = argv[4] if modelType not in ["svm", "knn"]: raise Exception("ModelType has to be either svm or knn!") if not os.path.isfile(modelName): raise Exception("Input modelName not found!") if not os.path.isfile(inputWavFile): raise Exception("Input audio file not found!") gtFile = inputWavFile.replace('.wav', '.segments'); aS.mtFileClassification(inputWavFile, modelName, modelType, True, gtFile) else: print "Error.\nSyntax: " + argv[0] + " -segmentClassifyFile <method(svm or knn)> <modelName> <fileName>" elif argv[1] == "-segmentationEvaluation": if len(argv)==5: methodName = argv[2] modelName = argv[3] dirName = argv[4] aS.evaluateSegmentationClassificationDir(dirName, modelName, methodName) else: print "Error.\nSyntax: " + argv[0] + " -segmentationEvaluation <method(svm or knn)> <modelName> <directoryName>" elif argv[1] == "-silenceRemoval": if len(argv)==5: inputFile = argv[2] if not os.path.isfile(inputFile): raise Exception("Input audio file not found!") smoothingWindow = float(argv[3]) weight = float(argv[4]) [Fs, x] = audioBasicIO.readAudioFile(inputFile) # read audio signal segmentLimits = aS.silenceRemoval(x, Fs, 0.05, 0.05, smoothingWindow, weight, False) # get onsets for i, s in enumerate(segmentLimits): strOut = 
"{0:s}_{1:.3f}-{2:.3f}.wav".format(inputFile[0:-4], s[0], s[1]) wavfile.write( strOut, Fs, x[int(Fs*s[0]):int(Fs*s[1])]) else: print "Error.\nSyntax: " + argv[0] + " -silenceRemoval <inputFile> <smoothinWindow(secs)> <Threshold Weight>" elif argv[1] == '-speakerDiarization': # speaker diarization (from file): TODO inputFile = argv[2] nSpeakers = int(argv[3]) useLDA = (int(argv[4])==1) if useLDA: aS.speakerDiarization(inputFile, nSpeakers, PLOT = True); else: aS.speakerDiarization(inputFile, nSpeakers, LDAdim = 0, PLOT = True); #print speechLimits elif argv[1] == "-speakerDiarizationScriptEval": dir = argv[2] listOfLDAs = [int(l) for l in argv[3::]] aS.speakerDiarizationEvaluateScript(dir, listOfLDAs) elif argv[1] == '-thumbnail': # music thumbnailing (OK) if len(argv)==4: inputFile = argv[2] stWindow = 1.0 stStep = 1.0 if not os.path.isfile(inputFile): raise Exception("Input audio file not found!") [Fs, x] = audioBasicIO.readAudioFile(inputFile) # read file if Fs == -1: # could not read file return try: thumbnailSize = float(argv[3]) except ValueError: print "Thumbnail size must be a float (in seconds)" return [A1, A2, B1, B2, Smatrix] = aS.musicThumbnailing(x, Fs, stWindow, stStep, thumbnailSize) # find thumbnail endpoints # write thumbnails to WAV files: thumbnailFileName1 = inputFile.replace(".wav","_thumb1.wav") thumbnailFileName2 = inputFile.replace(".wav","_thumb2.wav") wavfile.write(thumbnailFileName1, Fs, x[int(Fs*A1):int(Fs*A2)]) wavfile.write(thumbnailFileName2, Fs, x[int(Fs*B1):int(Fs*B2)]) print "1st thumbnail (stored in file {0:s}): {1:4.1f}sec -- {2:4.1f}sec".format(thumbnailFileName1, A1, A2) print "2nd thumbnail (stored in file {0:s}): {1:4.1f}sec -- {2:4.1f}sec".format(thumbnailFileName2, B1, B2) # Plot self-similarity matrix: fig = plt.figure() ax = fig.add_subplot(111, aspect='auto') plt.imshow(Smatrix) # Plot best-similarity diagonal: Xcenter = (A1/stStep + A2/stStep) / 2.0 Ycenter = (B1/stStep + B2/stStep) / 2.0 e1 = 
matplotlib.patches.Ellipse((Ycenter, Xcenter), thumbnailSize * 1.4, 3, angle=45, linewidth=3, fill=False) ax.add_patch(e1) plt.plot([B1, Smatrix.shape[0]], [A1, A1], color='k', linestyle='--', linewidth=2) plt.plot([B2, Smatrix.shape[0]], [A2, A2], color='k', linestyle='--', linewidth=2) plt.plot([B1, B1], [A1, Smatrix.shape[0]], color='k', linestyle='--', linewidth=2) plt.plot([B2, B2], [A2, Smatrix.shape[0]], color='k', linestyle='--', linewidth=2) plt.xlim([0, Smatrix.shape[0]]) plt.ylim([Smatrix.shape[1], 0]) ax.yaxis.set_label_position("right") ax.yaxis.tick_right() plt.xlabel('frame no') plt.ylabel('frame no') plt.title('Self-similarity matrix') plt.show() else: print "Error.\nSyntax: " + argv[0] + " -thumbnail <filename> <thumbnailsize(seconds)>"
def fileClassification(inputFile, modelName, modelType, chunk_seconds=None): # Load classifier: print "DEBUG: fileClassification - inputFile: " + inputFile ''' if not os.path.isfile(modelName): print "fileClassification: input modelName not found!" return (-1, -1, -1) if not os.path.isfile(inputFile): print "fileClassification: wav file not found!" return (-1, -1, -1) ''' #print "computeBeat" if (modelType) == 'svm' or (modelType == 'svm_rbf'): [ Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, computeBEAT ] = loadSVModel(modelName) elif modelType == 'knn': [ Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, computeBEAT ] = loadKNNModel(modelName) elif modelType == 'randomforest': [ Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, computeBEAT ] = loadRandomForestModel(modelName) elif modelType == 'gradientboosting': [ Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, computeBEAT ] = loadGradientBoostingModel(modelName) elif modelType == 'extratrees': [ Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, computeBEAT ] = loadExtraTreesModel(modelName) #print computeBEAT chunk_data = audioBasicIO.readAudioFile( inputFile, chunk_seconds) # read audio file and convert to mono if chunk_seconds: classification_data = [] features = [] for i, chunk in enumerate(chunk_data): #print str(i) + " of " + str(len(chunk_data)) [Fs, x] = chunk x = audioBasicIO.stereo2mono(x) if isinstance(x, int): # audio file IO problem return (-1, -1, -1) if x.shape[0] / float(Fs) <= mtWin: return (-1, -1, -1) # feature extraction: [MidTermFeatures, stFeatures] = aF.mtFeatureExtraction(x, Fs, mtWin * Fs, mtStep * Fs, round(Fs * stWin), round(Fs * stStep)) # if i == (len(chunk_data)-1): # print "stTermFeatures" # print stFeatures[33] # print len(stFeatures) # print "---------------" #features.append(MidTermFeatures.tolist()) MidTermFeatures = MidTermFeatures.mean( axis=1) # long term averaging of mid-term statistics 
features.append(MidTermFeatures.tolist()) if computeBEAT: [beat, beatConf] = aF.beatExtraction(stFeatures, stStep) MidTermFeatures = numpy.append(MidTermFeatures, beat) MidTermFeatures = numpy.append(MidTermFeatures, beatConf) curFV = (MidTermFeatures - MEAN) / STD # normalization [Result, P] = classifierWrapper(Classifier, modelType, curFV) # classification #print(type(classNames)) #print(type(P.tolist())) classification_data.append([Result, P.tolist(), classNames]) return [classification_data, features] [Fs, x] = chunk_data #audioBasicIO.readAudioFile(inputFile) x = audioBasicIO.stereo2mono(x) if isinstance(x, int): # audio file IO problem return (-1, -1, -1) if x.shape[0] / float(Fs) <= mtWin: return (-1, -1, -1) # feature extraction: [MidTermFeatures, s] = aF.mtFeatureExtraction(x, Fs, mtWin * Fs, mtStep * Fs, round(Fs * stWin), round(Fs * stStep)) MidTermFeatures = MidTermFeatures.mean( axis=1) # long term averaging of mid-term statistics if computeBEAT: [beat, beatConf] = aF.beatExtraction(s, stStep) MidTermFeatures = numpy.append(MidTermFeatures, beat) MidTermFeatures = numpy.append(MidTermFeatures, beatConf) curFV = (MidTermFeatures - MEAN) / STD # normalization [Result, P] = classifierWrapper(Classifier, modelType, curFV) # classification return [Result, P, classNames, MidTermFeatures.tolist()]