def evaluateClassifier(argv):
    """
    Extract fused features from every CSV file of a directory, select the
    SVM parameter by cross-validation, train the SVM and save it to disk.

    ARGUMENTS:
        argv: argument list, read positionally:
              argv[2]: directory that is searched for "*.csv" files
              argv[3]: accelerometer mode. "1", "2", "3" or "4" enable the
                       accelerometer flag; "1", "2" and "3" additionally set
                       the only-X, only-Y and only-Z flag respectively.
                       All flags are forwarded to featureExtraction().
              argv[4]: "1" to set the image flag
              argv[5]: path of the pickled classifier

    File names are split on "_": the second token is the class name and the
    last token (without ".csv") must parse as an integer.

    RETURNS:
        None. The classifier is pickled to argv[5]; MEAN, STD and the class
        names are pickled, in that order, to argv[5] + "MEANS".
    """
    dirName = argv[2]
    accelerometerMode = argv[3]
    useAccelerometer = accelerometerMode in ("1", "2", "3", "4")
    useAccelerometerOnlyX = accelerometerMode == "1"
    useAccelerometerOnlyY = accelerometerMode == "2"
    useAccelerometerOnlyZ = accelerometerMode == "3"
    useImage = argv[4] == "1"
    # read up front so a missing argument fails before the expensive training
    modelName = argv[5]

    classNames = []
    featuresAll = []   # one feature matrix per class, aligned with classNames
    for csvPath in sorted(glob.glob(os.path.join(dirName, "*.csv"))):
        nameTokens = ntpath.basename(csvPath).split("_")
        # The numeric suffix is not used for training; it is still parsed so
        # that a file that breaks the naming convention raises ValueError.
        int(nameTokens[-1].replace(".csv", ""))
        className = nameTokens[1]
        if className not in classNames:
            classNames.append(className)
            featuresAll.append([])
        classIndex = classNames.index(className)

        FeatureVectorFusion = featureExtraction(csvPath, useAccelerometer,
                                                useAccelerometerOnlyX,
                                                useAccelerometerOnlyY,
                                                useAccelerometerOnlyZ,
                                                useImage)
        print(FeatureVectorFusion.shape)
        if len(featuresAll[classIndex]) == 0:
            featuresAll[classIndex] = FeatureVectorFusion
        else:
            featuresAll[classIndex] = numpy.vstack(
                (featuresAll[classIndex], FeatureVectorFusion))

    (featuresAll, MEAN, STD) = aT.normalizeFeatures(featuresAll)
    bestParam = aT.evaluateClassifier(featuresAll, classNames, 1000, "svm",
                                      [0.05, 0.1, 0.5], 0, perTrain=0.80)
    MEAN = MEAN.tolist()
    STD = STD.tolist()

    # STEP C: Save the classifier to file
    Classifier = aT.trainSVM(featuresAll, bestParam)
    with open(modelName, 'wb') as fid:
        cPickle.dump(Classifier, fid)
    # "with" guarantees the handle is closed even if one of the dumps fails
    with open(modelName + "MEANS", "wb") as fo:
        cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
def Classify(clf, featuresAll, bestParam):
    """
    Train the classifier selected by name.

    ARGUMENTS:
        clf:          one of 'svm', 'svm_rbf', 'extratrees', 'randomforest',
                      'knn', 'gradientboosting'
        featuresAll:  training features, forwarded unchanged to the trainer
        bestParam:    classifier parameter, forwarded unchanged to the trainer

    RETURNS:
        the model returned by the matching aT.train* function

    RAISES:
        ValueError: if clf is not one of the supported names (previously
                    this surfaced as an UnboundLocalError on `model`)
    """
    # Names are resolved on aT only after the lookup succeeds, so an
    # unsupported name never touches the training module.
    trainerNames = {
        'svm': 'trainSVM',
        'svm_rbf': 'trainSVM_RBF',
        'extratrees': 'trainExtraTrees',
        'randomforest': 'trainRandomForest',
        'knn': 'trainKNN',
        'gradientboosting': 'trainGradientBoosting',
    }
    if clf not in trainerNames:
        raise ValueError("Unsupported classifier '{0}'; expected one of: {1}"
                         .format(clf, ", ".join(sorted(trainerNames))))
    trainer = getattr(aT, trainerNames[clf])
    return trainer(featuresAll, bestParam)
def train_SVM(st_feats):
    """
    Train a binary SVM that separates low-energy from high-energy frames.

    ARGUMENTS:
        st_feats: feature matrix indexed as [feature, frame]; row 1 is used
                  as the per-frame energy
    RETURNS:
        (svm, means_s, stds_s): the trained SVM plus the two normalization
        values returned by aT.normalizeFeatures
    """
    tiny = 0.000000000000001
    frame_energy = st_feats[1, :]
    sorted_energy = np.sort(frame_energy)
    tenth = int(len(sorted_energy) / 10)

    # low threshold: mean of the lowest 10% of the energy values
    low_thr = np.mean(sorted_energy[0:tenth]) + tiny
    # high threshold: mean of the highest 10% of the energy values
    # NOTE(review): the slice stops at -1, so the single largest value is
    # left out of this mean - confirm that this is intended.
    high_thr = np.mean(sorted_energy[-tenth:-1]) + tiny

    # frames at or below the low threshold form the first class,
    # frames at or above the high threshold form the second class
    low_cols = np.where(frame_energy <= low_thr)[0]
    high_cols = np.where(frame_energy >= high_thr)[0]
    low_frames = st_feats[:, low_cols]
    high_frames = st_feats[:, high_cols]

    # each class is transposed before normalization and training
    two_classes = [low_frames.T, high_frames.T]
    normalized, means_s, stds_s = aT.normalizeFeatures(two_classes)
    return aT.trainSVM(normalized, 1.0), means_s, stds_s
def getTrainClassifier(f_train,classifier_name,param):
    """
    Train the classifier whose model name equals classifier_name.

    ARGUMENTS:
        f_train:          training features, forwarded to the aT trainer
        classifier_name:  compared against the AudioClassifierManager
                          model-name constants
        param:            classifier parameter, forwarded to the aT trainer
    RETURNS:
        the trained classifier, or None when no model name matches
    """
    # NOTE(review): the double-underscore attribute names are only reachable
    # through name mangling, so this presumably lives inside the
    # AudioClassifierManager class body - confirm.
    if classifier_name == AudioClassifierManager.__svmModelName:
        return aT.trainSVM(f_train, param)
    if classifier_name == AudioClassifierManager.__svmRbfModelName:
        return aT.trainSVM_RBF(f_train, param)
    if classifier_name == AudioClassifierManager.__knnModelName:
        return aT.trainKNN(f_train, param)
    if classifier_name == AudioClassifierManager.__randomforestModelName:
        return aT.trainRandomForest(f_train, param)
    if classifier_name == AudioClassifierManager.__gradientboostingModelName:
        return aT.trainGradientBoosting(f_train, param)
    if classifier_name == AudioClassifierManager.__extratreesModelName:
        return aT.trainExtraTrees(f_train, param)
    return None
def trainDirs(self, dir_root):
    """
    Train an SVM on all wav files found in the sub-directories of dir_root.

    Each sub-directory is one class; the class name is the last path
    component of the directory. Entries of dir_root that are not
    directories, and directories that yield no features, are skipped so
    that self.classNames stays aligned with the per-class feature matrices.

    Sets self.classNames, self.MEAN, self.STD and self.Classifier.

    Raises ValueError when no class produced any features.
    """
    features = []   # one feature matrix per class, aligned with classNames
    self.classNames = []
    for d in glob.glob(dir_root + '/*'):
        if not os.path.isdir(d):
            continue    # os.listdir() below would fail on a plain file
        log.logv('featurize %s\n' % (d))
        # featurize() returns a matrix of numBlocks x numFeatures
        blocks = [self.featurize(os.path.join(d, w))
                  for w in os.listdir(d) if w.endswith('.wav')]
        if not blocks:
            continue
        # a single file is kept as returned; several files are stacked once
        class_features = blocks[0] if len(blocks) == 1 else np.vstack(blocks)
        if class_features.shape[0] > 0:
            features.append(class_features)
            # register the name only together with its features; appending
            # it unconditionally shifted the labels of all later classes
            self.classNames.append(os.path.basename(d))

    if not features:
        raise ValueError("No wav features found under '{0}'".format(dir_root))

    classifierParams = np.array([0.001, 0.01, 0.5, 1.0, 5.0, 10.0])

    # parameter mode 0 for best accuracy, 1 for best f1 score
    [featuresNew, self.MEAN, self.STD] = aT.normalizeFeatures(features)  # normalize features
    bestParam = aT.evaluateClassifier(features, self.classNames, 100, "svm",
                                      classifierParams, 0, perTrain=0.90)

    print("Selected params: {0:.5f}".format(bestParam))
    # TODO
    # 1. normalize before evaluating?
    # 2. try gaussian kernel?
    self.Classifier = aT.trainSVM(featuresNew, bestParam)
def silenceRemoval(x, fs, st_win, st_step, smoothWindow=0.5, weight=0.5, plot=False):
    '''
    Event Detection (silence removal)
    ARGUMENTS:
         - x:                the input audio signal
         - fs:               sampling freq
         - st_win, st_step:  window size and step in seconds
         - smoothWindow:     (optional) smooth window (in seconds)
         - weight:           (optional) weight factor (0 < weight < 1);
                             the higher, the more strict. Values >= 1 are
                             replaced by 0.99 and values <= 0 by 0.01
         - plot:             (optional) True if results are to be plotted
    RETURNS:
         - seg_limits:       list of segment limits in seconds
                             (e.g [[0.1, 0.9], [1.4, 3.0]] means that the
                             resulting segments are (0.1 - 0.9) seconds and
                             (1.4, 3.0) seconds). Segments that are not
                             longer than 0.2 seconds are removed.
    '''
    if weight >= 1:
        weight = 0.99
    if weight <= 0:
        weight = 0.01

    # Step 1: feature extraction
    x = audioBasicIO.stereo2mono(x)
    st_feats, _ = aF.stFeatureExtraction(x, fs, st_win * fs, st_step * fs)

    # Step 2: train binary svm classifier of low vs high energy frames
    # keep only the energy short-term sequence (2nd feature)
    st_energy = st_feats[1, :]
    en = numpy.sort(st_energy)
    # number of 10% of the total short-term windows
    l1 = int(len(en) / 10)
    # compute "lower" 10% energy threshold
    t1 = numpy.mean(en[0:l1]) + 0.000000000000001
    # compute "higher" 10% energy threshold
    # NOTE(review): the slice stops at -1, so the single largest value is
    # left out of this mean - confirm that this is intended.
    t2 = numpy.mean(en[-l1:-1]) + 0.000000000000001
    # get all features that correspond to low energy
    class1 = st_feats[:, numpy.where(st_energy <= t1)[0]]
    # get all features that correspond to high energy
    class2 = st_feats[:, numpy.where(st_energy >= t2)[0]]
    # form the binary classification task and ...
    faets_s = [class1.T, class2.T]
    # normalize and train the respective svm probabilistic model
    # (ONSET vs SILENCE)
    [faets_s_norm, means_s, stds_s] = aT.normalizeFeatures(faets_s)
    svm = aT.trainSVM(faets_s_norm, 1.0)

    # Step 3: compute onset probability based on the trained svm
    prob_on_set = []
    for i in range(st_feats.shape[1]):
        # for each frame
        cur_fv = (st_feats[:, i] - means_s) / stds_s
        # get svm probability (that it belongs to the ONSET class)
        prob_on_set.append(svm.predict_proba(cur_fv.reshape(1, -1))[0][1])
    prob_on_set = numpy.array(prob_on_set)
    # smooth probability:
    prob_on_set = smoothMovingAvg(prob_on_set, smoothWindow / st_step)

    # Step 4A: detect onset frame indices:
    prog_on_set_sort = numpy.sort(prob_on_set)
    # find probability Threshold as a weighted average
    # of top 10% and lower 10% of the values
    Nt = int(prog_on_set_sort.shape[0] / 10)
    T = (numpy.mean((1 - weight) * prog_on_set_sort[0:Nt]) +
         weight * numpy.mean(prog_on_set_sort[-Nt::]))

    # get the indices of the frames that satisfy the thresholding
    max_idx = numpy.where(prob_on_set > T)[0]
    i = 0
    seg_limits = []

    # Step 4B: group frame indices to onset segments
    # (indices that are at most 2 frames apart end up in the same cluster;
    # a cluster that would start on the very last index is not emitted)
    while i < len(max_idx):
        # for each of the detected onset indices
        cur_cluster = [max_idx[i]]
        if i == len(max_idx) - 1:
            break
        while max_idx[i + 1] - cur_cluster[-1] <= 2:
            cur_cluster.append(max_idx[i + 1])
            i += 1
            if i == len(max_idx) - 1:
                break
        i += 1
        seg_limits.append([cur_cluster[0] * st_step,
                           cur_cluster[-1] * st_step])

    # Step 5: Post process: remove very small segments:
    min_dur = 0.2
    seg_limits = [s for s in seg_limits if s[1] - s[0] > min_dur]

    if plot:
        timeX = numpy.arange(0, x.shape[0] / float(fs), 1.0 / fs)

        plt.subplot(2, 1, 1)
        plt.plot(timeX, x)
        for s in seg_limits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])
        # the title must be set while the first subplot is current;
        # it used to be issued on the second one and was overwritten there
        plt.title('Signal')

        plt.subplot(2, 1, 2)
        plt.plot(numpy.arange(0, prob_on_set.shape[0] * st_step, st_step),
                 prob_on_set)
        for s in seg_limits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])
        plt.title('svm Probability')
        plt.show()

    return seg_limits
def evaluateclassifier(features, class_names, n_exp, classifier_name, Params, parameterMode, perTrain=0.90):
    '''
    Evaluate a classifier over a list of parameter values with repeated
    random train/test splits and return the best-performing parameter.

    ARGUMENTS:
        features:         a list ([numOfClasses x 1]) whose elements contain
                          numpy matrices of features. Each matrix features[i]
                          of class i is [n_samples x numOfDimensions]
        class_names:      list of class names (strings)
        n_exp:            number of cross-validation experiments. Reduced to
                          50 when there are more than 1000 samples in total
                          and to 10 when there are more than 2000
        classifier_name:  "svm", "svm_rbf", "knn", "randomforest",
                          "gradientboosting", "extratrees" or
                          "logisticregression"
        Params:           list of classifier parameters (for parameter
                          tuning during cross-validation)
        parameterMode:    0: choose parameters that lead to maximum overall
                             classification ACCURACY
                          1: choose parameters that lead to maximum overall
                             f1 MEASURE
        perTrain:         forwarded to randSplitFeatures for every split
    RETURNS:
        bestParam:        the value of the input parameter that optimizes
                          the selected performance measure. None is returned
                          when parameterMode is neither 0 nor 1.
    RAISES:
        ValueError:       if classifier_name is not supported (previously
                          this surfaced as an unbound `classifier` inside
                          the evaluation loop)
    '''
    supported = ("svm", "svm_rbf", "knn", "randomforest",
                 "gradientboosting", "extratrees", "logisticregression")
    if classifier_name not in supported:
        raise ValueError("Unsupported classifier '{0}'; expected one of: {1}"
                         .format(classifier_name, ", ".join(supported)))

    # feature normalization:
    (features_norm, _, _) = normalizeFeatures(features)
    n_classes = len(features)
    ac_all = []
    f1_all = []
    precision_classes_all = []
    recall_classes_all = []
    f1_classes_all = []
    cms_all = []

    # compute total number of samples:
    n_samples_total = 0
    for f in features:
        n_samples_total += f.shape[0]
    if n_samples_total > 1000 and n_exp > 50:
        n_exp = 50
        print("Number of training experiments changed to 50 due to high number of samples")
    if n_samples_total > 2000 and n_exp > 10:
        n_exp = 10
        print("Number of training experiments changed to 10 due to high number of samples")

    for C in Params:
        # for each param value
        cm = numpy.zeros((n_classes, n_classes))
        for e in range(n_exp):
            # for each cross-validation iteration:
            print("Param = {0:.5f} - classifier Evaluation "
                  "Experiment {1:d} of {2:d}".format(C, e+1, n_exp))
            # split features:
            f_train, f_test = randSplitFeatures(features_norm, perTrain)
            # train multi-class svms:
            if classifier_name == "svm":
                classifier = trainSVM(f_train, C)
            elif classifier_name == "svm_rbf":
                classifier = trainSVM_RBF(f_train, C)
            elif classifier_name == "knn":
                classifier = trainKNN(f_train, C)
            elif classifier_name == "randomforest":
                classifier = trainRandomForest(f_train, C)
            elif classifier_name == "gradientboosting":
                classifier = trainGradientBoosting(f_train, C)
            elif classifier_name == "extratrees":
                classifier = trainExtraTrees(f_train, C)
            else:
                # "logisticregression": the only name left after validation
                classifier = trainLogisticRegression(f_train, C)

            # confusion matrix of this experiment: rows are true classes,
            # columns are predicted classes
            cmt = numpy.zeros((n_classes, n_classes))
            for c1 in range(n_classes):
                n_test_samples = len(f_test[c1])
                res = numpy.zeros((n_test_samples, 1))
                for ss in range(n_test_samples):
                    [res[ss], _] = classifierWrapperHead(classifier,
                                                         classifier_name,
                                                         f_test[c1][ss])
                for c2 in range(n_classes):
                    cmt[c1][c2] = float(len(numpy.nonzero(res == c2)[0]))
            cm = cm + cmt
        # small offset keeps the divisions below away from zero
        cm = cm + 0.0000000010
        rec = numpy.zeros((cm.shape[0], ))
        pre = numpy.zeros((cm.shape[0], ))

        for ci in range(cm.shape[0]):
            rec[ci] = cm[ci, ci] / numpy.sum(cm[ci, :])
            pre[ci] = cm[ci, ci] / numpy.sum(cm[:, ci])
        precision_classes_all.append(pre)
        recall_classes_all.append(rec)
        f1 = 2 * rec * pre / (rec + pre)
        f1_classes_all.append(f1)
        ac_all.append(numpy.sum(numpy.diagonal(cm)) / numpy.sum(cm))

        cms_all.append(cm)
        f1_all.append(numpy.mean(f1))

    # results table: one row per parameter value
    print("\t\t", end="")
    for i, c in enumerate(class_names):
        if i == len(class_names)-1:
            print("{0:s}\t\t".format(c), end="")
        else:
            print("{0:s}\t\t\t".format(c), end="")
    print("OVERALL")
    print("\tC", end="")
    for c in class_names:
        print("\tPRE\tREC\tf1", end="")
    print("\t{0:s}\t{1:s}".format("ACC", "f1"))
    best_ac_ind = numpy.argmax(ac_all)
    best_f1_ind = numpy.argmax(f1_all)
    for i in range(len(precision_classes_all)):
        print("\t{0:.3f}".format(Params[i]), end="")
        for c in range(len(precision_classes_all[i])):
            print("\t{0:.1f}\t{1:.1f}\t{2:.1f}".format(100.0 * precision_classes_all[i][c],
                                                       100.0 * recall_classes_all[i][c],
                                                       100.0 * f1_classes_all[i][c]), end="")
        print("\t{0:.1f}\t{1:.1f}".format(100.0 * ac_all[i], 100.0 * f1_all[i]), end="")
        if i == best_f1_ind:
            print("\t best f1", end="")
        if i == best_ac_ind:
            print("\t best Acc", end="")
        print("")

    if parameterMode == 0:
        # keep parameters that maximize overall classification accuracy:
        print("Confusion Matrix:")
        printConfusionMatrix(cms_all[best_ac_ind], class_names)
        return Params[best_ac_ind]
    elif parameterMode == 1:
        # keep parameters that maximize overall f1 measure:
        print("Confusion Matrix:")
        printConfusionMatrix(cms_all[best_f1_ind], class_names)
        return Params[best_f1_ind]
def featureAndTrain(list_of_dirs, mt_win, mt_step, st_win, st_step, classifier_type, model_name, compute_beat=False, perTrain=0.90, feats=["gfcc", "mfcc"]):
    '''
    This function is used as a wrapper to segment-based audio feature
    extraction and classifier training.
    ARGUMENTS:
        list_of_dirs:     list of paths of directories. Each directory
                          contains a single audio class whose samples are
                          stored in separate WAV files.
        mt_win, mt_step:  mid-term window length and step
        st_win, st_step:  short-term window and step
        classifier_type:  "svm" or "svm_rbf" or "knn" or "randomforest" or
                          "gradientboosting" or "extratrees" or
                          "logisticregression"
        model_name:       name of the model to be saved
        compute_beat:     forwarded to the feature extraction and stored
                          with the model
        perTrain:         forwarded to evaluateclassifier
        feats:            forwarded to the feature extraction; it is never
                          modified here
    RETURNS:
        None. Resulting classifier along with the respective model
        parameters are saved on files. An error is printed and nothing is
        written when no data is found or when one class has no features.
    RAISES:
        ValueError:       if classifier_type is not supported (previously
                          this surfaced as an unbound `classifier_par`
                          after the feature extraction had already run)
    '''
    # candidate parameter values per classifier type
    param_grids = {
        "svm": [0.001, 0.01, 0.5, 1.0, 5.0, 10.0, 20.0],
        "svm_rbf": [0.001, 0.01, 0.5, 1.0, 5.0, 10.0, 20.0],
        "randomforest": [10, 25, 50, 100, 200, 500],
        "knn": [1, 3, 5, 7, 9, 11, 13, 15],
        "gradientboosting": [10, 25, 50, 100, 200, 500],
        "extratrees": [10, 25, 50, 100, 200, 500],
        "logisticregression": [0.01, 0.1, 1, 5],
    }
    if classifier_type not in param_grids:
        raise ValueError("Unsupported classifier '{0}'; expected one of: {1}"
                         .format(classifier_type,
                                 ", ".join(sorted(param_grids))))

    # STEP A: Feature Extraction:
    [features, classNames, _] = aF.dirsWavFeatureExtraction(list_of_dirs,
                                                            mt_win,
                                                            mt_step,
                                                            st_win,
                                                            st_step,
                                                            compute_beat=compute_beat,
                                                            feats=feats)

    if len(features) == 0:
        print("trainSVM_feature ERROR: No data found in any input folder!")
        return

    # Validate every class BEFORE touching features[0].shape or writing the
    # ARFF file, so an empty first class yields the message below instead
    # of an exception and no partial output is left behind.
    for i, f in enumerate(features):
        if len(f) == 0:
            print("trainSVM_feature ERROR: " + list_of_dirs[i] + " folder is empty or non-existing!")
            return

    n_feats = features[0].shape[1]
    feature_names = ["features" + str(d + 1) for d in range(n_feats)]
    writeTrainDataToARFF(model_name, features, classNames, feature_names)

    # STEP B: classifier Evaluation and Parameter Selection:
    classifier_par = numpy.array(param_grids[classifier_type])

    # drop every feature vector that contains a NaN or an infinity
    features2 = []
    for f in features:
        fTemp = []
        for i in range(f.shape[0]):
            temp = f[i, :]
            if (not numpy.isnan(temp).any()) and (not numpy.isinf(temp).any()):
                fTemp.append(temp.tolist())
            else:
                print("NaN Found! Feature vector not used for training")
        features2.append(numpy.array(fTemp))
    features = features2

    # get optimal classifier parameter:
    bestParam = evaluateclassifier(features, classNames, 300, classifier_type, classifier_par, 0, perTrain)

    print("Selected params: {0:.5f}".format(bestParam))

    [features_norm, MEAN, STD] = normalizeFeatures(features)  # normalize features
    MEAN = MEAN.tolist()
    STD = STD.tolist()
    featuresNew = features_norm

    # STEP C: Save the classifier to file
    if classifier_type == "knn":
        # kNN has no trained model object: the normalized training data is
        # stored in the model file together with the parameters
        [X, Y] = listOfFeatures2Matrix(featuresNew)
        X = X.tolist()
        Y = Y.tolist()
        with open(model_name, "wb") as fo:
            for item in (X, Y, MEAN, STD, classNames, bestParam,
                         mt_win, mt_step, st_win, st_step, compute_beat):
                cPickle.dump(item, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        return

    if classifier_type == "svm":
        classifier = trainSVM(featuresNew, bestParam)
    elif classifier_type == "svm_rbf":
        classifier = trainSVM_RBF(featuresNew, bestParam)
    elif classifier_type == "randomforest":
        classifier = trainRandomForest(featuresNew, bestParam)
    elif classifier_type == "gradientboosting":
        classifier = trainGradientBoosting(featuresNew, bestParam)
    elif classifier_type == "extratrees":
        classifier = trainExtraTrees(featuresNew, bestParam)
    else:
        # "logisticregression": the only type left after validation
        classifier = trainLogisticRegression(featuresNew, bestParam)

    with open(model_name, 'wb') as fid:
        cPickle.dump(classifier, fid)
    with open(model_name + "MEANS", "wb") as fo:
        for item in (MEAN, STD, classNames,
                     mt_win, mt_step, st_win, st_step, compute_beat):
            cPickle.dump(item, fo, protocol=cPickle.HIGHEST_PROTOCOL)
# NOTE(review): this fragment starts mid-script; the first statement
# presumably sat inside a loop whose header is not part of this excerpt,
# and `temp`, `classNames` and `modelName` are defined outside it - confirm.
features.append(temp)

# candidate SVM parameter values and number of evaluation experiments
classifierParams = numpy.array([0.001, 0.01, 0.5, 1.0, 5.0])
nExp = 50
# NOTE(review): perTrain=0.01 is passed here; if it is the training
# fraction of each split, 1% looks unusually small - confirm.
bestParam = audioTrainTest.evaluateClassifier(features, classNames, nExp, "svm", classifierParams, 0, perTrain=0.01)

[featuresNorm, MEAN, STD] = audioTrainTest.normalizeFeatures(features)  # normalize features
# converted to plain lists before being pickled below
MEAN = MEAN.tolist()
STD = STD.tolist()
featuresNew = featuresNorm

# train the final SVM with the selected parameter and store it under
# <directory of the running script>/classifier_data/<modelName>
Classifier = audioTrainTest.trainSVM(featuresNew, bestParam)
Classifier.save_model(os.path.dirname(os.path.realpath(sys.argv[0])) + '/classifier_data/' + modelName)

# companion "<modelName>MEANS" file: MEAN, STD, class names and five zeros
# (the zeros are presumably placeholders for values a model loader expects
# after the class names - confirm against the loader)
fo = open(os.path.dirname(os.path.realpath(sys.argv[0])) + '/classifier_data/' + modelName + "MEANS", "wb")
cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
cPickle.dump(0, fo, protocol=cPickle.HIGHEST_PROTOCOL)
cPickle.dump(0, fo, protocol=cPickle.HIGHEST_PROTOCOL)
cPickle.dump(0, fo, protocol=cPickle.HIGHEST_PROTOCOL)
cPickle.dump(0, fo, protocol=cPickle.HIGHEST_PROTOCOL)
cPickle.dump(0, fo, protocol=cPickle.HIGHEST_PROTOCOL)
fo.close()
def main(rootName, modelType, classifierParam, signal_type):
    """
    Run a 10-fold "fail" vs "success" evaluation and print a summary.

    For every fold directory <rootName>/fold_<k> (k = 0..9) the classifier
    is trained on train/fail and train/success and evaluated on test/fail
    and test/success.

    ARGUMENTS:
        rootName:         directory that contains the fold_<k> directories
        modelType:        "svm", "svm_rbf", "randomforest",
                          "gradientboosting" or "extratrees"
        classifierParam:  classifier parameter; converted with int() unless
                          modelType is "svm" or "svm_rbf"
        signal_type:      forwarded to dirFeatureExtraction
    RETURNS:
        (CMall, Acc, Recall, Precision, F1): the confusion matrix summed
        over all folds and the measures computed from it
    RAISES:
        ValueError:       if modelType is not supported (previously this
                          surfaced as an unbound `Classifier` after the
                          features of the first fold had been extracted)
    """
    trainerNames = {
        "svm": "trainSVM",
        "svm_rbf": "trainSVM_RBF",
        "randomforest": "trainRandomForest",
        "gradientboosting": "trainGradientBoosting",
        "extratrees": "trainExtraTrees",
    }
    if modelType not in trainerNames:
        raise ValueError("Unsupported model type '{0}'; expected one of: {1}"
                         .format(modelType, ", ".join(sorted(trainerNames))))

    CMall = numpy.zeros((2, 2))
    if modelType != "svm" and modelType != "svm_rbf":
        C = [int(classifierParam)]
    else:
        # NOTE(review): the value is passed through unconverted for the
        # svm types - confirm that callers already supply a number.
        C = [(classifierParam)]
    F1s = []
    Accs = []
    for ifold in range(0, 10):
        # for each fold: get fold path name
        dirName = rootName + os.sep + "fold_{0:d}".format(ifold)

        # TRAINING data feature extraction
        classNamesTrain, featuresTrain = dirFeatureExtraction([
            os.path.join(dirName, "train", "fail"),
            os.path.join(dirName, "train", "success")
        ], signal_type)
        # internal cross-validation (for param selection)
        bestParam = aT.evaluateClassifier(featuresTrain, classNamesTrain, 2,
                                          modelType, C, 0, 0.90)

        # TESTING data feature extraction
        classNamesTest, featuresTest = dirFeatureExtraction([
            os.path.join(dirName, "test", "fail"),
            os.path.join(dirName, "test", "success")
        ], signal_type)

        # training features NORMALIZATION
        [featuresTrainNew, MEAN, STD] = aT.normalizeFeatures(featuresTrain)

        # classifier training
        Classifier = getattr(aT, trainerNames[modelType])(featuresTrainNew,
                                                          bestParam)

        # evaluation on testing data
        CM = numpy.zeros((2, 2))
        for iC, f in enumerate(featuresTest):
            # for each class
            for i in range(f.shape[0]):
                # normalize the test feature vector with the training stats
                curF = (f[i, :] - MEAN) / STD
                # classify and get winner class
                winnerClass = classNamesTrain[int(
                    aT.classifierWrapper(Classifier, modelType, curF)[0])]
                # get groundtruth class
                trueClass = classNamesTest[iC]
                # update confusion matrix
                CM[classNamesTrain.index(trueClass)][
                    classNamesTrain.index(winnerClass)] += 1
        # update overall confusion matrix
        CMall += CM
        # get recall, precision and F1 (per class)
        Recall, Precision, F1 = computePreRec(CM, classNamesTrain)
        # get overall accuracy
        Acc = numpy.diagonal(CM).sum() / CM.sum()
        F1s.append(numpy.mean(F1))    # append average F1
        Accs.append(Acc)              # append classification accuracy

    # single-argument print(...) calls parse on both Python 2 and Python 3
    print("")
    print("FINAL RESULTS")
    print("")
    print("----------------------------------")
    print("fold\tacc\tf1")
    print("----------------------------------")
    for i in range(len(F1s)):
        print("{0:d}\t{1:.1f}\t{2:.1f}".format(i, 100 * Accs[i], 100 * F1s[i]))
    Acc = numpy.diagonal(CMall).sum() / CMall.sum()
    Recall, Precision, F1 = computePreRec(CMall, classNamesTrain)
    print("----------------------------------")
    print("{0:s}\t{1:.1f}\t{2:.1f}".format("Avg", 100 * numpy.mean(Accs),
                                           100 * numpy.mean(F1s)))
    print("{0:s}\t{1:.1f}\t{2:.1f}".format("Av CM", 100 * Acc,
                                           100 * numpy.mean(F1)))
    print("----------------------------------")
    print("")
    print("Overal Confusion matrix:")
    aT.printConfusionMatrix(CMall, classNamesTrain)
    print("")
    print("FAIL Recall = {0:.1f}".format(
        100 * Recall[classNamesTrain.index("fail")]))
    print("FAIL Precision = {0:.1f}".format(
        100 * Precision[classNamesTrain.index("fail")]))
    print("SUCCESS Recall = {0:.1f}".format(
        100 * Recall[classNamesTrain.index("success")]))
    print("SUCCESS Precision = {0:.1f}".format(
        100 * Precision[classNamesTrain.index("success")]))

    return CMall, Acc, Recall, Precision, F1
def silenceRemoval(x, Fs, stWin, stStep, smoothWindow=0.5, Weight=0.5, plot=False):
    '''
    Event Detection (silence removal)
    ARGUMENTS:
         - x:                the input audio signal
         - Fs:               sampling freq
         - stWin, stStep:    window size and step in seconds
         - smoothWindow:     (optional) smooth window (in seconds)
         - Weight:           (optional) weight factor (0 < Weight < 1);
                             the higher, the more strict. Values >= 1 are
                             replaced by 0.99 and values <= 0 by 0.01
         - plot:             (optional) True if results are to be plotted
    RETURNS:
         - segmentLimits:    list of segment limits in seconds
                             (e.g [[0.1, 0.9], [1.4, 3.0]] means that the
                             resulting segments are (0.1 - 0.9) seconds and
                             (1.4, 3.0) seconds). Segments that are not
                             longer than 0.2 seconds are removed.
    '''
    if Weight >= 1:
        Weight = 0.99
    if Weight <= 0:
        Weight = 0.01

    # Step 1: feature extraction
    x = audioBasicIO.stereo2mono(x)    # convert to mono
    # extract short-term features
    ShortTermFeatures = aF.stFeatureExtraction(x, Fs, stWin * Fs, stStep * Fs)

    # Step 2: train binary SVM classifier of low vs high energy frames
    # keep only the energy short-term sequence (2nd feature)
    EnergySt = ShortTermFeatures[1, :]
    E = numpy.sort(EnergySt)           # sort the energy feature values
    L1 = int(len(E) / 10)              # number of 10% of the total short-term windows
    # compute "lower" 10% energy threshold
    T1 = numpy.mean(E[0:L1]) + 0.000000000000001
    # compute "higher" 10% energy threshold
    # NOTE(review): the slice stops at -1, so the single largest value is
    # left out of this mean - confirm that this is intended.
    T2 = numpy.mean(E[-L1:-1]) + 0.000000000000001
    # get all features that correspond to low energy
    Class1 = ShortTermFeatures[:, numpy.where(EnergySt <= T1)[0]]
    # get all features that correspond to high energy
    Class2 = ShortTermFeatures[:, numpy.where(EnergySt >= T2)[0]]
    # form the binary classification task, normalize and train the
    # respective SVM probabilistic model (ONSET vs SILENCE)
    featuresSS = [Class1.T, Class2.T]
    [featuresNormSS, MEANSS, STDSS] = aT.normalizeFeatures(featuresSS)
    SVM = aT.trainSVM(featuresNormSS, 1.0)

    # Step 3: compute onset probability based on the trained SVM
    ProbOnset = []
    for i in range(ShortTermFeatures.shape[1]):
        # for each frame: normalize feature vector
        curFV = (ShortTermFeatures[:, i] - MEANSS) / STDSS
        # get SVM probability (that it belongs to the ONSET class)
        ProbOnset.append(SVM.predict_proba(curFV.reshape(1, -1))[0][1])
    ProbOnset = numpy.array(ProbOnset)
    # smooth probability
    ProbOnset = smoothMovingAvg(ProbOnset, smoothWindow / stStep)

    # Step 4A: detect onset frame indices:
    # find probability Threshold as a weighted average of top 10% and
    # lower 10% of the values
    ProbOnsetSorted = numpy.sort(ProbOnset)
    Nt = int(ProbOnsetSorted.shape[0] / 10)
    T = (numpy.mean((1 - Weight) * ProbOnsetSorted[0:Nt]) +
         Weight * numpy.mean(ProbOnsetSorted[-Nt::]))

    # get the indices of the frames that satisfy the thresholding
    MaxIdx = numpy.where(ProbOnset > T)[0]
    i = 0
    segmentLimits = []

    # Step 4B: group frame indices to onset segments
    # (indices that are at most 2 frames apart end up in the same cluster;
    # a cluster that would start on the very last index is not emitted)
    while i < len(MaxIdx):
        # for each of the detected onset indices
        curCluster = [MaxIdx[i]]
        if i == len(MaxIdx) - 1:
            break
        while MaxIdx[i + 1] - curCluster[-1] <= 2:
            curCluster.append(MaxIdx[i + 1])
            i += 1
            if i == len(MaxIdx) - 1:
                break
        i += 1
        segmentLimits.append([curCluster[0] * stStep,
                              curCluster[-1] * stStep])

    # Step 5: Post process: remove very small segments:
    minDuration = 0.2
    segmentLimits = [s for s in segmentLimits if s[1] - s[0] > minDuration]

    if plot:
        timeX = numpy.arange(0, x.shape[0] / float(Fs), 1.0 / Fs)

        plt.subplot(2, 1, 1)
        plt.plot(timeX, x)
        for s in segmentLimits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])
        # the title must be set while the first subplot is current;
        # it used to be issued on the second one and was overwritten there
        plt.title('Signal')

        plt.subplot(2, 1, 2)
        plt.plot(numpy.arange(0, ProbOnset.shape[0] * stStep, stStep),
                 ProbOnset)
        for s in segmentLimits:
            plt.axvline(x=s[0])
            plt.axvline(x=s[1])
        plt.title('SVM Probability')
        plt.show()

    return segmentLimits
def silenceCounter(x, fs, st_win, st_step, smoothWindow=0.5, weight=0.5, plot=False):
    '''
    Detect segments with the same procedure as the silence-removal routine,
    but with the two training classes swapped, and print them.

    ARGUMENTS:
         - x:                the input audio signal
         - fs:               sampling freq
         - st_win, st_step:  window size and step in seconds
         - smoothWindow:     smooth window (in seconds)
         - weight:           weight factor; values >= 1 are replaced by
                             0.99 and values <= 0 by 0.01
         - plot:             accepted but not used by this function
    RETURNS:
         None. Two lines are printed: the segments longer than 0.2 seconds
         and the complete list of detected segments.
    '''
    if weight >= 1:
        weight = 0.99
    if weight <= 0:
        weight = 0.01

    # Step 1: feature extraction
    x = audioBasicIO.stereo2mono(x)
    st_feats, _ = aF.stFeatureExtraction(x, fs, st_win * fs, st_step * fs)

    # Step 2: train binary svm classifier of low vs high energy frames,
    # using only the energy short-term sequence (2nd feature) for the split
    tiny = 0.000000000000001
    frame_energy = st_feats[1, :]
    sorted_energy = numpy.sort(frame_energy)
    tenth = int(len(sorted_energy) / 10)
    low_thr = numpy.mean(sorted_energy[0:tenth]) + tiny
    high_thr = numpy.mean(sorted_energy[-tenth:-1]) + tiny
    low_frames = st_feats[:, numpy.where(frame_energy <= low_thr)[0]]
    high_frames = st_feats[:, numpy.where(frame_energy >= high_thr)[0]]

    # the high-energy class comes first here, so the probability of the
    # second class computed below refers to the low-energy frames
    swapped = [high_frames.T, low_frames.T]
    normalized, means_s, stds_s = aT.normalizeFeatures(swapped)
    svm = aT.trainSVM(normalized, 1.0)

    # Step 3: per-frame probability of the second class
    probabilities = numpy.array([
        svm.predict_proba(
            ((st_feats[:, k] - means_s) / stds_s).reshape(1, -1))[0][1]
        for k in range(st_feats.shape[1])
    ])
    probabilities = smoothMovingAvg(probabilities, smoothWindow / st_step)

    # Step 4A: threshold as a weighted average of the lowest and the
    # highest 10% of the sorted probabilities
    ordered = numpy.sort(probabilities)
    Nt = int(ordered.shape[0] / 10)
    T = (numpy.mean((1 - weight) * ordered[0:Nt]) +
         weight * numpy.mean(ordered[-Nt::]))
    hits = numpy.where(probabilities > T)[0]

    # Step 4B: group frame indices that are at most 2 apart into segments
    seg_limits = []
    pos = 0
    last = len(hits) - 1
    while pos < len(hits):
        cluster = [hits[pos]]
        if pos == last:
            break
        while hits[pos + 1] - cluster[-1] <= 2:
            cluster.append(hits[pos + 1])
            pos += 1
            if pos == last:
                break
        pos += 1
        seg_limits.append([cluster[0] * st_step, cluster[-1] * st_step])

    # Step 5: keep only the segments longer than the minimum duration
    min_dur = 0.2
    seg_limits_2 = [seg for seg in seg_limits if seg[1] - seg[0] > min_dur]

    print(f"SEGMENTS 0.2: {seg_limits_2}")
    print(F"SEGMENTS: {seg_limits}")
if __name__ == '__main__':
    # Train an SVM on the per-class feature files found in
    # <directory of this script>/classifier_data/<class>.npy and store the
    # model plus its normalization values next to them.
    rospy.init_node("classifier_train_node")
    modelName = rospy.get_param('~classifier_name', 'modelSVM')

    # The parameter is a whitespace-separated string of class names. The
    # default must therefore be a string as well: the former set default
    # made .split() fail whenever the parameter was not set.
    classNames = rospy.get_param('~classes', 'silence speech')
    classNames = classNames.split()

    dataDir = os.path.dirname(os.path.realpath(sys.argv[0])) + '/classifier_data/'
    features = [numpy.load(dataDir + a + '.npy') for a in classNames]

    classifierParams = numpy.array([0.001, 0.01, 0.5, 1.0, 5.0])
    nExp = 50
    # NOTE(review): perTrain=0.01 is passed here; if it is the training
    # fraction of each split, 1% looks unusually small - confirm.
    bestParam = audioTrainTest.evaluateClassifier(features, classNames, nExp, "svm", classifierParams, 0, perTrain = 0.01)

    [featuresNorm, MEAN, STD] = audioTrainTest.normalizeFeatures(features)        # normalize features
    MEAN = MEAN.tolist()
    STD = STD.tolist()
    featuresNew = featuresNorm
    Classifier = audioTrainTest.trainSVM(featuresNew, bestParam)
    Classifier.save_model(dataDir + modelName)

    # "with" guarantees the handle is closed even if one of the dumps fails
    with open(dataDir + modelName + "MEANS", "wb") as fo:
        cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        # five zeros follow the class names (presumably placeholders for
        # values a model loader expects at these positions - confirm)
        for _ in range(5):
            cPickle.dump(0, fo, protocol=cPickle.HIGHEST_PROTOCOL)
def trainTextClassifiers(directoryPath, classifierType, classifierName):
    """
    Train a text classifier from a directory of per-class sub-folders.

    ARGUMENTS:
     - directoryPath:   directory whose immediate sub-directories are the
                        classes; every file inside a sub-directory is one
                        training document of that class
     - classifierType:  one of "svm", "randomforest", "knn",
                        "gradientboosting", "extratrees"
     - classifierName:  path of the output model file; normalization
                        statistics and class names are written to
                        classifierName + "MEANS"

    RAISES:
     - ValueError: if classifierType is not one of the supported names

    NOTES:
     - Dictionaries are loaded from "myDicts/", relative to the current
       working directory.
     - Sub-directories that contain no files are skipped, so that the list of
       class names always lines up with the list of feature matrices.
     - For "knn" the parameter is evaluated and the "MEANS" file is written,
       but no model file is saved (no kNN trainer is wired in here).
    """
    # Parameter grid searched for each supported classifier type.
    paramGrids = {
        "svm": [0.001, 0.01, 0.5, 1.0, 5.0, 10.0],
        "randomforest": [10, 25, 50, 100, 200, 500],
        "knn": [1, 3, 5, 7, 9, 11, 13, 15],
        "gradientboosting": [10, 25, 50, 100, 200, 500],
        "extratrees": [10, 25, 50, 100, 200, 500],
    }
    # Name of the audioTrainTest training function for each type that can be
    # saved as a model.
    trainerNames = {
        "svm": "trainSVM",
        "randomforest": "trainRandomForest",
        "gradientboosting": "trainGradientBoosting",
        "extratrees": "trainExtraTrees",
    }
    # Fail fast, before the (expensive) feature extraction, instead of hitting
    # an undefined variable afterwards.
    if classifierType not in paramGrids:
        raise ValueError(
            "Unsupported classifier type: {0!r}".format(classifierType))
    classifierParams = numpy.array(paramGrids[classifierType])

    subdirectories = get_immediate_subdirectories(directoryPath)
    dicts = loadDictionaries("myDicts/")
    classNames = []
    Features = []

    # extract features from corpus
    for si, s in enumerate(subdirectories):
        # get list of files in the current class directory
        files = getListOfFilesInDir(directoryPath + os.sep + s, "*")
        if 0 < MAX_FILES_PER_CLASS < len(files):
            files = random.sample(files, MAX_FILES_PER_CLASS)
        # Single-argument print() behaves the same on Python 2 and 3.
        print("Training folder {0:d} of {1:d} ({2:s})  - {3:d} files".format(
            si + 1, len(subdirectories), s, len(files)))
        if not files:
            # An empty class would add a name without a feature matrix and
            # shift the labels of every following class.
            continue

        classFeatures = []
        for fi in files:
            with open(fi) as f:
                content = f.read()
            # one transposed feature vector per document
            classFeatures.append(getFeaturesFromText(content, dicts).T)

        classNames.append(s)
        # Concatenate once per class rather than once per file.
        Features.append(numpy.concatenate(classFeatures, axis=0))

    # evaluate classifier and select best param
    nExp = 10
    bestParam = audioTrainTest.evaluateClassifier(
        Features, classNames, nExp, classifierType, classifierParams, 0, 0.9)

    # normalize features
    [featuresNorm, MEAN, STD] = audioTrainTest.normalizeFeatures(Features)
    MEAN = MEAN.tolist()
    STD = STD.tolist()

    # train on the normalized features and save the classifier to file
    trainerName = trainerNames.get(classifierType)
    if trainerName is not None:
        Classifier = getattr(audioTrainTest, trainerName)(
            featuresNorm, bestParam)
        with open(classifierName, 'wb') as fid:
            cPickle.dump(Classifier, fid)

    # save normalization statistics and class names
    with open(classifierName + "MEANS", "wb") as fo:
        cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
def silenceRemoval(x, fs, st_win, st_step, smoothWindow=0.5, weight=0.5,
                   plot=False):
    """
    Event Detection (silence removal)

    ARGUMENTS:
     - x:                the input audio signal
     - fs:               sampling freq
     - st_win, st_step:  window size and step in seconds
     - smoothWindow:     (optional) smooth window (in seconds)
     - weight:           (optional) weight factor (0 < weight < 1);
                         the higher, the more strict. Values outside the
                         open interval are clamped to 0.01 / 0.99
     - plot:             (optional) True if results are to be plotted
    RETURNS:
     - seg_limits:       list of segment limits in seconds
                         (e.g [[0.1, 0.9], [1.4, 3.0]] means that the
                         resulting segments are (0.1 - 0.9) seconds and
                         (1.4, 3.0) seconds

    NOTE: with fewer than 20 short-term frames the "high energy" slice
    en[-l1:-1] is empty, so its mean (and the threshold t2) is NaN.
    """
    if weight >= 1:
        weight = 0.99
    if weight <= 0:
        weight = 0.01

    # Step 1: feature extraction
    x = audioBasicIO.stereo_to_mono(x)
    st_feats, _ = sF.feature_extraction(x, fs, st_win * fs, st_step * fs)

    # Step 2: train binary svm classifier of low vs high energy frames
    # keep only the energy short-term sequence (2nd feature)
    st_energy = st_feats[1, :]
    en = np.sort(st_energy)
    # number of 10% of the total short-term windows
    l1 = int(len(en) / 10)
    # compute "lower" 10% energy threshold (tiny offset keeps it non-zero)
    t1 = np.mean(en[0:l1]) + 0.000000000000001
    # compute "higher" 10% energy threshold (the slice excludes the single
    # highest-energy frame)
    t2 = np.mean(en[-l1:-1]) + 0.000000000000001
    # get all features that correspond to low energy
    class1 = st_feats[:, np.where(st_energy <= t1)[0]]
    # get all features that correspond to high energy
    class2 = st_feats[:, np.where(st_energy >= t2)[0]]
    # form the binary classification task and ...
    faets_s = [class1.T, class2.T]
    # normalize and train the respective svm probabilistic model
    # (ONSET vs SILENCE)
    [faets_s_norm, means_s, stds_s] = aT.normalizeFeatures(faets_s)
    svm = aT.trainSVM(faets_s_norm, 1.0)

    # Step 3: compute onset probability based on the trained svm.
    # All frames are normalized and scored in one batched call (one row per
    # frame); column 1 is the probability of the ONSET class.
    frames_norm = (st_feats.T - means_s) / stds_s
    prob_on_set = np.array(svm.predict_proba(frames_norm))[:, 1]
    # smooth probability:
    prob_on_set = smoothMovingAvg(prob_on_set, smoothWindow / st_step)

    # Step 4A: detect onset frame indices:
    prog_on_set_sort = np.sort(prob_on_set)
    # find probability Threshold as a weighted average
    # of top 10% and lower 10% of the values
    Nt = int(prog_on_set_sort.shape[0] / 10)
    T = (np.mean((1 - weight) * prog_on_set_sort[0:Nt]) +
         weight * np.mean(prog_on_set_sort[-Nt::]))

    # get the indices of the frames that satisfy the thresholding
    max_idx = np.where(prob_on_set > T)[0]

    i = 0
    seg_limits = []

    # Step 4B: group frame indices to onset segments
    while i < len(max_idx):
        # for each of the detected onset indices
        cur_cluster = [max_idx[i]]
        if i == len(max_idx) - 1:
            # a trailing isolated frame never forms a segment
            break
        # extend the cluster while consecutive indices are at most 2 apart
        while max_idx[i + 1] - cur_cluster[-1] <= 2:
            cur_cluster.append(max_idx[i + 1])
            i += 1
            if i == len(max_idx) - 1:
                break
        i += 1
        seg_limits.append([cur_cluster[0] * st_step,
                           cur_cluster[-1] * st_step])

    # Step 5: Post process: remove very small segments (<= 0.2 seconds):
    min_dur = 0.2
    seg_limits = [s for s in seg_limits if s[1] - s[0] > min_dur]

    if plot:
        # Build the axes from integer sample/frame indices: a float-step
        # arange can come out one element longer or shorter than the data.
        timeX = np.arange(x.shape[0]) / float(fs)

        plt.subplot(2, 1, 1)
        plt.plot(timeX, x / x.max())
        for s in seg_limits:
            plt.axvline(x=s[0], color='red')
            plt.axvline(x=s[1], color='red')
        # the title belongs to the waveform subplot; it used to be set on the
        # second subplot, where it was immediately overwritten
        plt.title('Signal')

        plt.subplot(2, 1, 2)
        plt.plot(np.arange(prob_on_set.shape[0]) * st_step, prob_on_set)
        for s in seg_limits:
            plt.axvline(x=s[0], color='red')
            plt.axvline(x=s[1], color='red')
        plt.ylim(0, 1)
        plt.title('svm Probability')
        plt.tight_layout()
        plt.show()

    return seg_limits