def trainNN(listOfDirs, mtWin, mtStep, stWin, stStep, computeBEAT=False): #Feature Extraction [features, classNames, _] = aF.dirsWavFeatureExtraction(listOfDirs, mtWin, mtStep, stWin, stStep, computeBEAT=computeBEAT) if len(features) == 0: print "feature ERROR" return numOfFeatures = features[0].shape[1] featureNames = ["features" + str(d + 1) for d in range(numOfFeatures)] aT.writeTrainDataToARFF(modelName, features, classNames, featureNames) for i, f in enumerate(features): if len(f) == 0: print "feature ERROR" return C = len(classNames) [featuresNorm, MEAN, STD] = aT.normalizeFeatures(features) # normalize features MEAN = MEAN.tolist() STD = STD.tolist() featuresNew = featuresNorm bestParam = evaluate(featuresNew, classNames, 100, numpy.array([1, 2, 3, 4, 5, 6]), 0, perTrain=0.80) clf = train(featuresNew, bestParam) with open(modelName, 'wb') as fid: cPickle.dump(clf, fid) fo = open(modelName + "MEANS", "wb") cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(mtWin, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(mtStep, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(stWin, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(stStep, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(computeBEAT, fo, protocol=cPickle.HIGHEST_PROTOCOL) fo.close()
def featureAndTrain(list_of_dirs, mt_win, mt_step, st_win, st_step, classifier_type, model_name, compute_beat=False, perTrain=0.90, feats=["gfcc", "mfcc"]): ''' This function is used as a wrapper to segment-based audio feature extraction and classifier training. ARGUMENTS: list_of_dirs: list of paths of directories. Each directory contains a signle audio class whose samples are stored in seperate WAV files. mt_win, mt_step: mid-term window length and step st_win, st_step: short-term window and step classifier_type: "svm" or "knn" or "randomforest" or "gradientboosting" or "extratrees" model_name: name of the model to be saved RETURNS: None. Resulting classifier along with the respective model parameters are saved on files. ''' # STEP A: Feature Extraction: [features, classNames, _] = aF.dirsWavFeatureExtraction(list_of_dirs, mt_win, mt_step, st_win, st_step, compute_beat=compute_beat, feats=feats) if len(features) == 0: print("trainSVM_feature ERROR: No data found in any input folder!") return n_feats = features[0].shape[1] feature_names = ["features" + str(d + 1) for d in range(n_feats)] writeTrainDataToARFF(model_name, features, classNames, feature_names) for i, f in enumerate(features): if len(f) == 0: print("trainSVM_feature ERROR: " + list_of_dirs[i] + " folder is empty or non-existing!") return # STEP B: classifier Evaluation and Parameter Selection: if classifier_type == "svm" or classifier_type == "svm_rbf": classifier_par = numpy.array([0.001, 0.01, 0.5, 1.0, 5.0, 10.0, 20.0]) elif classifier_type == "randomforest": classifier_par = numpy.array([10, 25, 50, 100,200,500]) elif classifier_type == "knn": classifier_par = numpy.array([1, 3, 5, 7, 9, 11, 13, 15]) elif classifier_type == "gradientboosting": classifier_par = numpy.array([10, 25, 50, 100,200,500]) elif classifier_type == "extratrees": classifier_par = numpy.array([10, 25, 50, 100,200,500]) elif classifier_type == "logisticregression": classifier_par = numpy.array([0.01, 0.1, 1, 5]) # get optimal classifeir parameter: features2 = [] for f in features: fTemp = [] for i in range(f.shape[0]): temp = f[i,:] if (not numpy.isnan(temp).any()) and (not numpy.isinf(temp).any()) : fTemp.append(temp.tolist()) else: print("NaN Found! Feature vector not used for training") features2.append(numpy.array(fTemp)) features = features2 bestParam = evaluateclassifier(features, classNames, 300, classifier_type, classifier_par, 0, perTrain) # Hier!!!! print("Selected params: {0:.5f}".format(bestParam)) C = len(classNames) [features_norm, MEAN, STD] = normalizeFeatures(features) # normalize features MEAN = MEAN.tolist() STD = STD.tolist() featuresNew = features_norm # STEP C: Save the classifier to file if classifier_type == "svm": classifier = trainSVM(featuresNew, bestParam) elif classifier_type == "svm_rbf": classifier = trainSVM_RBF(featuresNew, bestParam) elif classifier_type == "randomforest": classifier = trainRandomForest(featuresNew, bestParam) elif classifier_type == "gradientboosting": classifier = trainGradientBoosting(featuresNew, bestParam) elif classifier_type == "extratrees": classifier = trainExtraTrees(featuresNew, bestParam) elif classifier_type == "logisticregression": classifier = trainLogisticRegression(featuresNew, bestParam) if classifier_type == "knn": [X, Y] = listOfFeatures2Matrix(featuresNew) X = X.tolist() Y = Y.tolist() fo = open(model_name, "wb") cPickle.dump(X, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(Y, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(bestParam, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(mt_win, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(mt_step, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(st_win, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(st_step, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(compute_beat, fo, protocol=cPickle.HIGHEST_PROTOCOL) fo.close() elif classifier_type == "svm" or classifier_type == "svm_rbf" or \ classifier_type == "randomforest" or \ classifier_type == "gradientboosting" or \ classifier_type == "extratrees" or \ classifier_type == "logisticregression": with open(model_name, 'wb') as fid: cPickle.dump(classifier, fid) fo = open(model_name + "MEANS", "wb") cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(mt_win, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(mt_step, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(st_win, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(st_step, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(compute_beat, fo, protocol=cPickle.HIGHEST_PROTOCOL) fo.close()
def writeTrainDataToARFF(model_name, features, classNames): n_feats = AudioClassifierManager.getCountFeatures(features) feature_names = ["features" + str(d + 1) for d in range(n_feats)] aT.writeTrainDataToARFF(model_name, features, classNames, feature_names)