Example #1
# Presumably a method of an AudioClassifierManager class: the "__"-prefixed
# names below are its name-mangled private attributes.
def saveClassifierModel(features, model_name, classifier_type, classifier, MEAN, STD, classNames, bestParam):
    if classifier_type == "knn":
        # kNN models are saved as the raw training data plus the
        # normalization and windowing parameters, one pickle record each.
        [X, Y] = aT.listOfFeatures2Matrix(features)
        X = X.tolist()
        Y = Y.tolist()
        fo = open(model_name, "wb")
        cPickle.dump(X, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(Y, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(bestParam, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(AudioClassifierManager.getMtWin(), fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(AudioClassifierManager.getMtStep(), fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(AudioClassifierManager.getStWin(), fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(AudioClassifierManager.getStStep(), fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(AudioClassifierManager.__compute_beat, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        fo.close()
    elif classifier_type == AudioClassifierManager.__svmModelName or \
            classifier_type == AudioClassifierManager.__svmRbfModelName or \
            classifier_type == AudioClassifierManager.__randomforestModelName or \
            classifier_type == AudioClassifierManager.__gradientboostingModelName or \
            classifier_type == AudioClassifierManager.__extratreesModelName:
        # Other classifiers: pickle the trained model itself, then the
        # normalization and windowing parameters in a separate "MEANS" file.
        with open(model_name, 'wb') as fid:
            cPickle.dump(classifier, fid)
        fo = open(model_name + "MEANS", "wb")
        cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(AudioClassifierManager.getMtWin(), fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(AudioClassifierManager.getMtStep(), fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(AudioClassifierManager.getStWin(), fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(AudioClassifierManager.getStStep(), fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(AudioClassifierManager.__compute_beat, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        fo.close()
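
To read a kNN model saved this way back, the fields are unpickled in exactly the order they were written above. A minimal sketch (the loader name is illustrative; under Python 3, cPickle becomes pickle):

import cPickle

def loadKnnModel(model_name):
    # Unpickle in the same order the save routine dumped the fields.
    fo = open(model_name, "rb")
    X = cPickle.load(fo)
    Y = cPickle.load(fo)
    MEAN = cPickle.load(fo)
    STD = cPickle.load(fo)
    classNames = cPickle.load(fo)
    bestParam = cPickle.load(fo)
    mt_win = cPickle.load(fo)
    mt_step = cPickle.load(fo)
    st_win = cPickle.load(fo)
    st_step = cPickle.load(fo)
    compute_beat = cPickle.load(fo)
    fo.close()
    return X, Y, MEAN, STD, classNames, bestParam, mt_win, mt_step, st_win, st_step, compute_beat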
Example #2
from sklearn.neural_network import MLPClassifier
from pyAudioAnalysis import audioTrainTest as aT


def train(featuresNew, param):
    # Stack the per-class feature matrices into a sample matrix X and label vector Y
    [X, Y] = aT.listOfFeatures2Matrix(featuresNew)
    clf = MLPClassifier(solver='lbfgs',
                        alpha=1e-5,
                        hidden_layer_sizes=(5, param),
                        random_state=1)
    clf.fit(X, Y)
    return clf
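
A hypothetical call, to show the expected input shape: featuresNew is a list with one [n_samples x n_dims] matrix per class, as in the other examples here; the random matrices are purely illustrative.

import numpy

featuresNew = [numpy.random.rand(20, 4), numpy.random.rand(20, 4) + 1.0]
clf = train(featuresNew, 3)                 # second hidden layer of width 3
print(clf.predict(featuresNew[0][:5, :]))   # predicted class indices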
Example #3
import numpy
import sklearn.svm
import sklearn.linear_model
import cPickle  # Python 2; use pickle under Python 3
from pyAudioAnalysis import audioFeatureExtraction as aF
from pyAudioAnalysis import audioTrainTest as aT
# The helpers below come from pyAudioAnalysis' audioTrainTest module
# (names as in the library version these examples target):
from pyAudioAnalysis.audioTrainTest import (
    listOfFeatures2Matrix, normalizeFeatures, evaluateclassifier,
    writeTrainDataToARFF, trainSVM, trainSVM_RBF, trainRandomForest,
    trainGradientBoosting, trainExtraTrees)


def train(files):
    # extract features
    features, classes, filenames = aF.dirsWavFeatureExtraction(
        files, 1.0, 1.0, aT.shortTermWindow, aT.shortTermStep)
    # normalize
    [featuresNorm, MEAN, STD] = aT.normalizeFeatures(features)
    [X, Y] = aT.listOfFeatures2Matrix(featuresNorm)
    # train a linear SVM with probability estimates enabled
    clf = sklearn.svm.SVC(kernel='linear', probability=True)
    clf.fit(X, Y)
    return clf, MEAN, STD
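
At prediction time, the same MEAN and STD returned here must be applied to each new feature vector before it reaches the classifier; normalizeFeatures normalizes as (x - MEAN) / STD, so a short sketch (x stands for an illustrative raw feature vector):

x_norm = (x - MEAN) / STD
label = clf.predict(x_norm.reshape(1, -1))[0]
confidence = clf.predict_proba(x_norm.reshape(1, -1)).max()  # available since probability=True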
def trainLogisticRegression(features, Cparam):
    '''
    Train a multi-class probabilistic Logistic Regression classifier.
    Note:     This function is simply a wrapper to the sklearn functionality for logistic regression training
    ARGUMENTS:
        - features:         a list ([numOfClasses x 1]) whose elements contain numpy matrices of features
                            each matrix features[i] of class i is [n_samples x numOfDimensions]
        - Cparam:           Logistic Regression parameter C (Inverse of regularization strength)
    RETURNS:
        - lr:              the trained logistic regression variable
    NOTE:
        This function trains a Logistic Regression model for a given C value.
        For a different kernel, other types of parameters should be provided.
    '''

    [X, Y] = listOfFeatures2Matrix(features)
    lr = sklearn.linear_model.LogisticRegression(C=Cparam, multi_class="ovr")
    lr.fit(X, Y)

    return lr
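
A hypothetical usage, mirroring the other trainers (the feature matrices here are illustrative):

import numpy

features = [numpy.random.rand(50, 10), numpy.random.rand(50, 10) + 0.5]
lr = trainLogisticRegression(features, 1.0)
print(lr.predict_proba(features[0][:3, :]))   # per-class probabilities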
def featureAndTrain(list_of_dirs, mt_win, mt_step, st_win, st_step,
                    classifier_type, model_name,
                    compute_beat=False, perTrain=0.90, feats=["gfcc", "mfcc"]):
    '''
    This function is used as a wrapper to segment-based audio feature extraction and classifier training.
    ARGUMENTS:
        list_of_dirs:        list of paths of directories. Each directory contains a single audio class whose samples are stored in separate WAV files.
        mt_win, mt_step:        mid-term window length and step
        st_win, st_step:        short-term window and step
        classifier_type:        "svm", "svm_rbf", "knn", "randomforest", "gradientboosting", "extratrees" or "logisticregression"
        model_name:        name of the model to be saved
    RETURNS:
        None. Resulting classifier along with the respective model parameters are saved on files.
    '''

    # STEP A: Feature Extraction:
    [features, classNames, _] = aF.dirsWavFeatureExtraction(list_of_dirs,
                                                            mt_win,
                                                            mt_step,
                                                            st_win,
                                                            st_step,
                                                            compute_beat=compute_beat,
                                                            feats=feats)

    if len(features) == 0:
        print("trainSVM_feature ERROR: No data found in any input folder!")
        return

    n_feats = features[0].shape[1]
    feature_names = ["features" + str(d + 1) for d in range(n_feats)]

    writeTrainDataToARFF(model_name, features, classNames, feature_names)

    for i, f in enumerate(features):
        if len(f) == 0:
            print("trainSVM_feature ERROR: " + list_of_dirs[i] + " folder is empty or non-existing!")
            return

    # STEP B: classifier Evaluation and Parameter Selection:
    if classifier_type == "svm" or classifier_type == "svm_rbf":
        classifier_par = numpy.array([0.001, 0.01, 0.5, 1.0, 5.0, 10.0, 20.0])
    elif classifier_type == "randomforest":
        classifier_par = numpy.array([10, 25, 50, 100, 200, 500])
    elif classifier_type == "knn":
        classifier_par = numpy.array([1, 3, 5, 7, 9, 11, 13, 15])
    elif classifier_type == "gradientboosting":
        classifier_par = numpy.array([10, 25, 50, 100, 200, 500])
    elif classifier_type == "extratrees":
        classifier_par = numpy.array([10, 25, 50, 100, 200, 500])
    elif classifier_type == "logisticregression":
        classifier_par = numpy.array([0.01, 0.1, 1, 5])
    else:
        raise ValueError("Unsupported classifier type: " + classifier_type)

    # get optimal classifier parameter, dropping feature vectors with NaN/Inf values:
    features2 = []
    for f in features:
        fTemp = []
        for i in range(f.shape[0]):
            temp = f[i, :]
            if (not numpy.isnan(temp).any()) and (not numpy.isinf(temp).any()):
                fTemp.append(temp.tolist())
            else:
                print("NaN Found! Feature vector not used for training")
        features2.append(numpy.array(fTemp))
    features = features2

    bestParam = evaluateclassifier(features, classNames, 300, classifier_type, classifier_par, 0, perTrain)

    print("Selected params: {0:.5f}".format(bestParam))

    C = len(classNames)
    [features_norm, MEAN, STD] = normalizeFeatures(features)        # normalize features
    MEAN = MEAN.tolist()
    STD = STD.tolist()
    featuresNew = features_norm

    # STEP C: Save the classifier to file
    if classifier_type == "svm":
        classifier = trainSVM(featuresNew, bestParam)
    elif classifier_type == "svm_rbf":
        classifier = trainSVM_RBF(featuresNew, bestParam)
    elif classifier_type == "randomforest":
        classifier = trainRandomForest(featuresNew, bestParam)
    elif classifier_type == "gradientboosting":
        classifier = trainGradientBoosting(featuresNew, bestParam)
    elif classifier_type == "extratrees":
        classifier = trainExtraTrees(featuresNew, bestParam)
    elif classifier_type == "logisticregression":
        classifier = trainLogisticRegression(featuresNew, bestParam)


    if classifier_type == "knn":
        [X, Y] = listOfFeatures2Matrix(featuresNew)
        X = X.tolist()
        Y = Y.tolist()
        fo = open(model_name, "wb")
        cPickle.dump(X, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(Y, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(bestParam, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mt_win, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mt_step, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(st_win, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(st_step, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(compute_beat, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        fo.close()
    elif classifier_type == "svm" or classifier_type == "svm_rbf" or \
                    classifier_type == "randomforest" or \
                    classifier_type == "gradientboosting" or \
                    classifier_type == "extratrees" or \
                    classifier_type == "logisticregression":
        with open(model_name, 'wb') as fid:
            cPickle.dump(classifier, fid)
        fo = open(model_name + "MEANS", "wb")
        cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(classNames, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mt_win, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(mt_step, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(st_win, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(st_step, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        cPickle.dump(compute_beat, fo, protocol=cPickle.HIGHEST_PROTOCOL)
        fo.close()
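
Reading a non-kNN model back mirrors the two dumps above: the classifier comes from model_name, and the remaining fields, in dump order, from model_name + "MEANS". A minimal sketch (loader name illustrative):

import cPickle

def loadModel(model_name):
    with open(model_name, "rb") as fid:
        classifier = cPickle.load(fid)
    fo = open(model_name + "MEANS", "rb")
    MEAN = cPickle.load(fo)
    STD = cPickle.load(fo)
    classNames = cPickle.load(fo)
    mt_win = cPickle.load(fo)
    mt_step = cPickle.load(fo)
    st_win = cPickle.load(fo)
    st_step = cPickle.load(fo)
    compute_beat = cPickle.load(fo)
    fo.close()
    return classifier, MEAN, STD, classNames, mt_win, mt_step, st_win, st_step, compute_beat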
Example #6
import glob
import os

import numpy
from sklearn.cluster import KMeans
from pyAudioAnalysis.audioTrainTest import listOfFeatures2Matrix

# dirs = ["C:\\Users\\zacha\\PycharmProjects\\untitled\\samples"]
data = []
ids = []
for i in xrange(0, len(dirs)):  # iterate through each test directory
    dir = dirs[i]
    os.chdir(dir)
    for file in glob.glob("*.npy"):
        features = numpy.load(file)
        ids.append(i)
        temp = []
        for f in features:
            temp.append(f[0])  # keep only the first value of each feature row
        data.append(temp)

data = numpy.array(data)
[X, Y] = listOfFeatures2Matrix(data)
kmeans = KMeans(n_clusters=len(dirs)).fit(X, Y)  # KMeans is unsupervised; fit ignores Y
# Per-directory histograms of assigned cluster labels (3 clusters assumed)
zero = [0, 0, 0]
one = [0, 0, 0]
two = [0, 0, 0]
#pickle.dump(kmeans, 'birds.km')
assert len(ids) == len(kmeans.labels_)
assert len(ids) == 180
for i in xrange(0, len(ids)):
    if ids[i] == 0:
        zero[kmeans.labels_[i]] += 1
    elif ids[i] == 1:
        one[kmeans.labels_[i]] += 1
    elif ids[i] == 2:
        two[kmeans.labels_[i]] += 1
    else:
        pass  # ids are only 0, 1 or 2 in this dataset
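
The zero/one/two tallies form a contingency table of true directory versus assigned cluster, so a simple cluster-purity score can be read off it. A short sketch under that assumption:

counts = numpy.array([zero, one, two])   # rows: true class, cols: cluster
purity = counts.max(axis=0).sum() / float(counts.sum())
print(counts)
print("cluster purity: %.3f" % purity)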