Example no. 1
0
def get_model(classifier_name,
              training_data,
              target_data,
              model_settings=None):
    """
    Return a trained model for the requested classifier.

    Creation and training of models has been separated into their own file:
    models.py

    Arguments:
        classifier_name:    key selecting which classifier to train
        training_data:      the data the training is performed on
        target_data:        the targets for supervised learning
        model_settings:     dict of settings for the classifier (only used
                            by 'svm', 'knn' and 'lda')

    Returns:
        model:              the trained model, or None when
                            classifier_name is not recognised
    """
    # Dispatch table replaces the original hard-to-read chain of nested
    # conditional expressions.  Lambdas keep construction lazy: only the
    # selected model is actually built and trained.
    builders = {
        "linreg": lambda: models.linreg(training_data, target_data),
        "svm": lambda: models.svm(training_data, target_data, model_settings),
        "cart": lambda: models.cart(training_data, target_data),
        "knn": lambda: models.knn(training_data, target_data, model_settings),
        "lda": lambda: models.lda(training_data, target_data, model_settings),
        "nb": lambda: models.nb(training_data, target_data),
        "lr": lambda: models.lr(training_data, target_data),
    }
    builder = builders.get(classifier_name)
    # Preserve the original fallback: unknown names yield None.
    return builder() if builder is not None else None
Example no. 2
0
def main(argv):
    """
    Train a suite of classifiers and print each one's accuracy.

    Arguments:
        argv:   command-line argument list; argv[1] is the path to the
                parameter file used to build the training data.
    """
    # Bug fix: use the argv parameter instead of reaching back into
    # sys.argv, so the function also works when called with an explicit
    # argument list.
    param_fpath = argv[1]
    # prepare training data
    X, y = get_train_data(param_fpath)  # X: feature list  y: label list

    # Every classifier follows the same run/report pattern, so drive it
    # from a table.  (Python 2 print statements converted to print()
    # calls for consistency with the rest of this file.)
    classifiers = [
        ("Logistic Regression", "LR", lr),
        ("KNN", "KNN", knn),
        ("GNB", "GNB", gnb),
        ("MNB", "MNB", mnb),
        ("BNB", "BNB", bnb),
        ("Decision Tree", "Decision Tree", dtree),
        ("SVM", "SVM", svm),
    ]
    for display_name, label, classifier in classifiers:
        print('\nBegin Running %s...' % display_name)
        acc = classifier(X, y)
        print(label + " Accuracy: " + str(acc))
def training_KFold(index, data, name):
    """
    Train a model on each K-fold split and keep the best one.

    Arguments:
        index:  iterable of (train, validation) index arrays
                (e.g. produced by a KFold splitter)
        data:   indexable dataset; data[idx] selects rows
        name:   model key: 'gbr', 'svm', 'rfr' or 'mlr'

    Returns:
        The model with the lowest validation error across all folds.

    Raises:
        ValueError: if name is not one of the supported model keys.
    """
    # Dispatch table instead of four independent `if`s; all trainers
    # share the same (train_X, train_y, val_X, val_y) signature here.
    trainers = {
        'gbr': models.gbr,
        'svm': models.svm,
        'rfr': models.rfr,
        'mlr': models.mlr,
    }
    try:
        trainer = trainers[name]
    except KeyError:
        # Bug fix: an unknown name previously crashed later with a
        # NameError on the unbound `error`; fail fast and clearly.
        raise ValueError("unknown model name: %r" % (name,))

    best_error = sys.maxsize
    best_model = None
    for train, validation in index:
        train_set = data[train]
        validation_set = data[validation]
        training_X, training_y = models.features_labels(train_set)
        validation_X, validation_y = models.features_labels(validation_set)
        model, error = trainer(training_X, training_y, validation_X,
                               validation_y)
        if best_error > error:
            best_error = error
            best_model = model
            print("update model")
        print(error)
        print()
    return best_model
def runAllModels(X_train, X_test, y_test, y_train):
    """Fit every classifier, plot its predictions and evaluate them."""
    # Each model shares the same predict/plot/evaluate pattern, so loop
    # over the model functions instead of repeating the three lines.
    for train_and_predict in (xgb, rf, nn, svm):
        y_pred = train_and_predict(X_train, y_train, X_test)
        plotter(y_test, y_pred)
        cm, fscore, a = evaluate(y_test, y_pred)
def runModels(testdata, testlabel):
    """Score a battery of classifiers and print each one's mean result."""
    # Data-driven version of the original copy/paste sequence; the
    # (label, model function) pairs are evaluated in the same order.
    evaluations = (
        ("knn: ", models.knn),
        ("svm: ", models.svm),
        ("decision tree: ", models.decisionTree),
        ("Gaussian NB: ", models.naiveBayes),
        ("random forest: ", models.randomForest),
        ("nearest centroid: ", models.nearestCentroid),
        ("extra tree: ", models.extraTree),
        ("extra trees: ", models.extraTrees),
    )
    for label, score_model in evaluations:
        scores = score_model(testdata, testlabel)
        print(label, scores.mean())
def training(data, name, arg):
    """
    Train one model on a fixed 75%/25% train/validation split.

    Arguments:
        data:   dataset; the first three quarters are used for training,
                the remainder for validation
        name:   model key: 'gbr', 'svm', 'rfr' or 'mlr'
        arg:    min_samples_split forwarded to the 'gbr' model
                (ignored by the other models)

    Returns:
        The trained model.

    Raises:
        ValueError: if name is not one of the supported model keys.
    """
    training_set = data[:3 * len(data) // 4]
    validation_set = data[3 * len(data) // 4:]
    training_X, training_y = models.features_labels(training_set)
    validation_X, validation_y = models.features_labels(validation_set)
    # elif chain (the branches are mutually exclusive) plus an explicit
    # error branch: previously an unrecognised name crashed with a
    # NameError on the unbound `error` below.
    if name == 'gbr':
        model, error = models.gbr(training_X,
                                  training_y,
                                  validation_X,
                                  validation_y,
                                  n_estimators=10,
                                  min_samples_split=arg)
    elif name == 'svm':
        model, error = models.svm(training_X, training_y, validation_X,
                                  validation_y)
    elif name == 'rfr':
        model, error = models.rfr(training_X, training_y, validation_X,
                                  validation_y)
    elif name == 'mlr':
        model, error = models.mlr(training_X, training_y, validation_X,
                                  validation_y)
    else:
        raise ValueError("unknown model name: %r" % (name,))
    print(error)
    print()
    return model
        training_file, development_file)
    print("Word length training: " + str(training_performance))
    print("Word length development: " + str(development_performance))

    frequency_training_performance, frequency_development_performance = \
        word_frequency_threshold(training_file, development_file, counts)
    print("Word frequency training: " + str(frequency_training_performance))
    print("Word frequency development: " +
          str(frequency_development_performance))

    nb_development_performance = naive_bayes(training_file, development_file,
                                             counts)
    print("Naive bayes development: " + str(nb_development_performance))

    lr_development_performance = logistic_regression(training_file,
                                                     development_file, counts)
    print("Logistic Regression development: " +
          str(lr_development_performance))

    svm_development_performance = models.svm(training_file, development_file,
                                             test_file, counts)
    print("Support vector machine development: " +
          str(svm_development_performance))

    random_forest_development_performance = models.random_forest(
        training_file, development_file, test_file, counts)
    print("Random forest development: " +
          str(random_forest_development_performance))

    # tune_parameter(training_file, development_file, counts)
Example no. 8
0
import pandas as pd
import preprocessingfile as preprocess
import models

# Dataset the pipeline runs on — presumably a software-defect-prediction
# csv; verify against preprocessingfile.py.
data = 'pc2.csv'
# The project preprocessor returns 14 values: the original frames plus
# combined training data and two-stage train/test/validation splits.
# The exact meaning of x_train1 vs x_train2 is defined by
# preprocess.my_sdp_preprocessor — TODO confirm there.
original_data, original_X, original_Y, combined_training_data, x_train1, x_train2, x_train, x_test, x_val, y_train1, y_train2, y_train, y_test, y_val = preprocess.my_sdp_preprocessor(
    data)
# Bundle everything so each model constructor receives the full set of
# splits via *all_data (order must match the unpacking above).
all_data = [
    original_data, original_X, original_Y, combined_training_data, x_train1,
    x_train2, x_train, x_test, x_val, y_train1, y_train2, y_train, y_test,
    y_val
]

# Train each classifier on the same preprocessed data.
cnn_clf = models.cnn(*all_data)
svm_clf = models.svm(*all_data)
rf_clf = models.random_forest(*all_data)
nn_clf = models.NN(*all_data)

# NOTE(review): wildcard import at module level; used by the metric
# helpers below — consider importing the needed names explicitly.
from sklearn.metrics import *


def print_accuracy(model):  #nn,cnn,svm,clf
    if (model == nn_clf):
        y_pred_on_val = model.predict(x_val) > 0.5
        y_pred_on_test = model.predict(x_test) > 0.5
    elif (model == cnn_clf):
        x_val_matrix = x_val.values
        x_val1 = x_val_matrix.reshape(x_val_matrix.shape[0], 1,
                                      len(x_val.columns), 1)
        y_pred_on_val = model.predict(x_val1) > 0.5