Example #1
def runRegressionModelTest(featureSet, valueVector, model):
    # Fit the selected regression model and return it (None on a bad choice).
    clf = None
    if model == 1:
        print("\nLINEAR REGRESSION\n")
        clf = linear_regression_fit(featureSet, valueVector)
    elif model == 2:
        print("\nSVR\n")
        clf = SVR_fit(featureSet, valueVector)
    elif model == 4:
        print("\nSTOCHASTIC\n")
        clf = SGD_r_fit(featureSet, valueVector)
        joblib.dump(clf, 'sgd.pkl')  # persist the fitted SGD regressor
    elif model == 5:
        print("\nNEIGHBOURS\n")
        clf = neighbours_fit(featureSet, valueVector)
    elif model == 6:
        print("\nLOGISTIC\n")
        clf = log_regression_fit(featureSet, valueVector)
    elif model == 7:
        print("\nBAYESIANRIDGE\n")
        clf = bayesian_ridge_fit(featureSet, valueVector)
    elif model == 8:
        print("\nRIDGE\n")
        clf = ridge_fit(featureSet, valueVector)
    elif model == 9:
        print("\nELASTIC NET\n")
        clf = elastic_fit(featureSet, valueVector)
    elif model == 10:
        print("\nLASSO\n")
        clf = lasso_fit(featureSet, valueVector)
    else:
        print('Invalid choice\n')

    return clf
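
Example #1 only fits and returns the chosen estimator, so a caller is expected to score it separately. A minimal usage sketch, assuming the *_fit helpers above are defined in the same module; the data names and shapes here are purely illustrative:

import numpy as np

X = np.random.rand(100, 5)  # toy feature matrix (hypothetical data)
y = np.random.rand(100)     # toy target vector

clf = runRegressionModelTest(X, y, 2)  # model=2 selects the SVR branch
if clf is not None:
    preds = clf.predict(X[:10])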
Example #2
def runRegressionModelTest(featureSet, valueVector, X_test, y_test, model):
    output = ''
    clf = None
    if model == 1:
        output += "\nLINEAR REGRESSION\n"
        clf = linear_regression_fit(featureSet, valueVector)
    elif model == 2:
        output += "\nSVR\n"
        clf = SVR_fit(featureSet, valueVector)
    elif model == 3:
        output += "\nEXTREME LEARNING MACHINE\n"
        clf = elm.ELMRegressor()
        clf.fit(featureSet, valueVector)
        joblib.dump(clf, 'elm.pkl')
    elif model == 4:
        output += "\nSTOCHASTIC\n"
        clf = SGD_r_fit(featureSet, valueVector)
        joblib.dump(clf, 'sgd.pkl')
    elif model == 5:
        output += "\nNEIGHBOURS\n"
        clf = neighbours_fit(featureSet, valueVector)
    elif model == 6:
        output += "\nLOGISTIC\n"
        clf = log_regression_fit(featureSet, valueVector)
    elif model == 7:
        output += "\nBAYESIANRIDGE\n"
        clf = bayesian_ridge_fit(featureSet, valueVector)
    else:
        output += 'Invalid choice\n'
        return output  # no model was fitted; skip the scoring below

    predictions = clf.predict(X_test)
    score = mean_squared_error(y_test, predictions)
    score2 = r2_score(y_test, predictions)
    cv = cross_validation.ShuffleSplit(featureSet.shape[0],
                                       n_iter=50,
                                       test_size=0.25,
                                       random_state=0)
    a = cross_validation.cross_val_score(clf, featureSet, valueVector, cv=cv)
    a = a[a > 0]  # keep only the folds with a positive score
    output += 'Cross V score: ' + ' '.join("%10.3f" % x for x in a) + '\n'
    output += ('Mean Score: %.3f\n' % np.mean(a))
    output += ('Mean Squared Error: %.3f\n' % score)
    output += ('R^2: %.3f\n' % score2)

    return output
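
Note that these snippets target the pre-0.18 scikit-learn API: the sklearn.cross_validation module was deprecated in 0.18 and removed in 0.20, and ShuffleSplit no longer takes the sample count or n_iter. On a current install the same shuffle-split scoring reads roughly as follows (a sketch, assuming a fitted estimator clf):

from sklearn.model_selection import ShuffleSplit, cross_val_score

cv = ShuffleSplit(n_splits=50, test_size=0.25, random_state=0)
scores = cross_val_score(clf, featureSet, valueVector, cv=cv)  # one score per split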
Example #3
def runClassificationTest(X, y, Xt, yt, model, labs):
    output = ''
    clf = None

    if model == 1:
        output += "\nSVC\n"
        clf = svc_fit(X, y)
    elif model == 2:
        output += '\nLinearSVC\n'
        clf = linear_svc_fit(X, y)
    elif model == 3:
        output += '\nStochasticGradientDescent\n'
        clf = SGD_c_fit(X, y)
    elif model == 4:
        output += '\nKNearestNeighbours\n'
        clf = nearest_fit(X, y)
    elif model == 5:
        output += '\nRandomForest\n'
        clf = random_forest_fit(X, y)
    elif model == 6:
        output += '\nLogistic\n'
        clf = log_regression_fit(X, y)
    else:
        output += 'Invalid choice\n'
        return None, output  # nothing was fitted; skip the scoring below

    predictions = clf.predict(Xt)
    accuracy = accuracy_score(yt, predictions)
    f1 = f1_score(yt, predictions, labels=labs)

    cv = cross_validation.ShuffleSplit(X.shape[0],
                                       n_iter=50,
                                       test_size=0.3,
                                       random_state=0)
    a = cross_validation.cross_val_score(clf, X, y, cv=cv)
    a = a[a > 0]  # keep only the folds with a positive score
    output += 'Cross V score: ' + ' '.join("%10.3f" % x for x in a) + '\n'
    output += "\n\nAccuracy " + str(accuracy)
    output += "\nF1 Score " + str(f1)

    return clf, output
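
A usage sketch for the classification variant, assuming the classifier helpers above exist and using the same old-style API as the snippets; the label list [0, 1] is a hypothetical binary labelling:

from sklearn.cross_validation import train_test_split  # pre-0.18 location of this helper

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
clf, report = runClassificationTest(X_train, y_train, X_test, y_test, 5, [0, 1])  # model=5: random forest
print(report)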
Example #4
def train_classifier(X, y, model, featureset, data_source):
    kernel = 'rbf'

    # Tuned hyper-parameters per feature set; columns are [C, gamma, LinearSVC C].
    parameters = np.zeros([21, 3])
    if data_source == 1:
        parameters[0] = [10000, 0.001, 10000]  # Manual
        parameters[1] = [10, 0.01, 0.1]  # Unigram
        parameters[2] = [10, 0.001, 0.1]
        parameters[3] = [10, 0.1, 1]
        parameters[4] = [100000, 0.001, 0.1]  # Bigram
        parameters[5] = [100, 0.1, 10]
        parameters[6] = [100, 0.001, 0.1]  # Trigram
        parameters[7] = [10, 0.1, 10000]
        parameters[8] = [1000, 0.001, 1]  # Bigram only
        parameters[9] = [10, 0.1, 100]
        parameters[10] = [0.01, 0.1, 1]  # Trigram only
        parameters[11] = [0.01, 10, 100]
        parameters[12] = [10, 0.0001, 0.001]  # Character Ngram
        parameters[13] = [10, 1, 10]
        parameters[14] = [10, 0.001, 0.01]  # Character Skipgram
        parameters[15] = [10000, 0.0001, 1]
        parameters[16] = [1000000, 10.0, 0.001]  # LDA
        parameters[17] = [10000, 0.0001, 1]  # Word2Vec TFIDF
        parameters[18] = [10000000, 0.0001, 10000]  # Word2Vec BOC
        parameters[19] = [1, 0.01, 10]  # Doc2Vec
        parameters[20] = [10, 0.001, 0.01]  # Google Word2vec TFIDF
    elif data_source == 2:  # TOY
        parameters[0] = [0.1, 0.1, 10]  # Manual
        parameters[1] = [100, 0.01, 1]  # Unigram
        parameters[2] = [100, 0.01, 1]
        parameters[3] = [10, 1, 10]
        parameters[4] = [10, 0.01, 0.1]  # Bigram
        parameters[5] = [100, 0.1, 1000]
        parameters[6] = [10, 0.01, 10]  # Trigram
        parameters[7] = [10, 0.1, 10000]
        parameters[8] = [1000, 0.1, 1]  # Bigram only
        parameters[9] = [10000, 0.1, 10000]
        parameters[10] = [10, 0.1, 1]  # Trigram only
        parameters[11] = [10, 10, 1000]
        parameters[12] = [10, 0.001, 0.1]  # Character Ngram
        parameters[13] = [100, 0.1, 100]
        parameters[14] = [100, 0.001, 0.1]  # Character Skipgram
        parameters[15] = [10, 1, 10]
        parameters[16] = [10000000, 1, 1]  # LDA
        parameters[17] = [10, 0.01, 0.01]  # Word2Vec TFIDF
        parameters[18] = [1000, 0.0001, 0.1]  # Word2Vec BOC
        parameters[19] = [100, 0.0001, 0.01]  # Doc2Vec
        parameters[20] = [10, 0.001, 1]  # Google Word2vec TFIDF
    elif data_source == 3:  # SLASHDOT
        parameters[0] = [1000000, 0.0001, 1000]  # Manual
        parameters[1] = [1000, 0.1, 1000]  # Unigram
        parameters[2] = [1000, 0.1, 1]
        parameters[3] = [1000, 1, 100]
        parameters[4] = [100, 0.001, 1000]  # Bigram
        parameters[5] = [1, 1, 10000]
        parameters[6] = [100, 0.001, 10000]  # Trigram
        parameters[7] = [1, 0.1, 10000]
        parameters[8] = [1000, 0.001, 1]  # Bigram only
        parameters[9] = [0.1, 1, 100]
        parameters[10] = [10, 0.1, 1]  # Trigram only
        parameters[11] = [100, 1, 1000]
        parameters[12] = [1000, 0.001, 0.01]  # Character Ngram
        parameters[13] = [10, 1, 100]
        parameters[14] = [10, 0.001, 0.1]  # Character Skipgram
        parameters[15] = [10, 1, 10]
        parameters[16] = [1000000, 0.01, 10]  # LDA
        parameters[17] = [10, 0.01, 100]  # Word2Vec TFIDF
        parameters[18] = [1, 0.001, 0.001]  # Word2Vec BOC
        parameters[19] = [1000000, 0.0001, 1000]  # Doc2Vec
        parameters[20] = [10000000, 0.0001, 10000]  # Google Word2vec TFIDF

    C = parameters[featureset][0]
    gamma = parameters[featureset][1]
    Lc = parameters[featureset][2]

    clf = None
    if model == 1:
        print("\nSVC\n")
        clf = svc_fit(X, y, kernel=kernel, C=C, gamma=gamma)
    elif model == 2:
        print('\nLinearSVC\n')
        clf = linear_svc_fit(X, y, C=Lc)
    elif model == 3:
        print('\nStochasticGradientDescent\n')
        clf = SGD_c_fit(X, y)
    elif model == 4:
        print('\nKNearestNeighbours\n')
        clf = nearest_fit(X, y)
    elif model == 5:
        print('\nRandomForest\n')
        clf = random_forest_fit(X, y)
    elif model == 6:
        print('\nLogistic\n')
        clf = log_regression_fit(X, y)
    else:
        print('Invalid choice\n')

    return clf
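
As an illustration of the lookup, calling the trainer with featureset=1 (unigram) and data_source=3 (SLASHDOT) picks C=1000 and gamma=0.1 from the table and fits an RBF SVC; the call is a sketch that assumes svc_fit and the data exist:

clf = train_classifier(X, y, model=1, featureset=1, data_source=3)  # RBF SVC with C=1000, gamma=0.1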