def MNB_test(x, y):
    """Fit a multinomial naive Bayes classifier on (x, y) and print its scores.

    Prints a header, runs ``crossvalidation`` on the full data, fits the
    classifier on the training split returned by ``pp.split_data_set`` and
    prints the RMSE of the test-split predictions, followed by whatever
    ``pp.print_mean_median`` returns for those predictions.

    Args:
        x: Feature data; forwarded unchanged to ``pp.split_data_set`` and
            ``crossvalidation``.
        y: Target values for ``x``; forwarded the same way.
    """
    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print('MNB test')

    train_x, train_y, test_x, test_y = pp.split_data_set(x, y)
    clf = MultinomialNB()

    # Cross-validation is run on the complete data set, not only on the
    # training split.
    crossvalidation(clf, x, y)

    clf.fit(train_x, train_y)
    rst = clf.predict(test_x)

    print(calculate_RMSE(rst, test_y))
    # NOTE(review): the helper's name suggests it prints on its own; if it
    # returns None this line additionally prints "None" - confirm intent.
    print(pp.print_mean_median(rst))
def MLR_test(x, y):
    """Fit a multinomial logistic regression on (x, y) and print its scores.

    Prints a header, runs ``crossvalidation`` on the full data, fits the
    classifier on the training split returned by ``pp.split_data_set`` and
    prints the RMSE of the test-split predictions, followed by whatever
    ``pp.print_mean_median`` returns for those predictions.

    Args:
        x: Feature data; forwarded unchanged to ``pp.split_data_set`` and
            ``crossvalidation``.
        y: Target values for ``x``; forwarded the same way.
    """
    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print('MLR test')

    train_x, train_y, test_x, test_y = pp.split_data_set(x, y)
    clf = LogisticRegression(solver='lbfgs', multi_class='multinomial', C=4)

    # Cross-validation is run on the complete data set, not only on the
    # training split.
    crossvalidation(clf, x, y)

    clf.fit(train_x, train_y)
    rst = clf.predict(test_x)

    print(calculate_RMSE(rst, test_y))
    # NOTE(review): the helper's name suggests it prints on its own; if it
    # returns None this line additionally prints "None" - confirm intent.
    print(pp.print_mean_median(rst))
def tree_test(x, y):
    """Fit a decision tree classifier on (x, y) and print its scores.

    Prints a header, runs ``crossvalidation`` on the full data, fits the
    classifier on the training split returned by ``pp.split_data_set`` and
    prints, in order: the RMSE on the training split, the RMSE on the test
    split, and whatever ``pp.print_mean_median`` returns for the test
    predictions.

    Args:
        x: Feature data; forwarded unchanged to ``pp.split_data_set`` and
            ``crossvalidation``.
        y: Target values for ``x``; forwarded the same way.
    """
    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print('DecisionTree')

    train_x, train_y, test_x, test_y = pp.split_data_set(x, y)
    clf = tree.DecisionTreeClassifier()

    # Cross-validation is run on the complete data set, not only on the
    # training split.
    crossvalidation(clf, x, y)

    clf.fit(train_x, train_y)
    rst = clf.predict(test_x)
    train_rst = clf.predict(train_x)

    # Training error first, then test error, so the two can be compared.
    print(calculate_RMSE(train_rst, train_y))
    print(calculate_RMSE(rst, test_y))
    # NOTE(review): the helper's name suggests it prints on its own; if it
    # returns None this line additionally prints "None" - confirm intent.
    print(pp.print_mean_median(rst))
def random_forest_test(x, y):
    """Fit a random forest classifier on (x, y) and print its scores.

    Prints a header, runs ``crossvalidation`` on the full data, fits the
    classifier on the training split returned by ``pp.split_data_set`` and
    prints, in order: the test predictions, the test targets, the RMSE
    between them, and a ``mean rst:`` line carrying whatever
    ``pp.print_mean_median`` returns for the predictions.

    Args:
        x: Feature data; forwarded unchanged to ``pp.split_data_set`` and
            ``crossvalidation``.
        y: Target values for ``x``; forwarded the same way.
    """
    # The header gets its own line, like the headers of the sibling *_test
    # functions; the earlier trailing comma suppressed the newline so the
    # header ran into whatever was printed next.
    print('RandomForest')

    train_x, train_y, test_x, test_y = pp.split_data_set(x, y)
    clf = RandomForestClassifier()

    # Cross-validation is run on the complete data set, not only on the
    # training split.
    crossvalidation(clf, x, y)

    clf.fit(train_x, train_y)
    rst = clf.predict(test_x)

    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print(rst)
    print(test_y)
    print(calculate_RMSE(rst, test_y))

    # Evaluate the helper before emitting the label so that anything it
    # prints itself is not wedged between the label and the value.
    summary = pp.print_mean_median(rst)
    # Joining with one space reproduces the separator the two-item Python 2
    # print statement inserted between the label and the value.
    print(' '.join(['mean rst: ', str(summary)]))
def svm_test(x, y):
    """Fit a linear-kernel SVC on (x, y) and print its test RMSE.

    Prints ``fitting: ``, runs ``crossvalidation`` on the full data, fits the
    classifier on the training split returned by ``pp.split_data_set``,
    prints ``predicting: `` and then the RMSE of the test-split predictions.
    Finally calls ``pp.print_mean_median`` on the predictions (its return
    value is not printed here).

    Args:
        x: Feature data; forwarded unchanged to ``pp.split_data_set`` and
            ``crossvalidation``.
        y: Target values for ``x``; forwarded the same way.
    """
    train_x, train_y, test_x, test_y = pp.split_data_set(x, y)

    # NOTE(review): decision_function_shape=None may only be accepted by
    # older scikit-learn releases - confirm against the installed version.
    clf = svm.SVC(
        C=0.001,
        cache_size=200,
        class_weight=None,
        coef0=0.0,
        decision_function_shape=None,
        degree=3,
        gamma='auto',
        kernel='linear',
        max_iter=-1,
        probability=False,
        random_state=None,
        shrinking=True,
        tol=0.001,
        verbose=False,
    )

    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print('fitting: ')
    # Cross-validation is run on the complete data set, not only on the
    # training split.
    crossvalidation(clf, x, y)
    clf.fit(train_x, train_y)

    print('predicting: ')
    rst = clf.predict(test_x)

    print(calculate_RMSE(rst, test_y))
    pp.print_mean_median(rst)