Exemplo n.º 1
0
def classify(features, results, test_features, test_results, C, gamma):
    """Train an RBF-kernel SVC and return its error rate on the test data.

    The classifier is fit on (features, results) and then predicts
    test_features. Training and prediction wall-clock times are logged
    under an id built from C and gamma.

    Returns the percentage (a float) of predictions that differ from the
    corresponding entry of test_results.
    """
    run_id = "%s@%s" % (C, gamma)
    train_start = time.time()
    log.info("Classifier begins")
    model = SVC(C=C, gamma=gamma, kernel="rbf")
    model.fit(features, results)
    predict_start = time.time()
    predicted = model.predict(test_features)
    log.info("id: %s Training time: %s, Prediction time: %s" % (
        run_id, predict_start - train_start, time.time() - predict_start))
    # Count the positions where the predicted label disagrees with the
    # expected one; test_results is indexed by the prediction's position.
    mismatches = sum(
        1 for position, label in enumerate(predicted)
        if test_results[position] != label
    )
    return (mismatches / float(len(test_results))) * 100
Exemplo n.º 2
0
def classify(train_file, test_file):
    """
    Train a linear-kernel SparseSVC on one file and test it on another.

    Both files are read with load_svmlight_file. The test matrix is
    loaded with the training matrix's column count so the two feature
    spaces have the same width. The classification report and the
    confusion matrix for the test set are printed; nothing is returned.

    train_file: file that the model is trained on
    test_file: file that is used to test the model
    """
    X_train, y_train = load_svmlight_file(train_file)
    # n_features is passed by keyword: newer scikit-learn releases accept
    # it only as a keyword argument, and older ones accept it either way.
    X_test, y_test = load_svmlight_file(test_file,
                                        n_features=X_train.shape[1])
    # Alternative tried: LogisticRegression(C=1.0, penalty='l1', tol=1e-6)
    clf = SparseSVC(kernel="linear", C=0.2)
    clf.fit(X_train, y_train)
    y_predict = clf.predict(X_test)
    # Single-argument print(...) prints the same text on Python 2 and 3,
    # whereas the bare print statement is a syntax error on Python 3.
    print(sklearn.metrics.classification_report(y_test, y_predict))
    print(sklearn.metrics.confusion_matrix(y_test, y_predict))
Exemplo n.º 3
0
    # Disabled dense-matrix split, kept for reference:
    # X_den_train, X_den_test = X_den[train_index], X_den[test_index]

    # feed models: every classifier is fit on the same split
    # (X_train_train, y_train_train)
    clf_mNB.fit(X_train_train, y_train_train)
    clf_kNN.fit(X_train_train, y_train_train)
    clf_ridge.fit(X_train_train, y_train_train)
    clf_lSVC.fit(X_train_train, y_train_train)
    clf_SVC.fit(X_train_train, y_train_train)

    # get prediction for this fold run: each fitted model predicts
    # X_train_test
    pred_mNB    = clf_mNB.predict(X_train_test)
    pred_kNN    = clf_kNN.predict(X_train_test)
    pred_ridge  = clf_ridge.predict(X_train_test)
    pred_lSVC   = clf_lSVC.predict(X_train_test)
    pred_SVC    = clf_SVC.predict(X_train_test)

    # update z array for each model: append this run's predictions to the
    # model's running z vector (with axis=None, np.append flattens both
    # operands, so each z stays one-dimensional)
    z_mNB   = np.append(z_mNB    , pred_mNB  , axis=None)
    z_kNN   = np.append(z_kNN    , pred_kNN  , axis=None)
    z_ridge = np.append(z_ridge  , pred_ridge, axis=None)
    z_lSVC  = np.append(z_lSVC   , pred_lSVC , axis=None)
    z_SVC   = np.append(z_SVC    , pred_SVC  , axis=None)


# Stack the per-model z vectors into one 2-D int32 matrix and transpose it
# so that each model (mNB, kNN, ridge, lSVC, SVC) contributes one column.
# This matrix is the feature input for level 1, playing the role that X
# played before; the level-1 target is still y.
# Wider model set used earlier, kept for reference:
# z = np.array([z_bNB, z_mNB, z_kNN, z_ridge, z_SGD, z_lSVC, z_SVC, z_tree, z_logis], dtype=np.int32)
z = np.array(
    [z_mNB, z_kNN, z_ridge, z_lSVC, z_SVC], dtype=np.int32
).transpose()
    # Split the dense matrix X_den with this run's train/test indices.
    # NOTE(review): X_den_train / X_den_test are not referenced in the
    # lines visible here; the models below use X_train / X_test. Confirm
    # whether this split is still needed.
    X_den_train, X_den_test = X_den[train_index], X_den[test_index]

    # feed models: every classifier is fit on the same split
    # (X_train, y_train)
    clf_mNB.fit(X_train, y_train)
    clf_kNN.fit(X_train, y_train)
    clf_ridge.fit(X_train, y_train)
    clf_lSVC.fit(X_train, y_train)
    clf_SVC.fit(X_train, y_train)

    # get prediction for this fold run: each fitted model predicts X_test
    pred_mNB    = clf_mNB.predict(X_test)
    pred_kNN    = clf_kNN.predict(X_test)
    pred_ridge  = clf_ridge.predict(X_test)
    pred_lSVC   = clf_lSVC.predict(X_test)
    pred_SVC    = clf_SVC.predict(X_test)

    # update z array for each model: append this run's predictions to the
    # model's running z vector (with axis=None, np.append flattens both
    # operands, so each z stays one-dimensional)
    z_mNB   = np.append(z_mNB    , pred_mNB  , axis=None)
    z_kNN   = np.append(z_kNN    , pred_kNN  , axis=None)
    z_ridge = np.append(z_ridge  , pred_ridge, axis=None)
    z_lSVC  = np.append(z_lSVC   , pred_lSVC , axis=None)
    z_SVC   = np.append(z_SVC    , pred_SVC  , axis=None)


# Stack the per-model z vectors into one 2-D int32 matrix and transpose it
# so that each model (mNB, kNN, ridge, lSVC, SVC) contributes one column.
# This matrix is the feature input for level 1, playing the role that X
# played before; the level-1 target is still y.
# Wider model set used earlier, kept for reference:
# z = np.array([z_bNB, z_mNB, z_kNN, z_ridge, z_SGD, z_lSVC, z_SVC, z_tree, z_logis], dtype=np.int32)
z = np.array(
    [z_mNB, z_kNN, z_ridge, z_lSVC, z_SVC], dtype=np.int32
).transpose()
        # Initialize the per-round score lists used for computing the averages
        f1_all = []
        f5_all = []
        acc_all = []
        pre_all = []
        rec_all = []

        # Test for 10 rounds, one per train/test split of the 10-fold cross-validation
        for train_index, test_index in kf:

            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            # fit and predict
            clf.fit(X_train, y_train)
            pred = clf.predict(X_test)

            # print y_test
            # print pred
            # print type(pred)

            # output tree into graph
            # out = StringIO()
            # out = export_graphviz(clf, out_file=out)

            # metrics

            # # Original
            f1_score = metrics.f1_score(y_test, pred)
            f5_score = metrics.fbeta_score(y_test, pred, beta=0.5)
            acc_score = metrics.zero_one_score(y_test, pred)