Пример #1
0
def classifyLR(train, test):
    """Fit the module-level ``classifier`` and report how it does on ``test``.

    The classifier is trained on ``train`` against ``twenty_train.target``.
    Its label predictions for ``test`` are handed to ``hlp.getStats`` and
    column 1 of its ``predict_proba`` output is handed to ``hlp.plot_roc``
    under the title 'Logistic Regression'.

    Args:
        train: Feature matrix used for fitting; presumably row-aligned with
            ``twenty_train.target`` (not checked here).
        test: Feature matrix used for evaluation; presumably row-aligned
            with ``twenty_test.target`` (not checked here).

    Returns:
        None. Results are only passed on to the ``hlp`` helpers.
    """
    classifier.fit(train, twenty_train.target)

    label_guesses = classifier.predict(test)
    class_probs = classifier.predict_proba(test)

    hlp.getStats(twenty_test.target, label_guesses)

    # Only the second probability column is used for the ROC curve.
    roc_scores = class_probs[:, 1]
    hlp.plot_roc(twenty_test.target, roc_scores, 'Logistic Regression')
Пример #2
0
def classifyWithSVC(valC):
    clf = svm.SVC(C=valC, probability=True, kernel='linear', random_state=42)

    svdListTrain = td.getsvdListTrain()
    nmfListTrain = td.getnmfListTrain()
    svdListTest = td.getsvdListTest()
    nmfListTest = td.getnmfListTest()

    for min_df in [2,5]:
        print ".......... With min_df = ", min_df , "..........."
        if min_df == 2:
            svd_matrix_train=svdListTrain[0]
            nmf_matrix_train=nmfListTrain[0]
            svd_matrix_test=svdListTest[0]
            nmf_matrix_test=nmfListTest[0]
        else: 
            svd_matrix_train=svdListTrain[1]
            nmf_matrix_train=nmfListTrain[1]
            svd_matrix_test=svdListTest[1]
            nmf_matrix_test=nmfListTest[1]        

        print "With SVD"
        clf.fit(svd_matrix_train, twenty_train.target)
        predicted = clf.predict(svd_matrix_test)
        probabilities = clf.predict_proba(svd_matrix_test)
        hlp.getStats(twenty_test.target, predicted)
        hlp.plot_roc(twenty_test.target, probabilities[:,1], 'SVM')

        print "With NMF"
        clf.fit(nmf_matrix_train, twenty_train.target)
        predicted = clf.predict(nmf_matrix_test)
        probabilitiesnmf = clf.predict_proba(nmf_matrix_test)
        hlp.getStats(twenty_test.target, predicted)
        hlp.plot_roc(twenty_test.target, probabilitiesnmf[:,1], 'SVM')
Пример #3
0
nmfListTest = td.getnmfListTest()

classifier = MultinomialNB()

for min_df in [2, 5]:
    print "WIth min_df = ", min_df
    if min_df == 2:
        nmf_matrix_train = nmfListTrain[0]
        nmf_matrix_test = nmfListTest[0]
        tfidf_matrix_train = tfidfListTrain[0]
        tfidf_matrix_test = tfidfListTest[0]
    else:
        nmf_matrix_train = nmfListTrain[1]
        nmf_matrix_test = nmfListTest[1]
        tfidf_matrix_train = tfidfListTrain[1]
        tfidf_matrix_test = tfidfListTest[1]

    print ".......... With SVD ........."
    classifier.fit(tfidf_matrix_train, twenty_train.target)
    predicted = classifier.predict(tfidf_matrix_test)
    probabilities = classifier.predict_proba(tfidf_matrix_test)
    hlp.getStats(twenty_test.target, predicted)
    hlp.plot_roc(twenty_test.target, probabilities[:, 1], 'MultinomialNB')

    print ".......... With NMF .........."
    classifier.fit(nmf_matrix_train, twenty_train.target)
    predicted = classifier.predict(nmf_matrix_test)
    probabilities = classifier.predict_proba(nmf_matrix_test)
    hlp.getStats(twenty_test.target, predicted)
    hlp.plot_roc(twenty_test.target, probabilities[:, 1], 'MultinomialNB')
Пример #4
0
def classifyLR_WithReg(train, test):
    l1_hyperPlaneParameters = []
    l2_hyperPlaneParameters = []

    l1_testErrors = []
    l2_testErrors = []

    l1_accu = []
    l2_accu = []

    l1_p = []
    l2_p = []

    l1_r = []
    l2_r = []

    for coeff in coeff_list:
        print "************** ", coeff, "****************"
        l1_classifier = LogisticRegression(penalty='l1',
                                           C=coeff,
                                           solver='liblinear')
        l2_classifier = LogisticRegression(penalty='l2',
                                           C=coeff,
                                           solver='liblinear')

        l1_classifier.fit(train, twenty_train.target)
        l2_classifier.fit(train, twenty_train.target)

        l1_predictions = l1_classifier.predict(test)
        l2_predictions = l2_classifier.predict(test)

        l1_predicted_probs = l1_classifier.predict_proba(test)
        l2_predicted_probs = l2_classifier.predict_proba(test)
        hlp.plot_roc(twenty_test.target, l1_predicted_probs[:, 1],
                     'LR with L1 regularization')
        hlp.plot_roc(twenty_test.target, l2_predicted_probs[:, 1],
                     'LR with L2 regularization')

        l1_testErrors.append(
            100 -
            smet.accuracy_score(twenty_test.target, l1_predictions) * 100)
        l2_testErrors.append(
            100 -
            smet.accuracy_score(twenty_test.target, l2_predictions) * 100)

        l1_accu.append(
            smet.accuracy_score(twenty_test.target, l1_predictions) * 100)
        l2_accu.append(
            smet.accuracy_score(twenty_test.target, l2_predictions) * 100)

        l1_p.append(
            smet.precision_score(
                twenty_test.target, l1_predictions, average='macro') * 100)
        l2_p.append(
            smet.precision_score(
                twenty_test.target, l2_predictions, average='macro') * 100)

        l1_r.append(
            smet.recall_score(
                twenty_test.target, l1_predictions, average='macro') * 100)
        l2_r.append(
            smet.recall_score(
                twenty_test.target, l2_predictions, average='macro') * 100)

        l1_hyperPlaneParameters.append(np.mean(l1_classifier.coef_))
        l2_hyperPlaneParameters.append(np.mean(l2_classifier.coef_))

    plotGraph(l1_testErrors, "l1")
    plotGraph(l2_testErrors, "l2")
    index = 0

    for coeff in coeff_list:
        print "coeff=", coeff
        print "Test error l1=", l1_testErrors[index]
        print "Mean of coeff l1=", l1_hyperPlaneParameters[index]
        print "Accuracy l1=", l1_accu[index]
        print "Precision l1=", l1_p[index]
        print "Recall l1=", l1_r[index]

        print "coeff=", coeff
        print "Test error l2=", l2_testErrors[index]
        print "Mean of coeff l2=", l2_hyperPlaneParameters[index]
        print "Accuracy l2=", l2_accu[index]
        print "Precision l2=", l2_p[index]
        print "Recall l2=", l2_r[index]
        index += 1
Пример #5
0
for min_df in [2, 5]:
    print "Calculating for min_df = ", min_df
    if min_df == 2:
        svd_matrix_train = svdListTrain[0]
        svd_matrix_test = svdListTest[0]
        nmf_matrix_train = nmfListTrain[0]
        nmf_matrix_test = nmfListTest[0]
    else:
        svd_matrix_train = svdListTrain[1]
        svd_matrix_test = svdListTest[1]
        nmf_matrix_train = nmfListTrain[1]
        nmf_matrix_test = nmfListTest[1]

    print "************With SVD***********"
    clf_5fold_svd = callClassifier(svd_matrix_train)
    clf_5fold_svd.fit(svd_matrix_train, twenty_train.target)
    predicted = clf_5fold_svd.predict(svd_matrix_test)
    probabilities = clf_5fold_svd.predict_proba(svd_matrix_test)
    hlp.getStats(twenty_test.target, predicted)
    hlp.plot_roc(twenty_test.target, probabilities[:, 1],
                 'SVM with cross-validation')

    print "************With NMF***********"
    clf_5fold_nmf = callClassifier(nmf_matrix_train)
    clf_5fold_nmf.fit(nmf_matrix_train, twenty_train.target)
    predictednmf = clf_5fold_nmf.predict(nmf_matrix_test)
    probabilitiesnmf = clf_5fold_nmf.predict_proba(nmf_matrix_test)
    hlp.getStats(twenty_test.target, predictednmf)
    hlp.plot_roc(twenty_test.target, probabilitiesnmf[:, 1],
                 'SVM with cross-validation')