def classifyLR(train, test):
    """Fit the shared ``classifier`` on ``train`` and report results on ``test``.

    The module-level ``classifier`` (presumably a logistic-regression model,
    going by the ROC plot title -- confirm at its definition) is trained
    against ``twenty_train.target``.  Predictions for ``test`` are scored
    against ``twenty_test.target`` with ``hlp.getStats``, and column 1 of the
    predicted probabilities is passed to ``hlp.plot_roc``.

    Args:
        train: Feature matrix for the training documents.
        test: Feature matrix for the test documents.

    Returns:
        None.  Results are reported through the ``hlp`` helpers only.
    """
    classifier.fit(train, twenty_train.target)

    labels = classifier.predict(test)
    class_probs = classifier.predict_proba(test)

    y_true = twenty_test.target
    hlp.getStats(y_true, labels)
    # Only the second probability column is used for the ROC curve.
    hlp.plot_roc(y_true, class_probs[:, 1], 'Logistic Regression')
def classifyWithSVC(valC):
    """Train a linear SVC on SVD and NMF features and report test results.

    A single ``svm.SVC`` (linear kernel, probability estimates enabled,
    ``random_state=42``) is re-fitted for each ``min_df`` setting (2, then 5)
    and, within each setting, first on the SVD features and then on the NMF
    features obtained from ``td``.  After every fit the test predictions are
    scored with ``hlp.getStats`` and column 1 of the predicted probabilities
    is passed to ``hlp.plot_roc``.

    Args:
        valC: Value forwarded as the ``C`` parameter of ``svm.SVC``.

    Returns:
        None.  Results are printed / plotted by the ``hlp`` helpers.
    """
    clf = svm.SVC(C=valC, probability=True, kernel='linear', random_state=42)

    svdListTrain = td.getsvdListTrain()
    nmfListTrain = td.getnmfListTrain()
    svdListTest = td.getsvdListTest()
    nmfListTest = td.getnmfListTest()

    # Entry 0 of every list belongs to min_df=2 and entry 1 to min_df=5,
    # so the loop position doubles as the list index.
    for position, min_df in enumerate((2, 5)):
        # Single-string form: same bytes as the old multi-argument print
        # statement on Python 2, and still valid syntax on Python 3.
        print(".......... With min_df =  %s ..........." % (min_df,))

        feature_sets = (
            ("With SVD", svdListTrain[position], svdListTest[position]),
            ("With NMF", nmfListTrain[position], nmfListTest[position]),
        )
        for banner, train_matrix, test_matrix in feature_sets:
            print(banner)
            clf.fit(train_matrix, twenty_train.target)
            predicted = clf.predict(test_matrix)
            probabilities = clf.predict_proba(test_matrix)
            hlp.getStats(twenty_test.target, predicted)
            # NOTE(review): both feature sets are plotted under the same
            # 'SVM' title, exactly as before; kept unchanged on purpose.
            hlp.plot_roc(twenty_test.target, probabilities[:, 1], 'SVM')
nmfListTest = td.getnmfListTest() classifier = MultinomialNB() for min_df in [2, 5]: print "WIth min_df = ", min_df if min_df == 2: nmf_matrix_train = nmfListTrain[0] nmf_matrix_test = nmfListTest[0] tfidf_matrix_train = tfidfListTrain[0] tfidf_matrix_test = tfidfListTest[0] else: nmf_matrix_train = nmfListTrain[1] nmf_matrix_test = nmfListTest[1] tfidf_matrix_train = tfidfListTrain[1] tfidf_matrix_test = tfidfListTest[1] print ".......... With SVD ........." classifier.fit(tfidf_matrix_train, twenty_train.target) predicted = classifier.predict(tfidf_matrix_test) probabilities = classifier.predict_proba(tfidf_matrix_test) hlp.getStats(twenty_test.target, predicted) hlp.plot_roc(twenty_test.target, probabilities[:, 1], 'MultinomialNB') print ".......... With NMF .........." classifier.fit(nmf_matrix_train, twenty_train.target) predicted = classifier.predict(nmf_matrix_test) probabilities = classifier.predict_proba(nmf_matrix_test) hlp.getStats(twenty_test.target, predicted) hlp.plot_roc(twenty_test.target, probabilities[:, 1], 'MultinomialNB')
def classifyLR_WithReg(train, test):
    """Compare L1- and L2-penalised logistic regression over ``coeff_list``.

    For every value in the module-level ``coeff_list`` two liblinear
    ``LogisticRegression`` models are fitted on ``train`` (penalty ``'l1'``
    and ``'l2'``, the value being passed as ``C``) and evaluated on ``test``.
    Each model gets a ROC curve via ``hlp.plot_roc``; its test error,
    accuracy, macro precision and macro recall (all scaled to percent) and
    the mean of its fitted coefficients are recorded.  Once the sweep is
    finished the two test-error series are passed to ``plotGraph`` and every
    recorded figure is printed per coefficient.

    Args:
        train: Feature matrix for the training documents.
        test: Feature matrix for the test documents.

    Returns:
        None.  Results are only plotted and printed.
    """
    l1_hyperPlaneParameters, l2_hyperPlaneParameters = [], []
    l1_testErrors, l2_testErrors = [], []
    l1_accu, l2_accu = [], []
    l1_p, l2_p = [], []
    l1_r, l2_r = [], []

    for coeff in coeff_list:
        # Single-string form keeps the Python 2 output byte-identical while
        # also being valid Python 3 syntax.
        print("**************  %s ****************" % (coeff,))

        l1_classifier = LogisticRegression(
            penalty='l1', C=coeff, solver='liblinear')
        l2_classifier = LogisticRegression(
            penalty='l2', C=coeff, solver='liblinear')
        l1_classifier.fit(train, twenty_train.target)
        l2_classifier.fit(train, twenty_train.target)

        l1_predictions = l1_classifier.predict(test)
        l2_predictions = l2_classifier.predict(test)
        l1_predicted_probs = l1_classifier.predict_proba(test)
        l2_predicted_probs = l2_classifier.predict_proba(test)

        y_test = twenty_test.target
        hlp.plot_roc(y_test, l1_predicted_probs[:, 1],
                     'LR with L1 regularization')
        hlp.plot_roc(y_test, l2_predicted_probs[:, 1],
                     'LR with L2 regularization')

        # Accuracy is evaluated once per model; the test error is simply its
        # complement, so there is no need to score the predictions twice.
        l1_accuracy = smet.accuracy_score(y_test, l1_predictions) * 100
        l2_accuracy = smet.accuracy_score(y_test, l2_predictions) * 100
        l1_testErrors.append(100 - l1_accuracy)
        l2_testErrors.append(100 - l2_accuracy)
        l1_accu.append(l1_accuracy)
        l2_accu.append(l2_accuracy)

        l1_p.append(smet.precision_score(
            y_test, l1_predictions, average='macro') * 100)
        l2_p.append(smet.precision_score(
            y_test, l2_predictions, average='macro') * 100)
        l1_r.append(smet.recall_score(
            y_test, l1_predictions, average='macro') * 100)
        l2_r.append(smet.recall_score(
            y_test, l2_predictions, average='macro') * 100)

        l1_hyperPlaneParameters.append(np.mean(l1_classifier.coef_))
        l2_hyperPlaneParameters.append(np.mean(l2_classifier.coef_))

    plotGraph(l1_testErrors, "l1")
    plotGraph(l2_testErrors, "l2")

    # One report section per penalty, l1 first, for every coefficient.
    report_columns = (
        ("l1", l1_testErrors, l1_hyperPlaneParameters, l1_accu, l1_p, l1_r),
        ("l2", l2_testErrors, l2_hyperPlaneParameters, l2_accu, l2_p, l2_r),
    )
    for index, coeff in enumerate(coeff_list):
        for tag, errors, coef_means, accuracies, precisions, recalls in report_columns:
            print("coeff= %s" % (coeff,))
            print("Test error %s= %s" % (tag, errors[index]))
            print("Mean of coeff %s= %s" % (tag, coef_means[index]))
            print("Accuracy %s= %s" % (tag, accuracies[index]))
            print("Precision %s= %s" % (tag, precisions[index]))
            print("Recall %s= %s" % (tag, recalls[index]))
for min_df in [2, 5]: print "Calculating for min_df = ", min_df if min_df == 2: svd_matrix_train = svdListTrain[0] svd_matrix_test = svdListTest[0] nmf_matrix_train = nmfListTrain[0] nmf_matrix_test = nmfListTest[0] else: svd_matrix_train = svdListTrain[1] svd_matrix_test = svdListTest[1] nmf_matrix_train = nmfListTrain[1] nmf_matrix_test = nmfListTest[1] print "************With SVD***********" clf_5fold_svd = callClassifier(svd_matrix_train) clf_5fold_svd.fit(svd_matrix_train, twenty_train.target) predicted = clf_5fold_svd.predict(svd_matrix_test) probabilities = clf_5fold_svd.predict_proba(svd_matrix_test) hlp.getStats(twenty_test.target, predicted) hlp.plot_roc(twenty_test.target, probabilities[:, 1], 'SVM with cross-validation') print "************With NMF***********" clf_5fold_nmf = callClassifier(nmf_matrix_train) clf_5fold_nmf.fit(nmf_matrix_train, twenty_train.target) predictednmf = clf_5fold_nmf.predict(nmf_matrix_test) probabilitiesnmf = clf_5fold_nmf.predict_proba(nmf_matrix_test) hlp.getStats(twenty_test.target, predictednmf) hlp.plot_roc(twenty_test.target, probabilitiesnmf[:, 1], 'SVM with cross-validation')