print "Fitting the classifier" t0 = time() clf = TransparentLogisticRegression(penalty='l1', C=0.1) clf.fit(X_train, y_train) duration = time() - t0 print print "Fitting took %0.2fs." % duration print print "Predicting the evidences" t0 = time() neg_evi, pos_evi = clf.predict_evidences(X_test) duration = time() - t0 print print "Predicting evidences took %0.2fs." % duration print print "Predicting the probs" t0 = time() probs = clf.predict_proba(X_test) duration = time() - t0 print
class _TimedStage(object):
    """Context manager that announces a stage and reports how long it took.

    On entry it prints ``banner`` and starts a timer.  On a clean exit it
    prints a blank line, ``"<label> took <seconds>s."`` and another blank
    line.  If the enclosed code raises, nothing is reported and the
    exception propagates unchanged.
    """

    def __init__(self, banner, label):
        self._banner = banner
        self._label = label
        self._t0 = None

    def __enter__(self):
        print(self._banner)
        self._t0 = time()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type is None:
            duration = time() - self._t0
            # print("") rather than print(): on Python 2 without
            # print_function, print() would emit "()" instead of a blank line.
            print("")
            print("%s took %0.2fs." % (self._label, duration))
            print("")
        return False  # never swallow exceptions


def testLR(data_dir="C:\\Users\\Mustafa\\Desktop\\aclImdb"):
    """Train a TransparentLogisticRegression on IMDB data and print a report.

    The function runs four timed stages (loading, fitting, predicting
    evidences, predicting probabilities), printing the wall-clock duration of
    each.  It then builds a ``TopInstances`` from the evidences and the
    classifier bias and, for the first index returned by ``most_negatives()``
    and by ``most_positives()``, prints the total / negative / positive
    evidence, the predicted probabilities and the test document.

    Args:
        data_dir: Path handed to ``load_imdb`` as the dataset location
            (presumably the root of the extracted aclImdb archive -- confirm
            against ``load_imdb``).  Defaults to the original author's
            location so existing ``testLR()`` calls keep working.

    Returns:
        None.  All results are written to standard output.
    """
    with _TimedStage("Loading the data", "Loading"):
        vect = CountVectorizer(min_df=5, max_df=1.0, binary=True,
                               ngram_range=(1, 1))
        # Labels of the test split and the raw training corpus are returned
        # by load_imdb but not needed for this report.
        (X_train, y_train, X_test, _y_test,
         _train_corpus, test_corpus) = load_imdb(data_dir, shuffle=True,
                                                 vectorizer=vect)

    with _TimedStage("Fitting the classifier", "Fitting"):
        clf = TransparentLogisticRegression(penalty='l1', C=0.1)
        clf.fit(X_train, y_train)

    with _TimedStage("Predicting the evidences", "Predicting evidences"):
        neg_evi, pos_evi = clf.predict_evidences(X_test)

    with _TimedStage("Predicting the probs", "Predicting probs"):
        probs = clf.predict_proba(X_test)

    ti = TopInstances(neg_evi, pos_evi, clf.get_bias())
    total_evi = neg_evi + pos_evi

    sections = (
        ("Most negative", ti.most_negatives),
        ("Most positive", ti.most_positives),
    )
    for heading, ranked_indices in sections:
        print("")
        print(heading)
        print("")
        # Only the first index of the ranking is reported.
        i = ranked_indices()[0]
        print("%s %s %s %s" % (total_evi[i], neg_evi[i], pos_evi[i], probs[i]))
        print(test_corpus[i])