# Report the size and class balance of the training split.
# NOTE(review): `y`, `train_index`, `X` and every helper used below are defined
# outside this excerpt.
# Each print() call receives exactly one pre-formatted string (or one object),
# so the output is the same under the Python 2 print statement and the
# Python 3 print function.
print("Y_train total: %s" % (len(y[train_index]),))
print("Y_train positive: %s" % (np.sum(y[train_index]),))
print("Ratio of positive in train instances %s"
      % (np.sum(y[train_index]) / float(len(y[train_index])),))

# One classifier per feature representation: the original features (ori),
# the output of scale() (ss) and the output of decision_tree_scale() (ig).
clf_ori = TransparentLogisticRegression()
clf_ss = TransparentLogisticRegression()
clf_ig = TransparentLogisticRegression()


X_ss = scale(X)
scale_ = decision_tree_scale()

# Unlike scale(), this transformer is fitted with the labels as well.
X_ig = scale_.fit_transform(X, y)

clf_ori.fit(X, y)
clf_ss.fit(X_ss, y)
clf_ig.fit(X_ig, y)

# Learned coefficients, in the order ori / ss / ig.
print(clf_ori.coef_)
print(clf_ss.coef_)
print(clf_ig.coef_)

bias_ori = clf_ori.intercept_
bias_ss = clf_ss.intercept_
bias_ig = clf_ig.intercept_

print("%s %s %s" % (bias_ori, bias_ss, bias_ig))

# prob() is defined elsewhere; presumably it maps a bias to a probability --
# TODO confirm.
print(prob(bias_ori))
print(prob(bias_ss))
示例#2
0
    
    # NOTE(review): this excerpt starts partway through a function body; `t0`,
    # read a few lines below, is assigned somewhere above the visible lines.
    # Vectorizer settings: binary indicators, unigrams only, min_df=5
    # (presumably scikit-learn's CountVectorizer -- confirm against the imports).
    vect = CountVectorizer(min_df=5, max_df=1.0, binary=True, ngram_range=(1, 1))
    # The dataset location is a hard-coded, machine-specific Windows path.
    X_train, y_train, X_test, y_test, train_corpus, test_corpus = load_imdb("C:\\Users\\Mustafa\\Desktop\\aclImdb", shuffle=True, vectorizer=vect)
    feature_names = vect.get_feature_names()
    
    # Seconds elapsed since t0 was set.
    duration = time() - t0

    print
    print "Loading took %0.2fs." % duration
    print
    
    print "Fitting the classifier"
    
    # Restart the timer so that only the fit is measured.
    t0 = time()
    clf = TransparentLogisticRegression(penalty='l1', C=0.1)
    clf.fit(X_train, y_train)
    
    duration = time() - t0

    print
    print "Fitting took %0.2fs." % duration
    print
    
    print "Predicting the evidences"
    
    # Restart the timer so that only the evidence prediction is measured.
    t0 = time()
    # predict_evidences returns a pair, unpacked here as (negative, positive).
    neg_evi, pos_evi = clf.predict_evidences(X_test)
    
    duration = time() - t0

    print
示例#3
0
def _report_elapsed(template, started):
    """Print ``template`` filled with the seconds elapsed since ``started``.

    The message is framed by one blank line above and one below.
    """
    duration = time() - started
    # print("") emits an empty line on both Python 2 and Python 3, whereas a
    # bare print() would output "()" under the Python 2 print statement.
    print("")
    print(template % duration)
    print("")


def testLR(data_path="C:\\Users\\Mustafa\\Desktop\\aclImdb"):
    """Time a TransparentLogisticRegression run on IMDB and show extreme reviews.

    Loads the corpus through ``load_imdb``, fits an L1-penalised
    ``TransparentLogisticRegression`` (C=0.1), then times
    ``predict_evidences`` and ``predict_proba`` on the test split. Finally
    prints the evidence totals, the probabilities and the raw text of the
    first instance returned by ``TopInstances.most_negatives()`` and by
    ``TopInstances.most_positives()``.

    Args:
        data_path: Dataset directory handed to ``load_imdb``. The default is
            the location that used to be hard-coded, so calling ``testLR()``
            without arguments behaves as before.
    """
    print("Loading the data")

    t0 = time()

    # Binary indicators over unigrams with min_df=5.
    vect = CountVectorizer(min_df=5, max_df=1.0, binary=True, ngram_range=(1, 1))
    X_train, y_train, X_test, y_test, train_corpus, test_corpus = load_imdb(
        data_path, shuffle=True, vectorizer=vect)
    # Not read anywhere in the visible part of this function; kept in case
    # later code relies on it.
    feature_names = vect.get_feature_names()

    _report_elapsed("Loading took %0.2fs.", t0)

    print("Fitting the classifier")

    t0 = time()
    clf = TransparentLogisticRegression(penalty='l1', C=0.1)
    clf.fit(X_train, y_train)

    _report_elapsed("Fitting took %0.2fs.", t0)

    print("Predicting the evidences")

    t0 = time()
    neg_evi, pos_evi = clf.predict_evidences(X_test)

    _report_elapsed("Predicting evidences took %0.2fs.", t0)

    print("Predicting the probs")

    t0 = time()
    probs = clf.predict_proba(X_test)

    _report_elapsed("Predicting probs took %0.2fs.", t0)

    ti = TopInstances(neg_evi, pos_evi, clf.get_bias())

    total_evi = neg_evi + pos_evi

    # Same report for both extremes; the ranking method is only called after
    # its heading has been printed, matching the original statement order.
    for heading, ranking in (("Most negative", ti.most_negatives),
                             ("Most positive", ti.most_positives)):
        print("")
        print(heading)
        print("")
        i = ranking()[0]
        # A single formatted string keeps the space-separated layout of the
        # old multi-argument print statement on both Python 2 and 3.
        print("%s %s %s %s" % (total_evi[i], neg_evi[i], pos_evi[i], probs[i]))
        print(test_corpus[i])