Пример #1
0
#coding:utf-8

import numpy as np 
from scipy import io
from sklearn.externals import joblib
from logreg import LogisticRegression


if __name__ == "__main__":
    # Load the stored feature matrix and switch it to CSR, i.e. the same
    # sparse-matrix type that TfidfVectorizer outputs.
    features = io.loadmat("X_train")["X_train"].tocsr()
    gold = np.load("y_train.npy")

    # "logreg" is handed to the project-local LogisticRegression constructor.
    model = LogisticRegression("logreg")
    guesses = model.predict(features)
    scores = model.predict_proba(features)

    # Label text is looked up by the integer value of each label.
    # NOTE(review): this assumes labels are encoded as 0/1 — with a -1/+1
    # encoding, index -1 would wrap around to "+1". Confirm against the data.
    label_names = ["-1", "+1"]

    # Header row: gold label, predicted label, predicted probability (+1).
    print("正解ラベル\t予測ラベル\t予測確率(+1)")
    for row in zip(gold, guesses, scores):
        truth, guess, score = row
        columns = (label_names[int(truth)], label_names[int(guess)], str(score))
        print("\t".join(columns))
Пример #2
0
    # Sweep the decision threshold: predict with each threshold and record the
    # precision and recall of those predictions against y_train.
    for threshold in threshold_list:
        y_predict = clf.predict(X_train, threshold)
        precision_list.append(precision_score(y_train, y_predict))
        recall_list.append(recall_score(y_train, y_predict))

    # Draw precision (red) and recall (blue) as functions of the threshold.
    plt.plot(threshold_list, precision_list, label="precision", color="red")
    plt.plot(threshold_list, recall_list, label="recall", color="blue")

    plt.xlabel("threshold")
    plt.ylabel("rate")
    plt.xlim(0.0, 1.0)
    plt.ylim(0, 1)
    # NOTE(review): the title is spelled "regresssion" (three s); it is runtime
    # text, so it is left unchanged here.
    plt.title("logistic_regresssion")
    # loc=3 is matplotlib's numeric code for "lower left".
    plt.legend(loc=3)
    plt.show()

    # Plot the precision-recall curve.
    from sklearn.metrics import precision_recall_curve
    # NOTE(review): precision_recall_curve expects one score per sample;
    # presumably this project's predict_proba returns only P(+1) — confirm.
    precision, recall, thresholds = precision_recall_curve(
        y_train, clf.predict_proba(X_train))
    # Print how many thresholds precision_recall_curve returned.
    print(len(thresholds))
    # Clear the current figure before drawing the second plot.
    plt.clf()
    plt.plot(recall, precision, label='Precision-Recall curve')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('Precision-Recall curve')
    plt.legend(loc="lower left")
    plt.show()