'''
75. Feature weights

For the logistic regression model trained in problem 73, show the ten
features with the highest weights and the ten features with the lowest
weights.
'''
import sys
from operator import itemgetter

# CSV whose header row carries the feature names.
FEATS_CSV = 'data/feats.csv'
# Number of features reported at each end of the ranking.
TOP_N = 10


def read_feature_names(path):
    """Return the feature names found in the header row of the CSV at *path*.

    The first two header columns are dropped (presumably the label and the
    sentence columns -- confirm against the writer of the CSV); the remaining
    names are returned in file order.
    """
    with open(path) as f:
        header = f.readline()
    # readline() keeps the line terminator; without stripping it the last
    # feature name would end in "\n" and break the printed report.
    return header.rstrip('\r\n').split(',')[2:]


def rank_features(names, weights, n=TOP_N):
    """Pair *names* with *weights* and return ``(lowest, highest)``.

    Each element of the result is a list of at most *n* ``(name, weight)``
    tuples: ``lowest`` in ascending weight order, ``highest`` in descending
    weight order.  Pairing stops at the shorter of the two inputs, and a
    repeated name keeps the last weight seen for it.
    """
    feats_dict = dict(zip(names, weights))
    by_weight = itemgetter(1)
    lowest = sorted(feats_dict.items(), key=by_weight)[:n]
    # reverse=True keeps equal weights in their original relative order,
    # the same tie order as sorting on the negated weight.
    highest = sorted(feats_dict.items(), key=by_weight, reverse=True)[:n]
    return lowest, highest


def main():
    """Train the model on the features file given as argv[1] and print the
    lowest-weighted features followed by the highest-weighted ones."""
    # Imported here so the helpers above can be used without scikit-learn
    # or the problem-73 module being importable.
    from sklearn.linear_model import LogisticRegression
    from no73 import load_feats

    feats, labels, _sents = load_feats(sys.argv[1])
    model = LogisticRegression(C=1e5)
    model.fit(feats, labels)

    names = read_feature_names(FEATS_CSV)
    # coef_[0] is the first row of the fitted coefficient matrix.
    lowest, highest = rank_features(names, model.coef_[0])
    for name, weight in lowest + highest:
        print("{}: {}".format(name, weight))


if __name__ == '__main__':
    main()
predicts.append(1) else: predicts.append(-1) return predicts if __name__ == '__main__': from no73 import load_feats from no77 import get_metrics from no78 import split_seq, get_train from sklearn.linear_model import LogisticRegression import matplotlib.pyplot as plt import sys fname = sys.argv[1] K = 5 feats, labels, _ = load_feats(fname) split_feats = split_seq(feats, K) split_labels = split_seq(labels, K) probs_list = [] for i in range(K): model = LogisticRegression(C=1e5) model.fit(get_train(split_feats, i), get_train(split_labels, i)) probs = model.predict_proba(split_feats[i]) probs_list.append(probs) threshold_list = [] precision_list = [] recall_list = [] for threshold in range(101): threshold /= 100 metrics_list = []