def test(clf):
    """Evaluate ``clf`` on the module-level test set and record its scores.

    Predicts labels for ``docs_test``, compares them with ``y_test`` and
    appends the results to the module-level accumulators:

    * ``labels`` -- receives ``str(clf)``
    * ``res[0]`` -- mean of the precision values
    * ``res[1]`` -- mean of the recall values
    * ``res[2]`` -- mean of the F1 values
    * ``res[3]`` -- fraction of predictions equal to ``y_test``

    The precision/recall/F1 values are whatever
    ``precision_recall_fscore_support(y_test, y_pred)`` returns.

    Args:
        clf: Object exposing ``predict(docs)``.

    Returns:
        A ``(conf_arr, norm_conf)`` tuple: the matrix returned by
        ``stat.confusion_matrix(y_test, y_pred)`` and a list of lists with
        the same entries divided by their row sum.  A row whose sum is zero
        is reported as all zeros instead of raising ``ZeroDivisionError``.
    """
    # ``labels`` and ``res`` are only mutated in place, so no ``global``
    # declaration is needed to update them.
    y_pred = clf.predict(docs_test)
    p, r, f1, _support = precision_recall_fscore_support(y_test, y_pred)

    labels.append(str(clf))
    res[0].append(np.mean(p))
    res[1].append(np.mean(r))
    res[2].append(np.mean(f1))
    res[3].append(np.mean(y_pred == y_test))

    conf_arr = stat.confusion_matrix(y_test, y_pred)

    norm_conf = []
    for row in conf_arr:
        total = float(sum(row, 0))
        if total:
            norm_conf.append([float(count) / total for count in row])
        else:
            # An all-zero row has nothing to normalise; keep it as zeros
            # rather than dividing by zero.
            norm_conf.append([0.0 for _ in row])

    return conf_arr, norm_conf
def test(clf):
    """Evaluate ``clf`` on the module-level test set and plot its confusion matrix.

    Predicts labels for ``docs_test``, compares them with ``y_test`` and
    appends the results to the module-level accumulators:

    * ``labels`` -- receives ``str(clf)``
    * ``res[0]`` -- mean of the precision values
    * ``res[1]`` -- mean of the recall values
    * ``res[2]`` -- mean of the F1 values
    * ``res[3]`` -- fraction of predictions equal to ``y_test``

    The row-normalised confusion matrix is then drawn as a heat map, each
    non-zero cell is annotated with its raw count, and the figure is written
    to ``plots/gridsearch/confusion_matrix_<str(clf)>.png``.  The target
    directory is not created here.

    Args:
        clf: Object exposing ``predict(docs)``.

    Returns:
        None.
    """
    # ``labels`` and ``res`` are only mutated in place, so no ``global``
    # declaration is needed to update them.
    y_pred = clf.predict(docs_test)
    p, r, f1, _support = precision_recall_fscore_support(y_test, y_pred)

    labels.append(str(clf))
    res[0].append(np.mean(p))
    res[1].append(np.mean(r))
    res[2].append(np.mean(f1))
    res[3].append(np.mean(y_pred == y_test))

    conf_arr = stat.confusion_matrix(y_test, y_pred)

    norm_conf = []
    for row in conf_arr:
        total = float(sum(row, 0))
        if total:
            norm_conf.append([float(count) / total for count in row])
        else:
            # An all-zero row has nothing to normalise; keep it as zeros
            # rather than dividing by zero.
            norm_conf.append([0.0 for _ in row])

    # Kept from the original: clears whichever figure is current before a
    # fresh one is created for this classifier.
    plt.clf()
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111)
        ax.set_title('Confusion Matrix for %s' % str(clf))
        image = ax.imshow(array(norm_conf), cmap=cm.jet,
                          interpolation='nearest', vmin=0, vmax=1.0)

        # Write the raw count into every non-empty cell.
        for i, counts in enumerate(conf_arr):
            for j, count in enumerate(counts):
                if count > 0:
                    ax.text(j - .2, i + .2, count, fontsize=14)

        fig.colorbar(image)
        fig.savefig("plots/gridsearch/confusion_matrix_%s.png" % str(clf),
                    format="png")
    finally:
        # pyplot keeps every figure alive until it is closed; release this
        # one so repeated calls do not accumulate open figures.
        plt.close(fig)
def test(clf):
    """Evaluate ``clf`` on the module-level test set and record its scores.

    Predicts labels for ``docs_test``, compares them with ``y_test`` and
    appends the results to the module-level accumulators:

    * ``labels`` -- receives ``str(clf)``
    * ``res[0]`` -- mean of the precision values
    * ``res[1]`` -- mean of the recall values
    * ``res[2]`` -- mean of the F1 values
    * ``res[3]`` -- fraction of predictions equal to ``y_test``

    The precision/recall/F1 values are whatever
    ``precision_recall_fscore_support(y_test, y_pred)`` returns.

    Args:
        clf: Object exposing ``predict(docs)``.

    Returns:
        A ``(conf_arr, norm_conf)`` tuple: the matrix returned by
        ``stat.confusion_matrix(y_test, y_pred)`` and a list of lists with
        the same entries divided by their row sum.  A row whose sum is zero
        is reported as all zeros instead of raising ``ZeroDivisionError``.
    """
    # ``labels`` and ``res`` are only mutated in place, so no ``global``
    # declaration is needed to update them.
    y_pred = clf.predict(docs_test)
    p, r, f1, _support = precision_recall_fscore_support(y_test, y_pred)

    labels.append(str(clf))
    res[0].append(np.mean(p))
    res[1].append(np.mean(r))
    res[2].append(np.mean(f1))
    res[3].append(np.mean(y_pred == y_test))

    conf_arr = stat.confusion_matrix(y_test, y_pred)

    norm_conf = []
    for row in conf_arr:
        total = float(sum(row, 0))
        if total:
            norm_conf.append([float(count) / total for count in row])
        else:
            # An all-zero row has nothing to normalise; keep it as zeros
            # rather than dividing by zero.
            norm_conf.append([0.0 for _ in row])

    return conf_arr, norm_conf
def test(clf):
    """Evaluate ``clf`` on the module-level test set and plot its confusion matrix.

    Predicts labels for ``docs_test``, compares them with ``y_test`` and
    appends the results to the module-level accumulators:

    * ``labels`` -- receives ``str(clf)``
    * ``res[0]`` -- mean of the precision values
    * ``res[1]`` -- mean of the recall values
    * ``res[2]`` -- mean of the F1 values
    * ``res[3]`` -- fraction of predictions equal to ``y_test``

    The row-normalised confusion matrix is then drawn as a heat map, each
    non-zero cell is annotated with its raw count, and the figure is written
    to ``plots/gridsearch/confusion_matrix_<str(clf)>.png``.  The target
    directory is not created here.

    Args:
        clf: Object exposing ``predict(docs)``.

    Returns:
        None.
    """
    # ``labels`` and ``res`` are only mutated in place, so no ``global``
    # declaration is needed to update them.
    y_pred = clf.predict(docs_test)
    p, r, f1, _support = precision_recall_fscore_support(y_test, y_pred)

    labels.append(str(clf))
    res[0].append(np.mean(p))
    res[1].append(np.mean(r))
    res[2].append(np.mean(f1))
    res[3].append(np.mean(y_pred == y_test))

    conf_arr = stat.confusion_matrix(y_test, y_pred)

    norm_conf = []
    for row in conf_arr:
        total = float(sum(row, 0))
        if total:
            norm_conf.append([float(count) / total for count in row])
        else:
            # An all-zero row has nothing to normalise; keep it as zeros
            # rather than dividing by zero.
            norm_conf.append([0.0 for _ in row])

    # Kept from the original: clears whichever figure is current before a
    # fresh one is created for this classifier.
    plt.clf()
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111)
        ax.set_title('Confusion Matrix for %s' % str(clf))
        image = ax.imshow(array(norm_conf), cmap=cm.jet,
                          interpolation='nearest', vmin=0, vmax=1.0)

        # Write the raw count into every non-empty cell.
        for i, counts in enumerate(conf_arr):
            for j, count in enumerate(counts):
                if count > 0:
                    ax.text(j - .2, i + .2, count, fontsize=14)

        fig.colorbar(image)
        fig.savefig("plots/gridsearch/confusion_matrix_%s.png" % str(clf),
                    format="png")
    finally:
        # pyplot keeps every figure alive until it is closed; release this
        # one so repeated calls do not accumulate open figures.
        plt.close(fig)
vect_options = { 'ngram_range': (1,1), 'sublinear_tf': True, 'preprocessor': pr.remove_noise, 'use_idf': False, 'stop_words': None } default_options = { 'C': 1.0 } clf = SVM(docs_train, y_train, default_options=default_options, vect_options=vect_options) y_predicted = clf.predict(docs_test) conf_arr = s.confusion_matrix(y_test, y_predicted) norm_conf = [] for i in conf_arr: a = 0 tmp_arr = [] a = sum(i,0) for j in i: tmp_arr.append(float(j)/float(a)) norm_conf.append(tmp_arr) plt.clf() fig = plt.figure() ax = fig.add_subplot(111) res = ax.imshow(array(norm_conf), cmap=cm.jet, interpolation='nearest') for i, cas in enumerate(conf_arr):