Пример #1
0
# Report, part 1: text-form confusion matrix for `test_y` vs. `pred_y`.
# `test_y`, `pred_y` and `confusion_matrix` come from outside this fragment.
print('Confusion Matrix')
print('================')
cm = confusion_matrix(test_y, pred_y)
print(cm)

# Graphical rendering of the matrix is disabled; the lines below are kept
# commented out, so only the printed matrix above is produced.
# plt.figure()
# plot_confusion_matrix(cm)
#
##  plt.show()


# Report, part 2: evaluate the fitted pipeline on the Crowdflower data set.
print('\nCalculating predictions on Crowdflower')
print('--------------------------------------')

# NOTE(review): the literal 1000 is presumably the number of examples per
# category requested from the loader -- confirm against prep.get_crowdflower.
cf_x, cf_y = prep.get_crowdflower(1000)
# `pipe` is defined outside this fragment; only its predict() is used here.
pred_cf = pipe.predict(cf_x)

print()

# Per-class report for the Crowdflower predictions.
print(classification_report(cf_y, pred_cf))
print()

# Overall accuracy on the same labels/predictions.
print("Accuracy score")
print("==============")
print(accuracy_score(cf_y, pred_cf))
print()

print('Confusion Matrix')
print('================')
# NOTE(review): unlike the first matrix in this block, this one is computed
# but not printed in the visible part of the file -- confirm a print(cm)
# follows, otherwise the header above is emitted with nothing under it.
cm = confusion_matrix(cf_y, pred_cf)
    tick_marks = np.arange(len(CM_LABELS))
    plt.xticks(tick_marks, CM_LABELS, rotation=45)
    plt.yticks(tick_marks, CM_LABELS)
    plt.tight_layout()
    plt.ylabel("True label")
    plt.xlabel("Predicted label")


# Size argument handed to both data loaders below; the log message describes
# it as the number of examples per category.
NUM_PER_CATEGORY = 1000

print("loading data....")
print(str(NUM_PER_CATEGORY) + " examples per category")
# Wall-clock timing covers both loader calls and both float32 casts.
start_time = time.time()
# Training data comes from prep.get_data, evaluation data from
# prep.get_crowdflower; only the feature arrays are cast to float32,
# the label arrays are left as returned.
train_x, train_y = prep.get_data(NUM_PER_CATEGORY)
train_x = train_x.astype("float32")
test_x, test_y = prep.get_crowdflower(NUM_PER_CATEGORY)
test_x = test_x.astype("float32")
print("Total data load time:")
print("---------------------")
# Elapsed seconds since start_time.
print(time.time() - start_time)

# Audible "done" notification through the shell.
# NOTE(review): `say` is presumably the macOS text-to-speech command; the
# return code of os.system is ignored, so a missing command does not stop
# the script -- confirm that best-effort behavior is intended.
os.system('say "data is loaded"')


# Build the model: an Overfeat feature extractor feeding a classifier.
# Note that `tf` here is the OverfeatTransformer instance, not TensorFlow.
# Consider trying different values for output_layers
print("\nstarting nn trained on twitter with logit @ -2....")
tf = OverfeatTransformer(output_layers=[-2])
clf = LogisticRegression()
# Alternative classifiers, left commented out; swap one in for `clf` to try it.
# clf = SVC()
# clf = RandomForestClassifier()
# Chain extractor and classifier so they are used as a single estimator.
pipe = make_pipeline(tf, clf)