class Model:
    """Uniform wrapper around several interchangeable classifiers.

    The backend is selected by ``name``:

    * ``"linear"``   -> ``LinearSVC()``
    * ``"logistic"`` -> ``LogisticRegression()``
    * ``"cnn"``      -> ``CNN()``
    * ``"bayes"``    -> ``MultinomialNB()``

    All backends are created with their default arguments.

    Raises:
        ValueError: from ``__init__``/``construct`` and ``predict_proba``
            when ``name`` is not one of the supported values. Previously an
            unknown name produced an object without a ``model`` attribute
            and ``predict_proba`` silently returned ``None``.
    """

    # Zero-argument factories keyed by model name. They are lambdas on
    # purpose: the backend class is looked up only when that backend is
    # actually requested, so an unavailable backend does not break the others.
    _FACTORIES = {
        "linear": lambda: LinearSVC(),
        "logistic": lambda: LogisticRegression(),
        "cnn": lambda: CNN(),
        "bayes": lambda: MultinomialNB(),
    }

    # Name of the backend method that yields per-class scores.
    _SCORE_METHODS = {
        "linear": "decision_function",
        "logistic": "predict_proba",
        "cnn": "decision_function",
        "bayes": "predict_proba",
    }

    def __init__(self, name):
        """Remember ``name`` and build the matching backend."""
        self.name = name
        self.construct()

    def _lookup(self, table):
        """Return ``table[self.name]`` or raise ``ValueError`` if unsupported."""
        try:
            return table[self.name]
        except (KeyError, TypeError):
            # TypeError covers unhashable names, which can never match.
            supported = ", ".join(repr(key) for key in table)
            raise ValueError(
                f"unknown model name {self.name!r}; expected one of: {supported}"
            ) from None

    def construct(self):
        """Instantiate the backend for ``self.name`` and store it in ``self.model``."""
        factory = self._lookup(self._FACTORIES)
        self.model = factory()

    def predict(self, X):
        """Return whatever the backend's ``predict(X)`` returns."""
        return self.model.predict(X)

    def predict_proba(self, X):
        """Return per-sample scores from the backend.

        For ``"logistic"`` and ``"bayes"`` this is the backend's
        ``predict_proba(X)``; for ``"linear"`` and ``"cnn"`` it is
        ``decision_function(X)``, so the two groups call different backend
        methods despite sharing this method's name.
        """
        method_name = self._lookup(self._SCORE_METHODS)
        return getattr(self.model, method_name)(X)

    def fit(self, X, Y):
        """Fit the backend on ``X`` and ``Y``; returns ``None``."""
        self.model.fit(X, Y)
def test(method, train_data, test_data):
    """Train a bag-of-words classifier and print its evaluation.

    Both data sets are split by ``function.data2vec`` into texts, labels and
    votes. Texts are vectorized with a ``CountVectorizer`` that is fitted on
    the training texts only. Two results are printed: a classification
    report for the predicted labels, and the mean Pearson correlation
    between each predicted score row and the matching row of test votes.

    Args:
        method: ``'naive_bayes'`` selects ``MultinomialNB``; any other value
            selects ``LinearSVC``.
        train_data: Training set, passed to ``function.data2vec``.
        test_data: Evaluation set, passed to ``function.data2vec``.

    Returns:
        None. Results are written to standard output.
    """
    use_bayes = method == 'naive_bayes'

    # The training votes are not needed; only the test votes are compared.
    raw_train, y_train, _ = function.data2vec(train_data)
    raw_test, y_test, true_votes = function.data2vec(test_data)

    # Fit the vocabulary and the classifier on the training split.
    vectorizer = CountVectorizer()
    x_train = vectorizer.fit_transform(raw_train)
    classifier = MultinomialNB() if use_bayes else LinearSVC()
    classifier.fit(x_train, y_train)

    # Label prediction and per-class report.
    x_test = vectorizer.transform(raw_test)
    y_pred = classifier.predict(x_test)
    report = classification_report(
        y_test,
        y_pred,
        digits=4,
        target_names=EMOTIONS_LIST,
        zero_division=0,
    )
    print(report)

    # Per-sample score rows: probabilities for naive Bayes, decision
    # function values otherwise.
    if use_bayes:
        scored_votes = classifier.predict_proba(x_test)
    else:
        scored_votes = classifier.decision_function(x_test)

    # Average the per-sample correlation between predicted and true votes.
    running_total = 0
    for predicted_row, true_row in zip(scored_votes, true_votes):
        running_total += pearsonr(predicted_row, true_row)[0]
    coefficient = running_total / len(scored_votes)
    print(f"coefficient = {coefficient:.4f}")
# -----------------------------------------------------------------------
# Plot the confusion matrix as an annotated heat map.
# `conf_matrix` and `class_names` are defined outside this fragment; the
# same names label both the rows and the columns.
df_cm = pd.DataFrame(conf_matrix, index=class_names, columns=class_names )
plt.figure(figsize=(5, 5))
# fmt='d' formats each cell annotation as an integer
# (assumes conf_matrix holds integer counts -- TODO confirm).
hm = sns.heatmap(df_cm, cbar=False, annot=True, square=True, fmt='d',
                 annot_kws={'size': 20}, yticklabels=df_cm.columns,
                 xticklabels=df_cm.columns)
# Re-apply the existing tick labels unrotated, right-aligned, at font size 10.
hm.yaxis.set_ticklabels(hm.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize=10)
hm.xaxis.set_ticklabels(hm.xaxis.get_ticklabels(), rotation=0, ha='right', fontsize=10)
# Rows are labelled as the true class, columns as the predicted class.
plt.ylabel('True label', fontsize=20)
plt.xlabel('Predicted label', fontsize=20)
# Show heat map
plt.tight_layout()
plt.show()
# -----------------------------------------------------------------------
# Plot ROC Area Under Curve
# NOTE(review): despite the variable name, these values come from
# clf.decision_function, so they are presumably decision scores rather than
# probabilities -- confirm against the type of `clf`.
y_predict_probability = clf.decision_function(X_test)
# The third value returned by roc_curve is not used here.
fpr, tpr, _ = roc_curve(y_test, y_predict_probability)
# NOTE(review): `auc` would shadow any imported function of the same name --
# confirm none is needed later in the file.
auc = roc_auc_score(y_test, y_predict_probability)
# Debug output, disabled:
# print(fpr)
# print(tpr)
# print(auc)
plt.figure()
lw = 2  # line width shared by both curves below
plt.plot(fpr, tpr, color='darkorange', lw=lw, label='ROC curve (area = %0.2f)' % auc)
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
# Upper y limit is set slightly above 1.0.
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')