# Demo script: build a ConfusionMatrix from a toy actual/predicted pair and
# print the per-label counts followed by the overall metrics.
from utilities import ConfusionMatrix

# `a` holds the actual labels, `p` the predicted labels (same length).
a = [0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3]
p = [0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 1, 1, 3, 3, 3]
cm = ConfusionMatrix(a, p)

print("ACTUAL")
print(a)
print("PREDICTED")
print(p)

# Per-label counts for labels 0..3. The caption is printed before the
# method is looked up and called, so the output order matches a plain
# caption/value pair of print statements.
for i in range(4):
    print("\ncategory %s:" % i)
    for caption, method_name in (
        ("TP: ", "TP"),
        ("FP: ", "FP"),
        ("TN: ", "TN"),
        ("FN: ", "FN"),
    ):
        print(caption, end="")
        print(getattr(cm, method_name)(label=i))

# Overall metrics, printed once after the per-label section.
for caption, method_name in (
    ("avg accuracy: ", "average_accuracy"),
    ("accuracy: ", "accuracy"),
    ("precision: ", "precision"),
    ("recall: ", "recall"),
):
    print(caption, end="")
    print(getattr(cm, method_name)())
return Y_test, predicted to_save = [] if test_what == 'train_size': to_save.append(('train_size', 'accuracy', 'precision', 'recall')) for train_size in [ 1000*n for n in range(1,10) ]: print('train_size:',train_size) # train the naive bayes and obtain the actual, predicted vectors. actual, predicted = run_nb(train_size=train_size, learn_code=learn_code) # get confusion matrix to get metrics CM = ConfusionMatrix(actual, predicted) to_save.append( (train_size, CM.average_accuracy(), CM.precision(), CM.recall() ) ) elif test_what == 'cumulative_ngram': to_save.append(('ngram_max', 'accuracy', 'precision', 'recall')) for max_ngram in range(4,9): print('max_ngram:',max_ngram) # train the naive bayes and obtain the actual, predicted vectors. actual, predicted = run_nb(ngram_range=(1,max_ngram), learn_code=learn_code) # get confusion matrix to get metrics CM = ConfusionMatrix(actual, predicted) to_save.append( (max_ngram, CM.average_accuracy(), CM.precision(), CM.recall() ) )
# Result rows for the selected experiment. The first row appended in each
# branch is the column header; note that the 'accuracy' column is filled
# with CM.average_accuracy().
to_save = []

if test_what == 'train_size':
    to_save.append(('train_size', 'accuracy', 'precision', 'recall'))

    # Sweep the training-set size: 1000, 2000, ..., 9000.
    for train_size in range(1000, 10000, 1000):
        print('train_size:', train_size)

        # Train the naive Bayes model; run_nb hands back the actual and
        # predicted label vectors.
        actual, predicted = run_nb(
            train_size=train_size,
            learn_code=learn_code,
        )

        # Summarise the run through a confusion matrix.
        CM = ConfusionMatrix(actual, predicted)
        to_save.append((
            train_size,
            CM.average_accuracy(),
            CM.precision(),
            CM.recall(),
        ))

elif test_what == 'cumulative_ngram':
    to_save.append(('ngram_max', 'accuracy', 'precision', 'recall'))

    # Sweep the upper n-gram bound (4..8); the lower bound stays at 1.
    for max_ngram in range(4, 9):
        print('max_ngram:', max_ngram)

        # Train the naive Bayes model; run_nb hands back the actual and
        # predicted label vectors.
        actual, predicted = run_nb(
            ngram_range=(1, max_ngram),
            learn_code=learn_code,
        )

        # Summarise the run through a confusion matrix.
        # NOTE(review): the visible chunk stops here, before any row is
        # appended for this branch — confirm against the full file.
        CM = ConfusionMatrix(actual, predicted)
# In[166]:

# Fit the model on the training features; the labels are materialised
# as a plain list first.
mb.fit(X_train2, list(Y_train))


# In[167]:

# NOTE(review): the first X_test2 (cv followed by kb) is overwritten by the
# very next statement, so only the cv-transformed features reach
# mb.predict — confirm which variant is intended.
X_test2 = kb.transform(cv.transform(X_test))
X_test2 = cv.transform(X_test)


# In[168]:

# Predict labels for the transformed test features.
Y_predicted = mb.predict(X_test2)


# In[169]:

# Compare the true test labels with the predictions.
cm = ConfusionMatrix(Y_test, Y_predicted)


# In[170]:

# Bare expression; presumably displayed as the cell output in a notebook.
cm.average_accuracy()


# In[171]:

# Bare attribute access; presumably displayed as the cell output as well.
cm.confusion_matrix


# In[152]:

# Load the test dataset, using the first CSV column as the index.
# NOTE(review): the cell counter drops from 171 to 152 here, so this export
# order presumably differs from the order the cells were executed in.
df = pd.read_csv('./clean/ml_dataset_test_in-1111.csv', index_col=0)


# In[153]:
# In[167]:

# NOTE(review): the first X_test2 (cv followed by kb) is overwritten by the
# very next statement, so only the cv-transformed features reach
# mb.predict — confirm which variant is intended.
X_test2 = kb.transform(cv.transform(X_test))
X_test2 = cv.transform(X_test)


# In[168]:

# Predict labels for the transformed test features.
Y_predicted = mb.predict(X_test2)


# In[169]:

# Compare the true test labels with the predictions.
cm = ConfusionMatrix(Y_test, Y_predicted)


# In[170]:

# Bare expression; presumably displayed as the cell output in a notebook.
cm.average_accuracy()


# In[171]:

# Bare attribute access; presumably displayed as the cell output as well.
cm.confusion_matrix


# In[152]:

# Load the test dataset, using the first CSV column as the index.
# NOTE(review): the cell counter drops from 171 to 152 here, so this export
# order presumably differs from the order the cells were executed in.
df = pd.read_csv('./clean/ml_dataset_test_in-1111.csv', index_col=0)