from bayes import Bayes


# calculate validation accuracy
def accuracy(type, result):
    """Return the fraction of predictions in *result* that match class *type*.

    Assumes *result* is a sequence of 0/1 predictions where 1 means "spam"
    and 0 means "ham" (inferred from how the counts are taken below; confirm
    against the classifier that produces it).

    Args:
        type: Expected class of every document that was classified, either
            'ham' or 'spam'. (The name shadows the builtin but is kept so
            existing callers keep working.)
        result: Sequence of numeric predictions; must support len() and sum().

    Returns:
        The share of entries matching *type* as a float, 0.0 when *result*
        is empty, and 0 when *type* is neither 'ham' nor 'spam'.
    """
    if type == 'ham':
        # Every entry that is not counted by sum() is a correct ham prediction.
        correct = len(result) - sum(result)
    elif type == 'spam':
        correct = sum(result)
    else:
        return 0

    total = len(result)
    # len() is compared explicitly (instead of truthiness) so array-like
    # inputs work too; an empty prediction list has no meaningful ratio.
    if total == 0:
        return 0.0
    # Convert before dividing so the ratio is not truncated by integer division.
    return float(correct) / total

# Run the prediction for one train/test split. To evaluate another split,
# change the file paths below to train_1, train_2, ..., train_5 and the
# matching test_1, test_2, ... files.
ham_file = './ham/train_1.txt'
spam_file = './spam/train_1.txt'

# Test documents taken from the ham folder.
test_file = './ham/test_1.txt'
train_matrix, class_labels, test_doc = Bayes.process_include_test_data(ham_file, spam_file, test_file)

# Test documents taken from the spam folder, processed with the same training files.
test_file_2 = './spam/test_1.txt'
train_matrix_2, class_labels_2, test_doc_2 = Bayes.process_include_test_data(ham_file, spam_file, test_file_2)

result = Bayes.MyMultinomialNB(train_matrix, class_labels, test_doc)
result2 = Bayes.MyMultinomialNB(train_matrix_2, class_labels_2, test_doc_2)

# Compute each per-class accuracy once and reuse it for every metric below.
ham_accuracy = accuracy('ham', result)
spam_accuracy = accuracy('spam', result2)
print('accuracy of ham: ' + str(ham_accuracy))
print('accuracy of spam: ' + str(spam_accuracy))

# Spam is treated as the positive class:
#   true positives  = spam test documents predicted as spam
#   false positives = ham test documents predicted as spam
true_positives = len(test_doc_2)*spam_accuracy
false_positives = len(test_doc)*(1-ham_accuracy)
predicted_positives = true_positives+false_positives
# If nothing was predicted as spam the ratio is undefined; report 0.0
# instead of crashing with ZeroDivisionError.
if predicted_positives:
    precision = true_positives/predicted_positives
else:
    precision = 0.0

# Recall for spam equals the accuracy measured on the spam-only test set.
recall = spam_accuracy

# F1 is the harmonic mean of precision and recall; it is 0.0 when both are 0.
if precision+recall:
    F_1 = 2*precision*recall/(precision+recall)
else:
    F_1 = 0.0

print('precision '+str(precision))
print('recall ' +str(recall))
print('F_1 '+str(F_1))