def simpleTest():
    """Smoke test: classify the first message found in ``./test/``.

    Loads the trained model through ``naiveBayes.getTrainedModelInfo()``,
    reads the messages with ``naiveBayes.loadMailData``, and prints whatever
    ``naiveBayes.classify`` returns for the first message. Nothing is
    returned; an empty message list raises ``IndexError``.
    """
    model = naiveBayes.getTrainedModelInfo()
    # The fifth model element (DS) is not used by the plain classifier.
    vocabulary, spamicity, healthy, p_spam, _ = model

    messages, _labels = naiveBayes.loadMailData('./test/')
    first_message = messages[0]

    print(naiveBayes.classify(vocabulary, spamicity, healthy, p_spam,
                              first_message))
def testClassifyErrorRateByIndex():
    """Evaluate ``naiveBayes.classify`` on a fixed subset of ``./public/``.

    Messages are loaded with ``naiveBayes.loadMailDataTest``; the entries at
    the hard-coded positions in ``test_index`` form the evaluation set. For
    each one the predicted and actual labels are printed, then a confusion
    matrix and the accuracy are printed. A label equal to 1 is counted as
    SPAM, anything else as HAM.

    The accuracy is printed as a fraction (correct / total), not as a
    percentage. Nothing is returned.

    Raises:
        IndexError: if the loaded data has fewer entries than the largest
            position in ``test_index`` requires.
    """
    fileFolder = './public/'
    smsWords, classLables = naiveBayes.loadMailDataTest(fileFolder)

    # A fixed list of positions (instead of random sampling) keeps the
    # evaluation set identical from run to run.
    test_index = [
        2, 6, 7, 8, 13, 16, 19, 29, 35, 37,
        40, 42, 43, 45, 46, 49, 51, 52, 64, 65,
        71, 72, 78, 79, 80, 84, 85, 90, 91, 98,
        103, 109, 111, 117, 123, 129, 135, 138, 142, 149,
        169, 188, 191, 192, 203, 221, 225, 226, 229, 232,
        236, 243, 250, 254, 257, 258, 259, 264, 268, 281,
        298, 300, 308, 319, 322, 329, 333, 335, 338, 339,
        340, 344, 347, 358, 359, 362, 382, 385, 391, 394,
        402, 410, 415, 417, 418, 422, 423, 424, 425, 428,
        437, 441, 456, 461, 462, 470, 472, 477, 480, 481,
    ]
    testWords = [smsWords[i] for i in test_index]
    testWordsType = [classLables[i] for i in test_index]

    # The fifth model element (DS) is not needed by the plain classifier.
    vocabularyList, pWordsSpamicity, pWordsHealthy, pSpam, _ = \
        naiveBayes.getTrainedModelInfo()

    tp, tn, fp, fn = 0, 0, 0, 0
    for words, actual in zip(testWords, testWordsType):
        smsType = naiveBayes.classify(vocabularyList, pWordsSpamicity,
                                      pWordsHealthy, pSpam, words)
        print('predicted:', smsType, ' actual:', actual)
        if smsType != actual:
            # Misclassified: predicting spam (1) is a false positive.
            if smsType == 1:
                fp += 1
            else:
                fn += 1
        elif smsType == 1:
            tp += 1
        else:
            tn += 1

    # One row per line so the columns of the matrix line up when printed.
    confusion_table = (
        "\n"
        "    Predicted: | SPAM |  HAM\n"
        "----------------------------\n"
        " Ground Truth: |      |\n"
        "          SPAM | %4d | %4d\n"
        "           HAM | %4d | %4d\n"
    )
    print(confusion_table % (tp, fn, fp, tn))

    acc = (tp + tn) / (fp + fn + tp + tn)
    print("acc->", acc)
def testClassifyErrorRateMSE():
    """Print the mean squared error of the AdaBoost scores on a fixed subset.

    Messages are loaded from ``./public/`` with ``naiveBayes.loadMailData``;
    the entries at the hard-coded positions in ``test_index`` form the
    evaluation set. Each message is turned into a word vector with
    ``naiveBayes.setOfWordsToVecTor`` and scored with
    ``naiveBayes.adaboostClassify``, which yields ``(p1, p0, label)``.

    Both scores are divided by a fixed scale and compared against the target
    pair ``(1, 0)`` when the actual label is 1 and ``(0, 1)`` otherwise; the
    per-message error is the mean of the two squared differences. The average
    over all evaluated messages is printed after ``"mse->"``. Nothing is
    returned.

    Raises:
        IndexError: if the loaded data has fewer entries than the largest
            position in ``test_index`` requires.
    """
    fileFolder = './public/'
    mailWords, classLables = naiveBayes.loadMailData(fileFolder)

    # A fixed list of positions (instead of random sampling) keeps the
    # evaluation set identical from run to run.
    test_index = [
        2, 6, 7, 8, 13, 16, 19, 29, 35, 37,
        40, 42, 43, 45, 46, 49, 51, 52, 64, 65,
        71, 72, 78, 79, 80, 84, 85, 90, 91, 98,
        103, 109, 111, 117, 123, 129, 135, 138, 142, 149,
        169, 188, 191, 192, 203, 221, 225, 226, 229, 232,
        236, 243, 250, 254, 257, 258, 259, 264, 268, 281,
        298, 300, 308, 319, 322, 329, 333, 335, 338, 339,
        340, 344, 347, 358, 359, 362, 382, 385, 391, 394,
        402, 410, 415, 417, 418, 422, 423, 424, 425, 428,
        437, 441, 456, 461, 462, 470, 472, 477, 480, 481,
    ]
    testWords = [mailWords[i] for i in test_index]
    testWordsType = [classLables[i] for i in test_index]

    vocabularyList, pWordsSpamicity, pWordsHealthy, pSpam, DS = \
        naiveBayes.getTrainedModelInfo()

    # NOTE(review): 20000 presumably maps the raw AdaBoost scores into
    # roughly [0, 1]; confirm against naiveBayes.adaboostClassify.
    score_scale = 20000

    se = 0
    for words, actual in zip(testWords, testWordsType):
        testWordsCount = naiveBayes.setOfWordsToVecTor(vocabularyList, words)
        wordVector = np.array(testWordsCount)
        # The predicted label (third element) is not needed for the error.
        p1, p0, _ = naiveBayes.adaboostClassify(vocabularyList,
                                                pWordsSpamicity,
                                                pWordsHealthy, DS, pSpam,
                                                wordVector)
        scaled_p1 = p1 / score_scale
        scaled_p0 = p0 / score_scale
        if actual == 1:
            se += (pow(scaled_p1 - 1, 2) + pow(scaled_p0, 2)) / 2
        else:
            se += (pow(scaled_p1, 2) + pow(scaled_p0 - 1, 2)) / 2

    print("mse->", se / len(test_index))
def testClassifyErrorRate():
    """Evaluate ``naiveBayes.classify`` on every message in ``./public/``.

    Messages are loaded with ``naiveBayes.loadMailDataTest``. The position of
    each misclassified message is printed as it is found, followed by a
    confusion matrix and the accuracy. A label equal to 1 is counted as SPAM,
    anything else as HAM.

    The accuracy is printed as a percentage (0-100). When no messages are
    loaded it is printed as ``nan`` instead of raising ``ZeroDivisionError``.
    Nothing is returned.
    """
    fileFolder = './public/'
    smsWords, classLables = naiveBayes.loadMailDataTest(fileFolder)

    # The fifth model element (DS) is only needed by the AdaBoost variant
    # (naiveBayes.adaboostClassifyForPredict), which is not used here.
    vocabularyList, pWordsSpamicity, pWordsHealthy, pSpam, _ = \
        naiveBayes.getTrainedModelInfo()

    tp, tn, fp, fn = 0, 0, 0, 0
    for i, actual in enumerate(classLables):
        smsType = naiveBayes.classify(vocabularyList, pWordsSpamicity,
                                      pWordsHealthy, pSpam, smsWords[i])
        if smsType != actual:
            # Report the position of every misclassified message.
            print(i)
            if smsType == 1:
                fp += 1
            else:
                fn += 1
        elif smsType == 1:
            tp += 1
        else:
            tn += 1

    # One row per line so the columns of the matrix line up when printed.
    confusion_table = (
        "\n"
        "    Predicted: | SPAM |  HAM\n"
        "----------------------------\n"
        " Ground Truth: |      |\n"
        "          SPAM | %4d | %4d\n"
        "           HAM | %4d | %4d\n"
    )
    print(confusion_table % (tp, fn, fp, tn))

    total = tp + tn + fp + fn
    # Accuracy is undefined for an empty evaluation set; report NaN.
    acc = 100.0 * (tp + tn) / total if total else float("nan")
    print("acc->", acc)