def simpleTest():
    """Smoke test: classify the first message from ./test/ and print its label."""
    # Pull the pre-trained model parameters from disk.
    vocabularyList, pWordsSpamicity, pWordsHealthy, pSpam, DS = \
        naiveBayes.getTrainedModelInfo()

    fileFolder = './test/'
    smsWords, classLables = naiveBayes.loadMailData(fileFolder)

    # Classify only the very first message as a quick sanity check.
    firstMessage = smsWords[0]
    smsType = naiveBayes.classify(vocabularyList, pWordsSpamicity,
                                  pWordsHealthy, pSpam, firstMessage)
    print(smsType)
def testClassifyErrorRateByIndex():
    """Evaluate the naive-Bayes classifier on a fixed subset of ./public/.

    Classifies the messages at the hard-coded `test_index` positions,
    prints each prediction, a confusion matrix, and the accuracy
    (as a fraction in [0, 1]).
    """
    fileFolder = './public/'
    smsWords, classLables = naiveBayes.loadMailDataTest(fileFolder)

    # Fixed indices of held-out test messages (reproducible evaluation split).
    test_index = [
        2, 6, 7, 8, 13, 16, 19, 29, 35, 37, 40, 42, 43, 45, 46, 49, 51, 52, 64,
        65, 71, 72, 78, 79, 80, 84, 85, 90, 91, 98, 103, 109, 111, 117, 123,
        129, 135, 138, 142, 149, 169, 188, 191, 192, 203, 221, 225, 226, 229,
        232, 236, 243, 250, 254, 257, 258, 259, 264, 268, 281, 298, 300, 308,
        319, 322, 329, 333, 335, 338, 339, 340, 344, 347, 358, 359, 362, 382,
        385, 391, 394, 402, 410, 415, 417, 418, 422, 423, 424, 425, 428, 437,
        441, 456, 461, 462, 470, 472, 477, 480, 481
    ]

    testWords = [smsWords[i] for i in test_index]
    testWordsType = [classLables[i] for i in test_index]

    vocabularyList, pWordsSpamicity, pWordsHealthy, pSpam, DS = \
        naiveBayes.getTrainedModelInfo()

    tp, tn, fp, fn = 0, 0, 0, 0
    for words, actual in zip(testWords, testWordsType):
        smsType = naiveBayes.classify(vocabularyList, pWordsSpamicity,
                                      pWordsHealthy, pSpam, words)
        print('predicted:', smsType, ' actual:', actual)
        if smsType != actual:
            if smsType == 1:
                fp += 1  # predicted spam (1), actually ham
            else:
                fn += 1  # predicted ham, actually spam
        else:
            if smsType == 1:
                tp += 1
            else:
                tn += 1
    print("""
    Predicted:    | SPAM | HAM
    ----------------------------
    Ground Truth: |      |
        SPAM      | %4d | %4d
        HAM       | %4d | %4d
    """ % (tp, fn, fp, tn))
    # Guard against an empty test set to avoid ZeroDivisionError.
    total = tp + tn + fp + fn
    acc = (tp + tn) / total if total else 0.0
    print("acc->", acc)
def testClassifyErrorRateMSE():
    """Compute the mean squared error of the AdaBoost-weighted classifier.

    For each message in the fixed `test_index` subset of ./public/, the
    spam/ham scores (p1, p0) are normalised by SCORE_SCALE and compared
    against the one-hot ground-truth label; the averaged squared error
    is printed.
    """
    # Normalisation constant used to map raw classifier scores into [0, 1]
    # before comparing with the one-hot target.
    SCORE_SCALE = 20000

    fileFolder = './public/'
    mailWords, classLables = naiveBayes.loadMailData(fileFolder)

    # Fixed indices of held-out test messages (reproducible evaluation split).
    test_index = [
        2, 6, 7, 8, 13, 16, 19, 29, 35, 37, 40, 42, 43, 45, 46, 49, 51, 52, 64,
        65, 71, 72, 78, 79, 80, 84, 85, 90, 91, 98, 103, 109, 111, 117, 123,
        129, 135, 138, 142, 149, 169, 188, 191, 192, 203, 221, 225, 226, 229,
        232, 236, 243, 250, 254, 257, 258, 259, 264, 268, 281, 298, 300, 308,
        319, 322, 329, 333, 335, 338, 339, 340, 344, 347, 358, 359, 362, 382,
        385, 391, 394, 402, 410, 415, 417, 418, 422, 423, 424, 425, 428, 437,
        441, 456, 461, 462, 470, 472, 477, 480, 481
    ]

    testWords = [mailWords[i] for i in test_index]
    testWordsType = [classLables[i] for i in test_index]

    vocabularyList, pWordsSpamicity, pWordsHealthy, pSpam, DS = \
        naiveBayes.getTrainedModelInfo()

    se = 0
    for words, actual in zip(testWords, testWordsType):
        testWordsCount = naiveBayes.setOfWordsToVecTor(vocabularyList, words)
        testVector = np.array(testWordsCount)
        # smsType is returned but unused here; only the raw scores matter.
        p1, p0, smsType = naiveBayes.adaboostClassify(vocabularyList,
                                                      pWordsSpamicity,
                                                      pWordsHealthy, DS, pSpam,
                                                      testVector)

        # Squared error of the normalised (p1, p0) scores against the
        # one-hot target: spam -> (1, 0), ham -> (0, 1).
        if actual == 1:
            se += (pow((p1 / SCORE_SCALE - 1), 2) + pow((p0 / SCORE_SCALE), 2)) / 2
        else:
            se += (pow((p1 / SCORE_SCALE), 2) + pow((p0 / SCORE_SCALE - 1), 2)) / 2

    print("mse->", se / len(test_index))
def testClassifyErrorRate():
    """Evaluate the naive-Bayes classifier on the full ./public/ test set.

    Classifies every loaded message, prints the index of each
    misclassified one, then a confusion matrix and the accuracy
    (as a percentage).
    """
    fileFolder = './public/'
    smsWords, classLables = naiveBayes.loadMailDataTest(fileFolder)

    testWords = smsWords
    testWordsType = classLables

    vocabularyList, pWordsSpamicity, pWordsHealthy, pSpam, DS = \
        naiveBayes.getTrainedModelInfo()

    tp, tn, fp, fn = 0, 0, 0, 0
    for i, (words, actual) in enumerate(zip(testWords, testWordsType)):
        smsType = naiveBayes.classify(vocabularyList, pWordsSpamicity,
                                      pWordsHealthy, pSpam, words)
        if smsType != actual:
            # Report the index of each misclassified message.
            print(i)
            if smsType == 1:
                fp += 1  # predicted spam (1), actually ham
            else:
                fn += 1  # predicted ham, actually spam
        else:
            if smsType == 1:
                tp += 1
            else:
                tn += 1
    print("""
    Predicted:    | SPAM | HAM
    ----------------------------
    Ground Truth: |      |
        SPAM      | %4d | %4d
        HAM       | %4d | %4d
    """ % (tp, fn, fp, tn))
    # Guard against an empty test set to avoid ZeroDivisionError.
    total = tp + tn + fp + fn
    acc = 100.0 * (tp + tn) / total if total else 0.0
    print("acc->", acc)