示例#1
0
    def test_process(self):
        sample_docs = [\
                "hello this is virus mail",\
                "hi this is from friend",\
                "how about buy this virus",\
                "facebook friend contact to you",\
                "I love you baby virus",\
                "what a nice day how about you"\
            ]

        docs_label =\
                ['spam','real','spam','real','spam','real']

        nlp_eng = nlp("eng")

        # extract vocabulary from docs
        voca = nlp_eng.extract_vocabulary(sample_docs)
        self.tlog(voca)
        assert len(voca) == 12

        # convert docs to bag of word vector using vocabulary
        docs_vector = []
        for doc in sample_docs:
            docs_vector.append(nlp_eng.bag_of_word2vector(voca, doc))
        self.tlog(docs_vector)

        # training NaiveBayes
        nbayes = NaiveBayes(docs_vector, docs_label)
        nbayes.fit()

        # test case 1
        tc1 = "this is virus mail"
        tc1_vec = nlp_eng.bag_of_word2vector(voca, tc1)

        self.tlog(tc1)
        self.tlog(tc1_vec)

        r1 = autotest.eval_predict_one(nbayes, tc1_vec, 'spam', self.logging)
        assert r1 == True

        # test case 2
        tc2 = "I love you love"
        tc2_vec = nlp_eng.bag_of_word2vector(voca, tc2)

        self.tlog(tc2)
        self.tlog(tc2_vec)

        r2 = autotest.eval_predict_one(nbayes, tc2_vec, 'spam', self.logging)
        assert r2 == True
示例#2
0
    def test_process(self):
        sample_docs = [
            "hello this is virus mail",
            "hi this is from friend",
            "how about buy this virus",
            "facebook friend contact to you",
            "I love you baby virus",
            "what a nice day how about you",
        ]

        docs_label = ["spam", "real", "spam", "real", "spam", "real"]

        nlp_eng = nlp("eng")

        # extract vocabulary from docs
        voca = nlp_eng.extract_vocabulary(sample_docs)
        self.tlog(voca)
        assert len(voca) == 12

        # convert docs to bag of word vector using vocabulary
        docs_vector = []
        for doc in sample_docs:
            docs_vector.append(nlp_eng.bag_of_word2vector(voca, doc))
        self.tlog(docs_vector)

        # training NaiveBayes
        nbayes = NaiveBayes(docs_vector, docs_label)
        nbayes.fit()

        # test case 1
        tc1 = "this is virus mail"
        tc1_vec = nlp_eng.bag_of_word2vector(voca, tc1)

        self.tlog(tc1)
        self.tlog(tc1_vec)

        r1 = autotest.eval_predict_one(nbayes, tc1_vec, "spam", self.logging)
        assert r1 == True

        # test case 2
        tc2 = "I love you love"
        tc2_vec = nlp_eng.bag_of_word2vector(voca, tc2)

        self.tlog(tc2)
        self.tlog(tc2_vec)

        r2 = autotest.eval_predict_one(nbayes, tc2_vec, "spam", self.logging)
        assert r2 == True
示例#3
0
    def test_process(self):

        nlp_eng = nlp("eng")

        email_data_file = "sample_data/email/email.tsv"
        emailmat_train, emaillabel_train, voca, emailmat_test, emaillabel_test = fs.tsv_loader_with_nlp(
            email_data_file, 0.3, nlp_eng
        )
        self.tlog(voca)

        email_nbayes = NaiveBayes(emailmat_train, emaillabel_train)
        email_nbayes.fit()

        error_rate = autotest.eval_predict(email_nbayes, emailmat_test, emaillabel_test, self.logging)
        self.tlog("spam-mail predict (with NaiveBayes) error rate : " + str(error_rate))

        assert error_rate <= 0.1
示例#4
0
    def test_process(self):

        nlp_eng = nlp("eng_lower")

        email_data_file = "sample_data/email/email.tsv"
        emailmat_train, emaillabel_train, voca, emailmat_test, emaillabel_test \
                = fs.tsv_loader_with_nlp(email_data_file, 0.4, nlp_eng)
        self.tlog(voca)

        email_nbayes = NaiveBayes(emailmat_train, emaillabel_train)
        email_nbayes.fit()

        error_rate = autotest.eval_predict(email_nbayes, emailmat_test,
                                           emaillabel_test, self.logging)
        self.tlog("spam-mail predict (with NaiveBayes) error rate : " +
                  str(error_rate))

        assert error_rate <= 0.1