def test_update_dictionary_correct_shorten_dictionary(self):
        # Train on the sports fixtures, swap in the shortened dictionary via
        # update_dictionary(), then compare the model parameters and the
        # prediction for a_very_close_game against the shortened fixtures.
        classifier = Multinomial()
        classifier.train(self.sports_labels, self.sports_data)
        classifier.update_dictionary(self.shortened_dictionary)
        predicted, scores = classifier.predict(self.a_very_close_game)

        # Model parameters after the dictionary update
        self.assertEqual(classifier.priors, self.correct_priors)
        self.assertEqual(classifier.label_counts, self.correct_label_count)
        self.assertEqual(classifier.empty_likelihoods,
                         self.correct_shortened_empty_likelihoods)
        self.assertDictEqual(classifier.likelihoods,
                             self.correct_shortened_likelihoods)

        # Per-label scores of the first sample
        self.assertIsNotNone(scores)
        expected = self.correct_shortened_a_very_close_game_score
        for label in ("sport", "not sport"):
            self.assertAlmostEqual(scores[0][label], expected[label])

        # Predicted label of the first sample
        self.assertEqual(predicted[0], "sport")
    def test_predict_prediction_and_score(self):
        # Train on the sports fixtures and check what predict() returns for
        # a_very_close_game: the predicted label first, then the scores.
        classifier = Multinomial()
        classifier.train(self.sports_labels, self.sports_data)
        predicted, scores = classifier.predict(self.a_very_close_game)

        # Predicted label of the first sample
        self.assertEqual(predicted[0], "sport")

        # Per-label scores of the first sample
        self.assertIsNotNone(scores)
        expected = self.correct_a_very_close_game_score
        for label in ("sport", "not sport"):
            self.assertAlmostEqual(scores[0][label], expected[label])
    def test_update_add_more_training_data(self):
        # Train on the first four sports samples, feed the fifth one through
        # update(), then compare the model parameters, scores and prediction
        # against the expected fixtures.
        initial_labels = self.sports_labels[0:4]
        initial_data = self.sports_data[0:4]
        extra_labels = [self.sports_labels[4]]
        extra_data = [self.sports_data[4]]

        classifier = Multinomial()
        classifier.train(initial_labels, initial_data)
        classifier.update(extra_labels, extra_data)
        predicted, scores = classifier.predict(self.a_very_close_game)

        # Model parameters after the incremental update
        self.assertEqual(classifier.priors, self.correct_priors)
        self.assertEqual(classifier.label_counts, self.correct_label_count)
        self.assertEqual(classifier.empty_likelihoods,
                         self.correct_empty_likelihoods)
        self.assertDictEqual(classifier.likelihoods, self.correct_likelihoods)

        # Per-label scores of the first sample
        self.assertIsNotNone(scores)
        expected = self.correct_a_very_close_game_score
        for label in ("sport", "not sport"):
            self.assertAlmostEqual(scores[0][label], expected[label])

        # Predicted label of the first sample
        self.assertEqual(predicted[0], "sport")
Пример #4
0
# Load the pickled wine sample data and report prediction accuracy for the
# dictionary model; the vector model section starts at the end of this span.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# this must only be run against a trusted wine_data.pkl.
with open('./sample_data/nltk/wine/wine_data.pkl', 'rb') as f:
    wine_data = pickle.load(f)

    # Everything below stays inside the ``with`` block, so the file handle
    # remains open while the model is trained and scored.

    ####################
    # Dictionary Model #
    ####################

    # Inputs for the dictionary-based model live under the "bagofwords" key.
    wine_bow_data = wine_data["bagofwords"]

    # NOTE(review): presumably "raw_data_top_removed" is "raw_data" with the
    # most frequent tokens stripped -- confirm against the code that builds
    # the pickle.
    raw_data_top_removed = wine_bow_data["raw_data_top_removed"]
    raw_data = wine_bow_data["raw_data"]
    labels = wine_bow_data["labels"]

    # The model is trained on raw_data_top_removed but predicts on raw_data.
    model_dict = DictMultinomial()
    model_dict.train(labels, raw_data_top_removed)
    # dict_scores is not read anywhere in the visible part of this script.
    dict_predictions, dict_scores = model_dict.predict(raw_data)

    # Count the predictions that equal the label at the same index.
    matches = 0
    for i in range(0, len(labels)):
        if dict_predictions[i] == labels[i]:
            matches += 1

    print("- Dictionary")
    # Fraction of matching predictions. NOTE(review): assumes Python 3 true
    # division; an empty ``labels`` would raise ZeroDivisionError here.
    print("Accuracy: " + str(matches / len(labels)))

    ################
    # Vector Model #
    ################

    # Inputs for the vector-based model live under the "vectors" key.
    wine_vector_data = wine_data["vectors"]