Example #1
    def test_01_from_file_02(self):
        """Verify that a classifier can be constructed from a file with weights."""
        classifier = PerceptronClassifier({'good': 1, 'terrible': -1})
        classifier.save('data/ex05_from_file_test.model')
        classifier = PerceptronClassifier.from_file(
            'data/ex05_from_file_test.model')
        if classifier is None:
            self.fail(
                msg="Constructing classifier from file failed: from_file returned None"
            )
        expected_weights = {'good': 1, 'terrible': -1}
        self.assertEqual(classifier.weights, expected_weights)
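The test only pins down the save / from_file round trip; the file format itself is not specified. Below is a minimal sketch of how the two methods could look, assuming a plain JSON file that maps feature names to weights (the JSON format and the constructor shown are assumptions, not taken from the test).

import json

class PerceptronClassifier:
    def __init__(self, weights):
        # weights: dict mapping feature name -> weight
        self.weights = dict(weights)

    def save(self, path):
        # Write the weight vector to disk; JSON is just one possible format.
        with open(path, 'w', encoding='utf-8') as model_file:
            json.dump(self.weights, model_file)

    @classmethod
    def from_file(cls, path):
        # Rebuild a classifier from a previously saved weight file.
        with open(path, 'r', encoding='utf-8') as model_file:
            return cls(json.load(model_file))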
Example #2
    def test_02_for_dataset_02(self):
        """Verify that a classifier can be constructed with initial weights for a given dataset."""
        expected_weights = {'highly': 0, 'boring': 0, 'green': 0, 'eggs': 0}
        classifier = PerceptronClassifier.for_dataset(
            self.small_dataset_train_2)
        if classifier is None:
            self.fail(
                msg='Constructing classifier for dataset failed: for_dataset returned None'
            )
        self.assertEqual(classifier.weights, expected_weights)
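Continuing the sketch above, for_dataset can be read off the expected weights: every feature of the dataset starts at 0. A possible version, assuming the dataset exposes its vocabulary as an iterable feature_set attribute (the attribute name appears in Example #5; the rest is an assumption):

    @classmethod
    def for_dataset(cls, dataset):
        # One zero weight per feature in the dataset's vocabulary.
        return cls({feature: 0 for feature in dataset.feature_set})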
Example #3
    def test_04_update_02(self):
        """Verify that the perceptron update is performed correctly."""
        classifier = PerceptronClassifier({'highly': 1, 'boring': -1})
        classifier.update(self.small_instance_list_do_update[1])
        expected_weights = {'highly': 1, 'boring': 0}
        self.assertEqual(classifier.weights, expected_weights)

        classifier = PerceptronClassifier({'highly': 1, 'boring': -1})
        do_update = classifier.update(self.small_instance_list_no_update[1])
        self.assertEqual(False, do_update)
Example #4
    def test_04_update_01(self):
        """Verify that the perceptron update is performed correctly."""
        classifier = PerceptronClassifier({'highly': 1, 'boring': -1})
        # Test document: ("highly", "doc25", False)
        classifier.update(self.small_instance_list_do_update[0])
        expected_weights = {'highly': 0, 'boring': -1}
        self.assertEqual(classifier.weights, expected_weights)

        classifier = PerceptronClassifier({'highly': 1, 'boring': -1})
        # Test document: ("boring", "doc26", True),
        do_update = classifier.update(self.small_instance_list_no_update[0])
        self.assertEqual(False, do_update)
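Both update tests are consistent with the standard binary perceptron rule: if the current weights already classify the instance correctly, nothing changes and update returns False; otherwise the feature counts are added to the weights for a positive true label or subtracted for a negative one, and update returns True. A sketch under that assumption, reusing the instance attributes feature_counts and label seen in the other snippets and the prediction method sketched under Example #6 below:

    def update(self, instance):
        # No update if the instance is already classified correctly.
        if self.prediction(instance.feature_counts) == instance.label:
            return False
        # Misclassified: move the weights towards the true label.
        sign = 1 if instance.label else -1
        for feature, count in instance.feature_counts.items():
            if feature in self.weights:
                self.weights[feature] += sign * count
        return True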
Example #5
def nltk_movie_review_accuracy(num_iterations):
    """Try different numbers of features and optimize the number of training iterations."""
    # Exercise 4: remove line
    (training_documents, dev_documents, test_documents) = load_reviews()

    best_development_accuracy = 0.0
    best_train_accuracy = 0.0
    best_num_features = 0
    best_classifier = None
    best_feature_set = None

    # Test different numbers of features.
    for n in [100, 1000, 10000]:
        print("Training with %d features..." % n)
        # Training set
        training_set = Dataset.from_document_collection(training_documents, num_features=n)
        # Development set
        development_set = Dataset.from_document_collection(dev_documents, feature_set=training_set.feature_set)

        # Train classifier # Exercise 4: train the classifier
        classifier = PerceptronClassifier.for_dataset(training_set)
        classifier.train(training_set, development_set, num_iterations)

        # Accuracies of classifier with n features
        train_accuracy = classifier.test_accuracy(training_set)
        development_accuracy = classifier.test_accuracy(development_set)

        if development_accuracy > best_development_accuracy:
            best_development_accuracy = development_accuracy
            best_train_accuracy = train_accuracy
            best_num_features = n
            best_classifier = classifier.copy()
            best_feature_set = training_set.feature_set

    print("Best classifier with %d features: \t Train Accuracy: %.4f \t Dev Accuracy: %.4f" % (n, train_accuracy, best_development_accuracy))
    print("Best number of features: %d " % best_num_features)
    print("Top features for positive class:")
    print(best_classifier.features_for_class(True))
    print("Top features for negative class:")
    print(best_classifier.features_for_class(False))

    # Compute test score for best setting.
    testing_set = Dataset.from_document_collection(test_documents, feature_set=best_feature_set)
    testing_accuracy = best_classifier.test_accuracy(testing_set)
    print("Test score for best setting: %.4f" % testing_accuracy)
    return best_development_accuracy, testing_accuracy
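train is called with the training set, the development set and the number of iterations, but its body is not part of this snippet. A plausible sketch, assuming a plain perceptron loop that remembers the weight vector with the best development accuracy seen so far (that bookkeeping is an assumption; instance_list and test_accuracy are taken from the other snippets):

    def train(self, training_set, development_set, num_iterations):
        # Repeatedly run the perceptron update over the training data and
        # keep the weights that score best on the development set.
        best_dev_accuracy = 0.0
        best_weights = dict(self.weights)
        for _ in range(num_iterations):
            for instance in training_set.instance_list:
                self.update(instance)
            dev_accuracy = self.test_accuracy(development_set)
            if dev_accuracy > best_dev_accuracy:
                best_dev_accuracy = dev_accuracy
                best_weights = dict(self.weights)
        self.weights = best_weights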
Example #6
    def test_03_prediction_02(self):
        """Verify that the predictions of the classifier are as expected."""
        classifier = PerceptronClassifier({'highly': 1, 'boring': -2})
        for instance in self.small_dataset_pred_test_2.instance_list:
            prediction = classifier.prediction(instance.feature_counts)
            self.assertEqual(prediction, instance.label)
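The prediction test implies that prediction scores a bag of feature counts against the weights and returns a boolean label. A minimal sketch, assuming a dot product thresholded at zero (how a score of exactly 0 should be labelled is not fixed by the test):

    def prediction(self, feature_counts):
        # Dot product of the observed features with the weight vector;
        # a positive score is read as the positive class.
        score = sum(self.weights.get(feature, 0) * count
                    for feature, count in feature_counts.items())
        return score > 0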