def test_generate_thresholdcurve_data(self):
        """
        Checks the dataset returned by plot.generate_thresholdcurve_data.

        NaiveBayes is cross-validated (10 folds, Random(1)) on diabetes.arff,
        the threshold curve data is generated from that evaluation with 0 as
        second argument, and its dimensions plus a few attribute names are
        verified.
        """
        arff_loader = converters.Loader(
            classname="weka.core.converters.ArffLoader")
        diabetes = arff_loader.load_file(self.datafile("diabetes.arff"))
        diabetes.class_is_last()

        # NOTE(review): a FilteredClassifier is assembled below, but the
        # cross-validation is run on the bare NaiveBayes instance.
        attribute_remover = filters.Filter(
            classname="weka.filters.unsupervised.attribute.Remove",
            options=["-R", "1-3"])
        naive_bayes = classifiers.Classifier(
            classname="weka.classifiers.bayes.NaiveBayes")
        filtered = classifiers.FilteredClassifier()
        filtered.filter = attribute_remover
        filtered.classifier = naive_bayes

        evaluation = classifiers.Evaluation(diabetes)
        evaluation.crossvalidate_model(naive_bayes, diabetes, 10, Random(1))
        curve = plot.generate_thresholdcurve_data(evaluation, 0)

        self.assertEqual(
            13, curve.num_attributes, msg="number of attributes differs")
        self.assertEqual(
            769, curve.num_instances, msg="number of rows differs")

        # each of these attributes must be present in the generated data
        for attname in ("True Positives", "False Positive Rate", "Lift"):
            self.assertIsNotNone(
                curve.attribute_by_name(attname),
                msg="Failed to locate attribute: " + attname)
Exemplo n.º 2
0
    def test_against_self(self, training_fraction=0.5):
        """Tests a J48 classifier using part of this object's own dataset as test data.

        The first int(numInstances * training_fraction) instances of
        self.dataset are used to build a new J48 tree; the remaining
        instances are used to evaluate it. The evaluation summary is printed
        on the console.

        training_fraction -- portion of the dataset to be used as training
                             data; 0.5 by default.
        """
        total = self.dataset.numInstances()
        training_size = int(total * training_fraction)
        # Instances(source, first, count) copies a contiguous range of rows
        training_data = core.Instances(self.dataset, 0, training_size)
        test_data = core.Instances(self.dataset, training_size,
                                   total - training_size)
        evaluator = classifiers.Evaluation(training_data)
        c45 = trees.J48()
        c45.buildClassifier(training_data)
        evaluator.evaluateModel(c45, test_data)
        # Parenthesised single-argument form: valid on Python 2 (Jython)
        # and Python 3 alike, with identical output.
        print(evaluator.toSummaryString())
Exemplo n.º 3
0
    def test_plot_classifier_errors(self):
        """
        Exercises plot.plot_classifier_errors.

        LinearRegression (options -S 1 -C) is cross-validated (10 folds,
        Random(42)) on bolts.arff and the resulting predictions are handed
        to the plot function with wait=False.
        """
        arff_loader = converters.Loader(
            classname="weka.core.converters.ArffLoader")
        data = arff_loader.load_file(self.datafile("bolts.arff"))
        self.assertIsNotNone(data)
        data.class_is_last()

        regressor = classifiers.Classifier(
            classname="weka.classifiers.functions.LinearRegression",
            options=["-S", "1", "-C"])
        evl = classifiers.Evaluation(data)
        evl.crossvalidate_model(regressor, data, 10, Random(42))

        predictions = evl.predictions
        plot.plot_classifier_errors(predictions, wait=False)
Exemplo n.º 4
0
    def test_plot_prc(self):
        """
        Exercises plot.plot_prc.

        NaiveBayes is cross-validated (10 folds, Random(1)) on diabetes.arff
        and the evaluation is plotted for class indices 0 and 1 with
        wait=False.
        """
        arff_loader = converters.Loader(
            classname="weka.core.converters.ArffLoader")
        diabetes = arff_loader.load_file(self.datafile("diabetes.arff"))
        diabetes.class_is_last()

        # NOTE(review): the FilteredClassifier built here is not the model
        # that gets cross-validated below; the bare NaiveBayes is.
        attribute_remover = filters.Filter(
            classname="weka.filters.unsupervised.attribute.Remove",
            options=["-R", "1-3"])
        naive_bayes = classifiers.Classifier(
            classname="weka.classifiers.bayes.NaiveBayes")
        filtered = classifiers.FilteredClassifier()
        filtered.filter = attribute_remover
        filtered.classifier = naive_bayes

        evaluation = classifiers.Evaluation(diabetes)
        evaluation.crossvalidate_model(naive_bayes, diabetes, 10, Random(1))
        plot.plot_prc(evaluation, class_index=[0, 1], wait=False)
Exemplo n.º 5
0
    def test_get_prc(self):
        """
        Checks the value returned by plot.get_prc.

        NaiveBayes is cross-validated (10 folds, Random(1)) on diabetes.arff,
        threshold curve data is generated from the evaluation with 0 as
        second argument, and the area computed from it is compared against
        0.892 (3 decimal places).
        """
        arff_loader = converters.Loader(
            classname="weka.core.converters.ArffLoader")
        diabetes = arff_loader.load_file(self.datafile("diabetes.arff"))
        diabetes.class_is_last()

        # NOTE(review): the FilteredClassifier built here is not the model
        # that gets cross-validated below; the bare NaiveBayes is.
        attribute_remover = filters.Filter(
            classname="weka.filters.unsupervised.attribute.Remove",
            options=["-R", "1-3"])
        naive_bayes = classifiers.Classifier(
            classname="weka.classifiers.bayes.NaiveBayes")
        filtered = classifiers.FilteredClassifier()
        filtered.filter = attribute_remover
        filtered.classifier = naive_bayes

        evaluation = classifiers.Evaluation(diabetes)
        evaluation.crossvalidate_model(naive_bayes, diabetes, 10, Random(1))
        curve = plot.generate_thresholdcurve_data(evaluation, 0)
        prc_area = plot.get_prc(curve)
        self.assertAlmostEqual(0.892, prc_area, places=3, msg="PRC differs")
Exemplo n.º 6
0
    def test_evaluation(self):
        """
        Exercises the classifiers.Evaluation class.

        Part 1 (anneal.arff, J48): statistics after 10-fold cross-validation,
        train/test evaluation on the same data, cross-validation with
        discarded predictions, and cross-validation with a PlainText
        prediction output.
        Part 2 (bolts.arff, LinearRegression): statistics after 10-fold
        cross-validation.
        """
        # short aliases for the two assertion helpers used most often
        equal = self.assertEqual
        close = self.assertAlmostEqual

        # ---- part 1: nominal class ----
        arff_loader = converters.Loader(
            classname="weka.core.converters.ArffLoader")
        dataset = arff_loader.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(dataset)
        dataset.class_is_last()

        j48_name = "weka.classifiers.trees.J48"
        classifier = classifiers.Classifier(classname=j48_name)
        self.assertIsNotNone(
            classifier, msg="Failed to instantiate: " + j48_name)

        evaluation = classifiers.Evaluation(dataset)
        self.assertIsNotNone(
            evaluation, msg="Failed to instantiate Evaluation")
        self.assertIsNone(evaluation.header.equal_headers(dataset))
        equal([9, 100, 685, 1, 68, 41], evaluation.class_priors.tolist())

        # cross-validation statistics
        evaluation.crossvalidate_model(
            classifier, dataset, 10, classes.Random(1))
        expected_matrix = [
            [5.0, 0.0, 3.0, 0.0, 0.0, 0.0],
            [0.0, 99.0, 0.0, 0.0, 0.0, 0.0],
            [0.0, 2.0, 680.0, 0.0, 0.0, 2.0],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 67.0, 0.0],
            [0.0, 0.0, 7.0, 0.0, 0.0, 33.0],
        ]
        equal(expected_matrix, evaluation.confusion_matrix.tolist(),
              msg="confusion matrix differs")
        close(0.749, evaluation.area_under_prc(0), places=3,
              msg="area_under_prc(0) differs")
        close(0.931, evaluation.area_under_roc(0), places=3,
              msg="area_under_roc(0) differs")
        close(0, evaluation.avg_cost, places=3,
              msg="avg_cost differs")
        equal(884, evaluation.correct,
              msg="correct differs")
        close(98.775, evaluation.coverage_of_test_cases_by_predicted_regions,
              places=3,
              msg="coverage_of_test_cases_by_predicted_regions differs")
        close(0.016, evaluation.error_rate, places=3,
              msg="error_rate differs")
        close(0.769, evaluation.f_measure(0), places=3,
              msg="f_measure(0) differs")
        close(0.375, evaluation.false_negative_rate(0), places=3,
              msg="false_negative_rate(0) differs")
        close(0.0, evaluation.false_positive_rate(0), places=3,
              msg="false_positive_rate(0) differs")
        equal(14, evaluation.incorrect,
              msg="incorrect differs")
        close(0.961, evaluation.kappa, places=3,
              msg="kappa differs")
        close(1017.196, evaluation.kb_information, places=3,
              msg="kb_information differs")
        close(1.133, evaluation.kb_mean_information, places=3,
              msg="kb_mean_information differs")
        close(95.017, evaluation.kb_relative_information, places=3,
              msg="kb_relative_information differs")
        close(0.789, evaluation.matthews_correlation_coefficient(0), places=3,
              msg="matthews_correlation_coefficient(0) differs")
        close(0.006, evaluation.mean_absolute_error, places=3,
              msg="mean_absolute_error differs")
        close(0.134, evaluation.mean_prior_absolute_error, places=3,
              msg="mean_prior_absolute_error differs")
        close(3.0, evaluation.num_false_negatives(0), places=3,
              msg="num_false_negatives(0) differs")
        close(0.0, evaluation.num_false_positives(0), places=3,
              msg="num_false_positives(0) differs")
        close(898.0, evaluation.num_instances, places=3,
              msg="num_instances differs")
        close(890.0, evaluation.num_true_negatives(0), places=3,
              msg="num_true_negatives(0) differs")
        close(5.0, evaluation.num_true_positives(0), places=3,
              msg="num_true_positives(0) differs")
        close(98.44, evaluation.percent_correct, places=2,
              msg="percent_correct differs")
        close(1.56, evaluation.percent_incorrect, places=2,
              msg="percent_incorrect differs")
        close(0, evaluation.percent_unclassified, places=2,
              msg="percent_unclassified differs")
        close(1.0, evaluation.precision(0), places=3,
              msg="precision(0) differs")
        close(0.625, evaluation.recall(0), places=3,
              msg="recall(0) differs")
        close(4.187, evaluation.relative_absolute_error, places=3,
              msg="relative_absolute_error differs")
        close(0.258, evaluation.root_mean_prior_squared_error, places=3,
              msg="root_mean_prior_squared_error differs")
        close(0.067, evaluation.root_mean_squared_error, places=3,
              msg="root_mean_squared_error differs")
        close(25.912, evaluation.root_relative_squared_error, places=3,
              msg="root_relative_squared_error differs")
        close(-75.044, evaluation.sf_entropy_gain, places=3,
              msg="sf_entropy_gain differs")
        close(-0.084, evaluation.sf_mean_entropy_gain, places=3,
              msg="sf_mean_entropy_gain differs")
        close(1.192, evaluation.sf_mean_prior_entropy, places=3,
              msg="sf_mean_prior_entropy differs")
        close(1.276, evaluation.sf_mean_scheme_entropy, places=3,
              msg="sf_mean_scheme_entropy differs")
        close(16.722, evaluation.size_of_predicted_regions, places=3,
              msg="size_of_predicted_regions differs")
        equal(0, evaluation.unclassified,
              msg="unclassified differs")
        close(0.0, evaluation.total_cost, places=3,
              msg="total_cost differs")
        close(0.926, evaluation.unweighted_macro_f_measure, places=3,
              msg="unweighted_macro_f_measure differs")
        close(0.984, evaluation.unweighted_micro_f_measure, places=3,
              msg="unweighted_micro_f_measure differs")
        close(0.992, evaluation.weighted_area_under_prc, places=3,
              msg="weighted_area_under_prc differs")
        close(0.995, evaluation.weighted_area_under_roc, places=3,
              msg="weighted_area_under_roc differs")
        close(0.984, evaluation.weighted_f_measure, places=3,
              msg="weighted_f_measure differs")
        close(0.016, evaluation.weighted_false_negative_rate, places=3,
              msg="weighted_false_negative_rate differs")
        close(0.036, evaluation.weighted_false_positive_rate, places=3,
              msg="weighted_false_positive_rate differs")
        close(0.959, evaluation.weighted_matthews_correlation, places=3,
              msg="weighted_matthews_correlation differs")
        close(0.984, evaluation.weighted_precision, places=3,
              msg="weighted_precision differs")
        close(0.984, evaluation.weighted_recall, places=3,
              msg="weighted_recall differs")
        close(0.964, evaluation.weighted_true_negative_rate, places=3,
              msg="weighted_true_negative_rate differs")
        close(0.984, evaluation.weighted_true_positive_rate, places=3,
              msg="weighted_true_positive_rate differs")
        self.assertIsNotNone(
            evaluation.predictions, msg="Should have had predictions")
        equal(dataset.num_instances, len(evaluation.predictions),
              msg="Number of predictions differ from training set size")

        # train on the full data, then evaluate on the same data
        evaluation = classifiers.Evaluation(dataset)
        self.assertIsNotNone(
            evaluation, msg="Failed to instantiate Evaluation")
        classifier = classifiers.Classifier(classname=j48_name)
        self.assertIsNotNone(
            classifier, msg="Failed to instantiate: " + j48_name)
        classifier.build_classifier(dataset)
        evaluation.test_model(classifier, dataset)
        close(99.777, evaluation.percent_correct, places=3,
              msg="percent_correct differs")

        # cross-validate with predictions being discarded
        classifier = classifiers.Classifier(classname=j48_name)
        self.assertIsNotNone(
            classifier, msg="Failed to instantiate: " + j48_name)
        evaluation = classifiers.Evaluation(dataset)
        evaluation.discard_predictions = True
        self.assertIsNotNone(
            evaluation, msg="Failed to instantiate Evaluation")
        evaluation.crossvalidate_model(
            classifier, dataset, 10, classes.Random(1))
        self.assertIsNone(
            evaluation.predictions, msg="Should have had no predictions")

        # cross-validate while collecting prediction output
        classifier = classifiers.Classifier(classname=j48_name)
        self.assertIsNotNone(
            classifier, msg="Failed to instantiate: " + j48_name)
        evaluation = classifiers.Evaluation(dataset)
        plaintext_name = "weka.classifiers.evaluation.output.prediction.PlainText"
        pred_output = classifiers.PredictionOutput(classname=plaintext_name)
        equal(plaintext_name, pred_output.classname,
              msg="Output classnames differ!")
        self.assertIsNotNone(
            evaluation, msg="Failed to instantiate Evaluation")
        evaluation.crossvalidate_model(
            classifier, dataset, 10, classes.Random(1), output=pred_output)
        self.assertGreater(
            len(str(pred_output)), 0, msg="Should have generated output")

        # ---- part 2: numeric class ----
        arff_loader = converters.Loader(
            classname="weka.core.converters.ArffLoader")
        dataset = arff_loader.load_file(self.datafile("bolts.arff"))
        self.assertIsNotNone(dataset)
        dataset.class_is_last()

        linreg_name = "weka.classifiers.functions.LinearRegression"
        classifier = classifiers.Classifier(classname=linreg_name)
        self.assertIsNotNone(
            classifier, msg="Failed to instantiate: " + linreg_name)

        evaluation = classifiers.Evaluation(dataset)
        self.assertIsNotNone(
            evaluation, msg="Failed to instantiate Evaluation")
        self.assertIsNone(evaluation.header.equal_headers(dataset))

        # cross-validation statistics
        evaluation.crossvalidate_model(
            classifier, dataset, 10, classes.Random(1))
        close(0.919, evaluation.correlation_coefficient, places=3,
              msg="correlation_coefficient differs")
        close(10.697, evaluation.error_rate, places=3,
              msg="error_rate differs")
Exemplo n.º 7
0
    def test_against(self, test):
        """Tests this object's classifier using another object's dataset as test data.

        An Evaluation is initialised with self.dataset, self.c45 is evaluated
        on test.dataset, and the evaluation summary is printed on the console.

        test -- object whose dataset attribute supplies the test data.
        """
        evaluator = classifiers.Evaluation(self.dataset)
        evaluator.evaluateModel(self.c45, test.dataset)
        # Parenthesised single-argument form: valid on Python 2 (Jython)
        # and Python 3 alike, with identical output.
        print(evaluator.toSummaryString())