Exemplo n.º 1
0
    def test_build_classifier(self):
        """
        Checks that build_classifier runs on a nominal and on a numeric dataset.
        """
        # (dataset file, classifier class name, classifier options)
        setups = [
            # 1. nominal
            ("anneal.arff", "weka.classifiers.trees.J48", ["-C", "0.3"]),
            # 2. numeric
            ("bolts.arff", "weka.classifiers.functions.LinearRegression", ["-R", "0.1"]),
        ]
        for arff_name, cname, options in setups:
            loader = converters.Loader(classname="weka.core.converters.ArffLoader")
            data = loader.load_file(self.datafile(arff_name))
            self.assertIsNotNone(data)
            data.class_is_last()

            cls = classifiers.Classifier(classname=cname, options=options)
            self.assertIsNotNone(cls, msg="Failed to instantiate: " + cname + "/" + str(options))

            cls.build_classifier(data)
Exemplo n.º 2
0
    def test_classify_instance(self):
        """
        Checks the values returned by classify_instance for a nominal and a
        numeric class attribute.
        """
        # 1. nominal
        arff = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = arff.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(data)
        data.class_is_last()

        cname = "weka.classifiers.trees.J48"
        options = ["-C", "0.3"]
        cls = classifiers.Classifier(classname=cname, options=options)
        self.assertIsNotNone(cls, msg="Failed to instantiate: " + cname + "/" + str(options))

        cls.build_classifier(data)
        predicted = []
        for row in range(10, 20):
            label = cls.classify_instance(data.get_instance(row))
            self.assertIsNotNone(label)
            predicted.append(label)
        expected_labels = [2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 5.0, 5.0, 2.0, 2.0]
        self.assertEqual(expected_labels, predicted, msg="Classifications differ")

        # 2. numeric
        arff = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = arff.load_file(self.datafile("bolts.arff"))
        self.assertIsNotNone(data)
        data.class_is_last()

        cname = "weka.classifiers.functions.LinearRegression"
        options = ["-R", "0.1"]
        cls = classifiers.Classifier(classname=cname, options=options)
        self.assertIsNotNone(cls, msg="Failed to instantiate: " + cname + "/" + str(options))

        cls.build_classifier(data)
        predicted = []
        for row in range(10):
            value = cls.classify_instance(data.get_instance(row))
            self.assertIsNotNone(value)
            predicted.append(value)
        expected_values = [
            24.313, 33.359, 28.569, 26.365, 32.680,
            29.149, 26.998, 22.971, 13.160, 7.394,
        ]
        # numeric predictions are only compared to three decimal places
        for want, got in zip(expected_values, predicted):
            self.assertAlmostEqual(want, got, places=3, msg="Classifications differ")
Exemplo n.º 3
0
    def test_distribution_for_instance(self):
        """
        Checks the length of the array returned by distribution_for_instance
        for a nominal and for a numeric class attribute.
        """
        # 1. nominal
        arff = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = arff.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(data)
        data.class_is_last()

        cname = "weka.classifiers.trees.J48"
        options = ["-C", "0.3"]
        cls = classifiers.Classifier(classname=cname, options=options)
        self.assertIsNotNone(cls, msg="Failed to instantiate: " + cname + "/" + str(options))

        cls.build_classifier(data)
        for row in range(10):
            dist = cls.distribution_for_instance(data.get_instance(row))
            self.assertIsNotNone(dist)
            self.assertEqual(6, len(dist), msg="Number of classes in prediction differ!")

        # 2. numeric
        arff = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = arff.load_file(self.datafile("bolts.arff"))
        self.assertIsNotNone(data)
        data.class_is_last()

        cname = "weka.classifiers.functions.LinearRegression"
        options = ["-R", "0.1"]
        cls = classifiers.Classifier(classname=cname, options=options)
        self.assertIsNotNone(cls, msg="Failed to instantiate: " + cname + "/" + str(options))

        cls.build_classifier(data)
        numeric_msg = "Number of classes in prediction should be one for numeric classifier!"
        for row in range(10):
            dist = cls.distribution_for_instance(data.get_instance(row))
            self.assertIsNotNone(dist)
            self.assertEqual(1, len(dist), msg=numeric_msg)
    def test_generate_thresholdcurve_data(self):
        """
        Checks the dataset produced by plot.generate_thresholdcurve_data from a
        cross-validated evaluation: its dimensions and a few attribute names.
        """
        arff = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = arff.load_file(self.datafile("diabetes.arff"))
        data.class_is_last()

        remove = filters.Filter(
            classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "1-3"])
        cls = classifiers.Classifier(classname="weka.classifiers.bayes.NaiveBayes")
        # NOTE(review): fc is configured here, but the cross-validation below
        # is run on cls rather than fc - confirm whether that is intended.
        fc = classifiers.FilteredClassifier()
        fc.filter = remove
        fc.classifier = cls

        evl = classifiers.Evaluation(data)
        evl.crossvalidate_model(cls, data, 10, Random(1))

        curve = plot.generate_thresholdcurve_data(evl, 0)
        self.assertEqual(13, curve.num_attributes, msg="number of attributes differs")
        self.assertEqual(769, curve.num_instances, msg="number of rows differs")
        for attname in ("True Positives", "False Positive Rate", "Lift"):
            self.assertIsNotNone(curve.attribute_by_name(attname),
                                 msg="Failed to locate attribute: " + attname)
Exemplo n.º 5
0
    def test_stats(self):
        """
        Checks the numeric statistics reported for the attribute at index 3
        of the anneal dataset.
        """
        arff = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = arff.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(data, msg="Failed to load data!")

        numstats = data.attribute_stats(3).numeric_stats

        # (expected value, name of the statistic on numstats)
        checks = [
            (898, "count"),
            (70, "max"),
            (3.635, "mean"),
            (0.0, "min"),
            (13.717, "stddev"),
            (3264, "sum"),
            (180636, "sumsq"),
        ]
        for want, stat in checks:
            self.assertAlmostEqual(want, getattr(numstats, stat), places=3, msg=stat + " differs")
    def test_build_clusterer(self):
        """
        Checks the build_clusterer method, once in batch mode and once followed
        by instance-by-instance updates.
        """
        arff = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = arff.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(data)
        data.delete_last_attribute()

        # batch: build on the full dataset in one go
        kmeans = clusterers.Clusterer(classname="weka.clusterers.SimpleKMeans")
        self.assertIsNotNone(kmeans, msg="Failed to instantiate clusterer!")
        kmeans.build_clusterer(data)
        self.assertIsNotNone(str(kmeans), msg="Failed to obtain string representation of model")

        # incremental: build on the template instances, then feed each row
        cobweb = clusterers.Clusterer(classname="weka.clusterers.Cobweb")
        self.assertIsNotNone(cobweb, msg="Failed to instantiate clusterer!")
        header = dataset.Instances.template_instances(data)
        cobweb.build_clusterer(header)
        for row in data:
            cobweb.update_clusterer(row)
        cobweb.update_finished()
        self.assertIsNotNone(str(cobweb), msg="Failed to obtain string representation of model")
    def test_attribute_selection(self):
        """
        Tests attribute selection.

        Runs a BestFirst search with a CfsSubsetEval evaluator on the anneal
        dataset and checks the number of selected attributes, their indices
        and that a results string is produced.
        """
        loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(data)
        data.class_is_last()

        cname = "weka.attributeSelection.BestFirst"
        options = ["-D", "1", "-N", "5"]
        search = attribute_selection.ASSearch(classname=cname, options=options)
        self.assertIsNotNone(search, msg="Search should not be None: " + cname + "/" + str(options))

        cname = "weka.attributeSelection.CfsSubsetEval"
        options = ["-P", "1", "-E", "1"]
        evaluation = attribute_selection.ASEvaluation(classname=cname, options=options)
        self.assertIsNotNone(evaluation, msg="Evaluation should not be None: " + cname + "/" + str(options))

        attsel = attribute_selection.AttributeSelection()
        # check the AttributeSelection object itself (this used to re-check `search`)
        self.assertIsNotNone(attsel, msg="AttributeSelection should not be None!")

        attsel.search(search)
        attsel.evaluator(evaluation)
        attsel.select_attributes(data)
        self.assertEqual(9, attsel.number_attributes_selected, msg="number_attributes_selected differs")
        # the expected index list has one more entry than number_attributes_selected
        self.assertEqual([0, 4, 8, 11, 12, 19, 24, 32, 33, 38], attsel.selected_attributes.tolist(), msg="selected_attributes differ")
        self.assertGreater(len(attsel.results_string), 0, msg="results_string should get produced")
Exemplo n.º 8
0
    def test_build_and_use_forecaster(self):
        """
        Tests building and using of a forecaster.

        Builds a WekaForecaster with a LinearRegression base forecaster on a
        90% split of the airline data, primes it with the last 12 training
        instances and forecasts as many steps as there are test instances.
        """
        self._ensure_package_is_installed()

        loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(self.datafile("airline.arff"))
        self.assertIsNotNone(data, msg="Data should not be none")
        data.class_is_last()

        airline_train, airline_test = data.train_test_split(90.0)
        forecaster = timeseries.WekaForecaster()
        self.assertIsNotNone(forecaster)
        # NOTE(review): fields_to_forecast is assigned twice, first as a list
        # and then as a plain string - presumably to exercise both accepted
        # forms; confirm before removing either assignment.
        forecaster.fields_to_forecast = ["passenger_numbers"]
        forecaster.base_forecaster = classifiers.Classifier(
            classname="weka.classifiers.functions.LinearRegression")
        forecaster.fields_to_forecast = "passenger_numbers"
        forecaster.build_forecaster(airline_train)

        # prime with the trailing instances of the training data
        num_prime_instances = 12
        airline_prime = dataset.Instances.copy_instances(
            airline_train, airline_train.num_instances - num_prime_instances,
            num_prime_instances)
        forecaster.prime_forecaster(airline_prime)

        num_future_forecasts = airline_test.num_instances
        preds = forecaster.forecast(num_future_forecasts)
        self.assertIsNotNone(preds, msg="Predictions should not be none")
        # the comparison is against the number of requested forecasts (the test
        # set size), not the prime window, so the message says exactly that
        self.assertEqual(len(preds),
                         airline_test.num_instances,
                         msg="# of predictions should equal number of test instances")
Exemplo n.º 9
0
    def test_batch_filtering(self):
        """
        Checks Filter.filter on a single dataset and on a list of datasets.
        """
        loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(data)

        # single dataset: the Remove filter is configured with "-R 1,3"
        remove = filters.Filter(
            classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "1,3"])
        remove.inputformat(data)
        reduced = remove.filter(data)
        self.assertEqual(data.num_attributes - 2, reduced.num_attributes,
                         msg="Number of attributes differ")
        self.assertEqual(data.num_instances, reduced.num_instances,
                         msg="Number of instances differ")

        # multiple files: filter train and test set in one call
        train = loader.load_file(
            self.datafile("reutersTop10Randomized_1perc_shortened-train.arff"))
        self.assertIsNotNone(train)
        test = loader.load_file(
            self.datafile("reutersTop10Randomized_1perc_shortened-test.arff"))
        self.assertIsNotNone(test)

        s2wv = filters.Filter(
            classname="weka.filters.unsupervised.attribute.StringToWordVector")
        s2wv.inputformat(train)
        outputs = s2wv.filter([train, test])
        # the test expects equal_headers to return None for the two outputs
        self.assertIsNone(outputs[0].equal_headers(outputs[1]),
                          msg="Headers should be compatible")
    def test_classes_to_clusters(self):
        """
        Tests the classes_to_clusters method.

        A SimpleKMeans clusterer is built on a copy of the data with the last
        attribute deleted, then evaluated on a second copy that uses the last
        attribute as class.
        """
        loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(data)

        train = dataset.Instances.copy_instances(data, 0, data.num_instances)
        train.delete_last_attribute()

        cls = clusterers.Clusterer(classname="weka.clusterers.SimpleKMeans")
        self.assertIsNotNone(cls, msg="Failed to instantiate clusterer!")
        cls.build_clusterer(train)

        test = dataset.Instances.copy_instances(data, 0, data.num_instances)
        test.class_is_last()
        evl = clusterers.ClusterEvaluation()
        # check the evaluation object itself (this used to re-check the clusterer)
        self.assertIsNotNone(evl, msg="Failed to instantiate evaluation!")
        evl.set_model(cls)
        evl.test_model(test)
        self.assertEqual([2, 4],
                         evl.classes_to_clusters.tolist(),
                         msg="classes to clusters differs")
    def test_reduce(self):
        """
        Tests reducing of attributes.

        Runs a BestFirst/CfsSubsetEval attribute selection on the anneal
        dataset and checks the dimensions of the dataset returned by
        reduce_dimensionality.
        """
        loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(data)
        data.class_is_last()

        cname = "weka.attributeSelection.BestFirst"
        options = ["-D", "1", "-N", "5"]
        search = attribute_selection.ASSearch(classname=cname, options=options)
        self.assertIsNotNone(search, msg="Search should not be None: " + cname + "/" + str(options))

        cname = "weka.attributeSelection.CfsSubsetEval"
        options = ["-P", "1", "-E", "1"]
        evaluation = attribute_selection.ASEvaluation(classname=cname, options=options)
        self.assertIsNotNone(evaluation, msg="Evaluation should not be None: " + cname + "/" + str(options))

        attsel = attribute_selection.AttributeSelection()
        # check the AttributeSelection object itself (this used to re-check `search`)
        self.assertIsNotNone(attsel, msg="AttributeSelection should not be None!")

        attsel.search(search)
        attsel.evaluator(evaluation)
        attsel.select_attributes(data)
        reduced = attsel.reduce_dimensionality(data)
        # the reduced data is expected to have one attribute more than the selected count
        self.assertEqual(attsel.number_attributes_selected + 1, reduced.num_attributes, msg="number of attributes differs")
        self.assertEqual(data.num_instances, reduced.num_instances, msg="number of rows differs")
Exemplo n.º 12
0
    def test_ranking(self):
        """
        Tests ranking of attributes.

        Configures a cross-validated (2 folds, seed 42) attribute selection
        with a Ranker search and an InfoGainAttributeEval evaluator, then
        checks that ranked attributes and a results string are produced.
        """
        loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(data)
        data.class_is_last()

        search = attribute_selection.ASSearch(
            classname="weka.attributeSelection.Ranker", options=["-N", "-1"])
        self.assertIsNotNone(search, msg="Search should not be None!")

        evaluation = attribute_selection.ASEvaluation(
            "weka.attributeSelection.InfoGainAttributeEval")
        self.assertIsNotNone(evaluation, msg="Evaluation should not be None!")

        attsel = attribute_selection.AttributeSelection()
        self.assertIsNotNone(attsel,
                             msg="AttributeSelection should not be None!")

        attsel.ranking(True)
        attsel.folds(2)
        attsel.crossvalidation(True)
        attsel.seed(42)
        attsel.search(search)
        attsel.evaluator(evaluation)
        attsel.select_attributes(data)

        # the message now names the property that is actually checked here
        self.assertGreater(len(str(attsel.ranked_attributes)),
                           0,
                           msg="ranked_attributes should get produced")
        self.assertGreater(len(attsel.results_string),
                           0,
                           msg="results_string should get produced")
    def test_distribution_for_instance(self):
        """
        Checks the cluster membership arrays returned by
        distribution_for_instance for the first ten instances.
        """
        arff = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = arff.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(data)
        data.delete_last_attribute()

        cls = clusterers.Clusterer(classname="weka.clusterers.SimpleKMeans")
        self.assertIsNotNone(cls, msg="Failed to instantiate clusterer!")
        cls.build_clusterer(data)

        memberships = []
        for row in range(10):
            dist = cls.distribution_for_instance(data.get_instance(row))
            self.assertIsNotNone(dist, msg="Failed to obtain cluster membership for instance")
            self.assertEqual(2, len(dist), msg="Number of clusters differs")
            memberships.append(dist)

        expected = [
            [0., 1.], [1., 0.], [1., 0.], [0., 1.], [0., 1.],
            [0., 1.], [1., 0.], [1., 0.], [1., 0.], [1., 0.],
        ]
        self.assertEqual(len(expected), len(memberships),
                         msg="Expected/predicted differ in length - update required!")
        # lengths were just checked to match, so pairing up is safe
        for want, got in zip(expected, memberships):
            self.assertEqual(want, got.tolist(), msg="Cluster distributions differ")
Exemplo n.º 14
0
def load_Arff_file(file):
    """
    Load a dataset through an ArffLoader.

    :param file: the file to load; converted with str() unless it is already
                 exactly a str
    :return: whatever the loader's load_file call returns
    """
    # str() returns an exact str instance unchanged, so the conversion can be
    # applied unconditionally
    path = str(file)
    return converters.Loader(classname="weka.core.converters.ArffLoader").load_file(path)
Exemplo n.º 15
0
    def test_clusterevaluation(self):
        """
        Tests the ClusterEvaluation class.

        First evaluates a SimpleKMeans model on a ten-instance copy of the
        data, then cross-validates an EM clusterer.
        """
        loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(data)
        data.delete_last_attribute()

        # simple test set
        test = dataset.Instances.copy_instances(data, 0, 10)
        cls = clusterers.Clusterer(classname="weka.clusterers.SimpleKMeans")
        self.assertIsNotNone(cls, msg="Failed to instantiate clusterer!")
        cls.build_clusterer(data)

        evl = clusterers.ClusterEvaluation()
        # check the evaluation object itself (this used to re-check the clusterer)
        self.assertIsNotNone(evl, msg="Failed to instantiate evaluation!")
        evl.set_model(cls)
        evl.test_model(test)
        self.assertGreater(len(evl.cluster_results), 0, msg="No evaluation string!")
        self.assertEqual(0.0, evl.log_likelihood, msg="log_likelihood differs")
        self.assertEqual(2, evl.num_clusters, msg="num_clusters differs")
        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12)
        self.assertEqual(
            [1., 0., 0., 1., 1., 1., 0., 0., 0., 0.], evl.cluster_assignments.tolist(),
            msg="cluster_assignments differs")

        # cross-validation
        cls = clusterers.Clusterer(classname="weka.clusterers.EM", options=["-I", "3", "-X", "2", "-max", "5"])
        self.assertIsNotNone(cls, msg="Failed to instantiate clusterer!")
        llh = clusterers.ClusterEvaluation.crossvalidate_model(cls, data, 10, classes.Random(1))
        self.assertAlmostEqual(-34.397, llh, places=3, msg="Failed to cross-validate clusterer!")
Exemplo n.º 16
0
    def test_batchpredictor(self):
        """
        Exercises the batch predictor members of a J48 classifier.
        """
        arff = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = arff.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(data)
        data.class_is_last()

        cname = "weka.classifiers.trees.J48"
        options = []
        cls = classifiers.Classifier(classname=cname, options=options)
        description = cname + "/" + str(options)
        self.assertIsNotNone(cls, msg="Failed to instantiate: " + description)

        # batch predictor?
        self.assertTrue(cls.is_batchpredictor, msg="not a batch predictor: " + description)

        # more efficient implementation? (only called, the result is not checked)
        cls.has_efficient_batch_prediction()

        # batch size
        self.assertIsNotNone(cls.batch_size, msg="batch size is not initialized")

        # distributions_for_instances
        cls.build_classifier(data)
        dists = cls.distributions_for_instances(data)
        self.assertIsNotNone(dists, msg="no distributions generated")
        self.assertEqual(len(dists), len(data), msg="number of predictions differ")
        num_classes = data.class_attribute.num_values
        self.assertEqual(len(dists[0]), num_classes, msg="size of distribution array does not match number of classes")
Exemplo n.º 17
0
    def test_incremental_filtering(self):
        """
        Checks the Filter.input/output methods by pushing a dataset through a
        Remove filter one instance at a time.
        """
        arff = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = arff.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(data)

        remove = filters.Filter(
            classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "1,3"])
        remove.inputformat(data)

        # the output format serves as the container collecting filtered rows
        collected = remove.outputformat()
        self.assertIsNotNone(collected)
        self.assertTrue(isinstance(collected, dataset.Instances), msg="Should be Instances object")

        for row in data:
            remove.input(row)
            collected.add_instance(remove.output())

        self.assertEqual(data.num_attributes - 2, collected.num_attributes,
                         msg="Number of attributes differ")
        self.assertEqual(data.num_instances, collected.num_instances,
                         msg="Number of instances differ")
Exemplo n.º 18
0
 def test_matrix_plot(self):
     """
     Calls plot.matrix_plot on the iris dataset without waiting.
     """
     arff = converters.Loader("weka.core.converters.ArffLoader")
     iris = arff.load_file(self.datafile("iris.arff"))
     iris.class_is_last()
     plot.matrix_plot(
         iris,
         percent=50,
         title="Matrix plot iris",
         wait=False)
Exemplo n.º 19
0
    def test_instance(self):
        """
        Exercises the Instance class: class index handling on the dataset,
        missing values, value and weight accessors, and creation of dense and
        sparse instances.
        """
        arff = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = arff.load_file(self.datafile("anneal.ORIG.arff"))
        self.assertIsNotNone(data, msg="Failed to load data!")

        inst = data.get_instance(0)
        self.assertEqual(39, inst.num_attributes, msg="num_attributes differs")
        self.assertEqual(-1, data.class_index, msg="class_index differs")

        # class index: set explicitly, then through the first/last shortcuts
        data.class_index = data.num_attributes - 1
        self.assertEqual(38, data.class_index, msg="class_index differs")
        data.class_is_first()
        self.assertEqual(0, data.class_index, msg="class_index differs")
        data.class_is_last()
        self.assertEqual(38, data.class_index, msg="class_index differs")

        # dataset reference and missing values
        self.assertIsNotNone(inst.dataset, msg="Dataset reference should not be None!")
        self.assertTrue(inst.has_missing(), msg="Should have missing values")
        self.assertTrue(inst.is_missing(0), msg="First value should be missing")
        self.assertFalse(inst.is_missing(1), msg="Second value should not be missing")

        # string value round trip
        self.assertEqual("C", inst.get_string_value(1), msg="string value differs")
        inst.set_string_value(1, "H")
        self.assertEqual("H", inst.get_string_value(1), msg="string value differs")

        # numeric value round trip
        self.assertEqual(8, inst.get_value(3), msg="numeric value differs")
        inst.set_value(3, 6.3)
        self.assertEqual(6.3, inst.get_value(3), msg="numeric value differs")

        # weight round trip
        self.assertEqual(1, inst.weight, msg="weight should be 1")
        inst.weight = 0.5
        self.assertEqual(0.5, inst.weight, msg="weights differ")

        def check_created(created, expected):
            # a created instance must have three attributes holding `expected`
            self.assertEqual(3, created.num_attributes, msg="#attributes differ")
            for pos, want in enumerate(expected):
                self.assertEqual(want, created.get_value(pos), msg="value at #" + str(pos) + " differs")

        dense_values = [1.0, 2.0, 3.0]
        check_created(
            dataset.Instance.create_instance(dense_values, classname="weka.core.DenseInstance"),
            [1.0, 2.0, 3.0])

        sparse_values = [0.0, 2.0, 0.0]
        check_created(
            dataset.Instance.create_instance(sparse_values, classname="weka.core.SparseInstance"),
            [0.0, 2.0, 0.0])

        # sparse creation from (index, value) pairs plus the total attribute count
        pairs = [(1, 2.0)]
        check_created(
            dataset.Instance.create_sparse_instance(pairs, 3, classname="weka.core.SparseInstance"),
            [0.0, 2.0, 0.0])
Exemplo n.º 20
0
 def test_arff_loader(self):
     """
     Loads anneal.arff through an ArffLoader and checks the instance count.
     """
     arff = converters.Loader(classname="weka.core.converters.ArffLoader")
     loaded = arff.load_file(self.datafile("anneal.arff"))
     self.assertIsNotNone(loaded)
     self.assertEqual(898, loaded.num_instances, msg="Number of instances differs!")
Exemplo n.º 21
0
 def test_incremental_arff_loader(self):
     """
     Loads anneal.arff incrementally and counts the instances obtained by
     iterating over the loader.
     """
     arff = converters.Loader(classname="weka.core.converters.ArffLoader")
     header = arff.load_file(self.datafile("anneal.arff"), incremental=True)
     self.assertIsNotNone(header)
     # iterate the loader itself, not the object returned by load_file
     seen = sum(1 for _ in arff)
     self.assertEqual(898, seen, msg="Number of instances differs!")
Exemplo n.º 22
0
 def test_scatter_plot(self):
     """
     Calls plot.scatter_plot on two iris attributes without waiting.
     """
     arff = converters.Loader("weka.core.converters.ArffLoader")
     iris = arff.load_file(self.datafile("iris.arff"))
     iris.class_is_last()
     # resolve the two attribute indices by name before plotting
     first_index = iris.attribute_by_name("petalwidth").index
     second_index = iris.attribute_by_name("petallength").index
     plot.scatter_plot(iris, first_index, second_index, percent=50, wait=False)
Exemplo n.º 23
0
    def test_learning_curve(self):
        """
        Calls plot.plot_learning_curve for two classifiers, once with default
        arguments and once with increments=0.1.
        """
        arff = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = arff.load_file(self.datafile("diabetes.arff"))
        data.class_is_last()

        classnames = (
            "weka.classifiers.trees.J48",
            "weka.classifiers.bayes.NaiveBayesUpdateable",
        )
        cls = [classifiers.Classifier(classname=name) for name in classnames]
        plot.plot_learning_curve(cls, data, wait=False)
        plot.plot_learning_curve(cls, data, increments=0.1, wait=False)
Exemplo n.º 24
0
    def test_plot_classifier_errors(self):
        """
        Calls plot.plot_classifier_errors on the predictions of a
        cross-validated LinearRegression model.
        """
        arff = converters.Loader(classname="weka.core.converters.ArffLoader")
        bolts = arff.load_file(self.datafile("bolts.arff"))
        self.assertIsNotNone(bolts)
        bolts.class_is_last()

        regression = classifiers.Classifier(
            classname="weka.classifiers.functions.LinearRegression",
            options=["-S", "1", "-C"])
        evl = classifiers.Evaluation(bolts)
        evl.crossvalidate_model(regression, bolts, 10, Random(42))
        plot.plot_classifier_errors(evl.predictions, wait=False)
Exemplo n.º 25
0
    def test_plot_dot_graph(self):
        """
        Calls graph.plot_dot_graph with the graph of a built J48 classifier.
        """
        arff = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = arff.load_file(self.datafile("diabetes.arff"))
        data.class_is_last()

        tree = classifiers.Classifier(
            classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
        tree.build_classifier(data)

        graph.plot_dot_graph(tree.graph)
Exemplo n.º 26
0
 def test_arff_saver(self):
     """
     Saves a loaded dataset with an ArffSaver and checks that the output
     file exists.
     """
     arff = converters.Loader(classname="weka.core.converters.ArffLoader")
     data = arff.load_file(self.datafile("anneal.arff"))
     self.assertIsNotNone(data)

     target = self.tempfile("out.arff")
     # presumably clears a stale file from an earlier run - delfile is a
     # helper defined elsewhere on the test case
     self.delfile(target)

     saver = converters.Saver(classname="weka.core.converters.ArffSaver")
     saver.save_file(data, target)
     self.assertTrue(os.path.exists(target), "File does not exist: " + target)
     self.delfile(target)
Exemplo n.º 27
0
    def test_build_associator(self):
        """
        Checks that build_associations runs for an Apriori associator.
        """
        arff = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = arff.load_file(self.datafile("nursery.arff"))
        self.assertIsNotNone(data)
        data.class_is_last()

        cname = "weka.associations.Apriori"
        options = None
        associator = associations.Associator(classname=cname, options=options)
        self.assertIsNotNone(associator, msg="Failed to instantiate: " + cname + "/" + str(options))

        associator.build_associations(data)
Exemplo n.º 28
0
    def test_plot_prc(self):
        """
        Calls plot.plot_prc for class indices 0 and 1 of a cross-validated
        NaiveBayes evaluation.
        """
        arff = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = arff.load_file(self.datafile("diabetes.arff"))
        data.class_is_last()

        remove = filters.Filter(
            classname="weka.filters.unsupervised.attribute.Remove",
            options=["-R", "1-3"])
        cls = classifiers.Classifier(
            classname="weka.classifiers.bayes.NaiveBayes")
        # NOTE(review): fc is configured here, but the cross-validation below
        # is run on cls rather than fc - confirm whether that is intended.
        fc = classifiers.FilteredClassifier()
        fc.filter = remove
        fc.classifier = cls

        evl = classifiers.Evaluation(data)
        evl.crossvalidate_model(cls, data, 10, Random(1))
        plot.plot_prc(evl, class_index=[0, 1], wait=False)
Exemplo n.º 29
0
    def test_attributestats(self):
        """
        Checks the AttributeStats values reported for the attribute at index 2
        of the anneal dataset.
        """
        arff = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = arff.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(data, msg="Failed to load data!")

        stats = data.attribute_stats(2)
        self.assertIsNotNone(stats, msg="Failed to obtain stats!")

        # counts and weights are expected to be identical here
        per_label = [86, 256, 440, 0, 51, 20, 10, 19, 16]
        self.assertEqual(8, stats.distinct_count, "distinct_count differs")
        self.assertEqual(898, stats.int_count, "int_count differs")
        self.assertEqual(0, stats.missing_count, "missing_count differs")
        self.assertEqual(per_label, stats.nominal_counts.tolist(), "nominal_counts differs")
        self.assertEqual(per_label, stats.nominal_weights.tolist(), "nominal_weights differs")
        self.assertEqual(898, stats.total_count, "total_count differs")
        self.assertEqual(0, stats.unique_count, "unique_count differs")
Exemplo n.º 30
0
    def test_plot_cluster_assignments(self):
        """
        Calls plot.plot_cluster_assignments for a SimpleKMeans model evaluated
        on the iris data without its last attribute.
        """
        arff = converters.Loader("weka.core.converters.ArffLoader")
        data = arff.load_file(self.datafile("iris.arff"))
        data.delete_last_attribute()

        # build the clusterer, then evaluate it on the same data
        kmeans = clusterers.Clusterer(
            classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
        kmeans.build_clusterer(data)

        evl = clusterers.ClusterEvaluation()
        evl.set_model(kmeans)
        evl.test_model(data)

        plot.plot_cluster_assignments(evl, data, inst_no=True, wait=False)