def test_classes_to_clusters(self):
        """
        Tests the classes_to_clusters method.
        """
        loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(data)
        #data.class_is_last()

        train = dataset.Instances.copy_instances(data, 0, data.num_instances)
        train.delete_last_attribute()

        cls = clusterers.Clusterer(classname="weka.clusterers.SimpleKMeans")
        self.assertIsNotNone(cls, msg="Failed to instantiate clusterer!")
        cls.build_clusterer(train)

        test = dataset.Instances.copy_instances(data, 0, data.num_instances)
        test.class_is_last()
        evl = clusterers.ClusterEvaluation()
        self.assertIsNotNone(cls, msg="Failed to instantiate evaluation!")
        evl.set_model(cls)
        evl.test_model(test)
        self.assertEqual([2, 4],
                         evl.classes_to_clusters.tolist(),
                         msg="classes to clusters differs")
예제 #2
0
    def test_clusterevaluation(self):
        """
        Tests the ClusterEvaluation class.
        """
        loader = converters.Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file(self.datafile("anneal.arff"))
        self.assertIsNotNone(data)
        data.delete_last_attribute()

        # simple test set
        test = dataset.Instances.copy_instances(data, 0, 10)
        cls = clusterers.Clusterer(classname="weka.clusterers.SimpleKMeans")
        self.assertIsNotNone(cls, msg="Failed to instantiate clusterer!")
        cls.build_clusterer(data)

        evl = clusterers.ClusterEvaluation()
        self.assertIsNotNone(cls, msg="Failed to instantiate evaluation!")
        evl.set_model(cls)
        evl.test_model(test)
        self.assertGreater(len(evl.cluster_results), 0, msg="No evaluation string!")
        self.assertEqual(0.0, evl.log_likelihood, msg="log_likelihood differs")
        self.assertEqual(2, evl.num_clusters, msg="num_clusters differs")
        self.assertEquals(
            [1., 0., 0., 1., 1., 1., 0., 0., 0., 0.], evl.cluster_assignments.tolist(),
            msg="cluster_assignments differs")

        # cross-validation
        cls = clusterers.Clusterer(classname="weka.clusterers.EM", options=["-I", "3", "-X", "2", "-max", "5"])
        self.assertIsNotNone(cls, msg="Failed to instantiate clusterer!")
        llh = clusterers.ClusterEvaluation.crossvalidate_model(cls, data, 10, classes.Random(1))
        self.assertAlmostEqual(-34.397, llh, places=3, msg="Failed to cross-validate clusterer!")
예제 #3
0
    def test_plot_cluster_assignments(self):
        """
        Tests the plot_cluster_assignments method.
        """
        loader = converters.Loader("weka.core.converters.ArffLoader")
        data = loader.load_file(self.datafile("iris.arff"))
        data.delete_last_attribute()

        # build a clusterer and output model
        clusterer = clusterers.Clusterer(
            classname="weka.clusterers.SimpleKMeans", options=["-N", "3"])
        clusterer.build_clusterer(data)
        evaluation = clusterers.ClusterEvaluation()
        evaluation.set_model(clusterer)
        evaluation.test_model(data)
        plot.plot_cluster_assignments(evaluation,
                                      data,
                                      inst_no=True,
                                      wait=False)