Пример #1
0
    def clustering(self, sample_path, answer_path, amount, ccore):
        """Cluster a sample with G-Means and compare it with a stored answer.

        G-Means is run up to 10 times. An attempt succeeds (and the method
        returns) when every point index is covered by the clusters and the
        sorted cluster sizes match the sizes read from the answer file. If no
        attempt succeeds, the test fails with the notes collected from every
        attempt.

        Args:
            sample_path: Location handed to ``read_sample`` to load the data.
            answer_path: Location handed to ``answer_reader``; its
                ``get_cluster_lengths()`` gives the expected cluster sizes.
            amount: Second positional argument of ``gmeans`` (presumably the
                initial number of clusters -- confirm against gmeans docs).
            ccore: Third positional argument of ``gmeans``.
        """
        attempts = 10

        # Neither the sample nor the reference answer changes between
        # attempts, so both are loaded once instead of on every retry.
        data = read_sample(sample_path)
        reader = answer_reader(answer_path)
        expected_length_clusters = sorted(reader.get_cluster_lengths())
        expected_total_length = sum(expected_length_clusters)

        # One note per unsuccessful attempt; joined only if the test fails.
        failures = []

        for _ in range(attempts):
            gmeans_instance = gmeans(data, amount, ccore).process()

            clusters = gmeans_instance.get_clusters()
            centers = gmeans_instance.get_centers()
            wce = gmeans_instance.get_total_wce()

            # These are hard assertions: a mismatch here fails the test
            # immediately and is not retried.
            self.assertEqual(len(expected_length_clusters), len(centers))

            if len(clusters) > 1:
                self.assertGreater(wce, 0.0)
            else:
                self.assertGreaterEqual(wce, 0.0)

            unique_indexes = {
                index_point for cluster in clusters for index_point in cluster
            }
            if len(data) != len(unique_indexes):
                failures.append(
                    "1. %d != %d\n" % (len(data), len(unique_indexes)))
                continue

            actual_length_clusters = sorted(len(cluster) for cluster in clusters)
            actual_total_length = sum(actual_length_clusters)
            if expected_total_length != actual_total_length:
                failures.append("2. %d != %d\n" % (expected_total_length,
                                                   actual_total_length))
                continue

            if expected_length_clusters != actual_length_clusters:
                failures.append("3. %s != %s\n" % (str(expected_length_clusters),
                                                   str(actual_length_clusters)))
                continue

            return

        self.fail("Expected result is not obtained during %d attempts: %s\n" %
                  (attempts, "".join(failures)))
Пример #2
0
def template_clustering(sample_path, k_init=1, ccore=True, **kwargs):
    """Cluster the sample at ``sample_path`` with G-Means and optionally plot it.

    Args:
        sample_path: Location handed to ``read_sample`` to load the points.
        k_init: Second positional argument of ``gmeans``.
        ccore: Third positional argument of ``gmeans``.
        **kwargs: Only ``visualize`` is read (default ``True``); when truthy
            the clusters and the centers are drawn with ``cluster_visualizer``.

    Returns:
        A ``(sample, clusters)`` tuple: the loaded points and the clusters
        reported by ``get_clusters()``.
    """
    points = read_sample(sample_path)

    # The number of repeats is fixed at 5 for this example.
    model = gmeans(points, k_init, ccore, repeat=5).process()
    found_clusters = model.get_clusters()
    found_centers = model.get_centers()

    if kwargs.get('visualize', True):
        canvas = cluster_visualizer()
        canvas.append_clusters(found_clusters, points)
        # Centers are drawn as one extra cluster with a star marker.
        canvas.append_cluster(found_centers, None, marker='*', markersize=10)
        canvas.show()

    return points, found_clusters
Пример #3
0
    def template_predict(self, path, amount, points, ccore):
        """Check that ``predict`` assigns every point to its nearest center.

        A G-Means model is built from the sample at ``path``; then, for each
        entry of ``points``, the Euclidean distance to the predicted center
        must not exceed the distance to any other center.

        Args:
            path: Location handed to ``read_sample`` to load the sample.
            amount: Second positional argument of ``gmeans``.
            points: Points passed to ``predict``.
            ccore: Third positional argument of ``gmeans``.
        """
        euclidean = distance_metric(type_metric.EUCLIDEAN)

        training_data = read_sample(path)
        model = gmeans(training_data, amount, ccore).process()
        centers = model.get_centers()

        predicted = model.predict(points)

        # Exactly one prediction is expected per input point.
        self.assertEqual(len(points), len(predicted))

        for point, assigned in zip(points, predicted):
            assigned_distance = euclidean(centers[assigned], point)
            for center_index, center in enumerate(centers):
                if center_index == assigned:
                    continue
                rival_distance = euclidean(center, point)
                self.assertLessEqual(assigned_distance, rival_distance)
    def clustering(self, sample_path, answer, amount, ccore, **kwargs):
        """Cluster a sample with G-Means and validate it against ``answer``.

        G-Means is run up to 10 times. An attempt succeeds (and the method
        returns) when the number of clusters matches, every point index is
        covered, and -- if expected sizes are known -- the sorted cluster
        sizes match. If no attempt succeeds, the test fails with the notes
        collected from every attempt.

        Args:
            sample_path: Location handed to ``read_sample`` to load the data.
            answer: Expected result in one of three forms:
                * ``str`` -- passed to ``answer_reader``; expected sizes come
                  from its ``get_cluster_lengths()``;
                * ``int`` -- expected number of clusters only, sizes are not
                  checked;
                * anything else -- a collection of expected cluster sizes, in
                  any order.
            amount: Second positional argument of ``gmeans``.
            ccore: Third positional argument of ``gmeans``.
            **kwargs: ``k_max`` (default ``-1``) and ``random_state``
                (default ``None``) are forwarded to ``gmeans``.
        """
        attempts = 10

        k_max = kwargs.get('k_max', -1)
        random_state = kwargs.get('random_state', None)
        data = read_sample(sample_path)

        if isinstance(answer, str):
            reader = answer_reader(answer)
            expected_length_clusters = sorted(reader.get_cluster_lengths())
            amount_clusters = len(expected_length_clusters)

        elif isinstance(answer, int):
            expected_length_clusters = None
            amount_clusters = answer

        else:
            # The sizes are compared with the *sorted* actual sizes below, so
            # the expectation has to be sorted as well; otherwise an unordered
            # but correct list of sizes could never match.
            expected_length_clusters = sorted(answer)
            amount_clusters = len(expected_length_clusters)

        # Invariant across attempts, so computed once up front.
        expected_total_length = (None if expected_length_clusters is None
                                 else sum(expected_length_clusters))

        # One note per unsuccessful attempt; joined only if the test fails.
        failures = []

        for _ in range(attempts):
            gmeans_instance = gmeans(data,
                                     amount,
                                     ccore,
                                     k_max=k_max,
                                     random_state=random_state).process()

            clusters = gmeans_instance.get_clusters()
            centers = gmeans_instance.get_centers()
            wce = gmeans_instance.get_total_wce()

            # These are hard assertions: a mismatch here fails the test
            # immediately and is not retried.
            self.assertEqual(amount_clusters, len(centers))

            if len(clusters) > 1:
                self.assertGreater(wce, 0.0)
            else:
                self.assertGreaterEqual(wce, 0.0)

            if len(clusters) != amount_clusters:
                failures.append(
                    "1. %d != %d\n" % (len(clusters), amount_clusters))
                continue

            unique_indexes = {
                index_point for cluster in clusters for index_point in cluster
            }
            if len(data) != len(unique_indexes):
                failures.append(
                    "2. %d != %d\n" % (len(data), len(unique_indexes)))
                continue

            # Only the cluster count was requested: nothing more to verify.
            if expected_length_clusters is None:
                return

            actual_length_clusters = sorted(len(cluster) for cluster in clusters)
            actual_total_length = sum(actual_length_clusters)
            if expected_total_length != actual_total_length:
                failures.append("3. %d != %d\n" % (expected_total_length,
                                                   actual_total_length))
                continue

            if expected_length_clusters != actual_length_clusters:
                failures.append("4. %s != %s\n" % (str(expected_length_clusters),
                                                   str(actual_length_clusters)))
                continue

            return

        self.fail("Expected result is not obtained during %d attempts: %s\n" %
                  (attempts, "".join(failures)))
Пример #5
0
 def test_predict_without_process(self):
     """Expect ``predict`` to return an empty list when ``process()`` was never called."""
     unprocessed = gmeans([[0], [1]])
     prediction = unprocessed.predict([0])
     self.assertEqual([], prediction)