Example #1
    def templateSyncsegmSegmentation(image_source, radius_color, radius_object,
                                     noise_size, expected_color_segments,
                                     expected_object_segments, collect_dynamic,
                                     ccore_flag):
        result_testing = False
        color_segments, object_segments = [], []

        for _ in range(10):
            algorithm = syncsegm(radius_color,
                                 radius_object,
                                 noise_size,
                                 ccore=ccore_flag)
            analyser = algorithm.process(image_source, collect_dynamic, 0.9995,
                                         0.9995)

            color_segments = analyser.allocate_colors()
            object_segments = analyser.allocate_objects(0.2)

            if (len(color_segments) != expected_color_segments) or (
                    len(object_segments) != expected_object_segments):
                continue

            result_testing = True
            break

        assertion.eq(expected_color_segments, len(color_segments))
        assertion.eq(expected_object_segments, len(object_segments))
        assertion.true(result_testing)
Example #2
    def clustering(path, threshold1, threshold2, expected, ccore, **kwargs):
        metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN))

        sample = read_sample(path)

        ttsas_instance = ttsas(sample,
                               threshold1,
                               threshold2,
                               ccore=ccore,
                               metric=metric)
        ttsas_instance.process()

        clusters = ttsas_instance.get_clusters()
        representatives = ttsas_instance.get_representatives()

        obtained_length = 0
        obtained_cluster_length = []
        for cluster in clusters:
            obtained_length += len(cluster)
            obtained_cluster_length.append(len(cluster))

        assertion.eq(len(sample), obtained_length)
        assertion.eq(len(expected), len(clusters))
        assertion.eq(len(expected), len(representatives))
        assertion.ge(len(sample), len(clusters))

        dimension = len(sample[0])
        for rep in representatives:
            assertion.eq(dimension, len(rep))

        expected.sort()
        obtained_cluster_length.sort()

        assertion.eq(expected, obtained_cluster_length)
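A minimal standalone sketch of the same TTSAS flow, assuming pyclustering's public API; the bundled SAMPLE_SIMPLE1 data set and the thresholds are illustrative assumptions, not values taken from the template above:

    # Sketch only: the data set and the thresholds are assumptions, not from the test template.
    from pyclustering.cluster.ttsas import ttsas
    from pyclustering.samples.definitions import SIMPLE_SAMPLES
    from pyclustering.utils import read_sample

    sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)

    ttsas_instance = ttsas(sample, 1.0, 2.0, ccore=False)
    ttsas_instance.process()

    print(ttsas_instance.get_clusters())         # lists of point indices, one list per cluster
    print(ttsas_instance.get_representatives())  # one representative point per cluster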
Example #3
    def templateAllocateRequestedClusterAmount(data, amount_clusters,
                                               initial_medoids, ccore_flag):
        if initial_medoids is None:
            initial_medoids = []
            for _ in range(amount_clusters):
                index_point = randint(0, len(data) - 1)
                while index_point in initial_medoids:
                    index_point = randint(0, len(data) - 1)

                initial_medoids.append(index_point)

        kmedoids_instance = kmedoids(data,
                                     initial_medoids,
                                     0.025,
                                     ccore=ccore_flag)
        kmedoids_instance.process()
        clusters = kmedoids_instance.get_clusters()

        assertion.eq(len(clusters), amount_clusters)
        amount_objects = 0
        for cluster in clusters:
            amount_objects += len(cluster)

        assertion.eq(amount_objects, len(data))
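For reference, a minimal standalone k-medoids run corresponding to the template above; the data set and the initial medoid indices are assumptions:

    # Sketch only: SAMPLE_SIMPLE1 and the medoid indices [2, 7] are assumptions.
    from pyclustering.cluster.kmedoids import kmedoids
    from pyclustering.samples.definitions import SIMPLE_SAMPLES
    from pyclustering.utils import read_sample

    data = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)

    kmedoids_instance = kmedoids(data, [2, 7], 0.025, ccore=False)
    kmedoids_instance.process()

    clusters = kmedoids_instance.get_clusters()
    assert sum(len(cluster) for cluster in clusters) == len(data)  # every point is allocated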
Example #4
    def clustering(path, amount, threshold, expected, ccore, **kwargs):
        metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN))

        sample = read_sample(path)

        bsas_instance = bsas(sample, amount, threshold, ccore=ccore, metric=metric)
        bsas_instance.process()

        clusters = bsas_instance.get_clusters()
        representatives = bsas_instance.get_representatives()

        obtained_length = 0
        obtained_cluster_length = []
        for cluster in clusters:
            obtained_length += len(cluster)
            obtained_cluster_length.append(len(cluster))

        assertion.eq(len(sample), obtained_length)
        assertion.eq(len(expected), len(clusters))
        assertion.eq(len(expected), len(representatives))
        assertion.ge(amount, len(clusters))

        dimension = len(sample[0])
        for rep in representatives:
            assertion.eq(dimension, len(rep))

        expected.sort()
        obtained_cluster_length.sort()

        assertion.eq(expected, obtained_cluster_length)
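A minimal standalone BSAS run mirroring the template above; the data set, the maximum number of clusters, and the threshold are assumptions:

    # Sketch only: all parameters below are illustrative assumptions.
    from pyclustering.cluster.bsas import bsas
    from pyclustering.samples.definitions import SIMPLE_SAMPLES
    from pyclustering.utils import read_sample

    sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)

    bsas_instance = bsas(sample, 2, 1.0, ccore=False)   # at most 2 clusters, threshold 1.0
    bsas_instance.process()

    print(bsas_instance.get_clusters())
    print(bsas_instance.get_representatives())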
    def template_cluster_allocation(input_data, cluster_sizes, number_cluster, number_represent_points = 5, compression = 0.5, ccore_flag = False, **kwargs):
        if isinstance(input_data, str):
            sample = read_sample(input_data)
        else:
            sample = input_data

        numpy_usage = kwargs.get('numpy_usage', False)
        if numpy_usage is True:
            sample = numpy.array(sample)
         
        cure_instance = cure(sample, number_cluster, number_represent_points, compression, ccore = ccore_flag)
        cure_instance.process()
         
        clusters = cure_instance.get_clusters()
        representors = cure_instance.get_representors()
        means = cure_instance.get_means()

        assertion.eq(len(clusters), number_cluster)
        assertion.eq(len(representors), number_cluster)
        assertion.eq(len(means), number_cluster)
         
        obtained_cluster_sizes = [len(cluster) for cluster in clusters]
         
        total_length = sum(obtained_cluster_sizes)
        assertion.eq(total_length, len(sample))
         
        cluster_sizes.sort()
        obtained_cluster_sizes.sort()
        assertion.eq(cluster_sizes, obtained_cluster_sizes)
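A minimal standalone CURE run corresponding to template_cluster_allocation above; the data set and the requested number of clusters are assumptions:

    # Sketch only: SAMPLE_SIMPLE3 and the cluster count are assumptions.
    from pyclustering.cluster.cure import cure
    from pyclustering.samples.definitions import SIMPLE_SAMPLES
    from pyclustering.utils import read_sample

    sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)

    cure_instance = cure(sample, 4, 5, 0.5, ccore=False)  # 4 clusters, 5 representor points, 0.5 compression
    cure_instance.process()

    print(len(cure_instance.get_clusters()))      # requested: 4
    print(len(cure_instance.get_representors()))  # one representor set per cluster
    print(len(cure_instance.get_means()))         # one mean point per cluster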
    def random_sampling(data, n, algorithm, repeat, ccore=True):
        for _ in range(repeat):
            sample = algorithm(data, n)
            unique_values = set(sample)

            assertion.eq(n, len(sample))
            assertion.eq(len(unique_values), len(sample))
Example #7
 def testMinkowskiDistance(self):
     assertion.eq(0.0, metric.minkowski_distance([0], [0]))
     assertion.eq(0.0, metric.minkowski_distance([0], [0], 2))
     assertion.eq(-2.0, metric.minkowski_distance([3.0, 3.0], [5.0, 3.0],
                                                  1))
     assertion.eq(2.0, metric.minkowski_distance([3.0, 3.0], [5.0, 3.0], 2))
     assertion.eq(2.0, metric.minkowski_distance([3.0, 3.0], [5.0, 3.0], 4))
Example #8
 def testManhattanDistance(self):
     assertion.eq(0.0, metric.manhattan_distance([0], [0]))
     assertion.eq(1.0, metric.manhattan_distance([0.0, 1.0], [0.0, 0.0]))
     assertion.eq(2.0, metric.manhattan_distance([1.0, 1.0], [0.0, 0.0]))
     assertion.eq(4.0, metric.manhattan_distance([1.0, 1.0], [-1.0, -1.0]))
     assertion.eq(2.0, metric.manhattan_distance([-1.0, -1.0],
                                                 [-2.0, -2.0]))
    def calculate_elbow(path_to_data, path_to_answer, kmin, kmax, ccore, **kwargs):
        repeat = 10  # The elbow method randomly chooses initial centers, so repeat the test if it fails.
        testing_result = False

        initializer = kwargs.get('initializer', kmeans_plusplus_initializer)

        sample = read_sample(path_to_data)
        answer = answer_reader(path_to_answer)

        additional_info = []

        for _ in range(repeat):
            elbow_instance = elbow(sample, kmin, kmax, ccore=ccore, initializer=initializer)
            elbow_instance.process()

            actual_elbow = elbow_instance.get_amount()
            actual_wce = elbow_instance.get_wce()

            assertion.gt(actual_elbow, kmin)
            assertion.lt(actual_elbow, kmax)
            assertion.eq(len(actual_wce), kmax - kmin)
            assertion.lt(actual_wce[-1], actual_wce[0] + 0.0000001)

            if actual_elbow != len(answer.get_clusters()):
                additional_info.append(actual_elbow)
                #time.sleep(0.05)    # sleep to gain new seed for random generator
                continue

            testing_result = True
            break

        message = str(len(answer.get_clusters())) + ": " + str(additional_info)
        assertion.true(testing_result, message=message)
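A minimal standalone elbow run mirroring calculate_elbow above; the data set and the k-range are assumptions:

    # Sketch only: the data set and the k-range are assumptions.
    from pyclustering.cluster.elbow import elbow
    from pyclustering.samples.definitions import SIMPLE_SAMPLES
    from pyclustering.utils import read_sample

    sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)

    elbow_instance = elbow(sample, 1, 10, ccore=False)
    elbow_instance.process()

    print(elbow_instance.get_amount())  # suggested number of clusters
    print(elbow_instance.get_wce())     # within-cluster error for each tried k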
Example #11
    def templateMaxAllocatedClusters(ccore_flag, amount_clusters, size_cluster,
                                     offset, kinitial, kmax):
        input_data = []
        for index in range(amount_clusters):
            for _ in range(size_cluster):
                input_data.append([
                    random.random() * index * offset,
                    random.random() * index * offset
                ])

        initial_centers = random_center_initializer(input_data,
                                                    kinitial).initialize()
        xmeans_instance = xmeans(input_data, initial_centers, kmax, 0.025,
                                 splitting_type.BAYESIAN_INFORMATION_CRITERION,
                                 ccore_flag)
        xmeans_instance.process()

        clusters = xmeans_instance.get_clusters()
        centers = xmeans_instance.get_centers()

        if len(clusters) != len(centers):
            print(input_data)
            print(initial_centers)

        assertion.ge(kmax, len(clusters))
        assertion.ge(kmax, len(centers))
        assertion.eq(len(clusters), len(centers))
Example #12
    def calculate_elbow(path_to_data, path_to_answer, kmin, kmax, ccore, **kwargs):
        repeat = 5  # The elbow method randomly chooses initial centers, so repeat the test if it fails.
        testing_result = False

        initializer = kwargs.get('initializer', kmeans_plusplus_initializer)

        sample = read_sample(path_to_data)
        answer = answer_reader(path_to_answer)

        for _ in range(repeat):
            elbow_instance = elbow(sample, kmin, kmax, ccore=ccore, initializer=initializer)
            elbow_instance.process()

            actual_elbow = elbow_instance.get_amount()
            actual_wce = elbow_instance.get_wce()

            assertion.gt(actual_elbow, kmin)
            assertion.lt(actual_elbow, kmax)
            assertion.eq(len(actual_wce), kmax - kmin)
            assertion.lt(actual_wce[-1], actual_wce[0])

            if actual_elbow != len(answer.get_clusters()):
                continue

            testing_result = True
            break

        assertion.true(testing_result)
    def correct_ksearch(sample_path, answer_path, kmin, kmax, algorithm, ccore_flag):
        attempts = 10
        testing_result = False

        sample = read_sample(sample_path)
        clusters = answer_reader(answer_path).get_clusters()

        for _ in range(attempts):
            ksearch_instance = silhouette_ksearch(sample, kmin, kmax, algorithm=algorithm, ccore=ccore_flag).process()
            amount = ksearch_instance.get_amount()
            score = ksearch_instance.get_score()
            scores = ksearch_instance.get_scores()

            assertion.le(-1.0, score)
            assertion.ge(1.0, score)
            assertion.eq(kmax - kmin, len(scores))

            upper_limit = len(clusters) + 1
            lower_limit = len(clusters) - 1
            if lower_limit < 1:
                lower_limit = 1

            if (amount > upper_limit) or (amount < lower_limit):
                continue

            testing_result = True
            break

        assertion.true(testing_result)
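A minimal standalone silhouette k-search mirroring correct_ksearch above; the data set, the k-range, and the underlying algorithm are assumptions:

    # Sketch only: the data set, the k-range, and the algorithm choice are assumptions.
    from pyclustering.cluster.silhouette import silhouette_ksearch, silhouette_ksearch_type
    from pyclustering.samples.definitions import SIMPLE_SAMPLES
    from pyclustering.utils import read_sample

    sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)

    search_instance = silhouette_ksearch(sample, 2, 5,
                                         algorithm=silhouette_ksearch_type.KMEANS).process()

    print(search_instance.get_amount())  # suggested number of clusters
    print(search_instance.get_score())   # best average silhouette score, in [-1, 1]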
    def templateLengthProcessSpecificData(data_type, path_to_file, radius,
                                          min_number_neighbors,
                                          max_number_neighbors, ccore):
        for _ in range(min_number_neighbors, max_number_neighbors, 1):
            sample = read_sample(path_to_file)

            if data_type == 'distance_matrix':
                input_data = calculate_distance_matrix(sample)
            elif data_type == 'points':
                input_data = sample
            else:
                raise ValueError("Incorrect data type '%s' is specified" %
                                 data_type)

            dbscan_instance = dbscan(input_data,
                                     radius,
                                     min_number_neighbors,
                                     ccore,
                                     data_type=data_type)
            dbscan_instance.process()

            clusters = dbscan_instance.get_clusters()
            noise = dbscan_instance.get_noise()

            length = len(noise)
            length += sum([len(cluster) for cluster in clusters])

            assertion.eq(len(sample), length)
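A minimal standalone DBSCAN run over a precomputed distance matrix, mirroring the template above; the data set, the radius, and the neighbor count are assumptions:

    # Sketch only: the data set, the radius, and the neighbor count are assumptions.
    from pyclustering.cluster.dbscan import dbscan
    from pyclustering.samples.definitions import SIMPLE_SAMPLES
    from pyclustering.utils import calculate_distance_matrix, read_sample

    sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)
    matrix = calculate_distance_matrix(sample)

    dbscan_instance = dbscan(matrix, 0.7, 3, ccore=False, data_type='distance_matrix')
    dbscan_instance.process()

    clusters = dbscan_instance.get_clusters()
    noise = dbscan_instance.get_noise()
    assert len(sample) == sum(len(cluster) for cluster in clusters) + len(noise)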
Example #15
 def testChiSquareMetric(self):
     metric_instance = metric_wrapper(type_metric.CHI_SQUARE, [], None)
     assertion.eq(0.0, metric_instance([0.0, 0.0], [0.0, 0.0]))
     assertion.eq(2.0, metric_instance([0.0, 0.0], [1.0, 1.0]))
     assertion.eq(0.5, metric_instance([0.75, 0.75], [0.25, 0.25]))
     assertion.eq(0.0, metric_instance([-1.0, -1.0], [-1.0, -1.0]))
     assertion.eq(0.4, metric_instance([-2.0, -2.0], [-3.0, -3.0]))
Example #16
    def correct_ksearch(sample_path, answer_path, kmin, kmax, algorithm,
                        ccore_flag):
        attempts = 15
        testing_result = False

        sample = read_sample(sample_path)
        clusters = answer_reader(answer_path).get_clusters()

        for _ in range(attempts):
            ksearch_instance = silhouette_ksearch(sample,
                                                  kmin,
                                                  kmax,
                                                  algorithm=algorithm,
                                                  ccore=ccore_flag).process()
            amount = ksearch_instance.get_amount()
            score = ksearch_instance.get_score()
            scores = ksearch_instance.get_scores()

            assertion.le(-1.0, score)
            assertion.ge(1.0, score)
            assertion.eq(kmax - kmin, len(scores))

            upper_limit = len(clusters) + 1
            lower_limit = len(clusters) - 1
            if lower_limit < 1:
                lower_limit = 1

            if (amount > upper_limit) or (amount < lower_limit):
                continue

            testing_result = True
            break

        assertion.true(testing_result)
Example #17
 def testCanberraMetric(self):
     metric_instance = metric_wrapper(type_metric.CANBERRA, [], None)
     assertion.eq(0.0, metric_instance([0.0, 0.0], [0.0, 0.0]))
     assertion.eq(2.0, metric_instance([0.0, 0.0], [1.0, 1.0]))
     assertion.eq(1.0, metric_instance([0.75, 0.75], [0.25, 0.25]))
     assertion.eq(0.0, metric_instance([-1.0, -1.0], [-1.0, -1.0]))
     assertion.eq(0.4, metric_instance([-2.0, -2.0], [-3.0, -3.0]))
Example #18
    def clustering(path, amount, threshold, expected, ccore, **kwargs):
        metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN))

        sample = read_sample(path)

        mbsas_instance = mbsas(sample, amount, threshold, ccore=ccore, metric=metric)
        mbsas_instance.process()

        clusters = mbsas_instance.get_clusters()
        representatives = mbsas_instance.get_representatives()

        obtained_length = 0
        obtained_cluster_length = []
        for cluster in clusters:
            obtained_length += len(cluster)
            obtained_cluster_length.append(len(cluster))

        assertion.eq(len(sample), obtained_length)
        assertion.eq(len(expected), len(clusters))
        assertion.eq(len(expected), len(representatives))
        assertion.ge(amount, len(clusters))

        dimension = len(sample[0])
        for rep in representatives:
            assertion.eq(dimension, len(rep))

        expected.sort()
        obtained_cluster_length.sort()

        assertion.eq(expected, obtained_cluster_length)
Example #19
 def testBuildGowerDistanceFromMetricWithNumpyData(self):
     metric = distance_metric(type_metric.GOWER,
                              data=numpy.array([[-3.0, -3.0], [-4.0, -3.0],
                                                [-4.5, -3.0], [-5.0, -3.0]]))
     ccore_metric = metric_wrapper.create_instance(metric)
     assertion.eq(0.5, ccore_metric([-3.0, -3.0], [-5.0, -3.0]))
Example #20
    def template_cluster_allocation(input_data, cluster_sizes, number_cluster, number_represent_points = 5, compression = 0.5, ccore_flag = False, **kwargs):
        if isinstance(input_data, str):
            sample = read_sample(input_data)
        else:
            sample = input_data

        numpy_usage = kwargs.get('numpy_usage', False)
        if numpy_usage is True:
            sample = numpy.array(sample)
         
        cure_instance = cure(sample, number_cluster, number_represent_points, compression, ccore = ccore_flag)
        cure_instance.process()
         
        clusters = cure_instance.get_clusters()
        representors = cure_instance.get_representors()
        means = cure_instance.get_means()

        assertion.eq(len(clusters), number_cluster)
        assertion.eq(len(representors), number_cluster)
        assertion.eq(len(means), number_cluster)
         
        obtained_cluster_sizes = [len(cluster) for cluster in clusters]
         
        total_length = sum(obtained_cluster_sizes)
        assertion.eq(total_length, len(sample))
         
        cluster_sizes.sort()
        obtained_cluster_sizes.sort()
        assertion.eq(cluster_sizes, obtained_cluster_sizes)
Example #21
    def templateLengthProcessData(input_sample, start_centers, expected_cluster_length, type_splitting, kmax, ccore, **kwargs):
        if isinstance(input_sample, str):
            sample = read_sample(input_sample)
        else:
            sample = input_sample

        xmeans_instance = xmeans(sample, start_centers, kmax, 0.025, type_splitting, ccore, **kwargs)
        xmeans_instance.process()
         
        clusters = xmeans_instance.get_clusters()
        centers = xmeans_instance.get_centers()
        wce = xmeans_instance.get_total_wce()
    
        obtained_cluster_sizes = [len(cluster) for cluster in clusters]

        assertion.eq(len(sample), sum(obtained_cluster_sizes))
        assertion.eq(len(clusters), len(centers))
        assertion.le(len(centers), kmax)

        expected_wce = 0.0
        metric = distance_metric(type_metric.EUCLIDEAN_SQUARE)
        for index_cluster in range(len(clusters)):
            for index_point in clusters[index_cluster]:
                expected_wce += metric(sample[index_point], centers[index_cluster])

        assertion.eq(expected_wce, wce)

        if expected_cluster_length is not None:
            assertion.eq(len(centers), len(expected_cluster_length))

            obtained_cluster_sizes.sort()
            expected_cluster_length.sort()
            
            assertion.eq(obtained_cluster_sizes, expected_cluster_length)
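A minimal standalone X-Means run corresponding to templateLengthProcessData above; the data set, the initial center count, and kmax are assumptions:

    # Sketch only: the data set, the initial center count, and kmax are assumptions.
    from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
    from pyclustering.cluster.xmeans import splitting_type, xmeans
    from pyclustering.samples.definitions import SIMPLE_SAMPLES
    from pyclustering.utils import read_sample

    sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)
    initial_centers = kmeans_plusplus_initializer(sample, 2).initialize()

    xmeans_instance = xmeans(sample, initial_centers, 20, 0.025,
                             splitting_type.BAYESIAN_INFORMATION_CRITERION, ccore=False)
    xmeans_instance.process()

    print(len(xmeans_instance.get_centers()))  # refined number of clusters (at most 20)
    print(xmeans_instance.get_total_wce())     # total within-cluster error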
    def templateClusterAllocationOneDimensionDataSpecificData(
            data_type, ccore_flag):
        for _ in range(50):
            sample = ([[random()] for _ in range(10)]
                      + [[random() + 3] for _ in range(10)]
                      + [[random() + 6] for _ in range(10)]
                      + [[random() + 9] for _ in range(10)])

            if data_type == 'distance_matrix':
                input_data = calculate_distance_matrix(sample)
            elif data_type == 'points':
                input_data = sample
            else:
                raise ValueError("Incorrect data type '%s' is specified" %
                                 data_type)

            dbscan_instance = dbscan(input_data,
                                     1.0,
                                     2,
                                     ccore_flag,
                                     data_type=data_type)
            dbscan_instance.process()

            clusters = dbscan_instance.get_clusters()

            assertion.eq(4, len(clusters))
            for cluster in clusters:
                assertion.eq(10, len(cluster))
Example #23
 def testCanberraMetric(self):
     metric_instance = metric_wrapper(type_metric.CANBERRA, [], None)
     assertion.eq(0.0, metric_instance([0.0, 0.0], [0.0, 0.0]))
     assertion.eq(2.0, metric_instance([0.0, 0.0], [1.0, 1.0]))
     assertion.eq(1.0, metric_instance([0.75, 0.75], [0.25, 0.25]))
     assertion.eq(0.0, metric_instance([-1.0, -1.0], [-1.0, -1.0]))
     assertion.eq(0.4, metric_instance([-2.0, -2.0], [-3.0, -3.0]))
Example #24
    def correct_processing_data_types(sample_path, answer_path, ccore_flag):
        scores_points = silhouette_test_template.correct_scores(
            sample_path, answer_path, ccore_flag, data_type='points')
        scores_matrix = silhouette_test_template.correct_scores(
            sample_path, answer_path, ccore_flag, data_type='distance_matrix')

        assertion.eq(len(scores_points), len(scores_matrix))
        assertion.eq(scores_points, scores_matrix)
    def template_compare_output(self, path, k, candidates, random_state, metric):
        sample = read_sample(path)
        matrix = calculate_distance_matrix(sample, metric=metric)

        result1 = kmeans_plusplus_initializer(sample, k, candidates, random_state=random_state, data_type='points', metric=metric).initialize(return_index=True)
        result2 = kmeans_plusplus_initializer(matrix, k, candidates, random_state=random_state, data_type='distance_matrix', metric=metric).initialize(return_index=True)

        assertion.eq(result1, result2)
    def templateKmeasPlusPlusCenterInitializer(self, data, amount):
        centers = kmeans_plusplus_initializer(data, amount).initialize()

        assertion.eq(amount, len(centers))

        for center in centers:
            assertion.eq(len(data[0]), len(center))

        return centers
    def templateLengthProcessWithMetric(path_to_file, initial_medoids, expected_cluster_length, metric, ccore_flag, **kwargs):
        sample = read_sample(path_to_file)
        data_type = kwargs.get('data_type', 'points')
        input_type = kwargs.get('input_type', 'list')
        initialize_medoids = kwargs.get('initialize_medoids', None)
        itermax = kwargs.get('itermax', 200)

        if metric is None:
            metric = distance_metric(type_metric.EUCLIDEAN_SQUARE)

        input_data = sample
        if data_type == 'distance_matrix':
            input_data = calculate_distance_matrix(sample)

            if input_type == 'numpy':
                input_data = numpy.array(input_data)

        testing_result = False
        testing_attempts = 1
        if initialize_medoids is not None:  # repeat several times because the center initializer is randomized
            testing_attempts = 10

        for _ in range(testing_attempts):
            if initialize_medoids is not None:
                initial_medoids = kmeans_plusplus_initializer(sample, initialize_medoids).initialize(return_index=True)

            kmedoids_instance = kmedoids(input_data, initial_medoids, 0.001, ccore_flag, metric=metric, data_type=data_type, itermax=itermax)
            kmedoids_instance.process()

            clusters = kmedoids_instance.get_clusters()
            medoids = kmedoids_instance.get_medoids()

            if itermax == 0:
                assertion.eq([], clusters)
                assertion.eq(medoids, initial_medoids)
                return

            if len(clusters) != len(medoids):
                continue

            if len(set(medoids)) != len(medoids):
                continue

            obtained_cluster_sizes = [len(cluster) for cluster in clusters]
            if len(sample) != sum(obtained_cluster_sizes):
                continue

            if expected_cluster_length is not None:
                obtained_cluster_sizes.sort()
                expected_cluster_length.sort()
                if obtained_cluster_sizes != expected_cluster_length:
                    continue

            testing_result = True

        assertion.true(testing_result)
Example #30
    def test_logical_block_neighbors(self):
        block = clique_block()
        block.logical_location = [1, 1]

        neighbors = block.get_location_neighbors(3)
        assertion.eq(4, len(neighbors))
        assertion.true([0, 1] in neighbors)
        assertion.true([2, 1] in neighbors)
        assertion.true([1, 0] in neighbors)
        assertion.true([1, 2] in neighbors)
    def templateClusterAllocationOneDimensionData(ccore_flag):
        input_data = [ [random()] for _ in range(10) ] + [ [random() + 3] for _ in range(10) ] + [ [random() + 5] for _ in range(10) ] + [ [random() + 8] for _ in range(10) ]
         
        cure_instance = cure(input_data, 4, ccore = ccore_flag)
        cure_instance.process()
        clusters = cure_instance.get_clusters()

        assertion.eq(4, len(clusters))
        for cluster in clusters:
            assertion.eq(10, len(cluster))
    def templateClusterAllocationOneDimensionData(ccore_flag):
        input_data = [[random()] for _ in range(10)] + [[random() + 3] for _ in range(10)] + [[random() + 5] for _ in range(10)] + [[random() + 8] for _ in range(10)]

        kmedoids_instance = kmedoids(input_data, [5, 15, 25, 35], 0.025, ccore_flag)
        kmedoids_instance.process()
        clusters = kmedoids_instance.get_clusters()
         
        assertion.eq(4, len(clusters))
        for cluster in clusters:
            assertion.eq(10, len(cluster))
Example #34
 def templateClusterAllocationOneDimensionData(ccore_flag):
     input_data = [[random()] for _ in range(10)] + [[random() + 3] for _ in range(10)] + [[random() + 5] for _ in range(10)] + [[random() + 8] for _ in range(10)]

     kmeans_instance = kmeans(input_data, [[0.0], [3.0], [5.0], [8.0]], 0.025, ccore_flag)
     kmeans_instance.process()
     clusters = kmeans_instance.get_clusters()

     assertion.eq(4, len(clusters))
     for cluster in clusters:
         assertion.eq(10, len(cluster))
Example #35
    def templateClusterAllocationOneDimensionData(ccore_flag):
        input_data = [ [random()] for _ in range(10) ] + [ [random() + 3] for _ in range(10) ] + [ [random() + 5] for _ in range(10) ] + [ [random() + 8] for _ in range(10) ]
         
        cure_instance = cure(input_data, 4, ccore = ccore_flag)
        cure_instance.process()
        clusters = cure_instance.get_clusters()

        assertion.eq(4, len(clusters))
        for cluster in clusters:
            assertion.eq(10, len(cluster))
Example #36
    def template_correct_scores(self, sample_path, answer_path):
        sample = read_sample(sample_path)
        clusters = answer_reader(answer_path).get_clusters()

        scores = silhouette(sample, clusters).process().get_score()

        assertion.eq(len(sample), len(scores))
        for score in scores:
            assertion.le(-1.0, score)
            assertion.ge(1.0, score)
Example #37
 def templateClusterAllocationOneDimensionData(ccore_flag):
     input_data = [ [random()] for _ in range(10) ] + [ [random() + 3] for _ in range(10) ] + [ [random() + 5] for _ in range(10) ] + [ [random() + 8] for _ in range(10) ]
     
     kmeans_instance = kmeans(input_data, [ [0.0], [3.0], [5.0], [8.0] ], 0.025, ccore_flag)
     kmeans_instance.process()
     clusters = kmeans_instance.get_clusters()
     
     assertion.eq(4, len(clusters))
     for cluster in clusters:
         assertion.eq(10, len(cluster))
    def templateKmeasPlusPlusCenterInitializerIndexReturn(self, data, amount):
        centers = kmeans_plusplus_initializer(data, amount).initialize(return_index=True)

        assertion.eq(amount, len(centers))

        for center_index in centers:
            assertion.gt(len(data), center_index)
            assertion.le(0, center_index)

        return centers
    def correct_scores(sample_path, answer_path, ccore_flag):
        sample = read_sample(sample_path)
        clusters = answer_reader(answer_path).get_clusters()

        scores = silhouette(sample, clusters, ccore=ccore_flag).process().get_score()

        assertion.eq(len(sample), len(scores))
        for score in scores:
            assertion.le(-1.0, score)
            assertion.ge(1.0, score)
    def templateKmeasPlusPlusCenterInitializerIndexReturn(self, data, amount):
        centers = kmeans_plusplus_initializer(data, amount).initialize(return_index=True)

        assertion.eq(amount, len(centers))

        for center_index in centers:
            assertion.gt(len(data), center_index)
            assertion.le(0, center_index)
            assertion.eq(1, centers.count(center_index))

        return centers
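A minimal standalone k-means++ initialization returning point indices, as in the template above; the data set and the number of centers are assumptions:

    # Sketch only: the data set and the number of centers are assumptions.
    from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
    from pyclustering.samples.definitions import SIMPLE_SAMPLES
    from pyclustering.utils import read_sample

    data = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)

    center_indexes = kmeans_plusplus_initializer(data, 2).initialize(return_index=True)

    assert len(center_indexes) == 2
    assert all(0 <= index < len(data) for index in center_indexes)  # indices, not coordinates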
Example #41
    def testGowerDistanceIntegrity(self):
        a, b = [1.2, 3.4], [1.0, 2.2]
        npa, npb = numpy.array(a), numpy.array(b)

        gower = metric.distance_metric(metric.type_metric.GOWER,
                                       data=[a, b],
                                       numpy_usage=False)
        gower_numpy = metric.distance_metric(metric.type_metric.GOWER,
                                             data=numpy.array([a, b]),
                                             numpy_usage=True)
        assertion.eq(gower(a, b), gower_numpy(npa, npb))
    def templatePredict(path_to_file, initial_medoids, points, expected_closest_clusters, ccore, **kwargs):
        sample = read_sample(path_to_file)

        metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN_SQUARE))
        itermax = kwargs.get('itermax', 200)

        kmedoids_instance = kmedoids(sample, initial_medoids, 0.001, ccore, metric=metric, itermax=itermax)
        kmedoids_instance.process()

        closest_clusters = kmedoids_instance.predict(points)
        assertion.eq(len(expected_closest_clusters), len(closest_clusters))
        assertion.true(numpy.array_equal(numpy.array(expected_closest_clusters), closest_clusters))
Example #43
    def templateClusterAllocationTheSameObjects(number_objects, number_clusters, ccore_flag = False):
        value = random()
        input_data = [ [value] ] * number_objects
        
        initial_medoids = []
        step = int(math.floor(number_objects / number_clusters))
        for i in range(number_clusters):
            initial_medoids.append(i * step)
        
        kmedoids_instance = kmedoids(input_data, initial_medoids, ccore=ccore_flag)
        kmedoids_instance.process()
        clusters = kmedoids_instance.get_clusters()
        medoids = kmedoids_instance.get_medoids()

        assertion.eq(len(clusters), len(medoids))
        assertion.eq(len(set(medoids)), len(medoids))
        
        object_mark = [False] * number_objects
        allocated_number_objects = 0
        
        for cluster in clusters:
            for index_object in cluster: 
                assertion.eq(False, object_mark[index_object])    # one object can be in only one cluster.
                
                object_mark[index_object] = True
                allocated_number_objects += 1
            
        assertion.eq(number_objects, allocated_number_objects)    # number of allocated objects should be the same.
Example #44
 def templateCollectEvolution(filename, initial_centers, number_clusters, ccore_flag):
     sample = read_sample(filename)
     
     observer = kmeans_observer()
     kmeans_instance = kmeans(sample, initial_centers, 0.025, ccore_flag, observer=observer)
     kmeans_instance.process()
     
     assertion.le(1, len(observer))
     for i in range(len(observer)):
         assertion.le(1, len(observer.get_centers(i)))
         for center in observer.get_centers(i):
             assertion.eq(len(sample[0]), len(center))
         
         assertion.le(1, len(observer.get_clusters(i)))
Example #45
    def templateClusteringWithAnswers(sample_path, answer_path, radius, neighbors, ccore, **kwargs):
        random_order = kwargs.get('random_order', False)
        repeat = kwargs.get('repeat', 1)

        for _ in range(repeat):
            sample = read_sample(sample_path)

            sample_index_map = [ i for i in range(len(sample)) ]
            if random_order:
                shuffle(sample_index_map)

            sample_shuffled = [ sample[i] for i in sample_index_map ]

            dbscan_instance = dbscan(sample_shuffled, radius, neighbors, ccore)
            dbscan_instance.process()

            clusters = dbscan_instance.get_clusters()
            noise = dbscan_instance.get_noise()

            for cluster in clusters:
                for i in range(len(cluster)):
                    cluster[i] = sample_index_map[cluster[i]]

            for i in range(len(noise)):
                noise[i] = sample_index_map[noise[i]]
            noise = sorted(noise)

            reader = answer_reader(answer_path)
            expected_noise = sorted(reader.get_noise())
            expected_length_clusters = reader.get_cluster_lengths()

            assertion.eq(len(sample), sum([len(cluster) for cluster in clusters]) + len(noise))
            assertion.eq(sum(expected_length_clusters), sum([len(cluster) for cluster in clusters]))
            assertion.eq(expected_length_clusters, sorted([len(cluster) for cluster in clusters]))
            assertion.eq(expected_noise, noise)
Example #46
    def test_logical_block_neighbors_on_edge(self):
        block = clique_block()
        block.logical_location = [1, 1]

        neighbors = block.get_location_neighbors(2)
        assertion.eq(2, len(neighbors))
        assertion.true([0, 1] in neighbors)
        assertion.true([1, 0] in neighbors)

        block.logical_location = [0, 0]
        neighbors = block.get_location_neighbors(2)
        assertion.eq(2, len(neighbors))
        assertion.true([0, 1] in neighbors)
        assertion.true([1, 0] in neighbors)
Example #47
    def templateEncoderProcedures(ccore_flag):
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)
        
        cure_instance = cure(sample, 4, 5, 0.5, ccore = ccore_flag)
        cure_instance.process()
        
        clusters = cure_instance.get_clusters()
        encoding = cure_instance.get_cluster_encoding()
        
        encoder = cluster_encoder(encoding, clusters, sample)
        encoder.set_encoding(type_encoding.CLUSTER_INDEX_LABELING)
        encoder.set_encoding(type_encoding.CLUSTER_OBJECT_LIST_SEPARATION)
        encoder.set_encoding(type_encoding.CLUSTER_INDEX_LIST_SEPARATION)

        assertion.eq(4, len(clusters))
Example #48
 def templateEncoderProcedures(filename, initial_centers, number_clusters, ccore_flag):
     sample = read_sample(filename)
     
     kmeans_instance = kmeans(sample, initial_centers, 0.025, ccore_flag)
     kmeans_instance.process()
     
     clusters = kmeans_instance.get_clusters()
     encoding = kmeans_instance.get_cluster_encoding()
     
     encoder = cluster_encoder(encoding, clusters, sample)
     encoder.set_encoding(type_encoding.CLUSTER_INDEX_LABELING)
     encoder.set_encoding(type_encoding.CLUSTER_OBJECT_LIST_SEPARATION)
     encoder.set_encoding(type_encoding.CLUSTER_INDEX_LIST_SEPARATION)
     
     assertion.eq(number_clusters, len(clusters))
Example #49
    def testChiSquareMetric(self):
        metric_instance = metric_wrapper(type_metric.CHI_SQUARE, [], None)
        assertion.eq(0.0, metric_instance([0.0, 0.0], [0.0, 0.0]))
        assertion.eq(2.0, metric_instance([0.0, 0.0], [1.0, 1.0]))
        assertion.eq(0.5, metric_instance([0.75, 0.75], [0.25, 0.25]))
        assertion.eq(0.0, metric_instance([-1.0, -1.0], [-1.0, -1.0]))
        assertion.eq(0.4, metric_instance([-2.0, -2.0], [-3.0, -3.0]))


    # TODO: doesn't work for some platforms.
    #def testUserDefinedMetric(self):
    #    user_metric = lambda p1, p2 : p1[0] + p2[0];
    #    metric_instance = metric_wrapper(type_metric.USER_DEFINED, [], user_metric);
    #    assertion.eq(2.0, metric_instance([0.0, 0.0], [2.0, 0.0]));
    #    assertion.eq(4.0, metric_instance([3.0, 2.0], [1.0, 5.0]));
Example #50
    def templateClusterAllocationOneDimensionDataSpecificData(data_type, ccore_flag):
        for _ in range(50):
            sample = [[random()] for _ in range(10)] + [[random() + 3] for _ in range(10)] + [[random() + 6] for _ in range(10)] + [[random() + 9] for _ in range(10)]

            if data_type == 'distance_matrix':
                input_data = calculate_distance_matrix(sample)
            elif data_type == 'points':
                input_data = sample
            else:
                raise ValueError("Incorrect data type '%s' is specified" % data_type)

            dbscan_instance = dbscan(input_data, 1.0, 2, ccore_flag, data_type=data_type)
            dbscan_instance.process()

            clusters = dbscan_instance.get_clusters()

            assertion.eq(4, len(clusters))
            for cluster in clusters:
                assertion.eq(10, len(cluster))
Example #51
    def templateMaxAllocatedClusters(ccore_flag, amount_clusters, size_cluster, offset, kinitial, kmax):
        input_data = []
        for index in range(amount_clusters):
            for _ in range(size_cluster):
                input_data.append([random.random() * index * offset, random.random() * index * offset])
        
        initial_centers = random_center_initializer(input_data, kinitial).initialize()
        xmeans_instance = xmeans(input_data, initial_centers, kmax, 0.025, splitting_type.BAYESIAN_INFORMATION_CRITERION, ccore_flag)
        xmeans_instance.process()
        
        clusters = xmeans_instance.get_clusters()
        centers = xmeans_instance.get_centers()

        if len(clusters) != len(centers):
            print(input_data)
            print(initial_centers)

        assertion.ge(kmax, len(clusters))
        assertion.ge(kmax, len(centers))
        assertion.eq(len(clusters), len(centers))
Example #52
 def templateAllocateRequestedClusterAmount(data, amount_clusters, initial_medoids, ccore_flag):
     if initial_medoids is None:
         initial_medoids = []
         for _ in range(amount_clusters):
             index_point = randint(0, len(data) - 1)
             while (index_point in initial_medoids):
                 index_point = randint(0, len(data) - 1)
             
             initial_medoids.append(index_point)
         
     kmedoids_instance = kmedoids(data, initial_medoids, 0.025, ccore = ccore_flag)
     kmedoids_instance.process()
     clusters = kmedoids_instance.get_clusters()
     
     assertion.eq(len(clusters), amount_clusters)
     amount_objects = 0
     for cluster in clusters:
         amount_objects += len(cluster)
     
     assertion.eq(amount_objects, len(data))
Example #53
    def templateSyncsegmSegmentation(image_source, radius_color, radius_object, noise_size, expected_color_segments, expected_object_segments, collect_dynamic, ccore_flag):
        result_testing = False
        color_segments, object_segments = [], []

        for _ in range(10):
            algorithm = syncsegm(radius_color, radius_object, noise_size, ccore=ccore_flag)
            analyser = algorithm.process(image_source, collect_dynamic, 0.9995, 0.9995)
            
            color_segments = analyser.allocate_colors()
            object_segments = analyser.allocate_objects(0.2)

            if (len(color_segments) != expected_color_segments) or (len(object_segments) != expected_object_segments):
                continue
            
            result_testing = True
            break

        assertion.eq(expected_color_segments, len(color_segments))
        assertion.eq(expected_object_segments, len(object_segments))
        assertion.true(result_testing)
Example #54
    def templateLengthProcessSpecificData(data_type, path_to_file, radius, min_number_neighbors, max_number_neighbors, ccore):
        for _ in range(min_number_neighbors, max_number_neighbors, 1):
            sample = read_sample(path_to_file)

            if data_type == 'distance_matrix':
                input_data = calculate_distance_matrix(sample)
            elif data_type == 'points':
                input_data = sample
            else:
                raise ValueError("Incorrect data type '%s' is specified" % data_type)

            dbscan_instance = dbscan(input_data, radius, min_number_neighbors, ccore, data_type=data_type)
            dbscan_instance.process()

            clusters = dbscan_instance.get_clusters()
            noise = dbscan_instance.get_noise()

            length = len(noise)
            length += sum([len(cluster) for cluster in clusters])

            assertion.eq(len(sample), length)
Example #55
    def templateClusteringResultsSpecificData(data_type, path, radius, neighbors, amount_clusters, expected_length_clusters, ccore):
        sample = read_sample(path)

        if data_type == 'distance_matrix':
            input_data = calculate_distance_matrix(sample)
        else:
            input_data = sample

        optics_instance = optics(input_data, radius, neighbors, amount_clusters, ccore, data_type=data_type)
        optics_instance.process()

        clusters = optics_instance.get_clusters()
        noise = optics_instance.get_noise()
        optics_objects = optics_instance.get_optics_objects()

        object_indexes = set( [ obj.index_object for obj in optics_objects ] )
        assertion.eq(len(optics_objects), len(object_indexes))
        for obj in optics_objects:
            if obj.core_distance is not None:
                assertion.ge(obj.core_distance, 0)

            if obj.reachability_distance is not None:
                assertion.ge(obj.reachability_distance, 0)

        assert sum([len(cluster) for cluster in clusters]) + len(noise) == len(sample)
        assert len(clusters) == len(expected_length_clusters)
        assert sum([len(cluster) for cluster in clusters]) == sum(expected_length_clusters)
        assert sorted([len(cluster) for cluster in clusters]) == sorted(expected_length_clusters)

        if amount_clusters is not None:
            analyser = ordering_analyser(optics_instance.get_ordering())
            assert len(analyser) > 0

            amount_clusters, borders = analyser.extract_cluster_amount(optics_instance.get_radius())
            assert amount_clusters == len(expected_length_clusters)
            assert len(borders) == amount_clusters - 1
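A minimal standalone OPTICS run corresponding to the template above; the data set, the radius, the neighbor count, and the requested number of clusters are assumptions:

    # Sketch only: all parameters below are illustrative assumptions.
    from pyclustering.cluster.optics import optics
    from pyclustering.samples.definitions import SIMPLE_SAMPLES
    from pyclustering.utils import read_sample

    sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)

    optics_instance = optics(sample, 0.5, 3, 2, ccore=False)  # radius, neighbors, amount of clusters
    optics_instance.process()

    print(optics_instance.get_clusters())
    print(optics_instance.get_noise())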
Example #56
    def templateClusteringDistanceMatrix(path_to_file, radius, neighbors, expected_length_clusters, ccore):
        sample = read_sample(path_to_file)
        distance_matrix = calculate_distance_matrix(sample)

        dbscan_instance = dbscan(distance_matrix, radius, neighbors, ccore, data_type='distance_matrix')
        dbscan_instance.process()

        clusters = dbscan_instance.get_clusters()
        noise = dbscan_instance.get_noise()

        assertion.eq(len(sample), sum([len(cluster) for cluster in clusters]) + len(noise))
        assertion.eq(sum(expected_length_clusters), sum([len(cluster) for cluster in clusters]))
        assertion.eq(expected_length_clusters, sorted([len(cluster) for cluster in clusters]))
Example #57
    def templateClusteringResults(path, radius, neighbors, expected_length_clusters, ccore, **kwargs):
        random_order = kwargs.get('random_order', False)

        sample = read_sample(path)
        if random_order:
            shuffle(sample)
         
        dbscan_instance = dbscan(sample, radius, neighbors, ccore)
        dbscan_instance.process()
         
        clusters = dbscan_instance.get_clusters()
        noise = dbscan_instance.get_noise()

        assertion.eq(len(sample), sum([len(cluster) for cluster in clusters]) + len(noise))
        assertion.eq(sum(expected_length_clusters), sum([len(cluster) for cluster in clusters]))
        assertion.eq(expected_length_clusters, sorted([len(cluster) for cluster in clusters]))
Example #58
 def testChebyshevMetric(self):
     metric_instance = metric_wrapper(type_metric.CHEBYSHEV, [], None)
     assertion.eq(4.0, metric_instance([1.0, 4.0], [0.0, 0.0]))
Example #59
 def testManhattanMetric(self):
     metric_instance = metric_wrapper(type_metric.MANHATTAN, [], None)
     assertion.eq(3.0, metric_instance([1.0, 2.0], [0.0, 0.0]))
Example #60
 def testMinkowskiMetric(self):
     metric_instance = metric_wrapper(type_metric.MINKOWSKI, [2.0], None)
     assertion.eq(2.0, metric_instance([0.0, 0.0], [2.0, 0.0]))