Пример #1
0
    def calculate_elbow(path_to_data, path_to_answer, kmin, kmax, ccore, **kwargs):
        """Check that the elbow search reports the cluster count of the answer file.

        The search is repeated up to ten times because the elbow method chooses
        initial centers randomly; the check succeeds as soon as one run reports
        the expected amount of clusters.

        Args:
            path_to_data: Path passed to `read_sample` to load the sample.
            path_to_answer: Path passed to `answer_reader`; the number of
                clusters it returns is the expected elbow.
            kmin: Lower search bound forwarded to `elbow`.
            kmax: Upper search bound forwarded to `elbow`.
            ccore: Forwarded to `elbow` as the `ccore` keyword.
            **kwargs: Only `initializer` is read; it defaults to
                `kmeans_plusplus_initializer`.
        """
        max_runs = 10
        center_initializer = kwargs.get('initializer', kmeans_plusplus_initializer)

        sample = read_sample(path_to_data)
        answer = answer_reader(path_to_answer)

        rejected_elbows = []  # wrong elbows observed so far, reported on failure
        testing_result = False

        for _ in range(max_runs):
            searcher = elbow(sample, kmin, kmax, ccore=ccore, initializer=center_initializer)
            searcher.process()

            found_amount = searcher.get_amount()
            wce_values = searcher.get_wce()

            # These checks are applied to every run, matching or not.
            assertion.gt(found_amount, kmin)
            assertion.lt(found_amount, kmax)
            assertion.eq(len(wce_values), kmax - kmin)
            assertion.lt(wce_values[-1], wce_values[0] + 0.0000001)

            if found_amount == len(answer.get_clusters()):
                testing_result = True
                break

            rejected_elbows.append(found_amount)

        message = "%s: %s" % (len(answer.get_clusters()), rejected_elbows)
        assertion.true(testing_result, message=message)
Пример #2
0
    def correct_ksearch(sample_path, answer_path, kmin, kmax, algorithm,
                        ccore_flag):
        """Check that silhouette K-search lands within one cluster of the answer.

        Up to fifteen searches are run; the check succeeds once a run reports an
        amount between `expected - 1` (but at least 1) and `expected + 1`.

        Args:
            sample_path: Path passed to `read_sample`.
            answer_path: Path passed to `answer_reader`; its clusters define the
                expected amount.
            kmin: Lower search bound forwarded to `silhouette_ksearch`.
            kmax: Upper search bound forwarded to `silhouette_ksearch`.
            algorithm: Forwarded as the `algorithm` keyword.
            ccore_flag: Forwarded as the `ccore` keyword.
        """
        max_attempts = 15

        sample = read_sample(sample_path)
        clusters = answer_reader(answer_path).get_clusters()

        # Tolerated range around the reference amount, never below one cluster.
        lowest_allowed = max(1, len(clusters) - 1)
        highest_allowed = len(clusters) + 1

        testing_result = False
        for _ in range(max_attempts):
            search = silhouette_ksearch(sample, kmin, kmax,
                                        algorithm=algorithm,
                                        ccore=ccore_flag).process()
            amount = search.get_amount()
            best_score = search.get_score()
            all_scores = search.get_scores()

            # These checks are applied to every run, matching or not.
            assertion.le(-1.0, best_score)
            assertion.ge(1.0, best_score)
            assertion.eq(kmax - kmin, len(all_scores))

            if lowest_allowed <= amount <= highest_allowed:
                testing_result = True
                break

        assertion.true(testing_result)
Пример #3
0
    def calculate_elbow(path_to_data, path_to_answer, kmin, kmax, ccore, **kwargs):
        """Check that the elbow search reports the cluster count of the answer file.

        The search is repeated up to five times because the elbow method chooses
        initial centers randomly; the check succeeds as soon as one run reports
        the expected amount of clusters.

        Args:
            path_to_data: Path passed to `read_sample` to load the sample.
            path_to_answer: Path passed to `answer_reader`; the number of
                clusters it returns is the expected elbow.
            kmin: Lower search bound forwarded to `elbow`.
            kmax: Upper search bound forwarded to `elbow`.
            ccore: Forwarded to `elbow` as the `ccore` keyword.
            **kwargs: Only `initializer` is read; it defaults to
                `kmeans_plusplus_initializer`.
        """
        max_runs = 5
        center_initializer = kwargs.get('initializer', kmeans_plusplus_initializer)

        sample = read_sample(path_to_data)
        answer = answer_reader(path_to_answer)

        testing_result = False
        for _ in range(max_runs):
            searcher = elbow(sample, kmin, kmax, ccore=ccore, initializer=center_initializer)
            searcher.process()

            found_amount = searcher.get_amount()
            wce_values = searcher.get_wce()

            # These checks are applied to every run, matching or not.
            assertion.gt(found_amount, kmin)
            assertion.lt(found_amount, kmax)
            assertion.eq(len(wce_values), kmax - kmin)
            assertion.lt(wce_values[-1], wce_values[0])

            if found_amount == len(answer.get_clusters()):
                testing_result = True
                break

        assertion.true(testing_result)
Пример #4
0
    def correct_ksearch(sample_path, answer_path, kmin, kmax, algorithm, ccore_flag):
        """Check that silhouette K-search lands within one cluster of the answer.

        Up to ten searches are run; the check succeeds once a run reports an
        amount between `expected - 1` (but at least 1) and `expected + 1`.

        Args:
            sample_path: Path passed to `read_sample`.
            answer_path: Path passed to `answer_reader`; its clusters define the
                expected amount.
            kmin: Lower search bound forwarded to `silhouette_ksearch`.
            kmax: Upper search bound forwarded to `silhouette_ksearch`.
            algorithm: Forwarded as the `algorithm` keyword.
            ccore_flag: Forwarded as the `ccore` keyword.
        """
        max_attempts = 10

        sample = read_sample(sample_path)
        clusters = answer_reader(answer_path).get_clusters()

        # Tolerated range around the reference amount, never below one cluster.
        lowest_allowed = max(1, len(clusters) - 1)
        highest_allowed = len(clusters) + 1

        testing_result = False
        for _ in range(max_attempts):
            search = silhouette_ksearch(sample, kmin, kmax, algorithm=algorithm, ccore=ccore_flag).process()
            amount = search.get_amount()
            best_score = search.get_score()
            all_scores = search.get_scores()

            # These checks are applied to every run, matching or not.
            assertion.le(-1.0, best_score)
            assertion.ge(1.0, best_score)
            assertion.eq(kmax - kmin, len(all_scores))

            if lowest_allowed <= amount <= highest_allowed:
                testing_result = True
                break

        assertion.true(testing_result)
Пример #5
0
    def templateSyncsegmSegmentation(image_source, radius_color, radius_object,
                                     noise_size, expected_color_segments,
                                     expected_object_segments, collect_dynamic,
                                     ccore_flag):
        """Run `syncsegm` up to ten times until both segment counts match.

        After the loop the counts from the last run are compared with the
        expected ones, so a failure reports the actual amounts.

        Args:
            image_source: Forwarded to `process` as the image to handle.
            radius_color: First positional argument of `syncsegm`.
            radius_object: Second positional argument of `syncsegm`.
            noise_size: Third positional argument of `syncsegm`.
            expected_color_segments: Required length of `allocate_colors()`.
            expected_object_segments: Required length of `allocate_objects(0.2)`.
            collect_dynamic: Forwarded to `process`.
            ccore_flag: Forwarded to `syncsegm` as the `ccore` keyword.
        """
        color_segments = []
        object_segments = []
        result_testing = False

        for _ in range(10):
            segmenter = syncsegm(radius_color, radius_object, noise_size,
                                 ccore=ccore_flag)
            analyser = segmenter.process(image_source, collect_dynamic,
                                         0.9995, 0.9995)

            color_segments = analyser.allocate_colors()
            object_segments = analyser.allocate_objects(0.2)

            if (len(color_segments) == expected_color_segments
                    and len(object_segments) == expected_object_segments):
                result_testing = True
                break

        assertion.eq(expected_color_segments, len(color_segments))
        assertion.eq(expected_object_segments, len(object_segments))
        assertion.true(result_testing)
    def calculate_elbow(path_to_data, path_to_answer, kmin, kmax, ccore, **kwargs):
        """Check that the elbow search reports the cluster count of the answer file.

        The search is repeated up to ten times because the elbow method chooses
        initial centers randomly; the check succeeds as soon as one run reports
        the expected amount of clusters.

        Args:
            path_to_data: Path passed to `read_sample` to load the sample.
            path_to_answer: Path passed to `answer_reader`; the number of
                clusters it returns is the expected elbow.
            kmin: Lower search bound forwarded to `elbow`.
            kmax: Upper search bound forwarded to `elbow`.
            ccore: Forwarded to `elbow` as the `ccore` keyword.
            **kwargs: Only `initializer` is read; it defaults to
                `kmeans_plusplus_initializer`.
        """
        max_runs = 10
        center_initializer = kwargs.get('initializer', kmeans_plusplus_initializer)

        sample = read_sample(path_to_data)
        answer = answer_reader(path_to_answer)

        rejected_elbows = []  # wrong elbows observed so far, reported on failure
        testing_result = False

        for _ in range(max_runs):
            searcher = elbow(sample, kmin, kmax, ccore=ccore, initializer=center_initializer)
            searcher.process()

            found_amount = searcher.get_amount()
            wce_values = searcher.get_wce()

            # These checks are applied to every run, matching or not.
            assertion.gt(found_amount, kmin)
            assertion.lt(found_amount, kmax)
            assertion.eq(len(wce_values), kmax - kmin)
            assertion.lt(wce_values[-1], wce_values[0] + 0.0000001)

            if found_amount == len(answer.get_clusters()):
                testing_result = True
                break

            rejected_elbows.append(found_amount)

        message = "%s: %s" % (len(answer.get_clusters()), rejected_elbows)
        assertion.true(testing_result, message=message)
Пример #7
0
    def templateLengthProcessWithMetric(path_to_file, initial_medoids,
                                        expected_cluster_length, metric,
                                        ccore_flag, **kwargs):
        """Run K-Medoids on a sample and verify the sizes of the resulting clusters.

        A run is accepted when there is one medoid per cluster, the medoids are
        unique, every sample point is counted in the clusters and, if
        `expected_cluster_length` is given, the sorted cluster sizes equal it.

        Args:
            path_to_file: Path passed to `read_sample`.
            initial_medoids: Initial medoids for `kmedoids`; replaced by
                generated ones when `initialize_medoids` is supplied.
            expected_cluster_length: Expected cluster sizes in any order, or
                None to skip the size comparison. The argument is not modified.
            metric: Metric forwarded to `kmedoids`; None selects
                `type_metric.EUCLIDEAN_SQUARE`.
            ccore_flag: Fourth positional argument of `kmedoids`.
            **kwargs: `data_type` (default 'points'), `input_type` (default
                'list') and `initialize_medoids` (default None) are read.
        """
        sample = read_sample(path_to_file)
        data_type = kwargs.get('data_type', 'points')
        input_type = kwargs.get('input_type', 'list')
        initialize_medoids = kwargs.get('initialize_medoids', None)

        if metric is None:
            metric = distance_metric(type_metric.EUCLIDEAN_SQUARE)

        input_data = sample
        if data_type == 'distance_matrix':
            input_data = calculate_distance_matrix(sample)

            if input_type == 'numpy':
                input_data = numpy.array(input_data)

        # Sort a copy once: the caller's list must not be reordered as a side
        # effect of running this check.
        expected_sizes = None
        if expected_cluster_length is not None:
            expected_sizes = sorted(expected_cluster_length)

        testing_result = False
        testing_attempts = 1
        if initialize_medoids is not None:  # in case center initializer randomization appears
            testing_attempts = 10

        for _ in range(testing_attempts):
            if initialize_medoids is not None:
                initial_medoids = kmeans_plusplus_initializer(
                    sample, initialize_medoids).initialize(return_index=True)

            kmedoids_instance = kmedoids(input_data,
                                         initial_medoids,
                                         0.025,
                                         ccore_flag,
                                         metric=metric,
                                         data_type=data_type)
            kmedoids_instance.process()

            clusters = kmedoids_instance.get_clusters()
            medoids = kmedoids_instance.get_medoids()

            if len(clusters) != len(medoids):
                continue

            if len(set(medoids)) != len(medoids):
                continue

            obtained_cluster_sizes = [len(cluster) for cluster in clusters]
            if len(sample) != sum(obtained_cluster_sizes):
                continue

            if (expected_sizes is not None
                    and sorted(obtained_cluster_sizes) != expected_sizes):
                continue

            testing_result = True
            break  # one accepted run is enough; further attempts cannot change the outcome

        assertion.true(testing_result)
Пример #8
0
    def templateLengthProcessWithMetric(path_to_file, initial_medoids, expected_cluster_length, metric, ccore_flag, **kwargs):
        """Run K-Medoids on a sample and verify the sizes of the resulting clusters.

        A run is accepted when there is one medoid per cluster, the medoids are
        unique, every sample point is counted in the clusters and, if
        `expected_cluster_length` is given, the sorted cluster sizes equal it.
        When `itermax` is 0 the check instead asserts that no clusters were
        produced and the medoids equal the initial ones, then returns.

        Args:
            path_to_file: Path passed to `read_sample`.
            initial_medoids: Initial medoids for `kmedoids`; replaced by
                generated ones when `initialize_medoids` is supplied.
            expected_cluster_length: Expected cluster sizes in any order, or
                None to skip the size comparison. The argument is not modified.
            metric: Metric forwarded to `kmedoids`; None selects
                `type_metric.EUCLIDEAN_SQUARE`.
            ccore_flag: Fourth positional argument of `kmedoids`.
            **kwargs: `data_type` (default 'points'), `input_type` (default
                'list'), `initialize_medoids` (default None) and `itermax`
                (default 200) are read.
        """
        sample = read_sample(path_to_file)
        data_type = kwargs.get('data_type', 'points')
        input_type = kwargs.get('input_type', 'list')
        initialize_medoids = kwargs.get('initialize_medoids', None)
        itermax = kwargs.get('itermax', 200)

        if metric is None:
            metric = distance_metric(type_metric.EUCLIDEAN_SQUARE)

        input_data = sample
        if data_type == 'distance_matrix':
            input_data = calculate_distance_matrix(sample)

            if input_type == 'numpy':
                input_data = numpy.array(input_data)

        # Sort a copy once: the caller's list must not be reordered as a side
        # effect of running this check.
        expected_sizes = None
        if expected_cluster_length is not None:
            expected_sizes = sorted(expected_cluster_length)

        testing_result = False
        testing_attempts = 1
        if initialize_medoids is not None:  # in case center initializer randomization appears
            testing_attempts = 10

        for _ in range(testing_attempts):
            if initialize_medoids is not None:
                initial_medoids = kmeans_plusplus_initializer(sample, initialize_medoids).initialize(return_index=True)

            kmedoids_instance = kmedoids(input_data, initial_medoids, 0.001, ccore_flag, metric=metric, data_type=data_type, itermax=itermax)
            kmedoids_instance.process()

            clusters = kmedoids_instance.get_clusters()
            medoids = kmedoids_instance.get_medoids()

            if itermax == 0:
                # With no iterations allowed nothing may have been computed.
                assertion.eq([], clusters)
                assertion.eq(medoids, initial_medoids)
                return

            if len(clusters) != len(medoids):
                continue

            if len(set(medoids)) != len(medoids):
                continue

            obtained_cluster_sizes = [len(cluster) for cluster in clusters]
            if len(sample) != sum(obtained_cluster_sizes):
                continue

            if expected_sizes is not None and sorted(obtained_cluster_sizes) != expected_sizes:
                continue

            testing_result = True
            break  # one accepted run is enough; further attempts cannot change the outcome

        assertion.true(testing_result)
Пример #9
0
    def clustering_with_answer(data_file, answer_file, ccore, **kwargs):
        """Cluster a sample with K-Medoids and compare the result with an answer file.

        Checks the medoid count, that medoids and assigned points are unique,
        that the cluster sizes match the answer, and that every produced
        cluster equals one of the expected clusters.

        Args:
            data_file: Path passed to `read_sample`.
            answer_file: Path passed to `answer_reader`.
            ccore: Fourth positional argument of `kmedoids`.
            **kwargs: Forwarded unchanged to both `kmeans_plusplus_initializer`
                and `kmedoids`.
        """
        data = read_sample(data_file)
        reader = answer_reader(answer_file)

        medoid_count = len(reader.get_clusters())
        initializer = kmeans_plusplus_initializer(data, medoid_count, **kwargs)
        start_medoids = initializer.initialize(return_index=True)

        kmedoids_instance = kmedoids(data, start_medoids, 0.001, ccore, **kwargs)
        kmedoids_instance.process()

        clusters = kmedoids_instance.get_clusters()
        medoids = kmedoids_instance.get_medoids()

        expected_sizes = sorted(reader.get_cluster_lengths())
        actual_sizes = [len(cluster) for cluster in clusters]

        assertion.eq(len(expected_sizes), len(medoids))
        assertion.eq(len(data), sum(actual_sizes))
        assertion.eq(sum(expected_sizes), sum(actual_sizes))

        # Every medoid may appear only once.
        seen_medoids = set()
        for medoid in medoids:
            duplicate_message = "Medoids '%s' is not unique (actual medoids: '%s')" % (
                str(medoid), str(seen_medoids))
            assertion.false(medoid in seen_medoids, message=duplicate_message)
            seen_medoids.add(medoid)

        # Every point may belong to only one cluster.
        seen_points = set()
        for cluster in clusters:
            for point in cluster:
                reassigned_message = "Point '%s' is already assigned to one of the clusters." % str(point)
                assertion.false(point in seen_points, message=reassigned_message)
                seen_points.add(point)

        assertion.eq(expected_sizes, sorted(actual_sizes))

        expected_clusters = reader.get_clusters()
        for actual_cluster in clusters:
            cluster_found = any(actual_cluster == expected_cluster
                                for expected_cluster in expected_clusters)

            assertion.true(
                cluster_found,
                message="Actual cluster '%s' is not found among expected." % str(actual_cluster))
Пример #10
0
    def templatePredict(path_to_file, initial_medoids, points, expected_closest_clusters, ccore, **kwargs):
        """Train K-Medoids on a sample and verify its predictions for `points`.

        Args:
            path_to_file: Path passed to `read_sample`.
            initial_medoids: Initial medoids for `kmedoids`.
            points: Points handed to `predict`.
            expected_closest_clusters: Expected result of `predict(points)`.
            ccore: Fourth positional argument of `kmedoids`.
            **kwargs: `metric` (default `type_metric.EUCLIDEAN_SQUARE`
                distance metric) and `itermax` (default 200) are read.
        """
        training_data = read_sample(path_to_file)

        metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN_SQUARE))
        itermax = kwargs.get('itermax', 200)

        model = kmedoids(training_data, initial_medoids, 0.001, ccore, metric=metric, itermax=itermax)
        model.process()

        predicted = model.predict(points)

        # Compare the length first, then the content element by element.
        assertion.eq(len(expected_closest_clusters), len(predicted))
        expected = numpy.array(expected_closest_clusters)
        assertion.true(numpy.array_equal(expected, predicted))
Пример #11
0
    def templatePredict(path_to_file, initial_centers, points, expected_amount, expected_closest_clusters, ccore, **kwargs):
        """Train X-Means on a sample and verify its predictions for `points`.

        Args:
            path_to_file: Path passed to `read_sample`.
            initial_centers: Initial centers for `xmeans`.
            points: Points handed to `predict`.
            expected_amount: Expected number of clusters after processing.
            expected_closest_clusters: Expected result of `predict(points)`.
            ccore: Last positional argument of `xmeans`.
            **kwargs: Only `kmax` is read (default 20).
        """
        training_data = read_sample(path_to_file)
        kmax = kwargs.get('kmax', 20)

        model = xmeans(training_data, initial_centers, kmax, 0.025, splitting_type.BAYESIAN_INFORMATION_CRITERION, ccore)
        model.process()

        predicted = model.predict(points)

        assertion.eq(expected_amount, len(model.get_clusters()))
        # Compare the length first, then the content element by element.
        assertion.eq(len(expected_closest_clusters), len(predicted))
        expected = numpy.array(expected_closest_clusters)
        assertion.true(numpy.array_equal(expected, predicted))
Пример #12
0
    def test_logical_block_neighbors(self):
        """Block at [1, 1] must report four neighbours for `get_location_neighbors(3)`."""
        block = clique_block()
        block.logical_location = [1, 1]

        neighbors = block.get_location_neighbors(3)
        assertion.eq(4, len(neighbors))

        for expected_location in ([0, 1], [2, 1], [1, 0], [1, 2]):
            assertion.true(expected_location in neighbors)
Пример #13
0
    def test_logical_block_neighbors(self):
        """Block at [1, 1] must report four neighbours for `get_location_neighbors(3)`."""
        block = clique_block()
        block.logical_location = [1, 1]

        neighbors = block.get_location_neighbors(3)
        assertion.eq(4, len(neighbors))

        for expected_location in ([0, 1], [2, 1], [1, 0], [1, 2]):
            assertion.true(expected_location in neighbors)
Пример #14
0
    def calculate_elbow(path_to_data, path_to_answer, kmin, kmax, ccore,
                        **kwargs):
        """Check the elbow search, optionally against an expected cluster count.

        The search is repeated up to fifteen times because the elbow method
        chooses initial centers randomly. When no expectation is given, the
        first run that satisfies the per-run checks is accepted.

        Args:
            path_to_data: Path passed to `read_sample` to load the sample.
            path_to_answer: None (no expected amount), an int used directly as
                the expected amount, or a path passed to `answer_reader` whose
                cluster count is the expected amount.
            kmin: Lower search bound forwarded to `elbow`.
            kmax: Upper search bound forwarded to `elbow`.
            ccore: Forwarded to `elbow` as the `ccore` keyword.
            **kwargs: Forwarded unchanged to `elbow`; `kstep` (default 1) is
                also read here to compute the expected number of WCE values.
        """
        max_runs = 15
        kstep = kwargs.get('kstep', 1)

        sample = read_sample(path_to_data)

        if path_to_answer is None:
            expected_amount = None
        elif isinstance(path_to_answer, int):
            expected_amount = path_to_answer
        else:
            expected_amount = len(answer_reader(path_to_answer).get_clusters())

        rejected_elbows = []  # wrong elbows observed so far, reported on failure
        testing_result = False

        for _ in range(max_runs):
            searcher = elbow(sample, kmin, kmax, ccore=ccore, **kwargs)
            searcher.process()

            found_amount = searcher.get_amount()
            wce_values = searcher.get_wce()

            # These checks are applied to every run, matching or not.
            assertion.gt(found_amount, kmin)
            assertion.lt(found_amount, kmax)
            assertion.eq(len(wce_values),
                         math.floor((kmax - kmin) / kstep + 1))
            assertion.lt(wce_values[-1], wce_values[0] + 0.0000001)

            if expected_amount is None or found_amount == expected_amount:
                testing_result = True
                break

            rejected_elbows.append(found_amount)

        if expected_amount is None:
            message = None
        else:
            message = str(expected_amount) + ": " + str(rejected_elbows)

        assertion.true(testing_result, message=message)
Пример #15
0
    def templateSyncsegmSegmentation(image_source, radius_color, radius_object, noise_size, expected_color_segments, expected_object_segments, collect_dynamic, ccore_flag):
        """Run `syncsegm` up to ten times until both segment counts match.

        After the loop the counts from the last run are compared with the
        expected ones, so a failure reports the actual amounts.

        Args:
            image_source: Forwarded to `process` as the image to handle.
            radius_color: First positional argument of `syncsegm`.
            radius_object: Second positional argument of `syncsegm`.
            noise_size: Third positional argument of `syncsegm`.
            expected_color_segments: Required length of `allocate_colors()`.
            expected_object_segments: Required length of `allocate_objects(0.2)`.
            collect_dynamic: Forwarded to `process`.
            ccore_flag: Forwarded to `syncsegm` as the `ccore` keyword.
        """
        color_segments = []
        object_segments = []
        result_testing = False

        for _ in range(10):
            segmenter = syncsegm(radius_color, radius_object, noise_size, ccore=ccore_flag)
            analyser = segmenter.process(image_source, collect_dynamic, 0.9995, 0.9995)

            color_segments = analyser.allocate_colors()
            object_segments = analyser.allocate_objects(0.2)

            if len(color_segments) == expected_color_segments and len(object_segments) == expected_object_segments:
                result_testing = True
                break

        assertion.eq(expected_color_segments, len(color_segments))
        assertion.eq(expected_object_segments, len(object_segments))
        assertion.true(result_testing)
Пример #16
0
    def test_logical_block_neighbors_on_edge(self):
        """Blocks at [1, 1] and [0, 0] must each report neighbours [0, 1] and [1, 0] for `get_location_neighbors(2)`."""
        block = clique_block()
        expected_neighbors = ([0, 1], [1, 0])

        # The same block object is reused for both locations, in this order.
        for location in ([1, 1], [0, 0]):
            block.logical_location = location
            neighbors = block.get_location_neighbors(2)

            assertion.eq(2, len(neighbors))
            for expected_location in expected_neighbors:
                assertion.true(expected_location in neighbors)
Пример #17
0
    def test_logical_block_neighbors_on_edge(self):
        """Blocks at [1, 1] and [0, 0] must each report neighbours [0, 1] and [1, 0] for `get_location_neighbors(2)`."""
        block = clique_block()
        expected_neighbors = ([0, 1], [1, 0])

        # The same block object is reused for both locations, in this order.
        for location in ([1, 1], [0, 0]):
            block.logical_location = location
            neighbors = block.get_location_neighbors(2)

            assertion.eq(2, len(neighbors))
            for expected_location in expected_neighbors:
                assertion.true(expected_location in neighbors)
Пример #18
0
    def template_correct_ksearch(self, sample_path, answer_path, kmin, kmax,
                                 algorithm):
        """Check that silhouette K-search finds exactly the answer's cluster count.

        Up to five searches are run; the check succeeds once a run reports the
        same amount of clusters as the answer file contains.

        Args:
            sample_path: Path passed to `read_sample`.
            answer_path: Path passed to `answer_reader`; its clusters define the
                expected amount.
            kmin: Lower search bound forwarded to `silhouette_ksearch`.
            kmax: Upper search bound forwarded to `silhouette_ksearch`.
            algorithm: Forwarded as the `algorithm` keyword.
        """
        max_attempts = 5

        sample = read_sample(sample_path)
        clusters = answer_reader(answer_path).get_clusters()

        testing_result = False
        for _ in range(max_attempts):
            search = silhouette_ksearch(sample, kmin, kmax, algorithm=algorithm).process()
            amount = search.get_amount()
            best_score = search.get_score()
            all_scores = search.get_scores()

            # These checks are applied to every run, matching or not.
            assertion.le(-1.0, best_score)
            assertion.ge(1.0, best_score)
            assertion.eq(kmax - kmin, len(all_scores))

            if amount == len(clusters):
                testing_result = True
                break

        assertion.true(testing_result)
Пример #19
0
    def templateLengthProcessWithMetric(path_to_file, initial_medoids, expected_cluster_length, metric, ccore_flag, **kwargs):
        """Run K-Medoids on a sample and verify clusters, deviation and labels.

        An attempt is skipped when the medoid count differs from the cluster
        count, medoids repeat, the clusters do not cover every sample point, any
        cluster is empty, or (if `expected_cluster_length` is given) the sorted
        cluster sizes differ from it. For an attempt that is not skipped, the
        iteration count, total deviation and labels are asserted as well.
        When `itermax` is 0 the check instead asserts that nothing was
        computed (zero iterations, zero deviation, no clusters, unchanged
        medoids) and returns.

        Args:
            path_to_file: Path passed to `read_sample`.
            initial_medoids: Initial medoids for `kmedoids`; replaced by
                generated ones when `initialize_medoids` is supplied.
            expected_cluster_length: Expected cluster sizes in any order, or
                None to skip the size comparison. The argument is not modified.
            metric: Metric forwarded to `kmedoids` and used to recompute the
                total deviation; None selects `type_metric.EUCLIDEAN_SQUARE`.
            ccore_flag: Forwarded to `kmedoids` as the `ccore` keyword.
            **kwargs: `data_type` (default 'points'), `input_type` (default
                'list'), `initialize_medoids` (default None) and `itermax`
                (default 200) are read.
        """
        sample = read_sample(path_to_file)
        data_type = kwargs.get('data_type', 'points')
        input_type = kwargs.get('input_type', 'list')
        initialize_medoids = kwargs.get('initialize_medoids', None)
        itermax = kwargs.get('itermax', 200)

        if metric is None:
            metric = distance_metric(type_metric.EUCLIDEAN_SQUARE)

        input_data = sample
        if data_type == 'distance_matrix':
            input_data = calculate_distance_matrix(sample, metric)

            if input_type == 'numpy':
                input_data = numpy.array(input_data)

        # Sort a copy once: the caller's list must not be reordered as a side
        # effect of running this check.
        expected_sizes = None
        if expected_cluster_length is not None:
            expected_sizes = sorted(expected_cluster_length)

        testing_result = False
        testing_attempts = 1
        if initialize_medoids is not None:  # in case center initializer randomization appears
            testing_attempts = 10

        for _ in range(testing_attempts):
            if initialize_medoids is not None:
                initial_medoids = kmeans_plusplus_initializer(sample, initialize_medoids).initialize(return_index=True)

            kmedoids_instance = kmedoids(input_data, initial_medoids, 0.001, ccore=ccore_flag, metric=metric, data_type=data_type, itermax=itermax)
            kmedoids_instance.process()

            clusters = kmedoids_instance.get_clusters()
            medoids = kmedoids_instance.get_medoids()

            if itermax == 0:
                # With no iterations allowed nothing may have been computed.
                assertion.eq(0, kmedoids_instance.get_iterations())
                assertion.eq(0.0, kmedoids_instance.get_total_deviation())
                assertion.eq([], clusters)
                assertion.eq(medoids, initial_medoids)
                return

            if len(clusters) != len(medoids):
                continue

            if len(set(medoids)) != len(medoids):
                continue

            obtained_cluster_sizes = [len(cluster) for cluster in clusters]
            if len(sample) != sum(obtained_cluster_sizes):
                continue

            # Reject the whole attempt when any cluster is empty. A `continue`
            # inside a loop over the clusters would only advance that inner
            # loop and never skip the attempt.
            if any(size == 0 for size in obtained_cluster_sizes):
                continue

            if expected_sizes is not None and sorted(obtained_cluster_sizes) != expected_sizes:
                continue

            assertion.gt(kmedoids_instance.get_iterations(), 0)

            # Recompute the total deviation as the sum of metric values between
            # each medoid and the other points of its cluster.
            expected_total_deviation = 0.0
            for index_point_medoid, cluster in zip(medoids, clusters):
                for index_point in cluster:
                    if index_point == index_point_medoid:
                        continue

                    expected_total_deviation += metric(sample[index_point_medoid], sample[index_point])

            assertion.eq_float(expected_total_deviation, kmedoids_instance.get_total_deviation(), 0.000001)

            # Each label must name the cluster that contains the point.
            labels = kmedoids_instance.get_labels()
            assertion.eq(len(sample), len(labels))
            for index_point, actual_index_cluster in enumerate(labels):
                assertion.true(index_point in clusters[actual_index_cluster])

            testing_result = True

        assertion.true(testing_result)