def templateSyncsegmSegmentation(image_source, radius_color, radius_object, noise_size, expected_color_segments, expected_object_segments, collect_dynamic, ccore_flag):
    """Run syncsegm segmentation and check the amount of colour and object segments.

    The segmentation is attempted up to ten times and stops at the first
    attempt that yields both expected amounts. The segments produced by the
    last executed attempt are the ones that are asserted.
    """
    max_attempts = 10
    succeeded = False
    colors, objects = [], []

    attempt = 0
    while attempt < max_attempts and not succeeded:
        attempt += 1

        segmenter = syncsegm(radius_color, radius_object, noise_size, ccore=ccore_flag)
        analyser = segmenter.process(image_source, collect_dynamic, 0.9995, 0.9995)

        colors = analyser.allocate_colors()
        objects = analyser.allocate_objects(0.2)

        succeeded = (len(colors) == expected_color_segments) and (len(objects) == expected_object_segments)

    assertion.eq(expected_color_segments, len(colors))
    assertion.eq(expected_object_segments, len(objects))
    assertion.true(succeeded)
def clustering(path, threshold1, threshold2, expected, ccore, **kwargs):
    """Cluster a sample with ``ttsas`` and verify the produced allocation.

    Args:
        path: Passed to ``read_sample`` to load the data.
        threshold1: First threshold forwarded to ``ttsas``.
        threshold2: Second threshold forwarded to ``ttsas``.
        expected: Expected cluster sizes in any order; left unmodified.
        ccore: Forwarded to ``ttsas`` as the ``ccore`` argument.
        **kwargs: ``metric`` replaces the default Euclidean ``distance_metric``.
    """
    metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN))

    sample = read_sample(path)

    ttsas_instance = ttsas(sample, threshold1, threshold2, ccore=ccore, metric=metric)
    ttsas_instance.process()

    clusters = ttsas_instance.get_clusters()
    representatives = ttsas_instance.get_representatives()

    obtained_cluster_length = [len(cluster) for cluster in clusters]

    # Every point has to be allocated to exactly one cluster.
    assertion.eq(len(sample), sum(obtained_cluster_length))
    assertion.eq(len(expected), len(clusters))
    assertion.eq(len(expected), len(representatives))
    assertion.ge(len(sample), len(clusters))

    dimension = len(sample[0])
    for rep in representatives:
        assertion.eq(dimension, len(rep))

    # Sorted copies are compared so that the caller's list is not reordered.
    assertion.eq(sorted(expected), sorted(obtained_cluster_length))
def templateAllocateRequestedClusterAmount(data, amount_clusters, initial_medoids, ccore_flag):
    """Check that ``kmedoids`` allocates exactly the requested amount of clusters.

    Args:
        data: Input data handed to ``kmedoids``.
        amount_clusters: Amount of clusters that is expected to be allocated.
        initial_medoids: Indexes of initial medoids, or ``None`` to draw
            ``amount_clusters`` distinct random indexes of ``data``.
        ccore_flag: Forwarded to ``kmedoids`` as the ``ccore`` argument.

    Raises:
        ValueError: If medoids have to be drawn and ``amount_clusters`` is
            larger than ``len(data)`` (raised by ``random.sample``).
    """
    if initial_medoids is None:
        # Imported locally so that the block does not rely on module-level imports.
        from random import sample as draw_unique

        # Drawing without replacement cannot loop forever, unlike re-rolling
        # a random index until an unused one is found.
        initial_medoids = draw_unique(range(len(data)), amount_clusters)

    kmedoids_instance = kmedoids(data, initial_medoids, 0.025, ccore=ccore_flag)
    kmedoids_instance.process()

    clusters = kmedoids_instance.get_clusters()
    assertion.eq(len(clusters), amount_clusters)

    # Every object has to be allocated to some cluster.
    amount_objects = sum(len(cluster) for cluster in clusters)
    assertion.eq(amount_objects, len(data))
def clustering(path, amount, threshold, expected, ccore, **kwargs):
    """Cluster a sample with ``bsas`` and verify the produced allocation.

    Args:
        path: Passed to ``read_sample`` to load the data.
        amount: Forwarded to ``bsas``; also the upper bound asserted for the
            amount of allocated clusters.
        threshold: Threshold forwarded to ``bsas``.
        expected: Expected cluster sizes in any order; left unmodified.
        ccore: Forwarded to ``bsas`` as the ``ccore`` argument.
        **kwargs: ``metric`` replaces the default Euclidean ``distance_metric``.
    """
    metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN))

    sample = read_sample(path)

    bsas_instance = bsas(sample, amount, threshold, ccore=ccore, metric=metric)
    bsas_instance.process()

    clusters = bsas_instance.get_clusters()
    representatives = bsas_instance.get_representatives()

    obtained_cluster_length = [len(cluster) for cluster in clusters]

    # Every point has to be allocated to exactly one cluster.
    assertion.eq(len(sample), sum(obtained_cluster_length))
    assertion.eq(len(expected), len(clusters))
    assertion.eq(len(expected), len(representatives))
    assertion.ge(amount, len(clusters))

    dimension = len(sample[0])
    for rep in representatives:
        assertion.eq(dimension, len(rep))

    # Sorted copies are compared so that the caller's list is not reordered.
    assertion.eq(sorted(expected), sorted(obtained_cluster_length))
def template_cluster_allocation(input_data, cluster_sizes, number_cluster, number_represent_points = 5, compression = 0.5, ccore_flag = False, **kwargs):
    """Cluster data with ``cure`` and verify the sizes of the allocated clusters.

    Args:
        input_data: Either a string that is passed to ``read_sample`` or the
            sample itself.
        cluster_sizes: Expected cluster sizes in any order; left unmodified.
        number_cluster: Amount of clusters requested from ``cure``.
        number_represent_points: Forwarded to ``cure``.
        compression: Forwarded to ``cure``.
        ccore_flag: Forwarded to ``cure`` as the ``ccore`` argument.
        **kwargs: ``numpy_usage=True`` converts the sample with ``numpy.array``.
    """
    if isinstance(input_data, str):
        sample = read_sample(input_data)
    else:
        sample = input_data

    numpy_usage = kwargs.get('numpy_usage', False)
    if numpy_usage is True:
        sample = numpy.array(sample)

    cure_instance = cure(sample, number_cluster, number_represent_points, compression, ccore = ccore_flag)
    cure_instance.process()

    clusters = cure_instance.get_clusters()
    representors = cure_instance.get_representors()
    means = cure_instance.get_means()

    assertion.eq(len(clusters), number_cluster)
    assertion.eq(len(representors), number_cluster)
    assertion.eq(len(means), number_cluster)

    obtained_cluster_sizes = [len(cluster) for cluster in clusters]

    # Every point has to be allocated to exactly one cluster.
    assertion.eq(sum(obtained_cluster_sizes), len(sample))

    # Sorted copies are compared so that the caller's list is not reordered.
    assertion.eq(sorted(cluster_sizes), sorted(obtained_cluster_sizes))
def random_sampling(data, n, algorithm, repeat, ccore=True):
    """Check repeatedly that ``algorithm(data, n)`` yields ``n`` distinct values.

    ``ccore`` is accepted but not used by this template.
    """
    for _attempt in range(repeat):
        drawn = algorithm(data, n)
        distinct = set(drawn)

        drawn_amount = len(drawn)
        assertion.eq(n, drawn_amount)
        assertion.eq(len(distinct), drawn_amount)
def testMinkowskiDistance(self):
    """Check ``metric.minkowski_distance`` with the default and explicit degrees."""
    # Each case is (expected value, positional arguments of the call).
    cases = (
        (0.0, ([0], [0])),
        (0.0, ([0], [0], 2)),
        # NOTE(review): a negative value is expected for degree 1, which presumably
        # means the implementation does not take absolute differences - confirm.
        (-2.0, ([3.0, 3.0], [5.0, 3.0], 1)),
        (2.0, ([3.0, 3.0], [5.0, 3.0], 2)),
        (2.0, ([3.0, 3.0], [5.0, 3.0], 4)),
    )

    for expected, arguments in cases:
        assertion.eq(expected, metric.minkowski_distance(*arguments))
def testManhattanDistance(self):
    """Check ``metric.manhattan_distance`` against hand-computed values."""
    # Each case is (expected distance, first point, second point).
    cases = (
        (0.0, [0], [0]),
        (1.0, [0.0, 1.0], [0.0, 0.0]),
        (2.0, [1.0, 1.0], [0.0, 0.0]),
        (4.0, [1.0, 1.0], [-1.0, -1.0]),
        (2.0, [-1.0, -1.0], [-2.0, -2.0]),
    )

    for expected, point1, point2 in cases:
        assertion.eq(expected, metric.manhattan_distance(point1, point2))
def calculate_elbow(path_to_data, path_to_answer, kmin, kmax, ccore, **kwargs):
    """Run ``elbow`` on a sample and check the found amount of clusters.

    The amount returned by ``get_amount()`` has to match the amount of
    clusters in the answer file in at least one of the attempts. The amounts
    of the rejected attempts are reported in the failure message.
    """
    # The elbow method chooses initial centers randomly, so a failed attempt is retried.
    attempts = 10

    initializer = kwargs.get('initializer', kmeans_plusplus_initializer)

    sample = read_sample(path_to_data)
    answer = answer_reader(path_to_answer)

    rejected_amounts = []
    succeeded = False

    for _ in range(attempts):
        instance = elbow(sample, kmin, kmax, ccore=ccore, initializer=initializer)
        instance.process()

        amount = instance.get_amount()
        wce = instance.get_wce()

        assertion.gt(amount, kmin)
        assertion.lt(amount, kmax)
        assertion.eq(len(wce), kmax - kmin)
        # The last error must be below the first one, with a small tolerance.
        assertion.lt(wce[-1], wce[0] + 0.0000001)

        if amount == len(answer.get_clusters()):
            succeeded = True
            break

        rejected_amounts.append(amount)

    message = ": ".join((str(len(answer.get_clusters())), str(rejected_amounts)))
    assertion.true(succeeded, message=message)
def calculate_elbow(path_to_data, path_to_answer, kmin, kmax, ccore, **kwargs):
    """Run ``elbow`` on a sample and check the found amount of clusters.

    The amount returned by ``get_amount()`` has to match the amount of
    clusters in the answer file in at least one of the attempts. The amounts
    of the rejected attempts are reported in the failure message.
    """
    # The elbow method chooses initial centers randomly, so a failed attempt is retried.
    attempts = 10

    initializer = kwargs.get('initializer', kmeans_plusplus_initializer)

    sample = read_sample(path_to_data)
    answer = answer_reader(path_to_answer)

    rejected_amounts = []
    succeeded = False

    for _ in range(attempts):
        instance = elbow(sample, kmin, kmax, ccore=ccore, initializer=initializer)
        instance.process()

        amount = instance.get_amount()
        wce = instance.get_wce()

        assertion.gt(amount, kmin)
        assertion.lt(amount, kmax)
        assertion.eq(len(wce), kmax - kmin)
        # The last error must be below the first one, with a small tolerance.
        assertion.lt(wce[-1], wce[0] + 0.0000001)

        if amount == len(answer.get_clusters()):
            succeeded = True
            break

        rejected_amounts.append(amount)

    message = ": ".join((str(len(answer.get_clusters())), str(rejected_amounts)))
    assertion.true(succeeded, message=message)
def templateMaxAllocatedClusters(ccore_flag, amount_clusters, size_cluster, offset, kinitial, kmax):
    """Check that ``xmeans`` does not allocate more than ``kmax`` clusters.

    Random two-dimensional points are generated in ``amount_clusters`` groups
    of ``size_cluster`` points; the coordinates of group ``index`` are scaled
    by ``index * offset``. The generated data and the initial centers are
    printed when the amounts of clusters and centers disagree.
    """
    input_data = [
        [random.random() * index * offset, random.random() * index * offset]
        for index in range(amount_clusters)
        for _ in range(size_cluster)
    ]

    initial_centers = random_center_initializer(input_data, kinitial).initialize()
    xmeans_instance = xmeans(input_data, initial_centers, kmax, 0.025, splitting_type.BAYESIAN_INFORMATION_CRITERION, ccore_flag)
    xmeans_instance.process()

    clusters = xmeans_instance.get_clusters()
    centers = xmeans_instance.get_centers()

    amounts_agree = len(clusters) == len(centers)
    if not amounts_agree:
        # Dump the inputs to make the failure reproducible.
        print(input_data)
        print(initial_centers)

    assertion.ge(kmax, len(clusters))
    assertion.ge(kmax, len(centers))
    assertion.eq(len(clusters), len(centers))
def calculate_elbow(path_to_data, path_to_answer, kmin, kmax, ccore, **kwargs):
    """Run ``elbow`` on a sample and check the found amount of clusters.

    The amount returned by ``get_amount()`` has to match the amount of
    clusters in the answer file in at least one of the attempts.
    """
    # The elbow method chooses initial centers randomly, so a failed attempt is retried.
    attempts = 5

    initializer = kwargs.get('initializer', kmeans_plusplus_initializer)

    sample = read_sample(path_to_data)
    answer = answer_reader(path_to_answer)

    succeeded = False

    for _ in range(attempts):
        instance = elbow(sample, kmin, kmax, ccore=ccore, initializer=initializer)
        instance.process()

        amount = instance.get_amount()
        wce = instance.get_wce()

        assertion.gt(amount, kmin)
        assertion.lt(amount, kmax)
        assertion.eq(len(wce), kmax - kmin)
        assertion.lt(wce[-1], wce[0])

        if amount == len(answer.get_clusters()):
            succeeded = True
            break

    assertion.true(succeeded)
def correct_ksearch(sample_path, answer_path, kmin, kmax, algorithm, ccore_flag):
    """Run ``silhouette_ksearch`` and check the found amount of clusters.

    An attempt succeeds when the found amount differs from the amount of
    clusters in the answer file by at most one (and is not below one).
    Up to ten attempts are made.
    """
    attempts = 10

    sample = read_sample(sample_path)
    clusters = answer_reader(answer_path).get_clusters()

    # Accepted interval around the expected amount of clusters.
    upper_limit = len(clusters) + 1
    lower_limit = max(1, len(clusters) - 1)

    succeeded = False
    for _ in range(attempts):
        ksearch_instance = silhouette_ksearch(sample, kmin, kmax, algorithm=algorithm, ccore=ccore_flag).process()

        amount = ksearch_instance.get_amount()
        score = ksearch_instance.get_score()
        scores = ksearch_instance.get_scores()

        assertion.le(-1.0, score)
        assertion.ge(1.0, score)
        assertion.eq(kmax - kmin, len(scores))

        if lower_limit <= amount <= upper_limit:
            succeeded = True
            break

    assertion.true(succeeded)
def templateLengthProcessSpecificData(data_type, path_to_file, radius, min_number_neighbors, max_number_neighbors, ccore):
    """Check that ``dbscan`` accounts for every point over a range of neighbor amounts.

    For each amount of neighbors in
    ``range(min_number_neighbors, max_number_neighbors)`` the sample is
    processed and the sizes of all clusters plus the noise must add up to the
    size of the sample.

    Args:
        data_type: ``'points'`` or ``'distance_matrix'``.
        path_to_file: Passed to ``read_sample`` to load the data.
        radius: Forwarded to ``dbscan``.
        min_number_neighbors: First amount of neighbors to try (inclusive).
        max_number_neighbors: End of the range of amounts (exclusive).
        ccore: Forwarded to ``dbscan``.

    Raises:
        ValueError: If ``data_type`` is not one of the supported values.
    """
    # The input does not depend on the amount of neighbors, so it is prepared once.
    sample = read_sample(path_to_file)

    if data_type == 'distance_matrix':
        input_data = calculate_distance_matrix(sample)
    elif data_type == 'points':
        input_data = sample
    else:
        raise ValueError("Incorrect data type '%s' is specified" % data_type)

    for number_neighbors in range(min_number_neighbors, max_number_neighbors, 1):
        # The current amount is used; passing the minimum on every iteration
        # would only repeat the same check.
        dbscan_instance = dbscan(input_data, radius, number_neighbors, ccore, data_type=data_type)
        dbscan_instance.process()

        clusters = dbscan_instance.get_clusters()
        noise = dbscan_instance.get_noise()

        length = len(noise) + sum(len(cluster) for cluster in clusters)
        assertion.eq(len(sample), length)
def testChiSquareMetric(self):
    """Check the wrapped ``CHI_SQUARE`` metric against hand-computed values."""
    metric_instance = metric_wrapper(type_metric.CHI_SQUARE, [], None)

    # Each case is (expected distance, first point, second point).
    cases = (
        (0.0, [0.0, 0.0], [0.0, 0.0]),
        (2.0, [0.0, 0.0], [1.0, 1.0]),
        (0.5, [0.75, 0.75], [0.25, 0.25]),
        (0.0, [-1.0, -1.0], [-1.0, -1.0]),
        (0.4, [-2.0, -2.0], [-3.0, -3.0]),
    )

    for expected, point1, point2 in cases:
        assertion.eq(expected, metric_instance(point1, point2))
def correct_ksearch(sample_path, answer_path, kmin, kmax, algorithm, ccore_flag):
    """Run ``silhouette_ksearch`` and check the found amount of clusters.

    An attempt succeeds when the found amount differs from the amount of
    clusters in the answer file by at most one (and is not below one).
    Up to fifteen attempts are made.
    """
    attempts = 15

    sample = read_sample(sample_path)
    clusters = answer_reader(answer_path).get_clusters()

    # Accepted interval around the expected amount of clusters.
    upper_limit = len(clusters) + 1
    lower_limit = max(1, len(clusters) - 1)

    succeeded = False
    for _ in range(attempts):
        ksearch_instance = silhouette_ksearch(sample, kmin, kmax, algorithm=algorithm, ccore=ccore_flag).process()

        amount = ksearch_instance.get_amount()
        score = ksearch_instance.get_score()
        scores = ksearch_instance.get_scores()

        assertion.le(-1.0, score)
        assertion.ge(1.0, score)
        assertion.eq(kmax - kmin, len(scores))

        if lower_limit <= amount <= upper_limit:
            succeeded = True
            break

    assertion.true(succeeded)
def testCanberraMetric(self):
    """Check the wrapped ``CANBERRA`` metric against hand-computed values."""
    metric_instance = metric_wrapper(type_metric.CANBERRA, [], None)

    # Each case is (expected distance, first point, second point).
    cases = (
        (0.0, [0.0, 0.0], [0.0, 0.0]),
        (2.0, [0.0, 0.0], [1.0, 1.0]),
        (1.0, [0.75, 0.75], [0.25, 0.25]),
        (0.0, [-1.0, -1.0], [-1.0, -1.0]),
        (0.4, [-2.0, -2.0], [-3.0, -3.0]),
    )

    for expected, point1, point2 in cases:
        assertion.eq(expected, metric_instance(point1, point2))
def clustering(path, amount, threshold, expected, ccore, **kwargs):
    """Cluster a sample with ``mbsas`` and verify the produced allocation.

    Args:
        path: Passed to ``read_sample`` to load the data.
        amount: Forwarded to ``mbsas``; also the upper bound asserted for the
            amount of allocated clusters.
        threshold: Threshold forwarded to ``mbsas``.
        expected: Expected cluster sizes in any order; left unmodified.
        ccore: Forwarded to ``mbsas`` as the ``ccore`` argument.
        **kwargs: ``metric`` replaces the default Euclidean ``distance_metric``.
    """
    metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN))

    sample = read_sample(path)

    mbsas_instance = mbsas(sample, amount, threshold, ccore=ccore, metric=metric)
    mbsas_instance.process()

    clusters = mbsas_instance.get_clusters()
    representatives = mbsas_instance.get_representatives()

    obtained_cluster_length = [len(cluster) for cluster in clusters]

    # Every point has to be allocated to exactly one cluster.
    assertion.eq(len(sample), sum(obtained_cluster_length))
    assertion.eq(len(expected), len(clusters))
    assertion.eq(len(expected), len(representatives))
    assertion.ge(amount, len(clusters))

    dimension = len(sample[0])
    for rep in representatives:
        assertion.eq(dimension, len(rep))

    # Sorted copies are compared so that the caller's list is not reordered.
    assertion.eq(sorted(expected), sorted(obtained_cluster_length))
def testBuildGowerDistanceFromMetricWithNumpyData(self):
    """Wrap a ``GOWER`` metric built from numpy data and check one distance."""
    points = numpy.array([[-3.0, -3.0], [-4.0, -3.0], [-4.5, -3.0], [-5.0, -3.0]])
    gower = distance_metric(type_metric.GOWER, data=points)

    wrapped = metric_wrapper.create_instance(gower)

    first, second = [-3.0, -3.0], [-5.0, -3.0]
    assertion.eq(0.5, wrapped(first, second))
def template_cluster_allocation(input_data, cluster_sizes, number_cluster, number_represent_points = 5, compression = 0.5, ccore_flag = False, **kwargs):
    """Cluster data with ``cure`` and verify the sizes of the allocated clusters.

    Args:
        input_data: Either a string that is passed to ``read_sample`` or the
            sample itself.
        cluster_sizes: Expected cluster sizes in any order; left unmodified.
        number_cluster: Amount of clusters requested from ``cure``.
        number_represent_points: Forwarded to ``cure``.
        compression: Forwarded to ``cure``.
        ccore_flag: Forwarded to ``cure`` as the ``ccore`` argument.
        **kwargs: ``numpy_usage=True`` converts the sample with ``numpy.array``.
    """
    if isinstance(input_data, str):
        sample = read_sample(input_data)
    else:
        sample = input_data

    numpy_usage = kwargs.get('numpy_usage', False)
    if numpy_usage is True:
        sample = numpy.array(sample)

    cure_instance = cure(sample, number_cluster, number_represent_points, compression, ccore = ccore_flag)
    cure_instance.process()

    clusters = cure_instance.get_clusters()
    representors = cure_instance.get_representors()
    means = cure_instance.get_means()

    assertion.eq(len(clusters), number_cluster)
    assertion.eq(len(representors), number_cluster)
    assertion.eq(len(means), number_cluster)

    obtained_cluster_sizes = [len(cluster) for cluster in clusters]

    # Every point has to be allocated to exactly one cluster.
    assertion.eq(sum(obtained_cluster_sizes), len(sample))

    # Sorted copies are compared so that the caller's list is not reordered.
    assertion.eq(sorted(cluster_sizes), sorted(obtained_cluster_sizes))
def templateLengthProcessData(input_sample, start_centers, expected_cluster_length, type_splitting, kmax, ccore, **kwargs):
    """Cluster data with ``xmeans`` and verify the allocation and the total WCE.

    Args:
        input_sample: Either a string that is passed to ``read_sample`` or the
            sample itself.
        start_centers: Initial centers forwarded to ``xmeans``.
        expected_cluster_length: Expected cluster sizes in any order, or
            ``None`` to skip the size check; left unmodified.
        type_splitting: Splitting type forwarded to ``xmeans``.
        kmax: Maximum amount of clusters forwarded to ``xmeans``.
        ccore: Forwarded to ``xmeans``.
        **kwargs: Forwarded to ``xmeans`` unchanged.
    """
    if isinstance(input_sample, str):
        sample = read_sample(input_sample)
    else:
        sample = input_sample

    xmeans_instance = xmeans(sample, start_centers, kmax, 0.025, type_splitting, ccore, **kwargs)
    xmeans_instance.process()

    clusters = xmeans_instance.get_clusters()
    centers = xmeans_instance.get_centers()
    wce = xmeans_instance.get_total_wce()

    obtained_cluster_sizes = [len(cluster) for cluster in clusters]

    # Every point has to be allocated to exactly one cluster.
    assertion.eq(len(sample), sum(obtained_cluster_sizes))

    assertion.eq(len(clusters), len(centers))
    assertion.le(len(centers), kmax)

    # Accumulated with a plain loop on purpose: the total is compared for
    # equality, so the order and manner of the additions is left as it was.
    expected_wce = 0.0
    metric = distance_metric(type_metric.EUCLIDEAN_SQUARE)
    for cluster, center in zip(clusters, centers):
        for index_point in cluster:
            expected_wce += metric(sample[index_point], center)

    assertion.eq(expected_wce, wce)

    if expected_cluster_length is not None:
        assertion.eq(len(centers), len(expected_cluster_length))

        # Sorted copies are compared so that the caller's list is not reordered.
        assertion.eq(sorted(obtained_cluster_sizes), sorted(expected_cluster_length))
def templateClusterAllocationOneDimensionDataSpecificData(data_type, ccore_flag):
    """Check ``dbscan`` on random one-dimensional data made of four groups.

    Fifty times, ten random values are generated in each of the intervals
    starting at 0, 3, 6 and 9, and ``dbscan`` (radius 1.0, 2 neighbors) is
    expected to allocate four clusters of ten points.

    Raises:
        ValueError: If ``data_type`` is neither ``'points'`` nor
            ``'distance_matrix'``.
    """
    group_offsets = (0, 3, 6, 9)
    group_size = 10

    for _ in range(50):
        sample = [[random() + shift] for shift in group_offsets for _ in range(group_size)]

        if data_type == 'points':
            input_data = sample
        elif data_type == 'distance_matrix':
            input_data = calculate_distance_matrix(sample)
        else:
            raise ValueError("Incorrect data type '%s' is specified" % data_type)

        dbscan_instance = dbscan(input_data, 1.0, 2, ccore_flag, data_type=data_type)
        dbscan_instance.process()

        clusters = dbscan_instance.get_clusters()

        assertion.eq(4, len(clusters))
        for cluster in clusters:
            assertion.eq(10, len(cluster))
def testCanberraMetric(self):
    """Check the wrapped ``CANBERRA`` metric against hand-computed values."""
    metric_instance = metric_wrapper(type_metric.CANBERRA, [], None)

    # Each case is (expected distance, first point, second point).
    cases = (
        (0.0, [0.0, 0.0], [0.0, 0.0]),
        (2.0, [0.0, 0.0], [1.0, 1.0]),
        (1.0, [0.75, 0.75], [0.25, 0.25]),
        (0.0, [-1.0, -1.0], [-1.0, -1.0]),
        (0.4, [-2.0, -2.0], [-3.0, -3.0]),
    )

    for expected, point1, point2 in cases:
        assertion.eq(expected, metric_instance(point1, point2))
def correct_processing_data_types(sample_path, answer_path, ccore_flag):
    """Check that silhouette scores agree for points and distance-matrix input.

    ``silhouette_test_template.correct_scores`` is called once per data type
    and both results are compared for length and for equality.
    """
    from_points, from_matrix = [
        silhouette_test_template.correct_scores(sample_path, answer_path, ccore_flag, data_type=data_type)
        for data_type in ('points', 'distance_matrix')
    ]

    assertion.eq(len(from_points), len(from_matrix))
    assertion.eq(from_points, from_matrix)
def template_compare_output(self, path, k, candidates, random_state, metric):
    """Check that K-Means++ picks the same indexes for points and for a distance matrix.

    The initializer is run with the same ``random_state`` on the sample and
    on its distance matrix; the returned center indexes have to be equal.
    """
    sample = read_sample(path)
    matrix = calculate_distance_matrix(sample, metric=metric)

    inputs = ((sample, 'points'), (matrix, 'distance_matrix'))
    indexes = []
    for data, data_type in inputs:
        initializer = kmeans_plusplus_initializer(data, k, candidates, random_state=random_state, data_type=data_type, metric=metric)
        indexes.append(initializer.initialize(return_index=True))

    from_points, from_matrix = indexes
    assertion.eq(from_points, from_matrix)
def templateKmeasPlusPlusCenterInitializer(self, data, amount):
    """Initialize centers with K-Means++ and check their amount and dimension.

    Returns:
        The centers returned by ``initialize()``.
    """
    initializer = kmeans_plusplus_initializer(data, amount)
    centers = initializer.initialize()

    assertion.eq(amount, len(centers))

    # Each center must have as many coordinates as a data point.
    for candidate in centers:
        assertion.eq(len(data[0]), len(candidate))

    return centers
def templateKmeasPlusPlusCenterInitializer(self, data, amount):
    """Initialize centers with K-Means++ and check their amount and dimension.

    Returns:
        The centers returned by ``initialize()``.
    """
    initializer = kmeans_plusplus_initializer(data, amount)
    centers = initializer.initialize()

    assertion.eq(amount, len(centers))

    # Each center must have as many coordinates as a data point.
    for candidate in centers:
        assertion.eq(len(data[0]), len(candidate))

    return centers
def templateLengthProcessWithMetric(path_to_file, initial_medoids, expected_cluster_length, metric, ccore_flag, **kwargs):
    """Cluster a sample with ``kmedoids`` and verify the allocation.

    Args:
        path_to_file: Passed to ``read_sample`` to load the data.
        initial_medoids: Indexes of initial medoids; replaced on every attempt
            when ``initialize_medoids`` is given.
        expected_cluster_length: Expected cluster sizes in any order, or
            ``None`` to skip the size check; left unmodified.
        metric: Metric forwarded to ``kmedoids``; ``None`` selects the
            squared Euclidean ``distance_metric``.
        ccore_flag: Forwarded to ``kmedoids``.
        **kwargs: Optional settings:
            ``data_type`` (default ``'points'``) - ``'distance_matrix'`` makes
            the template pass a distance matrix instead of the sample;
            ``input_type`` (default ``'list'``) - ``'numpy'`` converts the
            distance matrix with ``numpy.array``;
            ``initialize_medoids`` (default ``None``) - amount of medoids to
            draw with ``kmeans_plusplus_initializer``;
            ``itermax`` (default ``200``) - forwarded to ``kmedoids``.
    """
    sample = read_sample(path_to_file)

    data_type = kwargs.get('data_type', 'points')
    input_type = kwargs.get('input_type', 'list')
    initialize_medoids = kwargs.get('initialize_medoids', None)
    itermax = kwargs.get('itermax', 200)

    if metric is None:
        metric = distance_metric(type_metric.EUCLIDEAN_SQUARE)

    input_data = sample
    if data_type == 'distance_matrix':
        input_data = calculate_distance_matrix(sample)

        if input_type == 'numpy':
            input_data = numpy.array(input_data)

    # Sorted once, as a copy, so that the caller's list is not reordered.
    expected_sizes = None
    if expected_cluster_length is not None:
        expected_sizes = sorted(expected_cluster_length)

    testing_result = False

    # Randomized medoid initialization may need several attempts.
    testing_attempts = 10 if initialize_medoids is not None else 1

    for _ in range(testing_attempts):
        if initialize_medoids is not None:
            initial_medoids = kmeans_plusplus_initializer(sample, initialize_medoids).initialize(return_index=True)

        kmedoids_instance = kmedoids(input_data, initial_medoids, 0.001, ccore_flag, metric=metric, data_type=data_type, itermax=itermax)
        kmedoids_instance.process()

        clusters = kmedoids_instance.get_clusters()
        medoids = kmedoids_instance.get_medoids()

        if itermax == 0:
            # Without iterations nothing is allocated and the medoids stay as given.
            assertion.eq([], clusters)
            assertion.eq(medoids, initial_medoids)
            return

        if len(clusters) != len(medoids):
            continue

        # Medoids have to be unique.
        if len(set(medoids)) != len(medoids):
            continue

        obtained_cluster_sizes = sorted(len(cluster) for cluster in clusters)
        if len(sample) != sum(obtained_cluster_sizes):
            continue

        if expected_sizes is not None and obtained_cluster_sizes != expected_sizes:
            continue

        # One successful attempt is enough; further attempts cannot change the outcome.
        testing_result = True
        break

    assertion.true(testing_result)
def templateLengthProcessWithMetric(path_to_file, initial_medoids, expected_cluster_length, metric, ccore_flag, **kwargs):
    """Cluster a sample with ``kmedoids`` and verify the allocation.

    Args:
        path_to_file: Passed to ``read_sample`` to load the data.
        initial_medoids: Indexes of initial medoids; replaced on every attempt
            when ``initialize_medoids`` is given.
        expected_cluster_length: Expected cluster sizes in any order, or
            ``None`` to skip the size check; left unmodified.
        metric: Metric forwarded to ``kmedoids``; ``None`` selects the
            squared Euclidean ``distance_metric``.
        ccore_flag: Forwarded to ``kmedoids``.
        **kwargs: Optional settings:
            ``data_type`` (default ``'points'``) - ``'distance_matrix'`` makes
            the template pass a distance matrix instead of the sample;
            ``input_type`` (default ``'list'``) - ``'numpy'`` converts the
            distance matrix with ``numpy.array``;
            ``initialize_medoids`` (default ``None``) - amount of medoids to
            draw with ``kmeans_plusplus_initializer``;
            ``itermax`` (default ``200``) - forwarded to ``kmedoids``.
    """
    sample = read_sample(path_to_file)

    data_type = kwargs.get('data_type', 'points')
    input_type = kwargs.get('input_type', 'list')
    initialize_medoids = kwargs.get('initialize_medoids', None)
    itermax = kwargs.get('itermax', 200)

    if metric is None:
        metric = distance_metric(type_metric.EUCLIDEAN_SQUARE)

    input_data = sample
    if data_type == 'distance_matrix':
        input_data = calculate_distance_matrix(sample)

        if input_type == 'numpy':
            input_data = numpy.array(input_data)

    # Sorted once, as a copy, so that the caller's list is not reordered.
    expected_sizes = None
    if expected_cluster_length is not None:
        expected_sizes = sorted(expected_cluster_length)

    testing_result = False

    # Randomized medoid initialization may need several attempts.
    testing_attempts = 10 if initialize_medoids is not None else 1

    for _ in range(testing_attempts):
        if initialize_medoids is not None:
            initial_medoids = kmeans_plusplus_initializer(sample, initialize_medoids).initialize(return_index=True)

        kmedoids_instance = kmedoids(input_data, initial_medoids, 0.001, ccore_flag, metric=metric, data_type=data_type, itermax=itermax)
        kmedoids_instance.process()

        clusters = kmedoids_instance.get_clusters()
        medoids = kmedoids_instance.get_medoids()

        if itermax == 0:
            # Without iterations nothing is allocated and the medoids stay as given.
            assertion.eq([], clusters)
            assertion.eq(medoids, initial_medoids)
            return

        if len(clusters) != len(medoids):
            continue

        # Medoids have to be unique.
        if len(set(medoids)) != len(medoids):
            continue

        obtained_cluster_sizes = sorted(len(cluster) for cluster in clusters)
        if len(sample) != sum(obtained_cluster_sizes):
            continue

        if expected_sizes is not None and obtained_cluster_sizes != expected_sizes:
            continue

        # One successful attempt is enough; further attempts cannot change the outcome.
        testing_result = True
        break

    assertion.true(testing_result)
def test_logical_block_neighbors(self):
    """Check the neighbor locations of a ``clique_block`` placed at ``[1, 1]``.

    ``get_location_neighbors(3)`` is expected to return exactly the four
    locations that differ from ``[1, 1]`` by one in a single coordinate.
    """
    block = clique_block()
    block.logical_location = [1, 1]

    neighbors = block.get_location_neighbors(3)

    assertion.eq(4, len(neighbors))
    for location in ([0, 1], [2, 1], [1, 0], [1, 2]):
        assertion.true(location in neighbors)
def test_logical_block_neighbors(self):
    """Check the neighbor locations of a ``clique_block`` placed at ``[1, 1]``.

    ``get_location_neighbors(3)`` is expected to return exactly the four
    locations that differ from ``[1, 1]`` by one in a single coordinate.
    """
    block = clique_block()
    block.logical_location = [1, 1]

    neighbors = block.get_location_neighbors(3)

    assertion.eq(4, len(neighbors))
    for location in ([0, 1], [2, 1], [1, 0], [1, 2]):
        assertion.true(location in neighbors)
def templateClusterAllocationOneDimensionData(ccore_flag):
    """Check ``cure`` on random one-dimensional data made of four groups.

    Ten random values are generated in each of the intervals starting at
    0, 3, 5 and 8; four clusters of ten points are expected.
    """
    group_offsets = (0, 3, 5, 8)
    input_data = [[random() + shift] for shift in group_offsets for _ in range(10)]

    cure_instance = cure(input_data, 4, ccore = ccore_flag)
    cure_instance.process()

    clusters = cure_instance.get_clusters()

    assertion.eq(4, len(clusters))
    for allocated in clusters:
        assertion.eq(10, len(allocated))
def templateClusterAllocationOneDimensionData(ccore_flag):
    """Check ``kmedoids`` on random one-dimensional data made of four groups.

    Ten random values are generated in each of the intervals starting at
    0, 3, 5 and 8; one point of each group (indexes 5, 15, 25, 35) is used as
    an initial medoid and four clusters of ten points are expected.
    """
    group_offsets = (0, 3, 5, 8)
    input_data = [[random() + shift] for shift in group_offsets for _ in range(10)]

    initial_medoids = [5, 15, 25, 35]
    kmedoids_instance = kmedoids(input_data, initial_medoids, 0.025, ccore_flag)
    kmedoids_instance.process()

    clusters = kmedoids_instance.get_clusters()

    assertion.eq(4, len(clusters))
    for allocated in clusters:
        assertion.eq(10, len(allocated))
def templateClusterAllocationOneDimensionData(ccore_flag):
    """Check ``kmeans`` on random one-dimensional data made of four groups.

    Ten random values are generated in each of the intervals starting at
    0, 3, 5 and 8; the interval starts are used as initial centers and four
    clusters of ten points are expected.
    """
    group_offsets = (0, 3, 5, 8)
    input_data = [[random() + shift] for shift in group_offsets for _ in range(10)]

    initial_centers = [[0.0], [3.0], [5.0], [8.0]]
    kmeans_instance = kmeans(input_data, initial_centers, 0.025, ccore_flag)
    kmeans_instance.process()

    clusters = kmeans_instance.get_clusters()

    assertion.eq(4, len(clusters))
    for allocated in clusters:
        assertion.eq(10, len(allocated))
def templateClusterAllocationOneDimensionData(ccore_flag):
    """Check ``cure`` on random one-dimensional data made of four groups.

    Ten random values are generated in each of the intervals starting at
    0, 3, 5 and 8; four clusters of ten points are expected.
    """
    group_offsets = (0, 3, 5, 8)
    input_data = [[random() + shift] for shift in group_offsets for _ in range(10)]

    cure_instance = cure(input_data, 4, ccore = ccore_flag)
    cure_instance.process()

    clusters = cure_instance.get_clusters()

    assertion.eq(4, len(clusters))
    for allocated in clusters:
        assertion.eq(10, len(allocated))
def template_correct_scores(self, sample_path, answer_path):
    """Check that ``silhouette`` yields one score per point, each within [-1, 1].

    The sample is loaded with ``read_sample`` and the clusters are taken
    from the answer file.
    """
    sample = read_sample(sample_path)
    reader = answer_reader(answer_path)
    clusters = reader.get_clusters()

    silhouette_instance = silhouette(sample, clusters).process()
    scores = silhouette_instance.get_score()

    assertion.eq(len(sample), len(scores))

    for value in scores:
        assertion.le(-1.0, value)
        assertion.ge(1.0, value)
def templateClusterAllocationOneDimensionData(ccore_flag):
    """Check ``kmeans`` on random one-dimensional data made of four groups.

    Ten random values are generated in each of the intervals starting at
    0, 3, 5 and 8; the interval starts are used as initial centers and four
    clusters of ten points are expected.
    """
    group_offsets = (0, 3, 5, 8)
    input_data = [[random() + shift] for shift in group_offsets for _ in range(10)]

    initial_centers = [[0.0], [3.0], [5.0], [8.0]]
    kmeans_instance = kmeans(input_data, initial_centers, 0.025, ccore_flag)
    kmeans_instance.process()

    clusters = kmeans_instance.get_clusters()

    assertion.eq(4, len(clusters))
    for allocated in clusters:
        assertion.eq(10, len(allocated))
def templateKmeasPlusPlusCenterInitializerIndexReturn(self, data, amount):
    """Initialize centers with K-Means++ as indexes and check that they are valid.

    Returns:
        The indexes returned by ``initialize(return_index=True)``.
    """
    initializer = kmeans_plusplus_initializer(data, amount)
    center_indexes = initializer.initialize(return_index=True)

    assertion.eq(amount, len(center_indexes))

    # Each index has to refer to an existing data point.
    for index in center_indexes:
        assertion.gt(len(data), index)
        assertion.le(0, index)

    return center_indexes
def correct_scores(sample_path, answer_path, ccore_flag):
    """Check that ``silhouette`` yields one score per point, each within [-1, 1].

    The sample is loaded with ``read_sample``, the clusters are taken from
    the answer file and ``ccore_flag`` is forwarded as ``ccore``.
    """
    sample = read_sample(sample_path)
    reader = answer_reader(answer_path)
    clusters = reader.get_clusters()

    silhouette_instance = silhouette(sample, clusters, ccore=ccore_flag).process()
    scores = silhouette_instance.get_score()

    assertion.eq(len(sample), len(scores))

    for value in scores:
        assertion.le(-1.0, value)
        assertion.ge(1.0, value)
def templateKmeasPlusPlusCenterInitializerIndexReturn(self, data, amount):
    """Initialize centers with K-Means++ as indexes and check that they are valid.

    Every index has to refer to an existing data point and has to occur
    exactly once in the result.

    Returns:
        The indexes returned by ``initialize(return_index=True)``.
    """
    initializer = kmeans_plusplus_initializer(data, amount)
    center_indexes = initializer.initialize(return_index=True)

    assertion.eq(amount, len(center_indexes))

    for index in center_indexes:
        assertion.gt(len(data), index)
        assertion.le(0, index)
        # The same point must not be chosen twice.
        assertion.eq(1, center_indexes.count(index))

    return center_indexes
def testGowerDistanceIntegrity(self):
    """Check that the ``GOWER`` metric agrees for list input and numpy input."""
    first, second = [1.2, 3.4], [1.0, 2.2]
    first_array, second_array = numpy.array(first), numpy.array(second)

    list_metric = metric.distance_metric(metric.type_metric.GOWER, data=[first, second], numpy_usage=False)
    numpy_metric = metric.distance_metric(metric.type_metric.GOWER, data=numpy.array([first, second]), numpy_usage=True)

    list_distance = list_metric(first, second)
    numpy_distance = numpy_metric(first_array, second_array)

    assertion.eq(list_distance, numpy_distance)
def templatePredict(path_to_file, initial_medoids, points, expected_closest_clusters, ccore, **kwargs):
    """Train ``kmedoids`` on a sample and check its prediction for ``points``.

    ``kwargs`` may carry ``metric`` (default: squared Euclidean
    ``distance_metric``) and ``itermax`` (default: 200); both are forwarded
    to ``kmedoids``.
    """
    metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN_SQUARE))
    itermax = kwargs.get('itermax', 200)

    sample = read_sample(path_to_file)

    kmedoids_instance = kmedoids(sample, initial_medoids, 0.001, ccore, metric=metric, itermax=itermax)
    kmedoids_instance.process()

    predicted = kmedoids_instance.predict(points)

    assertion.eq(len(expected_closest_clusters), len(predicted))

    expected_array = numpy.array(expected_closest_clusters)
    assertion.true(numpy.array_equal(expected_array, predicted))
def templateClusterAllocationTheSameObjects(number_objects, number_clusters, ccore_flag = False):
    """Check ``kmedoids`` on data in which every object is the same point.

    Initial medoids are spread evenly over the indexes. The medoids have to
    be unique and each object has to be allocated exactly once.
    """
    value = random()
    input_data = [ [value] ] * number_objects

    step = int(math.floor(number_objects / number_clusters))
    initial_medoids = [position * step for position in range(number_clusters)]

    kmedoids_instance = kmedoids(input_data, initial_medoids, ccore=ccore_flag)
    kmedoids_instance.process()

    clusters = kmedoids_instance.get_clusters()
    medoids = kmedoids_instance.get_medoids()

    assertion.eq(len(clusters), len(medoids))
    assertion.eq(len(set(medoids)), len(medoids))

    already_allocated = [False] * number_objects
    allocated_amount = 0

    for cluster in clusters:
        for index_object in cluster:
            # An object may belong to one cluster only.
            assertion.eq(False, already_allocated[index_object])

            already_allocated[index_object] = True
            allocated_amount += 1

    # Every object has to be allocated.
    assertion.eq(number_objects, allocated_amount)
def templateCollectEvolution(filename, initial_centers, number_clusters, ccore_flag):
    """Run ``kmeans`` with a ``kmeans_observer`` and check what was collected.

    At least one step has to be recorded; every step has to contain at least
    one center with the dimension of the data and at least one cluster.
    ``number_clusters`` is accepted but not used by this template.
    """
    sample = read_sample(filename)

    observer = kmeans_observer()
    kmeans_instance = kmeans(sample, initial_centers, 0.025, ccore_flag, observer=observer)
    kmeans_instance.process()

    assertion.le(1, len(observer))

    for step in range(len(observer)):
        assertion.le(1, len(observer.get_centers(step)))

        for center in observer.get_centers(step):
            assertion.eq(len(sample[0]), len(center))

        assertion.le(1, len(observer.get_clusters(step)))
def templateClusteringWithAnswers(sample_path, answer_path, radius, neighbors, ccore, **kwargs):
    """Run ``dbscan`` on a sample and compare the result with an answer file.

    ``kwargs`` may carry ``random_order`` (default ``False``) to shuffle the
    order of the points before clustering and ``repeat`` (default ``1``) to
    run the whole check several times.
    """
    random_order = kwargs.get('random_order', False)
    repeat = kwargs.get('repeat', 1)

    for _ in range(repeat):
        sample = read_sample(sample_path)

        # order[k] is the index in the sample of the k-th point given to dbscan.
        order = list(range(len(sample)))
        if random_order:
            shuffle(order)

        shuffled = [sample[position] for position in order]

        dbscan_instance = dbscan(shuffled, radius, neighbors, ccore)
        dbscan_instance.process()

        clusters = dbscan_instance.get_clusters()
        noise = dbscan_instance.get_noise()

        # Translate indexes of the shuffled data back to indexes of the sample (in place).
        for cluster in clusters:
            cluster[:] = [order[member] for member in cluster]

        noise[:] = [order[member] for member in noise]
        noise = sorted(noise)

        reader = answer_reader(answer_path)
        expected_noise = sorted(reader.get_noise())
        expected_lengths = reader.get_cluster_lengths()

        cluster_lengths = [len(cluster) for cluster in clusters]
        allocated = sum(cluster_lengths)

        assertion.eq(len(sample), allocated + len(noise))
        assertion.eq(sum(expected_lengths), allocated)
        assertion.eq(expected_lengths, sorted(cluster_lengths))
        assertion.eq(expected_noise, noise)
def test_logical_block_neighbors_on_edge(self):
    """Check the neighbor locations of a ``clique_block`` for edge locations.

    With ``get_location_neighbors(2)`` both ``[1, 1]`` and ``[0, 0]`` are
    expected to have exactly the neighbors ``[0, 1]`` and ``[1, 0]``.
    """
    block = clique_block()
    expected_neighbors = ([0, 1], [1, 0])

    for location in ([1, 1], [0, 0]):
        block.logical_location = location
        neighbors = block.get_location_neighbors(2)

        assertion.eq(2, len(neighbors))
        for neighbor in expected_neighbors:
            assertion.true(neighbor in neighbors)
def templateEncoderProcedures(ccore_flag):
    """Run ``cure`` on SAMPLE_SIMPLE3 and switch its result between encodings.

    The clusters are re-encoded three times with ``cluster_encoder``;
    afterwards four clusters are still expected.
    """
    sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)

    cure_instance = cure(sample, 4, 5, 0.5, ccore = ccore_flag)
    cure_instance.process()

    clusters = cure_instance.get_clusters()
    encoding = cure_instance.get_cluster_encoding()

    encoder = cluster_encoder(encoding, clusters, sample)

    conversion_sequence = (
        type_encoding.CLUSTER_INDEX_LABELING,
        type_encoding.CLUSTER_OBJECT_LIST_SEPARATION,
        type_encoding.CLUSTER_INDEX_LIST_SEPARATION,
    )
    for target_encoding in conversion_sequence:
        encoder.set_encoding(target_encoding)

    assertion.eq(4, len(clusters))
def templateEncoderProcedures(filename, initial_centers, number_clusters, ccore_flag):
    """Run ``kmeans`` on a sample and switch its result between encodings.

    The clusters are re-encoded three times with ``cluster_encoder``;
    afterwards ``number_clusters`` clusters are still expected.
    """
    sample = read_sample(filename)

    kmeans_instance = kmeans(sample, initial_centers, 0.025, ccore_flag)
    kmeans_instance.process()

    clusters = kmeans_instance.get_clusters()
    encoding = kmeans_instance.get_cluster_encoding()

    encoder = cluster_encoder(encoding, clusters, sample)

    conversion_sequence = (
        type_encoding.CLUSTER_INDEX_LABELING,
        type_encoding.CLUSTER_OBJECT_LIST_SEPARATION,
        type_encoding.CLUSTER_INDEX_LIST_SEPARATION,
    )
    for target_encoding in conversion_sequence:
        encoder.set_encoding(target_encoding)

    assertion.eq(number_clusters, len(clusters))
def testChiSquareMetric(self):
    """Check the wrapped ``CHI_SQUARE`` metric against hand-computed values."""
    metric_instance = metric_wrapper(type_metric.CHI_SQUARE, [], None)

    # Each case is (expected distance, first point, second point).
    cases = (
        (0.0, [0.0, 0.0], [0.0, 0.0]),
        (2.0, [0.0, 0.0], [1.0, 1.0]),
        (0.5, [0.75, 0.75], [0.25, 0.25]),
        (0.0, [-1.0, -1.0], [-1.0, -1.0]),
        (0.4, [-2.0, -2.0], [-3.0, -3.0]),
    )

    for expected, point1, point2 in cases:
        assertion.eq(expected, metric_instance(point1, point2))


# TODO: doesn't work for some platforms.
#def testUserDefinedMetric(self):
#    user_metric = lambda p1, p2 : p1[0] + p2[0];
#    metric_instance = metric_wrapper(type_metric.USER_DEFINED, [], user_metric);
#    assertion.eq(2.0, metric_instance([0.0, 0.0], [2.0, 0.0]));
#    assertion.eq(4.0, metric_instance([3.0, 2.0], [1.0, 5.0]));
def templateClusterAllocationOneDimensionDataSpecificData(data_type, ccore_flag):
    """Check ``dbscan`` on random one-dimensional data made of four groups.

    Fifty times, ten random values are generated in each of the intervals
    starting at 0, 3, 6 and 9, and ``dbscan`` (radius 1.0, 2 neighbors) is
    expected to allocate four clusters of ten points.

    Raises:
        ValueError: If ``data_type`` is neither ``'points'`` nor
            ``'distance_matrix'``.
    """
    group_offsets = (0, 3, 6, 9)
    group_size = 10

    for _ in range(50):
        sample = [[random() + shift] for shift in group_offsets for _ in range(group_size)]

        if data_type == 'points':
            input_data = sample
        elif data_type == 'distance_matrix':
            input_data = calculate_distance_matrix(sample)
        else:
            raise ValueError("Incorrect data type '%s' is specified" % data_type)

        dbscan_instance = dbscan(input_data, 1.0, 2, ccore_flag, data_type=data_type)
        dbscan_instance.process()

        clusters = dbscan_instance.get_clusters()

        assertion.eq(4, len(clusters))
        for cluster in clusters:
            assertion.eq(10, len(cluster))
def templateMaxAllocatedClusters(ccore_flag, amount_clusters, size_cluster, offset, kinitial, kmax):
    """Check that ``xmeans`` does not allocate more than ``kmax`` clusters.

    Random two-dimensional points are generated in ``amount_clusters`` groups
    of ``size_cluster`` points; the coordinates of group ``index`` are scaled
    by ``index * offset``. The generated data and the initial centers are
    printed when the amounts of clusters and centers disagree.
    """
    input_data = [
        [random.random() * index * offset, random.random() * index * offset]
        for index in range(amount_clusters)
        for _ in range(size_cluster)
    ]

    initial_centers = random_center_initializer(input_data, kinitial).initialize()
    xmeans_instance = xmeans(input_data, initial_centers, kmax, 0.025, splitting_type.BAYESIAN_INFORMATION_CRITERION, ccore_flag)
    xmeans_instance.process()

    clusters = xmeans_instance.get_clusters()
    centers = xmeans_instance.get_centers()

    amounts_agree = len(clusters) == len(centers)
    if not amounts_agree:
        # Dump the inputs to make the failure reproducible.
        print(input_data)
        print(initial_centers)

    assertion.ge(kmax, len(clusters))
    assertion.ge(kmax, len(centers))
    assertion.eq(len(clusters), len(centers))
def templateAllocateRequestedClusterAmount(data, amount_clusters, initial_medoids, ccore_flag):
    """Check that K-Medoids allocates exactly the requested amount of clusters.

    :param data: input data passed to kmedoids; its length is the number of objects.
    :param amount_clusters: expected amount of clusters.
    :param initial_medoids: indexes of initial medoids, or None to draw
        ``amount_clusters`` distinct random indexes from ``data``.
    :param ccore_flag: forwarded to kmedoids as ``ccore``.
    :raises ValueError: if medoids have to be generated and ``amount_clusters``
        exceeds ``len(data)`` (distinct indexes cannot be drawn).
    """
    if initial_medoids is None:
        # Without this guard the rejection-sampling loop below would never
        # terminate: there are only len(data) distinct indexes to choose from.
        if amount_clusters > len(data):
            raise ValueError("Cannot choose %d distinct medoids from %d objects"
                             % (amount_clusters, len(data)))

        initial_medoids = []
        for _ in range(amount_clusters):
            index_point = randint(0, len(data) - 1)
            # Redraw until the index has not been used yet.
            while index_point in initial_medoids:
                index_point = randint(0, len(data) - 1)

            initial_medoids.append(index_point)

    kmedoids_instance = kmedoids(data, initial_medoids, 0.025, ccore=ccore_flag)
    kmedoids_instance.process()
    clusters = kmedoids_instance.get_clusters()

    assertion.eq(len(clusters), amount_clusters)

    # Every object must belong to exactly one cluster in total.
    amount_objects = sum(len(cluster) for cluster in clusters)
    assertion.eq(amount_objects, len(data))
def templateSyncsegmSegmentation(image_source, radius_color, radius_object, noise_size,
                                 expected_color_segments, expected_object_segments,
                                 collect_dynamic, ccore_flag):
    """Segment an image with syncsegm and verify the amounts of segments.

    The segmentation is attempted up to 10 times and stops at the first
    attempt where both the color and the object segment counts match the
    expected values. The final assertions are made against the segments of
    the last attempt performed.
    """
    matched = False
    color_segments, object_segments = [], []

    for _ in range(10):
        segmenter = syncsegm(radius_color, radius_object, noise_size, ccore=ccore_flag)
        analyser = segmenter.process(image_source, collect_dynamic, 0.9995, 0.9995)

        color_segments = analyser.allocate_colors()
        object_segments = analyser.allocate_objects(0.2)

        colors_match = len(color_segments) == expected_color_segments
        objects_match = len(object_segments) == expected_object_segments
        if colors_match and objects_match:
            matched = True
            break

    assertion.eq(expected_color_segments, len(color_segments))
    assertion.eq(expected_object_segments, len(object_segments))
    assertion.true(matched)
def templateLengthProcessSpecificData(data_type, path_to_file, radius, min_number_neighbors, max_number_neighbors, ccore):
    """Check that DBSCAN accounts for every point across a range of neighbor thresholds.

    For each neighbor threshold in ``[min_number_neighbors, max_number_neighbors)``
    the sample is clustered and the total of clustered points plus noise points
    is asserted to equal the sample size.

    :param data_type: 'points' to pass the sample as is, or 'distance_matrix'
        to pass its distance matrix.
    :param path_to_file: path of the sample, read with read_sample.
    :param radius: forwarded to dbscan as its second positional argument.
    :param min_number_neighbors: first neighbor threshold to test (inclusive).
    :param max_number_neighbors: end of the tested thresholds (exclusive).
    :param ccore: forwarded to dbscan as its fourth positional argument.
    :raises ValueError: if data_type is neither 'points' nor 'distance_matrix'.
    """
    # Pass the loop value to dbscan so that every threshold in the range is
    # actually exercised, rather than repeating the run for the minimum only.
    for number_neighbors in range(min_number_neighbors, max_number_neighbors, 1):
        # The input is rebuilt on every iteration so each run starts from fresh data.
        sample = read_sample(path_to_file)

        if data_type == 'distance_matrix':
            input_data = calculate_distance_matrix(sample)
        elif data_type == 'points':
            input_data = sample
        else:
            raise ValueError("Incorrect data type '%s' is specified" % data_type)

        dbscan_instance = dbscan(input_data, radius, number_neighbors, ccore, data_type=data_type)
        dbscan_instance.process()

        clusters = dbscan_instance.get_clusters()
        noise = dbscan_instance.get_noise()

        # Every point is expected to be either in some cluster or in noise.
        length = len(noise) + sum(len(cluster) for cluster in clusters)
        assertion.eq(len(sample), length)
def templateClusteringResultsSpecificData(data_type, path, radius, neighbors, amount_clusters, expected_length_clusters, ccore):
    """Cluster a sample with OPTICS and verify clusters, noise and ordering.

    The sample is passed as a distance matrix when ``data_type`` is
    'distance_matrix' and as is for any other value. Checks performed:
    OPTICS object indexes are unique, core and reachability distances are
    non-negative when defined, cluster sizes match ``expected_length_clusters``
    (order-insensitive), and, when ``amount_clusters`` is not None, the amount
    of clusters extracted from the ordering matches as well.
    """
    sample = read_sample(path)
    input_data = calculate_distance_matrix(sample) if data_type == 'distance_matrix' else sample

    optics_instance = optics(input_data, radius, neighbors, amount_clusters, ccore, data_type=data_type)
    optics_instance.process()

    clusters = optics_instance.get_clusters()
    noise = optics_instance.get_noise()
    optics_objects = optics_instance.get_optics_objects()

    # Each OPTICS object must refer to a distinct input object.
    unique_indexes = {descriptor.index_object for descriptor in optics_objects}
    assertion.eq(len(optics_objects), len(unique_indexes))

    for descriptor in optics_objects:
        if descriptor.core_distance is not None:
            assertion.ge(descriptor.core_distance, 0)

        if descriptor.reachability_distance is not None:
            assertion.ge(descriptor.reachability_distance, 0)

    cluster_sizes = [len(cluster) for cluster in clusters]
    clustered_total = sum(cluster_sizes)

    assert clustered_total + len(noise) == len(sample)
    assert len(clusters) == len(expected_length_clusters)
    assert clustered_total == sum(expected_length_clusters)
    assert sorted(cluster_sizes) == sorted(expected_length_clusters)

    if amount_clusters is None:
        return

    analyser = ordering_analyser(optics_instance.get_ordering())
    assert len(analyser) > 0

    extracted_amount, borders = analyser.extract_cluster_amount(optics_instance.get_radius())
    assert extracted_amount == len(expected_length_clusters)
    assert len(borders) == extracted_amount - 1
def templateClusteringDistanceMatrix(path_to_file, radius, neighbors, expected_length_clusters, ccore):
    """Cluster a sample given as a distance matrix with DBSCAN and verify cluster sizes.

    ``expected_length_clusters`` is compared against the ascending-sorted list
    of obtained cluster sizes, so it is expected in ascending order.
    """
    sample = read_sample(path_to_file)
    matrix = calculate_distance_matrix(sample)

    algorithm = dbscan(matrix, radius, neighbors, ccore, data_type='distance_matrix')
    algorithm.process()

    clusters = algorithm.get_clusters()
    noise = algorithm.get_noise()

    cluster_sizes = [len(cluster) for cluster in clusters]
    clustered_total = sum(cluster_sizes)

    # Every point is either clustered or noise.
    assertion.eq(len(sample), clustered_total + len(noise))
    assertion.eq(sum(expected_length_clusters), clustered_total)
    assertion.eq(expected_length_clusters, sorted(cluster_sizes))
def templateClusteringResults(path, radius, neighbors, expected_length_clusters, ccore, **kwargs):
    """Cluster a sample with DBSCAN and verify cluster sizes.

    :param kwargs: ``random_order`` (default False) - shuffle the sample in
        place before clustering.

    ``expected_length_clusters`` is compared against the ascending-sorted list
    of obtained cluster sizes, so it is expected in ascending order.
    """
    sample = read_sample(path)
    if kwargs.get('random_order', False):
        shuffle(sample)

    algorithm = dbscan(sample, radius, neighbors, ccore)
    algorithm.process()

    clusters = algorithm.get_clusters()
    noise = algorithm.get_noise()

    cluster_sizes = [len(cluster) for cluster in clusters]
    clustered_total = sum(cluster_sizes)

    # Every point is either clustered or noise.
    assertion.eq(len(sample), clustered_total + len(noise))
    assertion.eq(sum(expected_length_clusters), clustered_total)
    assertion.eq(expected_length_clusters, sorted(cluster_sizes))
def testChebyshevMetric(self):
    """Check that the CHEBYSHEV metric wrapper returns 4.0 for (1, 4) and the origin."""
    chebyshev = metric_wrapper(type_metric.CHEBYSHEV, [], None)

    point, origin = [1.0, 4.0], [0.0, 0.0]
    assertion.eq(4.0, chebyshev(point, origin))
def testManhattanMetric(self):
    """Check that the MANHATTAN metric wrapper returns 3.0 for (1, 2) and the origin."""
    manhattan = metric_wrapper(type_metric.MANHATTAN, [], None)

    point, origin = [1.0, 2.0], [0.0, 0.0]
    assertion.eq(3.0, manhattan(point, origin))
def testMinkowskiMetric(self):
    """Check that the MINKOWSKI metric wrapper returns 2.0 for the origin and (2, 0).

    The wrapper is created with the argument list ``[2.0]`` (presumably the
    Minkowski degree - confirm against metric_wrapper).
    """
    minkowski = metric_wrapper(type_metric.MINKOWSKI, [2.0], None)

    origin, point = [0.0, 0.0], [2.0, 0.0]
    assertion.eq(2.0, minkowski(origin, point))