Пример #1
0
    def templateLengthProcessData(data, start_medians, expected_cluster_length, ccore, **kwargs):
        """Run k-medians on a sample and verify the sizes of the resulting clusters.

        Args:
            data: Either a string, which is loaded through read_sample(), or the
                sample itself.
            start_medians: Initial medians passed to kmedians.
            expected_cluster_length: Expected cluster sizes in any order, or None
                to skip the size comparison. The sequence is not modified.
            ccore: Passed to kmedians as its fourth positional argument.
            **kwargs: Optional 'tolerance' (default 0.01), 'metric' (default None)
                and 'itermax' (default 200).

        Raises:
            AssertionError: If any of the checks on clusters or medians fails.
        """
        tolerance = kwargs.get('tolerance', 0.01)
        metric = kwargs.get('metric', None)
        itermax = kwargs.get('itermax', 200)

        sample = read_sample(data) if isinstance(data, str) else data

        kmedians_instance = kmedians(sample, start_medians, tolerance, ccore, metric=metric, itermax=itermax)
        kmedians_instance.process()

        clusters = kmedians_instance.get_clusters()
        medians = kmedians_instance.get_medians()

        if itermax == 0:
            # With zero iterations this helper expects no allocation at all and
            # the medians to be exactly the ones that were supplied.
            assert clusters == []
            assert start_medians == medians
            return

        obtained_cluster_sizes = sorted(len(cluster) for cluster in clusters)
        assert len(sample) == sum(obtained_cluster_sizes)
        assert len(medians) == len(clusters)

        if expected_cluster_length is not None:
            # Compare sorted copies: sorting in place would silently reorder the
            # list owned by the caller.
            expected_sizes = sorted(expected_cluster_length)
            if obtained_cluster_sizes != expected_sizes:
                # Show what was actually obtained before the assertion fires.
                print(obtained_cluster_sizes)
            assert obtained_cluster_sizes == expected_sizes
Пример #2
0
 def templateClusterAllocationOneDimensionData(self):
     """Cluster four groups of ten one-dimensional points and check the allocation.

     Each group is built from random() plus a fixed offset (0, 3, 5, 8); the
     test expects exactly four clusters holding ten points each.
     """
     group_offsets = (0, 3, 5, 8)
     input_data = [[random() + offset] for offset in group_offsets for _ in range(10)]
     start_medians = [[0.0], [3.0], [5.0], [8.0]]

     kmedians_instance = kmedians(input_data, start_medians, 0.025)
     kmedians_instance.process()
     clusters = kmedians_instance.get_clusters()

     assert len(clusters) == 4
     for cluster in clusters:
         assert len(cluster) == 10
Пример #3
0
def template_clustering(start_centers, path, tolerance = 0.25):
    """Cluster the sample stored at `path` with k-medians, report timing and draw the result.

    Args:
        start_centers: Initial medians passed to kmedians.
        path: Location handed to read_sample() and echoed in the report line.
        tolerance: Third positional argument of kmedians (default 0.25).
    """
    sample = read_sample(path)
    kmedians_instance = kmedians(sample, start_centers, tolerance)

    # timedcall returns a pair; only its first element is reported here.
    ticks, _ = timedcall(kmedians_instance.process)

    clusters = kmedians_instance.get_clusters()
    print("Sample: ", path, "\t\tExecution time: ", ticks, "\n")

    draw_clusters(sample, clusters)
Пример #4
0
 def templateLengthProcessData(self, path_to_file, start_centers, expected_cluster_length):
     """Cluster the sample read from a file and verify the sizes of the clusters.

     Args:
         path_to_file: Location passed to read_sample().
         start_centers: Initial medians passed to kmedians.
         expected_cluster_length: Expected cluster sizes in any order. The
             sequence is not modified.

     Raises:
         AssertionError: If the clusters do not cover the whole sample or the
             sizes differ from the expected ones.
     """
     sample = read_sample(path_to_file)

     kmedians_instance = kmedians(sample, start_centers, 0.025)
     kmedians_instance.process()

     clusters = kmedians_instance.get_clusters()

     obtained_cluster_sizes = sorted(len(cluster) for cluster in clusters)
     assert len(sample) == sum(obtained_cluster_sizes)

     # Compare against a sorted copy: sorting in place would reorder the list
     # that belongs to the caller.
     assert obtained_cluster_sizes == sorted(expected_cluster_length)
Пример #5
0
 def templateClusterAllocationTheSameObjects(self, number_objects, number_clusters, ccore_flag = False):
     """Cluster identical points and check that each one is allocated exactly once.

     Args:
         number_objects: How many copies of the same one-dimensional point to cluster.
         number_clusters: How many random initial medians to generate.
         ccore_flag: Forwarded to kmedians as `ccore`. It was previously accepted
             but ignored, so a caller requesting True still ran the default.

     Raises:
         AssertionError: If an object appears in more than one cluster or the
             number of allocated objects differs from `number_objects`.
     """
     value = random()
     input_data = [[value]] * number_objects

     initial_centers = [[random()] for _ in range(number_clusters)]

     kmedians_instance = kmedians(input_data, initial_centers, ccore=ccore_flag)
     kmedians_instance.process()
     clusters = kmedians_instance.get_clusters()

     object_mark = [False] * number_objects
     allocated_number_objects = 0

     for cluster in clusters:
         for index_object in cluster:
             # One object can be in only one cluster.
             assert not object_mark[index_object]

             object_mark[index_object] = True
             allocated_number_objects += 1

     # Number of allocated objects should be the same.
     assert number_objects == allocated_number_objects
Пример #6
0
 def templateClusterAllocationTheSameObjects(number_objects, number_clusters, ccore_flag):
     """Cluster identical points and check that each one lands in exactly one cluster.

     Args:
         number_objects: How many copies of the same one-dimensional point to cluster.
         number_clusters: How many random initial medians to generate.
         ccore_flag: Forwarded to kmedians as `ccore`.
     """
     shared_value = random()
     input_data = [[shared_value]] * number_objects

     initial_centers = [[random()] for _ in range(number_clusters)]

     kmedians_instance = kmedians(input_data, initial_centers, ccore=ccore_flag)
     kmedians_instance.process()

     already_seen = [False] * number_objects
     allocated_total = 0

     for cluster in kmedians_instance.get_clusters():
         for index_object in cluster:
             # One object can be in only one cluster.
             assert not already_seen[index_object]

             already_seen[index_object] = True
             allocated_total += 1

     # Number of allocated objects should be the same.
     assert allocated_total == number_objects
Пример #7
0
 def testDifferentDimensions(self):
     """Expect NameError from process() when points have 3 coordinates but the median has 2."""
     sample = [[0, 1, 5], [0, 2, 3]]
     start_medians = [[0, 3]]
     kmedians_instance = kmedians(sample, start_medians)

     with self.assertRaises(NameError):
         kmedians_instance.process()
Пример #8
0
 def testCoreInterfaceIntInputData(self):
     """Run k-medians with ccore=True on integer data and expect two clusters."""
     sample = [[1], [2], [3], [20], [21], [22]]
     start_medians = [[2], [21]]

     kmedians_instance = kmedians(sample, start_medians, ccore=True)
     kmedians_instance.process()

     clusters = kmedians_instance.get_clusters()
     assert len(clusters) == 2
Пример #9
0
def process_kmedians(sample):
    """Time one k-medians run over `sample`.

    Builds NUMBER_CLUSTERS random two-dimensional initial medians and returns
    the first element of the pair produced by timedcall().
    """
    initial_medians = []
    for multiplier in range(NUMBER_CLUSTERS):
        # NOTE(review): the first coordinate is shifted by `multiplier * 5`, the
        # second by `multiplier + 5`; kept as written - confirm this is intended.
        x_coordinate = random() + (multiplier * 5)
        y_coordinate = random() + (multiplier + 5)
        initial_medians.append([x_coordinate, y_coordinate])

    instance = kmedians(sample, initial_medians)
    ticks, _ = timedcall(instance.process)
    return ticks
Пример #10
0
 def testDifferentDimensions(self):
     """Expect NameError from process() (ccore=False) when points and median differ in dimension."""
     sample = [[0, 1, 5], [0, 2, 3]]
     start_medians = [[0, 3]]
     kmedians_instance = kmedians(sample, start_medians, ccore=False)

     with self.assertRaises(NameError):
         kmedians_instance.process()
Пример #11
0
def process_kmedians(sample):
    """Measure a single k-medians run on `sample`.

    NUMBER_CLUSTERS random two-dimensional initial medians are generated; the
    function returns the first element of the pair produced by timedcall().
    """
    start_medians = []
    for multiplier in range(NUMBER_CLUSTERS):
        # NOTE(review): offsets are `multiplier * 5` for the first coordinate but
        # `multiplier + 5` for the second; preserved as is - verify the intent.
        first = random() + (multiplier * 5)
        second = random() + (multiplier + 5)
        start_medians.append([first, second])

    instance = kmedians(sample, start_medians)
    ticks, _ = timedcall(instance.process)
    return ticks
Пример #12
0
 def testTotalWCESimple4(self):
     """Check that the total WCE of a freshly constructed k-medians instance is not 16.0.

     process() is not called before the check, exactly as in the original test.
     NOTE(review): confirm that checking the value before processing is intended.
     """
     sample = [[0, 1, 5], [7, 8, 9], [0, 2, 3], [4, 5, 6]]
     initial_medians = [[0, 3, 2], [4, 6, 5]]
     kmedians_instance = kmedians(sample, initial_medians, ccore=False)

     # assertNotEqual takes (first, second, msg). The previous call passed `self`
     # as `first`, so the TestCase object was compared with the WCE and 16.0 was
     # only the failure message - the assertion could never fail.
     self.assertNotEqual(kmedians_instance.get_total_wce(), 16.0)
Пример #13
0
# Tidy up the plot: set the title and axis labels, then let seaborn trim the spines.
# NOTE(review): `ax` and `sns` are defined earlier in the file (presumably a
# matplotlib Axes and the seaborn module) - confirm.
ax.set(title='Elbow Plot',
       xlabel='Number of Clusters',
       ylabel='Total distance')
sns.despine(offset=5, trim=True)

# Next, run k-medians (clusters are summarized by medians rather than means)
# using the pyclustering library.
# If pyclustering is not installed, install it first:
#   python -m pip install pyclustering
from pyclustering.cluster.kmedians import kmedians
from pyclustering.cluster import cluster_visualizer

# Create an instance of the K-Medians algorithm with three starting medians.
# NOTE(review): `xt` is the data set prepared earlier in the file - confirm its shape.
initial_medians = [[0.0, 0.1], [2.5, 0.7], [3.5, 1.5]]
kmedians_instance = kmedians(xt, initial_medians)
# Run the cluster analysis and collect the results.
kmedians_instance.process()
clusters = kmedians_instance.get_clusters()
medians = kmedians_instance.get_medians()
# How well did it do?
# The total WCE is the sum of metric errors, calculated from the distance
# between each point and its center.
print(kmedians_instance.get_total_wce())

# Visualize the clustering results: the clusters first, then the medians as black stars.
visualizer = cluster_visualizer()
visualizer.append_clusters(clusters, xt)
# Optional - also plot the starting medians:
#   visualizer.append_cluster(initial_medians, marker='*', markersize=10)
visualizer.append_cluster(medians, marker='*', markersize=10, color='k')
visualizer.show()