Exemplo n.º 1
0
    def template_cluster_allocation(input_data, cluster_sizes, number_cluster, number_represent_points = 5, compression = 0.5, ccore_flag = False, **kwargs):
        """Run CURE on a sample and check the number and sizes of the clusters.

        `input_data` is handed to `read_sample` when it is a string, otherwise
        it is used as the sample itself.  Passing `numpy_usage=True` converts
        the sample with `numpy.array` before clustering.  `cluster_sizes` is
        sorted in place as part of the final comparison.
        """
        sample = read_sample(input_data) if isinstance(input_data, str) else input_data
        if kwargs.get('numpy_usage', False) is True:
            sample = numpy.array(sample)

        algorithm = cure(sample, number_cluster, number_represent_points, compression, ccore = ccore_flag)
        algorithm.process()

        clusters = algorithm.get_clusters()
        representors = algorithm.get_representors()
        means = algorithm.get_means()

        # Every per-cluster result must hold one entry per requested cluster.
        for per_cluster_result in (clusters, representors, means):
            assertion.eq(len(per_cluster_result), number_cluster)

        actual_sizes = list(map(len, clusters))

        # The cluster sizes must add up to the size of the sample.
        assertion.eq(sum(actual_sizes), len(sample))

        # Cluster order is irrelevant, so the sorted size lists are compared.
        cluster_sizes.sort()
        actual_sizes.sort()
        assertion.eq(cluster_sizes, actual_sizes)
Exemplo n.º 2
0
    def template_cluster_allocation(path,
                                    cluster_sizes,
                                    number_cluster,
                                    number_represent_points=5,
                                    compression=0.5,
                                    ccore_flag=False):
        """Cluster the sample read from `path` with CURE and verify the outcome.

        Asserts that clusters, representors and means each contain
        `number_cluster` entries, that the cluster sizes sum to the sample
        length, and that the sorted sizes equal `cluster_sizes` (which is
        sorted in place).
        """
        points = read_sample(path)

        algorithm = cure(points, number_cluster, number_represent_points,
                         compression, ccore=ccore_flag)
        algorithm.process()

        clusters = algorithm.get_clusters()
        representors = algorithm.get_representors()
        means = algorithm.get_means()

        for produced in (clusters, representors, means):
            assert len(produced) == number_cluster

        sizes = [len(members) for members in clusters]
        assert sum(sizes) == len(points)

        # Compare independent of the order in which clusters were returned.
        cluster_sizes.sort()
        assert cluster_sizes == sorted(sizes)
Exemplo n.º 3
0
    def template_cluster_allocation(input_data, cluster_sizes, number_cluster, number_represent_points = 5, compression = 0.5, ccore_flag = False, **kwargs):
        """Check that CURE allocates `number_cluster` clusters of the expected sizes.

        A string `input_data` is loaded through `read_sample`; anything else is
        taken as the sample.  The keyword `numpy_usage=True` wraps the sample
        in `numpy.array`.  Note that `cluster_sizes` is sorted in place.
        """
        if isinstance(input_data, str):
            points = read_sample(input_data)
        else:
            points = input_data

        if kwargs.get('numpy_usage', False) is True:
            points = numpy.array(points)

        clusterer = cure(points, number_cluster, number_represent_points, compression, ccore = ccore_flag)
        clusterer.process()

        clusters = clusterer.get_clusters()
        representors = clusterer.get_representors()
        means = clusterer.get_means()

        assertion.eq(len(clusters), number_cluster)
        assertion.eq(len(representors), number_cluster)
        assertion.eq(len(means), number_cluster)

        found_sizes = [len(members) for members in clusters]

        # All points of the sample must be accounted for by the clusters.
        assertion.eq(sum(found_sizes), len(points))

        cluster_sizes.sort()
        assertion.eq(cluster_sizes, sorted(found_sizes))
def cureAlgo(filename, col_name):
    """Cluster one CSV column with CURE and save a picture of the clusters.

    The values of column `col_name` in `filename` are grouped into consecutive
    pairs, each pair being used as one 2-D point.  The points are clustered
    into 10 clusters, printed, drawn with `cluster_visualizer` and the figure
    is written to a fixed path on disk.

    When the column holds an odd number of values, the last value has no
    partner and is dropped.  (The previous code rounded the row count up
    without padding the data, so the reshape raised ValueError.)
    """
    df = pd.read_csv(filename, usecols=[col_name])

    # Convert the pandas Series into an ndarray.
    values = np.asarray(df[col_name])

    # Only complete pairs can form a 2-D point.
    pair_count = len(values) // 2
    input_data = values[:pair_count * 2].reshape(pair_count, 2)
    print(input_data)
    print(input_data.shape)
    print(
        "----------------------------------------------------------------------------------------------------------------------"
    )
    # Allocate 10 clusters:
    cure_instance = cure(input_data.tolist(), 10)
    cure_instance.process()
    clusters = cure_instance.get_clusters()
    print(clusters)
    # NOTE(review): this times an unrelated string join, not the clustering;
    # kept only so the printed output stays the same.
    print(timeit.timeit('"-".join(str(n) for n in range(100))', number=10000))
    # Visualize clusters:
    visualizer = cluster_visualizer()
    visualizer.append_clusters(clusters, None)
    visualizer.show(display=False)
    plt.savefig(
        "C:/Users/Nupura Hajare/Desktop/flask_app/web/static/img/CURE.png")
Exemplo n.º 5
0
def template_clustering(number_clusters,
                        path,
                        number_represent_points=5,
                        compression=0.5,
                        draw=True,
                        ccore_flag=False):
    """Time a CURE run on the sample at `path` and optionally draw the result.

    The execution time reported by `timedcall` is printed.  When `draw` is
    True the clusters, their representors (marker '*', size 10) and the means
    (marker 'o') are shown.  The sample is passed to the visualizer as data
    only when `ccore_flag` is True; otherwise `None` is passed.
    """
    sample = read_sample(path)

    algorithm = cure(sample, number_clusters, number_represent_points,
                     compression, ccore_flag)
    elapsed, _ = timedcall(algorithm.process)

    clusters = algorithm.get_clusters()
    representors = algorithm.get_representors()
    means = algorithm.get_means()

    print("Sample: ", path, "\t\tExecution time: ", elapsed, "\n")

    if draw is not True:
        return

    visualizer = cluster_visualizer()
    drawing_data = sample if ccore_flag is True else None
    visualizer.append_clusters(clusters, drawing_data)
    visualizer.append_clusters(representors, marker='*', markersize=10)
    visualizer.append_clusters([means], None, marker='o')
    visualizer.show()
Exemplo n.º 6
0
    def get_cure_clusters(data, count_clusters=3):
        """Cluster the rows of `data` with CURE and wrap each cluster in a `Cluster`.

        Each resulting cluster gets a colour picked at random from
        `Constants.DEFAULT_COLOR_SET`, used both as its name and its colour.
        The ratio SSB / SST is appended to `RS_RESULT`, which is then printed.
        Returns the list of `Cluster` objects.
        """
        input_data = [row.getDataArray() for row in data.getRows()]
        SST = calculate_sst(input_data)

        algorithm = cure(input_data, count_clusters)
        algorithm.process()
        clusters = algorithm.get_clusters()

        palette = Constants.DEFAULT_COLOR_SET
        result_clusters = []
        SSW = 0
        for members in clusters:
            SSW = SSW + calculate_ssw(members)
            wrapped = Cluster(CureWindow.get_rows(data, members))
            colour = random.choice(palette)
            wrapped.setName(colour)
            wrapped.setColor(colour)
            result_clusters.append(wrapped)

        SSB = calculate_ssb(SST, SSW)
        RS_RESULT.append(SSB / SST)

        print(RS_RESULT)
        return result_clusters
Exemplo n.º 7
0
def template_clustering(number_clusters,
                        path,
                        number_represent_points=5,
                        compression=0.5,
                        draw=True,
                        ccore_flag=False):
    """Time a CURE run on the sample at `path` and optionally draw the result.

    The execution time reported by `timedcall` is printed.  When `draw` is
    True the clusters are drawn over the sample; for every cluster its
    representors are attached with marker '*' (size 10) and its mean with
    marker 'o'.
    """
    sample = read_sample(path)

    algorithm = cure(sample, number_clusters, number_represent_points,
                     compression, ccore_flag)
    elapsed, _ = timedcall(algorithm.process)

    clusters = algorithm.get_clusters()
    representors = algorithm.get_representors()
    means = algorithm.get_means()

    print("Sample: ", path, "\t\tExecution time: ", elapsed, "\n")

    if draw is not True:
        return

    visualizer = cluster_visualizer()
    visualizer.append_clusters(clusters, sample)

    for index in range(len(clusters)):
        visualizer.append_cluster_attribute(0, index, representors[index],
                                            '*', 10)
        visualizer.append_cluster_attribute(0, index, [means[index]], 'o')

    visualizer.show()
def cure_func(data, k):
    """Cluster `data` into `int(k)` clusters with CURE and return the clusters.

    `data` is wrapped in a DataFrame and every column is converted with
    `pd.to_numeric` before the values are handed to `cure` as a numpy array.
    """
    numeric_frame = DataFrame(data).apply(pd.to_numeric)
    model = cure(numeric_frame.to_numpy(), int(k))
    model.process()
    return model.get_clusters()
Exemplo n.º 9
0
    def templateClusterAllocationOneDimensionData(ccore_flag):
        """Check that CURE separates four groups of ten random 1-D points.

        The groups are drawn from random() shifted by 0, 3, 5 and 8.
        """
        groups = (
            [ [random()] for _ in range(10) ],
            [ [random() + 3] for _ in range(10) ],
            [ [random() + 5] for _ in range(10) ],
            [ [random() + 8] for _ in range(10) ],
        )
        input_data = [ point for group in groups for point in group ]

        algorithm = cure(input_data, 4, ccore = ccore_flag)
        algorithm.process()
        clusters = algorithm.get_clusters()

        assertion.eq(4, len(clusters))
        for members in clusters:
            assertion.eq(10, len(members))
Exemplo n.º 10
0
 def templateClusterAllocationOneDimensionData(self, ccore_flag):
     """Check that CURE finds four clusters of ten points in 1-D random data.

     The data consists of four groups of ten values: random() shifted by
     0, 3, 5 and 8.
     """
     input_data = []
     input_data += [ [random()] for _ in range(10) ]
     input_data += [ [random() + 3] for _ in range(10) ]
     input_data += [ [random() + 5] for _ in range(10) ]
     input_data += [ [random() + 8] for _ in range(10) ]

     algorithm = cure(input_data, 4, ccore = ccore_flag)
     algorithm.process()
     clusters = algorithm.get_clusters()

     assert len(clusters) == 4
     for members in clusters:
         assert len(members) == 10
Exemplo n.º 11
0
    def templateClusterAllocationOneDimensionData(ccore_flag):
        """Verify CURE on 1-D data: four shifted groups of ten random values.

        Expects exactly four clusters containing ten points each.
        """
        first = [ [random()] for _ in range(10) ]
        second = [ [random() + 3] for _ in range(10) ]
        third = [ [random() + 5] for _ in range(10) ]
        fourth = [ [random() + 8] for _ in range(10) ]
        input_data = first + second + third + fourth

        clusterer = cure(input_data, 4, ccore = ccore_flag)
        clusterer.process()
        clusters = clusterer.get_clusters()

        assertion.eq(4, len(clusters))
        for group in clusters:
            assertion.eq(10, len(group))
Exemplo n.º 12
0
 def templateClusterAllocationOneDimensionData(self, ccore_flag):
     """Verify that CURE splits 1-D random data into four clusters of ten points.

     Four groups of ten values are generated from random() shifted by
     0, 3, 5 and 8 and clustered together.
     """
     groups = (
         [ [random()] for _ in range(10) ],
         [ [random() + 3] for _ in range(10) ],
         [ [random() + 5] for _ in range(10) ],
         [ [random() + 8] for _ in range(10) ],
     )
     input_data = [ point for group in groups for point in group ]

     clusterer = cure(input_data, 4, ccore = ccore_flag)
     clusterer.process()
     clusters = clusterer.get_clusters()

     assert len(clusters) == 4
     for group in clusters:
         assert len(group) == 10
Exemplo n.º 13
0
    def get_modelo(self, algoritmo, eps, neig):
        """Cluster `self.amostras` with the named algorithm and return the grouped list.

        Args:
            algoritmo: Algorithm name, one of 'AGNES', 'BIRCH', 'CLARANS',
                'CURE', 'DBSCAN', 'FCM', 'KMEANS', 'KMEDOIDS', 'OPTICS', 'ROCK'.
            eps: Value passed as the `eps` argument to DBSCAN, OPTICS and ROCK.
            neig: Value passed as `neighbors` to DBSCAN and `minpts` to OPTICS.
                When it is non-zero and the result holds more distinct values
                than `self.numero_clusters`, the call is repeated with
                `neig + 1`.

        Returns:
            A numpy array built from `self.get_lista_agrupada(clusters)`.

        Raises:
            ValueError: If `algoritmo` is not one of the supported names
                (previously this surfaced as an AttributeError on `None`).
        """
        print(algoritmo + ' ' + str(eps) + ' - ' + str(neig))

        if algoritmo == 'AGNES':
            instance = agglomerative(self.amostras,
                                     self.numero_clusters,
                                     link=None)
        elif algoritmo == 'BIRCH':
            instance = birch(self.amostras,
                             self.numero_clusters,
                             entry_size_limit=10000)
        elif algoritmo == 'CLARANS':
            instance = clarans(self.amostras,
                               self.numero_clusters,
                               numlocal=100,
                               maxneighbor=1)
        elif algoritmo == 'CURE':
            instance = cure(self.amostras,
                            self.numero_clusters,
                            number_represent_points=5,
                            compression=0.5)
        elif algoritmo == 'DBSCAN':
            instance = dbscan(self.amostras, eps=eps, neighbors=neig)
        elif algoritmo == 'FCM':
            initial_centers = kmeans_plusplus_initializer(
                self.amostras, self.numero_clusters).initialize()
            instance = fcm(self.amostras, initial_centers)
        elif algoritmo == 'KMEANS':
            initial_centers = kmeans_plusplus_initializer(
                self.amostras, self.numero_clusters).initialize()
            instance = kmeans(self.amostras, initial_centers, tolerance=0.001)
        elif algoritmo == 'KMEDOIDS':
            # TODO: adjust the initial medoids to the number of clusters;
            # seven identical indices are hard-coded for now.
            instance = kmedoids(self.amostras,
                                initial_index_medoids=[0, 0, 0, 0, 0, 0, 0],
                                tolerance=0.0001)
        elif algoritmo == 'OPTICS':
            instance = optics(self.amostras, eps=eps, minpts=neig)
        elif algoritmo == 'ROCK':
            instance = rock(self.amostras,
                            eps=eps,
                            number_clusters=self.numero_clusters,
                            threshold=0.5)
        else:
            # Fail with a clear message instead of calling .process() on None.
            raise ValueError("Unsupported clustering algorithm: %r" % (algoritmo,))

        instance.process()
        lista_agrupada = self.get_lista_agrupada(instance.get_clusters())
        lista_agrupada = np.array(lista_agrupada)

        if neig != 0:
            # Too many groups: retry with a larger neighbourhood parameter.
            n_grupos = len(np.unique(lista_agrupada))
            if n_grupos > self.numero_clusters:
                lista_agrupada = self.get_modelo(algoritmo, eps, neig + 1)
        return lista_agrupada
Exemplo n.º 14
0
def template_clustering(number_clusters, path, number_represent_points = 5, compression = 0.5, draw = True, ccore_flag = False):
    """Time a CURE run on the sample at `path` and optionally draw the clusters.

    The execution time reported by `timedcall` is printed.  When `draw` is
    True the clusters are drawn with `draw_clusters`; the sample is passed as
    data only when `ccore_flag` is True, otherwise `None` is passed.
    """
    sample = read_sample(path)

    algorithm = cure(sample, number_clusters, number_represent_points, compression, ccore_flag)
    elapsed, _ = timedcall(algorithm.process)
    clusters = algorithm.get_clusters()

    print("Sample: ", path, "\t\tExecution time: ", elapsed, "\n")

    if draw is not True:
        return

    drawing_data = sample if ccore_flag is True else None
    draw_clusters(drawing_data, clusters)
Exemplo n.º 15
0
    def template_cluster_allocation(self, path, cluster_sizes, number_cluster, number_represent_points = 5, compression = 0.5, ccore_flag = False):
        """Cluster the sample at `path` with CURE and check the cluster sizes.

        Asserts that the cluster sizes sum to the sample length and that the
        sorted sizes equal `cluster_sizes` (which is sorted in place).

        `number_represent_points` and `compression` are forwarded to `cure`;
        they used to be accepted but silently ignored.
        """
        sample = read_sample(path)

        cure_instance = cure(sample, number_cluster, number_represent_points, compression, ccore = ccore_flag)
        cure_instance.process()
        clusters = cure_instance.get_clusters()

        obtained_cluster_sizes = [len(cluster) for cluster in clusters]

        # The clusters together must account for the whole sample.
        assert sum(obtained_cluster_sizes) == len(sample)

        # Cluster order is irrelevant, so compare sorted size lists.
        cluster_sizes.sort()
        obtained_cluster_sizes.sort()
        assert cluster_sizes == obtained_cluster_sizes
Exemplo n.º 16
0
    def templateEncoderProcedures(ccore_flag):
        """Run CURE on SAMPLE_SIMPLE3 and switch the result between encodings.

        The cluster encoder is moved through index labeling, object list
        separation and index list separation, after which the number of
        clusters must still be 4.
        """
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)

        algorithm = cure(sample, 4, 5, 0.5, ccore=ccore_flag)
        algorithm.process()

        clusters = algorithm.get_clusters()
        encoding = algorithm.get_cluster_encoding()

        encoder = cluster_encoder(encoding, clusters, sample)
        for target_encoding in (type_encoding.CLUSTER_INDEX_LABELING,
                                type_encoding.CLUSTER_OBJECT_LIST_SEPARATION,
                                type_encoding.CLUSTER_INDEX_LIST_SEPARATION):
            encoder.set_encoding(target_encoding)

        assert 4 == len(clusters)
Exemplo n.º 17
0
    def templateEncoderProcedures(ccore_flag):
        """Exercise the cluster encoder on a CURE result for SAMPLE_SIMPLE3.

        The encoding is switched to index labeling, object list separation and
        index list separation in turn; four clusters are expected afterwards.
        """
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)

        clusterer = cure(sample, 4, 5, 0.5, ccore = ccore_flag)
        clusterer.process()

        clusters = clusterer.get_clusters()
        encoder = cluster_encoder(clusterer.get_cluster_encoding(), clusters, sample)

        encoding_sequence = (
            type_encoding.CLUSTER_INDEX_LABELING,
            type_encoding.CLUSTER_OBJECT_LIST_SEPARATION,
            type_encoding.CLUSTER_INDEX_LIST_SEPARATION,
        )
        for requested in encoding_sequence:
            encoder.set_encoding(requested)

        assertion.eq(4, len(clusters))
Exemplo n.º 18
0
 def testVisualizeClusterWithAttributes(self):
     """Draw CURE clusters of SAMPLE_SIMPLE1 together with their attributes.

     For every cluster the representors are attached with marker '*'
     (size 10) and the mean with marker 'o'.
     """
     sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)
     algorithm = cure(sample, 2, 5, 0.5, False)
     algorithm.process()

     clusters = algorithm.get_clusters()
     representors = algorithm.get_representors()
     means = algorithm.get_means()

     visualizer = cluster_visualizer()
     visualizer.append_clusters(clusters, sample)

     for index in range(len(clusters)):
         visualizer.append_cluster_attribute(0, index, representors[index], '*', 10)
         visualizer.append_cluster_attribute(0, index, [ means[index] ], 'o')

     visualizer.show()
Exemplo n.º 19
0
    def exception(type, input_data, number_cluster, number_represent_points, compression, ccore_flag):
        """Assert that running CURE with the given arguments raises `type`.

        `input_data` is loaded with `read_sample` when it is a string,
        otherwise it is used as the sample.  Returns None when the expected
        exception is raised.

        Raises:
            AssertionError: If a different exception is raised, or none at all.
        """
        # The parameter shadows the builtin `type`; its name is kept for
        # callers, but the builtin must not be used through it below.
        expected = type

        try:
            if isinstance(input_data, str):
                sample = read_sample(input_data)
            else:
                sample = input_data

            cure_instance = cure(sample, number_cluster, number_represent_points, compression, ccore=ccore_flag)
            cure_instance.process()

        except expected:
            return

        except Exception as ex:
            # `type(ex)` would instantiate the expected exception class here,
            # so the actual class is read from the instance instead.
            raise AssertionError("Expected: '%s', Actual: '%s'" % (expected, ex.__class__.__name__))

        raise AssertionError("Expected: '%s', Actual: 'None'" % (expected,))
Exemplo n.º 20
0
    def exception(type, input_data, number_cluster, number_represent_points, compression, ccore_flag):
        """Check that CURE fails with the exception `type` for the given arguments.

        A string `input_data` is read with `read_sample`; anything else is
        used as the sample directly.  The function returns None when `type`
        is raised while creating or processing the CURE instance.

        Raises:
            AssertionError: If another exception is raised, or no exception
                is raised at all.
        """
        # `type` shadows the builtin of the same name inside this function, so
        # the builtin cannot be called here; keep a clearer local alias.
        expected_error = type

        try:
            if isinstance(input_data, str):
                sample = read_sample(input_data)
            else:
                sample = input_data

            cure_instance = cure(sample, number_cluster, number_represent_points, compression, ccore=ccore_flag)
            cure_instance.process()

        except expected_error:
            return

        except Exception as ex:
            # Report the class of the exception that actually occurred.
            actual_name = ex.__class__.__name__
            raise AssertionError("Expected: '%s', Actual: '%s'" % (expected_error, actual_name))

        raise AssertionError("Expected: '%s', Actual: 'None'" % (expected_error,))
Exemplo n.º 21
0
 def testVisualizeClusterWithAttributes(self):
     """Visualize CURE clusters of SAMPLE_SIMPLE1 with representors and means.

     Representors of each cluster are attached with marker '*' (size 10),
     the cluster mean with marker 'o'.
     """
     sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)
     clusterer = cure(sample, 2, 5, 0.5, False)
     clusterer.process()

     clusters = clusterer.get_clusters()
     representors = clusterer.get_representors()
     means = clusterer.get_means()

     visualizer = cluster_visualizer()
     visualizer.append_clusters(clusters, sample)

     cluster_index = 0
     while cluster_index < len(clusters):
         visualizer.append_cluster_attribute(0, cluster_index, representors[cluster_index], '*', 10)
         visualizer.append_cluster_attribute(0, cluster_index, [ means[cluster_index] ], 'o')
         cluster_index += 1

     visualizer.show()
Exemplo n.º 22
0
    def runCURE(self, k, X):
        """Cluster `X` into `k` groups with CURE and collect the points per group.

        Args:
            k: Number of clusters requested from CURE.
            X: Indexable collection of points.

        Returns:
            A dict with the keys 0 .. k-1; each value is the list of elements
            of `X` whose index appears in the cluster at that position, in
            ascending index order.
        """
        cluster_points = {q: [] for q in range(k)}

        cure_instance = cure(data=X, number_cluster=k)
        cure_instance.process()
        clusters = cure_instance.get_clusters()

        point_count = len(X)
        for cluster_id, cluster in enumerate(clusters):
            # Walk each cluster once instead of rebuilding every cluster's id
            # list for every point.  Sorting the de-duplicated ids keeps the
            # ascending order the per-point scan produced.
            member_ids = sorted({int(point_id) for point_id in cluster})
            for point_id in member_ids:
                if 0 <= point_id < point_count:
                    cluster_points[cluster_id].append(X[point_id])

        return cluster_points
Exemplo n.º 23
0
def template_clustering(number_clusters,
                        path,
                        number_represent_points=5,
                        compression=0.5,
                        draw=True,
                        ccore_flag=False):
    """Time a CURE run on the sample at `path` and optionally draw the clusters.

    The execution time reported by `timedcall` is printed.  When `draw` is
    True the clusters are drawn with `draw_clusters`, passing the sample as
    data only if `ccore_flag` is True and `None` otherwise.
    """
    sample = read_sample(path)

    algorithm = cure(sample, number_clusters, number_represent_points,
                     compression, ccore_flag)
    elapsed, _ = timedcall(algorithm.process)
    clusters = algorithm.get_clusters()

    print("Sample: ", path, "\t\tExecution time: ", elapsed, "\n")

    if draw is not True:
        return

    if ccore_flag is True:
        draw_clusters(sample, clusters)
        return

    draw_clusters(None, clusters)
Exemplo n.º 24
0
def r_python_cure_iterface(filepath):
    """Cluster the rows of a comma-separated file with CURE.

    The first row of the file is skipped; every other row is converted to a
    list of floats.  The samples are clustered into 8 clusters with 8
    representative points and a compression of 0.25, and the clusters are
    returned.
    """
    with open(filepath, 'r') as csvfile:
        rows = csv.reader(csvfile, delimiter=",")
        samples = [[float(value) for value in row]
                   for row_number, row in enumerate(rows)
                   if row_number != 0]

    algorithm = cure(samples,
                     8,
                     number_represent_points=8,
                     compression=0.25)
    algorithm.process()
    return algorithm.get_clusters()
Exemplo n.º 25
0
    def testVisualizeClusterWithAttributesNumpy(self):
        """Draw CURE clusters of SAMPLE_SIMPLE1 with numpy-based attributes.

        The sample is read with `return_type='numpy'`; representors and means
        of each cluster are converted with `numpy.array` before being attached
        to the visualizer ('*' with size 10, and 'o').
        """
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1,
                             return_type='numpy')
        algorithm = cure(sample, 2, 5, 0.5, False)
        algorithm.process()

        clusters = algorithm.get_clusters()
        representors = algorithm.get_representors()
        means = algorithm.get_means()

        visualizer = cluster_visualizer()
        visualizer.append_clusters(clusters, sample)

        for index in range(len(clusters)):
            visualizer.append_cluster_attribute(
                0, index, numpy.array(representors[index]), '*', 10)
            visualizer.append_cluster_attribute(
                0, index, numpy.array([means[index]]), 'o')

        visualizer.show()
Exemplo n.º 26
0
def cure_clustering(k=-1):
    """Perform CURE clustering and pickle the clusters for each cluster count.

    The sample is read from "results/pca_result.txt".  With the default
    `k=-1` the cluster counts 5, 10, ..., 1000 are processed; otherwise only
    `k` itself.  The clusters for each count are written to
    'results/cure_<count>.pickle'.
    """
    if k == -1:
        begin, end = 5, 1001
    else:
        begin, end = k, k + 1

    input_data = read_sample("results/pca_result.txt")

    for cluster_count in range(begin, end, 5):
        print(str(cluster_count) + " clusters started for CURE")
        cure_instance = cure(input_data, cluster_count)
        cure_instance.process()
        cure_clusters = cure_instance.get_clusters()
        print(str(cluster_count) + " clusters completed for CURE")

        # The context manager closes the file even if pickling fails.
        with open('results/cure_' + str(cluster_count) + '.pickle', 'wb') as results_file:
            pickle.dump(cure_clusters, results_file)
Exemplo n.º 27
0
def template_clustering(number_clusters, path, number_represent_points=5, compression=0.5, draw=True, ccore_flag=True):
    """Time a CURE run on the sample at `path` and optionally draw the result.

    The execution time reported by `timedcall` is printed.  When `draw` is
    True the clusters are drawn over the sample, and each cluster gets its
    representors (marker '*', size 10) and its mean (marker 'o') attached.
    """
    sample = read_sample(path)

    algorithm = cure(sample, number_clusters, number_represent_points, compression, ccore_flag)
    elapsed, _ = timedcall(algorithm.process)

    clusters = algorithm.get_clusters()
    representors = algorithm.get_representors()
    means = algorithm.get_means()

    print("Sample: ", path, "\t\tExecution time: ", elapsed, "\n")

    if draw is not True:
        return

    visualizer = cluster_visualizer()
    visualizer.append_clusters(clusters, sample)

    for index in range(len(clusters)):
        visualizer.append_cluster_attribute(0, index, representors[index], '*', 10)
        visualizer.append_cluster_attribute(0, index, [ means[index] ], 'o')

    visualizer.show()
Exemplo n.º 28
0
    # Load the Doc2Vec model saved for `i` (`i` is defined outside this
    # fragment) and open a new figure for it.
    model = Doc2Vec.load('models/%s.d2v' % i)
    plt.figure()

    # Convert the sequence of document vectors into a list of numpy arrays.
    docvecs = []
    for num in range(len(model.docvecs)):
        # print(num)
        # print(model.docvecs[num])
        docvecs.append(np.array(model.docvecs[num]))

    # One score curve per number of representative points.
    for Rpoint in Parameter.represent_point:
        silhouette_scores = []
        calinski_scores = []

        # Cluster with every cluster count in Parameter.K and score the result.
        for index in Parameter.K:
            cure_model = cure(docvecs, index, number_represent_points=Rpoint)
            cure_model.process()
            clusters = cure_model.get_clusters()
            # Turn the lists of point indices into one label per document;
            # a document that appears in no cluster keeps the initial label 1.
            labels = [1] * len(docvecs)
            for ind in range(len(clusters)):
                for element in clusters[ind]:
                    labels[element] = ind


            # NOTE(review): the message prints `i`, not the cluster count `index`.
            print("Performance with threshold %d:" % i)
            silhouette_scores.append(metrics.silhouette_score(docvecs, labels))
            # NOTE(review): newer scikit-learn spells this
            # `calinski_harabasz_score` -- confirm against the installed version.
            calinski_scores.append(metrics.calinski_harabaz_score(docvecs, labels))

        # Only the silhouette curve is plotted in the lines visible here.
        plt.subplot(1, 2, 1)
        plt.plot(Parameter.K, silhouette_scores, label=str(Rpoint))
        plt.legend()
Exemplo n.º 29
0
from pyclustering.cluster import cluster_visualizer
from pyclustering.cluster.cure import cure
from pyclustering.utils import read_sample
from pyclustering.samples.definitions import FCPS_SAMPLES

# Input data in following format [ [0.1, 0.5], [0.3, 0.1], ... ].
# NOTE: this sample is loaded, but the clustering below works on `inp`.
input_data = read_sample(FCPS_SAMPLES.SAMPLE_CHAINLINK)

# Read 2-D points from "t4.8k", skipping every line that does not consist of
# exactly two whitespace-separated fields.  The context manager closes the
# file, which was previously left open.
inp = []
with open("t4.8k", "r") as lines:
    for line in lines:
        cords = line.split()
        if len(cords) != 2:
            continue
        inp.append([float(cords[0]), float(cords[1])])

# Allocate six clusters.
cure_instance = cure(inp, 6)
cure_instance.process()
clusters = cure_instance.get_clusters()

# Visualize allocated clusters.
visualizer = cluster_visualizer()
visualizer.append_clusters(clusters, inp)
visualizer.show()
Exemplo n.º 30
0
# Hard-coded switch between two clustering back-ends: the first branch
# (Birch seeded with a KMeans estimator) always runs, the `else` branch
# (CURE) is kept for reference and never executes.
if True:
	data = np.loadtxt(args.i)
	# Drop the first column of the loaded table.
	data = np.delete(data, 0, 1)

	# data = preprocessing.normalize(data, norm = 'max', axis = 0);

	# clus = cluster.AgglomerativeClustering(n_clusters = args.n)
	clusK = cluster.KMeans(n_clusters = args.n, init = 'k-means++', n_init = 1, verbose = args.d, tol = args.t, copy_x = False, algorithm = 'elkan')
	# clusK = cluster.BisectingKMeans(n_clusters = args.n, init = 'k-means++', n_init = 1, verbose = args.d, tol = args.t, copy_x = False, algorithm = 'elkan')
	# The KMeans estimator is handed to Birch as its `n_clusters` argument.
	clus = cluster.Birch(n_clusters = clusK, copy = False, threshold = args.t, branching_factor = 50)
	# clus = cluster.SpectralClustering(n_clusters = args.n, assign_labels = 'discretize', affinity = 'nearest_neighbors', n_neighbors = 15, random_state = 42, eigen_tol = args.t)

	clus.fit(data)

	# Write one integer label per row next to the input file.
	np.savetxt(args.i + '.membership', clus.labels_, fmt = '%d')
else:
	data  = read_sample(args.i)

	pyc = cure(data = data, number_cluster = args.n);

	pyc.process();
	clusters = pyc.get_clusters();
	print(clusters)

	# Convert the lists of point indices into one cluster number per point;
	# a point that appears in no cluster keeps the initial value 0.
	points_clusters = [0] * len(data)
	for i, clus in enumerate(clusters):
		for c in clus:
			points_clusters[c] = i

	np.savetxt(args.i + '.membership', points_clusters, fmt = '%d')
Exemplo n.º 31
0
# In[23]:


import pyclustering
from pyclustering.cluster import cluster_visualizer
from pyclustering.cluster.cure import cure
from pyclustering.utils import read_sample
from pyclustering.samples.definitions import FCPS_SAMPLES

# Input data in following format [ [0.1, 0.5], [0.3, 0.1], ... ].
#input_data = read_sample(FCPS_SAMPLES.SAMPLE_LSUN);
# Allocate five clusters on the first two columns of `finalDataFrame`
# (the frame is defined outside the lines shown here).

X=finalDataFrame.iloc[:,[0,1]].to_numpy()
cure_instance = cure(X, 5)
cure_instance.process()
clusters = cure_instance.get_clusters()

# Bare expression: it only produces output when evaluated as the last
# statement of an interactive/notebook cell.
clusters


# In[24]:


# Visualize allocated clusters.
visualizer = cluster_visualizer()
visualizer.append_clusters(clusters, X)
visualizer.show()

Exemplo n.º 32
0
    [15, 12],
    [43, 67],
    [45, 56],
    [63, 54],
    [49, 50],
    [24, 10],
    [30, 30],
    [85, 70],
    [71, 80],
    [60, 78],
    [70, 55],
    [80, 91],
])

#    cure_instance = cure(sample, number_clusters, number_represent_points, compression, ccore_flag)
# Cluster the points of `X` (defined above) into three clusters; the
# remaining CURE parameters are left at their defaults.
cure_instance = cure(X, 3)
cure_instance.process()
clusters = cure_instance.get_clusters()
print(clusters)

representors = cure_instance.get_representors()
# NOTE(review): `means` is not used in the lines visible here -- the script
# presumably continues below this point.
means = cure_instance.get_means()

print("Sample: ", X)

visualizer = cluster_visualizer()
visualizer.append_clusters(clusters, X)

# Attach the representors of every cluster with marker '*' and size 10.
for cluster_index in range(len(clusters)):
    visualizer.append_cluster_attribute(0, cluster_index,
                                        representors[cluster_index], '*', 10)
Exemplo n.º 33
0
def process_cure(sample):
    """Run CURE on `sample` with NUMBER_CLUSTERS clusters and return the time.

    The returned value is the first element of the pair produced by
    `timedcall` for the `process` call.
    """
    algorithm = cure(sample, NUMBER_CLUSTERS)
    elapsed, _result = timedcall(algorithm.process)
    return elapsed
Exemplo n.º 34
0
    plt.show()

    # Gaussian Mixture: fit `k` components on X and colour the scatter plot
    # of the first two columns by the predicted component.
    y_pred = GaussianMixture(n_components=k).fit(X).predict(X)
    plt.scatter(X[:, 0], X[:, 1], c=y_pred)
    plt.title("Gaussian Mixture")
    plt.show()

    # Spectral Clustering: same plot, labels from `fit_predict`.
    y_pred = SpectralClustering(n_clusters=k).fit_predict(X)
    plt.scatter(X[:, 0], X[:, 1], c=y_pred)
    plt.title("Spectral Clustering")
    plt.show()

    # CURE: the clusters are drawn with the cluster visualizer instead of
    # a matplotlib scatter plot.
    cure_instance = cure(data=X, number_cluster=k);
    cure_instance.process();
    clusters = cure_instance.get_clusters();
    visualizer = cluster_visualizer(titles=["Cure"]);
    visualizer.append_clusters(clusters, X);
    visualizer.show();

    # CLARANS with numlocal=5 and maxneighbor=5, drawn the same way as CURE.
    clarans_instance = clarans(data=X, number_clusters=k, numlocal=5, maxneighbor=5);
    clarans_instance.process();
    clusters = clarans_instance.get_clusters();
    visualizer = cluster_visualizer(titles=["Clarans"]);
    visualizer.append_clusters(clusters, X);
    visualizer.show();

    # Agglomerative
def process_cure(sample):
    """Time one CURE run on `sample` using NUMBER_CLUSTERS clusters.

    Returns the first element of the pair that `timedcall` produces for the
    `process` call.
    """
    timing = timedcall(cure(sample, NUMBER_CLUSTERS).process)
    (ticks, _) = timing
    return ticks
Exemplo n.º 36
0
Created on Mar 22, 2017
@author: arno


Experiment the cure clustering algorithm on the song 2D vector (diversity, size)

*** Requires wordcount.py from wordcount package to be run beforehand ***

'''

from pyclustering.cluster.cure import cure
from pyclustering.cluster import cluster_visualizer
from pyclustering.utils import read_sample

SONG_VECTORS_FILE = "../wordcount/output/song_vectors_pyclustering_regular.txt"

# Load the song vectors that will be clustered.
input_data = read_sample(SONG_VECTORS_FILE)

# Configure CURE: 5 clusters, 8 representative points per cluster,
# compression 0.7, ccore switched off.
cure_instance = cure(input_data,
                     5,
                     number_represent_points=8,
                     compression=0.7,
                     ccore=False)

# Perform the cluster analysis.
cure_instance.process()

# Fetch the allocated clusters.
clusters = cure_instance.get_clusters()

# Show the clusters.
visualizer = cluster_visualizer()
visualizer.append_clusters(clusters)
visualizer.show()
Exemplo n.º 37
0
# Pick the cluster count with the best silhouette score, refit KMeans with it
# and store the labels and the score in `rds`.
ww = silhouette_avgs.argmax()
k = ks[ww]

km = cluster.KMeans(n_clusters=k, random_state=42).fit(df)
tags = km.labels_
rds.set('tags_k-mean', pickle.dumps(tags))
rds.set('score_k-mean', silhouette_avgs[ww])
###################################################################################
#############################      CURE          ##################################
###################################################################################
# `DataFrame.as_matrix()` was removed from pandas; `.values` returns the same
# ndarray and is available in old and new releases alike.
data = df.values
silhouette_avg = []
ks = range(5, 51)
# Score CURE for every cluster count from 5 to 50.
for k in ks:
    print('k =', k)
    cure_instance = cure(data, k, number_represent_points=5, compression=0.5)
    cure_instance.process()
    tags_index = cure_instance.get_clusters()
    # Convert the lists of point indices into one label per point; a point
    # that appears in no cluster keeps its own index as label.
    tags = np.arange(len(data))
    for i, index in enumerate(tags_index):
        tags[index] = i
    silhouette_avg.append(metrics.silhouette_score(data, tags))

silhouette_avg = np.array(silhouette_avg)
ww = silhouette_avg.argmax()

# Re-run CURE with the best-scoring cluster count.
k = ks[ww]
cure_instance = cure(data, k, number_represent_points=5, compression=0.5)
cure_instance.process()
tags_index = cure_instance.get_clusters()
tags = np.arange(len(data))
Exemplo n.º 38
0
 def testCoreInterfaceIntInputData(self):
     """CURE with ccore enabled must accept integer input and find two clusters."""
     points = [[1], [2], [3], [20], [21], [22]]
     algorithm = cure(points, 2, ccore=True)
     algorithm.process()
     cluster_count = len(algorithm.get_clusters())
     assert cluster_count == 2
Exemplo n.º 39
0
 def testCoreInterfaceIntInputData(self):
     """Integer-valued 1-D input must yield two clusters when ccore is used."""
     integer_points = [ [1], [2], [3], [20], [21], [22] ]
     clusterer = cure(integer_points, 2, ccore = True)
     clusterer.process()
     clusters = clusterer.get_clusters()
     assert len(clusters) == 2
Exemplo n.º 40
0
# Convert the document vectors of `model` (loaded outside the lines shown
# here) into a list of numpy arrays.
docvecs = []
for num in range(len(model.docvecs)):
    # print(num)
    # print(model.docvecs[num])
    docvecs.append(np.array(model.docvecs[num]))

# scikit-learn renamed `calinski_harabaz_score` to `calinski_harabasz_score`
# and later removed the old spelling; use whichever this installation offers.
calinski_score = getattr(metrics, "calinski_harabasz_score", None)
if calinski_score is None:
    calinski_score = metrics.calinski_harabaz_score

index = list(range(3, 50))
compression_index = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
# One score curve per compression value, over the cluster counts in `index`.
for compression in compression_index:
    all_silhouette_scores = []
    all_calinski_scores = []

    silhouette_scores = []
    calinski_scores = []
    for i in index:
        cure_model = cure(docvecs, i, compression=compression)
        cure_model.process()
        clusters = cure_model.get_clusters()
        # Turn the lists of point indices into one label per document; a
        # document that appears in no cluster keeps the initial label 1.
        labels = [1] * len(docvecs)
        for ind in range(len(clusters)):
            for element in clusters[ind]:
                labels[element] = ind

        print("Performance with threshold %d:" % i)
        silhouette_scores.append(metrics.silhouette_score(docvecs, labels))
        calinski_scores.append(calinski_score(docvecs, labels))

    plt.subplot(1, 2, 1)
    plt.plot(index, silhouette_scores, label=str(compression))
    plt.legend()
    plt.title("silhouette_scores")