Example #1
0
    def templateClusterAllocation(
            self,
            path,
            cluster_sizes,
            number_clusters,
            branching_factor=5,
            max_node_entries=5,
            initial_diameter=0.1,
            type_measurement=measurement_type.CENTROID_EUCLIDIAN_DISTANCE,
            entry_size_limit=200,
            diameter_multiplier=1.5):
        """Cluster the sample at *path* with BIRCH and verify the allocation.

        path: sample file readable by read_sample.
        cluster_sizes: expected cluster sizes (order-insensitive), or None
            to skip the size comparison; note the list is sorted in place.
        number_clusters: number of clusters BIRCH should allocate.
        The remaining parameters are forwarded to the birch constructor.
        """
        sample = read_sample(path)

        birch_instance = birch(sample, number_clusters, branching_factor,
                               max_node_entries, initial_diameter,
                               type_measurement, entry_size_limit,
                               diameter_multiplier)
        birch_instance.process()

        clusters = birch_instance.get_clusters()

        obtained_cluster_sizes = [len(cluster) for cluster in clusters]

        # Every input point must be assigned to exactly one cluster.
        total_length = sum(obtained_cluster_sizes)
        assert total_length == len(sample)

        # Fixed: compare against None with 'is not', not '!=' (PEP 8 E711).
        if cluster_sizes is not None:
            cluster_sizes.sort()
            obtained_cluster_sizes.sort()
            assert cluster_sizes == obtained_cluster_sizes
def template_clustering(
        number_clusters,
        path,
        branching_factor=50,
        max_node_entries=100,
        initial_diameter=0.5,
        type_measurement=measurement_type.CENTROID_EUCLIDEAN_DISTANCE,
        entry_size_limit=200,
        diameter_multiplier=1.5,
        show_result=True):
    """Run BIRCH over the sample stored at *path*.

    Prints the sample path, optionally visualizes the allocated clusters,
    and returns the (sample, clusters) pair.
    """
    print("Sample: ", path)

    points = read_sample(path)

    algorithm = birch(points, number_clusters, branching_factor,
                      max_node_entries, initial_diameter, type_measurement,
                      entry_size_limit, diameter_multiplier)
    algorithm.process()
    allocated = algorithm.get_clusters()

    if show_result is True:
        canvas = cluster_visualizer()
        canvas.append_clusters(allocated, points)
        canvas.show()

    return points, allocated
Example #3
0
def birchAlgo(filename, col_name):
    """Cluster one numeric CSV column into 10 groups with BIRCH.

    Reads column *col_name* from *filename*, pairs consecutive values into
    2-D points, clusters them, prints diagnostics, and saves the cluster
    plot to a PNG file.
    """
    df = pd.read_csv(filename, usecols=[col_name])
    data = df[col_name]

    # converting pandas series into ndarray
    input_data = np.asarray(data)

    # BUG FIX: the original bumped the row count when it was odd and then
    # reshaped, which raised ValueError because the array itself still held
    # an odd number of values.  Drop the trailing value instead so the data
    # always pairs up cleanly into 2-D points.
    even_length = (len(input_data) // 2) * 2
    input_data = input_data[:even_length]
    input_data.shape = (even_length // 2, 2)

    print(input_data.dtype)
    print(input_data.shape)
    print(
        "----------------------------------------------------------------------------------------------------------------------"
    )
    # create BIRCH algorithm for allocation of ten clusters.
    birch_instance = birch(input_data.tolist(), 10)
    # start processing - cluster analysis of the input data.
    birch_instance.process()
    # allocate clusters.
    clusters = birch_instance.get_clusters()
    print(clusters)
    print(timeit.timeit('"-".join(str(n) for n in range(100))', number=10000))
    # Visualize clusters:
    visualizer = cluster_visualizer()
    visualizer.append_clusters(clusters, input_data)
    visualizer.show(display=False)
    # NOTE(review): hard-coded, machine-specific output path — consider
    # making it a parameter.
    plt.savefig(
        "C:/Users/Nupura Hajare/Desktop/flask_app/web/static/img/birch.png")
Example #4
0
def template_clustering(
        number_clusters,
        path,
        branching_factor=5,
        max_node_entries=5,
        initial_diameter=0.0,
        type_measurement=measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE,
        entry_size_limit=200,
        diameter_multiplier=1.5,
        show_result=True):
    """Time BIRCH clustering of the sample at *path*.

    Prints the execution time, optionally shows the allocated clusters,
    and returns a (sample, clusters) tuple.
    """
    points = read_sample(path)

    algorithm = birch(points, number_clusters, branching_factor,
                      max_node_entries, initial_diameter, type_measurement,
                      entry_size_limit, diameter_multiplier)
    ticks, _ = timedcall(algorithm.process)

    print("Sample: ", path, "\t\tExecution time: ", ticks, "\n")

    allocated = algorithm.get_clusters()

    if show_result is True:
        canvas = cluster_visualizer()
        canvas.append_clusters(allocated, points)
        canvas.show()

    return (points, allocated)
def BIRCH_func(data, k):
    """Cluster *data* into *k* groups with BIRCH (diameter fixed at 3.0)."""
    numeric = DataFrame(data).apply(pd.to_numeric)
    model = birch(numeric.values.tolist(), k, diameter=3.0)
    model.process()
    return model.get_clusters()
Example #6
0
def template_clustering(number_clusters, path, branching_factor = 5, max_node_entries = 5, initial_diameter = 0.0, type_measurement = measurement_type.CENTROID_EUCLIDIAN_DISTANCE, entry_size_limit = 200, ccore = True):
    """Time BIRCH clustering of the sample at *path* and draw the result."""
    points = read_sample(path)

    algorithm = birch(points, number_clusters, branching_factor,
                      max_node_entries, initial_diameter, type_measurement,
                      entry_size_limit, ccore)
    ticks, _ = timedcall(algorithm.process)

    print("Sample: ", path, "\t\tExecution time: ", ticks, "\n")

    draw_clusters(points, algorithm.get_clusters())
Example #7
0
 def templateClusterAllocationOneDimensionData(self, branching_factor=5, max_node_entries=10, initial_diameter=1.0, type_measurement=measurement_type.CENTROID_EUCLIDEAN_DISTANCE, entry_size_limit=20):
     """Cluster four well-separated 1-D groups and check the allocation."""
     # Four groups of ten random 1-D points shifted by 0, 4, 8 and 12.
     input_data = []
     for shift in (0, 4, 8, 12):
         input_data.extend([random() + shift] for _ in range(10))

     instance = birch(input_data, 4, branching_factor, max_node_entries, initial_diameter, type_measurement, entry_size_limit)
     instance.process()
     allocated = instance.get_clusters()

     # Each of the four groups must come back intact.
     assert len(allocated) == 4
     for group in allocated:
         assert len(group) == 10
Example #8
0
 def templateClusterAllocationOneDimensionData(self, branching_factor = 5, max_node_entries = 5, initial_diameter = 0.1, type_measurement = measurement_type.CENTROID_EUCLIDIAN_DISTANCE, entry_size_limit = 200, ccore = True):
     """Cluster four well-separated 1-D groups and check the allocation."""
     # Four groups of ten random 1-D points shifted by 0, 3, 6 and 9.
     input_data = []
     for shift in (0, 3, 6, 9):
         input_data.extend([random() + shift] for _ in range(10))

     instance = birch(input_data, 4, branching_factor, max_node_entries, initial_diameter, type_measurement, entry_size_limit, ccore)
     instance.process()
     allocated = instance.get_clusters()

     # Each of the four groups must come back intact.
     assert len(allocated) == 4
     for group in allocated:
         assert len(group) == 10
Example #9
0
def template_clustering(number_clusters, path, branching_factor = 5, max_node_entries = 5, initial_diameter = 0.0, type_measurement = measurement_type.CENTROID_EUCLIDIAN_DISTANCE, entry_size_limit = 200, ccore = True):
    """Time BIRCH over the sample at *path* and draw the found clusters."""
    points = read_sample(path)

    algorithm = birch(points, number_clusters, branching_factor,
                      max_node_entries, initial_diameter, type_measurement,
                      entry_size_limit, ccore)
    ticks, _ = timedcall(algorithm.process)

    print("Sample: ", path, "\t\tExecution time: ", ticks, "\n")

    draw_clusters(points, algorithm.get_clusters())
    def get_modelo(self, algoritmo, eps, neig):
        """Build, run and post-process the clustering algorithm *algoritmo*.

        algoritmo: one of 'AGNES', 'BIRCH', 'CLARANS', 'CURE', 'DBSCAN',
            'FCM', 'KMEANS', 'KMEDOIDS', 'OPTICS', 'ROCK'.
        eps: neighborhood radius used by DBSCAN/OPTICS/ROCK.
        neig: neighbor/minpts count; also drives the retry logic below.

        Returns the flattened cluster labels as a numpy array.
        Raises ValueError for an unknown algorithm name.
        """
        print(algoritmo + ' ' + str(eps) + ' - ' + str(neig))

        if algoritmo == 'AGNES':
            instance = agglomerative(self.amostras,
                                     self.numero_clusters,
                                     link=None)
        elif algoritmo == 'BIRCH':
            instance = birch(self.amostras,
                             self.numero_clusters,
                             entry_size_limit=10000)
        elif algoritmo == 'CLARANS':
            instance = clarans(self.amostras,
                               self.numero_clusters,
                               numlocal=100,
                               maxneighbor=1)
        elif algoritmo == 'CURE':
            instance = cure(self.amostras,
                            self.numero_clusters,
                            number_represent_points=5,
                            compression=0.5)
        elif algoritmo == 'DBSCAN':
            instance = dbscan(self.amostras, eps=eps, neighbors=neig)
        elif algoritmo == 'FCM':
            initial_centers = kmeans_plusplus_initializer(
                self.amostras, self.numero_clusters).initialize()
            instance = fcm(self.amostras, initial_centers)
        elif algoritmo == 'KMEANS':
            initial_centers = kmeans_plusplus_initializer(
                self.amostras, self.numero_clusters).initialize()
            instance = kmeans(self.amostras, initial_centers, tolerance=0.001)
        elif algoritmo == 'KMEDOIDS':
            # TODO(review): medoid count is hard-coded to 7 — derive it
            # from self.numero_clusters instead.
            instance = kmedoids(self.amostras,
                                initial_index_medoids=[0, 0, 0, 0, 0, 0, 0],
                                tolerance=0.0001)
        elif algoritmo == 'OPTICS':
            instance = optics(self.amostras, eps=eps, minpts=neig)
        elif algoritmo == 'ROCK':
            instance = rock(self.amostras,
                            eps=eps,
                            number_clusters=self.numero_clusters,
                            threshold=0.5)
        else:
            # BUG FIX: the original fell through with 'pass', leaving
            # instance = None and crashing on instance.process() with an
            # AttributeError; fail fast with a clear error instead.
            raise ValueError('Unknown algorithm: ' + algoritmo)

        instance.process()
        lista_agrupada = self.get_lista_agrupada(instance.get_clusters())
        lista_agrupada = np.array(lista_agrupada)

        # If more groups than requested were produced, retry with a wider
        # neighborhood (only meaningful for algorithms that use 'neig').
        if neig != 0:
            n_grupos = len(np.unique(lista_agrupada))
            if n_grupos > self.numero_clusters:
                lista_agrupada = self.get_modelo(algoritmo, eps, neig + 1)
        return lista_agrupada
Example #11
0
    def templateClusterAllocation(self, path, cluster_sizes, number_clusters, branching_factor = 5, max_node_entries = 5, initial_diameter = 0.1, type_measurement = measurement_type.CENTROID_EUCLIDIAN_DISTANCE, entry_size_limit = 200, ccore = True):
        """Cluster the sample at *path* and check the allocated cluster sizes."""
        sample = read_sample(path)

        # Renamed from the misleading 'cure_instance' — this is BIRCH.
        birch_instance = birch(sample, number_clusters, branching_factor,
                               max_node_entries, initial_diameter,
                               type_measurement, entry_size_limit, ccore)
        birch_instance.process()
        clusters = birch_instance.get_clusters()

        sizes = [len(group) for group in clusters]

        # Every input point must land in exactly one cluster.
        assert sum(sizes) == len(sample)

        cluster_sizes.sort()
        sizes.sort()
        assert cluster_sizes == sizes
Example #12
0
def template_clustering(number_clusters, path, branching_factor = 5, max_node_entries = 5, initial_diameter = 0.0, type_measurement = measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE, entry_size_limit = 200, diameter_multiplier = 1.5, show_result = True):
    """Time BIRCH over the sample at *path*; optionally visualize clusters.

    Returns a (sample, clusters) tuple.
    """
    points = read_sample(path)

    algorithm = birch(points, number_clusters, branching_factor,
                      max_node_entries, initial_diameter, type_measurement,
                      entry_size_limit, diameter_multiplier)
    ticks, _ = timedcall(algorithm.process)

    print("Sample: ", path, "\t\tExecution time: ", ticks, "\n")

    allocated = algorithm.get_clusters()

    if show_result is True:
        canvas = cluster_visualizer()
        canvas.append_clusters(allocated, points)
        canvas.show()

    return (points, allocated)
Example #13
0
    def templateClusterAllocation(
            self,
            path,
            cluster_sizes,
            number_clusters,
            branching_factor=50,
            max_node_entries=100,
            initial_diameter=0.5,
            type_measurement=measurement_type.CENTROID_EUCLIDEAN_DISTANCE,
            entry_size_limit=200,
            diameter_multiplier=1.5):
        """Cluster the sample at *path* with BIRCH and validate the output.

        Checks the cluster/CF-cluster counts, the CF-entry limit, the
        encoding type, total point coverage, and (when *cluster_sizes*
        is given) the exact cluster sizes.
        """
        points = read_sample(path)

        algorithm = birch(points, number_clusters, branching_factor,
                          max_node_entries, initial_diameter,
                          type_measurement, entry_size_limit,
                          diameter_multiplier)
        algorithm.process()

        allocated = algorithm.get_clusters()
        cf_allocated = algorithm.get_cf_cluster()
        entries = algorithm.get_cf_entries()

        self.assertEqual(algorithm.get_cluster_encoding(),
                         type_encoding.CLUSTER_INDEX_LIST_SEPARATION)
        self.assertEqual(number_clusters, len(allocated))
        self.assertEqual(number_clusters, len(cf_allocated))
        self.assertGreater(len(entries), 0)
        self.assertLessEqual(len(entries), entry_size_limit)

        sizes = [len(group) for group in allocated]

        # Every input point must be assigned to exactly one cluster.
        self.assertEqual(sum(sizes), len(points))

        if cluster_sizes is not None:
            cluster_sizes.sort()
            sizes.sort()
            self.assertEqual(cluster_sizes, sizes)
Example #14
0
def doBirchClustering(i):
    """Cluster lung_number.csv into 7 groups with BIRCH and return labels.

    i: used both as the branching factor and the max node entries of the
       BIRCH tree.
    Returns a list with one cluster number per clustered data row, in row
    order (rows absent from every cluster are skipped, matching the
    original behavior).
    """
    #data=pd.read_csv("soy_rock.csv",header=None,dtype='int')
    #data=pd.read_csv("votepreporcess.csv",header=None,dtype='int')
    data = pd.read_csv("lung_number.csv", header=None, dtype='int')
    data_size = len(data.index)
    sample = data.values.tolist()

    birch_instance = birch(sample,
                           7,
                           branching_factor=i,
                           max_node_entries=i,
                           initial_diameter=0.1)
    birch_instance.process()
    clusters = birch_instance.get_clusters()

    # Build one index -> cluster-number map in a single pass instead of
    # scanning every cluster for every row (the original was
    # O(rows x clusters x cluster_size)).  Also avoids reusing the name
    # 'data' as the loop variable, which shadowed the DataFrame.
    label_of = {}
    for cluster_number, members in enumerate(clusters):
        for row_index in members:
            label_of[row_index] = cluster_number

    return [label_of[idx] for idx in range(data_size) if idx in label_of]
Example #15
0
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.cluster import KMeans
from sklearn.metrics import completeness_score, homogeneity_score
from sklearn.naive_bayes import GaussianNB
from sklearn import cross_validation
from pylab import figure, subplot, hist, xlim, show, plot
from numpy import genfromtxt, zeros
from sklearn.metrics import confusion_matrix
from numpy import mean
from sklearn.cross_validation import cross_val_score
import pandas as pd

# Load actor co-occurrence data: first three CSV columns, read as strings.
# NOTE(review): hard-coded, machine-specific input path.
actoridata = genfromtxt(
    'C:\\Users\\26087\\PycharmProjects\\untitled\\venv\\coo_times_arr.csv',
    encoding='utf-8',
    delimiter=',',
    usecols=(0, 1, 2),
    dtype=str)
print(actoridata)
# NOTE(review): read_sample is normally given a file path, but here it
# receives the already-loaded array — confirm this is intended.
sample = read_sample(actoridata)
# Use the BIRCH algorithm to allocate 128 clusters: instantiate the class.
birch_instance = birch(actoridata, 128)
# Run the clustering with the object's method.
birch_instance.process()
# Fetch the clustering result.
clusters = birch_instance.get_clusters()
# Visualize the allocated clusters.
visualizer = cluster_visualizer()
visualizer.append_clusters(clusters, sample)
visualizer.show()
Example #16
0
def process_birch(sample):
    """Run BIRCH on *sample* and return the elapsed processing time."""
    algorithm = birch(sample, NUMBER_CLUSTERS)
    elapsed, _ = timedcall(algorithm.process)
    return elapsed
Пример #17
0
# In[26]:

from pyclustering.cluster.birch import birch
from pyclustering.cluster import cluster_visualizer
from pyclustering.utils import read_sample
from pyclustering.samples.definitions import FAMOUS_SAMPLES

# Take the first two columns as a list of 2-D points.
X = finalDataFrame.iloc[:, [0, 1]].values.tolist()
# NOTE(review): the bare expressions below ('X', 'birch') are notebook-cell
# echoes and have no effect when run as a script.
X

birch

# Try BIRCH with k = 2..9 clusters and visualize each result.
for k in range(2, 10):
    birch_instance = birch(X, k, diameter=3.0)
    # Cluster analysis
    birch_instance.process()
    # Obtain results of clustering
    clusters = birch_instance.get_clusters()
    # Visualize allocated clusters
    visualizer = cluster_visualizer()
    visualizer.append_clusters(clusters, X)
    print("\nk=", k)
    visualizer.show()

# In[27]:

X = finalDataFrame.iloc[:, [0, 1]].values.tolist()

for k in range(10, 20):
Пример #18
0
# an example of clustering by BIRCH algorithm.
# BUG FIX: the original used FCPS_SAMPLES and cluster_visualizer without
# importing them, raising NameError at runtime.
from pyclustering.cluster.birch import birch
from pyclustering.cluster import cluster_visualizer
from pyclustering.samples.definitions import FCPS_SAMPLES

from pyclustering.utils import read_sample

# load data from the FCPS set that is provided by the library.
sample = read_sample(FCPS_SAMPLES.SAMPLE_LSUN)

# create BIRCH algorithm for allocation of three clusters.
birch_instance = birch(sample, 3)

# start processing - cluster analysis of the input data.
birch_instance.process()

# allocate clusters.
clusters = birch_instance.get_clusters()

# visualize obtained clusters.
visualizer = cluster_visualizer()
visualizer.append_clusters(clusters, sample)
visualizer.show()
Example #19
0
"""
from pyclustering.cluster.birch import birch
import os
import time
import shutil
#import subprocess
############### Configure the below variables for a change in system #################
PROJECT='/root/Desktop/ram/'
HADOOP_HOME='/user/root/in/'
#####################################################################################

# Working directories derived from the project root.
PROC_DIR= PROJECT + 'PROJECT/BIRCH_PROCESS/'
HADOOP_PROC_DIR= PROJECT + 'PROJECT/HADOOP_PROCESS/'
PROCESSED_DIR= PROJECT + 'PROJECT/PROCESSED/'
LOGDIR=  PROJECT + 'PROJECT/LOGDIR/logfile'
# NOTE(review): birch is constructed with an empty dataset and an extra
# positional list argument — presumably re-fed later; confirm against the
# full script (this excerpt looks truncated).
birch1=birch([[]],10,[],initial_diameter=10)
z=0
# Daemon loop: wait for date-named files to appear in PROC_DIR, signal
# Hadoop via a DONE marker, then wait for the transfer to finish.
while True :   
    # Poll every 30 s until the processing directory is non-empty.
    while (os.listdir(PROC_DIR)==[]):
        time.sleep(30)
    # After this loop DATE holds the last directory entry seen.
    for datefile in os.listdir(PROC_DIR):
        DATE=datefile
    HADOOP_PATH= HADOOP_HOME + DATE +'/'
    # Create the DONE marker for the Hadoop side (fire-and-forget shell).
    os.popen("touch "+HADOOP_PROC_DIR + "DONE")
    #Below loop is to check moving of files from Hadoop dir to Birch Dir is finished    
    while(os.path.isfile(PROC_DIR+datefile) == True):
        time.sleep(10)
    DONEFILE=PROC_DIR+"DONE"
    os.remove(DONEFILE)
    d1={}
    d2=os.listdir(PROC_DIR)
def process_birch(sample):
    """Cluster *sample* with BIRCH and report how long processing took."""
    instance = birch(sample, NUMBER_CLUSTERS)
    timing = timedcall(instance.process)
    return timing[0]