def cluster(self):
        """Build the first-tier clustering and store it on ``self.clustering_model``.

        The L-method is run on ``self.X``; its labels seed a fresh
        ``DividableClustering`` that is fitted on ``self.X``.
        """
        l_method_result = agglomerative_l_method(self.X)

        # Fit a plain agglomerative model with the cluster count suggested by
        # the L-method, on a copy of X so that self.X is not handed over.
        # NOTE(review): nothing below reads this fitted model -- confirm
        # whether the fit is still needed.
        n_suggested = len(l_method_result.cluster_centers_)
        reference_model = AgglomerativeClustering(n_suggested)
        reference_model.fit(np.array(self.X, copy=True))

        # First tier: partition X according to the L-method labels.
        self.clustering_model = DividableClustering()
        self.clustering_model.fit(self.X, l_method_result.labels_)
# Example no. 2
# 0
def main(argv):
    """Cluster the samples found at ``argv[1]`` with single and complete link.

    The samples are loaded once via ``Data(...).create_samples()`` and each
    linkage is run with the same argument, ``7``, under its own heading.
    """
    samples = Data(argv[1]).create_samples()

    def run_with(linkage, heading):
        # Print the heading first, then build and run the clustering.
        print(heading)
        AgglomerativeClustering(linkage, samples).run(7)

    run_with(SingleLink(), "single link:")

    # Blank line between the two reports.
    print("")
    run_with(CompleteLink(), "complete link:")
# Example no. 3
# 0
def main(argv):
    """Run single- and complete-link clustering and print the chosen reports.

    Arguments read from ``argv``:
        argv[1]: path handed to ``Data``.
        argv[2]: report names separated by ``", "``.
        argv[3]: integer passed to each clustering's ``run``.
    """
    samples = Data(argv[1]).create_samples()

    # Both clusterings are constructed before either one is run.
    single_clustering = AgglomerativeClustering(SingleLink, samples)
    complete_clustering = AgglomerativeClustering(CompleteLink, samples)

    run_argument = int(argv[3])
    single_result = single_clustering.run(run_argument)
    complete_result = complete_clustering.run(run_argument)

    requested_reports = argv[2].split(", ")
    general_printer(requested_reports, single_result, complete_result)
import pandas as pd
import sklearn.cluster as sklearn_cluster
from sklearn import datasets
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from agglomerative_clustering import AgglomerativeClustering

# Compare a from-scratch single-linkage agglomerative clustering with the
# scikit-learn implementation on the iris data set.
iris = datasets.load_iris()
X = iris.data
y = iris.target
# One cluster per iris species.
n_clusters = len(iris.target_names)

print("\n===========================\n")

print("Agglomerative Clustering (Single) from Scratch")
y_predict = AgglomerativeClustering(pd.DataFrame(X), n_clusters,
                                    'single').fit_predict()
print(y_predict)

# NOTE: cluster ids are arbitrary, so these scores depend on how the ids
# happen to line up with the target labels.
print('Confusion Matrix :', confusion_matrix(y, y_predict))
print('Accuracy Score :', accuracy_score(y, y_predict))

print("\n===========================\n")

print("Agglomerative Clustering (Single) SKLearn")
# scikit-learn defaults to n_clusters=2; ask for the same number of clusters
# as the from-scratch run so the two results are comparable.
y_predict = sklearn_cluster.AgglomerativeClustering(
    n_clusters=n_clusters, linkage='single').fit_predict(X)
print(y_predict)

print('Confusion Matrix :', confusion_matrix(y, y_predict))
print('Accuracy Score :', accuracy_score(y, y_predict))
from agglomerative_clustering import AgglomerativeClusteringMaxMergeDist, AgglomerativeClustering
from dataset import *

# Load the iris data and report how many samples it contains.
dataset = get_iris()
print("dataset size:", len(dataset.X))

# Disabled experiment with the max-merge-distance variant:
#   agg = AgglomerativeClusteringMaxMergeDist()
#   centroids, cluster_member_cnt = agg.fit(dataset.X, 0.2)
#   print('grouped size:', len(centroids))

# Fit the clustering (3 is presumably the cluster count -- confirm against
# the AgglomerativeClustering constructor).
agg = AgglomerativeClustering(3)
agg.fit(dataset.X)

# Run predict on the same samples the model was fitted on and show the result.
predict_X = agg.predict(dataset.X)
print("predict_X:", predict_X)
from dividable_clustering import DividableClustering
from agglomerative_clustering import AgglomerativeClustering
from sklearn.cluster import KMeans
from dataset import *
from sklearn.neighbors import BallTree

dataset = get_iris()

# First tier: agglomerative clustering of the whole data set.
agg = AgglomerativeClustering(3)
agg.fit(dataset.X)

# Hand the data and the first-tier labels to the dividable model.
model = DividableClustering()
model.fit(dataset.X, agg.labels_)

# Size of each group stored under labels 0, 1 and 2.
for label in (0, 1, 2):
    print(len(model.X_by_label[label]))

# Second tier: run k-means on the samples of label 0 and split that label
# with the resulting k-means labels.
kmeans = KMeans(3)
kmeans.fit(model.get_X(0))

model.split(0, kmeans.labels_)

# After the split, group sizes are read back under labels 3, 4 and 5.
for label in (3, 4, 5):
    print(len(model.X_by_label[label]))

print(model.X_by_label.keys())

model.relabel()
# Example no. 7
# 0
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics.cluster import normalized_mutual_info_score

from utils import *
from datasets import *
from classifiers import *
from metrics import *

from agglomerative_clustering import AgglomerativeClustering
from dbscan import DBSCAN

def _print_scores(y_true, y_pred):
    """Print the adjusted Rand and normalized mutual information scores."""
    print("adjusted_rand_score", metrics.adjusted_rand_score(y_true, y_pred))
    print(" normalized_mutual_info_score",
          normalized_mutual_info_score(y_true, y_pred))


X, y = read_dataset(dataset='Iris')

# Complete-linkage agglomerative clustering with the 'l1' distance metric.
print("--- AgglomerativeClustering ---")
model = AgglomerativeClustering(
    n_clusters=3,
    verbose=False,
    linkage='complete',
    distance_metric='l1',
)
cluster_pred = model.fit_predict(X)
_print_scores(y, cluster_pred)

# DBSCAN on the same data, scored the same way.
print("--- DBSCAN ---")
cluster_pred = DBSCAN(eps=1, MinPts=5).fit_predict(X)
_print_scores(y, cluster_pred)