def cluster(self):
    """Fit the first-tier clustering model on ``self.X``.

    The labels produced by ``agglomerative_l_method`` are handed to a new
    ``DividableClustering`` instance, which is stored on
    ``self.clustering_model`` and then fitted.
    """
    l_method_result = agglomerative_l_method(self.X)

    # The number of centers found by the L-method is used as the cluster
    # count for a plain agglomerative run on a private copy of the data.
    # NOTE(review): nothing below reads this fitted model; it looks like a
    # leftover from a label comparison that used to be printed here.
    # Confirm it is safe to drop before removing the call.
    n_suggested = len(l_method_result.cluster_centers_)
    agg_model = AgglomerativeClustering(n_suggested)
    agg_model.fit(np.array(self.X, copy=True))

    # First tier clustering, seeded with the L-method labels.
    self.clustering_model = DividableClustering()
    self.clustering_model.fit(self.X, l_method_result.labels_)
def main(argv):
    """Run single-link then complete-link agglomerative clustering.

    Args:
        argv: Command-line style argument list; ``argv[1]`` is the path
            passed to ``Data``.
    """
    samples = Data(argv[1]).create_samples()

    linkages = (
        ("single link:", SingleLink),
        ("complete link:", CompleteLink),
    )
    for position, (title, link_factory) in enumerate(linkages):
        if position:
            # Blank separator line between the two reports.
            print("")
        # The link object is created before its title is printed, matching
        # the order in which output appears relative to construction.
        link = link_factory()
        print(title)
        AgglomerativeClustering(link, samples).run(7)
def main(argv):
    """Cluster the samples with both linkages and print the requested reports.

    Args:
        argv: Command-line style argument list. ``argv[1]`` is the path
            passed to ``Data``, ``argv[2]`` is a ``", "``-separated list
            forwarded to ``general_printer``, and ``argv[3]`` is converted
            with ``int`` and passed to each ``run`` call.
    """
    samples = Data(argv[1]).create_samples()

    # Both clusterings are constructed before either of them is run.
    clusterings = [
        AgglomerativeClustering(link, samples)
        for link in (SingleLink, CompleteLink)
    ]
    single_result, complete_result = [
        clustering.run(int(argv[3])) for clustering in clusterings
    ]

    general_printer(argv[2].split(", "), single_result, complete_result)
# Compare a from-scratch single-linkage agglomerative clustering with the
# scikit-learn implementation on the Iris data set.
import pandas as pd
import sklearn.cluster as sklearn_cluster
from sklearn import datasets
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

from agglomerative_clustering import AgglomerativeClustering

iris = datasets.load_iris()
X = iris.data
y = iris.target
# One cluster per known class so both implementations target the same count.
n_clusters = len(iris.target_names)

# NOTE(review): accuracy_score and confusion_matrix compare raw cluster ids
# with class ids; cluster ids are arbitrary, so these numbers are only
# meaningful when the ids happen to line up with the class ids.

print("\n===========================\n")
print("Agglomerative Clustering (Single) from Scratch")
y_predict = AgglomerativeClustering(
    pd.DataFrame(X), n_clusters, 'single').fit_predict()
print(y_predict)
print('Confusion Matrix :', confusion_matrix(y, y_predict))
print('Accuracy Score :', accuracy_score(y, y_predict))

print("\n===========================\n")
print("Agglomerative Clustering (Single) SKLearn")
# Pass n_clusters explicitly: scikit-learn defaults to 2 clusters, which
# would make this run incomparable with the from-scratch run above.
y_predict = sklearn_cluster.AgglomerativeClustering(
    n_clusters=n_clusters, linkage='single').fit_predict(X)
print(y_predict)
print('Confusion Matrix :', confusion_matrix(y, y_predict))
print('Accuracy Score :', accuracy_score(y, y_predict))
# Demo: fit the project's AgglomerativeClustering on the iris data set and
# print the labels it predicts for the same points.
from agglomerative_clustering import AgglomerativeClusteringMaxMergeDist, AgglomerativeClustering
from dataset import *

dataset = get_iris()
print("dataset size:", len(dataset.X))

# Alternative experiment, currently disabled (max-merge-distance variant):
#   agg = AgglomerativeClusteringMaxMergeDist()
#   centroids, cluster_member_cnt = agg.fit(dataset.X, 0.2)
#   print('grouped size:', len(centroids))

agg = AgglomerativeClustering(3)
agg.fit(dataset.X)

predict_X = agg.predict(dataset.X)
print("predict_X:", predict_X)
# Demo: seed a DividableClustering with agglomerative labels, split one of
# its groups with k-means, then relabel.
from dividable_clustering import DividableClustering
from agglomerative_clustering import AgglomerativeClustering
from sklearn.cluster import KMeans
from dataset import *
from sklearn.neighbors import BallTree

dataset = get_iris()

# Initial three-way grouping.
agg = AgglomerativeClustering(3)
agg.fit(dataset.X)

model = DividableClustering()
model.fit(dataset.X, agg.labels_)

# Sizes of the groups stored under labels 0, 1 and 2.
for label in (0, 1, 2):
    print(len(model.X_by_label[label]))

# Sub-cluster the points of group 0 and hand the new labels to split().
kmeans = KMeans(3)
kmeans.fit(model.get_X(0))
model.split(0, kmeans.labels_)

# Presumably split() stores the sub-groups under labels 3, 4 and 5; the
# lookups below rely on those keys existing - verify against split().
for label in (3, 4, 5):
    print(len(model.X_by_label[label]))
print(model.X_by_label.keys())

model.relabel()
# Demo: score the project's AgglomerativeClustering and DBSCAN on the Iris
# data set against the reference labels.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics.cluster import normalized_mutual_info_score

# NOTE(review): a local module named `metrics` is wildcard-imported after
# `from sklearn import metrics`. The `metrics.adjusted_rand_score` calls
# below rely on none of these wildcard imports rebinding `metrics`.
from utils import *
from datasets import *
from classifiers import *
from metrics import *
from agglomerative_clustering import AgglomerativeClustering
from dbscan import DBSCAN


def _print_scores(y_true, y_pred):
    """Print the adjusted Rand and normalized mutual information scores."""
    print("adjusted_rand_score", metrics.adjusted_rand_score(y_true, y_pred))
    print(" normalized_mutual_info_score",
          normalized_mutual_info_score(y_true, y_pred))


X, y = read_dataset(dataset='Iris')

print("--- AgglomerativeClustering ---")
model = AgglomerativeClustering(
    n_clusters=3,
    verbose=False,
    linkage='complete',
    distance_metric='l1',
)
cluster_pred = model.fit_predict(X)
_print_scores(y, cluster_pred)

print("--- DBSCAN ---")
cluster_pred = DBSCAN(eps=1, MinPts=5).fit_predict(X)
_print_scores(y, cluster_pred)