def clusterRegularVerbs_AgglomerativeClustering(data_path='FeatureMatrix_number.csv', n_clusters=11, linkage='complete'):
    """Fit agglomerative clustering on a numeric CSV feature matrix.

    With the default arguments this loads ``FeatureMatrix_number.csv``
    (comma-separated, first row skipped as a header) and fits a
    complete-linkage model with 11 clusters.

    Args:
        data_path: Path of the comma-separated feature matrix to load.
        n_clusters: Number of clusters to form.
        linkage: Linkage criterion passed to ``AgglomerativeClustering``.

    Returns:
        The fitted ``AgglomerativeClustering`` estimator.
    """
    # Only the names that are actually used are imported. The previous
    # ``sklearn.datasets.samples_generator`` import no longer exists in
    # current scikit-learn releases and made the whole function fail.
    from numpy import genfromtxt
    from sklearn.cluster import AgglomerativeClustering

    feature_matrix = genfromtxt(data_path, delimiter=',', skip_header=1)
    clustering = AgglomerativeClustering(linkage=linkage, n_clusters=n_clusters)
    clustering.fit(feature_matrix)
    return clustering
def perform_hierarchial(self, no_clusters, params=None):
    """Run agglomerative clustering on ``self.data`` and record the outcome.

    Args:
        no_clusters: Number of clusters, passed as ``n_clusters``.
        params: Optional mapping of extra keyword arguments for the
            ``AgglomerativeClustering`` constructor. ``None`` (the default)
            means no extra arguments.

    Side effects:
        Sets ``self.hierarchial_results`` to a dict with the keys
        ``"parameters"``, ``"labels"``, ``"n_clusters"`` and ``'clusters'``,
        then prints it with ``print_dict``.
    """
    # A None default avoids sharing one dict object across all calls.
    extra_kwargs = {} if params is None else params
    hierarchial_clusterer = AgglomerativeClustering(n_clusters=no_clusters, **extra_kwargs)
    # NOTE(review): scikit-learn's fit() has no ``hdf5_file`` keyword; this
    # presumably targets a project-specific AgglomerativeClustering - confirm.
    hierarchial_clusterer.fit(self.data, hdf5_file=self.hdf5_file)

    labels = hierarchial_clusterer.labels_
    self.hierarchial_results = {
        "parameters": hierarchial_clusterer.get_params(),
        "labels": labels,
        "n_clusters": no_clusters,
        'clusters': label_cnt_dict(labels),
    }
    print_dict(self.hierarchial_results)
def baseline_ac(data_matrix, k, plot_params=None):
    """Cluster ``data_matrix`` with average-linkage agglomerative clustering.

    Baseline AC clusters using banned library. The fitted model's parameters
    are printed, and the labelled data is optionally handed to ``plot``.

    Args:
        data_matrix: Samples to cluster, passed to ``fit_predict``.
        k: Number of clusters.
        plot_params: Optional dict of plotting options. Recognised keys are
            ``'plotit'`` (whether to plot), ``'x'`` / ``'y'`` (axis labels),
            ``'title'`` and ``'show'``. Missing keys fall back to the
            defaults below, so a partial dict is accepted.

    Returns:
        None.
    """
    # The defaults are built per call and include 'title'. The old default
    # dict lacked that key, so a call without plot_params raised KeyError.
    options = {
        'plotit': True,
        'x': 'latitude',
        'y': 'longitude',
        'show': True,
        'title': 'Agglomerative Clustering',
    }
    if plot_params:
        options.update(plot_params)

    # Euclidean distance is the estimator's default, so the explicit
    # ``affinity='euclidean'`` keyword is dropped: the result is the same
    # and recent scikit-learn releases no longer accept that keyword.
    model = AgglomerativeClustering(n_clusters=k, linkage='average')
    labels = model.fit_predict(data_matrix)

    print('Model Params: ')
    print(model.get_params())

    if options['plotit']:
        plot(
            data_matrix,
            labels,
            title=options['title'],
            show=options['show'],
            xlabel=options['x'],
            ylabel=options['y'],
        )
    return
affinities = ('cosine', ) linkages = ('complete', 'average') n_clusterss = (5, 10, 20, 30) tau = 0.9 methods = list() if algo == "agglomerative": # agglomerative for affinity, linkage, n_clusters in itertools.product( affinities, linkages, n_clusterss): agg = AgglomerativeClustering(n_clusters=n_clusters, affinity=affinity, linkage=linkage) params = agg.get_params() params.pop('memory') params.pop('pooling_func') info = { 'name': 'Agglomerative Clustering', "rep": rep, "rep_params": rep_params, 'params': params } methods.append((agg, info)) elif algo == "kmeans" or algo == "k-means": # k-means for n_clusters in n_clusterss: km = KMeans(n_clusters=n_clusters, n_jobs=-1,
def _agglomerative_clustering(table, input_cols, prediction_col='prediction', linkage='ward', affinity='euclidean', n_clusters=2, compute_full_tree_auto=True, compute_full_tree=None):
    """Cluster the selected columns of ``table`` and build a markdown report.

    Args:
        table: Input DataFrame.
        input_cols: Columns of ``table`` used as features.
        prediction_col: Name of the output column that receives the labels.
        linkage: Linkage criterion for ``AgglomerativeClustering``.
        affinity: Distance metric; replaced by ``'euclidean'`` whenever
            ``linkage`` is ``'ward'``.
        n_clusters: Number of clusters to form.
        compute_full_tree_auto: When true, ``compute_full_tree`` is ignored
            and ``'auto'`` is passed to the estimator.
        compute_full_tree: Value passed to the estimator when
            ``compute_full_tree_auto`` is false.

    Returns:
        dict with ``'out_table'`` (a copy of ``table`` plus the label column)
        and ``'model'`` (model dict holding the fitted estimator, the input
        columns and the rendered report).
    """
    feature_names, inputarr = check_col_type(table, input_cols)

    _compute_full_tree = 'auto' if compute_full_tree_auto else compute_full_tree
    # Ward linkage is always run with Euclidean distances, whatever the caller asked for.
    _affinity = 'euclidean' if linkage == 'ward' else affinity

    # NOTE(review): newer scikit-learn releases rename ``affinity`` to
    # ``metric``; confirm the pinned version before upgrading.
    ac = AgglomerativeClustering(linkage=linkage, affinity=_affinity, n_clusters=n_clusters,
                                 compute_full_tree=_compute_full_tree)
    ac.fit(inputarr)

    label_name = {
        'linkage': 'Linkage',
        'affinity': 'Affinity',
        'n_clusters': 'N Clusters',
        'compute_full_tree': 'Compute Full Tree'
    }
    get_param = ac.get_params()
    # DataFrame.from_items was removed in pandas 1.0; the plain constructor
    # with an explicit column order builds the same two-column table.
    param_table = pd.DataFrame(
        {
            'Parameter': list(label_name.values()),
            'Value': [get_param[key] for key in label_name],
        },
        columns=['Parameter', 'Value'])

    labels = ac.labels_
    colors = cm.nipy_spectral(np.arange(n_clusters).astype(float) / n_clusters)

    # Plot at most 100 samples; None is passed when the table has 100 rows or fewer.
    sample_limit = 100 if len(table.index) > 100 else None
    fig_samples = _agglomerative_clustering_samples_plot(
        labels, table, input_cols, sample_limit, n_clusters, colors)

    rb = BrtcReprBuilder()
    if len(feature_names) > 1:
        # A 2-D PCA projection is only produced when there is more than one feature.
        pca2_model = PCA(n_components=2).fit(inputarr)
        pca2 = pca2_model.transform(inputarr)
        fig_pca = _agglomerative_clustering_pca_plot(labels, pca2_model, pca2, n_clusters, colors)
        rb.addMD(strip_margin("""
        | ## Agglomerative Clustering Result
        | ### Samples
        | {fig_samples}
        | {fig_pca}
        | ### Parameters
        | {params}
        """.format(fig_pca=fig_pca, fig_samples=fig_samples, params=pandasDF2MD(param_table))))
    else:
        rb.addMD(strip_margin("""
        | ## Agglomerative Clustering Result
        | - Samples
        | {fig_samples}
        | ### Parameters
        | {params}
        """.format(fig_samples=fig_samples, params=pandasDF2MD(param_table))))

    model = _model_dict('agglomerative_clustering')
    model['model'] = ac
    model['input_cols'] = input_cols
    model['_repr_brtc_'] = rb.get()

    out_table = table.copy()
    out_table[prediction_col] = labels
    return {'out_table': out_table, 'model': model}
from sklearn.cluster import AgglomerativeClustering
import numpy as np

# Load columns 4-10 of the comma-separated file, skipping the first 19995 lines.
data = np.loadtxt(r'sp3combined.test.txt', delimiter=',', skiprows=19995, usecols=(4, 5, 6, 7, 8, 9, 10))

# The estimator needs a 2-D (samples x features) array, so it is fitted once
# on the whole matrix. The previous per-row loop handed it 1-D rows, which
# fit_predict rejects, and passed the row as the ``deep`` flag of get_params.
# NOTE(review): clustering all rows together is assumed to be the intent - confirm.
estimator = AgglomerativeClustering(n_clusters=2)
labels = estimator.fit_predict(data)
params = estimator.get_params()

print(labels)
print(params)
# =======================================================================
# data
# =======================================================================
X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [4, 4], [4, 0]])
X_test = np.array([[1, 3], [2, 4]])

# =======================================================================
# clustering
# =======================================================================
clustering = AgglomerativeClustering()
clustering.fit(X)

print(clustering)
print(clustering.get_params())
print(clustering.labels_)
print(clustering.n_leaves_)
# ``n_components_`` was replaced by ``n_connected_components_`` in newer
# scikit-learn releases; read the new name first and fall back to the old one.
n_connected = getattr(clustering, 'n_connected_components_', None)
if n_connected is None:
    n_connected = clustering.n_components_
print(n_connected)
print(clustering.children_)

# AgglomerativeClustering has no separate predict step: fit_predict refits the
# model on X_test, replacing the state learned from X above.
predictions = clustering.fit_predict(X_test)
print(predictions)