def run_k_means_elbow(params, x_data):
    """Fit a K-Means elbow visualizer on ``x_data`` and save the plot as a PNG.

    The visualizer is created with ``k=(2, 40)`` and the ``'distortion'``
    metric.

    :param params: mapping with the key ``'elbow_graph'`` (used both as the
        plot title and as the output file stem) and an optional key
        ``'path'``, a string that is prefixed verbatim to the file name.
    :param x_data: data handed to ``KElbowVisualizer.fit``.
    :return: None
    :raises KeyError: if ``params`` is a dict without an ``'elbow_graph'`` key.
    """
    # Look the name up first so a missing key fails before the costly fit.
    graph_name = params['elbow_graph']

    model = KMeans()
    visualizer = KElbowVisualizer(model, k=(2, 40), metric='distortion')
    plt.figure()
    visualizer.fit(x_data)
    visualizer.set_title(graph_name)

    try:
        path = params['path'] + graph_name + '.png'
    except (KeyError, TypeError):
        # No 'path' entry, or one that cannot be concatenated with a string
        # (e.g. None): fall back to a file name without a prefix.
        path = graph_name + '.png'
    visualizer.show(outpath=path)
def perform_elbow_method(x_scaled):
    """
    Draw the K-Means elbow plot used to help decide the number of clusters.

    Tick, axis-label and title font sizes are written to the global
    matplotlib ``rcParams`` before the plot is created.

    :param x_scaled: Values of our data after normalization
    :return: None; the plot is rendered through ``visualizer.show()``
    """
    font_sizes = {
        'xtick.labelsize': 12,
        'ytick.labelsize': 12,
        'axes.titlesize': 18,
        'axes.labelsize': 14,
    }
    for option, size in font_sizes.items():
        mpl.rcParams[option] = size

    # Build the visualizer around a default K-Means model and fit it.
    elbow = KElbowVisualizer(KMeans(), k=(1, 12))
    elbow.fit(x_scaled)
    elbow.set_title("The Elbow Method")
    elbow.show()
def perform_elbow_method(points, method):
    """
    Perform and visualize elbow method.

    :param points: the data's points
    :param method: clustering method - ``'K means'`` (uses ``KMeans``) or
        ``'Hierarchical'`` (uses ``AgglomerativeClustering``)
    :return: None
    :raises ValueError: if ``method`` is neither of the two supported names
    """
    if method == 'K means':
        model = KMeans()
    elif method == 'Hierarchical':
        model = AgglomerativeClustering()
    else:
        # ValueError is a subclass of Exception, so callers that caught the
        # previous generic Exception keep working.
        raise ValueError(
            'This elbow method designed only for K means and Hierarchical')

    visualizer = KElbowVisualizer(model, k=(1, 12))
    # Fit the data to the visualizer
    visualizer.fit(points)
    visualizer.set_title("The Elbow Method")
    visualizer.show()
# Output file-name suffix for each supported value of ``flag``.
_ELBOW_SUFFIX_BY_FLAG = {
    0: "_full.png",
    1: "_stemming.png",
    2: "_lemmatizing.png",
}


def _save_elbow_plot(model, X, k_range, metric, title, out_dir, suffix):
    """Fit one KElbowVisualizer for ``metric`` and save the figure to ``out_dir``."""
    visualizer = KElbowVisualizer(model, k=k_range, metric=metric, title=title)
    visualizer.fit(X)
    visualizer.set_title(title=title)
    # The figure is written with pyplot directly rather than through
    # visualizer.show(); it is closed afterwards so plots do not accumulate.
    plt.savefig(os.path.join(out_dir, title + suffix))
    plt.close()


def elbow_kmeans(datasetDir, flag):
    """Save K-Means elbow and silhouette plots for a text dataset.

    The documents under ``datasetDir`` are loaded, converted to TF-IDF
    vectors, reduced with a TruncatedSVD(200) + Normalizer pipeline, and
    then used to produce four PNG files inside ``<cwd>/elbow for kmeans``:
    three KElbowVisualizer plots (calinski_harabasz, silhouette, distortion)
    and one plot of ``silhouette_score`` against ``n_clusters`` in
    ``range(4, 30)``.

    :param datasetDir: directory passed to ``datasets.load_files``.
    :param flag: selects the file-name suffix: 0 -> ``_full.png``,
        1 -> ``_stemming.png``, 2 -> ``_lemmatizing.png``.
    :return: None
    :raises ValueError: if ``flag`` is not 0, 1 or 2.
    """
    # Validate before any expensive work; previously an unsupported flag only
    # failed at the first savefig with an unbound ``suffix``.
    suffix = _ELBOW_SUFFIX_BY_FLAG.get(flag)
    if suffix is None:
        raise ValueError("flag must be 0, 1 or 2, got {!r}".format(flag))

    all_data = datasets.load_files(datasetDir, description=None,
                                   load_content=True, encoding='utf-8',
                                   shuffle=False)

    tfidf = TfidfVectorizer(stop_words='english')
    X = tfidf.fit_transform(raw_documents=all_data.data)
    lsa = make_pipeline(TruncatedSVD(n_components=200), Normalizer(copy=False))
    X = lsa.fit_transform(X)

    plot_dir = os.path.join(os.getcwd(), "elbow for kmeans")
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)

    # One shared estimator is handed to all three visualizers.
    model = KMeans(random_state=42)
    elbow_runs = (
        ('calinski_harabasz', (4, 30),
         "Elbow calinski_harabasz Score of Kmeans clustering"),
        ('silhouette', (4, 30),
         "Elbow silhouette Score of Kmeans clustering"),
        ('distortion', (4, 20),
         "Elbow distortion of Kmeans clustering"),
    )
    for metric, k_range, title in elbow_runs:
        _save_elbow_plot(model, X, k_range, metric, title, plot_dir, suffix)

    # Silhouette score computed directly for each cluster count.
    cluster_counts = list(range(4, 30))
    sil = []
    for n_cluster in cluster_counts:
        labels = KMeans(random_state=42, n_clusters=n_cluster).fit(X).labels_
        sil.append(silhouette_score(X, labels, metric='euclidean'))

    plt.plot(cluster_counts, sil)
    plt.grid(True)
    # NOTE: "sihouette" (sic) is kept so existing output file names do not change.
    plt.savefig(os.path.join(plot_dir, "sihouette" + suffix))
    plt.close()