def plot_figures(som):
    """Draw the diagnostic figures for a SOM and display them.

    Creates a BMU hit-map view, a U-matrix view, a 6-cluster hit-map view
    and a 2-D view, shows each of them for ``som`` and finally calls
    ``plt.show()``.

    Args:
        som: The SOM object to visualise. It must provide ``cluster`` and
            be accepted by the ``show`` methods of the view classes.

    Note:
        ``names`` (passed to ``View2D``) is read from the enclosing module
        scope. The keyword spellings ``anotate``, ``logaritmic`` and
        ``contooor`` are kept exactly as the calls were originally written;
        check the plotting library before "correcting" them.
    """
    # BMU map
    vhts = BmuHitsView(12, 12, "Hits Map", text_size=12)

    # U matrix
    u = sompy.umatrix.UMatrixView(50, 50, 'umatrix', show_axis=True,
                                  text_size=8, show_text=True)
    # Result is not used here; the call is kept in its original position.
    u.build_u_matrix(som, distance=1, row_normalized=False)

    # Cluster map
    som.cluster(6)
    hits = HitMapView(10, 10, "Clustering", text_size=12)

    # Show factor influence
    view2D = View2D(15, 15, "time-series", text_size=10, names=names)

    # Show plots
    view2D.show(som, col_sz=4, which_dim="all", denormalize=True)
    vhts.show(som, anotate=True, onlyzeros=False, labelsize=12,
              cmap="Greys", logaritmic=False)
    u.show(som, distance2=1, row_normalized=False, show_data=True,
           contooor=True, blob=False)
    hits.show(som)
    plt.show()
def som_kmeans_clustering_predict(som, k):
    """Cluster the SOM grid into ``k`` groups and display the cluster map.

    Args:
        som: SOM object providing ``cluster`` and a ``_bmu`` attribute.
        k: Number of clusters requested from ``som.cluster``.

    Returns:
        tuple: ``(som, map_labels)`` where ``map_labels`` is the value
        returned by ``som.cluster(n_clusters=k)``.
    """
    map_labels = som.cluster(n_clusters=k)

    # Look up the map label for every entry of som._bmu[0] (presumably the
    # best-matching-unit index of each sample -- confirm). The array is
    # built but not returned.
    bmu_indices = som._bmu[0]
    data_labels = np.array([map_labels[int(idx)] for idx in bmu_indices])

    cluster_view = HitMapView(20, 20, "Clustering", text_size=12)
    cluster_view.show(som)

    return som, map_labels
def plot_clusters(nb_clusters, sm):
    """Cluster ``sm`` into ``nb_clusters`` groups and show the cluster map.

    Args:
        nb_clusters: Number of clusters passed to ``sm.cluster``.
        sm: SOM object providing ``cluster``.
    """
    sm.cluster(nb_clusters)

    cluster_view = HitMapView(
        12, 12, "Clustering",
        text_size=10,
        cmap=plt.cm.jet,
    )
    # The colour map given to show() differs from the one given to the
    # constructor; both are passed through unchanged.
    cluster_view.show(
        sm,
        anotate=True,
        onlyzeros=False,
        labelsize=7,
        cmap="Pastel1",
    )
    plt.show()
# 2-D view of the map codebook
view2D = View2D(10, 10, "", text_size=7)
view2D.show(
    sm,
    col_sz=5,
    what='codebook',
)  # which_dim="all", denormalize=True are left disabled
plt.show()

# Number of people in each neuron
from sompy.visualization.bmuhits import BmuHitsView

vhts = BmuHitsView(12, 12, "Hits Map", text_size=7)
vhts.show(
    sm,
    anotate=True,
    onlyzeros=False,
    labelsize=10,
    cmap="autumn",
    logaritmic=False,
)

# Apply k-means over the SOM
# K-Means Clustering
from sompy.visualization.hitmap import HitMapView

sm.cluster(3)  # <n_clusters>
hits = HitMapView(10, 10, "Clustering", text_size=7)
a = hits.show(sm, labelsize=12)

# Apply hierarchical clustering on top of the SOM
#
# k-means + hierarchical:
#   First apply k-means with a very large number of centroids, then apply
#   hierarchical clustering over the k-means centroids.
# SOM + k-means?
#   SOM is less sensitive to outliers.
# SOM + hierarchical:
#   Same procedure as for k-means + hierarchical.
# 14.1.1.1 Silhouette scores
# Average silhouette score
silhouette_avg_som_hc_cons = silhouette_score(
    std_cons, som_hc_cons['Hierarchical Clustering'].values)

# Silhouette score of each individual observation
sample_silhouette_som_hc_cons = pd.DataFrame(
    silhouette_samples(std_cons, som_hc_cons['Hierarchical Clustering'].values),
    columns=['Value'])

# Number of positive silhouette scores
pos_sample_hc_cons = sample_silhouette_som_hc_cons[
    sample_silhouette_som_hc_cons.Value > 0].count()

# 14.1.2 K-Means Clustering on top of SOM
# Visualize to which of the k clusters from k-means each neuron belongs
k = 3
som_kmeans_cons = sm_consump.cluster(k)
hits = HitMapView(10, 10, "Clustering", text_size=7)
a = hits.show(sm_consump)

# One row per neuron: the default 0..n-1 index of this frame is the neuron
# number (assumes cluster() returns one label per neuron in neuron order --
# confirm), and 'K_means' is the cluster assigned to that neuron.
som_kmeans_cons = pd.DataFrame(som_kmeans_cons, columns=['K_means'])

# Attach the neuron-level cluster to every client through the client's
# 'Labels' value. `on=` must not be combined with `right_index=True`
# (pandas raises MergeError), so the left key is given with `left_on`.
# This form keeps the index of `final_clusters_consump`, which the
# sort_index() below needs to restore the original row order.
som_kmeans_cons = final_clusters_consump.merge(
    som_kmeans_cons, how='inner', left_on='Labels', right_index=True)
som_kmeans_cons = som_kmeans_cons.sort_index()

# Verify the number of observations in each cluster and the coordinates of
# the cluster centroids
count_obs_som_kmeans_cons = som_kmeans_cons.groupby('K_means').count()
centroids_som_kmeans_cons = som_kmeans_cons.groupby('K_means').mean()
centroids_som_kmeans_cons = centroids_som_kmeans_cons.drop(columns='Labels')

# 14.1.2.1 Silhouette scores
# Average silhouette score
silhouette_avg_som_k_means_cons = silhouette_score(
    std_cons, som_kmeans_cons['K_means'].values)
center, radius = [0.5, 0.5], 0.5 verts = np.vstack([np.sin(theta), np.cos(theta)]).T circle = mpath.Path(verts * radius + center) axes2.set_boundary(circle, transform=axes2.transAxes) plt.savefig(os.path.join(output_path, 'Map_AMET_E_ERAI.jpg'), dpi=400) print '*******************************************************************' print '*************** K-means clustering *****************' print '*******************************************************************' # determine how many groups we want to divide # this is closely relate to physical aspect of view # for instance, if there are 4 weather regimes in the Arctic # then it is better to make 4 group cl = som.cluster(n_clusters=4) setattr(som, 'cluster_labels', [0, 1, 2, 3]) hits = HitMapView(5, 7, 'Weather regimes clustering', text_size=12) hits.show(som) hits.save(os.path.join(output_path, 'K_AMET_E_ERAI.jpg'), dpi=400) print '*******************************************************************' print '*************** U matrix visualization *****************' print '*******************************************************************' u = sompy.umatrix.UMatrixView(5, 7, 'U-Matrix of SLP', show_axis=True, text_size=12, show_text=True) # U matrxi value UMat = u.build_u_matrix(som, distance=1, row_normalized=False) # visualization UMat = u.show(som, distance2=1,
view2D = View2D(10, 10, "rand data", text_size=12) view2D.show(sm, col_sz=4, which_dim="all", desnormalize=True) # U-matrix plot from sompy.visualization.umatrix import UMatrixView umat = UMatrixView(width=10, height=10, title='U-matrix') umat.show(sm) # do the K-means clustering on the SOM grid, sweep across k = 2 to 20 from sompy.visualization.hitmap import HitMapView K = 20 # stop at this k for SSE sweep K_opt = 18 # optimal K already found [labels, km, norm_data] = sm.cluster(K, K_opt) hits = HitMapView(20, 20, "Clustering", text_size=12) a = hits.show(sm) import gmplot gmap = gmplot.GoogleMapPlotter(54.2, -124.875224, 6) j = 0 for i in km.cluster_centers_: gmap.marker(i[0], i[1], 'red', title="Centroid " + str(j)) j += 1 gmap.draw("centroids_map.html") from bs4 import BeautifulSoup def insertapikey(fname, apikey):
from sompy.visualization.mapview import View2D

view2D = View2D(4, 4, "rand data", text_size=16)
view2D.show(som, col_sz=2, which_dim="all", desnormalize=True)

# U-matrix plot
from sompy.visualization.umatrix import UMatrixView

umat = UMatrixView(width=10, height=10, title='U-matrix')
umat.show(som)

from sompy.visualization.hitmap import HitMapView

K = 10
Kluster = som.cluster(K)
hits = HitMapView(20, 20, "K-Means Clustering", text_size=16)
a = hits.show(som)

# som.cluster() returns the k-means cluster label of each neuron of the
# map. The labels for the whole training set are obtained by giving every
# sample the label of its BMU (best-matching unit).
#
# Reuse the labels from the clustering that was just plotted instead of
# clustering a second time: a second run costs an extra fit and, if the
# clustering is randomly initialised (not visible here -- confirm), could
# produce labels that no longer match the hit map above.
map_labels = Kluster

# Make sure indices line up: som._bmu[0] is indexed per sample.
data_labels = np.array([map_labels[int(k)] for k in som._bmu[0]])
clusters = pd.Series(data_labels, name='cluster').to_frame()

# Concatenate the cluster column with the small original SOM input df
df_labeled = None
def draw_cluster_map(self):
    """Show the cluster hit map of ``self.sm`` in a matplotlib window.

    Presumably ``self.sm`` has been clustered beforehand -- confirm at the
    call sites.
    """
    from sompy.visualization.hitmap import HitMapView

    cluster_view = HitMapView(20, 20, "Clustering", text_size=12)
    cluster_view.show(self.sm)
    plt.show()
# U-matrix of the map
umat = UMatrixView(width=20, height=20, title='U-matrix')
umat.show(som)

from sompy.visualization.hitmap import HitMapView
from sompy.visualization.bmuhits import BmuHitsView

# Hit view titled 'Data per node'
bmuhitsview = BmuHitsView(12, 12, 'Data per node', text_size=24)
bmuhitsview.show(
    som,
    anotate=False,
    onlyzeros=False,
    labelsize=7,
    logaritmic=False,
)

# Cluster the map into 5 groups and draw the cluster map
Kluster = som.cluster(5)
hits = HitMapView(20, 20, "K-Means Clustering", text_size=16)
a = hits.show(
    som,
    anotate=False,
    labelsize=7,
    cmap='viridis',
)


def HowManyK(k):
    '''compute SSE for up to k clusters'''
    SSE = np.empty(0)
    K = np.arange(2, k)
    for i in K:
        totalERROR = 0
        # will eventually return more than labels....
        map_labels = som.cluster(n_clusters=i)
        # mapping labels from size of grid to total size of df
        data_labels = np.array([map_labels[int(x)] for x in som._bmu[0]])
        clusters = pd.Series(data_labels)