:param topics: array, [n_tokens] """ top_features = topic.argsort()[::-1] return [terms[ind] for ind in top_features[:n_tokens]] print("Top 5-term topics per cluster:") km_centroids = km.cluster_centers_ terms = vectorizer.get_feature_names() def get_features_in_classifier(classfier_data, nodes, topics_data, terms): sorted_centroids = classfier_data.argsort()[:, ::-1] for i in range(nodes): print("Cluster %d:" % i) top_topics = sorted_centroids[i, :4] for topic_id in top_topics: tokens = get_tokens_per_topic(topics_data[topic_id, :], terms, 15) print('Topic:%d:\t' % (topic_id) + ','.join(tokens)) print() get_features_in_classifier(km_centroids, true_k, Y, terms) ############################################################################### # Perform SOM clustering from som.som import SOMMapper som = SOMMapper(kshape=(5, 5), n_iter=300, learning_rate=0.005) kohonen = som.fit_transform(Y) get_features_in_classifier(kohonen, som.n_nodes, Y, terms)
__author__ = 'husser'

import numpy as np
from matplotlib import pyplot as plt
from sklearn.cluster import AgglomerativeClustering
from sklearn.feature_extraction.image import grid_to_graph

from som.som import SOMMapper, build_U_matrix

# --- Map / training settings -------------------------------------------------
kshape_test = (30, 20)
n_iter_test = 300
learning_rate_test = 0.005

# --- Training data: n_colors random samples with 3 components each -----------
n_colors = 200
spcolors = np.random.rand(n_colors, 3)

# --- Fit the self-organising map and derive its U-matrix ---------------------
mapper = SOMMapper(
    kshape=kshape_test,
    n_iter=n_iter_test,
    learning_rate=learning_rate_test,
)
kohonen = mapper.fit_transform(spcolors)
U_Matrix = build_U_matrix(kohonen, kshape_test, topology="rect")

# --- Left panel: the trained map ---------------------------------------------
fig = plt.figure()
ax1 = fig.add_subplot(1, 2, 1)
# Cut the fitted output into kshape_test[0] equal chunks along axis 0 so that
# imshow receives a 2-D grid of 3-component cells.
# (presumably ``kohonen`` holds one row per map node -- TODO confirm)
ax1.imshow(np.split(kohonen, kshape_test[0], axis=0))
ax1.set_title("Kohonen Map")

# Clustering
n_clusters = 8  # number of regions
# Grid adjacency over the map, so Ward merges are constrained by this
# connectivity structure.
connectivity = grid_to_graph(*kshape_test)
ward = AgglomerativeClustering(
    n_clusters=n_clusters,
    linkage='ward',
    connectivity=connectivity,
)
ward.fit(kohonen)
# One cluster label per sample, laid back out on the map grid.
label = ward.labels_.reshape(kshape_test)