def plot(self, xy=(0, 1)):
    """
    Draw a 2D plot of the embedded positions stored in `pos`.

    The two columns of `pos` named by `xy` are handed to `plot_clusters`
    together with `labels` and the cluster information in `_cls`.

    :param xy: indices of the two dimensions (columns) of `pos` to plot.
    :type xy: tuple, optional
    :return: whatever `plot_clusters` returns.
    """
    first_dim = xy[0]
    second_dim = xy[1]
    # Keep every row, but only the two requested columns.
    projected = self.pos[:, [first_dim, second_dim]]
    return plot_clusters(projected, self.labels, clusters=self._cls)
break else: print "Replacing cluster %i with trajectory %i" % (clusterIndex, tjcIndex) means[clusterIndex] = tjcs[tjcIndex] meansIndex.append(tjcIndex) ll_old = 0 i = 0 continue else: ll_old = ll_new i += 1 print "Iteration Number: ", i # This is to plot the results of the E-M algorithm. fig2 = plt.figure() myplt.plot_clusters(clusters, tjcs, "Results after Clustering", "600x500+850+0") # Save means structre, which represents the clusters typical trayectories into # a npy file. timestr = time.strftime("%Y%m%d-%H%M%S") filename=path.join("data/"+timestr+"-clusters") np.save(filename, means) # Let's plot the found clusters. fig3 = plt.figure() myplt.plot_time_model(means, "Clusters", "600x500+200+600") # Plot cluster contribs cluster_contribs = np.sum(clusters, 0) fig4 = plt.figure() wm = plt.get_current_fig_manager()
def main(_):
    """Run the task selected by ``FLAGS.task``.

    Supported tasks:

    * ``'metrics'`` -- read the scored phrase list and write the bottom-k,
      mid-k and top-k entries to ``FLAGS.output`` inside the data folder.
    * ``'clustering'`` -- read the data set, cluster it with
      ``PhraseClustering``, write one ``cluster<id>.txt`` file per cluster
      into ``FLAGS.cluster_folder`` and store the arrays through
      ``save_cluster_numpy``.
    * ``'plot'`` -- reload the data set and the stored cluster assignment
      and plot them.
    * ``'compare'`` -- read the segmentation metrics and plot the average
      and total phrase curves.

    Any other task value falls through without doing anything.

    :param _: unused. Presumably the argv list handed over by the app
        runner -- confirm against the caller.
    """
    # Local import so the block does not rely on a file-level import.
    from contextlib import ExitStack

    data_folder = FLAGS.data_folder
    filename = FLAGS.filename

    if FLAGS.task == 'metrics':
        output = FLAGS.output
        k = FLAGS.k

        # find top-k, mid-k and bottom-k
        scores, phrases = read_phrase_list(data_folder=data_folder, filename=filename)
        bottom = bottom_k(scores, phrases, k=k)
        mid = mid_k(scores, phrases, k=k)
        top = top_k(scores, phrases, k=k)

        with open(data_folder + '/' + output, 'w') as out:
            out.write('Bottom-k (' + str(k) + ')' + '\n')
            for entry in bottom:
                out.write(entry)
            # Separator between sections; the last section has none.
            out.write('\n')

            out.write('Mid-k (' + str(k) + ')' + '\n')
            for entry in mid:
                out.write(entry)
            out.write('\n')

            out.write('Top-k (' + str(k) + ')' + '\n')
            for entry in top:
                out.write(entry)

    elif FLAGS.task == 'clustering':
        clusters_folder = data_folder + '/' + FLAGS.cluster_folder

        print('Current step: Reading data set', end='\r')
        y_train, x_train = read_dataset(filename=filename, data_folder=data_folder)

        print('Current step: Performing clustering', end='\r')
        task = PhraseClustering()
        y_pred = task.run(x_train, n_clusters=FLAGS.n_clusters, distance=FLAGS.distance)

        # create result files
        print('Current step: Saving clustered phrases', end='\r')
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(clusters_folder, exist_ok=True)

        # ExitStack closes every cluster file even if opening a later file
        # or writing a phrase raises, so no handle is leaked.
        with ExitStack() as stack:
            files = {}
            for cluster in np.unique(y_pred):
                cluster_path = clusters_folder + '/cluster' + str(cluster) + '.txt'
                files[cluster] = stack.enter_context(open(cluster_path, 'w'))

            # y_train[i] is written to the file of the cluster predicted
            # for sample i.
            for i, cluster in enumerate(y_pred.tolist()):
                files[cluster].write(y_train[i] + '\n')

        print('Current step: Saving numpy files', end='\r')
        save_cluster_numpy(x_train, y_pred, data_folder)

    elif FLAGS.task == 'plot':
        print('Current step: Reading data set', end='\r')
        y_train, x_train = read_dataset(filename=filename, data_folder=data_folder)
        y_pred = load_cluster_numpy(data_folder)

        print('Current step: Generating plots', end='\r')
        plot_clusters(x_train, y_pred, FLAGS.n_clusters)

    elif FLAGS.task == 'compare':
        data = read_segmentation_metrics(filename=filename, data_folder=data_folder)
        plot_avg_phrases_curve(data)
        plot_total_phrases_curve(data)
if i < iterations - 1: c_index, c_score = em.worst_cluster(clusters) t_index, t_score = em.worst_trajectory(clusters, c_index, c_score, tjcIndex, tjcs, covariance) # If a worst trajectory is not found. if t_index == -1: #break means[c_index] = -1 else: #print "Replacing cluster %i with trajectory %i" % ( c_index, t_index ) means[c_index] = tjcs[t_index] tjcIndex.append( t_index ) # This is for plotting the results. plt.title("Clustering, Iteration Number: %s" %(i+1)) myplt.plot_clusters(clusters, tjcs) plt.pause(0.01) # Save means structre, which represents the clusters typical trayectories into # a npy file. np.save("data/clusters.npy", means) # Let's plot the found clusters. fig3 = plt.figure() wm = plt.get_current_fig_manager() wm.window.wm_geometry("600x500+350+200") fi3 = plt.title("Clusters") plt.grid() plt.xticks(np.arange(0, 16, 1.0)) plt.yticks(np.arange(0, 12, 1.0)) plt.xlabel('X (mts)')
#####################################################################
#####################################################################
best_clustering = get_best_clustering(working_directory)
best_clustering = Clustering.from_dic(best_clustering["clustering"])

# Score every cluster of the best clustering. Each entry is a
# (score, cluster) tuple so the cluster can be recovered from its score.
scores = []
for a_cluster in best_clustering.clusters:
    scores.append((score_cluster(a_cluster, m_handler.distance_matrix, all_metrics), a_cluster))

# Remember: first metric is spawning point, second metric must be energy
most_negative = find_most_negative_cluster(scores)
most_negative_cluster = most_negative[1]  # second tuple item is the cluster

#######################################################################################################################
# Plot clusters
#######################################################################################################################
plot_clusters(os.path.join(base_dir, "clusters.svg"), all_metrics, scores, scores.index(most_negative))

#######################################################################################################################
# Store the elements of the most negative cluster
#######################################################################################################################
# This works because we have only one traj. Merging before should be mandatory.
trajectory_path = os.path.join(os.getcwd(), traj_pdb)
most_negative_path = os.path.join(base_dir, "most_negative_cluster.pdb")
# Context managers make sure both files are closed (and the output is
# flushed) even if the frame extraction raises.
with open(trajectory_path, "r") as in_handler, open(most_negative_path, "w") as out_handler:
    extract_frames_from_trajectory_sequentially(file_handler_in = in_handler,
                                                number_of_frames = get_number_of_frames(trajectory_path),
                                                file_handler_out = out_handler,
                                                frames_to_save = most_negative_cluster.all_elements,
                                                keep_header = True,
                                                write_frame_number_instead_of_correlative_model_number = True)