def do_statistics(options): stats_fn = (options.output_dir + "/" + options.identifier + "/" + options.identifier + ".stats") predicted_labels = load_labels(options) data,data_time,true_labels = experimenter.load_data(options) out = [ "Statistics for result set: " + options.identifier] out.append("============================================================") num_particles = predicted_labels.shape[0] # Descriptive statistics num_predicted = predicted_labels.shape[1] num_original = true_labels.shape[0] unique_true = unique(true_labels) unique_predicted = zeros(num_particles,dtype=int32) for i in range(num_particles): unique_predicted[i] = unique(predicted_labels[i,:]).shape[0] out.append("") out.append("Data set length (processed/total): " + str(num_predicted) + "/" + str(num_original)) out.append("Number of particles: " +str(num_particles)) out.append("") out.append("Number of clusters") out.append("------------------") out.append(descriptive2str(get_descriptive(unique_predicted))) out.append("") out.append("Label Entropy") out.append("------------------") ent = compute_label_entropy(predicted_labels) out.append(descriptive2str(get_descriptive(ent))) # Rand indices rand_indices = zeros((4,num_particles)) for i in range(num_particles): ind = compute_rand_index(predicted_labels[i,:],true_labels) rand_indices[:,i] = ind out.append("") out.append("Rand indices") out.append("------------") out.append("Adjusted: ") out.append(descriptive2str(get_descriptive(rand_indices[0,:]))) out.append("Unadjusted: ") out.append(descriptive2str(get_descriptive(rand_indices[1,:]))) # Variation of information vi = zeros(num_particles) for i in range(num_particles): vi[i] = variation_of_information(predicted_labels[i,:],true_labels) out.append("") out.append("Variation of Information") out.append("------------------------") out.append(descriptive2str(get_descriptive(vi))) out.append("MAP: VI: %.4f; Rand: %.4f" % (vi[0], rand_indices[0,0])) if options.binary_label: tp = 
zeros(num_particles) fp = zeros(num_particles) tn = zeros(num_particles) fn = zeros(num_particles) rpvs = zeros(num_particles) for l in range(num_particles): labels = predicted_labels[l,:] match = find_best_match(labels,true_labels) times = data_time[labels==match] rpvs[l]= sum(times[1:]-times[:-1]<2) tp[l] = sum(logical_and(labels==match,true_labels==1)) fp[l] = sum(logical_and(labels==match,true_labels!=1)) tn[l] = sum(logical_and(labels!=match,true_labels!=1)) fn[l] = sum(logical_and(labels!=match,true_labels==1)) precision = tp / (tp+fp) recall = tp / (tp+fn) fscore = 2*precision*recall/(precision + recall) accuracy = (tp + tn)/(tp + fp + tn + fn) out.append("\nBinary label") out.append("------------") out.append("Precision: ") out.append(descriptive2str(get_descriptive(precision))) out.append("Recall: ") out.append(descriptive2str(get_descriptive(recall))) out.append("Fscore: ") out.append(descriptive2str(get_descriptive(fscore))) out.append("FP %: ") out.append(descriptive2str(get_descriptive(fp/labels.shape[0]))) out.append("FN %: ") out.append(descriptive2str(get_descriptive(fn/(fn + tp)))) out.append("RPVs: ") out.append(descriptive2str(get_descriptive(rpvs))) out.append("MAP: FP: %.4f; FN: %.4f; FScore: %.4f; RPV: %i" % (fp[0]/labels.shape[0], fn[0]/(fn[0] + tp[0]),fscore[0],rpvs[0])) outstr = '\n'.join(out) if not options.quiet: print outstr else: print (options.identifier + ": VI: %.2f (%.1f)" % (mean(vi),mean(unique_predicted))) outfile = open(stats_fn,"w") outfile.write(outstr) outfile.close()
def do_plotting(options):
    """Generate the diagnostic plot suite for one result set.

    Writes a series of figures (scatter plots, entropy-vs-time, ESS,
    per-cluster ISI histograms, and -- if a particle can be loaded --
    cluster-evolution plots) into ``<output_dir>/<identifier>/plots``,
    each saved with the extension given by ``options.output_format``.

    Relies on pylab-style globals (clf, scatter, savefig, ...) and the
    project modules ``plotting`` / ``experimenter`` being in scope.
    """
    print "Generating Plots ..."
    # Index of the particle whose labeling is visualized.
    particle_id = options.use_particle
    ext = "." + options.output_format
    plot_dir = options.output_dir + "/" + options.identifier + "/plots"
    if not exists(plot_dir):
        mkdir(plot_dir)
    data, data_time, true_labels = experimenter.load_data(options)
    if options.true_labels:
        # Plot the ground truth instead of a sampled labeling; reshape to a
        # 1-particle label matrix so the code below works unchanged.
        predicted_labels = true_labels
        predicted_labels.shape = (1, predicted_labels.shape[0])
        ext = "_true" + ext
    else:
        predicted_labels = load_labels(options)
    # Subsample for the scatter plots; idx maps subsample -> original rows.
    s_data, s_data_time, s_predicted_labels, idx = subsample(
        data, data_time, predicted_labels, options)
    T = data_time.shape[0]
    ess = load_ess(options)

    # Labeled 2D scatter plot of the first two PCs
    clf()
    plotting.plot_scatter_2d(s_data[0:2, :], s_predicted_labels[particle_id, :])
    grid()
    savefig(plot_dir + "/" + "scatter_predicted" + ext)

    # 2D scatter plot with entropy heatmap
    clf()
    #ent = compute_label_entropy(s_predicted_labels)
    # Row 2 of ess is used as the per-point entropy here -- presumably the
    # label-filtering-distribution entropy; verify against load_ess.
    ent = ess[2, idx]
    certain = ent == 0
    uncertain = logical_not(certain)
    # Zero-entropy points as hollow squares, the rest colored by entropy.
    if sum(certain) > 0:
        scatter(s_data[0, certain], s_data[1, certain], 10, marker="s",
                facecolors="none", linewidth=0.3)
    if sum(uncertain) > 0:
        scatter(s_data[0, uncertain], s_data[1, uncertain], 10, ent[uncertain],
                linewidth=0.3, cmap=cm.hot)
    grid()
    title("Label Entropy")
    savefig(plot_dir + "/" + "scatter_entropy" + ext)

    # Label entropy vs. time
    clf()
    ent = compute_label_entropy(predicted_labels)
    #subplot(2,1,1)
    axes([0.25, 0.2, 0.7, 0.7])
    plot(data_time, ent, 'x', linewidth=1.5)
    #title("Label Entropy")
    #ylabel("Entropy")
    xlabel("Time")
    axis([-1, max(data_time) + 2, -0.1, 1.1])
    grid()
    #subplot(2,1,2)
    #plot(data_time,ess[2,:],'x',linewidth=1.5)
    #ylabel("Entropy")
    #xlabel("Time (ms)")
    #axis([0,60000,0,1])
    #title("Average Label Filtering Distribution Entropy (SMC)")
    #grid()
    F = gcf()
    F.set_size_inches(2, 2)
    savefig(plot_dir + "/" + "entropy" + ext)

    # Two-panel version: M-H label entropy vs. SMC filtering entropy.
    # NOTE(review): x-range is hard-coded to [0, 60000] time units here.
    clf()
    subplot(2, 1, 1)
    #axes([0.25, 0.2, 0.7, 0.7])
    plot(data_time, ent, linewidth=0.5)
    title("Label Entropy (M-H sampler)")
    ylabel("Entropy")
    #xlabel("Time")
    axis([0, 60000, -0.1, 1.1])
    grid()
    subplot(2, 1, 2)
    plot(data_time, ess[2, :], linewidth=0.5)
    ylabel("Entropy")
    xlabel("Time (ms)")
    axis([0, 60000, -0.1, 1.1])
    title("Average Label Filtering Distribution Entropy (SMC)")
    grid()
    F = gcf()
    F.set_size_inches(6, 4)
    savefig(plot_dir + "/" + "entropy_both" + ext)

    # 2D scatter plot of PCs against time with predicted labels (1st particle)
    clf()
    plotting.plot_pcs_against_time_labeled(s_data, s_data_time,
                                           s_predicted_labels[particle_id, :])
    F = gcf()
    F.set_size_inches(8.3, 4 * data.shape[0])
    savefig(plot_dir + "/" + "pcs_vs_time_predicted" + ext)

    # 2D scatter plot of PCs against time for RPV candidates
    clf()
    isi = data_time[1:] - data_time[:-1]
    # Indices of events closer than 2 time units to their predecessor
    # (refractory-period-violation candidates), plus the predecessor itself.
    rpvs = where(isi < 2)[0] + 1
    rpvs = hstack((rpvs, rpvs - 1))
    if rpvs.shape[0] > 0:
        plotting.plot_pcs_against_time_labeled(data[:, rpvs], data_time[rpvs],
                                               predicted_labels[particle_id, rpvs])
        F = gcf()
        F.set_size_inches(8.3, 4 * data.shape[0])
        savefig(plot_dir + "/" + "pcs_vs_time_rpv" + ext)

    # 2D scatter plot with binary labels
    if options.binary_label:
        clf()
        # Side-by-side: predicted best-match cluster vs. ground-truth class 1.
        match = find_best_match(predicted_labels[particle_id, :], true_labels)
        matches = (predicted_labels[particle_id, :] == match)[idx]
        non_matches = (predicted_labels[particle_id, :] != match)[idx]
        subplot(1, 2, 1)
        plot(s_data[0, matches], s_data[1, matches], 'x')
        plot(s_data[0, non_matches], s_data[1, non_matches], '.')
        grid()
        title("Predicted Labels")
        axis([-5, 5, -5, 5])
        subplot(1, 2, 2)
        plot(s_data[0, true_labels[idx] == 1], s_data[1, true_labels[idx] == 1], 'x')
        plot(s_data[0, true_labels[idx] != 1], s_data[1, true_labels[idx] != 1], '.')
        axis([-5, 5, -5, 5])
        grid()
        title("True Labels")
        F = gcf()
        F.set_size_inches(6, 3)
        savefig(plot_dir + "/" + "scatter_binary" + ext)

    # plot of effective sample size
    clf()
    subplot(2, 1, 1)
    # ess row 1: number of unique particles per time step; row 0: ESS.
    plot(ess[1, :], linewidth=0.3)
    title('Unique Particles')
    ylabel("Unique Particles")
    axis([0, 650, 0, 1100])
    grid()
    xlabel("Time Step")
    subplot(2, 1, 2)
    plot(ess[0, :], linewidth=0.3)
    axis([0, 650, 0, 1100])
    title("Effective Sample Size")
    xlabel("Time Step")
    ylabel("ESS")
    grid()
    F = gcf()
    F.set_size_inches(6, 4)
    savefig(plot_dir + "/" + "ess" + ext)

    # ISI histogram for each neuron
    clf()
    l = predicted_labels[particle_id, :]
    unique_labels = unique(l)
    for i in range(unique_labels.shape[0]):
        c = unique_labels[i]
        points = data[:, l == c]
        times = data_time[l == c]
        isi = times[1:] - times[0:-1]
        # Left column: cluster scatter; right column: its ISI histogram.
        subplot(unique_labels.shape[0], 2, 2 * i + 1)
        label_colors = array(unique_labels, dtype=float64) / max(unique_labels + 1)
        colors = ones(sum(l == c)) * label_colors[i]
        scatter(points[0, :], points[1, :], marker=plotting.markers[c], c=colors,
                cmap=matplotlib.cm.jet, norm=no_norm(), linewidths=(0.3,))
        title("Cluster %i (weight=%.2f)" % (c, sum(l == c) / float(l.shape[0])))
        grid()
        axis([-5, 5, -5, 5])
        subplot(unique_labels.shape[0], 2, 2 * i + 2)
        hist(isi, bins=100, range=(0, 100), normed=True, facecolor='k')
        # Overlay an exponential fit shifted by 2 (presumably the refractory
        # period); rate estimated from the mean ISI beyond that offset.
        xx = arange(2, 100, 0.1)
        rate = 1 / mean(isi - 2)
        plot(xx, rate * exp(-rate * (xx - 2)))
        title("ISI (mean = %.2f)" % mean(isi))
    F = gcf()
    F.set_size_inches(8.3, 2 * unique_labels.shape[0])
    savefig(plot_dir + "/" + "isi" + ext)

    particle = experimenter.load_particle(options)
    if particle != None:
        ### plots requiring the information from at least one particle
        clf()
        # plot of clusters + data at fixed, equally spaced time points
        num_plots = 9
        timepoints = array(arange(1, num_plots + 1) * (T - 1) / (float(num_plots)),
                           dtype=int32)
        for i in range(num_plots):
            subplot(3, 3, i + 1)
            t = timepoints[i]
            plotting.plot_state_with_data(particle, data, data_time, t)
            title("t = " + str(t))
            grid()
        F.set_size_inches(6, 6)
        savefig(plot_dir + "/" + "cluster_evolution" + ext)
        # plot of cluster means and variances over time
        clf()
        plotting.plot_pcs_against_time_labeled_with_particle(
            data, data_time, predicted_labels[0, :], particle)
        F.set_size_inches(40, 8 * data.shape[0])
        savefig(plot_dir + "/" + "clusters_vs_time" + ext)
        # plot of mstore for each clusters
        clf()
        plotting.plot_mstore_against_time(particle)
        savefig(plot_dir + "/" + "mstore" + ext)