Example #1
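# These examples assume a pylab/NumPy star-import environment and that project
# helpers (load_labels, experimenter, plotting, subsample, load_ess,
# descriptive2str, get_descriptive, compute_label_entropy, compute_rand_index,
# variation_of_information, find_best_match, ...) are available in the module
# namespace.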
def do_statistics(options):
    stats_fn = (options.output_dir + "/" + options.identifier
                + "/" + options.identifier + ".stats")
    predicted_labels = load_labels(options)
    data,data_time,true_labels = experimenter.load_data(options)
    out = ["Statistics for result set: " + options.identifier]
    out.append("============================================================")
    num_particles = predicted_labels.shape[0]
    # Descriptive statistics
    num_predicted = predicted_labels.shape[1]
    num_original = true_labels.shape[0]
    unique_true = unique(true_labels)
    unique_predicted = zeros(num_particles,dtype=int32)
    for i in range(num_particles):
        unique_predicted[i] = unique(predicted_labels[i,:]).shape[0]
    out.append("")
    out.append("Data set length (processed/total): " + str(num_predicted) +
               "/" + str(num_original))
    out.append("Number of particles: " +str(num_particles))
    out.append("")
    out.append("Number of clusters")
    out.append("------------------")
    out.append(descriptive2str(get_descriptive(unique_predicted))) 
    
    out.append("")
    out.append("Label Entropy")
    out.append("------------------")
    ent = compute_label_entropy(predicted_labels)
    out.append(descriptive2str(get_descriptive(ent))) 

    # Rand indices
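    # compute_rand_index is assumed to return several index variants per
    # particle; only the first two rows (adjusted / unadjusted Rand index)
    # are reported below.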
    rand_indices = zeros((4,num_particles))
    for i in range(num_particles):
        ind = compute_rand_index(predicted_labels[i,:],true_labels)
        rand_indices[:,i] = ind
    out.append("")
    out.append("Rand indices")
    out.append("------------")
    out.append("Adjusted: ")
    out.append(descriptive2str(get_descriptive(rand_indices[0,:]))) 
    out.append("Unadjusted: ")
    out.append(descriptive2str(get_descriptive(rand_indices[1,:]))) 
    
    # Variation of information
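    # VI(U, V) = H(U) + H(V) - 2 * I(U; V): the entropies of the two labelings
    # minus twice their mutual information; lower is better, 0 means the
    # partitions are identical.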
    vi = zeros(num_particles)
    for i in range(num_particles):
        vi[i] = variation_of_information(predicted_labels[i,:],true_labels)
    out.append("")
    out.append("Variation of Information")
    out.append("------------------------")
    out.append(descriptive2str(get_descriptive(vi))) 
    out.append("MAP: VI: %.4f; Rand: %.4f" % (vi[0], rand_indices[0,0]))
    
    if options.binary_label:
        tp = zeros(num_particles)
        fp = zeros(num_particles)
        tn = zeros(num_particles)
        fn = zeros(num_particles)
        rpvs = zeros(num_particles)
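        # For each particle, the predicted cluster that best matches the true
        # positive class is treated as "positive" and confusion-matrix counts
        # are accumulated against it. RPVs count refractory period violations:
        # inter-spike intervals below 2 (assumed ms) within the matched cluster.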
        for l in range(num_particles):
            labels = predicted_labels[l,:]
            match = find_best_match(labels,true_labels)
            times = data_time[labels==match]
            rpvs[l]= sum(times[1:]-times[:-1]<2)
            tp[l] = sum(logical_and(labels==match,true_labels==1))
            fp[l] = sum(logical_and(labels==match,true_labels!=1))
            tn[l] = sum(logical_and(labels!=match,true_labels!=1))
            fn[l] = sum(logical_and(labels!=match,true_labels==1))
        precision = tp / (tp+fp)
        recall = tp / (tp+fn)
        fscore = 2*precision*recall/(precision + recall)
        accuracy = (tp + tn)/(tp + fp + tn + fn)
        out.append("\nBinary label")
        out.append("------------")
        out.append("Precision: ")
        out.append(descriptive2str(get_descriptive(precision))) 
        out.append("Recall: ")
        out.append(descriptive2str(get_descriptive(recall))) 
        out.append("Fscore: ")
        out.append(descriptive2str(get_descriptive(fscore))) 
        out.append("FP %: ")
        out.append(descriptive2str(get_descriptive(fp/labels.shape[0]))) 
        out.append("FN %: ")
        out.append(descriptive2str(get_descriptive(fn/(fn + tp)))) 
        out.append("RPVs: ")
        out.append(descriptive2str(get_descriptive(rpvs))) 
        out.append("MAP: FP: %.4f; FN: %.4f; FScore: %.4f; RPV: %i" % (fp[0]/labels.shape[0], fn[0]/(fn[0] + tp[0]),fscore[0],rpvs[0]))

    outstr = '\n'.join(out)
    if not options.quiet:
        print outstr
    else:
        print (options.identifier + ": VI: %.2f (%.1f)" % (mean(vi),mean(unique_predicted)))
    outfile = open(stats_fn,"w")
    outfile.write(outstr)
    outfile.close()
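The adjusted Rand index and variation of information above come from the project's own helpers (compute_rand_index, variation_of_information). As a rough, optional cross-check, the same quantities can be computed with scikit-learn and NumPy; the sketch below is illustrative only and assumes two 1-D integer label vectors of equal length.

import numpy as np
from sklearn.metrics import adjusted_rand_score, mutual_info_score

def label_entropy(labels):
    """Shannon entropy (in nats) of a discrete labeling."""
    _, counts = np.unique(labels, return_counts=True)
    p = counts / float(counts.sum())
    return float(-np.sum(p * np.log(p)))

def variation_of_information_check(pred, true):
    """VI(U, V) = H(U) + H(V) - 2 * I(U; V); lower is better."""
    return label_entropy(pred) + label_entropy(true) - 2.0 * mutual_info_score(true, pred)

pred = np.array([0, 0, 1, 1, 2, 2])
true = np.array([1, 1, 1, 0, 0, 0])
print("Adjusted Rand index: %.4f" % adjusted_rand_score(true, pred))
print("Variation of information: %.4f" % variation_of_information_check(pred, true))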
Example #2
def do_plotting(options):
    print "Generating Plots ..."
    particle_id = options.use_particle
    ext = "." + options.output_format
    plot_dir = options.output_dir + "/" + options.identifier + "/plots"
    if not exists(plot_dir):
        mkdir(plot_dir)
    data,data_time,true_labels = experimenter.load_data(options)
    if options.true_labels:
        # Use the ground-truth labels as a single "particle", without
        # modifying true_labels in place.
        predicted_labels = true_labels.reshape(1, -1)
        ext = "_true" + ext
    else:
        predicted_labels = load_labels(options)

    s_data,s_data_time,s_predicted_labels,idx = subsample(
            data,data_time,predicted_labels,options)
    T = data_time.shape[0]
    ess = load_ess(options)
    
    # Labeled 2D scatter plot of the first two PCs
    clf()
    plotting.plot_scatter_2d(s_data[0:2,:],s_predicted_labels[particle_id,:])
    grid()
    savefig(plot_dir + "/" + "scatter_predicted" + ext)

    # 2D scatter plot with entropy heatmap
    clf()
    #ent = compute_label_entropy(s_predicted_labels)
    ent = ess[2,idx]
    certain = ent==0
    uncertain = logical_not(certain)
    if sum(certain)>0:
        scatter(s_data[0,certain],s_data[1,certain],10,marker="s",facecolors="none",
                linewidth=0.3)
    if sum(uncertain)>0:
        scatter(s_data[0,uncertain],s_data[1,uncertain],10,ent[uncertain],
                linewidth=0.3,cmap=cm.hot)
    grid()
    title("Label Entropy")
    savefig(plot_dir + "/" + "scatter_entropy" + ext)

    # Label entropy vs. time
    clf()
    ent = compute_label_entropy(predicted_labels)
    #subplot(2,1,1)
    axes([0.25, 0.2, 0.7, 0.7])
    plot(data_time,ent,'x',linewidth=1.5)
    #title("Label Entropy")
    #ylabel("Entropy")
    xlabel("Time")
    axis([-1,max(data_time)+2,-0.1,1.1])
    grid()
    #subplot(2,1,2)
    #plot(data_time,ess[2,:],'x',linewidth=1.5)
    #ylabel("Entropy")
    #xlabel("Time (ms)")
    #axis([0,60000,0,1])
    #title("Average Label Filtering Distribution Entropy (SMC)")
    #grid()
    F = gcf()
    F.set_size_inches(2,2)
    savefig(plot_dir + "/" + "entropy" + ext)
    
    clf()
    subplot(2,1,1)
    #axes([0.25, 0.2, 0.7, 0.7])
    plot(data_time,ent,linewidth=0.5)
    title("Label Entropy (M-H sampler)")
    ylabel("Entropy")
    #xlabel("Time")
    axis([0,60000,-0.1,1.1])
    grid()
    subplot(2,1,2)
    plot(data_time,ess[2,:],linewidth=0.5)
    ylabel("Entropy")
    xlabel("Time (ms)")
    axis([0,60000,-0.1,1.1])
    title("Average Label Filtering Distribution Entropy (SMC)")
    grid()
    F = gcf()
    F.set_size_inches(6,4)
    savefig(plot_dir + "/" + "entropy_both" + ext)


    # 2D scatter plot of PCs against time with predicted labels (selected particle)
    clf()
    plotting.plot_pcs_against_time_labeled(s_data,s_data_time,
            s_predicted_labels[particle_id,:])
    F = gcf()
    F.set_size_inches(8.3,4*data.shape[0])
    savefig(plot_dir + "/" + "pcs_vs_time_predicted" + ext)

    # 2D scatter plot of PCs against time for RPV candidates
    clf()
    # Inter-spike intervals below 2 (presumably ms) flag refractory period
    # violation (RPV) candidates; keep both spikes of each violating pair.
    isi = data_time[1:]-data_time[:-1]
    rpvs = where(isi < 2)[0] + 1
    rpvs = hstack((rpvs,rpvs-1))
    if rpvs.shape[0] > 0:
        plotting.plot_pcs_against_time_labeled(data[:,rpvs],data_time[rpvs],
                predicted_labels[particle_id,rpvs])
        F = gcf()
        F.set_size_inches(8.3,4*data.shape[0])
        savefig(plot_dir + "/" + "pcs_vs_time_rpv" + ext)


    # 2D scatter plot with binary labels
    if options.binary_label:
        clf()
        match = find_best_match(predicted_labels[particle_id,:],true_labels)
        matches = (predicted_labels[particle_id,:]==match)[idx]
        non_matches = (predicted_labels[particle_id,:]!=match)[idx]
        subplot(1,2,1)
        plot(s_data[0,matches],s_data[1,matches],'x')
        plot(s_data[0,non_matches],s_data[1,non_matches],'.')
        grid()
        title("Predicted Labels")
        axis([-5,5,-5,5])
        subplot(1,2,2)
        plot(s_data[0,true_labels[idx]==1],s_data[1,true_labels[idx]==1],'x')
        plot(s_data[0,true_labels[idx]!=1],s_data[1,true_labels[idx]!=1],'.')
        axis([-5,5,-5,5])
        grid()
        title("True Labels")
        F = gcf()
        F.set_size_inches(6,3)
        savefig(plot_dir + "/" + "scatter_binary" + ext)

    # plot of effective sample size
    clf()
    subplot(2,1,1)
    plot(ess[1,:],linewidth=0.3)
    title('Unique Particles')
    ylabel("Unique Particles")
    axis([0,650,0,1100])
    grid()
    xlabel("Time Step")
    subplot(2,1,2)
    plot(ess[0,:],linewidth=0.3)
    axis([0,650,0,1100])
    title("Effective Sample Size")
    xlabel("Time Step")
    ylabel("ESS")
    grid()
    F = gcf()
    F.set_size_inches(6,4)
    savefig(plot_dir + "/" + "ess" + ext)

    # Per-cluster scatter plot and ISI histogram for each putative neuron
    clf()
    l = predicted_labels[particle_id,:]
    unique_labels = unique(l)
    for i in range(unique_labels.shape[0]):
        c = unique_labels[i]
        points = data[:,l==c]
        times = data_time[l==c]
        isi = times[1:] - times[0:-1]
        subplot(unique_labels.shape[0],2,2*i+1)
        label_colors = array(unique_labels,dtype=float64)/max(unique_labels+1)
        colors = ones(sum(l==c))*label_colors[i]
        scatter(points[0,:],points[1,:],marker=plotting.markers[c],c=colors,
                cmap=matplotlib.cm.jet,
                norm=no_norm(),
                linewidths=(0.3,))
        title("Cluster %i (weight=%.2f)" % (c,sum(l==c)/float(l.shape[0])))
        grid()
        axis([-5,5,-5,5])
        subplot(unique_labels.shape[0],2,2*i+2)
        hist(isi,bins=100,range=(0,100),normed=True,facecolor='k')
        xx = arange(2,100,0.1)
        rate = 1/mean(isi-2)
        plot(xx,rate*exp(-rate*(xx-2)))
        title("ISI (mean = %.2f)" % mean(isi))
    F = gcf()
    F.set_size_inches(8.3,2*unique_labels.shape[0])
    savefig(plot_dir + "/" + "isi" + ext)
    
    
    particle = experimenter.load_particle(options)
    if particle is not None:
        ### plots requiring the information from at least one particle
        clf()
        # plot of clusters + data at fixed, equally spaced time points
        num_plots = 9
        timepoints = array(arange(1,num_plots+1)*(T-1)/(float(num_plots)),dtype=int32)
        for i in range(num_plots):
            subplot(3,3,i+1)
            t = timepoints[i]
            plotting.plot_state_with_data(particle,data,data_time,t)
            title("t = " + str(t))
            grid()
        F.set_size_inches(6,6)
        savefig(plot_dir + "/" + "cluster_evolution" + ext)

        # plot of cluster means and variances over time
        clf()
        plotting.plot_pcs_against_time_labeled_with_particle(
                data,data_time,predicted_labels[0,:],particle)
        F.set_size_inches(40,8*data.shape[0])
        savefig(plot_dir + "/" + "clusters_vs_time" + ext)

        # plot of mstore for each cluster
        clf()
        plotting.plot_mstore_against_time(particle)
        savefig(plot_dir + "/" + "mstore" + ext)
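Both functions take a single options object, presumably produced by the project's command-line parser. A minimal, hypothetical driver is sketched below; the attribute names are inferred from the code above, the values are placeholders, and helpers such as experimenter.load_data may require additional attributes not shown here.

from argparse import Namespace

options = Namespace(
    output_dir="results",      # one sub-directory per result set
    identifier="run01",        # results are read from results/run01/
    use_particle=0,            # particle index used for single-particle plots
    output_format="pdf",       # file extension for the saved figures
    true_labels=False,         # if True, plot ground-truth labels instead
    binary_label=True,         # enable the binary-label statistics and plots
    quiet=False,               # print the full report rather than a one-liner
)

do_statistics(options)
do_plotting(options)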