示例#1
0
def dist_table(options):
    """Write the jaccard distance table, gathered from the comparison files.

    For every GAM in ``options.in_gams`` the comparison output of the graph,
    linear, augmented and sample graphs against the baseline is read with
    ``jaccard_dist``.  One row is emitted per GAM holding the four distances
    followed by the difference of the linear, augmented and sample distances
    to the graph distance.

    The table is written as TSV to ``dist_tsv_path(options)``; nothing is
    returned.
    """
    columns = ["#graph", "graph_dist", "linear_dist", "augmented_dist",
               "sample_dist", "delta_linear", "delta_augmented",
               "delta_sample"]

    # tsv header: the first line names the baseline and is padded with empty
    # fields so that it has as many columns as the column-name line below it.
    lines = ["#\t{}".format(options.baseline) + "\t" * (len(columns) - 2) + "\n",
             "\t".join(columns) + "\n"]

    # one label followed by seven 4-significant-digit numbers
    row_format = "{}" + "\t{:.4}" * (len(columns) - 1) + "\n"

    for gam in options.in_gams:
        baseline = baseline_path(gam, options)
        graph = graph_path(gam, options)

        graph_dist = jaccard_dist(comp_path(baseline, graph, options))
        aug_graph_dist = jaccard_dist(
            comp_path(baseline, augmented_vg_path(gam, options), options))
        lin_graph_dist = jaccard_dist(
            comp_path(baseline, linear_vg_path(gam, options), options))
        sam_graph_dist = jaccard_dist(
            comp_path(baseline, sample_vg_path(gam, options), options))

        lines.append(row_format.format(
            # label the row with the graph file name, minus its extension
            os.path.splitext(os.path.basename(graph))[0],
            graph_dist,
            lin_graph_dist,
            aug_graph_dist,
            sam_graph_dist,
            lin_graph_dist - graph_dist,
            aug_graph_dist - graph_dist,
            sam_graph_dist - graph_dist))

    with open(dist_tsv_path(options), "w") as ofile:
        ofile.write("".join(lines))
示例#2
0
def compute_comparison(job, baseline, graph, options):
    """Run ``vg compare`` between two graphs and store its output.

    The standard output of ``vg compare`` is written to
    ``comp_path(baseline, graph, options)``.  The comparison is skipped when
    that file already exists, unless ``options.overwrite`` is set.

    Args:
        job: not used by this function (presumably the workflow job handle
            passed in by the scheduler -- TODO confirm against the caller).
        baseline: path of the baseline graph, first argument to vg compare.
        graph: path of the graph compared against the baseline.
        options: must provide ``overwrite`` and ``vg_cores`` plus whatever
            ``index_path`` and ``comp_path`` read.

    Raises:
        AssertionError: if the index of either graph does not exist.
        subprocess.CalledProcessError: if ``vg compare`` exits non-zero.
        OSError: if the ``vg`` executable cannot be started.
    """
    import subprocess

    graph_index_path = index_path(graph, options)
    assert os.path.exists(graph_index_path)
    baseline_index_path = index_path(baseline, options)
    assert os.path.exists(baseline_index_path)

    out_path = comp_path(baseline, graph, options)
    if not options.overwrite and os.path.exists(out_path):
        return

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.  At most two threads are requested.
    cmd = ["vg", "compare", str(baseline), str(graph),
           "-t", str(min(2, options.vg_cores))]

    succeeded = False
    try:
        with open(out_path, "w") as out_file:
            subprocess.check_call(cmd, stdout=out_file)
        succeeded = True
    finally:
        # A failed run must not leave an empty or truncated file behind:
        # the existence check above would treat it as a finished comparison.
        if not succeeded and os.path.exists(out_path):
            os.remove(out_path)
示例#3
0
def acc_table(options):
    """Write the accuracy table, gathered from the comparison files.

    For every GAM in ``options.in_gams`` the comparison output of the graph,
    linear, augmented and sample graphs against the baseline is read with
    ``accuracy``.  Only the first three entries of each result are used;
    going by the column names they are presumably (precision, recall, f1)
    -- TODO confirm against ``accuracy``.

    Results are grouped by ``graph_path(gam, options)``: GAMs that map to the
    same graph path are averaged into a single row.  Rows appear in the order
    in which each graph path is first encountered.

    The table is written as TSV to ``acc_tsv_path(options)``; nothing is
    returned.
    """
    categories = ("graph", "linear", "augmented", "sample")
    metrics = ("prec", "rec", "f1")

    columns = ["#graph"]
    for category in categories:
        columns.extend("{}_{}".format(category, metric) for metric in metrics)

    # tsv header: the first line names the baseline and is padded with empty
    # fields so that it has as many columns as the column-name line below it.
    lines = ["#\t{}".format(options.baseline) + "\t" * (len(columns) - 2) + "\n",
             "\t".join(columns) + "\n"]

    n_values = len(categories) * len(metrics)
    sums = defaultdict(lambda: [0.] * n_values)
    counts = defaultdict(int)
    names = []  # graph paths in first-seen order, for a stable row order

    for gam in options.in_gams:
        baseline = baseline_path(gam, options)
        name = graph_path(gam, options)

        # same order as `categories`
        compared_paths = (name,
                          linear_vg_path(gam, options),
                          augmented_vg_path(gam, options),
                          sample_vg_path(gam, options))

        if name not in counts:
            names.append(name)

        totals = sums[name]
        for i, path in enumerate(compared_paths):
            acc = accuracy(comp_path(baseline, path, options))
            for j in range(len(metrics)):
                totals[i * len(metrics) + j] += acc[j]

        counts[name] += 1

    # one label followed by twelve 4-significant-digit numbers
    row_format = "{}" + "\t{:.4}" * n_values + "\n"

    for name in names:
        count = float(counts[name])
        means = [float(total) / count for total in sums[name]]
        # label each row with the graph it aggregates (file name, no extension)
        label = os.path.splitext(os.path.basename(name))[0]
        lines.append(row_format.format(label, *means))

    with open(acc_tsv_path(options), "w") as ofile:
        ofile.write("".join(lines))