Exemplo n.º 1
0
def get_single_performance(region, outdir, method, debug=False):
    """Count the alleles that differ between the simulated and inferred germline sets.

    Reads the simulation germline info from <outdir>/germlines/simulation and the
    inferred info from <outdir>/<method>/sw/germline-sets (both for args.locus),
    then calls glutils.synchronize_glfos() with the simulation as the reference.

    Returns a dict with three integer entries:
      'missing'  -- number of genes in the simulation set but not the inferred set
      'spurious' -- number of genes in the inferred set but not the simulation set
      'total'    -- number of simulation genes whose name contains a '+'

    If <debug> is set, a one-line summary of each non-empty category is printed.
    """
    sim_glfo = glutils.read_glfo(outdir + '/germlines/simulation', locus=args.locus)
    inf_glfo = glutils.read_glfo(outdir + '/' + method + '/sw/germline-sets', locus=args.locus)
    glutils.synchronize_glfos(ref_glfo=sim_glfo, new_glfo=inf_glfo, region=region)

    sim_genes = set(sim_glfo['seqs'][region])
    inf_genes = set(inf_glfo['seqs'][region])
    missing_alleles = sim_genes - inf_genes
    spurious_alleles = inf_genes - sim_genes

    if debug:
        reports = (
            ('    %2d  missing %s', missing_alleles),
            ('    %2d spurious %s', spurious_alleles),
        )
        for fmt, alleles in reports:
            if len(alleles) > 0:
                print(fmt % (len(alleles), ' '.join(utils.color_gene(g) for g in alleles)))
        if not missing_alleles and not spurious_alleles:
            print('    none missing')

    # anybody with a '+' should be a new allele
    n_new_alleles = sum(1 for gene in sim_glfo['seqs'][region] if '+' in gene)

    return {
        'missing' : len(missing_alleles),
        'spurious' : len(spurious_alleles),
        'total' : n_new_alleles,
    }
Exemplo n.º 2
0
def get_gene_sets(glsfnames, glslabels, ref_label=None):
    """Read one germline set per (label, file name) pair and collect their sequences.

    For each file name, the germline directory is taken to be the file's parent
    directory with any '/<args.locus>' substring removed; it is read with
    glutils.read_glfo(). If <ref_label> is given, every other labelled set is
    passed through glutils.synchronize_glfos() with the <ref_label> set as the
    reference, for args.region.

    Returns a tuple (all_genes, gl_sets):
      gl_sets   -- {label : {gene : seq}} for args.region of each germline set
      all_genes -- {gene : seq} merged over all labels (on a name clash, the set
                   iterated last wins)
    """
    glfos = {}
    for fname, label in zip(glsfnames, glslabels):
        locus_parent_dir = os.path.dirname(fname).replace('/' + args.locus, '')
        # NOTE (original author): this is gonna fail for tigger since you only have the .fa
        glfos[label] = glutils.read_glfo(locus_parent_dir, args.locus)

    if ref_label is not None:
        for label in glslabels:
            if label == ref_label:  # the reference doesn't get synchronized to itself
                continue
            print('    syncronizing %s names to match %s' % (label, ref_label))
            glutils.synchronize_glfos(ref_glfo=glfos[ref_label], new_glfo=glfos[label], region=args.region)

    # plain-dict copy of each set's sequences for the region of interest
    gl_sets = {}
    for label, glfo in glfos.items():
        gl_sets[label] = dict(glfo['seqs'][args.region].items())

    all_genes = {}
    for gene_seqs in gl_sets.values():
        for gene, seq in gene_seqs.items():
            all_genes[gene] = seq

    return all_genes, gl_sets