Пример #1
0
def merge_maps(maps):
    """Merge several maps into one loci table and one genotype table.

    ``maps`` is an iterable of strings. Each string is either a map file
    name, or ``"mapfile,mIDfile"`` (exactly one comma); without a comma the
    second argument passed to the loader is ``None``.

    Every map is loaded with
    ``extract_genotypes_from_mclgr.load_cross_radtag_genotypes``; its loci
    are passed through ``increment_lg`` together with the running
    ``increment`` and merged into a single dict, and its per-key genotype
    dicts are merged key by key.

    Returns a tuple ``(all_maploci, all_genotypes)``; ``all_genotypes`` is a
    ``defaultdict(dict)``.
    """
    all_genotypes = defaultdict(dict)
    all_maploci = {}
    increment = 0

    for m in maps:
        if ',' in m:
            mapf, mIDf = m.split(',')
        else:
            mapf, mIDf = m, None
        maploci, genotypes = extract_genotypes_from_mclgr.load_cross_radtag_genotypes(mapf, mIDf)

        # presumably increment_lg shifts each locus' linkage-group number by
        # `increment` so groups from different maps do not collide -- confirm
        # against increment_lg.
        all_maploci.update(increment_lg(maploci, increment))
        for k, v in genotypes.items():
            all_genotypes[k].update(v)

        # The next map is offset by the largest first element seen so far.
        # max() raises ValueError on an empty sequence, so keep the previous
        # offset while no loci have been collected yet.
        if all_maploci:
            increment = max(v[0] for v in all_maploci.values())

    return all_maploci, all_genotypes
    ax.set_xticks(range(len(cols)))
    ax.set_yticks(range(len(rows)))
    ax.set_xticklabels(cols)
    ax.set_yticklabels(rows)
    pylab.matplotlib.pyplot.xticks(rotation=90)
    pylab.matplotlib.pyplot.yticks(fontsize=fontsize)
    pylab.matplotlib.pyplot.xticks(fontsize=fontsize)
    

if __name__ == "__main__":

    # Positional arguments: map file, id header, then four values that are
    # forwarded unchanged to run_parallel_blasts (gr, tab, uniqued, blastdb).
    mapf,id_header,gr,tab,uniqued,blastdb = sys.argv[1:7]
    outroot,mapname = os.path.split(mapf)

    maploci,genotypes = extract_genotypes_from_mclgr.load_cross_radtag_genotypes(mapf,'skip',id_header=id_header)

    # Locus ids ordered by their maploci value.
    clids = [k for k,v in sorted(maploci.items(),key = lambda x: x[1])]

    blastouts = run_parallel_blasts(clids,mapname,gr,tab,uniqued,blastdb)

    # hits_by_site[query][subject] = (int(column 9), float(column 11)).
    # NOTE(review): presumably standard BLAST tabular output, where those
    # columns are subject start and e-value -- confirm against
    # run_parallel_blasts.
    hits_by_site = defaultdict(dict)
    for f in blastouts:
        # Close each result file as soon as it has been parsed instead of
        # leaving the handle to the garbage collector.
        with open(f) as blast_fh:
            for l in blast_fh:
                if l.startswith("#"): continue
                fields = l.strip().split()
                hits_by_site[fields[0]][fields[1]] = (int(fields[8]),float(fields[10]))

    chrom_size = get_chrom_size(blastdb)
    chrom_step = calc_chrom_step(chrom_size,maploci)
Пример #3
0
						sd['indiv_gt'][ind] = this_gt
						if return_map:
							new_map[ind].update({loc:''.join([allele_map[loc][n] for n in sd['indiv_gt'][ind]['GT'].split('/')])})
			if not return_map:
				vcf_data[key] = sd
			print >> sys.stderr, '%s individuals processed' % len(sd['indiv_gt'])

	if return_map:
		return new_map
	else:
		return vcf_data

# Command line: source map file, source VCF file, new VCF file, id header.
source_map_f, source_vcf_f, new_vcf_f, id_header = sys.argv[1:]

print >> sys.stderr, 'load source map:', source_map_f
loci,geno = extract_genotypes_from_mclgr.load_cross_radtag_genotypes(source_map_f,mIDlookup=False,id_header=id_header)
print >> sys.stderr, '%s loci in %s individuals loaded from source map' % (len(loci),len(geno))


print >> sys.stderr, 'load source vcf:', source_vcf_f
# NOTE(review): `gq` is not defined in the visible part of this script --
# confirm it is set before this point.
source_vcf = load_vcf(source_vcf_f,loci,indiv_gt_phred_cut=gq)
print >> sys.stderr, '%s loci loaded from source vcf' % (len(source_vcf))

allele_map = {}

for loc in loci.keys():
	# Map locus names are dot-separated; the VCF table is keyed by the tuple
	# of those parts. Build the key once and reuse it.
	vcf_key = tuple(loc.split('.'))
	# `in` replaces the deprecated dict.has_key().
	if vcf_key not in source_vcf:
		print >> sys.stderr, 'no key %s for site %s found in source vcf!' % (vcf_key,loc)
		continue
	vcf_loc = source_vcf[vcf_key]
	# Individuals whose map genotype at this locus is 'AA'.
	AA_ind  = [k for k,v in geno.items() if v.get(loc,'') == 'AA']
Пример #4
0
                            })
            if not return_map:
                vcf_data[key] = sd
            print >> sys.stderr, '%s individuals processed' % len(
                sd['indiv_gt'])

    if return_map:
        return new_map
    else:
        return vcf_data


# Command line: source map file, source VCF file, new VCF file, id header.
source_map_f, source_vcf_f, new_vcf_f, id_header = sys.argv[1:]

print >> sys.stderr, 'load source map:', source_map_f
loci, geno = extract_genotypes_from_mclgr.load_cross_radtag_genotypes(
    source_map_f, mIDlookup=False, id_header=id_header)
print >> sys.stderr, '%s loci in %s individuals loaded from source map' % (
    len(loci), len(geno))

print >> sys.stderr, 'load source vcf:', source_vcf_f
# NOTE(review): `gq` is not defined in the visible part of this script --
# confirm it is set before this point.
source_vcf = load_vcf(source_vcf_f, loci, indiv_gt_phred_cut=gq)
print >> sys.stderr, '%s loci loaded from source vcf' % (len(source_vcf))

allele_map = {}

for loc in loci.keys():
    # Map locus names are dot-separated; the VCF table is keyed by the tuple
    # of those parts. Build the key once and reuse it.
    vcf_key = tuple(loc.split('.'))
    # `in` replaces the deprecated dict.has_key().
    if vcf_key not in source_vcf:
        print >> sys.stderr, 'no key %s for site %s found in source vcf!' % (
            vcf_key, loc)
        continue
    vcf_loc = source_vcf[vcf_key]
            else:
                print >> sys.stderr, 'no matching genotypes for pheno line %s' % pd['id']
        else:
            print >> sys.stderr, 'no id in %s' % pd
            
    return phenomaploci,phenomap

if __name__ == '__main__':
    # Positional arguments: phenotype table(s), map file, output file.
    db,mapfile,outfile = sys.argv[1:4]

    # The map argument is either "mapfile" or "mapfile,mIDfile".
    # Split the argument itself: the previous code split an undefined name
    # `m`, which raised NameError whenever a comma was present.
    if ',' in mapfile:
        mapf,mIDf = mapfile.split(',')
    else:
        mapf = mapfile
        mIDf = False

    # Several tables may be given comma-separated; their rows are concatenated.
    if ',' in db:
        phenotypes = []
        for db_i in db.split(','):
            phenotypes.extend(preprocess_radtag_lane.get_table_as_dict(db_i,suppress_fc_check=True))
    else:
        phenotypes = preprocess_radtag_lane.get_table_as_dict(db,suppress_fc_check=True)

    maploci,genotypes = extract_genotypes_from_mclgr.load_cross_radtag_genotypes(mapf,mIDf)

    phenomaploci,phenomap = add_pheno_to_map(phenotypes,maploci,genotypes)
    print >> sys.stderr, '%s pheno+map loci, %s lines' % (len(phenomaploci),len(phenomap))
    og,mID = extract_genotypes_from_mclgr.output_cross_radtag_genotypes(phenomaploci,phenomap,outfile)