def merge_maps(maps):
    """Load several maps and merge their loci and genotypes.

    Parameters
    ----------
    maps : iterable of str
        Each entry is either a map filename, or ``"mapfile,idfile"``; in the
        latter case the part after the comma is passed as the second argument
        to ``load_cross_radtag_genotypes`` (``None`` is passed otherwise).

    Returns
    -------
    (all_maploci, all_genotypes)
        ``all_maploci`` is a dict of every locus from every map after being
        passed through ``increment_lg`` with the running offset;
        ``all_genotypes`` is a ``defaultdict(dict)`` whose per-key dicts are
        the union of the genotype dicts loaded from each map.
    """
    all_genotypes = defaultdict(dict)
    all_maploci = {}
    increment = 0
    for m in maps:
        if ',' in m:
            mapf, mIDf = m.split(',')
        else:
            mapf = m
            mIDf = None
        maploci, genotypes = extract_genotypes_from_mclgr.load_cross_radtag_genotypes(mapf, mIDf)
        # Offset this map's loci by the running increment before merging
        # (increment_lg is defined elsewhere in the file).
        all_maploci.update(increment_lg(maploci, increment))
        for k, v in genotypes.items():
            all_genotypes[k].update(v)
        # The next map starts after the largest first element seen so far.
        # Guard the empty case: max() of an empty sequence raises ValueError,
        # which would abort the merge if a map contributes no loci.
        if all_maploci:
            increment = max(v[0] for v in all_maploci.values())
    return all_maploci, all_genotypes
ax.set_xticks(range(len(cols))) ax.set_yticks(range(len(rows))) ax.set_xticklabels(cols) ax.set_yticklabels(rows) pylab.matplotlib.pyplot.xticks(rotation=90) pylab.matplotlib.pyplot.yticks(fontsize=fontsize) pylab.matplotlib.pyplot.xticks(fontsize=fontsize) if __name__ == "__main__": mapf,id_header,gr,tab,uniqued,blastdb = sys.argv[1:7] outroot,mapname = os.path.split(mapf) maploci,genotypes = extract_genotypes_from_mclgr.load_cross_radtag_genotypes(mapf,'skip',id_header=id_header) clids = [k for k,v in sorted(maploci.items(),key = lambda x: x[1])] blastouts = run_parallel_blasts(clids,mapname,gr,tab,uniqued,blastdb) hits_by_site = defaultdict(dict) for f in blastouts: for l in open(f): if l.startswith("#"): continue fields = l.strip().split() hits_by_site[fields[0]][fields[1]] = (int(fields[8]),float(fields[10])) chrom_size = get_chrom_size(blastdb) chrom_step = calc_chrom_step(chrom_size,maploci)
sd['indiv_gt'][ind] = this_gt if return_map: new_map[ind].update({loc:''.join([allele_map[loc][n] for n in sd['indiv_gt'][ind]['GT'].split('/')])}) if not return_map: vcf_data[key] = sd print >> sys.stderr, '%s individuals processed' % len(sd['indiv_gt']) if return_map: return new_map else: return vcf_data source_map_f, source_vcf_f, new_vcf_f, id_header = sys.argv[1:] print >> sys.stderr, 'load source map:', source_map_f loci,geno = extract_genotypes_from_mclgr.load_cross_radtag_genotypes(source_map_f,mIDlookup=False,id_header=id_header) print >> sys.stderr, '%s loci in %s individuals loaded from source map' % (len(loci),len(geno)) print >> sys.stderr, 'load source vcf:', source_vcf_f source_vcf = load_vcf(source_vcf_f,loci,indiv_gt_phred_cut=gq) print >> sys.stderr, '%s loci loaded from source vcf' % (len(source_vcf)) allele_map = {} for loc in loci.keys(): if not source_vcf.has_key(tuple(loc.split('.'))): print >> sys.stderr, 'no key %s for site %s found in source vcf!' % (tuple(loc.split('.')),loc) continue vcf_loc = source_vcf[tuple(loc.split('.'))] AA_ind = [k for k,v in geno.items() if v.get(loc,'') == 'AA']
}) if not return_map: vcf_data[key] = sd print >> sys.stderr, '%s individuals processed' % len( sd['indiv_gt']) if return_map: return new_map else: return vcf_data source_map_f, source_vcf_f, new_vcf_f, id_header = sys.argv[1:] print >> sys.stderr, 'load source map:', source_map_f loci, geno = extract_genotypes_from_mclgr.load_cross_radtag_genotypes( source_map_f, mIDlookup=False, id_header=id_header) print >> sys.stderr, '%s loci in %s individuals loaded from source map' % ( len(loci), len(geno)) print >> sys.stderr, 'load source vcf:', source_vcf_f source_vcf = load_vcf(source_vcf_f, loci, indiv_gt_phred_cut=gq) print >> sys.stderr, '%s loci loaded from source vcf' % (len(source_vcf)) allele_map = {} for loc in loci.keys(): if not source_vcf.has_key(tuple(loc.split('.'))): print >> sys.stderr, 'no key %s for site %s found in source vcf!' % ( tuple(loc.split('.')), loc) continue vcf_loc = source_vcf[tuple(loc.split('.'))]
else: print >> sys.stderr, 'no matching genotypes for pheno line %s' % pd['id'] else: print >> sys.stderr, 'no id in %s' % pd return phenomaploci,phenomap if __name__ == '__main__': db,mapfile,outfile = sys.argv[1:4] if ',' in mapfile: mapf,mIDf = m.split(',') else: mapf = mapfile mIDf = False if ',' in db: phenotypes = [] for db_i in db.split(','): phenotypes.extend(preprocess_radtag_lane.get_table_as_dict(db_i,suppress_fc_check=True)) else: phenotypes = preprocess_radtag_lane.get_table_as_dict(db,suppress_fc_check=True) maploci,genotypes = extract_genotypes_from_mclgr.load_cross_radtag_genotypes(mapf,mIDf) phenomaploci,phenomap = add_pheno_to_map(phenotypes,maploci,genotypes) print >> sys.stderr, '%s pheno+map loci, %s lines' % (len(phenomaploci),len(phenomap)) og,mID = extract_genotypes_from_mclgr.output_cross_radtag_genotypes(phenomaploci,phenomap,outfile)