# Script body: load a VCF with site/genotype quality cuts, polarize genotypes
# by parent, split loci into X-linked and autosomal sets, and write one CSV
# per set.
# NOTE(review): min_indiv, fh, parent_str and site_before are not assigned in
# this chunk; they are presumably defined earlier in the file -- confirm.
# Arguments: VCF QD GQ CHI2CRIT
vcfn, qd, gq, chi2crit = sys.argv[1:]
outbase = os.path.splitext(vcfn)[0]

# Parse the QD threshold once rather than once per VCF record.
qd_min = float(qd)


def cut_fn(sd):
    """Return a true value when site dict `sd` passes the site-level cuts.

    A site passes when it carries a 'QD' value of at least `qd_min`, has at
    least `min_indiv` entries in 'indiv_gt', and has an 'fh' value below `fh`.
    """
    return ('QD' in sd
            and float(sd['QD']) >= qd_min
            and len(sd['indiv_gt']) >= min_indiv
            and sd['fh'] < fh)


print >> sys.stderr, 'loading vcf', vcfn
vcf = variant_detection.load_vcf(vcfn, cutoff_fn=cut_fn,
                                 indiv_gt_phred_cut=float(gq))

print >> sys.stderr, 'convert to pm/gt matrices'
pm, gt = extract_genotypes_from_mclgr.genotypes_from_vcf_obj(vcf)

# Parent label ('A'/'B') -> name prefix taken from parent_str, then
# label -> list of individuals in gt whose names start with that prefix.
parents_prefixes = dict(zip(['A', 'B'], parent_str.split(',')))
parents = dict((label, [k for k in gt.keys() if k.startswith(prefix)])
               for label, prefix in parents_prefixes.items())

# Keep only pm entries whose key's second '.'-separated field is below
# site_before.
pm_before_cutoff = dict((k, v) for k, v in pm.items()
                        if int(k.split('.')[1]) < site_before)
# All parental individuals, concatenated across both labels.
parent_indivs = reduce(lambda x, y: x + y, parents.values())
polarized_loci, polarized_geno = extract_genotypes_from_mclgr.genotypes_by_parent(
    pm_before_cutoff, gt, parents, remove_targets=parent_indivs)

print >> sys.stderr, 'filter X linked, chi2 critical %s' % chi2crit
xsites, autsites = extract_genotypes_from_mclgr.filter_Xlinked_loci(
    polarized_loci, polarized_geno, float(chi2crit))
print >> sys.stderr, '%s X linked, %s autosomal' % (len(xsites), len(autsites))

# Build each output name exactly once so the names reported under 'wrote:'
# are guaranteed to be the names actually written.
out_prefix = '%s_QD%s-GQ%s_%sbp' % (outbase, qd, gq, site_before)
x_outfile = '%s_Xchi%s.csv' % (out_prefix, chi2crit)
aut_outfile = '%s_autchi%s.csv' % (out_prefix, chi2crit)

print >> sys.stderr, 'write output'
extract_genotypes_from_mclgr.output_cross_radtag_genotypes(
    xsites, polarized_geno, x_outfile)
extract_genotypes_from_mclgr.output_cross_radtag_genotypes(
    autsites, polarized_geno, aut_outfile)

print >> sys.stderr, 'wrote:'
print >> sys.stderr, x_outfile
print >> sys.stderr, aut_outfile
print >> sys.stderr, 'done'
AA_ind = [k for k,v in geno.items() if v.get(loc,'') == 'AA'] AA_gt = set([vcf_loc['indiv_gt'][ind]['GT'] for ind in AA_ind if ind in vcf_loc['indiv_gt'].keys()]) #BB_ind = [k for k,v in geno.items() if v.get(loc,'') == 'BB'] #BB_gt = set([vcf_loc['indiv_gt'][ind]['GT'] for ind in BB_ind if ind in vcf_loc['indiv_gt'].keys()]) if len(AA_gt) != 1: #or len(BB_gt) != 1: AA_ctd = Util.countdict([vcf_loc['indiv_gt'][ind]['GT'] for ind in AA_ind if ind in vcf_loc['indiv_gt']]) if len(AA_ctd) == 2 and min(AA_ctd.values()) == 1: print >> sys.stderr, 'ignoring 1 invalid AA genotype from vcf' else: print >> sys.stderr, '%s invalid homozygotes (AA: %s) ' % (loc,AA_ctd) continue AA_gt = list(AA_gt)[0] #BB_gt = list(BB_gt)[0] A = set(AA_gt.split('/')) #B = set(BB_gt.split('/')) if len(A) != 1: #or len(B) != 1: print >> sys.stderr, '%s invalid allele mapping (A: %s B: %s)' % (loc,A,B) continue A = list(A)[0] #B = list(B)[0] B = int(A) and '0' or '1' allele_map[loc] = {A:'A',B:'B'} #print >> sys.stderr, '%s %s' % (loc,allele_map[loc]) if len(allele_map) == 0: raise ValueError, 'no loci to load!' print >> sys.stderr, 'load %s loci from %s' % (len(allele_map), new_vcf_f) new_geno = load_vcf(new_vcf_f,allele_map,gq,return_map=True) #print >> sys.stderr, new_geno extract_genotypes_from_mclgr.output_cross_radtag_genotypes(loci, new_geno, sys.stdout)
for m in maps: if ',' in m: mapf,mIDf = m.split(',') else: mapf = m mIDf = None maploci,genotypes = extract_genotypes_from_mclgr.load_cross_radtag_genotypes(mapf,mIDf) #print >> sys.stderr, m,'\n',[(k,len(v)) for k,v in genotypes.items()] all_maploci.update(increment_lg(maploci,increment)) for k,v in genotypes.items(): all_genotypes[k].update(v) increment = max([v[0] for v in all_maploci.values()]) return all_maploci,all_genotypes if __name__ == '__main__': out_to = sys.argv[1] if out_to == '-': outfh = sys.stdout else: outfh = open(out_to,'w') maps = sys.argv[2:] all_maploci,all_genotypes = merge_maps(maps) extract_genotypes_from_mclgr.output_cross_radtag_genotypes(all_maploci,all_genotypes,outfh)
vcf_loc['indiv_gt'][ind]['GT'] for ind in AA_ind if ind in vcf_loc['indiv_gt'] ]) if len(AA_ctd) == 2 and min(AA_ctd.values()) == 1: print >> sys.stderr, 'ignoring 1 invalid AA genotype from vcf' else: print >> sys.stderr, '%s invalid homozygotes (AA: %s) ' % (loc, AA_ctd) continue AA_gt = list(AA_gt)[0] #BB_gt = list(BB_gt)[0] A = set(AA_gt.split('/')) #B = set(BB_gt.split('/')) if len(A) != 1: #or len(B) != 1: print >> sys.stderr, '%s invalid allele mapping (A: %s B: %s)' % (loc, A, B) continue A = list(A)[0] #B = list(B)[0] B = int(A) and '0' or '1' allele_map[loc] = {A: 'A', B: 'B'} #print >> sys.stderr, '%s %s' % (loc,allele_map[loc]) if len(allele_map) == 0: raise ValueError, 'no loci to load!' print >> sys.stderr, 'load %s loci from %s' % (len(allele_map), new_vcf_f) new_geno = load_vcf(new_vcf_f, allele_map, gq, return_map=True) #print >> sys.stderr, new_geno extract_genotypes_from_mclgr.output_cross_radtag_genotypes( loci, new_geno, sys.stdout)
else: print >> sys.stderr, 'no matching genotypes for pheno line %s' % pd['id'] else: print >> sys.stderr, 'no id in %s' % pd return phenomaploci,phenomap if __name__ == '__main__': db,mapfile,outfile = sys.argv[1:4] if ',' in mapfile: mapf,mIDf = m.split(',') else: mapf = mapfile mIDf = False if ',' in db: phenotypes = [] for db_i in db.split(','): phenotypes.extend(preprocess_radtag_lane.get_table_as_dict(db_i,suppress_fc_check=True)) else: phenotypes = preprocess_radtag_lane.get_table_as_dict(db,suppress_fc_check=True) maploci,genotypes = extract_genotypes_from_mclgr.load_cross_radtag_genotypes(mapf,mIDf) phenomaploci,phenomap = add_pheno_to_map(phenotypes,maploci,genotypes) print >> sys.stderr, '%s pheno+map loci, %s lines' % (len(phenomaploci),len(phenomap)) og,mID = extract_genotypes_from_mclgr.output_cross_radtag_genotypes(phenomaploci,phenomap,outfile)