Exemplo n.º 1
0
    # Positional arguments (all strings): VCF path, minimum QD, per-individual
    # genotype-quality cutoff, and the chi-square critical value.
    vcfn,qd,gq,chi2crit = sys.argv[1:]

    # Output file names are built from the VCF path with its extension removed.
    outbase = os.path.splitext(vcfn)[0]

    def cut_fn(sd):
        # Site filter handed to load_vcf: the site must carry a QD of at least
        # `qd`, have at least `min_indiv` entries in 'indiv_gt', and an 'fh'
        # value below `fh`.
        # NOTE(review): min_indiv and fh are not assigned in this fragment --
        # presumably defined earlier in the file; confirm.
        return sd.has_key('QD') and float(sd['QD']) >= float(qd) and len(sd['indiv_gt']) >= min_indiv and sd['fh'] < fh

    print >> sys.stderr, 'loading vcf',vcfn
    vcf = variant_detection.load_vcf(vcfn,cutoff_fn=cut_fn,indiv_gt_phred_cut=float(gq))

    print >> sys.stderr, 'convert to pm/gt matrices'
    pm,gt = extract_genotypes_from_mclgr.genotypes_from_vcf_obj(vcf)

    # Map the labels 'A' and 'B' onto the comma-separated prefixes in
    # parent_str, then collect the keys of gt that start with each prefix.
    parents_prefixes = dict(zip(['A', 'B'],parent_str.split(',')))
    parents = {}
    for label,prefix in parents_prefixes.items():
        parents[label] = [k for k in gt.keys() if k.startswith(prefix)]

    # Keep only pm entries whose key has a second dot-separated field that is
    # numerically below site_before.
    early_pm = dict([(k,v) for k,v in pm.items() if int(k.split('.')[1]) < site_before])
    # Concatenate every parent list; these are passed as remove_targets.
    parent_indivs = reduce(lambda x,y: x+y,parents.values())
    polarized_loci,polarized_geno = extract_genotypes_from_mclgr.genotypes_by_parent(early_pm,gt,parents,remove_targets=parent_indivs)

    print >> sys.stderr, 'filter X linked, chi2 critical %s' % chi2crit
    xsites,autsites = extract_genotypes_from_mclgr.filter_Xlinked_loci(polarized_loci, polarized_geno,float(chi2crit))
    print >> sys.stderr, '%s X linked, %s autosomal' % (len(xsites),len(autsites))

    print >> sys.stderr, 'write output'
    # Both output names share the same parameter tuple; build each name once
    # and reuse it for writing and for the summary below.
    name_fields = (outbase,qd,gq,site_before,chi2crit)
    x_csv = '%s_QD%s-GQ%s_%sbp_Xchi%s.csv' % name_fields
    aut_csv = '%s_QD%s-GQ%s_%sbp_autchi%s.csv' % name_fields
    ret = extract_genotypes_from_mclgr.output_cross_radtag_genotypes(xsites,polarized_geno,x_csv)
    ret = extract_genotypes_from_mclgr.output_cross_radtag_genotypes(autsites,polarized_geno,aut_csv)
    print >> sys.stderr, 'wrote:'
    print >> sys.stderr, x_csv
    print >> sys.stderr, aut_csv
    print >> sys.stderr, 'done'
Exemplo n.º 2
0
	AA_ind  = [k for k,v in geno.items() if v.get(loc,'') == 'AA']
	AA_gt = set([vcf_loc['indiv_gt'][ind]['GT'] for ind in AA_ind if ind in vcf_loc['indiv_gt'].keys()])
	#BB_ind  = [k for k,v in geno.items() if v.get(loc,'') == 'BB']
	#BB_gt = set([vcf_loc['indiv_gt'][ind]['GT'] for ind in BB_ind if ind in vcf_loc['indiv_gt'].keys()])
	if len(AA_gt) != 1: #or len(BB_gt) != 1:
		AA_ctd = Util.countdict([vcf_loc['indiv_gt'][ind]['GT'] for ind in AA_ind if ind in vcf_loc['indiv_gt']])
		if len(AA_ctd) == 2 and min(AA_ctd.values()) == 1:
			print >> sys.stderr, 'ignoring 1 invalid AA genotype from vcf'
		else:
			print >> sys.stderr, '%s invalid homozygotes (AA: %s) ' % (loc,AA_ctd)
			continue
	AA_gt = list(AA_gt)[0]
	#BB_gt = list(BB_gt)[0]
	A = set(AA_gt.split('/'))
	#B = set(BB_gt.split('/'))
	if len(A) != 1: #or len(B) != 1:
		print >> sys.stderr, '%s invalid allele mapping (A: %s B: %s)' % (loc,A,B)
		continue
	A = list(A)[0]
	#B = list(B)[0]
	B = int(A) and '0' or '1'
	allele_map[loc] = {A:'A',B:'B'}
	#print >> sys.stderr, '%s %s' % (loc,allele_map[loc])

# Without at least one mapped locus there is nothing to pull from the new VCF.
if not allele_map:
	raise ValueError('no loci to load!')
print >> sys.stderr, 'load %s loci from %s' % (len(allele_map), new_vcf_f)
# Load genotypes from the new VCF using the per-locus allele map, then write
# them for `loci` to stdout.
new_geno = load_vcf(new_vcf_f,allele_map,gq,return_map=True)
extract_genotypes_from_mclgr.output_cross_radtag_genotypes(loci, new_geno, sys.stdout)
Exemplo n.º 3
0
    
    for m in maps:
        if ',' in m:
            mapf,mIDf = m.split(',')
        else:
            mapf = m
            mIDf = None
        maploci,genotypes = extract_genotypes_from_mclgr.load_cross_radtag_genotypes(mapf,mIDf)
        #print >> sys.stderr, m,'\n',[(k,len(v)) for k,v in genotypes.items()]


        all_maploci.update(increment_lg(maploci,increment))
        for k,v in genotypes.items():
            all_genotypes[k].update(v)
        increment = max([v[0] for v in all_maploci.values()])

    return all_maploci,all_genotypes

if __name__ == '__main__':
    # argv[1] is the output destination; every remaining argument is a map
    # spec handed to merge_maps.
    out_to = sys.argv[1]
    maps = sys.argv[2:]

    # '-' selects stdout; anything else is opened (and truncated) for writing.
    outfh = sys.stdout if out_to == '-' else open(out_to,'w')

    all_maploci,all_genotypes = merge_maps(maps)
    extract_genotypes_from_mclgr.output_cross_radtag_genotypes(all_maploci,all_genotypes,outfh)
    
Exemplo n.º 4
0
            vcf_loc['indiv_gt'][ind]['GT'] for ind in AA_ind
            if ind in vcf_loc['indiv_gt']
        ])
        if len(AA_ctd) == 2 and min(AA_ctd.values()) == 1:
            print >> sys.stderr, 'ignoring 1 invalid AA genotype from vcf'
        else:
            print >> sys.stderr, '%s invalid homozygotes (AA: %s) ' % (loc,
                                                                       AA_ctd)
            continue
    AA_gt = list(AA_gt)[0]
    #BB_gt = list(BB_gt)[0]
    A = set(AA_gt.split('/'))
    #B = set(BB_gt.split('/'))
    if len(A) != 1:  #or len(B) != 1:
        print >> sys.stderr, '%s invalid allele mapping (A: %s B: %s)' % (loc,
                                                                          A, B)
        continue
    A = list(A)[0]
    #B = list(B)[0]
    B = int(A) and '0' or '1'
    allele_map[loc] = {A: 'A', B: 'B'}
    #print >> sys.stderr, '%s %s' % (loc,allele_map[loc])

# Without at least one mapped locus there is nothing to pull from the new VCF.
if not allele_map:
    raise ValueError('no loci to load!')
print >> sys.stderr, 'load %s loci from %s' % (len(allele_map), new_vcf_f)
# Load genotypes from the new VCF using the per-locus allele map, then write
# them for `loci` to stdout.
new_geno = load_vcf(new_vcf_f, allele_map, gq, return_map=True)
extract_genotypes_from_mclgr.output_cross_radtag_genotypes(loci, new_geno, sys.stdout)
            else:
                print >> sys.stderr, 'no matching genotypes for pheno line %s' % pd['id']
        else:
            print >> sys.stderr, 'no id in %s' % pd
            
    return phenomaploci,phenomap

if __name__ == '__main__':
    # Arguments: DB[,DB...]  MAPFILE[,MAP_ID_FILE]  OUTFILE
    db,mapfile,outfile = sys.argv[1:4]

    # The map argument may carry an ID file after a comma.
    if ',' in mapfile:
        # Split the argument itself: the previous code split an undefined
        # name `m`, raising NameError whenever an ID file was supplied.
        mapf,mIDf = mapfile.split(',')
    else:
        mapf = mapfile
        mIDf = False

    # Several comma-separated tables are fetched one by one and concatenated;
    # a single table is used exactly as returned.
    if ',' in db:
        phenotypes = []
        for db_i in db.split(','):
            phenotypes.extend(preprocess_radtag_lane.get_table_as_dict(db_i,suppress_fc_check=True))
    else:
        phenotypes = preprocess_radtag_lane.get_table_as_dict(db,suppress_fc_check=True)

    maploci,genotypes = extract_genotypes_from_mclgr.load_cross_radtag_genotypes(mapf,mIDf)

    # Combine phenotype rows with the loaded map and write the result.
    phenomaploci,phenomap = add_pheno_to_map(phenotypes,maploci,genotypes)
    print >> sys.stderr, '%s pheno+map loci, %s lines' % (len(phenomaploci),len(phenomap))
    og,mID = extract_genotypes_from_mclgr.output_cross_radtag_genotypes(phenomaploci,phenomap,outfile)