def process_variant_detail_vcf(self, record, assaytype):
    """Process one info-file variant detail record.

    Builds a JSON-style document (a dict) from ``record`` and appends it
    to ``self.variantbuff``.

    Args:
        record: raw record, handed unchanged to ``VCFrecord``.
        assaytype: value stored under the ``"assaytype"`` key.

    The document always carries ``assaytype``, ``rsid``, ``chromosome``,
    ``alleleA``, ``alleleB``, ``position`` and ``info``; ``ref_maf`` is
    present only when the ``RefPanelAF`` value converts to a float.
    """
    doc = {}
    doc["assaytype"] = assaytype
    vcfr = VCFrecord(record)
    # NOTE(review): prfx/sfx are never used below. The call is kept only in
    # case get_prfx_sfx() has side effects -- confirm, then drop it.
    prfx, sfx = vcfr.get_prfx_sfx()
    doc["rsid"] = vcfr.get_varid()
    # always store chromosome as a 2-digit string
    doc["chromosome"] = "%.2d" % (int(vcfr.get_chr()))
    alleleA, alleleB = vcfr.get_alleles()
    doc["alleleA"] = alleleA
    doc["alleleB"] = alleleB
    doc["position"] = vcfr.get_posn_as_int()

    # ref_maf is optional: leave the key out when the value is missing or
    # not numeric. ``Exception`` (rather than a bare ``except``) keeps
    # KeyboardInterrupt / SystemExit propagating.
    try:
        doc["ref_maf"] = float(vcfr.get_info_value("RefPanelAF"))
    except Exception:
        pass

    # info falls back to 1.0 when the INFO value is missing or not numeric.
    try:
        doc["info"] = float(vcfr.get_info_value("INFO"))
    except Exception:
        doc["info"] = 1.0

    self.variantbuff.append(doc)
def main(options):
    """Rewrite the chromosome of every VCF record read from stdin.

    Lines starting with ``#`` are echoed to stdout unchanged. For every
    other line the chromosome returned by ``VCFrecord.get_chr()`` is looked
    up in the map loaded from ``options.chrommap``, applied with
    ``set_chr`` and the result of ``get_record()`` is printed.

    Args:
        options: object with a ``chrommap`` attribute naming the map file.

    Returns:
        The number of input lines read, header lines included.

    Raises:
        SystemExit: with status 1 when the chromosome map cannot be loaded
            or a record's chromosome is not present in the map.
    """
    try:
        # The context manager closes the map file once it has been loaded.
        with open(options.chrommap) as fh:
            chrom_map = load_chrom_map(fh)
    except Exception:
        # stdout carries the rewritten VCF, so diagnostics go to stderr, and
        # the failure is reported with a non-zero exit status.
        sys.stderr.write("Unable to open %s\n" % options.chrommap)
        sys.exit(1)

    count = 0
    for line in sys.stdin:
        count += 1
        line = line.strip()
        if line.startswith('#'):
            # Single-argument print(...) is also valid as a Python 2 print
            # statement and produces the same output.
            print(line)
            continue

        vcfr = VCFrecord(line)
        strchrom = vcfr.get_chr()
        try:
            mapped_chrom = chrom_map[strchrom]
        except LookupError:
            # Fatal: logged at ERROR so it is visible under the default
            # logging configuration.
            logging.error("Chromosome not found in map %s, %s",
                          options.chrommap, strchrom)
            sys.exit(1)
        vcfr.set_chr(mapped_chrom)
        print(vcfr.get_record())
    return count
def main():
    """Write per-variant QC statistics for a VCF read from stdin as CSV.

    A header row is printed first, then one row per non-header input line
    with the columns
    ``SNPId,AssayType,chr,pos,REF,ALT,Minor,MAF,CallRate,HWE_pval``
    (``AssayType`` is always the literal ``combo``).

    When the HWE or MAF computation raises ``ZeroDivisionError`` the event
    is logged and the statistics that could not be computed are written as
    ``NA`` for that row. A variant with no calls and no virtual no-calls
    gets a call rate of 0.0.

    Returns:
        Tuple ``(in_count, hdr_count, homr_total, het_total, homa_total,
        virt_nc_total, miss_total)``: lines read, header lines seen, and the
        genotype counts summed over all records.
    """
    mafh = Mafhelper()
    hweh = Hwehelper()
    in_count = 0
    hdr_count = 0
    homr_total = 0
    het_total = 0
    homa_total = 0
    virt_nc_total = 0
    miss_total = 0

    # Single-argument print(...) is also valid as a Python 2 print statement
    # and produces the same output.
    print("SNPId,AssayType,chr,pos,REF,ALT,Minor,MAF,CallRate,HWE_pval")
    for line in sys.stdin:
        line = line.strip()
        in_count += 1
        if line.startswith("#"):
            hdr_count += 1
            continue

        vcfr = VCFrecord(line)
        varid = vcfr.get_varid_ukb()
        chromosome = vcfr.get_chr()
        posn = vcfr.get_posn_as_int()
        ref, alt = vcfr.get_alleles()
        (homref_count, het_count, homalt_count,
         virt_nc_count, miss_count) = vcfr.get_allele_counts()

        call_count = homref_count + het_count + homalt_count
        # Only virtual no-calls count against the call rate; miss_count is
        # not part of the denominator.
        nocall_count = virt_nc_count
        try:
            call_rate = float(call_count) / float(call_count + nocall_count)
        except ZeroDivisionError:
            # No calls and no virtual no-calls: report 0.0 instead of
            # aborting the whole run.
            call_rate = 0.0

        homr_total += homref_count
        het_total += het_count
        homa_total += homalt_count
        virt_nc_total += virt_nc_count
        miss_total += miss_count

        # Reset per record so a failed computation can never reuse the
        # previous record's statistics (or hit unbound names on the first
        # record).
        hwe = maf = ma = "NA"
        try:
            hwe = hweh.HWE_exact(het_count, homref_count, homalt_count,
                                 call_count)
            maf, ma = mafh.maf(het_count, homref_count, ref, homalt_count,
                               alt, virt_nc_count)
        except ZeroDivisionError:
            logging.info("DIV 0 error at %d (%d), where hom_r=%d, het=%d, home_a=%d, cc=%d",
                         in_count, posn, homref_count, het_count,
                         homalt_count, call_count)

        print("%s,combo,%s,%d,%s,%s,%s,%s,%.3f,%s" % (
            varid, chromosome, posn, ref, alt, ma, maf, call_rate, hwe))

    return (in_count, hdr_count, homr_total, het_total, homa_total,
            virt_nc_total, miss_total)