def next(self):
    # Pull the next row from the underlying csv.DictReader and try to
    # split its upper-cased 'mask' field with split_mask.
    # NOTE(review): the original indentation of this excerpt was lost; the
    # layout below is the most plausible reading -- confirm against the
    # real file.
    r = csv.DictReader.next(self)
    try:
        mask = split_mask(r['mask'].upper())
    except ValueError, e:
        # NOTE(review): `label` is not assigned anywhere in the visible
        # part of this method; confirm it is bound elsewhere, otherwise
        # this line raises NameError instead of reporting the bad row.
        print "ERROR: %r: %s, skipping" % (label, e)
        # Skip the unparsable row by recursing to fetch the following one.
        return self.next()
    # NOTE(review): nothing after this point is visible, so what happens
    # with `mask` and `r` on the success path cannot be told from here.
def check_mask(mask):
    """
    Describe what is wrong with ``mask``, or return None if nothing is.

    Returns "bad mask format" when ``split_mask`` raises ValueError (or
    its result cannot be unpacked into three parts), and
    "bad alleles: [...]" unless the mask carries between 2 and 4 alleles
    that all belong to ``POSSIBLE_ALLELES``.
    """
    try:
        _, alleles, _ = split_mask(mask)
    except ValueError:
        return "bad mask format"
    # The allele count is checked first so that the set comparison is only
    # evaluated for masks with an acceptable number of alleles.
    count_ok = 2 <= len(alleles) <= 4
    if count_ok and set(alleles) <= POSSIBLE_ALLELES:
        return None
    return "bad alleles: %r" % list(alleles)
def build_fastq_records(label, mask, name_serializer, logger=None):
    """
    Build one four-field record per allele found in ``mask``.

    ``mask`` is split with ``split_mask`` into left flank, alleles and
    right flank.  Each allele is paired with an entry of ``ALLELE_CODES``
    and yields the tuple ``('@<id>', <seq>, '+<id>', '~' * len(<seq>))``,
    where ``<seq>`` is left flank + allele + right flank and ``<id>``
    comes from ``name_serializer.serialize(label, code, offset, alleles)``
    with ``offset`` equal to the length of the left flank.

    If the mask cannot be split (ValueError), a warning is sent to
    ``logger`` (a ``NullLogger`` when none is given) and an empty list is
    returned.
    """
    logger = logger or NullLogger()
    try:
        left, alleles, right = split_mask(mask)
    except ValueError:
        # The literal string "None" is reported as a missing mask rather
        # than as a malformed one.
        reason = "bad mask format"
        if mask == "None":
            reason = "no mask"
        logger.warn("%r: %s, skipping" % (label, reason))
        return []
    offset = len(left)
    records = []
    for allele, code in izip(alleles, ALLELE_CODES):
        seq = "%s%s%s" % (left, allele, right)
        seq_id = name_serializer.serialize(label, code, offset, alleles)
        records.append(('@%s' % seq_id, seq, '+%s' % seq_id, '~' * len(seq)))
    return records
def build_fastq_records(label, mask, name_serializer, logger=None):
    """
    Return a list of four-field records, one for each allele in ``mask``.

    The mask is parsed by ``split_mask``; when that raises ValueError the
    problem is logged through ``logger`` (defaulting to a ``NullLogger``)
    and the returned list is empty.  Otherwise alleles are walked in step
    with ``ALLELE_CODES`` and each pair produces
    ``('@<id>', <seq>, '+<id>', <filler>)`` where ``<seq>`` is the left
    flank, the allele and the right flank joined together, ``<id>`` is
    built by ``name_serializer.serialize`` and ``<filler>`` is a run of
    '~' as long as ``<seq>``.
    """
    logger = logger or NullLogger()
    try:
        flank_l, alleles, flank_r = split_mask(mask)
    except ValueError:
        if mask == "None":
            problem = "no mask"
        else:
            problem = "bad mask format"
        logger.warn("%r: %s, skipping" % (label, problem))
        return []

    # Position of the variable base: it sits right after the left flank.
    snp_pos = len(flank_l)

    def make_record(allele, code):
        # Assemble the sequence first, then ask the serializer for its id.
        sequence = "%s%s%s" % (flank_l, allele, flank_r)
        ident = name_serializer.serialize(label, code, snp_pos, alleles)
        return ('@%s' % ident, sequence, '+%s' % ident, '~' * len(sequence))

    return [make_record(al, cd) for al, cd in izip(alleles, ALLELE_CODES)]
def canonize_call(mask, abi_call):
    """
    Canonize call against top mask.

    The base called by TaqMan is used directly to work out the allele
    code:

    * 'BOTH' (any letter case) maps to ``SnpCall.AB``;
    * 'UNDETERMINED' (any letter case) maps to ``SnpCall.NOCALL``;
    * otherwise ``abi_call`` must split on '-' into exactly two parts, the
      second being the called base.  It maps to ``SnpCall.AA`` when it
      equals the first allele of ``mask`` or ``rc()`` of it, and to
      ``SnpCall.BB`` when it equals the second allele or ``rc()`` of it.

    Raises ValueError when the called base matches neither allele, or
    when ``abi_call`` does not split into exactly two parts.
    """
    normalized = abi_call.upper()
    if normalized == 'BOTH':
        return SnpCall.AB
    if normalized == 'UNDETERMINED':
        return SnpCall.NOCALL

    # Parse the call before the mask, so a malformed call is reported first.
    _, call_base = abi_call.split('-')
    _, alleles, _ = split_mask(mask)

    first = alleles[0]
    if call_base in (first, rc(first)):
        return SnpCall.AA
    # The second allele is only looked up once the first one did not match.
    second = alleles[1]
    if call_base in (second, rc(second)):
        return SnpCall.BB
    raise ValueError('Cannot map %s (alleles: %s)' % (abi_call, alleles))
records = [r for r in reader] vids = [r['source'] for r in records] markers = kb.get_snp_markers(vids=vids, col_names=['vid', 'mask']) with open(outfn, 'w') as outf: fieldnames = ['marker_vid', 'marker_indx', 'allele_flip'] writer = csv.DictWriter(outf, delimiter="\t", lineterminator=os.linesep, fieldnames=fieldnames) writer.writeheader() for i, (m, r) in enumerate(it.izip(markers, records)): assert m.id == r['source'] try: _, stored_alleles, _ = snp.split_mask(m.mask) except ValueError: sys.stdout.write("WARNING: could not split mask for %r\n" % r['source']) flip = False else: alleles = r['allele_a'], r['allele_b'] fl_alleles = r['allele_b'], r['allele_a'] if alleles == stored_alleles or rc(alleles) == stored_alleles: flip = False elif fl_alleles == stored_alleles or rc( fl_alleles) == stored_alleles: flip = True else: raise ValueError("%s: got inconsistent mask from db: %r" % (m.id, m.mask))
def test_good(self):
    """Check usnp.split_mask against every pair in SPLIT_MASK_PAIRS."""
    for mask, expected in SPLIT_MASK_PAIRS:
        self.assertEqual(usnp.split_mask(mask), expected)
with open(fn) as f: reader = csv.DictReader(f, delimiter="\t") records = [r for r in reader] vids = [r['source'] for r in records] markers = kb.get_snp_markers(vids=vids, col_names=['vid', 'mask']) with open(outfn, 'w') as outf: fieldnames = ['marker_vid', 'marker_indx', 'allele_flip'] writer = csv.DictWriter(outf, delimiter="\t", lineterminator=os.linesep, fieldnames=fieldnames) writer.writeheader() for i, (m, r) in enumerate(it.izip(markers, records)): assert m.id == r['source'] try: _, stored_alleles, _ = snp.split_mask(m.mask) except ValueError: sys.stdout.write("WARNING: could not split mask for %r\n" % r['source']) flip = False else: alleles = r['allele_a'], r['allele_b'] fl_alleles = r['allele_b'], r['allele_a'] if alleles == stored_alleles or rc(alleles) == stored_alleles: flip = False elif fl_alleles == stored_alleles or rc(fl_alleles) == stored_alleles: flip = True else: raise ValueError("%s: got inconsistent mask from db: %r" % (m.id, m.mask)) index = r.get("marker_indx", i) writer.writerow({"marker_vid": m.id, "marker_indx": index,