def write_output(sam_reader, outf, reftag, outfmt, flank_size, logger=None):
    """
    Run every record from ``sam_reader`` through a ``SnpHitProcessor``.

    The processor is built from ``reftag``, ``outf``, ``outfmt``,
    ``flank_size`` and the logger. Its header is written before the first
    record is processed, and whatever hits it is still holding are dumped
    after the last one.

    :param sam_reader: iterable of records, handed one by one to
      ``SnpHitProcessor.process`` (presumably SAM alignments, judging by
      the parameter name -- confirm against the caller)
    :param outf: output sink, passed through to the processor
    :param reftag: reference tag, passed through to the processor
    :param outfmt: output format, passed through to the processor
    :param flank_size: flank size, passed through to the processor
    :param logger: optional logger; a ``NullLogger`` is used when this is
      omitted or falsy
    :return: number of records read from ``sam_reader`` (0 if it is empty)
    """
    logger = logger or NullLogger()
    hit_processor = SnpHitProcessor(reftag, outf, outfmt, flank_size, logger)
    hit_processor.write_header()
    # Bind the counter before looping: with an empty reader the loop
    # variable is never assigned, and returning it would raise instead
    # of reporting zero records.
    count = 0
    for count, m in enumerate(sam_reader, 1):
        hit_processor.process(m)
    hit_processor.dump_current_hits()  # last pair
    return count
def write_output(db_snp_reader, outf, mask_size, logger=None):
    """
    Write one tab-separated line for each usable record of the reader.

    Every item yielded by ``db_snp_reader`` is unpacked as
    ``(rs_label, lflank, alleles, rflank)``, where ``alleles`` is a
    "/"-separated string. A record is usable when it has between two and
    four alleles, all of them members of ``POSSIBLE_ALLELES``. For usable
    records a mask is built with ``build_mask`` and the line
    ``rs_label<TAB>rs_label<TAB>mask`` is written to ``outf``. All other
    records are logged as warnings and skipped.

    :param db_snp_reader: iterable of 4-tuples as described above
    :param outf: object with a ``write`` method
    :param mask_size: forwarded to ``build_mask``
    :param logger: optional logger; a ``NullLogger`` is used when this is
      omitted or falsy
    :return: number of skipped records
    """
    logger = logger or NullLogger()
    skipped = 0
    for rs_label, lflank, allele_field, rflank in db_snp_reader:
        allele_list = allele_field.split("/")
        # The length test comes first so that the set comparison is only
        # evaluated for records with a plausible number of alleles.
        usable = (2 <= len(allele_list) <= 4
                  and set(allele_list) <= POSSIBLE_ALLELES)
        if not usable:
            logger.warn(
                "%r: bad alleles %r, skipping" % (rs_label, allele_list)
            )
            skipped += 1
            continue
        mask = build_mask(lflank, allele_list, rflank, mask_size)
        # The label is written twice on purpose: the first two output
        # columns both carry rs_label.
        outf.write("%s\t%s\t%s\n" % (rs_label, rs_label, mask))
    return skipped
def write_output(reader, outf, logger=None):
    """
    Write the FASTQ records built from each row of ``reader`` to ``outf``.

    For every row, ``build_fastq_records`` is called with the row's
    ``'label'`` and ``'mask'`` entries and a single ``SeqNameSerializer``
    shared across all rows. Each resulting record is written as its
    fields joined by newlines, followed by a trailing newline.

    :param reader: iterable of mappings with ``'label'`` and ``'mask'``
      keys
    :param outf: object with a ``write`` method
    :param logger: optional logger; a ``NullLogger`` is used when this is
      omitted or falsy
    :return: total number of records written
    """
    logger = logger or NullLogger()
    name_serializer = SeqNameSerializer()
    total = 0
    for row in reader:
        records = build_fastq_records(
            row['label'], row['mask'], name_serializer, logger=logger
        )
        total += len(records)
        for fields in records:
            outf.write("\n".join(fields) + "\n")
    return total
def __init__(self, ref_tag, outf, outfmt=DEFAULT_OUTPUT_FORMAT,
             flank_size=DEFAULT_FLANK_SIZE, logger=None):
    """
    Store the configuration and set up empty per-run state.

    :param ref_tag: stored as ``self.ref_tag``
    :param outf: stored as ``self.outf``
    :param outfmt: stored as ``self.outfmt``; defaults to
      ``DEFAULT_OUTPUT_FORMAT``
    :param flank_size: stored as ``self.flank_size``; defaults to
      ``DEFAULT_FLANK_SIZE``
    :param logger: optional logger; a ``NullLogger`` is used when this is
      omitted or falsy
    """
    self.logger = logger or NullLogger()
    # Caller-supplied configuration, stored unchanged.
    self.ref_tag, self.outf = ref_tag, outf
    self.outfmt, self.flank_size = outfmt, flank_size
    # Bookkeeping that starts out empty. Presumably updated by the
    # processing methods, which are not visible here -- confirm.
    self.current_id, self.current_hits = None, []
    self.serializer = SeqNameSerializer()
def build_fastq_records(label, mask, name_serializer, logger=None):
    """
    Build one four-field FASTQ record per allele found in ``mask``.

    ``split_mask`` divides the mask into left flank, alleles and right
    flank. Each allele is paired with the corresponding entry of
    ``ALLELE_CODES``; the sequence is ``lflank + allele + rflank`` and the
    sequence id comes from ``name_serializer.serialize``. Every base is
    given the quality character ``'~'``.

    If splitting the mask raises ``ValueError``, a warning is logged and
    no records are produced. The warning reads "no mask" when ``mask`` is
    the string ``"None"``, and "bad mask format" otherwise.

    :param label: identifier used in sequence ids and in log messages
    :param mask: mask string understood by ``split_mask``
    :param name_serializer: object with a
      ``serialize(label, code, offset, alleles)`` method
    :param logger: optional logger; a ``NullLogger`` is used when this is
      omitted or falsy
    :return: list of ``(header, sequence, plus_line, quality)`` tuples
    """
    logger = logger or NullLogger()
    try:
        lflank, alleles, rflank = split_mask(mask)
    except ValueError:
        if mask == "None":
            status = "no mask"
        else:
            status = "bad mask format"
        logger.warn("%r: %s, skipping" % (label, status))
        return []
    # The SNP sits right after the left flank, so its offset within each
    # generated sequence equals the flank length.
    snp_offset = len(lflank)
    records = []
    for allele, code in izip(alleles, ALLELE_CODES):
        seq = "%s%s%s" % (lflank, allele, rflank)
        seq_id = name_serializer.serialize(label, code, snp_offset, alleles)
        records.append(
            ('@%s' % seq_id, seq, '+%s' % seq_id, '~' * len(seq))
        )
    return records
def write_output(reader, outf, logger=None):
    """
    Write one five-column, tab-separated line per row of ``reader``.

    The columns are: the ``'Probe Set ID'`` value, the rs label, the mask,
    ``'Allele A'`` and ``'Allele B'``. The rs label is the
    ``'dbSNP RS ID'`` value when it starts with ``'rs'``, and the string
    ``'None'`` otherwise. The mask is the ``'Flank'`` value unless
    ``check_mask`` reports a problem with it; in that case the problem is
    logged as a warning and the mask column is written as ``'None'``.

    :param reader: iterable of mappings with the keys named above
    :param outf: object with a ``write`` method
    :param logger: optional logger; a ``NullLogger`` is used when this is
      omitted or falsy
    :return: number of rows whose mask was replaced by ``'None'``
    """
    logger = logger or NullLogger()
    n_bad = 0
    for row in reader:
        label = row['Probe Set ID']
        rs_id = row['dbSNP RS ID']
        rs_label = rs_id if rs_id.startswith('rs') else 'None'
        mask = row['Flank']
        problem = check_mask(mask)
        if problem:
            mask = 'None'
            logger.warn(
                "%r: %s, setting mask to 'None'" % (label, problem)
            )
            n_bad += 1
        columns = (label, rs_label, mask, row['Allele A'], row['Allele B'])
        outf.write("%s\t%s\t%s\t%s\t%s\n" % columns)
    return n_bad
def write_output(reader, outf, logger=None):
    """
    Write one five-column, tab-separated line per row of ``reader``.

    The columns are: the ``'IlmnID'`` value, the rs label, the mask and
    the two alleles parsed from the ``'SNP'`` field (square brackets
    stripped, then split on "/"). The rs label is the ``'Name'`` value
    when it starts with ``'rs'``, and the string ``'None'`` otherwise.
    The mask is the ``'TopGenomicSeq'`` value unless ``check_mask``
    reports a problem with it; in that case the problem is logged as a
    warning and the mask column is written as ``'None'``.

    :param reader: iterable of mappings with the keys named above
    :param outf: object with a ``write`` method
    :param logger: optional logger; a ``NullLogger`` is used when this is
      omitted or falsy
    :return: number of rows whose mask was replaced by ``'None'``
    :raises ValueError: if a ``'SNP'`` field does not split into exactly
      two alleles
    """
    logger = logger or NullLogger()
    n_bad = 0
    for row in reader:
        label = row['IlmnID']
        name = row['Name']
        rs_label = name if name.startswith('rs') else 'None'
        mask = row['TopGenomicSeq']
        # alleles are the same as those extracted from the mask if strand
        # is TOP; if strand is BOT they are their complement (NOT reversed)
        allele_a, allele_b = row['SNP'].strip("[]").split("/")
        problem = check_mask(mask)
        if problem:
            mask = 'None'
            logger.warn(
                "%r: %s, setting mask to 'None'" % (label, problem)
            )
            n_bad += 1
        columns = (label, rs_label, mask, allele_a, allele_b)
        outf.write("%s\t%s\t%s\t%s\t%s\n" % columns)
    return n_bad