示例#1
0
def write_output(sam_reader, outf, reftag, outfmt, flank_size, logger=None):
    """Run every record of ``sam_reader`` through a ``SnpHitProcessor``.

    A processor is created from ``reftag``, ``outf``, ``outfmt``,
    ``flank_size`` and the logger.  Its header is written first, each
    record is handed to ``process()``, and ``dump_current_hits()`` is
    called once after the loop.

    ``logger`` defaults to a ``NullLogger`` when not given (or falsy).

    Returns the number of records read from ``sam_reader``; 0 when the
    reader yields nothing.
    """
    logger = logger or NullLogger()
    hit_processor = SnpHitProcessor(reftag, outf, outfmt, flank_size, logger)
    hit_processor.write_header()
    # Initialise the counter up front: with an empty reader the loop
    # variable would otherwise never be bound and the return would fail.
    count = 0
    for count, m in enumerate(sam_reader, 1):
        hit_processor.process(m)
    hit_processor.dump_current_hits()  # last pair
    return count
示例#2
0
def write_output(db_snp_reader, outf, mask_size, logger=None):
  """Write one tab-separated line per acceptable record of the reader.

  Each item of ``db_snp_reader`` is unpacked as
  ``(rs_label, lflank, alleles, rflank)``, where ``alleles`` is a
  "/"-separated string.  A record is accepted when it has between 2 and
  4 alleles and all of them belong to ``POSSIBLE_ALLELES``; for those,
  ``build_mask`` is called and ``rs_label``, ``rs_label`` and the mask
  are written to ``outf``.  Any other record is reported through
  ``logger.warn`` and skipped.

  ``logger`` defaults to a ``NullLogger`` when not given (or falsy).

  Returns the number of skipped records.
  """
  if not logger:
    logger = NullLogger()
  skipped = 0
  for rs_label, lflank, allele_field, rflank in db_snp_reader:
    alleles = allele_field.split("/")
    acceptable = 2 <= len(alleles) <= 4 and set(alleles) <= POSSIBLE_ALLELES
    if not acceptable:
      logger.warn("%r: bad alleles %r, skipping" % (rs_label, alleles))
      skipped += 1
      continue
    mask = build_mask(lflank, alleles, rflank, mask_size)
    outf.write("%s\t%s\t%s\n" % (rs_label, rs_label, mask))
  return skipped
def write_output(reader, outf, logger=None):
    """Write the FASTQ records built from every row of ``reader``.

    For each row, ``build_fastq_records`` is called with the row's
    ``'label'`` and ``'mask'`` entries, a single shared
    ``SeqNameSerializer`` and the logger.  Every returned record is
    written to ``outf`` with its fields joined by newlines and followed
    by a trailing newline.

    ``logger`` defaults to a ``NullLogger`` when not given (or falsy).

    Returns the total number of records written.
    """
    if not logger:
        logger = NullLogger()
    name_serializer = SeqNameSerializer()
    total = 0
    for row in reader:
        records = build_fastq_records(
            row['label'], row['mask'], name_serializer, logger=logger)
        total += len(records)
        for record in records:
            outf.write("%s\n" % "\n".join(record))
    return total
示例#4
0
 def __init__(self, ref_tag, outf, outfmt=DEFAULT_OUTPUT_FORMAT,
              flank_size=DEFAULT_FLANK_SIZE, logger=None):
     """Store the given settings and start with no hits collected.

     ``ref_tag``, ``outf``, ``outfmt`` and ``flank_size`` are kept as
     attributes of the same name.  ``logger`` falls back to a
     ``NullLogger`` when not given (or falsy).  ``current_id`` starts
     as ``None``, ``current_hits`` as an empty list, and a fresh
     ``SeqNameSerializer`` is kept in ``serializer``.
     """
     self.logger = logger if logger else NullLogger()
     # Settings, stored exactly as passed in.
     self.ref_tag, self.outf = ref_tag, outf
     self.outfmt, self.flank_size = outfmt, flank_size
     # Per-run state: nothing collected yet.
     self.current_id, self.current_hits = None, []
     self.serializer = SeqNameSerializer()
def build_fastq_records(label, mask, name_serializer, logger=None):
    """Build one FASTQ-style record per allele found in ``mask``.

    ``split_mask(mask)`` is unpacked into left flank, alleles and right
    flank.  Each allele is paired with an entry of ``ALLELE_CODES`` and
    produces the tuple ``('@<id>', seq, '+<id>', quality)``, where
    ``seq`` is left flank + allele + right flank, ``<id>`` comes from
    ``name_serializer.serialize(label, code, snp_offset, alleles)`` with
    ``snp_offset`` equal to the left flank's length, and ``quality`` is
    ``'~'`` repeated once per base of ``seq``.

    If splitting (or unpacking its result) raises ``ValueError``, a
    warning is sent to ``logger.warn`` and an empty list is returned.
    ``logger`` defaults to a ``NullLogger`` when not given (or falsy).

    Returns the list of record tuples.
    """
    logger = logger or NullLogger()
    try:
        lflank, alleles, rflank = split_mask(mask)
    except ValueError:
        status = "no mask" if mask == "None" else "bad mask format"
        logger.warn("%r: %s, skipping" % (label, status))
        return []
    snp_offset = len(lflank)
    records = []
    # Builtin zip gives the same pairing as itertools.izip (stopping at
    # the shorter input) and exists on both Python 2 and Python 3.
    for allele, code in zip(alleles, ALLELE_CODES):
        seq = "%s%s%s" % (lflank, allele, rflank)
        seq_id = name_serializer.serialize(label, code, snp_offset, alleles)
        records.append(('@%s' % seq_id, seq, '+%s' % seq_id, '~' * len(seq)))
    return records
示例#6
0
def write_output(reader, outf, logger=None):
  """Write one five-column, tab-separated line per row of ``reader``.

  The columns are: the row's ``'Probe Set ID'``; its ``'dbSNP RS ID'``
  if that starts with ``'rs'``, otherwise the string ``'None'``; its
  ``'Flank'`` mask; ``'Allele A'``; and ``'Allele B'``.  When
  ``check_mask`` returns a truthy problem for the mask, the mask column
  is replaced by ``'None'`` and the problem is reported through
  ``logger.warn``; the line is still written.

  ``logger`` defaults to a ``NullLogger`` when not given (or falsy).

  Returns the number of rows whose mask was replaced.
  """
  if not logger:
    logger = NullLogger()
  replaced = 0
  for row in reader:
    label = row['Probe Set ID']
    rs_id = row['dbSNP RS ID']
    rs_label = rs_id if rs_id.startswith('rs') else 'None'
    mask = row['Flank']
    problem = check_mask(mask)
    if problem:
      mask = 'None'
      logger.warn("%r: %s, setting mask to 'None'" % (label, problem))
      replaced += 1
    columns = (label, rs_label, mask, row['Allele A'], row['Allele B'])
    outf.write("%s\t%s\t%s\t%s\t%s\n" % columns)
  return replaced
示例#7
0
def write_output(reader, outf, logger=None):
    """Write one five-column, tab-separated line per row of ``reader``.

    The columns are: the row's ``'IlmnID'``; its ``'Name'`` if that
    starts with ``'rs'``, otherwise the string ``'None'``; its
    ``'TopGenomicSeq'`` mask; and the two alleles obtained by stripping
    square brackets from ``'SNP'`` and splitting on ``"/"``.  When
    ``check_mask`` returns a truthy problem for the mask, the mask
    column is replaced by ``'None'`` and the problem is reported
    through ``logger.warn``; the line is still written.

    ``logger`` defaults to a ``NullLogger`` when not given (or falsy).

    Returns the number of rows whose mask was replaced.
    """
    if not logger:
        logger = NullLogger()
    replaced = 0
    for row in reader:
        label = row['IlmnID']
        name = row['Name']
        rs_label = name if name.startswith('rs') else 'None'
        mask = row['TopGenomicSeq']
        # alleles are the same as those extracted from the mask if strand
        # is TOP; if strand is BOT they are their complement (NOT reversed)
        snp_field = row['SNP'].strip("[]")
        allele_a, allele_b = snp_field.split("/")
        problem = check_mask(mask)
        if problem:
            mask = 'None'
            logger.warn("%r: %s, setting mask to 'None'" % (label, problem))
            replaced += 1
        columns = (label, rs_label, mask, allele_a, allele_b)
        outf.write("%s\t%s\t%s\t%s\t%s\n" % columns)
    return replaced