def gen_output_lines(args, fin):
    """
    Yield observation lines, given a filtered pileup file open for reading.

    Each non-blank line of fin is split on whitespace and converted with
    DGRP.filtered_pileup_row_to_typed and DGRP.filtered_pileup_typed_to_obs.
    The position and observation are passed to an
    iterfiller.FillerGenerator, and every value the filler emits is yielded
    as a single tab-separated string.

    Arguments:
    args -- object providing the attributes reqambig, fill, errlow,
            errhigh, low and high.  A low of 'drosophila' is replaced by 1.
            A high of 'drosophila' is replaced by the length registered in
            DGRP.g_chromosome_length_pairs for the chromosome named on the
            first data row.
    fin -- iterable of text lines.

    If fin contains no non-blank lines then nothing is yielded.

    Raises Exception when the chromosome is unknown in 'drosophila' mode,
    when a row names a different chromosome than the first data row,
    or when reqambig is set and a position rejected by the filler's
    check_bounds has a reference nucleotide other than 'N'.
    """
    # unpack some relevant arguments
    reqambig = args.reqambig
    fill = args.fill
    errlow, errhigh = args.errlow, args.errhigh
    low = 1 if args.low == 'drosophila' else args.low
    high = args.high
    # State maintained across input lines.
    # The filler is created lazily because the upper bound may depend
    # on the chromosome name, which is only known from the first data row.
    filler = None
    chrom_name = None
    # define the default line to write
    default_obs = (0, 0, 0, 0)
    # process the input file line by line
    for line in fin:
        srow = line.split()
        if not srow:
            continue
        row = DGRP.filtered_pileup_row_to_typed(srow)
        obs = DGRP.filtered_pileup_typed_to_obs(row)
        name, pos, ref = row[:3]
        if filler is None:
            # the first data row fixes the chromosome for the whole file
            chrom_name = name
            # if appropriate, update the high value using the chrom name
            if high == 'drosophila':
                name_to_length = dict(DGRP.g_chromosome_length_pairs)
                high = name_to_length.get(name)
                if high is None:
                    raise Exception('invalid fly chromosome: ' + name)
            # define the filler generator object
            filler = iterfiller.FillerGenerator(
                    low, high, fill, errlow, errhigh, default_obs)
        # check the chromosome name for consistency
        if name != chrom_name:
            raise Exception(
                    'conflicting chromosome '
                    'names: %s %s' % (name, chrom_name))
        # check for reference nucleotide weirdness;
        # the bounds check is only consulted when reqambig is set
        if reqambig and not filler.check_bounds(pos) and ref != 'N':
            raise Exception(
                    'expected out of bounds reference nucleotides '
                    'to be N but found %s '
                    'at position %d of chrom %s' % (ref, pos, name))
        # process lines emitted by the filler
        for value in filler.fill(pos, obs):
            yield '\t'.join(str(x) for x in value)
    # With no data rows the filler was never created, so there is nothing
    # to finish; stop here instead of failing on None.finish().
    if filler is None:
        return
    # process final lines emitted by the filler
    for value in filler.finish():
        yield '\t'.join(str(x) for x in value)
def gen_typed_rows(fin):
    """
    Yield one typed row per non-blank line of an open input file.

    Each line is split on whitespace; lines that produce no fields are
    skipped, and the remaining field lists are converted with
    DGRP.filtered_pileup_row_to_typed.
    """
    for raw_line in fin:
        fields = raw_line.split()
        # blank or whitespace-only lines carry no row
        if not fields:
            continue
        yield DGRP.filtered_pileup_row_to_typed(fields)