def action(args):
    """Copy the CSV in ``args.infile`` to an output file, optionally
    appending constant extra columns.

    Behavior driven by ``args``:
      * ``--inplace`` — write to a temp file, then rename over the input
        (refused when reading from stdin, since there is no file to replace).
      * ``--rename`` — not implemented yet; raises NotImplementedError.
      * ``--add`` — 'name:val,...' spec parsed by ``parse_extras``; each
        parsed key becomes a new column with that constant value.
    """
    if args.inplace and args.infile is sys.stdin:
        log.error('Error: cannot use the --inplace option with stdin')
        return

    if args.rename:
        raise NotImplementedError

    reader = csv.DictReader(args.infile)
    fieldnames = reader.fieldnames or []

    new_fields = parse_extras(args.add) if args.add else {}
    if new_fields:
        fieldnames.extend(new_fields.keys())
        # Generator expression instead of itertools.imap so this works on
        # both Python 2 and Python 3; dict(row, **new_fields) overlays the
        # constant extra columns onto each input row.
        reader = (dict(row, **new_fields) for row in reader)

    if args.inplace:
        # tmp() is a project helper that yields a temp file alongside the
        # input, so the final os.rename stays on the same filesystem.
        outfile = tmp(args.infile.name)
    else:
        outfile = args.outfile

    with opener(outfile, 'w') as fout:
        # extrasaction='ignore' silently drops row keys absent from
        # fieldnames rather than raising.
        writer = csv.DictWriter(fout, fieldnames, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(reader)

    if args.inplace:
        os.rename(fout.name, args.infile.name)
def build_parser(parser):
    """Register this subcommand's arguments on *parser*.

    Reads ssearch results from a CSV file (or stdin) and tallies error
    categories; optional homopolymer transition-matrix output.
    """
    parser.add_argument(
        'aligns', nargs='?', default=sys.stdin, type=Opener('r'),
        help='csvfile of ssearch results')
    parser.add_argument(
        '-o', '--out', default=sys.stdout, type=Opener('w'),
        help='csv file tallying each error category for each read')
    parser.add_argument(
        '-m', '--homopolymer-matrix', dest='matrix',
        # writer() needs an opened file handle, so wrap Opener in a lambda.
        type=lambda f: writer(Opener('w')(f)),
        help='csv file containing transition matrix of homopolymer lengths')
    parser.add_argument(
        '-M', '--homopolymer-max', default=6, type=int,
        help='csv homopolymer length above which counts are binned')
    parser.add_argument(
        '--step', action='store_true',
        # fixed help-text typo: 'reults' -> 'results'
        help='step through results (for debugging)')
    parser.add_argument(
        '-f', '--extra-fields', type=parse_extras, default={},
        help="extra fields for csv file in form 'name1:val1,name2:val2'")
    parser.add_argument(
        '--output-alignment', action='store_true',
        help='Include the actual alignment in csv output')
def action(args):
    """Filter/transform ssearch36 alignments and emit them as CSV rows.

    Pipeline (all stages are lazy generators):
      parse -> optional z-score filter -> group by query name ->
      keep top alignment or flatten -> optional run-length decoding ->
      optional diff annotation -> CSV output.

    ``--print-one`` pretty-prints the first alignment and exits, for
    debugging.
    """
    extras = parse_extras(args.extra_fields) if args.extra_fields else {}

    aligns = islice(parse_ssearch36(args.alignments, False), args.limit)

    if args.min_zscore:
        aligns = (a for a in aligns if float(a['sw_zscore']) >= args.min_zscore)

    # parse_ssearch36 output is ordered by query, so groupby is safe here.
    aligns = groupby(aligns, key=itemgetter('q_name'))
    if args.top_alignment:
        # first (best) alignment per query
        aligns = (next(a) for _, a in aligns)
    else:
        aligns = (a for _, i in aligns for a in i)  # flatten groupby iters

    if args.rlefile:
        # merge all run-length-encoding dicts into one name -> encoding map
        decoding = {k: v for d in args.rlefile for k, v in d.items()}

        def decode(align):
            # expand homopolymer run-length encoding for both sequences
            align['t_seq'], align['q_seq'] = homodecodealignment(
                align['t_seq'], from_ascii(decoding[align['t_name']]),
                align['q_seq'], from_ascii(decoding[align['q_name']]))
            return align

        # generator expression instead of itertools.imap (Python 2/3 safe)
        aligns = (decode(a) for a in aligns)

    if args.print_one:
        # next(aligns) instead of aligns.next(): works on Python 3 too
        pprint.pprint(next(aligns))
        sys.exit()

    if args.with_diff:
        aligns = (add_diff(a) for a in aligns)

    if args.fieldnames:
        fieldnames = args.fieldnames
    else:
        # peek at the first row for fieldnames, then push it back
        top = next(aligns, {})
        # list(...) so the += below works on Python 3 dict views as well
        fieldnames = list(top.keys())
        aligns = chain([top], aligns)

    if extras:
        fieldnames += list(extras.keys())
        aligns = (dict(d, **extras) for d in aligns)

    writer = csv.DictWriter(args.out, extrasaction='ignore',
                            fieldnames=fieldnames)
    if args.header:
        writer.writeheader()
    for a in aligns:
        writer.writerow(a)
def build_parser(parser):
    """Register this subcommand's arguments on *parser*.

    Reads ssearch results from a CSV file (or stdin) and tallies error
    categories; optional homopolymer transition-matrix output.
    """
    parser.add_argument(
        'aligns', nargs='?', default=sys.stdin, type=Opener('r'),
        help='csvfile of ssearch results')
    parser.add_argument(
        '-o', '--out', default=sys.stdout, type=Opener('w'),
        help='csv file tallying each error category for each read')
    parser.add_argument(
        '-m', '--homopolymer-matrix', dest='matrix',
        # writer() needs an opened file handle, so wrap Opener in a lambda.
        type=lambda f: writer(Opener('w')(f)),
        help='csv file containing transition matrix of homopolymer lengths')
    parser.add_argument(
        '-M', '--homopolymer-max', default=6, type=int,
        help='csv homopolymer length above which counts are binned')
    parser.add_argument(
        '--step', action='store_true',
        # fixed help-text typo: 'reults' -> 'results'
        help='step through results (for debugging)')
    parser.add_argument(
        '-f', '--extra-fields', type=parse_extras, default={},
        help="extra fields for csv file in form 'name1:val1,name2:val2'")
    parser.add_argument(
        '--output-alignment', action='store_true',
        help='Include the actual alignment in csv output')
default=True, dest='show_header') BASES = set(['A', 'C', 'G', 'T']) def action(args): try: from numpy import mean except ImportError, e: print(e) sys.exit(1) extras = parse_extras(args.extra_fields) if args.extra_fields else {} # TODO: add 'ambiguities' fieldnames = ['name', 'length', 'mean', 'ambig'] fieldnames += extras.keys() stats = DictWriter(args.out, fieldnames=fieldnames) if args.show_header: stats.writeheader() for s in islice(SeqIO.parse(args.fastq, 'fastq'), args.limit): qual = s.letter_annotations["phred_quality"] ambig = len([1 for b in s.seq if b not in BASES]) row = [s.name.replace(':', '_'), len(s), mean(qual), ambig] row += extras.values()