def render(args, outfile):
    """Write the records whose title key appears in ``args.listfile``.

    A key is the first run of characters in a list-file line that are
    neither ``>`` nor whitespace.  A record's key is the first
    whitespace-separated token of its ``cmt``, so titles carrying a
    description after the identifier still match.

    Args:
        args: namespace providing ``filenames`` (inputs passed to
            ``fasta.read``), ``listfile`` (path of the key list),
            ``order`` (emit in list-file order when true) and ``concise``
            (emit ``dict(cmt=key, seq=rec.seq)`` instead of the record).
        outfile: destination handed to ``fasta.write``.

    Keys that match no record are skipped in both modes.  Records whose
    ``cmt`` contains no token are skipped as well, since they cannot match.
    """
    key_pattern = re.compile(r'[^>\s\n]+')

    def parse_key(line):
        """Return the key found in *line*, or None when there is none."""
        match = key_pattern.search(line)
        return match.group() if match else None

    def read_keys():
        """Return the non-empty keys of the list file, in file order."""
        # The context manager closes the list file deterministically.
        with open(args.listfile) as listfile:
            parsed = [parse_key(line) for line in listfile]
        return [key for key in parsed if key]

    def record_key(rec):
        """Return the first token of the record title, or None if empty."""
        tokens = rec.cmt.split()
        return tokens[0] if tokens else None

    def emit(key, rec):
        """Write *rec*, reduced to key + sequence in concise mode."""
        outrec = dict(cmt=key, seq=rec.seq) if args.concise else rec
        fasta.write(outfile, outrec)

    recgens = [fasta.read(fn, False) for fn in args.filenames]
    records = itertools.chain(*recgens)

    if not args.order:
        # Simple and fast: stream the records, emit matches as they come.
        keys = set(read_keys())
        for rec in records:
            key = record_key(rec)
            if key in keys:
                emit(key, rec)
        return

    # Preserve the order of the keys in the list file.  Every selected
    # record is held until all inputs are read, which can use a lot of
    # memory.  When a key occurs in several records, the last one wins.
    selection = collections.OrderedDict((key, None) for key in read_keys())
    for rec in records:
        key = record_key(rec)
        if key in selection:
            selection[key] = rec
    for key, rec in selection.items():
        if rec is None:
            # Listed key without a matching record: nothing to write.
            continue
        emit(key, rec)
def render(cls, args, outfile):
    """Read each input file, apply the requested filters and write it out.

    Args:
        args: namespace providing ``filenames``, ``concise`` (passed to
            ``fasta.read``), ``velvet``, ``insert_filename``, ``regex``
            (a perl-style expression or a false value) and ``width``
            (passed to ``fasta.write`` as ``linewidth``).
        outfile: destination handed to ``fasta.write``.

    Exits via ``sys.exit`` when ``args.regex`` does not start with an
    accepted form letter, or when ``Regexon.perl`` raises ``ValueError``.
    """
    # 'm', 'x' and 's' are the forms documented in the help text below.
    # The previous check accepted 'e' in place of 'm', which rejected the
    # documented match form.
    # NOTE(review): 'e' is still accepted for backward compatibility;
    # confirm against Regexon.perl whether it is a real form.
    if args.regex and args.regex[0] not in 'emxs':
        msg = (
            "\n"
            "possible regex forms:\n"
            "  1. 'm/pattern/modifiers'      - select matching ones\n"
            "  2. 'x/pattern/modifiers'      - select non-matching ones\n"
            "  3. 's/pattern/repl/modifiers' - apply replacement on cmt\n"
        )
        print(msg, file=sys.stderr)
        sys.exit('error: invalid regex')

    for fn in args.filenames:
        recs = fasta.read(fn, args.concise)
        if args.velvet:
            recs = cls.filter_velvet_concise(recs)
        if args.insert_filename:
            ifn = args.insert_filename
            recs = cls.filter_insert_filename(recs, fn, ifn)
        if args.regex:
            try:
                regexon = Regexon.perl(args.regex)
            except ValueError as e:
                # sys.exit prints the exception text on stderr.
                sys.exit(e)
            recs = cls.filter_regex(recs, regexon)
        fasta.write(outfile, recs, linewidth=args.width)
def render(cls, args, outfile):
    """Write every input record with its sequence transcoded and reversed.

    Each record read from ``args.filenames`` gets ``".RC"`` appended to its
    ``cmt``; its ``seq`` is run through the mapper returned by
    ``CharsMapper.create_mapper_compl_dna()`` and then reversed.  The
    modified record is written to ``outfile`` with ``fasta.write``.
    """
    mapper = CharsMapper.create_mapper_compl_dna()

    def revcomp(record):
        """Tag and transform *record* in place, then return it."""
        record.cmt = record.cmt + ".RC"
        transcoded = mapper.transcode(record.seq)
        record.seq = transcoded[::-1]
        return record

    for path in args.filenames:
        for record in fasta.read(path):
            fasta.write(outfile, revcomp(record))
def render(cls, args, outfile):
    """Write ``args.num`` random sequences named ``randseq.<index>``.

    Args:
        args: namespace providing ``num`` (number of sequences), ``len``
            (sequence length) and ``sigma``.  With a false ``sigma`` every
            sequence has length ``len``; otherwise each length is drawn
            with ``binomial(len, p)`` where ``p = 1 - sigma**2 / len``.
        outfile: destination handed to ``fasta.write``.

    Exits via ``sys.exit`` when ``sigma`` is set and ``len`` is not
    positive, or when ``p`` would be negative.
    """
    if args.sigma:
        errmsg = "sigma should be smaller then sqrt(len)"
        # A non-positive len cannot satisfy sigma < sqrt(len).  Rejecting it
        # here avoids a ZeroDivisionError for len == 0 and a p > 1 handed
        # to binomial() for len < 0.
        if args.len <= 0:
            sys.exit(errmsg)
        p = 1 - args.sigma ** 2.0 / args.len
        if p < 0:
            sys.exit(errmsg)
        lengths = (binomial(args.len, p) for _ in six.moves.range(args.num))
    else:
        lengths = (args.len for _ in six.moves.range(args.num))

    for index, length in enumerate(lengths):
        cmt = "randseq.{}".format(index)
        # gen_randseq's result is decoded as ASCII before writing.
        seq = MutSimulator.gen_randseq(length).decode("ascii")
        fasta.write(outfile, dict(cmt=cmt, seq=seq))
def render(cls, args, outfile):
    """Translate every input record with the table named by ``args.table``.

    Records are read with ``fasta.read(fn, args.concise)``.  When a
    sequence length is not a multiple of 3, a warning goes to stderr
    unless ``args.quiet`` is set.  The record is then dropped if
    ``args.skip`` is set; otherwise its trailing ``len % 3`` characters are
    cut off before translation.  Each translated record is written to
    ``outfile`` with ``fasta.write``.
    """
    codon_table = CodonTable(translate.get_transl_table(args.table))

    for path in args.filenames:
        for record in fasta.read(path, args.concise):
            seqlen = len(record.seq)
            leftover = seqlen % 3
            if leftover:
                if not args.quiet:
                    warning = 'warning: lenth of seq not a multiple of 3\n'
                    detail = '^debug:\t{}\t{}\t{}'.format(
                        seqlen, path, record.cmt)
                    print(warning + detail, file=sys.stderr)
                if args.skip:
                    continue
                # Keep only the complete leading codons.
                record.seq = record.seq[:seqlen - leftover]
            record.seq = codon_table.translate(record.seq)
            fasta.write(outfile, record)