示例#1
0
    def render(args, outfile):
        """Write the records whose titles are listed in ``args.listfile``.

        Records are read from every file in ``args.filenames``.  A record is
        selected when the first whitespace-delimited word of its ``cmt``
        equals a key parsed from the list file.

        * ``args.order`` false: selected records are written as they are
          encountered, i.e. in input-file order.
        * ``args.order`` true: selected records are written in the order the
          keys appear in the list file; if several records share a key the
          last one read wins.  Keys that match no record are skipped.

        When ``args.concise`` is true a ``dict(cmt=<key>, seq=<record seq>)``
        is written instead of the record itself.
        """
        # Compiled once here rather than on every line of the list file.
        key_pattern = re.compile(r'[^>\s\n]+')

        def parse_key(l):
            """Return the first run of non-'>' non-space chars, or None."""
            mat = key_pattern.search(l)
            return mat.group() if mat else None

        def first_word(text):
            """Return the first word of ``text``; None if it has no words."""
            words = text.split()
            return words[0] if words else None

        def to_output(cmt, rec):
            """Pick the object handed to ``fasta.write`` for one record."""
            return dict(cmt=cmt, seq=rec.seq) if args.concise else rec

        recgens = [fasta.read(fn, False) for fn in args.filenames]
        records = itertools.chain(*recgens)

        # Read the list file eagerly so the handle is closed right away;
        # lines that yield no key (e.g. blank lines) are dropped.
        with open(args.listfile) as listfile:
            keys = [key for key in map(parse_key, listfile) if key]

        if not args.order:
            wanted = set(keys)
            for rec in records:
                # use the first word only, to tolerate titles that carry a
                # description after the identifier
                cmt = first_word(rec.cmt)
                if cmt in wanted:
                    fasta.write(outfile, to_output(cmt, rec))
            return  # simple and fast

        # Preserve the order of keys in the list file.  Every selected record
        # is held until all input has been read, which can use a lot of memory.
        selection = collections.OrderedDict((key, None) for key in keys)
        for rec in records:
            cmt = first_word(rec.cmt)
            if cmt in selection:
                selection[cmt] = rec

        for cmt, rec in selection.items():
            if rec is None:
                # Key was listed but never seen in the input: nothing to
                # write (same outcome as in the unordered mode).
                continue
            fasta.write(outfile, to_output(cmt, rec))
示例#2
0
    def render(cls, args, outfile):
        """Read each input file, apply the requested filters, and write it.

        For every file in ``args.filenames`` the records are read with
        ``fasta.read(fn, args.concise)`` and then optionally passed through:

        * ``cls.filter_velvet_concise`` when ``args.velvet`` is set,
        * ``cls.filter_insert_filename`` when ``args.insert_filename`` is set,
        * ``cls.filter_regex`` when ``args.regex`` is set.

        The result is written to ``outfile`` with ``linewidth=args.width``.

        Exits the process (via ``sys.exit``) when ``args.regex`` starts with
        an unsupported character or when ``Regexon.perl`` raises
        ``ValueError``.
        """
        # NOTE(review): the check accepts 'e', 'x', 's' as the first
        # character, while the help text below documents 'm', 'x', 's'.
        # One of the two is probably a typo -- confirm against Regexon.perl.
        if args.regex and args.regex[0] not in 'exs':
            msg = """
            possible regex forms:
            1. 'm/pattern/modifiers'    - select matching ones
            2. 'x/pattern/modifiers'    - select non-matching ones
            3. 's/pattern/repl/modifiers'   - apply replacement on cmt
            """
            print(msg, file=sys.stderr)
            sys.exit('error: invalid regex')

        # The regex does not depend on the input file, so parse it once up
        # front instead of once per file.
        # NOTE(review): assumes the parsed Regexon can be reused across
        # files -- confirm it carries no per-use state.
        regexon = None
        if args.regex:
            try:
                regexon = Regexon.perl(args.regex)
            except ValueError as e:
                sys.exit(e)

        for fn in args.filenames:
            recs = fasta.read(fn, args.concise)
            if args.velvet:
                recs = cls.filter_velvet_concise(recs)
            if args.insert_filename:
                ifn = args.insert_filename
                recs = cls.filter_insert_filename(recs, fn, ifn)
            if args.regex:
                recs = cls.filter_regex(recs, regexon)

            fasta.write(outfile, recs, linewidth=args.width)
示例#3
0
文件: revcompl.py 项目: xinbo/molbiox
 def render(cls, args, outfile):
     """Rewrite every record of the input files and write it to ``outfile``.

     For each record read from the files in ``args.filenames``:
     ".RC" is appended to its ``cmt``, its ``seq`` is passed through the
     mapper returned by ``CharsMapper.create_mapper_compl_dna()`` and then
     reversed, and the record is written with ``fasta.write``.
     """
     mapper = CharsMapper.create_mapper_compl_dna()

     def iter_records():
         # Lazily walk all input files, one record at a time.
         for path in args.filenames:
             for record in fasta.read(path):
                 yield record

     for record in iter_records():
         record.cmt += ".RC"
         transcoded = mapper.transcode(record.seq)
         record.seq = transcoded[::-1]  # reverse after transcoding
         fasta.write(outfile, record)
示例#4
0
文件: randseq.py 项目: xinbo/molbiox
 def render(cls, args, outfile):
     """Write ``args.num`` generated sequences to ``outfile``.

     Sequence ``i`` gets the title ``randseq.<i>`` and a body produced by
     ``MutSimulator.gen_randseq(length)`` decoded as ASCII.

     When ``args.sigma`` is truthy, each length is drawn with
     ``binomial(args.len, p)`` where ``p = 1 - sigma**2 / len``; the process
     exits with an error message if ``p`` is negative.  Otherwise every
     sequence has length ``args.len``.
     """
     use_sigma = bool(args.sigma)
     if use_sigma:
         p = 1 - args.sigma ** 2.0 / args.len
         if p < 0:
             sys.exit("sigma should be smaller then sqrt(len)")

     for index in six.moves.range(args.num):
         # The length is drawn right before its sequence is generated, one
         # record at a time.
         if use_sigma:
             length = binomial(args.len, p)
         else:
             length = args.len
         title = "randseq.{}".format(index)
         body = MutSimulator.gen_randseq(length).decode("ascii")
         fasta.write(outfile, dict(cmt=title, seq=body))
示例#5
0
 def render(cls, args, outfile):
     """Translate the sequence of every input record and write it out.

     A ``CodonTable`` is built from ``translate.get_transl_table(args.table)``
     and its ``translate`` method is applied to each record's ``seq``.

     Records whose sequence length is not a multiple of 3 are handled first:
     a warning goes to stderr unless ``args.quiet`` is set; the record is
     dropped when ``args.skip`` is set, otherwise the trailing 1-2 characters
     are cut off before translation.
     """
     codon_table = CodonTable(translate.get_transl_table(args.table))
     for fn in args.filenames:
         for rec in fasta.read(fn, args.concise):
             seqlen = len(rec.seq)
             leftover = seqlen % 3
             if leftover:
                 if not args.quiet:
                     headline = 'warning: lenth of seq not a multiple of 3\n'
                     detail = '^debug:\t{}\t{}\t{}'.format(seqlen, fn, rec.cmt)
                     print(headline + detail, file=sys.stderr)
                 if args.skip:
                     continue
                 # keep only the leading whole codons
                 rec.seq = rec.seq[:seqlen - leftover]
             rec.seq = codon_table.translate(rec.seq)
             fasta.write(outfile, rec)