Пример #1
0
    def render(cls, args, outfile):
        """Read records from each input file, filter them and write them out.

        For every name in ``args.filenames`` the records are read with
        ``fasta.read`` and then passed, in this order, through the optional
        filters selected by ``args.velvet``, ``args.insert_filename`` and
        ``args.regex`` before being written with ``fasta.write``.

        :param args: parsed options; the attributes read here are ``regex``,
            ``filenames``, ``concise``, ``velvet``, ``insert_filename`` and
            ``width``
        :param outfile: destination handed to ``fasta.write``

        Terminates through ``sys.exit`` when ``args.regex`` starts with an
        unsupported selector character, or when ``Regexon.perl`` raises
        ``ValueError`` for it.
        """
        # The usage text below documents the 'm', 'x' and 's' forms, so all
        # three must pass this guard ('m' used to be rejected here).
        # 'e' was accepted by the earlier check and is kept for compatibility.
        # NOTE(review): confirm whether Regexon.perl really supports 'e'.
        if args.regex and args.regex[0] not in ('m', 'x', 's', 'e'):
            msg = """
            possible regex forms:
            1. 'm/pattern/modifiers'    - select matching ones
            2. 'x/pattern/modifiers'    - select non-matching ones
            3. 's/pattern/repl/modifiers'   - apply replacement on cmt
            """
            print(msg, file=sys.stderr)
            sys.exit('error: invalid regex')

        for fn in args.filenames:
            recs = fasta.read(fn, args.concise)
            if args.velvet:
                recs = cls.filter_velvet_concise(recs)
            if args.insert_filename:
                # args.insert_filename is forwarded as the ``position``
                # argument of filter_insert_filename.
                ifn = args.insert_filename
                recs = cls.filter_insert_filename(recs, fn, ifn)

            if args.regex:
                try:
                    regexon = Regexon.perl(args.regex)
                except ValueError as e:
                    # Exit with the parser's own message as the error text.
                    sys.exit(e)
                recs = cls.filter_regex(recs, regexon)

            fasta.write(outfile, recs, linewidth=args.width)
Пример #2
0
 def filter_insert_filename(cls, recs, filename, position):
     """Run ``recs`` through a substitution that adds the file's name.

     The inserted name is the part of ``filename`` before its first ``.``.
     ``position`` picks the substitution: ``'head'`` puts ``name.`` in
     front of the leading run of non-whitespace characters, ``'tail'``
     puts ``.name`` after it.  Any other ``position`` raises ``KeyError``.

     The chosen Perl-style expression is compiled with ``Regexon.perl``
     and applied through ``cls.filter_regex``, whose result is returned.
     """
     stem = filename.partition('.')[0]
     templates = {
         'head': r's/^([^\s]+)/{}.\1/',
         'tail': r's/^([^\s]+)/\1.{}/',
     }
     expression = templates[position].format(stem)
     return cls.filter_regex(recs, Regexon.perl(expression))
Пример #3
0
from __future__ import print_function
import os
import unittest
from molbiox.algor.translate import CodonTable
from molbiox.frame.signature import Sig
from molbiox.frame.regexon import Regexon
from molbiox.kb.translate import get_transl_table

# Built with Regexon.new from a pattern matching any single non-letter
# character and an empty string as the second argument.
# NOTE(review): presumably a substitution that strips everything except
# letters (e.g. the whitespace inside the multi-line sequences below) --
# confirm against Regexon.new.
sub = Regexon.new(r'[^a-zA-Z]', '')

test_data = {
    'dna': """
    TTGATGGCTAAGAGTAAAATCTTAAAAAACACACTGGTTCTATATTTTCGTCAAGTTTTG
    ATTGTATTAATTACTCTCTATTCAATGAGAGTTGTATTAAATGAATTAGGTGTGGATGAT
    TTTGGTATTTATAGCGTTGTGGCTGGTTTTGTAACTTTACTTGCATTTTTACCCGGAAGC
    ATGGCGAGTGCAACGCAGCGGTTTTTCTCTTTTGCGATGGGGAAATCGGATATAGTAAAA
    TTAAAGCAAACCTTCAGTGTTAATTTAGTTATGTATACTGGCATAGCCTTGTTAGCATAT
    ATAACATTTCAAACTATCGGATTTTGGTATGTTGATGAATATCTAAAAATACCTCATAAC
    CGCTTTCATGCAGCCTTGGAATTATATCACTATGTGTCTTTATCATTTATTTTTTCAATT
    TTTTCTGCGCCTTTTATCGCGATTTTAATTGCGCACGAAGATATGCACATTTATGCGATC
    TTCTCGGTTTTTGATGCATTTTTAAAACTAGTAGCCGCAATTTCTTTAGACTATGTGAAC
    TATGATTTGTTAGCTTATTATGGAGTGGCTCTTTTGATTGTATCTGGATTGCTTGCTTTC
    GCGTATATTTTTATATGTATAAAGAAATATCCAGAGTGTCAAATGAAAAAGCTTTATTGG
    AGTTCGAGTATACTGAAAGAAATTATTGGTTTCACGATATGGACTTTGCTAGGTCAATTG
    AGCACTGTTTTTAGAAATCAGGCAGTAACTGTTCTTGTAAACCAAATGTTTAATCCTTCA
    ATTGTGGCAGCTCGTGCAATTGCCTTGAATGTGGCTAGTCAAGTTGGAATTTTTTCGAAT
    AATTTAAATACAGGGTTATATCCACCAATTATAAAAGCTTACGCAGCAAATCAAAAAGAG
    GAAATGCTGAGTTTAGTTTTTAATGGTTCTAAACTGACTTTCTTCTTGATGTGGGTATGT
    GCATTACCCATGTTGCTTGAAATGGAAACGATATTAACACTTTGGCTAAAAACACCACCA
    TCAGAAGCGATATTATTTACTCAGTTAGCGATTGTTGAATCCTTGATACTGGCTATAAGC