def render(cls, args, outfile):
    """Filter the FASTA records of each input file and write them out.

    For every name in ``args.filenames`` the records are read with
    ``fasta.read`` and passed, in this order, through the filters that the
    options enable: ``args.velvet``, ``args.insert_filename`` and
    ``args.regex``. The result is then written with ``fasta.write``.

    :param args: options object; the attributes read here are ``regex``,
        ``filenames``, ``concise``, ``velvet``, ``insert_filename`` and
        ``width``
    :param outfile: output target handed to ``fasta.write``
    :raises SystemExit: if ``args.regex`` starts with an unsupported letter,
        or if ``Regexon.perl`` rejects it with a ``ValueError``
    """
    regexon = None
    if args.regex:
        # 'm', 'x' and 's' are the forms listed in the help text below. 'e' is
        # still let through because the previous check accepted it.
        # NOTE(review): confirm whether Regexon.perl really supports 'e'.
        if args.regex[0] not in 'mexs':
            # Adjacent literals are concatenated; the emitted text is kept
            # exactly as it was.
            msg = (
                " possible regex forms:"
                " 1. 'm/pattern/modifiers' - select matching ones"
                " 2. 'x/pattern/modifiers' - select non-matching ones"
                " 3. 's/pattern/repl/modifiers' - apply replacement on cmt "
            )
            print(msg, file=sys.stderr)
            sys.exit('error: invalid regex')

        # The expression does not depend on the input file, so parse it once
        # up front; a malformed regex then fails before any file is handled.
        try:
            regexon = Regexon.perl(args.regex)
        except ValueError as e:
            sys.exit(e)

    for fn in args.filenames:
        recs = fasta.read(fn, args.concise)
        if args.velvet:
            recs = cls.filter_velvet_concise(recs)
        if args.insert_filename:
            # args.insert_filename is passed on as the insert position.
            recs = cls.filter_insert_filename(recs, fn, args.insert_filename)
        if regexon is not None:
            recs = cls.filter_regex(recs, regexon)
        fasta.write(outfile, recs, linewidth=args.width)
def filter_insert_filename(cls, recs, filename, position):
    """Insert a label derived from ``filename`` into each record.

    The label is the file's base name up to its first dot. It is joined with
    a dot to the leading run of non-whitespace characters that the
    substitution matches: in front of it for ``'head'``, behind it for
    ``'tail'``. The substitution is applied through ``cls.filter_regex``.

    :param recs: records to filter, passed through to ``cls.filter_regex``
    :param filename: path or name of the file the records came from
    :param position: ``'head'`` or ``'tail'``
    :return: whatever ``cls.filter_regex`` returns for the substitution
    :raises KeyError: if ``position`` is neither ``'head'`` nor ``'tail'``
    """
    # Imported locally so the method does not rely on a module-level import.
    import os

    # Take the base name before splitting on '.': splitting the full path
    # turns './a.fa' into an empty label and 'my.dir/a.fa' into 'my', and a
    # '/' left in the label would break the s/pattern/repl/ expression.
    name = os.path.basename(filename).split('.')[0]

    templates = {
        'head': r's/^([^\s]+)/{}.\1/',
        'tail': r's/^([^\s]+)/\1.{}/',
    }
    regexon = Regexon.perl(templates[position].format(name))
    return cls.filter_regex(recs, regexon)
from __future__ import print_function import os import unittest from molbiox.algor.translate import CodonTable from molbiox.frame.signature import Sig from molbiox.frame.regexon import Regexon from molbiox.kb.translate import get_transl_table sub = Regexon.new(r'[^a-zA-Z]', '') test_data = { 'dna': """ TTGATGGCTAAGAGTAAAATCTTAAAAAACACACTGGTTCTATATTTTCGTCAAGTTTTG ATTGTATTAATTACTCTCTATTCAATGAGAGTTGTATTAAATGAATTAGGTGTGGATGAT TTTGGTATTTATAGCGTTGTGGCTGGTTTTGTAACTTTACTTGCATTTTTACCCGGAAGC ATGGCGAGTGCAACGCAGCGGTTTTTCTCTTTTGCGATGGGGAAATCGGATATAGTAAAA TTAAAGCAAACCTTCAGTGTTAATTTAGTTATGTATACTGGCATAGCCTTGTTAGCATAT ATAACATTTCAAACTATCGGATTTTGGTATGTTGATGAATATCTAAAAATACCTCATAAC CGCTTTCATGCAGCCTTGGAATTATATCACTATGTGTCTTTATCATTTATTTTTTCAATT TTTTCTGCGCCTTTTATCGCGATTTTAATTGCGCACGAAGATATGCACATTTATGCGATC TTCTCGGTTTTTGATGCATTTTTAAAACTAGTAGCCGCAATTTCTTTAGACTATGTGAAC TATGATTTGTTAGCTTATTATGGAGTGGCTCTTTTGATTGTATCTGGATTGCTTGCTTTC GCGTATATTTTTATATGTATAAAGAAATATCCAGAGTGTCAAATGAAAAAGCTTTATTGG AGTTCGAGTATACTGAAAGAAATTATTGGTTTCACGATATGGACTTTGCTAGGTCAATTG AGCACTGTTTTTAGAAATCAGGCAGTAACTGTTCTTGTAAACCAAATGTTTAATCCTTCA ATTGTGGCAGCTCGTGCAATTGCCTTGAATGTGGCTAGTCAAGTTGGAATTTTTTCGAAT AATTTAAATACAGGGTTATATCCACCAATTATAAAAGCTTACGCAGCAAATCAAAAAGAG GAAATGCTGAGTTTAGTTTTTAATGGTTCTAAACTGACTTTCTTCTTGATGTGGGTATGT GCATTACCCATGTTGCTTGAAATGGAAACGATATTAACACTTTGGCTAAAAACACCACCA TCAGAAGCGATATTATTTACTCAGTTAGCGATTGTTGAATCCTTGATACTGGCTATAAGC