def merge(pair1, pair2, fmt="fq"):
    """Interleave two paired read files and print the records to stdout.

    Records are printed alternately: one from `pair1`, then the matching
    one from `pair2`, and so on.

    Arguments:
    - `pair1`: input handed to the selected ``parse`` for the first reads
    - `pair2`: input handed to the selected ``parse`` for the second reads
    - `fmt`: "fq"/"fastq" selects the fastq parser,
             "fa"/"fna"/"fasta"/"fsa" selects the fasta parser

    Raises:
    - `ValueError`: if `fmt` is not one of the names listed above
    """
    if fmt in ("fq", "fastq"):
        from pyngs.biofile.fastq import parse
    elif fmt in ("fa", "fna", "fasta", "fsa"):
        from pyngs.biofile.fasta import parse
    else:
        # Without this branch `parse` stays unbound and the caller only sees
        # an UnboundLocalError that says nothing about the bad format name.
        raise ValueError("unsupported sequence format: %r" % (fmt,))

    read1s = parse(pair1)
    read2s = parse(pair2)
    # NOTE(review): izip is presumably itertools.izip, which stops at the
    # shorter input -- surplus records in the longer file are dropped
    # silently. Confirm that is acceptable for unbalanced pairs.
    for read1, read2 in izip(read1s, read2s):
        # Parenthesised single-argument form prints the same text under the
        # Python 2 print statement this file uses.
        print(read1)
        print(read2)
def _trim_pair(pair1, pair2, method='bwa', qtype='S', qthres=QTHRESHOLD,
               lthres=LTHRESHOLD):
    """trim paired fastq records by quality values, pair by pair

    Arguments:
    - `pair1`: input handed to `parse` for the first reads
    - `pair2`: input handed to `parse` for the second reads
    - `method`: the way to trim, resolved through `get_method`
    - `qtype`: the fastq quality type, forwarded to `parse` as `fmt`
    - `qthres`: quality cutoff threshold passed to the trimming function
    - `lthres`: length cutoff threshold

    Yields `(mark, fq1, fq2)` tuples. `mark` is a bit mask: bit value 1 is
    set when the trimmed length of the first read is below `lthres`, bit
    value 2 when the second read's is. When `mark` equals `GOOD` both
    records are yielded trimmed; otherwise both are yielded untrimmed.
    """
    _trim = get_method(method)
    fq1s = parse(pair1, fmt=qtype)
    fq2s = parse(pair2, fmt=qtype)
    for fq1, fq2 in izip(fq1s, fq2s):
        start1, length1 = _trim(fq1, qthres)
        start2, length2 = _trim(fq2, qthres)

        mark = 0
        if length1 < lthres:
            mark |= 1
        # Checked independently of read 1 (this used to be `elif`): with the
        # `elif`, a pair whose reads were BOTH too short was reported as if
        # only read 1 had failed, so bit 2 could never be combined with bit 1.
        if length2 < lthres:
            mark |= 2

        if mark == GOOD:
            yield GOOD, fq1[start1:start1+length1], fq2[start2:start2+length2]
        else:
            # At least one read failed the length cutoff: hand back the
            # original records together with the failure mask.
            yield mark, fq1, fq2
def split(fname, fmt='fq'):
    """Split an interleaved read file into its two mates.

    Records are numbered from 0 in the order `parse` produces them.
    Even-numbered records (read1) are printed to stdout and odd-numbered
    records (read2) are printed to stderr.

    Arguments:
    - `fname`: input handed to the selected ``parse``
    - `fmt`: 'fq'/'fastq' selects the fastq parser,
             'fa'/'fna'/'fasta'/'fsa' selects the fasta parser

    Raises:
    - `ValueError`: if `fmt` is not one of the names listed above
    """
    if fmt in ('fq', 'fastq'):
        from pyngs.biofile.fastq import parse
    elif fmt in ('fa', 'fna', 'fasta', 'fsa'):
        from pyngs.biofile.fasta import parse
    else:
        # Without this branch `parse` stays unbound and the caller only sees
        # an UnboundLocalError that says nothing about the bad format name.
        raise ValueError("unsupported sequence format: %r" % (fmt,))

    for idx, read in enumerate(parse(fname)):
        if idx % 2:
            # read2
            print >>sys.stderr, read
        else:
            # read1
            print >>sys.stdout, read
def _trim_single(fname, method='bwa', qtype='S', qthres=QTHRESHOLD,
                 lthres=LTHRESHOLD):
    """trim single-end fastq records by quality values

    Arguments:
    - `fname`: fastq file name
    - `method`: the way to trim a Fastq object, resolved through `get_method`
    - `qtype`: the fastq quality type, forwarded to `parse` as `fmt`
    - `qthres`: quality cutoff threshold passed to the trimming function
    - `lthres`: length cutoff threshold

    Yields `(BAD1, record)` with the untrimmed record when the trimmed
    length is below `lthres`, otherwise `(GOOD, trimmed_record)`.
    """
    trimmer = get_method(method=method)
    records = parse(fname, fmt=qtype)
    for record in records:
        # The trimming function reports where the kept region begins and
        # how many positions it spans.
        begin, size = trimmer(record, qthres)
        too_short = size < lthres
        if too_short:
            yield BAD1, record
            continue
        end = begin + size
        yield GOOD, record[begin:end]