def trimPrimers(primer, verbose):
    """
    Trim FASTA records read from standard input and write the trimmed
    records, as FASTA, to standard output.

    Each record is sliced to the C{(start, end)} offsets that
    C{findPrimerBidiLimits} returns for its sequence. A non-zero start is
    tallied as a forward find, an end that differs from the record length
    is tallied as a reverse find, and a record for which neither holds is
    tallied as absent.

    @param primer: A BioPython C{Bio.Seq} primer sequence.
    @param verbose: A C{bool}, if C{True} output additional information about
        how often and where primers were found. The summary is written to
        standard error.
    """
    trimmed = []
    total = nForward = nReverse = nAbsent = 0

    # enumerate() leaves total at 0 when the input has no records.
    for total, record in enumerate(SeqIO.parse(sys.stdin, 'fasta'), 1):
        lo, hi = findPrimerBidiLimits(primer, record.seq)
        cutAtStart = lo != 0
        cutAtEnd = hi != len(record)

        if cutAtStart:
            nForward += 1
        if cutAtEnd:
            nReverse += 1
        if not (cutAtStart or cutAtEnd):
            # Offsets span the whole record: nothing was trimmed.
            nAbsent += 1

        trimmed.append(record[lo:hi])

    if verbose:
        summary = ('Read %d sequences. Found forward: %d, '
                   'Found reversed: %d, Absent: %d')
        print(summary % (total, nForward, nReverse, nAbsent),
              file=sys.stderr)

    SeqIO.write(trimmed, sys.stdout, 'fasta')
def testFoundMultiple(self):
    """
    A primer that occurs more than once must give the expected limits.
    'ACG' is at offsets 0 and 4 of 'ACGTACGT'; the expected start, 7, is
    just past the second occurrence and the expected end, 8, is the
    sequence length.
    """
    target = Seq('ACGTACGT', IUPAC.unambiguous_dna)
    limits = findPrimerBidiLimits('ACG', target)
    self.assertEqual((7, 8), limits)
def trimPrimers(primer, verbose):
    """
    Read FASTA records from standard input, slice each one to the offsets
    returned by C{findPrimerBidiLimits} for the given primer, and write the
    sliced records as FASTA to standard output.

    While doing so, count how many records had a non-zero start offset
    (forward), how many had an end offset different from the record length
    (reverse), and how many had neither (absent).

    @param primer: A BioPython C{Bio.Seq} primer sequence.
    @param verbose: A C{bool}, if C{True} output additional information about
        how often and where primers were found. The summary is printed to
        standard error.
    """
    kept = []
    nRecords = nForward = nReverse = nAbsent = 0

    for record in SeqIO.parse(sys.stdin, 'fasta'):
        nRecords += 1
        first, last = findPrimerBidiLimits(primer, record.seq)
        kept.append(record[first:last])

        forwardFound = first != 0
        reverseFound = last != len(record)

        # Booleans add as 0 or 1, so each tally grows only when its
        # condition holds.
        nForward += forwardFound
        nReverse += reverseFound
        nAbsent += not (forwardFound or reverseFound)

    if verbose:
        counts = (nRecords, nForward, nReverse, nAbsent)
        print(('Read %d sequences. Found forward: %d, '
               'Found reversed: %d, Absent: %d') % counts,
              file=sys.stderr)

    SeqIO.write(kept, sys.stdout, 'fasta')
def testOverlappingBackwards(self):
    """
    When the reverse complement of a primer occurs twice but the two
    occurrences overlap, only the first one must count. The reverse
    complement of 'AA' is 'TT', which is at offsets 1 and 2 of 'GTTT';
    the expected end offset is 1.
    """
    target = Seq('GTTT', IUPAC.unambiguous_dna)
    limits = findPrimerBidiLimits('AA', target)
    self.assertEqual((0, 1), limits)
def testNotFound(self):
    """
    When the primer does not occur in the sequence, the returned offsets
    must span the entire sequence (0 up to its length).
    """
    target = Seq('ACGT', IUPAC.unambiguous_dna)
    limits = findPrimerBidiLimits('BLAH', target)
    self.assertEqual((0, 4), limits)
def testOverlappingForwards(self):
    """
    When a primer occurs twice in the forward direction but the two
    occurrences overlap, only the first one must count. 'AA' is at
    offsets 1 and 2 of 'GAAA'; the expected start offset, 3, is just past
    the first occurrence.
    """
    target = Seq('GAAA')
    limits = findPrimerBidiLimits('AA', target)
    self.assertEqual((3, 4), limits)
def testFoundEndStart(self):
    """
    A primer found in both directions (at the end of the sequence going
    forwards, and at the start as its reverse complement) must give the
    expected limits. 'GT' is at offset 2 of 'ACGT' and its reverse
    complement, 'AC', is at offset 0; the expected result is (4, 4).
    """
    target = Seq('ACGT', IUPAC.unambiguous_dna)
    limits = findPrimerBidiLimits('GT', target)
    self.assertEqual((4, 4), limits)
def testFoundStartEnd(self):
    """
    A primer found in both directions (at the start of the sequence going
    forwards, and at the end as its reverse complement) must give the
    expected limits. 'AC' is at offset 0 of 'ACGT' and its reverse
    complement, 'GT', is at offset 2; the expected result is (2, 2).
    """
    target = Seq('ACGT')
    limits = findPrimerBidiLimits('AC', target)
    self.assertEqual((2, 2), limits)
def testLonger(self):
    """
    Check the limits on a 40-base sequence: ten A, ten G, then twenty A,
    searched with a ten-G primer. The expected result is (20, 40).
    """
    primer = 'GGGGGGGGGG'
    target = Seq(
        'AAAAAAAAAA'
        'GGGGGGGGGG'
        'AAAAAAAAAA'
        'AAAAAAAAAA',
        IUPAC.unambiguous_dna)
    limits = findPrimerBidiLimits(primer, target)
    self.assertEqual((20, 40), limits)