def find_restriction_sites_in_seq(seq, enzyme):
    """Locate every occurrence of an enzyme's recognition site in a sequence.

    The search strategy is similar to
    Bio.Restriction.RestrictionBatch.search(): the sequence is wrapped in a
    FormattedSeq object and scanned with its finditer() method.

    Args:
        seq: A Seq object, or any value Seq() accepts; non-Seq input is
            wrapped in Seq before searching.
        enzyme: A restriction enzyme object exposing ``compsite``, ``size``
            and ``site``. ``str(enzyme)`` is used as the name of the match
            group that marks a forward-strand hit.

    Returns:
        A list with one dict per hit, holding the keys 'enzyme', 'site',
        'interval' and 'strand'. 'interval' is
        ``(start - 1, start + enzyme.size - 1)`` where ``start`` is the
        position reported by finditer(); 'strand' is 1 when the forward
        group matched and -1 otherwise.
    """
    forward_group_name = str(enzyme)
    if not isinstance(seq, Seq):
        seq = Seq(seq)
    fseq = FormattedSeq(seq)

    occurrences = []
    # Scan exactly once: issuing the same finditer() call twice and
    # discarding the first result only doubles the search cost.
    for start, match_group in fseq.finditer(enzyme.compsite, enzyme.size):
        strand = 1 if match_group(forward_group_name) else -1
        occurrences.append({
            'enzyme': forward_group_name,
            'site': enzyme.site,
            # Shift both ends down by one to make the interval pythonic.
            'interval': (start - 1, start + enzyme.size - 1),
            'strand': strand,
        })
    return occurrences
def test_sequence_object(self):
    """Check that plain strings are rejected while Seq-like objects work."""
    # FormattedSeq must refuse a bare str ...
    self.assertRaises(TypeError, FormattedSeq, 'GATC')

    # ... but accept Seq, MutableSeq and another FormattedSeq.
    wrapped = FormattedSeq(Seq('TAGC'))
    wrapped = FormattedSeq(MutableSeq('AGTC'))
    wrapped = FormattedSeq(wrapped)

    # The same holds for searching with an enzyme.
    self.assertRaises(TypeError, EcoRI.search, 'GATC')
    EcoRI.search(Seq('ATGC'))
    EcoRI.search(MutableSeq('TCAG'))
def test_circular_sequences(self):
    """Check cutting and searching of circular sequences."""
    # Each enzyme is paired with the fixture attribute holding its site.
    fixtures = (
        (EcoRI, 'ecosite_seq'),
        (KpnI, 'kpnsite_seq'),
        (SmaI, 'smasite_seq'),
    )
    for enzyme, fixture_name in fixtures:
        fragments = enzyme.catalyse(getattr(self, fixture_name),
                                    linear=False)
        self.assertEqual(len(fragments), 1)
        cut_positions = enzyme.search(fragments[0], linear=False)
        self.assertEqual(cut_positions, [1])

    ear_hits = EarI.search(FormattedSeq(Seq('CTCTTCAAAAA')), linear=False)
    self.assertEqual(ear_hits, [8])

    sna_hits = SnaI.search(FormattedSeq(Seq('GTATACAAAAA')), linear=False)
    self.assertEqual(sna_hits, [1])
def test_formatted_seq(self):
    """Exercise FormattedSeq's string form, equality and topology methods."""
    # String representation (this variant includes the Alphabet in the repr).
    rendered = str(FormattedSeq(Seq('GATC')))
    self.assertEqual(rendered,
                     "FormattedSeq(Seq('GATC', Alphabet()), linear=True)")

    # Equality: different content, different type, identical content.
    differs = FormattedSeq(Seq('GATC')) == FormattedSeq(Seq('TAGC'))
    self.assertFalse(differs)
    mixed_types = FormattedSeq(Seq('TAGC')) == Seq('TAGC')
    self.assertFalse(mixed_types)
    same = FormattedSeq(Seq('ATGC')) == FormattedSeq(Seq('ATGC'))
    self.assertTrue(same)

    # In-place topology switches.
    topo = FormattedSeq(Seq('T'))
    self.assertTrue(topo.is_linear())
    topo.circularise()
    self.assertFalse(topo.is_linear())
    topo.linearise()

    # Topology conversions that return a value.
    as_circular = topo.to_circular()
    self.assertFalse(as_circular.is_linear())
    as_linear = as_circular.to_linear()
    self.assertTrue(as_linear.is_linear())
def test_formatted_seq(self):
    """Exercise FormattedSeq's string form, equality and topology methods."""
    # String representation.
    rendered = str(FormattedSeq(Seq('GATC')))
    self.assertEqual(rendered, "FormattedSeq(Seq('GATC'), linear=True)")

    # Equality: different content, different type, identical content.
    differs = FormattedSeq(Seq('GATC')) == FormattedSeq(Seq('TAGC'))
    self.assertFalse(differs)
    mixed_types = FormattedSeq(Seq('TAGC')) == Seq('TAGC')
    self.assertFalse(mixed_types)
    same = FormattedSeq(Seq('ATGC')) == FormattedSeq(Seq('ATGC'))
    self.assertTrue(same)

    # In-place topology switches.
    topo = FormattedSeq(Seq('T'))
    self.assertTrue(topo.is_linear())
    topo.circularise()
    self.assertFalse(topo.is_linear())
    topo.linearise()

    # Topology conversions that return a value.
    as_circular = topo.to_circular()
    self.assertFalse(as_circular.is_linear())
    as_linear = as_circular.to_linear()
    self.assertTrue(as_linear.is_linear())
def test_non_iupac_letters(self):
    """Check that a sequence with non-IUPAC letters raises a TypeError."""
    # Build the Seq inside the callable so that the whole construction is
    # covered by the assertion, not just the FormattedSeq wrapper.
    self.assertRaises(TypeError, lambda: FormattedSeq(Seq('GATCZ')))
def findPossOH_byPrimerLength(GGfrags, maxPrimerLength, annealingLength,
                              gBlockMaxSize, enzyme):
    """List candidate 4-base overhangs for each junction between fragments.

    Each fragment is split into three parts (5' overhang + 5' extension,
    core sequence, 3' extension + 3' overhang). For every junction between
    neighbouring fragments a window around the junction is derived from the
    primer-length, piece-length and forced-method limits, and the overhang
    candidates are read from that window.

    Args:
        GGfrags: Iterable of fragment objects exposing ``fiveprimeOH``,
            ``fiveprimeExt``, ``seq``, ``threeprimeExt``, ``threeprimeOH``
            and ``forced_method``.
        maxPrimerLength: Upper bound used for the primer-length limits.
        annealingLength: Length reserved for annealing in every limit.
        gBlockMaxSize: Size cap applied when a fragment's forced method is
            "gBlocks".
        enzyme: Name of an attribute of ``Restriction`` (the enzyme class).

    Returns:
        A list with one entry per junction. Each entry is a list of
        ``(overhang_sequence, offset_from_junction)`` tuples: a single
        tuple when the enzyme finds a site inside the window, otherwise all
        window positions sorted by absolute offset. Returns ``False`` as
        soon as a junction has no candidates at all.
    """
    pieces = []
    methods = []
    for frag in GGfrags:
        pieces.append([
            frag.fiveprimeOH + frag.fiveprimeExt,
            frag.seq,
            frag.threeprimeExt + frag.threeprimeOH,
        ])
        methods.append(frag.forced_method)

    # windows[k] = (negated left wiggle, right wiggle) for the junction in
    # front of piece k.
    windows = []
    for idx in range(len(pieces)):
        if idx == 0:
            # The junction with the vector is fixed, so it gets no wiggle.
            windows.append((0, 0))
            continue

        prev_head, prev_body, prev_tail = pieces[idx - 1]
        cur_head, cur_body, cur_tail = pieces[idx]
        prev_shift, prev_reach = windows[idx - 1]

        # Leftward limit imposed by the piece length, optionally tightened
        # by the forced synthesis method of the current piece.
        left_piece_cap = (len(prev_body + prev_tail) - prev_reach
                          - annealingLength)
        method_here = methods[idx]
        if method_here == "gBlocks":
            left_piece_cap = min(
                left_piece_cap,
                gBlockMaxSize - len(cur_head + cur_body + cur_tail) - 22)
        elif method_here == "Oligo Assembly":
            left_piece_cap = min(
                left_piece_cap,
                200 - len(cur_head + cur_body + cur_tail))
        # Leftward limit imposed by the primer length.
        left_primer_cap = maxPrimerLength - len(cur_head) - annealingLength
        left = min(left_primer_cap, left_piece_cap)

        # Rightward limit imposed by the piece length. The extra 4 is not
        # strictly required here but makes primer design easier.
        right_piece_cap = len(cur_head + cur_body) - annealingLength - 4
        method_before = methods[idx - 1]
        if method_before == "gBlocks":
            right_piece_cap = min(
                right_piece_cap,
                gBlockMaxSize - len(prev_head + prev_body + prev_tail)
                - 22 - 4 + prev_shift)
        elif method_before == "Oligo Assembly":
            right_piece_cap = min(
                right_piece_cap,
                200 - len(prev_head + prev_body + prev_tail)
                - 4 + prev_shift)
        # Rightward limit imposed by the primer length.
        right_primer_cap = (maxPrimerLength - len(prev_tail)
                            - annealingLength - 4)
        right = min(right_primer_cap, right_piece_cap)

        windows.append((-left, right))

    junction_options = []
    for idx in range(1, len(pieces)):
        before = pieces[idx - 1]
        after = pieces[idx]
        left_piece = before[0] + before[1] + before[2]
        right_piece = after[0] + after[1] + after[2]
        joined = left_piece.upper() + right_piece.upper()

        low_offset, high_offset = windows[idx]
        first = len(left_piece) + low_offset
        stop = len(left_piece) + high_offset + 1
        window_seq = joined[first:stop]

        enz = getattr(Restriction, enzyme)
        if enz.search(FormattedSeq(Seq(window_seq))):
            # A recognition site inside the window pins the overhang to a
            # single position relative to that site.
            forward_at = window_seq.find(enz.site)
            if forward_at > -1:
                pinned = (window_seq[forward_at + 7:forward_at + 11],
                          forward_at + low_offset + 7)
            else:
                reverse_at = window_seq.find(
                    str(Seq(enz.site).reverse_complement()))
                pinned = (window_seq[reverse_at - 5:reverse_at - 1],
                          reverse_at + low_offset - 5)
            junction_options.append([pinned])
            continue

        candidates = [(joined[pos:pos + 4], pos - len(left_piece))
                      for pos in range(first, stop)]
        # If no overhang options were found for a junction, give up.
        if not candidates:
            return False
        junction_options.append(
            sorted(candidates, key=lambda option: abs(option[1])))

    return junction_options
def checkInput_afterOptimization(self):
    """Validate the part sequence for restriction sites and reading frame.

    Appends messages to ``self.errors`` for forbidden BbsI/BsmBI sites and
    to ``self.warnings`` for connector-site counts, a missing start codon,
    an out-of-frame length or an internal stop codon.

    Returns:
        True when ``self.errors`` is empty afterwards, False otherwise.
    """
    # Part types for which a site of the given enzyme is an error.
    bbsi_checked = ('1', '2a', '2b', '3a', '3b', '3c', '3d', '3e',
                    '4a', '4b', '5', '6', '7')
    bsmbi_checked = ('2a', '2b', '3a', '3b', '3c', '3d', '3e',
                     '4a', '4b', '6', '7')
    bbsi_message = ("Your part contains a BbsI site which must be removed "
                    "prior to assembly.")
    bsmbi_message = ("Your part contains a BsmBI site which must be removed "
                     "prior to assembly.")

    f_seq = FormattedSeq(Seq(self.partSeq), True)

    # Check to make sure BbsI/BsmBI sites aren't present.
    if BbsI.search(f_seq) and BsmBI.search(f_seq):
        # Parts carrying both site types are let through without an error.
        # Be careful, though: these assemblies may be problematic.
        # Re-enable the line below to reject them instead.
        #self.errors.append("Your part contains both BbsI and BsmBI sites and cannot be assembled using golden gate.")
        pass
    elif ((self.leftPartType in bbsi_checked and BbsI.search(f_seq))
          or (self.rightPartType in bbsi_checked and BbsI.search(f_seq))):
        self.errors.append(bbsi_message)
    elif ((self.leftPartType in bsmbi_checked and BsmBI.search(f_seq))
          or (self.rightPartType in bsmbi_checked and BsmBI.search(f_seq))):
        self.errors.append(bsmbi_message)

    # Connector parts (types 1 and 5) should carry exactly one BsmBI site,
    # counted on both strands.
    upper_seq = self.partSeq.upper()
    forward_sites = upper_seq.count(BsmBI.site)
    reverse_sites = upper_seq.count(
        str(Seq(BsmBI.site).reverse_complement()))
    wrong_site_count = forward_sites + reverse_sites != 1

    left_type = str(self.leftPartType)
    right_type = str(self.rightPartType)
    if (left_type == "1" or right_type == "1") and wrong_site_count:
        self.warnings.append(
            "Your type 1 part should have exactly 1 BsmBI site. Consider modifying for multigene assembly."
        )
    if (left_type == "5" or right_type == "5") and wrong_site_count:
        self.warnings.append(
            "Your type 5 part should have exactly 1 BsmBI site. Consider modifying for multigene assembly."
        )

    # Warn if a part that starts as type 3a lacks a start codon.
    if self.leftPartType in ("3a",) and not str(upper_seq).startswith("ATG"):
        self.warnings.append("Your part is missing a start codon.")

    # Warn about frame and stop codons for parts ending in types 3a-4a.
    if right_type in ('3a', '3b', '3c', '3d', '3e', '4a'):
        if len(self.partSeq) % 3 != 0:
            self.warnings.append(
                "Your part appears to be out of frame (length is not a multiple of 3). If this is a coding sequence, check to make sure it is correct."
            )
        if Seq(self.partSeq).translate().find("*") > -1:
            self.warnings.append(
                "Your part has a stop codon. If it is not removed, the part cannot be used for making N-terminal fusions."
            )

    return len(self.errors) == 0
def silentMutate(seq, leftIndex, rightIndex, enzyme_list=[]):
    """Change one base inside [leftIndex, rightIndex) without altering the
    translation.

    Candidate replacement codons are looked up in ``HsCodonUsage`` for the
    amino acid that ``GeneticCode`` assigns to each affected codon, and
    ranked by the second element of each entry (presumably a usage
    frequency; confirm against HsCodonUsage). The highest-ranked candidate
    is tried first; a candidate is rejected when it adds a site for any
    enzyme named in ``enzyme_list``, unless it is the last one left.

    Args:
        seq: Nucleotide string; letter case is significant, since the
            flanking parts may be case-swapped around the mutated base.
        leftIndex: First index where a mutation is allowed.
        rightIndex: Index just past the last allowed mutation position.
        enzyme_list: Names of ``Restriction`` attributes whose sites must
            not be introduced.

    Returns:
        The mutated sequence string.

    Raises:
        Exception: When no candidate exists, or when the chosen mutation
            changes the translation.
    """
    frame_start = leftIndex - (leftIndex % 3)
    codon_total = int(rightIndex / 3) - int(leftIndex / 3) + rightIndex % 3

    # Each candidate is [codon, rank value, codon number, base offset].
    candidates = []
    for codon_no in range(codon_total):
        codon_start = frame_start + codon_no * 3
        current = seq[codon_start:codon_start + 3]
        if len(current) != 3:
            continue
        for synonym in HsCodonUsage[GeneticCode[current.upper()]]:
            # Offset of the changed base within the codon (should always
            # be 2 as written currently).
            offset = singleBPmutation(synonym[0], current)
            if offset and leftIndex <= offset + codon_no * 3 + frame_start < rightIndex:
                candidates.append([synonym[0], synonym[1], codon_no, offset])
    candidates.sort(key=lambda entry: entry[1])

    if not candidates:
        raise Exception("Couldn't find a base to mutate silently.")

    def shares_case(neighbour, base):
        # True when both strings are lower case or both are upper case.
        return ((neighbour.islower() and base.islower())
                or (neighbour.isupper() and base.isupper()))

    while True:
        chosen_codon, _, codon_no, offset = candidates.pop()
        position = frame_start + codon_no * 3 + offset

        # The new base gets the opposite case of the base it replaces.
        replacement = chosen_codon[offset]
        if seq[position:position + 1].islower():
            replacement = replacement.upper()
        else:
            replacement = replacement.lower()

        # A flank whose adjacent base shares the new base's case is
        # case-swapped as a whole.
        head = seq[:position]
        if shares_case(seq[position - 1:position], replacement):
            head = head.swapcase()
        tail = seq[position + 1:]
        if shares_case(seq[position + 1:position + 2], replacement):
            tail = tail.swapcase()
        newSeq = head + replacement + tail

        creates_site = False
        for enzyme_name in enzyme_list:
            enzyme = getattr(Restriction, enzyme_name)
            before = FormattedSeq(Seq(seq))
            after = FormattedSeq(Seq(newSeq))
            if len(enzyme.search(after)) > len(enzyme.search(before)):
                creates_site = True

        # Accept a clean mutation, or whatever is left once the candidates
        # have run out.
        if not creates_site or len(candidates) < 1:
            break

    original_protein = str(Seq(seq).translate())
    mutated_protein = str(Seq(newSeq).translate())
    if original_protein != mutated_protein:
        raise Exception(
            "Error: The attempted silent mutation wasn't silent!!!")
    return newSeq