def get_toeholds(self, n_ths=6, timeout=2):
    """Generate toeholds for the Soloveichik DSD approach with stickydesign.

    ``stickydesign.easyends`` is run repeatedly, using this object's
    ``length``, ``targetdG``, ``deviation`` and ``max_spurious`` settings
    and this object itself as the energetics model, until one run yields
    at least ``n_ths`` toeholds in addition to the trivial "avoid"
    sequences that are seeded into the design.

    Args:
        n_ths: Number of toeholds to generate. (6)
        timeout: Time allowed for finding toeholds, in seconds. (2)

    Returns:
        List of ``n_ths`` toehold strings; the first and last base of every
        designed sequence are stripped before the list is built.

    Raises:
        ToeholdSpecificationError: If stickydesign raises ``ValueError``, or
            if no large enough toehold set has been found once ``timeout``
            seconds have elapsed. The message suggests the parameters
            reported by ``calculate_unrestricted_toehold_characteristics``.
    """
    from time import time
    import stickydesign as sd

    # Give StickyDesign a set of trivial, single-nucleotide toeholds to avoid
    # poor designs. I'm not sure if this helps now, but it did once.
    avoid_list = [base * int(self.length + 2) for base in ['a', 't']]
    n_required = n_ths + len(avoid_list)
    fdev = self.deviation / self.targetdG

    def specification_error():
        # Built only on failure: characterising the unrestricted toehold set
        # needs another stickydesign run.
        e_avg, e_spr, e_dev, n_ends = (
            self.calculate_unrestricted_toehold_characteristics())
        msg = ("Cannot make toeholds to user specification! Try target "
               "energy:{:.2}, maxspurious:{:.2}, deviation:{:.2}, which "
               "makes {:d} toeholds.")
        return ToeholdSpecificationError(
            msg.format(e_avg, e_spr, e_dev, n_ends))

    startime = time()
    while True:
        try:
            ends = sd.easyends('TD',
                               self.length,
                               interaction=self.targetdG,
                               fdev=fdev,
                               alphabet='h',
                               adjs=['c', 'g'],
                               maxspurious=self.max_spurious,
                               energetics=self,
                               oldends=avoid_list)
        except ValueError:
            raise specification_error()
        if len(ends) >= n_required:
            break
        # Only give up when the run that just finished was insufficient; a
        # successful run is kept even if it completed after the deadline.
        if (time() - startime) > timeout:
            raise specification_error()

    # Drop the leading "avoid" sequences, then keep the first n_ths designs.
    th_cands = ends.tolist()[len(avoid_list):][:n_ths]
    # Strip the first and last base of every candidate.
    th_all = [th[1:-1] for th in th_cands]
    return sd.endarray(th_all, 'TD').tolist()
def calculate_unrestricted_toehold_characteristics(self):
    """Summarise the toehold set stickydesign finds without energy limits.

    ``stickydesign.easyends`` is called with only the toehold length, the
    alphabet, the adjacent bases and this object as energetics model, i.e.
    without a target interaction, deviation or spurious-interaction limit.

    Returns:
        Tuple ``(e_avg, e_spr, e_dev, n_ends)`` where

        * ``e_avg`` is the mean over the internal and external toehold
          energies (``th_internal_dG`` and ``th_external_dG``),
        * ``e_spr`` is the largest off-diagonal entry of the sliced uniform
          energy array, divided by ``self.targetdG``,
        * ``e_dev`` is the largest absolute difference between any of those
          toehold energies and ``self.targetdG``,
        * ``n_ends`` is the number of ends stickydesign produced.
    """
    import stickydesign as sd

    ends = sd.easyends('TD',
                       self.length,
                       alphabet='h',
                       adjs=['c', 'g'],
                       energetics=self)
    n_ends = len(ends)

    # Keep rows n_ends.. and columns ..n_ends of the full energy array
    # (presumably the end-vs-complement quadrant -- TODO confirm against
    # stickydesign), then zero its diagonal so that only the remaining
    # pairings contribute to the maximum.
    pairwise = sd.energy_array_uniform(ends, self)[n_ends:, :n_ends]
    diagonal = np.arange(n_ends)
    pairwise[diagonal, diagonal] = 0
    e_spr = pairwise.max() / self.targetdG

    external = self.th_external_dG(ends)
    internal = self.th_internal_dG(ends)
    energies = np.concatenate((internal, external))

    e_avg = energies.mean()
    e_dev = np.max(np.abs(energies - self.targetdG))
    return e_avg, e_spr, e_dev, n_ends
def getTileSetFromYamlFile(inputFile, stickyEndLength = 5):
    """Read a tile set from a multi-document YAML file and design its glues.

    Every YAML document describes one tile and must provide 'name', 'label'
    and a 'glues' mapping. 'glues' may contain any of the directions
    'north', 'south', 'east', 'west', 'top' and 'bottom', each with a
    'label' and a 'strength'.

    Sticky ends are designed with ``stickydesign.easyends('DT', ...)`` and
    assigned so that glues with the same label and direction share one
    sequence, while glues with the same label and opposite direction get
    the complementary sequence. The designed ends are printed to stdout.

    Args:
        inputFile: Path of the YAML file to read.
        stickyEndLength: Length passed to stickydesign for every sticky end.

    Returns:
        List of Tile objects with their glue sequences filled in.

    The process is terminated with ``sys.exit(1)`` (after logging an error)
    when the file cannot be parsed into tiles or when the sticky end design
    fails.
    """
    directions = ['north', 'south', 'east', 'west', 'top', 'bottom']
    opposite = {'north': 'south', 'south': 'north',
                'east': 'west', 'west': 'east',
                'top': 'bottom', 'bottom': 'top'}
    # A label seen on the second direction of a pair is not counted again
    # when the first direction of that pair already registered it, because
    # it will receive the complement of an already counted sticky end.
    countedWith = {'south': 'north', 'west': 'east', 'bottom': 'top'}
    glueLabels = dict((direction, []) for direction in directions)
    tiles = []

    with open(inputFile, 'r') as f:
        try:
            # NOTE(review): yaml.load_all can construct arbitrary Python
            # objects and newer PyYAML releases require an explicit Loader;
            # consider yaml.safe_load_all if tile files may be untrusted.
            for t in yaml.load_all(f):
                tile = Tile()
                tile.name = t['name']
                tile.label = t['label']
                for direction in directions:
                    if direction not in t['glues']:
                        continue
                    tile.glues[direction] = Glue()
                    glue = tile.glues[direction]
                    glue.label = t['glues'][direction]['label']
                    glue.strength = t['glues'][direction]['strength']

                    alreadyCounted = glue.label in glueLabels[direction]
                    if not alreadyCounted and direction in countedWith:
                        partner = countedWith[direction]
                        alreadyCounted = glue.label in glueLabels[partner]
                    if not alreadyCounted:
                        glueLabels[direction].append(glue.label)
                tiles.append(tile)
        except Exception:
            # Exception (not a bare except) so that Ctrl-C and SystemExit
            # are not reported as a format problem.
            logging.error("Invalid Yaml Tile Set Format.")
            sys.exit(1)

    numGlues = sum(len(glueLabels[direction]) for direction in directions)
    try:
        ends = stickydesign.easyends('DT', stickyEndLength, number=numGlues)
        # Single-argument call form prints identically on Python 2 and 3.
        print(ends.tolist())
    except Exception:
        logging.error("Error creating sticky ends.")
        sys.exit(1)

    # Assign designed sticky ends to glues based on labels so that matching
    # labels (with same direction) get the same sticky ends, and matching
    # labels with opposite direction get complementary sticky ends.
    i = 0
    for t in tiles:
        for direction in directions:
            i = _assignGlueSequence(tiles, t, direction, opposite[direction],
                                    ends, i, numGlues)
    return tiles


def _assignGlueSequence(tiles, tile, direction, oppositeDirection, ends,
                        nextEnd, numGlues):
    """Give one glue of `tile` a sequence and propagate it by label.

    If the glue has no sequence yet it receives ``ends[nextEnd]``; every
    glue in `tiles` with the same label then gets that sequence (same
    direction) or its complement (opposite direction).

    Returns the index of the next unused entry of `ends`.
    """
    if direction not in tile.glues or nextEnd >= numGlues:
        return nextEnd

    glue = tile.glues[direction]
    if len(glue.sequence) == 0:
        glue.sequence = intsToSequence(ends[nextEnd])[0:]
        nextEnd = nextEnd + 1
        for other in tiles:
            if (oppositeDirection in other.glues
                    and other.glues[oppositeDirection].label == glue.label):
                other.glues[oppositeDirection].sequence = \
                    getComplement(glue.sequence)[0:]
            if (direction in other.glues
                    and other.glues[direction].label == glue.label):
                other.glues[direction].sequence = glue.sequence
    return nextEnd
def get_toeholds(n_ths=6, thold_l=int(7.0), thold_e=7.7, e_dev=0.5, m_spurious=0.4, e_module=efj, timeout=8):
    """Generate specified stickyends for the Soloveichik DSD approach.

    ``sd.easyends`` is run repeatedly until one run yields at least
    ``n_ths`` toeholds in addition to the trivial "avoid" sequences that
    are seeded into the design, or until ``timeout`` seconds have elapsed.

    Args:
        n_ths: Number of signal strand species to generate toeholds for
        thold_l: Nucleotides in the toeholds
        thold_e: Target binding energy for the toeholds in kcal/mol
        e_dev: Allowable energy deviation in kcal/mol
        m_spurious: Maximum spurious interaction strength as a ratio of
            target energy
        e_module: Module providing ``energyfuncs(targetdG=...)``, used to
            build the energetics instance handed to stickydesign
        timeout: Seconds after which an unsuccessful search is abandoned

    Returns:
        List of ``n_ths`` toehold strings (the first and last base of every
        designed sequence are stripped), or ``-1`` if no large enough
        toehold set was found within ``timeout`` seconds.
    """
    # Give the energetics instance the target energy
    ef = e_module.energyfuncs(targetdG=thold_e)

    # Give StickyDesign a set of trivial, single-nucleotide toeholds to avoid
    # poor designs. I'm not sure if this helps now, but it did once.
    avoid_list = [base * int(thold_l + 2) for base in ['a', 'c', 't']]
    n_required = n_ths + len(avoid_list)

    startime = time()
    while True:
        try:
            ends = sd.easyends('TD',
                               thold_l,
                               interaction=thold_e,
                               fdev=e_dev / thold_e,
                               alphabet='h',
                               adjs=['c', 'g'],
                               maxspurious=m_spurious,
                               energetics=ef,
                               oldends=avoid_list)
        except ValueError:
            ends = None
        if ends is not None and len(ends) >= n_required:
            break
        # Checked after every failed attempt -- whether stickydesign raised
        # or merely returned too few ends -- so the search always terminates.
        if (time() - startime) > timeout:
            return -1

    # Drop the leading "avoid" sequences, then keep the first n_ths designs.
    th_cands = ends.tolist()[len(avoid_list):][:n_ths]
    # Strip the first and last base of every candidate.
    th_all = [th[1:-1] for th in th_cands]
    return sd.endarray(th_all, 'TD').tolist()
def create_sticky_end_sequences( tileset, energetics=None ):
    """
    Create sticky end sequences for a tileset, using stickydesign.

    Ends that already have a sequence keep it; their sequences are used to
    choose the target interaction energy for the newly designed ends.

    Parameters
    ----------

    tileset: the tileset to create sticky ends sequences for.  This will be
         copied and returned, not modified.  Both the 'ends' and the 'tiles'
         key are optional.

    energetics: the energetics instance to use for the design.  If None
         (default), will use alhambra.designer.default_energetics.

    Outputs (tileset, new_ends) where new_ends is a list of new end names
    that were designed (TD names first, then DT names).
    """
    if not energetics:
        energetics = default_energetics

    # Work on a copy so the caller's tileset is left untouched.
    newtileset = copy.deepcopy(tileset)

    # Build a list of ends from the endlist in the tileset.  Do this
    # by creating a named_list, then merging them into it.
    ends = util.named_list()

    if 'ends' in newtileset.keys():
        ends = util.merge_endlists( ends, newtileset['ends'],
                                    fail_immediate=False, in_place=True )

    # This is the endlist from the tiles themselves.
    if 'tiles' in newtileset.keys(): # maybe you just want ends?
        # this checks for end/complement usage, and whether any
        # previously-described ends are unused
        # FIXME: implement
        #tiletypes.check_end_usage(newtileset['tiles'], ends)

        endlist_from_tiles = tiletypes.endlist_from_tilelist(
                                  newtileset['tiles'] )

        ends = util.merge_endlists( ends, endlist_from_tiles, in_place=True )

    # Ensure that if there are any resulting completely-undefined ends, they
    # have their sequences removed.
    for end in ends:
        if 'fseq' in end.keys() and end['fseq']=='nnnnnnn':
            del(end['fseq'])

    # Build inputs suitable for stickydesign: lists of old sequences for TD/DT,
    # and names of the new sequences needed.
    oldDTseqs = [ end['fseq'] for end in ends
                  if 'fseq' in end.keys() and end['type']=='DT' ]
    oldTDseqs = [ end['fseq'] for end in ends
                  if 'fseq' in end.keys() and end['type']=='TD' ]

    newTDnames = [ end['name'] for end in ends
                   if 'fseq' not in end.keys() and end['type']=='TD' ]
    newDTnames = [ end['name'] for end in ends
                   if 'fseq' not in end.keys() and end['type']=='DT' ]

    # Choose the target interaction energy.  Without any old sequences it is
    # the average of the 'emedian' values that sd.enhist reports for DT and
    # TD ends; otherwise it is the average of the matching energies of the
    # old sequences of each type that is present.
    # TODO: tests needs to test this
    targets = []
    if len(oldDTseqs)==0 and len(oldTDseqs)==0:
        targets.append( sd.enhist( 'DT', 5, energetics=energetics)[2]['emedian'] )
        targets.append( sd.enhist( 'TD', 5, energetics=energetics)[2]['emedian'] )
    if len(oldDTseqs)>0:
        targets.append( energetics.matching_uniform(sd.endarray(oldDTseqs,'DT')) )
    if len(oldTDseqs)>0:
        targets.append( energetics.matching_uniform(sd.endarray(oldTDseqs,'TD')) )
    targetint = np.average(targets)

    def design_ends( endtype, names ):
        # Nothing to design for this type: skip stickydesign entirely.
        # Asking easyends for number=0 does not mean "no ends"
        # (NOTE(review): it requests as many ends as possible according to
        # the stickydesign documentation), which would break the length
        # checks below.
        if len(names) == 0:
            return []
        return sd.easyends( endtype, 5, number=len(names),
                            energetics=energetics,
                            interaction=targetint ).tolist()

    # Create new sequences.
    newTDseqs = design_ends( 'TD', newTDnames )
    newDTseqs = design_ends( 'DT', newDTnames )

    # FIXME: move to stickydesign
    assert len(newTDseqs) == len(newTDnames)
    assert len(newDTseqs) == len(newDTnames)

    # Shuffle the lists of end sequences, to ensure that they're random order, and that ends
    # used earlier in the set are not always better than those used later.
    shuffle(newTDseqs)
    shuffle(newDTseqs)

    for name,seq in zip(newDTnames,newDTseqs):
        ends[name]['fseq'] = seq
    for name,seq in zip(newTDnames,newTDseqs):
        ends[name]['fseq'] = seq

    ends.check_consistent()

    # Ensure that the old and new sets have consistent end definitions,
    # and that the tile definitions still fit.  Both keys are optional, so
    # only check the parts of the tileset that exist.
    if 'ends' in tileset.keys():
        tiletypes.merge_endlists( tileset['ends'], ends )
    if 'tiles' in newtileset.keys():
        tiletypes.merge_endlists(
            tiletypes.endlist_from_tilelist(newtileset['tiles']), ends )

    # Apply new sequences to tile system.
    newtileset['ends'] = ends

    return (newtileset, newTDnames+newDTnames)
'--verbose', action='store_true', help='Print sets and toehold sequence') parser.add_argument('-g', '--dGtarget', default=0, help='Wanted dGTarget') args = parser.parse_args() sets = args.sets number = args.number endlength = args.length sys_file = args.sys_file interaction = args.dGtarget for t in range(sets): # easyends finds the toehold set toeholds=stickydesign.easyends('S',endlength ,number=number, maxspurious=0.4,interaction=interaction, \ tries=1,energetics=stickydesign.EnergeticsBasic(temperature=37), adjs=['c','g'],alphabet='h') # The EnergeticsBasic() model is the most suitable for toeholds, even if it contrasts the README.md Use section. #print("nt = {'a': 0, 'c': 1, 'g': 2, 't': 3}") ###Convert numbers to DNA as toeholds is an array of numbers insted nucletoids toes_list = [] # an array containing toeholds in 'AATTCC' format toes_seq = [ ] ## an array containing toeholds in 'A','A','T','T','C','C' format for i in range(len(toeholds)): toes = [] b = [] for j in range(len(toeholds[i])): if toeholds[i][j] == 0: toes.append('A') elif toeholds[i][j] == 1: toes.append('C')
# Sweep the maximum-spurious and energy-deviation settings and record, for
# every combination, the mean number of toeholds stickydesign returns.
th_e = 7.7      # target interaction energy handed to stickydesign
th_len = 7      # toehold length
max_ths = 20    # number of toeholds requested per run
reps = 3        # design runs averaged per parameter combination

ef = energetics.energyfuncs()
ef.targetdG = 7.7

for i in np.arange(n):
    spurmx = spurmx_vec[i]
    for j in np.arange(n):
        deviat = deviat_vec[j]
        # One entry per repetition; entries stay at zero for failed runs.
        convenience_vec = np.zeros(reps)
        for x in np.arange(reps):
            try:
                ends = sd.easyends('TD',
                                   th_len,
                                   max_ths,
                                   interaction=th_e,
                                   fdev=deviat / th_e,
                                   alphabet='h',
                                   adjs=['h', 'd'],
                                   maxspurious=spurmx / th_e,
                                   energyfuncs=ef)
            except ValueError:
                # stickydesign could not satisfy the constraints: count the
                # run as producing no toeholds.
                continue
            convenience_vec[x] = len(ends)
        result_mat[i, j] = convenience_vec.mean()

np.savetxt('available_ths.csv', result_mat, delimiter=',')