def save_moves(sequence, s1, s2, dataset, indirect_moves, description):
    """Store an indirect path in the sample database if it beats the direct one.

    A direct path between ``s1`` and ``s2`` is computed with
    ``pathfinder.pathfinder`` (search width 1000). Both the direct and the
    supplied indirect move list are passed through ``print_moves``, and the
    two values it returns are compared. The indirect path is inserted into
    ``./sample_db/rna_samples.sqlite`` only when its value is strictly lower.

    Args:
        sequence: RNA sequence handed to the path search and the database.
        s1: start structure.
        s2: target structure.
        dataset: dataset label forwarded to ``RNA_db.insert_path``.
        indirect_moves: move list of the indirect path to evaluate.
        description: free-text description forwarded to the database.

    Returns:
        None.
    """
    direct_search_width = 1000

    print("saving:")

    direct_result = pathfinder.pathfinder(
        sequence, s1, s2, search_width=direct_search_width)
    # keep only the first two entries (i, j) of every move of the first path
    direct_moves = [(move[0], move[1]) for move in direct_result.paths[0]]

    print("direct path:")
    en_direct = print_moves(sequence, s1, s2, direct_moves)
    print("indirect path:")
    en_indirect = print_moves(sequence, s1, s2, indirect_moves)

    # only save if indirect en is lower than direct en
    if en_indirect < en_direct:
        rna_db = RNA_db(dbname='./sample_db/rna_samples.sqlite')
        rna_db.insert_path(sequence, s1, s2, dataset, indirect_moves,
                           description)
def save_moves(sequence, s1, s2, dataset, indirect_moves, description):
    """Persist ``indirect_moves`` when they beat the direct findpath result.

    The direct path from ``s1`` to ``s2`` is searched with a width of 1000.
    ``print_moves`` is called for the direct and for the indirect move list,
    and its two return values are compared. Nothing is written unless the
    indirect value is strictly smaller than the direct one.

    Args:
        sequence: RNA sequence.
        s1: start structure.
        s2: target structure.
        dataset: dataset label stored alongside the path.
        indirect_moves: candidate indirect move list.
        description: description stored alongside the path.
    """
    direct_search_width = 1000
    direct_result = pathfinder.pathfinder(
        sequence, s1, s2, search_width=direct_search_width)

    # strip everything but the (i, j) pair from each move of the first path
    direct_moves = []
    for step in direct_result.paths[0]:
        direct_moves.append((step[0], step[1]))

    print("direct path:")
    en_direct = print_moves(sequence, s1, s2, direct_moves)
    print("indirect path:")
    en_indirect = print_moves(sequence, s1, s2, indirect_moves)

    # only save if indirect en is lower than direct en
    if not en_indirect < en_direct:
        return

    database = RNA_db(dbname='./sample_db/rna_samples.sqlite')
    database.insert_path(sequence, s1, s2, dataset, indirect_moves,
                         description)
def pathfinder(sequence, s1, s2, verbose=False, output=False, search_width=None, section=None):
    """Run ViennaRNA findpath from ``s1`` to ``s2`` and summarise the path.

    Args:
        sequence: RNA sequence used to build the fold compound.
        s1: start structure in dot-bracket notation.
        s2: target structure in dot-bracket notation.
        verbose: print the base-pair distance and the resulting moves.
        output: accepted for compatibility; not used by this function.
        search_width: width passed to ``path_findpath``; defaults to twice
            the base-pair distance of the (possibly pruned) structures.
        section: optional 2- or 4-element index sequence. Everything outside
            ``[start, end)`` (and, for 4 elements, inside ``[mid1, mid2)``)
            is replaced by ``'.'`` in both structures before searching.

    Returns:
        A ``path_class`` instance with ``max_en``, ``max_en_pos``,
        ``min_en_a``, ``min_en_b``, ``e1_en``, ``e2_en``, ``paths`` (a list
        holding one list of ``(i, j, energy)`` moves) and ``runtime`` set.
    """

    def str_compare(previous, current):
        # Yield the offset of every differing character: positive when the
        # previous character was '.', negative otherwise.
        for offset, (c_prev, c_curr) in enumerate(zip(previous, current)):
            if c_prev == c_curr:
                continue
            yield offset if c_prev == "." else -offset

    # prune off sections which will currently not be regarded / later merged
    if section:
        if len(section) == 2:
            lo = section[0]
            hi = section[1]
            s1 = '.' * lo + s1[lo:hi] + '.' * (len(s1) - hi)
            s2 = '.' * lo + s2[lo:hi] + '.' * (len(s1) - hi)
        if len(section) == 4:
            lo = section[0]
            mid_a = section[1]
            mid_b = section[2]
            hi = section[3]
            gap = '.' * (mid_b - mid_a)
            s1 = '.' * lo + s1[lo:mid_a] + gap + s1[mid_b:hi] + '.' * (len(s1) - hi)
            s2 = '.' * lo + s2[lo:mid_a] + gap + s2[mid_b:hi] + '.' * (len(s1) - hi)

    t_begin = time.time()

    # Set model details (by Stefan)
    md = RNA.md()
    md.temperature = 37.0
    # How to treat "dangling end" energies for bases adjacent to helices
    # in free ends and multi-loops.
    md.dangles = 2
    md.logML = 0
    md.special_hp = True
    md.noGU = False
    md.noGUclosure = False

    fc = RNA.fold_compound(sequence, md)

    bpd = RNA.bp_distance(s1, s2)
    if verbose:
        print(f"Base-pair distance: {bpd}")

    # Adjust upper bound for findpath search.
    if search_width is None:
        search_width = 2 * bpd

    paths = fc.path_findpath(s1, s2, width=search_width)

    saddle_en = float('-inf')  # saddle energy
    saddle_pos = 0
    previous_structure = ""
    energies = []
    moves_pos = []

    for index, step in enumerate(paths):
        # compare changes to previous move; both strings get a leading '.'
        # so that the yielded offsets are 1-based structure positions
        changed = list(str_compare('.' + previous_structure, '.' + step.s))
        previous_structure = step.s
        energies.append(step.en)

        if changed:
            moves_pos.append((changed[0], changed[1], step.en))
        else:
            moves_pos.append((0, 0, step.en))

        if step.en > saddle_en:
            saddle_en = step.en
            saddle_pos = index

    e1 = round(fc.eval_structure(s1), 2)
    e2 = round(fc.eval_structure(s2), 2)

    # minimum between e1 and saddle energy (max energy)
    before_saddle = energies[0:saddle_pos]
    en_min_a = min(before_saddle) if len(before_saddle) != 0 else e1

    after_saddle = energies[saddle_pos + 1:]
    en_min_b = e2 if len(after_saddle) <= 1 else min(after_saddle)

    runtime = time.time() - t_begin

    if verbose:
        print_moves(sequence, s1, s2, moves_pos, move_color='\033[92m')

    return_path = path_class()
    return_path.max_en = saddle_en
    return_path.max_en_pos = saddle_pos
    return_path.min_en_a = en_min_a
    return_path.min_en_b = en_min_b
    return_path.e1_en = e1
    return_path.e2_en = e2
    return_path.paths = [moves_pos]
    return_path.runtime = runtime
    return return_path
def find_path(sequence, start_list, destination_list, max_energy=float("inf"),
              results=1, search_width=1000, Debug=False, Verbose=False):
    """Search paths from one start structure towards several destinations.

    For every destination the set of base-pair moves separating it from the
    start structure is collected and stored on an ``Fp_class`` instance.
    ``Fp_class.find_path_once`` is then run in forward mode and, per reached
    destination, the lowest saddle energy is recorded.

    Args:
        sequence: RNA sequence.
        start_list: start structure (used as ``s1``).
        destination_list: non-empty list of destination structures.
        max_energy: energy bound forwarded to ``find_path_once``.
        results: accepted for compatibility; not used by this function.
        search_width: width for ``find_path_once`` and the follow-up
            ``pathfinder.pathfinder`` calls.
        Debug: install ``coloredlogs`` at DEBUG level and forward the flag.
        Verbose: forwarded to ``find_path_once``.

    Returns:
        The lowest saddle energy recorded over all destinations, or
        ``float("inf")`` if none was recorded.
    """
    if Debug:
        coloredlogs.DEFAULT_LOG_FORMAT = '%(levelname)s %(message)s'
        coloredlogs.install(level='DEBUG')

    s1 = start_list
    s2 = destination_list[0]
    add_moves = []
    fp_class = Fp_class(sequence, s1, s2, add_moves)
    current_width = search_width  # for the time being, only 1 pass

    # generate move list; destinations are ordered by decreasing bp distance
    fp_class.destinations = [(s, RNA.bp_distance(s, s1))
                             for s in destination_list]
    fp_class.destinations.sort(key=lambda entry: -entry[1])
    fp_class.min_bp_dist = float("inf")
    fp_class.max_bp_dist = float("-inf")

    print_d(s1)
    print_d("destinations:")

    move_list = set()
    fp_class.available = dict()
    all_available = []

    for a, (s, bp_dist) in enumerate(fp_class.destinations):
        current_move_list = set()
        fp_class.min_bp_dist = min(fp_class.min_bp_dist, bp_dist)
        fp_class.max_bp_dist = max(fp_class.max_bp_dist, bp_dist)

        c_pt = fp_class.p_tables[s]
        pt1 = fp_class.p_tables[s1]
        for i in range(1, c_pt[0]):
            if pt1[i] == c_pt[i]:
                continue
            if i < pt1[i]:
                # bp deletion
                move_list.add((-i, -pt1[i]))
                current_move_list.add((-i, -pt1[i]))
            if i < c_pt[i]:
                # add bp
                move_list.add((i, c_pt[i]))
                current_move_list.add((i, c_pt[i]))

        fp_class.available[a] = current_move_list
        # independent copy: fp_class.available[a] is extended further below
        all_available.append(current_move_list.copy())
        print_d("dest:", a, s, bp_dist)
        print_d(len(current_move_list), current_move_list)

    destination_list = [entry[0] for entry in fp_class.destinations]

    # all of them combined
    fp_class.moves_add = move_list

    # moves shared by every destination
    intersection = all_available[0].intersection(*all_available)

    # inverse of every move that is not shared by all destinations
    additional_moves = set()
    for dest_moves in all_available:
        for move in dest_moves:
            if move not in intersection:
                additional_moves.add((-move[0], -move[1]))

    # add inverse moves so that a destination-specific move can be undone
    for k in range(len(fp_class.destinations)):
        for i, j in additional_moves:
            if (-i, -j) in fp_class.available[k]:
                fp_class.available[k].add((i, j))

    print_d("~FWD~~~~~~")
    result_dict = defaultdict(lambda: 9999)

    # fwd
    path_generator = fp_class.find_path_once(
        start_list, destination_list, max_energy, current_width,
        mode=True, sort_min=False, Debug=Debug, Verbose=Verbose)

    for path in path_generator:
        max_en = path.saddle_e
        end_s = RNA.db_from_ptable(path.p_table)
        if end_s not in destination_list:
            continue

        start_index = path.start_pt
        start_s = fp_class.destinations[start_index][0]

        if max_en < result_dict[end_s]:
            result_dict[end_s] = max_en

        # check variations
        # NOTE(review): the source formatting was lost; this part is assumed
        # to run for every accepted path, not only for improving ones.
        start_s_index = destination_list.index(start_s)
        print_d("index:", start_s_index)
        start_s_moves = fp_class.available[start_s_index]
        moves_i_j = [(x[0], x[1]) for x in path.moves]

        for a, other_moves in fp_class.available.items():
            if a == start_s_index:
                continue
            diff = start_s_moves - other_moves
            print_d("to:", a, diff)

            # try to remove these moves, see if we get a better result
            new_max_en, last_s = print_moves(
                sequence, s1, start_s, moves_i_j, Verbose=False,
                ignore_moves=diff)
            if not new_max_en < max_en:
                continue

            # NOTE(review): the pairing of the `else` below with the
            # bp-distance test is inferred from the flattened source.
            if RNA.bp_distance(last_s, destination_list[a]) != 0:
                new_max_en_add = pathfinder.pathfinder(
                    sequence, last_s, destination_list[a],
                    search_width=search_width, verbose=False).max_en
                if new_max_en_add < new_max_en:
                    print("better result:", new_max_en,
                          RNA.bp_distance(last_s, destination_list[a]),
                          "no effect")
            else:
                print("better result:", new_max_en,
                      RNA.bp_distance(last_s, destination_list[a]))

    if Debug:
        coloredlogs.DEFAULT_LOG_FORMAT = '%(levelname)s %(message)s'
        coloredlogs.install(level='DEBUG')

    # the backward pass is currently disabled
    print_d("~BWD~~~~~~")

    return_energy = float("inf")
    for key, energy in result_dict.items():
        print("final:", key, energy)
        if energy < return_energy:
            return_energy = energy

    return return_energy
rest = rest.replace('[', '').replace(']', '') rest = rest.replace('{', '').replace('}', '') rest = rest.replace(',', '') rest = rest.split() if len(rest)!=3: # print (line) continue i, j, en = rest i = int(i) j = int(j) en = float(en) if en > max_en: max_en = round(en, 2) moves.append((i, j, en)) print_moves(sequence, s1, s2, moves) sequence = row.sequence s1 = row.s1 s2 = row.s2 print ("\nregular findpath result:") pathfinder_result = pathfinder.pathfinder(sequence, s1, s2, search_width=search_width, verbose=True) # print ("regular findpath result:", pathfinder_result.max_en) # if index>0: # break
def find_path(sequence, s1, s2, add_moves=None, results=1, indirect_iterations=2,
              search_width=1000, Debug=False, Verbose=False):
    """Indirect findpath, main function.

    The first iteration searches with the supplied ``add_moves`` only.
    Every later iteration re-runs ``Fp_class.find_path_once`` once per
    candidate set of additional moves reported by ``Fp_class.find_stack``
    for the paths of the previous iteration. The path with the lowest
    saddle energy is kept; on a tie, the one using fewer additional moves.
    A post-processing step then drops additional moves whose removal leaves
    the value returned by ``print_moves`` equal to the best saddle energy.

    Args:
        sequence: RNA sequence.
        s1: start structure.
        s2: target structure.
        add_moves: optional iterable of extra ``(i, j)`` moves allowed in
            the first iteration; ``None`` means no extra moves.
        results: accepted for compatibility; not used by this function.
        indirect_iterations: number of passes; the default 2 means one
            direct pass followed by one indirect pass.
        search_width: width forwarded to ``find_path_once``.
        Debug: forwarded to ``find_path_once``.
        Verbose: print progress and the final path.

    Returns:
        Tuple ``(best_max_en, filtered, indirect_moves)``: the best saddle
        energy, the ``(i, j)`` moves of the best path after post-processing,
        and the additional moves kept by post-processing. If no path was
        found this is ``(float("inf"), [], [])``.
    """
    # a fresh list per call instead of a shared mutable default argument
    if add_moves is None:
        add_moves = []

    fp_class = Fp_class(sequence, s1, s2, add_moves)
    max_energy = float("inf")
    current_width = search_width  # for the time being, only 1 pass

    indirect_moves = [add_moves]
    best_max_en = float("inf")
    # an empty list (not False) keeps the post-processing below iterable
    # when the search yields no path at all
    best_path = []
    best_indirect_moves = []
    best_indirect_move_count = 0

    for iteration in range(indirect_iterations):
        # first pass is direct, then iterate over different indirect moves
        last_iteration = iteration + 1 == indirect_iterations
        next_indirect_moves = []

        for indirect_move in indirect_moves:
            if indirect_move is None:
                indirect_move = []  # first iteration only
            indirect_move = set(best_indirect_moves) | set(indirect_move)
            if Verbose:
                print("Iteration", iteration,
                      "launching findpath with addtional moves:",
                      indirect_move)

            # add a set of optional indirect moves to the direct move set,
            # e.g. add {(6, 12), (5, 13)}
            fp_class.moves_add = indirect_move

            # main findpath_once function call with supplied settings
            path_generator = fp_class.find_path_once(
                s1, s2, max_energy, current_width, mode=True,
                sort_min=False, Debug=Debug, Verbose=Verbose)

            # iterate over all currently found paths; some might not be
            # optimal
            for path in path_generator:
                # path is an intermediate struct with saddle_e, moves, etc.
                if path.saddle_e < max_energy:
                    max_energy = path.saddle_e

                e_0 = fp_class.evals[s1]
                e_1 = fp_class.evals[s2]

                # workaround: replay the moves on a pair table to obtain one
                # table per step, as find_stack expects
                # NOTE(review): a (0, 0) move writes 0 into index 0 of the
                # table - confirm find_stack does not rely on that entry.
                current_ptable = list(fp_class.p_tables[s1])
                ptables = []
                current_used_indirect_moves = []  # indirect moves really used
                current_moves = []
                for i, j, _energy in path.moves:
                    current_moves.append((i, j))
                    if (i, j) in indirect_move:
                        current_used_indirect_moves.append((i, j))
                    if i < 0:
                        current_ptable[-i] = 0
                        current_ptable[-j] = 0
                    else:
                        current_ptable[i] = j
                        current_ptable[j] = i
                    ptables.append(current_ptable.copy())

                # save best path for print output / next iteration; on equal
                # energy prefer the path with fewer indirect moves
                fewer_indirect = (
                    max_energy == best_max_en
                    and len(current_used_indirect_moves)
                    < best_indirect_move_count)
                if max_energy < best_max_en or fewer_indirect:
                    best_indirect_move_count = len(current_used_indirect_moves)
                    best_max_en = max_energy
                    best_path = path.moves.copy()
                    best_indirect_moves = current_used_indirect_moves.copy()
                    barrier = max_energy - e_0
                    if Verbose:
                        print(
                            f"New best result: {max_energy:6.2f} kcal/mol | B: {barrier:6.2f} kcal/mol | E[start]:{e_0:6.2f} E[end]:{e_1:6.2f} | additional moves: {current_used_indirect_moves}"
                        )

                # dont find new indirect moves in the last iteration
                if not last_iteration:
                    for current_indirect_moves in fp_class.find_stack(
                            ptables, current_moves):
                        if current_indirect_moves not in next_indirect_moves:
                            next_indirect_moves.append(current_indirect_moves)

        if last_iteration and Verbose:
            # print path during last iteration
            print_moves(sequence, s1, s2, best_path)
        else:
            # prepare for next indirect iteration
            indirect_moves = next_indirect_moves

    # post processing - check every indirect move -
    # confirm if they're actually useful (linear time)
    filtered = [(move[0], move[1]) for move in best_path]

    # a move counts as indirect when its inverse occurs in the path as well;
    # only the first move of each such pair is recorded
    best_path_indirect_moves = []
    for i, j in filtered:
        if ((-i, -j) in filtered
                and (-i, -j) not in best_path_indirect_moves
                and (i, j) != (0, 0)):
            best_path_indirect_moves.append((i, j))

    indirect_moves = best_path_indirect_moves.copy()

    for i, j in best_path_indirect_moves:
        # path without any move whose inverse is still present in `filtered`
        current_test = []
        for m, n, _energy in best_path:
            if (m, n) in filtered and (-m, -n) in filtered:
                continue
            current_test.append((m, n))
        test_en = print_moves(sequence, s1, s2, current_test, Verbose=False)
        if test_en == best_max_en:
            filtered.remove((i, j))
            filtered.remove((-i, -j))
            indirect_moves.remove((i, j))

    return best_max_en, filtered, indirect_moves
def find_path(sequence, s_ref, destination_list, max_energy=float("inf"),
              results=1, search_width=1000, Debug=False, Verbose=False):
    """Experimental multi-destination search via the moves common to all.

    For every destination the base-pair moves separating it from ``s_ref``
    are collected. The moves shared by all destinations are applied to
    ``s_ref`` with ``generate_end_s`` to obtain a common intermediate
    structure. Paths from ``s_ref`` to that structure and from it to the
    first (sorted) destination are computed with ``pathfinder_cpp``.
    ``fp_once`` is then run towards the first destination and the path with
    the lowest saddle energy is printed.

    Args:
        sequence: RNA sequence.
        s_ref: reference (start) structure.
        destination_list: non-empty list of destination structures; a
            sorted copy is used, the caller's list is left untouched.
        max_energy: energy bound forwarded to ``fp_once``.
        results: accepted for compatibility; not used by this function.
        search_width: currently overridden, see the note in the body.
        Debug: install ``coloredlogs`` at DEBUG level.
        Verbose: currently overridden, see the note in the body.

    Returns:
        None. The function only prints its findings.
    """
    if Debug:
        coloredlogs.DEFAULT_LOG_FORMAT = '%(levelname)s %(message)s'
        coloredlogs.install(level='DEBUG')

    s1 = s_ref
    pt1 = list(RNA.ptable_from_string(s1))

    # work on a sorted copy so the caller's list is not reordered
    destination_list = sorted(destination_list)

    print_d(s_ref)
    print_d("destinations:")

    # generate move list: one set of moves per destination
    moves_dest = []
    for a, s in enumerate(destination_list):
        c_pt = list(RNA.ptable_from_string(s))
        current_move_list = set()
        for i in range(1, c_pt[0]):
            if pt1[i] == c_pt[i]:
                continue
            if i < pt1[i]:
                # bp deletion
                current_move_list.add((-i, -pt1[i]))
            if i < c_pt[i]:
                # add bp
                current_move_list.add((i, c_pt[i]))
        moves_dest.append(current_move_list)

        bp_dist = len(current_move_list)
        print_d("dest:", a, s, bp_dist)
        print_d(len(current_move_list), current_move_list)

    # One-Liner to intersect a list of sets
    common_moves = moves_dest[0].intersection(*moves_dest)
    print_d("common moves:", common_moves, len(common_moves))

    # NOTE(review): an old comment here said "add inverse move", but the
    # code negated each move twice, i.e. it stored the move itself. That
    # behaviour is kept; confirm which one is intended.
    moves_dest_add = []
    for a, dest_moves in enumerate(moves_dest):
        non_common = dest_moves - common_moves
        moves_dest_add.append(non_common)
        print_d("non-common moves", a, non_common, len(non_common))

    # generate s_common from intersection moves
    pt_common = generate_end_s(pt1, common_moves)
    s_common = RNA.db_from_ptable(pt_common)

    print("bp dist common", RNA.bp_distance(s1, s_common), len(common_moves))
    print_d(s1)
    print_d(s_common)

    # NOTE(review): these two assignments override the caller's
    # `search_width` and `Verbose` arguments; they look like debugging
    # leftovers but are kept to preserve the current behaviour.
    search_width = 20
    Verbose = True

    r = pathfinder_cpp.find_path(sequence, s1, s_common,
                                 search_width=search_width, Verbose=Verbose)
    common_move_list = r.paths[1:]  # skip the 0,0 move
    print("common moves:", len(common_move_list), common_move_list)

    add_moves = moves_dest_add[0]
    print("add moves", len(add_moves), add_moves)
    # non-common moves of the last destination
    print(moves_dest[-1] - common_moves)

    s0 = destination_list[0]
    # result unused; the call is kept because it runs with Verbose=True
    pathfinder_cpp.find_path(sequence, s_common, s0,
                             search_width=search_width, Verbose=Verbose)

    print("fp once")
    all_paths = sorted(
        fp_once(sequence, s1, s0, max_energy, search_width,
                common_move_list, add_moves, True),
        key=lambda candidate: candidate.saddle_e)

    # the paths lead from s1 to s0 (the former `s2` argument was not defined
    # inside this function)
    print_moves(sequence, s1, s0, all_paths[0].moves)
    return