def create_pathways_for_test(self, target_product):
    """Generate a network for ``target_product`` and return its BFS pathways.

    The network is configured from this object's attributes
    (``specificity_threshold``, ``max_nodes``, ``only_positive_specificity``,
    ``num_steps``) and searched with ``max_pathways`` / ``min_weight``.
    A message is logged when the number of pathways equals ``max_pathways``.

    Returns:
        The list returned by ``BFS.get_pathways()``.
    """
    network = Network()
    test_settings = {
        "combine_enantiomers": True,
        "remove_simple": True,
        "similarity_score_threshold": self.specificity_threshold,
        "num_enzymes": 1,
        "max_nodes": self.max_nodes,
        "prune_steps": 1,
        "only_postitive_enzyme_data": self.only_positive_specificity,
        'prune_on_substrates': False,
        'max_reactions': False,
        'include_experimental': False,
        'include_two_step': False,
    }
    network.settings.update(test_settings)
    network.generate(target_product, self.num_steps)

    search = BFS(network=network,
                 max_pathways=self.max_pathways,
                 min_weight=self.min_weight)
    search.run()
    found = search.get_pathways()

    if len(found) == self.max_pathways:
        self.log(" ~max pathways reached")

    # Grouping of pathways (group_pathways) is intentionally not applied here.
    return found
def apply_chemical_steps_molecule():
    """Apply a chemical step to the posted SMILES and return SVGs of the results.

    Reads ``smiles`` from the request form. If ``rdkit_smile`` yields ``None``
    or an empty string, an empty ``mol_dict`` is returned. Otherwise numbered
    wildcard atoms (``[1*]`` .. ``[99*]``) in each new substrate are replaced
    by ``*`` and an SVG is rendered for each resulting SMILES.
    """
    smiles = rdkit_smile(request.form['smiles'])
    if smiles is None or smiles == '':
        return jsonify(result={'mol_dict': {}})

    network = Network(target_smiles=smiles)
    network.generate(smiles, 0, calculate_scores=False)
    new_substrate_nodes, _ = network.add_chemical_step(smiles)

    # Collapse numbered attachment points to a plain wildcard.
    cleaned = [
        re.sub(r"\[(?:[1-9]|[1-9][0-9])\*\]", '*', smi)
        for smi in new_substrate_nodes
    ]

    mol_dict = {}
    for smi in cleaned:
        mol = Chem.MolFromSmiles(smi)
        mol_dict[smi] = get_images.moltosvg(mol,
                                            molSize=(200, 200),
                                            kekulize=True)

    return jsonify(result={'mol_dict': mol_dict})
def _get_retrobiocat_pathways(self, row, reaction_names, combine_enantiomers=True):
    """Return BFS pathways to ``row['product_1_smiles']`` using selected reactions.

    Args:
        row: mapping providing the key ``'product_1_smiles'``.
        reaction_names: container of reaction names; only reactions of the
            network whose name is in it are kept.
        combine_enantiomers: value stored in the ``combine_enantiomers`` setting.

    Returns:
        The pathways produced by an unscored BFS over a 5-step network.
    """
    product = row['product_1_smiles']

    network = Network(print_log=False)
    network.settings.update({
        "calculate_complexities": True,
        "calculate_substrate_specificity": False,
        "get_building_blocks": False,
        "combine_enantiomers": combine_enantiomers,
    })

    # Restrict the rule set to the requested reactions only.
    network.rxns = {
        name: network.rxns[name]
        for name in network.rxns
        if name in reaction_names
    }

    network.generate(product, 5)

    search = BFS(network=network, print_log=False, score_pathways=False)
    search.run()
    return search.get_pathways()
def bfs_with_network_generation():
    """Generate a 5-step network for ``'CCCCC=O'`` and return up to 10000 BFS pathways."""
    net = Network()
    net.generate('CCCCC=O', 5)

    search = BFS(network=net, max_pathways=10000)
    search.run()
    return search.get_pathways()
def change_enzyme():
    """Set the selected enzyme of a reaction node and return its neighbourhood.

    Reads ``selected_node``, ``selected_enzyme`` and ``task_id`` from the
    request form, rebuilds the network stored in redis under ``task_id``,
    recalculates scores, records the enzyme choice, writes the updated
    attributes back (15 minute expiry) and returns the vis.js nodes/edges of
    the node together with its successors and predecessors.
    """
    form = request.form
    selected_node = form['selected_node']
    selected_enzyme = form['selected_enzyme']
    task_id = form['task_id']

    stored = json.loads(current_app.redis.get(task_id))
    graph_dict = json.loads(stored['graph_dict'])
    attr_dict = json.loads(stored['attr_dict'])
    target_smiles = stored['target_smiles']
    network_options = json.loads(stored['network_options'])

    network = Network(
        graph=nx.from_dict_of_lists(graph_dict, create_using=nx.DiGraph),
        target_smiles=target_smiles)
    network.settings = network_options
    network.add_attributes(attr_dict)
    network.calculate_scores()

    node_attributes = network.graph.nodes[selected_node]['attributes']
    node_attributes['selected_enzyme'] = selected_enzyme

    stored['attr_dict'] = json.dumps(network.attributes_dict())
    current_app.redis.mset({task_id: json.dumps(stored)})
    current_app.redis.expire(task_id, 15 * 60)  # 15 mins * 60 seconds

    after = list(network.graph.successors(selected_node))
    before = list(network.graph.predecessors(selected_node))
    neighbourhood = network.graph.subgraph([selected_node] + after + before)
    nodes, edges = network.get_visjs_nodes_and_edges(graph=neighbourhood)

    return jsonify(result={'nodes': nodes, 'edges': edges})
def change_network_options():
    """Update the colour settings of a stored network and return fresh vis.js data.

    Reads ``reaction_colours``, ``edge_colours`` and ``task_id`` from the
    request form, rebuilds the network stored in redis, applies the two colour
    settings, regenerates nodes/edges, merges them into the stored data and
    saves everything back with a 15 minute expiry.
    """
    form = request.form
    reaction_colours = form['reaction_colours']
    edge_colours = form['edge_colours']
    task_id = form['task_id']

    stored = json.loads(current_app.redis.get(task_id))
    graph_dict = json.loads(stored['graph_dict'])
    attr_dict = json.loads(stored['attr_dict'])
    target_smiles = stored['target_smiles']
    network_options = json.loads(stored['network_options'])

    network = Network(
        graph=nx.from_dict_of_lists(graph_dict, create_using=nx.DiGraph),
        target_smiles=target_smiles)
    network.update_settings(network_options)
    network.add_attributes(attr_dict)

    network.settings['colour_reactions'] = reaction_colours
    network.settings["colour_arrows"] = edge_colours

    nodes, edges = network.get_visjs_nodes_and_edges()

    stored['nodes'] = add_new(stored['nodes'], nodes)
    stored['edges'] = add_new(stored['edges'], edges)
    stored['network_options'] = json.dumps(network.settings)

    current_app.redis.mset({task_id: json.dumps(stored)})
    current_app.redis.expire(task_id, 15 * 60)  # 15 mins * 60 seconds

    return jsonify(result={'nodes': nodes, 'edges': edges})
def test_product_against_rules():
    """Apply user-supplied reaction SMARTS to a target and return the network.

    Reads ``target_smiles``, ``smarts`` (YAML text) and ``combine_enantiomers``
    from the request form. The SMARTS are parsed into ``rdchiralReaction``
    objects, installed as the only reaction (``'test_smarts'``) of a fresh
    network, and one generation step is run.

    Returns:
        A JSON response whose ``result`` is ``{'status': 'success', 'nodes': ...,
        'edges': ...}`` or ``{'status': 'fail'}`` if any stage raises.
    """
    target_smiles = request.form['target_smiles']
    smarts = request.form['smarts']

    try:
        # NOTE(security): yaml.load with FullLoader is applied to request data;
        # consider yaml.safe_load if only plain lists/strings are expected.
        smarts_list = yaml.load(smarts, Loader=yaml.FullLoader)
    except Exception:
        # `Exception` (not a bare except) so SystemExit/KeyboardInterrupt propagate.
        return jsonify(result={'status': 'fail'})

    try:
        rxn_list = [rdchiralReaction(sma) for sma in smarts_list]
    except Exception:
        return jsonify(result={'status': 'fail'})

    try:
        network = Network()
        network.settings['combine_enantiomers'] = (
            request.form['combine_enantiomers'] == 'true')

        network.rxn_obj.rxns = {'test_smarts': rxn_list}
        network.rxns = {'test_smarts': rxn_list}

        network.generate(target_smiles, 1)
        nodes, edges = network.get_visjs_nodes_and_edges()
        print(nodes)
        result = {'status': 'success', 'nodes': nodes, 'edges': edges}
    except Exception:
        result = {'status': 'fail'}

    return jsonify(result=result)
def delete_step():
    """Delete a reaction node from a stored network.

    Reads ``reaction`` and ``task_id`` from the request form, rebuilds the
    network stored in redis, removes the reaction via
    ``network.delete_reaction_node`` and writes the updated graph, attributes,
    nodes and edges back with a 15 minute expiry.

    Returns:
        A JSON response whose ``result`` holds ``to_delete`` (the value
        returned by ``delete_reaction_node``).
    """
    reaction = request.form['reaction']
    task_id = request.form['task_id']

    stored = json.loads(current_app.redis.get(task_id))
    graph_dict = json.loads(stored['graph_dict'])
    attr_dict = json.loads(stored['attr_dict'])
    target_smiles = stored['target_smiles']
    network_options = json.loads(stored['network_options'])

    network = Network(
        graph=nx.from_dict_of_lists(graph_dict, create_using=nx.DiGraph),
        target_smiles=target_smiles,
        print_log=not current_app.config['PRODUCTION'])
    network.update_settings(network_options)
    network.add_attributes(attr_dict)

    to_delete = network.delete_reaction_node(reaction)

    stored['graph_dict'] = json.dumps(nx.to_dict_of_lists(network.graph))
    stored['attr_dict'] = json.dumps(network.attributes_dict())

    # No new nodes/edges are produced by a deletion, so empty lists are merged.
    merged_nodes = add_new(stored['nodes'], [])
    merged_edges = add_new(stored['edges'], [])
    remaining_nodes, remaining_edges = delete_nodes_and_edges(
        to_delete, merged_nodes, merged_edges)
    stored['nodes'] = remaining_nodes
    stored['edges'] = remaining_edges

    current_app.redis.mset({task_id: json.dumps(stored)})
    current_app.redis.expire(task_id, 15 * 60)  # 15 mins * 60 seconds

    return jsonify(result={'to_delete': to_delete})
def change_reaction_options():
    """Update ``max_reactions`` on a stored network's settings.

    Reads ``task_id`` and ``max_reactions`` from the request form, rebuilds the
    network stored in redis, applies the new setting, saves the settings back
    (15 minute expiry) and returns them JSON-encoded under ``network_options``.
    """
    task_id = request.form['task_id']

    stored = json.loads(current_app.redis.get(task_id))
    graph_dict = json.loads(stored['graph_dict'])
    attr_dict = json.loads(stored['attr_dict'])
    target_smiles = stored['target_smiles']
    network_options = json.loads(stored['network_options'])

    network = Network(
        graph=nx.from_dict_of_lists(graph_dict, create_using=nx.DiGraph),
        target_smiles=target_smiles)
    network.update_settings(network_options)
    network.add_attributes(attr_dict)

    new_limit = int(request.form['max_reactions'])
    network.update_settings({'max_reactions': new_limit})

    # Disabled retrorules options, kept for reference:
    # if len(retrobiocat.retrorules_diameters) != 0:
    #     network.settings['rr_min_diameter'] = int(request.form['rr_min_diameter'])
    #     network.settings['rr_min_products'] = int(request.form['rr_min_products'])
    #     network.settings['rr_max_reactions'] = int(request.form['rr_max_reactions'])

    stored['network_options'] = json.dumps(network.settings)
    current_app.redis.mset({task_id: json.dumps(stored)})
    current_app.redis.expire(task_id, 15 * 60)  # 15 mins * 60 seconds

    return jsonify(result={'network_options': json.dumps(network.settings)})
def select_only_single_positive_chemical_step(df):
    """Rename enzymes of chemical reactions to ``'Chemical'`` and drop duplicates.

    For every row whose ``'Reaction'`` is a known reaction listed under the
    ``'Chemical'`` rule type, ``'Enzyme name'`` is replaced by ``'Chemical'``
    (``df`` is modified in place for this column). Rows that are duplicates
    over reaction, enzyme name, product and both substrates are then removed.

    Returns:
        The de-duplicated DataFrame returned by ``drop_duplicates``.
    """
    rxn_obj = Network().rxn_obj
    rxn_obj.load_additional_info()

    def enzyme_label(row):
        # Chemical steps have no enzyme, so they all share one label.
        reaction = row['Reaction']
        if reaction in rxn_obj.reactions and reaction in rxn_obj.rules_by_type['Chemical']:
            return 'Chemical'
        return row['Enzyme name']

    df['Enzyme name'] = [enzyme_label(row) for _, row in df.iterrows()]

    # Filtering out negative binary data for chemical steps is currently disabled.

    duplicate_key = [
        'Reaction', 'Enzyme name', 'Product 1 SMILES', 'Substrate 1 SMILES',
        'Substrate 2 SMILES'
    ]
    return df.drop_duplicates(duplicate_key)
def __init__(self, network=None, target=None, max_pathways=50000, max_pathway_length=5, min_weight=1, use_random=False, print_log=False, score_pathways=True, allow_longer_pathways=False): """ Best First Search object, for generating pathways from a network After initialising, run search using the .run() method Args: network: a network object which has been generated min_weight: the minimum weight to assign to zero complexity change (and Stop) max_pathways: the maximum number of pathways to generate before stopping use_random: set the bfs to use weighted random selection rather than always picking the best """ self.score_pathways = score_pathways self.print_log = print_log self.min_weight = min_weight self.choices = {} self.max_pathways = max_pathways self.max_pathway_length = max_pathway_length self.allow_longer_pathways = allow_longer_pathways self.pathways = [] self.use_random = use_random self.network = network self.generate_network = False if self.network == None: self.target = node_analysis.rdkit_smile(target, warning=True) self.generate_network = True self.network = Network(target_smiles=self.target, number_steps=self.max_pathway_length, print_log=False) self.network.generate(self.target, 0) self.log('BFS - will generate network') else: self.target = self.network.target_smiles
def package_visjs_pathways(task_id, max_vis=100):
    """Store vis.js data for the first ``max_vis`` evaluated pathways in redis.

    The network saved under ``{task_id}__network`` is rebuilt, then for each
    entry of ``{task_id}__evaluated_pathways`` the vis.js nodes/edges of every
    variant are computed and stored under ``{task_id}__{index}`` (1-based)
    with a one hour expiry.

    Args:
        task_id: prefix of the redis keys to read and write.
        max_vis: maximum number of pathways to package. The previous check
            (``i > max_vis``) packaged ``max_vis + 1`` entries.
    """
    network_data = json.loads(current_app.redis.get(task_id + '__network'))
    graph = nx.from_dict_of_lists(json.loads(network_data['graph_dict']),
                                  create_using=nx.DiGraph)
    network = Network(graph=graph,
                      target_smiles=network_data['target_smiles'],
                      print_log=not current_app.config['PRODUCTION'])
    network.update_settings(json.loads(network_data['network_options']))
    network.add_attributes(json.loads(network_data['attr_dict']))

    evaluated_pathways = json.loads(
        current_app.redis.get(f"{task_id}__evaluated_pathways"))

    for i, pathway_varients in enumerate(evaluated_pathways):
        if i >= max_vis:
            break

        max_var = len(pathway_varients)
        pathway_vis_js_data = []
        for varient_nodes in pathway_varients:
            pathway = Pathway(varient_nodes, network, calc_scores=False)
            vis_nodes, vis_edges = pathway.get_visjs_nodes_and_edges()
            pathway_vis_js_data.append((vis_nodes, vis_edges, max_var))

        key = f"{task_id}__{i+1}"
        current_app.redis.mset({key: json.dumps(pathway_vis_js_data)})
        current_app.redis.expire(key, 60 * 60)
def task_add_retrorule_step(form_data, network_id):
    """Background task: add a retrorules step to a stored network.

    Rebuilds the network stored in redis under ``network_id``, attaches the
    application's retrorules reactions/database, expands ``form_data['smiles']``
    by one retrorules step and positions every returned vis.js node at
    ``form_data['x']`` / ``form_data['y']``. The updated network is saved back
    with a 5 minute expiry.

    Returns:
        dict with ``nodes``, ``edges`` and an empty ``to_delete`` list.
    """
    job = get_current_job()
    job.meta['progress'] = 'started'
    job.save_meta()

    clicked_node = form_data['smiles']
    x = form_data['x']
    y = form_data['y']

    stored = json.loads(current_app.redis.get(network_id))
    graph_dict = json.loads(stored['graph_dict'])
    attr_dict = json.loads(stored['attr_dict'])
    target_smiles = stored['target_smiles']
    network_options = json.loads(stored['network_options'])

    network = Network(
        graph=nx.from_dict_of_lists(graph_dict, create_using=nx.DiGraph),
        target_smiles=target_smiles)
    network.update_settings(network_options)
    network.add_attributes(attr_dict)

    network.retrorules.retrorules_rxns = current_app.retrorules_rxns
    network.retrorules.retrorule_db = current_app.retrorules_db

    new_substrate_nodes, new_reaction_nodes = network.retrorules.add_step(
        clicked_node)

    touched = [clicked_node] + new_substrate_nodes + new_reaction_nodes
    nodes, edges = network.get_visjs_nodes_and_edges(
        graph=network.graph.subgraph(touched))

    # Place all returned nodes at the position supplied in the form data.
    for vis_node in nodes:
        vis_node.update({'x': x, 'y': y})

    result = {
        'nodes': nodes,
        'edges': edges,
        'to_delete': [],
    }

    stored['graph_dict'] = json.dumps(nx.to_dict_of_lists(network.graph))
    stored['attr_dict'] = json.dumps(network.attributes_dict())
    stored['nodes'] = add_new(stored['nodes'], nodes)
    stored['edges'] = add_new(stored['edges'], edges)

    current_app.redis.mset({network_id: json.dumps(stored)})
    current_app.redis.expire(network_id, 5 * 60)

    return result
def custom_reaction():
    """Add a user-defined reaction to a stored network.

    Reads ``product``, ``substrate``, ``name`` and ``task_id`` from the request
    form, rebuilds the network stored in redis, adds the reaction through
    ``network.custom_reaction`` and saves the updated network back with a
    5 minute expiry.

    Returns:
        A JSON response with the vis.js ``nodes`` and ``edges`` of the newly
        added substrate and reaction nodes.
    """
    form = request.form
    product_smiles = str(form['product'])
    substrate_smiles = str(form['substrate'])
    reaction_name = str(form['name'])
    task_id = form['task_id']

    stored = json.loads(current_app.redis.get(task_id))
    graph_dict = json.loads(stored['graph_dict'])
    attr_dict = json.loads(stored['attr_dict'])
    target_smiles = stored['target_smiles']
    network_options = json.loads(stored['network_options'])

    network = Network(
        graph=nx.from_dict_of_lists(graph_dict, create_using=nx.DiGraph),
        target_smiles=target_smiles)
    network.update_settings(network_options)
    network.add_attributes(attr_dict)

    new_substrate_nodes, new_reaction_nodes = network.custom_reaction(
        product_smiles, substrate_smiles, reaction_name)

    added = new_substrate_nodes + new_reaction_nodes
    nodes, edges = network.get_visjs_nodes_and_edges(
        graph=network.graph.subgraph(added))

    result = {
        'nodes': nodes,
        'edges': edges,
    }

    stored['graph_dict'] = json.dumps(nx.to_dict_of_lists(network.graph))
    stored['attr_dict'] = json.dumps(network.attributes_dict())
    merged_nodes = add_new(stored['nodes'], nodes)
    merged_edges = add_new(stored['edges'], edges)
    stored['nodes'] = merged_nodes
    stored['edges'] = merged_edges

    current_app.redis.mset({task_id: json.dumps(stored)})
    current_app.redis.expire(task_id, 5 * 60)

    return jsonify(result=result)
def _negative_tests(self, negative_tests, list_rxns):
    """Check that none of the reactions produce outcomes for negative products.

    Problems are reported by setting ``self.state`` to ``'danger'`` and
    appending a message to ``self.issues``.

    Args:
        negative_tests: YAML text that loads to an iterable of product SMILES.
            An empty document is treated as "no tests".
        list_rxns: reaction objects to apply, registered under the name ``'tests'``.

    Returns:
        ``None`` if the YAML could not be loaded, a SMILES was rejected, or
        running the tests raised; ``True`` otherwise (including when a test
        produced outcomes and an issue was recorded).
    """
    empty_network = Network()
    rule_applicator = RuleApplicator(empty_network)
    rxns = {'tests': list_rxns}

    try:
        # NOTE(security): FullLoader is used on caller-supplied text; consider
        # yaml.safe_load if only plain lists of strings are expected.
        negative_tests = yaml.load(negative_tests, Loader=yaml.FullLoader)
    except Exception:
        self.state = 'danger'
        self.issues.append('Could not load negative tests yaml')
        return

    # An empty YAML document loads as None, which cannot be iterated.
    if negative_tests is None:
        negative_tests = []

    for test_product in negative_tests:
        try:
            rdkit_smile(test_product)
        except Exception:
            self.state = 'danger'
            self.issues.append(
                f'Negative test SMILE: {test_product} not accepted by rdkit'
            )
            return

    # The tests used to run twice: once unguarded and once inside this try.
    # That reported each failure twice and let errors escape before the
    # handler was reached; only the guarded run is kept.
    try:
        for test_product in negative_tests:
            reaction_outcomes = self._apply_reactions(
                empty_network, rule_applicator, test_product, rxns)
            if len(reaction_outcomes) != 0:
                self.state = 'danger'
                self.issues.append(
                    f'Reaction should not be outcomes for tested negative product: {test_product}'
                )
    except Exception:
        self.state = 'danger'
        self.issues.append('Problem running negative tests')
        return

    return True
def node_info():
    """Return display information for a node of a stored network.

    Reads ``node`` and ``task_id`` from the request form, rebuilds the network
    stored in redis and dispatches to ``get_substrate_info``,
    ``get_reaction_info`` or ``get_retrorule_info`` depending on the node.

    Returns:
        A JSON response whose ``result`` is the info dict, or an error
        placeholder (``name``/``type`` set to ``'error'``) when ``node`` is
        missing from the form, is not found in the network, or any step raises.
    """
    def error_response():
        # Build a fresh payload per call; it was previously written out three times.
        return jsonify(result={'name': 'error', 'type': 'error', 'data': {}, 'html': ''})

    if 'node' not in request.form:
        return error_response()
    node = str(request.form['node'])

    try:
        task_id = request.form['task_id']
        data = json.loads(current_app.redis.get(task_id))
        graph_dict = json.loads(data['graph_dict'])
        attr_dict = json.loads(data['attr_dict'])
        target_smiles = data['target_smiles']
        network_options = json.loads(data['network_options'])

        graph = nx.from_dict_of_lists(graph_dict, create_using=nx.DiGraph)
        network = Network(graph=graph, target_smiles=target_smiles)
        network.settings = network_options
        network.add_attributes(attr_dict)
        network.get_node_types()

        if node in network.substrate_nodes:
            result = get_substrate_info(node)
        elif node in network.reaction_nodes:
            if 'retrorule' not in network.graph.nodes[node]['attributes']:
                result = get_reaction_info(node, network)
            else:
                result = get_retrorule_info(node, network)
        else:
            print('Error node not in substrates or reactions')
            return error_response()

        return jsonify(result=result)

    except Exception:
        # `Exception` rather than a bare except, so SystemExit and
        # KeyboardInterrupt are not swallowed by the request handler.
        return error_response()
def task_reorder_pathways(weights, pathways_id):
    """Background task: re-evaluate stored pathways with new weights.

    Args:
        weights: sequence of five weights, in the order num_enzymes,
            complexity, starting, known_enzymes, diversity.
        pathways_id: prefix of the redis keys holding the pathway data
            (``__pathway_settings``, ``__network``, ``__all_pathways``).

    Returns:
        An empty dict; results are written to redis by the packaging helpers.
    """
    job = get_current_job()
    job.meta['progress'] = 'started'
    job.save_meta()

    settings_key = f"{pathways_id}__pathway_settings"
    pathway_settings = json.loads(current_app.redis.get(settings_key))
    pathway_settings.update({
        'weight_num_enzymes': weights[0],
        'weight_complexity': weights[1],
        'weight_starting': weights[2],
        'weight_known_enzymes': weights[3],
        'weight_diversity': weights[4]
    })
    current_app.redis.mset({settings_key: json.dumps(pathway_settings)})
    # Expire the key that was just written. The expiry used to be set on the
    # bare `pathways_id`, which is not the key written above.
    current_app.redis.expire(settings_key, 60 * 60)

    network_data = json.loads(current_app.redis.get(pathways_id + '__network'))
    graph = nx.from_dict_of_lists(json.loads(network_data['graph_dict']),
                                  create_using=nx.DiGraph)
    network = Network(graph=graph,
                      target_smiles=network_data['target_smiles'],
                      print_log=not current_app.config['PRODUCTION'])
    network.update_settings(json.loads(network_data['network_options']))
    network.add_attributes(json.loads(network_data['attr_dict']))

    all_pathways_nodes, all_scores = json.loads(
        current_app.redis.get(f"{pathways_id}__all_pathways"))
    pathways = load_pathways(all_pathways_nodes, all_scores, network)

    pathway_evaluator = evaluate_pathways(pathways, weights)
    print(weights)

    package_evaluated_pathways(pathway_evaluator.df, pathways_id)
    package_visjs_pathways(pathways_id)

    job = get_current_job()
    job.meta['progress'] = 'complete'
    job.save_meta()

    result = {}
    return result
def get_visjs_pathway(task_id, pathway_id, varient):
    """Return vis.js nodes and edges for one variant of a stored pathway.

    Args:
        task_id: prefix of the redis keys (``__network`` and ``__{pathway_id}``).
        pathway_id: identifier appended to ``task_id`` to find the pathway data.
        varient: 1-based index of the variant to show.

    Returns:
        Tuple ``(nodes, edges, max_var)`` where ``max_var`` is the number of
        entries stored for the pathway.
    """
    redis = current_app.redis
    network_data = json.loads(redis.get(task_id + '__network'))
    pathway_data = json.loads(redis.get(task_id + f'__{pathway_id}'))

    # NOTE(review): assumes each stored entry is a list of pathway nodes -
    # confirm against whatever writes this key.
    pathway_nodes = pathway_data[varient - 1]

    network = Network(
        graph=nx.from_dict_of_lists(json.loads(network_data['graph_dict']),
                                    create_using=nx.DiGraph),
        target_smiles=network_data['target_smiles'],
        print_log=not current_app.config['PRODUCTION'])
    network.update_settings(json.loads(network_data['network_options']))
    network.add_attributes(json.loads(network_data['attr_dict']))

    pathway = Pathway(pathway_nodes, network, calc_scores=False)
    nodes, edges = pathway.get_visjs_nodes_and_edges()

    return nodes, edges, len(pathway_data)
def task_make_network(form_data):
    """Background task: generate and score a network, then cache it in redis.

    The network is configured from ``form_data``, generated for
    ``form_data['target_smiles']`` over ``form_data['number_steps']`` steps and
    scored. Progress is reported through the current job's ``meta['progress']``
    (``started`` -> ``network_generated`` -> ``scores_calculated``).

    Returns:
        dict with the vis.js nodes/edges, the serialised graph, attributes and
        settings; the same dict is stored under the job id for 15 minutes.
    """
    job = get_current_job()

    def report(stage):
        # Publish the current stage on the job.
        job.meta['progress'] = stage
        job.save_meta()

    report('started')

    network = Network(
        include_experimental=bool(form_data['include_experimental']),
        include_two_step=bool(form_data['include_two_step']),
        include_requires_absence_of_water=bool(
            form_data['include_requires_absence_of_water']),
        print_log=not current_app.config['PRODUCTION'])

    settings = {
        "allow_backwards_steps": bool(form_data['allow_backwards']),
        "remove_simple": bool(form_data['remove_small']),
        "similarity_score_threshold": float(form_data['sub_thres']),
        "combine_enantiomers": bool(form_data['combine_enantiomers']),
        "num_enzymes": 1,
        "calculate_complexities": bool(form_data['calc_complexity']),
        "calculate_substrate_specificity": bool(form_data['sub_sim']),
        "max_nodes": int(form_data['max_initial_nodes']),
        "colour_reactions": form_data['colour_reactions'],
        "colour_arrows": form_data['colour_edges'],
        "show_negative_enzymes": form_data['show_neg_enz'],
        "only_postitive_enzyme_data": not form_data['show_neg_enz'],
        "max_reactions": form_data["max_reactions"],
        'only_reviewed_activity_data': bool(form_data["only_reviewed"]),
    }
    network.update_settings(settings)

    if form_data["specificity_scoring_mode"] == 'Product + substrates (slower)':
        network.update_settings({'specificity_score_substrates': True})

    network.generate(form_data['target_smiles'],
                     form_data['number_steps'],
                     calculate_scores=False)
    report('network_generated')

    network.calculate_scores()
    report('scores_calculated')

    nodes, edges = network.get_visjs_nodes_and_edges()

    options = {}
    default_network_name = 'Network for ' + str(network.target_smiles)

    result = {
        'save_id': str(uuid.uuid4()),
        'save_links': [],
        'save_name': default_network_name,
        'nodes': nodes,
        'edges': edges,
        'options': json.dumps(options),
        'graph_dict': json.dumps(nx.to_dict_of_lists(network.graph)),
        'target_smiles': str(network.target_smiles),
        'network_options': json.dumps(network.settings),
        'attr_dict': json.dumps(network.attributes_dict()),
        'max_reactions': int(network.settings['max_reactions']),
    }

    current_app.redis.mset({job.id: json.dumps(result)})
    current_app.redis.expire(job.id, 15 * 60)  # 15 mins * 60 seconds

    return result
def select_best_enzyme(self, network):
    """Pick a ``selected_enzyme`` for every reaction node from its specificity scores.

    Does nothing unless ``network.settings['calculate_substrate_specificity']``
    equals True. For each reaction node, every entry of ``possible_enzymes``
    is compared against the running ``current_score`` (initially the score of
    the currently selected enzyme); a winning enzyme is written to the node's
    ``selected_enzyme`` attribute and ``current_score`` becomes the absolute
    value of its score.

    Args:
        network: object exposing ``settings``, ``reaction_nodes`` and
            ``graph.nodes[node]['attributes']``.
    """
    if network.settings['calculate_substrate_specificity'] == True:
        for node in network.reaction_nodes:
            current_enz = network.graph.nodes[node]['attributes'][
                'selected_enzyme']
            current_score = network.graph.nodes[node]['attributes'][
                'specificity_scores'][current_enz]
            # NOTE(review): this flag starts True and is only ever assigned
            # True below, so the second clause of the test always applies and
            # the whole test reduces to abs(score) > current_score. Confirm
            # whether it was meant to track the sign of the current score.
            current_score_neg = True
            possible_enzymes = network.graph.nodes[node]['attributes'][
                'possible_enzymes']

            for enz in possible_enzymes:
                score = network.graph.nodes[node]['attributes'][
                    'specificity_scores'][enz]
                if (score > current_score
                        and score != 0) or (abs(score) > current_score
                                            and current_score_neg == True):
                    network.graph.nodes[node]['attributes'][
                        'selected_enzyme'] = enz
                    # The magnitude is stored, so later comparisons use abs values.
                    current_score = abs(score)
                    if score < 0:
                        current_score_neg = True


# Manual smoke run: build a 4-step network for a small aldehyde.
if __name__ == '__main__':
    from retrobiocat_web.retro.generation.network_generation.network import Network
    network = Network()
    network.generate('CCCCC=O', 4)
    # evaluator calculate scores called during generate
smiles = node_analysis.rdkit_smile(smiles) listSmiles, listReactions = self.retrosynthesisEngine.single_step( smiles, self.retrorules_rxns, self.network.graph) self.network.get_node_types() if calculate_scores == True: self.network.evaluator.calculate_scores(self.network) return listSmiles, listReactions if __name__ == '__main__': from retrobiocat_web.retro.generation.network_generation.network import Network target = 'CCCCCO' network = Network() network.generate(target, 2) network.retrorules.diameters = [2] network.retrorules.load() network.retrorules.add_step('CCCCCC(C)=O') """ file = str(Path(__file__).parents[3]) + '/data/reaction_rules/retrorules/retrorules_all.pkl' rxns = pickle.load(open(file, "rb")) for d in rxns: print(d) file_name = 'rules' + str(d) + '.pkl' with open(file_name, 'wb') as handle: pickle.dump(rxns[d], handle, protocol=pickle.HIGHEST_PROTOCOL) """
def task_get_pathways(form_data):
    """Background task: generate a network, search it for pathways and cache results.

    Stages reported through the current job's ``meta['progress']``:
    ``started`` -> ``network_generated`` -> ``network_scored`` ->
    ``pathways_generated`` -> ``pathways_scored``.

    Redis keys written (all prefixed with the job id): ``__network`` and
    ``__pathway_settings`` here, plus whatever the packaging helpers store.

    Args:
        form_data: mapping with the network, search and weighting options
            read below.
    """
    job = get_current_job()
    job.meta['progress'] = 'started'
    job.save_meta()

    network = Network(print_log=not current_app.config['PRODUCTION'],
                      include_experimental=form_data['include_experimental'],
                      include_two_step=form_data['include_two_step'],
                      include_requires_absence_of_water=bool(
                          form_data['include_requires_absence_of_water']))

    network.update_settings({
        "remove_simple": bool(form_data['remove_small']),
        "combine_enantiomers": bool(form_data['combine_enantiomers']),
        'max_nodes': int(form_data['max_nodes']),
        'similarity_score_threshold': float(form_data['sub_thres']),
        'colour_reactions': form_data['colour_reactions'],
        "colour_arrows": form_data['colour_edges'],
        "show_negative_enzymes": form_data['show_neg_enz'],
        "only_postitive_enzyme_data": not form_data['show_neg_enz'],
        'only_reviewed_activity_data': bool(form_data["only_reviewed"])
    })

    if form_data["specificity_scoring_mode"] == 'Product + substrates (slower)':
        network.update_settings({'specificity_score_substrates': True})

    network.generate(form_data['target_smiles'],
                     form_data['number_steps'],
                     calculate_scores=False)
    job.meta['progress'] = 'network_generated'
    job.save_meta()

    network.calculate_scores()
    job.meta['progress'] = 'network_scored'
    job.save_meta()

    network_data = {
        'graph_dict': json.dumps(nx.to_dict_of_lists(network.graph)),
        'target_smiles': str(network.target_smiles),
        'network_options': json.dumps(network.settings),
        'attr_dict': json.dumps(network.attributes_dict())
    }
    network_key = f"{job.id}__network"
    current_app.redis.mset({network_key: json.dumps(network_data)})
    current_app.redis.expire(network_key, 60 * 60)

    bfs = BFS(network=network,
              max_pathways=form_data['max_pathways'],
              max_pathway_length=form_data['number_steps'],
              min_weight=float(form_data['min_weight']),
              print_log=not current_app.config['PRODUCTION'])
    bfs.run()
    pathways = bfs.get_pathways()

    job.meta['progress'] = 'pathways_generated'
    job.save_meta()

    package_all_pathways(job.id, pathways)

    pathway_evaluator = evaluate_pathways(pathways, [
        form_data['weight_num_enzymes'], form_data['weight_complexity'],
        form_data['weight_starting'], form_data['weight_known_enzymes'],
        form_data['weight_diversity']
    ])

    package_evaluated_pathways(pathway_evaluator.df, job.id)
    package_visjs_pathways(job.id)

    job.meta['progress'] = 'pathways_scored'
    job.save_meta()

    options = {}
    if form_data['hierarchical'] == True:
        options.update({
            "layout": {
                "improvedLayout": 'true',
                'hierarchical': {
                    'direction': 'DU',
                    "sortMethod": "hubsize",
                    "nodeSpacing": 200,
                    "treeSpacing": 400
                }
            }
        })

    pathway_settings = {
        'weight_num_enzymes': form_data['weight_num_enzymes'],
        'weight_complexity': form_data['weight_complexity'],
        'weight_starting': form_data['weight_starting'],
        'weight_known_enzymes': form_data['weight_known_enzymes'],
        'weight_diversity': form_data['weight_diversity'],
        'options': options
    }
    settings_key = f"{job.id}__pathway_settings"
    current_app.redis.mset({settings_key: json.dumps(pathway_settings)})
    # Expire the settings key itself. The expiry used to be set on the bare
    # job id, which this function never writes.
    current_app.redis.expire(settings_key, 60 * 60)
class BFS():

    def __init__(self, network=None, target=None, max_pathways=50000, max_pathway_length=5, min_weight=1, use_random=False, print_log=False, score_pathways=True, allow_longer_pathways=False):
        """
        Best First Search object, for generating pathways from a network

        After initialising, run search using the .run() method

        Args:
            network: a network object which has been generated. When omitted, a
                network is created for `target` and expanded during the search
            target: target SMILES, only read when no network is supplied
            max_pathways: the maximum number of pathways to generate before stopping
            max_pathway_length: limit on the number of reactions in a pathway
            min_weight: the minimum weight to assign to zero complexity change (and Stop)
            use_random: set the bfs to use weighted random selection rather than always picking the best
            print_log: print log messages when True
            score_pathways: passed to Pathway as calc_scores
            allow_longer_pathways: when True, get_pathways() keeps pathways
                with more reactions than max_pathway_length
        """
        # Search limits and behaviour switches.
        self.max_pathways = max_pathways
        self.max_pathway_length = max_pathway_length
        self.allow_longer_pathways = allow_longer_pathways
        self.min_weight = min_weight
        self.use_random = use_random
        self.score_pathways = score_pathways
        self.print_log = print_log

        # Search state: choices maps a context string to {choice: weight}.
        self.choices = {}
        self.pathways = []

        self.network = network
        self.generate_network = False
        if self.network == None:
            # No network given: start from the bare target and grow it on demand.
            self.target = node_analysis.rdkit_smile(target, warning=True)
            self.generate_network = True
            self.network = Network(target_smiles=self.target,
                                   number_steps=self.max_pathway_length,
                                   print_log=False)
            self.network.generate(self.target, 0)
            self.log('BFS - will generate network')
        else:
            self.target = self.network.target_smiles

    def log(self, msg):
        """ Print msg when logging is switched on"""
        if self.print_log == True:
            print(msg)

    def _get_context(self, nodes):
        """ Returns the pathway context, which is a string of node numbers"""
        graph_nodes = self.network.graph.nodes
        node_numbers = sorted(
            graph_nodes[node]['attributes']['node_num'] for node in nodes)
        return ''.join(str(num) + '-' for num in node_numbers)

    def _expand_network(self, smi):
        """ Adds one step to the network from smi, returns all nodes added"""
        new_substrates, new_reactions = self.network.add_step(smi)
        return [*new_substrates, *new_reactions]

    def _get_choices(self, end_nodes):
        """ Returns a dict of reaction nodes (and Stop) which are choices for the next step,
        mapped to their weights"""
        graph = self.network.graph

        # 'Stop' is always the first option, followed by every successor reaction.
        options = ['Stop']
        for node in end_nodes:
            options.extend(list(graph.successors(node)))

        # 'Stop' counts as zero change in complexity.
        complexity_changes = [0] + [
            graph.nodes[reaction]['attributes']['change_in_complexity']
            for reaction in options[1:]
        ]

        # invert changes so decreases in complexity are favoured
        inverted = [change * -1 for change in complexity_changes]

        # shift everything up so that no weight falls below min_weight
        lowest = min(inverted)
        shift = -lowest if lowest < 0 else 0
        weights = [value + self.min_weight + shift for value in inverted]

        return dict(zip(options, weights))

    def _pick_choice(self, context):
        """ Given a context, picks an option to extend (or stop) that pathway """
        available = self.choices[context]
        choices = list(available)
        scores = [available[choice] for choice in choices]

        if self.use_random == False:
            ranked = node_analysis.sort_by_score(choices,
                                                 scores,
                                                 reverse=False)
            return ranked[0]
        return random.choices(choices, scores, k=1)[0]

    def _add_reaction(self, reaction_choice):
        """ Returns (nodes added by taking the reaction, the new end nodes)"""
        new_end_nodes = list(self.network.graph.successors(reaction_choice))
        return [reaction_choice] + new_end_nodes, new_end_nodes

    def _check_pathway_has_end(self, nodes):
        """ True if the subgraph of nodes has at least one node without successors"""
        pathway_subgraph = self.network.graph.subgraph(nodes)
        end_nodes = node_analysis.get_nodes_with_no_successors(
            pathway_subgraph)
        return len(end_nodes) != 0

    def _make_pathway(self, nodes):
        """ Create pathway object from list of nodes"""
        return Pathway(nodes, self.network, calc_scores=self.score_pathways)

    def _check_if_should_expand_network(self, end_nodes, pathway_nodes):
        """ Expands the network at unexpanded end nodes, if generating on the fly
        and the pathway is still shorter than the maximum length"""
        may_grow = (self.generate_network == True and
                    self._num_reactions(pathway_nodes) < self.max_pathway_length)
        if not may_grow:
            return
        for node in end_nodes:
            if not list(self.network.graph.successors(node)):
                self._expand_network(node)

    def _is_node_already_in_pathway(self, current_nodes, new_nodes):
        """ True if any of new_nodes is already in current_nodes"""
        return any(node in current_nodes for node in new_nodes)

    def _num_reactions(self, nodes):
        """ Counts the nodes whose node_type attribute is 'reaction'"""
        graph_nodes = self.network.graph.nodes
        return sum(
            1 for node in nodes
            if graph_nodes[node]['attributes']['node_type'] == 'reaction')

    def run(self):
        """
        Generate pathways using best first search

        Returns: list of pathways
        """
        self.log('Run BFS')
        self.pathways = []
        self.choices = {}

        nodes = [self.target]
        context = self._get_context(nodes)
        self._check_if_should_expand_network(nodes, nodes)
        self.choices[context] = self._get_choices(nodes)
        start_context = copy.deepcopy(context)

        # Each pass of the outer loop walks one pathway from the target until
        # it stops, hits a dead end, or exhausts the step budget.
        while (len(self.pathways) < self.max_pathways) and (len(
                self.choices[start_context]) > 0):
            nodes = [self.network.target_smiles]
            context = self._get_context(nodes)
            steps = 0

            while len(self.choices[context]) > 0:
                if steps > self.max_pathway_length:
                    # Out of steps: close this context for good.
                    self.choices[context] = []
                    if self._check_pathway_has_end(nodes) == True:
                        self.pathways.append(nodes)
                    break

                best_choice = self._pick_choice(context)

                if best_choice == 'Stop':
                    if self._check_pathway_has_end(nodes) == True:
                        self.pathways.append(nodes)
                    self.choices[context].pop('Stop')
                    steps = 0
                    break

                steps += 1
                added_nodes, new_end_nodes = self._add_reaction(best_choice)

                if self._is_node_already_in_pathway(nodes,
                                                    added_nodes) == True:
                    # Taking this reaction would revisit a node.
                    self.choices[context].pop(best_choice)
                    break

                extended_nodes = nodes + added_nodes
                new_context = self._get_context(extended_nodes)
                if new_context not in self.choices:
                    self._check_if_should_expand_network(
                        new_end_nodes, extended_nodes)
                    self.choices[new_context] = self._get_choices(
                        new_end_nodes)

                if len(self.choices[new_context]) == 0:
                    # Nothing left to explore behind this reaction.
                    self.choices[context].pop(best_choice)
                else:
                    nodes = extended_nodes
                    context = new_context

        self.log('BFS complete')
        if len(self.pathways) >= self.max_pathways:
            self.log('Max pathways reached')

        return self.pathways

    def get_pathways(self):
        """ Returns Pathway objects for the found pathways, dropping those with too
        many reactions unless allow_longer_pathways is set"""
        pathway_objects = []
        for list_nodes in self.pathways:
            pathway = self._make_pathway(list_nodes)
            if (self.allow_longer_pathways == True
                    or len(pathway.reactions) <= self.max_pathway_length):
                pathway_objects.append(pathway)
        return pathway_objects
pathway.other_varients_as_nodes.append(other_pathway.list_nodes) pathways.append(pathway) return pathways def group_pathways(pathways, scores_to_use=None, by_enzyme=True): if scores_to_use == None: scores_to_use = default_scores_to_use end_nodes_dict = _generate_end_nodes_dict( pathways, scores_to_use) # groups by end_nodes, reactions, scores grouped_pathways = _get_grouped_pathways( end_nodes_dict) # converts dict to a list of lists new_pathways = _collapse_groups(grouped_pathways, by_enzyme) return new_pathways if __name__ == '__main__': from retrobiocat_web.retro.generation.network_generation.network import Network from retrobiocat_web.retro.generation.pathway_generation.best_first_search import BFS network = Network(max_nodes=300) network.generate('[C@H]1(C2=CC=CC=C2)NCCCC1', 5) bfs = BFS(network=network, max_pathways=200) bfs.run() pathways = bfs.get_pathways() pathways = group_pathways(pathways) pathways = pathways[0:10]
def step():
    """Expand a stored network by one step from the clicked node.

    Reads ``smiles``, ``x``, ``y``, ``task_id``, ``max_reactions`` and
    ``rbc_reaction_mode`` from the request form, rebuilds the network stored
    in redis, applies the two reaction settings, adds a step from the clicked
    node and places every returned vis.js node at (``x``, ``y``). The updated
    network is saved back with a 15 minute expiry.

    Returns:
        A JSON response with the vis.js ``nodes`` and ``edges`` for the
        clicked node and the newly added nodes.
    """
    form = request.form
    clicked_node = form['smiles']
    x = form['x']
    y = form['y']
    task_id = form['task_id']
    max_reactions = form['max_reactions']
    rbc_reaction_mode = form['rbc_reaction_mode']

    stored = json.loads(current_app.redis.get(task_id))
    graph_dict = json.loads(stored['graph_dict'])
    attr_dict = json.loads(stored['attr_dict'])
    target_smiles = stored['target_smiles']
    network_options = json.loads(stored['network_options'])

    network = Network(
        graph=nx.from_dict_of_lists(graph_dict, create_using=nx.DiGraph),
        target_smiles=target_smiles,
        print_log=not current_app.config['PRODUCTION'])
    network.update_settings(network_options)
    network.add_attributes(attr_dict)
    network.update_settings({
        'max_reactions': int(max_reactions),
        'retrobiocat_reaction_mode': rbc_reaction_mode
    })

    new_substrate_nodes, new_reaction_nodes = network.add_step(clicked_node)

    touched = [clicked_node] + new_substrate_nodes + new_reaction_nodes
    nodes, edges = network.get_visjs_nodes_and_edges(
        graph=network.graph.subgraph(touched))

    # Place all returned nodes at the position supplied in the form.
    for vis_node in nodes:
        vis_node.update({'x': x, 'y': y})

    result = {'nodes': nodes, 'edges': edges}

    stored['graph_dict'] = json.dumps(nx.to_dict_of_lists(network.graph))
    stored['attr_dict'] = json.dumps(network.attributes_dict())

    merged_nodes = add_new(stored['nodes'], nodes)
    merged_edges = add_new(stored['edges'], edges)
    merged_nodes, merged_edges = delete_nodes_and_edges([], merged_nodes,
                                                        merged_edges)
    stored['nodes'] = merged_nodes
    stored['edges'] = merged_edges

    current_app.redis.mset({task_id: json.dumps(stored)})
    current_app.redis.expire(task_id, 15 * 60)  # 15 mins * 60 seconds

    return jsonify(result=result)