def parses(s):
    """Check whether a SMILES string can be converted to an MPNN-ready dict"""
    # Canonicalize the SMILES by a round trip through networkx
    s = convert_nx_to_smiles(convert_smiles_to_nx(s))
    try:
        convert_nx_to_dict(convert_smiles_to_nx(s), atom_types, bond_types)
        return True
    except ValueError:
        return False
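# Hedged usage sketch (not in the original source): screen candidate SMILES with
#  `parses` before featurizing them for the MPNN. Assumes the module-level
#  `atom_types`/`bond_types` tables used by `parses` are already populated.
def _example_screen_candidates():
    candidates = ['C', 'CCO', 'c1ccccc1', 'C[Si](C)C']  # the silicon may fall outside atom_types
    return [s for s in candidates if parses(s)]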
def update_actions(self, new_state: nx.Graph, allowed_space: Space):
    """Generate the available actions for a new state

    Uses the new state to redefine the set of available actions

    Args:
        new_state (nx.Graph): Molecule used to define the action space
        allowed_space (Space): Space of possible observations
    """
    # Store the new state
    self._state = new_state

    # Compute the possible actions, which we describe by the new molecule they would form
    valid_actions = get_valid_actions(
        convert_nx_to_smiles(new_state),
        atom_types=self.atom_types,
        allow_removal=self.allow_removal,
        allow_no_modification=self.allow_no_modification,
        allowed_ring_sizes=self.allowed_ring_sizes,
        allow_bonds_between_rings=self.allow_bonds_between_rings,
        max_molecule_size=self.max_molecule_size)

    # Keep only those actions that fall within the desired space
    self._valid_actions = [
        convert_smiles_to_nx(x) for x in valid_actions
        if x in allowed_space
    ]
def run_experiment(episodes, n_steps, update_q_every, log_file, rewards: Dict[str, RewardFunction]):
    """Perform the RL experiment

    Args:
        episodes (int): Number of episodes to run
        n_steps (int): Maximum number of steps per episode
        update_q_every (int): After how many episodes to update the target Q network
        log_file (DictWriter): Writer used to log the outcome of each step
        rewards: Reward functions to evaluate for each new state
    """
    best_reward = -1 * inf
    for e in tqdm(range(episodes), desc='RL Episodes', leave=True, disable=False):
        current_state = env.reset()
        for s in tqdm(range(n_steps), desc='\t RL Steps', disable=True):
            # Get action based on current state
            action, q, was_random = agent.action()  # Fix cluster action

            # Take the step
            new_state, reward, done, _ = env.step(action)

            # Check if it's the last step and flag as done
            if s == n_steps - 1:
                logger.debug('Last step ... done')
                done = True

            # Save outcome
            agent.remember(current_state, action, reward,
                           new_state, agent.env.action_space.get_possible_actions(), done)

            # Train model
            loss = agent.train()

            # Compute all of the rewards
            state_rewards = dict((name, r(new_state)) for name, r in rewards.items())

            # Write to output log
            log_file.writerow({
                'episode': e,
                'step': s,
                'smiles': convert_nx_to_smiles(env.state),
                'loss': loss,
                'reward': reward,
                'epsilon': agent.epsilon,
                'q': q,
                'random': was_random,
                **state_rewards
            })

            # Update state
            current_state = new_state
            if reward > best_reward:
                best_reward = reward
                logger.info("Best reward: %s" % best_reward)
            if done:
                break

        # Update the Q network after certain numbers of episodes and adjust epsilon
        if e > 0 and e % update_q_every == 0:
            agent.update_target_q_network()
            agent.epsilon_adj()
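# Hedged sketch (not in the original source) of setting up the logging plumbing
#  for `run_experiment`. The CSV field names mirror the keys written above;
#  `qed_reward` is a hypothetical callable mapping an nx.Graph to a float, and the
#  episode/step counts are illustrative only.
def _example_run_experiment(qed_reward):
    import csv
    rewards = {'qed': qed_reward}
    fields = ['episode', 'step', 'smiles', 'loss', 'reward', 'epsilon', 'q', 'random', *rewards]
    with open('rl_log.csv', 'w', newline='') as fp:
        log_file = csv.DictWriter(fp, fieldnames=fields)
        log_file.writeheader()
        run_experiment(episodes=8, n_steps=32, update_q_every=4, log_file=log_file, rewards=rewards)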
def convert_nx_to_dict(graph: nx.Graph, atom_types: List[int], bond_types: List[str]) -> dict:
    """Convert networkx representation of a molecule to an MPNN-ready dict

    Args:
        graph: Molecule to be converted
        atom_types: Lookup table of observed atom types
        bond_types: Lookup table of observed bond types
    Returns:
        (dict) Molecule as a dict
    """
    # Get the atom types
    atom_type = [n['atomic_num'] for _, n in graph.nodes(data=True)]
    atom_type_id = list(map(atom_types.index, atom_type))

    # Get the bond types, storing each bond in both directions
    connectivity = []
    edge_type = []
    for a, b, d in graph.edges(data=True):
        connectivity.append([a, b])
        connectivity.append([b, a])
        edge_type.append(str(d['bond_type']))
        edge_type.append(str(d['bond_type']))
    edge_type_id = list(map(bond_types.index, edge_type))

    # Sort the connectivity array by the first column
    #  This is needed for the MPNN code to efficiently group messages for
    #  each node when performing the message passing step
    connectivity = np.array(connectivity)
    if connectivity.size > 0:  # Skip a special case of a molecule w/o bonds
        inds = np.lexsort((connectivity[:, 1], connectivity[:, 0]))
        connectivity = connectivity[inds, :]

        # Tensorflow's "segment_sum" will cause problems if the last atom
        #  is not bonded, because it returns an array that is shorter than
        #  the number of atoms
        if connectivity.max() != len(atom_type) - 1:
            smiles = convert_nx_to_smiles(graph)
            raise ValueError(f"Problem with unconnected atoms for {smiles}")
    else:
        connectivity = np.zeros((0, 2))

    return {
        'n_atom': len(atom_type),
        'n_bond': len(edge_type),
        'atom': atom_type_id,
        'bond': edge_type_id,
        'connectivity': connectivity
    }
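# Hedged sketch (not in the original source): featurize a small molecule with minimal
#  lookup tables. Assumes `convert_smiles_to_nx` leaves hydrogens implicit and stores
#  'atomic_num' on nodes and an RDKit bond type on edges, as implied by
#  `convert_nx_to_dict` above.
def _example_featurize_ethanol():
    atom_types = [6, 8]       # carbon, oxygen (atomic numbers)
    bond_types = ['SINGLE']   # str() of the RDKit bond type
    record = convert_nx_to_dict(convert_smiles_to_nx('CCO'), atom_types, bond_types)
    # Expect 3 heavy atoms and 4 directed bonds (each bond stored in both directions),
    #  with 'connectivity' sorted by source node, e.g. [[0, 1], [1, 0], [1, 2], [2, 1]]
    return record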
def generate_molecules(agent: DQNFinalState, episodes: int = 10,
                       n_steps: int = 32, update_q_every: int = 10) -> Tuple[Set[str], DQNFinalState]:
    """Perform the RL experiment

    Args:
        agent (DQNFinalState): Molecular design agent
        episodes (int): Number of episodes to run
        n_steps (int): Maximum number of steps per episode
        update_q_every (int): After how many episodes to update the target Q network
    Returns:
        - ([str]) SMILES strings of the molecules that were created
        - (DQNFinalState) Agent after training
    """
    # Prepare the output
    output = set()  # Keep track of the molecules that were visited

    for e in range(episodes):
        current_state = agent.env.reset()
        logger.info(f'Starting episode {e + 1}/{episodes}')
        for s in range(n_steps):
            # Get action based on current state
            action, _, _ = agent.action()  # Fix cluster action

            # Take the step
            new_state, reward, done, _ = agent.env.step(action)

            # Check if it's the last step and flag as done
            if s == n_steps - 1:
                logger.debug('Last step ... done')
                done = True

            # Add the state to the output
            output.add(agent.env.state)

            # Save outcome
            agent.remember(current_state, action, reward,
                           new_state, agent.env.action_space.get_possible_actions(), done)

            # Train model
            agent.train()

            # Update state
            current_state = new_state
            if done:
                break

        # Update the Q network after certain numbers of episodes and adjust epsilon
        if e > 0 and e % update_q_every == 0:
            agent.update_target_q_network()
            agent.epsilon_adj()

    # Clear out the memory: Too large to send back to client
    agent.memory.clear()

    # Convert the outputs back to SMILES strings
    output = set(convert_nx_to_smiles(x) for x in output)
    return output, agent
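# Hedged sketch (not in the original source) of driving `generate_molecules`.
#  `make_agent` is a hypothetical stand-in for however the repo constructs a
#  DQNFinalState with its environment; the episode and step counts are illustrative.
def _example_generate(make_agent):
    agent = make_agent()
    molecules, trained_agent = generate_molecules(agent, episodes=4, n_steps=16, update_q_every=2)
    logger.info(f'Generated {len(molecules)} unique molecules')
    return molecules, trained_agent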
def _call(self, graph: nx.Graph) -> float:
    # A missing molecule receives zero reward
    if graph is None:
        return 0
    return self.model.predict([convert_nx_to_smiles(graph)])[0]