def parses(s):
    """Return whether a SMILES string can be converted to an MPNN-ready dictionary

    Relies on ``atom_types`` and ``bond_types`` being defined in the enclosing scope.
    """
    # Canonicalize the SMILES by round-tripping it through a networkx graph
    s = convert_nx_to_smiles(convert_smiles_to_nx(s))
    try:
        convert_nx_to_dict(convert_smiles_to_nx(s), atom_types, bond_types)
        return True
    except ValueError:
        return False
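
A minimal usage sketch (hypothetical, not from the source): the candidate list below is made up, and `atom_types`/`bond_types` are assumed to be defined in the enclosing scope as in the function above.

# Hypothetical filter: keep only molecules covered by the known atom and bond types
candidates = ['CCO', 'c1ccccc1', 'CC(=O)O']
usable = [s for s in candidates if parses(s)]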
Example #2
def compute_ic50(gen: Iterator[List[dict]], model: tf.keras.Model, atom_types: List[int],
                 bond_types: List[str]) -> Iterator[List[dict]]:
    """Compute the IC50 of a chunk of molecules"""

    for chunk in gen:
        # Get the features for each molecule
        batch = []
        tested_mols = []
        for i, entry in enumerate(chunk):
            graph = convert_smiles_to_nx(entry['smiles'])
            try:
                graph_dict = convert_nx_to_dict(graph, atom_types, bond_types)
            except AssertionError:
                continue
            batch.append(graph_dict)
            tested_mols.append(i)

        # Skip the chunk if none of the molecules could be featurized
        if not batch:
            yield chunk
            continue

        # Prepare the batch in the input format expected by the model
        keys = batch[0].keys()
        batch_dict = {}
        for k in keys:
            batch_dict[k] = np.concatenate([np.atleast_1d(b[k]) for b in batch], axis=0)
        inputs = combine_graphs(batch_dict)

        # Compute the IC50
        ic50 = model.predict_on_batch(inputs).numpy()[:, 0]

        # Store it in the chunk data
        for i, v in zip(tested_mols, ic50):
            chunk[i]['pIC50_mpnn'] = v

        yield chunk
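
A hedged driver sketch (not part of the source): the SMILES records and two-molecule chunking below are hypothetical, and `model`, `atom_types`, and `bond_types` are assumed to be available as in the signature above.

# Hypothetical driver: stream chunks of molecule records through compute_ic50
records = [{'smiles': s} for s in ['CCO', 'CCN', 'c1ccccc1O']]
chunks = (records[i:i + 2] for i in range(0, len(records), 2))
for scored in compute_ic50(chunks, model, atom_types, bond_types):
    for entry in scored:
        print(entry['smiles'], entry.get('pIC50_mpnn'))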
Example #3
    def update_actions(self, new_state: nx.Graph, allowed_space: Space):
        """Generate the available actions for a new state

        Uses the actions to redefine the action space for

        Args:
            new_state (str): Molecule used to define action space
            allowed_space (Space): Space of possible observations
        """

        # Store the new state
        self._state = new_state

        # Compute the possible actions, which we describe by the new molecule they would form
        valid_actions = get_valid_actions(
            convert_nx_to_smiles(new_state),
            atom_types=self.atom_types,
            allow_removal=self.allow_removal,
            allow_no_modification=self.allow_no_modification,
            allowed_ring_sizes=self.allowed_ring_sizes,
            allow_bonds_between_rings=self.allow_bonds_between_rings,
            max_molecule_size=self.max_molecule_size)

        # Get only those actions which are in the desired space
        self._valid_actions = [
            convert_smiles_to_nx(x) for x in valid_actions
            if x in allowed_space
        ]
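
A hypothetical call sketch (assumptions: `action_space` is an instance of this class, as constructed in a later example, and the environment exposes a gym-style `observation_space`):

# Hypothetical refresh of the action list after the environment moves to a new molecule
new_graph = convert_smiles_to_nx('CCO')
action_space.update_actions(new_graph, env.observation_space)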
Example #4
def test_pickle(model, atom_types, bond_types):
    # Run inference on the first graph
    reward = MPNNReward(model, atom_types, bond_types)
    graph = convert_smiles_to_nx('CCC')
    reward(graph)

    # Clone the reward object (and the model it holds) by round-tripping through pickle
    reward2 = pkl.loads(pkl.dumps(reward))

    assert isclose(reward(graph), reward2(graph), abs_tol=1e-6)
Example #5
def test_reward(model, oneshot_model, atom_types, bond_types, target_mols):
    ic50_reward = MPNNReward(model, atom_types, bond_types)
    sim_reward = OneShotScore(oneshot_model, atom_types, bond_types,
                              target_mols)
    reward = LogisticCombination(ic50_reward, sim_reward)

    graph = convert_smiles_to_nx('C')
    x = reward(graph)
    assert isinstance(x, float)
    assert 0 < x < 3
Example #6
    def __init__(self,
                 smiles: List[str],
                 atom_types: List[int],
                 bond_types: List[str],
                 outputs: List[float],
                 batch_size: int,
                 shuffle: bool = True,
                 random_state: int = None):
        """

        Args:
            smiles: List of molecules
            atom_types: List of known atom types
            bond_types: List of known bond types
            outputs: List of molecular outputs
            batch_size: Number of molecules per training batch
            shuffle: Whether to shuffle after each epoch
            random_state: Random state for the shuffling
        """

        super(GraphLoader, self).__init__()

        # Convert the molecules to MPNN-ready formats
        mols = [
            convert_nx_to_dict(convert_smiles_to_nx(s), atom_types, bond_types)
            for s in smiles
        ]
        self.entries = np.array(list(zip(mols, outputs)))

        # Other data
        self.batch_size = batch_size
        self.shuffle = shuffle

        # Give it a first shuffle, if needed
        self.rng = np.random.RandomState(random_state)
        if shuffle:
            self.rng.shuffle(self.entries)
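
A usage sketch under stated assumptions: `GraphLoader` is treated as a `tf.keras.utils.Sequence` whose `__len__`/`__getitem__` are defined elsewhere in the class, and `train_smiles`/`train_outputs` are hypothetical lists.

# Hypothetical training call: Keras pulls MPNN-ready batches from the Sequence
loader = GraphLoader(train_smiles, atom_types, bond_types,
                     outputs=train_outputs, batch_size=32, shuffle=True)
model.fit(loader, epochs=8)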
Example #7
def evaluate_mpnn(model_msg: MPNNMessage,
                  smiles: List[str],
                  atom_types: List[int],
                  bond_types: List[str],
                  batch_size: int = 128) -> np.ndarray:
    """Run inference on a list of molecules

    Args:
        model_msg: Serialized version of the model
        smiles: List of molecules to evaluate
        atom_types: List of known atom types
        bond_types: List of known bond types
        batch_size: Number of molecules per inference batch
    Returns:
        Predicted value for each molecule
    """

    # Rebuild the model
    tf.keras.backend.clear_session()
    model = model_msg.get_model()

    # Convert all SMILES strings to batches of molecules
    # TODO (wardlt): Use multiprocessing. Could benefit from a persistent Pool to avoid loading in TF many times
    mols = [
        convert_nx_to_dict(convert_smiles_to_nx(s), atom_types, bond_types)
        for s in smiles
    ]
    chunks = [
        mols[start:start + batch_size]
        for start in range(0, len(mols), batch_size)
    ]
    batches = [_merge_batch(c) for c in chunks]

    # Feed the batches through the MPNN
    outputs = [model.predict_on_batch(b) for b in batches]
    return np.vstack(outputs)
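
A hypothetical call (assuming `MPNNMessage` wraps the trained Keras model, as the signature above suggests):

# Hypothetical inference call: serialize the model once, then score a list of SMILES
model_msg = MPNNMessage(model)
preds = evaluate_mpnn(model_msg, ['CCO', 'CCC', 'c1ccccc1'],
                      atom_types, bond_types, batch_size=64)
print(preds.shape)  # one row of predictions per molecule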
Example #8
    mpnn_dir = os.path.join('notebooks', 'mpnn-training')
    with open(os.path.join(mpnn_dir, 'atom_types.json')) as fp:
        atom_types = json.load(fp)
    with open(os.path.join(mpnn_dir, 'bond_types.json')) as fp:
        bond_types = json.load(fp)
    pt = GetPeriodicTable()
    if len(args.elements) == 0:
        elements = [pt.GetElementSymbol(i) for i in atom_types]
    else:
        elements = args.elements
    elements = [e for e in elements if MolFromSmiles(e) is not None]
    logger.info(f'Using {len(elements)} elements: {elements}')

    # Prepare the one-shot model. We load the molecules to compare against and the comparison model
    with open(os.path.join('seed-molecules', 'top_100_pIC50.json')) as fp:
        comparison_mols = [convert_smiles_to_nx(s) for s in json.load(fp)]
    oneshot_dir = 'similarity'
    oneshot_model = load_model(os.path.join(oneshot_dir, 'oneshot_model.h5'),
                               custom_objects=custom_objects)
    with open(os.path.join(oneshot_dir, 'atom_types.json')) as fp:
        os_atom_types = json.load(fp)
    with open(os.path.join(oneshot_dir, 'bond_types.json')) as fp:
        os_bond_types = json.load(fp)

    # Making all of the reward functions
    model = load_model(os.path.join(mpnn_dir, 'best_model.h5'),
                       custom_objects=custom_objects)

    rewards = {
        'logP':
        LogP(maximize=True),
Example #9
        'u0_atom': MPNNReward(model, atom_types, bond_types, maximize=False),
    }

    # Make the reward function
    if args.reward == 'u0_atom':
        reward = rewards['u0_atom']
    else:
        raise ValueError(f'Reward function not defined: {args.reward}')
    run_params['maximize'] = reward.maximize

    # Set up environment
    action_space = MoleculeActions(elements,
                                   allow_removal=not args.no_backtrack)
    init_mol = args.initial_molecule
    if init_mol is not None:
        init_mol = convert_smiles_to_nx(init_mol)
    env = Molecule(action_space, reward=reward, init_mol=init_mol)
    logger.debug('using environment: %s' % env)

    # Set up the agent
    agent = DQNFinalState(env,
                          gamma=args.gamma,
                          preprocessor=MorganFingerprints(
                              args.fingerprint_size),
                          batch_size=args.batch_size,
                          epsilon=args.epsilon,
                          q_network_dense=args.hidden_layers,
                          epsilon_decay=args.epsilon_decay)

    # Make a test directory
    test_dir = os.path.join(
Example #10
def test_mpnn_reward(model, atom_types, bond_types):
    reward = MPNNReward(model, atom_types, bond_types)
    graph = convert_smiles_to_nx('CCC')
    assert isinstance(reward(graph), float)
Example #11
def test_reward(oneshot_model, atom_types, bond_types, target_mols):
    reward = OneShotScore(oneshot_model, atom_types, bond_types, target_mols)
    graph = convert_smiles_to_nx('CCC')
    assert isinstance(reward(graph), float)
Example #12
    'ic50': MPNNReward(model, atom_types, bond_types, maximize=True),
    'QED': QEDReward(),
    'SA': SAScore(),
    'cycles': CycleLength()
}

if __name__ == "__main__":
    # Parse the inputs
    parser = ArgumentParser()
    parser.add_argument("smiles_file")
    args = parser.parse_args()

    # Load in the molecules
    with open(args.smiles_file) as fp:
        mols = [x.strip() for x in fp]

    # Get only the molecules that parse with RDKit
    mols = [x for x in mols if MolFromSmiles(x) is not None]

    # Compute the reward function statistics for all the rewards
    stats = {}
    for name, reward in rewards.items():
        data = [
            reward(convert_smiles_to_nx(mol)) for mol in tqdm(mols, desc=name)
        ]
        stats[name] = {'mean': np.mean(data), 'scale': np.std(data)}

    # Save as a json file
    with open('reward_ranges.json', 'w') as fp:
        json.dump(stats, fp, indent=2)