Exemplo n.º 1
0
    def predict_outcome(self, list_of_mols, k=1):
        """
        Predict the top-k reaction outcomes for a set of input molecules.

        The SMILES of all inputs are joined with "." and passed first to
        self.directcorefinder.predict and then, together with the returned
        bond predictions and scores, to self.directcandranker.predict.

        Params:
        :list_of_mols: list of reactants and reagents (both of class Molecule)
                                     (former contribute atoms, latter don't)
        :k: number of leading outcomes from the ranker to keep (default 1)

        Returns:
        list of Molecule, one per kept outcome, each with its synthesis
        set to list_of_mols
        """
        joined_smiles = ".".join(m.smiles for m in list_of_mols)
        core_prediction = self.directcorefinder.predict(joined_smiles)
        # The fourth element (attention score) is not used below.
        joined_smiles, bond_preds, bond_scores, _ = core_prediction

        #---> TODO: add input check here: some molecules seem to be 'unparseable' <---#
        # this might be a problem of Rexgen, though
        ranked = self.directcandranker.predict(joined_smiles, bond_preds,
                                               bond_scores)

        products = []
        for candidate in ranked[:k]:
            # Only the first SMILES listed for each candidate is used.
            product = Molecule(candidate["smiles"][0])
            product.set_synthesis(list_of_mols)
            products.append(product)
        return products
Exemplo n.º 2
0
def get_chembl(n_mols=None, as_mols=True):
    """
    Load entries from the ChEMBL dataset file, one stripped line per entry.

    Params:
    :n_mols: maximum number of lines to read from the start of the file;
             None (default) reads the whole file. If the file has fewer
             lines, only the available ones are returned.
    :as_mols: if True (default), wrap every line in a Molecule;
              if False, return the raw SMILES strings.

    Returns:
    list of Molecule (as_mols=True) or list of str (as_mols=False)
    """
    from itertools import islice

    path = "./datasets/ChEMBL.txt"
    with open(path, "r") as f:
        # islice stops at end-of-file, whereas repeated readline() calls keep
        # returning "" and would pad the result with empty SMILES.
        # max(..., 0) keeps a negative n_mols meaning "nothing", as before.
        lines = f if n_mols is None else islice(f, max(n_mols, 0))
        smiles = [line.strip() for line in lines]
    if not as_mols:
        return smiles
    return [Molecule(smile) for smile in smiles]
Exemplo n.º 3
0
    def test_sas(self):
        """
        Smoke-run RandomExplorer with calculateSAScore as the objective.

        Starts from a small fixed pool of SMILES, evolves for 10 steps and
        prints the timing, the resulting pool and the synthesis path of the
        best molecule. Nothing is asserted; output is inspected manually.
        """
        def sas_func(mol):
            return calculateSAScore(Chem.MolFromSmiles(mol.smiles))

        # Sanity print of the objective on a trivial molecule.
        print(sas_func(Molecule("CC")))

        seed_smiles = [
            "CC", "O=C=O", "C#N", "CCN(CC)CC", "CC(=O)O", "C1CCCCC1",
            "c1ccccc1"
        ]
        seed_pool = [Molecule(smi) for smi in seed_smiles]
        explorer = RandomExplorer(sas_func, initial_pool=seed_pool)

        print("Starting SA score optimization")
        started = time()
        explorer.evolve(10)
        elapsed = time() - started

        # check
        print("Completed SA score optimization, time elapsed: %.3fs" % elapsed)
        print(explorer.pool)
        best = explorer.get_best(1)[0]
        print(best.get_synthesis_path())
Exemplo n.º 4
0
    def test_len(self):
        """
        Smoke-run RandomExplorer with SMILES length as a dummy objective.

        Evolves a small fixed pool for 2 steps and prints the resulting
        pool. Nothing is asserted; output is inspected manually.
        """
        def dummy_func(mol):
            return len(mol.smiles)

        seed_smiles = [
            "CC", "O=C=O", "C#N", "CCN(CC)CC", "CC(=O)O", "C1CCCCC1",
            "c1ccccc1"
        ]
        seed_pool = [Molecule(smi) for smi in seed_smiles]
        explorer = RandomExplorer(dummy_func, initial_pool=seed_pool)

        print("Starting len of SMILES optimization")
        explorer.evolve(2)

        # check
        print(explorer.pool)
0
def ga_optimise_from_args(func, max_capital):
    """
    Optimise `func` over molecules with a RandomExplorer.

    Params:
    :func: objective to optimise; may accept either a single Molecule or
           an iterable of Molecules (detected by a probe call below)
    :max_capital: budget passed to RandomExplorer.evolve

    Returns:
    (top, val) where `top` is the result of explorer.get_best(k=1) and
    `val` is the objective value of its first element
    """
    # the func may accept iterable or a single Molecule
    mol = Molecule(smiles="c1cc(OCCCN2CCCCC2)ccc1CN1CCC2(CC1)OCCO2")
    try:
        func(mol)
    except Exception:
        # Deliberately broad: any failure on a single Molecule is taken to
        # mean that func expects an iterable, so wrap single molecules.
        func_ = lambda m: func([m])
    else:
        func_ = func

    explorer = RandomExplorer(func_)
    explorer.evolve(max_capital)
    top = explorer.get_best(k=1)
    # Score through the same adapter the explorer used. Calling func(top)
    # directly would hand a list to an objective that may only accept a
    # single Molecule.
    val = func_(top[0])
    return top, val
Exemplo n.º 6
0
    def test_chembl(self):
        """
        Smoke-run RandomExplorer using a property lookup from chembl_prop_exp.

        A random start pool of 100 molecules is drawn from the loaded data,
        evolved for 30 steps, and the best achieved score is printed next
        to the best score present in the whole loaded pool.

        Problem with fixed-prop testing (author's note):
        almost all of the results seem to be outside of the database
        (quoted as "<10% for init_pool of 50", and even less for a smaller
        pool), hence their score cannot be obtained for testing;
        setting them to zero leads to slow exploration.
        """
        smiles_all, prop_by_smiles = chembl_prop_exp()

        # Conversions are disabled on purpose: per the original note,
        # loading with mol conversions takes 8 minutes.
        pool_all = [
            Molecule(smi, conv_enabled=False) for smi in smiles_all
        ]
        start_pool = list(np.random.choice(pool_all, size=100, replace=False))

        def func(mol):
            return prop_by_smiles[mol.smiles]

        def print_props(pool):
            values = [func(mol) for mol in pool]
            print("Props of pool", len(pool), np.min(values), np.mean(values),
                  np.max(values))

        print_props(pool_all)
        print_props(start_pool)

        explorer = RandomExplorer(func, initial_pool=start_pool)

        print("Starting ChEMBL score 1 optimization")
        started = time()
        explorer.evolve(30)
        elapsed = time() - started
        print("Completed ChEMBL score 1 optimization, time elapsed: %.3fs" %
              elapsed)

        best = explorer.get_best(1)[0]
        print(best.get_synthesis_path())

        print("Best achieved score: %.3f" % func(best))
        best_possible = np.max([func(mol) for mol in pool_all])
        print("Best possible score: %.3f" % best_possible)
 def test_wl_kernel(self):
     """Print the WL kernel computed by mol_kernels for three molecules."""
     molecules = [Molecule(S1), Molecule(S2), Molecule(S3)]
     kernel = mol_kernels.compute_wl_kernel(molecules)
     print(kernel)
 def test_edgehist_kernel(self):
     """Print the edge-histogram kernel computed by mol_kernels for two molecules."""
     molecules = [Molecule(S1), Molecule(S2)]
     kernel = mol_kernels.compute_edgehist_kernel(molecules)
     print(kernel)
 def test_conversions(self):
     """Convert one molecule with mol_kernels.mol2graph_igraph and print the result."""
     converted = mol_kernels.mol2graph_igraph(Molecule(S1))
     print(converted)