Exemplo n.º 1
0
def test_fix_valence():
    """Exercise `dm.fix_valence_charge` and `dm.fix_valence` on the atom mapped as 1."""

    def _mapped_atom(molecule):
        # First atom whose atom-map number is 1 (the nitrogen in the SMILES below).
        return [atom for atom in molecule.GetAtoms() if atom.GetAtomMapNum() == 1][0]

    # Parse without sanitization so the questionable valence survives parsing.
    mol = Chem.MolFromSmiles("Cl.[H][N:1]1=CC(O)=CC2CCCCC12", sanitize=False)
    mol.UpdatePropertyCache(False)
    pristine_copy = dm.copy_mol(mol)

    nitrogen_atom = _mapped_atom(mol)
    valence_before = nitrogen_atom.GetExplicitValence()
    assert dm.incorrect_valence(nitrogen_atom, True)

    # Out-of-place fix: the result must round-trip through SMILES ...
    out_of_place = dm.fix_valence_charge(mol, inplace=False)
    assert dm.to_mol(Chem.MolToSmiles(out_of_place)) is not None

    # ... while the atom of the input molecule is still reported as incorrect.
    assert dm.incorrect_valence(nitrogen_atom, True)

    # In-place fix: the nitrogen of the input molecule should now carry a +1 charge.
    dm.fix_valence_charge(mol, inplace=True)
    assert nitrogen_atom.GetFormalCharge() == 1

    # `fix_valence` on the untouched copy should lower the explicit valence instead.
    valence_fixed = dm.fix_valence(pristine_copy)
    assert _mapped_atom(valence_fixed).GetExplicitValence() < valence_before

    # The valence-fixed molecule must also round-trip through SMILES.
    assert dm.to_mol(Chem.MolToSmiles(valence_fixed)) is not None
Exemplo n.º 2
0
def _run_at_all_rct(rxn, mol1, mol2):
    """Run a two-reactant reaction on `mol1` and `mol2`, trying both reactant orders.

    The reaction is rebuilt from its own SMARTS before use. Each order is only
    attempted when both molecules match the corresponding reactant templates.

    Args:
        rxn: reaction accepted by `rdChemReactions.ReactionToSmarts`.
        mol1: first molecule, or None.
        mol2: second molecule, or None.

    Yields:
        `(mol, smiles)` tuples, one per product for which a SMILES could be
        generated. `mol` is the product re-parsed from that SMILES and may be
        None when parsing failed even after the sanitization retry.
    """
    rxn = rdChemReactions.ReactionFromSmarts(rdChemReactions.ReactionToSmarts(rxn))
    m1 = rxn.GetReactantTemplate(0)
    m2 = rxn.GetReactantTemplate(1)
    mol1_valid = mol1 is not None
    mol2_valid = mol2 is not None
    isR1 = mol1_valid and mol1.HasSubstructMatch(m1)
    isR2 = mol1_valid and mol1.HasSubstructMatch(m2)

    library = []
    if isR1 and mol2_valid and mol2.HasSubstructMatch(m2):
        library.extend(rxn.RunReactants((mol1, mol2)))
    if isR2 and mol2_valid and mol2.HasSubstructMatch(m1):
        library.extend(rxn.RunReactants((mol2, mol1)))

    # `RunReactants` returns one tuple of products per match; flatten them.
    for m in itertools.chain.from_iterable(library):
        mol = None
        mSmi = ""
        try:
            mSmi = Chem.MolToSmiles(m)
            mol = dm.to_mol(mSmi)
        except Exception:
            # Best effort: fall through to the sanitization retry below.
            pass
        if mol is None:
            # Retry on the raw product `m`. The previous code called these
            # methods on `mol`, which is always None here, so the retry could
            # never succeed.
            try:
                m.UpdatePropertyCache()
                sanitized = dm.sanitize_mol(m)
                if sanitized is not None:
                    mSmi = Chem.MolToSmiles(sanitized)
                    mol = dm.to_mol(mSmi)
            except Exception:
                # Best effort: keep whatever SMILES the first attempt produced.
                pass
        if mSmi:
            yield mol, mSmi
Exemplo n.º 3
0
def remove_dummies(mol: Chem.rdchem.Mol, dummy: str = "*") -> Optional[Chem.rdchem.Mol]:
    """Strip dummy atoms from a molecule.

    The dummy pattern is first replaced by hydrogens, which are then removed.
    If that raises, the dummy substructure is deleted from `mol` instead.

    Args:
        mol: molecule to clean.
        dummy: string passed to `dm.to_mol` to build the dummy pattern.

    Returns:
        The molecule produced by whichever of the two strategies ran.
    """
    dummy_pattern = dm.to_mol(dummy)
    try:
        hydrogen = dm.to_mol("[H]")
        replaced = Chem.ReplaceSubstructs(mol, dummy_pattern, hydrogen, True)[0]
        return Chem.RemoveHs(replaced)
    except Exception:
        # Fallback: drop the dummy atoms outright.
        return Chem.DeleteSubstructs(mol, dummy_pattern)
Exemplo n.º 4
0
def test_to_neutral():
    """Check `dm.to_neutral` on a cation and on an anion."""
    cation = dm.to_mol("[NH4+]", add_hs=False, explicit_only=False)
    neutral_smiles = dm.to_smiles(dm.to_neutral(cation))
    assert neutral_smiles == "[NH4]"

    anion = dm.to_mol("O=C(c1ccccc1)[O-]", add_hs=False, explicit_only=False)
    uncharged_mol = dm.to_neutral(anion)
    # The formal charges over all atoms must cancel out.
    total_charge = sum(atom.GetFormalCharge() for atom in uncharged_mol.GetAtoms())
    assert total_charge == 0
Exemplo n.º 5
0
def test_to_mol():
    """Check atom counts from `dm.to_mol` with and without added hydrogens."""
    valid_smiles = "O=C(C)Oc1ccccc1C(=O)O"

    without_hs = dm.to_mol(valid_smiles)
    assert without_hs.GetNumAtoms() == 13

    with_hs = dm.to_mol(valid_smiles, add_hs=True)
    assert with_hs.GetNumAtoms() == 21

    # An unparsable string is expected to give None.
    unparsable = dm.to_mol("fake_smiles")
    assert unparsable is None
Exemplo n.º 6
0
def assemble_fragment_order(
    fragmentlist,
    seen=None,
    allow_incomplete: bool = False,
    max_n_mols: float = float("inf"),
    RXNS=None,
):
    """Assemble a list of fragments into a set of possible molecules under the given reaction rules.

    ..note ::
        We are of course assuming:
        1. that the order in the fragmentlist matters;
        2. that none of the fragments has explicitly defined hydrogen atoms;
        3. that only a list of unique molecules is internally maintained.

    Args:
        fragmentlist: iterable of original fragments to grow. It is copied, so the
            caller's object is not modified.
        seen: original molecule used as base. If None, the first element of the
            fragment list is popped and used instead (an empty list then raises
            `IndexError`).
        allow_incomplete: whether to also yield assembled molecules that do not
            use every fragment (the intermediates of each level).
        max_n_mols: maximum number of molecules to yield.
        RXNS: reactions to try at each step. Defaults to `ALL_BRICS_RETRO`.

    Yields:
        Assembled molecules, at most `max_n_mols` of them.
    """

    if RXNS is None:
        RXNS = ALL_BRICS_RETRO

    fragmentlist = list(fragmentlist)
    yield_counter = 0
    if seen is None:
        seen = fragmentlist.pop(0)
    seen = [Chem.MolToSmiles(seen)]  # only one molecule to assemble
    while yield_counter < max_n_mols and len(fragmentlist) > 0:
        # find all the ways to add this fragment to the molecules seen so far
        frag = fragmentlist.pop(0)
        level_set = [dm.to_mol(x) for x in seen]
        seen = set()
        for sm in level_set:
            try:
                for rxn in RXNS:
                    for m, mSmi in _run_at_all_rct(rxn, frag, sm):
                        if allow_incomplete and mSmi not in seen:
                            yield m
                            yield_counter += 1
                            # Enforce the cap inside a level too. It used to be
                            # checked only between levels, so a single level
                            # could yield more than `max_n_mols` molecules.
                            if yield_counter >= max_n_mols:
                                return
                        seen.add(mSmi)
            except Exception as e:
                # Best effort: report the failure and move on to the next base molecule.
                print(e)

    for m in seen:
        if yield_counter < max_n_mols:
            yield dm.to_mol(m)
            yield_counter += 1
Exemplo n.º 7
0
def test_sdf_props_and_conformer_preserved(tmp_path):
    """Write a molecule with properties and one conformer to SDF and read it back."""
    sdf_path = tmp_path / "test.sdf"

    # Build the molecule, attach properties and a single conformer, then save it.
    props = {"test_int": 588, "test_str": "hello"}
    smiles = "CC1(C2C(C3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O)O"

    written = dm.to_mol(smiles)
    written = dm.set_mol_props(written, props)
    written = dm.conformers.generate(written, n_confs=1)
    expected_positions = written.GetConformer().GetPositions()
    dm.to_sdf(written, sdf_path)

    # Load the first molecule back from the file.
    loaded = dm.read_sdf(sdf_path)[0]

    # Properties must be identical.
    assert loaded.GetPropsAsDict() == props

    # Exactly one 3D conformer with (almost) the same coordinates.
    conformer = loaded.GetConformer()
    assert loaded.GetNumConformers() == 1
    assert conformer.Is3D()
    np.testing.assert_almost_equal(conformer.GetPositions(), expected_positions, decimal=4)
Exemplo n.º 8
0
def smiles_to_fingerprint(smiles):
    """Return the SMILES of the parsed molecule and the on-bits of its Morgan fingerprint.

    Args:
        smiles: value converted with `str()` and parsed with `dm.to_mol(..., ordered=True)`.

    Returns:
        A `(smiles, on_bits)` tuple where `smiles` comes from `dm.to_smiles` and
        `on_bits` is the list of set bit indices of an 8192-bit, radius-2 Morgan
        bit vector computed without chirality.
    """
    mol = dm.to_mol(str(smiles), ordered=True)

    morgan_options = dict(
        radius=2,
        nBits=8192,
        invariants=[],
        fromAtoms=[],
        useChirality=False,
        useBondTypes=True,
        useFeatures=False,
    )
    bit_vector = rdMolDescriptors.GetMorganFingerprintAsBitVect(mol, **morgan_options)

    standard_smiles = dm.to_smiles(mol)
    on_bits = list(bit_vector.GetOnBits())
    return standard_smiles, on_bits
Exemplo n.º 9
0
def test_fp_deprecated_args_warnings():
    """Each `dm.to_fp` call below must emit exactly one DeprecationWarning."""
    mol = dm.to_mol("CC(=O)Oc1ccccc1C(=O)O")

    # Two argument sets, each expected to trigger a single deprecation warning.
    argument_sets = [
        dict(
            mol=mol,
            radius=3,
            fp_size=2048,
            useFeatures=True,
            as_array=True,
            fp_type="ecfp",
        ),
        dict(
            mol=mol,
            use_features=True,
            as_array=True,
            fp_type="ecfp",
        ),
    ]

    for kwargs in argument_sets:
        with warnings.catch_warnings(record=True) as caught:
            dm.to_fp(**kwargs)

            assert len(caught) == 1
            last_warning = caught[-1]
            assert issubclass(last_warning.category, DeprecationWarning)
            assert "will be removed in datamol 0.5.0" in str(last_warning.message)
Exemplo n.º 10
0
def test_to_fp():
    """Check the length and the number of set bits of the default fingerprint."""
    mol = dm.to_mol("CC(=O)Oc1ccccc1C(=O)O")

    # The fingerprint is computed once per assertion, as two independent calls.
    fp_for_size = dm.to_fp(mol)
    assert fp_for_size.shape[0] == 2048

    fp_for_sum = dm.to_fp(mol)
    assert fp_for_sum.sum() == 29
Exemplo n.º 11
0
def recap(
    mol: Chem.Mol,
    remove_parent: bool = False,
    sanitize: bool = True,
    fix: bool = True,
):
    """Fragment the molecule using the recap algorithm.

    Args:
        mol: a molecule.
        remove_parent: when True, return only the fragments; otherwise the input
            molecule is placed first in the returned list.
        sanitize: whether to pass every fragment through `dm.sanitize_mol`.
        fix: whether to pass every fragment through `dm.fix_mol`.

    Returns:
        A list of molecules; fragments that ended up as None are dropped.
    """
    hierarchy = Recap.RecapDecompose(mol)
    fragments = list(map(dm.to_mol, hierarchy.GetAllChildren().keys()))

    # Fixing runs over all fragments before sanitization starts.
    if fix:
        fragments = list(map(dm.fix_mol, fragments))
    if sanitize:
        fragments = list(map(dm.sanitize_mol, fragments))

    fragments = [fragment for fragment in fragments if fragment is not None]

    return fragments if remove_parent else [mol] + fragments
Exemplo n.º 12
0
def frag(
    mol: Chem.Mol,
    remove_parent: bool = False,
    sanitize: bool = True,
    fix: bool = True,
):
    """Generate all possible fragmentations of a molecule with Fraggle.

    Args:
        mol: a molecule.
        remove_parent: when True, return only the fragments; otherwise the input
            molecule is placed first in the returned list.
        sanitize: whether to pass every fragment through `dm.sanitize_mol`.
        fix: whether to pass every fragment through `dm.fix_mol`.

    Returns:
        A list of molecules; fragments that ended up as None are dropped.
    """
    fragmentations = FraggleSim.generate_fraggle_fragmentation(mol)

    # Each fragmentation is a dot-separated SMILES; collect the unique pieces.
    unique_smiles = set()
    for fragmentation in fragmentations:
        unique_smiles.update(piece.strip() for piece in fragmentation.split("."))

    # Reverse-sorted for a deterministic order.
    fragments = [dm.to_mol(piece) for piece in sorted(unique_smiles, reverse=True)]

    # Fixing runs over all fragments before sanitization starts.
    if fix:
        fragments = list(map(dm.fix_mol, fragments))
    if sanitize:
        fragments = list(map(dm.sanitize_mol, fragments))

    fragments = [fragment for fragment in fragments if fragment is not None]

    return fragments if remove_parent else [mol] + fragments
Exemplo n.º 13
0
def test_copy_mol_props():
    """Properties set on one molecule must be copied to another by `dm.copy_mol_props`."""
    source = dm.to_mol("CCC")
    destination = dm.to_mol("CC")

    # A mix of value types, including one that is not a plain scalar.
    props = {
        "bool": True,
        "number": 55,
        "float": 5.555,
        "string": "hello",
        "something_else": type(int),
    }
    dm.set_mol_props(source, props)

    dm.copy_mol_props(source, destination)

    copied = destination.GetPropsAsDict()
    assert copied == source.GetPropsAsDict()
Exemplo n.º 14
0
def test_to_from_text(tmp_path):
    """Round-trip a list of molecules through `.smi` files and file-like objects."""
    smiles_list = [
        "Cn1c(=S)ccc2nc[nH]c21",
        "Clc1n[nH]c2c1=[NH+]C(c1ccc[nH+]c1)C[NH+]=2",
        "Fc1ccsc1",
        "N#Cc1cc2c(o1)[NH2+]CCN2Cn1cnc2c1CSCC2",
        "O=CN1CCC2NC=CC2C1",
        "Oc1[nH]nc2c1-n1ncnc1C2",
        "OC1=NNC2(OC=CCO2)C2(C3CCCc4nonc43)NN=NN12",
        "[NH-]Sc1cc2nc[nH+]cc2o1",
        "[NH3+]C12CNCCOC1(N1CCCCC1)C=C(F)NC2",
    ]
    mols = list(map(dm.to_mol, smiles_list))
    smi_path = tmp_path / "mols.smi"

    # Write to disk, read back, and compare the SMILES.
    dm.to_smi(mols, smi_path)
    reloaded = dm.read_smi(smi_path)
    assert [dm.to_smiles(mol) for mol in reloaded] == smiles_list

    # An empty list must raise when `error_if_empty` is set.
    with pytest.raises(ValueError):
        dm.to_smi([], smi_path, error_if_empty=True)

    smi_path.unlink()

    # Writing to a file-like object must give one SMILES per line.
    buffer = io.StringIO()
    dm.to_smi(mols, buffer)
    written_lines = buffer.getvalue().strip().split("\n")
    assert written_lines == smiles_list
Exemplo n.º 15
0
def test_adjust_singleton():
    """`dm.adjust_singleton` should leave two fragments, one of them matching "CC"."""
    mol = dm.to_mol("Cl.[N:1]1=CC(O)=CC2CCCCC12.CC.C")
    fixed_mol = dm.adjust_singleton(mol)

    remaining_fragments = Chem.rdmolops.GetMolFrags(fixed_mol)
    assert len(remaining_fragments) == 2

    # The two-carbon pattern must still be present in the result.
    two_carbon_pattern = Chem.MolFromSmiles("CC")
    assert fixed_mol.HasSubstructMatch(two_carbon_pattern)
Exemplo n.º 16
0
def test_mmpa():
    """Check the number of MMPA cuts and the presence of one known cut record."""
    mol = dm.to_mol("CCCOCc1cc(c2ncccc2)ccc1")

    cuts = dm.fragment.mmpa_cut(mol)
    assert len(cuts) == 39

    expected_record = (
        "CCCOCc1cccc(-c2ccccn2)c1,C(C[*:2])[*:1],C[*:1].c1ccc(-c2cccc(CO[*:2])c2)nc1\n"
    )
    assert expected_record in cuts
Exemplo n.º 17
0
def _preprocess(i, row):
    """Standardize the molecule of a row and attach several representations to it.

    Args:
        i: unused; kept for the caller's signature.
        row: mapping-like record; the SMILES is read from `row[smiles_column]`,
            where `smiles_column` comes from the enclosing/module scope.

    Returns:
        The same `row` with the keys "standard_smiles", "selfies", "inchi",
        "inchikey" and "onbits_fp" set.
    """
    # Parse, then fix, sanitize and standardize, in that order.
    mol = dm.to_mol(str(row[smiles_column]), ordered=True)
    mol = dm.fix_mol(mol)
    mol = dm.sanitize_mol(mol, sanifix=True, charge_neutral=False)
    mol = dm.standardize_mol(
        mol,
        disconnect_metals=False,
        normalize=True,
        reionize=True,
        uncharge=False,
        stereo=True,
    )

    # Radius-2, 8192-bit Morgan fingerprint that takes chirality into account.
    morgan_options = dict(
        radius=2,
        nBits=8192,
        invariants=[],
        fromAtoms=[],
        useChirality=True,
        useBondTypes=True,
        useFeatures=False,
    )
    bit_vector = rdMolDescriptors.GetMorganFingerprintAsBitVect(mol, **morgan_options)

    row["standard_smiles"] = dm.standardize_smiles(dm.to_smiles(mol))
    row["selfies"] = dm.to_selfies(mol)
    row["inchi"] = dm.to_inchi(mol)
    row["inchikey"] = dm.to_inchikey(mol)
    row["onbits_fp"] = list(bit_vector.GetOnBits())

    return row
Exemplo n.º 18
0
def test_all_bond_remove():
    """`dm.actions.all_bond_remove` must return a list."""
    mol = dm.to_mol("OC1=CC2CCCCC2[N:1]=C1")

    result = dm.actions.all_bond_remove(mol)

    assert isinstance(result, list)
Exemplo n.º 19
0
def test_standardize_mol():
    """Compare `dm.standardize_smiles` with `dm.standardize_mol` on a sodium-containing SMILES."""
    sm = "[Na]OC1=CC2CCCCC2N=C1"

    # NOTE(review): both sides hand a SMILES *string* (not a molecule) to
    # `dm.to_smiles`; confirm this is intended, otherwise the comparison may
    # not be checking what it appears to check.
    expected = dm.to_smiles(dm.standardize_smiles(sm))

    parsed = dm.to_mol(sm)
    standard_mol = dm.standardize_mol(parsed, disconnect_metals=True, uncharge=True)
    observed = dm.to_smiles(Chem.MolToSmiles(standard_mol))

    assert expected == observed
Exemplo n.º 20
0
def test_enumerate_tautomers():
    """The enumerated tautomers must be exactly the two expected SMILES."""
    mol = dm.to_mol("OC1=CC2CCCCC2[N:1]=C1")

    tautomers = dm.enumerate_tautomers(mol, n_variants=10)

    # Compare as sets: order and duplicates are irrelevant.
    observed = set(map(dm.to_smiles, tautomers))
    expected = {"O=C1C=[N:1]C2CCCCC2C1", "OC1=CC2CCCCC2[N:1]=C1"}
    assert observed == expected
Exemplo n.º 21
0
def test_sanitize():
    """Exercise `dm.sanitize_mol` and `dm.sanitize_smiles` on valid, fixable and broken inputs."""
    reference = "CC(=O)Oc1ccccc1C(=O)O"
    unsanitized = dm.to_mol(reference, sanitize=False)
    sanitized = dm.sanitize_mol(unsanitized, charge_neutral=True)
    assert dm.to_smiles(sanitized) == "CC(=O)Oc1ccccc1C(=O)O"

    # None in, None out.
    assert dm.sanitize_mol(None, charge_neutral=True) is None

    smiles_list = (
        "CC.[H][N:1]1(C)=CC(O)=CC2CCCCC12",  # broken
        "O=c1ccc2ccccc2n1",  # sanitize
        "Cc1nnnn1C",  # none
        "CCc1ccc2nc(=O)c(cc2c1)Cc1nnnn1C1CCCCC1",  # sanitize
        "c1cnc2cc3ccnc3cc12",  # none
        "c1cc2cc3ccnc3cc2n1",  # none
        "O=c1ccnc(c1)-c1cnc2cc3ccnc3cc12",  # sanitize
        "O=c1ccnc(c1)-c1cc1",  # broken
    )
    broken, fixable, valid = smiles_list[0], smiles_list[1], smiles_list[2]

    # Plain parsing fails on the fixable entry but succeeds on the valid one.
    assert dm.to_mol(fixable) is None
    assert dm.to_mol(valid) is not None

    # `sanitize_mol` rejects None and the broken entry, and accepts the other two.
    assert dm.sanitize_mol(None) is None
    assert dm.sanitize_mol(dm.to_mol(broken, sanitize=False)) is None
    assert dm.sanitize_mol(dm.to_mol(fixable, sanitize=False)) is not None
    assert dm.sanitize_mol(dm.to_mol(valid, sanitize=False)) is not None

    # The fixable entry is expected to come out as its [nH] form.
    repaired = dm.sanitize_mol(dm.to_mol(fixable, sanitize=False))
    assert dm.to_smiles(repaired) == dm.sanitize_smiles("O=c1ccc2ccccc2[nH]1")

    # Six of the eight entries survive `sanitize_smiles`.
    fixed_smiles = [dm.sanitize_smiles(entry) for entry in smiles_list]
    assert sum(entry is not None for entry in fixed_smiles) == 6
Exemplo n.º 22
0
def test_get_all_path_between():
    """Check the paths found between atoms 8 and 4 with and without `ignore_cycle_basis`."""
    mol = dm.to_mol("c1cc2cccccc2c1")

    short_path = [8, 2, 3, 4]
    medium_path = [8, 7, 6, 5, 4]
    long_path = [8, 9, 0, 1, 2, 3, 4]

    every_path = dm.get_all_path_between(mol, 8, 4, ignore_cycle_basis=False)
    assert every_path == [short_path, medium_path, long_path]

    # With `ignore_cycle_basis=True` the longest path is no longer reported.
    reduced_paths = dm.get_all_path_between(mol, 8, 4, ignore_cycle_basis=True)
    assert reduced_paths == [short_path, medium_path]
Exemplo n.º 23
0
def test_randomize_atoms():
    """Randomizing atom order must preserve the sum of atomic numbers."""

    def _atomic_numbers(molecule):
        # Atomic numbers in the molecule's current atom order.
        return [atom.GetAtomicNum() for atom in molecule.GetAtoms()]

    mol = dm.to_mol("c1ccc(C(=O)O)c(c1)OC(=O)C")
    numbers_before = _atomic_numbers(mol)

    shuffled = dm.randomize_atoms(mol)
    numbers_after = _atomic_numbers(shuffled)

    assert sum(numbers_before) == sum(numbers_after)
Exemplo n.º 24
0
def test_reorder_atoms():
    """Check the atomic-number sequence before and after `dm.reorder_atoms`."""

    def _atomic_numbers(molecule):
        # Atomic numbers in the molecule's current atom order.
        return [atom.GetAtomicNum() for atom in molecule.GetAtoms()]

    mol = dm.to_mol("c1ccc(C(=O)O)c(c1)OC(=O)C", add_hs=False, explicit_only=False)

    # Order as parsed from the SMILES.
    assert _atomic_numbers(mol) == [6, 6, 6, 6, 6, 8, 8, 6, 6, 8, 6, 8, 6]

    # Order after reordering.
    reordered = dm.reorder_atoms(mol)
    assert _atomic_numbers(reordered) == [6, 8, 8, 8, 6, 6, 6, 6, 8, 6, 6, 6, 6]
Exemplo n.º 25
0
def test_to_sdf_single_mol(tmp_path):
    """A single molecule written with `dm.to_sdf` must read back with the same SMILES."""
    target = tmp_path / "test.sdf"

    original = dm.to_mol(
        "CC1(C2C(C3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O)O"
    )
    dm.to_sdf(original, target)

    reloaded = dm.read_sdf(target)
    assert dm.to_smiles(original) == dm.to_smiles(reloaded[0])
Exemplo n.º 26
0
def test_cluster_mols():
    """Check the sizes of the first 13 clusters on the first 100 FreeSolv molecules."""
    # First 100 molecules of the FreeSolv dataset.
    dataset = dm.data.freesolv()
    mols = [dm.to_mol(entry) for entry in dataset["smiles"].iloc[:100].tolist()]

    _, mol_clusters = dm.cluster_mols(mols, cutoff=0.7)

    expected_sizes = [15, 12, 3, 6, 9, 9, 4, 1, 4, 3, 3, 2, 3]
    observed_sizes = list(map(len, mol_clusters[:13]))
    assert observed_sizes == expected_sizes
Exemplo n.º 27
0
def test_pick_centroids():
    """Check the indices picked by `dm.pick_centroids` on the first 100 FreeSolv molecules."""
    dataset = dm.data.freesolv()
    mols = [dm.to_mol(entry) for entry in dataset["smiles"].iloc[:100].tolist()]

    indices, _ = dm.pick_centroids(
        mols,
        npick=18,
        threshold=0.7,
        method="sphere",
        n_jobs=-1,
    )

    expected_indices = np.array(
        [0, 1, 2, 3, 4, 5, 8, 11, 13, 15, 16, 17, 18, 19, 21, 23, 25, 32]
    )
    assert np.all(indices == expected_indices)
Exemplo n.º 28
0
def test_sanitize_mol_multiple_conformers_no_warning(caplog):
    """Sanitizing a multi-conformer molecule with `verbose=False` must log nothing."""
    # Build a molecule and request ten conformers for it.
    mol = dm.to_mol("CCC[N+](=O)[O-]")
    mol = dm.conformers.generate(mol, n_confs=10)

    dm.sanitize_mol(mol, verbose=False)

    # Nothing may have been captured by the logging fixture.
    assert caplog.text == ""
Exemplo n.º 29
0
def test_to_smarts():
    """`dm.to_smarts` must give the same SMARTS for both `keep_hs` values, and None for None."""
    mol = dm.to_mol("O=C(C)Oc1ccccc1C(=O)O")
    expected = "[CH3]-[C](=[O])-[O]-[c]1:[cH]:[cH]:[cH]:[cH]:[c]:1-[C](=[O])-[OH]"

    with_hs = dm.to_smarts(mol, keep_hs=True)
    assert with_hs == expected

    without_hs = dm.to_smarts(mol, keep_hs=False)
    assert without_hs == expected

    assert dm.to_smarts(None) is None
Exemplo n.º 30
0
def test_break_mol():
    """Check the BRICS fragments and the fragmentation tree returned by `break_mol`."""
    mol = dm.to_mol("CCCOCc1cc(c2ncccc2)ccc1")

    # Only the first and last returned items are checked here.
    fragments, *_, tree = dm.fragment.break_mol(
        mol,
        randomize=False,
        mode="brics",
        returnTree=True,
    )

    assert fragments == ["CCC", "O", "C", "c1ccncc1", "c1ccccc1"]

    expected_nodes = [0, 1, 2, 3, 4, 5, 6, 7, 8]
    expected_edges = [
        (0, 1),
        (0, 2),
        (2, 3),
        (2, 4),
        (4, 5),
        (4, 6),
        (6, 7),
        (6, 8),
    ]
    assert list(tree.nodes) == expected_nodes
    assert list(tree.edges) == expected_edges