Exemplo n.º 1
0
def test_exception_smarts():
    """Malformed SMARTS input must make ``Molecule`` raise ``ValueError``."""
    invalid_patterns = (
        'fake',        # not a SMARTS pattern (parse error expected)
        '[#6](=[#6]',  # opening parenthesis is never closed
    )
    for pattern in invalid_patterns:
        with pytest.raises(ValueError):
            Molecule(pattern, 'smarts')
Exemplo n.º 2
0
def mols():
    """Build two molecules from the SMILES strings 'c1ccc1' and 'CNC'.

    Returns
    -------
    list
        The two ``Molecule`` objects, in the order listed above.
    """
    return [Molecule(smi, 'smiles') for smi in ('c1ccc1', 'CNC')]
Exemplo n.º 3
0
def test_exception_inchi():
    """Strings that are not InChI identifiers must make ``Molecule`` raise ``ValueError``."""
    rejected_inputs = (
        'fake',                         # arbitrary text
        'RYYVLZVUVIJVGH-UHFFFAOYSA-N',  # an InChI key rather than an InChI string
    )
    for text in rejected_inputs:
        with pytest.raises(ValueError):
            Molecule(text, 'inchi')
Exemplo n.º 4
0
def test_mol2xyz_MMFF(caffeine_smiles):
    """Generate xyz data with the 'MMFF' optimizer and check sizes and stored arguments."""
    mol = Molecule(caffeine_smiles, 'smiles')
    mol.hydrogens('add')

    # first run: only the iteration limit is customised
    mol.to_xyz('MMFF', maxIters=210)
    assert mol.xyz.geometry.shape[0] == mol.rdkit_molecule.GetNumAtoms()

    # second run: also select the MMFF94s variant and confirm it is recorded
    variant_kwargs = {'maxIters': 110, 'mmffVariant': 'MMFF94s'}
    mol.to_xyz('MMFF', **variant_kwargs)
    assert mol.xyz.atomic_symbols.shape[0] == mol.rdkit_molecule.GetNumAtoms()
    assert mol.MMFF_args['mmffVariant'] == 'MMFF94s'

    # the molecule's own default arguments must be accepted as-is
    default_kwargs = mol._default_MMFF_args
    mol.to_xyz('MMFF', **default_kwargs)
Exemplo n.º 5
0
def test_mol2xyz_UFF(caffeine_smiles):
    """Generate xyz data with the 'UFF' optimizer and check sizes and stored arguments."""
    mol = Molecule(caffeine_smiles, 'smiles')
    mol.hydrogens('add')

    # first run: override a single UFF keyword
    mol.to_xyz('UFF', ignoreInterfragInteractions=False)
    assert mol.xyz.geometry.shape[0] == mol.rdkit_molecule.GetNumAtoms()

    # second run: change the iteration limit and confirm it is recorded
    mol.to_xyz('UFF', maxIters=110)
    assert mol.xyz.atomic_symbols.shape[0] == mol.rdkit_molecule.GetNumAtoms()
    assert mol.UFF_args['maxIters'] == 110

    # the molecule's own default arguments must be accepted as-is
    default_kwargs = mol._default_UFF_args
    mol.to_xyz('UFF', **default_kwargs)
Exemplo n.º 6
0
def test_exception_smiles():
    """Invalid SMILES input must make ``Molecule`` raise ``ValueError``."""
    invalid_smiles = (
        'fake',    # syntax error: SMILES parse error
        'CO(C)C',  # wrong SMILES
        'c1cc1',   # cannot be kekulized
    )
    for smi in invalid_smiles:
        with pytest.raises(ValueError):
            Molecule(smi, 'smiles')
Exemplo n.º 7
0
def mols2():
    """Build four molecules from SMILES and run ``to_xyz`` with the 'UFF' optimizer on each.

    Returns
    -------
    list
        The ``Molecule`` objects for 'c1ccc1', 'CNC', 'CC' and 'CCC', in that order.
    """
    smiles_strings = ('c1ccc1', 'CNC', 'CC', 'CCC')

    # create every molecule first, then generate coordinates in a second pass
    molecules = [Molecule(smi, 'smiles') for smi in smiles_strings]
    for molecule in molecules:
        molecule.to_xyz(optimizer='UFF')

    return molecules
Exemplo n.º 8
0
def mols():
    """Return a ``Molecule`` built from SMILES 'O' whose xyz data is replaced by hand.

    The injected ``XYZ`` object holds three atoms (O, H, H) with fixed coordinates.
    """
    # atomic numbers and symbols are stored as (3, 1) column arrays
    atomic_numbers = np.array([8, 1, 1]).reshape((3, 1))
    atomic_symbols = np.array(['O', 'H', 'H']).reshape((3, 1))
    coordinates = np.array([
        [1.464, 0.707, 1.056],
        [0.878, 1.218, 0.498],
        [2.319, 1.126, 0.952],
    ])
    handmade_xyz = XYZ(coordinates, atomic_numbers, atomic_symbols)

    molecule = Molecule('O', 'smiles')
    # bypass the public API and overwrite the private xyz attribute directly
    molecule._xyz = handmade_xyz
    return molecule
Exemplo n.º 9
0
def test_mol2smiles(caffeine_smiles, caffeine_canonical, caffeine_kekulize):
    """Check ``to_smiles`` output for the default, kekulized and canonical settings."""
    mol = Molecule(caffeine_smiles, 'smiles')

    # default conversion is expected to give the canonical form
    mol.to_smiles()
    assert isinstance(mol.rdkit_molecule, Chem.Mol)
    assert mol.smiles == caffeine_canonical

    # kekulized output
    mol.to_smiles(kekuleSmiles=True)
    assert mol.smiles == caffeine_kekulize

    # kekulize takes priority over canonical
    # NOTE(review): only kekuleSmiles is passed here; confirm whether canonical=True was intended too
    mol.to_smiles(kekuleSmiles=True)
    assert mol.smiles == caffeine_kekulize

    # without kekuleSmiles the canonical form is expected again
    mol.to_smiles(canonical=True)
    assert mol.smiles == caffeine_canonical
Exemplo n.º 10
0
def test_mol2xyz_exception(caffeine_smiles):
    """``to_xyz`` must raise ``ValueError`` with no arguments and with the optimizer 'uff'."""
    mol = Molecule(caffeine_smiles, 'smiles')
    mol.hydrogens('add')

    # each tuple holds the positional arguments of one rejected call
    for call_args in ((), ('uff',)):
        with pytest.raises(ValueError):
            mol.to_xyz(*call_args)
Exemplo n.º 11
0
def test_mol2inchi_defaultargs(caffeine_inchi):
    """Check that ``to_inchi`` accepts the default arguments and records an override.

    The conversion is run with no arguments, with the molecule's default
    arguments, and finally with ``treatWarningAsError=True``; the last value
    must be visible in ``inchi_args``.
    """
    m = Molecule(caffeine_inchi, 'inchi')
    m.to_inchi()
    m.to_inchi(**m._default_rdkit_inchi_args)
    # all the args & treatWarningAsError=True
    # Copy first so the molecule's private default dict is not mutated in place.
    args = dict(m._default_rdkit_inchi_args)
    args['treatWarningAsError'] = True
    m.to_inchi(**args)
    assert m.inchi_args['treatWarningAsError'] is True
Exemplo n.º 12
0
def test_mol2smarts_defaultargs(caffeine_smarts):
    """Check that ``to_smarts`` accepts the default arguments and records an override.

    The conversion is run with no arguments, with the molecule's default
    arguments, and finally with ``isomericSmiles=False``; the last value must
    be visible in ``smarts_args``.
    """
    m = Molecule(caffeine_smarts, 'smarts')
    m.to_smarts()
    m.to_smarts(**m._default_rdkit_smarts_args)
    # all the args & isomericSmiles=False
    # Copy first so the molecule's private default dict is not mutated in place.
    args = dict(m._default_rdkit_smarts_args)
    args['isomericSmiles'] = False
    m.to_smarts(**args)
    assert m.smarts_args['isomericSmiles'] is False
Exemplo n.º 13
0
def test_mol2smiles_defaultargs(caffeine_smiles):
    """Check that ``to_smiles`` accepts the default arguments and records an override.

    The conversion is run with no arguments, with the molecule's default
    arguments, and finally with ``canonical=False``; the last value must be
    visible in ``smiles_args``.
    """
    m = Molecule(caffeine_smiles, 'smiles')
    m.to_smiles()
    m.to_smiles(**m._default_rdkit_smiles_args)
    # all the args & canonical=False
    # Copy first so the molecule's private default dict is not mutated in place.
    args = dict(m._default_rdkit_smiles_args)
    args['canonical'] = False
    m.to_smiles(**args)
    assert m.smiles_args['canonical'] is False
Exemplo n.º 14
0
def load_xyz_polarizability():
    """Load and return xyz files and polarizability (Bohr^3).

    The xyz coordinates of small organic molecules are optimized at the BP86/def2svp level of theory.
    The polarizability of the molecules is also calculated at the same level of theory.

    =================   ======================
    rows                                    50
    Columns                                  1
    header                      polarizability
    molecules rep.                         xyz
    Features                                 0
    Returns             1 list and 1 dataframe
    =================   ======================

    Returns
    -------
    molecules : list
        The list of molecule objects with xyz coordinates.

    pol : pandas dataframe
        The polarizability of each molecule as a column of dataframe.

    Examples
    --------
    >>> from chemml.datasets import load_xyz_polarizability
    >>> molecules, polarizabilities = load_xyz_polarizability()
    >>> print(len(molecules))
    50
    >>> print(polarizabilities.shape)
    (50, 1)
    """
    data_path = pkg_resources.resource_filename(
        'chemml', os.path.join('datasets', 'data', 'organic_xyz'))

    molecules = []
    # the packaged files are named 1_opt.xyz ... 50_opt.xyz
    for index in range(1, 51):
        molecule = Molecule(os.path.join(data_path, "%i_opt.xyz" % index), "xyz")
        molecule.to_smiles()
        # NOTE(review): presumably clears the pybel object once SMILES is available; confirm intent
        molecule.pybel_molecule = None
        molecules.append(molecule)

    polarizabilities = pd.read_csv(os.path.join(data_path, 'pol.csv'))
    return molecules, polarizabilities
Exemplo n.º 15
0
def test_load_xyz_scenarios(xyz_path):
    """Exercise ``to_xyz`` on a molecule loaded from '1_opt.xyz' in three situations."""
    xyz_file = os.path.join(xyz_path, '1_opt.xyz')
    mol = Molecule(xyz_file, 'xyz')
    expected_shape = (28, 3)

    # scenario 1: xyz exists and no optimizer is given; the call must return a truthy value
    assert mol.to_xyz()

    # scenario 2: xyz is cleared, the pybel molecule exists, no optimizer is given
    mol._xyz = None
    mol.to_xyz()
    assert mol.xyz.geometry.shape == expected_shape

    # scenario 3: xyz is cleared, the pybel molecule exists, optimizer is UFF
    mol._xyz = None
    mol.to_xyz(optimizer='UFF', maxIters=10210)
    assert mol.xyz.geometry.shape == expected_shape
Exemplo n.º 16
0
def test_load_xyzfile(xyz_path):
    """Load '1_opt.xyz' and check creator, geometry shape and the derived identifiers."""
    xyz_file = os.path.join(xyz_path, '1_opt.xyz')
    expected_smiles = 'c1ccc(CC2CCCC2)cc1'
    expected_smarts = '[#6]1-[#6]-[#6]-[#6](-[#6]-1)-[#6]-[#6]1:[#6]:[#6]:[#6]:[#6]:[#6]:1'
    expected_inchi = 'InChI=1S/C12H16/c1-2-6-11(7-3-1)10-12-8-4-5-9-12/h1-3,6-7,12H,4-5,8-10H2'

    mol = Molecule(xyz_file, 'xyz')

    # state right after loading
    assert mol.creator == ('XYZ', xyz_file)
    assert isinstance(mol.pybel_molecule, pybel.Molecule)
    assert mol.xyz.geometry.shape == (28, 3)

    # conversions to the three string representations
    mol.to_smiles()
    assert mol.smiles == expected_smiles
    mol.to_smarts()
    assert mol.smarts == expected_smarts
    mol.to_inchi()
    assert mol.inchi == expected_inchi
Exemplo n.º 17
0
def test_mol2smarts(caffeine_smarts):
    """``to_smarts`` must reproduce the input SMARTS, with and without ``isomericSmiles``."""
    mol = Molecule(caffeine_smarts, 'smarts')

    # default arguments
    mol.to_smarts()
    assert isinstance(mol.rdkit_molecule, Chem.Mol)
    assert mol.smarts == caffeine_smarts

    # isomericSmiles disabled: the same string is expected
    mol.to_smarts(isomericSmiles=False)
    assert mol.smarts == caffeine_smarts
Exemplo n.º 18
0
def bagofbounds(mol2, maxentry):
    """Compute Bag-of-Bonds features for one molecule and track the widest feature row.

    Parameters
    ----------
    mol2 :
        Object passed to ``Chem.MolToSmiles`` to obtain a SMILES string.
    maxentry :
        Largest number of feature columns seen so far.

    Returns
    -------
    tuple
        ``(features, maxentry)`` where ``maxentry`` is raised to
        ``features.shape[1]`` when that value is larger.
    """
    from chemml.chem import Molecule

    smiles = Chem.MolToSmiles(mol2)
    molecule = Molecule(smiles, 'smiles')
    molecule.hydrogens('add')
    # coordinates come from the MMFF optimizer ('UFF' was noted as an alternative)
    molecule.to_xyz(optimizer='MMFF', mmffVariant='MMFF94s', maxIters=300)
    print(molecule)
    molecule.visualize()

    from chemml.datasets import load_xyz_polarizability
    from chemml.chem import BagofBonds

    features = BagofBonds(const=1.0).represent(molecule)
    print(features)
    print("number of entry")
    n_columns = features.shape[1]
    print(n_columns)

    # keep the running maximum of the feature width
    maxentry = max(maxentry, features.shape[1])
    return features, maxentry
Exemplo n.º 19
0
def mols():
    """Return a single ``Molecule`` built from the SMILES string 'c1ccc1'."""
    return Molecule('c1ccc1', 'smiles')
Exemplo n.º 20
0
def mol_list():
    """Return two ``Molecule`` objects built from fixed polycyclic SMILES strings."""
    smiles_strings = (
        'c1cc2cnc3c(cnc4cc(-c5ncncn5)c5nsnc5c34)c2c2nsnc12',
        '[nH]1ccc2[nH]c3c4CC(=Cc4c4c[nH]cc4c3c12)c1scc2cc[nH]c12',
    )
    return [Molecule(smi, 'smiles') for smi in smiles_strings]
Exemplo n.º 21
0
def mols():
    """Return a single ``Molecule`` built from the SMILES string 'CCO'."""
    return Molecule('CCO', 'smiles')
Exemplo n.º 22
0
def test_load_inchi(caffeine_inchi):
    """A molecule created from InChI records its creator and holds an RDKit ``Mol``."""
    molecule = Molecule(caffeine_inchi, 'inchi')
    expected_creator = ('InChi', caffeine_inchi)
    assert molecule.creator == expected_creator
    assert isinstance(molecule.rdkit_molecule, Chem.Mol)
Exemplo n.º 23
0
def test_load_smarts(caffeine_smarts):
    """A molecule created from SMARTS records its creator and holds an RDKit ``Mol``."""
    molecule = Molecule(caffeine_smarts, 'smarts')
    expected_creator = ('SMARTS', caffeine_smarts)
    assert molecule.creator == expected_creator
    assert isinstance(molecule.rdkit_molecule, Chem.Mol)
Exemplo n.º 24
0
def test_load_smiles(caffeine_smiles):
    """A molecule created from SMILES records its creator and holds an RDKit ``Mol``."""
    molecule = Molecule(caffeine_smiles, 'smiles')
    expected_creator = ('SMILES', caffeine_smiles)
    assert molecule.creator == expected_creator
    assert isinstance(molecule.rdkit_molecule, Chem.Mol)
Exemplo n.º 25
0
def chemml_molecule_list():
    """Yield one list of ``Molecule`` objects for 'C', 'CC', 'CCC' and 'CC(C)C'."""
    smiles_strings = ('C', 'CC', 'CCC', 'CC(C)C')
    yield [Molecule(smi, 'smiles') for smi in smiles_strings]
Exemplo n.º 26
0
# Demo script: build a caffeine molecule from SMILES, generate xyz coordinates
# with the MMFF optimizer, and print its Bag-of-Bonds features.
from chemml.chem import Molecule
# Caffeine in three notations; only the SMILES string is used below.
caffeine_smiles = 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C'
caffeine_smarts = '[#6]-[#7]1:[#6]:[#7]:[#6]2:[#6]:1:[#6](=[#8]):[#7](:[#6](=[#8]):[#7]:2-[#6])-[#6]'
caffeine_inchi = 'InChI=1S/C8H10N4O2/c1-10-4-9-6-5(10)7(13)12(3)8(14)11(6)2/h4H,1-3H3'
mol = Molecule(caffeine_smiles, input_type='smiles')
# Hydrogens are added before the coordinates are generated.
mol.hydrogens('add')
mol.to_xyz(optimizer='MMFF', mmffVariant='MMFF94s', maxIters=300) # 'UFF'
print(mol)
mol.visualize()
#mol.visualize()



# NOTE: load_xyz_polarizability is imported but only referenced in the
# commented-out line below.
from chemml.datasets import load_xyz_polarizability
from chemml.chem import BagofBonds
#coordinates, y = load_xyz_polarizability(mol)
# Represent the single molecule with BagofBonds and print the result.
bob = BagofBonds(const= 1.0)
features = bob.represent(mol)
print(features)

Exemplo n.º 27
0
def test_mol2inchi(caffeine_inchi):
    """``to_inchi`` must reproduce the InChI string the molecule was created from."""
    molecule = Molecule(caffeine_inchi, 'inchi')
    molecule.to_inchi()
    assert isinstance(molecule.rdkit_molecule, Chem.Mol)
    assert molecule.inchi == caffeine_inchi
Exemplo n.º 28
0
def test_hydrogens(caffeine_smiles, caffeine_canonical, caffeine_inchi):
    """Check SMILES and InChI output before and after ``hydrogens('add')``."""
    smiles_with_hydrogens = (
        "[H]c1nc2c(c(=O)n(C([H])([H])[H])c(=O)n2C([H])([H])[H])n1C([H])([H])[H]"
    )

    # --- SMILES route ---
    from_smiles = Molecule(caffeine_smiles, 'smiles')
    from_smiles.to_smiles()
    assert isinstance(from_smiles.rdkit_molecule, Chem.Mol)
    assert from_smiles.smiles == caffeine_canonical

    # after adding hydrogens the SMILES must list them explicitly
    from_smiles.hydrogens('add', explicitOnly=False)
    from_smiles.to_smiles()
    assert from_smiles.smiles == smiles_with_hydrogens

    # --- InChI route ---
    from_inchi = Molecule(caffeine_inchi, 'inchi')
    from_inchi.to_inchi()
    assert from_inchi.inchi == caffeine_inchi

    # the InChI string is expected to be unchanged after adding hydrogens
    from_inchi.hydrogens('add')
    from_inchi.to_inchi()
    assert from_inchi.inchi == caffeine_inchi
Exemplo n.º 29
0
def test_hydrogens_exception(caffeine_smiles):
    """``hydrogens`` must raise ``ValueError`` for the unsupported action 'addHs'."""
    molecule = Molecule(caffeine_smiles, 'smiles')
    unsupported_action = 'addHs'
    with pytest.raises(ValueError):
        molecule.hydrogens(unsupported_action)
Exemplo n.º 30
0
def mol_single():
    """Return one ``Molecule`` built from a fixed polycyclic SMILES string."""
    return Molecule('c1cc2cnc3c(cnc4cc(-c5ncncn5)c5nsnc5c34)c2c2nsnc12', 'smiles')