Exemplo n.º 1
0
def normalize(smiles):
    """Return the SMILES of the standardized form of ``smiles``.

    The input is parsed with RDKit and then passed, in order, through
    ``Uncharger``, ``LargestFragmentChooser``, ``Chem.SanitizeMol``,
    ``Normalizer`` and ``TautomerCanonicalizer``.

    :param smiles: SMILES string of the molecule to standardize
    :return: SMILES string generated from the standardized molecule
    :raises ValueError: if RDKit cannot parse ``smiles``
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles reports a parse failure by returning None; fail here
        # with a clear message instead of deep inside the first MolVS step.
        raise ValueError("Could not parse SMILES: {!r}".format(smiles))

    mol = Uncharger().uncharge(mol)
    mol = LargestFragmentChooser()(mol)

    # SanitizeMol works in place on the chosen fragment.
    Chem.SanitizeMol(mol)

    mol = Normalizer().normalize(mol)
    mol = TautomerCanonicalizer().canonicalize(mol)

    return Chem.MolToSmiles(mol)
Exemplo n.º 2
0
def canonicalize(mols, ntauts):
    """Store a canonical tautomer under ``"mol_can"`` for every entry of ``mols``.

    For each key, the molecule stored under ``"mol"`` is canonicalized. When
    the canonical tautomer has the same SMILES as the stored molecule, the
    stored molecule object itself is reused; otherwise the newly generated
    tautomer is stored.

    :param mols: mapping whose values are dicts holding a molecule under ``"mol"``
    :param ntauts: value passed to ``TautomerCanonicalizer`` as ``max_tautomers``
    """
    for key in mols:
        entry = mols[key]
        candidate = TautomerCanonicalizer(max_tautomers=ntauts).canonicalize(entry["mol"])
        unchanged = Chem.MolToSmiles(mols[key]["mol"]) == Chem.MolToSmiles(candidate)
        # Keep the original object when canonicalization did not alter the SMILES.
        mols[key]["mol_can"] = mols[key]["mol"] if unchanged else candidate
Exemplo n.º 3
0
def FullStandardization(smi):
    """Standardize a SMILES string and return the resulting SMILES.

    The molecule is passed through ``Standardizer`` and
    ``LargestFragmentChooser``; if every remaining atom belongs to the allowed
    element set it is then passed through ``Uncharger`` and ``Reionizer``,
    stripped of stereochemistry and passed through ``TautomerCanonicalizer``.

    :param smi: SMILES string of the molecule to standardize
    :return: the standardized SMILES, or one of the status strings
        ``"Error 1"`` (RDKit could not parse ``smi``),
        ``"Error 2"`` (the molecule contains an element outside the allowed set),
        ``"Check manually"`` (any processing step raised an exception)
    """
    allowed_elements = {
        "H", "B", "C", "N", "O", "F", "Si", "P", "S", "Cl", "Se", "Br",
        "I"
    }
    try:
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # If rdkit could not parse the smiles, returns Error 1
            return "Error 1"

        STD = Standardizer()
        LFC = LargestFragmentChooser()
        UC = Uncharger()
        RI = Reionizer()
        TC = TautomerCanonicalizer()

        mol = STD(mol)
        mol = LFC(mol)

        actual_elements = {atom.GetSymbol() for atom in mol.GetAtoms()}
        if not actual_elements <= allowed_elements:
            # If molecule contains other than the allowed elements, returns "Error 2"
            return "Error 2"

        mol = UC(mol)
        mol = RI(mol)
        # Return value is ignored, so this is relied on to modify mol in place.
        RemoveStereochemistry(mol)
        mol = TC(mol)
        return Chem.MolToSmiles(mol)
    except Exception:
        # Deliberate catch-all so callers always get a string back; unlike a
        # bare except it lets KeyboardInterrupt / SystemExit propagate.
        return "Check manually"
Exemplo n.º 4
0
def query_canonicalize(mol, ntauts):
    """Return the canonical tautomer of the standardized form of ``mol``.

    The molecule is reduced with ``Standardizer.fragment_parent`` followed by
    ``Standardizer.charge_parent`` (with ``skip_standardize=True``) and then
    canonicalized.

    :param mol: molecule to process
    :param ntauts: value passed to ``TautomerCanonicalizer`` as ``max_tautomers``
    :return: the canonical tautomer, or ``mol`` unchanged if any step raises
    """
    try:
        standardizer = Standardizer()
        mol_sta = standardizer.charge_parent(
            standardizer.fragment_parent(mol), skip_standardize=True)
        return TautomerCanonicalizer(
            max_tautomers=ntauts).canonicalize(mol_sta)
    except Exception:
        # Best-effort: fall back to the input molecule, but do not swallow
        # KeyboardInterrupt / SystemExit as a bare except would.
        return mol
Exemplo n.º 5
0
def normalize(smiles):
    """Return the SMILES of a best-effort standardized form of ``smiles``.

    The parsed molecule is passed through ``Uncharger``,
    ``LargestFragmentChooser``, ``Chem.SanitizeMol`` and ``Normalizer``. Each
    of these four steps is optional: if it raises, processing continues with
    the molecule as it was before that step. The final
    ``TautomerCanonicalizer`` step is not guarded, so its errors propagate.

    :param smiles: SMILES string of the molecule to standardize
    :return: SMILES string generated from the processed molecule
    """
    from copy import deepcopy

    # Generate Mol
    mol = Chem.MolFromSmiles(smiles)

    # Uncharge; the result is carried into the next step (it must not be
    # overwritten with the original molecule afterwards).
    uncharger = Uncharger()
    try:
        uncharged = uncharger.uncharge(mol)
    except Exception:
        uncharged = mol

    # LargestFragmentChooser
    fragment_chooser = LargestFragmentChooser()
    try:
        fragment = fragment_chooser(uncharged)
    except Exception:
        fragment = uncharged

    # Sanitize in place; keep a copy so the pre-sanitization molecule can be
    # restored if sanitization raises.
    backup = deepcopy(fragment)
    try:
        Chem.SanitizeMol(fragment, catchErrors=False)
    except Exception:
        fragment = backup

    # Normalize
    normalizer = Normalizer()
    try:
        normalized = normalizer.normalize(fragment)
    except Exception:
        normalized = fragment

    canonical = TautomerCanonicalizer().canonicalize(normalized)

    return Chem.MolToSmiles(canonical)
Exemplo n.º 6
0
class TautomerCheck(CurationStep):
    """Curation step that replaces molecules with their canonical tautomer."""

    def __init__(self, **kwargs):
        """Create the step.

        :param kwargs: keyword arguments forwarded unchanged to
            ``TautomerCanonicalizer``
        """
        self.tautomerizer = TautomerCanonicalizer(**kwargs)

    def filterFunction(self, cmp):
        """Return the canonical tautomer of the single molecule ``cmp``."""
        return self.tautomerizer.canonicalize(cmp)

    def runStep(self, df, cmp_index):
        """Canonicalize every molecule in one column of ``df``.

        :param df: data frame holding the molecules; it is modified in place
        :param cmp_index: integer position of the molecule column
        :return: the same ``df`` object
        """
        # Read and write through the same positional indexer so the column
        # that is read is always the column that is overwritten, regardless
        # of the column labels.
        column = df.iloc[:, cmp_index]
        df.iloc[:, cmp_index] = [self.filterFunction(mol) for mol in column]
        return df
Exemplo n.º 7
0
def do_standard_mp(mol: Chem.rdchem.Mol, n: int = 0, ntauts: int = 100) -> tuple:
    """
    Function to standardize and canonicalize an RDKit Mol.

    If any step raises, the input molecule is returned in place of both the
    standardized molecule and the canonical tautomer.

    :param mol: molecule to be standardized
    :type mol: rdkit.Chem.rdchem.Mol
    :param n: molecule number, default = 0
    :type n: int
    :param ntauts: maximum number of tautomers to be enumerated in canonicalization step, default = 100
    :type ntauts: int
    :return: tuple containing the molecule number, the standardized molecule and the canonical tautomer (n, Mol, Mol)
    :rtype: tuple
    """
    try:
        standardizer = Standardizer()
        mol_sta = standardizer.charge_parent(standardizer.fragment_parent(mol), skip_standardize=True)
        mol_can = TautomerCanonicalizer(max_tautomers=ntauts).canonicalize(mol_sta)
        if Chem.MolToSmiles(mol_sta) == Chem.MolToSmiles(mol_can):
            # Same SMILES: reuse the standardized object as the canonical tautomer.
            mol_can = mol_sta
    except Exception:
        # Best-effort fallback; KeyboardInterrupt / SystemExit still propagate.
        mol_sta = mol
        mol_can = mol
    return (n, mol_sta, mol_can)
Exemplo n.º 8
0
def do_standard(mols, ntauts=100):
    """
    Adds a standardized molecule and canonicalized tautomer to the database.

    For every entry the molecule under ``"mol"`` is replaced by its
    standardized form and the canonical tautomer is stored under
    ``"mol_can"``. If processing an entry raises, ``"mol_can"`` is set to
    whatever is stored under ``"mol"`` at that point.

    :param mols: dictionary containing rdkit.Chem.rdchem.Mol objects as follows: {0: {"mol": rdkit.Chem.rdchem.Mol}, 1: {"mol": rdkit.Chem.rdchem.Mol} ...}
    :type mols: dict
    :param ntauts: maximum number of tautomers to be enumerated in canonicalization step, default = 100
    :type ntauts: int
    """
    for n in mols:
        try:
            standardizer = Standardizer()
            mol_sta = standardizer.charge_parent(standardizer.fragment_parent(mols[n]["mol"]),
                                                 skip_standardize=True)
            mol_can = TautomerCanonicalizer(max_tautomers=ntauts).canonicalize(mol_sta)
            mols[n]["mol"] = mol_sta
            if Chem.MolToSmiles(mol_sta) == Chem.MolToSmiles(mol_can):
                # Same SMILES: share the standardized object.
                mols[n]["mol_can"] = mol_sta
            else:
                mols[n]["mol_can"] = mol_can
        except Exception:
            # Best-effort fallback; KeyboardInterrupt / SystemExit still propagate.
            mols[n]["mol_can"] = mols[n]["mol"]
Exemplo n.º 9
0
def canonicalize_mp(mol, n, ntauts):
    """Return ``(n, canonical tautomer of mol)``.

    When the canonical tautomer has the same SMILES as ``mol``, the original
    ``mol`` object is returned in the tuple instead of the new one.

    :param mol: molecule to canonicalize
    :param n: molecule number, passed through unchanged
    :param ntauts: value passed to ``TautomerCanonicalizer`` as ``max_tautomers``
    :return: tuple ``(n, Mol)``
    """
    canonicalizer = TautomerCanonicalizer(max_tautomers=ntauts)
    candidate = canonicalizer.canonicalize(mol)
    same_smiles = Chem.MolToSmiles(mol) == Chem.MolToSmiles(candidate)
    return (n, mol if same_smiles else candidate)
Exemplo n.º 10
0
def standardize_mols(jobs, mol_counter, num_mols, results, start_time, vendors, max_stereo_isomers, failures,
                     tautomer, verbose):
    """
    This function passes molecules to the standardization functions.

    Jobs are popped from the front of `jobs` until it is empty. Every molecule
    of a job is standardized, reduced to its largest fragment, optionally
    tautomer-canonicalized and protonated; molecules with an exact molecular
    weight below 1200 are collected and finally appended to `results`.

    Parameters
    ----------
    jobs: multiprocessing.manager.list
        A list containing job information as dictionaries. The keys read here
        are 'vendor', 'sdf_path', 'mol_start', 'mol_end' and
        'identifier_field'.

    mol_counter: multiprocessing.manager.value
        A counter keeping track of processed molecules.

    num_mols: int
        Total number of molecules to be processed.

    results: multiprocessing.manager.list
        A list containing lists describing the processed molecules. Each entry
        is the SMILES followed by one slot per vendor; the slot of the job's
        vendor holds the molecule identifier.

    start_time: float
        Starting time of molecule processing.

    vendors: list
        List of vendors.

    max_stereo_isomers: int
        Maximal number of stereo isomers to generate per molecule. If not
        greater than 0, no stereo isomers are enumerated.

    failures: list
        Receives one 'standardize_error <smiles> <vendor> <identifier>' string
        for every molecule whose processing raised an exception.

    tautomer: bool
        If the tautomer should be canonicalized.

    verbose : bool
        If RDKit warning should be displayed.

    """
    if not verbose:
        RDLogger.DisableLog('rdApp.*')
    job = 'initiate'
    processed_mols = []
    while job is not None:
        # NOTE(review): the IndexError handler below is meant to detect an
        # exhausted job list, but it also ends the loop if an IndexError is
        # raised anywhere while a job is processed.
        try:
            job = jobs.pop(0)
            vendor_position = vendors.index(job['vendor'])
            supplier = Chem.SDMolSupplier(job['sdf_path'])
            for mol_id in range(job['mol_start'], job['mol_end'] + 1):
                mol = supplier[mol_id]
                if job['identifier_field'] == 'None':
                    identifier = 'unknown'
                else:
                    try:
                        identifier = mol.GetProp(job['identifier_field'])
                    except (AttributeError, KeyError):
                        # AttributeError: the supplier returned None for this record.
                        # KeyError: the molecule lacks the identifier property.
                        identifier = 'unknown'
                try:
                    # generate smiles for error catching
                    smiles = 'unknown'
                    smiles = Chem.MolToSmiles(mol)
                    # default standardization from molvs
                    mol = Standardizer().standardize(mol)
                    # choose largest fragment
                    mol = LargestFragmentChooser().choose(mol)
                    # canonicalize tautomer
                    if tautomer:
                        mol = TautomerCanonicalizer().canonicalize(mol)
                    # protonate mol
                    mol = protonate_mol(mol)
                    # molecular weight will not change anymore
                    if ExactMolWt(mol) < 1200:
                        # enumerate stereo isomers and append mols
                        if max_stereo_isomers > 0:
                            for isomer in enumerate_stereo_isomers(mol, max_stereo_isomers):
                                mol_as_list = [Chem.MolToSmiles(isomer)] + [''] * len(vendors)
                                mol_as_list[1 + vendor_position] = identifier
                                processed_mols.append(mol_as_list)
                        else:
                            mol_as_list = [Chem.MolToSmiles(mol)] + [''] * len(vendors)
                            mol_as_list[1 + vendor_position] = identifier
                            processed_mols.append(mol_as_list)
                except Exception:
                    # Record the failure and carry on with the next molecule;
                    # KeyboardInterrupt / SystemExit are not swallowed.
                    failures.append(' '.join(['standardize_error', smiles, job['vendor'], identifier]))
                with mol_counter.get_lock():
                    mol_counter.value += 1
                update_progress(mol_counter.value / num_mols, 'Progress of standardization',
                                ((time.time() - start_time) / mol_counter.value) * (num_mols - mol_counter.value))
        except IndexError:
            job = None
    results += processed_mols
    return
def get_tautomer(mol):
    """Return the result of calling a default ``TautomerCanonicalizer`` on ``mol``.

    :param mol: molecule to canonicalize
    :return: whatever the canonicalizer returns for ``mol``
    """
    canonicalizer = TautomerCanonicalizer()
    tautomer = canonicalizer(mol)
    return tautomer
Exemplo n.º 12
0
 def __init__(self, **kwargs):
     """Build the tautomer canonicalizer used by this object.

     :param kwargs: keyword arguments forwarded unchanged to
         ``TautomerCanonicalizer``
     """
     canonicalizer = TautomerCanonicalizer(**kwargs)
     self.tautomerizer = canonicalizer