Пример #1
0
def preprocess_smi(smi):
    """Clean a SMILES string by passing it through four successive filters.

    The input is canonicalised, stripped of salts, uncharged and finally
    converted to its canonical tautomer. Each filter re-parses the SMILES
    string written by the previous one.

    :param smi: SMILES string to clean.
    :return: the cleaned SMILES string, or ``None`` when ``smi`` cannot be
        parsed or converted to canonical SMILES.
    """

    # Filter 1 - Convert to canonical SMILES
    try:
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # Parsing failed: stop here explicitly rather than relying on
            # MolToSmiles raising when it is handed a None molecule.
            return None
        can_smi = Chem.MolToSmiles(mol, True)
    except Exception:
        # Deliberately not a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit are not swallowed while cleaning a large batch.
        return None

    # Filter 2 - Remove salt
    remover = SaltRemover()
    mol = Chem.MolFromSmiles(can_smi)
    # dontRemoveEverything=True is passed so the remover is asked not to
    # strip the whole molecule; the list of deleted fragments is not needed.
    res, _deleted = remover.StripMolWithDeleted(mol, dontRemoveEverything=True)
    removed_salt_smi = Chem.MolToSmiles(res)

    # Filter 3 - Remove charge
    uncharger = rdMolStandardize.Uncharger()
    salt_free_mol = Chem.MolFromSmiles(removed_salt_smi)
    uncharged_mol = uncharger.uncharge(salt_free_mol)
    uncharged_smi = Chem.MolToSmiles(uncharged_mol)

    # Filter 4 - Standardize the tautomer
    clean_smi = MolStandardize.canonicalize_tautomer_smiles(uncharged_smi)

    return clean_smi
Пример #2
0
 def test_withDontRemoveEverything(self):
   """Strip a molecule with dontRemoveEverything=True and expect no change.

   The salt definitions are loaded from ``test_data/witch-salts.sdf`` next
   to this file. The test passes when nothing is reported as deleted and
   the returned molecule compares equal to the input molecule.
   """
   here = os.path.dirname(os.path.abspath(__file__))
   salts_path = os.sep.join([here, 'test_data', 'witch-salts.sdf'])
   remover = SaltRemover(defnFilename=salts_path, defnFormat=InputFormat.MOL)

   input_mol = Chem.MolFromSmiles('Cc1ccccc1')
   stripped, removed = remover.StripMolWithDeleted(input_mol, dontRemoveEverything=True)

   # No fragment should have been reported as deleted
   self.assertFalse(removed)
   self.assertEqual(input_mol, stripped)
Пример #3
0
 def test_withSdfFile(self):
   """Load salt definitions from an SDF file and strip a sodium salt.

   Checks that 240 salt definitions are read from
   ``test_data/witch-salts.sdf`` and that stripping the test molecule
   removes exactly one fragment, ``[Na+]``, leaving the remaining
   fragments in place.
   """
   here = os.path.dirname(os.path.abspath(__file__))
   salts_path = os.sep.join([here, 'test_data', 'witch-salts.sdf'])
   remover = SaltRemover(defnFilename=salts_path, defnFormat=InputFormat.MOL)
   self.assertEqual(len(remover.salts), 240)

   salted = Chem.MolFromSmiles("Cc1onc(-c2ccccc2)c1C([O-])=NC1C(=O)N2C1SC(C)(C)C2C(=O)O.O.[Na+]")
   # The result is read through its .mol / .deleted attributes; the local
   # is not called "tuple" so the builtin of that name stays usable.
   outcome = remover.StripMolWithDeleted(salted)

   remaining_smiles = Chem.MolToSmiles(outcome.mol)
   self.assertEqual(remaining_smiles, 'Cc1onc(-c2ccccc2)c1C([O-])=NC1C(=O)N2C1SC(C)(C)C2C(=O)O.O')
   self.assertEqual(len(outcome.deleted), 1)
   self.assertEqual(Chem.MolToSmiles(outcome.deleted[0]), '[Na+]')
Пример #4
0
    def check_salt(self, molecule: str, subType: str) -> str:
        """
            Report whether the salt remover strips anything from
            ``self.smiles_mol``.

            :param molecule: not read by this method.
            :param subType: prefix used to build the returned label.

            :return salt: ``'<subType>_salt'`` when at least one fragment
                was deleted by the salt remover, otherwise ``None``.
        """

        _, removed = SaltRemover().StripMolWithDeleted(self.smiles_mol)

        # Nothing deleted means there is no salt label to report.
        if not removed:
            return None

        return '_'.join((subType, 'salt'))
Пример #5
0
 def test_SmilesVsSmarts(self):
   """Compare salt definitions given as SMARTS with ones given as SMILES.

   Both removers must strip only the separate chloride fragment from
   ``CN(Br)Cl.Cl``; the SMARTS remover must additionally report the
   deleted pattern back as the SMARTS it was defined with.
   """
   # SMARTS definition (no explicit defnFormat)
   smarts_remover = SaltRemover(defnData="[Cl,Br]")
   stripped = smarts_remover.StripMol(Chem.MolFromSmiles('CN(Br)Cl.Cl'))
   self.assertEqual(stripped.GetNumAtoms(), 4)
   self.assertEqual(Chem.MolToSmiles(stripped), 'CN(Cl)Br')

   two_salts = Chem.MolFromSmiles('CN(C)C.Cl.Br')
   stripped, removed = smarts_remover.StripMolWithDeleted(two_salts)
   self.assertEqual(Chem.MolToSmiles(stripped), 'CN(C)C')
   # The definition was read as SMARTS, so the deleted patterns are written
   # back out as SMARTS as well; any other output form would not match.
   removed_smarts = [Chem.MolToSmarts(pattern) for pattern in removed]
   self.assertListEqual(removed_smarts, ['[Cl,Br]'])

   # SMILES definition
   smiles_remover = SaltRemover(defnData="Cl", defnFormat=InputFormat.SMILES)
   stripped = smiles_remover.StripMol(Chem.MolFromSmiles('CN(Br)Cl.Cl'))
   self.assertEqual(stripped.GetNumAtoms(), 4)
   self.assertEqual(Chem.MolToSmiles(stripped), 'CN(Cl)Br')
Пример #6
0
def NeutraliseCharges_RemoveSalt(smiles, reactions=None):
    """Strip salts from a SMILES string and neutralise its charged groups.

    :param smiles: input SMILES string.
    :param reactions: optional iterable of ``(reactant, product)`` pairs used
        for substructure replacement. When ``None``, the module-level
        ``_reactions`` cache is used; it is filled on first use by
        ``_InitialiseNeutralisationReactions()``.
    :return: a ``(smiles, neutralised)`` tuple.

        * ``(None, False)`` when ``smiles`` cannot be parsed.
        * ``(new_smiles, True)`` when at least one replacement was applied.
        * ``(stripped_smiles, False)`` when only salt fragments were removed.
        * ``(smiles, False)`` (the input string, unchanged) otherwise.
    """
    global _reactions
    if reactions is None:
        if _reactions is None:
            _reactions = _InitialiseNeutralisationReactions()
        reactions = _reactions

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return (None, False)

    remover = SaltRemover()
    mol, deleted = remover.StripMolWithDeleted(mol)

    replaced = False
    for reactant, product in reactions:
        # Re-apply the same pattern until it no longer matches, so that
        # every occurrence in the molecule is replaced.
        while mol.HasSubstructMatch(reactant):
            replaced = True
            mol = AllChem.ReplaceSubstructs(mol, reactant, product)[0]

    # Salt stripping used to be discarded whenever no replacement happened.
    # Report the stripped structure too, unless stripping left no atoms at
    # all, in which case the original string is the more useful result.
    salts_stripped = bool(deleted) and mol.GetNumAtoms() > 0
    if replaced or salts_stripped:
        return (Chem.MolToSmiles(mol, True), replaced)
    return (smiles, False)
if __name__ == '__main__':
    if len(sys.argv) != 3:
        print('Usage: python rdkit_hlogp_batch.py <smiles> <batch_size>')
        exit()
    
    BATCH_SIZE = int(sys.argv[2])
    hlogp_list = list()
    with open(sys.argv[1]) as smiles_file:
        file_lines = smiles_file.readlines()
        for line in file_lines:
            if line.strip():
                smiles, cid = str(line).strip().split()[:2]
                mol = MolFromSmiles(smiles)
                remover = SaltRemover()
                res, deleted = remover.StripMolWithDeleted(mol)
                if res is not None:
                    res.SetProp('_Name', cid)
                logp = MolLogP(res)
                num_heavy_atoms = res.GetNumHeavyAtoms()
                if num_heavy_atoms > 99:
                    num_heavy_atoms = 99
                scaled_logp = scale_logp_value(logp)
                if logp < 0.0:
                    sign = 'M'
                    #remove the minus sign so it's not printed
                    scaled_logp = scaled_logp * -1
                else:
                    sign = 'P'
                key_string = 'H{:02}{}{:03}'.format(num_heavy_atoms, sign, scaled_logp)