def read_standardise(smi) -> Chem.rdchem.Mol:
    """Parse a SMILES string and return its standardized parent molecule.

    The SMILES is parsed with ``Chem.MolFromSmiles``, passed through
    ``standardizer.standardize_mol`` and then through
    ``standardizer.get_parent_mol``, whose first result element is returned.

    :param smi: SMILES string to parse.
    :return: the standardized parent molecule, or the molecule parsed from
        the empty SMILES ``''`` when ``smi`` cannot be parsed (a warning is
        emitted in that case).
    """
    try:
        m = Chem.MolFromSmiles(smi)
    except Exception:
        # Parsing blew up outright (e.g. a non-string input); treat it the
        # same way as an unparseable SMILES.
        m = None

    # Explicit check instead of ``assert``: assertions are stripped under
    # ``python -O`` and a ``None`` would then reach the standardizer.
    if m is None:
        warnings.warn('SMILES: {} is invalid'.format(smi))
        return Chem.MolFromSmiles('')

    m = standardizer.standardize_mol(m)
    # get_parent_mol returns a sequence; only its first element is used.
    m = standardizer.get_parent_mol(m)[0]
    return m
def standardize_smiles(smiles):
    """Return a standardized, stereo-free canonical SMILES for ``smiles``.

    The input is parsed, reduced to its parent molecule (neutralized,
    with exclusion checking), standardized, and written back out without
    stereochemistry before a final parse/write round trip.

    :param smiles: SMILES string to standardize.
    :return: the resulting SMILES string, or ``np.nan`` if the input cannot
        be parsed or any step raises.
    """
    try:
        # Convert to RDKit mol
        m = Chem.MolFromSmiles(smiles)
        if m is None:
            # Unparseable input: report it as missing rather than relying on
            # a downstream call to fail on None.
            return np.nan

        # Neutralize and separate from counterion
        m = standardizer.get_parent_mol(
            m, neutralize=True, check_exclusion=True, verbose=False
        )[0]

        # Standardize representation
        m = standardizer.standardize_mol(m, check_exclusion=True)

        # remove stereochemistry information to avoid some activity cliffs
        flat_smiles = Chem.MolToSmiles(m, isomericSmiles=False)

        # Return RDKit canonical SMILES (re-parse the stereo-free string and
        # write it out again)
        return Chem.MolToSmiles(Chem.MolFromSmiles(flat_smiles))
    except Exception:
        # Best-effort: any failure yields NaN. ``Exception`` (not a bare
        # ``except``) so KeyboardInterrupt/SystemExit still propagate.
        return np.nan
def check_errors(self, smi: str) -> str:
    """
    This function processes the SMILES in order to canonicalize it and
    detect any errors. If errors are detected, the returned SMILES is not
    sanitized.

    The input is passed to ``standardizer.standardize_mol``; if that raises,
    the input is used unchanged. The result is then converted with
    ``Chem.MolToSmiles`` and passed through ``self.salt_remover``.

    :param smi: SMILES string of the compound
    :return final_smi: canonicalized SMILES
    """
    # NOTE(review): ``smi`` is annotated as ``str`` but is handed to
    # ``standardize_mol`` and, on failure, directly to ``Chem.MolToSmiles``,
    # which presumably operate on molecule objects - confirm what callers
    # actually pass here.
    try:
        final_smi = standardizer.standardize_mol(smi)
    except Exception:
        # Best-effort fallback: keep the unstandardized input. ``Exception``
        # (not a bare ``except``) so KeyboardInterrupt/SystemExit propagate.
        final_smi = smi

    final_smi = Chem.MolToSmiles(final_smi)
    final_smi = self.salt_remover(final_smi)
    return final_smi
def __call__(self, mol):
    """Standardize ``mol`` and return its neutralized parent molecule.

    Steps, in order: evaluate the ChEMBL exclusion flag, render the SMILES
    used in error messages, standardize the molecule, then extract the
    parent molecule.

    :param mol: molecule to standardize.
    :return: the parent molecule returned by
        ``chembl_standardizer.get_parent_mol``.
    :raises StandardizationError: if the exclusion flag is set, or if
        rendering the SMILES, standardizing, or extracting the parent raises.
    """
    excluded = chembl_standardizer.exclude_flag(mol, includeRDKitSanitization=False)

    # just for outputs - computed once, under the guard, so that a SMILES
    # failure is always reported as a StandardizationError (previously the
    # exclusion message called MolToSmiles unguarded).
    try:
        smiles = Chem.MolToSmiles(mol)
    except Exception as exp:
        raise StandardizationError(
            exp,
            f'An exception occurred while getting the SMILES for molecule: {mol}'
        ) from exp

    if excluded:
        raise StandardizationError(
            None,
            f'ChEMBL standardizer set the exclusion flag for molecule: {smiles}'
        )

    # Exclusion was already checked above, hence check_exclusion=False below.
    try:
        mol = chembl_standardizer.standardize_mol(mol, check_exclusion=False)
    except Exception as exp:
        raise StandardizationError(
            exp,
            f'An exception occurred while standardizing molecule: {smiles}'
        ) from exp

    try:
        mol, _ = chembl_standardizer.get_parent_mol(
            mol, check_exclusion=False, verbose=True, neutralize=True
        )
    except Exception as exp:
        raise StandardizationError(
            exp,
            f'An exception occurred while getting the parent molecule of: {smiles}'
        ) from exp

    return mol
# get all csvs from folders, skipping any path that mentions the combined
# output file
received_csv_files = [
    f for f in dir_path.glob("**/*.csv")
    if "all_received_mols.csv" not in str(f)
]


def _parent_canonical_smiles(raw_smiles):
    """Return the SMILES of the standardized parent molecule of ``raw_smiles``.

    Parses the SMILES, takes the first element of ``get_parent_mol``'s
    result, standardizes it, and round-trips the output through
    SMILES -> mol -> SMILES.
    """
    parent = standardizer.get_parent_mol(Chem.MolFromSmiles(raw_smiles))[0]
    standardized = standardizer.standardize_mol(parent)
    return Chem.MolToSmiles(Chem.MolFromSmiles(Chem.MolToSmiles(standardized)))


# Maps each standardized SMILES to the name of the CSV file it came from;
# a SMILES seen in several files keeps the last file processed.
smiles_dict = {}
for csv_file in received_csv_files:
    try:
        received_df = pd.read_csv(csv_file)
        received_df["SMILES"] = received_df["SMILES"].apply(_parent_canonical_smiles)
        received_smi = list(received_df["SMILES"])
        # Final path component; unlike splitting on "/", this also works
        # with Windows path separators.
        shipment_name = csv_file.name
        for smi in received_smi:
            smiles_dict[smi] = shipment_name
    except Exception as e:
        # Best-effort: report the failing file and carry on with the rest.
        print(f"FAILED ON {csv_file}")
        print(e)

# write out final csv
all_smiles = list(smiles_dict)
all_shipments = list(smiles_dict.values())
def strip_and_standardize_smi(smi):
    """Return the SMILES of the standardized parent molecule of ``smi``.

    The SMILES is parsed, reduced to the first element returned by
    ``standardizer.get_parent_mol``, standardized, and finally round-tripped
    through SMILES -> mol -> SMILES.

    :param smi: SMILES string to process.
    :return: SMILES string produced by the final ``Chem.MolToSmiles`` call.
    """
    input_mol = Chem.MolFromSmiles(smi)
    parent_mol = standardizer.get_parent_mol(input_mol)[0]
    standardized_mol = standardizer.standardize_mol(parent_mol)
    # Re-parse the written SMILES before producing the final string.
    round_trip_mol = Chem.MolFromSmiles(Chem.MolToSmiles(standardized_mol))
    return Chem.MolToSmiles(round_trip_mol)