def clean_smiles(smiles_df):
    """
    Helper function which runs the standardization tool on a DataFrame of smiles strings.

    Every value in the "smiles" column is parsed, reduced to its standardized
    fragment parent and written back as a smiles string. Rows which cannot be
    processed are reported on stdout and removed. The DataFrame is modified
    in place.

    Args:
        smiles_df: DataFrame which contains smiles strings in a column named "smiles"

    Returns:
        The original DataFrame, but with the smiles strings in the "smiles" column
        standardized and any rows which contained problematic smiles removed
    """
    standard = Standardizer(prefer_organic=True)
    failed = []

    # Snapshot the column first: the loop writes back into the same column,
    # so we must not iterate over live DataFrame data.
    for index, smi in list(smiles_df['smiles'].items()):
        try:
            mol = Chem.MolFromSmiles(smi)
            if mol is None:
                # Treat a None parse result as a failure up front instead of
                # relying on the standardizer to choke on it.
                raise ValueError("unparsable smiles")
            std_mol = standard.fragment_parent(mol, skip_standardize=False)
            # Single .loc call instead of chained indexing: chained assignment
            # may write to a temporary copy and leave smiles_df untouched.
            smiles_df.loc[index, 'smiles'] = Chem.MolToSmiles(std_mol)
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
            print("Error cleaning " + str(index) + " " + str(smi))
            print(smiles_df.loc[index])
            failed.append(index)

    # Remove all problematic rows in one pass after the iteration is finished.
    if failed:
        smiles_df.drop(failed, inplace=True)
    return smiles_df
def testFragmentLong(self):
    """Compare fragment_parent output with the expected SMILES for the 100k data set.

    Skipped unless the module-level ``doLong`` flag is truthy. Any exception
    raised while standardizing a record or converting it to SMILES is reported
    as an AssertionError naming the record's line number and input SMILES.
    """
    if not doLong:
        raise unittest.SkipTest('long test')

    for data in self.readPCSdata(self.dataPCS_fragment100k):
        try:
            # A new Standardizer is built for every record, inside the guarded
            # region, so a construction failure is reported the same way.
            parent = Standardizer().fragment_parent(data.mol)
            actual = Chem.MolToSmiles(parent)
        except Exception:
            raise AssertionError(f'Line {data.lineNo}: MolVS normalization failed for SMILES {data.smiles}')
        else:
            self.assertEqual(actual, data.expected)
def testFragmentLong(self):
    """Run the long fragment-parent regression over the 100k data set.

    The test is skipped when the module-level ``doLong`` flag is falsy. For
    each record, the fragment parent of ``data.mol`` is converted to SMILES
    and compared with ``data.expected``; an exception during that processing
    is re-raised as an AssertionError carrying the line number and SMILES.
    """
    if not doLong:
        raise unittest.SkipTest('long test')

    records = self.readPCSdata(self.dataPCS_fragment100k)
    for data in records:
        try:
            # Standardizer is created per record within the try block, so any
            # failure while constructing it is also converted to an assertion.
            standardizer = Standardizer()
            result_smiles = Chem.MolToSmiles(standardizer.fragment_parent(data.mol))
        except Exception:
            raise AssertionError(
                'Line {0.lineNo}: MolVS normalization failed for SMILES {0.smiles}'.format(data))
        self.assertEqual(result_smiles, data.expected)
def clean_smiles(smi):
    """
    Helper function which runs the standardization tool on the input smiles string

    Args:
        smi: Input smiles string

    Returns:
        The standardized version of the input smiles string, or None when the
        input could not be cleaned (a message is printed in that case)
    """
    s = Standardizer(prefer_organic=True)
    try:
        mol = Chem.MolFromSmiles(smi)
        std_mol = s.fragment_parent(mol, skip_standardize=False)
        return Chem.MolToSmiles(std_mol)
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt and SystemExit are not silently swallowed.
        print("Issue with input smiles string. Unable to clean " + str(smi))
        return None
def testFragmentShort(self):
    """Compare fragment_parent output with the expected SMILES for each short-set record.

    For every record yielded by ``self.readPCSdata``, the fragment parent of
    ``data.mol`` is converted to SMILES and asserted equal to ``data.expected``.
    """
    # NOTE(review): the attribute is spelled "fragmnet" here; presumably it
    # matches the definition elsewhere in the test class - confirm.
    for data in self.readPCSdata(self.dataPCS_fragmnet1k):
        parent = Standardizer().fragment_parent(data.mol)
        actual = Chem.MolToSmiles(parent)
        self.assertEqual(actual, data.expected)
def testFragmentShort(self):
    """Run the short fragment-parent regression.

    Each record read via ``self.readPCSdata`` has its ``mol`` reduced to the
    fragment parent by a freshly created Standardizer; the resulting SMILES
    must equal the record's ``expected`` value.
    """
    # NOTE(review): "fragmnet" looks like a typo of "fragment", but the name
    # refers to an attribute defined outside this block, so it is kept as is.
    records = self.readPCSdata(self.dataPCS_fragmnet1k)
    for data in records:
        standardizer = Standardizer()
        fragment = standardizer.fragment_parent(data.mol)
        result_smiles = Chem.MolToSmiles(fragment)
        self.assertEqual(result_smiles, data.expected)