Exemplo n.º 1
0
def clean_smiles(smiles_df):
    """
    Helper function which runs the standardization tool on a list of smiles
    strings.

    The DataFrame is modified in place: each value in the "smiles" column is
    replaced by the canonical smiles of its standardized fragment parent, and
    every row whose smiles could not be processed is reported on stdout and
    dropped.

    Args:
        smiles_df: DataFrame which contains smiles strings in a column named
        "smiles"

    Returns:
        The original DataFrame, but with the smiles strings in the
        "smiles" column standardized and any rows which contained
        problematic smiles removed
    """
    standard = Standardizer(prefer_organic=True)
    failed_indices = []
    # Snapshot the (index, smiles) pairs first so that writing back into the
    # same column cannot interfere with the iteration.
    for index, smiles in list(smiles_df['smiles'].items()):
        try:
            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                # RDKit signals an unparsable smiles by returning None rather
                # than raising; make that failure explicit.
                raise ValueError("unparsable smiles")
            std_mol = standard.fragment_parent(mol, skip_standardize=False)
            std_smiles = Chem.MolToSmiles(std_mol)
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit still
            # propagate instead of being treated as a bad smiles.
            print("Error cleaning " + str(index) + " " +
                  str(smiles))
            print(smiles_df.loc[index])
            failed_indices.append(index)
        else:
            # Single .loc assignment: chained indexing
            # (df['smiles'][index] = ...) may write to a temporary copy and
            # silently leave the DataFrame unchanged.
            smiles_df.loc[index, 'smiles'] = std_smiles
    # Drop all problematic rows in one pass once iteration has finished.
    smiles_df.drop(failed_indices, inplace=True)
    return smiles_df
Exemplo n.º 2
0
 def testFragmentLong(self):
     # Long-running check, skipped unless the doLong flag is set: for every
     # record yielded by readPCSdata, the SMILES of the fragment parent must
     # equal the record's expected value. Any error raised while computing
     # it is reported as a failure that names the offending input line.
     if not doLong:
         raise unittest.SkipTest('long test')
     records = self.readPCSdata(self.dataPCS_fragment100k)
     for data in records:
         try:
             # A fresh Standardizer is built for each record.
             parent = Standardizer().fragment_parent(data.mol)
             ns = Chem.MolToSmiles(parent)
         except Exception:
             message = f'Line {data.lineNo}: MolVS normalization failed for SMILES {data.smiles}'
             raise AssertionError(message)
         self.assertEqual(ns, data.expected)
Exemplo n.º 3
0
 def testFragmentLong(self):
     # Long-running check, skipped unless the doLong flag is set: for every
     # record yielded by readPCSdata, the SMILES of the fragment parent must
     # equal the record's expected value. Any error raised while computing
     # it is reported as a failure that names the offending input line.
     if not doLong:
         raise unittest.SkipTest('long test')
     records = self.readPCSdata(self.dataPCS_fragment100k)
     for data in records:
         try:
             # A fresh Standardizer is built for each record.
             parent = Standardizer().fragment_parent(data.mol)
             ns = Chem.MolToSmiles(parent)
         except Exception:
             message = 'Line {0.lineNo}: MolVS normalization failed for SMILES {0.smiles}'.format(data)
             raise AssertionError(message)
         self.assertEqual(ns, data.expected)
Exemplo n.º 4
0
def clean_smiles(smi):
    """
    Helper function which runs the standardization tool on the input smiles
    string

    Args:
        smi: Input smiles string

    Returns:
        The standardized version of the input smiles string, or None (after
        printing a message) if the input could not be processed
    """
    s = Standardizer(prefer_organic=True)
    try:
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # RDKit signals an unparsable smiles by returning None rather
            # than raising; make that failure explicit.
            raise ValueError("unparsable smiles")
        std_mol = s.fragment_parent(mol, skip_standardize=False)
        return Chem.MolToSmiles(std_mol)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still
        # propagate instead of being reported as a bad smiles.
        print("Issue with input smiles string. Unable to clean " + str(smi))
        return None
Exemplo n.º 5
0
 def testFragmentShort(self):
     # For every record yielded by readPCSdata, the SMILES of the fragment
     # parent computed by a fresh Standardizer must equal the record's
     # expected value.
     records = self.readPCSdata(self.dataPCS_fragmnet1k)
     for record in records:
         parent = Standardizer().fragment_parent(record.mol)
         observed = Chem.MolToSmiles(parent)
         self.assertEqual(observed, record.expected)
Exemplo n.º 6
0
 def testFragmentShort(self):
     # For every record yielded by readPCSdata, the SMILES of the fragment
     # parent computed by a fresh Standardizer must equal the record's
     # expected value.
     records = self.readPCSdata(self.dataPCS_fragmnet1k)
     for record in records:
         parent = Standardizer().fragment_parent(record.mol)
         observed = Chem.MolToSmiles(parent)
         self.assertEqual(observed, record.expected)