def setUp(self):
    """
    Set up tests.

    Builds three named molecules with 2D coordinates, writes them to a
    temporary '.sdf.gz' file inside a fresh temporary directory, and
    creates the sharder and reader used by the tests.
    """
    # generate molecules
    smiles = ['CC(=O)OC1=CC=CC=C1C(=O)O',
              'CC(C)CC1=CC=C(C=C1)C(C)C(=O)O',
              'CC1=CC=C(C=C1)C2=CC(=NN2C3=CC=C(C=C3)S(=O)(=O)N)C(F)(F)F']
    names = ['aspirin', 'ibuprofen', 'celecoxib']
    self.mols = []
    for s, n in zip(smiles, names):
        mol = Chem.MolFromSmiles(s)
        mol.SetProp('_Name', n)
        AllChem.Compute2DCoords(mol)
        self.mols.append(mol)

    # write molecules to file
    self.temp_dir = tempfile.mkdtemp()
    # Reserve a unique path and release its handle right away; mkstemp
    # would hand back an open OS-level descriptor that nobody closes.
    with tempfile.NamedTemporaryFile(dir=self.temp_dir, suffix='.sdf.gz',
                                     delete=False) as tmp:
        self.filename = tmp.name
    writer = serial.MolWriter()
    with writer.open(self.filename) as w:
        w.write(self.mols)

    self.sharder = DatasetSharder(filename=self.filename,
                                  write_shards=False)
    self.reader = serial.MolReader()
def test_not_close_other(self):
    """
    Make sure MolIO doesn't close files it didn't open.

    A file object opened by the test is handed to MolReader; neither an
    explicit close() nor leaving the reader's context manager may close it.
    """
    # Reserve an (empty) '.sdf' path without leaking the descriptor that
    # mkstemp would return.
    with tempfile.NamedTemporaryFile(suffix='.sdf', dir=self.temp_dir,
                                     delete=False) as tmp:
        filename = tmp.name

    with open(filename) as f:
        reader = serial.MolReader(f, mol_format='sdf')
        reader.close()
        # must be checked while still inside the with block
        assert not f.closed

    # also test the context manager
    with open(filename) as g:
        with serial.MolReader(g, mol_format='sdf'):
            pass
        assert not g.closed
def read_data(self):
    """
    Load every molecule stored in self.filename.

    Returns
    -------
    list
        The items produced by iterating over the opened reader.
    """
    mol_reader = serial.MolReader()
    with mol_reader.open(self.filename) as handle:
        # the list is fully built before the reader is closed
        return list(handle)
def read_mols_from_file(self):
    """
    Lazily yield the molecules stored in self.filename.

    The reader stays open for as long as the generator is being consumed.
    """
    mol_reader = serial.MolReader()
    with mol_reader.open(self.filename) as handle:
        for molecule in handle.get_mols():
            yield molecule
def test_is_a_salt(self):
    """
    A molecule that is itself a salt must not come back empty.
    """
    source = StringIO('C(=CC(=O)O)C(=O)O')
    reader = serial.MolReader(source, 'smi', remove_salts=True)
    parsed = list(reader.get_mols())
    # exactly one molecule, and it must still have atoms
    assert len(parsed) == 1
    assert parsed[0].GetNumAtoms()
def test_skip_failures(self):
    """
    Molecules that fail to read are skipped rather than returned.
    """
    # this SMILES is expected to fail on read, so nothing should be yielded
    source = StringIO('CO(C)C')
    reader = serial.MolReader(source, 'smi')
    parsed = list(reader.get_mols())
    assert not parsed
def test_no_remove_salts(self):
    """
    Test salt retention.

    Writes two salt forms to an SDF file, reads them back with
    remove_salts=False, and checks that each molecule matches its
    reference and has more atoms than the desalted reference.
    """
    ref_mols = [self.aspirin_sodium, self.levalbuterol_hcl]

    # Write straight into the temporary file; this avoids the unclosed
    # descriptor that mkstemp would leave behind.
    with tempfile.NamedTemporaryFile(suffix='.sdf', dir=self.temp_dir,
                                     delete=False) as f:
        filename = f.name
        for mol in ref_mols:
            f.write(Chem.MolToMolBlock(mol))
            f.write('$$$$\n')  # molecule delimiter

    # read everything while the reader is open, then let it close
    with serial.MolReader(remove_salts=False).open(filename) as reader:
        mols = list(reader.get_mols())
    assert len(mols) == 2

    # a salt-removing reader is only used to produce the desalted reference
    self.reader = serial.MolReader(remove_salts=True)
    for mol, ref_mol in zip(mols, ref_mols):
        assert mol.ToBinary() == ref_mol.ToBinary()
        desalted = self.reader.clean_mol(ref_mol)
        assert mol.GetNumAtoms() > desalted.GetNumAtoms()
def test_remove_hydrogens(self):
    """
    Test hydrogen removal.

    Aspirin with explicit hydrogens is written to an SDF file; reading it
    with remove_hydrogens=True must give back plain aspirin.
    """
    # Write straight into the temporary file so no descriptor is leaked.
    with tempfile.NamedTemporaryFile(suffix='.sdf', dir=self.temp_dir,
                                     delete=False) as f:
        filename = f.name
        f.write(Chem.MolToMolBlock(self.aspirin_h))

    # fetch the first molecule while the reader is open, then close it
    with serial.MolReader(remove_hydrogens=True).open(filename) as reader:
        mol = next(reader.get_mols())
    assert mol.ToBinary() == self.aspirin.ToBinary()
def test_read_compressed_file_like(self):
    """
    Read from a file-like object using gzip.

    Aspirin is written through gzip.open, then the reader is given the
    gzip file object (not the filename) together with an explicit format.
    """
    # Reserve a unique '.sdf.gz' path without leaking the descriptor that
    # mkstemp would return.
    with tempfile.NamedTemporaryFile(suffix='.sdf.gz', dir=self.temp_dir,
                                     delete=False) as tmp:
        filename = tmp.name

    with gzip.open(filename, 'wb') as f:
        f.write(Chem.MolToMolBlock(self.aspirin))

    with gzip.open(filename) as f:
        reader = serial.MolReader(f, mol_format='sdf')
        mols = reader.get_mols()
        # consume inside the with block: the gzip file must still be open
        assert next(mols).ToBinary() == self.aspirin.ToBinary()
def test_no_remove_hydrogens(self):
    """
    Test hydrogen retention.

    Aspirin with explicit hydrogens is written to an SDF file; reading it
    with remove_hydrogens=False must give back the same mol block.
    """
    # Write straight into the temporary file so no descriptor is leaked.
    with tempfile.NamedTemporaryFile(suffix='.sdf', dir=self.temp_dir,
                                     delete=False) as f:
        filename = f.name
        f.write(Chem.MolToMolBlock(self.aspirin_h))

    # fetch the first molecule while the reader is open, then close it
    with serial.MolReader(remove_hydrogens=False,
                          remove_salts=False).open(filename) as reader:
        mol = next(reader.get_mols())

    # FIXME get ToBinary test to work
    # assert mol.ToBinary() == self.aspirin_h.ToBinary()
    assert Chem.MolToMolBlock(mol) == Chem.MolToMolBlock(self.aspirin_h)
def get_smiles(filename, assign_stereo_from_3d=False):
    """
    Collect the SMILES of the molecules in a file.

    Every molecule read from the file is added to a fresh MoleculeDatabase
    and the database's SMILES are returned as a list.

    Parameters
    ----------
    filename : str
        Input molecule filename.
    assign_stereo_from_3d : bool, optional (default False)
        Assign stereochemistry from 3D coordinates.
    """
    database = MoleculeDatabase(assign_stereo_from_3d=assign_stereo_from_3d)
    mol_reader = serial.MolReader()
    with mol_reader.open(filename) as handle:
        for molecule in handle:
            database.add_mol(molecule)
    smiles = list(database.smiles)
    return smiles
def _ionize_3d(self, mol):
    """
    Ionize a molecule while preserving conformers.

    Each conformer is written as its own SDF record, the records are piped
    through obabel (called with -p self.pH), and the conformers of the
    resulting molecules are merged back onto the first one.

    Parameters
    ----------
    mol : RDMol
        Molecule. Must have at least one conformer.

    Returns
    -------
    The first molecule read back from obabel, carrying the conformers of
    all molecules read back.

    Raises
    ------
    IonizerError
        If reading the obabel output raises RuntimeError, or if no
        molecules are read back.
    """
    assert mol.GetNumConformers() > 0

    # one SDF record (mol block + delimiter) per conformer
    sdf = ''.join(
        Chem.MolToMolBlock(mol, confId=conf.GetId(), includeStereo=True) +
        '$$$$\n'
        for conf in mol.GetConformers())

    args = ['obabel', '-i', 'sdf', '-o', 'sdf', '-p', str(self.pH)]
    p = subprocess.Popen(args, stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    ionized_sdf, _ = p.communicate(sdf)  # stderr output is discarded
    reader = serial.MolReader(StringIO(ionized_sdf), mol_format='sdf',
                              remove_salts=False)  # no changes
    try:
        mols = list(reader.get_mols())
    except RuntimeError as e:  # catch pre-condition violations
        # str(e) rather than the deprecated e.message attribute
        raise IonizerError(str(e))

    # catch ionizer failure
    if not mols:
        raise IonizerError(mol)

    # detection of stereochemistry based on 3D coordinates might result
    # in issues when attempting to recombine ionized conformers, but we
    # merge them anyway
    ionized_mol = mols[0]
    for other in mols[1:]:  # empty when only one molecule came back
        for conf in other.GetConformers():
            ionized_mol.AddConformer(conf, assignId=True)
    return ionized_mol
def setUp(self):
    """
    Create the temporary directory, reference molecules, and reader.

    Aspirin (plain, with explicit hydrogens, and as the sodium salt) and
    levalbuterol (plain and as the hydrochloride) are built from SMILES.
    """
    self.temp_dir = tempfile.mkdtemp()
    make_mol = self._get_mol_from_smiles

    # aspirin
    aspirin_smiles = 'CC(=O)OC1=CC=CC=C1C(=O)O'
    aspirin_sodium_smiles = 'CC(=O)OC1=CC=CC=C1C(=O)[O-].[Na+]'
    self.aspirin = make_mol(aspirin_smiles, 'aspirin')
    self.aspirin_h = Chem.AddHs(self.aspirin)
    self.aspirin_sodium = make_mol(aspirin_sodium_smiles, 'aspirin sodium')

    # levalbuterol (chiral)
    levalbuterol_smiles = 'CC(C)(C)NC[C@@H](C1=CC(=C(C=C1)O)CO)O'
    levalbuterol_hcl_smiles = 'CC(C)(C)NC[C@@H](C1=CC(=C(C=C1)O)CO)O.Cl'
    self.levalbuterol = make_mol(levalbuterol_smiles, 'levalbuterol')
    self.levalbuterol_hcl = make_mol(levalbuterol_hcl_smiles,
                                     'levalbuterol hydrochloride')

    self.ref_mols = [self.aspirin, self.levalbuterol]
    self.reader = serial.MolReader(compute_2d_coords=False)
def main(input_filenames, output_filename, database_filename=None,
         assign_stereo_from_3d=False):
    """
    Update or create a molecule database.

    Molecules from each input file are added to the database; a molecule
    whose addition raises ValueError is reported and skipped. The number
    of molecules added is printed and the database is saved.

    Parameters
    ----------
    input_filenames : list
        Input molecule filename(s).
    output_filename : str
        Output filename.
    database_filename : str, optional
        Existing database to update.
    assign_stereo_from_3d : bool, optional (default False)
        Whether to assign stereochemistry from 3D coordinates.
    """
    database = MoleculeDatabase(assign_stereo_from_3d=assign_stereo_from_3d)
    if database_filename is not None:
        database.load(database_filename)
    initial_size = len(database)

    # Every print below takes a single parenthesized argument, so the
    # output is the same whether print is a statement or a function.
    for filename in input_filenames:
        print(filename)
        with serial.MolReader().open(filename) as reader:
            for mol in reader:
                try:
                    database.add_mol(mol)
                except ValueError:
                    # identify the skipped molecule by name when it has
                    # one, otherwise by its isomeric SMILES
                    if mol.HasProp('_Name'):
                        label = mol.GetProp('_Name')
                    else:
                        label = Chem.MolToSmiles(mol, isomericSmiles=True)
                    print('Skipping {}'.format(label))

    final_size = len(database)
    print('{} molecules added to the database'.format(
        final_size - initial_size))
    database.save(output_filename)
def read_mols(input_filename):
    """
    Read molecules from an input file and extract names.

    Parameters
    ----------
    input_filename : str
        Filename containing molecules.

    Returns
    -------
    mols : ndarray
        The molecules, in file order.
    names : ndarray
        For each molecule, its '_Name' property, or None when the molecule
        has no such property.
    """
    # single parenthesized argument: same output as a print statement
    print("Reading molecules...")
    mols = []
    names = []
    # the context manager closes the reader even if reading raises
    with serial.MolReader().open(input_filename) as reader:
        for mol in reader.get_mols():
            mols.append(mol)
            names.append(
                mol.GetProp('_Name') if mol.HasProp('_Name') else None)
    return np.asarray(mols), np.asarray(names)