def set_protein(self, protein):
    """Change protein to dock to.

    Parameters
    ----------
    protein: oddt.toolkit.Molecule object or string
        Protein object to be used, or a path to a protein file whose
        format is taken from the file extension. A falsy value only
        resets ``self._tmp_dir`` and leaves the current protein untouched.
    """
    # generate new directory
    self._tmp_dir = None
    if not protein:
        return

    if isinstance(protein, str):
        extension = protein.split('.')[-1]
        # builtin next() works on Python 2 and 3 (``.next()`` is py2-only)
        self.protein = next(toolkit.readfile(extension, protein))
        # files are always receptors here, so flag them on every path
        self.protein.protein = True
        if extension == 'pdbqt':
            # already a PDBQT file - hand it to Vina untouched
            self.protein_file = protein
            return
    else:
        self.protein = protein

    # write protein to file
    self.protein_file = self.tmp_dir + '/protein.pdbqt'
    self.protein.write('pdbqt', self.protein_file,
                       opt={'r': None, 'c': None}, overwrite=True)
def dock(self, ligands, protein=None, single=False):
    """Automated docking procedure.

    Parameters
    ----------
    ligands: iterable of oddt.toolkit.Molecule objects
        Ligands to dock
    protein: oddt.toolkit.Molecule object or None
        Protein object to be used. If None, then the default one is used,
        else the protein is new default.
    single: bool (default=False)
        A flag to indicate single ligand docking; ``ligands`` is then a
        single molecule and is wrapped in a list.

    Returns
    -------
    ligands : array of oddt.toolkit.Molecule objects
        Array of ligands (scores are stored in mol.data method)

    Raises
    ------
    IOError
        If no receptor file has been set.
    Exception
        If Autodock Vina fails and ``self.skip_bad_mols`` is false.
    """
    if protein:
        self.set_protein(protein)
    if not self.protein_file:
        raise IOError("No receptor.")
    if single:
        ligands = [ligands]
    ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
    output_array = []
    try:
        for n, ligand in enumerate(ligands):
            # write ligand to file; the title is sanitised once and reused
            base = (ligand_dir + '/' + str(n) + '_' +
                    re.sub('[^A-Za-z0-9]+', '_', ligand.title))
            ligand_file = base + '.pdbqt'
            ligand_outfile = base + '_out.pdbqt'
            ligand.write('pdbqt', ligand_file, overwrite=True, opt={'b': None})
            try:
                vina = parse_vina_docking_output(subprocess.check_output(
                    [self.executable, '--receptor', self.protein_file,
                     '--ligand', ligand_file, '--out', ligand_outfile] +
                    self.params,
                    stderr=subprocess.STDOUT))
            except subprocess.CalledProcessError as e:
                # 'replace' keeps a stray non-ASCII byte in Vina's output
                # from hiding the real failure behind a UnicodeDecodeError
                sys.stderr.write(e.output.decode('ascii', 'replace'))
                if self.skip_bad_mols:
                    continue
                raise Exception('Autodock Vina failed. Command: "%s"' %
                                ' '.join(e.cmd))

            # HACK # overcome connectivity problems in obabel
            source_ligand = six.next(toolkit.readfile('pdbqt', ligand_file))
            del source_ligand.data['REMARK']

            # read every pose first, then pair poses with parsed scores
            poses = list(toolkit.readfile('pdbqt', ligand_outfile,
                                          opt={'b': None}))
            for lig, scores in zip(poses, vina):
                # HACK # copy data from source
                clone = source_ligand.clone
                clone.clone_coords(lig)
                clone.data.update(scores)
                output_array.append(clone)
    finally:
        # always remove the scratch directory, also when docking fails
        rmtree(ligand_dir)
    return output_array
def ligand(self):
    """Return the ligand stored for this entry, or None if there is none.

    ``<id>_ligand.sdf`` is tried first and ``<id>_ligand.mol2`` second;
    the first file that exists is read lazily with ``self.opt``.
    """
    candidates = (
        ('sdf', '%s_ligand.sdf' % self.id),
        ('mol2', '%s_ligand.mol2' % self.id),
    )
    for fmt, filename in candidates:
        path = os.path.join(self.home, self.id, filename)
        if os.path.isfile(path):
            reader = toolkit.readfile(fmt, path, lazy=True, opt=self.opt)
            return next(reader)
    return None
def generate_descriptor(packed):
    """Build a flattened descriptor for one PDBbind complex.

    Parameters
    ----------
    packed: tuple
        ``(pdbid, gen, pdbbind_dir, pdbbind_version)``; packed into one
        argument, presumably so the function can be used with ``map``.
        ``gen`` must provide ``build(ligands, protein)``.

    Returns
    -------
    The value of ``gen.build([ligand], protein).flatten()``.
    """
    pdbid, gen, pdbbind_dir, pdbbind_version = packed
    complex_dir = "%s/v%s/%s" % (pdbbind_dir, pdbbind_version, pdbid)

    # prefer the pocket, fall back to the whole protein
    protein_file = "%s/%s_pocket.pdb" % (complex_dir, pdbid)
    if not isfile(protein_file):
        protein_file = "%s/%s_protein.pdb" % (complex_dir, pdbid)
    ligand_file = "%s/%s_ligand.sdf" % (complex_dir, pdbid)

    # builtin next() instead of the Python-2-only ``.next()`` method
    protein = next(toolkit.readfile("pdb", protein_file, opt={'b': None}))
    ligand = next(toolkit.readfile("sdf", ligand_file))
    return gen.build([ligand], protein).flatten()
def decoys(self):
    """Read a decoys file"""
    packed = os.path.join(self.home, self.dude_id, 'decoys_final.mol2.gz')
    # the gzipped file wins; otherwise check if file is unpacked
    for path in (packed, packed[:-3]):
        if os.path.isfile(path):
            return toolkit.readfile('mol2', path)
    return None
def detectInteractions(protein, ligand):
    """Detect protein-ligand interactions for every ligand in a file.

    Parameters
    ----------
    protein: string
        Path to the protein file; the format is taken from the extension.
    ligand: string
        Path to a file with one or more ligands; the format is taken
        from the extension.

    Returns
    -------
    bindingsite: dict
        Maps each interaction name to the set of residue numbers
        collected over all ligands.
    profiles: list of dict
        One dict per ligand, mapping each interaction name to an array
        of residue numbers.
    """
    # interactions to be detected
    interaction_names = (
        'hbonds', 'halogenbonds', 'pi_stacking', 'salt_bridges',
        'hydrophobic_contacts', 'pi_cation', 'pi_metal',
    )
    # one accumulator per interaction; an explicit dict replaces the
    # former exec()/locals() injection, which is fragile across versions
    bindingsite = dict((name, set()) for name in interaction_names)

    # prepare protein and ligand objects using oddt; the extension is the
    # last dot-separated field, so dotted directory names are handled
    protein_mol = next(toolkit.readfile(protein.split('.')[-1], protein))
    protein_mol.protein = True
    ligands = list(toolkit.readfile(ligand.split('.')[-1], ligand))

    # to store ligands' interactions with protein
    profiles = []
    for lig in ligands:
        # interactions are detected at atom level and recorded as the
        # residue numbers of the contacting atoms
        profile = {}
        for contact in interaction_names:
            # element 0 (and 1) hold contacting atoms; elements 2/3 are
            # used as masks - presumably the "strict" flags, TODO confirm
            contact_atoms = getattr(interactions, contact)(protein_mol, lig)
            first_resnum = contact_atoms[0]['resnum']
            if contact in ('hbonds', 'halogenbonds'):
                # was misspelled 'honds', so hbonds hit the final branch
                resnums = first_resnum[contact_atoms[2]]
            elif contact == 'pi_stacking':
                # face to face; edge to face
                resnums = np.concatenate(
                    (first_resnum[contact_atoms[2]],
                     first_resnum[contact_atoms[3]]),
                    axis=None)
            elif contact in ('salt_bridges', 'hydrophobic_contacts'):
                resnums = first_resnum
            else:
                # ring; cation or metal
                resnums = np.concatenate(
                    (first_resnum[contact_atoms[2]],
                     contact_atoms[1]['resnum'][contact_atoms[2]]),
                    axis=None)
            profile[contact] = resnums
            if resnums.size != 0:
                bindingsite[contact].update(resnums)
        profiles.append(profile)

    return bindingsite, profiles
def generate_descriptor(packed):
    """Build a flattened descriptor for one PDBbind complex.

    Parameters
    ----------
    packed: tuple
        ``(pdbid, gen, pdbbind_dir, pdbbind_version)``; packed into one
        argument, presumably so the function can be used with ``map``.
        ``gen`` must provide ``build(ligands, protein)``.

    Returns
    -------
    The value of ``gen.build([ligand], protein).flatten()``.
    """
    pdbid, gen, pdbbind_dir, pdbbind_version = packed
    complex_dir = "%s/v%s/%s" % (pdbbind_dir, pdbbind_version, pdbid)

    # prefer the pocket, fall back to the whole protein
    protein_file = "%s/%s_pocket.pdb" % (complex_dir, pdbid)
    if not isfile(protein_file):
        protein_file = "%s/%s_protein.pdb" % (complex_dir, pdbid)
    ligand_file = "%s/%s_ligand.sdf" % (complex_dir, pdbid)

    # builtin next() instead of the Python-2-only ``.next()`` method
    protein = next(toolkit.readfile("pdb", protein_file))
    # mark it as a protein
    protein.protein = True
    ligand = next(toolkit.readfile("sdf", ligand_file))
    return gen.build([ligand], protein).flatten()
def pocket(self):
    """Return the binding pocket, falling back to the whole protein.

    Returns
    -------
    The first molecule of ``<home>/<id>/<id>_pocket.pdb`` when that file
    exists, otherwise ``self.protein`` when it is truthy, otherwise None.
    """
    path = '%s/%s/%s_pocket.pdb' % (self.home, self.id, self.id)
    if isfile(path):
        # builtin next() instead of the Python-2-only ``.next()`` method
        return next(toolkit.readfile('pdb', path, lazy=True, opt=self.opt))
    # read the attribute once; it may be a property that loads a file
    protein = self.protein
    if protein:
        return protein
    return None
def write(self, fmt, filename, csv_filename=None, **kwargs):
    """Output the pipeline's molecules to a file.

    Parameters
    ----------
    fmt: string
        Type of molecular file
    filename: string
        Path to the output file
    csv_filename: string
        Optional path to a CSV file receiving the molecules' data
    **kwargs
        Passed to ``toolkit.Outputfile`` and to ``csv.DictWriter``.

    Returns
    -------
    False when a molecule has no data left for the CSV file (writing
    stops there); otherwise None.
    """
    # internal data which is filtered out of the CSV
    blacklist_keys = ['OpenBabel Symmetry Classes', 'MOL Chiral Flag',
                      'PartialCharges', 'TORSDO', 'REMARK']
    output_mol_file = toolkit.Outputfile(fmt, filename, **kwargs)
    csv_handle = None
    csv_file = None
    try:
        if csv_filename:
            csv_handle = open(csv_filename, 'w')
        for mol in self.fetch():
            if csv_filename:
                data = dict(mol.data)
                for b in blacklist_keys:
                    # ``in``/pop instead of the Python-2-only has_key()
                    data.pop(b, None)
                if len(data) > 0:
                    data['name'] = mol.title
                else:
                    # single-argument form is valid on Python 2 and 3
                    print("There is no data to write in CSV file")
                    return False
                if csv_file is None:
                    # header is taken from the first molecule's keys
                    csv_file = csv.DictWriter(csv_handle, list(data.keys()),
                                              **kwargs)
                    csv_file.writeheader()
                csv_file.writerow(data)
            # write ligand
            output_mol_file.write(mol)
    finally:
        # close both files on every exit path, including the early return
        output_mol_file.close()
        if csv_handle is not None:
            csv_handle.close()
    # FIXME destroys data
    # the pipe is re-read from the written file, presumably because
    # fetch() has consumed it
    self._pipe = toolkit.readfile(fmt, filename)
def score(self, function, protein, *args, **kwargs):
    """Scoring procedure.

    Parameters
    ----------
    function: string
        Which scoring function to use ('rfscore' or 'nnscore',
        case-insensitive).
    protein: oddt.toolkit.Molecule or string
        Default protein to use as reference. A string is treated as a
        file path; its extension selects the format.

    Raises
    ------
    ValueError
        If ``function`` names an unknown scoring function.
    """
    if isinstance(protein, str):
        extension = protein.split('.')[-1]
        # builtin next() instead of the Python-2-only ``.next()`` method
        protein = next(toolkit.readfile(extension, protein))
        protein.protein = True

    name = function.lower()
    if name == 'rfscore':
        from .scoring.functions.RFScore import rfscore
        sf = rfscore.load()
    elif name == 'nnscore':
        from .scoring.functions.NNScore import nnscore
        sf = nnscore.load()
    else:
        raise ValueError('Scoring Function %s was not implemented in ODDT' % function)
    sf.set_protein(protein)

    if self.n_cpu != 1:
        # each ligand is scored separately through the pool
        self._pipe = self._pool.imap(
            _parallel_helper,
            ((sf, 'predict_ligand', {'ligand': lig}) for lig in self._pipe))
    else:
        self._pipe = sf.predict_ligands(self._pipe)
def ligand(self):
    """Return the ligand read from ``<home>/<id>/<id>_ligand.mol2``.

    Returns
    -------
    The first molecule of the file (read lazily with ``self.opt``), or
    None when the file does not exist.
    """
    path = "%s/%s/%s_ligand.mol2" % (self.home, self.id, self.id)
    if not isfile(path):
        return None
    # builtin next() instead of the Python-2-only ``.next()`` method
    return next(toolkit.readfile("mol2", path, lazy=True, opt=self.opt))
def protein(self):
    """Read a protein file"""
    receptor_path = os.path.join(self.home, self.dude_id, 'receptor.pdb')
    if not os.path.isfile(receptor_path):
        return None
    # only the first molecule of the file is used
    reader = toolkit.readfile('pdb', receptor_path)
    return next(reader)
def protein(self):
    """Return the protein read from ``<home>/<id>/<id>_protein.pdb``.

    Returns
    -------
    The first molecule of the file (read lazily with ``self.opt``), or
    None when the file does not exist.
    """
    path = "%s/%s/%s_protein.pdb" % (self.home, self.id, self.id)
    if not isfile(path):
        return None
    # builtin next() instead of the Python-2-only ``.next()`` method
    return next(toolkit.readfile("pdb", path, lazy=True, opt=self.opt))
def ligand(self):
    """Read a ligand file"""
    ligand_path = os.path.join(self.home, self.dude_id, 'crystal_ligand.mol2')
    if not os.path.isfile(ligand_path):
        return None
    # only the first molecule of the file is used
    reader = toolkit.readfile('mol2', ligand_path)
    return next(reader)
def dock(self, ligands, protein=None, single=False):
    """Automated docking procedure.

    Parameters
    ----------
    ligands: iterable of oddt.toolkit.Molecule objects
        Ligands to dock
    protein: oddt.toolkit.Molecule object or None
        Protein object to be used. If None, then the default one is used,
        else the protein is new default.
    single: bool (default=False)
        A flag to indicate single ligand docking; ``ligands`` is then a
        single molecule and is wrapped in a list.

    Returns
    -------
    ligands : array of oddt.toolkit.Molecule objects
        Array of ligands (scores are stored in mol.data method)

    Raises
    ------
    IOError
        If no receptor file has been set.
    Exception
        If Autodock Vina exits with an error.
    """
    if protein:
        self.set_protein(protein)
    if not self.protein_file:
        raise IOError("No receptor.")
    if single:
        ligands = [ligands]
    ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
    output_array = []
    try:
        for n, ligand in enumerate(ligands):
            # write ligand to file; the title is sanitised once and reused
            base = (ligand_dir + '/' + str(n) + '_' +
                    re.sub('[^A-Za-z0-9]+', '_', ligand.title))
            ligand_file = base + '.pdbqt'
            ligand_outfile = base + '_out.pdbqt'
            ligand.write('pdbqt', ligand_file, overwrite=True, opt={'b': None})
            try:
                vina = parse_vina_docking_output(subprocess.check_output(
                    [self.executable, '--receptor', self.protein_file,
                     '--ligand', ligand_file, '--out', ligand_outfile] +
                    self.params,
                    stderr=subprocess.STDOUT))
            except subprocess.CalledProcessError as e:
                # check_output yields bytes on Python 3; decode before
                # writing to the text-mode stderr
                sys.stderr.write(e.output.decode('ascii', 'replace'))
                raise Exception('Autodock Vina failed. Command: "%s"' %
                                ' '.join(e.cmd))

            # HACK # overcome connectivity problems in obabel
            # builtin next() instead of the Python-2-only ``.next()``
            source_ligand = next(toolkit.readfile('pdbqt', ligand_file))

            # read every pose first, then pair poses with parsed scores
            poses = list(toolkit.readfile('pdbqt', ligand_outfile,
                                          opt={'b': None}))
            for lig, scores in zip(poses, vina):
                # HACK # copy data from source
                clone = source_ligand.clone
                clone.clone_coords(lig)
                clone.data.update(scores)
                output_array.append(clone)
    finally:
        # always remove the scratch directory, also when docking fails
        rmtree(ligand_dir)
    return output_array
def decoys_docking(self):
    """Load decoys used for docking from mol2 file
    as list of ob.Molecule objects"""
    filepath = '%s/decoys_docking/%s_decoys.mol2' % (self.home, self.pdbid)
    if not isfile(filepath):
        return None
    # materialise the reader so the caller gets a list
    return [mol for mol in toolkit.readfile('mol2', filepath)]
def ligand(self):
    """Load target ligand from mol2 file as ob.Molecule object"""
    pdbid = self.pdbid
    filepath = '%s/coreset/%s/%s_ligand.mol2' % (self.home, pdbid, pdbid)
    if not isfile(filepath):
        return None
    # only the first molecule of the file is used
    return six.next(toolkit.readfile('mol2', filepath))
def protein(self):
    """Load target protein from mol2 file as ob.Molecule object"""
    pdbid = self.pdbid
    filepath = '%s/coreset/%s/%s_protein.mol2' % (self.home, pdbid, pdbid)
    if not isfile(filepath):
        return None
    # only the first molecule of the file is used
    return six.next(toolkit.readfile('mol2', filepath))
def score(self, function, protein=None, *args, **kwargs):
    """Scoring procedure.

    Parameters
    ----------
    function: string or scoring-function object
        Which scoring function to use. Strings starting with 'rfscore'
        or 'nnscore' may carry ``_v<N>`` (rfscore only) and
        ``_pdbbind<YEAR>`` suffixes; 'autodock_vina' is accepted too.
        Any object with ``set_protein``, ``predict_ligand`` and
        ``predict_ligands`` is used as is.
    protein: oddt.toolkit.Molecule or string
        Default protein to use as reference. A string is treated as a
        file path; its extension selects the format.

    Note
    ----
    Additional parameters are passed directly to ``autodock_vina``.

    Raises
    ------
    ValueError
        If ``function`` is an unknown name or not a scoring function.
    """
    if isinstance(protein, str):
        extension = protein.split('.')[-1]
        protein = six.next(toolkit.readfile(extension, protein))
        protein.protein = True
    # trigger cache
    protein.atom_dict

    if isinstance(function, str):
        name = function.lower()
        bits = name.split('_')
        if name.startswith('rfscore'):
            from oddt.scoring.functions.RFScore import rfscore
            new_kwargs = {}
            for bit in bits:
                if bit.startswith('pdbbind'):
                    new_kwargs['pdbbind_version'] = int(bit.replace('pdbbind', ''))
                elif bit.startswith('v'):
                    new_kwargs['version'] = int(bit.replace('v', ''))
            sf = rfscore.load(**new_kwargs)
        elif name.startswith('nnscore'):
            from oddt.scoring.functions.NNScore import nnscore
            new_kwargs = {}
            for bit in bits:
                if bit.startswith('pdbbind'):
                    new_kwargs['pdbbind_version'] = int(bit.replace('pdbbind', ''))
            sf = nnscore.load(**new_kwargs)
        elif name == 'autodock_vina':
            from oddt.docking import autodock_vina
            sf = autodock_vina(protein, *args, **kwargs)
        else:
            raise ValueError('Scoring Function %s was not implemented in ODDT' % function)
    elif (hasattr(function, 'set_protein') and
          hasattr(function, 'predict_ligands') and
          hasattr(function, 'predict_ligand')):
        sf = function
    else:
        # instances have no __name__; fall back to the class name so the
        # intended ValueError is raised, not an AttributeError
        label = getattr(function, '__name__', type(function).__name__)
        raise ValueError('Supplied object "%s" is not an ODDT scoring funtion' % label)
    sf.set_protein(protein)

    if self.n_cpu != 1:
        _parallel_helper_partial = partial(_parallel_helper, sf, 'predict_ligand')
        # non-positive n_cpu lets Pool choose its own worker count
        self._pipe = Pool(self.n_cpu if self.n_cpu > 0 else None).imap(
            _parallel_helper_partial,
            ({'ligand': lig} for lig in self._pipe),
            chunksize=100)
    else:
        self._pipe = sf.predict_ligands(self._pipe)
def pocket(self):
    """Return the pocket from ``<id>_pocket.pdb``, or None if absent.

    The molecule is read lazily with ``self.opt`` and, when the reader
    yields something other than None, flagged as a protein.
    """
    path = os.path.join(self.home, self.id, '%s_pocket.pdb' % self.id)
    if not os.path.isfile(path):
        return None
    mol = next(toolkit.readfile('pdb', path, lazy=True, opt=self.opt))
    if mol is not None:
        mol.protein = True
    return mol
def ligand(self):
    """Return the ligand of this entry, or None if no ligand file exists.

    The existence check used to look at ``<id>_ligand.mol2`` while the
    ``<id>_ligand.sdf`` file was the one read. Each candidate is now
    checked and read under the same path: SDF first, MOL2 as fallback.

    Returns
    -------
    The first molecule of the file found (read lazily with
    ``self.opt``), or None when neither file exists.
    """
    for fmt in ('sdf', 'mol2'):
        path = '%s/%s/%s_ligand.%s' % (self.home, self.id, self.id, fmt)
        if isfile(path):
            # builtin next() instead of the Python-2-only ``.next()``
            return next(toolkit.readfile(fmt, path, lazy=True, opt=self.opt))
    return None
def set_protein(self, protein):
    """Change protein to dock to.

    Parameters
    ----------
    protein: oddt.toolkit.Molecule object or string
        Protein object to be used, or a path to a protein file whose
        format is taken from the file extension.
    """
    # generate new directory
    self._tmp_dir = None
    self.protein = protein
    if isinstance(protein, str):
        extension = protein.split('.')[-1]
        # builtin next() works on Python 2 and 3 (``.next()`` is py2-only)
        self.protein = next(toolkit.readfile(extension, protein))
        # files are always receptors here, so flag them on every path
        self.protein.protein = True
        if extension == 'pdbqt':
            # already a PDBQT file - hand it to Vina untouched
            self.protein_file = protein
            return

    # write protein to file
    self.protein_file = self.tmp_dir + '/protein.pdbqt'
    self.protein.write('pdbqt', self.protein_file, opt={'r': None},
                       overwrite=True)
def set_protein(self, protein):
    """Change protein to dock to.

    Parameters
    ----------
    protein: oddt.toolkit.Molecule object
        Protein object to be used. A string is read as a file whose
        format comes from its extension; a falsy value only resets
        ``self._tmp_dir``.
    """
    # generate new directory
    self._tmp_dir = None
    if not protein:
        return

    self.protein = protein
    needs_conversion = True
    if type(protein) is str:
        extension = protein.split('.')[-1]
        needs_conversion = extension != 'pdbqt'
        if not needs_conversion:
            # PDBQT input is used directly as the receptor file
            self.protein_file = protein
        self.protein = six.next(toolkit.readfile(extension, protein))
        self.protein.protein = True

    if needs_conversion:
        # write protein to file
        self.protein_file = self.tmp_dir + '/protein.pdbqt'
        with open(self.protein_file, 'w') as handle:
            pdbqt_text = self.protein.write('pdbqt',
                                            opt={'r': None, 'c': None},
                                            overwrite=True)
            for line in pdbqt_text.split('\n'):
                # remove OB 2.3 ROOT/ENDROOT tags
                if line in ('ROOT', 'ENDROOT'):
                    continue
                # a TORSDOF record is replaced by a TER record
                handle.write('TER\n' if line[:7] == 'TORSDOF' else line + '\n')
def pocket(self):
    """Return the pocket from ``<home>/<id>/<id>_pocket.pdb``.

    The first molecule is read lazily with ``self.opt``; None is
    returned when the file does not exist.
    """
    pocket_path = '%s/%s/%s_pocket.pdb' % (self.home, self.id, self.id)
    if not isfile(pocket_path):
        return None
    reader = toolkit.readfile('pdb', pocket_path, lazy=True, opt=self.opt)
    return six.next(reader)
def decoys_screening(self):
    """Load decoys used for screening from mol2 files
    as list of ob.Molecule objects"""
    dirpath = '%s/decoys_screening/%s' % (self.home, self.pdbid)
    if not isdir(dirpath):
        return None
    # the first molecule of every file in the directory, in listdir order
    return [six.next(toolkit.readfile('mol2', dirpath + '/' + name))
            for name in listdir(dirpath)]
def dock(self, ligands, protein=None, single=False):
    """Automated docking procedure.

    Parameters
    ----------
    ligands: iterable of oddt.toolkit.Molecule objects
        Ligands to dock
    protein: oddt.toolkit.Molecule object or None
        Protein object to be used. If None, then the default one is used,
        else the protein is new default.
    single: bool (default=False)
        A flag to indicate single ligand docking; ``ligands`` is then a
        single molecule and is wrapped in a list.

    Returns
    -------
    ligands : array of oddt.toolkit.Molecule objects
        Array of ligands (scores are stored in mol.data method)
    """
    if protein:
        self.set_protein(protein)
    if single:
        ligands = [ligands]
    ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
    output_array = []
    try:
        for n, ligand in enumerate(ligands):
            # write ligand to file; the title is sanitised once and reused
            base = (ligand_dir + '/' + str(n) + '_' +
                    re.sub('[^A-Za-z0-9]+', '_', ligand.title))
            ligand_file = base + '.pdbqt'
            ligand_outfile = base + '_out.pdbqt'
            ligand.write('pdbqt', ligand_file, overwrite=True)
            vina = parse_vina_docking_output(subprocess.check_output(
                [self.executable, '--receptor', self.protein_file,
                 '--ligand', ligand_file, '--out', ligand_outfile] +
                self.params,
                stderr=subprocess.STDOUT))

            # HACK # overcome connectivity problems in obabel
            # builtin next() instead of the Python-2-only ``.next()``
            source_ligand = next(toolkit.readfile('pdbqt', ligand_file))

            # read every pose first, then pair poses with parsed scores
            poses = list(toolkit.readfile('pdbqt', ligand_outfile,
                                          opt={'b': None}))
            for lig, scores in zip(poses, vina):
                # HACK # copy data from source
                clone = source_ligand.clone
                clone.clone_coords(lig)
                clone.data.update(scores)
                output_array.append(clone)
    finally:
        # always remove the scratch directory, also when docking fails
        rmtree(ligand_dir)
    return output_array
def read_next_protein(proteins, format, previous, index, keep_hs=False):
    """Read the protein at ``index``, or reuse the previous one.

    Parameters
    ----------
    proteins: sequence of strings
        Paths of the protein files.
    format: string
        Molecular file format passed to ``toolkit.readfile``.
    previous: molecule or None
        Returned unchanged when it is truthy and ``index`` is past the
        end of ``proteins``.
    index: int
        Position in ``proteins`` to read.
    keep_hs: bool (default=False)
        The reader is called with ``removeHs=not keep_hs``.

    Raises
    ------
    ValueError
        If the reader yields a falsy molecule.
    """
    if previous and index >= len(proteins):
        return previous

    reader = toolkit.readfile(format, proteins[index], removeHs=not keep_hs)
    protein = next(reader)
    if protein:
        utils.log('Read protein', index + 1)
        protein.protein = True
        protein.removeh()
        return protein
    raise ValueError('Unable to read protein')
def write(self, fmt, filename, csv_filename=None, **kwargs):
    """Outputs molecules to a file

    Parameters
    ----------
    fmt: string
        Type of molecular file
    filename: string
        Path to a output file
    csv_filename: string
        Optional path to a CSV file
    **kwargs
        Passed to ``toolkit.Outputfile``, ``csv.DictWriter`` and (minus
        ``overwrite``) to ``toolkit.readfile``.

    Returns
    -------
    False when a molecule has no data left for the CSV file (writing
    stops there); otherwise None.
    """
    if fmt == 'mol2' and toolkit.backend == 'ob':
        # work on a copy so the caller's ``opt`` dict is not modified
        opt = dict(kwargs.get('opt') or {})
        opt['c'] = None
        kwargs['opt'] = opt

    # filter some internal data
    blacklist_keys = ['OpenBabel Symmetry Classes', 'MOL Chiral Flag',
                      'PartialCharges', 'TORSDO', 'REMARK']
    output_mol_file = toolkit.Outputfile(fmt, filename, **kwargs)
    csv_handle = None
    csv_file = None
    try:
        if csv_filename:
            csv_handle = open(csv_filename, 'w')
        for mol in self.fetch():
            if csv_filename:
                data = mol.data.to_dict()
                for b in blacklist_keys:
                    data.pop(b, None)
                if len(data) > 0:
                    data['name'] = mol.title
                else:
                    print("There is no data to write in CSV file",
                          file=sys.stderr)
                    return False
                if csv_file is None:
                    # NOTE(review): kwargs meant for the molecule writer
                    # (e.g. 'opt', 'overwrite') are forwarded here too -
                    # confirm csv.DictWriter is meant to receive them
                    csv_file = csv.DictWriter(csv_handle, list(data.keys()),
                                              **kwargs)
                    csv_file.writeheader()
                csv_file.writerow(data)
            # write ligand
            output_mol_file.write(mol)
    finally:
        # close both files on every exit path, including the early return
        output_mol_file.close()
        if csv_handle is not None:
            csv_handle.close()

    if isfile(filename):
        # this argument is unsupported in readfile; the default avoids a
        # KeyError when the caller did not pass ``overwrite``
        kwargs.pop('overwrite', None)
        self._pipe = toolkit.readfile(fmt, filename, **kwargs)
def load_ligands(self, file_type, ligands_file):
    """Loads file with ligands.

    Parameters
    ----------
    file_type: string
        Type of molecular file
    ligands_file: string
        Path to a file, which is loaded to pipeline
    """
    # the reader is wrapped by _ligand_pipe and replaces the current pipe
    molecules = toolkit.readfile(file_type, ligands_file)
    self._pipe = self._ligand_pipe(molecules)
def score(self, function, protein=None, *args, **kwargs):
    """Scoring procedure.

    Parameters
    ----------
    function: string or scoring-function object
        Which scoring function to use: 'rfscore' (optionally with a
        ``_v<N>`` suffix), 'nnscore' or 'autodock_vina'. Any object with
        ``set_protein``, ``predict_ligand`` and ``predict_ligands`` is
        used as is.
    protein: oddt.toolkit.Molecule or string
        Default protein to use as reference. A string is treated as a
        file path; its extension selects the format.

    Note
    ----
    Additional parameters are passed directly to ``autodock_vina``.

    Raises
    ------
    ValueError
        If ``function`` is an unknown name or not a scoring function.
    """
    if isinstance(protein, str):
        extension = protein.split('.')[-1]
        # builtin next() instead of the Python-2-only ``.next()`` method
        protein = next(toolkit.readfile(extension, protein))
        protein.protein = True
    # trigger cache
    protein.atom_dict

    if isinstance(function, str):
        name = function.lower()
        if name.startswith('rfscore'):
            from oddt.scoring.functions.RFScore import rfscore
            tmp = name.split('_')
            # 'rfscore_v2' -> version 2; plain 'rfscore' -> version 1
            v = int(tmp[-1][1:]) if len(tmp) > 1 else 1
            sf = rfscore.load(version=v)
        elif name == 'nnscore':
            from oddt.scoring.functions.NNScore import nnscore
            sf = nnscore.load()
        elif name == 'autodock_vina':
            from oddt.docking import autodock_vina
            sf = autodock_vina(protein, *args, **kwargs)
        else:
            raise ValueError('Scoring Function %s was not implemented in ODDT' % function)
    elif (hasattr(function, 'set_protein') and
          hasattr(function, 'predict_ligands') and
          hasattr(function, 'predict_ligand')):
        sf = function
    else:
        # instances have no __name__; fall back to the class name so the
        # intended ValueError is raised, not an AttributeError
        label = getattr(function, '__name__', type(function).__name__)
        raise ValueError('Supplied object "%s" is not an ODDT scoring funtion' % label)
    sf.set_protein(protein)

    if self.n_cpu != 1:
        _parallel_helper_partial = partial(_parallel_helper, sf, 'predict_ligand')
        self._pipe = self._pool.imap(
            _parallel_helper_partial,
            ({'ligand': lig} for lig in self._pipe),
            chunksize=100)
    else:
        self._pipe = sf.predict_ligands(self._pipe)
def dock(self, ligands, protein=None, single=False):
    """Automated docking procedure.

    Parameters
    ----------
    ligands: iterable of oddt.toolkit.Molecule objects
        Ligands to dock
    protein: oddt.toolkit.Molecule object or None
        Protein object to be used. If None, then the default one is used,
        else the protein is new default.
    single: bool (default=False)
        A flag to indicate single ligand docking; ``ligands`` is then a
        single molecule and is wrapped in a list.

    Returns
    -------
    ligands : array of oddt.toolkit.Molecule objects
        Array of ligands (scores are stored in mol.data method)
    """
    # local import: this block did not previously rely on ``re``
    import re

    if protein:
        self.set_protein(protein)
    if single:
        ligands = [ligands]
    ligand_dir = mkdtemp(dir=self.tmp_dir, prefix='ligands_')
    output_array = []
    try:
        for n, ligand in enumerate(ligands):
            # write ligand to file; titles may hold '/' or other characters
            # that are unsafe in a file name, so they are sanitised first
            base = (ligand_dir + '/' + str(n) + '_' +
                    re.sub('[^A-Za-z0-9]+', '_', ligand.title))
            ligand_file = base + '.pdbqt'
            ligand_outfile = base + '_out.pdbqt'
            ligand.write('pdbqt', ligand_file, overwrite=True)
            vina = parse_vina_docking_output(subprocess.check_output(
                [self.executable, '--receptor', self.protein_file,
                 '--ligand', ligand_file, '--out', ligand_outfile] +
                self.params,
                stderr=subprocess.STDOUT))

            # HACK # overcome connectivity problems in obabel
            # builtin next() instead of the Python-2-only ``.next()``
            source_ligand = next(toolkit.readfile('pdbqt', ligand_file))

            # read every pose first, then pair poses with parsed scores
            poses = list(toolkit.readfile('pdbqt', ligand_outfile,
                                          opt={'b': None}))
            for lig, scores in zip(poses, vina):
                # HACK # copy data from source
                clone = source_ligand.clone
                clone.clone_coords(lig)
                clone.data.update(scores)
                output_array.append(clone)
    finally:
        # always remove the scratch directory, also when docking fails
        rmtree(ligand_dir)
    return output_array
def load_ligands(self, file_type, ligands_file):
    """Loads file with ligands.

    Parameters
    ----------
    file_type: string
        Type of molecular file
    ligands_file: string
        Path to a file, which is loaded to pipeline
    """
    # the reader is wrapped by _ligand_pipe and replaces the current pipe
    molecules = toolkit.readfile(file_type, ligands_file)
    self._pipe = self._ligand_pipe(molecules)
def write(self, fmt, filename, csv_filename=None, **kwargs):
    """Outputs molecules to a file

    Parameters
    ----------
    fmt: string
        Type of molecular file
    filename: string
        Path to a output file
    csv_filename: string
        Optional path to a CSV file
    **kwargs
        Passed to ``toolkit.Outputfile``, ``csv.DictWriter`` and (minus
        ``overwrite``) to ``toolkit.readfile``.

    Returns
    -------
    False when a molecule has no data left for the CSV file (writing
    stops there); otherwise None.
    """
    if fmt == 'mol2' and toolkit.backend == 'ob':
        # work on a copy so the caller's ``opt`` dict is not modified
        opt = dict(kwargs.get('opt') or {})
        opt['c'] = None
        kwargs['opt'] = opt

    # filter some internal data
    blacklist_keys = ['OpenBabel Symmetry Classes', 'MOL Chiral Flag',
                      'PartialCharges', 'TORSDO', 'REMARK']
    output_mol_file = toolkit.Outputfile(fmt, filename, **kwargs)
    csv_handle = None
    csv_file = None
    try:
        if csv_filename:
            csv_handle = open(csv_filename, 'w')
        for mol in self.fetch():
            if csv_filename:
                data = dict(mol.data)
                for b in blacklist_keys:
                    # pop with default instead of the py2-only has_key()
                    data.pop(b, None)
                if len(data) > 0:
                    data['name'] = mol.title
                else:
                    # single-argument form is valid on Python 2 and 3
                    print("There is no data to write in CSV file")
                    return False
                if csv_file is None:
                    # NOTE(review): kwargs meant for the molecule writer
                    # (e.g. 'opt', 'overwrite') are forwarded here too -
                    # confirm csv.DictWriter is meant to receive them
                    csv_file = csv.DictWriter(csv_handle, list(data.keys()),
                                              **kwargs)
                    csv_file.writeheader()
                csv_file.writerow(data)
            # write ligand
            output_mol_file.write(mol)
    finally:
        # close both files on every exit path, including the early return
        output_mol_file.close()
        if csv_handle is not None:
            csv_handle.close()

    if isfile(filename):
        # this argument is unsupported in readfile; the default avoids a
        # KeyError when the caller did not pass ``overwrite``
        kwargs.pop('overwrite', None)
        self._pipe = toolkit.readfile(fmt, filename, **kwargs)
def write(self, fmt, filename, csv_filename=None, **kwargs):
    """Outputs molecules to a file

    Parameters
    ----------
    fmt: string
        Type of molecular file
    filename: string
        Path to a output file
    csv_filename: string
        Optional path to a CSV file
    **kwargs
        Passed to ``toolkit.Outputfile`` and to ``csv.DictWriter``.

    Returns
    -------
    False when a molecule has no data left for the CSV file (writing
    stops there); otherwise None.
    """
    # filter some internal data
    blacklist_keys = [
        'OpenBabel Symmetry Classes', 'MOL Chiral Flag', 'PartialCharges',
        'TORSDO', 'REMARK'
    ]
    output_mol_file = toolkit.Outputfile(fmt, filename, **kwargs)
    csv_handle = None
    csv_file = None
    try:
        if csv_filename:
            csv_handle = open(csv_filename, 'w')
        for mol in self.fetch():
            if csv_filename:
                data = dict(mol.data)
                for b in blacklist_keys:
                    # pop with default instead of the py2-only has_key()
                    data.pop(b, None)
                if len(data) > 0:
                    data['name'] = mol.title
                else:
                    # single-argument form is valid on Python 2 and 3
                    print("There is no data to write in CSV file")
                    return False
                if csv_file is None:
                    # header is taken from the first molecule's keys
                    csv_file = csv.DictWriter(csv_handle, list(data.keys()),
                                              **kwargs)
                    csv_file.writeheader()
                csv_file.writerow(data)
            # write ligand
            output_mol_file.write(mol)
    finally:
        # close both files on every exit path, including the early return
        output_mol_file.close()
        if csv_handle is not None:
            csv_handle.close()
    # FIXME destroys data
    # the pipe is re-read from the written file, presumably because
    # fetch() has consumed it
    self._pipe = toolkit.readfile(fmt, filename)
def load_ligands(self, fmt, ligands_file, *args, **kwargs):
    """Loads file with ligands.

    Parameters
    ----------
    fmt: string
        Type of molecular file
    ligands_file: string
        Path to a file, which is loaded to pipeline
    *args, **kwargs
        Passed on to ``toolkit.readfile``.
    """
    if fmt == 'mol2' and toolkit.backend == 'ob':
        # make sure the 'c' option is set, creating ``opt`` if needed
        kwargs.setdefault('opt', {})['c'] = None

    reader = toolkit.readfile(fmt, ligands_file, *args, **kwargs)
    new_pipe = self._ligand_pipe(reader)
    # append to an existing (truthy) pipe, otherwise start a new one
    if self._pipe:
        self._pipe = chain(self._pipe, new_pipe)
    else:
        self._pipe = new_pipe
def score(self, function, protein, *args, **kwargs):
    """Scoring procedure.

    Parameters
    ----------
    function: string
        Which scoring function to use ('rfscore' or 'nnscore',
        case-insensitive).
    protein: oddt.toolkit.Molecule or string
        Default protein to use as reference. A string is treated as a
        file path; its extension selects the format.

    Raises
    ------
    ValueError
        If ``function`` names an unknown scoring function.
    """
    if isinstance(protein, str):
        extension = protein.split('.')[-1]
        # builtin next() instead of the Python-2-only ``.next()`` method
        protein = next(toolkit.readfile(extension, protein))
        protein.protein = True

    name = function.lower()
    if name == 'rfscore':
        from .scoring.functions.RFScore import rfscore
        sf = rfscore.load()
    elif name == 'nnscore':
        from .scoring.functions.NNScore import nnscore
        sf = nnscore.load()
    else:
        raise ValueError('Scoring Function %s was not implemented in ODDT' % function)
    sf.set_protein(protein)

    if self.n_cpu != 1:
        # each ligand is scored separately through the pool
        self._pipe = self._pool.imap(
            _parallel_helper,
            ((sf, 'predict_ligand', {'ligand': lig}) for lig in self._pipe))
    else:
        self._pipe = sf.predict_ligands(self._pipe)
def score(self, function, protein, *args, **kwargs):
    """Scoring procedure.

    Parameters
    ----------
    function: string
        Which scoring function to use ('rfscore' or 'nnscore',
        case-insensitive).
    protein: oddt.toolkit.Molecule or string
        Default protein to use as reference. A string is treated as a
        file path; its extension selects the format.

    Raises
    ------
    ValueError
        If ``function`` names an unknown scoring function.
    """
    if isinstance(protein, str):
        extension = protein.split('.')[-1]
        # builtin next() instead of the Python-2-only ``.next()`` method
        protein = next(toolkit.readfile(extension, protein))
        protein.protein = True

    name = function.lower()
    if name == 'rfscore':
        from .scoring.functions.RFScore import rfscore
        sf = rfscore.load()
    elif name == 'nnscore':
        from .scoring.functions.NNScore import nnscore
        sf = nnscore.load()
    else:
        raise ValueError(
            'Scoring Function %s was not implemented in ODDT' % function)
    sf.set_protein(protein)

    if self.n_cpu != 1:
        # each ligand is scored separately through the pool
        self._pipe = self._pool.imap(
            _parallel_helper,
            ((sf, 'predict_ligand', {'ligand': lig}) for lig in self._pipe))
    else:
        self._pipe = sf.predict_ligands(self._pipe)
def __init__(self, protein=None, size=(10,10,10), center=(0,0,0),
             auto_ligand=None, exhaustivness=8, num_modes=9, energy_range=3,
             seed=None, prefix_dir='/tmp', n_cpu=1, executable=None,
             autocleanup=True):
    """Autodock Vina docking engine with optional box auto-centering.

    Parameters
    ----------
    protein: oddt.toolkit.Molecule object or string (default=None)
        Receptor passed to ``set_protein`` when given.
    size: tuple, shape=[3] (default=(10,10,10))
        Docking box size, passed as ``--size_x/y/z``.
    center: tuple, shape=[3] (default=(0,0,0))
        Docking box center, passed as ``--center_x/y/z``.
    auto_ligand: oddt.toolkit.Molecule object or string (default=None)
        Ligand (or a path to a ligand file) whose mean atom coordinates
        replace ``center``.
    exhaustivness: int (default=8)
        Passed as ``--exhaustiveness`` (parameter name kept as is for
        backward compatibility).
    num_modes: int (default=9)
        Passed as ``--num_modes``.
    energy_range: int (default=3)
        Passed as ``--energy_range``.
    seed: int or None (default=None)
        Passed as ``--seed`` when not None.
    prefix_dir: string (default=/tmp)
        Stored as ``self.dir``.
    n_cpu: int (default=1)
        Passed as ``--cpu``.
    executable: string or None (default=None)
        Vina executable; located with ``which vina`` when not given.
    autocleanup: bool (default=True)
        Stored as ``self.autocleanup``.
    """
    self.dir = prefix_dir
    self._tmp_dir = None
    # define binding site
    self.size = size
    self.center = center
    # center automaticaly on ligand
    if auto_ligand:
        if isinstance(auto_ligand, str):
            extension = auto_ligand.split('.')[-1]
            # builtin next() instead of the Python-2-only ``.next()``
            auto_ligand = next(toolkit.readfile(extension, auto_ligand))
        # float32: float16 keeps only about 3 significant digits, which
        # visibly shifts the box center for typical coordinates
        self.center = tuple(np.array([atom.coords for atom in auto_ligand],
                                     dtype=np.float32).mean(axis=0))
    # autodetect Vina executable
    if not executable:
        # check_output returns bytes on Python 3 - decode before splitting
        self.executable = (subprocess.check_output(['which', 'vina'])
                           .decode('ascii').split('\n')[0])
    else:
        self.executable = executable
    # detect version
    self.version = (subprocess.check_output([self.executable, '--version'])
                    .decode('ascii').split(' ')[2])
    self.autocleanup = autocleanup
    self.cleanup_dirs = set()

    # share protein to class
    if protein:
        self.set_protein(protein)

    # pregenerate common Vina parameters
    self.params = []
    self.params += ['--center_x', str(self.center[0]),
                    '--center_y', str(self.center[1]),
                    '--center_z', str(self.center[2])]
    self.params += ['--size_x', str(self.size[0]),
                    '--size_y', str(self.size[1]),
                    '--size_z', str(self.size[2])]
    self.params += ['--cpu', str(n_cpu)]
    self.params += ['--exhaustiveness', str(exhaustivness)]
    if seed is not None:
        self.params += ['--seed', str(seed)]
    self.params += ['--num_modes', str(num_modes)]
    self.params += ['--energy_range', str(energy_range)]
def ligand(self):
    """Return the ligand from ``<id>_ligand.sdf``, or None if absent.

    The first molecule of the file is read lazily with ``self.opt``.
    """
    sdf_path = os.path.join(self.home, self.id, '%s_ligand.sdf' % self.id)
    if not os.path.isfile(sdf_path):
        return None
    reader = toolkit.readfile('sdf', sdf_path, lazy=True, opt=self.opt)
    return next(reader)
def ligand(self):
    """Return the ligand of this entry, or None if no ligand file exists.

    The existence check used to look at ``<id>_ligand.mol2`` while the
    ``<id>_ligand.sdf`` file was the one read. Each candidate is now
    checked and read under the same path: SDF first, MOL2 as fallback.

    Returns
    -------
    The first molecule of the file found (read lazily with
    ``self.opt``), or None when neither file exists.
    """
    for fmt in ('sdf', 'mol2'):
        path = '%s/%s/%s_ligand.%s' % (self.home, self.id, self.id, fmt)
        if isfile(path):
            # builtin next() instead of the Python-2-only ``.next()``
            return next(toolkit.readfile(fmt, path, lazy=True, opt=self.opt))
    return None
def __init__(self, protein=None, auto_ligand=None, size=(20, 20, 20),
             center=(0, 0, 0), exhaustiveness=8, num_modes=9, energy_range=3,
             seed=None, prefix_dir='/tmp', n_cpu=1, executable=None,
             autocleanup=True, skip_bad_mols=True):
    """Autodock Vina docking engine, which extends its capabilities:
    automatic box (auto-centering on ligand).

    Parameters
    ----------
    protein: oddt.toolkit.Molecule object (default=None)
        Protein object to be used while generating descriptors.

    auto_ligand: oddt.toolkit.Molecule object or string (default=None)
        Ligand used to center the docking box. Either ODDT molecule or
        a file (opened based on extension and read to ODDT molecule).
        Box is centered on geometric center of molecule.

    size: tuple, shape=[3] (default=(20, 20, 20))
        Dimensions of docking box (in Angstroms)

    center: tuple, shape=[3] (default=(0,0,0))
        The center of docking box in cartesian space.

    exhaustiveness: int (default=8)
        Exhaustiveness parameter of Autodock Vina

    num_modes: int (default=9)
        Number of conformations generated by Autodock Vina. The maximum
        number of docked poses is 9 (due to Autodock Vina limitation).

    energy_range: int (default=3)
        Energy range cutoff for Autodock Vina

    seed: int or None (default=None)
        Random seed for Autodock Vina

    prefix_dir: string (default=/tmp)
        Temporary directory for Autodock Vina files

    n_cpu: int (default=1)
        Passed to Vina as ``--cpu`` when positive; otherwise the option
        is omitted.

    executable: string or None (default=None)
        Autodock Vina executable location in the system.
        It is only necessary if autodetection fails.

    autocleanup: bool (default=True)
        Should the docking engine clean up after execution?

    skip_bad_mols: bool (default=True)
        Should molecules that crash Autodock Vina be skipped.

    Raises
    ------
    Exception
        If the Vina binary cannot be located with ``which vina``.
    ValueError
        If ``num_modes`` is outside 1..9.
    """
    self.dir = prefix_dir
    self._tmp_dir = None
    # define binding site
    self.size = size
    self.center = center
    # center automaticaly on ligand
    if auto_ligand:
        if type(auto_ligand) is str:
            extension = auto_ligand.split('.')[-1]
            auto_ligand = six.next(toolkit.readfile(extension, auto_ligand))
        self.center = tuple(np.array([atom.coords for atom in auto_ligand],
                                     dtype=np.float32).mean(axis=0))
    # autodetect Vina executable
    if not executable:
        try:
            self.executable = (subprocess.check_output(['which', 'vina'])
                               .decode('ascii').split('\n')[0])
        except subprocess.CalledProcessError:
            # adjacent literals are concatenated verbatim, so each piece
            # needs its own separating space
            raise Exception('Could not find Autodock Vina binary. '
                            'You have to install it globally or supply '
                            'binary full directory via `executable` '
                            'parameter.')
    else:
        self.executable = executable
    # detect version
    self.version = (subprocess.check_output([self.executable, '--version'])
                    .decode('ascii').split(' ')[2])
    self.autocleanup = autocleanup
    self.cleanup_dirs = set()

    # share protein to class
    self.protein = None
    self.protein_file = None
    if protein:
        self.set_protein(protein)

    self.skip_bad_mols = skip_bad_mols

    # pregenerate common Vina parameters
    self.params = []
    self.params += ['--center_x', str(self.center[0]),
                    '--center_y', str(self.center[1]),
                    '--center_z', str(self.center[2])]
    self.params += ['--size_x', str(self.size[0]),
                    '--size_y', str(self.size[1]),
                    '--size_z', str(self.size[2])]
    if n_cpu > 0:
        self.params += ['--cpu', str(n_cpu)]
    self.params += ['--exhaustiveness', str(exhaustiveness)]
    if seed is not None:
        self.params += ['--seed', str(seed)]
    if num_modes > 9 or num_modes < 1:
        raise ValueError('The number of docked poses must be between 1 and 9'
                         ' (due to Autodock Vina limitation).')
    self.params += ['--num_modes', str(num_modes)]
    self.params += ['--energy_range', str(energy_range)]
def pocket(self):
    """Read the pocket structure stored alongside this entry.

    The file looked up is ``<home>/<id>/<id>_pocket.pdb``. Returns the
    first molecule produced by the toolkit reader for that file, or None
    when the file does not exist.
    """
    pocket_path = os.path.join(self.home, self.id, '%s_pocket.pdb' % self.id)
    if not os.path.isfile(pocket_path):
        return None
    molecules = toolkit.readfile('pdb', pocket_path, lazy=True, opt=self.opt)
    return next(molecules)
def load_ligands(self, file_type, ligands_file):
    """Open a ligand file and install it as the head of the pipeline.

    The molecules read from `ligands_file` (in `file_type` format) are
    wrapped with `self._ligand_pipe` and the result is stored in
    `self._pipe`.
    """
    molecule_stream = toolkit.readfile(file_type, ligands_file)
    self._pipe = self._ligand_pipe(molecule_stream)
def __init__(self, protein=None, auto_ligand=None, size=(10, 10, 10),
             center=(0, 0, 0), exhaustiveness=8, num_modes=9, energy_range=3,
             seed=None, prefix_dir='/tmp', n_cpu=1, executable=None,
             autocleanup=True):
    """Autodock Vina docking engine, which extends its capabilities:
    automatic box (autocentering on ligand).

    Parameters
    ----------
    protein: oddt.toolkit.Molecule object (default=None)
        Protein object to be used while generating descriptors.

    auto_ligand: oddt.toolkit.Molecule object or string (default=None)
        Ligand used to center the docking box. Either an ODDT molecule or
        a file name (opened based on its extension and read to an ODDT
        molecule). The box is centered on the mean of the atom
        coordinates, overriding `center`.

    size: tuple, shape=[3] (default=(10,10,10))
        Dimensions of docking box (in Angstroms)

    center: tuple, shape=[3] (default=(0,0,0))
        The center of docking box in cartesian space.

    exhaustiveness: int (default=8)
        Exhaustiveness parameter of Autodock Vina

    num_modes: int (default=9)
        Number of conformations generated by Autodock Vina

    energy_range: int (default=3)
        Energy range cutoff for Autodock Vina

    seed: int or None (default=None)
        Random seed for Autodock Vina

    prefix_dir: string (default=/tmp)
        Temporary directory for Autodock Vina files

    n_cpu: int (default=1)
        Value passed as `--cpu`. The flag is omitted when the value is
        not positive.

    executable: string or None (default=None)
        Autodock Vina executable location in the system.
        It is only necessary if autodetection fails.

    autocleanup: bool (default=True)
        Should the docking engine clean up after execution?

    Raises
    ------
    Exception
        If no `executable` was given and `which vina` fails.
    """
    self.dir = prefix_dir
    self._tmp_dir = None

    # define binding site
    self.size = size
    self.center = center

    # center automatically on ligand
    if auto_ligand:
        if isinstance(auto_ligand, str):
            extension = auto_ligand.split('.')[-1]
            # builtin next() works on both Python 2 and 3 iterators,
            # unlike the Python 2 only `.next()` method
            auto_ligand = next(toolkit.readfile(extension, auto_ligand))
        self.center = tuple(np.array([atom.coords for atom in auto_ligand],
                                     dtype=np.float32).mean(axis=0))

    # autodetect Vina executable
    # check_output returns bytes on Python 3, hence the explicit decode
    if not executable:
        try:
            self.executable = (subprocess.check_output(['which', 'vina'])
                               .decode('ascii').split('\n')[0])
        except subprocess.CalledProcessError:
            raise Exception('Could not find Autodock Vina binary. You have to install it globaly or supply binary full directory via `executable` parameter.')
    else:
        self.executable = executable

    # detect version: third space-separated token of the `--version` output
    self.version = (subprocess.check_output([self.executable, '--version'])
                    .decode('ascii').split(' ')[2])
    self.autocleanup = autocleanup
    self.cleanup_dirs = set()

    # share protein to class
    self.protein = None
    self.protein_file = None
    if protein:
        self.set_protein(protein)

    # pregenerate common Vina parameters
    self.params = []
    self.params += ['--center_x', str(self.center[0]),
                    '--center_y', str(self.center[1]),
                    '--center_z', str(self.center[2])]
    self.params += ['--size_x', str(self.size[0]),
                    '--size_y', str(self.size[1]),
                    '--size_z', str(self.size[2])]
    if n_cpu > 0:
        self.params += ['--cpu', str(n_cpu)]
    self.params += ['--exhaustiveness', str(exhaustiveness)]
    if seed is not None:
        self.params += ['--seed', str(seed)]
    self.params += ['--num_modes', str(num_modes)]
    self.params += ['--energy_range', str(energy_range)]
def __init__(self, protein=None, auto_ligand=None, size=(10, 10, 10),
             center=(0, 0, 0), exhaustivness=8, num_modes=9, energy_range=3,
             seed=None, prefix_dir='/tmp', n_cpu=1, executable=None,
             autocleanup=True):
    """Autodock Vina docking engine, which extends its capabilities:
    automatic box (autocentering on ligand).

    Parameters
    ----------
    protein: oddt.toolkit.Molecule object (default=None)
        Protein object to be used while generating descriptors.

    auto_ligand: oddt.toolkit.Molecule object or string (default=None)
        Ligand used to center the docking box. Either an ODDT molecule or
        a file name (opened based on its extension and read to an ODDT
        molecule). The box is centered on the mean of the atom
        coordinates, overriding `center`.

    size: tuple, shape=[3] (default=(10,10,10))
        Dimensions of docking box (in Angstroms)

    center: tuple, shape=[3] (default=(0,0,0))
        The center of docking box in cartesian space.

    exhaustivness: int (default=8)
        Exhaustiveness parameter of Autodock Vina. The keyword is spelled
        `exhaustivness` in this signature; it is forwarded to Vina as
        `--exhaustiveness`.

    num_modes: int (default=9)
        Number of conformations generated by Autodock Vina

    energy_range: int (default=3)
        Energy range cutoff for Autodock Vina

    seed: int or None (default=None)
        Random seed for Autodock Vina

    prefix_dir: string (default=/tmp)
        Temporary directory for Autodock Vina files

    n_cpu: int (default=1)
        Value passed as `--cpu`. The flag is omitted when the value is
        not positive.

    executable: string or None (default=None)
        Autodock Vina executable location in the system.
        It is only necessary if autodetection fails.

    autocleanup: bool (default=True)
        Should the docking engine clean up after execution?

    Raises
    ------
    subprocess.CalledProcessError
        If no `executable` was given and `which vina` fails, or if the
        `--version` call exits with an error.
    """
    self.dir = prefix_dir
    self._tmp_dir = None

    # define binding site
    self.size = size
    self.center = center

    # center automatically on ligand
    if auto_ligand:
        if isinstance(auto_ligand, str):
            extension = auto_ligand.split('.')[-1]
            # builtin next() works on both Python 2 and 3 iterators,
            # unlike the Python 2 only `.next()` method
            auto_ligand = next(toolkit.readfile(extension, auto_ligand))
        # float32 rather than float16: half precision cannot represent
        # typical coordinate values without visible rounding
        self.center = tuple(np.array([atom.coords for atom in auto_ligand],
                                     dtype=np.float32).mean(axis=0))

    # autodetect Vina executable
    # check_output returns bytes on Python 3, hence the explicit decode
    if not executable:
        self.executable = (subprocess.check_output(['which', 'vina'])
                           .decode('ascii').split('\n')[0])
    else:
        self.executable = executable

    # detect version: third space-separated token of the `--version` output
    self.version = (subprocess.check_output([self.executable, '--version'])
                    .decode('ascii').split(' ')[2])
    self.autocleanup = autocleanup
    self.cleanup_dirs = set()

    # share protein to class; always define both attributes so that code
    # checking `self.protein_file` works when no protein was supplied
    self.protein = None
    self.protein_file = None
    if protein:
        self.set_protein(protein)

    # pregenerate common Vina parameters
    self.params = []
    self.params += ['--center_x', str(self.center[0]),
                    '--center_y', str(self.center[1]),
                    '--center_z', str(self.center[2])]
    self.params += ['--size_x', str(self.size[0]),
                    '--size_y', str(self.size[1]),
                    '--size_z', str(self.size[2])]
    if n_cpu > 0:
        self.params += ['--cpu', str(n_cpu)]
    self.params += ['--exhaustiveness', str(exhaustivness)]
    if seed is not None:
        self.params += ['--seed', str(seed)]
    self.params += ['--num_modes', str(num_modes)]
    self.params += ['--energy_range', str(energy_range)]
def process(protein_files, ligands, writer, key_inters, protein_format=None,
            filter_strict=False, exact_protein=False, exact_ligand=False,
            keep_hs_protein=False, keep_hs_ligand=False, report_file=None,
            compare_file=None, nnscores=None, rfscores=None, plecscores=None):
    """Run interaction analysis for every ligand in an SDF file.

    Each ligand is paired with a protein obtained from `read_next_protein`,
    optionally scored, analysed with `process_mol` and then written to
    `writer`. A failure on one ligand is logged and counted; processing
    continues with the next ligand.

    Parameters
    ----------
    protein_files: sequence of str
        Protein file names. The first one is used to determine the format.
    ligands: str
        SDF file the ligands are read from.
    writer: object
        Receives `ligand.Mol` through its `write` method for every ligand
        that was processed successfully.
    key_inters:
        Forwarded unchanged to `process_mol`.
    protein_format: str or None
        Forwarded to `determine_protein_format`.
    filter_strict, exact_protein, exact_ligand: bool
        Forwarded unchanged to `process_mol`.
    keep_hs_protein: bool
        Forwarded to `read_next_protein` as `keep_hs`.
    keep_hs_ligand: bool
        When false, ligands are read with `removeHs=True`.
    report_file: str or None
        If given, the collected interaction data is dumped there as JSON,
        provided at least one ligand produced data.
    compare_file: str or None
        If given, JSON interaction data loaded from it is passed to
        `process_mol` as `compare_data`.
    nnscores, rfscores:
        When truthy, the corresponding scorer is initialised with the
        value and applied to every protein/ligand pair.
    plecscores:
        Accepted but not used by this function.

    Returns
    -------
    (count, errors): tuple of int
        Number of ligands processed successfully and number that failed.
    """
    pformat = determine_protein_format(protein_files[0], protein_format)
    utils.log('Protein format:', pformat)
    utils.log(len(protein_files), 'proteins specified')

    ligands = toolkit.readfile('sdf', ligands, removeHs=not keep_hs_ligand)

    if report_file:
        report_data = []
    else:
        report_data = None

    if compare_file:
        with open(compare_file, "r") as f:
            txt = f.read()
        compare_data = interactions.from_json(txt)
    else:
        compare_data = None

    if nnscores:
        print('Initialising NNSCORE')
        init_nnscore(nnscores)
    if rfscores:
        print('Initialising RFSCORE')
        init_rfscore(rfscores)

    total = 0
    count = 0
    errors = 0
    protein = None
    for ligand in ligands:
        try:
            protein = read_next_protein(protein_files, pformat, protein, total,
                                        keep_hs=keep_hs_protein)
            if nnscores:
                calc_nnscore(protein, ligand)
            if rfscores:
                calc_rfscore(protein, ligand)
            inter_data = process_mol(protein, ligand, key_inters, total,
                                     filter_strict=filter_strict,
                                     exact_protein=exact_protein,
                                     exact_ligand=exact_ligand,
                                     compare_data=compare_data)
            if report_data is not None:
                report_data.append(inter_data)
            # write the RDKit mol
            writer.write(ligand.Mol)
            count += 1
        except Exception:
            # Per-ligand failures are best-effort: log and move on.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit stop the run instead of being counted as errors.
            errors += 1
            traceback.print_exc()
        finally:
            total += 1

    if report_data:
        with open(report_file, 'w') as report:
            json.dump(report_data, report, cls=interactions.InteractionEncoder)

    return count, errors