def load(self, filename=''):
    """Load a pickled RFScore scoring function, training one if needed.

    Parameters
    ----------
    filename: string (default='')
        Path to a pickled scoring function. When empty, a pickle named
        ``RFScore_v<self.version>.pickle`` is looked up first as a relative
        path and then in the directory of this module.

    Returns
    -------
    Whatever ``scorer.load`` returns for the resolved pickle path.
    """
    if not filename:
        pickle_name = 'RFScore_v%i.pickle' % self.version
        for f in [pickle_name, dirname(__file__) + '/' + pickle_name]:
            if isfile(f):
                filename = f
                break
    # if still no pickle found - train function from pregenerated descriptors
    if not filename:
        # Single-argument parenthesised print is valid both as a Python 2
        # print statement and as a Python 3 function call.
        print("No pickle, training new scoring function.")
        # NOTE(review): rfscore() is built without self.version here, so the
        # trained model may not match the requested version - confirm.
        rf = rfscore()
        filename = rf.train()
    return scorer.load(filename)
def load(self, filename=None, pdbbind_version=2016):
    """Load a pickled NNScore scoring function, training one if needed.

    Parameters
    ----------
    filename: string or None (default=None)
        Path to a pickled scoring function. When None, a pickle named
        ``NNScore_pdbbind<pdbbind_version>.pickle`` is looked up first as a
        relative path and then in the directory of this module.

    pdbbind_version: int (default=2016)
        Used in the default pickle name and passed to ``nnscore.train``
        when no pickle is found.

    Returns
    -------
    Whatever ``scorer.load`` returns for the resolved pickle path.
    """
    if filename is not None:
        return scorer.load(filename)

    pickle_name = 'NNScore_pdbbind%i.pickle' % (pdbbind_version)
    candidates = (pickle_name, path_join(dirname(__file__), pickle_name))
    # First existing candidate wins; None means nothing was found.
    filename = next((path for path in candidates if isfile(path)), None)
    if filename is None:
        print('No pickle, training new scoring function.', file=sys.stderr)
        model = nnscore()
        filename = model.train(pdbbind_version=pdbbind_version)
    return scorer.load(filename)
def load(self, filename=''):
    """Load a pickled NNScore scoring function, training one if needed.

    Parameters
    ----------
    filename: string (default='')
        Path to a pickled scoring function. When empty, ``NNScore.pickle``
        is looked up first as a relative path and then in the directory of
        this module.

    Returns
    -------
    Whatever ``scorer.load`` returns for the resolved pickle path.
    """
    if not filename:
        pickle_name = 'NNScore.pickle'
        for f in [pickle_name, dirname(__file__) + '/' + pickle_name]:
            if isfile(f):
                filename = f
                break
    # if still no pickle found - train function from pregenerated descriptors
    if not filename:
        # Single-argument parenthesised print is valid both as a Python 2
        # print statement and as a Python 3 function call.
        print("No pickle, training new scoring function.")
        nn = nnscore()
        filename = nn.train()
    return scorer.load(filename)
def load(self, filename='', version=1, pdbbind_version=2016):
    """Load a pickled RFScore scoring function, training one if needed.

    Parameters
    ----------
    filename: string (default='')
        Path to a pickled scoring function. When empty, a pickle named
        ``RFScore_v<version>_pdbbind<pdbbind_version>.pickle`` is looked up
        first as a relative path and then in the directory of this module.

    version: int (default=1)
        RFScore version; used in the pickle name and passed to ``rfscore``.

    pdbbind_version: int (default=2016)
        Used in the pickle name and passed to ``rfscore.train``.

    Returns
    -------
    Whatever ``scorer.load`` returns for the resolved pickle path.
    """
    if filename:
        return scorer.load(filename)

    pickle_name = 'RFScore_v%i_pdbbind%i.pickle' % (version, pdbbind_version)
    candidates = (pickle_name, dirname(__file__) + '/' + pickle_name)
    found = next((path for path in candidates if isfile(path)), None)
    if found is not None:
        return scorer.load(found)

    print("No pickle, training new scoring function.", file=sys.stderr)
    model = rfscore(version=version)
    # `filename` is still the (falsy) value supplied by the caller.
    trained = model.train(sf_pickle=filename, pdbbind_version=pdbbind_version)
    return scorer.load(trained)
def load(self, filename=None, version=1, pdbbind_version=2016):
    """Load a pickled RFScore scoring function, training one if needed.

    Parameters
    ----------
    filename: string or None (default=None)
        Path to a pickled scoring function. When None, a pickle named
        ``RFScore_v<version>_pdbbind<pdbbind_version>.pickle`` is looked up
        first as a relative path and then in the directory of this module.

    version: int (default=1)
        RFScore version; used in the pickle name and passed to ``rfscore``.

    pdbbind_version: int (default=2016)
        Used in the pickle name and passed to ``rfscore.train``.

    Returns
    -------
    Whatever ``scorer.load`` returns for the resolved pickle path.
    """
    if filename is not None:
        return scorer.load(filename)

    pickle_name = 'RFScore_v%i_pdbbind%i.pickle' % (version, pdbbind_version)
    candidates = (pickle_name, path_join(dirname(__file__), pickle_name))
    found = next((path for path in candidates if isfile(path)), None)
    if found is not None:
        return scorer.load(found)

    print('No pickle, training new scoring function.', file=sys.stderr)
    model = rfscore(version=version)
    # No pickle path is known at this point, hence sf_pickle=None.
    trained = model.train(sf_pickle=None, pdbbind_version=pdbbind_version)
    return scorer.load(trained)
def load(self, filename='', version=1):
    """Load a pickled RFScore scoring function, training one if needed.

    Parameters
    ----------
    filename: string (default='')
        Path to a pickled scoring function. When empty, a pickle named
        ``RFScore_v<version>.pickle`` is looked up first as a relative path
        and then in the directory of this module.

    version: int (default=1)
        RFScore version; used in the pickle name and passed to ``rfscore``.

    Returns
    -------
    Whatever ``scorer.load`` returns for the resolved pickle path.
    """
    if not filename:
        pickle_name = 'RFScore_v%i.pickle' % version
        for f in [pickle_name, dirname(__file__) + '/' + pickle_name]:
            if isfile(f):
                filename = f
                break
    # if still no pickle found - train function from pregenerated descriptors
    if not filename:
        # Single-argument parenthesised print is valid both as a Python 2
        # print statement and as a Python 3 function call.
        print("No pickle, training new scoring function.")
        rf = rfscore(version=version)
        # `filename` is still the (falsy) value supplied by the caller.
        filename = rf.train(sf_pickle=filename)
    return scorer.load(filename)
def load(self, filename='', pdbbind_version=2016):
    """Load a pickled NNScore scoring function, training one if needed.

    Parameters
    ----------
    filename: string (default='')
        Path to a pickled scoring function. When empty, a pickle named
        ``NNScore_pdbbind<pdbbind_version>.pickle`` is looked up first as a
        relative path and then in the directory of this module.

    pdbbind_version: int (default=2016)
        Used in the default pickle name and passed to ``nnscore.train``
        when no pickle is found.

    Returns
    -------
    Whatever ``scorer.load`` returns for the resolved pickle path.
    """
    if filename:
        return scorer.load(filename)

    pickle_name = 'NNScore_pdbbind%i.pickle' % (pdbbind_version)
    candidates = (pickle_name, dirname(__file__) + '/' + pickle_name)
    found = next((path for path in candidates if isfile(path)), None)
    if found is None:
        print("No pickle, training new scoring function.", file=sys.stderr)
        model = nnscore()
        found = model.train(pdbbind_version=pdbbind_version)
    return scorer.load(found)
def load(self, filename=None, version='linear', pdbbind_version=2016,
         depth_protein=5, depth_ligand=1, size=65536):
    """Load a pickled PLECscore scoring function, training one if needed.

    Parameters
    ----------
    filename: string or None (default=None)
        Path to a pickled scoring function. When None, the pickle name is
        built from the remaining arguments and looked up first as a
        relative path and then in the directory of this module.

    version: string (default='linear')
        Used in the pickle name and passed to ``PLECscore``.

    pdbbind_version: int (default=2016)
        Used in the pickle name and passed to ``PLECscore.train``.

    depth_protein, depth_ligand, size: int
        Used in the pickle name.

    Returns
    -------
    Whatever ``scorer.load`` returns for the resolved pickle path.
    """
    if filename is not None:
        return scorer.load(filename)

    # FIXME: it would be cool to have templates of names for a class
    name_fields = (version, depth_protein, depth_ligand, pdbbind_version,
                   size)
    pickle_name = 'PLEC%s_p%i_l%i_pdbbind%i_s%i.pickle' % name_fields
    candidates = (pickle_name, path_join(dirname(__file__), pickle_name))
    found = next((path for path in candidates if isfile(path)), None)
    if found is None:
        print('No pickle, training new scoring function.', file=sys.stderr)
        # NOTE(review): depth_protein, depth_ligand and size only affect the
        # pickle name; they are not passed to PLECscore - confirm intended.
        model = PLECscore(version=version)
        # No pickle path is known at this point, hence sf_pickle=None.
        found = model.train(sf_pickle=None, pdbbind_version=pdbbind_version)
    return scorer.load(found)
def test_vs_scoring():
    """Check scoring through `virtualscreening` with mocked NNScore/RFScore.

    Models are trained on the test data directory, then applied by name, by
    pickle path and as scorer objects. Results are checked on fetched
    molecules and on SDF/CSV output.

    Temporary artefacts (trained pickles, the per-version symlinks and the
    temporary home directory) are removed in a ``finally`` clause so that a
    failing assertion does not leave them behind.
    """
    protein = next(oddt.toolkit.readfile('pdb', xiap_protein))
    protein.protein = True

    data_dir = os.path.join(test_data_dir, 'data')
    home_dir = mkdtemp()
    pdbbind_versions = (2007, 2013, 2016)
    pdbbind_dir = os.path.join(data_dir, 'pdbbind')
    filenames = []

    try:
        # expose the single test dataset under every expected version dir
        for pdbbind_v in pdbbind_versions:
            version_dir = os.path.join(data_dir, 'v%s' % pdbbind_v)
            if not os.path.isdir(version_dir):
                os.symlink(pdbbind_dir, version_dir)

        # train mocked SFs
        models = ([nnscore(n_jobs=1)] +
                  [rfscore(version=v, n_jobs=1) for v in [1, 2, 3]])
        for model in models:
            model.gen_training_data(data_dir,
                                    pdbbind_versions=pdbbind_versions,
                                    home_dir=home_dir)
            filenames.append(model.train(home_dir=home_dir))

        vs = virtualscreening(n_cpu=-1, chunksize=10)
        vs.load_ligands('sdf', xiap_actives_docked)

        # error if no protein is fed
        with pytest.raises(ValueError):
            vs.score('nnscore')
        # bad sf name
        with pytest.raises(ValueError):
            vs.score('bad_sf', protein=protein)

        vs.score('nnscore', protein=xiap_protein)
        vs.score('nnscore_pdbbind2016', protein=protein)
        vs.score('rfscore_v1', protein=protein)
        vs.score('rfscore_v1_pdbbind2016', protein=protein)
        vs.score('rfscore_v2', protein=protein)
        vs.score('rfscore_v3', protein=protein)
        # use pickle directly
        vs.score(filenames[0], protein=protein)
        # pass SF object directly
        vs.score(scorer.load(filenames[0]), protein=protein)
        # pass wrong object (sum is not an instance of scorer)
        with pytest.raises(ValueError):
            vs.score(sum, protein=protein)

        mols = list(vs.fetch())
        assert len(mols) == 100
        mol_data = mols[0].data
        assert 'nnscore' in mol_data
        assert 'rfscore_v1' in mol_data
        assert 'rfscore_v2' in mol_data
        assert 'rfscore_v3' in mol_data

        vs = virtualscreening(n_cpu=-1, chunksize=10)
        vs.load_ligands('sdf', xiap_actives_docked)
        vs.score('nnscore', protein=protein)
        vs.score('rfscore_v1', protein=protein)
        vs.score('rfscore_v2', protein=protein)
        vs.score('rfscore_v3', protein=protein)
        with NamedTemporaryFile('w', suffix='.sdf') as molfile:
            with NamedTemporaryFile('w', suffix='.csv') as csvfile:
                vs.write('sdf', molfile.name, csv_filename=csvfile.name)
                data = pd.read_csv(csvfile.name)
                assert 'nnscore' in data.columns
                assert 'rfscore_v1' in data.columns
                assert 'rfscore_v2' in data.columns
                assert 'rfscore_v3' in data.columns

                mols = list(oddt.toolkit.readfile('sdf', molfile.name))
                assert len(mols) == 100

                vs.write_csv(
                    csvfile.name,
                    fields=['nnscore', 'rfscore_v1', 'rfscore_v2',
                            'rfscore_v3'])
                data = pd.read_csv(csvfile.name)
                assert len(data.columns) == 4
                assert 'nnscore' in data.columns
                assert 'rfscore_v1' in data.columns
                assert 'rfscore_v2' in data.columns
                assert 'rfscore_v3' in data.columns
    finally:
        import shutil

        # remove files (guarded so cleanup never masks the real failure)
        for f in filenames:
            if os.path.isfile(f):
                os.unlink(f)

        # remove symlinks
        for pdbbind_v in pdbbind_versions:
            version_dir = os.path.join(data_dir, 'v%s' % pdbbind_v)
            if os.path.islink(version_dir):
                os.unlink(version_dir)

        # remove the temporary home directory created by mkdtemp()
        shutil.rmtree(home_dir, ignore_errors=True)
def score(self, function, protein=None, *args, **kwargs):
    """Scoring procedure compatible with any scoring function implemented
    in ODDT and other pickled SFs which are subclasses of
    `oddt.scoring.scorer`.

    Parameters
    ----------
    function: string or oddt.scoring.scorer
        Which scoring function to use. A string is interpreted, in order,
        as: a path to an existing pickle file; a name starting with
        'rfscore', 'nnscore' or 'plec', optionally followed by
        '_'-separated options (e.g. 'rfscore_v2_pdbbind2016',
        'pleclinear_p5_l1_s65536_pdbbind2016'); or 'autodock_vina'.
        A `scorer` instance is used as is.

    protein: oddt.toolkit.Molecule or string
        Default protein to use as reference. A string is treated as a file
        path whose extension selects the file format.

    Raises
    ------
    ValueError
        If no protein is given, the scoring function name is unknown, or
        `function` is neither a string nor a `scorer` instance.

    Notes
    -----
    Additional positional and keyword arguments are passed only to
    `autodock_vina`.
    """
    if isinstance(protein, six.string_types):
        extension = protein.split('.')[-1]
        protein = next(oddt.toolkit.readfile(extension, protein))
        protein.protein = True
    elif protein is None:
        raise ValueError('Protein needs to be set for structure based '
                         'scoring')
    # trigger cache
    protein.atom_dict

    if isinstance(function, six.string_types):
        name = function.lower()
        if isfile(function):
            sf = scorer.load(function)
        elif name.startswith('rfscore'):
            from oddt.scoring.functions.RFScore import rfscore
            new_kwargs = {}
            for bit in name.split('_'):
                if bit.startswith('pdbbind'):
                    new_kwargs['pdbbind_version'] = int(
                        bit.replace('pdbbind', ''))
                elif bit.startswith('v'):
                    new_kwargs['version'] = int(bit.replace('v', ''))
            sf = rfscore.load(**new_kwargs)
        elif name.startswith('nnscore'):
            from oddt.scoring.functions.NNScore import nnscore
            new_kwargs = {}
            for bit in name.split('_'):
                if bit.startswith('pdbbind'):
                    new_kwargs['pdbbind_version'] = int(
                        bit.replace('pdbbind', ''))
            sf = nnscore.load(**new_kwargs)
        elif name.startswith('plec'):
            from oddt.scoring.functions.PLECscore import PLECscore
            new_kwargs = {}
            for bit in name.split('_'):
                # Order matters: 'pdbbind' and 'plec' must be tested before
                # the single-letter 'p' prefix.
                if bit.startswith('pdbbind'):
                    new_kwargs['pdbbind_version'] = int(
                        bit.replace('pdbbind', ''))
                elif bit.startswith('plec'):
                    new_kwargs['version'] = bit.replace('plec', '')
                elif bit.startswith('p'):
                    new_kwargs['depth_protein'] = int(bit.replace('p', ''))
                elif bit.startswith('l'):
                    new_kwargs['depth_ligand'] = int(bit.replace('l', ''))
                elif bit.startswith('s'):
                    new_kwargs['size'] = int(bit.replace('s', ''))
            sf = PLECscore.load(**new_kwargs)
        elif name == 'autodock_vina':
            from oddt.docking import autodock_vina
            sf = autodock_vina(protein, *args, **kwargs)
        else:
            raise ValueError('Scoring Function %s was not implemented in '
                             'ODDT' % function)
    elif isinstance(function, scorer):
        sf = function
    else:
        # Not every object has __name__ (e.g. plain instances, numbers);
        # fall back to repr() so the intended ValueError is always raised.
        raise ValueError('Supplied object "%s" is not an ODDT scoring '
                         'funtion' % getattr(function, '__name__',
                                             repr(function)))

    sf.set_protein(protein)
    self._pipe.append(partial(method_caller, sf, 'predict_ligands'))
def score(self, function, protein=None, *args, **kwargs):
    """Scoring procedure.

    Parameters
    ----------
    function: string or oddt.scoring.scorer
        Which scoring function to use. A string is interpreted, in order,
        as: a name starting with 'rfscore' or 'nnscore', optionally
        followed by '_'-separated options (e.g. 'rfscore_v2_pdbbind2016');
        'autodock_vina'; or a path to an existing pickle file.
        A `scorer` instance is used as is.

    protein: oddt.toolkit.Molecule or string
        Default protein to use as reference. A string is treated as a file
        path whose extension selects the file format.

    Raises
    ------
    ValueError
        If no protein is given, the scoring function name is unknown, or
        `function` is neither a string nor a `scorer` instance.

    Note
    ----
    Additional positional and keyword arguments are passed only to
    `autodock_vina`.
    """
    # isinstance with six.string_types also accepts unicode strings and
    # str subclasses, which `type(x) is str` silently rejected.
    if isinstance(protein, six.string_types):
        extension = protein.split('.')[-1]
        protein = six.next(toolkit.readfile(extension, protein))
        protein.protein = True
    elif protein is None:
        # Without this the attribute access below fails with an opaque
        # AttributeError on None.
        raise ValueError('Protein needs to be set for structure based '
                         'scoring')
    # trigger cache
    protein.atom_dict

    if isinstance(function, six.string_types):
        name = function.lower()
        if name.startswith('rfscore'):
            from oddt.scoring.functions.RFScore import rfscore
            new_kwargs = {}
            for bit in name.split('_'):
                if bit.startswith('pdbbind'):
                    new_kwargs['pdbbind_version'] = int(
                        bit.replace('pdbbind', ''))
                elif bit.startswith('v'):
                    new_kwargs['version'] = int(bit.replace('v', ''))
            sf = rfscore.load(**new_kwargs)
        elif name.startswith('nnscore'):
            from oddt.scoring.functions.NNScore import nnscore
            new_kwargs = {}
            for bit in name.split('_'):
                if bit.startswith('pdbbind'):
                    new_kwargs['pdbbind_version'] = int(
                        bit.replace('pdbbind', ''))
            sf = nnscore.load(**new_kwargs)
        elif name == 'autodock_vina':
            from oddt.docking import autodock_vina
            sf = autodock_vina(protein, *args, **kwargs)
        elif isfile(function):
            sf = scorer.load(function)
        else:
            raise ValueError('Scoring Function %s was not implemented in ODDT'
                             % function)
    elif isinstance(function, scorer):
        sf = function
    else:
        # Not every object has __name__ (e.g. plain instances, numbers);
        # fall back to repr() so the intended ValueError is always raised.
        raise ValueError('Supplied object "%s" is not an ODDT scoring funtion'
                         % getattr(function, '__name__', repr(function)))

    sf.set_protein(protein)

    if self.n_cpu != 1:
        _parallel_helper_partial = partial(_parallel_helper, sf,
                                           'predict_ligand')
        # A non-positive n_cpu lets Pool choose its default process count.
        self._pipe = (Pool(self.n_cpu if self.n_cpu > 0 else None)
                      .imap(_parallel_helper_partial,
                            ({'ligand': lig} for lig in self._pipe),
                            chunksize=100))
    else:
        self._pipe = sf.predict_ligands(self._pipe)
def score(self, function, protein=None, *args, **kwargs):
    """Scoring procedure compatible with any scoring function implemented
    in ODDT and other pickled SFs which are subclasses of
    `oddt.scoring.scorer`.

    Parameters
    ----------
    function: string or oddt.scoring.scorer
        Which scoring function to use. A string is interpreted, in order,
        as: a path to an existing pickle file; a name starting with
        'rfscore' or 'nnscore', optionally followed by '_'-separated
        options (e.g. 'rfscore_v2_pdbbind2016'); or 'autodock_vina'.
        A `scorer` instance is used as is.

    protein: oddt.toolkit.Molecule or string
        Default protein to use as reference. A string is treated as a file
        path whose extension selects the file format.

    Raises
    ------
    ValueError
        If no protein is given, the scoring function name is unknown, or
        `function` is neither a string nor a `scorer` instance.

    Note
    ----
    Additional positional and keyword arguments are passed only to
    `autodock_vina`.
    """
    if isinstance(protein, six.string_types):
        extension = protein.split('.')[-1]
        protein = next(oddt.toolkit.readfile(extension, protein))
        protein.protein = True
    elif protein is None:
        raise ValueError('Protein needs to be set for structure based '
                         'scoring')
    # trigger cache
    protein.atom_dict

    if isinstance(function, six.string_types):
        name = function.lower()
        if isfile(function):
            sf = scorer.load(function)
        elif name.startswith('rfscore'):
            from oddt.scoring.functions.RFScore import rfscore
            new_kwargs = {}
            for bit in name.split('_'):
                if bit.startswith('pdbbind'):
                    new_kwargs['pdbbind_version'] = int(
                        bit.replace('pdbbind', ''))
                elif bit.startswith('v'):
                    new_kwargs['version'] = int(bit.replace('v', ''))
            sf = rfscore.load(**new_kwargs)
        elif name.startswith('nnscore'):
            from oddt.scoring.functions.NNScore import nnscore
            new_kwargs = {}
            for bit in name.split('_'):
                if bit.startswith('pdbbind'):
                    new_kwargs['pdbbind_version'] = int(
                        bit.replace('pdbbind', ''))
            sf = nnscore.load(**new_kwargs)
        elif name == 'autodock_vina':
            from oddt.docking import autodock_vina
            sf = autodock_vina(protein, *args, **kwargs)
        else:
            raise ValueError('Scoring Function %s was not implemented in '
                             'ODDT' % function)
    elif isinstance(function, scorer):
        sf = function
    else:
        # Not every object has __name__ (e.g. plain instances, numbers);
        # fall back to repr() so the intended ValueError is always raised.
        raise ValueError('Supplied object "%s" is not an ODDT scoring '
                         'funtion' % getattr(function, '__name__',
                                             repr(function)))

    sf.set_protein(protein)
    self._pipe.append(partial(method_caller, sf, 'predict_ligands'))
def test_vs_scoring():
    """Check scoring through `virtualscreening` with mocked SFs.

    NNScore and RFScore models are trained on the test data directory,
    then scoring functions (including PLEC) are applied by name, by pickle
    path and as scorer objects. Results are checked on fetched molecules
    and on SDF/CSV output.

    Temporary artefacts (trained pickles, the per-version symlinks and the
    temporary home directory) are removed in a ``finally`` clause so that a
    failing assertion does not leave them behind.
    """
    protein = next(oddt.toolkit.readfile('pdb', xiap_protein))
    protein.protein = True

    data_dir = os.path.join(test_data_dir, 'data')
    home_dir = mkdtemp()
    pdbbind_versions = (2007, 2013, 2016)
    pdbbind_dir = os.path.join(data_dir, 'pdbbind')
    filenames = []

    try:
        # expose the single test dataset under every expected version dir
        for pdbbind_v in pdbbind_versions:
            version_dir = os.path.join(data_dir, 'v%s' % pdbbind_v)
            if not os.path.isdir(version_dir):
                os.symlink(pdbbind_dir, version_dir)

        # train mocked SFs
        models = ([nnscore(n_jobs=1)] +
                  [rfscore(version=v, n_jobs=1) for v in [1, 2, 3]])
        for model in models:
            model.gen_training_data(data_dir,
                                    pdbbind_versions=pdbbind_versions,
                                    home_dir=home_dir)
            filenames.append(model.train(home_dir=home_dir))

        vs = virtualscreening(n_cpu=-1, chunksize=10)
        vs.load_ligands('sdf', xiap_actives_docked)

        # error if no protein is fed
        with pytest.raises(ValueError):
            vs.score('nnscore')
        # bad sf name
        with pytest.raises(ValueError):
            vs.score('bad_sf', protein=protein)

        vs.score('nnscore', protein=xiap_protein)
        vs.score('nnscore_pdbbind2016', protein=protein)
        vs.score('rfscore_v1', protein=protein)
        vs.score('rfscore_v1_pdbbind2016', protein=protein)
        vs.score('rfscore_v2', protein=protein)
        vs.score('rfscore_v3', protein=protein)
        vs.score('pleclinear', protein=protein)
        vs.score('pleclinear_p5_l1_s65536_pdbbind2016', protein=protein)
        # use pickle directly
        vs.score(filenames[0], protein=protein)
        # pass SF object directly
        vs.score(scorer.load(filenames[0]), protein=protein)
        # pass wrong object (sum is not an instance of scorer)
        with pytest.raises(ValueError):
            vs.score(sum, protein=protein)

        mols = list(vs.fetch())
        assert len(mols) == 100
        mol_data = mols[0].data
        assert 'nnscore' in mol_data
        assert 'rfscore_v1' in mol_data
        assert 'rfscore_v2' in mol_data
        assert 'rfscore_v3' in mol_data
        assert 'PLEClinear_p5_l1_s65536' in mol_data

        vs = virtualscreening(n_cpu=-1, chunksize=10)
        vs.load_ligands('sdf', xiap_actives_docked)
        vs.score('nnscore', protein=protein)
        vs.score('rfscore_v1', protein=protein)
        vs.score('rfscore_v2', protein=protein)
        vs.score('rfscore_v3', protein=protein)
        with NamedTemporaryFile('w', suffix='.sdf') as molfile:
            with NamedTemporaryFile('w', suffix='.csv') as csvfile:
                vs.write('sdf', molfile.name, csv_filename=csvfile.name)
                data = pd.read_csv(csvfile.name)
                assert 'nnscore' in data.columns
                assert 'rfscore_v1' in data.columns
                assert 'rfscore_v2' in data.columns
                assert 'rfscore_v3' in data.columns

                mols = list(oddt.toolkit.readfile('sdf', molfile.name))
                assert len(mols) == 100

                vs.write_csv(csvfile.name,
                             fields=['nnscore', 'rfscore_v1', 'rfscore_v2',
                                     'rfscore_v3'])
                data = pd.read_csv(csvfile.name)
                assert len(data.columns) == 4
                assert 'nnscore' in data.columns
                assert 'rfscore_v1' in data.columns
                assert 'rfscore_v2' in data.columns
                assert 'rfscore_v3' in data.columns
    finally:
        import shutil

        # remove files (guarded so cleanup never masks the real failure)
        for f in filenames:
            if os.path.isfile(f):
                os.unlink(f)

        # remove symlinks
        for pdbbind_v in pdbbind_versions:
            version_dir = os.path.join(data_dir, 'v%s' % pdbbind_v)
            if os.path.islink(version_dir):
                os.unlink(version_dir)

        # remove the temporary home directory created by mkdtemp()
        shutil.rmtree(home_dir, ignore_errors=True)