def run(self):
    """Run a hotspot calculation for ``self.pdb`` and write the zipped result.

    The result is written into the directory that contains
    ``self.output().path``.
    """
    runner = Runner()

    # hotspot calculation settings
    calc_settings = runner.Settings()
    calc_settings.apolar_translation_threshold = 15
    calc_settings.polar_translation_threshold = 15
    calc_settings.polar_contributions = False
    calc_settings.nrotations = 3000
    calc_settings.sphere_maps = True

    hotspot_result = runner.from_pdb(
        pdb_code=self.pdb,
        charged_probes=False,
        buriedness_method='ghecom',
        nprocesses=3,
        settings=calc_settings,
        cavities=None,
    )

    # output settings
    writer_settings = HotspotWriter.Settings()
    writer_settings.charged = False

    target_dir = os.path.dirname(self.output().path)
    with HotspotWriter(target_dir,
                       grid_extension=".grd",
                       zip_results=True,
                       settings=writer_settings) as writer:
        writer.write(hotspot_result)
def run(self):
    """Build a reference pharmacophore for ``self.pdb`` and write its outputs.

    Outputs, addressed through ``self.output()``:

    * ``"pymol"`` - the pharmacophore file
    * ``"grids"`` - a zipped ``Results`` object built from the pharmacophore grids
    * ``"aligned_mols"`` - the aligned ligands
    * ``"points"`` - a pickle of the pharmacophore comparison dictionary
    """
    outputs = self.output

    # create pharmacophore
    reference = PharmacophoreModel.from_pdb(
        pdb_code=self.pdb,
        chain=self.chain,
        representatives=self.input().path,
        identifier=self.pdb,
    )
    reference.rank_features(max_features=6, feature_threshold=5)

    # write pymol file
    reference.write(outputs()["pymol"].path)

    # write Results file; the protein is downloaded into a scratch directory
    download_dir = tempfile.mkdtemp()
    PDBResult(self.pdb).download(download_dir)
    protein_path = os.path.join(download_dir, "{}.pdb".format(self.pdb))
    protein = Protein.from_file(protein_path)
    result = Results(protein=protein, super_grids=reference.dic)

    writer_settings = HotspotWriter.Settings()
    writer_settings.charged = False

    grid_dir = os.path.dirname(outputs()["grids"].path)
    with HotspotWriter(grid_dir,
                       grid_extension=".grd",
                       zip_results=True,
                       settings=writer_settings) as hs_writer:
        hs_writer.write(result)

    # write aligned molecules
    with MoleculeWriter(outputs()['aligned_mols'].path) as mol_writer:
        for ligand in reference.aligned_ligands:
            mol_writer.write(ligand)

    # points
    points = reference._comparision_dict()
    with open(outputs()['points'].path, 'wb') as handle:
        pickle.dump(points, handle)
def test_write_fake_multi(self):
    """Write a list of two fake results with superstar output enabled."""
    first = self.generate_fake(buriedness=True, superstar=True)
    second = self.generate_fake(buriedness=True, superstar=True)

    writer_settings = HotspotWriter.Settings()
    writer_settings.output_superstar = True

    out_dir = "testdata/hs_io/minimal_multi_all_grids"
    with HotspotWriter(out_dir, settings=writer_settings) as writer:
        writer.write([first, second])
def run(self):
    """Calculate a hotspot map for the input protein, using a ligand as the cavity.

    The protein is read from ``self.input().path`` and the first molecule of
    ``ligands/<pdb>.sdf`` is passed as the ``cavities`` argument. The result
    is written, zipped, into the directory containing ``self.output().path``.
    """
    prot = Protein.from_file(self.input().path)
    mol = io.MoleculeReader('ligands/{}.sdf'.format(self.pdb))[0]

    h = Runner()

    # hotspot calculation settings
    s = h.Settings()
    s.apolar_translation_threshold = 15
    s.polar_translation_threshold = 15
    s.polar_contributions = False
    s.sphere_maps = True
    s.nrotations = 3000

    hr = h.from_protein(prot,
                        buriedness_method='ghecom',
                        nprocesses=1,
                        settings=s,
                        cavities=mol)

    out_settings = HotspotWriter.Settings()
    out_settings.charged = False

    # HotspotWriter is a context manager: drive it through `with` so that its
    # exit hook always runs, including when write() raises.
    with HotspotWriter(os.path.dirname(self.output().path),
                       grid_extension=".grd",
                       zip_results=True,
                       settings=out_settings) as w:
        w.write(hr)
def test_generate_real(self):
    """Run a real calculation on 2vta and write it with superstar output enabled."""
    runner = Runner()
    hr = runner.from_pdb(pdb_code="2vta", buriedness_method='ghecom')

    settings = HotspotWriter.Settings()
    settings.output_superstar = True

    parent = "testdata/2vta"
    # The settings object was previously built but never handed to the writer,
    # so `output_superstar` had no effect; pass it through explicitly.
    with HotspotWriter(parent, settings=settings) as w:
        w.write(hr)
def run(self):
    """Read a hotspot result, derive its tractability map and write it out.

    The input is read from ``self.input().path``; the map is computed for
    ``self.volume`` and written, zipped, into the directory containing
    ``self.output().path``.
    """
    hr = HotspotReader(self.input().path).read()
    bcv = hr.tractability_map(volume=self.volume)

    out_settings = HotspotWriter.Settings()
    out_settings.charged = False

    # HotspotWriter is a context manager: drive it through `with` so that its
    # exit hook always runs, including when write() raises.
    with HotspotWriter(os.path.dirname(self.output().path),
                       grid_extension=".grd",
                       zip_results=True,
                       settings=out_settings) as w:
        w.write(bcv)
def _get_bcv(self, cav_id, other_id, lig_id):
    """
    Extract a volume from a cavity's hotspot result and record timing and threshold.

    The target volume is read from ``self.ligand_volume[other_id][lig_id]``.
    The extracted result, the elapsed extraction time and the result's
    ``step_threshold`` are each written to their own output location.

    :param cav_id: key selecting the hotspot result in ``self.hotspot``
    :param other_id: first-level key into the ligand volume / output tables
    :param lig_id: second-level key into the ligand volume / output tables
    :return: None
    """
    # inputs
    hotspot_zip = os.path.join(self.hotspot[cav_id], "out.zip")
    hotspot_result = HotspotReader(path=hotspot_zip).read()

    with open(self.ligand_volume[other_id][lig_id], 'r') as volume_file:
        target_volume = volume_file.read()

    # task (timed)
    t_start = time.time()
    extractor = Extractor(hotspot_result)
    bcv = extractor.extract_volume(volume=int(float(target_volume)))
    t_end = time.time()

    # output
    out_dir = self.bcv[cav_id][other_id][lig_id]
    for directory in (os.path.dirname(out_dir), out_dir):
        create_directory(directory)

    with HotspotWriter(path=out_dir, grid_extension=".grd", zip_results=True) as writer:
        writer.write(bcv)

    with open(self.bcv_time[cav_id][other_id][lig_id], 'w') as time_file:
        time_file.write(str(t_end - t_start))

    with open(self.bcv_threshold[cav_id][other_id][lig_id], 'w') as threshold_file:
        threshold_file.write(str(bcv.step_threshold))
def to_grid(target, pdb):
    """Turn the aligned reference ligands of a target into pharmacophore grids.

    Reads ``reference_pharmacophore/aligned_mols.mol2`` and ``hs/<pdb>.pdb``
    below the target's data directory and writes a zipped ``Results`` object
    to ``reference_pharmacophore/grids``.

    :param target: target name (first path component below the data root)
    :param pdb: PDB code (second path component; also the protein file stem)
    """
    root = "Z:/patel_set/{}/{}".format(target, pdb)
    pharmacophore_dir = join(root, "reference_pharmacophore")

    ligands = MoleculeReader(join(pharmacophore_dir, "aligned_mols.mol2"))
    model = PharmacophoreModel.from_ligands(ligands=ligands, identifier="test")

    protein_file = join(root, "hs", "{}.pdb".format(pdb))
    result = Results(super_grids=model.dic,
                     protein=Protein.from_file(protein_file))

    grid_dir = Helper.get_out_dir(join(pharmacophore_dir, "grids"))

    writer_settings = HotspotWriter.Settings()
    writer_settings.isosurface_threshold = [2, 5, 10]

    with HotspotWriter(path=grid_dir, zip_results=True, settings=writer_settings) as writer:
        writer.write(result)
def main():
    """
    Calculate and write a hotspot map for every PDB listed in ``targets.csv``.

    The time reported by ``get_hotspot`` for each PDB is collected and saved
    to ``run_stats.csv`` next to the targets file.

    :return: None
    """
    base = "/local/pcurran/GOLD"
    targets = pd.read_csv(os.path.join(base, "targets.csv"))
    pdbs = set(targets['PDB'])

    scratch = tempfile.mkdtemp()

    # (pdb, elapsed) pairs, in processing order
    stats = []
    for pdb in pdbs:
        prot = get_protein(pdb, base, scratch)
        hotspot, elapsed = get_hotspot(prot)
        stats.append((pdb, elapsed))

        with HotspotWriter(os.path.join(base, pdb), zip_results=True) as writer:
            writer.write(hotspot)

    df = pd.DataFrame({
        'PDB': [pdb for pdb, _ in stats],
        'Time': [elapsed for _, elapsed in stats],
    })
    df.to_csv(os.path.join(base, "run_stats.csv"))

    timer.report()
def run(self):
    """Create masked hotspots where missing and run the fingerprint scheme per PDB.

    Every sub-directory of the base directory is treated as one PDB entry.
    A failure for one entry is reported and recorded, and processing
    continues with the next entry.
    """
    base = "/local/pcurran/leads_frag"
    pdbs = [
        p for p in os.listdir(base) if os.path.isdir(os.path.join(base, p))
    ]

    # NOTE(review): `fails` is filled but not used within this block.
    fails = []
    for pdb in tqdm(pdbs):
        try:
            hotspot_path = os.path.join(base, pdb, "hotspot")
            masked_path = os.path.join(hotspot_path, "masked_hotspot")
            print(pdb)

            # only compute the masked hotspot when it has not been written yet
            if not os.path.exists(masked_path):
                masked = masked_hotspot(base, pdb, hotspot_path)
                with HotspotWriter(masked_path) as w:
                    w.write(masked)

            fp_scheme(fpath=masked_path,
                      percentile=float(self.args.percentile),
                      low=float(self.args.low),
                      high=float(self.args.high),
                      id=self.args.id)
        except Exception:
            # Deliberately best-effort per entry, but do not swallow
            # KeyboardInterrupt / SystemExit as a bare `except:` would.
            print(f"{pdb} FAILED")
            fails.append(pdb)
def hot_calc(inputs):
    """Calculate a hotspot map and a 250-volume extraction for one protein-ligand pair.

    Both results are written to the protein's directory under the identifiers
    ``"hotspot"`` and ``"bestvol"``.

    :param inputs: ``(pdb, het, pdir)``; the protein is read from
        ``<pdir>/<pdb>.pdb`` and the ligand from ``<pdir>/<pdb>_<het>.mol2``
    """
    pdb, het, pdir = inputs

    protein = Protein.from_file(os.path.join(pdir, f"{pdb}.pdb"))
    ligand = MoleculeReader(os.path.join(pdir, f"{pdb}_{het}.mol2"))[0]

    hotspot = Runner().from_protein(protein, nprocesses=3, cavities=ligand)

    # replace every grid with its max-value-of-neighbours version
    for probe, grid in hotspot.super_grids.items():
        hotspot.super_grids[probe] = grid.max_value_of_neighbours()

    extractor = Extractor(hotspot)
    best_volume = extractor.extract_volume(volume=250)

    # smoothing
    for probe, grid in best_volume.super_grids.items():
        best_volume.super_grids[probe] = grid.gaussian(sigma=0.5)

    best_volume.identifier = "bestvol"
    hotspot.identifier = "hotspot"

    with HotspotWriter(pdir) as writer:
        writer.write([hotspot, best_volume])
def run(self, cavity=True):
    """Run a hotspot calculation on a protein and write it to ``self.in_dir``.

    :param cavity: when exactly ``True``, cavities are detected from the
        protein file and passed to the calculation; otherwise ``None`` is passed
    """
    runner = Runner()
    calc_settings = Runner.Settings(sphere_maps=False)

    # either prepare the protein or load it as-is
    if self.args.prepare is True:
        self.prepare_protein()
    else:
        self.prot = Protein.from_file(self.args.prot_fname)

    cavities = None
    if cavity is True:
        cavities = Cavity.from_pdb_file(self.args.prot_fname)
        print(cavities)

    result = runner.from_protein(
        protein=self.prot,
        charged_probes=False,
        buriedness_method=self.args.buriedness_method,
        cavities=cavities,
        nprocesses=5,
        settings=calc_settings,
    )

    with HotspotWriter(path=self.in_dir, zip_results=self.args.zipped) as writer:
        writer.write(result)
def test_write_pymol_isosurfaces(self):
    """Generate minimal PyMOL isosurface commands against a prepared out.zip."""
    out_dir = "testdata/hs_io/minimal_all_grids"

    # The writer is only used for its PyMOL script object; no result is written.
    # The PyMOL file is initialised in the writer's init, so the unzip code is
    # already in place.
    hs_writer = HotspotWriter(out_dir, settings=HotspotWriter.Settings())

    for grid_kind in ("fhm", "superstar"):
        placeholder_grids = {"apolar": None, "donor": None, "acceptor": None}
        hs_writer.pymol_out.commands += hs_writer._write_pymol_isosurfaces(
            placeholder_grids, "hotspot", "hotspot", grid_kind)

    hs_writer.pymol_out.write("testdata/hs_io/minimal_all_grids/test_write_pymol_isosurfaces.py")
def test_write_real_single(self):
    """Assemble a ``Results`` object from the 1hcl test grids and write it."""
    base = "testdata/1hcl"
    interactions = ["donor", "acceptor", "apolar"]

    super_grids = {}
    for probe in interactions:
        super_grids[probe] = Grid.from_file(os.path.join(base, f"{probe}.grd"))

    superstar_grids = {}
    for probe in interactions:
        superstar_grids[probe] = Grid.from_file(os.path.join(base, f"superstar_{probe}.grd"))

    buriedness = Grid.from_file(os.path.join(base, "buriedness.grd"))
    protein = Protein.from_file(os.path.join(base, "protein.pdb"))

    hotspot_result = Results(super_grids=super_grids,
                             protein=protein,
                             buriedness=buriedness,
                             superstar=superstar_grids)

    writer_settings = HotspotWriter.Settings()
    writer_settings.output_superstar = True

    with HotspotWriter("testdata/hs_io/minimal_all_grids_real", settings=writer_settings) as writer:
        writer.write(hotspot_result)
def testconstruction(self):
    """Extract a volume from ``self.result`` and write it to ``self.bin``."""
    extracted = Extractor(self.result).extract_volume()

    with HotspotWriter(self.bin) as writer:
        writer.write(extracted)
def run(self):
    """Extract the first best volume (volume=100) from a hotspot result and write it.

    The hotspot result is read from ``self.input().path``; the extracted
    volume is written, zipped, into the directory containing
    ``self.output().path``.
    """
    hotspot = HotspotReader(self.input().path).read()

    # extraction settings
    extract_settings = Extractor.Settings()
    extract_settings.cutoff = 12
    extract_settings.mvon = False

    extractor = Extractor(hotspot, extract_settings)
    best_volumes = extractor.extract_best_volume(volume=100)
    best = best_volumes[0]

    # output settings
    writer_settings = HotspotWriter.Settings()
    writer_settings.charged = False

    target_dir = os.path.dirname(self.output().path)
    with HotspotWriter(target_dir,
                       grid_extension=".grd",
                       zip_results=True,
                       settings=writer_settings) as writer:
        writer.write(best)
def shrink_hotspots(self, hotspot_paths, padding=2.0):
    """
    Crop each hotspot result to the padded bounding box of the reference binding site.

    The cropped result for every input path is written to a
    "binding_site_maps" directory next to that input.

    :param list hotspot_paths: paths of the hotspot results to shrink.
    :param float padding: distance added beyond the extreme binding-site atom
        coordinates, in every direction.
    :return: list with one "<binding_site_maps dir>/out" path per input path.
    """
    print("Shrinking hotspots for ensemble...")

    # Lazily resolve the reference binding site
    if not self.reference_binding_site:
        self.reference_binding_site = self.get_binding_site(
            self.reference_ID)

    # Bounding box of all atoms in the reference binding site
    atom_coords = np.array([
        atom.coordinates
        for residue in self.reference_binding_site.residues
        for atom in residue.atoms
    ])
    lower = np.array([np.min(atom_coords[:, axis]) for axis in range(3)])
    upper = np.array([np.max(atom_coords[:, axis]) for axis in range(3)])

    # Grow the box by the padding on both sides
    lower -= padding
    upper += padding

    shrunk_paths = []
    for hotspot_path in hotspot_paths:
        hotspot = HotspotReader(hotspot_path).read()

        # Crop the grid of every probe type
        for probe, grid in hotspot.super_grids.items():
            hotspot.super_grids[probe] = self.shrink_to_binding_site(
                grid, lower, upper)

        # Write next to the original result
        target_dir = join(dirname(hotspot_path), "binding_site_maps")
        shrunk_paths.append(join(target_dir, "out"))

        with HotspotWriter(target_dir,
                           visualisation="pymol",
                           grid_extension=".ccp4",
                           zip_results=False) as writer:
            writer.write(hotspot)

    return shrunk_paths
def run(self):
    """Run a hotspot calculation for ``self.args.pdb`` and write it to ``self.args.out_dir``."""
    calc_settings = Runner.Settings(sphere_maps=False)
    runner = Runner()

    hotspot_result = runner.from_pdb(
        pdb_code=self.args.pdb,
        charged_probes=True,
        buriedness_method=self.args.buriedness_method,
        nprocesses=5,
        settings=calc_settings,
    )

    with HotspotWriter(path=self.args.out_dir,
                       zip_results=self.args.zipped) as hs_writer:
        hs_writer.write(hotspot_result)
def shrink_hotspot_maps(hs_result_paths, ligands, padding=4.0):
    """
    Crop hotspot maps to the padded bounding box of the heaviest ligand.

    Each cropped result is also written to a 'binding_site_maps' directory
    next to its input file.

    :param hs_result_paths: a list of Paths to precalculated hotspot results.
    :param ligands: a list of ccdc molecules corresponding to the ensemble
        ligands; the one with the highest molecular weight defines the box.
    :param padding: distance added beyond the ligand's extreme atom
        coordinates, in every direction.
    :return: a list of *shrunk* hotspot results
    """
    # The ligand with the highest molecular weight defines the binding site
    weights = [ligand.molecular_weight for ligand in ligands]
    heaviest = ligands[weights.index(max(weights))]

    # Bounding box of that ligand
    atom_coords = np.array([atom.coordinates for atom in heaviest.atoms])
    lower = np.array([np.min(atom_coords[:, axis]) for axis in range(3)])
    upper = np.array([np.max(atom_coords[:, axis]) for axis in range(3)])

    # Grow the box by the padding on both sides
    lower -= padding
    upper += padding

    shrunk = []
    for result_path in hs_result_paths:
        hotspot = HotspotReader(str(result_path.resolve())).read()

        # Crop the grid of every probe
        for probe, grid in hotspot.super_grids.items():
            hotspot.super_grids[probe] = EnsembleResult.shrink_to_binding_site(
                in_grid=grid, new_origin=lower, new_far_corner=upper)
        shrunk.append(hotspot)

        target_dir = Path(result_path.parent, 'binding_site_maps')
        if not target_dir.exists():
            target_dir.mkdir()

        with HotspotWriter(str(target_dir.resolve()),
                           visualisation="pymol",
                           grid_extension=".ccp4",
                           zip_results=False) as writer:
            writer.write(hotspot)

    return shrunk
def run(self):
    """Calculate a hotspot map for the input protein, restricted to its cavities.

    Both the protein and its cavities are read from ``self.input().path``.
    The result is written, zipped, into the directory containing
    ``self.output().path``.
    """
    prot = Protein.from_file(self.input().path)
    cavs = Cavity.from_pdb_file(self.input().path)

    h = Runner()

    # hotspot calculation settings
    s = h.Settings()
    s.apolar_translation_threshold = 15
    s.polar_translation_threshold = 15
    s.polar_contributions = False
    s.nrotations = 1000

    hr = h.from_protein(prot,
                        buriedness_method='ghecom',
                        nprocesses=1,
                        settings=s,
                        cavities=cavs)

    out_settings = HotspotWriter.Settings()
    out_settings.charged = False

    # HotspotWriter is a context manager: drive it through `with` so that its
    # exit hook always runs, including when write() raises.
    with HotspotWriter(os.path.dirname(self.output().path),
                       grid_extension=".grd",
                       zip_results=True,
                       settings=out_settings) as w:
        w.write(hr)
def calc(args):
    """Calculate a hotspot map plus a 250-volume extraction and write both.

    If the extraction fails, a copy of the full hotspot result is written in
    its place. The two results are written under the identifiers
    ``"hotspot"`` and ``"bcv"``.

    :param args: ``(prot_file, hotspot_file)`` - path of the protein file to
        read and the path handed to ``HotspotWriter``
    """
    prot_file, hotspot_file = args

    prot = Protein.from_file(prot_file)  # pre-prepared

    runner = Runner()
    settings = Runner.Settings()
    settings.apolar_translation_threshold = 8
    settings.polar_translation_threshold = 10

    hr = runner.from_protein(prot, nprocesses=3, settings=settings, probe_size=3)

    for p, g in hr.super_grids.items():
        hr.super_grids[p] = g.dilate_by_atom()

    try:
        e = Extractor(hr)
        bv = e.extract_volume(volume=250)
    except Exception:
        # Best-effort fallback: keep the whole map when extraction fails.
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still propagate.
        bv = Results(
            protein=hr.protein.copy(),
            super_grids={p: g.copy() for p, g in hr.super_grids.items()})

    hr.identifier = "hotspot"
    bv.identifier = "bcv"

    with HotspotWriter(hotspot_file) as w:
        w.write([hr, bv])
def _get_hotspot(self, cav_id):
    """
    Calculate a hotspot map from pre-calculated superstar and buriedness grids.

    The elapsed calculation time and the zipped hotspot result are written
    to the locations configured for ``cav_id``.

    :param cav_id: key selecting the superstar input and the outputs
    :return: None
    """
    # inputs
    protein = Protein.from_file(self.apo_prep)

    superstar_zip = os.path.join(self.superstar[cav_id], "out.zip")
    superstar_result = HotspotReader(path=superstar_zip).read()
    superstar = []
    for ident, grid in superstar_result.super_grids.items():
        superstar.append(
            _AtomicHotspotResult(identifier=ident, grid=grid, buriedness=None))

    buriedness = Grid.from_file(self.buriedness)

    # tasks (timed)
    t_start = time.time()
    runner = Runner()
    calc_settings = runner.Settings()
    calc_settings.apolar_translation_threshold = 14
    calc_settings.polar_translation_threshold = 14
    calc_settings.polar_contributions = False
    calc_settings.sphere_maps = False
    calc_settings.nrotations = 3000
    hotspot_result = runner.from_superstar(protein, superstar, buriedness,
                                           settings=calc_settings,
                                           clear_tmp=True)
    t_end = time.time()

    # output
    out_dir = self.hotspot[cav_id]
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    with open(self.hotspot_time[cav_id], 'w') as time_file:
        time_file.write(str(t_end - t_start))

    with HotspotWriter(out_dir, zip_results=True) as writer:
        writer.write(hotspot_result)
def test_write_pymol_isoslider(self):
    """Build a result from an unpacked out.zip and emit isosurface + isoslider commands."""
    # read in manually: unpack the archive into a scratch directory
    archive = "testdata/hs_io/minimal_all_grids/out.zip"
    scratch = tempfile.mkdtemp()
    with zipfile.ZipFile(archive) as hs_zip:
        hs_zip.extractall(scratch)
    hotspot_dir = os.path.join(scratch, "hotspot")

    interactions = ["donor", "acceptor", "apolar"]

    super_grids = {}
    for probe in interactions:
        super_grids[probe] = Grid.from_file(os.path.join(hotspot_dir, f"{probe}.grd"))

    superstar_grids = {}
    for probe in interactions:
        superstar_grids[probe] = Grid.from_file(os.path.join(hotspot_dir, f"superstar_{probe}.grd"))

    protein = Protein.from_file(os.path.join(hotspot_dir, "protein.pdb"))

    hotspot = Results(super_grids=super_grids, protein=protein, superstar=superstar_grids)
    hotspot.identifier = "hotspot"

    writer_settings = HotspotWriter.Settings()
    writer_settings.output_superstar = True

    # The writer is only used for its PyMOL script object; no result is written.
    hs_writer = HotspotWriter("testdata/hs_io/minimal_all_grids", settings=writer_settings)

    hs_writer.pymol_out.commands += hs_writer._write_pymol_isosurfaces(
        hotspot.super_grids, "hotspot", "hotspot", "fhm")
    hs_writer.pymol_out.commands += hs_writer._write_pymol_isosurfaces(
        hotspot.superstar, "hotspot", "hotspot", "superstar")

    hs_writer._write_pymol_isoslider(hotspot)

    hs_writer.pymol_out.write("testdata/hs_io/minimal_all_grids/test_write_pymol_isoslider.py")
PDBResult(identifier=pdb).download(out_dir=dirname) if os.path.exists(reps): representatives = reps else: representatives = None try: result = HotspotReader(path=os.path.join(dirname, "out.zip")).read() pharmacophore = result.get_pharmacophore_model() pharmacophore.rank_features(max_features=5) except: pharmacophore = PharmacophoreModel.from_pdb( pdb_code=pdb, chain="H", out_dir=dirname, representatives=representatives) pharmacophore.rank_features(max_features=5) result = Results(super_grids=pharmacophore.dic, protein=Protein.from_file( os.path.join(dirname, pdb + ".pdb"))) pharmacophore.write(os.path.join(dirname, "crossminer.cm")) pharmacophore.write(os.path.join(dirname, "pharmit.json")) # write out Results object settings = HotspotWriter.Settings() settings.isosurface_threshold = [2, 5, 10] with HotspotWriter(dirname, settings=settings) as w: w.write(result)
def _get_superstar(self, cav_id=None):
    """
    Calculate SuperStar grids for one cavity (or globally) and write the outputs.

    If the buriedness method is ligsite, the buriedness grid of the apolar
    probe is written out for later, unless it already exists.

    :param cav_id: key into the cavity / output tables; the value ``'global'``
        runs without a cavity origin
    :return: None
    """
    # input
    prot = Protein.from_file(self.apo_prep)

    # Compare by value: `is 'global'` tests object identity, which is not
    # guaranteed for equal strings and is a SyntaxWarning on Python 3.8+.
    if cav_id == 'global':
        cavity_origin = None
    else:
        # NOTE(review): unpickling is only safe for files produced by this
        # pipeline itself - confirm the cavity files are never external input.
        with open(self.cavities[cav_id], 'rb') as handle:
            cavity_origin = [pickle.load(handle)]

    # tasks (timed)
    start = time.time()
    a = _AtomicHotspot()
    a.settings.atomic_probes = {"apolar": "AROMATIC CH CARBON",
                                "donor": "UNCHARGED NH NITROGEN",
                                "acceptor": "CARBONYL OXYGEN"}

    self.superstar_grids = a.calculate(prot, nthreads=None, cavity_origins=cavity_origin)

    sr = Results(protein=prot,
                 super_grids={result.identifier: result.grid for result in self.superstar_grids})
    finish = time.time()

    # outputs
    if not os.path.exists(self.superstar[cav_id]):
        os.mkdir(self.superstar[cav_id])

    # with a cavity origin the files are taken from sub-directory "0" of the temp dir
    if cav_id != 'global':
        out = os.path.join(a.settings.temp_dir, str(0))
    else:
        out = a.settings.temp_dir

    for interaction in ["apolar", "acceptor", "donor"]:
        shutil.copyfile(os.path.join(out, "{}.cavity.mol2".format(interaction)),
                        os.path.join(self.superstar[cav_id], "{}.cavity.mol2".format(interaction)))

    shutil.make_archive(os.path.join(self.superstar[cav_id], "superstar"), 'zip', out)

    with HotspotWriter(path=self.superstar[cav_id], zip_results=True) as w:
        w.write(sr)

    with open(self.superstar_time[cav_id], 'w') as t:
        t.write(str(finish - start))

    shutil.rmtree(a.settings.temp_dir)

    if self.buriedness_method == 'ligsite':
        # only write if it doesn't exist i.e. the first cavity run
        if not os.path.exists(self.buriedness):
            for ss in self.superstar_grids:
                if ss.identifier == "apolar":
                    ss.buriedness.write(self.buriedness)
# hotspot calculation settings s = h.Settings() s.apolar_translation_threshold = 15 s.polar_translation_threshold = 15 s.polar_contributions = False s.nrotations = 3000 s.sphere_maps = True hr = h.from_protein(protein=p, charged_probes=False, buriedness_method='ghecom', nprocesses=3, settings=s, cavities=None) out_settings = HotspotWriter.Settings() out_settings.charged = False out = os.path.join(out_dir, p.identifier) if not os.path.exists(out): os.mkdir(out) with HotspotWriter(out, grid_extension=".grd", zip_results=True, settings=out_settings) as w: w.write(hr) # read # Mean map
def generate_pharmacophore(ligands, ref_pdb, out_dir):
    """Build a consensus ligand pharmacophore and write its outputs below ``out_dir``.

    Written outputs:

    * ``pymol/`` - visualisation of the top-6 consensus model
    * ``hr/`` - a ``Results`` object built from the feature point grids and
      the downloaded reference protein
    * ``ligand_pharmacophores/<n>.cm`` - models with the top n features,
      for n in 6, 5, 4, 3

    :param ligands: ligands to detect pharmacophore features from
    :param ref_pdb: PDB code of the reference protein to download
    :param out_dir: existing directory that receives the output sub-directories
    """

    def ensure_dir(path):
        # create the directory only when it is missing
        if not os.path.exists(path):
            os.mkdir(path)

    # one pharmacophore per ligand, carrying every detected feature
    per_ligand_models = []
    for ligand in ligands:
        model = LigandPharmacophoreModel()
        model.feature_definitions = [
            "ring", "acceptor_projected", "donor_projected"
        ]
        model.detect_from_ligand(ligand)
        for detected in model.detected_features:
            model.add_feature(detected)
        per_ligand_models.append(model)

    # 20 %
    cutoff = len(ligands) * 0.2
    feats, feat_point_grds = create_consensus(per_ligand_models, cutoff=cutoff)
    print(feats)

    # ring features get a single sphere of radius 2.0 at their first sphere's centre
    for feat in feats:
        if feat.identifier != "ring":
            continue
        centre = feat.spheres[0].centre
        xyz = (centre[0], centre[1], centre[2])
        feat.spheres = (GeometricDescriptors.Sphere(xyz, 2.0), )
        feat.point = feat.spheres[0]

    ensemble_pharm = LigandPharmacophoreModel()
    ensemble_pharm.detected_features = feats
    ensemble_pharm.feature_point_grids = feat_point_grds
    ensemble_pharm.ligands = ligands
    ensemble_pharm.detected_features = ensemble_pharm.top_features(num=6)

    pymol_dir = os.path.join(out_dir, "pymol")
    ensure_dir(pymol_dir)
    ensemble_pharm.pymol_visulisation(pymol_dir)

    # enable rescoring
    scratch = tempfile.mkdtemp()
    ftp_download([ref_pdb, scratch])

    grids = {
        "apolar": feat_point_grds["ring"],
        "donor": feat_point_grds["donor_projected"],
        "acceptor": feat_point_grds["acceptor_projected"]
    }
    reference_protein = Protein.from_file(os.path.join(scratch, f"{ref_pdb}.pdb"))
    hotspot_result = Results(super_grids=grids, protein=reference_protein)

    result_dir = os.path.join(out_dir, "hr")
    ensure_dir(result_dir)
    with HotspotWriter(result_dir) as writer:
        writer.write(hotspot_result)

    models_dir = os.path.join(out_dir, "ligand_pharmacophores")
    ensure_dir(models_dir)

    for n_features in [6, 5, 4, 3]:
        reduced = LigandPharmacophoreModel()
        reduced.detected_features = feats
        reduced.detected_features = reduced.top_features(num=n_features)
        for selected in reduced.detected_features:
            reduced.add_feature(selected)
        reduced.intra_only = True
        reduced.write(os.path.join(models_dir, f"{n_features}.cm"))
# Calculate a hotspot map for PDB entry 3cqw, write it out, and report the
# distinct neighbour counts found on the result's protein atoms.
from hotspots import calculation
from hotspots.hs_io import HotspotWriter

r = calculation.Runner()
result = r.from_pdb("3cqw", charged_probes=False, nprocesses=3)

with HotspotWriter("/home/pcurran/New folder/akt1/protoss") as w:
    w.write(result)

# print() call form: the Python 2 print statement is a syntax error on
# Python 3, while a single-argument call prints identically on both.
print({len(a.neighbours) for a in result.protein.atoms})