def _get_atomic_overlap(self, cav_id, other_id, lig_id):
    """
    Find the highest median BCV from all cavities, and calculate the
    percentage overlap between the best BCV and each query ligand atom.

    :return:
    """
    # inputs
    mol = io.MoleculeReader(self.extracted_ligands[other_id][lig_id])[0]
    path = os.path.join(self.bcv[cav_id][other_id][lig_id], "out.zip")
    if os.path.exists(path):
        hr = HotspotReader(path).read()

        # tasks
        out = hr.atomic_volume_overlap(mol)

    else:
        print("no BCV for cavity {}, BCV {}".format(cav_id, lig_id))
        out = {"donor": {}, "acceptor": {}, "apolar": {}}
        for a in mol.heavy_atoms:
            t = Helper.get_atom_type(a)
            if t == "doneptor":
                out["donor"].update({a.label: 0.0})
                out["acceptor"].update({a.label: 0.0})
            else:
                out[t].update({a.label: 0.0})

    # output
    with open(self.atomic_overlaps[cav_id][other_id][lig_id], 'w') as writer:
        writer.write(str(out))
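# Illustrative only (not from the original source): whichever branch runs,
# `out` maps each interaction type to {atom_label: score}. The atom labels
# and values below are hypothetical.
out = {
    "donor": {"N1": 12.4},
    "acceptor": {"O1": 9.8, "N2": 0.0},
    "apolar": {"C1": 15.1, "C2": 14.7},
}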
def run(self):
    bestvol = HotspotReader(self.input().path).read()
    pharmacophore = bestvol.get_pharmacophore_model()
    pharmacophore.write(self.output()['pymol'].path)

    points = pharmacophore._comparision_dict()
    with open(self.output()['points'].path, 'wb') as w:
        pickle.dump(points, w)
def _get_volume_overlap(self, cav_id, other_id, lig_id):
    """
    Find the highest median BCV from all cavities, and calculate the
    percentage overlap between the best BCV and each query ligand.

    :return:
    """
    def nonzero(val):
        # guard against division by zero in the percentage calculations below
        if val == 0:
            return 1
        else:
            return val

    # inputs
    mol = io.MoleculeReader(self.extracted_ligands[other_id][lig_id])[0]
    path1 = os.path.join(self.hotspot[cav_id], "out.zip")
    path2 = os.path.join(self.bcv[cav_id][other_id][lig_id], "out.zip")
    thresholds = [10, 14, 17]

    if os.path.exists(path1) and os.path.exists(path2):
        bcv = HotspotReader(path2).read()
        hot = HotspotReader(path1).read()

        # tasks
        other = Grid.from_molecule(mol)
        bcv_sg = Grid.get_single_grid(bcv.super_grids, mask=False)
        bcv_overlap = bcv_sg._mutually_inclusive(other=other).count_grid()

        lig_vol = (other > 0).count_grid()
        bcv_vol = (bcv_sg > 0).count_grid()

        hot_sgs = [(Grid.get_single_grid(hot.super_grids, mask=False) > t)
                   for t in thresholds]
        hot_vols = [nonzero(hot_sg.count_grid()) for hot_sg in hot_sgs]
        hot_overlap = [hot_sg._mutually_inclusive(other=other).count_grid()
                       for hot_sg in hot_sgs]

        # output
        with open(self.bcv_lig_overlaps[cav_id][other_id][lig_id], 'w') as writer:
            writer.write(str((bcv_overlap / lig_vol) * 100))

        with open(self.bcv_hot_overlaps[cav_id][other_id][lig_id], 'w') as writer:
            writer.write(str((bcv_overlap / bcv_vol) * 100))

        with open(self.hot_lig_overlaps[cav_id][other_id][lig_id], 'w') as writer:
            hot_lig = [str((a / lig_vol) * 100) for a in hot_overlap]
            print(hot_lig)
            writer.write(",".join(hot_lig))

        with open(self.hot_hot_overlaps[cav_id][other_id][lig_id], 'w') as writer:
            hot_hot = [str((hot_overlap[i] / hot_vols[i]) * 100)
                       for i in range(len(thresholds))]
            writer.write(",".join(hot_hot))
    else:
        print("no BCV for cavity {}, BCV {}".format(cav_id, lig_id))
def main():
    # input files
    #############################
    mol_file = "data/gold_docking_poses.sdf"
    hotspot_files = "data/out.zip"
    output_file = "data/ranked.sdf"

    # option 1: rank based on apolar score
    # sort_on = ["apolar"]

    # option 2: rank based on donor and acceptor scores
    sort_on = ["donor", "acceptor"]

    # option 3:
    # sort_on = ["simple_score"]
    ###########################################

    # read hotspots and molecules
    mols = [m for m in MoleculeReader(mol_file)]  # so molecules can retain new attributes
    hr = HotspotReader(hotspot_files).read()
    for p, g in hr.super_grids.items():
        hr.super_grids[p] = g.max_value_of_neighbours()

    # create a grid which can contain all docking poses
    small_blank = Grid.initalise_grid(
        coords={atm.coordinates for mol in mols for atm in mol.heavy_atoms},
        padding=2)

    # set the protein to -1 to detect clashing
    protein_grid = hr.super_grids["apolar"].copy_and_clear()
    for atm in hr.protein.atoms:
        protein_grid.set_sphere(point=atm.coordinates,
                                radius=atm.vdw_radius * 0.9,
                                value=-1,
                                scaling='None')

    protein_grid = _shrink(small=small_blank, big=protein_grid)

    # shrink hotspot maps to save time
    sub_grids = {p: _shrink(small=small_blank, big=g) + protein_grid
                 for p, g in hr.super_grids.items()}

    # score the mols
    for i, mol in enumerate(mols):
        scores = example_score(sub_grids, mol, small_blank)
        mol.data = scores

        simple = simple_score(hr, mol)
        mol.data.update({"simple_score": simple})

    ranked_mols = ranked_molecules(mols, sort_on)

    # output ranked mols in sdf format with data attached
    _output_sdf(ranked_mols, output_file)
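# Usage sketch (not part of the original script): `_shrink`, `example_score`,
# `simple_score`, `ranked_molecules` and `_output_sdf` are helpers assumed to
# be defined alongside main() in the same module.
if __name__ == "__main__":
    main()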
def shrink_hotspots(self, hotspot_paths, padding=2.0):
    """
    Takes in the calculated hotspots on the aligned ensemble. Crops and saves
    only the area around the reference binding site. Results are stored in the
    same parent directory as the full-sized hotspots, in a directory called
    "binding_site_maps".

    :param list hotspot_paths: paths to the hotspot results we would like to shrink.
    :param float padding: how many angstroms away from the furthest binding site atom to look.
    :return: list of the paths for all shrunk hotspots in the ensemble.
    """
    # Get the area to truncate around the binding site:
    print("Shrinking hotspots for ensemble...")
    if not self.reference_binding_site:
        self.reference_binding_site = self.get_binding_site(self.reference_ID)

    # Find the maximum and minimum coordinates of the reference binding site
    dims = np.array([a.coordinates for r in self.reference_binding_site.residues
                     for a in r.atoms])
    min_coords = np.array([np.min(dims[:, 0]), np.min(dims[:, 1]), np.min(dims[:, 2])])
    max_coords = np.array([np.max(dims[:, 0]), np.max(dims[:, 1]), np.max(dims[:, 2])])

    # Add some padding in both directions:
    min_coords -= padding
    max_coords += padding

    h_out_dir_list = []

    for p in hotspot_paths:
        # Read in hotspot result
        h_result = HotspotReader(p).read()

        # Shrink the grids for each probe type
        for probe, g in h_result.super_grids.items():
            h_result.super_grids[probe] = self.shrink_to_binding_site(g, min_coords, max_coords)

        res_path = dirname(p)
        # Save shrunk hotspot, assuming the directory it was previously in was named sensibly.
        h_out_dir = join(res_path, "binding_site_maps")
        h_out_dir_list.append(join(h_out_dir, "out"))
        with HotspotWriter(h_out_dir, visualisation="pymol",
                           grid_extension=".ccp4", zip_results=False) as writer:
            writer.write(h_result)

    return h_out_dir_list
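# Usage sketch (illustrative, not from the original source): assumes
# `ensemble` is an instance of the class defining shrink_hotspots(), with
# get_binding_site()/reference_ID already configured, and that the paths
# below point at precalculated hotspot results. The paths are hypothetical.
paths = ["ensemble/1ABC/out.zip", "ensemble/2DEF/out.zip"]
shrunk_paths = ensemble.shrink_hotspots(paths, padding=2.0)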
def run(self):
    hr = HotspotReader(self.input().path).read()
    bcv = hr.tractability_map(volume=self.volume)

    out_settings = HotspotWriter.Settings()
    out_settings.charged = False
    w = HotspotWriter(os.path.dirname(self.output().path),
                      grid_extension=".grd",
                      zip_results=True,
                      settings=out_settings)
    w.write(bcv)
def run(self):
    prot = Protein.from_file(self.input()['protein'].path)
    hr = HotspotReader(self.input()['hs_result'].path).read()
    scored_prot = hr.score(prot)

    with open(self.output().path, 'w') as csv_file:
        csv_file.write("mol_id,atom_id,score\n")
        for a in scored_prot.heavy_atoms:
            out_str = "protein,{},{}\n".format(a.label, a.partial_charge)
            csv_file.write(out_str)
def test_read(self):
    path = "testdata/hs_io/minimal_multi_all_grids/out.zip"

    with HotspotReader(path=path) as r:
        hr = r.read(identifier="hotspot-1")
    self.assertIsInstance(hr, Results)

    with HotspotReader(path=path) as r:
        hr = r.read()
    self.assertIsInstance(hr, list)
def run(self):
    mols = io.MoleculeReader(self.input()['ligands'].path)
    hr = HotspotReader(self.input()['hs_result'].path).read()

    with open(self.output().path, 'w') as csv_file:
        csv_file.write("mol_id,atom_id,score\n")
        for mol in mols:
            scored_mol = hr.score(mol)
            for a in scored_mol.heavy_atoms:
                out_str = "{},{},{}\n".format(mol.identifier, a.label, a.partial_charge)
                csv_file.write(out_str)
def _score_cavity(self, cav_id):
    """
    Score the cavity using the hotspot score.

    :param cav_id:
    :return:
    """
    print(self.apo)
    hr = HotspotReader(os.path.join(self.hotspot[cav_id], "out.zip")).read()
    s = hr.score()

    with open(self.cavity_score[cav_id], "w") as f:
        f.write(str(s))
def shrink_hotspot_maps(hs_result_paths, ligands, padding=4.0):
    """
    Given the list of ligands in the ensemble and some hotspot maps, shrinks
    all the maps to a box around the binding site defined by the ligands.

    :param hs_result_paths: a list of paths to precalculated hotspot results.
        Should all be for the same target (or, more precisely, targets that we
        are looking to compare).
    :param ligands: a list of ccdc molecules corresponding to the ensemble
        ligands. Needed to define the binding site of interest.
    :param padding: how many angstroms of padding to add around the ligand box.
    :return: a list of *shrunk* hotspot results
    """
    # Find the largest ligand and use it to define the binding site
    mws = [l.molecular_weight for l in ligands]
    biggest_lig = ligands[mws.index(max(mws))]

    # Get the dimensions in space of the largest ligand
    dims = np.array([a.coordinates for a in biggest_lig.atoms])
    min_coords = np.array([np.min(dims[:, 0]), np.min(dims[:, 1]), np.min(dims[:, 2])])
    max_coords = np.array([np.max(dims[:, 0]), np.max(dims[:, 1]), np.max(dims[:, 2])])

    # Add some padding in both directions:
    min_coords -= padding
    max_coords += padding

    # Now shrink all the hotspot grids to the min and max dimensions
    shrunk_hs_results = []

    for hpath in hs_result_paths:
        hs_res = HotspotReader(str(hpath.resolve())).read()
        probes = hs_res.super_grids.keys()

        # now to shrink the grids for each probe
        for p in probes:
            hs_res.super_grids[p] = EnsembleResult.shrink_to_binding_site(
                in_grid=hs_res.super_grids[p],
                new_origin=min_coords,
                new_far_corner=max_coords)
        shrunk_hs_results.append(hs_res)

        h_out_dir = Path(hpath.parent, 'binding_site_maps')
        if not h_out_dir.exists():
            h_out_dir.mkdir()
        with HotspotWriter(str(h_out_dir.resolve()),
                           visualisation="pymol",
                           grid_extension=".ccp4",
                           zip_results=False) as writer:
            writer.write(hs_res)

    return shrunk_hs_results
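# Usage sketch (illustrative, not from the original source): the glob pattern
# and SDF file name below are hypothetical. Note the function expects
# pathlib.Path objects, since it calls .resolve() and .parent on them.
from pathlib import Path
from ccdc import io

hs_paths = sorted(Path("ensemble_results").glob("*/out.zip"))
ligands = [m for m in io.MoleculeReader("ensemble_ligands.sdf")]
shrunk = shrink_hotspot_maps(hs_paths, ligands, padding=4.0)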
def _get_bcv(self, cav_id, other_id, lig_id):
    """
    Generate a BCV for each cavity, and each required volume.

    :param cav_id:
    :return:
    """
    # inputs
    hr = HotspotReader(path=os.path.join(self.hotspot[cav_id], "out.zip")).read()
    with open(self.ligand_volume[other_id][lig_id], 'r') as f:
        target_volume = f.read()

    # task
    start = time.time()
    extractor = Extractor(hr)
    bcv = extractor.extract_volume(volume=int(float(target_volume)))
    finish = time.time()

    # output
    out = self.bcv[cav_id][other_id][lig_id]

    create_directory(os.path.dirname(out))
    create_directory(out)

    with HotspotWriter(path=out, grid_extension=".grd", zip_results=True) as writer:
        writer.write(bcv)

    with open(self.bcv_time[cav_id][other_id][lig_id], 'w') as t:
        t.write(str(finish - start))

    with open(self.bcv_threshold[cav_id][other_id][lig_id], 'w') as s:
        s.write(str(bcv.step_threshold))
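# Standalone sketch of the same extraction step outside the pipeline
# (illustrative; assumes Extractor is importable from hotspots.result as in
# this codebase, and the 300 A^3 target volume is an arbitrary example).
from hotspots.hs_io import HotspotReader
from hotspots.result import Extractor

hr = HotspotReader("out.zip").read()
extractor = Extractor(hr)
bcv = extractor.extract_volume(volume=300)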
def test_docking_constraint_atoms(self):
    with PushDir("testdata/result/data"):
        # read hotspot maps
        with HotspotReader(path="out.zip") as r:
            self.result = r.read()

        print(self.result._docking_constraint_atoms())
def setUp(self) -> None:
    with HotspotReader(path="testdata/result/data/out.zip") as r:
        self.result = r.read()

    for p, g in self.result.super_grids.items():
        self.result.super_grids[p] = g.dilate_by_atom()

    # bin
    self.bin = "testdata/result/Extractor/bin"
    # reuse
    self.out = "testdata/result/Extractor"
def test_docking_fitting_pts(self):
    with PushDir("testdata/2vta"):
        # read hotspot maps
        with HotspotReader(path="out.zip") as r:
            self.result = r.read()

        mol = [m for m in MoleculeReader("crystal_ligand.sdf")
               if "LZ1" in m.identifier.split("_")][0]
        print(mol.identifier)

        m = self.result._docking_fitting_pts(mol)
def setUp(self) -> None:
    with HotspotReader("testdata/pharmacophore_extension/provided_data/out.zip") as r:
        self.hr = [hr for hr in r.read() if hr.identifier == "best_volume"][0]

    # smoothing is really important to this workflow
    for p, g in self.hr.super_grids.items():
        h = g.max_value_of_neighbours()
        h = h.gaussian()
        self.hr.super_grids[p] = h
def _get_matched_atoms(self, cav_id, other_id, lig_id):
    """
    This is the ligand overlap implementation in the DoGSiteScorer paper.

    :param cav_id:
    :param other_id:
    :param lig_id:
    :return:
    """
    # inputs
    mol = io.MoleculeReader(self.extracted_ligands[other_id][lig_id])[0]
    path = os.path.join(self.bcv[cav_id][other_id][lig_id], "out.zip")

    if os.path.exists(path):
        hr = HotspotReader(path).read()

        # tasks
        perc, type_dic = hr.percentage_matched_atoms(mol=mol, threshold=0,
                                                     match_atom_types=True)

        # output
        with open(self.matched[cav_id][other_id][lig_id], 'w') as writer:
            writer.write(str(perc) + "\n")
            writer.write(str(type_dic))
    else:
        print("no BCV for cavity {}, BCV {}".format(cav_id, lig_id))
def fp_scheme(fpath, percentile, low, high, id):
    fpath = os.path.join(fpath, "out.zip")
    pdb = os.path.basename(os.path.dirname(os.path.dirname(os.path.dirname(fpath))))

    if os.path.exists(fpath):
        with HotspotReader(fpath) as r:
            hr = r.read()

        hr.docking_fitting_pts(fname=os.path.join(os.path.dirname(fpath),
                                                  f"fitting_pts_{id}.mol2"),
                               percentile=percentile,
                               low=low,
                               high=high)
    else:
        print(f"FILE NOT FOUND: {pdb}")
def run(self):
    # inputs
    with HotspotReader(self.args.hotspot_path) as reader:
        hr = [h for h in reader.read()
              if h.identifier == self.args.hotspot_identifier][0]

    with MoleculeReader(self.args.docked_mols) as reader:
        out = os.path.join(os.path.dirname(self.args.docked_mols),
                           "results_no_dummy.mol2")
        with MoleculeWriter(out) as writer:
            for mol in reader:
                for atm in mol.atoms:
                    if atm.atomic_symbol == "Unknown":
                        mol.remove_atom(atm)
                writer.write(mol)

    self.args.docked_mols = out
    entries = EntryReader(self.args.docked_mols)

    # outputs
    out_dir = os.path.dirname(self.args.docked_mols)
    print(out_dir)

    # process
    hr = augmentation(hr, entries)

    # 1) rescore
    rescored = {e: score(hr, e) for e in entries}
    ordered_rescored = OrderedDict(sorted(rescored.items(),
                                          key=lambda item: item[1],
                                          reverse=True))

    # 2) deduplicate: retain highest ranked pose only
    out_dic = deduplicate(ordered_rescored)

    # 3) output to dataframe ready for ccdc.descriptors API
    df = pd.DataFrame({
        "identifier": [e.identifier for e in out_dic.keys()],
        "score": list(out_dic.values()),
        "activity": [activity_tag(e.identifier) for e in out_dic.keys()]
    })
    df.to_csv(os.path.join(out_dir, "rescored.csv"))

    with EntryWriter(os.path.join(out_dir, "rescored.sdf")) as w:
        for e in out_dic.keys():
            w.write(e)
def get_polar_cluster_coords(hs_result_path, hs_threshold=10):
    with HotspotReader(str(hs_result_path)) as hr:
        hs_result = hr.read()

    clust_id_list = []
    clust_probe_list = []
    clust_coords_list = []
    clust_map_list = []
    cluster_size = []
    cluster_radii = []

    polar_probes = ['donor', 'acceptor']

    for p in polar_probes:
        probe_grid = hs_result.super_grids[p]
        probe_grid = probe_grid * (probe_grid > hs_threshold)
        probe_arr = np.array(probe_grid.to_vector()).reshape(probe_grid.nsteps)
        probe_clust_arr = _GridEnsemble.HDBSCAN_cluster(probe_arr, min_cluster_size=5)
        probe_clust_grid = as_grid(probe_grid.bounding_box[0],
                                   probe_grid.bounding_box[1],
                                   probe_clust_arr)
        cgrid_path = str(Path(hs_result_path.parent, str(f'{p}_cluster_ranges.ccp4')))
        probe_clust_grid.write(cgrid_path)

        coords = get_clusters_centre_mass(probe_clust_arr, probe_arr)

        for cn in set(probe_clust_arr[probe_clust_arr > 0]):
            c_id = f"{p}_{int(cn)}"
            clust_id_list.append(c_id)
            clust_probe_list.append(p)
            clust_coords_list.append(get_coordinates_angstrom(coords[cn], probe_grid))
            clust_map_list.append(cgrid_path)

            cluster_volume = len((probe_clust_arr == cn).nonzero()[0]) * probe_grid.spacing ** 3
            # radius of a sphere of equal volume: V = 4/3 * pi * r^3, so r = (0.75 * V / pi)^(1/3)
            cluster_radius = (0.75 * cluster_volume / math.pi) ** (1 / 3)
            cluster_size.append(cluster_volume)
            cluster_radii.append(cluster_radius)

    clust_df = pd.DataFrame()
    clust_df['cluster_id'] = clust_id_list
    clust_df['probe_type'] = clust_probe_list
    clust_df['centre_of_mass'] = clust_coords_list
    clust_df['cluster_map'] = clust_map_list
    clust_df['cluster_volume'] = cluster_size
    clust_df['cluster_radius'] = cluster_radii

    return clust_df
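# Usage sketch (illustrative, not from the original source): the result path
# is hypothetical. get_polar_cluster_coords() expects a pathlib.Path, since it
# uses .parent internally to place the cluster map files.
from pathlib import Path

clust_df = get_polar_cluster_coords(Path("target/out.zip"), hs_threshold=10)
print(clust_df[['cluster_id', 'centre_of_mass', 'cluster_radius']])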
def pharms(out):
    with HotspotReader(out) as r:
        hr = [hr for hr in r.read() if hr.identifier == "bestvol"][0]

    p = HotspotPharmacophoreModel()
    p.from_hotspot(hr, projections=True)

    vis_out = os.path.join(os.path.dirname(out), "pharmacophores")
    if not os.path.exists(vis_out):
        os.mkdir(vis_out)

    p.pymol_visulisation(vis_out)

    for feat in p.detected_features:
        p.add_feature(feat)

    p.write(os.path.join(vis_out, "hot.cm"))
def run(self):
    hs = HotspotReader(self.input().path).read()

    settings = Extractor.Settings()
    settings.cutoff = 12
    settings.mvon = False

    extractor = Extractor(hs, settings)
    best = extractor.extract_best_volume(volume=100)[0]

    out_settings = HotspotWriter.Settings()
    out_settings.charged = False

    with HotspotWriter(os.path.dirname(self.output().path),
                       grid_extension=".grd",
                       zip_results=True,
                       settings=out_settings) as w:
        w.write(best)
def run(self):
    hr = [HotspotReader(self.input().path).read()]
    i = 0
    all_cavs = []

    for cav in hr:
        i += 1
        hist = cav.map_values()

        all_points = []
        for x in hist.values():
            all_points += x.flatten().tolist()

        df = pd.DataFrame({'scores': all_points})
        df = df[df['scores'] != 0]
        df['cavity'] = i
        all_cavs.append(df)

    all_df = pd.concat(all_cavs)
    all_df.to_csv(self.output().path)
def testscore_atoms_as_spheres(self):
    with PushDir("testdata/result/data"):
        mols = [m for m in MoleculeReader("gold_docking_poses.sdf")]

        # create a grid which can contain all docking poses
        small_blank = Grid.initalise_grid(
            coords={atm.coordinates for mol in mols for atm in mol.heavy_atoms},
            padding=2)

        # read hotspot maps
        with HotspotReader(path="out.zip") as r:
            self.result = r.read()

        # dilate the grids
        for p, g in self.result.super_grids.items():
            self.result.super_grids[p] = g.dilate_by_atom()

        # shrink hotspot maps to save time
        sub_grids = {p: Grid.shrink(small=small_blank, big=g)
                     for p, g in self.result.super_grids.items()}

        # create single grid
        mask_dic, sg = Grid.get_single_grid(sub_grids)

        self.result.super_grids = mask_dic

        # set background to 1
        self.result.set_background()
        self.result.normalize_to_max()

        print([g.extrema for p, g in self.result.super_grids.items()])

        for m in mols[:1]:
            s = self.result.score_atoms_as_spheres(m, small_blank)
            print(s)
def masked_hotspot(base, pdb, hotspot_path):
    assert os.path.exists(hotspot_path)

    with HotspotReader(os.path.join(hotspot_path, "out.zip")) as r:
        hr = [h for h in r.read() if h.identifier == "hotspot"][0]

    b = (hr.buriedness > 3) * hr.buriedness

    crystal_lig = MoleculeReader(os.path.join(base, pdb, f"{pdb}_ref.mol2"))[0]

    g = hr.buriedness.copy_and_clear()

    for atm in crystal_lig.heavy_atoms:
        g.set_sphere(point=atm.coordinates,
                     radius=6,
                     value=1,
                     mode="replace",
                     scaling="None")

    mol_buried = (g & b) * b
    common_mol_buried = hr.super_grids["apolar"].common_boundaries(mol_buried)

    apolar = (common_mol_buried & hr.super_grids["apolar"]) * hr.super_grids["apolar"]
    donor = (common_mol_buried & hr.super_grids["donor"]) * hr.super_grids["donor"]
    acceptor = (common_mol_buried & hr.super_grids["acceptor"]) * hr.super_grids["acceptor"]

    return Results(super_grids={"apolar": apolar,
                                "donor": donor,
                                "acceptor": acceptor},
                   protein=hr.protein,
                   buriedness=common_mol_buried)
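# Usage sketch (illustrative; the directory layout below is hypothetical).
# Expects <base>/<pdb>/<pdb>_ref.mol2 and <hotspot_path>/out.zip to exist.
masked = masked_hotspot(base="data", pdb="1vr1", hotspot_path="data/1vr1/hotspot")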
def _get_hotspot(self, cav_id):
    """
    Calculate hotspot map from pre-calculated superstar and buriedness grids.

    :param cav_id:
    :return:
    """
    # inputs
    prot = Protein.from_file(self.apo_prep)

    sr = HotspotReader(path=os.path.join(self.superstar[cav_id], "out.zip")).read()
    superstar = [_AtomicHotspotResult(identifier=ident, grid=grid, buriedness=None)
                 for ident, grid in sr.super_grids.items()]
    buriedness = Grid.from_file(self.buriedness)

    # tasks
    start = time.time()
    h = Runner()

    s = h.Settings()
    s.apolar_translation_threshold = 14
    s.polar_translation_threshold = 14
    s.polar_contributions = False
    s.sphere_maps = False
    s.nrotations = 3000

    hr = h.from_superstar(prot, superstar, buriedness, settings=s, clear_tmp=True)
    finish = time.time()

    # output
    if not os.path.exists(self.hotspot[cav_id]):
        os.mkdir(self.hotspot[cav_id])

    with open(self.hotspot_time[cav_id], 'w') as t:
        t.write(str(finish - start))

    with HotspotWriter(self.hotspot[cav_id], zip_results=True) as writer:
        writer.write(hr)
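# For comparison, a minimal sketch of the more common route, where the Runner
# computes the SuperStar and buriedness grids itself rather than reusing
# pre-calculated ones (standard Hotspots API usage; the PDB file name is
# hypothetical).
from ccdc.protein import Protein
from hotspots.calculation import Runner

protein = Protein.from_file("protein.pdb")
runner = Runner()
result = runner.from_protein(protein, nrotations=3000)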
def dock(inputs): """ submit a GOLD API docking calculation using docking constraints automatically generated from the Hotspot API :param ligand_path: :param out_path: :param hotspot: :param weight: :return: """ def add_ligands(docker, ligand_path): with gzip.open(os.path.join(ligand_path, "actives_final.mol2.gz"), 'rb') as f_in: with open( os.path.join(docker.settings.output_directory, "actives_final.mol2"), 'wb') as f_out: shutil.copyfileobj(f_in, f_out) with gzip.open(os.path.join(ligand_path, "decoys_final.mol2.gz"), 'rb') as f_in: with open( os.path.join(docker.settings.output_directory, "decoys_final.mol2"), 'wb') as f_out: shutil.copyfileobj(f_in, f_out) docker.settings.add_ligand_file(os.path.join( docker.settings.output_directory, "actives_final.mol2"), ndocks=5) docker.settings.add_ligand_file(os.path.join( docker.settings.output_directory, "decoys_final.mol2"), ndocks=5) def add_protein(docker, hotspot, junk): pfile = os.path.join(junk, "protein.mol2") with MoleculeWriter(pfile) as w: w.write(hotspot.protein) docker.settings.add_protein_file(pfile) def define_binding_site(docker, ligand_path): crystal_ligand = MoleculeReader( os.path.join(ligand_path, "crystal_ligand.mol2"))[0] docker.settings.binding_site = docker.settings.BindingSiteFromLigand( protein=docker.settings.proteins[0], ligand=crystal_ligand) def add_hotspot_constraint(docker, hotspot, weight): if int(weight) != 0: constraints = docker.settings.HotspotHBondConstraint.create( protein=docker.settings.proteins[0], hr=hotspot, weight=int(weight), min_hbond_score=0.05, max_constraints=1) for constraint in constraints: docker.settings.add_constraint(constraint) def write(docker, out_path): results = Docker.Results(docker.settings) # write ligands with MoleculeWriter(os.path.join(out_path, "docked_ligand.mol2")) as w: for d in results.ligands: w.write(d.molecule) # copy ranking file # in this example, this is the only file we use for analysis. However, other output files can be useful. copyfile(os.path.join(junk, "bestranking.lst"), os.path.join(out_path, "bestranking.lst")) # GOLD docking routine ligand_path, out_path, hotspot, weight, search_efficiency = inputs docker = Docker() # GOLD settings docker.settings = DockerSettings() docker.settings.fitness_function = 'plp' docker.settings.autoscale = search_efficiency junk = os.path.join(out_path, "all") docker.settings.output_directory = junk # GOLD write lots of files we don't need in this example if not os.path.exists(junk): os.mkdir(junk) docker.settings.output_file = os.path.join(junk, "docked_ligands.mol2") # read the hotspot hotspot = HotspotReader(hotspot).read() # for p, g in hotspot.super_grids.items(): # hotspot.super_grids[p] = g.max_value_of_neighbours() # dilation to reduce noise add_ligands(docker, ligand_path) add_protein(docker, hotspot, junk) define_binding_site(docker, ligand_path) add_hotspot_constraint(docker, hotspot, weight) docker.dock(file_name=os.path.join(out_path, "hs_gold.conf")) write(docker, out_path) # Clean out unwanted files shutil.rmtree(junk)
def dock(inputs): """ submit a GOLD API docking calculation using docking constraints automatically generated from the Hotspot API :param ligand_path: :param out_path: :param hotspot: :param weight: :return: """ def add_ligands(docker, ligand_path): docker.settings.add_ligand_file(os.path.join(ligand_path, "actives_final.mol2"), ndocks=5) docker.settings.add_ligand_file(os.path.join(ligand_path, "decoys_final.mol2"), ndocks=5) def add_protein(docker, hotspot, junk): pfile = os.path.join(junk, "protein.mol2") with MoleculeWriter(pfile) as w: w.write(hotspot.protein) print(pfile) docker.settings.add_protein_file(pfile) def define_binding_site(docker, ligand_path): crystal_ligand = MoleculeReader(os.path.join(ligand_path, "crystal_ligand.mol2"))[0] docker.settings.binding_site = docker.settings.BindingSiteFromLigand(protein=docker.settings.proteins[0], ligand=crystal_ligand) def add_hotspot_constraint(docker, hotspot, weight): if int(weight) != 0: constraints = docker.settings.HotspotHBondConstraint.create(protein=docker.settings.proteins[0], hr=hotspot, weight=int(weight), min_hbond_score=0.05, max_constraints=1) for constraint in constraints: docker.settings.add_constraint(constraint) def write(docker, out_path): results = Docker.Results(docker.settings) # write ligands with MoleculeWriter(os.path.join(out_path, "docked_ligand.mol2")) as w: for d in results.ligands: mol = d.molecule # for atm in mol.atoms: # if atm.atomic_symbol == "Unknown": # mol.remove_atom(atm) w.write(mol) # copy ranking file # in this example, this is the only file we use for analysis. However, other output files can be useful. copyfile(os.path.join(junk, "bestranking.lst"), os.path.join(out_path, "bestranking.lst")) # GOLD docking routine ligand_path, out_path, hs_path, weight, search_efficiency = inputs docker = Docker() # GOLD settings docker.settings = DockerSettings() docker.settings.fitness_function = 'plp' docker.settings.autoscale = search_efficiency junk = check_dir(os.path.join(out_path, "all")) docker.settings.output_directory = junk # GOLD write lots of files we don't need in this example docker.settings.output_file = os.path.join(junk, "docked_ligands.mol2") # read the hotspot with HotspotReader(hs_path) as reader: # change if your hotspot is call something else hotspot = [h for h in reader.read() if h.identifier == "bestvol"][0] # for p, g in hotspot.super_grids.items(): # hotspot.super_grids[p] = g.dilate_by_atom() # dilation to reduce noise add_ligands(docker, ligand_path) add_protein(docker, hotspot, junk) define_binding_site(docker, ligand_path) add_hotspot_constraint(docker, hotspot, weight) docker.dock(file_name=os.path.join(out_path, "hs_gold.conf")) write(docker, out_path) # Clean out unwanted files shutil.rmtree(junk)
dirname = "./result"
pdb = "1vr1"
reps = "representatives.dat"

if not os.path.exists(dirname):
    os.mkdir(dirname)

PDBResult(identifier=pdb).download(out_dir=dirname)

if os.path.exists(reps):
    representatives = reps
else:
    representatives = None

try:
    result = HotspotReader(path=os.path.join(dirname, "out.zip")).read()
    pharmacophore = result.get_pharmacophore_model()
    pharmacophore.rank_features(max_features=5)
except Exception:
    # no precalculated result available: build the pharmacophore from the PDB instead
    pharmacophore = PharmacophoreModel.from_pdb(pdb_code=pdb,
                                                chain="H",
                                                out_dir=dirname,
                                                representatives=representatives)
    pharmacophore.rank_features(max_features=5)
    result = Results(super_grids=pharmacophore.dic,
                     protein=Protein.from_file(os.path.join(dirname, pdb + ".pdb")))

pharmacophore.write(os.path.join(dirname, "crossminer.cm"))
from ccdc import io
from hotspots.hs_io import HotspotReader

hs = HotspotReader("out.zip").read()
c = hs._docking_constraint_atoms(max_constraints=1)
d = c.to_molecule()

with io.MoleculeWriter("constraints.mol2") as w:
    w.write(d)