def run():
    """
    Run a pharmacophore-constrained GOLD docking job configured from ``sys.argv``.

    Command-line arguments (positional):

    1. parent directory (must be an absolute path)
    2. score, forwarded to ``HotspotPharmacophoreModel.to_gold_conf``
    3. autoscale, forwarded to ``template``
    4. run identifier, used as the name of the output sub-directory
    5. pharmacophore file name, relative to the parent directory

    Results are written to ``<parent>/gold_results/<run_id>``; the raw GOLD
    output goes to the ``all`` sub-directory of that folder.

    :return: None
    """
    # Local import so the block does not rely on a file-level import.
    import subprocess

    # must be abspath
    parent = sys.argv[1]
    score = sys.argv[2]
    autoscale = sys.argv[3]
    run_id = sys.argv[4]
    crossminer_file = os.path.join(parent, sys.argv[5])

    # data
    conf_name = "hs_gold.conf"
    out_path = check_dir(os.path.join(parent, "gold_results"))
    out_path = check_dir(os.path.join(out_path, run_id))
    junk = check_dir(os.path.join(out_path, "all"))

    hotspot = os.path.join(parent, "hotspot_pharmacophore", "out.zip")
    crystal_ligand = os.path.join(parent, "crystal_ligand.mol2")
    actives = os.path.join(parent, "actives_final.mol2")
    decoys = os.path.join(parent, "decoys_final.mol2")
    prot_file = os.path.join(out_path, "protein.mol2")

    # output protein
    with hs_io.HotspotReader(hotspot) as reader:
        hr = [h for h in reader.read() if h.identifier == "bestvol"][0]

    with MoleculeWriter(prot_file) as w:
        w.write(hr.protein)

    hspm = HotspotPharmacophoreModel.from_file(crossminer_file)
    constraint_str = hspm.to_gold_conf(score=score)

    # create template
    gold_conf_str = template(autoscale, crystal_ligand, actives, decoys, junk,
                             prot_file, constraint_str)
    print(gold_conf_str)
    with open(os.path.join(out_path, conf_name), "w") as w:
        w.write(gold_conf_str)

    # linux only
    gold_exe = os.path.join(os.environ["GOLD_DIR"], "bin/gold_auto")

    # run docking
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact. As before, the exit status is not checked.
    with PushDir(out_path):
        subprocess.run([gold_exe, conf_name], check=False)

    # process results
    # make it consistent with other names
    with MoleculeReader(os.path.join(junk, "docked_ligands.mol2")) as docked, \
            MoleculeWriter(os.path.join(out_path, "docked_ligand.mol2")) as w:
        for d in docked:
            # strip atoms whose element is reported as "Unknown" before writing
            for atm in d.atoms:
                if atm.atomic_symbol == "Unknown":
                    d.remove_atom(atm)
            w.write(d)

    shutil.copyfile(os.path.join(junk, "bestranking.lst"),
                    os.path.join(out_path, "bestranking.lst"))
def run(self):
    '''Collect the target chains and superpose each of them.

    Chains come from a sequence search when ``--sequence_file`` is given,
    otherwise from the comma-separated ``pdb_id:chain_id`` pairs in
    ``--target_chains``. The reference structure is written first, then
    every chain is passed to ``run_one``; a chain that raises
    ``RuntimeError`` is reported and skipped.
    '''
    chains = []
    if self.args.sequence_file is not None:
        if self.args.sequence_search_tool is None:
            self.error('--sequence_search_tool argument required with --sequence_file')
        chains = self.sequence_search()
    elif self.args.target_chains is not None:
        for spec in self.args.target_chains.split(','):
            pdb_id, chain_id = spec.split(':')
            entry = {
                'pdb_id': pdb_id,
                'chain_id': chain_id,
                'identity': 0,
                'similarity': 0,
            }
            chains.append(entry)

    with MoleculeWriter(self.args.output_file) as mol_writer:
        # the reference goes first so the superposed chains follow it
        reference = (self.reference_protein
                     if self.superposition_output == 'full_protein'
                     else self.reference_binding_site)
        mol_writer.write(reference)

        for chain in chains:
            try:
                self.run_one(chain, mol_writer)
            except RuntimeError as err:
                print("Superposition failed: {}".format(err.args[0]))

    summary_path = self.args.csv_summary_file
    if summary_path is not None:
        self.write_csv_summary_file(summary_path, chains)
def get_percentile_fragments(self, percentile=90, mode="above", save=True):
    """
    Select the fragments whose "Hotspot_Score" lies above or below a percentile.

    :param percentile: percentile of the "Hotspot_Score" column used as threshold
    :param str mode: "above" keeps scores strictly greater than the threshold,
        "below" keeps scores strictly smaller than it
    :param bool save: if True, the selection is also written to
        "fragments_<mode>_<percentile>_percentile.mol2" in ``self.out_dir``
    :return: list of the selected ligands, or None when ``mode`` is not recognised
    """
    scores = self.summary_DF["Hotspot_Score"]
    threshold = np.percentile(scores.values, percentile)

    if mode == "above":
        select = self.summary_DF.loc[scores > threshold]
    elif mode == "below":
        select = self.summary_DF.loc[scores < threshold]
    else:
        print(
            "Unrecognised value for param 'mode' for method DFragScoreSummary.get_percentile_fragments()"
        )
        return None

    # A set gives constant-time membership tests; checking against the raw
    # array would rescan every identifier for each ligand.
    ids = set(select["Identifier"].values)
    top_ligs = [lig for lig in self._read_ligands() if lig.identifier in ids]

    if save:
        out_name = "fragments_{}_{}_percentile.mol2".format(mode, percentile)
        with MoleculeWriter(join(self.out_dir, out_name)) as writer:
            for lig in top_ligs:
                writer.write(lig)

    return top_ligs
def scaled_score_ligands(self, tolerance):
    """
    Re-score every docked ligand with the scaled scorer and save the results.

    Each pose from "docking_tmp/docked_ligands.mol2" is passed to
    ``get_scaled_score``; the mean of the heavy-atom ``partial_charge``
    values of the returned molecule (rounded to 2 d.p.) is appended to its
    identifier. All scored poses are written to "scored_docks.mol2" in
    ``self.hotspot_path``.

    :param int tolerance: how many gridpoints away an atom may be from the
        nearest point of its corresponding map
    :return: None
    """
    scorer = self.get_scorer_result()
    hotspot = scorer.get_hotspot()
    docked_path = join(self.hotspot_path, "docking_tmp", "docked_ligands.mol2")

    rescored = []
    for pose in MoleculeReader(docked_path):
        scaled = scorer.get_scaled_score(pose, tolerance, hotspot)[0]
        # per-atom values are read from the partial_charge field
        atom_scores = [atom.partial_charge for atom in scaled.heavy_atoms]
        mean_score = np.mean(atom_scores)
        scaled.identifier += "_{}".format(round(mean_score, 2))
        rescored.append(scaled)

    out_file = os.path.join(self.hotspot_path, "scored_docks.mol2")
    with MoleculeWriter(out_file) as writer:
        for scaled in rescored:
            writer.write(scaled)
def detect_from_binding_site(self, bs):
    """
    Populate ``self.detected_features`` with features found in a binding site.

    The binding site is converted with ``_get_crystal``, written to
    'binding_site.pdb' in the current working directory and stored as
    ``self.protein``. Only the projected acceptor/donor/CH-donor and
    hydrophobe feature definitions are used for detection.

    :param bs: binding site object accepted by ``_get_crystal`` and
        ``MoleculeWriter``
    :return: None
    """
    crystal = self._get_crystal(bs)

    with MoleculeWriter('binding_site.pdb') as writer:
        writer.write(bs)

    self.protein = crystal
    self.detected_features = []

    wanted = [
        "acceptor_projected",
        "donor_projected",
        "donor_ch_projected",
        "hydrophobe",
    ]

    for definition in self.feature_definitions.values():
        if definition.identifier in wanted:
            found = definition.detect_features(crystal)
            if len(found) != 0:
                self.detected_features.extend(found)
def _get_cmd(self, protein, cavity_origin, out=None):
    """
    Private method: build the command lines for the atomic hotspot calculation.

    The protein is written to 'protein.mol2' and one '<jobname>.ins'
    instruction file is written per entry of ``settings.atomic_probes``.

    :param protein: protein written to 'protein.mol2'
    :param cavity_origin: forwarded to ``InstructionFile`` as ``cavity``
    :param str out: output directory (outputting ins may be useful for
        debugging); falls back to ``settings.temp_dir`` when falsy
    :return: list of command strings, "<superstar_executable> <jobname>.ins"
    """
    out = out or self.settings.temp_dir

    with PushDir(out):
        with MoleculeWriter(join(out, 'protein.mol2')) as writer:
            writer.write(protein)

    commands = []
    for jobname, probename in self.settings.atomic_probes.items():
        ins_name = "{}.ins".format(jobname)
        instruction = self.InstructionFile(
            jobname=jobname,
            probename=probename,
            settings=self.settings,
            cavity=cavity_origin,
        )
        commands.append(
            "{} {}".format(self.settings.superstar_executable, ins_name))

        with PushDir(out):
            instruction.write(ins_name)

    return commands
def run(self):
    """
    Build the reference pharmacophore for ``self.pdb`` and write all outputs.

    Outputs (paths taken from ``self.output()``): the pymol file, the zipped
    hotspot grids, the aligned ligands and a pickle of the comparison points.
    """
    # create pharmacophore
    model = PharmacophoreModel.from_pdb(
        pdb_code=self.pdb,
        chain=self.chain,
        representatives=self.input().path,
        identifier=self.pdb,
    )
    model.rank_features(max_features=6, feature_threshold=5)

    # write pymol file
    model.write(self.output()["pymol"].path)

    # write Results file
    download_dir = tempfile.mkdtemp()
    PDBResult(self.pdb).download(download_dir)
    pdb_file = os.path.join(download_dir, "{}.pdb".format(self.pdb))
    result = Results(protein=Protein.from_file(pdb_file),
                     super_grids=model.dic)

    writer_settings = HotspotWriter.Settings()
    writer_settings.charged = False
    grid_dir = os.path.dirname(self.output()["grids"].path)
    with HotspotWriter(grid_dir,
                       grid_extension=".grd",
                       zip_results=True,
                       settings=writer_settings) as hs_writer:
        hs_writer.write(result)

    # write aligned molecules
    with MoleculeWriter(self.output()['aligned_mols'].path) as mol_writer:
        for ligand in model.aligned_ligands:
            mol_writer.write(ligand)

    # points
    points = model._comparision_dict()
    with open(self.output()['points'].path, 'wb') as handle:
        pickle.dump(points, handle)
def add_protein(docker, hotspot, junk):
    """
    Write the hotspot's protein to disk and register it with the docker.

    :param docker: object whose ``settings.add_protein_file`` is called
    :param hotspot: object providing the ``protein`` to write
    :param str junk: directory receiving "protein.mol2"
    :return: None
    """
    protein_path = os.path.join(junk, "protein.mol2")

    with MoleculeWriter(protein_path) as writer:
        writer.write(hotspot.protein)

    docker.settings.add_protein_file(protein_path)
def from_smiles(smiles, path, identifier=None, generate_initial_sites=True):
    """
    Build a :class:`ccdc.molecule.Molecule` from a SMILES string and save it.

    When ``identifier`` is omitted the SMILES string itself is used as the
    molecule identifier. The molecule is written to ``path`` before being
    returned.

    :param smiles: str
    :param path: str - file the molecule is written to
    :param identifier: str
    :param generate_initial_sites: boolean - whether to include an initial guess at 3D coordinates
    :return: a :class:`ccdc.molecule.Molecule` instance
    """
    name = smiles if identifier is None else identifier

    if not generate_initial_sites:
        # no coordinates requested: build the siteless molecule
        maker = ChemicalAnalysisLib.SMILESMoleculeMaker()
        mol = Molecule(name, _molecule=maker.siteless_atoms(smiles))
    else:
        parameter_files = _CSDDatabaseLocator.get_conformer_parameter_file_location()
        maker = ConformerGeneratorLib.MoleculeTo3D(parameter_files)
        mol = Molecule(name, maker.create_conformation(smiles))

    with MoleculeWriter(path) as writer:
        writer.write(mol)

    return mol
def dock(self):
    """
    Handle the docking run with GOLD.

    The protein is written to ``self.temp``, the binding site is defined as
    everything within 12.0 of the reference ligand's centre of geometry and
    ``self.search_ligands`` are docked (3 docks each) with the 'plp'
    fitness function.

    :return: ``MoleculeReader`` over the docked ligands file
    """
    docker = Docker()
    # the hs_screening settings class enables hotspot constraints
    docker.settings = hs_screening.DockerSettings()
    settings = docker.settings

    protein_file = os.path.join(self.temp, self.hs_pdb + ".mol2")
    with MoleculeWriter(protein_file) as writer:
        writer.write(self.protein)

    # setup
    settings.add_protein_file(protein_file)
    settings.binding_site = settings.BindingSiteFromPoint(
        protein=settings.proteins[0],
        origin=self.ligand.centre_of_geometry(),
        distance=12.0)
    settings.fitness_function = 'plp'
    settings.autoscale = 10.
    settings.output_directory = self.temp
    settings.output_file = "docked_ligands.mol2"
    settings.add_ligand_file(self.search_ligands, ndocks=3)

    # NOTE: no constraints are applied in this run; earlier experiments with
    # template-similarity and hotspot H-bond constraints were disabled.

    # dock
    docker.dock()

    docked_path = os.path.join(docker.settings.output_directory,
                               docker.settings.output_file)
    return MoleculeReader(docked_path)
def main():
    """Run the Organiser for the 1hcl / 2vta (LZ1) use case and save the rescored ligands."""
    organiser = Organiser(
        path="/home/pcurran/use_case/no_constraints",
        hs_pdb="1hcl",
        ligand_pdb="2vta",
        ligand_identifier="LZ1",
    )

    out_file = "/home/pcurran/use_case/no_constraints/rescored.mol2"
    with MoleculeWriter(out_file) as writer:
        for ligand in organiser.rescored_ligands:
            writer.write(ligand)
def generate_conformer(self, mol):
    """
    Generate conformers for ``mol`` and write them to the working directory.

    The output file is named '<identifier>_conformers.mol2'.

    :param mol: molecule passed to ``generate_confs``
    :return: None
    """
    n_conformers = int(self.args.number_of_conformers)
    conformers = generate_confs(mol, n_conformers, 1)

    out_name = '%s_conformers.mol2' % mol.identifier
    with MoleculeWriter(out_name) as writer:
        for conformer in conformers:
            writer.write(conformer.molecule)
def write(self, path):
    """
    Save the hits as an attribute table plus a molecule file.

    "hits_attr.csv" holds one column per attribute of the first hit (the
    "molecule" attribute is excluded); "hits_mols.mol2" holds the molecules.

    :param str path: output directory
    :return: None
    """
    # column names are taken from the first hit's instance attributes
    columns = [name for name in vars(self.hits[0]).keys() if name != "molecule"]
    table = pd.DataFrame(
        {name: [getattr(hit, name) for hit in self.hits] for name in columns})
    table.to_csv(os.path.join(path, "hits_attr.csv"))

    mol_file = os.path.join(path, "hits_mols.mol2")
    with MoleculeWriter(mol_file) as writer:
        for hit in self.hits:
            writer.write(hit.molecule)
def __init__(self):
    """
    Parse the command line, fetch both structures and compute pose RMSDs.

    The protein and the reference are downloaded into a temporary
    directory, hydrogens are added and the reference is aligned onto the
    protein. The reference ligand is written to "reference.mol2" next to
    this script, and ``self.rmsd_values`` is filled with the RMSD of every
    molecule in the results file against that ligand.
    """
    # NOTE(review): super(self.__class__, self) recurses forever if this
    # class is ever subclassed - consider naming the class explicitly.
    super(self.__class__, self).__init__(description=__doc__)

    # handle command line arguments
    positionals = (
        ('protein', 'pdb_code of protein which was used in docking'),
        ('reference', 'pdb_code of reference'),
        ('chemical_id', 'PDB identifier for the docked ligand'),
        ('results', 'path to results files'),
    )
    for arg_name, arg_help in positionals:
        self.add_argument(arg_name, help=arg_help)

    self.add_argument('-r', '--chain_ref', default='A',
                      help='Chain to used for alignment')
    self.add_argument('-p', '--chain_protein', default='A',
                      help='Chain to used for alignment')

    self.args = self.parse_args()
    self.tmp = tempfile.mkdtemp()

    # download protein
    PDBResult(self.args.protein).download(self.tmp)
    protein_path = os.path.join(self.tmp, self.args.protein + ".pdb")
    self.protein = Protein.from_file(protein_path)
    self.protein.add_hydrogens()

    # download reference
    PDBResult(self.args.reference).download(self.tmp)
    reference_path = os.path.join(self.tmp, self.args.reference + ".pdb")
    reference = Protein.from_file(reference_path)
    reference.add_hydrogens()

    self.ref = self._align(self.protein, reference)
    self.reference_ligand = self._extract_ligands(
        protein=self.ref,
        ligand=self.args.chemical_id,
        chain=self.args.chain_ref)[0]

    # files are resolved relative to the directory holding this script
    script_dir = os.path.dirname(os.path.realpath(__file__))
    with MoleculeWriter(os.path.join(script_dir, "reference.mol2")) as writer:
        writer.write(self.reference_ligand)

    self.results = MoleculeReader(
        os.path.join(os.path.dirname(os.path.realpath(__file__)),
                     self.args.results))

    self.rmsd_values = []
    self.rmsd_values.extend(
        self.rmsd(pose, self.reference_ligand) for pose in self.results)
def main(pdb="4est", stem="/local/pcurran/superstar_comparison"):
    """
    Protonate a PDB entry with Protoss and save the protein.

    The protein is written to ``<stem>/<pdb>/<pdb>.pdb``.

    :param str pdb: PDB code passed to ``Protoss.add_hydrogens``
    :param str stem: base directory; a sub-directory named after ``pdb`` is
        created inside it
    :return: None
    """
    protoss = Protoss()
    result = protoss.add_hydrogens(pdb_code=pdb)

    out = os.path.join(stem, pdb)
    # makedirs with exist_ok avoids the check-then-create race and also
    # creates ``stem`` itself when it does not exist yet.
    os.makedirs(out, exist_ok=True)

    with MoleculeWriter(os.path.join(out, f"{pdb}.pdb")) as w:
        w.write(result.protein)
def main(pdb="1hcl", stem="/home/pcurran"):
    """
    Protonate a PDB entry with Protoss and save the protein.

    The protein is written to ``<stem>/<pdb>/<pdb>.pdb``.

    :param str pdb: PDB code passed to ``Protoss.add_hydrogens``
    :param str stem: base directory; a sub-directory named after ``pdb`` is
        created inside it
    :return: None
    """
    protoss = Protoss()
    result = protoss.add_hydrogens(pdb_code=pdb)

    out = os.path.join(stem, pdb)
    # makedirs with exist_ok avoids the check-then-create race and also
    # creates ``stem`` itself when it does not exist yet.
    os.makedirs(out, exist_ok=True)

    with MoleculeWriter(os.path.join(out, f"{pdb}.pdb")) as w:
        w.write(result.protein)
def prepare_ligand_for_dock(self):
    """
    Prepare the input ligand for docking and save it.

    Only the first molecule of ``self.input_ligand_path`` is used. It is
    renamed to ``self.lig_name``, cleaned, standardised, protonated and,
    when ``self.minimise_ligand`` is set, minimised (3D coordinates are
    generated from SMILES first if the ligand has none). The result is
    written to ``self.prepared_ligand_path``.

    :return: None
    """
    # TODO: behaviour in case there's several ligands in the file?
    lig = MoleculeReader(self.input_ligand_path)[0]

    # Apply to our supplied ligand the same functions that ligand_prep would to a CSD entry.
    # Note -> self.lig_name should be the name of the query ligand, not the
    # reference (unless they are same)
    lig.identifier = self.lig_name
    lig.remove_unknown_atoms()
    lig.assign_bond_types()

    # Standardises to CSD conventions - not entirely sure if this is necessary.
    lig.standardise_aromatic_bonds()
    lig.standardise_delocalised_bonds()

    # Does it matter what order you protonate and assign hydrogens in?
    Docker.LigandPreparation()._protonation_rules.apply_rules(lig._molecule)
    lig.add_hydrogens()

    if self.minimise_ligand:
        # If the ligand has no 3D coordinates, the minimisation won't work. So let's generate some:
        if not lig.is_3d:
            print(
                f'Input ligand {lig.identifier} has no 3D coords. Generating 3D coords'
            )
            lig = ccdc_mol_from_smiles(smiles=lig.smiles,
                                       identifier=lig.identifier)

        # Minimise the ligand conformation
        molminimiser = MoleculeMinimiser()
        lig = molminimiser.minimise(lig)

    print('Checking if ligand sucessfully minimised', type(lig))

    # Save the prepped ligand; the context manager guarantees the writer is
    # closed (and its output flushed) even if the write fails.
    with MoleculeWriter(self.prepared_ligand_path) as ligwr:
        ligwr.write(lig)
def write(docker, out_path):
    """
    Save the docked ligands and copy the ranking file into ``out_path``.

    :param docker: docker whose ``settings`` are used to load the results
    :param str out_path: output directory
    :return: None
    """
    results = Docker.Results(docker.settings)

    # write ligands
    ligand_file = os.path.join(out_path, "docked_ligand.mol2")
    with MoleculeWriter(ligand_file) as writer:
        for docked in results.ligands:
            writer.write(docked.molecule)

    # copy ranking file (``junk`` is not a parameter; it is resolved from
    # the enclosing scope)
    ranking_src = os.path.join(junk, "bestranking.lst")
    ranking_dst = os.path.join(out_path, "bestranking.lst")
    copyfile(ranking_src, ranking_dst)
def _get_crystal(obj):
    """
    Convert an object writable by MoleculeWriter into a crystal.

    The object is written to "obj.mol2" in a fresh temporary directory and
    read back through ``CrystalReader``.

    :param `ccdc.molecule.Molecule` obj: molecule or protein
    :return: `ccdc.crystal.Crystal`
    """
    mol2_path = os.path.join(tempfile.mkdtemp(), "obj.mol2")

    with MoleculeWriter(mol2_path) as writer:
        writer.write(obj)

    crystals = CrystalReader(mol2_path)
    return crystals[0]
def prepare_protein(name):
    """
    Clean up '<name>/<name>_prot/<name>_p.pdb' and save it for GOLD.

    Waters and unknown atoms are removed and hydrogens are added; the
    result is written to '<name>/<name>_prot/<name>_goldp.pdb'.

    :param str name: target name used to build both file paths
    :return: None
    """
    prot_dir = '%s/%s_prot' % (name, name)
    source_file = '%s/%s_p.pdb' % (prot_dir, name)
    target_file = '%s/%s_goldp.pdb' % (prot_dir, name)

    protein = Protein.from_file(source_file)
    protein.remove_all_waters()
    protein.remove_unknown_atoms()
    protein.add_hydrogens()

    with MoleculeWriter(target_file) as writer:
        writer.write(protein)
def prepare_protein_for_dock(self):
    """
    Prepare the input protein for docking and save it.

    The protein from ``self.input_protein_path`` is renamed to
    ``self.prot_name``; waters, metals and all detected ligands are
    removed and hydrogens are added. The result is written to
    ``self.prepared_protein_path``.

    :return: None
    """
    prot = Protein.from_file(self.input_protein_path)
    prot.identifier = self.prot_name
    prot.remove_all_waters()
    prot.remove_all_metals()
    prot.add_hydrogens()

    prot.detect_ligand_bonds()

    for lig in prot.ligands:
        print(lig.identifier)
        prot.remove_ligand(lig.identifier)
    print('Ligands reminaing {}'.format(len(prot.ligands)))

    # Save the protein; the context manager guarantees the writer is closed
    # (and its output flushed) even if the write fails.
    with MoleculeWriter(self.prepared_protein_path) as protwr:
        protwr.write(prot)
def cavity_from_protein(prot):
    """
    Build a cavity from a Protein instance via a temporary PDB file.

    The Protein API currently cannot generate cavities directly from a
    Protein instance, so the protein is written to "protein.pdb" in a new
    temporary directory and read back with ``Cavity.from_pdb_file``.

    :param `ccdc.protein.Protein` prot: protein
    :return: `ccdc.cavity.Cavity`
    """
    scratch_dir = tempfile.mkdtemp()
    pdb_path = join(scratch_dir, "protein.pdb")

    with MoleculeWriter(pdb_path) as pdb_writer:
        pdb_writer.write(prot)

    return Cavity.from_pdb_file(pdb_path)
def write(docker, out_path):
    """
    Save the docked ligands and copy the ranking file into ``out_path``.

    :param docker: docker whose ``settings`` are used to load the results
    :param str out_path: output directory
    :return: None
    """
    results = Docker.Results(docker.settings)

    # write ligands
    ligand_file = os.path.join(out_path, "docked_ligand.mol2")
    with MoleculeWriter(ligand_file) as writer:
        for docked in results.ligands:
            writer.write(docked.molecule)

    # copy ranking file
    # in this example, this is the only file we use for analysis. However,
    # other output files can be useful. (``junk`` is not a parameter; it is
    # resolved from the enclosing scope.)
    ranking_src = os.path.join(junk, "bestranking.lst")
    ranking_dst = os.path.join(out_path, "bestranking.lst")
    copyfile(ranking_src, ranking_dst)
def main():
    """
    Download a reference ligand for every entry directory under the base folder.

    For each sub-directory ``<pdb>`` the identifier of the first molecule in
    "<pdb>_ligand.mol2" is passed to ``ftp_download`` together with the
    directory name; the returned molecule is written to "<pdb>_ref.mol2".
    """
    base = "/local/pcurran/leads_frag"

    for pdb in os.listdir(base):
        entry_dir = os.path.join(base, pdb)
        # only directories are treated as entries
        if not os.path.isdir(entry_dir):
            continue

        ligand_file = os.path.join(base, pdb, f"{pdb}_ligand.mol2")
        hetid = MoleculeReader(ligand_file)[0].identifier
        mol = ftp_download(pdb, hetid)

        with MoleculeWriter(os.path.join(base, pdb, f"{pdb}_ref.mol2")) as writer:
            writer.write(mol)
def protonate(pdb, hetid, waters, outdir):
    """
    Protonate a PDB entry and save the ligand and the stripped protein.

    The ligand whose identifier matches ``hetid`` is written to
    "<pdb>_<hetid>.mol2". Waters are then filtered, all ligands are removed
    and the protein is written to "<pdb>.pdb".

    :param str pdb: PDB code passed to ``Protoss.add_hydrogens``
    :param str hetid: compared with the first three characters after the
        ":" in each ligand identifier
    :param waters: water numbers to keep (compared with the integer parsed
        from each water identifier); when falsy all waters are removed
    :param str outdir: output directory, created if missing
    :return: None
    """
    print(waters)

    result = Protoss().add_hydrogens(pdb_code=pdb)
    protein = result.protein

    if not os.path.exists(outdir):
        os.mkdir(outdir)

    protein.detect_ligand_bonds()

    matches = [
        lig for lig in protein.ligands
        if lig.identifier.split(":")[1][:3] == hetid
    ]
    ligand = matches[0]
    with MoleculeWriter(os.path.join(outdir, f"{pdb}_{hetid}.mol2")) as writer:
        writer.write(ligand)

    if waters:
        for water in protein.waters:
            water_number = int(water.identifier.split(":")[1][3:])
            if water_number not in waters:
                protein.remove_water(water)
            else:
                print("here")
    else:
        protein.remove_all_waters()

    for lig in protein.ligands:
        protein.remove_ligand(lig.identifier)

    with MoleculeWriter(os.path.join(outdir, f"{pdb}.pdb")) as writer:
        writer.write(protein)
def write(self, fname="results.mol2"):
    """
    Writes out docking poses.

    The rescored ligands are written when they are available; if accessing
    or writing them fails, the docked ligands are written instead, each
    renamed to its fitness value.

    :param fname: str, file name of the output file, created inside
        ``self.args.path``
    :return: None
    """
    with MoleculeWriter(os.path.join(self.args.path, fname)) as w:
        try:
            for lig in self.rescored_ligands:
                w.write(lig)
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
        # and SystemExit are no longer swallowed by the fallback.
        except Exception:
            for docked in self.docked_ligands:
                mol = docked.molecule
                mol.identifier = "{}".format(docked.fitness())
                w.write(mol)
def make_compound_hitlist_from_df(scored_df, hits_dir, save_dir, savename):
    """
    Collect the scored pose of every row in ``scored_df`` into one SDF file.

    For each row, the 'followup_id' gives the sub-directory of ``hits_dir``
    (the text before '_pose') and the pose index (the text after the last
    '_'). Poses are written in row order to
    '<savename>_hits_ranked.sdf' in ``save_dir``.

    :param scored_df: DataFrame with 'followup_id' and 'followup_smiles' columns
    :param hits_dir: directory containing one sub-directory per follow-up
    :param save_dir: output directory
    :param str savename: prefix of the output file name
    :return: None
    """
    selected_poses = []

    for _, row in scored_df.iterrows():
        followup_id = row['followup_id']

        # open the correct scored pose:
        pose_file = Path(hits_dir,
                         followup_id.split('_pose')[0],
                         'concat_ranked_docked_ligands_hs-scored.mol2')
        pose_index = int(followup_id.split('_')[-1])

        selected_poses.append(MoleculeReader(str(pose_file))[pose_index])
        print(pose_file.parent, row['followup_smiles'])

    out_file = Path(save_dir, f'{savename}_hits_ranked.sdf')
    with MoleculeWriter(str(out_file)) as writer:
        for pose in selected_poses:
            writer.write(pose)
def run(self):
    """
    Rescore docked molecules against a hotspot and write the ranked results.

    Atoms reported as "Unknown" are stripped from the docked poses (saved
    as "results_no_dummy.mol2"), every entry is scored, the ranking is
    de-duplicated and the outcome is written to "rescored.csv" and
    "rescored.sdf" next to the docked molecules file.
    """
    # inputs
    with HotspotReader(self.args.hotspot_path) as hs_reader:
        candidates = [
            h for h in hs_reader.read()
            if h.identifier == self.args.hotspot_identifier
        ]
        hr = candidates[0]

    with MoleculeReader(self.args.docked_mols) as mol_reader:
        cleaned_path = os.path.join(os.path.dirname(self.args.docked_mols),
                                    "results_no_dummy.mol2")

        with MoleculeWriter(cleaned_path) as mol_writer:
            for mol in mol_reader:
                for atom in mol.atoms:
                    if atom.atomic_symbol == "Unknown":
                        mol.remove_atom(atom)
                mol_writer.write(mol)

    # from here on, work with the cleaned file
    self.args.docked_mols = cleaned_path
    entries = EntryReader(self.args.docked_mols)

    # outputs
    out_dir = os.path.dirname(self.args.docked_mols)
    print(out_dir)

    # process
    hr = augmentation(hr, entries)

    # 1) rescore
    rescored = {entry: score(hr, entry) for entry in entries}
    ranked = OrderedDict(
        sorted(rescored.items(), key=lambda pair: pair[1], reverse=True))

    # 2) deduplicate: retain highest ranked pose only
    out_dic = deduplicate(ranked)

    # 3) output to dataframe ready for ccdc.descriptors API
    kept = list(out_dic.keys())
    table = pd.DataFrame({
        "identifier": [entry.identifier for entry in kept],
        "score": list(out_dic.values()),
        "activity": [activity_tag(entry.identifier) for entry in kept],
    })
    table.to_csv(os.path.join(out_dir, "rescored.csv"))

    with EntryWriter(os.path.join(out_dir, "rescored.sdf")) as entry_writer:
        for entry in kept:
            entry_writer.write(entry)
def run_docking(self):
    """
    Read in the follow-ups and dock them against the hotspot's protein.

    The protein is saved as "protein.pdb" in ``self.hotspot_path``, the
    binding site is defined around the reference fragment and the
    follow-ups in "follow_ups.mol2" are docked (10 docks each) with a
    template-similarity constraint on the reference fragment.

    :return: list of docked molecules
    """
    self.get_fragment()
    self.read_followups()

    docker = Docker()
    settings = docker.settings
    tempd = join(self.hotspot_path, "docking_tmp")

    # Get out the reference protein:
    protein = self.get_scorer_result().get_hotspot().protein

    # Change this from DiamondRunner - to save the protein in the results directory
    protein_path = join(self.hotspot_path, "protein.pdb")
    with MoleculeWriter(protein_path) as protein_writer:
        protein_writer.write(protein)

    settings.add_protein_file(join(self.hotspot_path, "protein.pdb"))
    settings.binding_site = settings.BindingSiteFromPoint(
        settings.proteins[0],
        self.reference_fragment.centre_of_geometry(),
        10.0)
    settings.fitness_function = 'plp'
    settings.autoscale = 10.0
    settings.output_directory = tempd
    settings.output_file = "docked_ligands.mol2"
    settings.add_ligand_file(join(self.hotspot_path, "follow_ups.mol2"),
                             ndocks=10)

    # setup constraints
    constraint = settings.TemplateSimilarityConstraint(
        type="all", template=self.reference_fragment, weight=150)
    settings.add_constraint(constraint)

    docker.dock()

    output_file = os.path.join(settings.output_directory,
                               settings.output_file)
    # NOTE(review): output_file already includes the output directory, so
    # this second join only resolves as intended when that path is
    # absolute - confirm.
    docked_reader = MoleculeReader(os.path.join(tempd, output_file))
    return list(docked_reader)
def read_followups(self):
    """
    Reads in the follow-ups from a .csv file and saves them as mol2.

    Every line of ``self.csv_path`` longer than one character is echoed and
    passed to ``self.from_smiles``; the resulting molecules are written to
    "follow_ups.mol2" in ``self.hotspot_path``.

    :return: list of ccdc.molecule.Molecule instances
    """
    mols = []
    with open(self.csv_path, "r") as f:
        # iterate the file lazily instead of loading every line up front
        for line in f:
            print(line)
            # skip blank lines (a lone newline has length 1)
            if len(line) > 1:
                mols.append(self.from_smiles(line))

    with MoleculeWriter(os.path.join(self.hotspot_path,
                                     "follow_ups.mol2")) as writer:
        for ligand in mols:
            writer.write(ligand)

    # the docstring has always promised the molecules; actually return them
    return mols