def create(protein, hr, max_constraints=2, weight=5.0, min_hbond_score=0.001, cutoff=8,
           check_file=None):
    """
    Create a list of :class:`hotspots.hs_docking.HotspotHBondConstraint`.

    :param `ccdc.protein.Protein` protein: the protein to be used for docking
    :param `hotspots.calculation.Result` hr: a result from Fragment Hotspot Maps
    :param int max_constraints: max number of constraints
    :param float weight: the constraint weight (default to be determined)
    :param float min_hbond_score: float between 0.0 (bad) and 1.0 (good) determining the
        minimum hydrogen bond quality in the solutions.
    :param cutoff: a constraint is only created for atoms whose score is greater than this
    :param str check_file: optional path; when given, the candidate constraint atoms are
        written there as a molecule for visual inspection. Nothing is written by default.
    :return list: list of :class:`hotspots.hs_docking.HotspotHBondConstraint`
    """
    constraints = hr._docking_constraint_atoms(p=protein,
                                               max_constraints=max_constraints)

    # The inspection file used to be written unconditionally to a developer's
    # home directory, which fails on any other machine; it is now opt-in.
    if check_file is not None:
        with io.MoleculeWriter(check_file) as w:
            w.write(constraints.to_molecule())

    return [
        DockerSettings.HotspotHBondConstraint(atoms=[protein.atoms[i]],
                                              weight=weight,
                                              min_hbond_score=min_hbond_score)
        for i, s in constraints.index_by_score.items()
        if s > cutoff
    ]
def run_conformers(inputs):
    """
    Generate conformers for every molecule in a file and write them to
    ``<outdir>/<stem>_conf.mol2``.

    :param inputs: a ``(mol_file, outdir)`` pair, packed into one argument
    """
    mol_file, outdir = inputs
    stem = os.path.basename(mol_file).split(".")[0]
    conf_file = os.path.join(outdir, stem + "_conf.mol2")

    generator = conformer.ConformerGenerator()
    generator.settings.max_conformers = 25

    with io.MoleculeReader(mol_file) as reader, \
            io.MoleculeWriter(conf_file) as writer:
        for parent in reader:
            hits = generator.generate(parent)

            # A failed generation yields None rather than an empty list, so
            # it has to be skipped explicitly before iterating.
            if hits is None:
                print(
                    'WARNING: Conformer generation failed for structure %s in %s!'
                    % (parent.identifier, mol_file))
                continue

            for index, hit in enumerate(hits):
                conf_mol = hit.molecule
                # The conformer index is appended so that each written
                # conformer carries a unique identifier.
                conf_mol.identifier = f"{conf_mol.identifier}_{index}"
                writer.write(conf_mol)
def get_mol_files_by_indentifiers(self, list_identifiers, path_save_mol_dir, list_solvent_names=None):
    """Save one ``<identifier>.mol2`` file per entry into the given folder.

    :param list_identifiers: crystal identifiers, type: list or tuple or array
    :param path_save_mol_dir: folder the mol2 files are written to, type: string
    :param list_solvent_names: names of the solvents to strip, type: list or None
    :return: None
    """
    # Redefine self.entry_reader as the list of requested entries.
    self.entry_reader = [
        self.entry_reader.entry(id_name) for id_name in list_identifiers
    ]
    # Strip solvents; per the original note this also regenerates
    # self.entry_reader from the solvent-free entries.
    self.delete_solvents(list_solvent_names=list_solvent_names)

    # Create the output folder if it does not exist yet.
    if not os.path.exists(path_save_mol_dir):
        os.makedirs(path_save_mol_dir)

    # Write each entry's molecule. The path is built explicitly instead of
    # calling os.chdir() per entry: chdir changed the working directory of the
    # whole process and made a relative path_save_mol_dir fail on the second
    # iteration.
    p_bar = tqdm(self.entry_reader)
    for entry in p_bar:
        mol_file = entry.molecule
        out_path = os.path.join(path_save_mol_dir, '%s.mol2' % (entry.identifier))
        with io.MoleculeWriter(out_path) as writer:
            writer.write(mol_file)
        p_bar.set_description('正在保存文件:')
    return None
def align(inputs):
    """
    Superpose the first chain of one PDB onto the first chain of a reference
    and overwrite the superposed structure's file with the result.

    :param inputs: ``(ref_pdb, ref_mol, other_pdb, input_dir)`` packed into one
        argument. ``ref_pdb`` and ``other_pdb`` are file stems inside
        ``input_dir``. ``ref_mol`` is compared against the second
        ':'-separated field of each reference ligand identifier; when falsy
        no binding site is passed to the superposition.
    :return: the RMSD of the superposition, or 999 if anything failed
    """
    try:
        ref_pdb, ref_mol, other_pdb, input_dir = inputs
        ref = Protein.from_file(os.path.join(input_dir, f"{ref_pdb}.pdb"))
        other_path = os.path.join(input_dir, f"{other_pdb}.pdb")
        other = Protein.from_file(other_path)

        if ref_mol:
            ref_mol_obj = [
                lig for lig in ref.ligands
                if lig.identifier.split(":")[1] == ref_mol
            ][0]
            ref_bind_site = Protein.BindingSiteFromMolecule(
                protein=ref, molecule=ref_mol_obj, distance=12)
        else:
            ref_bind_site = None

        chain_superposition = Protein.ChainSuperposition()
        # other chains already stripped
        rms, X = chain_superposition.superpose(ref.chains[0],
                                               other.chains[0],
                                               binding_site1=ref_bind_site)

        with io.MoleculeWriter(other_path) as w:
            w.write(other)
        return rms
    except Exception:
        # 999 is the failure sentinel callers compare against an RMSD cutoff.
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        return 999
def run(self):
    """
    Read the input protein, detect ligand bonds, add hydrogens and write
    every ligand whose identifier does not contain 'HEM' to the output file.
    """
    protein = Protein.from_file(self.input().path)
    protein.detect_ligand_bonds()
    protein.add_hydrogens()

    with io.MoleculeWriter(self.output().path) as writer:
        for ligand in protein.ligands:
            if 'HEM' in ligand.identifier:
                continue
            writer.write(ligand)
def _write_protein(path, prot):
    """
    Write the protein to ``protein.pdb`` inside the given directory.

    :param path: directory the file is written to
    :param prot: a protein
    :type prot: `ccdc.protein.Protein`
    """
    destination = join(path, "protein.pdb")
    with io.MoleculeWriter(destination) as pdb_out:
        pdb_out.write(prot)
def _protein_preparation(self):
    '''Strip waters, ligands and metals from self.prot, then write it to self.fname.'''
    protein = self.prot

    protein.remove_all_waters()
    for ligand in protein.ligands:
        protein.remove_ligand(ligand.identifier)
    protein.remove_all_metals()

    with io.MoleculeWriter(self.fname) as writer:
        writer.write(protein)
def _output_feature_centroid(self):
    """
    Write the grid centroid of every feature as a pseudo-atom to
    "cenroid.mol2" in the current working directory.

    The element symbol encodes the feature type (apolar -> C, acceptor -> N,
    donor -> O) and the atom label is the feature's index.
    """
    symbol_for = {"apolar": "C", "acceptor": "N", "donor": "O"}
    centroids = Molecule(identifier="centroids")

    for index, feature in enumerate(self.features):
        position = feature.grid.centroid()
        # NOTE(review): atomic_number is fixed at 14 whatever the symbol - confirm intent
        pseudo_atom = Atom(atomic_symbol=symbol_for[feature.feature_type],
                           atomic_number=14,
                           coordinates=position,
                           label=str(index))
        centroids.add_atom(pseudo_atom)

    from ccdc import io
    with io.MoleculeWriter("cenroid.mol2") as writer:
        writer.write(centroids)
def _write_protein(self, prot, out_dir=None):
    """
    Write the protein to ``protein.pdb``.

    :param prot: the protein to write
    :param out_dir: target directory; falls back to ``self.out_dir`` when falsy
    :return: None
    """
    target_dir = self.out_dir if not out_dir else out_dir
    destination = join(target_dir, "protein.pdb")

    with io.MoleculeWriter(destination) as pdb_out:
        pdb_out.write(prot)
def testalign(self):
    """
    Download and prepare two structures, align 1HCL onto 2VTA and write the
    aligned structure to ``aligned_<identifier>.pdb`` in the temp directory.
    """
    pdbs = ["2VTA", "1HCL"]
    entities = [0, 0]
    download(pdbs, out_dir=self.tmp)

    for p, e in zip(pdbs, entities):
        prepare_protein(p, e, self.tmp)

    args = ["2VTA", 'LZ11301', "1HCL", self.tmp]
    # align() returns only the RMSD (999 on failure) and overwrites the
    # superposed structure's PDB file in place, so the result cannot be
    # unpacked into two values; the aligned protein is read back from disk.
    rms = align(args)
    assert rms != 999, "alignment of 1HCL onto 2VTA failed"

    other = Protein.from_file(os.path.join(self.tmp, "1HCL.pdb"))
    with io.MoleculeWriter(
            os.path.join(self.tmp, f"aligned_{other.identifier}.pdb")) as w:
        w.write(other)
def protoss_download(args):
    """
    Protonate a PDB entry with Protoss and write the result to ``<out_dir>/<pdb>.pdb``.

    :param args: a ``(pdb, out_dir)`` pair, packed into one argument
    :return: path of the written file, or None if the download/write failed
    """
    # Unpack before the try block: the error message below needs `pdb`, which
    # was unbound when the unpacking itself was what failed.
    pdb, out_dir = args
    try:
        protoss = Protoss()
        result = protoss.add_hydrogens(pdb_code=pdb)
        out_path = os.path.join(out_dir, f"{pdb}.pdb")
        with io.MoleculeWriter(out_path) as w:
            w.write(result.protein)
        return out_path
    except Exception:
        # Best effort: report the failing entry and carry on with the others.
        print(f"ERROR {pdb}")
        return None
def generate_fitting_points(self, hr, volume=400, threshold=17, mode='threshold'):
    """
    uses the Fragment Hotspot Maps to generate GOLD fitting points.

    GOLD fitting points are used to help place the molecules into the protein cavity. Pre-generating these fitting
    points using the Fragment Hotspot Maps helps to bias results towards making Hotspot interactions.

    The points are written to ``fit_pts.mol2`` in a fresh temporary directory and the path is stored in
    ``self.fitting_points_file``.

    :param `hotspots.result.Result` hr: a Fragment Hotspot Maps result
    :param int volume: volume occupied by fitting points in Angstroms ^ 3 (used in 'bcv' mode)
    :param float threshold: points above this value will be included in the fitting points
    :param str mode: 'threshold'- assigns fitting points based on a score cutoff or 'bcv'- assigns fitting points
        from best continuous volume analysis (recommended)
    :raises TypeError: if ``mode`` is neither 'threshold' nor 'bcv'
    """
    # Validate first so that an unsupported mode does not leave an empty
    # temporary directory behind.
    if mode not in ('threshold', 'bcv'):
        raise TypeError(
            "{} not supported, see documentation for details".format(mode))

    if mode == 'threshold':
        dic = hr.super_grids["apolar"].grid_value_by_coordinates(
            threshold=threshold)
    else:
        extractor = Extractor(hr=hr, volume=volume, mode="global")
        bv = extractor.extracted_hotspots[0].best_island
        # Honour the caller's threshold; it was hard-coded to 17 (the default).
        dic = bv.grid_value_by_coordinates(threshold=threshold)

    mol = molecule.Molecule(identifier="fitting_pts")
    for score, v in dic.items():
        for pts in v:
            atm = molecule.Atom(atomic_symbol='C',
                                atomic_number=14,
                                label='{:.2f}'.format(score),
                                coordinates=pts)
            # The score is also stored in the partial charge field.
            atm.partial_charge = score
            mol.add_atom(atom=atm)

    temp = tempfile.mkdtemp()
    fname = os.path.join(temp, 'fit_pts.mol2')
    with io.MoleculeWriter(fname) as w:
        w.write(mol)

    self.fitting_points_file = fname
def _write_pharmacophore(self, pharmacophore):
    """
    Write the pharmacophore to the output directory, once per configured
    format, followed by its label molecule.

    :param pharmacophore: the pharmacophore to write
    :return: None
    """
    destinations = []
    for fmat in self.settings.pharmacophore_format:
        destinations.append(join(self.out_dir, "pharmacophore" + fmat))

    for destination in destinations:
        pharmacophore.write(destination)

    label = self.get_label(pharmacophore)
    label_name = "label_threshold_{}.mol2".format(pharmacophore.identifier)
    with io.MoleculeWriter(join(self.out_dir, label_name)) as label_out:
        label_out.write(label)
def generate_library(self):
    """
    Decorate the fragment with the terminal and spacer groups and write the
    resulting molecules to "decorated_fragments.mol2".

    Reads ``self.fragment`` (a mol2 file) plus "r1-20.sdf" and "r2-20.sdf"
    from the current working directory.

    :return: the decorated molecules
    """
    # Function-call form prints the same text on Python 2 and Python 3.
    print("Decorate fragment...")
    fragment = Chem.MolFromMol2File(self.fragment)

    # inputs (place saver)
    mol = Chem.MolToSmiles(fragment)
    terminal = Chem.SDMolSupplier("r1-20.sdf")
    spacer = Chem.SDMolSupplier("r2-20.sdf")

    vl = self.supplier(mol, terminal, spacer)
    mols = [self.from_smiles(v) for v in vl]

    with io.MoleculeWriter("decorated_fragments.mol2") as w:
        for decorated in mols:
            w.write(decorated)
    return mols
def extract_ligands(self, other_id, lig_id):
    """
    Extract the relevant ligand from an aligned PDB to a mol2 file.

    :param other_id: key into ``self.other_pdbs`` / ``self.extracted_ligands``
    :param str lig_id: text looked for in the first three characters of the
        second ':'-separated field of each ligand identifier
    :return: None
    """
    # inputs
    aligned = Protein.from_file(self.other_pdbs[other_id])

    # tasks
    aligned.detect_ligand_bonds()
    print([a.identifier for a in aligned.ligands])
    print(other_id, lig_id)

    matches = []
    for ligand in aligned.ligands:
        residue_tag = ligand.identifier.split(":")[1][0:3]
        if lig_id in residue_tag:
            matches.append(ligand)

    # output: if more than one ligand matched, only the first is written
    with io.MoleculeWriter(self.extracted_ligands[other_id][lig_id]) as mol2_out:
        mol2_out.write(matches[0])
def run_superstar(self, prot, out_dir):
    """
    Call SuperStar as a command-line subprocess from the working directory.

    :param prot: a :class:`ccdc.protein.Protein` instance; written to
        'protein.pdb' unless it is None
    :param out_dir: str, output directory
    :return: a SuperstarResult built from the current settings
    """
    settings = self.settings

    with utilities.PushDir(settings.working_directory):
        if prot is not None:
            with io.MoleculeWriter('protein.pdb') as pdb_out:
                pdb_out.write(prot)

        self._get_inputs(out_dir)

        environment = os.environ.copy()
        environment.update(settings._superstar_env)

        command = ' '.join((settings._superstar_executable, self.fname))
        # shell is enabled on every platform except Windows
        use_shell = sys.platform != 'win32'
        subprocess.call(command, shell=use_shell, env=environment)

    return SuperstarResult(self.settings)
def run(self, protein, grid_spacing=0.5):
    """
    Execute the ghecom command line call from the temporary directory.

    NB: different versions of ghecom have different commandline args

    :param protein: protein, written to 'protein.pdb' before the call
    :param grid_spacing: grid spacing, must be the same used in hotspot calculation
    :type protein: `ccdc.protein.Protein`
    :type grid_spacing: float
    :return: path of the ghecom output file inside the temporary directory
    """
    with PushDir(self.temp):
        with io.MoleculeWriter('protein.pdb') as pdb_out:
            pdb_out.write(protein)

        # The executable location comes from the GHECOM_EXE environment variable.
        command = (
            f"{os.environ['GHECOM_EXE']} -ipdb {self.input} -M M "
            f"-gw {grid_spacing} -rli 2.5 -rlx 9.5 -opocpdb {self.output}"
        )
        os.system(command)

    return os.path.join(self.temp, self.output)
def _prep_protein(self):
    """
    Empty the cavity before the hotspot calculation: remove all waters and
    every ligand whose identifier does not contain 'HEM', then write the
    protein to ``self.apo_prep``.

    :return: None
    """
    # input
    protein = Protein.from_file(self.apo_protein)

    # task
    protein.remove_all_waters()
    protein.detect_ligand_bonds()
    for ligand in protein.ligands:
        if 'HEM' in ligand.identifier:
            continue
        protein.remove_ligand(ligand.identifier)

    # output
    with io.MoleculeWriter(self.apo_prep) as writer:
        writer.write(protein)
def prepare_protein(pdb, entity, out_dir):
    """
    Reduce ``<out_dir>/<pdb>.pdb`` to a single chain and overwrite the file.

    Waters and metals are removed, every chain except ``chains[entity]`` is
    dropped, and ligands whose identifier starts with a dropped chain id are
    removed as well.

    :param pdb: file stem of the structure inside ``out_dir``
    :param entity: index of the chain to keep
    :param out_dir: directory containing the PDB file
    """
    # protein prep
    prot_file = os.path.join(out_dir, f"{pdb}.pdb")
    protein = Protein.from_file(prot_file)
    protein.remove_all_waters()
    protein.remove_all_metals()
    protein.detect_ligand_bonds()

    all_chain_ids = {chain.identifier for chain in protein.chains}
    kept_chain_id = protein.chains[entity].identifier
    discard_chains = all_chain_ids - {kept_chain_id}

    for chain_id in discard_chains:
        protein.remove_chain(chain_id)

    for ligand in protein.ligands:
        ligand_chain = ligand.identifier.split(":")[0]
        if ligand_chain in discard_chains:
            protein.remove_ligand(ligand.identifier)

    # overwrite protein
    with io.MoleculeWriter(prot_file) as writer:
        writer.write(protein)
def chunk_files(mol_file, outdir, chunk_size=100):
    """
    Split a molecule file into mol2 files holding at most ``chunk_size`` molecules.

    Each molecule's identifier is suffixed with ``_<chunk index>_<index in chunk>``.

    :param mol_file: path of the molecule file to split
    :param outdir: directory for the chunk files (created if missing)
    :param chunk_size: maximum number of molecules per chunk
    :return: list of the chunk file paths, in chunk order
    """
    # makedirs also creates missing parent directories, which mkdir did not.
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Read through a context manager so the reader is closed; it was
    # previously left open.
    with io.MoleculeReader(mol_file) as reader:
        mols = list(reader)

    stem = os.path.basename(mol_file).split('.')[0]
    outfiles = []
    for i, start in enumerate(range(0, len(mols), chunk_size)):
        outfile = os.path.join(outdir, f"{stem}_chunk{i}.mol2")
        outfiles.append(outfile)

        with io.MoleculeWriter(outfile) as w:
            for j, mol in enumerate(mols[start:start + chunk_size]):
                mol.identifier = f"{mol.identifier}_{i}_{j}"
                w.write(mol)

    return outfiles
def _write_grids(self, grid_dict, buriedness=None, mesh=None, out_dir=None):
    """
    Write grids, and optionally the buriedness grid, mesh grid and label
    molecules, to the output directory.

    :param grid_dict: mapping of name -> grid; each is written as ``<name><grid_extension>``
    :param buriedness: optional grid written as ``buriedness<grid_extension>``
    :param mesh: optional grid written as ``mesh<grid_extension>``
    :param out_dir: target directory; falls back to ``self.out_dir`` when falsy
    :return: None
    """
    # Resolve the directory once. Previously only the grids honoured out_dir,
    # while buriedness, mesh and labels always went to self.out_dir.
    target_dir = out_dir if out_dir else self.out_dir
    extension = self.settings.grid_extension

    for p, g in grid_dict.items():
        g.write(join(target_dir, "{}{}".format(p, extension)))

    if buriedness:
        buriedness.write(join(target_dir, "buriedness{}".format(extension)))

    if mesh:
        mesh.write(join(target_dir, "mesh{}".format(extension)))

    if self.settings.grid_labels:
        # One label molecule per configured isosurface threshold.
        labels = {
            "label_threshold_{}.mol2".format(threshold):
            self.get_label(grid_dict, threshold=threshold)
            for threshold in self.settings.isosurface_threshold
        }

        for fname, label in labels.items():
            with io.MoleculeWriter(join(target_dir, fname)) as writer:
                writer.write(label)
csd_and_updates_reader = io.EntryReader(csd_and_updates)

# Export the heaviest component of every suitable molecule as a PDB file and
# queue (identifier, file name, SMILES) for the consumer. Unsuitable
# molecules and any other per-molecule failure are collected in `exceptions`
# and skipped.
exceptions = []
i = 0
# NOTE(review): the loop reads csd_entry_reader, not the
# csd_and_updates_reader created above - confirm which reader is intended.
for mol in csd_entry_reader.molecules():
    start = time.time()
    try:
        ccdc_id = mol.identifier
        mol = mol.heaviest_component
        mol_smiles = mol.smiles

        # Explicit raises instead of `assert`: asserts are stripped under -O,
        # which would silently disable these filters.
        if len(mol_smiles) >= 200:
            raise AssertionError("SMILES too long for %s" % ccdc_id)

        outfile = ccdc_id + ".pdb"
        out_path = os.path.join(db_root, pdb_folder, outfile)

        if mol.is_polymeric:
            raise AssertionError("%s is polymeric" % ccdc_id)
        if not mol.is_organic:
            raise AssertionError("%s is not organic" % ccdc_id)

        # The context manager closes the writer; no explicit close() needed.
        with io.MoleculeWriter(out_path) as filehandle:
            filehandle.write(mol)

        out_q.put([ccdc_id, outfile, mol_smiles])
    except Exception as e:
        exceptions.append(e)

    # A zero elapsed time would otherwise raise ZeroDivisionError and abort
    # the whole export.
    elapsed = time.time() - start
    rate = 1 / elapsed if elapsed > 0 else float("inf")
    # Single pre-formatted string: same output on Python 2 and Python 3.
    print("exps: %.3f total: %s exceptions: %s" % (rate, i, len(exceptions)))
    i += 1

stop_event.set()
print("closing table with queue")
from ccdc.protein import Protein
from ccdc import io

# Round-trip check: strip the waters from 1xkk, write the dry protein as PDB
# and as mol2, read both back and compare atom/residue/water counts with the
# original.
p0 = Protein.from_file("1xkk.pdb")

p1 = p0.copy()
p1.remove_all_waters()

with io.MoleculeWriter("1xkk_dry.pdb") as w:
    w.write(p1)

with io.MoleculeWriter("1xkk_dry.mol2") as w:
    w.write(p1)

p2 = Protein.from_file("1xkk_dry.pdb")
p3 = Protein.from_file("1xkk_dry.mol2")

# Labels follow the file each protein was read from: p2 comes from the PDB
# file and p3 from the mol2 file (they were previously swapped).
for p, l in zip([p0, p2, p3], ["original", "pdbwriter", "mol2writer"]):
    print(l)
    print(f"# atoms: {len(p.atoms)}")
    print(f"# residues: {len(p.residues)}, # waters: {len(p.waters)}")
    print(p.residues[30:50])
    print(" ")
#!/usr/bin/env python
from ccdc import io
from ccdc.search import SubstructureSearch, MoleculeSubstructure, SMARTSSubstructure

# main
if __name__ == "__main__":
    # Dipeptide backbone patterns, with and without N-methylation on either
    # nitrogen.
    strings = [
        "NCC(=O)NCC(=O)",
        "N(C)CC(=O)NCC(=O)",
        "NCC(=O)N(C)CC(=O)",
        "N(C)CC(=O)N(C)CC(=O)",
    ]

    for pepsmile in strings:
        pattern = SMARTSSubstructure(pepsmile)
        searcher = SubstructureSearch()
        sub_id = searcher.add_substructure(pattern)

        print("Searching...")
        hits = searcher.search(max_hits_per_structure=1)
        print(len(hits), "hits found")

        # One PDB file per hit, named after the hit identifier.
        for hit in hits:
            destination = "from_CSD/nmeth/" + hit.identifier + ".pdb"
            with io.MoleculeWriter(destination) as pdb_writer:
                pdb_writer.write(hit.molecule)
conformers_dir = '%s-conformers' % mol_name if not os.path.isdir(conformers_dir): os.makedirs(conformers_dir) else: print('No conformers found!') args.nominimise = True # Minimise conformers and save --------------------------------------------------------------------- if args.nominimise: print('Skiping minimisation...') if conformers is not None: for conf_idx, conf in enumerate(conformers): conf_path = os.path.join( conformers_dir, '%s-%i.%s' % (mol_name, conf_idx + 1, args.format)) with io.MoleculeWriter(conf_path) as molecule_writer: molecule_writer.write(conf.molecule) conformers_mol = [c.molecule for c in conformers] print('Conformers saved in %s | format: %s\n' % (conformers_dir, args.format)) else: conformers_mol = [mol] else: # Run minimisation print('Minimising molecular geometry using Tripos force field...') molecule_minimiser = conformer.MoleculeMinimiser( ) # Uses Tripos force field min_conformers = [] log_data = {} for conf_idx, conf in enumerate(conformers): score, rmsd = conf.normalised_score, conf.rmsd(
def from_pdb(pdb_code, chain, out_dir=None, representatives=None, identifier="LigandBasedPharmacophore"):
    """
    creates a Pharmacophore Model from a PDB code.

    This method is used for the creation of Ligand-Based pharmacophores. The PDB is searched for protein-ligand
    complexes of the same UniProt code as the input. These PDBs are aligned, the ligands are clustered and the
    density of atom types at a given point is assigned to a grid.

    :param str pdb_code: single PDB code from the target system
    :param str chain: chain of interest
    :param str out_dir: path to output directory; when omitted no files are written
    :param representatives: path to .dat file containing previously clustered data (time saver), one
        comma-separated ``pdb_code,hetid,smiles`` record per line
    :param str identifier: identifier for the Pharmacophore Model
    :return: :class:`hotspots.hs_pharmacophore.PharmacophoreModel`
    :raises AttributeError: if a representative is neither a search result nor a ``(pdb_code, hetid)`` pair

    >>> from hotspots.hs_pharmacophore import PharmacophoreModel
    >>> from hotspots.result import Results
    >>> from hotspots.hs_io import HotspotWriter
    >>> from ccdc.protein import Protein
    >>> from pdb_python_api import PDBResult

    >>> # get the PDB ligand-based Pharmacophore for CDK2
    >>> model = PharmacophoreModel.from_pdb("1hcl", "A")

    >>> # the models grid data is stored as PharmacophoreModel.dic
    >>> # download the PDB file and create a Results
    >>> PDBResult("1hcl").download(<output_directory>)
    >>> result = Results(protein=Protein.from_file("<output_directory>/1hcl.pdb"), super_grids=model.dic)
    >>> with HotspotWriter("<output_directory>") as w:
    >>>     w.write(result)
    """
    temp = tempfile.mkdtemp()
    ref = PDBResult(pdb_code)
    ref.download(out_dir=temp, compressed=False)

    # Clustering diagnostics; both stay None when pre-clustered
    # representatives are supplied or no output directory was given.
    top = None
    s = None

    if representatives:
        print("Reading representative PDB codes ...")
        reps = []
        # Context manager so the file handle is closed.
        with open(representatives, "r") as f:
            entries = f.read().splitlines()
        for entry in entries:
            # Named rep_code so the pdb_code parameter is not overwritten.
            rep_code, hetid, _smiles = entry.split(",")
            reps.append((rep_code, hetid))
    else:
        accession_id = PDBResult(pdb_code).protein.sub_units[0].accession_id
        results = PharmacophoreModel._run_query(accession_id)
        ligands = PharmacophoreModel._get_ligands(results)

        if out_dir:
            # Diagnostics go two directory levels above out_dir.
            top = os.path.dirname(os.path.dirname(out_dir))
            PharmacophoreModel._to_file(ligands, top, fname="all_ligands.dat")

        # Roughly one cluster per five ligands, never fewer than two.
        k = int(round(len(ligands) / 5))
        if k < 2:
            k = 2
        cluster_dict, s = PharmacophoreModel._cluster_ligands(n=k, ligands=ligands)
        reps = [l[0] for l in cluster_dict.values() if len(l) != 0]

    targets = []
    for rep in reps:
        try:
            # Representatives from clustering expose attributes ...
            r = PDBResult(identifier=rep.structure_id)
            r.clustered_ligand = rep.chemical_id
        except Exception:
            try:
                # ... those read from file are (pdb_code, hetid) tuples.
                r = PDBResult(identifier=rep[0])
                r.clustered_ligand = rep[1]
            except Exception:
                raise AttributeError(
                    "cannot interpret representative {!r}".format(rep))

        r.download(out_dir=temp, compressed=False)
        targets.append(r)

    prots, ligands = PharmacophoreModel._align_proteins(reference=ref,
                                                        reference_chain=chain,
                                                        targets=targets)

    # Writing the diagnostics is best effort and only possible when the
    # clustering step ran with an output directory.
    if top is not None:
        try:
            with open(os.path.join(top, "silhouette.dat"), "w") as sfile:
                sfile.write("{}".format(s))
            PharmacophoreModel._to_file(reps, top, fname="representatives.dat")
        except Exception as err:
            print("WARNING: could not write clustering diagnostics: {}".format(err))

    if out_dir:
        with io.MoleculeWriter(os.path.join(out_dir, "aligned_mols.mol2")) as w:
            for l in ligands:
                w.write(l)

    return PharmacophoreModel.from_ligands(ligands=ligands, identifier=identifier)
def from_ligands(ligands, identifier, protein=None, settings=None):
    """
    creates a Pharmacophore Model from a collection of overlaid ligands

    :param `ccdc.molecule.Molecule` ligands: ligands from which the Model is created
    :param str identifier: identifier for the Pharmacophore Model
    :param `ccdc.protein.Protein` protein: target system that the model has been created for
    :param `hotspots.hs_pharmacophore.PharmacophoreModel.Settings` settings: Pharmacophore Model settings
    :return: :class:`hotspots.hs_pharmacophore.PharmacophoreModel`
    :raises ImportError: if the CrossMiner feature definitions cannot be read

    >>> from ccdc.io import MoleculeReader
    >>> from hotspots.hs_pharmacophore import PharmacophoreModel

    >>> mols = MoleculeReader("ligand_overlay_model.mol2")
    >>> model = PharmacophoreModel.from_ligands(mols, "ligand_overlay_pharmacophore")
    >>> # write to .json and search in pharmit
    >>> model.write("model.json")
    """
    cm_dic = crossminer_features()
    blank_grd = Grid.initalise_grid(
        [a.coordinates for l in ligands for a in l.atoms])
    feature_dic = {
        "apolar": blank_grd.copy(),
        "acceptor": blank_grd.copy(),
        "donor": blank_grd.copy()
    }

    if not settings:
        settings = PharmacophoreModel.Settings()

    if isinstance(ligands[0], Molecule):
        # Round-trip molecules through a temporary mol2 file so that they can
        # be re-read with CrystalReader for feature detection.
        temp = tempfile.mkdtemp()
        with io.MoleculeWriter(join(temp, "ligs.mol2")) as w:
            for l in ligands:
                w.write(l)
        ligands = list(io.CrystalReader(join(temp, "ligs.mol2")))

    try:
        Pharmacophore.read_feature_definitions()
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not converted into an ImportError.
        raise ImportError("Crossminer is only available to CSD-Discovery")

    # Feature definitions that are not used to build the model.
    excluded = {
        'exit_vector', 'heavy_atom', 'hydrophobe', 'fluorine', 'bromine',
        'chlorine', 'iodine', 'halogen'
    }
    feature_definitions = [
        fd for fd in Pharmacophore.feature_definitions.values()
        if fd.identifier not in excluded
    ]

    # Mark a sphere on the matching grid for every detected feature.
    for fd in feature_definitions:
        detected = [fd.detect_features(ligand) for ligand in ligands]
        all_feats = [f for l in detected for f in l]

        for f in all_feats:
            feature_dic[cm_dic[fd.identifier]].set_sphere(
                f.spheres[0].centre, f.spheres[0].radius, 1)

    # Every grid peak becomes one pharmacophore feature.
    features = []
    for feat, feature_grd in feature_dic.items():
        peaks = feature_grd.get_peaks(min_distance=4, cutoff=1)
        for p in peaks:
            coords = Coordinates(p[0], p[1], p[2])

            # Projected coordinates are only computed for donors/acceptors
            # and only when a protein is available.
            projected_coordinates = None
            if feat in ("donor", "acceptor") and protein:
                projected_coordinates = _PharmacophoreFeature.get_projected_coordinates(
                    feat, coords, protein, settings)

            features.append(
                _PharmacophoreFeature(
                    projected=None,
                    feature_type=feat,
                    feature_coordinates=coords,
                    projected_coordinates=projected_coordinates,
                    score_value=feature_grd.value_at_coordinate(
                        coords, position=False),
                    vector=None,
                    settings=settings))

    return PharmacophoreModel(settings,
                              identifier=identifier,
                              features=features,
                              protein=protein,
                              dic=feature_dic)
def write(self, fname):
    """
    writes out pharmacophore. Supported formats:

    - ".cm" (*CrossMiner*),
    - ".json" (`Pharmit <http://pharmit.csb.pitt.edu/search.html/>`_),
    - ".py" (*PyMOL*),
    - ".csv",
    - ".mol2",
    - ".grd"

    :param str fname: path to output file; the extension selects the format
    :raises TypeError: if the extension is not one of the supported formats
    """
    extension = splitext(fname)[1]

    if extension == ".cm":
        # Function-call form prints the same text on Python 2 and Python 3.
        print("WARNING! Charged features not currently supported in CrossMiner!")
        pharmacophore = self._get_crossminer_pharmacophore()
        pharmacophore.write(fname)

    elif extension == ".csv":
        header = [
            "Identifier", "Feature_type", "x", "y", "z", "score",
            "projected_x", "projected_y", "projected_z",
            "vector_x", "vector_y", "vector_z"
        ]
        # Text mode: csv.writer emits str. One header row, then one row per
        # feature (previously header and all rows were accumulated into a
        # single string).
        with open(fname, "w") as csv_file:
            csv_writer = csv.writer(csv_file, delimiter=",")
            csv_writer.writerow(header)

            for feature in self._features:
                row = [
                    self.identifier,
                    feature.feature_type,
                    feature.feature_coordinates.x,
                    feature.feature_coordinates.y,
                    feature.feature_coordinates.z,
                    feature.score_value
                ]
                # Missing projected coordinates / vectors are written as zeros.
                if feature.projected_coordinates:
                    row += [feature.projected_coordinates.x,
                            feature.projected_coordinates.y,
                            feature.projected_coordinates.z]
                else:
                    row += [0, 0, 0]

                if feature.vector:
                    row += [feature.vector.x, feature.vector.y, feature.vector.z]
                else:
                    row += [0, 0, 0]

                csv_writer.writerow(row)

    elif extension == ".py":
        lfile = "label_threshold_{}.mol2".format(self.identifier)
        pymol_out = pymol_imports()
        pymol_out += pymol_arrow()
        pymol_out += self._get_pymol_pharmacophore(lfile)

        with open(fname, "w") as pymol_file:
            pymol_file.write(pymol_out)

        # The label molecule is written next to the script under the name
        # passed to _get_pymol_pharmacophore().
        label = self.get_label(self)
        with io.MoleculeWriter(join(dirname(fname), lfile)) as writer:
            writer.write(label)

    elif extension == ".json":
        interaction_dic = {
            'apolar': 'Hydrophobic',
            'donor': 'HydrogenDonor',
            'acceptor': 'HydrogenAcceptor',
            'negative': 'NegativeIon',
            'positive': 'PositiveIon'
        }

        pts = []
        for feat in self._features:
            if feat.vector:
                svector = {"x": feat.vector.x,
                           "y": feat.vector.y,
                           "z": feat.vector.z}
            else:
                svector = {"x": 0, "y": 0, "z": 0}

            pts.append({
                "name": interaction_dic[feat.feature_type],
                "hasvec": bool(feat.vector),
                "x": feat.feature_coordinates.x,
                "y": feat.feature_coordinates.y,
                "z": feat.feature_coordinates.z,
                "radius": feat.settings.radius,
                "enabled": True,
                "vector_on": feat.settings.vector_on,
                "svector": svector,
                "minsize": "",
                "maxsize": "",
                "selected": False
            })

        with open(fname, "w") as pharmit_file:
            pharmit_file.write(json.dumps({"points": pts}))

    elif extension == ".mol2":
        mol = Molecule(identifier="pharmacophore_model")
        # Pseudo-atom element per feature type ("positive" was misspelt,
        # which raised KeyError for positive features).
        atom_dic = {
            "apolar": 'C',
            "donor": 'N',
            "acceptor": 'O',
            "negative": 'S',
            "positive": 'H'
        }

        pseudo_atms = [
            Atom(atomic_symbol=atom_dic[feat.feature_type],
                 atomic_number=14,
                 coordinates=feat.feature_coordinates,
                 label=str(feat.score_value)) for feat in self.features
        ]

        for a in pseudo_atms:
            mol.add_atom(a)

        with io.MoleculeWriter(fname) as w:
            w.write(mol)

    elif extension == ".grd":
        g = self._as_grid()
        g.write(fname)

    else:
        raise TypeError(
            '"{}" output file type is not currently supported.'.format(extension))
# Function-call form prints the same text on Python 2 and Python 3.
print(target)

# For every PDB of the target: build the reference pharmacophore, write it as
# a PyMOL script, write the hotspot result based on its grids and write the
# representative ligands to aligned.mol2.
for pdb in pdbs:
    chain = chains[pdb]
    ligand_id = ligands[pdb]
    out_dir = os.path.join(base, target, pdb, "reference")
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    try:
        p = PharmacophoreModel._from_siena(pdb, ligand_id, mode, target, out_dir=out_dir)
        p.write(os.path.join(out_dir, "reference_pharmacophore.py"))

        # The protein comes from the previously written hotspot result.
        prot = hs_io.HotspotReader(
            os.path.join(base, target, pdb, "out.zip")).read().protein
        hs = Results(protein=prot, super_grids=p.dic)

        with hs_io.HotspotWriter(out_dir) as wf:
            wf.write(hs)

        with io.MoleculeWriter(os.path.join(out_dir, "aligned.mol2")) as w:
            for l in p.representatives:
                w.write(l)
    except RuntimeError:
        # NOTE(review): the message reports the target, not the failing pdb - confirm
        print("skipped {}".format(target))
def run(self):
    """
    Build a ligand overlay for a UniProt target.

    Searches the PDB, downloads and prepares the structures, aligns them onto
    the reference, collects the ligands of every structure aligned with an
    RMSD below the cutoff, optionally filters/clusters them, and writes
    ``ligand_overlay.csv`` and ``ligand_overlay.mol2`` to the output directory.
    """
    if not os.path.exists(self.args.output_directory):
        os.mkdir(self.args.output_directory)

    rms_cutoff = 1
    tmp = tempfile.mkdtemp()

    print("Searching the PDB")
    pdbs, entities = pdb_search(self.args.uniprot, self.args.max_resolution)
    download(pdbs, out_dir=tmp, processes=self.args.processes)

    if self.args.ref_pdb not in pdbs:
        print("Downloading reference data")
        download([self.args.ref_pdb], out_dir=tmp, processes=1)
        # chain to entity: the reference did not come from the search, so its
        # chain index is derived from the requested chain identifier
        ref_prot = Protein.from_file(
            os.path.join(tmp, f"{self.args.ref_pdb}.pdb"))
        entity = [c.identifier
                  for c in ref_prot.chains].index(self.args.ref_chain)
        prepare_protein(pdb=self.args.ref_pdb, entity=entity, out_dir=tmp)

    print("Prepare protein")
    # Collect the structures that prepared successfully into a new list.
    # Removing failures from `pdbs`/`entities` while iterating over them
    # skipped the following element, and entities.remove(entity) removed the
    # first equal value rather than the matching position.
    prepared = []
    for pdb, entity in tqdm(zip(pdbs, entities), total=len(pdbs)):
        # other chains removed, max number chains = 1
        try:
            prepare_protein(pdb=pdb, entity=entity, out_dir=tmp)
        except Exception:
            print("ERROR", pdb, entity)
        else:
            prepared.append(pdb)
    pdbs = prepared

    print("Aligning PDBs")
    # align
    args = zip([self.args.ref_pdb] * len(pdbs),
               [self.args.binding_site_ligand] * len(pdbs),
               pdbs,
               [tmp] * len(pdbs))

    with Pool(processes=self.args.processes) as pool:
        rms_list = list(tqdm(pool.imap(align, args), total=len(pdbs)))

    print("Extracting Ligands...")
    all_ligands = []
    # add the reference structure back to PDB list (RMSD 0 to itself)
    # NOTE(review): if ref_pdb is also in pdbs its ligands are collected twice - confirm
    for pdb, rms in zip(pdbs + [self.args.ref_pdb], rms_list + [0]):
        if rms < rms_cutoff:
            all_ligands.extend(ligands_from_pdb(pdb, tmp))

    print(f"    {len(all_ligands)} detected")

    if self.args.binding_site_ligand:
        ref_prot = Protein.from_file(
            os.path.join(tmp, f"{self.args.ref_pdb}.pdb"))
        ref_mol = [
            lig for lig in ref_prot.ligands if lig.identifier ==
            f"{self.args.ref_chain}:{self.args.binding_site_ligand}"
        ][0]
        all_ligands = bs_filter(all_ligands, ref_mol)
        print(
            f"    Binding site filter, {len(all_ligands)} ligands remaining"
        )

    if self.args.no_solvents:
        all_ligands = remove_solvents(all_ligands)
        print(
            f"    Removed solvents and metals, {len(all_ligands)} ligands remaining"
        )

    if self.args.cluster:
        all_ligands = cluster_ligands(all_ligands)
        print(
            f"    Clustered ligands, {len(all_ligands)} ligands remaining")

    # Ligand identifiers are split as "<pdb>:<chain>:<hetid (3 chars)><resnum>".
    # Building rows (rather than unpacking zip(*...)) also works when no
    # ligands remain, in which case empty output files are written.
    rows = []
    for lig in all_ligands:
        parts = lig.identifier.split(":")
        rows.append([parts[0], parts[1], parts[2][:3], parts[2][3:]])

    df = pd.DataFrame(rows, columns=["pdbs", "chains", "hetids", "resnum"])
    df.to_csv(
        os.path.join(self.args.output_directory, "ligand_overlay.csv"))

    with io.MoleculeWriter(
            os.path.join(self.args.output_directory,
                         "ligand_overlay.mol2")) as w:
        for lig in all_ligands:
            # lig.add_hydrogens()
            w.write(lig)