def create_feature_db(mol_files, outdir, dbname="test"):
    """Build a CrossMiner feature database from a collection of molecule files.

    One structure database (``.csdsqlx``) is created per input file inside
    ``outdir``; all of them are then combined into a single feature database
    written to ``<outdir>/<dbname>.feat``.

    :param mol_files: iterable of paths to the input molecule files. The
        structure database name is derived from the file's base name by
        replacing ``.mol2`` with ``.csdsqlx``.
    :param outdir: directory receiving the structure databases and the
        feature database; created (including parents) when missing.
    :param dbname: stem of the feature database file name.
    :return: None
    """
    # because CM is installed in a non-standard location
    f_defs = os.path.join(
        os.path.dirname(os.path.dirname(io.csd_directory())),
        "CSD_CrossMiner/feature_definitions")
    Pharmacophore.read_feature_definitions(f_defs)

    # Create the output directory once, up front: this also covers nested
    # paths and the case where ``mol_files`` is empty, so the final write
    # below never targets a directory that does not exist.
    os.makedirs(outdir, exist_ok=True)

    sdbs = []
    for mol_file in mol_files:
        # DatabaseInfo is a named tuple (file name, num_strucs, colour)
        mol_struc = Pharmacophore.FeatureDatabase.DatabaseInfo(
            mol_file, 0, Colour(0, 255, 0, 255))

        # Create structure databases
        mol_sqlx = os.path.join(
            outdir,
            os.path.basename(mol_file).replace('.mol2', '.csdsqlx'))
        mol_sdb = Pharmacophore.FeatureDatabase.Creator.StructureDatabase(
            mol_struc,
            use_crystal_symmetry=False,
            structure_database_path=mol_sqlx)
        sdbs.append(mol_sdb)

    # Create Feature database
    settings = Pharmacophore.FeatureDatabase.Creator.Settings(
        feature_definition_directory=f_defs, n_threads=6)
    creator = Pharmacophore.FeatureDatabase.Creator(settings=settings)
    db = creator.create(sdbs)
    db.write(os.path.join(outdir, f"{dbname}.feat"))
def __init__(self, **kw):
    """Initialise the settings and locate the SuperStar executable.

    :param kw: keyword arguments; only ``settings`` is read. When it is
        missing or ``None`` a fresh ``self.Settings()`` is used.
    :raises IOError: on Windows (without ``MAINDIR`` set) when no
        ``mercury*`` directory is found next to the CSD directory.
    """
    settings = kw.get('settings')
    if settings is None:
        settings = self.Settings()
    self.settings = settings

    main_dir = os.environ.get('MAINDIR')
    if main_dir:
        # MAINDIR is set: the executable is referenced by name (Windows) or
        # launched through MAINDIR/run.sh, with no extra environment.
        if sys.platform == 'win32':
            self.settings._superstar_executable = 'superstar_app.exe'
        else:
            self.settings._superstar_executable = ' '.join([
                os.path.join(main_dir, 'run.sh'), 'superstar_app.x'
            ])
        self.settings._superstar_env = dict()
    else:
        # No MAINDIR: every path is derived from the parent of the CSD
        # directory.
        base = os.path.dirname(io.csd_directory())
        if sys.platform == 'win32':
            merc = glob.glob(os.path.join(base, 'mercury*'))
            if not merc:
                # Previously the empty list itself was handed to
                # os.path.join, which failed with an opaque TypeError.
                raise IOError(
                    "No mercury path found under {}".format(base))
            self.settings._superstar_executable = os.path.join(
                merc[0], 'superstar_app.exe')
        elif sys.platform == 'darwin':
            self.settings._superstar_executable = os.path.join(
                base, 'mercury.app', 'Contents', 'MacOS', 'superstar')
        else:
            self.settings._superstar_executable = os.path.join(
                base, 'bin', 'superstar')
        self.settings._superstar_env = dict(
            SUPERSTAR_ISODIR=str(
                os.path.join(base, 'isostar_files', 'istr')),
            SUPERSTAR_ROOT=str(os.path.join(base, "Mercury")))

    self.settings.working_directory = utilities._test_output_dir()
def _set_environment_variables():
    """
    private method

    Work out how to launch SuperStar and which environment variables it
    needs, based on the ``MAINDIR`` environment variable, the platform and
    the CSD directory.

    :return: superstar executable (str), superstar env (dict)
    :raises IndexError: on Windows when no ``mercury*`` directory is found
    :raises IOError: on Windows when neither ``superstar_app.exe`` nor
        ``superstar.exe`` exists in the mercury directory
    :raises NotImplementedError: on OS X without ``MAINDIR`` set
    """
    base = csd_directory()
    main_dir = environ.get('MAINDIR')

    if main_dir:
        if sys.platform == 'win32':
            superstar_executable = 'superstar_app.exe'
        else:
            superstar_executable = ' '.join(
                [join(main_dir, 'run.sh'), 'superstar_app.x'])
        return superstar_executable, dict()

    if sys.platform == 'win32':
        base = dirname(base)
        merc = glob.glob(join(base, 'mercury*'))
        if not merc:
            raise IndexError("No mercury path found, check API version")
        merc = merc[0]

        # The executable name is not fixed: try the "_app" name first and
        # fall back to the plain one.
        superstar_executable = join(merc, 'superstar_app.exe')
        if not isfile(superstar_executable):
            superstar_executable = join(merc, 'superstar.exe')
        if not isfile(superstar_executable):
            raise IOError("superstar executable not found")

        superstar_env = dict(
            SUPERSTAR_ISODIR=str(join(base, 'isostar_files', 'istr')),
            SUPERSTAR_ROOT=str(join(base, "Mercury")))
    elif sys.platform == 'darwin':
        # This branch used to print the message and then fail on the
        # return statement with an UnboundLocalError; fail explicitly.
        raise NotImplementedError("OS X not supported")
    else:
        base = dirname(base)
        superstar_executable = join(base, 'bin', 'superstar')
        superstar_env = dict(
            SUPERSTAR_ISODIR=str(join(base, 'isostar_files', 'istr')),
            SUPERSTAR_ROOT=str(base))

    return superstar_executable, superstar_env
def setUp(self):
    """Load the four ligand pharmacophore models and the feature definitions."""
    self.parent_dir = "testdata/pharmacophore_extension/PharmacophoreModel"
    self.fnames = [
        "1dmt_ligand.cm",
        "1r1h_ligand.cm",
        "1r1j_ligand.cm",
        "1y8j_ligand.cm",
    ]

    # Read every model file from the test data directory, in list order.
    models = []
    for fname in self.fnames:
        model_path = os.path.join(self.parent_dir, fname)
        models.append(PharmacophoreModel.from_file(model_path))
    self.pharmacophores = models

    # The feature definitions live two levels above the CSD directory.
    csd_dir = io.csd_directory()
    self.cm_dir = os.path.dirname(os.path.dirname(csd_dir))
    definitions_dir = os.path.join(
        self.cm_dir, "CSD_CrossMiner/feature_definitions")
    Pharmacophore.read_feature_definitions(definitions_dir)
def __init__(self):
    """Read the CrossMiner feature definitions and reset the model state.

    The definitions are read from ``CSD_CrossMiner/feature_definitions``
    two directory levels above the CSD directory, and a fresh temporary
    directory is created and stored in ``self.tmp``.
    """
    super().__init__()

    csd_dir = csd_directory()
    self.cm_dir = os.path.dirname(os.path.dirname(csd_dir))
    definitions_dir = os.path.join(
        self.cm_dir, "CSD_CrossMiner/feature_definitions")
    Pharmacophore.read_feature_definitions(directory=definitions_dir)

    # Shallow copy of the definitions that were just read.
    self.__feature_options = dict(Pharmacophore.feature_definitions.items())
    assert len(self.__feature_options) > 1
    # Same dictionary object under a second name.
    self.__feature_definitions = self.__feature_options

    self.tmp = tempfile.mkdtemp()

    # Nothing has been attached or detected yet.
    self.__identifier = None
    self.__ligands = None
    self.__protein = None
    self.__detected_features = None
    self.__feature_point_grids = None
def __init__(self, features=None, _motif_pharmacophore=None):
    """Read the CrossMiner feature definitions and reset the model state.

    The definitions directory is taken from the
    ``CCDC_CROSSMINER_FEATURE_DEFINITIONS`` environment variable when it is
    set; otherwise a path relative to the CSD installation is used.

    :param features: forwarded unchanged to the parent initialiser
    :param _motif_pharmacophore: forwarded unchanged to the parent initialiser
    """
    super().__init__(features=features,
                     _motif_pharmacophore=_motif_pharmacophore)

    csd_dir = csd_directory()
    self.cm_dir = os.path.dirname(os.path.dirname(csd_dir))

    # NOTE(review): the fallback climbs one more level ("..") above cm_dir
    # before entering CSD_CrossMiner - confirm this matches the install layout.
    fallback_dir = os.path.join(
        self.cm_dir, "../CSD_CrossMiner/feature_definitions")
    feat_db = os.environ.get("CCDC_CROSSMINER_FEATURE_DEFINITIONS",
                             fallback_dir)
    Pharmacophore.read_feature_definitions(directory=feat_db)

    # Shallow copy of the definitions that were just read.
    self.__feature_options = dict(Pharmacophore.feature_definitions.items())
    assert len(self.__feature_options) > 1
    # Same dictionary object under a second name.
    self.__feature_definitions = self.__feature_options

    self.tmp = tempfile.mkdtemp()

    # Nothing has been attached or detected yet.
    self.__identifier = None
    self.__ligands = None
    self.__protein = None
    self.__detected_features = None
    self.__feature_point_grids = None
from ccdc.pharmacophore import Pharmacophore
from ccdc import io
import os
from shutil import copyfile
from ccdc.utilities import Colour, Timer

# Script entry point: prepares per-file structure database inputs for a
# CrossMiner feature database from two hard-coded conformer files.
if __name__ == "__main__":
    # Output directory for the generated databases (hard-coded, machine-specific).
    outdir = "/home/pcurran/github_packages/pharmacophores/testdata/search/feat_db"
    # Feature definitions are looked up two levels above the CSD directory.
    f_defs = os.path.join(os.path.dirname(os.path.dirname(io.csd_directory())), "CSD_CrossMiner/feature_definitions")
    Pharmacophore.read_feature_definitions(f_defs)
    # Directory holding the input conformer files (hard-coded, machine-specific).
    base = "/local/pcurran/patel/CDK2/screening_files/conformers"
    mol_files = [
        os.path.join(base, f) for f in ["actives_final_chunk0_conf.mol2", "decoys_final_chunk0_conf.mol2"]
    ]
    # NOTE(review): nothing visible here appends to sdbs - the loop body
    # appears to continue beyond this excerpt; confirm against the full script.
    sdbs = []
    for mol_file in mol_files:
        # DatabaseInfo is a named tuple (file name, num_strucs, colour)
        mol_struc = Pharmacophore.FeatureDatabase.DatabaseInfo(
            mol_file, 0, Colour(0, 255, 0, 255))
        # Create structure databases
        # The database path is the input base name with '.mol2' swapped for
        # '.csdsqlx', placed in outdir.
        mol_sqlx = os.path.join(
            outdir, os.path.basename(mol_file).replace('.mol2', '.csdsqlx'))
        # Non-recursive: the parent of outdir must already exist.
        if not os.path.exists(outdir):
            os.mkdir(outdir)
def search(self, queryTargetId, queryTargetPath, resultPath, normalizeFlag=True, maxHits=50, searchType="similarity", suppressMetals=False):
    """Search the CCDC database for similar or substructure matches for the input query molecule.

    For every entry read from the query file a search is run; for each of the
    first ``maxHits`` hits a match record is assembled and collected. For
    substructure searches each matched component is additionally written as a
    mol2 and an sdf file under ``<resultPath>/<queryTargetId>/``. When the
    final hit count is positive, the collected records are written to
    ``<queryTargetId>-index.json`` in that same directory.

    Args:
        queryTargetId (str): query identifier
        queryTargetPath (str): path to the query molfile (mol, sdf, mol2)
        resultPath (str): output path to match results
        normalizeFlag (bool, optional): do standard perceptions on matching molecules. Defaults to True.
        maxHits (int, optional): maximum number of matches to return. Defaults to 50.
        searchType (str, optional): search mode (substructure, similarity). Defaults to "similarity".
            Any other value yields no hits.
        suppressMetals (bool, optional): filter structures containing metals. Defaults to False.

    Returns:
        (int): number of matches. The counter is reset for every entry in the
            query file, so the value reflects the last entry processed and is
            not capped by ``maxHits``.
    """
    mU = MarshalUtil()
    logger.info("Start search for target %s path %s result path %s", queryTargetId, queryTargetPath, resultPath)
    # Match records accumulated over all entries and hits.
    summaryList = []
    # A CIF named after the query id is looked for next to the query file.
    targetDirPath = os.path.dirname(queryTargetPath)
    cifTargetPath = os.path.join(targetDirPath, queryTargetId + ".cif")
    #
    targetStructures = EntryReader(queryTargetPath)
    # All result files for this query go into <resultPath>/<queryTargetId>.
    dirPath = os.path.join(resultPath, queryTargetId)
    numHits = 0
    for ii, e in enumerate(targetStructures, 1):
        # NOTE(review): resetting here means only the last entry's hit count
        # survives the loop - confirm this is intended for multi-entry files.
        numHits = 0
        startTime = time.time()
        targetMol = e.molecule
        if normalizeFlag:
            targetMol.assign_bond_types(which="unknown")
            targetMol.standardise_aromatic_bonds()
            targetMol.standardise_delocalised_bonds()
        #
        logger.info("(%d) begin %s search - query id %s", ii, searchType, queryTargetId)
        if searchType == "similarity":
            hits = self.__similaritySearch(targetMol, suppressMetals=suppressMetals)
        elif searchType == "substructure":
            hits = self.__moleculeSubstructureSearch(
                targetMol, suppressMetals=suppressMetals)
        else:
            # Unrecognised search type: treated as "no hits".
            hits = []
        logger.info("(%d) completed search query id %s in %.3f seconds", ii, queryTargetId, time.time() - startTime)
        if hits:
            # Counts every hit, including those beyond maxHits.
            numHits += len(hits)
            logger.info("(%d) search for %s matched %d: %r", ii, queryTargetId, numHits, [targetHit.identifier for targetHit in hits])
            # Only the first maxHits hits are recorded / exported.
            for targetHit in hits[:maxHits]:
                # Build the match record for this hit.
                hI = CcdcMatchIndexInst()
                hI.setCsdVersion(csd_version())
                hI.setCsdDirectory(csd_directory())
                hI.setTargetId(queryTargetId)
                hI.setTargetPath(queryTargetPath)
                if mU.exists(cifTargetPath):
                    hI.setTargetCcPath(cifTargetPath)
                hI.setIdentifier(targetHit.identifier)
                hI.setMatchType(searchType)
                # Entry metadata is best-effort: any failure is logged and the
                # record is kept with whatever was set before the failure.
                try:
                    hI.setRFactor(targetHit.entry.r_factor)
                    hI.setChemicalName(targetHit.entry.chemical_name)
                    hI.setTemperature(targetHit.entry.temperature)
                    hI.setRadiationSource(targetHit.entry.radiation_source)
                    hI.setHasDisorder("N")
                    cit = targetHit.entry.publication
                    if cit.doi is not None:
                        hI.setCitationDOI(cit.doi)
                    if searchType == "similarity":
                        hI.setSimilarityScore(targetHit.similarity)
                    elif searchType == "substructure":
                        hI.setMatchedAtomLength(
                            len(targetHit.match_atoms()))
                # Note: this reuses the name `e` of the outer loop variable.
                except Exception as e:
                    logger.exception("Failing with %s", str(e))
                #
                #
                mU.mkdir(dirPath)
                # Paths of the mol2 files written for this hit.
                mol2L = []
                if searchType == "substructure":
                    for jj, mc in enumerate(targetHit.match_components(), 1):
                        # Write the matched component as mol2 ...
                        fp = os.path.join(
                            dirPath, queryTargetId + "_" + targetHit.identifier + "_%03d" % jj + ".mol2")
                        mol2L.append(fp)
                        with MoleculeWriter(fp) as ofh:
                            ofh.write(mc)
                        # Replace the title line
                        # (second line of the mol2 file; every "00" in that
                        # line is replaced by the hit identifier)
                        with open(fp) as fin:
                            lines = fin.readlines()
                        lines[1] = lines[1].replace(
                            "00", targetHit.identifier)
                        #
                        with open(fp, "w") as fout:
                            fout.write("".join(lines))
                        # ... and again as sdf, under the same base name.
                        fp = os.path.join(
                            dirPath, queryTargetId + "_" + targetHit.identifier + "_%03d" % jj + ".sdf")
                        with MoleculeWriter(fp) as ofh:
                            ofh.write(mc)
                        # Replace the title line
                        # (first line of the sdf file)
                        with open(fp) as fin:
                            lines = fin.readlines()
                        lines[0] = lines[0].replace(
                            "00", targetHit.identifier)
                        #
                        with open(fp, "w") as fout:
                            fout.write("".join(lines))
                    #
                    # Check for multiple generated result files -
                    # one record is stored per written component.
                    for jj, fp in enumerate(mol2L, 1):
                        logger.debug("(%d) adding component fp %s", jj, fp)
                        hI.setMatchNumber(jj)
                        hI.setMol2Path(fp)
                        # Drop the trailing "mol2" (the dot stays) to obtain
                        # the sibling .sdf path written above.
                        tt = fp[:-4] + "sdf"
                        hI.setMolPath(tt)
                        # Deep copy because hI is mutated on the next pass.
                        summaryList.append(copy.deepcopy(hI.get()))
                    #
                else:
                    # Non-substructure searches: one record per hit, no files.
                    hI.setMatchNumber(1)
                    summaryList.append(copy.deepcopy(hI.get()))
        else:
            logger.info("(%d) search for %s returns no matches", ii, targetMol.identifier)
        hits = None
    # Write the index only when the (last) entry produced hits.
    if numHits > 0:
        mU.mkdir(dirPath)
        fp = os.path.join(dirPath, queryTargetId + "-index.json")
        cmI = CcdcMatchIndex(indexFilePath=fp, verbose=self.__verbose)
        cmI.load(summaryList)
        cmI.writeIndex()
    return numHits
def delete_solvents(self, list_solvent_names=None):
    """Remove solvent components from the crystals in ``self.entry_reader``.

    When no solvent list is given, every ``*.mol2`` file in the solvent
    library shipped with the CSD installation is used (74 solvents according
    to the original author's note).

    For each entry that has a 3D structure, components that contain no metal
    and are either not multidentate or are recognised as a solvent are
    removed from the molecule. Entries without a 3D structure, and entries
    whose processing raises an error, are kept unchanged. On completion
    ``self.entry_reader`` is replaced by the list of processed entries.

    :param list_solvent_names: solvent names (list or tuple); each name must
        correspond to a ``<name>.mol2`` file in the solvent library
    :return: None
    :raises FileExistsError: when the solvent library directory is missing
        (exception type kept as-is for backward compatibility)
    """
    # Location of the solvent library inside the CSD installation.
    solvent_dir = os.path.join(os.path.dirname(io.csd_directory()),
                               'Mercury', 'molecular_libraries',
                               'ccdc_solvents')
    if not os.path.isdir(solvent_dir):
        raise FileExistsError('路径不存在!')

    # Collect the SMILES strings of the solvents to look for.
    if list_solvent_names:
        solvent_paths = [
            os.path.join(solvent_dir, solvent + '.mol2')
            for solvent in list_solvent_names
        ]
    else:
        solvent_paths = glob.glob(os.path.join(solvent_dir, '*.mol2'))
    # The reader is opened on the file path and its first molecule is used.
    # (The named-solvent branch previously indexed the path string itself
    # because of misplaced parentheses, so it could never succeed.)
    solvent_smiles = [
        io.MoleculeReader(path)[0].smiles for path in solvent_paths
    ]

    # Strip the solvents.
    list_crystals_remove_solvents = []
    p_bar = tqdm(self.entry_reader)
    p_bar.set_description('正在去除溶剂:')
    for entry in p_bar:
        try:
            if entry.has_3d_structure:
                # Ensure labels are unique
                mol = entry.molecule
                mol.normalise_labels()
                # Use a copy
                clone = mol.copy()
                # Remove all bonds containing a metal atom
                clone.remove_bonds(b for b in clone.bonds
                                   if any(a.is_metal for a in b.atoms))
                # Work out which components to remove
                to_remove = [
                    c for c in clone.components
                    if not self.has_metal(c) and
                    (not self.is_multidentate(c, mol) or
                     self.is_solvent(c, solvent_smiles))
                ]
                # Remove the atoms of selected components
                mol.remove_atoms(
                    mol.atom(a.label) for c in to_remove for a in c.atoms)
                # Store the stripped molecule back on the entry's crystal
                entry.crystal.molecule = mol
        except Exception:
            # Best effort: an entry that cannot be processed is kept as it
            # is. Only ordinary errors are absorbed, so Ctrl-C and
            # interpreter exit still propagate.
            pass
        # Every entry is kept exactly once, processed or not.
        list_crystals_remove_solvents.append(entry)

    self.entry_reader = list_crystals_remove_solvents
    return None
pm = [ PharmacophoreModel.from_file(os.path.join(wrk_dir, f)) for f in fnames ] feats = create_consensus(pm, cutoff=1) out = PharmacophoreModel() out.detected_features = feats for feat in feats: out.add_feature(feat) out.pymol_visulisation( "/home/pcurran/github_packages/pharmacophores/testdata/concensus") if __name__ == "__main__": cm_dir = os.path.dirname(os.path.dirname(io.csd_directory())) Pharmacophore.read_feature_definitions( os.path.join(cm_dir, "CSD_CrossMiner/feature_definitions")) wrkdir = "/home/pcurran/github_packages/pharmacophores/testdata/alignment" paths = [ "1AQ1_aligned.pdb", "1B38_aligned.pdb", "1B39_aligned.pdb", "1CKP_aligned.pdb" ] hetids = ["STU", "ATP", "ATP", "PVB"] chains = ["A", "A", "A", "A"] for path, het, chain in zip(paths, hetids, chains): create_pharmacophore(path, het, chain, out_dir=wrkdir) wrk_dir = "/home/pcurran/github_packages/pharmacophores/testdata/alignment"