def create_feature_db(mol_files, outdir, dbname="test", n_threads=6):
    """Build a CrossMiner feature database from a set of molecule files.

    One structure database (``.csdsqlx``) is created in ``outdir`` for every
    input file; all of them are then passed to the feature database creator
    and the result is written to ``<outdir>/<dbname>.feat``.

    :param mol_files: iterable of paths to the input molecule files
    :param outdir: output directory; created (including parents) if missing
    :param dbname: file stem of the feature database written to ``outdir``
    :param n_threads: thread count handed to the creator settings
    :return: None
    """
    # because CM is installed in a non-standard location
    f_defs = os.path.join(os.path.dirname(os.path.dirname(io.csd_directory())),
                          "CSD_CrossMiner/feature_definitions")
    Pharmacophore.read_feature_definitions(f_defs)

    # Create the output directory once, up front: it is needed for the final
    # write even when `mol_files` is empty, and makedirs copes with nested
    # paths where os.mkdir would fail.
    os.makedirs(outdir, exist_ok=True)

    sdbs = []
    for mol_file in mol_files:
        # DatabaseInfo is a named tuple (file name, num_strucs, colour)
        mol_struc = Pharmacophore.FeatureDatabase.DatabaseInfo(
            mol_file, 0, Colour(0, 255, 0, 255))

        # Swap the extension (whatever it is) for .csdsqlx so the structure
        # database never ends up with the same file name as its input.
        stem = os.path.splitext(os.path.basename(mol_file))[0]
        mol_sqlx = os.path.join(outdir, stem + '.csdsqlx')

        # Create structure databases
        mol_sdb = Pharmacophore.FeatureDatabase.Creator.StructureDatabase(
            mol_struc,
            use_crystal_symmetry=False,
            structure_database_path=mol_sqlx)
        sdbs.append(mol_sdb)

    # Create Feature database
    settings = Pharmacophore.FeatureDatabase.Creator.Settings(
        feature_definition_directory=f_defs, n_threads=n_threads)
    creator = Pharmacophore.FeatureDatabase.Creator(settings=settings)
    db = creator.create(sdbs)
    db.write(os.path.join(outdir, f"{dbname}.feat"))
示例#2
0
    def __init__(self, **kw):
        """Store the settings and work out how to launch SuperStar.

        The executable and its environment are stored on the settings object
        as ``_superstar_executable`` and ``_superstar_env``.

        :param kw: keyword arguments; only ``settings`` is read. When it is
            absent or ``None`` a fresh ``self.Settings()`` is used.
        :raises IOError: on Windows (without ``MAINDIR``) when no ``mercury*``
            directory exists next to the CSD directory
        """
        settings = kw.get('settings')
        if settings is None:
            settings = self.Settings()
        self.settings = settings

        main_dir = os.environ.get('MAINDIR')
        if main_dir:
            # MAINDIR is set: the executable is named relative to it and no
            # extra environment variables are supplied.
            if sys.platform == 'win32':
                self.settings._superstar_executable = 'superstar_app.exe'
            else:
                self.settings._superstar_executable = ' '.join(
                    [os.path.join(main_dir, 'run.sh'), 'superstar_app.x'])
            self.settings._superstar_env = dict()
        else:
            # Everything below is located relative to the parent of the CSD
            # directory.
            base = os.path.dirname(io.csd_directory())
            if sys.platform == 'win32':
                merc = glob.glob(os.path.join(base, 'mercury*'))
                if not merc:
                    # Previously the empty list was passed to os.path.join,
                    # which failed with an unhelpful TypeError.
                    raise IOError(
                        "No 'mercury*' directory found in %s" % base)
                executable = os.path.join(merc[0], 'superstar_app.exe')
            elif sys.platform == 'darwin':
                executable = os.path.join(base, 'mercury.app', 'Contents',
                                          'MacOS', 'superstar')
            else:
                executable = os.path.join(base, 'bin', 'superstar')
            self.settings._superstar_executable = executable
            self.settings._superstar_env = dict(
                SUPERSTAR_ISODIR=str(
                    os.path.join(base, 'isostar_files', 'istr')),
                SUPERSTAR_ROOT=str(os.path.join(base, "Mercury")))
        self.settings.working_directory = utilities._test_output_dir()
示例#3
0
        def _set_environment_variables():
            """
            private method

            sets up superstar environment variables
            :return: superstar executable (str), superstar env (dict)
            :raises IndexError: on Windows when no ``mercury*`` directory is
                found next to the CSD directory
            :raises IOError: on Windows when neither ``superstar_app.exe``
                nor ``superstar.exe`` exists, or when running on OS X
            """
            base = csd_directory()
            main_dir = environ.get('MAINDIR')
            if main_dir:
                # MAINDIR is set: name the executable relative to it and
                # supply no extra environment variables.
                if sys.platform == 'win32':
                    superstar_executable = 'superstar_app.exe'
                else:
                    superstar_executable = ' '.join(
                        [join(main_dir, 'run.sh'), 'superstar_app.x'])
                return superstar_executable, dict()

            if sys.platform == 'darwin':
                # Fail explicitly: previously this branch only printed the
                # message and then crashed with UnboundLocalError on return.
                raise IOError("OS X not supported")

            # All remaining paths are relative to the CSD directory's parent.
            base = dirname(base)
            if sys.platform == 'win32':
                merc = glob.glob(join(base, 'mercury*'))
                if not merc:
                    raise IndexError(
                        "No mercury path found, check API version")
                merc = merc[0]

                # Try both executable names before giving up.
                superstar_executable = join(merc, 'superstar_app.exe')
                if not isfile(superstar_executable):
                    superstar_executable = join(merc, 'superstar.exe')
                    if not isfile(superstar_executable):
                        raise IOError("superstar executable not found")

                superstar_root = join(base, "Mercury")
            else:
                superstar_executable = join(base, 'bin', 'superstar')
                superstar_root = base

            superstar_env = dict(
                SUPERSTAR_ISODIR=str(join(base, 'isostar_files', 'istr')),
                SUPERSTAR_ROOT=str(superstar_root))

            return superstar_executable, superstar_env
示例#4
0
    def setUp(self):
        """Load the reference pharmacophore files and the feature definitions."""
        self.parent_dir = "testdata/pharmacophore_extension/PharmacophoreModel"
        self.fnames = [
            "1dmt_ligand.cm",
            "1r1h_ligand.cm",
            "1r1j_ligand.cm",
            "1y8j_ligand.cm",
        ]

        # One model per file, in the same order as `self.fnames`.
        models = []
        for fname in self.fnames:
            model_path = os.path.join(self.parent_dir, fname)
            models.append(PharmacophoreModel.from_file(model_path))
        self.pharmacophores = models

        # Feature definitions sit two directory levels above the CSD directory.
        csd_parent = os.path.dirname(io.csd_directory())
        self.cm_dir = os.path.dirname(csd_parent)
        definitions_dir = os.path.join(self.cm_dir,
                                       "CSD_CrossMiner/feature_definitions")
        Pharmacophore.read_feature_definitions(definitions_dir)
    def __init__(self):
        """Read the CrossMiner feature definitions and reset the model state."""
        super().__init__()

        # Feature definitions sit two directory levels above the CSD directory.
        csd_parent = os.path.dirname(csd_directory())
        self.cm_dir = os.path.dirname(csd_parent)
        definitions_dir = os.path.join(self.cm_dir,
                                       "CSD_CrossMiner/feature_definitions")
        Pharmacophore.read_feature_definitions(directory=definitions_dir)

        # Shallow copy of the definitions registered by the call above.
        self.__feature_options = dict(
            Pharmacophore.feature_definitions.items())
        assert len(self.__feature_options) > 1

        # Both attributes refer to the same dictionary object.
        self.__feature_definitions = self.__feature_options

        # Scratch directory for this instance.
        self.tmp = tempfile.mkdtemp()

        # Nothing has been loaded or detected yet.
        self.__identifier = None
        self.__ligands = None
        self.__protein = None
        self.__detected_features = None
        self.__feature_point_grids = None
示例#6
0
    def __init__(self, features=None, _motif_pharmacophore=None):
        """Initialise the base class, read feature definitions, reset state.

        :param features: forwarded unchanged to the parent constructor
        :param _motif_pharmacophore: forwarded unchanged to the parent
            constructor
        """
        super().__init__(features=features,
                         _motif_pharmacophore=_motif_pharmacophore)

        csd_parent = os.path.dirname(csd_directory())
        self.cm_dir = os.path.dirname(csd_parent)

        # The environment variable, when set, overrides the default location
        # derived from the CSD directory.
        default_definitions = os.path.join(
            self.cm_dir, "../CSD_CrossMiner/feature_definitions")
        feat_db = os.environ.get("CCDC_CROSSMINER_FEATURE_DEFINITIONS",
                                 default_definitions)
        Pharmacophore.read_feature_definitions(directory=feat_db)

        # Shallow copy of the definitions registered by the call above.
        self.__feature_options = dict(
            Pharmacophore.feature_definitions.items())
        assert len(self.__feature_options) > 1

        # Both attributes refer to the same dictionary object.
        self.__feature_definitions = self.__feature_options

        # Scratch directory for this instance.
        self.tmp = tempfile.mkdtemp()

        # Nothing has been loaded or detected yet.
        self.__identifier = None
        self.__ligands = None
        self.__protein = None
        self.__detected_features = None
        self.__feature_point_grids = None
示例#7
0
from ccdc.pharmacophore import Pharmacophore
from ccdc import io
import os
from shutil import copyfile
from ccdc.utilities import Colour, Timer

if __name__ == "__main__":
    # Script entry point: registers the CrossMiner feature definitions and
    # prepares per-file structure database inputs for the conformer files.
    outdir = "/home/pcurran/github_packages/pharmacophores/testdata/search/feat_db"

    f_defs = os.path.join(os.path.dirname(os.path.dirname(io.csd_directory())),
                          "CSD_CrossMiner/feature_definitions")
    Pharmacophore.read_feature_definitions(f_defs)

    base = "/local/pcurran/patel/CDK2/screening_files/conformers"
    mol_files = [
        os.path.join(base, f) for f in
        ["actives_final_chunk0_conf.mol2", "decoys_final_chunk0_conf.mol2"]
    ]

    # Create the output directory once, before the loop; makedirs also
    # creates missing parent directories, which os.mkdir would not.
    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    sdbs = []
    for mol_file in mol_files:
        # DatabaseInfo is a named tuple (file name, num_strucs, colour)
        mol_struc = Pharmacophore.FeatureDatabase.DatabaseInfo(
            mol_file, 0, Colour(0, 255, 0, 255))

        # Path of the structure database that belongs to this input file
        mol_sqlx = os.path.join(
            outdir,
            os.path.basename(mol_file).replace('.mol2', '.csdsqlx'))
示例#8
0
    def search(self,
               queryTargetId,
               queryTargetPath,
               resultPath,
               normalizeFlag=True,
               maxHits=50,
               searchType="similarity",
               suppressMetals=False):
        """Search the CCDC database for similar or substructure matches for the input query molecule.

        Every structure read from the query file is searched in turn. For each
        of the first ``maxHits`` hits an index record is collected; for
        substructure searches every matched component is additionally written
        as a mol2 and an sdf file below ``resultPath/queryTargetId``. The
        collected records are written to ``<queryTargetId>-index.json`` in that
        directory when the final hit count is positive.

        Args:
            queryTargetId (str): query identifier
            queryTargetPath (str): path to the query molfile (mol, sdf, mol2)
            resultPath (str): output path to match results
            normalizeFlag (bool, optional): do standard perceptions on matching molecules. Defaults to True.
            maxHits (int, optional): maximum number of matches to return. Defaults to 50.
            searchType (str, optional): search mode (substructure, similarity). Any other
                value produces no hits. Defaults to "similarity".
            suppressMetals (bool, optional): filter structures containing metals. Defaults to False.

        Returns:
            (int): number of hits found for the last structure read from the query
                file (the counter is reset for every structure and is not capped
                by ``maxHits``)
        """

        mU = MarshalUtil()
        logger.info("Start search for target %s path %s result path %s",
                    queryTargetId, queryTargetPath, resultPath)
        #
        # Index records accumulated over all query structures and all hits
        summaryList = []
        #
        # A CIF named after the query id is looked for next to the query file
        targetDirPath = os.path.dirname(queryTargetPath)
        cifTargetPath = os.path.join(targetDirPath, queryTargetId + ".cif")

        #
        targetStructures = EntryReader(queryTargetPath)
        dirPath = os.path.join(resultPath, queryTargetId)
        numHits = 0
        for ii, e in enumerate(targetStructures, 1):
            # NOTE(review): the counter is reset for every query structure, so the
            # index write and the return value below only reflect the last
            # structure, while summaryList keeps records from all of them - confirm
            # this is intended for multi-structure query files.
            numHits = 0
            startTime = time.time()
            targetMol = e.molecule
            if normalizeFlag:
                targetMol.assign_bond_types(which="unknown")
                targetMol.standardise_aromatic_bonds()
                targetMol.standardise_delocalised_bonds()
            #
            logger.info("(%d) begin %s search - query id %s", ii, searchType,
                        queryTargetId)
            if searchType == "similarity":
                hits = self.__similaritySearch(targetMol,
                                               suppressMetals=suppressMetals)
            elif searchType == "substructure":
                hits = self.__moleculeSubstructureSearch(
                    targetMol, suppressMetals=suppressMetals)
            else:
                # Unrecognised search type: treated as "no matches"
                hits = []
            logger.info("(%d) completed search query id %s in %.3f seconds",
                        ii, queryTargetId,
                        time.time() - startTime)

            if hits:
                # Counts every hit, including those beyond maxHits
                numHits += len(hits)
                logger.info("(%d) search for %s matched %d: %r", ii,
                            queryTargetId, numHits,
                            [targetHit.identifier for targetHit in hits])

                #
                # Only the first maxHits hits are recorded and exported
                for targetHit in hits[:maxHits]:
                    #
                    hI = CcdcMatchIndexInst()
                    hI.setCsdVersion(csd_version())
                    hI.setCsdDirectory(csd_directory())
                    hI.setTargetId(queryTargetId)
                    hI.setTargetPath(queryTargetPath)
                    if mU.exists(cifTargetPath):
                        hI.setTargetCcPath(cifTargetPath)
                    hI.setIdentifier(targetHit.identifier)
                    hI.setMatchType(searchType)
                    # Entry metadata is best effort: a failure is logged and the
                    # hit is still recorded with whatever was set before it.
                    try:
                        hI.setRFactor(targetHit.entry.r_factor)
                        hI.setChemicalName(targetHit.entry.chemical_name)
                        hI.setTemperature(targetHit.entry.temperature)
                        hI.setRadiationSource(targetHit.entry.radiation_source)
                        hI.setHasDisorder("N")
                        cit = targetHit.entry.publication
                        if cit.doi is not None:
                            hI.setCitationDOI(cit.doi)
                        if searchType == "similarity":
                            hI.setSimilarityScore(targetHit.similarity)
                        elif searchType == "substructure":
                            hI.setMatchedAtomLength(
                                len(targetHit.match_atoms()))
                    # NOTE: this rebinds (and afterwards unbinds) the loop variable
                    # `e`; it is only read at the top of each iteration, after the
                    # for statement has assigned it again.
                    except Exception as e:
                        logger.exception("Failing with %s", str(e))
                        #
                    #
                    mU.mkdir(dirPath)
                    # Paths of the mol2 files written for this hit
                    mol2L = []
                    if searchType == "substructure":
                        # One mol2 and one sdf file per matched component,
                        # numbered from 001
                        for jj, mc in enumerate(targetHit.match_components(),
                                                1):
                            fp = os.path.join(
                                dirPath, queryTargetId + "_" +
                                targetHit.identifier + "_%03d" % jj + ".mol2")
                            mol2L.append(fp)
                            with MoleculeWriter(fp) as ofh:
                                ofh.write(mc)
                            # Replace the title line
                            # (second line of the mol2 file; presumably the writer
                            # emits "00" as the title - note that every "00" on
                            # that line is replaced; TODO confirm)
                            with open(fp) as fin:
                                lines = fin.readlines()
                            lines[1] = lines[1].replace(
                                "00", targetHit.identifier)
                            #
                            with open(fp, "w") as fout:
                                fout.write("".join(lines))
                            #
                            fp = os.path.join(
                                dirPath, queryTargetId + "_" +
                                targetHit.identifier + "_%03d" % jj + ".sdf")
                            with MoleculeWriter(fp) as ofh:
                                ofh.write(mc)

                            # Replace the title line
                            # (first line of the sdf file; same "00" assumption
                            # as for the mol2 file above)
                            with open(fp) as fin:
                                lines = fin.readlines()
                            lines[0] = lines[0].replace(
                                "00", targetHit.identifier)
                            #
                            with open(fp, "w") as fout:
                                fout.write("".join(lines))
                        #
                        #  Check for multiple generated result files -
                        #  one index record is added per written component
                        #
                        for jj, fp in enumerate(mol2L, 1):
                            logger.debug("(%d) adding component fp %s", jj, fp)
                            hI.setMatchNumber(jj)
                            hI.setMol2Path(fp)
                            # Swap the trailing "mol2" for "sdf" (the dot is kept)
                            tt = fp[:-4] + "sdf"
                            hI.setMolPath(tt)
                            # Deep copy because hI is reused for the next component
                            summaryList.append(copy.deepcopy(hI.get()))
                            #
                    else:
                        # Non-substructure hits get a single record and no files
                        hI.setMatchNumber(1)
                        summaryList.append(copy.deepcopy(hI.get()))
            else:
                logger.info("(%d) search for %s returns no matches", ii,
                            targetMol.identifier)
                hits = None
        #
        # The index is written only when the last structure searched had hits
        if numHits > 0:
            mU.mkdir(dirPath)
            fp = os.path.join(dirPath, queryTargetId + "-index.json")
            cmI = CcdcMatchIndex(indexFilePath=fp, verbose=self.__verbose)
            cmI.load(summaryList)
            cmI.writeIndex()

        return numHits
示例#9
0
    def delete_solvents(self, list_solvent_names=None):
        """Remove solvent molecules from the crystals in ``self.entry_reader``.

        When no solvent list is given, every ``*.mol2`` file in the solvent
        library shipped with the CCDC installation is used.

        :param list_solvent_names: solvent names; each one must exist as
            ``<name>.mol2`` in the solvent library, type: list or tuple
        :return: None; ``self.entry_reader`` is replaced by a list holding
            every entry that was read (processed where possible)
        :raises FileExistsError: if the solvent library directory is missing
        """

        # Directory of the solvent library inside the CSD installation
        solvent_file = os.path.join(os.path.dirname(io.csd_directory()),
                                    'Mercury', 'molecular_libraries',
                                    'ccdc_solvents')

        # Both ways of choosing solvents need the library directory. The
        # exception type is kept as-is for backward compatibility.
        if not os.path.isdir(solvent_file):
            raise FileExistsError('路径不存在!')

        # Collect the SMILES string of every solvent to be removed: either the
        # named solvents or, by default, the whole library.
        if list_solvent_names:
            solvent_paths = [
                os.path.join(solvent_file, solvent + '.mol2')
                for solvent in list_solvent_names
            ]
        else:
            solvent_paths = glob.glob(os.path.join(solvent_file, '*.mol2'))
        solvent_smiles = [
            io.MoleculeReader(path)[0].smiles for path in solvent_paths
        ]

        # Remove the solvents
        list_crystals_remove_solvents = []
        p_bar = tqdm(self.entry_reader)
        for entry in p_bar:
            try:
                if entry.has_3d_structure:
                    # Ensure labels are unique
                    mol = entry.molecule
                    mol.normalise_labels()
                    # Use a copy
                    clone = mol.copy()
                    # Remove all bonds containing a metal atom
                    clone.remove_bonds(b for b in clone.bonds
                                       if any(a.is_metal for a in b.atoms))
                    # Work out which components to remove
                    to_remove = [
                        c for c in clone.components
                        if not self.has_metal(c) and (not self.is_multidentate(
                            c, mol) or self.is_solvent(c, solvent_smiles))
                    ]
                    # Remove the atoms of selected components
                    mol.remove_atoms(
                        mol.atom(a.label) for c in to_remove for a in c.atoms)
                    # Store the stripped molecule back on the entry's crystal
                    entry.crystal.molecule = mol
            except Exception:
                # Deliberate best effort: an entry that cannot be processed is
                # still kept in the output. KeyboardInterrupt and SystemExit
                # are no longer swallowed.
                pass
            # Every entry ends up in the output exactly once, whether it was
            # processed, had no 3D structure, or failed.
            list_crystals_remove_solvents.append(entry)
            p_bar.set_description('正在去除溶剂:')
        self.entry_reader = list_crystals_remove_solvents
        return None
    # NOTE(review): the header of the enclosing definition is not visible here;
    # `wrk_dir`, `fnames` and `create_consensus` are presumably provided by the
    # surrounding code - confirm.
    # Load one pharmacophore model per file name.
    pm = [
        PharmacophoreModel.from_file(os.path.join(wrk_dir, f)) for f in fnames
    ]
    # Presumably derives the consensus features of the loaded models.
    feats = create_consensus(pm, cutoff=1)

    # Collect the features in a fresh model: they are stored as the detected
    # features and also added one by one.
    out = PharmacophoreModel()
    out.detected_features = feats
    for feat in feats:
        out.add_feature(feat)

    # Presumably writes a PyMOL visualisation to the given location.
    out.pymol_visulisation(
        "/home/pcurran/github_packages/pharmacophores/testdata/concensus")


if __name__ == "__main__":
    # Register the CrossMiner feature definitions, located two directory
    # levels above the CSD directory.
    cm_dir = os.path.dirname(os.path.dirname(io.csd_directory()))
    Pharmacophore.read_feature_definitions(
        os.path.join(cm_dir, "CSD_CrossMiner/feature_definitions"))

    wrkdir = "/home/pcurran/github_packages/pharmacophores/testdata/alignment"
    # Parallel lists: entry i of each list is passed together to
    # create_pharmacophore (presumably structure file, ligand HET id and
    # chain id - confirm against create_pharmacophore).
    paths = [
        "1AQ1_aligned.pdb", "1B38_aligned.pdb", "1B39_aligned.pdb",
        "1CKP_aligned.pdb"
    ]
    hetids = ["STU", "ATP", "ATP", "PVB"]
    chains = ["A", "A", "A", "A"]

    for path, het, chain in zip(paths, hetids, chains):
        create_pharmacophore(path, het, chain, out_dir=wrkdir)

    # Same path as `wrkdir` above; not used in the lines visible here.
    wrk_dir = "/home/pcurran/github_packages/pharmacophores/testdata/alignment"