Пример #1
0
 def __reload(self, dirPath, **kwargs):
     oD = None
     version = None
     startTime = time.time()
     useCache = kwargs.get("useCache", True)
     #
     # CARDDumpUrl = kwargs.get("CARDDumpUrl", "https://card.mcmaster.ca/latest/data/broadstreet-v3.1.0.tar.bz2")
     cardDumpUrl = kwargs.get("CARDDumpUrl",
                              "https://card.mcmaster.ca/latest/data")
     ok = False
     fU = FileUtil()
     cardDumpFileName = "card-data.tar.bz2"
     cardDumpPath = os.path.join(dirPath, cardDumpFileName)
     cardDumpDirPath = os.path.join(dirPath, "dump")
     #
     fU.mkdir(dirPath)
     cardDataPath = os.path.join(dirPath, "card-select-data.json")
     #
     logger.info("useCache %r CARDDumpPath %r", useCache, cardDumpPath)
     if useCache and self.__mU.exists(cardDataPath):
         qD = self.__mU.doImport(cardDataPath, fmt="json")
         version = qD["version"]
         oD = qD["data"]
     else:
         logger.info("Fetching url %s path %s", cardDumpUrl, cardDumpPath)
         ok = fU.get(cardDumpUrl, cardDumpPath)
         fU.mkdir(cardDumpDirPath)
         fU.uncompress(cardDumpPath, outputDir=cardDumpDirPath)
         fU.unbundleTarfile(os.path.join(cardDumpDirPath,
                                         cardDumpFileName[:-4]),
                            dirPath=cardDumpDirPath)
         logger.info("Completed fetch (%r) at %s (%.4f seconds)", ok,
                     time.strftime("%Y %m %d %H:%M:%S", time.localtime()),
                     time.time() - startTime)
         oD, version = self.__parseCardData(
             os.path.join(cardDumpDirPath, "card.json"))
         tS = datetime.datetime.now().isoformat()
         qD = {"version": version, "created": tS, "data": oD}
         oD = qD["data"]
         ok = self.__mU.doExport(cardDataPath, qD, fmt="json", indent=3)
         logger.info("Export CARD data (%d) status %r", len(oD), ok)
     # ---
     return oD, version
    def restoreDependencies(self,
                            url,
                            dirPath,
                            bundleLabel="A",
                            userName=None,
                            pw=None):
        """Restore bundled dependencies from remote storage and unbundle these in the
           current local cache directory.

        Args:
            url (str): remote URL
            dirPath (str): remote directory path on the
            bundleLabel (str, optional): optional label preppended to the stashed dependency bundle artifact (default='A')
            userName (str, optional): optional access information. Defaults to None.
            password (str, optional): optional access information. Defaults to None.
        """
        try:
            ok = False
            fileU = FileUtil()
            fn = self.__makeBundleFileName(self.__dependFileName,
                                           bundleLabel=bundleLabel)
            if not url:
                remotePath = os.path.join(dirPath, fn)
                ok = fileU.get(remotePath, self.__dependTarFilePath)

            elif url and url.startswith("http://"):
                remotePath = url + os.path.join("/", dirPath, fn)
                ok = fileU.get(remotePath, self.__dependTarFilePath)

            elif url and url.startswith("sftp://"):
                sftpU = SftpUtil()
                ok = sftpU.connect(url[7:], userName, pw=pw, port=22)
                if ok:
                    remotePath = os.path.join(dirPath, fn)
                    ok = sftpU.get(remotePath, self.__dependTarFilePath)
            else:
                logger.error("Unsupported protocol %r", url)
            if ok:
                ok = fileU.unbundleTarfile(self.__dependTarFilePath,
                                           dirPath=self.__cachePath)
            return ok
        except Exception as e:
            logger.exception("For %r %r Failing with %s", url, dirPath, str(e))
            ok = False
        return ok
Пример #3
0
    def fetchBundle(self, localRestoreDirPath, url, remoteDirPath, remoteStashPrefix="A", userName=None, password=None):
        """Restore bundled dependencies from remote storage and unbundle these in the
           current local cache directory.

        Args:
            localRestoreDirPath (str): local restore path
            url (str): remote URL
            remoteDirPath (str): remote directory path on the remote resource
            remoteStashPrefix (str, optional): optional label preppended to the stashed dependency bundle artifact (default='A')
            userName (str, optional): optional access information. Defaults to None.
            password (str, optional): optional access information. Defaults to None.
        """
        try:
            ok = False
            fileU = FileUtil()
            fn = self.__makeBundleFileName(self.__baseBundleFileName, remoteStashPrefix=remoteStashPrefix)
            if not url:
                remotePath = os.path.join(remoteDirPath, fn)
                if fileU.exists(remotePath):
                    ok = fileU.get(remotePath, self.__localStashTarFilePath)
                else:
                    ok = False
                    logger.warning("Missing bundle file %r", remotePath)

            elif url and (url.startswith("http://") or url.startswith("https://")):
                remotePath = url + os.path.join("/", remoteDirPath, fn)
                ok = fileU.get(remotePath, self.__localStashTarFilePath)

            elif url and url.startswith("sftp://"):
                sftpU = SftpUtil()
                ok = sftpU.connect(url[7:], userName, pw=password, port=22)
                if ok:
                    remotePath = os.path.join(remoteDirPath, fn)
                    ok = sftpU.get(remotePath, self.__localStashTarFilePath)
            else:
                logger.error("Unsupported protocol %r", url)
            if ok:
                ok = fileU.unbundleTarfile(self.__localStashTarFilePath, dirPath=localRestoreDirPath)
            return ok
        except Exception as e:
            logger.exception("For %r %r Failing with %s", url, remoteDirPath, str(e))
            ok = False
        return ok
    def __reload(self,
                 dirPath,
                 useCache=False,
                 imgtDumpUrl=None,
                 testList=None,
                 maxCount=None):
        imgtD = {}
        startTime = time.time()

        fU = FileUtil()
        fU.mkdir(dirPath)
        #
        imgtDataPath = os.path.join(self.__dirPath, "imgt-data.json")
        #
        logger.info("useCache %r imgtFeaturePath %r", useCache, imgtDataPath)
        if useCache and self.__mU.exists(imgtDataPath):
            imgtD = self.__mU.doImport(imgtDataPath, fmt="json")
            self.__version = imgtD["version"]
        else:
            imgtDumpUrl = imgtDumpUrl if imgtDumpUrl else "http://www.imgt.org/download/3Dstructure-DB/IMGT3DFlatFiles.tgz"
            imgtReadmeUrl = "http://www.imgt.org/download/3Dstructure-DB/RELEASE"
            imgtDumpFileName = fU.getFileName(imgtDumpUrl)
            imgtDumpPath = os.path.join(dirPath, imgtDumpFileName)
            imgtReleasePath = os.path.join(dirPath, "IMGT-release.txt")
            _, fn = os.path.split(imgtDumpUrl)
            imgtFlatFilePath = os.path.join(self.__dirPath, fn[:-4])
            #
            logger.info("Fetching url %s path %s", imgtDumpUrl, imgtDumpPath)
            ok1 = fU.get(imgtDumpUrl, imgtDumpPath)
            ok2 = fU.get(imgtReadmeUrl, imgtReleasePath)
            fU.unbundleTarfile(imgtDumpPath, dirPath=dirPath)
            logger.info("Completed fetch (%r) at %s (%.4f seconds)", ok1
                        and ok2,
                        time.strftime("%Y %m %d %H:%M:%S", time.localtime()),
                        time.time() - startTime)
            # ---
            readmeLines = self.__mU.doImport(imgtReleasePath, fmt="list")
            self.__version = readmeLines[0].strip() if readmeLines else None
            logger.info("IMGT version %r", self.__version)
            # ---
            chainD, rawD = self.__imgtFlatFileProcessor(imgtFlatFilePath,
                                                        maxCount=maxCount,
                                                        testList=testList)
            # ---
            tS = datetime.datetime.now().isoformat()
            # vS = datetime.datetime.now().strftime("%Y-%m-%d")
            if testList:
                imgtD = {
                    "version": self.__version,
                    "date": tS,
                    "chains": chainD,
                    "raw": rawD
                }
            else:
                imgtD = {
                    "version": self.__version,
                    "date": tS,
                    "chains": chainD
                }
            ok = self.__mU.doExport(imgtDataPath, imgtD, fmt="json", indent=3)
            logger.info("Completed flatfile prep (%r) at %s (%.4f seconds)",
                        ok, time.strftime("%Y %m %d %H:%M:%S",
                                          time.localtime()),
                        time.time() - startTime)
        return imgtD
Пример #5
0
class FileUtilTests(unittest.TestCase):
    def setUp(self):
        self.__verbose = True
        self.__pathPdbxDictionaryFile = os.path.join(TOPDIR, "rcsb",
                                                     "mock-data",
                                                     "dictionaries",
                                                     "mmcif_pdbx_v5_next.dic")

        self.__pathTaxonomyFile = os.path.join(TOPDIR, "rcsb", "mock-data",
                                               "NCBI", "names.dmp.gz")
        self.__zipFileUrl = "https://inventory.data.gov/dataset/794cd3d7-4d28-4408-8f7d-84b820dbf7f2/resource/6b78ec0c-4980-4ad8-9cbd-2d6eb9eda8e7/download/myfoodapediadata.zip"
        self.__xzFile = os.path.join(TOPDIR, "rcsb", "mock-data",
                                     "MOCK_MODBASE_MODELS",
                                     "NP_001030614.1_1.pdb.xz")
        #
        self.__ftpFileUrl = "ftp://ftp.wwpdb.org/pub/pdb/data/component-models/complete/chem_comp_model.cif.gz"
        self.__httpsFileUrl = "https://ftp.wwpdb.org/pub/pdb/data/component-models/complete/chem_comp_model.cif.gz"
        #
        self.__workPath = os.path.join(HERE, "test-output")
        self.__inpDirPath = os.path.join(HERE, "test-data")
        self.__fileU = FileUtil()
        self.__startTime = time.time()
        logger.debug("Running tests on version %s", __version__)
        logger.debug("Starting %s at %s", self.id(),
                     time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        endTime = time.time()
        logger.debug("Completed %s at %s (%.4f seconds)", self.id(),
                     time.strftime("%Y %m %d %H:%M:%S", time.localtime()),
                     endTime - self.__startTime)

    def testTarBundling(self):
        """Test case for tarfile bundling and unbundling"""
        try:
            tP = os.path.join(self.__workPath, "t0.tar.gz")
            dirPath = os.path.join(self.__inpDirPath, "topdir")

            ok = self.__fileU.bundleTarfile(tP, [dirPath],
                                            mode="w:gz",
                                            recursive=True)
            self.assertTrue(ok)

            numBytes = self.__fileU.size(tP)
            self.assertGreaterEqual(numBytes, 250)
            #
            md5 = self.__fileU.hash(tP, hashType="md5")
            self.assertTrue(md5 is not None)
            #
            ok = self.__fileU.unbundleTarfile(tP, dirPath=self.__workPath)
            self.assertTrue(ok)
            #
            tP = os.path.join(self.__workPath, "t1.tar.gz")
            dirPathList = [
                os.path.join(self.__inpDirPath, "topdir", "subdirA"),
                os.path.join(self.__inpDirPath, "topdir", "subdirB")
            ]

            ok = self.__fileU.bundleTarfile(tP,
                                            dirPathList,
                                            mode="w:gz",
                                            recursive=True)
            self.assertTrue(ok)
            #
            ok = self.__fileU.unbundleTarfile(tP, dirPath=self.__workPath)
            self.assertTrue(ok)

            tP = os.path.join(self.__workPath, "t2.tar")
            dirPathList = [
                os.path.join(self.__inpDirPath, "topdir", "subdirA"),
                os.path.join(self.__inpDirPath, "topdir", "subdirB")
            ]

            ok = self.__fileU.bundleTarfile(tP,
                                            dirPathList,
                                            mode="w",
                                            recursive=True)
            self.assertTrue(ok)
            #
            ok = self.__fileU.unbundleTarfile(tP, dirPath=self.__workPath)
            self.assertTrue(ok)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testGetFile(self):
        """Test case for a local files and directories"""
        try:
            remoteLocator = self.__pathPdbxDictionaryFile
            fn = self.__fileU.getFileName(remoteLocator)
            # _, fn = os.path.split(remoteLocator)
            lPath = os.path.join(self.__workPath, fn)
            ok = self.__fileU.get(remoteLocator, lPath)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.isLocal(lPath)
            self.assertTrue(ok)
            tPath = self.__fileU.getFilePath(lPath)
            self.assertEqual(lPath, tPath)
            ok = self.__fileU.remove(lPath)
            self.assertTrue(ok)
            dPath = os.path.join(self.__workPath, "tdir")
            ok = self.__fileU.mkdir(dPath)
            self.assertTrue(ok)
            ok = self.__fileU.remove(dPath)
            self.assertTrue(ok)
            ok = self.__fileU.remove(";lakdjf")
            self.assertTrue(ok)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testMoveAndCopyFile(self):
        """Test case for copying ("put") and moving ("replace") local files"""
        try:
            remoteLocator = self.__pathPdbxDictionaryFile
            fn = self.__fileU.getFileName(remoteLocator)
            # _, fn = os.path.split(remoteLocator)
            lPath = os.path.join(self.__workPath, fn)
            ok = self.__fileU.get(remoteLocator, lPath)
            self.assertTrue(ok)
            # Test copy file
            dPath2 = os.path.join(self.__workPath, "tdir")
            ok = self.__fileU.mkdir(dPath2)
            self.assertTrue(ok)
            lPath2 = os.path.join(dPath2, fn)
            ok = self.__fileU.put(lPath, lPath2)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath2)
            self.assertTrue(ok)
            # Remove copied file (to test moving file next)
            ok = self.__fileU.remove(lPath2)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath2)
            self.assertFalse(ok)
            # Test move file
            ok = self.__fileU.replace(lPath, lPath2)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertFalse(ok)
            ok = self.__fileU.exists(lPath2)
            self.assertTrue(ok)
            # Now clean up files and dirs
            ok = self.__fileU.remove(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.remove(dPath2)
            self.assertTrue(ok)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testZipUrl(self):
        """Test case for downloading remote zip file and extracting contents."""
        try:
            remoteLocator = self.__zipFileUrl
            # fn = self.__fileU.getFileName(remoteLocator)
            ok = self.__fileU.isLocal(remoteLocator)
            self.assertFalse(ok)
            #
            lPath = os.path.join(self.__workPath,
                                 self.__fileU.getFileName(self.__zipFileUrl))
            ok = self.__fileU.get(remoteLocator, lPath)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.isLocal(lPath)
            self.assertTrue(ok)
            tPath = self.__fileU.getFilePath(lPath)
            self.assertEqual(lPath, tPath)
            fp = self.__fileU.uncompress(lPath, outputDir=self.__workPath)
            ok = fp.endswith("Food_Display_Table.xlsx")
            self.assertTrue(ok)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testFtpUrl(self):
        """Test case for downloading remote file ftp protocol and extracting contents."""
        try:
            remoteLocator = self.__ftpFileUrl
            # fn = self.__fileU.getFileName(remoteLocator)
            ok = self.__fileU.isLocal(remoteLocator)
            self.assertFalse(ok)
            #
            dirPath = os.path.join(self.__workPath, "chem_comp_models")
            lPath = os.path.join(dirPath,
                                 self.__fileU.getFileName(self.__ftpFileUrl))
            ok = self.__fileU.get(remoteLocator, lPath)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.isLocal(lPath)
            self.assertTrue(ok)
            tPath = self.__fileU.getFilePath(lPath)
            self.assertEqual(lPath, tPath)
            fp = self.__fileU.uncompress(lPath, outputDir=dirPath)
            ok = fp.endswith("chem_comp_model.cif")
            self.assertTrue(ok)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testRemote(self):
        """Test case remote status"""
        try:
            remoteLocator = self.__httpsFileUrl
            ok = self.__fileU.isLocal(remoteLocator)
            self.assertFalse(ok)
            #
            ok = self.__fileU.exists(remoteLocator)
            self.assertTrue(ok)
            size = self.__fileU.size(remoteLocator)
            self.assertGreaterEqual(size, 1000)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    @unittest.skip("DrugBank example -- skipping")
    def testGetDrugBankUrl(self):
        """Test case for downloading drugbank master xml file"""
        try:
            remoteLocator = "https://www.drugbank.ca/releases/latest/downloads/all-full-database"
            un = "username"
            pw = "password"
            # fn = self.__fileU.getFileName(remoteLocator)
            ok = self.__fileU.isLocal(remoteLocator)
            self.assertFalse(ok)
            #
            lPath = os.path.join(self.__workPath, "db-download.zip")
            ok = self.__fileU.get(remoteLocator,
                                  lPath,
                                  username=un,
                                  password=pw)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.isLocal(lPath)
            self.assertTrue(ok)
            tPath = self.__fileU.getFilePath(lPath)
            self.assertEqual(lPath, tPath)
            self.__fileU.uncompress(lPath, outputDir=self.__workPath)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testXzFile(self):
        """Test case for extracting contents from xz file"""
        try:
            remoteLocator = self.__xzFile
            fn = self.__fileU.getFileName(remoteLocator)
            lPath = os.path.join(self.__workPath, fn)
            ok = self.__fileU.get(remoteLocator, lPath)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.isLocal(lPath)
            self.assertTrue(ok)
            tPath = self.__fileU.getFilePath(lPath)
            self.assertEqual(lPath, tPath)
            fp = self.__fileU.uncompress(lPath, outputDir=self.__workPath)
            ok = fp.endswith(".pdb")
            self.assertTrue(ok)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Пример #6
0
    def fetchPartitionedBundle(self, localRestoreDirPath, gitRepositoryPath, gitRawHost="raw.githubusercontent.com", gitBranch="master", remoteStashPrefix="A"):
        """Fetch bundle from a remote stash public git repository via http.

        Args:
            localRestoreDirPath (str): local restore path
            gitRepositoryPath (str): git repository path (e.g., rcsb/py-rcsb_exdb_assets_stash)
            gitHost (str, optional): git repository host name. Defaults to github.com.
            gitBranch (str, optional): git branch name. Defaults to master.
            remoteStashPrefix (str, optional): optional label preppended to the stashed dependency bundle artifact (default='A')

        Returns:
          bool:  True for success or False otherwise

            https://raw.githubusercontent.com/rcsb/py-rcsb_exdb_assets_stash/master/stash/<file_or_dir>
        """
        try:
            ok = False
            fileU = FileUtil()
            bundleFileName = self.__makeBundleFileName(self.__baseBundleFileName, remoteStashPrefix=remoteStashPrefix)
            urlBase = "https://" + gitRawHost
            rp = gitRepositoryPath[:-4] if gitRepositoryPath.endswith(".git") else gitRepositoryPath
            repoDirPath = os.path.join(urlBase, rp, gitBranch, "stash")

            # First fetch the manifest file
            remoteDirPath = os.path.join(repoDirPath, bundleFileName[:-7])
            remotePath = os.path.join(remoteDirPath, "MANIFEST")
            logger.debug("Manifest remote %r", remotePath)
            #
            localDirPath = os.path.join(self.__localBundlePath, bundleFileName[:-7])
            manifestPath = os.path.join(localDirPath, "MANIFEST")

            ok = fileU.get(remotePath, manifestPath)
            if not ok:
                logger.error("No manifest file at %r", remotePath)
                return ok
            # ---
            partFileName = "part_1"
            remotePartPath = os.path.join(repoDirPath, bundleFileName[:-7], partFileName)
            logger.debug("remotePartPath %r", remotePartPath)
            # ---
            partList = []
            with open(manifestPath, "r") as mfh:
                line = mfh.readline()
                tf, myHash = line[:-1].split("\t")
                logger.debug("Fetched manifest for %s hash %r", tf, myHash)
                for line in mfh:
                    partList.append(line[:-1])
            #
            logger.debug("Parts (%d) %r", len(partList), partList)
            for part in partList:
                localPath = os.path.join(localDirPath, part)
                remotePath = os.path.join(repoDirPath, bundleFileName[:-7], part)
                logger.debug("%r %r", remotePath, localPath)
                fileU.get(remotePath, localPath)
            #
            sj = SplitJoin()
            ok = sj.join(self.__localStashTarFilePath, localDirPath)
            if ok:
                ok = fileU.unbundleTarfile(self.__localStashTarFilePath, dirPath=localRestoreDirPath)
            return ok
        except Exception as e:
            logger.exception("Failing for %r with %s", bundleFileName, str(e))
            ok = False
        return ok