def __reload(self, dirPath, **kwargs):
    """Reload CARD (Comprehensive Antibiotic Resistance Database) selected data,
    using a locally cached JSON extract when available, otherwise fetching and
    parsing the remote CARD dump.

    Args:
        dirPath (str): directory path for cached and downloaded artifacts
        useCache (bool, optional): use existing cached extract if present. Defaults to True.
        CARDDumpUrl (str, optional): URL of the CARD data dump. Defaults to
            "https://card.mcmaster.ca/latest/data".

    Returns:
        (dict, str): selected CARD data dictionary and data version string
            (oD is None and version is None if neither cache nor fetch succeeds)
    """
    oD = None
    version = None
    startTime = time.time()
    useCache = kwargs.get("useCache", True)
    #
    # CARDDumpUrl = kwargs.get("CARDDumpUrl", "https://card.mcmaster.ca/latest/data/broadstreet-v3.1.0.tar.bz2")
    cardDumpUrl = kwargs.get("CARDDumpUrl", "https://card.mcmaster.ca/latest/data")
    ok = False
    fU = FileUtil()
    cardDumpFileName = "card-data.tar.bz2"
    cardDumpPath = os.path.join(dirPath, cardDumpFileName)
    cardDumpDirPath = os.path.join(dirPath, "dump")
    #
    fU.mkdir(dirPath)
    # Cached JSON extract produced by a previous successful fetch/parse pass
    cardDataPath = os.path.join(dirPath, "card-select-data.json")
    #
    logger.info("useCache %r CARDDumpPath %r", useCache, cardDumpPath)
    if useCache and self.__mU.exists(cardDataPath):
        # Cache hit -- restore version and data from the local JSON extract
        qD = self.__mU.doImport(cardDataPath, fmt="json")
        version = qD["version"]
        oD = qD["data"]
    else:
        # Fetch the compressed dump, decompress it, then unpack the inner tarfile.
        # cardDumpFileName[:-4] strips the ".bz2" suffix leaving "card-data.tar".
        # NOTE(review): the fetch status "ok" is logged but not checked before
        # uncompress/unbundle -- presumably those fail loudly downstream; confirm.
        logger.info("Fetching url %s path %s", cardDumpUrl, cardDumpPath)
        ok = fU.get(cardDumpUrl, cardDumpPath)
        fU.mkdir(cardDumpDirPath)
        fU.uncompress(cardDumpPath, outputDir=cardDumpDirPath)
        fU.unbundleTarfile(os.path.join(cardDumpDirPath, cardDumpFileName[:-4]), dirPath=cardDumpDirPath)
        logger.info("Completed fetch (%r) at %s (%.4f seconds)", ok, time.strftime("%Y %m %d %H:%M:%S", time.localtime()), time.time() - startTime)
        # Parse the unpacked card.json into the selected data extract
        oD, version = self.__parseCardData(os.path.join(cardDumpDirPath, "card.json"))
        tS = datetime.datetime.now().isoformat()
        qD = {"version": version, "created": tS, "data": oD}
        oD = qD["data"]
        # Persist the extract so subsequent calls can use the cache branch
        ok = self.__mU.doExport(cardDataPath, qD, fmt="json", indent=3)
        logger.info("Export CARD data (%d) status %r", len(oD), ok)
    # ---
    return oD, version
def restoreDependencies(self, url, dirPath, bundleLabel="A", userName=None, pw=None):
    """Restore bundled dependencies from remote storage and unbundle these
    in the current local cache directory.

    Args:
        url (str): remote URL (http://, https://, or sftp://); a falsy value
            treats dirPath as a directly accessible (e.g., locally mounted) path
        dirPath (str): remote directory path on the remote resource
        bundleLabel (str, optional): optional label prepended to the stashed
            dependency bundle artifact (default='A')
        userName (str, optional): optional access information. Defaults to None.
        pw (str, optional): optional access information (password). Defaults to None.

    Returns:
        bool: True for success or False otherwise
    """
    try:
        ok = False
        fileU = FileUtil()
        fn = self.__makeBundleFileName(self.__dependFileName, bundleLabel=bundleLabel)
        if not url:
            # No protocol -- treat dirPath as a directly accessible path
            remotePath = os.path.join(dirPath, fn)
            ok = fileU.get(remotePath, self.__dependTarFilePath)
        elif url.startswith(("http://", "https://")):
            # https:// accepted as well as http:// for consistency with fetchBundle()
            remotePath = url + os.path.join("/", dirPath, fn)
            ok = fileU.get(remotePath, self.__dependTarFilePath)
        elif url.startswith("sftp://"):
            sftpU = SftpUtil()
            # Strip the "sftp://" prefix to obtain the host name
            ok = sftpU.connect(url[7:], userName, pw=pw, port=22)
            if ok:
                remotePath = os.path.join(dirPath, fn)
                ok = sftpU.get(remotePath, self.__dependTarFilePath)
        else:
            logger.error("Unsupported protocol %r", url)
        if ok:
            # Unpack the fetched bundle into the local cache directory
            ok = fileU.unbundleTarfile(self.__dependTarFilePath, dirPath=self.__cachePath)
        return ok
    except Exception as e:
        logger.exception("For %r %r Failing with %s", url, dirPath, str(e))
        ok = False
    return ok
def fetchBundle(self, localRestoreDirPath, url, remoteDirPath, remoteStashPrefix="A", userName=None, password=None):
    """Fetch the stashed dependency bundle from remote storage and unpack it
    in the supplied local cache directory.

    Args:
        localRestoreDirPath (str): local restore path
        url (str): remote URL (http://, https://, or sftp://); a falsy value
            treats remoteDirPath as a directly accessible path
        remoteDirPath (str): remote directory path on the remote resource
        remoteStashPrefix (str, optional): optional label prepended to the stashed
            dependency bundle artifact (default='A')
        userName (str, optional): optional access information. Defaults to None.
        password (str, optional): optional access information. Defaults to None.

    Returns:
        bool: True for success or False otherwise
    """
    try:
        ok = False
        fU = FileUtil()
        bundleName = self.__makeBundleFileName(self.__baseBundleFileName, remoteStashPrefix=remoteStashPrefix)
        if not url:
            # Directly accessible path -- verify the bundle exists before copying
            srcPath = os.path.join(remoteDirPath, bundleName)
            if not fU.exists(srcPath):
                ok = False
                logger.warning("Missing bundle file %r", srcPath)
            else:
                ok = fU.get(srcPath, self.__localStashTarFilePath)
        elif url.startswith("http://") or url.startswith("https://"):
            srcPath = url + os.path.join("/", remoteDirPath, bundleName)
            ok = fU.get(srcPath, self.__localStashTarFilePath)
        elif url.startswith("sftp://"):
            # Strip the "sftp://" prefix to obtain the host name
            sftpU = SftpUtil()
            ok = sftpU.connect(url[7:], userName, pw=password, port=22)
            if ok:
                srcPath = os.path.join(remoteDirPath, bundleName)
                ok = sftpU.get(srcPath, self.__localStashTarFilePath)
        else:
            logger.error("Unsupported protocol %r", url)
        if ok:
            # Unpack the fetched bundle into the local restore directory
            ok = fU.unbundleTarfile(self.__localStashTarFilePath, dirPath=localRestoreDirPath)
        return ok
    except Exception as e:
        logger.exception("For %r %r Failing with %s", url, remoteDirPath, str(e))
        ok = False
    return ok
def __reload(self, dirPath, useCache=False, imgtDumpUrl=None, testList=None, maxCount=None):
    """Reload IMGT 3D structure flat-file data, using a locally cached JSON
    extract when available, otherwise fetching and processing the remote
    IMGT flat-file dump.

    Args:
        dirPath (str): directory path for downloaded artifacts
        useCache (bool, optional): use existing cached extract if present. Defaults to False.
        imgtDumpUrl (str, optional): URL of the IMGT flat-file dump. Defaults to the
            canonical IMGT download URL.
        testList (list, optional): restrict processing to these entries; also
            triggers retention of the raw record data in the output. Defaults to None.
        maxCount (int, optional): limit the number of processed records. Defaults to None.

    Returns:
        dict: IMGT data dictionary ("version", "date", "chains", and -- in test
            mode only -- "raw")
    """
    imgtD = {}
    startTime = time.time()
    fU = FileUtil()
    fU.mkdir(dirPath)
    #
    # NOTE(review): cache/output paths use self.__dirPath while downloads use the
    # dirPath argument -- presumably these are the same directory; confirm with caller.
    imgtDataPath = os.path.join(self.__dirPath, "imgt-data.json")
    #
    logger.info("useCache %r imgtFeaturePath %r", useCache, imgtDataPath)
    if useCache and self.__mU.exists(imgtDataPath):
        # Cache hit -- restore the extract and its version from local JSON
        imgtD = self.__mU.doImport(imgtDataPath, fmt="json")
        self.__version = imgtD["version"]
    else:
        imgtDumpUrl = imgtDumpUrl if imgtDumpUrl else "http://www.imgt.org/download/3Dstructure-DB/IMGT3DFlatFiles.tgz"
        imgtReadmeUrl = "http://www.imgt.org/download/3Dstructure-DB/RELEASE"
        imgtDumpFileName = fU.getFileName(imgtDumpUrl)
        imgtDumpPath = os.path.join(dirPath, imgtDumpFileName)
        imgtReleasePath = os.path.join(dirPath, "IMGT-release.txt")
        # fn[:-4] strips the ".tgz" suffix to name the unpacked flat-file directory
        _, fn = os.path.split(imgtDumpUrl)
        imgtFlatFilePath = os.path.join(self.__dirPath, fn[:-4])
        #
        logger.info("Fetching url %s path %s", imgtDumpUrl, imgtDumpPath)
        ok1 = fU.get(imgtDumpUrl, imgtDumpPath)
        ok2 = fU.get(imgtReadmeUrl, imgtReleasePath)
        fU.unbundleTarfile(imgtDumpPath, dirPath=dirPath)
        logger.info("Completed fetch (%r) at %s (%.4f seconds)", ok1 and ok2, time.strftime("%Y %m %d %H:%M:%S", time.localtime()), time.time() - startTime)
        # ---
        # First line of the RELEASE file carries the IMGT version string
        readmeLines = self.__mU.doImport(imgtReleasePath, fmt="list")
        self.__version = readmeLines[0].strip() if readmeLines else None
        logger.info("IMGT version %r", self.__version)
        # ---
        chainD, rawD = self.__imgtFlatFileProcessor(imgtFlatFilePath, maxCount=maxCount, testList=testList)
        # ---
        tS = datetime.datetime.now().isoformat()
        # vS = datetime.datetime.now().strftime("%Y-%m-%d")
        if testList:
            # Test mode retains the raw record data alongside the chain extract
            imgtD = {"version": self.__version, "date": tS, "chains": chainD, "raw": rawD}
        else:
            imgtD = {"version": self.__version, "date": tS, "chains": chainD}
        # Persist the extract so subsequent calls can use the cache branch
        ok = self.__mU.doExport(imgtDataPath, imgtD, fmt="json", indent=3)
        logger.info("Completed flatfile prep (%r) at %s (%.4f seconds)", ok, time.strftime("%Y %m %d %H:%M:%S", time.localtime()), time.time() - startTime)
    return imgtD
class FileUtilTests(unittest.TestCase):
    """Tests for FileUtil covering local file operations, remote fetches
    (zip/ftp/https), compression handling, and tar bundling/unbundling."""

    def setUp(self):
        # Fixture paths and URLs used across the test cases below.
        # NOTE(review): self.__verbose and self.__pathTaxonomyFile appear unused here --
        # possibly referenced by tests outside this view; confirm before removing.
        self.__verbose = True
        self.__pathPdbxDictionaryFile = os.path.join(TOPDIR, "rcsb", "mock-data", "dictionaries", "mmcif_pdbx_v5_next.dic")
        self.__pathTaxonomyFile = os.path.join(TOPDIR, "rcsb", "mock-data", "NCBI", "names.dmp.gz")
        self.__zipFileUrl = "https://inventory.data.gov/dataset/794cd3d7-4d28-4408-8f7d-84b820dbf7f2/resource/6b78ec0c-4980-4ad8-9cbd-2d6eb9eda8e7/download/myfoodapediadata.zip"
        self.__xzFile = os.path.join(TOPDIR, "rcsb", "mock-data", "MOCK_MODBASE_MODELS", "NP_001030614.1_1.pdb.xz")
        #
        self.__ftpFileUrl = "ftp://ftp.wwpdb.org/pub/pdb/data/component-models/complete/chem_comp_model.cif.gz"
        self.__httpsFileUrl = "https://ftp.wwpdb.org/pub/pdb/data/component-models/complete/chem_comp_model.cif.gz"
        #
        self.__workPath = os.path.join(HERE, "test-output")
        self.__inpDirPath = os.path.join(HERE, "test-data")
        self.__fileU = FileUtil()
        self.__startTime = time.time()
        logger.debug("Running tests on version %s", __version__)
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        # Log per-test wall-clock time
        endTime = time.time()
        logger.debug("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testTarBundling(self):
        """Test case for tarfile bundling and unbundling"""
        try:
            # Bundle a single directory recursively with gzip compression
            tP = os.path.join(self.__workPath, "t0.tar.gz")
            dirPath = os.path.join(self.__inpDirPath, "topdir")
            ok = self.__fileU.bundleTarfile(tP, [dirPath], mode="w:gz", recursive=True)
            self.assertTrue(ok)
            numBytes = self.__fileU.size(tP)
            self.assertGreaterEqual(numBytes, 250)
            #
            md5 = self.__fileU.hash(tP, hashType="md5")
            self.assertTrue(md5 is not None)
            #
            ok = self.__fileU.unbundleTarfile(tP, dirPath=self.__workPath)
            self.assertTrue(ok)
            #
            # Bundle multiple directories with gzip compression
            tP = os.path.join(self.__workPath, "t1.tar.gz")
            dirPathList = [os.path.join(self.__inpDirPath, "topdir", "subdirA"), os.path.join(self.__inpDirPath, "topdir", "subdirB")]
            ok = self.__fileU.bundleTarfile(tP, dirPathList, mode="w:gz", recursive=True)
            self.assertTrue(ok)
            #
            ok = self.__fileU.unbundleTarfile(tP, dirPath=self.__workPath)
            self.assertTrue(ok)
            # Bundle multiple directories uncompressed
            tP = os.path.join(self.__workPath, "t2.tar")
            dirPathList = [os.path.join(self.__inpDirPath, "topdir", "subdirA"), os.path.join(self.__inpDirPath, "topdir", "subdirB")]
            ok = self.__fileU.bundleTarfile(tP, dirPathList, mode="w", recursive=True)
            self.assertTrue(ok)
            #
            ok = self.__fileU.unbundleTarfile(tP, dirPath=self.__workPath)
            self.assertTrue(ok)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testGetFile(self):
        """Test case for local files and directories"""
        try:
            # Copy a local file and exercise exists/isLocal/getFilePath/remove
            remoteLocator = self.__pathPdbxDictionaryFile
            fn = self.__fileU.getFileName(remoteLocator)
            # _, fn = os.path.split(remoteLocator)
            lPath = os.path.join(self.__workPath, fn)
            ok = self.__fileU.get(remoteLocator, lPath)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.isLocal(lPath)
            self.assertTrue(ok)
            tPath = self.__fileU.getFilePath(lPath)
            self.assertEqual(lPath, tPath)
            ok = self.__fileU.remove(lPath)
            self.assertTrue(ok)
            # Directory create/remove
            dPath = os.path.join(self.__workPath, "tdir")
            ok = self.__fileU.mkdir(dPath)
            self.assertTrue(ok)
            ok = self.__fileU.remove(dPath)
            self.assertTrue(ok)
            # remove() is expected to succeed even for a nonexistent path
            ok = self.__fileU.remove(";lakdjf")
            self.assertTrue(ok)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testMoveAndCopyFile(self):
        """Test case for copying ("put") and moving ("replace") local files"""
        try:
            remoteLocator = self.__pathPdbxDictionaryFile
            fn = self.__fileU.getFileName(remoteLocator)
            # _, fn = os.path.split(remoteLocator)
            lPath = os.path.join(self.__workPath, fn)
            ok = self.__fileU.get(remoteLocator, lPath)
            self.assertTrue(ok)
            # Test copy file
            dPath2 = os.path.join(self.__workPath, "tdir")
            ok = self.__fileU.mkdir(dPath2)
            self.assertTrue(ok)
            lPath2 = os.path.join(dPath2, fn)
            ok = self.__fileU.put(lPath, lPath2)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath2)
            self.assertTrue(ok)
            # Remove copied file (to test moving file next)
            ok = self.__fileU.remove(lPath2)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath2)
            self.assertFalse(ok)
            # Test move file: source disappears, destination appears
            ok = self.__fileU.replace(lPath, lPath2)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertFalse(ok)
            ok = self.__fileU.exists(lPath2)
            self.assertTrue(ok)
            # Now clean up files and dirs
            ok = self.__fileU.remove(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.remove(dPath2)
            self.assertTrue(ok)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testZipUrl(self):
        """Test case for downloading remote zip file and extracting contents."""
        try:
            remoteLocator = self.__zipFileUrl
            # fn = self.__fileU.getFileName(remoteLocator)
            ok = self.__fileU.isLocal(remoteLocator)
            self.assertFalse(ok)
            #
            lPath = os.path.join(self.__workPath, self.__fileU.getFileName(self.__zipFileUrl))
            ok = self.__fileU.get(remoteLocator, lPath)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.isLocal(lPath)
            self.assertTrue(ok)
            tPath = self.__fileU.getFilePath(lPath)
            self.assertEqual(lPath, tPath)
            # uncompress returns the path of the extracted content
            fp = self.__fileU.uncompress(lPath, outputDir=self.__workPath)
            ok = fp.endswith("Food_Display_Table.xlsx")
            self.assertTrue(ok)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testFtpUrl(self):
        """Test case for downloading remote file ftp protocol and extracting contents."""
        try:
            remoteLocator = self.__ftpFileUrl
            # fn = self.__fileU.getFileName(remoteLocator)
            ok = self.__fileU.isLocal(remoteLocator)
            self.assertFalse(ok)
            #
            dirPath = os.path.join(self.__workPath, "chem_comp_models")
            lPath = os.path.join(dirPath, self.__fileU.getFileName(self.__ftpFileUrl))
            ok = self.__fileU.get(remoteLocator, lPath)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.isLocal(lPath)
            self.assertTrue(ok)
            tPath = self.__fileU.getFilePath(lPath)
            self.assertEqual(lPath, tPath)
            fp = self.__fileU.uncompress(lPath, outputDir=dirPath)
            ok = fp.endswith("chem_comp_model.cif")
            self.assertTrue(ok)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testRemote(self):
        """Test case remote status"""
        try:
            # exists()/size() operate on a remote https locator without download
            remoteLocator = self.__httpsFileUrl
            ok = self.__fileU.isLocal(remoteLocator)
            self.assertFalse(ok)
            #
            ok = self.__fileU.exists(remoteLocator)
            self.assertTrue(ok)
            size = self.__fileU.size(remoteLocator)
            self.assertGreaterEqual(size, 1000)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    @unittest.skip("DrugBank example -- skipping")
    def testGetDrugBankUrl(self):
        """Test case for downloading drugbank master xml file"""
        try:
            # Requires valid DrugBank credentials -- skipped by default
            remoteLocator = "https://www.drugbank.ca/releases/latest/downloads/all-full-database"
            un = "username"
            pw = "password"
            # fn = self.__fileU.getFileName(remoteLocator)
            ok = self.__fileU.isLocal(remoteLocator)
            self.assertFalse(ok)
            #
            lPath = os.path.join(self.__workPath, "db-download.zip")
            ok = self.__fileU.get(remoteLocator, lPath, username=un, password=pw)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.isLocal(lPath)
            self.assertTrue(ok)
            tPath = self.__fileU.getFilePath(lPath)
            self.assertEqual(lPath, tPath)
            self.__fileU.uncompress(lPath, outputDir=self.__workPath)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testXzFile(self):
        """Test case for extracting contents from xz file"""
        try:
            remoteLocator = self.__xzFile
            fn = self.__fileU.getFileName(remoteLocator)
            lPath = os.path.join(self.__workPath, fn)
            ok = self.__fileU.get(remoteLocator, lPath)
            self.assertTrue(ok)
            ok = self.__fileU.exists(lPath)
            self.assertTrue(ok)
            ok = self.__fileU.isLocal(lPath)
            self.assertTrue(ok)
            tPath = self.__fileU.getFilePath(lPath)
            self.assertEqual(lPath, tPath)
            fp = self.__fileU.uncompress(lPath, outputDir=self.__workPath)
            ok = fp.endswith(".pdb")
            self.assertTrue(ok)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
def fetchPartitionedBundle(self, localRestoreDirPath, gitRepositoryPath, gitRawHost="raw.githubusercontent.com", gitBranch="master", remoteStashPrefix="A"):
    """Fetch bundle from a remote stash public git repository via http.

    Args:
        localRestoreDirPath (str): local restore path
        gitRepositoryPath (str): git repository path (e.g., rcsb/py-rcsb_exdb_assets_stash)
        gitRawHost (str, optional): git raw content host name. Defaults to raw.githubusercontent.com.
        gitBranch (str, optional): git branch name. Defaults to master.
        remoteStashPrefix (str, optional): optional label prepended to the stashed dependency bundle artifact (default='A')

    Returns:
        bool: True for success or False otherwise

    Example raw content path:
        https://raw.githubusercontent.com/rcsb/py-rcsb_exdb_assets_stash/master/stash/<file_or_dir>
    """
    # Defined before the try block so the exception handler cannot hit a NameError
    bundleFileName = None
    try:
        ok = False
        fileU = FileUtil()
        bundleFileName = self.__makeBundleFileName(self.__baseBundleFileName, remoteStashPrefix=remoteStashPrefix)
        urlBase = "https://" + gitRawHost
        # Strip a trailing ".git" suffix from the repository path if present
        rp = gitRepositoryPath[:-4] if gitRepositoryPath.endswith(".git") else gitRepositoryPath
        repoDirPath = os.path.join(urlBase, rp, gitBranch, "stash")
        # First fetch the manifest file enumerating the bundle parts;
        # bundleFileName[:-7] strips the ".tar.gz" suffix to name the part directory
        remoteDirPath = os.path.join(repoDirPath, bundleFileName[:-7])
        remotePath = os.path.join(remoteDirPath, "MANIFEST")
        logger.debug("Manifest remote %r", remotePath)
        #
        localDirPath = os.path.join(self.__localBundlePath, bundleFileName[:-7])
        manifestPath = os.path.join(localDirPath, "MANIFEST")
        ok = fileU.get(remotePath, manifestPath)
        if not ok:
            logger.error("No manifest file at %r", remotePath)
            return ok
        # ---
        # First manifest line is "<bundle>\t<hash>"; remaining lines name the parts
        partList = []
        with open(manifestPath, "r") as mfh:
            line = mfh.readline()
            tf, myHash = line[:-1].split("\t")
            logger.debug("Fetched manifest for %s hash %r", tf, myHash)
            for line in mfh:
                partList.append(line[:-1])
        #
        logger.debug("Parts (%d) %r", len(partList), partList)
        for part in partList:
            localPath = os.path.join(localDirPath, part)
            remotePath = os.path.join(repoDirPath, bundleFileName[:-7], part)
            logger.debug("%r %r", remotePath, localPath)
            # Abort early if any part cannot be fetched -- joining an incomplete
            # part set would silently produce a corrupt bundle
            if not fileU.get(remotePath, localPath):
                logger.error("Fetch failing for part %r", remotePath)
                return False
        #
        # Reassemble the parts and unpack the resulting tarfile locally
        sj = SplitJoin()
        ok = sj.join(self.__localStashTarFilePath, localDirPath)
        if ok:
            ok = fileU.unbundleTarfile(self.__localStashTarFilePath, dirPath=localRestoreDirPath)
        return ok
    except Exception as e:
        logger.exception("Failing for %r with %s", bundleFileName, str(e))
        ok = False
    return ok