def __reloadGlycans(self, baseUrl, fallbackUrl, dirPath, useCache=True):
    gD = {}
    logger.debug("Using dirPath %r", dirPath)
    self.__mU.mkdir(dirPath)
    #
    myDataPath = os.path.join(dirPath, "glygen-glycan-list.json")
    if useCache and self.__mU.exists(myDataPath):
        gD = self.__mU.doImport(myDataPath, fmt="json")
        logger.debug("GlyGen glycan data length %d", len(gD))
    elif not useCache:
        logger.debug("Fetch GlyGen glycan data from primary data source %s", baseUrl)
        endPoint = os.path.join(baseUrl, "glycan_masterlist.csv")
        #
        logger.info("Fetch GlyGen glycan data from primary data source %s", endPoint)
        rawPath = os.path.join(dirPath, "glycan_masterlist.csv")
        fU = FileUtil()
        ok = fU.get(endPoint, rawPath)
        logger.debug("Fetch GlyGen glycan data status %r", ok)
        if not ok:
            endPoint = os.path.join(fallbackUrl, "glycan_masterlist.csv")
            ok = fU.get(endPoint, rawPath)
            logger.info("Fetch fallback GlyGen glycan data status %r", ok)
        #
        if ok:
            gD = self.__parseGlycanList(rawPath)
            ok = self.__mU.doExport(myDataPath, gD, fmt="json")
            logger.info("Exported GlyGen glycan list (%d) (%r) %s", len(gD), ok, myDataPath)
    #
    return gD
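# NOTE: a condensed, hedged sketch of the primary/fallback fetch idiom used
# above, restricted to FileUtil calls that already appear in this section;
# the endpoint URLs and cache directory are hypothetical, and the import
# path is assumed from rcsb.utils.io.
import os
from rcsb.utils.io.FileUtil import FileUtil  # assumed import path

fU = FileUtil()
rawPath = os.path.join("CACHE", "glycan_masterlist.csv")
ok = fU.get("https://primary.example.org/glycan_masterlist.csv", rawPath)
if not ok:
    # fall back to the mirror only when the primary fetch fails
    ok = fU.get("https://fallback.example.org/glycan_masterlist.csv", rawPath)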
def __init__(self, cfgOb, cachePath, useCache=True, rebuildFlag=False, **kwargs):
    """A collection of schema build and caching methods.

    Args:
        cfgOb (object): ConfigInfo() instance
        cachePath (str): path to directory containing schema
        useCache (bool, optional): use cached schema. Defaults to True.
        rebuildFlag (bool, optional): rebuild and cache schema on the fly. Defaults to False.
    """
    self.__cfgOb = cfgOb
    self.__configName = self.__cfgOb.getDefaultSectionName()
    self.__cachePath = os.path.abspath(cachePath)
    self.__rebuildFlag = rebuildFlag
    # A rebuild request implies that the freshly rebuilt schema cache will be used
    self.__useCache = rebuildFlag if rebuildFlag else useCache
    #
    self.__workPath = os.path.join(self.__cachePath, "work")
    self.__fileU = FileUtil(workPath=self.__workPath)
    self.__schemaCachePath = os.path.join(self.__cachePath, self.__cfgOb.get("SCHEMA_DEFINITION_CACHE_DIR", sectionName=self.__configName))
    self.__jsonSchemaCachePath = os.path.join(self.__cachePath, self.__cfgOb.get("JSON_SCHEMA_DEFINITION_CACHE_DIR", sectionName=self.__configName))
    self.__fileU.mkdir(self.__schemaCachePath)
    self.__fileU.mkdir(self.__jsonSchemaCachePath)
    self.__kwargs = kwargs
def __init__(self, cfgOb, cachePath, useCache=True, **kwargs):
    """Utilities to access and update provenance details.

    Args:
        cfgOb (object): ConfigInfo() instance
        cachePath (str): path to the directory containing provenance cache files
        useCache (bool, optional): use cached provenance data. Defaults to True.
    """
    self.__cfgOb = cfgOb
    self.__configName = self.__cfgOb.getDefaultSectionName()
    self.__cachePath = cachePath
    self.__useCache = useCache
    #
    self.__workPath = os.path.join(self.__cachePath, "work")
    self.__provenanceCachePath = os.path.join(self.__cachePath, self.__cfgOb.get("PROVENANCE_INFO_CACHE_DIR", sectionName=self.__configName))
    self.__provenanceLocator = self.__cfgOb.getPath("PROVENANCE_INFO_LOCATOR", sectionName=self.__configName)
    #
    self.__fileU = FileUtil(workPath=self.__workPath)
    self.__fileU.mkdir(self.__provenanceCachePath)
    self.__kwargs = kwargs
def __rebuildCache(self, urlTargetPfam, urlTargetPfamFB, dirPath, useCache):
    pfamD = {}
    fmt = "json"
    ext = fmt if fmt == "json" else "pic"
    pfamDataPath = os.path.join(dirPath, "pfam-data.%s" % ext)
    #
    logger.debug("Using cache data path %s", dirPath)
    self.__mU.mkdir(dirPath)
    #
    if useCache and self.__mU.exists(pfamDataPath):
        pfamD = self.__mU.doImport(pfamDataPath, fmt=fmt)
        logger.debug("Pfam data length %d", len(pfamD))
    elif not useCache:
        # ------
        fU = FileUtil()
        logger.info("Fetch data from source %s in %s", urlTargetPfam, dirPath)
        fp = os.path.join(dirPath, fU.getFileName(urlTargetPfam))
        ok = fU.get(urlTargetPfam, fp)
        if not ok:
            fp = os.path.join(dirPath, fU.getFileName(urlTargetPfamFB))
            ok = fU.get(urlTargetPfamFB, fp)
            logger.info("Fetch data fallback fetch status is %r", ok)
        pfamD = self.__getPfamIndex(fp)
        ok = self.__mU.doExport(pfamDataPath, pfamD, fmt=fmt)
        logger.info("Caching %d in %s status %r", len(pfamD), pfamDataPath, ok)
        # ------
    #
    return pfamD
def __reloadEntryIds(self, urlTarget, urlFallbackTarget, dirPath, useCache=True):
    idD = {}
    tdL = []
    fU = FileUtil()
    fn = fU.getFileName(urlTarget)
    fp = os.path.join(dirPath, fn)
    self.__mU.mkdir(dirPath)
    #
    if useCache and self.__mU.exists(fp):
        tdL = self.__mU.doImport(fp, fmt="json")
        logger.debug("Reading cached IDs list (%d)", len(tdL))
    else:
        logger.info("Fetch ID list from %s", urlTarget)
        ok = fU.get(urlTarget, fp)
        if not ok:
            ok = fU.get(urlFallbackTarget, fp)
        #
        if ok:
            tdL = self.__mU.doImport(fp, fmt="json")
    #
    for td in tdL:
        for k, v in td.items():
            try:
                idD[k] = datetime.datetime.fromisoformat(v)
            except Exception as e:
                logger.error("Date processing failing for %r %r with %s", k, v, str(e))
    #
    sTupL = sorted(idD.items(), key=lambda item: item[1])
    return {k: v for k, v in sTupL}
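# NOTE: a self-contained sketch of the date handling above -- ISO 8601
# strings are parsed with datetime.fromisoformat() and the mapping is
# returned in ascending date order; the entry IDs and dates below are
# hypothetical.
import datetime

tdL = [{"4HHB": "2020-01-15"}, {"1TIM": "2019-06-02"}]
idD = {}
for td in tdL:
    for k, v in td.items():
        idD[k] = datetime.datetime.fromisoformat(v)
sortedD = dict(sorted(idD.items(), key=lambda item: item[1]))
# sortedD orders "1TIM" (2019) before "4HHB" (2020)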
def setUp(self):
    self.__cachePath = os.path.join(HERE, "test-output", "CACHE")
    self.__dataPath = os.path.join(HERE, "test-data")
    dirPath = os.path.join(self.__cachePath, "rcsb_entry_info")
    fU = FileUtil()
    fn = "entry_info_details.json"
    fU.put(os.path.join(self.__dataPath, fn), os.path.join(dirPath, fn))
def split(self, inputFilePath, splitDirPath, prefixName="part_", maxSizeMB=50):
    chunkSize = maxSizeMB * 1000000
    partNumber = 0
    fU = FileUtil()
    fU.mkdir(splitDirPath)
    manifestPath = os.path.join(splitDirPath, "MANIFEST")
    myHash = fU.hash(inputFilePath, hashType="md5")
    with open(manifestPath, "w") as mfh:
        mfh.write("%s\t%s\n" % (inputFilePath, myHash))
        with open(inputFilePath, "rb") as ifh:
            chunk = ifh.read(chunkSize)
            while chunk:
                partNumber += 1
                partName = prefixName + str(partNumber)
                fp = os.path.join(splitDirPath, partName)
                with open(fp, "wb") as ofh:
                    ofh.write(chunk)
                mfh.write("%s\n" % partName)
                #
                chunk = ifh.read(chunkSize)
    return partNumber
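# NOTE: a hedged usage sketch for split() -- SplitJoin is the enclosing
# class (its name is visible where pushBundle() below instantiates it);
# the input path is hypothetical. Each part is at most maxSizeMB decimal
# megabytes, and MANIFEST records the source path, its MD5, and the parts.
sj = SplitJoin()
numParts = sj.split("stash-bundle.tar.gz", "parts", prefixName="part_", maxSizeMB=50)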
def __init__(self, **kwargs):
    self.__workPath = kwargs.get("workPath", ".")
    # naming components for temporary working directories
    self.__workDirSuffix = kwargs.get("workDirSuffix", "marshall_")
    self.__workDirPrefix = kwargs.get("workDirPrefix", "_tempdir")
    #
    self.__fileU = FileUtil(workPath=self.__workPath)
    self.__ioU = IoUtil()
def __init__(self, cfgOb, cachePath, useCache=True, **kwargs):
    """Data type application and instance information provider.

    Args:
        cfgOb (object): ConfigInfo() object instance
        cachePath (str): path to hold the cache directory
        useCache (bool, optional): flag to use cached files. Defaults to True.
    """
    self.__cfgOb = cfgOb
    self.__configName = self.__cfgOb.getDefaultSectionName()
    self.__useCache = useCache
    self.__cachePath = cachePath
    #
    self.__contentInfoConfigName = "content_info_helper_configuration"
    self.__fileU = FileUtil()
    self.__contentDefHelper = self.__cfgOb.getHelper("CONTENT_DEF_HELPER_MODULE", sectionName=self.__configName, cfgOb=self.__cfgOb)
    self.__dirPath = os.path.join(cachePath, self.__cfgOb.get("DATA_TYPE_INFO_CACHE_DIR", sectionName=self.__configName))
    self.__kwargs = kwargs
    #
    logger.debug("Leaving constructor")
def storeBundle(self, url, remoteDirPath, remoteStashPrefix="A", userName=None, password=None):
    """Store a copy of the bundled search dependencies remotely.

    Args:
        url (str): URL string for the destination host (e.g., sftp://myserver.net or None for a local file)
        remoteDirPath (str): remote directory path on the remote resource
        remoteStashPrefix (str, optional): optional label prepended to the stashed dependency bundle artifact (default='A')
        userName (str, optional): optional access information. Defaults to None.
        password (str, optional): optional access information. Defaults to None.

    Returns:
        bool: True for success or False otherwise
    """
    try:
        ok = False
        fn = self.__makeBundleFileName(self.__baseBundleFileName, remoteStashPrefix=remoteStashPrefix)
        if url and url.startswith("sftp://"):
            sftpU = SftpUtil()
            hostName = url[7:]
            ok = sftpU.connect(hostName, userName, pw=password, port=22)
            if ok:
                remotePath = os.path.join("/", remoteDirPath, fn)
                ok = sftpU.put(self.__localStashTarFilePath, remotePath)
        elif not url:
            fileU = FileUtil()
            remotePath = os.path.join(remoteDirPath, fn)
            ok = fileU.put(self.__localStashTarFilePath, remotePath)
        else:
            logger.error("Unsupported stash protocol %r", url)
        return ok
    except Exception as e:
        logger.exception("For %r %r failing with %s", url, remoteDirPath, str(e))
    return False
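# NOTE: joining with a leading "/" anchors the remote path at the root even
# when remoteDirPath is supplied without one -- a small runnable check with
# hypothetical names:
import os

print(os.path.join("/", "stash/deps", "A-bundle.tar.gz"))  # -> /stash/deps/A-bundle.tar.gz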
def __reload(self, dirPath, baseVersion, useCache, **kwargs):
    startTime = time.time()
    mU = MarshalUtil(workPath=dirPath)
    chemblDbUrl = kwargs.get("ChEMBLDbUrl", "ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/latest/")
    ok = False
    fU = FileUtil()
    fU.mkdir(dirPath)
    #
    # ChEMBL current version <baseVersion>,...
    # template: chembl_<baseVersion>.fa.gz
    #
    targetFileName = "chembl_" + str(baseVersion) + ".fa.gz"
    mappingFileName = "chembl_uniprot_mapping.txt"
    #
    chemblTargetPath = os.path.join(dirPath, targetFileName)
    chemblMappingPath = os.path.join(dirPath, mappingFileName)
    mappingFilePath = os.path.join(dirPath, "chembl_uniprot_mapping.json")
    #
    mapD = {}
    if useCache and fU.exists(mappingFilePath):
        logger.info("useCache %r using %r and %r and %r", useCache, chemblTargetPath, chemblMappingPath, mappingFilePath)
        mapD = mU.doImport(mappingFilePath, fmt="json")
    else:
        # Get the ChEMBL UniProt mapping file
        url = os.path.join(chemblDbUrl, mappingFileName)
        ok = fU.get(url, chemblMappingPath)
        logger.info("Fetched %r url %s path %s", ok, url, chemblMappingPath)
        logger.info("Reading ChEMBL mapping file path %s", chemblMappingPath)
        rowL = mU.doImport(chemblMappingPath, fmt="tdd", rowFormat="list")
        for row in rowL:
            mapD[row[0]] = (row[1], row[2], row[3])
        ok = mU.doExport(mappingFilePath, mapD, fmt="json")
        logger.info("Processed mapping path %s (%d) %r", mappingFilePath, len(mapD), ok)
        #
        # Get the target FASTA files -- probe successive releases until one download succeeds
        for vers in range(baseVersion, baseVersion + 10):
            logger.info("Now fetching version %r", vers)
            self.__version = vers
            targetFileName = "chembl_" + str(vers) + ".fa.gz"
            chemblTargetPath = os.path.join(dirPath, "chembl_targets_raw.fa.gz")
            url = os.path.join(chemblDbUrl, targetFileName)
            ok = fU.get(url, chemblTargetPath)
            logger.info("Fetched %r url %s path %s", ok, url, chemblTargetPath)
            if ok:
                break
    #
    logger.info("Completed reload at %s (%.4f seconds)", time.strftime("%Y %m %d %H:%M:%S", time.localtime()), time.time() - startTime)
    #
    return mapD
def clearRawCache(self):
    try:
        rawDirPath = os.path.join(self.__cachePath, self.__dirName + "-raw")
        fU = FileUtil()
        return fU.remove(rawDirPath)
    except Exception:
        pass
    return False
def put(self, localPath, remotePath):
    """Put a local file on a remote FTP server.

    Arguments:
        localPath (str): local file path
        remotePath (str): remote file path

    Returns:
        bool: True for success or False otherwise
    """
    try:
        # First, make sure the provided localPath represents a file, not a directory
        if not os.path.isfile(localPath):
            logger.error("put failing for localPath %s - path must be to a specific file, not a directory.", localPath)
            return False
        fileU = FileUtil()
        remotePathDir = fileU.getFilePath(remotePath)
        self.mkdir(remotePathDir)
        # If the provided remotePath already exists and is a directory, put the file on the remote server using the local file name
        # to avoid unintentionally overwriting an entire remote directory with a single file
        if os.path.exists(remotePath) and os.path.isdir(remotePath):
            localFileName = FileUtil().getFileName(localPath)
            remoteFilePath = os.path.join(remotePath, localFileName)
        else:
            remoteFilePath = remotePath
        with open(localPath, "rb") as lFP:
            self.__ftpClient.storbinary("STOR %s" % remoteFilePath, lFP)
        if remoteFilePath in self.listdir(remotePathDir):
            return True
        else:
            logger.error("put failing for localPath %s remoteFilePath %s", localPath, remoteFilePath)
            return False
    except Exception as e:
        if self.__raiseExceptions:
            raise e
        else:
            logger.error("put failing for localPath %s remotePath %s with %s", localPath, remotePath, str(e))
            return False
def __fetchFromBackup(self, urlBackupPath, cathDirPath):
    fn = self.__getCathDomainFileName()
    cathDomainPath = os.path.join(cathDirPath, fn)
    self.__mU.mkdir(cathDirPath)
    #
    backupUrl = urlBackupPath + "/" + fn
    logger.info("Using backup URL %r", backupUrl)
    fU = FileUtil()
    ok = fU.get(backupUrl, cathDomainPath)
    return ok
def __doAquireLock(self):
    fU = FileUtil()
    mode = os.O_WRONLY | os.O_CREAT | os.O_EXCL | os.O_TRUNC
    try:
        fU.mkdir(os.path.dirname(self.__lockFilePath))
        fd = os.open(self.__lockFilePath, mode)
    except (IOError, OSError):
        pass
    else:
        self.__lockFileFileDescriptor = fd
    return None
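# NOTE: a minimal standalone illustration of the O_EXCL locking idiom used
# above -- os.open() with O_CREAT | O_EXCL fails atomically when the lock
# file already exists, so at most one caller acquires the lock. The lock
# file path here is hypothetical.
import os
import tempfile

lockPath = os.path.join(tempfile.mkdtemp(), "example.lock")
mode = os.O_WRONLY | os.O_CREAT | os.O_EXCL | os.O_TRUNC
fd = os.open(lockPath, mode)  # first caller succeeds
try:
    os.open(lockPath, mode)   # a second attempt raises FileExistsError
except OSError:
    print("lock is already held")
os.close(fd)
os.remove(lockPath)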
def __fetchFromBackup(self, urlBackupPath, scopDirPath):
    pyVersion = sys.version_info[0]
    fn = "scop_domains-py%s.pic" % str(pyVersion)
    scopDomainPath = os.path.join(scopDirPath, fn)
    self.__mU.mkdir(scopDirPath)
    #
    backupUrl = urlBackupPath + "/" + fn
    logger.info("Using backup URL %r", backupUrl)
    fU = FileUtil()
    ok = fU.get(backupUrl, scopDomainPath)
    return ok
def __modelFixture(self):
    fU = FileUtil()
    modelSourcePath = os.path.join(self.__mockTopPath, "AF")
    for iPath in glob.iglob(os.path.join(modelSourcePath, "*.cif.gz")):
        fn = os.path.basename(iPath)
        uId = fn.split("-")[1]
        h3 = uId[-2:]
        h2 = uId[-4:-2]
        h1 = uId[-6:-4]
        oPath = os.path.join(self.__cachePath, "computed-models", h1, h2, h3, fn)
        fU.put(iPath, oPath)
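# NOTE: a small sketch of the two-character path sharding applied above to
# computed-model files; the file name below is hypothetical but follows the
# AlphaFold "AF-<UniProtId>-..." convention assumed by the fixture.
fn = "AF-P12345-F1-model_v4.cif.gz"
uId = fn.split("-")[1]                         # "P12345"
h1, h2, h3 = uId[-6:-4], uId[-4:-2], uId[-2:]  # "P1", "23", "45"
# files land under computed-models/P1/23/45/, keeping directories small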
def __rebuildCache(self, urlTargetIsoLtwa, dirPath, useCache):
    """Rebuild the cache of ISO abbreviation term data.

    Args:
        urlTargetIsoLtwa (str): URL for ISO4 LTWA title word abbreviations
        dirPath (str): cache path
        useCache (bool): flag to use cached files

    Returns:
        tuple: (dict) title word abbreviations
               (dict) language conflict dictionary
               (list) multi-word abbreviation targets

    Notes:
        ISO source file (tab delimited UTF-16LE) is maintained at the ISSN site -
        https://www.issn.org/wp-content/uploads/2013/09/LTWA_20160915.txt
    """
    aD = {}
    mU = MarshalUtil(workPath=dirPath)
    fmt = "json"
    ext = fmt if fmt == "json" else "pic"
    isoLtwaNamePath = os.path.join(dirPath, "iso-ltwa.%s" % ext)
    logger.debug("Using cache data path %s", dirPath)
    mU.mkdir(dirPath)
    if not useCache:
        for fp in [isoLtwaNamePath]:
            try:
                os.remove(fp)
            except Exception:
                pass
    #
    if useCache and mU.exists(isoLtwaNamePath):
        aD = mU.doImport(isoLtwaNamePath, fmt=fmt)
        logger.debug("Abbreviation name length %d", len(aD["abbrev"]))
    elif not useCache:
        # ------
        fU = FileUtil()
        logger.info("Fetch data from source %s in %s", urlTargetIsoLtwa, dirPath)
        fp = os.path.join(dirPath, fU.getFileName(urlTargetIsoLtwa))
        ok = fU.get(urlTargetIsoLtwa, fp)
        aD = self.__getLtwaTerms(dirPath, fp)
        ok = mU.doExport(isoLtwaNamePath, aD, fmt=fmt)
        logger.debug("abbrevD keys %r", list(aD.keys()))
        logger.debug("Caching %d ISO LTWA in %s status %r", len(aD["abbrev"]), isoLtwaNamePath, ok)
    #
    abbrevD = aD["abbrev"] if "abbrev" in aD else {}
    conflictD = aD["conflicts"] if "conflicts" in aD else {}
    multiWordTermL = aD["multi_word_abbrev"] if "multi_word_abbrev" in aD else []
    #
    return abbrevD, conflictD, multiWordTermL
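# NOTE: a hedged sketch of reading the tab-delimited UTF-16LE LTWA source
# named in the docstring above; the local file name and the assumption that
# the first three columns hold word, abbreviation, and languages reflect
# the published LTWA layout and are not taken from this module.
import csv

with open("LTWA_20160915.txt", encoding="utf-16-le", newline="") as fh:
    for row in csv.reader(fh, delimiter="\t"):
        word, abbrev, languages = row[0], row[1], row[2]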
def get(self, remotePath, localPath):
    try:
        fileU = FileUtil()
        fileU.mkdirForFile(localPath)
        self.__sftpClient.get(remotePath, localPath)
        return True
    except Exception as e:
        if self.__raiseExceptions:
            raise e
        else:
            logger.error("get failing for remotePath %s localPath %s with %s", remotePath, localPath, str(e))
            return False
def __fetchFromBackup(self, fmt="json"): urlTarget = "https://raw.githubusercontent.com/rcsb/py-rcsb_exdb_assets/master/fall_back/SCOP2" # fn = self.__getAssignmentFileName(fmt=fmt) assignmentPath = os.path.join(self.__dirPath, fn) urlPath = os.path.join(urlTarget, fn) self.__mU.mkdir(assignmentPath) # logger.info("Using backup URL %r", urlPath) fU = FileUtil() ok = fU.get(urlPath, assignmentPath) return ok
def pushBundle(self, gitRepositoryPath, accessToken, gitHost="github.com", gitBranch="master", remoteStashPrefix="A", maxSizeMB=95):
    """Push bundle to remote stash git repository.

    Args:
        gitRepositoryPath (str): git repository path (e.g., rcsb/py-rcsb_exdb_assets_stash)
        accessToken (str): git repository access token
        gitHost (str, optional): git repository host name. Defaults to github.com.
        gitBranch (str, optional): git branch name. Defaults to master.
        remoteStashPrefix (str, optional): optional label prepended to the stashed dependency bundle artifact (default='A')
        maxSizeMB (int, optional): maximum stash bundle file size that will be committed. Defaults to 95MB.

    Returns:
        bool: True for success or False otherwise
    """
    try:
        ok = False
        gU = GitUtil(token=accessToken, repositoryHost=gitHost)
        fU = FileUtil()
        localRepositoryPath = os.path.join(self.__localBundlePath, "stash_repository")
        fn = self.__makeBundleFileName(self.__baseBundleFileName, remoteStashPrefix=remoteStashPrefix)
        #
        # Update the existing local repository, otherwise clone a new copy
        if fU.exists(localRepositoryPath):
            ok = gU.pull(localRepositoryPath, branch=gitBranch)
            logger.debug("After pull status %r", gU.status(localRepositoryPath))
        else:
            ok = gU.clone(gitRepositoryPath, localRepositoryPath, branch=gitBranch)
        #
        # Split all bundles
        mbSize = float(fU.size(self.__localStashTarFilePath)) / 1000000.0
        logger.info("Splitting bundle %r (%.3f MB/Max %d MB)", fn, mbSize, maxSizeMB)
        sj = SplitJoin()
        splitDirPath = os.path.join(localRepositoryPath, "stash", fn[:-7])
        sj.split(self.__localStashTarFilePath, splitDirPath, maxSizeMB=maxSizeMB)
        fU.remove(self.__localStashTarFilePath)
        # else:
        #     fU.put(self.__localStashTarFilePath, os.path.join(localRepositoryPath, "stash", fn))
        ok = gU.addAll(localRepositoryPath, branch=gitBranch)
        ok = gU.commit(localRepositoryPath, branch=gitBranch)
        logger.debug("After commit status %r", gU.status(localRepositoryPath))
        #
        if accessToken:
            ok = gU.push(localRepositoryPath, branch=gitBranch)
            logger.info("After push status %r", gU.status(localRepositoryPath))
        #
        return ok
    except Exception as e:
        logger.exception("For %r %r failing with %s", gitHost, gitRepositoryPath, str(e))
    return False
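# NOTE: fn[:-7] above strips the 7-character ".tar.gz" extension to name
# the per-bundle split directory -- a quick check with a hypothetical name:
fn = "A-stash-bundle.tar.gz"
print(fn[:-7])  # -> A-stash-bundle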
def setUp(self):
    self.__workPath = os.path.join(HERE, "test-output")
    #
    self.__testLogFileMin = os.path.join(self.__workPath, "logfile-min.json")
    self.__testLogFileDetailed = os.path.join(self.__workPath, "logfile-detailed.json")
    fU = FileUtil()
    fU.remove(self.__testLogFileMin)
    fU.remove(self.__testLogFileDetailed)
    self.__startTime = time.time()
    logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
def __init__(self, dirPath, useCache=True):
    """Resource provider for dictionary APIs.

    Args:
        dirPath (str): path to the directory containing cache files
        useCache (bool, optional): flag to use cached files. Defaults to True.
    """
    self.__apiMap = {}
    self.__dirPath = dirPath
    self.__useCache = useCache
    #
    self.__fileU = FileUtil(workPath=self.__dirPath)
    logger.debug("Leaving constructor")
def clearCache(self):
    try:
        self.__nameL = []
        self.__mapD = {}
        self.__cacheFieldD = {}
        self.__nameLegacyL = []
        self.__mapLegacyD = {}
        self.__cacheFieldLegacyD = {}
        dirPath = os.path.join(self.__cachePath, self.__dirName)
        fU = FileUtil()
        return fU.remove(dirPath)
    except Exception:
        pass
    return False
def join(self, outputFilePath, splitDirPath):
    manifestPath = os.path.join(splitDirPath, "MANIFEST")
    with open(outputFilePath, "wb") as ofh:
        with open(manifestPath, "r") as mfh:
            line = mfh.readline()
            fp, priorHash = line[:-1].split("\t")
            for line in mfh:
                fp = os.path.join(splitDirPath, line[:-1])
                with open(fp, "rb") as ifh:
                    data = ifh.read()
                    ofh.write(data)
    fU = FileUtil()
    newHash = fU.hash(outputFilePath, hashType="md5")
    return newHash == priorHash
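# NOTE: a hedged round-trip usage sketch for the SplitJoin pair above --
# split() writes parts plus a MANIFEST carrying the source MD5, and join()
# returns True only when the reassembled file reproduces that hash; the
# file and directory names here are hypothetical.
sj = SplitJoin()
numParts = sj.split("archive.tar.gz", "parts", maxSizeMB=50)
ok = sj.join("archive-rejoined.tar.gz", "parts")  # True when the MD5s match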
def __fetchUrl(self, urlTarget, dirPath, useCache=False):
    fU = FileUtil()
    fn = fU.getFileName(urlTarget)
    filePath = os.path.join(dirPath, fn)
    if not (useCache and fU.exists(filePath)):
        startTime = time.time()
        ok2 = fU.get(urlTarget, filePath)
        endTime = time.time()
        if ok2:
            logger.info("Fetched %s for resource file %s (status = %r) (%.4f seconds)", urlTarget, filePath, ok2, endTime - startTime)
        else:
            logger.error("Failing fetch for %s for resource file %s (status = %r) (%.4f seconds)", urlTarget, filePath, ok2, endTime - startTime)
    #
    return filePath
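# NOTE: a hedged usage sketch of the cache-or-fetch idiom above, using only
# FileUtil calls that appear in this section; the URL and cache directory
# are hypothetical, and the import path is assumed from rcsb.utils.io.
import os
from rcsb.utils.io.FileUtil import FileUtil  # assumed import path

fU = FileUtil()
urlTarget = "https://files.example.org/resource.json"
filePath = os.path.join("CACHE", fU.getFileName(urlTarget))  # -> CACHE/resource.json
if not fU.exists(filePath):
    ok = fU.get(urlTarget, filePath)  # fetch only on a cache miss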
def makeBundle(self, localParentPath, subDirList):
    """Bundle the subdirectories of the input parent directory path.

    Args:
        localParentPath (str): local parent directory path containing the bundling targets
        subDirList (list of str): subdirectories of the parent path to be bundled

    Returns:
        (bool): True for success or False otherwise
    """
    fileU = FileUtil()
    dirPathList = [os.path.join(localParentPath, subDir) for subDir in subDirList]
    okT = fileU.bundleTarfile(self.__localStashTarFilePath, dirPathList, mode="w:gz", recursive=True)
    return okT
def __init__(self, cachePath, **kwargs):
    self.__cachePath = cachePath
    #
    self.__useCache = kwargs.get("useCache", True)
    self.__ccUrlTarget = kwargs.get("ccUrlTarget", None)
    self.__birdUrlTarget = kwargs.get("birdUrlTarget", None)
    self.__descriptorUrlTarget = kwargs.get("descriptorUrlTarget", "http://www.crystallography.net/cod/smi/allcod.smi")
    self.__prefix = kwargs.get("prefix", None)
    self.__numProc = kwargs.get("numProc", 4)
    self.__chunkSize = kwargs.get("chunkSize", 50)
    self.__ccFileNamePrefix = "cc-%s" % self.__prefix if self.__prefix else "cc-full"
    self.__fU = FileUtil()
def get(self, remotePath, localPath): """Get a file from a remote FTP server. Arguments: remotePath (str): remote file path localPath (str): local file path Returns: bool: True for success or false otherwise """ try: fileU = FileUtil() fileU.mkdirForFile(localPath) # If provided localPath already exists and is a directory, retrieve the file using the name on the remote server # to avoid unintentionally overwriting an entire local directory with a single retrieved file if (os.path.exists(localPath) and os.path.isdir(localPath)): remoteFileName = FileUtil().getFileName(remotePath) localFilePath = os.path.join(localPath, remoteFileName) else: localFilePath = localPath with open(localFilePath, 'wb') as lFP: self.__ftpClient.retrbinary('RETR %s' % remotePath, lFP.write) ok = fileU.exists(localFilePath) if ok: return True else: logger.error("get failing for remotePath %s localFilePath %s", remotePath, localFilePath) return False except Exception as e: if self.__raiseExceptions: raise e else: logger.error( "get failing for remotePath %s localPath %s with %s", remotePath, localPath, str(e)) return False
def __reloadGlycoproteins(self, baseUrl, fallbackUrl, dirPath, useCache=True):
    gD = {}
    logger.debug("Using dirPath %r", dirPath)
    self.__mU.mkdir(dirPath)
    #
    myDataPath = os.path.join(dirPath, "glygen-glycoprotein-list.json")
    if useCache and self.__mU.exists(myDataPath):
        gD = self.__mU.doImport(myDataPath, fmt="json")
        logger.debug("GlyGen glycoprotein data length %d", len(gD))
    else:
        for fn in [
            "sarscov1_protein_masterlist.csv",
            "sarscov2_protein_masterlist.csv",
            "hcv1b_protein_masterlist.csv",
            "hcv1a_protein_masterlist.csv",
            "human_protein_masterlist.csv",
            "mouse_protein_masterlist.csv",
            "rat_protein_masterlist.csv",
        ]:
            logger.debug("Fetch GlyGen glycoprotein data from primary data source %s", baseUrl)
            endPoint = os.path.join(baseUrl, fn)
            #
            logger.debug("Fetch GlyGen glycoprotein data from primary data source %s", endPoint)
            rawPath = os.path.join(dirPath, fn)
            fU = FileUtil()
            ok = fU.get(endPoint, rawPath)
            logger.debug("Fetch GlyGen glycoprotein data status %r", ok)
            if not ok:
                endPoint = os.path.join(fallbackUrl, fn)
                ok = fU.get(endPoint, rawPath)
                logger.info("Fetch fallback GlyGen data status %r", ok)
            #
            if ok:
                tD = self.__parseGlycoproteinList(rawPath)
                gD.update(tD)
        #
        ok = self.__mU.doExport(myDataPath, gD, fmt="json")
        logger.info("Exported GlyGen glycoprotein list (%d) (%r) %s", len(gD), ok, myDataPath)
    #
    return gD