Example #1
 def __reloadGlycans(self, baseUrl, fallbackUrl, dirPath, useCache=True):
     gD = {}
     logger.debug("Using dirPath %r", dirPath)
     self.__mU.mkdir(dirPath)
     #
     myDataPath = os.path.join(dirPath, "glygen-glycan-list.json")
     if useCache and self.__mU.exists(myDataPath):
         gD = self.__mU.doImport(myDataPath, fmt="json")
         logger.debug("GlyGen glycan data length %d", len(gD))
     elif not useCache:
         endPoint = os.path.join(baseUrl, "glycan_masterlist.csv")
         logger.info("Fetch GlyGen glycan data from primary data source %s",
                     endPoint)
         rawPath = os.path.join(dirPath, "glycan_masterlist.csv")
         fU = FileUtil()
         ok = fU.get(endPoint, rawPath)
         logger.debug("Fetch GlyGen glycan data status %r", ok)
         if not ok:
             endPoint = os.path.join(fallbackUrl, "glycan_masterlist.csv")
             ok = fU.get(endPoint, rawPath)
             logger.info("Fetch fallback GlyGen glycan data status %r", ok)
         #
         if ok:
             gD = self.__parseGlycanList(rawPath)
             ok = self.__mU.doExport(myDataPath, gD, fmt="json")
             logger.info("Exported GlyGen glycan list (%d) (%r) %s",
                         len(gD), ok, myDataPath)
         #
     return gD
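The cache-first flow above (import from cache, else fetch from the primary endpoint, else retry the fallback mirror) recurs throughout these examples. A minimal standalone sketch of the same pattern, assuming only the FileUtil/MarshalUtil behavior shown above; the helper name and its parameters are hypothetical:

def reloadWithFallback(primaryUrl, fallbackUrl, fileName, dirPath, mU, useCache=True):
    # Hypothetical helper: cache -> primary -> fallback, returning {} on failure.
    filePath = os.path.join(dirPath, fileName)
    mU.mkdir(dirPath)
    if useCache and mU.exists(filePath):
        return mU.doImport(filePath, fmt="json")
    fU = FileUtil()
    ok = fU.get(primaryUrl + "/" + fileName, filePath)
    if not ok:
        # Primary fetch failed; retry against the fallback mirror.
        ok = fU.get(fallbackUrl + "/" + fileName, filePath)
    return mU.doImport(filePath, fmt="json") if ok else {}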
Example #2
    def __init__(self, cfgOb, cachePath, useCache=True, rebuildFlag=False, **kwargs):
        """A collection of schema build and caching methods.

        Args:
            cfgOb (object): ConfigInfo() instance
            cachePath (str): path to directory containing schema
            useCache (bool, optional): use cached schema. Defaults to True.
            rebuildFlag (bool, optional): rebuild and re-cache schema on the fly. Defaults to False.
        """

        self.__cfgOb = cfgOb
        self.__configName = self.__cfgOb.getDefaultSectionName()
        self.__cachePath = os.path.abspath(cachePath)
        self.__rebuildFlag = rebuildFlag
        self.__useCache = rebuildFlag if rebuildFlag else useCache
        #
        self.__workPath = os.path.join(self.__cachePath, "work")
        self.__fileU = FileUtil(workPath=self.__workPath)
        self.__schemaCachePath = os.path.join(self.__cachePath, self.__cfgOb.get("SCHEMA_DEFINITION_CACHE_DIR", sectionName=self.__configName))
        self.__jsonSchemaCachePath = os.path.join(self.__cachePath, self.__cfgOb.get("JSON_SCHEMA_DEFINITION_CACHE_DIR", sectionName=self.__configName))
        self.__fileU.mkdir(self.__schemaCachePath)
        self.__fileU.mkdir(self.__jsonSchemaCachePath)
        self.__kwargs = kwargs
Example #3
    def __init__(self, cfgOb, cachePath, useCache=True, **kwargs):
        """Utilities to access and update provenance details.

        Args:
            cfgOb (object): ConfigInfo() instance
            cachePath (str): path to directory containing schema
            useCache (bool, optional): use cached schema. Defaults to True.
        """

        self.__cfgOb = cfgOb
        self.__configName = self.__cfgOb.getDefaultSectionName()
        self.__cachePath = cachePath
        self.__useCache = useCache
        #
        self.__workPath = os.path.join(self.__cachePath, "work")
        self.__provenanceCachePath = os.path.join(
            self.__cachePath,
            self.__cfgOb.get("PROVENANCE_INFO_CACHE_DIR",
                             sectionName=self.__configName))
        self.__provenanceLocator = self.__cfgOb.getPath(
            "PROVENANCE_INFO_LOCATOR", sectionName=self.__configName)
        #
        self.__fileU = FileUtil(workPath=self.__workPath)
        self.__fileU.mkdir(self.__provenanceCachePath)
        self.__kwargs = kwargs
Example #4
 def __rebuildCache(self, urlTargetPfam, urlTargetPfamFB, dirPath,
                    useCache):
     pfamD = {}
     fmt = "json"
     ext = fmt if fmt == "json" else "pic"
     pfamDataPath = os.path.join(dirPath, "pfam-data.%s" % ext)
     #
     logger.debug("Using cache data path %s", dirPath)
     self.__mU.mkdir(dirPath)
     #
     if useCache and self.__mU.exists(pfamDataPath):
         pfamD = self.__mU.doImport(pfamDataPath, fmt=fmt)
         logger.debug("Pfam data length %d", len(pfamD))
     elif not useCache:
         # ------
         fU = FileUtil()
         logger.info("Fetch data from source %s in %s", urlTargetPfam,
                     dirPath)
         fp = os.path.join(dirPath, fU.getFileName(urlTargetPfam))
         ok = fU.get(urlTargetPfam, fp)
         if not ok:
             fp = os.path.join(dirPath, fU.getFileName(urlTargetPfamFB))
             ok = fU.get(urlTargetPfamFB, fp)
             logger.info("Fetch data fallback fetch status is %r", ok)
         pfamD = self.__getPfamIndex(fp)
         ok = self.__mU.doExport(pfamDataPath, pfamD, fmt=fmt)
         logger.info("Caching %d in %s status %r", len(pfamD), pfamDataPath,
                     ok)
         # ------
     #
     return pfamD
Example #5
 def __reloadEntryIds(self,
                      urlTarget,
                      urlFallbackTarget,
                      dirPath,
                      useCache=True):
     idD = {}
     tdL = []  # default when the cached read and both fetches fail
     fU = FileUtil()
     fn = fU.getFileName(urlTarget)
     fp = os.path.join(dirPath, fn)
     self.__mU.mkdir(dirPath)
     #
     if useCache and self.__mU.exists(fp):
         tdL = self.__mU.doImport(fp, fmt="json")
         logger.debug("Reading cached IDs list (%d)", len(tdL))
     else:
         logger.info("Fetch ID list from %s", urlTarget)
         ok = fU.get(urlTarget, fp)
         if not ok:
             ok = fU.get(urlFallbackTarget, fp)
         #
         if ok:
             tdL = self.__mU.doImport(fp, fmt="json")
     #
     for td in tdL:
         for k, v in td.items():
             try:
                 idD[k] = datetime.datetime.fromisoformat(v)
             except Exception as e:
                 logger.error("Date processing failing for %r %r with %s",
                              k, v, str(e))
     #
     sTupL = sorted(idD.items(), key=lambda item: item[1])
     return {k: v for k, v in sTupL}
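The final parse-and-sort step stands on its own; a minimal sketch using only standard-library types (the IDs and dates are illustrative):

import datetime

idD = {
    "4HHB": datetime.datetime.fromisoformat("1984-03-07"),
    "1ABC": datetime.datetime.fromisoformat("1991-06-12"),
}
# Equivalent to the comprehension above: order entries by date, oldest first.
sortedD = dict(sorted(idD.items(), key=lambda item: item[1]))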
Example #6
 def setUp(self):
     self.__cachePath = os.path.join(HERE, "test-output", "CACHE")
     self.__dataPath = os.path.join(HERE, "test-data")
     dirPath = os.path.join(self.__cachePath, "rcsb_entry_info")
     fU = FileUtil()
     fn = "entry_info_details.json"
     fU.put(os.path.join(self.__dataPath, fn), os.path.join(dirPath, fn))
Example #7
 def split(self,
           inputFilePath,
           splitDirPath,
           prefixName="part_",
           maxSizeMB=50):
     chunkSize = maxSizeMB * 1000000
     partNumber = 0
     fU = FileUtil()
     fU.mkdir(splitDirPath)
     manifestPath = os.path.join(splitDirPath, "MANIFEST")
     myHash = fU.hash(inputFilePath, hashType="md5")
     with open(manifestPath, "w") as mfh:
         mfh.write("%s\t%s\n" % (inputFilePath, myHash))
         with open(inputFilePath, "rb") as ifh:
             chunk = ifh.read(chunkSize)
             while chunk:
                 partNumber += 1
                 partName = prefixName + str(partNumber)
                 fp = os.path.join(splitDirPath, partName)
                 with open(fp, "wb") as ofh:
                     ofh.write(chunk)
                 mfh.write("%s\n" % partName)
                 #
                 chunk = ifh.read(chunkSize)
     return partNumber
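A short usage sketch (paths illustrative); the matching join() appears in Example #25 below:

sj = SplitJoin()
nParts = sj.split("/tmp/data.tar", "/tmp/parts", maxSizeMB=50)
# /tmp/parts now holds part_1 .. part_<nParts> plus a MANIFEST that records
# the source path, its MD5 hash, and the ordered part names.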
Example #8
 def __init__(self, **kwargs):
     self.__workPath = kwargs.get("workPath", ".")
     self.__workDirPrefix = kwargs.get("workDirPrefix", "marshall_")
     self.__workDirSuffix = kwargs.get("workDirSuffix", "_tempdir")
     #
     self.__fileU = FileUtil(workPath=self.__workPath)
     self.__ioU = IoUtil()
Example #9
    def __init__(self, cfgOb, cachePath, useCache=True, **kwargs):
        """Data type application and instance information provider.

        Args:
            cfgOb (object):  ConfigInfo() object instance
            cachePath (str): path to hold the cache directory
            useCache (bool, optional): flag to use cached files. Defaults to True.

        """
        self.__cfgOb = cfgOb
        self.__configName = self.__cfgOb.getDefaultSectionName()
        self.__useCache = useCache
        self.__cachePath = cachePath
        # self.__contentInfoConfigName = "content_info_helper_configuration"
        self.__fileU = FileUtil()
        self.__contentDefHelper = self.__cfgOb.getHelper(
            "CONTENT_DEF_HELPER_MODULE",
            sectionName=self.__configName,
            cfgOb=self.__cfgOb)
        self.__dirPath = os.path.join(
            cachePath,
            self.__cfgOb.get("DATA_TYPE_INFO_CACHE_DIR",
                             sectionName=self.__configName))
        self.__kwargs = kwargs
        #
        logger.debug("Leaving constructor")
Example #10
    def storeBundle(self, url, remoteDirPath, remoteStashPrefix="A", userName=None, password=None):
        """ Store a copy of the bundled search dependencies remotely -

        Args:
            url (str): URL string for the destination host (e.g. sftp://myserver.net or None for a local file)
            remoteDirPath (str): remote directory path on the remote resource
            remoteStashPrefix (str, optional): optional label prepended to the stashed dependency bundle artifact (default='A')
            userName (str, optional): optional access information. Defaults to None.
            password (str, optional): optional access information. Defaults to None.

        Returns:
          bool:  True for success or False otherwise

        """
        try:
            ok = False
            fn = self.__makeBundleFileName(self.__baseBundleFileName, remoteStashPrefix=remoteStashPrefix)
            if url and url.startswith("sftp://"):
                sftpU = SftpUtil()
                hostName = url[7:]
                ok = sftpU.connect(hostName, userName, pw=password, port=22)
                if ok:
                    remotePath = os.path.join("/", remoteDirPath, fn)
                    ok = sftpU.put(self.__localStashTarFilePath, remotePath)
            elif not url:
                fileU = FileUtil()
                remotePath = os.path.join(remoteDirPath, fn)
                ok = fileU.put(self.__localStashTarFilePath, remotePath)
            else:
                logger.error("Unsupported stash protocol %r", url)
            return ok
        except Exception as e:
            logger.exception("For %r %r failing with %s", url, remoteDirPath, str(e))
        return False
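A hedged usage sketch; `stU` stands in for an instance of the enclosing stash utility class, which this excerpt does not name:

# Copy the bundle to an SFTP destination (URL form from the docstring above) ...
ok = stU.storeBundle("sftp://myserver.net", "/stash/deps", userName="u", password="p")
# ... or, with url=None, to a local directory path.
ok = stU.storeBundle(None, "/tmp/stash/deps")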
Example #11
 def __reload(self, dirPath, baseVersion, useCache, **kwargs):
     startTime = time.time()
     mU = MarshalUtil(workPath=dirPath)
     chemblDbUrl = kwargs.get(
         "ChEMBLDbUrl",
         "ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/latest/")
     ok = False
     fU = FileUtil()
     fU.mkdir(dirPath)
     #
     # ChEMBL current version <baseVersion>,...
     # template:  chembl_<baseVersion>.fa.gz
     #
     targetFileName = "chembl_" + str(baseVersion) + ".fa.gz"
     mappingFileName = "chembl_uniprot_mapping.txt"
     #
     chemblTargetPath = os.path.join(dirPath, targetFileName)
     chemblMappingPath = os.path.join(dirPath, mappingFileName)
     mappingFilePath = os.path.join(dirPath, "chembl_uniprot_mapping.json")
     #
     mapD = {}
     if useCache and fU.exists(mappingFilePath):
         logger.info("useCache %r using %r and %r and %r", useCache,
                     chemblTargetPath, chemblMappingPath, mappingFilePath)
         mapD = mU.doImport(mappingFilePath, fmt="json")
     else:
         # Get the ChEMBL UniProt mapping file
         url = os.path.join(chemblDbUrl, mappingFileName)
         ok = fU.get(url, chemblMappingPath)
         logger.info("Fetched %r url %s path %s", ok, url,
                     chemblMappingPath)
         logger.info("Reading ChEMBL mapping file path %s", mappingFilePath)
         rowL = mU.doImport(chemblMappingPath, fmt="tdd", rowFormat="list")
         for row in rowL:
             mapD[row[0]] = (row[1], row[2], row[3])
         ok = mU.doExport(mappingFilePath, mapD, fmt="json")
         logger.info("Processed mapping path %s (%d) %r", mappingFilePath,
                     len(mapD), ok)
         #
         # Get the target FASTA files --
         for vers in range(baseVersion, baseVersion + 10):
             logger.info("Now fetching version %r", vers)
             self.__version = vers
             targetFileName = "chembl_" + str(vers) + ".fa.gz"
             chemblTargetPath = os.path.join(dirPath,
                                             "chembl_targets_raw.fa.gz")
             url = os.path.join(chemblDbUrl, targetFileName)
             ok = fU.get(url, chemblTargetPath)
             logger.info("Fetched %r url %s path %s", ok, url,
                         chemblTargetPath)
             if ok:
                 break
     #
     logger.info("Completed reload at %s (%.4f seconds)",
                 time.strftime("%Y %m %d %H:%M:%S", time.localtime()),
                 time.time() - startTime)
     #
     return mapD
Example #12
 def clearRawCache(self):
     try:
         rawDirPath = os.path.join(self.__cachePath,
                                   self.__dirName + "-raw")
         fU = FileUtil()
         return fU.remove(rawDirPath)
     except Exception:
         pass
     return False
Example #13
    def put(self, localPath, remotePath):
        """Put a local file on a remote FTP server.

        Arguments:
            localPath (str): local file path
            remotePath (str): remote file path

        Returns:
            bool: True for success or False otherwise
        """
        try:
            # First, make sure the provided localPath represents a file, not a directory
            if not os.path.isfile(localPath):
                logger.error(
                    "put failing for localPath %s - path must be to a specific file, not a directory.",
                    localPath)
                return False

            fileU = FileUtil()
            remotePathDir = fileU.getFilePath(remotePath)
            self.mkdir(remotePathDir)
            # If provided remotePath already exists and is a directory, put the file on the remote server using the local filename
            # to avoid unintentionally overwriting an entire remote directory with a single file
            if (os.path.exists(remotePath) and os.path.isdir(remotePath)):
                localFileName = FileUtil().getFileName(localPath)
                remoteFilePath = os.path.join(remotePath, localFileName)
            else:
                remoteFilePath = remotePath
            with open(localPath, 'rb') as lFP:
                self.__ftpClient.storbinary('STOR %s' % remoteFilePath, lFP)
            if remoteFilePath in self.listdir(remotePathDir):
                return True
            else:
                logger.error("put failing for localPath %s remoteFilePath %s",
                             localPath, remoteFilePath)
                return False
        except Exception as e:
            if self.__raiseExceptions:
                raise e
            else:
                logger.error(
                    "put failing for localPath %s  remotePath %s with %s",
                    localPath, remotePath, str(e))
                return False
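A hedged usage sketch; `ftpU` stands in for a connected instance of the enclosing FTP utility class:

# Upload to an explicit remote file path.
ok = ftpU.put("/tmp/local.dat", "/remote/dir/local.dat")
# Upload into an existing remote directory: the local file name is appended,
# so a single file cannot unintentionally overwrite the directory itself.
ok = ftpU.put("/tmp/local.dat", "/remote/dir")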
Example #14
 def __fetchFromBackup(self, urlBackupPath, cathDirPath):
     fn = self.__getCathDomainFileName()
     cathDomainPath = os.path.join(cathDirPath, fn)
     self.__mU.mkdir(cathDirPath)
     #
     backupUrl = urlBackupPath + "/" + fn
     logger.info("Using backup URL %r", backupUrl)
     fU = FileUtil()
     ok = fU.get(backupUrl, cathDomainPath)
     return ok
Example #15
 def __doAquireLock(self):
     fU = FileUtil()
     mode = os.O_WRONLY | os.O_CREAT | os.O_EXCL | os.O_TRUNC
     try:
         fU.mkdir(os.path.dirname(self.__lockFilePath))
         fd = os.open(self.__lockFilePath, mode)
     except (IOError, OSError):
         pass
     else:
         self.__lockFileFileDescriptor = fd
     return None
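The flag combination above is the standard atomic lock-file idiom: with os.O_CREAT | os.O_EXCL, open() fails if the file already exists, so at most one process can hold the lock. A minimal standalone sketch (lock path illustrative):

import os

lockPath = "/tmp/myresource.lock"
try:
    fd = os.open(lockPath, os.O_WRONLY | os.O_CREAT | os.O_EXCL | os.O_TRUNC)
except OSError:
    print("lock is held by another process")
else:
    try:
        pass  # ... critical section ...
    finally:
        os.close(fd)
        os.remove(lockPath)  # release the lock for the next acquirer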
Example #16
 def __fetchFromBackup(self, urlBackupPath, scopDirPath):
     pyVersion = sys.version_info[0]
     fn = "scop_domains-py%s.pic" % str(pyVersion)
     scopDomainPath = os.path.join(scopDirPath, fn)
     self.__mU.mkdir(scopDirPath)
     #
     backupUrl = urlBackupPath + "/" + fn
     logger.info("Using backup URL %r", backupUrl)
     fU = FileUtil()
     ok = fU.get(backupUrl, scopDomainPath)
     return ok
Example #17
 def __modelFixture(self):
     fU = FileUtil()
     modelSourcePath = os.path.join(self.__mockTopPath, "AF")
     for iPath in glob.iglob(os.path.join(modelSourcePath, "*.cif.gz")):
         fn = os.path.basename(iPath)
         uId = fn.split("-")[1]
         h3 = uId[-2:]
         h2 = uId[-4:-2]
         h1 = uId[-6:-4]
         oPath = os.path.join(self.__cachePath, "computed-models", h1, h2, h3, fn)
         fU.put(iPath, oPath)
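The slicing derives a three-level directory shard from the last six characters of the UniProt accession embedded in the model file name; for example (file name illustrative):

fn = "AF-P12345-F1-model_v4.cif.gz"  # AlphaFold-style name, illustrative
uId = fn.split("-")[1]               # "P12345"
h1, h2, h3 = uId[-6:-4], uId[-4:-2], uId[-2:]  # "P1", "23", "45"
# Destination: <cachePath>/computed-models/P1/23/45/AF-P12345-F1-model_v4.cif.gz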
Example #18
    def __rebuildCache(self, urlTargetIsoLtwa, dirPath, useCache):
        """Rebuild the cache of ISO abbreviation term data

        Args:
            urlTargetIsoLtwa (str): URL for ISO4 LTWA title word abbreviations
            dirPath (str):  cache path
            useCache (bool):  flag to use cached files

        Returns:
            tuple: (dict) title word abbreviations
                   (dict) language conflict dictionary
                   (list) multi-word abbreviation targets

        Notes:
            ISO source file (tab delimited UTF-16LE) is maintained at the ISSN site -
            https://www.issn.org/wp-content/uploads/2013/09/LTWA_20160915.txt
        """
        aD = {}
        mU = MarshalUtil(workPath=dirPath)
        fmt = "json"
        ext = fmt if fmt == "json" else "pic"
        isoLtwaNamePath = os.path.join(dirPath, "iso-ltwa.%s" % ext)
        logger.debug("Using cache data path %s", dirPath)
        mU.mkdir(dirPath)
        if not useCache:
            for fp in [isoLtwaNamePath]:
                try:
                    os.remove(fp)
                except Exception:
                    pass
        #
        if useCache and mU.exists(isoLtwaNamePath):
            aD = mU.doImport(isoLtwaNamePath, fmt=fmt)
            logger.debug("Abbreviation name length %d", len(aD["abbrev"]))
        elif not useCache:
            # ------
            fU = FileUtil()
            logger.info("Fetch data from source %s in %s", urlTargetIsoLtwa,
                        dirPath)
            fp = os.path.join(dirPath, fU.getFileName(urlTargetIsoLtwa))
            ok = fU.get(urlTargetIsoLtwa, fp)
            aD = self.__getLtwaTerms(dirPath, fp)
            ok = mU.doExport(isoLtwaNamePath, aD, fmt=fmt)
            logger.debug("abbrevD keys %r", list(aD.keys()))
            logger.debug("Caching %d ISO LTWA in %s status %r",
                         len(aD["abbrev"]), isoLtwaNamePath, ok)
        #
        abbrevD = aD["abbrev"] if "abbrev" in aD else {}
        conflictD = aD["conflicts"] if "conflicts" in aD else {}
        multiWordTermL = aD[
            "multi_word_abbrev"] if "multi_word_abbrev" in aD else []
        #
        return abbrevD, conflictD, multiWordTermL
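A hedged sketch of consuming the three-part return value from inside the owning class (URL from the Notes above; the cache path is illustrative):

abbrevD, conflictD, multiWordTermL = self.__rebuildCache(
    "https://www.issn.org/wp-content/uploads/2013/09/LTWA_20160915.txt",
    "CACHE/iso-ltwa",
    useCache=True,
)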
Example #19
 def get(self, remotePath, localPath):
     try:
         fileU = FileUtil()
         fileU.mkdirForFile(localPath)
         self.__sftpClient.get(remotePath, localPath)
         return True
     except Exception as e:
         if self.__raiseExceptions:
             raise e
         else:
             logger.error("get failing for remotePath %s localPath %s with %s", remotePath, localPath, str(e))
             return False
Example #20
 def __fetchFromBackup(self, fmt="json"):
     urlTarget = "https://raw.githubusercontent.com/rcsb/py-rcsb_exdb_assets/master/fall_back/SCOP2"
     #
     fn = self.__getAssignmentFileName(fmt=fmt)
     assignmentPath = os.path.join(self.__dirPath, fn)
     urlPath = os.path.join(urlTarget, fn)
     self.__mU.mkdir(self.__dirPath)
     #
     logger.info("Using backup URL %r", urlPath)
     fU = FileUtil()
     ok = fU.get(urlPath, assignmentPath)
     return ok
Example #21
    def pushBundle(self, gitRepositoryPath, accessToken, gitHost="github.com", gitBranch="master", remoteStashPrefix="A", maxSizeMB=95):
        """Push bundle to remote stash git repository.

        Args:
            gitRepositoryPath (str): git repository path (e.g., rcsb/py-rcsb_exdb_assets_stash)
            accessToken (str): git repository access token
            gitHost (str, optional): git repository host name. Defaults to github.com.
            gitBranch (str, optional): git branch name. Defaults to master.
            remoteStashPrefix (str, optional): optional label prepended to the stashed dependency bundle artifact (default='A')
            maxSizeMB (int, optional): maximum stash bundle file size that will be committed. Defaults to 95MB.

        Returns:
          bool:  True for success or False otherwise

        """
        try:
            ok = False
            gU = GitUtil(token=accessToken, repositoryHost=gitHost)
            fU = FileUtil()
            localRepositoryPath = os.path.join(self.__localBundlePath, "stash_repository")
            fn = self.__makeBundleFileName(self.__baseBundleFileName, remoteStashPrefix=remoteStashPrefix)
            #
            # Update existing local repository, otherwise clone a new copy
            if fU.exists(localRepositoryPath):
                ok = gU.pull(localRepositoryPath, branch=gitBranch)
                logger.debug("After pull status %r", gU.status(localRepositoryPath))
            else:
                ok = gU.clone(gitRepositoryPath, localRepositoryPath, branch=gitBranch)
            #
            # Split all bundles
            mbSize = float(fU.size(self.__localStashTarFilePath)) / 1000000.0
            logger.info("Splitting bundle %r (%.3f MB/Max %d MB)", fn, mbSize, maxSizeMB)
            sj = SplitJoin()
            splitDirPath = os.path.join(localRepositoryPath, "stash", fn[:-7])
            sj.split(self.__localStashTarFilePath, splitDirPath, maxSizeMB=maxSizeMB)
            fU.remove(self.__localStashTarFilePath)
            # else:
            # fU.put(self.__localStashTarFilePath, os.path.join(localRepositoryPath, "stash", fn))

            ok = gU.addAll(localRepositoryPath, branch=gitBranch)
            ok = gU.commit(localRepositoryPath, branch=gitBranch)
            logger.debug("After commit status %r", gU.status(localRepositoryPath))
            #
            if accessToken:
                ok = gU.push(localRepositoryPath, branch=gitBranch)
                logger.info("After push status %r", gU.status(localRepositoryPath))
            #
            return ok
        except Exception as e:
            logger.exception("For %r %r failing with %s", gitHost, gitRepositoryPath, str(e))
        return False
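A hedged usage sketch; `stU` again stands in for the enclosing stash utility instance, and the access token is illustrative:

ok = stU.pushBundle(
    "rcsb/py-rcsb_exdb_assets_stash",  # repository path, from the docstring example
    accessToken="<git-access-token>",
    gitBranch="master",
    maxSizeMB=95,
)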
Example #22
    def setUp(self):
        self.__workPath = os.path.join(HERE, "test-output")
        #
        self.__testLogFileMin = os.path.join(self.__workPath,
                                             "logfile-min.json")
        self.__testLogFileDetailed = os.path.join(self.__workPath,
                                                  "logfile-detailed.json")
        fU = FileUtil()
        fU.remove(self.__testLogFileMin)
        fU.remove(self.__testLogFileDetailed)

        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(),
                     time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
Example #23
    def __init__(self, dirPath, useCache=True):
        """Resource provider for dictionary APIs.

        Args:
            dirPath (str): path to the directory containing cache files
            useCache (bool, optional): flag to use cached files. Defaults to True.

        """
        self.__apiMap = {}
        self.__dirPath = dirPath
        self.__useCache = useCache
        #
        self.__fileU = FileUtil(workPath=self.__dirPath)
        logger.debug("Leaving constructor")
 def clearCache(self):
     try:
         self.__nameL = []
         self.__mapD = {}
         self.__cacheFieldD = {}
         self.__nameLegacyL = []
         self.__mapLegacyD = {}
         self.__cacheFieldLegacyD = {}
         dirPath = os.path.join(self.__cachePath, self.__dirName)
         fU = FileUtil()
         return fU.remove(dirPath)
     except Exception:
         pass
     return False
Example #25
 def join(self, outputFilePath, splitDirPath):
     manifestPath = os.path.join(splitDirPath, "MANIFEST")
     with open(outputFilePath, "wb") as ofh:
         with open(manifestPath, "r") as mfh:
             line = mfh.readline()
             fp, priorHash = line[:-1].split("\t")
             for line in mfh:
                 fp = os.path.join(splitDirPath, line[:-1])
                 with open(fp, "rb") as ifh:
                     data = ifh.read()
                     ofh.write(data)
     fU = FileUtil()
     newHash = fU.hash(outputFilePath, hashType="md5")
     return newHash == priorHash
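split() and join() pair into a round trip whose boolean result doubles as an integrity check: split() records the source MD5 in the MANIFEST and join() recomputes it over the reassembled file (paths illustrative):

sj = SplitJoin()
sj.split("/tmp/data.tar", "/tmp/parts", maxSizeMB=50)
ok = sj.join("/tmp/data-restored.tar", "/tmp/parts")  # True only if the MD5s match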
Example #26
 def __fetchUrl(self, urlTarget, dirPath, useCache=False):
     fU = FileUtil()
     fn = fU.getFileName(urlTarget)
     filePath = os.path.join(dirPath, fn)
     if not (useCache and fU.exists(filePath)):
         startTime = time.time()
         ok2 = fU.get(urlTarget, filePath)
         endTime = time.time()
         if ok2:
             logger.info("Fetched %s for resource file %s (status = %r) (%.4f seconds)", urlTarget, filePath, ok2, endTime - startTime)
         else:
             logger.error("Failing fetch for %s for resource file %s (status = %r) (%.4f seconds)", urlTarget, filePath, ok2, endTime - startTime)
     #
     return filePath
Example #27
    def makeBundle(self, localParentPath, subDirList):
        """ Bundle the subdirectories of the input parent directory path.

        Args:
            localParentPath (str): local parent directory path containing the bundling targets
            subDirList (list of str): list of subdirectories of the parent path to be bundled

        Returns:
            (bool): True for success or False otherwise
        """
        fileU = FileUtil()
        dirPathList = [os.path.join(localParentPath, subDir) for subDir in subDirList]
        okT = fileU.bundleTarfile(self.__localStashTarFilePath, dirPathList, mode="w:gz", recursive=True)
        return okT
Example #28
 def __init__(self, cachePath, **kwargs):
     self.__cachePath = cachePath
     #
     self.__useCache = kwargs.get("useCache", True)
     self.__ccUrlTarget = kwargs.get("ccUrlTarget", None)
     self.__birdUrlTarget = kwargs.get("birdUrlTarget", None)
     self.__descriptorUrlTarget = kwargs.get(
         "descriptorUrlTarget",
         "http://www.crystallography.net/cod/smi/allcod.smi")
     self.__prefix = kwargs.get("prefix", None)
     self.__numProc = kwargs.get("numProc", 4)
     self.__chunkSize = kwargs.get("chunkSize", 50)
     self.__ccFileNamePrefix = "cc-%s" % self.__prefix if self.__prefix else "cc-full"
     self.__fU = FileUtil()
Example #29
    def get(self, remotePath, localPath):
        """Get a file from a remote FTP server.

        Arguments:
            remotePath (str): remote file path
            localPath (str): local file path

        Returns:
            bool: True for success or False otherwise
        """
        try:
            fileU = FileUtil()
            fileU.mkdirForFile(localPath)
            # If provided localPath already exists and is a directory, retrieve the file using the name on the remote server
            # to avoid unintentionally overwriting an entire local directory with a single retrieved file
            if (os.path.exists(localPath) and os.path.isdir(localPath)):
                remoteFileName = FileUtil().getFileName(remotePath)
                localFilePath = os.path.join(localPath, remoteFileName)
            else:
                localFilePath = localPath
            with open(localFilePath, 'wb') as lFP:
                self.__ftpClient.retrbinary('RETR %s' % remotePath, lFP.write)
            ok = fileU.exists(localFilePath)
            if ok:
                return True
            else:
                logger.error("get failing for remotePath %s localFilePath %s",
                             remotePath, localFilePath)
                return False
        except Exception as e:
            if self.__raiseExceptions:
                raise e
            else:
                logger.error(
                    "get failing for remotePath %s localPath %s with %s",
                    remotePath, localPath, str(e))
                return False
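A hedged usage sketch mirroring the put() example in Example #13; `ftpU` stands in for a connected instance of the enclosing FTP utility class:

# Download to an explicit local file path.
ok = ftpU.get("/remote/dir/data.csv", "/tmp/data.csv")
# Download into an existing local directory: the remote file name is appended,
# so a single retrieval cannot unintentionally overwrite the directory itself.
ok = ftpU.get("/remote/dir/data.csv", "/tmp")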
Example #30
 def __reloadGlycoproteins(self,
                           baseUrl,
                           fallbackUrl,
                           dirPath,
                           useCache=True):
     gD = {}
     logger.debug("Using dirPath %r", dirPath)
     self.__mU.mkdir(dirPath)
     #
     myDataPath = os.path.join(dirPath, "glygen-glycoprotein-list.json")
     if useCache and self.__mU.exists(myDataPath):
         gD = self.__mU.doImport(myDataPath, fmt="json")
         logger.debug("GlyGen glycoprotein data length %d", len(gD))
     else:
         fU = FileUtil()
         for fn in [
                 "sarscov1_protein_masterlist.csv",
                 "sarscov2_protein_masterlist.csv",
                 "hcv1b_protein_masterlist.csv",
                 "hcv1a_protein_masterlist.csv",
                 "human_protein_masterlist.csv",
                 "mouse_protein_masterlist.csv",
                 "rat_protein_masterlist.csv",
         ]:
             endPoint = os.path.join(baseUrl, fn)
             logger.debug(
                 "Fetch GlyGen glycoprotein data from primary data source %s",
                 endPoint)
             rawPath = os.path.join(dirPath, fn)
             ok = fU.get(endPoint, rawPath)
             logger.debug("Fetch GlyGen glycoprotein data status %r", ok)
             if not ok:
                 endPoint = os.path.join(fallbackUrl, fn)
                 ok = fU.get(endPoint, rawPath)
                 logger.info("Fetch fallback GlyGen data status %r", ok)
             #
             if ok:
                 tD = self.__parseGlycoproteinList(rawPath)
                 gD.update(tD)
         #
         ok = self.__mU.doExport(myDataPath, gD, fmt="json")
         logger.info("Exported GlyGen glycoprotein list (%d) (%r) %s",
                     len(gD), ok, myDataPath)
     #
     return gD