示例#1
0
 def testCreateStatusRecord(self):
     """Verify time stamp and status operations on a reused status object.

     Two status records are built in sequence with the same DataExchangeStatus
     instance: the first without an update identifier and the second with the
     identifier "2018_40". Any exception (including a failed assertion) is
     logged and reported as a test failure.
     """
     try:
         statusRecords = []
         statusOb = DataExchangeStatus()
         # (collection name, update identifier) for each record to be built
         targets = (("my_collection", None), ("my_other_collection", "2018_40"))
         for count, (collectionName, updateId) in enumerate(targets, start=1):
             self.assertTrue(statusOb.setObject("my_database", collectionName))
             startStamp = statusOb.setStartTime()
             self.assertGreaterEqual(len(startStamp), 15)
             # Separate the start and end time stamps by one second.
             time.sleep(1)
             self.assertTrue(statusOb.setStatus(updateId=updateId, successFlag="Y"))
             endStamp = statusOb.setEndTime()
             self.assertGreaterEqual(len(endStamp), 15)
             statusRecords.append(statusOb.getStatus())
             self.assertEqual(len(statusRecords), count)
             if count == 1:
                 # Only the first record is written to the debug log.
                 logger.debug("Status record %r", statusRecords[0])
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
示例#2
0
 def __updateStatus(self, updateId, databaseName, collectionName, status,
                    startTimestamp):
     """Build a status record and append it to the internal status list.

     Args:
         updateId: update identifier stored in the record
         databaseName: database name assigned to the record
         collectionName: collection (object) name assigned to the record
         status: truthy for success (flag "Y"), falsy for failure (flag "N")
         startTimestamp: start time stamp passed to setStartTime()

     Returns:
         bool: True when the record was appended, False if any step raised
     """
     try:
         if status:
             successFlag = "Y"
         else:
             successFlag = "N"
         statusOb = DataExchangeStatus()
         statusOb.setStartTime(tS=startTimestamp)
         statusOb.setObject(databaseName, collectionName)
         statusOb.setStatus(updateId=updateId, successFlag=successFlag)
         statusOb.setEndTime()
         self.__statusList.append(statusOb.getStatus())
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         return False
     return True
    def testLoadExchangeStatus(self):
        """Test case - build and load data exchange status records.

        For each index in 1..99 a single status record is created for the
        object "my_collection_<index>" and loaded into the collection named by
        COLLECTION_UPDATE_STATUS in the "data_exchange_configuration" section.
        The first load uses loadType "full"; every later load uses "append".
        Any exception (including a failed assertion) is logged and reported as
        a test failure.
        """
        try:
            for ii in range(1, 100):
                objectName = "my_collection_" + str(ii)
                statusOb = DataExchangeStatus()
                startStamp = statusOb.setStartTime()
                self.assertGreaterEqual(len(startStamp), 15)
                self.assertTrue(statusOb.setObject("my_database", objectName))
                self.assertTrue(statusOb.setStatus(updateId=None, successFlag="Y"))
                #
                endStamp = statusOb.setEndTime()
                self.assertGreaterEqual(len(endStamp), 15)
                statusRecords = [statusOb.getStatus()]
                #
                self.assertEqual(len(statusRecords), 1)
                logger.debug("Status record %r", statusRecords[0])

                loader = DocumentLoader(
                    self.__cfgOb,
                    self.__cachePath,
                    self.__resourceName,
                    numProc=self.__numProc,
                    chunkSize=self.__chunkSize,
                    documentLimit=self.__documentLimit,
                    verbose=self.__verbose,
                    readBackCheck=self.__readBackCheck,
                )
                # Target database and collection come from the configuration.
                cfgSection = "data_exchange_configuration"
                databaseName = self.__cfgOb.get("DATABASE_NAME", sectionName=cfgSection)
                collectionName = self.__cfgOb.get("COLLECTION_UPDATE_STATUS", sectionName=cfgSection)
                # The first pass uses a "full" load; later passes append to it.
                loadType = "full" if ii == 1 else "append"
                ok = loader.load(
                    databaseName,
                    collectionName,
                    loadType=loadType,
                    documentList=statusRecords,
                    indexAttributeList=["update_id", "database_name", "object_name"],
                    keyNames=None,
                )
                self.assertTrue(ok)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
 def doUpdate(self, updateId, updateLimit=None):
     """Build the update documents, apply them, and record a status entry.

     Args:
         updateId: update identifier stored in the status record
         updateLimit: when truthy, only the first updateLimit update documents
             are applied; otherwise all of them are

     Returns:
         tuple: (number of update documents built, value returned by the
             private update call, or 0 when there was nothing to update)
     """
     statusStartTimestamp = DataExchangeStatus().setStartTime()
     #
     updateDL = self.__buildUpdate(self.__assignRefD)
     numUpd = 0
     if updateDL:
         selectedDL = updateDL[:updateLimit] if updateLimit else updateDL
         numUpd = self.__doUpdate(self.__cfgOb, selectedDL,
                                  self.__databaseName,
                                  self.__collectionName)
     # The status entry is always recorded with a success status of True.
     self.__updateStatus(updateId, self.__databaseName,
                         self.__collectionName, True, statusStartTimestamp)
     return len(updateDL), numUpd
示例#5
0
 def doTransform(self, **kwargs):
     """Transform selected documents in a collection and record the outcome.

     Keyword Args:
         databaseName (str): target database (default "pdbx_core")
         collectionName (str): target collection (default "pdbx_core_entry")
         selectionQuery (dict): query used to select object identifiers
             (default {})
         fetchLimit (int): when truthy, only the first fetchLimit selected
             documents are transformed (default None)
         updateId (str): update identifier for the status record; defaults to
             the current week signature. A falsy value suppresses the status
             record.

     Returns:
         The transform result combined (logical and) with the outcome of the
         status update; when no status record is written the transform result
         alone decides.
     """
     desp = DataExchangeStatus()
     statusStartTimestamp = desp.setStartTime()
     #
     databaseName = kwargs.get("databaseName", "pdbx_core")
     collectionName = kwargs.get("collectionName", "pdbx_core_entry")
     selectionQueryD = kwargs.get("selectionQuery", {})
     fetchLimit = kwargs.get("fetchLimit", None)
     tU = TimeUtil()
     updateId = kwargs.get("updateId", tU.getCurrentWeekSignature())
     #
     docSelectList = self.__selectObjectIds(databaseName, collectionName,
                                            selectionQueryD)
     docSelectList = docSelectList[:fetchLimit] if fetchLimit else docSelectList
     ok = self.__transform(databaseName, collectionName, docSelectList)
     #
     # Default to success so that skipping the status record (falsy updateId)
     # does not leave okS unbound at the return statement below.
     okS = True
     if updateId:
         okS = self.__updateStatus(updateId, databaseName, collectionName,
                                   ok, statusStartTimestamp)
     return ok and okS
示例#6
0
    def load(self, updateId, extResource, loadType="full"):
        """Load chemical reference integrated data for the input external resource.

        Only the "DrugBank" resource is handled. Its documents are fetched,
        loaded into the first collection reported by the schema provider for
        the "drugbank_core" database (or "unassigned" if none is reported),
        and the load outcome is recorded in the internal status list.

        Args:
            updateId: update identifier stored in the status record
            extResource (str): external resource name; must be "DrugBank"
            loadType (str): load type passed to the document loader
                (default "full")

        Returns:
            bool: True when processing ran to completion (the success or
                failure of the document load itself is only recorded in the
                status list), False for an unsupported resource or when any
                exception is raised.
        """
        try:
            self.__statusList = []
            desp = DataExchangeStatus()
            statusStartTimestamp = desp.setStartTime()
            #
            if extResource != "DrugBank":
                # Without this guard the load call below would reference names
                # that are only assigned for DrugBank and fail with an
                # UnboundLocalError that hides the real cause.
                logger.error("Unsupported external resource %r", extResource)
                return False
            #
            databaseName = "drugbank_core"
            configName = self.__cfgOb.getDefaultSectionName()
            user = self.__cfgOb.get("_DRUGBANK_AUTH_USERNAME",
                                    sectionName=configName)
            pw = self.__cfgOb.get("_DRUGBANK_AUTH_PASSWORD",
                                  sectionName=configName)
            #
            dbP = DrugBankProvider(cachePath=self.__cachePath,
                                   useCache=self.__useCache,
                                   username=user,
                                   password=pw)
            #
            crExt = ChemRefExtractor(self.__cfgOb)
            idD = crExt.getChemCompAccessionMapping(extResource)
            dList = dbP.getDocuments(mapD=idD)
            #
            logger.info("Resource %r extracted mapped document length %d",
                        extResource, len(dList))
            logger.debug("Objects %r", dList[:2])
            sD, _, collectionList, _ = self.__schP.getSchemaInfo(
                databaseName)
            collectionName = collectionList[
                0] if collectionList else "unassigned"
            indexL = sD.getDocumentIndex(collectionName, "primary")
            logger.info("Database %r collection %r index attributes %r",
                        databaseName, collectionName, indexL)
            #
            collectionVersion = sD.getCollectionVersion(collectionName)
            addValues = {"_schema_version": collectionVersion}
            # NOTE(review): the schema version annotation above is overridden
            # here, so no additional values are passed to the loader - confirm
            # that this is intended.
            addValues = {}
            #
            dl = DocumentLoader(
                self.__cfgOb,
                self.__cachePath,
                self.__resourceName,
                numProc=self.__numProc,
                chunkSize=self.__chunkSize,
                documentLimit=self.__documentLimit,
                verbose=self.__verbose,
                readBackCheck=self.__readBackCheck,
            )
            #
            ok = dl.load(databaseName,
                         collectionName,
                         loadType=loadType,
                         documentList=dList,
                         indexAttributeList=indexL,
                         keyNames=None,
                         addValues=addValues)
            self.__updateStatus(updateId, databaseName, collectionName, ok,
                                statusStartTimestamp)

            return True
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return False
示例#7
0
    def load(self, updateId, loadType="full"):
        """Load repository holdings and status data.

        Holdings documents are prepared either from the local sandbox
        (DISCOVERY_MODE "local", the default) or from remote URLs, and then
        loaded collection by collection. A status record is appended after
        each collection load.

        Relevant configuration options:

        [repository_holdings_configuration]
        DATABASE_NAME
        COLLECTION_HOLDINGS_UPDATE
        COLLECTION_HOLDINGS_CURRENT
        COLLECTION_HOLDINGS_UNRELEASED
        COLLECTION_HOLDINGS_REMOVED
        COLLECTION_HOLDINGS_COMBINED

        Args:
            updateId: update identifier passed to the data preparation calls
                and stored in the status records
            loadType (str): load type passed to the document loader
                (default "full")

        Returns:
            bool: True when all steps ran without raising (individual load
                outcomes are only recorded in the status list), False if any
                exception was raised.
        """
        try:
            self.__statusList = []
            statusStartTimestamp = DataExchangeStatus().setStartTime()

            cfgSection = self.__cfgSectionName
            discoveryMode = self.__cfgOb.get("DISCOVERY_MODE", sectionName=cfgSection, default="local")
            # --- locations used when holdings are discovered remotely
            baseUrlPDB = self.__cfgOb.getPath("PDB_REPO_URL", sectionName=cfgSection, default="https://ftp.wwpdb.org/pub")
            fallbackUrlPDB = self.__cfgOb.getPath("PDB_REPO_FALLBACK_URL", sectionName=cfgSection, default="https://ftp.wwpdb.org/pub")
            edMapUrl = self.__cfgOb.getPath("RCSB_EDMAP_LIST_PATH", sectionName=cfgSection, default=None)
            #
            remoteKwargs = {
                "holdingsTargetUrl": os.path.join(baseUrlPDB, "pdb", "holdings"),
                "holdingsFallbackUrl": os.path.join(fallbackUrlPDB, "pdb", "holdings"),
                "edmapsLocator": edMapUrl,
                "updateTargetUrl": os.path.join(baseUrlPDB, "pdb", "data", "status", "latest"),
                "updateFallbackUrl": os.path.join(fallbackUrlPDB, "pdb", "data", "status", "latest"),
                "filterType": self.__filterType,
            }
            # --- choose the data preparation source
            if discoveryMode != "local":
                rhdp = RepoHoldingsRemoteDataPrep(cachePath=self.__cachePath, **remoteKwargs)
            else:
                rhdp = RepoHoldingsDataPrep(
                    cfgOb=self.__cfgOb,
                    sandboxPath=self.__sandboxPath,
                    cachePath=self.__cachePath,
                    filterType=self.__filterType,
                )
            #
            loader = DocumentLoader(
                self.__cfgOb,
                self.__cachePath,
                self.__resourceName,
                numProc=self.__numProc,
                chunkSize=self.__chunkSize,
                documentLimit=self.__documentLimit,
                verbose=self.__verbose,
                readBackCheck=self.__readBackCheck,
            )
            #
            sectionName = "repository_holdings_configuration"
            databaseName = self.__cfgOb.get("DATABASE_NAME", sectionName=sectionName)
            addValues = None
            #
            # (data preparation method name, configuration key of the target
            # collection) in load order; methods are resolved one at a time.
            holdingsSteps = (
                ("getHoldingsUpdateEntry", "COLLECTION_HOLDINGS_UPDATE"),
                ("getHoldingsCurrentEntry", "COLLECTION_HOLDINGS_CURRENT"),
                ("getHoldingsUnreleasedEntry", "COLLECTION_HOLDINGS_UNRELEASED"),
                ("getHoldingsRemovedEntry", "COLLECTION_HOLDINGS_REMOVED"),
                ("getHoldingsCombinedEntry", "COLLECTION_HOLDINGS_COMBINED"),
            )
            for fetchName, collectionKey in holdingsSteps:
                dList = getattr(rhdp, fetchName)(updateId=updateId)
                collectionName = self.__cfgOb.get(collectionKey, sectionName=sectionName)
                ok = loader.load(
                    databaseName,
                    collectionName,
                    loadType=loadType,
                    documentList=dList,
                    indexAttributeList=["update_id", "entry_id"],
                    keyNames=None,
                    addValues=addValues,
                )
                self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
            #
            return True
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return False
示例#8
0
    def load(self, updateId, loadType="full", doLoad=True):
        """Build tree node lists and, optionally, load them with status records.

        Node lists are built in this order: GO, CATH, SCOP, SCOP2, ECOD, EC,
        taxonomy and ATC. Each list is loaded into its own collection of the
        "tree_node_lists" database and a status record is appended after each
        load.

        Args:
            updateId: update identifier stored in the status records
            loadType (str): load type passed to the document loader
                (default "full")
            doLoad (bool): when False the node lists are built but nothing is
                loaded and no status records are written (default True)

        Returns:
            bool: True when all steps ran without raising (individual load
                outcomes are only recorded in the status list), False if any
                exception was raised.
        """
        try:
            useCache = self.__useCache
            logger.info("Starting with cache path %r (useCache=%r)",
                        self.__cachePath, useCache)
            #
            self.__statusList = []
            desp = DataExchangeStatus()
            statusStartTimestamp = desp.setStartTime()
            dl = DocumentLoader(
                self.__cfgOb,
                self.__cachePath,
                self.__resourceName,
                numProc=self.__numProc,
                chunkSize=self.__chunkSize,
                documentLimit=self.__documentLimit,
                verbose=self.__verbose,
                readBackCheck=self.__readBackCheck,
            )
            #
            databaseName = "tree_node_lists"
            addValues = None

            def _loadNodeList(collectionName, nodeList):
                """Load one node list and append the outcome to the status list."""
                ok = dl.load(databaseName,
                             collectionName,
                             loadType=loadType,
                             documentList=nodeList,
                             indexAttributeList=["update_id"],
                             keyNames=None,
                             addValues=addValues,
                             schemaLevel=None)
                self.__updateStatus(updateId, databaseName, collectionName, ok,
                                    statusStartTimestamp)
                return ok

            # --- GO
            goP = GeneOntologyProvider(goDirPath=os.path.join(
                self.__cachePath, "go"),
                                       useCache=useCache)
            # NOTE(review): the cache test result is not acted upon - confirm
            # whether a failed cache test should stop the load.
            goP.testCache()
            anEx = AnnotationExtractor(self.__cfgOb)
            goIdL = anEx.getUniqueIdentifiers("GO")
            logger.info("Unique GO assignments %d", len(goIdL))
            nL = goP.exportTreeNodeList(goIdL)
            logger.info("GO tree node list length %d", len(nL))
            if doLoad:
                _loadNodeList("tree_go_node_list", nL)
            # ---- CATH
            ccu = CathClassificationProvider(cachePath=self.__cachePath,
                                             useCache=useCache)
            nL = ccu.getTreeNodeList()
            logger.info("Starting load CATH node tree length %d", len(nL))
            if doLoad:
                _loadNodeList("tree_cath_node_list", nL)
            # ---- SCOP
            scu = ScopClassificationProvider(cachePath=self.__cachePath,
                                             useCache=useCache)
            nL = scu.getTreeNodeList()
            logger.info("Starting load SCOP node tree length %d", len(nL))
            if doLoad:
                _loadNodeList("tree_scop_node_list", nL)
            # --- SCOP2
            scu = Scop2ClassificationProvider(cachePath=self.__cachePath,
                                              useCache=useCache)
            nL = scu.getTreeNodeList()
            logger.info("Starting load SCOP2 node tree length %d", len(nL))
            if doLoad:
                _loadNodeList("tree_scop2_node_list", nL)
            # ---- Ecod
            ecu = EcodClassificationProvider(cachePath=self.__cachePath,
                                             useCache=useCache)
            nL = ecu.getTreeNodeList()
            logger.info("Starting load ECOD node tree length %d", len(nL))
            if doLoad:
                _loadNodeList("tree_ecod_node_list", nL)
            # ---- EC
            edbu = EnzymeDatabaseProvider(cachePath=self.__cachePath,
                                          useCache=useCache)
            nL = edbu.getTreeNodeList()
            logger.info("Starting load of EC node tree length %d", len(nL))
            if doLoad:
                _loadNodeList("tree_ec_node_list", nL)
            # ---- Taxonomy
            # Get the taxon coverage in the current data set -
            epe = TaxonomyExtractor(self.__cfgOb)
            tL = epe.getUniqueTaxons()
            logger.info("Taxon coverage length %d", len(tL))
            #
            tU = TaxonomyProvider(cachePath=self.__cachePath,
                                  useCache=useCache)
            # Despite its name the filter is a set; it starts with identifier 1
            # and collects every identifier in the lineage of each taxon.
            fD = {1}
            for taxId in tL:
                fD.update(tU.getLineage(taxId))
            logger.info("Taxon filter dictionary length %d", len(fD))
            #
            nL = tU.exportNodeList(filterD=fD)
            self.__checkTaxonNodeList(nL)
            logger.info("Starting load of taxonomy node tree length %d",
                        len(nL))
            if doLoad:
                logger.debug("Taxonomy nodes (%d) %r", len(nL), nL[:5])
                _loadNodeList("tree_taxonomy_node_list", nL)
            logger.info("Tree loading operations completed.")
            #
            # ---  ATC
            crEx = ChemRefExtractor(self.__cfgOb)
            atcFilterD = crEx.getChemCompAccessionMapping("ATC")
            logger.info("Length of ATC filter %d", len(atcFilterD))
            atcP = AtcProvider(cachePath=self.__cachePath, useCache=useCache)
            nL = atcP.getTreeNodeList(filterD=atcFilterD)
            logger.debug("ATC node list length %d %r", len(nL), nL[:5])
            # The ATC load honors doLoad like every other section.
            if doLoad:
                _loadNodeList("tree_atc_node_list", nL)
            #
            # ---
            logger.info("Completed tree node list loading operations.\n")
            return True
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return False
 def etl(self, dataSetId, dataLocator=None, loadType="full"):
     """Prepare and load sequence cluster data by entity and by cluster identifier.

     Three collections are loaded in order: entity members, cluster members
     and cluster provenance. A status record is appended after each load.

     Args:
         dataSetId: data set identifier passed to the extraction step and
             stored in the status records
         dataLocator: optional data locator passed to the extraction step
         loadType (str): load type passed to the document loader
             (default "full")

     Returns:
         The combined (logical and) outcome of the three loads, or False if
         any exception was raised.
     """
     try:
         self.__statusList = []
         statusStartTimestamp = DataExchangeStatus().setStartTime()
         #
         docBySequenceD, docByClusterD = self.__extract(
             dataSetId=dataSetId,
             dataLocator=dataLocator,
             levels=self.__identityLevels,
         )
         #
         loader = DocumentLoader(
             self.__cfgOb,
             self.__cachePath,
             self.__resourceName,
             numProc=self.__numProc,
             chunkSize=self.__chunkSize,
             documentLimit=self.__documentLimit,
             verbose=self.__verbose,
             readBackCheck=self.__readBackCheck,
         )
         #
         databaseName = self.__databaseName
         addValues = None

         def _loadAndRecord(collectionName, documentList, indexAttributeList):
             """Load one collection and append the outcome to the status list."""
             ok = loader.load(
                 databaseName,
                 collectionName,
                 loadType=loadType,
                 documentList=documentList,
                 indexAttributeList=indexAttributeList,
                 keyNames=None,
                 addValues=addValues,
             )
             self.__updateStatus(dataSetId, databaseName, collectionName, ok,
                                 statusStartTimestamp)
             return ok

         # Entity members
         ok1 = _loadAndRecord(
             self.__entityMemberCollection,
             docBySequenceD[self.__entitySchemaName],
             self.__entityMemberCollectionIndexL,
         )
         # Cluster members
         ok2 = _loadAndRecord(
             self.__clusterMembersCollection,
             docByClusterD[self.__clusterSchemaName],
             self.__clusterMembersCollectionIndexL,
         )
         # Provenance is loaded as a single document without index attributes.
         pD = self.__fetchProvenance()
         ok3 = _loadAndRecord(self.__clusterProvenanceCollection, [pD], None)
         #
         return ok1 and ok2 and ok3
     except Exception as e:
         logger.exception("Failing with %s", str(e))
     return False
示例#10
0
    def load(self, updateId, loadType="full"):
        """Load legacy repository holdings and status data -

        Relevant configuration options:

        [DEFAULT]
        RCSB_EXCHANGE_SANDBOX_PATH=MOCK_EXCHANGE_SANDBOX

        [repository_holdings_configuration]
        DATABASE_NAME=repository_holdings
        DATABASE_VERSION_STRING=v5
        COLLECTION_HOLDINGS_UPDATE=rcsb_repository_holdings_update_entry
        COLLECTION_HOLDINGS_CURRENT=rcsb_repository_holdings_current_entry
        COLLECTION_HOLDINGS_UNRELEASED=rcsb_repository_holdings_unreleased_entry
        COLLECTION_HOLDINGS_REMOVED=rcsb_repository_holdings_removed_entry
        COLLECTION_VERSION_STRING=v0_1

        """
        try:
            self.__statusList = []
            desp = DataExchangeStatus()
            statusStartTimestamp = desp.setStartTime()

            sectionName = "repository_holdings_configuration"

            rhdp = RepoHoldingsDataPrep(cfgOb=self.__cfgOb,
                                        sandboxPath=self.__sandboxPath,
                                        cachePath=self.__cachePath,
                                        filterType=self.__filterType)
            #
            dl = DocumentLoader(
                self.__cfgOb,
                self.__cachePath,
                self.__resourceName,
                numProc=self.__numProc,
                chunkSize=self.__chunkSize,
                documentLimit=self.__documentLimit,
                verbose=self.__verbose,
                readBackCheck=self.__readBackCheck,
            )
            #
            databaseName = self.__cfgOb.get("DATABASE_NAME",
                                            sectionName=sectionName)
            # collectionVersion = self.__cfgOb.get("COLLECTION_VERSION_STRING", sectionName=sectionName)
            # addValues = {"_schema_version": collectionVersion}
            addValues = None
            #
            dList = rhdp.getHoldingsUpdateEntry(updateId=updateId)
            collectionName = self.__cfgOb.get("COLLECTION_HOLDINGS_UPDATE",
                                              sectionName=sectionName)
            ok = dl.load(databaseName,
                         collectionName,
                         loadType=loadType,
                         documentList=dList,
                         indexAttributeList=["update_id", "entry_id"],
                         keyNames=None,
                         addValues=addValues)
            self.__updateStatus(updateId, databaseName, collectionName, ok,
                                statusStartTimestamp)
            #
            dList = rhdp.getHoldingsCurrentEntry(updateId=updateId)
            collectionName = self.__cfgOb.get("COLLECTION_HOLDINGS_CURRENT",
                                              sectionName=sectionName)
            ok = dl.load(databaseName,
                         collectionName,
                         loadType=loadType,
                         documentList=dList,
                         indexAttributeList=["update_id", "entry_id"],
                         keyNames=None,
                         addValues=addValues)
            self.__updateStatus(updateId, databaseName, collectionName, ok,
                                statusStartTimestamp)

            dList = rhdp.getHoldingsUnreleasedEntry(updateId=updateId)
            collectionName = self.__cfgOb.get("COLLECTION_HOLDINGS_UNRELEASED",
                                              sectionName=sectionName)
            ok = dl.load(databaseName,
                         collectionName,
                         loadType=loadType,
                         documentList=dList,
                         indexAttributeList=["update_id", "entry_id"],
                         keyNames=None,
                         addValues=addValues)
            self.__updateStatus(updateId, databaseName, collectionName, ok,
                                statusStartTimestamp)
            #
            dList = rhdp.getHoldingsRemovedEntry(updateId=updateId)
            collectionName = self.__cfgOb.get("COLLECTION_HOLDINGS_REMOVED",
                                              sectionName=sectionName)
            ok = dl.load(databaseName,
                         collectionName,
                         loadType=loadType,
                         documentList=dList,
                         indexAttributeList=["update_id", "entry_id"],
                         keyNames=None,
                         addValues=addValues)
            self.__updateStatus(updateId, databaseName, collectionName, ok,
                                statusStartTimestamp)
            #
            dList = rhdp.getHoldingsCombinedEntry(updateId=updateId)
            collectionName = self.__cfgOb.get("COLLECTION_HOLDINGS_COMBINED",
                                              sectionName=sectionName)
            ok = dl.load(databaseName,
                         collectionName,
                         loadType=loadType,
                         documentList=dList,
                         indexAttributeList=["update_id", "entry_id"],
                         keyNames=None,
                         addValues=addValues)
            self.__updateStatus(updateId, databaseName, collectionName, ok,
                                statusStartTimestamp)
            #
            return True
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return False
示例#11
0
    def load(self, updateId, extResource, loadType="full"):
        """Load sequence reference data for an external resource.

        Args:
            updateId: update identifier forwarded to the status update call.
            extResource: name of the external resource; only "UniProt" is
                handled here, any other value is rejected.
            loadType: load type passed through to DocumentLoader.load()
                (default "full").

        Returns:
            The value of ``ok and okS`` (document load result combined with
            the status update result) on the normal path; False when the
            resource is unsupported, when the reference sequence provider
            cannot be obtained, or when any exception is raised (the
            exception is logged, not propagated).
        """
        try:
            self.__statusList = []
            desp = DataExchangeStatus()
            statusStartTimestamp = desp.setStartTime()
            #
            # Reject unknown resources up front.  Falling through would reach
            # the loader with no database/collection names and an unbound
            # addValues, and only fail later with an unrelated-looking error.
            if extResource != "UniProt":
                logger.error("Unsupported external resource %r", extResource)
                return False
            #
            databaseName = "uniprot_core"
            ok, rsP = self.__getReferenceSequenceProvider()
            if not ok:
                return False
            #
            dList = rsP.getDocuments()
            logger.info("Resource %r extracted mapped document length %d",
                        extResource, len(dList))
            logger.debug("Objects %r", dList[:2])
            #
            # Only the first collection entry returned for the database is used.
            cDL = self.__docHelper.getCollectionInfo(databaseName)
            collectionName = cDL[0]["NAME"]
            collectionVersion = cDL[0]["VERSION"]
            indexL = self.__docHelper.getDocumentIndexAttributes(
                collectionName, "primary")
            logger.info(
                "Database %r collection %r version %r index attributes %r",
                databaseName, collectionName, collectionVersion, indexL)
            addValues = {}
            #
            if self.__doValidate:
                self.__valInst = self.__getValidator(databaseName,
                                                     collectionName,
                                                     schemaLevel="full")
                for dObj in dList:
                    self.__validateObj(databaseName,
                                       collectionName,
                                       dObj,
                                       label="Original")
            #
            dl = DocumentLoader(
                self.__cfgOb,
                self.__cachePath,
                self.__resourceName,
                numProc=self.__numProc,
                chunkSize=self.__chunkSize,
                documentLimit=self.__documentLimit,
                verbose=self.__verbose,
                readBackCheck=self.__readBackCheck,
            )
            #
            ok = dl.load(databaseName,
                         collectionName,
                         loadType=loadType,
                         documentList=dList,
                         indexAttributeList=indexL,
                         keyNames=None,
                         addValues=addValues)
            # The status update is attempted whether or not the load succeeded,
            # so a failed load is still passed on to the status call.
            okS = self.__updateStatus(updateId, databaseName, collectionName,
                                      ok, statusStartTimestamp)

            return ok and okS
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return False