Пример #1
0
 def testReplaceList(self):
     """Test case -  create collection and insert document list - replace and upsert document list

     Inserts nDocs documents, calls replaceList() with upsertFlag=True on a
     list twice as long, then fetches every replacement document by DOC_ID
     and compares it (ignoring '_id') with the corresponding input document.
     """
     try:
         with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
             nDocs = 10
             mg = MongoDbUtil(client)
             ok = mg.createCollection(self.__dbName, self.__collectionName)
             self.assertTrue(ok)
             ok = mg.databaseExists(self.__dbName)
             self.assertTrue(ok)
             ok = mg.collectionExists(self.__dbName, self.__collectionName)
             self.assertTrue(ok)
             #
             dList = [self.__makeDataObj(2, 5, 5, ii) for ii in range(nDocs)]
             #
             keyName = "DOC_ID"
             rIdL = mg.insertList(self.__dbName, self.__collectionName, dList, keyNames=[keyName], salvage=True)
             self.assertEqual(len(rIdL), len(dList))
             #
             # Input documents are compared directly with the objects fetched by the returned ids
             for ii, rId in enumerate(rIdL):
                 rObj = mg.fetchOne(self.__dbName, self.__collectionName, "_id", rId)
                 self.assertEqual(len(dList[ii]), len(rObj))
                 self.assertEqual(dList[ii], rObj)
             #
             #  Replace with 2x the list length - half are duplicate ids
             dList = [self.__makeDataObj(4, 10, 10, ii) for ii in range(nDocs + nDocs)]
             #
             updL = mg.replaceList(self.__dbName, self.__collectionName, dList, [keyName], upsertFlag=True)
             #
             logger.info("Upserted id list length %d", len(updL))
             for ii in range(nDocs + nDocs):
                 kVal = "DOC_%d" % ii
                 rObj = mg.fetchOne(self.__dbName, self.__collectionName, keyName, kVal)
                 # Fail with the offending key instead of dereferencing an empty result below
                 self.assertTrue(rObj, "Failing to recover doc %s" % kVal)
                 # '_id' is dropped from both sides before comparing the document content
                 rObj.pop("_id", None)
                 dList[ii].pop("_id", None)
                 self.assertEqual(len(dList[ii]), len(rObj))
                 self.assertEqual(dList[ii], rObj)
     except Exception as e:
         logger.exception("Failing with %s", str(e))
         self.fail()
Пример #2
0
    def testReplaceSingle(self):
        """Test case -  create collection and insert document  and then replace document -

        Three steps are verified by read back:
          1. insert a document and fetch it by the returned '_id';
          2. replace it (selection DOC_ID == "DOC_1") with a new document;
          3. replace/upsert a second document under the selection DOC_ID == "DOC_2".
        """
        try:
            with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
                mg = MongoDbUtil(client)
                ok = mg.createCollection(self.__dbName, self.__collectionName)
                self.assertTrue(ok)
                ok = mg.databaseExists(self.__dbName)
                self.assertTrue(ok)
                ok = mg.collectionExists(self.__dbName, self.__collectionName)
                self.assertTrue(ok)
                #
                dObj = self.__makeDataObj(2, 5, 5, 1)
                rId = mg.insert(self.__dbName, self.__collectionName, dObj)
                self.assertTrue(rId is not None)
                # Note that dObj is mutated by additional key '_id' that is added on insert -
                #
                rObj = mg.fetchOne(self.__dbName, self.__collectionName, "_id", rId)
                logger.debug("Return Object %s", pprint.pformat(rObj))
                self.assertEqual(len(dObj), len(rObj))
                self.assertEqual(dObj, rObj)
                #
                # Now replace with a new document with the same document id
                dObj = self.__makeDataObj(3, 2, 2, 1)
                logger.debug("Replace Object %s", pprint.pformat(dObj))

                # The return value of replace() is not checked; success is verified by the read back below
                mg.replace(self.__dbName, self.__collectionName, dObj, {"DOC_ID": "DOC_1"}, upsertFlag=True)
                rObj = mg.fetchOne(self.__dbName, self.__collectionName, "DOC_ID", "DOC_1")
                rObj.pop("_id", None)
                dObj.pop("_id", None)
                logger.debug("Return Object %s", pprint.pformat(rObj))
                self.assertEqual(len(dObj), len(rObj))
                self.assertEqual(dObj, rObj)
                #
                # Now replace with a new document with a different key
                dObj2 = self.__makeDataObj(5, 5, 5, 2)
                # Log the document that is actually about to be written (dObj2, not dObj)
                logger.debug("Replace Object %s", pprint.pformat(dObj2))
                #
                mg.replace(self.__dbName, self.__collectionName, dObj2, {"DOC_ID": "DOC_2"}, upsertFlag=True)
                rObj = mg.fetchOne(self.__dbName, self.__collectionName, "DOC_ID", "DOC_2")
                rObj.pop("_id", None)
                dObj2.pop("_id", None)
                logger.debug("Return Object %s", pprint.pformat(rObj))
                self.assertEqual(len(dObj2), len(rObj))
                self.assertEqual(dObj2, rObj)
                #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Пример #3
0
    def testInsertSingle(self):
        """Test case - insert one document into a new collection and verify it on read back."""
        try:
            with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
                mg = MongoDbUtil(client)
                dbName, collName = self.__dbName, self.__collectionName
                self.assertTrue(mg.createCollection(dbName, collName))
                self.assertTrue(mg.databaseExists(dbName))
                self.assertTrue(mg.collectionExists(dbName, collName))
                #
                doc = self.__makeDataObj(2, 5, 5)
                insertedId = mg.insert(dbName, collName, doc)
                self.assertTrue(insertedId is not None)
                #
                # The insert adds an '_id' key to the input document, so the input
                # and the fetched object can be compared directly.
                fetched = mg.fetchOne(dbName, collName, "_id", insertedId)
                logger.debug("Return Object %s", pprint.pformat(fetched))
                self.assertEqual(len(doc), len(fetched))
                self.assertEqual(doc, fetched)
        except Exception as err:
            logger.exception("Failing with %s", str(err))
            self.fail()
Пример #4
0
 def testInsertList(self):
     """Test case - bulk insert 100 documents and verify each one on read back."""
     try:
         with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
             mg = MongoDbUtil(client)
             dbName, collName = self.__dbName, self.__collectionName
             self.assertTrue(mg.createCollection(dbName, collName))
             self.assertTrue(mg.databaseExists(dbName))
             self.assertTrue(mg.collectionExists(dbName, collName))
             #
             docL = [self.__makeDataObj(2, 5, 5, idx) for idx in range(100)]
             #
             insertedIdL = mg.insertList(dbName, collName, docL, keyNames=["DOC_ID"], salvage=True)
             self.assertEqual(len(insertedIdL), len(docL))
             #
             # The insert adds an '_id' key to each input document, so the input
             # and the fetched objects can be compared directly.
             for objId in insertedIdL:
                 fetched = mg.fetchOne(dbName, collName, "_id", objId)
                 logger.debug("Return Object %s", pprint.pformat(fetched))
                 # The text after the first four characters of DOC_ID is used as the index into the input list
                 pos = int(fetched["DOC_ID"][4:])
                 expected = docL[pos]
                 self.assertEqual(len(expected), len(fetched))
                 self.assertEqual(expected, fetched)
     except Exception as err:
         logger.exception("Failing with %s", str(err))
         self.fail()
Пример #5
0
    def testSchemaValidation1(self):
        """Test case -  create collection and insert data with schema validation (ext. schema assignment)

        The validator is attached after the collection is created, using a
        'collMod' database command with validationLevel 'moderate'.  The first
        insert ({"x": 1}) is expected to return None; the second document is
        expected to be stored and to compare equal on read back.
        """

        #  Example of a Mongo flavor of JsonSchema
        vexpr = {"$jsonSchema": self.__mongoSchema}

        # The command is built as an OrderedDict so that 'collMod' stays the first key
        query = [("collMod", self.__collectionName), ("validator", vexpr),
                 ("validationLevel", "moderate")]
        query = OrderedDict(query)

        try:
            with Connection(cfgOb=self.__cfgOb,
                            resourceName=self.__resourceName) as client:
                mg = MongoDbUtil(client)
                # Start from a clean database
                if mg.databaseExists(self.__dbName):
                    ok = mg.dropDatabase(self.__dbName)
                    self.assertTrue(ok)
                #
                ok = mg.createDatabase(self.__dbName)
                self.assertTrue(ok)
                #
                ok = mg.createCollection(self.__dbName, self.__collectionName)
                self.assertTrue(ok)
                ok = mg.databaseExists(self.__dbName)
                self.assertTrue(ok)
                ok = mg.collectionExists(self.__dbName, self.__collectionName)
                self.assertTrue(ok)
                #
                mg.databaseCommand(self.__dbName, query)
                dObj = {"x": 1}
                rId = mg.insert(self.__dbName, self.__collectionName, dObj)
                logger.info("rId is %r", rId)
                self.assertEqual(rId, None)
                #
                # Single-quoted because the text itself contains a double quote.
                # NOTE(review): the upstream argument may have been written as XML
                # character references rather than literal characters - confirm.
                s2 = unescapeXmlCharRef(' " Φ Ψ α £  ℅  ☆  𝕫')
                dObj = {
                    "strField1": "test value",
                    "strField2": s2,
                    "intField1": 50,
                    "enumField1": "v3",
                    "dblField1": 100.1
                }
                rId = mg.insert(self.__dbName, self.__collectionName, dObj)
                logger.info("rId is %r", rId)
                rObj = mg.fetchOne(self.__dbName, self.__collectionName, "_id",
                                   rId)
                logger.debug("Return Object %s", pprint.pformat(rObj))
                self.assertEqual(len(dObj), len(rObj))
                self.assertEqual(dObj, rObj)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Пример #6
0
    def __transform(self,
                    databaseName,
                    collectionName,
                    docSelectList,
                    logIncrement=100):
        """Fetch each selected document, optionally filter it, and replace it in the collection.

        Args:
            databaseName: database name passed to the MongoDbUtil calls
            collectionName: collection name passed to the MongoDbUtil calls
            docSelectList: list of selection dictionaries; entries without an '_id' key are skipped
            logIncrement: a progress message is logged every logIncrement documents (default 100)

        Returns:
            bool: False if any replacement failed, any selected document could not
                be fetched, or an exception was raised; True otherwise (including
                when the collection does not exist and nothing is processed).
        """
        #
        ok = True
        try:
            self.__valInst = self.__getValidator(databaseName,
                                                 collectionName,
                                                 schemaLevel="full")
            with Connection(cfgOb=self.__cfgOb,
                            resourceName=self.__resourceName) as client:
                mg = MongoDbUtil(client)
                if mg.collectionExists(databaseName, collectionName):
                    numDoc = len(docSelectList)
                    for ii, dD in enumerate(docSelectList, 1):
                        if "_id" not in dD:
                            continue
                        rObj = mg.fetchOne(databaseName, collectionName, "_id",
                                           dD["_id"])
                        if not rObj:
                            # Record the failure and keep going rather than aborting the whole batch
                            logger.error("%r %r unable to fetch document %r",
                                         databaseName, collectionName, dD["_id"])
                            ok = False
                            continue
                        # The replacement document is submitted without its '_id' key
                        rObj.pop("_id", None)
                        #
                        fOk = True

                        if self.__oAdapt:
                            # Validate before and after the adapter filter is applied
                            self.__validateObj(databaseName,
                                               collectionName,
                                               rObj,
                                               label="Original")
                            fOk, rObj = self.__oAdapt.filter(rObj)
                            self.__validateObj(databaseName,
                                               collectionName,
                                               rObj,
                                               label="Updated")
                        if fOk:
                            rOk = mg.replace(databaseName, collectionName,
                                             rObj, dD)
                            if rOk is None:
                                tId = rObj[
                                    "rcsb_id"] if rObj and "rcsb_id" in rObj else "anonymous"
                                logger.error("%r %r (%r) failing",
                                             databaseName, collectionName, tId)
                                rOk = False
                            ok = ok and bool(rOk)
                        #
                        if ii % logIncrement == 0 or ii == numDoc:
                            logger.info("Replace status %r object (%d of %d)",
                                        ok, ii, numDoc)
                        #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            # An exception means the transformation did not complete - never report success
            ok = False
        return ok
Пример #7
0
 def __selectObjects(self, **kwargs):
     """Return a dictionary of objects satisfying the input conditions (e.g. method, resolution limit)

     Keyword Args:
         databaseName: database name (default "pdbx_core")
         collectionName: collection name (default "pdbx_core_entry")
         selectionQuery: query dictionary passed to fetch() as queryD (default {})
         uniqueAttributes: attribute names whose values are joined with "." to form each result key (default ["rcsb_id"])
         objectLimit: stop once the enumeration index reaches this value (default None, no limit)
         stripObjectId: if True remove '_id' from each object, otherwise convert it to a string (default False)
         logIncrement: a progress message is logged every logIncrement objects (default 10000)

     Returns:
         dict: a shallow copy of each fetched object keyed by its joined unique
             attribute values.  The result is empty or partial if the collection
             does not exist or an exception is raised (exceptions are logged).
     """
     databaseName = kwargs.get("databaseName", "pdbx_core")
     collectionName = kwargs.get("collectionName", "pdbx_core_entry")
     selectionQueryD = kwargs.get("selectionQuery", {})
     #
     uniqueAttributes = kwargs.get("uniqueAttributes", ["rcsb_id"])
     #
     tV = kwargs.get("objectLimit", None)
     objLimit = int(tV) if tV is not None else None
     stripObjectId = kwargs.get("stripObjectId", False)
     logIncrement = kwargs.get("logIncrement", 10000)
     #
     objectD = {}
     try:
         with Connection(cfgOb=self.__cfgOb,
                         resourceName=self.__resourceName) as client:
             mg = MongoDbUtil(client)
             if mg.collectionExists(databaseName, collectionName):
                 logger.info("%s %s document count is %d", databaseName,
                             collectionName,
                             mg.count(databaseName, collectionName))
                 # Copy the caller's query so it is never mutated here
                 qD = {}
                 if selectionQueryD:
                     qD.update(selectionQueryD)
                 selectL = ["_id"]
                 # Normalize an empty/None result so the loop below is always safe
                 dL = mg.fetch(databaseName,
                               collectionName,
                               selectL,
                               queryD=qD) or []
                 numDoc = len(dL)
                 logger.info("Selection %r fetch result count %d", selectL,
                             numDoc)
                 #
                 for ii, dD in enumerate(dL, 1):
                     if "_id" not in dD:
                         continue
                     rObj = mg.fetchOne(databaseName, collectionName, "_id",
                                        dD["_id"])
                     if not rObj:
                         # Skip an object that cannot be recovered instead of aborting the selection
                         logger.warning("%s %s unable to fetch object %r",
                                        databaseName, collectionName, dD["_id"])
                         continue
                     if stripObjectId:
                         rObj.pop("_id", None)
                     elif "_id" in rObj:
                         rObj["_id"] = str(rObj["_id"])
                     #
                     stKey = ".".join([rObj[ky] for ky in uniqueAttributes])
                     objectD[stKey] = copy.copy(rObj)
                     if objLimit and ii >= objLimit:
                         break
                     logger.debug("Saving %d %s", ii, stKey)
                     if ii % logIncrement == 0 or ii == numDoc:
                         logger.info("Extracting object (%d of %d)", ii,
                                     numDoc)
     except Exception as e:
         logger.exception("Failing with %s", str(e))
     return objectD
Пример #8
0
    def testSchemaValidation3(self):
        """Test case -  create collection and insert data with schema validation (warn mode) (integrated schema assignment)

        The schema is supplied directly to createCollection() with
        validationAction 'warn'.  Both inserts are expected to return a
        non-None id, and the second document is compared on read back.
        """
        try:
            with Connection(cfgOb=self.__cfgOb,
                            resourceName=self.__resourceName) as client:
                mg = MongoDbUtil(client)
                # Start from a clean database
                if mg.databaseExists(self.__dbName):
                    ok = mg.dropDatabase(self.__dbName)
                    self.assertTrue(ok)
                #
                ok = mg.createDatabase(self.__dbName)
                self.assertTrue(ok)
                #
                ok = mg.createCollection(self.__dbName,
                                         self.__collectionName,
                                         overWrite=True,
                                         bsonSchema=self.__mongoSchema,
                                         validationAction="warn")
                self.assertTrue(ok)
                ok = mg.databaseExists(self.__dbName)
                self.assertTrue(ok)
                ok = mg.collectionExists(self.__dbName, self.__collectionName)
                self.assertTrue(ok)
                #
                dObj = {"x": 1}
                rId = mg.insert(self.__dbName, self.__collectionName, dObj)
                logger.info("rId is %r", rId)
                self.assertNotEqual(rId, None)
                #
                # Single-quoted because the text itself contains a double quote.
                # NOTE(review): the upstream argument may have been written as XML
                # character references rather than literal characters - confirm.
                s2 = unescapeXmlCharRef(' " Φ Ψ α £  ℅  ☆  𝕫')
                dObj = {
                    "strField1": "test value",
                    "strField2": s2,
                    "intField1": 50,
                    "enumField1": "v3a",
                    "dblField1": 100.1
                }
                rId = mg.insert(self.__dbName, self.__collectionName, dObj)
                self.assertNotEqual(rId, None)
                logger.info("rId is %r", rId)
                rObj = mg.fetchOne(self.__dbName, self.__collectionName, "_id",
                                   rId)
                logger.debug("Return Object %s", pprint.pformat(rObj))
                self.assertEqual(len(dObj), len(rObj))
                self.assertEqual(dObj, rObj)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
Пример #9
0
    def testSingleIndex(self):
        """Test case - create a unique descending index on DOC_ID, load documents, verify them, then rebuild the index."""
        try:
            with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
                nDocs = 100
                mg = MongoDbUtil(client)
                dbName, collName = self.__dbName, self.__collectionName
                self.assertTrue(mg.createCollection(dbName, collName))
                self.assertTrue(mg.databaseExists(dbName))
                self.assertTrue(mg.collectionExists(dbName, collName))
                #
                # The index is created before any documents are inserted
                self.assertTrue(mg.createIndex(dbName, collName, keyList=["DOC_ID"], indexName="primary", indexType="DESCENDING", uniqueFlag=True))

                docL = [self.__makeDataObj(2, 5, 5, idx) for idx in range(nDocs)]
                #
                insertedIdL = mg.insertList(dbName, collName, docL, keyNames=["DOC_ID"], salvage=True)
                self.assertEqual(len(docL), len(insertedIdL))
                #
                # Read each document back by its key and compare the content without '_id'
                for idx in range(nDocs):
                    fetched = mg.fetchOne(dbName, collName, "DOC_ID", "DOC_%d" % idx)
                    fetched.pop("_id", None)
                    expected = docL[idx]
                    expected.pop("_id", None)
                    self.assertEqual(len(expected), len(fetched))
                    self.assertEqual(expected, fetched)
                #
                # Drop, recreate and reindex with the collection populated
                self.assertTrue(mg.dropIndex(dbName, collName, indexName="primary"))
                self.assertTrue(mg.createIndex(dbName, collName, keyList=["DOC_ID"], indexName="primary", indexType="DESCENDING", uniqueFlag=True))
                self.assertTrue(mg.reIndex(dbName, collName))
        except Exception as err:
            logger.exception("Failing with %s", str(err))
            self.fail()
Пример #10
0
    def testSingleIndexSelect(self):
        """Test case - index and load documents, then exercise fetch, distinct and count selections."""
        try:
            logger.debug("Starting testSingleIndexSelect")
            with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
                nDocs = 100
                mg = MongoDbUtil(client)
                dbName, collName = self.__dbName, self.__collectionName
                self.assertTrue(mg.createCollection(dbName, collName))
                self.assertTrue(mg.databaseExists(dbName))
                self.assertTrue(mg.collectionExists(dbName, collName))
                #
                # The index is created before any documents are inserted
                self.assertTrue(mg.createIndex(dbName, collName, keyList=["DOC_ID"], indexName="primary", indexType="DESCENDING", uniqueFlag=True))

                nRows = 5
                docL = [self.__makeDataObj(2, 5, nRows, idx) for idx in range(nDocs)]
                #
                insertedIdL = mg.insertList(dbName, collName, docL, keyNames=["DOC_ID"], salvage=True)
                self.assertEqual(len(docL), len(insertedIdL))
                #
                # Read each document back by its key and compare the content without '_id'
                for idx in range(nDocs):
                    fetched = mg.fetchOne(dbName, collName, "DOC_ID", "DOC_%d" % idx)
                    fetched.pop("_id", None)
                    expected = docL[idx]
                    expected.pop("_id", None)
                    self.assertEqual(len(expected), len(fetched))
                    self.assertEqual(expected, fetched)
                #
                # Drop, recreate and reindex with the collection populated
                self.assertTrue(mg.dropIndex(dbName, collName, indexName="primary"))
                self.assertTrue(mg.createIndex(dbName, collName, keyList=["DOC_ID"], indexName="primary", indexType="DESCENDING", uniqueFlag=True))
                self.assertTrue(mg.reIndex(dbName, collName))
                #
            # A second connection is opened for the selection checks
            with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
                mg = MongoDbUtil(client)
                docCount = mg.count(dbName, collName)
                logger.debug("collection length %d", docCount)
                #
                # Fetch of the key attribute over the whole collection
                selL = mg.fetch(dbName, collName, ["DOC_ID"])
                self.assertEqual(len(selL), nDocs)
                logger.debug("Fetch length %d", len(selL))
                for num, selD in enumerate(selL):
                    logger.debug("Fetch num %d: %r", num, selD)
                #
                # Fetch restricted by a query on a nested attribute
                atName = "category_0.attribute_0"
                selL = mg.fetch(dbName, collName, ["category_0.attribute_0"], queryD={"category_0.attribute_0": "val_0_0"})
                self.assertEqual(len(selL), nDocs)
                logger.debug("Fetch length %d", len(selL))
                for num, selD in enumerate(selL):
                    logger.debug("Fetch num %d: %r", num, selD)
                #
                # Distinct values for two attributes, then a filtered count for each value of the first
                vL0 = mg.distinct(dbName, collName, atName)
                self.assertEqual(len(vL0), nRows + 2)
                logger.debug("vL0 %r", vL0)
                vL1 = mg.distinct(dbName, collName, "category_1.attribute_0")
                self.assertEqual(len(vL1), nRows + 2)
                for val in vL0:
                    hits = mg.count(dbName, collName, countFilter={atName: val})
                    logger.debug("%s value %s (%d)", atName, val, hits)
                    self.assertGreaterEqual(hits, 100)
                #
        except Exception as err:
            logger.exception("Failing with %s", str(err))
            self.fail()
Пример #11
0
    def __loadDocuments(self,
                        dbName,
                        collectionName,
                        docList,
                        loadType="full",
                        readBackCheck=False,
                        keyNames=None):
        """Load database/collection with the input document list.

        Args:
            dbName: database name passed to the MongoDbUtil calls
            collectionName: collection name passed to the MongoDbUtil calls
            docList: list of documents to insert
            loadType: when "replace" (and keyNames is provided) matching documents
                are deleted via deleteList() before the insert (default "full")
            readBackCheck: when True (and keyNames is provided) each inserted
                document is fetched and compared with its input (default False)
            keyNames: key names used to identify individual documents; required
                for per-document success/failure tracking (default None)

        Returns:
            tuple: (status, successList, failList).  status is True only when the
                number of returned ids equals len(docList) and any requested
                read-back check passed.  On an exception (False, [], docList)
                is returned.
        """
        #
        # Load database/collection with input document list -
        #
        failList = []
        rIdL = []
        successList = []
        logger.debug(
            "Loading dbName %s collectionName %s with document count %d keynames %r",
            dbName, collectionName, len(docList), keyNames)
        if keyNames:
            # map the document list to some document key if this is provided
            # indD: key-value tuple -> position in docList; indL: all positions in docList
            indD = {}
            indL = []
            try:
                for ii, doc in enumerate(docList):
                    dIdTup = self.__getKeyValues(doc, keyNames)
                    indD[dIdTup] = ii
                indL = list(range(len(docList)))
            except Exception as e:
                logger.exception("Failing ii %d d %r with %s", ii, doc, str(e))
        try:
            with Connection(cfgOb=self.__cfgOb,
                            resourceName=self.__resourceName) as client:
                mg = MongoDbUtil(client)
                #
                # For a replacement load, remove any existing documents with the same keys first
                if loadType == "replace" and keyNames:
                    dTupL = mg.deleteList(dbName, collectionName, docList,
                                          keyNames)
                    logger.debug("Deleted document status %r", (dTupL, ))
                #
                rIdL = mg.insertList(dbName,
                                     collectionName,
                                     docList,
                                     keyNames=keyNames)
                logger.debug("Insert returns rIdL length %r", len(rIdL))

                # ---
                #  If there is a failure then determine the specific successes and failures -
                #
                # Default assumption is that the whole batch succeeded
                successList = docList
                failList = []
                if len(rIdL) != len(docList):
                    if keyNames:
                        # Map each returned id back to its position in docList via the document key values
                        successIndList = []
                        for rId in rIdL:
                            rObj = mg.fetchOne(dbName, collectionName, "_id",
                                               rId)
                            dIdTup = self.__getKeyValues(rObj, keyNames)
                            successIndList.append(indD[dIdTup])
                        # Any position not confirmed as inserted is treated as a failure
                        failIndList = list(set(indL) - set(successIndList))
                        failList = [docList[ii] for ii in failIndList]
                        successList = [docList[ii] for ii in successIndList]
                    else:
                        # fail the whole batch if we don't have visibility into each document
                        failList = docList
                        successList = []
                #
                rbStatus = True
                if readBackCheck and keyNames:
                    #
                    # Note that objects in docList are mutated by the insert operation with the additional key '_id',
                    # hence, it is possible to compare the fetched object with the input object.
                    #
                    for ii, rId in enumerate(rIdL):
                        rObj = mg.fetchOne(dbName, collectionName, "_id", rId)
                        dIdTup = self.__getKeyValues(rObj, keyNames)
                        jj = indD[dIdTup]
                        # Stop at the first mismatch between the stored and the input document
                        if rObj != docList[jj]:
                            rbStatus = False
                            break
                #
                if readBackCheck and not rbStatus:
                    return False, successList, failList
                #
            # Success requires one returned id per input document
            return len(rIdL) == len(docList), successList, failList
        except Exception as e:
            logger.exception("Failing %r %r (len=%d) %s with %s", dbName,
                             collectionName, len(docList), keyNames, str(e))
        # Reached only after an exception: report the whole batch as failed
        return False, [], docList