Exemplo n.º 1
0
 def testCompareCollectionSchema(self):
     """Run jsonSchemaCompare() for every configured collection schema.

     For each configured database a schema definition is generated (without
     saving), and for each of its collections every "json" encoding type is
     compared at every configured validation level.  The file names of any
     paths returned by the comparison are logged.  Any exception is logged
     and fails the test.
     """
     try:
         diffPaths = []
         for dbName in self.__databaseNameList:
             schemaDef = self.__schP.makeSchemaDef(dbName,
                                                   dataTyping="ANY",
                                                   saveSchema=False)
             accessObj = SchemaDefAccess(schemaDef)
             for collectionInfo in accessObj.getCollectionInfo():
                 name = collectionInfo["NAME"]
                 for encoding in self.__encodingTypes:
                     # Only JSON encoded schemas are compared here.
                     if encoding.lower() == "json":
                         for level in self.__validationLevels:
                             diffPath = self.__schP.jsonSchemaCompare(
                                 dbName, name, encoding, level)
                             if diffPath:
                                 diffPaths.append(diffPath)
         if diffPaths:
             fileNames = [os.path.basename(p) for p in diffPaths]
             logger.info("JSON schema difference path list %r", fileNames)
     except Exception as err:
         logger.exception("Failing with %s", str(err))
         self.fail()
Exemplo n.º 2
0
 def testBuildCollectionSchema(self):
     """Build collection schemas for every configured database.

     Every encoding type other than "rcsb" is passed to makeSchema() at each
     configured validation level.  When schema comparison is enabled, "json"
     encodings are additionally passed to jsonSchemaCompare() and any paths
     it returns are collected and logged.
     """
     diffPaths = []
     for dbName in self.__databaseNameList:
         schemaDef = self.__schP.makeSchemaDef(dbName,
                                               dataTyping="ANY",
                                               saveSchema=False)
         accessObj = SchemaDefAccess(schemaDef)
         for collectionInfo in accessObj.getCollectionInfo():
             name = collectionInfo["NAME"]
             for encoding in self.__encodingTypes:
                 if encoding.lower() != "rcsb":
                     for level in self.__validationLevels:
                         self.__schP.makeSchema(dbName,
                                                name,
                                                encodingType=encoding,
                                                level=level,
                                                saveSchema=self.__saveSchema)
                         # Comparison is optional and limited to JSON.
                         if not (self.__compareSchema
                                 and encoding.lower() == "json"):
                             continue
                         diffPath = self.__schP.jsonSchemaCompare(
                             dbName, name, encoding, level)
                         if diffPath:
                             diffPaths.append(diffPath)
     if diffPaths:
         logger.info("Path dif list %r", diffPaths)
Exemplo n.º 3
0
    def getSchemaInfo(self, databaseName, dataTyping="ANY"):
        """Convenience method to return essential schema details for the input repository content type.

        The schema definition file is either rebuilt (when the rebuild flag is
        set) or reloaded from the schema cache path, then imported as JSON and
        wrapped in a SchemaDefAccess object.

        Args:
            databaseName (str): schema name  (e.g. pdbx, bird, chem_comp, ...)
            dataTyping (str, optional): Application name for the target schema (e.g. ANY, SQL, ...)

        Returns:
            tuple: SchemaDefAccess(object), target database name, target collection name list,
                   and a dictionary mapping each collection name to the value returned by
                   getDocumentIndices() for that collection.  On failure the defaults
                   (None, None, [], {}) are returned; exceptions are logged, not raised.
        """
        sd = None
        dbName = None
        collectionNameList = []
        docIndexD = {}
        try:
            mU = MarshalUtil(workPath=self.__workPath)
            schemaLocator = self.__getSchemaDefLocator(databaseName,
                                                       dataTyping=dataTyping)
            if self.__rebuildFlag:
                filePath = os.path.join(
                    self.__schemaCachePath,
                    self.__fileU.getFileName(schemaLocator))
                self.makeSchemaDef(databaseName,
                                   dataTyping=dataTyping,
                                   saveSchema=True)
            else:
                filePath = self.__reload(schemaLocator,
                                         self.__schemaCachePath,
                                         useCache=self.__useCache)

            if not filePath:
                logger.error("Unable to recover schema %s (%s)", databaseName,
                             dataTyping)
            logger.debug("ContentType %r dataTyping %r schemaLocator %r",
                         databaseName, dataTyping, schemaLocator)
            # Do not attempt an import when no schema file could be recovered;
            # the failure has already been reported above.
            schemaDef = mU.doImport(filePath, fmt="json") if filePath else None
            if schemaDef:
                logger.debug(
                    "Using cached schema definition for %s application %s",
                    databaseName, dataTyping)
                sd = SchemaDefAccess(schemaDef)
                if sd:
                    dbName = sd.getDatabaseName()
                    collectionInfoList = sd.getCollectionInfo()
                    logger.debug("Schema %s database name %s collections %r",
                                 databaseName, dbName, collectionInfoList)
                    for cd in collectionInfoList:
                        collectionName = cd["NAME"]
                        collectionNameList.append(collectionName)
                        docIndexD[collectionName] = sd.getDocumentIndices(
                            collectionName)

        except Exception as e:
            logger.exception("Retreiving schema %s for %s failing with %s",
                             databaseName, dataTyping, str(e))

        return sd, dbName, collectionNameList, docIndexD
Exemplo n.º 4
0
    def testLoadPdbxFiles(self):
        """Create the "pdbx" SQL schema and batch-insert the PDBx locator list.

        The load is run with deleteOpt="all" and its return status is
        asserted.  Any exception is logged and fails the test.
        """
        try:
            schemaDef = self.__schP.makeSchemaDef("pdbx",
                                                  dataTyping="SQL",
                                                  saveSchema=True)
            accessObj = SchemaDefAccess(schemaDef)
            self.__schemaCreate(accessObj)

            locatorObjList = self.__rpP.getLocatorObjList(contentType="pdbx")
            logger.debug("Input path list %r", locatorObjList)
            with Connection(cfgOb=self.__cfgOb,
                            resourceName=self.__resourceName) as client:
                loaderOptions = dict(
                    schemaDefObj=accessObj,
                    dbCon=client,
                    cachePath=self.__cachePath,
                    workPath=self.__workPath,
                    cleanUp=False,
                    warnings="error",
                    verbose=self.__verbose,
                    restoreUseStash=False,
                    restoreUseGit=True,
                    providerTypeExclude=self.__excludeType,
                )
                loader = SchemaDefLoader(self.__cfgOb, **loaderOptions)
                status = loader.load(inputPathList=locatorObjList,
                                     loadType="batch-insert",
                                     deleteOpt="all")
                self.assertTrue(status)
        except Exception as err:
            logger.exception("Failing with %s", str(err))
            self.fail()
Exemplo n.º 5
0
    def testLoadBirdReference(self):
        """Create the "bird" SQL schema and batch-file load BIRD data.

        The input list is the "bird" locator list extended with the
        "bird_family" locator list.  The load status is asserted; any
        exception is logged and fails the test.
        """
        try:
            schemaDef = self.__schP.makeSchemaDef("bird",
                                                  dataTyping="SQL",
                                                  saveSchema=True)
            accessObj = SchemaDefAccess(schemaDef)
            self.__schemaCreate(accessObj)

            locatorObjList = self.__rpP.getLocatorObjList(contentType="bird")
            familyLocatorObjList = self.__rpP.getLocatorObjList(
                contentType="bird_family")
            locatorObjList.extend(familyLocatorObjList)

            with Connection(cfgOb=self.__cfgOb,
                            resourceName=self.__resourceName) as client:
                loaderOptions = dict(
                    schemaDefObj=accessObj,
                    dbCon=client,
                    cachePath=self.__cachePath,
                    workPath=self.__workPath,
                    cleanUp=False,
                    warnings="error",
                    verbose=self.__verbose,
                    restoreUseStash=False,
                    restoreUseGit=True,
                    providerTypeExclude=self.__excludeType,
                )
                loader = SchemaDefLoader(self.__cfgOb, **loaderOptions)
                status = loader.load(inputPathList=locatorObjList,
                                     loadType="batch-file")
                self.assertTrue(status)
        except Exception as err:
            logger.exception("Failing with %s", str(err))
            self.fail()
    def testLoadChemCompReference(self):
        """Create the "chem_comp" SQL schema and batch-file load its data.

        The load status is asserted; any exception is logged and fails the
        test.
        """
        try:
            schemaDef = self.__schP.makeSchemaDef("chem_comp",
                                                  dataTyping="SQL",
                                                  saveSchema=True)
            accessObj = SchemaDefAccess(schemaDef)
            self.__schemaCreate(accessObj)

            locatorObjList = self.__rpP.getLocatorObjList(
                contentType="chem_comp")
            with Connection(cfgOb=self.__cfgOb,
                            resourceName=self.__resourceName) as client:
                loaderOptions = dict(
                    schemaDefObj=accessObj,
                    dbCon=client,
                    cachePath=self.__cachePath,
                    workPath=self.__workPath,
                    cleanUp=False,
                    warnings="error",
                    verbose=self.__verbose,
                )
                loader = SchemaDefLoader(self.__cfgOb, **loaderOptions)
                status = loader.load(inputPathList=locatorObjList,
                                     loadType="batch-file")
                self.assertTrue(status)
        except Exception as err:
            logger.exception("Failing with %s", str(err))
            self.fail()
Exemplo n.º 7
0
 def testSchemaCreate(self):
     """Create table schema for BIRD and chemical component data.

     A schema definition is generated (and saved) with SQL data typing for
     each content type and passed to the private schema creation helper.
     """
     accessObj = None
     for contentType in ("bird", "chem_comp"):
         schemaDef = self.__schP.makeSchemaDef(contentType,
                                               dataTyping="SQL",
                                               saveSchema=True)
         accessObj = SchemaDefAccess(schemaDef)
         self.__schemaCreate(accessObj)
     #
     # NOTE(review): the "pdbx" schema definition step is disabled, so this
     # final call repeats schema creation with the "chem_comp" definition.
     # Confirm whether the PDBx step should be restored or this call dropped.
     self.__schemaCreate(accessObj)
Exemplo n.º 8
0
 def testBuildColSchemaWithRefs(self):
     """Build and save collection schemas for "ihm_dev_full" with extra options.

     Every encoding type other than "rcsb" is built at each configured
     validation level with extraOpts="addParentRefs|addPrimaryKey".
     """
     for dbName in ("ihm_dev_full",):
         schemaDef = self.__schP.makeSchemaDef(dbName,
                                               dataTyping="ANY",
                                               saveSchema=False)
         accessObj = SchemaDefAccess(schemaDef)
         for collectionInfo in accessObj.getCollectionInfo():
             name = collectionInfo["NAME"]
             for encoding in self.__encodingTypes:
                 if encoding.lower() != "rcsb":
                     for level in self.__validationLevels:
                         self.__schP.makeSchema(
                             dbName,
                             name,
                             encodingType=encoding,
                             level=level,
                             saveSchema=True,
                             extraOpts="addParentRefs|addPrimaryKey")
Exemplo n.º 9
0
    def __simpleSchemaDataPrep(self, contentType, filterType, styleType, mockLength, rejectLength=0, dataSelectors=None, mergeContentTypes=None):
        """Prepare file-based data that does NOT need dynamic methods, slicing, or key injection, and check the results.

        The fetched documents are checked against the expected counts, optionally exported as JSON
        (export flag), and optionally compared with a saved reference file (diff flag).  A non-empty
        difference is exported to a "-diff.json" file in the output path and fails the test.

        Args:
            contentType (str): Content type name
            filterType (str): List of data processing options (separated by '|') (e.g. "drop-empty-attributes|drop-empty-tables|skip-max-width|...)
            styleType (str): organization of output document (e.g. rowise-by-name)
            mockLength (int): Expected length of the test data for the input content type
            rejectLength (int, optional): number of input data sets rejected by the dataselection criteria. Defaults to 0.
            dataSelectors (list of str, optional): data selection criteria. Defaults to None, which selects ["PUBLIC_RELEASE"].
            mergeContentTypes (list of str, optional): list content types to merge with the input data set. Defaults to None. (e.g. ['vrpt'])
        """
        try:
            dataSelectors = dataSelectors or ["PUBLIC_RELEASE"]
            schemaDef = self.__schP.makeSchemaDef(contentType, dataTyping="ANY", saveSchema=True)
            # The access object is constructed but not used further here.
            _ = SchemaDefAccess(schemaDef)
            locatorObjList = self.__rpP.getLocatorObjList(contentType=contentType, mergeContentTypes=mergeContentTypes)
            schemaInfo = self.__schP.getSchemaInfo(databaseName=contentType, dataTyping="ANY")
            accessObj = schemaInfo[0]
            transformFactory = DataTransformFactory(schemaDefAccessObj=accessObj, filterType=filterType)
            dataPrep = SchemaDefDataPrep(schemaDefAccessObj=accessObj, dtObj=transformFactory, workPath=self.__cachePath, verbose=self.__verbose)

            logger.debug("For %s mock length %d length of path list %d\n", contentType, mockLength, len(locatorObjList))
            self.assertEqual(len(locatorObjList), mockLength)

            docList, containerNames, rejected = dataPrep.fetchDocuments(
                locatorObjList, styleType=styleType, filterType=filterType, dataSelectors=dataSelectors
            )
            logger.debug("For %s mock length %d reject length %d length of tddl list %d\n", contentType, mockLength, rejectLength, len(docList))
            acceptedCount = mockLength - rejectLength
            self.assertEqual(len(docList), acceptedCount)
            self.assertEqual(len(containerNames), acceptedCount)

            if rejected:
                logger.debug("For %s rejecting components %r", contentType, rejected)
            self.assertEqual(len(rejected), rejectLength)

            fileName = "simple-prep-%s-%s.json" % (contentType, styleType)
            if self.__exportFlag:
                exportPath = os.path.join(self.__outputPath, fileName)
                self.__mU.doExport(exportPath, docList, fmt="json", indent=3)
            if self.__diffFlag:
                savedPath = os.path.join(self.__savedOutputPath, fileName)
                refDocList = self.__mU.doImport(savedPath, fmt="json")
                self.assertEqual(len(refDocList), len(docList))

                difference = diff(refDocList, docList, syntax="explicit", marshal=True)
                if difference:
                    baseName = os.path.splitext(os.path.basename(savedPath))[0]
                    diffPath = os.path.join(self.__outputPath, baseName + "-diff.json")
                    logger.debug("jsondiff for %s %s = \n%s", contentType, styleType, pprint.pformat(difference, indent=3, width=100))
                    self.__mU.doExport(diffPath, difference, fmt="json", indent=3)
                self.assertEqual(len(difference), 0)

        except Exception as err:
            logger.exception("Failing with %s", str(err))
            self.fail()
Exemplo n.º 10
0
 def testSQLMethods(self):
     """Run the private SQL checks against the "pdbx_core" schema definition.

     The schema definition is generated with SQL data typing (not saved) and
     passed in turn to the schema-create, import/export, and
     selection/condition helpers.
     """
     sqlTyping = "SQL"
     for name in ("pdbx_core",):
         schemaDef = self.__sdu.makeSchemaDef(name,
                                              dataTyping=sqlTyping,
                                              saveSchema=False)
         accessObj = SchemaDefAccess(schemaDef)
         checks = (
             self.__testSchemaCreate,
             self.__testImportExport,
             self.__testSelectionAndConditions,
         )
         for check in checks:
             check(accessObj)
Exemplo n.º 11
0
    def testCompareSchemaCategories(self):
        """Compare common categories across schema definitions.

        For each shared schema id, every attribute id present in the
        "chem_comp_core" definition must also be present in the
        "bird_chem_comp_core" definition.
        """
        try:
            coreAccess, birdCoreAccess = [
                SchemaDefAccess(
                    self.__schP.makeSchemaDef(name,
                                              dataTyping="ANY",
                                              saveSchema=False))
                for name in ("chem_comp_core", "bird_chem_comp_core")
            ]
            logger.info("")
            for schemaId in ("CHEM_COMP", "PDBX_CHEM_COMP_AUDIT"):
                coreAttributeIds = coreAccess.getAttributeIdList(schemaId)
                birdAttributeIds = birdCoreAccess.getAttributeIdList(schemaId)

                for attributeIds in (coreAttributeIds, birdAttributeIds):
                    logger.debug("%s attributes (%d) %r", schemaId,
                                 len(attributeIds), attributeIds)

                # Attribute ids found only in the chem_comp_core definition.
                missing = set(coreAttributeIds).difference(birdAttributeIds)
                if missing:
                    logger.info("For %s attribute differences %r", schemaId,
                                missing)
                self.assertEqual(len(missing), 0)
        except Exception as err:
            logger.exception("Failing with %s", str(err))
            self.fail()
Exemplo n.º 12
0
    def testReLoadBirdReference(self):
        """Load BIRD data, then reload it with deleteOpt "all" and "selected".

        All three loads use loadType="batch-file".  Only the status of the
        two reloads is asserted; the status of the initial load is not
        checked.  Any exception is logged and fails the test.
        """
        try:
            schemaDef = self.__schP.makeSchemaDef("bird",
                                                  dataTyping="SQL",
                                                  saveSchema=True)
            accessObj = SchemaDefAccess(schemaDef)
            self.__schemaCreate(accessObj)

            locatorObjList = self.__rpP.getLocatorObjList(contentType="bird")
            familyLocatorObjList = self.__rpP.getLocatorObjList(
                contentType="bird_family")
            locatorObjList.extend(familyLocatorObjList)

            with Connection(cfgOb=self.__cfgOb,
                            resourceName=self.__resourceName) as client:
                loaderOptions = dict(
                    schemaDefObj=accessObj,
                    dbCon=client,
                    cachePath=self.__cachePath,
                    workPath=self.__workPath,
                    cleanUp=False,
                    warnings="error",
                    verbose=self.__verbose,
                )
                loader = SchemaDefLoader(self.__cfgOb, **loaderOptions)

                # Initial load (status intentionally left unchecked, as before).
                loader.load(inputPathList=locatorObjList,
                            loadType="batch-file")

                logger.debug(
                    "INFO BATCH FILE RELOAD TEST --------------------------------------------\n"
                )
                status = loader.load(inputPathList=locatorObjList,
                                     loadType="batch-file",
                                     deleteOpt="all")
                self.assertTrue(status)

                logger.debug(
                    "\n\n\n+INFO BATCH INSERT RELOAD TEST --------------------------------------------\n"
                )
                status = loader.load(inputPathList=locatorObjList,
                                     loadType="batch-file",
                                     deleteOpt="selected")
                self.assertTrue(status)
        except Exception as err:
            logger.exception("Failing with %s", str(err))
            self.fail()
Exemplo n.º 13
0
    def __testAccessors(self, schemaDef):
        """Exercise the SchemaDefAccess accessors for a schema definition.

        Accessor results are written to the debug log.  The only assertion
        is that, for each schema id, the attribute id list from the access
        object has the same length as the one from the schema object.

        Args:
            schemaDef: schema definition passed to SchemaDefAccess()

        Returns:
            bool: True when all accessors ran without raising
        """
        accessObj = SchemaDefAccess(schemaDef)
        logger.debug("Schema name %s", accessObj.getName())
        logger.debug("Schema name %s", accessObj.getAppName())

        logger.debug("Database name %s", accessObj.getDatabaseName())
        logger.debug("Versioned database name %s",
                     accessObj.getVersionedDatabaseName())

        logger.debug("Collection info %r", accessObj.getCollectionInfo())

        for selectorName in accessObj.getDataSelectorNames():
            logger.debug("Selector %s %r", selectorName,
                         accessObj.getDataSelectors(selectorName))

        # Per-collection accessors
        for collectionInfo in accessObj.getCollectionInfo():
            name = collectionInfo["NAME"]
            logger.debug("Collection excluded %r",
                         accessObj.getCollectionExcluded(name))
            logger.debug("Collection included %r",
                         accessObj.getCollectionSelected(name))
            logger.debug("Collection document key attribute names %r",
                         accessObj.getDocumentKeyAttributeNames(name))

        # Per-schema-object accessors
        for schemaId in accessObj.getSchemaIdList():
            accessAttributeIds = accessObj.getAttributeIdList(schemaId)
            schemaObj = accessObj.getSchemaObject(schemaId)
            objectAttributeIds = schemaObj.getAttributeIdList()
            self.assertEqual(len(accessAttributeIds), len(objectAttributeIds))
            objectAttributeNames = schemaObj.getAttributeNameList()
            logger.debug("Ordered attribute Id   list %s",
                         str(objectAttributeIds))
            logger.debug("Ordered attribute name list %s",
                         str(objectAttributeNames))

            mappedNames = schemaObj.getMapAttributeNameList()
            logger.debug("Ordered mapped attribute name list %s",
                         str(mappedNames))

            mappedIds = schemaObj.getMapAttributeIdList()
            logger.debug("Ordered mapped attribute id   list %s",
                         str(mappedIds))

            categories = schemaObj.getMapInstanceCategoryList()
            logger.debug("Mapped category list %s", str(categories))

            for category in categories:
                mappedAttributes = schemaObj.getMapInstanceAttributeList(
                    category)
                logger.debug("Mapped attribute list in %s :  %s", category,
                             str(mappedAttributes))
        return True
Exemplo n.º 14
0
def main():
    """Command-line entry point to build or compare cached schema files.

    Database names are selected either by individual ``--update_*`` flags or
    by one of the ``--update_config_*`` flags, which replace the database
    names, data typing, validation levels and encoding types with lists read
    from the "database_catalog_configuration" section.  When several
    configuration flags are given the precedence is test, then all, then
    deployed.

    With ``--compare_only`` the schema definitions and JSON collection
    schemas are compared (schemaDefCompare / jsonSchemaCompare) and any
    returned paths are logged.  Otherwise schema definitions are created for
    the "rcsb" encoding and collection schemas for every other encoding.

    The process exits with status 1 when the configuration file is missing,
    unreadable, or cannot be loaded.
    """
    # Function-scope import: sys.exit() replaces the site-provided exit(),
    # which is not guaranteed to exist (e.g. when running with -S).
    import sys

    defaultConfigName = "site_info_configuration"
    catalogSection = "database_catalog_configuration"
    # (flag name, database name, help text) in command-line order.
    databaseOptions = (
        ("update_chem_comp_ref", "chem_comp",
         "Update schema for Chemical Component reference definitions"),
        ("update_chem_comp_core_ref", "chem_comp_core",
         "Update core schema for Chemical Component reference definitions"),
        ("update_bird_chem_comp_ref", "bird_chem_comp",
         "Update schema for Bird Chemical Component reference definitions"),
        ("update_bird_chem_comp_core_ref", "bird_chem_comp_core",
         "Update core schema for Bird Chemical Component reference definitions"),
        ("update_bird_ref", "bird",
         "Update schema for Bird reference definitions"),
        ("update_bird_family_ref", "bird_family",
         "Update schema for Bird Family reference definitions"),
        ("update_pdbx", "pdbx", "Update schema for PDBx entry data"),
        ("update_pdbx_core", "pdbx_core",
         "Update schema for PDBx core entry/entity data"),
        ("update_pdbx_comp_model_core", "pdbx_comp_model_core",
         "Update schema for PDBx computational model core entry/entity data"),
        ("update_repository_holdings", "repository_holdings",
         "Update schema for repository holdings"),
        ("update_entity_sequence_clusters", "sequence_clusters",
         "Update schema for entity sequence clusters"),
        ("update_data_exchange", "data_exchange",
         "Update schema for data exchange status"),
        ("update_ihm_dev", "ihm_dev",
         "Update schema for I/HM dev entry data"),
        ("update_drugbank_core", "drugbank_core", "Update DrugBank schema"),
    )
    # (flag name, configuration key suffix) in order of increasing
    # precedence: a later entry overrides the settings of an earlier one.
    configOptions = (
        ("update_config_deployed", "DEPLOYED"),
        ("update_config_all", "ALL"),
        ("update_config_test", "TEST"),
    )

    parser = argparse.ArgumentParser()
    for flagName, _, helpText in databaseOptions:
        parser.add_argument("--" + flagName,
                            default=False,
                            action="store_true",
                            help=helpText)
    #
    parser.add_argument(
        "--update_config_all",
        default=False,
        action="store_true",
        help="Update using configuration settings (e.g. DATABASE_NAMES_ALL)")
    parser.add_argument(
        "--update_config_deployed",
        default=False,
        action="store_true",
        help="Update using configuration settings (e.g. DATABASE_NAMES_DEPLOYED)")
    parser.add_argument(
        "--update_config_test",
        default=False,
        action="store_true",
        help="Update using configuration settings (e.g. DATABASE_NAMES_TEST)")
    #
    parser.add_argument("--config_path",
                        default=None,
                        help="Path to configuration options file")
    parser.add_argument("--config_name",
                        default=defaultConfigName,
                        help="Configuration section name")
    #
    parser.add_argument("--cache_path",
                        default=None,
                        help="Schema cache directory path")
    parser.add_argument(
        "--encoding_types",
        default=None,
        help="Schema encoding (rcsb|json|bson) (comma separated)")
    parser.add_argument(
        "--validation_levels",
        default=None,
        help="Schema validation level (full|min) (comma separated)")
    parser.add_argument("--compare_only",
                        default=False,
                        action="store_true",
                        help="Perform comparison with cached schema")
    #
    parser.add_argument("--debug",
                        default=False,
                        action="store_true",
                        help="Turn on verbose logging")
    parser.add_argument(
        "--mock",
        default=False,
        action="store_true",
        help="Use MOCK repository configuration for dependencies and testing")
    args = parser.parse_args()
    #
    if args.debug:
        logger.setLevel(logging.DEBUG)
    # ----------------------- - ----------------------- - -----------------------
    #                          Configuration Details
    configPath = args.config_path or os.getenv("DBLOAD_CONFIG_PATH", None)
    configName = args.config_name
    cachePath = args.cache_path
    #
    encodingTypes = args.encoding_types.split(",") if args.encoding_types else []
    validationLevels = args.validation_levels.split(",") if args.validation_levels else []
    dataTypingList = ["ANY", "SQL"]

    # Check explicitly for a missing path rather than relying on os.access()
    # raising for None.
    if not configPath or not os.access(configPath, os.R_OK):
        logger.error("Missing or access issue with config file %r",
                     configPath)
        sys.exit(1)
    try:
        os.environ["DBLOAD_CONFIG_PATH"] = configPath
        logger.info("Using configuation path %s (%s)", configPath, configName)
        mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data") if args.mock else None
        cfgOb = ConfigUtil(configPath=configPath,
                           defaultSectionName=defaultConfigName,
                           mockTopPath=mockTopPath)
        if configName != defaultConfigName:
            cfgOb.replaceSectionName(defaultConfigName, configName)
    except Exception as e:
        logger.error("Missing or access issue with config file %r with %s",
                     configPath, str(e))
        sys.exit(1)
    #
    databaseNameList = [
        dbName for flagName, dbName, _ in databaseOptions
        if getattr(args, flagName)
    ]
    for flagName, suffix in configOptions:
        if getattr(args, flagName):
            databaseNameList = cfgOb.getList("DATABASE_NAMES_" + suffix,
                                             sectionName=catalogSection)
            dataTypingList = cfgOb.getList("DATATYPING_" + suffix,
                                           sectionName=catalogSection)
            validationLevels = cfgOb.getList("VALIDATION_LEVELS_" + suffix,
                                             sectionName=catalogSection)
            encodingTypes = cfgOb.getList("ENCODING_TYPES_" + suffix,
                                          sectionName=catalogSection)
    #
    # Guard against a missing collection-name mapping in the configuration.
    scnD = cfgOb.get("document_collection_names",
                     sectionName="document_helper_configuration") or {}
    # Remove duplicates while keeping a stable, first-seen processing order.
    databaseNameList = list(dict.fromkeys(databaseNameList))
    logger.debug("Collections %s", list(scnD.items()))
    logger.debug("databaseNameList %s", databaseNameList)

    if args.compare_only:
        schP = SchemaProvider(cfgOb, cachePath, useCache=True)
        # Pass 1: compare schema definitions for each data typing.
        difPathList = []
        for databaseName in databaseNameList:
            for dataTyping in dataTypingList:
                logger.debug("Building schema %s with types %s", databaseName,
                             dataTyping)
                pth = schP.schemaDefCompare(databaseName, dataTyping)
                if pth:
                    difPathList.append(pth)
        if difPathList:
            logger.info("Schema definition difference path list %r",
                        difPathList)
        # Pass 2: compare the JSON collection schemas.
        difPathList = []
        for databaseName in databaseNameList:
            dD = schP.makeSchemaDef(databaseName,
                                    dataTyping="ANY",
                                    saveSchema=False)
            sD = SchemaDefAccess(dD)
            for cd in sD.getCollectionInfo():
                collectionName = cd["NAME"]
                for encodingType in encodingTypes:
                    if encodingType.lower() != "json":
                        continue
                    for level in validationLevels:
                        pth = schP.jsonSchemaCompare(databaseName,
                                                     collectionName,
                                                     encodingType, level)
                        if pth:
                            difPathList.append(pth)
        if difPathList:
            logger.info("JSON schema difference path list %r", difPathList)
        return

    schP = SchemaProvider(cfgOb, cachePath, useCache=False)
    for databaseName in databaseNameList:
        for encodingType in encodingTypes:
            # Case-insensitive, consistent with the "json" check above.
            if encodingType.lower() == "rcsb":
                for dataTyping in dataTypingList:
                    logger.info(
                        "Creating schema definition for content type %s data typing %s",
                        databaseName, dataTyping)
                    schP.makeSchemaDef(databaseName,
                                       dataTyping=dataTyping,
                                       saveSchema=True)
                continue
            # Collection schemas are only built for databases that have a
            # collection-name mapping in the configuration.
            for dD in scnD.get(databaseName, []):
                collectionName = dD["NAME"]
                for validationLevel in validationLevels:
                    logger.info(
                        "Creating %r schema for content type %s collection %s",
                        encodingType, databaseName, collectionName)
                    schP.makeSchema(databaseName,
                                    collectionName,
                                    encodingType=encodingType,
                                    level=validationLevel,
                                    saveSchema=True)
Exemplo n.º 15
0
    def __fullSchemaDataPrep(self,
                             contentType,
                             filterType,
                             styleType,
                             mockLength,
                             rejectLength=0,
                             dataSelectors=None,
                             mergeContentTypes=None,
                             excludeExtras=None):
        """Internal method for preparing file-based data requiring dynamic methods, slicing, or key injection.

        For each collection returned by the schema provider for the content type,
        the input containers are processed into documents.  When the export flag is
        set the documents are written to the output path; when the diff flag is set
        they are compared with a previously saved reference export, and any
        difference is written to a "-diff.json" file before the test is failed.

        Any exception raised along the way (including a failed assertion) is logged
        and reported through self.fail().

        Args:
            contentType (str): Content type name
            filterType (str): List of data processing options (separated by '|') (e.g. "drop-empty-attributes|drop-empty-tables|skip-max-width|...")
            styleType (str): organization of output document (e.g. a row-wise, by-name layout)
            mockLength (int): Expected length of the test data for the input content type. Accepted but not checked by this method.
            rejectLength (int, optional): number of input data sets rejected by the data selection criteria. Defaults to 0. Accepted but not checked by this method.
            dataSelectors (list of str, optional): data selection criteria. Defaults to None.
            mergeContentTypes (list of str, optional): list content types to merge with the input data set. Defaults to None. (e.g. ['vrpt'])
            excludeExtras (list, optional): passed to the document filter that is applied before export/diff
                (the filter is used to drop timestamped items so diffs are stable). Defaults to None, treated as an empty list.
        """
        try:
            excludeExtras = excludeExtras or []
            # Unused here; kept so the signature matches what callers pass.
            _ = mockLength
            _ = rejectLength
            dD = self.__schP.makeSchemaDef(contentType,
                                           dataTyping="ANY",
                                           saveSchema=True)
            # Result is discarded; the access object used below comes from
            # getSchemaInfo().  NOTE(review): presumably kept as a sanity check
            # that the generated definition can be wrapped - confirm.
            _ = SchemaDefAccess(dD)
            inputPathList = self.__rpP.getLocatorObjList(
                contentType=contentType, mergeContentTypes=mergeContentTypes)
            sd, _, collectionNameList, _ = self.__schP.getSchemaInfo(
                databaseName=contentType, dataTyping="ANY")
            #
            dP = DictionaryApiProviderWrapper(self.__cachePath,
                                              cfgOb=self.__cfgOb,
                                              configName=self.__configName,
                                              useCache=True)
            dictApi = dP.getApiByName(contentType)
            #
            rP = DictMethodResourceProvider(
                self.__cfgOb,
                configName=self.__configName,
                cachePath=self.__cachePath,
                restoreUseStash=False,
                restoreUseGit=True,
                providerTypeExclude=self.__excludeType,
            )
            dmh = DictMethodRunner(dictApi,
                                   modulePathMap=self.__modulePathMap,
                                   resourceProvider=rP)
            #
            dtf = DataTransformFactory(schemaDefAccessObj=sd,
                                       filterType=filterType)
            sdp = SchemaDefDataPrep(schemaDefAccessObj=sd,
                                    dtObj=dtf,
                                    workPath=self.__cachePath,
                                    verbose=self.__verbose)
            containerList = self.__rpP.getContainerList(inputPathList)
            # Run the dictionary methods on every container before any
            # collection is processed.
            for container in containerList:
                logger.debug("Processing container %s", container.getName())
                dmh.apply(container)
            #
            # The merge label is the same for every collection, so build it once.
            mergeS = "-".join(mergeContentTypes) if mergeContentTypes else ""
            for collectionName in collectionNameList:
                tableIdExcludeList = sd.getCollectionExcluded(collectionName)
                tableIdIncludeList = sd.getCollectionSelected(collectionName)
                sliceFilter = sd.getCollectionSliceFilter(collectionName)
                sdp.setSchemaIdExcludeList(tableIdExcludeList)
                sdp.setSchemaIdIncludeList(tableIdIncludeList)
                #
                docList, _, _ = sdp.processDocuments(
                    containerList,
                    styleType=styleType,
                    sliceFilter=sliceFilter,
                    filterType=filterType,
                    dataSelectors=dataSelectors,
                    collectionName=collectionName)

                docList = sdp.addDocumentPrivateAttributes(
                    docList, collectionName)
                docList = sdp.addDocumentSubCategoryAggregates(
                    docList, collectionName)

                # Special exclusions for the test harness. (removes timestamped data items to allow diffs.)
                self.__filterDocuments(docList, excludeExtras)
                fName = "full-prep-%s-%s-%s-%s.json" % (
                    contentType, collectionName, mergeS, styleType)
                if self.__exportFlag:
                    self.__logDocumentOrder(docList)
                    exportPath = os.path.join(self.__outputPath, fName)
                    self.__mU.doExport(exportPath,
                                       docList,
                                       fmt="json",
                                       indent=3)
                    logger.debug("Exported %r", exportPath)
                #
                if self.__diffFlag:
                    refPath = os.path.join(self.__savedOutputPath, fName)
                    refDocList = self.__mU.doImport(refPath, fmt="json")
                    # Log both lengths before asserting so the values and the
                    # collection they belong to are recorded on a mismatch too.
                    logger.debug("For %s %s len refDocList %d", contentType,
                                 collectionName, len(refDocList))
                    logger.debug("For %s %s len docList %d", contentType,
                                 collectionName, len(docList))
                    self.assertEqual(len(refDocList), len(docList))
                    jD = diff(refDocList,
                              docList,
                              syntax="explicit",
                              marshal=True)
                    if jD:
                        # Save the difference next to the regular exports as
                        # <reference base name>-diff.json before failing below.
                        baseName = os.path.splitext(
                            os.path.basename(refPath))[0]
                        diffPath = os.path.join(self.__outputPath,
                                                baseName + "-diff.json")
                        logger.debug("jsondiff for %s %s = \n%s", contentType,
                                     collectionName,
                                     pprint.pformat(jD, indent=3, width=100))
                        self.__mU.doExport(diffPath,
                                           jD,
                                           fmt="json",
                                           indent=3)
                    self.assertEqual(len(jD), 0)

        except Exception as e:
            # AssertionError is an Exception subclass, so failed assertions
            # above also land here and are reported via self.fail().
            logger.exception("Failing with %s", str(e))
            self.fail()