def testCompareCollectionSchema(self):
    """Regenerate each JSON collection schema and compare it against the cached copy.

    Logs the basenames of any difference files produced by the comparison.
    """
    try:
        diffPaths = []
        for databaseName in self.__databaseNameList:
            schemaDefD = self.__schP.makeSchemaDef(databaseName, dataTyping="ANY", saveSchema=False)
            sdAccess = SchemaDefAccess(schemaDefD)
            for collectionD in sdAccess.getCollectionInfo():
                collectionName = collectionD["NAME"]
                for encodingType in self.__encodingTypes:
                    # Only JSON encodings participate in this comparison.
                    if encodingType.lower() != "json":
                        continue
                    for level in self.__validationLevels:
                        diffPath = self.__schP.jsonSchemaCompare(databaseName, collectionName, encodingType, level)
                        if diffPath:
                            diffPaths.append(diffPath)
        if diffPaths:
            logger.info("JSON schema difference path list %r", [os.path.split(p)[1] for p in diffPaths])
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testBuildCollectionSchema(self):
    """Build collection schemas for every configured database/encoding/validation level.

    Optionally (when self.__compareSchema is set) compares generated JSON
    schemas with the cached copies and logs any difference paths.
    """
    schemaDiffPaths = []
    for databaseName in self.__databaseNameList:
        schemaDefD = self.__schP.makeSchemaDef(databaseName, dataTyping="ANY", saveSchema=False)
        sdAccess = SchemaDefAccess(schemaDefD)
        for collectionD in sdAccess.getCollectionInfo():
            collectionName = collectionD["NAME"]
            for encodingType in self.__encodingTypes:
                # The "rcsb" encoding is skipped here; only json/bson collection schemas are built.
                if encodingType.lower() == "rcsb":
                    continue
                for level in self.__validationLevels:
                    self.__schP.makeSchema(databaseName, collectionName, encodingType=encodingType, level=level, saveSchema=self.__saveSchema)
                    if self.__compareSchema and encodingType.lower() == "json":
                        diffPath = self.__schP.jsonSchemaCompare(databaseName, collectionName, encodingType, level)
                        if diffPath:
                            schemaDiffPaths.append(diffPath)
    if schemaDiffPaths:
        logger.info("Path dif list %r", schemaDiffPaths)
def getSchemaInfo(self, databaseName, dataTyping="ANY"):
    """Convenience method to return essential schema details for the input repository content type.

    Args:
        databaseName (str): schema name (e.g. pdbx, bird, chem_comp, ...)
        dataTyping (str, optional): Application name for the target schema (e.g. ANY, SQL, ...)

    Returns:
        tuple: SchemaDefAccess(object), target database name, target collection name list, primary index attribute list

    On any failure the exception is logged and the partially-populated
    defaults (None, None, [], {}) are returned.
    """
    sd = None
    dbName = None
    collectionNameList = []
    docIndexD = {}
    try:
        mU = MarshalUtil(workPath=self.__workPath)
        schemaLocator = self.__getSchemaDefLocator(databaseName, dataTyping=dataTyping)
        if self.__rebuildFlag:
            # Rebuild path: regenerate and persist the schema, then read it
            # back from the cache location derived from the locator name.
            filePath = os.path.join(self.__schemaCachePath, self.__fileU.getFileName(schemaLocator))
            self.makeSchemaDef(databaseName, dataTyping=dataTyping, saveSchema=True)
        else:
            filePath = self.__reload(schemaLocator, self.__schemaCachePath, useCache=self.__useCache)

        if not filePath:
            logger.error("Unable to recover schema %s (%s)", databaseName, dataTyping)
        logger.debug("ContentType %r dataTyping %r schemaLocator %r", databaseName, dataTyping, schemaLocator)
        schemaDef = mU.doImport(filePath, fmt="json")
        if schemaDef:
            logger.debug("Using cached schema definition for %s application %s", databaseName, dataTyping)
            sd = SchemaDefAccess(schemaDef)
            if sd:
                dbName = sd.getDatabaseName()
                collectionInfoList = sd.getCollectionInfo()
                logger.debug("Schema %s database name %s collections %r", databaseName, dbName, collectionInfoList)
                for cd in collectionInfoList:
                    collectionName = cd["NAME"]
                    collectionNameList.append(collectionName)
                    docIndexD[collectionName] = sd.getDocumentIndices(collectionName)
    except Exception as e:
        # Fix: corrected typo "Retreiving" -> "Retrieving" in the log message.
        logger.exception("Retrieving schema %s for %s failing with %s", databaseName, dataTyping, str(e))
    return sd, dbName, collectionNameList, docIndexD
def testLoadPdbxFiles(self):
    """Create the PDBx SQL schema and batch-insert the mock PDBx repository files."""
    try:
        schemaDefD = self.__schP.makeSchemaDef("pdbx", dataTyping="SQL", saveSchema=True)
        sdAccess = SchemaDefAccess(schemaDefD)
        self.__schemaCreate(sdAccess)
        locatorList = self.__rpP.getLocatorObjList(contentType="pdbx")
        logger.debug("Input path list %r", locatorList)
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            loader = SchemaDefLoader(
                self.__cfgOb,
                schemaDefObj=sdAccess,
                dbCon=client,
                cachePath=self.__cachePath,
                workPath=self.__workPath,
                cleanUp=False,
                warnings="error",
                verbose=self.__verbose,
                restoreUseStash=False,
                restoreUseGit=True,
                providerTypeExclude=self.__excludeType,
            )
            ok = loader.load(inputPathList=locatorList, loadType="batch-insert", deleteOpt="all")
            self.assertTrue(ok)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testLoadBirdReference(self):
    """Create the BIRD SQL schema and batch-file load BIRD and BIRD family reference data."""
    try:
        schemaDefD = self.__schP.makeSchemaDef("bird", dataTyping="SQL", saveSchema=True)
        sdAccess = SchemaDefAccess(schemaDefD)
        self.__schemaCreate(sdAccess)
        locatorList = self.__rpP.getLocatorObjList(contentType="bird")
        locatorList.extend(self.__rpP.getLocatorObjList(contentType="bird_family"))
        #
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            loader = SchemaDefLoader(
                self.__cfgOb,
                schemaDefObj=sdAccess,
                dbCon=client,
                cachePath=self.__cachePath,
                workPath=self.__workPath,
                cleanUp=False,
                warnings="error",
                verbose=self.__verbose,
                restoreUseStash=False,
                restoreUseGit=True,
                providerTypeExclude=self.__excludeType,
            )
            ok = loader.load(inputPathList=locatorList, loadType="batch-file")
            self.assertTrue(ok)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testLoadChemCompReference(self):
    """Create the chemical component SQL schema and batch-file load the reference data."""
    try:
        schemaDefD = self.__schP.makeSchemaDef("chem_comp", dataTyping="SQL", saveSchema=True)
        sdAccess = SchemaDefAccess(schemaDefD)
        self.__schemaCreate(sdAccess)
        locatorList = self.__rpP.getLocatorObjList(contentType="chem_comp")
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            loader = SchemaDefLoader(
                self.__cfgOb,
                schemaDefObj=sdAccess,
                dbCon=client,
                cachePath=self.__cachePath,
                workPath=self.__workPath,
                cleanUp=False,
                warnings="error",
                verbose=self.__verbose,
            )
            ok = loader.load(inputPathList=locatorList, loadType="batch-file")
            self.assertTrue(ok)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testSchemaCreate(self):
    """Create table schema for BIRD, chemical component, and PDBx data."""
    cD = self.__schP.makeSchemaDef("bird", dataTyping="SQL", saveSchema=True)
    sd = SchemaDefAccess(cD)
    self.__schemaCreate(sd)
    #
    cD = self.__schP.makeSchemaDef("chem_comp", dataTyping="SQL", saveSchema=True)
    sd = SchemaDefAccess(cD)
    self.__schemaCreate(sd)
    #
    # Fix: the PDBx schema build below had been commented out, which left the
    # trailing __schemaCreate() re-creating the chem_comp schema with a stale
    # 'sd' and contradicted the docstring. Restore the PDBx build.
    cD = self.__schP.makeSchemaDef("pdbx", dataTyping="SQL", saveSchema=True)
    sd = SchemaDefAccess(cD)
    self.__schemaCreate(sd)
def testBuildColSchemaWithRefs(self):
    """Build collection schemas for ihm_dev_full with parent references and primary keys injected."""
    for databaseName in ["ihm_dev_full"]:
        schemaDefD = self.__schP.makeSchemaDef(databaseName, dataTyping="ANY", saveSchema=False)
        sdAccess = SchemaDefAccess(schemaDefD)
        for collectionD in sdAccess.getCollectionInfo():
            collectionName = collectionD["NAME"]
            for encodingType in self.__encodingTypes:
                # Skip the "rcsb" schema-definition encoding.
                if encodingType.lower() == "rcsb":
                    continue
                for level in self.__validationLevels:
                    self.__schP.makeSchema(
                        databaseName,
                        collectionName,
                        encodingType=encodingType,
                        level=level,
                        saveSchema=True,
                        extraOpts="addParentRefs|addPrimaryKey",
                    )
def __simpleSchemaDataPrep(self, contentType, filterType, styleType, mockLength, rejectLength=0, dataSelectors=None, mergeContentTypes=None):
    """Internal method for preparing file-based data NOT requiring dynamic methods, slicing, or key injection.

    Args:
        contentType (str): Content type name
        filterType (str): List of data processing options (separated by '|') (e.g. "drop-empty-attributes|drop-empty-tables|skip-max-width|...)
        styleType (str): organization of output document (e.g. rowise-by-name)
        mockLength (int): Expected length of the test data for the input content type
        rejectLength (int, optional): number of input data sets rejected by the dataselection criteria. Defaults to 0.
        dataSelectors (list of str, optional): data selection criteria. Defaults to None.
        mergeContentTypes (list of str, optional): list content types to merge with the input data set. Defaults to None. (e.g. ['vrpt'])
    """
    try:
        selectors = dataSelectors if dataSelectors else ["PUBLIC_RELEASE"]
        schemaDefD = self.__schP.makeSchemaDef(contentType, dataTyping="ANY", saveSchema=True)
        _ = SchemaDefAccess(schemaDefD)
        locatorList = self.__rpP.getLocatorObjList(contentType=contentType, mergeContentTypes=mergeContentTypes)
        sd, _, _, _ = self.__schP.getSchemaInfo(databaseName=contentType, dataTyping="ANY")
        transformFactory = DataTransformFactory(schemaDefAccessObj=sd, filterType=filterType)
        prep = SchemaDefDataPrep(schemaDefAccessObj=sd, dtObj=transformFactory, workPath=self.__cachePath, verbose=self.__verbose)
        #
        logger.debug("For %s mock length %d length of path list %d\n", contentType, mockLength, len(locatorList))
        self.assertEqual(len(locatorList), mockLength)
        tableDataDictList, containerNameList, rejectList = prep.fetchDocuments(locatorList, styleType=styleType, filterType=filterType, dataSelectors=selectors)
        logger.debug("For %s mock length %d reject length %d length of tddl list %d\n", contentType, mockLength, rejectLength, len(tableDataDictList))
        self.assertEqual(len(tableDataDictList), mockLength - rejectLength)
        self.assertEqual(len(containerNameList), mockLength - rejectLength)
        if rejectList:
            logger.debug("For %s rejecting components %r", contentType, rejectList)
        #
        # self.assertEqual(len(rejectList), rejectLength)
        fName = "simple-prep-%s-%s.json" % (contentType, styleType)
        if self.__exportFlag:
            self.__mU.doExport(os.path.join(self.__outputPath, fName), tableDataDictList, fmt="json", indent=3)
        if self.__diffFlag:
            # Compare against the previously saved reference output.
            refPath = os.path.join(self.__savedOutputPath, fName)
            refDocList = self.__mU.doImport(refPath, fmt="json")
            self.assertEqual(len(refDocList), len(tableDataDictList))
            #
            jD = diff(refDocList, tableDataDictList, syntax="explicit", marshal=True)
            if jD:
                _, fn = os.path.split(refPath)
                bn, _ = os.path.splitext(fn)
                diffOutPath = os.path.join(self.__outputPath, bn + "-diff.json")
                logger.debug("jsondiff for %s %s = \n%s", contentType, styleType, pprint.pformat(jD, indent=3, width=100))
                self.__mU.doExport(diffOutPath, jD, fmt="json", indent=3)
            self.assertEqual(len(jD), 0)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testSQLMethods(self):
    """Exercise SQL schema creation, import/export, and selection generation for pdbx_core."""
    dataTyping = "SQL"
    for schemaName in ["pdbx_core"]:
        schemaDefD = self.__sdu.makeSchemaDef(schemaName, dataTyping=dataTyping, saveSchema=False)
        sdAccess = SchemaDefAccess(schemaDefD)
        self.__testSchemaCreate(sdAccess)
        self.__testImportExport(sdAccess)
        self.__testSelectionAndConditions(sdAccess)
def testCompareSchemaCategories(self):
    """Compare common categories across schema definitions."""
    try:
        ccAccess = SchemaDefAccess(self.__schP.makeSchemaDef("chem_comp_core", dataTyping="ANY", saveSchema=False))
        birdCcAccess = SchemaDefAccess(self.__schP.makeSchemaDef("bird_chem_comp_core", dataTyping="ANY", saveSchema=False))
        #
        logger.info("")
        for schemaId in ["CHEM_COMP", "PDBX_CHEM_COMP_AUDIT"]:
            ccAttrs = ccAccess.getAttributeIdList(schemaId)
            birdCcAttrs = birdCcAccess.getAttributeIdList(schemaId)
            logger.debug("%s attributes (%d) %r", schemaId, len(ccAttrs), ccAttrs)
            logger.debug("%s attributes (%d) %r", schemaId, len(birdCcAttrs), birdCcAttrs)
            # Attributes present in chem_comp_core but absent from bird_chem_comp_core.
            missing = set(ccAttrs) - set(birdCcAttrs)
            if missing:
                logger.info("For %s attribute differences %r", schemaId, missing)
            self.assertEqual(len(missing), 0)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def testReLoadBirdReference(self):
    """Load then reload BIRD reference data, exercising batch-file delete/reload options."""
    try:
        schemaDefD = self.__schP.makeSchemaDef("bird", dataTyping="SQL", saveSchema=True)
        sdAccess = SchemaDefAccess(schemaDefD)
        self.__schemaCreate(sdAccess)
        locatorList = self.__rpP.getLocatorObjList(contentType="bird")
        locatorList.extend(self.__rpP.getLocatorObjList(contentType="bird_family"))
        #
        with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
            loader = SchemaDefLoader(
                self.__cfgOb,
                schemaDefObj=sdAccess,
                dbCon=client,
                cachePath=self.__cachePath,
                workPath=self.__workPath,
                cleanUp=False,
                warnings="error",
                verbose=self.__verbose,
            )
            # Initial population (result intentionally unchecked).
            loader.load(inputPathList=locatorList, loadType="batch-file")
            #
            logger.debug("INFO BATCH FILE RELOAD TEST --------------------------------------------\n")
            ok = loader.load(inputPathList=locatorList, loadType="batch-file", deleteOpt="all")
            self.assertTrue(ok)
            #
            logger.debug("\n\n\n+INFO BATCH INSERT RELOAD TEST --------------------------------------------\n")
            ok = loader.load(inputPathList=locatorList, loadType="batch-file", deleteOpt="selected")
            self.assertTrue(ok)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()
def __testAccessors(self, schemaDef):
    """Verify data and accessor mapping -"""
    access = SchemaDefAccess(schemaDef)
    logger.debug("Schema name %s", access.getName())
    logger.debug("Schema name %s", access.getAppName())
    logger.debug("Database name %s", access.getDatabaseName())
    logger.debug("Versioned database name %s", access.getVersionedDatabaseName())
    logger.debug("Collection info %r", access.getCollectionInfo())

    for selectorName in access.getDataSelectorNames():
        logger.debug("Selector %s %r", selectorName, access.getDataSelectors(selectorName))

    for collectionD in access.getCollectionInfo():
        collectionName = collectionD["NAME"]
        logger.debug("Collection excluded %r", access.getCollectionExcluded(collectionName))
        logger.debug("Collection included %r", access.getCollectionSelected(collectionName))
        logger.debug("Collection document key attribute names %r", access.getDocumentKeyAttributeNames(collectionName))

    for schemaId in access.getSchemaIdList():
        #
        attrIds = access.getAttributeIdList(schemaId)
        schemaObj = access.getSchemaObject(schemaId)
        attributeIdList = schemaObj.getAttributeIdList()
        # Top-level and per-object attribute id lists must agree in length.
        self.assertEqual(len(attrIds), len(attributeIdList))
        attributeNameList = schemaObj.getAttributeNameList()
        logger.debug("Ordered attribute Id list %s", str(attributeIdList))
        logger.debug("Ordered attribute name list %s", str(attributeNameList))
        #
        mappedNames = schemaObj.getMapAttributeNameList()
        logger.debug("Ordered mapped attribute name list %s", str(mappedNames))
        mappedIds = schemaObj.getMapAttributeIdList()
        logger.debug("Ordered mapped attribute id list %s", str(mappedIds))
        categoryList = schemaObj.getMapInstanceCategoryList()
        logger.debug("Mapped category list %s", str(categoryList))
        for categoryName in categoryList:
            attrList = schemaObj.getMapInstanceAttributeList(categoryName)
            logger.debug("Mapped attribute list in %s : %s", categoryName, str(attrList))
    return True
def main():
    """CLI entry point: build and/or compare schema definitions and collection schemas.

    Database selection comes either from individual --update_* flags or from
    the configuration-driven --update_config_{all,deployed,test} options.
    With --compare_only, cached schemas are compared instead of rebuilt.
    Exits with status 1 on configuration errors.
    """
    parser = argparse.ArgumentParser()
    #
    defaultConfigName = "site_info_configuration"
    #
    parser.add_argument("--update_chem_comp_ref", default=False, action="store_true", help="Update schema for Chemical Component reference definitions")
    parser.add_argument("--update_chem_comp_core_ref", default=False, action="store_true", help="Update core schema for Chemical Component reference definitions")
    parser.add_argument("--update_bird_chem_comp_ref", default=False, action="store_true", help="Update schema for Bird Chemical Component reference definitions")
    parser.add_argument("--update_bird_chem_comp_core_ref", default=False, action="store_true", help="Update core schema for Bird Chemical Component reference definitions")
    parser.add_argument("--update_bird_ref", default=False, action="store_true", help="Update schema for Bird reference definitions")
    parser.add_argument("--update_bird_family_ref", default=False, action="store_true", help="Update schema for Bird Family reference definitions")
    parser.add_argument("--update_pdbx", default=False, action="store_true", help="Update schema for PDBx entry data")
    parser.add_argument("--update_pdbx_core", default=False, action="store_true", help="Update schema for PDBx core entry/entity data")
    parser.add_argument("--update_pdbx_comp_model_core", default=False, action="store_true", help="Update schema for PDBx computational model core entry/entity data")
    #
    parser.add_argument("--update_repository_holdings", default=False, action="store_true", help="Update schema for repository holdings")
    parser.add_argument("--update_entity_sequence_clusters", default=False, action="store_true", help="Update schema for entity sequence clusters")
    parser.add_argument("--update_data_exchange", default=False, action="store_true", help="Update schema for data exchange status")
    parser.add_argument("--update_ihm_dev", default=False, action="store_true", help="Update schema for I/HM dev entry data")
    parser.add_argument("--update_drugbank_core", default=False, action="store_true", help="Update DrugBank schema")
    #
    parser.add_argument("--update_config_all", default=False, action="store_true", help="Update using configuration settings (e.g. DATABASE_NAMES_ALL)")
    parser.add_argument("--update_config_deployed", default=False, action="store_true", help="Update using configuration settings (e.g. DATABASE_NAMES_DEPLOYED)")
    parser.add_argument("--update_config_test", default=False, action="store_true", help="Update using configuration settings (e.g. DATABASE_NAMES_TEST)")
    #
    parser.add_argument("--config_path", default=None, help="Path to configuration options file")
    parser.add_argument("--config_name", default=defaultConfigName, help="Configuration section name")
    #
    parser.add_argument("--cache_path", default=None, help="Schema cache directory path")
    parser.add_argument("--encoding_types", default=None, help="Schema encoding (rcsb|json|bson) (comma separated)")
    parser.add_argument("--validation_levels", default=None, help="Schema validation level (full|min) (comma separated)")
    parser.add_argument("--compare_only", default=False, action="store_true", help="Perform comparison with cached schema")
    #
    parser.add_argument("--debug", default=False, action="store_true", help="Turn on verbose logging")
    parser.add_argument("--mock", default=False, action="store_true", help="Use MOCK repository configuration for dependencies and testing")
    #
    parser.add_argument("--working_path", default=None, help="Working/alternative path for temporary and schema files")
    args = parser.parse_args()
    #
    debugFlag = args.debug
    if debugFlag:
        logger.setLevel(logging.DEBUG)
    # ----------------------- - ----------------------- - ----------------------- - ----------------------- - ----------------------- -
    #                                               Configuration Details
    configPath = args.config_path
    configName = args.config_name
    cachePath = args.cache_path
    compareOnly = args.compare_only
    #
    encodingTypes = args.encoding_types.split(",") if args.encoding_types else []
    validationLevels = args.validation_levels.split(",") if args.validation_levels else []
    dataTypingList = ["ANY", "SQL"]

    if not configPath:
        configPath = os.getenv("DBLOAD_CONFIG_PATH", None)
    try:
        if os.access(configPath, os.R_OK):
            os.environ["DBLOAD_CONFIG_PATH"] = configPath
            # Fix: corrected typo "configuation" -> "configuration" in the log message.
            logger.info("Using configuration path %s (%s)", configPath, configName)
        else:
            logger.error("Missing or access issue with config file %r", configPath)
            exit(1)
        mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data") if args.mock else None
        cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=defaultConfigName, mockTopPath=mockTopPath)
        if configName != defaultConfigName:
            cfgOb.replaceSectionName(defaultConfigName, configName)
    except Exception as e:
        logger.error("Missing or access issue with config file %r with %s", configPath, str(e))
        exit(1)
    #
    # Accumulate the database selection from the individual command-line flags.
    databaseNameList = []
    if args.update_chem_comp_ref:
        databaseNameList.append("chem_comp")
    if args.update_bird_chem_comp_ref:
        databaseNameList.append("bird_chem_comp")
    if args.update_chem_comp_core_ref:
        databaseNameList.append("chem_comp_core")
    if args.update_bird_chem_comp_core_ref:
        databaseNameList.append("bird_chem_comp_core")
    if args.update_bird_ref:
        databaseNameList.append("bird")
    if args.update_bird_family_ref:
        databaseNameList.append("bird_family")
    if args.update_pdbx:
        databaseNameList.append("pdbx")
    if args.update_pdbx_core:
        databaseNameList.append("pdbx_core")
    if args.update_pdbx_comp_model_core:
        databaseNameList.append("pdbx_comp_model_core")
    if args.update_repository_holdings:
        databaseNameList.append("repository_holdings")
    if args.update_entity_sequence_clusters:
        databaseNameList.append("sequence_clusters")
    if args.update_data_exchange:
        databaseNameList.append("data_exchange")
    if args.update_ihm_dev:
        databaseNameList.append("ihm_dev")
    if args.update_drugbank_core:
        databaseNameList.append("drugbank_core")

    # Configuration-driven selections override the flag-based list entirely.
    if args.update_config_deployed:
        databaseNameList = cfgOb.getList("DATABASE_NAMES_DEPLOYED", sectionName="database_catalog_configuration")
        dataTypingList = cfgOb.getList("DATATYPING_DEPLOYED", sectionName="database_catalog_configuration")
        validationLevels = cfgOb.getList("VALIDATION_LEVELS_DEPLOYED", sectionName="database_catalog_configuration")
        encodingTypes = cfgOb.getList("ENCODING_TYPES_DEPLOYED", sectionName="database_catalog_configuration")

    if args.update_config_all:
        databaseNameList = cfgOb.getList("DATABASE_NAMES_ALL", sectionName="database_catalog_configuration")
        dataTypingList = cfgOb.getList("DATATYPING_ALL", sectionName="database_catalog_configuration")
        validationLevels = cfgOb.getList("VALIDATION_LEVELS_ALL", sectionName="database_catalog_configuration")
        encodingTypes = cfgOb.getList("ENCODING_TYPES_ALL", sectionName="database_catalog_configuration")

    if args.update_config_test:
        databaseNameList = cfgOb.getList("DATABASE_NAMES_TEST", sectionName="database_catalog_configuration")
        dataTypingList = cfgOb.getList("DATATYPING_TEST", sectionName="database_catalog_configuration")
        validationLevels = cfgOb.getList("VALIDATION_LEVELS_TEST", sectionName="database_catalog_configuration")
        encodingTypes = cfgOb.getList("ENCODING_TYPES_TEST", sectionName="database_catalog_configuration")
    #
    scnD = cfgOb.get("document_collection_names", sectionName="document_helper_configuration")
    #
    databaseNameList = list(set(databaseNameList))
    logger.debug("Collections %s", list(scnD.items()))
    logger.debug("databaseNameList %s", databaseNameList)

    if compareOnly:
        # Compare cached schema definitions and JSON collection schemas; build nothing.
        schP = SchemaProvider(cfgOb, cachePath, useCache=True)
        difPathList = []
        for databaseName in databaseNameList:
            for dataTyping in dataTypingList:
                logger.debug("Building schema %s with types %s", databaseName, dataTyping)
                pth = schP.schemaDefCompare(databaseName, dataTyping)
                if pth:
                    difPathList.append(pth)
        if difPathList:
            logger.info("Schema definition difference path list %r", difPathList)
        difPathList = []
        for databaseName in databaseNameList:
            dD = schP.makeSchemaDef(databaseName, dataTyping="ANY", saveSchema=False)
            sD = SchemaDefAccess(dD)
            for cd in sD.getCollectionInfo():
                collectionName = cd["NAME"]
                for encodingType in encodingTypes:
                    if encodingType.lower() != "json":
                        continue
                    for level in validationLevels:
                        pth = schP.jsonSchemaCompare(databaseName, collectionName, encodingType, level)
                        if pth:
                            difPathList.append(pth)
        if difPathList:
            logger.info("JSON schema difference path list %r", difPathList)
    else:
        schP = SchemaProvider(cfgOb, cachePath, useCache=False)
        for databaseName in databaseNameList:
            for encodingType in encodingTypes:
                # Fix: compare case-insensitively for consistency with the other
                # encodingType tests in this module.
                if encodingType.lower() == "rcsb":
                    for dataTyping in dataTypingList:
                        logger.info("Creating schema definition for content type %s data typing %s", databaseName, dataTyping)
                        schP.makeSchemaDef(databaseName, dataTyping=dataTyping, saveSchema=True)
                else:
                    if databaseName in scnD:
                        for dD in scnD[databaseName]:
                            collectionName = dD["NAME"]
                            for validationLevel in validationLevels:
                                logger.info("Creating %r schema for content type %s collection %s", encodingType, databaseName, collectionName)
                                schP.makeSchema(databaseName, collectionName, encodingType=encodingType, level=validationLevel, saveSchema=True)
def __fullSchemaDataPrep(self, contentType, filterType, styleType, mockLength, rejectLength=0, dataSelectors=None, mergeContentTypes=None, excludeExtras=None):
    """Internal method for preparing file-based data requiring dynamic methods, slicing, or key injection.

    Args:
        contentType (str): Content type name
        filterType (str): List of data processing options (separated by '|') (e.g. "drop-empty-attributes|drop-empty-tables|skip-max-width|...)
        styleType (str): organization of output document (e.g. rowise-by-name)
        mockLength (int): Expected length of the test data for the input content type
        rejectLength (int, optional): number of input data sets rejected by the dataselection criteria. Defaults to 0.
        dataSelectors (list of str, optional): data selection criteria. Defaults to None.
        mergeContentTypes (list of str, optional): list content types to merge with the input data set. Defaults to None. (e.g. ['vrpt'])
        excludeExtras (list, optional): extra items removed from documents before export/diff. Defaults to None.
    """
    try:
        excludeExtras = excludeExtras if excludeExtras else []
        # mockLength/rejectLength are accepted for signature parity with the
        # simple-prep path but are not asserted against here.
        _ = mockLength
        _ = rejectLength
        dD = self.__schP.makeSchemaDef(contentType, dataTyping="ANY", saveSchema=True)
        _ = SchemaDefAccess(dD)
        inputPathList = self.__rpP.getLocatorObjList(contentType=contentType, mergeContentTypes=mergeContentTypes)
        sd, _, collectionNameList, _ = self.__schP.getSchemaInfo(databaseName=contentType, dataTyping="ANY")
        #
        # Build the dictionary method runner used to apply dynamic methods to each container.
        dP = DictionaryApiProviderWrapper(self.__cachePath, cfgOb=self.__cfgOb, configName=self.__configName, useCache=True)
        dictApi = dP.getApiByName(contentType)
        #
        rP = DictMethodResourceProvider(
            self.__cfgOb,
            configName=self.__configName,
            cachePath=self.__cachePath,
            restoreUseStash=False,
            restoreUseGit=True,
            providerTypeExclude=self.__excludeType,
        )
        dmh = DictMethodRunner(dictApi, modulePathMap=self.__modulePathMap, resourceProvider=rP)
        #
        dtf = DataTransformFactory(schemaDefAccessObj=sd, filterType=filterType)
        sdp = SchemaDefDataPrep(schemaDefAccessObj=sd, dtObj=dtf, workPath=self.__cachePath, verbose=self.__verbose)
        containerList = self.__rpP.getContainerList(inputPathList)
        # Apply the dynamic dictionary methods to every input container before slicing.
        for container in containerList:
            cName = container.getName()
            logger.debug("Processing container %s", cName)
            dmh.apply(container)
        #
        # Process the prepared containers once per target collection.
        for collectionName in collectionNameList:
            tableIdExcludeList = sd.getCollectionExcluded(collectionName)
            tableIdIncludeList = sd.getCollectionSelected(collectionName)
            sliceFilter = sd.getCollectionSliceFilter(collectionName)
            sdp.setSchemaIdExcludeList(tableIdExcludeList)
            sdp.setSchemaIdIncludeList(tableIdIncludeList)
            #
            docList, _, _ = sdp.processDocuments(
                containerList, styleType=styleType, sliceFilter=sliceFilter, filterType=filterType, dataSelectors=dataSelectors, collectionName=collectionName
            )
            docList = sdp.addDocumentPrivateAttributes(docList, collectionName)
            docList = sdp.addDocumentSubCategoryAggregates(docList, collectionName)
            # Special exclusions for the test harness. (removes timestamped data items to allow diffs.)
            self.__filterDocuments(docList, excludeExtras)
            mergeS = "-".join(mergeContentTypes) if mergeContentTypes else ""
            fName = "full-prep-%s-%s-%s-%s.json" % (contentType, collectionName, mergeS, styleType)
            if self.__exportFlag:
                self.__logDocumentOrder(docList)
                fPath = os.path.join(self.__outputPath, fName)
                self.__mU.doExport(fPath, docList, fmt="json", indent=3)
                logger.debug("Exported %r", fPath)
            #
            if self.__diffFlag:
                # Diff against the previously saved reference documents for this collection.
                fPath = os.path.join(self.__savedOutputPath, fName)
                refDocList = self.__mU.doImport(fPath, fmt="json")
                self.assertEqual(len(refDocList), len(docList))
                logger.debug("For %s %s len refDocList %d", contentType, collectionName, len(refDocList))
                logger.debug("For %s %s len docList %d", contentType, collectionName, len(docList))
                jD = diff(refDocList, docList, syntax="explicit", marshal=True)
                if jD:
                    # Persist the diff artifact for inspection before failing.
                    _, fn = os.path.split(fPath)
                    bn, _ = os.path.splitext(fn)
                    fPath = os.path.join(self.__outputPath, bn + "-diff.json")
                    logger.debug("jsondiff for %s %s = \n%s", contentType, collectionName, pprint.pformat(jD, indent=3, width=100))
                    self.__mU.doExport(fPath, jD, fmt="json", indent=3)
                self.assertEqual(len(jD), 0)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
        self.fail()