def bootstrap(self, refSchemaTuple=tuple()):
    (id2ElemId, keyRefs, refSchemaCache) = refSchemaTuple

    keyList = keyRefs[self.triggerAttribute]
    errors = []
    # Saving the unique locations
    # based on information from FeatureLoc elems
    for loc in keyList:
        fk_defs = loc.context[self.triggerAttribute]
        fk_defs_gid = str(id(loc.context))
        #fk_defs_gid = loc.path
        for fk_loc_i, p_FK_decl in enumerate(fk_defs):
            fk_loc_id = fk_defs_gid + '_' + str(fk_loc_i)
            ref_schema_id = p_FK_decl['schema_id']
            if uritools.isabsuri(self.schemaURI):
                abs_ref_schema_id = uritools.urijoin(self.schemaURI, ref_schema_id)
            else:
                abs_ref_schema_id = ref_schema_id

            if abs_ref_schema_id not in refSchemaCache:
                errors.append({
                    'reason': 'fk_no_schema',
                    'description': "No schema with {0} id, required by {1} ({2})".format(abs_ref_schema_id, self.jsonSchemaSource, self.schemaURI)
                })

            fk_members = p_FK_decl.get('members', [])
            fkLoc = FKLoc(schemaURI=self.schemaURI, refSchemaURI=abs_ref_schema_id, path=loc.path + '/' + str(fk_loc_i), values=list())
            fk_id = abs_ref_schema_id
            fkDefH = self.FKWorld.setdefault(fk_id, {})

            # This control is here for the case where the same primary key
            # is referenced from multiple places
            fkDefH[fk_loc_id] = FKDef(fkLoc=fkLoc, members=fk_members)

    return errors

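# A small, runnable illustration of the URI resolution the bootstrap above
# relies on: a relative 'schema_id' is only joined onto self.schemaURI when
# the latter is an absolute URI. The example URIs are invented.
import uritools

schemaURI = "https://example.org/schemas/sample"
ref_schema_id = "donor"
if uritools.isabsuri(schemaURI):
    abs_ref_schema_id = uritools.urijoin(schemaURI, ref_schema_id)
else:
    abs_ref_schema_id = ref_schema_id
assert abs_ref_schema_id == "https://example.org/schemas/donor"
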
def test_classification(self):
    cases = [
        # (uriref, uri, absuri, netpath, abspath, relpath, samedoc)
        ('',          False, False, False, False, True,  True),
        ('#',         False, False, False, False, True,  True),
        ('#f',        False, False, False, False, True,  True),
        ('?',         False, False, False, False, True,  False),
        ('?q',        False, False, False, False, True,  False),
        ('p',         False, False, False, False, True,  False),
        ('/p',        False, False, False, True,  False, False),
        ('/p?',       False, False, False, True,  False, False),
        ('/p?q',      False, False, False, True,  False, False),
        ('/p#',       False, False, False, True,  False, False),
        ('/p#f',      False, False, False, True,  False, False),
        ('/p?q#f',    False, False, False, True,  False, False),
        ('//',        False, False, True,  False, False, False),
        ('//n?',      False, False, True,  False, False, False),
        ('//n?q',     False, False, True,  False, False, False),
        ('//n#',      False, False, True,  False, False, False),
        ('//n#f',     False, False, True,  False, False, False),
        ('//n?q#f',   False, False, True,  False, False, False),
        ('s:',        True,  True,  False, False, False, False),
        ('s:p',       True,  True,  False, False, False, False),
        ('s:p?',      True,  True,  False, False, False, False),
        ('s:p?q',     True,  True,  False, False, False, False),
        ('s:p#',      True,  False, False, False, False, False),
        ('s:p#f',     True,  False, False, False, False, False),
        ('s://',      True,  True,  False, False, False, False),
        ('s://h',     True,  True,  False, False, False, False),
        ('s://h/',    True,  True,  False, False, False, False),
        ('s://h/p',   True,  True,  False, False, False, False),
        ('s://h/p?',  True,  True,  False, False, False, False),
        ('s://h/p?q', True,  True,  False, False, False, False),
        ('s://h/p#',  True,  False, False, False, False, False),
        ('s://h/p#f', True,  False, False, False, False, False),
    ]
    for s, uri, absuri, netpath, abspath, relpath, samedoc in cases:
        for ref in [s, s.encode('ascii')]:
            parts = uritools.urisplit(ref)
            self.assertEqual(parts.isuri(), uri)
            self.assertEqual(parts.isabsuri(), absuri)
            self.assertEqual(parts.isnetpath(), netpath)
            self.assertEqual(parts.isabspath(), abspath)
            self.assertEqual(parts.isrelpath(), relpath)
            self.assertEqual(parts.issamedoc(), samedoc)
            self.assertEqual(uritools.isuri(ref), uri)
            self.assertEqual(uritools.isabsuri(ref), absuri)
            self.assertEqual(uritools.isnetpath(ref), netpath)
            self.assertEqual(uritools.isabspath(ref), abspath)
            self.assertEqual(uritools.isrelpath(ref), relpath)
            self.assertEqual(uritools.issamedoc(ref), samedoc)

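# Spot-checks of the classification predicates exercised by the table above;
# note that an absolute URI must not carry a fragment (RFC 3986), which is why
# 's://h/p#f' is a URI but not an absolute URI.
import uritools

parts = uritools.urisplit('s://h/p#f')
assert parts.isuri() and not parts.isabsuri()
assert uritools.isrelpath('p')      # no scheme, no authority, no leading '/'
assert uritools.issamedoc('#f')     # fragment-only reference
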
def _traverse_dict(schemaURI, j, jp="", fragment=None): # Pre-processing newPartialSchemaURI = j.get('$id') if newPartialSchemaURI: # Computing the absolute schema URI if uritools.isabsuri(schemaURI): newSchemaURI , uriFragment = uritools.uridefrag(uritools.urijoin(schemaURI,newPartialSchemaURI)) else: newSchemaURI , uriFragment = uritools.uridefrag(newPartialSchemaURI) else: newSchemaURI = schemaURI # Are we jumping to a different place? if newSchemaURI == schemaURI: theId = id(j) theIdStr = str(theId) # Does the dictionary contain a '$ref'? isRef = REF_FEATURE in j for k,v in j.items(): # Following JSON reference standards, we have to # ignore other keys when there is a $ref one # https://tools.ietf.org/html/draft-pbryan-zyp-json-ref-03#section-3 if isRef and (k != REF_FEATURE): continue elemId = theIdStr + ':' + k elemPath = jp + '/' + k jp2val[elemPath] = elemId # Is the key among the "special ones"? if k in keySet: # Saving the correspondence from Python address # to unique id of the feature id2ElemId.setdefault(theId,{})[k] = [ elemId ] keyRefs.setdefault(k,[]).append(FeatureLoc(schemaURI=schemaURI,fragment=fragment,path=elemPath,context=j,id=elemId)) if isinstance(v,dict): # Fragment must not be propagated to children _traverse_dict(schemaURI,v,jp=elemPath) elif isinstance(v,list): _traverse_list(schemaURI,v,jp=elemPath) else: traverseJSONSchema(j,schemaURI=newSchemaURI,fragment=uriFragment,keys=keys,refSchemaListSet=refSchemaListSet)
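# Hypothetical minimal filter mirroring the isRef guard above: per JSON
# Reference draft-03, any sibling keys of '$ref' are ignored. This helper is
# an illustration, not part of the traversal code.
def effective_items(j, ref_feature='$ref'):
    if ref_feature in j:
        return [(ref_feature, j[ref_feature])]
    return list(j.items())

assert effective_items({'$ref': '#/defs/x', 'type': 'string'}) == [('$ref', '#/defs/x')]
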
def process_config_value(key, default_value, custom_value):
    if isinstance(default_value, dict):
        return _build_config(default_value, custom_value)
    if key == "root_build_dir":
        value = custom_value or default_value
        return value if uritools.isabsuri(value) else os.path.abspath(value)
    if key in ("cbp_dir", "cb_schema_templates_dir", "types_path"):
        if custom_value:
            return os.path.abspath(custom_value)
        return os.path.normpath(
            os.path.join(os.path.dirname(__file__), default_value))
    return custom_value or default_value

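# Hedged usage sketch (the paths and the s3 URI are invented): a custom value
# that is already an absolute URI is kept verbatim, while plain paths are made
# absolute against the current working directory.
print(process_config_value("root_build_dir", "build", None))
# e.g. '/home/user/project/build'
print(process_config_value("root_build_dir", "build", "s3://bucket/build"))
# 's3://bucket/build'
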
def validate(self, validator, fp_def, value, schema):
    if fp_def and isinstance(fp_def, str):
        fp_loc_id = id(schema)

        # Getting the absolute schema id and the route
        if uritools.isabsuri(self.schemaURI):
            abs_ref_schema_id, rel_json_pointer = uritools.uridefrag(
                uritools.urijoin(self.schemaURI, fp_def))
        else:
            abs_ref_schema_id, rel_json_pointer = uritools.uridefrag(fp_def)

        fpDef = self.FPWorld.setdefault(abs_ref_schema_id, {}).get(fp_loc_id)

        # And getting the foreign property definition
        if fpDef is None:
            fpDef = FPDef(schemaURI=self.schemaURI,
                          refSchemaURI=abs_ref_schema_id,
                          path='(unknown {})'.format(fp_loc_id),
                          refPath=rel_json_pointer,
                          values=list())
            self.FPWorld[abs_ref_schema_id][fp_loc_id] = fpDef

        obtainedValues = [(value, )]

        isAtomicValue = len(obtainedValues) == 1 and len(
            obtainedValues[0]) == 1 and isinstance(
                obtainedValues[0][0], ALLOWED_ATOMIC_VALUE_TYPES)

        if isAtomicValue:
            theValues = [obtainedValues[0][0]]
        else:
            theValues = UniqueKey.GenKeyStrings(obtainedValues)

        fpVals = fpDef.values

        # Second pass will do the validation
        for theValue in theValues:
            fpVals.append(FPVal(where=self.currentJSONFile, value=theValue))

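# The loop above only *collects* FPVal entries; a deferred second pass checks
# them against values gathered from the referenced schema. A hedged sketch of
# that pass follows; get_known_values is a hypothetical lookup, not actual API.
def check_fp_values(fpDef, get_known_values):
    known = get_known_values(fpDef.refSchemaURI, fpDef.refPath)
    # Return the collected values which do not resolve in the referenced schema
    return [fpVal for fpVal in fpDef.values if fpVal.value not in known]
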
def bootstrap(self, refSchemaTuple=tuple()):
    (id2ElemId, keyRefs, refSchemaCache) = refSchemaTuple

    keyList = keyRefs[self.triggerAttribute]
    errors = []
    # Saving the unique locations
    # based on information from FeatureLoc elems
    for loc in keyList:
        fp_def = loc.context[self.triggerAttribute]
        fp_loc_id = id(loc.context)

        # Getting the absolute schema id and the route
        if uritools.isabsuri(self.schemaURI):
            abs_ref_schema_id, rel_json_pointer = uritools.uridefrag(
                uritools.urijoin(self.schemaURI, fp_def))
        else:
            abs_ref_schema_id, rel_json_pointer = uritools.uridefrag(fp_def)

        if abs_ref_schema_id not in refSchemaCache:
            errors.append({
                'reason': 'fp_no_schema',
                'description': "No schema with {0} id, required by {1} ({2})".format(
                    abs_ref_schema_id, self.jsonSchemaSource, self.schemaURI)
            })

        fpDefH = self.FPWorld.setdefault(abs_ref_schema_id, {})

        # This control is here for the case where the same primary key
        # is referenced from multiple places
        fpDefH[fp_loc_id] = FPDef(schemaURI=self.schemaURI,
                                  refSchemaURI=abs_ref_schema_id,
                                  path=loc.path,
                                  refPath=rel_json_pointer,
                                  values=list())

    return errors

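# Illustration of the uridefrag() split used above: a foreign-property
# declaration carries a schema id plus a JSON pointer in the fragment part.
# The example URIs are invented.
import uritools

fp_def = "donor#/properties/donor_id"
base = "https://example.org/schemas/sample"
abs_ref_schema_id, rel_json_pointer = uritools.uridefrag(uritools.urijoin(base, fp_def))
assert abs_ref_schema_id == "https://example.org/schemas/donor"
assert rel_json_pointer == "/properties/donor_id"
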
def validate(self, validator, fk_defs, value, schema):
    if fk_defs and isinstance(fk_defs, (list, tuple)):
        fk_defs_gid = str(id(schema))
        for fk_loc_i, p_FK_decl in enumerate(fk_defs):
            fk_loc_id = fk_defs_gid + '_' + str(fk_loc_i)
            ref_schema_id = p_FK_decl['schema_id']
            if uritools.isabsuri(self.schemaURI):
                abs_ref_schema_id = uritools.urijoin(self.schemaURI, ref_schema_id)
            else:
                abs_ref_schema_id = ref_schema_id

            fk_members = p_FK_decl.get('members', [])
            if isinstance(fk_members, list):
                obtainedValues = PrimaryKey.GetKeyValues(value, fk_members)
            else:
                obtainedValues = [(value,)]

            isAtomicValue = len(obtainedValues) == 1 and len(obtainedValues[0]) == 1 and isinstance(obtainedValues[0][0], ALLOWED_ATOMIC_VALUE_TYPES)

            if isAtomicValue:
                theValues = [obtainedValues[0][0]]
            else:
                theValues = PrimaryKey.GenKeyStrings(obtainedValues)

            # Group the values to be checked
            #fk_id = id(p_FK_decl)  # id(schema)
            fk_id = abs_ref_schema_id

            # The common dictionary for this declaration where all the FK values are kept
            fkDef = self.FKWorld.setdefault(fk_id, {}).setdefault(
                fk_loc_id,
                FKDef(fkLoc=FKLoc(schemaURI=self.schemaURI,
                                  refSchemaURI=abs_ref_schema_id,
                                  path='(unknown {})'.format(fk_loc_id),
                                  values=list()),
                      members=fk_members))

            fkLoc = fkDef.fkLoc
            fkVals = fkLoc.values

            # Second pass will do the validation
            for theValue in theValues:
                fkVals.append(FKVal(where=self.currentJSONFile, value=theValue))

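# Hedged sketch of composite-key handling: when 'members' names several fields,
# tuples of values are imploded into single comparable strings. The exact
# encoding used by PrimaryKey.GenKeyStrings is project-specific; the separator
# below is an assumption for illustration only.
def gen_key_strings(obtained_values, sep='\0'):
    return [sep.join(map(str, t)) for t in obtained_values]

assert gen_key_strings([("GSM123", "donor1")]) == ["GSM123\0donor1"]
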
def loadJSONSchemas(self, *args, verbose=None):
    p_schemaHash = self.schemaHash
    # Schema validation stats
    numDirOK = 0
    numDirFail = 0
    numFileOK = 0
    numFileIgnore = 0
    numFileFail = 0

    if verbose:
        print("PASS 0.a: JSON schema loading and cache generation")

    jsonSchemaPossibles = list(args)
    jsonSchemaNext = []
    refSchemaCache = self.refSchemaCache = {}
    refSchemaFile = {}
    refSchemaSet = self.refSchemaSet = {}
    inlineCounter = 0
    for jsonSchemaPossible in jsonSchemaPossibles:
        schemaObj = None

        if isinstance(jsonSchemaPossible, dict):
            schemaObj = jsonSchemaPossible
            errors = schemaObj.get('errors')
            if errors is None:
                if verbose:
                    print("\tIGNORE: cached schema does not have the mandatory 'errors' attribute, so it cannot be processed")
                numFileIgnore += 1
                continue

            jsonSchema = schemaObj.get('schema')
            if jsonSchema is None:
                if verbose:
                    print("\tIGNORE: cached schema does not have the mandatory 'schema' attribute, so it cannot be processed")
                errors.append({
                    'reason': 'unexpected',
                    'description': "The cached schema is missing"
                })
                numFileIgnore += 1
                continue

            schemaObj['schema_hash'] = self.GetNormalizedJSONHash(jsonSchema)

            if 'file' not in schemaObj:
                schemaObj['file'] = '(inline schema {})'.format(inlineCounter)
                inlineCounter += 1
            jsonSchemaFile = schemaObj['file']
        elif os.path.isdir(jsonSchemaPossible):
            jsonSchemaDir = jsonSchemaPossible
            # It's a possible JSON Schema directory, not a JSON Schema file
            try:
                for relJsonSchemaFile in os.listdir(jsonSchemaDir):
                    if relJsonSchemaFile[0] == '.':
                        continue

                    newJsonSchemaFile = os.path.join(jsonSchemaDir, relJsonSchemaFile)
                    if os.path.isdir(newJsonSchemaFile) or '.json' in relJsonSchemaFile:
                        jsonSchemaPossibles.append(newJsonSchemaFile)
                numDirOK += 1
            except IOError as ioe:
                if verbose:
                    print("FATAL ERROR: Unable to open JSON schema directory {0}. Reason: {1}\n".format(jsonSchemaDir, ioe.strerror), file=sys.stderr)
                numDirFail += 1

            continue
        else:
            jsonSchemaFile = jsonSchemaPossible
            if verbose:
                print("* Loading schema {0}".format(jsonSchemaFile))
            try:
                with open(jsonSchemaFile, mode="r", encoding="utf-8") as sHandle:
                    jsonSchema = json.load(sHandle)
            except IOError as ioe:
                if verbose:
                    print("FATAL ERROR: Unable to open schema file {0}. Reason: {1}".format(jsonSchemaFile, ioe.strerror), file=sys.stderr)
                numFileFail += 1
                continue
            else:
                errors = []
                schemaObj = {
                    'schema': jsonSchema,
                    'schema_hash': self.GetNormalizedJSONHash(jsonSchema),
                    'file': jsonSchemaFile,
                    'errors': errors
                }

        schemaValId = jsonSchema.get(self.SCHEMA_KEY)
        if schemaValId is None:
            if verbose:
                print("\tIGNORE: {0} does not have the mandatory '{1}' attribute, so it cannot be validated".format(jsonSchemaFile, self.SCHEMA_KEY))
            errors.append({
                'reason': 'no_schema',
                'description': "JSON Schema attribute '{}' is missing".format(self.SCHEMA_KEY)
            })
            numFileIgnore += 1
            continue

        if PLAIN_VALIDATOR_MAPPER.get(schemaValId) is None:
            if verbose:
                print("\tIGNORE/FIXME: The JSON Schema id {0} is not being acknowledged by this validator".format(schemaValId))
            errors.append({
                'reason': 'schema_unknown',
                'description': "'$schema' id {0} is not being acknowledged by this validator".format(schemaValId)
            })
            numFileIgnore += 1
            continue

        # Getting the JSON Schema URI, needed later
        idKey = '$id' if '$id' in jsonSchema else 'id'
        jsonSchemaURI = jsonSchema.get(idKey)
        if jsonSchemaURI is not None:
            if jsonSchemaURI in refSchemaFile:
                if verbose:
                    print("\tERROR: schema in {0} and schema in {1} have the same id".format(jsonSchemaFile, refSchemaFile[jsonSchemaURI]), file=sys.stderr)
                errors.append({
                    'reason': 'dup_id',
                    'description': "schema in {0} and schema in {1} have the same id".format(jsonSchemaFile, refSchemaFile[jsonSchemaURI])
                })
                numFileFail += 1
                continue
            else:
                refSchemaCache[jsonSchemaURI] = jsonSchema
                refSchemaFile[jsonSchemaURI] = jsonSchemaFile
        else:
            numFileIgnore += 1
            if verbose:
                print("\tIGNORE: Schema in {0} has no id attribute".format(jsonSchemaFile), file=sys.stderr)
            if self.doNotValidateNoId:
                errors.append({
                    'reason': 'no_id',
                    'description': "JSON Schema attributes '$id' (Draft06 onward) and 'id' (Draft04) are missing in {}".format(jsonSchemaFile)
                })
                numFileIgnore += 1
                continue

        # We need to store these before creating the validators
        # in order to build the RefSchema cache
        jsonSchemaNext.append(schemaObj)

    if verbose:
        print("PASS 0.b: JSON schema validation")

    refSchemaListSet = {}
    for schemaObj in jsonSchemaNext:
        jsonSchema = schemaObj['schema']
        jsonSchemaFile = schemaObj['file']
        errors = schemaObj['errors']

        # Errors related to these are captured in the previous loop
        schemaValId = jsonSchema.get(self.SCHEMA_KEY)
        plain_validator = PLAIN_VALIDATOR_MAPPER.get(schemaValId)

        # Getting the JSON Schema URI, needed later
        idKey = '$id' if '$id' in jsonSchema else 'id'
        jsonSchemaURI = jsonSchema.get(idKey)

        validator, customFormatInstances = extendValidator(jsonSchemaURI, plain_validator, self.customTypes, self.customValidators, config=self.config, jsonSchemaSource=jsonSchemaFile)

        schemaObj['customFormatInstances'] = customFormatInstances
        schemaObj['validator'] = validator

        # Validate the extended JSON schema properly
        metaSchema = validator.META_SCHEMA
        if len(customFormatInstances) > 0:
            metaSchema = metaSchema.copy()
            metaSchema['properties'] = metaProps = metaSchema['properties'].copy()

            for customFormatInstance in customFormatInstances:
                for kF, vF in customFormatInstance.triggerJSONSchemaDef.items():
                    if kF in metaProps:
                        # Multiple declarations
                        vM = metaProps[kF].copy()
                        if 'anyOf' not in vM:
                            newDecl = {
                                'anyOf': [
                                    vM
                                ]
                            }
                            vM = metaProps[kF] = newDecl
                        else:
                            metaProps[kF] = vM

                        vM['anyOf'].append(vF)
                    else:
                        metaProps[kF] = vF

        # We need to shadow the original schema
        localRefSchemaCache = refSchemaCache.copy()
        localRefSchemaCache[jsonSchemaURI] = metaSchema
        cachedSchemasResolver = JSV.RefResolver(base_uri=jsonSchemaURI, referrer=metaSchema, store=localRefSchemaCache)

        valErrors = [valError for valError in validator(metaSchema, resolver=cachedSchemasResolver).iter_errors(jsonSchema)]
        if len(valErrors) > 0:
            if verbose:
                print("\t- ERRORS:\n" + "\n".join(map(lambda se: "\t\tPath: {0} . Message: {1}".format("/" + "/".join(map(lambda e: str(e), se.path)), se.message), valErrors)) + "\n")
            for valError in valErrors:
                errors.append({
                    'reason': 'schema_error',
                    'description': "Path: {0} . Message: {1}".format("/" + "/".join(map(lambda e: str(e), valError.path)), valError.message)
                })
            numFileFail += 1
        elif jsonSchemaURI is not None:
            # Getting the JSON Pointer object instance of the augmented schema
            # my $jsonSchemaP = $v->schema($jsonSchema)->schema;
            # This step is done, so we fetch a complete schema
            # $jsonSchema = $jsonSchemaP->data;
            if jsonSchemaURI in p_schemaHash:
                if verbose:
                    print("\tERROR: validated, but schema in {0} and schema in {1} have the same id".format(jsonSchemaFile, p_schemaHash[jsonSchemaURI]['file']), file=sys.stderr)
                errors.append({
                    'reason': 'dup_id',
                    'description': "JSON Schema validated, but schema in {0} and schema in {1} have the same id".format(jsonSchemaFile, p_schemaHash[jsonSchemaURI]['file'])
                })
                numFileFail += 1
            else:
                if verbose:
                    print("\t- Validated {0}".format(jsonSchemaURI))

                # Reverse mappings, needed later
                triggeringFeatures = []
                for cFI in customFormatInstances:
                    for triggerAttribute, _ in cFI.getValidators():
                        triggeringFeatures.append(triggerAttribute)

                traverseJSONSchema(jsonSchema, schemaURI=jsonSchemaURI, keys=triggeringFeatures, refSchemaListSet=refSchemaListSet)

                p_schemaHash[jsonSchemaURI] = schemaObj
                numFileOK += 1
        else:
            # This is here to capture cases where we wanted to validate an
            # unidentified schema for its correctness
            if verbose:
                print("\tIGNORE: validated, but schema in {0} has no id attribute".format(jsonSchemaFile), file=sys.stderr)
            errors.append({
                'reason': 'no_id',
                'description': "JSON Schema attributes '$id' (Draft06 onward) and 'id' (Draft04) are missing"
            })
            numFileIgnore += 1

    if verbose:
        print("\nSCHEMA VALIDATION STATS: loaded {0} schemas from {1} directories, ignored {2} schemas, failed {3} schemas and {4} directories".format(numFileOK, numDirOK, numFileIgnore, numFileFail, numDirFail))

        print("\nPASS 0.c: JSON schema set consistency checks")

    # Circular references check is based on having two levels:
    # one unmodified, another being built from the first, taking
    # into account already visited schemas
    refSchemaSetBase = {}
    for jsonSchemaURI, traverseListSet in refSchemaListSet.items():
        # Time to implode each one of the elements from refSchemaListSet
        # for further usage
        refSchemaSetBase[jsonSchemaURI] = flattenTraverseListSet(traverseListSet)

    for jsonSchemaURI, jsonSchemaSet in refSchemaSetBase.items():
        id2ElemId, keyRefs, jp2val = jsonSchemaSet

        # referenced schemas id2ElemId and keyRefs
        if REF_FEATURE in keyRefs:
            # Unlinking references on keyRefs
            keyRefs_augmented = {}
            for featName, featList in keyRefs.items():
                keyRefs_augmented[featName] = list(featList)

            # Unlinking references on id2ElemId
            id2ElemId_augmented = {}
            for i2e_k, featDict in id2ElemId.items():
                id2ElemId_augmented[i2e_k] = {}
                for featName, l_uniqId in featDict.items():
                    id2ElemId_augmented[i2e_k][featName] = list(l_uniqId)

            # And on the $ref case
            refList = keyRefs_augmented[REF_FEATURE]

            # Initializing the visitedURIs through
            # $ref fetching
            visitedURIs = set([jsonSchemaURI])

            # This $ref list can be increased through the process
            for fLoc in refList:
                theRef = fLoc.context[REF_FEATURE]
                # Computing the absolute schema URI
                if uritools.isabsuri(jsonSchemaURI):
                    abs_ref_schema_id, _ = uritools.uridefrag(uritools.urijoin(jsonSchemaURI, theRef))
                else:
                    abs_ref_schema_id, _ = uritools.uridefrag(theRef)

                # Circular references detection check
                if abs_ref_schema_id in visitedURIs:
                    continue

                visitedURIs.add(abs_ref_schema_id)

                # Now, time to get the referenced, gathered data
                refSet = refSchemaSetBase.get(abs_ref_schema_id)
                if refSet is not None:
                    ref_id2ElemId, ref_keyRefs, ref_jp2val = refSet

                    # TODO: properly augment refSchemaSet id2ElemId and keyRefs
                    # with the referenced entries; this is needed to have a
                    # proper bootstrap
                    for ref_pAddr_k, ref_pAddr_v in ref_id2ElemId.items():
                        featDict = id2ElemId_augmented.setdefault(ref_pAddr_k, {})
                        for ref_feat_k, ref_feat_v in ref_pAddr_v.items():
                            featDict.setdefault(ref_feat_k, []).extend(ref_feat_v)

                    for ref_kR_k, ref_kR_v in ref_keyRefs.items():
                        keyRefs_augmented.setdefault(ref_kR_k, []).extend(ref_kR_v)
                else:
                    # TODO: error handling
                    print("UNHANDLED ERROR", file=sys.stderr)

            # Recomposing the tuple
            jsonSchemaSet = (id2ElemId_augmented, keyRefs_augmented, jp2val)

        refSchemaSet[jsonSchemaURI] = jsonSchemaSet

    # Last, bootstrapping the extensions
    # Now, we check whether the declared foreign keys are pointing to loaded JSON schemas
    numSchemaConsistent = 0
    numSchemaInconsistent = 0
    for jsonSchemaURI, p_schema in p_schemaHash.items():
        jsonSchemaFile = p_schema['file']
        if verbose:
            print("* Checking {0}".format(jsonSchemaFile))
        customFormatInstances = p_schema['customFormatInstances']
        isValid = True
        if len(customFormatInstances) > 0:
            (id2ElemId, keyRefs, jp2val) = refSchemaSet[jsonSchemaURI]

            for cFI in customFormatInstances:
                if cFI.needsBootstrapping:
                    doBootstrap = False
                    for triggerAttribute, _ in cFI.getValidators():
                        if triggerAttribute in keyRefs:
                            doBootstrap = True
                            break

                    if doBootstrap:
                        # Bootstrapping the schema
                        # By default this is a no-op
                        errors = cFI.bootstrap(refSchemaTuple=(id2ElemId, keyRefs, self.refSchemaCache))
                        if errors:
                            if verbose:
                                for error in errors:
                                    print("\t- ERROR: {}".format(error['description']), file=sys.stderr)

                            p_schema['errors'].extend(errors)
                            isValid = False

        if isValid:
            if verbose:
                print("\t- Consistent!")
            numSchemaConsistent += 1
        else:
            numSchemaInconsistent += 1

    if verbose:
        print("\nSCHEMA CONSISTENCY STATS: {0} schemas right, {1} with inconsistencies".format(numSchemaConsistent, numSchemaInconsistent))

    return len(self.schemaHash.keys())

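# Hedged usage sketch: the class name and constructor are assumptions; only the
# loadJSONSchemas(*args, verbose=...) call shape and the integer return value
# (number of cached schemas) come from the code above.
#
#     ev = ExtensibleValidator(config=config)
#     numLoaded = ev.loadJSONSchemas('/path/to/schemas-dir', verbose=True)
#     print("{} schemas cached".format(numLoaded))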