def check_assertion_compatibility(
    reader_schema, writer_schema, assertion_check: AssertionCheck, location: List[str]
) -> SchemaCompatibilityResult:
    """Compare one assertion keyword between the reader and writer schemas.

    The keyword named by ``assertion_check.keyword`` is looked up in both
    schemas with ``.get`` (a missing keyword yields ``None``) and two
    independent checks are run on the pair of values:

    * ``introduced_constraint(reader_value, writer_value)`` -> records an
      incompatibility of type ``assertion_check.error_when_introducing``.
    * ``assertion_check.comparison(reader_value, writer_value)`` -> records an
      incompatibility of type ``assertion_check.error_when_restricting``.

    Args:
        reader_schema: Mapping-like schema; only ``.get`` is used on it.
        writer_schema: Mapping-like schema; only ``.get`` is used on it.
        assertion_check: Describes the keyword, the comparison callable and
            the incompatibility types / message templates to use.
        location: Path reported with every incompatibility that is recorded.

    Returns:
        A ``SchemaCompatibilityResult`` that starts out compatible and
        carries zero, one or two incompatibilities.
    """
    keyword_name = assertion_check.keyword.value
    result = SchemaCompatibilityResult.compatible()

    reader_value = reader_schema.get(keyword_name)
    writer_value = writer_schema.get(keyword_name)

    if introduced_constraint(reader_value, writer_value):
        # Report the value that was introduced, i.e. the reader's one.
        # NOTE(review): this assumes introduced_constraint() (defined
        # elsewhere) is true when the reader has the keyword and the writer
        # does not -- in which case writer_value carries no information.
        result.add_incompatibility(
            incompat_type=assertion_check.error_when_introducing,
            message=assertion_check.error_msg_when_introducing.format(
                assert_name=keyword_name,
                introduced_value=reader_value,
            ),
            location=location,
        )

    # The type error below is due to a mypy bug for version 0.820 (issue #10131)
    if assertion_check.comparison(reader_value, writer_value):  # type: ignore
        result.add_incompatibility(
            incompat_type=assertion_check.error_when_restricting,
            message=assertion_check.error_msg_when_restricting.format(
                assert_name=keyword_name,
                reader_value=reader_value,
                writer_value=writer_value,
            ),
            location=location,
        )

    return result
def compatibility_numerical(reader_schema, writer_schema, location: List[str]) -> SchemaCompatibilityResult:
    """Check the numeric assertions of two ``number`` / ``integer`` schemas.

    Runs the maximum / minimum / exclusiveMaximum / exclusiveMinimum
    assertion checks, then compares the ``multipleOf``-style keyword
    (``Keyword.MULTIPLE``) and finally flags a writer of type ``number``
    paired with a reader of type ``integer``.

    Args:
        reader_schema: Schema whose type must already be number or integer.
        writer_schema: Schema whose type must already be number or integer.
        location: Path reported with every incompatibility that is recorded.

    Returns:
        The merged ``SchemaCompatibilityResult`` of all checks.

    Raises:
        AssertionError: If either schema is not of a numeric type.
    """
    # https://json-schema.org/draft/2020-12/json-schema-validation.html#rfc.section.6.2
    result = SchemaCompatibilityResult.compatible()

    reader_type = get_type_of(reader_schema)
    writer_type = get_type_of(writer_schema)

    reader_is_number = reader_type in (Instance.NUMBER, Instance.INTEGER)
    writer_is_number = writer_type in (Instance.NUMBER, Instance.INTEGER)
    assert reader_is_number, "types should have been previously checked"
    assert writer_is_number, "types should have been previously checked"

    checks: List[AssertionCheck] = [MAXIMUM_CHECK, MINIMUM_CHECK, EXCLUSIVE_MAXIMUM_CHECK, EXCLUSIVE_MINIMUM_CHECK]
    for assertion_check in checks:
        check_result = check_assertion_compatibility(
            reader_schema,
            writer_schema,
            assertion_check,
            location,
        )
        result = result.merged_with(check_result)

    reader_multiple = reader_schema.get(Keyword.MULTIPLE.value)
    writer_multiple = writer_schema.get(Keyword.MULTIPLE.value)

    if introduced_constraint(reader_multiple, writer_multiple):
        # Report the reader's value: it is the one being introduced.
        # NOTE(review): assumes introduced_constraint() means "reader has the
        # keyword, writer does not" -- confirm against its definition.
        result.add_incompatibility(
            incompat_type=Incompatibility.multiple_added,
            message=INTRODUCED_INCOMPATIBILITY_MSG_FMT.format(
                assert_name=Keyword.MULTIPLE.value,
                introduced_value=reader_multiple,
            ),
            location=location,
        )

    # The comparisons below need both values; ne() is defined elsewhere and
    # is presumably only true when both are set and differ.
    if ne(reader_multiple, writer_multiple):
        if reader_multiple > writer_multiple:
            message_expanded = f"Multiple must not increase ({reader_multiple} > {writer_multiple})"
            result.add_incompatibility(
                incompat_type=Incompatibility.multiple_expanded,
                message=message_expanded,
                location=location,
            )
        elif writer_multiple % reader_multiple != 0:
            # The check is on `writer % reader`, so it is the writer's value
            # that has to be an integer multiple of the reader's.
            message_changed = (
                f"{writer_multiple} must be an integer multiple of "
                f"{reader_multiple} ({writer_multiple} % {reader_multiple} = 0)"
            )
            result.add_incompatibility(
                incompat_type=Incompatibility.multiple_changed,
                message=message_changed,
                location=location,
            )

    if reader_type == Instance.INTEGER and writer_type == Instance.NUMBER:
        message_narrowed = "Writer produces numbers while reader only accepted integers"
        result.add_incompatibility(
            incompat_type=Incompatibility.type_narrowed,
            message=message_narrowed,
            location=location,
        )

    return result
def compatibility_object(reader_schema, writer_schema, location: List[str]) -> SchemaCompatibilityResult:
    """Check two ``object`` schemas against each other.

    The checks are performed in this order:

    1. properties declared by both schemas (``required`` flag on the property
       itself, then a recursive comparison);
    2. properties declared by only one side, taking the open / partially
       open / closed content model of the other side into account;
    3. ``Keyword.DEPENDENCIES`` arrays and ``Keyword.DEPENDENT_SCHEMAS``;
    4. the maxProperties / minProperties assertion checks;
    5. ``additionalProperties``.

    Args:
        reader_schema: Schema whose type must already be ``object``.
        writer_schema: Schema whose type must already be ``object``.
        location: Path of the object being compared; sub-paths are derived
            from it for the reported incompatibilities.

    Returns:
        A ``SchemaCompatibilityResult`` with every incompatibility found.

    Raises:
        AssertionError: If either schema is not of type ``object``.
    """
    # https://json-schema.org/draft/2020-12/json-schema-validation.html#rfc.section.6.5
    result = SchemaCompatibilityResult.compatible()

    assert get_type_of(reader_schema) == Instance.OBJECT, "types should have been previously checked"
    assert get_type_of(writer_schema) == Instance.OBJECT, "types should have been previously checked"

    properties_location = location + [Keyword.PROPERTIES.value]
    reader_properties = reader_schema.get(Keyword.PROPERTIES.value)
    writer_properties = writer_schema.get(Keyword.PROPERTIES.value)

    reader_property_set = set(reader_properties) if reader_properties else set()
    writer_property_set = set(writer_properties) if writer_properties else set()

    # These properties are unknown in the sense they don't have a direct
    # schema, however there may be an indirect schema (patternProperties or
    # additionalProperties)
    properties_unknown_to_writer = reader_property_set - writer_property_set
    properties_unknown_to_reader = writer_property_set - reader_property_set

    for common_property in reader_property_set & writer_property_set:
        this_property_location = properties_location + [common_property]

        reader_property = reader_properties[common_property]
        writer_property = writer_properties[common_property]

        is_required_by_reader = reader_property.get(Keyword.REQUIRED.value)
        is_required_by_writer = writer_property.get(Keyword.REQUIRED.value)
        if not is_required_by_writer and is_required_by_reader:
            result.add_incompatibility(
                incompat_type=Incompatibility.required_attribute_added,
                message=f"Property {common_property} became required",
                location=this_property_location,
            )

        rec_result = compatibility_rec(
            reader_schema=reader_property,
            writer_schema=writer_property,
            location=this_property_location,
        )
        if is_incompatible(rec_result):
            result = result.merged_with(rec_result)

    # With an open content model any property can be added without breaking
    # compatibility because those do not have assertions, so only check if the
    # reader is using a closed model
    if properties_unknown_to_reader and not is_object_content_model_open(reader_schema):
        for unknown_property_to_reader in properties_unknown_to_reader:
            schema_for_property = schema_from_partially_open_content_model(reader_schema, unknown_property_to_reader)

            if schema_for_property is None:
                result.add_incompatibility(
                    incompat_type=Incompatibility.property_removed_from_closed_content_model,
                    message=f"The property {unknown_property_to_reader} is not accepted anymore",
                    location=properties_location,
                )
            else:
                rec_result = compatibility_rec(
                    reader_schema=schema_for_property,
                    writer_schema=writer_properties[unknown_property_to_reader],
                    location=properties_location,
                )
                if is_incompatible(rec_result):
                    result = result.merged_with(rec_result)
                    result.add_incompatibility(
                        incompat_type=Incompatibility.property_removed_not_covered_by_partially_open_content_model,
                        message=f"property {unknown_property_to_reader} is not compatible",
                        location=properties_location,
                    )

    elif properties_unknown_to_writer:
        is_writer_open_model = is_object_content_model_open(writer_schema)

        if is_writer_open_model:
            # Sorted so the message does not depend on set iteration order.
            properties = ", ".join(sorted(properties_unknown_to_writer))
            message_property_added_to_open_content_model = (
                "Restricting acceptable values of properties is an incompatible "
                f"change. The following properties {properties} accepted any "
                "value because of the lack of validation (the object schema had "
                "neither patternProperties nor additionalProperties), now "
                "these values are restricted."
            )
            result.add_incompatibility(
                incompat_type=Incompatibility.property_added_to_open_content_model,
                message=message_property_added_to_open_content_model,
                location=properties_location,
            )

        if not is_writer_open_model:
            required_by_reader = reader_schema.get(Keyword.REQUIRED.value, [])

            for unknown_property_to_writer in properties_unknown_to_writer:
                schema_for_property = schema_from_partially_open_content_model(writer_schema, unknown_property_to_writer)

                schema_for_property_exists = schema_for_property is not None
                schema_allows_writes = not is_false_schema(schema_for_property)

                if schema_for_property_exists and schema_allows_writes:
                    rec_result = compatibility_rec(
                        reader_schema=reader_properties[unknown_property_to_writer],
                        writer_schema=schema_for_property,
                        location=properties_location,
                    )
                    if is_incompatible(rec_result):
                        result.add_incompatibility(
                            incompat_type=Incompatibility.property_added_not_covered_by_partially_open_content_model,
                            message="incompatible schemas",
                            location=properties_location,
                        )

                # The default has to be looked up on the schema of the new
                # property itself. A non-mapping schema (e.g. a boolean
                # schema) cannot carry a default.
                new_property_schema = reader_properties[unknown_property_to_writer]
                new_property_has_default = (
                    isinstance(new_property_schema, dict) and Keyword.DEFAULT.value in new_property_schema
                )
                new_property_is_required_without_default = (
                    unknown_property_to_writer in required_by_reader and not new_property_has_default
                )
                if new_property_is_required_without_default:
                    result.add_incompatibility(
                        incompat_type=Incompatibility.required_property_added_to_unopen_content_model,
                        message=f"Property {unknown_property_to_writer} added without a default",
                        location=properties_location,
                    )

    reader_attribute_dependencies_schema = reader_schema.get(Keyword.DEPENDENCIES.value, {})
    writer_attribute_dependencies_schema = writer_schema.get(Keyword.DEPENDENCIES.value, {})

    for writer_attribute_dependency_name, writer_attribute_dependencies in writer_attribute_dependencies_schema.items():
        reader_attribute_dependencies = reader_attribute_dependencies_schema.get(writer_attribute_dependency_name)

        if not reader_attribute_dependencies:
            result.add_incompatibility(
                incompat_type=Incompatibility.dependency_array_added,
                message="incompatible dependency array",
                location=location,
            )

        # `or ()` keeps the set difference well defined when the reader has
        # no entry for this dependency (the lookup above returned None).
        new_dependencies = set(writer_attribute_dependencies) - set(reader_attribute_dependencies or ())
        if new_dependencies:
            result.add_incompatibility(
                incompat_type=Incompatibility.dependency_array_extended,
                message=f"new dependencies {new_dependencies}",
                location=location,
            )

    reader_dependent_schemas = reader_schema.get(Keyword.DEPENDENT_SCHEMAS.value, {})
    writer_dependent_schemas = writer_schema.get(Keyword.DEPENDENT_SCHEMAS.value, {})

    for writer_dependent_schema_name, writer_dependent_schema in writer_dependent_schemas.items():
        reader_dependent_schema = reader_dependent_schemas.get(writer_dependent_schema_name)
        # Compare the two schemas registered under this name (not the whole
        # writer mapping).
        if introduced_constraint(reader_dependent_schema, writer_dependent_schema):
            result.add_incompatibility(
                incompat_type=Incompatibility.dependency_schema_added,
                message=f"new dependency schema {writer_dependent_schema_name}",
                location=location,
            )

        rec_result = compatibility_rec(reader_dependent_schema, writer_dependent_schema, location)
        result = result.merged_with(rec_result)

    checks: List[AssertionCheck] = [MAX_PROPERTIES_CHECK, MIN_PROPERTIES_CHECK]
    for assertion_check in checks:
        check_result = check_assertion_compatibility(
            reader_schema,
            writer_schema,
            assertion_check,
            location,
        )
        result = result.merged_with(check_result)

    reader_additional_properties = reader_schema.get(Keyword.ADDITIONAL_PROPERTIES.value)
    writer_additional_properties = writer_schema.get(Keyword.ADDITIONAL_PROPERTIES.value)
    location_additional_properties = location + [Keyword.ADDITIONAL_PROPERTIES.value]

    if introduced_constraint(reader_additional_properties, writer_additional_properties):
        result.add_incompatibility(
            incompat_type=Incompatibility.additional_properties_narrowed,
            message="additionalProperties instroduced",
            location=location_additional_properties,
        )

    # Only recurse when both sides carry a truthy additionalProperties value.
    if reader_additional_properties and writer_additional_properties:
        rec_result = compatibility_rec(
            reader_additional_properties, writer_additional_properties, location_additional_properties
        )
        result = result.merged_with(rec_result)

    return result
def compatibility_array(reader_schema, writer_schema, location: List[str]) -> SchemaCompatibilityResult:
    """Check two ``array`` schemas against each other.

    The item schemas are compared position by position (a non-tuple schema
    is treated as a single position). Positions that exist on only one side
    are then checked against the other side's ``additionalItems``, followed
    by the maxItems / minItems assertion checks and ``uniqueItems``.

    Args:
        reader_schema: Schema whose type must already be ``array``.
        writer_schema: Schema whose type must already be ``array``.
        location: Path of the array being compared; sub-paths are derived
            from it for the reported incompatibilities.

    Returns:
        A ``SchemaCompatibilityResult`` with every incompatibility found, or
        the result of ``type_mismatch`` when exactly one side is a tuple.

    Raises:
        AssertionError: If either schema is not of type ``array``.
    """
    # https://json-schema.org/draft/2020-12/json-schema-validation.html#rfc.section.6.4
    reader_type = get_type_of(reader_schema)
    writer_type = get_type_of(writer_schema)
    assert reader_type == Instance.ARRAY, "types should have been previously checked"
    assert writer_type == Instance.ARRAY, "types should have been previously checked"

    reader_items = reader_schema.get(Keyword.ITEMS.value)
    writer_items = writer_schema.get(Keyword.ITEMS.value)

    result = SchemaCompatibilityResult.compatible()

    reader_is_tuple = is_tuple(reader_schema)
    writer_is_tuple = is_tuple(writer_schema)

    if reader_is_tuple != writer_is_tuple:
        return type_mismatch(reader_type, writer_type, location)

    # Normalise both sides to a list of per-position schemas. Lists are
    # sliced instead of sharing iterators with zip(): zip() pulls one element
    # from the first iterator before noticing the second one is exhausted,
    # which would silently drop the first extra reader item.
    if reader_is_tuple and writer_is_tuple:
        reader_item_schemas = list(reader_items)
        writer_item_schemas = list(writer_items)
    else:
        reader_item_schemas = [reader_items]
        writer_item_schemas = [writer_items]

    common_length = min(len(reader_item_schemas), len(writer_item_schemas))
    extra_reader_items = reader_item_schemas[common_length:]
    extra_writer_items = writer_item_schemas[common_length:]

    for pos in range(common_length):
        rec_result = compatibility_rec(
            reader_item_schemas[pos],
            writer_item_schemas[pos],
            location + ["items", f"{pos}"],
        )
        if is_incompatible(rec_result):
            result = result.merged_with(rec_result)

    reader_additional_items = reader_schema.get(Keyword.ADDITIONAL_ITEMS.value, True)
    reader_restricts_additional_items = not is_true_schema(reader_additional_items)
    location_additional_items = location + [Keyword.ADDITIONAL_ITEMS.value]

    if extra_writer_items and reader_restricts_additional_items:
        # Inspect the additionalItems schema itself, not the derived flag.
        reader_rejects_additional_items = is_false_schema(reader_additional_items)
        if reader_rejects_additional_items:
            result.add_incompatibility(
                incompat_type=Incompatibility.item_removed_from_closed_content_model,
                message=f"Elements starting from index {common_length} are not allowed",
                location=location_additional_items,
            )
        else:
            for pos, writer_item in enumerate(extra_writer_items, start=common_length):
                rec_result = compatibility_rec(reader_additional_items, writer_item, location_additional_items)
                if is_incompatible(rec_result):
                    result.add_incompatibility(
                        incompat_type=Incompatibility.item_removed_not_covered_by_partially_open_content_model,
                        message=f"Item in position {pos} is not compatible",
                        location=location_additional_items,
                    )

    writer_additional_items = writer_schema.get(Keyword.ADDITIONAL_ITEMS.value, True)
    writer_restricts_additional_items = not is_true_schema(writer_additional_items)

    if extra_reader_items:
        # This is just for more detailed diagnostics
        if writer_restricts_additional_items:
            for pos, reader_item in enumerate(extra_reader_items, start=common_length):
                location_reader_item = location + ["items", f"{pos}"]
                rec_result = compatibility_rec(reader_item, writer_additional_items, location_reader_item)
                if is_incompatible(rec_result):
                    result.add_incompatibility(
                        incompat_type=Incompatibility.item_added_not_covered_by_partially_open_content_model,
                        message="New element schema incompatible with the other version",
                        location=location_reader_item,
                    )

        result.add_incompatibility(
            incompat_type=Incompatibility.item_added_to_open_content_model,
            message=f"Elements starting from index {common_length} are now required",
            location=location,
        )

    if is_tuple_without_additional_items(reader_schema) and not is_tuple_without_additional_items(writer_schema):
        result.add_incompatibility(
            incompat_type=Incompatibility.additional_items_removed,
            message="Additional items are not longer allowed",
            location=location_additional_items,
        )

    # NOTE(review): these lookups use the enum member Keyword.ITEMS as the
    # key, whereas every other lookup in this function uses `.value`, and the
    # results are reported under the additionalItems location. Kept as-is
    # because the intended keyword is unclear -- confirm and fix separately.
    reader_additional_items = reader_schema.get(Keyword.ITEMS)
    writer_additional_items = writer_schema.get(Keyword.ITEMS)
    if introduced_constraint(reader_additional_items, writer_additional_items):
        result.add_incompatibility(
            incompat_type=Incompatibility.additional_items_removed,
            message="Items are now restricted, old values may not be valid anymore",
            location=location_additional_items,
        )

    rec_result = compatibility_rec(reader_additional_items, writer_additional_items, location_additional_items)
    result = result.merged_with(rec_result)

    checks: List[AssertionCheck] = [MAX_ITEMS_CHECK, MIN_ITEMS_CHECK]
    for assertion_check in checks:
        check_result = check_assertion_compatibility(
            reader_schema,
            writer_schema,
            assertion_check,
            location,
        )
        result = result.merged_with(check_result)

    # Read each side from its own schema. `uniqueItems: false` imposes no
    # constraint, so a falsy value is treated the same as an absent keyword.
    reader_unique_items = reader_schema.get(Keyword.UNIQUE_ITEMS.value) or None
    writer_unique_items = writer_schema.get(Keyword.UNIQUE_ITEMS.value) or None
    if introduced_constraint(reader_unique_items, writer_unique_items):
        result.add_incompatibility(
            incompat_type=Incompatibility.unique_items_added,
            message=INTRODUCED_INCOMPATIBILITY_MSG_FMT.format(
                assert_name=Keyword.UNIQUE_ITEMS.value,
                introduced_value=reader_unique_items,
            ),
            location=location,
        )

    return result
def compatibility_rec(
    reader_schema: Optional[Any], writer_schema: Optional[Any], location: List[str]
) -> SchemaCompatibilityResult:
    """Dispatch the comparison of two (sub)schemas to the matching checker.

    Handles the "schema added" / "schema removed" cases first, then picks a
    checker based on the types returned by ``get_type_of`` for both schemas.

    Args:
        reader_schema: Reader side of the comparison, may be ``None``.
        writer_schema: Writer side of the comparison, may be ``None``.
        location: Path reported with the incompatibilities that are found.

    Returns:
        The ``SchemaCompatibilityResult`` produced by the selected checker.

    Raises:
        ValueError: If the reader type matches none of the known branches.
        AssertionError: In the ``not`` branch, if either schema is falsy.
    """
    if introduced_constraint(reader_schema, writer_schema):
        return SchemaCompatibilityResult.incompatible(
            incompat_type=Incompatibility.schema_added,
            message="schema added, previously used values may not be valid anymore",
            location=location,
        )

    # Note: This is not always an incompatible change, jsonschema accepts
    # values unless there is an explicit assertion to reject it, meaning the
    # reader_schema would have to be `false` instead of undefined. However, on
    # some code paths this really is an incompatible change, especially when
    # the reader has type `array` to represent a list, and the writer is
    # either a different type or it is also an `array` but now it represents
    # a tuple.
    if reader_schema is None and writer_schema is not None:
        LOG.debug("Schema removed reader_schema.type='%r'", get_type_of(reader_schema))
        return SchemaCompatibilityResult.incompatible(
            incompat_type=Incompatibility.schema_removed,
            message="schema removed",
            location=location,
        )

    # The type of reader_schema and writer_schema may vary wildly. Example:
    #
    #   reader_schema = {"additionalProperties": {"type": "integer"}, ...}
    #   writer_schema = {"additionalProperties": false, ...}
    #
    # When recursing `reader_schema` will be Instance.INTEGER and
    # `writer_schema` will be BooleanSchema
    #
    numeric_types = (Instance.NUMBER, Instance.INTEGER)
    combinator_types = (Subschema.ALL_OF, Subschema.ANY_OF, Subschema.ONE_OF)

    reader_type = get_type_of(reader_schema)
    writer_type = get_type_of(writer_schema)

    numbers_on_both_sides = reader_type in numeric_types and writer_type in numeric_types
    subschema_on_either_side = reader_type in combinator_types or writer_type in combinator_types

    # A `true` schema is accepted wherever an object schema is expected.
    reader_is_true_schema = is_true_schema(reader_schema)
    writer_is_true_schema = is_true_schema(writer_schema)
    reader_counts_as_object = reader_type == Instance.OBJECT or reader_is_true_schema
    writer_counts_as_object = writer_type == Instance.OBJECT or writer_is_true_schema
    objects_on_both_sides = reader_counts_as_object and writer_counts_as_object

    # https://json-schema.org/draft/2020-12/json-schema-validation.html#rfc.section.6.1.1
    has_dedicated_branch = numbers_on_both_sides or subschema_on_either_side or objects_on_both_sides
    if not has_dedicated_branch and reader_type != writer_type:
        return type_mismatch(reader_type, writer_type, location)

    if numbers_on_both_sides:
        return compatibility_numerical(reader_schema, writer_schema, location)

    if subschema_on_either_side:
        return compatibility_subschemas(reader_schema, writer_schema, location)

    if objects_on_both_sides:
        # Replace a `true` schema by a bare object schema before comparing.
        object_reader = {"type": Instance.OBJECT.value} if reader_is_true_schema else reader_schema
        object_writer = {"type": Instance.OBJECT.value} if writer_is_true_schema else writer_schema
        return compatibility_object(object_reader, object_writer, location)

    if reader_type is BooleanSchema:
        return SchemaCompatibilityResult.compatible()

    if reader_type is Subschema.NOT:
        assert reader_schema, "if just one schema is NOT the result should have been a type_mismatch"
        assert writer_schema, "if just one schema is NOT the result should have been a type_mismatch"

        negated_key = Subschema.NOT.value
        return compatibility_rec(
            reader_schema[negated_key],
            writer_schema[negated_key],
            location + [negated_key],
        )

    if reader_type == Instance.BOOLEAN:
        return SchemaCompatibilityResult.compatible()

    if reader_type == Instance.STRING:
        return compatibility_string(reader_schema, writer_schema, location)

    if reader_type == Instance.ARRAY:
        return compatibility_array(reader_schema, writer_schema, location)

    if reader_type == Keyword.ENUM:
        return compatibility_enum(reader_schema, writer_schema, location)

    if reader_type is Instance.NULL:
        return SchemaCompatibilityResult.compatible()

    raise ValueError(f"unknown type {reader_type}")