def test_convert_with_nested_expectations(self):
    """Check that converting a nested-expectation schema yields the stored result.

    The converted schema content is compared against the reference file
    ``schema_nested_expectation_result_json.json``.
    """
    # arrange: raw input schema plus the reference output to compare against
    source_schema = DataLoader.load_schema_with_name(
        "schema_nested_expectation_json.json")
    expected_schema = DataLoader.load_schema_with_name(
        "schema_nested_expectation_result_json.json")
    definition = SchemaDefinition.create(source_schema, False)

    # act
    converted = self.parser.convert_expectations(definition)

    # assert
    self.assertStingEqualAsDict(converted.schema_content, expected_schema)
def test_inspect_with_both_schema_formats(self):
    """Check that the JSON and Avro schema variants give equal inspection results.

    Both schemas are inspected against the same set of samples.
    """
    # arrange
    schema_json = DataLoader.load_schema_with_name("schema_diff_json.json")
    schema_avro = DataLoader.load_schema_with_name("schema_diff_avro.json")
    samples = DataLoader.load_samples()

    # act: inspect the JSON variant first, then the Avro variant
    json_definition = SchemaDefinition.create(schema_json, False)
    result_json = self.inspector.inspect(samples, json_definition)
    avro_definition = SchemaDefinition.create(schema_avro, False)
    result_avro = self.inspector.inspect(samples, avro_definition)

    # assert
    self.assertEqual(result_json, result_avro)
def test_inspect_with_multiple_expectations_asyncapi_style_json(self):
    """Check attribute integrity for a schema with several expectations.

    Five samples with two attributes each are inspected; the expected
    attribute integrity is 6 / 10.
    """
    # arrange
    schema = DataLoader.load_schema_with_name(
        "schema_expectation_asyncapi_style_json.json")
    # (random_integer, random_string) pairs, one per sample
    sample_rows = [
        (1, 'id_1'),
        (2, 'foo'),    # no match (string)
        (3, 'id_3'),
        (4, 'id_4'),   # no match (integer)
        (5, 'foo'),    # no match (integer, string)
    ]
    samples = [
        {'random_integer': number, 'random_string': text}
        for number, text in sample_rows
    ]
    definition = SchemaDefinition.create(schema, False)

    # act
    metrics = self.inspector.inspect(samples, definition)

    # assert: compared to 3 decimal places
    self.assertAlmostEqual(6 / 10, metrics.attribute_integrity, 3)
def test_inspect_with_inferred_schemas(self):
    """Check the metrics reported when inspecting with an inferred schema.

    Expects full integrity, zero specification and a quality index of 0.5.
    """
    # arrange
    raw_schema = DataLoader.load_schema_with_name("schema_registry_json.json")
    # NOTE(review): the second argument (True) presumably flags the schema
    # as inferred -- confirm against SchemaDefinition.create.
    schema_definition = SchemaDefinition.create(raw_schema, True)
    samples = DataLoader.load_samples()

    # act
    result = self.inspector.inspect(samples, schema_definition)

    # assert
    self.assertEqual(1.0, result.attribute_integrity)
    self.assertEqual(0.0, result.attribute_specification)
    self.assertEqual(0.5, result.attribute_quality_index)
def test_load_required_types_for_deeply_nested_schema(self):
    """Check the required type paths extracted from a nested Avro schema.

    Nested attributes are expected as slash-separated paths.
    """
    # arrange
    schema_text = DataLoader.load_schema_with_name(
        "schema_registry_avro_complex.json")
    schema_obj = json.loads(schema_text)
    expected_types = [
        "complex/subtypeString",
        "complex/subtypeComplex/subtypeNumber",
        "simpleNumber",
    ]

    # act: only the first element of the returned pair is checked
    type_definitions, _ = self.parser.load_required_types_from_schema(
        schema_obj)

    # assert
    self.assertListEqual(expected_types, type_definitions)
def test_load_required_types_for_deeply_nested_schema(self):
    """Check the required type paths extracted from a nested inferred schema.

    A fresh ``JsonSchemaParser`` is used; nested attributes are expected as
    slash-separated paths.
    """
    # arrange
    schema_text = DataLoader.load_schema_with_name(
        "schema_inferred_complex.json")
    schema_obj = json.loads(schema_text)
    expected_types = [
        "base",
        "complex/type1number",
        "complex/type3complex/subtype1number",
    ]

    # act: only the first element of the returned pair is checked
    json_parser = JsonSchemaParser()
    type_definitions, _ = json_parser.load_required_types_from_schema(
        schema_obj)

    # assert
    self.assertListEqual(type_definitions, expected_types)
def test_integrity_for_complex_type(self):
    """Check attribute integrity for samples containing a nested object.

    Three samples are inspected; 3 of the 12 counted elements are expected
    to be missing or wrong, giving an integrity of 9 / 12.
    """
    # arrange
    schema = DataLoader.load_schema_with_name("schema_registry_avro.json")
    complete_sample = {
        "timestamp": 1595601702,
        "iss_position": {
            "longitude": "-42.2948",
            "latitude": "-40.3670"
        },
        "message": "success"
    }
    sample_without_longitude = {
        "timestamp": 1595601702,
        "iss_position": {
            "latitude": "-40.3670"
        },
        "message": "success"
    }
    sample_with_wrong_types = {
        "timestamp": "wrong",
        "iss_position": {
            "longitude": 666,
            "latitude": "-40.0283"
        },
        "message": "success"
    }
    samples = [
        complete_sample,
        sample_without_longitude,
        sample_with_wrong_types,
    ]
    definition = SchemaDefinition.create(schema, False)

    # act
    result = self.inspector.inspect_attributes(samples, definition)

    # assert - only message is not mandatory, so 3 out of 12 (3 * 4)
    # elements are missing or wrong
    invalid_elements = 3
    all_elements = 12
    expected_integrity = (all_elements - invalid_elements) / all_elements
    self.assertAlmostEqual(
        expected_integrity,
        result.attribute_integrity,
        3,
        f"Integrity must be {expected_integrity * 100}%")
def test_specification_from_toeggelomat(self):
    """Check per-attribute metrics for the toeggelomat samples and schema.

    Expects 53 attribute entries, each with a specification and an
    integrity of 1.0.
    """
    # arrange
    samples = DataLoader.load_samples_from_file("samples_toeggelomat.json")

    # act
    schema = DataLoader.load_schema_with_name("schema_toeggelomat.json")
    definition = SchemaDefinition.create(schema, False)
    result = self.inspector.inspect(samples, definition)

    # assert
    details = result.attribute_details
    self.assertEqual(53, len(details.keys()),
                     "There should be 53 keys in the dictionary")
    for attribute_metric, detail in details.items():
        self.assertEqual(
            1.0, detail.attribute_specification,
            f"Attribute specification must be 100% ({attribute_metric})")
        self.assertEqual(
            1.0, detail.attribute_integrity,
            f"Attribute integrity must be 100% ({attribute_metric})")
def test_inspect_with_min_max_range_expectation(self):
    """Check attribute integrity for a schema with a min/max expectation.

    Eight single-attribute samples are inspected; the expected attribute
    integrity is 6 / 8.
    """
    # arrange
    schema = DataLoader.load_schema_with_name("schema_with_min_max.json")
    # NOTE(review): 11 and -5 are presumably the two out-of-range values;
    # the bounds live in the schema file -- confirm there.
    integer_values = (3, 11, 3, 8, 3, -5, 3, 10)
    samples = [{'random_integer': value} for value in integer_values]
    definition = SchemaDefinition.create(schema, False)

    # act
    metrics = self.inspector.inspect(samples, definition)

    # assert
    self.assertEqual((6 / 8), metrics.attribute_integrity,
                     f"Attribute integrity must be {(6 / 8) * 100}%")