def test_integrity_on_attribute_level_with_not_specified_partial_field(
        self) -> None:
    """Check integrity of an unspecified attribute missing from one sample.

    ``random_string`` is not part of the schema and is absent from the
    third sample. It must still be listed in the attribute details, with
    an integrity of 1 (compared to three decimal places).
    """
    # arrange
    samples = [
        {"random_int": 1002, "random_string": 1},
        {"random_int": 1003, "random_string": 2},
        {"random_int": 1004},  # random_string deliberately left out
    ]
    schema_definition = DataLoader.expand_schema(
        [("random_int", "integer")], [])

    # act
    result = self.inspector.inspect_attributes(samples, schema_definition)

    # assert
    attribute_details = result.attribute_details
    # assertIn reports the searched key and the available keys on failure,
    # whereas assertTrue(x in y) only reports "False is not true".
    self.assertIn('random_string', attribute_details.keys(),
                  "Missing integrity for attribute random_string")
    self.assertAlmostEqual(
        1, attribute_details['random_string'].attribute_integrity, 3,
        "Integrity of random_string is not correct")
def test_inspect_with_missing_field(self) -> None:
    """Check the overall scores when the specified field is absent.

    The only sample carries ``random_other``, while the schema types
    ``random_int`` and also lists it in the second ``expand_schema``
    argument. The expected specification and integrity are the two-term
    averages spelled out below; the quality index is their mean.
    """
    # arrange
    samples = [{"random_other": "other"}]
    schema_definition = DataLoader.expand_schema(
        [("random_int", "integer")], ["random_int"])

    # act
    result = self.inspector.inspect_attributes(samples, schema_definition)

    # assert
    # NOTE(review): the two terms of each average presumably correspond to
    # random_other and random_int -- confirm against the inspector.
    expected_specification = (0 + 1) / 2
    expected_integrity = (1 + 0) / 2
    self.assertEqual(expected_specification,
                     result.attribute_specification,
                     "Attribute specification is not correct")
    self.assertEqual(expected_integrity, result.attribute_integrity,
                     "Attribute integrity is not correct")
    self.assertEqual((expected_specification + expected_integrity) / 2,
                     result.attribute_quality_index,
                     "Attribute quality is not correct")
def test_specification_on_attribute_level_with_missing_specification(
        self) -> None:
    """Check the specification score of an attribute absent from the schema.

    Both samples contain ``random_string``, but only ``random_int`` is
    typed in the schema. ``random_string`` must appear in the attribute
    details with a specification of 0.0.
    """
    # arrange
    samples = [
        {"random_int": 1002, "random_string": 1},
        {"random_int": 1003, "random_string": 2},
    ]
    schema_definition = DataLoader.expand_schema(
        [("random_int", "integer")], [])

    # act
    result = self.inspector.inspect_attributes(samples, schema_definition)

    # assert
    attribute_details = result.attribute_details
    # assertIn names the missing key and the available keys on failure.
    self.assertIn('random_string', attribute_details.keys())
    self.assertEqual(
        0.0, attribute_details['random_string'].attribute_specification)
def test_specification_on_attribute_level_with_partial_expectations(
        self) -> None:
    """Check per-attribute specification when only one attribute is constrained.

    Both attributes are typed, but only ``random_int`` additionally gets a
    ``minimum`` constraint. The expected specification is .75 for
    ``random_int`` and .5 for ``random_string``.
    """
    # arrange
    samples = [
        {"random_int": 1002, "random_string": 1},
        {"random_int": 1003, "random_string": 2},
    ]
    schema_definition = DataLoader.expand_schema(
        [("random_int", "integer"), ("random_string", "string")],
        [],
        {"random_int": {"minimum": 0}})

    # act
    result = self.inspector.inspect_attributes(samples, schema_definition)

    # assert
    attribute_details = result.attribute_details
    # Verify both keys up front so a missing attribute is reported as a
    # test failure naming the key rather than surfacing as a lookup error.
    self.assertIn('random_int', attribute_details.keys())
    self.assertIn('random_string', attribute_details.keys())
    self.assertEqual(
        .75, attribute_details['random_int'].attribute_specification)
    self.assertEqual(
        .5, attribute_details['random_string'].attribute_specification)
def test_quality_with_complete_specification(self) -> None:
    """Verify the overall scores when every attribute is typed and constrained.

    ``random_string`` must match the pattern ``bar`` and ``random_int`` is
    bounded to 0..100. Expected: integrity .75, specification 1.0 and a
    quality index of .875.
    """
    # Arrange: two records; the first one's random_string does not match.
    records = [
        {"random_int": 1, "random_string": "foo"},
        {"random_int": 2, "random_string": "bar"},
    ]
    attribute_types = [("random_string", "string"), ("random_int", "number")]
    constraints = {
        "random_string": {"pattern": "bar"},
        "random_int": {"minimum": 0, "maximum": 100},
    }
    schema = DataLoader.expand_schema(attribute_types, [], constraints)

    # Act
    report = self.inspector.inspect_attributes(records, schema)

    # Assert
    self.assertEqual(0.75, report.attribute_integrity)
    self.assertEqual(1.0, report.attribute_specification)
    self.assertEqual(0.875, report.attribute_quality_index)
def test_inspect_with_non_unique_types_does_not_throw_exception(
        self) -> None:
    """Check that mixed value types for one attribute can be inspected.

    ``random_int`` holds an ``int`` in the first sample and strings in the
    other two, while the schema types it as ``integer`` bounded to 0..100.
    The inspection must complete and report an integrity of 1/3 for
    ``random_int`` (compared to three decimal places).
    """
    # arrange
    samples = [
        {"random_int": 1002},
        {"random_int": "1003"},
        {"random_int": "1004"},
    ]
    schema_definition = DataLoader.expand_schema(
        [("random_int", "integer")],
        [],
        {"random_int": {"minimum": 0, "maximum": 100}})

    # act
    result = self.inspector.inspect(samples, schema_definition)

    # assert
    attribute_details = result.attribute_details
    # assertAlmostEqual replaces the deprecated alias assertAlmostEquals,
    # which was removed from unittest in Python 3.12.
    self.assertAlmostEqual(
        (1 / 3), attribute_details['random_int'].attribute_integrity, 3)
def test_integrity_with_float_as_int(self) -> None:
    """Expect zero integrity for a decimal string in an integer attribute.

    The single sample stores the string ``"10000001.023"`` under
    ``random_int``, which the schema types as ``integer``.
    """
    # Arrange: one record whose value is a decimal number written as text.
    records = [{"random_int": "10000001.023"}]
    attribute_types = [("random_int", "integer")]
    schema = DataLoader.expand_schema(attribute_types, [])

    # Act
    report = self.inspector.inspect_attributes(records, schema)

    # Assert
    self.assertEqual(0.0, report.attribute_integrity)
def test_integrity_with_missing_not_required(self) -> None:
    """Expect full integrity when a typed attribute is ``None`` in one sample.

    ``random_int`` is typed as ``integer`` and the second ``expand_schema``
    argument is empty. The middle sample holds ``None``; the overall
    integrity must still be 1.0.
    """
    # Arrange: the middle record carries None instead of a number.
    records = [{"random_int": value} for value in (1, None, 2)]
    attribute_types = [("random_int", "integer")]
    schema = DataLoader.expand_schema(attribute_types, [])

    # Act
    report = self.inspector.inspect_attributes(records, schema)

    # Assert
    self.assertEqual(1.0, report.attribute_integrity)
def test_integrity_without_specified_required_field(self) -> None:
    """Expect half integrity when a listed attribute never occurs.

    The schema types ``random_int`` and ``random_string`` and passes
    ``random_string`` in the second ``expand_schema`` argument, but the
    samples only ever contain ``random_int``. Expected integrity: .5.
    """
    # Arrange: three records, none of which has random_string.
    records = [{"random_int": value} for value in (1, 2, 3)]
    attribute_types = [("random_int", "integer"), ("random_string", "string")]
    listed_attributes = ["random_string"]
    schema = DataLoader.expand_schema(attribute_types, listed_attributes)

    # Act
    report = self.inspector.inspect_attributes(records, schema)

    # Assert
    self.assertEqual(0.5, report.attribute_integrity)
def test_integrity_with_negative_as_string(self) -> None:
    """Expect zero integrity for a negative number given as a string.

    The single sample stores the string ``"-10000"`` under ``random_int``,
    which the schema types as ``integer``.
    """
    # Arrange: the value is textual although it looks like an integer.
    records = [{"random_int": "-10000"}]
    attribute_types = [("random_int", "integer")]
    schema = DataLoader.expand_schema(attribute_types, [])

    # Act
    report = self.inspector.inspect_attributes(records, schema)

    # Assert
    failure_hint = ("Attribute integrity must be 0% (even if not required, a "
                    "specified value needs to be correct).")
    self.assertEqual(0.0, report.attribute_integrity, failure_hint)
def test_inspect_with_unspecified_field(self) -> None:
    """Check the overall scores for a sample inspected against an empty schema.

    Nothing is specified, so the expected specification is 0, the
    expected integrity is 1 and the quality index is .5.
    """
    # arrange
    samples = [{"random_int": 1}]
    schema_definition = DataLoader.expand_schema([], [])

    # act
    result = self.inspector.inspect_attributes(samples, schema_definition)

    # assert
    self.assertEqual(0, result.attribute_specification)
    self.assertEqual(1, result.attribute_integrity)
    self.assertEqual(.5, result.attribute_quality_index)
def test_quality_without_specification(self) -> None:
    """Check the quality index of two samples against an empty schema.

    Neither ``random_int`` nor ``random_string`` is specified; the
    expected overall quality index is .5.
    """
    # arrange
    samples = [
        {"random_int": 1, "random_string": "foo"},
        {"random_int": 2, "random_string": "bar"},
    ]
    schema_definition = DataLoader.expand_schema([], [])

    # act
    result = self.inspector.inspect_attributes(samples, schema_definition)

    # assert
    self.assertEqual(.5, result.attribute_quality_index)
def test_quality_with_partial_specification(self) -> None:
    """Verify the overall scores when attributes are typed but unconstrained.

    Both attributes receive a type and nothing else. Expected: integrity
    1.0, specification .5 and a quality index of .75.
    """
    # Arrange
    records = [
        {"random_int": 1, "random_string": "foo"},
        {"random_int": 2, "random_string": "bar"},
    ]
    # NOTE(review): "int" differs from the "integer"/"number" type names
    # used by the sibling tests -- confirm this spelling is intentional.
    attribute_types = [("random_string", "string"), ("random_int", "int")]
    schema = DataLoader.expand_schema(attribute_types, [])

    # Act
    report = self.inspector.inspect_attributes(records, schema)

    # Assert
    self.assertEqual(1.0, report.attribute_integrity)
    self.assertEqual(0.5, report.attribute_specification)
    self.assertEqual(0.75, report.attribute_quality_index)
def test_specification_with_complete_specification(self) -> None:
    """Verify the specification score when all attributes are typed and listed.

    Both attributes are typed and both names are passed in the second
    ``expand_schema`` argument; no further constraints are given. The
    expected overall specification is .5.
    """
    # Arrange
    records = [
        {"random_int": 1, "random_string": "foo"},
        {"random_int": 2, "random_string": "bar"},
    ]
    attribute_types = [("random_int", "integer"), ("random_string", "string")]
    listed_attributes = ["random_string", "random_int"]
    schema = DataLoader.expand_schema(attribute_types, listed_attributes)

    # Act
    report = self.inspector.inspect_attributes(records, schema)

    # Assert
    self.assertEqual(0.5, report.attribute_specification)
def test_specification_with_partial_specification(self) -> None:
    """Verify the specification score when only one of two attributes is typed.

    Only ``random_string`` is part of the schema, so half of the data is
    specified (to .5) and the expected overall specification is .25.
    """
    # Arrange
    records = [
        {"random_int": 1, "random_string": "foo"},
        {"random_int": 2, "random_string": "bar"},
    ]
    attribute_types = [("random_string", "string")]
    schema = DataLoader.expand_schema(attribute_types, [])

    # Act
    report = self.inspector.inspect_attributes(records, schema)

    # Assert
    failure_hint = (
        "Specification must be 25% because only half of the data is specified in schema"
    )
    self.assertEqual(0.25, report.attribute_specification, failure_hint)
def test_integrity_with_additional_field(self) -> None:
    """Expect full integrity when samples carry an attribute not in the schema.

    Every sample has ``random_string`` in addition to the typed
    ``random_int``; the overall integrity must be 1.0.
    """
    # Arrange: pair each number with a string the schema does not mention.
    value_pairs = ((1, "abc"), (2, "efg"), (3, "hij"))
    records = [
        {"random_int": number, "random_string": text}
        for number, text in value_pairs
    ]
    attribute_types = [("random_int", "integer")]
    schema = DataLoader.expand_schema(attribute_types, [])

    # Act
    report = self.inspector.inspect_attributes(records, schema)

    # Assert
    self.assertEqual(1.0, report.attribute_integrity)
def test_specification_with_irrelevant_specification(self) -> None:
    """Verify the specification score when the schema matches no sample attribute.

    The schema only types ``random_other``, which never occurs in the
    samples; the expected overall specification is 0.
    """
    # Arrange
    records = [
        {"random_int": 1, "random_string": "foo"},
        {"random_int": 2, "random_string": "bar"},
    ]
    attribute_types = [("random_other", "string")]
    schema = DataLoader.expand_schema(attribute_types, [])

    # Act
    report = self.inspector.inspect_attributes(records, schema)

    # Assert
    failure_hint = (
        "Specification must be 0% because none of the attributes are specified"
    )
    self.assertEqual(0, report.attribute_specification, failure_hint)
def test_integrity_on_attribute_level_with_missing_value(self) -> None:
    """Check per-attribute integrity when some values have the wrong type.

    ``random_int`` (typed ``integer``) holds a string in one of four
    samples; ``random_string`` (typed ``string``) holds a string in only
    one of four. Expected integrity: 3/4 and 1/4 respectively, compared
    to three decimal places.
    """
    # arrange
    samples = [
        {"random_int": 1002, "random_string": 1},
        {"random_int": 1003, "random_string": 2},
        {"random_int": "foo", "random_string": 3},
        {"random_int": 1005, "random_string": "fourth"},
    ]
    schema_definition = DataLoader.expand_schema(
        [("random_int", "integer"), ("random_string", "string")], [])

    # act
    result = self.inspector.inspect_attributes(samples, schema_definition)

    # assert
    attribute_details = result.attribute_details
    # assertIn reports the searched key and the available keys on failure,
    # whereas assertTrue(x in y) only reports "False is not true".
    self.assertIn('random_int', attribute_details.keys(),
                  "Missing integrity for attribute random_int")
    self.assertIn('random_string', attribute_details.keys(),
                  "Missing integrity for attribute random_string")
    self.assertAlmostEqual(
        (3 / 4), attribute_details['random_int'].attribute_integrity, 3,
        "Integrity of random_int is not correct")
    self.assertAlmostEqual(
        (1 / 4), attribute_details['random_string'].attribute_integrity, 3,
        "Integrity of random_string is not correct")
def test_integrity_on_attribute_level_with_not_specified_fields(
        self) -> None:
    """Check that an attribute absent from the schema is still reported.

    ``random_string`` is present in the sample but not in the schema. It
    must appear in the attribute details with an integrity of 1.0.
    """
    # arrange
    samples = [{"random_int": 1002, "random_string": 1}]
    schema_definition = DataLoader.expand_schema(
        [("random_int", "integer")], [])

    # act
    result = self.inspector.inspect_attributes(samples, schema_definition)

    # assert
    attribute_details = result.attribute_details
    # assertIn reports the searched key and the available keys on failure.
    self.assertIn(
        'random_string', attribute_details.keys(),
        "Even a not specified fields needs to be present in the details.")
    self.assertEqual(
        1.0, attribute_details['random_string'].attribute_integrity)
def test_quality_on_attribute_level(self) -> None:
    """Check the per-attribute quality index for a constrained and a free attribute.

    ``random_int`` is typed ``integer`` and bounded to 50..100, with sample
    values 2, 55 and 101. ``random_string`` is not part of the schema.
    Expected quality indices are ``((1 / 3) + 1) / 2`` and ``(1 + 0) / 2``
    respectively, compared to three decimal places.
    """
    # arrange
    samples = [
        {"random_int": 2, "random_string": "one"},
        {"random_int": 55, "random_string": "two"},
        {"random_int": 101, "random_string": "three"},
    ]
    schema_definition = DataLoader.expand_schema(
        [("random_int", "integer")],
        [],
        {"random_int": {"minimum": 50, "maximum": 100}})

    # act
    result = self.inspector.inspect(samples, schema_definition)

    # assert
    attribute_details = result.attribute_details
    # assertIn names the missing key and the available keys on failure.
    self.assertIn('random_int', attribute_details.keys())
    self.assertIn('random_string', attribute_details.keys())
    # assertAlmostEqual replaces the deprecated alias assertAlmostEquals,
    # which was removed from unittest in Python 3.12.
    self.assertAlmostEqual(
        ((1 / 3) + 1) / 2,
        attribute_details['random_int'].attribute_quality_index, 3)
    self.assertAlmostEqual(
        (1 + 0) / 2,
        attribute_details['random_string'].attribute_quality_index, 3)