示例#1
0
    def test_integrity_on_attribute_level_with_not_specified_partial_field(
            self) -> None:
        """Expect integrity 1 for an attribute the schema does not specify.

        ``random_string`` is not part of the schema and is absent from the
        third sample. The test asserts that it is nevertheless reported in
        ``attribute_details`` with an integrity of 1 (to three decimal places).
        """
        # arrange
        samples = [
            {
                "random_int": 1002,
                "random_string": 1
            },
            {
                "random_int": 1003,
                "random_string": 2
            },
            {
                "random_int": 1004
            },
        ]

        schema_definition = DataLoader.expand_schema(
            [("random_int", "integer")], [])

        # act
        result = self.inspector.inspect_attributes(samples, schema_definition)

        # assert
        attribute_details = result.attribute_details
        # assertIn names the missing key and the searched container when it
        # fails, whereas assertTrue only reports "False is not true".
        self.assertIn('random_string', attribute_details.keys(),
                      "Missing integrity for attribute random_string")
        self.assertAlmostEqual(
            1, attribute_details['random_string'].attribute_integrity, 3,
            "Integrity of random_string is not correct")
示例#2
0
    def test_inspect_with_missing_field(self) -> None:
        """Expect specification, integrity and quality index of 0.5 each.

        The single sample only contains ``random_other`` while the schema
        types ``random_int`` and names it in the second ``expand_schema``
        argument (presumably the list of required attributes -- confirm
        against ``DataLoader``).
        """
        # arrange
        samples = [
            {
                "random_other": "other"
            },
        ]

        schema_definition = DataLoader.expand_schema(
            [("random_int", "integer")], ["random_int"])

        # act
        result = self.inspector.inspect_attributes(samples, schema_definition)

        # assert
        # NOTE(review): the two terms presumably stand for random_other and
        # random_int respectively -- confirm against the inspector.
        expected_specification = (0 + 1) / 2
        expected_integrity = (1 + 0) / 2
        self.assertEqual(expected_specification,
                         result.attribute_specification,
                         "Attribute specification is not correct")
        self.assertEqual(expected_integrity, result.attribute_integrity,
                         "Attribute integrity is not correct")
        self.assertEqual((expected_specification + expected_integrity) / 2,
                         result.attribute_quality_index,
                         "Attribute quality is not correct")
示例#3
0
    def test_specification_on_attribute_level_with_missing_specification(
            self) -> None:
        """Expect specification 0.0 for an attribute missing from the schema.

        Both samples carry ``random_string`` but the schema only types
        ``random_int``.
        """
        # arrange
        samples = [
            {
                "random_int": 1002,
                "random_string": 1
            },
            {
                "random_int": 1003,
                "random_string": 2
            },
        ]

        schema_definition = DataLoader.expand_schema(
            [("random_int", "integer")], [])

        # act
        result = self.inspector.inspect_attributes(samples, schema_definition)

        # assert
        attribute_details = result.attribute_details
        # assertIn reports the missing key and the container on failure.
        self.assertIn('random_string', attribute_details.keys())
        self.assertEqual(
            0.0, attribute_details['random_string'].attribute_specification)
示例#4
0
    def test_specification_on_attribute_level_with_partial_expectations(
            self) -> None:
        """Expect specification .75 for ``random_int``, .5 for ``random_string``.

        Both attributes are typed in the schema, but only ``random_int``
        receives an additional ``minimum`` entry through the third
        ``expand_schema`` argument.
        """
        # arrange
        samples = [
            {
                "random_int": 1002,
                "random_string": 1
            },
            {
                "random_int": 1003,
                "random_string": 2
            },
        ]

        schema_definition = DataLoader.expand_schema(
            [("random_int", "integer"),
             ("random_string", "string")], [], {"random_int": {
                 "minimum": 0
             }})

        # act
        result = self.inspector.inspect_attributes(samples, schema_definition)

        # assert
        attribute_details = result.attribute_details
        # Check both keys up front so a missing attribute fails with a clear
        # assertion message instead of a KeyError further down.
        self.assertIn('random_int', attribute_details.keys())
        self.assertIn('random_string', attribute_details.keys())
        self.assertEqual(
            .75, attribute_details['random_int'].attribute_specification)
        self.assertEqual(
            .5, attribute_details['random_string'].attribute_specification)
示例#5
0
    def test_quality_with_complete_specification(self) -> None:
        """Expect integrity .75, specification 1.0 and quality index .875.

        Both attributes are typed in the schema and each gets extra entries
        through the third ``expand_schema`` argument: a ``pattern`` for
        ``random_string`` and ``minimum``/``maximum`` for ``random_int``.
        """
        # arrange
        # random_string does not match
        first_record = {"random_int": 1, "random_string": "foo"}
        second_record = {"random_int": 2, "random_string": "bar"}
        records = [first_record, second_record]

        attribute_types = [
            ("random_string", "string"),
            ("random_int", "number"),
        ]
        constraints = {
            "random_string": {"pattern": "bar"},
            "random_int": {"minimum": 0, "maximum": 100},
        }
        schema = DataLoader.expand_schema(attribute_types, [], constraints)

        # act
        outcome = self.inspector.inspect_attributes(records, schema)

        # assert
        self.assertEqual(.75, outcome.attribute_integrity)
        self.assertEqual(1.0, outcome.attribute_specification)
        self.assertEqual(.875, outcome.attribute_quality_index)
示例#6
0
    def test_inspect_with_non_unique_types_does_not_throw_exception(
            self) -> None:
        """Expect integrity 1/3 when one attribute mixes int and str values.

        ``random_int`` holds the integer ``1002`` in one sample and the
        strings ``"1003"`` and ``"1004"`` in the others. The schema types it
        as ``integer`` with ``minimum`` 0 and ``maximum`` 100. Any exception
        raised by ``inspect`` fails the test.
        """
        # arrange
        samples = [
            {
                "random_int": 1002
            },
            {
                "random_int": "1003"
            },
            {
                "random_int": "1004"
            },
        ]

        schema_definition = DataLoader.expand_schema(
            [("random_int", "integer")], [],
            {"random_int": {
                "minimum": 0,
                "maximum": 100
            }})

        # act
        result = self.inspector.inspect(samples, schema_definition)

        # assert
        attribute_details = result.attribute_details
        # assertAlmostEquals is a deprecated alias that was removed in
        # Python 3.12; assertAlmostEqual is the supported spelling.
        self.assertAlmostEqual(
            (1 / 3), attribute_details['random_int'].attribute_integrity, 3)
示例#7
0
    def test_integrity_with_float_as_int(self) -> None:
        """Expect integrity 0.0 for a float-like string typed as integer.

        The only sample stores the string ``"10000001.023"`` under an
        attribute that the schema declares as ``integer``.
        """
        # arrange
        float_like_text = "10000001.023"
        records = [{"random_int": float_like_text}]
        schema = DataLoader.expand_schema([("random_int", "integer")], [])

        # act
        outcome = self.inspector.inspect_attributes(records, schema)

        # assert
        self.assertEqual(0.0, outcome.attribute_integrity)
示例#8
0
    def test_integrity_with_missing_not_required(self) -> None:
        """Expect integrity 1.0 although one sample holds ``None``.

        ``random_int`` is typed as ``integer`` and the second
        ``expand_schema`` argument is an empty list (presumably the required
        attributes -- confirm against ``DataLoader``).
        """
        # arrange
        records = [{"random_int": value} for value in (1, None, 2)]
        schema = DataLoader.expand_schema([("random_int", "integer")], [])

        # act
        outcome = self.inspector.inspect_attributes(records, schema)

        # assert
        self.assertEqual(1.0, outcome.attribute_integrity)
示例#9
0
    def test_integrity_without_specified_required_field(self) -> None:
        """Expect integrity .5 when a listed attribute never appears.

        The schema types ``random_int`` and ``random_string`` and names
        ``random_string`` in the second ``expand_schema`` argument, yet no
        sample contains ``random_string``.
        """
        # arrange
        records = [{"random_int": number} for number in (1, 2, 3)]
        attribute_types = [
            ("random_int", "integer"),
            ("random_string", "string"),
        ]
        schema = DataLoader.expand_schema(attribute_types, ["random_string"])

        # act
        outcome = self.inspector.inspect_attributes(records, schema)

        # assert
        self.assertEqual(.5, outcome.attribute_integrity)
示例#10
0
    def test_integrity_with_negative_as_string(self) -> None:
        """Expect integrity 0 for a negative number supplied as a string.

        The only sample stores the string ``"-10000"`` under an attribute
        that the schema declares as ``integer``.
        """
        # arrange
        records = [{"random_int": "-10000"}]
        schema = DataLoader.expand_schema([("random_int", "integer")], [])

        # act
        outcome = self.inspector.inspect_attributes(records, schema)

        # assert
        failure_message = (
            "Attribute integrity must be 0% (even if not required, a "
            "specified value needs to be correct).")
        self.assertEqual(.0, outcome.attribute_integrity, failure_message)
示例#11
0
    def test_inspect_with_unspecified_field(self) -> None:
        """Expect specification 0, integrity 1 and quality .5 with no schema.

        The sample carries ``random_int`` while the schema is expanded from
        two empty lists, so nothing is specified for that attribute.
        """
        # arrange
        samples = [
            {
                "random_int": 1
            },
        ]

        schema_definition = DataLoader.expand_schema([], [])

        # act
        result = self.inspector.inspect_attributes(samples, schema_definition)

        # assert
        self.assertEqual(0, result.attribute_specification)
        self.assertEqual(1, result.attribute_integrity)
        self.assertEqual(.5, result.attribute_quality_index)
示例#12
0
    def test_quality_without_specification(self) -> None:
        """Expect a quality index of .5 when the schema specifies nothing.

        Two samples with ``random_int`` and ``random_string`` are inspected
        against a schema expanded from two empty lists.
        """
        # arrange
        samples = [{
            "random_int": 1,
            "random_string": "foo"
        }, {
            "random_int": 2,
            "random_string": "bar"
        }]

        schema_definition = DataLoader.expand_schema([], [])

        # act
        result = self.inspector.inspect_attributes(samples, schema_definition)

        # assert
        self.assertEqual(.5, result.attribute_quality_index)
示例#13
0
    def test_quality_with_partial_specification(self) -> None:
        """Expect integrity 1.0, specification .5 and quality index .75.

        Both attributes are typed in the schema; no further arguments beyond
        the type list and an empty second list are passed to
        ``expand_schema``.
        """
        # arrange
        records = [
            {"random_int": number, "random_string": text}
            for number, text in ((1, "foo"), (2, "bar"))
        ]
        # NOTE(review): "int" differs from the "integer" type name used by
        # the neighbouring tests -- confirm this is intentional.
        attribute_types = [("random_string", "string"), ("random_int", "int")]
        schema = DataLoader.expand_schema(attribute_types, [])

        # act
        outcome = self.inspector.inspect_attributes(records, schema)

        # assert
        self.assertEqual(1.0, outcome.attribute_integrity)
        self.assertEqual(.5, outcome.attribute_specification)
        self.assertEqual(.75, outcome.attribute_quality_index)
示例#14
0
    def test_specification_with_complete_specification(self) -> None:
        """Expect specification .5 when every attribute is typed and listed.

        Both ``random_int`` and ``random_string`` are typed and both names
        are passed in the second ``expand_schema`` argument.
        """
        # arrange
        records = [
            {"random_int": number, "random_string": text}
            for number, text in ((1, "foo"), (2, "bar"))
        ]
        attribute_types = [
            ("random_int", "integer"),
            ("random_string", "string"),
        ]
        listed_names = ["random_string", "random_int"]
        schema = DataLoader.expand_schema(attribute_types, listed_names)

        # act
        outcome = self.inspector.inspect_attributes(records, schema)

        # assert
        self.assertEqual(.5, outcome.attribute_specification)
示例#15
0
    def test_specification_with_partial_specification(self) -> None:
        """Expect specification .25 when only one of two attributes is typed.

        The samples carry ``random_int`` and ``random_string``; the schema
        types ``random_string`` only.
        """
        # arrange
        records = [
            {"random_int": number, "random_string": text}
            for number, text in ((1, "foo"), (2, "bar"))
        ]
        schema = DataLoader.expand_schema([("random_string", "string")], [])

        # act
        outcome = self.inspector.inspect_attributes(records, schema)

        # assert (half of the data is specified to .5)
        failure_message = (
            "Specification must be 25% because only half of the data is "
            "specified in schema")
        self.assertEqual(
            .25, outcome.attribute_specification, failure_message)
示例#16
0
    def test_integrity_with_additional_field(self) -> None:
        """Expect integrity 1.0 despite an attribute unknown to the schema.

        Every sample carries ``random_string`` in addition to ``random_int``,
        but the schema only types ``random_int``.
        """
        # arrange
        records = [
            {"random_int": number, "random_string": text}
            for number, text in enumerate(("abc", "efg", "hij"), start=1)
        ]
        schema = DataLoader.expand_schema([("random_int", "integer")], [])

        # act
        outcome = self.inspector.inspect_attributes(records, schema)

        # assert
        self.assertEqual(1.0, outcome.attribute_integrity)
示例#17
0
    def test_specification_with_irrelevant_specification(self) -> None:
        """Expect specification 0 when the schema types an unrelated name.

        The schema only types ``random_other``, which none of the samples
        contain.
        """
        # arrange
        records = [
            {"random_int": number, "random_string": text}
            for number, text in ((1, "foo"), (2, "bar"))
        ]
        schema = DataLoader.expand_schema([("random_other", "string")], [])

        # act
        outcome = self.inspector.inspect_attributes(records, schema)

        # assert
        failure_message = (
            "Specification must be 0% because none of the attributes are "
            "specified")
        self.assertEqual(0, outcome.attribute_specification, failure_message)
示例#18
0
    def test_integrity_on_attribute_level_with_missing_value(self) -> None:
        """Expect per-attribute integrity of 3/4 and 1/4.

        ``random_int`` (typed ``integer``) holds the string ``"foo"`` in one
        of four samples; ``random_string`` (typed ``string``) holds integers
        in three of four samples. Values are compared to three decimal
        places.
        """
        # arrange
        samples = [
            {
                "random_int": 1002,
                "random_string": 1
            },
            {
                "random_int": 1003,
                "random_string": 2
            },
            {
                "random_int": "foo",
                "random_string": 3
            },
            {
                "random_int": 1005,
                "random_string": "fourth"
            },
        ]

        schema_definition = DataLoader.expand_schema(
            [("random_int", "integer"), ("random_string", "string")], [])

        # act
        result = self.inspector.inspect_attributes(samples, schema_definition)

        # assert
        attribute_details = result.attribute_details
        # assertIn names the missing key and the searched container when it
        # fails, whereas assertTrue only reports "False is not true".
        self.assertIn('random_int', attribute_details.keys(),
                      "Missing integrity for attribute random_int")
        self.assertIn('random_string', attribute_details.keys(),
                      "Missing integrity for attribute random_string")
        self.assertAlmostEqual(
            (3 / 4), attribute_details['random_int'].attribute_integrity, 3,
            "Integrity of random_int is not correct")
        self.assertAlmostEqual(
            (1 / 4), attribute_details['random_string'].attribute_integrity, 3,
            "Integrity of random_string is not correct")
示例#19
0
    def test_integrity_on_attribute_level_with_not_specified_fields(
            self) -> None:
        """Expect integrity 1.0 for an attribute absent from the schema.

        The sample carries ``random_string`` but the schema only types
        ``random_int``; the attribute must still show up in
        ``attribute_details``.
        """
        # arrange
        samples = [
            {
                "random_int": 1002,
                "random_string": 1
            },
        ]

        schema_definition = DataLoader.expand_schema(
            [("random_int", "integer")], [])

        # act
        result = self.inspector.inspect_attributes(samples, schema_definition)

        # assert
        attribute_details = result.attribute_details
        # assertIn reports the missing key and the container on failure.
        self.assertIn(
            'random_string', attribute_details.keys(),
            "Even a not specified fields needs to be present in the details.")
        self.assertEqual(
            1.0, attribute_details['random_string'].attribute_integrity)
示例#20
0
    def test_quality_on_attribute_level(self) -> None:
        """Expect per-attribute quality indices for a partly specified schema.

        ``random_int`` is typed ``integer`` with ``minimum`` 50 and
        ``maximum`` 100; of its sample values 2, 55 and 101 only 55 lies
        inside that range. ``random_string`` is not part of the schema.
        Expected quality indices are ``((1 / 3) + 1) / 2`` and
        ``(1 + 0) / 2`` respectively, compared to three decimal places.
        """
        # arrange
        samples = [
            {
                "random_int": 2,
                "random_string": "one"
            },
            {
                "random_int": 55,
                "random_string": "two"
            },
            {
                "random_int": 101,
                "random_string": "three"
            },
        ]

        schema_definition = DataLoader.expand_schema(
            [("random_int", "integer")], [],
            {"random_int": {
                "minimum": 50,
                "maximum": 100
            }})

        # act
        result = self.inspector.inspect(samples, schema_definition)

        # assert
        attribute_details = result.attribute_details
        # assertIn reports the missing key and the container on failure.
        self.assertIn('random_int', attribute_details.keys())
        self.assertIn('random_string', attribute_details.keys())
        # assertAlmostEquals is a deprecated alias that was removed in
        # Python 3.12; assertAlmostEqual is the supported spelling.
        self.assertAlmostEqual(
            ((1 / 3) + 1) / 2,
            attribute_details['random_int'].attribute_quality_index, 3)
        self.assertAlmostEqual(
            (1 + 0) / 2,
            attribute_details['random_string'].attribute_quality_index, 3)