def test_convert_with_nested_expectations(self):
        """Convert a schema with nested expectations and compare to a fixture.

        Loads "schema_nested_expectation_json.json", runs it through
        ``self.parser.convert_expectations`` and checks that the resulting
        ``schema_content`` matches
        "schema_nested_expectation_result_json.json" using the
        ``assertStingEqualAsDict`` helper (defined outside this block).
        """
        # arrange
        source_schema = DataLoader.load_schema_with_name(
            "schema_nested_expectation_json.json")
        expected_schema = DataLoader.load_schema_with_name(
            "schema_nested_expectation_result_json.json")
        definition = SchemaDefinition.create(source_schema, False)

        # act
        converted = self.parser.convert_expectations(definition)

        # assert
        self.assertStingEqualAsDict(converted.schema_content, expected_schema)
示例#2
0
    def test_inspect_with_both_schema_formats(self):
        """Inspect the same samples against two schema fixtures and compare.

        The samples are inspected once with "schema_diff_json.json" and once
        with "schema_diff_avro.json"; both inspection results must be equal.
        """
        # arrange
        json_schema_text = DataLoader.load_schema_with_name(
            "schema_diff_json.json")
        avro_schema_text = DataLoader.load_schema_with_name(
            "schema_diff_avro.json")

        samples = DataLoader.load_samples()

        # act
        json_definition = SchemaDefinition.create(json_schema_text, False)
        metrics_for_json = self.inspector.inspect(samples, json_definition)

        avro_definition = SchemaDefinition.create(avro_schema_text, False)
        metrics_for_avro = self.inspector.inspect(samples, avro_definition)

        # assert
        self.assertEqual(metrics_for_json, metrics_for_avro)
示例#3
0
    def test_inspect_with_multiple_expectations_asyncapi_style_json(self):
        """Inspect five samples against the AsyncAPI-style expectation schema.

        Each sample carries a ``random_integer`` and a ``random_string``
        value (10 values in total). The reported attribute integrity must be
        6 / 10, compared to 3 decimal places.
        """
        # arrange
        schema = DataLoader.load_schema_with_name(
            "schema_expectation_asyncapi_style_json.json")

        # (random_integer, random_string) pairs; the trailing notes mark the
        # samples that are expected not to match the schema's expectations.
        rows = [
            (1, 'id_1'),
            (2, 'foo'),  # no match (string)
            (3, 'id_3'),
            (4, 'id_4'),  # no match (integer)
            (5, 'foo'),  # no match (integer, string)
        ]
        samples = [
            {'random_integer': number, 'random_string': text}
            for number, text in rows
        ]

        # act
        definition = SchemaDefinition.create(schema, False)
        metrics = self.inspector.inspect(samples, definition)

        # assert
        expected_integrity = 6 / 10
        self.assertAlmostEqual(
            expected_integrity, metrics.attribute_integrity, 3)
示例#4
0
    def test_inspect_with_inferred_schemas(self):
        """Inspect the default samples against "schema_registry_json.json".

        The schema definition is created with ``True`` as its second argument
        (presumably the "inferred" flag, going by the test name - confirm
        against ``SchemaDefinition.create``). Expected metrics: integrity 1.0,
        specification 0.0 and quality index 0.5.
        """
        # arrange
        schema_text = DataLoader.load_schema_with_name(
            "schema_registry_json.json")
        definition = SchemaDefinition.create(schema_text, True)
        samples = DataLoader.load_samples()

        # act
        metrics = self.inspector.inspect(samples, definition)

        # assert
        self.assertEqual(1.0, metrics.attribute_integrity)
        self.assertEqual(0.0, metrics.attribute_specification)
        self.assertEqual(0.5, metrics.attribute_quality_index)
示例#5
0
    def test_load_required_types_for_deeply_nested_schema(self):
        """Load the required types from "schema_registry_avro_complex.json".

        The fixture is parsed with ``json.loads`` and handed to
        ``self.parser.load_required_types_from_schema``; the first element of
        the returned pair must list the expected slash-separated type paths.

        NOTE(review): another method with this exact name is defined further
        down in this file. If both live in the same class, the later
        definition replaces this one and this test never runs - confirm.
        """
        # arrange
        raw_schema = DataLoader.load_schema_with_name(
            "schema_registry_avro_complex.json")
        parsed_schema = json.loads(raw_schema)

        # act
        required_types, _ = self.parser.load_required_types_from_schema(
            parsed_schema)

        # assert
        expected_types = [
            "complex/subtypeString",
            "complex/subtypeComplex/subtypeNumber",
            "simpleNumber",
        ]
        self.assertListEqual(expected_types, required_types)
    def test_load_required_types_for_deeply_nested_schema(self):
        """Load the required types from "schema_inferred_complex.json".

        The fixture is parsed with ``json.loads`` and handed to a fresh
        ``JsonSchemaParser``; the first element of the returned pair must
        list the expected slash-separated type paths.

        NOTE(review): another method with this exact name is defined earlier
        in this file. If both live in the same class, this definition
        replaces the earlier one, which then never runs - confirm.
        """
        # arrange
        raw_schema = DataLoader.load_schema_with_name(
            "schema_inferred_complex.json")
        parsed_schema = json.loads(raw_schema)

        # act
        json_parser = JsonSchemaParser()
        required_types, _ = json_parser.load_required_types_from_schema(
            parsed_schema)

        # assert
        expected_types = [
            "base",
            "complex/type1number",
            "complex/type3complex/subtype1number",
        ]
        self.assertListEqual(required_types, expected_types)
示例#7
0
    def test_integrity_for_complex_type(self):
        """Check attribute integrity for samples with a nested ``iss_position``.

        Three samples are inspected against "schema_registry_avro.json" via
        ``self.inspector.inspect_attributes``. The test treats 3 of 12 values
        as missing or wrong, so the expected integrity is 9 / 12, compared to
        3 decimal places.
        """
        # arrange
        schema = DataLoader.load_schema_with_name("schema_registry_avro.json")

        complete_sample = {
            "timestamp": 1595601702,
            "iss_position": {
                "longitude": "-42.2948",
                "latitude": "-40.3670",
            },
            "message": "success",
        }
        # "longitude" is absent from the nested position.
        sample_without_longitude = {
            "timestamp": 1595601702,
            "iss_position": {
                "latitude": "-40.3670",
            },
            "message": "success",
        }
        # "timestamp" holds a string and "longitude" holds an integer.
        sample_with_wrong_values = {
            "timestamp": "wrong",
            "iss_position": {
                "longitude": 666,
                "latitude": "-40.0283",
            },
            "message": "success",
        }
        samples = [
            complete_sample,
            sample_without_longitude,
            sample_with_wrong_values,
        ]

        # act
        definition = SchemaDefinition.create(schema, False)
        result = self.inspector.inspect_attributes(samples, definition)

        # assert - only message is not mandatory so 3 out of 12 (3*4) are
        # missing or wrong
        all_elements = 12
        invalid_elements = 3
        expected_integrity = (all_elements - invalid_elements) / all_elements
        failure_message = f"Integrity must be {expected_integrity * 100}%"
        self.assertAlmostEqual(
            expected_integrity, result.attribute_integrity, 3,
            failure_message)
示例#8
0
    def test_specification_from_toeggelomat(self):
        """Inspect the toeggelomat samples against the toeggelomat schema.

        The result must contain 53 entries in ``attribute_details``, and every
        entry must report 1.0 for both ``attribute_specification`` and
        ``attribute_integrity``.
        """
        # arrange
        samples = DataLoader.load_samples_from_file("samples_toeggelomat.json")

        # act
        schema = DataLoader.load_schema_with_name("schema_toeggelomat.json")
        definition = SchemaDefinition.create(schema, False)
        result = self.inspector.inspect(samples, definition)

        # assert
        details = result.attribute_details
        self.assertEqual(53, len(details.keys()),
                         "There should be 53 keys in the dictionary")
        for attribute_metric in details.keys():
            entry = details[attribute_metric]
            self.assertEqual(
                1.0, entry.attribute_specification,
                f"Attribute specification must be 100% ({attribute_metric})")
            self.assertEqual(
                1.0, entry.attribute_integrity,
                f"Attribute integrity must be 100% ({attribute_metric})")
示例#9
0
    def test_inspect_with_min_max_range_expectation(self):
        """Inspect eight integer samples against "schema_with_min_max.json".

        The reported attribute integrity must equal 6 / 8 exactly.
        NOTE(review): presumably 11 and -5 are the two values outside the
        schema's min/max range; the fixture is not visible here - confirm.
        """
        # arrange
        schema = DataLoader.load_schema_with_name("schema_with_min_max.json")

        integer_values = [3, 11, 3, 8, 3, -5, 3, 10]
        samples = [{'random_integer': value} for value in integer_values]

        # act
        definition = SchemaDefinition.create(schema, False)
        metrics = self.inspector.inspect(samples, definition)

        # assert
        expected_integrity = 6 / 8
        self.assertEqual(expected_integrity, metrics.attribute_integrity,
                         f"Attribute integrity must be {(6 / 8) * 100}%")