Пример #1
0
    def test_null_repeated(self):
        """
        Shred four rows whose repeated field "v" holds nothing but nulls
        (None, [], [None], [None, None]) and check that no leaf values are
        emitted while the repetition and definition levels match the
        expected encoding.
        """
        data = [
            {"v": None},  # Since v is REPEATED, WE MUST ASSUME IT IS []
            {"v": []},
            {"v": [None]},
            {"v": [None, None]},
        ]

        # Every entry is null, so only the levels carry information.
        expected_values = {"v": []}
        expected_reps = {"v": [0, 0, 0, 0, 1]}
        expected_defs = {"v": [0, 0, 1, 1, 1]}

        schema = SchemaTree(locked=True)
        schema.add("v", (REPEATED, OPTIONAL), object)

        table = rows_to_columns(data, schema)
        self.assertEqual(table.values, expected_values)
        self.assertEqual(table.reps, expected_reps)
        self.assertEqual(table.defs, expected_defs)
Пример #2
0
    def _run_test(self, config):
        """
        Build nested test data described by `config`, shred it with
        rows_to_columns, and assert that the resulting values, repetition
        levels and definition levels equal what the data generators report.

        :param config: list of {name: nature} objects, outermost level first;
                       only the first item of each object is used
        :return: None; mismatches are reported through assertEqual
        """
        # list() is required: dict.items() is a non-subscriptable view on
        # Python 3. The [:1] slice keeps an empty object from raising.
        steps = [item for c in config for item in list(c.items())[:1]]

        # Wrap the generators from the innermost level outwards.
        generator = make_const
        for name, rep_type in reversed(steps):
            generator = rep_type_to_generator[rep_type](name, generator)

        # Register every prefix of the path, outermost first.
        schema = SchemaTree(locked=True)
        path = []
        for name, rep_type in steps:
            path.append(name)
            schema.add(join_field(path), rep_type, int)

        # THESE TESTS ASSUME ONLY ONE LEAF
        full_name = join_field([name for name, _ in steps])

        # Each generated item is (row, values, rep_levels, def_levels);
        # the per-row lists are concatenated into single columns.
        data, values, rep_level, def_level = zip(*generator())
        expected_values = {full_name: sum(values, [])}
        expected_reps = {full_name: sum(rep_level, [])}
        expected_defs = {full_name: sum(def_level, [])}

        table = rows_to_columns(list(data), schema)
        self.assertEqual(table.values, expected_values)
        self.assertEqual(table.reps, expected_reps)
        self.assertEqual(table.defs, expected_defs)
Пример #3
0
    def test_repeated_repeated(self):
        """
        Shred rows produced by two nested make_repeated generators ("a"
        wrapping "b") and compare the "a.b" column against the values and
        levels reported by the generators themselves.
        """
        generate = make_repeated("a", make_repeated("b", make_const))
        # Each generated item is (row, values, rep_levels, def_levels).
        rows, leaf_values, leaf_reps, leaf_defs = zip(*generate())

        tree = SchemaTree()
        tree.add("a", REPEATED, object)
        tree.add("a.b", REPEATED, int)

        columns = rows_to_columns(list(rows), tree)
        # The per-row lists are concatenated into one flat column each.
        self.assertEqual(columns.values, {"a.b": sum(leaf_values, [])})
        self.assertEqual(columns.reps, {"a.b": sum(leaf_reps, [])})
        self.assertEqual(columns.defs, {"a.b": sum(leaf_defs, [])})
Пример #4
0
    def test_null_required(self):
        """
        A REQUIRED text field accepts a real value and yields a single
        entry at level 0; null-like inputs (None, [], [None], [None, None])
        must each make rows_to_columns raise.
        """
        required_schema = SchemaTree(locked=True)
        required_schema.add("v", REQUIRED, text_type)

        columns = rows_to_columns([{"v": "legit value"}], required_schema)
        self.assertEqual(columns.values, {"v": ["legit value"]})
        self.assertEqual(columns.reps, {"v": [0]})
        self.assertEqual(columns.defs, {"v": [0]})

        # Each bad row is shredded on its own so every one is checked.
        for rejected in ({"v": None}, {"v": []}, {"v": [None]}, {"v": [None, None]}):
            with self.assertRaises(Exception):
                rows_to_columns([rejected], required_schema)
Пример #5
0
    def test_dremel_rep_values(self):
        """
        Shred DREMEL_DATA with the Document schema (DocId, Links.*, Name.*)
        and verify the values, repetition levels and definition levels of
        every leaf column.
        """
        # (path, nature, type), registered in this exact order.
        fields = (
            ("DocId", REQUIRED, int),
            ("Name", REPEATED, object),
            ("Name.Url", OPTIONAL, text_type),
            ("Links", OPTIONAL, object),
            ("Links.Forward", REPEATED, int),
            ("Links.Backward", REPEATED, int),
            ("Name.Language", REPEATED, object),
            ("Name.Language.Code", REQUIRED, text_type),
            ("Name.Language.Country", OPTIONAL, text_type),
        )
        tree = SchemaTree(locked=True)
        for path, nature, leaf_type in fields:
            tree.add(path, nature, leaf_type)

        columns = rows_to_columns(DREMEL_DATA, schema=tree)

        self.assertEqual(
            columns.values,
            {
                "DocId": [10, 20],
                "Name.Url": ["http://A", "http://B", "http://C"],
                "Links.Forward": [20, 40, 60, 80],
                "Links.Backward": [10, 30],
                "Name.Language.Code": ["en-us", "en", "en-gb"],
                "Name.Language.Country": ["us", "gb"],
            },
        )
        self.assertEqual(
            columns.reps,
            {
                "DocId": [0, 0],
                "Name.Url": [0, 1, 1, 0],
                "Links.Forward": [0, 1, 1, 0],
                "Links.Backward": [0, 0, 1],
                "Name.Language.Code": [0, 2, 1, 1, 0],
                "Name.Language.Country": [0, 2, 1, 1, 0],
            },
        )
        self.assertEqual(
            columns.defs,
            {
                "DocId": [0, 0],
                "Name.Url": [2, 2, 1, 2],
                "Links.Forward": [2, 2, 2, 2],
                "Links.Backward": [1, 2, 2],
                "Name.Language.Code": [2, 2, 1, 2, 1],
                "Name.Language.Country": [3, 2, 1, 3, 1],
            },
        )
Пример #6
0
    def test_optional_required_repeated(self):
        """
        Shred rows under an OPTIONAL "a" holding a REQUIRED "b" with a
        REQUIRED leaf "c" and a REPEATED leaf "d"; the first row is empty
        and the later rows carry zero, one and two "d" entries.
        """
        tree = SchemaTree(locked=True)
        tree.add("a", OPTIONAL, object)
        tree.add("a.b", REQUIRED, object)
        tree.add("a.b.c", REQUIRED, int)
        tree.add("a.b.d", REPEATED, int)

        rows = [
            {},
            {"a": {"b": {"c": 1}}},
            {"a": {"b": {"c": 2, "d": [3]}}},
            {"a": {"b": {"c": 4, "d": [5, 6]}}},
        ]

        columns = rows_to_columns(rows, tree)

        self.assertEqual(
            columns.values,
            {"a.b.c": [1, 2, 4], "a.b.d": [3, 5, 6]},
        )
        self.assertEqual(
            columns.reps,
            {"a.b.c": [0, 0, 0, 0], "a.b.d": [0, 0, 0, 0, 1]},
        )
        self.assertEqual(
            columns.defs,
            {"a.b.c": [0, 1, 1, 1], "a.b.d": [0, 1, 2, 2, 2]},
        )
Пример #7
0
    def test_classic_nested(self):
        """
        Shred four rows with a REQUIRED text field "a" and a REPEATED
        object "b" (OPTIONAL ints "c" and "d", plus a REPEATED object "e"
        with a REQUIRED int "g") and verify every leaf column.
        """
        rows = [
            {"a": "value0"},
            {"a": "value1", "b": [{"c": -1, "d": 0}]},
            {"a": "value2", "b": [{"c": 1, "d": 2}, {"c": 3, "d": 4}]},
            {
                "a": "value3",
                "b": [
                    {"c": 5, "d": 6},
                    {"c": 7},
                    {"e": [{"g": 1}, {"g": 2}]},
                    {"c": 9, "d": 10},
                ],
            },
        ]

        # (path, nature, type), registered in this exact order.
        fields = (
            ("a", REQUIRED, text_type),
            ("b", REPEATED, object),
            ("b.c", OPTIONAL, int),
            ("b.d", OPTIONAL, int),
            ("b.e", REPEATED, object),
            ("b.e.g", REQUIRED, int),
        )
        tree = SchemaTree(locked=True)
        for path, nature, leaf_type in fields:
            tree.add(path, nature, leaf_type)

        columns = rows_to_columns(rows, tree)

        self.assertEqual(
            columns.values,
            {
                "a": ["value0", "value1", "value2", "value3"],
                "b.c": [-1, 1, 3, 5, 7, 9],
                "b.d": [0, 2, 4, 6, 10],
                "b.e.g": [1, 2],
            },
        )
        self.assertEqual(
            columns.reps,
            {
                "a": [0, 0, 0, 0],
                "b.c": [0, 0, 0, 1, 0, 1, 1, 1],
                "b.d": [0, 0, 0, 1, 0, 1, 1, 1],
                "b.e.g": [0, 0, 0, 1, 0, 1, 1, 2, 1],
            },
        )
        self.assertEqual(
            columns.defs,
            {
                "a": [0, 0, 0, 0],
                "b.c": [0, 2, 2, 2, 2, 2, 1, 2],
                "b.d": [0, 2, 2, 2, 2, 1, 1, 2],
                "b.e.g": [0, 1, 1, 1, 1, 1, 2, 2, 1],
            },
        )