Exemplo n.º 1
0
    def test_build_row(self):
        """Exercise ``TreeRow.build_row`` with valid and invalid methods.

        For each of the 'numpy' and 'python' methods the test expects
        ``row`` to be ``None`` on a fresh ``TreeRow`` and to equal the data
        handed to ``build_row`` afterwards.  An unrecognised method name is
        expected to raise ``RuntimeError``.
        """
        for supported_method in ('numpy', 'python'):
            tree_row = TreeRow({'foo': 12})
            self.assertIsNone(tree_row.row)
            tree_row.build_row({'foo': 1}, method=supported_method)
            self.assertIsNotNone(tree_row.row)
            self.assertEqual(tree_row.row, {'foo': 1})

        # Any other method name must be rejected.
        tree_row = TreeRow({'foo': 12})
        self.assertIsNone(tree_row.row)
        with self.assertRaises(RuntimeError):
            tree_row.build_row({'foo': 1}, method='no')
Exemplo n.º 2
0
    def test_apply_schema(self):
        """Exercise ``TreeRow.apply_schema`` on one valid and two invalid rows.

        Case 1 applies a two-level schema and compares the resulting row
        with a hand-written expected dict.  Cases 2 and 3 pair data and
        schema whose structure disagrees and expect ``RuntimeError``.

        Preconditions use ``self.assertIsNone`` rather than bare ``assert``
        so they still run under ``python -O`` and report failures through
        unittest like every other check in this test.
        """
        # Case 1: data and schema agree structurally; values need casting.
        input_data_1 = {
            "l1-f": "120.9",
            "l1-s": 34,
            "l1-d": "2018-01-04",
            "f": {
                "l2-f": "-120.9",
                "l2-s": 'YES',
                "l2-a": ["2018-01-04"]
            }
        }
        # Expected row after the schema is applied: values are cast to the
        # schema's types, and 'l2-missing' (absent from the input) is
        # expected to appear with the string value 'nan'.
        output_data_1_exp = {
            "l1-f": 120.9,
            "l1-s": "34.0",
            "l1-d": np.datetime64("2018-01-04"),
            "f": {
                "l2-f": -120.9,
                "l2-s": 'YES',
                "l2-a": [np.datetime64("2018-01-04")],
                'l2-missing': 'nan'
            }
        }
        fork_1 = ForkNode('base', [
            ChildNode('l1-f', FloatDataType()),
            ChildNode('l1-s', StringDataType()),
            ChildNode('l1-d',
                      DateDataType(resolution='D', format_string="%Y-%m-%d")),
            ForkNode('f', [
                ChildNode('l2-f', FloatDataType()),
                ChildNode('l2-s', StringDataType()),
                ChildNode(
                    'l2-a',
                    ArrayDataType(
                        DateDataType(resolution='D',
                                     format_string="%Y-%m-%d"))),
                ChildNode('l2-missing', StringDataType())
            ])
        ])

        tr_1 = TreeRow(input_data_1)
        schema_1 = TreeSchema(base_fork_node=fork_1)

        self.assertIsNone(tr_1.row)
        tr_1 = tr_1.build_row(input_data_1, 'numpy')

        # Before the schema is set and applied, neither the row nor the
        # row's schema should match the targets.
        self.assertNotEqual(tr_1.row, output_data_1_exp)
        self.assertNotEqual(tr_1.get_schema(), schema_1)
        tr_1 = tr_1.set_schema(schema_1)
        tr_1 = tr_1.apply_schema('numpy')
        self.assertEqual(tr_1.row, output_data_1_exp)

        # Case 2: the schema declares 'f' as a float leaf, but the data
        # holds a nested dict under 'f'.
        input_data_2 = {'f': {'float': 20}}
        fork_2 = ForkNode('base', [ChildNode('f', FloatDataType())])

        tr_2 = TreeRow(input_data_2)
        schema_2 = TreeSchema(base_fork_node=fork_2)

        self.assertIsNone(tr_2.row)
        tr_2 = tr_2.build_row(input_data_2, 'numpy')

        self.assertNotEqual(tr_2.get_schema(), schema_2)

        tr_2 = tr_2.set_schema(schema_2)
        with self.assertRaises(RuntimeError):
            tr_2.apply_schema('numpy')

        # Case 3: the schema declares 'f' as a fork, but the data holds a
        # scalar under 'f'.
        input_data_3 = {'f': 20}
        fork_3 = ForkNode(
            'base', [ForkNode('f', [ChildNode('float', FloatDataType())])])

        tr_3 = TreeRow(input_data_3)
        schema_3 = TreeSchema(base_fork_node=fork_3)

        self.assertIsNone(tr_3.row)
        tr_3 = tr_3.build_row(input_data_3, 'numpy')

        self.assertNotEqual(tr_3.get_schema(), schema_3)

        tr_3 = tr_3.set_schema(schema_3)
        with self.assertRaises(RuntimeError):
            tr_3.apply_schema('numpy')