示例#1
0
    def test_transform_tree(self):
        """Check ``TreeRow.transform_tree`` on matching and mismatched schemas.

        Scenario 1 converts a nested dictionary with the 'numpy' method and
        compares the result with the expected typed values; the schema leaf
        'l2-missing' has no counterpart in the input and is expected to come
        back as ``'nan'``.

        Scenarios 2 and 3 expect a ``RuntimeError``: first when the input
        holds a dictionary where the schema declares a leaf, then when the
        input holds a scalar where the schema declares a fork.
        """
        # Scenario 1: input and schema have the same structure.
        raw_row = {
            "l1-f": "120.9",
            "l1-s": 34,
            "l1-d": "2018-01-04",
            "f": {
                "l2-f": "-120.9",
                "l2-s": 'YES',
                "l2-a": ["2018-01-04"]
            }
        }
        expected_row = {
            "l1-f": 120.9,
            "l1-s": "34",
            "l1-d": np.datetime64("2018-01-04"),
            "f": {
                "l2-f": -120.9,
                "l2-s": 'YES',
                "l2-a": [np.datetime64("2018-01-04")],
                'l2-missing': 'nan'
            }
        }
        schema_fork = ForkNode('base', [
            ChildNode('l1-f', FloatDataType()),
            ChildNode('l1-s', StringDataType()),
            ChildNode('l1-d',
                      DateDataType(resolution='D', format_string="%Y-%m-%d")),
            ForkNode('f', [
                ChildNode('l2-f', FloatDataType()),
                ChildNode('l2-s', StringDataType()),
                ChildNode(
                    'l2-a',
                    ArrayDataType(
                        DateDataType(resolution='D',
                                     format_string="%Y-%m-%d"))),
                ChildNode('l2-missing', StringDataType())
            ])
        ])

        tr = TreeRow(raw_row)
        self.assertEqual(tr.transform_tree(raw_row, schema_fork, 'numpy'),
                         expected_row)

        # Scenario 2: the input nests a dict where the schema has a leaf.
        nested_row = {'f': {'float': 20}}
        leaf_schema = ForkNode('base', [ChildNode('f', FloatDataType())])

        # The row is built outside the context manager so that only
        # transform_tree itself can satisfy the expected RuntimeError.
        tr = TreeRow(nested_row)
        with self.assertRaises(RuntimeError):
            tr.transform_tree(nested_row, leaf_schema, 'numpy')

        # Scenario 3: the input has a scalar where the schema has a fork.
        scalar_row = {'f': 20}
        fork_schema = ForkNode(
            'base', [ForkNode('f', [ChildNode('float', FloatDataType())])])

        tr = TreeRow(scalar_row)
        with self.assertRaises(RuntimeError):
            tr.transform_tree(scalar_row, fork_schema, 'numpy')
示例#2
0
 def test_get_python_type(self):
     """The callable from ``get_python_type`` turns "2018" into a datetime.

     Checked with resolution 'Y', resolution 'M' and no explicit
     resolution, always with the ``"%Y"`` format string.
     """
     for extra_kwargs in ({'resolution': 'Y'}, {'resolution': 'M'}, {}):
         date_type = DateDataType(format_string="%Y", **extra_kwargs)
         converter = date_type.get_python_type()
         self.assertEqual(type(converter("2018")), datetime)
示例#3
0
 def test_get_numpy_type(self):
     """``get_numpy_type`` returns the datetime64 dtype of the resolution."""
     expectations = (
         ({'resolution': 'Y'}, '<M8[Y]'),
         ({'resolution': 'M'}, '<M8[M]'),
         # No resolution passed: second precision is expected.
         ({}, '<M8[s]'),
     )
     for ctor_kwargs, dtype_code in expectations:
         self.assertEqual(DateDataType(**ctor_kwargs).get_numpy_type(),
                          np.dtype(dtype_code))
示例#4
0
 def test_build_python_value(self):
     """``build_python_value`` matches ``datetime.strptime`` on the format.

     Each scenario parses a string with the data type's own format string
     and compares the result with ``datetime.strptime`` on the same pair.
     """
     scenarios = (
         ({'resolution': 'Y'}, "%Y-%m-%d", "2018-04-01"),
         ({'resolution': 'M'}, "%Y-%m", "2018-04"),
         ({}, "%Y", "2018"),
     )
     for extra_kwargs, fmt, text in scenarios:
         date_type = DateDataType(format_string=fmt, **extra_kwargs)
         self.assertEqual(date_type.build_python_value(text),
                          datetime.strptime(text, fmt))
示例#5
0
 def test_build_numpy_value(self):
     """``build_numpy_value`` yields the ``np.datetime64`` the test expects.

     The same input string is parsed with resolution 'Y', resolution 'M'
     and no explicit resolution; the expected values are the year, the
     year-month and the full date respectively.
     """
     text = "2018-04-01"
     scenarios = (
         ({'resolution': 'Y'}, "2018"),
         ({'resolution': 'M'}, "2018-04"),
         ({}, "2018-04-01"),
     )
     for extra_kwargs, expected_text in scenarios:
         date_type = DateDataType(format_string="%Y-%m-%d", **extra_kwargs)
         self.assertEqual(date_type.build_numpy_value(text),
                          np.datetime64(expected_text))
示例#6
0
 def test_get_python_type(self):
     """An ``ArrayDataType`` reports ``list`` for float, string and date."""
     for element_cls in (FloatDataType, StringDataType, DateDataType):
         array_type = ArrayDataType(element_data_type=element_cls())
         self.assertEqual(array_type.get_python_type(), list)
示例#7
0
 def test_get_numpy_type(self):
     """An ``ArrayDataType`` reports ``np.ndarray`` for every element type."""
     for element_cls in (FloatDataType, StringDataType, DateDataType):
         array_type = ArrayDataType(element_data_type=element_cls())
         self.assertEqual(array_type.get_numpy_type(), np.ndarray)
示例#8
0
 def test_get_numpy_type(self):
     """A one-element ``ListDataType`` reports ``np.ndarray`` as numpy type."""
     for element_cls in (FloatDataType, StringDataType, DateDataType):
         list_type = ListDataType(element_data_types=[element_cls()])
         self.assertEqual(list_type.get_numpy_type(), np.ndarray)
示例#9
0
 def test_get_python_type(self):
     """A one-element ``ListDataType`` reports ``list`` as its Python type."""
     for element_cls in (FloatDataType, StringDataType, DateDataType):
         list_type = ListDataType(element_data_types=[element_cls()])
         self.assertEqual(list_type.get_python_type(), list)
示例#10
0
 def test__datetime_format(self):
     """``_datetime_format`` parses strings from year to microsecond detail.

     Each row holds a format string, an input matching it, and the
     ``datetime`` the parsed value is compared with.
     """
     scenarios = (
         ("%Y", "2018", datetime(2018, 1, 1)),
         ("%Y-%m", "2018-03", datetime(2018, 3, 1)),
         ("%Y-%m-%d", "2018-03-29", datetime(2018, 3, 29)),
         ("%Y-%m-%d %H", "2018-03-29 18", datetime(2018, 3, 29, 18)),
         ("%Y-%m-%d %H:%M", "2018-03-29 18:36",
          datetime(2018, 3, 29, 18, 36)),
         ("%Y-%m-%d %H:%M:%S", "2018-03-29 18:36:59",
          datetime(2018, 3, 29, 18, 36, 59)),
         ("%Y-%m-%d %H:%M:%S.%f", "2018-03-29 18:36:59.967344",
          datetime(2018, 3, 29, 18, 36, 59, 967344)),
     )
     for fmt, text, expected in scenarios:
         parsed = DateDataType(format_string=fmt)._datetime_format(text)
         self.assertEqual(parsed, expected)
示例#11
0
    def test__transform_child_value(self):
        """Exercise ``TreeRow._transform_child_value`` on three leaf types.

        For a float, a string and a date leaf the test checks the converted
        value under the 'numpy' and 'python' methods and expects a
        ``ValueError`` for the unknown method name 'no'. It then checks the
        value produced for a ``None`` input on each leaf and method.
        """
        float_leaf = ChildNode('case1', FloatDataType())
        string_leaf = ChildNode('case2', StringDataType())
        date_leaf = ChildNode(
            'case3', DateDataType(resolution='D', format_string="%Y-%m-%d"))

        # Rows: (raw value, leaf, expected 'numpy', expected 'python').
        conversions = (
            ('120.28', float_leaf, float('120.28'), float('120.28')),
            (40, string_leaf, str(40), str(40)),
            ('2018-01-04', date_leaf, np.datetime64('2018-01-04'),
             datetime.strptime('2018-01-04', "%Y-%m-%d")),
        )
        for raw, leaf, numpy_expected, python_expected in conversions:
            self.assertEqual(
                numpy_expected,
                TreeRow._transform_child_value(raw, leaf, 'numpy'))
            self.assertEqual(
                python_expected,
                TreeRow._transform_child_value(raw, leaf, 'python'))
            # Any method other than 'numpy'/'python' is expected to fail.
            with self.assertRaises(ValueError):
                TreeRow._transform_child_value(raw, leaf, 'no')

        # Missing input: each leaf type has its own placeholder per method.
        missing = None

        float_numpy = TreeRow._transform_child_value(
            missing, float_leaf, 'numpy')
        self.assertTrue(np.isnan(float_numpy))
        float_python = TreeRow._transform_child_value(
            missing, float_leaf, 'python')
        self.assertTrue(float_python is None)

        string_numpy = TreeRow._transform_child_value(
            missing, string_leaf, 'numpy')
        self.assertEqual(string_numpy, 'nan')
        string_python = TreeRow._transform_child_value(
            missing, string_leaf, 'python')
        self.assertEqual(string_python, 'None')

        date_numpy = TreeRow._transform_child_value(
            missing, date_leaf, 'numpy')
        self.assertTrue(np.isnat(date_numpy))
        date_python = TreeRow._transform_child_value(
            missing, date_leaf, 'python')
        self.assertEqual(date_python, '')
示例#12
0
 def test_eq(self):
     """``DateDataType`` equality as asserted by this test.

     Instances sharing a resolution are expected to compare equal even
     when their format strings differ; instances with different
     resolutions are expected to compare unequal.
     """
     day_fmt = "%Y-%m-%d"
     hour_fmt = "%Y-%m-%d %H"

     # Rows: ((resolution, format), (resolution, format)) expected equal.
     equal_pairs = (
         (('Y', day_fmt), ('Y', day_fmt)),
         (('Y', hour_fmt), ('Y', day_fmt)),
         (('D', hour_fmt), ('D', day_fmt)),
     )
     for (left_res, left_fmt), (right_res, right_fmt) in equal_pairs:
         left = DateDataType(resolution=left_res, format_string=left_fmt)
         right = DateDataType(resolution=right_res, format_string=right_fmt)
         self.assertEqual(left, right)

     left = DateDataType(resolution='D', format_string=hour_fmt)
     right = DateDataType(resolution='Y', format_string=day_fmt)
     self.assertNotEqual(left, right)
示例#13
0
    def get_data_types():
        """Build one instance of every data type used as a fixture.

        Returns:
            A 9-tuple in this order: a generic ``DataType`` with numpy dtype
            ``'<i8'``, a ``StringDataType``, a ``FloatDataType``, a
            ``DateDataType`` with resolution 'D', a ``DateDataType`` with
            resolution 's', an ``ArrayDataType`` of floats, an
            ``ArrayDataType`` of strings, a ``ListDataType`` of
            (float, string, date) and a ``ListDataType`` of
            (string, string, date).
        """
        return (
            # Generic data type described by explicit dtypes and NA values.
            DataType(numpy_dtype='<i8',
                     python_dtype=int,
                     numpy_na_value=np.nan,
                     python_na_value=None),
            StringDataType(),
            FloatDataType(),
            # Dates at day and at second resolution.
            DateDataType(resolution='D'),
            DateDataType(resolution='s'),
            # Homogeneous arrays.
            ArrayDataType(element_data_type=FloatDataType()),
            ArrayDataType(element_data_type=StringDataType()),
            # Heterogeneous lists.
            ListDataType(element_data_types=[
                FloatDataType(),
                StringDataType(),
                DateDataType(),
            ]),
            ListDataType(element_data_types=[
                StringDataType(),
                StringDataType(),
                DateDataType(),
            ]),
        )
示例#14
0
    def test__assert_transformation_possible(self):
        """Check which key lists ``_assert_transformation_possible`` accepts.

        Against a fork with leaves 'c1' and 'c2' plus a nested fork 'f1'
        (which itself holds a leaf named 'c2'), every key list containing
        'c2' is expected to raise ``RuntimeError``, while ``['c1']`` and
        ``['c1', 'f1']`` are expected to pass silently.
        """
        schema_fork = ForkNode('base', [
            ChildNode('c1', StringDataType()),
            ChildNode('c2', FloatDataType()),
            ForkNode('f1', [ChildNode('c2', DateDataType())])
        ])

        rejected_key_lists = (['c2'], ['c1', 'c2'], ['f1', 'c1', 'c2'])
        for keys in rejected_key_lists:
            with self.assertRaises(RuntimeError):
                TreeRow._assert_transformation_possible(keys, schema_fork)

        # These calls only need to return without raising.
        accepted_key_lists = (['c1'], ['c1', 'f1'])
        for keys in accepted_key_lists:
            TreeRow._assert_transformation_possible(keys, schema_fork)
示例#15
0
 def test__get_numpy_dtypes(self):
     """``_get_numpy_dtypes`` lists one ``(index, dtype)`` pair per element.

     The expected pairs use the element position rendered as a string and
     the element's numpy type: ``'<f8'`` for floats, ``np.ndarray`` for an
     array element and ``'<M8[M]'`` for a monthly date.
     """
     float_entry = ('0', '<f8')
     array_entry = ('1', np.ndarray)
     month_entry = ('2', '<M8[M]')

     one_element = ListDataType(element_data_types=[FloatDataType()])
     self.assertEqual(one_element._get_numpy_dtypes(), [float_entry])

     two_elements = ListDataType(element_data_types=[
         FloatDataType(),
         ArrayDataType(element_data_type=StringDataType()),
     ])
     self.assertEqual(two_elements._get_numpy_dtypes(),
                      [float_entry, array_entry])

     three_elements = ListDataType(element_data_types=[
         FloatDataType(),
         ArrayDataType(element_data_type=StringDataType()),
         DateDataType(resolution='M'),
     ])
     self.assertEqual(three_elements._get_numpy_dtypes(),
                      [float_entry, array_entry, month_entry])
示例#16
0
 def test_set_schema(self):
     """``set_schema`` replaces the schema held by a ``TreeRow``.

     A row built from a plain date string is first expected to type 'foo'
     as ``StringDataType``. After ``set_schema`` is called with a schema
     declaring 'foo' as ``DateDataType``, the row's schema is expected to
     report the new type.
     """
     tr = TreeRow({'foo': "2018-01-01"})
     # assertIsInstance reports the offending object when it fails, which
     # assertTrue(isinstance(...)) cannot do.
     self.assertIsInstance(
         tr.schema.base_fork_node.find_child('foo').get_data_type(),
         StringDataType)

     new_schema = TreeSchema(base_fork_node=ForkNode(
         name='base',
         children=[
             ChildNode(name='foo',
                       data_type=DateDataType(resolution='D',
                                              format_string="%Y-%m-%d"))
         ]))
     tr.set_schema(new_schema)
     self.assertIsInstance(
         tr.schema.base_fork_node.find_child('foo').get_data_type(),
         DateDataType)
示例#17
0
    def test_get_schema(self):
        """``get_schema`` returns the row's current ``TreeSchema``.

        The schema of a freshly built row is expected to contain the input
        key 'foo'. After ``set_schema`` with a schema whose only child is
        'foo-new', ``get_schema`` is expected to return a schema equal to
        the new one, containing 'foo-new' and no longer 'foo'.
        """
        tr = TreeRow({'foo': "2018-01-01"})
        # Dedicated assertions give informative failure messages and match
        # the assertIn/assertNotIn calls used further down.
        self.assertIsInstance(tr.get_schema(), TreeSchema)
        self.assertIn("foo",
                      tr.get_schema().base_fork_node.get_children_names())

        new_schema = TreeSchema(base_fork_node=ForkNode(
            name='base',
            children=[
                ChildNode(name='foo-new',
                          data_type=DateDataType(resolution='D',
                                                 format_string="%Y-%m-%d"))
            ]))
        tr.set_schema(new_schema)
        self.assertIsInstance(tr.get_schema(), TreeSchema)
        self.assertNotIn("foo",
                         tr.get_schema().base_fork_node.get_children_names())
        self.assertIn("foo-new",
                      tr.get_schema().base_fork_node.get_children_names())
        self.assertEqual(tr.get_schema(), new_schema)
示例#18
0
    def test__get_tree_row(self):
        """Check ``TreeDataSet._get_tree_row`` for every input combination.

        The input row is either a plain dictionary or an already built
        ``TreeRow``, and the schema is either ``None`` or a schema whose two
        date leaves are retyped as daily ``DateDataType``. In every case the
        result is expected to be a ``TreeRow`` whose schema equals the
        expected one and whose ``row`` matches the original data.
        """
        data = self.get_json_data_same_schema()[0]

        def with_daily_dates(schema):
            """Return ``schema`` with both date leaves typed as daily dates."""
            for path in ('level1-date', 'level1-fork/level2-date'):
                schema = schema.set_data_type(
                    path,
                    DateDataType(resolution='D', format_string='%Y-%m-%d'))
            return schema

        def check_result(result, expected_schema):
            """Assert the type, schema and row content of ``result``."""
            self.assertIsInstance(result, TreeRow)
            self.assertEqual(expected_schema, result.schema)
            self._assert_equal_dictionaries(data, result.row)

        # Case 1: Dictionary + no schema
        expected_schema = self.get_schema_for_json_data_same_schema()
        tr = TreeDataSet._get_tree_row(input_row=data,
                                       schema=None,
                                       method='numpy')
        check_result(tr, expected_schema)

        # Case 2: Dictionary + single schema
        expected_schema = with_daily_dates(
            self.get_schema_for_json_data_same_schema())
        schema = with_daily_dates(tr.get_schema())
        tr = TreeDataSet._get_tree_row(input_row=data,
                                       schema=schema,
                                       method='numpy')
        check_result(tr, expected_schema)

        # Case 3: TreeRow + no schema
        tr = TreeRow(input_row=data).build_row(input_row=data, method='numpy')
        expected_schema = self.get_schema_for_json_data_same_schema()
        tr = TreeDataSet._get_tree_row(input_row=tr,
                                       schema=None,
                                       method='numpy')
        check_result(tr, expected_schema)

        # Case 4: TreeRow + schema
        tr = TreeRow(input_row=data).build_row(input_row=data, method='numpy')
        expected_schema = with_daily_dates(
            self.get_schema_for_json_data_same_schema())
        schema = with_daily_dates(tr.get_schema())
        tr = TreeDataSet._get_tree_row(input_row=tr,
                                       schema=schema,
                                       method='numpy')
        check_result(tr, expected_schema)
示例#19
0
    def test_apply_schema(self):
        """Check ``TreeRow.apply_schema`` after replacing the row's schema.

        Case 1 builds a row, swaps in a schema with explicit float, string,
        date and array-of-date leaves, applies it with the 'numpy' method
        and compares the resulting row with the expected typed values.

        Cases 2 and 3 set a schema whose structure conflicts with the data
        (a leaf where the data holds a dictionary, then a fork where the
        data holds a scalar) and expect ``apply_schema`` to raise
        ``RuntimeError``.
        """
        # Case 1
        input_data_1 = {
            "l1-f": "120.9",
            "l1-s": 34,
            "l1-d": "2018-01-04",
            "f": {
                "l2-f": "-120.9",
                "l2-s": 'YES',
                "l2-a": ["2018-01-04"]
            }
        }
        output_data_1_exp = {
            "l1-f": 120.9,
            "l1-s": "34.0",
            "l1-d": np.datetime64("2018-01-04"),
            "f": {
                "l2-f": -120.9,
                "l2-s": 'YES',
                "l2-a": [np.datetime64("2018-01-04")],
                'l2-missing': 'nan'
            }
        }
        fork_1 = ForkNode('base', [
            ChildNode('l1-f', FloatDataType()),
            ChildNode('l1-s', StringDataType()),
            ChildNode('l1-d',
                      DateDataType(resolution='D', format_string="%Y-%m-%d")),
            ForkNode('f', [
                ChildNode('l2-f', FloatDataType()),
                ChildNode('l2-s', StringDataType()),
                ChildNode(
                    'l2-a',
                    ArrayDataType(
                        DateDataType(resolution='D',
                                     format_string="%Y-%m-%d"))),
                ChildNode('l2-missing', StringDataType())
            ])
        ])

        tr_1 = TreeRow(input_data_1)
        schema_1 = TreeSchema(base_fork_node=fork_1)

        # A bare ``assert`` is removed when Python runs with -O, so the
        # precondition is checked through the TestCase API instead.
        self.assertIsNone(tr_1.row)
        tr_1 = tr_1.build_row(input_data_1, 'numpy')

        # Before the explicit schema is applied neither the row nor the
        # schema is expected to match the target.
        self.assertNotEqual(tr_1.row, output_data_1_exp)
        self.assertNotEqual(tr_1.get_schema(), schema_1)
        tr_1 = tr_1.set_schema(schema_1)
        tr_1 = tr_1.apply_schema('numpy')
        self.assertEqual(tr_1.row, output_data_1_exp)

        # Case 2
        input_data_2 = {'f': {'float': 20}}
        fork_2 = ForkNode('base', [ChildNode('f', FloatDataType())])

        tr_2 = TreeRow(input_data_2)
        schema_2 = TreeSchema(base_fork_node=fork_2)

        self.assertIsNone(tr_2.row)
        tr_2 = tr_2.build_row(input_data_2, 'numpy')

        self.assertNotEqual(tr_2.get_schema(), schema_2)

        tr_2 = tr_2.set_schema(schema_2)
        with self.assertRaises(RuntimeError):
            tr_2.apply_schema('numpy')

        # Case 3
        input_data_3 = {'f': 20}
        fork_3 = ForkNode(
            'base', [ForkNode('f', [ChildNode('float', FloatDataType())])])

        tr_3 = TreeRow(input_data_3)
        schema_3 = TreeSchema(base_fork_node=fork_3)

        self.assertIsNone(tr_3.row)
        tr_3 = tr_3.build_row(input_data_3, 'numpy')

        self.assertNotEqual(tr_3.get_schema(), schema_3)

        tr_3 = tr_3.set_schema(schema_3)
        with self.assertRaises(RuntimeError):
            tr_3.apply_schema('numpy')
示例#20
0
 def test_is_nullable(self):
     """``is_nullable`` reflects the ``nullable`` constructor argument."""
     checks = ((False, self.assertFalse), (True, self.assertTrue))
     for flag, assert_truthiness in checks:
         assert_truthiness(DateDataType(nullable=flag).is_nullable())