def setUp(self):
    """Create the shared fixture table used by the tests.

    One column is built per column-factory helper. Every column is fed two
    values (the Char column receives the two-character string '-1'), and
    the resulting table is stored on ``self.test_table``.
    """
    # Objects produced by j_array_list; they become the cells of the JObj column.
    first_jlist, second_jlist = j_array_list([1, -1]), j_array_list([2, -2])

    self.test_table = new_table(
        cols=[
            bool_col(name="Boolean", data=[True, False]),
            byte_col(name="Byte", data=(1, -1)),
            char_col(name="Char", data='-1'),
            short_col(name="Short", data=[1, -1]),
            int_col(name="Int_", data=[1, -1]),
            long_col(name="Long_", data=[1, NULL_LONG]),
            # A long column fed from an int8 numpy array rather than a list.
            long_col(name="NPLong", data=np.array([1, -1], dtype=np.int8)),
            float_col(name="Float_", data=[1.01, -1.01]),
            double_col(name="Double_", data=[1.01, -1.01]),
            string_col(name="String", data=["foo", "bar"]),
            datetime_col(
                name="Datetime",
                data=[dtypes.DateTime(1), dtypes.DateTime(-1)],
            ),
            # Python-object columns: uniform instances, mixed list/instance,
            # and mixed bool/str payloads.
            pyobj_col(name="PyObj", data=[CustomClass(1, "1"), CustomClass(-1, "-1")]),
            pyobj_col(name="PyObj1", data=[[1, 2, 3], CustomClass(-1, "-1")]),
            pyobj_col(name="PyObj2", data=[False, 'False']),
            jobj_col(name="JObj", data=[first_jlist, second_jlist]),
        ]
    )
def setUp(self):
    """Create the fixture table and the per-column expected numpy arrays.

    ``self.test_table`` holds one column per column-factory helper, each fed
    two values. ``self.np_array_dict`` maps every column name to a numpy
    array built from the same literal values.
    """
    j_array_list1 = j_array_list([1, -1])
    j_array_list2 = j_array_list([2, -2])

    input_cols = [
        bool_col(name="Boolean", data=[True, False]),
        byte_col(name="Byte", data=(1, -1)),
        char_col(name="Char", data='-1'),
        short_col(name="Short", data=[1, -1]),
        int_col(name="Int", data=[1, -1]),
        long_col(name="Long", data=[1, NULL_LONG]),
        long_col(name="NPLong", data=np.array([1, -1], dtype=np.int8)),
        float_col(name="Float", data=[1.01, -1.01]),
        double_col(name="Double", data=[1.01, -1.01]),
        string_col(name="String", data=["foo", "bar"]),
        datetime_col(name="Datetime", data=[dtypes.DateTime(1), dtypes.DateTime(-1)]),
        pyobj_col(name="PyObj", data=[CustomClass(1, "1"), CustomClass(-1, "-1")]),
        pyobj_col(name="PyObj1", data=[[1, 2, 3], CustomClass(-1, "-1")]),
        pyobj_col(name="PyObj2", data=[False, 'False']),
        jobj_col(name="JObj", data=[j_array_list1, j_array_list2]),
    ]
    self.test_table = new_table(cols=input_cols)

    self.np_array_dict = {
        'Boolean': np.array([True, False]),
        'Byte': np.array([1, -1], dtype=np.int8),
        # NOTE(review): this array is built from the string '-1', not from
        # two character codes - confirm that is the intended expectation.
        'Char': np.array('-1', dtype=np.int16),
        'Short': np.array([1, -1], dtype=np.int16),
        'Int': np.array([1, -1], dtype=np.int32),
        'Long': np.array([1, NULL_LONG], dtype=np.int64),
        "NPLong": np.array([1, -1], dtype=np.int8),
        "Float": np.array([1.01, -1.01], dtype=np.float32),
        "Double": np.array([1.01, -1.01]),
        # np.bytes_ instead of np.string_: the latter was removed in
        # NumPy 2.0, while np.bytes_ names the same type on NumPy 1.x too.
        "String": np.array(["foo", "bar"], dtype=np.bytes_),
        "Datetime": np.array([1, -1], dtype=np.dtype("datetime64[ns]")),
        "PyObj": np.array([CustomClass(1, "1"), CustomClass(-1, "-1")]),
        "PyObj1": np.array([[1, 2, 3], CustomClass(-1, "-1")], dtype=np.object_),
        "PyObj2": np.array([False, 'False'], dtype=np.object_),
        "JObj": np.array([j_array_list1, j_array_list2]),
    }
def test_to_numpy_remap(self):
    """Exercise ``to_numpy`` on single columns, mixed columns and remapped longs.

    Three things are checked:

    * each column of ``self.test_table`` converts on its own to an array of
      shape ``(2, 1)``;
    * converting all columns of ``self.test_table`` at once raises a
      ``DHError`` whose root cause mentions "same data type";
    * two long columns rewritten to doubles by ``update`` convert to a
      ``(2, 2)`` float array that ``to_table`` turns back into an equal table.
    """
    for col in self.test_table.columns:
        with self.subTest(f"test single column to numpy - {col.name}"):
            np_array = to_numpy(self.test_table, [col.name])
            self.assertEqual((2, 1), np_array.shape)

    # assertRaises (rather than a bare try/except) makes the test fail when
    # no DHError is raised at all, instead of passing vacuously.
    with self.assertRaises(DHError) as cm:
        to_numpy(self.test_table, [col.name for col in self.test_table.columns])
    self.assertIn("same data type", cm.exception.root_cause)

    with self.subTest("test multi-columns to numpy"):
        input_cols = [
            long_col(name="Long", data=[101, -101]),
            long_col(name="Long1", data=[11011, -11011]),
            long_col(name="Long2", data=[NULL_LONG, -1110111]),
            long_col(name="Long3", data=[111101111, -111101111]),
            long_col(name="Long4", data=[11111011111, MAX_LONG]),
        ]
        tmp_table = new_table(cols=input_cols)
        # Rewrite both columns as doubles: the null in Long2 becomes NaN and
        # Long4 is cast, so the two selected columns share one data type.
        tmp_table = tmp_table.update(formulas=[
            "Long2 = isNull(Long2) ? Double.NaN : Long2",
            "Long4 = (double)Long4",
        ])

        np_array = to_numpy(tmp_table, ['Long2', 'Long4'])
        self.assertEqual((2, 2), np_array.shape)
        self.assertEqual(np_array.dtype, float)

        # Round trip: the array must rebuild the same two-column table.
        tmp_table2 = to_table(np_array, ['Long2', 'Long4'])
        self.assert_table_equals(tmp_table2, tmp_table.select(['Long2', 'Long4']))
def test_new_table(self):
    """Check that ``new_table`` builds a table of size 2 from one column per helper."""
    # Two JArrayList instances, filled element by element, for the JObj column.
    jobj1 = JArrayList()
    for value in (1, -1):
        jobj1.add(value)
    jobj2 = JArrayList()
    for value in (2, -2):
        jobj2.add(value)

    table = new_table(
        cols=[
            bool_col(name="Boolean", data=[True, False]),
            byte_col(name="Byte", data=(1, -1)),
            char_col(name="Char", data='-1'),
            short_col(name="Short", data=[1, -1]),
            int_col(name="Int", data=[1, -1]),
            long_col(name="Long", data=[1, -1]),
            long_col(name="NPLong", data=np.array([1, -1], dtype=np.int8)),
            float_col(name="Float", data=[1.01, -1.01]),
            double_col(name="Double", data=[1.01, -1.01]),
            string_col(name="String", data=["foo", "bar"]),
            datetime_col(
                name="Datetime",
                data=[dtypes.DateTime(1), dtypes.DateTime(-1)],
            ),
            pyobj_col(name="PyObj", data=[CustomClass(1, "1"), CustomClass(-1, "-1")]),
            pyobj_col(name="PyObj1", data=[[1, 2, 3], CustomClass(-1, "-1")]),
            pyobj_col(name="PyObj2", data=[False, 'False']),
            jobj_col(name="JObj", data=[jobj1, jobj2]),
        ]
    )

    self.assertEqual(table.size, 2)
def test_to_table(self):
    """Check that table -> ``to_pandas`` -> ``to_table`` gives back an equal table.

    Only boolean and numeric/char column helpers are used for the source table.
    """
    source_table = new_table(
        cols=[
            bool_col(name="Boolean", data=[True, False]),
            byte_col(name="Byte", data=(1, -1)),
            char_col(name="Char", data='-1'),
            short_col(name="Short", data=[1, -1]),
            int_col(name="Int", data=[1, -1]),
            long_col(name="Long", data=[1, NULL_LONG]),
            long_col(name="NPLong", data=np.array([1, -1], dtype=np.int8)),
            float_col(name="Float", data=[1.01, -1.01]),
            double_col(name="Double", data=[1.01, -1.01]),
        ]
    )

    # Convert to a DataFrame and straight back again.
    round_tripped = to_table(to_pandas(source_table))

    self.assert_table_equals(round_tripped, source_table)
def test_round_trip_with_nulls(self):
    """Check the table -> pandas -> table round trip for columns holding nulls.

    The DataFrame must keep the column count and hold two cells per column,
    and the table rebuilt from it must equal the source table.
    """
    # Per the original author's note, there is no two-way conversion for
    # the following column types, so they are deliberately left out:
    #   j_array_list = dtypes.ArrayList([1, -1])
    #   bool_col(name="Boolean", data=[True, None])
    #   string_col(name="String", data=["foo", None])
    #   jobj_col(name="JObj", data=[j_array_list, None])
    source_table = new_table(
        cols=[
            byte_col(name="Byte", data=(1, NULL_BYTE)),
            char_col(name="Char", data='-1'),
            short_col(name="Short", data=[1, NULL_SHORT]),
            int_col(name="Int_", data=[1, NULL_INT]),
            long_col(name="Long_", data=[1, NULL_LONG]),
            float_col(name="Float_", data=[1.01, np.nan]),
            double_col(name="Double_", data=[1.01, np.nan]),
            datetime_col(name="Datetime", data=[dtypes.DateTime(1), None]),
            pyobj_col(name="PyObj", data=[CustomClass(1, "1"), None]),
        ]
    )

    frame = to_pandas(source_table)
    self.assertEqual(len(frame.columns), len(source_table.columns))
    # DataFrame.size counts cells: two rows for every column.
    self.assertEqual(frame.size, 2 * len(source_table.columns))

    rebuilt_table = to_table(frame)
    self.assert_table_equals(rebuilt_table, source_table)
def test_column_error(self):
    """Check that every column helper raises ``DHError`` for ill-typed data."""
    jobj = j_array_list([1, -1])

    with self.assertRaises(DHError):
        bool_input_col = bool_col(name="Boolean", data=[True, 'abc'])
    # The assignment above never completed, so the local name must not exist.
    # The name is looked up as a string here, so it must not be renamed.
    self.assertNotIn("bool_input_col", dir())

    with self.assertRaises(DHError):
        byte_col(name="Byte", data=[1, 'abc'])

    with self.assertRaises(DHError):
        char_col(name="Char", data=[jobj])

    with self.assertRaises(DHError):
        short_col(name="Short", data=[1, 'abc'])

    with self.assertRaises(DHError):
        int_col(name="Int", data=[1, [1, 2]])

    with self.assertRaises(DHError):
        long_col(name="Long", data=[1, float('inf')])

    with self.assertRaises(DHError):
        float_col(name="Float", data=[1.01, 'NaN'])

    with self.assertRaises(DHError):
        double_col(name="Double", data=[1.01, jobj])

    with self.assertRaises(DHError):
        string_col(name="String", data=[1, -1.01])

    with self.assertRaises(DHError):
        datetime_col(
            name="Datetime",
            data=[dtypes.DateTime(round(time.time())), False],
        )

    with self.assertRaises(DHError):
        jobj_col(name="JObj", data=[jobj, CustomClass(-1, "-1")])