def test_to_table_boolean_with_none(self):
    """Round-trip a boolean column containing None through pandas.

    The boolean column is first rewritten by a formula that maps nulls to
    NULL_BYTE and true/false to 1/0; that prepared table is the expected
    result of the to_pandas -> to_table round trip.
    """
    source = new_table(cols=[bool_col(name="Boolean", data=[True, None])])
    byte_formula = "Boolean = isNull(Boolean) ? NULL_BYTE : (Boolean == true ? 1: 0)"
    expected = source.update(formulas=[byte_formula])

    round_tripped = to_table(to_pandas(expected))

    self.assert_table_equals(round_tripped, expected)
def test_to_table(self):
    """Round-trip a table of primitive-typed columns through pandas.

    Builds one column per primitive type, converts the table to a DataFrame
    and back, and expects the result to equal the original table.
    """
    np_longs = np.array([1, -1], dtype=np.int8)
    columns = [
        bool_col(name="Boolean", data=[True, False]),
        byte_col(name="Byte", data=(1, -1)),
        char_col(name="Char", data='-1'),
        short_col(name="Short", data=[1, -1]),
        int_col(name="Int", data=[1, -1]),
        long_col(name="Long", data=[1, NULL_LONG]),
        long_col(name="NPLong", data=np_longs),
        float_col(name="Float", data=[1.01, -1.01]),
        double_col(name="Double", data=[1.01, -1.01]),
    ]
    original = new_table(cols=columns)

    round_tripped = to_table(to_pandas(original))

    self.assert_table_equals(round_tripped, original)
def test_to_table_datetime_with_none(self):
    """Round-trip a datetime column containing None through pandas.

    The column mixes a DateTime built from 1, a None, and two values parsed
    from strings carrying the "NY" and "HI" suffixes.
    """
    ny_dt = to_datetime("2021-12-10T23:59:59 NY")
    hi_dt = to_datetime("2021-12-10T23:59:59 HI")
    values = [dtypes.DateTime(1), None, ny_dt, hi_dt]
    original = new_table(cols=[datetime_col(name="Datetime", data=values)])

    round_tripped = to_table(to_pandas(original))

    self.assert_table_equals(round_tripped, original)
def test_round_trip_with_nulls(self):
    """Round-trip a table whose columns contain null values through pandas.

    Checks that the DataFrame has one column per table column and two rows,
    and that converting it back yields a table equal to the original.
    """
    # Deliberately left out because there is no two-way conversion for
    # these types:
    #   - bool_col with None             (data=[True, None])
    #   - string_col with None           (data=["foo", None])
    #   - jobj_col with None             (data=[dtypes.ArrayList([1, -1]), None])
    columns = [
        byte_col(name="Byte", data=(1, NULL_BYTE)),
        char_col(name="Char", data='-1'),
        short_col(name="Short", data=[1, NULL_SHORT]),
        int_col(name="Int_", data=[1, NULL_INT]),
        long_col(name="Long_", data=[1, NULL_LONG]),
        float_col(name="Float_", data=[1.01, np.nan]),
        double_col(name="Double_", data=[1.01, np.nan]),
        datetime_col(name="Datetime", data=[dtypes.DateTime(1), None]),
        pyobj_col(name="PyObj", data=[CustomClass(1, "1"), None]),
    ]
    original = new_table(cols=columns)

    frame = to_pandas(original)
    self.assertEqual(len(frame.columns), len(original.columns))
    # Every column holds two values, so the frame has 2 cells per column.
    self.assertEqual(frame.size, 2 * len(original.columns))

    restored = to_table(frame)
    self.assert_table_equals(restored, original)
def test_to_table_nullable(self):
    """Convert a DataFrame of pandas nullable-dtype columns into a table.

    Each column holds three values including a missing one. The test checks
    the data type of every resulting table column, the table size, and that
    the table's string rendering contains "null" exactly nine times.
    """
    frame = pd.DataFrame({
        "NullableBoolean": pd.array([True, False, None], dtype=pd.BooleanDtype()),
        "NullableInt8": pd.array([1, 2, None], dtype=pd.Int8Dtype()),
        "NullableInt16": pd.array([1, 2, None], dtype=pd.Int16Dtype()),
        "NullableInt32": pd.array([1, 2, None], dtype=pd.Int32Dtype()),
        "NullableInt64": pd.array([1, 2, None], dtype=pd.Int64Dtype()),
        "NullableFloat": pd.array([1.1, 2.2, None], dtype=pd.Float32Dtype()),
        "NullableDouble": pd.array([1.1, 2.2, None], dtype=pd.Float64Dtype()),
        "NullableString": pd.array(["s11", "s22", None], dtype=pd.StringDtype()),
        "NullableObject": pd.array([pd.NA, "s22", None], dtype=object),
    })

    table = to_table(frame)

    # Expected table data types, in the same order as the frame's columns.
    expected_types = (
        dtypes.bool_,
        dtypes.int8,
        dtypes.int16,
        dtypes.int32,
        dtypes.int64,
        dtypes.float32,
        dtypes.double,
        dtypes.string,
        dtypes.PyObject,
    )
    for position, expected_type in enumerate(expected_types):
        self.assertIs(table.columns[position].data_type, expected_type)

    self.assertEqual(table.size, 3)

    rendered = table.to_string()
    self.assertEqual(9, rendered.count("null"))
def test_to_table_category(self):
    """Check that a categorical column survives the table round trip.

    Column "B" is the categorical version of string column "A"; after
    converting to a table and back, both columns must hold equal values.
    """
    frame = pd.DataFrame({"A": ["a", "b", "a", "d"]})
    frame["B"] = frame["A"].astype("category")

    round_tripped = to_pandas(to_table(frame))

    values_match = np.array_equal(
        round_tripped["A"].values,
        round_tripped["B"].values,
    )
    self.assertTrue(values_match)
def to_bytes(self, exporter: Exporter, data_frame: DataFrame):
    """Export a DataFrame by reference as a table.

    Converts ``data_frame`` with ``to_table``, registers the result with
    ``exporter.reference``, and returns an empty bytes payload.
    """
    table = to_table(data_frame)
    exporter.reference(table)
    return b''