def assert_eq(df, df_expected=None, raw=False):
    """Round-trip ``df`` through serialize_df/deserialize_df and check it.

    :param df: dataframe to serialize and then deserialize
    :param df_expected: dataframe the result must match; ``df`` itself is
        used when this is ``None``
    :param raw: when true, compare the ``native`` attributes with ``==``;
        otherwise delegate the comparison to ``df_eq(..., throw=True)``
    """
    expected = df if df_expected is None else df_expected
    restored = deserialize_df(serialize_df(df))
    if not raw:
        df_eq(expected, restored, throw=True)
        return
    # raw mode: compare the underlying native objects directly
    assert expected.native == restored.native
def test_serialize_df(tmpdir):
    """Exercise serialize_df/deserialize_df round trips, inline and via a file."""

    def check_roundtrip(df, df_expected=None, raw=False):
        # Serialize + deserialize ``df`` and compare with ``df_expected``
        # (``df`` itself when no expectation is supplied).
        expected = df if df_expected is None else df_expected
        restored = deserialize_df(serialize_df(df))
        if not raw:
            df_eq(expected, restored, throw=True)
            return
        # raw mode: compare the underlying native objects directly
        assert expected.native == restored.native

    fs = FileSystem()

    # ``None`` must survive the round trip unchanged.
    assert deserialize_df(serialize_df(None)) is None

    # Flat schemas on ArrayDataFrame: empty, all-null row, null + string.
    for rows, schema in (
        ([], "a:int,b:int"),
        ([[None, None]], "a:int,b:int"),
        ([[None, "abc"]], "a:int,b:str"),
    ):
        check_roundtrip(ArrayDataFrame(rows, schema))

    # Nested list/struct columns are compared through ``native``.
    nested_schema = "a:int,b:[int],c:{x:int}"
    check_roundtrip(
        ArrayDataFrame([[None, [1, 2], {"x": 1}]], nested_schema),
        raw=True,
    )
    # An IterableDataFrame input is checked against an ArrayDataFrame
    # holding the same data.
    check_roundtrip(
        IterableDataFrame([[None, [1, 2], {"x": 1}]], nested_schema),
        ArrayDataFrame([[None, [1, 2], {"x": 1}]], nested_schema),
        raw=True,
    )

    # Same flat cases on PandasDataFrame.
    for rows, schema in (
        ([[None, None]], "a:int,b:int"),
        ([[None, "abc"]], "a:int,b:str"),
    ):
        check_roundtrip(PandasDataFrame(rows, schema))

    # Passing 0 as the second argument without a file path must be rejected.
    def serialize_without_path():
        return serialize_df(ArrayDataFrame([], "a:int,b:int"), 0)

    raises(InvalidOperationError, serialize_without_path)

    # With a path supplied, the same call succeeds; the result must be
    # readable both with and without an explicit filesystem object.
    pkl_path = os.path.join(tmpdir, "1.pkl")
    frame = ArrayDataFrame([[None, None]], "a:int,b:int")
    blob = serialize_df(frame, 0, pkl_path, fs)
    df_eq(frame, deserialize_df(blob, fs), throw=True)
    df_eq(frame, deserialize_df(blob), throw=True)
    blob = serialize_df(frame, 0, pkl_path)
    df_eq(frame, deserialize_df(blob), throw=True)

    # This JSON string is not a valid serialized dataframe.
    def deserialize_bad_json():
        return deserialize_df('{"x":1}')

    raises(ValueError, deserialize_bad_json)
def run(self, cursor: PartitionCursor, df: LocalDataFrame) -> LocalDataFrame:
    """Serialize one partition into a single-row dataframe.

    :param cursor: partition cursor; its ``key_value_array`` supplies the
        leading values of the output row
    :param df: the partition's data, passed to ``serialize_df`` together
        with ``self.to_file_threshold`` and ``self.temp_path``
    :return: an ``ArrayDataFrame`` with exactly one row (the key values
        followed by the serialized payload) using ``self.output_schema``
    """
    # NOTE(review): presumably the threshold/path pair decides when the
    # payload is written to a file instead of kept inline -- confirm
    # against serialize_df.
    payload = serialize_df(df, self.to_file_threshold, self.temp_path)
    return ArrayDataFrame(
        [cursor.key_value_array + [payload]],
        self.output_schema,
    )