def assert_packed_frame_equality(df):
    """Round-trip ``df`` through ``pack``/``unpack`` and compare the result
    with the pandas copy of ``df`` taken before packing.
    """
    expected = df.to_pandas()
    packed_frame = pack(df)
    # Drop the local reference to the source frame before unpacking
    # (presumably so the unpacked data cannot depend on it staying alive).
    del df
    table = unpack(packed_frame)
    roundtripped = DataFrame._from_data(table._data, table._index)
    assert_eq(roundtripped, expected)
def test_sizeof_packed_dataframe():
    """Check the size reported for a packed two-column frame.

    ``sys.getsizeof`` of the packed object must be smaller than the combined
    byte size of the host arrays the columns were built from, and pickling
    the packed object must emit at least that many bytes.
    """
    np.random.seed(0)
    frame = DataFrame()
    row_count = 1000

    host_keys = np.arange(row_count, dtype=np.float64)
    frame["keys"] = host_keys
    host_vals = np.random.random(row_count)
    frame["vals"] = host_vals

    packed_frame = pack(frame)

    host_nbytes = sum(arr.nbytes for arr in (host_keys, host_vals))
    reported_size = sys.getsizeof(packed_frame)
    assert reported_size < host_nbytes

    payload = pickle.dumps(packed_frame, protocol=pickle.HIGHEST_PROTOCOL)
    # The pickled payload must be at least as large as the reported size.
    assert len(payload) >= reported_size
def check_packed_pickled_equality(df):
    """Run the packed-pickle round-trip check on several views of ``df``.

    Covers the frame itself, three slices of it, a copy sorted by the
    ``"vals"`` column and, when pickle protocol 5 is available, an
    out-of-band pickle round trip.
    """
    # The frame exactly as given.
    assert_packed_frame_picklable(df)

    # Sliced views: drop the tail, drop the head, trim both ends.
    for window in (slice(None, -1), slice(1, None), slice(2, -2)):
        assert_packed_frame_picklable(df[window])

    # Sorted by value; the resulting index is expected to be a GenericIndex.
    by_value = df.sort_values("vals")
    assert isinstance(by_value.index, GenericIndex)
    assert_packed_frame_picklable(by_value)

    # Out-of-band buffers need pickle protocol 5.
    if pickle.HIGHEST_PROTOCOL < 5:
        return

    oob_buffers = []
    payload = pickle.dumps(
        pack(df), protocol=5, buffer_callback=oob_buffers.append
    )
    assert all(isinstance(buf, pickle.PickleBuffer) for buf in oob_buffers)

    table = unpack(pickle.loads(payload, buffers=oob_buffers))
    restored = DataFrame._from_data(table._data, table._index)
    assert_eq(restored, df)
def assert_packed_frame_serializable(df):
    """Assert that ``df`` survives pack -> serialize -> deserialize -> unpack.

    The frame rebuilt from the deserialized packed object must compare equal
    to ``df``.
    """
    packed_frame = pack(df)
    meta, payload_frames = packed_frame.serialize()
    revived = packed_frame.deserialize(meta, payload_frames)
    table = unpack(revived)
    restored = DataFrame._from_data(table._data, table._index)
    assert_eq(restored, df)
def assert_packed_frame_picklable(df):
    """Assert that ``df`` survives pack -> pickle -> unpickle -> unpack.

    The frame rebuilt from the unpickled packed object must compare equal
    to ``df``.
    """
    payload = pickle.dumps(pack(df))
    revived = pickle.loads(payload)
    table = unpack(revived)
    restored = DataFrame._from_data(table._data, table._index)
    assert_eq(restored, df)
def assert_packed_frame_unique_pointers(df):
    """Assert that a pack/unpack round trip does not reuse ``df``'s buffers.

    For every column of ``df`` whose ``data`` buffer is truthy, the buffer
    pointer must differ from the pointer of the same column in the unpacked
    result.
    """
    roundtripped = unpack(pack(df))
    for name in df:
        source_col = df._data[name]
        if not source_col.data:
            # Columns with a falsy data buffer are skipped.
            continue
        assert source_col.data.ptr != roundtripped._data[name].data.ptr