def test_serialize_incremental_chunk_size_pandas_to_recarray(input_df_descr):
    """Check incremental serialization output across several chunk sizes.

    The ``(dataframe, expectation)`` pair for ``input_df_descr`` is looked up
    in ``_mixed_test_data()``. Descriptors rejected by
    ``is_test_data_serializable`` are skipped by returning early.

    * If the expectation is an exception class, building the serializer and
      draining ``generator_bytes()`` must raise it for every chunk size tried.
    * Otherwise the concatenation of all produced chunks must equal the raw
      bytes of ``expectation[0]`` and the serializer's ``dtype`` must equal
      ``expectation[1]``.
    """
    if not is_test_data_serializable(input_df_descr):
        return

    # Fetch the fixture entry once so both halves come from the same call.
    test_entry = _mixed_test_data()[input_df_descr]
    df = test_entry[0]
    expectation = test_entry[1]

    expects_error = (not isinstance(expectation, tuple)
                     and issubclass(expectation, Exception))

    if expects_error:
        for div in (1, 4, 8):
            chunk_size = div * 8 * 1024 ** 2
            with pytest.raises(expectation):
                incr_ser = IncrementalPandasToRecArraySerializer(
                    df_serializer, df, chunk_size=chunk_size)
                # Drain the generator; only the raised exception matters.
                for _chunk, _, _, _ in incr_ser.generator_bytes():
                    pass
        return

    for div in (1, 4, 8):
        chunk_size = div * 8 * 1024 ** 2
        if input_df_descr is not None and len(expectation) > 0:
            # Size the chunk relative to the expected record width instead of
            # the fixed default above. Floor division keeps the byte budget an
            # integer (true division would yield a float on Python 3).
            row_size = int(expectation[0].dtype.itemsize)
            chunk_size = NON_HOMOGENEOUS_DTYPE_PATCH_SIZE_ROWS * row_size // div

        incr_ser = IncrementalPandasToRecArraySerializer(
            df_serializer, df, chunk_size=chunk_size)
        chunk_bytes = [chunk for chunk, _, _, _ in incr_ser.generator_bytes()]

        # ``tobytes()`` replaces the deprecated ``tostring()`` alias.
        matching = expectation[0].tobytes() == b''.join(chunk_bytes)
        assert matching
        assert expectation[1] == incr_ser.dtype
def test_shape(input_df_descr):
    """Check the ``shape`` reported by the incremental serializer.

    The ``(dataframe, expectation)`` pair for ``input_df_descr`` is looked up
    in ``_mixed_test_data()``. Descriptors rejected by
    ``is_test_data_serializable`` are skipped by returning early.

    * If the expectation is an exception class, reading ``incr_ser.shape``
      must raise it.
    * Otherwise ``incr_ser.shape`` must equal ``expectation[0].shape``.
    """
    if not is_test_data_serializable(input_df_descr):
        return

    # Fetch the fixture entry once so both halves come from the same call.
    test_entry = _mixed_test_data()[input_df_descr]
    df = test_entry[0]
    expectation = test_entry[1]

    incr_ser = IncrementalPandasToRecArraySerializer(
        df_serializer, df, chunk_size=_CHUNK_SIZE)

    expects_error = (not isinstance(expectation, tuple)
                     and issubclass(expectation, Exception))

    if expects_error:
        with pytest.raises(expectation):
            # Only the attribute access is under test. Iterating and unpacking
            # the shape entries could raise an unrelated error and mask a
            # ``shape`` that failed to raise.
            _ = incr_ser.shape
    else:
        assert incr_ser.shape == expectation[0].shape
def test_serialize_pandas_to_recarray(input_df_descr):
    """Check the one-shot ``serialize()`` output of the incremental serializer.

    The ``(dataframe, expectation)`` pair for ``input_df_descr`` is looked up
    in ``_mixed_test_data()``. Descriptors rejected by
    ``is_test_data_serializable`` are skipped by returning early.

    * If the expectation is an exception class, draining
      ``generator_bytes()`` must raise it.
    * Otherwise the raw bytes of the serialized data must equal those of
      ``expectation[0]`` and the returned dtype must equal ``expectation[1]``.
    """
    if not is_test_data_serializable(input_df_descr):
        return

    # Fetch the fixture entry once so both halves come from the same call.
    test_entry = _mixed_test_data()[input_df_descr]
    df = test_entry[0]
    expectation = test_entry[1]

    incr_ser = IncrementalPandasToRecArraySerializer(
        df_serializer, df, chunk_size=_CHUNK_SIZE)

    expects_error = (not isinstance(expectation, tuple)
                     and issubclass(expectation, Exception))

    if expects_error:
        with pytest.raises(expectation):
            # Drain the generator; only the raised exception matters.
            for _chunk, _, _, _ in incr_ser.generator_bytes():
                pass
        return

    incr_ser_data, incr_ser_dtype = incr_ser.serialize()

    # ``tobytes()`` replaces the deprecated ``tostring()`` alias.
    matching = expectation[0].tobytes() == incr_ser_data.tobytes()
    assert matching
    assert expectation[1] == incr_ser_dtype