def test_serialize_read_message(self):
    """Check that a serialized message can be read back from several sources.

    The first example message is serialized once and then re-read from the
    resulting buffer, from a ``BufferReader`` wrapping that buffer, and from
    the buffer's contents as Python ``bytes``. Every restored message must
    compare equal to the original.
    """
    _, messages = self._get_example_messages()
    original = messages[0]
    serialized = original.serialize()

    # Perform all reads first, then check them in the same order.
    restored_copies = [
        pa.read_message(serialized),
        pa.read_message(pa.BufferReader(serialized)),
        pa.read_message(serialized.to_pybytes()),
    ]
    for restored in restored_copies:
        assert original.equals(restored)
def test_message_read_from_compressed(example_messages):
    """Round-trip every example message through a gzip-compressed stream.

    Each message is serialized into a gzip-compressing output stream layered
    over an in-memory sink, then read back through a gzip-decompressing input
    stream; the result must equal the message that was written.
    """
    # Part of ARROW-5910
    _, messages = example_messages
    for original in messages:
        sink = pa.BufferOutputStream()
        with pa.output_stream(sink, compression='gzip') as gzip_sink:
            original.serialize_to(gzip_sink)

        # Read the compressed bytes only after the compressing stream is closed.
        gzip_source = pa.input_stream(sink.getvalue(), compression='gzip')
        roundtripped = pa.read_message(gzip_source)
        assert roundtripped.equals(original)
def _load_data(buf, schema):
    """
    Load a `pandas.DataFrame` from a buffer written to shared memory

    The buffer is read as a single message, decoded into a record batch
    using ``schema``, and converted to pandas.

    Parameters
    ----------
    buf : pyarrow.Buffer
    schema : pyarrow.Schema

    Returns
    -------
    df : pandas.DataFrame
    """
    # Imported at call time rather than at module level.
    import pyarrow as pa

    record_batch = pa.read_record_batch(pa.read_message(buf), schema)
    return record_batch.to_pandas()
def test_message_serialize_read_message(example_messages):
    """Check reading a serialized message from buffers, bytes and a reader.

    The first example message is serialized and read back from the buffer,
    from its ``bytes`` form, and twice from one ``BufferReader`` that holds
    two back-to-back copies of the serialized bytes. All four results must
    equal the original. A two-byte input must be rejected as corrupted, and
    a third read from the exhausted reader must raise ``EOFError``.
    """
    _, messages = example_messages
    original = messages[0]
    serialized = original.serialize()
    raw_bytes = serialized.to_pybytes()

    # One reader over two concatenated copies, so it can be read from twice.
    double_reader = pa.BufferReader(raw_bytes * 2)

    restored_copies = [
        pa.read_message(serialized),
        pa.read_message(double_reader),
        pa.read_message(raw_bytes),
        pa.read_message(double_reader),
    ]
    for restored in restored_copies:
        assert original.equals(restored)

    with pytest.raises(pa.ArrowInvalid, match="Corrupted message"):
        pa.read_message(pa.BufferReader(b'ab'))

    # Both copies were consumed above; nothing is left to read.
    with pytest.raises(EOFError):
        pa.read_message(double_reader)
def _load_data(buf, schema, tdf=None):
    """
    Load a `pandas.DataFrame` from a buffer written to shared memory

    The buffer is read as a single message, decoded into a record batch
    using ``schema``, and converted to pandas. ``set_tdf`` and ``get_tdf``
    are then bound to the resulting frame as instance methods, and
    ``set_tdf`` is called with ``tdf``.

    Parameters
    ----------
    buf : pyarrow.Buffer
    schema : pyarrow.Schema
    tdf(optional) : TDataFrame

    Returns
    -------
    df : pandas.DataFrame
    """
    record_batch = pa.read_record_batch(pa.read_message(buf), schema)
    frame = record_batch.to_pandas()

    # Bind the module-level helpers to this particular frame instance.
    frame.set_tdf = MethodType(set_tdf, frame)
    frame.get_tdf = MethodType(get_tdf, frame)

    frame.set_tdf(tdf)
    return frame