Exemplo n.º 1
0
def test_file_reader_writer():
    """Check the legacy reader/writer aliases warn and yield RecordBatch* objects.

    Each of ``pa.StreamWriter``, ``pa.FileWriter``, ``pa.StreamReader`` and
    ``pa.FileReader`` is expected to emit a ``FutureWarning`` when called and
    to return an instance of the matching ``pa.RecordBatch*`` class.
    """
    columns = [
        pa.array([1, 2, 3, 4]),
        pa.array(['foo', 'bar', 'baz', None]),
        pa.array([True, None, False, True])
    ]
    record_batch = pa.RecordBatch.from_arrays(columns, ['f0', 'f1', 'f2'])

    # One in-memory sink per format: stream first, file second.
    stream_sink = pa.BufferOutputStream()
    file_sink = pa.BufferOutputStream()

    with pytest.warns(FutureWarning):
        stream_writer = pa.StreamWriter(stream_sink, record_batch.schema)
        assert isinstance(stream_writer, pa.RecordBatchStreamWriter)

    with pytest.warns(FutureWarning):
        file_writer = pa.FileWriter(file_sink, record_batch.schema)
        assert isinstance(file_writer, pa.RecordBatchFileWriter)

    # Write the same batch through both writers, then finalize them so the
    # sinks hold complete payloads for the readers below.
    file_writer.write_batch(record_batch)
    stream_writer.write_batch(record_batch)

    file_writer.close()
    stream_writer.close()

    stream_buffer = stream_sink.get_result()
    file_buffer = file_sink.get_result()

    with pytest.warns(FutureWarning):
        stream_reader = pa.StreamReader(stream_buffer)
        assert isinstance(stream_reader, pa.RecordBatchStreamReader)

    with pytest.warns(FutureWarning):
        file_reader = pa.FileReader(file_buffer)
        assert isinstance(file_reader, pa.RecordBatchFileReader)
Exemplo n.º 2
0
    def test_read_all(self):
        """Check that ``read_all()`` equals a table built from the written batches."""
        written_batches = self.write_batches()
        source = self._get_source()

        # Read everything back in one call through the file reader.
        actual_table = pa.FileReader(source).read_all()
        expected_table = pa.Table.from_batches(written_batches)

        assert actual_table.equals(expected_table)
Exemplo n.º 3
0
    def test_simple_roundtrip(self):
        """Check that each written batch is read back unchanged, index by index."""
        written_batches = self.write_batches()
        source = self._get_source()

        file_reader = pa.FileReader(source)

        # The reader must report exactly as many batches as were written.
        assert file_reader.num_record_batches == len(written_batches)

        for index, original in enumerate(written_batches):
            # Fetch the batch at the same position and compare to the original.
            loaded = file_reader.get_batch(index)
            assert original.equals(loaded)
Exemplo n.º 4
0
def read_file(source):
    """Return all record batches from ``source`` as a list, in index order.

    ``source`` is passed unchanged to ``pa.FileReader``; batches are fetched
    one by one with ``get_batch`` for indices ``0 .. num_record_batches - 1``.
    """
    file_reader = pa.FileReader(source)
    batch_indices = range(file_reader.num_record_batches)
    return list(map(file_reader.get_batch, batch_indices))