示例#1
0
 def java_to_python_record_batch(self, root):
     """Convert a Java VectorSchemaRoot into a pyarrow RecordBatch.

     Two C structs (ArrowSchema and ArrowArray) are allocated through
     cffi, the Java side exports ``root`` into them, and pyarrow then
     imports the batch from the same two addresses.
     """
     schema_struct = ffi.new("struct ArrowSchema*")
     array_struct = ffi.new("struct ArrowArray*")
     schema_addr = int(ffi.cast("uintptr_t", schema_struct))
     array_addr = int(ffi.cast("uintptr_t", array_struct))

     # Wrap the raw addresses so the Java exporter can write into them.
     java_array = self.java_c.ArrowArray.wrap(array_addr)
     java_schema = self.java_c.ArrowSchema.wrap(schema_addr)
     self.java_c.Data.exportVectorSchemaRoot(
         self.java_allocator, root, None, java_array, java_schema)

     return pa.RecordBatch._import_from_c(array_addr, schema_addr)
示例#2
0
 def java_to_python_array(self, vector, dictionary_provider=None):
     """Convert a Java vector into a pyarrow Array.

     ``dictionary_provider`` is forwarded unchanged to the Java
     ``exportVector`` call and defaults to None.
     """
     schema_struct = ffi.new("struct ArrowSchema*")
     array_struct = ffi.new("struct ArrowArray*")
     schema_addr = int(ffi.cast("uintptr_t", schema_struct))
     array_addr = int(ffi.cast("uintptr_t", array_struct))

     # Wrap the raw addresses so the Java exporter can write into them.
     java_array = self.java_c.ArrowArray.wrap(array_addr)
     java_schema = self.java_c.ArrowSchema.wrap(schema_addr)
     self.java_c.Data.exportVector(
         self.java_allocator, vector, dictionary_provider,
         java_array, java_schema)

     return pa.Array._import_from_c(array_addr, schema_addr)
示例#3
0
def check_export_import_batch(batch_factory):
    """Round-trip a record batch through the C structs and check memory.

    ``batch_factory`` is called with no arguments each time a fresh batch
    is needed; the result must provide ``schema``, ``to_pydict()`` and
    ``_export_to_c()``.

    Three scenarios are exercised in order:

    1. export only the array and import it with a schema object,
    2. export array and schema together and import both from pointers,
    3. pair an exported batch with a non-struct schema and expect a
       ValueError on import.

    ``assert_array_released``, ``assert_schema_released`` and
    ``make_schema`` are defined elsewhere; the context managers presumably
    assert that importing from an already-released struct fails.
    """
    c_schema = ffi.new("struct ArrowSchema*")
    ptr_schema = int(ffi.cast("uintptr_t", c_schema))
    c_array = ffi.new("struct ArrowArray*")
    ptr_array = int(ffi.cast("uintptr_t", c_array))

    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
    # Baseline against which every later allocation check is compared
    old_allocated = pa.total_allocated_bytes()

    # Schema is known up front
    batch = batch_factory()
    schema = batch.schema
    py_value = batch.to_pydict()
    batch._export_to_c(ptr_array)
    assert pa.total_allocated_bytes() > old_allocated
    # Delete and recreate C++ object from exported pointer
    del batch
    batch_new = pa.RecordBatch._import_from_c(ptr_array, schema)
    assert batch_new.to_pydict() == py_value
    assert batch_new.schema == schema
    assert pa.total_allocated_bytes() > old_allocated
    # Dropping the last references must bring allocation back to baseline
    del batch_new, schema
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_array_released:
        pa.RecordBatch._import_from_c(ptr_array, make_schema())

    # Type is exported and imported at the same time
    batch = batch_factory()
    py_value = batch.to_pydict()
    batch._export_to_c(ptr_array, ptr_schema)
    # Delete and recreate C++ objects from exported pointers
    del batch
    batch_new = pa.RecordBatch._import_from_c(ptr_array, ptr_schema)
    assert batch_new.to_pydict() == py_value
    assert batch_new.schema == batch_factory().schema
    assert pa.total_allocated_bytes() > old_allocated
    del batch_new
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_schema_released:
        pa.RecordBatch._import_from_c(ptr_array, ptr_schema)

    # Not a struct type: the schema struct describes int32 while the array
    # struct holds a full batch, so the import must be rejected
    pa.int32()._export_to_c(ptr_schema)
    batch_factory()._export_to_c(ptr_array)
    with pytest.raises(ValueError,
                       match="ArrowSchema describes non-struct type"):
        pa.RecordBatch._import_from_c(ptr_array, ptr_schema)
    # Now released
    with assert_schema_released:
        pa.RecordBatch._import_from_c(ptr_array, ptr_schema)
示例#4
0
def test_export_import_type():
    """Round-trip a DataType through an ArrowSchema struct.

    Checks that the exported struct keeps memory allocated until it is
    imported again, that a second import from the same struct is treated
    as released, and that a corrupted format string is rejected with a
    ValueError.
    """
    c_schema = ffi.new("struct ArrowSchema*")
    ptr_schema = int(ffi.cast("uintptr_t", c_schema))

    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
    # Baseline against which every later allocation check is compared
    old_allocated = pa.total_allocated_bytes()

    typ = pa.list_(pa.int32())
    typ._export_to_c(ptr_schema)
    assert pa.total_allocated_bytes() > old_allocated
    # Delete and recreate C++ object from exported pointer
    del typ
    # The exported struct still holds memory after the Python object is gone
    assert pa.total_allocated_bytes() > old_allocated
    typ_new = pa.DataType._import_from_c(ptr_schema)
    assert typ_new == pa.list_(pa.int32())
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_schema_released:
        pa.DataType._import_from_c(ptr_schema)

    # Invalid format string
    pa.int32()._export_to_c(ptr_schema)
    # Overwrite the exported format with an unrecognised one; bad_format is
    # kept in a local, presumably so the char buffer outlives the import
    bad_format = ffi.new("char[]", b"zzz")
    c_schema.format = bad_format
    with pytest.raises(ValueError,
                       match="Invalid or unsupported format string"):
        pa.DataType._import_from_c(ptr_schema)
    # Now released
    with assert_schema_released:
        pa.DataType._import_from_c(ptr_schema)
示例#5
0
def test_imported_batch_reader_error():
    """Errors from a truncated IPC stream surface through an imported reader.

    A stream with its last 16 bytes cut off is exported to an
    ArrowArrayStream struct and imported back; the first batch reads fine
    and the second read (or ``read_all``) must raise OSError.
    """
    stream_struct = ffi.new("struct ArrowArrayStream*")
    stream_addr = int(ffi.cast("uintptr_t", stream_struct))

    schema = pa.schema([('foo', pa.int32())])
    batches = [
        pa.record_batch(rows, schema=schema)
        for rows in ([[1, 2, 3]], [[4, 5, 6]])
    ]
    serialized = make_serialized(schema, batches)
    truncation_error = ("Expected to be able to read 16 bytes "
                        "for message body, got 8")

    def export_truncated_stream():
        # Open a corrupt/incomplete stream and export it; the local reader
        # goes out of scope on return, leaving only the exported struct.
        reader = pa.ipc.open_stream(serialized[:-16])
        reader._export_to_c(stream_addr)

    export_truncated_stream()
    reader_new = pa.RecordBatchReader._import_from_c(stream_addr)
    first_read = reader_new.read_next_batch()
    assert first_read == batches[0]
    with pytest.raises(OSError, match=truncation_error):
        reader_new.read_next_batch()

    # Again, but call read_all()
    export_truncated_stream()
    reader_new = pa.RecordBatchReader._import_from_c(stream_addr)
    with pytest.raises(OSError, match=truncation_error):
        reader_new.read_all()
示例#6
0
def check_export_import_schema(schema_factory):
    """Round-trip a schema through an ArrowSchema struct and check memory.

    ``schema_factory`` is called with no arguments whenever a fresh schema
    is needed. The helper also verifies that importing a non-struct type
    as a Schema raises ValueError, and that a struct cannot be imported
    twice (``assert_schema_released`` is defined elsewhere).
    """
    c_schema = ffi.new("struct ArrowSchema*")
    ptr_schema = int(ffi.cast("uintptr_t", c_schema))

    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
    # Baseline against which every later allocation check is compared
    old_allocated = pa.total_allocated_bytes()

    # The exported schema is a temporary, so only the C struct refers to it
    schema_factory()._export_to_c(ptr_schema)
    assert pa.total_allocated_bytes() > old_allocated
    # Recreate C++ object from exported pointer
    schema_new = pa.Schema._import_from_c(ptr_schema)
    assert schema_new == schema_factory()
    # Allocation is already back at the baseline right after the import
    assert pa.total_allocated_bytes() == old_allocated
    del schema_new
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_schema_released:
        pa.Schema._import_from_c(ptr_schema)

    # Not a struct type
    pa.int32()._export_to_c(ptr_schema)
    with pytest.raises(ValueError,
                       match="ArrowSchema describes non-struct type"):
        pa.Schema._import_from_c(ptr_schema)
    # Now released
    with assert_schema_released:
        pa.Schema._import_from_c(ptr_schema)
示例#7
0
def test_export_import_array():
    """Round-trip an Array through the C structs and check memory.

    Two scenarios are covered: exporting only the array and importing it
    with a known type object, then exporting array and type together and
    importing both from pointers. After each round trip the allocation
    count must return to the baseline and a second import must be treated
    as released.
    """
    c_schema = ffi.new("struct ArrowSchema*")
    ptr_schema = int(ffi.cast("uintptr_t", c_schema))
    c_array = ffi.new("struct ArrowArray*")
    ptr_array = int(ffi.cast("uintptr_t", c_array))

    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
    # Baseline against which every later allocation check is compared
    old_allocated = pa.total_allocated_bytes()

    # Type is known up front
    typ = pa.list_(pa.int32())
    arr = pa.array([[1], [2, 42]], type=typ)
    py_value = arr.to_pylist()
    arr._export_to_c(ptr_array)
    assert pa.total_allocated_bytes() > old_allocated
    # Delete and recreate C++ object from exported pointer
    del arr
    arr_new = pa.Array._import_from_c(ptr_array, typ)
    assert arr_new.to_pylist() == py_value
    assert arr_new.type == pa.list_(pa.int32())
    assert pa.total_allocated_bytes() > old_allocated
    # Dropping the last references must bring allocation back to baseline
    del arr_new, typ
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_array_released:
        pa.Array._import_from_c(ptr_array, pa.list_(pa.int32()))

    # Type is exported and imported at the same time
    arr = pa.array([[1], [2, 42]], type=pa.list_(pa.int32()))
    py_value = arr.to_pylist()
    arr._export_to_c(ptr_array, ptr_schema)
    # Delete and recreate C++ objects from exported pointers
    del arr
    arr_new = pa.Array._import_from_c(ptr_array, ptr_schema)
    assert arr_new.to_pylist() == py_value
    assert arr_new.type == pa.list_(pa.int32())
    assert pa.total_allocated_bytes() > old_allocated
    del arr_new
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_schema_released:
        pa.Array._import_from_c(ptr_array, ptr_schema)
示例#8
0
文件: data.py 项目: ShvetsKS/xgboost
    def _next(data_handle: int) -> int:
        """Export the next record batch and hand it to the native library.

        Pulls one batch from the enclosing ``data_iter``, exports it into
        freshly allocated ArrowSchema/ArrowArray structs and passes their
        addresses to ``XGImportArrowRecordBatch``.

        Parameters
        ----------
        data_handle :
            Integer handle forwarded to the native call as a ``void*``.

        Returns
        -------
        int
            1 when a batch was imported, 0 when ``data_iter`` is exhausted.
        """
        from pyarrow.cffi import ffi

        # Guard only the iterator advance: a StopIteration escaping from the
        # export or the native call would otherwise be mistaken for a normal
        # end of data and silently swallowed.
        try:
            batch = next(data_iter)
        except StopIteration:
            return 0

        # The structs are appended to the enclosing lists so references to
        # them remain after this call returns.
        c_schemas.append(ffi.new("struct ArrowSchema*"))
        c_arrays.append(ffi.new("struct ArrowArray*"))
        ptr_schema = int(ffi.cast("uintptr_t", c_schemas[-1]))
        ptr_array = int(ffi.cast("uintptr_t", c_arrays[-1]))
        # pylint: disable=protected-access
        batch._export_to_c(ptr_array, ptr_schema)
        _check_call(
            _LIB.XGImportArrowRecordBatch(
                ctypes.c_void_p(data_handle),
                ctypes.c_void_p(ptr_array),
                ctypes.c_void_p(ptr_schema),
            ))
        return 1
示例#9
0
def test_export_import_schema_float_pointer():
    """Pointer addresses passed as floats still work but emit a warning."""
    # Previous versions of the R Arrow library used to pass pointer
    # values as a double.
    schema_struct = ffi.new("struct ArrowSchema*")
    schema_addr = int(ffi.cast("uintptr_t", schema_struct))
    unsafe_float_warning = "Passing a pointer value as a float is unsafe"

    with pytest.warns(UserWarning, match=unsafe_float_warning):
        make_schema()._export_to_c(float(schema_addr))

    with pytest.warns(UserWarning, match=unsafe_float_warning):
        schema_new = pa.Schema._import_from_c(float(schema_addr))

    assert schema_new == make_schema()
示例#10
0
def test_export_import_batch_reader(reader_factory):
    """Round-trip a batch reader through an ArrowArrayStream struct.

    The actual export/import is delegated to
    ``_export_import_batch_reader``; this test checks that allocation
    returns to its starting level afterwards and that the stream struct
    cannot be imported a second time.
    """
    stream_struct = ffi.new("struct ArrowArrayStream*")
    stream_addr = int(ffi.cast("uintptr_t", stream_struct))

    # Make sure no Arrow data dangles in a ref cycle before measuring
    gc.collect()
    baseline_bytes = pa.total_allocated_bytes()

    _export_import_batch_reader(stream_addr, reader_factory)

    assert pa.total_allocated_bytes() == baseline_bytes

    # Now released
    with assert_stream_released:
        pa.RecordBatchReader._import_from_c(stream_addr)
示例#11
0
    def test_batch_roundtrip(self):
        """Python -> Go -> Python must preserve a record batch exactly."""
        with self.assert_pyarrow_memory_released():
            # Export schema and batch into the structs owned by the test
            # case, then drop the Python objects.
            schema = self.make_schema()
            batch = self.make_batch()
            schema._export_to_c(self.ptr_schema)
            batch._export_to_c(self.ptr_array)
            del schema, batch

            # Separate output structs for Go to export into.
            out_schema = ffi.new("struct ArrowSchema*")
            out_batch = ffi.new("struct ArrowArray*")
            out_schema_addr = int(ffi.cast("uintptr_t", out_schema))
            out_batch_addr = int(ffi.cast("uintptr_t", out_batch))

            cgotest.importThenExportRecord(
                self.ptr_schema, self.ptr_array,
                out_schema_addr, out_batch_addr)

            batch_new = pa.RecordBatch._import_from_c(
                out_batch_addr, out_schema_addr)
            assert batch_new == self.make_batch()

            del batch_new
            del out_schema, out_batch
示例#12
0
    def test_schema_roundtrip(self):
        """Python -> Go -> Python must end up with the exact same schema."""
        with self.assert_pyarrow_memory_released():
            # Export a temporary schema; nothing on the Python side keeps a
            # reference to it once the statement finishes.
            self.make_schema()._export_to_c(self.ptr_schema)

            # Separate output struct for Go to export into.
            out_schema = ffi.new("struct ArrowSchema*")
            out_schema_addr = int(ffi.cast("uintptr_t", out_schema))

            cgotest.importThenExportSchema(self.ptr_schema, out_schema_addr)

            schema_new = pa.Schema._import_from_c(out_schema_addr)
            assert schema_new == self.make_schema()
            del out_schema
示例#13
0
def test_export_import_field():
    """Round-trip a Field through an ArrowSchema struct and check memory.

    The exported struct must keep memory allocated after the Python field
    is deleted, importing must return allocation to the baseline, and a
    second import from the same struct must be treated as released
    (``assert_schema_released`` is defined elsewhere).
    """
    c_schema = ffi.new("struct ArrowSchema*")
    ptr_schema = int(ffi.cast("uintptr_t", c_schema))

    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
    # Baseline against which every later allocation check is compared
    old_allocated = pa.total_allocated_bytes()

    field = pa.field("test", pa.list_(pa.int32()), nullable=True)
    field._export_to_c(ptr_schema)
    assert pa.total_allocated_bytes() > old_allocated
    # Delete and recreate C++ object from exported pointer
    del field
    # The exported struct still holds memory after the Python object is gone
    assert pa.total_allocated_bytes() > old_allocated

    field_new = pa.Field._import_from_c(ptr_schema)
    assert field_new == pa.field("test", pa.list_(pa.int32()), nullable=True)
    assert pa.total_allocated_bytes() == old_allocated

    # Now released
    with assert_schema_released:
        pa.Field._import_from_c(ptr_schema)
示例#14
0
 def java_to_python_field(self, jfield):
     """Convert a Java field into a pyarrow Field.

     The Java side exports ``jfield`` into a cffi-allocated ArrowSchema
     struct, which pyarrow then imports from the same address.
     """
     schema_struct = ffi.new("struct ArrowSchema*")
     schema_addr = int(ffi.cast("uintptr_t", schema_struct))

     # Wrap the raw address so the Java exporter can write into it.
     java_schema = self.java_c.ArrowSchema.wrap(schema_addr)
     self.java_c.Data.exportField(
         self.java_allocator, jfield, None, java_schema)

     return pa.Field._import_from_c(schema_addr)
示例#15
0
 def setUp(self):
     """Allocate one ArrowSchema and one ArrowArray struct per test.

     Both the cffi objects and their integer addresses are stored on the
     instance.
     """
     self.c_schema = ffi.new("struct ArrowSchema*")
     self.c_array = ffi.new("struct ArrowArray*")

     self.ptr_schema = int(ffi.cast("uintptr_t", self.c_schema))
     self.ptr_array = int(ffi.cast("uintptr_t", self.c_array))