def load_buffer(handle, size):
    """Attach to an OmniSci POSIX shared-memory segment as an Arrow buffer.

    Parameters
    ----------
    handle : bytes
        Shared-memory key packed by OmniSci as a little-endian uint32.
    size : int
        Size of the shared-memory segment in bytes.

    Returns
    -------
    tuple
        ``(pabuff, ptr)`` — the Arrow buffer plus the raw attach address.
        The pointer is returned as well because it is too early here to
        release the shared memory from Python.

    Raises
    ------
    RuntimeError
        If libc (required for the POSIX shared-memory calls) is unavailable.
    ValueError
        If the shared-memory key does not name a valid segment.
    """
    if find_library('c') is None:
        # BUG FIX: the original code used `assert "<message>"`, which always
        # passes (a non-empty string literal is truthy) and therefore never
        # reported either error.  Raise explicitly instead.
        if platform.system() == "Windows":
            raise RuntimeError(
                "IPC uses POSIX shared memory, which is not supported "
                "on Windows"
            )
        else:
            # libc should be available by default on linux/darwin systems
            raise RuntimeError(
                "ctypes.find_library('c') did not find libc, which is "
                "required for IPC"
            )
    # OmniSci passes the key as packed bytes; convert to int
    shmkey = struct.unpack('<L', handle)[0]
    # Take key from OmniSci, get identifier of shared memory segment.
    # shmget returns -1 when an error has occurred.
    shmid = shmget(shmkey, size, 0)
    if shmid == -1:
        raise ValueError("Invalid shared memory key {}".format(shmkey))
    # With id of shared memory segment, attach to this Python process.
    # None lets the system choose a suitable unused address.
    ptr = shmat(shmid, None, 0)
    # With ptr as the segment's start address, make an Arrow buffer.
    pabuff = pa.foreign_buffer(ptr, size, None)
    return (pabuff, ptr)
def to_pandas(self, schema_only=False):
    """Materialize this string column as a ``pandas.Series`` of ``StringDtype``.

    Parameters
    ----------
    schema_only : bool, optional
        When True, skip the data entirely and return an empty Series
        carrying only the dtype.

    Returns
    -------
    pandas.Series
        Series of dtype ``pd.StringDtype()``.
    """
    if schema_only:
        return pd.Series([], dtype=pd.StringDtype())
    # Recover a contiguous bitmask by collapsing to a single partition
    # before exporting raw addresses.
    new_self = self.repartition(1).as_string_column()
    rect = new_self.offsets.ispace.get_bounds()
    num_elements = rect.hi[0] - rect.lo[0]
    # Empty column: nothing to wrap, return an empty typed Series.
    if num_elements <= 0:
        return pd.Series([], dtype=pd.StringDtype())
    # Convert ranges back to offsets
    # XXX: We should keep this reference to the result of export_offsets
    # to avoid it being collected
    offsets = new_self.offsets.data.to_raw_address()
    # Arrow offset buffers hold num_elements + 1 entries (fencepost).
    offsets_size = new_self.offsets.dtype.itemsize * (num_elements + 1)
    chars = new_self.chars.data.to_raw_address()
    char_rect = new_self.chars.ispace.get_bounds()
    # Bounds are inclusive, hence the +1 to get a byte count.
    char_size = char_rect.hi[0] - char_rect.lo[0] + 1
    bitmask_buf = None
    null_count = 0
    if new_self.nullable:
        null_count = new_self.bitmask.count_nulls()
        if null_count > 0:
            # Arrow validity bitmaps pack one bit per element.
            bitmask = new_self.bitmask.compact_bitmask.to_raw_address()
            bitmask_size = (num_elements + 7) // 8
            bitmask_buf = pa.foreign_buffer(bitmask, bitmask_size)
    # NOTE(review): these foreign buffers carry no `base` keep-alive;
    # presumably `new_self` must outlive them — confirm against callers.
    offsets_buf = pa.foreign_buffer(offsets, offsets_size)
    chars_buf = pa.foreign_buffer(chars, char_size)
    array = pa.StringArray.from_buffers(
        num_elements,
        offsets_buf,
        chars_buf,
        bitmask_buf,
        null_count,
    )
    return array.to_pandas().astype(pd.StringDtype())
def xnd_xnd_cuda(cbuf):
    """Return xnd.xnd view of a pyarrow.cuda.CudaBuffer [EXPERIMENTAL].
    """
    import xnd
    import pyarrow as pa
    # addr = cbuf.context.get_device_address(cbuf.address)  # requires arrow>=0.12.1
    device_addr = cbuf.address
    # device = cbuf.context.device_number
    # Wrap the device address, keeping cbuf alive as the buffer's base.
    arrow_buf = pa.foreign_buffer(device_addr, cbuf.size, cbuf)
    return xnd.xnd.from_buffer(arrow_buf)
def test_foreign_buffer():
    # Wrap a numpy array's memory in a foreign buffer, passing the array
    # as `base` so the buffer keeps it alive.
    arr = np.array([1, 2], dtype=np.int32)
    address = arr.__array_interface__["data"][0]
    nbytes = arr.nbytes
    buf = pa.foreign_buffer(address, nbytes, arr)
    ref = weakref.ref(arr)
    del arr
    # The buffer still reads the right data and keeps the array alive.
    assert np.frombuffer(buf, dtype=np.int32).tolist() == [1, 2]
    assert ref() is not None
    # Dropping the buffer releases the last reference to the array.
    del buf
    assert ref() is None
def jvm_buffer(arrowbuf):
    """
    Construct an Arrow buffer from io.netty.buffer.ArrowBuf

    Parameters
    ----------
    arrowbuf: io.netty.buffer.ArrowBuf
        Arrow Buffer representation on the JVM

    Returns
    -------
    pyarrow.Buffer
        Python Buffer that references the JVM memory
    """
    # The netty buffer is passed as `base` so the JVM memory stays alive
    # for as long as the Python buffer does.
    return pa.foreign_buffer(
        arrowbuf.memoryAddress(),
        arrowbuf.capacity(),
        arrowbuf.asNettyBuffer(),
    )
def jvm_buffer(jvm_buf):
    """
    Construct an Arrow buffer from org.apache.arrow.memory.ArrowBuf

    Parameters
    ----------
    jvm_buf: org.apache.arrow.memory.ArrowBuf
        Arrow Buffer representation on the JVM.

    Returns
    -------
    pyarrow.Buffer
        Python Buffer that references the JVM memory.
    """
    address = jvm_buf.memoryAddress()
    size = jvm_buf.capacity()
    # The nanny, passed as `base`, pins the JVM buffer for the lifetime
    # of the returned Python buffer.
    nanny = _JvmBufferNanny(jvm_buf)
    return pa.foreign_buffer(address, size, base=nanny)