def _initialize_read(self):
    import cupy
    from cudf.core.buffer import Buffer
    from cupy.cuda.memory import UnownedMemory

    self._offset = 0
    self._has_read_headers = False
    self._buffers = []
    headers, buffers = _id_to_buffers[self._object_id]
    self._headers = headers = headers.copy()
    buffer_types = []
    for buf in buffers:
        if isinstance(buf, cupy.ndarray):
            # record the size in bytes (nbytes), not the element count (size)
            ptr, size = buf.data.ptr, buf.nbytes
            self._buffers.append(
                UnownedMemory(ptr, size, Buffer(ptr, size)))
            buffer_types.append(["cuda", size])
        elif isinstance(buf, Buffer):
            ptr, size = buf.ptr, buf.size
            if size == 0:
                # an empty buffer cannot back an UnownedMemory
                self._buffers.append(None)
            else:
                self._buffers.append(
                    UnownedMemory(ptr, size, Buffer(ptr, size)))
            buffer_types.append(["cuda", size])
        else:
            size = getattr(buf, "size", len(buf))
            self._buffers.append(buf)
            buffer_types.append(["memory", size])
    headers["buffer_types"] = buffer_types
def _read_init(self):
    from cudf.core.buffer import Buffer
    from cupy.cuda.memory import UnownedMemory

    ptr = self._object_id.ptrs[0]
    self._size = self._object_id.headers['size']
    self._buffer = Buffer(ptr, self._size)
    self._cupy_memory = UnownedMemory(ptr, self._size, self._buffer)
def write(self, content):
    import ctypes
    import pickle
    from io import BytesIO

    import numpy as np
    from cudf.core.buffer import Buffer
    from cupy.cuda import MemoryPointer
    from cupy.cuda.memory import UnownedMemory

    if not self._has_write_headers:
        # the first chunk carries the pickled headers describing every buffer
        self._headers = headers = pickle.loads(content)
        buffer_types = headers["buffer_types"]
        for buffer_type, size in buffer_types:
            if buffer_type == "cuda":
                self._buffers.append(Buffer.empty(size))
            else:
                self._buffers.append(BytesIO())
        self._has_write_headers = True
        return

    cur_buf = self._buffers[self._cur_buffer_index]
    cur_buf_size = self._headers["buffer_types"][self._cur_buffer_index][1]
    if isinstance(cur_buf, Buffer):
        cur_cupy_memory = UnownedMemory(cur_buf.ptr, len(cur_buf), cur_buf)
        cupy_pointer = MemoryPointer(cur_cupy_memory, self._offset)

        if isinstance(content, bytes):
            content_length = len(content)
            source_mem = np.frombuffer(
                content, dtype="uint8").ctypes.data_as(ctypes.c_void_p)
        else:
            source_mem = MemoryPointer(
                UnownedMemory(content.ptr, len(content), content), 0)
            content_length = source_mem.mem.size
        cupy_pointer.copy_from(source_mem, content_length)
    else:
        content_length = len(content)
        cur_buf.write(content)

    if content_length + self._offset >= cur_buf_size:
        if isinstance(cur_buf, BytesIO):
            self._buffers[self._cur_buffer_index] = cur_buf.getvalue()
        self._cur_buffer_index += 1
        self._offset = 0
    else:
        self._offset += content_length
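# The read/write methods above all lean on the same pattern: wrap an
# allocation that some other object owns in UnownedMemory, then address it
# through a MemoryPointer. A minimal, self-contained sketch of that pattern;
# all names below are illustrative and not part of the classes in this section.

import ctypes

import cupy
import numpy as np
from cupy.cuda import MemoryPointer
from cupy.cuda.memory import UnownedMemory


def _unowned_copy_demo():
    owner = cupy.zeros(8, dtype=cupy.uint8)  # CuPy owns this device allocation
    # Borrow the allocation: passing `owner` as the third argument keeps it
    # alive, and UnownedMemory will never try to free the pointer itself.
    view = UnownedMemory(owner.data.ptr, owner.nbytes, owner)
    dest = MemoryPointer(view, 0)
    src = np.arange(8, dtype=np.uint8)
    # Copy host bytes into the borrowed device memory, as write() does above.
    dest.copy_from_host(src.ctypes.data_as(ctypes.c_void_p), src.nbytes)
    assert owner.get().tobytes() == src.tobytes()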
from typing import cast

import tensorflow
from cupy.cuda.memory import MemoryPointer, UnownedMemory
# ArrayXd and tensorflow2xp are thinc helpers (thinc.types / thinc.util).
from thinc.types import ArrayXd
from thinc.util import tensorflow2xp


def cupy_tensorflow_allocator(size_in_bytes: int):
    """Function that can be passed into cupy.cuda.set_allocator, to have cupy
    allocate memory via TensorFlow.

    This is important when using the two libraries together, as otherwise OOM
    errors can occur when there's available memory sitting in the other
    library's pool.
    """
    size_in_bytes = max(1024, size_in_bytes)
    # size_in_bytes // 4 float32 values occupy (almost) size_in_bytes bytes.
    tensor = tensorflow.zeros((size_in_bytes // 4,), dtype=tensorflow.dtypes.float32)
    # We convert to cupy via dlpack, so that we can get a memory pointer.
    cupy_array = cast(ArrayXd, tensorflow2xp(tensor))
    address = int(cupy_array.data)
    # cupy has a neat class to help us here. Otherwise it will try to free.
    memory = UnownedMemory(address, size_in_bytes, cupy_array)
    # Now return a new memory pointer.
    return MemoryPointer(memory, 0)
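# Usage sketch: cupy.cuda.set_allocator accepts any callable that takes a byte
# count and returns a MemoryPointer, so the allocator above can be installed
# directly. Illustrative only; install it once, before CuPy starts allocating.

import cupy

cupy.cuda.set_allocator(cupy_tensorflow_allocator)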
import torch
from cupy.cuda.memory import MemoryPointer, UnownedMemory


def cupy_pytorch_allocator(size_in_bytes: int):
    """Function that can be passed into cupy.cuda.set_allocator, to have cupy
    allocate memory via PyTorch.

    This is important when using the two libraries together, as otherwise OOM
    errors can occur when there's available memory sitting in the other
    library's pool.
    """
    # Cupy was having trouble with very small allocations?
    size_in_bytes = max(1024, size_in_bytes)
    # Creating a whole Tensor turns out to be way faster than allocating its
    # underlying FloatStorage directly? Maybe a Python vs C++ thing I guess?
    # The tensor must live on the GPU: cupy needs a device pointer, and a host
    # allocation from torch.zeros would be useless to it.
    torch_tensor = torch.zeros(
        (size_in_bytes // 4,), requires_grad=False, device="cuda")
    # cupy has a neat class to help us here. Otherwise it will try to free.
    # I think this is a private API? It's not in the types.
    address = torch_tensor.data_ptr()  # type: ignore
    memory = UnownedMemory(address, size_in_bytes, torch_tensor)
    # Now return a new memory pointer.
    return MemoryPointer(memory, 0)
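# In practice it can be worth wrapping the allocator in a cupy MemoryPool, so
# small allocations are cached instead of hitting PyTorch on every request.
# A sketch of that wiring; the pool variable name is illustrative.

import cupy

pytorch_pool = cupy.cuda.MemoryPool(allocator=cupy_pytorch_allocator)
cupy.cuda.set_allocator(pytorch_pool.malloc)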
def _write_init(self):
    from cupy.cuda.memory import UnownedMemory

    self._buffer = self._cuda_buffer
    self._cupy_memory = UnownedMemory(self._buffer.ptr, self._size, self._buffer)