import torch
from torch.utils.dlpack import from_dlpack


def _unpack(self, dlpack):
    # CPU path: `dlpack` is assumed to be a host-side object exposing
    # `.values` (e.g. a dataframe column), wrapped into a torch.Tensor.
    if self.device == "cpu":
        return torch.Tensor(dlpack.values).squeeze(1)
    # GPU path: import the DLPack capsule into torch without a copy.
    return from_dlpack(dlpack)
from torch.utils.dlpack import from_dlpack


def beforeForwardPass(self, retrievedPosIndexes, retrievedNegIndexes=None):
    reshapedRetrieval = self._getReshapedRetrieval(
        retrievedPosIndexes, retrievedNegIndexes)
    # Index the CuPy-backed memmap, export the slice as a DLPack capsule,
    # and load it into the model weights without a host round-trip.
    self.model_variable.weight.data = from_dlpack(
        self.CUPYmemmap[reshapedRetrieval].toDlpack())
def getData(self, reshapedRetrieval):
    # The original referenced `reshapedRetrieval` without defining it; it is
    # assumed here to be the index array built by _getReshapedRetrieval()
    # and is therefore taken as an explicit parameter.
    return from_dlpack(self.CUPYmemmap[reshapedRetrieval].toDlpack())
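# A minimal sketch of the memmap-retrieval pattern above, assuming a CUDA
# device with CuPy installed; `memmap` and `indexes` are illustrative
# stand-ins for CUPYmemmap and reshapedRetrieval, not names from the source.
import cupy as cp
from torch.utils.dlpack import from_dlpack

memmap = cp.arange(12, dtype=cp.float32).reshape(4, 3)
indexes = cp.asarray([0, 2])
# toDlpack() mirrors the legacy CuPy call used above; the import into torch
# shares the indexed buffer rather than copying it.
weights = from_dlpack(memmap[indexes].toDlpack())
assert weights.is_cuda and weights.shape == (2, 3)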
from torch.utils.dlpack import from_dlpack


def to_pt(a):
    # Convert any object exposing toDlpack() (e.g. a CuPy array) into a
    # torch tensor that views the same memory.
    return from_dlpack(a.toDlpack())
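# Hypothetical usage of to_pt with a CuPy array (requires a CUDA device).
# Because the conversion is zero-copy, in-place edits on the torch side are
# visible from CuPy.
import cupy as cp

a = cp.ones((2, 3), dtype=cp.float32)
t = to_pt(a)
t += 1
assert float(a[0, 0]) == 2.0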
import numpy as np
import triton_python_backend_utils as pb_utils
from torch.utils.dlpack import from_dlpack, to_dlpack


def execute(self, requests):
    responses = []
    for request in requests:
        input0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
        print('ISCPU', input0.is_cpu())

        gpu_output = pb_utils.get_input_tensor_by_name(
            request, "GPU_OUTPUT").as_numpy()

        if input0.is_cpu():
            if not gpu_output[0]:
                output0 = pb_utils.Tensor.from_dlpack(
                    "OUTPUT0", input0.to_dlpack())
            else:
                # Move the CPU input to the GPU before re-wrapping it.
                output0_pytorch = from_dlpack(input0.to_dlpack()).cuda()
                output0 = pb_utils.Tensor.from_dlpack(
                    "OUTPUT0", to_dlpack(output0_pytorch))
        else:
            if gpu_output[0]:
                output0 = pb_utils.Tensor.from_dlpack(
                    "OUTPUT0", input0.to_dlpack())
            else:
                # Move the GPU input back to the CPU before re-wrapping it.
                output0_pytorch = from_dlpack(input0.to_dlpack()).cpu()
                output0 = pb_utils.Tensor.from_dlpack(
                    "OUTPUT0", to_dlpack(output0_pytorch))

        next_gpu_output = pb_utils.Tensor("NEXT_GPU_OUTPUT", gpu_output[1:])

        # Do not perform BLS inference if it is the first
        # model in the pipeline.
        if self._model_name != 'dlpack_io_identity_1':
            infer_request = pb_utils.InferenceRequest(
                model_name='dlpack_io_identity_1',
                inputs=[
                    input0,
                    pb_utils.get_input_tensor_by_name(request, "GPU_OUTPUT")
                ],
                requested_output_names=['OUTPUT0'])
            infer_response = infer_request.exec()
            if infer_response.has_error():
                raise pb_utils.TritonModelException(
                    infer_response.error().message())

            bls_output0 = pb_utils.get_output_tensor_by_name(
                infer_response, 'OUTPUT0')
            if not output0.is_cpu():
                bls_output0 = from_dlpack(
                    bls_output0.to_dlpack()).detach().cpu().numpy()
            else:
                bls_output0 = bls_output0.as_numpy()

            if not input0.is_cpu():
                input0 = from_dlpack(
                    input0.to_dlpack()).detach().cpu().numpy()
            else:
                input0 = input0.as_numpy()

            if not np.allclose(bls_output0, input0):
                raise pb_utils.TritonModelException(
                    'BLS input and output tensors are not equal')

        responses.append(
            pb_utils.InferenceResponse([output0, next_gpu_output]))
    return responses
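# Stripped of the Triton plumbing (pb_utils only exists inside the Triton
# Python backend), the device moves above reduce to this torch-only pattern;
# a minimal sketch, assuming a CUDA-capable PyTorch build.
import torch
from torch.utils.dlpack import from_dlpack, to_dlpack

cpu_t = torch.arange(4.0)
gpu_t = from_dlpack(to_dlpack(cpu_t)).cuda()    # host -> capsule -> device
back = from_dlpack(to_dlpack(gpu_t)).cpu()      # device -> capsule -> host
assert torch.equal(cpu_t, back)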
import tch
import torch
from torch.utils.dlpack import from_dlpack, to_dlpack

# `tch.eye` is assumed to return a DLPack capsule that torch can import.
dlt = tch.eye(10)
x = from_dlpack(dlt)
print(x)

# Export a torch tensor as a capsule and hand it back to tch.
y = torch.rand(2, 3)
tch.print(to_dlpack(y))
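# For reference, the same round-trip with torch alone shows that
# from_dlpack/to_dlpack share storage rather than copying:
import torch
from torch.utils.dlpack import from_dlpack, to_dlpack

y = torch.zeros(2, 3)
z = from_dlpack(to_dlpack(y))  # `z` views the same storage as `y`
z[0, 0] = 7.0
assert y[0, 0] == 7.0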
from torch.utils.dlpack import from_dlpack


def encodes(self, to: TabularGPU):
    # Convert each GPU-resident block (categoricals, continuous features,
    # targets) to torch via DLPack, casting to the dtypes the model expects.
    return (from_dlpack(to.cats.to_dlpack()).long(),
            from_dlpack(to.conts.to_dlpack()).float(),
            from_dlpack(to.targ.to_dlpack()).long())
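# A minimal sketch of the cuDF -> torch conversion used above, assuming a
# RAPIDS install and a CUDA device; the frame and its columns are
# illustrative, not taken from TabularGPU.
import cudf
from torch.utils.dlpack import from_dlpack

df = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
t = from_dlpack(df.to_dlpack()).long()  # a (3, 2) int64 tensor on the GPU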
# The bare `dlpack` module is assumed to be torch's, as in
# `from torch.utils import dlpack`.
from torch.utils import dlpack


def zerocopy_from_dlpack(dlpack_tensor):
    # Import a DLPack capsule as a torch tensor sharing the source memory.
    return dlpack.from_dlpack(dlpack_tensor)
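# Hypothetical usage, pairing zerocopy_from_dlpack with torch's exporter;
# the pointer check confirms that no copy was made.
import torch
from torch.utils.dlpack import to_dlpack

src = torch.arange(5)
dst = zerocopy_from_dlpack(to_dlpack(src))
assert dst.data_ptr() == src.data_ptr()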