def _transform_hugectr_outputs(self, tensors):
    output_tensors = []
    if "conts" in self.column_types:
        output_tensors.append(
            Tensor(
                "DES",
                _convert_to_hugectr(self.column_types["conts"], tensors, np.float32),
            )
        )
    else:
        output_tensors.append(Tensor("DES", np.array([[]], np.float32)))

    if "cats" in self.column_types:
        cats_np = _convert_to_hugectr(self.column_types["cats"], tensors, np.int64)
        cats_np += self.offsets
    else:
        # fall back to an empty row so the ROWINDEX computation below
        # doesn't reference an undefined cats_np
        cats_np = np.array([[]], np.int64)
    output_tensors.append(Tensor("CATCOLUMN", cats_np))

    len_cats_np = cats_np.shape[1]
    row_index = np.arange(len_cats_np + 1, dtype=np.int32).reshape(1, len_cats_np + 1)
    output_tensors.append(Tensor("ROWINDEX", row_index))

    return InferenceResponse(output_tensors)
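# A plausible sketch of the _convert_to_hugectr helper assumed above (its exact
# implementation isn't shown here): it stacks the named output columns side by
# side and flattens them, row-major, into the single 1 x N row that HugeCTR
# expects for its "DES" and "CATCOLUMN" inputs.
import numpy as np

def _convert_to_hugectr(columns, tensors, dtype):
    """Stack the named columns and flatten them into a single-row array."""
    rows = max(len(tensors[name]) for name in columns)
    d = np.empty((rows, len(columns)), dtype=dtype)
    for i, name in enumerate(columns):
        d[:, i] = tensors[name].astype(dtype)
    return d.reshape(1, rows * len(columns))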
def execute(self, requests: List[InferenceRequest]) -> List[InferenceResponse]:
    """Transforms the input batches by running them through an NVTabular
    workflow.transform function."""
    responses = []
    for request in requests:
        # create a cudf DataFrame from the triton request
        input_df = cudf.DataFrame(
            {
                name: _convert_tensor(get_input_tensor_by_name(request, name))
                for name in self.input_dtypes
            }
        )

        for name, dtype in self.input_multihots.items():
            values = as_column(
                _convert_tensor(get_input_tensor_by_name(request, name + "__values"))
            )
            nnzs = as_column(
                _convert_tensor(get_input_tensor_by_name(request, name + "__nnzs"))
            )
            # the "__nnzs" tensor holds list offsets here, so the list column
            # has one fewer row than the offsets column
            input_df[name] = build_column(
                None, dtype=dtype, size=nnzs.size - 1, children=(nnzs, values)
            )

        # use our NVTabular workflow to transform the dataframe
        output_df = nvtabular.workflow._transform_partition(
            input_df, [self.workflow.column_group]
        )

        # convert back to a triton response
        output_tensors = []
        for name in output_df.columns:
            col = output_df[name]
            if is_list_dtype(col.dtype):
                # convert list values to match the TF dataloader layout
                values = col.list.leaves.values_host.astype(
                    self.output_dtypes[name + "__values"]
                )
                values = values.reshape(len(values), 1)
                output_tensors.append(Tensor(name + "__values", values))

                offsets = col._column.offsets.values_host.astype(
                    self.output_dtypes[name + "__nnzs"]
                )
                nnzs = offsets[1:] - offsets[:-1]
                nnzs = nnzs.reshape(len(nnzs), 1)
                output_tensors.append(Tensor(name + "__nnzs", nnzs))
            else:
                d = col.values_host.astype(self.output_dtypes[name])
                d = d.reshape(len(d), 1)
                output_tensors.append(Tensor(name, d))

        responses.append(InferenceResponse(output_tensors))

    return responses
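# A minimal sketch of the _convert_tensor helper used throughout these
# snippets (the real implementation may differ): it pulls the numpy array out
# of a Triton tensor, squeezes the trailing unit dimension, and decodes
# fixed-width byte strings so cudf can ingest them.
import numpy as np

def _convert_tensor(t):
    """Convert a Triton input tensor to a 1-D numpy array."""
    out = t.as_numpy()
    if len(out.shape) == 2 and out.shape[1] == 1:
        out = out[:, 0]
    # byte-string dtypes (e.g. "|S15") aren't handled well downstream
    if out.dtype.kind in ("S", "O"):
        out = out.astype("str")
    return out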
def execute(self, requests: List[InferenceRequest]) -> List[InferenceResponse]:
    """Transforms the input batches by running them through an NVTabular
    workflow.transform function."""
    responses = []
    for request in requests:
        # create a cudf DataFrame from the triton request
        input_df = cudf.DataFrame(
            {
                name: _convert_tensor(get_input_tensor_by_name(request, name))
                for name in self.workflow.column_group.input_column_names
            }
        )

        # use our NVTabular workflow to transform the dataframe
        output_df = nvtabular.workflow._transform_partition(
            input_df, [self.workflow.column_group]
        )

        output_tensors = []
        if "conts" in self.column_types:
            output_tensors.append(
                Tensor(
                    "DES",
                    _convert_cudf2numpy(output_df[self.column_types["conts"]], np.float32),
                )
            )
        else:
            output_tensors.append(Tensor("DES", np.array([[]], np.float32)))

        if "cats" in self.column_types:
            output_df[self.column_types["cats"]] = (
                output_df[self.column_types["cats"]] + self.slot_sizes
            )
            cats_np = _convert_cudf2numpy(output_df[self.column_types["cats"]], np.int64)
        else:
            # fall back to an empty row so the ROWINDEX computation below
            # doesn't reference an undefined cats_np
            cats_np = np.array([[]], np.int64)
        output_tensors.append(Tensor("CATCOLUMN", cats_np))

        len_cats_np = cats_np.shape[1]
        row_index = np.arange(len_cats_np + 1, dtype=np.int32).reshape(1, len_cats_np + 1)
        output_tensors.append(Tensor("ROWINDEX", row_index))

        responses.append(InferenceResponse(output_tensors))

    return responses
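# A hypothetical sketch of the _convert_cudf2numpy helper assumed above: it
# flattens the selected cudf columns into the same single-row, row-major numpy
# layout that _convert_to_hugectr produces for HugeCTR.
import numpy as np

def _convert_cudf2numpy(df, dtype):
    """Flatten a cudf DataFrame into a 1 x (rows * cols) numpy array."""
    d = df.to_pandas().to_numpy().astype(dtype)  # (rows, cols), row-major
    return d.reshape(1, d.size)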
def execute(self, requests: List[InferenceRequest]) -> List[InferenceResponse]:
    """Transforms the input batches by running them through an NVTabular
    workflow.transform function."""
    responses = []
    for request in requests:
        # transform the triton tensors to a dict of name: numpy tensor
        input_tensors = {
            name: _convert_tensor(get_input_tensor_by_name(request, name))
            for name in self.input_dtypes
        }

        # multihots are represented as a tuple of (values, offsets)
        for name, dtype in self.input_multihots.items():
            values = _convert_tensor(get_input_tensor_by_name(request, name + "__values"))
            offsets = _convert_tensor(get_input_tensor_by_name(request, name + "__nnzs"))
            input_tensors[name] = (values, offsets)

        raw_tensor_tuples = self.runner.run_workflow(input_tensors)

        result = [Tensor(name, data) for name, data in raw_tensor_tuples]
        responses.append(InferenceResponse(result))

    return responses
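# The runner interface assumed above, sketched as a protocol (hypothetical,
# inferred from the call site): run_workflow takes a dict mapping column names
# to numpy arrays, or to (values, offsets) tuples for multihot columns, and
# yields (output_name, numpy_array) pairs ready to wrap in Triton Tensors.
from typing import Dict, Iterable, Tuple, Union
import numpy as np

MultihotInput = Tuple[np.ndarray, np.ndarray]  # (values, offsets)

class WorkflowRunner:
    def run_workflow(
        self, input_tensors: Dict[str, Union[np.ndarray, MultihotInput]]
    ) -> Iterable[Tuple[str, np.ndarray]]:
        ...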
def execute(self, requests: List[InferenceRequest]) -> List[InferenceResponse]:
    """Transforms the input batches by running them through an NVTabular
    workflow.transform function."""
    responses = []
    for request in requests:
        # create a cudf DataFrame from the triton request
        input_df = cudf.DataFrame(
            {
                name: _convert_tensor(get_input_tensor_by_name(request, name))
                for name in self.workflow.column_group.input_column_names
            }
        )

        # use our NVTabular workflow to transform the dataframe
        output_df = nvtabular.workflow._transform_partition(
            input_df, [self.workflow.column_group]
        )

        # convert back to a triton response
        output_tensors = []
        for col in output_df.columns:
            d = output_df[col].values_host.astype(self.output_dtypes[col])
            d = d.reshape(len(d), 1)
            output_tensors.append(Tensor(col, d))

        responses.append(InferenceResponse(output_tensors))

    return responses
def execute(self, requests: List[InferenceRequest]) -> List[InferenceResponse]:
    """Transforms the input batches by running them through an NVTabular
    workflow.transform function."""
    responses = []
    for request in requests:
        # create a cudf DataFrame from the triton request
        input_df = cudf.DataFrame(
            {
                name: _convert_tensor(get_input_tensor_by_name(request, name))
                for name in self.workflow.column_group.input_column_names
            }
        )

        # use our NVTabular workflow to transform the dataframe
        output_df = nvtabular.workflow._transform_partition(
            input_df, [self.workflow.column_group]
        )

        output_tensors = []
        for col, val in self.output_columns.items():
            d = _convert_cudf2numpy(output_df[val["columns"]], val["dtype"])
            output_tensors.append(Tensor(col, d))

        responses.append(InferenceResponse(output_tensors))

    return responses
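# A hypothetical example of the self.output_columns mapping assumed above:
# each Triton output name carries the workflow columns that feed it and the
# dtype to cast them to (the column names here are made up).
import numpy as np

output_columns = {
    "DES": {"columns": ["age", "hours_per_week"], "dtype": np.float32},
    "CATCOLUMN": {"columns": ["user_id", "item_id"], "dtype": np.int64},
}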
def _transform_outputs(self, tensors):
    """Transforms outputs for both the PyTorch and TensorFlow backends."""
    output_tensors = []
    for name, value in tensors.items():
        if isinstance(value, tuple):
            # convert list values to match the TF dataloader layout
            values = value[0].astype(self.output_dtypes[name + "__values"])
            values = values.reshape(len(values), 1)
            output_tensors.append(Tensor(name + "__values", values))

            # value[1] holds the list offsets; the per-row counts are the
            # differences between consecutive offsets
            offsets = value[1].astype(self.output_dtypes[name + "__nnzs"])
            nnzs = offsets[1:] - offsets[:-1]
            nnzs = nnzs.reshape(len(nnzs), 1)
            output_tensors.append(Tensor(name + "__nnzs", nnzs))
        else:
            d = value.astype(self.output_dtypes[name])
            d = d.reshape(len(d), 1)
            output_tensors.append(Tensor(name, d))

    return InferenceResponse(output_tensors)
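# A worked example of the offsets-to-nnzs conversion above, with made-up data:
# a multihot column holding the rows [1, 2] and [3] arrives as a flat values
# array plus offsets marking each row's start and end.
import numpy as np

offsets = np.array([0, 2, 3])          # row boundaries in the flat values array
nnzs = offsets[1:] - offsets[:-1]      # per-row counts: [2, 1]
print(nnzs.reshape(len(nnzs), 1))      # [[2], [1]], the "__nnzs" column layout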
def execute(self, requests: List[InferenceRequest]) -> List[InferenceResponse]:
    """Predicts the input batches by running them through a PyTorch predict function."""
    # To execute the queries, the PyTorch model must accept a dict input and
    # generate a dict output that stores its result in the "predictions"
    # bucket. Otherwise, it'll throw an error.
    with torch.no_grad():
        responses = []
        for request in requests:
            # convert the input data to a dict to pass it into the PyTorch model
            input_dict = dict()
            for name, dtype in self.inputs.items():
                input_dict[name] = torch.tensor(
                    _convert_tensor(get_input_tensor_by_name(request, name)), dtype=dtype
                ).cuda()

            # sparse inputs have a special format
            for name, dtype in self.sparse_inputs.items():
                # convert to fixed dtypes if requested
                if self.model_info["use_fix_dtypes"]:
                    dtype = _convert_dtype(dtype)

                # get __values and __nnzs
                input_val = _convert_tensor(
                    get_input_tensor_by_name(request, name + sparse_value_marker)
                )
                input_nnzs = _convert_tensor(
                    get_input_tensor_by_name(request, name + sparse_nnzs_marker)
                )
                input_nnzs = torch.tensor(input_nnzs, dtype=torch.int64)
                input_values = torch.tensor(input_val, dtype=dtype)

                # build the PyTorch sparse_coo_tensor
                sparse_to_dense = False
                seq_limit = 0
                if self.model_info is not None:
                    if self.model_info["sparse_max"].get(name) is not None:
                        sparse_to_dense = True
                        seq_limit = self.model_info["sparse_max"][name]
                if seq_limit == 0:
                    seq_limit = int(input_nnzs.max())

                input_dict[name] = _build_sparse_tensor(
                    input_values, input_nnzs, seq_limit, sparse_to_dense
                )

            # call the forward function to get the predictions; it should
            # return a dict with a "predictions" bucket
            out = self.model(input_dict, training=False)
            if not isinstance(out, dict):
                raise ValueError("output of the forward function should be a dict")

            pred = out.get("predictions")
            if pred is None:
                raise KeyError(
                    "output of the forward function should have a bucket named predictions"
                )

            # there is a single output in the config file, since PyTorch
            # models generate one tensor as output
            output_info = self.model_config["output"][0]
            output_tensor = Tensor(output_info["name"], pred.cpu().detach().numpy())
            responses.append(InferenceResponse([output_tensor]))

        return responses
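# A sketch of what _build_sparse_tensor might look like (hypothetical,
# assuming input_nnzs holds per-row value counts): it scatters the flat values
# into a (num_rows, seq_limit) torch.sparse_coo_tensor and optionally
# densifies it when a fixed sparse_max was configured.
import torch

def _build_sparse_tensor(values, nnzs, seq_limit, sparse_to_dense):
    values = values.flatten()
    nnzs = nnzs.flatten()
    # row index for every value, repeated by each row's count
    rows = torch.repeat_interleave(torch.arange(len(nnzs)), nnzs)
    # column index: each value's position within its own row
    starts = torch.cumsum(nnzs, 0) - nnzs
    cols = torch.arange(len(values)) - torch.repeat_interleave(starts, nnzs)
    sparse = torch.sparse_coo_tensor(
        torch.stack([rows, cols]), values, (len(nnzs), seq_limit)
    )
    return sparse.to_dense() if sparse_to_dense else sparse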