Example #1
    def _transform_hugectr_outputs(self, tensors):
        """Converts the transformed tensors into the DES / CATCOLUMN /
        ROWINDEX triple that HugeCTR expects."""
        output_tensors = []
        if "conts" in self.column_types:
            output_tensors.append(
                Tensor(
                    "DES",
                    _convert_to_hugectr(self.column_types["conts"], tensors,
                                        np.float32),
                ))
        else:
            output_tensors.append(Tensor("DES", np.array([[]], np.float32)))

        if "cats" in self.column_types:
            cats_np = _convert_to_hugectr(self.column_types["cats"], tensors,
                                          np.int64)
            cats_np += self.offsets
            output_tensors.append(Tensor(
                "CATCOLUMN",
                cats_np,
            ))
        else:
            output_tensors.append(Tensor("CATCOLUMN", np.array([[]],
                                                               np.int64)))

        len_cats_np = cats_np.shape[1]
        row_index = np.arange(len_cats_np + 1,
                              dtype=np.int32).reshape(1, len_cats_np + 1)
        output_tensors.append(Tensor("ROWINDEX", row_index))

        return InferenceResponse(output_tensors)
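
The _convert_to_hugectr helper is not part of the snippet. A minimal sketch of what it could look like, assuming tensors is a dict of column name to 1-D numpy array and that HugeCTR wants each input flattened row-major into a single (1, n_rows * n_columns) array (both assumptions, not the library's actual implementation):

    def _convert_to_hugectr(columns, tensors, dtype):
        """Flatten the named columns row-major into one (1, N) array."""
        rows = len(tensors[columns[0]])
        d = np.empty((rows, len(columns)), dtype=dtype)
        for i, name in enumerate(columns):
            d[:, i] = tensors[name].reshape(rows)  # hypothetical layout
        return d.reshape(1, rows * len(columns))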
Example #2
    def execute(self,
                requests: List[InferenceRequest]) -> List[InferenceResponse]:
        """Transforms the input batches by running through a NVTabular workflow.transform
        function.
        """
        responses = []
        for request in requests:
            # create a cudf DataFrame from the triton request
            input_df = cudf.DataFrame({
                name: _convert_tensor(get_input_tensor_by_name(request, name))
                for name in self.input_dtypes
            })

            # multihot columns arrive as a flat "__values" tensor plus a
            # "__nnzs" tensor, which here carries the offsets that delimit
            # each row's slice of the values
            for name, dtype in self.input_multihots.items():
                values = as_column(
                    _convert_tensor(
                        get_input_tensor_by_name(request, name + "__values")))
                nnzs = as_column(
                    _convert_tensor(
                        get_input_tensor_by_name(request, name + "__nnzs")))
                input_df[name] = build_column(None,
                                              dtype=dtype,
                                              size=nnzs.size - 1,
                                              children=(nnzs, values))

            # use our NVTabular workflow to transform the dataframe
            output_df = nvtabular.workflow._transform_partition(
                input_df, [self.workflow.column_group])

            # convert back to a triton response
            output_tensors = []
            for name in output_df.columns:
                col = output_df[name]
                if is_list_dtype(col.dtype):
                    # convert list values to match TF dataloader
                    values = col.list.leaves.values_host.astype(
                        self.output_dtypes[name + "__values"])
                    values = values.reshape(len(values), 1)
                    output_tensors.append(Tensor(name + "__values", values))

                    offsets = col._column.offsets.values_host.astype(
                        self.output_dtypes[name + "__nnzs"])
                    nnzs = offsets[1:] - offsets[:-1]
                    nnzs = nnzs.reshape(len(nnzs), 1)
                    output_tensors.append(Tensor(name + "__nnzs", nnzs))
                else:
                    d = col.values_host.astype(self.output_dtypes[name])
                    d = d.reshape(len(d), 1)
                    output_tensors.append(Tensor(name, d))

            responses.append(InferenceResponse(output_tensors))

        return responses
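
_convert_tensor is defined elsewhere in the module. A plausible minimal version, assuming the Triton Python-backend Tensor.as_numpy() API, (N, 1) column-vector inputs, and byte-string columns that need decoding before cudf can use them (the decoding step is an assumption):

    def _convert_tensor(t):
        """Convert a Triton python-backend tensor to a flat numpy array."""
        out = t.as_numpy()
        if len(out.shape) == 2:
            out = out.reshape(out.shape[0])  # assumes an (N, 1) column vector
        if out.dtype.type is np.bytes_:
            out = np.array([o.decode("utf-8") for o in out])  # assumed utf-8
        return out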
Example #3
    def execute(self, requests: List[InferenceRequest]) -> List[InferenceResponse]:
        """Transforms the input batches by running through a NVTabular workflow.transform
        function.
        """
        responses = []
        for request in requests:
            # create a cudf DataFrame from the triton request
            input_df = cudf.DataFrame(
                {
                    name: _convert_tensor(get_input_tensor_by_name(request, name))
                    for name in self.workflow.column_group.input_column_names
                }
            )

            # use our NVTabular workflow to transform the dataframe
            output_df = nvtabular.workflow._transform_partition(
                input_df, [self.workflow.column_group]
            )

            output_tensors = []
            if "conts" in self.column_types:
                output_tensors.append(
                    Tensor(
                        "DES",
                        _convert_cudf2numpy(output_df[self.column_types["conts"]], np.float32),
                    )
                )
            else:
                output_tensors.append(Tensor("DES", np.array([[]], np.float32)))

            if "cats" in self.column_types:
                output_df[self.column_types["cats"]] = (
                    output_df[self.column_types["cats"]] + self.slot_sizes
                )
                cats_np = _convert_cudf2numpy(output_df[self.column_types["cats"]], np.int64)
                output_tensors.append(
                    Tensor(
                        "CATCOLUMN",
                        cats_np,
                    )
                )
            else:
                output_tensors.append(Tensor("CATCOLUMN", np.array([[]], np.int64)))

            len_cats_np = cats_np.shape[1]
            row_index = np.arange(len_cats_np + 1, dtype=np.int32).reshape(1, len_cats_np + 1)
            output_tensors.append(Tensor("ROWINDEX", row_index))

            responses.append(InferenceResponse(output_tensors))

        return responses
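
_convert_cudf2numpy is also external to the snippet. A sketch under the assumption that it copies the selected cuDF columns to host memory, casts them, and flattens them row-major into the (1, N) layout the DES / CATCOLUMN inputs above use:

    def _convert_cudf2numpy(df, dtype):
        """Copy a cuDF frame to host, cast, and flatten to (1, N)."""
        d = df.to_pandas().to_numpy().astype(dtype)  # hypothetical helper
        return d.reshape(1, d.size)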
Example #4
    def execute(self, requests: List[InferenceRequest]) -> List[InferenceResponse]:
        """Transforms the input batches by running through a NVTabular workflow.transform
        function.
        """
        responses = []
        for request in requests:
            # transform the triton tensors to a dict of name:numpy tensor
            input_tensors = {
                name: _convert_tensor(get_input_tensor_by_name(request, name))
                for name in self.input_dtypes
            }

            # multihots are represented as a tuple of (values, offsets)
            for name, dtype in self.input_multihots.items():
                values = _convert_tensor(get_input_tensor_by_name(request, name + "__values"))
                offsets = _convert_tensor(get_input_tensor_by_name(request, name + "__nnzs"))
                input_tensors[name] = (values, offsets)

            raw_tensor_tuples = self.runner.run_workflow(input_tensors)

            result = [Tensor(name, data) for name, data in raw_tensor_tuples]

            responses.append(InferenceResponse(result))

        return responses
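
The (values, offsets) tuples follow the same packing convention as the other examples: the "__values" tensor holds every row's items back to back, and "__nnzs" carries the boundaries between rows. A small illustration with a hypothetical "genres" column holding rows [1, 2, 3] and [4, 5]:

    values = np.array([1, 2, 3, 4, 5], dtype=np.int64)
    offsets = np.array([0, 3, 5], dtype=np.int64)  # row i = values[offsets[i]:offsets[i + 1]]
    input_tensors["genres"] = (values, offsets)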
Example #5
    def execute(self, requests: List[InferenceRequest]) -> List[InferenceResponse]:
        """Transforms the input batches by running through a NVTabular workflow.transform
        function.
        """
        responses = []
        for request in requests:
            # create a cudf DataFrame from the triton request
            input_df = cudf.DataFrame(
                {
                    name: _convert_tensor(get_input_tensor_by_name(request, name))
                    for name in self.workflow.column_group.input_column_names
                }
            )

            # use our NVTabular workflow to transform the dataframe
            output_df = nvtabular.workflow._transform_partition(
                input_df, [self.workflow.column_group]
            )

            # convert back to a triton response
            output_tensors = []
            for col in output_df.columns:
                d = output_df[col].values_host.astype(self.output_dtypes[col])
                d = d.reshape(len(d), 1)
                output_tensors.append(Tensor(col, d))

            responses.append(InferenceResponse(output_tensors))

        return responses
Example #6
    def execute(self,
                requests: List[InferenceRequest]) -> List[InferenceResponse]:
        """Transforms the input batches by running through a NVTabular workflow.transform
        function.
        """
        responses = []
        for request in requests:
            # create a cudf DataFrame from the triton request
            input_df = cudf.DataFrame({
                name: _convert_tensor(get_input_tensor_by_name(request, name))
                for name in self.workflow.column_group.input_column_names
            })

            # use our NVTabular workflow to transform the dataframe
            output_df = nvtabular.workflow._transform_partition(
                input_df, [self.workflow.column_group])

            output_tensors = []
            for col, val in self.output_columns.items():
                d = _convert_cudf2numpy(output_df[val["columns"]],
                                        val["dtype"])
                output_tensors.append(Tensor(col, d))

            responses.append(InferenceResponse(output_tensors))

        return responses
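
Example #6 assumes self.output_columns maps every Triton output name to the cuDF columns that feed it plus the target dtype. A hypothetical instance of that mapping, with made-up column names:

    self.output_columns = {
        "DES": {"columns": ["price_norm", "age_norm"], "dtype": np.float32},
        "CATCOLUMN": {"columns": ["user_id", "item_id"], "dtype": np.int64},
    }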
Example #7
    def _transform_outputs(self, tensors):
        """ transforms outputs for both pytorch and tensorflow """
        output_tensors = []
        for name, value in tensors.items():
            if isinstance(value, tuple):
                # convert list values to match TF dataloader
                values = value[0].astype(self.output_dtypes[name + "__values"])
                values = values.reshape(len(values), 1)
                output_tensors.append(Tensor(name + "__values", values))

                offsets = value[1].astype(self.output_dtypes[name + "__nnzs"])
                nnzs = offsets[1:] - offsets[:-1]
                nnzs = nnzs.reshape(len(nnzs), 1)
                output_tensors.append(Tensor(name + "__nnzs", nnzs))
            else:
                d = value.astype(self.output_dtypes[name])
                d = d.reshape(len(d), 1)
                output_tensors.append(Tensor(name, d))
        return InferenceResponse(output_tensors)
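
The offsets[1:] - offsets[:-1] step turns cumulative offsets into per-row counts (equivalently np.diff(offsets)). For three rows packed into one buffer:

    offsets = np.array([0, 3, 5, 5])
    nnzs = offsets[1:] - offsets[:-1]  # -> array([3, 2, 0])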
Example #8
    def execute(self, requests: List[InferenceRequest]) -> List[InferenceResponse]:
        """Predicts the input batches by running through a PyTorch predict function."""

        # To be able to execute the queries, the PyTorch model must accept a dict
        # input and generate a dict output that has the result in the
        # "predictions" bucket. Otherwise, it'll throw an error.

        with torch.no_grad():
            responses = []
            for request in requests:
                # Convert the input data to dict to pass it into the PyTorch model
                input_dict = dict()
                for name, dtype in self.inputs.items():
                    input_dict[name] = torch.tensor(
                        _convert_tensor(get_input_tensor_by_name(request, name)), dtype=dtype
                    ).cuda()

                # Sparse inputs have a special format
                for name, dtype in self.sparse_inputs.items():
                    # Convert to fixed dtypes if requested
                    if self.model_info and self.model_info["use_fix_dtypes"]:
                        dtype = _convert_dtype(dtype)

                    # Get __values and __nnzs
                    input_val = _convert_tensor(
                        get_input_tensor_by_name(request, name + sparse_value_marker)
                    )
                    input_nnzs = _convert_tensor(
                        get_input_tensor_by_name(request, name + sparse_nnzs_marker)
                    )
                    input_nnzs = torch.tensor(input_nnzs, dtype=torch.int64)
                    input_values = torch.tensor(input_val, dtype=dtype)

                    # Get the PyTorch sparse_coo_tensor
                    sparse_to_dense = False
                    seq_limit = 0
                    if self.model_info is not None:
                        if self.model_info["sparse_max"].get(name) is not None:
                            sparse_to_dense = True
                            seq_limit = self.model_info["sparse_max"][name]

                    if seq_limit == 0:
                        seq_limit = int(input_nnzs.max())

                    input_dict[name] = _build_sparse_tensor(
                        input_values, input_nnzs, seq_limit, sparse_to_dense
                    )

                # Call forward function to get the predictions
                # Forward function should return a dict with the "predictions" bucket
                out = self.model(input_dict, training=False)
                if not isinstance(out, dict):
                    raise ValueError("output of the forward function should be a dict")

                # Get the predictions from the out
                pred = out.get("predictions")
                if pred is None:
                    raise KeyError(
                        "output of the forward function should have a bucket named 'predictions'"
                    )

                # There is one output in the config file
                # since the PyTorch models generate a tensor as an output
                output_info = self.model_config["output"][0]
                output_tensor = Tensor(output_info["name"], pred.cpu().detach().numpy())
                responses.append(InferenceResponse([output_tensor]))

        return responses
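
_build_sparse_tensor is not shown either. A minimal sketch, assuming the per-row counts in nnzs are scattered into a (batch, seq_limit) torch.sparse_coo_tensor that is densified when sparse_to_dense is set (a hypothetical reimplementation, not the library's actual helper):

    def _build_sparse_tensor(values, nnzs, seq_limit, sparse_to_dense):
        """Scatter flat values into a (batch, seq_limit) sparse tensor."""
        values, nnzs = values.reshape(-1), nnzs.reshape(-1)
        # row index of each value, then its position within that row
        row_ids = torch.repeat_interleave(torch.arange(len(nnzs)), nnzs)
        col_ids = torch.cat([torch.arange(int(n)) for n in nnzs])
        sparse = torch.sparse_coo_tensor(
            torch.stack([row_ids, col_ids]), values,
            size=(len(nnzs), seq_limit))
        return sparse.to_dense() if sparse_to_dense else sparse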