def acc_ops_squeeze(network, target, args, kwargs, name):
    """Convert an acc-ops squeeze node to a TensorRT shuffle (reshape) layer.

    Args:
        network: TensorRT INetworkDefinition being built.
        target: The fx node target (unused here; part of the converter signature).
        args: Positional args of the fx node (unused; converters read kwargs).
        kwargs: Must contain "input" (an ITensor) and "dim" (the dim to squeeze).
        name: Name to assign to the created TensorRT layer.

    Returns:
        The ITensor output of the shuffle layer.

    Raises:
        RuntimeError: If the input is not a TensorRT ITensor.
    """
    input_val = kwargs["input"]

    if not isinstance(input_val, trt.tensorrt.ITensor):
        raise RuntimeError(
            f"squeeze received input {input_val} that is not part "
            "of the TensorRT region!"
        )

    dim = kwargs["dim"] if "dim" in kwargs else None
    # Squeeze with dim=None would only work in explicit batch dim mode without any dynamic
    # dim, which is a very rare case. For now we just claim not supporting dim=None.
    assert dim is not None, "We don't support dim=None right now."

    # Normalize a negative dim (PyTorch counts it from the end of the full-rank
    # shape, which includes the implicit batch dim if present) to a non-negative
    # index. Without this, the positional comparison `i == dim` in the loop
    # below could never match, silently turning the squeeze into a no-op.
    full_rank = len(input_val.shape) + (1 if network.has_implicit_batch_dimension else 0)
    if dim < 0:
        dim += full_rank

    if network.has_implicit_batch_dimension:
        assert dim != 0, "We don't support squeeze batch dim when it's implicit."
        # Shift into the implicit-batch shape space, which excludes the batch dim.
        dim -= 1

    # A dynamic (-1) dim can't be proven to be 1 at build time, so refuse it.
    assert input_val.shape[dim] != -1, "We don't support squeeze dynamic dim."
    assert (
        len(get_dynamic_dims(input_val.shape)) <= 1
    ), "Currently more than one dynamic dim for input to squeeze is not supported."

    # Build the output shape by dropping the squeezed dim (only if it is 1,
    # mirroring torch.squeeze, which leaves non-1 dims untouched).
    output_shape = []
    for i, s in enumerate(input_val.shape):
        if i == dim and s == 1:
            continue
        output_shape.append(s)

    layer = network.add_shuffle(input_val)
    layer.reshape_dims = tuple(output_shape)
    layer.name = name
    return layer.get_output(0)
def acc_ops_linear(network, target, args, kwargs, name):
    """Convert an acc-ops linear (y = x @ W^T + b) node to TensorRT layers.

    Constant weights are lowered as shuffle + fully_connected + shuffle;
    ITensor weights (e.g. from quant/dequant) are lowered as matmul + add,
    since fully_connected can't take a non-constant kernel.

    Args:
        network: TensorRT INetworkDefinition being built.
        target: The fx node target (unused here; part of the converter signature).
        args: Positional args of the fx node (unused; converters read kwargs).
        kwargs: Must contain "input" (ITensor), "weight" (torch.Tensor or
            ITensor) and "bias" (torch.Tensor, ITensor, or None).
        name: Base name for the created TensorRT layers.

    Returns:
        The ITensor output of the final layer.

    Raises:
        RuntimeError: If the input is not a TensorRT ITensor.
    """
    input_val = kwargs["input"]

    if not isinstance(input_val, trt.tensorrt.ITensor):
        raise RuntimeError(
            f"Linear received input {input_val} that is not part "
            "of the TensorRT region!")

    dynamic_dims = get_dynamic_dims(input_val.shape)
    assert len(dynamic_dims) < 2 and input_val.shape[-1] != -1, (
        "Currently we only support one dynamic "
        "dim for linear and it can't be the last dim.")

    weight = kwargs["weight"]
    bias = kwargs["bias"]

    # For quantization, weight here would be a trt tensor because it goes through
    # quant + dequant. In this case, we need to use matmul + add because fully_connected
    # can't take non-constant weight.
    # TODO: Need to benchmark the performance of lowering linear as fully_connected versus
    # lowering as matmul + add. TensorRT documentation suggests to always lower it as
    # matmul + add but we found in some cases this results in performance regression compared
    # with lowering to fully_connected layer.
    if isinstance(weight, torch.Tensor):
        # fully_connected expects a 4D-style input; pad trailing unit dims.
        layer = network.add_shuffle(input_val)
        layer.reshape_dims = tuple(input_val.shape) + (1, 1)
        layer.name = f"{name}_pre_shuffle"

        # add fully connected
        layer = network.add_fully_connected(
            input=layer.get_output(0),
            num_outputs=weight.shape[0],
            kernel=to_numpy(weight),
            bias=to_numpy(bias),
        )
        layer.name = f"{name}_linear"

        # reshape back: drop the padded unit dims and replace the last input
        # dim with the number of output features.
        layer = network.add_shuffle(layer.get_output(0))
        layer.reshape_dims = tuple(input_val.shape[:-1]) + (weight.shape[0], )
        layer.name = f"{name}_post_shuffle"

        return layer.get_output(0)
    else:
        # add matrix multiply and add
        output = add_matrix_multiply_layer(
            network, input_val, weight, f"{name}_linear_mm", transpose_other=True)
        if bias is not None:
            return add_binary_elementwise_layer(
                network, output, bias,
                trt.ElementWiseOperation.SUM, f"{name}_linear_add")
        else:
            return output
def acc_ops_unsqueeze(network, target, args, kwargs, name):
    """Convert an acc-ops unsqueeze node to a TensorRT shuffle (reshape) layer.

    Args:
        network: TensorRT INetworkDefinition being built.
        target: The fx node target (unused here; part of the converter signature).
        args: Positional args of the fx node (unused; converters read kwargs).
        kwargs: Must contain "input" (an ITensor) and "dim" (insert position).
        name: Name to assign to the created TensorRT layer.

    Returns:
        The ITensor output of the shuffle layer.

    Raises:
        RuntimeError: If the input is not a TensorRT ITensor.
    """
    input_val = kwargs["input"]

    if not isinstance(input_val, trt.tensorrt.ITensor):
        raise RuntimeError(f"unsqueeze received input {input_val} that is not part "
                           "of the TensorRT region!")

    dim = kwargs["dim"]

    # Normalize a negative dim. For torch.unsqueeze, valid dims are
    # [-(rank + 1), rank] and a negative dim counts from the end of the
    # OUTPUT shape, so dim=-1 means "append at position rank". A raw
    # negative dim used in the tuple slices below would insert one position
    # too early (before the last element instead of after it).
    full_rank = len(input_val.shape) + (1 if network.has_implicit_batch_dimension else 0)
    if dim < 0:
        dim += full_rank + 1

    if network.has_implicit_batch_dimension:
        assert dim != 0, "We don't support unsqueeze batch dim when it's implicit."
        # Shift into the implicit-batch shape space, which excludes the batch dim.
        dim -= 1

    assert (
        len(get_dynamic_dims(input_val.shape)) <= 1
    ), "Currently we don't support unsqueeze with more than one dynamic dims."

    layer = network.add_shuffle(input_val)
    layer.reshape_dims = tuple(input_val.shape)[:dim] + (1,) + tuple(input_val.shape)[dim:]
    layer.name = name
    return layer.get_output(0)