def log_softmax(g, input, dim, dtype=None):
    """Emit an ONNX ``LogSoftmax`` node over ``dim`` with an optional trailing ``Cast``.

    Args:
        g: Graph object whose ``op`` method creates nodes.
        input: Value fed to ``LogSoftmax``.
        dim: Axis forwarded as the ``axis`` attribute.
        dtype: Optional dtype value. A ``Cast`` is appended only when it is
            truthy and its producing node is not a ``prim::Constant``.

    Returns:
        The ``LogSoftmax`` output, or the ``Cast`` output when a cast was added.
    """
    result = g.op("LogSoftmax", input, axis_i=dim)
    if not dtype or dtype.node().kind() == "prim::Constant":
        return result
    target = sym_help._get_const(dtype, "i", "dtype")
    return g.op("Cast", result, to_i=sym_help.scalar_type_to_onnx[target])
def symbolic(g, self, dim=None, keepdim=None):
    """Emit the reduce op named by the free variable ``onnx_op_name``.

    Args:
        g: Graph object whose ``op`` method creates nodes.
        self: Value to reduce; first passed through
            ``_maybe_cast_reduce_op_input``.
        dim: Axes value, or ``None`` to take the all-reduce path.
        keepdim: Value read as a constant int when ``dim`` is given.

    Returns:
        The node produced by ``sym_help._handle_reduce_dim_none`` when ``dim``
        is ``None``, otherwise the reduce op taking ``dim`` as an input.
    """
    self = _maybe_cast_reduce_op_input(g, self)
    if dim is not None:
        keep = sym_help._get_const(keepdim, "i", "keepdim")
        return g.op(onnx_op_name, self, dim, keepdims_i=keep)
    # all-reduce path
    return sym_help._handle_reduce_dim_none(g, self, onnx_op_name)
def full(g, sizes, value, dtype, layout, device, pin_memory=False):
    """Build a tensor of shape ``sizes`` filled with ``value``.

    When ``value`` can be read as a constant tensor the result comes from
    ``_constant_fill``. Otherwise a zeros tensor is created and ``value`` is
    added to it with a constant ``1`` as the last argument of ``add``.

    Args:
        g: Graph object whose ``op`` method creates nodes.
        sizes: Shape argument forwarded to ``zeros`` / ``_constant_fill``.
        value: Fill value.
        dtype, layout, device: Forwarded to ``zeros`` on the non-constant path;
            ``dtype`` is read as a constant int on the constant path.
        pin_memory: Not used by this function.
    """
    fill = sym_help._maybe_get_const(value, 't')
    if not sym_help._is_value(fill):
        dtype = sym_help._get_const(dtype, 'i', 'dtype')
        return _constant_fill(g, sizes, dtype, fill)
    base = zeros(g, sizes, dtype, layout, device)
    one = g.op("Constant", value_t=torch.tensor(1))
    return sym_opset9.add(g, base, value, one)
def symbolic(g, self, dim=None, keepdim=None):
    """Emit the reduce op named by the free variable ``onnx_op_name``.

    Args:
        g: Graph object whose ``op`` method creates nodes.
        self: Value to reduce; first passed through
            ``_maybe_cast_reduce_op_input``.
        dim: Axes value, or ``None`` to reduce without an axes input.
        keepdim: Value read as a constant int when ``dim`` is given.

    Returns:
        The reduce node; on the ``dim is None`` path it is created with
        ``keepdims=0`` and no axes input.
    """
    self = _maybe_cast_reduce_op_input(g, self)
    if dim is not None:
        keep = sym_help._get_const(keepdim, 'i', 'keepdim')
        return g.op(onnx_op_name, self, dim, keepdims_i=keep)
    # all-reduce path
    return g.op(onnx_op_name, self, keepdims_i=0)
def full(g, sizes, value, dtype, layout, device, pin_memory=False):
    """Build a tensor of shape ``sizes`` filled with ``value``.

    A constant ``value`` is handled by ``_constant_fill``. A non-constant one
    is added to a zeros tensor via ``opset9.add`` with a constant ``1`` as the
    final argument.

    Args:
        g: Graph object whose ``op`` method creates nodes.
        sizes: Shape argument forwarded to ``zeros`` / ``_constant_fill``.
        value: Fill value.
        dtype, layout, device: Forwarded to ``zeros`` on the non-constant path;
            ``dtype`` is read as a constant int on the constant path.
        pin_memory: Not used by this function.
    """
    fill = symbolic_helper._maybe_get_const(value, "t")
    if not symbolic_helper._is_value(fill):
        dtype = symbolic_helper._get_const(dtype, "i", "dtype")
        return _constant_fill(g, sizes, dtype, fill)
    base = zeros(g, sizes, dtype, layout, device)
    one = g.op("Constant", value_t=torch.tensor(1))
    return opset9.add(g, base, value, one)
def split(g, self, split_size_or_sizes, dim, _outputs=None):
    """Emit ONNX nodes splitting ``self`` along ``dim``.

    Non-static split (per ``sym_help._is_split_static``):
      * ``_outputs is None``: return the ``SplitToSequence`` output.
      * packed list of sizes whose length equals ``_outputs``: return a list
        of ``Slice`` nodes with running start/end offsets.
      * otherwise: return ``_outputs`` ``SequenceAt`` nodes.

    Static split:
      * the constant has ``dim() > 0``: pass it straight to ``Split``.
      * scalar split size: expand it into an explicit list of chunk lengths
        (plus a remainder chunk, if any) and pass that to ``Split``.

    Raises:
        RuntimeError: scalar split size, unknown dimension size and
            ``_outputs`` is ``None``.
    """
    if not sym_help._is_split_static(split_size_or_sizes, _outputs):
        sequence = g.op("SplitToSequence", self, split_size_or_sizes, axis_i=dim)
        if _outputs is None:
            return sequence

        # Convert to multiple slice nodes iff number of splits and number of
        # outputs are statically known.
        sizes_known = (
            sym_help._is_packed_list(split_size_or_sizes)
            and len(sym_help._unpack_list(split_size_or_sizes)) == _outputs
        )
        if not sizes_known:
            pieces = []
            for idx in range(_outputs):
                position = g.op(
                    "Constant", value_t=torch.tensor([idx], dtype=torch.long)
                )
                pieces.append(g.op("SequenceAt", sequence, position))
            return pieces

        lengths = [
            sym_help._unsqueeze_helper(g, item, [0])
            for item in sym_help._unpack_list(split_size_or_sizes)
        ]
        cursor = g.op("Constant", value_t=torch.tensor([0], dtype=torch.long))
        axis = g.op("Constant", value_t=torch.tensor([dim], dtype=torch.long))
        slices = []
        # ``lengths`` has exactly ``_outputs`` entries (checked above).
        for length in lengths:
            stop = g.op("Add", cursor, length)
            slices.append(g.op("Slice", self, cursor, stop, axis))
            cursor = stop
        return slices

    if split_size_or_sizes.node()["value"].dim() > 0:
        return g.op("Split", self, split_size_or_sizes, axis_i=dim, outputs=_outputs)

    chunk = sym_help._get_const(split_size_or_sizes, "i", "split_size")
    total = sym_help._get_tensor_dim_size(self, dim)
    if total is None:
        if _outputs is None:
            raise RuntimeError("Unknown dimension size not supported")
        total = chunk * _outputs

    whole, remainder = divmod(total, chunk)
    chunk_lengths = [chunk] * whole
    if remainder:
        chunk_lengths.append(remainder)
    lengths_const = g.op("Constant", value_t=torch.tensor(chunk_lengths))
    return g.op("Split", self, lengths_const, axis_i=dim, outputs=_outputs)
def cumsum(g, self, dim, dtype=None):
    """Emit an ONNX ``CumSum`` along ``dim``, casting the input first if requested.

    Args:
        g: Graph object whose ``op`` method creates nodes.
        self: Value to accumulate.
        dim: Axis, embedded as an int constant tensor input to ``CumSum``.
        dtype: Optional dtype value. When truthy and not produced by a
            ``prim::Constant`` node, ``self`` is cast *before* the ``CumSum``.

    Returns:
        The ``CumSum`` node output.
    """
    axis_const = g.op("Constant", value_t=torch.tensor(dim, dtype=torch.int))
    operand = self
    if dtype and dtype.node().kind() != "prim::Constant":
        target = sym_help._get_const(dtype, "i", "dtype")
        operand = g.op("Cast", self, to_i=sym_help.scalar_type_to_onnx[target])
    return g.op("CumSum", operand, axis_const)
def softmax(g, input, dim, dtype=None):
    """Emit an ONNX ``Softmax`` node over ``dim`` with an optional trailing ``Cast``.

    Args:
        g: Graph object whose ``op`` method creates nodes.
        input: Value fed to ``Softmax``.
        dim: Axis forwarded as the ``axis`` attribute.
        dtype: Optional dtype value. A ``Cast`` is appended only when it is
            truthy and its producing node is not a ``prim::Constant``.

    Returns:
        The ``Softmax`` output, or the ``Cast`` output when a cast was added.
    """
    result = g.op("Softmax", input, axis_i=dim)
    if not dtype or dtype.node().kind() == "prim::Constant":
        return result
    target = symbolic_helper._get_const(dtype, "i", "dtype")
    return g.op("Cast", result, to_i=symbolic_helper.scalar_type_to_onnx[target])
def softmax(g, input, dim, dtype=None):
    """Emit an ONNX ``Softmax`` node over ``dim`` with an optional trailing ``Cast``.

    Args:
        g: Graph object whose ``op`` method creates nodes.
        input: Value fed to ``Softmax``.
        dim: Axis forwarded as the ``axis`` attribute.
        dtype: Optional dtype value. A ``Cast`` is appended only when it is
            truthy and its producing node is not a ``prim::Constant``.

    Returns:
        The ``Softmax`` output, or the ``Cast`` output when a cast was added.
    """
    result = g.op('Softmax', input, axis_i=dim)
    needs_cast = dtype and dtype.node().kind() != 'prim::Constant'
    if needs_cast:
        target = sym_help._get_const(dtype, 'i', 'dtype')
        result = g.op("Cast", result, to_i=sym_help.scalar_type_to_onnx[target])
    return result
def reduce_dim(g, self, dim, keepdim, dtype):
    """Apply an optional dtype cast, then delegate to ``symbolic``.

    ``name`` and ``symbolic`` are free variables resolved outside this function.

    Args:
        g: Graph object whose ``op`` method creates nodes.
        self: Value to reduce.
        dim, keepdim: Forwarded unchanged to ``symbolic``.
        dtype: Value whose producing node kind selects the behaviour:
            ``onnx::Constant`` casts ``self`` to that dtype first,
            ``prim::Constant`` applies no cast, and any other kind is reported
            through ``symbolic_helper._unimplemented``.
    """
    dtype_kind = dtype.node().kind()
    if dtype_kind == "onnx::Constant":
        scalar_type = symbolic_helper._get_const(dtype, "i", "dtype")
        onnx_type = _type_utils.JitScalarType(scalar_type).onnx_type()
        self = g.op("Cast", self, to_i=onnx_type)
    elif dtype_kind != "prim::Constant":
        return symbolic_helper._unimplemented(name, "dtype", dtype)
    return symbolic(g, self, dim, keepdim)
def squeeze(g, self, dim=None):
    """Export ``squeeze``, using a plain squeeze helper whenever ``dim`` is given.

    With ``dim is None`` the opset 11 implementation is used. Otherwise
    ``dim`` is read as a constant int and passed to
    ``sym_help._squeeze_helper``.
    """
    # Current _infer_If does not correctly infer shapes from its then- and
    # else- branches, which causes errors in shape inference of following
    # nodes, so an explicit-dim squeeze is exported as `Squeeze`.
    from torch.onnx.symbolic_opset11 import squeeze as squeeze_with_if

    if dim is not None:
        axis = sym_help._get_const(dim, "i", "dim")
        return sym_help._squeeze_helper(g, self, axes_i=[axis])
    return squeeze_with_if(g, self, dim)
def cumsum(g, self, dim, dtype=None):
    """Emit an ONNX ``CumSum`` along ``dim``, casting the result if requested.

    Args:
        g: Graph object whose ``op`` method creates nodes.
        self: Value to accumulate.
        dim: Axis, embedded as an int constant tensor input to ``CumSum``.
        dtype: Optional dtype value. When truthy and not produced by a
            ``prim::Constant`` node, the ``CumSum`` output is cast to it.

    Returns:
        The ``CumSum`` output, or the ``Cast`` output when a cast was added.
    """
    axis_const = g.op("Constant", value_t=torch.tensor(dim, dtype=torch.int))
    running_sum = g.op("CumSum", self, axis_const)
    if not dtype or dtype.node().kind() == 'prim::Constant':
        return running_sum
    target = sym_help._get_const(dtype, 'i', 'dtype')
    return g.op("Cast", running_sum, to_i=sym_help.scalar_type_to_onnx[target])
def squeeze(g, self, dim=None):
    """Emit an ONNX ``Squeeze`` with explicit axes.

    When ``dim`` is ``None`` the axes are every index whose size in
    ``self.type().sizes()`` equals 1. Otherwise the single axis is ``dim``
    read as a constant int.
    """
    if dim is not None:
        axes = [sym_help._get_const(dim, 'i', 'dim')]
    else:
        axes = [
            index
            for index, extent in enumerate(self.type().sizes())
            if extent == 1
        ]
    return g.op("Squeeze", self, axes_i=axes)
def reduce_dim(g, self, dim, keepdim, dtype):
    """Apply an optional dtype cast, then delegate to ``symbolic``.

    ``name`` and ``symbolic`` are free variables resolved outside this function.

    Args:
        g: Graph object whose ``op`` method creates nodes.
        self: Value to reduce.
        dim, keepdim: Forwarded unchanged to ``symbolic``.
        dtype: Value whose producing node kind selects the behaviour:
            ``onnx::Constant`` casts ``self`` to that dtype first,
            ``prim::Constant`` applies no cast, and any other kind is reported
            through ``symbolic_helper._unimplemented``.
    """
    dtype_kind = dtype.node().kind()
    if dtype_kind == "onnx::Constant":
        scalar_type = symbolic_helper._get_const(dtype, "i", "dtype")
        onnx_type = symbolic_helper.scalar_type_to_onnx[scalar_type]
        self = g.op("Cast", self, to_i=onnx_type)
    elif dtype_kind != "prim::Constant":
        return symbolic_helper._unimplemented(name, "dtype")
    return symbolic(g, self, dim, keepdim)
def softmax(g, input, dim, dtype=None):
    """Emit an ONNX ``Softmax`` node over ``dim`` with an optional trailing ``Cast``.

    Args:
        g: Graph object whose ``op`` method creates nodes.
        input: Value fed to ``Softmax``.
        dim: Axis forwarded as the ``axis`` attribute.
        dtype: Optional dtype value. A ``Cast`` is appended only when it is
            truthy and its producing node is not a ``prim::Constant``; the
            target type is obtained through ``_type_utils.JitScalarType``.

    Returns:
        The ``Softmax`` output, or the ``Cast`` output when a cast was added.
    """
    result = g.op("Softmax", input, axis_i=dim)
    if not dtype or dtype.node().kind() == "prim::Constant":
        return result
    scalar_type = symbolic_helper._get_const(dtype, "i", "dtype")
    onnx_type = _type_utils.JitScalarType(scalar_type).onnx_type()
    return g.op("Cast", result, to_i=onnx_type)
def stack(g, tensor_list, dim):
    """Export ``stack`` for either a packed list or a sequence value.

    A packed list is delegated to the opset 9 ``stack``. Anything else is
    emitted as ``ConcatFromSequence`` with ``new_axis=1`` on the constant
    axis ``dim``.
    """
    if not sym_help._is_packed_list(tensor_list):
        axis = sym_help._get_const(dim, 'i', 'dim')
        return g.op("ConcatFromSequence", tensor_list, axis_i=axis, new_axis_i=1)

    from torch.onnx.symbolic_opset9 import stack as stack_opset9
    return stack_opset9(g, tensor_list, dim)
def _onnx_crypten_logsoftmax(g, input, dim, dtype=None):
    """
    Convert PyTorch's LogSoftmax module to a single ONNX LogSoftmax node.

    This overrides PyTorch's default LogSoftmax conversion so that no
    Transpose operators are potentially created. When ``dtype`` is truthy and
    not produced by a ``prim::Constant`` node, the result is followed by a
    ``Cast`` to that dtype.
    """
    output = g.op("LogSoftmax", input, axis_i=dim)
    if not dtype or dtype.node().kind() == "prim::Constant":
        return output
    target = sym_help._get_const(dtype, "i", "dtype")
    return g.op("Cast", output, to_i=sym_help.scalar_type_to_onnx[target])
def _onnx_crypten_softmax(g, input, dim, dtype=None):
    """
    Convert PyTorch's Softmax module to a single ONNX Softmax node.

    This overrides PyTorch's default conversion of Softmax into a sequence of
    Exp, ReduceSum and Div modules, because that default conversion can cause
    numerical overflow when applied to CrypTensors. When ``dtype`` is truthy
    and not produced by a ``prim::Constant`` node, the result is followed by
    a ``Cast`` to that dtype.
    """
    output = g.op("Softmax", input, axis_i=dim)
    if not dtype or dtype.node().kind() == "prim::Constant":
        return output
    target = sym_help._get_const(dtype, "i", "dtype")
    return g.op("Cast", output, to_i=sym_help.scalar_type_to_onnx[target])
def flatten(g, input, start_dim, end_dim):
    """Export ``flatten``, using ONNX ``Flatten`` when the output is 2D.

    ONNX ``Flatten`` is used in two cases: ``start_dim == 1`` with ``end_dim``
    on the last axis, and ``start_dim == 0`` with ``end_dim`` on the
    second-to-last axis. Every other combination is delegated to
    ``sym_opset9.flatten``.

    When ``_try_get_scalar_type(input)`` is truthy, the input is passed
    through ``_try_cast_integer_to_float`` before ``Flatten`` and the result
    is cast back with ``_cast_to_type``.
    """
    start_dim_i = sym_help._get_const(start_dim, 'i', 'start_dim')
    end_dim_i = sym_help._get_const(end_dim, 'i', 'end_dim')

    rank = input.type().dim()
    if end_dim_i < 0:
        end_dim_i = rank + end_dim_i

    # use ONNX's Flatten operator for cases where the output shape is 2D
    if start_dim_i == 1 and end_dim_i == rank - 1:
        flatten_axis = start_dim_i
    elif start_dim_i == 0 and end_dim_i == rank - 2:
        flatten_axis = end_dim_i + 1
    else:
        return sym_opset9.flatten(g, input, start_dim, end_dim)

    if not _try_get_scalar_type(input):
        return g.op("Flatten", input, axis_i=flatten_axis)

    old_type, input = _try_cast_integer_to_float(g, input)
    flattened = g.op("Flatten", input, axis_i=flatten_axis)
    return _cast_to_type(g, flattened, old_type)
def split(g, self, split_size_or_sizes, dim, _outputs=None):
    """Emit ONNX nodes splitting ``self`` along ``dim``.

    Non-static split (per ``sym_help._is_split_static``):
      * ``_outputs is None``: return the ``SplitToSequence`` output.
      * packed list of sizes whose length equals ``_outputs``: return a list
        of ``Slice`` nodes with running start/end offsets.
      * otherwise: return ``_outputs`` ``SequenceAt`` nodes.

    Static split:
      * the constant has ``dim() > 0``: pass it straight to ``Split``.
      * scalar split size: expand it into an explicit list of chunk lengths
        (plus a remainder chunk, if any) and pass that to ``Split``.

    Raises:
        RuntimeError: scalar split size, the input's sizes are unknown and
            ``_outputs`` is ``None``.
    """
    if not sym_help._is_split_static(split_size_or_sizes, _outputs):
        split_out = g.op("SplitToSequence", self, split_size_or_sizes, axis_i=dim)
        if _outputs is None:
            return split_out
        # Convert to multiple slice nodes iff number of splits and number of
        # outputs are statically known.
        if (
            sym_help._is_packed_list(split_size_or_sizes)
            and len(sym_help._unpack_list(split_size_or_sizes)) == _outputs
        ):
            split_sizes = [
                g.op("Unsqueeze", v, g.op("Constant", value_t=torch.tensor([0])))
                for v in sym_help._unpack_list(split_size_or_sizes)
            ]
            start = g.op("Constant", value_t=torch.tensor([0], dtype=torch.long))
            axis = g.op("Constant", value_t=torch.tensor([dim], dtype=torch.long))
            res = []
            # split_sizes is a list of the same length as _outputs
            for split_len in split_sizes:
                end = g.op("Add", start, split_len)
                res.append(g.op("Slice", self, start, end, axis))
                start = end
            return res
        return [
            g.op(
                "SequenceAt",
                split_out,
                g.op("Constant", value_t=torch.tensor([i], dtype=torch.long)),
            )
            for i in range(_outputs)
        ]

    split_val = split_size_or_sizes.node()['value']
    if split_val.dim() > 0:
        return g.op("Split", self, split_size_or_sizes, axis_i=dim, outputs=_outputs)

    split_size = sym_help._get_const(split_size_or_sizes, 'i', 'split_size')

    # ``sizes()`` may be None when the static shape is not known; indexing it
    # directly would fail with an unhelpful TypeError.
    sizes = self.type().sizes()
    if sizes is not None:
        size = sizes[dim]
    elif _outputs is not None:
        # NOTE(review): assumes the dimension divides evenly into ``_outputs``
        # chunks of ``split_size`` -- confirm this is acceptable for callers.
        size = split_size * _outputs
    else:
        raise RuntimeError("Unknown dimension size not supported")

    splits = [split_size] * (size // split_size)
    leftover = size % split_size
    if leftover:
        splits.append(leftover)
    splits = g.op("Constant", value_t=torch.tensor(splits))
    return g.op("Split", self, splits, axis_i=dim, outputs=_outputs)
def squeeze(g, self, dim=None):
    """Export ``squeeze`` statically when the shape is known, else with an ``If``.

    * ``dim is None``: emit ``Squeeze`` with no axes.
    * ``dim`` is not a constant: pass it straight to the squeeze helper.
    * constant ``dim`` with unknown dimension size (or a negative ``dim`` with
      unknown rank): emit an ``If`` node whose condition is ``shape[dim] == 1``,
      squeezing in the then-branch and passing ``self`` through ``Identity``
      in the else-branch.
    * constant ``dim`` with known size: warn and return ``self`` unchanged when
      the size is greater than 1, otherwise squeeze that axis.
    """
    if dim is None:
        return g.op("Squeeze", self)

    # dim as a tensor
    if not symbolic_helper._is_constant(dim):
        return symbolic_helper._squeeze_helper(g, self, [dim])

    dim = symbolic_helper._get_const(dim, "i", "dim")
    rank = symbolic_helper._get_tensor_rank(self)
    normalized_dim = dim + rank if (rank is not None and dim < 0) else dim
    extent = symbolic_helper._get_tensor_dim_size(self, normalized_dim)

    if (dim < 0 and rank is None) or extent is None:
        # If onnx shape inference is not on, export always as dynamic.
        # Because we cannot tell if observed static shape is also static at runtime.
        # create "cond" node (condition is shape[i]==1)
        axis_const = g.op("Constant", value_t=torch.tensor([dim]))
        runtime_size = symbolic_helper._size_helper(g, self, axis_const)
        one = g.op("Constant", value_t=torch.ones(1, dtype=torch.int64))
        is_singleton = g.op("Equal", runtime_size, one)

        # create the "If" node and add the "then" and "else" blocks to it.
        if_output = g.op("If", is_singleton)
        if_node = if_output.node()

        then_block = utils._add_block(if_node)
        squeezed = symbolic_helper._squeeze_helper(then_block, self, [dim])
        utils._add_output_to_block(then_block, squeezed)

        else_block = utils._add_block(if_node)
        passthrough = else_block.op("Identity", self)
        utils._add_output_to_block(else_block, passthrough)
        return if_output

    # For static input shape
    if extent <= 1:
        return symbolic_helper._squeeze_helper(g, self, [normalized_dim])

    message = (
        "This model contains a squeeze operation on dimension "
        + str(normalized_dim)
        + ". The size of this dimension in the given input is "
        + str(extent)
        + ". The model will be exported without the squeeze node. If the model is "
        + "intended to be used with dynamic input shapes, please export with "
        + "dynamic_axes argument."
    )
    warnings.warn(message)
    return self
def squeeze(g, self, dim=None):
    """Export ``squeeze`` on one axis as a runtime-checked ``If`` node.

    With ``dim is None`` a ``Squeeze`` without axes is emitted. Otherwise an
    ``If`` node is built whose condition is ``shape[dim] == 1``: the first
    block added is an ``onnx::Squeeze`` on ``dim`` and the second an
    ``onnx::Identity``.
    """
    if dim is None:
        return g.op("Squeeze", self)

    axis = sym_help._get_const(dim, 'i', 'dim')

    # create 'cond' node (condition is shape[i]==1)
    axis_const = g.op("Constant", value_t=torch.tensor([axis]))
    runtime_size = sym_help._size_helper(g, self, axis_const)
    one = g.op("Constant", value_t=torch.ones(1, dtype=torch.int64))
    is_singleton = g.op("Equal", runtime_size, one)

    # create the 'If' node and add the 'then' and 'else' blocks to it.
    if_output = g.op("If", is_singleton)
    if_node = if_output.node()
    torch.onnx.utils._add_block(if_node, self, "onnx::Squeeze", axes_i=[axis])
    torch.onnx.utils._add_block(if_node, self, "onnx::Identity")
    return if_output
def squeeze(g, self, dim=None):
    """Export ``squeeze`` statically when the shape is trusted, else with an ``If``.

    * ``dim is None``: emit ``Squeeze`` with no axes.
    * sizes unknown or ``_onnx_shape_inference`` falsy: emit an ``If`` node
      whose condition is ``shape[dim] == 1``, squeezing in the then-branch and
      passing ``self`` through ``Identity`` in the else-branch.
    * otherwise: normalize a negative ``dim``; warn and return ``self``
      unchanged when that dimension is larger than 1, else emit ``Squeeze``.
    """
    if dim is None:
        return g.op("Squeeze", self)

    dim = sym_help._get_const(dim, 'i', 'dim')

    input_shape = self.type().sizes()
    from torch.onnx.symbolic_helper import _onnx_shape_inference

    if input_shape is not None and _onnx_shape_inference:
        # For static input shape
        axis = dim + self.type().dim() if dim < 0 else dim
        extent = input_shape[axis]
        if extent <= 1:
            return g.op("Squeeze", self, axes_i=[axis])
        message = (
            "This model contains a squeeze operation on dimension "
            + str(axis)
            + ". The size of this dimension in the given input is "
            + str(extent)
            + ". The model will be exported without the squeeze node. If the model is "
            + "intended to be used with dynamic input shapes, please export with "
            + "dynamic_axes argument."
        )
        warnings.warn(message)
        return self

    # If onnx shape inference is not on, export always as dynamic.
    # Because we cannot tell if observed static shape is also static at runtime.
    # create 'cond' node (condition is shape[i]==1)
    axis_const = g.op("Constant", value_t=torch.tensor([dim]))
    runtime_size = sym_help._size_helper(g, self, axis_const)
    one = g.op("Constant", value_t=torch.ones(1, dtype=torch.int64))
    is_singleton = g.op("Equal", runtime_size, one)

    # create the 'If' node and add the 'then' and 'else' blocks to it.
    if_output = g.op("If", is_singleton)
    if_node = if_output.node()

    then_block = torch.onnx.utils._add_block(if_node)
    squeezed = then_block.op("Squeeze", self, axes_i=[dim])
    torch.onnx.utils._add_output_to_block(then_block, squeezed)

    else_block = torch.onnx.utils._add_block(if_node)
    passthrough = else_block.op("Identity", self)
    torch.onnx.utils._add_output_to_block(else_block, passthrough)
    return if_output
def unsqueeze(g, self, dim):
    """Export ``unsqueeze`` through ``sym_help._unsqueeze_helper``.

    A constant ``dim`` is first read as an int. A non-constant ``dim`` is
    handed to the helper as-is.
    """
    if sym_help._is_constant(dim):
        axis = sym_help._get_const(dim, "i", "dim")
    else:
        axis = dim
    return sym_help._unsqueeze_helper(g, self, [axis])
def stack(g, tensor_list, dim):
    """Export ``stack`` for either a packed list or a sequence value.

    A packed list is delegated to ``opset9.stack``. Anything else is emitted
    as ``ConcatFromSequence`` with ``new_axis=1`` on the constant axis ``dim``.
    """
    if not symbolic_helper._is_packed_list(tensor_list):
        axis = symbolic_helper._get_const(dim, "i", "dim")
        return g.op("ConcatFromSequence", tensor_list, axis_i=axis, new_axis_i=1)
    return opset9.stack(g, tensor_list, dim)
def quantize_per_tensor(g, input, scale, zero_point, dtype):
    """Export ``quantize_per_tensor`` through ``sym_help.quantize_helper``.

    ``zero_point`` is cast to the ONNX type looked up for the constant
    ``dtype`` and ``scale`` is cast to FLOAT before both are handed to the
    helper together with ``input``.
    """
    scalar_type = sym_help._get_const(dtype, "i", "dtype")
    zero_point_cast = g.op(
        "Cast", zero_point, to_i=sym_help.scalar_type_to_onnx[scalar_type]
    )
    scale_cast = g.op("Cast", scale, to_i=torch.onnx.TensorProtoDataType.FLOAT)
    return sym_help.quantize_helper(g, input, scale_cast, zero_point_cast)
def tensor_split(g, self, indices_or_sections, dim, _outputs=None):
    """Emit ONNX nodes for ``tensor_split`` of ``self`` along ``dim``.

    Three paths are taken depending on ``indices_or_sections``:

    * Static (per ``symbolic_helper._is_split_static``): a constant with
      ``dim() > 0`` is treated as split indices and produces a list of
      ``Slice`` nodes; a scalar constant is treated as a section count and
      produces one ``Split`` with precomputed chunk lengths.
    * Non-static rank-1 tensor: a ``Loop`` node slices between consecutive
      indices into a sequence, and a final slice up to the end of the
      dimension is inserted after the loop.
    * Otherwise (scalar tensor): chunk lengths are computed in-graph with
      ``Div`` / ``Mod`` / ``Tile`` and fed to ``SplitToSequence`` (when
      ``_outputs`` is ``None``) or ``Split``.

    Raises:
        errors.SymbolicValueError: static scalar section count, unknown
            dimension size and ``_outputs`` is ``None``.
    """
    # Axis as a 1-element tensor (used by Slice) and a scalar constant 1.
    axis = g.op("Constant", value_t=torch.tensor(dim, dtype=torch.long))
    axis = opset11.unsqueeze(g, axis, 0)
    const_1 = g.op("Constant", value_t=torch.tensor(1, dtype=torch.long))

    if symbolic_helper._is_split_static(indices_or_sections, _outputs):
        split_val = symbolic_helper._node_get(indices_or_sections.node(), "value")

        if split_val.dim() > 0:
            # Constant indices: slice [start, indices[i]) for each index, then
            # one last slice from the final index to the end of the dimension.
            start = g.op("Constant", value_t=torch.tensor([0], dtype=torch.long))
            res = []
            assert _outputs is not None
            for i in range(_outputs - 1):
                end = g.op(
                    "Gather",
                    indices_or_sections,
                    g.op("Constant", value_t=torch.tensor([i], dtype=torch.long)),
                    axis_i=0,
                )
                res.append(g.op("Slice", self, start, end, axis))
                start = end

            end = symbolic_helper._size_helper(g, self, axis)
            res.append(g.op("Slice", self, start, end, axis))
            return res

        # Constant section count.
        split_size = symbolic_helper._get_const(
            indices_or_sections, "i", "indices_or_sections"
        )

        size = symbolic_helper._get_tensor_dim_size(self, dim)
        if size is None:
            if _outputs is not None:
                # Dimension size unknown: fall back to split_size * _outputs.
                size = split_size * _outputs
            else:
                raise errors.SymbolicValueError(
                    "Unknown dimension size not supported", self
                )

        # The first ``size % split_size`` chunks get one extra element.
        min_split_size = size // split_size
        num_splits_one_extra = size % split_size

        splits = num_splits_one_extra * [min_split_size + 1]
        leftover = (split_size - num_splits_one_extra) * [min_split_size]

        splits = g.op(
            "Constant", value_t=torch.tensor(splits + leftover, dtype=torch.long)
        )
        return g.op("Split", self, splits, axis_i=dim, outputs=_outputs)

    if (
        symbolic_helper._is_tensor(indices_or_sections)
        and symbolic_helper._get_tensor_rank(indices_or_sections) == 1
    ):
        # Non-static 1-D indices: iterate once per index inside a Loop node.
        loop_len = symbolic_helper._size_helper(
            g, indices_or_sections, g.op("Constant", value_t=torch.tensor(0))
        )
        loop_len = opset11.unsqueeze(g, loop_len, 0)
        loop_condition = g.op("Cast", const_1, to_i=_C_onnx.TensorProtoDataType.BOOL)

        # To make the first slice in the loop below work, a zero is prepended
        # to the indices so that it serves as the initial slice start.
        padding_0 = g.op("Constant", value_t=torch.tensor([0], dtype=torch.long))
        indices_or_sections = g.op("Concat", padding_0, indices_or_sections, axis_i=0)

        final_splits = g.op("SequenceEmpty")
        loop = g.op("Loop", loop_len, loop_condition, final_splits)

        # Loop inputs
        loop_block = utils._add_block(loop.node())
        block_input_iter = utils._add_input_to_block(loop_block)
        # ``cond`` is not read afterwards; the call still adds a block input.
        cond = utils._add_input_to_block(loop_block)
        final_splits = utils._add_input_to_block(loop_block)

        # Slice between padded indices [iter] and [iter + 1].
        start = loop_block.op("Gather", indices_or_sections, block_input_iter, axis_i=0)
        end = loop_block.op(
            "Gather",
            indices_or_sections,
            loop_block.op("Add", block_input_iter, const_1),
            axis_i=0,
        )

        slice = loop_block.op("Slice", self, start, end, axis)
        final_splits = loop_block.op("SequenceInsert", final_splits, slice)

        # Loop outputs
        cond_out = loop_block.op("Identity", loop_condition)
        utils._add_output_to_block(loop_block, cond_out)
        utils._add_output_to_block(loop_block, final_splits)

        # After the loop: one more slice from the last index to the end of
        # the dimension, inserted into the loop's output sequence.
        loop_out = loop.node().output()
        start = g.op(
            "Gather",
            indices_or_sections,
            g.op("Constant", value_t=torch.tensor(-1, dtype=torch.long)),
            axis_i=0,
        )
        start = opset11.unsqueeze(g, start, 0)
        end = symbolic_helper._size_helper(g, self, axis)

        last_slice = g.op("Slice", self, start, end, axis)

        return g.op("SequenceInsert", loop_out, last_slice)
    else:  # scalar tensor
        # Section count only known at runtime: compute chunk lengths in-graph.
        dim_size = symbolic_helper._size_helper(g, self, axis)
        min_split_size = g.op("Div", dim_size, indices_or_sections)
        min_split_size_plus_1 = g.op(
            "Add",
            min_split_size,
            const_1,
        )
        num_splits_one_extra = g.op("Mod", dim_size, indices_or_sections)
        # ``num_splits_one_extra`` copies of the larger chunk length ...
        splits = g.op("Tile", min_split_size_plus_1, num_splits_one_extra)
        # ... followed by the remaining sections at the smaller length.
        leftover = g.op(
            "Tile",
            min_split_size,
            g.op(
                "Sub",
                opset11.unsqueeze(g, indices_or_sections, 0),
                num_splits_one_extra,
            ),
        )

        splits = g.op("Concat", splits, leftover, axis_i=0)
        if _outputs is None:
            return g.op("SplitToSequence", self, splits, axis_i=dim)
        return g.op("Split", self, splits, axis_i=dim, outputs=_outputs)