def __interpolate(g, input, size, scale_factor, mode, align_corners):
    mode = sym_help._maybe_get_const(mode, 's')
    align_corners = sym_help._maybe_get_const(align_corners, 'b')
    align_corners = False if sym_help._is_none(align_corners) else align_corners
    coordinate_transformation_mode = "asymmetric" if mode == "nearest" \
        else "align_corners" if align_corners else "pytorch_half_pixel"
    # roi only takes effect with coordinate_transformation_mode="tf_crop_and_resize"
    roi = g.op("Constant", value_t=torch.tensor([], dtype=torch.float32))

    if not sym_help._is_none(size):
        offsets = g.op("Constant", value_t=torch.ones(2, dtype=torch.int64))
        size = g.op("Cast", size, to_i=sym_help.cast_pytorch_to_onnx["Long"])
        size = g.op("Concat", offsets, size, axis_i=0)
        scales = g.op("Constant", value_t=torch.tensor([], dtype=torch.float32))
    elif not sym_help._is_none(scale_factor):
        scales = sym_help._interpolate_get_scales(g, scale_factor, 4)
        size = g.op("Constant", value_t=torch.tensor([], dtype=torch.int64))
    return g.op("Resize",
                input,
                roi,
                scales,
                size,
                coordinate_transformation_mode_s=coordinate_transformation_mode,
                cubic_coeff_a_f=-0.75,   # only valid when mode="cubic"
                mode_s=mode,             # nearest, linear, or cubic
                nearest_mode_s="floor")  # only valid when mode="nearest"

def binary_cross_entropy_with_logits(g, input, target, weight, pos_weight, reduction):
    from torch.onnx.symbolic_opset9 import sigmoid, log, sub, neg, mul, add
    p = g.op("Constant", value_t=torch.tensor([1]))
    sig_x = sigmoid(g, input)
    log_sig_x = log(g, sig_x)
    sub_1_x = sub(g, p, sig_x)
    sub_1_y = sub(g, p, target)
    log_1_x = log(g, sub_1_x)
    if pos_weight is None or sym_help._is_none(pos_weight):
        output = neg(g, add(g, mul(g, target, log_sig_x), mul(g, sub_1_y, log_1_x)))
    else:
        output = neg(g, add(g,
                            mul(g, mul(g, target, log_sig_x), pos_weight),
                            mul(g, sub_1_y, log_1_x)))

    if weight is not None and not sym_help._is_none(weight):
        output = mul(g, weight, output)

    reduction = sym_help._maybe_get_const(reduction, 'i')
    if reduction == 0:
        return output
    elif reduction == 1:
        return g.op("ReduceMean", output)
    elif reduction == 2:
        return g.op("ReduceSum", output)
    else:
        return sym_help._onnx_unsupported(
            "binary_cross_entropy_with_logits with reduction other than none, mean, or sum")

def clamp(g, self, min, max):
    dtype = self.type().scalarType()

    def _cast_if_not_none(tensor, dtype):
        if tensor is not None and not symbolic_helper._is_none(tensor):
            return g.op(
                "Cast", tensor, to_i=symbolic_helper.cast_pytorch_to_onnx[dtype]
            )
        else:
            return tensor

    if dtype is not None:
        min = _cast_if_not_none(min, dtype)
        max = _cast_if_not_none(max, dtype)

    if symbolic_helper._is_none(min):
        return clamp_max(g, self, max)
    elif symbolic_helper._is_none(max):
        return clamp_min(g, self, min)
    else:
        if (
            symbolic_helper._get_tensor_rank(min) == 0
            and symbolic_helper._get_tensor_rank(max) == 0
        ):
            return opset9.op_with_optional_float_cast(
                g, "Clip", self, min, max, opset_before=12
            )
        else:
            return clamp_max(g, clamp_min(g, self, min), max)

def clamp(g, self, min, max):
    dtype = self.type().scalarType()

    def _cast_if_not_none(tensor, dtype):
        if tensor is not None and not sym_help._is_none(tensor):
            return g.op("Cast", tensor, to_i=sym_help.cast_pytorch_to_onnx[dtype])
        else:
            return tensor

    if dtype is not None:
        min = _cast_if_not_none(min, dtype)
        max = _cast_if_not_none(max, dtype)

    if sym_help._is_none(min):
        return clamp_max(g, self, max)
    elif sym_help._is_none(max):
        return clamp_min(g, self, min)
    else:
        if sym_help._get_tensor_rank(min) == 0 and sym_help._get_tensor_rank(max) == 0:
            return g.op("Clip", self, min, max)
        else:
            return clamp_max(g, clamp_min(g, self, min), max)

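# A minimal usage sketch, not part of the snippets above: exporting a module whose
# forward calls torch.clamp exercises one of the clamp symbolics; which variant is
# dispatched depends on the torch version and the opset chosen at export time.
# The module and file names here are illustrative only.
import torch

class ClampModel(torch.nn.Module):
    def forward(self, x):
        # Both min and max are given as scalars, so the symbolic should take the Clip path.
        return torch.clamp(x, min=0.0, max=1.0)

torch.onnx.export(ClampModel(), (torch.randn(2, 3),), "clamp.onnx", opset_version=11)
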
def __interpolate(g, input, size, scale_factor, mode, align_corners, recompute_scale_factor):
    align_corners = sym_help._maybe_get_const(align_corners, 'b')
    if not sym_help._is_none(align_corners) and align_corners:
        return _unimplemented("interpolate", "align_corners == True")

    if not sym_help._is_none(scale_factor) and sym_help._is_value(scale_factor):
        return _unimplemented("interpolate", "dynamic scales in opset 8")

    if not sym_help._is_none(size) and sym_help._is_value(size):
        return _unimplemented("interpolate", "dynamic size in opset 8")

    scales, mode = sym_help._interpolate_get_scales_and_mode(g, input, size, scale_factor,
                                                             mode, align_corners)
    return g.op("Upsample", input, mode_s=mode, scales_f=scales)

def __interpolate(g, input, size, scale_factor, mode, align_corners):
    mode = sym_help._maybe_get_const(mode, 's')
    if 'linear' in mode:
        mode = 'linear'
    if 'cubic' in mode:
        mode = 'cubic'
    align_corners = sym_help._maybe_get_const(align_corners, 'b')
    align_corners = False if sym_help._is_none(align_corners) else align_corners
    coordinate_transformation_mode = "asymmetric" if mode == "nearest" \
        else "align_corners" if align_corners else "pytorch_half_pixel"
    # roi only takes effect with coordinate_transformation_mode="tf_crop_and_resize"
    roi = g.op("Constant", value_t=torch.tensor([], dtype=torch.float32))

    if not sym_help._is_none(size):
        input_size = input.type().sizes()
        input_size = g.op("Constant",
                          value_t=torch.tensor(input_size[0:2], dtype=torch.int64))
        is_scalar = sym_help._maybe_get_const(size, 't').dim() == 0
        if is_scalar:
            size = unsqueeze(g, size, 0)
            size = [size for i in range(input.type().dim() - 2)]
            size = g.op("Concat", *size, axis_i=0)
        size = g.op("Concat", input_size, size, axis_i=0)
        scales = g.op("Constant", value_t=torch.tensor([], dtype=torch.float32))
        return g.op("Resize",
                    input,
                    roi,
                    scales,
                    size,
                    coordinate_transformation_mode_s=coordinate_transformation_mode,
                    cubic_coeff_a_f=-0.75,   # only valid when mode="cubic"
                    mode_s=mode,             # nearest, linear, or cubic
                    nearest_mode_s="floor")
    else:  # if not sym_help._is_none(scales)
        scales = sym_help._interpolate_get_scales(g, scale_factor, input.type().dim())
        return g.op("Resize",
                    input,
                    roi,
                    scales,
                    coordinate_transformation_mode_s=coordinate_transformation_mode,
                    cubic_coeff_a_f=-0.75,   # only valid when mode="cubic"
                    mode_s=mode,             # nearest, linear, or cubic
                    nearest_mode_s="floor")  # only valid when mode="nearest"

def batch_norm(g, input, weight, bias, running_mean, running_var, training, momentum,
               eps, cudnn_enabled):
    sym_help.assert_training_mode(training, "batch_norm")
    input_sizes = input.type().sizes()

    if weight is None or sym_help._is_none(weight):
        assert len(input_sizes) > 1
        weight_value = torch.tensor([1.] * input_sizes[1]).type(
            'torch.' + input.type().scalarType() + 'Tensor')
        weight = g.op("Constant", value_t=weight_value)
    if bias is None or sym_help._is_none(bias):
        assert len(input_sizes) > 1
        bias_value = torch.tensor([0.] * input_sizes[1]).type(
            'torch.' + input.type().scalarType() + 'Tensor')
        bias = g.op("Constant", value_t=bias_value)

    if not sym_help._training_mode:
        out = g.op("BatchNormalization", input, weight, bias, running_mean, running_var,
                   epsilon_f=eps,
                   momentum_f=1 - momentum,
                   outputs=1)
        return out
    else:
        training_mode = g.op("Constant", value_t=torch.tensor(True))
        res, new_running_mean, new_running_var, saved_mean, saved_var = g.op(
            "BatchNormalization", input, weight, bias, running_mean, running_var,
            training_mode,
            epsilon_f=eps,
            momentum_f=1 - momentum,
            outputs=5)
        new_running_mean.setType(running_mean.type())
        new_running_var.setType(running_var.type())
        saved_mean.setDebugName("batch_norm_dead_output-" + saved_mean.debugName())
        saved_var.setDebugName("batch_norm_dead_output-" + saved_var.debugName())
        return res

def _cast_if_not_none(tensor, dtype):
    if tensor is not None and not sym_help._is_none(tensor):
        return g.op("Cast", tensor, to_i=sym_help.cast_pytorch_to_onnx[dtype])
    else:
        return tensor

def __interpolate(g, input, size, scale_factor, mode, align_corners, recompute_scale_factor):
    mode = sym_help._maybe_get_const(mode, 's')
    if 'linear' in mode:
        mode = 'linear'
    if 'cubic' in mode:
        mode = 'cubic'
    align_corners = sym_help._maybe_get_const(align_corners, 'b')
    align_corners = False if not isinstance(align_corners, bool) else align_corners
    coordinate_transformation_mode = "asymmetric" if mode == "nearest" \
        else "align_corners" if align_corners else "pytorch_half_pixel"
    # roi only takes effect with coordinate_transformation_mode="tf_crop_and_resize"
    roi = g.op("Constant", value_t=torch.tensor([], dtype=torch.float32))

    if not sym_help._is_none(size):
        input_size = g.op("Shape", input)
        input_size = sym_help._slice_helper(g, input_size, axes=[0], ends=[2], starts=[0])
        # in some cases size is not a packed list but size is a scalar
        # We need to also verify that (sym_help._maybe_get_const(size, 't').dim() == 0)
        # but this information is not always available. Try to get the dim,
        # and if not assume that it is not a scalar.
        try:
            is_scalar = not sym_help._is_packed_list(size) and \
                (sym_help._maybe_get_const(size, 't').dim() == 0)
        except AttributeError:
            is_scalar = not sym_help._is_packed_list(size)
            if not is_scalar:
                warnings.warn("Cannot verify if the output_size is a scalar "
                              "while exporting interpolate. Assuming that it is not a scalar.")

        if is_scalar:
            if not input.type().dim():
                return sym_help._unimplemented(
                    "interpolate (with a scalar output_size)",
                    "missing input shape (try giving an array of output_size values)")
            size = unsqueeze(g, size, 0)
            size = [size for i in range(input.type().dim() - 2)]
            size = g.op("Concat", *size, axis_i=0)
        size = g.op("Cast", size, to_i=sym_help.cast_pytorch_to_onnx['Long'])
        size = g.op("Concat", input_size, size, axis_i=0)
        scales = g.op("Constant", value_t=torch.tensor([], dtype=torch.float32))
        return g.op("Resize",
                    input,
                    roi,
                    scales,
                    size,
                    coordinate_transformation_mode_s=coordinate_transformation_mode,
                    cubic_coeff_a_f=-0.75,   # only valid when mode="cubic"
                    mode_s=mode,             # nearest, linear, or cubic
                    nearest_mode_s="floor")
    else:  # if not sym_help._is_none(scales)
        if not input.type().dim():
            return sym_help._unimplemented("interpolate (with scales)", "missing input shape")
        scales = sym_help._interpolate_get_scales(g, scale_factor, input.type().dim())
        return g.op("Resize",
                    input,
                    roi,
                    scales,
                    coordinate_transformation_mode_s=coordinate_transformation_mode,
                    cubic_coeff_a_f=-0.75,   # only valid when mode="cubic"
                    mode_s=mode,             # nearest, linear, or cubic
                    nearest_mode_s="floor")  # only valid when mode="nearest"

def __is_(g, self, other):
    if symbolic_helper._is_none(other):
        if isinstance(self.type(), _C.OptionalType):
            none = g.op("OptionalHasElement", self)
            return g.op("Not", none)
        else:
            return g.op("Constant", value_t=torch.BoolTensor([0]))
    return opset9.eq(g, self, other)

def argmin(g, input, dim, keepdim):
    if sym_help._is_none(dim):
        flattened = sym_help._reshape_helper(g, input,
                                             g.op("Constant", value_t=torch.tensor([-1])))
        return g.op("ArgMin", flattened, axis_i=0, keepdims_i=False,
                    select_last_index_i=False)
    else:
        dim = _parse_arg(dim, "i")
        keepdim = _parse_arg(keepdim, "i")
        return g.op("ArgMin", input, axis_i=dim, keepdims_i=keepdim,
                    select_last_index_i=False)

def __is_(g, self, other):
    if _is_none(other):
        if isinstance(self.type(), OptionalType):
            none = g.op("OptionalHasElement", self)
            return g.op("Not", none)
        else:
            return g.op("Constant", value_t=torch.BoolTensor([0]))
    return eq(g, self, other)

def argmax(g, input, dim, keepdim):
    if sym_help._is_none(dim):
        from torch.onnx.symbolic_opset9 import reshape
        flattened = reshape(g, input, g.op("Constant", value_t=torch.tensor([-1])))
        return g.op("ArgMax", flattened, axis_i=0, keepdims_i=False,
                    select_last_index_i=False)
    else:
        dim = _parse_arg(dim, "i")
        keepdim = _parse_arg(keepdim, "i")
        return g.op("ArgMax", input, axis_i=dim, keepdims_i=keepdim,
                    select_last_index_i=False)

def argmin(g, input, dim, keepdim):
    if sym_help._is_none(dim):
        from torch.onnx.symbolic_opset9 import reshape
        flattened = reshape(g, input, (-1,))
        return g.op('ArgMin', flattened, axis_i=0, keepdims_i=False,
                    select_last_index_i=True)
    else:
        dim = _parse_arg(dim, 'i')
        keepdim = _parse_arg(keepdim, 'i')
        return g.op('ArgMin', input, axis_i=dim, keepdims_i=keepdim,
                    select_last_index_i=True)

def embedding_bag(g, embedding_matrix, indices, offsets, scale_grad_by_freq, mode, sparse,
                  per_sample_weights, include_last_offset, padding_idx):
    if scale_grad_by_freq and sym_help._training_mode:
        return sym_help._onnx_unsupported("embedding_bag with scale_grad_by_freq for training mode")
    if padding_idx is not None and padding_idx >= 0:
        raise RuntimeError("embedding_bag with padding_idx")

    from torch.onnx.symbolic_opset9 import select
    import warnings
    warnings.warn("Export of embedding_bag with dynamic input/offsets shape is not supported in opset 10. "
                  "Please use opset 11 or higher to export model for dynamic input shape.")
    offsets_dim_0 = sym_help._get_tensor_dim_size(offsets, 0)
    if offsets_dim_0 is not None:
        if include_last_offset:
            offset_len = offsets_dim_0 - 1
            offsets_extended = offsets
        else:
            offset_len = offsets_dim_0
            offsets_extended = [offsets, g.op("Constant", value_t=torch.tensor([maxsize]))]
            offsets_extended = g.op("Concat", *offsets_extended, axis_i=0)
        list_ = []
        for i in range(offset_len):
            start_ = sym_help._unsqueeze_helper(
                g, select(g, offsets_extended, torch.tensor(0), torch.tensor(i)), [0])
            end_ = sym_help._unsqueeze_helper(
                g, select(g, offsets_extended, torch.tensor(0), torch.tensor(i + 1)), [0])
            axes_ = g.op("Constant", value_t=torch.tensor([0]))
            indices_row = g.op("Slice", indices, start_, end_, axes_)

            embeddings = g.op("Gather", embedding_matrix, indices_row)
            if not sym_help._is_none(per_sample_weights):
                per_sample_weights_row = g.op("Slice", per_sample_weights, start_, end_, axes_)
                per_sample_weights_row = sym_help._unsqueeze_helper(g, per_sample_weights_row, [1])
                embeddings = g.op("Mul", embeddings, per_sample_weights_row)
            if mode == 0:
                embeddings = sym_help._reducesum_helper(g, embeddings, axes_i=[0], keepdims_i=0)
            elif mode == 1:
                embeddings = g.op("ReduceMean", embeddings, axes_i=[0], keepdims_i=0)
            else:
                embeddings = g.op("ReduceMax", embeddings, axes_i=[0], keepdims_i=0)

            embeddings = sym_help._unsqueeze_helper(g, embeddings, [0])
            list_.append(embeddings)

        output = g.op("Concat", *list_, axis_i=0)
        # aten::embedding_bag returns a tuple of 4 elements: output, offset2bag, bag_size, max_indices.
        # But the last three outputs are not used in torch.nn.EmbeddingBag or torch.nn.functional.embedding_bag.
        return output, None, None, None
    else:
        return sym_help._onnx_unsupported(
            "embedding_bag with unknown shape of offsets for opset 10 is not supported. "
            "please use opset 11 or higher.")

def multinomial(g, self, num_samples, replacement=False, generator=None):
    if generator is not None and not sym_help._is_none(generator):
        raise RuntimeError("Unsupported: ONNX does not support generator for multinomial")
    return g.op("org.pytorch.aten::ATen", self, num_samples, replacement, generator,
                operator_s="aten::multinomial")

def multinomial(g, self, num_samples, replacement=False, generator=None):
    if generator is not None and not sym_help._is_none(generator):
        raise RuntimeError("Unsupported: ONNX does not support generator for multinomial")
    return g.op("com.microsoft::ATenOp", self, num_samples, replacement, generator,
                name_s='aten::multinomial')

def nll_loss(g, self, target, weight, reduction, ignore_index):
    # none reduction : onnx::Constant[value={0}]
    # mean reduction : onnx::Constant[value={1}]
    # sum reduction : onnx::Constant[value={2}]
    reduction = sym_help._maybe_get_const(reduction, 'i')
    reduction_vals = ['none', 'mean', 'sum']
    reduction = reduction_vals[reduction]

    # when ignore_index is not specified, ignore_index == onnx::Constant[value={-100}]
    if sym_help._maybe_get_const(ignore_index, 'i') == -100:
        if weight.node().mustBeNone():
            return g.op("NegativeLogLikelihoodLoss", self, target, reduction_s=reduction)
        else:
            return g.op("NegativeLogLikelihoodLoss", self, target, weight, reduction_s=reduction)

    # if ignore_index is specified, compute nllloss with no reduction and apply the reduction afterwards
    if weight.node().mustBeNone():
        nllloss = g.op("NegativeLogLikelihoodLoss", self, target, reduction_s='none')
    else:
        nllloss = g.op("NegativeLogLikelihoodLoss", self, target, weight, reduction_s='none')

    from torch.onnx.symbolic_opset9 import zeros_like, ones_like, eq, where, index_select
    zeros = zeros_like(g, nllloss)
    ignored_mask = eq(g, target, ignore_index)
    nllloss = where(g, ignored_mask, zeros, nllloss)

    if reduction == 'none':
        return nllloss

    nllloss = g.op("ReduceSum", nllloss)
    if reduction == 'sum':
        return nllloss

    # reduction == 'mean'
    # if reduction = mean, we want to divide the reduced sum of nllloss
    # by the sum of the non ignored weights (if weights are available),
    # or by the number of non ignored targets (if weights are not available);
    # denominator acts like a mask of which indices to ignore and is then
    # multiplied by weight to set the ignored ones to 0, before summing
    # the values in it
    zeros = zeros_like(g, target)
    ones = ones_like(g, target)
    denominator = where(g, ignored_mask, zeros, ones)
    if not sym_help._is_none(weight):
        # take(weight, target) on 1D tensor weight
        weight = index_select(g, weight, 0, target)
        denominator = g.op("Mul", denominator, weight)

    # denominator is the number of elements if weights are not provided,
    # otherwise it is the sum of the non ignored weights
    denominator = g.op("ReduceSum", denominator)
    nllloss = g.op("Div", nllloss, denominator)
    return nllloss

def binary_cross_entropy_with_logits(g, input, target, weight, pos_weight, reduction):
    p = g.op("Constant", value_t=torch.tensor([1]))
    sig_x = opset9.sigmoid(g, input)
    log_sig_x = opset9.log(g, sig_x)
    sub_1_x = opset9.sub(g, p, sig_x)
    sub_1_y = opset9.sub(g, p, target)
    log_1_x = opset9.log(g, sub_1_x)
    if pos_weight is None or symbolic_helper._is_none(pos_weight):
        output = opset9.neg(
            g,
            opset9.add(
                g, opset9.mul(g, target, log_sig_x), opset9.mul(g, sub_1_y, log_1_x)
            ),
        )
    else:
        output = opset9.neg(
            g,
            opset9.add(
                g,
                opset9.mul(g, opset9.mul(g, target, log_sig_x), pos_weight),
                opset9.mul(g, sub_1_y, log_1_x),
            ),
        )

    if weight is not None and not symbolic_helper._is_none(weight):
        output = opset9.mul(g, weight, output)

    reduction = symbolic_helper._maybe_get_const(reduction, "i")
    if reduction == 0:
        return output
    elif reduction == 1:
        return g.op("ReduceMean", output, keepdims_i=0)
    elif reduction == 2:
        return g.op("ReduceSum", output, keepdims_i=0)
    else:
        return symbolic_helper._onnx_unsupported(
            "binary_cross_entropy_with_logits with reduction other than none, mean, or sum",
            input,
        )

def verify_inferred_shape(graph):
    # Check every node in graph has type properly assigned.
    for n in graph.nodes():
        for out in n.outputs():
            if not _is_tensor_list(out) and not _is_tensor(out) and not _is_none(out):
                raise RuntimeError(
                    "Output of node is neither type Tensor nor type list of Tensor: ", out)
            if _is_tensor(out) and out.type().scalarType() is None:
                raise RuntimeError("Output of node does not have type assigned", out)
            if _is_tensor(out) and out.type().dim() is None:
                raise RuntimeError("Output of node does not have shape assigned", out)

def binary_cross_entropy_with_logits(g, self, target, weight, pos_weight, reduction):
    # If weight is not None, we need to check if it requires grad and add the gradient
    # graph accordingly. But the current custom_gradient_registry doesn't support such
    # None checking, so non-None weight is not supported for now.
    if weight is None or sym_help._is_none(weight):
        return g.op("org.pytorch.aten::ATen", self, target, weight, pos_weight, reduction,
                    operator_s='aten::binary_cross_entropy_with_logits')
    from torch.onnx.symbolic_opset12 import binary_cross_entropy_with_logits as bce
    return bce(g, self, target, weight, pos_weight, reduction)

def index(g, self, index):
    if sym_help._operator_export_type == torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK:
        return g.op("ATen", self, index, operator_s="index")

    if sym_help._is_packed_list(index):
        indices = sym_help._unpack_list(index)
    else:
        indices = [index]

    # Handle single mask index.
    if len(indices) == 1:
        index = indices[0]
        if not sym_help._is_none(index) and (index.type().scalarType() == "Bool"
                                             or index.type().scalarType() == "Byte"):
            from torch.onnx.symbolic_opset9 import nonzero
            index = nonzero(g, index)
            return g.op("GatherND", self, index)
    from torch.onnx.symbolic_opset9 import index as index_opset9
    return index_opset9(g, self, index)

def index(g, self, index):
    if symbolic_helper.is_caffe2_aten_fallback():
        return g.at("index", self, index, overload_name="Tensor")

    if symbolic_helper._is_packed_list(index):
        indices = symbolic_helper._unpack_list(index)
    else:
        indices = [index]

    # Handle single mask index.
    if len(indices) == 1:
        index = indices[0]
        if not symbolic_helper._is_none(index) and (
            index.type().scalarType() == "Bool"
            or index.type().scalarType() == "Byte"
        ):
            index = opset9.nonzero(g, index)
            return g.op("GatherND", self, index)
    return opset9.index(g, self, index)

def index(g, self, index):
    if sym_help.is_caffe2_aten_fallback():
        return g.at("index", self, index, overload_name="Tensor")

    if sym_help._is_packed_list(index):
        indices = sym_help._unpack_list(index)
    else:
        indices = [index]

    # Handle single mask index.
    if len(indices) == 1:
        index = indices[0]
        if not sym_help._is_none(index) and (index.type().scalarType() == "Bool"
                                             or index.type().scalarType() == "Byte"):
            from torch.onnx.symbolic_opset9 import nonzero
            index = nonzero(g, index)
            return g.op("GatherND", self, index)
    from torch.onnx.symbolic_opset9 import index as index_opset9
    return index_opset9(g, self, index)

def embedding_bag(g, embedding_matrix, indices, offsets, scale_grad_by_freq, mode, sparse,
                  per_sample_weights, include_last_offset, padding_idx):
    if scale_grad_by_freq and sym_help._training_mode:
        return sym_help._onnx_unsupported(
            'embedding_bag with scale_grad_by_freq for training mode')
    if padding_idx is not None and padding_idx >= 0:
        raise RuntimeError('embedding_bag with padding_idx')

    loop_condition = g.op("Constant", value_t=torch.tensor(1))
    loop_condition = g.op("Cast", loop_condition, to_i=9)
    zero = g.op("Constant", value_t=torch.tensor([0]))

    indices_len = sym_help._unsqueeze_helper(
        g,
        sym_help._size_helper(g, indices, g.op("Constant", value_t=torch.tensor(0))),
        [0])
    if not include_last_offset:
        offsets = [offsets, indices_len]
        offsets = g.op("Concat", *offsets, axis_i=0)

    # Offsets holds the starting index position of each bag. So we create a list of the indices slices (determined by
    # offsets) and gather those indices in indices_row. Then we use this subset of indices to gather from embeddings.
    # The embeddings output is a loop scan output, so we can avoid creating a sequence and inserting elements in.
    offsets_starts = sym_help._slice_helper(g, offsets, axes=[0], starts=[0], ends=[maxsize], steps=[1])
    offsets_ends = sym_help._slice_helper(g, offsets, axes=[0], starts=[1], ends=[maxsize], steps=[1])

    loop_len = sym_help._size_helper(g, offsets_ends, g.op("Constant", value_t=torch.tensor(0)))
    loop = g.op("Loop", loop_len, loop_condition)

    loop_block = _add_block(loop.node())
    block_input_iter = _add_input_to_block(loop_block)
    cond = _add_input_to_block(loop_block)

    indices_start = loop_block.op("Gather", offsets_starts, block_input_iter, axis_i=0)
    indices_end = loop_block.op("Gather", offsets_ends, block_input_iter, axis_i=0)
    indices_start = sym_help._unsqueeze_helper(loop_block, indices_start, [0])
    indices_end = sym_help._unsqueeze_helper(loop_block, indices_end, [0])

    indices_row = loop_block.op("Slice", indices, indices_start, indices_end, zero)
    embeddings = loop_block.op("Gather", embedding_matrix, indices_row, axis_i=0)
    if not sym_help._is_none(per_sample_weights):
        per_sample_weights_row = loop_block.op("Slice", per_sample_weights,
                                               indices_start, indices_end, zero)
        per_sample_weights_row = sym_help._unsqueeze_helper(loop_block,
                                                            per_sample_weights_row, [1])
        embeddings = loop_block.op("Mul", embeddings, per_sample_weights_row)
    if mode == 0:
        embeddings = sym_help._reducesum_helper(loop_block, embeddings, axes_i=[0], keepdims_i=0)
    elif mode == 1:
        embeddings = loop_block.op("ReduceMean", embeddings, axes_i=[0], keepdims_i=0)
    else:
        embeddings = loop_block.op("ReduceMax", embeddings, axes_i=[0], keepdims_i=0)

    cond_out = loop_block.op("Cast", loop_condition, to_i=9)
    _add_output_to_block(loop_block, cond_out)
    _add_output_to_block(loop_block, embeddings)

    # aten::embedding_bag returns a tuple of 4 elements: output, offset2bag, bag_size, max_indices.
    # But the last three outputs are not used in torch.nn.EmbeddingBag or torch.nn.functional.embedding_bag.
    return loop.node().output(), None, None, None

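# A minimal usage sketch, not part of the snippets above: exporting nn.EmbeddingBag
# routes through an embedding_bag symbolic like the ones here. The integer `mode`
# checked in the branches corresponds to 0=sum, 1=mean, 2=max. The file name is
# illustrative only.
import torch

bag = torch.nn.EmbeddingBag(num_embeddings=10, embedding_dim=4, mode="sum")
indices = torch.tensor([1, 2, 4, 5, 4, 3, 2, 9])
offsets = torch.tensor([0, 4])
torch.onnx.export(bag, (indices, offsets), "embedding_bag.onnx", opset_version=11)
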
def repeat_interleave(g, self, repeats, dim=None):
    from torch.onnx.symbolic_opset9 import reshape
    input = self
    final_dim = dim
    # if dim is None flatten
    # By default, use the flattened input array, and return a flat output array
    if sym_help._is_none(dim):
        input = reshape(g, self, g.op("Constant", value_t=torch.tensor([-1])))
        dim = 0
    else:
        dim = sym_help._maybe_get_scalar(dim)

    repeats_dim = sym_help._get_tensor_rank(repeats)
    repeats_sizes = sym_help._get_tensor_sizes(repeats)
    input_sizes = sym_help._get_tensor_sizes(input)
    if repeats_dim is None:
        raise RuntimeError('Unsupported: ONNX export of repeat_interleave for unknown '
                           'repeats rank.')
    if repeats_sizes is None:
        raise RuntimeError('Unsupported: ONNX export of repeat_interleave for unknown '
                           'repeats size.')
    if input_sizes is None:
        raise RuntimeError('Unsupported: ONNX export of repeat_interleave for unknown '
                           'input size.')

    # Handle cases where dim is negative
    if dim < 0:
        dim += len(input_sizes)

    output_sizes = input_sizes.copy()
    perm_i = [0]
    for idx, input_size in enumerate(input_sizes):
        perm_i.append(idx + 1)
        if input_size is None:
            output_sizes[idx], input_sizes[idx] = 0, -1
    perm_i[0], perm_i[dim] = perm_i[dim], perm_i[0]

    # Cases when repeats is a single value tensor and dim has unknown input size
    if (repeats_dim == 0 or (repeats_dim == 1 and repeats_sizes[0] == 1)) and output_sizes[dim] == 0:
        if not sym_help._is_tensor(repeats):
            repeats = g.op("Constant", value_t=torch.LongTensor(repeats))
        reps = sym_help._size_helper(g, input, dim)
        reps = unsqueeze(g, reps, 0)
        repeats = g.op("Expand", repeats, reps)
    # There are cases when the repeats are 1-d tensor with multiple repeats, but dim
    # provided along one of the dynamic axes provided. A simple example would be
    # input.shape -> [1, 1, *] where * represents the dynamic axes, and dim = 2
    # Now, repeat interleaving can be performed in pytorch when the value of * matches
    # with the number of elements in repeat, for example if * -> 2, number of repeats
    # should be 2 as well.
    else:
        return torch.onnx.symbolic_opset9.repeat_interleave(g, self, repeats, final_dim)

    reps_like = g.op("ConstantOfShape", g.op("Shape", repeats),
                     value_t=torch.tensor([1], dtype=torch.long))
    r_splits = split(g, repeats, reps_like, 0)
    i_splits = split(g, input, reps_like, dim)

    output_sizes[dim], input_sizes[dim] = -1, 1

    # Create a loop to iterate over each value along the dimension
    # and perform individual interleaving using the repeats tensor
    # Loop is of the following pattern
    # input (trip_count, cond)
    #    int trip_count = ...;
    #    bool cond = ...;
    #    for (int i=0; i < trip_count && cond; ++i) {
    #      cond = ...;
    #    }

    # Loop conditions
    loop_condition = g.op("Constant", value_t=torch.tensor(1))
    loop_condition = g.op("Cast", loop_condition, to_i=9)
    loop_len = reps
    loop = g.op("Loop", loop_len, loop_condition)

    # Loop inputs
    loop_block = _add_block(loop.node())
    block_input_iter = _add_input_to_block(loop_block)
    cond = _add_input_to_block(loop_block)

    r_split = loop_block.op("SequenceAt", r_splits, block_input_iter)
    i_split = loop_block.op("SequenceAt", i_splits, block_input_iter)

    i_split = unsqueeze(loop_block, i_split, dim + 1)
    r_concat = [
        loop_block.op("Constant", value_t=torch.LongTensor(input_sizes[:dim + 1])),
        r_split,
        loop_block.op("Constant", value_t=torch.LongTensor(input_sizes[dim + 1:]))
    ]
    r_concat = loop_block.op("Concat", *r_concat, axis_i=0)
    i_split = expand(loop_block, i_split, r_concat, None)
    i_split = reshape(loop_block, i_split,
                      g.op("Constant", value_t=torch.LongTensor(output_sizes)))

    # Loop outputs
    cond_out = loop_block.op("Cast", loop_condition, to_i=9)
    _add_output_to_block(loop_block, cond_out)
    _add_output_to_block(loop_block, i_split)
    loop_out = loop.node().output()

    # In this loop, the outputs are scan outputs and are concatenated along
    # the zero'th dimension (by default). In order to avoid this and concatenate
    # along the dimension provided, some post-processing is required
    loop_out = g.op("Transpose", loop_out, perm_i=perm_i)
    return reshape(g, loop_out, g.op("Constant", value_t=torch.LongTensor(output_sizes)))

def embedding_bag(g, embedding_matrix, indices, offsets, scale_grad_by_freq, mode, sparse,
                  per_sample_weights, include_last_offset):
    if scale_grad_by_freq and sym_help._training_mode:
        return sym_help._onnx_unsupported(
            'embedding_bag with scale_grad_by_freq for training mode')

    from torch.onnx.symbolic_opset9 import size, div, select

    # Check if initial indices was 2D. In functional.py:
    # offsets is set to torch.arange(0, indices.numel(), indices.size(1))
    # Then indices is reshaped to 1D: indices.reshape(-1)
    if len(list(indices.node().inputs())) > 0 and indices.node().inputs().__next__().type().sizes() is not None \
            and len(indices.node().inputs().__next__().type().sizes()) == 2:
        # Assert include_last_offset is False
        assert not include_last_offset
        embeddings = g.op("Gather", embedding_matrix, indices)
        dim_0 = size(g, offsets, g.op("Constant", value_t=torch.LongTensor([0])))
        dim_1 = div(g,
                    size(g, indices, g.op("Constant", value_t=torch.LongTensor([0]))),
                    dim_0)
        dim_2 = g.op("Constant", value_t=torch.LongTensor([-1]))

        shape = [dim_0, dim_1, dim_2]
        shape = g.op("Concat", *shape, axis_i=0)

        if not sym_help._is_none(per_sample_weights):
            per_sample_weights = g.op("Unsqueeze", per_sample_weights, axes_i=[1])
            embeddings = g.op("Mul", embeddings, per_sample_weights)

        embeddings = g.op("Reshape", embeddings, shape)
        if mode == 0:
            embeddings = g.op("ReduceSum", embeddings, axes_i=[1], keepdims_i=0)
        elif mode == 1:
            embeddings = g.op("ReduceMean", embeddings, axes_i=[1], keepdims_i=0)
        else:
            embeddings = g.op("ReduceMax", embeddings, axes_i=[1], keepdims_i=0)
        # aten::embedding_bag returns a tuple of 4 elements: output, offset2bag, bag_size, max_indices.
        # But the last three outputs are not used in torch.nn.EmbeddingBag or torch.nn.functional.embedding_bag.
        return embeddings, None, None, None
    elif offsets.type().sizes() is not None:
        if include_last_offset:
            offset_len = offsets.type().sizes()[0] - 1
            offsets_extended = offsets
        else:
            offset_len = offsets.type().sizes()[0]
            offsets_extended = [offsets, g.op("Constant", value_t=torch.tensor([maxsize]))]
            offsets_extended = g.op("Concat", *offsets_extended, axis_i=0)
        list_ = []
        for i in range(offset_len):
            start_ = g.op("Unsqueeze",
                          select(g, offsets_extended, torch.tensor(0), torch.tensor(i)),
                          axes_i=[0])
            end_ = g.op("Unsqueeze",
                        select(g, offsets_extended, torch.tensor(0), torch.tensor(i + 1)),
                        axes_i=[0])
            axes_ = g.op("Constant", value_t=torch.tensor([0]))
            indices_row = g.op("Slice", indices, start_, end_, axes_)

            embeddings = g.op("Gather", embedding_matrix, indices_row)
            if not sym_help._is_none(per_sample_weights):
                per_sample_weights_row = g.op("Slice", per_sample_weights, start_, end_, axes_)
                per_sample_weights_row = g.op("Unsqueeze", per_sample_weights_row, axes_i=[1])
                embeddings = g.op("Mul", embeddings, per_sample_weights_row)
            if mode == 0:
                embeddings = g.op("ReduceSum", embeddings, axes_i=[0], keepdims_i=0)
            elif mode == 1:
                embeddings = g.op("ReduceMean", embeddings, axes_i=[0], keepdims_i=0)
            else:
                embeddings = g.op("ReduceMax", embeddings, axes_i=[0], keepdims_i=0)

            embeddings = g.op("Unsqueeze", embeddings, axes_i=[0])
            list_.append(embeddings)

        output = g.op("Concat", *list_, axis_i=0)
        # aten::embedding_bag returns a tuple of 4 elements: output, offset2bag, bag_size, max_indices.
        # But the last three outputs are not used in torch.nn.EmbeddingBag or torch.nn.functional.embedding_bag.
        return output, None, None, None
    else:
        return sym_help._onnx_unsupported('embedding_bag with unknown shape of indices')

def repeat_interleave(g, self, repeats, dim=None, output_size=None):
    input = self
    final_dim = dim
    # if dim is None flatten
    # By default, use the flattened input array, and return a flat output array
    if sym_help._is_none(dim):
        input = sym_help._reshape_helper(
            g, self, g.op("Constant", value_t=torch.tensor([-1])))
        dim = 0
    else:
        dim = sym_help._maybe_get_scalar(dim)

    repeats_dim = sym_help._get_tensor_rank(repeats)
    repeats_sizes = sym_help._get_tensor_sizes(repeats)
    input_sizes = sym_help._get_tensor_sizes(input)
    if repeats_dim is None:
        raise RuntimeError("Unsupported: ONNX export of repeat_interleave for unknown "
                           "repeats rank.")
    if repeats_sizes is None:
        raise RuntimeError("Unsupported: ONNX export of repeat_interleave for unknown "
                           "repeats size.")
    if input_sizes is None:
        raise RuntimeError("Unsupported: ONNX export of repeat_interleave for unknown "
                           "input size.")

    # Handle cases where dim is negative
    if dim < 0:
        dim += len(input_sizes)

    output_sizes = input_sizes.copy()
    for idx, input_size in enumerate(input_sizes):
        if input_size is None:
            output_sizes[idx], input_sizes[idx] = 0, -1

    cond_dynamic_repeats = (repeats_dim == 1 and repeats_sizes[0] is None)
    # If input size is dynamic or repeats vector is dynamic
    if output_sizes[dim] == 0 or cond_dynamic_repeats:
        reps = sym_help._size_helper(g, input, dim)
        reps = unsqueeze(g, reps, 0)
        # Check if repeats vector is a single integer value
        # or a single dimension tensor with non-dynamic values
        if repeats_dim == 0 or (repeats_dim == 1 and repeats_sizes[0] == 1):
            if not sym_help._is_tensor(repeats):
                repeats = g.op("Constant", value_t=torch.LongTensor(repeats))
            repeats = g.op("Expand", repeats, reps)
        # Check if repeats is dynamic
        # As repeats is dynamic, we use a where node as a substitute for the if statement
        # If repeats_dim = 1, expand repeats otherwise use original tensor
        elif cond_dynamic_repeats:
            repeat_dim = sym_help._size_helper(
                g, repeats, g.op("Constant", value_t=torch.LongTensor([0])))
            repeat_cond = g.op("Equal", repeat_dim,
                               g.op("Constant", value_t=torch.LongTensor([1])))
            repeats = where(g, repeat_cond, g.op("Expand", repeats, reps), repeats)
    # There are cases when the repeats are 1-d tensor with multiple repeats, but dim
    # provided along one of the dynamic axes provided. A simple example would be
    # input.shape -> [1, 1, *] where * represents the dynamic axes, and dim = 2
    # Now, repeat interleaving can be performed in pytorch when the value of * matches
    # with the number of elements in repeat, for example if * -> 2, number of repeats
    # should be 2 as well.
    else:
        return torch.onnx.symbolic_opset9.repeat_interleave(g, self, repeats, final_dim)

    reps_like = g.op("ConstantOfShape", g.op("Shape", repeats),
                     value_t=torch.tensor([1], dtype=torch.long))
    r_splits = split(g, repeats, reps_like, 0)
    i_splits = split(g, input, reps_like, dim)

    output_sizes[dim], input_sizes[dim] = -1, 1

    # Create a loop to iterate over each value along the dimension
    # and perform individual interleaving using the repeats tensor
    # Loop is of the following pattern
    # input (trip_count, cond)
    #    int trip_count = ...;
    #    bool cond = ...;
    #    for (int i=0; i < trip_count && cond; ++i) {
    #      cond = ...;
    #    }

    # Loop conditions
    loop_condition = g.op("Constant", value_t=torch.tensor(1))
    loop_condition = g.op("Cast", loop_condition, to_i=9)
    loop_len = reps

    # Create an empty sequence to store final expansions
    final_splits = g.op("SequenceEmpty")
    loop = g.op("Loop", loop_len, loop_condition, final_splits)

    # Loop inputs
    loop_block = _add_block(loop.node())
    block_input_iter = _add_input_to_block(loop_block)
    cond = _add_input_to_block(loop_block)
    final_splits = _add_input_to_block(loop_block)

    r_split = loop_block.op("SequenceAt", r_splits, block_input_iter)
    i_split = loop_block.op("SequenceAt", i_splits, block_input_iter)

    i_split = unsqueeze(loop_block, i_split, dim + 1)
    r_concat = [
        loop_block.op("Constant", value_t=torch.LongTensor(input_sizes[:dim + 1])),
        r_split,
        loop_block.op("Constant", value_t=torch.LongTensor(input_sizes[dim + 1:]))
    ]
    r_concat = loop_block.op("Concat", *r_concat, axis_i=0)
    i_split = expand(loop_block, i_split, r_concat, None)
    i_split = sym_help._reshape_helper(
        loop_block, i_split, g.op("Constant", value_t=torch.LongTensor(output_sizes)))
    final_splits = loop_block.op("SequenceInsert", final_splits, i_split)

    # Loop outputs
    cond_out = loop_block.op("Cast", loop_condition, to_i=9)
    _add_output_to_block(loop_block, cond_out)
    _add_output_to_block(loop_block, final_splits)

    loop_out = loop.node().output()
    loop_out = g.op("ConcatFromSequence", loop_out, axis_i=dim)
    return loop_out

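# A minimal usage sketch, not part of the snippets above: torch.repeat_interleave in a
# module's forward is exported through a repeat_interleave symbolic such as the ones
# here. The module and file names are illustrative only.
import torch

class RepeatModel(torch.nn.Module):
    def forward(self, x):
        return torch.repeat_interleave(x, repeats=2, dim=1)

torch.onnx.export(RepeatModel(), (torch.randn(1, 3, 4),), "repeat_interleave.onnx",
                  opset_version=13)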