def _sample(i, c, ph, pw):
    """Average the bilinear samples taken inside one pooled bin of one ROI.

    ``rois``, ``spatial_scale``, ``pooled_size_h``, ``pooled_size_w``,
    ``sample_ratio``, ``dtype`` and ``_bilinear`` are free names resolved
    outside this function.

    Parameters
    ----------
    i : index expression
        Selects the ROI row ``rois[i]``.
    c : index expression
        Channel index, forwarded unchanged to ``_bilinear``.
    ph : index expression
        Row of the pooled output bin.
    pw : index expression
        Column of the pooled output bin.

    Returns
    -------
    result : reduction expression
        ``te.sum`` over the sampling grid of ``_bilinear(...) / count`` where
        ``count`` is the number of grid points, i.e. the mean sample value.
    """
    box = rois[i]
    batch_index = box[0].astype('int32')

    # Columns 1..4 of an ROI row are (w_start, h_start, w_end, h_end); bring
    # them to feature-map coordinates.
    w_first = box[1] * spatial_scale
    h_first = box[2] * spatial_scale
    w_last = box[3] * spatial_scale
    h_last = box[4] * spatial_scale

    # A malformed (inverted or tiny) ROI is widened to at least 1x1.
    roi_h = tvm.te.max(h_last - h_first, tvm.tir.const(1.0, dtype))
    roi_w = tvm.te.max(w_last - w_first, tvm.tir.const(1.0, dtype))

    bin_h = roi_h / pooled_size_h
    bin_w = roi_w / pooled_size_w

    # Sampling grid per bin: fixed when sample_ratio is positive, otherwise
    # derived from the bin size.
    if sample_ratio > 0:
        grid_h = grid_w = tvm.tir.const(sample_ratio, 'int32')
    else:
        grid_h = te.ceil(roi_h / pooled_size_h).astype('int32')
        grid_w = te.ceil(roi_w / pooled_size_w).astype('int32')

    rh = te.reduce_axis((0, grid_h))
    rw = te.reduce_axis((0, grid_w))

    # Top-left corner of the requested bin.
    bin_top = h_first + ph * bin_h
    bin_left = w_first + pw * bin_w

    # Sample at the centre of every grid cell inside the bin.
    sample_y = bin_top + (rh + 0.5) * bin_h / grid_h
    sample_x = bin_left + (rw + 0.5) * bin_w / grid_w
    sampled = _bilinear(batch_index, c, sample_y, sample_x)

    return te.sum(sampled / (grid_h * grid_w), axis=[rh, rw])
def prepare_output_ir(sorted_bbox_buf, remove_mask_buf, out_buf):
    """Build the IR that packs the boxes surviving NMS into contiguous rows.

    One ``threadIdx.x`` thread handles one batch entry. The thread first counts
    the boxes whose remove flag is unset, then walks the box list
    ``ceil(rpn_post_nms_top_n / nkeep)`` times, copying kept boxes until
    ``rpn_post_nms_top_n`` rows have been written. Nothing is written for a
    batch entry that has no kept box.

    Parameters
    ----------
    sorted_bbox_buf : tvm.te.schedule.Buffer
        3-D with shape [batch, num_bbox, 5].
        The last dimension is in format of
        [w_start, h_start, w_end, h_end, score].

    remove_mask_buf : tvm.te.schedule.Buffer
        2-D with shape [batch, num_bbox].
        Boolean mask of whether a bounding box should be removed.

    out_buf : tvm.te.schedule.Buffer
        2-D with shape [batch * rpn_post_nms_top_n, 5].
        The last dimension is in format of
        [batch_index, w_start, h_start, w_end, h_end].

    Returns
    -------
    stmt : Stmt
        The result IR statement.
    """
    batch, num_bbox, _ = get_const_tuple(sorted_bbox_buf.shape)
    rpn_post_nms_top_n = get_const_int(out_buf.shape[0]) // batch

    thread_x = te.thread_axis("threadIdx.x")
    builder = tvm.tir.ir_builder.create()
    # Thread extent equals the batch size: one thread per batch entry.
    builder.scope_attr(thread_x, "thread_extent", batch)
    batch_idx = thread_x

    # Running count of output rows already written by this thread.
    write_pos = builder.allocate('int32', (1, ), 'i', scope='local')
    write_pos[0] = 0

    boxes_in = builder.buffer_ptr(sorted_bbox_buf)
    removed = builder.buffer_ptr(remove_mask_buf)
    rows_out = builder.buffer_ptr(out_buf)

    # Number of boxes that survived NMS for this batch entry.
    kept = builder.allocate('int32', (1, ), 'nkeep', scope='local')
    kept[0] = 0

    with builder.for_range(0, num_bbox) as box:
        # `== False` is kept on purpose: it is handed to the IR builder as an
        # expression (presumably a TIR equality node), not a Python bool.
        with builder.if_scope(removed[batch_idx * num_bbox + box] == False):
            kept[0] = kept[0] + 1

    with builder.if_scope(kept[0] > 0):
        # Enough passes over the box list to fill every output row.
        passes = te.ceil(
            tvm.tir.const(rpn_post_nms_top_n, 'float32') / kept[0]
        ).astype('int32')
        with builder.for_range(0, passes):
            with builder.for_range(0, num_bbox) as box:
                src = (batch_idx * num_bbox + box) * 5
                dst = (batch_idx * rpn_post_nms_top_n + write_pos[0]) * 5
                wanted = tvm.tir.all(
                    write_pos[0] < rpn_post_nms_top_n,
                    removed[batch_idx * num_bbox + box] == False,
                )
                with builder.if_scope(wanted):
                    # Column 0 carries the batch index; the four coordinates
                    # follow. The score (input column 4) is not copied.
                    rows_out[dst] = tvm.tir.Cast('float32', batch_idx)
                    with builder.for_range(0, 4, for_type='unroll') as coord:
                        rows_out[dst + coord + 1] = boxes_in[src + coord]
                    write_pos[0] = write_pos[0] + 1

    return builder.get()
def prepare_output_ir(sorted_bbox_buf, remove_mask_buf, out_buf):
    """Build the IR that packs the boxes surviving NMS into contiguous rows.

    Batch entries are processed by explicit loops. For every entry the kept
    boxes (remove flag unset) are counted, then the box list is walked
    ``ceil(rpn_post_nms_top_n / nkeep)`` times, copying kept boxes until
    ``rpn_post_nms_top_n`` rows have been written. Nothing is written for a
    batch entry that has no kept box.

    Parameters
    ----------
    sorted_bbox_buf : tvm.te.schedule.Buffer
        3-D with shape [batch, num_bbox, 5].
        The last dimension is in format of
        [w_start, h_start, w_end, h_end, score].

    remove_mask_buf : tvm.te.schedule.Buffer
        2-D with shape [batch, num_bbox].
        Boolean mask of whether a bounding box should be removed.

    out_buf : tvm.te.schedule.Buffer
        2-D with shape [batch * rpn_post_nms_top_n, 5].
        The last dimension is in format of
        [batch_index, w_start, h_start, w_end, h_end].

    Returns
    -------
    stmt : Stmt
        The result IR statement.
    """
    batch, num_bbox, _ = get_const_tuple(sorted_bbox_buf.shape)
    rpn_post_nms_top_n = get_const_int(out_buf.shape[0]) // batch

    builder = tvm.tir.ir_builder.create()

    # Per-batch count of output rows already written.
    write_pos = builder.allocate("int32", (batch,), "i", scope="local")

    boxes_in = builder.buffer_ptr(sorted_bbox_buf)
    removed = builder.buffer_ptr(remove_mask_buf)
    rows_out = builder.buffer_ptr(out_buf)

    # Per-batch number of boxes that survived NMS.
    kept = builder.allocate("int32", (batch,), "nkeep", scope="local")

    with builder.for_range(0, batch) as entry:
        kept[entry] = 0
        write_pos[entry] = 0

    with builder.for_range(0, num_bbox) as box:
        with builder.for_range(0, batch) as entry:
            # `== False` is kept on purpose: it is handed to the IR builder as
            # an expression (presumably a TIR equality node), not a Python bool.
            with builder.if_scope(removed[entry * num_bbox + box] == False):
                kept[entry] = kept[entry] + 1

    with builder.for_range(0, batch) as entry:
        with builder.if_scope(kept[entry] > 0):
            # Enough passes over the box list to fill every output row.
            passes = te.ceil(
                tvm.tir.const(rpn_post_nms_top_n, "float32") / kept[entry]
            ).astype("int32")
            with builder.for_range(0, passes):
                with builder.for_range(0, num_bbox) as box:
                    src = (entry * num_bbox + box) * 5
                    dst = (entry * rpn_post_nms_top_n + write_pos[entry]) * 5
                    wanted = tvm.tir.all(
                        write_pos[entry] < rpn_post_nms_top_n,
                        removed[entry * num_bbox + box] == False,
                    )
                    with builder.if_scope(wanted):
                        # Column 0 carries the batch index; the four
                        # coordinates follow. The score is not copied.
                        rows_out[dst] = tvm.tir.Cast("float32", entry)
                        with builder.for_range(0, 4, kind="unroll") as coord:
                            rows_out[dst + coord + 1] = boxes_in[src + coord]
                        write_pos[entry] = write_pos[entry] + 1

    return builder.get()
def get_closest_index(in_x, rounding_method, boxes):
    """Get the closest integer index to a value based on a rounding method.

    Parameters
    ----------
    in_x : expression
        Fractional coordinate to convert to an index.

    rounding_method : str
        One of "round", "round_prefer_floor", "round_prefer_ceil", "floor"
        or "ceil". Ignored when ``boxes`` is given.

    boxes : object or None
        When not None, plain rounding is always used regardless of
        ``rounding_method``.

    Returns
    -------
    closest_x_index : expression
        ``in_x`` rounded with the selected method and cast to "int32".

    Raises
    ------
    ValueError
        If ``boxes`` is None and ``rounding_method`` is not supported.
    """
    if rounding_method == "round" or boxes is not None:
        closest_x_index = te.round(in_x).astype("int32")
    elif rounding_method == "round_prefer_floor":
        # Ties (x.5) go down: ceil(x - 0.5).
        closest_x_index = te.ceil(in_x - 0.5).astype("int32")
    elif rounding_method == "round_prefer_ceil":
        # Ties (x.5) go up: floor(x + 0.5).
        closest_x_index = te.floor(in_x + 0.5).astype("int32")
    elif rounding_method == "floor":
        # Add epsilon to floor to prevent gpu rounding errors.
        epsilon = 1e-5
        closest_x_index = te.floor(in_x + epsilon).astype("int32")
    elif rounding_method == "ceil":
        # Subtract epsilon from ceil to prevent gpu rounding errors.
        epsilon = 1e-5
        closest_x_index = te.ceil(in_x - epsilon).astype("int32")
    else:
        raise ValueError("Unknown rounding method: {}".format(rounding_method))
    return closest_x_index
def _pool(i, c, ph, pw):
    """Max-pool the feature-map window covered by one bin of one ROI.

    ``rois``, ``data``, ``spatial_scale``, ``pooled_size_h``,
    ``pooled_size_w``, ``height``, ``width`` and ``dtype`` are free names
    resolved outside this function.

    Parameters
    ----------
    i : index expression
        Selects the ROI row ``rois[i]``.
    c : index expression
        Channel index into ``data``.
    ph : index expression
        Row of the pooled output bin.
    pw : index expression
        Column of the pooled output bin.

    Returns
    -------
    result : reduction expression
        Maximum of ``data`` over the bin window; an empty window reduces to 0
        because the reducer's identity element is 0 in that case.
    """

    def _to_cell(coord):
        # Scale an ROI coordinate to the feature map and snap it to a cell.
        return te.round(coord * spatial_scale).astype("int32")

    def _clip(offset, origin, limit):
        # Shift a bin-relative bound by the ROI origin and clamp to [0, limit].
        return tvm.te.min(tvm.te.max(offset + origin, 0), limit)

    box = rois[i]
    batch_index = box[0].astype("int32")

    # Columns 1..4 of an ROI row are (w_start, h_start, w_end, h_end).
    first_h = _to_cell(box[2])
    first_w = _to_cell(box[1])
    last_h = _to_cell(box[4])
    last_w = _to_cell(box[3])

    # A malformed ROI is forced to be at least 1x1.
    roi_h = tvm.te.max(last_h - first_h + 1, tvm.tir.const(1, "int32"))
    roi_w = tvm.te.max(last_w - first_w + 1, tvm.tir.const(1, "int32"))

    bin_h = roi_h.astype(dtype) / pooled_size_h
    bin_w = roi_w.astype(dtype) / pooled_size_w

    # The epsilon keeps floor/ceil from flipping on floating point noise.
    epsilon = tvm.tir.const(0.00001, dtype)
    hstart = te.floor(ph * bin_h + epsilon).astype("int32")
    wstart = te.floor(pw * bin_w + epsilon).astype("int32")
    hend = te.ceil((ph + 1) * bin_h - epsilon).astype("int32")
    wend = te.ceil((pw + 1) * bin_w - epsilon).astype("int32")

    hstart = _clip(hstart, first_h, height)
    wstart = _clip(wstart, first_w, width)
    hend = _clip(hend, first_h, height)
    wend = _clip(wend, first_w, width)

    non_empty = tvm.tir.all(hstart < hend, wstart < wend)

    def _identity(elem_dtype):
        # Lowest representable value for a real window, 0 for an empty one.
        return tvm.tir.if_then_else(
            non_empty, tvm.te.min_value(elem_dtype), tvm.tir.const(0.0, elem_dtype)
        )

    def _larger(x, y):
        return tvm.te.max(x, y)

    window_max = te.comm_reducer(_larger, _identity, name="max")
    rh = te.reduce_axis((0, hend - hstart), "rh")
    rw = te.reduce_axis((0, wend - wstart), "rw")
    return window_max(data[batch_index, c, hstart + rh, wstart + rw], axis=[rh, rw])
def ceil(x):
    """Apply ``te.ceil`` to every element of x.

    Parameters
    ----------
    x : tvm.te.Tensor
        Input argument.

    Returns
    -------
    y : tvm.te.Tensor
        Tensor with the shape of ``x`` holding the element-wise result.
    """

    def _ceil_at(*indices):
        # Variadic so the same function serves tensors of any rank.
        return te.ceil(x(*indices))

    return te.compute(x.shape, _ceil_at)
def test_const_fold4():
    """Check that a chain of operations on constants folds to immediates."""
    four = tvm.tir.const(4, "int32")

    # int + int literal folds to an IntImm
    nine = four + 5
    assert isinstance(nine, tvm.tir.IntImm)
    assert nine.value == 9

    # truncating division folds as well
    three = tvm.tir.truncdiv(nine, 3)
    assert isinstance(three, tvm.tir.IntImm)
    assert three.value == 3

    # int + float literal folds to a FloatImm
    with_fraction = three + 0.55
    assert isinstance(with_fraction, tvm.tir.FloatImm)
    assert abs(with_fraction.value - 3.55) < 1e-6

    # ceil of a constant stays a float immediate
    rounded_up = te.ceil(with_fraction)
    assert isinstance(rounded_up, tvm.tir.FloatImm)
    assert rounded_up.value == 4

    # casting the folded float yields an integer immediate
    as_int = rounded_up.astype("int")
    assert isinstance(as_int, tvm.tir.IntImm), "x6={}".format(as_int)
    assert as_int.value == 4, "x6={}".format(as_int)

    # a compound float expression folds all the way through the final cast
    compound = (te.round((tvm.tir.const(6.5, "float32") - 1) / 1.5) + 2).astype("int")
    assert isinstance(compound, tvm.tir.IntImm)
    assert compound.value == 6
def _sample_common(
    i,
    c,
    ph,
    pw,
    rois,
    pooled_size_h,
    pooled_size_w,
    spatial_scale,
    sample_ratio,
    dtype,
    avg_mode,
    bilinear_func,
):
    """Reduce the bilinear samples taken inside one pooled bin of one ROI.

    Parameters
    ----------
    i : index expression
        Selects the ROI row ``rois[i]``.
    c : index expression
        Channel index, forwarded unchanged to ``bilinear_func``.
    ph, pw : index expression
        Row and column of the pooled output bin.
    rois : tensor
        Indexed as ``rois[i][0..4]``: batch index followed by
        (w_start, h_start, w_end, h_end).
    pooled_size_h, pooled_size_w : number
        Number of bins along height and width.
    spatial_scale : number
        Factor applied to the ROI coordinates.
    sample_ratio : int
        Sampling grid size per bin when positive; otherwise the grid is
        derived from the bin size.
    dtype : str
        Data type of the constant used to clamp the ROI size.
    avg_mode : bool
        True to average the samples, False to take their maximum.
    bilinear_func : callable
        Called as ``bilinear_func(batch_index, c, y, x)`` for each sample.

    Returns
    -------
    result : reduction expression
        ``te.sum`` of ``sample / count`` in avg mode, ``te.max`` of the
        samples otherwise, reduced over the sampling grid.
    """
    box = rois[i]
    batch_index = box[0].astype("int32")

    # Bring the ROI corners to feature-map coordinates.
    w_first = box[1] * spatial_scale
    h_first = box[2] * spatial_scale
    w_last = box[3] * spatial_scale
    h_last = box[4] * spatial_scale

    # A malformed (inverted or tiny) ROI is widened to at least 1x1.
    roi_h = tvm.te.max(h_last - h_first, tvm.tir.const(1.0, dtype))
    roi_w = tvm.te.max(w_last - w_first, tvm.tir.const(1.0, dtype))

    bin_h = roi_h / pooled_size_h
    bin_w = roi_w / pooled_size_w

    if sample_ratio > 0:
        grid_h = grid_w = tvm.tir.const(sample_ratio, "int32")
    else:
        grid_h = te.ceil(roi_h / pooled_size_h).astype("int32")
        grid_w = te.ceil(roi_w / pooled_size_w).astype("int32")

    rh = te.reduce_axis((0, grid_h))
    rw = te.reduce_axis((0, grid_w))

    # Top-left corner of the requested bin.
    bin_top = h_first + ph * bin_h
    bin_left = w_first + pw * bin_w

    # Both modes sample at the centre of every grid cell inside the bin.
    sampled = bilinear_func(
        batch_index,
        c,
        bin_top + (rh + 0.5) * bin_h / grid_h,
        bin_left + (rw + 0.5) * bin_w / grid_w,
    )

    if not avg_mode:
        # max mode
        return te.max(sampled, axis=[rh, rw])

    return te.sum(sampled / (grid_h * grid_w), axis=[rh, rw])
def test_basic_operation():
    """Run ``check_grad`` on a series of small tensor expressions.

    The cases cover element-wise arithmetic, rounding functions (whose
    expected gradient is all zeros), transcendental functions, reductions
    over the shared axis ``k``, a custom product reducer and a tensordot of
    two computed tensors.
    """
    np.random.seed(0)
    shape = (10, 10)
    k = te.reduce_axis((0, 10), name="k")
    A0 = te.placeholder(shape, name='A0')
    A1 = te.placeholder(shape, name='A1')
    zeros = np.zeros(shape)

    # --- identity / addition -------------------------------------------
    B = te.compute(shape, lambda i, j: A0[i, j], name='B')
    check_grad(B, [A0])

    B = te.compute(shape, lambda i, j: A0[i, j] + A1[i, j], name='B')
    check_grad(B, [A0, A1])

    B = te.compute(shape, lambda i, j: A0[i, j] + A0[j, i], name='B')
    check_grad(B, A0)

    # --- rounding functions: expected gradient is zero ------------------
    B = te.compute(shape, lambda i, j: te.floor(A0[i, j]), name='B')
    check_grad(B, A0, desired_grads=[zeros])

    B = te.compute(shape, lambda i, j: te.ceil(A0[i, j]), name='B')
    check_grad(B, A0, desired_grads=[zeros])

    B = te.compute(shape, lambda i, j: te.trunc(A0[i, j]), name='B')
    check_grad(B, A0, desired_grads=[zeros])

    B = te.compute(shape, lambda i, j: te.round(A0[i, j]), name='B')
    check_grad(B, A0, desired_grads=[zeros])

    # --- transcendental functions ---------------------------------------
    B = te.compute(shape, lambda i, j: A0[i, j] + te.exp(A0[j, i]), name='B')
    check_grad(B, A0)

    B = te.compute(
        shape,
        lambda i, j: te.log(0.1 + te.abs(A0[i, j] + te.exp(A0[j, i]))),
        name='B')
    check_grad(B, A0)

    B = te.compute(shape, lambda i, j: te.sigmoid(A0[i, j] * A0[i, j] * A0[j, i]), name='B')
    check_grad(B, A0)

    B = te.compute(shape, lambda i, j: te.tanh(A0[i, j] * A0[i, j] * A0[j, i]), name='B')
    check_grad(B, A0)

    # restricted input ranges are passed for sqrt and power
    B = te.compute(shape, lambda i, j: te.sqrt(A0[i, j] * A0[i, j] * A0[j, i]), name='B')
    check_grad(B, A0, data_range=(0.1, 10))

    B = te.compute(shape, lambda i, j: te.power(te.abs(A0[i, j]), A0[j, i]), name='B')
    check_grad(B, A0, data_range=(-4, 4))

    B = te.compute(shape, lambda i, j: A0[i, j] * A0[j, i], name='B')
    check_grad(B, A0)

    # --- reductions over k ----------------------------------------------
    B = te.compute((10, ), lambda i: te.sum(A0[i, k] * A0[k, i], axis=k), name='B')
    check_grad(B, A0)

    B = te.compute(shape, lambda i, j: te.sum(A0[i, k] * A0[k, i] + 5, axis=k), name='B')
    check_grad(B, A0)

    B = te.compute(shape, lambda i, j: te.max(A0[i, k] * A0[k, j] + 5, axis=k), name='B')
    check_grad(B, A0)

    B = te.compute(shape, lambda i, j: A0[i, j] * (A1[j, i] + A0[j, i]), name='B')
    check_grad(B, [A0, A1])

    B = te.compute(
        shape,
        lambda i, j: te.sum(A0[k, k] - A0[te.min(j + k, 9), j] * A0[i, k], axis=k),
        name='B')
    check_grad(B, A0)

    # --- custom commutative reducer (product) ---------------------------
    def fcombine(x, y):
        return x * y

    def fidentity(t0):
        return tvm.tir.const(1, t0)

    prod = te.comm_reducer(fcombine, fidentity, name='prod')
    B = te.compute((10, 10), lambda i, j: prod(A0[i, k] + A0[k, i], axis=k), name='B')
    check_grad(B, A0)

    # --- tensordot of two tensors computed from the same input ----------
    X = te.placeholder((10, ), name='X')
    A = te.compute((10, ), lambda i: X[i] + X[9 - i])
    B = te.compute((10, ), lambda i: X[i] * X[9 - i])
    Y = topi.tensordot(A, B, 1)
    check_grad(Y, X)
def resize_bilinear(indices, data, image_height, image_width,
                    target_height, target_width, boxes=None,
                    box_indices=None, extrapolation_value=None, layout='NCHW',
                    coordinate_transformation_mode="align_corners",
                    out_dtype=None):
    """Compute one output element of a bilinear resize.

    For details about Bilinear interpolation please refer to
    https://en.wikipedia.org/wiki/Bilinear_interpolation.

    Parameters
    ----------
    indices : tuple
        The indices of input data

    data : tvm.te.Tensor
        inputs is a 4-D tensor with shape
        [batch, channel, in_height, in_width]
        or  [batch, in_height, in_width, channel]

    image_height : integer
        Input image height.
        NOTE(review): ``.astype`` is called on it when ``boxes`` is None, so
        an expression rather than a plain Python int seems to be expected.

    image_width : integer
        Input image width (same note as ``image_height``).

    target_height : integer
        The target resized image height

    target_width : integer
        The target resized image width

    boxes : tvm.te.Tensor, optional
        A 2-D tensor of shape [num_boxes, 4]. Each row of the tensor specifies
        the coordinates of a box.

    box_indices : tvm.te.Tensor, optional
        A 1-D tensor of shape [num_boxes], box_indices[i] specifies the data that
        the i-th box refers to.

    extrapolation_value: float, optional
        Value used for extrapolation, when applicable.

    layout: string, optional
        "NCHW", "NHWC", or "NCHWc".

    coordinate_transformation_mode: string, optional
        Describes how to transform the coordinate in the resized tensor
        to the coordinate in the original tensor.
        Refer to the ONNX Resize operator specification for details.
        Available options are "half_pixel", "align_corners" and "asymmetric".
        Only consulted when ``boxes`` is None.

    out_dtype: string, optional
        Type to return. If left None will be same as input type.

    Returns
    -------
    output : out_dtype
        The computed result with type out_dtype

    Raises
    ------
    ValueError
        If ``boxes`` is None and ``coordinate_transformation_mode`` is not
        one of the supported options.
    """

    def _blend(first, second, weight):
        # Linear interpolation between two values.
        return first * (1.0 - weight) + second * weight

    n, c, y, x, cc, inum, ic = get_2d_indices(indices, layout=layout)
    box_idx = n if box_indices is None else box_indices(n)

    def _fetch(row, col):
        # Every corner lookup shares all arguments except the pixel position.
        return get_2d_pixel(data, layout, boxes, image_height, image_width,
                            box_idx, c, row, col, cc, inum, ic)

    if boxes is not None:
        # Crop-and-resize: the source window is the box of this output item.
        y1, x1 = boxes(n, 0), boxes(n, 1)
        y2, x2 = boxes(n, 2), boxes(n, 3)

        in_h = (image_height - 1) * (y2 - y1)
        in_w = (image_width - 1) * (x2 - x1)
        h_scale = in_h.astype('float') / (target_height - 1)
        w_scale = in_w.astype('float') / (target_width - 1)

        in_y = y1 * (image_height - 1) + h_scale * y
        in_x = x1 * (image_width - 1) + w_scale * x
    else:
        if coordinate_transformation_mode not in ("align_corners", "asymmetric", "half_pixel"):
            raise ValueError(
                "Unsupported coordinate_transformation_mode: {}".format(
                    coordinate_transformation_mode))

        if coordinate_transformation_mode == "align_corners":
            h_scale = (image_height - 1).astype('float') / (target_height - 1)
            w_scale = (image_width - 1).astype('float') / (target_width - 1)
        else:
            h_scale = image_height.astype('float') / target_height
            w_scale = image_width.astype('float') / target_width

        if coordinate_transformation_mode == "half_pixel":
            in_y = h_scale * (y + 0.5) - 0.5
            in_x = w_scale * (x + 0.5) - 0.5
        else:
            in_y = h_scale * y
            in_x = w_scale * x

    # The four integer neighbours of the fractional source position.
    row_above = te.floor(in_y).astype('int32')
    row_below = te.ceil(in_y).astype('int32')
    row_weight = in_y - row_above

    col_left = te.floor(in_x).astype('int32')
    col_right = te.ceil(in_x).astype('int32')
    col_weight = in_x - col_left

    top_left = _fetch(row_above, col_left)
    top_right = _fetch(row_above, col_right)
    bottom_left = _fetch(row_below, col_left)
    bottom_right = _fetch(row_below, col_right)

    # Interpolate along x on both rows, then along y between the rows.
    upper = _blend(top_left, top_right, col_weight)
    lower = _blend(bottom_left, bottom_right, col_weight)
    value = _blend(upper, lower, row_weight)

    # use extrapolation_value if in_y/in_x is out of boundary
    if extrapolation_value is not None:
        def _guard(coord, extent, inside):
            return tvm.tir.if_then_else(
                coord < 0, extrapolation_value,
                tvm.tir.if_then_else(coord > extent - 1, extrapolation_value,
                                     inside))

        value = _guard(in_x, image_width, _guard(in_y, image_height, value))

    source_dtype = data.dtype
    return value.astype(out_dtype if out_dtype else source_dtype)
def resize_nearest_neighbor(
    indices,
    data,
    image_height,
    image_width,
    target_height,
    target_width,
    boxes=None,
    box_indices=None,
    extrapolation_value=None,
    layout="NCHW",
    coordinate_transformation_mode="align_corners",
    rounding_method="",
    out_dtype=None,
):
    """Perform resize operation with nearest neighbor method on the data.
    For details about Nearest-neighbor interpolation please refer to
    https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation.

    Parameters
    ----------
    indices : tuple
        The indices of input data

    data : tvm.te.Tensor
        inputs is a 4-D tensor with shape
        [batch, channel, in_height, in_width]
        or  [batch, in_height, in_width, channel]

    image_height : integer
        Input image height

    image_width : integer
        Input image width

    target_height : integer
        The target resized image height

    target_width : integer
        The target resized image width

    boxes : tvm.te.Tensor, optional
        A 2-D tensor of shape [num_boxes, 4]. Each row of the tensor specifies
        the coordinates of a box. When given, plain rounding is always used
        and ``rounding_method`` is ignored.

    box_indices : tvm.te.Tensor, optional
        A 1-D tensor of shape [num_boxes], box_indices[i] specifies the data that
        the i-th box refers to.

    extrapolation_value: float, optional
        Value used for extrapolation, when applicable.

    layout: string, optional
        "NCHW", "NHWC", or "NCHWc".

    coordinate_transformation_mode: string, optional
        Describes how to transform the coordinate in the resized tensor
        to the coordinate in the original tensor.
        Refer to the ONNX Resize operator specification for details.
        Available options are "half_pixel", "align_corners" and "asymmetric".

    rounding_method: string, optional
        indicates how to find the "nearest" pixel in nearest_neighbor method
        [round, round_prefer_floor, round_prefer_ceil, floor, ceil].
        The empty string (default) selects "round" for "align_corners" and
        "floor" for every other coordinate transformation mode.

    out_dtype: string, optional
        Type to return. If left None will be same as input type.

    Returns
    -------
    output : out_dtype
        The computed result with type out_dtype

    Raises
    ------
    ValueError
        If ``boxes`` is None and ``rounding_method`` is not supported.
    """
    if rounding_method == "":
        if coordinate_transformation_mode == "align_corners":
            rounding_method = "round"
        else:
            rounding_method = "floor"

    # Fail fast, before any expression is built. With boxes the method is
    # ignored (always "round"), so only the plain resize path is validated.
    supported_methods = ("round", "round_prefer_floor", "round_prefer_ceil", "floor", "ceil")
    if boxes is None and rounding_method not in supported_methods:
        raise ValueError("Unknown rounding method: {}".format(rounding_method))

    def _cast_output(value, data_dtype="float32", out_dtype=None):
        # Explicit out_dtype wins, otherwise fall back to the input dtype.
        dtype = out_dtype if out_dtype else data_dtype
        return value.astype(dtype)

    n, c, y, x, cc, inum, ic = get_2d_indices(indices, layout)
    box_idx = box_indices(n) if box_indices is not None else n

    if boxes is not None:
        # Crop-and-resize: the source window is the box of this output item.
        y1, x1 = boxes(n, 0), boxes(n, 1)
        y2, x2 = boxes(n, 2), boxes(n, 3)

        in_h = (image_height - 1) * (y2 - y1)
        in_w = (image_width - 1) * (x2 - x1)
        h_scale = in_h.astype("float") / (target_height - 1)
        w_scale = in_w.astype("float") / (target_width - 1)

        in_y = y1 * (image_height - 1) + h_scale * y
        in_x = x1 * (image_width - 1) + w_scale * x
    else:
        in_y, in_x = get_iny_inx(
            y,
            x,
            image_height,
            image_width,
            target_height,
            target_width,
            coordinate_transformation_mode,
        )

    if rounding_method == "round" or boxes is not None:
        closest_x_index = te.round(in_x).astype("int32")
        closest_y_index = te.round(in_y).astype("int32")
    elif rounding_method == "round_prefer_floor":
        # Ties (x.5) go down: ceil(x - 0.5).
        closest_x_index = te.ceil(in_x - 0.5).astype("int32")
        closest_y_index = te.ceil(in_y - 0.5).astype("int32")
    elif rounding_method == "round_prefer_ceil":
        # Ties (x.5) go up: floor(x + 0.5).
        closest_x_index = te.floor(in_x + 0.5).astype("int32")
        closest_y_index = te.floor(in_y + 0.5).astype("int32")
    elif rounding_method == "floor":
        # Add epsilon to floor to prevent gpu rounding errors.
        epsilon = 1e-5
        closest_y_index = te.floor(in_y + epsilon).astype("int32")
        closest_x_index = te.floor(in_x + epsilon).astype("int32")
    else:
        # "ceil" is the only method left after the validation above.
        # Subtract epsilon from ceil to prevent gpu rounding errors.
        epsilon = 1e-5
        closest_y_index = te.ceil(in_y - epsilon).astype("int32")
        closest_x_index = te.ceil(in_x - epsilon).astype("int32")

    value = get_2d_pixel(
        data,
        layout,
        boxes,
        image_height,
        image_width,
        box_idx,
        c,
        closest_y_index,
        closest_x_index,
        cc,
        inum,
        ic,
    )

    if extrapolation_value is not None:
        # use extrapolation_value if in_y is out of boundary
        out = tvm.tir.if_then_else(
            in_y < 0,
            extrapolation_value,
            tvm.tir.if_then_else(in_y > image_height - 1, extrapolation_value, value),
        )
        # use extrapolation_value if in_x is out of boundary
        value = tvm.tir.if_then_else(
            in_x < 0,
            extrapolation_value,
            tvm.tir.if_then_else(in_x > image_width - 1, extrapolation_value, out),
        )
    return _cast_output(value, data.dtype, out_dtype=out_dtype)