# Imports shared by the snippets below. The relative imports assume the
# TOPI package context (as in topi/nn/pooling.py).
import math

import numpy as np
import tvm

from .pad import pad
from .util import get_pad_tuple
from .. import util
from .. import tag


def max_pool(data, kernel, stride, padding):
    """Perform max pooling on the data

    Parameters
    ----------
    data : tvm.Tensor
        4-D with shape [batch, channel, in_height, in_width]

    kernel : list/tuple of two ints
        Kernel size, [kernel_height, kernel_width]

    stride : list/tuple of two ints
        Stride size, [stride_height, stride_width]

    padding : list/tuple of two ints
        Pad size, [pad_height, pad_width]

    Returns
    -------
    output : tvm.Tensor
        4-D with shape [batch, channel, out_height, out_width]
    """
    assert len(data.shape) == 4, "only support 4-dim pooling"
    assert len(stride) == 2, "only support 2-dim stride"
    kernel_height, kernel_width = kernel
    stride_height, stride_width = stride
    batch, channel, height, width = data.shape

    pad_top, pad_left, pad_down, pad_right = get_pad_tuple(
        padding, (kernel_height, kernel_width))
    pad_before = [0, 0, pad_top, pad_left]
    pad_after = [0, 0, pad_down, pad_right]
    # Pad with the dtype's minimum so padded cells never win the max
    # reduction (using data.dtype rather than hardcoding "float32").
    temp = pad(data, pad_before, pad_after, name="pad_temp",
               pad_value=tvm.min_value(data.dtype))
    out_height = util.simplify(
        (height - kernel_height + pad_top + pad_down) // stride_height + 1)
    out_width = util.simplify(
        (width - kernel_width + pad_left + pad_right) // stride_width + 1)
    dheight = tvm.reduce_axis((0, kernel_height))
    dwidth = tvm.reduce_axis((0, kernel_width))

    return tvm.compute(
        (batch, channel, out_height, out_width),
        lambda i, c, h, w: tvm.max(
            temp[i, c, h * stride_height + dheight, w * stride_width + dwidth],
            axis=[dheight, dwidth]),
        tag="max_pool")
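
# A minimal usage sketch (an addition, not part of the original source):
# builds max_pool with a default schedule and runs it on the "llvm"
# target, assuming the classic tvm 0.x API (tvm.placeholder,
# tvm.create_schedule, tvm.build).
def _demo_max_pool():
    data = tvm.placeholder((1, 3, 8, 8), name="data")
    out = max_pool(data, kernel=[2, 2], stride=[2, 2], padding=[0, 0])
    s = tvm.create_schedule(out.op)
    f = tvm.build(s, [data, out], "llvm")
    a_np = np.random.rand(1, 3, 8, 8).astype("float32")
    a = tvm.nd.array(a_np)
    b = tvm.nd.array(np.zeros((1, 3, 4, 4), dtype="float32"))
    f(a, b)
    # each output cell is the max over its 2x2 window
    assert np.isclose(b.asnumpy()[0, 0, 0, 0], a_np[0, 0, :2, :2].max())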
# Inner compute for ROI max pooling. `rois`, `data`, `spatial_scale`,
# `pooled_size_h`, `pooled_size_w`, `height`, `width` and `dtype` are
# captured from the enclosing roi_pool scope.
def _pool(i, c, ph, pw):
    roi = rois[i]
    batch_index = roi[0].astype('int32')
    roi_start_w, roi_start_h, roi_end_w, roi_end_h = \
        roi[1], roi[2], roi[3], roi[4]

    roi_start_h = tvm.round(roi_start_h * spatial_scale).astype('int32')
    roi_start_w = tvm.round(roi_start_w * spatial_scale).astype('int32')
    roi_end_h = tvm.round(roi_end_h * spatial_scale).astype('int32')
    roi_end_w = tvm.round(roi_end_w * spatial_scale).astype('int32')

    # force malformed ROIs to be 1x1
    roi_h = tvm.max(roi_end_h - roi_start_h + 1, tvm.const(1, 'int32'))
    roi_w = tvm.max(roi_end_w - roi_start_w + 1, tvm.const(1, 'int32'))

    bin_h = roi_h.astype(dtype) / pooled_size_h
    bin_w = roi_w.astype(dtype) / pooled_size_w

    # use epsilon to prevent floating point precision loss in floor/ceil
    epsilon = tvm.const(0.00001, dtype)
    hstart = tvm.floor(ph * bin_h + epsilon).astype('int32')
    wstart = tvm.floor(pw * bin_w + epsilon).astype('int32')
    hend = tvm.ceil((ph + 1) * bin_h - epsilon).astype('int32')
    wend = tvm.ceil((pw + 1) * bin_w - epsilon).astype('int32')
    # clip the bin boundaries to the feature map
    hstart = tvm.min(tvm.max(hstart + roi_start_h, 0), height)
    wstart = tvm.min(tvm.max(wstart + roi_start_w, 0), width)
    hend = tvm.min(tvm.max(hend + roi_start_h, 0), height)
    wend = tvm.min(tvm.max(wend + roi_start_w, 0), width)

    non_empty = tvm.all(hstart < hend, wstart < wend)
    # Reducer identity: the dtype's minimum for a non-empty bin, 0 for an
    # empty one, so empty bins pool to 0 instead of -inf.
    min_value = lambda dtype: tvm.if_then_else(
        non_empty, tvm.min_value(dtype), tvm.const(0.0, dtype))
    # pylint: disable=unnecessary-lambda
    _max = tvm.comm_reducer(lambda x, y: tvm.make._OpMax(x, y),
                            min_value, name='max')
    rh = tvm.reduce_axis((0, hend - hstart), 'rh')
    rw = tvm.reduce_axis((0, wend - wstart), 'rw')
    return _max(data[batch_index, c, hstart + rh, wstart + rw],
                axis=[rh, rw])
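
# A small plain-python sketch (an addition, with hypothetical values)
# mirroring the bin arithmetic above: an ROI of height 7 split into
# pooled_size_h = 3 bins yields start/end rows that cover the whole ROI.
def _demo_roi_bins(roi_h=7, pooled_size_h=3):
    bin_h = float(roi_h) / pooled_size_h
    eps = 1e-5  # same guard against floor/ceil precision loss as above
    bins = [(int(math.floor(ph * bin_h + eps)),
             int(math.ceil((ph + 1) * bin_h - eps)))
            for ph in range(pooled_size_h)]
    return bins  # [(0, 3), (2, 5), (4, 7)] -- bins may overlap, none empty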
def argmax_init(idx_typ, val_typ):
    """Identity for an argmax reducer: index -1 and the dtype's minimum value."""
    return tvm.const(-1, idx_typ), tvm.min_value(val_typ)
def fidentity(t0, t1):
    """Identity element for an argmax comm_reducer: (-1, dtype minimum)."""
    return tvm.const(-1, t0), tvm.min_value(t1)
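
# How identities like argmax_init/fidentity are used (a hedged sketch, not
# from the original source): paired with a combine function in
# tvm.comm_reducer, (-1, min_value) is the neutral element of an argmax
# reduction over (index, value) pairs.
def _make_argmax():
    def fcombine(x, y):
        # keep the (index, value) pair with the larger value
        idx = tvm.expr.Select(x[1] >= y[1], x[0], y[0])
        val = tvm.expr.Select(x[1] >= y[1], x[1], y[1])
        return idx, val
    return tvm.comm_reducer(fcombine, fidentity, name='argmax')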
def pool(data, kernel, stride, padding, pool_type, ceil_mode=False):
    """Perform pooling on the data

    Parameters
    ----------
    data : tvm.Tensor
        4-D with shape [batch, channel, in_height, in_width]

    kernel : list/tuple of two ints
        Kernel size, [kernel_height, kernel_width]

    stride : list/tuple of two ints
        Stride size, [stride_height, stride_width]

    padding : list/tuple of two ints
        Pad size, [pad_height, pad_width]

    pool_type : str
        Pool type, 'max' or 'avg'

    ceil_mode : bool
        Whether to use ceil when calculating the output size.

    Returns
    -------
    output : tvm.Tensor
        4-D with shape [batch, channel, out_height, out_width]
    """
    assert len(data.shape) == 4, "only support 4-dim pooling"
    assert len(stride) == 2, "only support 2-dim stride"
    kernel_height, kernel_width = kernel
    stride_height, stride_width = stride
    batch, channel, height, width = data.shape

    pad_top, pad_left, pad_down, pad_right = get_pad_tuple(
        padding, (kernel_height, kernel_width))

    if ceil_mode:
        # Additional padding to ensure we do ceil instead of floor when
        # dividing by stride.
        pad_down += stride_height - 1
        pad_right += stride_width - 1

    pad_before = [0, 0, pad_top, pad_left]
    pad_after = [0, 0, pad_down, pad_right]

    out_height = util.simplify(
        (height - kernel_height + pad_top + pad_down) // stride_height + 1)
    out_width = util.simplify(
        (width - kernel_width + pad_left + pad_right) // stride_width + 1)

    dheight = tvm.reduce_axis((0, kernel_height))
    dwidth = tvm.reduce_axis((0, kernel_width))

    if pool_type == 'max':
        # Pad with the dtype's minimum so padding never wins the max.
        temp = pad(data, pad_before, pad_after, name="pad_temp",
                   pad_value=tvm.min_value(data.dtype))
        return tvm.compute(
            (batch, channel, out_height, out_width),
            lambda n, c, h, w: tvm.max(
                temp[n, c, h * stride_height + dheight, w * stride_width + dwidth],
                axis=[dheight, dwidth]),
            tag="pool_max")
    elif pool_type == 'avg':
        # Pad with zero; the sum is divided by the full kernel area below.
        temp = pad(data, pad_before, pad_after, name="pad_temp",
                   pad_value=tvm.const(0.).astype(data.dtype))
        tsum = tvm.compute(
            (batch, channel, out_height, out_width),
            lambda n, c, h, w: tvm.sum(
                temp[n, c, h * stride_height + dheight, w * stride_width + dwidth],
                axis=[dheight, dwidth]),
            tag="pool_avg")
        return tvm.compute(
            (batch, channel, out_height, out_width),
            lambda n, c, h, w: tsum[n, c, h, w] / (kernel_height * kernel_width),
            tag=tag.ELEMWISE)
    else:
        raise ValueError("Pool type should be 'avg' or 'max'.")
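
# A brief usage sketch (an addition): average pooling with ceil_mode,
# where the extra stride-1 padding bumps the output size from the floor
# formula to its ceiling.
def _demo_avg_pool_ceil():
    data = tvm.placeholder((1, 1, 7, 7), name="data")
    # floor mode: (7 - 2) // 2 + 1 = 3 per axis; ceil_mode adds stride-1
    # padding on the bottom/right, so the output becomes 4 x 4.
    out = pool(data, kernel=[2, 2], stride=[2, 2], padding=[0, 0],
               pool_type='avg', ceil_mode=True)
    return out  # shape (1, 1, 4, 4)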
def _argmax_init(idx_typ, val_typ):
    """Initial index and value of argmax"""
    return tvm.const(-1, idx_typ), tvm.min_value(val_typ)
def pool_nchw(data, kernel, stride, padding, pool_type, ceil_mode=False):
    """Perform pooling on the data in NCHW layout

    Parameters
    ----------
    data : tvm.Tensor
        4-D with shape [batch, channel, in_height, in_width]

    kernel : list/tuple of two ints
        Kernel size, [kernel_height, kernel_width]

    stride : list/tuple of two ints
        Stride size, [stride_height, stride_width]

    padding : list/tuple of two ints
        Pad size, [pad_height, pad_width]

    pool_type : str
        Pool type, 'max' or 'avg'

    ceil_mode : bool
        Whether to use ceil when calculating the output size.

    Returns
    -------
    output : tvm.Tensor
        4-D with shape [batch, channel, out_height, out_width]
    """
    assert len(data.shape) == 4, "only support 4-dim pooling"
    assert len(stride) == 2, "only support 2-dim stride"
    kernel_height, kernel_width = kernel
    stride_height, stride_width = stride
    batch, channel, height, width = data.shape

    pad_top, pad_left, pad_down, pad_right = get_pad_tuple(
        padding, (kernel_height, kernel_width))

    if ceil_mode:
        # Additional padding to ensure we do ceil instead of floor when
        # dividing by stride.
        pad_down += stride_height - 1
        pad_right += stride_width - 1

    pad_before = [0, 0, pad_top, pad_left]
    pad_after = [0, 0, pad_down, pad_right]

    out_height = util.simplify(
        (height - kernel_height + pad_top + pad_down) // stride_height + 1)
    out_width = util.simplify(
        (width - kernel_width + pad_left + pad_right) // stride_width + 1)

    dheight = tvm.reduce_axis((0, kernel_height))
    dwidth = tvm.reduce_axis((0, kernel_width))

    if pool_type == 'max':
        # Pad with the dtype's minimum so padding never wins the max.
        temp = pad(data, pad_before, pad_after, name="pad_temp",
                   pad_value=tvm.min_value(data.dtype))
        return tvm.compute(
            (batch, channel, out_height, out_width),
            lambda n, c, h, w: tvm.max(
                temp[n, c, h * stride_height + dheight, w * stride_width + dwidth],
                axis=[dheight, dwidth]),
            tag="pool_max")
    elif pool_type == 'avg':
        # Pad with zero; the sum is divided by the full kernel area below.
        temp = pad(data, pad_before, pad_after, name="pad_temp",
                   pad_value=tvm.const(0.).astype(data.dtype))
        tsum = tvm.compute(
            (batch, channel, out_height, out_width),
            lambda n, c, h, w: tvm.sum(
                temp[n, c, h * stride_height + dheight, w * stride_width + dwidth],
                axis=[dheight, dwidth]),
            tag="pool_avg")
        return tvm.compute(
            (batch, channel, out_height, out_width),
            lambda n, c, h, w: tsum[n, c, h, w] / (kernel_height * kernel_width),
            tag=tag.ELEMWISE)
    else:
        raise ValueError("Pool type should be 'avg' or 'max'.")
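
# A hedged cross-check (an addition, assuming the tvm 0.x build API): max
# pooling with one cell of padding; because the padding holds the dtype's
# minimum, border windows ignore the padded cells.
def _demo_max_pool_nchw_padded():
    data = tvm.placeholder((1, 1, 4, 4), name="data")
    out = pool_nchw(data, kernel=[3, 3], stride=[1, 1], padding=[1, 1],
                    pool_type='max')
    s = tvm.create_schedule(out.op)
    f = tvm.build(s, [data, out], "llvm")
    a_np = np.random.rand(1, 1, 4, 4).astype("float32")
    a = tvm.nd.array(a_np)
    b = tvm.nd.array(np.zeros((1, 1, 4, 4), dtype="float32"))
    f(a, b)
    # the corner output sees only the 2x2 real values under its 3x3 window
    assert np.isclose(b.asnumpy()[0, 0, 0, 0], a_np[0, 0, :2, :2].max())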
def pool3d_ncdhw_python(np_data, kernel, strides, padding,
                        out_shape, pool_type,
                        count_include_pad=True,
                        ceil_mode=False, dtype="float32"):
    """Baseline for max_pool3d and avg_pool3d; default layout is "NCDHW"."""
    in_n, in_c, in_d, in_h, in_w = in_shape = np_data.shape
    k_d, k_h, k_w = kernel
    s_d, s_h, s_w = strides
    # padding order: (front, top, left, back, bottom, right)
    pf, pt, pl, pk, pb, pr = padding

    if ceil_mode:
        assert out_shape[2] == int(math.ceil(float(in_shape[2] - k_d + pf + pk) / s_d) + 1)
        assert out_shape[3] == int(math.ceil(float(in_shape[3] - k_h + pt + pb) / s_h) + 1)
        assert out_shape[4] == int(math.ceil(float(in_shape[4] - k_w + pl + pr) / s_w) + 1)
    else:
        assert out_shape[2] == int(math.floor(float(in_shape[2] - k_d + pf + pk) / s_d) + 1)
        assert out_shape[3] == int(math.floor(float(in_shape[3] - k_h + pt + pb) / s_h) + 1)
        assert out_shape[4] == int(math.floor(float(in_shape[4] - k_w + pl + pr) / s_w) + 1)

    fill_value = tvm.const(0.0, dtype).value
    if not count_include_pad and pool_type == 'max':
        fill_value = tvm.min_value(dtype).value

    pad_np = np.full(shape=(in_n, in_c,
                            in_d + pf + pk,
                            in_h + pt + pb,
                            in_w + pl + pr),
                     fill_value=fill_value,
                     dtype=dtype)

    # copy the real data into the interior of the padded array
    no_zero = (range(in_n),
               range(in_c),
               range(pf, in_d + pf),
               range(pt, in_h + pt),
               range(pl, in_w + pl))
    pad_np[np.ix_(*no_zero)] = np_data
    ret_np = np.zeros(shape=out_shape).astype(dtype)

    if pool_type == 'avg':
        for k in range(out_shape[2]):
            for i in range(out_shape[3]):
                for j in range(out_shape[4]):
                    if count_include_pad:
                        ret_np[:, :, k, i, j] = np.mean(
                            pad_np[:, :,
                                   k * s_d: k * s_d + k_d,
                                   i * s_h: i * s_h + k_h,
                                   j * s_w: j * s_w + k_w],
                            axis=(2, 3, 4))
                    else:
                        pad_count = np.sum(
                            pad_np[:, :,
                                   k * s_d: k * s_d + k_d,
                                   i * s_h: i * s_h + k_h,
                                   j * s_w: j * s_w + k_w] > 0,
                            axis=(2, 3, 4))
                        ret_np[:, :, k, i, j] = np.sum(
                            pad_np[:, :,
                                   k * s_d: k * s_d + k_d,
                                   i * s_h: i * s_h + k_h,
                                   j * s_w: j * s_w + k_w],
                            axis=(2, 3, 4)) / np.maximum(pad_count, 1)
    elif pool_type == 'max':
        for k in range(out_shape[2]):
            for i in range(out_shape[3]):
                for j in range(out_shape[4]):
                    ret_np[:, :, k, i, j] = np.max(
                        pad_np[:, :,
                               k * s_d: k * s_d + k_d,
                               i * s_h: i * s_h + k_h,
                               j * s_w: j * s_w + k_w],
                        axis=(2, 3, 4))
    else:
        raise ValueError("pool type {} is not supported".format(pool_type))

    ret_np = np.maximum(ret_np, fill_value)
    return ret_np
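
# A usage sketch (an addition): exercising the baseline directly. The
# out_shape argument must be precomputed with the floor-mode formula,
# or the internal asserts fire.
def _demo_pool3d_baseline():
    np_data = np.random.rand(1, 1, 4, 4, 4).astype("float32")
    # floor mode, no padding: (4 - 2) // 2 + 1 = 2 per spatial axis
    out = pool3d_ncdhw_python(np_data, kernel=(2, 2, 2), strides=(2, 2, 2),
                              padding=(0, 0, 0, 0, 0, 0),
                              out_shape=(1, 1, 2, 2, 2), pool_type='max')
    assert np.isclose(out[0, 0, 0, 0, 0],
                      np_data[0, 0, :2, :2, :2].max())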
def min_value(dtype):
    # Reducer identity for ROI pooling: the dtype's minimum for a
    # non-empty bin, 0 otherwise. `non_empty` is captured from the
    # enclosing scope.
    return tvm.expr.Select(non_empty, tvm.min_value(dtype),
                           tvm.const(0.0, dtype))