def get_iny_inx(y, x, image_height, image_width, target_height, target_width,
                coordinate_transformation_mode):
    """Map an output coordinate pair (y, x) back to input coordinates.

    The per-axis scale is the input extent divided by the target extent,
    both cast to "float" before the division. The supported modes are:

    * ``half_pixel``: ``(out + 0.5) * scale - 0.5``
    * ``align_corners``: ``(in_size - 1) / (out_size - 1) * out``
    * ``asymmetric``: ``scale * out``
    * ``pytorch_half_pixel``: the ``half_pixel`` formula when the target
      extent is greater than 1, otherwise ``0.0``
    * ``tf_half_pixel_for_nn``: ``(out + 0.5) * scale``

    Parameters
    ----------
    y, x :
        Output coordinates along the height and width axes.
    image_height, image_width :
        Input extents; must provide ``.astype`` since it is called here.
    target_height, target_width :
        Output extents; must provide ``.astype`` as well.
    coordinate_transformation_mode : str
        One of the mode names listed above.

    Returns
    -------
    tuple
        ``(in_y, in_x)``, the input coordinates for the selected mode.

    Raises
    ------
    ValueError
        If ``coordinate_transformation_mode`` is not a supported mode.
    """
    mode = coordinate_transformation_mode

    # The scales are always built first, even for modes that do not use them.
    ratio_h = te.div(image_height.astype("float"), target_height.astype("float"))
    ratio_w = te.div(image_width.astype("float"), target_width.astype("float"))

    if mode == "half_pixel":
        return (y + 0.5) * ratio_h - 0.5, (x + 0.5) * ratio_w - 0.5

    if mode == "align_corners":
        src_y = (image_height - 1).astype("float") / (target_height - 1) * y
        src_x = (image_width - 1).astype("float") / (target_width - 1) * x
        return src_y, src_x

    if mode == "asymmetric":
        return ratio_h * y, ratio_w * x

    if mode == "pytorch_half_pixel":
        src_y = te.if_then_else(target_height > 1, (y + 0.5) * ratio_h - 0.5, 0.0)
        src_x = te.if_then_else(target_width > 1, (x + 0.5) * ratio_w - 0.5, 0.0)
        return src_y, src_x

    if mode == "tf_half_pixel_for_nn":
        return (y + 0.5) * ratio_h, (x + 0.5) * ratio_w

    raise ValueError(
        "Unsupported coordinate_transformation_mode: {}".format(
            coordinate_transformation_mode))
def test_average_pool():
    """Check ``compute_flop`` on randomly sized sliding-window averages.

    Runs five trials. Each trial draws seven sizes in ``[10, 32)``, clamps
    the window to the spatial extent, builds a compute that sums
    ``D[...] / (KW * KH)`` over the window, and asserts that the reported
    FLOP count equals ``2 * N * CO * OH * OW * KH * KW``.
    """
    for _trial in range(5):
        batch, height, width, chan_out, chan_in, win_h, win_w = [
            np.random.randint(10, 32) for _ in range(7)
        ]
        (input_dtype, acc_dtype) = random_dtypes()
        D = te.placeholder((batch, chan_in, height, width), dtype=input_dtype)

        # The window may not exceed the spatial extent it slides over.
        win_h = min(height, win_h)
        win_w = min(width, win_w)
        out_h = (height - win_h) + 1
        out_w = (width - win_w) + 1

        kh = te.reduce_axis((0, win_h))
        kw = te.reduce_axis((0, win_w))

        # Parameter names are kept as n/co/h/w: te.compute names the output
        # axes after the compute function's arguments.
        def averaged(n, co, h, w):
            sample = D[n][co][h + kh][w + kw].astype(acc_dtype)
            return te.sum(te.div(sample, (win_w * win_h)), axis=[kh, kw])

        C = te.compute((batch, chan_out, out_h, out_w), averaged)

        s = te.create_schedule([C.op])

        expected_flop = 2 * batch * chan_out * out_h * out_w * win_h * win_w
        assert compute_flop(s) == expected_flop
def get_inx(x, image_width, target_width, coordinate_transformation_mode,
            start_x=0, end_x=-1):
    """Map an output x coordinate back to an input x coordinate.

    The scale is ``image_width / target_width`` with both operands cast to
    "float". The supported modes are:

    * ``half_pixel``: ``(x + 0.5) * scale - 0.5``
    * ``align_corners``: ``(image_width - 1) / (target_width - 1) * x``
    * ``asymmetric``: ``scale * x``
    * ``pytorch_half_pixel``: the ``half_pixel`` formula when
      ``target_width > 1``, otherwise ``0.0``
    * ``tf_half_pixel_for_nn``: ``(x + 0.5) * scale``
    * ``tf_crop_and_resize``: when ``target_width > 1``,
      ``start_x * (image_width - 1) + x * (end_x - start_x) *
      (image_width - 1) / (target_width - 1)``; otherwise
      ``0.5 * (start_x + end_x) * (image_width - 1)``

    Parameters
    ----------
    x :
        Output coordinate along the width axis.
    image_width :
        Input extent; must provide ``.astype`` since it is called here.
    target_width :
        Output extent; must provide ``.astype`` as well.
    coordinate_transformation_mode : str
        One of the mode names listed above.
    start_x, end_x :
        Only read by the ``tf_crop_and_resize`` mode. Defaults are 0 and -1.

    Returns
    -------
    The input x coordinate expression for the selected mode.

    Raises
    ------
    ValueError
        If ``coordinate_transformation_mode`` is not a supported mode.
    """
    mode = coordinate_transformation_mode
    ratio = te.div(image_width.astype("float"), target_width.astype("float"))

    if mode == "half_pixel":
        src_x = (x + 0.5) * ratio - 0.5
    elif mode == "align_corners":
        src_x = (image_width - 1).astype("float") / (target_width - 1) * x
    elif mode == "asymmetric":
        src_x = ratio * x
    elif mode == "pytorch_half_pixel":
        src_x = te.if_then_else(target_width > 1, (x + 0.5) * ratio - 0.5, 0.0)
    elif mode == "tf_half_pixel_for_nn":
        src_x = (x + 0.5) * ratio
    elif mode == "tf_crop_and_resize":
        # Regular case: offset of the crop start plus the scaled step.
        crop_origin = start_x * (image_width - 1)
        crop_step = (
            x * (end_x - start_x) * (image_width - 1).astype("float") / (target_width - 1)
        )
        # Fallback when target_width <= 1: midpoint of start_x and end_x.
        crop_centre = 0.5 * (start_x + end_x) * (image_width - 1)
        src_x = te.if_then_else(target_width > 1, crop_origin + crop_step, crop_centre)
    else:
        raise ValueError(
            "Unsupported coordinate_transformation_mode: {}".format(
                coordinate_transformation_mode))

    return src_x
def test_reduce_simplify():
    """Verify canonical simplification of several ``te.sum`` reductions.

    Each case is a pair of (expression, expected form) passed to
    ``CanonicalChecker.verify``.
    """
    checker = CanonicalChecker()
    axis_k = te.reduce_axis((0, 10), name="k")
    axis_j = te.reduce_axis((-5, 3), name="j")
    vec = te.placeholder((10,), name="A")

    # k + j < 12 holds over the whole domain (k < 10, j < 3), so the Select
    # is expected to collapse to its true branch.
    guarded_sum = te.sum(tvm.tir.Select(axis_k + axis_j < 12, axis_k + axis_j, 0),
                         [axis_k, axis_j])
    checker.verify(guarded_sum, te.sum(axis_k + axis_j, [axis_k, axis_j]))

    # A reduction over no axes is expected to yield its operand.
    checker.verify(te.sum(vec[3], []), vec[3])

    # With no axes, a `where` predicate and init=1.0, the expected result is
    # the constant 1.0.
    checker.verify(
        te.sum(vec[3], [], where=axis_k > 12, init=1.0),
        tvm.tir.const(1.0, dtype="float32"),
    )

    # The rule below is not typical, removed for now
    # NOTE(review): despite the remark above, the check is still executed;
    # confirm whether that is intended.
    checker.verify(
        te.sum(te.div(axis_k, 10), axis_k),
        te.sum(tvm.tir.const(0, "int32"), axis_k),
    )
def check_llvm_reciprocal(n):
    """Build ``1.0 / (1e37 * A[i])`` for LLVM and check the result is zero.

    The input is a length-``n`` float32 array filled with 100, and the output
    is compared against a float32 array of zeros.

    NOTE(review): presumably ``1e37 * 100`` overflows the element type so the
    reciprocal becomes 0 -- confirm against the placeholder's default dtype.
    """
    shape = (n, )

    A = te.placeholder(shape, name="A")
    B = te.compute(shape, lambda i: te.div(1.0, (1e37 * A[i])), name="B")

    sched = te.create_schedule(B.op)
    kernel = tvm.build(sched, [A, B], "llvm")

    inp = tvm.nd.array(np.full(shape, 100, "float32"))
    out = tvm.nd.empty(shape, "float32")
    kernel(inp, out)

    expected = np.zeros(shape, "float32")
    tvm.testing.assert_allclose(out.numpy(), expected)
def matmul():
    """Define a tunable matmul with a packed B operand and its schedule.

    The problem sizes ``M``, ``K`` and ``N`` are read from the enclosing
    scope; they are not defined inside this function.

    Three split knobs are registered on the autotvm config: ``tile_x`` over
    ``M`` and ``tile_y`` over ``N`` (three parts each) and ``tile_k`` over
    ``K`` (two parts). ``B`` is repacked into ``packedB`` with shape
    ``(N // bn, K, bn)``, where ``bn`` is the innermost ``tile_y`` factor.

    Returns
    -------
    tuple
        ``(s, [A, B, C])``: the schedule and the argument tensors.
    """
    # Algorithm
    k = te.reduce_axis((0, K), 'k')
    A = te.placeholder((M, K), name='A')
    B = te.placeholder((K, N), name='B')

    ##### define space begin #####
    cfg = autotvm.get_config()
    cfg.define_split("tile_x", M, num_outputs=3)
    cfg.define_split("tile_y", N, num_outputs=3)
    cfg.define_split("tile_k", K, num_outputs=2)
    ##### define space end #####

    # We have to re-write the algorithm slightly.
    bn = cfg["tile_y"].size[-1]
    # Floor division keeps the extent an integer; `N / bn` is true division
    # in Python 3 and would give a float shape dimension.
    # NOTE(review): assumes bn divides N evenly -- confirm the split policy.
    packedB = te.compute((N // bn, K, bn), lambda x, y, z: B[y, x * bn + z], name='packedB')
    C = te.compute(
        (M, N),
        lambda x, y: te.sum(A[x, k] * packedB[te.div(y, bn), k, y % bn], axis=k),
        name='C')

    s = te.create_schedule(C.op)
    # The spatial axes are fetched before cache_write, as in the original.
    x, y = s[C].op.axis

    # schedule according to config
    # Allocate write cache
    CC = s.cache_write(C, 'global')

    xt, xo, xi = cfg["tile_x"].apply(s, C, x)
    yt, yo, yi = cfg["tile_y"].apply(s, C, y)
    s[C].reorder(xt, yt, xo, yo, xi, yi)
    xyt = s[C].fuse(xt, yt)
    # parallel
    s[C].parallel(xyt)
    xyo = s[C].fuse(xo, yo)
    s[C].unroll(xi)
    s[C].vectorize(yi)

    # Write cache is computed at xyo
    s[CC].compute_at(s[C], xyo)

    # New inner axes
    xc, yc = s[CC].op.axis

    k, = s[CC].op.reduce_axis
    ko, ki = cfg["tile_k"].apply(s, CC, k)
    s[CC].reorder(ko, xc, ki, yc)
    s[CC].unroll(xc)
    s[CC].unroll(ki)
    s[CC].vectorize(yc)

    # Optional extra knobs, left disabled as in the original:
    # cfg.define_reorder("reorder", [xc, ki, yc], "all")
    # cfg["reorder"].apply(s, CC, [xc, ki, yc])
    # cfg.define_annotate('ann', [xc, ki, yc], policy='try_unroll_vec')
    # cfg['ann'].apply(s, CC, [xc, ki, yc])

    x, y, z = s[packedB].op.axis
    s[packedB].vectorize(z)
    s[packedB].parallel(x)

    return s, [A, B, C]