def test_array_mul(extent=1024, target="llvm", dev_id=0, number=10, verbose=False):
    time_cost_lst = []
    for N in range(1, extent + 1):
        ctx = tvm.device(target, dev_id)
        ary_ops, ary_bufs = array_mul(N)
        # random inputs for all buffers except the last, which is the output
        ary_inputs = [
            tvm.nd.array(
                np.random.uniform(size=to_tuple(buf.shape)).astype(buf.dtype), ctx)
            for buf in ary_bufs[:-1]
        ]
        ary_inputs += [
            tvm.nd.array(np.zeros(shape=to_tuple(buf.shape), dtype=buf.dtype), ctx)
            for buf in ary_bufs[-1:]
        ]
        s = tvm.te.create_schedule(ary_ops)
        func = tvm.build(s, ary_bufs, target)
        evaluator = func.time_evaluator(func.entry_name, ctx, number=number)
        cost = evaluator(*ary_inputs).mean * 1e3
        # print("N=", N, "cost=", "%f(ms)" % cost,
        #       "(target=%s, dev_id=%d, number=%d)" % (target, dev_id, number))
        time_cost_lst.append(cost)
    # normalize every cost by the cost at extent 1
    res_lst = [x / time_cost_lst[0] for x in time_cost_lst]
    print("array_mul |(target=%s, dev_id=%d, number=%d)" % (target, dev_id, number))
    if verbose:
        for i, res in enumerate(res_lst):
            print("time_cost: ext=%d / ext=1 = %f" % (i + 1, res))
    else:
        print("time_cost: ext=%d / ext=1 = %f" % (extent, res_lst[-1]))
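# A minimal usage sketch (not part of the original file): run a small sweep on
# the local LLVM target. Assumes `array_mul` and `to_tuple` are defined in this
# module and that TVM is installed; the helper name is hypothetical.
def _demo_test_array_mul():
    test_array_mul(extent=64, target="llvm", dev_id=0, number=5, verbose=False)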
def numpy_init(weight_list, *args):
    '''The first argument is randomly initialized. All others are zero initialized.'''
    weight_np = [
        np.random.uniform(-1, 1, to_tuple(var.shape)).astype(dtype)
        for var in weight_list
    ]
    init = [weight_np]
    if len(args) > 0:
        for item in args:
            init.append(
                [np.zeros(to_tuple(var.shape), dtype=dtype) for var in item])
    return init
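# Hedged usage sketch for numpy_init: one randomly initialized weight list plus
# one matching zero-initialized list (e.g. for gradients). Assumes the
# module-level `dtype` global that numpy_init reads (e.g. dtype = "float64");
# the helper name is hypothetical.
def _demo_numpy_init():
    w = tvm.te.placeholder([4, 4], dtype="float64", name="w")
    weights_np, grads_np = numpy_init([w], [w])
    return weights_np, grads_np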
def evaluate(name, s, bufs, target, dev_id, number, rpc_info):
    if rpc_info is not None:
        host = rpc_info.host
        port = rpc_info.port
    else:
        # local
        host = "0.0.0.0"
        port = 9090  # default port
    if host == "0.0.0.0":
        use_rpc = LOCAL_RPC
    else:
        use_rpc = True
    if use_rpc:
        remote = rpc.connect(host, port)
        ctx = remote.device(target, dev_id)
    else:
        ctx = tvm.device(target, dev_id)
    tvm_arys = []
    for buf in bufs:
        shape = to_tuple(buf.shape)
        tmp = np.random.uniform(-10, 10, size=shape).astype(buf.dtype)
        tmp = tvm.nd.array(tmp, ctx)
        tvm_arys.append(tmp)
    func_file = "{}.tar".format(name)
    try:
        if rpc_info is not None and rpc_info.target_host is not None:
            func = tvm.build(s, bufs, target=target,
                             target_host=rpc_info.target_host)
        else:
            func = tvm.build(s, bufs, target=target)
        if use_rpc:
            func.export_library(os.path.join(LIB_DIR, func_file))
            remote.upload(os.path.join(LIB_DIR, func_file))
            func = remote.load_module(func_file)
        evaluator = func.time_evaluator(func.entry_name, ctx, number=number)
        time_cost = evaluator(*tvm_arys).mean * 1e3
    except Exception as e:
        print(e)
        time_cost = float("inf")
    finally:
        while len(tvm_arys) > 0:
            del tvm_arys[-1]
        if os.path.exists(os.path.join(LIB_DIR, func_file)):
            try:
                os.remove(os.path.join(LIB_DIR, func_file))
            except Exception as e:
                print(e)
        elif os.path.exists(os.path.join(LIB_DIR, func_file + ".so")):
            try:
                # remove the .so variant as well
                os.remove(os.path.join(LIB_DIR, func_file + ".so"))
            except Exception as e:
                print(e)
    return time_cost
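# evaluate() above reads two module-level names defined elsewhere in the
# project; plausible stand-in values (assumptions, not the project's actual
# settings) would look like:
LOCAL_RPC = False  # assumed: whether a 0.0.0.0 host should still go through RPC
LIB_DIR = "."      # assumed: staging directory for exported .tar/.so libraries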
def evaluate(name, s, bufs, target, dev_id, number=10, rpc_info=None,
             result_generator=None):
    if rpc_info is not None:
        use_rpc = rpc_info.use_rpc
        target_host = rpc_info.target_host
        fcompile = rpc_info.fcompile
        remote = rpc_info.get_remote()
    else:
        use_rpc, target_host, fcompile, remote = None, None, None, None
    dev = (remote if remote else tvm).device(target, dev_id)
    np_arys = [
        np.random.uniform(-10, 10, size=to_tuple(buf.shape)).astype(buf.dtype)
        for buf in bufs
    ]
    tvm_arys = [tvm.nd.array(arr, dev) for arr in np_arys]
    func_file = f"{name}.so"
    time_cost = float("inf")
    try:
        func = tvm.build(s, bufs, target=target, target_host=target_host)
        if use_rpc:
            func.export_library(os.path.join(LIB_DIR, func_file), fcompile)
            remote.upload(os.path.join(LIB_DIR, func_file))
            func = remote.load_module(func_file)
        func(*tvm_arys)
        if result_generator is not None:
            print("Test whether computed...")
            result = tvm_arys[-1].asnumpy()
            test_allclose(result, np_arys[-1], rtol=1e-3, print_diff=True)
            print("Test correctness...")
            expected = result_generator(np_arys)
            test_allclose(result, expected, rtol=1e-3, print_diff=True)
        evaluator = func.time_evaluator(func.entry_name, dev, number=number)
        time_cost = evaluator(*tvm_arys).mean * 1e3
    except Exception as e:
        print(e)
    finally:
        while len(tvm_arys) > 0:
            del tvm_arys[-1]
        if os.path.exists(os.path.join(LIB_DIR, func_file)):
            try:
                os.remove(os.path.join(LIB_DIR, func_file))
            except Exception as e:
                print(e)
    return time_cost
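# A minimal local-execution stand-in for the rpc_info interface used above
# (hypothetical sketch; the project's real RpcInfo may differ). It provides the
# four members evaluate() touches: use_rpc, target_host, fcompile, get_remote().
class _LocalRpcInfo:
    use_rpc = False
    target_host = None
    fcompile = None

    def get_remote(self):
        return None  # local run: no RPC session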
def init_weight(var):
    w_pth = torch.empty(*to_tuple(var.shape), dtype=torch.float64)
    if len(w_pth.shape) == 4:  # Conv2d
        # NOTE: https://pytorch.org/docs/stable/nn.init.html#torch.nn.init.kaiming_normal_
        torch.nn.init.kaiming_normal_(w_pth, mode='fan_out', nonlinearity='relu')
    elif len(w_pth.shape) == 2:  # Linear
        torch.nn.init.normal_(w_pth, mean=0, std=0.01)
    elif len(w_pth.shape) == 1:  # bias
        torch.nn.init.constant_(w_pth, 0)
    else:
        raise NotImplementedError(f'Unrecognized weight shape: {var.shape}')
    return w_pth.numpy()
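# Hedged example of init_weight on a conv-shaped TE placeholder; the returned
# numpy array can be fed to tvm.nd.array. Assumes torch and tvm as used above;
# the helper name is hypothetical.
def _demo_init_weight():
    w = tvm.te.placeholder([6, 1, 5, 5], dtype="float64", name="w")
    return init_weight(w)  # Kaiming-initialized, shape (6, 1, 5, 5)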
def __evaluate(s, bufs, target, dev_id, number=1, q=None):
    beg = time.time()
    for i in range(number):
        ctx = tvm.device(target, dev_id)
        tvm_arys = []
        for arg in bufs:
            shape = to_tuple(arg.shape)
            tmp = np.random.uniform(-10, 10, size=shape).astype(arg.dtype)
            tmp = tvm.nd.array(tmp, ctx)
            tvm_arys.append(tmp)
        try:
            func = tvm.build(s, bufs, target)
            func(*tvm_arys)
        except Exception as e:
            print("Oops")
            print(e)
    end = time.time()
    # wall-clock milliseconds per run, including build time
    time_cost = (end - beg) * 1e3 / number
    if q:
        q.put(time_cost)
    return time_cost
def build_and_eval(lib, s, bufs, target, dev_id, rpc_info: RpcInfo = None, number=1):
    if rpc_info is not None:
        target_host = rpc_info.target_host
        fcompile = rpc_info.fcompile
        use_rpc = rpc_info.use_rpc
    else:
        target_host, fcompile, use_rpc = None, None, None
    # mod = tvm.lower(s, bufs, simple_mode=True)
    func = tvm.build(s, bufs, target=target, target_host=target_host)
    tvm_arys = []
    try:
        func.export_library(lib, fcompile)
        remote = rpc_info.get_remote() if rpc_info is not None else None
        # allocate random input buffers on the chosen device
        ctx = (remote if remote else tvm).device(target, dev_id)
        for buf in bufs:
            shape = to_tuple(buf.shape)
            tmp = np.random.uniform(0, 1, size=shape).astype(buf.dtype)
            tmp = tvm.nd.array(tmp, ctx)
            tvm_arys.append(tmp)
        if use_rpc:
            # upload and load the module on the remote device
            remote.upload(lib)
            func = remote.load_module(os.path.split(lib)[-1])
        else:
            func = tvm.runtime.module.load_module(lib)
        evaluator = func.time_evaluator(func.entry_name, ctx, number=number)
        time_cost = evaluator(*tvm_arys).mean * 1e3
    finally:
        while len(tvm_arys) > 0:
            del tvm_arys[-1]
    return time_cost
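# Hedged usage sketch: evaluate a schedule locally (rpc_info=None) by exporting
# to a temporary shared library. `s` and `bufs` are assumed to come from a
# tvm.te.create_schedule call like the ones elsewhere in this file; the helper
# name is hypothetical.
def _demo_build_and_eval(s, bufs):
    import tempfile
    lib = os.path.join(tempfile.mkdtemp(), "kernel.so")
    return build_and_eval(lib, s, bufs, "llvm", dev_id=0, number=5)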
def evaluate(s, bufs, target, dev_id, number=10):
    ctx = tvm.device(target, dev_id)
    tvm_arys = []
    for arg in bufs:
        shape = to_tuple(arg.shape)
        tmp = np.random.uniform(-10, 10, size=shape).astype(arg.dtype)
        tmp = tvm.nd.array(tmp, ctx)
        tvm_arys.append(tmp)
    try:
        func = tvm.build(s, bufs, target)
        # time func() directly on the host instead of using time_evaluator:
        # evaluator = func.time_evaluator(func.entry_name, ctx, number=number)
        # time_cost = evaluator(*tvm_arys).mean * 1e3
        beg = time.time()
        for i in range(number):
            func(*tvm_arys)
        end = time.time()
        time_cost = (end - beg) * 1e3 / number
        return time_cost
    except Exception as e:
        print(e)
        return float("inf")
def _evaluate(s, bufs, target, dev_id, number=1, q=None):
    ctx = tvm.device(target, dev_id)
    tvm_arys = []
    for arg in bufs:
        shape = to_tuple(arg.shape)
        tmp = np.random.uniform(-10, 10, size=shape).astype(arg.dtype)
        tmp = tvm.nd.array(tmp, ctx)
        tvm_arys.append(tmp)
    func, evaluator = None, None
    try:
        func = tvm.build(s, bufs, target)
        evaluator = func.time_evaluator(func.entry_name, ctx, number=number)
        time_cost = evaluator(*tvm_arys).mean * 1e3
        if q:
            q.put(time_cost)
        return time_cost
    except Exception as e:
        # release device buffers and build artifacts before re-raising
        tvm_arys.clear()
        if func is not None:
            del func
        if evaluator is not None:
            del evaluator
        raise e
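# Hedged sketch of the usual reason for the optional queue argument: running
# _evaluate in a child process so a hanging build or run can be killed after a
# timeout. Uses only the standard library; the wrapper name is hypothetical and
# assumes the schedule and buffers can be sent to the child process.
import multiprocessing

def _evaluate_with_timeout(s, bufs, target, dev_id, number=1, timeout=10.0):
    q = multiprocessing.Queue()
    p = multiprocessing.Process(
        target=_evaluate, args=(s, bufs, target, dev_id, number, q))
    p.start()
    p.join(timeout)
    if p.is_alive():
        p.terminate()
        p.join()
        return float("inf")  # treat a timeout as an unusable schedule
    return q.get() if not q.empty() else float("inf")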
def main():
    batch = 32
    dtype = "float64"
    img = tvm.te.placeholder([batch, 1, 28, 28], dtype=dtype, name="img")
    label = tvm.te.placeholder([batch, 10], dtype=dtype, name="label")
    weight_1 = tvm.te.placeholder([28 * 28, 10], dtype=dtype, name="w1",
                                  requires_grad=True)

    # single-layer softmax classifier with MSE loss
    t1 = flatten(img)
    t2 = gemm(t1, weight_1)
    t3 = softmax(t2)
    t4 = mse_loss(t3, label)

    d1, = tvm.te.mygradient(t4, [weight_1])

    print("Build model...")
    s = tvm.te.create_schedule([t4.op, d1.op])
    func = tvm.build(s, [img, label, weight_1, t4, d1], target="llvm")

    print("Check correctness...")
    free_vars = [weight_1]
    gradients = [d1]
    params = []
    for var in free_vars:
        shape = to_tuple(var.shape)
        var_np = np.random.uniform(-2, 2, shape).astype(dtype)
        params.append(var_np)
    img_np = np.random.uniform(0.9999, 1, to_tuple(img.shape)).astype(dtype)
    label_np = np.random.uniform(0, 1, to_tuple(label.shape)).astype(dtype)
    ret_np = np.zeros(to_tuple(t4.shape)).astype(dtype)
    inits = []
    for var in gradients:
        shape = to_tuple(var.shape)
        var_np = np.zeros(shape).astype(dtype)
        inits.append(var_np)

    ctx = tvm.device("llvm")
    img_tvm = tvm.nd.array(img_np, ctx)
    label_tvm = tvm.nd.array(label_np, ctx)
    ret_tvm = tvm.nd.array(ret_np, ctx)
    free_vars_tvm = [tvm.nd.array(x, ctx) for x in params]
    gradients_tvm = [tvm.nd.array(x, ctx) for x in inits]
    func(img_tvm, label_tvm, *free_vars_tvm, ret_tvm, *gradients_tvm)

    print("Start training...")
    # these weights will be updated during training
    model_weights = []
    for var in free_vars:
        shape = to_tuple(var.shape)
        var_np = np.random.uniform(-0.5, 0.5, shape).astype(dtype)
        model_weights.append(var_np)

    epoch = 100
    lr = 0.0002
    train_set = torchvision.datasets.MNIST(
        ".", train=True, transform=transforms.Compose([transforms.ToTensor()]))
    test_set = torchvision.datasets.MNIST(
        ".", train=False, transform=transforms.Compose([transforms.ToTensor()]))
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch,
                                               shuffle=True)

    for ep in range(epoch):
        for i, data in enumerate(train_loader):
            img_tvm = tvm.nd.array(data[0].numpy().astype(dtype), ctx)
            # one-hot encode the integer class labels
            label_torch = torch.tensor(np.zeros([batch, 10]).astype(dtype))
            label_torch.scatter_(1, data[1].unsqueeze(0).T, 1.0)
            label_tvm = tvm.nd.array(label_torch.numpy(), ctx)
            weights_iter = [tvm.nd.array(var) for var in model_weights]
            gradients_iter = [
                tvm.nd.array(np.zeros(to_tuple(var.shape)).astype(dtype))
                for var in gradients
            ]
            func(img_tvm, label_tvm, *weights_iter, ret_tvm, *gradients_iter)
            if i % 100 == 0:
                print("epoch=", ep + 1, "iteration=", i + 1,
                      "loss=", ret_tvm.asnumpy())
            # vanilla SGD update
            for k, gradient in enumerate(gradients_iter):
                model_weights[k] -= lr * gradient.asnumpy()
def main():
    batch = 8
    dtype = "float64"
    img = tvm.te.placeholder([batch, 1, 28, 28], dtype=dtype, name="img")
    label = tvm.te.placeholder([batch, 10], dtype=dtype, name="label")
    weight_1 = tvm.te.placeholder([6, 1, 5, 5], dtype=dtype, name="w1",
                                  requires_grad=True)
    weight_2 = tvm.te.placeholder([16, 6, 5, 5], dtype=dtype, name="w2",
                                  requires_grad=True)
    weight_3 = tvm.te.placeholder([120, 16, 5, 5], dtype=dtype, name="w3",
                                  requires_grad=True)
    weight_4 = tvm.te.placeholder([120, 84], dtype=dtype, name="w4",
                                  requires_grad=True)
    weight_5 = tvm.te.placeholder([84, 10], dtype=dtype, name="w5",
                                  requires_grad=True)

    act1 = tanh
    act2 = ReLU

    # LeNet-style network with ReLU activations
    t1 = conv2d_nchw(img, weight_1, None, 1, 2, 1, 1)
    t2 = act2(t1)
    t3 = avgpool(t2)
    t4 = conv2d_nchw(t3, weight_2, None, 1, 0, 1, 1)
    t5 = act2(t4)
    t6 = avgpool(t5)
    t7 = conv2d_nchw(t6, weight_3, None, 1, 0, 1, 1)
    t8 = act2(t7)
    # t9 = avgpool(t8)
    t10 = flatten_gemm(t8, weight_4)
    t11 = act2(gemm(t10, weight_5))
    t12 = softmax_log(t11)
    # t13 = sum_all(t12)
    t13 = mse_loss(t12, label)

    d1, d2, d3, d4, d5 = tvm.te.mygradient(
        t13, [weight_1, weight_2, weight_3, weight_4, weight_5])

    print("Build model...")
    s = tvm.te.create_schedule([t13.op, d1.op, d2.op, d3.op, d4.op, d5.op])
    arg_bufs = [
        img, label, weight_1, weight_2, weight_3, weight_4, weight_5,
        t12, t13, d1, d2, d3, d4, d5
    ]
    # print(tvm.lower(s, arg_bufs, simple_mode=True))
    func = tvm.build(s, arg_bufs, target="llvm")

    print("Check correctness...")
    free_vars = [weight_1, weight_2, weight_3, weight_4, weight_5]
    gradients = [d1, d2, d3, d4, d5]
    params = []
    for var in free_vars:
        shape = to_tuple(var.shape)
        var_np = np.random.uniform(-2, 2, shape).astype(dtype)
        params.append(var_np)
    img_np = np.random.uniform(0.9999, 1, to_tuple(img.shape)).astype(dtype)
    label_np = np.random.uniform(0, 1, to_tuple(label.shape)).astype(dtype)
    logit_np = np.zeros(to_tuple(t12.shape)).astype(dtype)
    ret_np = np.zeros(to_tuple(t13.shape)).astype(dtype)
    inits = []
    for var in gradients:
        shape = to_tuple(var.shape)
        var_np = np.zeros(shape).astype(dtype)
        inits.append(var_np)

    ctx = tvm.device("llvm")
    img_tvm = tvm.nd.array(img_np, ctx)
    label_tvm = tvm.nd.array(label_np, ctx)
    logit_tvm = tvm.nd.array(logit_np, ctx)
    ret_tvm = tvm.nd.array(ret_np, ctx)
    free_vars_tvm = [tvm.nd.array(x, ctx) for x in params]
    gradients_tvm = [tvm.nd.array(x, ctx) for x in inits]
    func(img_tvm, label_tvm, *free_vars_tvm, logit_tvm, ret_tvm, *gradients_tvm)

    ret_torch, grad_torch = pytorch_result(img_np, label_np, params)
    # PyTorch cross-check kept but disabled:
    # tvm.testing.assert_allclose(ret_tvm.asnumpy(), ret_torch.detach().numpy(),
    #                             atol=1e-3, rtol=1e-5)
    # for i in range(len(gradients_tvm)):
    #     if i > 2:
    #         tvm.testing.assert_allclose(gradients_tvm[i].asnumpy(),
    #                                     grad_torch[i].detach().T.numpy(),
    #                                     atol=1e-3, rtol=1e-5)
    #     else:
    #         tvm.testing.assert_allclose(gradients_tvm[i].asnumpy(),
    #                                     grad_torch[i].detach().numpy(),
    #                                     atol=1e-3, rtol=1e-5)
    # print("Compare to Pytorch success!")

    print("Start training...")
    # these weights will be updated during training
    model_weights = []
    for var in free_vars:
        shape = to_tuple(var.shape)
        var_np = np.random.uniform(0, 0.25, shape).astype(dtype)
        model_weights.append(var_np)

    epoch = 100
    lr = 0.1
    train_set = torchvision.datasets.MNIST(
        ".", train=True, transform=transforms.Compose([transforms.ToTensor()]))
    test_set = torchvision.datasets.MNIST(
        ".", train=False, transform=transforms.Compose([transforms.ToTensor()]))
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch,
                                               shuffle=True)

    for ep in range(epoch):
        for i, data in enumerate(train_loader):
            img_tvm = tvm.nd.array(data[0].numpy().astype(dtype), ctx)
            # one-hot encode the integer class labels
            label_torch = torch.tensor(np.zeros([batch, 10]).astype(dtype))
            label_torch.scatter_(1, data[1].unsqueeze(0).T, 1.0)
            label_tvm = tvm.nd.array(label_torch.numpy(), ctx)
            weights_iter = [tvm.nd.array(var) for var in model_weights]
            gradients_iter = [
                tvm.nd.array(np.zeros(to_tuple(var.shape)).astype(dtype))
                for var in gradients
            ]
            func(img_tvm, label_tvm, *weights_iter, logit_tvm, ret_tvm,
                 *gradients_iter)
            if i % 100 == 0:
                print("epoch=", ep + 1, "iteration=", i + 1,
                      "loss=", ret_tvm.asnumpy())
            # vanilla SGD update
            for k, gradient in enumerate(gradients_iter):
                model_weights[k] -= lr * gradient.asnumpy()
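# The weight update above is vanilla SGD repeated verbatim across these
# training scripts; a small helper (hypothetical, not in the original code)
# makes the intent explicit:
def _sgd_step(weights_np, gradients_tvm, lr):
    # in-place update: w <- w - lr * grad
    for w, g in zip(weights_np, gradients_tvm):
        w -= lr * g.asnumpy()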
def try_yolo_conv(batch_size=2, number=100):
    # get the compute
    yolo_conv = SqueezeNetFire8Gemm()
    input_shape = yolo_conv.get_intput_shape()
    inputs = tvm.te.placeholder((batch_size, *input_shape), dtype="float32",
                                name='inputs')
    weight = yolo_conv.get_weight()
    outputs = yolo_conv(inputs)
    bias = yolo_conv.get_bias()

    s = tvm.te.create_schedule(outputs.op)
    schedule_yolo_conv_x86(s, outputs, inputs, weight, bias)

    arg_bufs = [inputs, weight, bias, outputs]
    stmt = tvm.lower(s, arg_bufs, simple_mode=True)
    print(stmt)
    dev_id = 0
    time_cost = _evaluate(s, arg_bufs, "llvm", dev_id, number=number)
    print("Yolo conv24 use", time_cost, "ms")

    """For pytorch"""
    out_channel, in_channel, kernel_height, kernel_width = yolo_conv.weight_shape
    padding, stride, dilation, groups = (yolo_conv.padding, yolo_conv.stride,
                                         yolo_conv.dilation, yolo_conv.groups)
    conv2d_torch = torch.nn.Conv2d(in_channel, out_channel,
                                   (kernel_height, kernel_width),
                                   padding=padding, stride=stride,
                                   dilation=dilation, groups=groups)
    # warm up
    inputs = torch.rand(batch_size, *input_shape)
    res = conv2d_torch(inputs)
    times = time.time()
    for _ in range(number):
        res = conv2d_torch(inputs)
    times = time.time() - times
    print("Pytorch on cpu use: {}ms".format(times / number * 1e3))

    # To test the correctness. Currently the result is wrong because of the
    # schedule; if you change line 148 to
    # 'outer = s[write_cache].fuse(gemm_g, gemm_go)' the result is correct.
    ctx = tvm.device("llvm", 0)
    inputs_np = np.random.random(inputs.shape).astype("float32") * 100
    weight_np = np.random.random(to_tuple(weight.shape)).astype(weight.dtype) * 100
    outputs_np = np.zeros(shape=to_tuple(outputs.shape), dtype=np.float32)
    bias_np = np.random.random(size=to_tuple(bias.shape)).astype(bias.dtype) * 100

    inputs_tvm = tvm.nd.array(inputs_np, ctx)
    weight_tvm = tvm.nd.array(weight_np, ctx)
    outputs_tvm = tvm.nd.array(outputs_np, ctx)
    bias_tvm = tvm.nd.array(bias_np, ctx)

    inputs_torch = torch.tensor(inputs_np)
    weight_torch = torch.tensor(weight_np)
    bias_torch = torch.tensor(bias_np)

    func_tvm = tvm.build(s, arg_bufs, "llvm")
    func_tvm(inputs_tvm, weight_tvm, bias_tvm, outputs_tvm)
    outputs_torch = torch.nn.functional.conv2d(inputs_torch, weight_torch,
                                               bias=bias_torch, padding=padding,
                                               stride=stride, dilation=dilation,
                                               groups=groups)
    the_same = test_allclose(outputs_tvm.asnumpy(), outputs_torch.numpy(),
                             rtol=1e-5, print_diff=True)
    if the_same:
        print("The same!")
    else:
        print("Not the same!")
def _reset_gradients(self):
    grads_np = [
        np.zeros(to_tuple(var.shape)).astype(self.dtype)
        for var in self.gradients
    ]
    self.grads_tvm = [tvm.nd.array(var, self.ctx) for var in grads_np]
def create_buffer(tensor):
    # Note: relies on `self` from the enclosing scope, so this is a closure
    # defined inside a class method rather than a free function.
    np_buffer = np.zeros(to_tuple(tensor.shape)).astype(self.dtype)
    tvm_buffer = tvm.nd.array(np_buffer, self.ctx)
    return tvm_buffer
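# _reset_gradients and create_buffer both read self.dtype / self.ctx /
# self.gradients, so they presumably live inside a trainer-like class; a
# hypothetical minimal context (assumption, shown for readability only):
class _TrainerContextSketch:
    def __init__(self, gradients, dtype="float64"):
        self.gradients = gradients        # list of TE gradient tensors
        self.dtype = dtype
        self.ctx = tvm.device("llvm", 0)  # assumed local CPU device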
def main():
    '''
    conv[out_channel, in_channel, kernel_h, kernel_w], simplified formula:
        output_h = input_h + 2 * padding - kernel + 1

    Non-ReLU version:
    [batch, 1, 28, 28] -> conv[6, 1, 3, 3], padding=1 -> [batch, 6, 28, 28]
    -> avgpool -> [batch, 6, 14, 14]
    -> conv[16, 6, 5, 5], padding=0 -> [batch, 16, 10, 10]
    -> avgpool -> [batch, 16, 5, 5]
    -> flatten -> [batch, 400]
    -> gemm[400, 120] -> [batch, 120]
    -> gemm[120, 84] -> [batch, 84]
    -> gemm[84, 10] -> [batch, 10]
    -> CE_loss
    '''
    batch = 4
    dtype = "float64"
    img = tvm.te.placeholder([batch, 1, 28, 28], dtype=dtype, name="img")
    label = tvm.te.placeholder([batch, 10], dtype=dtype, name="label")
    weight_1 = tvm.te.placeholder([6, 1, 3, 3], dtype=dtype, name="w1",
                                  requires_grad=True)
    weight_2 = tvm.te.placeholder([16, 6, 5, 5], dtype=dtype, name="w2",
                                  requires_grad=True)
    weight_3 = tvm.te.placeholder([400, 120], dtype=dtype, name="w3",
                                  requires_grad=True)
    weight_4 = tvm.te.placeholder([120, 84], dtype=dtype, name="w4",
                                  requires_grad=True)
    weight_5 = tvm.te.placeholder([84, 10], dtype=dtype, name="w5",
                                  requires_grad=True)

    if enable_relu:
        t1 = conv2d_nchw(img, weight_1, None, 1, 1, 1, 1)
        t2 = ReLU(t1)
        t3 = avgpool(t2)
        t4 = conv2d_nchw(t3, weight_2, None, 1, 0, 1, 1)
        t5 = ReLU(t4)
        t6 = avgpool(t5)
        t7 = flatten(t6)
        t8 = gemm(t7, weight_3)
        t9 = gemm(t8, weight_4)
        t10 = ReLU(t9)
        t11 = gemm(t10, weight_5)
        t12 = ReLU(t11)
        t13 = cross_entropy(t12, label)
    else:
        t1 = conv2d_nchw(img, weight_1, None, 1, 1, 1, 1)
        t3 = avgpool(t1)
        t4 = conv2d_nchw(t3, weight_2, None, 1, 0, 1, 1)
        t6 = avgpool(t4)
        t7 = flatten(t6)
        t8 = gemm(t7, weight_3)
        t9 = gemm(t8, weight_4)
        t11 = gemm(t9, weight_5)
        t12 = t11
        t13 = cross_entropy(t12, label)

    d1, d2, d3, d4, d5 = tvm.te.mygradient(
        t13, [weight_1, weight_2, weight_3, weight_4, weight_5])

    print("Build model...")
    s = tvm.te.create_schedule([t13.op, d1.op, d2.op, d3.op, d4.op, d5.op])
    arg_bufs = [
        img, label, weight_1, weight_2, weight_3, weight_4, weight_5,
        t12, t13, d1, d2, d3, d4, d5
    ]
    print(tvm.lower(s, arg_bufs, simple_mode=True))
    func = tvm.build(s, arg_bufs, target="llvm")

    print("Check correctness...")
    free_vars = [weight_1, weight_2, weight_3, weight_4, weight_5]
    gradients = [d1, d2, d3, d4, d5]
    params = []
    for var in free_vars:
        shape = to_tuple(var.shape)
        var_np = np.random.uniform(-2, 2, shape).astype(dtype)
        params.append(var_np)
    img_np = np.random.uniform(0.9999, 1, to_tuple(img.shape)).astype(dtype)
    label_np = np.random.uniform(0, 1, to_tuple(label.shape)).astype(dtype)
    logit_np = np.zeros(to_tuple(t12.shape)).astype(dtype)
    ret_np = np.zeros(to_tuple(t13.shape)).astype(dtype)
    inits = []
    for var in gradients:
        shape = to_tuple(var.shape)
        var_np = np.zeros(shape).astype(dtype)
        inits.append(var_np)

    ctx = tvm.device("llvm")
    img_tvm = tvm.nd.array(img_np, ctx)
    label_tvm = tvm.nd.array(label_np, ctx)
    logit_tvm = tvm.nd.array(logit_np, ctx)
    ret_tvm = tvm.nd.array(ret_np, ctx)
    free_vars_tvm = [tvm.nd.array(x, ctx) for x in params]
    gradients_tvm = [tvm.nd.array(x, ctx) for x in inits]
    func(img_tvm, label_tvm, *free_vars_tvm, logit_tvm, ret_tvm, *gradients_tvm)

    print("Start training...")
    # these weights will be updated during training
    model_weights = []
    for var in free_vars:
        shape = to_tuple(var.shape)
        var_np = np.random.uniform(-1, 1, shape).astype(dtype)
        model_weights.append(var_np)

    epoch = 3
    lr = 1e-6
    train_set = torchvision.datasets.MNIST(
        ".", train=True, transform=transforms.Compose([transforms.ToTensor()]),
        download=True)
    test_set = torchvision.datasets.MNIST(
        ".", train=False, transform=transforms.Compose([transforms.ToTensor()]),
        download=True)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch,
                                               shuffle=True)

    for ep in range(epoch):
        train_num_covered = 0
        running_acc = 0.0
        running_loss = 0.0
        for i, data in enumerate(train_loader):
            img_tvm = tvm.nd.array(data[0].numpy().astype(dtype), ctx)
            # one-hot encode the integer class labels
            label_torch = torch.tensor(np.zeros([batch, 10]).astype(dtype))
            label_torch.scatter_(1, data[1].unsqueeze(0).T, 1.0)
            label_tvm = tvm.nd.array(label_torch.numpy(), ctx)
            weights_iter = [tvm.nd.array(var) for var in model_weights]
            gradients_iter = [
                tvm.nd.array(np.zeros(to_tuple(var.shape)).astype(dtype))
                for var in gradients
            ]
            func(img_tvm, label_tvm, *weights_iter, logit_tvm, ret_tvm,
                 *gradients_iter)
            # accuracy & loss record
            train_num_covered += batch
            _, predict = torch.max(torch.from_numpy(logit_tvm.asnumpy()), 1)
            num_correct = (predict == data[1]).sum()
            running_acc += num_correct.item()
            running_loss += ret_tvm.asnumpy().item(0)
            if i % 1000 == 0:
                print("epoch=", ep + 1, "iteration=", i + 1,
                      "loss=", running_loss / train_num_covered,
                      "acc=", running_acc / train_num_covered)
            # vanilla SGD update
            for k, gradient in enumerate(gradients_iter):
                assert model_weights[k].shape == gradient.asnumpy().shape
                model_weights[k] -= lr * gradient.asnumpy()
        assert train_num_covered == len(train_set)
        running_acc /= len(train_set)
        print("epoch=", ep + 1, "accuracy=", running_acc)
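# The scatter_ pattern in the training loops builds one-hot labels from the
# integer class indices in data[1]; an equivalent pure-numpy version
# (hypothetical helper, shown for clarity), e.g. _one_hot_np(data[1].numpy()):
def _one_hot_np(indices, num_classes=10, dtype="float64"):
    out = np.zeros((len(indices), num_classes), dtype=dtype)
    out[np.arange(len(indices)), indices] = 1.0
    return out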
def main():
    batch = 2
    dtype = "float64"
    img = tvm.te.placeholder([batch, 1, 32, 32], dtype=dtype, name="img")
    label = tvm.te.placeholder([batch, 10], dtype=dtype, name="label")
    weight_1 = tvm.te.placeholder([6, 1, 5, 5], dtype=dtype, name="w1",
                                  requires_grad=True)
    weight_2 = tvm.te.placeholder([16, 6, 5, 5], dtype=dtype, name="w2",
                                  requires_grad=True)
    weight_3 = tvm.te.placeholder([120, 16, 5, 5], dtype=dtype, name="w3",
                                  requires_grad=True)
    weight_4 = tvm.te.placeholder([120, 84], dtype=dtype, name="w4",
                                  requires_grad=True)
    weight_5 = tvm.te.placeholder([84, 10], dtype=dtype, name="w5",
                                  requires_grad=True)

    act = tanh  # ReLU
    t1 = conv2d_nchw(img, weight_1, None, 1, 0, 1, 1)
    t2 = act(t1)
    t3 = avgpool(t2)
    t4 = conv2d_nchw(t3, weight_2, None, 1, 0, 1, 1)
    t5 = act(t4)
    t6 = avgpool(t5)
    t7 = conv2d_nchw(t6, weight_3, None, 1, 0, 1, 1)
    t8 = act(t7)
    # t9 = avgpool(t8)
    t10 = flatten_gemm(t8, weight_4)
    t11 = gemm(t10, weight_5)
    t12 = softmax(t11)
    # t13 = sum_all(t12)
    t13 = mse_loss(t12, label)

    d1, d2, d3, d4, d5 = tvm.te.mygradient(
        t13, [weight_1, weight_2, weight_3, weight_4, weight_5])

    s = tvm.te.create_schedule([t13.op, d1.op, d2.op, d3.op, d4.op, d5.op])
    func = tvm.build(s, [
        img, label, weight_1, weight_2, weight_3, weight_4, weight_5,
        t13, d1, d2, d3, d4, d5
    ], target="llvm")

    free_vars = [weight_1, weight_2, weight_3, weight_4, weight_5]
    gradients = [d1, d2, d3, d4, d5]
    params = []
    for var in free_vars:
        shape = to_tuple(var.shape)
        var_np = np.random.uniform(-100, 100, shape).astype(dtype)
        params.append(var_np)
    img_np = np.random.uniform(-10, 10, to_tuple(img.shape)).astype(dtype)
    label_np = np.random.uniform(-10, 10, to_tuple(label.shape)).astype(dtype)
    ret_np = np.zeros(to_tuple(t13.shape)).astype(dtype)
    inits = []
    for var in gradients:
        shape = to_tuple(var.shape)
        var_np = np.zeros(shape).astype(dtype)
        inits.append(var_np)

    ctx = tvm.device("llvm")
    img_tvm = tvm.nd.array(img_np, ctx)
    label_tvm = tvm.nd.array(label_np, ctx)
    ret_tvm = tvm.nd.array(ret_np, ctx)
    free_vars_tvm = [tvm.nd.array(x, ctx) for x in params]
    gradients_tvm = [tvm.nd.array(x, ctx) for x in inits]
    func(img_tvm, label_tvm, *free_vars_tvm, ret_tvm, *gradients_tvm)

    ret_torch, grad_torch = pytorch_result(img_np, label_np, params)
    print(ret_tvm)
    print(ret_torch)
    tvm.testing.assert_allclose(ret_tvm.asnumpy(), ret_torch.detach().numpy(),
                                atol=1e-3, rtol=1e-5)
    for i in range(len(gradients_tvm)):
        print("grad_torch", i, grad_torch[i].detach().T.numpy())
        # the gemm weight gradients come back transposed relative to PyTorch
        if i > 2:
            tvm.testing.assert_allclose(gradients_tvm[i].asnumpy(),
                                        grad_torch[i].detach().T.numpy(),
                                        atol=1e-3, rtol=1e-5)
        else:
            tvm.testing.assert_allclose(gradients_tvm[i].asnumpy(),
                                        grad_torch[i].detach().numpy(),
                                        atol=1e-3, rtol=1e-5)
    print("Compare to Pytorch success!")