Example No. 1
def test_array_mul(extent=1024,
                   target="llvm",
                   dev_id=0,
                   number=10,
                   verbose=False):
    time_cost_lst = []
    for N in range(1, extent + 1):
        ctx = tvm.device(target, dev_id)
        ary_ops, ary_bufs = array_mul(N)
        ary_inputs = [
            tvm.nd.array(
                np.random.uniform(size=to_tuple(buf.shape)).astype(buf.dtype),
                ctx) for buf in ary_bufs[:-1]
        ]
        ary_inputs += [
            tvm.nd.array(np.zeros(shape=to_tuple(buf.shape), dtype=buf.dtype),
                         ctx) for buf in ary_bufs[-1:]
        ]

        s = tvm.te.create_schedule(ary_ops)
        func = tvm.build(s, ary_bufs, target)
        evaluator = func.time_evaluator(func.entry_name, ctx, number=number)

        cost = evaluator(*ary_inputs).mean * 1e3
        # print("N=", N, "cost=", "%f(ms)"%cost, "(target=%s, dev_id=%d, number=%d)"%(target, dev_id, number))
        time_cost_lst.append(cost)

    res_lst = [x / time_cost_lst[0] for x in time_cost_lst]
    print("array_mul |(target=%s, dev_id=%d, number=%d)" %
          (target, dev_id, number))
    if verbose:
        for i, res in enumerate(res_lst):
            print("time_cost: ext=%d / ext=1 = %f" % (i + 1, res))
    else:
        print("time_cost: ext=%d / ext=1 = %f" % (extent, res_lst[-1]))
Example No. 2
def numpy_init(weight_list, *args):
    '''
    The first argument is randomly initialized; all others are zero-initialized.
    '''
    weight_np = [
        np.random.uniform(-1, 1, to_tuple(var.shape)).astype(dtype)
        for var in weight_list
    ]
    init = [weight_np]
    if len(args) > 0:
        for item in args:
            init.append(
                [np.zeros(to_tuple(var.shape), dtype=dtype) for var in item])
    return init
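numpy_init reads dtype (and to_tuple) from the enclosing module rather than taking them as parameters. A hedged usage sketch with placeholder tensors standing in for real weights:

dtype = "float32"  # numpy_init picks this name up from module scope
w = tvm.te.placeholder((4, 4), dtype=dtype, name="w")
g = tvm.te.placeholder((4, 4), dtype=dtype, name="g")
weights_np, grads_np = numpy_init([w], [g])
assert float(grads_np[0].sum()) == 0.0  # extra argument lists start at zero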
Example No. 3
def evaluate(name, s, bufs, target, dev_id, number, rpc_info):
    if rpc_info is not None:
        host = rpc_info.host
        port = rpc_info.port
    else:
        # local
        host = "0.0.0.0"
        port = 9090  # default port
    if host == "0.0.0.0":
        if LOCAL_RPC:
            use_rpc = True
        else:
            use_rpc = False
    else:
        use_rpc = True
    if use_rpc:
        remote = rpc.connect(host, port)
        ctx = remote.context(target, dev_id)
    else:
        ctx = tvm.context(target, dev_id)
    tvm_arys = []
    for buf in bufs:
        shape = to_tuple(buf.shape)
        tmp = np.random.uniform(-10, 10, size=shape).astype(buf.dtype)
        tmp = tvm.nd.array(tmp, ctx)
        tvm_arys.append(tmp)
    try:
        func_file = "{}.tar".format(name)
        if rpc_info is not None and rpc_info.target_host is not None:
            func = tvm.build(s,
                             bufs,
                             target=target,
                             target_host=rpc_info.target_host)
        else:
            func = tvm.build(s, bufs, target=target)
        if use_rpc:
            func.export_library(os.path.join(LIB_DIR, func_file))
            remote.upload(os.path.join(LIB_DIR, func_file))
            func = remote.load_module(func_file)
        evaluator = func.time_evaluator(func.entry_name, ctx, number=number)
        time_cost = evaluator(*tvm_arys).mean * 1e3
    except Exception as e:
        print(e)
        time_cost = float("inf")
    finally:
        while len(tvm_arys) > 0:
            del tvm_arys[-1]
        if os.path.exists(os.path.join(LIB_DIR, func_file)):
            try:
                os.remove(os.path.join(LIB_DIR, func_file))
            except Exception as e:
                print(e)
        elif os.path.exists(os.path.join(LIB_DIR, func_file + ".so")):
            try:
                os.remove(os.path.join(LIB_DIR, func_file + ".so"))
            except Exception as e:
                print(e)
    return time_cost
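Several evaluators on this page take an rpc_info object whose definition is not shown. A minimal sketch consistent with the fields accessed above (host, port, target_host, fcompile, use_rpc, get_remote); the class in the source repository may carry more state than this:

from tvm import rpc


class RpcInfo:
    # Hedged reconstruction of the container the evaluate() variants expect.
    def __init__(self, host, port, target_host=None, fcompile=None, use_rpc=True):
        self.host = host
        self.port = port
        self.target_host = target_host
        self.fcompile = fcompile
        self.use_rpc = use_rpc

    def get_remote(self):
        # Return a live RPC session, or None when RPC is disabled.
        return rpc.connect(self.host, self.port) if self.use_rpc else None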
Example No. 4
def evaluate(name,
             s,
             bufs,
             target,
             dev_id,
             number=10,
             rpc_info=None,
             result_generator=None):
    if rpc_info is not None:
        use_rpc = rpc_info.use_rpc
        target_host = rpc_info.target_host
        fcompile = rpc_info.fcompile
    else:
        use_rpc, target_host, fcompile = None, None, None

    remote = rpc_info.get_remote() if rpc_info is not None else None
    dev = (remote if remote else tvm).device(target, dev_id)

    np_arys = [
        np.random.uniform(-10, 10, size=to_tuple(buf.shape)).astype(buf.dtype)
        for buf in bufs
    ]
    tvm_arys = [tvm.nd.array(arr, dev) for arr in np_arys]
    func_file = f"{name}.so"
    time_cost = float("inf")
    try:
        func = tvm.build(s, bufs, target=target, target_host=target_host)
        if use_rpc:
            func.export_library(os.path.join(LIB_DIR, func_file), fcompile)
            remote.upload(os.path.join(LIB_DIR, func_file))
            func = remote.load_module(func_file)
        func(*tvm_arys)
        if result_generator is not None:
            print("Test whether computed...")
            result = tvm_arys[-1].asnumpy()
            test_allclose(result, np_arys[-1], rtol=1e-3, print_diff=True)
            print("Test correctness...")
            expected = result_generator(np_arys)
            test_allclose(result, expected, rtol=1e-3, print_diff=True)
        evaluator = func.time_evaluator(func.entry_name, dev, number=number)
        time_cost = evaluator(*tvm_arys).mean * 1e3
    except Exception as e:
        print(e)
    finally:
        while len(tvm_arys) > 0:
            del tvm_arys[-1]
        if os.path.exists(os.path.join(LIB_DIR, func_file)):
            try:
                os.remove(os.path.join(LIB_DIR, func_file))
            except Exception as e:
                print(e)

    return time_cost
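test_allclose is another assumed helper. Judging from its call sites (it is used both to print differences here and as a boolean "the same?" check in Example No. 12), it is plausibly a non-raising wrapper over numpy; a sketch under that assumption:

import numpy as np


def test_allclose(actual, expected, rtol=1e-5, atol=1e-8, print_diff=False):
    # Hedged sketch: report closeness instead of raising, optionally printing
    # the largest absolute difference for debugging.
    close = bool(np.allclose(actual, expected, rtol=rtol, atol=atol))
    if print_diff and not close:
        print("max abs diff:", np.max(np.abs(actual - expected)))
    return close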
Example No. 5
def init_weight(var):
    w_pth = torch.empty(*to_tuple(var.shape), dtype=torch.float64)
    if len(w_pth.shape) == 4:  # Conv2d
        # NOTE: https://pytorch.org/docs/stable/nn.init.html#torch.nn.init.kaiming_normal_
        torch.nn.init.kaiming_normal_(w_pth, mode='fan_out', nonlinearity='relu')
    elif len(w_pth.shape) == 2:  # Linear
        torch.nn.init.normal_(w_pth, mean=0, std=0.01)
    elif len(w_pth.shape) == 1:  # bias
        torch.nn.init.constant_(w_pth, 0)
    else:
        raise NotImplementedError(f'Unrecognized weight shape: {var.shape}')
    return w_pth.numpy()
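A hedged usage sketch: the branch taken depends only on the rank of the weight, so a 4-D placeholder (illustrative shape) gets Kaiming initialization:

w_conv = tvm.te.placeholder((16, 3, 3, 3), dtype="float64", name="w_conv")
w_np = init_weight(w_conv)  # 4-D -> kaiming_normal_, returned as numpy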
Example No. 6
def __evaluate(s, bufs, target, dev_id, number=1, q=None):
    beg = time.time()
    for i in range(number):
        ctx = tvm.context(target, dev_id)
        tvm_arys = []
        for arg in bufs:
            shape = to_tuple(arg.shape)
            tmp = np.random.uniform(-10, 10, size=shape).astype(arg.dtype)
            tmp = tvm.nd.array(tmp, ctx)
            tvm_arys.append(tmp)
        try:
            func = tvm.build(s, bufs, target)
            func(*tvm_arys)
        except Exception as e:
            print("Oops")
            print(e)
    end = time.time()
    time_cost = (end - beg) * 1e3 / number
    if q:
        q.put(time_cost)
    return time_cost
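The q parameter suggests __evaluate is meant to run in a child process, so a kernel that hangs or crashes cannot take the tuner down with it; note that the measured time here includes tvm.build and array allocation, not just execution. A hedged sketch of that usage, assuming s and bufs are already defined:

import multiprocessing

q = multiprocessing.Queue()
p = multiprocessing.Process(target=__evaluate, args=(s, bufs, "llvm", 0, 10, q))
p.start()
p.join(timeout=30)  # give the measurement 30 seconds
if p.is_alive():    # still running: treat it as a failed measurement
    p.terminate()
cost = q.get() if not q.empty() else float("inf")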
Example No. 7
def build_and_eval(lib, s, bufs, target, dev_id, rpc_info: RpcInfo = None, number=1):
    if rpc_info is not None:
        target_host = rpc_info.target_host
        fcompile = rpc_info.fcompile
        use_rpc = rpc_info.use_rpc
    else:
        target_host, fcompile, use_rpc = None, None, None

    # mod = tvm.lower(s, bufs, simple_mode=True)
    # print("Building...")
    func = tvm.build(s, bufs, target=target, target_host=target_host)

    tvm_arys = []
    try:
        func.export_library(lib, fcompile)
        # print("Connecting...")
        remote = rpc_info.get_remote() if rpc_info is not None else None
        # print("Allocating...")
        ctx = (remote if remote else tvm).device(target, dev_id)
        for buf in bufs:
            shape = to_tuple(buf.shape)
            tmp = np.random.uniform(0, 1, size=shape).astype(buf.dtype)
            tmp = tvm.nd.array(tmp, ctx)
            tvm_arys.append(tmp)

        if use_rpc:
            # print("Uploading...")
            remote.upload(lib)
            func = remote.load_module(os.path.split(lib)[-1])
        else:
            func = tvm.runtime.module.load_module(lib)
        # print("Evaluating...")
        evaluator = func.time_evaluator(func.entry_name, ctx, number=number)
        time_cost = evaluator(*tvm_arys).mean * 1e3
    finally:
        while len(tvm_arys) > 0:
            del tvm_arys[-1]

    return time_cost
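A hedged usage sketch, assuming LIB_DIR and the schedule/buffers are in scope as in the other examples (the path and counts are illustrative):

lib_path = os.path.join(LIB_DIR, "kernel.so")
cost_ms = build_and_eval(lib_path, s, bufs, "llvm", dev_id=0, number=10)
print("mean cost: %.3f ms" % cost_ms)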
Example No. 8
def evaluate(s, bufs, target, dev_id, number=10):
    ctx = tvm.context(target, dev_id)
    tvm_arys = []
    for arg in bufs:
        shape = to_tuple(arg.shape)
        tmp = np.random.uniform(-10, 10, size=shape).astype(arg.dtype)
        tmp = tvm.nd.array(tmp, ctx)
        tvm_arys.append(tmp)
    func, evaluator = None, None
    try:
        func = tvm.build(s, bufs, target)
        # evaluator = func.time_evaluator(func.entry_name, ctx, number=number)
        # time_cost = evaluator(*tvm_arys).mean * 1e3
        beg = time.time()
        for i in range(number):
            func(*tvm_arys)
        end = time.time()
        time_cost = (end - beg) * 1e3 / number
        return time_cost
    except Exception as e:
        print(e)
        return float("inf")
Example No. 9
def _evaluate(s, bufs, target, dev_id, number=1, q=None):
    ctx = tvm.device(target, dev_id)
    tvm_arys = []
    for arg in bufs:
        shape = to_tuple(arg.shape)
        tmp = np.random.uniform(-10, 10, size=shape).astype(arg.dtype)
        tmp = tvm.nd.array(tmp, ctx)
        tvm_arys.append(tmp)
    func, evaluator = None, None
    try:
        func = tvm.build(s, bufs, target)
        evaluator = func.time_evaluator(func.entry_name, ctx, number=number)
        time_cost = evaluator(*tvm_arys).mean * 1e3
        if q:
            q.put(time_cost)
        return time_cost
    except Exception as e:
        for item in tvm_arys:
            del item
        if func is not None:
            del func
        if evaluator is not None:
            del evaluator
        raise e
Example No. 10
def main():
    batch = 32
    dtype = "float64"
    img = tvm.te.placeholder([batch, 1, 28, 28], dtype=dtype, name="img")
    label = tvm.te.placeholder([batch, 10], dtype=dtype, name="label")
    weight_1 = tvm.te.placeholder([28 * 28, 10],
                                  dtype=dtype,
                                  name="w1",
                                  requires_grad=True)

    t1 = flatten(img)

    t2 = gemm(t1, weight_1)

    t3 = softmax(t2)

    t4 = mse_loss(t3, label)

    d1, = tvm.te.mygradient(t4, [weight_1])

    print("Build model...")

    s = tvm.te.create_schedule([t4.op, d1.op])

    func = tvm.build(s, [img, label, weight_1, t4, d1], target="llvm")

    print("Check correctness...")

    free_vars = [weight_1]
    gradients = [d1]
    params = []
    for var in free_vars:
        shape = to_tuple(var.shape)
        var_np = np.random.uniform(-2, 2, shape).astype(dtype)
        params.append(var_np)
    img_np = np.random.uniform(0.9999, 1, to_tuple(img.shape)).astype(dtype)
    label_np = np.random.uniform(0, 1, to_tuple(label.shape)).astype(dtype)
    ret_np = np.zeros(to_tuple(t4.shape)).astype(dtype)
    inits = []
    for var in gradients:
        shape = to_tuple(var.shape)
        var_np = np.zeros(shape).astype(dtype)
        inits.append(var_np)

    ctx = tvm.context("llvm")

    img_tvm = tvm.nd.array(img_np, ctx)
    label_tvm = tvm.nd.array(label_np, ctx)
    ret_tvm = tvm.nd.array(ret_np, ctx)
    free_vars_tvm = [tvm.nd.array(x, ctx) for x in params]
    gradients_tvm = [tvm.nd.array(x, ctx) for x in inits]

    func(img_tvm, label_tvm, *free_vars_tvm, ret_tvm, *gradients_tvm)

    print("Start training...")

    # this will be updated during training
    model_weights = []
    for var in free_vars:
        shape = to_tuple(var.shape)
        var_np = np.random.uniform(-0.5, 0.5, shape).astype(dtype)
        model_weights.append(var_np)

    epoch = 100
    lr = 0.0002

    train_set = torchvision.datasets.MNIST(".",
                                           train=True,
                                           transform=transforms.Compose(
                                               [transforms.ToTensor()]))
    test_set = torchvision.datasets.MNIST(".",
                                          train=False,
                                          transform=transforms.Compose(
                                              [transforms.ToTensor()]))
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=batch,
                                               shuffle=True)

    for ep in range(epoch):
        for i, data in enumerate(train_loader):
            img_tvm = tvm.nd.array(data[0].numpy().astype(dtype), ctx)
            label_torch = torch.tensor(np.zeros([batch, 10]).astype(dtype))
            label_torch.scatter_(1, data[1].unsqueeze(0).T, 1.0)
            label_tvm = tvm.nd.array(label_torch.numpy(), ctx)
            weights_iter = []
            for var in model_weights:
                var_tvm = tvm.nd.array(var)
                weights_iter.append(var_tvm)
            gradients_iter = []
            for var in gradients:
                shape = to_tuple(var.shape)
                var_tvm = tvm.nd.array(np.zeros(shape).astype(dtype))
                gradients_iter.append(var_tvm)
            # print("Running...")
            func(img_tvm, label_tvm, *weights_iter, ret_tvm, *gradients_iter)

            if i % 100 == 0:
                print("epoch=", ep + 1, "iteration=", i + 1, "loss=",
                      ret_tvm.asnumpy())
                # print("logit=", logit_tvm.asnumpy())
                # print("weights")
                # print(model_weights[0])
                # print("gradients")
                # print(gradients_iter[0])

            # print("Updating...")
            for k, gradient in enumerate(gradients_iter):
                model_weights[k] -= lr * gradient.asnumpy()
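The scatter_ call in the training loop above builds one-hot labels from class indices. A standalone demo of the trick (values are illustrative):

import torch

labels = torch.tensor([2, 0, 1, 3])              # class indices, shape [4]
one_hot = torch.zeros(4, 10, dtype=torch.float64)
one_hot.scatter_(1, labels.unsqueeze(0).T, 1.0)  # one_hot[i, labels[i]] = 1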
Example No. 11
def main():
  batch = 8
  dtype = "float64"
  img = tvm.te.placeholder([batch, 1, 28, 28], dtype=dtype, name="img")
  label = tvm.te.placeholder([batch, 10], dtype=dtype, name="label")
  weight_1 = tvm.te.placeholder([6, 1, 5, 5], dtype=dtype, name="w1", requires_grad=True)
  weight_2 = tvm.te.placeholder([16, 6, 5, 5], dtype=dtype, name="w2", requires_grad=True)
  weight_3 = tvm.te.placeholder([120, 16, 5, 5], dtype=dtype, name="w3", requires_grad=True)
  weight_4 = tvm.te.placeholder([120, 84], dtype=dtype, name="w4", requires_grad=True)
  weight_5 = tvm.te.placeholder([84, 10], dtype=dtype, name="w5", requires_grad=True)

  act1 = tanh
  act2 = ReLU

  t1 = conv2d_nchw(img, weight_1, None, 1, 2, 1, 1)
  t2 = act2(t1)
  t3 = avgpool(t2)

  t4 = conv2d_nchw(t3, weight_2, None, 1, 0, 1, 1)
  t5 = act2(t4)
  t6 = avgpool(t5)

  t7 = conv2d_nchw(t6, weight_3, None, 1, 0, 1, 1)
  t8 = act2(t7)
  # t9 = avgpool(t8)

  t10 = flatten_gemm(t8, weight_4)

  t11 = act2(gemm(t10, weight_5))

  t12 = softmax_log(t11)

  # t13 = sum_all(t12)
  t13 = mse_loss(t12, label)

  d1, d2, d3, d4, d5 = tvm.te.mygradient(t13, [weight_1, weight_2, weight_3, weight_4, weight_5])

  print("Build model...")

  s = tvm.te.create_schedule([t13.op, d1.op, d2.op, d3.op, d4.op, d5.op])

  # print(tvm.lower(s, [img, label, weight_1, weight_2, weight_3, weight_4, weight_5, t12, t13, d1, d2, d3, d4, d5], simple_mode=True))

  func = tvm.build(s, [img, label, weight_1, weight_2, weight_3, weight_4, weight_5, t12, t13, d1, d2, d3, d4, d5], target="llvm")

  print("Check correctness...")

  free_vars = [weight_1, weight_2, weight_3, weight_4, weight_5]
  gradients = [d1, d2, d3, d4, d5]
  params = []
  for var in free_vars:
    shape = to_tuple(var.shape)
    var_np = np.random.uniform(-2, 2, shape).astype(dtype)
    params.append(var_np)
  img_np = np.random.uniform(0.9999, 1, to_tuple(img.shape)).astype(dtype)
  label_np = np.random.uniform(0, 1, to_tuple(label.shape)).astype(dtype)
  logit_np = np.zeros(to_tuple(t12.shape)).astype(dtype)
  ret_np = np.zeros(to_tuple(t13.shape)).astype(dtype)
  inits = []
  for var in gradients:
    shape = to_tuple(var.shape)
    var_np = np.zeros(shape).astype(dtype)
    inits.append(var_np)

  ctx = tvm.context("llvm")

  img_tvm = tvm.nd.array(img_np, ctx)
  label_tvm = tvm.nd.array(label_np, ctx)
  logit_tvm = tvm.nd.array(logit_np, ctx)
  ret_tvm = tvm.nd.array(ret_np, ctx)
  free_vars_tvm = [tvm.nd.array(x, ctx) for x in params]
  gradients_tvm = [tvm.nd.array(x, ctx) for x in inits]

  func(img_tvm, label_tvm, *free_vars_tvm, logit_tvm, ret_tvm, *gradients_tvm)

  ret_torch, grad_torch = pytorch_result(img_np, label_np, params)

  # print(ret_tvm)
  # print(ret_torch)

  # tvm.testing.assert_allclose(ret_tvm.asnumpy(), ret_torch.detach().numpy(), atol=1e-3, rtol=1e-5)
  # for i in range(len(gradients_tvm)):
  #   if i > 2:
  #     tvm.testing.assert_allclose(gradients_tvm[i].asnumpy(), grad_torch[i].detach().T.numpy(), atol=1e-3, rtol=1e-5)
  #   else:
  #     tvm.testing.assert_allclose(gradients_tvm[i].asnumpy(), grad_torch[i].detach().numpy(), atol=1e-3, rtol=1e-5)

  # print("Compare to Pytorch success!")

  print("Start training...")

  # this will be updated during training
  model_weights = []
  for var in free_vars:
    shape = to_tuple(var.shape)
    var_np = np.random.uniform(0, 0.25, shape).astype(dtype)
    model_weights.append(var_np)

  epoch = 100
  lr = 0.1

  train_set = torchvision.datasets.MNIST(".", train=True, transform=transforms.Compose([transforms.ToTensor()]))
  test_set = torchvision.datasets.MNIST(".", train=False, transform=transforms.Compose([transforms.ToTensor()]))
  train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch, shuffle=True)

  for ep in range(epoch):
    for i, data in enumerate(train_loader):
      img_tvm = tvm.nd.array(data[0].numpy().astype(dtype), ctx)
      label_torch = torch.tensor(np.zeros([batch, 10]).astype(dtype))
      label_torch.scatter_(1, data[1].unsqueeze(0).T, 1.0)
      label_tvm = tvm.nd.array(label_torch.numpy(), ctx)
      weights_iter = []
      for var in model_weights:
        var_tvm = tvm.nd.array(var)
        weights_iter.append(var_tvm)
      gradients_iter = []
      for var in gradients:
        shape = to_tuple(var.shape)
        var_tvm = tvm.nd.array(np.zeros(shape).astype(dtype))
        gradients_iter.append(var_tvm)
      # print("Running...")
      func(img_tvm, label_tvm, *weights_iter, logit_tvm, ret_tvm, *gradients_iter)

      if i % 100 == 0:
        print("epoch=", ep+1, "iteration=", i+1, "loss=", ret_tvm.asnumpy())
        # print("logit=", logit_tvm.asnumpy())
        # print("weights")
        # print(model_weights[0])
        # print("gradients")
        # print(gradients_iter[0])

      # print("Updating...")
      for k, gradient in enumerate(gradients_iter):
        model_weights[k] -= lr * gradient.asnumpy()
Example No. 12
def try_yolo_conv(batch_size=2, number=100):
    # get the compute
    yolo_conv = SqueezeNetFire8Gemm()
    input_shape = yolo_conv.get_intput_shape()
    inputs = tvm.te.placeholder((batch_size, *input_shape),
                                dtype="float32",
                                name='inputs')
    weight = yolo_conv.get_weight()
    outputs = yolo_conv(inputs)
    bias = yolo_conv.get_bias()

    s = tvm.te.create_schedule(outputs.op)

    schedule_yolo_conv_x86(s, outputs, inputs, weight, bias)

    arg_bufs = [inputs, weight, bias, outputs]
    stmt = tvm.lower(s, arg_bufs, simple_mode=True)
    print(stmt)
    dev_id = 0
    time_cost = _evaluate(s, arg_bufs, "llvm", dev_id, number=number)
    print("Yolo conv24 use", time_cost, "ms")
    """ For pytorch """
    out_channel, in_channel, kernel_height, kernel_width = yolo_conv.weight_shape
    padding, stride, dilation, groups = (yolo_conv.padding, yolo_conv.stride,
                                         yolo_conv.dilation, yolo_conv.groups)
    conv2d_torch = torch.nn.Conv2d(in_channel,
                                   out_channel, (kernel_height, kernel_width),
                                   padding=padding,
                                   stride=stride,
                                   dilation=dilation,
                                   groups=groups)

    # warm up
    inputs = torch.rand(batch_size, *input_shape)
    res = conv2d_torch(inputs)

    times = time.time()
    for _ in range(number):
        res = conv2d_torch(inputs)
    times = time.time() - times
    print("Pytorch on cpu use: {}ms".format(times / number * 1e3))

    # to test the correctness; currently the result is wrong because of the schedule
    # if you change line 148 to 'outer = s[write_cache].fuse(gemm_g, gemm_go)'
    # the result is correct
    ctx = tvm.device("llvm", 0)
    inputs_np = np.random.random(inputs.shape).astype("float32") * 100
    weight_np = np.random.random(to_tuple(weight.shape)).astype(
        weight.dtype) * 100
    outputs_np = np.zeros(shape=to_tuple(outputs.shape), dtype=np.float32)
    bias_np = np.random.random(size=to_tuple(bias.shape)).astype(
        bias.dtype) * 100

    inputs_tvm = tvm.nd.array(inputs_np, ctx)
    weight_tvm = tvm.nd.array(weight_np, ctx)
    outputs_tvm = tvm.nd.array(outputs_np, ctx)
    bias_tvm = tvm.nd.array(bias_np, ctx)

    inputs_torch = torch.tensor(inputs_np)
    weight_torch = torch.tensor(weight_np)
    bias_torch = torch.tensor(bias_np)

    func_tvm = tvm.build(s, arg_bufs, "llvm")
    func_tvm(inputs_tvm, weight_tvm, bias_tvm, outputs_tvm)
    outputs_torch = torch.nn.functional.conv2d(inputs_torch,
                                               weight_torch,
                                               bias=bias_torch,
                                               padding=padding,
                                               stride=stride,
                                               dilation=dilation,
                                               groups=groups)
    the_same = test_allclose(outputs_tvm.asnumpy(),
                             outputs_torch.numpy(),
                             rtol=1e-5,
                             print_diff=True)
    if the_same:
        print("The same!")
    else:
        print("Not the same!")
Example No. 13
def _reset_gradients(self):
    grads_np = [np.zeros(to_tuple(var.shape)).astype(self.dtype) for var in self.gradients]
    self.grads_tvm = [tvm.nd.array(var, self.ctx) for var in grads_np]
Example No. 14
def create_buffer(tensor):
    np_buffer = np.zeros(to_tuple(tensor.shape)).astype(self.dtype)
    tvm_buffer = tvm.nd.array(np_buffer, self.ctx)
    return tvm_buffer
Example No. 15
def main():
    '''
    conv[outchannel, inchannel, kernel_h, kernel_w],
    simplified formula: output_h = input_h + 2*padding - kernel + 1

    Non-ReLU version:
    [batch, 1, 28, 28] -> conv[6, 1, 3, 3], padding=1 -> [batch, 6, 28, 28] -> avgpool -> [batch, 6, 14, 14]
    -> conv[16, 6, 5, 5], padding=0 -> [batch, 16, 10, 10] -> avgpool -> [batch, 16, 5, 5] -> flatten -> [batch, 400]
    -> gemm[400, 120] -> [batch, 120] -> gemm[120, 84] -> [batch, 84] -> gemm[84, 10] -> [batch, 10] -> CE_loss
    '''
    batch = 4
    dtype = "float64"
    img = tvm.te.placeholder([batch, 1, 28, 28], dtype=dtype, name="img")
    label = tvm.te.placeholder([batch, 10], dtype=dtype, name="label")
    weight_1 = tvm.te.placeholder([6, 1, 3, 3],
                                  dtype=dtype,
                                  name="w1",
                                  requires_grad=True)
    weight_2 = tvm.te.placeholder([16, 6, 5, 5],
                                  dtype=dtype,
                                  name="w2",
                                  requires_grad=True)
    weight_3 = tvm.te.placeholder([400, 120],
                                  dtype=dtype,
                                  name="w3",
                                  requires_grad=True)
    weight_4 = tvm.te.placeholder([120, 84],
                                  dtype=dtype,
                                  name="w4",
                                  requires_grad=True)
    weight_5 = tvm.te.placeholder([84, 10],
                                  dtype=dtype,
                                  name="w5",
                                  requires_grad=True)

    if enable_relu:
        t1 = conv2d_nchw(img, weight_1, None, 1, 1, 1, 1)
        t2 = ReLU(t1)
        t3 = avgpool(t2)

        t4 = conv2d_nchw(t3, weight_2, None, 1, 0, 1, 1)
        t5 = ReLU(t4)
        t6 = avgpool(t5)

        t7 = flatten(t6)
        t8 = gemm(t7, weight_3)
        t9 = gemm(t8, weight_4)
        t10 = ReLU(t9)

        t11 = gemm(t10, weight_5)
        t12 = ReLU(t11)
        t13 = cross_entropy(t12, label)
    else:
        t1 = conv2d_nchw(img, weight_1, None, 1, 1, 1, 1)
        t3 = avgpool(t1)

        t4 = conv2d_nchw(t3, weight_2, None, 1, 0, 1, 1)
        t6 = avgpool(t4)

        t7 = flatten(t6)
        t8 = gemm(t7, weight_3)
        t9 = gemm(t8, weight_4)

        t11 = gemm(t9, weight_5)
        t12 = t11
        t13 = cross_entropy(t12, label)

    d1, d2, d3, d4, d5 = tvm.te.mygradient(
        t13, [weight_1, weight_2, weight_3, weight_4, weight_5])

    print("Build model...")

    s = tvm.te.create_schedule([t13.op, d1.op, d2.op, d3.op, d4.op, d5.op])

    print(
        tvm.lower(s, [
            img, label, weight_1, weight_2, weight_3, weight_4, weight_5, t12,
            t13, d1, d2, d3, d4, d5
        ],
                  simple_mode=True))

    func = tvm.build(s, [
        img, label, weight_1, weight_2, weight_3, weight_4, weight_5, t12, t13,
        d1, d2, d3, d4, d5
    ],
                     target="llvm")

    print("Check correctness...")

    free_vars = [weight_1, weight_2, weight_3, weight_4, weight_5]
    gradients = [d1, d2, d3, d4, d5]
    params = []
    for var in free_vars:
        shape = to_tuple(var.shape)
        var_np = np.random.uniform(-2, 2, shape).astype(dtype)
        params.append(var_np)
    img_np = np.random.uniform(0.9999, 1, to_tuple(img.shape)).astype(dtype)
    label_np = np.random.uniform(0, 1, to_tuple(label.shape)).astype(dtype)
    logit_np = np.zeros(to_tuple(t12.shape)).astype(dtype)
    ret_np = np.zeros(to_tuple(t13.shape)).astype(dtype)
    inits = []
    for var in gradients:
        shape = to_tuple(var.shape)
        var_np = np.zeros(shape).astype(dtype)
        inits.append(var_np)

    ctx = tvm.device("llvm")

    img_tvm = tvm.nd.array(img_np, ctx)
    label_tvm = tvm.nd.array(label_np, ctx)
    logit_tvm = tvm.nd.array(logit_np, ctx)
    ret_tvm = tvm.nd.array(ret_np, ctx)
    free_vars_tvm = [tvm.nd.array(x, ctx) for x in params]
    gradients_tvm = [tvm.nd.array(x, ctx) for x in inits]

    func(img_tvm, label_tvm, *free_vars_tvm, logit_tvm, ret_tvm,
         *gradients_tvm)

    print("Start training...")
    # this will be updated during training
    model_weights = []
    for var in free_vars:
        shape = to_tuple(var.shape)
        var_np = np.random.uniform(-1, 1, shape).astype(dtype)
        model_weights.append(var_np)

    epoch = 3
    lr = 1e-6

    train_set = torchvision.datasets.MNIST(".",
                                           train=True,
                                           transform=transforms.Compose(
                                               [transforms.ToTensor()]),
                                           download=True)
    test_set = torchvision.datasets.MNIST(".",
                                          train=False,
                                          transform=transforms.Compose(
                                              [transforms.ToTensor()]),
                                          download=True)
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=batch,
                                               shuffle=True)

    for ep in range(epoch):
        train_num_covered = 0
        running_acc = 0.0
        running_loss = 0.0
        for i, data in enumerate(train_loader):
            img_tvm = tvm.nd.array(data[0].numpy().astype(dtype), ctx)
            label_torch = torch.tensor(np.zeros([batch, 10]).astype(dtype))
            label_torch.scatter_(1, data[1].unsqueeze(0).T, 1.0)
            #print("label_torch", label_torch)
            label_tvm = tvm.nd.array(label_torch.numpy(), ctx)
            weights_iter = []
            for var in model_weights:
                var_tvm = tvm.nd.array(var)
                weights_iter.append(var_tvm)
            gradients_iter = []
            for var in gradients:
                shape = to_tuple(var.shape)
                var_tvm = tvm.nd.array(np.zeros(shape).astype(dtype))
                gradients_iter.append(var_tvm)
            # print("Running...")
            func(img_tvm, label_tvm, *weights_iter, logit_tvm, ret_tvm,
                 *gradients_iter)

            # accuracy & loss record
            train_num_covered += batch
            _, predict = torch.max(torch.from_numpy(logit_tvm.asnumpy()), 1)
            num_correct = (predict == data[1]).sum()
            running_acc += num_correct.item()
            running_loss += ret_tvm.asnumpy().item(0)

            if i % 1000 == 0:
                print("epoch=", ep + 1, "iteration=", i + 1, "loss=",
                      running_loss / train_num_covered, "acc=",
                      running_acc / train_num_covered)
                # print("logit=", logit_tvm.asnumpy()) #shape:[batch, 10]
                # print("model_weights[0]", model_weights[0])
                # #print("model_weights[4]", model_weights[4])
                # print("gradient_iter[0]", gradients_iter[0])
                # #print("gradient_iter[4]", gradients_iter[4])

            # print("Updating...")
            for k, gradient in enumerate(gradients_iter):
                assert (model_weights[k].shape == gradient.asnumpy().shape)
                model_weights[k] -= lr * gradient.asnumpy()

        assert (train_num_covered == len(train_set))
        running_acc /= len(train_set)
        print("epoch=", ep + 1, "accuracy=", running_acc)
Example No. 16
def main():
    batch = 2
    dtype = "float64"
    img = tvm.te.placeholder([batch, 1, 32, 32], dtype=dtype, name="img")
    label = tvm.te.placeholder([batch, 10], dtype=dtype, name="label")
    weight_1 = tvm.te.placeholder([6, 1, 5, 5], dtype=dtype, name="w1")
    weight_2 = tvm.te.placeholder([16, 6, 5, 5], dtype=dtype, name="w2")
    weight_3 = tvm.te.placeholder([120, 16, 5, 5], dtype=dtype, name="w3")
    weight_4 = tvm.te.placeholder([120, 84], dtype=dtype, name="w4")
    weight_5 = tvm.te.placeholder([84, 10], dtype=dtype, name="w5")

    act = tanh  # ReLU

    t1 = conv2d_nchw(img, weight_1, None, 1, 0, 1, 1)
    t2 = act(t1)
    t3 = avgpool(t2)

    t4 = conv2d_nchw(t3, weight_2, None, 1, 0, 1, 1)
    t5 = act(t4)
    t6 = avgpool(t5)

    t7 = conv2d_nchw(t6, weight_3, None, 1, 0, 1, 1)
    t8 = act(t7)
    # t9 = avgpool(t8)

    t10 = flatten_gemm(t8, weight_4)

    t11 = (gemm(t10, weight_5))

    t12 = softmax(t11)

    # t13 = sum_all(t12)
    t13 = mse_loss(t12, label)

    d1, d2, d3, d4, d5 = tvm.te.mygradient(
        t13, [weight_1, weight_2, weight_3, weight_4, weight_5])

    s = tvm.te.create_schedule([t13.op, d1.op, d2.op, d3.op, d4.op, d5.op])

    func = tvm.build(s, [
        img, label, weight_1, weight_2, weight_3, weight_4, weight_5, t13, d1,
        d2, d3, d4, d5
    ],
                     target="llvm")

    free_vars = [weight_1, weight_2, weight_3, weight_4, weight_5]
    gradients = [d1, d2, d3, d4, d5]
    params = []
    for var in free_vars:
        shape = to_tuple(var.shape)
        var_np = np.random.uniform(-100, 100, shape).astype(dtype)
        params.append(var_np)
    img_np = np.random.uniform(-10, 10, to_tuple(img.shape)).astype(dtype)
    label_np = np.random.uniform(-10, 10, to_tuple(label.shape)).astype(dtype)
    ret_np = np.zeros(to_tuple(t13.shape)).astype(dtype)
    inits = []
    for var in gradients:
        shape = to_tuple(var.shape)
        var_np = np.zeros(shape).astype(dtype)
        inits.append(var_np)

    ctx = tvm.device("llvm")

    img_tvm = tvm.nd.array(img_np, ctx)
    label_tvm = tvm.nd.array(label_np, ctx)
    ret_tvm = tvm.nd.array(ret_np, ctx)
    free_vars_tvm = [tvm.nd.array(x, ctx) for x in params]
    gradients_tvm = [tvm.nd.array(x, ctx) for x in inits]

    func(img_tvm, label_tvm, *free_vars_tvm, ret_tvm, *gradients_tvm)

    ret_torch, grad_torch = pytorch_result(img_np, label_np, params)

    print(ret_tvm)
    print(ret_torch)

    tvm.testing.assert_allclose(ret_tvm.asnumpy(),
                                ret_torch.detach().numpy(),
                                atol=1e-3,
                                rtol=1e-5)
    for i in range(len(gradients_tvm)):
        print("grad_torch", i, grad_torch[i].detach().T.numpy())
        if i > 2:
            tvm.testing.assert_allclose(gradients_tvm[i].asnumpy(),
                                        grad_torch[i].detach().T.numpy(),
                                        atol=1e-3,
                                        rtol=1e-5)
        else:
            tvm.testing.assert_allclose(gradients_tvm[i].asnumpy(),
                                        grad_torch[i].detach().numpy(),
                                        atol=1e-3,
                                        rtol=1e-5)

    print("Compare to Pytorch success!")