Example #1
import tvm
from tvm import autotvm


def pixelcnn_autotvm(N, H, W, CO, CI, KH, KW, mask_type, bias, stride, padding,
                     dilation):
    # `pixelcnn` and `schedule_direct_cuda` are project-level helpers defined
    # elsewhere: the first builds the masked PixelCNN convolution compute,
    # the second defines the CUDA schedule and its tuning space.
    # assert N == 1, "Only consider batch_size = 1 in this template"

    # Plain topi reference for comparison:
    # data = tvm.te.placeholder((N, CI, H, W), name='data')
    # kernel = tvm.te.placeholder((CO, CI, KH, KW), name='kernel')
    # conv = topi.nn.conv2d_nchw(data, kernel, stride, padding, dilation=dilation, out_dtype='float32')
    convop, tensors = pixelcnn(N,
                               H,
                               W,
                               CI,
                               CO,
                               KH,
                               KW,
                               mask_type,
                               bias=bias,
                               stride=stride,
                               padding=padding,
                               dilation=dilation)
    # Default TE schedule over the masked-convolution op.
    s = tvm.te.create_schedule(convop)

    # AutoTVM configuration entity for this template instance.
    cfg = autotvm.get_config()

    ##### space definition begin #####
    # Define the tuning space (and apply the chosen config) on the last two
    # tensors returned by pixelcnn.
    schedule_direct_cuda(cfg, s, *tensors[-2:])

    return s, [*tensors]
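
Example #1 only defines the template body; below is a minimal sketch of how such a template is typically driven with AutoTVM. It assumes a TVM version with the named-template API (0.7 or later); the task name "pixelcnn/conv2d_cuda", the concrete shape arguments, the mask type, and the log-file name are illustrative assumptions, not part of the original snippet.

# Hypothetical driver for the template above. The task name, shapes, mask
# type, and log file are placeholders chosen for illustration only.
from tvm import autotvm


@autotvm.template("pixelcnn/conv2d_cuda")
def pixelcnn_conv2d_cuda(N, H, W, CO, CI, KH, KW, mask_type, bias, stride,
                         padding, dilation):
    return pixelcnn_autotvm(N, H, W, CO, CI, KH, KW, mask_type, bias, stride,
                            padding, dilation)


task = autotvm.task.create(
    "pixelcnn/conv2d_cuda",
    args=(1, 32, 32, 64, 64, 3, 3, "B", True, 1, 1, 1),  # placeholder shapes
    target="cuda",
)

measure_option = autotvm.measure_option(
    builder=autotvm.LocalBuilder(),
    runner=autotvm.LocalRunner(number=5, repeat=3, timeout=10),
)

tuner = autotvm.tuner.XGBTuner(task)
tuner.tune(
    n_trial=100,
    measure_option=measure_option,
    callbacks=[autotvm.callback.log_to_file("pixelcnn_cuda.log")],
)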
Example #2
import time

import numpy as np
import torch
import tvm


def tvm_PixelCNN_cpu(B,
                     H,
                     W,
                     C,
                     out_C,
                     kernel_height,
                     kernel_width,
                     mask_type,
                     bias,
                     dilation,
                     stride,
                     padding,
                     number=10,
                     dev=0):
    # Random NHWC input and an all-zero OIHW kernel; the values only matter
    # for timing, not correctness.
    Input = torch.rand([B, H, W, C], dtype=torch.float32)
    Kernel = torch.zeros([out_C, C, kernel_height, kernel_width],
                         dtype=torch.float32)

    # Build the PixelCNN convolution compute with the project-level helper.
    s, bufs = pixelcnn(B,
                       H,
                       W,
                       C,
                       out_C,
                       kernel_height,
                       kernel_width,
                       mask_type,
                       bias,
                       dilation=dilation,
                       stride=stride,
                       padding=padding)
    ctx = tvm.cpu(dev_id=dev)
    # Default TE schedule, compiled to native code through LLVM.
    s = tvm.te.create_schedule(s)
    f = tvm.build(s, bufs, "llvm")

    im = tvm.nd.array(Input.numpy().astype(np.float32), ctx)
    fi = tvm.nd.array(Kernel.numpy().astype(np.float32), ctx)

    # Standard dilated-convolution output size:
    #   out = (in + 2*padding - dilation*(kernel - 1) - 1) // stride + 1
    in_height = H
    in_width = W
    out_height = (H + 2 * padding - dilation *
                  (kernel_height - 1) - 1) // stride + 1
    out_width = (W + 2 * padding - dilation *
                 (kernel_width - 1) - 1) // stride + 1
    output_shape = (B, out_height, out_width, out_C)
    un = tvm.nd.array(np.zeros(output_shape).astype(np.float32), ctx)

    # Average wall-clock time per call, in milliseconds (no warm-up run).
    start_time = time.time()
    for _ in range(number):
        f(im, fi, un)
    end_time = time.time()
    return (end_time - start_time) * 1e3 / number
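
A minimal usage sketch for the benchmark helper above; every shape value, the mask type, and the padding are assumptions chosen for illustration and not taken from the snippet.

# Hypothetical call; all shapes, the mask type, and the padding are made up
# for illustration. The helper returns the average latency per run in ms.
latency_ms = tvm_PixelCNN_cpu(
    B=1, H=32, W=32, C=64, out_C=64,
    kernel_height=3, kernel_width=3,
    mask_type="B", bias=True,
    dilation=1, stride=1, padding=1,
    number=10,
)
print("PixelCNN conv2d on CPU: %.3f ms / run" % latency_ms)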