Пример #1
0
def test_module_quantize_args():
    """Downsize a module argument to ``UInt(2)`` and check the wrapped sum.

    The kernel computes ``C[x] = add(A, B, x)`` where ``add`` is an HeteroCL
    module returning ``A[x] + B[x]``.  The first input is stored as
    ``UInt(2)``, so the expected result is ``a[i] % 4 + b[i]``.
    """
    hcl.init()

    length = 10

    def algorithm(A, B):
        # The module name ``add`` and its parameter ``A`` are referenced
        # below as ``algorithm.add.A`` and therefore must keep these names.
        @hcl.def_([A.shape, B.shape, ()])
        def add(A, B, x):
            hcl.return_(A[x] + B[x])

        return hcl.compute(A.shape, lambda x: add(A, B, x), "C")

    lhs = hcl.placeholder((length, ), dtype=hcl.UInt(2))
    rhs = hcl.placeholder((length, ))

    scheme = hcl.create_scheme([lhs, rhs], algorithm)
    scheme.downsize([algorithm.add.A], hcl.UInt(2))
    schedule = hcl.create_schedule_from_scheme(scheme)
    func = hcl.build(schedule)

    # Host-side reference data (drawn in the same order: first a, then b).
    np_lhs = np.random.randint(100, size=(length, ))
    np_rhs = np.random.randint(100, size=(length, ))
    np_out = np.zeros(length)

    hcl_lhs = hcl.asarray(np_lhs, hcl.UInt(2))
    hcl_rhs = hcl.asarray(np_rhs)
    hcl_out = hcl.asarray(np_out)

    func(hcl_lhs, hcl_rhs, hcl_out)

    # All three tensors are converted back; only the output is inspected.
    _, _, result = [t.asnumpy() for t in (hcl_lhs, hcl_rhs, hcl_out)]

    for got, x, y in zip(result, np_lhs, np_rhs):
        assert (got == x % 4 + y)
Пример #2
0
 def test_uint_imm_ops():
     """Check the ``vhls`` code emitted for a select with a zero immediate.

     The input tensor is ``UInt(1)``; the generated code is expected to
     contain the text ``(unsigned int)0U)``.
     """
     def kernel(A):
         return hcl.compute(
             (8, 8),
             lambda y, x: hcl.select(x < 4, A[y][x], 0),
             "B")

     in_tensor = hcl.placeholder((10, 10), "A", dtype=hcl.UInt(1))
     scheme = hcl.create_scheme(in_tensor, kernel)
     schedule = hcl.create_schedule_from_scheme(scheme)
     hls_code = hcl.build(schedule, target="vhls")

     expected = "(unsigned int)0U)"
     assert expected in hls_code
Пример #3
0
 def test_binary_ops():
     """Check the ``vhls`` code for a select mixing ``Int`` and ``Fixed``.

     ``A`` is ``Int(20)`` and ``B`` is ``Fixed(16, 12)``; the generated code
     is expected to contain the text ``(ap_fixed<32, 20>)B``.
     """
     def kernel(A, B):
         return hcl.compute(
             (8, 8),
             lambda y, x: hcl.select(x < 4, A[y][x], B[y][x]),
             "C",
             dtype=hcl.Int(8))

     int_in = hcl.placeholder((8, 8), "A", dtype=hcl.Int(20))
     fixed_in = hcl.placeholder((8, 8), "B", dtype=hcl.Fixed(16, 12))

     scheme = hcl.create_scheme([int_in, fixed_in], kernel)
     schedule = hcl.create_schedule_from_scheme(scheme)
     hls_code = hcl.build(schedule, target="vhls")

     expected = "(ap_fixed<32, 20>)B"
     assert expected in hls_code
Пример #4
0
 def test_imm_ops():
     """Check the ``vhls`` code for a select over a sum and a zero immediate.

     Both the immediate and the sum of two ``A`` loads are expected to show
     up in the generated code with an ``ap_int<33>`` cast.
     """
     def kernel(A):
         return hcl.compute(
             (8, 8),
             lambda y, x: hcl.select(x < 4, A[y][x] + A[y+2][x+2], 0),
             "B")

     in_tensor = hcl.placeholder((10, 10), "A")
     scheme = hcl.create_scheme(in_tensor, kernel)
     schedule = hcl.create_schedule_from_scheme(scheme)
     hls_code = hcl.build(schedule, target="vhls")

     expected_snippets = (
         "((ap_int<33>)0)",
         "((ap_int<33>)(((ap_int<33>)A",
     )
     for snippet in expected_snippets:
         assert snippet in hls_code
Пример #5
0
 def test_uint_int():
     """Check the ``vhls`` code for a select mixing ``Fixed`` and ``UFixed``.

     ``A`` is ``Fixed(20, 12)`` and ``B`` is ``UFixed(16, 12)``; the generated
     code is expected to contain the text ``ap_ufixed<20, 8>)A``.
     """
     def kernel(A, B):
         return hcl.compute(
             (8, 8),
             lambda y, x: hcl.select(x < 4, A[y][x], B[y][x]),
             "C",
             dtype=hcl.Int(8))

     signed_in = hcl.placeholder((8, 8), "A", dtype=hcl.Fixed(20, 12))
     unsigned_in = hcl.placeholder((8, 8), "B", dtype=hcl.UFixed(16, 12))

     scheme = hcl.create_scheme([signed_in, unsigned_in], kernel)
     schedule = hcl.create_schedule_from_scheme(scheme)
     hls_code = hcl.build(schedule, target="vhls")

     expected = "ap_ufixed<20, 8>)A"
     assert expected in hls_code
Пример #6
0
def build_bnn_inf_opt(batch_size=batch_size, target=target):
    """Build the BNN inference function with compute and memory scheduling.

    Placeholders are created for the input image and for every entry of the
    module-level ``params`` mapping, the ``build_bnn`` network is scheduled
    layer by layer, the inputs are partitioned, and the result of
    ``hcl.build`` is returned.

    Args:
        batch_size: leading dimension of the ``input_image`` placeholder.
        target: build target; when it is an ``hcl.platform`` instance the
            data movement and the tool configuration are set up as well.
    """
    def pick_dtype(param_name):
        # Names containing "conv" or "w_" get qtype_bit, the rest qtype_float.
        uses_bit_type = "conv" in param_name or "w_" in param_name
        return qtype_bit if uses_bit_type else qtype_float

    input_image = hcl.placeholder((batch_size, 1, 16, 16), "input_image",
                                  qtype_bit)
    hcl_ph = [
        hcl.placeholder(params[key].shape, key, dtype=pick_dtype(key))
        for key in params
    ]

    # Instantiate the network and derive a schedule from it.
    scheme = hcl.create_scheme([input_image] + hcl_ph, build_bnn)
    s = hcl.create_schedule_from_scheme(scheme)

    def plot_dataflow_graph():
        # Debug helper; it is defined here but not called by this function.
        import matplotlib.pyplot as plt
        import networkx as nx
        graph, _ = s.dataflow_graph(plot=True)
        nx.draw(graph, with_labels=True)
        plt.savefig("bnn.png")

    # Compute optimization: walk over every stage attached to build_bnn.
    for layer in vars(build_bnn):
        stage = getattr(build_bnn, layer)
        if "bn" in layer:
            # Fuse the conv stage sharing this layer's trailing digit.
            conv = getattr(build_bnn, "conv" + layer[-1])
            s[conv].compute_at(s[stage], stage.axis[3])
            if layer != "bn1":
                s[conv].pipeline(conv.axis[4])
            else:
                s[stage].pipeline(stage.axis[3])  # will be refreshed
        elif "pool" in layer:
            s[stage].pipeline(stage.axis[2])
        elif "fc" in layer or "flatten" in layer:
            s[stage].pipeline(stage.axis[1])
        elif "dense_relu" in layer:
            fc1 = build_bnn.fc1
            s[fc1].compute_at(s[stage], stage.axis[1])
            s[fc1].pipeline(fc1.axis[2])

    if isinstance(target, hcl.platform):
        s.to([input_image] + hcl_ph, target.xcel)
        s.to(build_bnn.fc2, target.host)
        target.config(compile="vivado_hls", mode="csyn")

    # Memory optimization: partition the image first, then the parameters
    # in reverse creation order.
    s.partition(input_image, hcl.Partition.Block, dim=1, factor=8)
    fully_partitioned = ("b_fc2", "fc2")
    for ph in reversed(hcl_ph):
        if ph.name not in fully_partitioned:
            s.partition(ph, hcl.Partition.Block, dim=1, factor=8)
        else:
            s.partition(ph, hcl.Partition.Complete, dim=1)

    return hcl.build(s, target=target)
Пример #7
0
def test():
    """Print the ``vhls`` build of ``C[y, x] = foo(A[y, x] + A[y, x])``."""
    hcl.init()

    def kernel(A):
        return hcl.compute(
            (8, 8),
            lambda y, x: foo(A[y, x] + A[y, x]),
            "C")

    in_tensor = hcl.placeholder((8, 8), "A")
    scheme = hcl.create_scheme([in_tensor], kernel)
    schedule = hcl.create_schedule_from_scheme(scheme)
    generated = hcl.build(schedule, "vhls")
    print(generated)
Пример #8
0
def test3():
    """Print the ``vhls`` build of a select over bit 0 of a ``UInt(2)`` input.

    ``B`` is declared and passed to the scheme, but the kernel only reads
    ``A``.
    """
    def kernel(A, B):
        return hcl.compute(
            (8, 8),
            lambda y, x: hcl.select(x < 4, A[y, x][0], 0),
            "C")

    first = hcl.placeholder((8, 8), "A", dtype=hcl.UInt(2))
    second = hcl.placeholder((8, 8), "B", dtype=hcl.UInt(2))

    scheme = hcl.create_scheme([first, second], kernel)
    schedule = hcl.create_schedule_from_scheme(scheme)
    generated = hcl.build(schedule, "vhls")
    print(generated)
Пример #9
0
def test1():
    """Print the ``vhls`` build of a select between a default-typed tensor
    and a ``Fixed(16, 12)`` tensor."""
    def kernel(A, B):
        return hcl.compute(
            (8, 8),
            lambda y, x: hcl.select(x < 4, A[y][x], B[y][x]),
            "C")

    default_in = hcl.placeholder((8, 8), "A")
    fixed_in = hcl.placeholder((8, 8), "B", dtype=hcl.Fixed(16, 12))

    scheme = hcl.create_scheme([default_in, fixed_in], kernel)
    schedule = hcl.create_schedule_from_scheme(scheme)
    generated = hcl.build(schedule, target="vhls")
    print(generated)
Пример #10
0
def test2():
    """Write the ``vhls`` build of a ``UInt(1)`` select kernel to
    ``select_test.cpp`` in the current directory."""
    def kernel(A):
        return hcl.compute(
            (8, 8),
            lambda y, x: hcl.select(x < 4, A[y][x], 0),
            "B")

    in_tensor = hcl.placeholder((8, 8), "A", dtype=hcl.UInt(1))
    scheme = hcl.create_scheme([in_tensor], kernel)
    schedule = hcl.create_schedule_from_scheme(scheme)
    generated = hcl.build(schedule, target="vhls")

    with open("select_test.cpp", "w") as cpp_file:
        cpp_file.write(generated)
Пример #11
0
def build_lenet_inf(batch_size=batch_size, target=None):
    """Build the LeNet inference function with quantized activations.

    Args:
        batch_size: leading dimension of the input and output placeholders.
        target: forwarded unchanged to ``hcl.build``.

    Returns:
        Whatever ``hcl.build`` returns for the given target.
    """
    # Network input.
    image = hcl.placeholder((batch_size, 1, 28, 28), "input_image")

    # Weight tensors, created in network order and all typed as qtype1.
    weight_specs = (
        ("weight_conv1", (20, 1, 5, 5)),
        ("weight_conv2", (50, 20, 5, 5)),
        ("weight_fc1", (500, 800)),
        ("weight_fc2", (10, 500)),
    )
    weights = [
        hcl.placeholder(shape, name, qtype1) for name, shape in weight_specs
    ]

    # Network output.
    output = hcl.placeholder((batch_size, 10), "lenet")

    scheme = hcl.create_scheme([image] + weights + [output], build_lenet)

    # The three tanh activation stages are quantized to qtype2.
    activations = [build_lenet.tanh1, build_lenet.tanh2, build_lenet.tanh3]
    scheme.quantize(activations, qtype2)

    schedule = hcl.create_schedule_from_scheme(scheme)
    return hcl.build(schedule, target=target)
Пример #12
0
def build_bnn_inf(batch_size=batch_size, target=target):
    """Build the BNN inference function without any schedule customization.

    Args:
        batch_size: leading dimension of the ``input_image`` placeholder.
        target: forwarded unchanged to ``hcl.build``.
    """
    def pick_dtype(param_name):
        # Names containing "conv" or "w_" get qtype_bit, the rest qtype_float.
        uses_bit_type = "conv" in param_name or "w_" in param_name
        return qtype_bit if uses_bit_type else qtype_float

    input_image = hcl.placeholder((batch_size, 1, 16, 16), "input_image",
                                  qtype_bit)
    param_phs = [
        hcl.placeholder(params[key].shape, key, dtype=pick_dtype(key))
        for key in params
    ]

    # Instantiate the network and derive the default schedule.
    scheme = hcl.create_scheme([input_image] + param_phs, build_bnn)
    schedule = hcl.create_schedule_from_scheme(scheme)

    # Data placement for hcl.platform targets is currently disabled:
    # if isinstance(target,hcl.platform):
    #     s.to([input_image] + hcl_ph, target.xcel)
    #     s.to(build_bnn.fc2, target.host)
    # target.config(compile="vivado_hls", mode="csyn")

    return hcl.build(schedule, target=target)
Пример #13
0
def test_resize():
    """Downsize the output stage ``B`` to ``UInt(2)`` and check wrap-around.

    The kernel computes ``B[x] = A[x] + 1``; with a two-bit output the
    expected value is ``(a[i] + 1) % 4``.
    """
    size = 10

    def algorithm(A):
        return hcl.compute(A.shape, lambda x: A[x] + 1, "B")

    in_ph = hcl.placeholder((size, ), dtype=hcl.UInt(32))

    scheme = hcl.create_scheme([in_ph], algorithm)
    scheme.downsize(algorithm.B, hcl.UInt(2))
    schedule = hcl.create_schedule_from_scheme(scheme)
    func = hcl.build(schedule)

    source = np.random.randint(100, size=(size, ))
    hcl_in = hcl.asarray(source, dtype=hcl.UInt(32))
    hcl_out = hcl.asarray(np.zeros(size), dtype=hcl.UInt(2))

    func(hcl_in, hcl_out)

    # Both tensors are converted back; only the output is inspected.
    _, result = [t.asnumpy() for t in (hcl_in, hcl_out)]

    for got, value in zip(result, source):
        assert (got == (value + 1) % 4)
Пример #14
0
def build_ultranet_hls(batch_size=batch_size, target=None):
    """Build the UltraNet inference function with HLS-oriented scheduling.

    The function creates the input, weight and batch-norm placeholders for
    the eight conv layers, quantizes the conv/relu stages, inserts line and
    window buffers for every conv layer and, depending on the module-level
    switches ``opt``, ``partition`` and ``stream``, merges, pipelines,
    partitions and streams the stages.

    The data types (``input_dtype``, ``weight_dtype``, ``bn_a_dtype``,
    ``bn_b_dtype``, ``conv_dtype``, ``act_dtype``) and the ``ultranet``
    algorithm are read from module scope.

    Args:
        batch_size: leading dimension of the ``input_image`` placeholder.
        target: forwarded unchanged to ``hcl.build``.

    Returns:
        Whatever ``hcl.build(s, name="main", target=target)`` returns.
    """
    num_layers = 8
    num_pooled_layers = 4  # layers 1..4 are followed by a pooling stage
    fifo_depth = 128
    layers = range(1, num_layers + 1)
    # channels[i - 1] / channels[i] are the input / output channels of conv i.
    channels = [3, 16, 32, 64, 64, 64, 64, 64, 64]

    def stage(stage_name):
        # Stages become attributes of ``ultranet`` once the scheme is created.
        return getattr(ultranet, stage_name)

    # set up input/output placeholders
    input_image = hcl.placeholder((batch_size, 3, 160, 320),
                                  dtype=input_dtype,
                                  name="input_image")

    conv_weights = []
    scheme_inputs = [input_image]
    for i in layers:
        c_in, c_out = channels[i - 1], channels[i]
        weight = hcl.placeholder((c_out, c_in, 3, 3),
                                 dtype=weight_dtype,
                                 name=f"weight_conv{i}")
        bn_a = hcl.placeholder((c_out, ),
                               dtype=bn_a_dtype,
                               name=f"a_batchnorm{i}")
        bn_b = hcl.placeholder((c_out, ),
                               dtype=bn_b_dtype,
                               name=f"b_batchnorm{i}")
        conv_weights.append(weight)
        scheme_inputs += [weight, bn_a, bn_b]

    sm = hcl.create_scheme(scheme_inputs, ultranet)

    # quantize activations
    for i in layers:
        sm.quantize(stage(f"conv{i}"), conv_dtype)
        sm.quantize(stage(f"relu{i}"), act_dtype)

    s = hcl.create_schedule_from_scheme(sm, "main")

    # create line-buffer and window-buffer for conv layers
    for i in layers:
        conv_pad = stage(f"conv{i}_pad")
        conv = stage(f"conv{i}")
        line_buffer = s.reuse_at(conv_pad._op, s[conv], conv.axis[2],
                                 f"conv{i}_line_buffer")
        s.reuse_at(line_buffer, s[conv], conv.axis[3],
                   f"conv{i}_window_buffer")

    if opt:
        # merge bn + relu operators
        # (pad cannot be merged with conv, a limitation of HCL)
        for i in layers:
            bn = stage(f"batch_norm{i}")
            relu = stage(f"relu{i}")
            s[bn].compute_at(s[relu], relu.axis[3])
        res = ultranet.result
        relu8 = ultranet.relu8
        s[relu8].compute_at(s[res], res.axis[3])

        # pipeline all layers
        for i in layers:
            pad = stage(f"conv{i}_pad")
            conv = stage(f"conv{i}")
            bn_relu = stage(f"relu{i}")
            s[pad].pipeline(pad.axis[3])
            s[conv].pipeline(conv.axis[3])
            s[bn_relu].pipeline(bn_relu.axis[3])
            if i <= num_pooled_layers:
                pool_pad = stage(f"pool{i}_pad")
                pool = stage(f"pool{i}")
                s[pool_pad].pipeline(pool_pad.axis[3])
                s[pool].pipeline(pool.axis[3])
        s[ultranet.result].pipeline(ultranet.result.axis[3])

        # partition weight buffers
        if partition:
            # weights need to be partitioned in dim 2, 3, 4
            # for now HeteroCL doesn't support multi-dimensional partition
            for weight in conv_weights:
                s.partition(weight, dim=2)

        # fifo across layers
        if stream:
            # Note:
            # Padding layer's pipelining has to precede other layers'
            # because of a bug in HeteroCL: when there's an ifThenElse
            # statement in which both branches read/write the same buffer,
            # HeteroCL thinks it's accessing the buffer twice, thus
            # preventing pipelining. For now, ?: works, but if..else..
            # doesn't, because the latter has two load/store nodes.
            for i in layers:
                s.to(stage(f"conv{i}_pad"), s[stage(f"conv{i}")],
                     fifo_depth=fifo_depth)

            # Chain the remaining stages in network order.
            for i in layers:
                conv = stage(f"conv{i}")
                if i == num_layers:
                    # relu8 is computed inside ``result``, so the last conv
                    # streams straight into the result stage.
                    s.to(conv, s[ultranet.result], fifo_depth=fifo_depth)
                    break
                relu = stage(f"relu{i}")
                next_pad = stage(f"conv{i + 1}_pad")
                s.to(conv, s[relu], fifo_depth=fifo_depth)
                if i <= num_pooled_layers:
                    pool_pad = stage(f"pool{i}_pad")
                    pool = stage(f"pool{i}")
                    s.to(relu, s[pool_pad], fifo_depth=fifo_depth)
                    s.to(pool_pad, s[pool], fifo_depth=fifo_depth)
                    s.to(pool, s[next_pad], fifo_depth=fifo_depth)
                else:
                    s.to(relu, s[next_pad], fifo_depth=fifo_depth)

    return hcl.build(s, name="main", target=target)
Пример #15
0
def top(target=None):
    """Define, customize and build the KNN digit-recognition kernel.

    The nested ``knn`` algorithm XORs every training image with the test
    image, counts the set bits of each difference, and keeps the smallest
    distances per row in a candidate buffer.  The data types of the distance
    and candidate stages are then downsized to the module-level
    ``dtype_knnmat`` and the loop nests are merged, reordered, parallelized
    and pipelined before building.

    Args:
        target: forwarded unchanged to ``hcl.build``.

    Returns:
        Whatever ``hcl.build`` returns for the given target.
    """

    # Algorithm definition (§1)
    def knn(test_image, train_images):

        # Imperative programming and bit operations (§2)
        # Sums the individual bits of `num`; the number of bits visited is the
        # bit width of the `train_images` data type.
        def popcount(num):
            out = hcl.scalar(0, "out")
            with hcl.for_(0, train_images.type.bits) as i:
                # Bit selection operation
                out.v += num[i]
            return out.v

        # This function updates the candidates, i.e., `knn_mat`. Here we mutate
        # through the shape of tensor `dist`. For each `dist` value, if it is
        # smaller than the maximum candidate, we replace it.
        def update_knn(dist, knn_mat, i, j):
            max_id = hcl.scalar(0, "max_id")
            # Find the index of the largest of the first three candidates.
            with hcl.for_(0, 3) as k:
                with hcl.if_(knn_mat[i][k] > knn_mat[i][max_id.v]):
                    max_id.v = k
            with hcl.if_(dist[i][j] < knn_mat[i][max_id.v]):
                knn_mat[i][max_id.v] = dist[i][j]

        # Main algorithm (§3)
        # First step: XOR (§3.1)
        diff = hcl.compute(train_images.shape,
                           lambda x, y: train_images[x][y] ^ test_image,
                           "diff")

        # Second step: popcount (§3.2)
        dist = hcl.compute(diff.shape, lambda x, y: popcount(diff[x][y]),
                           "dist")

        # Third step: initialize the candidates (§3.3)
        # The buffer has 4 columns and every entry starts at 50; only the
        # first 3 columns are scanned by `update_knn` and copied out below.
        knn_mat_buf = hcl.compute((10, 4), lambda x, y: 50, "knn_mat_buf")

        # Fourth step: update the candidates (§3.4)
        hcl.mutate(dist.shape,
                   lambda x, y: update_knn(dist, knn_mat_buf, x, y),
                   "knn_update")
        knn_mat = hcl.compute((10, 3), lambda x, y: knn_mat_buf[x][y],
                              "knn_mat")

        # Final step: return the candidates (§3.5)
        return knn_mat

    # Inputs/Outputs definition (§4)
    # Scalars (§4.1)
    test_image = hcl.placeholder((), "test_image")
    # Tensors (§4.2)
    # `data_size` is read from module scope.
    train_images = hcl.placeholder(data_size, "train_images")

    # Data type customization (§5.1)
    # `knn.dist.out` is the scalar named "out" created inside `popcount`.
    scheme = hcl.create_scheme([test_image, train_images], knn)
    scheme.downsize([knn.dist, knn.dist.out, knn.knn_mat_buf, knn.knn_mat],
                    dtype_knnmat)

    # Compute customization (§5.2)
    s = hcl.create_schedule_from_scheme(scheme)

    diff = knn.diff
    dist = knn.dist
    knn_mat_buf = knn.knn_mat_buf
    knn_update = knn.knn_update

    # Merge loop nests
    s[diff].compute_at(s[dist], dist.axis[1])
    s[dist].compute_at(s[knn_update], knn_update.axis[1])

    # Reorder loop to expose more parallelism
    s[knn_update].reorder(knn_update.axis[1], knn_update.axis[0])

    # Parallel initialization of knn mat
    s[knn_mat_buf].parallel(knn_mat_buf.axis[0])

    # Parallel outer loop and pipeline inner loop
    s[knn_update].parallel(knn_update.axis[0])
    s[knn_update].pipeline(knn_update.axis[1])

    # Parallel the innermost loop of 49 pixels
    s[dist].parallel(dist.axis[2])

    # At the end, we build the whole offloaded function.
    return hcl.build(s, target=target)
Пример #16
0
def top(target=None):
    """Define, customize and build a batched Smith-Waterman kernel.

    ``batch_sw`` runs ``smith_waterman`` once per sequence pair.  Each run
    fills a score matrix and an action matrix, remembers the position of the
    maximum score, and then traces back from that position, writing the
    aligned symbols into the two output rows.

    The sizes ``num``, ``lenA``, ``lenB``, the score ``penalty`` and the data
    types ``dtype`` and ``mtype`` are read from module scope.

    Args:
        target: forwarded unchanged to ``hcl.build``.

    Returns:
        Whatever ``hcl.build`` returns for the given target.
    """
    def smith_waterman(seqA, seqB, consA, consB):
        # Score of aligning two symbols: 1 on a match, `penalty` otherwise.
        def similarity_score(a, b):
            return hcl.select(a == b, 1, penalty)

        # Returns the maximum of the first `len_` entries of A together with
        # its index; the comparison is strict, so ties keep the earliest index.
        def find_max(A, len_):
            max_ = hcl.local(A[0], "max")
            act_ = hcl.local(0, "act")
            with hcl.for_(0, len_) as i:
                with hcl.if_(A[i] > max_[0]):
                    max_[0] = A[i]
                    act_[0] = i
            return max_[0], act_[0]

        # Running maximum of the score matrix and where it was found.
        # NOTE(review): the name string "maxtrix_max" looks like a typo for
        # "matrix_max"; it is a runtime name, so it is left untouched here.
        matrix_max = hcl.local(0, "maxtrix_max")
        i_max = hcl.local(0, "i_max")
        j_max = hcl.local(0, "j_max")

        # Score matrix starts at 0 everywhere; action matrix starts at 3,
        # which `get_next` treats as "stay in place".
        matrix = hcl.compute((lenA + 1, lenB + 1), lambda x, y: 0, "matrix")
        action = hcl.compute(matrix.shape, lambda x, y: 3, "action")

        # Fills cell (i, j) of `matrix` and `action`; row 0 and column 0 are
        # skipped and keep their initial values.
        def populate_matrix(i, j):
            trace_back = hcl.compute((4, ), lambda x: 0, "trace_back")

            with hcl.if_(hcl.and_(i != 0, j != 0)):
                # Candidates: [0] diagonal, [1] upper cell, [2] left cell,
                # [3] zero. The index of the winner becomes the action code.
                trace_back[0] = matrix[i-1, j-1] + \
                                similarity_score(seqA[i-1], seqB[j-1])
                trace_back[1] = matrix[i - 1, j] + penalty
                trace_back[2] = matrix[i, j - 1] + penalty
                trace_back[3] = 0
                matrix[i, j], action[i, j] = find_max(trace_back, 4)
                with hcl.if_(matrix[i, j] > matrix_max[0]):
                    matrix_max[0] = matrix[i, j]
                    i_max[0] = i
                    j_max[0] = j

        P = hcl.mutate((lenA + 1, lenB + 1),
                       lambda i, j: populate_matrix(i, j))

        # Produces one output symbol per sequence for a trace-back step: 0
        # when the index of that sequence does not change, otherwise the
        # symbol just before the current index.
        def align(curr_i, curr_j, next_i, next_j):
            outA = hcl.local(0, "a")
            outB = hcl.local(0, "b")

            with hcl.if_(next_i[0] == curr_i[0]):
                outA[0] = 0
            with hcl.else_():
                outA[0] = seqA[curr_i[0] - 1]

            with hcl.if_(next_j[0] == curr_j[0]):
                outB[0] = 0
            with hcl.else_():
                outB[0] = seqB[curr_j[0] - 1]
            return outA[0], outB[0]

        # Decodes the action stored at (i, j): 0 moves diagonally, 1 moves up,
        # 2 moves left, anything else stays at (i, j).
        def get_next(action, i, j):
            act_ = hcl.local(action[i][j], "act")
            next_i = hcl.local(0, "next_i")
            next_j = hcl.local(0, "next_j")
            with hcl.if_(act_[0] == 0):
                next_i[0] = i - 1
                next_j[0] = j - 1
            with hcl.elif_(act_[0] == 1):
                next_i[0] = i - 1
                next_j[0] = j
            with hcl.elif_(act_[0] == 2):
                next_i[0] = i
                next_j[0] = j - 1
            with hcl.else_():
                next_i[0] = i
                next_j[0] = j
            return next_i[0], next_j[0]

        # Trace-back stage: start at the maximum and follow the actions until
        # the next position equals the current one.
        with hcl.Stage("T"):
            curr_i = hcl.local(i_max[0], "curr_i")
            curr_j = hcl.local(j_max[0], "curr_j")
            next_i = hcl.local(0, "next_i")
            next_j = hcl.local(0, "next_j")
            next_i[0], next_j[0] = get_next(action, curr_i[0], curr_j[0])
            tick = hcl.local(0, "tick")

            with hcl.while_(
                    hcl.or_(curr_i[0] != next_i[0], curr_j[0] != next_j[0])):
                consA[tick[0]], consB[tick[0]] = \
                    align(curr_i, curr_j, next_i, next_j)
                curr_i[0], curr_j[0] = next_i[0], next_j[0]
                next_i[0], next_j[0] = get_next(action, curr_i[0], curr_j[0])
                tick[0] += 1

    # Runs `smith_waterman` for each of the `num` sequence pairs inside one
    # mutate stage named "B".
    def batch_sw(seqAs, seqBs, outAs, outBs):
        hcl.mutate(
            (num, ),
            lambda t: smith_waterman(seqAs[t], seqBs[t], outAs[t], outBs[t]),
            "B")

    # Inputs are (num, lenA) and (num, lenB); each output row has room for
    # lenA + lenB symbols.
    seqAs = hcl.placeholder((num, lenA), "seqAs", dtype)
    seqBs = hcl.placeholder((
        num,
        lenB,
    ), "seqBs", dtype)
    outAs = hcl.placeholder((num, lenA + lenB), "outAs", dtype)
    outBs = hcl.placeholder((num, lenA + lenB), "outBs", dtype)

    # seqAs = hcl.placeholder((num, lenA), "seqAs")
    # seqBs = hcl.placeholder((num, lenB,), "seqBs")
    # outAs = hcl.placeholder((num, lenA+lenB), "outAs")
    # outBs = hcl.placeholder((num, lenA+lenB), "outBs")

    # Downsize the score and action matrices of stage "B" to `mtype`.
    scheme = hcl.create_scheme([seqAs, seqBs, outAs, outBs], batch_sw)
    scheme.downsize([batch_sw.B.matrix, batch_sw.B.action], mtype)
    s = hcl.create_schedule_from_scheme(scheme)
    # Split the batch loop by 32, pipeline the outer part and unroll the
    # inner part.
    o, p = s[batch_sw.B].split(batch_sw.B.axis[0], factor=32)
    s[batch_sw.B].pipeline(o)
    # s[batch_sw.B].parallel(p)
    s[batch_sw.B].unroll(p)
    return hcl.build(s, target=target)
Пример #17
0
# Data type customization also works for user-defined modules. One option is
# to state the data types directly in the module decorator; the other is to
# call the ``quantize`` and ``downsize`` APIs on a scheme. Here the first
# example is downsized with the second approach.

A, B, C, D = (hcl.placeholder((10,), dtype=hcl.UInt(4)) for _ in range(4))

s = hcl.create_scheme([A, B, C, D], maximum)
# Both arguments of ``find_max`` and its return value are downsized ...
s.downsize(
    [maximum.find_max.A, maximum.find_max.B, maximum.find_max],
    hcl.UInt(4),
)
# ... and so are the intermediate results.
s.downsize(
    [maximum.max_1, maximum.max_2],
    hcl.UInt(4),
)
s = hcl.create_schedule_from_scheme(s)
f = hcl.build(s)

##############################################################################
# Run the downsized function on the same inputs.

hcl_A, hcl_B, hcl_C, hcl_D = (
    hcl.asarray(values, hcl.UInt(4)) for values in (a, b, c, d)
)
hcl_O = hcl.asarray(o)

f(hcl_A, hcl_B, hcl_C, hcl_D, hcl_O)

print("Downsized output tensor:")
print(hcl_O)
Пример #18
0
# -----------------------------------
# create fixed point scheme
# -----------------------------------
import operator

from uptune import autotune, feedback

# For every name in ``name_pool`` two tunable values are requested, in this
# order: whether the stage is quantized at all, and the number of fractional
# bits of its fixed-point type. Both choices are logged for every stage, also
# for the stages that end up not being quantized.
take_log, type_log = [], []
bitwidth = 32  # total width of every candidate fixed-point type
for layer_name in name_pool:
    # Plain attribute lookup instead of eval() on a concatenated string;
    # attrgetter also follows dotted names such as "block.conv".
    primitive = operator.attrgetter(layer_name)(build_resnet)
    taken = autotune(1, (0, 1))
    # NOTE(review): the default 18 lies outside the (0, 16) range passed
    # next to it -- confirm against the semantics of uptune's autotune().
    fraction = autotune(18, (0, 16))
    datatype = hcl.Fixed(bitwidth, fraction)
    take_log.append(taken)
    type_log.append(datatype)
    if taken:
        scheme.quantize(primitive, datatype)
s = hcl.create_schedule_from_scheme(scheme)
f = hcl.build(s, target="llvm")

# ---------------------------------
# evaluation of run through synthesis
# ---------------------------------
# load validation data from imagenet
# jitter_param = 0.4
# lighting_param = 0.1
# num_gpur = 1
# mean_rgb = [123.68, 116.779, 103.939]
# std_rgb = [58.393, 57.12, 57.375]
# ctx = [mx.cpu(0)]
#
# val_data = mx.io.ImageRecordIter(
#     path_imgrec         = '/work/zhang-x2/common/datasets/imagenet-mxnet/val.rec',
Пример #19
0
def build_ultranet_inf(batch_size=batch_size, target=None):
    """Build the UltraNet inference function with quantized activations.

    Placeholders are created for the input image and, per conv layer, for
    the conv weights and the two batch-norm parameter vectors.  Every conv
    stage is quantized to ``conv_dtype`` and every relu stage to
    ``act_dtype`` before the default schedule is built.

    Args:
        batch_size: leading dimension of the ``input_image`` placeholder.
        target: forwarded unchanged to ``hcl.build``.
    """
    # (output channels, input channels) of the eight 3x3 convolutions.
    conv_channels = [(16, 3), (32, 16), (64, 32)] + [(64, 64)] * 5

    # set up input/output placeholders
    tensors = [
        hcl.placeholder((batch_size, 3, 160, 320),
                        dtype=input_dtype,
                        name="input_image")
    ]
    for idx, (n_out, n_in) in enumerate(conv_channels, start=1):
        tensors.append(
            hcl.placeholder((n_out, n_in, 3, 3),
                            dtype=weight_dtype,
                            name="weight_conv%d" % idx))
        tensors.append(
            hcl.placeholder((n_out,),
                            dtype=bn_a_dtype,
                            name="a_batchnorm%d" % idx))
        tensors.append(
            hcl.placeholder((n_out,),
                            dtype=bn_b_dtype,
                            name="b_batchnorm%d" % idx))

    sm = hcl.create_scheme(tensors, ultranet)

    # quantize activations
    for idx in range(1, len(conv_channels) + 1):
        sm.quantize(getattr(ultranet, "conv%d" % idx), conv_dtype)
        sm.quantize(getattr(ultranet, "relu%d" % idx), act_dtype)

    schedule = hcl.create_schedule_from_scheme(sm, "main")
    return hcl.build(schedule, target=target)