Exemplo n.º 1
0
def mkLed():
    """Build the 'blinkled' module comparing a reduce-add stream with a loop.

    The module holds an AXIM interface, two RAMs, a stream that reduces the
    source 'a' with ``ReduceAddValid`` and a thread running ``comp``. The
    thread computes the same sum once through the stream and once with a
    sequential loop, then prints 'OK' or 'NG' depending on whether the two
    results stored in ``ram_b`` agree.

    Returns:
        The constructed module object ``m``.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)

    # Stream definition: accumulate source 'a' over 'size' elements and emit
    # the sum to sink 'sum' only when the reduction reports it as valid.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    size = strm.constant('size')
    sum, sum_valid = strm.ReduceAddValid(a, size)
    strm.sink(sum, 'sum', when=sum_valid, when_name='sum_valid')

    # Stream version: read `size` words of ram_a starting at `offset` and
    # store the single result word into ram_b at `offset`.
    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size)
        strm.set_constant('size', size)
        strm.set_sink('sum', ram_b, offset, 1)
        strm.run()
        strm.join()

    # Sequential reference: sum ram_a[offset .. offset + size) in a loop and
    # write the total to ram_b[offset].
    def comp_sequential(size, offset):
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            sum += a
        ram_b.write(offset, sum)

    # Compare `size` words of ram_b at the two offsets and print the verdict.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_b.read(i + offset_stream)
            sq = ram_b.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('OK')
        else:
            print('NG')

    # Thread body. The dma_read/dma_write arguments are presumably
    # (ram, local address, global address, length) -- TODO confirm.
    def comp(size):
        # Stream pass uses local offset 0.
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        comp_stream(size, offset)
        myaxi.dma_write(ram_b, offset, 1024, 1)

        # Sequential pass uses local offset `size`, so both results coexist
        # in ram_b.
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_b, offset, 1024 * 2, 1)

        # Each pass produced exactly one result word, hence a length of 1.
        check(1, 0, offset)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # 32 is presumably forwarded to comp() as `size`.
    fsm = th.start(32)

    return m
Exemplo n.º 2
0
def mkLed():
    """Build the 'blinkled' module: a register-controlled matrix multiply.

    The module holds three RAMs, an AXIM interface ``maxi`` used for DMA
    and an ``AXISLiteRegister`` ``saxi`` with 8 registers used for control.
    A thread runs ``matmul`` forever: it waits on register 0, reads the
    matrix size and the three offsets from registers 1-4, calls ``comp``
    and then writes the flag in register 5.

    Returns:
        The constructed module object ``m``.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')
    # Output register declared on the module; nothing in this function
    # assigns to it.
    led = m.OutputReg('led', 8, initval=0)

    datawidth = 32
    addrwidth = 10

    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    maxi = vthread.AXIM(m, 'maxi', clk, rst, datawidth)
    saxi = vthread.AXISLiteRegister(m, 'saxi', clk, rst, datawidth, length=8)

    # Thread body: endless request loop driven through the saxi registers.
    def matmul():
        while True:
            # Register 0 is the start flag; presumably the call blocks until
            # it equals 1 and then resets it to 0 -- TODO confirm.
            saxi.wait_flag(0, value=1, resetvalue=0)
            matrix_size = saxi.read(1)
            a_offset = saxi.read(2)
            b_offset = saxi.read(3)
            c_offset = saxi.read(4)
            comp(matrix_size, a_offset, b_offset, c_offset)
            # Register 5 is written with 1 once the computation has finished.
            saxi.write_flag(5, 1, resetvalue=0)

    # For every i: load one matrix_size-word chunk of A into ram_a, then for
    # every j load one chunk of B into ram_b, accumulate the element-wise
    # products into ram_c[j], and finally DMA the ram_c row out to C.
    def comp(matrix_size, a_offset, b_offset, c_offset):
        a_addr, c_addr = a_offset, c_offset

        for i in range(matrix_size):
            maxi.dma_read(ram_a, 0, a_addr, matrix_size)

            # B is re-read from its start for every chunk of A.
            b_addr = b_offset
            for j in range(matrix_size):
                maxi.dma_read(ram_b, 0, b_addr, matrix_size)

                sum = 0
                for k in range(matrix_size):
                    x = ram_a.read(k)
                    y = ram_b.read(k)
                    sum += x * y
                ram_c.write(j, sum)

                # Advance by one chunk; addresses step in bytes
                # (datawidth // 8 bytes per word).
                b_addr += matrix_size * (datawidth // 8)

            maxi.dma_write(ram_c, 0, c_addr, matrix_size)
            a_addr += matrix_size * (datawidth // 8)
            c_addr += matrix_size * (datawidth // 8)

    th = vthread.Thread(m, 'th_matmul', clk, rst, matmul)
    fsm = th.start()

    return m
Exemplo n.º 3
0
def mkMemcpy():
    """Build the 'blinkled' module: a register-controlled memory copy.

    The module holds one RAM used as a staging buffer, an AXIM interface
    ``maxi`` for DMA and an ``AXISLiteRegister`` ``saxi`` with 8 registers
    for control. A thread runs ``memcpy`` forever: it waits on register 0,
    reads the byte count and the source/destination offsets from registers
    1-3, copies the data through ``ram_a`` and writes the flag in
    register 4.

    Returns:
        The constructed module object ``m``.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')
    # Output register declared on the module; nothing in this function
    # assigns to it.
    led = m.OutputReg('led', 8, initval=0)

    datawidth = 32
    addrwidth = 10

    # Upper bound on the number of words moved by a single DMA transfer.
    ram_words = (2**addrwidth) // (datawidth // 8)

    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    maxi = vthread.AXIM(m, 'maxi', clk, rst, datawidth)
    saxi = vthread.AXISLiteRegister(m, 'saxi', clk, rst, datawidth, length=8)

    # Thread body: endless request loop driven through the saxi registers.
    def memcpy():
        while True:
            # Register 0 is the start flag; presumably the call blocks until
            # it equals 1 and then resets it to 0 -- TODO confirm.
            saxi.wait_flag(0, value=1, resetvalue=0)

            copy_bytes = saxi.read(1)
            src_offset = saxi.read(2)
            dst_offset = saxi.read(3)

            copy(copy_bytes, src_offset, dst_offset)

            # Register 4 is written with 1 once the copy has finished.
            saxi.write_flag(4, 1, resetvalue=0)

    # Copy `copy_bytes` bytes from src_offset to dst_offset in chunks of at
    # most `ram_words` words, staging each chunk in ram_a.
    def copy(copy_bytes, src_offset, dst_offset):
        # Whole words only: any remainder of copy_bytes below one word is
        # dropped by the integer division.
        rest_words = copy_bytes // (datawidth // 8)
        src_global_addr = src_offset
        dst_global_addr = dst_offset
        # Every chunk is staged at the start of ram_a.
        local_addr = 0

        while rest_words > 0:
            if rest_words > ram_words:
                dma_size = ram_words
            else:
                dma_size = rest_words

            maxi.dma_read(ram_a, local_addr, src_global_addr, dma_size)
            maxi.dma_write(ram_a, local_addr, dst_global_addr, dma_size)

            # Global addresses advance in bytes, the remaining count in words.
            src_global_addr += dma_size * (datawidth // 8)
            dst_global_addr += dma_size * (datawidth // 8)
            rest_words -= dma_size

    th = vthread.Thread(m, 'th_memcpy', clk, rst, memcpy)
    fsm = th.start()

    return m
Exemplo n.º 4
0
def mkLed(word_datawidth=128):
    """Build the 'blinkled' module: AXI-stream -> FIFO -> FIFO -> AXI-stream.

    A two-port RAM of ``word_datawidth``-bit words provides test data. The
    thread ``blink`` runs ``body`` four times; each run writes a test vector
    to memory, pulls it back through ``axi_in`` into ``fifo_in``, moves it
    element by element to ``fifo_out``, pushes it out through ``axi_out``
    to a second memory region, and finally compares both regions.

    Args:
        word_datawidth: Bit width of the RAM and FIFO words. The AXI
            interfaces use the local ``datawidth`` (32) instead.

    Returns:
        The constructed module object ``m``.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10

    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    myram = vthread.RAM(m,
                        'myram',
                        clk,
                        rst,
                        word_datawidth,
                        addrwidth,
                        numports=2)

    axi_in = vthread.AXIStreamInFifo(m,
                                     'axi_in',
                                     clk,
                                     rst,
                                     datawidth,
                                     with_last=True,
                                     noio=True)
    axi_out = vthread.AXIStreamOutFifo(m,
                                       'axi_out',
                                       clk,
                                       rst,
                                       datawidth,
                                       with_last=True,
                                       noio=True)

    # Memory-side interfaces wrapped around the two stream endpoints.
    maxi_in = vthread.AXIM_for_AXIStreamIn(axi_in, 'maxi_in')
    maxi_out = vthread.AXIM_for_AXIStreamOut(axi_out, 'maxi_out')

    fifo_addrwidth = 8
    fifo_in = vthread.FIFO(m, 'fifo_in', clk, rst, word_datawidth,
                           fifo_addrwidth)
    fifo_out = vthread.FIFO(m, 'fifo_out', clk, rst, word_datawidth,
                            fifo_addrwidth)

    # Verification flag shared by blink() and body().
    all_ok = m.TmpReg(initval=0)

    # Thread body: run four iterations and report the overall verdict.
    def blink(size):
        all_ok.value = True

        for i in range(4):
            print('# iter %d start' % i)
            # Test for 4KB boundary check
            # The base address sits one RAM word (word_datawidth // 8 bytes)
            # below myaxi.boundary_size, offset by 16 KiB per iteration.
            offset = i * 1024 * 16 + (myaxi.boundary_size -
                                      (word_datawidth // 8))
            body(size, offset)
            print('# iter %d end' % i)

        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

        vthread.finish()

    # One round trip of `size` RAM words starting at global address `offset`.
    def body(size, offset):
        # write a test vector
        for i in range(size):
            wdata = i + 100
            myram.write(i, wdata)

        laddr = 0
        gaddr = offset
        myaxi.dma_write(myram, laddr, gaddr, size, port=1)

        # AXI-stream read -> FIFO -> FIFO -> AXI-stream write
        # The stream-side length is scaled by word_datawidth // datawidth,
        # i.e. the number of datawidth-bit beats per RAM word.
        maxi_in.dma_read_async(gaddr, size * (word_datawidth // datawidth))
        axi_in.write_fifo(fifo_in, size)

        for i in range(size):
            va = fifo_in.deq()
            fifo_out.enq(va)

        # Destination region starts 2 * size RAM words (in bytes) past offset.
        out_gaddr = (size + size) * (word_datawidth // 8) + offset
        maxi_out.dma_write_async(out_gaddr,
                                 size * (word_datawidth // datawidth))
        axi_out.read_fifo(fifo_out, size)

        # check
        # Load the source region to myram[0:size] and the copy to
        # myram[size:2*size], then compare them word by word.
        myaxi.dma_read(myram, 0, gaddr, size, port=1)
        myaxi.dma_read(myram, size, out_gaddr, size, port=1)

        for i in range(size):
            v0 = myram.read(i)
            v1 = myram.read(i + size)
            if vthread.verilog.NotEql(v0, v1):
                all_ok.value = False

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    # 17 is presumably forwarded to blink() as `size`.
    fsm = th.start(17)

    return m
Exemplo n.º 5
0
def mkLed(memory_datawidth=128):
    """Build the 'blinkled' module: DMA write/read-back self check.

    The AXIM interface is created with ``memory_datawidth`` bits while the
    RAM uses the local ``datawidth`` (32). The thread ``blink`` runs
    ``body`` four times; each run writes two different test vectors to two
    memory regions, reads both back and compares them against the expected
    values, printing any mismatching word.

    Args:
        memory_datawidth: Bit width passed to ``vthread.AXIM``.

    Returns:
        The constructed module object ``m``.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, memory_datawidth)
    myram = vthread.RAM(m, 'myram', clk, rst, datawidth, addrwidth)

    # Verification flag shared by blink() and body().
    all_ok = m.TmpReg(initval=0)

    # Thread body: run four iterations and report the overall verdict.
    def blink(size):
        all_ok.value = True

        for i in range(4):
            print('# iter %d start' % i)
            # Test for 4KB boundary check
            # The base address sits memory_datawidth // 8 bytes below
            # myaxi.boundary_size, offset by 16 KiB per iteration.
            offset = i * 1024 * 16 + (myaxi.boundary_size -
                                      memory_datawidth // 8)
            body(size, offset)
            print('# iter %d end' % i)

        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

        vthread.finish()

    # One write/write/read/read round for `size` words based at `offset`.
    def body(size, offset):
        # write
        # First vector: i + 100, stored at global address `offset`.
        for i in range(size):
            wdata = i + 100
            myram.write(i, wdata)

        laddr = 0
        gaddr = offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # write
        # Second vector: i + 1000, stored 2 * size * 4 bytes past `offset`.
        for i in range(size):
            wdata = i + 1000
            myram.write(i, wdata)

        laddr = 0
        # NOTE(review): the literal 4 is presumably the byte size of one
        # 32-bit RAM word (datawidth // 8) -- confirm.
        gaddr = (size + size) * 4 + offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # read
        # Read back the first region and expect i + 100.
        laddr = 0
        gaddr = offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        for i in range(size):
            rdata = myram.read(i)
            if vthread.verilog.NotEql(rdata, i + 100):
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

        # read
        # Read back the second region and expect i + 1000.
        laddr = 0
        gaddr = (size + size) * 4 + offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        for i in range(size):
            rdata = myram.read(i)
            if vthread.verilog.NotEql(rdata, i + 1000):
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    # 17 is presumably forwarded to blink() as `size`.
    fsm = th.start(17)

    return m
Exemplo n.º 6
0
def mkLed():
    """Build the 'blinkled' module exercising several stream ``Counter``s.

    The stream adds six counters (different ``initval``, ``size``,
    ``interval`` and ``step`` settings) to ``a + b - a - b``, so the sink
    value is the sum of the counters alone. The thread ``comp`` runs the
    stream, then a sequential loop that models the same counters with
    integer arithmetic, and prints whether both outputs in ``ram_c`` agree.

    Returns:
        The constructed module object ``m``.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    # Stream definition. The sequential model in comp_sequential() below
    # mirrors each counter one-to-one (cnt1 .. cnt6).
    strm = vthread.Stream(m, 'mystream', clk, rst)
    cnt1 = strm.Counter()
    cnt2 = strm.Counter(initval=1)
    cnt3 = strm.Counter(initval=2, size=5)
    cnt4 = strm.Counter(initval=3, interval=3)
    cnt5 = strm.Counter(initval=4, interval=3, size=7)
    cnt6 = strm.Counter(initval=4, step=2, interval=2)
    a = strm.source('a')
    b = strm.source('b')
    # a and b cancel out; they only serve to drive the stream with data.
    c = a + b - a - b + cnt1 + cnt2 + cnt3 + cnt4 + cnt5 + cnt6
    strm.sink(c, 'c')

    # Stream version: sources from ram_a/ram_b, sink into ram_c, all at
    # `offset` with `size` elements.
    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, size)
        strm.run()
        strm.join()

    # Sequential reference: derive each counter value from the iteration
    # count `cnt` and write the same expression to ram_c.
    def comp_sequential(size, offset):
        cnt = 0
        for i in range(size):
            cnt1 = cnt
            cnt2 = 1 + cnt
            cnt3 = (cnt + 2) % 5
            cnt4 = (cnt // 3) + 3
            cnt5 = ((cnt // 3) + 4) % 7
            cnt6 = (cnt // 2) * 2 + 4
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a + b - a - b + cnt1 + cnt2 + cnt3 + cnt4 + cnt5 + cnt6
            ram_c.write(i + offset, sum)
            cnt += 1

    # Compare `size` words of ram_c at the two offsets and print the verdict.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread body: stream pass at local offset 0, sequential pass at local
    # offset `size`, then verification.
    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        check(size, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # 32 is presumably forwarded to comp() as `size`.
    fsm = th.start(32)

    return m
Exemplo n.º 7
0
def mkLed():
    """Build the 'blinkled' module exercising ``CounterValid`` with a ``Mux``.

    The stream computes ``a + b + cntval`` where ``cntval`` is 1000 when
    the counter's ``valid`` output is set and the counter value otherwise.
    The thread ``comp`` runs the stream (with the counter size set to
    ``size // 2``), then a sequential loop modelling the same sequence, and
    prints whether both outputs in ``ram_c`` agree.

    Returns:
        The constructed module object ``m``.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    # Stream definition: replace the counter value by 1000 whenever `valid`
    # is asserted, then add it to both sources.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    size = strm.constant('size')
    cnt, valid = strm.CounterValid(size)
    a = strm.source('a')
    b = strm.source('b')
    cntval = strm.Mux(valid, 1000, cnt)
    c = a + b + cntval
    strm.sink(c, 'c')

    # Stream version. Note the counter size constant is half the data size.
    def comp_stream(size, offset):
        strm.set_constant('size', size // 2)
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, size)
        strm.run()
        strm.join()

    # Sequential reference for the muxed counter sequence.
    def comp_sequential(size, offset):
        sum = 0
        cnt = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a + b + cnt
            cnt += 1
            # After the substituted value 1000 has been used once, the
            # counter wraps back to 0.
            if cnt == 1001:
                cnt = 0
            # The next element is the last one of a size // 2 period, where
            # the stream emits 1000 instead of the counter value.
            if cnt == size // 2 - 1:
                cnt = 1000
            ram_c.write(i + offset, sum)

    # Compare `size` words of ram_c at the two offsets and print the verdict.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread body: stream pass at local offset 0, sequential pass at local
    # offset `size`, then verification.
    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        check(size, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # 32 is presumably forwarded to comp() as `size`.
    fsm = th.start(32)

    return m
Exemplo n.º 8
0
def mkLed(matrix_size=16):
    """Build the 'blinkled' module: matrix multiply using a stream kernel.

    A free-running 32-bit ``timer`` register is incremented through
    ``seq``. The thread ``matmul`` samples the timer around ``comp``, prints
    the elapsed value and then runs ``check``. The inner product of each
    pair of chunks is computed by the stream ``strm_madd`` instead of a
    sequential loop.

    Args:
        matrix_size: Passed to the thread as its first argument; the other
            three start arguments (0, 1024, 2048) are the A, B and C
            offsets.

    Returns:
        The constructed module object ``m``.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    # Cycle counter used only for the timing printout in matmul().
    seq = Seq(m, 'seq', clk, rst)
    timer = m.Reg('timer', 32, initval=0)
    seq(timer.inc())

    datawidth = 32
    addrwidth = 10
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)

    # Thread body: time the computation, then verify the result.
    def matmul(matrix_size, a_offset, b_offset, c_offset):
        start_time = timer
        comp(matrix_size, a_offset, b_offset, c_offset)
        end_time = timer
        time = end_time - start_time
        print("Time (cycles): %d" % time)
        check(matrix_size, a_offset, b_offset, c_offset)

    # Stream kernel: multiply `size` elements of ram_a and ram_b pairwise,
    # reduce them with RegionAdd and write the one result to ram_c[waddr]
    # when `valid` is asserted.
    def strm_madd(strm, size, waddr):
        a = strm.read(ram_a, 0, size)
        b = strm.read(ram_b, 0, size)
        sum, valid = strm.RegionAdd(a * b, size)
        strm.write(ram_c, waddr, 1, sum, when=valid)

    # For every i: load a chunk of A into ram_a, then for every j load a
    # chunk of B into ram_b and let the stream store the dot product in
    # ram_c[j]; finally DMA the ram_c row out to C.
    def comp(matrix_size, a_offset, b_offset, c_offset):
        a_addr, c_addr = a_offset, c_offset

        for i in range(matrix_size):
            myaxi.dma_read(ram_a, 0, a_addr, matrix_size)

            b_addr = b_offset
            for j in range(matrix_size):
                myaxi.dma_read(ram_b, 0, b_addr, matrix_size)

                # `stream` is bound further down, before the thread starts.
                stream.run(matrix_size, j)
                stream.join()

                # Addresses step in bytes (datawidth // 8 bytes per word).
                b_addr += matrix_size * (datawidth // 8)

            myaxi.dma_write(ram_c, 0, c_addr, matrix_size)
            a_addr += matrix_size * (datawidth // 8)
            c_addr += matrix_size * (datawidth // 8)

    # Read C back row by row and expect (i + 1) * 2 on the diagonal and 0
    # elsewhere. NOTE(review): these expectations depend on the A/B memory
    # contents, which are set up outside this function -- confirm.
    def check(matrix_size, a_offset, b_offset, c_offset):
        all_ok = True
        c_addr = c_offset
        for i in range(matrix_size):
            myaxi.dma_read(ram_c, 0, c_addr, matrix_size)
            for j in range(matrix_size):
                v = ram_c.read(j)
                if i == j and vthread.verilog.NotEql(v, (i + 1) * 2):
                    all_ok = False
                    print("NG [%d,%d] = %d" % (i, j, v))
                if i != j and vthread.verilog.NotEql(v, 0):
                    all_ok = False
                    print("NG [%d,%d] = %d" % (i, j, v))
            c_addr += matrix_size * (datawidth // 8)

        if all_ok:
            print("OK")
        else:
            print("NG")

    # The stream is created from the strm_madd function itself.
    stream = vthread.Stream(m, 'strm_madd', clk, rst, strm_madd)
    th = vthread.Thread(m, 'th_matmul', clk, rst, matmul)
    fsm = th.start(matrix_size, 0, 1024, 2048)

    return m
def mkLed():
    """Build the 'blinkled' module: a MAC stream used as a substream.

    Two streams are defined. ``macstrm`` multiplies sources 'a' and 'b' and
    reduces the products with ``ReduceAddValid``, exposing the running sum
    ('c') and its valid flag ('v'). ``strm`` embeds ``macstrm`` through
    ``substream`` and writes ``c + x`` to sink 'z' when 'v' is set. The
    thread ``comp`` runs each stream and a sequential model of it, and
    compares the contents of ``ram_c``.

    Returns:
        The constructed module object ``m``.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    # Number of products accumulated per reduction window.
    reduce_size = 4

    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)
    ram_d = vthread.RAM(m, 'ram_d', clk, rst, datawidth, addrwidth)

    # Inner stream: multiply-accumulate with a valid flag.
    macstrm = vthread.Stream(m, 'macstream', clk, rst)
    macstrm_a = macstrm.source('a')
    macstrm_b = macstrm.source('b')
    macstrm_const = macstrm.constant('const')
    macstrm_mul = macstrm_a * macstrm_b
    macstrm_c, macstrm_v = macstrm.ReduceAddValid(macstrm_mul, macstrm_const)
    # NOTE(review): adding 0 presumably turns the valid flag into a regular
    # stream value so it can be used as a sink -- confirm.
    macstrm_v += 0
    macstrm.sink(macstrm_c, 'c')
    macstrm.sink(macstrm_v, 'v')

    # Outer stream: feed x, y and const into the MAC substream and emit
    # (c + x) only when the substream's valid output is set.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    x = strm.source('x')
    y = strm.source('y')
    const = strm.constant('const')
    sub = strm.substream(macstrm)
    sub.to_source('a', x)
    sub.to_source('b', y)
    sub.to_constant('const', const)
    z = sub.from_sink('c')
    v = sub.from_sink('v')
    z = z + x
    strm.sink(z, 'z', when=v, when_name='v')

    # Run macstrm on its own: running sums go to ram_c, valid flags to ram_d.
    def comp_stream_macstrm(size, offset):
        macstrm.set_source('a', ram_a, offset, size)
        macstrm.set_source('b', ram_b, offset, size)
        macstrm.set_constant('const', reduce_size)
        macstrm.set_sink('c', ram_c, offset, size)
        macstrm.set_sink('v', ram_d, offset, size)
        macstrm.run()
        macstrm.join()

    # Run the outer stream: one output per reduce_size inputs.
    def comp_stream_mystrm(size, offset):
        strm.set_source('x', ram_a, offset, size)
        strm.set_source('y', ram_b, offset, size)
        strm.set_constant('const', reduce_size)
        strm.set_sink('z', ram_c, offset, size // reduce_size)
        strm.run()
        strm.join()

    # Sequential model of macstrm: write the running sum for every element
    # and restart the sum after every reduce_size elements.
    def comp_sequential_macstrm(size, offset):
        sum = 0
        count = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum += a * b
            count += 1
            ram_c.write(i + offset, sum)
            # ram_d is written here but check() only compares ram_c.
            ram_d.write(i + offset, count == (reduce_size - 1))
            if count == reduce_size:
                sum = 0
                count = 0

    # Sequential model of the outer stream: after every reduce_size elements
    # write (window sum + last x) to the next output slot.
    def comp_sequential_mystrm(size, offset):
        sum = 0
        count = 0
        write_offset = offset
        for i in range(size):
            x = ram_a.read(i + offset)
            y = ram_b.read(i + offset)
            sum += x * y
            val = sum + x
            count += 1
            if count == reduce_size:
                ram_c.write(write_offset, val)
                write_offset += 1
                sum = 0
                count = 0

    # Compare `size` words of ram_c at the two offsets, printing each
    # mismatch and the final verdict.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
                print(i, st, sq)
        if all_ok:
            print('OK')
        else:
            print('NG')

    # Thread body: test macstrm first, then the outer stream. Both ram_a and
    # ram_b are loaded from global address 0 in every pass.
    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream_macstrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential_macstrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        print('# macstream')
        check(size, 0, offset)

        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream_mystrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size // reduce_size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential_mystrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size // reduce_size)

        # verification
        # Only size // reduce_size outputs exist for the outer stream.
        print('# mystream')
        check(size // reduce_size, 0, offset)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # 16 is presumably forwarded to comp() as `size`.
    fsm = th.start(16)

    return m
Exemplo n.º 10
0
def mkLed(memory_datawidth=32):
    """Build the 'blinkled' module: block-wise DMA with a ``MultibankRAM``.

    The thread ``blink`` calls ``body`` once. ``body`` fills the banks of
    ``myram`` in blocks of ``block_size`` words, writes them to memory with
    ``dma_write_block`` (twice, with different data and addresses), reads
    both regions back with ``dma_read_block`` and compares every word with
    the value that was written, printing any mismatch.

    Args:
        memory_datawidth: Bit width passed to ``vthread.AXIM``.

    Returns:
        The constructed module object ``m``.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    numbanks = 4
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, memory_datawidth)
    myram = vthread.MultibankRAM(m,
                                 'myram',
                                 clk,
                                 rst,
                                 datawidth,
                                 addrwidth,
                                 numbanks=numbanks)

    # Verification flag shared by blink() and body().
    all_ok = m.TmpReg(initval=0)

    block_size = 3
    array_len = 32
    # Global-address distance between the two test regions used in body().
    array_size = (array_len + array_len) * 4 * numbanks

    # Thread body: run one test and print 'ALL OK' on success. Nothing is
    # printed for the failing case apart from the per-word messages in body().
    def blink(size):
        all_ok.value = True

        print('# start')
        # Test for 4KB boundary check
        #offset = 1024 * 16 + (myaxi.boundary_size - 4)
        offset = 1024 * 16
        body(size, offset)
        print('# end')

        if all_ok:
            print('ALL OK')

    # The four loops below share one traversal: walk the banks in order,
    # touching `block_size` consecutive words per bank, and move on to the
    # next block row (blk_offset) after all banks; `bias` grows by
    # block_size per bank so every word gets a distinct value. `done` breaks
    # out of both inner loops once `size` words have been handled.
    def body(size, offset):
        # write
        # First pattern: bias + i + 512.
        count = 0
        blk_offset = 0
        bias = 0
        done = False
        while count < size:
            for bank in range(numbanks):
                for i in range(block_size):
                    wdata = bias + i + 512
                    myram.write_bank(bank, blk_offset + i, wdata)
                    count += 1
                    if count >= size:
                        done = True
                        break
                if done:
                    break
                bias += block_size
            blk_offset += block_size

        laddr = 0
        gaddr = offset
        myram.dma_write_block(myaxi, laddr, gaddr, size, block_size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # write
        # Second pattern: bias + i + 1024, stored array_size bytes further.
        count = 0
        blk_offset = 0
        bias = 0
        done = False
        while count < size:
            for bank in range(numbanks):
                for i in range(block_size):
                    wdata = bias + i + 1024
                    myram.write_bank(bank, blk_offset + i, wdata)
                    count += 1
                    if count >= size:
                        done = True
                        break
                if done:
                    break
                bias += block_size
            blk_offset += block_size

        laddr = 0
        gaddr = array_size + offset
        myram.dma_write_block(myaxi, laddr, gaddr, size, block_size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # read
        # Read back the first region and expect the first pattern.
        laddr = 0
        gaddr = offset
        myram.dma_read_block(myaxi, laddr, gaddr, size, block_size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        count = 0
        blk_offset = 0
        bias = 0
        done = False
        while count < size:
            for bank in range(numbanks):
                for i in range(block_size):
                    rdata = myram.read_bank(bank, blk_offset + i)
                    exp = bias + i + 512
                    if vthread.verilog.NotEql(rdata, exp):
                        print('rdata[%d:%d] = %d:%d' % (bank, i, rdata, exp))
                        all_ok.value = False
                    count += 1
                    if count >= size:
                        done = True
                        break
                if done:
                    break
                bias += block_size
            blk_offset += block_size

        # read
        # Read back the second region and expect the second pattern.
        laddr = 0
        gaddr = array_size + offset
        myram.dma_read_block(myaxi, laddr, gaddr, size, block_size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        count = 0
        blk_offset = 0
        bias = 0
        done = False
        while count < size:
            for bank in range(numbanks):
                for i in range(block_size):
                    rdata = myram.read_bank(bank, blk_offset + i)
                    exp = bias + i + 1024
                    if vthread.verilog.NotEql(rdata, exp):
                        print('rdata[%d:%d] = %d:%d' % (bank, i, rdata, exp))
                        all_ok.value = False
                    count += 1
                    if count >= size:
                        done = True
                        break
                if done:
                    break
                bias += block_size
            blk_offset += block_size

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    # array_len (32) is presumably forwarded to blink() as `size`.
    fsm = th.start(array_len)

    return m
Exemplo n.º 11
0
def mkLed(matrix_size=16):
    """Build the 'blinkled' module: sequential matmul with a narrower AXI bus.

    The RAMs use 64-bit words while the AXIM width is derived as
    ``datawidth // (data_wordsize // axi_wordsize)``. A free-running 32-bit
    ``timer`` register is incremented through ``seq``; the thread ``matmul``
    samples it around ``comp``, prints the elapsed value, runs ``check`` and
    finishes the simulation.

    NOTE(review): ``data_wordsize``, ``axi_wordsize``, ``a_offset``,
    ``b_offset`` and ``c_offset`` are not defined inside this function; they
    must be provided by an enclosing or module-level scope that is not
    visible here -- confirm.

    Args:
        matrix_size: Passed to the thread as its first argument.

    Returns:
        The constructed module object ``m``.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    # Cycle counter used only for the timing printout in matmul().
    seq = Seq(m, 'seq', clk, rst)
    timer = m.Reg('timer', 32, initval=0)
    seq(
        timer.inc()
    )

    datawidth = 64
    addrwidth = 10
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)
    # The bus width is the RAM width divided by the word-size ratio.
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth //
                         (data_wordsize // axi_wordsize))

    # Thread body: time the computation, verify the result, then finish.
    def matmul(matrix_size, a_offset, b_offset, c_offset):
        start_time = timer
        comp(matrix_size, a_offset, b_offset, c_offset)
        end_time = timer
        time = end_time - start_time
        print("Time (cycles): %d" % time)
        check(matrix_size, a_offset, b_offset, c_offset)
        vthread.finish()

    # For every i: load a chunk of A into ram_a, then for every j load a
    # chunk of B into ram_b, accumulate the element-wise products into
    # ram_c[j], and finally DMA the ram_c row out to C.
    def comp(matrix_size, a_offset, b_offset, c_offset):
        a_addr, c_addr = a_offset, c_offset

        for i in range(matrix_size):
            myaxi.dma_read(ram_a, 0, a_addr, matrix_size)

            b_addr = b_offset
            for j in range(matrix_size):
                myaxi.dma_read(ram_b, 0, b_addr, matrix_size)

                sum = 0
                for k in range(matrix_size):
                    x = ram_a.read(k)
                    y = ram_b.read(k)
                    sum += x * y
                ram_c.write(j, sum)

                # Addresses step in bytes (datawidth // 8 bytes per RAM word).
                b_addr += matrix_size * (datawidth // 8)

            myaxi.dma_write(ram_c, 0, c_addr, matrix_size)
            a_addr += matrix_size * (datawidth // 8)
            c_addr += matrix_size * (datawidth // 8)

    # Read C back row by row and expect (i + 1) * 2 on the diagonal and 0
    # elsewhere. NOTE(review): these expectations depend on the A/B memory
    # contents, which are set up outside this function -- confirm.
    def check(matrix_size, a_offset, b_offset, c_offset):
        all_ok = True
        c_addr = c_offset
        for i in range(matrix_size):
            myaxi.dma_read(ram_c, 0, c_addr, matrix_size)
            for j in range(matrix_size):
                v = ram_c.read(j)
                if i == j and vthread.verilog.NotEql(v, (i + 1) * 2):
                    all_ok = False
                    print("NG [%d,%d] = %d" % (i, j, v))
                if i != j and vthread.verilog.NotEql(v, 0):
                    all_ok = False
                    print("NG [%d,%d] = %d" % (i, j, v))
            c_addr += matrix_size * (datawidth // 8)

        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    th = vthread.Thread(m, 'th_matmul', clk, rst, matmul)
    fsm = th.start(matrix_size, a_offset, b_offset, c_offset)

    return m
Exemplo n.º 12
0
def mkLed():
    """Build the 'blinkled' module exercising chained, enabled ``Counter``s.

    The stream defines three counters of size 4: ``x`` always counts, ``y``
    is enabled when ``x == 3`` and ``z`` is enabled when ``y == 3``. Their
    sum (plus ``a + b - a - b``, which cancels) goes to sink 'c'. The thread
    ``comp`` runs the stream, then a sequential loop modelling the same
    three counters, and prints whether both outputs in ``ram_c`` agree.

    Returns:
        The constructed module object ``m``.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    # Stream definition: y and z advance only while their `enable`
    # expression holds.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    x = strm.Counter(initval=0, size=4)
    y = strm.Counter(initval=0, size=4, enable=x == 3)
    z = strm.Counter(initval=0, size=4, enable=y == 3)
    # a and b cancel out; they only serve to drive the stream with data.
    c = a + b - a - b + z + y + x
    strm.sink(c, 'c')

    # Stream version: sources from ram_a/ram_b, sink into ram_c, all at
    # `offset` with `size` elements.
    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, size)
        strm.run()
        strm.join()

    # Sequential reference for the three counters.
    def comp_sequential(size, offset):
        sum = 0
        x = 0
        y = 0
        z = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a + b - a - b + z + y + x
            ram_c.write(i + offset, sum)
            # z is updated before y, and y before x, so each enable test
            # sees the value the lower counter had for this element.
            if y == 3:
                z += 1
                if z == 4:
                    z = 0
            if x == 3:
                y += 1
                if y == 4:
                    y = 0
            x += 1
            if x == 4:
                x = 0

    # Compare `size` words of ram_c at the two offsets and print the verdict.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread body: stream pass at local offset 0, sequential pass at local
    # offset `size`, then verification.
    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        check(size, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # 32 is presumably forwarded to comp() as `size`.
    fsm = th.start(32)

    return m
def mkLed():
    """Build the 'blinkled' module for a read-modify-write stream test.

    The stream 'mystream' clamps each source value 'a' into the range
    [0, numbins - 1], adds the constant 'offset' to form an address, and
    registers a read-modify-write operation named 'ext' that applies
    ``strm.Add`` with the extra argument 1 at that address.  The thread
    'th_comp' runs the stream and an equivalent sequential loop on separate
    regions of ram_b, then prints whether the two regions match.

    Returns:
        The constructed Module.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    # Two ports: comp_stream binds port 0 for reads and port 1 for writes.
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth, numports=2)

    strm = vthread.Stream(m, 'mystream', clk, rst)
    numbins = strm.constant('numbins')
    offset = strm.constant('offset')
    a = strm.source('a')
    # Clamp below at 0 ...
    a = strm.Mux(a < 0, 0, a)
    # NOTE(review): latency is forced to 0 on the Mux result; presumably so
    # the clamp adds no pipeline stage -- confirm against the Stream API.
    a.latency = 0
    # ... and above at numbins - 1.
    a = strm.Mux(a >= numbins, numbins - 1, a)
    a.latency = 0

    # The same address is used for the read and the write of the update.
    raddr = a + offset
    raddrs = (raddr, )
    waddr = raddr
    op = strm.Add
    op_args = (1, )
    strm.read_modify_write_RAM('ext', raddrs, waddr, op, op_args)

    # Stream version: zero `numbins` entries of ram_b starting at `offset`,
    # bind the stream constants, source and the 'ext' RAM, then run the
    # stream to completion.
    def comp_stream(numbins, size, offset):
        for i in range(numbins):
            ram_b.write(i + offset, 0)

        strm.set_constant('numbins', numbins)
        strm.set_constant('offset', offset)
        strm.set_source('a', ram_a, offset, size)
        strm.set_read_modify_write_RAM('ext',
                                       ram_b,
                                       read_ports=(0, ),
                                       write_port=1)
        strm.run()
        strm.join()

    # Sequential reference: same zeroing, clamping and +1 update, performed
    # one element at a time with explicit RAM reads and writes.
    def comp_sequential(numbins, size, offset):
        for i in range(numbins):
            ram_b.write(i + offset, 0)

        for i in range(size):
            a = ram_a.read(i + offset)
            a = 0 if a < 0 else a
            a = numbins - 1 if a >= numbins else a
            current = ram_b.read(a + offset)
            updated = current + 1
            ram_b.write(a + offset, updated)

    # Compare `size` words of ram_b at the two offsets and print the verdict.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_b.read(i + offset_stream)
            sq = ram_b.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread entry point: run both versions with 8 bins and verify.
    def comp(size):
        numbins = 8

        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        comp_stream(numbins, size, offset)
        myaxi.dma_write(ram_b, offset, 1024, numbins)

        # sequential
        offset = size * 4
        # NOTE(review): this reads size * 2 words, but comp_sequential only
        # reads `size` of them -- confirm the larger transfer is intended.
        myaxi.dma_read(ram_a, offset, 0, size * 2)
        comp_sequential(numbins, size, offset)
        myaxi.dma_write(ram_b, offset, 1024 * 2, numbins)

        # verification
        check(numbins, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # 32 is passed as the argument of comp (its `size` parameter).
    fsm = th.start(32)

    return m
Exemplo n.º 14
0
def mkLed():
    """Build the 'blinkled' module for a stream-termination test.

    The stream 'mystream' reads source 'a', sinks a counter as 'i', and
    registers ``a == 270`` as its terminate condition.  The thread 'th_comp'
    runs the stream and a sequential search over the same data, prints both
    resulting values, and reports whether they agree.

    Returns:
        The constructed Module.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    # Only dma_read is used on myaxi in this example.
    myaxi.disable_write()
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)

    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    i = strm.Counter()
    term = a == 270
    strm.sink(i, 'i')
    strm.terminate(term)

    # Run the stream over `size` words of ram_a starting at `offset` and
    # return the value read back from sink 'i'.
    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size)
        strm.set_sink_immediate('i', 0)
        strm.run()
        strm.join()
        i = strm.read_sink('i')
        return i

    # Sequential reference: index of the first word equal to 270, or
    # size - 1 when no such word exists.
    def comp_sequential(size, offset):
        for i in range(size):
            a = ram_a.read(i + offset)
            if a == 270:
                return i
        return size - 1

    # Print both values and whether they are equal.
    def check(size_stream, size_seq):
        all_ok = True
        if vthread.verilog.NotEql(size_stream, size_seq):
            all_ok = False
        print(size_stream, size_seq)
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread entry point.
    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 1024, size)
        # NOTE(review): comp_stream is called twice and only the second
        # result is kept; presumably this exercises re-running the stream
        # after a termination -- confirm.
        st_i = comp_stream(size, offset)
        st_i = comp_stream(size, offset)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 1024, size)
        sq_i = comp_sequential(size, offset)

        # verification
        check(st_i, sq_i)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # 32 is passed as the argument of comp (its `size` parameter).
    fsm = th.start(32)

    return m
Exemplo n.º 15
0
def mkLed():
    """Build the 'blinkled' module for a repeated DMA write/read-back test.

    The thread 'th_blink' performs four iterations.  Each iteration fills
    'myram' with two different patterns, DMA-writes them to two global
    addresses, DMA-reads them back and compares every word against the
    expected pattern.  The register `all_ok` is cleared on any mismatch.

    Returns:
        The constructed Module.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    myram = vthread.RAM(m, 'myram', clk, rst, datawidth, addrwidth)

    # Result flag shared between blink and body.
    all_ok = m.TmpReg(initval=0)

    # Thread entry point: run body four times, each at a global offset that
    # is i * 1024 * 16, and print 'ALL OK' if no mismatch was recorded.
    def blink(size):
        all_ok.value = True

        for i in range(4):
            print('# iter %d start' % i)
            offset = i * 1024 * 16
            body(size, offset)
            print('# iter %d end' % i)

        if all_ok:
            print('ALL OK')

    # One write/read-back round at global address `offset`.
    def body(size, offset):
        # write
        for i in range(size):
            wdata = i + 100
            myram.write(i, wdata)

        laddr = 0
        gaddr = offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # write
        for i in range(size):
            wdata = i + 1000
            myram.write(i, wdata)

        laddr = 0
        # Second region: (size + size) * 4 past `offset`.
        gaddr = (size + size) * 4 + offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # read
        laddr = 0
        gaddr = offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        # The first region must hold the i + 100 pattern.
        for i in range(size):
            rdata = myram.read(i)
            if rdata != i + 100:
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

        # read
        laddr = 0
        gaddr = (size + size) * 4 + offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        # The second region must hold the i + 1000 pattern.
        for i in range(size):
            rdata = myram.read(i)
            if rdata != i + 1000:
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    # 16 is passed as the argument of blink (its `size` parameter).
    fsm = th.start(16)

    return m
Exemplo n.º 16
0
def mkLed():
    """Build the 'blinkled' module for a nested-substream test.

    Four streams are defined, each embedding the previous one as a
    substream:

    * 'macstream'  -- multiplies 'a' and 'b' and feeds the product to
      ``ReduceAddValid`` with parameter 'const'; sinks 'c' and 'v'.
    * 'macstream2' -- applies ``+ 1``, ``- 1`` to 'a' and ``* 1`` to 'b',
      then forwards to 'macstream'.
    * 'neststream' -- adds 1 to both inputs, forwards to 'macstream2', and
      adds its (incremented) 'a' to the returned 'c'.
    * 'mystream'   -- forwards 'x'/'y' to 'neststream', adds 'y' to the
      returned 'c', and sinks the result as 'z' when 'v' is set.

    The thread 'th_comp' runs 'macstream2' and 'mystream' against sequential
    reference loops and prints a verdict for each.

    Returns:
        The constructed Module.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    # Value bound to every stream's 'const' parameter at run time.
    reduce_size = 4

    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)
    ram_d = vthread.RAM(m, 'ram_d', clk, rst, datawidth, addrwidth)

    # Innermost stream: reduce-add of a * b.
    macstrm = vthread.Stream(m, 'macstream', clk, rst)
    macstrm_a = macstrm.source('a')
    macstrm_b = macstrm.source('b')
    macstrm_const = macstrm.parameter('const')
    macstrm_mul = macstrm_a * macstrm_b
    macstrm_c, macstrm_v = macstrm.ReduceAddValid(macstrm_mul, macstrm_const)
    macstrm.sink(macstrm_c, 'c')
    macstrm.sink(macstrm_v, 'v')

    # Second level: value-preserving arithmetic around a call to macstrm.
    macstrm2 = vthread.Stream(m, 'macstream2', clk, rst)
    macstrm2_a = macstrm2.source('a')
    macstrm2_b = macstrm2.source('b')
    macstrm2_const = macstrm2.parameter('const')
    macstrm2_a = macstrm2_a + 1
    macstrm2_a = macstrm2_a - 1
    macstrm2_b = macstrm2_b * 1
    macsub = macstrm2.substream(macstrm)
    macsub.to_source('a', macstrm2_a)
    macsub.to_source('b', macstrm2_b)
    macsub.to_parameter('const', macstrm2_const)
    macstrm2_c = macsub.from_sink('c')
    macstrm2_v = macsub.from_sink('v')
    macstrm2.sink(macstrm2_c, 'c')
    macstrm2.sink(macstrm2_v, 'v')

    # Third level: increments both inputs before calling macstrm2.
    neststrm = vthread.Stream(m, 'neststream', clk, rst)
    neststrm_a = neststrm.source('a')
    neststrm_b = neststrm.source('b')
    neststrm_const = neststrm.parameter('const')
    neststrm_a += 1
    neststrm_a += 0
    neststrm_b += 1
    # `macsub` is rebound here to the substream handle of neststrm.
    macsub = neststrm.substream(macstrm2)
    macsub.to_source('a', neststrm_a)
    macsub.to_source('b', neststrm_b)
    macsub.to_parameter('const', neststrm_const)
    neststrm_c = macsub.from_sink('c')
    neststrm_c += neststrm_a
    neststrm_c += 0
    neststrm_v = macsub.from_sink('v')
    neststrm.sink(neststrm_c, 'c')
    neststrm.sink(neststrm_v, 'v')

    # Outermost stream: calls neststrm and adds 'y' to its result.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    x = strm.source('x')
    y = strm.source('y')
    const = strm.parameter('const')
    sub = strm.substream(neststrm)
    sub.to_source('a', x)
    sub.to_source('b', y)
    sub.to_parameter('const', const)
    z = sub.from_sink('c')
    v = sub.from_sink('v')
    z = z + y
    strm.sink(z, 'z', when=v, when_name='v')

    # Result flag shared between comp and check.
    all_ok = m.TmpReg(initval=0)

    # Run 'macstream2' directly: 'c' goes to ram_c and 'v' to ram_d.
    def comp_stream_macstrm(size, offset):
        macstrm2.set_source('a', ram_a, offset, size)
        macstrm2.set_source('b', ram_b, offset, size)
        macstrm2.set_parameter('const', reduce_size)
        macstrm2.set_sink('c', ram_c, offset, size)
        macstrm2.set_sink('v', ram_d, offset, size)
        macstrm2.run()
        macstrm2.join()

    # Run 'mystream': one 'z' output per reduce_size inputs goes to ram_c.
    def comp_stream_mystrm(size, offset):
        strm.set_source('x', ram_a, offset, size)
        strm.set_source('y', ram_b, offset, size)
        strm.set_parameter('const', reduce_size)
        strm.set_sink('z', ram_c, offset, size // reduce_size)
        strm.run()
        strm.join()

    # Sequential reference for 'macstream2': a running sum of a * b that is
    # written every step and reset after every reduce_size elements.
    def comp_sequential_macstrm(size, offset):
        sum = 0
        count = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum += a * b
            count += 1
            ram_c.write(i + offset, sum)
            # NOTE(review): the flag written to ram_d is true when count
            # equals reduce_size - 1 (after the increment above), while the
            # reset below happens at count == reduce_size.  ram_d is never
            # compared by check(), so confirm whether this is meant to match
            # the stream's 'v' output.
            ram_d.write(i + offset, count == (reduce_size - 1))
            if count == reduce_size:
                sum = 0
                count = 0

    # Sequential reference for 'mystream': accumulates (x + 1) * (y + 1) and,
    # on every reduce_size-th element, writes sum + (x + 1) + y to the next
    # output slot.
    def comp_sequential_mystrm(size, offset):
        sum = 0
        count = 0
        write_offset = offset
        for i in range(size):
            x = ram_a.read(i + offset)
            y = ram_b.read(i + offset)
            sum += (x + 1) * (y + 1)
            val = sum + (x + 1) + y
            count += 1
            if count == reduce_size:
                ram_c.write(write_offset, val)
                write_offset += 1
                sum = 0
                count = 0

    # Compare `size` words of ram_c at the two offsets; mismatches are
    # printed and clear all_ok, which is never set back to True here.
    def check(size, offset_stream, offset_seq):
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok.value = False
                print(i, st, sq)
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread entry point: test 'macstream2' first, then 'mystream'.
    def comp(size):
        all_ok.value = True

        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream_macstrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential_macstrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        print('# macstream')
        check(size, 0, offset)

        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream_mystrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size // reduce_size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential_mystrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size // reduce_size)

        # verification
        print('# mystream')
        check(size // reduce_size, 0, offset)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # 16 is passed as the argument of comp (its `size` parameter).
    fsm = th.start(16)

    return m
Exemplo n.º 17
0
def mkLed():
    """Build the 'blinkled' module for a DMA test near an address boundary.

    The thread 'th_blink' runs one write/read-back round whose global start
    address is ``myaxi.boundary_size - 4``, so the transfers begin just
    below a multiple of the interface's boundary size.  The register
    `all_ok` is cleared on any mismatch.

    Returns:
        The constructed Module.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    myram = vthread.RAM(m, 'myram', clk, rst, datawidth, addrwidth)

    # Result flag shared between blink and body.
    all_ok = m.TmpReg(initval=0)

    # Thread entry point.
    def blink(size):
        all_ok.value = True

        # Test for 4KB boundary check
        offset = myaxi.boundary_size - 4
        body(size, offset)

        if all_ok:
            print('ALL OK')

    # One write/read-back round: pattern i + 100 at `offset`, pattern
    # i + 1000 one boundary_size further on.
    def body(size, offset):
        # write
        for i in range(size):
            wdata = i + 100
            myram.write(i, wdata)

        laddr = 0
        gaddr = offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # write
        for i in range(size):
            wdata = i + 1000
            myram.write(i, wdata)

        laddr = 0
        gaddr = offset + myaxi.boundary_size
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # read
        laddr = 0
        gaddr = offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        # The first region must hold the i + 100 pattern.
        for i in range(size):
            rdata = myram.read(i)
            if vthread.verilog.NotEql(rdata, i + 100):
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

        # read
        laddr = 0
        gaddr = offset + myaxi.boundary_size
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        # The second region must hold the i + 1000 pattern.
        for i in range(size):
            rdata = myram.read(i)
            if vthread.verilog.NotEql(rdata, i + 1000):
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    # 256 + 256 + 64 is passed as the argument of blink (its `size`).
    fsm = th.start(256 + 256 + 64)

    return m
def mkLed():
    """Build the 'blinkled' module for a register-controlled DMA test.

    In addition to the AXI master 'myaxi' and the RAM 'myram', the module
    has an ``AXISLiteRegister`` named 'saxi'.  The thread 'th_blink' waits
    on register 0, runs four write/read-back rounds, writes the result flag
    to register 2 and raises a flag on register 1 when it is finished.

    Returns:
        The constructed Module.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    myram = vthread.RAM(m, 'myram', clk, rst, datawidth, addrwidth)

    saxi = vthread.AXISLiteRegister(m, 'saxi', clk, rst, datawidth)

    # Result flag shared between blink and body.
    all_ok = m.TmpReg(initval=0)

    # Thread entry point.
    def blink(size):
        # wait start
        saxi.wait_flag(0, value=1, resetvalue=0)
        # reset done
        saxi.write(1, 0)

        all_ok.value = True

        for i in range(4):
            print('# iter %d start' % i)
            # Test for 4KB boundary check
            offset = i * 1024 * 16 + (myaxi.boundary_size - 4)
            body(size, offset)
            print('# iter %d end' % i)

        if all_ok:
            print('# verify (local): PASSED')
        else:
            print('# verify (local): FAILED')

        # result
        saxi.write(2, all_ok)

        # done
        saxi.write_flag(1, 1, resetvalue=0)

    # One write/read-back round: pattern i + 100 at `offset`, pattern
    # i + 1000 at (size + size) * 4 past `offset`.
    def body(size, offset):
        # write
        for i in range(size):
            wdata = i + 100
            myram.write(i, wdata)

        laddr = 0
        gaddr = offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # write
        for i in range(size):
            wdata = i + 1000
            myram.write(i, wdata)

        laddr = 0
        gaddr = (size + size) * 4 + offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # read
        laddr = 0
        gaddr = offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        # The first region must hold the i + 100 pattern.
        for i in range(size):
            rdata = myram.read(i)
            if vthread.verilog.NotEql(rdata, i + 100):
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

        # read
        laddr = 0
        gaddr = (size + size) * 4 + offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        # The second region must hold the i + 1000 pattern.
        for i in range(size):
            rdata = myram.read(i)
            if vthread.verilog.NotEql(rdata, i + 1000):
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    # 16 is passed as the argument of blink (its `size` parameter).
    fsm = th.start(16)

    return m
Exemplo n.º 19
0
def mkLed(memory_datawidth=128):
    """Build the 'blinkled' module for a stream test on multibank RAMs.

    Three ``MultibankRAM`` instances with four banks each hold the operands
    and the result of the stream 'mystream', which sinks ``a + b`` as 'c'.
    The thread 'th_comp' runs the stream and a sequential loop on separate
    regions of ram_c and prints whether they match.

    Args:
        memory_datawidth: Data width passed to the AXI master 'myaxi'.  The
            RAMs themselves use a fixed data width of 32.

    Returns:
        The constructed Module.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    numbanks = 4
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, memory_datawidth)
    ram_a = vthread.MultibankRAM(m,
                                 'ram_a',
                                 clk,
                                 rst,
                                 datawidth,
                                 addrwidth,
                                 numbanks=numbanks)
    ram_b = vthread.MultibankRAM(m,
                                 'ram_b',
                                 clk,
                                 rst,
                                 datawidth,
                                 addrwidth,
                                 numbanks=numbanks)
    ram_c = vthread.MultibankRAM(m,
                                 'ram_c',
                                 clk,
                                 rst,
                                 datawidth,
                                 addrwidth,
                                 numbanks=numbanks)

    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    c = a + b
    strm.sink(c, 'c')

    # Stream version: bind sources and sink at `offset` and run to completion.
    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, size)
        strm.run()
        strm.join()

    # Sequential reference: element-wise a + b written to ram_c.
    def comp_sequential(size, offset):
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a + b
            ram_c.write(i + offset, sum)

    # Compare `size` words of ram_c at the two offsets; mismatches are printed.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
                print(i, st, sq)
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread entry point.  DMA calls use dma_size/dma_offset, whereas the
    # compute and check steps use offsets scaled by numbanks.
    # NOTE(review): comp_stream and comp_sequential are called with `size`,
    # but check() compares comp_size (= size * numbanks) words -- confirm
    # how MultibankRAM addresses map onto DMA and stream accesses.
    def comp(size):
        dma_size = size
        comp_size = size * numbanks

        dma_offset = 0
        comp_offset = 0
        myaxi.dma_read(ram_a, dma_offset, 0, dma_size)
        myaxi.dma_read(ram_b, dma_offset, 0, dma_size)
        comp_stream(size, comp_offset)
        myaxi.dma_write(ram_c, dma_offset, 1024, dma_size)

        dma_offset = size
        comp_offset = comp_size
        myaxi.dma_read(ram_a, dma_offset, 0, dma_size)
        myaxi.dma_read(ram_b, dma_offset, 0, dma_size)
        comp_sequential(size, comp_offset)
        myaxi.dma_write(ram_c, dma_offset, 1024 * 2, dma_size)

        check(comp_size, 0, comp_offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # 32 is passed as the argument of comp (its `size` parameter).
    fsm = th.start(32)

    return m
Exemplo n.º 20
0
def mkLed(matrix_size=16):
    """Build the 'blinkled' module for a stream-based matrix multiply.

    A multiply-accumulate stream 'strm_madd' computes one output element at
    a time from a row held in ram_a and a row held in ram_b.  The thread
    'th_matmul' computes the whole matrix, prints the elapsed value of a
    free-running timer register, and checks the result read back over DMA.

    NOTE(review): `datawidth`, `a_offset`, `b_offset` and `c_offset` are
    not defined inside this function; presumably they are module-level
    names defined elsewhere in the original file -- confirm.

    Args:
        matrix_size: Matrix dimension passed to the thread as its first
            argument.

    Returns:
        The constructed Module.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    # Timer register incremented by the sequencer; matmul samples it before
    # and after the computation.
    seq = Seq(m, 'seq', clk, rst)
    timer = m.Reg('timer', 32, initval=0)
    seq(
        timer.inc()
    )

    addrwidth = 10
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)

    strm = vthread.Stream(m, 'strm_madd', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    size = strm.constant('size')
    sum, sum_valid = strm.ReduceAddValid(a * b, size)
    strm.sink(sum, 'sum', when=sum_valid, when_name='sum_valid')

    # Run the stream over `size` words of ram_a and ram_b starting at local
    # address 0 and store the single result word at ram_c[waddr].
    def strm_madd(size, waddr):
        strm.set_source('a', ram_a, 0, size)
        strm.set_source('b', ram_b, 0, size)
        strm.set_constant('size', size)
        strm.set_sink('sum', ram_c, waddr, 1)
        strm.run()
        strm.join()

    # Thread entry point: time comp(), print the timer difference, check.
    def matmul(matrix_size, a_offset, b_offset, c_offset):
        start_time = timer
        comp(matrix_size, a_offset, b_offset, c_offset)
        end_time = timer
        time = end_time - start_time
        print("Time (cycles): %d" % time)
        check(matrix_size, a_offset, b_offset, c_offset)
        vthread.finish()

    # For every i: load one row from a_addr into ram_a; for every j: load
    # one row from b_addr into ram_b and store their multiply-accumulate at
    # ram_c[j]; then DMA-write the finished row of ram_c to c_addr.  Each
    # address advances by matrix_size * (datawidth // 8) per row.
    def comp(matrix_size, a_offset, b_offset, c_offset):
        a_addr, c_addr = a_offset, c_offset

        for i in range(matrix_size):
            myaxi.dma_read(ram_a, 0, a_addr, matrix_size)

            b_addr = b_offset
            for j in range(matrix_size):
                myaxi.dma_read(ram_b, 0, b_addr, matrix_size)

                strm_madd(matrix_size, j)

                b_addr += matrix_size * (datawidth // 8)

            myaxi.dma_write(ram_c, 0, c_addr, matrix_size)
            a_addr += matrix_size * (datawidth // 8)
            c_addr += matrix_size * (datawidth // 8)

    # Read the result back row by row and verify it: diagonal entries must
    # equal (i + 1) * 2 and all other entries must be 0.
    # NOTE(review): these expected values presumably correspond to input
    # matrices initialized outside this function -- confirm.
    def check(matrix_size, a_offset, b_offset, c_offset):
        all_ok = True
        c_addr = c_offset
        for i in range(matrix_size):
            myaxi.dma_read(ram_c, 0, c_addr, matrix_size)
            for j in range(matrix_size):
                v = ram_c.read(j)
                if i == j and vthread.verilog.NotEql(v, (i + 1) * 2):
                    all_ok = False
                    print("NG [%d,%d] = %d" % (i, j, v))
                if i != j and vthread.verilog.NotEql(v, 0):
                    all_ok = False
                    print("NG [%d,%d] = %d" % (i, j, v))
            c_addr += matrix_size * (datawidth // 8)

        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    th = vthread.Thread(m, 'th_matmul', clk, rst, matmul)
    fsm = th.start(matrix_size, a_offset, b_offset, c_offset)

    return m
Exemplo n.º 21
0
def mkLed():
    """Build the 'blinkled' module for a conditional-sink stream test.

    The stream 'mystream' computes ``c = a + b``, derives a condition
    ``v = (c > 140) and (c < 150)``, sinks 'c' only when 'v' holds, and
    sinks the ``ReduceAdd`` of 'v' as 'cnt'.  The thread 'th_comp' runs the
    stream and a sequential filter over the same data and prints whether
    the filtered outputs match.

    Returns:
        The constructed Module.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    c = a + b
    v = strm.Ands(c > 140, c < 150)
    cnt = strm.ReduceAdd(v)
    strm.sink(c, 'c', when=v, when_name='v')
    strm.sink(cnt, 'cnt')

    # Stream version: run the stream, read back the 'cnt' sink, print and
    # return it.
    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, 0)  # max_size
        strm.set_sink_immediate('cnt', 0)  # max_size
        strm.run()
        strm.join()
        cnt = strm.read_sink('cnt')
        print('# num of counted: %d' % cnt)
        return cnt

    # Sequential reference: store every a + b strictly between 140 and 150
    # contiguously from ram_c[offset] and return how many were stored.
    # (`sum` is assigned but never used.)
    def comp_sequential(size, offset):
        sum = 0
        addr = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            c = a + b
            if c > 140 and c < 150:
                ram_c.write(addr + offset, c)
                addr += 1
        print('# num of counted: %d' % addr)
        return addr

    # Compare `size` words of ram_c at the two offsets and print the verdict.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread entry point.
    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        cnt = comp_stream(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, cnt)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        # `cnt` is rebound to the sequential count, which is the value used
        # for the remaining transfers and the check below.
        cnt = comp_sequential(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, cnt)

        # verification
        # Both results are read back from global memory before comparing.
        myaxi.dma_read(ram_c, 0, 1024, cnt)
        myaxi.dma_read(ram_c, offset, 1024 * 2, cnt)
        check(cnt, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # 32 is passed as the argument of comp (its `size` parameter).
    fsm = th.start(32)

    return m
Exemplo n.º 22
0
def mkLed():
    """Build the 'blinkled' module for a DMA test driven from the RAM side.

    Unlike the other examples in this file, the DMA transfers are issued as
    ``myram.dma_write(myaxi, ...)`` / ``myram.dma_read(myaxi, ...)``.  The
    thread 'th_blink' writes two patterns to global memory, reads them back
    and prints 'ALL OK' when every word matches.

    Returns:
        The constructed Module.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    myram = vthread.RAM(m, 'myram', clk, rst, datawidth, addrwidth)

    # Thread entry point; takes no arguments, the transfer length and the
    # global base address are fixed below.
    def blink():
        size = 256 * 2
        offset = 1024 * 4

        # write
        for i in range(size):
            wdata = i
            myram.write(i, wdata)

        laddr = 0
        gaddr = offset
        myram.dma_write(myaxi, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # overwrite
        for i in range(size):
            wdata = 128
            myram.write(i, wdata)

        laddr = 0
        # Second region: size * 4 past `offset`.
        gaddr = offset + size * 4
        myram.dma_write(myaxi, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # read
        all_ok = True

        laddr = 0
        gaddr = offset
        myram.dma_read(myaxi, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        # The first region must hold the index pattern.
        for i in range(size):
            rdata = myram.read(i)
            if rdata != i:
                print('rdata[%d] = %d' % (i, rdata))
                all_ok = False

        # read
        laddr = 0
        gaddr = offset + size * 4
        myram.dma_read(myaxi, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        # The second region must hold the constant 128.
        for i in range(size):
            rdata = myram.read(i)
            if rdata != 128:
                print('rdata[%d] = %d' % (i, rdata))
                all_ok = False

        if all_ok:
            print('ALL OK')

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    fsm = th.start()

    return m
Exemplo n.º 23
0
def mkLed():
    """Build the 'blinkled' module for a shared-substream test.

    Three streams are defined:

    * 'mul_stream' -- sinks ``x * y`` as 'z'.
    * 'mac_stream' -- adds 1 to each input, multiplies them through
      'mul_stream' as a substream, and sinks the ``ReduceAddValid`` result
      as 'sum'.
    * 'act_stream' -- adds 2 to each input (two ``+ 1`` steps), multiplies
      them through 'mul_stream', reduces, and replaces non-positive sums
      with 0 before sinking.

    The thread 'th_comp' runs MUL, MAC, ACT, MAC and ACT in that order, each
    against a sequential reference, and prints 'OK' or 'NG' for each.

    Returns:
        The constructed Module.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    # Multiplier stream, used both directly and as a substream.
    mulstrm = vthread.Stream(m, 'mul_stream', clk, rst)
    mulx = mulstrm.source('x')
    muly = mulstrm.source('y')
    mulz = mulx * muly
    mulstrm.sink(mulz, 'z')

    # Multiply-accumulate stream built on top of mulstrm.
    macstrm = vthread.Stream(m, 'mac_stream', clk, rst)
    a = macstrm.source('a')
    b = macstrm.source('b')
    a = a + 1
    b = b + 1
    sub = macstrm.substream(mulstrm)
    sub.to_source('x', a)
    sub.to_source('y', b)
    c = sub.from_sink('z')
    size = macstrm.constant('size')
    sum, sum_valid = macstrm.ReduceAddValid(c, size)
    macstrm.sink(sum, 'sum', when=sum_valid, when_name='sum_valid')

    # Second user of mulstrm; the names a, b, sub, c, size and sum are
    # rebound to this stream's objects from here on.
    actstrm = vthread.Stream(m, 'act_stream', clk, rst)
    a = actstrm.source('a')
    b = actstrm.source('b')
    a = a + 1
    b = b + 1
    a = a + 1
    b = b + 1
    sub = actstrm.substream(mulstrm)
    sub.to_source('x', a)
    sub.to_source('y', b)
    c = sub.from_sink('z')
    size = actstrm.constant('size')
    sum, sum_valid = actstrm.ReduceAddValid(c, size)
    # Keep positive sums, otherwise output 0.
    sum = actstrm.Mux(sum > 0, sum, 0)
    actstrm.sink(sum, 'sum', when=sum_valid, when_name='sum_valid')

    # Run 'mul_stream' directly: `size` products written to ram_c.
    def comp_stream_mul(size, offset):
        mulstrm.set_source('x', ram_a, offset, size)
        mulstrm.set_source('y', ram_b, offset, size)
        mulstrm.set_sink('z', ram_c, offset, size)
        mulstrm.run()
        mulstrm.join()

    # Run 'mac_stream': a single result word written to ram_c[offset].
    def comp_stream_mac(size, offset):
        macstrm.set_source('a', ram_a, offset, size)
        macstrm.set_source('b', ram_b, offset, size)
        macstrm.set_constant('size', size)
        macstrm.set_sink('sum', ram_c, offset, 1)
        macstrm.run()
        macstrm.join()

    # Run 'act_stream': a single result word written to ram_c[offset].
    def comp_stream_act(size, offset):
        actstrm.set_source('a', ram_a, offset, size)
        actstrm.set_source('b', ram_b, offset, size)
        actstrm.set_constant('size', size)
        actstrm.set_sink('sum', ram_c, offset, 1)
        actstrm.run()
        actstrm.join()

    # Sequential reference for MUL: element-wise a * b.
    def comp_sequential_mul(size, offset):
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a * b
            ram_c.write(i + offset, sum)

    # Sequential reference for MAC: sum of (a + 1) * (b + 1).
    def comp_sequential_mac(size, offset):
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset) + 1
            b = ram_b.read(i + offset) + 1
            sum += a * b
        ram_c.write(offset, sum)

    # Sequential reference for ACT: sum of (a + 2) * (b + 2), with
    # non-positive totals replaced by 0.
    def comp_sequential_act(size, offset):
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset) + 2
            b = ram_b.read(i + offset) + 2
            sum += a * b
        if sum <= 0:
            sum = 0
        ram_c.write(offset, sum)

    # Compare `size` words of ram_c at the two offsets; mismatches are printed.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
                print(i, st, sq)
        if all_ok:
            print('OK')
        else:
            print('NG')

    # Thread entry point.  MAC and ACT are each run a second time after the
    # other stream has used the shared 'mul_stream' substream.
    def comp(size):
        # mul
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_mul(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_mul(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        print('# MUL')
        check(size, 0, offset)

        # mac
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_mac(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, 1)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_mac(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, 1)

        # verification
        print('# MAC')
        check(1, 0, offset)

        # act
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_act(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, 1)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_act(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, 1)

        # verification
        print('# ACT')
        check(1, 0, offset)

        # mac 2
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_mac(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, 1)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_mac(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, 1)

        # verification
        print('# MAC')
        check(1, 0, offset)

        # act 2
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_act(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, 1)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_act(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, 1)

        # verification
        print('# ACT')
        check(1, 0, offset)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # 32 is passed as the argument of comp (its `size` parameter).
    fsm = th.start(32)

    return m
Exemplo n.º 24
0
def mkLed():
    """Build the 'blinkled' module for a scratchpad-based stream test.

    The stream adds every input word ``a`` to a word read back from a
    128-entry scratchpad at address ``counter - img_width`` and only emits
    results once the counter has reached ``img_width``.  The same
    computation is repeated sequentially on the RAMs and both results are
    compared by a control thread.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    # 32-bit data words, 10-bit RAM addresses.
    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)

    # --- stream definition -------------------------------------------------
    strm = vthread.Stream(m, 'mystream', clk, rst)
    # Run-time parameter; set to `size` in comp_stream().
    img_width = strm.parameter('img_width')

    # Gates the sink below.
    counter = strm.Counter()

    a = strm.source('a')
    # Write address of the scratchpad.
    a_addr = strm.Counter()

    # Scratchpad fed with `a` at address `a_addr`.
    sp = strm.Scratchpad(a, a_addr, length=128)

    # Read address lags a fresh counter by img_width.
    # NOTE(review): presumably this fetches the word stored img_width
    # elements earlier -- confirm against the Scratchpad semantics.
    a_old_addr = strm.Counter() - img_width
    a_old = sp.read(a_old_addr)

    b = a + a_old

    # Results are only written while counter >= img_width.
    strm.sink(b, 'b', when=counter >= img_width)

    # add a stall condition
    # 4-bit register incremented by the Seq below; the condition
    # `count == 0` is attached to strm.oready with value 1.
    # NOTE(review): presumably this periodically stalls the stream output --
    # confirm against util.add_disable_cond.
    count = m.Reg('count', 4, initval=0)
    seq = Seq(m, 'seq', clk, rst)
    seq(count.inc())

    util.add_disable_cond(strm.oready, 1, count == 0)

    # --- thread-side helpers (passed to vthread.Thread via comp) -----------
    def comp_stream(size, offset):
        # Source reads 2 * size words from ram_a, sink writes size words to
        # ram_b; img_width is set to size.  Blocks until the stream is done.
        strm.set_source('a', ram_a, offset, size * 2)
        strm.set_sink('b', ram_b, offset, size)
        strm.set_parameter('img_width', size)
        strm.run()
        strm.join()

    def comp_sequential(size, offset):
        # Reference implementation: b[i] = a[i] + a[i + size].
        for i in range(size):
            a_buf = ram_a.read(i + offset)
            a = ram_a.read(i + offset + size)
            b = a_buf + a
            ram_b.write(i + offset, b)

    def check(size, offset_stream, offset_seq):
        # Compare `size` words of ram_b at the two offsets and report.
        all_ok = True
        for i in range(size):
            st = ram_b.read(i + offset_stream)
            sq = ram_b.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    def comp(size):
        # stream
        # Load 2 * size words from global address 0, run the stream, and
        # write the size results to global address 1024.
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size * 2)
        comp_stream(size, offset)
        myaxi.dma_write(ram_b, offset, 1024, size)

        # sequential
        # Same input data, placed at local offset size * 4 so it does not
        # overlap the stream region; results go to global address 2048.
        offset = size * 4
        myaxi.dma_read(ram_a, offset, 0, size * 2)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_b, offset, 1024 * 2, size)

        # verification
        check(size, 0, offset)

        vthread.finish()

    # Control thread running comp(); started with size = 32.
    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start(32)

    return m
Exemplo n.º 25
0
def mkLed(memory_datawidth=128):
    """Build the 'blinkled' module for a multibank-RAM DMA round-trip test.

    Four 32-bit RAM banks are grouped into one ``MultibankRAM``.  A control
    thread writes two distinct data patterns to global memory through the
    AXI master, reads them back and compares every word.

    Args:
        memory_datawidth: Data width passed to the AXI master ``myaxi``.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    numbanks = 4
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, memory_datawidth)

    # One RAM per bank, named myram_0 .. myram_3.
    myrams = [vthread.RAM(m, 'myram_%d' % i, clk, rst, datawidth, addrwidth)
              for i in range(numbanks)]
    myram = vthread.MultibankRAM(rams=myrams, name='myram')

    # Module-level register shared by blink() and body().
    all_ok = m.TmpReg(initval=0)

    array_len = 16
    # Global-address distance between the first and the second pattern
    # written by body(): (16 + 16) * 4 * 4 = 512.
    array_size = (array_len + array_len) * 4 * numbanks

    def blink(size):
        # Entry point of the thread: run body() four times at different
        # global offsets and report overall success.
        all_ok.value = True

        for i in range(4):
            print('# iter %d start' % i)
            # Test for 4KB boundary check
            # Start 4 below myaxi.boundary_size so the transfer straddles
            # the boundary.
            offset = i * 1024 * 16 + (myaxi.boundary_size - 4)
            body(size, offset)
            print('# iter %d end' % i)

        # NOTE(review): nothing is printed when all_ok is false (only the
        # per-word 'rdata[...]' lines from body()), and there is no
        # vthread.finish() call here.
        if all_ok:
            print('ALL OK')

    def body(size, offset):
        # write
        # First pattern: word i of each bank holds i + 100 + bank.
        for bank in range(numbanks):
            for i in range(size):
                wdata = i + 100 + bank
                myram.write_bank(bank, i, wdata)

        # Local address 0 -> global address `offset`.
        laddr = 0
        gaddr = offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # write
        # Second pattern: word i of each bank holds i + 1000 + bank.
        for bank in range(numbanks):
            for i in range(size):
                wdata = i + 1000 + bank
                myram.write_bank(bank, i, wdata)

        # Local address 0 -> global address `array_size + offset`.
        laddr = 0
        gaddr = array_size + offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # read
        # Read the first pattern back into the banks.
        laddr = 0
        gaddr = offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        # Every word must match the first pattern; mismatches are printed
        # and clear all_ok.
        for bank in range(numbanks):
            for i in range(size):
                rdata = myram.read_bank(bank, i)
                if vthread.verilog.NotEql(rdata, i + 100 + bank):
                    print('rdata[%d] = %d' % (i, rdata))
                    all_ok.value = False

        # read
        # Read the second pattern back into the banks.
        laddr = 0
        gaddr = array_size + offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        # Every word must match the second pattern.
        for bank in range(numbanks):
            for i in range(size):
                rdata = myram.read_bank(bank, i)
                if vthread.verilog.NotEql(rdata, i + 1000 + bank):
                    print('rdata[%d] = %d' % (i, rdata))
                    all_ok.value = False

    # Control thread running blink(); started with size = array_len (16).
    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    fsm = th.start(array_len)

    return m
Exemplo n.º 26
0
def mkLed():
    """Build the 'blinkled' module for a nine-term windowed-sum stream test.

    The stream sums nine values: the current input and its two ``prev(1)``
    predecessors, plus the same triple read from a scratchpad at
    ``a_addr - img_width`` and at ``a_addr - 2 * img_width``.  A sequential
    reference sums a 3x3 window over three consecutive rows of ``size``
    words, and a control thread compares both results.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    # 32-bit data words, 10-bit RAM addresses.
    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)

    # --- stream definition -------------------------------------------------
    strm = vthread.Stream(m, 'mystream', clk, rst)
    # Run-time parameter; set to `size` in comp_stream().
    img_width = strm.parameter('img_width')

    # Gates the sink below.
    counter = strm.Counter()

    a = strm.source('a')
    # Scratchpad write address; also the base of both read addresses.
    a_addr = strm.Counter()
    sp = strm.Scratchpad(a, a_addr, length=128)

    # Current input and two prev(1) taps.
    # NOTE(review): presumably prev(1) yields the value of the previous
    # stream element -- confirm against the stream API.
    a0 = a
    a1 = a0.prev(1)
    a2 = a1.prev(1)

    # Scratchpad read img_width behind the write address, plus two taps.
    a3_addr = a_addr - img_width
    a3 = sp.read(a3_addr)
    a4 = a3.prev(1)
    a5 = a4.prev(1)

    # Scratchpad read 2 * img_width behind the write address, plus two taps.
    a6_addr = a3_addr - img_width
    a6 = sp.read(a6_addr)
    a7 = a6.prev(1)
    a8 = a7.prev(1)

    # Alternative plain-operator form left by the original author:
    #b = a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8
    b = strm.AddN(a0, a1, a2, a3, a4, a5, a6, a7, a8)

    # Results are only written while counter >= 2 * img_width + 2.
    strm.sink(b, 'b', when=counter >= img_width + img_width + 2)

    # --- thread-side helpers (passed to vthread.Thread via comp) -----------
    def comp_stream(size, offset):
        # Source reads 3 * size words from ram_a, sink writes size - 2 words
        # to ram_b; img_width is set to size.  Blocks until the stream is
        # done.
        strm.set_source('a', ram_a, offset, size * 3)
        strm.set_sink('b', ram_b, offset, size - 2)
        strm.set_parameter('img_width', size)
        strm.run()
        strm.join()

    def comp_sequential(size, offset):
        # Reference implementation: for each of the size - 2 positions, sum
        # three consecutive words from each of three rows spaced `size`
        # apart.
        for i in range(size - 2):
            a0 = ram_a.read(i + offset)
            a1 = ram_a.read(i + offset + 1)
            a2 = ram_a.read(i + offset + 2)
            a3 = ram_a.read(i + offset + size)
            a4 = ram_a.read(i + offset + size + 1)
            a5 = ram_a.read(i + offset + size + 2)
            a6 = ram_a.read(i + offset + size + size)
            a7 = ram_a.read(i + offset + size + size + 1)
            a8 = ram_a.read(i + offset + size + size + 2)
            b = a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8
            ram_b.write(i + offset, b)

    def check(size, offset_stream, offset_seq):
        # Compare the size - 2 produced words at the two offsets and report.
        all_ok = True
        for i in range(size - 2):
            st = ram_b.read(i + offset_stream)
            sq = ram_b.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    def comp(size):
        # stream
        # Load 3 * size words from global address 0 and run the stream.
        # NOTE(review): dma_write transfers `size` words although only
        # size - 2 are produced; the last two words are whatever ram_b held.
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size * 3)
        comp_stream(size, offset)
        myaxi.dma_write(ram_b, offset, 1024, size)

        # sequential
        # Same input data at local offset size * 4 so the two regions do not
        # overlap; results go to global address 2048.
        offset = size * 4
        myaxi.dma_read(ram_a, offset, 0, size * 3)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_b, offset, 1024 * 2, size)

        # verification
        check(size, 0, offset)

        vthread.finish()

    # Control thread running comp(); started with size = 32.
    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start(32)

    return m
Exemplo n.º 27
0
def mkLed():
    """Build the 'blinkled' module for a pattern-addressed stream add test.

    Two sources and one sink are driven with the same multi-dimensional
    access pattern derived from ``shape`` and ``order``; the stream computes
    ``c = a + b``.  A sequential loop computes the same sum linearly, and a
    control thread compares the results.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    # 32-bit data words, 10-bit RAM addresses.
    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    # Logical tensor shape; size is the element count (16 * 4 * 8 = 512).
    shape = [16, 4, 8]
    size = functools.reduce(lambda x, y: x * y, shape, 1)
    # Order in which the dimensions are listed in the pattern.
    order = [1, 2, 0]

    def to_pattern(shape, order):
        # Build a list of (size, stride) pairs, one per entry of `order`.
        # The stride of dimension p is the product of all dimensions after
        # p in `shape`.  For the values above this yields
        # [(4, 8), (8, 1), (16, 32)].
        pattern = []
        for p in order:
            size = shape[p]
            stride = functools.reduce(lambda x, y: x * y, shape[p + 1:], 1)
            pattern.append((size, stride))
        return pattern

    # All three ports use the same pattern.
    pattern_a = to_pattern(shape, order)
    pattern_b = to_pattern(shape, order)
    pattern_c = to_pattern(shape, order)

    # --- stream definition: c = a + b --------------------------------------
    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    c = a + b
    strm.sink(c, 'c')

    # --- thread-side helpers (passed to vthread.Thread via comp) -----------
    def comp_stream(offset):
        # Bind sources and sink to the RAMs with the access patterns, then
        # run the stream and wait for it to finish.
        strm.set_source_pattern('a', ram_a, offset, pattern_a)
        strm.set_source_pattern('b', ram_b, offset, pattern_b)
        strm.set_sink_pattern('c', ram_c, offset, pattern_c)
        strm.run()
        strm.join()

    def comp_sequential(offset):
        # Reference implementation: c[i] = a[i] + b[i] for all `size` words.
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a + b
            ram_c.write(i + offset, sum)

    def check(offset_stream, offset_seq):
        # NOTE(review): only a single word at each offset is compared, not
        # the whole `size`-word result.
        all_ok = True
        st = ram_c.read(offset_stream)
        sq = ram_c.read(offset_seq)
        if vthread.verilog.NotEql(st, sq):
            all_ok = False

        if all_ok:
            print('OK')
        else:
            print('NG')

    def comp():
        # stream
        # Both ram_a and ram_b are loaded from global address 0; one result
        # word is written back to global address 4096.
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream(offset)
        myaxi.dma_write(ram_c, offset, 1024 * 4, 1)

        # sequential
        # Same input data at local offset `size`; one result word is written
        # back to global address 8192.
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential(offset)
        myaxi.dma_write(ram_c, offset, 1024 * 8, 1)

        # verification
        check(0, offset)

    # Control thread running comp(); it takes no arguments.
    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start()

    return m
Exemplo n.º 28
0
def mkLed():
    """Build the 'blinkled' module for a back-to-back stream multiply test.

    The stream computes ``c = a * b``.  It is run three times over three
    consecutive ``size``-word chunks, with the configuration of the next run
    issued while the previous run is still in flight.  A sequential loop
    computes the same products, and a control thread compares the results.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    # 32-bit data words, 10-bit RAM addresses.
    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    # --- stream definition: c = a * b --------------------------------------
    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    c = a * b
    strm.sink(c, 'c')

    # --- thread-side helpers (passed to vthread.Thread via comp) -----------
    def comp_stream(size, offset):
        # First chunk: words [offset, offset + size).
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, size)
        strm.run()

        # double buffer of comp and cmd
        # Second chunk is configured right after run() without join();
        # source_join() is called before the next run().
        # NOTE(review): presumably source_join() waits until the sources of
        # the running pass are drained -- confirm against the stream API.
        strm.set_source('a', ram_a, offset + size, size)
        strm.set_source('b', ram_b, offset + size, size)
        strm.set_sink('c', ram_c, offset + size, size)
        strm.source_join()

        strm.run()

        # double buffer of comp and cmd
        # Third chunk: words [offset + 2 * size, offset + 3 * size).
        strm.set_source('a', ram_a, offset + size + size, size)
        strm.set_source('b', ram_b, offset + size + size, size)
        strm.set_sink('c', ram_c, offset + size + size, size)
        strm.source_join()

        strm.run()

        # Wait for the last run before returning.
        strm.source_join()
        strm.join()

    def comp_sequential(size, offset):
        # Reference implementation: c[i] = a[i] * b[i].
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a * b
            ram_c.write(i + offset, sum)

    def check(size, offset_stream, offset_seq):
        # Compare `size` words of ram_c at the two offsets and report.
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    def comp(size):
        # Total number of words processed by the three stream runs.
        new_size = size + size + size
        # stream
        # ram_a is loaded from global address 0, ram_b from 512; results go
        # to global address 1024.
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, new_size)
        myaxi.dma_read(ram_b, offset, 512, new_size)
        comp_stream(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, new_size)

        # sequential
        # Same input data at local offset new_size; one loop covers all
        # new_size words.  Results go to global address 2048.
        offset = new_size
        myaxi.dma_read(ram_a, offset, 0, new_size)
        myaxi.dma_read(ram_b, offset, 512, new_size)
        comp_sequential(new_size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, new_size)

        # verification
        # Both result sets are read back from global memory into ram_c
        # before comparing, so the DMA write path is covered as well.
        myaxi.dma_read(ram_c, 0, 1024, new_size)
        myaxi.dma_read(ram_c, offset, 1024 * 2, new_size)
        check(new_size, 0, offset)

        vthread.finish()

    # Control thread running comp(); started with size = 32.
    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start(32)

    return m
Exemplo n.º 29
0
def mkLed():
    """Build the 'blinkled' module for a stride-0 pattern stream test.

    Both stream sources use the pattern ``[(size, 0)]`` starting at
    ``offset + 10``, while the sink writes ``size`` consecutive words.  The
    sequential reference reads the fixed address ``offset + 10`` on every
    iteration.  A control thread compares both results.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    # 32-bit data words, 10-bit RAM addresses.
    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    size = 16
    # One dimension of `size` elements with stride 0.
    # NOTE(review): presumably this makes the source deliver the same RAM
    # word `size` times -- confirm against set_source_pattern.
    pattern = [(size, 0)]

    # --- stream definition: sum = a + b ------------------------------------
    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    sum = a + b
    strm.sink(sum, 'sum')

    # --- thread-side helpers (passed to vthread.Thread via comp) -----------
    def comp_stream(offset):
        # Sources start at offset + 10 with the stride-0 pattern; the sink
        # writes `size` words linearly from `offset`.
        strm.set_source_pattern('a', ram_a, offset + 10, pattern)
        strm.set_source_pattern('b', ram_b, offset + 10, pattern)
        strm.set_sink('sum', ram_c, offset, size)
        strm.run()
        strm.join()

    def comp_sequential(offset):
        # Reference implementation: every output word is the sum of the
        # fixed words at offset + 10 (the read address does not depend on
        # i).
        sum = 0
        for i in range(size):
            a = ram_a.read(offset + 10)
            b = ram_b.read(offset + 10)
            sum = a + b
            ram_c.write(i + offset, sum)

    def check(size, offset_stream, offset_seq):
        # Compare `size` words of ram_c at the two offsets and report.
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    def comp():
        # Stream pass: both RAMs are loaded from global address 0.
        # NOTE(review): only one result word is written back to global
        # memory, although `size` words are produced and checked locally.
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream(offset)
        myaxi.dma_write(ram_c, offset, 1024 * 4, 1)

        # Sequential pass on the same input data at local offset `size`.
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential(offset)
        myaxi.dma_write(ram_c, offset, 1024 * 8, 1)

        check(size, 0, offset)

        vthread.finish()

    # Control thread running comp(); it takes no arguments.
    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start()

    return m
Exemplo n.º 30
0
def mkLed():
    """Build the 'blinkled' module for a fixed-point multiply-add stream test.

    The stream computes ``c = a * b + const`` on values declared with
    ``point=-4``, using ``FixedRAM`` instances created with the same point.
    A sequential loop computes the same expression and prints each operand,
    and a control thread compares both results.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    # 32-bit data words, 10-bit RAM addresses.
    datawidth = 32
    addrwidth = 10
    # Point value shared by all RAMs, stream variables and constants.
    # NOTE(review): the meaning of a negative point is defined by
    # vthread.fixed -- confirm there before changing it.
    point = -4
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.FixedRAM(m,
                             'ram_a',
                             clk,
                             rst,
                             datawidth,
                             addrwidth,
                             point=point)
    ram_b = vthread.FixedRAM(m,
                             'ram_b',
                             clk,
                             rst,
                             datawidth,
                             addrwidth,
                             point=point)
    ram_c = vthread.FixedRAM(m,
                             'ram_c',
                             clk,
                             rst,
                             datawidth,
                             addrwidth,
                             point=point)

    # --- stream definition: c = a * b + const ------------------------------
    # The stream is created with dump=True.
    strm = vthread.Stream(m, 'mystream', clk, rst, dump=True)
    a = strm.source('a', point=point)
    b = strm.source('b', point=point)
    const = strm.constant('const', point=point)
    c = a * b + const
    strm.sink(c, 'c')

    # --- thread-side helpers (passed to vthread.Thread via comp) -----------
    def comp_stream(size, offset):
        # Bind sources and sink, set the constant to FixedConst(32) with the
        # shared point, then run the stream and wait for it to finish.
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, size)
        const = vthread.fixed.FixedConst(32, point=point)
        strm.set_constant('const', const)
        strm.run()
        strm.join()

    def comp_sequential(size, offset):
        # Reference implementation using the same constant; each iteration
        # also prints its operands and result.
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            const = vthread.fixed.FixedConst(32, point=point)
            c = a * b + const
            ram_c.write(i + offset, c)
            print('a = %10g, b = %10g, const = %10g, c =  %10g' %
                  (a, b, const, c))

    def check(size, offset_stream, offset_seq):
        # Compare `size` words of ram_c at the two offsets and report.
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    def comp(size):
        # stream
        # ram_a is loaded from global address 0, ram_b from 512; results go
        # to global address 1024.
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        # Same input data at local offset `size`; results go to global
        # address 2048.
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        check(size, 0, offset)

        vthread.finish()

    # Control thread running comp(); started with size = 32.
    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start(32)

    return m