def mkLed():
    """Build the 'blinkled' module comparing a stream sum with a loop sum.

    A stream built around ``ReduceAddValid`` and a plain loop each add up
    ``size`` values read from ``ram_a`` and store one result in ``ram_b``.
    The two results are then compared and 'OK' or 'NG' is printed.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)

    # Stream definition: source 'a' is reduced by ReduceAddValid using the
    # constant 'size'; the sink 'sum' is registered with when=sum_valid.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    size = strm.constant('size')
    sum, sum_valid = strm.ReduceAddValid(a, size)
    strm.sink(sum, 'sum', when=sum_valid, when_name='sum_valid')

    # Bind the stream to ram_a (input) and ram_b (one output entry at
    # `offset`), then run it and wait for completion.
    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size)
        strm.set_constant('size', size)
        strm.set_sink('sum', ram_b, offset, 1)
        strm.run()
        strm.join()

    # Reference computation: add up ram_a[offset .. offset + size - 1] and
    # store the total at ram_b[offset].
    def comp_sequential(size, offset):
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            sum += a
        ram_b.write(offset, sum)

    # Compare `size` entries of ram_b starting at the two offsets and print
    # 'OK' if all are equal, 'NG' otherwise.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_b.read(i + offset_stream)
            sq = ram_b.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('OK')
        else:
            print('NG')

    # Thread entry point: run the stream version at RAM offset 0 and the
    # sequential version at RAM offset `size`, then compare one entry.
    def comp(size):
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        comp_stream(size, offset)
        myaxi.dma_write(ram_b, offset, 1024, 1)

        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_b, offset, 1024 * 2, 1)

        # Only a single result entry is produced by each variant.
        check(1, 0, offset)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # Start the thread with size = 32.
    fsm = th.start(32)

    return m
def mkLed():
    """Build the 'blinkled' module performing a register-controlled matmul.

    The thread loops forever: it waits on register 0 of ``saxi``, reads the
    matrix size and three offsets from registers 1-4, runs ``comp`` and then
    writes a flag to register 5.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')
    # Declared as an output register; not driven anywhere in this function.
    led = m.OutputReg('led', 8, initval=0)

    datawidth = 32
    addrwidth = 10
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    maxi = vthread.AXIM(m, 'maxi', clk, rst, datawidth)
    saxi = vthread.AXISLiteRegister(m, 'saxi', clk, rst, datawidth, length=8)

    # Thread entry point: service requests arriving through `saxi` forever.
    def matmul():
        while True:
            saxi.wait_flag(0, value=1, resetvalue=0)

            matrix_size = saxi.read(1)
            a_offset = saxi.read(2)
            b_offset = saxi.read(3)
            c_offset = saxi.read(4)

            comp(matrix_size, a_offset, b_offset, c_offset)

            saxi.write_flag(5, 1, resetvalue=0)

    # For each i: load one vector into ram_a, then for each j load one vector
    # into ram_b, accumulate the sum of element-wise products and store it at
    # ram_c[j]; finally write ram_c back via DMA.
    def comp(matrix_size, a_offset, b_offset, c_offset):
        a_addr, c_addr = a_offset, c_offset

        for i in range(matrix_size):
            maxi.dma_read(ram_a, 0, a_addr, matrix_size)

            b_addr = b_offset
            for j in range(matrix_size):
                maxi.dma_read(ram_b, 0, b_addr, matrix_size)

                sum = 0
                for k in range(matrix_size):
                    x = ram_a.read(k)
                    y = ram_b.read(k)
                    sum += x * y
                ram_c.write(j, sum)

                # Advance by matrix_size elements of (datawidth // 8) each.
                b_addr += matrix_size * (datawidth // 8)

            maxi.dma_write(ram_c, 0, c_addr, matrix_size)
            a_addr += matrix_size * (datawidth // 8)
            c_addr += matrix_size * (datawidth // 8)

    th = vthread.Thread(m, 'th_matmul', clk, rst, matmul)
    fsm = th.start()

    return m
def mkMemcpy():
    """Build the 'blinkled' module performing a register-controlled memcpy.

    The thread loops forever: it waits on register 0 of ``saxi``, reads the
    byte count and the source/destination offsets from registers 1-3, copies
    the data in chunks through ``ram_a`` and then writes a flag to register 4.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')
    # Declared as an output register; not driven anywhere in this function.
    led = m.OutputReg('led', 8, initval=0)

    datawidth = 32
    addrwidth = 10
    # Upper bound on the number of elements moved per DMA transfer.
    # NOTE(review): this divides 2**addrwidth by the bytes per element;
    # confirm whether the RAM depth is meant in bytes or in elements.
    ram_words = (2**addrwidth) // (datawidth // 8)

    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)

    maxi = vthread.AXIM(m, 'maxi', clk, rst, datawidth)
    saxi = vthread.AXISLiteRegister(m, 'saxi', clk, rst, datawidth, length=8)

    # Thread entry point: service copy requests arriving through `saxi`.
    def memcpy():
        while True:
            saxi.wait_flag(0, value=1, resetvalue=0)

            copy_bytes = saxi.read(1)
            src_offset = saxi.read(2)
            dst_offset = saxi.read(3)

            copy(copy_bytes, src_offset, dst_offset)

            saxi.write_flag(4, 1, resetvalue=0)

    # Copy `copy_bytes` bytes from src_offset to dst_offset, at most
    # `ram_words` elements per iteration, staging each chunk in ram_a.
    def copy(copy_bytes, src_offset, dst_offset):
        # Floor division: a trailing partial element is not copied.
        rest_words = copy_bytes // (datawidth // 8)
        src_global_addr = src_offset
        dst_global_addr = dst_offset
        local_addr = 0

        while rest_words > 0:
            if rest_words > ram_words:
                dma_size = ram_words
            else:
                dma_size = rest_words

            maxi.dma_read(ram_a, local_addr, src_global_addr, dma_size)
            maxi.dma_write(ram_a, local_addr, dst_global_addr, dma_size)

            src_global_addr += dma_size * (datawidth // 8)
            dst_global_addr += dma_size * (datawidth // 8)
            rest_words -= dma_size

    th = vthread.Thread(m, 'th_memcpy', clk, rst, memcpy)
    fsm = th.start()

    return m
def mkLed(word_datawidth=128):
    """Build the 'blinkled' module looping data through AXI-stream FIFOs.

    Each iteration writes a test vector to memory, pulls it back through
    ``axi_in`` -> ``fifo_in`` -> ``fifo_out`` -> ``axi_out`` to a second
    memory region, and compares both regions.

    Args:
        word_datawidth: Data width used for ``myram`` and both FIFOs.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    myram = vthread.RAM(m, 'myram', clk, rst, word_datawidth, addrwidth, numports=2)

    axi_in = vthread.AXIStreamInFifo(m, 'axi_in', clk, rst, datawidth,
                                     with_last=True, noio=True)
    axi_out = vthread.AXIStreamOutFifo(m, 'axi_out', clk, rst, datawidth,
                                       with_last=True, noio=True)

    maxi_in = vthread.AXIM_for_AXIStreamIn(axi_in, 'maxi_in')
    maxi_out = vthread.AXIM_for_AXIStreamOut(axi_out, 'maxi_out')

    fifo_addrwidth = 8
    fifo_in = vthread.FIFO(m, 'fifo_in', clk, rst, word_datawidth, fifo_addrwidth)
    fifo_out = vthread.FIFO(m, 'fifo_out', clk, rst, word_datawidth, fifo_addrwidth)

    # Pass/fail register shared by blink() and body().
    all_ok = m.TmpReg(initval=0)

    # Thread entry point: run body() four times at different offsets, print
    # the verdict and finish.
    def blink(size):
        all_ok.value = True

        for i in range(4):
            print('# iter %d start' % i)
            # Test for 4KB boundary check
            offset = i * 1024 * 16 + (myaxi.boundary_size - (word_datawidth // 8))
            body(size, offset)
            print('# iter %d end' % i)

        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

        vthread.finish()

    # One round trip: memory -> AXI-stream in -> FIFOs -> AXI-stream out ->
    # memory, followed by a comparison of source and destination regions.
    def body(size, offset):
        # write a test vector
        for i in range(size):
            wdata = i + 100
            myram.write(i, wdata)

        laddr = 0
        gaddr = offset
        myaxi.dma_write(myram, laddr, gaddr, size, port=1)

        # AXI-stream read -> FIFO -> FIFO -> AXI-stream write
        # The transfer length is scaled by word_datawidth // datawidth.
        maxi_in.dma_read_async(gaddr, size * (word_datawidth // datawidth))
        axi_in.write_fifo(fifo_in, size)

        for i in range(size):
            va = fifo_in.deq()
            fifo_out.enq(va)

        out_gaddr = (size + size) * (word_datawidth // 8) + offset
        maxi_out.dma_write_async(out_gaddr, size * (word_datawidth // datawidth))
        axi_out.read_fifo(fifo_out, size)

        # check
        # Source region goes to myram[0..size-1], destination region to
        # myram[size..2*size-1].
        myaxi.dma_read(myram, 0, gaddr, size, port=1)
        myaxi.dma_read(myram, size, out_gaddr, size, port=1)

        for i in range(size):
            v0 = myram.read(i)
            v1 = myram.read(i + size)
            if vthread.verilog.NotEql(v0, v1):
                all_ok.value = False

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    # Start the thread with size = 17.
    fsm = th.start(17)

    return m
def mkLed(memory_datawidth=128):
    """Build the 'blinkled' module that writes two patterns by DMA and reads them back.

    The AXI master is created with ``memory_datawidth`` while the local RAM
    uses a fixed data width of 32.

    Args:
        memory_datawidth: Data width passed to ``vthread.AXIM``.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, memory_datawidth)
    myram = vthread.RAM(m, 'myram', clk, rst, datawidth, addrwidth)

    # Pass/fail register shared by blink() and body().
    all_ok = m.TmpReg(initval=0)

    # Thread entry point: run body() four times at different offsets, print
    # the verdict and finish.
    def blink(size):
        all_ok.value = True

        for i in range(4):
            print('# iter %d start' % i)
            # Test for 4KB boundary check
            offset = i * 1024 * 16 + (myaxi.boundary_size - memory_datawidth // 8)
            body(size, offset)
            print('# iter %d end' % i)

        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

        vthread.finish()

    # Write pattern i + 100 at `offset` and pattern i + 1000 at
    # (size + size) * 4 + offset, then read both back and verify them.
    def body(size, offset):
        # write
        for i in range(size):
            wdata = i + 100
            myram.write(i, wdata)

        laddr = 0
        gaddr = offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # write
        for i in range(size):
            wdata = i + 1000
            myram.write(i, wdata)

        laddr = 0
        gaddr = (size + size) * 4 + offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # read
        laddr = 0
        gaddr = offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        for i in range(size):
            rdata = myram.read(i)
            if vthread.verilog.NotEql(rdata, i + 100):
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

        # read
        laddr = 0
        gaddr = (size + size) * 4 + offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        for i in range(size):
            rdata = myram.read(i)
            if vthread.verilog.NotEql(rdata, i + 1000):
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    # Start the thread with size = 17.
    fsm = th.start(17)

    return m
def mkLed():
    """Build the 'blinkled' module exercising several stream ``Counter`` variants.

    Six counters with different ``initval``/``size``/``interval``/``step``
    settings are added into the stream output; a sequential loop computes
    the same value with explicit arithmetic and both results are compared.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    strm = vthread.Stream(m, 'mystream', clk, rst)
    # The software model of each counter is the matching cntN expression in
    # comp_sequential() below.
    cnt1 = strm.Counter()
    cnt2 = strm.Counter(initval=1)
    cnt3 = strm.Counter(initval=2, size=5)
    cnt4 = strm.Counter(initval=3, interval=3)
    cnt5 = strm.Counter(initval=4, interval=3, size=7)
    cnt6 = strm.Counter(initval=4, step=2, interval=2)
    a = strm.source('a')
    b = strm.source('b')
    # a + b - a - b cancels arithmetically, leaving the sum of the counters.
    c = a + b - a - b + cnt1 + cnt2 + cnt3 + cnt4 + cnt5 + cnt6
    strm.sink(c, 'c')

    # Bind the stream to the RAMs at `offset`, run it and wait for completion.
    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, size)
        strm.run()
        strm.join()

    # Reference computation written with explicit counter arithmetic.
    def comp_sequential(size, offset):
        cnt = 0
        for i in range(size):
            cnt1 = cnt
            cnt2 = 1 + cnt
            cnt3 = (cnt + 2) % 5
            cnt4 = (cnt // 3) + 3
            cnt5 = ((cnt // 3) + 4) % 7
            cnt6 = (cnt // 2) * 2 + 4
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a + b - a - b + cnt1 + cnt2 + cnt3 + cnt4 + cnt5 + cnt6
            ram_c.write(i + offset, sum)
            cnt += 1

    # Compare `size` entries of ram_c starting at the two offsets.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread entry point: stream version at RAM offset 0, sequential version
    # at RAM offset `size`, then verification.
    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        check(size, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # Start the thread with size = 32.
    fsm = th.start(32)

    return m
def mkLed():
    """Build the 'blinkled' module exercising the stream ``CounterValid`` operator.

    The stream adds ``a + b`` to either the counter value or 1000, chosen by
    a ``Mux`` on the counter's valid output; a sequential loop models the
    same sequence and both results are compared.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    strm = vthread.Stream(m, 'mystream', clk, rst)
    size = strm.constant('size')
    cnt, valid = strm.CounterValid(size)
    a = strm.source('a')
    b = strm.source('b')
    # Select 1000 when `valid` is set, otherwise the counter value.
    cntval = strm.Mux(valid, 1000, cnt)
    c = a + b + cntval
    strm.sink(c, 'c')

    # Bind the stream to the RAMs at `offset`, run it and wait for completion.
    def comp_stream(size, offset):
        # The counter constant is half the number of processed elements.
        strm.set_constant('size', size // 2)
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, size)
        strm.run()
        strm.join()

    # Reference computation: `cnt` is forced to 1000 when it reaches
    # size // 2 - 1 and wraps to 0 on the step after that (1001).
    def comp_sequential(size, offset):
        sum = 0
        cnt = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a + b + cnt
            cnt += 1
            if cnt == 1001:
                cnt = 0
            if cnt == size // 2 - 1:
                cnt = 1000
            ram_c.write(i + offset, sum)

    # Compare `size` entries of ram_c starting at the two offsets.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread entry point: stream version at RAM offset 0, sequential version
    # at RAM offset `size`, then verification.
    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        check(size, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # Start the thread with size = 32.
    fsm = th.start(32)

    return m
def mkLed(matrix_size=16):
    """Build the 'blinkled' module doing a matmul with a function-defined stream.

    The inner multiply-accumulate is performed by ``strm_madd``, which is
    passed to ``vthread.Stream`` as a function; a free-running ``timer``
    register is sampled before and after the computation to print the
    elapsed value.

    Args:
        matrix_size: First argument passed to the thread at start.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    # Register incremented by the `seq` block; used for the time measurement.
    seq = Seq(m, 'seq', clk, rst)
    timer = m.Reg('timer', 32, initval=0)
    seq(
        timer.inc()
    )

    datawidth = 32
    addrwidth = 10
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)

    # Thread entry point: time the computation, print it, then verify.
    def matmul(matrix_size, a_offset, b_offset, c_offset):
        start_time = timer
        comp(matrix_size, a_offset, b_offset, c_offset)
        end_time = timer
        time = end_time - start_time
        print("Time (cycles): %d" % time)
        check(matrix_size, a_offset, b_offset, c_offset)

    # Stream body: read `size` entries from ram_a and ram_b, accumulate the
    # products with RegionAdd and write the result to ram_c[waddr] with
    # when=valid.
    def strm_madd(strm, size, waddr):
        a = strm.read(ram_a, 0, size)
        b = strm.read(ram_b, 0, size)
        sum, valid = strm.RegionAdd(a * b, size)
        strm.write(ram_c, waddr, 1, sum, when=valid)

    # For each i: load a vector into ram_a; for each j load a vector into
    # ram_b and run the stream so that ram_c[j] receives the result; then
    # write ram_c back via DMA.
    def comp(matrix_size, a_offset, b_offset, c_offset):
        a_addr, c_addr = a_offset, c_offset

        for i in range(matrix_size):
            myaxi.dma_read(ram_a, 0, a_addr, matrix_size)

            b_addr = b_offset
            for j in range(matrix_size):
                myaxi.dma_read(ram_b, 0, b_addr, matrix_size)

                # `stream` is bound after this definition, before the thread
                # is created.
                stream.run(matrix_size, j)
                stream.join()

                b_addr += matrix_size * (datawidth // 8)

            myaxi.dma_write(ram_c, 0, c_addr, matrix_size)
            a_addr += matrix_size * (datawidth // 8)
            c_addr += matrix_size * (datawidth // 8)

    # Read the result back row by row; expects (i + 1) * 2 where i == j and
    # 0 elsewhere, printing every mismatch.
    def check(matrix_size, a_offset, b_offset, c_offset):
        all_ok = True
        c_addr = c_offset
        for i in range(matrix_size):
            myaxi.dma_read(ram_c, 0, c_addr, matrix_size)
            for j in range(matrix_size):
                v = ram_c.read(j)
                if i == j and vthread.verilog.NotEql(v, (i + 1) * 2):
                    all_ok = False
                    print("NG [%d,%d] = %d" % (i, j, v))
                if i != j and vthread.verilog.NotEql(v, 0):
                    all_ok = False
                    print("NG [%d,%d] = %d" % (i, j, v))
            c_addr += matrix_size * (datawidth // 8)

        if all_ok:
            print("OK")
        else:
            print("NG")

    stream = vthread.Stream(m, 'strm_madd', clk, rst, strm_madd)
    th = vthread.Thread(m, 'th_matmul', clk, rst, matmul)
    # Thread arguments: matrix_size, a_offset = 0, b_offset = 1024,
    # c_offset = 2048.
    fsm = th.start(matrix_size, 0, 1024, 2048)

    return m
def mkLed():
    """Build the 'blinkled' module that embeds a MAC stream as a substream.

    ``macstrm`` multiplies two sources and reduces the product with
    ``ReduceAddValid``. ``strm`` instantiates it through ``substream`` and
    adds ``x`` to its result. Both streams are checked against sequential
    reference loops.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    # Value bound to the 'const' constant of both streams.
    reduce_size = 4

    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)
    ram_d = vthread.RAM(m, 'ram_d', clk, rst, datawidth, addrwidth)

    # Inner stream: multiply-accumulate with sinks 'c' (sum) and 'v' (valid).
    macstrm = vthread.Stream(m, 'macstream', clk, rst)
    macstrm_a = macstrm.source('a')
    macstrm_b = macstrm.source('b')
    macstrm_const = macstrm.constant('const')
    macstrm_mul = macstrm_a * macstrm_b
    macstrm_c, macstrm_v = macstrm.ReduceAddValid(macstrm_mul, macstrm_const)
    # Adds zero to the valid signal; value is unchanged arithmetically.
    # NOTE(review): presumably inserts an extra stream operator - confirm.
    macstrm_v += 0
    macstrm.sink(macstrm_c, 'c')
    macstrm.sink(macstrm_v, 'v')

    # Outer stream: feeds x, y and const into the substream and adds x to
    # the substream's 'c' output; sink 'z' is registered with when=v.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    x = strm.source('x')
    y = strm.source('y')
    const = strm.constant('const')
    sub = strm.substream(macstrm)
    sub.to_source('a', x)
    sub.to_source('b', y)
    sub.to_constant('const', const)
    z = sub.from_sink('c')
    v = sub.from_sink('v')
    z = z + x
    strm.sink(z, 'z', when=v, when_name='v')

    # Run the inner stream directly on the RAMs.
    def comp_stream_macstrm(size, offset):
        macstrm.set_source('a', ram_a, offset, size)
        macstrm.set_source('b', ram_b, offset, size)
        macstrm.set_constant('const', reduce_size)
        macstrm.set_sink('c', ram_c, offset, size)
        macstrm.set_sink('v', ram_d, offset, size)
        macstrm.run()
        macstrm.join()

    # Run the outer stream; the sink is sized size // reduce_size.
    def comp_stream_mystrm(size, offset):
        strm.set_source('x', ram_a, offset, size)
        strm.set_source('y', ram_b, offset, size)
        strm.set_constant('const', reduce_size)
        strm.set_sink('z', ram_c, offset, size // reduce_size)
        strm.run()
        strm.join()

    # Reference for the inner stream: running sum written every step, reset
    # after every `reduce_size` elements.
    def comp_sequential_macstrm(size, offset):
        sum = 0
        count = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum += a * b
            count += 1
            ram_c.write(i + offset, sum)
            ram_d.write(i + offset, count == (reduce_size - 1))
            if count == reduce_size:
                sum = 0
                count = 0

    # Reference for the outer stream: one value (sum + x) written per group
    # of `reduce_size` elements.
    def comp_sequential_mystrm(size, offset):
        sum = 0
        count = 0
        write_offset = offset
        for i in range(size):
            x = ram_a.read(i + offset)
            y = ram_b.read(i + offset)
            sum += x * y
            val = sum + x
            count += 1
            if count == reduce_size:
                ram_c.write(write_offset, val)
                write_offset += 1
                sum = 0
                count = 0

    # Compare `size` entries of ram_c at the two offsets, printing each
    # mismatch. ram_d is not compared here.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
                print(i, st, sq)
        if all_ok:
            print('OK')
        else:
            print('NG')

    # Thread entry point: test the inner stream, then the outer stream, each
    # against its sequential reference.
    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream_macstrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential_macstrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        print('# macstream')
        check(size, 0, offset)

        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream_mystrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size // reduce_size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential_mystrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size // reduce_size)

        # verification
        print('# mystream')
        check(size // reduce_size, 0, offset)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # Start the thread with size = 16.
    fsm = th.start(16)

    return m
def mkLed(memory_datawidth=32):
    """Build the 'blinkled' module testing block-wise DMA on a ``MultibankRAM``.

    Two patterns are written bank by bank in blocks of ``block_size``,
    transferred with ``dma_write_block``, read back with ``dma_read_block``
    and verified.

    Args:
        memory_datawidth: Data width passed to ``vthread.AXIM``.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    numbanks = 4
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, memory_datawidth)
    myram = vthread.MultibankRAM(m, 'myram', clk, rst, datawidth, addrwidth,
                                 numbanks=numbanks)

    # Pass/fail register shared by blink() and body().
    all_ok = m.TmpReg(initval=0)
    block_size = 3

    array_len = 32
    # Distance between the first and the second test region.
    array_size = (array_len + array_len) * 4 * numbanks

    # Thread entry point: run body() once and print 'ALL OK' on success.
    # Nothing is printed on failure apart from the per-element messages.
    def blink(size):
        all_ok.value = True

        print('# start')
        # Test for 4KB boundary check
        #offset = 1024 * 16 + (myaxi.boundary_size - 4)
        offset = 1024 * 16
        body(size, offset)
        print('# end')

        if all_ok:
            print('ALL OK')

    # Fill pattern used below: element i of a block holds bias + i + base,
    # where `bias` advances by block_size per bank and `blk_offset` (the
    # in-bank address) advances by block_size per pass over all banks.
    # `done` carries the inner-loop break out through the bank loop.
    def body(size, offset):
        # write
        count = 0
        blk_offset = 0
        bias = 0
        done = False
        while count < size:
            for bank in range(numbanks):
                for i in range(block_size):
                    wdata = bias + i + 512
                    myram.write_bank(bank, blk_offset + i, wdata)
                    count += 1
                    if count >= size:
                        done = True
                        break
                if done:
                    break
                bias += block_size
            blk_offset += block_size

        laddr = 0
        gaddr = offset
        myram.dma_write_block(myaxi, laddr, gaddr, size, block_size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # write
        count = 0
        blk_offset = 0
        bias = 0
        done = False
        while count < size:
            for bank in range(numbanks):
                for i in range(block_size):
                    wdata = bias + i + 1024
                    myram.write_bank(bank, blk_offset + i, wdata)
                    count += 1
                    if count >= size:
                        done = True
                        break
                if done:
                    break
                bias += block_size
            blk_offset += block_size

        laddr = 0
        gaddr = array_size + offset
        myram.dma_write_block(myaxi, laddr, gaddr, size, block_size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # read
        laddr = 0
        gaddr = offset
        myram.dma_read_block(myaxi, laddr, gaddr, size, block_size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        count = 0
        blk_offset = 0
        bias = 0
        done = False
        while count < size:
            for bank in range(numbanks):
                for i in range(block_size):
                    rdata = myram.read_bank(bank, blk_offset + i)
                    exp = bias + i + 512
                    if vthread.verilog.NotEql(rdata, exp):
                        print('rdata[%d:%d] = %d:%d' % (bank, i, rdata, exp))
                        all_ok.value = False
                    count += 1
                    if count >= size:
                        done = True
                        break
                if done:
                    break
                bias += block_size
            blk_offset += block_size

        # read
        laddr = 0
        gaddr = array_size + offset
        myram.dma_read_block(myaxi, laddr, gaddr, size, block_size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        count = 0
        blk_offset = 0
        bias = 0
        done = False
        while count < size:
            for bank in range(numbanks):
                for i in range(block_size):
                    rdata = myram.read_bank(bank, blk_offset + i)
                    exp = bias + i + 1024
                    if vthread.verilog.NotEql(rdata, exp):
                        print('rdata[%d:%d] = %d:%d' % (bank, i, rdata, exp))
                        all_ok.value = False
                    count += 1
                    if count >= size:
                        done = True
                        break
                if done:
                    break
                bias += block_size
            blk_offset += block_size

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    # Start the thread with size = array_len.
    fsm = th.start(array_len)

    return m
def mkLed(matrix_size=16):
    """Build the 'blinkled' module doing a timed matmul with 64-bit RAMs.

    A free-running ``timer`` register is sampled before and after the
    computation to print the elapsed value; the result is then verified.

    Args:
        matrix_size: First argument passed to the thread at start.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    # Register incremented by the `seq` block; used for the time measurement.
    seq = Seq(m, 'seq', clk, rst)
    timer = m.Reg('timer', 32, initval=0)
    seq(
        timer.inc()
    )

    datawidth = 64
    addrwidth = 10
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)
    # NOTE(review): data_wordsize and axi_wordsize are not defined inside
    # this function; presumably module-level names - confirm they exist.
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth // (data_wordsize // axi_wordsize))

    # Thread entry point: time the computation, print it, verify, finish.
    def matmul(matrix_size, a_offset, b_offset, c_offset):
        start_time = timer
        comp(matrix_size, a_offset, b_offset, c_offset)
        end_time = timer
        time = end_time - start_time
        print("Time (cycles): %d" % time)
        check(matrix_size, a_offset, b_offset, c_offset)
        vthread.finish()

    # For each i: load a vector into ram_a; for each j load a vector into
    # ram_b, accumulate the element-wise products into ram_c[j]; then write
    # ram_c back via DMA.
    def comp(matrix_size, a_offset, b_offset, c_offset):
        a_addr, c_addr = a_offset, c_offset

        for i in range(matrix_size):
            myaxi.dma_read(ram_a, 0, a_addr, matrix_size)

            b_addr = b_offset
            for j in range(matrix_size):
                myaxi.dma_read(ram_b, 0, b_addr, matrix_size)

                sum = 0
                for k in range(matrix_size):
                    x = ram_a.read(k)
                    y = ram_b.read(k)
                    sum += x * y
                ram_c.write(j, sum)

                # Advance by matrix_size elements of (datawidth // 8) each.
                b_addr += matrix_size * (datawidth // 8)

            myaxi.dma_write(ram_c, 0, c_addr, matrix_size)
            a_addr += matrix_size * (datawidth // 8)
            c_addr += matrix_size * (datawidth // 8)

    # Read the result back row by row; expects (i + 1) * 2 where i == j and
    # 0 elsewhere, printing every mismatch.
    def check(matrix_size, a_offset, b_offset, c_offset):
        all_ok = True
        c_addr = c_offset
        for i in range(matrix_size):
            myaxi.dma_read(ram_c, 0, c_addr, matrix_size)
            for j in range(matrix_size):
                v = ram_c.read(j)
                if i == j and vthread.verilog.NotEql(v, (i + 1) * 2):
                    all_ok = False
                    print("NG [%d,%d] = %d" % (i, j, v))
                if i != j and vthread.verilog.NotEql(v, 0):
                    all_ok = False
                    print("NG [%d,%d] = %d" % (i, j, v))
            c_addr += matrix_size * (datawidth // 8)

        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    th = vthread.Thread(m, 'th_matmul', clk, rst, matmul)
    # NOTE(review): a_offset, b_offset and c_offset are not parameters or
    # locals of mkLed; presumably module-level names - confirm they exist.
    fsm = th.start(matrix_size, a_offset, b_offset, c_offset)

    return m
def mkLed():
    """Build the 'blinkled' module exercising stream ``Counter`` with ``enable``.

    Three counters of size 4 are chained through their ``enable`` inputs
    (``y`` on ``x == 3``, ``z`` on ``y == 3``); a sequential loop models the
    same sequence and both results are compared.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    x = strm.Counter(initval=0, size=4)
    y = strm.Counter(initval=0, size=4, enable=x == 3)
    z = strm.Counter(initval=0, size=4, enable=y == 3)
    # a + b - a - b cancels arithmetically, leaving z + y + x.
    c = a + b - a - b + z + y + x
    strm.sink(c, 'c')

    # Bind the stream to the RAMs at `offset`, run it and wait for completion.
    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, size)
        strm.run()
        strm.join()

    # Reference computation. The counters are updated in the order z, y, x so
    # that each update tests the value its enabling counter had when this
    # iteration's sum was computed.
    def comp_sequential(size, offset):
        sum = 0
        x = 0
        y = 0
        z = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a + b - a - b + z + y + x
            ram_c.write(i + offset, sum)
            if y == 3:
                z += 1
                if z == 4:
                    z = 0
            if x == 3:
                y += 1
                if y == 4:
                    y = 0
            x += 1
            if x == 4:
                x = 0

    # Compare `size` entries of ram_c starting at the two offsets.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread entry point: stream version at RAM offset 0, sequential version
    # at RAM offset `size`, then verification.
    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        check(size, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # Start the thread with size = 32.
    fsm = th.start(32)

    return m
def mkLed():
    """Build the 'blinkled' module computing a histogram by read-modify-write.

    The stream clamps each input into ``[0, numbins - 1]``, adds an offset
    and uses the result as the address of a ``read_modify_write_RAM``
    operation that applies ``strm.Add`` with argument 1. A sequential loop
    computes the same bin counts and both results are compared.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    # Two ports: set_read_modify_write_RAM below uses port 0 for reading and
    # port 1 for writing.
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth, numports=2)

    strm = vthread.Stream(m, 'mystream', clk, rst)
    numbins = strm.constant('numbins')
    offset = strm.constant('offset')
    a = strm.source('a')
    # Clamp below at 0 and above at numbins - 1; both Mux nodes have their
    # `latency` attribute set to 0.
    a = strm.Mux(a < 0, 0, a)
    a.latency = 0
    a = strm.Mux(a >= numbins, numbins - 1, a)
    a.latency = 0

    raddr = a + offset
    raddrs = (raddr, )
    waddr = raddr
    op = strm.Add
    op_args = (1, )
    strm.read_modify_write_RAM('ext', raddrs, waddr, op, op_args)

    # Clear the bins, bind the stream to the RAMs, run it and wait.
    def comp_stream(numbins, size, offset):
        for i in range(numbins):
            ram_b.write(i + offset, 0)

        strm.set_constant('numbins', numbins)
        strm.set_constant('offset', offset)
        strm.set_source('a', ram_a, offset, size)
        strm.set_read_modify_write_RAM('ext', ram_b, read_ports=(0, ), write_port=1)
        strm.run()
        strm.join()

    # Reference computation: clear the bins, then increment the bin selected
    # by each clamped input value.
    def comp_sequential(numbins, size, offset):
        for i in range(numbins):
            ram_b.write(i + offset, 0)

        for i in range(size):
            a = ram_a.read(i + offset)
            a = 0 if a < 0 else a
            a = numbins - 1 if a >= numbins else a
            current = ram_b.read(a + offset)
            updated = current + 1
            ram_b.write(a + offset, updated)

    # Compare `size` entries of ram_b starting at the two offsets.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_b.read(i + offset_stream)
            sq = ram_b.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread entry point: stream version at RAM offset 0, sequential version
    # at RAM offset size * 4, then comparison of the `numbins` bins.
    def comp(size):
        numbins = 8

        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        comp_stream(numbins, size, offset)
        myaxi.dma_write(ram_b, offset, 1024, numbins)

        # sequential
        offset = size * 4
        # NOTE(review): size * 2 entries are loaded here although
        # comp_sequential only reads `size` of them - confirm intent.
        myaxi.dma_read(ram_a, offset, 0, size * 2)
        comp_sequential(numbins, size, offset)
        myaxi.dma_write(ram_b, offset, 1024 * 2, numbins)

        # verification
        check(numbins, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # Start the thread with size = 32.
    fsm = th.start(32)

    return m
def mkLed():
    """Build the 'blinkled' module exercising stream ``terminate``.

    The stream registers a terminate condition ``a == 270`` and exposes a
    counter through an immediate sink; a sequential loop searches for the
    same value and both indices are compared.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    # Only dma_read is used on this interface in this function.
    myaxi.disable_write()

    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)

    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    i = strm.Counter()
    term = a == 270
    strm.sink(i, 'i')
    strm.terminate(term)

    # Run the stream over ram_a and return the value read from sink 'i'.
    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size)
        strm.set_sink_immediate('i', 0)
        strm.run()
        strm.join()
        i = strm.read_sink('i')
        return i

    # Reference search: index of the first element equal to 270, or
    # size - 1 if there is none.
    def comp_sequential(size, offset):
        for i in range(size):
            a = ram_a.read(i + offset)
            if a == 270:
                return i
        return size - 1

    # Compare the two indices; print both on mismatch.
    def check(size_stream, size_seq):
        all_ok = True
        if vthread.verilog.NotEql(size_stream, size_seq):
            all_ok = False
            print(size_stream, size_seq)
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread entry point.
    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 1024, size)
        # The stream is run twice; only the second result is kept.
        # NOTE(review): presumably checks that a terminated stream can be
        # re-run - confirm.
        st_i = comp_stream(size, offset)
        st_i = comp_stream(size, offset)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 1024, size)
        sq_i = comp_sequential(size, offset)

        # verification
        check(st_i, sq_i)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # Start the thread with size = 32.
    fsm = th.start(32)

    return m
def mkLed():
    """Build the 'blinkled' module that writes two patterns by DMA and reads them back.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    myram = vthread.RAM(m, 'myram', clk, rst, datawidth, addrwidth)

    # Pass/fail register shared by blink() and body().
    all_ok = m.TmpReg(initval=0)

    # Thread entry point: run body() four times at different offsets and
    # print 'ALL OK' when no mismatch was recorded.
    def blink(size):
        all_ok.value = True

        for i in range(4):
            print('# iter %d start' % i)
            offset = i * 1024 * 16
            body(size, offset)
            print('# iter %d end' % i)

        if all_ok:
            print('ALL OK')

    # Write pattern i + 100 at `offset` and pattern i + 1000 at
    # (size + size) * 4 + offset, then read both back and verify them.
    def body(size, offset):
        # write
        for i in range(size):
            wdata = i + 100
            myram.write(i, wdata)

        laddr = 0
        gaddr = offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # write
        for i in range(size):
            wdata = i + 1000
            myram.write(i, wdata)

        laddr = 0
        gaddr = (size + size) * 4 + offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # read
        laddr = 0
        gaddr = offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        for i in range(size):
            rdata = myram.read(i)
            # Use NotEql rather than `!=` so that an unknown (X) read value
            # is reported as a mismatch instead of silently skipping this
            # branch; this matches the other DMA checks in this code base.
            if vthread.verilog.NotEql(rdata, i + 100):
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

        # read
        laddr = 0
        gaddr = (size + size) * 4 + offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read:  [%d] <- [%d]' % (laddr, gaddr))

        for i in range(size):
            rdata = myram.read(i)
            # Same X-safe comparison as above.
            if vthread.verilog.NotEql(rdata, i + 1000):
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    # Start the thread with size = 16.
    fsm = th.start(16)

    return m
def mkLed():
    """Build the 'blinkled' module testing three levels of nested substreams.

    ``macstrm`` (multiply + ``ReduceAddValid``) is wrapped by ``macstrm2``,
    which is wrapped by ``neststrm``, which is finally instantiated inside
    ``strm``. ``macstrm2`` and ``strm`` are each checked against a
    sequential reference loop.

    Returns:
        The constructed ``Module`` object.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    # Value bound to the 'const' parameter of the streams.
    reduce_size = 4

    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)
    ram_d = vthread.RAM(m, 'ram_d', clk, rst, datawidth, addrwidth)

    # Level 0: multiply-accumulate with sinks 'c' (sum) and 'v' (valid).
    macstrm = vthread.Stream(m, 'macstream', clk, rst)
    macstrm_a = macstrm.source('a')
    macstrm_b = macstrm.source('b')
    macstrm_const = macstrm.parameter('const')
    macstrm_mul = macstrm_a * macstrm_b
    macstrm_c, macstrm_v = macstrm.ReduceAddValid(macstrm_mul, macstrm_const)
    macstrm.sink(macstrm_c, 'c')
    macstrm.sink(macstrm_v, 'v')

    # Level 1: wraps macstrm. The +1 / -1 / *1 operations leave the values
    # unchanged arithmetically.
    # NOTE(review): presumably they add operators ahead of the substream -
    # confirm.
    macstrm2 = vthread.Stream(m, 'macstream2', clk, rst)
    macstrm2_a = macstrm2.source('a')
    macstrm2_b = macstrm2.source('b')
    macstrm2_const = macstrm2.parameter('const')
    macstrm2_a = macstrm2_a + 1
    macstrm2_a = macstrm2_a - 1
    macstrm2_b = macstrm2_b * 1
    macsub = macstrm2.substream(macstrm)
    macsub.to_source('a', macstrm2_a)
    macsub.to_source('b', macstrm2_b)
    macsub.to_parameter('const', macstrm2_const)
    macstrm2_c = macsub.from_sink('c')
    macstrm2_v = macsub.from_sink('v')
    macstrm2.sink(macstrm2_c, 'c')
    macstrm2.sink(macstrm2_v, 'v')

    # Level 2: wraps macstrm2; both inputs are incremented by 1 and the
    # incremented 'a' is added to the substream's 'c' output.
    neststrm = vthread.Stream(m, 'neststream', clk, rst)
    neststrm_a = neststrm.source('a')
    neststrm_b = neststrm.source('b')
    neststrm_const = neststrm.parameter('const')
    neststrm_a += 1
    neststrm_a += 0
    neststrm_b += 1
    macsub = neststrm.substream(macstrm2)
    macsub.to_source('a', neststrm_a)
    macsub.to_source('b', neststrm_b)
    macsub.to_parameter('const', neststrm_const)
    neststrm_c = macsub.from_sink('c')
    neststrm_c += neststrm_a
    neststrm_c += 0
    neststrm_v = macsub.from_sink('v')
    neststrm.sink(neststrm_c, 'c')
    neststrm.sink(neststrm_v, 'v')

    # Level 3: wraps neststrm and adds y to its 'c' output; sink 'z' is
    # registered with when=v.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    x = strm.source('x')
    y = strm.source('y')
    const = strm.parameter('const')
    sub = strm.substream(neststrm)
    sub.to_source('a', x)
    sub.to_source('b', y)
    sub.to_parameter('const', const)
    z = sub.from_sink('c')
    v = sub.from_sink('v')
    z = z + y
    strm.sink(z, 'z', when=v, when_name='v')

    # Pass/fail register shared by comp() and check().
    all_ok = m.TmpReg(initval=0)

    # Run macstrm2 (not macstrm itself) directly on the RAMs.
    def comp_stream_macstrm(size, offset):
        macstrm2.set_source('a', ram_a, offset, size)
        macstrm2.set_source('b', ram_b, offset, size)
        macstrm2.set_parameter('const', reduce_size)
        macstrm2.set_sink('c', ram_c, offset, size)
        macstrm2.set_sink('v', ram_d, offset, size)
        macstrm2.run()
        macstrm2.join()

    # Run the outermost stream; the sink is sized size // reduce_size.
    def comp_stream_mystrm(size, offset):
        strm.set_source('x', ram_a, offset, size)
        strm.set_source('y', ram_b, offset, size)
        strm.set_parameter('const', reduce_size)
        strm.set_sink('z', ram_c, offset, size // reduce_size)
        strm.run()
        strm.join()

    # Reference for macstrm2: running sum written every step, reset after
    # every `reduce_size` elements.
    def comp_sequential_macstrm(size, offset):
        sum = 0
        count = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum += a * b
            count += 1
            ram_c.write(i + offset, sum)
            ram_d.write(i + offset, count == (reduce_size - 1))
            if count == reduce_size:
                sum = 0
                count = 0

    # Reference for the outermost stream: the (x + 1) and (y + 1) terms
    # mirror the increments applied in neststrm, and the trailing + y mirrors
    # z = z + y in strm.
    def comp_sequential_mystrm(size, offset):
        sum = 0
        count = 0
        write_offset = offset
        for i in range(size):
            x = ram_a.read(i + offset)
            y = ram_b.read(i + offset)
            sum += (x + 1) * (y + 1)
            val = sum + (x + 1) + y
            count += 1
            if count == reduce_size:
                ram_c.write(write_offset, val)
                write_offset += 1
                sum = 0
                count = 0

    # Compare `size` entries of ram_c at the two offsets, printing each
    # mismatch. all_ok is not reset here, so a failure in the first call is
    # also reported by the second.
    def check(size, offset_stream, offset_seq):
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok.value = False
                print(i, st, sq)
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread entry point: test macstrm2, then the outermost stream, each
    # against its sequential reference.
    def comp(size):
        all_ok.value = True

        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream_macstrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential_macstrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        print('# macstream')
        check(size, 0, offset)

        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream_mystrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size // reduce_size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential_mystrm(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size // reduce_size)

        # verification
        print('# mystream')
        check(size // reduce_size, 0, offset)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # Start the thread with size = 16.
    fsm = th.start(16)

    return m
def mkLed():
    """Build the 'blinkled' module for a DMA round-trip test around an AXI boundary.

    The module has CLK/RST inputs, one AXI master (`myaxi`), one RAM
    (`myram`) and a thread 'th_blink' that runs `blink` with
    size = 256 + 256 + 64.  The thread writes two data patterns to memory
    through DMA, reads them back and compares them word by word.

    Returns:
        The constructed module `m`.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    myram = vthread.RAM(m, 'myram', clk, rst, datawidth, addrwidth)

    # Pass/fail flag shared between `blink` and `body`.
    all_ok = m.TmpReg(initval=0)

    # Thread entry point: run one round trip and report success.
    def blink(size):
        all_ok.value = True

        # Test for 4KB boundary check
        # The first transfer starts 4 below `myaxi.boundary_size`
        # (presumably a byte address, i.e. one 32-bit word — TODO confirm).
        offset = myaxi.boundary_size - 4
        body(size, offset)

        if all_ok:
            print('ALL OK')

    # Write two patterns (i + 100, i + 1000) to two global regions, then
    # read each region back and clear `all_ok` on any mismatch.
    def body(size, offset):
        # write
        for i in range(size):
            wdata = i + 100
            myram.write(i, wdata)

        laddr = 0
        gaddr = offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # write
        for i in range(size):
            wdata = i + 1000
            myram.write(i, wdata)

        laddr = 0
        # Second region is placed one boundary_size above the first.
        gaddr = offset + myaxi.boundary_size
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # read
        laddr = 0
        gaddr = offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read: [%d] <- [%d]' % (laddr, gaddr))

        for i in range(size):
            rdata = myram.read(i)
            if vthread.verilog.NotEql(rdata, i + 100):
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

        # read
        laddr = 0
        gaddr = offset + myaxi.boundary_size
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read: [%d] <- [%d]' % (laddr, gaddr))

        for i in range(size):
            rdata = myram.read(i)
            if vthread.verilog.NotEql(rdata, i + 1000):
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    fsm = th.start(256 + 256 + 64)

    return m
def mkLed():
    """Build the 'blinkled' module for an AXI-Lite controlled DMA round-trip test.

    Besides the AXI master (`myaxi`) and RAM (`myram`), the module has an
    AXI-Lite register block (`saxi`).  Thread 'th_blink' runs `blink` with
    size = 16: it waits for a flag on register 0, runs four DMA round
    trips, writes the pass/fail flag to register 2 and raises a flag on
    register 1.

    Returns:
        The constructed module `m`.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    myram = vthread.RAM(m, 'myram', clk, rst, datawidth, addrwidth)

    saxi = vthread.AXISLiteRegister(m, 'saxi', clk, rst, datawidth)

    # Pass/fail flag shared between `blink` and `body`.
    all_ok = m.TmpReg(initval=0)

    # Thread entry point: register handshake around four `body` runs.
    def blink(size):
        # wait start
        saxi.wait_flag(0, value=1, resetvalue=0)
        # reset done
        saxi.write(1, 0)

        all_ok.value = True

        for i in range(4):
            print('# iter %d start' % i)
            # Test for 4KB boundary check
            # Each iteration uses a base of i * 16 * 1024 plus an offset
            # 4 below `myaxi.boundary_size`.
            offset = i * 1024 * 16 + (myaxi.boundary_size - 4)
            body(size, offset)
            print('# iter %d end' % i)

        if all_ok:
            print('# verify (local): PASSED')
        else:
            print('# verify (local): FAILED')

        # result
        saxi.write(2, all_ok)

        # done
        saxi.write_flag(1, 1, resetvalue=0)

    # Write two patterns (i + 100, i + 1000) to two global regions, then
    # read each region back and clear `all_ok` on any mismatch.
    def body(size, offset):
        # write
        for i in range(size):
            wdata = i + 100
            myram.write(i, wdata)

        laddr = 0
        gaddr = offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # write
        for i in range(size):
            wdata = i + 1000
            myram.write(i, wdata)

        laddr = 0
        # Second region starts (2 * size) * 4 above the first.
        gaddr = (size + size) * 4 + offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # read
        laddr = 0
        gaddr = offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read: [%d] <- [%d]' % (laddr, gaddr))

        for i in range(size):
            rdata = myram.read(i)
            if vthread.verilog.NotEql(rdata, i + 100):
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

        # read
        laddr = 0
        gaddr = (size + size) * 4 + offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read: [%d] <- [%d]' % (laddr, gaddr))

        for i in range(size):
            rdata = myram.read(i)
            if vthread.verilog.NotEql(rdata, i + 1000):
                print('rdata[%d] = %d' % (i, rdata))
                all_ok.value = False

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    fsm = th.start(16)

    return m
def mkLed(memory_datawidth=128):
    """Build the 'blinkled' module comparing a stream adder with a sequential adder on multibank RAMs.

    Three 4-bank `MultibankRAM`s (`ram_a`, `ram_b`, `ram_c`) are attached
    to an AXI master whose data width is `memory_datawidth`.  Thread
    'th_comp' runs `comp` with size = 32: it computes c = a + b once with
    the stream and once with a software-style loop, then compares the two
    result regions of `ram_c`.

    Args:
        memory_datawidth: Data width passed to `vthread.AXIM`.

    Returns:
        The constructed module `m`.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    numbanks = 4
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, memory_datawidth)
    ram_a = vthread.MultibankRAM(m, 'ram_a', clk, rst, datawidth, addrwidth,
                                 numbanks=numbanks)
    ram_b = vthread.MultibankRAM(m, 'ram_b', clk, rst, datawidth, addrwidth,
                                 numbanks=numbanks)
    ram_c = vthread.MultibankRAM(m, 'ram_c', clk, rst, datawidth, addrwidth,
                                 numbanks=numbanks)

    # Stream definition: c = a + b.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    c = a + b
    strm.sink(c, 'c')

    # Run the stream over `size` elements starting at `offset`.
    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, size)
        strm.run()
        strm.join()

    # Reference implementation: element-wise a + b with explicit RAM access.
    def comp_sequential(size, offset):
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a + b
            ram_c.write(i + offset, sum)

    # Compare `size` words of ram_c at the two offsets and print the verdict.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
                print(i, st, sq)
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread entry point.
    def comp(size):
        # DMA lengths use `size`; the comparison length and the second
        # compute offset use size * numbanks.
        # NOTE(review): the unit relationship between DMA size and RAM
        # addresses for MultibankRAM is not visible here — confirm.
        dma_size = size
        comp_size = size * numbanks

        dma_offset = 0
        comp_offset = 0
        myaxi.dma_read(ram_a, dma_offset, 0, dma_size)
        myaxi.dma_read(ram_b, dma_offset, 0, dma_size)
        comp_stream(size, comp_offset)
        myaxi.dma_write(ram_c, dma_offset, 1024, dma_size)

        dma_offset = size
        comp_offset = comp_size
        myaxi.dma_read(ram_a, dma_offset, 0, dma_size)
        myaxi.dma_read(ram_b, dma_offset, 0, dma_size)
        comp_sequential(size, comp_offset)
        myaxi.dma_write(ram_c, dma_offset, 1024 * 2, dma_size)

        check(comp_size, 0, comp_offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start(32)

    return m
def mkLed(matrix_size=16):
    """Build the 'blinkled' module that multiplies two matrices with a multiply-accumulate stream.

    Rows of A and B are fetched by DMA into `ram_a` / `ram_b`; the stream
    'strm_madd' reduces a * b over one row and writes a single result word
    into `ram_c`.  A free-running `timer` register is sampled before and
    after the computation to print the elapsed cycle count.

    NOTE(review): `datawidth`, `a_offset`, `b_offset` and `c_offset` are not
    defined in this function; they are presumably module-level names —
    confirm against the surrounding file.

    Args:
        matrix_size: Matrix dimension passed to the thread as its first
            argument.

    Returns:
        The constructed module `m`.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    # Cycle counter: incremented by `seq` on every invocation of the Seq.
    seq = Seq(m, 'seq', clk, rst)
    timer = m.Reg('timer', 32, initval=0)
    seq(
        timer.inc()
    )

    addrwidth = 10
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)

    # Stream definition: sum of a * b over `size` elements, emitted when
    # `sum_valid` is asserted.
    strm = vthread.Stream(m, 'strm_madd', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    size = strm.constant('size')
    sum, sum_valid = strm.ReduceAddValid(a * b, size)
    strm.sink(sum, 'sum', when=sum_valid, when_name='sum_valid')

    # Run one dot product of length `size`; the result goes to ram_c[waddr].
    def strm_madd(size, waddr):
        strm.set_source('a', ram_a, 0, size)
        strm.set_source('b', ram_b, 0, size)
        strm.set_constant('size', size)
        strm.set_sink('sum', ram_c, waddr, 1)
        strm.run()
        strm.join()

    # Thread entry point: time the computation, then verify it.
    def matmul(matrix_size, a_offset, b_offset, c_offset):
        start_time = timer
        comp(matrix_size, a_offset, b_offset, c_offset)
        end_time = timer
        time = end_time - start_time
        print("Time (cycles): %d" % time)
        check(matrix_size, a_offset, b_offset, c_offset)
        vthread.finish()

    # For each row i of A: load it once, then for each j load one row of B,
    # compute the dot product into ram_c[j], and finally write the result
    # row back to memory.
    def comp(matrix_size, a_offset, b_offset, c_offset):
        a_addr, c_addr = a_offset, c_offset

        for i in range(matrix_size):
            myaxi.dma_read(ram_a, 0, a_addr, matrix_size)

            b_addr = b_offset
            for j in range(matrix_size):
                myaxi.dma_read(ram_b, 0, b_addr, matrix_size)

                strm_madd(matrix_size, j)

                # Advance by one row: matrix_size words of datawidth // 8.
                b_addr += matrix_size * (datawidth // 8)

            myaxi.dma_write(ram_c, 0, c_addr, matrix_size)
            a_addr += matrix_size * (datawidth // 8)
            c_addr += matrix_size * (datawidth // 8)

    # Read the result matrix back row by row and check that the diagonal
    # holds (i + 1) * 2 and every off-diagonal element is 0.
    # NOTE(review): these expected values depend on the input data loaded
    # by the test bench, which is not visible here.
    def check(matrix_size, a_offset, b_offset, c_offset):
        all_ok = True
        c_addr = c_offset
        for i in range(matrix_size):
            myaxi.dma_read(ram_c, 0, c_addr, matrix_size)
            for j in range(matrix_size):
                v = ram_c.read(j)
                if i == j and vthread.verilog.NotEql(v, (i + 1) * 2):
                    all_ok = False
                    print("NG [%d,%d] = %d" % (i, j, v))
                if i != j and vthread.verilog.NotEql(v, 0):
                    all_ok = False
                    print("NG [%d,%d] = %d" % (i, j, v))
            c_addr += matrix_size * (datawidth // 8)

        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    th = vthread.Thread(m, 'th_matmul', clk, rst, matmul)
    fsm = th.start(matrix_size, a_offset, b_offset, c_offset)

    return m
def mkLed():
    """Build the 'blinkled' module testing a conditionally written stream sink.

    The stream computes c = a + b, writes `c` only while 140 < c < 150 and
    exposes the number of such elements through the 'cnt' sink.  Thread
    'th_comp' runs `comp` with size = 32 and compares the stream output
    with a sequential reference implementation.

    Returns:
        The constructed module `m`.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    # Stream definition: `v` is the write condition, `cnt` accumulates `v`.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    c = a + b
    v = strm.Ands(c > 140, c < 150)
    cnt = strm.ReduceAdd(v)
    strm.sink(c, 'c', when=v, when_name='v')
    strm.sink(cnt, 'cnt')

    # Run the stream and return the value read from the 'cnt' sink.
    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, 0)  # max_size
        strm.set_sink_immediate('cnt', 0)  # max_size
        strm.run()
        strm.join()
        cnt = strm.read_sink('cnt')
        print('# num of counted: %d' % cnt)
        return cnt

    # Reference implementation: pack the matching sums contiguously into
    # ram_c and return how many were written.
    def comp_sequential(size, offset):
        sum = 0
        addr = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            c = a + b
            if c > 140 and c < 150:
                ram_c.write(addr + offset, c)
                addr += 1
        print('# num of counted: %d' % addr)
        return addr

    # Compare `size` words of ram_c at the two offsets and print the verdict.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread entry point.
    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        cnt = comp_stream(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, cnt)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        cnt = comp_sequential(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, cnt)

        # verification
        # Both result regions are re-read from memory before comparing;
        # `cnt` here is the count returned by the sequential run.
        myaxi.dma_read(ram_c, 0, 1024, cnt)
        myaxi.dma_read(ram_c, offset, 1024 * 2, cnt)
        check(cnt, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start(32)

    return m
def mkLed():
    """Build the 'blinkled' module for a DMA round-trip test driven from the RAM object.

    Unlike the sibling builders, the transfers are issued through
    `myram.dma_write(myaxi, ...)` / `myram.dma_read(myaxi, ...)`.  Thread
    'th_blink' runs `blink` without arguments.

    Returns:
        The constructed module `m`.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    myram = vthread.RAM(m, 'myram', clk, rst, datawidth, addrwidth)

    # Thread entry point: write an index pattern and a constant pattern to
    # two adjacent global regions, read both back and compare.
    def blink():
        size = 256 * 2
        offset = 1024 * 4

        # write
        for i in range(size):
            wdata = i
            myram.write(i, wdata)

        laddr = 0
        gaddr = offset
        myram.dma_write(myaxi, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # overwrite
        for i in range(size):
            wdata = 128
            myram.write(i, wdata)

        laddr = 0
        # Second region starts size * 4 above the first.
        gaddr = offset + size * 4
        myram.dma_write(myaxi, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # read
        all_ok = True

        laddr = 0
        gaddr = offset
        myram.dma_read(myaxi, laddr, gaddr, size)
        print('dma_read: [%d] <- [%d]' % (laddr, gaddr))

        for i in range(size):
            rdata = myram.read(i)
            if rdata != i:
                print('rdata[%d] = %d' % (i, rdata))
                all_ok = False

        # read
        laddr = 0
        gaddr = offset + size * 4
        myram.dma_read(myaxi, laddr, gaddr, size)
        print('dma_read: [%d] <- [%d]' % (laddr, gaddr))

        for i in range(size):
            rdata = myram.read(i)
            if rdata != 128:
                print('rdata[%d] = %d' % (i, rdata))
                all_ok = False

        if all_ok:
            print('ALL OK')

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    fsm = th.start()

    return m
def mkLed():
    """Build the 'blinkled' module testing one multiplier stream shared as a substream.

    Three streams are defined:

    * 'mul_stream': z = x * y.
    * 'mac_stream': reduces (a + 1) * (b + 1) over `size` elements, using
      'mul_stream' as a substream for the product.
    * 'act_stream': reduces (a + 2) * (b + 2) the same way and replaces a
      non-positive sum by 0.

    Thread 'th_comp' runs `comp` with size = 32; each stream is compared
    against a sequential reference, and the mac/act pair is run twice.

    Returns:
        The constructed module `m`.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    # Multiplier stream, also used as the substream of the two streams below.
    mulstrm = vthread.Stream(m, 'mul_stream', clk, rst)
    mulx = mulstrm.source('x')
    muly = mulstrm.source('y')
    mulz = mulx * muly
    mulstrm.sink(mulz, 'z')

    # Multiply-accumulate stream.
    macstrm = vthread.Stream(m, 'mac_stream', clk, rst)
    a = macstrm.source('a')
    b = macstrm.source('b')
    a = a + 1
    b = b + 1
    sub = macstrm.substream(mulstrm)
    sub.to_source('x', a)
    sub.to_source('y', b)
    c = sub.from_sink('z')
    size = macstrm.constant('size')
    sum, sum_valid = macstrm.ReduceAddValid(c, size)
    macstrm.sink(sum, 'sum', when=sum_valid, when_name='sum_valid')

    # Multiply-accumulate stream with a final clamp of the sum at 0.
    actstrm = vthread.Stream(m, 'act_stream', clk, rst)
    a = actstrm.source('a')
    b = actstrm.source('b')
    a = a + 1
    b = b + 1
    a = a + 1
    b = b + 1
    sub = actstrm.substream(mulstrm)
    sub.to_source('x', a)
    sub.to_source('y', b)
    c = sub.from_sink('z')
    size = actstrm.constant('size')
    sum, sum_valid = actstrm.ReduceAddValid(c, size)
    sum = actstrm.Mux(sum > 0, sum, 0)
    actstrm.sink(sum, 'sum', when=sum_valid, when_name='sum_valid')

    # Run 'mul_stream' over `size` elements starting at `offset`.
    def comp_stream_mul(size, offset):
        mulstrm.set_source('x', ram_a, offset, size)
        mulstrm.set_source('y', ram_b, offset, size)
        mulstrm.set_sink('z', ram_c, offset, size)
        mulstrm.run()
        mulstrm.join()

    # Run 'mac_stream'; a single result word is written to ram_c[offset].
    def comp_stream_mac(size, offset):
        macstrm.set_source('a', ram_a, offset, size)
        macstrm.set_source('b', ram_b, offset, size)
        macstrm.set_constant('size', size)
        macstrm.set_sink('sum', ram_c, offset, 1)
        macstrm.run()
        macstrm.join()

    # Run 'act_stream'; a single result word is written to ram_c[offset].
    def comp_stream_act(size, offset):
        actstrm.set_source('a', ram_a, offset, size)
        actstrm.set_source('b', ram_b, offset, size)
        actstrm.set_constant('size', size)
        actstrm.set_sink('sum', ram_c, offset, 1)
        actstrm.run()
        actstrm.join()

    # Reference for 'mul_stream': element-wise product.
    def comp_sequential_mul(size, offset):
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a * b
            ram_c.write(i + offset, sum)

    # Reference for 'mac_stream': accumulate (a + 1) * (b + 1).
    def comp_sequential_mac(size, offset):
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset) + 1
            b = ram_b.read(i + offset) + 1
            sum += a * b
        ram_c.write(offset, sum)

    # Reference for 'act_stream': accumulate (a + 2) * (b + 2), then clamp.
    # NOTE(review): the clamp is placed after the loop to mirror the stream,
    # where the Mux is applied to the reduced sum — confirm.
    def comp_sequential_act(size, offset):
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset) + 2
            b = ram_b.read(i + offset) + 2
            sum += a * b
        if sum <= 0:
            sum = 0
        ram_c.write(offset, sum)

    # Compare `size` words of ram_c at the two offsets and print the verdict.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
                print(i, st, sq)
        if all_ok:
            print('OK')
        else:
            print('NG')

    # Thread entry point: mul, mac, act, then mac and act once more.
    def comp(size):
        # mul
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_mul(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_mul(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        print('# MUL')
        check(size, 0, offset)

        # mac
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_mac(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, 1)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_mac(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, 1)

        # verification
        print('# MAC')
        check(1, 0, offset)

        # act
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_act(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, 1)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_act(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, 1)

        # verification
        print('# ACT')
        check(1, 0, offset)

        # mac 2
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_mac(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, 1)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_mac(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, 1)

        # verification
        print('# MAC')
        check(1, 0, offset)

        # act 2
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_act(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, 1)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_act(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, 1)

        # verification
        print('# ACT')
        check(1, 0, offset)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start(32)

    return m
def mkLed():
    """Build the 'blinkled' module testing a stream scratchpad under a stall condition.

    The stream stores each input `a` in a 128-entry scratchpad and adds it
    to the value stored `img_width` positions earlier; output is enabled
    once the counter reaches `img_width`.  A 4-bit free-running counter is
    used to add a disable condition on `strm.oready` whenever it is 0.
    Thread 'th_comp' runs `comp` with size = 32.

    Returns:
        The constructed module `m`.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)

    # Stream definition: b = a[n] + a[n - img_width].
    strm = vthread.Stream(m, 'mystream', clk, rst)
    img_width = strm.parameter('img_width')

    counter = strm.Counter()

    a = strm.source('a')
    a_addr = strm.Counter()
    sp = strm.Scratchpad(a, a_addr, length=128)

    a_old_addr = strm.Counter() - img_width
    a_old = sp.read(a_old_addr)

    b = a + a_old

    strm.sink(b, 'b', when=counter >= img_width)

    # add a stall condition
    count = m.Reg('count', 4, initval=0)
    seq = Seq(m, 'seq', clk, rst)
    seq(count.inc())

    util.add_disable_cond(strm.oready, 1, count == 0)

    # Feed 2 * size inputs and collect `size` outputs.
    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size * 2)
        strm.set_sink('b', ram_b, offset, size)
        strm.set_parameter('img_width', size)
        strm.run()
        strm.join()

    # Reference implementation: add each word to the one `size` later.
    def comp_sequential(size, offset):
        for i in range(size):
            a_buf = ram_a.read(i + offset)
            a = ram_a.read(i + offset + size)
            b = a_buf + a
            ram_b.write(i + offset, b)

    # Compare `size` words of ram_b at the two offsets and print the verdict.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_b.read(i + offset_stream)
            sq = ram_b.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread entry point.
    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size * 2)
        comp_stream(size, offset)
        myaxi.dma_write(ram_b, offset, 1024, size)

        # sequential
        offset = size * 4
        myaxi.dma_read(ram_a, offset, 0, size * 2)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_b, offset, 1024 * 2, size)

        # verification
        check(size, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start(32)

    return m
def mkLed(memory_datawidth=128):
    """Build the 'blinkled' module for a DMA round-trip test on a multibank RAM.

    Four individual RAMs are grouped into one `MultibankRAM` (`myram`).
    Thread 'th_blink' runs `blink` with size = `array_len` (16): in each of
    four iterations every bank is filled with a bank-dependent pattern,
    transferred to memory and back, and verified bank by bank.

    Args:
        memory_datawidth: Data width passed to `vthread.AXIM`.

    Returns:
        The constructed module `m`.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    numbanks = 4
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, memory_datawidth)
    myrams = [vthread.RAM(m, 'myram_%d' % i, clk, rst, datawidth, addrwidth)
              for i in range(numbanks)]
    myram = vthread.MultibankRAM(rams=myrams, name='myram')

    # Pass/fail flag shared between `blink` and `body`.
    all_ok = m.TmpReg(initval=0)

    array_len = 16
    # Distance between the two global regions used by `body`.
    array_size = (array_len + array_len) * 4 * numbanks

    # Thread entry point: four round trips at different base addresses.
    def blink(size):
        all_ok.value = True

        for i in range(4):
            print('# iter %d start' % i)
            # Test for 4KB boundary check
            offset = i * 1024 * 16 + (myaxi.boundary_size - 4)
            body(size, offset)
            print('# iter %d end' % i)

        if all_ok:
            print('ALL OK')

    # Write two per-bank patterns (i + 100 + bank, i + 1000 + bank) to two
    # global regions, then read each back and clear `all_ok` on mismatch.
    def body(size, offset):
        # write
        for bank in range(numbanks):
            for i in range(size):
                wdata = i + 100 + bank
                myram.write_bank(bank, i, wdata)

        laddr = 0
        gaddr = offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # write
        for bank in range(numbanks):
            for i in range(size):
                wdata = i + 1000 + bank
                myram.write_bank(bank, i, wdata)

        laddr = 0
        gaddr = array_size + offset
        myaxi.dma_write(myram, laddr, gaddr, size)
        print('dma_write: [%d] -> [%d]' % (laddr, gaddr))

        # read
        laddr = 0
        gaddr = offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read: [%d] <- [%d]' % (laddr, gaddr))

        for bank in range(numbanks):
            for i in range(size):
                rdata = myram.read_bank(bank, i)
                if vthread.verilog.NotEql(rdata, i + 100 + bank):
                    print('rdata[%d] = %d' % (i, rdata))
                    all_ok.value = False

        # read
        laddr = 0
        gaddr = array_size + offset
        myaxi.dma_read(myram, laddr, gaddr, size)
        print('dma_read: [%d] <- [%d]' % (laddr, gaddr))

        for bank in range(numbanks):
            for i in range(size):
                rdata = myram.read_bank(bank, i)
                if vthread.verilog.NotEql(rdata, i + 1000 + bank):
                    print('rdata[%d] = %d' % (i, rdata))
                    all_ok.value = False

    th = vthread.Thread(m, 'th_blink', clk, rst, blink)
    fsm = th.start(array_len)

    return m
def mkLed():
    """Build the 'blinkled' module testing a nine-input window sum built from a scratchpad.

    The stream combines the current input, two previous inputs and the
    corresponding three values read `img_width` and 2 * `img_width`
    positions back in a 128-entry scratchpad, and sums the nine values
    with `AddN`.  Thread 'th_comp' runs `comp` with size = 32 and compares
    the stream output with a sequential reference.

    Returns:
        The constructed module `m`.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)

    strm = vthread.Stream(m, 'mystream', clk, rst)
    img_width = strm.parameter('img_width')

    counter = strm.Counter()

    a = strm.source('a')
    a_addr = strm.Counter()
    sp = strm.Scratchpad(a, a_addr, length=128)

    # Current input and its two predecessors.
    a0 = a
    a1 = a0.prev(1)
    a2 = a1.prev(1)

    # Same three taps, one img_width back in the scratchpad.
    a3_addr = a_addr - img_width
    a3 = sp.read(a3_addr)
    a4 = a3.prev(1)
    a5 = a4.prev(1)

    # Same three taps, two img_width back in the scratchpad.
    a6_addr = a3_addr - img_width
    a6 = sp.read(a6_addr)
    a7 = a6.prev(1)
    a8 = a7.prev(1)

    #b = a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8
    b = strm.AddN(a0, a1, a2, a3, a4, a5, a6, a7, a8)

    # Output is enabled once the counter reaches 2 * img_width + 2.
    strm.sink(b, 'b', when=counter >= img_width + img_width + 2)

    # Feed 3 * size inputs and collect size - 2 outputs.
    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size * 3)
        strm.set_sink('b', ram_b, offset, size - 2)
        strm.set_parameter('img_width', size)
        strm.run()
        strm.join()

    # Reference implementation: sum three consecutive words from each of
    # three segments spaced `size` apart.
    def comp_sequential(size, offset):
        for i in range(size - 2):
            a0 = ram_a.read(i + offset)
            a1 = ram_a.read(i + offset + 1)
            a2 = ram_a.read(i + offset + 2)
            a3 = ram_a.read(i + offset + size)
            a4 = ram_a.read(i + offset + size + 1)
            a5 = ram_a.read(i + offset + size + 2)
            a6 = ram_a.read(i + offset + size + size)
            a7 = ram_a.read(i + offset + size + size + 1)
            a8 = ram_a.read(i + offset + size + size + 2)
            b = a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8
            ram_b.write(i + offset, b)

    # Compare size - 2 words of ram_b at the two offsets and print the verdict.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size - 2):
            st = ram_b.read(i + offset_stream)
            sq = ram_b.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread entry point.
    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size * 3)
        comp_stream(size, offset)
        myaxi.dma_write(ram_b, offset, 1024, size)

        # sequential
        offset = size * 4
        myaxi.dma_read(ram_a, offset, 0, size * 3)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_b, offset, 1024 * 2, size)

        # verification
        check(size, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start(32)

    return m
def mkLed():
    """Build the 'blinkled' module testing stream sources and sinks driven by access patterns.

    A three-dimensional shape [16, 4, 8] is turned into a list of
    (size, stride) pairs in the axis order [1, 2, 0]; the same pattern is
    applied to both sources and to the sink of an a + b stream.  Thread
    'th_comp' runs `comp` without arguments.

    Returns:
        The constructed module `m`.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    shape = [16, 4, 8]
    # Total element count: product of all dimensions.
    size = functools.reduce(lambda x, y: x * y, shape, 1)
    order = [1, 2, 0]

    # Build one (size, stride) pair per axis listed in `order`; the stride of
    # an axis is the product of the dimensions that follow it in `shape`.
    def to_pattern(shape, order):
        pattern = []
        for p in order:
            size = shape[p]
            stride = functools.reduce(lambda x, y: x * y,
                                      shape[p + 1:], 1)
            pattern.append((size, stride))
        return pattern

    pattern_a = to_pattern(shape, order)
    pattern_b = to_pattern(shape, order)
    pattern_c = to_pattern(shape, order)

    # Stream definition: c = a + b.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    c = a + b
    strm.sink(c, 'c')

    # Run the stream with pattern-based addressing on all three ports.
    def comp_stream(offset):
        strm.set_source_pattern('a', ram_a, offset, pattern_a)
        strm.set_source_pattern('b', ram_b, offset, pattern_b)
        strm.set_sink_pattern('c', ram_c, offset, pattern_c)
        strm.run()
        strm.join()

    # Reference implementation: element-wise a + b in linear address order.
    def comp_sequential(offset):
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a + b
            ram_c.write(i + offset, sum)

    # Compare a single word of ram_c at each offset and print the verdict.
    # NOTE(review): only the first element of each region is compared.
    def check(offset_stream, offset_seq):
        all_ok = True
        st = ram_c.read(offset_stream)
        sq = ram_c.read(offset_seq)
        if vthread.verilog.NotEql(st, sq):
            all_ok = False
        if all_ok:
            print('OK')
        else:
            print('NG')

    # Thread entry point.
    def comp():
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream(offset)
        myaxi.dma_write(ram_c, offset, 1024 * 4, 1)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential(offset)
        myaxi.dma_write(ram_c, offset, 1024 * 8, 1)

        # verification
        check(0, offset)

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start()

    return m
def mkLed():
    """Build the 'blinkled' module testing back-to-back stream runs with overlapped setup.

    The stream computes c = a * b.  `comp_stream` issues three runs over
    consecutive `size`-element windows, configuring the next run before
    waiting on `source_join()` of the current one.  Thread 'th_comp' runs
    `comp` with size = 32 and compares the result with a sequential
    reference over 3 * size elements.

    Returns:
        The constructed module `m`.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    # Stream definition: c = a * b.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    c = a * b
    strm.sink(c, 'c')

    # Three consecutive runs at offset, offset + size and offset + 2 * size.
    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, size)
        strm.run()

        # double buffer of comp and cmd
        strm.set_source('a', ram_a, offset + size, size)
        strm.set_source('b', ram_b, offset + size, size)
        strm.set_sink('c', ram_c, offset + size, size)
        strm.source_join()
        strm.run()

        # double buffer of comp and cmd
        strm.set_source('a', ram_a, offset + size + size, size)
        strm.set_source('b', ram_b, offset + size + size, size)
        strm.set_sink('c', ram_c, offset + size + size, size)
        strm.source_join()
        strm.run()

        strm.source_join()
        strm.join()

    # Reference implementation: element-wise a * b.
    def comp_sequential(size, offset):
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a * b
            ram_c.write(i + offset, sum)

    # Compare `size` words of ram_c at the two offsets and print the verdict.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread entry point.
    def comp(size):
        # Total element count covered by the three stream runs.
        new_size = size + size + size

        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, new_size)
        myaxi.dma_read(ram_b, offset, 512, new_size)
        comp_stream(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, new_size)

        # sequential
        offset = new_size
        myaxi.dma_read(ram_a, offset, 0, new_size)
        myaxi.dma_read(ram_b, offset, 512, new_size)
        comp_sequential(new_size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, new_size)

        # verification
        myaxi.dma_read(ram_c, 0, 1024, new_size)
        myaxi.dma_read(ram_c, offset, 1024 * 2, new_size)
        check(new_size, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start(32)

    return m
def mkLed():
    """Build the 'blinkled' module testing a source pattern with stride 0.

    Both stream sources use the pattern [(16, 0)] starting at
    `offset + 10`, while the sink writes 16 consecutive words.  The
    sequential reference reads address `offset + 10` on every iteration.
    Thread 'th_comp' runs `comp` without arguments.

    Returns:
        The constructed module `m`.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    size = 16
    # One (size, stride) pair: `size` accesses with stride 0.
    pattern = [(size, 0)]

    # Stream definition: sum = a + b.
    strm = vthread.Stream(m, 'mystream', clk, rst)
    a = strm.source('a')
    b = strm.source('b')
    sum = a + b
    strm.sink(sum, 'sum')

    # Run the stream: pattern-addressed sources, linear sink.
    def comp_stream(offset):
        strm.set_source_pattern('a', ram_a, offset + 10, pattern)
        strm.set_source_pattern('b', ram_b, offset + 10, pattern)
        strm.set_sink('sum', ram_c, offset, size)
        strm.run()
        strm.join()

    # Reference implementation: the same source word is read every
    # iteration and the sum is written to consecutive addresses.
    def comp_sequential(offset):
        sum = 0
        for i in range(size):
            a = ram_a.read(offset + 10)
            b = ram_b.read(offset + 10)
            sum = a + b
            ram_c.write(i + offset, sum)

    # Compare `size` words of ram_c at the two offsets and print the verdict.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread entry point: stream run, sequential run, comparison.
    def comp():
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_stream(offset)
        myaxi.dma_write(ram_c, offset, 1024 * 4, 1)

        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 0, size)
        comp_sequential(offset)
        myaxi.dma_write(ram_c, offset, 1024 * 8, 1)

        check(size, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start()

    return m
def mkLed():
    """Build the 'blinkled' module testing a fixed-point stream.

    The RAMs are `FixedRAM`s and the stream sources and constant are
    created with `point = -4`.  The stream computes c = a * b + const;
    both the stream and the sequential reference use
    `FixedConst(32, point=point)` as the constant.  Thread 'th_comp' runs
    `comp` with size = 32.

    Returns:
        The constructed module `m`.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    # Fixed-point position shared by the RAMs, stream variables and constant.
    point = -4

    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.FixedRAM(m, 'ram_a', clk, rst, datawidth, addrwidth,
                             point=point)
    ram_b = vthread.FixedRAM(m, 'ram_b', clk, rst, datawidth, addrwidth,
                             point=point)
    ram_c = vthread.FixedRAM(m, 'ram_c', clk, rst, datawidth, addrwidth,
                             point=point)

    # Stream definition: c = a * b + const (created with dump=True).
    strm = vthread.Stream(m, 'mystream', clk, rst, dump=True)
    a = strm.source('a', point=point)
    b = strm.source('b', point=point)
    const = strm.constant('const', point=point)
    c = a * b + const
    strm.sink(c, 'c')

    # Run the stream over `size` elements starting at `offset`.
    def comp_stream(size, offset):
        strm.set_source('a', ram_a, offset, size)
        strm.set_source('b', ram_b, offset, size)
        strm.set_sink('c', ram_c, offset, size)
        const = vthread.fixed.FixedConst(32, point=point)
        strm.set_constant('const', const)
        strm.run()
        strm.join()

    # Reference implementation; also prints every operand and result.
    def comp_sequential(size, offset):
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            const = vthread.fixed.FixedConst(32, point=point)
            c = a * b + const
            ram_c.write(i + offset, c)
            print('a = %10g, b = %10g, const = %10g, c = %10g' %
                  (a, b, const, c))

    # Compare `size` words of ram_c at the two offsets and print the verdict.
    def check(size, offset_stream, offset_seq):
        all_ok = True
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    # Thread entry point.
    def comp(size):
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        check(size, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    fsm = th.start(32)

    return m