Пример #1
0
def run(a_shape=(15, 15),
        b_shape=(15, 15),
        a_dtype=ng.int32,
        b_dtype=ng.int32,
        c_dtype=ng.int32,
        par=1,
        axi_datawidth=32,
        silent=False,
        filename=None,
        simtype='iverilog',
        outputfile=None):

    # create target hardware
    a = ng.placeholder(a_dtype, shape=a_shape, name='a')
    b = ng.placeholder(b_dtype, shape=b_shape, name='b')
    t = ng.add(a, b, dtype=c_dtype, par=par)
    c = ng.relu(t, dtype=c_dtype, par=par)

    targ = ng.to_veriloggen([c],
                            'matrix_add_relu',
                            silent=silent,
                            config={'maxi_datawidth': axi_datawidth})

    # verification data
    va = np.arange(a.length, dtype=np.int64).reshape(a.shape) % [5] - [10]
    vb = (np.arange(b.length, dtype=np.int64).reshape(b.shape) +
          [100]) % [6] - [10]

    eval_outs = ng.eval([c], a=va, b=vb)
    vc = eval_outs[0]

    # to memory image
    size_max = int(
        math.ceil(
            max(a.memory_size, b.memory_size, c.memory_size) / 4096)) * 4096
    check_addr = max(a.addr, b.addr, c.addr) + size_max
    size_check = size_max
    tmp_addr = check_addr + size_check

    memimg_datawidth = 32
    mem = np.zeros([1024 * 1024 * 8 // (memimg_datawidth // 8)],
                   dtype=np.int64)
    mem = mem + [100]

    axi.set_memory(mem, va, memimg_datawidth, a_dtype.width, a.addr,
                   max(int(math.ceil(axi_datawidth / a_dtype.width)), par))
    axi.set_memory(mem, vb, memimg_datawidth, b_dtype.width, b.addr,
                   max(int(math.ceil(axi_datawidth / b_dtype.width)), par))
    axi.set_memory(mem, vc, memimg_datawidth, c_dtype.width, check_addr,
                   max(int(math.ceil(axi_datawidth / c_dtype.width)), par))

    # test controller
    m = Module('test')
    params = m.copy_params(targ)
    ports = m.copy_sim_ports(targ)
    clk = ports['CLK']
    resetn = ports['RESETN']
    rst = m.Wire('RST')
    rst.assign(Not(resetn))

    # AXI memory model
    if outputfile is None:
        outputfile = os.path.splitext(os.path.basename(__file__))[0] + '.out'

    memimg_name = 'memimg_' + outputfile

    memory = axi.AxiMemoryModel(m,
                                'memory',
                                clk,
                                rst,
                                datawidth=axi_datawidth,
                                memimg=mem,
                                memimg_name=memimg_name,
                                memimg_datawidth=memimg_datawidth)
    memory.connect(ports, 'maxi')

    # AXI-Slave controller
    _saxi = vthread.AXIMLite(m, '_saxi', clk, rst, noio=True)
    _saxi.connect(ports, 'saxi')

    # timer
    time_counter = m.Reg('time_counter', 32, initval=0)
    seq = Seq(m, 'seq', clk, rst)
    seq(time_counter.inc())

    num_rep = functools.reduce(lambda x, y: x * y, c.shape[:-1], 1)

    def ctrl():
        for i in range(100):
            pass

        ng.sim.set_global_addrs(_saxi, tmp_addr)

        start_time = time_counter.value
        ng.sim.start(_saxi)

        print('# start')

        ng.sim.wait(_saxi)
        end_time = time_counter.value

        print('# end')
        print('# execution cycles: %d' % (end_time - start_time))

        # verify
        ok = True
        for i in range(num_rep):
            for j in range(c.shape[-1]):
                orig = memory.read_word(i * c.aligned_shape[-1] + j, c.addr,
                                        c_dtype.width)
                check = memory.read_word(i * c.aligned_shape[-1] + j,
                                         check_addr, c_dtype.width)

                if vthread.verilog.NotEql(orig, check):
                    print('NG', i, j, orig, check)
                    ok = False
                # else:
                #    print('OK', i, j, orig, check)

        if ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

        vthread.finish()

    th = vthread.Thread(m, 'th_ctrl', clk, rst, ctrl)
    fsm = th.start()

    uut = m.Instance(targ,
                     'uut',
                     params=m.connect_params(targ),
                     ports=m.connect_ports(targ))

    # simulation.setup_waveform(m, uut)
    simulation.setup_clock(m, clk, hperiod=5)
    init = simulation.setup_reset(m,
                                  resetn,
                                  m.make_reset(),
                                  period=100,
                                  polarity='low')

    init.add(
        Delay(1000000),
        Systask('finish'),
    )

    # output source code
    if filename is not None:
        m.to_verilog(filename)

    # run simulation
    sim = simulation.Simulator(m, sim=simtype)
    rslt = sim.run(outputfile=outputfile)
    lines = rslt.splitlines()
    if simtype == 'verilator' and lines[-1].startswith('-'):
        rslt = '\n'.join(lines[:-1])
    return rslt
Пример #2
0
def run(act_shape=(1, 7, 7, 7),
        weight_shape=(3, 3, 3, 7),
        bias_shape=None,
        scale_shape=None,
        act_dtype=ng.int32,
        weight_dtype=ng.int32,
        bias_dtype=ng.int32,
        scale_dtype=ng.int32,
        out_dtype=ng.int32,
        stride=(1, 1, 1, 1),
        rshift_mul=None,
        rshift_sum=None,
        rshift_out=0,
        par_ich=1,
        par_och=1,
        par_col=1,
        par_row=1,
        concur_och=None,
        stationary='filter',
        input_ram_size=None,
        filter_ram_size=None,
        bias_ram_size=None,
        scale_ram_size=None,
        out_ram_size=None,
        axi_datawidth=32,
        silent=False,
        filename=None,
        simtype='iverilog',
        outputfile=None):

    # create target hardware
    act = ng.placeholder(act_dtype, shape=act_shape, name='act')
    weight = ng.variable(weight_dtype, shape=weight_shape, name='weight')

    if bias_shape is not None:
        bias = ng.variable(bias_dtype, bias_shape, name='bias')
    else:
        bias = None

    if scale_shape is not None:
        scale = ng.variable(scale_dtype, scale_shape, name='scale')
    else:
        scale = None

    out = ng.conv2d(act, weight, stride, bias, scale, rshift_mul, rshift_sum,
                    rshift_out, None, 'SAME', out_dtype, ng.int32, ng.int32,
                    'conv2d', par_ich, par_och, par_col, par_row, concur_och,
                    stationary, input_ram_size, filter_ram_size, bias_ram_size,
                    scale_ram_size, None, None, None, out_ram_size)

    out = ng.relu(out)

    targ = ng.to_veriloggen([out],
                            'matrix_conv2d_relu',
                            silent=silent,
                            config={'maxi_datawidth': axi_datawidth})

    # verification data
    if act_dtype.width > 4:
        vact = np.arange(act.length, dtype=np.int64).reshape(act.shape) % [11]
    else:
        vact = np.arange(act.length, dtype=np.int64).reshape(act.shape) % [5]

    vweight = np.arange(weight.length, dtype=np.int64).reshape(
        weight.shape) % [7] - [3]

    if bias is not None:
        vbias = np.arange(bias.length, dtype=np.int64).reshape(
            bias.shape) % [4]
    else:
        vbias = None

    if scale is not None:
        vscale = np.arange(scale.length, dtype=np.int64).reshape(
            scale.shape) % [6]
    else:
        vscale = None

    eval_outs = ng.eval([out],
                        act=vact,
                        weight=vweight,
                        bias=vbias,
                        scale=vscale)
    vout = eval_outs[0]

    # to memory image
    size_max = int(
        math.ceil(
            max(act.memory_size, weight.memory_size,
                bias.memory_size if bias is not None else 0, scale.memory_size
                if scale is not None else 0, out.memory_size) / 4096)) * 4096
    check_addr = max(
        act.addr, weight.addr, bias.addr if bias is not None else -1,
        scale.addr if scale is not None else -1, out.addr) + size_max
    size_check = size_max
    tmp_addr = check_addr + size_check

    memimg_datawidth = 32
    mem = np.zeros([1024 * 1024 * 8 // (memimg_datawidth // 8)],
                   dtype=np.int64)
    mem = mem + [100]

    axi.set_memory(
        mem, vact, memimg_datawidth, act_dtype.width, act.addr,
        max(int(math.ceil(axi_datawidth / act_dtype.width)), par_ich))

    axi.set_memory(
        mem, vweight, memimg_datawidth, weight_dtype.width, weight.addr,
        max(int(math.ceil(axi_datawidth / weight_dtype.width)), par_ich))

    if bias is not None:
        axi.set_memory(
            mem, vbias, memimg_datawidth, bias_dtype.width, bias.addr,
            max(int(math.ceil(axi_datawidth / bias_dtype.width)), par_och))

    if scale is not None:
        axi.set_memory(
            mem, vscale, memimg_datawidth, scale_dtype.width, scale.addr,
            max(int(math.ceil(axi_datawidth / scale_dtype.width)), par_och))

    axi.set_memory(
        mem, vout, memimg_datawidth, out_dtype.width, check_addr,
        max(int(math.ceil(axi_datawidth / out_dtype.width)), par_och))

    # test controller
    m = Module('test')
    params = m.copy_params(targ)
    ports = m.copy_sim_ports(targ)
    clk = ports['CLK']
    resetn = ports['RESETN']
    rst = m.Wire('RST')
    rst.assign(Not(resetn))

    # AXI memory model
    if outputfile is None:
        outputfile = os.path.splitext(os.path.basename(__file__))[0] + '.out'

    memimg_name = 'memimg_' + outputfile

    memory = axi.AxiMemoryModel(m,
                                'memory',
                                clk,
                                rst,
                                datawidth=axi_datawidth,
                                memimg=mem,
                                memimg_name=memimg_name,
                                memimg_datawidth=memimg_datawidth)
    memory.connect(ports, 'maxi')

    # AXI-Slave controller
    _saxi = vthread.AXIMLite(m, '_saxi', clk, rst, noio=True)
    _saxi.connect(ports, 'saxi')

    # timer
    time_counter = m.Reg('time_counter', 32, initval=0)
    seq = Seq(m, 'seq', clk, rst)
    seq(time_counter.inc())

    def ctrl():
        for i in range(100):
            pass

        ng.sim.set_global_addrs(_saxi, tmp_addr)

        start_time = time_counter.value
        ng.sim.start(_saxi)

        print('# start')

        ng.sim.wait(_saxi)
        end_time = time_counter.value

        print('# end')
        print('# execution cycles: %d' % (end_time - start_time))

        # verify
        ok = True
        for bat in range(out.shape[0]):
            for y in range(out.shape[1]):
                for x in range(out.shape[2]):
                    for ch in range(out.shape[3]):
                        orig = memory.read_word(
                            bat * out.aligned_shape[1] * out.aligned_shape[2] *
                            out.aligned_shape[3] +
                            y * out.aligned_shape[2] * out.aligned_shape[3] +
                            x * out.aligned_shape[3] + ch, out.addr,
                            out_dtype.width)
                        check = memory.read_word(
                            bat * out.aligned_shape[1] * out.aligned_shape[2] *
                            out.aligned_shape[3] +
                            y * out.aligned_shape[2] * out.aligned_shape[3] +
                            x * out.aligned_shape[3] + ch, check_addr,
                            out_dtype.width)

                        if vthread.verilog.NotEql(orig, check):
                            print('NG (', bat, y, x, ch, ') orig: ', orig,
                                  ' check: ', check)
                            ok = False
                        # else:
                        #    print('OK (', bat, y, x, ch,
                        #          ') orig: ', orig, ' check: ', check)

        if ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

        vthread.finish()

    th = vthread.Thread(m, 'th_ctrl', clk, rst, ctrl)
    fsm = th.start()

    uut = m.Instance(targ,
                     'uut',
                     params=m.connect_params(targ),
                     ports=m.connect_ports(targ))

    # simulation.setup_waveform(m, uut)
    simulation.setup_clock(m, clk, hperiod=5)
    init = simulation.setup_reset(m,
                                  resetn,
                                  m.make_reset(),
                                  period=100,
                                  polarity='low')

    init.add(
        Delay(10000000),
        Systask('finish'),
    )

    # output source code
    if filename is not None:
        m.to_verilog(filename)

    # run simulation
    sim = simulation.Simulator(m, sim=simtype)
    rslt = sim.run(outputfile=outputfile)
    lines = rslt.splitlines()
    if simtype == 'verilator' and lines[-1].startswith('-'):
        rslt = '\n'.join(lines[:-1])
    return rslt
Пример #3
0
def mkTest(ich=3, och=10, ch=64, ksize=3, stride=1, col=28, row=28):
    # create target hardware

    # layer 0: conv2d, max_pool_serial, relu
    input_layer = ng.placeholder(ng.int32,
                                 shape=(1, row, col, ich),
                                 name='input_layer')
    w0 = ng.variable(ng.int32, shape=(ch, ksize, ksize, ich), name='w0')
    a0 = ng.conv2d(input_layer, w0, strides=(1, stride, stride, 1))
    a0 = ng.max_pool_serial(a0, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1))
    a0 = ng.relu(a0)

    # layer 1: conv2d, relu, reshape
    w1 = ng.variable(ng.int32,
                     shape=(ch, ksize, ksize, a0.shape[-1]),
                     name='w1')
    a1 = ng.conv2d(a0, w1, strides=(1, stride, stride, 1))
    a1 = ng.relu(a1)
    a1 = ng.reshape(a1, [-1])

    # layer 2: full-connection
    w2 = ng.variable(ng.int32, shape=(16, a1.shape[-1]), name='w2')
    a2 = ng.matmul(a1, w2, transposed_b=True)
    a2 = ng.relu(a2)

    # layer 3: full-connection
    w3 = ng.variable(ng.int32, shape=(och, a2.shape[-1]), name='w3')
    output_layer = ng.matmul(a2, w3, transposed_b=True, name='output_layer')

    targ = ng.to_veriloggen([output_layer], 'cnn')
    #targ = ng.to_ipxact([output_layer], 'cnn')

    # test controller
    m = Module('test')
    params = m.copy_params(targ)
    ports = m.copy_sim_ports(targ)
    clk = ports['CLK']
    resetn = ports['RESETN']
    rst = m.Wire('RST')
    rst.assign(Not(resetn))

    # AXI memory model
    memory = axi.AxiMemoryModel(m, 'memory', clk, rst, mem_addrwidth=23)
    memory.connect(ports, 'maxi')

    # AXI-Slave controller
    _saxi = vthread.AXIMLite(m, '_saxi', clk, rst, noio=True)
    _saxi.connect(ports, 'saxi')

    # timer
    time_counter = m.Reg('time_counter', 32, initval=0)
    seq = Seq(m, 'seq', clk, rst)
    seq(time_counter.inc())

    def ctrl():
        for i in range(100):
            pass

        start_time = time_counter.value
        ng.sim.start(_saxi)

        print('# start')

        ng.sim.wait(_saxi)
        end_time = time_counter.value

        print('# end')
        print('# execution cycles: %d' % (end_time - start_time))

        vthread.finish()

    th = vthread.Thread(m, 'th_ctrl', clk, rst, ctrl)
    fsm = th.start()

    uut = m.Instance(targ,
                     'uut',
                     params=m.connect_params(targ),
                     ports=m.connect_ports(targ))

    # simulation.setup_waveform(m, uut)
    simulation.setup_clock(m, clk, hperiod=5)
    init = simulation.setup_reset(m,
                                  resetn,
                                  m.make_reset(),
                                  period=100,
                                  polarity='low')

    init.add(
        Delay(10000000),
        Systask('finish'),
    )

    return m
Пример #4
0
def run(a_shape=(15, 15), b_shape=(15, 15),
        bias_shape=None, scale_shape=None,
        a_dtype=ng.int32, b_dtype=ng.int32,
        bias_dtype=ng.int32, scale_dtype=ng.int32,
        c_dtype=ng.int32,
        rshift_mul=None, rshift_sum=None, rshift_out=None,
        act_func=None,
        par_left_col=1, par_left_row=1, par_out_col=1,
        concur_out_col=None, stationary='right',
        left_ram_size=None, right_ram_size=None,
        bias_ram_size=None, scale_ram_size=None,
        out_ram_size=None,
        axi_datawidth=32, silent=False,
        filename=None, simtype='iverilog', outputfile=None):

    # create target hardware
    a = ng.placeholder(a_dtype, shape=a_shape, name='a')
    b = ng.placeholder(b_dtype, shape=b_shape, name='b')

    if bias_shape is not None:
        bias = ng.placeholder(bias_dtype, bias_shape, name='bias')
    else:
        bias = None

    if scale_shape is not None:
        scale = ng.placeholder(scale_dtype, scale_shape, name='scale')
    else:
        scale = None

    transposed_a = False
    transposed_b = True

    c = ng.matmul(a, b,
                  bias, scale,
                  transposed_a, transposed_b,
                  rshift_mul, rshift_sum, rshift_out,
                  act_func,
                  c_dtype, ng.int32, ng.int32,
                  'matmul',
                  par_left_col, par_left_row, par_out_col,
                  concur_out_col, stationary,
                  left_ram_size, right_ram_size,
                  bias_ram_size, scale_ram_size,
                  None, None, None,
                  out_ram_size)

    c = ng.relu(c)

    targ = ng.to_veriloggen([c], 'matrix_matmul_relu', silent=silent,
                            config={'maxi_datawidth': axi_datawidth})

    # verification data
    va = np.arange(a.length, dtype=np.int64).reshape(a.shape) % [5]
    vb = np.arange(b.length, dtype=np.int64).reshape(b.shape) % [5] - [3]

    if bias is not None:
        vbias = np.arange(bias.length,
                          dtype=np.int64).reshape(bias.shape) % [4]
    else:
        vbias = None

    if scale is not None:
        vscale = np.arange(scale.length,
                           dtype=np.int64).reshape(scale.shape) % [6]
    else:
        vscale = None

    vc = ng.verify.matmul(va, vb,
                          bias, scale,
                          False, True,
                          rshift_mul, rshift_sum, rshift_out,
                          act_func,
                          c_dtype, ng.int32, ng.int32,
                          'matmul',
                          par_left_col, par_left_row, par_out_col,
                          concur_out_col, stationary,
                          left_ram_size, right_ram_size,
                          bias_ram_size, scale_ram_size,
                          None, None, None,
                          out_ram_size,
                          False,
                          a_dtype, b_dtype,
                          bias_dtype, scale_dtype)

    vc = ng.verify.relu(vc)

    # to memory image
    size_max = int(math.ceil(max(a.memory_size, b.memory_size,
                                 bias.memory_size if bias is not None else 0,
                                 scale.memory_size if scale is not None else 0,
                                 c.memory_size) / 4096)) * 4096
    check_addr = max(a.addr, b.addr,
                     bias.addr if bias is not None else -1,
                     scale.addr if scale is not None else -1,
                     c.addr) + size_max
    size_check = size_max
    tmp_addr = check_addr + size_check

    memimg_datawidth = 32
    mem = np.zeros([1024 * 1024 * 8 // memimg_datawidth], dtype=np.int64)
    mem = mem + [100]

    axi.set_memory(mem, va, memimg_datawidth,
                   a_dtype.width, a.addr,
                   max(int(math.ceil(axi_datawidth / a_dtype.width)), par_left_col))

    axi.set_memory(mem, vb, memimg_datawidth,
                   b_dtype.width, b.addr,
                   max(int(math.ceil(axi_datawidth / b_dtype.width)), par_left_col))

    if bias is not None:
        axi.set_memory(mem, vbias, memimg_datawidth,
                       bias_dtype.width, bias.addr,
                       max(int(math.ceil(axi_datawidth / bias_dtype.width)), par_out_col))

    if scale is not None:
        axi.set_memory(mem, vscale, memimg_datawidth,
                       scale_dtype.width, scale.addr,
                       max(int(math.ceil(axi_datawidth / scale_dtype.width)), par_out_col))

    axi.set_memory(mem, vc, memimg_datawidth,
                   c_dtype.width, check_addr,
                   max(int(math.ceil(axi_datawidth / c_dtype.width)), par_out_col))

    # test controller
    m = Module('test')
    params = m.copy_params(targ)
    ports = m.copy_sim_ports(targ)
    clk = ports['CLK']
    resetn = ports['RESETN']
    rst = m.Wire('RST')
    rst.assign(Not(resetn))

    # AXI memory model
    if outputfile is None:
        outputfile = os.path.splitext(os.path.basename(__file__))[0] + '.out'

    memimg_name = 'memimg_' + outputfile

    memory = axi.AxiMemoryModel(m, 'memory', clk, rst,
                                datawidth=axi_datawidth,
                                memimg=mem, memimg_name=memimg_name,
                                memimg_datawidth=memimg_datawidth)
    memory.connect(ports, 'maxi')

    # AXI-Slave controller
    _saxi = vthread.AXIMLite(m, '_saxi', clk, rst, noio=True)
    _saxi.connect(ports, 'saxi')

    # timer
    time_counter = m.Reg('time_counter', 32, initval=0)
    seq = Seq(m, 'seq', clk, rst)
    seq(
        time_counter.inc()
    )

    def ctrl():
        for i in range(100):
            pass

        ng.sim.set_global_addrs(_saxi, tmp_addr)

        start_time = time_counter.value
        ng.sim.start(_saxi)

        print('# start')

        ng.sim.wait(_saxi)
        end_time = time_counter.value

        print('# end')
        print('# execution cycles: %d' % (end_time - start_time))

        # verify
        ok = True
        for i in range(c.shape[0]):
            for j in range(c.shape[1]):
                orig = memory.read_word(i * c.aligned_shape[1] + j,
                                        c.addr, c_dtype.width)
                check = memory.read_word(i * c.aligned_shape[1] + j,
                                         check_addr, c_dtype.width)

                if vthread.verilog.NotEql(orig, check):
                    print(i, j, orig, check)
                    ok = False

        if ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

        vthread.finish()

    th = vthread.Thread(m, 'th_ctrl', clk, rst, ctrl)
    fsm = th.start()

    uut = m.Instance(targ, 'uut',
                     params=m.connect_params(targ),
                     ports=m.connect_ports(targ))

    # simulation.setup_waveform(m, uut)
    simulation.setup_clock(m, clk, hperiod=5)
    init = simulation.setup_reset(m, resetn, m.make_reset(), period=100, polarity='low')

    init.add(
        Delay(1000000),
        Systask('finish'),
    )

    # output source code
    if filename is not None:
        m.to_verilog(filename)

    # run simulation
    sim = simulation.Simulator(m, sim=simtype)
    rslt = sim.run(outputfile=outputfile)
    lines = rslt.splitlines()
    if simtype == 'verilator' and lines[-1].startswith('-'):
        rslt = '\n'.join(lines[:-1])
    return rslt
Пример #5
0
def mkTest(n_input=784, n_classes=10):
    # create target hardware
    x = ng.placeholder(ng.int32, shape=[n_input])

    w1 = ng.variable(ng.int32, shape=(n_input, n_input), name='h1')
    w2 = ng.variable(ng.int32, shape=(n_input, n_input), name='h2')
    w3 = ng.variable(ng.int32, shape=(n_classes, n_input), name='out')

    l1 = ng.matmul(x, w1, transposed_b=True)
    l1 = ng.relu(l1)

    l2 = ng.matmul(l1, w2, transposed_b=True)
    l2 = ng.relu(l2)

    out = ng.matmul(l2, w3, transposed_b=True)

    targ = ng.to_veriloggen([out], 'mlp')
    #targ = ng.to_ipxact([model], 'mlp')

    # test controller
    m = Module('test')
    params = m.copy_params(targ)
    ports = m.copy_sim_ports(targ)
    clk = ports['CLK']
    resetn = ports['RESETN']
    rst = m.Wire('RST')
    rst.assign(Not(resetn))

    # AXI memory model
    memory = axi.AxiMemoryModel(m, 'memory', clk, rst)
    memory.connect(ports, 'maxi')

    # AXI-Slave controller
    _saxi = vthread.AXIMLite(m, '_saxi', clk, rst, noio=True)
    _saxi.connect(ports, 'saxi')

    # timer
    time_counter = m.Reg('time_counter', 32, initval=0)
    seq = Seq(m, 'seq', clk, rst)
    seq(time_counter.inc())

    def ctrl():
        for i in range(100):
            pass

        start_time = time_counter.value
        ng.sim.start(_saxi)

        print('# start')

        ng.sim.wait(_saxi)
        end_time = time_counter.value

        print('# end')
        print('# execution cycles: %d' % (end_time - start_time))

        vthread.finish()

    th = vthread.Thread(m, 'th_ctrl', clk, rst, ctrl)
    fsm = th.start()

    uut = m.Instance(targ,
                     'uut',
                     params=m.connect_params(targ),
                     ports=m.connect_ports(targ))

    # simulation.setup_waveform(m, uut)
    simulation.setup_clock(m, clk, hperiod=5)
    init = simulation.setup_reset(m,
                                  resetn,
                                  m.make_reset(),
                                  period=100,
                                  polarity='low')

    init.add(
        Delay(1000000),
        Systask('finish'),
    )

    return m