def sweeper(hop: Queue[Tuple[Uint['w_y'], Uint['w_x']], 2],
            scale_ratio: Queue[Tuple[Uint['w_ratio'], Uint['w_ratio']], 1],
            *,
            frame_size):
    """Generate scaled (y, x) sweep values for every hop position.

    Args:
        hop: level-2 Queue whose data is a (Uint['w_y'], Uint['w_x']) tuple.
        scale_ratio: level-1 Queue whose data is a pair of Uint['w_ratio'].
        frame_size: indexable configuration; element 0 is used for the y
            sweep and element 1 for the x sweep (presumably frame
            height/width -- TODO confirm with the caller).

    Returns:
        A level-4 Queue passed through ``decouple_sp``; its data is field
        [0][1] of ``cart(hop | flatten, ...)`` and its 4-bit eot is assembled
        from that cart's field [1] and field [1] of ``ratio_x``.
    """
    # Pair the ratio stream with the hop stream. cart_sync_with is defined
    # elsewhere; NOTE(review): presumably it repeats the ratio for the
    # duration of each hop transaction -- confirm.
    scale_ratio = scale_ratio | cart_sync_with(hop)

    # --- y sweep ---------------------------------------------------------
    # rng configuration: first hop data field, frame_size[0], constant 1.
    cfg_sweep_y = ccat(hop[0][0], frame_size[0], 1)
    sweep_y = cfg_sweep_y | rng(cnt_steps=True)
    ratio_y = scale_ratio | cart_sync_with(sweep_y)
    # Multiply by the first ratio field, drop the 16 low bits of the product
    # and cast back to the data type sweep_y had before scaling.
    # NOTE(review): the shift suggests a ratio with 16 fractional bits --
    # confirm.
    scaled_y = ((sweep_y[0] * ratio_y[0][0]) >> 16) | sweep_y.dtype[0]
    # Re-attach field [1] of the unscaled sweep and restore the level-1
    # Queue type. sweep_y.dtype is read before the name is rebound.
    sweep_y = ccat(scaled_y, sweep_y[1]) | Queue[sweep_y.dtype[0], 1]

    # --- x sweep ---------------------------------------------------------
    # Same construction with the second hop data field and frame_size[1];
    # the configuration is additionally synchronised with the y sweep.
    cfg_sweep_x = ccat(hop[0][1], frame_size[1], 1) \
        | cart_sync_with(sweep_y)
    sweep_x = cfg_sweep_x | rng(cnt_steps=True)
    ratio_x = ratio_y | cart_sync_with(sweep_x)
    # Uses the second ratio field; the result also goes through decouple_sp.
    scaled_x = (
        (sweep_x[0] * ratio_x[0][1]) >> 16) | sweep_x.dtype[0] | decouple_sp
    sweep_x = ccat(scaled_x, sweep_x[1] | decouple_sp) | Queue[sweep_x.dtype[0], 1]

    # --- combine ---------------------------------------------------------
    dout = cart(sweep_y | decouple_sp, sweep_x)
    dout = cart(hop | flatten, dout)

    # Build the 4-bit eot from the combined stream's field [1] and field [1]
    # of ratio_x, then keep only field [0][1] of the combined data.
    dout_eot = ccat(dout[1], ratio_x[1] | decouple_sp) | Uint[4]
    dout = ccat(dout[0][1], dout_eot) | Queue[dout.dtype[0][1], 4]
    return dout | decouple_sp
def test_cosim(cosim_cls, din_delay, dout_delay):
    """Check a co-simulated ``cart`` against the ``cart`` reference model.

    One driver supplies the single ``Uint[8]`` value 0, the other supplies
    one ``Queue[Uint[8]]`` transaction holding 0..9.  ``din_delay`` is used
    as both arguments of ``delay_rng`` on each input, ``dout_delay`` as both
    arguments of ``delay_rng`` on the output.
    """
    scalar_src = drv(t=Uint[8], seq=[0]) | delay_rng(din_delay, din_delay)
    queue_src = (drv(t=Queue[Uint[8]], seq=[list(range(10))])
                 | delay_rng(din_delay, din_delay))

    # Device under test and the reference it is compared with.
    dut = cart(sim_cls=cosim_cls)
    reference = cart(name='ref_model')
    output_delays = [delay_rng(dout_delay, dout_delay)]

    verif(scalar_src, queue_src, f=dut, ref=reference, delays=output_delays)
    sim()
def hopper(hopper_cfg):
    """Produce the cartesian combination of a y and an x ``rng`` stream.

    Each ``rng`` is configured with ``ccat(0, <cfg field>, 1)``, where the
    field is ``hopper_cfg[0][0]`` for y and ``hopper_cfg[0][1]`` for x.  The
    x configuration is passed through ``cart_sync_with(hop_y)`` before it
    drives its ``rng``.

    Returns:
        ``cart(hop_y, hop_x)``.
    """
    # y direction
    cfg_hop_y = ccat(0, hopper_cfg[0][0], 1)
    hop_y = cfg_hop_y | rng

    # x direction, synchronised with the y stream
    cfg_hop_x = ccat(0, hopper_cfg[0][1], 1) | cart_sync_with(hop_y)
    hop_x = cfg_hop_x | rng

    return cart(hop_y, hop_x)
def rd_addrgen(*, casc_hw):
    """Wire up the read-address generation chain.

    Args:
        casc_hw: configuration object; ``scale_num``, ``frame_size`` and
            ``img_size`` are read here and the object itself is forwarded to
            ``scale_ratio`` and ``boundaries``.

    Returns:
        A 2-tuple ``(sweep_linear, scaled_addr)``: the ``sweeper`` output
        after ``addr_trans``, and ``cart(scale, hop_out)`` flattened with
        ``lvl=2``.
    """
    scale = scale_counter(scale_num=casc_hw.scale_num)
    ratio = scale_ratio(scale, casc_hw=casc_hw)

    # boundaries -> hopper -> decouple_sp
    hop_out = boundaries(scale, casc_hw=casc_hw) | hopper | decouple_sp

    sweep_out = hop_out | sweeper(
        scale_ratio=ratio,
        frame_size=casc_hw.frame_size,
    )
    scaled_addr = cart(scale, hop_out) | flatten(lvl=2)

    return sweep_out | addr_trans(img_size=casc_hw.img_size), scaled_addr
def rects_mem(rd_addr_if: Uint['w_addr'], *, inst_num, casc_hw):
    """Read one rectangle entry and its weight from ROM and bundle them.

    Args:
        rd_addr_if: read address interface, shared by both ROMs.
        inst_num: index selecting which entry of ``casc_hw.rects_mem`` and
            ``casc_hw.weights_mem`` backs the ROMs.
        casc_hw: configuration object; ``w_rect_data``, ``w_weight``,
            ``rects_mem`` and ``weights_mem`` are read here and the object is
            forwarded to ``calc_rect_coords``.

    Returns:
        A level-1 Queue of ``Tuple[Uint[w_rect], Uint[1], Int[w_weight]]``
        where ``w_rect`` is ``casc_hw.w_rect_data // 2``.
    """
    w_rect = casc_hw.w_rect_data // 2
    # Floor division keeps the field width an int; true division (`/`)
    # always produces a float in Python 3, which is not a valid bit width.
    w_half = w_rect // 2

    # The raw ROM word is reinterpreted as two half-width fields followed by
    # one full-width field.
    rect_tuple = rom(
        rd_addr_if,
        data=casc_hw.rects_mem[inst_num],
        dtype=Uint[casc_hw.w_rect_data]) | \
        Tuple[Uint[w_half], Uint[w_half], Uint[w_rect]]
    rect_coords = rect_tuple | calc_rect_coords(casc_hw=casc_hw)

    weight = rom(rd_addr_if,
                 data=casc_hw.weights_mem[inst_num],
                 dtype=Int[casc_hw.w_weight])

    # Intf used only as a carrier for the output element type.
    data_t = Intf(Tuple[Uint[w_rect], Uint[1], Int[casc_hw.w_weight]])

    # Combine coordinates and weight, then rebuild the Queue with the flat
    # three-field element type.
    cart_sync = cart(rect_coords, weight)
    tuple_rect = ccat(cart_sync[0][0], cart_sync[0][1]) | data_t.dtype
    dout = ccat(tuple_rect, cart_sync[1]) | Queue[data_t.dtype, 1]
    return dout
def features_mem(rd_addr: Queue[Uint['w_addr'], 2], rst_in: Unit, *, casc_hw):
    """Read three ``rects_mem`` instances in parallel and zip their outputs.

    Args:
        rd_addr: level-2 Queue of read addresses.
        rst_in: Unit interface piped into ``local_rst``.
        casc_hw: configuration object; ``w_rect_data`` and ``w_weight`` are
            read here and the object is forwarded to ``rects_mem``.

    Returns:
        A level-3 Queue whose data is an ``Array`` of three
        ``Tuple[Uint[w_rect], Uint[1], Int[w_weight]]`` elements.
    """
    w_rect = casc_hw.w_rect_data // 2

    rst_in | local_rst

    rd_addr = rd_addr | decouple_sp

    # One memory per instance number 0..2, each followed by decouple_sp.
    features_data = [
        rects_mem(rd_addr_if=rd_addr[0], inst_num=i, casc_hw=casc_hw)
        | decouple_sp
        for i in range(3)
    ]

    feature_data_t = Intf(Tuple[Uint[w_rect], Uint[1], Int[casc_hw.w_weight]])
    features_t = Array[feature_data_t.dtype, 3]
    features_zip = czip(*features_data) | Queue[features_t, 1]

    # Combine field [1] of the address stream (after dreg) with the zipped
    # data, then assemble the 3-bit eot and the level-3 output Queue.
    sync = cart(rd_addr[1] | dreg, features_zip)
    dout_eot = ccat(sync[1], sync[0][0]) | Uint[3]
    return ccat(sync[0][1], dout_eot) | Queue[features_t, 3]
def chop2(din: Queue, size: Uint) -> b'din':
    """Two-input form of ``chop``: combine ``din`` with ``size`` first.

    ``cart`` is called with ``size`` before ``din`` and ``order=[1, 0]``;
    the combined stream is then piped into ``chop``.
    """
    paired = cart(size, din, order=[1, 0])
    return paired | chop
def take2(din: Queue, size: Uint):
    """Two-input form of ``take``: combine ``din`` with ``size`` first.

    ``cart`` is called with ``size`` before ``din`` and ``order=[1, 0]``;
    the combined stream is then piped into ``take``.
    """
    paired = cart(size, din, order=[1, 0])
    return paired | take
def clip2(din: Queue, size: Uint, *, init=1) -> b'din':
    """Two-input form of ``clip``: combine ``din`` with ``size`` first.

    ``cart`` is called with ``size`` before ``din`` and ``order=[1, 0]``;
    the combined stream is piped into ``clip``, which receives ``init``
    unchanged.
    """
    paired = cart(size, din, order=[1, 0])
    return paired | clip(init=init)
def test_two():
    """``cart`` of a level-3 Queue and a plain Uint keeps the Queue level."""
    queue_in = Intf(Queue[Unit, 3])
    scalar_in = Intf(Uint[1])

    result = cart(queue_in, scalar_in)

    expected_t = Queue[Tuple[Unit, Uint[1]], 3]
    assert result.dtype == expected_t
def chop2(din: Queue, size: Uint) -> b'din':
    """Two-input form of ``chop``.

    ``din`` and ``size`` are combined with ``cart`` and the result is piped
    into ``chop``.
    """
    paired = cart(din, size)
    return paired | chop
def take2(din: Queue, size: Uint):
    """Two-input form of ``take``.

    ``din`` and ``size`` are combined with ``cart`` and the result is piped
    into ``take``.
    """
    paired = cart(din, size)
    return paired | take
def clip2(din: Queue, size: Uint, *, init=1) -> b'din':
    """Two-input form of ``clip``.

    ``din`` and ``size`` are combined with ``cart`` and the result is piped
    into ``clip``.

    Args:
        din: input Queue.
        size: Uint interface combined with ``din``.
        init: forwarded to ``clip`` as its ``init`` keyword.
    """
    # Forward `init`; previously it was accepted but never passed on, so
    # callers could not override clip's default.
    return cart(din, size) | clip(init=init)
from pygears.lib import drv, check, cart
from pygears.typing import Queue, Uint

# One Queue transaction on the first input, a single value on the second.
x = drv(t=Queue[Uint[5]], seq=[[10, 11, 12]])
y = drv(t=Uint[5], seq=[0])

# Every element of the transaction is expected together with the value 0.
expected = [[(10, 0), (11, 0), (12, 0)]]

cart(x, y) | check(ref=expected)
from pygears.lib import drv, check, cart, shred
from pygears.typing import Queue, Uint

# Three two-element transactions on the first input, one three-element
# transaction on the second.
op1 = drv(t=Queue[Uint[5]], seq=[[10, 11], [20, 21], [30, 31]])
op2 = drv(t=Queue[Uint[5]], seq=[[10, 11, 12]])

# Expected: each op1 transaction is combined with one element of op2.
expected = [[
    [(10, 10), (11, 10)],
    [(20, 11), (21, 11)],
    [(30, 12), (31, 12)],
]]

cart(op1, op2) | check(ref=expected)
from pygears.lib import drv, check, cart
from pygears.typing import Queue, Uint

# Two Queue transactions on the first input, two plain values on the second.
op1 = drv(t=Queue[Uint[5]], seq=[[10, 11, 12], [20, 21, 22]])
op2 = drv(t=Uint[1], seq=[0, 1])

# Expected: the first transaction carries 0, the second carries 1.
expected = [
    [(10, 0), (11, 0), (12, 0)],
    [(20, 1), (21, 1), (22, 1)],
]

cart(op1, op2) | check(ref=expected)
def pulse(cfg: TCfg):
    """Generate a pulse of variable length.

    ``cfg['width']`` is the number of clock cycles the output has value 0.
    A counter is produced by ``rng(0, cfg['period'], 1)``; each count is
    combined with ``cfg['width']`` by ``cart`` and mapped through ``gt``.
    """
    cnt = rng(0, cfg['period'], 1)
    cnt_and_width = cart(cnt, cfg['width'])
    return cnt_and_width | fmap(f=gt)
def test_multiple():
    """``cart`` of mixed inputs sums the Queue levels (1 + 3 + 5 = 9)."""
    operands = [
        Intf(Uint[1]),
        Intf(Queue[Uint[2], 1]),
        Intf(Queue[Unit, 3]),
        Intf(Queue[Uint[4], 5]),
    ]

    result = cart(*operands)

    expected_t = Queue[Tuple[Uint[1], Uint[2], Unit, Uint[4]], 9]
    assert result.dtype == expected_t