Пример #1
0
def pipeline_resource01(xs, ys):
    '''
    Store the halved sum of each adjacent pair of xs into ys.

    For idx in 0..3, ys[idx] receives (xs[idx] + xs[idx + 1]) >> 1, so xs
    is indexed up to 4 and ys up to 3. The loop runs inside a
    rule(scheduling='pipeline') block and performs two reads of xs per
    iteration. Nothing is returned.
    '''
    with rule(scheduling='pipeline'):
        for idx in range(4):
            left = xs[idx]
            right = xs[idx + 1]
            ys[idx] = (left + right) >> 1
def pipeline_resource02(xs, ys):
    '''
    Copy xs[0..3] into ys and write each value doubled one slot further.

    Each iteration performs two writes to ys: ys[idx] gets xs[idx] and
    ys[idx + 1] gets xs[idx] << 1. The second write of one iteration hits
    the slot that the first write of the next iteration overwrites, so
    after the loop ys[0..3] equal xs[0..3] and ys[4] equals xs[3] << 1.
    The loop runs inside a rule(scheduling='pipeline') block. Nothing is
    returned.
    '''
    with rule(scheduling='pipeline'):
        for idx in range(4):
            value = xs[idx]
            ys[idx] = value
            ys[idx + 1] = value << 1
def _sha256(msg, _h, w):
    '''
    Run one SHA-256-style compression of a 16-word block into _h.

    msg -- sequence whose first 16 entries are copied into the schedule.
    _h  -- 8-entry hash state; read as the starting state and updated in
           place with the 32-bit-masked sum of the old and new state.
           (This parameter shadows any module-level name _h.)
    w   -- caller-provided scratch list; entries 0..63 are overwritten
           with the message schedule.

    Relies on rotr and _k, which are defined outside this block
    (presumably a 32-bit rotate-right helper and a 64-entry round
    constant table -- confirm against their definitions).
    '''
    # The first 16 schedule words are the message block itself.
    with rule(unroll='full'):
        for i in range(16):
            w[i] = msg[i]

    # Expand the schedule: each of w[16..63] is derived from four earlier
    # entries and masked to 32 bits.
    for i in range(16, 64):
        wi_15 = w[i - 15]
        s0 = rotr(wi_15, 7) ^ rotr(wi_15, 18) ^ (wi_15 >> 3)
        wi_2 = w[i - 2]
        s1 = rotr(wi_2, 17) ^ rotr(wi_2, 19) ^ (wi_2 >> 10)
        wi_16 = w[i - 16]
        wi_7 = w[i - 7]
        w[i] = (wi_16 + s0 + wi_7 + s1) & 0xFFFFFFFF

    # Load the eight working registers from the incoming state.
    a = _h[0]
    b = _h[1]
    c = _h[2]
    d = _h[3]
    e = _h[4]
    f = _h[5]
    g = _h[6]
    h = _h[7]

    # 64 rounds. t1 and t2 are left unmasked; the mask is applied when
    # they are folded into e and a.
    for i in range(64):
        s0 = rotr(a, 2) ^ rotr(a, 13) ^ rotr(a, 22)
        maj = (a & b) ^ (a & c) ^ (b & c)
        t2 = s0 + maj
        s1 = rotr(e, 6) ^ rotr(e, 11) ^ rotr(e, 25)
        ch = (e & f) ^ ((~e) & g)
        t1 = h + s1 + ch + _k[i] + w[i]

        # Shift the registers down by one; the order matters because each
        # assignment reads a value that a later line overwrites.
        h = g
        g = f
        f = e
        e = (d + t1) & 0xFFFFFFFF
        d = c
        c = b
        b = a
        a = (t1 + t2) & 0xFFFFFFFF

    # Collect the registers so they can be indexed in the unrolled loop.
    _lst = [a, b, c, d, e, f, g, h]

    # Add the round result into the caller's state, modulo 2**32.
    with rule(unroll='full'):
        for i in range(8):
            _h[i] = (_h[i] + _lst[i]) & 0xFFFFFFFF
Пример #4
0
def test(p04):
    '''
    Drive the three inputs of p04 and check its three outputs.

    Writes 2, 3 and 4 to p04.i1, p04.i2 and p04.i3, calls clkfence(), and
    then asserts that p04.o1(), p04.o2() and p04.o3() return 4, 9 and 16,
    i.e. the squares of the written values. All statements run inside a
    rule(scheduling='parallel') block.
    '''
    with rule(scheduling='parallel'):
        p04.i1.wr(2)
        p04.i2.wr(3)
        p04.i3.wr(4)
        # Fence between the writes and the output checks.
        clkfence()
        assert p04.o1() == 4
        assert p04.o2() == 9
        assert p04.o3() == 16
Пример #5
0
 def main(self):
     '''
     Square the values read from i1, i2, i3 and write them to o1, o2, o3.

     Repeats for as long as is_worker_running() is true. Each pass does
     the three reads followed by the three writes inside a
     rule(scheduling='parallel') block.
     '''
     while is_worker_running():
         with rule(scheduling='parallel'):
             in1 = self.i1.rd()
             in2 = self.i2.rd()
             in3 = self.i3.rd()
             self.o1.wr(in1 * in1)
             self.o2.wr(in2 * in2)
             self.o3.wr(in3 * in3)
def sha256(msg, h):
    '''
    Hash one 16-word block and leave the result in h.

    h is first seeded from the module-level table _h, then updated in
    place by two calls to _sha256: one for msg and one for a fixed tail
    block whose first word is 0x80000000, whose last word is 0x200 (512)
    and whose other words are zero. A single 64-entry scratch list is
    shared by both calls.
    '''
    # Seed the caller's state from the module-level initial values.
    with rule(unroll='full'):
        for idx in range(len(_h)):
            h[idx] = _h[idx]

    schedule = [None] * 64
    _sha256(msg, h, schedule)

    # Fixed tail block: marker word first, 0x200 in the last word.
    padding = [0] * 16
    padding[0] = 0x80000000
    padding[15] = 0x00000200
    _sha256(padding, h, schedule)
Пример #7
0
def test(m):
    '''
    Testbench: stream a padded message into m and print the digest.

    Protocol used on m.data_in: first the block count, then one 512-bit
    word per block. The digest is read back from m.data_out as a single
    256-bit value. The message is 16 words of 0x61616161.

    Fixes relative to the earlier version (output is unchanged for the
    16-word message used here):
      * the tail copy reads from msg, not from the freshly zeroed lst;
      * the 0x80000000 pad word goes right after the copied tail words
        instead of always landing on index 0;
      * the length word is the message bit length (blen * 32) rather
        than blocks << 8, which matches it only for some lengths.

    NOTE(review): a message that leaves 15 words for the final block is
    still not handled (the pad word and length word would need to
    straddle two blocks) -- only relevant if msg is changed.
    '''
    msg = [0x61616161] * 16  # type: List[bit32]
    lst = [0] * 16  # type: List[bit32]
    blen = len(msg)
    # Number of 64-byte blocks needed for the message plus padding.
    blocks = ((blen * 4 + 5) + 63) // 64
    print("blocks", blocks)

    start_i = 0
    m.data_in.wr(blocks)

    # Send every block except the last straight from the message.
    for i in range(blocks - 1):
        print('index:', i)
        with rule(unroll='full'):
            for j in range(16):
                lst[j] = msg[start_i]
                start_i += 1
        rv512: bit512 = bit32x16_bit512(lst)
        print('rv512', rv512)
        m.data_in.wr(rv512)

    print("$time")

    # Build the final block from a zeroed buffer.
    with rule(unroll='full'):
        for i in range(16):
            lst[i] = 0

    # Capture the tail length before start_i is advanced so the pad word
    # can be placed directly after the copied words.
    remain = blen - start_i
    for i in range(remain):
        lst[i] = msg[start_i]
        start_i += 1

    lst[remain] = 0x80000000
    # Message length in bits (32 bits per word), kept in the last word.
    lst[15] = blen * 32

    v512_last: bit512 = bit32x16_bit512(lst)
    print('lastblock', v512_last)
    m.data_in.wr(v512_last)

    v256: bit256 = m.data_out.rd()
    print('sha256', v256)
 def w(self, i_q, o_q):
     '''
     Forward every value read from i_q to o_q twice.

     Runs while is_worker_running() is true; the loop is placed inside a
     rule(scheduling='pipeline') block. Each pass performs one read of
     i_q followed by two writes of the same value to o_q.
     '''
     with rule(scheduling='pipeline'):
         while is_worker_running():
             item = i_q.rd()
             o_q.wr(item)
             o_q.wr(item)
Пример #9
0
    def process_sha256(self):
        '''
        Worker loop: hash a stream of 512-bit blocks from self.data_in.

        For each message the first value read from self.data_in is the
        number of blocks; that many 512-bit words are then read and
        compressed one after another. The resulting eight 32-bit state
        words are packed into one 256-bit value (state word 0 in the
        most significant position) and written to self.data_out.

        Uses h, k and rotr, which are defined outside this method
        (presumably the initial hash values, the round constants and a
        32-bit rotate-right helper -- confirm against their definitions).
        '''
        work = [0] * 64  # type: List[bit32]
        _h = [0] * 8  # type: List[bit32]
        __h = [0] * 8  # type: List[bit32]

        while is_worker_running():
            # NOTE(review): 'update' is assigned but never read in this method.
            update = True

            # Start every message from the initial state held in h.
            for i in range(8):
                _h[i] = h[i]

            # The first word on the input carries the block count.
            block_len512: bit512 = self.data_in.rd()
            block_len32 = block_len512
            count = 0
            #print(block_len512)
            #print(block_len32)

            while count < block_len32:
                #print(count, block_len32)
                count += 1
                #print("--=========")
                d512 = self.data_in.rd()
                #print("start d512 %5t", d512, "$time")
                shift_n = 480

                # Split the 512-bit word into 16 words, most significant
                # 32 bits first (shift goes 480, 448, ..., 0).
                for i in unroll(range(16)):
                    work[i] = (d512 >> shift_n) & 0xFFFFFFFF
                    shift_n -= 32

                # Expand the schedule to 64 words, each masked to 32 bits.
                for i in range(16, 64):
                    wi_15 = work[i - 15]
                    s0 = rotr(wi_15, 7) ^ rotr(wi_15, 18) ^ (wi_15 >> 3)
                    wi_2 = work[i - 2]
                    s1 = rotr(wi_2, 17) ^ rotr(wi_2, 19) ^ (wi_2 >> 10)
                    wi_16 = work[i - 16]
                    wi_7 = work[i - 7]
                    work[i] = (wi_16 + s0 + wi_7 + s1) & 0xFFFFFFFF

                # __h is the per-block working copy; _h keeps the running state.
                with rule(unroll='full'):
                    for i in range(8):
                        __h[i] = _h[i]

                # 64 rounds over the working copy. t1 and t2 stay unmasked;
                # the mask is applied when they are stored.
                for i in range(64):
                    s0 = rotr(__h[0], 2) ^ rotr(__h[0], 13) ^ rotr(__h[0], 22)
                    maj = (__h[0] & __h[1]) ^ (__h[0] & __h[2]) ^ (__h[1]
                                                                   & __h[2])
                    t2 = s0 + maj
                    s1 = rotr(__h[4], 6) ^ rotr(__h[4], 11) ^ rotr(__h[4], 25)
                    ch = (__h[4] & __h[5]) ^ ((~__h[4]) & __h[6])
                    t1 = __h[7] + s1 + ch + k[i] + work[i]

                    # Shift from the highest index down so each entry is
                    # read before it is overwritten.
                    __h[7] = __h[6]
                    __h[6] = __h[5]
                    __h[5] = __h[4]
                    __h[4] = (__h[3] + t1) & 0xFFFFFFFF
                    __h[3] = __h[2]
                    __h[2] = __h[1]
                    __h[1] = __h[0]
                    __h[0] = (t1 + t2) & 0xFFFFFFFF

                # Fold the block result into the running state, modulo 2**32.
                with rule(unroll='full'):
                    for i in range(8):
                        _h[i] = (_h[i] + __h[i]) & 0xFFFFFFFF

            #    print("turn %5t", count, "$time")

            # Pack the eight state words into one value, _h[0] first.
            rv256: bit256 = 0
            with rule(unroll='full'):
                for i in range(8):
                    rv256 <<= 32
                    rv256 |= _h[i]
            #print("rv256 %5t", rv256, "$time")
            self.data_out.wr(rv256)
Пример #10
0
    def mips_main(self):
        '''
        Worker loop: interpret a MIPS-style program held in imem.

        Reads eight values through self.din() into the first eight data
        memory slots, then fetches and executes instructions starting at
        pc = 0x00400000 until pc becomes 0. Afterwards the executed
        instruction count is passed to self.result() and all 64 data
        memory words are emitted through self.dout().

        imem, the opcode/funct constants (R, ADDU, J, LW, ...) and the
        address helpers self.IADDR / self.DADDR are defined outside this
        method.
        '''
        # Load the eight input words and place them at the start of dmem.
        inputs = [0] * 8
        for i in range(len(inputs)):
            inputs[i] = self.din()
        dmem = [0] * 64
        for i in range(8):
            dmem[i] = inputs[i]

        hilo: int64 = 0
        Hi = 0
        Lo = 0
        n_inst = 0
        reg = [0] * 32
        # presumably the initial stack pointer (register 29) -- TODO confirm
        reg[29] = 0x7fffeffc
        pc = 0x00400000
        self.run()
        while is_worker_running():
            # NOTE(review): pc and n_inst are not reset inside this outer
            # loop, so once pc reaches 0 later passes skip the interpreter
            # loop and only re-emit the result and dmem.
            with rule(scheduling='pipeline'):
                while pc != 0:
                    # Fetch, then advance pc to the next instruction.
                    iaddr = self.IADDR(pc)
                    ins = imem[iaddr]
                    pc = pc + 4
                    # Opcode is the top six bits of the instruction word.
                    op = (ins >> 26) & 0x3f
                    #print(op)
                    if op == R:
                        # Register format: funct/shamt/rd/rt/rs fields.
                        funct = ins & 0x3f
                        shamt = (ins >> 6) & 0x1f
                        rd = (ins >> 11) & 0x1f
                        rt = (ins >> 16) & 0x1f
                        rs = (ins >> 21) & 0x1f

                        if funct == ADDU:
                            reg[rd] = reg[rs] + reg[rt]
                        elif funct == SUBU:
                            reg[rd] = reg[rs] - reg[rt]
                        elif funct == MULT:
                            # 64-bit product split into Lo (low 32) and Hi (high 32).
                            hilo = reg[rs] * reg[rt]
                            Lo = hilo & 0x00000000ffffffff
                            Hi = (hilo >> 32) & 0xffffffff
                        elif funct == MULTU:
                            # NOTE(review): identical to the MULT branch as written.
                            hilo = reg[rs] * reg[rt]
                            Lo = hilo & 0x00000000ffffffff
                            Hi = (hilo >> 32) & 0xffffffff
                        elif funct == MFHI:
                            reg[rd] = Hi
                        elif funct == MFLO:
                            reg[rd] = Lo
                        elif funct == AND:
                            reg[rd] = reg[rs] & reg[rt]
                        elif funct == OR:
                            reg[rd] = reg[rs] | reg[rt]
                        elif funct == XOR:
                            reg[rd] = reg[rs] ^ reg[rt]
                        elif funct == SLL:
                            reg[rd] = reg[rt] << shamt
                        elif funct == SRL:
                            reg[rd] = reg[rt] >> shamt
                        elif funct == SLLV:
                            reg[rd] = reg[rt] << reg[rs]
                        elif funct == SRLV:
                            reg[rd] = reg[rt] >> reg[rs]
                        elif funct == SLT:
                            reg[rd] = reg[rs] < reg[rt]
                        elif funct == SLTU:
                            # NOTE(review): same comparison as SLT as written.
                            reg[rd] = reg[rs] < reg[rt]
                        elif funct == JR:
                            pc = reg[rs]
                        else:
                            pc = 0  # error
                    elif op == J:
                        # Jump target is the low 26 bits, shifted to a byte address.
                        tgtadr = ins & 0x3ffffff
                        pc = tgtadr << 2
                    elif op == JAL:
                        tgtadr = ins & 0x3ffffff
                        # pc already points past this instruction; save it as the link.
                        reg[31] = pc
                        pc = tgtadr << 2
                    else:  # if op == ...
                        # Immediate format: 16-bit immediate plus rt/rs fields.
                        # NOTE(review): 'address' is the zero-extended low 16
                        # bits as written here; confirm whether negative
                        # immediates / backward branches are handled by a
                        # type declaration elsewhere.
                        address = ins & 0xffff
                        rt = (ins >> 16) & 0x1f
                        rs = (ins >> 21) & 0x1f
                        if op == ADDIU:
                            reg[rt] = reg[rs] + address
                        elif op == ANDI:
                            reg[rt] = reg[rs] & address
                        elif op == ORI:
                            reg[rt] = reg[rs] | address
                        elif op == XORI:
                            reg[rt] = reg[rs] ^ address
                        elif op == LW:
                            reg[rt] = dmem[self.DADDR(reg[rs] + address)]
                        elif op == SW:
                            dmem[self.DADDR(reg[rs] + address)] = reg[rt]
                        elif op == LUI:
                            reg[rt] = address << 16
                        elif op == BEQ:
                            # Branches undo the pc + 4 above before adding the offset.
                            if reg[rs] == reg[rt]:
                                pc = pc - 4 + (address << 2)
                        elif op == BNE:
                            if reg[rs] != reg[rt]:
                                pc = pc - 4 + (address << 2)
                        elif op == BGEZ:
                            if reg[rs] >= 0:
                                pc = pc - 4 + (address << 2)
                        elif op == SLTI:
                            reg[rt] = reg[rs] < address
                        elif op == SLTIU:
                            reg[rt] = reg[rs] < address
                        else:
                            pc = 0  # error
                    # Register 0 is forced back to zero after every instruction.
                    reg[0] = 0
                    n_inst = n_inst + 1
                    #if pc == 0:
            # Report the instruction count, then dump all of data memory.
            self.result(n_inst)
            for i in range(len(dmem)):
                self.dout(dmem[i])
            self.run()
Пример #11
0
def ChenIDct(x: list, y: list):
    '''
    ChenIDCT() implements the Chen inverse DCT on an 8x8 block.

    x is the input vector and y is the output vector; both are indexed
    0..63 and must be allocated by the caller before this routine is
    called. The transform runs a column pass (stride-8 reads of x) into
    a local 64-entry buffer, a row pass over that buffer in place, and a
    final pass that rounds each entry and shifts it right by 4 into y.

    The coefficient constants (c1d4, c1d8, c3d8, c1d16, c3d16, c5d16,
    c7d16) are defined outside this function. Always returns 0.
    '''
    def LS(r, s):
        return r << s

    def RS(r, s):
        return r >> s  # Caution with rounding...

    def MSCALE(expr):
        # Scale a coefficient product back down by 9 bits.
        return RS(expr, 9)

    tmp = [None] * 64
    # Loop over columns
    with rule(scheduling='pipeline'):
        for i in range(8):
            # Read column i (stride 8) and pre-scale every sample by 4.
            b0 = LS(x[i + 0], 2)
            a0 = LS(x[i + 8], 2)
            b2 = LS(x[i + 16], 2)
            a1 = LS(x[i + 24], 2)
            b1 = LS(x[i + 32], 2)
            a2 = LS(x[i + 40], 2)
            b3 = LS(x[i + 48], 2)
            a3 = LS(x[i + 56], 2)

            # Split into even mode  b0 = x0  b1 = x4  b2 = x2  b3 = x6.
            # And the odd terms a0 = x1 a1 = x3 a2 = x5 a3 = x7.
            c0 = MSCALE((c7d16 * a0) - (c1d16 * a3))
            c1 = MSCALE((c3d16 * a2) - (c5d16 * a1))
            c2 = MSCALE((c3d16 * a1) + (c5d16 * a2))
            c3 = MSCALE((c1d16 * a0) + (c7d16 * a3))

            # First Butterfly on even terms.
            a0 = MSCALE(c1d4 * (b0 + b1))
            a1 = MSCALE(c1d4 * (b0 - b1))

            a2 = MSCALE((c3d8 * b2) - (c1d8 * b3))
            a3 = MSCALE((c1d8 * b2) + (c3d8 * b3))

            # Calculate last set of b's (a0..a3 are reused below).
            b0 = a0 + a3
            b1 = a1 + a2
            b2 = a1 - a2
            b3 = a0 - a3

            # Second Butterfly
            a0 = c0 + c1
            a1 = c0 - c1
            a2 = c3 - c2
            a3 = c3 + c2

            c0 = a0
            c1 = MSCALE(c1d4 * (a2 - a1))
            c2 = MSCALE(c1d4 * (a2 + a1))
            c3 = a3

            # Write the column back with stride 8: sums in the top half,
            # mirrored differences in the bottom half.
            tmp[i + 0] = b0 + c3
            tmp[i + 8] = b1 + c2
            tmp[i + 16] = b2 + c1
            tmp[i + 24] = b3 + c0
            tmp[i + 32] = b3 - c0
            tmp[i + 40] = b2 - c1
            tmp[i + 48] = b1 - c2
            tmp[i + 56] = b0 - c3

        # Loop over rows
        for i in range(8):
            # idx is the offset of row i (i * 8); no pre-scaling this time.
            idx = LS(i, 3)
            b0 = tmp[idx + 0]
            a0 = tmp[idx + 1]
            b2 = tmp[idx + 2]
            a1 = tmp[idx + 3]
            b1 = tmp[idx + 4]
            a2 = tmp[idx + 5]
            b3 = tmp[idx + 6]
            a3 = tmp[idx + 7]

            # Split into even mode  b0 = x0  b1 = x4  b2 = x2  b3 = x6.
            # And the odd terms a0 = x1 a1 = x3 a2 = x5 a3 = x7.
            c0 = MSCALE((c7d16 * a0) - (c1d16 * a3))
            c1 = MSCALE((c3d16 * a2) - (c5d16 * a1))
            c2 = MSCALE((c3d16 * a1) + (c5d16 * a2))
            c3 = MSCALE((c1d16 * a0) + (c7d16 * a3))

            # First Butterfly on even terms.
            a0 = MSCALE(c1d4 * (b0 + b1))
            a1 = MSCALE(c1d4 * (b0 - b1))

            a2 = MSCALE((c3d8 * b2) - (c1d8 * b3))
            a3 = MSCALE((c1d8 * b2) + (c3d8 * b3))

            # Calculate last set of b's
            b0 = a0 + a3
            b1 = a1 + a2
            b2 = a1 - a2
            b3 = a0 - a3

            # Second Butterfly
            a0 = c0 + c1
            a1 = c0 - c1
            a2 = c3 - c2
            a3 = c3 + c2

            c0 = a0
            c1 = MSCALE(c1d4 * (a2 - a1))
            c2 = MSCALE(c1d4 * (a2 + a1))
            c3 = a3

            # Same row offset as computed at the top of this iteration.
            idx = LS(i, 3)
            tmp[idx + 0] = b0 + c3
            tmp[idx + 1] = b1 + c2
            tmp[idx + 2] = b2 + c1
            tmp[idx + 3] = b3 + c0
            tmp[idx + 4] = b3 - c0
            tmp[idx + 5] = b2 - c1
            tmp[idx + 6] = b1 - c2
            tmp[idx + 7] = b0 - c3

        # Retrieve correct accuracy. We have additional factor
        # of 16 that must be removed.
        for i in range(64):
            v = tmp[i]
            # Bias by 8 away from zero before shifting right by 4.
            if v < 0:
                z = (v - 8) >> 4
            else:
                z = (v + 8) >> 4
            y[i] = z
    return 0