示例#1
0
 def pack_smimms_int(x):
     """Return the small-immediate code registered for the integer *x*.

     The lookup table is rebuilt on every call.  For each ``n`` in
     ``0 .. 15`` it registers:

     * ``n`` itself with code ``n``;
     * ``n - 16`` (that is ``-16 .. -1``) with code ``n + 16``;
     * ``pack_unpack('i', 'I', n - 16)`` with code ``n + 16`` -- presumably
       the unsigned 32-bit view of the same negative value (confirm
       against ``pack_unpack``);
     * ``pack_unpack('f', 'I', 2 ** (n - 8))`` with code ``n + 32`` --
       presumably the raw bits of that power of two stored as a float.

     Raises:
         KeyError: if *x* is not one of the registered keys.
     """
     table = {}
     for n in range(16):
         below_zero = n - 16
         # Entries are listed in the order they must be inserted so that a
         # later key overrides an earlier equal one.
         table.update({
             n: n,
             below_zero: n + 16,
             pack_unpack('i', 'I', below_zero): n + 16,
             pack_unpack('f', 'I', 2 ** (n - 8)): n + 32,
         })
     return table[x]
示例#2
0
    def pack(self):
        """Assemble this object's fields into a single integer word.

        Address selection:

        * if ``self.addr_label`` is set, the address is
          ``(int(self.addr_label) - self.serial - 4) * 8`` passed through
          ``pack_unpack('i', 'I', ...)`` (presumably a signed-to-unsigned
          32-bit reinterpretation -- confirm against ``pack_unpack``);
        * otherwise ``self.addr`` is used when it is not ``None``;
        * otherwise the address is ``0``.

        Layout of the returned word (shift amounts as used below):

        * ``0b10`` at bit 56;
        * the low 24 address bits with the bottom 3 dropped, at bit 35;
        * ``self.cond_br`` at bit 32;
        * address bits 24 and above, kept at bit 24;
        * the link flag (``1`` when ``self.set_link`` is truthy) at bit 23;
        * ``msfign`` (always ``0b00`` here) at bit 21;
        * ``self.bdu`` at bit 15, ``self.ub`` at bit 14, ``self.bdi`` at
          bit 12;
        * ``self.raddr_a`` (``0`` when ``None``) at bit 6.

        Returns:
            The bitwise OR of all of the fields above.
        """
        if self.addr_label is not None:
            relative = (int(self.addr_label) - self.serial - 4) * 8
            addr = pack_unpack('i', 'I', relative)
        elif self.addr is not None:
            addr = self.addr
        else:
            addr = 0

        link_bit = int(bool(self.set_link))
        msfign = 0b00

        fields = (
            0b10 << 56,
            ((addr & 0xFFFFFF) >> 3) << 35,
            self.cond_br << 32,
            (addr >> 24) << 24,
            link_bit << 23,
            msfign << 21,
            self.bdu << 15,
            self.ub << 14,
            self.bdi << 12,
            (0 if self.raddr_a is None else self.raddr_a) << 6,
        )

        word = 0
        for field in fields:
            word |= field
        return word
示例#3
0
 def pack_smimms_float(x):
     """Return the small-immediate code registered for the float *x*.

     The lookup table is rebuilt on every call.  For each ``n`` in
     ``0 .. 15`` it registers:

     * ``pack_unpack('I', 'f', n)`` with code ``n`` -- presumably the float
       whose raw bits equal ``n``, i.e. zero and the smallest denormal
       numbers (confirm against ``pack_unpack``);
     * ``2 ** (n - 8)`` (the powers of two from ``2 ** -8`` to ``2 ** 7``)
       with code ``n + 32``.

     Raises:
         KeyError: if *x* is not one of the registered keys.
     """
     table = {}
     for n in range(16):
         table.update({
             # Denormal range: the value n reinterpreted through pack_unpack.
             pack_unpack('I', 'f', n): n,
             # Power-of-two range.
             2 ** (n - 8): n + 32,
         })
     return table[x]
示例#4
0
    def run(drv,
            unif,
            src,
            dst,
            num_qpus,
            rows,
            cols,
            tile_rows,
            tile_cols,
            subtile_rows,
            subtile_cols,
            code_offset=0):
        """Build the ``qpu_comatcopy_t`` program, run it once and print timing.

        Steps performed:

        1. ``drv.program`` is called with ``qpu_comatcopy_t`` and the tiling
           options given as arguments.
        2. ``src`` and ``dst`` are both overwritten with ``0, 1, 2, ...``
           (``np.arange`` reshaped to each array's own shape and dtype).
        3. ``unif[0] .. unif[7]`` are filled with: ``rows``, ``cols``,
           ``pack_unpack('f', 'I', 1.)``, ``pack_unpack('f', 'I', 0.)``, the
           address of ``src[0, 0]``, ``cols * 8``, the address of
           ``dst[0, 0]`` and ``rows * 8``.  The factor 8 is presumably the
           element size in bytes -- confirm against the caller's dtype.
        4. ``drv.execute`` is timed with ``monotonic()`` and a one-line
           summary (elapsed seconds and MB/s) is printed.

        Nothing is returned.
        """
        build_options = dict(
            num_qpus=num_qpus,
            tile_rows=tile_rows,
            tile_cols=tile_cols,
            subtile_rows=subtile_rows,
            subtile_cols=subtile_cols,
            code_offset=code_offset,
        )
        code = drv.program(qpu_comatcopy_t, **build_options)

        # Give both matrices the same deterministic contents before the run.
        for matrix in (src, dst):
            matrix[:, :] = np.arange(
                matrix.size, dtype=matrix.dtype).reshape(matrix.shape)

        uniform_values = (
            rows,
            cols,
            pack_unpack('f', 'I', 1.),
            pack_unpack('f', 'I', 0.),
            src.addresses()[0, 0],
            cols * 8,
            dst.addresses()[0, 0],
            rows * 8,
        )
        for slot, value in enumerate(uniform_values):
            unif[slot] = value

        # Only the execute call sits between the two clock reads.
        start = monotonic()
        drv.execute(code, unif.addresses()[0], thread=num_qpus)
        end = monotonic()

        report = (
            f'{num_qpus} QPUs,',
            f'{rows} x {cols} matrix,',
            f'{tile_rows:2} x {tile_cols:2} tile,',
            f'{subtile_rows:2} x {subtile_cols:2} subtile:',
            f'{end - start} seconds,',
            f'{rows * cols * 8 / (end - start) * 1e-6} MB/s',
        )
        print(*report)
示例#5
0
 def block_2x4_params(i, j):
     """Build the parameter list for block ``(i, j)``.

     ``P``, ``Q``, ``R``, ``A``, ``B``, ``C``, ``alpha`` and ``beta`` are
     taken from the enclosing scope.  A block spans ``P // 2`` rows and
     ``R // 4`` columns, so *i* presumably ranges over ``0 .. 1`` and *j*
     over ``0 .. 3`` -- confirm against the caller.

     Returns:
         A list laid out as::

             [P // 2, Q, R // 4,
              address of A[row0, 0],    A.strides[0],
              address of B[0, col0],    B.strides[0],
              address of C[row0, col0], C.strides[0],
              *pack_unpack('f', 'I', [alpha, beta])]

         where ``row0 = (P // 2) * i`` and ``col0 = (R // 4) * j``.
     """
     block_rows, block_cols = P // 2, R // 4
     row0 = block_rows * i
     col0 = block_cols * j

     params = [block_rows, Q, block_cols]
     # Each operand contributes the address of its block origin followed by
     # its leading stride.
     for operand, origin in ((A, (row0, 0)), (B, (0, col0)), (C, (row0, col0))):
         params.append(operand.addresses()[origin])
         params.append(operand.strides[0])
     params.extend(pack_unpack('f', 'I', [alpha, beta]))
     return params