Пример #1
0
def common_uspmv_dev(rows,cols,datas,m_sizes,k_sizes,nnz_sizes, num_runs,A_handles,B_handles,C_handles):
    """Queue one USPMV op per entry of ``m_sizes`` through the handle-based
    gemx device-buffer calls, execute them, and check the last output.

    ``m_sizes`` and ``k_sizes`` are modified in place: each of the first
    ``len(m_sizes)`` entries is replaced by the value returned from
    ``test.get_padded_size`` with a granularity of
    ``GEMX_ddrWidth * GEMX_uspmvInterleaves``.

    Only ``B_handles[0]`` is registered with ``gemx.addDevBuf`` here; the
    remaining ``B_handles`` entries are sent as-is, so they presumably name
    buffers that already exist (e.g. an earlier C handle) -- confirm with
    the caller.

    The final check is delegated to ``test.multiply_and_cmp_uspmv`` using
    the input buffer and the last output buffer.
    """
    ddr_width = int(xclbin_opts["GEMX_ddrWidth"])
    # Not used below; the lookup is kept so that a missing key still raises
    # KeyError at this point.
    stages = int(xclbin_opts["GEMX_uspmvStages"])
    pad_unit = ddr_width * int(xclbin_opts["GEMX_uspmvInterleaves"])

    # NOTE(review): K sizes are padded with the same granularity as M sizes
    # (the original's separate ``min_k = ddrWidth`` was never used) --
    # confirm this is intended.
    for idx in range(len(m_sizes)):
        m_sizes[idx] = test.get_padded_size(m_sizes[idx], pad_unit)
        k_sizes[idx] = test.get_padded_size(k_sizes[idx], pad_unit)
    print ("size:",m_sizes,k_sizes,"nnz:",nnz_sizes)

    # Input buffer: allocate on the gemx side, fill a host-side staging
    # array, then copy the staging array into the buffer.
    B = gemx.addDevBuf(B_handles[0], num_runs, k_sizes[0], np.float32)
    staging = np.zeros((num_runs, k_sizes[0]), dtype=np.float32)
    test.fillMod(9, num_runs, k_sizes[0], staging)
    B[:] = staging

    outputs = []
    for idx in range(len(m_sizes)):
        # NOTE(review): every output buffer is sized with m_sizes[-1], not
        # m_sizes[idx] -- confirm this is intended when stage sizes differ.
        out_buf = gemx.addDevBuf(C_handles[idx], num_runs, m_sizes[-1], np.float32)
        out_buf.fill(0)
        outputs.append(out_buf)

        row_idx = np.array(rows[idx]).astype(np.uint16)
        col_idx = np.array(cols[idx]).astype(np.uint16)
        values = np.array(datas[idx])
        gemx.addUSpDevBuf(
            row_idx,
            col_idx,
            values,
            A_handles[idx],
            np.array(m_sizes[idx], dtype=np.int32),
            np.array(k_sizes[idx], dtype=np.int32),
            np.array(nnz_sizes[idx], dtype=np.int32),
            np.array(1, dtype=np.float32),
            xclbin_opts,
        )

        gemx.sendDevBuf(A_handles[idx])
        gemx.sendDevBuf(B_handles[idx])
        gemx.sendDevBuf(C_handles[idx])
        gemx.addUSPMVDevOp(A_handles[idx], B_handles[idx], C_handles[idx], num_runs)

    gemx.executeDev()
    gemx.getDevBuf(C_handles[-1])
    test.multiply_and_cmp_uspmv(rows, cols, datas, m_sizes, k_sizes, B, outputs[-1])
Пример #2
0
def common_uspmv(rows,cols,datas,m_sizes,k_sizes,nnz_sizes, num_runs,vector_range):
    """Queue a chain of USPMV ops (one per entry of ``m_sizes``), execute
    them, and check the final output.

    ``m_sizes`` and ``k_sizes`` are modified in place: each of the first
    ``len(m_sizes)`` entries is replaced by the value returned from
    ``test.get_padded_size`` with a granularity of
    ``GEMX_ddrWidth * GEMX_uspmvInterleaves``.

    The ops are chained through ``C_list``: op ``i`` reads ``C_list[i]``
    and writes ``C_list[i + 1]``, with ``C_list[0]`` being the input
    matrix ``B``. The final check is delegated to
    ``test.multiply_and_cmp_uspmv``.

    ``vector_range`` is accepted for interface compatibility but is not
    used by this function.
    """
    ddrWidth = int(xclbin_opts["GEMX_ddrWidth"])
    min_m = ddrWidth * int(xclbin_opts["GEMX_uspmvInterleaves"])
    # NOTE(review): K sizes are padded with the same granularity as M sizes;
    # confirm this is intended.
    for i in range(len(m_sizes)):
        m_sizes[i] = test.get_padded_size(m_sizes[i], min_m)
        k_sizes[i] = test.get_padded_size(k_sizes[i], min_m)
    print ("size:",m_sizes,k_sizes,"nnz:",nnz_sizes)

    # B is the input of the first op, so its width must be the first
    # stage's K. The previous code indexed k_sizes with the loop variable
    # left over from the padding loop, i.e. the *last* stage's K (and an
    # unbound name when m_sizes is empty).
    B = np.zeros((num_runs, k_sizes[0]), dtype=np.float32)
    test.fillMod(9, num_runs, k_sizes[0], B)

    C_list = [B]
    for i in range(len(m_sizes)):
        C = np.zeros((num_runs, m_sizes[i]), dtype=np.float32)
        C_list.append(C)
        A = gemx.sendUSpMat(np.array(rows[i]).astype(np.uint16),
                            np.array(cols[i]).astype(np.uint16),
                            np.array(datas[i]),
                            np.array(m_sizes[i], dtype=np.int32),
                            np.array(k_sizes[i], dtype=np.int32),
                            np.array(nnz_sizes[i], dtype=np.int32),
                            np.array(1, dtype=np.float32),
                            xclbin_opts)
        # Input and output of this op are both sent before the op is queued.
        gemx.sendMat(C_list[i])
        gemx.sendMat(C_list[i + 1])
        gemx.addUSPMVOp(A, C_list[i], C_list[i + 1], num_runs)

    gemx.execute()
    gemx.clearInstrBuf()
    gemx.getMat(C_list[-1])
    test.multiply_and_cmp_uspmv(rows, cols, datas, m_sizes, k_sizes, B, C_list[-1])
Пример #3
0
def test_perf_fcn(m,
                  k,
                  n,
                  xclbin_opts,
                  post_scale=None,
                  A_range=32764,
                  B_range=32764,
                  bias_range=32764):
    """Time a single FCN operation on random inputs and verify its result.

    ``m``, ``k`` and ``n`` are first passed through ``test.get_padded_size``
    with granularities of ``GEMX_gemm{M,K,N}Blocks * GEMX_ddrWidth``.

    When ``xclbin_opts["GEMX_dataType"]`` is ``"short"`` the inputs are
    random int16 matrices drawn from ``[-A_range, A_range)`` and
    ``[-B_range, B_range)`` with an int32 bias drawn from
    ``[-bias_range, bias_range)`` (all zeros when ``bias_range`` is 0).
    Otherwise the inputs are float32 matrices drawn uniformly from
    ``[-128, 128)`` with a zero float32 bias; the ``*_range`` arguments are
    ignored in that case.

    Args:
        m, k, n: Requested problem dimensions before padding.
        xclbin_opts: Mapping providing the ``GEMX_*`` configuration values
            read above.
        post_scale: Two-element sequence whose items are forwarded to
            ``gemx.addFCNOp``. Defaults to ``[1, 0]``.
        A_range, B_range, bias_range: Bounds for the random integer data.

    The timed region covers sending the matrices, queueing and executing
    the op, and reading the result back. The measured time and an operation
    count of ``2*m*n*k + 3*m*n`` are reported through ``test.test_perf``;
    the result is then checked with ``test.multiply_and_cmp``.
    """
    # A list default would be one object shared by every call and by the
    # helpers it is handed to, so build a fresh one per call instead.
    if post_scale is None:
        post_scale = [1, 0]

    ddrWidth = int(xclbin_opts["GEMX_ddrWidth"])
    m = test.get_padded_size(m,
                             int(xclbin_opts["GEMX_gemmMBlocks"]) * ddrWidth)
    k = test.get_padded_size(k,
                             int(xclbin_opts["GEMX_gemmKBlocks"]) * ddrWidth)
    n = test.get_padded_size(n,
                             int(xclbin_opts["GEMX_gemmNBlocks"]) * ddrWidth)
    if xclbin_opts["GEMX_dataType"] == "short":
        mat_A = np.random.randint(low=-A_range,
                                  high=A_range,
                                  size=(m, k),
                                  dtype=np.int16)
        mat_B = np.random.randint(low=-B_range,
                                  high=B_range,
                                  size=(k, n),
                                  dtype=np.int16)
        if bias_range != 0:
            bias = np.random.randint(low=-bias_range,
                                     high=bias_range,
                                     size=(m, n),
                                     dtype=np.int32)
        else:
            # randint rejects an empty [0, 0) interval, so a zero range
            # means "no bias".
            bias = np.zeros((m, n), dtype=np.int32, order='C')
        C_fpga = np.zeros((m, n), dtype=np.int16)
    else:
        mat_A = np.random.uniform(low=-128, high=128,
                                  size=(m, k)).astype(np.float32)
        mat_B = np.random.uniform(low=-128, high=128,
                                  size=(k, n)).astype(np.float32)
        bias = np.zeros((m, n), dtype=np.float32, order='C')
        C_fpga = np.zeros((m, n), dtype=np.float32)

    start_time = time.time()
    gemx.sendMat(mat_A)
    gemx.sendMat(mat_B)
    gemx.sendMat(C_fpga)
    gemx.sendMat(bias)
    gemx.addFCNOp(mat_A, mat_B, C_fpga, bias, post_scale[0], post_scale[1], 1,
                  0)
    gemx.execute()
    gemx.clearInstrBuf()
    gemx.getMat(C_fpga)
    end_time = time.time()

    total_operations = 2 * m * n * k + m * n * 3
    test.test_perf(end_time - start_time, total_operations, m, k, n, ddrWidth)
    test.multiply_and_cmp(C_fpga, mat_A, mat_B, bias, m, n, post_scale)
Пример #4
0
def test_multi_fcn(ins_count,
                   m_size,
                   k_size,
                   n_size,
                   post_scale=None,
                   A_range=32764,
                   B_range=32764):
    """Run a chain of ``ins_count`` FCN operations and verify the last one.

    The first op multiplies ``mat_A[0]`` by a random matrix ``mat_B0``;
    every later op ``i`` uses the previous op's output ``mat_C[i - 1]`` as
    its second operand. All biases are zero. After execution every output
    is read back and the last op is checked with ``test.multiply_and_cmp``.

    ``m_size``, ``k_size`` and ``n_size`` are modified in place: the first
    ``ins_count`` entries of each are replaced by the value returned from
    ``test.get_padded_size`` with granularities of
    ``GEMX_gemm{M,K,N}Blocks * GEMX_ddrWidth``.

    Args:
        ins_count: Number of chained FCN instructions (must be >= 1).
        m_size, k_size, n_size: Per-instruction dimension lists, each with
            at least ``ins_count`` entries.
        post_scale: Two-element sequence whose items are forwarded to
            ``gemx.addFCNOp``. Defaults to ``[1, 0]``.
        A_range, B_range: Bounds for the random int16 input data.

    Raises:
        ValueError: If ``ins_count`` is smaller than 1.
    """
    if ins_count < 1:
        raise ValueError("ins_count must be at least 1")
    # A list default would be one object shared by every call and by the
    # helpers it is handed to, so build a fresh one per call instead.
    if post_scale is None:
        post_scale = [1, 0]

    ddrWidth = int(xclbin_opts["GEMX_ddrWidth"])
    # Padding granularities do not depend on the instruction index.
    m_multiple = int(xclbin_opts["GEMX_gemmMBlocks"]) * ddrWidth
    k_multiple = int(xclbin_opts["GEMX_gemmKBlocks"]) * ddrWidth
    n_multiple = int(xclbin_opts["GEMX_gemmNBlocks"]) * ddrWidth

    mat_A = []
    mat_C = []
    mat_bias = []
    for i in range(ins_count):
        m_size[i] = test.get_padded_size(m_size[i], m_multiple)
        k_size[i] = test.get_padded_size(k_size[i], k_multiple)
        n_size[i] = test.get_padded_size(n_size[i], n_multiple)
        mat_A.append(
            np.random.randint(low=-A_range,
                              high=A_range,
                              size=(m_size[i], k_size[i]),
                              dtype=np.int16))
        mat_bias.append(np.zeros((m_size[i], n_size[i]), dtype=np.int32))
        mat_C.append(
            np.zeros((m_size[i], n_size[i]), dtype=np.int16, order='C'))
    mat_B0 = np.random.randint(low=-B_range,
                               high=B_range,
                               size=(k_size[0], n_size[0]),
                               dtype=np.int16)

    for i in range(ins_count):
        gemx.sendMat(mat_A[i])
        gemx.sendMat(mat_C[i])
        gemx.sendMat(mat_bias[i])
    gemx.sendMat(mat_B0)

    # Chain the ops: the second operand of op i is the output of op i - 1
    # (mat_B0 for the first op). The previous code hard-coded exactly four
    # ops regardless of ins_count.
    for i in range(ins_count):
        rhs = mat_B0 if i == 0 else mat_C[i - 1]
        gemx.addFCNOp(mat_A[i], rhs, mat_C[i], mat_bias[i], post_scale[0],
                      post_scale[1], 1, 0)
    gemx.execute()
    gemx.clearInstrBuf()
    for result in mat_C:
        gemx.getMat(result)

    last = ins_count - 1
    last_rhs = mat_B0 if last == 0 else mat_C[last - 1]
    test.multiply_and_cmp(mat_C[last], mat_A[last], last_rhs, mat_bias[last],
                          m_size[last], n_size[last], post_scale)