def test_csr2sertilp(self):
    """Check spf.csr2sertilp against hand-written expected arrays.

    An 8x6 dense matrix is converted to CSR and passed to
    ``spf.csr2sertilp`` with threadsPerRow=2, prefetch=2, sliceSize=4 and
    minAlign=8.  The four returned arrays (values, column indices, row
    lengths, slice starts) are each compared to a reference array with
    ``np.allclose``.
    """
    dense = np.array([
        [1, 0, 2, 0, 3, 0],
        [4, 0, 5, 0, 0, 0],
        [0, 0, 0, 6, 7, 0],
        [0, 0, 0, 0, 0, 8],
        [21, 0, 22, 0, 23, 0],
        [24, 0, 25, 0, 0, 0],
        [0, 0, 0, 26, 27, 0],
        [0, 0, 0, 0, 0, 28],
    ])
    csr = sp.csr_matrix(dense)

    expected_row_len = np.array([1, 1, 1, 1, 1, 1, 1, 1])
    expected_sl_start = np.array([0, 16, 32])
    expected_val = np.array([
        1.0, 2.0, 4.0, 5.0, 6.0, 7.0, 8.0, 0.0,
        3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        21.0, 22.0, 24.0, 25.0, 26.0, 27.0, 28.0, 0.0,
        23.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
    ])

    # Column numbers exactly as they appear in the dataset, i.e. counted
    # from 1; a 0 here has no 1-based column (presumably a padding slot).
    dataset_cols = np.array([
        1, 3, 1, 3, 4, 5, 6, 0,
        5, 0, 0, 0, 0, 0, 0, 0,
        1, 3, 1, 3, 4, 5, 6, 0,
        5, 0, 0, 0, 0, 0, 0, 0,
    ])
    # The sparse format counts columns from 0, so subtract 1; the entries
    # that were 0 would become -1 and are reset to 0.
    expected_col = dataset_cols - 1
    expected_col[expected_col < 0] = 0

    val, col, row_len, sl_start = spf.csr2sertilp(
        csr,
        threadsPerRow=2,
        prefetch=2,
        sliceSize=4,
        minAlign=2 * 4,
    )

    # Checked in the same order as before; the first mismatch fails the test.
    comparisons = (
        (row_len, expected_row_len,
         'sliced ellpack row length arrays are not equal'),
        (sl_start, expected_sl_start,
         'sliced ellpack slice start arrays are not equal'),
        (val, expected_val,
         'sliced ellpack values arrays are not equal'),
        (col, expected_col,
         'sliced ellpack collumns arrays are not equal'),
    )
    for actual, expected, message in comparisons:
        self.assertTrue(np.allclose(actual, expected), message)
print 'Total sum:',resultsEll.sum() print "Error to CPU:",np.square(resultsEll-kij).sum() print resultsEll[0:1000:skip] #print results ##------------------------------------------ # SERTILP gpu kernel sliceSize=64 threadsPerRow=2 prefetch=2 minAlign=64 #8 v,c,r,ss=spf.csr2sertilp(X, threadsPerRow=threadsPerRow, prefetch=prefetch, sliceSize=sliceSize, minAlign=minAlign) sd=rbf.Diag self_dot = rbf.Xsquare results = np.zeros(2*num_el,dtype=np.float32) kernel_file = "sertilpMulti2Col.cu" with open (kernel_file,"r") as CudaFile: data = CudaFile.read(); #compile module #module = SourceModule(data,cache_dir='./nvcc_cache',keep=True,no_extern_c=True) module = SourceModule(data,keep=True,no_extern_c=True,options=["--ptxas-options=-v"])