class fcn_dataGenerator:
    """Generates random fully-connected-network test data and the golden
    forward-pass result, writing every matrix to a binary .dat file.
    """

    def __init__(self, dirname):
        # Random float32 values in [-1, 1); DataGenerator is project-provided.
        self.dataGen = DataGenerator()
        self.dataGen.setRange(-1, 1)
        self.dataGen.setDataType(np.float32)
        self.dirname = dirname

    def setWeights(self, sizes):
        """Create one random weight and bias matrix per layer.

        sizes is a flat list of (k, n) pairs: layer i gets a
        (sizes[2i], sizes[2i+1]) weight matrix and a (1, sizes[2i+1])
        bias row.
        """
        self.weights = []
        self.bias = []
        self.sizes = sizes
        self.number_of_layers = int(len(sizes) / 2)
        for layer in range(self.number_of_layers):
            k_dim = sizes[2 * layer]
            n_dim = sizes[2 * layer + 1]
            self.weights.append(self.dataGen.matrix((k_dim, n_dim)))
            self.bias.append(self.dataGen.matrix((1, n_dim)))

    def compute(self, batch_size, number_of_models):
        """Run the forward pass (sigmoid at every layer) and dump the input,
        per-model weights/biases, and the golden output to .dat files.

        NOTE(review): self.dirname is mutated here, so calling compute()
        twice stacks batch-size suffixes — presumably single-use; confirm
        with callers.
        NOTE(review): weights come from a single setWeights() call, so every
        model writes identical weight/bias/golden data — confirm intended.
        """
        self.dirname = self.dirname + "_" + str(batch_size)
        os.makedirs(self.dirname, exist_ok=True)
        rows = batch_size
        cols = self.sizes[0]
        self.input = self.dataGen.matrix((rows, cols))
        self.input.tofile(os.path.join(self.dirname, 'input.dat'))
        print('input', self.input.shape)
        for model in range(number_of_models):
            self.C = [self.input]
            for layer in range(self.number_of_layers):
                tiled_bias = np.tile(self.bias[layer], (rows, 1))
                pre_act = (np.matmul(self.C[layer], self.weights[layer],
                                     dtype=np.float32)
                           + tiled_bias).astype(np.float32)
                # sigmoid
                act = 1 / (1 + np.exp(-pre_act))
                self.C.append(act)
                self.weights[layer].tofile(
                    os.path.join(
                        self.dirname,
                        'model' + str(model) + '_weight' + str(layer) + '.dat'))
                print('weight', layer, self.weights[layer].shape)
                self.bias[layer].tofile(
                    os.path.join(
                        self.dirname,
                        'model' + str(model) + '_bias' + str(layer) + '.dat'))
                print('bias', layer, self.bias[layer].shape)
            self.C[-1].tofile(
                os.path.join(self.dirname, 'model' + str(model) + '_goldenC.dat'))
            print(self.C[-1])
            print('golden_c', self.C[-1].shape)
class Test: def cmp(self, A, B): if np.array_equal(A, B): print("Success!\n") else: print("not equal!") np.savetxt("A.np", A, fmt="%d") np.savetxt("B.np", B, fmt="%d") sys.exit(1) def cmpWithinTolerance(self, A, B): if np.allclose(A, B, 1e-3, 1e-5): print("Success!\n") else: print(A.shape, B.shape) np.savetxt("C.np", A, fmt="%f") np.savetxt("C_cpu.np", B, fmt="%f") diff = np.isclose(A.flatten(), B.flatten(), 1e-3, 1e-5) countDiff = diff.shape[0] - np.count_nonzero(diff) print("not equal, number of mismatches = ", countDiff) mismatch = ((diff == 0).nonzero()) print("mismatches are in ", mismatch[0]) for i in mismatch[0]: print(A.flatten()[i], " is different from ", B.flatten()[i]) sys.exit(1) def get_padded_size(self, size, min_size): size_padded = int(math.ceil(np.float32(size) / min_size) * min_size) return size_padded def test_basic_gemm(self, m, k, n, xclbin_opts, idxKernel=0, idxDevice=0, minRange=-16384, maxRange=16384): if xclbin_opts['BLAS_dataType'] == 'short': dtype = np.int16 elif xclbin_opts['BLAS_dataType'] == 'float': dtype = np.float32 else: raise TypeError("type", xclbin_opts["BLAS_dataType"], "not supported") ddrWidth = int(xclbin_opts["BLAS_ddrWidth"]) padded_m = self.get_padded_size( m, int(xclbin_opts["BLAS_gemmMBlocks"]) * ddrWidth) padded_k = self.get_padded_size( k, int(xclbin_opts["BLAS_gemmKBlocks"]) * ddrWidth) padded_n = self.get_padded_size( n, int(xclbin_opts["BLAS_gemmNBlocks"]) * ddrWidth) self.dataGen = DataGenerator() self.dataGen.setRange(minRange, maxRange) self.dataGen.setDataType(dtype) A = self.dataGen.matrix((padded_m, padded_k)) B = self.dataGen.matrix((padded_k, padded_n)) C = self.dataGen.matrix((padded_m, padded_n)) golden_C = np.matmul(A, B, dtype=dtype) + C xfblas.sendMat(A, idxKernel, idxDevice) xfblas.sendMat(B, idxKernel, idxDevice) xfblas.sendMat(C, idxKernel, idxDevice) xfblas.gemmOp(A, B, C, idxKernel, idxDevice) xfblas.getMat(C, idxKernel, idxDevice) if dtype == np.int16: self.cmp(C, golden_C) else: 
self.cmpWithinTolerance(C, golden_C)
class fcn_dataGenerator:
    """Generates multi-model FCN test data: several random input batches,
    fresh random weights/biases per model, and the golden forward-pass
    output for every (model, input) pair, all written as .bin files.
    """

    def __init__(self, dirname):
        # Random float32 values in [-1, 1); DataGenerator is project-provided.
        self.dataGen = DataGenerator()
        self.dataGen.setRange(-1, 1)
        self.dataGen.setDataType(np.float32)
        self.dirname = dirname

    def setWeights(self, sizes):
        """(Re)generate random weights/biases; sizes is a flat list of
        (k, n) pairs, one pair per layer."""
        self.weights = []
        self.bias = []
        self.sizes = sizes
        self.number_of_layers = int(len(sizes) / 2)
        for layer in range(self.number_of_layers):
            k_dim = sizes[2 * layer]
            n_dim = sizes[2 * layer + 1]
            self.weights.append(self.dataGen.matrix((k_dim, n_dim)))
            self.bias.append(self.dataGen.matrix((1, n_dim)))

    def compute(self, weights_size, batch_size, number_of_models,
                number_of_inputs, activation):
        """Generate inputs and models, run the forward pass for every
        (model, input) pair, and dump everything to .bin files.

        Hidden layers use `activation` ('tansig' or 'relu'); the final
        layer is always sigmoid (the output filename says 'sigmoid').
        NOTE(review): any other activation string leaves hidden layers
        linear — confirm that is intended.
        NOTE(review): self.dirname is mutated, so calling compute() twice
        stacks batch-size suffixes — presumably single-use; confirm.
        """
        self.dirname = self.dirname + "_" + str(batch_size)
        os.makedirs(self.dirname, exist_ok=True)
        size_m = batch_size
        size_n = weights_size[0]
        self.inputs = []
        # generate random inputs
        for inp in range(number_of_inputs):
            batch = self.dataGen.matrix((size_m, size_n))
            batch.tofile(
                os.path.join(
                    self.dirname,
                    'mat_input_' + str(inp) + '_' + str(size_m) + '.bin'))
            self.inputs.append(batch)
        for model in range(number_of_models):
            # fresh random weights for every model
            self.setWeights(weights_size)
            # save weights and bias to file
            for layer in range(self.number_of_layers):
                self.weights[layer].tofile(
                    os.path.join(
                        self.dirname,
                        'matW' + str(layer + 1) + '_' + str(model) + '.bin'))
                self.bias[layer].tofile(
                    os.path.join(
                        self.dirname,
                        'matb' + str(layer + 1) + '_' + str(model) + '.bin'))
            for inp in range(number_of_inputs):
                self.C = [self.inputs[inp]]
                for layer in range(self.number_of_layers):
                    tiled_bias = np.tile(self.bias[layer], (size_m, 1))
                    pre_act = (np.matmul(
                        self.C[layer], self.weights[layer],
                        dtype=np.float32) + tiled_bias).astype(np.float32)
                    if layer != (self.number_of_layers - 1):
                        # hidden-layer activation
                        if activation == 'tansig':
                            act = 2 / (1 + np.exp(-2 * pre_act)) - 1
                        elif activation == 'relu':
                            pre_act[pre_act < 0] = 0
                            act = pre_act
                        else:
                            act = pre_act
                    else:
                        # sigmoid on the output layer
                        act = 1 / (1 + np.exp(-pre_act))
                    self.C.append(act)
                self.C[-1].tofile(
                    os.path.join(
                        self.dirname,
                        'mat_sigmoid_output_input_' + str(inp) + '_model_' +
                        str(model) + '.bin'))
class gemm_dataGenerator:
    """Generates random GEMM test matrices and the golden result
    alpha * (A @ B) + beta * C, writing everything as .bin files.

    NOTE(review): matA.bin holds A **transposed**, while the golden
    result is computed from the untransposed A — presumably the consumer
    expects A in transposed (column-major) layout; confirm against the
    kernel that reads matA.bin.
    """

    # dtype name -> (numpy dtype, (min, max) random range);
    # unsigned types use a non-negative range.
    _TYPE_MAP = {
        'float': (np.float32, (-100, 100)),
        'short': (np.int16, (-100, 100)),
        'int16_t': (np.int16, (-100, 100)),
        'int': (np.int32, (-100, 100)),
        'int32_t': (np.int32, (-100, 100)),
        'int8_t': (np.int8, (-100, 100)),
        'uint8_t': (np.uint8, (0, 100)),
        'uint16_t': (np.uint16, (0, 100)),
        'uint32_t': (np.uint32, (0, 100)),
    }

    def __init__(self, dirname, dataType):
        """Set up the random generator for the requested element type.

        dirname: output directory for compute().
        dataType: a C-style type name key of _TYPE_MAP.
        Raises ValueError for an unsupported dataType. (The original code
        only printed a message and continued with self.dtype unset, which
        surfaced later as a confusing AttributeError in compute().)
        """
        try:
            dtype, (low, high) = self._TYPE_MAP[dataType]
        except KeyError:
            raise ValueError(
                'dataType not supported: ' + str(dataType)) from None
        self.dataGen = DataGenerator()
        self.dataGen.setDataType(dtype)
        self.dtype = dtype
        self.dataGen.setRange(low, high)
        self.dirname = dirname

    def compute(self, args):
        """Generate A, B, C, write them (A transposed) plus the golden
        alpha*A@B + beta*C result into self.dirname.

        args must provide m, n, k, alpha, beta attributes
        (e.g. an argparse.Namespace).
        """
        os.makedirs(self.dirname, exist_ok=True)
        size_m = args.m
        size_n = args.n
        size_k = args.k
        matA = self.dataGen.matrix((size_m, size_k))
        # the kernel consumes A in transposed layout
        np.transpose(matA).tofile(os.path.join(self.dirname, 'matA.bin'))
        matB = self.dataGen.matrix((size_k, size_n))
        matB.tofile(os.path.join(self.dirname, 'matB.bin'))
        matC = self.dataGen.matrix((size_m, size_n))
        matC.tofile(os.path.join(self.dirname, 'matC.bin'))
        golden = (args.alpha * np.matmul(matA, matB, dtype=self.dtype) +
                  args.beta * matC).astype(self.dtype)
        golden.tofile(os.path.join(self.dirname, 'golden.bin'))