def gen_data(fm_shape, w_shape, pad, stride, dilation, bias, expect_file):
    """Build random conv inputs plus the expected output, optionally cached on disk.

    The shapes actually used are the ones returned by ``conv_shape_4d`` after
    the raw parameters went through ``conv_param_prepare``.

    The ``WRITE_TO_DISK`` environment variable (default ``"No"``) selects how
    the expected output is obtained:

    * ``"No"`` and ``expect_file`` exists: the expected output is read from
      ``expect_file`` as float16 and reshaped to the output shape.
    * otherwise: it is computed with ``conv_forward_naive`` on float32 copies
      of the inputs and cast to float16.
    * ``"Yes"``: the computed output is additionally written to
      ``expect_file`` as raw bytes.

    Args:
        fm_shape: feature-map shape, forwarded to ``conv_shape_4d``.
        w_shape: weight shape, forwarded to ``conv_shape_4d``.
        pad: padding, stored in the conv parameter dict.
        stride: stride, stored in the conv parameter dict.
        dilation: dilation, stored in the conv parameter dict.
        bias: truthy to draw a random bias, falsy for an all-zero bias.
        expect_file: path of the cached expected-output file.

    Returns:
        The result of ``conv_tensor_4d_to_5d(x, w, b, out)``.
    """
    # The dict keeps the caller's raw values; the prepared values returned by
    # conv_param_prepare are only used for the shape computation below.
    conv_param = {'stride': stride, 'pad': pad, 'dilation': dilation}
    stride, pad, dilation = conv_param_prepare(conv_param)
    fm_shape, w_shape, out_shape = conv_shape_4d(fm_shape, w_shape, pad, stride, dilation)
    IN, IC, IH, IW = fm_shape
    WN, WC, WH, WW = w_shape

    x = random_gaussian((IN, IC, IH, IW), miu=1, sigma=0.1).astype(np.float16)
    w = random_gaussian((WN, WC, WH, WW), miu=0.5, sigma=0.01).astype(np.float16)
    if bias:
        b = random_gaussian((WN,), miu=1, sigma=0.1).astype(np.float16)
    else:
        b = np.zeros(WN, dtype=np.float16)

    flag_w = os.environ.get("WRITE_TO_DISK", "No")
    if flag_w == "No" and os.path.exists(expect_file):
        # NOTE(review): a cached file only matches x/w/b if random_gaussian
        # reproduces the same data on every run -- confirm.
        out = np.fromfile(expect_file, np.float16).reshape(out_shape)
    else:
        out = conv_forward_naive(x.astype(np.float32), w.astype(np.float32),
                                 b.astype(np.float32), conv_param)
        out = out.astype(np.float16)

    if flag_w == "Yes":
        # Raw array bytes must go through a binary-mode handle; the context
        # manager closes it, so no explicit close() is needed.
        with open(expect_file, "wb") as expect_fh:
            out.tofile(expect_fh)

    return conv_tensor_4d_to_5d(x, w, b, out)
def gen_data(fm_shape, w_shape, pad, stride, bias):
    """Create random conv inputs and the reference output in 5D layouts.

    Channel counts (input channels, kernel count, kernel channels) are rounded
    up to a multiple of 16 before any data is generated.

    Args:
        fm_shape: 4-element feature-map shape (N, C, H, W).
        w_shape: 4-element weight shape (N, C, H, W).
        pad: padding used in the output-size formula and passed to
            ``conv_forward_naive``.
        stride: stride used in the output-size formula and passed to
            ``conv_forward_naive``.
        bias: truthy for a ``np.random.rand`` bias, falsy for a zero bias.

    Returns:
        Tuple ``(feature, filter, b, output)`` where feature/output are laid
        out as (N, C1, H, W, C0) and filter as (C1, H, W, N, C0).

    Note:
        ``fusion`` is neither a parameter nor a local; it is resolved as a
        global when the function runs.
    """
    C0 = 16

    def round_up(channels):
        # Smallest multiple of C0 that is >= channels.
        return ((channels + C0 - 1) // C0) * C0

    def to_nc1hwc0(tensor, n, c, h, w_):
        # (N, C, H, W) -> (N, C1, H, W, C0)
        return tensor.reshape(n, c // C0, C0, h, w_).transpose(0, 1, 3, 4, 2).copy()

    batch, in_ch, in_h, in_w = fm_shape
    in_ch = round_up(in_ch)
    k_num, k_ch, k_h, k_w = w_shape
    k_num = round_up(k_num)
    k_ch = round_up(k_ch)

    out_h = (in_h + 2 * pad - k_h) // stride + 1
    out_w = (in_w + 2 * pad - k_w) // stride + 1

    x = random_gaussian((batch, in_ch, in_h, in_w), miu=1, sigma=0.1).astype(np.float16)
    w = random_gaussian((k_num, k_ch, k_h, k_w), miu=0.5, sigma=0.01).astype(np.float16)
    b = np.random.rand(k_num).astype(np.float16) if bias else np.zeros(k_num, dtype=np.float16)

    out = conv_forward_naive(x, w, b, {'stride': stride, 'pad': pad})

    feature_5d = to_nc1hwc0(x, batch, in_ch, in_h, in_w)
    # (N, C, H, W) -> (C1, H, W, N, C0)
    filter_5d = w.reshape(k_num, k_ch // C0, C0, k_h, k_w).transpose(1, 3, 4, 0, 2).copy()
    output_5d = to_nc1hwc0(out, batch, k_num, out_h, out_w)

    if fusion:
        # Clamp negatives to zero; the zero array stays the first operand.
        output_5d = np.maximum(np.zeros(output_5d.shape, output_5d.dtype), output_5d)

    return feature_5d, filter_5d, b, output_5d
def gen_data(fm_shape, w_shape, pad, stride, dilation, strided=-1):
    """Create random (optionally zero-interleaved) conv inputs and the reference output.

    Channel counts are rounded up to a multiple of 16. No bias is used:
    ``conv_forward_naive`` is called with ``None`` as its bias argument.

    Args:
        fm_shape: 4-element feature-map shape (N, C, H, W).
        w_shape: 4-element weight shape (N, C, H, W).
        pad: padding used in the output-size formula and passed on.
        stride: stride used in the output-size formula and passed on.
        dilation: dilation used for the effective kernel size and passed on.
        strided: when ``<= 1`` the whole feature map is random. When ``> 1``
            the feature map is zero everywhere except at H/W indices that are
            multiples of ``strided``, which hold random values.

    Returns:
        Tuple ``(feature, filter, output)`` where feature/output are laid out
        as (N, C1, H, W, C0) and filter as (C1, H, W, N, C0).
    """
    C0 = 16
    IN, IC, IH, IW = fm_shape
    IC = ((IC + C0 - 1) // C0) * C0
    WN, WC, WH, WW = w_shape
    WN = ((WN + C0 - 1) // C0) * C0
    WC = ((WC + C0 - 1) // C0) * C0
    ON, OC = IN, WN

    # Effective kernel extent once dilation is applied.
    WHD = (WH - 1) * dilation + 1
    WWD = (WW - 1) * dilation + 1
    OH = (IH + 2 * pad - WHD) // stride + 1
    OW = (IW + 2 * pad - WWD) // stride + 1

    if strided <= 1:
        x = random_gaussian((IN, IC, IH, IW), miu=1, sigma=0.1).astype(np.float16)
    else:
        # Number of positions 0, strided, 2*strided, ... that fit inside each
        # spatial dimension. Ceil division keeps the last index in bounds even
        # when the dimension is an exact multiple of `strided`; it equals
        # `dim // strided + 1` whenever the dimension is not a multiple.
        sub_h = (IH + strided - 1) // strided
        sub_w = (IW + strided - 1) // strided
        x_tmp = random_gaussian((IN, IC, sub_h, sub_w), miu=1, sigma=0.1).astype(np.float16)
        x = np.zeros((IN, IC, IH, IW), dtype=np.float16)
        # Scatter the dense samples onto the strided grid in one assignment.
        x[:, :, ::strided, ::strided] = x_tmp

    w = random_gaussian((WN, WC, WH, WW), miu=0.5, sigma=0.01).astype(np.float16)

    conv_param = {'stride': stride, 'pad': pad, 'dilation': dilation}
    out = conv_forward_naive(x, w, None, conv_param)

    # transpose to 5D - NC1HWC0
    feature = x.reshape(IN, IC // C0, C0, IH, IW).transpose(0, 1, 3, 4, 2).copy()
    # transpose to 5D - C1HWNC0
    weight = w.reshape(WN, WC // C0, C0, WH, WW).transpose(1, 3, 4, 0, 2).copy()
    # transpose to 5D - NC1HWC0
    output = out.reshape(ON, OC // C0, C0, OH, OW).transpose(0, 1, 3, 4, 2).copy()
    return feature, weight, output
def gen_data(fm_shape, w_shape, pad, stride, dilation, bias):
    """Create random conv inputs with an all-zero bias and the reference output.

    The shapes used are the ones returned by ``conv_shape_4d``. The reference
    output comes from ``conv_forward_naive`` on float32 copies of the feature
    map and weights.

    Args:
        fm_shape: feature-map shape, forwarded to ``conv_shape_4d``.
        w_shape: weight shape, forwarded to ``conv_shape_4d``.
        pad: padding, stored in the conv parameter dict.
        stride: stride, stored in the conv parameter dict.
        dilation: dilation, stored in the conv parameter dict.
        bias: accepted but never read; the bias is always zeros.

    Returns:
        The four values unpacked from ``conv_tensor_4d_to_5d(x, w, b, out)``,
        as a tuple.
    """
    # Built from the caller's raw values before they are rebound below.
    conv_param = {'stride': stride, 'pad': pad, 'dilation': dilation}
    stride, pad, dilation = conv_param_prepare(conv_param)
    fm_shape, w_shape, out_shape = conv_shape_4d(fm_shape, w_shape, pad, stride, dilation)

    batch, in_ch, in_h, in_w = fm_shape
    k_num, k_ch, k_h, k_w = w_shape

    x = random_gaussian((batch, in_ch, in_h, in_w), miu=1, sigma=0.1).astype(np.float16)
    w = random_gaussian((k_num, k_ch, k_h, k_w), miu=0.5, sigma=0.01).astype(np.float16)
    b = np.zeros(k_num, dtype=np.float16)

    x_f32 = x.astype(np.float32)
    w_f32 = w.astype(np.float32)
    out = conv_forward_naive(x_f32, w_f32, b, conv_param)

    feature_5d, filter_5d, bias_5d, output_5d = conv_tensor_4d_to_5d(x, w, b, out)
    return feature_5d, filter_5d, bias_5d, output_5d
def gen_data(fm_shape, w_shape, pad, stride, dilation, bias):
    """Create random conv inputs and the reference output of conv(x + 1.0).

    Channel counts are rounded up to a multiple of 16. The returned feature
    map is the raw ``x``, while the reference output is computed from
    ``x + 1.0``.

    Args:
        fm_shape: 4-element feature-map shape (N, C, H, W).
        w_shape: 4-element weight shape (N, C, H, W).
        pad: padding used in the output-size formula and passed on.
        stride: stride used in the output-size formula and passed on.
        dilation: dilation used for the effective kernel size and passed on.
        bias: truthy for a ``np.random.rand`` bias, falsy for a zero bias.

    Returns:
        Tuple ``(feature, filter, b, output)`` where feature/output are laid
        out as (N, C1, H, W, C0) and filter as (C1, H, W, N, C0).
    """
    C0 = 16

    def round_up(channels):
        # Smallest multiple of C0 that is >= channels.
        return ((channels + C0 - 1) // C0) * C0

    def to_nc1hwc0(tensor, n, c, h, w_):
        # (N, C, H, W) -> (N, C1, H, W, C0)
        return tensor.reshape(n, c // C0, C0, h, w_).transpose(0, 1, 3, 4, 2).copy()

    batch, in_ch, in_h, in_w = fm_shape
    in_ch = round_up(in_ch)
    k_num, k_ch, k_h, k_w = w_shape
    k_num = round_up(k_num)
    k_ch = round_up(k_ch)

    # Kernel extent after dilation, then the usual output-size formula.
    dilated_h = (k_h - 1) * dilation + 1
    dilated_w = (k_w - 1) * dilation + 1
    out_h = (in_h + 2 * pad - dilated_h) // stride + 1
    out_w = (in_w + 2 * pad - dilated_w) // stride + 1

    x = random_gaussian((batch, in_ch, in_h, in_w), miu=1, sigma=0.1).astype(np.float16)
    w = random_gaussian((k_num, k_ch, k_h, k_w), miu=0.5, sigma=0.01).astype(np.float16)
    shifted_x = x + 1.0
    b = np.random.rand(k_num).astype(np.float16) if bias else np.zeros(k_num, dtype=np.float16)

    out = conv_forward_naive(
        shifted_x, w, b, {'stride': stride, 'pad': pad, 'dilation': dilation})

    feature_5d = to_nc1hwc0(x, batch, in_ch, in_h, in_w)
    # (N, C, H, W) -> (C1, H, W, N, C0)
    filter_5d = w.reshape(k_num, k_ch // C0, C0, k_h, k_w).transpose(1, 3, 4, 0, 2).copy()
    output_5d = to_nc1hwc0(out, batch, k_num, out_h, out_w)
    return feature_5d, filter_5d, b, output_5d
def benchmark(x, w, bias, gamma, beta, running_mean, running_var,
              other_branch_data, pad, stride, dilation, momentum, eps,
              has_add, has_relu):
    """Compute reference results for a convolution followed by batch norm.

    The convolution is evaluated by ``conv_forward_naive`` on float32 copies
    of ``x`` and ``w``; the fourth value returned by ``conv_tensor_4d_to_5d``
    is used as the conv result. Mean and variance are taken over axes
    ``(0, 2, 3)`` in float64 and stored as float32, then blended with the
    running statistics using ``momentum``. ``bn_benchmark`` supplies the
    normalised output.

    ``other_branch_data``, ``has_add`` and ``has_relu`` are accepted but never
    read by this function.

    Returns:
        Tuple ``(conv_out, bn_out, mean_new, var_new, batch_mean, batch_var)``
        where ``conv_out`` and ``bn_out`` are cast to ``x.dtype``.
    """
    def blend(running, batch):
        # Momentum-weighted update of a running statistic.
        return momentum * running + (1 - momentum) * batch

    conv_4d = conv_forward_naive(
        x.astype(np.float32),
        w.astype(np.float32),
        bias,
        {'stride': stride, 'pad': pad, 'dilation': dilation},
    )
    _, _, _, conv_expect = conv_tensor_4d_to_5d(x, w, bias, conv_4d)

    reduce_axes = (0, 2, 3)
    batch_mean = np.mean(
        conv_expect.astype(np.float64), axis=reduce_axes, keepdims=True
    ).astype(np.float32)
    mean_new = blend(running_mean, batch_mean)
    batch_var = np.var(
        conv_expect.astype(np.float64), axis=reduce_axes, keepdims=True
    ).astype(np.float32)
    var_new = blend(running_var, batch_var)

    bn_results = bn_benchmark(conv_expect, gamma, beta, running_mean,
                              running_var, momentum, eps, 3)

    out_dtype = x.dtype
    return (conv_expect.astype(out_dtype), bn_results[0].astype(out_dtype),
            mean_new, var_new, batch_mean, batch_var)
def _expand_conv_param(value, count, name):
    """Return ``value`` as a sequence of ``count`` entries, or raise RuntimeError.

    An int becomes a list of ``count`` copies, a 1-element list/tuple is
    repeated ``count`` times, and a list/tuple that already has ``count``
    elements is returned unchanged. Anything else is rejected.
    """
    if isinstance(value, int):
        return [value] * count
    if isinstance(value, (list, tuple)):
        if len(value) == 1:
            return list(value) * count
        if len(value) == count:
            return value
    raise RuntimeError('%s para illegal !!!' % name)


def gen_data(fm_shape, w_shape, pad, stride, dilation, bias):
    """Create two random feature maps, weights, bias and the reference conv output.

    The convolution input is the element-wise product of the two feature
    maps. Channel counts are rounded up to a multiple of 16.

    Args:
        fm_shape: 4-element feature-map shape (N, C, H, W).
        w_shape: 4-element weight shape (N, C, H, W).
        pad: int, 1-element or 4-element list/tuple, unpacked as
            (top, bottom, left, right).
        stride: int, 1-element or 2-element list/tuple, unpacked as (h, w).
        dilation: int, 1-element or 2-element list/tuple, unpacked as (h, w).
        bias: truthy for a ``np.random.rand`` bias, falsy for a zero bias.

    Returns:
        Tuple ``(feature1, feature2, filter, bb, output)``. Features and
        output are laid out as (N, C1, H, W, C0); filter is reshaped to
        (C1*H*W, N//16, 16, C0); ``bb`` is the bias reshaped to
        (1, N//16, 1, 1, 16).

    Raises:
        RuntimeError: if stride, pad or dilation has an unsupported form.
    """
    stride = _expand_conv_param(stride, 2, 'stride')
    pad = _expand_conv_param(pad, 4, 'pad')
    dilation = _expand_conv_param(dilation, 2, 'dilation')

    S_h, S_w = stride
    P_top, P_bottom, P_left, P_right = pad
    D_h, D_w = dilation

    IN, IC, IH, IW = fm_shape
    C0 = 16
    IC = ((IC + C0 - 1) // C0) * C0
    WN, WC, WH, WW = w_shape
    WN = ((WN + C0 - 1) // C0) * C0
    WC = ((WC + C0 - 1) // C0) * C0
    ON, OC = IN, WN

    # Effective kernel extent once dilation is applied.
    WHD = (WH - 1) * D_h + 1
    WWD = (WW - 1) * D_w + 1
    OH = (IH + P_top + P_bottom - WHD) // S_h + 1
    OW = (IW + P_left + P_right - WWD) // S_w + 1

    x1 = random_gaussian((IN, IC, IH, IW), miu=1, sigma=0.1).astype(np.float16)
    x2 = random_gaussian((IN, IC, IH, IW), miu=1, sigma=0.1).astype(np.float16)
    x = np.multiply(x1, x2)
    w = random_gaussian((WN, WC, WH, WW), miu=0.5, sigma=0.01).astype(np.float16)

    if bias:
        b = np.random.rand(WN).astype(np.float16, copy=False)
    else:
        b = np.zeros(WN, dtype=np.float16)

    conv_param = {'stride': stride, 'pad': pad, 'dilation': dilation}
    out = conv_forward_naive(x, w, b, conv_param)

    # transpose to 5D - NC1HWC0
    feature1 = x1.reshape(IN, IC // C0, C0, IH, IW).transpose(0, 1, 3, 4, 2).copy()
    feature2 = x2.reshape(IN, IC // C0, C0, IH, IW).transpose(0, 1, 3, 4, 2).copy()
    # transpose to 5D - C1HWNC0, then merge C1/H/W and split N into blocks of 16
    weight_blocked = w.reshape(WN, WC // C0, C0, WH, WW).transpose(1, 3, 4, 0, 2).copy()
    weight_blocked = weight_blocked.reshape(WC // C0 * WH * WW, WN // 16, 16, C0)
    bb = b.reshape(1, WN // 16, 1, 1, 16)
    # transpose to 5D - NC1HWC0
    output = out.reshape(ON, OC // C0, C0, OH, OW).transpose(0, 1, 3, 4, 2).copy()
    return feature1, feature2, weight_blocked, bb, output