def test_with_place(place):
    """Run elementwise_add forward plus its generated grad op on `place`
    and compare x@GRAD / y@GRAD against numpy references.

    Assumes the enclosing test case provides self.x, self.y, self.out,
    self.axis and self.__assert_close.
    """
    out_grad = np.random.random_sample(self.x.shape).astype(np.float32)
    # dOut/dX is the identity for elementwise_add.
    x_grad = out_grad
    # dOut/dY reduces over every axis except self.axis.
    # FIX: in Python 3 `range` is an immutable sequence and does not
    # support `del`; materialize it as a list first.
    sum_axis = list(range(0, len(self.x.shape)))
    del sum_axis[self.axis]
    y_grad = np.sum(out_grad, axis=tuple(sum_axis))

    # locals() is captured so references above are addressable by name.
    var_dict = locals()
    var_dict['y'] = self.y
    var_dict['x'] = self.x
    var_dict['out'] = self.out
    var_dict['y@GRAD'] = y_grad
    var_dict['x@GRAD'] = x_grad
    var_dict['out@GRAD'] = out_grad

    var_names = ['x', 'y', 'out', 'y@GRAD', 'x@GRAD', 'out@GRAD']
    ground_truth = {name: var_dict[name] for name in var_names}

    program = fluid.Program()
    with fluid.program_guard(program):
        block = program.global_block()
        # Declare every variable touched by forward or backward.
        for name in ground_truth:
            block.create_var(
                name=name, dtype='float32', shape=ground_truth[name].shape)
        elementwise_add_op = block.append_op(
            type="elementwise_add",
            inputs={
                "X": block.var('x'),
                "Y": block.var('y'),
            },
            outputs={"Out": block.var('out'), },
            attrs={"axis": self.axis, })

        # generate backward op_desc
        grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
            elementwise_add_op.desc, set(), [])
        grad_op_desc = grad_op_desc_list[0]
        new_op_desc = block.desc.append_op()
        new_op_desc.copy_from(grad_op_desc)
        # Register the grad output vars on the C++ block desc, then let the
        # grad op infer their types/shapes before forcing FP32 dtype.
        for var_name in grad_op_desc.output_arg_names():
            block.desc.var(var_name.encode("ascii"))
        grad_op_desc.infer_var_type(block.desc)
        grad_op_desc.infer_shape(block.desc)
        for arg in grad_op_desc.output_arg_names():
            grad_var = block.desc.find_var(arg.encode("ascii"))
            grad_var.set_dtype(core.VarDesc.VarType.FP32)

        exe = fluid.Executor(place)
        out = exe.run(program,
                      feed={
                          name: var_dict[name]
                          for name in ['x', 'y', 'out@GRAD']
                      },
                      fetch_list=['x@GRAD', 'y@GRAD'])
        self.__assert_close(x_grad, out[0], "x@GRAD")
        # y@GRAD sums many random values, hence the loose tolerance.
        self.__assert_close(y_grad, out[1], "y@GRAD", atol=1.4)
def check_if_mkldnn_primitives_exist_in_bwd(test_case, op_type, x, out,
                                            out_grad, x_grad):
    """Run forward + generated backward of a single MKL-DNN `op_type` twice
    and check x@GRAD against the expected `x_grad` each time.

    Two iterations are run so the second pass exercises the cached MKL-DNN
    primitives created during the first pass.
    """

    def __assert_close(tensor, np_array, msg, atol=1e-4):
        test_case.assertTrue(
            np.allclose(np.array(tensor), np_array, atol=atol), msg)

    place = core.CPUPlace()

    var_dict = {'x': x, 'out': out, 'out@GRAD': out_grad, 'x@GRAD': x_grad}
    var_names = list(var_dict.keys())
    ground_truth = {name: var_dict[name] for name in var_names}

    program = fluid.Program()
    with fluid.program_guard(program):
        block = program.global_block()
        # Declare every variable touched by forward or backward.
        for name in ground_truth:
            block.create_var(name=name,
                             dtype=np.float32,
                             shape=ground_truth[name].shape)
        op = block.append_op(type=op_type,
                             inputs={'X': block.var('x'), },
                             outputs={'Out': block.var('out')},
                             attrs={'use_mkldnn': True})

        # Generate backward op_desc
        grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
            op.desc, set(), [])
        grad_op_desc = grad_op_desc_list[0]
        new_op_desc = block.desc.append_op()
        new_op_desc.copy_from(grad_op_desc)
        # Register grad output vars on the C++ desc, infer their
        # types/shapes, then force FP32 dtype.
        for var_name in grad_op_desc.output_arg_names():
            block.desc.var(var_name.encode('ascii'))
        grad_op_desc.infer_var_type(block.desc)
        grad_op_desc.infer_shape(block.desc)
        for arg in grad_op_desc.output_arg_names():
            grad_var = block.desc.find_var(arg.encode('ascii'))
            grad_var.set_dtype(core.VarDesc.VarType.FP32)

        exe = fluid.Executor(place)

        # Do at least 2 iterations
        for i in range(2):
            out = exe.run(
                program,
                feed={name: var_dict[name] for name in ['x', 'out@GRAD']},
                fetch_list=['x@GRAD', 'out'])
            __assert_close(x_grad, out[0], 'x@GRAD')
def _calc_grad_output(self, place, fwd_res, grad_op_desc, enable_inplace=None):
    """Build and execute the grad program for ``grad_op_desc``.

    Gradient accuracy is not validated here; the outputs are only used to
    compare inplace vs. non-inplace execution, so the forward results in
    ``fwd_res`` (outputs, and sometimes inputs) serve as grad inputs.

    Args:
        place (CPUPlace | CUDAPlace): Where the op runs.
        fwd_res (tuple): tuple(fwd_outs, fwd_fetch_list, fwd_feed_map,
            fwd_program, fwd_op_desc) — what _calc_outputs() returns when
            for_inplace_test is True.
        grad_op_desc (OpDesc): The OpDesc of the grad op.
        enable_inplace (bool): Whether to build with inplace enabled; None
            means run the plain grad program.

    Returns:
        tuple(outs, fetch_list, feed_map, program, op_desc): The results
        of the given grad_op_desc.
    """
    _, _, _, fwd_program, fwd_op_desc = fwd_res
    grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
        fwd_op_desc, set(), [])
    grad_program = self._construct_grad_program_from_forward(
        fwd_program, grad_op_desc, op_grad_to_var)
    grad_feed_map = self._construct_grad_feed_map_from_forward(
        place, fwd_res, grad_op_desc, op_grad_to_var)
    grad_fetch_list = grad_op_desc.output_arg_names()

    # Default to the plain grad program; compile with the requested
    # inplace setting when one was given.
    run_program = grad_program
    if enable_inplace is not None:
        build_strategy = fluid.BuildStrategy()
        build_strategy.enable_inplace = enable_inplace
        run_program = fluid.CompiledProgram(grad_program).with_data_parallel(
            loss_name="", build_strategy=build_strategy, places=place)

    executor = Executor(place)
    outs = executor.run(run_program,
                        feed=grad_feed_map,
                        fetch_list=grad_fetch_list,
                        return_numpy=False)
    return outs, grad_fetch_list, grad_feed_map, grad_program, grad_op_desc
def _dfs_grad_op(op_desc, fwd_op_desc=None):
    """Depth-first search over the grad-op graph rooted at ``op_desc``.

    Appends (op_desc, fwd_op_desc) to the closure list ``need_run_ops`` when
    the op itself has an infer-inplace implementation or any grad descendant
    does, and returns True in that case.  ``visited_ops`` (closure) prevents
    revisiting an op type.
    """
    visited_ops.append(op_desc.type())
    has_infer_inplace = fluid.core.has_infer_inplace(op_desc.type())
    has_grad_op_maker = fluid.core.has_grad_op_maker(op_desc.type())
    # FIX: the original set a typo'd name (has_infer_inplace_in_descendants)
    # in the no-grad-op-maker branch; it only worked because this flag was
    # already initialized to False.  The dead assignment is removed.
    has_infer_inplace_in_grad_descendants = False
    if has_grad_op_maker:
        # get grad_op_desc
        grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
            op_desc, set(), [])
        for grad_op_desc in grad_op_desc_list:
            if grad_op_desc.type() not in visited_ops and _dfs_grad_op(
                    grad_op_desc, fwd_op_desc=op_desc):
                has_infer_inplace_in_grad_descendants = True
    if has_infer_inplace or has_infer_inplace_in_grad_descendants:
        need_run_ops.append((op_desc, fwd_op_desc))
        return True
    else:
        return False
def test_with_place(place, shape):
    """instance_norm forward + generated backward on `place`, compared
    against the numpy reference implementations.
    """
    epsilon = self.epsilon
    n, c, h, w = shape[0], shape[1], shape[2], shape[3]
    scale_shape = [c]
    mean_shape = [n * c]

    np.random.seed()
    x = np.random.random_sample(shape).astype(np.float32)
    scale = np.random.random_sample(scale_shape).astype(np.float32)
    bias = np.random.random_sample(scale_shape).astype(np.float32)
    mean, variance = self.set_global_mean_var(mean_shape, x)
    d_y = np.random.random_sample(shape).astype(np.float32)

    # Reference forward and backward.
    y, saved_mean, variance_tmp = _reference_instance_norm_naive(
        x, scale, bias, epsilon, mean, variance)
    saved_variance = 1 / np.sqrt(variance_tmp + epsilon)
    d_x, d_scale, d_bias = _reference_instance_norm_grad(
        x, d_y, scale, saved_mean, saved_variance, epsilon)

    # locals() is captured so the reference values above are addressable
    # by name when feeding/comparing below; do not rename locals.
    var_dict = locals()
    var_dict['y@GRAD'] = d_y
    var_dict['x@GRAD'] = d_x
    var_dict['scale@GRAD'] = d_scale
    var_dict['bias@GRAD'] = d_bias

    var_names = [
        'x', 'scale', 'bias', 'y', 'saved_mean', 'saved_variance'
    ]
    ground_truth = {name: var_dict[name] for name in var_names}

    program = fluid.Program()
    with fluid.program_guard(program):
        block = program.global_block()
        for name in ground_truth:
            block.create_var(
                name=name, dtype='float32', shape=ground_truth[name].shape)
        in_op = block.append_op(
            type="instance_norm",
            inputs={
                "X": block.var("x"),
                "Scale": block.var("scale"),
                "Bias": block.var("bias"),
            },
            outputs={
                "Y": block.var("y"),
                "SavedMean": block.var("saved_mean"),
                "SavedVariance": block.var("saved_variance")
            },
            attrs={"epsilon": epsilon, })

        block.create_var(name="y@GRAD", dtype='float32', shape=y.shape)

        # Generate the backward op_desc and splice it into the block desc.
        grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
            in_op.desc, self.no_grad_set, [])
        grad_op_desc = grad_op_desc_list[0]
        new_op_desc = block.desc.append_op()
        new_op_desc.copy_from(grad_op_desc)
        for var_name in grad_op_desc.output_arg_names():
            block.desc.var(var_name.encode("ascii"))
        grad_op_desc.infer_var_type(block.desc)
        grad_op_desc.infer_shape(block.desc)
        for arg in grad_op_desc.output_arg_names():
            grad_var = block.desc.find_var(arg.encode("ascii"))
            grad_var.set_dtype(core.VarDesc.VarType.FP32)

        # Sync the Python-side Program with the mutated C++ desc.
        program._sync_with_cpp()
        exe = fluid.Executor(place)
        out = exe.run(program,
                      feed={
                          name: var_dict[name]
                          for name in ['x', 'scale', 'bias', 'y@GRAD']
                      },
                      fetch_list=self.fetch_list)
        for id, name in enumerate(self.fetch_list):
            self.__assert_close(var_dict[name], out[id], name)
        print("op test forward passes: ", str(place))
def test_with_place(place, data_layout, shape):
    """batch_norm forward + generated backward on `place`, checked
    against self.ref_forward_backward() numpy references.
    """
    # attr
    epsilon = 0.00001
    momentum = 0.9
    if data_layout == "NCHW":
        n, c, h, w = shape[0], shape[1], shape[2], shape[3]
    else:
        n, h, w, c = shape[0], shape[1], shape[2], shape[3]
    scale_shape = [c]

    np.random.seed(123)
    x = np.random.random_sample(shape).astype(np.float32)
    scale = np.random.random_sample(scale_shape).astype(np.float32)
    bias = np.random.random_sample(scale_shape).astype(np.float32)
    mean = np.zeros(scale_shape).astype(np.float32)
    variance = np.ones(scale_shape).astype(np.float32)
    y_grad = np.random.random_sample(shape).astype(np.float32)

    y, mean_out, variance_out, saved_mean, saved_variance, x_grad, scale_grad, bias_grad = self.ref_forward_backward(
        x, y_grad, scale, bias, mean, variance, epsilon, momentum, shape,
        data_layout)

    # locals() is captured here so the reference values above are
    # addressable by name; do not rename locals.
    var_dict = locals()
    var_dict['y@GRAD'] = y_grad

    var_names = [
        'x', 'scale', 'bias', 'mean', 'variance', 'y', 'saved_mean',
        'saved_variance'
    ]
    ground_truth = {name: var_dict[name] for name in var_names}

    program = fluid.Program()
    with fluid.program_guard(program):
        block = program.global_block()
        for name in ground_truth:
            block.create_var(name=name,
                            dtype='float32',
                            shape=ground_truth[name].shape)
        bn_op = block.append_op(
            type="batch_norm",
            inputs={
                "X": block.var('x'),
                "Scale": block.var('scale'),
                "Bias": block.var('bias'),
                "Mean": block.var('mean'),
                "Variance": block.var('variance')
            },
            outputs={
                "Y": block.var('y'),
                "MeanOut": block.var('mean'),  # share the same memory
                "VarianceOut": block.var('variance'),  # share the same memory
                "SavedMean": block.var('saved_mean'),
                "SavedVariance": block.var('saved_variance')
            },
            attrs={
                "momentum": momentum,
                "epsilon": epsilon,
                "is_test": False,
                "data_layout": data_layout,
                "use_mkldnn": self.use_mkldnn
            })
        block.create_var(name='y@GRAD', dtype='float32', shape=y.shape)

        # generate backward op_desc
        grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
            bn_op.desc, set(), [])
        grad_op_desc = grad_op_desc_list[0]
        new_op_desc = block.desc.append_op()
        new_op_desc.copy_from(grad_op_desc)
        for var_name in grad_op_desc.output_arg_names():
            block.desc.var(var_name.encode("ascii"))
        grad_op_desc.infer_var_type(block.desc)
        grad_op_desc.infer_shape(block.desc)
        for arg in grad_op_desc.output_arg_names():
            grad_var = block.desc.find_var(arg.encode("ascii"))
            grad_var.set_dtype(core.VarDesc.VarType.FP32)

        exe = fluid.Executor(place)
        out = exe.run(
            program,
            feed={
                name: var_dict[name]
                for name in
                ['x', 'scale', 'bias', 'mean', 'variance', 'y@GRAD']
            },
            fetch_list=[
                'y', 'mean', 'variance', 'saved_mean', 'saved_variance',
                'x@GRAD', 'scale@GRAD', 'bias@GRAD'
            ])

        self.__assert_close(y, out[0], "y")
        self.__assert_close(mean_out, out[1], "mean")
        self.__assert_close(variance_out, out[2], "variance", 1e-3)
        self.__assert_close(saved_mean, out[3], "saved_mean")
        self.__assert_close(saved_variance, out[4], "saved_variance", 1e-3)
        self.__assert_close(x_grad, out[5], "x_grad")
        self.__assert_close(scale_grad, out[6], "scale_grad")
        self.__assert_close(bias_grad, out[7], "bias_grad")
        # FIX: Python 2 `print "..."` statement is a SyntaxError under
        # Python 3; use the print() function (same space-separated output).
        print("op test forward passed: ", str(place), data_layout)
def test_train(self):
    """Training-mode batch_norm: build forward + generated backward in a
    static program, run it, and compare every fetched output/gradient
    against the ref_batch_norm_train() numpy references.
    """
    y_grad_np = np.random.random_sample(self.shape).astype(self.dtype)
    y_np, mean_out_np, variance_out_np, saved_mean_np, saved_variance_np, x_grad_np, scale_grad_np, bias_grad_np = ref_batch_norm_train(
        self.x_np, y_grad_np, self.scale_np, self.bias_np, self.mean_np,
        self.variance_np, self.momentum, self.epsilon, self.data_layout)
    inputs = {
        'X': self.x_np,
        'Scale': self.scale_np,
        'Bias': self.bias_np,
        'Mean': self.mean_np,
        'Variance': self.variance_np,
        'Y@GRAD': y_grad_np
    }
    outputs = {
        'Y': y_np,
        'Mean': mean_out_np,
        'Variance': variance_out_np,
        'SavedMean': saved_mean_np,
        'SavedVariance': saved_variance_np,
        'X@GRAD': x_grad_np,
        'Scale@GRAD': scale_grad_np,
        'Bias@GRAD': bias_grad_np
    }
    attrs = {
        'momentum': self.momentum,
        'epsilon': self.epsilon,
        'is_test': False,
        'data_layout': self.data_layout,
        'use_mkldnn': False,
        'fuse_with_relu': False,
        'use_global_stats': False,
    }
    paddle.enable_static()
    program = paddle.static.Program()
    with paddle.static.program_guard(program):
        block = program.global_block()
        # Set inputs, outputs and attributes to the forward op of batch_norm
        input_vars = {}
        for var_name in inputs:
            arg_name = var_name
            np_value = inputs[var_name]
            if not block.has_var(var_name):
                block.create_var(name=var_name,
                                 shape=np_value.shape,
                                 dtype=np_value.dtype)
            input_vars[arg_name] = block.var(var_name)
        fetch_list = []
        output_vars = {}
        for var_name in outputs:
            arg_name = var_name
            np_value = outputs[var_name]
            if not block.has_var(var_name):
                block.create_var(name=var_name,
                                 shape=np_value.shape,
                                 dtype=np_value.dtype)
            # Mean/Variance are in-place outputs, mapped to the *Out args.
            if var_name == 'Mean':
                arg_name = 'MeanOut'  # Share memory
            if var_name == 'Variance':
                arg_name = 'VarianceOut'  # Share memory
            output_vars[arg_name] = block.var(var_name)
            fetch_list.append(var_name)
        batch_norm_op = block.append_op(type="batch_norm",
                                        inputs=input_vars,
                                        outputs=output_vars,
                                        attrs=attrs)
        # Generate the backward op_desc of batch_norm
        grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
            batch_norm_op.desc, set(), [])
        grad_op_desc = grad_op_desc_list[0]
        new_op_desc = block.desc.append_op()
        new_op_desc.copy_from(grad_op_desc)
        # Sync the Python-side Program with the mutated C++ desc.
        program._sync_with_cpp()
        exe = paddle.static.Executor(self.place)
        outs = exe.run(program, feed=inputs, fetch_list=fetch_list)
        for id, name in enumerate(fetch_list):
            self.assertEqual(
                np.allclose(outputs[name], outs[id], atol=1e-4), True)
def test_with_place(place, data_layout, shape):
    """batch_norm forward + generated backward on `place`, checked
    against self.ref_forward_backward() numpy references.
    """
    # attr
    epsilon = 0.00001
    momentum = 0.9
    if data_layout == "NCHW":
        n, c, h, w = shape[0], shape[1], shape[2], shape[3]
    else:
        n, h, w, c = shape[0], shape[1], shape[2], shape[3]
    scale_shape = [c]

    np.random.seed(123)
    x = np.random.random_sample(shape).astype(np.float32)
    scale = np.random.random_sample(scale_shape).astype(np.float32)
    bias = np.random.random_sample(scale_shape).astype(np.float32)
    mean = np.zeros(scale_shape).astype(np.float32)
    variance = np.ones(scale_shape).astype(np.float32)
    y_grad = np.random.random_sample(shape).astype(np.float32)

    y, mean_out, variance_out, saved_mean, saved_variance, x_grad, scale_grad, bias_grad = self.ref_forward_backward(
        x, y_grad, scale, bias, mean, variance, epsilon, momentum, shape,
        data_layout)

    # locals() is captured here so the reference values above are
    # addressable by name; do not rename locals.
    var_dict = locals()
    var_dict['y@GRAD'] = y_grad

    var_names = [
        'x', 'scale', 'bias', 'mean', 'variance', 'y', 'saved_mean',
        'saved_variance'
    ]
    ground_truth = {name: var_dict[name] for name in var_names}

    program = fluid.Program()
    with fluid.program_guard(program):
        block = program.global_block()
        for name in ground_truth:
            block.create_var(
                name=name, dtype='float32', shape=ground_truth[name].shape)
        bn_op = block.append_op(
            type="batch_norm",
            inputs={
                "X": block.var('x'),
                "Scale": block.var('scale'),
                "Bias": block.var('bias'),
                "Mean": block.var('mean'),
                "Variance": block.var('variance')
            },
            outputs={
                "Y": block.var('y'),
                "MeanOut": block.var('mean'),  # share the same memory
                "VarianceOut": block.var('variance'),  # share the same memory
                "SavedMean": block.var('saved_mean'),
                "SavedVariance": block.var('saved_variance')
            },
            attrs={
                "momentum": momentum,
                "epsilon": epsilon,
                "is_test": False,
                "data_layout": data_layout,
                "use_mkldnn": self.use_mkldnn
            })
        block.create_var(name='y@GRAD', dtype='float32', shape=y.shape)

        # generate backward op_desc
        grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
            bn_op.desc, set(), [])
        grad_op_desc = grad_op_desc_list[0]
        new_op_desc = block.desc.append_op()
        new_op_desc.copy_from(grad_op_desc)
        for var_name in grad_op_desc.output_arg_names():
            block.desc.var(var_name.encode("ascii"))
        grad_op_desc.infer_var_type(block.desc)
        grad_op_desc.infer_shape(block.desc)
        for arg in grad_op_desc.output_arg_names():
            grad_var = block.desc.find_var(arg.encode("ascii"))
            grad_var.set_dtype(core.VarDesc.VarType.FP32)

        exe = fluid.Executor(place)
        out = exe.run(
            program,
            feed={
                name: var_dict[name]
                for name in
                ['x', 'scale', 'bias', 'mean', 'variance', 'y@GRAD']
            },
            fetch_list=[
                'y', 'mean', 'variance', 'saved_mean', 'saved_variance',
                'x@GRAD', 'scale@GRAD', 'bias@GRAD'
            ])

        self.__assert_close(y, out[0], "y")
        self.__assert_close(mean_out, out[1], "mean")
        self.__assert_close(variance_out, out[2], "variance", 1e-3)
        self.__assert_close(saved_mean, out[3], "saved_mean")
        self.__assert_close(saved_variance, out[4], "saved_variance", 1e-3)
        self.__assert_close(x_grad, out[5], "x_grad")
        self.__assert_close(scale_grad, out[6], "scale_grad")
        self.__assert_close(bias_grad, out[7], "bias_grad")
        # FIX: Python 2 `print "..."` statement is a SyntaxError under
        # Python 3; use the print() function (same space-separated output).
        print("op test forward passed: ", str(place), data_layout)
def test_with_place(place, data_layout, shape):
    """batch_norm forward + generated backward on `place`, optionally with
    a MomentumTensor input, checked against self.ref_forward_backward().
    """
    # attr
    epsilon = self.epsilon
    momentum = self.momentum
    if data_layout == "NCHW":
        n, c, h, w = shape[0], shape[1], shape[2], shape[3]
    else:
        n, h, w, c = shape[0], shape[1], shape[2], shape[3]
    scale_shape = [c]

    np.random.seed(123)
    x = np.random.random_sample(shape).astype(np.float32)
    scale = np.random.random_sample(scale_shape).astype(np.float32)
    bias = np.random.random_sample(scale_shape).astype(np.float32)
    mean, variance = self.set_mean_variance(scale_shape, x, data_layout)
    y_grad = np.random.random_sample(shape).astype(np.float32)
    momentum_var = np.array([momentum]).astype(np.float32)

    y, mean_out, variance_out, saved_mean, saved_variance, x_grad, scale_grad, bias_grad = self.ref_forward_backward(
        x, y_grad, scale, bias, mean, variance, epsilon, momentum, shape,
        data_layout)

    # locals() is captured so the reference values above are addressable
    # by name when feeding/comparing below; do not rename locals.
    var_dict = locals()
    var_dict['y@GRAD'] = y_grad
    var_dict['x@GRAD'] = x_grad
    var_dict['scale@GRAD'] = scale_grad
    var_dict['bias@GRAD'] = bias_grad

    var_names = [
        'x', 'scale', 'bias', 'mean', 'variance', 'y', 'saved_mean',
        'saved_variance', 'momentum_var'
    ]
    ground_truth = {name: var_dict[name] for name in var_names}

    program = fluid.Program()
    with fluid.program_guard(program):
        block = program.global_block()
        for name in ground_truth:
            block.create_var(
                name=name, dtype='float32', shape=ground_truth[name].shape)
        inputs = {
            "X": block.var('x'),
            "Scale": block.var('scale'),
            "Bias": block.var('bias'),
            "Mean": block.var('mean'),
            "Variance": block.var('variance')
        }
        attrs = {
            "epsilon": epsilon,
            "is_test": False,
            "data_layout": data_layout,
            "use_mkldnn": self.use_mkldnn,
            "fuse_with_relu": self.fuse_with_relu,
            "use_global_stats": self.use_global_stats
        }
        # Momentum is either a tensor input or a plain attribute.
        if self.use_momentum_variable:
            inputs['MomentumTensor'] = block.var('momentum_var')
        else:
            attrs['momentum'] = momentum
        outputs = {
            "Y": block.var('y'),
            "MeanOut": block.var('mean'),  # share memory
            "VarianceOut": block.var('variance'),  # share memory
            "SavedMean": block.var('saved_mean'),
            "SavedVariance": block.var('saved_variance')
        }
        block.create_var(name="reserve_space", dtype='float32')
        outputs["ReserveSpace"] = block.var('reserve_space')
        bn_op = block.append_op(
            type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs)
        block.create_var(name='y@GRAD', dtype='float32', shape=y.shape)

        # generate backward op_desc
        grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
            bn_op.desc, self.no_grad_set, [])
        grad_op_desc = grad_op_desc_list[0]
        new_op_desc = block.desc.append_op()
        new_op_desc.copy_from(grad_op_desc)
        for var_name in grad_op_desc.output_arg_names():
            block.desc.var(var_name.encode("ascii"))
        grad_op_desc.infer_var_type(block.desc)
        grad_op_desc.infer_shape(block.desc)
        for arg in grad_op_desc.output_arg_names():
            grad_var = block.desc.find_var(arg.encode("ascii"))
            grad_var.set_dtype(core.VarDesc.VarType.FP32)
        # Sync the Python-side Program with the mutated C++ desc.
        program._sync_with_cpp()

        exe = fluid.Executor(place)
        out = exe.run(program,
                      feed={
                          name: var_dict[name]
                          for name in [
                              'x', 'scale', 'bias', 'mean', 'variance',
                              'y@GRAD', 'momentum_var'
                          ]
                      },
                      fetch_list=self.fetch_list)

        for id, name in enumerate(self.fetch_list):
            # Variance gets a looser tolerance.
            if name == 'variance':
                self.__assert_close(
                    var_dict[name], out[id], name, atol=1e-3)
                continue
            self.__assert_close(var_dict[name], out[id], name)
        print("op test forward passed: ", str(place), data_layout)
def check_if_mkldnn_batchnorm_primitives_exist_in_bwd(test_case, var_dict,
                                                      place, shape,
                                                      data_layout):
    """Run MKL-DNN batch_norm forward + generated backward twice and check
    all fetched values against `var_dict`; the second iteration exercises
    the cached MKL-DNN primitives.
    """
    var_names = [
        'x', 'scale', 'bias', 'mean', 'variance', 'y', 'saved_mean',
        'saved_variance'
    ]
    ground_truth = {name: var_dict[name] for name in var_names}
    program = fluid.Program()
    with fluid.program_guard(program):
        block = program.global_block()
        for name in ground_truth:
            block.create_var(name=name,
                             dtype='float32',
                             shape=ground_truth[name].shape)
        bn_op = block.append_op(
            type="batch_norm",
            inputs={
                "X": block.var('x'),
                "Scale": block.var('scale'),
                "Bias": block.var('bias'),
                "Mean": block.var('mean'),
                "Variance": block.var('variance')
            },
            outputs={
                "Y": block.var('y'),
                "MeanOut": block.var('mean'),  # share memory
                "VarianceOut": block.var('variance'),  # share memory
                "SavedMean": block.var('saved_mean'),
                "SavedVariance": block.var('saved_variance')
            },
            attrs={
                "momentum": test_case.momentum,
                "epsilon": test_case.epsilon,
                "is_test": False,
                "data_layout": data_layout,
                "use_mkldnn": test_case.use_mkldnn,
                "fuse_with_relu": test_case.fuse_with_relu,
                "use_global_stats": test_case.use_global_stats
            })
        block.create_var(name='y@GRAD',
                         dtype='float32',
                         shape=var_dict['y'].shape)

        # generate backward op_desc
        grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
            bn_op.desc, test_case.no_grad_set, [])
        grad_op_desc = grad_op_desc_list[0]
        new_op_desc = block.desc.append_op()
        new_op_desc.copy_from(grad_op_desc)
        for var_name in grad_op_desc.output_arg_names():
            block.desc.var(var_name.encode("ascii"))
        grad_op_desc.infer_var_type(block.desc)
        grad_op_desc.infer_shape(block.desc)
        for arg in grad_op_desc.output_arg_names():
            grad_var = block.desc.find_var(arg.encode("ascii"))
            grad_var.set_dtype(core.VarDesc.VarType.FP32)
        # Sync the Python-side Program with the mutated C++ desc.
        program._sync_with_cpp()

        exe = fluid.Executor(place)
        # Do at least 2 iterations
        for i in range(2):
            out = exe.run(
                program,
                feed={
                    name: var_dict[name]
                    for name in
                    ['x', 'scale', 'bias', 'mean', 'variance', 'y@GRAD']
                },
                fetch_list=test_case.fetch_list)
        for id, name in enumerate(test_case.fetch_list):
            __assert_close(test_case, var_dict[name], out[id], name)
        print("MKLDNN op test forward passed: ", str(place), data_layout)
def test_with_place(place, shape, begin_norm_axis):
    """layer_norm forward + generated backward on `place`, compared
    against the numpy reference implementations.
    """
    # attr
    epsilon = 0.00001
    x_shape = shape
    # D = product of all dims being normalized over.
    D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1)
    scale_shape = [D]
    np.random.seed(123)
    x = np.random.random_sample(x_shape).astype(np.float32)
    scale = np.random.random_sample(scale_shape).astype(np.float32)
    bias = np.random.random_sample(scale_shape).astype(np.float32)
    y_grad = np.random.random_sample(x_shape).astype(np.float32)

    # reference forward & backward
    y, mean, variance = _reference_layer_norm_naive(
        x, scale, bias, epsilon, begin_norm_axis)
    x_grad, scale_grad, bias_grad = _reference_layer_norm_grad(
        x, y_grad, scale, mean, variance, begin_norm_axis)

    # locals() is captured so the reference values above are addressable
    # by name when feeding below; do not rename locals.
    var_dict = locals()
    var_dict['y@GRAD'] = y_grad
    var_names = [
        'x', 'scale', 'bias', 'mean', 'variance', 'y', 'y@GRAD'
    ]
    ground_truth = {name: var_dict[name] for name in var_names}

    program = fluid.Program()
    with fluid.program_guard(program):
        block = program.global_block()
        for name in ground_truth:
            block.create_var(name=name,
                             dtype='float32',
                             shape=ground_truth[name].shape)
        layer_norm_op = block.append_op(
            type="layer_norm",
            inputs={
                "X": block.var('x'),
                "Scale": block.var('scale'),
                "Bias": block.var('bias'),
            },
            outputs={
                "Y": block.var('y'),
                "Mean": block.var('mean'),  # share the same memory
                "Variance": block.var('variance'),  # share the same memory
            },
            attrs={
                "epsilon": epsilon,
                "begin_norm_axis": begin_norm_axis
            })

        # generate backward op_desc
        grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
            layer_norm_op.desc, set(), [])
        grad_op_desc = grad_op_desc_list[0]
        new_op_desc = block.desc.append_op()
        new_op_desc.copy_from(grad_op_desc)
        for var_name in grad_op_desc.output_arg_names():
            block.desc.var(var_name.encode("ascii"))
        grad_op_desc.infer_var_type(block.desc)
        grad_op_desc.infer_shape(block.desc)
        for arg in grad_op_desc.output_arg_names():
            grad_var = block.desc.find_var(arg.encode("ascii"))
            grad_var.set_dtype(core.VarDesc.VarType.FP32)

        exe = fluid.Executor(place)
        out = exe.run(program,
                      feed={
                          name: var_dict[name]
                          for name in ['x', 'scale', 'bias', 'y@GRAD']
                      },
                      fetch_list=[
                          'y', 'mean', 'variance', 'x@GRAD', 'scale@GRAD',
                          'bias@GRAD'
                      ])
        self.__assert_close(y, out[0], "y")
        self.__assert_close(mean, out[1], "mean")
        self.__assert_close(variance, out[2], "variance", 1e-3)
        self.__assert_close(x_grad, out[3], "x_grad")
        self.__assert_close(scale_grad, out[4], "scale_grad", 1e-3)
        self.__assert_close(bias_grad, out[5], "bias_grad")
def test_get_set(self):
    """Build a LoDTensorArray of 10 tensors (the first holding two rows),
    run the array op forward plus its generated backward, and verify the
    concatenated output, the OutIndex row counts, and how Out@GRAD is
    split back across the array elements.
    """
    scope = core.Scope()
    program = fluid.Program()
    block = program.global_block()

    input_arr = block.create_var(
        name="tmp_lod_tensor_array",
        type=core.VarDesc.VarType.LOD_TENSOR_ARRAY)
    input_arr.persistable = True
    input_arr_var = scope.var('tmp_lod_tensor_array')
    input_tensor_array = input_arr_var.get_lod_tensor_array()
    self.assertEqual(0, len(input_tensor_array))

    cpu = core.CPUPlace()
    for i in range(10):
        t = core.LoDTensor()
        # Element 0 has two rows; all others have one.
        if i == 0:
            t.set(numpy.array([[i], [i]], dtype='float32'), cpu)
        else:
            t.set(numpy.array([[i]], dtype='float32'), cpu)
        input_tensor_array.append(t)
    self.assertEqual(10, len(input_tensor_array))

    # 11 gradient rows: 2 for element 0 + 1 for each of the other 9.
    random_grad = numpy.random.random_sample([11]).astype(numpy.float32)

    y_out = block.create_var(name="Out")
    y_out.persistable = True
    y_out_index = block.create_var(name="OutIndex")
    y_out_index.persistable = True

    y_grad_arr = block.create_var(
        name='Out@GRAD', dtype='float32', shape=[11])
    y_grad_arr.persistable = True
    y_grad = scope.var('Out@GRAD')
    y_grad_tensor = y_grad.get_tensor()
    y_grad_tensor.set(random_grad, cpu)

    op = block.append_op(
        type=self.op_type,
        inputs={"X": input_arr},
        outputs={"Out": y_out,
                 "OutIndex": y_out_index},
        attrs=self.attrs)

    out_grad = block.create_var(
        name="tmp_lod_tensor_array@GRAD",
        type=core.VarDesc.VarType.LOD_TENSOR_ARRAY)
    out_grad.persistable = True

    # Generate the backward op_desc and splice it into the block desc.
    grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(op.desc,
                                                              set(), [])
    grad_op_desc = grad_op_desc_list[0]
    new_op_desc = block.desc.append_op()
    new_op_desc.copy_from(grad_op_desc)
    for var_name in grad_op_desc.output_arg_names():
        block.desc.var(var_name.encode("ascii"))
    grad_op_desc.infer_var_type(block.desc)
    grad_op_desc.infer_shape(block.desc)
    for arg in grad_op_desc.output_arg_names():
        grad_var = block.desc.find_var(arg.encode("ascii"))
        grad_var.set_dtype(core.VarDesc.VarType.FP32)

    fetch_list = []
    fetch_list.append(block.var('Out'))
    fetch_list.append(block.var('OutIndex'))

    exe = fluid.Executor(fluid.CPUPlace())
    out = exe.run(program, fetch_list=fetch_list, scope=scope)
    #print ("index: ", numpy.array(out[1]))

    # test forward
    tensor_res = numpy.array(out[0])
    tensor_res_out_idx = numpy.array(out[1])
    tensor_gt = numpy.array(
        [0] + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='float32')

    self.assertEqual(len(tensor_res), len(tensor_gt))
    self.assertEqual(len(tensor_res_out_idx), 10)

    for i in range(len(tensor_res)):
        self.assertEqual(tensor_res[i], tensor_gt[i])

    # Element 0 contributed 2 rows, every other element 1 row.
    for i in range(len(tensor_res_out_idx)):
        if i == 0:
            self.assertEqual(tensor_res_out_idx[i], 2)
        else:
            self.assertEqual(tensor_res_out_idx[i], 1)

    # test backward
    grad_tensor = scope.var('tmp_lod_tensor_array@GRAD')
    grad_tensor_array = grad_tensor.get_lod_tensor_array()

    self.assertEqual(10, len(grad_tensor_array))
    for i in range(len(grad_tensor_array)):
        if i == 0:
            self.assertEqual(
                numpy.array(grad_tensor_array[i])[0],
                numpy.array(random_grad[i]))
            self.assertEqual(
                numpy.array(grad_tensor_array[i])[1],
                numpy.array(random_grad[i + 1]))
        if i == 1:
            self.assertEqual(
                numpy.array(grad_tensor_array[i]),
                numpy.array(random_grad[i + 1]))
def test_with_place(place, shape, begin_norm_axis):
    """layer_norm forward + generated backward on `place`, compared
    against the numpy reference implementations.
    """
    # attr
    epsilon = 0.00001
    x_shape = shape
    # D = product of all dims being normalized over.
    D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1)
    scale_shape = [D]
    np.random.seed(123)
    x = np.random.random_sample(x_shape).astype(np.float32)
    scale = np.random.random_sample(scale_shape).astype(np.float32)
    bias = np.random.random_sample(scale_shape).astype(np.float32)
    y_grad = np.random.random_sample(x_shape).astype(np.float32)

    # reference forward & backward
    y, mean, variance = _reference_layer_norm_naive(
        x, scale, bias, epsilon, begin_norm_axis)
    x_grad, scale_grad, bias_grad = _reference_layer_norm_grad(
        x, y_grad, scale, mean, variance, begin_norm_axis)

    # locals() is captured so the reference values above are addressable
    # by name when feeding below; do not rename locals.
    var_dict = locals()
    var_dict['y@GRAD'] = y_grad
    var_names = [
        'x', 'scale', 'bias', 'mean', 'variance', 'y', 'y@GRAD'
    ]
    ground_truth = {name: var_dict[name] for name in var_names}

    program = fluid.Program()
    with fluid.program_guard(program):
        block = program.global_block()
        for name in ground_truth:
            block.create_var(
                name=name, dtype='float32', shape=ground_truth[name].shape)
        layer_norm_op = block.append_op(
            type="layer_norm",
            inputs={
                "X": block.var('x'),
                "Scale": block.var('scale'),
                "Bias": block.var('bias'),
            },
            outputs={
                "Y": block.var('y'),
                "Mean": block.var('mean'),  # share the same memory
                "Variance": block.var('variance'),  # share the same memory
            },
            attrs={
                "epsilon": epsilon,
                "begin_norm_axis": begin_norm_axis
            })

        # generate backward op_desc
        grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
            layer_norm_op.desc, set(), [])
        grad_op_desc = grad_op_desc_list[0]
        new_op_desc = block.desc.append_op()
        new_op_desc.copy_from(grad_op_desc)
        for var_name in grad_op_desc.output_arg_names():
            block.desc.var(var_name.encode("ascii"))
        grad_op_desc.infer_var_type(block.desc)
        grad_op_desc.infer_shape(block.desc)
        for arg in grad_op_desc.output_arg_names():
            grad_var = block.desc.find_var(arg.encode("ascii"))
            grad_var.set_dtype(core.VarDesc.VarType.FP32)

        exe = fluid.Executor(place)
        out = exe.run(program,
                      feed={
                          name: var_dict[name]
                          for name in ['x', 'scale', 'bias', 'y@GRAD']
                      },
                      fetch_list=[
                          'y', 'mean', 'variance', 'x@GRAD', 'scale@GRAD',
                          'bias@GRAD'
                      ])
        self.__assert_close(y, out[0], "y")
        self.__assert_close(mean, out[1], "mean")
        self.__assert_close(variance, out[2], "variance", 1e-3)
        self.__assert_close(x_grad, out[3], "x_grad")
        self.__assert_close(scale_grad, out[4], "scale_grad", 1e-3)
        self.__assert_close(bias_grad, out[5], "bias_grad")