def test_calc_gradient(self):
    x = layers.create_parameter(dtype="float32", shape=[5, 10])
    y = layers.create_parameter(dtype="float32", shape=[10, 8])
    mul_out = layers.mul(x=x, y=y)
    mean_out = layers.mean(mul_out)
    a = calc_gradient(mean_out, mul_out)
    b = calc_gradient(mean_out, x)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    exe.run(fluid.default_main_program(), feed={}, fetch_list=[a, b])

def test_calc_gradient(self):
    main = fluid.Program()
    startup = fluid.Program()
    with fluid.program_guard(main, startup):
        x = layers.create_parameter(dtype="float32", shape=[5, 10])
        y = layers.create_parameter(dtype="float32", shape=[10, 8])
        mul_out = layers.mul(x=x, y=y)
        mean_out = layers.mean(mul_out)
        a = calc_gradient(mean_out, mul_out)
        b = calc_gradient(mean_out, x)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup)
    exe.run(main, feed={}, fetch_list=[a, b])

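# For reference, a minimal standalone sketch of the same pattern, showing
# what the fetched gradient actually contains. This assumes the Paddle 1.x
# fluid API; the numeric check at the end is illustrative and is not part
# of the tests above.
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid.backward import calc_gradient

main, startup = fluid.Program(), fluid.Program()
with fluid.program_guard(main, startup):
    x = layers.create_parameter(dtype="float32", shape=[5, 10])
    y = layers.create_parameter(dtype="float32", shape=[10, 8])
    mul_out = layers.mul(x=x, y=y)
    mean_out = layers.mean(mul_out)
    # d(mean_out)/d(mul_out): mean_out averages 5 * 8 = 40 elements, so
    # every entry of this gradient is 1/40
    a = calc_gradient(mean_out, mul_out)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup)
a_val, = exe.run(main, fetch_list=[a])
assert np.allclose(a_val, 1.0 / 40)
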
loss = content_weight * content_loss(base_image_features,
                                     combination_features)
feature_layers = [
    'block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1',
    'block5_conv1'
]
for layer_name in feature_layers:
    layer_features = outputs_dict[layer_name]
    style_reference_features = layer_features[1]
    combination_features = layer_features[2]
    sl = style_loss(style_reference_features, combination_features)
    loss += (style_weight / len(feature_layers)) * sl
loss += total_variation_weight * total_variation_loss(combination_data)

# get the gradients of the generated image w.r.t. the loss
grads = calc_gradient(loss, combination_data)

# calc_gradient returns a single Variable for a single input and a
# list/tuple for several; fetch by name in either case
fetch = [loss.name]
if isinstance(grads, (list, tuple)):
    fetch.append(grads[0].name)
else:
    fetch.append(grads.name)

# the optimizer is only a vehicle for appending backward ops; backward()
# adds gradient ops to the program without applying any parameter update
optimizer = fluid.optimizer.SGD(0.0)
optimizer.backward(loss=loss)

test_program = fluid.default_main_program()
exe = fluid.Executor(fluid.CUDAPlace(1))
exe.run(fluid.default_startup_program())

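# A hypothetical driver loop for the style-transfer snippet above: plain
# gradient descent on the combination image. The image shape, step count,
# step size, and feed key are assumptions, not taken from the original
# script.
image_shape = [1, 3, 224, 224]   # assumed VGG-style input
num_steps, step_size = 10, 1.0   # illustrative values
image = np.random.uniform(-0.5, 0.5, image_shape).astype('float32')
for step in range(num_steps):
    loss_v, grad_v = exe.run(test_program,
                             feed={combination_data.name: image},
                             fetch_list=fetch)
    image -= step_size * grad_v  # move the image against the loss gradient
    print('step %d, loss %f' % (step, float(loss_v)))
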
def double_grad_check(x,
                      y,
                      x_init=None,
                      y_grads=None,
                      place=None,
                      program=None,
                      eps=1e-6,
                      atol=1e-5,
                      rtol=1e-3,
                      raise_exception=True):
    """
    Check gradients of gradients. This function appends a backward pass to
    the program before running the second-order gradient check.

    Args:
        x (Variable|list[Variable]): input variables to the program.
        y (Variable|list[Variable]): output variables to the program.
        x_init (numpy.array|list[numpy.array]|None): the init value for input x.
        y_grads (Variable|list[Variable]|None): the gradient variables with
            respect to y. If None, randomly initialized gradient variables
            are created and used.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        program (Program|None): a Program with forward pass. If None, use
            fluid.default_main_program().
        eps (float): perturbation for finite differences.
        atol (float): absolute tolerance.
        rtol (float): relative tolerance.
        raise_exception (bool): whether to raise an exception if the check
            fails. Default is True.
    Returns:
        True if all differences satisfy the numpy.allclose condition.
    """
    # check input arguments
    x = _as_list(x)
    for v in x:
        v.stop_gradient = False
        v.persistable = True
    y = _as_list(y)

    if program is None:
        program = fluid.default_main_program()

    # the scope is needed in both branches below, so bind it up front
    scope = fluid.executor.global_scope()
    if y_grads is None:
        y_grads = []
        y_grads_init = []
        for yi in y:
            dyi_name = _append_grad_suffix_(yi.name)
            np_type = dtype_to_np_dtype(yi.dtype)
            dy = program.global_block().create_var(
                name=dyi_name, shape=yi.shape, dtype=np_type, persistable=True)
            dy.stop_gradient = False
            v = np.random.random(size=yi.shape).astype(np_type)
            set_var_in_scope(scope, place, dyi_name, v)
            y_grads.append(dy)
            y_grads_init.append(v)
    else:
        y_grads = _as_list(y_grads)
        y_grads_init = [
            var_to_np_array_in_scope(scope, place, v.name) for v in y_grads
        ]

    # append first-order grads
    target_grads = calc_gradient(y, x, y_grads)

    # y_grads are inputs of the first-order backward pass, so they are
    # also inputs of the second-order backward pass.
    x += y_grads
    x_init = _as_list(x_init)
    x_init += y_grads_init

    return grad_check(x, target_grads, x_init, place, program, eps, atol,
                      rtol)

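# A typical call pattern for double_grad_check, building the forward pass
# under a program_guard so the default main program is the one checked.
# This is a sketch: the op under test (square), the shape, and the dtype
# are assumptions.
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers

shape = [2, 3]
place = fluid.CPUPlace()
main, startup = fluid.Program(), fluid.Program()
with fluid.program_guard(main, startup):
    x = layers.data('x', shape, False, dtype='float64')
    x.persistable = True
    y = layers.square(x)
    x_arr = np.random.uniform(-1.0, 1.0, shape).astype('float64')
    double_grad_check([x], y, x_init=x_arr, place=place)
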
def _compute_analytical_jacobian(program, x, y, place, scope):
    """Computes the analytical Jacobian for dy/dx.

    Args:
        program (Program): a Program with forward pass.
        x (Variable|list[Variable]): a Variable or list of Variables.
        y (Variable): the target variable.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        scope (Scope): the scope used to run the program.

    Returns:
        A list of 2-D numpy arrays of length len(x). Each array represents
        the Jacobian for dy/dx_i and has "x_size" rows and "dy_size"
        columns, where "x_size" is the number of elements in x_i and
        "dy_size" is the number of elements in y.
    """
    if not isinstance(y, fluid.framework.Variable):
        raise TypeError('y is not Variable')

    dy_name = _append_grad_suffix_(y.name)
    np_type = dtype_to_np_dtype(y.dtype)
    # create dy Variable in the Program
    dy = program.global_block().create_var(
        name=dy_name, shape=y.shape, dtype=np_type, persistable=True)
    # append backward
    dx = calc_gradient(y, x, dy)

    # init dy tensor in scope
    value = np.zeros(y.shape, dtype=np_type)
    dy_t = set_var_in_scope(scope, place, dy_name, value)

    exe = fluid.Executor(place)

    y_size = _product(y.shape)

    x = _as_list(x)
    jacobian = make_jacobian(x, y_size, np_type)

    # dx may contain None entries (a kernel may not define the gradient of
    # every input), so fetch only the non-None entries in exe.run
    filtered = [(i, dxi) for i, dxi in enumerate(dx) if dxi is not None]
    filtered_idx, filtered_dx = zip(*filtered)

    for i in six.moves.xrange(y_size):
        # set dy to the i-th basis vector, so each fetched dx is one
        # column of the Jacobian
        _set_item(dy_t, i, 1, np_type)

        dx_res = exe.run(program, scope=scope, fetch_list=filtered_dx)

        for j in six.moves.xrange(len(filtered_dx)):
            dx_idx = filtered_idx[j]
            if dx_res[j] is not None:
                jacobian[dx_idx][:, i] = dx_res[j].flatten()
            else:
                jacobian[dx_idx][:, i] = np.zeros(
                    dx[dx_idx].shape, dtype=np_type).flatten()

        _set_item(dy_t, i, 0, np_type)

    return jacobian

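# For comparison, the numerical side of the check is a central finite
# difference over each input element. This is a simplified, self-contained
# numpy sketch of that idea, not this module's _compute_numerical_jacobian:
import numpy as np

def numerical_jacobian(f, x, eps=1e-6):
    """f maps a flat numpy vector to a flat numpy vector; x is perturbed
    in place and restored."""
    y0 = f(x)
    jac = np.zeros((x.size, y0.size), dtype=x.dtype)
    for i in range(x.size):
        orig = x[i]
        x[i] = orig + eps
        y_pos = f(x).copy()
        x[i] = orig - eps
        y_neg = f(x)
        x[i] = orig
        # same layout as above: "x_size" rows, "dy_size" columns
        jac[i, :] = (y_pos - y_neg) / (2.0 * eps)
    return jac
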
def net(self, input, class_dim=1000):
    x = self.inception_stem(input)

    for i in range(4):
        x = self.inceptionA(x, name=str(i + 1))
        if i == 3:
            x1 = x
    x = self.reductionA(x1)

    # per-layer weights for the activation loss, plus a small epsilon used
    # below to keep the gradient normalization away from division by zero
    x1_coeff = fluid.layers.fill_constant(
        shape=[1], dtype='float32', value=1.0)
    x2_coeff = fluid.layers.fill_constant(
        shape=[1], dtype='float32', value=2.0)
    x3_coeff = fluid.layers.fill_constant(
        shape=[1], dtype='float32', value=0.5)
    x4_coeff = fluid.layers.fill_constant(
        shape=[1], dtype='float32', value=0.2)
    ep = fluid.layers.fill_constant(shape=[1], dtype='float32', value=1e-10)

    for i in range(7):
        x = self.inceptionB(x, name=str(i + 1))
        if i == 0:
            x2 = x
        if i == 4:
            x3 = x
    x = self.reductionB(x)

    for i in range(3):
        x = self.inceptionC(x, name=str(i + 1))
        if i == 0:
            x4 = x

    pool = fluid.layers.pool2d(
        input=x, pool_size=8, pool_type='avg', global_pooling=True)

    # loss: weighted sum of squared activations of the four tapped feature
    # maps (spatial borders cropped by 2), each normalized by its element
    # count
    scaling = fluid.layers.reduce_prod(
        fluid.layers.cast(fluid.layers.shape(x1), 'float32'))
    loss = x1_coeff * fluid.layers.reduce_sum(
        fluid.layers.square(x1[:, :, 2:-2, 2:-2])) / scaling
    scaling = fluid.layers.reduce_prod(
        fluid.layers.cast(fluid.layers.shape(x2), 'float32'))
    loss += x2_coeff * fluid.layers.reduce_sum(
        fluid.layers.square(x2[:, :, 2:-2, 2:-2])) / scaling
    scaling = fluid.layers.reduce_prod(
        fluid.layers.cast(fluid.layers.shape(x3), 'float32'))
    loss += x3_coeff * fluid.layers.reduce_sum(
        fluid.layers.square(x3[:, :, 2:-2, 2:-2])) / scaling
    scaling = fluid.layers.reduce_prod(
        fluid.layers.cast(fluid.layers.shape(x4), 'float32'))
    loss += x4_coeff * fluid.layers.reduce_sum(
        fluid.layers.square(x4[:, :, 2:-2, 2:-2])) / scaling

    # grad: gradient of the loss w.r.t. the input image, normalized by its
    # mean absolute value
    grad = calc_gradient(loss, input)
    grad = grad / fluid.layers.elementwise_max(
        fluid.layers.reduce_mean(fluid.layers.abs(grad)), ep)

    drop = fluid.layers.dropout(x=pool, dropout_prob=0.2)
    stdv = 1.0 / math.sqrt(drop.shape[1] * 1.0)
    out = fluid.layers.fc(
        input=drop,
        size=class_dim,
        param_attr=ParamAttr(
            initializer=fluid.initializer.Uniform(-stdv, stdv),
            name="final_fc_weights"),
        bias_attr=ParamAttr(
            initializer=fluid.initializer.Uniform(-stdv, stdv),
            name="final_fc_offset"))
    return loss, grad, out

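# A hypothetical DeepDream-style driver for the network above: gradient
# ascent on the input image using the normalized grad returned by net().
# The class name, input shape, step size, and iteration count are all
# assumptions; whether gradients reach a data layer depends on its
# stop_gradient flag, set explicitly below.
import numpy as np
import paddle.fluid as fluid

model = InceptionV4()  # assumed class defining the net() method above
main, startup = fluid.Program(), fluid.Program()
with fluid.program_guard(main, startup):
    img = fluid.layers.data('img', shape=[3, 299, 299], dtype='float32')
    img.stop_gradient = False  # let calc_gradient reach the input
    loss, grad, out = model.net(img)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup)
image = np.random.uniform(-0.5, 0.5, [1, 3, 299, 299]).astype('float32')
for _ in range(100):
    loss_v, grad_v = exe.run(main,
                             feed={'img': image},
                             fetch_list=[loss, grad])
    image += 0.1 * grad_v  # ascend: amplify the tapped activations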