def __call__(self, param, grad, block):
    """Add L2 weight decay ops to network

    Adds L2 weight decay ops.
    L2WeightDecay = reg_coeff * parameter

    Args:
        param: parameter variable for which regularization is applied
        grad: gradient of the parameter; unused by L2 decay
        block: block in which variable is to be created

    Returns:
        new variable for weight decay
    """
    assert isinstance(param, framework.Variable)
    assert isinstance(block, framework.Block)

    if framework.in_dygraph_mode():
        return _C_ops.scale(param, "scale", self._regularization_coeff)
    else:
        decay = block.create_var(
            dtype=param.dtype, shape=param.shape, lod_level=param.lod_level)

        # Append Op to calculate decay
        block.append_op(
            type='scale',
            inputs={"X": param},
            outputs={"Out": decay},
            attrs={"scale": self._regularization_coeff})

        return decay
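For orientation, a minimal sketch of the dygraph branch in isolation; the `ToyL2Decay` wrapper and the coefficient value are assumptions for illustration, not the actual regularizer class:

import numpy as np
import paddle

class ToyL2Decay:
    """Hypothetical stand-in for the regularizer class that owns __call__ above."""

    def __init__(self, coeff):
        self._regularization_coeff = coeff

    def decay(self, param):
        # Mirrors the dygraph branch above: decay = reg_coeff * parameter
        return param * self._regularization_coeff

reg = ToyL2Decay(0.01)
p = paddle.to_tensor(np.ones([2, 2]), dtype='float32')
d = reg.decay(p)
print(float(d.sum()))  # 0.01 over 4 elements = 0.04

An optimizer would add this decay tensor onto the parameter's gradient before the update step.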
def test(self):
    input_data = np.ones([1, 1])
    w = paddle.to_tensor(input_data, 'float32', stop_gradient=False)
    out = _C_ops.scale(w, 'scale', 0.1)
    out.backward()

    # Mutating the gradient in place bumps its inplace version counter.
    w.grad.scale_(scale=0.5)
    w._reset_grad_inplace_version(False)
    assert w.grad._inplace_version() == 1
def test(self):
    paddle.set_device('cpu')
    input_data = np.ones([2, 2]).astype('float32')
    w = paddle.to_tensor(input_data, 'float32', stop_gradient=False)

    # Register a hook that runs after each backward pass over w.
    _clear_grad = clear_grad(w, a="1")
    w._register_backward_hook(_clear_grad)
    for i in range(10):
        out = _C_ops.scale(w, 'scale', 0.1)
        out.backward()
def test(self):
    input_data = np.ones([1, 1])
    w = paddle.to_tensor(input_data, 'float32', stop_gradient=False)

    _clear_grad = clear_grad_test_0(w, a="1")
    w._register_backward_hook(_clear_grad)
    for i in range(2):
        print(" Step: ", i)
        out0 = _C_ops.scale(w, 'scale', 0.1)
        out = _C_ops.matmul_v2(out0, w, 'trans_x', False, 'trans_y', False)
        out.backward()
        assert w.grad[0] == 0.15
def test(self):
    input_data = np.ones([1, 1])
    w = paddle.to_tensor(input_data, 'float32', stop_gradient=False)
    c = Counter()

    _clear_grad = clear_grad_test_1(w, c)
    w._register_backward_hook(_clear_grad)
    for c.step in range(5):
        out0 = _C_ops.scale(w, 'scale', 0.1)
        out = _C_ops.matmul_v2(out0, w, 'trans_x', False, 'trans_y', False)
        out.backward()

        if c.step == 1:
            w.clear_gradient(False)
            assert c.num_calls == 1
            c.num_calls = 0
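The three tests above rely on `clear_grad`-style hook factories and a `Counter` that are defined elsewhere in the test file. A minimal sketch, assuming the hook's job is to count its invocations and clear the parameter's gradient; the names and exact semantics are illustrative, not the real helpers:

import paddle

class Counter:
    """Hypothetical mutable state shared between the test and the hook."""

    def __init__(self):
        self.step = 0       # loop index, assigned by the test's for-loop
        self.num_calls = 0  # number of times the backward hook fired

def clear_grad_sketch(w, c):
    """Build a backward hook that counts its calls and clears w's gradient."""

    @paddle.no_grad()
    def hook(*_):
        c.num_calls += 1
        assert w.grad is not None
        # set_to_zero=False releases the grad buffer instead of zero-filling it.
        w.clear_gradient(False)

    return hook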
def _scalar_elementwise_op_(var, scale, bias):
    return _C_ops.scale(var, 'scale', scale, 'bias', bias)
def _scalar_elementwise_op_(var, scale, bias):
    if framework.in_dygraph_mode():
        return _C_ops.final_state_scale(var, float(scale), bias, True)
    return _C_ops.scale(var, 'scale', scale, 'bias', bias)
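Both variants compute `scale * var + bias`. A minimal check through the public `paddle.scale` API, which exposes the same scale-and-bias semantics as the `_C_ops` calls above:

import paddle

x = paddle.ones([2, 2], dtype='float32')
y = paddle.scale(x, scale=2.0, bias=1.0)  # bias_after_scale defaults to True
print(y.numpy())  # every element is 2.0 * 1.0 + 1.0 = 3.0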