def update(self, learning_rate, stream=None):
    # Gradient step on the weights; when an L1 penalty is active, use the
    # fused kernel that applies the shrinkage term and clamps at zero.
    if self.l1_penalty > 0:
        op.add_vec_l1reg(self.W, self.dW, -learning_rate, self.l1_penalty,
                         out=self.W, stream=stream)
    else:
        op.add_vec(self.W, -learning_rate, self.dW, stream=stream)
    # Bias gradient step (the bias is not regularized).
    op.add_vec(self.b, -learning_rate, self.db, stream=stream)
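For reference, the semantics that the test below encodes can be sketched in pure NumPy as a clipped L1 update: take the scaled gradient step plus an L1 shrinkage term, then clamp any weight that would cross zero to exactly zero. This is a hypothetical reference implementation, not the library's kernel; in particular, whether the penalty term should also be scaled by eta is an assumption, since the test only exercises eta = 1.0.

import numpy as np

def add_vec_l1reg_ref(w, dw, eta, l1_penalty, out):
    # Hypothetical CPU reference for op.add_vec_l1reg (the name and the
    # eta-scaling of the penalty are assumptions; the test only covers
    # eta == 1.0).
    nw = w + eta * dw - l1_penalty * np.sign(w)
    # Truncate any coordinate whose update would flip its sign to zero,
    # so the L1 term can zero a weight out but never push it past zero.
    out[:] = np.where(w > 0, np.maximum(0, nw), np.minimum(0, nw))
    return out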
import numpy as np
from numpy.testing import assert_allclose
# `op` is the module under test (CPU/GPU array ops), imported at module level.

def test_l1reg():
    # NOTE: you could argue whether it's okay to "jump over zero"
    # when applying both the regular gradient and the L1 gradient.
    l1_penalty = 0.005
    w = np.array([3.0, 0.01, -0.01, 0.010, -0.010]).astype(np.float32)
    dw = np.array([2.9, 0.10, -0.10, 0.006, +0.006]).astype(np.float32)
    eta = 1.0
    nw = w + dw - l1_penalty * np.sign(w)
    # Clamp at zero: a weight may shrink to zero but never change sign.
    expected = np.where(w > 0, np.maximum(0, nw), np.minimum(0, nw))

    # CPU path
    y = np.empty_like(dw)
    op.add_vec_l1reg(w, dw, eta, l1_penalty, out=y)
    assert_allclose(expected, y)

    # GPU path: same inputs, compared against the same expected result
    wd = op.to_gpu(w)
    dwd = op.to_gpu(dw)
    yd = op.to_gpu(np.empty_like(dw))
    op.add_vec_l1reg(wd, dwd, eta, l1_penalty, out=yd)
    assert_allclose(expected, op.to_cpu(yd))
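Note that only the last element actually exercises the clamp: with w = -0.010 and dw = +0.006, nw = -0.010 + 0.006 + 0.005 = +0.001, which would carry the weight past zero, so the expected value truncates it to 0. This is exactly the "jump over zero" case the NOTE refers to; the other four elements move away from zero and pass through unclipped.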