@property
def grad_array(self):
    ''' Returns the gradient array. '''
    if self._tmp_grad_array is None:
        if self.array is None:
            raise ValueError('Parameter not setup')
        self._tmp_grad_array = ca.zeros_like(self.array)
    return self._tmp_grad_array
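# Hypothetical usage sketch for the property above (the `@property` decorator
# is implied by attribute-style access such as `p.grad_array` elsewhere in
# this section). `Parameter` is an assumed host class name suggested by the
# error message; the np/ca imports used throughout this section are assumed.
# The gradient buffer is allocated lazily on first access and reused after.
p = Parameter()
p.array = ca.ones((4, 3))
g = p.grad_array           # first access allocates a zeroed (4, 3) buffer
assert g is p.grad_array   # later accesses return the same buffer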
def __init__(self, node, in_port, out_port, in_arrays, grad_arrays):
    self.node = node
    self.in_port = in_port
    self.out_port = out_port
    self.in_arrays = {}
    for k, array in in_arrays.items():
        self.in_arrays[k] = ca.array(array)
    self.grad_arrays = {}
    for k, array in grad_arrays.items():
        self.grad_arrays[k + '_grad'] = ca.zeros_like(array)
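# Hypothetical construction example for the __init__ above; `NodeContext`
# and `my_node` are placeholder names, not confirmed by this section. Input
# arrays are copied to device arrays, and each entry in grad_arrays gets a
# zeroed device buffer keyed with an '_grad' suffix.
ctx = NodeContext(node=my_node, in_port=0, out_port=0,
                  in_arrays={'x': np.ones((2, 3))},
                  grad_arrays={'x': np.ones((2, 3))})
# ctx.in_arrays['x']         -> device copy of the input array
# ctx.grad_arrays['x_grad']  -> zeroed device buffer with the input's shape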
def _update(self):
    # Forward propagation
    next_x = self.x.array
    x_feats = [None]*len(self.layers)
    x_grams = [None]*len(self.layers)
    for l, layer in enumerate(self.layers):
        next_x = layer.fprop(next_x)
        if self.subject_weights[l] > 0:
            x_feats[l] = next_x
        if self.style_weights[l] > 0:
            x_feats[l] = next_x
            # Cache the Gram matrix so the backward pass can reuse it.
            x_grams[l] = gram_matrix(next_x)

    # Backward propagation
    grad = ca.zeros_like(next_x)
    loss = ca.zeros(1)
    for l, layer in reversed(list(enumerate(self.layers))):
        if self.subject_weights[l] > 0:
            # Content (subject) loss: squared distance to the subject features.
            diff = x_feats[l] - self.subject_feats[l]
            norm = ca.sum(ca.fabs(diff)) + 1e-8
            weight = float(self.subject_weights[l]) / norm
            grad += diff * weight
            loss += 0.5*weight*ca.sum(diff**2)
        if self.style_weights[l] > 0:
            # Style loss: squared distance between Gram matrices.
            diff = x_grams[l] - self.style_grams[l]
            n_channels = diff.shape[0]
            x_feat = ca.reshape(x_feats[l], (n_channels, -1))
            style_grad = ca.reshape(ca.dot(diff, x_feat), x_feats[l].shape)
            norm = ca.sum(ca.fabs(style_grad))
            weight = float(self.style_weights[l]) / norm
            style_grad *= weight
            grad += style_grad
            loss += 0.25*weight*ca.sum(diff**2)
        grad = layer.bprop(grad)

    if self.tv_weight > 0:
        # Total variation regularization of the input image.
        x = ca.reshape(self.x.array, (3, 1) + grad.shape[2:])
        tv = self.tv_conv.fprop(x, self.tv_kernel)
        tv *= self.tv_weight
        grad -= ca.reshape(tv, grad.shape)

    ca.copyto(self.x.grad_array, grad)
    return loss
def _update(self):
    # Forward propagation
    next_x = self.x.array
    x_feats = [None] * len(self.layers)
    for l, layer in enumerate(self.layers):
        next_x = layer.fprop(next_x)
        # A single feature cache serves both content and style losses.
        if self.subject_weights[l] > 0 or self.style_weights[l] > 0:
            x_feats[l] = next_x

    # Backward propagation
    grad = ca.zeros_like(next_x)
    loss = ca.zeros(1)
    for l, layer in reversed(list(enumerate(self.layers))):
        if self.subject_weights[l] > 0:
            diff = x_feats[l] - self.subject_feats[l]
            norm = ca.sum(ca.fabs(diff)) + 1e-8
            weight = float(self.subject_weights[l]) / norm
            grad += diff * weight
            loss += 0.5 * weight * ca.sum(diff**2)
        if self.style_weights[l] > 0:
            # Unlike the variant above, the Gram matrix is recomputed here
            # during the backward pass instead of being cached.
            diff = gram_matrix(x_feats[l]) - self.style_grams[l]
            n_channels = diff.shape[0]
            x_feat = ca.reshape(x_feats[l], (n_channels, -1))
            style_grad = ca.reshape(ca.dot(diff, x_feat), x_feats[l].shape)
            norm = ca.sum(ca.fabs(style_grad))
            weight = float(self.style_weights[l]) / norm
            style_grad *= weight
            grad += style_grad
            loss += 0.25 * weight * ca.sum(diff**2)
        grad = layer.bprop(grad)

    if self.tv_weight > 0:
        x = ca.reshape(self.x.array, (3, 1) + grad.shape[2:])
        tv = self.tv_conv.fprop(x, self.tv_kernel)
        tv *= self.tv_weight
        grad -= ca.reshape(tv, grad.shape)

    ca.copyto(self.x.grad_array, grad)
    return loss
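# Both _update variants above depend on gram_matrix, which is not shown in
# this section. A minimal sketch, assuming feature maps with the channel
# axis first (a leading singleton batch axis would reshape the same way):
# each entry of the Gram matrix is the inner product of two flattened
# channel maps.
def gram_matrix(feats):
    n_channels = feats.shape[0]
    feats_flat = ca.reshape(feats, (n_channels, -1))
    return ca.dot(feats_flat, feats_flat.T)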
def test_dot():
    # matrix @ matrix
    a = np.random.normal(size=(5, 5))
    b = np.random.normal(size=(5, 5))
    c_np = np.dot(a, b)
    a = ca.array(a)
    b = ca.array(b)
    c_ca = ca.dot(a, b)
    print(np.allclose(c_np, np.array(c_ca)))

    # out-argument variant
    c_ca = ca.zeros_like(a)
    ca.dot(a, b, c_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    # vector @ vector (inner product)
    a_np = np.random.normal(size=(5,))
    b_np = np.random.normal(size=(5,))
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.dot(a_np, b_np)
    c_ca = ca.dot(a_ca, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    # transposed first operand, square matrices
    a_np = np.random.normal(size=(5, 5))
    b_np = np.random.normal(size=(5, 5))
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.dot(a_np.T, b_np)
    c_ca = ca.dot(a_ca.T, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    # matrix @ matrix.T
    a_np = np.random.normal(size=(3, 4))
    b_np = np.random.normal(size=(5, 4))
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.dot(a_np, b_np.T)
    c_ca = ca.dot(a_ca, b_ca.T)
    print(np.allclose(c_np, np.array(c_ca)))

    # matrix.T @ matrix, non-square
    a_np = np.random.normal(size=(4, 3))
    b_np = np.random.normal(size=(4, 5))
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.dot(a_np.T, b_np)
    c_ca = ca.dot(a_ca.T, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    # matrix.T @ matrix.T
    a_np = np.random.normal(size=(4, 3))
    b_np = np.random.normal(size=(5, 4))
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.dot(a_np.T, b_np.T)
    c_ca = ca.dot(a_ca.T, b_ca.T)
    print(np.allclose(c_np, np.array(c_ca)))

    # vector @ matrix
    a_np = np.random.normal(size=(4,))
    b_np = np.random.normal(size=(4, 5))
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.dot(a_np, b_np)
    c_ca = ca.dot(a_ca, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    # matrix @ vector
    a_np = np.random.normal(size=(4, 5))
    b_np = np.random.normal(size=(5,))
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.dot(a_np, b_np)
    c_ca = ca.dot(a_ca, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    # vector @ matrix.T
    a_np = np.random.normal(size=(4,))
    b_np = np.random.normal(size=(5, 4))
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.dot(a_np, b_np.T)
    c_ca = ca.dot(a_ca, b_ca.T)
    print(np.allclose(c_np, np.array(c_ca)))

    # matrix.T @ vector
    a_np = np.random.normal(size=(5, 4))
    b_np = np.random.normal(size=(5,))
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.dot(a_np.T, b_np)
    c_ca = ca.dot(a_ca.T, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))
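# The checks above print True/False rather than asserting, so a hypothetical
# minimal harness to run them is simply:
if __name__ == '__main__':
    test_dot()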
def _setup(self, params, batch_size):
    self.params = params
    self.batch_size = batch_size
    self.steps = [ca.zeros_like(p.grad_array) for p in params]
@property
def grad_array(self):
    ''' Returns the gradient array. '''
    if self._tmp_grad_array is None:
        self._tmp_grad_array = ca.zeros_like(self.array)
    return self._tmp_grad_array
def init_state(self, param):
    m = ca.zeros_like(param.grad_array)
    v = ca.zeros_like(param.grad_array)
    t = np.zeros(1, dtype=int)
    return m, v, t
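# A minimal sketch of the Adam update that would consume the (m, v, t)
# state returned above. The hook name `step`, the hyperparameter names
# (learn_rate, beta1, beta2, eps), and the assumption that parameters
# expose `array` alongside `grad_array` are not confirmed by this section.
def step(self, param, state):
    m, v, t = state
    t += 1                                # size-1 array, so state mutates in place
    grad = param.grad_array
    m *= self.beta1
    m += (1.0 - self.beta1) * grad        # first-moment (mean) estimate
    v *= self.beta2
    v += (1.0 - self.beta2) * grad**2     # second-moment (uncentered variance) estimate
    # fold both bias corrections into a single step size
    alpha = self.learn_rate * np.sqrt(1.0 - self.beta2**int(t)) / (1.0 - self.beta1**int(t))
    param.array -= alpha * m / (ca.sqrt(v) + self.eps)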
def _setup(self, params, batch_size):
    self.batch_size = batch_size
    self.params = params
    self.steps = [ca.zeros_like(param.grad_array) for param in params]
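# A minimal sketch of an update pass consuming the per-parameter `steps`
# buffers that the _setup variants above allocate, written here as classical
# momentum. The method name `update`, the learn_rate/momentum attributes,
# and the averaging by batch_size are assumptions.
def update(self):
    for param, step in zip(self.params, self.steps):
        step *= self.momentum
        step -= self.learn_rate * param.grad_array / self.batch_size
        param.array += step               # apply the accumulated step in place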
def _setup(self, params, batch_size):
    self.params = params
    self.batch_size = batch_size
    self.steps = [ca.zeros_like(p.values) for p in params]
def init_state(self, param):
    last_step = ca.zeros_like(param.grad_array)
    return last_step
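# A minimal sketch of the momentum step matching the per-parameter
# `last_step` state above; this mirrors the batched loop shown earlier,
# and all names other than grad_array are assumptions.
def step(self, param, last_step):
    last_step *= self.momentum
    last_step -= self.learn_rate * param.grad_array
    param.array += last_step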
def bprop(self):
    return {'x_grad': ca.zeros_like(self.array)}
def bprop(self): return {"x_grad": ca.zeros_like(self.array)}
def init_state(self, param):
    mean_square = ca.zeros_like(param.grad_array)
    return mean_square
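# A minimal sketch of the RMSProp update that would consume the mean_square
# state above; the decay/eps/learn_rate names are assumptions.
def step(self, param, mean_square):
    grad = param.grad_array
    mean_square *= self.decay
    mean_square += (1.0 - self.decay) * grad**2   # running mean of squared gradients
    param.array -= self.learn_rate * grad / (ca.sqrt(mean_square) + self.eps)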