def bprop(self):
    grad = ca.reshape(self.grad_array, self.bcast_shape)
    # Gradient w.r.t. mu: mu * grad
    ca.multiply(self.mu.array, grad, self.mu.grad_array)
    # Gradient w.r.t. logvar: 0.5 * (exp(logvar) - 1) * grad
    ca.exp(self.logvar.array, out=self.logvar.grad_array)
    self.logvar.grad_array -= 1
    self.logvar.grad_array *= 0.5
    self.logvar.grad_array *= grad
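The two gradients above are those of the Gaussian KL penalty familiar from variational autoencoders, KL(N(mu, exp(logvar)) || N(0, 1)) = -0.5 * sum(1 + logvar - mu^2 - exp(logvar)), whose partial derivatives are mu and 0.5 * (exp(logvar) - 1). A minimal standalone NumPy sketch (the kl helper and all names are illustrative, not part of the library) that checks them by finite differences:

import numpy as np

# KL(N(mu, exp(logvar)) || N(0, 1)), summed over all elements
def kl(mu, logvar):
    return -0.5 * np.sum(1 + logvar - mu**2 - np.exp(logvar))

mu = np.random.normal(size=(4, 3))
logvar = np.random.normal(size=(4, 3))

# Analytic gradients, matching the bprop above (upstream grad = 1)
dmu = mu
dlogvar = 0.5 * (np.exp(logvar) - 1)

# Finite-difference check of one entry of each gradient
eps = 1e-6
e = np.zeros_like(mu)
e[0, 0] = eps
print(np.allclose((kl(mu + e, logvar) - kl(mu, logvar)) / eps,
                  dmu[0, 0], atol=1e-4))
print(np.allclose((kl(mu, logvar + e) - kl(mu, logvar)) / eps,
                  dlogvar[0, 0], atol=1e-4))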
def bprop(self):
    # Gradient w.r.t. x through the batch variance
    ca.multiply(self._tmp_batch_centered, self.out_grad, self.x.out_grad)
    tmp = ca.mean(self.x.out_grad, axis=0, keepdims=True)
    ca.multiply(self._tmp_batch_centered, tmp, self.x.out_grad)
    self.x.out_grad *= -1
    self.x.out_grad *= self._tmp_batch_inv_std
    self.x.out_grad *= self._tmp_batch_inv_std
    # Gradient w.r.t. x through the batch mean
    ca.mean(self.out_grad, axis=0, keepdims=True, out=tmp)
    self.x.out_grad += self.out_grad
    self.x.out_grad -= tmp
    self.x.out_grad *= self._tmp_batch_inv_std
    if self.affine:
        self.x.out_grad *= self.gamma.array
        # Normalized input
        self._tmp_batch_centered *= self._tmp_batch_inv_std
        self._tmp_batch_centered *= self.out_grad
        ca.sum(self._tmp_batch_centered, axis=0, keepdims=True,
               out=self.gamma.grad_array)
        ca.sum(self.out_grad, axis=0, keepdims=True,
               out=self.beta.grad_array)
def fprop(self):
    if self.phase == 'train':
        # Calculate batch mean
        tmp = ca.mean(self.x.out, axis=0, keepdims=True)
        # Center input
        ca.subtract(self.x.out, tmp, self._tmp_batch_centered)
        # Update running mean
        tmp *= 1 - self.momentum
        self.running_mean *= self.momentum
        self.running_mean += tmp
        # Calculate batch variance
        ca.power(self._tmp_batch_centered, 2, self.out)
        ca.mean(self.out, axis=0, keepdims=True,
                out=self._tmp_batch_inv_std)
        # Calculate inverse std: 1 / sqrt(E[(x - E(x))^2] + eps)
        self._tmp_batch_inv_std += self.eps
        ca.sqrt(self._tmp_batch_inv_std, self._tmp_batch_inv_std)
        ca.power(self._tmp_batch_inv_std, -1, self._tmp_batch_inv_std)
        # Normalize input
        ca.multiply(self._tmp_batch_centered, self._tmp_batch_inv_std,
                    self.out)
        # Update running inverse std
        self.running_std *= self.momentum
        ca.multiply(self._tmp_batch_inv_std, 1 - self.momentum, tmp)
        self.running_std += tmp
    elif self.phase == 'test':
        ca.subtract(self.x.out, self.running_mean, self.out)
        self.out *= self.running_std
    else:
        raise ValueError('Invalid phase: %s' % self.phase)
    if self.affine:
        self.out *= self.gamma.array
        self.out += self.beta.array
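For reference, a NumPy sketch of the same train-phase normalization (a hypothetical helper; no affine parameters or running-average bookkeeping), storing the inverse standard deviation the way the code above does:

import numpy as np

def batch_norm_train(x, eps=1e-5):
    # Center by the batch mean
    centered = x - x.mean(axis=0, keepdims=True)
    # Inverse std: 1 / sqrt(E[(x - E(x))^2] + eps)
    inv_std = 1.0 / np.sqrt((centered**2).mean(axis=0, keepdims=True) + eps)
    return centered * inv_std

x = np.random.normal(loc=3.0, scale=2.0, size=(64, 10))
out = batch_norm_train(x)
# The normalized batch has (approximately) zero mean and unit std
print(np.allclose(out.mean(axis=0), 0, atol=1e-6))
print(np.allclose(out.std(axis=0), 1, atol=1e-2))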
def bprop(self):
    # Route the gradient to the input(s) equal to the output, i.e. the max
    if self.lhs.bpropable:
        tmp = ca.equal(self.lhs.array, self.array)
        ca.multiply(self.grad_array, tmp, self.lhs.grad_array)
    if self.rhs.bpropable:
        ca.equal(self.rhs.array, self.array, self.rhs.grad_array)
        self.rhs.grad_array *= self.grad_array
def bprop(self):
    # Gradient w.r.t. x through the batch variance
    ca.multiply(self._tmp_batch_centered, self.grad_array, self.x.grad_array)
    tmp = ca.mean(ca.mean(self.x.grad_array, axis=0, keepdims=True),
                  axis=(2, 3), keepdims=True)
    ca.multiply(self._tmp_batch_centered, tmp, self.x.grad_array)
    self.x.grad_array *= -1
    self.x.grad_array *= self._tmp_batch_inv_std
    self.x.grad_array *= self._tmp_batch_inv_std
    # Gradient w.r.t. x through the batch mean
    tmp = ca.mean(ca.mean(self.grad_array, axis=0, keepdims=True),
                  axis=(2, 3), keepdims=True)
    self.x.grad_array += self.grad_array
    self.x.grad_array -= tmp
    self.x.grad_array *= self._tmp_batch_inv_std
    if self.affine:
        self.x.grad_array *= self.gamma.array
        # Normalized input
        self._tmp_batch_centered *= self._tmp_batch_inv_std
        self._tmp_batch_centered *= self.grad_array
        ca.sum(ca.sum(self._tmp_batch_centered, axis=(2, 3), keepdims=True),
               axis=0, keepdims=True, out=self.gamma.grad_array)
        ca.sum(ca.sum(self.grad_array, axis=(2, 3), keepdims=True),
               axis=0, keepdims=True, out=self.beta.grad_array)
def bprop(self):
    if self.keepgrads:
        self.x.grad_array = self.grad_array
    else:
        # Gradient passes through only where a_min < x < a_max
        ca.multiply(self.grad_array, self.x.array > self.a_min,
                    self.x.grad_array)
        self.x.grad_array *= self.x.array < self.a_max
def test_binary():
    a_np = np.random.normal(size=(5, 5))
    b_np = np.random.normal(size=(5, 5))
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)

    c_np = np.add(a_np, b_np)
    c_ca = ca.add(a_ca, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    np.add(a_np, b_np, a_np)
    ca.add(a_ca, b_ca, a_ca)
    print(np.allclose(a_np, np.array(a_ca)))

    np.multiply(a_np, b_np, a_np)
    ca.multiply(a_ca, b_ca, a_ca)
    print(np.allclose(a_np, np.array(a_ca)))

    a_np = np.random.normal(size=(5, 5))
    b_np = np.random.normal(size=(5, 5)) > 0
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.multiply(a_np, b_np)
    c_ca = ca.multiply(a_ca, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))
def bprop(self):
    if self.lhs_bprop:
        # d(l/r)/dl = 1/r
        ca.divide(self.out_grad, self.rhs.out, out=self.lhs.out_grad)
    if self.rhs_bprop:
        # d(l/r)/dr = -l/r^2 = -out/r
        ca.multiply(self.out_grad, self.out, out=self.rhs.out_grad)
        self.rhs.out_grad /= self.rhs.out
        ca.negative(self.rhs.out_grad, out=self.rhs.out_grad)
def bprop(self):
    if self.lhs.bpropable:
        # d(l/r)/dl = 1/r
        ca.divide(self.grad_array, self.rhs.array, out=self.lhs.grad_array)
    if self.rhs.bpropable:
        # d(l/r)/dr = -l/r^2 = -out/r
        ca.multiply(self.grad_array, self.array, out=self.rhs.grad_array)
        self.rhs.grad_array /= self.rhs.array
        ca.negative(self.rhs.grad_array, out=self.rhs.grad_array)
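Both versions use the same shortcut: since the op's output is already lhs / rhs, the gradient w.r.t. rhs, -grad * lhs / rhs^2, can be computed as -grad * out / rhs without touching lhs again. A small NumPy check of that identity (all names illustrative):

import numpy as np

l = np.random.normal(size=(4, 4))
r = np.random.uniform(1.0, 2.0, size=(4, 4))  # keep away from zero
g = np.random.normal(size=(4, 4))             # upstream gradient

out = l / r
print(np.allclose(-g * l / r**2, -g * out / r))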
def bprop(self):
    if self.lhs_bprop:
        tmp = ca.equal(self.lhs.out, self.out)
        ca.multiply(self.out_grad, tmp, self.lhs.out_grad)
    if self.rhs_bprop:
        ca.equal(self.rhs.out, self.out, self.rhs.out_grad)
        self.rhs.out_grad *= self.out_grad
def bprop(self):
    if self.keepgrads:
        self.x.out_grad = self.out_grad
    else:
        ca.multiply(self.out_grad, self.x.out > self.a_min,
                    self.x.out_grad)
        self.x.out_grad *= self.x.out < self.a_max
def bprop(self):
    # Gradient w.r.t. x through the batch variance
    ca.multiply(self._tmp_batch_centered, self.out_grad, self.x.out_grad)
    tmp = ca.mean(ca.mean(self.x.out_grad, axis=0, keepdims=True),
                  axis=(2, 3), keepdims=True)
    ca.multiply(self._tmp_batch_centered, tmp, self.x.out_grad)
    self.x.out_grad *= -1
    self.x.out_grad *= self._tmp_batch_inv_std
    self.x.out_grad *= self._tmp_batch_inv_std
    # Gradient w.r.t. x through the batch mean
    tmp = ca.mean(ca.mean(self.out_grad, axis=0, keepdims=True),
                  axis=(2, 3), keepdims=True)
    self.x.out_grad += self.out_grad
    self.x.out_grad -= tmp
    self.x.out_grad *= self._tmp_batch_inv_std
    if self.affine:
        self.x.out_grad *= self.gamma.array
        # Normalized input
        self._tmp_batch_centered *= self._tmp_batch_inv_std
        self._tmp_batch_centered *= self.out_grad
        ca.sum(ca.sum(self._tmp_batch_centered, axis=(2, 3), keepdims=True),
               axis=0, keepdims=True, out=self.gamma.grad_array)
        ca.sum(ca.sum(self.out_grad, axis=(2, 3), keepdims=True),
               axis=0, keepdims=True, out=self.beta.grad_array)
def fprop(self): if self.phase == "train": ca.less(self.dropout, ca.random.uniform(size=self.mask_shape), self._tmp_mask) ca.multiply(self.x.out, self._tmp_mask, self.out) elif self.phase == "test": ca.multiply(self.x.out, 1.0 - self.dropout, self.out) else: raise ValueError("Invalid phase: %s" % self.phase)
def fprop(self):
    if self.phase == 'train':
        ca.less(self.dropout, ca.random.uniform(size=self.mask_shape),
                self._tmp_mask)
        ca.multiply(self.x.array, self._tmp_mask, self.array)
    elif self.phase == 'test':
        ca.multiply(self.x.array, 1.0 - self.dropout, self.array)
    else:
        raise ValueError('Invalid phase: %s' % self.phase)
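In both dropout variants the train phase keeps each unit with probability 1 - dropout (mask = dropout < uniform), while the test phase scales by 1 - dropout, so the two phases agree in expectation. A NumPy sketch of that equivalence (names and sizes illustrative):

import numpy as np

dropout = 0.3
x = np.random.normal(size=(8,))

# Average many train-time mask draws for a fixed input
draws = np.stack([x * (dropout < np.random.uniform(size=x.shape))
                  for _ in range(100000)])

# The average approaches the deterministic test-time scaling
print(np.allclose(draws.mean(axis=0), x * (1.0 - dropout), atol=0.02))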
def fprop(self):
    # -log(1 - pred)*(1 - target) - log(pred)*target
    tmp1 = 1 - self.pred.out
    tmp1 += self.eps
    ca.log(tmp1, tmp1)
    tmp2 = 1 - self.target.out
    ca.multiply(tmp1, tmp2, tmp1)
    ca.add(self.pred.out, self.eps, tmp2)
    ca.log(tmp2, tmp2)
    tmp2 *= self.target.out
    ca.add(tmp1, tmp2, tmp1)
    tmp1 *= -1
    ca.sum(tmp1, axis=1, keepdims=True, out=self.out)
def fprop(self):
    # -log(1 - pred)*(1 - target) - log(pred)*target
    tmp1 = 1 - self.pred.array
    tmp1 += self.eps
    ca.log(tmp1, tmp1)
    tmp2 = 1 - self.target.array
    ca.multiply(tmp1, tmp2, tmp1)
    ca.add(self.pred.array, self.eps, tmp2)
    ca.log(tmp2, tmp2)
    tmp2 *= self.target.array
    ca.add(tmp1, tmp2, tmp1)
    tmp1 *= -1
    ca.sum(tmp1, axis=1, out=self.array)
def fprop(self):
    if self.phase == 'train':
        # Calculate batch mean
        tmp = ca.mean(ca.mean(self.x.array, axis=0, keepdims=True),
                      axis=(2, 3), keepdims=True)
        # Center input
        ca.subtract(self.x.array, tmp, self._tmp_batch_centered)
        # Update running mean
        tmp *= 1 - self.momentum
        self.running_mean *= self.momentum
        self.running_mean += tmp
        # Calculate batch variance
        ca.power(self._tmp_batch_centered, 2, self.array)
        ca.mean(ca.mean(self.array, axis=0, keepdims=True),
                axis=(2, 3), keepdims=True, out=self._tmp_batch_inv_std)
        # Calculate inverse std: 1 / sqrt(E[(x - E(x))^2] + eps)
        self._tmp_batch_inv_std += self.eps
        ca.sqrt(self._tmp_batch_inv_std, self._tmp_batch_inv_std)
        ca.power(self._tmp_batch_inv_std, -1, self._tmp_batch_inv_std)
        # Normalize input
        ca.multiply(self._tmp_batch_centered, self._tmp_batch_inv_std,
                    self.array)
        # Update running inverse std
        self.running_std *= self.momentum
        ca.multiply(self._tmp_batch_inv_std, 1 - self.momentum, tmp)
        self.running_std += tmp
        if self.noise_std > 0.0:
            noise = ca.random.normal(scale=self.noise_std, size=self.shape)
            ca.add(self.array, noise, self.array)
    elif self.phase == 'test':
        ca.subtract(self.x.array, self.running_mean, self.array)
        self.array *= self.running_std
    else:
        raise ValueError('Invalid phase: %s' % self.phase)
    if self.affine:
        self.array *= self.gamma.array
        self.array += self.beta.array
def bprop(self):
    # Gradient w.r.t. x through the batch variance
    ca.multiply(self._tmp_batch_centered, self.grad_array, self.x.grad_array)
    tmp = ca.mean(self.x.grad_array, axis=0, keepdims=True)
    ca.multiply(self._tmp_batch_centered, tmp, self.x.grad_array)
    self.x.grad_array *= -1
    self.x.grad_array *= self._tmp_batch_inv_std
    self.x.grad_array *= self._tmp_batch_inv_std
    # Gradient w.r.t. x through the batch mean
    ca.mean(self.grad_array, axis=0, keepdims=True, out=tmp)
    self.x.grad_array += self.grad_array
    self.x.grad_array -= tmp
    self.x.grad_array *= self._tmp_batch_inv_std
    if self.affine:
        self.x.grad_array *= self.gamma.array
        # Normalized input
        self._tmp_batch_centered *= self._tmp_batch_inv_std
        self._tmp_batch_centered *= self.grad_array
        ca.sum(self._tmp_batch_centered, axis=0, keepdims=True,
               out=self.gamma.grad_array)
        ca.sum(self.grad_array, axis=0, keepdims=True,
               out=self.beta.grad_array)
def matrix_factorization(R, P, Q, mask, steps=200000000, alpha=0.00005,
                         beta=0.02):
    Q = ca.transpose(Q)
    for step in range(steps):
        # Reconstruction error on the observed (masked) entries
        E = ca.subtract(R, ca.multiply(ca.dot(P, Q), mask))
        rmse = ca.sqrt(ca.sum(ca.power(E, 2)) / ca.sum(mask))
        rmse = np.array(rmse)[0]
        print('step: %i RMSE: %f' % (step, rmse))
        if rmse < 0.65:
            break
        # Gradient step with weight decay on both factors
        P = ca.add(ca.multiply(P, (1 - alpha * beta)),
                   ca.multiply(ca.dot(E, ca.transpose(Q)), 2 * alpha))
        Q = ca.add(ca.multiply(Q, (1 - alpha * beta)),
                   ca.multiply(ca.dot(ca.transpose(P), E), 2 * alpha))
    return P, Q
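A hypothetical usage sketch, assuming ca is the cudarray module used throughout these snippets: factorize a small, partially observed ratings matrix into rank-2 factors, with the mask marking observed entries. The data, shapes, and reduced step count are illustrative; reaching the 0.65 RMSE threshold depends on the data and learning rate.

import numpy as np
import cudarray as ca

R_np = np.array([[5, 3, 0, 1],
                 [4, 0, 0, 1],
                 [1, 1, 0, 5],
                 [1, 0, 0, 4],
                 [0, 1, 5, 4]], dtype=float)
mask_np = (R_np > 0).astype(float)  # 1 where a rating is observed

k = 2  # latent dimension
R = ca.array(R_np)
mask = ca.array(mask_np)
P = ca.random.uniform(size=(5, k))
Q = ca.random.uniform(size=(4, k))

P, Q = matrix_factorization(R, P, Q, mask, steps=5000)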
def bprop(self):
    if self.lhs_bprop:
        ca.multiply(self.out_grad, self.rhs.out, out=self.lhs.out_grad)
    if self.rhs_bprop:
        ca.multiply(self.out_grad, self.lhs.out, out=self.rhs.out_grad)
def fprop(self):
    ca.multiply(self.lhs.out, self.rhs.out, out=self.out)
def bprop(self):
    # relu_d yields 1 where x > 0 and 0 elsewhere; rescale the
    # {0, 1} mask to {-1, 1} to obtain sign(x)
    ca.nnet.relu_d(self.x.out, self.x.out_grad)
    self.x.out_grad *= 2.0
    self.x.out_grad -= 1.0
    ca.multiply(self.x.out_grad, self.out_grad, out=self.x.out_grad)
def bprop(self):
    if self.lhs.bpropable:
        ca.multiply(self.grad_array, self.rhs.array, self.lhs.grad_array)
    if self.rhs.bpropable:
        ca.multiply(self.grad_array, self.lhs.array, self.rhs.grad_array)
def bprop(self):
    ca.multiply(self.out_grad, self._tmp_mask, self.x.out_grad)
def fprop(self):
    ca.multiply(self.lhs.array, self.rhs.array, out=self.array)
def bprop(self):
    # relu_d yields 1 where x > 0 and 0 elsewhere; rescale the
    # {0, 1} mask to {-1, 1} to obtain sign(x)
    ca.nnet.relu_d(self.x.array, self.x.grad_array)
    self.x.grad_array *= 2.0
    self.x.grad_array -= 1.0
    ca.multiply(self.x.grad_array, self.grad_array, out=self.x.grad_array)
def bprop(self):
    # y_i * (y_grad_i - sum(y_grad * y))
    ca.multiply(self.array, self.grad_array, self.x.grad_array)
    tmp1 = ca.sum(self.x.grad_array, axis=1, keepdims=True)
    ca.subtract(self.grad_array, tmp1, self.x.grad_array)
    self.x.grad_array *= self.array
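This is the standard softmax backward rule: with y = softmax(x) and upstream gradient g, dL/dx = y * (g - sum_j g_j * y_j) along each row. A NumPy sketch (helper names illustrative) checking it by finite differences:

import numpy as np

def softmax(x):
    e = np.exp(x - x.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

x = np.random.normal(size=(2, 5))
g = np.random.normal(size=(2, 5))  # upstream gradient
y = softmax(x)

# Analytic gradient, as in the bprop above
dx = y * (g - np.sum(g * y, axis=1, keepdims=True))

# Finite-difference check of one entry, with L = sum(g * softmax(x))
eps = 1e-6
e0 = np.zeros_like(x)
e0[0, 2] = eps
num = np.sum(g * (softmax(x + e0) - softmax(x))) / eps
print(np.allclose(num, dx[0, 2], atol=1e-4))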
def bprop(self):
    ca.multiply(self.grad_array, self.scale, self.x.grad_array)
def bprop(self):
    ca.multiply(self.mu.array, self.grad_array, self.mu.grad_array)
    ca.exp(self.log_sigma.array, out=self.log_sigma.grad_array)
    self.log_sigma.grad_array -= 1
    self.log_sigma.grad_array *= 0.5
    self.log_sigma.grad_array *= self.grad_array
def bprop(self):
    ca.multiply(self.grad_array, self._tmp_mask, self.x.grad_array)
def test_multiply():
    # In-place multiply with a broadcast 1-d operand
    a_np = np.ones((5, 5))
    b_np = np.arange(5)
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_ca = ca.multiply(a_ca, b_ca, a_ca)
    c_np = np.multiply(a_np, b_np, a_np)
    print(np.allclose(c_np, np.array(c_ca)))

    # Broadcast a 1-d operand over the trailing axis
    a_np = np.ones((3, 3))
    b_np = np.arange(3)
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.multiply(a_np, b_np)
    c_ca = ca.multiply(a_ca, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    # Broadcast along rows
    a_np = np.ones((3, 3))
    b_np = np.arange(3).reshape(1, 3)
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.multiply(a_np, b_np)
    c_ca = ca.multiply(a_ca, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    # Broadcast along columns
    a_np = np.ones((3, 3))
    b_np = np.arange(3).reshape(3, 1)
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.multiply(a_np, b_np)
    c_ca = ca.multiply(a_ca, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    # Broadcast over the leading axis of a 3-d array
    a_np = np.ones((3, 3, 4))
    b_np = np.arange(3).reshape(3, 1, 1)
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.multiply(a_np, b_np)
    c_ca = ca.multiply(a_ca, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    # Broadcast over the trailing axis of a 3-d array
    a_np = np.ones((3, 3, 4))
    b_np = np.arange(4)
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.multiply(a_np, b_np)
    c_ca = ca.multiply(a_ca, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    # Broadcast with the smaller operand on the left-hand side
    a_np = np.arange(3)
    b_np = np.ones((3, 3))
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.multiply(a_np, b_np)
    c_ca = ca.multiply(a_ca, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    a_np = np.arange(4)
    b_np = np.ones((3, 3, 4))
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.multiply(a_np, b_np)
    c_ca = ca.multiply(a_ca, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    # Broadcast over an inner axis of a 5-d array
    a_np = np.ones((2, 7, 3, 5, 6))
    b_np = np.arange(3).reshape(1, 1, 3, 1, 1)
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.multiply(a_np, b_np)
    c_ca = ca.multiply(a_ca, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    # Broadcast a singleton middle axis
    a_np = np.ones((3, 3, 4))
    b_np = np.ones((3, 1, 4))
    a_ca = ca.array(a_np)
    b_ca = ca.array(b_np)
    c_np = np.multiply(a_np, b_np)
    c_ca = ca.multiply(a_ca, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    # Scalar operand, out-of-place then in-place
    a = np.random.normal(size=(5, 5))
    a_ca = ca.array(a)
    c_np = np.multiply(a, 3)
    c_ca = ca.multiply(a_ca, 3)
    print(np.allclose(c_np, np.array(c_ca)))
    c_np = np.multiply(a, 3, a)
    c_ca = ca.multiply(a_ca, 3, a_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    # Two random matrices, in-place then out-of-place
    a = np.random.normal(size=(5, 5))
    a_ca = ca.array(a)
    b = np.random.normal(size=(5, 5))
    b_ca = ca.array(b)
    c_np = np.multiply(a, b, a)
    c_ca = ca.multiply(a_ca, b_ca, a_ca)
    print(np.allclose(c_np, np.array(c_ca)))
    c_np = np.multiply(a, b)
    c_ca = ca.multiply(a_ca, b_ca)
    print(np.allclose(c_np, np.array(c_ca)))
def bprop(self):
    # Gradient w.r.t. mu: mu * grad
    ca.multiply(self.mu.out, self.out_grad, self.mu.out_grad)
    # Gradient w.r.t. log_sigma: 0.5 * (exp(log_sigma) - 1) * grad
    ca.exp(self.log_sigma.out, out=self.log_sigma.out_grad)
    self.log_sigma.out_grad -= 1
    self.log_sigma.out_grad *= 0.5
    self.log_sigma.out_grad *= self.out_grad