def fprop(self):
    """Batch-normalization forward pass.

    In the 'train' phase, normalizes the input with batch statistics and
    updates the exponential running averages; in the 'test' phase, applies
    the stored running statistics instead. Writes the result into self.out.
    Raises ValueError for any other phase.
    """
    if self.phase == 'train':
        # Calculate batch mean over the batch axis (axis 0).
        tmp = ca.mean(self.x.out, axis=0, keepdims=True)
        # Center input into the preallocated scratch buffer.
        ca.subtract(self.x.out, tmp, self._tmp_batch_centered)
        # Update running mean: running = momentum*running + (1-momentum)*batch.
        tmp *= 1 - self.momentum
        self.running_mean *= self.momentum
        self.running_mean += tmp
        # Calculate batch variance; self.out is reused as scratch space here
        # before it receives the final normalized output below.
        ca.power(self._tmp_batch_centered, 2, self.out)
        ca.mean(self.out, axis=0, keepdims=True,
                out=self._tmp_batch_inv_std)
        # Turn the variance into 1 / sqrt(var + eps), i.e. the inverse std.
        self._tmp_batch_inv_std += self.eps
        ca.sqrt(self._tmp_batch_inv_std, self._tmp_batch_inv_std)
        ca.power(self._tmp_batch_inv_std, -1, self._tmp_batch_inv_std)
        # Normalize input: out = centered * inv_std.
        ca.multiply(self._tmp_batch_centered, self._tmp_batch_inv_std,
                    self.out)
        # Update running std. NOTE(review): despite its name, running_std
        # accumulates the *inverse* standard deviation — which is why the
        # test phase multiplies by it instead of dividing.
        self.running_std *= self.momentum
        ca.multiply(self._tmp_batch_inv_std, 1-self.momentum, tmp)
        self.running_std += tmp
    elif self.phase == 'test':
        # Normalize with the running statistics collected during training.
        ca.subtract(self.x.out, self.running_mean, self.out)
        self.out *= self.running_std
    else:
        raise ValueError('Invalid phase: %s' % self.phase)
    if self.affine:
        # Learned per-feature scale (gamma) and shift (beta).
        self.out *= self.gamma.array
        self.out += self.beta.array
def fprop(self):
    """Forward pass for batch normalization (near-duplicate of the variant
    above; differs only in whitespace).

    'train': normalize with batch statistics and update running averages.
    'test': normalize with the stored running statistics.
    Output is written in place into self.out.
    """
    if self.phase == 'train':
        # Calculate batch mean (over axis 0, the batch axis).
        tmp = ca.mean(self.x.out, axis=0, keepdims=True)
        # Center input into the scratch buffer.
        ca.subtract(self.x.out, tmp, self._tmp_batch_centered)
        # Exponential moving average update of the mean.
        tmp *= 1 - self.momentum
        self.running_mean *= self.momentum
        self.running_mean += tmp
        # Calculate batch variance; self.out serves as temporary storage
        # for the squared centered values.
        ca.power(self._tmp_batch_centered, 2, self.out)
        ca.mean(self.out, axis=0, keepdims=True,
                out=self._tmp_batch_inv_std)
        # Calculate 1 / sqrt(E([x - E(x)]^2) + eps), the inverse std.
        self._tmp_batch_inv_std += self.eps
        ca.sqrt(self._tmp_batch_inv_std, self._tmp_batch_inv_std)
        ca.power(self._tmp_batch_inv_std, -1, self._tmp_batch_inv_std)
        # Normalize input.
        ca.multiply(self._tmp_batch_centered, self._tmp_batch_inv_std,
                    self.out)
        # Update running std — NOTE(review): this stores the *inverse* std,
        # matching the multiplication in the test branch.
        self.running_std *= self.momentum
        ca.multiply(self._tmp_batch_inv_std, 1 - self.momentum, tmp)
        self.running_std += tmp
    elif self.phase == 'test':
        # Apply running mean / inverse-std gathered during training.
        ca.subtract(self.x.out, self.running_mean, self.out)
        self.out *= self.running_std
    else:
        raise ValueError('Invalid phase: %s' % self.phase)
    if self.affine:
        # Learned affine transform: scale then shift.
        self.out *= self.gamma.array
        self.out += self.beta.array
def bprop(self):
    """Backward pass for elementwise power z = lhs ** rhs.

    dz/dlhs = rhs * lhs**(rhs - 1)
    dz/drhs = z * log(lhs)
    Each gradient is built in place in the operand's grad buffer and
    scaled by the incoming gradient.
    """
    if self.lhs.bpropable:
        # lhs gradient: start from lhs**(rhs - 1), then scale in place.
        ca.power(self.lhs.array, self.rhs.array - 1, out=self.lhs.grad_array)
        self.lhs.grad_array *= self.rhs.array
        self.lhs.grad_array *= self.grad_array
    if self.rhs.bpropable:
        # rhs gradient: start from log(lhs), then scale by z and the
        # incoming gradient.
        ca.log(self.lhs.array, out=self.rhs.grad_array)
        self.rhs.grad_array *= self.array
        self.rhs.grad_array *= self.grad_array
def bprop(self):
    """Backward pass for elementwise power out = lhs ** rhs
    (older `.out`/`out_grad` API variant of the block above)."""
    if self.lhs_bprop:
        # d(x^y)/dx = y * x^(y-1), scaled by the incoming gradient.
        tmp = self.rhs.out - 1
        ca.power(self.lhs.out, tmp, out=self.lhs.out_grad)
        self.lhs.out_grad *= self.rhs.out
        self.lhs.out_grad *= self.out_grad
    if self.rhs_bprop:
        # d(x^y)/dy = x^y * log(x) = out * log(lhs), scaled by the
        # incoming gradient.
        ca.log(self.lhs.out, out=self.rhs.out_grad)
        self.rhs.out_grad *= self.out
        self.rhs.out_grad *= self.out_grad
def normalize(matrix, gpuFlag=False):
    """Scale each row of `matrix` to unit Euclidean (L2) norm.

    Parameters
    ----------
    matrix : 2-D array (numpy array, or cudarray when gpuFlag is set)
    gpuFlag : bool, optional
        If true, compute on the GPU via cudarray; otherwise use numpy.

    Returns
    -------
    Row-normalized array of the same shape as `matrix`.

    Note: rows with zero norm produce a division by zero, as in the
    original implementation.
    """
    # Idiom fix: truth-test the flag instead of comparing `== True`.
    if gpuFlag:
        import cudarray as ca
        norm = ca.sqrt(ca.sum(ca.power(matrix, 2), 1, keepdims=True))
    else:
        norm = np.sqrt(np.sum(np.square(matrix), 1, keepdims=True))
    # Single shared division — both branches produced a (rows, 1) norm
    # column that broadcasts over the row elements.
    return matrix / norm
def normalize(matrix, gpuFlag=False):
    """Return `matrix` with every row rescaled to unit L2 length.

    With gpuFlag compared equal to True the computation runs through
    cudarray on the GPU; otherwise numpy is used.
    """
    if gpuFlag == True:
        import cudarray as ca
        row_norm = ca.sqrt(ca.sum(ca.power(matrix, 2), 1, keepdims=True))
        return matrix / row_norm
    # CPU path: (rows, 1) column of row norms broadcasts over each row.
    row_norm = np.sqrt(np.sum(np.square(matrix), 1, keepdims=True))
    return matrix / row_norm
def fprop(self):
    """Spatial batch-normalization forward pass.

    Statistics are reduced over the batch axis (0) and the two trailing
    axes (2, 3), leaving per-channel mean/std — assumes NCHW-like input,
    TODO confirm. In 'train' phase the output may additionally be
    perturbed with Gaussian noise; 'test' uses running statistics.
    Result is written into self.array.
    """
    if self.phase == 'train':
        # Calculate batch mean per channel: reduce over batch, then
        # over the spatial axes.
        tmp = ca.mean(ca.mean(self.x.array, axis=0, keepdims=True),
                      axis=(2, 3), keepdims=True)
        # Center input into the scratch buffer.
        ca.subtract(self.x.array, tmp, self._tmp_batch_centered)
        # Exponential moving average update of the mean.
        tmp *= 1 - self.momentum
        self.running_mean *= self.momentum
        self.running_mean += tmp
        # Calculate batch variance; self.array is reused as scratch for
        # the squared centered values before holding the final output.
        ca.power(self._tmp_batch_centered, 2, self.array)
        ca.mean(ca.mean(self.array, axis=0, keepdims=True),
                axis=(2, 3), keepdims=True, out=self._tmp_batch_inv_std)
        # Calculate 1 / sqrt(var + eps), the inverse standard deviation.
        self._tmp_batch_inv_std += self.eps
        ca.sqrt(self._tmp_batch_inv_std, self._tmp_batch_inv_std)
        ca.power(self._tmp_batch_inv_std, -1, self._tmp_batch_inv_std)
        # Normalize input.
        ca.multiply(self._tmp_batch_centered, self._tmp_batch_inv_std,
                    self.array)
        # Update running std — NOTE(review): stores the *inverse* std,
        # consistent with the multiplication in the test branch.
        self.running_std *= self.momentum
        ca.multiply(self._tmp_batch_inv_std, 1-self.momentum, tmp)
        self.running_std += tmp
        if self.noise_std > 0.0:
            # Training-only additive Gaussian noise regularization.
            noise = ca.random.normal(scale=self.noise_std,
                                     size=self.shape)
            ca.add(self.array, noise, self.array)
    elif self.phase == 'test':
        # Apply running statistics gathered during training.
        ca.subtract(self.x.array, self.running_mean, self.array)
        self.array *= self.running_std
    else:
        raise ValueError('Invalid phase: %s' % self.phase)
    if self.affine:
        # Learned per-channel scale (gamma) and shift (beta).
        self.array *= self.gamma.array
        self.array += self.beta.array
def step(self, param, state):
    """One Adam update for `param`.

    `state` is the (m, v, t) triple: first-moment estimate, second-moment
    estimate, and step counter. m and v are updated in place;
    NOTE(review): `t += 1` presumably mutates a 0-d array held in `state`
    so the count persists between calls — a plain int would not; confirm
    how `state` is initialized.
    """
    m, v, t = state
    grad = param.grad()
    t += 1
    # Plain Python int for the ** exponents below.
    t = int(t)
    # m = beta1*m + (1 - beta1)*grad
    m *= self.beta1
    tmp = (1 - self.beta1)*grad
    m += tmp
    # v = beta2*v + (1 - beta2)*grad**2  (tmp reused as scratch)
    v *= self.beta2
    ca.power(grad, 2, tmp)
    tmp *= (1 - self.beta2)
    v += tmp
    # alpha = learn_rate*sqrt(1 - beta2**t)/(1 - beta1**t)
    # step = -alpha_t*m/(sqrt(v) + eps)
    # (bias-corrected step size folded into a single scalar)
    alpha = self.learn_rate*np.sqrt(1 - self.beta2**t)/(1 - self.beta1**t)
    ca.sqrt(v, tmp)
    tmp += self.eps
    ca.divide(m, tmp, tmp)
    tmp *= -alpha
    param.step(tmp)
def matrix_factorization(R, P, Q, mask, steps=200000000, alpha=0.00005, beta=0.02):
    """Masked matrix factorization R ~= P.dot(Q) by gradient descent
    with L2 regularization (Python 2 code: print statement, xrange).

    R: target matrix; P, Q: factor matrices; mask: 1/0 matrix selecting
    the observed entries of R. alpha is the learning rate, beta the
    regularization strength. Iterates until RMSE over the masked entries
    drops below 0.65 or `steps` iterations elapse.

    Returns (P, Q) — NOTE(review): Q is returned in its transposed form
    (transposed once at the top and never transposed back).
    """
    Q = ca.transpose(Q)
    for step in xrange(steps):
        # Error on observed entries only.
        E = ca.subtract(R, ca.multiply(ca.dot(P,Q), mask))
        rmse = ca.sqrt(ca.sum(ca.power(E,2)) / ca.sum(mask))
        # Pull the scalar back from the GPU.
        rmse = np.array(rmse)[0]
        print 'step: %i RMSE: %f' % (step, rmse)
        if rmse < 0.65:
            break
        # Gradient step with weight decay factor (1 - alpha*beta).
        # NOTE(review): Q's update uses the already-updated P, so the two
        # factors are updated sequentially, not simultaneously.
        P = ca.add(ca.multiply(P,(1-alpha*beta)),
                   ca.multiply(ca.dot(E,ca.transpose(Q)), 2*alpha))
        Q = ca.add(ca.multiply(Q,(1-alpha*beta)),
                   ca.multiply(ca.dot(ca.transpose(P),E),2*alpha))
    return P, Q
def fprop(self, x, phase): n_channels = x.shape[1] # Calculate local mean tmp = self.conv_op.fprop(x, self.ca_kernel) if n_channels > 1: ca.divide(tmp, n_channels, tmp) # Center input with local mean centered = ca.subtract(x, tmp) # Calculate local standard deviation tmp = ca.power(centered, 2) tmp = self.conv_op.fprop(tmp, self.ca_kernel) if n_channels > 1: ca.divide(tmp, n_channels, tmp) ca.sqrt(tmp, tmp) # Scale centered input with standard deviation return centered / (tmp + self.eps)
def fprop(self, x):
    """Local contrast normalization: center `x` by its local mean, then
    divide by the local standard deviation (+ eps)."""
    n_channels = x.shape[1]

    def local_mean(a):
        # Convolve with the kernel; divide by the channel count so the
        # cross-channel sum becomes a mean when there are several channels.
        m = self.conv_op.fprop(a, self.ca_kernel)
        if n_channels > 1:
            ca.divide(m, n_channels, m)
        return m

    # Center input with its local mean.
    centered = ca.subtract(x, local_mean(x))
    # Local std: mean of squared deviations, square-rooted in place.
    std = local_mean(ca.power(centered, 2))
    ca.sqrt(std, std)
    # eps keeps flat regions from dividing by zero.
    return centered / (std + self.eps)
def matrix_factorization(R, P, Q, mask, steps=200000000, alpha=0.00005, beta=0.02): Q = ca.transpose(Q) for step in xrange(steps): E = ca.subtract(R, ca.multiply(ca.dot(P, Q), mask)) rmse = ca.sqrt(ca.sum(ca.power(E, 2)) / ca.sum(mask)) rmse = np.array(rmse)[0] print 'step: %i RMSE: %f' % (step, rmse) if rmse < 0.65: break P = ca.add(ca.multiply(P, (1 - alpha * beta)), ca.multiply(ca.dot(E, ca.transpose(Q)), 2 * alpha)) Q = ca.add(ca.multiply(Q, (1 - alpha * beta)), ca.multiply(ca.dot(ca.transpose(P), E), 2 * alpha)) return P, Q
def fprop(self):
    """Elementwise power forward pass: out = lhs ** rhs, written in
    place into self.out."""
    ca.power(self.lhs.out, self.rhs.out, out=self.out)
def fprop(self):
    """Elementwise power forward pass (`.array` API variant):
    array = lhs ** rhs, computed in place into self.array."""
    ca.power(self.lhs.array, self.rhs.array, out=self.array)