def bprop(self):
    # Filter gradient via the conv op's backward pass; the input gradient is
    # produced with the conv op's *forward* pass, i.e. this layer behaves as
    # the transpose of a standard convolution.
    self.conv_op.bprop(
        self.out_grad, self.weights.array, self.x.out,
        filters_d=self.weights.grad_array, to_imgs=False
    )
    self.conv_op.fprop(self.out_grad, self.weights.array,
                       convout=self.x.out_grad)
    if self.bias is not None:
        ca.sum(ca.sum(self.out_grad, axis=(2, 3), keepdims=True), axis=0,
               keepdims=True, out=self.bias.grad_array)

def bprop(self):
    self.conv_op.bprop(
        self.x.out, self.weights.array, self.out_grad,
        filters_d=self.weights.grad_array, imgs_d=self.x.out_grad
    )
    ca.sum(ca.sum(self.out_grad, axis=(2, 3), keepdims=True), axis=0,
           keepdims=True, out=self.bias.grad_array)

def fprop(self):
    # -target * log(pred)
    tmp1 = self.pred.array + self.eps
    ca.log(tmp1, tmp1)
    tmp1 *= self.target.array
    ca.sum(tmp1, axis=1, out=self.array)
    ca.negative(self.array, self.array)

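# A minimal NumPy sketch (assumption: pred and target are (batch, classes)
# arrays with one-hot targets) of the categorical cross-entropy computed by
# the fprop above: loss_i = -sum_j target_ij * log(pred_ij + eps).
import numpy as np

def categorical_cross_entropy_ref(pred, target, eps=1e-15):
    return -np.sum(target * np.log(pred + eps), axis=1)
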
def fprop(self):
    # c - multiplier*(pred - target)**2
    tmp = self.pred.array - self.target.array
    tmp **= 2.0
    tmp *= -self.multiplier
    tmp += self.c
    ca.sum(tmp, axis=self.axis, out=self.array)

def test_sum():
    a_np = np.random.normal(size=(5, 5))
    a_ca = ca.array(a_np)
    s_np = np.sum(a_np)
    s_ca = ca.sum(a_ca)
    print(np.allclose(s_np, np.array(s_ca)))

    a_np = np.random.normal(size=(5, 5))
    a_ca = ca.array(a_np)
    s_np = np.sum(a_np, 0)
    s_ca = ca.sum(a_ca, 0)
    print(np.allclose(s_np, np.array(s_ca)))
    s_np = np.sum(a_np, 1)
    s_ca = ca.sum(a_ca, 1)
    print(np.allclose(s_np, np.array(s_ca)))

    a_np = np.random.normal(size=(5, 5, 10))
    a_ca = ca.array(a_np)
    s_np = np.sum(a_np, 0)
    s_ca = ca.sum(a_ca, 0)
    print(np.allclose(s_np, np.array(s_ca)))
    s_np = np.sum(a_np, 2)
    s_ca = ca.sum(a_ca, 2)
    print(np.allclose(s_np, np.array(s_ca)))

def categorical_cross_entropy(y_pred, y_true, eps=1e-15):
    # Assumes one-hot encoding.
    y_pred = ca.clip(y_pred, eps, 1 - eps)
    # XXX: do we need to normalize?
    y_pred /= ca.sum(y_pred, axis=1, keepdims=True)
    loss = -ca.sum(y_true * ca.log(y_pred), axis=1)
    return loss

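# Hedged usage sketch for the helper above; the class count, batch size and
# values are illustrative assumptions, not taken from the original code.
import numpy as np
import cudarray as ca

y_true = ca.array(np.array([[0., 1., 0.], [1., 0., 0.]]))
y_pred = ca.array(np.array([[0.2, 0.7, 0.1], [0.9, 0.05, 0.05]]))
per_sample_loss = categorical_cross_entropy(y_pred, y_true)  # shape (2,)
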
def bprop(self):
    ca.multiply(self._tmp_batch_centered, self.out_grad, self.x.out_grad)
    tmp = ca.mean(self.x.out_grad, axis=0, keepdims=True)
    ca.multiply(self._tmp_batch_centered, tmp, self.x.out_grad)
    self.x.out_grad *= -1
    self.x.out_grad *= self._tmp_batch_inv_std
    self.x.out_grad *= self._tmp_batch_inv_std
    ca.mean(self.out_grad, axis=0, keepdims=True, out=tmp)
    self.x.out_grad += self.out_grad
    self.x.out_grad -= tmp
    self.x.out_grad *= self._tmp_batch_inv_std
    if self.affine:
        self.x.out_grad *= self.gamma.array
        # Normalized input
        self._tmp_batch_centered *= self._tmp_batch_inv_std
        self._tmp_batch_centered *= self.out_grad
        ca.sum(self._tmp_batch_centered, axis=0, keepdims=True,
               out=self.gamma.grad_array)
        ca.sum(self.out_grad, axis=0, keepdims=True,
               out=self.beta.grad_array)

def fprop(self):
    tmp1 = self.mu.array**2
    ca.negative(tmp1, tmp1)
    tmp1 += self.logvar.array
    tmp1 += 1
    tmp1 -= ca.exp(self.logvar.array)
    ca.sum(tmp1, axis=self.axis, out=self.array)
    self.array *= -0.5

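# A minimal NumPy sketch of the term computed above (and by the similar
# log_sigma variants later in this section): the KL divergence of a diagonal
# Gaussian N(mu, exp(logvar)) from the standard normal,
# KL = -0.5 * sum(1 + logvar - mu**2 - exp(logvar)). Names are illustrative.
import numpy as np

def gaussian_kl_ref(mu, logvar, axis=1):
    return -0.5 * np.sum(1 + logvar - mu**2 - np.exp(logvar), axis=axis)
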
def bprop(self):
    self.conv_op.bprop(
        self.x.array, self.weights.array, self.grad_array,
        filters_d=self.weights.grad_array, imgs_d=self.x.grad_array
    )
    if self.bias is not None:
        ca.sum(ca.sum(self.grad_array, axis=(2, 3), keepdims=True), axis=0,
               keepdims=True, out=self.bias.grad_array)

def encode_bprop(self, y_grad):
    y_grad = self.activation.bprop(y_grad)
    # Because the weight gradient has already been updated by
    # decode_bprop() we must add the contribution.
    w_grad = self.weights.grad_array
    w_grad += ca.dot(self._tmp_x.T, y_grad)
    ca.sum(y_grad, axis=0, out=self.bias.grad_array)
    return ca.dot(y_grad, self.weights.array.T)

def bprop(self, y_grad):
    _, x_grad = self.conv_op.bprop(
        self._tmp_x, self.weights.array, y_grad, to_imgs=self.bprop_to_x,
        filters_d=self.weights.grad_array
    )
    ca.sum(ca.sum(y_grad, axis=(2, 3), keepdims=True), axis=0,
           keepdims=True, out=self.bias.grad_array)
    return x_grad

def fprop(self):
    # e_i = exp(x_i - max(x))
    # y = e_i / sum(e)
    tmp1 = ca.amax(self.x.array, axis=1, keepdims=True)
    ca.subtract(self.x.array, tmp1, self.array)
    ca.exp(self.array, self.array)
    ca.sum(self.array, axis=1, keepdims=True, out=tmp1)
    self.array /= tmp1

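# A minimal NumPy sketch of the numerically stable softmax computed above:
# subtracting the row-wise max before exponentiation leaves the result
# unchanged but avoids overflow.
import numpy as np

def softmax_ref(x):
    e = np.exp(x - np.max(x, axis=1, keepdims=True))
    return e / np.sum(e, axis=1, keepdims=True)
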
def encode_bprop(self, y_grad):
    y_grad = self.activation.bprop(y_grad)
    # Because W's gradient has already been updated by decode_bprop() at
    # this point, we should add its contribution from the encode step.
    W_grad = self.W.grad_array
    W_grad += ca.dot(self._tmp_last_x.T, y_grad)
    ca.sum(y_grad, axis=0, out=self.b.grad_array)
    return ca.dot(y_grad, self.W.array.T)

def bprop(self, y_grad, to_x=True):
    _, x_grad = self.conv_op.bprop(
        self._tmp_last_x, self.W.array, y_grad, to_imgs=to_x,
        filters_d=self.W.grad_array
    )
    ca.sum(ca.sum(y_grad, axis=(2, 3), keepdims=True), axis=0,
           keepdims=True, out=self.b.grad_array)
    return x_grad

def fprop(self):
    tmp1 = self.mu.out**2
    ca.negative(tmp1, tmp1)
    tmp1 += self.log_sigma.out
    tmp1 += 1
    tmp1 -= ca.exp(self.log_sigma.out)
    ca.sum(tmp1, axis=1, keepdims=True, out=self.out)
    self.out *= -0.5

def bprop(self):
    self.conv_op.bprop(self.x.out, self.weights.array, self.out_grad,
                       filters_d=self.weights.grad_array,
                       imgs_d=self.x.out_grad)
    ca.sum(ca.sum(self.out_grad, axis=(2, 3), keepdims=True), axis=0,
           keepdims=True, out=self.bias.grad_array)

def bprop(self, y_grad):
    _, x_grad = self.conv_op.bprop(self.last_x, self.W.array, y_grad,
                                   filters_d=self.W.grad_array)
    ca.sum(ca.sum(y_grad, axis=(2, 3), keepdims=True), axis=0,
           keepdims=True, out=self.b.grad_array)
    return x_grad

def fprop(self):
    # -log(1 - pred)*(1 - target) - log(pred)*target
    tmp1 = 1 - self.pred.out
    tmp1 += self.eps
    ca.log(tmp1, tmp1)
    tmp2 = 1 - self.target.out
    ca.multiply(tmp1, tmp2, tmp1)
    ca.add(self.pred.out, self.eps, tmp2)
    ca.log(tmp2, tmp2)
    tmp2 *= self.target.out
    ca.add(tmp1, tmp2, tmp1)
    tmp1 *= -1
    ca.sum(tmp1, axis=1, keepdims=True, out=self.out)

def fprop(self):
    # -log(1 - pred)*(1 - target) - log(pred)*target
    tmp1 = 1 - self.pred.array
    tmp1 += self.eps
    ca.log(tmp1, tmp1)
    tmp2 = 1 - self.target.array
    ca.multiply(tmp1, tmp2, tmp1)
    ca.add(self.pred.array, self.eps, tmp2)
    ca.log(tmp2, tmp2)
    tmp2 *= self.target.array
    ca.add(tmp1, tmp2, tmp1)
    tmp1 *= -1
    ca.sum(tmp1, axis=1, out=self.array)

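# A minimal NumPy sketch of the binary cross-entropy computed by the two
# fprop variants above:
# loss_i = -sum_j [t_ij*log(p_ij + eps) + (1 - t_ij)*log(1 - p_ij + eps)].
import numpy as np

def binary_cross_entropy_ref(pred, target, eps=1e-15):
    return -np.sum(target * np.log(pred + eps)
                   + (1 - target) * np.log(1 - pred + eps), axis=1)
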
def bprop(self, y_grad, h_grad):
    ca.dot(self._tmp_h.T, y_grad, out=self.w_hy.grad_array)
    ca.sum(y_grad, axis=0, keepdims=True, out=self.b_y.grad_array)
    h_grad = h_grad + ca.dot(y_grad, self.w_hy.array.T)
    h_grad = self.activation.bprop(h_grad)
    ca.sum(h_grad, axis=0, keepdims=True, out=self.b_h.grad_array)
    ca.dot(self._tmp_h_tm1.T, h_grad, out=self.w_hh.grad_array)
    ca.dot(self._tmp_x.T, h_grad, out=self.w_xh.grad_array)
    x_grad = ca.dot(h_grad, self.w_xh.array.T)
    h_grad = ca.dot(h_grad, self.w_hh.array.T)
    return {'x_grad': x_grad, 'h_grad': h_grad}

def bprop(self, y_grad, h_grad): ca.dot(self._tmp_h.T, y_grad, out=self.w_hy.grad_array) ca.sum(y_grad, axis=0, keepdims=True, out=self.b_y.grad_array) h_grad = h_grad + ca.dot(y_grad, self.w_hy.array.T) h_grad = self.activation.bprop(h_grad) ca.sum(h_grad, axis=0, keepdims=True, out=self.b_h.grad_array) ca.dot(self._tmp_h_tm1.T, h_grad, out=self.w_hh.grad_array) ca.dot(self._tmp_x.T, h_grad, out=self.w_xh.grad_array) x_grad = ca.dot(h_grad, self.w_xh.array.T) h_grad = ca.dot(h_grad, self.w_hh.array.T) return {"x_grad": x_grad, "h_grad": h_grad}
def bprop(self):
    self.conv_op.bprop(self.out_grad, self.weights.array, self.x.out,
                       filters_d=self.weights.grad_array, to_imgs=False)
    self.conv_op.fprop(self.out_grad, self.weights.array,
                       convout=self.x.out_grad)
    if self.bias is not None:
        ca.sum(ca.sum(self.out_grad, axis=(2, 3), keepdims=True), axis=0,
               keepdims=True, out=self.bias.grad_array)

def matrix_factorization(R, P, Q, mask, steps=200000000, alpha=0.00005,
                         beta=0.02):
    Q = ca.transpose(Q)
    for step in range(steps):
        E = ca.subtract(R, ca.multiply(ca.dot(P, Q), mask))
        rmse = ca.sqrt(ca.sum(ca.power(E, 2)) / ca.sum(mask))
        rmse = np.array(rmse)[0]
        print('step: %i RMSE: %f' % (step, rmse))
        if rmse < 0.65:
            break
        P = ca.add(ca.multiply(P, (1 - alpha * beta)),
                   ca.multiply(ca.dot(E, ca.transpose(Q)), 2 * alpha))
        Q = ca.add(ca.multiply(Q, (1 - alpha * beta)),
                   ca.multiply(ca.dot(ca.transpose(P), E), 2 * alpha))
    return P, Q

def fprop(self):
    pred = self.x.out
    target = self.target.out
    if self.clip:
        ca.clip(pred, _FLT_MIN, .9999999, pred)
    self.out = -ca.sum(target * ca.log(pred)
                       + (1 - target) * ca.log(1 - pred))

def fprop(self):
    tmp1 = self.mu.array**2
    ca.negative(tmp1, tmp1)
    tmp1 += self.log_sigma.array
    tmp1 += 1
    tmp1 -= ca.exp(self.log_sigma.array)
    self.array = ca.sum(tmp1)
    self.array *= -0.5

def fprop(self):
    tmp1 = self.mu.out**2
    ca.negative(tmp1, tmp1)
    tmp1 += self.log_sigma.out
    tmp1 += 1
    tmp1 -= ca.exp(self.log_sigma.out)
    self.out = ca.sum(tmp1)
    self.out *= -0.5

def func(x, *args):
    ca.random.seed(random_seed)
    p_idx = args[0]
    param_vals = layer.params()[p_idx].values
    param_vals *= 0
    param_vals += ca.array(np.reshape(x, param_vals.shape))
    out = layer.fprop(ca.array(x0), 'train')
    y = ca.sum(out)
    return np.array(y)

def func(x, *args):
    ca.random.seed(random_seed)
    p_idx = args[0]
    param_vals = layer._params[p_idx].array
    param_vals *= 0
    param_vals += ca.array(np.reshape(x, param_vals.shape))
    out = layer.fprop(ca.array(x0), 'train')
    y = ca.sum(out)
    return np.array(y)

def normalize(matrix, gpuFlag=False):
    if gpuFlag == True:
        import cudarray as ca
        norm = ca.sqrt(ca.sum(ca.power(matrix, 2), 1, keepdims=True))
        matrix_n = matrix / norm
    else:
        norm = np.sqrt(np.sum(np.square(matrix), 1, keepdims=True))
        matrix_n = matrix / norm
    return matrix_n

def _update(self):
    # Forward propagation
    next_x = self.x.array
    x_feats = [None]*len(self.layers)
    x_grams = [None]*len(self.layers)
    for l, layer in enumerate(self.layers):
        next_x = layer.fprop(next_x)
        if self.subject_weights[l] > 0:
            x_feats[l] = next_x
        if self.style_weights[l] > 0:
            x_feats[l] = next_x
            x_grams[l] = gram_matrix(next_x)

    # Backward propagation
    grad = ca.zeros_like(next_x)
    loss = ca.zeros(1)
    for l, layer in reversed(list(enumerate(self.layers))):
        if self.subject_weights[l] > 0:
            diff = x_feats[l] - self.subject_feats[l]
            norm = ca.sum(ca.fabs(diff)) + 1e-8
            weight = float(self.subject_weights[l]) / norm
            grad += diff * weight
            loss += 0.5*weight*ca.sum(diff**2)
        if self.style_weights[l] > 0:
            diff = x_grams[l] - self.style_grams[l]
            n_channels = diff.shape[0]
            x_feat = ca.reshape(x_feats[l], (n_channels, -1))
            style_grad = ca.reshape(ca.dot(diff, x_feat), x_feats[l].shape)
            norm = ca.sum(ca.fabs(style_grad))
            weight = float(self.style_weights[l]) / norm
            style_grad *= weight
            grad += style_grad
            loss += 0.25*weight*ca.sum(diff**2)
        grad = layer.bprop(grad)

    if self.tv_weight > 0:
        x = ca.reshape(self.x.array, (3, 1) + grad.shape[2:])
        tv = self.tv_conv.fprop(x, self.tv_kernel)
        tv *= self.tv_weight
        grad -= ca.reshape(tv, grad.shape)

    ca.copyto(self.x.grad_array, grad)
    return loss

def normalize(matrix, gpuFlag=False):
    if gpuFlag == True:
        import cudarray as ca
        norm = ca.sqrt(ca.sum(ca.power(matrix, 2), 1, keepdims=True))
        matrix_n = matrix / norm
    else:
        norm = np.sqrt(np.sum(np.square(matrix), 1, keepdims=True))
        matrix_n = matrix / norm
    return matrix_n

def bprop(self):
    ca.multiply(self._tmp_batch_centered, self.grad_array,
                self.x.grad_array)
    tmp = ca.mean(ca.mean(self.x.grad_array, axis=0, keepdims=True),
                  axis=(2, 3), keepdims=True)
    ca.multiply(self._tmp_batch_centered, tmp, self.x.grad_array)
    self.x.grad_array *= -1
    self.x.grad_array *= self._tmp_batch_inv_std
    self.x.grad_array *= self._tmp_batch_inv_std
    tmp = ca.mean(ca.mean(self.grad_array, axis=0, keepdims=True),
                  axis=(2, 3), keepdims=True)
    self.x.grad_array += self.grad_array
    self.x.grad_array -= tmp
    self.x.grad_array *= self._tmp_batch_inv_std
    if self.affine:
        self.x.grad_array *= self.gamma.array
        # Normalized input
        self._tmp_batch_centered *= self._tmp_batch_inv_std
        self._tmp_batch_centered *= self.grad_array
        ca.sum(ca.sum(self._tmp_batch_centered, axis=(2, 3), keepdims=True),
               axis=0, keepdims=True, out=self.gamma.grad_array)
        ca.sum(ca.sum(self.grad_array, axis=(2, 3), keepdims=True), axis=0,
               keepdims=True, out=self.beta.grad_array)

def bprop(self, y_grad, h_grad):
    n = self.n_hidden
    h_grad = h_grad + y_grad
    c_grad = h_grad * self._tmp_u
    u_grad = h_grad * (self._tmp_c - self._tmp_h_tm1)
    h_grad *= (1 - self._tmp_u)
    c_grad = ca.ascontiguousarray(ca.transpose(c_grad))
    u_grad = ca.ascontiguousarray(ca.transpose(u_grad))
    c_grad = self.act_c.bprop(c_grad)
    ca.sum(c_grad, axis=1, keepdims=True, out=self.b_c.grad_array)
    u_grad = self.act_u.bprop(u_grad)
    ca.sum(u_grad, axis=1, keepdims=True, out=self.b_u.grad_array)
    r_grad = c_grad * self._tmp_h_c
    r_grad = self.act_r.bprop(r_grad)
    ca.sum(r_grad, axis=1, keepdims=True, out=self.b_r.grad_array)
    stack_grad = ca.empty((self.n_hidden*3, y_grad.shape[0]))
    stack_grad[:n, :] = r_grad
    stack_grad[n:n*2, :] = u_grad
    stack_grad[n*2:n*3, :] = c_grad
    ca.dot(self._tmp_x.T, stack_grad.T, out=self.w_x.grad_array)
    x_grad = ca.dot(stack_grad.T, self.w_x.array.T)
    stack_grad[n*2:n*3, :] *= self._tmp_r
    ca.dot(self._tmp_h_tm1.T, stack_grad.T, out=self.w_h.grad_array)
    h_grad += ca.dot(stack_grad.T, self.w_h.array.T)
    ca.clip(h_grad, -self.clip, self.clip, out=h_grad)
    return {'x_grad': x_grad, 'h_grad': h_grad}

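# Hedged note on the GRU bprop above. The gradients correspond to a forward
# pass of roughly this form (the fprop is not shown in this section, so treat
# the equations as an assumption inferred from the gradient code):
#
#   r = act_r(W_r x + U_r h_tm1 + b_r)          # reset gate
#   u = act_u(W_u x + U_u h_tm1 + b_u)          # update gate
#   c = act_c(W_c x + U_c (r * h_tm1) + b_c)    # candidate state
#   h = u * c + (1 - u) * h_tm1                 # new hidden state
#
# This explains c_grad = h_grad * u, u_grad = h_grad * (c - h_tm1), the
# (1 - u) factor on the carried h_grad, and the multiplication of the
# candidate slice of stack_grad by r before the h_tm1 weight gradient.
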
def bprop(self):
    ca.multiply(self._tmp_batch_centered, self.out_grad, self.x.out_grad)
    tmp = ca.mean(ca.mean(self.x.out_grad, axis=0, keepdims=True),
                  axis=(2, 3), keepdims=True)
    ca.multiply(self._tmp_batch_centered, tmp, self.x.out_grad)
    self.x.out_grad *= -1
    self.x.out_grad *= self._tmp_batch_inv_std
    self.x.out_grad *= self._tmp_batch_inv_std
    tmp = ca.mean(ca.mean(self.out_grad, axis=0, keepdims=True),
                  axis=(2, 3), keepdims=True)
    self.x.out_grad += self.out_grad
    self.x.out_grad -= tmp
    self.x.out_grad *= self._tmp_batch_inv_std
    if self.affine:
        self.x.out_grad *= self.gamma.array
        # Normalized input
        self._tmp_batch_centered *= self._tmp_batch_inv_std
        self._tmp_batch_centered *= self.out_grad
        ca.sum(ca.sum(self._tmp_batch_centered, axis=(2, 3), keepdims=True),
               axis=0, keepdims=True, out=self.gamma.grad_array)
        ca.sum(ca.sum(self.out_grad, axis=(2, 3), keepdims=True), axis=0,
               keepdims=True, out=self.beta.grad_array)

def bprop(self, y_grad, h_grad):
    n = self.n_hidden
    h_grad = h_grad + y_grad
    c_grad = h_grad * self._tmp_u
    u_grad = h_grad * (self._tmp_c - self._tmp_h_tm1)
    h_grad *= 1 - self._tmp_u
    c_grad = ca.ascontiguousarray(ca.transpose(c_grad))
    u_grad = ca.ascontiguousarray(ca.transpose(u_grad))
    c_grad = self.act_c.bprop(c_grad)
    ca.sum(c_grad, axis=1, keepdims=True, out=self.b_c.grad_array)
    u_grad = self.act_u.bprop(u_grad)
    ca.sum(u_grad, axis=1, keepdims=True, out=self.b_u.grad_array)
    r_grad = c_grad * self._tmp_h_c
    r_grad = self.act_r.bprop(r_grad)
    ca.sum(r_grad, axis=1, keepdims=True, out=self.b_r.grad_array)
    stack_grad = ca.empty((self.n_hidden * 3, y_grad.shape[0]))
    stack_grad[:n, :] = r_grad
    stack_grad[n : n * 2, :] = u_grad
    stack_grad[n * 2 : n * 3, :] = c_grad
    ca.dot(self._tmp_x.T, stack_grad.T, out=self.w_x.grad_array)
    x_grad = ca.dot(stack_grad.T, self.w_x.array.T)
    stack_grad[n * 2 : n * 3, :] *= self._tmp_r
    ca.dot(self._tmp_h_tm1.T, stack_grad.T, out=self.w_h.grad_array)
    h_grad += ca.dot(stack_grad.T, self.w_h.array.T)
    ca.clip(h_grad, -self.clip, self.clip, out=h_grad)
    return {"x_grad": x_grad, "h_grad": h_grad}

def _update(self):
    # Forward propagation
    next_x = self.x.array
    x_feats = [None] * len(self.layers)
    for l, layer in enumerate(self.layers):
        next_x = layer.fprop(next_x)
        if self.subject_weights[l] > 0 or self.style_weights[l] > 0:
            x_feats[l] = next_x

    # Backward propagation
    grad = ca.zeros_like(next_x)
    loss = ca.zeros(1)
    for l, layer in reversed(list(enumerate(self.layers))):
        if self.subject_weights[l] > 0:
            diff = x_feats[l] - self.subject_feats[l]
            norm = ca.sum(ca.fabs(diff)) + 1e-8
            weight = float(self.subject_weights[l]) / norm
            grad += diff * weight
            loss += 0.5 * weight * ca.sum(diff**2)
        if self.style_weights[l] > 0:
            diff = gram_matrix(x_feats[l]) - self.style_grams[l]
            n_channels = diff.shape[0]
            x_feat = ca.reshape(x_feats[l], (n_channels, -1))
            style_grad = ca.reshape(ca.dot(diff, x_feat), x_feats[l].shape)
            norm = ca.sum(ca.fabs(style_grad))
            weight = float(self.style_weights[l]) / norm
            style_grad *= weight
            grad += style_grad
            loss += 0.25 * weight * ca.sum(diff**2)
        grad = layer.bprop(grad)

    if self.tv_weight > 0:
        x = ca.reshape(self.x.array, (3, 1) + grad.shape[2:])
        tv = self.tv_conv.fprop(x, self.tv_kernel)
        tv *= self.tv_weight
        grad -= ca.reshape(tv, grad.shape)

    ca.copyto(self.x.grad_array, grad)
    return loss

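# Hedged sketch of the gram_matrix() helper used by both _update() variants
# above. It is not defined in this section; the assumption is that it
# reshapes a feature map of shape (1, channels, height, width) to
# (channels, height*width) and returns the channel-by-channel inner products.
import cudarray as ca

def gram_matrix_ref(img_bc01):
    n_channels = img_bc01.shape[1]
    feats = ca.reshape(img_bc01, (n_channels, -1))
    return ca.dot(feats, feats.T)
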
def test_reduce():
    a_np = np.random.normal(size=(1024,))
    a_ca = ca.array(a_np)
    c_np = np.sum(a_np)
    c_ca = ca.sum(a_ca)
    print(np.allclose(c_np, np.array(c_ca)))
    c_np = np.mean(a_np)
    c_ca = ca.mean(a_ca)
    print(np.allclose(c_np, np.array(c_ca)))

    a_np = np.random.normal(size=(5, 5))
    a_ca = ca.array(a_np)
    c_np = np.sum(a_np)
    c_ca = ca.sum(a_ca)
    print(np.allclose(c_np, np.array(c_ca)))
    c_np = np.sum(a_np, axis=0)
    c_ca = ca.sum(a_ca, axis=0)
    print(np.allclose(c_np, np.array(c_ca)))
    c_np = np.sum(a_np, axis=1)
    c_ca = ca.sum(a_ca, axis=1)
    print(np.allclose(c_np, np.array(c_ca)))

    a_np = np.random.normal(size=(5, 7, 11))
    a_ca = ca.array(a_np)
    c_np = np.sum(a_np, axis=0)
    c_ca = ca.sum(a_ca, axis=0)
    print(np.allclose(c_np, np.array(c_ca)))
    c_np = np.sum(a_np, axis=2)
    c_ca = ca.sum(a_ca, axis=2)
    print(np.allclose(c_np, np.array(c_ca)))
    c_np = np.sum(a_np, axis=(0, 1))
    c_ca = ca.sum(a_ca, axis=(0, 1))
    print(np.allclose(c_np, np.array(c_ca)))
    c_np = np.sum(a_np, axis=(1, 2))
    c_ca = ca.sum(a_ca, axis=(1, 2))
    print(np.allclose(c_np, np.array(c_ca)))
    c_np = np.argmin(a_np, axis=0)
    c_ca = ca.argmin(a_ca, axis=0)
    print(np.allclose(c_np, np.array(c_ca)))
    c_np = np.argmin(a_np, axis=2)
    c_ca = ca.argmin(a_ca, axis=2)
    print(np.allclose(c_np, np.array(c_ca)))

def bprop(self):
    ca.multiply(self._tmp_batch_centered, self.grad_array, self.x.grad_array)
    tmp = ca.mean(self.x.grad_array, axis=0, keepdims=True)
    ca.multiply(self._tmp_batch_centered, tmp, self.x.grad_array)
    self.x.grad_array *= -1
    self.x.grad_array *= self._tmp_batch_inv_std
    self.x.grad_array *= self._tmp_batch_inv_std
    ca.mean(self.grad_array, axis=0, keepdims=True, out=tmp)
    self.x.grad_array += self.grad_array
    self.x.grad_array -= tmp
    self.x.grad_array *= self._tmp_batch_inv_std
    if self.affine:
        self.x.grad_array *= self.gamma.array
        # Normalized input
        self._tmp_batch_centered *= self._tmp_batch_inv_std
        self._tmp_batch_centered *= self.grad_array
        ca.sum(self._tmp_batch_centered, axis=0, keepdims=True,
               out=self.gamma.grad_array)
        ca.sum(self.grad_array, axis=0, keepdims=True,
               out=self.beta.grad_array)

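# Hedged NumPy sketch of the batch-norm input gradient computed by the bprop
# variants above. With x_hat = (x - mean(x)) * inv_std and statistics taken
# over the batch axis, the gradient is
# dx = inv_std * (dout - mean(dout) - x_hat * mean(x_hat * dout)).
# The eps value is an illustrative assumption.
import numpy as np

def batch_norm_bprop_ref(x, dout, eps=1e-5):
    mean = np.mean(x, axis=0, keepdims=True)
    centered = x - mean
    inv_std = 1.0 / np.sqrt(np.var(x, axis=0, keepdims=True) + eps)
    x_hat = centered * inv_std
    return inv_std * (dout - np.mean(dout, axis=0, keepdims=True)
                      - x_hat * np.mean(x_hat * dout, axis=0, keepdims=True))
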
def matrix_factorization(R, P, Q, mask, steps=200000000, alpha=0.00005,
                         beta=0.02):
    Q = ca.transpose(Q)
    for step in range(steps):
        E = ca.subtract(R, ca.multiply(ca.dot(P, Q), mask))
        rmse = ca.sqrt(ca.sum(ca.power(E, 2)) / ca.sum(mask))
        rmse = np.array(rmse)[0]
        print('step: %i RMSE: %f' % (step, rmse))
        if rmse < 0.65:
            break
        P = ca.add(ca.multiply(P, (1 - alpha * beta)),
                   ca.multiply(ca.dot(E, ca.transpose(Q)), 2 * alpha))
        Q = ca.add(ca.multiply(Q, (1 - alpha * beta)),
                   ca.multiply(ca.dot(ca.transpose(P), E), 2 * alpha))
    return P, Q

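# Hedged usage sketch for matrix_factorization() above. Shapes and
# hyperparameters are illustrative assumptions: R is (n_users, n_items),
# P is (n_users, n_factors), Q is (n_items, n_factors), and mask marks the
# observed entries of R.
import numpy as np
import cudarray as ca

n_users, n_items, n_factors = 20, 30, 5
R_np = np.random.rand(n_users, n_items)
mask_np = (np.random.rand(n_users, n_items) < 0.3).astype(float)
R = ca.array(R_np * mask_np)
mask = ca.array(mask_np)
P = ca.array(np.random.rand(n_users, n_factors))
Q = ca.array(np.random.rand(n_items, n_factors))
P, Q = matrix_factorization(R, P, Q, mask, steps=1000)
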
def bprop(self, y_grad):
    ca.dot(self._tmp_x.T, y_grad, out=self.weights.grad_array)
    ca.sum(y_grad, axis=0, out=self.bias.grad_array)
    if self.bprop_to_x:
        return ca.dot(y_grad, self.weights.array.T)

def fprop(self, x1, x2):
    if self._tmp_x1 is not x1 or self._tmp_x2 is not x2:
        self._tmp_dists = ca.sum((x1-x2)**2, axis=1, keepdims=True)
        self._tmp_x1 = x1
        self._tmp_x2 = x2
    return self._tmp_dists

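# A minimal NumPy sketch of the cached quantity computed above: the row-wise
# squared Euclidean distance between two (batch, features) arrays.
import numpy as np

def squared_dists_ref(x1, x2):
    return np.sum((x1 - x2)**2, axis=1, keepdims=True)
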
def bprop(self):
    ca.dot(self.x.out.T, self.out_grad, out=self.weights.grad_array)
    ca.dot(self.out_grad, self.weights.array.T, out=self.x.out_grad)
    if self.bias is not None:
        ca.sum(self.out_grad, axis=0, out=self.bias.grad_array)

def fprop(self):
    ca.sum(self.x.out, axis=self.axis, out=self.out,
           keepdims=self.keepdims)

def setup(self):
    self.out = ca.sum(self.x.out, axis=self.axis, keepdims=self.keepdims)
    self.out_shape = self.out.shape
    self.out = ca.empty(self.out_shape)
    self.out_grad = ca.empty(self.out_shape)

def bprop(self, y_grad):
    ca.dot(self._last_x.T, y_grad, out=self.W.grad_array)
    ca.sum(y_grad, axis=0, out=self.b.grad_array)
    return ca.dot(y_grad, self.W.array.T)

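# A minimal NumPy sketch of the fully connected backward pass implemented by
# the dense bprop variants above (forward: y = x @ W + b): dW = x.T @ dy,
# db is dy summed over the batch axis, and dx = dy @ W.T.
import numpy as np

def fully_connected_bprop_ref(x, W, y_grad):
    W_grad = x.T @ y_grad
    b_grad = np.sum(y_grad, axis=0)
    x_grad = y_grad @ W.T
    return W_grad, b_grad, x_grad
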