def step(self):
    # RMSProp-style update: maintain a decaying average of squared
    # gradients per parameter and scale each step by its RMS magnitude.
    for param, rms_grad in zip(self.params, self.steps):
        rms_grad *= self.decay
        step = param.grad()
        if param.penalty is not None:
            step -= param.penalty()
        rms_grad += (1.0 - self.decay) * step**2
        # Bounding the scaling from below caps the effective step size.
        scaling = ca.maximum(ca.sqrt(rms_grad), self.max_scaling_inv)
        step_rate = self.learn_rate * param.learn_rate / self.batch_size
        param.step(step / scaling * (-step_rate))
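A minimal self-contained sketch of the same update rule in plain NumPy may help; decay, max_scaling_inv and learn_rate mirror the attributes used above, while the quadratic objective and iteration count are made up for illustration:

import numpy as np

def rmsprop_step(w, grad, rms_grad, learn_rate=0.01, decay=0.9,
                 max_scaling_inv=1e-3):
    # Decaying average of squared gradients.
    rms_grad *= decay
    rms_grad += (1.0 - decay) * grad**2
    # Lower-bounding the scaling caps the per-weight step size.
    scaling = np.maximum(np.sqrt(rms_grad), max_scaling_inv)
    w -= learn_rate * grad / scaling
    return w, rms_grad

w = np.zeros(3)
rms = np.zeros(3)
target = np.array([1.0, -2.0, 0.5])
for _ in range(500):
    grad = 2 * (w - target)  # gradient of the quadratic ||w - target||^2
    w, rms = rmsprop_step(w, grad, rms)
print(w)  # moves toward [1.0, -2.0, 0.5]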
def fprop(self):
    # Forward pass: element-wise maximum of the two operands.
    ca.maximum(self.lhs.out, self.rhs.out, out=self.out)
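For an element-wise maximum op like this, the backward pass conventionally routes the incoming gradient to whichever operand won each element. A NumPy sketch of that convention (an illustration, not the library's actual bprop):

import numpy as np

def maximum_fprop(lhs, rhs):
    # Element-wise maximum, as in the fprop above.
    return np.maximum(lhs, rhs)

def maximum_bprop(lhs, rhs, out_grad):
    # The gradient flows to the larger operand element-wise;
    # ties go to lhs by convention here.
    mask = lhs >= rhs
    return out_grad * mask, out_grad * ~mask

lhs = np.array([1.0, 5.0, -2.0])
rhs = np.array([3.0, 2.0, -1.0])
lhs_grad, rhs_grad = maximum_bprop(lhs, rhs, np.ones(3))
print(lhs_grad, rhs_grad)  # [0. 1. 0.] [1. 0. 1.]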
def fprop(self, x):
    # Parametric ReLU: identity for x > 0, learned slope self.a for x < 0.
    self._tmp_x = x  # keep a reference to the input (e.g. for the backward pass)
    pos = ca.maximum(x, 0)
    neg = self.a.array * ca.minimum(x, 0)
    return pos + neg
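The same activation is easy to sanity-check against a stand-alone NumPy version; here the slope a is a fixed scalar rather than the learned parameter above:

import numpy as np

def leaky_relu(x, a=0.25):
    # max(x, 0) passes positives through; a * min(x, 0) scales negatives.
    return np.maximum(x, 0) + a * np.minimum(x, 0)

def leaky_relu_grad(x, a=0.25):
    # Slope 1 where x > 0, slope a elsewhere.
    return np.where(x > 0, 1.0, a)

x = np.array([-2.0, -0.5, 0.0, 3.0])
print(leaky_relu(x))       # [-0.5 -0.125 0. 3.]
print(leaky_relu_grad(x))  # [0.25 0.25 0.25 1.]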
def grad(self, pred, target):
    # Derivative of binary cross-entropy w.r.t. pred. The clamp keeps
    # target / pred finite as pred -> 0; note that the (1 - pred)
    # denominator has no matching guard as pred -> 1.
    pred = ca.maximum(pred, _FLT_MIN)
    return -(target / pred - (1 - target) / (1 - pred))
def fprop(self):
    # Leaky ReLU in-place: out = a * min(x, 0) + max(x, 0).
    ca.minimum(self.x.out, 0, out=self.out)
    self.out *= self.a
    self.out += ca.maximum(self.x.out, 0)
def fprop(self):
    # Forward pass: element-wise maximum of the two operands.
    ca.maximum(self.lhs.array, self.rhs.array, out=self.array)
def loss(self, pred, target):
    # Binary cross-entropy, summed over the feature axis; clamping pred
    # keeps ca.log away from log(0).
    pred = ca.maximum(pred, _FLT_MIN)
    return -ca.sum(target*ca.log(pred) + (1 - target)*ca.log(1 - pred),
                   axis=1)
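That the grad method above is the element-wise derivative of this loss can be confirmed with a finite-difference check; a NumPy sketch (FLT_MIN stands in for the module's _FLT_MIN):

import numpy as np

FLT_MIN = np.finfo(np.float32).tiny

def bce_loss(pred, target):
    pred = np.maximum(pred, FLT_MIN)
    return -(target * np.log(pred) + (1 - target) * np.log(1 - pred))

def bce_grad(pred, target):
    pred = np.maximum(pred, FLT_MIN)
    return -(target / pred - (1 - target) / (1 - pred))

pred, target, eps = 0.7, 1.0, 1e-6
numeric = (bce_loss(pred + eps, target)
           - bce_loss(pred - eps, target)) / (2 * eps)
print(numeric, bce_grad(pred, target))  # both approximately -1.4286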
def loss(self, y, dists):
    # Contrastive loss: similar pairs (y=1) are penalised by their
    # distance; dissimilar pairs (y=0) only while closer than the margin.
    return y * dists + (1 - y) * ca.maximum(self.margin - dists, 0)
def fprop(self):
    # Leaky ReLU in-place: array = a * min(x, 0) + max(x, 0).
    ca.minimum(self.x.array, 0, out=self.array)
    self.array *= self.a
    self.array += ca.maximum(self.x.array, 0)
def grad(self, y, y_pred):
    # Gradient of binary cross-entropy w.r.t. y_pred (same clamp as above).
    y_pred = ca.maximum(y_pred, _FLT_MIN)
    return -(y / y_pred - (1 - y) / (1 - y_pred))
def loss(self, y, y_pred):
    # Binary cross-entropy, averaged (rather than summed) over axis 1.
    y_pred = ca.maximum(y_pred, _FLT_MIN)
    return -ca.mean(y*ca.log(y_pred) + (1 - y)*ca.log(1 - y_pred), axis=1)
def loss(self, target, x1, x2):
    # Contrastive loss on a pair of embeddings: distance for similar
    # pairs (target=1), hinge on the margin for dissimilar pairs.
    dists = self.fprop(x1, x2)
    return target*dists + (1 - target)*ca.maximum(self.margin - dists, 0)
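A small NumPy illustration of the hinge behaviour of this loss; the margin and the inputs are chosen arbitrarily:

import numpy as np

def contrastive_loss(target, dists, margin=1.0):
    # Similar pairs (target=1) pay their distance; dissimilar pairs
    # (target=0) pay only while they sit inside the margin.
    return target * dists + (1 - target) * np.maximum(margin - dists, 0)

dists = np.array([0.2, 0.2, 1.5])
target = np.array([1.0, 0.0, 0.0])
print(contrastive_loss(target, dists))  # [0.2 0.8 0. ]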