def apply_with_lr(self, epoch, lr, grad, value, name, step):
    '''Update one parameter object.

    Args:
        step(int): the accumulated training iterations, not the iteration ID
    '''
    if grad.is_empty():
        return value

    assert step >= 0, 'step should be >= 0'
    if epoch != self.last_epoch or step != self.last_step:
        # a new iteration: advance the Adam time step used for bias
        # correction, and remember the iteration so that updating multiple
        # parameters within the same iteration does not advance t again
        self.t += 1
        self.last_step = step
        self.last_epoch = epoch
    grad = self.apply_regularizer_constraint(epoch, value, grad, name, step)
    if name is not None and name in self.learning_rate_multiplier:
        lr = lr * self.learning_rate_multiplier[name]
    if name not in self.m or name not in self.v:
        # lazily allocate the first and second moment buffers for this param
        self.m[name] = tensor.Tensor(grad.shape, grad.device, grad.dtype)
        self.m[name].set_value(0)
        self.v[name] = tensor.Tensor(grad.shape, grad.device, grad.dtype)
        self.v[name].set_value(0)

    # m = beta_1 * m + (1 - beta_1) * grad
    self.m[name] *= self.beta_1
    tensor.axpy(1 - self.beta_1, grad, self.m[name])
    # v = beta_2 * v + (1 - beta_2) * grad^2
    self.v[name] *= self.beta_2
    tensor.axpy(1 - self.beta_2, tensor.square(grad), self.v[name])
    # bias-corrected step size
    alpha = lr * math.sqrt(1 - math.pow(self.beta_2, self.t)) \
        / (1 - math.pow(self.beta_1, self.t))
    value -= alpha * self.m[name] / (tensor.sqrt(self.v[name]) + self.epsilon)
    return value
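# A minimal NumPy sketch of the update rule implemented above, useful for
# checking the math outside of SINGA. It is not the SINGA tensor API; the
# names (beta_1, beta_2, epsilon, t) simply mirror the attributes used in
# apply_with_lr.
import numpy as np

def adam_step(value, grad, m, v, t, lr=1e-3, beta_1=0.9, beta_2=0.999,
              epsilon=1e-8):
    '''One Adam update: returns the new value and the updated moments.'''
    m = beta_1 * m + (1 - beta_1) * grad        # first moment
    v = beta_2 * v + (1 - beta_2) * grad ** 2   # second moment
    # bias-corrected step size, matching alpha above
    alpha = lr * np.sqrt(1 - beta_2 ** t) / (1 - beta_1 ** t)
    value = value - alpha * m / (np.sqrt(v) + epsilon)
    return value, m, v

# usage: t starts at 1 on the first update, matching self.t above
w = np.zeros(3)
g = np.array([0.1, -0.2, 0.3])
w, m, v = adam_step(w, g, m=np.zeros(3), v=np.zeros(3), t=1)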
def evaluate(self, flag, x, y):
    '''Compute the averaged error.

    Returns:
        a float value as the averaged error
    '''
    # mean of 0.5 * (x - y)^2 over all elements
    return tensor.sum(tensor.square(x - y) * 0.5) / x.size()
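# A small NumPy cross-check of the averaged error above (illustrative only;
# x and y here are plain arrays, not singa tensors):
import numpy as np

x = np.array([[0.9, 0.1], [0.2, 0.8]])
y = np.array([[1.0, 0.0], [0.0, 1.0]])
avg_err = np.sum(0.5 * (x - y) ** 2) / x.size   # same value evaluate() returns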
def forward(self, is_train, x):
    # per-sample L2 norm: sum of squares across each row, plus epsilon
    # for numerical stability, then square root
    norm = tensor.sum_columns(tensor.square(x))
    norm += self.epsilon
    norm = tensor.sqrt(norm)
    # scale each sample (row) of x by the reciprocal of its norm
    self.y = x.clone()
    self.y.div_column(norm)
    if is_train:
        # cache the norm for the backward pass
        self.norm = norm
    return self.y
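# A NumPy sketch of the normalization above, under the assumption that
# SINGA's sum_columns reduces each row to a single value and div_column
# divides each row by the corresponding entry of norm, i.e., every sample is
# scaled toward unit L2 norm:
import numpy as np

def l2_normalize_rows(x, epsilon=1e-8):
    norm = np.sqrt(np.sum(np.square(x), axis=1, keepdims=True) + epsilon)
    return x / norm

x = np.array([[3.0, 4.0], [1.0, 0.0]])
y = l2_normalize_rows(x)   # rows approx. [[0.6, 0.8], [1.0, 0.0]]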
def forward(self, flag, x, y):
    '''Compute the error as 0.5 * ||x-y||^2.

    Args:
        flag (int): kTrain or kEval; if kTrain, then the backward must be
            called before calling forward again.
        x (Tensor): the prediction Tensor
        y (Tensor): the ground truth Tensor; it must have the same shape
            as x

    Returns:
        a Tensor of the same shape as x, with 0.5 * (x - y)^2 per element
    '''
    # cache the residual for the backward pass
    self.err = x - y
    return tensor.square(self.err) * 0.5
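# For reference, a NumPy sketch of this loss and of the gradient that the
# cached self.err enables: d(0.5 * (x - y)^2)/dx = (x - y), which is what a
# matching backward() would return (plain arrays here, not the SINGA API):
import numpy as np

x = np.array([1.0, 2.0, 3.0])
y = np.array([1.5, 2.0, 2.0])
err = x - y
loss = 0.5 * err ** 2   # element-wise, as in forward()
grad = err              # gradient of the loss w.r.t. x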