def __init__(self, input_shape, momentum=0.1, beta_initializer=normal, gamma_initializer=normal, epsilon=1e-8):
    """
    Batch normalization module.

    ## Parameters
    input_shape: `tuple` - shape of input features
    momentum: `float` - momentum when calculating the exponentially weighted average, defaults to 0.1
    beta_initializer: `callable` - defaults to normal
    gamma_initializer: `callable` - defaults to normal
    epsilon: `float` - numerical stability constant, defaults to 1e-8
    """
    super().__init__()
    self.momentum = fn.to_tensor(momentum)
    self.beta = Parameter(shape=input_shape, initializer=beta_initializer)
    self.gamma = Parameter(shape=input_shape, initializer=gamma_initializer)
    self.u_avg = fn.to_tensor(0)
    self.std_avg = fn.to_tensor(0)
    self.epsilon = fn.to_tensor(epsilon)
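# A minimal construction sketch. The exported class name and its forward()
# method are not shown in this snippet, so both are assumptions here:
#
#   bn = BatchNorm(input_shape=(64,))  # hypothetical exported name
#   y = bn(x)  # forward() would normalize x, then scale by gamma and shift by beta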
def __init__(self, parameters, lr=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8):
    """
    Nadam optimizer.
    """
    super().__init__(parameters, lr=lr)
    self.beta1 = fn.to_tensor(beta1)
    self.beta2 = fn.to_tensor(beta2)
    self.epsilon = fn.to_tensor(epsilon)
    self.ms = [fn.zeros_like(p) for p in self.parameters]
    self.vs = [fn.zeros_like(p) for p in self.parameters]
def __init__(self, parameters, lr=0.01, momentum=0.9):
    """
    Nesterov accelerated SGD optimizer.
    """
    super().__init__(parameters, lr=lr)
    self.momentum = fn.to_tensor(momentum)
    self.vs = [fn.zeros_like(p) for p in self.parameters]
def __init__(self, parameters, lr=0.01, momentum=0.9):
    """
    SGD with Momentum optimizer.
    """
    super().__init__(parameters, lr=lr)
    self.momentum = fn.to_tensor(momentum)
    self.vs = [fn.zeros_like(p) for p in self.parameters]
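# The two momentum classes above share identical state; they differ only in
# their _step rules, which are not shown in this snippet. For reference, the
# standard updates they would implement are:
#   classical momentum:  v <- momentum * v - lr * g;  p <- p + v
#   Nesterov momentum:   v <- momentum * v - lr * g;  p <- p + momentum * v - lr * g
# i.e. Nesterov applies the velocity update once more, approximating a
# gradient taken at the look-ahead point.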
def __init__(self, parameters, lr=0.01, beta=0.99, epsilon=1e-8):
    """
    RMSProp optimizer.
    """
    super().__init__(parameters, lr=lr)
    # Wrap epsilon like the other hyperparameters, for consistency
    self.epsilon = fn.to_tensor(epsilon)
    self.beta = fn.to_tensor(beta)
    self.E = [fn.zeros_like(p) for p in self.parameters]
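# A minimal training-loop sketch for the optimizers above. This assumes the
# base Optimizer class exposes step(epoch) and zero_grad(), that the model
# exposes parameters(), and that Tensor supports backward(); none of these
# are shown in this snippet, so treat the names as placeholders:
#
#   optimizer = RMSProp(model.parameters(), lr=0.001)
#   for epoch in range(1, num_epochs + 1):
#       loss = F.categorical_crossentropy(model(x), target)
#       loss.backward()
#       optimizer.step(epoch)
#       optimizer.zero_grad()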
def _step(self, epoch):
    t = fn.to_tensor(epoch)
    grads = [p.grad for p in self.parameters]
    for i, (p, g) in enumerate(zip(self.parameters, grads)):
        # Write the moments back into the state lists so they persist across
        # steps (a plain `m = ...` would only rebind the loop variable)
        self.ms[i] = self.beta1 * self.ms[i] + (1 - self.beta1) * g
        self.vs[i] = fn.maximum(self.beta2 * self.vs[i], fn.abs(g))
        # Only the first moment needs bias correction
        lr_t = self.lr / (1 - fn.power(self.beta1, t))
        p -= lr_t * self.ms[i] / (self.vs[i] + self.epsilon)
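# Reference: this step matches the Adamax update (Kingma & Ba, 2015), which
# replaces Adam's sqrt(v_hat) denominator with an exponentially weighted
# infinity norm,
#   u_t = max(beta2 * u_{t-1}, |g_t|)
# The max is not an expectation, so the second moment needs no bias
# correction; only the first moment does (the lr_t factor above).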
def _step(self, epoch):
    t = fn.to_tensor(epoch)
    grads = [p.grad for p in self.parameters]
    for i, (p, g) in enumerate(zip(self.parameters, grads)):
        # Write the moments back into the state lists so they persist across steps
        self.ms[i] = self.beta1 * self.ms[i] + (1 - self.beta1) * g
        self.vs[i] = self.beta2 * self.vs[i] + (1 - self.beta2) * fn.square(g)
        # Bias-corrected estimates
        m_hat = self.ms[i] / (1 - fn.power(self.beta1, t))
        v_hat = self.vs[i] / (1 - fn.power(self.beta2, t))
        # Nesterov-corrected first moment; note the (1 - beta1) factor on the
        # current gradient (the original code used (1 - beta2) here by mistake)
        m_bar = self.beta1 * m_hat + (1 - self.beta1) * g / (1 - fn.power(self.beta1, t))
        p -= self.lr * m_bar / (fn.sqrt(v_hat) + self.epsilon)
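# Reference: this is the simplified Nadam update (Dozat, 2016), which applies
# Nesterov momentum to Adam's bias-corrected first moment:
#   p <- p - lr * (beta1 * m_hat + (1 - beta1) * g / (1 - beta1^t))
#            / (sqrt(v_hat) + epsilon)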
def _step(self, epoch):
    # This AMSGrad-style step applies no bias correction, so `epoch` is unused
    grads = [p.grad for p in self.parameters]
    for i, (p, g) in enumerate(zip(self.parameters, grads)):
        # Write the moments back into the state lists so they persist across steps
        self.ms[i] = self.beta1 * self.ms[i] + (1 - self.beta1) * g
        self.vs[i] = self.beta2 * self.vs[i] + (1 - self.beta2) * fn.square(g)
        # Keep the running maximum of the second moment
        self.vhats[i] = fn.maximum(self.vhats[i], self.vs[i])
        p -= self.lr * self.ms[i] / (fn.sqrt(self.vhats[i]) + self.epsilon)
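# Design note: taking the running maximum of v (AMSGrad, Reddi et al., 2018)
# makes the denominator nondecreasing, so the effective per-parameter step
# size can never grow between iterations; this fixes the convergence
# counterexamples constructed for plain Adam.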
def __init__(self, parameters, lr=0.01, beta1=0.9, beta2=0.999, final_lr=0.1, gamma=1e-3, epsilon=1e-8):
    """
    AdaBound optimizer.
    """
    super().__init__(parameters, lr=lr)
    self.beta1 = fn.to_tensor(beta1)
    self.beta2 = fn.to_tensor(beta2)
    self.epsilon = fn.to_tensor(epsilon)
    self.final_lr = fn.to_tensor(final_lr)
    self.gamma = fn.to_tensor(gamma)
    self.ms = [fn.zeros_like(p) for p in self.parameters]
    self.vs = [fn.zeros_like(p) for p in self.parameters]
def half_quadratic(output: Tensor, target: Tensor):
    """
    Half quadratic loss function.

    ## Parameters
    output: `Tensor` - model's prediction
    target: `Tensor` - training sample targets

    ## Example usage
    ```python
    from beacon.tensor import Tensor
    from beacon.functional import functions as F

    output = Tensor([[0.2, 0.7, 0.1], [0.4, 0.45, 0.15]], requires_grad=True)
    target = Tensor([[0, 1, 0], [1, 0, 0]], requires_grad=True)
    loss = F.half_quadratic(output, target)
    ```
    """
    output, target = fn.to_tensor(output), fn.to_tensor(target)
    return 0.5 * fn.square(output - target)
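# Worked gradient: d/d(output) [0.5 * (output - target)^2] = output - target,
# so the 0.5 factor makes the backward pass exactly the residual.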
def mean_absolute_error(output: Tensor, target: Tensor):
    """
    Mean absolute error loss function.

    ## Parameters
    output: `Tensor` - model's prediction
    target: `Tensor` - training sample targets

    ## Example usage
    ```python
    from beacon.tensor import Tensor
    from beacon.functional import functions as F

    output = Tensor([[0.2, 0.7, 0.1], [0.4, 0.45, 0.15]], requires_grad=True)
    target = Tensor([[0, 1, 0], [1, 0, 0]], requires_grad=True)
    loss = F.mean_absolute_error(output, target)
    ```
    """
    output, target = fn.to_tensor(output), fn.to_tensor(target)
    return fn.mean(fn.abs(output - target), axis=-1)
def _step(self, epoch):
    t = fn.to_tensor(epoch)
    step_size = self.lr * (fn.sqrt(1 - fn.power(self.beta2, t)) / (1 - fn.power(self.beta1, t)))
    # Dynamic bounds that both converge to final_lr as t grows
    lower_bound = self.final_lr * (1.0 - 1.0 / (self.gamma * t + 1))
    upper_bound = self.final_lr * (1.0 + 1.0 / (self.gamma * t))
    grads = [p.grad for p in self.parameters]
    for i, (p, g) in enumerate(zip(self.parameters, grads)):
        # Write the moments back into the state lists so they persist across steps
        self.ms[i] = self.beta1 * self.ms[i] + (1 - self.beta1) * g
        self.vs[i] = self.beta2 * self.vs[i] + (1 - self.beta2) * fn.square(g)
        denom = fn.sqrt(self.vs[i]) + self.epsilon
        # Clip the Adam-style per-parameter step size into [lower, upper]
        p -= self.ms[i] * fn.clip(step_size / denom, lower_bound.item(), upper_bound.item())
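# Worked example of the bound schedule with the defaults
# (final_lr=0.1, gamma=1e-3), assuming epochs are counted from t = 1:
#   t = 1:     lower = 0.1 * (1 - 1/1.001) ~= 1.0e-4,
#              upper = 0.1 * (1 + 1/0.001)  = 100.1   (essentially unconstrained Adam)
#   t -> inf:  both bounds -> 0.1                     (SGD-like fixed step)
# so training starts adaptive and smoothly transitions toward SGD.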
def nll_loss(output: Tensor, target: Tensor):
    """
    Negative log likelihood loss function.

    ## Parameters
    output: `Tensor` - model's prediction
    target: `Tensor` - training sample targets

    ## Example usage
    ```python
    from beacon.tensor import Tensor
    from beacon.functional import functions as F

    output = Tensor([[0.2, 0.7, 0.1], [0.4, 0.45, 0.15]], requires_grad=True)
    target = Tensor([[0, 1, 0], [1, 0, 0]], requires_grad=True)
    loss = F.nll_loss(output, target)
    ```
    """
    output, target = fn.to_tensor(output), fn.to_tensor(target)
    output = fn.clip(output, 1e-7, 1 - 1e-7)
    return -target * fn.log(output)
def categorical_crossentropy(output: Tensor, target: Tensor):
    """
    Categorical cross entropy loss function.

    ## Parameters
    output: `Tensor` - model's prediction
    target: `Tensor` - training sample targets

    ## Example usage
    ```python
    from beacon.tensor import Tensor
    from beacon.functional import functions as F

    output = Tensor([[0.2, 0.7, 0.1], [0.4, 0.45, 0.15]], requires_grad=True)
    target = Tensor([[0, 1, 0], [1, 0, 0]], requires_grad=True)
    loss = F.categorical_crossentropy(output, target)
    ```
    """
    output, target = fn.to_tensor(output), fn.to_tensor(target)
    output = fn.clip(output, 1e-7, 1 - 1e-7)
    return -target * fn.log(output)
def binary_crossentropy(output: Tensor, target: Tensor):
    """
    Binary cross entropy loss function.

    ## Parameters
    output: `Tensor` - model's prediction
    target: `Tensor` - training sample targets

    ## Example usage
    ```python
    from beacon.tensor import Tensor
    from beacon.functional import functions as F

    output = Tensor([[0.89], [0.76], [0.1]], requires_grad=True)
    target = Tensor([[1], [1], [0]], requires_grad=True)
    loss = F.binary_crossentropy(output, target)
    ```
    """
    output, target = fn.to_tensor(output), fn.to_tensor(target)
    # Output is expected to already be a probability (see the example above),
    # so it is clipped for stability and used directly; squashing it again
    # with a sigmoid after clipping to (0, 1) would double-transform it
    output = fn.clip(output, 1e-7, 1 - 1e-7)
    return target * -fn.log(output) + (1 - target) * -fn.log(1 - output)
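# Numeric check of the docstring example, first sample (output 0.89, target 1):
#   loss = -(1 * log(0.89) + 0 * log(1 - 0.89)) ~= 0.1165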
def __call__(self, x):
    """
    Call operator: converts the input to a tensor and delegates to forward().
    """
    return self.forward(fn.to_tensor(x))
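# A minimal sketch of what this enables, assuming a hypothetical subclass
# MyModel that defines forward() (not shown in this snippet):
#
#   model = MyModel()
#   y = model(x)  # equivalent to model.forward(fn.to_tensor(x))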