def compute_weight(self, module):
    """Compute weight with equalized learning rate.

    Args:
        module (nn.Module): A module that is wrapped with equalized lr.

    Returns:
        torch.Tensor: Updated weight.
    """
    weight = getattr(module, self.name + '_orig')
    if weight.ndim == 5:
        # weight in shape of [b, out, in, k, k]
        fan = _calculate_correct_fan(weight[0], self.mode)
    else:
        assert weight.ndim <= 4
        fan = _calculate_correct_fan(weight, self.mode)
    weight = weight * torch.tensor(
        self.gain, device=weight.device) * torch.sqrt(
            torch.tensor(1. / fan, device=weight.device)) * self.lr_mul
    return weight
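# Hedged usage sketch for compute_weight: it expects a holder carrying
# `name`, `mode`, `gain` and `lr_mul`, plus a module whose raw weight is
# stashed under '<name>_orig'. The SimpleNamespace holder and the
# gain/lr_mul values below are illustrative assumptions, not the
# original wrapper class.
import math
from types import SimpleNamespace

import torch
import torch.nn as nn
from torch.nn.init import _calculate_correct_fan  # helper the method relies on

linear = nn.Linear(16, 32)
# Stash the raw weight under 'weight_orig', as an equalized-lr hook would.
linear.weight_orig = linear.weight.detach().clone()
holder = SimpleNamespace(name='weight', mode='fan_in', gain=math.sqrt(2), lr_mul=1.0)
scaled = compute_weight(holder, linear)
print(scaled.shape)  # torch.Size([32, 16])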
import math

from torch.nn.init import _calculate_correct_fan, calculate_gain


def model_build(trial):
    # `models.CESN` is an external model class from the surrounding
    # project; its reservoir weights live in model.rnn.cell.w_res.
    model = models.CESN(8)
    w_res = model.rnn.cell.w_res.data
    size_res = model.rnn.cell.size_res
    # Kaiming-uniform bound: bound = sqrt(3) * gain / sqrt(fan_in).
    # calculate_gain ignores its second argument for 'tanh' (it is only
    # used for 'leaky_relu') and returns 5/3.
    fan = _calculate_correct_fan(w_res, 'fan_in')
    gain = calculate_gain('tanh', math.sqrt(5))
    std = gain / math.sqrt(fan)
    bound = math.sqrt(3.0) * std
    # Let Optuna suggest each reservoir weight within those bounds.
    for i in range(size_res):
        for j in range(size_res):
            suggest = trial.suggest_uniform(f'w_res[{i}][{j}]', -bound, bound)
            model.rnn.cell.w_res.data[i][j] = suggest
    return model
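# A hedged sketch of how model_build could plug into an Optuna study;
# `objective` and `validation_loss` are hypothetical, only model_build
# comes from the snippet above. (trial.suggest_uniform is deprecated in
# recent Optuna releases in favour of trial.suggest_float.)
import optuna


def objective(trial):
    model = model_build(trial)
    # ... train `model` here ...
    return validation_loss(model)  # hypothetical evaluation helper


study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)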
import math

import torch
from torch.nn.init import _calculate_correct_fan


def tds_normal_(tensor, mode='fan_in'):
    r"""Normal initialization from the paper [Sequence-to-Sequence Speech
    Recognition with Time-Depth Separable Convolutions](https://www.isca-speech.org/archive/Interspeech_2019/pdfs/2460.pdf)

    Fills the tensor with values drawn from :math:`\mathcal{N}(0, \text{std}^2)`
    where

    .. math::
        \text{std} = 2 \times \sqrt{\frac{1}{\text{fan\_mode}}}

    Args:
        tensor: an n-dimensional `torch.Tensor`
        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing
            ``'fan_in'`` preserves the magnitude of the variance of the
            weights in the forward pass. Choosing ``'fan_out'`` preserves
            the magnitudes in the backwards pass.
    """
    fan = _calculate_correct_fan(tensor, mode)
    gain = 2.0
    std = gain / math.sqrt(fan)  # i.e. sqrt(4.0 / fan_mode)
    with torch.no_grad():
        return tensor.normal_(0.0, std)
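# Quick illustrative check: for a 2D tensor the fan-in is the second
# dimension, so the sample std should sit near 2 / sqrt(64) = 0.25.
import torch

w = torch.empty(256, 64)
tds_normal_(w)
print(round(w.std().item(), 2))  # ~0.25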
import math

import torch
from torch.nn.init import _calculate_correct_fan, calculate_gain


def kaiming_uniform_mod(tensor, a=0, gain=1, mode='fan_in',
                        nonlinearity='leaky_relu'):
    r"""Fills the input `Tensor` with values according to the method
    described in `Delving deep into rectifiers: Surpassing human-level
    performance on ImageNet classification` - He, K. et al. (2015), using a
    uniform distribution. The resulting tensor will have values sampled from
    :math:`\mathcal{U}(-\text{bound}, \text{bound})` where

    .. math::
        \text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}}

    Also known as He initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        a: the negative slope of the rectifier used after this layer (only
            used with ``'leaky_relu'``)
        gain: extra multiplier applied to the standard deviation; this is
            the only modification relative to ``nn.init.kaiming_uniform_``
        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing
            ``'fan_in'`` preserves the magnitude of the variance of the
            weights in the forward pass. Choosing ``'fan_out'`` preserves
            the magnitudes in the backwards pass.
        nonlinearity: the non-linear function (`nn.functional` name),
            recommended to use only with ``'relu'`` or ``'leaky_relu'``
            (default).

    Examples:
        >>> w = torch.empty(3, 5)
        >>> kaiming_uniform_mod(w, mode='fan_in', nonlinearity='relu')
    """
    gain_mod = gain  # The "gain_mod" multiplier is my only modification.
    fan = _calculate_correct_fan(tensor, mode)
    gain = calculate_gain(nonlinearity, a)
    std = (gain / math.sqrt(fan)) * gain_mod
    bound = math.sqrt(3.0) * std  # uniform bound from the standard deviation
    with torch.no_grad():
        return tensor.uniform_(-bound, bound)
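# Illustrative calls: with the default gain=1 the bound matches stock
# nn.init.kaiming_uniform_; a smaller gain shrinks the sampling range.
import torch

w = torch.empty(3, 5)
kaiming_uniform_mod(w, mode='fan_in', nonlinearity='relu')   # standard bound
kaiming_uniform_mod(w, gain=0.5, nonlinearity='leaky_relu')  # half the bound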
import math

import torch
from torch.nn.init import _calculate_correct_fan, calculate_gain


def our_kaiming_normal_(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu'):
    """Re-implementation of ``nn.init.kaiming_normal_``: draws from
    N(0, std^2) with std = gain / sqrt(fan_mode)."""
    fan = _calculate_correct_fan(tensor, mode)
    gain = calculate_gain(nonlinearity, a)
    std = gain / math.sqrt(fan)
    with torch.no_grad():
        return tensor.normal_(0, std)
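# Illustrative sanity check: the sample std should track
# calculate_gain('relu') / sqrt(fan_in) = sqrt(2) / sqrt(100) ~= 0.141.
import torch

w = torch.empty(1000, 100)
our_kaiming_normal_(w, nonlinearity='relu')
print(round(w.std().item(), 3))  # ~0.141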
def selu_init(tensor):
    """LeCun-normal initialization for SELU networks: std = sqrt(1 / fan_in)."""
    import math
    import torch
    import torch.nn.init as init
    fan = init._calculate_correct_fan(tensor, 'fan_in')
    std = math.sqrt(1 / fan)
    # Run the in-place init under no_grad so it also works on Parameters
    # that require grad.
    with torch.no_grad():
        return tensor.normal_(0, std)
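# Hypothetical usage on a layer feeding a SELU activation; with
# fan_in=128 the resulting std is 1/sqrt(128) ~= 0.088.
import torch.nn as nn

layer = nn.Linear(128, 64)
selu_init(layer.weight)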
import math

import torch.nn.init as torchInit


def getUniformKaimingBound(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu'):
    """Return the kaiming-uniform bound for `tensor` without sampling from it."""
    fan = torchInit._calculate_correct_fan(tensor, mode)
    gain = torchInit.calculate_gain(nonlinearity, a)
    std = gain / math.sqrt(fan)
    bound = math.sqrt(3.0) * std
    return bound
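# The bound is returned rather than applied, so sampling is left to the
# caller; a minimal sketch:
import torch

w = torch.empty(64, 32)
bound = getUniformKaimingBound(w, nonlinearity='relu')
with torch.no_grad():
    w.uniform_(-bound, bound)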
def init_parameter(self, parameter):
    # Scaled normal init (LeCun-style): std = 1 / sqrt(fan_in).
    fan = init._calculate_correct_fan(parameter, 'fan_in')
    init.normal_(parameter, mean=0, std=1.0 / math.sqrt(fan))
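# Illustrative call, assuming `init` and `math` are imported as below;
# `self` is unused, so a None stand-in suffices for a quick
# function-style check (init.normal_ already runs under no_grad).
import math

import torch
import torch.nn as nn
from torch.nn import init

p = nn.Parameter(torch.empty(64, 128))
init_parameter(None, p)
print(round(p.std().item(), 3))  # ~ 1/sqrt(128) ~= 0.088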