Example #1
    def compute_weight(self, module):
        """Compute weight with equalized learning rate.

        Args:
            module (nn.Module): A module that is wrapped with equalized lr.

        Returns:
            torch.Tensor: Updated weight.
        """
        weight = getattr(module, self.name + '_orig')
        if weight.ndim == 5:
            # weight in shape of [b, out, in, k, k]
            fan = _calculate_correct_fan(weight[0], self.mode)
        else:
            assert weight.ndim <= 4
            fan = _calculate_correct_fan(weight, self.mode)
        weight = weight * torch.tensor(
            self.gain, device=weight.device) * torch.sqrt(
                torch.tensor(1. / fan, device=weight.device)) * self.lr_mul

        return weight
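For intuition, a minimal standalone sketch of the same rescaling on a plain tensor; the gain of sqrt(2) and lr_mul of 1.0 are assumed example values for illustration, not taken from the class above:

import torch
from torch.nn.init import _calculate_correct_fan

weight = torch.randn(64, 32, 3, 3)              # [out, in, k, k]
fan = _calculate_correct_fan(weight, 'fan_in')  # 32 * 3 * 3 = 288
# gain * sqrt(1 / fan) * lr_mul, as compute_weight does above
scaled = weight * (2 ** 0.5) * (1.0 / fan) ** 0.5 * 1.0
print(scaled.std())  # ~ sqrt(2 / 288) ≈ 0.083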
Example #2
def model_build(trial):
    """Build a CESN model whose reservoir weights are suggested by an
    Optuna trial within a Kaiming-uniform-style bound."""
    model = models.CESN(8)
    w_res = model.rnn.cell.w_res.data
    size_res = model.rnn.cell.size_res
    fan = _calculate_correct_fan(w_res, 'fan_in')
    # For 'tanh', calculate_gain ignores the second argument and returns 5/3.
    gain = calculate_gain('tanh', math.sqrt(5))
    std = gain / math.sqrt(fan)
    bound = math.sqrt(3.0) * std  # uniform bound from standard deviation
    # Let Optuna suggest each reservoir weight within [-bound, bound].
    for i in range(size_res):
        for j in range(size_res):
            suggest = trial.suggest_uniform(f'w_res[{i}][{j}]', -bound, bound)
            model.rnn.cell.w_res.data[i][j] = suggest
    return model
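A sketch of how model_build could plug into an Optuna study; `evaluate` is a hypothetical scoring helper (not part of the snippet), and models.CESN is assumed importable as above:

import optuna

def objective(trial):
    model = model_build(trial)
    return evaluate(model)  # hypothetical validation-loss helper, not defined here

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20)
print(study.best_params)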
Example #3
def tds_normal_(tensor, mode='fan_in'):
    r"""
    Normal initialization from the paper [Sequence-to-Sequence Speech Recognition with Time-Depth Separable Convolutions](https://www.isca-speech.org/archive/Interspeech_2019/pdfs/2460.pdf)

    Fills the tensor with values sampled from :math:`\mathcal{N}(0, \text{std}^2)` where

    .. math::
        \text{std} = 2 \times \sqrt{\frac{1}{\text{fan\_mode}}}

    Args:
        tensor: an n-dimensional `torch.Tensor`
        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
            preserves the magnitude of the variance of the weights in the
            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
            backwards pass.
    """
    fan = _calculate_correct_fan(tensor, mode)
    gain = 2.0
    std = gain / math.sqrt(fan)  # sqrt(4.0 / fan_mode)
    with torch.no_grad():
        return tensor.normal_(0.0, std)
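Typical usage, assuming the imports the snippet relies on (math, torch, and _calculate_correct_fan from torch.nn.init) are in scope:

import torch.nn as nn

# Apply the TDS initialization to a 1-D convolution, a typical layer in
# the time-depth separable architecture the paper describes.
conv = nn.Conv1d(80, 80, kernel_size=21, padding=10)
tds_normal_(conv.weight, mode='fan_in')
print(conv.weight.std())  # ~ 2 / sqrt(80 * 21) ≈ 0.049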
Example #4
def kaiming_uniform_mod(tensor,
                        a=0,
                        gain=1,
                        mode='fan_in',
                        nonlinearity='leaky_relu'):
    r"""Fills the input `Tensor` with values according to the method
    described in `Delving deep into rectifiers: Surpassing human-level
    performance on ImageNet classification` - He, K. et al. (2015), using a
    uniform distribution. The resulting tensor will have values sampled from
    :math:`\mathcal{U}(-\text{bound}, \text{bound})` where

    .. math::
        \text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}}

    Also known as He initialization.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        a: the negative slope of the rectifier used after this layer (only
            used with ``'leaky_relu'``)
        gain: an extra multiplicative factor applied to the standard
            deviation; this is the only modification relative to
            ``nn.init.kaiming_uniform_``
        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
            preserves the magnitude of the variance of the weights in the
            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
            backwards pass.
        nonlinearity: the non-linear function (`nn.functional` name),
            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).

    Examples:
        >>> w = torch.empty(3, 5)
        >>> kaiming_uniform_mod(w, gain=0.5, mode='fan_in', nonlinearity='relu')
    """
    gain_mod = gain  # the extra "gain" multiplier is the only modification
    fan = _calculate_correct_fan(tensor, mode)
    gain = calculate_gain(nonlinearity, a)
    std = (gain / math.sqrt(fan)) * gain_mod
    bound = math.sqrt(3.0) * std  # calculate uniform bound from standard deviation
    with torch.no_grad():
        return tensor.uniform_(-bound, bound)
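A quick check of the effect of the extra gain parameter (assuming the snippet's imports are in scope):

import torch

w1, w2 = torch.empty(256, 128), torch.empty(256, 128)
kaiming_uniform_mod(w1, gain=1.0, nonlinearity='relu')
kaiming_uniform_mod(w2, gain=0.5, nonlinearity='relu')
print(w1.std() / w2.std())  # ~ 2.0: the bound scales linearly with gain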
Example #5
def our_kaiming_normal_(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu'):
    # Same computation as ``nn.init.kaiming_normal_``: sample from
    # N(0, std^2) with std = gain / sqrt(fan).
    fan = _calculate_correct_fan(tensor, mode)
    gain = calculate_gain(nonlinearity, a)
    std = gain / math.sqrt(fan)
    with torch.no_grad():
        return tensor.normal_(0, std)
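A sanity check (assuming the same torch.nn.init imports as the other snippets):

import torch

w = torch.empty(100, 100)
our_kaiming_normal_(w, nonlinearity='relu')
print(w.std())  # ~ sqrt(2) / sqrt(100) ≈ 0.141, matching nn.init.kaiming_normal_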
Example #6
def selu_init(tensor):
    import torch
    import torch.nn.init as init
    import math
    # LeCun normal initialization (std = sqrt(1 / fan_in)), the
    # initialization recommended for SELU activations.
    fan = init._calculate_correct_fan(tensor, 'fan_in')
    std = math.sqrt(1 / fan)
    with torch.no_grad():  # required when `tensor` is a Parameter with grad
        return tensor.normal_(0, std)
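Example usage; note this relies on the no_grad guard above when the tensor is an nn.Parameter:

import torch.nn as nn

layer = nn.Linear(512, 512)
selu_init(layer.weight)  # std = 1 / sqrt(512) ≈ 0.044
model = nn.Sequential(layer, nn.SELU())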
Example #7
def getUniformKaimingBound(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu'):
    """Return the Kaiming-uniform bound for `tensor` without modifying it."""
    fan = torchInit._calculate_correct_fan(tensor, mode)
    gain = torchInit.calculate_gain(nonlinearity, a)
    std = gain / math.sqrt(fan)
    bound = math.sqrt(3.0) * std
    return bound
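Since the function only returns the bound, the caller applies it; a sketch assuming torchInit is torch.nn.init, as the snippet implies:

import math
import torch
import torch.nn.init as torchInit

w = torch.empty(3, 5)
bound = getUniformKaimingBound(w, a=math.sqrt(5))
with torch.no_grad():
    w.uniform_(-bound, bound)  # the same bound could also be reused for a bias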
Example #8
    def init_parameter(self, parameter):
        # Normal initialization with std = 1 / sqrt(fan_in).
        fan = init._calculate_correct_fan(parameter, 'fan_in')
        init.normal_(parameter, mean=0, std=1.0 / math.sqrt(fan))
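Since init_parameter never uses self, a standalone smoke test (treating it as a module-level function purely for illustration) might look like:

import math
import torch
import torch.nn as nn
import torch.nn.init as init

p = nn.Parameter(torch.empty(64, 128))
init_parameter(None, p)  # `self` is unused, so a placeholder suffices here
print(p.std())           # ~ 1 / sqrt(128) ≈ 0.088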