Example #1
def affine(inp, n_outmaps,
           base_axis=1,
           w_init=None, b_init=None,
           itr=1,
           fix_parameters=False, rng=None, with_bias=True,
           sn=True, test=False):
    """
    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        inmaps = np.prod(inp.shape[base_axis:])
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inmaps, n_outmap), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
        w_init, not fix_parameters)
    w_sn = spectral_normalization_for_affine(
        w, itr=itr, test=test) if sn else w
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", n_outmaps, b_init, not fix_parameters)
    return F.affine(inp, w_sn, b, base_axis)
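A minimal usage sketch for this spectrally normalized affine layer. It assumes the definition above, its spectral_normalization_for_affine helper, and the usual nnabla imports are already available; the scope name and sizes are illustrative only.

import numpy as np
import nnabla as nn

x = nn.Variable((16, 256))                       # (batch, features)
with nn.parameter_scope("sn_fc1"):
    y = affine(x, n_outmaps=64, sn=True)         # creates "W", "b" and the SN state
x.d = np.random.randn(*x.shape).astype(np.float32)
y.forward()
print(y.shape)                                   # (16, 64)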
Example #2
def INByBatchNorm(inp,
                  axes=[1],
                  decay_rate=0.9,
                  eps=1e-5,
                  fix_parameters=True):
    """Instance Normalization (implemented using BatchNormalization)
    Instance normalization is equivalent to the batch normalization if a batch size is one, in
    other words, it normalizes over spatial dimension(s), meaning all dimensions except for
    the batch and feature dimension.
    """
    assert len(axes) == 1

    shape_stat = [1 for _ in inp.shape]
    shape_stat[axes[0]] = inp.shape[axes[0]]
    beta = get_parameter_or_create("beta", shape_stat, ConstantInitializer(0),
                                   not fix_parameters)
    gamma = get_parameter_or_create("gamma", shape_stat,
                                    ConstantInitializer(1), not fix_parameters)
    mean = get_parameter_or_create("mean", shape_stat, ConstantInitializer(0),
                                   False)
    var = get_parameter_or_create("var", shape_stat, ConstantInitializer(0),
                                  False)
    return F.batch_normalization(inp,
                                 beta,
                                 gamma,
                                 mean,
                                 var,
                                 axes,
                                 decay_rate,
                                 eps,
                                 batch_stat=True,
                                 output_stat=False)
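A short usage sketch, assuming the definition above and the standard nnabla imports are in scope. Because the implementation relies on batch statistics, the batch-size-one case below is exactly the one where it coincides with instance normalization.

import numpy as np
import nnabla as nn

x = nn.Variable((1, 16, 32, 32))                 # batch size 1: BN statistics == per-instance statistics
with nn.parameter_scope("in0"):
    y = INByBatchNorm(x, axes=[1])               # normalizes over the spatial dimensions
x.d = np.random.randn(*x.shape).astype(np.float32)
y.forward()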
Example #3
def test_get_parameter_with_initializer():
    """Testing with initializer
    """
    import nnabla as nn
    from nnabla.parameter import get_parameter_or_create
    nn.clear_parameters()
    rng = np.random.RandomState(seed=313)
    shape = (8, 8, 3, 3)

    # Instance inherited from BaseInitializer
    initializer = UniformInitializer(lim=(-1, 1), rng=rng)
    param1 = get_parameter_or_create('param1',
                                     shape,
                                     initializer=initializer,
                                     need_grad=True)
    assert np.all(param1.d > -1) and np.all(param1.d < 1)

    # Numpy array
    initializer = rng.randn(*shape)
    param2 = get_parameter_or_create('param2',
                                     initializer=initializer,
                                     need_grad=True)
    assert np.allclose(initializer, param2.d)

    # Random
    param3 = get_parameter_or_create('param3', shape, need_grad=True)

    nn.clear_parameters()
Example #4
def test_get_parameter_or_create_need_grad():
    """Testing if need_grad flag works not not.
    """
    import nnabla as nn
    from nnabla.parameter import get_parameter_or_create
    nn.clear_parameters()
    param1 = get_parameter_or_create('p/param1', (2, 3, 4, 5), need_grad=True)
    p1d = np.random.randn(*param1.shape).astype(np.float32)
    p1g = np.random.randn(*param1.shape).astype(np.float32)
    param1.d = p1d
    param1.g = p1g
    param1_f = get_parameter_or_create('p/param1',
                                       param1.shape,
                                       need_grad=False)
    assert not param1_f.need_grad
    assert not param1.need_grad
    assert np.all(param1.d == p1d)
    assert np.all(param1.d == param1_f.d)
    param1.d = 1
    assert np.all(param1_f.d == 1)
    param1_f2 = get_parameter_or_create('p/param1',
                                        param1.shape,
                                        need_grad=True,
                                        as_need_grad=False)
    assert param1.need_grad
    assert param1_f.need_grad
    assert not param1_f2.need_grad
    nn.clear_parameters()
Example #5
def convolution(inp,
                outmaps,
                kernel,
                pad=None,
                stride=None,
                dilation=None,
                group=1,
                itr=1,
                w_init=None,
                b_init=None,
                base_axis=1,
                fix_parameters=False,
                rng=None,
                with_bias=True,
                sn=True,
                test=False,
                init_scale=1.0):
    """
    """
    if w_init is None:
        l, u = calc_uniform_lim_glorot(inp.shape[base_axis], outmaps,
                                       tuple(kernel))
        l, u = init_scale * l, init_scale * u
        w_init = UniformInitializer((l, u), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", (outmaps, inp.shape[base_axis] // group) +
                                tuple(kernel), w_init, not fix_parameters)
    w_sn = spectral_normalization_for_conv(w, itr=itr, test=test) if sn else w
    b = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)
    return F.convolution(inp, w_sn, b, base_axis, pad, stride, dilation, group)
Example #6
    def __init__(self,
                 hparams,
                 comm=None,
                 test=False,
                 recompute=False,
                 init_method=None,
                 input_mean=None,
                 input_scale=None):
        super(D3NetMSS, self).__init__(comm=comm,
                                       test=test,
                                       recompute=recompute,
                                       init_method=init_method)
        self.hparams = hparams
        if input_mean is None or input_scale is None:
            input_mean = np.zeros((1, 1, 1, self.hparams['fft_size'] // 2 + 1))
            input_scale = np.ones((1, 1, 1, self.hparams['fft_size'] // 2 + 1))
        else:
            input_mean = input_mean.reshape(
                (1, 1, 1, self.hparams['fft_size'] // 2 + 1))
            input_scale = input_scale.reshape(
                (1, 1, 1, self.hparams['fft_size'] // 2 + 1))

        self.in_offset = get_parameter_or_create('in_offset',
                                                 shape=input_mean.shape,
                                                 initializer=input_mean)
        self.in_scale = get_parameter_or_create('in_scale',
                                                shape=input_scale.shape,
                                                initializer=input_scale)
        self.decode_scale = get_parameter_or_create(
            'decode_scale', (1, 1, 1, self.hparams['valid_signal_idx']),
            initializer=I.ConstantInitializer(value=1))
        self.decode_bias = get_parameter_or_create(
            'decode_bias', (1, 1, 1, self.hparams['valid_signal_idx']),
            initializer=I.ConstantInitializer(value=1))
Example #7
File: prune.py  Project: sony/nnabla
    def modify(self, f, inputs):
        if f.info.type_name not in self._fct_set:
            return

        # Prune the weight
        x, w = inputs[:2]
        b = None
        if len(inputs) == 3:
            b = inputs[2]
        output_channel = self.calculate_axis(f)
        shape = list(range(w.ndim))
        shape.pop(output_channel)
        l2_norm_per_channel = np.sum(
            w.d ** 2, axis=tuple(shape), keepdims=True)
        mask = l2_norm_per_channel > self._pruning_threshold

        scope = self.get_parameter_scope(w)
        w_pruned, b_pruned = None, None
        with nn.parameter_scope(scope):
            w_data = w.d * mask
            w_pruned = get_parameter_or_create(
                'w-pruned', w.shape, w_data, True, True)
            if b is not None:
                b_data = b.d * mask.reshape((-1,))
                b_pruned = get_parameter_or_create(
                    'b-pruned', b_data.shape, b_data, True, True)
        h = self._fct_set[f.info.type_name](
            x, w_pruned, b_pruned, **f.info.args)
        return h
Example #8
def parametric_fixed_point_quantize_b_xmax(x,
                                           sign=True,
                                           n_init=8,
                                           n_min=2,
                                           n_max=16,
                                           xmax_init=1,
                                           xmax_min=0.001,
                                           xmax_max=10,
                                           fix_parameters=False):
    """Parametric version of `fixed_point_quantize` where the
    bitwidth `b` and dynamic range `xmax` are learnable parameters.

    Returns:
        ~nnabla.Variable: N-D array.
    """
    def clip_scalar(v, min_value, max_value):
        return F.minimum_scalar(F.maximum_scalar(v, min_value), max_value)

    def broadcast_scalar(v, shape):
        return F.broadcast(F.reshape(v, (1, ) * len(shape), inplace=False),
                           shape=shape)

    def quantize_pow2(v):
        return 2**F.round(F.log(v) / np.log(2.))

    n = get_parameter_or_create("n", (),
                                ConstantInitializer(n_init),
                                need_grad=True,
                                as_need_grad=not fix_parameters)
    xmax = get_parameter_or_create("xmax", (),
                                   ConstantInitializer(xmax_init),
                                   need_grad=True,
                                   as_need_grad=not fix_parameters)

    # ensure that bitwidth is in specified range and an integer
    n = F.round(clip_scalar(n, n_min, n_max))
    if sign:
        n = n - 1

    # ensure that dynamic range is in specified range
    xmax = clip_scalar(xmax, xmax_min, xmax_max)

    # compute step size from dynamic range and make sure that it is a pow2
    d = quantize_pow2(xmax / (2**n - 1))

    # compute min/max value that we can represent
    if sign:
        xmin = -xmax
    else:
        xmin = nn.Variable((1, ), need_grad=False)
        xmin.d = 0.

    # broadcast variables to correct size
    d = broadcast_scalar(d, shape=x.shape)
    xmin = broadcast_scalar(xmin, shape=x.shape)
    xmax = broadcast_scalar(xmax, shape=x.shape)

    # apply fixed-point quantization
    return d * F.round(F.clip_by_value(x, xmin, xmax) / d)
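A small usage sketch of the parametric quantizer above, assuming its definition and the usual nnabla imports are in scope; the learnable scalars "n" and "xmax" are created inside the given parameter scope.

import numpy as np
import nnabla as nn

x = nn.Variable((8, 64))
with nn.parameter_scope("pq_xmax"):
    x_q = parametric_fixed_point_quantize_b_xmax(x, sign=True, n_init=8)
x.d = np.random.randn(*x.shape).astype(np.float32)
x_q.forward()                                    # fixed-point quantized copy of x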
Example #9
def inq_convolution(inp, outmaps, kernel,
                    pad=None, stride=None, dilation=None, group=1,
                    num_bits=4, inq_iterations=(), selection_algorithm='random',
                    seed=-1, w_init=None, i_init=None, b_init=None,
                    base_axis=1, fix_parameters=False, rng=None,
                    with_bias=True):
    """Incremental Network Quantization Convolution Layer

    During training, the weights are sequentially quantized to power-of-two
    values, which allows the training of a multiplierless network.

    Using `inq_iterations`, one can specify after how many forward passes
    half of the learnable weights are fixed and quantized to powers-of-two.
    After reaching the last value in `inq_iterations`, all weights are fixed.

    For more details, please refer to the reference.

    Reference:
    Zhou A, Yao A, Guo Y, Xu L, Chen Y. Incremental network quantization:
    Towards lossless CNNs with low-precision weights.
    <https://arxiv.org/abs/1702.03044>

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the number of output channels).
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        num_bits (int): Number of bits per weight. Value has to be larger than 1 as one bit is already used to code the value "0"
        inq_iterations (tuple of int): Tuple of iteration numbers at which we fix half of the weights.
        selection_algorithm (str): Chooses algorithm that is used to decide which weights are fixed. ("largest_abs" ... fix weights with largest absolute value, "random" ... fix weights randomly)
        seed (int): Random seed for INQ algorithm
        w_init (~nnabla.initializer.BaseInitializer): Initializer for the weight.
        i_init (~nnabla.initializer.BaseInitializer): Initializer for the indicators (0 ... learnable, 1 ... fixed).
        b_init (~nnabla.initializer.BaseInitializer): Initializer for the bias.
        fix_parameters (bool): When set to `True`, the weight and bias will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`

    """
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
    if i_init is None:
        i_init = ConstantInitializer()
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        w_init, not fix_parameters)
    i = get_parameter_or_create(
        "I", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        i_init, False)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps,), b_init, not fix_parameters)
    return F.inq_convolution(inp, w, i, b, base_axis, pad, stride, dilation, group, num_bits, inq_iterations, selection_algorithm, seed)
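A usage sketch for the INQ convolution above (the definition and the standard nnabla imports are assumed to be in scope; the iteration numbers are illustrative).

import numpy as np
import nnabla as nn

x = nn.Variable((8, 3, 32, 32))
with nn.parameter_scope("inq_conv1"):
    y = inq_convolution(x, outmaps=16, kernel=(3, 3), pad=(1, 1),
                        inq_iterations=(5000, 10000))   # fix half of the remaining weights at these steps
x.d = np.random.randn(*x.shape).astype(np.float32)
y.forward()
print(y.shape)                                   # (8, 16, 32, 32)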
Example #10
def create_scale_bias(idx, maps, ndim=4):
    shape = [1] * ndim
    shape[1] = maps
    a = get_parameter_or_create("a{}".format(idx), list(shape), None, True,
                                True)
    b = get_parameter_or_create("b{}".format(idx), list(shape), None, True,
                                True)
    return a, b
Example #11
def conv(inp,
         outmaps,
         kernel,
         pad=None,
         stride=None,
         dilation=None,
         group=1,
         w_init=None,
         b_init=None,
         base_axis=1,
         fix_parameters=False,
         rng=None,
         with_bias=True,
         use_wscale=True,
         use_he_backward=False):
    """
    """
    # Use He backward
    if use_he_backward:
        std = calc_normal_std_he_backward(inp.shape[base_axis],
                                          outmaps,
                                          kernel=kernel)
    else:
        std = calc_normal_std_he_forward(inp.shape[base_axis],
                                         outmaps,
                                         kernel=kernel)

    # W init
    if w_init is None and use_wscale:
        # Equalized Learning Rate
        w_init = NormalInitializer(1.)
        w = get_parameter_or_create(
            "W", (outmaps, inp.shape[base_axis] / group) + tuple(kernel),
            w_init, not fix_parameters)
        w *= std
    elif w_init is None and not use_wscale:
        w_init = NormalInitializer(std)
        w = get_parameter_or_create(
            "W", (outmaps, inp.shape[base_axis] / group) + tuple(kernel),
            w_init, not fix_parameters)
    else:
        if w_init is None:
            w_init = UniformInitializer(calc_uniform_lim_glorot(
                inp.shape[base_axis], outmaps, tuple(kernel)),
                                        rng=rng)
        w = get_parameter_or_create(
            "W", (outmaps, inp.shape[base_axis] / group) + tuple(kernel),
            w_init, not fix_parameters)

    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    b = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)

    return F.convolution(inp, w, b, base_axis, pad, stride, dilation, group)
Example #12
def dummy_parametric_function(shape, f=10, i=1, s="dummy"):
    """Doc"""
    from nnabla import Variable
    from nnabla.parameter import get_parameter_or_create
    from nnabla.initializer import UniformInitializer
    p1 = get_parameter_or_create("p1", shape, UniformInitializer((-1, 1)))
    p2 = get_parameter_or_create(
        "p2", shape + (1,), UniformInitializer((-1, 1)))
    return Variable(shape)
Example #13
def dummy_parametric_function(shape, f=10, i=1, s="dummy"):
    """Doc"""
    from nnabla import Variable
    from nnabla.parameter import get_parameter_or_create
    from nnabla.initializer import UniformInitializer
    p1 = get_parameter_or_create("p1", shape, UniformInitializer((-1, 1)))
    p2 = get_parameter_or_create("p2", shape + (1, ),
                                 UniformInitializer((-1, 1)))
    return Variable(shape)
Example #14
def CCBN(h,
         y,
         n_classes,
         decay_rate=0.999,
         test=False,
         fix_parameters=False,
         coefs=[1.0]):
    """Categorical Conditional Batch Normaliazation"""
    # Call the batch normalization once
    shape_stat = [1 for _ in h.shape]
    shape_stat[1] = h.shape[1]
    gamma_tmp = nn.Variable.from_numpy_array(np.ones(shape_stat))
    beta_tmp = nn.Variable.from_numpy_array(np.zeros(shape_stat))
    mean = get_parameter_or_create("mean", shape_stat,
                                   ConstantInitializer(0.0), False)
    var = get_parameter_or_create("var", shape_stat, ConstantInitializer(1.0),
                                  False)
    h = F.batch_normalization(h,
                              beta_tmp,
                              gamma_tmp,
                              mean,
                              var,
                              decay_rate=decay_rate,
                              batch_stat=not test)

    # Condition the gamma and beta with the class label
    b, c = h.shape[0:2]

    def embed_func(y, initializer):
        if type(y) != list:
            o = embed(y,
                      n_classes,
                      c,
                      initializer=initializer,
                      sn=False,
                      test=test)
        else:
            y_list = y
            o = reduce(lambda x, y: x + y, [
                coef * embed(y,
                             n_classes,
                             c,
                             initializer=initializer,
                             sn=False,
                             test=test) for coef, y in zip(coefs, y_list)
            ])
        return o

    with nn.parameter_scope("gamma"):
        gamma = embed_func(y, ConstantInitializer(1.0))
        gamma = F.reshape(gamma, [b, c] + [1 for _ in range(len(h.shape[2:]))])
        gamma = F.broadcast(gamma, h.shape)
    with nn.parameter_scope("beta"):
        beta = embed_func(y, ConstantInitializer(0.0))
        beta = F.reshape(beta, [b, c] + [1 for _ in range(len(h.shape[2:]))])
        beta = F.broadcast(beta, h.shape)
    return gamma * h + beta
Example #15
def LN(inp, fix_parameters=False):
    """Layer normalization.
    """
    beta_shape = (1, inp.shape[1], 1, 1)
    gamma_shape = (1, inp.shape[1], 1, 1)
    beta = get_parameter_or_create("beta", beta_shape, ConstantInitializer(0),
                                   not fix_parameters)
    gamma = get_parameter_or_create("gamma", gamma_shape,
                                    ConstantInitializer(1), not fix_parameters)
    return f_layer_normalization(inp, beta, gamma)
Example #16
def convolution(inp,
                outmaps,
                kernel,
                pad=None,
                stride=None,
                dilation=None,
                group=1,
                w_init=None,
                b_init=None,
                base_axis=1,
                fix_parameters=False,
                rng=None,
                with_bias=True):
    """
    N-D Convolution with a bias term.

    For Dilated Convolution (a.k.a. Atrous Convolution), refer to:

    - Chen et al., DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs. https://arxiv.org/abs/1606.00915

    - Yu et al., Multi-Scale Context Aggregation by Dilated Convolutions. https://arxiv.org/abs/1511.07122

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the number of output channels). For example, to apply convolution on an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply convolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections across channels more sparse by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    """
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            inp.shape[base_axis], outmaps, tuple(kernel)),
                                    rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", (outmaps, inp.shape[base_axis] / group) +
                                tuple(kernel), w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)
    return F.convolution(inp, w, b, base_axis, pad, stride, dilation, group)
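A usage sketch of the convolution above with dilation (atrous convolution), assuming the definition and standard nnabla imports are in scope; padding is chosen so that the spatial size is preserved.

import numpy as np
import nnabla as nn

x = nn.Variable((8, 3, 64, 64))
with nn.parameter_scope("conv1"):
    # a 3x3 kernel with dilation 2 covers a 5x5 receptive field; pad 2 keeps H and W
    y = convolution(x, outmaps=16, kernel=(3, 3), pad=(2, 2), dilation=(2, 2))
x.d = np.random.randn(*x.shape).astype(np.float32)
y.forward()
print(y.shape)                                   # (8, 16, 64, 64)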
Example #17
def affine(inp,
           n_outmaps,
           base_axis=1,
           w_init=None,
           b_init=None,
           fix_parameters=False,
           rng=None,
           with_bias=True,
           use_wscale=True,
           use_he_backward=False):
    """
    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))

    # Use He backward
    if use_he_backward:
        std = calc_normal_std_he_backward(inp.shape[base_axis], n_outmap)
    else:
        std = calc_normal_std_he_forward(inp.shape[base_axis], n_outmap)

    # W init
    if w_init is None and use_wscale:
        # Equalized Learning Rate
        w_init = NormalInitializer(1.)
        w = get_parameter_or_create(
            "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps, w_init,
            not fix_parameters)
        w *= std
    elif w_init is None and not use_wscale:
        w_init = NormalInitializer(std)
        w = get_parameter_or_create(
            "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps, w_init,
            not fix_parameters)
    else:
        if w_init is None:
            w_init = UniformInitializer(calc_uniform_lim_glorot(
                inp.shape[base_axis], n_outmaps),
                                        rng=rng)
        w = get_parameter_or_create(
            "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps, w_init,
            not fix_parameters)

    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    b = None
    if with_bias:
        b = get_parameter_or_create("b", n_outmaps, b_init, not fix_parameters)

    return F.affine(inp, w, b, base_axis)
Example #18
def noisy_layer(x, out_size, name):
    inpt_size = x.shape[1]
    root_p = np.sqrt(inpt_size)
    mu_init = UniformInitializer((-1.0 / root_p, 1.0 / root_p))
    sig_init = ConstantInitializer(0.5 / root_p)
    eps_w, eps_b = sample_noise(inpt_size, out_size)
    with nn.parameter_scope(name):
        mu_w = get_parameter_or_create('mu_w', (inpt_size, out_size), mu_init)
        sig_w = get_parameter_or_create('sig_w', (inpt_size, out_size),
                                        sig_init)
        mu_b = get_parameter_or_create('mu_b', (out_size, ), mu_init)
        sig_b = get_parameter_or_create('sig_b', (out_size, ), sig_init)
    return F.affine(x, mu_w + sig_w * eps_w, mu_b + sig_b * eps_b)
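The sample_noise helper used above is not shown in this example. Below is a plausible sketch based on the factorized Gaussian noise of NoisyNet (Fortunato et al., 2017); the name, signature and exact noise scheme are assumptions rather than the original implementation.

import numpy as np
import nnabla as nn

def sample_noise(inpt_size, out_size):
    # factorized Gaussian noise: f(x) = sign(x) * sqrt(|x|)
    def f(x):
        return np.sign(x) * np.sqrt(np.abs(x))
    eps_in = f(np.random.randn(inpt_size, 1))
    eps_out = f(np.random.randn(1, out_size))
    eps_w = nn.Variable.from_numpy_array(eps_in * eps_out)             # (inpt_size, out_size)
    eps_b = nn.Variable.from_numpy_array(eps_out.reshape(out_size))    # (out_size,)
    return eps_w, eps_b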
Example #19
def batch_normalization(inp,
                        axes=[1],
                        decay_rate=0.9,
                        eps=1e-5,
                        batch_stat=True,
                        output_stat=False):
    """
    Batch normalization layer.

    .. math::
        \\begin{array}{lcl}
        \\mu &=& \\frac{1}{M} \\sum x_i\\\\
        \\sigma^2 &=& \\frac{1}{M} \\sum \\left(x_i - \\mu\\right)^2\\\\
        \\hat{x}_i &=& \\frac{x_i - \\mu}{\\sqrt{\\sigma^2 + \\epsilon}} \\\\
        y_i &=& \\hat{x}_i \\gamma + \\beta.
        \\end{array}

    where :math:`x_i, y_i` are the inputs.
    At test time, the mean and variance computed by the moving average during training are used.

    Args:
        inp (~nnabla.Variable): N-D array of input.
        axes (:obj:`tuple` of :obj:`int`): Axes mean and variance are taken.
        decay_rate (float): Decay rate of running mean and variance.
        eps (float): Tiny value to avoid zero division by std.
        batch_stat (bool): Use mini-batch statistics rather than running ones.
        output_stat (bool): Output batch mean and variance.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    References:

        - Ioffe and Szegedy, Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift. https://arxiv.org/abs/1502.03167

    """
    assert len(axes) == 1
    shape_stat = [1 for _ in inp.shape]
    shape_stat[axes[0]] = inp.shape[axes[0]]
    beta = get_parameter_or_create("beta", shape_stat, ConstantInitializer(0),
                                   True)
    gamma = get_parameter_or_create("gamma", shape_stat,
                                    ConstantInitializer(1), True)
    mean = get_parameter_or_create("mean", shape_stat, ConstantInitializer(0),
                                   False)
    var = get_parameter_or_create("var", shape_stat, ConstantInitializer(0),
                                  False)
    return F.batch_normalization(inp, beta, gamma, mean, var, axes, decay_rate,
                                 eps, batch_stat, output_stat)
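A usage sketch, assuming the definition above and the standard nnabla imports. With batch_stat=True the running "mean"/"var" parameters are updated from the mini-batch; reusing the same scope with batch_stat=False builds the inference graph on the shared parameters.

import numpy as np
import nnabla as nn

x = nn.Variable((16, 8, 28, 28))
with nn.parameter_scope("bn1"):
    y_train = batch_normalization(x, batch_stat=True)    # training graph
    y_test = batch_normalization(x, batch_stat=False)    # inference graph, shares beta/gamma/mean/var
x.d = np.random.randn(*x.shape).astype(np.float32)
y_train.forward()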
Example #20
def deconvolution(inp,
                  outmaps,
                  kernel,
                  pad=None,
                  stride=None,
                  dilation=None,
                  group=1,
                  w_init=None,
                  b_init=None,
                  base_axis=1,
                  fix_parameters=False,
                  rng=None,
                  with_bias=True):
    """
    Deconvolution layer.

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of deconvolution kernels (which is equal to the number of output channels). For example, to apply deconvolution on an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply deconvolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections across channels sparser by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    """
    if w_init is None:
        w_init = UniformInitializer(calc_uniform_lim_glorot(
            outmaps, inp.shape[base_axis], tuple(kernel)),
                                    rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", (inp.shape[base_axis], outmaps / group) +
                                tuple(kernel), w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create("b", (outmaps, ), b_init,
                                    not fix_parameters)
    return F.deconvolution(inp, w, b, base_axis, pad, stride, dilation, group)
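A usage sketch showing the common 2x upsampling configuration (the definition above and standard nnabla imports are assumed to be in scope).

import numpy as np
import nnabla as nn

x = nn.Variable((8, 16, 32, 32))
with nn.parameter_scope("deconv1"):
    # kernel 4, stride 2, pad 1 doubles the spatial resolution
    y = deconvolution(x, outmaps=8, kernel=(4, 4), stride=(2, 2), pad=(1, 1))
x.d = np.random.randn(*x.shape).astype(np.float32)
y.forward()
print(y.shape)                                   # (8, 8, 64, 64)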
Example #21
def BN(inp, axes=[1], decay_rate=0.9, eps=1e-5,
       batch_stat=True, output_stat=False, fix_parameters=False):
    """Batch Normalization
    """
    shape_stat = [1 for _ in inp.shape]
    shape_stat[axes[0]] = inp.shape[axes[0]]
    beta = get_parameter_or_create(
        "beta", shape_stat, ConstantInitializer(0), not fix_parameters)
    gamma = get_parameter_or_create(
        "gamma", shape_stat, ConstantInitializer(1), not fix_parameters)
    mean = get_parameter_or_create(
        "mean", shape_stat, ConstantInitializer(0), False)
    var = get_parameter_or_create(
        "var", shape_stat, ConstantInitializer(0), False)
    return F.batch_normalization(inp, beta, gamma, mean, var, axes,
                                 decay_rate, eps, batch_stat, output_stat)
Example #22
def spectral_normalization_for_conv(w, itr=1, eps=1e-12, test=False):
    w_shape = w.shape
    d0 = w.shape[0]            # Out
    d1 = np.prod(w.shape[1:])  # In
    u0 = get_parameter_or_create(
        "singular-vector", [d0], NormalInitializer(), False)
    return F.spectral_norm(w, u0, dim=0, itr=itr, eps=eps, test=test)
Example #23
    def svd_convolution(x, n_outputs, kernel, pad, with_bias, cr):
        W = get_parameter('conv/W')

        if W is None:
            UV = None
        else:
            UV = W.d
        b = get_parameter('conv/b')
        # compute rank (size of intermediate activations)
        # to obtain the desired reduction
        inmaps = x.shape[1]
        outmaps = n_outputs
        Ksize = np.prod(kernel)
        rank = int(
            np.floor((1 - cr) * inmaps * outmaps * Ksize /
                     (inmaps * Ksize + inmaps * outmaps)))

        # Initialize bias from the existing b of the convolution, if it exists
        if b is not None:
            b_new = get_parameter_or_create('svd_conv/b',
                                            b.d.shape,
                                            need_grad=b.need_grad)
            b_new.d = b.d.copy()
        logger.info(
            "SVD convolution created: inmaps = {}; outmaps = {}; compression = {}; rank = {};"
            .format(inmaps, outmaps, cr, rank))

        # create svd_convolution initialized from W in current context if it exists
        return PF.svd_convolution(x,
                                  n_outputs,
                                  kernel=kernel,
                                  r=rank,
                                  pad=pad,
                                  with_bias=with_bias,
                                  uv_init=UV)
Example #24
def test_parameter_scope_slash():
    """Testing if parameter_scope('aaa/bbb') works.
    """
    import nnabla as nn
    from nnabla.parameter import get_parameter_or_create
    nn.clear_parameters()
    with nn.parameter_scope('aaa/bbb'):
        param = get_parameter_or_create('ccc', (2, 3, 4, 5))
    ref = np.random.randn(*param.shape).astype(np.float32)
    param.d = ref

    with nn.parameter_scope('aaa'):
        with nn.parameter_scope('bbb'):
            param = get_parameter_or_create('ccc', (2, 3, 4, 5))
    assert np.all(param.d == ref)
    nn.clear_parameters()
Example #25
def embed(inp,
          n_inputs,
          n_features,
          initializer=None,
          fix_parameters=False,
          apply_w=None):
    """ Embed.

    Embed slices a matrix/tensor with indexing array/tensor. Weights are
    initialized with :obj:`nnabla.initializer.UniformInitializer` within
    the range of :math:`-\\sqrt{3}` and :math:`\\sqrt{3}`.

    Args:
        x(~nnabla.Variable): [Integer] Indices with shape
            :math:`(I_0, ..., I_N)`
        n_inputs : number of possible inputs, words or vocabularies
        n_features : number of embedding features
        fix_parameters (bool): When set to `True`, the embedding weight matrix
            will not be updated.
        apply_w (function): Lambda, function, or callable object applied to
            the weights.

    Returns:
        ~nnabla.Variable: Output with shape
            :math:`(I_0, ..., I_N, W_1, ..., W_M)`
    """
    if initializer is None:
        initializer = UniformInitializer((-np.sqrt(3.), np.sqrt(3)))
    w = get_parameter_or_create("W", [n_inputs, n_features], initializer, True,
                                not fix_parameters)
    if apply_w is not None:
        w = apply_w(w)
    return F.embed(inp, w)
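A usage sketch for the embedding above (the definition and standard nnabla imports are assumed to be in scope; vocabulary and feature sizes are illustrative).

import numpy as np
import nnabla as nn

idx = nn.Variable((4, 10))                       # integer word indices
with nn.parameter_scope("emb1"):
    h = embed(idx, n_inputs=1000, n_features=64)
idx.d = np.random.randint(0, 1000, size=idx.shape)
h.forward()
print(h.shape)                                   # (4, 10, 64)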
Example #26
def attnblock(h, r=8, fix_parameters=False, sn=True, test=False):
    """Attention block"""
    x = h

    # 1x1 convolutions
    b, c, s0, s1 = h.shape
    c_r = c // r
    assert c_r > 0
    f_x = convolution(h, c_r, kernel=(1, 1), pad=(0, 0), stride=(1, 1), name="f",
                      with_bias=False, sn=sn, test=test)
    g_x = convolution(h, c_r, kernel=(1, 1), pad=(0, 0), stride=(1, 1), name="g",
                      with_bias=False, sn=sn, test=test)
    h_x = convolution(h, c, kernel=(1, 1), pad=(0, 0), stride=(1, 1), name="h",
                      with_bias=False, sn=sn, test=test)

    # Attend
    attn = F.batch_matmul(f_x.reshape(
        [b, c_r, -1]), g_x.reshape([b, c_r, -1]), transpose_a=True)
    attn = F.softmax(attn, 1)
    h_x = h_x.reshape([b, c, -1])
    o = F.batch_matmul(h_x, attn)
    o = F.reshape(o, [b, c, s0, s1])

    # Shortcut
    gamma = get_parameter_or_create(
        "gamma", [1, 1, 1, 1], ConstantInitializer(0.), not fix_parameters)
    y = gamma * o + x
    return y
Example #27
    def svd_affine(x, n_outputs, cr):
        W = get_parameter('affine/W')

        if W is None:
            UV = None
        else:
            UV = W.d
        b = get_parameter('affine/b')
        # compute rank (size of intermediate activations)
        # to obtain the desired reduction
        inshape = np.prod(x.shape[1:])
        outshape = np.prod(n_outputs)
        rank = int(
            np.floor((1 - cr) * inshape * outshape / (inshape + outshape)))

        # Initialize bias from the existing b of the affine, if it exists
        if b is not None:
            b_new = get_parameter_or_create('svd_affine/b',
                                            b.d.shape,
                                            need_grad=b.need_grad)
            b_new.d = b.d.copy()
        logger.info(
            "SVD affine created: input_shape = {}; output_shape = {}; compression = {}; rank = {};"
            .format(inshape, outshape, cr, rank))

        # create svd_affine initialized from W in current context if it exists
        return PF.svd_affine(x, n_outputs, rank, uv_init=UV)
Example #28
def test_parameter_scope_slash():
    """Testing if parameter_scope('aaa/bbb') works.
    """
    import nnabla as nn
    from nnabla.parameter import get_parameter_or_create
    nn.clear_parameters()
    with nn.parameter_scope('aaa/bbb'):
        param = get_parameter_or_create('ccc', (2, 3, 4, 5))
    ref = np.random.randn(*param.shape).astype(np.float32)
    param.d = ref

    with nn.parameter_scope('aaa'):
        with nn.parameter_scope('bbb'):
            param = get_parameter_or_create('ccc', (2, 3, 4, 5))
    assert np.all(param.d == ref)
    nn.clear_parameters()
Example #29
    def modify(self, f, inputs):
        fname = f.info.type_name
        if fname not in self._fct_set:
            return

        # Next or Previous func is not BatchNorm
        next_func = f.outputs[0].function_references[0]
        prev_func = f.inputs[0].parent
        if (prev_func is None
                or prev_func.info.type_name != 'BatchNormalization') \
                and next_func.info.type_name != 'BatchNormalization':
            return

        x = inputs[0]
        w = inputs[1]
        b = inputs[2] if len(inputs) == 3 else None

        if b is not None:
            return

        scope = self.get_parameter_scope(w)
        n_outmaps = w.shape[1] if fname == 'Affine' else w.shape[0]
        with nn.parameter_scope(scope):
            b = get_parameter_or_create('b', (n_outmaps, ),
                                        ConstantInitializer(), True, True)
        h = self.connect(f, x, w, b)
        return h
Example #30
def affine(inp,
           n_outmaps,
           base_axis=1,
           w_init=None,
           b_init=None,
           fix_parameters=False,
           rng=None,
           with_bias=True):
    """
    The affine layer, also known as the fully connected layer. Computes

    .. math::
        {\\mathbf y} = {\\mathbf A} {\\mathbf x} + {\\mathbf b}.

    where :math:`{\\mathbf x}, {\\mathbf y}` are the inputs and outputs respectively,
    and :math:`{\\mathbf A}, {\\mathbf b}` are constants.

    Args:
        inp (~nnabla.Variable): Input N-D array with shape (:math:`M_0 \\times \ldots \\times M_{B-1} \\times D_B \\times \ldots \\times D_N`). Dimensions before and after base_axis are flattened as if it is a matrix.
        n_outmaps (:obj:`int` or :obj:`tuple` of :obj:`int`): Number of output neurons per data.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: :math:`(B + 1)`-D array. (:math:`M_0 \\times \\ldots \\times M_{B-1} \\times L`)

    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        inmaps = np.prod(inp.shape[base_axis:])
        w_init = UniformInitializer(calc_uniform_lim_glorot(inmaps, n_outmap),
                                    rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create("W", [int(np.prod(inp.shape[base_axis:]))] +
                                n_outmaps, w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create("b", n_outmaps, b_init, not fix_parameters)
    return F.affine(inp, w, b, base_axis)
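A usage sketch of the affine layer above (assuming the definition and the standard nnabla imports are in scope).

import numpy as np
import nnabla as nn

x = nn.Variable((32, 128))
with nn.parameter_scope("fc1"):
    y = affine(x, n_outmaps=10)                  # creates "W" of shape (128, 10) and "b" of shape (10,)
x.d = np.random.randn(*x.shape).astype(np.float32)
y.forward()
print(y.shape)                                   # (32, 10)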
Example #31
def affine(inp, n_outmaps,
           base_axis=1,
           w_init=None, b_init=None,
           fix_parameters=False, rng=None, with_bias=True):
    """
    The affine layer, also known as the fully connected layer. Computes

    .. math::
        {\\mathbf y} = {\\mathbf A} {\\mathbf x} + {\\mathbf b}.

    where :math:`{\\mathbf x}, {\\mathbf y}` are the inputs and outputs respectively,
    and :math:`{\\mathbf A}, {\\mathbf b}` are constants.

    Args:
        inp (~nnabla.Variable): Input N-D array with shape (:math:`M_0 \\times \ldots \\times M_{B-1} \\times D_B \\times \ldots \\times D_N`). Dimensions before and after base_axis are flattened as if it is a matrix.
        n_outmaps (:obj:`int` or :obj:`tuple` of :obj:`int`): Number of output neurons per data.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: :math:`(B + 1)`-D array. (:math:`M_0 \\times \\ldots \\times M_{B-1} \\times L`)

    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        inmaps = np.prod(inp.shape[base_axis:])
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inmaps, n_outmap), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
        w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", n_outmaps, b_init, not fix_parameters)
    return F.affine(inp, w, b, base_axis)
Example #32
def batch_normalization(inp, axes=[1], decay_rate=0.9, eps=1e-5,
                        batch_stat=True, output_stat=False):
    """
    Batch normalization layer.

    .. math::
        \\begin{array}{lcl}
        \\mu &=& \\frac{1}{M} \\sum x_i\\\\
        \\sigma^2 &=& \\frac{1}{M} \\sum \\left(x_i - \\mu\\right)^2\\\\
        \\hat{x}_i &=& \\frac{x_i - \\mu}{\\sqrt{\\sigma^2 + \\epsilon}} \\\\
        y_i &=& \\hat{x}_i \\gamma + \\beta.
        \\end{array}

    where :math:`x_i, y_i` are the inputs.
    At test time, the mean and variance computed by the moving average during training are used.

    Args:
        inp (~nnabla.Variable): N-D array of input.
        axes (:obj:`tuple` of :obj:`int`): Axes mean and variance are taken.
        decay_rate (float): Decay rate of running mean and variance.
        eps (float): Tiny value to avoid zero division by std.
        batch_stat (bool): Use mini-batch statistics rather than running ones.
        output_stat (bool): Output batch mean and variance.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    References:

        - Ioffe and Szegedy, Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift. https://arxiv.org/abs/1502.03167

    """
    assert len(axes) == 1
    shape_stat = [1 for _ in inp.shape]
    shape_stat[axes[0]] = inp.shape[axes[0]]
    beta = get_parameter_or_create(
        "beta", shape_stat, ConstantInitializer(0), True)
    gamma = get_parameter_or_create(
        "gamma", shape_stat, ConstantInitializer(1), True)
    mean = get_parameter_or_create(
        "mean", shape_stat, ConstantInitializer(0), False)
    var = get_parameter_or_create(
        "var", shape_stat, ConstantInitializer(0), False)
    return F.batch_normalization(inp, beta, gamma, mean, var, axes,
                                 decay_rate, eps, batch_stat, output_stat)
Example #33
def convolution(inp, outmaps, kernel,
                pad=None, stride=None, dilation=None, group=1,
                w_init=None, b_init=None,
                base_axis=1, fix_parameters=False, rng=None, with_bias=True):
    """
    N-D Convolution with a bias term.

    For Dilated Convolution (a.k.a. Atrous Convolution), refer to:

    - Chen et al., DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs. https://arxiv.org/abs/1606.00915

    - Yu et al., Multi-Scale Context Aggregation by Dilated Convolutions. https://arxiv.org/abs/1511.07122

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the number of output channels). For example, to apply convolution on an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply convolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections across channels more sparse by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    """
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis] / group) + tuple(kernel),
        w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps,), b_init, not fix_parameters)
    return F.convolution(inp, w, b, base_axis, pad, stride, dilation, group)
Example #34
def embed(inp, n_inputs, n_features, initializer=None,
          itr=1, fix_parameters=False, sn=True, test=False):
    """
    """
    w = get_parameter_or_create("W", [n_inputs, n_features],
                                initializer, not fix_parameters)
    w_sn = spectral_normalization_for_affine(
        w, itr=itr, test=test) if sn else w
    return F.embed(inp, w_sn)
Example #35
def _init_beta_gamma(shape, fix_parameters, param_init, no_bias, no_scale):
    from nnabla.parameter import get_parameter_or_create
    from nnabla.initializer import ConstantInitializer

    if no_bias:
        beta = None
    else:
        beta_init = param_init.get('beta', ConstantInitializer(0))
        beta = get_parameter_or_create("beta", shape, beta_init, True,
                                       not fix_parameters)

    if no_scale:
        gamma = None
    else:
        gamma_init = param_init.get('gamma', ConstantInitializer(1))
        gamma = get_parameter_or_create("gamma", shape, gamma_init, True,
                                        not fix_parameters)

    return beta, gamma
Example #36
def test_get_parameter_or_create_need_grad():
    """Testing if need_grad flag works not not.
    """
    import nnabla as nn
    from nnabla.parameter import get_parameter_or_create
    nn.clear_parameters()
    param1 = get_parameter_or_create('param1', (2, 3, 4, 5), need_grad=True)
    p1d = np.random.randn(*param1.shape).astype(np.float32)
    p1g = np.random.randn(*param1.shape).astype(np.float32)
    param1.d = p1d
    param1.g = p1g
    param1_f = get_parameter_or_create('param1', param1.shape, need_grad=False)
    assert not param1_f.need_grad
    assert param1.need_grad
    assert np.all(param1.d == p1d)
    assert np.all(param1.d == param1_f.d)
    param1.d = 1
    assert np.all(param1_f.d == 1)
    nn.clear_parameters()
Example #37
def _create_variable(v, name, shape):
    # Create and initialize variables
    class Variable:
        pass

    parameter = v.type == "Parameter"
    variable_instance = None
    if parameter:
        if v.initializer.type == 'Normal':
            initializer = NormalInitializer(v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineHe' or v.initializer.type == 'NormalAffineHeForward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_forward(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineHeBackward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_backward(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalAffineGlorot':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_glorot(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionHe' or v.initializer.type == 'NormalConvolutionHeForward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_forward(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionHeBackward':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_he_backward(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'NormalConvolutionGlorot':
            initializer = (lambda shape: NormalInitializer(calc_normal_std_glorot(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'Uniform':
            initializer = UniformInitializer(
                lim=[-v.initializer.multiplier, v.initializer.multiplier])
        elif v.initializer.type == 'UniformAffineGlorot':
            initializer = (lambda shape: UniformInitializer(calc_uniform_lim_glorot(
                shape[0], numpy.prod(shape[1:])))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'UniformConvolutionGlorot':
            initializer = (lambda shape: UniformInitializer(calc_uniform_lim_glorot(
                shape[1], shape[0], kernel=shape[2:]))(shape) * v.initializer.multiplier)
        elif v.initializer.type == 'Constant':
            initializer = ConstantInitializer(value=v.initializer.multiplier)
        else:
            initializer = None
        variable_instance = get_parameter_or_create(name, shape, initializer)
    else:
        # create empty variable, memory will be allocated in network.setup()
        # after network optimization
        variable_instance = nn.Variable()

    variable = Variable()
    variable.name = name
    variable.parameter = parameter
    variable.shape = shape
    variable.variable_instance = variable_instance

    return variable
Example #38
def deconvolution(inp, outmaps, kernel,
                  pad=None, stride=None, dilation=None, group=1,
                  w_init=None, b_init=None,
                  base_axis=1, fix_parameters=False, rng=None, with_bias=True):
    """
    Deconvolution layer.

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of deconvolution kernels (which is equal to the number of output channels). For example, to apply deconvolution on an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply deconvolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections across channels sparser by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`: N-D array.

    """
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(outmaps, inp.shape[base_axis], tuple(kernel)), rng=rng)
    if with_bias and b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (inp.shape[base_axis], outmaps / group) + tuple(kernel),
        w_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps,), b_init, not fix_parameters)
    return F.deconvolution(inp, w, b, base_axis, pad, stride, dilation, group)
Example #39
def embed(inp, n_inputs, n_features):
    """ Embed.

    Embed slices a matrix/tensor with indexing array/tensor

    Args:
        x(~nnabla.Variable): [Integer] Indices with shape :math:`(I_0, ..., I_N)`
        n_inputs : number of possible inputs, words or vocabularies
        n_features : number of embedding features
    Returns:
        ~nnabla.Variable: Output with shape :math:`(I_0, ..., I_N, W_1, ..., W_M)`
    """
    w = get_parameter_or_create("W", [n_inputs, n_features],
                                UniformInitializer((-np.sqrt(3.), np.sqrt(3))), True)
    return F.embed(inp, w)
Example #40
def prelu(inp, base_axis=1, shared=True):
    """
    Parametrized Rectified Linear Unit function defined as

    .. math::
        y_i = \max(0, x_i) + w_i \min(0, x_i)

    where the negative slope :math:`w` is learned and can vary across channels
    (the axis specified with `base_axis`).

    Args:
        x(~nnabla.Variable): N-D array as input
        base_axis(int): Dimensions up to `base_axis` are treated as the sample dimensions.
        shared(bool): Use a shared slope value across channels or not.

    Returns:
        ~nnabla.Variable: N-D array.

    """
    shape = tuple() if shared else (inp.shape[base_axis],)
    w = get_parameter_or_create("W", shape,
                                ConstantInitializer(-1), True)
    return F.prelu(inp, w, base_axis)
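A usage sketch (the definition above and the standard nnabla imports are assumed to be in scope). With shared=False one slope is learned per channel along base_axis.

import numpy as np
import nnabla as nn

x = nn.Variable((8, 16, 32, 32))
with nn.parameter_scope("prelu1"):
    y = prelu(x, shared=False)                   # "W" has shape (16,)
x.d = np.random.randn(*x.shape).astype(np.float32)
y.forward()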
Example #41
def binary_connect_convolution(inp, outmaps, kernel,
                               pad=None, stride=None, dilation=None, group=1,
                               w_init=None, wb_init=None, b_init=None,
                               base_axis=1, fix_parameters=False, rng=None,
                               with_bias=True):
    """Binary Connect Convolution, multiplier-less inner-product.

    Binary Connect Convolution is the convolution function, 
    except the definition of the inner product is modified.
    The input-output relation of this function is as follows:

    .. math::

        y_{n, a, b} = \sum_{m} \sum_{i} \sum_{j} sign(w_{n, m, i, j}) x_{m, a + i, b + j}.

    Therefore :math:`sign(w_i)` is either :math:`1` or :math:`-1` and the inner product
    simplifies to addition.

    This function should be used together with BatchNormalization.

    References:

        M. Courbariaux, Y. Bengio, and J.-P. David. "BinaryConnect:
        Training Deep Neural Networks with binary weights during propagations."
        Advances in Neural Information Processing Systems. 2015.

    .. note::

        1) if you would like to share weights between some layers, please
        make sure to share the standard, floating value weights (`weight`)
        and not the binarized weights (`binary_weight`)

        2) The weights and the binary weights become synced only after :func:`~nnabla._variable.Variable.forward` is called,
        and not after a call to :func:`~nnabla._variable.Variable.backward`.
        To access the parameters of the network, remember to call :func:`~nnabla._variable.Variable.forward` once before doing so, otherwise the
        float weights and the binary weights will not be in sync.

        3) CPU and GPU implementations now use float value for `binary_weight`,
        since this function is only for simulation purposes.

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the number of output channels). For example, to apply convolution on an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply convolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections across channels sparser by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        wb_init (~nnabla.initializer.BaseInitializer): Initializer for binary weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`

    """
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
    if wb_init is None:
        wb_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        w_init, not fix_parameters)
    wb = get_parameter_or_create(
        "Wb", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        wb_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps,), b_init, not fix_parameters)
    return F.binary_connect_convolution(inp, w, wb, b, base_axis, pad, stride, dilation, group)
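A minimal usage sketch for `binary_connect_convolution`, assuming the wrapper and the standard `nnabla` imports are in scope (the scope name "conv" is arbitrary); as per note 2 above, `forward()` is called before the weights are inspected so that `W` and `Wb` are in sync.

import nnabla as nn
import numpy as np

nn.clear_parameters()
x = nn.Variable((4, 3, 28, 28))
x.d = np.random.randn(*x.shape)
with nn.parameter_scope("conv"):
    y = binary_connect_convolution(x, outmaps=16, kernel=(3, 3), pad=(1, 1))
y.forward()  # syncs the float weights "W" with the binarized weights "Wb"
params = nn.get_parameters()
print(y.shape)                  # (4, 16, 28, 28)
print(params["conv/Wb"].shape)  # (16, 3, 3, 3)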
Exemplo n.º 42
0
def binary_connect_affine(inp, n_outmaps,
                          base_axis=1,
                          w_init=None, wb_init=None, b_init=None,
                          fix_parameters=False, rng=None, with_bias=True):
    """Binary Connect Affine, multiplier-less inner-product.

    Binary Connect Affine is an affine function,
    except the definition of the inner product is modified.
    The input-output relation of this function is as follows:

    .. math::

        y_j = \sum_{i} sign(w_{ji}) x_i.

    Therefore :math:`sign(w_{ji})` is either :math:`1` or :math:`-1`, and the inner product
    simplifies to addition.

    This function should be used together with Batch Normalization.

    References:

        M. Courbariaux, Y. Bengio, and J.-P. David. "BinaryConnect:
        Training Deep Neural Networks with binary weights during propagations."
        Advances in Neural Information Processing Systems. 2015.

    .. note::

        1) If you would like to share weights between some layers, please
        make sure to share the standard floating-point weights (`weight`)
        and not the binarized weights (`binary_weight`).

        2) The weights and the binary weights become synced only after :func:`~nnabla._variable.Variable.forward` is called,
        and not after a call to :func:`~nnabla._variable.Variable.backward`.
        To access the parameters of the network, remember to call :func:`~nnabla._variable.Variable.forward` once before doing so, otherwise the
        float weights and the binary weights will not be in sync.

        3) CPU and GPU implementations now use float value for `binary_weight`,
        since this function is only for simulation purposes.

    Args:
        inp (~nnabla.Variable): Input N-D array with shape (:math:`M_0 \\times \ldots \\times M_{B-1} \\times D_B \\times \ldots \\times D_N`). Dimensions before and after base_axis are flattened as if it is a matrix.
        n_outmaps (int or :obj:`tuple` of :obj:`int`): Number of output neurons per data.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        wb_init (~nnabla.initializer.BaseInitializer): Initializer for binary weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`

    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        fan_in = np.prod(inp.shape[base_axis:])
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(fan_in, n_outmap), rng=rng)
    if wb_init is None:
        fan_in = np.prod(inp.shape[base_axis:])
        wb_init = UniformInitializer(
            calc_uniform_lim_glorot(fan_in, n_outmap), rng=rng)
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
        w_init, not fix_parameters)
    wb = get_parameter_or_create(
        "Wb", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
        wb_init, not fix_parameters)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", n_outmaps, b_init, not fix_parameters)
    return F.binary_connect_affine(inp, w, wb, b, base_axis)
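A minimal usage sketch for `binary_connect_affine`, assuming the wrapper and the standard `nnabla` imports are in scope (the scope name "bc_fc" is arbitrary); it maps a flattened 784-dimensional input to 10 outputs.

import nnabla as nn
import numpy as np

nn.clear_parameters()
x = nn.Variable((32, 784))
x.d = np.random.randn(*x.shape)
with nn.parameter_scope("bc_fc"):
    y = binary_connect_affine(x, n_outmaps=10)
y.forward()  # syncs "W" and "Wb" (see note 2 above)
print(y.shape)  # (32, 10)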
Exemplo n.º 43
0
def binary_weight_affine(inp, n_outmaps,
                         base_axis=1,
                         w_init=None, wb_init=None, b_init=None,
                         fix_parameters=False, rng=None, with_bias=True):
    """Binary Weight Affine, multiplier-less inner-product with a scale factor.

    Binary Weight Affine is the affine function, but the inner product
    in this function is the following,

    .. math::

        y_j = \\frac{1}{\\|\\mathbf{w}_j\\|_{\\ell_1}} \sum_{i} sign(w_{ji}) x_i

    Therefore :math:`sign(w_{ji})` is either :math:`1` or :math:`-1` and the inner product
    simplifies to addition followed by scaling factor :math:`\\alpha = \\frac{1}{\\|\\mathbf{w}_j\\|_{\\ell_1}}`.
    The number of scaling factors :math:`\\alpha` equals the number of outmaps of the affine function.

    References:

        Rastegari, Mohammad, et al. "XNOR-Net: ImageNet Classification Using
        Binary Convolutional Neural Networks." arXiv preprint
        arXiv:1603.05279 (2016).

    .. note::

        1) If you would like to share weights between some layers, please
        make sure to share the standard floating-point weights (`weight`)
        and not the binarized weights (`binary_weight`).

        2) The weights and the binary weights become synced only after :func:`~nnabla._variable.Variable.forward` is called,
        and not after a call to :func:`~nnabla._variable.Variable.backward`.
        To access the parameters of the network, remember to call :func:`~nnabla._variable.Variable.forward` once before doing so, otherwise the
        float weights and the binary weights will not be in sync.

        3) CPU and GPU implementations now use float value for `binary_weight`,
        since this function is only for simulation purposes.

    Args:
        inp (~nnabla.Variable): Input N-D array with shape (:math:`M_0 \\times \ldots \\times M_{B-1} \\times D_B \\times \ldots \\times D_N`). Dimensions before and after base_axis are flattened as if it was a matrix.
        n_outmaps (int or :obj:`tuple` of :obj:`int`): Number of output neurons per data.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for the weight.
        wb_init (~nnabla.initializer.BaseInitializer): Initializer for the binary weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for the bias.
        fix_parameters (bool): When set to `True`, the weight and bias will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`

    """
    if not hasattr(n_outmaps, '__iter__'):
        n_outmaps = [n_outmaps]
    n_outmaps = list(n_outmaps)
    n_outmap = int(np.prod(n_outmaps))
    if w_init is None:
        fan_in = np.prod(inp.shape[base_axis:])
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(fan_in, n_outmap), rng=rng)
    if wb_init is None:
        fan_in = np.prod(inp.shape[base_axis:])
        wb_init = UniformInitializer(
            calc_uniform_lim_glorot(fan_in, n_outmap), rng=rng)
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
        w_init, not fix_parameters)
    wb = get_parameter_or_create(
        "Wb", [int(np.prod(inp.shape[base_axis:]))] + n_outmaps,
        wb_init, not fix_parameters)
    alpha = get_parameter_or_create(
        "alpha", n_outmaps, ConstantInitializer(0), False)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", n_outmaps, b_init, not fix_parameters)
    return F.binary_weight_affine(inp, w, wb, alpha, b, base_axis)
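A minimal usage sketch for `binary_weight_affine`, assuming the wrapper and the standard `nnabla` imports are in scope (the scope name "bw_fc" is arbitrary); the per-output scaling factors live in the non-trainable parameter `alpha`, so `grad_only=False` is needed to retrieve it.

import nnabla as nn
import numpy as np

nn.clear_parameters()
x = nn.Variable((32, 256))
x.d = np.random.randn(*x.shape)
with nn.parameter_scope("bw_fc"):
    y = binary_weight_affine(x, n_outmaps=10)
y.forward()
alpha = nn.get_parameters(grad_only=False)["bw_fc/alpha"]
print(y.shape, alpha.shape)  # (32, 10) (10,)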
Exemplo n.º 44
0
def binary_weight_convolution(inp, outmaps, kernel,
                              pad=None, stride=None, dilation=None, group=1,
                              w_init=None, wb_init=None, b_init=None,
                              base_axis=1, fix_parameters=False, rng=None,
                              with_bias=True):
    """Binary Weight Convolution, multiplier-less inner-product with a scale factor.

    Binary Weight Convolution is the convolution function, but the
    inner product in this function is the following,

    .. math::

        y_{n, a, b} = \\frac{1}{\\|\\mathbf{w}_n\\|_{\\ell_1}} \sum_{m} \sum_{i} \sum_{j} sign(w_{n, m, i, j}) x_{m, a + i, b + j}.


    Therefore :math:`sign(w_{n, m, i, j})`  is either :math:`1` or :math:`-1` and the inner product
    simplifies to addition followed by scaling factor :math:`\\alpha = \\frac{1}{\\|\\mathbf{w}_n\\|_{\\ell_1}}`.
    The number of scaling factors :math:`\\alpha` equals the number of outmaps of the
    convolution function.

    References:

        Rastegari, Mohammad, et al. "XNOR-Net: ImageNet Classification Using
        Binary Convolutional Neural Networks." arXiv preprint
        arXiv:1603.05279 (2016).

    .. note::

        1) If you would like to share weights between some layers, please
        make sure to share the standard floating-point weights (`weight`)
        and not the binarized weights (`binary_weight`).

        2) The weights and the binary weights become synced only after :func:`~nnabla._variable.Variable.forward` is called,
        and not after a call to :func:`~nnabla._variable.Variable.backward`.
        To access the parameters of the network, remember to call :func:`~nnabla._variable.Variable.forward` once before doing so, otherwise the
        float weights and the binary weights will not be in sync.

        3) CPU and GPU implementations now use float value for `binary_weight`,
        since this function is only for simulation purposes.

    Args:
        inp (~nnabla.Variable): N-D array.
        outmaps (int): Number of convolution kernels (which is equal to the number of output channels). For example, to apply convolution on an input with 16 types of filters, specify 16.
        kernel (:obj:`tuple` of :obj:`int`): Convolution kernel size. For example, to apply convolution on an image with a 3 (height) by 5 (width) two-dimensional kernel, specify (3,5).
        pad (:obj:`tuple` of :obj:`int`): Padding sizes for dimensions.
        stride (:obj:`tuple` of :obj:`int`): Stride sizes for dimensions.
        dilation (:obj:`tuple` of :obj:`int`): Dilation sizes for dimensions.
        group (int): Number of groups of channels. This makes connections across channels sparser by grouping connections along map direction.
        w_init (~nnabla.initializer.BaseInitializer): Initializer for weight.
        wb_init (~nnabla.initializer.BaseInitializer): Initializer for binary weight.
        b_init (~nnabla.initializer.BaseInitializer): Initializer for bias.
        base_axis (int): Dimensions up to `base_axis` are treated as the sample dimensions.
        fix_parameters (bool): When set to `True`, the weights and biases will not be updated.
        rng (numpy.random.RandomState): Random generator for Initializer.
        with_bias (bool): Specify whether to include the bias term.

    Returns:
        :class:`~nnabla.Variable`

    """
    if w_init is None:
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
    if wb_init is None:
        wb_init = UniformInitializer(
            calc_uniform_lim_glorot(inp.shape[base_axis], outmaps, tuple(kernel)), rng=rng)
    if b_init is None:
        b_init = ConstantInitializer()
    w = get_parameter_or_create(
        "W", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        w_init, not fix_parameters)
    wb = get_parameter_or_create(
        "Wb", (outmaps, inp.shape[base_axis]) + tuple(kernel),
        wb_init, not fix_parameters)
    alpha = get_parameter_or_create(
        "alpha", (outmaps, ), ConstantInitializer(0), False)
    b = None
    if with_bias:
        b = get_parameter_or_create(
            "b", (outmaps,), b_init, not fix_parameters)
    return F.binary_weight_convolution(inp, w, wb, alpha, b, base_axis, pad, stride, dilation, group)
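A minimal usage sketch for `binary_weight_convolution`, assuming the wrapper and the standard `nnabla` imports are in scope (the scope name "bw_conv" is arbitrary); one scaling factor `alpha` is produced per output map.

import nnabla as nn
import numpy as np

nn.clear_parameters()
x = nn.Variable((4, 3, 32, 32))
x.d = np.random.randn(*x.shape)
with nn.parameter_scope("bw_conv"):
    y = binary_weight_convolution(x, outmaps=8, kernel=(3, 3), pad=(1, 1))
y.forward()
alpha = nn.get_parameters(grad_only=False)["bw_conv/alpha"]
print(y.shape, alpha.shape)  # (4, 8, 32, 32) (8,)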