def __call__(self, x, **kwargs): """__call__(self, x, finetune=False) Invokes the forward propagation of BatchNormalization. In training mode, the BatchNormalization computes moving averages of mean and variance for evaluation during training, and normalizes the input using batch statistics. .. warning:: ``test`` argument is not supported anymore since v2. Instead, use ``chainer.using_config('train', False)``. See :func:`chainer.using_config`. Args: x (Variable): Input variable. finetune (bool): If it is in the training mode and ``finetune`` is ``True``, BatchNormalization runs in fine-tuning mode; it accumulates the input array to compute population statistics for normalization, and normalizes the input using batch statistics. """ argument.check_unexpected_kwargs( kwargs, test='test argument is not supported anymore. ' 'Use chainer.using_config') finetune, = argument.parse_kwargs(kwargs, ('finetune', False)) if hasattr(self, 'gamma'): gamma = self.gamma else: with cuda.get_device_from_id(self._device_id): gamma = variable.Variable(self.xp.ones( self.avg_mean.shape, dtype=x.dtype)) if hasattr(self, 'beta'): beta = self.beta else: with cuda.get_device_from_id(self._device_id): beta = variable.Variable(self.xp.zeros( self.avg_mean.shape, dtype=x.dtype)) if configuration.config.train: if finetune: self.N += 1 decay = 1. - 1. / self.N else: decay = self.decay ret = functions.batch_normalization( x, gamma, beta, eps=self.eps, running_mean=self.avg_mean, running_var=self.avg_var, decay=decay) else: # Use running average statistics or fine-tuned statistics. mean = variable.Variable(self.avg_mean) var = variable.Variable(self.avg_var) ret = functions.fixed_batch_normalization( x, gamma, beta, mean, var, self.eps) return ret
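# --- Hedged usage sketch (added for illustration, not part of the snippet above). It shows how the
# train/eval switch described in the docstring is driven from user code, assuming a standard
# chainer.links.BatchNormalization link; the array sizes are arbitrary.
import numpy as np
import chainer
import chainer.links as L

bn = L.BatchNormalization(3)
x = np.random.randn(8, 3).astype(np.float32)

with chainer.using_config('train', True):
    y_train = bn(x)                  # batch statistics are used; running averages are updated
    y_tuned = bn(x, finetune=True)   # fine-tuning mode: accumulate population statistics

with chainer.using_config('train', False):
    y_eval = bn(x)                   # the accumulated running mean/variance are used instead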
def __init__(self, in_channels, out_channels, ksize=None, stride=1, pad=0, activation=relu.relu, *args, **kwargs): # If `args` is not empty, users assume the API for v1 and # specify `wscale` as a positional argument, which we want # to detect and forbid with an explicit error message. msg = ('wscale is not supported anymore. ' 'Use conv_init and bias_init argument to change ' 'the scale of initial parameters.') if args: raise TypeError(msg) argument.check_unexpected_kwargs(kwargs, wscale=msg) conv_init, bias_init = argument.parse_kwargs( kwargs, ('conv_init', None), ('bias_init', None)) if ksize is None: out_channels, ksize, in_channels = in_channels, out_channels, None assert len(out_channels) > 0 convs = [convolution_2d.Convolution2D( in_channels, out_channels[0], ksize, stride, pad, initialW=conv_init, initial_bias=bias_init)] for n_in, n_out in zip(out_channels, out_channels[1:]): convs.append(convolution_2d.Convolution2D( n_in, n_out, 1, initialW=conv_init, initial_bias=bias_init)) super(MLPConvolution2D, self).__init__(*convs) self.activation = activation
def __init__(self, in_channels, out_channels, ksize=None, stride=1, pad=0, nobias=False, initialW=None, initial_bias=None, **kwargs): super(Convolution2D, self).__init__() argument.check_unexpected_kwargs( kwargs, deterministic="deterministic argument is not " "supported anymore. " "Use chainer.using_config('cudnn_deterministic', value) " "context where value is either `True` or `False`.") dilate, = argument.parse_kwargs(kwargs, ('dilate', 1)) if ksize is None: out_channels, ksize, in_channels = in_channels, out_channels, None self.ksize = ksize self.stride = _pair(stride) self.pad = _pair(pad) self.dilate = _pair(dilate) self.out_channels = out_channels with self.init_scope(): W_initializer = initializers._get_initializer(initialW) self.W = variable.Parameter(W_initializer) if in_channels is not None: self._initialize_params(in_channels) if nobias: self.b = None else: if initial_bias is None: initial_bias = 0 bias_initializer = initializers._get_initializer(initial_bias) self.b = variable.Parameter(bias_initializer, out_channels)
def __init__(self, targets, max_sample_size=1000, report_data=True, report_grad=True, plot_mean=True, plot_std=True, percentile_sigmas=( 0, 0.13, 2.28, 15.87, 50, 84.13, 97.72, 99.87, 100), trigger=(1, 'epoch'), filename='statistics.png', figsize=None, marker=None, grid=True, **kwargs): file_name, = argument.parse_kwargs( kwargs, ('file_name', 'statistics.png') ) if filename is None: filename = file_name if filename is None: raise ValueError('Missing output file name of statistics plot') self._vars = _unpack_variables(targets) if len(self._vars) == 0: raise ValueError( 'Need at least one variable for which to collect statistics.' '\nActual: 0 <= 0') if not any((plot_mean, plot_std, bool(percentile_sigmas))): raise ValueError('Nothing to plot') self._keys = [] if report_data: self._keys.append('data') if report_grad: self._keys.append('grad') self._report_data = report_data self._report_grad = report_grad self._statistician = Statistician( collect_mean=plot_mean, collect_std=plot_std, percentile_sigmas=percentile_sigmas) self._plot_mean = plot_mean self._plot_std = plot_std self._plot_percentile = bool(percentile_sigmas) self._trigger = trigger_module.get_trigger(trigger) self._filename = filename self._figsize = figsize self._marker = marker self._grid = grid if not self._plot_percentile: n_percentile = 0 else: if not isinstance(percentile_sigmas, (list, tuple)): n_percentile = 1 # scalar, single percentile else: n_percentile = len(percentile_sigmas) self._data_shape = ( len(self._keys), int(plot_mean) + int(plot_std) + n_percentile) self._samples = Reservoir(max_sample_size, data_shape=self._data_shape)
def __init__(self, loc, **kwargs): scale_tril = None if kwargs: scale_tril, = argument.parse_kwargs( kwargs, ('scale_tril', scale_tril)) if scale_tril is None: raise ValueError('`scale_tril` must have a value.') self.__loc = loc self.__scale_tril = scale_tril
def __init__(self, loc, **kwargs): scale_tril = None if kwargs: scale_tril, = argument.parse_kwargs( kwargs, ('scale_tril', scale_tril)) if scale_tril is None: raise ValueError("`scale_tril` must have a value.") self.loc = chainer.as_variable(loc) self.scale_tril = chainer.as_variable(scale_tril) self.d = self.scale_tril.shape[-1]
def __init__(self, p=None, **kwargs): logit = None if kwargs: logit, = argument.parse_kwargs( kwargs, ('logit', logit)) if not (p is None) ^ (logit is None): raise ValueError( 'Either `p` or `logit` (not both) must have a value.') self.__p = p self.__logit = logit
def __init__(self, check_trigger=(1, 'epoch'), monitor='main/loss', patience=None, mode='auto', verbose=False, max_trigger=(100, 'epoch'), **kwargs): # `patients` as an alias of `patience` patients, = argument.parse_kwargs(kwargs, ('patients', None)) if patients is None: if patience is None: patience = 3 else: pass else: if patience is None: patience = patients else: raise TypeError( 'Both \'patience\' and \'patients\' arguments are ' 'specified. \'patients\' is an alias of the former. ' 'Specify only \'patience\'.') self.count = 0 self.patience = patience self.monitor = monitor self.verbose = verbose self.already_warning = False self._max_trigger = util.get_trigger(max_trigger) self._interval_trigger = util.get_trigger(check_trigger) self._init_summary() if mode == 'max': self._compare = operator.gt elif mode == 'min': self._compare = operator.lt else: if 'accuracy' in monitor: self._compare = operator.gt else: self._compare = operator.lt if self._compare == operator.gt: if verbose: print('early stopping: operator is greater') self.best = float('-inf') else: if verbose: print('early stopping: operator is less') self.best = float('inf')
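# --- Hedged usage sketch (added for illustration). Wiring the early-stopping trigger into a
# Trainer as its stop trigger, assuming the class is exposed as
# chainer.training.triggers.EarlyStoppingTrigger; `updater` below is a placeholder from user code.
from chainer.training.triggers import EarlyStoppingTrigger

stop_trigger = EarlyStoppingTrigger(
    check_trigger=(1, 'epoch'),
    monitor='validation/main/loss',  # 'min' comparison is inferred for loss-like keys in 'auto' mode
    patience=5,
    max_trigger=(100, 'epoch'))
# trainer = chainer.training.Trainer(updater, stop_trigger=stop_trigger, out='result')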
def __init__(self, loc, scale=None, **kwargs): super(Normal, self).__init__() log_scale = None if kwargs: log_scale, = argument.parse_kwargs( kwargs, ('log_scale', log_scale)) if not (scale is None) ^ (log_scale is None): raise ValueError( 'Either `scale` or `log_scale` (not both) must have a value.') self.__loc = loc self.__scale = scale self.__log_scale = log_scale
def gaussian(mean, ln_var, **kwargs): """gaussian(mean, ln_var, *, eps=None, return_eps=False) Gaussian sampling function. This function takes a mean :math:`\\mu` and the logarithm of a variance :math:`\\log(\\sigma^2)` as inputs and outputs a sample drawn from a Gaussian distribution :math:`N(\\mu, \\sigma)`. The inputs must have the same shape. Args: mean (~chainer.Variable): Input variable representing the mean :math:`\\mu`. ln_var (~chainer.Variable): Input variable representing the logarithm of a variance :math:`\\log(\\sigma^2)`. eps (`ndarray` or None): The eps value to be used. You do not have to specify this value, unless you need to make results deterministic. If ``eps`` is not specified or set to ``None``, an eps value will be generated randomly. The shape and dtype must be the same as ``ln_var`` and should be on the same device. return_eps (bool): If ``True``, the eps value used in this function is returned together with the output variable. The returned eps can later be reused by passing it to the ``eps`` argument. Returns: ~chainer.Variable or tuple: When ``return_eps`` is ``False`` (default), returns the output variable with the shape of ``mean`` and/or ``ln_var``. When ``True``, returns the tuple of the output variable and eps (`ndarray`). The eps will be on the same device as the input (``ln_var``). """ eps = None return_eps = False if kwargs: eps, return_eps = argument.parse_kwargs( kwargs, ('eps', eps), ('return_eps', return_eps)) func = Gaussian(eps) out = func.apply((mean, ln_var))[0] if return_eps: return out, func.eps return out
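# --- Hedged usage sketch (added for illustration), assuming the function above is exposed as
# chainer.functions.gaussian. Passing the eps returned via return_eps back into the function
# reproduces the same sample, which is the behaviour the docstring describes.
import numpy as np
import chainer.functions as F

mean = np.zeros((4, 2), dtype=np.float32)
ln_var = np.zeros((4, 2), dtype=np.float32)  # log(sigma^2) = 0, i.e. unit variance

z1, eps = F.gaussian(mean, ln_var, return_eps=True)
z2 = F.gaussian(mean, ln_var, eps=eps)
assert np.allclose(z1.array, z2.array)  # same eps -> identical samples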
def forward(self, x, **kwargs): """forward(self, x, *, finetune=False) Invokes the forward propagation of DecorrelatedBatchNormalization. In training mode, the DecorrelatedBatchNormalization computes moving averages of the mean and projection for evaluation during training, and normalizes the input using batch statistics. Args: x (:class:`~chainer.Variable`): Input variable. finetune (bool): If it is in the training mode and ``finetune`` is ``True``, DecorrelatedBatchNormalization runs in fine-tuning mode; it accumulates the input array to compute population statistics for normalization, and normalizes the input using batch statistics. """ finetune, = argument.parse_kwargs(kwargs, ('finetune', False)) if configuration.config.train: if finetune: self.N += 1 decay = 1. - 1. / self.N else: decay = self.decay avg_mean = self.avg_mean avg_projection = self.avg_projection if configuration.config.in_recomputing: # Do not update statistics when extra forward computation is # called. if finetune: self.N -= 1 avg_mean = None avg_projection = None ret = functions.decorrelated_batch_normalization( x, groups=self.groups, eps=self.eps, running_mean=avg_mean, running_projection=avg_projection, decay=decay) else: # Use running average statistics or fine-tuned statistics. mean = self.avg_mean projection = self.avg_projection ret = functions.fixed_decorrelated_batch_normalization( x, mean, projection, groups=self.groups) return ret
def __init__(self, root_name, filename=None, variable_style=None, function_style=None, **kwargs): out_name, = argument.parse_kwargs(kwargs, ('out_name', 'cg.dot')) if filename is None: filename = out_name self._root_name = root_name self._filename = filename if variable_style is None: variable_style = _var_style self._variable_style = variable_style if function_style is None: function_style = _func_style self._function_style = function_style self._original_flag = None self._flag_called = False
def __init__(self, keys=None, trigger=(1, 'epoch'), postprocess=None, filename=None, **kwargs): self._keys = keys self._trigger = trigger_module.get_trigger(trigger) self._postprocess = postprocess self._log = [] log_name, = argument.parse_kwargs( kwargs, ('log_name', 'log'), ) if filename is None: filename = log_name self._log_name = filename self._init_summary()
def __init__(self, **kwargs): low, high, loc, scale = None, None, None, None if kwargs: low, high, loc, scale = argument.parse_kwargs( kwargs, ('low', low), ('high', high), ('loc', loc), ('scale', scale)) self._use_low_high = low is not None and high is not None self._use_loc_scale = loc is not None and scale is not None if not (self._use_low_high ^ self._use_loc_scale): raise ValueError( 'Either `low, high` or `loc, scale` (not both) must have a ' 'value.') self.__low = low self.__high = high self.__loc = loc self.__scale = scale
def __init__(self, stride=1, pad=0, cover_all=False, **kwargs): dilate, groups = argument.parse_kwargs( kwargs, ('dilate', 1), ('groups', 1), deterministic='deterministic argument is not supported anymore. ' 'Use chainer.using_config(\'cudnn_deterministic\', value) context ' 'where value is either `True` or `False`.', requires_x_grad='requires_x_grad argument is not supported ' 'anymore. Just remove the argument. Note that whether to compute ' 'the gradient w.r.t. x is automatically decided during ' 'backpropagation.') self.sy, self.sx = _pair(stride) self.ph, self.pw = _pair(pad) self.cover_all = cover_all self.dy, self.dx = _pair(dilate) self.groups = groups
def __init__(self, stride=1, pad=0, outsize=None, **kwargs): dilate, groups = argument.parse_kwargs( kwargs, ('dilate', 1), ('groups', 1), deterministic="deterministic argument is not supported anymore. " "Use chainer.using_config('cudnn_deterministic', value) context " "where value is either `True` or `False`.", requires_x_grad="requires_x_grad argument is not supported " "anymore. Just remove the argument. Note that whether to compute " "the gradient w.r.t. x is automatically decided during " "backpropagation.") self.sy, self.sx = _pair(stride) self.ph, self.pw = _pair(pad) self.outh, self.outw = (None, None) if outsize is None else outsize self.dy, self.dx = _pair(dilate) self.groups = groups
def __init__(self, stride=1, pad=0, cover_all=False, **kwargs): argument.check_unexpected_kwargs( kwargs, deterministic="deterministic argument is not supported anymore. " "Use chainer.using_config('cudnn_deterministic', value) context " "where value is either `True` or `False`.", requires_x_grad="requires_x_grad argument is not supported " "anymore. Just remove the argument. Note that whether to compute " "the gradient w.r.t. x is automatically decided during " "backpropagation." ) dilate, = argument.parse_kwargs(kwargs, ('dilate', 1)) self.sy, self.sx = _pair(stride) self.ph, self.pw = _pair(pad) self.cover_all = cover_all self.dy, self.dx = _pair(dilate)
def __init__(self, stride=1, pad=0, cover_all=False, group=1, **kwargs): argument.check_unexpected_kwargs( kwargs, deterministic="deterministic argument is not supported anymore. " "Use chainer.using_config('cudnn_deterministic', value) context " "where value is either `True` or `False`.", requires_x_grad="requires_x_grad argument is not supported " "anymore. Just remove the argument. Note that whether to compute " "the gradient w.r.t. x is automatically decided during " "backpropagation.") dilate, = argument.parse_kwargs(kwargs, ('dilate', 1)) self.sy, self.sx = _pair(stride) self.ph, self.pw = _pair(pad) self.cover_all = cover_all self.dy, self.dx = _pair(dilate) self.group = group
def __init__(self, p=None, **kwargs): logit = None if kwargs: logit, = argument.parse_kwargs( kwargs, ('logit', logit)) if not (p is None) ^ (logit is None): raise ValueError( "Either `p` or `logit` (not both) must have a value.") with chainer.using_config('enable_backprop', True): if p is None: logit = chainer.as_variable(logit) self.__log_p = log_softmax.log_softmax(logit, axis=-1) self.__p = exponential.exp(self.__log_p) else: self.__p = chainer.as_variable(p) self.__log_p = exponential.log(self.__p)
def __init__(self, loc, scale=None, **kwargs): super(Normal, self).__init__() log_scale = None if kwargs: log_scale, = argument.parse_kwargs(kwargs, ('log_scale', log_scale)) if not (scale is None) ^ (log_scale is None): raise ValueError( 'Either `scale` or `log_scale` (not both) must have a value.') self.__loc = loc self.__scale = scale self.__log_scale = log_scale if isinstance(loc, chainer.Variable): self.__device = loc.device else: self.__device = chainer.backend.get_device_from_array(loc)
def __init__(self, in_channels: int, out_channels: int, filtr: (tuple, list), sqrt=False, noB=0, KCD=False, verbose=False, stride=1, pad=0, initW=initializers.GlorotUniform(scale=1.2, dtype=np.float32), initB=initializers.GlorotUniform(scale=1.2, dtype=np.float32), bias_dept=2, **kwargs): """Input channels, number of output channels, and filter window.""" super(Convar2D, self).__init__() argument.check_unexpected_kwargs( kwargs, deterministic="deterministic argument is not " "supported anymore. " "Use chainer.using_config('cudnn_deterministic', value) " "context where value is either `True` or `False`.") dilate, = argument.parse_kwargs(kwargs, ('dilate', 1)) #if filter is None: # out_channels, ksize, in_channels = in_channels, out_channels, None self.filter = filtr self.sqrt = sqrt self.noB = noB self.V = verbose self.KCD = KCD self.stride = _pair(stride) self.pad = _pair(pad) self.dilate = _pair(dilate) self.out_channels = out_channels with self.init_scope(): #W_initializer = initializers._get_initializer(initW) self.W = variable.Parameter(initW) if in_channels is not None: self._initialize_params(in_channels) if noB: self.b = None else: if initB is None: initB = 0 #bias_initializer = initializers._get_initializer(initB) self.b = variable.Parameter(initB, out_channels)  # fixed: was `(self.out_channel)`, an undefined attribute
def __init__(self, in_channels, out_channels, ksize=None, stride=1, pad=0, nobias=False, outsize=None, initialW=None, initial_bias=None, **kwargs): super(Deconvolution2D, self).__init__() dilate, groups, = argument.parse_kwargs( kwargs, ('dilate', 1), ('groups', 1), deterministic='deterministic argument is not supported anymore. ' 'Use chainer.using_config(\'cudnn_deterministic\', value) ' 'context where value is either `True` or `False`.') if ksize is None: out_channels, ksize, in_channels = in_channels, out_channels, None self.ksize = ksize self.stride = _pair(stride) self.pad = _pair(pad) self.dilate = _pair(dilate) self.outsize = (None, None) if outsize is None else outsize self.out_channels = out_channels self.groups = int(groups) with self.init_scope(): W_initializer = initializers._get_initializer(initialW) self.W = variable.Parameter(W_initializer) if in_channels is not None: self._initialize_params(in_channels) if nobias: self.b = None else: if isinstance(initial_bias, (numpy.ndarray, cuda.ndarray)): assert initial_bias.shape == (out_channels, ) if initial_bias is None: initial_bias = 0 bias_initializer = initializers._get_initializer(initial_bias) self.b = variable.Parameter(bias_initializer, out_channels)
def __init__(self, in_channels, out_channels, ksize=None, stride=1, pad=0, nobias=False, initialW=None, initial_bias=None, **kwargs): super(EqualizedConvolution2d, self).__init__() argument.check_unexpected_kwargs( kwargs, deterministic="deterministic argument is not " "supported anymore. " "Use chainer.using_config('cudnn_deterministic', value) " "context where value is either `True` or `False`.") dilate, groups = argument.parse_kwargs(kwargs, ('dilate', 1), ('groups', 1)) if ksize is None: out_channels, ksize, in_channels = in_channels, out_channels, None self.ksize = ksize self.stride = _pair(stride) self.pad = _pair(pad) self.dilate = _pair(dilate) self.out_channels = out_channels self.groups = int(groups) with self.init_scope(): W_initializer = initializers._get_initializer(initialW) self.W = variable.Parameter(W_initializer) self.scale_param = variable.Parameter(-1, (1, 1)) if in_channels is not None: self._initialize_params(in_channels) if nobias: self.b = None else: if initial_bias is None: initial_bias = 0 bias_initializer = initializers._get_initializer(initial_bias) self.b = variable.Parameter(bias_initializer, out_channels)
def __init__(self, in_channels, out_channels, ksize=None, stride=1, pad=0, activation=relu.relu, *args, **kwargs): # If `args` is not empty, users assume the API for v1 and # specify `wscale` as a positional argument, which we want # to detect and forbid with an explicit error message. msg = ('wscale is not supported anymore. ' 'Use conv_init and bias_init argument to change ' 'the scale of initial parameters.') if args: raise TypeError(msg) argument.check_unexpected_kwargs(kwargs, wscale=msg) conv_init, bias_init = argument.parse_kwargs(kwargs, ('conv_init', None), ('bias_init', None)) if ksize is None: out_channels, ksize, in_channels = in_channels, out_channels, None assert len(out_channels) > 0 convs = [ convolution_2d.Convolution2D(in_channels, out_channels[0], ksize, stride, pad, initialW=conv_init, initial_bias=bias_init) ] for n_in, n_out in zip(out_channels, out_channels[1:]): convs.append( convolution_2d.Convolution2D(n_in, n_out, 1, initialW=conv_init, initial_bias=bias_init)) super(MLPConvolution2D, self).__init__(*convs) self.activation = activation
def __init__(self, loc, scale=None, **kwargs): super(Normal, self).__init__() log_scale = None if kwargs: log_scale, = argument.parse_kwargs( kwargs, ('log_scale', log_scale)) if not (scale is None) ^ (log_scale is None): raise ValueError( "Either `scale` or `log_scale` (not both) must have a value.") self.loc = chainer.as_variable(loc) with chainer.using_config('enable_backprop', True): if scale is None: self.__log_scale = chainer.as_variable(log_scale) self.__scale = exponential.exp(self.log_scale) else: self.__scale = chainer.as_variable(scale) self.__log_scale = exponential.log(self.scale)
def __init__(self, data=None, **kwargs): name, grad, requires_grad = argument.parse_kwargs( kwargs, ('name', None), ('grad', None), ('requires_grad', True), volatile='volatile argument is not supported anymore. ' 'Use chainer.using_config') if (data is not None and not isinstance(data, chainer.get_array_types())): msg = '''numpy.ndarray or cuda.ndarray are expected. Actual: {0}'''.format(type(data)) raise TypeError(msg) # Use a list as a data structure to hold the data array indirectly to # abstract its initialized/uninitialized state. self._data = [data] self._requires_grad = requires_grad self._node = VariableNode(self, name) self._grad_var = None if grad is None else Variable(grad) self._loss_scale = None
def __init__(self, comm, iterator, target, device=None, converter=convert.concat_examples, root=0, **kwargs): progress_hook, = argument.parse_kwargs(kwargs, ('progress_hook', None)) self.comm = comm self.iterator = iterator self._targets = {"main": target} self.converter = converter if device is not None: device = backend.get_device(device) self.device = device self._progress_hook = progress_hook assert 0 <= root and root < self.comm.size self.root = root
def _einsum(xp, dtype, in_subscripts, out_subscript, *inputs, **kwargs): check_undefined_ellipsis_sum, = argument.parse_kwargs( kwargs, ('check_undefined_ellipsis_sum', False)) sum_ellipsis = '@' in in_subscripts and '@' not in out_subscript if sum_ellipsis: # einsum does not usually allow summing over '...' subscripts = '{}->...{}'.format( in_subscripts.replace('@', '...'), out_subscript ) else: subscripts = '{}->{}'.format( in_subscripts, out_subscript ).replace('@', '...') # Use optimize option whenever it is critical in speed. # Otherwise avoid bugs in numpy>=1.12,<1.15. einsum_kwargs = {} if len(inputs) >= 3: einsum_kwargs['optimize'] = True try: y = xp.einsum(subscripts, *inputs, **einsum_kwargs) except TypeError: warnings.warn( '{xp}.einsum does not support optimize option. ' 'Use newer version of {xp} to speed up.' .format(xp=xp.__name__), chainer.warnings.PerformanceWarning, ) y = xp.einsum(subscripts, *inputs) if sum_ellipsis: sum_ndim = y.ndim - len(out_subscript) if check_undefined_ellipsis_sum and sum_ndim > 0: raise ValueError( 'einsum should not support summing over Ellipsis, ' 'while NumPy 1.14 sometimes accidentally supports it. ' 'This feature is no longer supported by Chainer. ' 'See also NumPy issues #10926, #9984.', ) y = xp.sum(y, axis=tuple(range(sum_ndim))) return utils.force_array(y, dtype)
def set_item(x, slices, rhs, **kwargs): """set_item(x, slices, rhs, *, inplace=True) Copies array and does setitem Args: x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \ :class:`cupy.ndarray`): A variable to be sliced. slices (int, slice, Ellipsis, None, integer array-like, boolean\ array-like or tuple of them): An object to specify the selection of elements. rhs (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \ :class:`cupy.ndarray`): A variable to be set. Returns: A :class:`~chainer.Variable` object which contains the new array. """ inplace, = argument.parse_kwargs(kwargs, ('inplace', True)) if inplace: raise NotImplementedError( 'set_item currently supports only inplace=False') return _copied_set_item(x, slices, rhs)
def __init__(self, stride=1, pad=0, cover_all=False, ada_loss=None, **kwargs): """ CTOR """ super().__init__(stride=stride, pad=pad, cover_all=cover_all, **kwargs) dilate, _ = argument.parse_kwargs(kwargs, ('dilate', 1), ('groups', 1)) self.ada_loss = ada_loss self.ada_loss.func = self self.ada_loss.func_params = { 'stride': stride, 'pad': pad, 'cover_all': cover_all, 'dilate': dilate, }
def _einsum(xp, dtype, in_subscripts, out_subscript, *inputs, **kwargs): check_undefined_ellipsis_sum, = argument.parse_kwargs( kwargs, ('check_undefined_ellipsis_sum', False)) sum_ellipsis = '@' in in_subscripts and '@' not in out_subscript if sum_ellipsis: # einsum does not usually allow summing over '...' subscripts = '{}->...{}'.format( in_subscripts.replace('@', '...'), out_subscript ) else: subscripts = '{}->{}'.format( in_subscripts, out_subscript ).replace('@', '...') # Use optimize option whenever it is critical in speed. # Otherwise avoid bugs in numpy>=1.12,<1.15. einsum_kwargs = {} if len(inputs) >= 3: einsum_kwargs['optimize'] = True try: y = xp.einsum(subscripts, *inputs, **einsum_kwargs) except TypeError: warnings.warn( '{xp}.einsum does not support optimize option. ' 'Use newer version of {xp} to speed up.' .format(xp=xp.__name__), ) y = xp.einsum(subscripts, *inputs) if sum_ellipsis: sum_ndim = y.ndim - len(out_subscript) if check_undefined_ellipsis_sum and sum_ndim > 0: raise ValueError( 'einsum should not support summing over Ellipsis, ' 'while NumPy 1.14 sometimes accidentally supports it. ' 'This feature is no longer supported by Chainer. ' 'See also NumPy issues #10926, #9984.', ) y = xp.sum(y, axis=tuple(range(sum_ndim))) return utils.force_array(y, dtype)
def __init__(self, iterator, target, converter=convert.concat_examples, device=None, eval_hook=None, eval_func=None, **kwargs): progress_bar, = argument.parse_kwargs(kwargs, ('progress_bar', False)) if device is not None: device = backend.get_device(device) if isinstance(iterator, iterator_module.Iterator): iterator = {'main': iterator} self._iterators = iterator if isinstance(target, link.Link): target = {'main': target} self._targets = target self.converter = converter self.device = device self.eval_hook = eval_hook self.eval_func = eval_func self._progress_bar = progress_bar for key, iter in six.iteritems(iterator): if (isinstance( iter, (iterators.SerialIterator, iterators.MultiprocessIterator, iterators.MultithreadIterator)) and getattr(iter, 'repeat', False)): msg = 'The `repeat` property of the iterator {} ' 'is set to `True`. Typically, the evaluator sweeps ' 'over iterators until they stop, ' 'but as the property being `True`, this iterator ' 'might not stop and evaluation could go into ' 'an infinite loop. ' 'We recommend to check the configuration ' 'of iterators'.format(key) warnings.warn(msg)
def decorrelated_batch_normalization(x, **kwargs): """decorrelated_batch_normalization(x, *, groups=16, eps=2e-5, \ running_mean=None, running_projection=None, decay=0.9) Decorrelated batch normalization function. It takes the input variable ``x`` and normalizes it using batch statistics to make the output zero-mean and decorrelated. Args: x (:class:`~chainer.Variable`): Input variable. groups (int): Number of groups to use for group whitening. eps (float): Epsilon value for numerical stability. running_mean (:ref:`ndarray`): Expected value of the mean. This is a running average of the mean over several mini-batches using the decay parameter. If ``None``, the expected mean is initialized to zero. running_projection (:ref:`ndarray`): Expected value of the projection matrix. This is a running average of the projection over several mini-batches using the decay parameter. If ``None``, the expected projection is initialized to the identity matrix. decay (float): Decay rate of moving average. It is used during training. Returns: ~chainer.Variable: The output variable which has the same shape as :math:`x`. See: `Decorrelated Batch Normalization <https://arxiv.org/abs/1804.08450>`_ .. seealso:: :class:`~chainer.links.DecorrelatedBatchNormalization` """ groups, eps, running_mean, running_projection, decay = \ argument.parse_kwargs( kwargs, ('groups', 16), ('eps', 2e-5), ('running_mean', None), ('running_projection', None), ('decay', 0.9)) f = DecorrelatedBatchNormalization(groups, eps, running_mean, running_projection, decay) return f.apply((x, ))[0]
def forward(self, x, t, reduce='sum', **kwargs): """forward(x, t, reduce='sum', *, return_samples=False) Computes the loss value for given input and ground truth labels. Args: x (~chainer.Variable): Input of the weight matrix multiplication. t (~chainer.Variable): Batch of ground truth labels. reduce (str): Reduction option. Its value must be either ``'sum'`` or ``'no'``. Otherwise, :class:`ValueError` is raised. return_samples (bool): If ``True``, the sample array is also returned. The sample array is a :math:`(\\text{batch_size}, \\text{sample_size} + 1)`-array of integers whose first column is fixed to the ground truth labels and the other columns are drawn from the :class:`chainer.utils.WalkerAlias` sampler. Returns: ~chainer.Variable or tuple: If ``return_samples`` is ``False`` (default), loss value is returned. Otherwise, a tuple of the loss value and the sample array is returned. """ return_samples = False if kwargs: return_samples, = argument.parse_kwargs( kwargs, ('return_samples', return_samples)) ret = negative_sampling.negative_sampling( x, t, self.W, self.sampler.sample, self.sample_size, reduce=reduce, return_samples=return_samples) return ret
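# --- Hedged usage sketch (added for illustration), assuming the link is
# chainer.links.NegativeSampling; the vocabulary counts and sizes are arbitrary.
import numpy as np
import chainer.links as L

counts = [10, 5, 3, 2]                        # frequency of each vocabulary item (assumed)
ns = L.NegativeSampling(4, counts, 2)         # in_size=4, sample_size=2, passed positionally
x = np.random.randn(3, 4).astype(np.float32)
t = np.array([0, 1, 2], dtype=np.int32)

loss = ns(x, t)                               # scalar loss with the default reduce='sum'
loss_per_sample, samples = ns(x, t, reduce='no', return_samples=True)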
def decorrelated_batch_normalization(x, **kwargs): """decorrelated_batch_normalization(x, *, groups=16, eps=2e-5, \ running_mean=None, running_projection=None, decay=0.9) Decorrelated batch normalization function. It takes the input variable ``x`` and normalizes it using batch statistics to make the output zero-mean and decorrelated. Args: x (:class:`~chainer.Variable`): Input variable. groups (int): Number of groups to use for group whitening. eps (float): Epsilon value for numerical stability. running_mean (:ref:`ndarray`): Expected value of the mean. This is a running average of the mean over several mini-batches using the decay parameter. If ``None``, the expected mean is initialized to zero. running_projection (:ref:`ndarray`): Expected value of the projection matrix. This is a running average of the projection over several mini-batches using the decay parameter. If ``None``, the expected projection is initialized to the identity matrix. decay (float): Decay rate of moving average. It is used during training. Returns: ~chainer.Variable: The output variable which has the same shape as :math:`x`. See: `Decorrelated Batch Normalization <https://arxiv.org/abs/1804.08450>`_ .. seealso:: :class:`~chainer.links.DecorrelatedBatchNormalization` """ groups, eps, running_mean, running_projection, decay = \ argument.parse_kwargs( kwargs, ('groups', 16), ('eps', 2e-5), ('running_mean', None), ('running_projection', None), ('decay', 0.9)) f = DecorrelatedBatchNormalization( groups, eps, running_mean, running_projection, decay) return f.apply((x,))[0]
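# --- Hedged usage sketch (added for illustration), assuming the function above is exposed as
# chainer.functions.decorrelated_batch_normalization. Without running statistics it simply
# whitens the current batch per group of channels.
import numpy as np
import chainer.functions as F

x = np.random.randn(16, 8, 5, 5).astype(np.float32)
y = F.decorrelated_batch_normalization(x, groups=2, eps=2e-5)
print(y.shape)  # (16, 8, 5, 5); pass running_mean/running_projection to track moving averages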
def snapshot_object(target, filename, savefun=npz.save_npz, **kwargs): """Returns a trainer extension to take snapshots of a given object. This extension serializes the given object and saves it to the output directory. This extension is called once per epoch by default. To take a snapshot at a different interval, a trigger object specifying the required interval can be passed along with this extension to the `extend()` method of the trainer. The default priority is -100, which is lower than that of most built-in extensions. Args: target: Object to serialize. filename (str): Name of the file into which the object is serialized. It can be a format string, where the trainer object is passed to the :meth:`str.format` method. For example, ``'snapshot_{.updater.iteration}'`` is converted to ``'snapshot_10000'`` at the 10,000th iteration. savefun: Function to save the object. It takes two arguments: the output file path and the object to serialize. snapshot_on_error (bool): Whether to take a snapshot in case the trainer loop has failed. Returns: Snapshot extension object. .. seealso:: - :meth:`chainer.training.extensions.snapshot` """ snapshot_on_error, = argument.parse_kwargs( kwargs, ('snapshot_on_error', False)) argument.assert_kwargs_empty(kwargs) return _Snapshot( target=target, writer=snapshot_writers.SimpleWriter(savefun=savefun), filename=filename, snapshot_on_error=snapshot_on_error)
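# --- Hedged usage sketch (added for illustration). The extension returned by snapshot_object is
# registered on a trainer; `model` here is a stand-in target and `trainer` is assumed to come
# from user code.
import chainer
from chainer.training import extensions

model = chainer.links.Linear(3, 2)
snap = extensions.snapshot_object(model, 'model_iter_{.updater.iteration}')
# trainer.extend(snap, trigger=(1, 'epoch'))  # write a snapshot of `model` once per epoch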
def __init__(self, root_name, filename=None, variable_style=None, function_style=None, **kwargs): out_name, = argument.parse_kwargs(kwargs, ('out_name', 'cg.dot')) if filename is None: filename = out_name del out_name # avoid accidental use self._root_name = root_name self._filename = filename if variable_style is None: variable_style = _var_style self._variable_style = variable_style if function_style is None: function_style = _func_style self._function_style = function_style self._original_flag = None self._flag_called = False
def __init__(self, stride=1, pad=0, outsize=None, **kwargs): dilate, groups = argument.parse_kwargs( kwargs, ('dilate', 1), ('groups', 1), deterministic='deterministic argument is not supported anymore. ' 'Use chainer.using_config(\'cudnn_deterministic\', value) context ' 'where value is either `True` or `False`.', requires_x_grad='requires_x_grad argument is not supported ' 'anymore. Just remove the argument. Note that whether to compute ' 'the gradient w.r.t. x is automatically decided during ' 'backpropagation.') self.sy, self.sx = _pair(stride) self.ph, self.pw = _pair(pad) self.outh, self.outw = (None, None) if outsize is None else outsize self.dy, self.dx = _pair(dilate) self.groups = groups if self.dx < 1 or self.dy < 1: raise ValueError('Dilate should be positive, but {} is ' 'supplied.'.format(dilate))
def snapshot_object(target, filename, savefun=npz.save_npz, **kwargs): """Returns a trainer extension to take snapshots of a given object. This extension serializes the given object and saves it to the output directory. This extension is called once per epoch by default. To take a snapshot at a different interval, a trigger object specifying the required interval can be passed along with this extension to the `extend()` method of the trainer. The default priority is -100, which is lower than that of most built-in extensions. Args: target: Object to serialize. filename (str): Name of the file into which the object is serialized. It can be a format string, where the trainer object is passed to the :meth:`str.format` method. For example, ``'snapshot_{.updater.iteration}'`` is converted to ``'snapshot_10000'`` at the 10,000th iteration. savefun: Function to save the object. It takes two arguments: the output file path and the object to serialize. snapshot_on_error (bool): Whether to take a snapshot in case the trainer loop has failed. Returns: Snapshot extension object. .. seealso:: - :meth:`chainer.training.extensions.snapshot` """ snapshot_on_error, = argument.parse_kwargs(kwargs, ('snapshot_on_error', False)) argument.assert_kwargs_empty(kwargs) return _Snapshot(target=target, writer=snapshot_writers.SimpleWriter(savefun=savefun), filename=filename, snapshot_on_error=snapshot_on_error)
def __call__(self, x, c, **kwargs): """__call__(self, x, c, finetune=False) Invokes the forward propagation of BatchNormalization. In training mode, the BatchNormalization computes moving averages of mean and variance for evaluation during training, and normalizes the input using batch statistics. .. warning:: ``test`` argument is not supported anymore since v2. Instead, use ``chainer.using_config('train', train)``. See :func:`chainer.using_config`. Args: x (Variable): Input variable. c (Variable): Input variable for conditioning gamma and beta. finetune (bool): If it is in the training mode and ``finetune`` is ``True``, BatchNormalization runs in fine-tuning mode; it accumulates the input array to compute population statistics for normalization, and normalizes the input using batch statistics. """ weights, = argument.parse_kwargs(kwargs, ('weights', None)) if isinstance(c, (list, tuple)): gamma_cs = [] beta_cs = [] for _c, w in zip(c, weights): _gamma_c = self.gammas(_c) _beta_c = self.betas(_c) # print(F.expand_dims(w, 1).shape, _gamma_c.shape) # print(F.broadcast_to(F.expand_dims(w, 1), _gamma_c).shape) gamma_cs.append( F.broadcast_to(F.expand_dims(w, 1), _gamma_c.shape) * _gamma_c) beta_cs.append( F.broadcast_to(F.expand_dims(w, 1), _beta_c.shape) * _beta_c) gamma_c = sum(gamma_cs) beta_c = sum(beta_cs) else: gamma_c = self.gammas(c) beta_c = self.betas(c) return super(CategoricalConditionalBatchNormalization, self).__call__(x, gamma_c, beta_c)
def __init__(self, keys=None, trigger=(1, 'epoch'), postprocess=None, filename=None, **kwargs): self._keys = keys self._trigger = trigger_module.get_trigger(trigger) self._postprocess = postprocess self._log = [] log_name, = argument.parse_kwargs( kwargs, ('log_name', 'log'), ) if filename is None: filename = log_name del log_name # avoid accidental use self._log_name = filename self._init_summary()
def __init__(self, **kwargs): low, high, loc, scale = None, None, None, None if kwargs: low, high, loc, scale = argument.parse_kwargs( kwargs, ('low', low), ('high', high), ('loc', loc), ('scale', scale)) if not (low is None or high is None) ^ (loc is None or scale is None): raise ValueError( "Either `low, high` or `loc, scale` (not both) must have a " "value.") with chainer.using_config('enable_backprop', True): if low is None: self.__loc = chainer.as_variable(loc) self.__scale = chainer.as_variable(scale) self.__low = self.__loc self.__high = self.__loc + self.__scale else: self.__low = chainer.as_variable(low) self.__high = chainer.as_variable(high) self.__loc = self.__low self.__scale = self.__high - self.__low
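# --- Hedged usage sketch (added for illustration), assuming the distribution above is exposed as
# chainer.distributions.Uniform. Either parameterization yields the same support, as the
# constructor's validation enforces.
import numpy as np
import chainer.distributions as D

u1 = D.Uniform(low=np.zeros(3, dtype=np.float32), high=np.ones(3, dtype=np.float32))
u2 = D.Uniform(loc=np.zeros(3, dtype=np.float32), scale=np.ones(3, dtype=np.float32))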
def __init__(self, stride=1, pad=0, cover_all=False, ada_loss_cfg=None, **kwargs): """ CTOR """ super().__init__(stride=stride, pad=pad, cover_all=cover_all, **kwargs) # NOTE: initialize the loss scaler if ada_loss_cfg is None: ada_loss_cfg = {} dilate, _ = argument.parse_kwargs(kwargs, ('dilate', 1), ('groups', 1)) ada_loss_cfg['func_params'] = { 'stride': stride, 'pad': pad, 'cover_all': cover_all, 'dilate': dilate, } self.ada_loss = AdaLossChainer(**ada_loss_cfg)
def __call__(self, x, c, finetune=False, **kwargs): """__call__(self, x, c, finetune=False) Invokes the forward propagation of BatchNormalization. In training mode, the BatchNormalization computes moving averages of mean and variance for evaluation during training, and normalizes the input using batch statistics. .. warning:: ``test`` argument is not supported anymore since v2. Instead, use ``chainer.using_config('train', train)``. See :func:`chainer.using_config`. Args: x (Variable): Input variable. c (Variable): Input variable for conditioning gamma and beta. finetune (bool): If it is in the training mode and ``finetune`` is ``True``, BatchNormalization runs in fine-tuning mode; it accumulates the input array to compute population statistics for normalization, and normalizes the input using batch statistics. """ weights, = argument.parse_kwargs(kwargs, ('weights', None)) if c.ndim == 2 and weights is not None: _gamma_c = self.gammas(c) _beta_c = self.betas(c) _gamma_c = F.broadcast_to(F.expand_dims(weights, 2), _gamma_c.shape) * _gamma_c _beta_c = F.broadcast_to(F.expand_dims(weights, 2), _beta_c.shape) * _beta_c gamma_c = F.sum(_gamma_c, 1) beta_c = F.sum(_beta_c, 1) else: if c.ndim == 1: # just labels gamma_c = self.gammas(c) beta_c = self.betas(c) else: # distributions wg = list(self.gammas.params())[0] wb = list(self.betas.params())[0] gamma_c = chainer.functions.matmul(c, wg) beta_c = chainer.functions.matmul(c, wb) return super(CategoricalConditionalBatchNormalization, self).__call__(x, gamma_c, beta_c, **kwargs)
def __init__(self, y_keys, x_key='iteration', trigger=(1, 'epoch'), postprocess=None, filename=None, marker='x', grid=True, **kwargs): file_name, = argument.parse_kwargs(kwargs, ('file_name', 'plot.png')) if filename is None: filename = file_name _check_available() self._x_key = x_key if isinstance(y_keys, str): y_keys = (y_keys,) self._y_keys = y_keys self._trigger = trigger_module.get_trigger(trigger) self._file_name = filename self._marker = marker self._grid = grid self._postprocess = postprocess self._init_summary() self._data = {k: [] for k in y_keys}
def __init__(self, y_keys, x_key='iteration', trigger=(1, 'epoch'), postprocess=None, filename=None, marker='x', grid=True, **kwargs): file_name, = argument.parse_kwargs(kwargs, ('file_name', 'plot.png')) if filename is None: filename = file_name del file_name # avoid accidental use _check_available() self._x_key = x_key if isinstance(y_keys, str): y_keys = (y_keys,) self._y_keys = y_keys self._trigger = trigger_module.get_trigger(trigger) self._file_name = filename self._marker = marker self._grid = grid self._postprocess = postprocess self._init_summary() self._data = {k: [] for k in y_keys}
def gradient_snapshot(savefun=None, filename='snapshot_iter_{.updater.iteration}', **kwargs): target, condition, writer, snapshot_on_error, model = argument.parse_kwargs( kwargs, ('target', None), ('condition', None), ('writer', None), ('snapshot_on_error', False), ('model', None)) argument.assert_kwargs_empty(kwargs) if savefun is not None and writer is not None: raise TypeError( 'savefun and writer arguments cannot be specified together.') if writer is None: if savefun is None: savefun = npz.save_npz writer = snapshot_writers.SimpleWriter(savefun=savefun) return GradientSnapshot(target=target, condition=condition, writer=writer, filename=filename, snapshot_on_error=snapshot_on_error, model=model)
def __init__(self, in_channels, out_channels, ksize=None, stride=1, pad=0, nobias=False, outsize=None, initialW=None, initial_bias=None, **kwargs): super(Deconvolution2D, self).__init__() dilate, groups, = argument.parse_kwargs( kwargs, ('dilate', 1), ('groups', 1), deterministic='deterministic argument is not supported anymore. ' 'Use chainer.using_config(\'cudnn_deterministic\', value) ' 'context where value is either `True` or `False`.') if ksize is None: out_channels, ksize, in_channels = in_channels, out_channels, None self.ksize = ksize self.stride = _pair(stride) self.pad = _pair(pad) self.dilate = _pair(dilate) self.outsize = (None, None) if outsize is None else outsize self.out_channels = out_channels self.groups = int(groups) with self.init_scope(): W_initializer = initializers._get_initializer(initialW) self.W = variable.Parameter(W_initializer) if in_channels is not None: self._initialize_params(in_channels) if nobias: self.b = None else: if isinstance(initial_bias, (numpy.ndarray, cuda.ndarray)): assert initial_bias.shape == (out_channels,) if initial_bias is None: initial_bias = 0 bias_initializer = initializers._get_initializer(initial_bias) self.b = variable.Parameter(bias_initializer, out_channels)
def batch_normalization(x, gamma, beta, **kwargs): """batch_normalization(x, gamma, beta, eps=2e-5, running_mean=None, running_var=None, decay=0.9, axis=None) Batch normalization function. It takes the input variable ``x`` and two parameter variables ``gamma`` and ``beta``. The parameter variables must both have the same dimensionality, which is referred to as the channel shape. This channel shape corresponds to the dimensions in the input which are not averaged over. Since the first dimension of the input corresponds to the batch size, the second dimension of `x` will correspond to the first dimension of the channel shape, the third dimension of `x` will correspond to the second channel dimension (if it exists) and so on. Therefore, the dimensionality of the input must be at least one plus the number of channel dimensions. The total effective "batch size" will then be considered to be the product of all dimensions in `x` except for the channel dimensions. As an example, if the input is four dimensional and the parameter variables are one dimensional, then it is assumed that the first dimension of the input is the batch size, the second dimension is the channel size, and the remaining two dimensions are considered to be spatial dimensions that will be averaged over along with the batch size in the batch normalization computations. That is, the total batch size will be considered to be the product of all input dimensions except the second dimension. Note: If this function is called, it will not be possible to access the updated running mean and variance statistics, because they are members of the function object, which cannot be accessed by the caller. If it is desired to access the updated running statistics, it is necessary to get a new instance of the function object, call the object, and then access the running_mean and/or running_var attributes. See the corresponding Link class for an example of how to do this. .. warning:: ``train`` argument is not supported anymore since v2. Instead, use ``chainer.using_config('train', train)``. See :func:`chainer.using_config`. Args: x (Variable): Input variable. gamma (Variable): Scaling parameter of normalized data. beta (Variable): Shifting parameter of scaled normalized data. eps (float): Epsilon value for numerical stability. running_mean (numpy.ndarray or cupy.ndarray): Running average of the mean. This is a running average of the mean over several mini-batches using the decay parameter. If ``None``, the running average is not computed. If this is ``None``, then ``running_var`` must also be ``None``. running_var (numpy.ndarray or cupy.ndarray): Running average of the variance. This is a running average of the variance over several mini-batches using the decay parameter. If ``None``, the running average is not computed. If this is ``None``, then ``running_mean`` must also be ``None``. decay (float): Decay rate of moving average. It is used during training. axis (int, tuple of int or None): Axis over which normalization is performed. When axis is ``None``, it is determined from input dimensions. For example, if ``x.ndim`` is 4, axis becomes (0, 2, 3) and normalization is performed over 0th, 2nd and 3rd axis of input. If it is 2, axis becomes (0) and normalization is performed over 0th axis of input. When a tuple of int is given to this option, numbers in the tuple must be sorted in ascending order. For example, (0, 2) is OK, but (2, 0) is not. 
See: `Batch Normalization: Accelerating Deep Network Training by Reducing\ Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_ .. seealso:: :class:`links.BatchNormalization` """ # NOQA argument.check_unexpected_kwargs( kwargs, train='train argument is not supported anymore. ' 'Use chainer.using_config') eps, running_mean, running_var, decay, axis = argument.parse_kwargs( kwargs, ('eps', 2e-5), ('running_mean', None), ('running_var', None), ('decay', 0.9), ('axis', None)) return BatchNormalization(eps, running_mean, running_var, decay, axis).apply((x, gamma, beta))[0]
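# --- Hedged usage sketch (added for illustration), assuming the function above is exposed as
# chainer.functions.batch_normalization. The running arrays are updated in place when they are
# supplied, which is how callers get at the updated statistics.
import numpy as np
import chainer.functions as F

x = np.random.randn(10, 3, 4, 4).astype(np.float32)
gamma = np.ones(3, dtype=np.float32)
beta = np.zeros(3, dtype=np.float32)
running_mean = np.zeros(3, dtype=np.float32)
running_var = np.zeros(3, dtype=np.float32)

y = F.batch_normalization(
    x, gamma, beta, eps=2e-5,
    running_mean=running_mean, running_var=running_var, decay=0.9)
print(y.shape)  # (10, 3, 4, 4); running_mean/running_var now hold the updated moving averages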
def convolution_2d(x, W, b=None, stride=1, pad=0, cover_all=False, group=1, **kwargs): """convolution_2d(x, W, b=None, stride=1, pad=0, cover_all=False, *, dilate=1) Two-dimensional convolution function. This is an implementation of two-dimensional convolution in ConvNets. It takes three variables: the input image ``x``, the filter weight ``W``, and the bias vector ``b``. Notation: here is a notation for dimensionalities. - :math:`n` is the batch size. - :math:`c_I` and :math:`c_O` are the number of the input and output channels, respectively. - :math:`h_I` and :math:`w_I` are the height and width of the input image, respectively. - :math:`h_K` and :math:`w_K` are the height and width of the filters, respectively. - :math:`h_P` and :math:`w_P` are the height and width of the spatial padding size, respectively. Then the ``Convolution2D`` function computes correlations between filters and patches of size :math:`(h_K, w_K)` in ``x``. Note that correlation here is equivalent to the inner product between expanded vectors. Patches are extracted at positions shifted by multiples of ``stride`` from the first position ``(-h_P, -w_P)`` for each spatial axis. The right-most (or bottom-most) patches do not run over the padded spatial size. Let :math:`(s_Y, s_X)` be the stride of filter application. Then, the output size :math:`(h_O, w_O)` is determined by the following equations: .. math:: h_O &= (h_I + 2h_P - h_K) / s_Y + 1,\\\\ w_O &= (w_I + 2w_P - w_K) / s_X + 1. If ``cover_all`` option is ``True``, the filter will cover all the spatial locations. So, if the last stride of filter does not cover the end of spatial locations, an additional stride will be applied to the end part of spatial locations. In this case, the output size :math:`(h_O, w_O)` is determined by the following equations: .. math:: h_O &= (h_I + 2h_P - h_K + s_Y - 1) / s_Y + 1,\\\\ w_O &= (w_I + 2w_P - w_K + s_X - 1) / s_X + 1. If the bias vector is given, then it is added to all spatial locations of the output of convolution. The output of this function can be non-deterministic when it uses cuDNN. If ``chainer.configuration.config.cudnn_deterministic`` is ``True`` and cuDNN version is >= v3, it forces cuDNN to use a deterministic algorithm. Convolution links can use a feature of cuDNN called autotuning, which selects the most efficient CNN algorithm for images of fixed-size, and can provide a significant performance boost for fixed neural nets. To enable, set `chainer.using_config('autotune', True)` When the dilation factor is greater than one, cuDNN is not used unless the version is 6.0 or higher. .. warning:: ``deterministic`` argument is not supported anymore since v2. Instead, use ``chainer.using_config('cudnn_deterministic', value)`` (value is either ``True`` or ``False``). See :func:`chainer.using_config`. Args: x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \ :class:`cupy.ndarray`): Input variable of shape :math:`(n, c_I, h_I, w_I)`. W (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \ :class:`cupy.ndarray`): Weight variable of shape :math:`(c_O, c_I, h_K, w_K)`. b (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \ :class:`cupy.ndarray`): Bias variable of length :math:`c_O` (optional). stride (:class:`int` or pair of :class:`int` s): Stride of filter applications. ``stride=s`` and ``stride=(s, s)`` are equivalent. pad (:class:`int` or pair of :class:`int` s): Spatial padding width for input arrays. ``pad=p`` and ``pad=(p, p)`` are equivalent. 
cover_all (bool): If ``True``, all spatial locations are convoluted into some output pixels. dilate (int or pair of ints): Dilation factor of filter applications. ``dilate=d`` and ``dilate=(d, d)`` are equivalent. Returns: ~chainer.Variable: Output variable of shape :math:`(n, c_O, h_O, w_O)`. .. seealso:: :class:`~chainer.links.Convolution2D` .. admonition:: Example >>> n = 10 >>> c_i, c_o = 3, 1 >>> h_i, w_i = 30, 40 >>> h_k, w_k = 10, 10 >>> h_p, w_p = 5, 5 >>> x = np.random.uniform(0, 1, (n, c_i, h_i, w_i)).astype(np.float32) >>> x.shape (10, 3, 30, 40) >>> W = np.random.uniform(0, 1, (c_o, c_i, h_k, w_k)).\ astype(np.float32) >>> W.shape (1, 3, 10, 10) >>> b = np.random.uniform(0, 1, (c_o,)).astype(np.float32) >>> b.shape (1,) >>> s_y, s_x = 5, 7 >>> y = F.convolution_2d(x, W, b, stride=(s_y, s_x), pad=(h_p, w_p)) >>> y.shape (10, 1, 7, 6) >>> h_o = int((h_i + 2 * h_p - h_k) / s_y + 1) >>> w_o = int((w_i + 2 * w_p - w_k) / s_x + 1) >>> y.shape == (n, c_o, h_o, w_o) True >>> y = F.convolution_2d(x, W, b, stride=(s_y, s_x), pad=(h_p, w_p), \ cover_all=True) >>> y.shape == (n, c_o, h_o, w_o + 1) True """ argument.check_unexpected_kwargs( kwargs, deterministic="deterministic argument is not " "supported anymore. " "Use chainer.using_config('cudnn_deterministic', value) " "context where value is either `True` or `False`.") dilate, = argument.parse_kwargs(kwargs, ('dilate', 1)) fnode = Convolution2DFunction(stride, pad, cover_all, dilate=dilate, group=group) if b is None: args = x, W else: args = x, W, b y, = fnode.apply(args) return y
def rrelu(x, l=1. / 8, u=1. / 3, **kwargs): """rrelu(x, l=1. / 8, u=1. / 3, *, r=None, return_r=False) Randomized Leaky Rectified Linear Unit function. This function is expressed as .. math:: f(x)=\\max(x, ax), where :math:`a` is a random number sampled from a uniform distribution :math:`U(l, u)`. See: https://arxiv.org/pdf/1505.00853.pdf Args: x (:class:`~chainer.Variable` or :ref:`ndarray`): Input variable. A :math:`(s_1, s_2, ..., s_N)`-shaped float array. l (float): The lower bound of the uniform distribution. u (float): The upper bound of the uniform distribution. r (:class:`numpy.ndarray` or None): The r to be used for rrelu. The shape and dtype must be the same as ``x[0]`` and should be on the same device. If ``r`` is not specified or set to ``None``, an ``r`` will be generated randomly according to the given ``l`` and ``u``. If ``r`` is specified, ``l`` and ``u`` will be ignored. return_r (bool): If ``True``, the r used for rrelu is returned together with the output variable. The returned ``r`` can later be reused by passing it to the ``r`` argument. Returns: ~chainer.Variable or tuple: When ``return_r`` is ``False`` (default), return the output variable. Otherwise returns the tuple of the output variable and ``r`` (ndarray). The ``r`` will be on the same device as the input. A :math:`(s_1, s_2, ..., s_N)`-shaped float array. .. admonition:: Example >>> x = np.array([[-1, 0], [2, -3], [-2, 1]], np.float32) >>> x array([[-1., 0.], [ 2., -3.], [-2., 1.]], dtype=float32) >>> F.rrelu(x).array # doctest: +SKIP array([[-0.24850948, 0. ], [ 2. , -0.50844127], [-0.598535 , 1. ]], dtype=float32) """ r = None return_r = False if kwargs: r, return_r = argument.parse_kwargs( kwargs, ('r', r), ('return_r', return_r), train='train argument is not supported anymore. ' 'Use chainer.using_config') func = RReLU(l, u, r) out = func.apply((x,))[0] r = func.r if return_r: return out, r return out
def test(**kwargs): return parse_kwargs(kwargs, ('foo', 1), ('bar', 2))
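# --- Hedged sketch of what the helper above exercises (added for illustration). parse_kwargs
# (chainer.utils.argument.parse_kwargs) pops the named keys from the kwargs dict, falls back to
# the given defaults, and returns the values as a tuple; that is why the snippets in this file
# unpack single values with a trailing comma, e.g.
# `finetune, = argument.parse_kwargs(kwargs, ('finetune', False))`.
assert test() == (1, 2)
assert test(foo=10) == (10, 2)
assert test(foo=10, bar=20) == (10, 20)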
def batch_normalization(x, gamma, beta, **kwargs): """batch_normalization(x, gamma, beta, eps=2e-5, running_mean=None, \ running_var=None, decay=0.9, axis=None) Batch normalization function. It takes the input variable ``x`` and two parameter variables ``gamma`` and ``beta``. The parameter variables must both have the same dimensionality, which is referred to as the channel shape. This channel shape corresponds to the dimensions in the input which are not averaged over. Since the first dimension of the input corresponds to the batch size, the second dimension of ``x`` will correspond to the first dimension of the channel shape, the third dimension of ``x`` will correspond to the second channel dimension (if it exists) and so on. Therefore, the dimensionality of the input must be at least one plus the number of channel dimensions. The total effective "batch size" will then be considered to be the product of all dimensions in ``x`` except for the channel dimensions. As an example, if the input is four dimensional and the parameter variables are one dimensional, then it is assumed that the first dimension of the input is the batch size, the second dimension is the channel size, and the remaining two dimensions are considered to be spatial dimensions that will be averaged over along with the batch size in the batch normalization computations. That is, the total batch size will be considered to be the product of all input dimensions except the second dimension. Args: x (:class:`~chainer.Variable` or :ref:`ndarray`): Input variable. gamma (:class:`~chainer.Variable` or :ref:`ndarray`): Scaling parameter of normalized data. beta (:class:`~chainer.Variable` or :ref:`ndarray`): Shifting parameter of scaled normalized data. eps (float): Epsilon value for numerical stability. running_mean (:ref:`ndarray`): Running average of the mean. This is a running average of the mean over several mini-batches using the decay parameter. The function takes a previous running average, and updates the array in-place by the new running average. If ``None``, the running average is not computed. If this is ``None``, then ``running_var`` must also be ``None``. running_var (:ref:`ndarray`): Running average of the variance. This is a running average of the variance over several mini-batches using the decay parameter. The function takes a previous running average, and updates the array in-place by the new running average. If ``None``, the running average is not computed. If this is ``None``, then ``running_mean`` must also be ``None``. decay (float): Decay rate of moving average. It is used during training. axis (int, tuple of int or None): Axis over which normalization is performed. When axis is ``None``, it is determined from input dimensions. For example, if ``x.ndim`` is 4, axis becomes (0, 2, 3) and normalization is performed over 0th, 2nd and 3rd axis of input. If it is 2, axis becomes (0) and normalization is performed over 0th axis of input. When a tuple of int is given to this option, numbers in the tuple must be sorted in ascending order. For example, (0, 2) is OK, but (2, 0) is not. See: `Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_ .. seealso:: :class:`~chainer.links.BatchNormalization` to manage the model parameters (``gamma``, ``beta``) and the statistics (``running_mean``, ``running_var``). 
""" eps, running_mean, running_var, decay, axis = argument.parse_kwargs( kwargs, ('eps', 2e-5), ('running_mean', None), ('running_var', None), ('decay', 0.9), ('axis', None), train='train argument is not supported anymore. ' 'Use chainer.using_config') return BatchNormalization(eps, running_mean, running_var, decay, axis).apply((x, gamma, beta))[0]
def batch_normalization(x, gamma, beta, **kwargs):
    """batch_normalization(x, gamma, beta, eps=2e-5, running_mean=None, running_var=None, decay=0.9, axis=None)

    Batch normalization function.

    It takes the input variable ``x`` and two parameter variables ``gamma``
    and ``beta``. The parameter variables must both have the same
    dimensionality, which is referred to as the channel shape. This channel
    shape corresponds to the dimensions in the input which are not averaged
    over. Since the first dimension of the input corresponds to the batch
    size, the second dimension of ``x`` will correspond to the first
    dimension of the channel shape, the third dimension of ``x`` will
    correspond to the second channel dimension (if it exists) and so on.
    Therefore, the dimensionality of the input must be at least one plus the
    number of channel dimensions. The total effective "batch size" will then
    be considered to be the product of all dimensions in ``x`` except for
    the channel dimensions.

    As an example, if the input is four dimensional and the parameter
    variables are one dimensional, then it is assumed that the first
    dimension of the input is the batch size, the second dimension is the
    channel size, and the remaining two dimensions are considered to be
    spatial dimensions that will be averaged over along with the batch size
    in the batch normalization computations. That is, the total batch size
    will be considered to be the product of all input dimensions except the
    second dimension.

    .. warning::

       ``train`` argument is not supported anymore since v2.
       Instead, use ``chainer.using_config('train', train)``.
       See :func:`chainer.using_config`.

    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable.
        gamma (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Scaling parameter of normalized data.
        beta (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Shifting parameter of scaled normalized data.
        eps (float): Epsilon value for numerical stability.
        running_mean (numpy.ndarray or cupy.ndarray):
            Running average of the mean. This is a running average of
            the mean over several mini-batches using the decay parameter.
            The function takes a previous running average, and updates
            the array in-place by the new running average.
            If ``None``, the running average is not computed. If this is
            ``None``, then ``running_var`` must also be ``None``.
        running_var (numpy.ndarray or cupy.ndarray):
            Running average of the variance. This is a running average of
            the variance over several mini-batches using the decay parameter.
            The function takes a previous running average, and updates
            the array in-place by the new running average.
            If ``None``, the running average is not computed. If this is
            ``None``, then ``running_mean`` must also be ``None``.
        decay (float): Decay rate of moving average. It is used during
            training.
        axis (int, tuple of int or None): Axis over which normalization is
            performed. When axis is ``None``, it is determined from input
            dimensions. For example, if ``x.ndim`` is 4, axis becomes
            (0, 2, 3) and normalization is performed over the 0th, 2nd and
            3rd axes of the input. If it is 2, axis becomes (0) and
            normalization is performed over the 0th axis of the input. When
            a tuple of int is given to this option, the numbers in the tuple
            must be sorted in ascending order. For example, (0, 2) is OK,
            but (2, 0) is not.

    See: `Batch Normalization: Accelerating Deep Network Training by Reducing\
    Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_

    .. seealso:: :class:`~chainer.links.BatchNormalization`

    """  # NOQA
    eps, running_mean, running_var, decay, axis = argument.parse_kwargs(
        kwargs, ('eps', 2e-5), ('running_mean', None),
        ('running_var', None), ('decay', 0.9), ('axis', None),
        train='train argument is not supported anymore. '
              'Use chainer.using_config')

    return BatchNormalization(eps, running_mean, running_var, decay,
                              axis).apply((x, gamma, beta))[0]
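The ``axis`` behaviour described above can be checked with a 4-d NCHW input, for which the default reduction axes are (0, 2, 3). A sketch under the usual ``np``/``F`` aliases:

import numpy as np
import chainer.functions as F

x = np.random.randn(2, 3, 5, 5).astype(np.float32)
gamma = np.ones(3, dtype=np.float32)
beta = np.zeros(3, dtype=np.float32)

# axis is inferred as (0, 2, 3) for 4-d input ...
y_default = F.batch_normalization(x, gamma, beta)
# ... which is equivalent to passing it explicitly.
y_explicit = F.batch_normalization(x, gamma, beta, axis=(0, 2, 3))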
def deconvolution_2d(x, W, b=None, stride=1, pad=0, outsize=None, **kwargs):
    """deconvolution_2d(x, W, b=None, stride=1, pad=0, outsize=None)

    Two dimensional deconvolution function.

    This is an implementation of two-dimensional deconvolution. In most deep
    learning frameworks and papers, this function is called **transposed
    convolution**. But because of historical reasons (e.g. the paper by
    Zeiler, `Deconvolutional Networks`_) and backward compatibility, this
    function is called **deconvolution** in Chainer.

    .. _Deconvolutional Networks: \
        http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf

    It takes three variables: the input image ``x``, the filter weight
    ``W``, and the bias vector ``b``.

    Notation: here is a notation for dimensionalities.

    - :math:`n` is the batch size.
    - :math:`c_I` and :math:`c_O` are the number of the input and output
      channels, respectively.
    - :math:`h_I` and :math:`w_I` are the height and width of the input
      image, respectively.
    - :math:`h_K` and :math:`w_K` are the height and width of the filters,
      respectively.
    - :math:`h_P` and :math:`w_P` are the height and width of the spatial
      padding size, respectively.

    Let :math:`(s_Y, s_X)` be the stride of filter application. Then, the
    output size :math:`(h_O, w_O)` is estimated by the following equations:

    .. math::

       h_O &= s_Y (h_I - 1) + h_K - 2h_P,\\\\
       w_O &= s_X (w_I - 1) + w_K - 2w_P.

    The output of this function can be non-deterministic when it uses cuDNN.
    If ``chainer.configuration.config.cudnn_deterministic`` is ``True`` and
    the cuDNN version is >= v3, it forces cuDNN to use a deterministic
    algorithm.

    .. warning::

        ``deterministic`` argument is not supported anymore since v2.
        Instead, use ``chainer.using_config('cudnn_deterministic', value)``
        (value is either ``True`` or ``False``).
        See :func:`chainer.using_config`.

    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`):
            Input variable of shape :math:`(n, c_I, h_I, w_I)`.
        W (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`):
            Weight variable of shape :math:`(c_I, c_O, h_K, w_K)`.
        b (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`):
            Bias variable of length :math:`c_O` (optional).
        stride (:class:`int` or pair of :class:`int` s):
            Stride of filter applications. ``stride=s`` and ``stride=(s, s)``
            are equivalent.
        pad (:class:`int` or pair of :class:`int` s):
            Spatial padding width for input arrays. ``pad=p`` and
            ``pad=(p, p)`` are equivalent.
        outsize (:class:`tuple` of :class:`int`):
            Expected output size of the deconvolution operation. It should
            be a pair of height and width :math:`(h_O, w_O)`. The default
            value is ``None``, in which case the outsize is estimated from
            the input size, stride and pad.

    Returns:
        ~chainer.Variable:
            Output variable of shape :math:`(n, c_O, h_O, w_O)`.

    .. admonition:: Example

        >>> n = 10
        >>> c_i, c_o = 1, 3
        >>> h_i, w_i = 5, 10
        >>> h_k, w_k = 10, 10
        >>> h_p, w_p = 5, 5
        >>> x = np.random.uniform(0, 1, (n, c_i, h_i, w_i)).astype('f')
        >>> x.shape
        (10, 1, 5, 10)
        >>> W = np.random.uniform(0, 1, (c_i, c_o, h_k, w_k)).astype('f')
        >>> W.shape
        (1, 3, 10, 10)
        >>> b = np.random.uniform(0, 1, c_o).astype('f')
        >>> b.shape
        (3,)
        >>> s_y, s_x = 5, 5
        >>> y = F.deconvolution_2d(x, W, b, stride=(s_y, s_x), pad=(h_p, w_p))
        >>> y.shape
        (10, 3, 20, 45)
        >>> h_o = s_y * (h_i - 1) + h_k - 2 * h_p
        >>> w_o = s_x * (w_i - 1) + w_k - 2 * w_p
        >>> y.shape == (n, c_o, h_o, w_o)
        True

    """
    argument.check_unexpected_kwargs(
        kwargs, deterministic="deterministic argument is not "
        "supported anymore. "
        "Use chainer.using_config('cudnn_deterministic', value) "
        "context where value is either `True` or `False`.")
    dilate, = argument.parse_kwargs(kwargs, ('dilate', 1))

    func = Deconvolution2DFunction(stride, pad, outsize, dilate=dilate)
    if b is None:
        args = x, W
    else:
        args = x, W, b
    y, = func.apply(args)
    return y