def __init__(self,
             num_output_classes,
             initializers=None,
             regularizers=None,
             model_size="basic",
             name="dilation"):
  """Creates a dilation module.

  Args:
    num_output_classes: Int. Number of output classes to predict for each
      pixel in an image.
    initializers: Optional dict containing ops to initialize filters (with key
      'w') or biases (with key 'b'). The default initializer makes this module
      equivalent to the identity.
    regularizers: Optional dict containing regularizers for the weights
      (with key 'w') or biases (with key 'b'). As a default, no regularizers
      are used. A regularizer should be a function that takes a single
      `Tensor` as an input and returns a scalar `Tensor` output, e.g. the L1
      and L2 regularizers in `tf.contrib.layers`.
    model_size: string. One of 'basic' or 'large'.
    name: string. Name of module.
  """
  super(Dilation, self).__init__(name=name)
  self._num_output_classes = num_output_classes
  self._model_size = model_size
  self._initializers = util.check_initializers(
      initializers, self.POSSIBLE_INITIALIZER_KEYS)
  self._regularizers = util.check_regularizers(
      regularizers, self.POSSIBLE_INITIALIZER_KEYS)
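
# Minimal usage sketch (assumptions: this is Sonnet v1's `snt.nets.Dilation`,
# whose input is expected to already have `num_output_classes` channels, e.g.
# the logits of a front-end segmentation model; shapes are illustrative).
import tensorflow as tf
import sonnet as snt

num_classes = 10
front_end_logits = tf.placeholder(tf.float32, [None, 64, 64, num_classes])
dilation = snt.nets.Dilation(num_output_classes=num_classes,
                             model_size="basic")
refined_logits = dilation(front_end_logits)  # same shape; identity at init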
def __init__(self,
             hidden_size,
             initializers=None,
             partitioners=None,
             regularizers=None,
             name="gru"):
  """Construct GRU.

  Args:
    hidden_size: (int) Hidden size dimensionality.
    initializers: Dict containing ops to initialize the weights. This
      dict may contain any of the keys in POSSIBLE_INITIALIZER_KEYS.
    partitioners: Optional dict containing partitioners to partition the
      weights and biases. As a default, no partitioners are used. This
      dict may contain any of the keys in POSSIBLE_INITIALIZER_KEYS.
    regularizers: Optional dict containing regularizers for the weights and
      biases. As a default, no regularizers are used. This dict may contain
      any of the keys in POSSIBLE_INITIALIZER_KEYS.
    name: name of the module.

  Raises:
    KeyError: if initializers contains any keys not in
      POSSIBLE_INITIALIZER_KEYS.
  """
  super(GRU, self).__init__(name=name)
  self._hidden_size = hidden_size
  self._initializers = util.check_initializers(
      initializers, self.POSSIBLE_INITIALIZER_KEYS)
  self._partitioners = util.check_partitioners(
      partitioners, self.POSSIBLE_INITIALIZER_KEYS)
  self._regularizers = util.check_regularizers(
      regularizers, self.POSSIBLE_INITIALIZER_KEYS)
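
# Minimal usage sketch (assumption: this is Sonnet v1's `snt.GRU`, an RNNCore
# compatible with TensorFlow's RNN cell interface; shapes are illustrative).
import tensorflow as tf
import sonnet as snt

inputs = tf.placeholder(tf.float32, [None, 20, 32])  # [batch, time, features]
gru = snt.GRU(hidden_size=64)
outputs, final_state = tf.nn.dynamic_rnn(gru, inputs, dtype=tf.float32)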
def __init__(self,
             output_size,
             use_bias=True,
             initializers=None,
             partitioners=None,
             regularizers=None,
             custom_getter=None,
             name="linear"):
  """Constructs a Linear module.

  Args:
    output_size: Output dimensionality. `output_size` can be either an integer
      or a callable. In the latter case, since the function invocation is
      deferred to graph construction time, the user must only ensure that
      output_size can be called, returning an integer, when build is called.
    use_bias: Whether to include bias parameters. Default `True`.
    initializers: Optional dict containing initializers to initialize the
      weights (with key 'w') or biases (with key 'b'). The default
      initializer for the weights is a truncated normal initializer, which
      is commonly used when the inputs are zero centered (see
      https://arxiv.org/pdf/1502.03167v3.pdf). The default initializer for
      the bias is a zero initializer.
    partitioners: Optional dict containing partitioners to partition
      weights (with key 'w') or biases (with key 'b'). As a default, no
      partitioners are used.
    regularizers: Optional dict containing regularizers for the weights
      (with key 'w') and the biases (with key 'b'). As a default, no
      regularizers are used. A regularizer should be a function that takes
      a single `Tensor` as an input and returns a scalar `Tensor` output, e.g.
      the L1 and L2 regularizers in `tf.contrib.layers`.
    custom_getter: Callable or dictionary of callables to use as
      custom getters inside the module. If a dictionary, the keys
      correspond to regexes to match variable names. See the
      `tf.get_variable` documentation for information about the
      custom_getter API.
    name: Name of the module.

  Raises:
    KeyError: If `initializers`, `partitioners` or `regularizers` contains any
      keys other than 'w' or 'b'.
    TypeError: If any of the given initializers, partitioners or regularizers
      are not callable.
  """
  super(Linear, self).__init__(custom_getter=custom_getter, name=name)
  self._output_size = output_size
  self._use_bias = use_bias
  self._input_shape = None
  self._w = None
  self._b = None
  self.possible_keys = self.get_possible_initializer_keys(use_bias=use_bias)
  self._initializers = util.check_initializers(
      initializers, self.possible_keys)
  self._partitioners = util.check_partitioners(
      partitioners, self.possible_keys)
  self._regularizers = util.check_regularizers(
      regularizers, self.possible_keys)
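
# Minimal usage sketch (assumption: this is Sonnet v1's `snt.Linear`; the
# initializer and regularizer values below are illustrative).
import tensorflow as tf
import sonnet as snt

linear = snt.Linear(
    output_size=128,
    initializers={"w": tf.truncated_normal_initializer(stddev=0.01)},
    regularizers={"w": tf.contrib.layers.l2_regularizer(1e-4)})
inputs = tf.placeholder(tf.float32, [None, 64])  # [batch, input_size]
outputs = linear(inputs)                         # [batch, 128]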
def __init__(self,
             axis=None,
             offset=True,
             scale=True,
             eps=1e-5,
             initializers=None,
             partitioners=None,
             regularizers=None,
             name="layer_norm"):
  """Constructs a LayerNorm module.

  Args:
    axis: Optional dimension or iterable of indices of dimensions to normalize
      and reduce over. By default `None` and all dimensions except the
      first/batch dimension are reduced over. If the input tensor represents
      an image, summing over all except the batch and channel dimensions (e.g.
      for image format NHWC, `axis=[1, 2]`), then this module corresponds to
      Instance Normalization (https://arxiv.org/abs/1607.08022).
    offset: Optional boolean to specify whether or not to apply a trained
      component-wise bias after the layer normalization and scaling.
    scale: Optional boolean to specify whether or not to apply a trained
      component-wise scale after the layer normalization.
    eps: small epsilon to avoid division by zero variance. Defaults to 1e-5 as
      used in the paper.
    initializers: Dict containing ops to initialize the scale
      (with key 'gamma') and bias (with key 'beta').
    partitioners: Optional dict containing partitioners to partition
      the scale (with key 'gamma') and bias (with key 'beta'). As a default,
      no partitioners are used.
    regularizers: Optional dict containing regularizers for the scale (with
      key 'gamma') and bias (with key 'beta'). As a default, no regularizers
      are used.
    name: name of the module.

  Raises:
    KeyError: If `initializers`, `partitioners` or `regularizers` contain
      any keys other than `gamma` or `beta`.
    TypeError: If any of the given initializers, partitioners or regularizers
      are not callable.
  """
  super(LayerNorm, self).__init__(name=name)

  if axis is not None:
    if isinstance(axis, int):
      axis = [axis]
    int_not_bool = lambda x: isinstance(x, int) and not isinstance(x, bool)
    if (not isinstance(axis, collections.Iterable) or
        not all(int_not_bool(ax) for ax in axis)):
      raise ValueError("axis should be an int or an iterable of ints")
  self._axis = axis

  self._offset = offset
  self._scale = scale
  self._eps = eps

  self._initializers = util.check_initializers(
      initializers, self.POSSIBLE_INITIALIZER_KEYS)
  self._partitioners = util.check_partitioners(
      partitioners, self.POSSIBLE_INITIALIZER_KEYS)
  self._regularizers = util.check_regularizers(
      regularizers, self.POSSIBLE_INITIALIZER_KEYS)
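
# Minimal usage sketch (assumption: this is Sonnet v1's `snt.LayerNorm`;
# shapes are illustrative).
import tensorflow as tf
import sonnet as snt

# Default: normalize over all non-batch dimensions.
hidden = tf.placeholder(tf.float32, [None, 128])
normed = snt.LayerNorm()(hidden)

# Instance-norm style: reduce over the spatial dimensions of an NHWC image.
images = tf.placeholder(tf.float32, [None, 32, 32, 3])
instance_normed = snt.LayerNorm(axis=[1, 2])(images)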
def __init__(self,
             shape,
             dtype=tf.float32,
             initializers=None,
             partitioners=None,
             regularizers=None,
             custom_getter=None,
             name="trainable_variable"):
  """Constructs a TrainableVariable module.

  Args:
    shape: Tensor shape.
    dtype: Tensor data type.
    initializers: Optional dictionary containing ops to initialize the weight
      Tensor, with key 'w'.
    partitioners: Optional dict containing a partitioner to partition
      the weight (with key 'w'). As a default, no partitioner is used.
    regularizers: Optional dict containing regularizers for the weights
      (with key 'w'). As a default, no regularizers are used. A regularizer
      should be a function that takes a single `Tensor` as an input and
      returns a scalar `Tensor` output, e.g. the L1 and L2 regularizers in
      `tf.contrib.layers`.
    custom_getter: Optional callable or dictionary of callables to use as
      custom_getter for the module.
    name: Name of the module.

  Raises:
    KeyError: If `initializers` contains any keys other than 'w'.
    KeyError: If `partitioners` contains any keys other than 'w'.
    KeyError: If `regularizers` contains any keys other than 'w'.
    TypeError: If any of the given initializers are not callable.
    TypeError: If any of the given partitioners are not callable.
    TypeError: If any of the given regularizers are not callable.
  """
  super(TrainableVariable, self).__init__(custom_getter=custom_getter,
                                          name=name)
  self._shape = tuple(shape)
  self._dtype = dtype
  self._initializers = util.check_initializers(
      initializers, self.POSSIBLE_INITIALIZER_KEYS)
  self._partitioners = util.check_partitioners(
      partitioners, self.POSSIBLE_INITIALIZER_KEYS)
  self._regularizers = util.check_regularizers(
      regularizers, self.POSSIBLE_INITIALIZER_KEYS)
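
# Minimal usage sketch (assumption: this is Sonnet v1's
# `snt.TrainableVariable`, which is called with no arguments to produce the
# wrapped variable).
import tensorflow as tf
import sonnet as snt

weight_module = snt.TrainableVariable(
    shape=[10, 10],
    initializers={"w": tf.truncated_normal_initializer(stddev=0.02)})
w = weight_module()  # a trainable [10, 10] Tensor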
def __init__(self,
             hidden_size,
             activation=tf.tanh,
             initializers=None,
             partitioners=None,
             regularizers=None,
             name="vanilla_rnn"):
  """Construct a Basic RNN core.

  Args:
    hidden_size: hidden size dimensionality.
    activation: activation function to use.
    initializers: optional dict containing ops to initialize the weights. This
      dictionary may contain the keys 'in_to_hidden' and/or
      'hidden_to_hidden'.
    partitioners: optional dict containing ops to partition the weights. This
      dictionary may contain the keys 'in_to_hidden' and/or
      'hidden_to_hidden'.
    regularizers: optional dict containing ops to regularize the weights. This
      dictionary may contain the keys 'in_to_hidden' and/or
      'hidden_to_hidden'.
    name: name of the module.

  Raises:
    KeyError: if `initializers` contains any keys other than 'in_to_hidden' or
      'hidden_to_hidden'.
    KeyError: if `partitioners` contains any keys other than 'in_to_hidden' or
      'hidden_to_hidden'.
    KeyError: if `regularizers` contains any keys other than 'in_to_hidden' or
      'hidden_to_hidden'.
    TypeError: If any of the given initializers are not callable.
    TypeError: If any of the given partitioners are not callable.
    TypeError: If any of the given regularizers are not callable.
  """
  super(VanillaRNN, self).__init__(name=name)
  self._hidden_size = hidden_size
  self._activation = activation
  self._initializers = util.check_initializers(
      initializers, self.POSSIBLE_INITIALIZER_KEYS)
  self._partitioners = util.check_partitioners(
      partitioners, self.POSSIBLE_INITIALIZER_KEYS)
  self._regularizers = util.check_regularizers(
      regularizers, self.POSSIBLE_INITIALIZER_KEYS)
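
# Minimal usage sketch (assumption: this is Sonnet v1's `snt.VanillaRNN`, an
# RNNCore usable with TensorFlow's RNN helpers; shapes are illustrative).
import tensorflow as tf
import sonnet as snt

inputs = tf.placeholder(tf.float32, [None, 10, 16])  # [batch, time, features]
rnn = snt.VanillaRNN(hidden_size=32, activation=tf.tanh)
outputs, final_state = tf.nn.dynamic_rnn(rnn, inputs, dtype=tf.float32)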
def __init__(self,
             eps=1e-5,
             initializers=None,
             partitioners=None,
             regularizers=None,
             name="layer_norm"):
  """Constructs a LayerNorm module.

  Args:
    eps: small epsilon to avoid division by zero variance. Defaults to 1e-5 as
      used in the paper.
    initializers: Dict containing ops to initialize the scale
      (with key 'gamma') and bias (with key 'beta').
    partitioners: Optional dict containing partitioners to partition
      the scale (with key 'gamma') and bias (with key 'beta'). As a default,
      no partitioners are used.
    regularizers: Optional dict containing regularizers for the scale (with
      key 'gamma') and bias (with key 'beta'). As a default, no regularizers
      are used.
    name: name of the module.

  Raises:
    KeyError: If `initializers`, `partitioners` or `regularizers` contain
      any keys other than `gamma` or `beta`.
    TypeError: If any of the given initializers, partitioners or regularizers
      are not callable.
  """
  super(LayerNorm, self).__init__(name=name)

  self._eps = eps

  self._initializers = util.check_initializers(
      initializers, self.POSSIBLE_INITIALIZER_KEYS)
  self._partitioners = util.check_partitioners(
      partitioners, self.POSSIBLE_INITIALIZER_KEYS)
  self._regularizers = util.check_regularizers(
      regularizers, self.POSSIBLE_INITIALIZER_KEYS)
def __init__(self,
             mode=HALF,
             use_batch_norm=False,
             batch_norm_config=None,
             initializers=None,
             partitioners=None,
             regularizers=None,
             name="alex_net"):
  """Constructs AlexNet.

  Args:
    mode: Construction mode of network: `AlexNet.FULL`, `AlexNet.HALF` or
      `AlexNet.MINI`.
    use_batch_norm: Whether to use batch normalization between the output of
      a layer and the activation function.
    batch_norm_config: Optional mapping of additional configuration for the
      `snt.BatchNorm` modules.
    initializers: Optional dict containing ops to initialize the filters (with
      key 'w') or biases (with key 'b'). The default initializers are
      truncated normal initializers, which are commonly used when the inputs
      are zero centered (see https://arxiv.org/pdf/1502.03167v3.pdf).
    partitioners: Optional dict containing partitioners for the filters
      (with key 'w') and the biases (with key 'b'). As a default, no
      partitioners are used.
    regularizers: Optional dict containing regularizers for the filters
      (with key 'w') and the biases (with key 'b'). As a default, no
      regularizers are used. A regularizer should be a function that takes
      a single `Tensor` as an input and returns a scalar `Tensor` output, e.g.
      the L1 and L2 regularizers in `tf.contrib.layers`.
    name: Name of the module.

  Raises:
    base.Error: If the given `mode` is not one of `AlexNet.FULL`,
      `AlexNet.HALF` or `AlexNet.MINI`.
    TypeError: If `batch_norm_config` is not a mapping, e.g. `dict`.
    KeyError: If `initializers` contains any keys other than 'w' or 'b'.
    KeyError: If `partitioners` contains any keys other than 'w' or 'b'.
    KeyError: If `regularizers` contains any keys other than 'w' or 'b'.
  """
  super(AlexNet, self).__init__(name=name)

  self._mode = mode
  self._use_batch_norm = use_batch_norm

  if batch_norm_config is not None:
    if not isinstance(batch_norm_config, collections.Mapping):
      raise TypeError("`batch_norm_config` must be a mapping, e.g. `dict`.")
    self._batch_norm_config = batch_norm_config
  else:
    self._batch_norm_config = {}

  if self._mode == self.HALF:
    # Half of AlexNet, i.e. originally ran on one GPU
    self._conv_layers = [
        (48, (11, 4), (3, 2)),
        (128, (5, 1), (3, 2)),
        (192, (3, 1), None),
        (192, (3, 1), None),
        (128, (3, 1), (3, 2)),
    ]
    self._fc_layers = [2048, 2048]
  elif self._mode == self.FULL:
    # The full AlexNet, i.e. originally ran on two GPUs
    self._conv_layers = [
        (96, (11, 4), (3, 2)),
        (256, (5, 1), (3, 2)),
        (384, (3, 1), None),
        (384, (3, 1), None),
        (256, (3, 1), (3, 2)),
    ]
    self._fc_layers = [4096, 4096]
  elif self._mode == self.MINI:
    # A cut down version of the half net for testing with Cifar10
    self._conv_layers = [
        (48, (3, 1), (3, 1)),
        (128, (3, 1), (3, 1)),
        (192, (3, 1), None),
        (192, (3, 1), None),
        (128, (3, 1), (3, 1)),
    ]
    self._fc_layers = [1024, 1024]
  else:
    raise base.Error("AlexNet construction mode '{}' not recognised, "
                     "must be one of: '{}', '{}', '{}'".format(
                         mode, self.HALF, self.FULL, self.MINI))

  self._min_size = self._calc_min_size(self._conv_layers)
  self._conv_modules = []
  self._linear_modules = []

  self.possible_keys = {"w", "b"}
  self._initializers = util.check_initializers(
      initializers, self.possible_keys)
  self._partitioners = util.check_partitioners(
      partitioners, self.possible_keys)
  self._regularizers = util.check_regularizers(
      regularizers, self.possible_keys)
def __init__(self,
             output_channels,
             kernel_shapes,
             strides,
             paddings,
             rates=(1,),
             activation=tf.nn.relu,
             activate_final=False,
             normalization_ctor=None,
             normalization_kwargs=None,
             normalize_final=None,
             initializers=None,
             partitioners=None,
             regularizers=None,
             use_batch_norm=None,  # Deprecated.
             use_bias=True,
             batch_norm_config=None,  # Deprecated.
             data_format=DATA_FORMAT_NHWC,
             custom_getter=None,
             name="conv_net_2d"):
  """Constructs a `ConvNet2D` module.

  By default, neither batch normalization nor activation are applied to the
  output of the final layer.

  Args:
    output_channels: Iterable of output channels, as defined in
      `conv.Conv2D`. Output channels can be defined either as number or via a
      callable. In the latter case, since the function invocation is deferred
      to graph construction time, the user must only ensure that entries can
      be called when build is called. Each entry in the iterable defines
      properties in the corresponding convolutional layer.
    kernel_shapes: Iterable of kernel sizes as defined in `conv.Conv2D`; if
      the list contains one element only, the same kernel shape is used in
      each layer of the network.
    strides: Iterable of kernel strides as defined in `conv.Conv2D`; if the
      list contains one element only, the same stride is used in each layer
      of the network.
    paddings: Iterable of padding options as defined in `conv.Conv2D`. Each
      can be `snt.SAME`, `snt.VALID`, `snt.FULL`, `snt.CAUSAL`,
      `snt.REVERSE_CAUSAL` or a pair of these to use for height and width.
      If the Iterable contains one element only, the same padding is used in
      each layer of the network.
    rates: Iterable of dilation rates as defined in `conv.Conv2D`; if the
      list contains one element only, the same rate is used in each layer of
      the network.
    activation: An activation op.
    activate_final: Boolean determining if the activation and batch
      normalization, if turned on, are applied to the final layer.
    normalization_ctor: Constructor to return a callable which will perform
      normalization at each layer. Defaults to None / no normalization.
      Examples of what could go here: `snt.BatchNormV2`, `snt.LayerNorm`. If
      a string is provided, importlib is used to convert the string to a
      callable, so either `snt.LayerNorm` or `"snt.LayerNorm"` can be
      provided.
    normalization_kwargs: kwargs to be provided to `normalization_ctor` when
      it is called.
    normalize_final: Whether to apply normalization after the final conv
      layer. Default is to take the value of activate_final.
    initializers: Optional dict containing ops to initialize the filters of
      the whole network (with key 'w') or biases (with key 'b').
    partitioners: Optional dict containing partitioners to partition
      weights (with key 'w') or biases (with key 'b'). As a default, no
      partitioners are used.
    regularizers: Optional dict containing regularizers for the filters of the
      whole network (with key 'w') or biases (with key 'b'). As a default, no
      regularizers are used. A regularizer should be a function that takes a
      single `Tensor` as an input and returns a scalar `Tensor` output, e.g.
      the L1 and L2 regularizers in `tf.contrib.layers`.
    use_batch_norm: Boolean determining if batch normalization is applied
      after convolution. Deprecated, use `normalization_ctor` instead.
    use_bias: Boolean or iterable of booleans determining whether to include
      bias parameters in the convolutional layers. Default `True`.
    batch_norm_config: Optional mapping of additional configuration for the
      `snt.BatchNorm` modules. Deprecated, use `normalization_kwargs` instead.
    data_format: A string, one of "NCHW" or "NHWC". Specifies whether the
      channel dimension of the input and output is the last dimension
      (default, "NHWC"), or the second dimension ("NCHW").
    custom_getter: Callable or dictionary of callables to use as
      custom getters inside the module. If a dictionary, the keys
      correspond to regexes to match variable names. See the
      `tf.get_variable` documentation for information about the
      custom_getter API.
    name: Name of the module.

  Raises:
    TypeError: If `output_channels` is not iterable; or if `kernel_shapes` is
      not iterable; or `strides` is not iterable; or `paddings` is not
      iterable; or if `activation` is not callable.
    ValueError: If `output_channels` is empty; or if `kernel_shapes` does not
      have length 1 or `len(output_channels)`; or if `strides` does not have
      length 1 or `len(output_channels)`; or if `paddings` does not have
      length 1 or `len(output_channels)`; or if `rates` does not have length
      1 or `len(output_channels)`; or if the given data_format is not a
      supported format ("NHWC" or "NCHW"); or if `normalization_ctor` is
      provided but cannot be mapped to a callable.
    KeyError: If `initializers`, `partitioners` or `regularizers` contain any
      keys other than 'w' or 'b'.
    TypeError: If any of the given initializers, partitioners or regularizers
      are not callable.
  """
  if not isinstance(output_channels, collections.Iterable):
    raise TypeError("output_channels must be iterable")
  output_channels = tuple(output_channels)

  if not isinstance(kernel_shapes, collections.Iterable):
    raise TypeError("kernel_shapes must be iterable")
  kernel_shapes = tuple(kernel_shapes)

  if not isinstance(strides, collections.Iterable):
    raise TypeError("strides must be iterable")
  strides = tuple(strides)

  if not isinstance(paddings, collections.Iterable):
    raise TypeError("paddings must be iterable")
  paddings = tuple(paddings)

  if not isinstance(rates, collections.Iterable):
    raise TypeError("rates must be iterable")
  rates = tuple(rates)

  if isinstance(use_batch_norm, collections.Iterable):
    raise TypeError("use_batch_norm must be a boolean. Per-layer use of "
                    "batch normalization is not supported. Previously, a "
                    "test erroneously suggested use_batch_norm can be an "
                    "iterable of booleans.")

  super(ConvNet2D, self).__init__(name=name, custom_getter=custom_getter)

  if not output_channels:
    raise ValueError("output_channels must not be empty")
  self._output_channels = tuple(output_channels)
  self._num_layers = len(self._output_channels)

  self._input_shape = None

  if data_format not in SUPPORTED_2D_DATA_FORMATS:
    raise ValueError("Invalid data_format {}. Allowed formats "
                     "{}".format(data_format, SUPPORTED_2D_DATA_FORMATS))
  self._data_format = data_format

  self._initializers = util.check_initializers(
      initializers, self.POSSIBLE_INITIALIZER_KEYS)
  self._partitioners = util.check_partitioners(
      partitioners, self.POSSIBLE_INITIALIZER_KEYS)
  self._regularizers = util.check_regularizers(
      regularizers, self.POSSIBLE_INITIALIZER_KEYS)

  if not callable(activation):
    raise TypeError("Input 'activation' must be callable")
  self._activation = activation
  self._activate_final = activate_final

  self._kernel_shapes = _replicate_elements(kernel_shapes, self._num_layers)
  if len(self._kernel_shapes) != self._num_layers:
    raise ValueError(
        "kernel_shapes must be of length 1 or len(output_channels)")

  self._strides = _replicate_elements(strides, self._num_layers)
  if len(self._strides) != self._num_layers:
    raise ValueError(
        "strides must be of length 1 or len(output_channels)")

  self._paddings = _replicate_elements(paddings, self._num_layers)
  if len(self._paddings) != self._num_layers:
    raise ValueError(
        "paddings must be of length 1 or len(output_channels)")

  self._rates = _replicate_elements(rates, self._num_layers)
  if len(self._rates) != self._num_layers:
    raise ValueError(
        "rates must be of length 1 or len(output_channels)")

  self._parse_normalization_kwargs(
      use_batch_norm, batch_norm_config,
      normalization_ctor, normalization_kwargs)

  if normalize_final is None:
    util.deprecation_warning(
        "normalize_final is not specified, so using the value of "
        "activate_final = {}. Change your code to set this kwarg explicitly. "
        "In the future, normalize_final will default to True.".format(
            activate_final))
    self._normalize_final = activate_final
  else:
    # User has provided an override, so don't link to activate_final.
    self._normalize_final = normalize_final

  if isinstance(use_bias, bool):
    use_bias = (use_bias,)
  else:
    if not isinstance(use_bias, collections.Iterable):
      raise TypeError("use_bias must be either a bool or an iterable")
    use_bias = tuple(use_bias)
  self._use_bias = _replicate_elements(use_bias, self._num_layers)

  self._instantiate_layers()
def __init__(self,
             axis=None,
             offset=True,
             scale=False,
             decay_rate=0.999,
             eps=1e-3,
             initializers=None,
             partitioners=None,
             regularizers=None,
             update_ops_collection="update_ops",
             fused=False,
             name="batch_norm"):
  """Constructs a BatchNorm module.

  By default reduces over all input tensor dimensions apart from the final
  dimension. This has the effect of treating pixels in 1D/2D/3D images as
  additional elements of the minibatch.

  If this is not the desired behaviour, the user can specify the tensor
  indices to reduce over with `axis`.

  Args:
    axis: Optional iterable of indices of dimensions to reduce over. By
      default `None` and all dimensions except the last are reduced over.
    offset: Optional boolean to specify whether or not to apply a trained
      component-wise bias after the batch normalization and scaling.
    scale: Optional boolean to specify whether or not to apply a trained
      component-wise scale after the batch normalization.
    decay_rate: Decay rate of the exponential moving averages of the mean
      and variance.
    eps: Small number to avoid dividing by zero when dividing by the standard
      deviation.
    initializers: Optional dict containing ops to initialize the weights of
      the affine transform (`gamma` and `beta`).
    partitioners: Optional dict containing partitioners to partition the
      weights of the affine transform (`gamma` and `beta`).
    regularizers: Optional dict containing regularizers for the weights of the
      affine transform ('gamma' and 'beta'). As a default, no regularizers are
      used. A regularizer should be a function that takes a single `Tensor` as
      an input and returns a scalar `Tensor` output, e.g. the L1 and L2
      regularizers in `tf.contrib.layers`.
    update_ops_collection: Name of TensorFlow variable collection to add the
      moving average update ops to. If `None`, we instead add the update ops
      as control dependencies of the output of the module. This may result in
      some slowdown, as the feed-forward of the network is now blocked. By
      default, `tf.GraphKeys.UPDATE_OPS`.
    fused: Use nn.fused_batch_norm if True, nn.batch_normalization otherwise.
    name: Name of the module.

  Raises:
    KeyError: If `initializers` contains any keys other than `gamma`, `beta`,
      `moving_mean` or `moving_variance`.
    KeyError: If `partitioners` or `regularizers` contains any keys other
      than `gamma` or `beta`.
    TypeError: If any of the given initializers, partitioners or regularizers
      are not callable.
  """
  super(BatchNorm, self).__init__(name=name)

  self._axis = axis
  self._offset = offset
  self._scale = scale
  self._decay_rate = decay_rate
  self._eps = eps
  self._update_ops_collection = update_ops_collection
  self._fused = fused

  self._initializers = util.check_initializers(
      initializers, self.POSSIBLE_INITIALIZER_KEYS)
  self._partitioners = util.check_partitioners(
      partitioners, self.POSSIBLE_PARTITIONER_KEYS)
  self._regularizers = util.check_regularizers(
      regularizers, self.POSSIBLE_REGULARIZER_KEYS)
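
# Minimal usage sketch (assumption: this is Sonnet v1's `snt.BatchNorm`, whose
# moving-average update ops land in the "update_ops" collection by default,
# i.e. `tf.GraphKeys.UPDATE_OPS`, so they must be run alongside the train op).
import tensorflow as tf
import sonnet as snt

x = tf.placeholder(tf.float32, [None, 128])
bn = snt.BatchNorm()
y = bn(x, is_training=True)

loss = tf.reduce_mean(tf.square(y))
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
  train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)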
def __init__(self,
             output_channels,
             kernel_shapes,
             strides,
             paddings,
             activation=tf.nn.relu,
             activate_final=False,
             initializers=None,
             partitioners=None,
             regularizers=None,
             use_batch_norm=False,
             use_bias=True,
             batch_norm_config=None,
             data_format=DATA_FORMAT_NHWC,
             name="conv_net_2d"):
  """Constructs a `ConvNet2D` module.

  By default, neither batch normalization nor activation are applied to the
  output of the final layer.

  Args:
    output_channels: Iterable of output channels, as defined in
      `conv.Conv2D`. Output channels can be defined either as number or via a
      callable. In the latter case, since the function invocation is deferred
      to graph construction time, the user must only ensure that entries can
      be called when build is called. Each entry in the iterable defines
      properties in the corresponding convolutional layer.
    kernel_shapes: Iterable of kernel sizes as defined in `conv.Conv2D`; if
      the list contains one element only, the same kernel shape is used in
      each layer of the network.
    strides: Iterable of kernel strides as defined in `conv.Conv2D`; if the
      list contains one element only, the same stride is used in each layer
      of the network.
    paddings: Iterable of padding options, either `snt.SAME` or `snt.VALID`;
      if the Iterable contains one element only, the same padding is used in
      each layer of the network.
    activation: An activation op.
    activate_final: Boolean determining if the activation and batch
      normalization, if turned on, are applied to the final layer.
    initializers: Optional dict containing ops to initialize the filters of
      the whole network (with key 'w') or biases (with key 'b').
    partitioners: Optional dict containing partitioners to partition
      weights (with key 'w') or biases (with key 'b'). As a default, no
      partitioners are used.
    regularizers: Optional dict containing regularizers for the filters of the
      whole network (with key 'w') or biases (with key 'b'). As a default, no
      regularizers are used. A regularizer should be a function that takes a
      single `Tensor` as an input and returns a scalar `Tensor` output, e.g.
      the L1 and L2 regularizers in `tf.contrib.layers`.
    use_batch_norm: Boolean determining if batch normalization is applied
      after convolution.
    use_bias: Boolean or iterable of booleans determining whether to include
      bias parameters in the convolutional layers. Default `True`.
    batch_norm_config: Optional mapping of additional configuration for the
      `snt.BatchNorm` modules.
    data_format: A string, one of "NCHW" or "NHWC". Specifies whether the
      channel dimension of the input and output is the last dimension
      (default, "NHWC"), or the second dimension ("NCHW").
    name: Name of the module.

  Raises:
    TypeError: If `output_channels` is not iterable; or if `kernel_shapes` is
      not iterable; or `strides` is not iterable; or `paddings` is not
      iterable; or if `activation` is not callable.
    ValueError: If `output_channels` is empty; or if `kernel_shapes` does not
      have length 1 or `len(output_channels)`; or if `strides` does not have
      length 1 or `len(output_channels)`; or if `paddings` does not have
      length 1 or `len(output_channels)`; or if the given data_format is not
      a supported format ("NHWC" or "NCHW").
    KeyError: If `initializers`, `partitioners` or `regularizers` contain any
      keys other than 'w' or 'b'.
    TypeError: If any of the given initializers, partitioners or regularizers
      are not callable.
  """
  if not isinstance(output_channels, collections.Iterable):
    raise TypeError("output_channels must be iterable")
  output_channels = tuple(output_channels)

  if not isinstance(kernel_shapes, collections.Iterable):
    raise TypeError("kernel_shapes must be iterable")
  kernel_shapes = tuple(kernel_shapes)

  if not isinstance(strides, collections.Iterable):
    raise TypeError("strides must be iterable")
  strides = tuple(strides)

  if not isinstance(paddings, collections.Iterable):
    raise TypeError("paddings must be iterable")
  paddings = tuple(paddings)

  super(ConvNet2D, self).__init__(name=name)

  if not output_channels:
    raise ValueError("output_channels must not be empty")
  self._output_channels = tuple(output_channels)
  self._num_layers = len(self._output_channels)

  self._input_shape = None

  if data_format not in SUPPORTED_2D_DATA_FORMATS:
    raise ValueError("Invalid data_format {}. Allowed formats "
                     "{}".format(data_format, SUPPORTED_2D_DATA_FORMATS))
  self._data_format = data_format

  self._initializers = util.check_initializers(
      initializers, self.POSSIBLE_INITIALIZER_KEYS)
  self._partitioners = util.check_partitioners(
      partitioners, self.POSSIBLE_INITIALIZER_KEYS)
  self._regularizers = util.check_regularizers(
      regularizers, self.POSSIBLE_INITIALIZER_KEYS)

  if not callable(activation):
    raise TypeError("Input 'activation' must be callable")
  self._activation = activation
  self._activate_final = activate_final

  self._kernel_shapes = _replicate_elements(kernel_shapes, self._num_layers)
  if len(self._kernel_shapes) != self._num_layers:
    raise ValueError(
        "kernel_shapes must be of length 1 or len(output_channels)")

  self._strides = _replicate_elements(strides, self._num_layers)
  if len(self._strides) != self._num_layers:
    raise ValueError(
        "strides must be of length 1 or len(output_channels)")

  self._paddings = _replicate_elements(paddings, self._num_layers)
  if len(self._paddings) != self._num_layers:
    raise ValueError(
        "paddings must be of length 1 or len(output_channels)")

  self._use_batch_norm = use_batch_norm

  self._batch_norm_config = batch_norm_config or {}

  if isinstance(use_bias, bool):
    use_bias = (use_bias,)
  else:
    if not isinstance(use_bias, collections.Iterable):
      raise TypeError("use_bias must be either a bool or an iterable")
    use_bias = tuple(use_bias)
  self._use_bias = _replicate_elements(use_bias, self._num_layers)

  self._instantiate_layers()
def __init__(self,
             output_sizes,
             activation=tf.nn.relu,
             activate_final=False,
             initializers=None,
             partitioners=None,
             regularizers=None,
             use_bias=True,
             name="mlp"):
  """Constructs an MLP module.

  Args:
    output_sizes: An iterable of output dimensionalities as defined in
      `basic.Linear`. Output size can be defined either as number or via a
      callable. In the latter case, since the function invocation is deferred
      to graph construction time, the user must only ensure that entries can
      be called when build is called. Each entry in the iterable defines
      properties in the corresponding linear layer.
    activation: An activation op. The activation is applied to intermediate
      layers, and optionally to the output of the final layer.
    activate_final: Boolean determining if the activation is applied to the
      output of the final layer. Default `False`.
    initializers: Optional dict containing ops to initialize the linear
      layers' weights (with key 'w') or biases (with key 'b').
    partitioners: Optional dict containing partitioners to partition the
      linear layers' weights (with key 'w') or biases (with key 'b').
    regularizers: Optional dict containing regularizers for the linear layers'
      weights (with key 'w') and the biases (with key 'b'). As a default, no
      regularizers are used. A regularizer should be a function that takes
      a single `Tensor` as an input and returns a scalar `Tensor` output, e.g.
      the L1 and L2 regularizers in `tf.contrib.layers`.
    use_bias: Whether to include bias parameters in the linear layers.
      Default `True`.
    name: Name of the module.

  Raises:
    KeyError: If initializers contains any keys other than 'w' or 'b'.
    KeyError: If regularizers contains any keys other than 'w' or 'b'.
    ValueError: If output_sizes is empty.
    TypeError: If `activation` is not callable; or if `output_sizes` is not
      iterable.
  """
  super(MLP, self).__init__(name=name)

  if not isinstance(output_sizes, collections.Iterable):
    raise TypeError("output_sizes must be iterable")
  output_sizes = tuple(output_sizes)
  if not output_sizes:
    raise ValueError("output_sizes must not be empty")
  self._output_sizes = output_sizes
  self._num_layers = len(self._output_sizes)
  self._input_shape = None

  self.possible_keys = self.get_possible_initializer_keys(use_bias=use_bias)
  self._initializers = util.check_initializers(
      initializers, self.possible_keys)
  self._partitioners = util.check_partitioners(
      partitioners, self.possible_keys)
  self._regularizers = util.check_regularizers(
      regularizers, self.possible_keys)

  if not callable(activation):
    raise TypeError("Input 'activation' must be callable")
  self._activation = activation
  self._activate_final = activate_final

  self._use_bias = use_bias
  self._instantiate_layers()
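
# Minimal usage sketch (assumption: this is Sonnet v1's `snt.nets.MLP`; layer
# sizes are illustrative).
import tensorflow as tf
import sonnet as snt

mlp = snt.nets.MLP(output_sizes=[128, 64, 10], activate_final=False)
inputs = tf.placeholder(tf.float32, [None, 784])
logits = mlp(inputs)  # [batch, 10]; ReLU applied between hidden layers only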
def __init__(self,
             vocab_size=None,
             embed_dim=None,
             existing_vocab=None,
             densify_gradients=False,
             initializers=None,
             partitioners=None,
             regularizers=None,
             trainable=True,
             custom_getter=None,
             name="embed"):
  """Constructs an Embed module.

  Args:
    vocab_size: int. Number of unique tokens to embed. If not provided, an
      existing vocabulary matrix from which vocab_size can be inferred must
      be provided as existing_vocab.
    embed_dim: int or None. Number of dimensions to assign to each embedding.
      If not specified, a sensible default is chosen based on `vocab_size`. If
      an existing vocabulary matrix initializes the module, this should not be
      provided as it will be inferred.
    existing_vocab: a [vocab_size, embed_dim] vocabulary matrix. Will be
      converted to a tf.float32 tensor. If provided, neither vocab_size nor
      embed_dim should be provided as they are inferred.
    densify_gradients: if True, we convert the embedding gradient from an
      indexed-slices to a regular tensor before sending it back to the
      parameter server. This avoids excess computation on the parameter
      server. Use this option for moderately sized embeddings, e.g., a
      vocabulary size on the order of up to thousands. For embeddings larger
      than these, e.g. a vocabulary size on the order of tens or hundreds of
      thousands, set this to False.
    initializers: Optional dict containing initializers for embeddings (with
      key 'embeddings'). As a default, embeddings are initialized via a
      truncated normal distribution.
    partitioners: Optional dict containing partitioners for embeddings (with
      key 'embeddings'). As a default, no partitioners are used.
    regularizers: Optional dict containing regularizers for embeddings (with
      key 'embeddings'). As a default, no regularizers are used. A regularizer
      should be a function that takes a single `Tensor` as an input and
      returns a scalar `Tensor` output, e.g. the L1 and L2 regularizers in
      `tf.contrib.layers`.
    trainable: if True, the embeddings will be updated during training. If
      False, they are fixed to their initial values. If `trainable=False` and
      a regularizer is given, the resulting loss stays constant.
    custom_getter: Callable or dictionary of callables to use as
      custom getters inside the module. If a dictionary, the keys
      correspond to regexes to match variable names. See the
      `tf.get_variable` documentation for information about the
      custom_getter API.
    name: string. Name for this module.

  Raises:
    ValueError: if neither one of vocab_size or existing_vocab is provided, or
      if existing_vocab is provided along with vocab_size, embed_dim,
      initializers, partitioners or regularizers (as these should be
      inferred).
  """
  if vocab_size is None and existing_vocab is None:
    raise ValueError("Must provide one of vocab_size or existing_vocab.")

  if existing_vocab is not None and not all(
      x is None for x in [vocab_size, embed_dim, initializers, partitioners]):
    raise ValueError("If existing_vocab is provided, none of vocab_size, "
                     "embed_dim, initializers, or partitioners is needed.")

  super(Embed, self).__init__(custom_getter=custom_getter, name=name)
  self._existing_vocab = None
  if existing_vocab is None:
    self._vocab_size = vocab_size
    self._embed_dim = embed_dim or _embedding_dim(self._vocab_size)
  else:
    self._existing_vocab = tf.convert_to_tensor(
        existing_vocab, dtype=tf.float32)
    existing_vocab_shape = self._existing_vocab.get_shape().with_rank(2)
    existing_vocab_shape.assert_is_fully_defined()
    self._vocab_size, self._embed_dim = existing_vocab_shape.as_list()

  self._initializers = util.check_initializers(
      initializers, self.POSSIBLE_INITIALIZER_KEYS)
  self._partitioners = util.check_partitioners(
      partitioners, self.POSSIBLE_INITIALIZER_KEYS)
  self._regularizers = util.check_regularizers(
      regularizers, self.POSSIBLE_INITIALIZER_KEYS)

  self._trainable = trainable
  self._densify_gradients = densify_gradients
def __init__(self,
             vocab_size=None,
             embed_dim=None,
             existing_vocab=None,
             initializers=None,
             partitioners=None,
             regularizers=None,
             trainable=True,
             custom_getter=None,
             name="embed"):
  """Constructs an Embed module.

  Args:
    vocab_size: int. Number of unique tokens to embed. If not provided, an
      existing vocabulary matrix from which vocab_size can be inferred must
      be provided as existing_vocab.
    embed_dim: int or None. Number of dimensions to assign to each embedding.
      If not specified, a sensible default is chosen based on `vocab_size`. If
      an existing vocabulary matrix initializes the module, this should not be
      provided as it will be inferred.
    existing_vocab: a [vocab_size, embed_dim] vocabulary matrix. Will be
      converted to a tf.float32 tensor. If provided, neither vocab_size nor
      embed_dim should be provided as they are inferred.
    initializers: Optional dict containing initializers for embeddings (with
      key 'embeddings'). As a default, embeddings are initialized via a
      truncated normal distribution.
    partitioners: Optional dict containing partitioners for embeddings (with
      key 'embeddings'). As a default, no partitioners are used.
    regularizers: Optional dict containing regularizers for embeddings (with
      key 'embeddings'). As a default, no regularizers are used. A regularizer
      should be a function that takes a single `Tensor` as an input and
      returns a scalar `Tensor` output, e.g. the L1 and L2 regularizers in
      `tf.contrib.layers`.
    trainable: if True, the embeddings will be updated during training. If
      False, they are fixed to their initial values. If `trainable=False` and
      a regularizer is given, the resulting loss stays constant.
    custom_getter: Callable or dictionary of callables to use as
      custom getters inside the module. If a dictionary, the keys
      correspond to regexes to match variable names. See the
      `tf.get_variable` documentation for information about the
      custom_getter API.
    name: string. Name for this module.

  Raises:
    ValueError: if neither one of vocab_size or existing_vocab is provided, or
      if existing_vocab is provided along with vocab_size, embed_dim,
      initializers, partitioners or regularizers (as these should be
      inferred).
  """
  if vocab_size is None and existing_vocab is None:
    raise ValueError("Must provide one of vocab_size or existing_vocab.")

  if existing_vocab is not None and not all(
      x is None for x in [vocab_size, embed_dim, initializers, partitioners]):
    raise ValueError("If existing_vocab is provided, none of vocab_size, "
                     "embed_dim, initializers, or partitioners is needed.")

  super(Embed, self).__init__(custom_getter=custom_getter, name=name)
  self._existing_vocab = None
  if existing_vocab is None:
    self._vocab_size = vocab_size
    self._embed_dim = embed_dim or _embedding_dim(self._vocab_size)
  else:
    self._existing_vocab = tf.convert_to_tensor(
        existing_vocab, dtype=tf.float32)
    existing_vocab_shape = self._existing_vocab.get_shape().with_rank(2)
    existing_vocab_shape.assert_is_fully_defined()
    self._vocab_size, self._embed_dim = existing_vocab_shape.as_list()

  self._initializers = util.check_initializers(
      initializers, self.POSSIBLE_INITIALIZER_KEYS)
  self._partitioners = util.check_partitioners(
      partitioners, self.POSSIBLE_INITIALIZER_KEYS)
  self._regularizers = util.check_regularizers(
      regularizers, self.POSSIBLE_INITIALIZER_KEYS)

  self._trainable = trainable
def __init__(self,
             output_channels,
             kernel_shapes,
             strides,
             paddings,
             rates=(1,),
             activation=tf.nn.relu,
             activate_final=False,
             initializers=None,
             partitioners=None,
             regularizers=None,
             use_batch_norm=False,
             use_bias=True,
             batch_norm_config=None,
             data_format=DATA_FORMAT_NHWC,
             custom_getter=None,
             name="conv_net_2d"):
  """Constructs a `ConvNet2D` module.

  By default, neither batch normalization nor activation are applied to the
  output of the final layer.

  Args:
    output_channels: Iterable of output channels, as defined in
      `conv.Conv2D`. Output channels can be defined either as number or via a
      callable. In the latter case, since the function invocation is deferred
      to graph construction time, the user must only ensure that entries can
      be called when build is called. Each entry in the iterable defines
      properties in the corresponding convolutional layer.
    kernel_shapes: Iterable of kernel sizes as defined in `conv.Conv2D`; if
      the list contains one element only, the same kernel shape is used in
      each layer of the network.
    strides: Iterable of kernel strides as defined in `conv.Conv2D`; if the
      list contains one element only, the same stride is used in each layer
      of the network.
    paddings: Iterable of padding options, either `snt.SAME` or `snt.VALID`;
      if the Iterable contains one element only, the same padding is used in
      each layer of the network.
    rates: Iterable of dilation rates as defined in `conv.Conv2D`; if the
      list contains one element only, the same rate is used in each layer of
      the network.
    activation: An activation op.
    activate_final: Boolean determining if the activation and batch
      normalization, if turned on, are applied to the final layer.
    initializers: Optional dict containing ops to initialize the filters of
      the whole network (with key 'w') or biases (with key 'b').
    partitioners: Optional dict containing partitioners to partition
      weights (with key 'w') or biases (with key 'b'). As a default, no
      partitioners are used.
    regularizers: Optional dict containing regularizers for the filters of the
      whole network (with key 'w') or biases (with key 'b'). As a default, no
      regularizers are used. A regularizer should be a function that takes a
      single `Tensor` as an input and returns a scalar `Tensor` output, e.g.
      the L1 and L2 regularizers in `tf.contrib.layers`.
    use_batch_norm: Boolean determining if batch normalization is applied
      after convolution.
    use_bias: Boolean or iterable of booleans determining whether to include
      bias parameters in the convolutional layers. Default `True`.
    batch_norm_config: Optional mapping of additional configuration for the
      `snt.BatchNorm` modules.
    data_format: A string, one of "NCHW" or "NHWC". Specifies whether the
      channel dimension of the input and output is the last dimension
      (default, "NHWC"), or the second dimension ("NCHW").
    custom_getter: Callable or dictionary of callables to use as
      custom getters inside the module. If a dictionary, the keys
      correspond to regexes to match variable names. See the
      `tf.get_variable` documentation for information about the
      custom_getter API.
    name: Name of the module.

  Raises:
    TypeError: If `output_channels` is not iterable; or if `kernel_shapes` is
      not iterable; or `strides` is not iterable; or `paddings` is not
      iterable; or if `activation` is not callable.
    ValueError: If `output_channels` is empty; or if `kernel_shapes` does not
      have length 1 or `len(output_channels)`; or if `strides` does not have
      length 1 or `len(output_channels)`; or if `paddings` does not have
      length 1 or `len(output_channels)`; or if `rates` does not have length
      1 or `len(output_channels)`; or if the given data_format is not a
      supported format ("NHWC" or "NCHW").
    KeyError: If `initializers`, `partitioners` or `regularizers` contain any
      keys other than 'w' or 'b'.
    TypeError: If any of the given initializers, partitioners or regularizers
      are not callable.
  """
  if not isinstance(output_channels, collections.Iterable):
    raise TypeError("output_channels must be iterable")
  output_channels = tuple(output_channels)

  if not isinstance(kernel_shapes, collections.Iterable):
    raise TypeError("kernel_shapes must be iterable")
  kernel_shapes = tuple(kernel_shapes)

  if not isinstance(strides, collections.Iterable):
    raise TypeError("strides must be iterable")
  strides = tuple(strides)

  if not isinstance(paddings, collections.Iterable):
    raise TypeError("paddings must be iterable")
  paddings = tuple(paddings)

  if not isinstance(rates, collections.Iterable):
    raise TypeError("rates must be iterable")
  rates = tuple(rates)

  super(ConvNet2D, self).__init__(name=name, custom_getter=custom_getter)

  if not output_channels:
    raise ValueError("output_channels must not be empty")
  self._output_channels = tuple(output_channels)
  self._num_layers = len(self._output_channels)

  self._input_shape = None

  if data_format not in SUPPORTED_2D_DATA_FORMATS:
    raise ValueError("Invalid data_format {}. Allowed formats "
                     "{}".format(data_format, SUPPORTED_2D_DATA_FORMATS))
  self._data_format = data_format

  self._initializers = util.check_initializers(
      initializers, self.POSSIBLE_INITIALIZER_KEYS)
  self._partitioners = util.check_partitioners(
      partitioners, self.POSSIBLE_INITIALIZER_KEYS)
  self._regularizers = util.check_regularizers(
      regularizers, self.POSSIBLE_INITIALIZER_KEYS)

  if not callable(activation):
    raise TypeError("Input 'activation' must be callable")
  self._activation = activation
  self._activate_final = activate_final

  self._kernel_shapes = _replicate_elements(kernel_shapes, self._num_layers)
  if len(self._kernel_shapes) != self._num_layers:
    raise ValueError(
        "kernel_shapes must be of length 1 or len(output_channels)")

  self._strides = _replicate_elements(strides, self._num_layers)
  if len(self._strides) != self._num_layers:
    raise ValueError(
        "strides must be of length 1 or len(output_channels)")

  self._paddings = _replicate_elements(paddings, self._num_layers)
  if len(self._paddings) != self._num_layers:
    raise ValueError(
        "paddings must be of length 1 or len(output_channels)")

  self._rates = _replicate_elements(rates, self._num_layers)
  if len(self._rates) != self._num_layers:
    raise ValueError(
        "rates must be of length 1 or len(output_channels)")

  self._use_batch_norm = use_batch_norm

  self._batch_norm_config = batch_norm_config or {}

  if isinstance(use_bias, bool):
    use_bias = (use_bias,)
  else:
    if not isinstance(use_bias, collections.Iterable):
      raise TypeError("use_bias must be either a bool or an iterable")
    use_bias = tuple(use_bias)
  self._use_bias = _replicate_elements(use_bias, self._num_layers)

  self._instantiate_layers()
def __init__(self,
             output_shape=None,
             bias_dims=None,
             initializers=None,
             partitioners=None,
             regularizers=None,
             name="add"):
  """Constructs an AddBias module that supports broadcasting.

  Args:
    output_shape: Output dimensionality. `output_shape` can be either `None`,
      a `tuple`, or a `callable`. In the latter case, since the function
      invocation is deferred to graph construction time, the user must only
      ensure that `output_shape` can be called, returning a tuple, when
      build is called. If `output_shape` is left as `None`, the size will be
      directly inferred by the input.
    bias_dims: List of which dimensions to retain from the input shape when
      constructing the bias. The remaining dimensions will get broadcasted
      over (given size of 1), and leading dimensions will be removed
      completely. For example, for an input of [batch_size, dim1_size,
      dim2_size, dim3_size] and `bias_dims=[1, 3]`, the resulting bias will
      have shape [dim1_size, 1, dim3_size]. The default is to retain all
      dimensions apart from the minibatch dimension. Trying to retain the
      bias shape over the minibatch dimension, e.g. `bias_dims=[0]`, will
      result in an error at build time. See the 'Example Usage' section below
      for more information.
    initializers: Optional dict containing ops to initialize the biases
      (with key 'b'). The default initializer for the bias is a zero
      initializer.
    partitioners: Optional dict containing a partitioner to partition the
      bias (with key 'b'). As a default, no partitioner is used.
    regularizers: Optional dict containing regularizers of the biases
      (with key 'b'). As a default, no regularizers are used. A regularizer
      should be a function that takes a single `Tensor` as an input and
      returns a scalar `Tensor` output, e.g. the L1 and L2 regularizers in
      `tf.contrib.layers`.
    name: Name of the module.

  Example Usage:

  ```python
  # Create a 4D input Tensor.
  input = tf.random_normal(
      shape=(batch_size, dim1_size, dim2_size, dim3_size))

  # Create a scalar bias:
  scalar_bias = snt.AddBias(bias_dims=[])
  scalar_bias_output = scalar_bias(input)
  scalar_bias.b.get_shape()  # ()

  # Create a bias over all non-minibatch dimensions:
  all_bias = snt.AddBias()  # or snt.AddBias(bias_dims=None)
  all_bias_output = all_bias(input)
  all_bias.b.get_shape()  # (dim1_size, dim2_size, dim3_size)

  # Create a bias over the last non-minibatch dimension:
  last_bias = snt.AddBias(bias_dims=[-1])
  last_bias_output = last_bias(input)
  last_bias.b.get_shape()  # (dim3_size,)

  # Create a bias over the first non-minibatch dimension:
  first_bias = snt.AddBias(bias_dims=[1])
  first_bias_output = first_bias(input)
  first_bias.b.get_shape()  # (dim1_size, 1, 1)

  # Subtract and later add the same learned bias:
  bias = snt.AddBias()
  hidden1 = bias(input, multiplier=-1)
  # ...
  reconstructed_input = bias(hidden4)
  ```

  Raises:
    KeyError: If `initializers` contains any keys other than 'b'.
    KeyError: If `partitioners` contains any keys other than 'b'.
    KeyError: If `regularizers` contains any keys other than 'b'.
    TypeError: If any of the given initializers are not callable.
    TypeError: If any of the given partitioners are not callable.
    TypeError: If any of the given regularizers are not callable.
  """
  super(AddBias, self).__init__(name=name)
  self._output_shape = output_shape
  self._input_shape = None
  self._bias_dims = bias_dims
  self._b = None
  self._initializers = util.check_initializers(
      initializers, self.POSSIBLE_INITIALIZER_KEYS)
  self._partitioners = util.check_partitioners(
      partitioners, self.POSSIBLE_INITIALIZER_KEYS)
  self._regularizers = util.check_regularizers(
      regularizers, self.POSSIBLE_INITIALIZER_KEYS)
def __init__(self, mode, use_batch_norm=False, batch_norm_config=None, initializers=None, partitioners=None, regularizers=None, bn_on_fc_layers=True, custom_getter=None, name="alex_net"): """Constructs AlexNet. Args: mode: Construction mode of network: `AlexNet.FULL` or `AlexNet.MINI`. use_batch_norm: Whether to use batch normalization between the output of a layer and the activation function. batch_norm_config: Optional mapping of additional configuration for the `snt.BatchNorm` modules. initializers: Optional dict containing ops to initialize the filters (with key 'w') or biases (with key 'b'). The default initializers are truncated normal initializers, which are commonly used when the inputs are zero centered (see https://arxiv.org/pdf/1502.03167v3.pdf). partitioners: Optional dict containing partitioners for the filters (with key 'w') and the biases (with key 'b'). As a default, no partitioners are used. regularizers: Optional dict containing regularizers for the filters (with key 'w') and the biases (with key 'b'). As a default, no regularizers are used. A regularizer should be a function that takes a single `Tensor` as an input and returns a scalar `Tensor` output, e.g. the L1 and L2 regularizers in `tf.contrib.layers`. bn_on_fc_layers: If `use_batch_norm` is True, add batch normalization to the fully-connected layers. This is deprecated. custom_getter: Callable or dictionary of callables to use as custom getters inside the module. If a dictionary, the keys correspond to regexes to match variable names. See the `tf.get_variable` documentation for information about the custom_getter API. name: Name of the module. Raises: base.Error: If the given `mode` is not one of `AlexNet.FULL` or `AlexNet.MINI`. KeyError: If `initializers`, `partitioners` or `regularizers` contains any keys other than 'w' or 'b'. """ super(AlexNet, self).__init__(custom_getter=custom_getter, name=name) self._mode = mode self._use_batch_norm = use_batch_norm self._bn_on_fc_layers = bn_on_fc_layers if self._bn_on_fc_layers: tf.logging.warn("Using BatchNorm on the fully connected layers in " "AlexNet is not recommended. 'bn_on_fc_layers' is a " "deprecated option and will likely be removed.") self._batch_norm_config = batch_norm_config or {} if self._mode == self.FULL: # The full AlexNet, i.e. as originally run on two GPUs self._conv_layers = [ (96, (11, 4), (3, 2)), (256, (5, 1), (3, 2)), (384, (3, 1), None), (384, (3, 1), None), (256, (3, 1), (3, 2)), ] self._fc_layers = [4096, 4096] elif self._mode == self.MINI: # A cut-down version of the half net for testing with CIFAR-10 self._conv_layers = [ (48, (3, 1), (3, 1)), (128, (3, 1), (3, 1)), (192, (3, 1), None), (192, (3, 1), None), (128, (3, 1), (3, 1)), ] self._fc_layers = [1024, 1024] else: raise base.Error("AlexNet construction mode '{}' not recognised, " "must be one of: '{}', '{}'".format( mode, self.FULL, self.MINI)) self._min_size = self._calc_min_size(self._conv_layers) self._conv_modules = [] self._linear_modules = [] # Keep old name for backwards compatibility self.possible_keys = self.POSSIBLE_INITIALIZER_KEYS self._initializers = util.check_initializers( initializers, self.POSSIBLE_INITIALIZER_KEYS) self._partitioners = util.check_partitioners( partitioners, self.POSSIBLE_INITIALIZER_KEYS) self._regularizers = util.check_regularizers( regularizers, self.POSSIBLE_INITIALIZER_KEYS)
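# A minimal usage sketch (graph-mode TF1), assuming this class is exposed as
# snt.nets.AlexNet and that connection accepts an `is_training` flag, as the
# dropout/batch-norm options suggest. Shapes and the Linear head are
# illustrative, not part of the module.
import tensorflow as tf
import sonnet as snt

images = tf.placeholder(tf.float32, shape=[None, 224, 224, 3])
model = snt.nets.AlexNet(mode=snt.nets.AlexNet.FULL)
features = model(images, is_training=True)  # output of the final FC layer
logits = snt.Linear(output_size=1000)(features)  # hypothetical class head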
def __init__(self, output_channels, kernel_shapes, strides, paddings, activation=tf.nn.relu, activate_final=False, initializers=None, partitioners=None, regularizers=None, use_batch_norm=False, use_bias=True, batch_norm_config=None, name="conv_net_2d"): """Constructs a `ConvNet2D` module. By default, neither batch normalization nor activation are applied to the output of the final layer. Args: output_channels: Iterable of output channels, as defined in `conv.Conv2D`. Output channels can be defined either as a number or via a callable. In the latter case, since the function invocation is deferred to graph construction time, the user must only ensure that entries can be called when build is called. Each entry in the iterable defines properties in the corresponding convolutional layer. kernel_shapes: Iterable of kernel sizes as defined in `conv.Conv2D`; if the list contains one element only, the same kernel shape is used in each layer of the network. strides: Iterable of kernel strides as defined in `conv.Conv2D`; if the list contains one element only, the same stride is used in each layer of the network. paddings: Iterable of padding options, either `snt.SAME` or `snt.VALID`; if the Iterable contains one element only, the same padding is used in each layer of the network. activation: An activation op. activate_final: Boolean determining if the activation and batch normalization, if turned on, are applied to the final layer. initializers: Optional dict containing ops to initialize the filters of the whole network (with key 'w') or biases (with key 'b'). partitioners: Optional dict containing partitioners to partition weights (with key 'w') or biases (with key 'b'). As a default, no partitioners are used. regularizers: Optional dict containing regularizers for the filters of the whole network (with key 'w') or biases (with key 'b'). As a default, no regularizers are used. A regularizer should be a function that takes a single `Tensor` as an input and returns a scalar `Tensor` output, e.g. the L1 and L2 regularizers in `tf.contrib.layers`. use_batch_norm: Boolean determining if batch normalization is applied after convolution. use_bias: Boolean or iterable of booleans determining whether to include bias parameters in the convolutional layers. Default `True`. batch_norm_config: Optional mapping of additional configuration for the `snt.BatchNorm` modules. name: Name of the module. Raises: TypeError: If `output_channels` is not iterable; or if `kernel_shapes` is not iterable; or `strides` is not iterable; or `paddings` is not iterable; or if `activation` is not callable; or if `batch_norm_config` is not a mapping (e.g. `dict`). ValueError: If `output_channels` is empty; or if `kernel_shapes` does not have length 1 or `len(output_channels)`; or if `strides` does not have length 1 or `len(output_channels)`; or if `paddings` does not have length 1 or `len(output_channels)`. KeyError: If `initializers`, `partitioners` or `regularizers` contain any keys other than 'w' or 'b'. TypeError: If any of the given initializers, partitioners or regularizers are not callable. 
""" if not isinstance(output_channels, collections.Iterable): raise TypeError("output_channels must be iterable") output_channels = tuple(output_channels) if not isinstance(kernel_shapes, collections.Iterable): raise TypeError("kernel_shapes must be iterable") kernel_shapes = tuple(kernel_shapes) if not isinstance(strides, collections.Iterable): raise TypeError("strides must be iterable") strides = tuple(strides) if not isinstance(paddings, collections.Iterable): raise TypeError("paddings must be iterable") paddings = tuple(paddings) super(ConvNet2D, self).__init__(name) if not output_channels: raise ValueError("output_channels must not be empty") self._output_channels = tuple(output_channels) self._num_layers = len(self._output_channels) self._input_shape = None self._initializers = util.check_initializers( initializers, self.POSSIBLE_KEYS) self._partitioners = util.check_partitioners( partitioners, self.POSSIBLE_KEYS) self._regularizers = util.check_regularizers( regularizers, self.POSSIBLE_KEYS) if not callable(activation): raise TypeError("Input 'activation' must be callable") self._activation = activation self._activate_final = activate_final self._kernel_shapes = _replicate_elements(kernel_shapes, self._num_layers) if len(self._kernel_shapes) != self._num_layers: raise ValueError( "kernel_shapes must be of length 1 or len(output_channels)") self._strides = _replicate_elements(strides, self._num_layers) if len(self._strides) != self._num_layers: raise ValueError( """strides must be of length 1 or len(output_channels)""") self._paddings = _replicate_elements(paddings, self._num_layers) if len(self._paddings) != self._num_layers: raise ValueError( """paddings must be of length 1 or len(output_channels)""") self._use_batch_norm = use_batch_norm if batch_norm_config is not None: if not isinstance(batch_norm_config, collections.Mapping): raise TypeError("`batch_norm_config` must be a mapping, e.g. `dict`.") self._batch_norm_config = batch_norm_config else: self._batch_norm_config = {} if isinstance(use_bias, bool): use_bias = (use_bias,) else: if not isinstance(use_bias, collections.Iterable): raise TypeError("use_bias must be either a bool or an iterable") use_bias = tuple(use_bias) self._use_bias = _replicate_elements(use_bias, self._num_layers) self._instantiate_layers()
def __init__(self, output_sizes, activation=tf.nn.relu, activate_final=False, initializers=None, partitioners=None, regularizers=None, use_bias=True, custom_getter=None, name="mlp"): """Constructs an MLP module. Args: output_sizes: An iterable of output dimensionalities as defined in `basic.Linear`. Output size can be defined either as a number or via a callable. In the latter case, since the function invocation is deferred to graph construction time, the user must only ensure that entries can be called when build is called. Each entry in the iterable defines properties in the corresponding linear layer. activation: An activation op. The activation is applied to intermediate layers, and optionally to the output of the final layer. activate_final: Boolean determining if the activation is applied to the output of the final layer. Default `False`. initializers: Optional dict containing ops to initialize the linear layers' weights (with key 'w') or biases (with key 'b'). partitioners: Optional dict containing partitioners to partition the linear layers' weights (with key 'w') or biases (with key 'b'). regularizers: Optional dict containing regularizers for the linear layers' weights (with key 'w') and the biases (with key 'b'). As a default, no regularizers are used. A regularizer should be a function that takes a single `Tensor` as an input and returns a scalar `Tensor` output, e.g. the L1 and L2 regularizers in `tf.contrib.layers`. use_bias: Whether to include bias parameters in the linear layers. Default `True`. custom_getter: Callable or dictionary of callables to use as custom getters inside the module. If a dictionary, the keys correspond to regexes to match variable names. See the `tf.get_variable` documentation for information about the custom_getter API. name: Name of the module. Raises: KeyError: If `initializers` contains any keys other than 'w' or 'b'. KeyError: If `regularizers` contains any keys other than 'w' or 'b'. ValueError: If `output_sizes` is empty. TypeError: If `activation` is not callable; or if `output_sizes` is not iterable. """ super(MLP, self).__init__(custom_getter=custom_getter, name=name) if not isinstance(output_sizes, collections.Iterable): raise TypeError("output_sizes must be iterable") output_sizes = tuple(output_sizes) if not output_sizes: raise ValueError("output_sizes must not be empty") self._output_sizes = output_sizes self._num_layers = len(self._output_sizes) self._input_shape = None self.possible_keys = self.get_possible_initializer_keys(use_bias=use_bias) self._initializers = util.check_initializers( initializers, self.possible_keys) self._partitioners = util.check_partitioners( partitioners, self.possible_keys) self._regularizers = util.check_regularizers( regularizers, self.possible_keys) if not callable(activation): raise TypeError("Input 'activation' must be callable") self._activation = activation self._activate_final = activate_final self._use_bias = use_bias self._instantiate_layers()
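# A minimal usage sketch (graph-mode TF1), assuming this class is exposed as
# snt.nets.MLP. Shapes and sizes are illustrative.
import tensorflow as tf
import sonnet as snt

inputs = tf.placeholder(tf.float32, shape=[None, 64])
mlp = snt.nets.MLP(output_sizes=[128, 128, 10])
logits = mlp(inputs)  # ReLU after the first two layers; final layer is linear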
def __init__(self, data_format=None, offset=True, scale=False, decay_rate=0.999, eps=1e-3, initializers=None, partitioners=None, regularizers=None, update_ops_collection=None, fused=True, name="batch_norm"): """Constructs a BatchNormV2 module. Reduces over all input tensor dimensions apart from the channel dimension. This has the effect of treating pixels in 1D/2D/3D images as additional elements of the minibatch. Args: data_format: The data format. Can be "NC", "NWC", "NCW", "NHWC", "NCHW", "NDHWC", or "NCDHW". If not provided we assume the channel dimension is last. offset: Optional boolean to specify whether or not to apply a trained component-wise bias after the batch normalization and scaling. scale: Optional boolean to specify whether or not to apply a trained component-wise scale after the batch normalization. decay_rate: Decay rate of the exponential moving averages of the mean and variance. eps: Small number to avoid division by zero when dividing by the standard deviation. initializers: Optional dict containing ops to initialize the weights of the affine transform (`gamma` and `beta`). partitioners: Optional dict containing partitioners to partition the weights of the affine transform (`gamma` and `beta`). regularizers: Optional dict containing regularizers for the weights of the affine transform (`gamma` and `beta`). As a default, no regularizers are used. A regularizer should be a function that takes a single `Tensor` as an input and returns a scalar `Tensor` output, e.g. the L1 and L2 regularizers in `tf.contrib.layers`. update_ops_collection: Optional name of TensorFlow variable collection to add the moving average update ops to. If not provided, we instead add the update ops as control dependencies of the output of the module. This may result in some slowdown, as the feed-forward of the network is now blocked. fused: Use nn.fused_batch_norm if True, nn.batch_normalization otherwise. name: Name of the module. Raises: KeyError: If `initializers` contains any keys other than `gamma`, `beta`, `moving_mean` or `moving_variance`. KeyError: If `partitioners` or `regularizers` contains any keys other than `gamma` or `beta`. TypeError: If any of the given initializers, partitioners or regularizers are not callable. ValueError: If `data_format` is invalid. """ super(BatchNormV2, self).__init__(name=name) if data_format not in self.SUPPORTED_DATA_FORMATS.union({None}): raise ValueError("Invalid data_format: %r" % (data_format,)) self._data_format = data_format self._offset = offset self._scale = scale self._decay_rate = decay_rate self._eps = eps self._update_ops_collection = update_ops_collection self._fused = fused self._initializers = util.check_initializers( initializers, self.POSSIBLE_INITIALIZER_KEYS) self._partitioners = util.check_partitioners( partitioners, self.POSSIBLE_PARTITIONER_KEYS) self._regularizers = util.check_regularizers( regularizers, self.POSSIBLE_REGULARIZER_KEYS)
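# A minimal usage sketch (graph-mode TF1). The connection signature is assumed
# to take an `is_training` flag, as in other snt batch-norm modules; since no
# `update_ops_collection` is given, the moving-average updates run as control
# dependencies of the output, per the docstring above.
import tensorflow as tf
import sonnet as snt

h = tf.placeholder(tf.float32, shape=[None, 32, 32, 16])
bn = snt.BatchNormV2(data_format="NHWC", offset=True, scale=True)
h_train = bn(h, is_training=True)   # batch statistics; EMAs get updated
h_eval = bn(h, is_training=False)   # moving-average statistics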
def __init__(self, output_channels, kernel_shapes, strides, paddings, rates=(1,), activation=tf.nn.relu, activate_final=False, normalization_ctor=None, normalization_kwargs=None, normalize_final=None, initializers=None, partitioners=None, regularizers=None, use_batch_norm=None, # Deprecated. use_bias=True, batch_norm_config=None, # Deprecated. data_format=DATA_FORMAT_NHWC, custom_getter=None, name="conv_net_2d"): """Constructs a `ConvNet2D` module. By default, neither batch normalization nor activation are applied to the output of the final layer. Args: output_channels: Iterable of output channels, as defined in `conv.Conv2D`. Output channels can be defined either as a number or via a callable. In the latter case, since the function invocation is deferred to graph construction time, the user must only ensure that entries can be called when build is called. Each entry in the iterable defines properties in the corresponding convolutional layer. kernel_shapes: Iterable of kernel sizes as defined in `conv.Conv2D`; if the list contains one element only, the same kernel shape is used in each layer of the network. strides: Iterable of kernel strides as defined in `conv.Conv2D`; if the list contains one element only, the same stride is used in each layer of the network. paddings: Iterable of padding options as defined in `conv.Conv2D`. Each can be `snt.SAME`, `snt.VALID`, `snt.FULL`, `snt.CAUSAL`, `snt.REVERSE_CAUSAL` or a pair of these to use for height and width. If the Iterable contains one element only, the same padding is used in each layer of the network. rates: Iterable of dilation rates as defined in `conv.Conv2D`; if the list contains one element only, the same rate is used in each layer of the network. activation: An activation op. activate_final: Boolean determining if the activation and batch normalization, if turned on, are applied to the final layer. normalization_ctor: Constructor to return a callable which will perform normalization at each layer. Defaults to None / no normalization. Examples of what could go here: `snt.BatchNormV2`, `snt.LayerNorm`. If a string is provided, importlib is used to convert the string to a callable, so either `snt.LayerNorm` or `"snt.LayerNorm"` can be provided. normalization_kwargs: kwargs to be provided to `normalization_ctor` when it is called. normalize_final: Whether to apply normalization after the final conv layer. Default is to take the value of activate_final. initializers: Optional dict containing ops to initialize the filters of the whole network (with key 'w') or biases (with key 'b'). partitioners: Optional dict containing partitioners to partition weights (with key 'w') or biases (with key 'b'). As a default, no partitioners are used. regularizers: Optional dict containing regularizers for the filters of the whole network (with key 'w') or biases (with key 'b'). As a default, no regularizers are used. A regularizer should be a function that takes a single `Tensor` as an input and returns a scalar `Tensor` output, e.g. the L1 and L2 regularizers in `tf.contrib.layers`. use_batch_norm: Boolean determining if batch normalization is applied after convolution. Deprecated, use `normalization_ctor` instead. use_bias: Boolean or iterable of booleans determining whether to include bias parameters in the convolutional layers. Default `True`. batch_norm_config: Optional mapping of additional configuration for the `snt.BatchNorm` modules. Deprecated, use `normalization_kwargs` instead. data_format: A string, one of "NCHW" or "NHWC". 
Specifies whether the channel dimension of the input and output is the last dimension (default, "NHWC"), or the second dimension ("NCHW"). custom_getter: Callable or dictionary of callables to use as custom getters inside the module. If a dictionary, the keys correspond to regexes to match variable names. See the `tf.get_variable` documentation for information about the custom_getter API. name: Name of the module. Raises: TypeError: If `output_channels` is not iterable; or if `kernel_shapes` is not iterable; or `strides` is not iterable; or `paddings` is not iterable; or if `activation` is not callable. ValueError: If `output_channels` is empty; or if `kernel_shapes` does not have length 1 or `len(output_channels)`; or if `strides` does not have length 1 or `len(output_channels)`; or if `paddings` does not have length 1 or `len(output_channels)`; or if `rates` does not have length 1 or `len(output_channels)`; or if the given data_format is not a supported format ("NHWC" or "NCHW"); or if `normalization_ctor` is provided but cannot be mapped to a callable. KeyError: If `initializers`, `partitioners` or `regularizers` contain any keys other than 'w' or 'b'. TypeError: If any of the given initializers, partitioners or regularizers are not callable. """ if not isinstance(output_channels, collections.Iterable): raise TypeError("output_channels must be iterable") output_channels = tuple(output_channels) if not isinstance(kernel_shapes, collections.Iterable): raise TypeError("kernel_shapes must be iterable") kernel_shapes = tuple(kernel_shapes) if not isinstance(strides, collections.Iterable): raise TypeError("strides must be iterable") strides = tuple(strides) if not isinstance(paddings, collections.Iterable): raise TypeError("paddings must be iterable") paddings = tuple(paddings) if not isinstance(rates, collections.Iterable): raise TypeError("rates must be iterable") rates = tuple(rates) if isinstance(use_batch_norm, collections.Iterable): raise TypeError("use_batch_norm must be a boolean. Per-layer use of " "batch normalization is not supported. Previously, a " "test erroneously suggested use_batch_norm can be an " "iterable of booleans.") super(ConvNet2D, self).__init__(name=name, custom_getter=custom_getter) if not output_channels: raise ValueError("output_channels must not be empty") self._output_channels = tuple(output_channels) self._num_layers = len(self._output_channels) self._input_shape = None if data_format not in SUPPORTED_2D_DATA_FORMATS: raise ValueError("Invalid data_format {}. Allowed formats {}".format( data_format, SUPPORTED_2D_DATA_FORMATS))
Allowed formats " "{}".format(data_format, SUPPORTED_2D_DATA_FORMATS)) self._data_format = data_format self._initializers = util.check_initializers( initializers, self.POSSIBLE_INITIALIZER_KEYS) self._partitioners = util.check_partitioners( partitioners, self.POSSIBLE_INITIALIZER_KEYS) self._regularizers = util.check_regularizers( regularizers, self.POSSIBLE_INITIALIZER_KEYS) if not callable(activation): raise TypeError("Input 'activation' must be callable") self._activation = activation self._activate_final = activate_final self._kernel_shapes = _replicate_elements(kernel_shapes, self._num_layers) if len(self._kernel_shapes) != self._num_layers: raise ValueError( "kernel_shapes must be of length 1 or len(output_channels)") self._strides = _replicate_elements(strides, self._num_layers) if len(self._strides) != self._num_layers: raise ValueError( """strides must be of length 1 or len(output_channels)""") self._paddings = _replicate_elements(paddings, self._num_layers) if len(self._paddings) != self._num_layers: raise ValueError( """paddings must be of length 1 or len(output_channels)""") self._rates = _replicate_elements(rates, self._num_layers) if len(self._rates) != self._num_layers: raise ValueError( """rates must be of length 1 or len(output_channels)""") self._parse_normalization_kwargs( use_batch_norm, batch_norm_config, normalization_ctor, normalization_kwargs) if normalize_final is None: util.deprecation_warning( "normalize_final is not specified, so using the value of " "activate_final = {}. Change your code to set this kwarg explicitly. " "In the future, normalize_final will default to True.".format( activate_final)) self._normalize_final = activate_final else: # User has provided an override, so don't link to activate_final. self._normalize_final = normalize_final if isinstance(use_bias, bool): use_bias = (use_bias,) else: if not isinstance(use_bias, collections.Iterable): raise TypeError("use_bias must be either a bool or an iterable") use_bias = tuple(use_bias) self._use_bias = _replicate_elements(use_bias, self._num_layers) self._instantiate_layers()