def __init__(self,
             output_sizes,
             activation=tf.nn.relu,
             activate_final=False,
             initializers=None,
             use_bias=True,
             name="mlp"):
    """Constructs an MLP module.

    Args:
        output_sizes: Iterable of output dimensionalities, one per linear
            layer, as defined in `basic.Linear`. Each entry is either a
            number or a callable; callables are only invoked at graph
            construction time, so they merely need to be callable by the
            time build is called.
        activation: Activation op, applied to intermediate layers and
            optionally to the output of the final layer.
        activate_final: Whether the activation is also applied to the
            output of the final layer. Default `False`.
        initializers: Optional dict of ops used to initialize the linear
            layers' weights (key 'w') or biases (key 'b').
        use_bias: Whether the linear layers include bias parameters.
            Default `True`.
        name: Name of the module.

    Raises:
        Error: If `initializers` contains any keys other than 'w' or 'b'.
        ValueError: If `output_sizes` is empty.
        TypeError: If `activation` is not callable; or if `output_sizes`
            is not iterable.
    """
    super(MLP, self).__init__(name=name)

    # Materialise the sizes once so generators are not consumed twice.
    if not isinstance(output_sizes, collections.Iterable):
        raise TypeError("output_sizes must be iterable")
    layer_sizes = tuple(output_sizes)
    if not layer_sizes:
        raise ValueError("output_sizes must not be empty")

    self._output_sizes = layer_sizes
    self._num_layers = len(layer_sizes)
    self._input_shape = None
    self._activate_final = activate_final
    self._use_bias = use_bias

    # The set of valid initializer keys depends on whether biases exist.
    self.possible_keys = self.get_possible_initializer_keys(
        use_bias=use_bias)
    self._initializers = util.check_initializers(
        initializers, self.possible_keys)

    if not callable(activation):
        raise TypeError("Input 'activation' must be callable")
    self._activation = activation

    # Layers are created last: they rely on the attributes set above.
    self._instantiate_layers()
def __init__(self,
             reduction_indices=None,
             offset=True,
             scale=False,
             decay_rate=0.999,
             eps=1e-3,
             initializers=None,
             use_legacy_moving_second_moment=False,
             name="batch_norm"):
    """Constructs a BatchNorm module.

    By default the reduction runs over every input tensor dimension except
    the final one, which treats pixels in 1D/2D/3D images as additional
    elements of the minibatch. Pass `reduction_indices` to reduce over a
    different set of dimensions.

    Args:
        reduction_indices: Optional indices of dimensions to reduce over.
        offset: Optional boolean; whether to apply a trained
            component-wise bias after the batch normalization and scaling.
        scale: Optional boolean; whether to apply a trained component-wise
            scale after the batch normalization.
        decay_rate: Decay rate of the exponential moving averages of the
            mean and variance.
        eps: Small number to avoid dividing by zero when dividing by the
            standard deviation.
        initializers: Optional dict containing ops to initialize the
            weights of the affine transform (`gamma` and `beta`).
        use_legacy_moving_second_moment: Keep a moving second moment
            rather than the moving variance. Deprecated, but kept for
            backwards compatibility with old checkpoints. Default `False`.
        name: Name of the module.

    Raises:
        base.Error: If `initializers` contains any keys other than `gamma`
            or `beta`.
    """
    super(BatchNorm, self).__init__(name)

    # Normalization configuration.
    self._reduction_indices = reduction_indices
    self._decay_rate, self._eps = decay_rate, eps

    # Optional trained affine transform applied after normalization.
    self._offset, self._scale = offset, scale

    # NOTE(review): the flag is stored as given; no validation of
    # `use_legacy_moving_second_moment` happens in this constructor.
    self._use_legacy_moving_second_moment = use_legacy_moving_second_moment

    self._initializers = util.check_initializers(
        initializers, self.POSSIBLE_INITIALIZER_KEYS)
def __init__(self,
             output_size,
             use_bias=True,
             initializers=None,
             partitioners=None,
             name="linear"):
    """Constructs a Linear module.

    Args:
        output_size: Output dimensionality, given as an integer or as a
            callable. A callable is only invoked at graph construction
            time, so it merely needs to be callable (returning an integer)
            by the time build is called.
        use_bias: Whether to include bias parameters. Default `True`.
        initializers: Optional dict containing initializers for the
            weights (key 'w') or biases (key 'b'). The default
            initializers are truncated normal initializers, which are
            commonly used when the inputs are zero centered (see
            https://arxiv.org/pdf/1502.03167v3.pdf).
        partitioners: Optional dict containing partitioners for the
            weights (key 'w') or biases (key 'b'). By default no
            partitioners are used.
        name: Name of the module.

    Raises:
        KeyError: If an initializer is provided for a key other than 'w'
            or 'b' if `use_bias` is `True`.
        TypeError: If a provided initializer is not a callable function.
    """
    super(Linear, self).__init__(name=name)

    self._output_size = output_size
    self._use_bias = use_bias

    # Populated later; nothing is known about inputs or variables yet.
    self._input_shape = None
    self._w = None
    self._b = None

    # Initializers and partitioners are validated against the same keys,
    # which depend on whether a bias is used.
    self.possible_keys = self.get_possible_initializer_keys(
        use_bias=use_bias)
    self._initializers = util.check_initializers(
        initializers, self.possible_keys)
    self._partitioners = util.check_partitioners(
        partitioners, self.possible_keys)
def __init__(self,
             output_channels,
             kernel_shapes,
             strides,
             paddings,
             activation=tf.nn.relu,
             activate_final=False,
             initializers=None,
             use_batch_norm=False,
             use_bias=True,
             batch_norm_config=None,
             name="conv_net_2d"):
    """Constructs a `ConvNet2D` module.

    By default, neither batch normalization nor activation are applied to
    the output of the final layer.

    Args:
        output_channels: Iterable of output channels, as defined in
            `conv.Conv2D`, one entry per convolutional layer. Each entry
            is either a number or a callable; callables are only invoked
            at graph construction time, so they merely need to be callable
            by the time build is called.
        kernel_shapes: Iterable of kernel sizes as defined in
            `conv.Conv2D`; if it contains one element only, the same
            kernel shape is used in each layer of the network.
        strides: Iterable of kernel strides as defined in `conv.Conv2D`;
            if it contains one element only, the same stride is used in
            each layer of the network.
        paddings: Iterable of padding options, either `nn.SAME` or
            `nn.VALID`; if it contains one element only, the same padding
            is used in each layer of the network.
        activation: An activation op.
        activate_final: Boolean determining if the activation and batch
            normalization, if turned on, are applied to the final layer.
        initializers: Optional dict containing ops to initialize the
            filters of the whole network (key 'w') or biases (key 'b').
        use_batch_norm: Boolean determining if batch normalization is
            applied after convolution.
        use_bias: Whether to include bias parameters in the convolutional
            layers. Default `True`.
        batch_norm_config: Optional mapping of additional configuration
            for the `nn.BatchNorm` modules.
        name: Name of the module.

    Raises:
        TypeError: If `output_channels`, `kernel_shapes`, `strides` or
            `paddings` is not iterable; or if `activation` is not
            callable; or if `batch_norm_config` is not a mapping
            (e.g. `dict`).
        ValueError: If `output_channels` is empty; or if `kernel_shapes`,
            `strides` or `paddings` has a length other than 1 or
            `len(output_channels)`.
        Error: If `initializers` contains any keys other than 'w' or 'b'.
    """

    def _as_tuple(value, error_message):
        # Reject non-iterables, then freeze the argument into a tuple.
        if not isinstance(value, collections.Iterable):
            raise TypeError(error_message)
        return tuple(value)

    def _per_layer(values, error_message):
        # Expand `values` to one entry per layer and verify the length.
        expanded = _replicate_elements(values, self._num_layers)
        if len(expanded) != self._num_layers:
            raise ValueError(error_message)
        return expanded

    # Argument types are checked before the base module is initialised.
    output_channels = _as_tuple(
        output_channels, "output_channels must be iterable")
    kernel_shapes = _as_tuple(
        kernel_shapes, "kernel_shapes must be iterable")
    strides = _as_tuple(strides, "strides must be iterable")
    paddings = _as_tuple(paddings, "paddings must be iterable")

    super(ConvNet2D, self).__init__(name)

    if not output_channels:
        raise ValueError("output_channels must not be empty")
    self._output_channels = output_channels
    self._num_layers = len(output_channels)
    self._input_shape = None

    self._initializers = util.check_initializers(
        initializers, self.POSSIBLE_INITIALIZER_KEYS)

    if not callable(activation):
        raise TypeError("Input 'activation' must be callable")
    self._activation = activation
    self._activate_final = activate_final

    # Per-layer settings: a single value is shared by every layer.
    self._kernel_shapes = _per_layer(
        kernel_shapes,
        "kernel_shapes must be of length 1 or len(output_channels)")
    self._strides = _per_layer(
        strides, "strides must be of length 1 or len(output_channels)")
    self._paddings = _per_layer(
        paddings, "paddings must be of length 1 or len(output_channels)")

    self._use_batch_norm = use_batch_norm
    if batch_norm_config is None:
        self._batch_norm_config = {}
    elif isinstance(batch_norm_config, collections.Mapping):
        self._batch_norm_config = batch_norm_config
    else:
        raise TypeError(
            "`batch_norm_config` must be a mapping, e.g. `dict`.")

    self._use_bias = use_bias

    # Layers are created last: they rely on the attributes set above.
    self._instantiate_layers()
def __init__(self,
             output_channels,
             output_shape,
             kernel_shape,
             stride=1,
             padding=SAME,
             use_bias=True,
             initializers=None,
             name="conv_2d_transpose"):
    """Constructs a `Conv2DTranspose` module.

    See the following documentation for an explanation of VALID versus
    SAME padding modes:
    https://www.tensorflow.org/versions/r0.8/api_docs/python/nn.html#convolution

    Args:
        output_channels: Number of output channels, given as a number or
            as a callable. A callable is only invoked at graph
            construction time, so it merely needs to be callable
            (returning an integer) by the time build is called.
        output_shape: Output shape of the transpose convolution, given as
            an iterable of integers or as a callable. A callable is only
            invoked at graph construction time and must return an
            iterable of format `(out_height, out_width)` when `_build` is
            called. Note that `output_shape` defines the size of the
            output signal domain, as opposed to the shape of the output
            `Tensor`.
        kernel_shape: List of kernel sizes, must be length 2.
        stride: List of kernel strides.
        padding: Padding algorithm, either `nn.SAME` or `nn.VALID`.
        use_bias: Whether to include bias parameters. Default `True`.
        initializers: Optional dict containing ops to initialize the
            filters (key 'w') or biases (key 'b').
        name: Name of the module.

    Raises:
        base.IncompatibleShapeError: If the given kernel shape is neither
            an integer nor a sequence of two integers.
        base.IncompatibleShapeError: If the given stride is neither an
            integer nor a sequence of two or four integers.
        ValueError: If the given padding is not `nn.VALID` or `nn.SAME`.
        KeyError: If `initializers` contains any keys other than 'w' or
            'b'.
        TypeError: If any of the given initializers are not callable.
    """
    super(Conv2DTranspose, self).__init__(name)

    self._output_channels = output_channels
    # A callable shape is stored as-is and resolved later; a concrete
    # shape is frozen into a tuple immediately.
    self._output_shape = (
        output_shape if callable(output_shape) else tuple(output_shape))
    self._input_shape = None

    self._kernel_shape = _fill_and_verify_kernel_shape(kernel_shape, 2)

    # Native strides akin to [1, m, n, 1] are accepted verbatim.
    is_native_stride = (
        isinstance(stride, collections.Iterable) and len(stride) == 4)
    if is_native_stride:
        if stride[0] != 1 or stride[3] != 1:
            raise base.IncompatibleShapeError(
                "Invalid stride: First and last element must be 1.")
        self._stride = tuple(stride)
    else:
        self._stride = _fill_and_one_pad_stride(stride, 2)

    self._padding = _verify_padding(padding)
    self._use_bias = use_bias

    # The set of valid initializer keys depends on whether biases exist.
    self.possible_keys = self.get_possible_initializer_keys(
        use_bias=use_bias)
    self._initializers = util.check_initializers(
        initializers, self.possible_keys)
def __init__(self,
             output_channels,
             kernel_shape,
             stride=1,
             rate=1,
             padding=SAME,
             use_bias=True,
             initializers=None,
             mask=None,
             name="conv_2d"):
    """Constructs a Conv2D module.

    See the following documentation for an explanation of VALID versus
    SAME padding modes:
    https://www.tensorflow.org/versions/r0.8/api_docs/python/nn.html#convolution

    Args:
        output_channels: Number of output channels, given as a number or
            as a callable. A callable is only invoked at graph
            construction time, so it merely needs to be callable
            (returning an integer) by the time `_build` is called.
        kernel_shape: List of kernel sizes, or integer that is used to
            define kernel size in all dimensions.
        stride: List of kernel strides, or integer that is used to define
            stride in all dimensions.
        rate: A positive integer; `rate=1` corresponds to standard 2D
            convolution, `rate > 1` corresponds to dilated convolution.
        padding: Padding algorithm, either `nn.SAME` or `nn.VALID`.
        use_bias: Whether to include bias parameters. Default `True`.
        initializers: Optional dict containing ops to initialize the
            filters (key 'w') or biases (key 'b'). The default
            initializers are truncated normal initializers, which are
            commonly used when the inputs are zero centered (see
            https://arxiv.org/pdf/1502.03167v3.pdf).
        mask: Optional 2D or 4D list, tuple or numpy array containing
            values to multiply the weights by component-wise.
        name: Name of the module.

    Raises:
        base.IncompatibleShapeError: If the given kernel shape is not an
            integer; or if the given kernel shape is not a sequence of
            two integers.
        base.IncompatibleShapeError: If the given stride is not an
            integer; or if the given stride is not a sequence of two or
            four integers.
        base.IncompatibleShapeError: If a mask is given and its rank is
            neither 2 nor 4.
        base.NotSupportedError: If the given dilation rate is not a
            positive integer.
        base.NotSupportedError: If rate > 1 and the stride in any
            dimension is > 1.
        ValueError: If the given padding is not `nn.VALID` or `nn.SAME`.
        KeyError: If initializers contains any keys other than 'w' or
            'b'.
        TypeError: If any of the given initializers are not callable.
        TypeError: If mask is given and is not a list, tuple or a numpy
            array.
    """
    super(Conv2D, self).__init__(name=name)

    self._output_channels = output_channels
    self._input_shape = None
    self._kernel_shape = _fill_and_verify_kernel_shape(kernel_shape, 2)

    try:
        self._stride = (1,) + _fill_shape(stride, 2) + (1,)
    except TypeError as e:
        # We want to support passing native strides akin to [1, m, n, 1].
        # NOTE(review): a stride without a length (e.g. a float) would
        # raise TypeError from len() here instead of the documented
        # IncompatibleShapeError -- confirm whether that is intended.
        if len(stride) == 4:
            self._stride = tuple(stride)
        else:
            raise base.IncompatibleShapeError(
                "Invalid stride: {}".format(e))

    if not isinstance(rate, numbers.Integral) or rate < 1:
        raise base.NotSupportedError(
            "Rate, {}, must be integer >= 1".format(rate))
    if rate > 1 and any(x > 1 for x in self._stride):
        raise base.NotSupportedError(
            "Cannot have stride > 1 with rate > 1")
    self._rate = rate

    self._padding = _verify_padding(padding)
    self._use_bias = use_bias

    # The set of valid initializer keys depends on whether biases exist.
    self.possible_keys = self.get_possible_initializer_keys(
        use_bias=use_bias)
    self._initializers = util.check_initializers(
        initializers, self.possible_keys)

    if mask is None:
        self._mask = None
    else:
        if not isinstance(mask, (list, tuple, np.ndarray)):
            raise TypeError("Invalid type for mask: {}".format(type(mask)))
        self._mask = np.asanyarray(mask)
        # Read the rank from the converted array: a raw list or tuple
        # mask has no `ndim` attribute.
        mask_rank = self._mask.ndim
        if mask_rank != 2 and mask_rank != 4:
            raise base.IncompatibleShapeError(
                "Invalid mask rank: {}".format(mask_rank))
def __init__(self,
             hidden_size,
             forget_bias=1.0,
             initializers=None,
             use_peepholes=False,
             use_batch_norm_h=False,
             use_batch_norm_x=False,
             use_batch_norm_c=False,
             max_unique_stats=1,
             name="lstm"):
    """Construct LSTM.

    Args:
        hidden_size: (int) Hidden size dimensionality.
        forget_bias: (float) Bias for the forget activation.
        initializers: Dict containing ops to initialize the weights. This
            dictionary may contain any of the keys in POSSIBLE_KEYS. The
            gamma and beta variables control batch normalization values
            for different batch norm transformations inside the cell; see
            the paper for details.
        use_peepholes: Boolean that indicates whether peephole
            connections are used.
        use_batch_norm_h: Boolean that indicates whether to apply batch
            normalization at the previous_hidden -> gates contribution.
            If you are experimenting with batch norm then this may be the
            most effective to turn on.
        use_batch_norm_x: Boolean that indicates whether to apply batch
            normalization at the input -> gates contribution.
        use_batch_norm_c: Boolean that indicates whether to apply batch
            normalization at the cell -> output contribution.
        max_unique_stats: The maximum number of steps to use unique batch
            norm statistics for. (See the module description for more
            details.)
        name: Name of the module.

    Raises:
        KeyError: if `initializers` contains any keys not in
            POSSIBLE_KEYS.
        ValueError: if a peephole initializer is passed in the
            initializer list, but `use_peepholes` is False.
        ValueError: if a batch norm initializer is passed in the
            initializer list, but batch norm is disabled.
        ValueError: if `max_unique_stats` is not the default value, but
            batch norm is disabled.
        ValueError: if `max_unique_stats` is < 1.
    """
    super(LSTM, self).__init__(name=name)

    self._hidden_size = hidden_size
    self._forget_bias = forget_bias
    self._use_peepholes = use_peepholes
    self._max_unique_stats = max_unique_stats
    self._use_batch_norm_h = use_batch_norm_h
    self._use_batch_norm_x = use_batch_norm_x
    self._use_batch_norm_c = use_batch_norm_c

    # Valid initializer keys depend on which optional features are on.
    self.possible_keys = self.get_possible_initializer_keys(
        use_peepholes=use_peepholes,
        use_batch_norm_h=use_batch_norm_h,
        use_batch_norm_x=use_batch_norm_x,
        use_batch_norm_c=use_batch_norm_c)
    self._initializers = util.check_initializers(
        initializers, self.possible_keys)

    if max_unique_stats < 1:
        raise ValueError("max_unique_stats must be >= 1")

    # A non-default `max_unique_stats` only makes sense with batch norm.
    any_batch_norm = (
        use_batch_norm_h or use_batch_norm_x or use_batch_norm_c)
    if max_unique_stats != 1 and not any_batch_norm:
        raise ValueError(
            "max_unique_stats specified but batch norm disabled")

    # One batch norm sub-module per enabled contribution.
    if use_batch_norm_h:
        self._batch_norm_h = LSTM.IndexedStatsBatchNorm(
            max_unique_stats, "batch_norm_h")
    if use_batch_norm_x:
        self._batch_norm_x = LSTM.IndexedStatsBatchNorm(
            max_unique_stats, "batch_norm_x")
    if use_batch_norm_c:
        self._batch_norm_c = LSTM.IndexedStatsBatchNorm(
            max_unique_stats, "batch_norm_c")