def __call__(self,
               input_layer,
               kernel,
               depth,
               activation_fn=None,
               stride=None,
               l2loss=None,
               init=None,
               stddev=None,
               bias=True,
               bias_init=tf.zeros_initializer,
               edges=PAD_SAME,
               batch_normalize=False,
               name=PROVIDED):
    """Adds a convolution to the stack of operations.

    The current head must be a rank 4 Tensor.

    Args:
      input_layer: The chainable object, supplied.
      kernel: The size of the convolution kernel, either an int or a length 1
        or 2 sequence (if length 1 or int, it is expanded).
      depth: The depth of the new Tensor.
      activation_fn: A tuple of (activation_function, extra_parameters). Any
        function that takes a tensor as its first argument can be used. More
        common functions will have summaries added (e.g. relu).
      stride: The strides as a length 1, 2 or 4 sequence or an integer. If an
        int or a length 1 or 2 sequence, the strides in the first and last
        dimensions are set to 1.
      l2loss: Set to a value greater than 0 to use L2 regularization to decay
        the weights.
      init: An optional initialization. If not specified, uses He
        initialization (`layers.he_init`) scaled for the activation function.
      stddev: A standard deviation to use in parameter initialization.
      bias: Set to False to not have a bias.
      bias_init: An initializer for the bias or a Tensor.
      edges: Either SAME to use 0s for the out of bounds area or VALID to
        shrink the output size and only use valid input pixels.
      batch_normalize: Supply a BatchNormalizationArguments to set the
        parameters for batch normalization.
      name: The name for this operation is also used to create/find the
        parameter variables.
    Returns:
      Handle to the generated layer.
    Raises:
      ValueError: If the head is not a rank 4 tensor or the depth of the input
        (4th dim) is not known.
    """
    if input_layer.get_shape().ndims != 4:
      raise ValueError('conv2d requires a rank 4 Tensor with a known depth %s' %
                       input_layer.get_shape())
    if input_layer.shape[3] is None:
      raise ValueError('Input depth must be known')
    kernel = _kernel(kernel)
    stride = _stride(stride)
    size = [kernel[0], kernel[1], input_layer.shape[3], depth]

    books = input_layer.bookkeeper
    if init is None:
      if stddev is None:
        patch_size = size[0] * size[1]
        init = layers.he_init(size[2] * patch_size, size[3] * patch_size,
                              activation_fn)
      else:
        tf.logging.warning(
            'Passing `stddev` to initialize weight variable is deprecated and '
            'will be removed in the future. Pass '
            'tf.truncated_normal_initializer(stddev=stddev) or '
            'tf.zeros_initializer to `init` instead.')
        if stddev:
          init = tf.truncated_normal_initializer(stddev=stddev)
        else:
          init = tf.zeros_initializer
    elif stddev is not None:
      raise ValueError('Do not set both init and stddev.')
    dtype = input_layer.tensor.dtype
    params = self.variable('weights', size, init, dt=dtype)
    y = tf.nn.conv2d(input_layer, params, stride, edges)
    layers.add_l2loss(books, params, l2loss)
    if bias:
      y += self.variable('bias', [size[-1]], bias_init, dt=dtype)
    books.add_scalar_summary(
        tf.reduce_mean(layers.spatial_slice_zeros(y)),
        '%s/zeros_spatial' % y.op.name)
    y = pretty_tensor_normalization_methods.batch_normalize_with_arguments(
        y, batch_normalize)
    if activation_fn is not None:
      if not isinstance(activation_fn, collections.Sequence):
        activation_fn = (activation_fn,)
      y = layers.apply_activation(books,
                                  y,
                                  activation_fn[0],
                                  activation_args=activation_fn[1:])
    books.add_histogram_summary(y, '%s/activations' % y.op.name)
    return input_layer.with_tensor(y, parameters=self.vars)
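
# Usage sketch (added for illustration; not part of the scraped example).
# It assumes TF 1.x, that `prettytensor` is importable, and that the method
# above is registered as `conv2d` on wrapped tensors; shapes are hypothetical.
import tensorflow as tf
import prettytensor as pt

images = tf.placeholder(tf.float32, [None, 28, 28, 1])
# kernel=5, depth=32: a 5x5 patch mapping 1 input channel to 32 outputs.
head = pt.wrap(images).conv2d(5, 32, activation_fn=tf.nn.relu)
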
    def __call__(self,
                 input_layer,
                 kernel,
                 channel_multiplier,
                 activation_fn=None,
                 stride=None,
                 l2loss=None,
                 weights=None,
                 bias=tf.zeros_initializer(),
                 edges=PAD_SAME,
                 batch_normalize=False,
                 phase=prettytensor.Phase.train,
                 parameter_modifier=parameters.identity,
                 name=PROVIDED):
        """Adds a depth-wise convolution to the stack of operations.

    A depthwise convolution performs the convolutions one channel at a time and
    produces an output with depth `channel_multiplier * input_depth`.

    `kernel` is the patch that will be convolved and it describes the kernel
    along each of the 4 dimensions.  The stride is how big a step to take.

    * scalar (e.g. 3): Square kernel on the image
        (`[b, c, r, d] = [1, 3, 3, 1]`).
    * singleton list (e.g. [3]): Square kernel on the image
        (`[b, c, r, d] = [1, 3, 3, 1]`).
    * list of length 2 (e.g. [3, 2]): Rectangular kernel on the image
        (`[b, c, r, d] = [1, 3, 2, 1]`).

    Args:
      input_layer: The chainable object, supplied.
      kernel: The size of the convolution kernel, either an int or a length 1
        or 2 sequence (if length 1 or int, it is expanded).
      channel_multiplier: Output channels will be a multiple of input channels.
      activation_fn: A tuple of (activation_function, extra_parameters). Any
        function that takes a tensor as its first argument can be used. More
        common functions will have summaries added (e.g. relu).
      stride: The strides as a length 1, 2 or 4 sequence or an integer. If an
        int or a length 1 or 2 sequence, the strides in the first and last
        dimensions are set to 1.
      l2loss: Set to a value greater than 0 to use L2 regularization to decay
        the weights.
      weights:  An initializer for weights or a Tensor. If not specified,
        uses He's initialization.
      bias: An initializer for the bias or a Tensor. No bias if set to None.
      edges: Either `pt.PAD_SAME` to use 0s for the out of bounds area or
        `pt.PAD_VALID` to shrink the output size and only use valid input
        pixels.
      batch_normalize: Supply a BatchNormalizationArguments to set the
        parameters for batch normalization.
      phase: The phase of graph construction.  See `pt.Phase`.
      parameter_modifier: A function to modify parameters that is applied after
        creation and before use.
      name: The name for this operation is also used to create/find the
        parameter variables.
    Returns:
      Handle to the generated layer.
    Raises:
      ValueError: If input_layer is not a rank 4 tensor or the depth of the
        input (4th dim) is not known.
    """
        if input_layer.get_shape().ndims != 4:
            raise ValueError(
                'depthwise_conv2d requires a rank 4 Tensor with a known depth %s'
                % input_layer.get_shape())
        if input_layer.shape[3] is None:
            raise ValueError('Input depth must be known')
        kernel = _kernel(kernel)
        stride = _stride(stride)
        size = [kernel[0], kernel[1], input_layer.shape[3], channel_multiplier]

        books = input_layer.bookkeeper
        if weights is None:
            patch_size = size[0] * size[1]
            weights = layers.he_init(size[2] * patch_size,
                                     size[3] * patch_size, activation_fn)

        dtype = input_layer.tensor.dtype
        params = parameter_modifier(
            'weights', self.variable('weights', size, weights, dt=dtype),
            phase)
        y = tf.nn.depthwise_conv2d(input_layer, params, stride, edges)
        layers.add_l2loss(books, params, l2loss)
        if bias is not None:
            y += parameter_modifier(
                'bias',
                self.variable('bias',
                              [input_layer.shape[3] * channel_multiplier],
                              bias,
                              dt=dtype), phase)
        books.add_scalar_summary(tf.reduce_mean(layers.spatial_slice_zeros(y)),
                                 '%s/zeros_spatial' % y.op.name)
        y = pretty_tensor_normalization_methods.batch_normalize_with_arguments(
            y, batch_normalize)
        if activation_fn is not None:
            if not isinstance(activation_fn, collections.Sequence):
                activation_fn = (activation_fn, )
            y = layers.apply_activation(books,
                                        y,
                                        activation_fn[0],
                                        activation_args=activation_fn[1:])
        books.add_histogram_summary(y, '%s/activations' % y.op.name)
        return input_layer.with_tensor(y, parameters=self.vars)
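
# Usage sketch (added for illustration; not part of the scraped example).
# A depthwise convolution multiplies the channel count by channel_multiplier.
# Assumes TF 1.x and `prettytensor`; shapes and names are hypothetical.
import tensorflow as tf
import prettytensor as pt

images = tf.placeholder(tf.float32, [None, 32, 32, 3])
# kernel=3, channel_multiplier=4: 3 input channels -> 12 output channels.
head = pt.wrap(images).depthwise_conv2d(3, 4, activation_fn=tf.nn.relu)
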
Example 3
    def __call__(self,
                 input_layer,
                 size,
                 activation_fn=None,
                 l2loss=None,
                 weights=None,
                 bias=tf.zeros_initializer(),
                 transpose_weights=False,
                 phase=prettytensor.Phase.train,
                 parameter_modifier=parameters.identity,
                 tie_groups=False,
                 name=PROVIDED):
        """Adds the parameters for a fully connected layer and returns a tensor.
    The current PrettyTensor must have rank 2.
    Args:
      input_layer: The Pretty Tensor object, supplied.
      size: The number of neurons
      activation_fn: A tuple of (activation_function, extra_parameters). Any
        function that takes a tensor as its first argument can be used. More
        common functions will have summaries added (e.g. relu).
      l2loss: Set to a value greater than 0 to use L2 regularization to decay
        the weights.
      weights:  An initializer for weights or a Tensor. If not specified,
        uses He's initialization.
      bias: An initializer for the bias or a Tensor. No bias if set to None.
      transpose_weights: Flag indicating if weights should be transposed;
        this is useful for loading models with a different shape.
      phase: The phase of graph construction.  See `pt.Phase`.
      parameter_modifier: A function to modify parameters that is applied after
        creation and before use.
      name: The name for this operation is also used to create/find the
        parameter variables.
    Returns:
      A Pretty Tensor handle to the layer.
    Raises:
      ValueError: if the Pretty Tensor is not rank 3 or the number of groups
        (second dim) or input nodes (third dim) is not known.
    """
        if input_layer.get_shape().ndims != 3:
            raise ValueError(
                'group_connected requires a rank 3 Tensor with known 2nd and 3rd '
                'dimension: %s' % input_layer.get_shape())
        group_num = input_layer.shape[1]
        in_size = input_layer.shape[2]
        if group_num is None:
            raise ValueError('Number of groups must be known.')
        if in_size is None:
            raise ValueError('Number of input nodes must be known.')
        books = input_layer.bookkeeper
        if weights is None:
            weights = layers.he_init(in_size, size, activation_fn)

        dtype = input_layer.tensor.dtype
        weight_shape = ([group_num, size, in_size] if transpose_weights
                        else [group_num, in_size, size])

        params_var = parameter_modifier(
            'weights', self.variable('weights',
                                     weight_shape,
                                     weights,
                                     dt=dtype), phase)

        if tie_groups and phase == prettytensor.Phase.train:
            with tf.variable_scope("weight_tying"):
                params = tf.tile(
                    tf.reduce_mean(params_var, axis=0, keep_dims=True),
                    [group_num, 1, 1])
                with tf.control_dependencies([tf.assign(params_var, params)]):
                    params = tf.identity(params)
        else:
            params = params_var

        input_tensor = tf.expand_dims(input_layer, axis=-2)
        params_tensor = tf.tile(tf.expand_dims(params, axis=0),
                                [tf.shape(input_tensor)[0], 1, 1, 1])
        y = tf.matmul(input_tensor,
                      params_tensor,
                      transpose_b=transpose_weights,
                      name=name)
        y = tf.squeeze(y, axis=2)
        layers.add_l2loss(books, params, l2loss)
        if bias is not None:
            y += parameter_modifier(
                'bias', self.variable('bias', [size], bias, dt=dtype), phase)

        if activation_fn is not None:
            if not isinstance(activation_fn, collections.Sequence):
                activation_fn = (activation_fn, )
            y = layers.apply_activation(books,
                                        y,
                                        activation_fn[0],
                                        activation_args=activation_fn[1:])
        books.add_histogram_summary(y, '%s/activations' % y.op.name)
        return input_layer.with_tensor(y, parameters=self.vars)
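
# Usage sketch (added for illustration; not part of the scraped example).
# Assumes TF 1.x, `prettytensor`, and that the method above is registered
# under the hypothetical name `group_connected`. Input is rank 3
# ([batch, groups, in_size]); each group gets its own [in_size, size] weight
# matrix, and tie_groups=True averages the group weights during training.
import tensorflow as tf
import prettytensor as pt

x = tf.placeholder(tf.float32, [None, 4, 16])       # 4 groups of 16 features
y = pt.wrap(x).group_connected(8, tie_groups=True)  # shape: [batch, 4, 8]
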
  def __call__(self,
               input_layer,
               size,
               activation_fn=None,
               l2loss=None,
               init=None,
               stddev=None,
               bias=True,
               bias_init=tf.zeros_initializer,
               transpose_weights=False,
               name=PROVIDED):
    """Adds the parameters for a fully connected layer and returns a tensor.

    The current head must be a rank 2 Tensor.

    Args:
      input_layer: The Pretty Tensor object, supplied.
      size: The number of neurons.
      activation_fn: A tuple of (activation_function, extra_parameters). Any
        function that takes a tensor as its first argument can be used. More
        common functions will have summaries added (e.g. relu).
      l2loss: Set to a value greater than 0 to use L2 regularization to decay
        the weights.
      init: An optional initialization. If not specified, uses He
        initialization (`layers.he_init`) scaled for the activation function.
      stddev: A standard deviation to use in parameter initialization.
      bias: Set to False to not have a bias.
      bias_init: The initializer for the bias or a Tensor.
      transpose_weights: Flag indicating if weights should be transposed;
        this is useful for loading models with a different shape.
      name: The name for this operation is also used to create/find the
        parameter variables.
    Returns:
      A Pretty Tensor handle to the layer.
    Raises:
      ValueError: if the head_shape is not rank 2 or the number of input nodes
        (second dim) is not known.
    """
    # TODO(eiderman): bias_init shouldn't take a constant and stddev shouldn't
    # exist.
    if input_layer.get_shape().ndims != 2:
      raise ValueError(
          'fully_connected requires a rank 2 Tensor with known second '
          'dimension: %s'
          % input_layer.get_shape())
    in_size = input_layer.shape[1]
    if input_layer.shape[1] is None:
      raise ValueError('Number of input nodes must be known.')
    books = input_layer.bookkeeper
    if init is None:
      if stddev is None:
        init = layers.he_init(in_size, size, activation_fn)
      else:
        tf.logging.warning(
            'Passing `stddev` to initialize weight variable is deprecated and '
            'will be removed in the future. Pass '
            'tf.truncated_normal_initializer(stddev=stddev) or '
            'tf.zeros_initializer to `init` instead.')
        if stddev:
          init = tf.truncated_normal_initializer(stddev=stddev)
        else:
          init = tf.zeros_initializer
    elif stddev is not None:
      raise ValueError('Do not set both init and stddev.')
    dtype = input_layer.tensor.dtype
    weight_shape = [size, in_size] if transpose_weights else [in_size, size]

    params = self.variable(
        'weights',
        weight_shape,
        init,
        dt=dtype)
    y = tf.matmul(input_layer, params, transpose_b=transpose_weights)
    layers.add_l2loss(books, params, l2loss)
    if bias:
      if isinstance(bias_init, tf.compat.real_types):
        bias_init = tf.constant_initializer(bias_init)
      y += self.variable(
          'bias',
          [size],
          bias_init,
          dt=dtype)

    if activation_fn is not None:
      if not isinstance(activation_fn, collections.Sequence):
        activation_fn = (activation_fn,)
      y = layers.apply_activation(
          books,
          y,
          activation_fn[0],
          activation_args=activation_fn[1:])
    books.add_histogram_summary(y, '%s/activations' % y.op.name)
    return input_layer.with_tensor(y, parameters=self.vars)
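
# Usage sketch (added for illustration; not part of the scraped example).
# Stacks two fully connected layers through the chaining API. Assumes TF 1.x
# and `prettytensor`; the sizes and input shape are hypothetical.
import tensorflow as tf
import prettytensor as pt

features = tf.placeholder(tf.float32, [None, 784])
logits = (pt.wrap(features)
          .fully_connected(128, activation_fn=tf.nn.relu)
          .fully_connected(10))
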
Example 5
    def __call__(self,
                 input_layer,
                 kernel,
                 depth,
                 activation_fn=None,
                 stride=None,
                 l2loss=None,
                 init=None,
                 stddev=None,
                 bias=True,
                 bias_init=tf.zeros_initializer,
                 edges=PAD_SAME,
                 batch_normalize=False,
                 name=PROVIDED):
        """Adds a convolution to the stack of operations.

    The current head must be a rank 4 Tensor.

    Args:
      input_layer: The chainable object, supplied.
      kernel: The size of the convolution kernel, either an int or a length 1
        or 2 sequence (if length 1 or int, it is expanded).
      depth: The depth of the new Tensor.
      activation_fn: A tuple of (activation_function, extra_parameters). Any
        function that takes a tensor as its first argument can be used. More
        common functions will have summaries added (e.g. relu).
      stride: The strides as a length 1, 2 or 4 sequence or an integer. If an
        int or a length 1 or 2 sequence, the strides in the first and last
        dimensions are set to 1.
      l2loss: Set to a value greater than 0 to use L2 regularization to decay
        the weights.
      init: An optional initialization. If not specified, uses He
        initialization (`layers.he_init`) scaled for the activation function.
      stddev: A standard deviation to use in parameter initialization.
      bias: Set to False to not have a bias.
      bias_init: An initializer for the bias or a Tensor.
      edges: Either SAME to use 0s for the out of bounds area or VALID to
        shrink the output size and only use valid input pixels.
      batch_normalize: Supply a BatchNormalizationArguments to set the
        parameters for batch normalization.
      name: The name for this operation is also used to create/find the
        parameter variables.
    Returns:
      Handle to the generated layer.
    Raises:
      ValueError: If the head is not a rank 4 tensor or the depth of the input
        (4th dim) is not known.
    """
        if input_layer.get_shape().ndims != 4:
            raise ValueError(
                'conv2d requires a rank 4 Tensor with a known depth %s' %
                input_layer.get_shape())
        if input_layer.shape[3] is None:
            raise ValueError('Input depth must be known')
        kernel = _kernel(kernel)
        stride = _stride(stride)
        size = [kernel[0], kernel[1], input_layer.shape[3], depth]

        books = input_layer.bookkeeper
        if init is None:
            if stddev is None:
                patch_size = size[0] * size[1]
                init = layers.he_init(size[2] * patch_size,
                                      size[3] * patch_size, activation_fn)
            else:
                tf.logging.warning(
                    'Passing `stddev` to initialize weight variable is deprecated and '
                    'will be removed in the future. Pass '
                    'tf.truncated_normal_initializer(stddev=stddev) or '
                    'tf.zeros_initializer to `init` instead.')
                if stddev:
                    init = tf.truncated_normal_initializer(stddev=stddev)
                else:
                    init = tf.zeros_initializer
        elif stddev is not None:
            raise ValueError('Do not set both init and stddev.')
        dtype = input_layer.tensor.dtype
        params = self.variable('weights', size, init, dt=dtype)
        y = tf.nn.conv2d(input_layer, params, stride, edges)
        layers.add_l2loss(books, params, l2loss)
        if bias:
            y += self.variable('bias', [size[-1]], bias_init, dt=dtype)
        books.add_scalar_summary(tf.reduce_mean(layers.spatial_slice_zeros(y)),
                                 '%s/zeros_spatial' % y.op.name)
        y = pretty_tensor_normalization_methods.batch_normalize_with_arguments(
            y, batch_normalize)
        if activation_fn is not None:
            if not isinstance(activation_fn, collections.Sequence):
                activation_fn = (activation_fn, )
            y = layers.apply_activation(books,
                                        y,
                                        activation_fn[0],
                                        activation_args=activation_fn[1:])
        books.add_histogram_summary(y, '%s/activations' % y.op.name)
        return input_layer.with_tensor(y, parameters=self.vars)
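
# Usage sketch (added for illustration; not part of the scraped example).
# The same conv2d with an explicit stride and VALID padding, which shrinks
# the spatial output instead of zero-padding. Assumes TF 1.x, `prettytensor`,
# and that PAD_VALID is exported at the package level.
import tensorflow as tf
import prettytensor as pt

images = tf.placeholder(tf.float32, [None, 28, 28, 1])
head = pt.wrap(images).conv2d(3, 16, stride=2, edges=pt.PAD_VALID)
# With VALID padding and stride 2: [None, 13, 13, 16].
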
    def __call__(self,
                 input_layer,
                 size,
                 activation_fn=None,
                 l2loss=None,
                 init=None,
                 stddev=None,
                 bias=True,
                 bias_init=tf.zeros_initializer,
                 transpose_weights=False,
                 name=PROVIDED):
        """Adds the parameters for a fully connected layer and returns a tensor.

    The current head must be a rank 2 Tensor.

    Args:
      input_layer: The Pretty Tensor object, supplied.
      size: The number of neurons.
      activation_fn: A tuple of (activation_function, extra_parameters). Any
        function that takes a tensor as its first argument can be used. More
        common functions will have summaries added (e.g. relu).
      l2loss: Set to a value greater than 0 to use L2 regularization to decay
        the weights.
      init: An optional initialization. If not specified, uses He
        initialization (`layers.he_init`) scaled for the activation function.
      stddev: A standard deviation to use in parameter initialization.
      bias: Set to False to not have a bias.
      bias_init: The initializer for the bias or a Tensor.
      transpose_weights: Flag indicating if weights should be transposed;
        this is useful for loading models with a different shape.
      name: The name for this operation is also used to create/find the
        parameter variables.
    Returns:
      A Pretty Tensor handle to the layer.
    Raises:
      ValueError: if the head_shape is not rank 2 or the number of input nodes
        (second dim) is not known.
    """
        # TODO(eiderman): bias_init shouldn't take a constant and stddev shouldn't
        # exist.
        if input_layer.get_shape().ndims != 2:
            raise ValueError(
                'fully_connected requires a rank 2 Tensor with known second '
                'dimension: %s' % input_layer.get_shape())
        in_size = input_layer.shape[1]
        if input_layer.shape[1] is None:
            raise ValueError('Number of input nodes must be known.')
        books = input_layer.bookkeeper
        if init is None:
            if stddev is None:
                init = layers.he_init(in_size, size, activation_fn)
            else:
                tf.logging.warning(
                    'Passing `stddev` to initialize weight variable is deprecated and '
                    'will be removed in the future. Pass '
                    'tf.truncated_normal_initializer(stddev=stddev) or '
                    'tf.zeros_initializer to `init` instead.')
                if stddev:
                    init = tf.truncated_normal_initializer(stddev=stddev)
                else:
                    init = tf.zeros_initializer
        elif stddev is not None:
            raise ValueError('Do not set both init and stddev.')
        dtype = input_layer.tensor.dtype
        weight_shape = ([size, in_size] if transpose_weights
                        else [in_size, size])

        params = self.variable('weights', weight_shape, init, dt=dtype)
        y = tf.matmul(input_layer, params, transpose_b=transpose_weights)
        layers.add_l2loss(books, params, l2loss)
        if bias:
            if isinstance(bias_init, tf.compat.real_types):
                bias_init = tf.constant_initializer(bias_init)
            y += self.variable('bias', [size], bias_init, dt=dtype)

        if activation_fn is not None:
            if not isinstance(activation_fn, collections.Sequence):
                activation_fn = (activation_fn, )
            y = layers.apply_activation(books,
                                        y,
                                        activation_fn[0],
                                        activation_args=activation_fn[1:])
        books.add_histogram_summary(y, '%s/activations' % y.op.name)
        return input_layer.with_tensor(y, parameters=self.vars)
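
# Usage sketch (added for illustration; not part of the scraped example).
# This variant accepts a plain constant for bias_init (it is wrapped with
# tf.constant_initializer above) and bias=False to skip the bias entirely.
# Assumes TF 1.x and `prettytensor`; shapes are hypothetical.
import tensorflow as tf
import prettytensor as pt

features = tf.placeholder(tf.float32, [None, 64])
no_bias = pt.wrap(features).fully_connected(32, bias=False)
seeded = pt.wrap(features).fully_connected(32, bias_init=0.1)
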
Example 7
    def __call__(
        self,
        input_layer,
        size,
        activation_fn=None,
        l2loss=None,
        weights=None,
        bias=tf.zeros_initializer(),
        transpose_weights=False,
        phase=prettytensor.Phase.train,
        parameter_modifier=parameters.identity,
        name=PROVIDED,
    ):
        """Adds the parameters for a fully connected layer and returns a tensor.

    The current PrettyTensor must have rank 2.

    Args:
      input_layer: The Pretty Tensor object, supplied.
      size: The number of neurons.
      activation_fn: A tuple of (activation_function, extra_parameters). Any
        function that takes a tensor as its first argument can be used. More
        common functions will have summaries added (e.g. relu).
      l2loss: Set to a value greater than 0 to use L2 regularization to decay
        the weights.
      weights:  An initializer for weights or a Tensor. If not specified,
        uses He's initialization.
      bias: An initializer for the bias or a Tensor. No bias if set to None.
      transpose_weights: Flag indicating if weights should be transposed;
        this is useful for loading models with a different shape.
      phase: The phase of graph construction.  See `pt.Phase`.
      parameter_modifier: A function to modify parameters that is applied after
        creation and before use.
      name: The name for this operation is also used to create/find the
        parameter variables.
    Returns:
      A Pretty Tensor handle to the layer.
    Raises:
      ValueError: if the Pretty Tensor is not rank 2 or the number of input
        nodes (second dim) is not known.
    """
        if input_layer.get_shape().ndims != 2:
            raise ValueError(
                "fully_connected requires a rank 2 Tensor with known second " "dimension: %s" % input_layer.get_shape()
            )
        in_size = input_layer.shape[1]
        if input_layer.shape[1] is None:
            raise ValueError("Number of input nodes must be known.")
        books = input_layer.bookkeeper
        if weights is None:
            weights = layers.he_init(in_size, size, activation_fn)

        dtype = input_layer.tensor.dtype
        weight_shape = [size, in_size] if transpose_weights else [in_size, size]

        params = parameter_modifier("weights", self.variable("weights", weight_shape, weights, dt=dtype), phase)
        y = tf.matmul(input_layer, params, transpose_b=transpose_weights)
        layers.add_l2loss(books, params, l2loss)
        if bias is not None:
            y += parameter_modifier("bias", self.variable("bias", [size], bias, dt=dtype), phase)

        if activation_fn is not None:
            if not isinstance(activation_fn, collections.Sequence):
                activation_fn = (activation_fn,)
            y = layers.apply_activation(books, y, activation_fn[0], activation_args=activation_fn[1:])
        books.add_histogram_summary(y, "%s/activations" % y.op.name)
        return input_layer.with_tensor(y, parameters=self.vars)
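
# Usage sketch (added for illustration; not part of the scraped example).
# This variant threads `phase` and a `parameter_modifier` through parameter
# creation; the default `parameters.identity` returns the variable unchanged.
# Assumes TF 1.x and `prettytensor`; the shape and size are hypothetical.
import tensorflow as tf
import prettytensor as pt

features = tf.placeholder(tf.float32, [None, 100])
head = pt.wrap(features).fully_connected(50, phase=pt.Phase.train)
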
  def __call__(self,
               input_layer,
               kernel,
               channel_multiplier,
               activation_fn=None,
               stride=None,
               l2loss=None,
               weights=None,
               bias=tf.zeros_initializer(),
               edges=PAD_SAME,
               batch_normalize=False,
               phase=prettytensor.Phase.train,
               parameter_modifier=parameters.identity,
               name=PROVIDED):
    """Adds a depth-wise convolution to the stack of operations.

    A depthwise convolution performs the convolutions one channel at a time and
    produces an output with depth `channel_multiplier * input_depth`.

    `kernel` is the patch that will be convolved and it describes the kernel
    along each of the 4 dimensions.  The stride is how big a step to take.

    * scalar (e.g. 3): Square kernel on the image
        (`[b, c, r, d] = [1, 3, 3, 1]`).
    * singleton list (e.g. [3]): Square kernel on the image
        (`[b, c, r, d] = [1, 3, 3, 1]`).
    * list of length 2 (e.g. [3, 2]): Rectangular kernel on the image
        (`[b, c, r, d] = [1, 3, 2, 1]`).

    Args:
      input_layer: The chainable object, supplied.
      kernel: The size of the convolution kernel, either an int or a length 1
        or 2 sequence (if length 1 or int, it is expanded).
      channel_multiplier: Output channels will be a multiple of input channels.
      activation_fn: A tuple of (activation_function, extra_parameters). Any
        function that takes a tensor as its first argument can be used. More
        common functions will have summaries added (e.g. relu).
      stride: The strides as a length 1, 2 or 4 sequence or an integer. If an
        int or a length 1 or 2 sequence, the strides in the first and last
        dimensions are set to 1.
      l2loss: Set to a value greater than 0 to use L2 regularization to decay
        the weights.
      weights:  An initializer for weights or a Tensor. If not specified,
        uses He's initialization.
      bias: An initializer for the bias or a Tensor. No bias if set to None.
      edges: Either `pt.PAD_SAME` to use 0s for the out of bounds area or
        `pt.PAD_VALID` to shrink the output size and only use valid input
        pixels.
      batch_normalize: Supply a BatchNormalizationArguments to set the
        parameters for batch normalization.
      phase: The phase of graph construction.  See `pt.Phase`.
      parameter_modifier: A function to modify parameters that is applied after
        creation and before use.
      name: The name for this operation is also used to create/find the
        parameter variables.
    Returns:
      Handle to the generated layer.
    Raises:
      ValueError: If input_layer is not a rank 4 tensor or the depth of the
        input (4th dim) is not known.
    """
    if input_layer.get_shape().ndims != 4:
      raise ValueError(
          'depthwise_conv2d requires a rank 4 Tensor with a known depth %s' %
          input_layer.get_shape())
    if input_layer.shape[3] is None:
      raise ValueError('Input depth must be known')
    kernel = _kernel(kernel)
    stride = _stride(stride)
    size = [kernel[0], kernel[1], input_layer.shape[3], channel_multiplier]

    books = input_layer.bookkeeper
    if weights is None:
      patch_size = size[0] * size[1]
      weights = layers.he_init(size[2] * patch_size, size[3] * patch_size,
                               activation_fn)

    dtype = input_layer.tensor.dtype
    params = parameter_modifier(
        'weights',
        self.variable('weights', size, weights, dt=dtype),
        phase)
    y = tf.nn.depthwise_conv2d(input_layer, params, stride, edges)
    layers.add_l2loss(books, params, l2loss)
    if bias is not None:
      y += parameter_modifier(
          'bias',
          self.variable('bias', [input_layer.shape[3] * channel_multiplier],
                        bias,
                        dt=dtype),
          phase)
    books.add_scalar_summary(
        tf.reduce_mean(layers.spatial_slice_zeros(y)),
        '%s/zeros_spatial' % y.op.name)
    y = pretty_tensor_normalization_methods.batch_normalize_with_arguments(
        y, batch_normalize)
    if activation_fn is not None:
      if not isinstance(activation_fn, collections.Sequence):
        activation_fn = (activation_fn,)
      y = layers.apply_activation(books,
                                  y,
                                  activation_fn[0],
                                  activation_args=activation_fn[1:])
    books.add_histogram_summary(y, '%s/activations' % y.op.name)
    return input_layer.with_tensor(y, parameters=self.vars)
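
# Usage sketch (added for illustration; not part of the scraped example).
# Output depth is input_depth * channel_multiplier; with SAME padding a
# stride of 2 halves each spatial dimension. Assumes TF 1.x and `prettytensor`.
import tensorflow as tf
import prettytensor as pt

images = tf.placeholder(tf.float32, [None, 64, 64, 8])
head = pt.wrap(images).depthwise_conv2d(3, 2, stride=2)
# head shape: [None, 32, 32, 16]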