Example #1
def max_pool(x, name, kernel_size, strides, padding):
    """Max pooling layer on each GPU device.

  Args:
    x: A tensor of size [batch_size, height_in, width_in, channels].
    name: The prefix of tensorflow variables defined in this layer.
    kernel_size: A number indicating the size of pooling kernels.
    strides: A number indicating the stride of the sliding window for
      height and width.
    padding: 'VALID' or 'SAME'.

  Returns:
    A tensor of size [batch_size, height_out, width_out, channels].
  """
    return nn.max_pool(x, name, kernel_size, strides, padding)
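
A minimal usage sketch for the wrapper above, assuming TensorFlow 1.x and that the repository's `nn` helpers are importable (the input shape below is illustrative):

import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 224, 224, 64])
pooled = max_pool(images, name='pool1', kernel_size=3, strides=2,
                  padding='SAME')
# With 'SAME' padding and stride 2 the spatial size is halved:
# `pooled` has shape [None, 112, 112, 64].
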
def bottleneck(x,
               name,
               filters,
               strides=None,
               dilation=None,
               is_training=True,
               use_global_status=True):
    """Builds the bottleneck module in ResNet.

  This function stack 3 convolutional layers and fuse the output with
  the residual connection.

  Args:
    x: A tensor of size [batch_size, height_in, width_in, channels].
    name: The prefix of tensorflow variables defined in this layer.
    filters: A number indicating the number of output channels.
    strides: A number indicating the stride of the sliding window for
      height and width.
    dilation: A number indicating the dilation factor for height and width.
    is_training: If the tensorflow variables defined in this layer 
      would be used for training.
    use_global_status: enable/disable use_global_status for batch
      normalization. If True, moving mean and moving variance are updated
      by exponential decay.

  Returns:
    A tensor of size [batch_size, height_out, width_out, channels_out].
  """
    if strides is None and dilation is None:
        raise ValueError('Neither strides nor dilation is specified; '
                         'set one of them to 1 or a larger number.')
    elif (strides is not None and strides > 1
          and dilation is not None and dilation > 1):
        raise ValueError('strides and dilation are both specified; '
                         'set one of them to 1 or None.')

    # Default to stride 1 when only dilation is given, so the comparisons
    # below do not fail on a None value.
    if strides is None:
        strides = 1

    with tf.variable_scope(name) as scope:
        c_i = x.get_shape().as_list()[-1]

        if c_i != filters * 4:
            # Use a convolutional layer as residual connection when the
            # number of input channels is different from output channels.
            shortcut = nn.conv(x,
                               name='shortcut',
                               filters=filters * 4,
                               kernel_size=1,
                               strides=strides,
                               padding='VALID',
                               biased=False,
                               bn=True,
                               relu=False,
                               is_training=is_training,
                               use_global_status=use_global_status)
        elif strides > 1:
            # Use max-pooling as the residual connection when the number
            # of input channels equals the number of output channels but
            # the stride is larger than 1.
            shortcut = nn.max_pool(x,
                                   name='shortcut',
                                   kernel_size=1,
                                   strides=strides,
                                   padding='VALID')
        else:
            # Otherwise, keep the original input as residual connection.
            shortcut = x

        # Build the 1st convolutional layer.
        x = nn.conv(x,
                    name='conv1',
                    filters=filters,
                    kernel_size=1,
                    strides=1,
                    padding='SAME',
                    biased=False,
                    bn=True,
                    relu=True,
                    is_training=is_training,
                    use_global_status=use_global_status)

        if dilation is not None and dilation > 1:
            # If dilation > 1, apply atrous conv to the 2nd convolutional layer.
            x = nn.atrous_conv(x,
                               name='conv2',
                               filters=filters,
                               kernel_size=3,
                               dilation=dilation,
                               padding='SAME',
                               biased=False,
                               bn=True,
                               relu=True,
                               is_training=is_training,
                               use_global_status=use_global_status)
        else:
            padding = 'VALID' if strides > 1 else 'SAME'
            x = nn.conv(x,
                        name='conv2',
                        filters=filters,
                        kernel_size=3,
                        strides=strides,
                        padding=padding,
                        biased=False,
                        bn=True,
                        relu=True,
                        is_training=is_training,
                        use_global_status=use_global_status)

        # Build the 3rd convolutional layer (increase the channels).
        x = nn.conv(x,
                    name='conv3',
                    filters=filters * 4,
                    kernel_size=1,
                    strides=1,
                    padding='SAME',
                    biased=False,
                    bn=True,
                    relu=False,
                    is_training=is_training,
                    use_global_status=use_global_status)

        # Fuse the convolutional outputs with residual connection.
        x = tf.add_n([x, shortcut], name='add')
        x = tf.nn.relu(x, name='relu')

    return x
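
A hedged sketch of calling `bottleneck` directly; the scope name follows the block{i}/unit_{j} convention used by `resnet_v1` below, and the input shape is illustrative:

# Hypothetical example: one strided bottleneck unit.
x_in = tf.placeholder(tf.float32, [None, 56, 56, 256])
y = bottleneck(x_in,
               name='block2/unit_1/bottleneck_v1',
               filters=128,
               strides=2,
               dilation=None,
               is_training=False,
               use_global_status=True)
# `y` has filters * 4 = 512 output channels; the spatial size shrinks by
# the stride (the exact amount depends on the padding inside nn.conv).
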
def resnet_v1(x,
              name,
              filters=[64, 128, 256, 512],
              num_blocks=[3, 4, 23, 3],
              strides=[2, 1, 1, 1],
              dilations=[None, None, 2, 2],
              is_training=True,
              use_global_status=True,
              reuse=False):
    """Helper function to build ResNet.

  Args:
    x: A tensor of size [batch_size, height_in, width_in, channels].
    name: The prefix of tensorflow variables defined in this network.
    filters: A list of numbers indicating the number of output channels
      (The output channels would be 4 times to the numbers).
    strides: A list of numbers indicating the stride of the sliding window for
      height and width.
    dilation: A number indicating the dilation factor for height and width.
    is_training: If the tensorflow variables defined in this layer 
      would be used for training.
    use_global_status: enable/disable use_global_status for batch
      normalization. If True, moving mean and moving variance are updated
      by exponential decay.
    reuse: enable/disable reuse for reusing tensorflow variables. It is 
      useful for sharing weight parameters across two identical networks.

  Returns:
    A tensor of size [batch_size, height_out, width_out, channels_out].
  """
    if (len(filters) != len(num_blocks) or len(filters) != len(strides)
            or len(filters) != len(dilations)):
        raise ValueError('filters, num_blocks, strides and dilations must '
                         'have the same length.')

    with tf.variable_scope(name, reuse=reuse) as scope:
        # Build conv1.
        x = nn.conv(x,
                    name='conv1',
                    filters=64,
                    kernel_size=7,
                    strides=2,
                    padding='VALID',
                    biased=False,
                    bn=True,
                    relu=True,
                    is_training=is_training,
                    use_global_status=use_global_status)
        bn = []
        bn.append(x)
        # Build pool1.
        x = nn.max_pool(x,
                        name='pool1',
                        kernel_size=3,
                        strides=2,
                        padding='VALID')

        # Build residual bottleneck blocks.

        for ib in range(len(filters)):
            for iu in range(num_blocks[ib]):
                name_format = 'block{:d}/unit_{:d}/bottleneck_v1'
                block_name = name_format.format(ib + 1, iu + 1)

                c_o = filters[ib]  # output channel
                # Apply the stride only to the last unit of each block.
                s = strides[ib] if iu == num_blocks[ib] - 1 else 1
                d = dilations[ib]
                if iu == num_blocks[ib] - 1:
                    bn.append(x)
                x = bottleneck(x,
                               name=block_name,
                               filters=c_o,
                               strides=s,
                               dilation=d,
                               is_training=is_training,
                               use_global_status=use_global_status)

        return x, bn
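
The defaults (num_blocks=[3, 4, 23, 3], with dilation in the last two blocks) correspond to a dilated ResNet-101 trunk. A usage sketch, with an illustrative scope name and input shape:

# Hypothetical example: build a ResNet-101 feature extractor.
images = tf.placeholder(tf.float32, [None, 512, 512, 3])
net, branch_points = resnet_v1(images,
                               name='resnet_v1_101',
                               is_training=False,
                               use_global_status=True,
                               reuse=False)
# `net` is the output of the last bottleneck unit; `branch_points` holds
# the intermediate tensors collected in `bn` for skip connections.
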
Example #4
def _unet_builder(x,
                  mask,
                  name,
                  filters=[64, 128, 256, 512, 1024],
                  num_blocks=[2, 3, 3, 3, 3],
                  strides=[2, 2, 2, 2, 2],
                  is_training=True,
                  use_global_status=False,
                  reuse=False):
    """Helper function to construct UNet.
  """
    if (len(filters) != len(num_blocks)
            or len(filters) != len(strides)):
        raise ValueError('filters, num_blocks and strides must have '
                         'the same length.')

    with tf.variable_scope('Analyzer', reuse=reuse) as scope:
        with tf.name_scope(name):
            input_x = x

            # Encoder.
            shortcuts = []
            not_ignore_masks = []
            for ib in range(len(filters)):
                for iu in range(num_blocks[ib]):
                    name_format = 'layer{:d}/unit_{:d}/encoder/'
                    block_name = name_format.format(ib + 1, iu + 1)
                    c_o = filters[ib]  # output channel

                    # Apply the stride at the beginning of each block.
                    s = strides[ib] if iu == 0 else 1
                    padding = 'VALID' if s > 1 else 'SAME'
                    if ib == 0 and iu == 0:
                        x = []
                        for ix, in_x in enumerate(input_x):
                            x.append(
                                nn.conv(
                                    in_x,
                                    name=block_name + 'conv{:d}'.format(ix),
                                    filters=int(c_o / 2),
                                    #filters=c_o,
                                    kernel_size=3,
                                    strides=s,
                                    padding=padding,
                                    #biased=False,
                                    #bn=True,
                                    biased=True,
                                    bn=False,
                                    relu=False,
                                    decay=0.99,
                                    is_training=is_training,
                                    use_global_status=use_global_status))
                        x = tf.concat(x, axis=-1, name=block_name + 'concat')
                    else:
                        x = nn.conv(
                            x,
                            name=block_name + 'conv',
                            filters=c_o,
                            kernel_size=3,
                            strides=s,
                            padding=padding,
                            #biased=False,
                            #bn=True,
                            biased=True,
                            bn=False,
                            relu=False,
                            decay=0.99,
                            is_training=is_training,
                            use_global_status=use_global_status)

                    if iu == 0:
                        mask = nn.max_pool(mask,
                                           block_name + 'mask_pool',
                                           3,
                                           s,
                                           padding=padding)
                        not_ignore_masks.append(1 - mask)
                    f = tf.multiply(x,
                                    not_ignore_masks[-1],
                                    name=block_name + 'masked_conv')
                    tf.add_to_collection('Analyzer/features', f)
                    x = tf.nn.relu(x)
                print(x)  # Debugging aid: log the tensor after each encoder block.
                shortcuts.append(x)

            # Decoder.
            for ib in range(len(shortcuts) - 1, 0, -1):
                for iu in range(num_blocks[ib - 1]):
                    n, h, w, c_o = shortcuts[ib - 1].get_shape().as_list()
                    name_format = 'layer{:d}/unit_{:d}/decoder/'
                    block_name = name_format.format(2 * len(filters) - ib,
                                                    iu + 1)
                    x = nn.conv(
                        x,
                        name=block_name + 'conv',
                        filters=c_o,
                        kernel_size=3,
                        strides=1,
                        padding='SAME',
                        #biased=False,
                        #bn=True,
                        biased=True,
                        bn=False,
                        relu=False,
                        decay=0.99,
                        is_training=is_training,
                        use_global_status=use_global_status)

                    f = tf.multiply(x,
                                    not_ignore_masks[ib],
                                    name=block_name + 'masked_conv')
                    tf.add_to_collection('Analyzer/features', f)
                    x = tf.nn.relu(x)
                    if iu == 0:
                        x = tf.image.resize_bilinear(x, [h, w])
                        x = tf.concat([x, shortcuts[ib - 1]], axis=-1)
                print(x)  # Debugging aid: log the tensor after each decoder block.

            c_i = 0
            for in_x in input_x:
                c_i += in_x.get_shape().as_list()[-1]
            x = nn.conv(x,
                        name='block5/fc',
                        filters=c_i,
                        kernel_size=1,
                        strides=1,
                        padding='SAME',
                        biased=True,
                        bn=False,
                        relu=False,
                        is_training=is_training)
            x = tf.image.resize_bilinear(x, tf.shape(input_x[0])[1:3])
            tf.add_to_collection('Analyzer/outputs', x)
        return x
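
Unlike the ResNet helpers, `_unet_builder` takes a list of input tensors plus an ignore mask (1 marks pixels to ignore, since the code multiplies features by 1 - mask). A minimal sketch under those assumptions, with illustrative shapes:

# Hypothetical example: reconstruct two concatenated input streams.
feat_a = tf.placeholder(tf.float32, [None, 256, 256, 3])
feat_b = tf.placeholder(tf.float32, [None, 256, 256, 1])
ignore = tf.placeholder(tf.float32, [None, 256, 256, 1])
recon = _unet_builder([feat_a, feat_b],
                      mask=ignore,
                      name='unet',
                      is_training=True,
                      reuse=False)
# `recon` has as many channels as the inputs combined (3 + 1 = 4) and is
# resized bilinearly back to the resolution of the first input.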