Example #1
    def AddFCLayer(self, prev_layer, index):
        """Parse expression and add Fully Connected Layer.

        Args:
          prev_layer: Input tensor.
          index:      Position in model_str to start parsing

        Returns:
          Output tensor, end index in model_str.
        """
        pattern = re.compile(R'(F)(s|t|r|l|m)({\w+})?(\d+)')
        m = pattern.match(self.model_str, index)
        if m is None:
            return None, None
        fn = self._NonLinearity(m.group(2))
        name = self._GetLayerName(m.group(0), index, m.group(3))
        depth = int(m.group(4))
        input_depth = shapes.tensor_dim(prev_layer, 1) * shapes.tensor_dim(
            prev_layer, 2) * shapes.tensor_dim(prev_layer, 3)
        # The slim fully connected is actually a 1x1 conv, so we have to crush the
        # dimensions on input.
        # Everything except batch goes to depth, and therefore has to be known.
        shaped = tf.reshape(
            prev_layer, [-1, input_depth], name=name + '_reshape_in')
        output = slim.fully_connected(shaped, depth, activation_fn=fn, scope=name)
        # Width and height are collapsed to 1.
        self.reduction_factors[1] = None
        self.reduction_factors[2] = None
        return tf.reshape(
            output, [shapes.tensor_dim(prev_layer, 0), 1, 1, depth],
            name=name + '_reshape_out'), m.end()
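
The spec token this method consumes can be checked in isolation, without building a graph; a minimal sketch using only re and the same pattern (the spec strings below are made-up examples, not taken from any real model_str):

import re

# Same token pattern as AddFCLayer: 'F', a non-linearity code, an
# optional {name}, and the output depth.
pattern = re.compile(r'(F)(s|t|r|l|m)({\w+})?(\d+)')

for spec in ['Fr64', 'Fs{Out}16', 'Q99']:  # hypothetical spec tokens
    m = pattern.match(spec, 0)
    if m is None:
        print(spec, '-> no match')
    else:
        # group(2) is the non-linearity code, group(4) the depth.
        print(spec, '->', m.groups(), 'consumed up to index', m.end())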
Example #2
    def _AddOutputs(self, prev_layer, out_dims, out_func, num_classes):
        """Adds the output layer and loss function.

        Args:
          prev_layer:  Output of last layer of main network.
          out_dims:    Number of output dimensions, 0, 1 or 2.
          out_func:    Output non-linearity. 's' or 'c'=softmax, 'l'=logistic.
          num_classes: Number of outputs/size of last output dimension.
        """
        height_in = shapes.tensor_dim(prev_layer, dim=1)
        logits, outputs = self._AddOutputLayer(prev_layer, out_dims, out_func,
                                               num_classes)
        if self.mode == 'train':
            # Setup loss for training.
            self.loss = self._AddLossFunction(logits, height_in, out_dims,
                                              out_func)
            tf.summary.scalar('loss', self.loss)
        elif out_dims == 0:
            # Be sure the labels match the output, even in eval mode.
            self.labels = tf.slice(self.labels, [0, 0], [-1, 1])
            self.labels = tf.reshape(self.labels, [-1])

        logging.info('Final output=%s', outputs)
        logging.info('Labels tensor=%s', self.labels)
        self.output = outputs
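
For out_dims == 0 the eval branch above keeps only the first label of each sequence and flattens the result; an illustrative numpy equivalent of that slice/reshape (toy shapes, not from the model):

import numpy as np

labels = np.array([[3, 1, 4],
                   [2, 7, 1]])      # [batch, seq_len] = [2, 3]
first = labels[:, :1]               # like tf.slice(self.labels, [0, 0], [-1, 1])
flat = first.reshape(-1)            # like tf.reshape(self.labels, [-1])
print(flat)                         # [3 2] -- one label per batch element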
Example #3
    def _AddOutputLayer(self, prev_layer, out_dims, out_func, num_classes):
        """Add the fully-connected logits and SoftMax/Logistic output Layer.

        Args:
          prev_layer:  Output of last layer of main network.
          out_dims:    Number of output dimensions, 0, 1 or 2.
          out_func:    Output non-linearity. 's' or 'c'=softmax, 'l'=logistic.
          num_classes: Number of outputs/size of last output dimension.

        Returns:
          logits:  Pre-softmax/logistic fully-connected output shaped to out_dims.
          outputs: Post-softmax/logistic shaped to out_dims.

        Raises:
          ValueError: if syntax is incorrect.
        """
        # Reduce dimensionality appropriate to the output dimensions.
        batch_in = shapes.tensor_dim(prev_layer, dim=0)
        height_in = shapes.tensor_dim(prev_layer, dim=1)
        width_in = shapes.tensor_dim(prev_layer, dim=2)
        depth_in = shapes.tensor_dim(prev_layer, dim=3)
        if out_dims:
            # Combine any remaining height and width with batch and unpack after.
            shaped = tf.reshape(prev_layer, [-1, depth_in])
        else:
            # Everything except batch goes to depth, and therefore has to be known.
            shaped = tf.reshape(prev_layer,
                                [-1, height_in * width_in * depth_in])
        logits = slim.fully_connected(shaped, num_classes, activation_fn=None)
        if out_func == 'l':
            raise ValueError('Logistic not yet supported!')
        else:
            output = tf.nn.softmax(logits)
        # Reshape to the desired output.
        if out_dims == 2:
            output_shape = [batch_in, height_in, width_in, num_classes]
        elif out_dims == 1:
            output_shape = [batch_in, height_in * width_in, num_classes]
        else:
            output_shape = [batch_in, num_classes]
        output = tf.reshape(output, output_shape, name='Output')
        logits = tf.reshape(logits, output_shape)
        return logits, output
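
The final reshape only restores the dimensionality requested by out_dims; a stand-alone sketch of that shape selection (the helper name is ours, purely illustrative):

def output_shape_for(out_dims, batch, height, width, num_classes):
    # Mirrors the three branches above: 2-D keeps height and width,
    # 1-D folds them into a single sequence axis, 0-D keeps only the batch.
    if out_dims == 2:
        return [batch, height, width, num_classes]
    elif out_dims == 1:
        return [batch, height * width, num_classes]
    return [batch, num_classes]

print(output_shape_for(2, 8, 16, 32, 95))  # [8, 16, 32, 95]
print(output_shape_for(1, 8, 16, 32, 95))  # [8, 512, 95]
print(output_shape_for(0, 8, 16, 32, 95))  # [8, 95]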
Example #4
    def _AddLossFunction(self, logits, height_in, out_dims, out_func):
        """Add the appropriate loss function.

        Args:
          logits:  Pre-softmax/logistic fully-connected output shaped to out_dims.
          height_in:  Height of logits before going into the softmax layer.
          out_dims:   Number of output dimensions, 0, 1 or 2.
          out_func:   Output non-linearity. 's' or 'c'=softmax, 'l'=logistic.

        Returns:
          loss: That which is to be minimized.

        Raises:
          ValueError: if logistic is used.
        """
        if out_func == 'c':
            # Transpose batch to the middle.
            ctc_input = tf.transpose(logits, [1, 0, 2])
            # Compute the widths of each batch element from the input widths.
            widths = self.layers.GetLengths(dim=2, factor=height_in)
            cross_entropy = tf.nn.ctc_loss(ctc_input, self.sparse_labels,
                                           widths)
        elif out_func == 's':
            if out_dims == 2:
                self.labels = _PadLabels3d(logits, self.labels)
            elif out_dims == 1:
                self.labels = _PadLabels2d(shapes.tensor_dim(logits, dim=1),
                                           self.labels)
            else:
                self.labels = tf.slice(self.labels, [0, 0], [-1, 1])
                self.labels = tf.reshape(self.labels, [-1])
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=self.labels, name='xent')
        else:
            # TODO(rays) Labels need an extra dimension for logistic, so different
            # padding functions are needed, as well as a different loss function.
            raise ValueError('Logistic not yet supported!')
        return tf.reduce_sum(cross_entropy)
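
For the softmax branch with 0-D output, the loss reduces to an ordinary sparse cross-entropy summed over the batch; an illustrative numpy version of that arithmetic (toy logits, not produced by the model):

import numpy as np

logits = np.array([[2.0, 0.5, -1.0],
                   [0.1, 0.2, 3.0]])     # [batch, num_classes]
labels = np.array([0, 2])                # one class id per batch element

# Log-softmax, then pick the log-probability of the true class.
log_probs = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))
xent = -log_probs[np.arange(len(labels)), labels]
print(xent.sum())                        # analogue of tf.reduce_sum(cross_entropy)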
Example #5
    def _LSTMLayer(self, prev_layer, direction, dim, summarize, depth, name):
        """Adds an LSTM layer with the given pre-parsed attributes.

        Always maps 4-D to 4-D regardless of summarize.

        Args:
          prev_layer: Input tensor.
          direction:  'forward', 'backward' or 'bidirectional'.
          dim:        'x' or 'y', dimension to consider as time.
          summarize:  True if we are to return only the last timestep.
          depth:      Output depth.
          name:       Some string naming the op.

        Returns:
          Output tensor.
        """
        # If the target dimension is y, we need to transpose.
        if dim == 'x':
            lengths = self.GetLengths(2, 1)
            inputs = prev_layer
        else:
            lengths = self.GetLengths(1, 1)
            inputs = tf.transpose(prev_layer, [0, 2, 1, 3], name=name + '_ytrans_in')
        input_batch = shapes.tensor_dim(inputs, 0)
        num_slices = shapes.tensor_dim(inputs, 1)
        num_steps = shapes.tensor_dim(inputs, 2)
        input_depth = shapes.tensor_dim(inputs, 3)
        # Reshape away the other dimension.
        inputs = tf.reshape(
            inputs, [-1, num_steps, input_depth], name=name + '_reshape_in')
        # We need to replicate the lengths by the size of the other dimension, and
        # any changes that have been made to the batch dimension.
        tile_factor = tf.to_float(input_batch *
                                  num_slices) / tf.to_float(tf.shape(lengths)[0])
        lengths = tf.tile(lengths, [tf.cast(tile_factor, tf.int32)])
        lengths = tf.cast(lengths, tf.int64)
        outputs = nn_ops.rnn_helper(
            inputs,
            lengths,
            cell_type='lstm',
            num_nodes=depth,
            direction=direction,
            name=name,
            stddev=0.1)
        # Output depth is doubled if bi-directional.
        if direction == 'bidirectional':
            output_depth = depth * 2
        else:
            output_depth = depth
        # Restore the other dimension.
        if summarize:
            outputs = tf.slice(
                outputs, [0, num_steps - 1, 0], [-1, 1, -1], name=name + '_sum_slice')
            outputs = tf.reshape(
                outputs, [input_batch, num_slices, 1, output_depth],
                name=name + '_reshape_out')
        else:
            outputs = tf.reshape(
                outputs, [input_batch, num_slices, num_steps, output_depth],
                name=name + '_reshape_out')
        if dim == 'y':
            outputs = tf.transpose(outputs, [0, 2, 1, 3], name=name + '_ytrans_out')
        return outputs
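
The transposes and reshapes around the recurrence only re-arrange the 4-D tensor so that the chosen dimension becomes time; an illustrative numpy round trip for dim == 'y' (toy sizes, no RNN is run):

import numpy as np

batch, height, width, depth = 2, 5, 7, 3
x = np.random.rand(batch, height, width, depth)

# dim == 'y': swap height and width so y becomes the time axis.
inputs = x.transpose(0, 2, 1, 3)             # [batch, width, height, depth]
stacked = inputs.reshape(-1, height, depth)  # [batch * width, num_steps, depth]

# ... the LSTM would run here over the time (second) axis ...

restored = stacked.reshape(batch, width, height, depth).transpose(0, 2, 1, 3)
assert np.array_equal(restored, x)           # shape bookkeeping is lossless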
Example #6
def lstm_layer(inp,
               length=None,
               state=None,
               memory=None,
               num_nodes=None,
               backward=False,
               clip=50.0,
               reg_func=tf.nn.l2_loss,
               weight_reg=False,
               weight_collection="LSTMWeights",
               bias_reg=False,
               stddev=None,
               seed=None,
               decode=False,
               use_native_weights=False,
               name=None):
    """Adds ops for an LSTM layer.

    This adds ops for the following operations:

      input => (forward-LSTM|backward-LSTM) => output

    The direction of the LSTM is determined by `backward`. If it is false, the
    forward LSTM is used; otherwise the backward one.

    Args:
      inp: A 3-D tensor of shape [`batch_size`, `max_length`, `feature_dim`].
      length: A 1-D tensor of shape [`batch_size`] and type int64. Each element
              represents the length of the corresponding sequence in `inp`.
      state: If specified, uses it as the initial state.
      memory: If specified, uses it as the initial memory.
      num_nodes: The number of LSTM cells.
      backward: If true, reverses the `inp` before adding the ops. The output is
                also reversed so that the direction is the same as `inp`.
      clip: Value used to clip the cell values.
      reg_func: Function used for the weight regularization such as
                `tf.nn.l2_loss`.
      weight_reg: If true, regularize the filter weights with `reg_func`.
      weight_collection: Collection to add the weights to for regularization.
      bias_reg: If true, regularize the bias vector with `reg_func`.
      stddev: Standard deviation used to initialize the variables.
      seed: Seed used to initialize the variables.
      decode: If true, does not add ops which are not used for inference.
      use_native_weights: If true, uses weights in the same format as the native
                          implementations.
      name: Name of the op.

    Returns:
      out: A 3-D tensor of shape [`batch_size`, `max_length`, `num_nodes`].
      mem: The memory values of the LSTM, as returned by `rnn.variable_lstm`.
    """
    with tf.variable_scope(name):
        if backward:
            if length is None:
                inp = tf.reverse(inp, [1])
            else:
                inp = tf.reverse_sequence(inp, length, 1, 0)

        num_prev = inp.get_shape()[2]
        if stddev:
            initializer = tf.truncated_normal_initializer(stddev=stddev,
                                                          seed=seed)
        else:
            initializer = tf.uniform_unit_scaling_initializer(seed=seed)

        if use_native_weights:
            with tf.variable_scope("LSTMCell"):
                w = tf.get_variable(
                    "W_0",
                    shape=[num_prev + num_nodes, 4 * num_nodes],
                    initializer=initializer,
                    dtype=tf.float32)
                w_i_m = tf.slice(w, [0, 0], [num_prev, 4 * num_nodes],
                                 name="w_i_m")
                w_m_m = tf.reshape(tf.slice(w, [num_prev, 0],
                                            [num_nodes, 4 * num_nodes]),
                                   [num_nodes, 4, num_nodes],
                                   name="w_m_m")
        else:
            w_i_m = tf.get_variable("w_i_m", [num_prev, 4 * num_nodes],
                                    initializer=initializer)
            w_m_m = tf.get_variable("w_m_m", [num_nodes, 4, num_nodes],
                                    initializer=initializer)

        if not decode and weight_reg:
            tf.add_to_collection(weight_collection,
                                 reg_func(w_i_m, name="w_i_m_reg"))
            tf.add_to_collection(weight_collection,
                                 reg_func(w_m_m, name="w_m_m_reg"))

        batch_size = shapes.tensor_dim(inp, dim=0)
        num_frames = shapes.tensor_dim(inp, dim=1)
        prev = tf.reshape(inp, tf.stack([batch_size * num_frames, num_prev]))

        if use_native_weights:
            with tf.variable_scope("LSTMCell"):
                b = tf.get_variable("B",
                                    shape=[4 * num_nodes],
                                    initializer=tf.zeros_initializer(),
                                    dtype=tf.float32)
            biases = tf.identity(b, name="biases")
        else:
            biases = tf.get_variable("biases", [4 * num_nodes],
                                     initializer=tf.constant_initializer(0.0))
        if not decode and bias_reg:
            tf.add_to_collection(weight_collection,
                                 reg_func(biases, name="biases_reg"))
        prev = tf.nn.xw_plus_b(prev, w_i_m, biases)

        prev = tf.reshape(prev,
                          tf.stack([batch_size, num_frames, 4, num_nodes]))
        if state is None:
            state = tf.fill(tf.stack([batch_size, num_nodes]), 0.0)
        if memory is None:
            memory = tf.fill(tf.stack([batch_size, num_nodes]), 0.0)

        out, _, mem = rnn.variable_lstm(prev, state, memory, w_m_m, clip=clip)

        if backward:
            if length is None:
                out = tf.reverse(out, [1])
            else:
                out = tf.reverse_sequence(out, length, 1, 0)

    return out, mem
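
When use_native_weights is set, the single LSTMCell matrix is split into an input-to-gates block and a recurrent block; an illustrative numpy check of that slicing and the resulting shapes (sizes are arbitrary):

import numpy as np

num_prev, num_nodes = 6, 4
w = np.zeros((num_prev + num_nodes, 4 * num_nodes))         # like the native "W_0" variable

w_i_m = w[:num_prev, :]                                     # input -> 4 gates
w_m_m = w[num_prev:, :].reshape(num_nodes, 4, num_nodes)    # recurrent -> 4 gates

print(w_i_m.shape)  # (6, 16)
print(w_m_m.shape)  # (4, 4, 4)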