Example 1
def update_network_states(comp, tensors, network_states, stride):
  """Stores Tensor objects corresponding to layer outputs.

  For use in subsequent tasks.

  Args:
    comp: Component for which the tensor handles are being stored.
    tensors: list of Tensors to store.
    network_states: dictionary of component NetworkState objects
    stride: stride of the stored tensor.
  """
  network_state = network_states[comp.name]
  with tf.name_scope(comp.name + '/stored_act'):
    for index, network_tensor in enumerate(tensors):
      network_state.activations[comp.network.layers[index].name] = (
          network_units.StoredActivations(tensor=network_tensor, stride=stride,
                                          dim=comp.network.layers[index].dim))
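
A minimal, self-contained sketch of the data layout this helper produces, using plain Python stand-ins instead of the real DRAGNN classes; ToyLayer, ToyNetworkState, ToyStored, the layer names, and the stride value are all illustrative, not library types or values.

# Toy sketch of the storage pattern in update_network_states. All names and
# values below are illustrative stand-ins, not DRAGNN APIs.
import collections

ToyLayer = collections.namedtuple('ToyLayer', ['name', 'dim'])
ToyStored = collections.namedtuple('ToyStored', ['tensor', 'stride', 'dim'])

class ToyNetworkState(object):
  def __init__(self):
    self.activations = {}  # layer name -> stored-activation record

layers = [ToyLayer('lstm_h', 128), ToyLayer('logits', 45)]
tensors = ['<lstm_h tensor>', '<logits tensor>']  # stand-ins for tf.Tensor objects
network_states = {'tagger': ToyNetworkState()}

# Same loop structure as update_network_states, minus the TF name scope:
for index, tensor in enumerate(tensors):
  network_states['tagger'].activations[layers[index].name] = ToyStored(
      tensor=tensor, stride=8, dim=layers[index].dim)

# A subsequent task can now look up a stored layer by component and layer name:
assert network_states['tagger'].activations['logits'].dim == 45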
Example 2
  def build_greedy_training(self, state, network_states):
    """Builds a training loop for this component.

    This loop repeatedly evaluates the network and computes the loss, but it
    does not advance using the predictions of the network. Instead, it advances
    using the oracle defined in the underlying transition system. The final
    state will always correspond to the gold annotation.

    Args:
      state: MasterState from the 'AdvanceMaster' op that advances the
        underlying master to this component.
      network_states: NetworkState object containing component TensorArrays.

    Returns:
      (state, cost, correct, total) -- These are TF ops corresponding to
      the final state after unrolling, the total cost, the total number of
      correctly predicted actions, and the total number of actions.
    """
    logging.info('Building component: %s', self.spec.name)
    stride = state.current_batch_size * self.training_beam_size

    cost = tf.constant(0.)
    correct = tf.constant(0)
    total = tf.constant(0)

    def cond(handle, *_):
      all_final = dragnn_ops.emit_all_final(handle, component=self.name)
      return tf.logical_not(tf.reduce_all(all_final))

    def body(handle, cost, correct, total, *arrays):
      """Runs the network and advances the state by a step."""

      with tf.control_dependencies([handle, cost, correct, total] +
                                   [x.flow for x in arrays]):
        # Get a copy of the network inside this while loop.
        updated_state = MasterState(handle, state.current_batch_size)
        network_tensors = self._feedforward_unit(
            updated_state, arrays, network_states, stride, during_training=True)

        # Every layer is written to a TensorArray, so that it can be backprop'd.
        next_arrays = update_tensor_arrays(network_tensors, arrays)
        with tf.control_dependencies([x.flow for x in next_arrays]):
          with tf.name_scope('compute_loss'):
            # A gold label > -1 indicates that the sentence is still
            # in a valid state. Otherwise, the sentence has ended.
            #
            # We add only the valid sentences to the loss, in the following way:
            #   1. We compute 'valid_ix', the indices in gold that contain
            #      valid oracle actions.
            #   2. We compute the cost function by comparing logits and gold
            #      only for the valid indices.
            gold = dragnn_ops.emit_oracle_labels(handle, component=self.name)
            gold.set_shape([None])
            valid = tf.greater(gold, -1)
            valid_ix = tf.reshape(tf.where(valid), [-1])
            gold = tf.gather(gold, valid_ix)

            logits = self.network.get_logits(network_tensors)
            logits = tf.gather(logits, valid_ix)

            cost += tf.reduce_sum(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=tf.cast(gold, tf.int64), logits=logits))

            if (self.eligible_for_self_norm and
                self.master.hyperparams.self_norm_alpha > 0):
              log_z = tf.reduce_logsumexp(logits, [1])
              cost += (self.master.hyperparams.self_norm_alpha *
                       tf.nn.l2_loss(log_z))

            correct += tf.reduce_sum(
                tf.to_int32(tf.nn.in_top_k(logits, gold, 1)))
            total += tf.size(gold)

        with tf.control_dependencies([cost, correct, total, gold]):
          handle = dragnn_ops.advance_from_oracle(handle, component=self.name)
        return [handle, cost, correct, total] + next_arrays

    # Create the TensorArrays to store activations for downstream/recurrent
    # connections.
    with tf.name_scope(self.name + '/train_state'):
      init_arrays = []
      for layer in self.network.layers:
        init_arrays.append(layer.create_array(state.current_batch_size))

    output = tf.while_loop(
        cond,
        body, [state.handle, cost, correct, total] + init_arrays,
        name='train_%s' % self.name)

    # Save completed arrays and return final state and cost.
    state.handle = output[0]
    cost = output[1]
    correct = output[2]
    total = output[3]
    arrays = output[4:]

    # Store handles to the final output for use in subsequent tasks.
    network_state = network_states[self.name]
    with tf.name_scope(self.name + '/stored_act'):
      for index, layer in enumerate(self.network.layers):
        network_state.activations[layer.name] = network_units.StoredActivations(
            array=arrays[index])

    # Normalize the objective by the total # of steps taken.
    with tf.control_dependencies([tf.assert_greater(total, 0)]):
      cost /= tf.to_float(total)

    # Adds regularization for the hidden weights.
    cost = self.add_regularizer(cost)

    with tf.control_dependencies([x.flow for x in arrays]):
      return tf.identity(state.handle), cost, correct, total
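
A stripped-down TF 1.x sketch of the unroll pattern used in build_greedy_training above: a tf.while_loop whose body both accumulates a scalar (standing in for the cost) and writes one activation per step into a TensorArray (standing in for the per-layer arrays). The stopping condition, sizes, and values are illustrative, not taken from DRAGNN.

# Illustrative TF 1.x sketch of the while_loop + TensorArray unroll pattern.
import tensorflow as tf

acts = tf.TensorArray(tf.float32, size=0, dynamic_size=True, name='acts')
step = tf.constant(0)
cost = tf.constant(0.)

def cond(step, *_):
  return step < 5  # stand-in for the emit_all_final / reduce_all check

def body(step, cost, acts):
  activation = tf.fill([3], tf.to_float(step))  # stand-in for the network output
  acts = acts.write(step, activation)
  cost += tf.reduce_sum(activation)             # stand-in for the per-step loss
  return step + 1, cost, acts

_, final_cost, final_acts = tf.while_loop(
    cond, body, [step, cost, acts], name='train_sketch')

with tf.Session() as sess:
  print(sess.run([final_cost, final_acts.stack()]))  # cost 30.0, activations of shape [5, 3]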
Example 3
  def build_greedy_inference(self, state, network_states,
                             during_training=False):
    """Builds an inference loop for this component.

    Repeatedly evaluates the network and advances the underlying state according
    to the predicted scores.

    Args:
      state: MasterState from the 'AdvanceMaster' op that advances the
        underlying master to this component.
      network_states: NetworkState object containing component TensorArrays.
      during_training: whether the graph is being constructed during training.

    Returns:
      Handle to the state once inference is complete for this Component.
    """
    logging.info('Building component: %s', self.spec.name)
    if during_training:
      stride = state.current_batch_size * self.training_beam_size
    else:
      stride = state.current_batch_size * self.inference_beam_size

    def cond(handle, *_):
      all_final = dragnn_ops.emit_all_final(handle, component=self.name)
      return tf.logical_not(tf.reduce_all(all_final))

    def body(handle, *arrays):
      """Runs the network and advances the state by a step."""

      with tf.control_dependencies([handle] + [x.flow for x in arrays]):
        # Get a copy of the network inside this while loop.
        updated_state = MasterState(handle, state.current_batch_size)
        network_tensors = self._feedforward_unit(
            updated_state,
            arrays,
            network_states,
            stride,
            during_training=during_training)
        next_arrays = update_tensor_arrays(network_tensors, arrays)
        with tf.control_dependencies([x.flow for x in next_arrays]):
          logits = self.network.get_logits(network_tensors)
          logits = tf.cond(self.locally_normalize,
                           lambda: tf.nn.log_softmax(logits), lambda: logits)
          handle = dragnn_ops.advance_from_prediction(
              handle, logits, component=self.name)
        return [handle] + next_arrays

    # Create the TensorArrays to store activations for downstream/recurrent
    # connections.
    with tf.name_scope(self.name + '/inference_state'):
      init_arrays = []
      for layer in self.network.layers:
        init_arrays.append(layer.create_array(stride))
    output = tf.while_loop(
        cond,
        body, [state.handle] + init_arrays,
        name='inference_%s' % self.name)

    # Saves completed arrays and returns final state.
    state.handle = output[0]
    arrays = output[1:]
    network_state = network_states[self.name]
    with tf.name_scope(self.name + '/stored_act'):
      for index, layer in enumerate(self.network.layers):
        network_state.activations[layer.name] = network_units.StoredActivations(
            array=arrays[index])
    with tf.control_dependencies([x.flow for x in arrays]):
      return tf.identity(state.handle)
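
A small TF 1.x sketch of the optional local normalization step above: when a boolean tensor is true, raw scores are converted to log-probabilities before being handed to the transition system. In the source, self.locally_normalize is presumably such a boolean tensor; here a placeholder_with_default stands in for it, and the logits are made up.

# Illustrative TF 1.x sketch of conditional local normalization via tf.cond.
import tensorflow as tf

logits = tf.constant([[2.0, 1.0, 0.0]])
locally_normalize = tf.placeholder_with_default(False, shape=[])

scores = tf.cond(locally_normalize,
                 lambda: tf.nn.log_softmax(logits),
                 lambda: logits)

with tf.Session() as sess:
  print(sess.run(scores))                                       # raw logits
  print(sess.run(scores, feed_dict={locally_normalize: True}))  # log-probabilities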
Example 4
    def build_greedy_training(self, state, network_states):
        """Builds a training loop for this component.

        This loop repeatedly evaluates the network and computes the loss, but it
        does not advance using the predictions of the network. Instead, it advances
        using the oracle defined in the underlying transition system. The final
        state will always correspond to the gold annotation.

        Args:
          state: MasterState from the 'AdvanceMaster' op that advances the
            underlying master to this component.
          network_states: NetworkState object containing component TensorArrays.

        Returns:
          (state, cost, correct, total) -- These are TF ops corresponding to
          the final state after unrolling, the total cost, the total number of
          correctly predicted actions, and the total number of actions.
        """
        logging.info('Building component: %s', self.spec.name)
        # Add 0 to training_beam_size to disable eager static evaluation.
        # This is possible because TensorFlow's constant_value does not
        # propagate arithmetic operations.
        with tf.control_dependencies(
            [tf.assert_equal(self.training_beam_size + 0, 1)]):
            stride = state.current_batch_size * self.training_beam_size
        self.network.pre_create(stride)

        cost = tf.constant(0.)
        correct = tf.constant(0)
        total = tf.constant(0)

        def cond(handle, *_):
            all_final = dragnn_ops.emit_all_final(handle, component=self.name)
            return tf.logical_not(tf.reduce_all(all_final))

        def body(handle, cost, correct, total, *arrays):
            """Runs the network and advances the state by a step."""

            with tf.control_dependencies([handle, cost, correct, total] +
                                         [x.flow for x in arrays]):
                # Get a copy of the network inside this while loop.
                updated_state = MasterState(handle, state.current_batch_size)
                network_tensors = self._feedforward_unit(updated_state,
                                                         arrays,
                                                         network_states,
                                                         stride,
                                                         during_training=True)

                # Every layer is written to a TensorArray, so that it can be backprop'd.
                next_arrays = update_tensor_arrays(network_tensors, arrays)
                loss_function = self.attr('loss_function')
                with tf.control_dependencies([x.flow for x in next_arrays]):
                    with tf.name_scope('compute_loss'):
                        logits = self.network.get_logits(network_tensors)
                        if loss_function == 'softmax_cross_entropy':
                            gold = dragnn_ops.emit_oracle_labels(
                                handle, component=self.name)
                            new_cost, new_correct, new_total, valid_logits, valid_gold = (
                                build_softmax_cross_entropy_loss(logits, gold))

                            if (self.eligible_for_self_norm
                                    and self.master.hyperparams.self_norm_alpha
                                    > 0):
                                log_z = tf.reduce_logsumexp(valid_logits, [1])
                                new_cost += (
                                    self.master.hyperparams.self_norm_alpha *
                                    tf.nn.l2_loss(log_z))
                        elif loss_function == 'sigmoid_cross_entropy':
                            indices, gold, probs = (
                                dragnn_ops.
                                emit_oracle_labels_and_probabilities(
                                    handle, component=self.name))
                            new_cost, new_correct, new_total, valid_gold = (
                                build_sigmoid_cross_entropy_loss(
                                    logits, gold, indices, probs))
                        else:
                            raise RuntimeError(
                                "Unknown loss function '%s'" % loss_function)

                        cost += new_cost
                        correct += new_correct
                        total += new_total

                with tf.control_dependencies(
                    [cost, correct, total, valid_gold]):
                    handle = dragnn_ops.advance_from_oracle(
                        handle, component=self.name)
                return [handle, cost, correct, total] + next_arrays

        with tf.name_scope(self.name + '/train_state'):
            init_arrays = []
            for layer in self.network.layers:
                init_arrays.append(layer.create_array(
                    state.current_batch_size))

        output = tf.while_loop(cond,
                               body, [state.handle, cost, correct, total] +
                               init_arrays,
                               name='train_%s' % self.name)

        # Save completed arrays and return final state and cost.
        state.handle = output[0]
        cost = output[1]
        correct = output[2]
        total = output[3]
        arrays = output[4:]

        # Store handles to the final output for use in subsequent tasks.
        network_state = network_states[self.name]
        with tf.name_scope(self.name + '/stored_act'):
            for index, layer in enumerate(self.network.layers):
                network_state.activations[
                    layer.name] = network_units.StoredActivations(
                        array=arrays[index])

        # Normalize the objective by the total # of steps taken.
        # Note: Total could be zero for a number of reasons, including:
        #   * Oracle labels not being emitted.
        #   * All oracle labels for a batch being unknown (-1).
        #   * No steps being taken if the component is terminal at the start of a batch.
        with tf.control_dependencies([tf.assert_greater(total, 0)]):
            cost /= tf.to_float(total)

        # Adds regularization for the hidden weights.
        cost = self.add_regularizer(cost)

        with tf.control_dependencies([x.flow for x in arrays]):
            return tf.identity(state.handle), cost, correct, total
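
Example 4 factors the inline loss code of Example 2 into helper functions. A plausible reconstruction of build_softmax_cross_entropy_loss, assembled purely from Example 2's masking and loss logic, might look like the sketch below; this is an assumption for illustration, not the actual DRAGNN implementation (the sigmoid variant and emit_oracle_labels_and_probabilities are not reconstructed here).

# Assumed sketch of a softmax cross-entropy helper with the signature used in
# Example 4, reconstructed from the inline loss code of Example 2.
import tensorflow as tf

def build_softmax_cross_entropy_loss(logits, gold):
  """Returns (cost, correct, total, valid_logits, valid_gold) over valid steps."""
  gold.set_shape([None])
  # Steps with gold label -1 are finished/invalid and excluded from the loss.
  valid_ix = tf.reshape(tf.where(tf.greater(gold, -1)), [-1])
  valid_gold = tf.gather(gold, valid_ix)
  valid_logits = tf.gather(logits, valid_ix)
  cost = tf.reduce_sum(
      tf.nn.sparse_softmax_cross_entropy_with_logits(
          labels=tf.cast(valid_gold, tf.int64), logits=valid_logits))
  correct = tf.reduce_sum(tf.to_int32(tf.nn.in_top_k(valid_logits, valid_gold, 1)))
  total = tf.size(valid_gold)
  return cost, correct, total, valid_logits, valid_gold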