示例#1
0
    def __init__(self, component):
        """Declares the solver's layers and validates its configuration.

        Args:
          component: Parent ComponentBuilderBase object.
        """
        # Every layer is registered with dim=-1.
        layer_names = ('lengths', 'scores', 'logits', 'arcs')
        super(MstSolverNetwork, self).__init__(
            component,
            init_layers=[
                network_units.Layer(self, layer_name, -1)
                for layer_name in layer_names
            ])

        # Network-unit parameters, with fallbacks for anything unspecified.
        fallback_attrs = {
            'forest': False,
            'loss': 'softmax',
            'crf_max_dynamic_range': 20,
        }
        self._attrs = network_units.get_attrs_with_defaults(
            component.spec.network_unit.parameters, defaults=fallback_attrs)

        # The unit takes no fixed features and exactly two linked features.
        check.Eq(
            len(self._fixed_feature_dims.items()), 0,
            'Expected no fixed features')
        check.Eq(
            len(self._linked_feature_dims.items()), 2,
            'Expected two linked features')

        # Those two linked features must be the ones named below.
        for required_feature in ('lengths', 'scores'):
            check.In(required_feature, self._linked_feature_dims,
                     'Missing required linked feature')
示例#2
0
def CombineArcAndRootPotentials(arcs, roots):
    """Overlays the root potentials onto the diagonal of the arc potentials.

    Args:
      arcs: [B,N,N] tensor of batched arc potentials.
      roots: [B,N] matrix of batched root potentials.

    Returns:
      [B,N,N] tensor P of combined potentials where
        P_{b,s,t} = s == t ? roots[b,t] : arcs[b,s,t]
    """
    # Ranks are validated statically, while the graph is being built.
    check.Eq(arcs.get_shape().ndims, 3, 'arcs must be rank 3')
    check.Eq(roots.get_shape().ndims, 2, 'roots must be a matrix')

    # Both inputs must agree on their base dtype.
    check.Same([arcs.dtype.base_dtype, roots.dtype.base_dtype],
               'dtype mismatch')

    # The dimensions are compared with assertions that run with the graph.
    dynamic_roots_shape = tf.shape(roots)
    dynamic_arcs_shape = tf.shape(arcs)
    batch_size = dynamic_roots_shape[0]
    num_tokens = dynamic_roots_shape[1]
    shape_assertions = [
        tf.assert_equal(batch_size, dynamic_arcs_shape[0]),
        tf.assert_equal(num_tokens, dynamic_arcs_shape[1]),
        tf.assert_equal(num_tokens, dynamic_arcs_shape[2]),
    ]
    with tf.control_dependencies(shape_assertions):
        combined = tf.matrix_set_diag(arcs, roots)
    return combined
示例#3
0
def calculate_parse_metrics(gold_corpus, annotated_corpus):
  """Calculate POS/UAS/LAS accuracy based on gold and annotated sentences.

  Args:
    gold_corpus: Sequence of serialized Sentence protos holding the reference
      annotations.
    annotated_corpus: Sequence of serialized Sentence protos to be scored,
      aligned one-to-one with |gold_corpus|.

  Returns:
    Tuple (pos, uas, las) of percentages in [0, 100]: tokens whose tag matches,
    tokens whose head matches, and tokens whose head and label both match.
    All three are 0.0 when the corpora contain no tokens.

  Raises:
    ValueError: (from check.Eq) if the corpora differ in length, or if a pair
      of sentences differs in text or in number of tokens.
  """
  check.Eq(len(gold_corpus), len(annotated_corpus), 'Corpora are not aligned')
  num_tokens = 0
  num_correct_pos = 0
  num_correct_uas = 0
  num_correct_las = 0
  for gold_str, annotated_str in zip(gold_corpus, annotated_corpus):
    gold = sentence_pb2.Sentence()
    annotated = sentence_pb2.Sentence()
    gold.ParseFromString(gold_str)
    annotated.ParseFromString(annotated_str)
    check.Eq(gold.text, annotated.text, 'Text is not aligned')
    check.Eq(len(gold.token), len(annotated.token), 'Tokens are not aligned')

    # Count in a single pass over the token pairs.  On Python 3 zip() returns
    # a one-shot iterator, so it can neither be passed to len() nor be
    # iterated more than once.
    num_tokens += len(gold.token)
    for gold_token, annotated_token in zip(gold.token, annotated.token):
      if gold_token.tag == annotated_token.tag:
        num_correct_pos += 1
      if gold_token.head == annotated_token.head:
        num_correct_uas += 1
        # LAS additionally requires the label to match.
        if gold_token.label == annotated_token.label:
          num_correct_las += 1

  def percentage(num_correct):
    """Returns |num_correct| as a percentage of all tokens; 0/0 maps to 0."""
    if num_tokens == 0:
      return 0.0
    return num_correct * 100.0 / num_tokens

  tf.logging.info('Total num documents: %d', len(annotated_corpus))
  tf.logging.info('Total num tokens: %d', num_tokens)
  pos = percentage(num_correct_pos)
  uas = percentage(num_correct_uas)
  las = percentage(num_correct_las)
  tf.logging.info('POS: %.2f%%', pos)
  tf.logging.info('UAS: %.2f%%', uas)
  tf.logging.info('LAS: %.2f%%', las)
  return pos, uas, las
示例#4
0
def RootPotentialsFromTokens(root, tokens, weights):
    r"""Scores every token as a candidate root.

    The potential of selecting a token as root is the 3-way product between the
    activations of the artificial root token, the |weights|, and the
    activations of that token:

      roots[b,r] = \sum_{i,j} root[i] * weights[i,j] * tokens[b,r,j]

    Args:
      root: [S] vector of activations for the artificial root token.
      tokens: [B,N,T] tensor of batched activations for root tokens.
      weights: [S,T] matrix of weights.

      B,N may be statically-unknown, but S,T must be statically-known.  The
      dtype of all arguments must be compatible.

    Returns:
      [B,N] matrix R of root-selection potentials as defined above.  The dtype
      of R is the same as that of the arguments.
    """
    # Ranks must be known when the graph is built.
    check.Eq(root.get_shape().ndims, 1, 'root must be a vector')
    check.Eq(tokens.get_shape().ndims, 3, 'tokens must be rank 3')
    check.Eq(weights.get_shape().ndims, 2, 'weights must be a matrix')

    # |weights| is rank 2 (checked above), so its static shape unpacks to S,T.
    source_dim, target_dim = weights.get_shape().as_list()
    check.NotNone(source_dim, 'unknown source activation dimension')
    check.NotNone(target_dim, 'unknown target activation dimension')
    check.Eq(root.get_shape().as_list()[0], source_dim,
             'dimension mismatch between weights and root')
    check.Eq(tokens.get_shape().as_list()[2], target_dim,
             'dimension mismatch between weights and tokens')

    # Every argument must have the same base dtype.
    base_dtypes = [
        weights.dtype.base_dtype,
        root.dtype.base_dtype,
        tokens.dtype.base_dtype,
    ]
    check.Same(base_dtypes, 'dtype mismatch')

    root_row = tf.expand_dims(root, 0)  # [1,S]

    dynamic_shape = tf.shape(tokens)
    batch_size = dynamic_shape[0]
    num_tokens = dynamic_shape[1]

    # Collapse B and N into one axis so two plain matmuls do all the work.
    flat_tokens = tf.reshape(tokens, [-1, target_dim])  # [B*N,T]
    projected = tf.matmul(flat_tokens, weights, transpose_b=True)  # [B*N,S]
    flat_roots = tf.matmul(root_row, projected, transpose_b=True)  # [1,B*N]

    # Split the collapsed axis back into B and N.
    return tf.reshape(flat_roots, [batch_size, num_tokens])
示例#5
0
    def __init__(self, component):
        """Creates the label-scoring variables and the 'labels' layer.

        Args:
          component: Parent ComponentBuilderBase object.
        """
        super(BiaffineLabelNetwork, self).__init__(component)

        self._num_labels = int(
            component.spec.network_unit.parameters['num_labels'])

        check.Gt(self._num_labels, 0, 'Expected some labels')
        check.Eq(
            len(self._fixed_feature_dims.items()), 0,
            'Expected no fixed features')
        check.Eq(
            len(self._linked_feature_dims.items()), 2,
            'Expected two linked features')
        for required_feature in ('sources', 'targets'):
            check.In(required_feature, self._linked_feature_dims,
                     'Missing required linked feature')

        self._source_dim = self._linked_feature_dims['sources']
        self._target_dim = self._linked_feature_dims['targets']

        # TODO(googleuser): Make parameter initialization configurable.
        def _new_variable(name, shape):
            """Returns a float32 variable with a fresh small-normal initializer."""
            return tf.get_variable(
                name, shape, tf.float32,
                tf.random_normal_initializer(stddev=1e-4, seed=self._seed))

        # Creation order: pair weights, source weights, target weights, biases.
        self._weights = [
            _new_variable(
                'weights_pair',
                [self._num_labels, self._source_dim, self._target_dim]),
            _new_variable('weights_source',
                          [self._num_labels, self._source_dim]),
            _new_variable('weights_target',
                          [self._num_labels, self._target_dim]),
        ]
        self._biases = [_new_variable('biases', [self._num_labels])]

        # All variables are parameters; only the weights are regularized.
        self._params.extend(self._weights + self._biases)
        self._regularized_weights.extend(self._weights)

        labels_layer = network_units.Layer(self, 'labels', self._num_labels)
        self._layers.append(labels_layer)
示例#6
0
def calculate_segmentation_metrics(gold_corpus, annotated_corpus):
    """Scores token spans of annotated sentences against the gold sentences.

    A token counts as correct when its (start, end) span also occurs in the
    corresponding gold sentence.

    Args:
      gold_corpus: Sequence of serialized Sentence protos with reference tokens.
      annotated_corpus: Sequence of serialized Sentence protos to be scored,
        aligned one-to-one with |gold_corpus|.

    Returns:
      Tuple (precision, recall, f1) of percentages, each rounded to 2 decimals.
    """
    check.Eq(len(gold_corpus), len(annotated_corpus),
             'Corpora are not aligned')

    def ratio(numerator, denominator):
        """Divides two non-negative numbers, mapping 0/0 to 0 and x/0 to inf."""
        check.Ge(numerator, 0)
        check.Ge(denominator, 0)
        if denominator > 0:
            return numerator / denominator
        return 0.0 if numerator == 0 else float('inf')

    def unique_spans(tokens):
        """Returns the set of (start, end) spans, rejecting duplicates."""
        spans = set()
        for token in tokens:
            check.Ge(token.end, token.start)
            span = (token.start, token.end)
            check.NotIn(span, spans, 'Duplicate token')
            spans.add(span)
        return spans

    num_gold_tokens = 0
    num_test_tokens = 0
    num_correct_tokens = 0
    for gold_str, annotated_str in zip(gold_corpus, annotated_corpus):
        gold = sentence_pb2.Sentence()
        annotated = sentence_pb2.Sentence()
        gold.ParseFromString(gold_str)
        annotated.ParseFromString(annotated_str)
        check.Eq(gold.text, annotated.text, 'Text is not aligned')

        gold_spans = unique_spans(gold.token)
        test_spans = unique_spans(annotated.token)
        num_gold_tokens += len(gold_spans)
        num_test_tokens += len(test_spans)
        num_correct_tokens += len(gold_spans & test_spans)

    tf.logging.info('Total num documents: %d', len(annotated_corpus))
    tf.logging.info('Total gold tokens: %d', num_gold_tokens)
    tf.logging.info('Total test tokens: %d', num_test_tokens)

    precision = 100 * ratio(num_correct_tokens, num_test_tokens)
    recall = 100 * ratio(num_correct_tokens, num_gold_tokens)
    f1 = ratio(2 * precision * recall, precision + recall)

    tf.logging.info('Precision: %.2f%%', precision)
    tf.logging.info('Recall: %.2f%%', recall)
    tf.logging.info('F1: %.2f%%', f1)

    return round(precision, 2), round(recall, 2), round(f1, 2)
示例#7
0
def ArcSourcePotentialsFromTokens(tokens, weights):
    r"""Scores arcs using only the activations of their source token.

    The potential of an arc is the product between the activations of its
    source token and the |weights|; it is the same for every target:

      arc[b,s,:] = \sum_{i} weights[i] * tokens[b,s,i]

    Args:
      tokens: [B,N,S] tensor of batched activations for source tokens.
      weights: [S] vector of weights.

      B,N may be statically-unknown, but S must be statically-known.  The dtype
      of all arguments must be compatible.

    Returns:
      [B,N,N] tensor A of arc potentials as defined above.  The dtype of A is
      the same as that of the arguments.  Note that the diagonal entries (i.e.,
      where s==t) represent self-loops and may not be meaningful.
    """
    # Ranks must be known when the graph is built.
    check.Eq(tokens.get_shape().ndims, 3, 'tokens must be rank 3')
    check.Eq(weights.get_shape().ndims, 1, 'weights must be a vector')

    # So must the activation dimension S.
    source_dim = weights.get_shape().as_list()[0]
    check.NotNone(source_dim, 'unknown source activation dimension')
    check.Eq(tokens.get_shape().as_list()[2], source_dim,
             'dimension mismatch between weights and tokens')

    # Both arguments must have the same base dtype.
    base_dtypes = [weights.dtype.base_dtype, tokens.dtype.base_dtype]
    check.Same(base_dtypes, 'dtype mismatch')

    dynamic_shape = tf.shape(tokens)
    batch_size = dynamic_shape[0]
    num_tokens = dynamic_shape[1]

    # Collapse B and N into one axis, score each source token with a single
    # matmul, then repeat that score once per target.
    flat_tokens = tf.reshape(tokens, [-1, source_dim])  # [B*N,S]
    weights_column = tf.expand_dims(weights, 1)  # [S,1]
    flat_scores = tf.matmul(flat_tokens, weights_column)  # [B*N,1]
    tiled_scores = tf.tile(flat_scores, [1, num_tokens])  # [B*N,N]

    # Split the collapsed axis back into B and N.
    return tf.reshape(tiled_scores, [batch_size, num_tokens, num_tokens])
示例#8
0
def generate_target_per_step_schedule(pretrain_steps, train_steps):
    """Generates a sampled training schedule.

    The schedule starts with all pre-training steps, grouped by target in index
    order.  The training steps follow in random order: each one is drawn
    uniformly from the steps still outstanding, so a target is chosen with
    probability proportional to its remaining step count.

    Note that this reseeds the global |random| module with a fixed seed.

    Arguments:
      pretrain_steps: List, number of pre-training steps per each target.
      train_steps: List, number of sampled training steps per each target.  The
        caller's list is not modified.

    Returns:
      Python list of length sum(pretrain_steps) + sum(train_steps), containing
      target numbers per step.
    """
    check.Eq(len(pretrain_steps), len(train_steps))
    # Arbitrary seed to make sure the return is deterministic.
    random.seed(0x31337)
    tf.logging.info('Determining the training schedule...')
    target_per_step = []
    for target_idx, num_pretrain_steps in enumerate(pretrain_steps):
        target_per_step += [target_idx] * num_pretrain_steps

    # Work on a copy, and track the outstanding total instead of re-summing
    # the list twice per sampled step.
    train_steps = list(train_steps)
    remaining_steps = sum(train_steps)
    while remaining_steps > 0:
        step = random.randint(0, remaining_steps - 1)
        # Find the target whose cumulative step range contains |step|.
        cumulative_steps = 0
        for target_idx, num_train_steps in enumerate(train_steps):
            cumulative_steps += num_train_steps
            if step < cumulative_steps:
                break
        assert train_steps[target_idx] > 0
        train_steps[target_idx] -= 1
        remaining_steps -= 1
        target_per_step.append(target_idx)
    tf.logging.info('Training schedule defined!')
    return target_per_step
示例#9
0
def LabelPotentialsFromTokens(tokens, weights):
    r"""Scores every label for every token.

    The potential of a label for a token is the product between the token's
    activations and that label's row of the |weights|:

      labels[b,t,l] = \sum_{i} weights[l,i] * tokens[b,t,i]

    Args:
      tokens: [B,N,T] tensor of batched token activations.
      weights: [L,T] matrix of weights.

      B,N may be dynamic, but L,T must be static.  The dtype of all arguments
      must be compatible.

    Returns:
      [B,N,L] tensor of label potentials as defined above, with the same dtype
      as the arguments.
    """
    check.Eq(tokens.get_shape().ndims, 3, 'tokens must be rank 3')
    check.Eq(weights.get_shape().ndims, 2, 'weights must be a matrix')

    # |weights| is rank 2 (checked above), so its static shape unpacks to L,T.
    num_labels, num_activations = weights.get_shape().as_list()
    check.NotNone(num_labels, 'unknown number of labels')
    check.NotNone(num_activations, 'unknown activation dimension')
    check.Eq(tokens.get_shape().as_list()[2], num_activations,
             'activation mismatch between weights and tokens')
    dynamic_shape = tf.shape(tokens)
    batch_size = dynamic_shape[0]
    num_tokens = dynamic_shape[1]

    base_dtypes = [tokens.dtype.base_dtype, weights.dtype.base_dtype]
    check.Same(base_dtypes, 'dtype mismatch')

    # Collapse B and N into one axis so a single matmul scores all tokens.
    flat_tokens = tf.reshape(tokens, [-1, num_activations])  # [B*N,T]
    flat_labels = tf.matmul(flat_tokens, weights, transpose_b=True)  # [B*N,L]

    # Split the collapsed axis back into B and N.
    return tf.reshape(flat_labels, [batch_size, num_tokens, num_labels])
示例#10
0
    def __init__(self, component):
        """Creates the arc-scoring variables and the 'adjacency' layer.

        Args:
          component: Parent ComponentBuilderBase object.
        """
        super(BiaffineDigraphNetwork, self).__init__(component)

        check.Eq(
            len(self._fixed_feature_dims.items()), 0,
            'Expected no fixed features')
        check.Eq(
            len(self._linked_feature_dims.items()), 2,
            'Expected two linked features')
        for required_feature in ('sources', 'targets'):
            check.In(required_feature, self._linked_feature_dims,
                     'Missing required linked feature')

        self._source_dim = self._linked_feature_dims['sources']
        self._target_dim = self._linked_feature_dims['targets']

        # TODO(googleuser): Make parameter initialization configurable.
        def _new_variable(name, shape):
            """Returns a float32 variable with a fresh small-normal initializer."""
            return tf.get_variable(
                name, shape, tf.float32,
                tf.random_normal_initializer(stddev=1e-4, seed=self._seed))

        # Creation order: arc weights, source weights, root activations.
        self._weights = [
            _new_variable('weights_arc', [self._source_dim, self._target_dim]),
            _new_variable('weights_source', [self._source_dim]),
            _new_variable('root', [self._source_dim]),
        ]

        # Every variable is both a parameter and a regularized weight.
        self._params.extend(self._weights)
        self._regularized_weights.extend(self._weights)

        # Negative Layer.dim indicates that the dimension is dynamic.
        adjacency_layer = network_units.Layer(self, 'adjacency', -1)
        self._layers.append(adjacency_layer)
    def __init__(self, component):
        """Creates the arc-scoring variables, runtime hooks and output layer.

        Args:
          component: Parent ComponentBuilderBase object.
        """
        super(BiaffineDigraphNetwork, self).__init__(component)

        check.Eq(
            len(self._fixed_feature_dims.items()), 0,
            'Expected no fixed features')
        check.Eq(
            len(self._linked_feature_dims.items()), 2,
            'Expected two linked features')
        for required_feature in ('sources', 'targets'):
            check.In(required_feature, self._linked_feature_dims,
                     'Missing required linked feature')

        self._source_dim = self._linked_feature_dims['sources']
        self._target_dim = self._linked_feature_dims['targets']

        # The arc weights start orthogonal; the two vectors start at zero.
        arc_weights = tf.get_variable(
            'weights_arc', [self._source_dim, self._target_dim], tf.float32,
            tf.orthogonal_initializer())
        source_weights = tf.get_variable(
            'weights_source', [self._source_dim], tf.float32,
            tf.zeros_initializer())
        root_activations = tf.get_variable(
            'root', [self._source_dim], tf.float32, tf.zeros_initializer())
        self._weights = [arc_weights, source_weights, root_activations]

        # Every variable is both a parameter and a regularized weight.
        self._params.extend(self._weights)
        self._regularized_weights.extend(self._weights)

        # Add runtime hooks for pre-computed weights.
        for derived_hook in (self._get_root_weights, self._get_root_bias):
            self._derived_params.append(derived_hook)

        # Negative Layer.dim indicates that the dimension is dynamic.
        adjacency_layer = network_units.Layer(component, 'adjacency', -1)
        self._layers.append(adjacency_layer)
示例#12
0
    def __init__(self, component):
        """Initializes the bi-LSTM cells, weights and initial-state variables.

        Args:
          component: Parent component object; forwarded to the base class, and
            its name is used in the log message.
        """
        super(BulkBiLSTMNetwork, self).__init__(component)

        # A linked feature named 'lengths' with dimension 1 is mandatory.
        check.In('lengths', self._linked_feature_dims,
                 'Missing required linked feature')
        check.Eq(self._linked_feature_dims['lengths'], 1,
                 'Wrong dimension for "lengths" feature')
        self._input_dim = self._concatenated_input_dim - 1  # exclude 'lengths'
        self._output_dim = self.get_layer_size('outputs')
        tf.logging.info('[%s] Bulk bi-LSTM with input_dim=%d output_dim=%d',
                        component.name, self._input_dim, self._output_dim)

        # Create one training and inference cell per layer and direction.
        self._train_cells_forward = self._create_train_cells()
        self._train_cells_backward = self._create_train_cells()
        self._inference_cells_forward = self._create_inference_cells()
        self._inference_cells_backward = self._create_inference_cells()

        def _bilstm_closure(scope):
            """Applies the bi-LSTM to placeholder inputs and lengths."""
            # Use singleton |stride| and |steps| because their values don't affect the
            # weight variables.
            stride, steps = 1, 1
            placeholder_inputs = tf.placeholder(
                dtype=tf.float32, shape=[stride, steps, self._input_dim])
            placeholder_lengths = tf.placeholder(dtype=tf.int64,
                                                 shape=[stride])

            # Omit the initial states for simplicity; they don't affect the
            # weight variables.  Only the training cells are applied here.
            tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
                self._train_cells_forward,
                self._train_cells_backward,
                placeholder_inputs,
                dtype=tf.float32,
                sequence_length=placeholder_lengths,
                scope=scope)

        # Run the closure so that the variables it creates become parameters.
        self._capture_variables_as_params(_bilstm_closure)

        # Allocate parameters for the initial states.  Note that an LSTM state is a
        # tuple of two substates (c, h), so there are 4 variables per layer.
        for index, num_units in enumerate(self._hidden_layer_sizes):
            for direction in ['forward', 'backward']:
                for substate in ['c', 'h']:
                    self._params.append(
                        tf.get_variable(
                            'initial_state_%s_%s_%d' %
                            (direction, substate, index),
                            [1, num_units
                             ],  # leading 1 for later batch-wise tiling
                            dtype=tf.float32,
                            initializer=tf.constant_initializer(0.0)))
示例#13
0
  def __init__(self, master, component_spec):
    """Builds the component and verifies that its features are ID-only.

    Args:
      master: dragnn.MasterBuilder object.
      component_spec: dragnn.ComponentSpec proto to be built.
    """
    super(BulkFeatureIdExtractorComponentBuilder,
          self).__init__(master, component_spec)

    num_linked_features = len(self.spec.linked_feature)
    check.Eq(num_linked_features, 0, 'Linked features are forbidden')

    # Every fixed feature must declare a negative embedding_dim.
    for fixed_feature in self.spec.fixed_feature:
      error_message = 'Features must be non-embedded: %s' % fixed_feature
      check.Lt(fixed_feature.embedding_dim, 0, error_message)
def get_segmenter_corpus(input_data_path, use_text_format):
  """Reads in a character corpus for segmenting.

  Args:
    input_data_path: Path of the corpus to read.
    use_text_format: If true, read the input with the 'untokenized-text'
      format reader.  Otherwise read it with the CoNLL reader and pass the
      result through the char token generator op.

  Returns:
    The character corpus.
  """
  tf.logging.info('Reading documents...')

  if use_text_format:
    text_reader = sentence_io.FormatSentenceReader(input_data_path,
                                                   'untokenized-text')
    return text_reader.corpus()

  # The generator op is built and run inside a throwaway graph and session.
  word_corpus = sentence_io.ConllSentenceReader(input_data_path).corpus()
  with tf.Session(graph=tf.Graph()) as scratch_session:
    char_corpus = scratch_session.run(
        gen_parser_ops.char_token_generator(word_corpus))
  check.Eq(len(word_corpus), len(char_corpus))
  return char_corpus
示例#15
0
    def create(self,
               fixed_embeddings,
               linked_embeddings,
               context_tensor_arrays,
               attention_tensor,
               during_training,
               stride=None):
        """See base class."""
        # NB: The LSTM's c and h vectors come from |context_tensor_arrays|, not
        # from linked features.
        num_hidden_layers = len(self._hidden_layer_sizes)
        check.Eq(len(context_tensor_arrays), 2 * num_hidden_layers,
                 'require two context tensors per hidden layer')

        # Read the most recent (c, h) pair of every layer from the arrays.
        length = context_tensor_arrays[0].size()
        layer_states = []
        for layer, num_units in enumerate(self._hidden_layer_sizes):
            c_array = context_tensor_arrays[2 * layer]
            h_array = context_tensor_arrays[2 * layer + 1]
            previous_c = c_array.read(length - 1)
            previous_h = h_array.read(length - 1)

            # The static shapes are not set on the tensors that were read, so
            # set them by hand.  TODO(googleuser): Why are the shapes not set?
            previous_c.set_shape([tf.Dimension(None), num_units])
            previous_h.set_shape([tf.Dimension(None), num_units])
            layer_states.append(
                tf.contrib.rnn.LSTMStateTuple(previous_c, previous_h))
        lstm_state = tuple(layer_states)

        input_tensor = dragnn.get_input_tensor(fixed_embeddings,
                                               linked_embeddings)
        if during_training:
            cell = self._train_cell
        else:
            cell = self._inference_cell

        def _cell_closure(scope):
            """Applies the LSTM cell to the current inputs and state."""
            return cell(input_tensor, lstm_state, scope)

        unused_h, lstm_state = self._apply_with_captured_variables(
            _cell_closure)

        # Flatten the new per-layer states into [c0, h0, c1, h1, ...]; these go
        # into the tensor arrays and are used to compute the objective.
        output_tensors = []
        for new_c, new_h in lstm_state:
            output_tensors.extend([new_c, new_h])
        return self._append_base_layers(output_tensors)
示例#16
0
def extract_fixed_feature_ids(comp, state, stride):
    """Extracts fixed feature IDs.

    Args:
      comp: Component whose fixed feature IDs we wish to extract.
      state: Live MasterState object for the component.
      stride: Tensor containing current batch * beam size.

    Returns:
      state handle: Updated state handle to be used after this call.
      ids: List of [stride * num_steps, 1] feature IDs per channel.  Missing
           IDs (e.g., due to batch padding) are set to -1.
    """
    feature_specs = comp.spec.fixed_feature
    num_channels = len(feature_specs)
    if not num_channels:
        # Nothing to extract; the handle is returned as it is.
        return state.handle, []

    for feature_spec in feature_specs:
        check.Eq(feature_spec.size, 1, 'All features must have size=1')
        check.Lt(feature_spec.embedding_dim, 0,
                 'All features must be non-embedded')

    state.handle, indices, ids, _, num_steps = dragnn_ops.bulk_fixed_features(
        state.handle, component=comp.name, num_channels=num_channels)
    size = stride * num_steps

    fixed_ids = []
    for channel, feature_spec in enumerate(feature_specs):
        tf.logging.info('[%s] Adding fixed feature IDs "%s"', comp.name,
                        feature_spec.name)

        # Shifting IDs by +1 before the segment sum and by -1 after it makes
        # positions that received no ID come out as -1.
        #
        # TODO(googleuser): This formula breaks if multiple IDs are extracted at some
        # step.  Try using tf.unique() to enforce the unique-IDS precondition.
        shifted_ids = ids[channel] + 1
        id_column = tf.expand_dims(
            tf.unsorted_segment_sum(shifted_ids, indices[channel], size) - 1,
            axis=1)
        fixed_ids.append(
            network_units.NamedTensor(id_column, feature_spec.name, dim=1))
    return state.handle, fixed_ids
示例#17
0
 def testCheckEq(self):
     """Verifies check.Eq on equal operands, unequal operands and custom errors."""
     # Equal operands: nothing is raised.
     check.Eq(1, 1, 'foo')
     # Unequal operands raise ValueError by default, carrying the message.
     self.assertRaisesRegexp(ValueError, 'bar', check.Eq, 1, 2, 'bar')
     # The exception type can be overridden through the fourth argument.
     self.assertRaisesRegexp(RuntimeError, 'baz', check.Eq, 1, 2, 'baz',
                             RuntimeError)
def main(unused_argv):
    """Trains a model from the files in --model_dir and saves checkpoints.

    Reads the config, master spec, hyperparameters and (optional) training
    targets from --model_dir, builds the TensorFlow graph, loads the training
    and tuning corpora, and runs training.

    Args:
      unused_argv: Ignored.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    # Validate flags.  Comparing the two "is None" results with check.Ne
    # requires that exactly one flag of each pair is set.
    check.NotNone(FLAGS.model_dir, '--model_dir is required')
    check.Ne(
        FLAGS.pretrain_steps is None, FLAGS.pretrain_epochs is None,
        'Exactly one of --pretrain_steps or --pretrain_epochs is required')
    check.Ne(FLAGS.train_steps is None, FLAGS.train_epochs is None,
             'Exactly one of --train_steps or --train_epochs is required')

    # All inputs and outputs live under --model_dir.
    config_path = os.path.join(FLAGS.model_dir, 'config.txt')
    master_path = os.path.join(FLAGS.model_dir, 'master.pbtxt')
    hyperparameters_path = os.path.join(FLAGS.model_dir,
                                        'hyperparameters.pbtxt')
    targets_path = os.path.join(FLAGS.model_dir, 'targets.pbtxt')
    checkpoint_path = os.path.join(FLAGS.model_dir, 'checkpoints/best')
    tensorboard_dir = os.path.join(FLAGS.model_dir, 'tensorboard')

    # The config file is a Python literal.  It is wrapped in a
    # defaultdict(bool) so that keys it does not define read as False.
    with tf.gfile.FastGFile(config_path) as config_file:
        config = collections.defaultdict(bool,
                                         ast.literal_eval(config_file.read()))
    train_corpus_path = config['train_corpus_path']
    tune_corpus_path = config['tune_corpus_path']
    projectivize_train_corpus = config['projectivize_train_corpus']

    master = _read_text_proto(master_path, spec_pb2.MasterSpec)
    hyperparameters = _read_text_proto(hyperparameters_path,
                                       spec_pb2.GridPoint)
    # Use the default targets unless a targets file exists.
    targets = spec_builder.default_targets_from_spec(master)
    if tf.gfile.Exists(targets_path):
        targets = _read_text_proto(targets_path,
                                   spec_pb2.TrainingGridSpec).target

    # Build the TensorFlow graph.
    graph = tf.Graph()
    with graph.as_default():
        tf.set_random_seed(hyperparameters.seed)
        builder = graph_builder.MasterBuilder(master, hyperparameters)
        trainers = [
            builder.add_training_from_config(target) for target in targets
        ]
        annotator = builder.add_annotation()
        builder.add_saver()

    # Read in serialized protos from training data.
    train_corpus = sentence_io.ConllSentenceReader(
        train_corpus_path, projectivize=projectivize_train_corpus).corpus()
    tune_corpus = sentence_io.ConllSentenceReader(tune_corpus_path,
                                                  projectivize=False).corpus()
    gold_tune_corpus = tune_corpus

    # Convert to char-based corpora, if requested.
    if config['convert_to_char_corpora']:
        # NB: Do not convert the |gold_tune_corpus|, which should remain word-based
        # for segmentation evaluation purposes.
        train_corpus = _convert_to_char_corpus(train_corpus)
        tune_corpus = _convert_to_char_corpus(tune_corpus)

    # Resolve the step counts from the step/epoch flags; the results must have
    # one entry per training target.
    pretrain_steps = _get_steps(FLAGS.pretrain_steps, FLAGS.pretrain_epochs,
                                len(train_corpus))
    train_steps = _get_steps(FLAGS.train_steps, FLAGS.train_epochs,
                             len(train_corpus))
    check.Eq(len(targets), len(pretrain_steps),
             'Length mismatch between training targets and --pretrain_steps')
    check.Eq(len(targets), len(train_steps),
             'Length mismatch between training targets and --train_steps')

    # Ready to train!
    tf.logging.info('Training on %d sentences.', len(train_corpus))
    tf.logging.info('Tuning on %d sentences.', len(tune_corpus))

    tf.logging.info('Creating TensorFlow checkpoint dir...')
    summary_writer = trainer_lib.get_summary_writer(tensorboard_dir)

    # Start from an empty checkpoint directory: remove whatever currently
    # exists at that path (directory or file), then recreate it.
    checkpoint_dir = os.path.dirname(checkpoint_path)
    if tf.gfile.IsDirectory(checkpoint_dir):
        tf.gfile.DeleteRecursively(checkpoint_dir)
    elif tf.gfile.Exists(checkpoint_dir):
        tf.gfile.Remove(checkpoint_dir)
    tf.gfile.MakeDirs(checkpoint_dir)

    with tf.Session(FLAGS.tf_master, graph=graph) as sess:
        # Make sure to re-initialize all underlying state.
        sess.run(tf.global_variables_initializer())
        trainer_lib.run_training(sess, trainers, annotator,
                                 evaluation.parser_summaries, pretrain_steps,
                                 train_steps, train_corpus, tune_corpus,
                                 gold_tune_corpus, FLAGS.batch_size,
                                 summary_writer, FLAGS.report_every,
                                 builder.saver, checkpoint_path)

    tf.logging.info('Best checkpoint written to:\n%s', checkpoint_path)
def main(unused_argv):
    """Runs a trained model over the input corpus and scores segmentation.

  Reads the master spec and checkpoint named by the flags, builds the
  annotation graph, annotates the output of char_token_generator for the input
  corpus in batches of at most --max_batch_size, passes the result to
  evaluation.calculate_segmentation_metrics and, if --output_file is set,
  writes every annotated sentence there as a text-format proto.

  Args:
    unused_argv: Ignored.
  """
    # Both list-valued flags are decoded with regular expressions.  The first
    # pattern captures each key=value pair, where the value is a run of digits.
    beam_size_by_component = re.findall(r'([^=,]+)=(\d+)',
                                        FLAGS.inference_beam_size)
    # The second pattern yields the non-empty, comma-separated entries.
    locally_normalized_components = re.findall(r'[^,]+',
                                               FLAGS.locally_normalize)

    # Load the master spec from its text-format file.
    master_spec = spec_pb2.MasterSpec()
    with gfile.FastGFile(FLAGS.master_spec) as spec_file:
        text_format.Parse(spec_file.read(), master_spec)

    # Prefix every resource file pattern with the resource directory.
    if FLAGS.resource_dir:
        resource_parts = (part
                          for component in master_spec.component
                          for resource in component.resource
                          for part in resource.part)
        for part in resource_parts:
            part.file_pattern = os.path.join(FLAGS.resource_dir,
                                             part.file_pattern)

    if FLAGS.complete_master_spec:
        spec_builder.complete_master_spec(master_spec, None,
                                          FLAGS.resource_dir)

    # Construct the annotation graph, pinned to the CPU.
    tf.logging.info('Building the graph')
    graph = tf.Graph()
    with graph.as_default(), tf.device('/device:CPU:0'):
        hyperparams = spec_pb2.GridPoint()
        hyperparams.use_moving_average = True
        builder = graph_builder.MasterBuilder(master_spec, hyperparams)
        annotator = builder.add_annotation()
        builder.add_saver()

    # The corpus conversion runs in a throwaway graph and session so that it
    # does not add ops to the annotation graph.
    tf.logging.info('Reading documents...')
    input_corpus = sentence_io.ConllSentenceReader(FLAGS.input_file).corpus()
    with tf.Session(graph=tf.Graph()) as tmp_session:
        char_corpus = tmp_session.run(
            gen_parser_ops.char_token_generator(input_corpus))
    check.Eq(len(input_corpus), len(char_corpus))

    session_config = tf.ConfigProto(log_device_placement=False,
                                    intra_op_parallelism_threads=FLAGS.threads,
                                    inter_op_parallelism_threads=FLAGS.threads)

    with tf.Session(graph=graph, config=session_config) as sess:
        tf.logging.info('Initializing variables...')
        sess.run(tf.global_variables_initializer())

        tf.logging.info('Loading from checkpoint...')
        sess.run('save/restore_all', {'save/Const:0': FLAGS.checkpoint_file})

        tf.logging.info('Processing sentences...')

        # These feeds do not depend on the batch, so assemble them only once.
        shared_feeds = {}
        for comp, beam_size in beam_size_by_component:
            shared_feeds['%s/InferenceBeamSize:0' % comp] = beam_size
        for comp in locally_normalized_components:
            shared_feeds['%s/LocallyNormalize:0' % comp] = True

        processed = []
        start_time = time.time()
        run_metadata = tf.RunMetadata()
        num_documents = len(char_corpus)
        for start in range(0, num_documents, FLAGS.max_batch_size):
            end = min(start + FLAGS.max_batch_size, num_documents)
            feed_dict = {annotator['input_batch']: char_corpus[start:end]}
            feed_dict.update(shared_feeds)

            if not (FLAGS.timeline_output_file and end == num_documents):
                # Ordinary batch: annotate without collecting a trace.
                processed.extend(
                    sess.run(annotator['annotations'], feed_dict=feed_dict))
                continue

            # Last batch with a timeline requested: run with full tracing and
            # dump the Chrome-format trace before recording the annotations.
            trace_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            serialized_annotations = sess.run(annotator['annotations'],
                                              feed_dict=feed_dict,
                                              options=trace_options,
                                              run_metadata=run_metadata)
            trace = timeline.Timeline(step_stats=run_metadata.step_stats)
            with open(FLAGS.timeline_output_file, 'w') as trace_file:
                trace_file.write(trace.generate_chrome_trace_format())
            processed.extend(serialized_annotations)

        elapsed_seconds = time.time() - start_time
        tf.logging.info('Processed %d documents in %.2f seconds.',
                        num_documents, elapsed_seconds)
        evaluation.calculate_segmentation_metrics(input_corpus, processed)

        # Optionally dump each annotated sentence as a text-format proto,
        # with a blank line after every sentence.
        if FLAGS.output_file:
            with gfile.GFile(FLAGS.output_file, 'w') as output:
                for serialized in processed:
                    sentence = sentence_pb2.Sentence()
                    sentence.ParseFromString(serialized)
                    output.write(text_format.MessageToString(sentence) + '\n\n')
示例#20
0
def LabelPotentialsFromTokenPairs(sources, targets, weights):
    r"""Scores every label for each aligned source/target token pair.

  The activations of the source and target token at the same position are
  combined with a per-label weight matrix into one scalar potential per label:

    labels[b,t,l] = \sum_{i,j} sources[b,t,i] * weights[l,i,j] * targets[b,t,j]

  Args:
    sources: [B,N,S] tensor of batched source token activations.
    targets: [B,N,T] tensor of batched target token activations.
    weights: [L,S,T] tensor of weights.

    B and N may be dynamic; L, S and T must be static.  All arguments must have
    compatible dtypes.

  Returns:
    [B,N,L] tensor of label potentials as defined above, with the dtype of the
    arguments.
  """
    # Ranks must be known when the graph is built.
    check.Eq(sources.get_shape().ndims, 3, 'sources must be rank 3')
    check.Eq(targets.get_shape().ndims, 3, 'targets must be rank 3')
    check.Eq(weights.get_shape().ndims, 3, 'weights must be rank 3')

    # The label and activation dimensions must be static and must agree between
    # the weights and the token activations.
    weights_dims = weights.get_shape().as_list()
    num_labels = weights_dims[0]
    source_dim = weights_dims[1]
    target_dim = weights_dims[2]
    check.NotNone(num_labels, 'unknown number of labels')
    check.NotNone(source_dim, 'unknown source activation dimension')
    check.NotNone(target_dim, 'unknown target activation dimension')
    check.Eq(sources.get_shape().as_list()[2], source_dim,
             'activation mismatch between weights and source tokens')
    check.Eq(targets.get_shape().as_list()[2], target_dim,
             'activation mismatch between weights and target tokens')

    dtypes = [
        sources.dtype.base_dtype,
        targets.dtype.base_dtype,
        weights.dtype.base_dtype,
    ]
    check.Same(dtypes, 'dtype mismatch')

    # The batch and token dimensions can only be compared at run time.
    dynamic_sources_shape = tf.shape(sources)
    dynamic_targets_shape = tf.shape(targets)
    batch_size = dynamic_sources_shape[0]
    num_tokens = dynamic_sources_shape[1]
    shape_assertions = [
        tf.assert_equal(batch_size, dynamic_targets_shape[0]),
        tf.assert_equal(num_tokens, dynamic_targets_shape[1]),
    ]
    with tf.control_dependencies(shape_assertions):
        # Each token needs a vector-3tensor-vector product.  No single op does
        # that, so it is assembled from reshape() and matmul().

        # Step 1: contract the target activations against the weights with one
        # big matmul over the flattened [B*N] tokens and [L*S] weight rows.
        flat_weights_lsxt = tf.reshape(weights,
                                       [num_labels * source_dim, target_dim])
        flat_targets_bnxt = tf.reshape(targets, [-1, target_dim])
        partial_bnxls = tf.matmul(flat_targets_bnxt,
                                  flat_weights_lsxt,
                                  transpose_b=True)

        # Step 2: unflatten back to one [L,S] matrix per token.
        partial_bxnxlxs = tf.reshape(
            partial_bnxls, [batch_size, num_tokens, num_labels, source_dim])

        # Step 3: contract each token's [L,S] matrix with its [S] source
        # vector via a batched matmul, then drop the trailing unit dimension.
        source_rows_bxnx1xs = tf.expand_dims(sources, 2)
        potentials_bxnxlx1 = tf.matmul(partial_bxnxlxs,
                                       source_rows_bxnx1xs,
                                       transpose_b=True)
        return tf.squeeze(potentials_bxnxlx1, [3])
示例#21
0
def LaplacianMatrix(lengths, arcs, forest=False):
    r"""Builds the (root-augmented) Laplacian matrix of each digraph in a batch.

  Args:
    lengths: [B] vector of input sequence lengths.
    arcs: [B,M,M] tensor of arc potentials.  Entry b,t,s is the potential of
      the arc from s to t in the b'th digraph and entry b,t,t is the potential
      of t as a root.  Entries b,t,s with t or s >= lengths[b] are ignored.
    forest: If true, build the Laplacian for forests; otherwise for trees.

  Returns:
    [B,M,M] tensor L holding one Laplacian per digraph, padded with an identity
    matrix.  That is, the padding entries (t or s >= lengths[b]) are:
      L_{b,t,t} = 1.0
      L_{b,t,s} = 0.0
    Padding with an identity matrix keeps the determinant of each padded matrix
    equal to that of the unpadded matrix.  The non-padding entries
    (t,s < lengths[b]) depend on the mode.  For trees:
      L_{b,t,0} = arcs[b,t,t]
      L_{b,t,t} = \sum_{s < lengths[b], t != s} arcs[b,t,s]
      L_{b,t,s} = -arcs[b,t,s]
    For forests:
      L_{b,t,t} = \sum_{s < lengths[b]} arcs[b,t,s]
      L_{b,t,s} = -arcs[b,t,s]
    See http://www.aclweb.org/anthology/D/D07/D07-1015.pdf for background; the
    matrices here are transposed relative to the notation used there.
  """
    check.Eq(arcs.get_shape().ndims, 3, 'arcs must be rank 3')
    dtype = arcs.dtype.base_dtype

    dynamic_shape = tf.shape(arcs)
    batch_size = dynamic_shape[0]
    max_length = dynamic_shape[1]

    # The arc matrices must be square; the masks are built under that check.
    square_assertion = tf.assert_equal(max_length, dynamic_shape[2])
    with tf.control_dependencies([square_assertion]):
        arc_mask_bxmxm, token_mask_bxm = ValidArcAndTokenMasks(
            lengths, max_length, dtype=dtype)
    padding_mask_bxm = tf.constant(1, dtype=dtype) - token_mask_bxm

    # Mask out invalid arcs so they cannot leak into the sums below.
    masked_bxmxm = arcs * arc_mask_bxmxm

    no_diag_bxm = tf.zeros([batch_size, max_length], dtype)
    if forest:
        # Forests: the root potentials on the diagonal take part in the inbound
        # sums, and are cleared only afterwards so that subtracting the arc
        # potentials below does not cancel them out.
        inbound_bxm = tf.reduce_sum(masked_bxmxm, 2)
        offdiag_bxmxm = tf.matrix_set_diag(masked_bxmxm, no_diag_bxm)
    else:
        # Trees: pull the root potentials off the diagonal first, so they are
        # excluded from the inbound sums.
        roots_bxm = tf.matrix_diag_part(masked_bxmxm)
        offdiag_bxmxm = tf.matrix_set_diag(masked_bxmxm, no_diag_bxm)
        inbound_bxm = tf.reduce_sum(offdiag_bxmxm, 2)

    # The inbound sums are zero on padding tokens (their arcs were masked) and
    # the padding mask is zero on valid tokens, so adding them yields the arc
    # sums on valid positions and the identity padding elsewhere.  Off-diagonal
    # padding entries are already zero thanks to the arc mask.
    laplacian_bxmxm = (tf.matrix_diag(inbound_bxm + padding_mask_bxm) -
                       offdiag_bxmxm)

    if forest:
        return laplacian_bxmxm

    # Trees: the first column is replaced by the root potentials.
    root_column_bxmx1 = tf.expand_dims(roots_bxm, 2)
    return tf.concat([root_column_bxmx1, laplacian_bxmxm[:, :, 1:]], 2)
示例#22
0
def ArcPotentialsFromTokens(source_tokens, target_tokens, weights):
    r"""Computes arc potentials from token activations and a weight matrix.

  The potential of each arc is the 3-way product of the activation vector of
  its source token, the |weights|, and the activation vector of its target
  token:

    arc[b,s,t] =
        \sum_{i,j} source_tokens[b,s,i] * weights[i,j] * target_tokens[b,t,j]

  Extending the token activations with bias terms turns this into a "biaffine"
  model (Dozat and Manning, 2017).

  Args:
    source_tokens: [B,N,S] tensor of batched activations for the source token
                   in each arc.
    target_tokens: [B,N,T] tensor of batched activations for the target token
                   in each arc.
    weights: [S,T] matrix of weights.

    B and N may be statically unknown; S and T must be statically known.  All
    arguments must have compatible dtypes.

  Returns:
    [B,N,N] tensor A of arc potentials, where A_{b,s,t} is the potential of the
    arc from s to t in batch element b, with the dtype of the arguments.  The
    diagonal entries (s==t) are self-loops and may not be meaningful.
  """
    # Ranks must be known when the graph is built.
    check.Eq(source_tokens.get_shape().ndims, 3,
             'source_tokens must be rank 3')
    check.Eq(target_tokens.get_shape().ndims, 3,
             'target_tokens must be rank 3')
    check.Eq(weights.get_shape().ndims, 2, 'weights must be a matrix')

    # The activation dimensions must be static and must agree between the
    # weights and the tokens.
    weights_dims = weights.get_shape().as_list()
    source_dim = weights_dims[0]
    target_dim = weights_dims[1]
    check.NotNone(source_dim, 'unknown source activation dimension')
    check.NotNone(target_dim, 'unknown target activation dimension')
    check.Eq(source_tokens.get_shape().as_list()[2], source_dim,
             'dimension mismatch between weights and source_tokens')
    check.Eq(target_tokens.get_shape().as_list()[2], target_dim,
             'dimension mismatch between weights and target_tokens')

    # A single dtype must be shared by everything.
    dtypes = [
        weights.dtype.base_dtype,
        source_tokens.dtype.base_dtype,
        target_tokens.dtype.base_dtype,
    ]
    check.Same(dtypes, 'dtype mismatch')

    # The batch and token dimensions can only be compared at run time.
    dynamic_source_shape = tf.shape(source_tokens)
    dynamic_target_shape = tf.shape(target_tokens)
    batch_size = dynamic_source_shape[0]
    num_tokens = dynamic_source_shape[1]
    shape_assertions = [
        tf.assert_equal(batch_size, dynamic_target_shape[0]),
        tf.assert_equal(num_tokens, dynamic_target_shape[1]),
    ]
    with tf.control_dependencies(shape_assertions):
        # Collapse batch and token dimensions so the weights are applied to all
        # target tokens in one large multiplication.
        flat_targets_bnxt = tf.reshape(target_tokens, [-1, target_dim])

        # Storage is row-major, so the right-hand operand of each matmul is
        # passed with its transpose flag set; lining up LHS rows with RHS
        # columns then needs no copy.
        projected_bnxs = tf.matmul(flat_targets_bnxt,
                                   weights,
                                   transpose_b=True)

        # The pairwise step works within each batch element, so bring the
        # batch dimension back.
        projected_bxnxs = tf.reshape(projected_bnxs,
                                     [batch_size, num_tokens, source_dim])

        # Unlike the first matmul, this one is repeated per batch element; the
        # pairwise nature of the computation does not seem to allow folding it
        # into a single multiplication.
        return tf.matmul(source_tokens, projected_bxnxs, transpose_b=True)