Exemplo n.º 1
0
      def __init__(self, name, dep_reprs, head_reprs, roll_direction=0):
        """Builds the arc-score logits and the loss for one prediction module.

        Reads `config`, `inputs`, `labels` and `n_classes` from the enclosing
        scope (not visible in this excerpt). Sets `self.name`, `self.logits`
        and `self.loss`.

        Args:
          name: scope prefix; all variables and layers are created under
            `name + '/predictions'`.
          dep_reprs: representations fed to the 'arc_dep_hidden' projection.
            Presumably [batch, words, dim] -- TODO confirm with the caller.
          head_reprs: representations fed to the 'arc_head_hidden' projection;
            presumably laid out like `dep_reprs`.
          roll_direction: forwarded unchanged to
            `model_helpers.masked_ce_loss`.
        """
        self.name = name
        with tf.variable_scope(name + '/predictions'):
          # Apply a hidden layer (project -> ReLU -> dropout) to each of the two
          # input representations. Note the dropout ops are created head-first,
          # the reverse of the projection/ReLU order.
          arc_dep_hidden = model_helpers.project(
              dep_reprs, config.projection_size, 'arc_dep_hidden')
          arc_head_hidden = model_helpers.project(
              head_reprs, config.projection_size, 'arc_head_hidden')
          arc_dep_hidden = tf.nn.relu(arc_dep_hidden)
          arc_head_hidden = tf.nn.relu(arc_head_hidden)
          arc_head_hidden = tf.nn.dropout(arc_head_hidden, inputs.keep_prob)
          arc_dep_hidden = tf.nn.dropout(arc_dep_hidden, inputs.keep_prob)

          # Bilinear classifier excluding the final dot product: the head side
          # goes through a dense layer, the dependent side through a per-class
          # weight tensor W, so both end in depparse_projection_size features.
          arc_head = tf.layers.dense(
              arc_head_hidden, config.depparse_projection_size, name='arc_head')
          # NOTE(review): the variable names look swapped relative to the
          # shapes -- 'shared_W' carries the n_classes axis while
          # 'relation_specific_W' has none and is tiled across all classes.
          # The names are runtime strings (checkpoint keys), so they are left
          # untouched here.
          W = tf.get_variable('shared_W',
                              shape=[config.projection_size, n_classes,
                                     config.depparse_projection_size])
          Wr = tf.get_variable('relation_specific_W',
                               shape=[config.projection_size,
                                      config.depparse_projection_size])
          # Insert a class axis into Wr and repeat it n_classes times so it can
          # be added to every class slice of W.
          Wr_proj = tf.tile(tf.expand_dims(Wr, axis=-2), [1, n_classes, 1])
          W += Wr_proj
          # Contract the feature axis of arc_dep_hidden with axis 0 of W; the
          # result ends in [n_classes, depparse_projection_size].
          arc_dep = tf.tensordot(arc_dep_hidden, W, axes=[[-1], [0]])
          shape = tf.shape(arc_dep)
          # Collapse everything between the first and last axis into one axis.
          # Assuming arc_dep_hidden is [batch, words, projection_size], this
          # gives [batch, words * n_classes, depparse_projection_size].
          arc_dep = tf.reshape(arc_dep,
                               [shape[0], -1, config.depparse_projection_size])

          # Apply the transformer scaling trick to prevent dot products from
          # getting too large (possibly not necessary). Each side is divided by
          # a value initialised to size ** 0.25, so their product starts out
          # divided by sqrt(size). `scale` is created with tf.get_variable, so
          # it is presumably trainable rather than a fixed constant -- confirm.
          scale = np.power(
              config.depparse_projection_size, 0.25).astype('float32')
          scale = tf.get_variable('scale', initializer=scale, dtype=tf.float32)
          arc_dep /= scale
          arc_head /= scale

          # Compute the scores for each candidate arc: word_scores pairs every
          # row of arc_head with every row of arc_dep; root_scores adds
          # n_classes extra columns, placed first on the last axis.
          word_scores = tf.matmul(arc_head, arc_dep, transpose_b=True)
          root_scores = tf.layers.dense(arc_head, n_classes, name='root_score')
          arc_scores = tf.concat([root_scores, word_scores], axis=-1)

          # Disallow the model from making impossible predictions by building a
          # mask with the same last-axis layout as arc_scores.
          # inputs.mask is used as a rank-2 tensor here; presumably
          # [batch, words] with 1.0 for real tokens -- TODO confirm.
          mask = inputs.mask
          mask_shape = tf.shape(mask)
          # Repeat every mask entry n_classes times, then flatten the copies
          # into the second axis to line up with word_scores' columns.
          mask = tf.tile(tf.expand_dims(mask, -1), [1, 1, n_classes])
          mask = tf.reshape(mask, [-1, mask_shape[1] * n_classes])
          # Prepend the columns that line up with root_scores: a single 1
          # followed by n_classes - 1 zeros, so only the first root column
          # escapes the penalty below.
          mask = tf.concat([tf.ones((mask_shape[0], 1)),
                            tf.zeros((mask_shape[0], n_classes - 1)), mask],
                           axis=1)
          # Use the same column mask for each of the mask_shape[1] rows.
          mask = tf.tile(tf.expand_dims(mask, 1), [1, mask_shape[1], 1])
          # Adds 0 where mask == 1 and -100 where mask == 0.
          arc_scores += (mask - 1) * 100.0

          self.logits = arc_scores
          self.loss = model_helpers.masked_ce_loss(
              self.logits, labels, inputs.mask,
              roll_direction=roll_direction)
Exemplo n.º 2
0
      def __init__(self, name, input_reprs, roll_direction=0, activate=True):
        """Builds per-class logits and a label-smoothed loss for this module.

        Reads `config`, `inputs`, `labels` and `n_classes` from the enclosing
        scope. Sets `self.name`, `self.logits` and `self.loss`.

        Args:
          name: scope prefix; layers are created under `name + '/predictions'`.
          input_reprs: representations passed to `model_helpers.project`.
          roll_direction: forwarded unchanged to
            `model_helpers.masked_ce_loss`.
          activate: when truthy, a ReLU is applied after the projection.
        """
        self.name = name
        with tf.variable_scope(name + '/predictions'):
          hidden = model_helpers.project(input_reprs, config.projection_size)
          hidden = tf.nn.relu(hidden) if activate else hidden
          self.logits = tf.layers.dense(hidden, n_classes, name='predict')

        # Label smoothing: shrink the targets by (1 - smoothing), then spread
        # the smoothing mass evenly over the n_classes entries.
        smoothed = labels * (1 - inputs.label_smoothing)
        smoothed = smoothed + inputs.label_smoothing / n_classes
        self.loss = model_helpers.masked_ce_loss(
            self.logits, smoothed, inputs.mask, roll_direction=roll_direction)
Exemplo n.º 3
0
      def __init__(self, name, input_reprs, roll_direction=0, activate=True):
        """Builds per-class logits and a label-smoothed loss for this module.

        Reads `config`, `inputs`, `labels` and `n_classes` from the enclosing
        scope. Sets `self.name`, `self.logits` and `self.loss`.

        Args:
          name: scope prefix; layers are created under `name + '/predictions'`.
          input_reprs: representations passed to `model_helpers.project`.
          roll_direction: forwarded unchanged to
            `model_helpers.masked_ce_loss`.
          activate: when truthy, a ReLU is applied after the projection.
        """
        self.name = name
        with tf.variable_scope(name + '/predictions'):
          hidden = model_helpers.project(input_reprs, config.projection_size)
          hidden = tf.nn.relu(hidden) if activate else hidden
          self.logits = tf.layers.dense(hidden, n_classes, name='predict')

        # Label smoothing: shrink the targets by (1 - smoothing), then spread
        # the smoothing mass evenly over the n_classes entries.
        smoothed = labels * (1 - inputs.label_smoothing)
        smoothed = smoothed + inputs.label_smoothing / n_classes
        self.loss = model_helpers.masked_ce_loss(
            self.logits, smoothed, inputs.mask, roll_direction=roll_direction)