Example #1
  def testAggregateGradients(self):

    def fn(x):
      ind1 = tensor.Tensor(np.array([0, 1]))
      ind2 = tensor.Tensor(np.array([2, 3]))
      ind3 = tensor.Tensor(np.array([1, 3]))
      # A mixture of IndexedSlices and dense tensor to aggregate.
      g1 = embedding_ops.embedding_lookup(x, ind1)
      g2 = embedding_ops.embedding_lookup(x, ind2)
      g3 = embedding_ops.embedding_lookup(x, ind3)
      g4 = math_ops.reduce_sum(x * tensor.Tensor(2.0))
      return g1 * g2 * g3 * g4

    var_np = np.random.rand(4, 2).astype(np.float32)
    var = tensor.Tensor(var_np)
    grad = backprop.gradients_function(fn, [0])(var)[0]

    with context.graph_mode(), self.test_session():
      tf_var = array_ops.constant(var_np, dtypes.float32)
      tf_ind1 = array_ops.constant([0, 1])
      tf_ind2 = array_ops.constant([2, 3])
      tf_ind3 = array_ops.constant([1, 3])
      tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1)
      tf_g2 = embedding_ops.embedding_lookup(tf_var, tf_ind2)
      tf_g3 = embedding_ops.embedding_lookup(tf_var, tf_ind3)
      tf_g4 = math_ops.reduce_sum(tf_var * 2.0, reduction_indices=(0, 1))
      tf_y = tf_g1 * tf_g2 * tf_g3 * tf_g4
      tf_grad = gradients.gradients(tf_y, [tf_var])[0]

      tf_dense_grad = math_ops.unsorted_segment_sum(
          tf_grad.values, tf_grad.indices, tf_grad.dense_shape[0])

      self.assertAllClose(grad.numpy(), tf_dense_grad.eval())
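The same aggregation can be reproduced with the public TF 2.x eager API; a minimal standalone sketch of the idea (not part of the test above, assuming `import tensorflow as tf` and `import numpy as np`):

import numpy as np
import tensorflow as tf

var = tf.Variable(np.random.rand(4, 2).astype(np.float32))
with tf.GradientTape() as tape:
  g1 = tf.nn.embedding_lookup(var, [0, 1])   # sparse (IndexedSlices) contribution
  g2 = tf.nn.embedding_lookup(var, [2, 3])
  g3 = tf.nn.embedding_lookup(var, [1, 3])
  g4 = tf.reduce_sum(var * 2.0)              # dense contribution
  y = g1 * g2 * g3 * g4
grad = tape.gradient(y, var)                 # sparse and dense parts are summed
dense_grad = tf.convert_to_tensor(grad)      # densify in case IndexedSlices comes back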
Example #2
  def testAggregateGradients(self):

    def fn(x):
      ind1 = constant_op.constant(np.array([0, 1]))
      ind2 = constant_op.constant(np.array([2, 3]))
      ind3 = constant_op.constant(np.array([1, 3]))
      # A mixture of IndexedSlices and dense tensor to aggregate.
      g1 = embedding_ops.embedding_lookup(x, ind1)
      g2 = embedding_ops.embedding_lookup(x, ind2)
      g3 = embedding_ops.embedding_lookup(x, ind3)
      g4 = math_ops.reduce_sum(x * constant_op.constant(2.0))
      return g1 * g2 * g3 * g4

    var_np = np.random.rand(4, 2).astype(np.float32)
    var = constant_op.constant(var_np)
    grad = backprop.gradients_function(fn, [0])(var)[0]
    grad = self.evaluate(ops.convert_to_tensor(grad))

    if not context.executing_eagerly():
      tf_var = array_ops.constant(var_np, dtypes.float32)
      tf_ind1 = array_ops.constant([0, 1])
      tf_ind2 = array_ops.constant([2, 3])
      tf_ind3 = array_ops.constant([1, 3])
      tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1)
      tf_g2 = embedding_ops.embedding_lookup(tf_var, tf_ind2)
      tf_g3 = embedding_ops.embedding_lookup(tf_var, tf_ind3)
      tf_g4 = math_ops.reduce_sum(tf_var * 2.0, axis=(0, 1))
      tf_y = tf_g1 * tf_g2 * tf_g3 * tf_g4
      tf_grad = gradients.gradients(tf_y, [tf_var])[0]

      tf_dense_grad = math_ops.unsorted_segment_sum(
          tf_grad.values, tf_grad.indices, tf_grad.dense_shape[0])

      self.assertAllClose(grad, self.evaluate(tf_dense_grad))
Example #3
 def testConstructionNonSharded(self):
   with ops.Graph().as_default():
     p = variables.Variable(
         array_ops.zeros(
             shape=[100, 100], dtype=dtypes.float32))
     ids = constant_op.constant([0, 1, 1, 7], dtype=dtypes.int32)
     embedding_ops.embedding_lookup([p], ids)
Example #4
 def testHigherRankMaxNorm(self):
   np.random.seed(8)
   with self.cached_session():
     for params_shape in (12,), (6, 3), (6, 2, 3):
       # Test embedding rank 0, 1, 2.
       # Note: the first dimension must be a common multiple of procs below.
       params = 2 * np.ones(params_shape)
       params_norm = params / np.sqrt(
           np.sum(
               params * params, tuple(range(params.ndim)[1:]), keepdims=True))
       for ids_shape in (), (3), (4, 3), (2, 3, 4):
         ids = np.random.randint(
             params.shape[0], size=np.prod(ids_shape,
                                           dtype=np.int64)).reshape(ids_shape)
         # Compare nonsharded to gather
         simple = embedding_ops.embedding_lookup(
             params, ids, max_norm=1.0).eval()
         # assertAllClose is used here as different implementations of sqrt may
         # be used to compute each of the values being compared.  For example,
         # on AVX512 builds the embedding operation makes use of Eigen's fast
         # vectorized square root algorithm for doubles.  These different
         # implementations of sqrt are not guaranteed to produce exactly the
         # same results. Therefore, an exact comparison cannot be made.
         self.assertAllClose(simple, array_ops.gather(params_norm, ids).eval())
         # Run a few different sharded versions.
         for procs in 1, 2, 3:
           stride = procs * math_ops.range(params.shape[0] // procs)
           split_params = [
               array_ops.gather(params, stride + p) for p in xrange(procs)
           ]
           sharded = embedding_ops.embedding_lookup(
               split_params, ids, max_norm=1.0).eval()
           self.assertAllEqual(simple, sharded)
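The max_norm semantics exercised here can be stated in a few lines of NumPy; a rough sketch for 1-D ids (illustrative only, not the actual kernel):

import numpy as np

def lookup_with_max_norm(params, ids, max_norm):
  rows = params[ids]                                   # plain gather
  flat = rows.reshape(rows.shape[0], -1)
  norms = np.sqrt((flat * flat).sum(axis=1))           # L2 norm of each row
  scale = np.minimum(1.0, max_norm / np.maximum(norms, 1e-12))
  return rows * scale.reshape((-1,) + (1,) * (rows.ndim - 1))

With params = 2 * np.ones(...) every row's norm exceeds 1.0, so the lookup returns unit-norm rows, which is why it matches array_ops.gather(params_norm, ids) in the test.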
Example #5
 def testHigherRankMaxNorm(self):
   np.random.seed(8)
   with self.test_session():
     for params_shape in (12,), (6, 3), (6, 2, 3):
       # Test embedding rank 0, 1, 2.
       # Note: the first dimension must be a common multiple of procs below.
       params = 2 * np.ones(params_shape)
       params_norm = params / np.sqrt(
           np.sum(
               params * params, tuple(range(params.ndim)[1:]), keepdims=True))
       for ids_shape in (), (3), (4, 3), (2, 3, 4):
         ids = np.random.randint(
             params.shape[0], size=np.prod(ids_shape,
                                           dtype=np.int64)).reshape(ids_shape)
         # Compare nonsharded to gather
         simple = embedding_ops.embedding_lookup(
             params, ids, max_norm=1.0).eval()
         self.assertAllEqual(simple, array_ops.gather(params_norm, ids).eval())
         # Run a few different sharded versions.
         for procs in 1, 2, 3:
           stride = procs * math_ops.range(params.shape[0] // procs)
           split_params = [
               array_ops.gather(params, stride + p) for p in xrange(procs)
           ]
           sharded = embedding_ops.embedding_lookup(
               split_params, ids, max_norm=1.0).eval()
           self.assertAllEqual(simple, sharded)
Example #6
    def body(it, cost):
      embedding = embedding_ops.embedding_lookup(embedding_matrix, [0])
      cost = control_flow_ops.cond(
          math_ops.equal(it, 3), lambda: math_ops.square(cost),
          (lambda: cost + math_ops.reduce_sum(embedding)))
      return it + 1, cost

    _, cost = control_flow_ops.while_loop(
        cond, body, [constant_op.constant(0),
                     constant_op.constant(0.0)])

    dynamic_grads = gradients_impl.gradients(cost, [embedding_matrix])[0]
    dynamic_grads = math_ops.segment_sum(dynamic_grads.values,
                                         dynamic_grads.indices)

    embedding = embedding_ops.embedding_lookup(embedding_matrix, [0])
    static = math_ops.square(
        math_ops.reduce_sum(embedding) + math_ops.reduce_sum(embedding) +
        math_ops.reduce_sum(embedding)) + math_ops.reduce_sum(embedding)
    static_grads = gradients_impl.gradients(static, [embedding_matrix])[0]
    static_grads = math_ops.segment_sum(static_grads.values,
                                        static_grads.indices)

    with self.cached_session():
      self.evaluate(variables.global_variables_initializer())
      self.assertAllEqual(*self.evaluate([static_grads, dynamic_grads]))
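What the segment_sum calls above do: the loop looks up row 0 repeatedly, so the gradient arrives as IndexedSlices with repeated indices, and summing by index collapses it to one row per id. A tiny illustrative sketch (assuming `import tensorflow as tf`):

values = tf.constant([[1., 1.], [2., 2.], [3., 3.]])  # per-lookup gradient slices
indices = tf.constant([0, 0, 0])                      # every lookup hit row 0
collapsed = tf.math.segment_sum(values, indices)      # -> [[6., 6.]]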
Example #7
 def fn(x):
   ind1 = constant_op.constant(np.array([0, 1]))
   ind2 = constant_op.constant(np.array([2, 3]))
   ind3 = constant_op.constant(np.array([1, 3]))
   # A mixture of IndexedSlices and dense tensor to aggregate.
   g1 = embedding_ops.embedding_lookup(x, ind1)
   g2 = embedding_ops.embedding_lookup(x, ind2)
   g3 = embedding_ops.embedding_lookup(x, ind3)
   g4 = math_ops.reduce_sum(x * constant_op.constant(2.0))
   return g1 * g2 * g3 * g4
Example #8
 def testConstructionSharded(self):
   with ops.Graph().as_default():
     p = []
     for _ in range(2):
       p += [
           variables.Variable(
               array_ops.zeros(shape=[100, 100], dtype=dtypes.float32))
       ]
       ids = constant_op.constant([0, 1, 1, 17], dtype=dtypes.int32)
     embedding_ops.embedding_lookup(p, ids)
Example #9
  def __call__(self, inputs, state, scope=None):
    """Run the cell on embedded inputs."""
    with vs.variable_scope(scope or type(self).__name__):  # "EmbeddingWrapper2"
      with ops.device("/cpu:0"):
        if self._initializer:
          initializer = self._initializer
        elif vs.get_variable_scope().initializer:
          initializer = vs.get_variable_scope().initializer
        else:
          # Default initializer for embeddings should have variance=1.
          sqrt3 = math.sqrt(3)  # Uniform(-sqrt(3), sqrt(3)) has variance=1.
          initializer = init_ops.random_uniform_initializer(-sqrt3, sqrt3)
        embeddings = []
        for i in xrange(len(self._embedding_classes)):
          embeddings.append(
              vs.get_variable("embedding" + str(i),
                              [self._embedding_classes[i],
                               self._embedding_sizes[i]],
                              initializer=initializer))
        embedded = []
        for i in xrange(len(self._embedding_classes)):
          embedded.append(embedding_ops.embedding_lookup(
              embeddings[i], array_ops.reshape(inputs[i], [-1])))

        finalEmbedded = tf.concat(1, embedded)

    return self._cell(finalEmbedded, state)
Example #10
  def __init__(self, embedding, start_tokens, end_token):
    """Initializer.

    Args:
      embedding: A callable that takes a vector tensor of `ids` (argmax ids),
        or the `params` argument for `embedding_lookup`. The returned tensor
        will be passed to the decoder input.
      start_tokens: `int32` vector shaped `[batch_size]`, the start tokens.
      end_token: `int32` scalar, the token that marks end of decoding.

    Raises:
      ValueError: if `start_tokens` is not a 1D tensor or `end_token` is not a
        scalar.
    """
    if callable(embedding):
      self._embedding_fn = embedding
    else:
      self._embedding_fn = (
          lambda ids: embedding_ops.embedding_lookup(embedding, ids))

    self._start_tokens = ops.convert_to_tensor(
        start_tokens, dtype=dtypes.int32, name="start_tokens")
    self._end_token = ops.convert_to_tensor(
        end_token, dtype=dtypes.int32, name="end_token")
    if self._start_tokens.get_shape().ndims != 1:
      raise ValueError("start_tokens must be a vector")
    self._batch_size = array_ops.size(start_tokens)
    if self._end_token.get_shape().ndims != 0:
      raise ValueError("end_token must be a scalar")
    self._start_inputs = self._embedding_fn(self._start_tokens)
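The constructor therefore accepts either an embedding matrix (the `params` argument of `embedding_lookup`) or a lookup callable. A hedged usage sketch with hypothetical names (ThisHelper, vocab_size and embed_dim are placeholders, assuming `import tensorflow as tf`):

embedding_matrix = tf.random.uniform([vocab_size, embed_dim])
helper_a = ThisHelper(embedding_matrix, start_tokens, end_token)
helper_b = ThisHelper(lambda ids: tf.nn.embedding_lookup(embedding_matrix, ids),
                      start_tokens, end_token)   # equivalent to helper_a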
Example #11
def embedding_encoder(encoder_inputs,
                      cell,
                      embedding,
                      num_symbols,
                      embedding_size,
                      bidirectional=False,
                      dtype=None,
                      weight_initializer=None,
                      scope=None):

  with variable_scope.variable_scope(
      scope or "embedding_encoder", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder.
    if not embedding:
      embedding = variable_scope.get_variable("embedding", [num_symbols, embedding_size],
              initializer=weight_initializer())
    emb_inp = [embedding_ops.embedding_lookup(embedding, i) for i in encoder_inputs]
    if bidirectional:
      _, output_state_fw, output_state_bw = rnn.bidirectional_rnn(cell, cell, emb_inp,
              dtype=dtype)
      encoder_state = tf.concat(1, [output_state_fw, output_state_bw])
    else:
      _, encoder_state = rnn.rnn(
        cell, emb_inp, dtype=dtype)

    return encoder_state
Example #12
    def _tf_dec_embedding_attention_decoder(self, enc_out, decoder_input, last_state,
                                    cell, num_symbols, embedding_size, num_heads=1,
                                    output_size=None, output_projection=None,
                                    dtype=dtypes.float32,
                                    scope=None, src_mask=None, maxout_layer=False, encoder="reverse",
                                    start=None, init_const=False, bow_mask=None):
        """Decode single step version of tensorflow.models.rnn.seq2seq.embedding_attention_decoder
            """
        if output_size is None:
          output_size = cell.output_size
        if output_projection is not None:
          proj_weights = ops.convert_to_tensor(output_projection[0], dtype=dtype)
          proj_weights.get_shape().assert_is_compatible_with([cell.output_size,
                                                                num_symbols])   
          proj_biases = ops.convert_to_tensor(output_projection[1], dtype=dtype)
          proj_biases.get_shape().assert_is_compatible_with([num_symbols])

        with variable_scope.variable_scope(scope or "embedding_attention_decoder"):
          with ops.device("/cpu:0"):
            embedding = variable_scope.get_variable("embedding",
                                                    [num_symbols, embedding_size])
          emb_inp = embedding_ops.embedding_lookup(embedding, decoder_input)
          return self._tf_dec_attention_decoder(
              enc_out, emb_inp, last_state, cell, output_size=output_size,
              num_heads=num_heads, src_mask=src_mask, maxout_layer=maxout_layer, embedding_size=embedding_size,
              encoder=encoder, start=start, init_const=init_const, bow_mask=bow_mask)
Example #13
  def create_decoder(self):
    start_time = time.time()

    with vs.variable_scope("embedding" or scope):
      tokens = self.tokens[:-1]
      embeddings = []
      with tf.device("/cpu:0"):
        sqrt3 = np.sqrt(3)
        embedding = vs.get_variable(
            "embedding", [self.vocab_size, self.embedding_size],
            initializer=tf.random_uniform_initializer(-sqrt3, sqrt3))

        for token in tokens:
          # Create the embedding layer.
          emb = embedding_ops.embedding_lookup(embedding, token)
          emb.set_shape([self.batch_size, self.embedding_size])
          embeddings.append(emb)

    cell = rnn_cell.GRUCell(self.decoder_cell_size)
    cell = rnn_cell.OutputProjectionWrapper(cell, self.vocab_size)
    self.decoder_states = rnn.rnn(
        cell, embeddings, dtype=tf.float32, sequence_length=self.tokens_len)[0]
    self.logits = self.decoder_states

    print('create_decoder graph time %f' % (time.time() - start_time))
Example #14
def embedding_lookup_unique(params, ids, name=None):
  """Version of embedding_lookup that avoids duplicate lookups.

  This can save communication in the case of repeated ids.
  Same interface as embedding_lookup, except that it supports multi-dimensional
  `ids`, which avoids having to reshape the input/output to fit gather.

  Args:
    params: A list of tensors with the same shape and type, or a
      `PartitionedVariable`. Shape `[index, d1, d2, ...]`.
    ids: A `Tensor` with type `int32` or `int64` containing the ids to be
      looked up in `params`. Shape `[ids1, ids2, ...]`.
    name: A name for this operation (optional).

  Returns:
    A `Tensor` with the same type as the tensors in `params` and dimension of
    `[ids1, ids2, d1, d2, ...]`.

  Raises:
    ValueError: If `params` is empty.
  """
  with ops.name_scope(name, "EmbeddingLookupUnique", [params, ids]):
    ids = ops.convert_to_tensor(ids)
    shape = array_ops.shape(ids)
    ids_flat = array_ops.reshape(
        ids, math_ops.reduce_prod(shape, keep_dims=True))
    unique_ids, idx = array_ops.unique(ids_flat)
    unique_embeddings = embedding_ops.embedding_lookup(params, unique_ids)
    embeds_flat = array_ops.gather(unique_embeddings, idx)
    embed_shape = array_ops.concat(
        [shape, array_ops.shape(unique_embeddings)[1:]], 0)
    embeds = array_ops.reshape(embeds_flat, embed_shape)
    embeds.set_shape(ids.get_shape().concatenate(
        unique_embeddings.get_shape()[1:]))
    return embeds
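A hypothetical usage sketch (assuming `import tensorflow as tf`): only the unique ids are actually looked up in `params`, and the result is re-expanded locally.

params = tf.constant([[0., 0.], [1., 1.], [2., 2.]])
ids = tf.constant([[1, 1], [2, 1]])              # duplicates, arbitrary rank
embeds = embedding_lookup_unique(params, ids)    # shape [2, 2, 2]; rows 1 and 2 fetched once each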
Example #15
 def extract_argmax_and_embed(prev, _):
   """Loop_function that extracts the symbol from prev and embeds it."""
   if output_projection is not None:
     prev = nn_ops.xw_plus_b(
         prev, output_projection[0], output_projection[1])
   prev_symbol = array_ops.stop_gradient(math_ops.argmax(prev, 1))
   return embedding_ops.embedding_lookup(embedding, prev_symbol)
Example #16
 def calculate_loss_from_wals_model(self, wals_model, sp_inputs):
   current_rows = embedding_ops.embedding_lookup(
       wals_model.row_factors, math_ops.range(wals_model._input_rows),
       partition_strategy="div")
   current_cols = embedding_ops.embedding_lookup(
       wals_model.col_factors, math_ops.range(wals_model._input_cols),
       partition_strategy="div")
   row_wts = embedding_ops.embedding_lookup(
       wals_model._row_weights, math_ops.range(wals_model._input_rows),
       partition_strategy="div")
   col_wts = embedding_ops.embedding_lookup(
       wals_model._col_weights, math_ops.range(wals_model._input_cols),
       partition_strategy="div")
   return factorization_ops_test_utils.calculate_loss(
       sp_inputs, current_rows, current_cols, wals_model._regularization,
       wals_model._unobserved_weight, row_wts, col_wts)
Example #17
  def loop_function(prev, i, log_beam_probs, beam_path, beam_symbols):
    if output_projection is not None:
      prev = nn_ops.xw_plus_b(
          prev, output_projection[0], output_projection[1])
    # prev= prev.get_shape().with_rank(2)[1]

    probs  = tf.log(tf.nn.softmax(prev))

    if i > 1:

        probs = tf.reshape(probs + log_beam_probs[-1],
                               [-1, beam_size * num_symbols])

    best_probs, indices = tf.nn.top_k(probs, beam_size)
    indices = tf.stop_gradient(tf.squeeze(tf.reshape(indices, [-1, 1])))
    best_probs = tf.stop_gradient(tf.reshape(best_probs, [-1, 1]))

    symbols = indices % num_symbols # Which word in vocabulary.
    beam_parent = indices // num_symbols # Which hypothesis it came from.


    beam_symbols.append(symbols)
    beam_path.append(beam_parent)
    log_beam_probs.append(best_probs)

    # Note that gradients will not propagate through the second parameter of
    # embedding_lookup.

    emb_prev = embedding_ops.embedding_lookup(embedding, symbols)
    emb_prev  = tf.reshape(emb_prev,[beam_size,embedding_size])
    # emb_prev = embedding_ops.embedding_lookup(embedding, symbols)
    if not update_embedding:
      emb_prev = array_ops.stop_gradient(emb_prev)
    return emb_prev
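A tiny worked example of the index arithmetic above (illustrative numbers only): with num_symbols = 5, a flattened top-k index of 7 belongs to hypothesis 7 // 5 = 1 and vocabulary symbol 7 % 5 = 2.

num_symbols, index = 5, 7
print(index // num_symbols, index % num_symbols)   # -> 1 2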
Example #18
 def testMinimizeSparseResourceVariable(self):
   for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
     with self.cached_session():
       var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
       var1 = resource_variable_ops.ResourceVariable([3.0], dtype=dtype)
       x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
       pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
       pred += var1
       loss = pred * pred
       sgd_op = gradient_descent.GradientDescentOptimizer(1.0).minimize(loss)
       # TODO(apassos) calling initialize_resources on all resources here
       # doesn't work because the sessions and graph are reused across unit
       # tests and this would mean trying to reinitialize variables. Figure out
       # a long-term solution for this.
       variables.global_variables_initializer().run()
       # Fetch params to validate initial values
       self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
       self.assertAllCloseAccordingToType([3.0], var1.eval())
       # Run 1 step of sgd
       sgd_op.run()
       # Validate updated params
       np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0
       np_grad = 2 * np_pred
       self.assertAllCloseAccordingToType(
           [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]], var0.eval())
       self.assertAllCloseAccordingToType([3.0 - np_grad], var1.eval())
Example #19
  def testAdamSparse(self):
    with ops.device('/cpu:0'):
      # Create 2-D embedding for 3 objects on CPU because sparse/sliced updates
      # are not implemented on TPU.
      embedding_matrix = resource_variable_ops.ResourceVariable(
          array_ops.ones([3, 2]))

    with self.test_scope():
      with backprop.GradientTape() as tape:
        embedding = embedding_ops.embedding_lookup(embedding_matrix, [1])
        y = math_ops.reduce_sum(embedding)
      dy_dx = tape.gradient(y, embedding_matrix)
      self.assertIsInstance(dy_dx, ops.IndexedSlices)
      optimizer = adam.AdamOptimizer(0.1)
      # The gradient application operations will run on CPU because optimizer
      # updates are always collocated with the variable.
      optimizer.apply_gradients([(dy_dx, embedding_matrix)])

      # This assign_add will run on CPU because when an input to an
      # operation is a resource, this operation is placed on the resource's
      # device by the eager runtime.
      embedding_matrix.assign_add(array_ops.ones([3, 2]))

    self.assertAllClose([[2.0, 2.0],
                         [1.9, 1.9],
                         [2.0, 2.0]], embedding_matrix.numpy())
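Back-of-envelope check of the expected values (a sketch of Adam's first-step magnitude, not the optimizer's exact implementation): the gradient touches only row 1, Adam's first step moves it by roughly the learning rate, and the later assign_add of ones brings it to about 1.9 while untouched rows end at 2.0.

lr, b1, b2, eps, g = 0.1, 0.9, 0.999, 1e-8, 1.0   # d(reduce_sum)/d(row 1) is 1 per element
m = (1 - b1) * g
v = (1 - b2) * g * g
step = lr * (m / (1 - b1)) / ((v / (1 - b2)) ** 0.5 + eps)   # ~= 0.1
# looked-up row: 1.0 - step + 1.0 ~= 1.9; other rows: 1.0 + 1.0 = 2.0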
Example #20
  def testShardedDivPartitioningUnknownParamShape(self):
    with self.test_session():
      num_shards = 5
      vocab_size = 13
      # Embedding dimensions is 10. The vocab_size x 10 embedding
      # parameters are spread in num_shards matrices, so the first
      # 3 shards are 3 x 10 and the last 2 shards are 2 x 10.

      # We clear parameter shapes, to test when shape is not statically known.
      p, params, feed_dict = _EmbeddingParams(
          num_shards, vocab_size, use_shapeless_placeholder=True)

      num_vals = 30
      # Fetch num_vals embeddings for random word ids. Since
      # num_vals > vocab_size, this ought to have repetitions, so
      # will test that aspect.
      id_vals = np.random.randint(vocab_size, size=num_vals)
      ids = constant_op.constant(list(id_vals), dtype=dtypes.int64)

      embedding = embedding_ops.embedding_lookup(
          p, ids, partition_strategy="div")
      tf_result = embedding.eval(feed_dict=feed_dict)
    np_result, _, _ = _EmbeddingResult(
        params, id_vals, num_shards, vocab_size, partition_strategy="div")
    self.assertAllEqual(np_result, tf_result)
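An informal sketch of the "div" partition strategy being tested: ids are assigned to shards in contiguous blocks, with the first vocab_size % num_shards shards holding one extra row (matching the 3 x 10 / 2 x 10 split described in the comment above). Illustrative helper, not the library's code:

def div_shard_for_id(idx, vocab_size, num_shards):
  big = vocab_size // num_shards + 1            # rows in each of the larger shards
  small = vocab_size // num_shards              # rows in each remaining shard
  cutoff = (vocab_size % num_shards) * big      # ids covered by the larger shards
  if idx < cutoff:
    return idx // big, idx % big                # (shard, offset within shard)
  return ((vocab_size % num_shards) + (idx - cutoff) // small,
          (idx - cutoff) % small)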
Example #21
  def __call__(self, inputs, state, scope=None):
    """Run the cell on embedded inputs."""
    with vs.variable_scope(scope or type(self).__name__):  # "EmbeddingWrapper"
      with ops.device("/cpu:0"):
        if self._embedding:
          embedding = self._embedding
        else:
          if self._initializer:
            initializer = self._initializer
          elif vs.get_variable_scope().initializer:
            initializer = vs.get_variable_scope().initializer
          else:
            # Default initializer for embeddings should have variance=1.
            sqrt3 = math.sqrt(3)  # Uniform(-sqrt(3), sqrt(3)) has variance=1.
            initializer = init_ops.random_uniform_initializer(-sqrt3, sqrt3)
          embedding = vs.get_variable("embedding", [self._embedding_classes,
                                                    self._cell.input_size],
                                      initializer=initializer)
        embedded = embedding_ops.embedding_lookup(
            embedding, array_ops.reshape(inputs, [-1]))

        """print (embedded)
        print ("{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}")"""

    return self._cell(embedded, state)
Example #22
def embedding_rnn_decoder(decoder_inputs, initial_state, cell, num_symbols,
                          embedding_size, output_projection=None,
                          feed_previous=False,
                          update_embedding_for_previous=True, scope=None):
  """RNN decoder with embedding and a pure-decoding option.

  Args:
    decoder_inputs: A list of 1D batch-sized int32 Tensors (decoder inputs).
    initial_state: 2D Tensor [batch_size x cell.state_size].
    cell: rnn_cell.RNNCell defining the cell function.
    num_symbols: Integer, how many symbols come into the embedding.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_symbols] and B has
      shape [num_symbols]; if provided and feed_previous=True, each fed
      previous output will first be multiplied by W and have B added.
    feed_previous: Boolean; if True, only the first of decoder_inputs will be
      used (the "GO" symbol), and all other decoder inputs will be generated by:
        next = embedding_lookup(embedding, argmax(previous_output)),
      In effect, this implements a greedy decoder. It can also be used
      during training to emulate http://arxiv.org/abs/1506.03099.
      If False, decoder_inputs are used as given (the standard decoder case).
    update_embedding_for_previous: Boolean; if False and feed_previous=True,
      only the embedding for the first symbol of decoder_inputs (the "GO"
      symbol) will be updated by back propagation. Embeddings for the symbols
      generated from the decoder itself remain unchanged. This parameter has
      no effect if feed_previous=False.
    scope: VariableScope for the created subgraph; defaults to
      "embedding_rnn_decoder".

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x output_size] containing the generated outputs.
      state: The state of each decoder cell in each time-step. This is a list
        with length len(decoder_inputs) -- one item for each time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].

  Raises:
    ValueError: When output_projection has the wrong shape.
  """
  if output_projection is not None:
    proj_weights = ops.convert_to_tensor(output_projection[0],
                                         dtype=dtypes.float32)
    proj_weights.get_shape().assert_is_compatible_with([None, num_symbols])
    proj_biases = ops.convert_to_tensor(
        output_projection[1], dtype=dtypes.float32)
    proj_biases.get_shape().assert_is_compatible_with([num_symbols])

  with variable_scope.variable_scope(scope or "embedding_rnn_decoder"):
    with ops.device("/cpu:0"):
      embedding = variable_scope.get_variable("embedding",
                                              [num_symbols, embedding_size])
    loop_function = _extract_argmax_and_embed(
        embedding, output_projection,
        update_embedding_for_previous) if feed_previous else None
    emb_inp = (
        embedding_ops.embedding_lookup(embedding, i) for i in decoder_inputs)
    return rnn_decoder(emb_inp, initial_state, cell,
                       loop_function=loop_function)
Example #23
  def doTestIndexedSlicesGradientInCondInWhileLoop(self, use_resource=False):
    with ops.Graph().as_default():
      embedding_matrix = variable_scope.get_variable(
          "embedding_matrix", [5, 5],
          initializer=init_ops.random_normal_initializer(),
          use_resource=use_resource)

      def Cond(it, _):
        return it < 5

      def Body(it, cost):
        embedding = embedding_ops.embedding_lookup(embedding_matrix, [0])
        cost = control_flow_ops.cond(
            math_ops.equal(it, 3), lambda: math_ops.square(cost),
            lambda: cost + math_ops.reduce_sum(embedding))
        return it + 1, cost

      _, cost = control_flow_ops.while_loop(
          Cond, Body, [constant_op.constant(0), constant_op.constant(0.0)])

      dynamic_grads = gradients_impl.gradients(cost, [embedding_matrix])[0]
      dynamic_grads = math_ops.segment_sum(dynamic_grads.values,
                                           dynamic_grads.indices)

      embedding = embedding_ops.embedding_lookup(embedding_matrix, [0])
      static = math_ops.square(
          math_ops.reduce_sum(embedding) + math_ops.reduce_sum(embedding) +
          math_ops.reduce_sum(embedding)) + math_ops.reduce_sum(embedding)
      static_grads = gradients_impl.gradients(static, [embedding_matrix])[0]
      static_grads = math_ops.segment_sum(static_grads.values,
                                          static_grads.indices)

      with self.test_session() as sess:
        sess.run(variables.global_variables_initializer())
        self.assertAllEqual(*sess.run([static_grads, dynamic_grads]))
Example #24
  def testIndexedSlicesGradientInCondInWhileLoop(self):
    with ops.Graph().as_default():
      embedding_matrix = tf.get_variable(
          "embedding_matrix", [5, 5],
          initializer=tf.random_normal_initializer())

      def Cond(it, _):
        return it < 5
      def Body(it, cost):
        embedding = embedding_ops.embedding_lookup(embedding_matrix, [0])
        cost = tf.cond(tf.equal(it, 3),
                       lambda: tf.square(cost),
                       lambda: cost + tf.reduce_sum(embedding))
        return it + 1, cost
      _, cost = control_flow_ops.While(
          Cond, Body, [tf.constant(0), tf.constant(0.0)])

      dynamic_grads = tf.gradients(cost, [embedding_matrix])[0]
      dynamic_grads = tf.segment_sum(dynamic_grads.values,
                                     dynamic_grads.indices)

      embedding = embedding_ops.embedding_lookup(embedding_matrix, [0])
      static = tf.square(
          tf.reduce_sum(embedding) +
          tf.reduce_sum(embedding) +
          tf.reduce_sum(embedding)) + tf.reduce_sum(embedding)
      static_grads = tf.gradients(static, [embedding_matrix])[0]
      static_grads = tf.segment_sum(static_grads.values, static_grads.indices)

      with self.test_session() as sess:
        sess.run(tf.initialize_all_variables())
        self.assertAllEqual(*sess.run([static_grads, dynamic_grads]))
Example #25
def attention_decoder_with_embedding(decoder_inputs, initial_state, attention_states,
                                     cell, embedding, num_heads=1,
                                     output_size=None, dtype=dtypes.float32, scope=None,
                                     initial_state_attention=False):
    """
    We are not using output_projection because we are NOT using a sampled softmax

    Parameters
    ----------
    decoder_inputs
    initial_state
    attention_states
    cell
    embedding: outside embedding passed in
    num_heads
    output_size
    dtype
    scope
    initial_state_attention

    Returns
    -------

    """
    if output_size is None:
        output_size = cell.output_size

    with vs.variable_scope(scope or "attention_decoder_with_embedding"):
        emb_inp = [
            embedding_ops.embedding_lookup(embedding, i) for i in decoder_inputs]
        return attention_decoder(
            emb_inp, initial_state, attention_states, cell, output_size=output_size,
            num_heads=num_heads, loop_function=None,
            initial_state_attention=initial_state_attention)
Example #26
  def __call__(self, inputs, state, scope=None):
    """Run the cell on embedded inputs."""
    with _checked_scope(self, scope or "embedding_wrapper", reuse=self._reuse):
      with ops.device("/cpu:0"):
        if self._initializer:
          initializer = self._initializer
        elif vs.get_variable_scope().initializer:
          initializer = vs.get_variable_scope().initializer
        else:
          # Default initializer for embeddings should have variance=1.
          sqrt3 = math.sqrt(3)  # Uniform(-sqrt(3), sqrt(3)) has variance=1.
          initializer = init_ops.random_uniform_initializer(-sqrt3, sqrt3)

        if type(state) is tuple:
          data_type = state[0].dtype
        else:
          data_type = state.dtype

        embedding = vs.get_variable(
            "embedding", [self._embedding_classes, self._embedding_size],
            initializer=initializer,
            dtype=data_type)
        embedded = embedding_ops.embedding_lookup(
            embedding, array_ops.reshape(inputs, [-1]))
    return self._cell(embedded, state)
Example #27
 def _random(self):
   indices = random_ops.random_uniform(
       array_ops.reshape(self._num_remaining, [-1]),
       minval=0,
       maxval=math_ops.cast(self._num_data, dtypes.int64),
       seed=self._random_seed,
       dtype=dtypes.int64)
   return embedding_lookup(self._inputs, indices, partition_strategy='div')
Example #28
def _one_hot_to_embedding(one_hot, embedding_size):
  """Get a dense embedding vector from a one-hot encoding."""
  num_tokens = one_hot.shape[1]
  label_id = math_ops.argmax(one_hot, axis=1)
  embedding = variable_scope.get_variable(
      'embedding', [num_tokens, embedding_size])
  return embedding_ops.embedding_lookup(
      embedding, label_id, name='token_to_embedding')
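For a strictly one-hot input, the lookup above is equivalent to a matmul with the embedding matrix, which makes a handy sanity check. Illustrative sketch (assuming `import tensorflow as tf`):

one_hot = tf.constant([[0., 1., 0.], [1., 0., 0.]])
embedding = tf.constant([[1., 1.], [2., 2.], [3., 3.]])
via_lookup = tf.nn.embedding_lookup(embedding, tf.argmax(one_hot, axis=1))
via_matmul = tf.matmul(one_hot, embedding)   # same values as via_lookup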
Example #29
    def add_embedding_layer(self, emb_matrix):
        """
        Adds word embedding layer to the graph.

        Inputs:
          emb_matrix: shape (400002, embedding_size).
            The GloVe vectors, plus vectors for PAD and UNK.
        """
        with vs.variable_scope("embeddings"):

            # Note: the embedding matrix is a tf.constant which means it's not a trainable parameter
            embedding_matrix = tf.constant(emb_matrix, dtype=tf.float32, name="emb_matrix") # shape (400002, embedding_size)

            # Get the word embeddings for the context and question,
            # using the placeholders self.context_ids and self.qn_ids
            self.context_embs = embedding_ops.embedding_lookup(embedding_matrix, self.context_ids) # shape (batch_size, context_len, embedding_size)
            self.qn_embs = embedding_ops.embedding_lookup(embedding_matrix, self.qn_ids) # shape (batch_size, question_len, embedding_size)
Example #30
 def calculate_loss(self):
   """Calculates the loss of the current (trained) model."""
   current_rows = embedding_ops.embedding_lookup(
       self._model.get_row_factors(), math_ops.range(self._num_rows),
       partition_strategy='div')
   current_cols = embedding_ops.embedding_lookup(
       self._model.get_col_factors(), math_ops.range(self._num_cols),
       partition_strategy='div')
   row_wts = embedding_ops.embedding_lookup(
       self._row_weights, math_ops.range(self._num_rows),
       partition_strategy='div')
   col_wts = embedding_ops.embedding_lookup(
       self._col_weights, math_ops.range(self._num_cols),
       partition_strategy='div')
   sp_inputs = self.np_array_to_sparse(self.INPUT_MATRIX)
   return factorization_ops_test_utils.calculate_loss(
       sp_inputs, current_rows, current_cols, self._regularization_coeff,
       self._unobserved_weight, row_wts, col_wts)
Example #31
def embedding_rnn_decoder(decoder_inputs,
                          initial_state,
                          cell,
                          num_symbols,
                          embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          update_embedding_for_previous=True,
                          scope=None,
                          beam_search=True,
                          beam_size=10):
    """RNN decoder with embedding and a pure-decoding option.

    Args:
      decoder_inputs: A list of 1D batch-sized int32 Tensors (decoder inputs).
      initial_state: 2D Tensor [batch_size x cell.state_size].
      cell: rnn_cell.RNNCell defining the cell function.
      num_symbols: Integer, how many symbols come into the embedding.
      embedding_size: Integer, the length of the embedding vector for each symbol.
      output_projection: None or a pair (W, B) of output projection weights and
        biases; W has shape [output_size x num_symbols] and B has
        shape [num_symbols]; if provided and feed_previous=True, each fed
        previous output will first be multiplied by W and have B added.
      feed_previous: Boolean; if True, only the first of decoder_inputs will be
        used (the "GO" symbol), and all other decoder inputs will be generated by:
          next = embedding_lookup(embedding, argmax(previous_output)),
        In effect, this implements a greedy decoder. It can also be used
        during training to emulate http://arxiv.org/abs/1506.03099.
        If False, decoder_inputs are used as given (the standard decoder case).
      update_embedding_for_previous: Boolean; if False and feed_previous=True,
        only the embedding for the first symbol of decoder_inputs (the "GO"
        symbol) will be updated by back propagation. Embeddings for the symbols
        generated from the decoder itself remain unchanged. This parameter has
        no effect if feed_previous=False.
      scope: VariableScope for the created subgraph; defaults to
        "embedding_rnn_decoder".

    Returns:
      A tuple of the form (outputs, state), where:
        outputs: A list of the same length as decoder_inputs of 2D Tensors with
          shape [batch_size x output_size] containing the generated outputs.
        state: The state of each decoder cell in each time-step. This is a list
          with length len(decoder_inputs) -- one item for each time-step.
          It is a 2D Tensor of shape [batch_size x cell.state_size].

    Raises:
      ValueError: When output_projection has the wrong shape.
    """
    if output_projection is not None:
        proj_weights = ops.convert_to_tensor(output_projection[0],
                                             dtype=dtypes.float32)
        proj_weights.get_shape().assert_is_compatible_with([None, num_symbols])
        proj_biases = ops.convert_to_tensor(output_projection[1],
                                            dtype=dtypes.float32)
        proj_biases.get_shape().assert_is_compatible_with([num_symbols])

    # TODO: Investigate could an EmbeddingWrapper work here? As well as an OutputProjectionWrapper
    with variable_scope.variable_scope(scope or "embedding_rnn_decoder"):
        with ops.device("/cpu:0"):
            embedding = variable_scope.get_variable(
                "embedding", [num_symbols, embedding_size])

        emb_inp = [
            embedding_ops.embedding_lookup(embedding, i)
            for i in decoder_inputs
        ]

        if beam_search:
            return beam_rnn_decoder(emb_inp,
                                    initial_state,
                                    cell,
                                    output_projection=output_projection,
                                    embedding=embedding,
                                    beam_size=beam_size)

        else:
            loop_function = _extract_argmax_and_embed(
                embedding, output_projection,
                update_embedding_for_previous) if feed_previous else None

            return rnn_decoder(emb_inp,
                               initial_state,
                               cell,
                               loop_function=loop_function,
                               scope=scope)
Example #32
 def loss():
     x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
     pred = math_ops.matmul(
         embedding_ops.embedding_lookup([var0], [0]), x)
     return pred * pred
Example #33
def decode(cell,
           init_state,
           vocab_size,
           embedding,
           decoder_inputs,
           out_proj,
           maxlen,
           more_args,
           mem_args,
           feed_prev=False,
           loop_function=None,
           copy_transform=None,
           dtype=tf.float32):
    with variable_scope.variable_scope("embedding_rnn_decoder") as scope:
        outputs = []
        hiddens = []
        state = init_state
        if not feed_prev:
            emb_inputs = (embedding_ops.embedding_lookup(embedding, i)
                          for i in decoder_inputs)
            for i, emb_inp in enumerate(emb_inputs):
                if i >= maxlen:
                    break
                if i > 0:
                    variable_scope.get_variable_scope().reuse_variables()
                output, state = cell(emb_inp, state)
                outputs.append(output)
                hiddens.append(state)
            return outputs, hiddens, state
        else:
            a1s = []
            kdists = []
            Ndists = []
            Rdebugs = []
            samples = []
            i = 0
            prev = None
            tmp = None
            emb_inp = embedding_ops.embedding_lookup(embedding,
                                                     decoder_inputs[0])
            while (True):
                if i > 0:
                    variable_scope.get_variable_scope().reuse_variables()
                output, state = cell(emb_inp, state)
                outputs.append(output)
                hiddens.append(state)

                with tf.variable_scope('loop', reuse=True):
                    if output is not None:
                        loop_return = loop_function(output, out_proj,
                                                    embedding)
                        #loop_return = loop_function(state, output)
                if loop_return is not None:
                    emb_inp, prev_symbol = loop_return
                    samples.append(prev_symbol)
                    #emb_inp, prev_symbol, a1, kdist, Ndist, Rdebug = loop_return
                    #a1s.append(a1)
                    #kdists.append(kdist)
                    #Ndists.append(Ndist)
                    #Rdebugs.append(Rdebug)
                i += 1
                if i >= maxlen:
                    break
            return outputs, samples, hiddens, a1s, kdists, Ndists, Rdebugs
Example #34
def _rank_resample(weights, biases, inputs, sampled_values, num_resampled,
                   resampling_temperature, partition_strategy):
  """A helper function for rank_sampled_softmax_loss.

  This computes, for each i in `sampled_values`,

      log(sum_j exp((w_i * x_j + b_i) / resampling_temperature))

  where w_i, b_i are the weight and bias of the i-th class, respectively,
  and j ranges over the rows of `inputs`. For efficiency, we rearrange the
  computation to

      log(sum_j exp(w_i * (x_j / resampling_temperature))) +
          b_i / resampling_temperature.

  This translates to the following batched computation using tensorflow ops:

      reduce_logsumexp(matmul(embeddings,
                       transpose(inputs / resampling_temperature))) +
          biases / resampling_temperature

  The computation of the first term is colocated with the embeddings using
  `transform_fn` in `embedding_ops._embedding_lookup_and_transform`. The second
  term, not the bottleneck, is computed at the worker.

  Args:
    weights: From `rank_sampled_softmax_loss`.
    biases: From `rank_sampled_softmax_loss`.
    inputs: From `rank_sampled_softmax_loss`.
    sampled_values: A tuple of (`sampled_candidates`, `true_expected_count`,
        `sampled_expected_count`) returned by a `*_candidate_sampler` function.
    num_resampled: An `int`. This many values are selected from
        `sampled_values` using the adaptive resampling algorithm. The caller
        must ensure that `num_resampled` is less than the size of
        `sampled_values`.
    resampling_temperature: A scalar `Tensor` with the temperature parameter
        for the adaptive resampling algorithm.
    partition_strategy: From `rank_sampled_softmax_loss`.

  Returns:
    A tuple of (`resampled_candidates`, `true_expected_count`,
        `resampled_expected_count`), similar to `sampled_values` but sampled
        down to `num_resampled` values.
  """
  # This code supports passing a Tensor for num_resampled, but since it is only
  # called with an int, that's what we specify in the arg list. If this
  # function is ever externalized, we should change the doc to support Tensor.

  sampled, true_expected_count, sampled_expected_count = sampled_values

  sampled = math_ops.cast(array_ops.stop_gradient(sampled), dtypes.int64)
  true_expected_count = array_ops.stop_gradient(true_expected_count)
  sampled_expected_count = array_ops.stop_gradient(sampled_expected_count)

  reweighted_inputs = inputs / resampling_temperature

  def logsumexp_logit(embeddings):
    return math_ops.reduce_logsumexp(
        math_ops.matmul(embeddings, reweighted_inputs, transpose_b=True),
        axis=1,
        keep_dims=False)

  # Calling this protected form of embedding_lookup allows co-locating
  # the logsumexp computation with the partitioned weights, which yields
  # a large speedup in practice.
  sampled_logits = embedding_ops._embedding_lookup_and_transform(  # pylint: disable=protected-access
      weights, sampled, partition_strategy, transform_fn=logsumexp_logit)
  sampled_b = array_ops.reshape(
      embedding_ops.embedding_lookup(biases, sampled, partition_strategy), [-1])
  sampled_logits += sampled_b / resampling_temperature

  _, resampled_indices = nn.top_k(sampled_logits, k=num_resampled, sorted=False)
  resampled = array_ops.gather(sampled, indices=resampled_indices)
  resampled_expected_count = array_ops.gather(
      sampled_expected_count, indices=resampled_indices)

  return resampled, true_expected_count, resampled_expected_count
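The resampling score from the docstring can also be written with plain ops when weights and biases are single (non-partitioned) tensors; this skips the colocated _embedding_lookup_and_transform optimization and is illustrative only (assuming `import tensorflow as tf`):

def resampling_scores(weights, biases, inputs, sampled, temperature):
  # score_i = logsumexp_j((w_i . x_j + b_i) / T)
  #         = logsumexp_j(w_i . (x_j / T)) + b_i / T
  sampled_w = tf.nn.embedding_lookup(weights, sampled)                     # [S, dim]
  logits = tf.matmul(sampled_w, inputs / temperature, transpose_b=True)    # [S, batch]
  return tf.reduce_logsumexp(logits, axis=1) + tf.gather(biases, sampled) / temperature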
Example #35
 def loss():
     return math_ops.reduce_sum(
         embedding_ops.embedding_lookup(var0, [[1]]))
Example #36
def loop_function(prev, out_proj, embedding):
    prev = nn_ops.xw_plus_b(prev, out_proj[0], out_proj[1])
    prev_symbol = math_ops.argmax(prev, axis=1)
    emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
    return [emb_prev, prev_symbol]
Example #37
    def __init__(self, sess, config, api, log_dir, forward, scope=None):
        self.vocab = api.vocab
        self.rev_vocab = api.rev_vocab
        self.vocab_size = len(self.vocab)
        self.sess = sess
        self.scope = scope
        self.max_utt_len = config.max_utt_len
        self.go_id = self.rev_vocab["<s>"]
        self.eos_id = self.rev_vocab["</s>"]
        self.context_cell_size = config.cxt_cell_size
        self.sent_cell_size = config.sent_cell_size
        self.dec_cell_size = config.dec_cell_size
        self.num_topics = config.num_topics

        with tf.name_scope("io"):
            # all dialog context and known attributes
            self.input_contexts = tf.placeholder(dtype=tf.int32,
                                                 shape=(None, None,
                                                        self.max_utt_len),
                                                 name="dialog_context")
            self.floors = tf.placeholder(dtype=tf.float32,
                                         shape=(None, None),
                                         name="floor")  # TODO float
            self.floor_labels = tf.placeholder(dtype=tf.float32,
                                               shape=(None, 1),
                                               name="floor_labels")
            self.context_lens = tf.placeholder(dtype=tf.int32,
                                               shape=(None, ),
                                               name="context_lens")
            self.paragraph_topics = tf.placeholder(dtype=tf.float32,
                                                   shape=(None,
                                                          self.num_topics),
                                                   name="paragraph_topics")

            # target response given the dialog context
            self.output_tokens = tf.placeholder(dtype=tf.int32,
                                                shape=(None, None),
                                                name="output_token")
            self.output_lens = tf.placeholder(dtype=tf.int32,
                                              shape=(None, ),
                                              name="output_lens")
            self.output_das = tf.placeholder(dtype=tf.float32,
                                             shape=(None, self.num_topics),
                                             name="output_dialog_acts")

            # optimization related variables
            self.learning_rate = tf.Variable(float(config.init_lr),
                                             trainable=False,
                                             name="learning_rate")
            self.learning_rate_decay_op = self.learning_rate.assign(
                tf.multiply(self.learning_rate, config.lr_decay))
            self.global_t = tf.placeholder(dtype=tf.int32, name="global_t")
            self.use_prior = tf.placeholder(dtype=tf.bool, name="use_prior")

        max_dialog_len = array_ops.shape(self.input_contexts)[1]
        max_out_len = array_ops.shape(self.output_tokens)[1]
        batch_size = array_ops.shape(self.input_contexts)[0]

        with variable_scope.variable_scope("wordEmbedding"):
            self.embedding = tf.get_variable(
                "embedding", [self.vocab_size, config.embed_size],
                dtype=tf.float32)
            embedding_mask = tf.constant(
                [0 if i == 0 else 1 for i in range(self.vocab_size)],
                dtype=tf.float32,
                shape=[self.vocab_size, 1])
            embedding = self.embedding * embedding_mask

            # embed the input
            input_embedding = embedding_ops.embedding_lookup(
                embedding, tf.reshape(self.input_contexts, [-1]))
            # reshape embedding. -1 means that the first dimension can be whatever necessary to make the other 2 dimensions work w/the data
            input_embedding = tf.reshape(
                input_embedding, [-1, self.max_utt_len, config.embed_size])
            # embed the output so you can feed it into the VAE
            output_embedding = embedding_ops.embedding_lookup(
                embedding, self.output_tokens)

            #
            if config.sent_type == "bow":
                input_embedding, sent_size = get_bow(input_embedding)
                output_embedding, _ = get_bow(output_embedding)

            elif config.sent_type == "rnn":
                sent_cell = self.get_rnncell("gru", self.sent_cell_size,
                                             config.keep_prob, 1)
                input_embedding, sent_size = get_rnn_encode(input_embedding,
                                                            sent_cell,
                                                            scope="sent_rnn")
                output_embedding, _ = get_rnn_encode(output_embedding,
                                                     sent_cell,
                                                     self.output_lens,
                                                     scope="sent_rnn",
                                                     reuse=True)
            elif config.sent_type == "bi_rnn":
                fwd_sent_cell = self.get_rnncell("gru",
                                                 self.sent_cell_size,
                                                 keep_prob=1.0,
                                                 num_layer=1)
                bwd_sent_cell = self.get_rnncell("gru",
                                                 self.sent_cell_size,
                                                 keep_prob=1.0,
                                                 num_layer=1)
                input_embedding, sent_size = get_bi_rnn_encode(
                    input_embedding,
                    fwd_sent_cell,
                    bwd_sent_cell,
                    scope="sent_bi_rnn")
                output_embedding, _ = get_bi_rnn_encode(output_embedding,
                                                        fwd_sent_cell,
                                                        bwd_sent_cell,
                                                        self.output_lens,
                                                        scope="sent_bi_rnn",
                                                        reuse=True)
            else:
                raise ValueError(
                    "Unknown sent_type. Must be one of [bow, rnn, bi_rnn]")

            # reshape input into dialogs
            input_embedding = tf.reshape(input_embedding,
                                         [-1, max_dialog_len, sent_size])
            if config.keep_prob < 1.0:
                input_embedding = tf.nn.dropout(input_embedding,
                                                config.keep_prob)

            # reshape floors
            floor = tf.reshape(self.floors, [-1, max_dialog_len, 1])

            joint_embedding = tf.concat([input_embedding, floor], 2,
                                        "joint_embedding")

        with variable_scope.variable_scope("contextRNN"):
            enc_cell = self.get_rnncell(config.cell_type,
                                        self.context_cell_size,
                                        keep_prob=1.0,
                                        num_layer=config.num_layer)
            # and enc_last_state will be same as the true last state
            _, enc_last_state = tf.nn.dynamic_rnn(
                enc_cell,
                joint_embedding,
                dtype=tf.float32,
                sequence_length=self.context_lens)

            if config.num_layer > 1:
                if config.cell_type == 'lstm':
                    enc_last_state = [temp.h for temp in enc_last_state]

                enc_last_state = tf.concat(enc_last_state, 1)
            else:
                if config.cell_type == 'lstm':
                    enc_last_state = enc_last_state.h

        # combine with other attributes
        if config.use_hcf:
            # TODO is this reshape ok?
            attribute_embedding = tf.reshape(
                self.output_das, [-1, self.num_topics])  # da_embedding
            attribute_fc1 = layers.fully_connected(attribute_embedding,
                                                   30,
                                                   activation_fn=tf.tanh,
                                                   scope="attribute_fc1")

        # conditions include topic and rnn of all previous birnn results and metadata about the two people
        cond_list = [self.paragraph_topics, enc_last_state]
        cond_embedding = tf.concat(cond_list, 1)  #float32

        with variable_scope.variable_scope("recognitionNetwork"):
            if config.use_hcf:
                recog_input = tf.concat(
                    [cond_embedding, output_embedding, attribute_fc1], 1)
            else:
                recog_input = tf.concat([cond_embedding, output_embedding], 1)
            self.recog_mulogvar = recog_mulogvar = layers.fully_connected(
                recog_input,
                config.latent_size * 2,
                activation_fn=None,
                scope="muvar")
            # mu and logvar are both vectors of size latent_size
            recog_mu, recog_logvar = tf.split(recog_mulogvar, 2, axis=1)

        with variable_scope.variable_scope("priorNetwork"):
            # P(XYZ)=P(Z|X)P(X)P(Y|X,Z)
            prior_fc1 = layers.fully_connected(cond_embedding,
                                               np.maximum(
                                                   config.latent_size * 2,
                                                   100),
                                               activation_fn=tf.tanh,
                                               scope="fc1")
            prior_mulogvar = layers.fully_connected(prior_fc1,
                                                    config.latent_size * 2,
                                                    activation_fn=None,
                                                    scope="muvar")
            prior_mu, prior_logvar = tf.split(prior_mulogvar, 2, axis=1)

            latent_sample = tf.cond(
                self.use_prior,
                lambda: sample_gaussian(prior_mu, prior_logvar),
                lambda: sample_gaussian(recog_mu, recog_logvar))

        with variable_scope.variable_scope("generationNetwork"):
            gen_inputs = tf.concat([cond_embedding, latent_sample],
                                   1)  #float32

            # BOW loss
            bow_fc1 = layers.fully_connected(gen_inputs,
                                             400,
                                             activation_fn=tf.tanh,
                                             scope="bow_fc1")
            if config.keep_prob < 1.0:
                bow_fc1 = tf.nn.dropout(bow_fc1, config.keep_prob)
            self.bow_logits = layers.fully_connected(bow_fc1,
                                                     self.vocab_size,
                                                     activation_fn=None,
                                                     scope="bow_project")

            # Predicting Y (topic)
            if config.use_hcf:
                meta_fc1 = layers.fully_connected(gen_inputs,
                                                  400,
                                                  activation_fn=tf.tanh,
                                                  scope="meta_fc1")
                if config.keep_prob < 1.0:
                    meta_fc1 = tf.nn.dropout(meta_fc1, config.keep_prob)
                self.da_logits = layers.fully_connected(
                    meta_fc1, self.num_topics, scope="da_project")  # float32

                da_prob = tf.nn.softmax(self.da_logits)
                pred_attribute_embedding = da_prob  # TODO change the name of this to predicted sentence topic
                # pred_attribute_embedding = tf.matmul(da_prob, d_embedding)

                if forward:
                    selected_attribute_embedding = pred_attribute_embedding
                else:
                    selected_attribute_embedding = attribute_embedding
                dec_inputs = tf.concat(
                    [gen_inputs, selected_attribute_embedding], 1)

            # if use_hcf is off, the model does not predict Y
            else:
                self.da_logits = tf.zeros((batch_size, self.num_topics))
                dec_inputs = gen_inputs
                selected_attribute_embedding = None

            # Predict whether this is the end of the paragraph
            self.paragraph_end_logits = layers.fully_connected(
                gen_inputs,
                1,
                activation_fn=tf.tanh,
                scope="paragraph_end_fc1")  # float32

            # Decoder
            if config.num_layer > 1:
                dec_init_state = []
                for i in range(config.num_layer):
                    temp_init = layers.fully_connected(dec_inputs,
                                                       self.dec_cell_size,
                                                       activation_fn=None,
                                                       scope="init_state-%d" %
                                                       i)
                    if config.cell_type == 'lstm':
                        # LSTM cells need an LSTMStateTuple as the initial state
                        temp_init = rnn_cell.LSTMStateTuple(
                            temp_init, temp_init)

                    dec_init_state.append(temp_init)

                dec_init_state = tuple(dec_init_state)
            else:
                dec_init_state = layers.fully_connected(dec_inputs,
                                                        self.dec_cell_size,
                                                        activation_fn=None,
                                                        scope="init_state")
                if config.cell_type == 'lstm':
                    dec_init_state = rnn_cell.LSTMStateTuple(
                        dec_init_state, dec_init_state)

        with variable_scope.variable_scope("decoder"):
            dec_cell = self.get_rnncell(config.cell_type, self.dec_cell_size,
                                        config.keep_prob, config.num_layer)
            # project decoder outputs to vocab size. TODO no softmax?
            dec_cell = OutputProjectionWrapper(dec_cell, self.vocab_size)

            if forward:
                loop_func = decoder_fn_lib.context_decoder_fn_inference(
                    None,
                    dec_init_state,
                    embedding,
                    start_of_sequence_id=self.go_id,
                    end_of_sequence_id=self.eos_id,
                    maximum_length=self.max_utt_len,
                    num_decoder_symbols=self.vocab_size,
                    context_vector=selected_attribute_embedding)
                dec_input_embedding = None
                dec_seq_lens = None
            else:
                loop_func = decoder_fn_lib.context_decoder_fn_train(
                    dec_init_state, selected_attribute_embedding)
                dec_input_embedding = embedding_ops.embedding_lookup(
                    embedding, self.output_tokens)
                dec_input_embedding = dec_input_embedding[:, 0:-1, :]
                dec_seq_lens = self.output_lens - 1

                if config.keep_prob < 1.0:
                    dec_input_embedding = tf.nn.dropout(
                        dec_input_embedding, config.keep_prob)

                # apply word dropping. Set dropped word to 0
                if config.dec_keep_prob < 1.0:
                    # get mask of keep/throw-away tokens
                    keep_mask = tf.less_equal(
                        tf.random_uniform((batch_size, max_out_len - 1),
                                          minval=0.0,
                                          maxval=1.0), config.dec_keep_prob)
                    keep_mask = tf.expand_dims(tf.to_float(keep_mask), 2)
                    dec_input_embedding = dec_input_embedding * keep_mask
                    dec_input_embedding = tf.reshape(
                        dec_input_embedding,
                        [-1, max_out_len - 1, config.embed_size])

            dec_outs, _, final_context_state = dynamic_rnn_decoder(
                dec_cell,
                loop_func,
                inputs=dec_input_embedding,
                sequence_length=dec_seq_lens,
                name='output_node')

            if final_context_state is not None:
                final_context_state = final_context_state[
                    :, 0:array_ops.shape(dec_outs)[1]]
                mask = tf.to_int32(tf.sign(tf.reduce_max(dec_outs, axis=2)))
                self.dec_out_words = tf.multiply(
                    tf.reverse(final_context_state, axis=[1]), mask)
            else:
                self.dec_out_words = tf.argmax(dec_outs, 2)

        if not forward:
            with variable_scope.variable_scope("loss"):

                labels = self.output_tokens[:, 1:]  # correct word tokens
                label_mask = tf.to_float(tf.sign(labels))

                # Loss between words
                rc_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=dec_outs, labels=labels)
                rc_loss = tf.reduce_sum(rc_loss * label_mask,
                                        reduction_indices=1)
                self.avg_rc_loss = tf.reduce_mean(rc_loss)
                # used only for perplexity calculation. Not used for optimization
                self.rc_ppl = tf.exp(
                    tf.reduce_sum(rc_loss) / tf.reduce_sum(label_mask))

                # BOW loss
                tile_bow_logits = tf.tile(tf.expand_dims(self.bow_logits, 1),
                                          [1, max_out_len - 1, 1])
                bow_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=tile_bow_logits, labels=labels) * label_mask
                bow_loss = tf.reduce_sum(bow_loss, reduction_indices=1)
                self.avg_bow_loss = tf.reduce_mean(bow_loss)

                # Predict 0/1 (1 = last sentence in paragraph)
                end_loss = tf.nn.softmax_cross_entropy_with_logits(
                    labels=self.floor_labels, logits=self.paragraph_end_logits)
                self.avg_end_loss = tf.reduce_mean(end_loss)

                # Topic prediction loss
                if config.use_hcf:
                    div_prob = tf.divide(self.da_logits, self.output_das)
                    self.avg_da_loss = tf.reduce_mean(
                        -tf.nn.softmax_cross_entropy_with_logits(
                            logits=self.da_logits, labels=div_prob))

                else:
                    self.avg_da_loss = 0.0

                kld = gaussian_kld(recog_mu, recog_logvar, prior_mu,
                                   prior_logvar)
                self.avg_kld = tf.reduce_mean(kld)
                if log_dir is not None:
                    kl_weights = tf.minimum(
                        tf.to_float(self.global_t) / config.full_kl_step, 1.0)
                else:
                    kl_weights = tf.constant(1.0)

                self.kl_w = kl_weights
                self.elbo = self.avg_rc_loss + kl_weights * self.avg_kld
                aug_elbo = self.avg_bow_loss + self.avg_da_loss + self.elbo + self.avg_end_loss

                tf.summary.scalar("da_loss", self.avg_da_loss)
                tf.summary.scalar("rc_loss", self.avg_rc_loss)
                tf.summary.scalar("elbo", self.elbo)
                tf.summary.scalar("kld", self.avg_kld)
                tf.summary.scalar("bow_loss", self.avg_bow_loss)
                tf.summary.scalar("paragraph_end_loss", self.avg_end_loss)

                self.summary_op = tf.summary.merge_all()

                self.log_p_z = norm_log_liklihood(latent_sample, prior_mu,
                                                  prior_logvar)
                self.log_q_z_xy = norm_log_liklihood(latent_sample, recog_mu,
                                                     recog_logvar)
                self.est_marginal = tf.reduce_mean(rc_loss + bow_loss -
                                                   self.log_p_z +
                                                   self.log_q_z_xy)

            self.optimize(sess, config, aug_elbo, log_dir)

        self.saver = tf.train.Saver(tf.global_variables(),
                                    write_version=tf.train.SaverDef.V2)
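
The model above calls sample_gaussian, gaussian_kld, and norm_log_liklihood, which are not defined in this snippet. A minimal sketch of what such helpers typically look like in CVAE-style dialog models (the bodies below are assumptions based on the standard reparameterization trick and the closed-form KL between diagonal Gaussians, not the repository's own definitions):

import numpy as np
import tensorflow as tf

def sample_gaussian(mu, logvar):
    # Reparameterization trick: z = mu + sigma * eps, with eps ~ N(0, I).
    epsilon = tf.random_normal(tf.shape(logvar))
    return mu + tf.exp(0.5 * logvar) * epsilon

def gaussian_kld(recog_mu, recog_logvar, prior_mu, prior_logvar):
    # KL(q || p) between two diagonal Gaussians, summed over the latent dims.
    return -0.5 * tf.reduce_sum(
        1.0 + (recog_logvar - prior_logvar)
        - tf.square(prior_mu - recog_mu) / tf.exp(prior_logvar)
        - tf.exp(recog_logvar) / tf.exp(prior_logvar), axis=1)

def norm_log_liklihood(x, mu, logvar):
    # Log-density of x under a diagonal Gaussian N(mu, exp(logvar)),
    # summed over the latent dims.
    return -0.5 * tf.reduce_sum(
        np.log(2.0 * np.pi) + logvar + tf.square(x - mu) / tf.exp(logvar),
        axis=1)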
Example #38
0
def embedding_tied_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                               num_symbols, embedding_size,
                               output_projection=None, feed_previous=False,
                               dtype=dtypes.float32, scope=None):
  """Embedding RNN sequence-to-sequence model with tied (shared) parameters.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_symbols x input_size]). Then it runs an RNN to encode embedded
  encoder_inputs into a state vector. Next, it embeds decoder_inputs using
  the same embedding. Then it runs RNN decoder, initialized with the last
  encoder state, on embedded decoder_inputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_symbols: Integer; number of symbols for both encoder and decoder.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_symbols] and B has
      shape [num_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype to use for the initial RNN states (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_tied_rnn_seq2seq".

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].

  Raises:
    ValueError: When output_projection has the wrong shape.
  """
  if output_projection is not None:
    proj_weights = ops.convert_to_tensor(output_projection[0], dtype=dtype)
    proj_weights.get_shape().assert_is_compatible_with([None, num_symbols])
    proj_biases = ops.convert_to_tensor(output_projection[1], dtype=dtype)
    proj_biases.get_shape().assert_is_compatible_with([num_symbols])

  with variable_scope.variable_scope(scope or "embedding_tied_rnn_seq2seq"):
    with ops.device("/cpu:0"):
      embedding = variable_scope.get_variable("embedding",
                                              [num_symbols, embedding_size])

    emb_encoder_inputs = [embedding_ops.embedding_lookup(embedding, x)
                          for x in encoder_inputs]
    emb_decoder_inputs = [embedding_ops.embedding_lookup(embedding, x)
                          for x in decoder_inputs]

    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_symbols)

    if isinstance(feed_previous, bool):
      loop_function = _extract_argmax_and_embed(
          embedding, output_projection, True) if feed_previous else None
      return tied_rnn_seq2seq(emb_encoder_inputs, emb_decoder_inputs, cell,
                              loop_function=loop_function, dtype=dtype)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      loop_function = _extract_argmax_and_embed(
        embedding, output_projection, False) if feed_previous_bool else None
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(variable_scope.get_variable_scope(),
                                         reuse=reuse):
        outputs, state = tied_rnn_seq2seq(
            emb_encoder_inputs, emb_decoder_inputs, cell,
            loop_function=loop_function, dtype=dtype)
        return outputs + [state]

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    return outputs_and_state[:-1], outputs_and_state[-1]
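
A minimal usage sketch for the function above (hypothetical sizes; assumes a TF 1.x graph and the module's own imports, with encoder/decoder inputs given as time-major lists of int32 batches):

import tensorflow as tf

batch_size, enc_len, dec_len = 32, 10, 12

encoder_inputs = [tf.placeholder(tf.int32, [batch_size]) for _ in range(enc_len)]
decoder_inputs = [tf.placeholder(tf.int32, [batch_size]) for _ in range(dec_len)]
cell = tf.nn.rnn_cell.GRUCell(256)

# outputs is a list of [batch_size, num_symbols] logits, one per decoder step.
outputs, state = embedding_tied_rnn_seq2seq(
    encoder_inputs, decoder_inputs, cell,
    num_symbols=5000, embedding_size=128, feed_previous=False)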
Example #39
0
def dynamic_distraction_m2_decoder_wrapper(decoder_inputs,
                                           initial_state,
                                           distract_initial_state,
                                           attention_states,
                                           attention_states_query,
                                           cell_encoder,
                                           distraction_cell,
                                           num_symbols,
                                           embedding_size,
                                           num_heads=1,
                                           output_size=None,
                                           output_projection=None,
                                           feed_previous=False,
                                           update_embedding_for_previous=True,
                                           embedding_scope=None,
                                           dtype=None,
                                           scope=None,
                                           initial_state_attention=False):
    """RNN decoder with embedding and attention and a pure-decoding option.

  Args:
    decoder_inputs: A list of 1D batch-sized int32 Tensors (decoder inputs).
    initial_state: 2D Tensor [batch_size x cell.state_size].
    attention_states: 3D Tensor [batch_size x attn_length x attn_size].
    cell_encoder: rnn_cell.RNNCell defining the cell function.
    num_symbols: Integer, how many symbols come into the embedding.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_size: Size of the output vectors; if None, cell_encoder.output_size is used.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_symbols] and B has shape
      [num_symbols]; if provided and feed_previous=True, each fed previous
      output will first be multiplied by W and added B.
    feed_previous: Boolean; if True, only the first of decoder_inputs will be
      used (the "GO" symbol), and all other decoder inputs will be generated by:
        next = embedding_lookup(embedding, argmax(previous_output)),
      In effect, this implements a greedy decoder. It can also be used
      during training to emulate http://arxiv.org/abs/1506.03099.
      If False, decoder_inputs are used as given (the standard decoder case).
    update_embedding_for_previous: Boolean; if False and feed_previous=True,
      only the embedding for the first symbol of decoder_inputs (the "GO"
      symbol) will be updated by back propagation. Embeddings for the symbols
      generated from the decoder itself remain unchanged. This parameter has
      no effect if feed_previous=False.
    dtype: The dtype to use for the RNN initial states (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_decoder".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states -- useful when we wish to resume decoding from a previously
      stored decoder state and attention states.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x output_size] containing the generated outputs.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].

  Raises:
    ValueError: When output_projection has the wrong shape.
  """
    if output_size is None:
        output_size = cell_encoder.output_size
    if output_projection is not None:
        proj_biases = ops.convert_to_tensor(output_projection[1], dtype=dtype)
        proj_biases.get_shape().assert_is_compatible_with([num_symbols])

    with variable_scope.variable_scope(
            embedding_scope or "dynamic_distraction_m2_decoder_wrapper",
            dtype=dtype,
            reuse=True) as s1:

        print("Preksha", s1.name)
        embedding = variable_scope.get_variable("embedding",
                                                [num_symbols, embedding_size])
        loop_function = _extract_argmax_and_embed(
            embedding, output_projection,
            update_embedding_for_previous) if feed_previous else None
        emb_inp = [
            embedding_ops.embedding_lookup(embedding, i)
            for i in decoder_inputs
        ]

    with variable_scope.variable_scope(
            scope or "dynamic_distraction_m2_decoder_wrapper",
            dtype=dtype) as scope:
        return dynamic_distraction_m2_decoder(
            emb_inp,
            initial_state=initial_state,
            attention_states_query=attention_states_query,
            attention_states=attention_states,
            cell1=cell_encoder,
            cell2=cell_encoder,
            distract_initial_state=distract_initial_state,
            distraction_cell=distraction_cell,
            output_size=output_size,
            num_heads=num_heads,
            loop_function=loop_function,
            initial_state_attention=initial_state_attention)
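
Both this wrapper and embedding_tied_rnn_seq2seq above obtain their loop_function from _extract_argmax_and_embed, which feeds the embedded argmax of the previous output back in when feed_previous=True. A reference sketch in the style of the TF 1.x seq2seq library (the exact definition lives elsewhere in the source, so treat this as an approximation):

from tensorflow.python.ops import array_ops
from tensorflow.python.ops import embedding_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops

def _extract_argmax_and_embed(embedding, output_projection=None,
                              update_embedding=True):
    """Returns a loop_function that embeds the argmax of the previous output."""
    def loop_function(prev, _):
        if output_projection is not None:
            prev = nn_ops.xw_plus_b(prev, output_projection[0],
                                    output_projection[1])
        prev_symbol = math_ops.argmax(prev, 1)
        emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
        if not update_embedding:
            # Stop gradients so only the GO-symbol embedding is updated.
            emb_prev = array_ops.stop_gradient(emb_prev)
        return emb_prev
    return loop_function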
Example #40
0
def _compute_sampled_logits(weights,
                            biases,
                            inputs,
                            labels,
                            num_sampled,
                            num_classes,
                            num_true=1,
                            sampled_values=None,
                            subtract_log_q=True,
                            remove_accidental_hits=False,
                            partition_strategy="mod",
                            name=None):
    """Helper function for nce_loss and sampled_softmax_loss functions.

  Computes sampled output training logits and labels suitable for implementing
  e.g. noise-contrastive estimation (see nce_loss) or sampled softmax (see
  sampled_softmax_loss).

  Note: In the case where num_true > 1, we assign to each target class
  the target probability 1 / num_true so that the target probabilities
  sum to 1 per-example.

  Args:
    weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor`
        objects whose concatenation along dimension 0 has shape
        `[num_classes, dim]`.  The (possibly-partitioned) class embeddings.
    biases: A `Tensor` of shape `[num_classes]`.  The class biases.
    inputs: A `Tensor` of shape `[batch_size, dim]`.  The forward
        activations of the input network.
    labels: A `Tensor` of type `int64` and shape `[batch_size,
        num_true]`. The target classes.  Note that this format differs from
        the `labels` argument of `nn.softmax_cross_entropy_with_logits`.
    num_sampled: An `int`.  The number of classes to randomly sample per batch.
    num_classes: An `int`. The number of possible classes.
    num_true: An `int`.  The number of target classes per training example.
    sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`,
        `sampled_expected_count`) returned by a `*_candidate_sampler` function.
        (if None, we default to `log_uniform_candidate_sampler`)
    subtract_log_q: A `bool`.  whether to subtract the log expected count of
        the labels in the sample to get the logits of the true labels.
        Default is True.  Turn off for Negative Sampling.
    remove_accidental_hits:  A `bool`.  whether to remove "accidental hits"
        where a sampled class equals one of the target classes.  Default is
        False.
    partition_strategy: A string specifying the partitioning strategy, relevant
        if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported.
        Default is `"mod"`. See `tf.nn.embedding_lookup` for more details.
    name: A name for the operation (optional).
  Returns:
    out_logits, out_labels: `Tensor` objects each with shape
        `[batch_size, num_true + num_sampled]`, for passing to either
        `nn.sigmoid_cross_entropy_with_logits` (NCE) or
        `nn.softmax_cross_entropy_with_logits` (sampled softmax).
  """

    if not isinstance(weights, list):
        weights = [weights]

    with ops.op_scope(weights + [biases, inputs, labels], name,
                      "compute_sampled_logits"):
        if labels.dtype != dtypes.int64:
            labels = math_ops.cast(labels, dtypes.int64)
        labels_flat = array_ops.reshape(labels, [-1])

        # Sample the negative labels.
        #   sampled shape: [num_sampled] tensor
        #   true_expected_count shape = [batch_size, 1] tensor
        #   sampled_expected_count shape = [num_sampled] tensor
        if sampled_values is None:
            sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler(
                true_classes=labels,
                num_true=num_true,
                num_sampled=num_sampled,
                unique=True,
                range_max=num_classes)
        # NOTE: pylint cannot tell that 'sampled_values' is a sequence
        # pylint: disable=unpacking-non-sequence
        sampled, true_expected_count, sampled_expected_count = sampled_values
        # pylint: enable=unpacking-non-sequence

        # labels_flat is a [batch_size * num_true] tensor
        # sampled is a [num_sampled] int tensor
        all_ids = array_ops.concat(0, [labels_flat, sampled])

        # weights shape is [num_classes, dim]
        all_w = embedding_ops.embedding_lookup(
            weights, all_ids, partition_strategy=partition_strategy)
        all_b = embedding_ops.embedding_lookup(biases, all_ids)
        # true_w shape is [batch_size * num_true, dim]
        # true_b is a [batch_size * num_true] tensor
        true_w = array_ops.slice(
            all_w, [0, 0],
            array_ops.pack([array_ops.shape(labels_flat)[0], -1]))
        true_b = array_ops.slice(all_b, [0], array_ops.shape(labels_flat))

        # inputs shape is [batch_size, dim]
        # true_w shape is [batch_size * num_true, dim]
        # row_wise_dots is [batch_size, num_true, dim]
        dim = array_ops.shape(true_w)[1:2]
        new_true_w_shape = array_ops.concat(0, [[-1, num_true], dim])
        row_wise_dots = math_ops.mul(
            array_ops.expand_dims(inputs, 1),
            array_ops.reshape(true_w, new_true_w_shape))
        # We want the row-wise dot plus biases which yields a
        # [batch_size, num_true] tensor of true_logits.
        dots_as_matrix = array_ops.reshape(row_wise_dots,
                                           array_ops.concat(0, [[-1], dim]))
        true_logits = array_ops.reshape(_sum_rows(dots_as_matrix),
                                        [-1, num_true])
        true_b = array_ops.reshape(true_b, [-1, num_true])
        true_logits += true_b

        # Lookup weights and biases for sampled labels.
        #   sampled_w shape is [num_sampled, dim]
        #   sampled_b is a [num_sampled] float tensor
        sampled_w = array_ops.slice(
            all_w, array_ops.pack([array_ops.shape(labels_flat)[0], 0]),
            [-1, -1])
        sampled_b = array_ops.slice(all_b, array_ops.shape(labels_flat), [-1])

        # inputs has shape [batch_size, dim]
        # sampled_w has shape [num_sampled, dim]
        # sampled_b has shape [num_sampled]
        # Apply X*W'+B, which yields [batch_size, num_sampled]
        sampled_logits = math_ops.matmul(inputs, sampled_w,
                                         transpose_b=True) + sampled_b

        if remove_accidental_hits:
            acc_hits = candidate_sampling_ops.compute_accidental_hits(
                labels, sampled, num_true=num_true)
            acc_indices, acc_ids, acc_weights = acc_hits

            # This is how SparseToDense expects the indices.
            acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1])
            acc_ids_2d_int32 = array_ops.reshape(
                math_ops.cast(acc_ids, dtypes.int32), [-1, 1])
            sparse_indices = array_ops.concat(
                1, [acc_indices_2d, acc_ids_2d_int32], "sparse_indices")
            # Create sampled_logits_shape = [batch_size, num_sampled]
            sampled_logits_shape = array_ops.concat(0, [
                array_ops.shape(labels)[:1],
                array_ops.expand_dims(num_sampled, 0)
            ])
            if sampled_logits.dtype != acc_weights.dtype:
                acc_weights = math_ops.cast(acc_weights, sampled_logits.dtype)
            sampled_logits += sparse_ops.sparse_to_dense(
                sparse_indices,
                sampled_logits_shape,
                acc_weights,
                default_value=0.0,
                validate_indices=False)

        if subtract_log_q:
            # Subtract log of Q(l), prior probability that l appears in sampled.
            true_logits -= math_ops.log(true_expected_count)
            sampled_logits -= math_ops.log(sampled_expected_count)

        # Construct output logits and labels. The true labels/logits start at col 0.
        out_logits = array_ops.concat(1, [true_logits, sampled_logits])
        # true_logits is a float tensor, ones_like(true_logits) is a float tensor
        # of ones. We then divide by num_true to ensure the per-example labels sum
        # to 1.0, i.e. form a proper probability distribution.
        out_labels = array_ops.concat(1, [
            array_ops.ones_like(true_logits) / num_true,
            array_ops.zeros_like(sampled_logits)
        ])

    return out_logits, out_labels
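
A hedged usage sketch for the helper above in the sampled-softmax setting (the weights, biases, activations, and targets below are stand-ins, and it assumes the same legacy TF version as the function itself):

import tensorflow as tf
from tensorflow.python.ops import nn_ops

num_classes, dim, batch_size = 50000, 128, 32
softmax_weights = tf.get_variable("sm_w", [num_classes, dim])
softmax_biases = tf.get_variable("sm_b", [num_classes])
hidden = tf.zeros([batch_size, dim])                    # stand-in activations
target_ids = tf.zeros([batch_size, 1], dtype=tf.int64)  # stand-in targets

out_logits, out_labels = _compute_sampled_logits(
    softmax_weights, softmax_biases, hidden, target_ids,
    num_sampled=64, num_classes=num_classes)
# Each row is a small softmax problem over the true + sampled classes.
sampled_loss = nn_ops.softmax_cross_entropy_with_logits(
    labels=out_labels, logits=out_logits)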
Example #41
0
 def Body(it, cost):
     embedding = embedding_ops.embedding_lookup(
         embedding_matrix, [0])
     cost += math_ops.reduce_sum(embedding)
     return it + 1, cost
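
A minimal sketch of the loop this body would typically sit in (the embedding matrix and iteration count are hypothetical); it illustrates that gradients flow through embedding_lookup calls made inside tf.while_loop:

import tensorflow as tf
from tensorflow.python.ops import embedding_ops, math_ops

embedding_matrix = tf.get_variable("embedding_matrix", [8, 4])

def Body(it, cost):
    embedding = embedding_ops.embedding_lookup(embedding_matrix, [0])
    cost += math_ops.reduce_sum(embedding)
    return it + 1, cost

_, total_cost = tf.while_loop(lambda it, _: it < 5, Body,
                              [tf.constant(0), tf.constant(0.0)])
grad = tf.gradients(total_cost, [embedding_matrix])[0]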
Example #42
0
 def call(self, inputs):
     dtype = K.dtype(inputs)
     if dtype != 'int32' and dtype != 'int64':
         inputs = math_ops.cast(inputs, 'int32')
     out = embedding_ops.embedding_lookup(self.embeddings, inputs)
     return out
Example #43
0
    def __init__(self,
                 cell,
                 embedding,
                 first_input,
                 end_token,
                 initial_state,
                 beam_width,
                 vocab_size=None,
                 output_fn=None,
                 length_penalty_weight=0.0):
        """Initialize BeamSearchDecoder.

    Args:
      cell: An `RNNCell` instance.
      embedding: A callable that takes a vector tensor of `ids` (argmax ids),
        or the `params` argument for `embedding_lookup`.
      first_input: float Tensor shaped `[batch_size, input_depth]`, the first
        decoder input; it is tiled across beams to form the start inputs.
      end_token: `int32` scalar, the token that marks end of decoding.
      initial_state: A (possibly nested tuple of...) tensors and TensorArrays.
      beam_width:  Python integer, the number of beams.
      output_fn: (Optional) An instance of `tf.layers.Layer`, i.e.,
        `tf.layers.Dense`.  Optional layer to apply to the RNN output prior
        to storing the result or sampling.
      length_penalty_weight: Float weight to penalize length. Disabled with 0.0.

    Raises:
      TypeError: if `cell` is not an instance of `RNNCell`,
        or `output_fn` is not an instance of `tf.layers.Layer`.
      ValueError: If `end_token` is not a scalar.
    """
        if not isinstance(cell, core_rnn_cell.RNNCell):
            raise TypeError("cell must be an RNNCell, received: %s" %
                            type(cell))

        self._cell = cell
        self._output_fn = output_fn

        if callable(embedding):
            self._embedding_fn = embedding
        else:
            self._embedding_fn = (
                lambda ids: embedding_ops.embedding_lookup(embedding, ids))

        self._end_token = ops.convert_to_tensor(end_token,
                                                dtype=dtypes.int32,
                                                name="end_token")
        if self._end_token.get_shape().ndims != 0:
            raise ValueError("end_token must be a scalar")

        if vocab_size is not None:
            if output_fn is not None:
                self._output_fn = output_fn
            else:
                self._output_fn = lambda cell_output: tf.contrib.layers.fully_connected(
                    inputs=cell_output,
                    num_outputs=vocab_size,
                    activation_fn=None)

        self._vocab_size = vocab_size

        self._output_size = self._vocab_size if self._vocab_size is not None else self._cell.output_size

        #--TODO
        #try:
        #self._batch_size = ops.convert_to_tensor(first_input.get_shape().as_list()[0])
        self._batch_size = first_input.shape[0].value
        #except Exception:
        if self._batch_size is None:
            self._batch_size = array_ops.shape(first_input)[0]

        self._beam_width = beam_width
        self._length_penalty_weight = length_penalty_weight
        self._initial_cell_state = nest.map_structure(
            self._maybe_split_batch_beams, initial_state,
            self._cell.state_size)
        self._start_inputs = array_ops.tile(
            array_ops.expand_dims(first_input, 1), [1, self._beam_width, 1])
        self._finished = array_ops.zeros([self._batch_size, self._beam_width],
                                         dtype=dtypes.bool)
Example #44
0
 def call(self, x):
     return embedding_ops.embedding_lookup(self.embedding, x)
Example #45
0
    def __init__(self, sess, config, api, log_dir, forward, scope=None):
        self.vocab_size = 32
        self.sess = sess
        self.scope = scope
        self.sent_cell_size = config.sent_cell_size
        self.max_length = config.max_length

        with tf.name_scope("io"):
            # all dialog context and known attributes
            self.sensor = tf.placeholder(dtype=tf.int32,
                                         shape=(None, None),
                                         name="sensor")

            # target response given the dialog context
            self.output = tf.placeholder(dtype=tf.float32,
                                         shape=(None, ),
                                         name="output")

            # optimization related variables
            self.learning_rate = tf.Variable(float(config.init_lr),
                                             trainable=False,
                                             name="learning_rate")
            self.learning_rate_decay_op = self.learning_rate.assign(
                tf.multiply(self.learning_rate, config.lr_decay))
            self.global_t = tf.placeholder(dtype=tf.int32, name="global_t")

        with variable_scope.variable_scope("wordEmbedding"):
            self.embedding = tf.get_variable(
                "embedding", [self.vocab_size, config.embed_size],
                dtype=tf.float32)
            embedding_mask = tf.constant(
                [0 if i == 0 else 1 for i in range(self.vocab_size)],
                dtype=tf.float32,
                shape=[self.vocab_size, 1])
            embedding = self.embedding * embedding_mask

            input_embedding = embedding_ops.embedding_lookup(
                embedding, self.sensor)

            length_mask = tf.reduce_sum(tf.sign(
                tf.reduce_max(tf.abs(input_embedding), reduction_indices=2)),
                                        reduction_indices=1)
            length_mask = tf.to_int32(length_mask)
            mask = tf.sequence_mask(length_mask, self.max_length, tf.float32)

            one = tf.ones_like(mask)
            bias = one - mask
            bias = -100000 * bias

            if config.sent_type == "bow":
                pass
                # input_embedding, sent_size = get_bow(input_embedding)

            elif config.sent_type == "rnn":
                pass
                # sent_cell = self.get_rnncell("gru", self.sent_cell_size, config.keep_prob, 1)
                # input_embedding, sent_size = get_rnn_encode(input_embedding, sent_cell, scope="sent_rnn")

            elif config.sent_type == "bi_rnn":
                fwd_sent_cell = self.get_rnncell("gru",
                                                 self.sent_cell_size,
                                                 keep_prob=1.0,
                                                 num_layer=1)
                bwd_sent_cell = self.get_rnncell("gru",
                                                 self.sent_cell_size,
                                                 keep_prob=1.0,
                                                 num_layer=1)
                input_embedding, sent_size, hidden = get_bi_rnn_encode(
                    input_embedding,
                    fwd_sent_cell,
                    bwd_sent_cell,
                    scope="sent_bi_rnn")
                input_embedding = tf.expand_dims(input_embedding, 1)
                query = tf.get_variable("query", [config.att_size],
                                        dtype=tf.float32)
                #input_embedding = layers.fully_connected(input_embedding, config.att_size, activation_fn=None, biases_initializer=None, scope="att")
                hidden_project = layers.fully_connected(
                    hidden,
                    config.att_size,
                    activation_fn=None,
                    biases_initializer=None,
                    scope="att")
                vector_attn = tf.reduce_sum(tf.multiply(hidden_project, query),
                                            axis=2,
                                            keep_dims=True)
                bias = tf.expand_dims(bias, -1)
                attention_weights = tf.nn.softmax(vector_attn + bias, dim=1)
                self.weights = attention_weights
                attention = hidden * attention_weights
                feature = tf.reduce_sum(attention, 1)

            else:
                raise ValueError(
                    "Unknown sent_type. Must be one of [bow, rnn, bi_rnn]")

            # reshape input into dialogs

            if config.keep_prob < 1.0:
                feature = tf.nn.dropout(feature, config.keep_prob)

            # project the pooled feature to a scalar prediction
            predict = layers.fully_connected(feature,
                                             1,
                                             activation_fn=None,
                                             scope="fc")
            self.predict = tf.squeeze(predict)
            self.loss = tf.losses.absolute_difference(self.output,
                                                      self.predict)

            tf.summary.scalar("loss", self.loss)

            self.summary_op = tf.summary.merge_all()
            self.optimize(sess, config, self.loss, log_dir)

        self.saver = tf.train.Saver(tf.global_variables(),
                                    write_version=tf.train.SaverDef.V2)
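
The masked attention pooling used in the bi_rnn branch above (project the hidden states, score them against a learned query, mask padded steps with a large negative bias, softmax over time, and pool) can be sketched in isolation; all shapes and lengths below are hypothetical:

import tensorflow as tf

batch, time, hidden_size, att_size = 16, 20, 64, 32
hidden = tf.zeros([batch, time, hidden_size])            # stand-in bi-RNN outputs
lengths = tf.fill([batch], 15)                           # stand-in sequence lengths
mask = tf.sequence_mask(lengths, time, tf.float32)       # [batch, time]

query = tf.get_variable("query", [att_size])
hidden_project = tf.layers.dense(hidden, att_size, use_bias=False)
scores = tf.reduce_sum(hidden_project * query, axis=2, keep_dims=True)
bias = tf.expand_dims(-100000.0 * (1.0 - mask), -1)      # blank out padded steps
attention_weights = tf.nn.softmax(scores + bias, dim=1)  # normalize over time
feature = tf.reduce_sum(hidden * attention_weights, 1)   # [batch, hidden_size]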
Example #46
0
def generate_embedding_RNN_output(encoder_inputs,
                                  cell,
                                  num_encoder_symbols,
                                  word_embedding_size,
                                  embedding,
                                  num_heads=1,
                                  dtype=dtypes.float32,
                                  scope=None,
                                  initial_state_attention=False,
                                  sequence_length=None,
                                  bidirectional_rnn=False):
    """
  Generate RNN state outputs with word embeddings as inputs
      - Note that this example code does not include output label dependency modeling.
      One may add a loop function as in the rnn_decoder function in tf seq2seq.py
      example to feed emitted label embedding back to RNN state.
  """
    with variable_scope.variable_scope(scope
                                       or "generate_embedding_RNN_output"):
        if bidirectional_rnn:
            encoder_cell_fw = cell
            encoder_cell_bw = cell
            #embedding = variable_scope.get_variable("embedding", [num_encoder_symbols, word_embedding_size])
            encoder_embedded_inputs = list()
            #n_symbol, embed_size = embedding.shape
            #X = variable_scope.get_variable("X", [embed_size, embed_size])
            #b = variable_scope.get_variable("b", [embed_size])
            #encoder_embedded_inputs = [tf.multiply(embedding_ops.embedding_lookup(embedding, encoder_input), X) + b for encoder_input in encoder_inputs]
            encoder_embedded_inputs = [
                embedding_ops.embedding_lookup(embedding, encoder_input)
                for encoder_input in encoder_inputs
            ]
            encoder_outputs, encoder_state_fw, encoder_state_bw = rnn.static_bidirectional_rnn(
                encoder_cell_fw,
                encoder_cell_bw,
                encoder_embedded_inputs,
                sequence_length=sequence_length,
                dtype=dtype)
            encoder_state = array_ops.concat([
                array_ops.concat(encoder_state_fw, 1),
                array_ops.concat(encoder_state_bw, 1)
            ], 1)
            top_states = [
                array_ops.reshape(e, [-1, 1, cell.output_size * 2])
                for e in encoder_outputs
            ]
            attention_states = array_ops.concat(top_states, 1)
        else:
            encoder_cell = cell
            embedding = variable_scope.get_variable(
                "embedding", [num_encoder_symbols, word_embedding_size])
            encoder_embedded_inputs = list()
            encoder_embedded_inputs = [
                embedding_ops.embedding_lookup(embedding, encoder_input)
                for encoder_input in encoder_inputs
            ]
            encoder_outputs, encoder_state = rnn.rnn(
                encoder_cell,
                encoder_embedded_inputs,
                sequence_length=sequence_length,
                dtype=dtype)
            encoder_state = array_ops.concat(encoder_state, 1)
            top_states = [
                array_ops.reshape(e, [-1, 1, cell.output_size])
                for e in encoder_outputs
            ]
            attention_states = array_ops.concat(top_states, 1)

        return encoder_outputs, encoder_state, attention_states
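
A hedged usage sketch for the encoder above in its bidirectional configuration (vocabulary, sizes, and placeholders below are hypothetical; when bidirectional_rnn=True the embedding variable is passed in rather than created inside the scope):

import tensorflow as tf
from tensorflow.python.ops import variable_scope

num_symbols, embed_size, cell_size, max_len, batch = 1000, 64, 128, 15, 8

encoder_inputs = [tf.placeholder(tf.int32, [batch]) for _ in range(max_len)]
sequence_length = tf.placeholder(tf.int32, [batch])
embedding = variable_scope.get_variable("embedding", [num_symbols, embed_size])
cell = tf.nn.rnn_cell.GRUCell(cell_size)

encoder_outputs, encoder_state, attention_states = generate_embedding_RNN_output(
    encoder_inputs, cell, num_symbols, embed_size, embedding,
    sequence_length=sequence_length, bidirectional_rnn=True)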
Example #47
0
    def _process_input_helper(self,
                              update_row_factors,
                              sp_input=None,
                              transpose_input=False,
                              row_weights=None):
        """Creates the graph for processing a sparse slice of input.

    Args:
      update_row_factors: if True, update or project the row_factors, else
        update or project the column factors.
      sp_input: Please refer to comments for update_row_factors,
        update_col_factors, project_row_factors, and project_col_factors for
        restrictions.
      transpose_input: If True, the input is logically transposed and then the
        corresponding rows/columns of the transposed input are updated.
      row_weights: If not None, this is the row/column weights to be used for
        the update or projection. If None, use the corresponding weights from
        the model. Note that the feature (column/row) weights will be
        determined by the model. When not None, it can either be a scalar or
        a rank-1 tensor with the same number of elements as the number of rows
        of columns to be updated/projected.

    Returns:
      A tuple consisting of the following elements:
      new_values: New values for the row/column factors.
      update_op: An op that assigns the newly computed values to the row/column
        factors.
      unregularized_loss: A tensor (scalar) that contains the normalized
        minibatch loss corresponding to sp_input, without the regularization
        term. Add the regularization term below to yield the loss.
      regularization: A tensor (scalar) that contains the normalized
        regularization term for the minibatch loss corresponding to sp_input.
      sum_weights: The sum of the weights corresponding to sp_input. This
        can be used with unregularized loss to calculate the root weighted
        squared error.
    """
        assert isinstance(sp_input, sparse_tensor.SparseTensor)

        if update_row_factors:
            left = self._row_factors
            right_factors = self._col_factors_cache
            row_wt = self._row_wt_cache
            col_wt = self._col_wt_cache
            total_rows = self._input_rows
            total_cols = self._input_cols
            sharding_func = WALSModel._get_sharding_func(
                self._input_rows, self._num_row_shards)
            gramian = self._col_gramian_cache
        else:
            left = self._col_factors
            right_factors = self._row_factors_cache
            row_wt = self._col_wt_cache
            col_wt = self._row_wt_cache
            total_rows = self._input_cols
            total_cols = self._input_rows
            sharding_func = WALSModel._get_sharding_func(
                self._input_cols, self._num_col_shards)
            gramian = self._row_gramian_cache
            transpose_input = not transpose_input

        # Note that the row indices of sp_input are based on the original full input
        # Here we reindex the rows and give them contiguous ids starting at 0.
        # We use tf.unique to achieve this reindexing. Note that this is done so
        # that the downstream kernel can assume that the input is "dense" along the
        # row dimension.
        row_ids, col_ids = array_ops.split(value=sp_input.indices,
                                           num_or_size_splits=2,
                                           axis=1)
        update_row_indices, all_row_ids = array_ops.unique(row_ids[:, 0])
        update_col_indices, all_col_ids = array_ops.unique(col_ids[:, 0])
        col_ids = array_ops.expand_dims(
            math_ops.cast(all_col_ids, dtypes.int64), 1)
        row_ids = array_ops.expand_dims(
            math_ops.cast(all_row_ids, dtypes.int64), 1)

        if transpose_input:
            update_indices = update_col_indices
            row_shape = [
                math_ops.cast(
                    array_ops.shape(update_row_indices)[0], dtypes.int64)
            ]
            gather_indices = update_row_indices
        else:
            update_indices = update_row_indices
            row_shape = [
                math_ops.cast(
                    array_ops.shape(update_col_indices)[0], dtypes.int64)
            ]
            gather_indices = update_col_indices

        num_rows = math_ops.cast(
            array_ops.shape(update_indices)[0], dtypes.int64)
        col_shape = [num_rows]
        right = embedding_ops.embedding_lookup(right_factors,
                                               gather_indices,
                                               partition_strategy="div")
        new_sp_indices = array_ops.concat([row_ids, col_ids], 1)
        new_sp_shape = (array_ops.concat([row_shape, col_shape], 0)
                        if transpose_input else array_ops.concat(
                            [col_shape, row_shape], 0))
        new_sp_input = sparse_tensor.SparseTensor(indices=new_sp_indices,
                                                  values=sp_input.values,
                                                  dense_shape=new_sp_shape)

        # Compute lhs and rhs of the normal equations
        total_lhs = (self._unobserved_weight * gramian)
        if self._regularization_matrix is not None:
            total_lhs += self._regularization_matrix
        if self._row_weights is None:
            # Special case of ALS. Use a much simpler update rule.
            total_rhs = (self._unobserved_weight *
                         sparse_ops.sparse_tensor_dense_matmul(
                             new_sp_input, right, adjoint_a=transpose_input))
            # TODO(rmlarsen): handle transposing in tf.matrix_solve instead of
            # transposing explicitly.
            # TODO(rmlarsen): multi-thread tf.matrix_solve.
            new_left_values = array_ops.transpose(
                linalg_ops.matrix_solve(total_lhs,
                                        array_ops.transpose(total_rhs)))
        else:
            if row_weights is None:
                # TODO(yifanchen): Add special handling for single shard without using
                # embedding_lookup and perform benchmarks for those cases. Same for
                # col_weights lookup below.
                row_weights_slice = embedding_ops.embedding_lookup(
                    row_wt, update_indices, partition_strategy="div")
            else:
                num_indices = array_ops.shape(update_indices)[0]
                with ops.control_dependencies([
                        check_ops.assert_less_equal(
                            array_ops.rank(row_weights), 1)
                ]):
                    row_weights_slice = control_flow_ops.cond(
                        math_ops.equal(array_ops.rank(row_weights), 0), lambda:
                        (array_ops.ones([num_indices]) * row_weights),
                        lambda: math_ops.cast(row_weights, dtypes.float32))

            col_weights = embedding_ops.embedding_lookup(
                col_wt, gather_indices, partition_strategy="div")
            partial_lhs, total_rhs = (
                gen_factorization_ops.wals_compute_partial_lhs_and_rhs(
                    right,
                    col_weights,
                    self._unobserved_weight,
                    row_weights_slice,
                    new_sp_input.indices,
                    new_sp_input.values, [],
                    num_rows,
                    transpose_input,
                    name="wals_compute_partial_lhs_rhs"))
            total_lhs = array_ops.expand_dims(total_lhs, 0) + partial_lhs
            total_rhs = array_ops.expand_dims(total_rhs, -1)
            new_left_values = array_ops.squeeze(
                linalg_ops.matrix_solve(total_lhs, total_rhs), [2])

        update_op_name = "row_update" if update_row_factors else "col_update"
        update_op = self.scatter_update(left,
                                        update_indices,
                                        new_left_values,
                                        sharding_func,
                                        name=update_op_name)

        # Create the loss subgraph
        loss_sp_input = (sparse_ops.sparse_transpose(new_sp_input)
                         if transpose_input else new_sp_input)
        # sp_approx is the low rank estimate of the input matrix, formed by
        # computing the product <\\(u_i, v_j\\)> for (i, j) in loss_sp_input.indices.
        sp_approx_vals = gen_factorization_ops.masked_matmul(
            new_left_values,
            right,
            loss_sp_input.indices,
            transpose_a=False,
            transpose_b=True)
        sp_approx = sparse_tensor.SparseTensor(loss_sp_input.indices,
                                               sp_approx_vals,
                                               loss_sp_input.dense_shape)
        sp_approx_sq = math_ops.square(sp_approx)
        sp_residual = sparse_ops.sparse_add(loss_sp_input, sp_approx * (-1))
        sp_residual_sq = math_ops.square(sp_residual)
        row_wt_mat = (constant_op.constant(0.) if self._row_weights is None
                      else array_ops.expand_dims(row_weights_slice, 1))
        col_wt_mat = (constant_op.constant(0.) if self._col_weights is None
                      else array_ops.expand_dims(col_weights, 0))

        # We return the normalized loss
        partial_row_gramian = math_ops.matmul(new_left_values,
                                              new_left_values,
                                              transpose_a=True)
        normalization_factor = total_rows / math_ops.cast(
            num_rows, dtypes.float32)

        unregularized_loss = (
            self._unobserved_weight * (  # pyformat line break
                sparse_ops.sparse_reduce_sum(sp_residual_sq) -  # pyformat break
                sparse_ops.sparse_reduce_sum(sp_approx_sq) +  # pyformat break
                math_ops.trace(math_ops.matmul(partial_row_gramian, gramian)))
            + sparse_ops.sparse_reduce_sum(
                row_wt_mat *
                (sp_residual_sq * col_wt_mat))) * normalization_factor

        if self._regularization is not None:
            regularization = self._regularization * (
                math_ops.trace(partial_row_gramian) * normalization_factor +
                math_ops.trace(gramian))
        else:
            regularization = constant_op.constant(0.)

        sum_weights = self._unobserved_weight * math_ops.cast(
            total_rows * total_cols, dtypes.float32)
        if self._row_weights is not None and self._col_weights is not None:
            ones = sparse_tensor.SparseTensor(
                indices=loss_sp_input.indices,
                values=array_ops.ones(array_ops.shape(loss_sp_input.values)),
                dense_shape=loss_sp_input.dense_shape)
            sum_weights += sparse_ops.sparse_reduce_sum(
                row_wt_mat * (ones * col_wt_mat)) * normalization_factor

        return (new_left_values, update_op, unregularized_loss, regularization,
                sum_weights)
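
The unweighted branch above ("Special case of ALS") solves a standard set of normal equations per row; a small NumPy sketch of that update, mirroring total_lhs/total_rhs for a single dense row and ignoring sharding:

import numpy as np

def als_row_update(row_values, col_factors, unobserved_weight, reg):
    """Solve (w * V^T V + reg * I) u = w * V^T r for one row factor u.

    row_values: 1-D array of length num_cols (zeros at unobserved entries).
    col_factors: [num_cols, k] array of current column factors V.
    """
    k = col_factors.shape[1]
    gramian = col_factors.T.dot(col_factors)            # V^T V, shape [k, k]
    lhs = unobserved_weight * gramian + reg * np.eye(k)
    rhs = unobserved_weight * col_factors.T.dot(row_values)
    return np.linalg.solve(lhs, rhs)

V = np.random.rand(6, 3)
r = np.array([1.0, 0.0, 0.0, 2.0, 0.0, 0.5])
u = als_row_update(r, V, unobserved_weight=0.1, reg=0.01)   # shape (3,)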
Example #48
0
 def call(self, inputs):
     inputs = math_ops.cast(inputs, 'int32')
     return embedding_ops.embedding_lookup(self.embeddings, inputs)
Example #49
0
 def testConstructionNonSharded(self):
     with ops.Graph().as_default():
         p = variables.Variable(
             array_ops.zeros(shape=[100, 100], dtype=dtypes.float32))
         ids = constant_op.constant([0, 1, 1, 7], dtype=dtypes.int32)
         embedding_ops.embedding_lookup([p], ids)
Example #50
0
def _sampled_scattered_embedding_lookup(
    params, values, dimension=None, sampled_candidates=None, hash_key=None,
    name=None):
  """Looks up embeddings using parameter hashing for each value in `values`.

  This method looks up selected embedding dimensions if `sampled_candidates` is
  given, otherwise looks up all dimensions.

  The i-th embedding component of a value v in `values` is found by retrieving
  the weight whose index is a fingerprint of the pair (v,i).
  The concept is explored as "feature hashing" for model compression in this
  paper: http://arxiv.org/pdf/1504.04788.pdf

  Feature hashing has the pleasant effect of allowing us to compute an embedding
  without needing a pre-determined vocabulary, relieving some amount of process
  complexity. It also allows for us to maintain embeddings for possibly
  trillions of features with a fixed amount of memory.

  Note that this is superior to out-of-vocabulary shared "hash buckets" in that
  the embedding is extremely likely to be unique for each token as opposed to
  being shared across probably-colliding tokens. The price is that we must
  compute a hash once for each scalar in the token's embedding as opposed to
  once per token.

  If `params` is a list, it represents a partition of the embedding parameters.
  Each tensor in the list should have the same length, except for the first ones
  which may have an additional element. For instance 10 parameters can be
  partitioned in 4 tensors with length `[3, 3, 2, 2]`.

  Args:
    params: A `Tensor`, `list` of `Tensors`, or `PartitionedVariable`.
      Each tensor must be of rank 1 with fully-defined shape.
    values: `Tensor` of values to be embedded with shape `[d0, ..., dn]`.
    dimension: Embedding dimension. The user must specify either `dimension` or
      `sampled_candidates`.
    sampled_candidates: An optional `Tensor` of slice indices to keep along the
      final dimension with shape `[d0, ..., dn, N]`. If given, `dimension` is
      ignored. If `None`, looks up all candidates.
    hash_key: Specify the hash_key that will be used by the `FingerprintCat64`
      function to combine the crosses fingerprints on SparseFeatureCrossOp
      (optional).
    name: An optional name for this op.

  Returns:
    A `Tensor` with shape `[d0, ..., dn, dimension]`.
    If `sampled_candidates` is given, the output shape is `[d0, ..., dn, N]`

  Raises:
    ValueError: if dimension is not positive or the partition size is invalid.
  """
  if isinstance(params, variables.PartitionedVariable):
    params = list(params)
  if not isinstance(params, list):
    params = [params]

  with ops.name_scope(name, "scattered_embedding_lookup",
                      params + [dimension, values]):
    # Flatten the values
    values_shape = array_ops.shape(values)
    values = array_ops.reshape(values, [-1, 1])

    if sampled_candidates is None:
      if dimension is None:
        raise ValueError(
            "You must specify either dimension or sampled_candidates.")
      if dimension <= 0:
        raise ValueError("Dimension must be >0. Given is %d" % dimension)
      sampled_candidates = array_ops.tile(array_ops.expand_dims(
          math_ops.range(0, dimension), 0), array_ops.shape(values))
    else:
      dimension = array_ops.shape(sampled_candidates)[
          math_ops.subtract(array_ops.rank(sampled_candidates), 1)]
      sampled_candidates_shape = array_ops.shape(sampled_candidates)
      dimension_tensor = array_ops.reshape(dimension, shape=[1,])
      expected_shape = array_ops.concat([values_shape, dimension_tensor], 0)
      with ops.control_dependencies([control_flow_ops.Assert(
          math_ops.reduce_all(math_ops.equal(sampled_candidates_shape,
                                             expected_shape)),
          ["The shape of sampled_candidates: ", sampled_candidates_shape,
           " does not match the shape of values: ", values_shape])]):
        # Flatten sampled_candidates, same way as values are flattened.
        sampled_candidates = array_ops.reshape(sampled_candidates,
                                               [-1, dimension])

    num_partitions = len(params)
    partition_sizes = []
    for p in range(num_partitions):
      shape = params[p].get_shape()
      shape.assert_has_rank(1)
      shape.assert_is_fully_defined()
      partition_sizes.append(shape[0].value)
    num_params = sum(partition_sizes)  # Total number of parameters.

    # Assert the size of each partition.
    for p in range(num_partitions):
      expected_size = (num_params - p - 1) // num_partitions + 1
      if partition_sizes[p] != expected_size:
        raise ValueError("Tensor %d in params has size %d, expected %d." %
                         (p, partition_sizes[p], expected_size))

    # With two values v1 and v2 and 3 dimensions, we will cross
    # [[0, 1, 2], [0, 1, 2]] with [[v1], [v2]].
    tensors_to_cross = [sampled_candidates, values]
    ids = sparse_feature_cross_op.sparse_feature_cross(
        tensors_to_cross, hashed_output=True, num_buckets=num_params,
        hash_key=hash_key)
    ids = sparse_ops.sparse_tensor_to_dense(ids)

    # No need to validate the indices since we have checked the params
    # dimensions and we know the largest id.
    result = embedding_ops.embedding_lookup(
        params, ids, partition_strategy="div")

    return array_ops.reshape(result,
                             array_ops.concat([values_shape, [dimension]], 0))
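
The size check above enforces a "div"-style partitioning: of the num_params total rows, partition p must hold (num_params - p - 1) // num_partitions + 1 of them, i.e. the first num_params % num_partitions partitions get one extra row. A minimal, framework-free sketch of that rule (the helper name is illustrative):

def expected_partition_sizes(num_params, num_partitions):
    # Partition p receives ceil((num_params - p) / num_partitions) rows,
    # matching the assertion in scattered_embedding_lookup above.
    return [(num_params - p - 1) // num_partitions + 1
            for p in range(num_partitions)]

assert expected_partition_sizes(10, 4) == [3, 3, 2, 2]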
Example #51
0
 def f():
     tape.watch_variable(embedding)
     embedded_x = embedding_ops.embedding_lookup(embedding, x)
     return constant_op.constant(1.0, dtypes.float32) - embedded_x
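
The fragment above only defines the forward pass. A hedged sketch of the same idea with the public TF2 API (tf.GradientTape in place of the internal tape module; the table and ids are illustrative), showing the sparse gradient the lookup produces:

import tensorflow as tf

embedding = tf.Variable(tf.random.normal([10, 4]))   # illustrative table
x = tf.constant([1, 3])                               # illustrative ids

with tf.GradientTape() as g:
    embedded_x = tf.nn.embedding_lookup(embedding, x)
    loss = tf.reduce_sum(tf.constant(1.0) - embedded_x)

# The gradient w.r.t. the table is an IndexedSlices touching only rows 1 and 3.
grad = g.gradient(loss, embedding)
print(type(grad).__name__, grad.indices.numpy())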
Example #52
0
    def __init__(self, sess, config, api, log_dir, forward, scope=None):
        self.vocab = api.vocab
        self.rev_vocab = api.rev_vocab
        self.vocab_size = len(self.vocab)
        self.topic_vocab = api.topic_vocab
        self.topic_vocab_size = len(self.topic_vocab)
        self.da_vocab = api.dialog_act_vocab
        self.da_vocab_size = len(self.da_vocab)
        self.sess = sess
        self.scope = scope
        self.max_utt_len = config.max_utt_len
        self.go_id = self.rev_vocab["<s>"]
        self.eos_id = self.rev_vocab["</s>"]
        self.context_cell_size = config.cxt_cell_size
        self.sent_cell_size = config.sent_cell_size
        self.dec_cell_size = config.dec_cell_size

        with tf.name_scope("io"):
            # all dialog context and known attributes
            self.input_contexts = tf.placeholder(dtype=tf.int32,
                                                 shape=(None, None,
                                                        self.max_utt_len),
                                                 name="dialog_context")
            self.floors = tf.placeholder(dtype=tf.int32,
                                         shape=(None, None),
                                         name="floor")
            self.context_lens = tf.placeholder(dtype=tf.int32,
                                               shape=(None, ),
                                               name="context_lens")
            self.topics = tf.placeholder(dtype=tf.int32,
                                         shape=(None, ),
                                         name="topics")
            self.my_profile = tf.placeholder(dtype=tf.float32,
                                             shape=(None, 4),
                                             name="my_profile")
            self.ot_profile = tf.placeholder(dtype=tf.float32,
                                             shape=(None, 4),
                                             name="ot_profile")

            # target response given the dialog context
            self.output_tokens = tf.placeholder(dtype=tf.int32,
                                                shape=(None, None),
                                                name="output_token")
            self.output_lens = tf.placeholder(dtype=tf.int32,
                                              shape=(None, ),
                                              name="output_lens")
            self.output_das = tf.placeholder(dtype=tf.int32,
                                             shape=(None, ),
                                             name="output_dialog_acts")

            # optimization related variables
            self.learning_rate = tf.Variable(float(config.init_lr),
                                             trainable=False,
                                             name="learning_rate")
            self.learning_rate_decay_op = self.learning_rate.assign(
                tf.multiply(self.learning_rate, config.lr_decay))
            self.global_t = tf.placeholder(dtype=tf.int32, name="global_t")
            self.use_prior = tf.placeholder(dtype=tf.bool, name="use_prior")

        max_dialog_len = array_ops.shape(
            self.input_contexts)[1]  # number of utterances per dialog
        max_out_len = array_ops.shape(self.output_tokens)[1]
        batch_size = array_ops.shape(self.input_contexts)[0]

        with variable_scope.variable_scope("topicEmbedding"):
            t_embedding = tf.get_variable(
                "embedding", [self.topic_vocab_size, config.topic_embed_size],
                dtype=tf.float32)
            topic_embedding = embedding_ops.embedding_lookup(
                t_embedding, self.topics)

        if config.use_hcf:
            with variable_scope.variable_scope("dialogActEmbedding"):
                d_embedding = tf.get_variable(
                    "embedding", [self.da_vocab_size, config.da_embed_size],
                    dtype=tf.float32)
                da_embedding = embedding_ops.embedding_lookup(
                    d_embedding, self.output_das)
                #about embedding_lookup: http://blog.csdn.net/u013041398/article/details/60955847

        with variable_scope.variable_scope("wordEmbedding"):
            self.embedding = tf.get_variable(
                "embedding", [self.vocab_size, config.embed_size],
                dtype=tf.float32)
            # Token id 0 is the padding symbol: zero out its embedding row so
            # that padded positions contribute zero vectors to the recurrent
            # sentence encoders below, which take variable-length input.
            embedding_mask = tf.constant(
                [0 if i == 0 else 1 for i in range(self.vocab_size)],
                dtype=tf.float32,
                shape=[self.vocab_size, 1])
            embedding = self.embedding * embedding_mask

            input_embedding = embedding_ops.embedding_lookup(
                embedding,
                tf.reshape(self.input_contexts,
                           [-1]))  # pass '[-1]' to flatten input_contexts
            input_embedding = tf.reshape(
                input_embedding, [-1, self.max_utt_len, config.embed_size])
            output_embedding = embedding_ops.embedding_lookup(
                embedding, self.output_tokens)

            if config.sent_type == "bow":
                input_embedding, sent_size = get_bow(input_embedding)
                output_embedding, _ = get_bow(output_embedding)

            elif config.sent_type == "rnn":
                sent_cell = self.get_rnncell("gru", self.sent_cell_size,
                                             config.keep_prob, 1)
                input_embedding, sent_size = get_rnn_encode(input_embedding,
                                                            sent_cell,
                                                            scope="sent_rnn")
                output_embedding, _ = get_rnn_encode(output_embedding,
                                                     sent_cell,
                                                     self.output_lens,
                                                     scope="sent_rnn",
                                                     reuse=True)
            elif config.sent_type == "bi_rnn":
                fwd_sent_cell = self.get_rnncell("gru",
                                                 self.sent_cell_size,
                                                 keep_prob=1.0,
                                                 num_layer=1)
                bwd_sent_cell = self.get_rnncell("gru",
                                                 self.sent_cell_size,
                                                 keep_prob=1.0,
                                                 num_layer=1)
                input_embedding, sent_size = get_bi_rnn_encode(
                    input_embedding,
                    fwd_sent_cell,
                    bwd_sent_cell,
                    scope="sent_bi_rnn")
                output_embedding, _ = get_bi_rnn_encode(output_embedding,
                                                        fwd_sent_cell,
                                                        bwd_sent_cell,
                                                        self.output_lens,
                                                        scope="sent_bi_rnn",
                                                        reuse=True)
            else:
                raise ValueError(
                    "Unknown sent_type. Must be one of [bow, rnn, bi_rnn]")

            # reshape input into dialogs
            input_embedding = tf.reshape(input_embedding,
                                         [-1, max_dialog_len, sent_size])
            if config.keep_prob < 1.0:
                input_embedding = tf.nn.dropout(input_embedding,
                                                config.keep_prob)

            # convert floors into 1 hot
            floor_one_hot = tf.one_hot(tf.reshape(self.floors, [-1]),
                                       depth=2,
                                       dtype=tf.float32)
            floor_one_hot = tf.reshape(floor_one_hot, [-1, max_dialog_len, 2])

            joint_embedding = tf.concat([input_embedding, floor_one_hot], 2,
                                        "joint_embedding")

        with variable_scope.variable_scope("contextRNN"):
            enc_cell = self.get_rnncell(config.cell_type,
                                        self.context_cell_size,
                                        keep_prob=1.0,
                                        num_layer=config.num_layer)
            # enc_last_state is the true last state for each dialog, since
            # dynamic_rnn respects sequence_length.
            _, enc_last_state = tf.nn.dynamic_rnn(
                enc_cell,
                joint_embedding,
                dtype=tf.float32,
                sequence_length=self.context_lens)

            if config.num_layer > 1:
                enc_last_state = tf.concat(enc_last_state, 1)

        # combine with other attributes
        if config.use_hcf:
            attribute_embedding = da_embedding
            attribute_fc1 = layers.fully_connected(attribute_embedding,
                                                   30,
                                                   activation_fn=tf.tanh,
                                                   scope="attribute_fc1")

        cond_list = [
            topic_embedding, self.my_profile, self.ot_profile, enc_last_state
        ]
        cond_embedding = tf.concat(
            cond_list, 1)  # dialog context plus metadata (topic, speaker profiles)

        #introduce a recognition network q(phi)(z|x, c, y) to approximate the true posterior distribution p(z|x, c, y)
        with variable_scope.variable_scope("recognitionNetwork"):
            if config.use_hcf:
                recog_input = tf.concat(
                    [cond_embedding, output_embedding, attribute_fc1],
                    1)  #c: cond, x: output, y: attribute
            else:
                recog_input = tf.concat([cond_embedding, output_embedding], 1)
            self.recog_mulogvar = recog_mulogvar = layers.fully_connected(
                recog_input,
                config.latent_size * 2,
                activation_fn=None,
                scope="muvar")
            recog_mu, recog_logvar = tf.split(recog_mulogvar, 2, axis=1)

        with variable_scope.variable_scope("priorNetwork"):
            # P(XYZ)=P(Z|X)P(X)P(Y|X,Z)
            prior_fc1 = layers.fully_connected(
                cond_embedding,
                int(np.maximum(config.latent_size * 2, 100)),
                activation_fn=tf.tanh,
                scope="fc1")
            prior_mulogvar = layers.fully_connected(prior_fc1,
                                                    config.latent_size * 2,
                                                    activation_fn=None,
                                                    scope="muvar")
            prior_mu, prior_logvar = tf.split(prior_mulogvar, 2, axis=1)

            # use sampled Z or posterior Z
            latent_sample = tf.cond(
                self.use_prior,
                lambda: sample_gaussian(prior_mu, prior_logvar),
                lambda: sample_gaussian(recog_mu, recog_logvar))

        with variable_scope.variable_scope("generationNetwork"):
            gen_inputs = tf.concat([cond_embedding, latent_sample], 1)

            # BOW loss
            bow_fc1 = layers.fully_connected(gen_inputs,
                                             400,
                                             activation_fn=tf.tanh,
                                             scope="bow_fc1")
            if config.keep_prob < 1.0:
                bow_fc1 = tf.nn.dropout(bow_fc1, config.keep_prob)
            self.bow_logits = layers.fully_connected(bow_fc1,
                                                     self.vocab_size,
                                                     activation_fn=None,
                                                     scope="bow_project")

            # Y loss
            if config.use_hcf:
                meta_fc1 = layers.fully_connected(gen_inputs,
                                                  400,
                                                  activation_fn=tf.tanh,
                                                  scope="meta_fc1")
                if config.keep_prob < 1.0:
                    meta_fc1 = tf.nn.dropout(meta_fc1, config.keep_prob)
                self.da_logits = layers.fully_connected(meta_fc1,
                                                        self.da_vocab_size,
                                                        scope="da_project")
                da_prob = tf.nn.softmax(self.da_logits)
                pred_attribute_embedding = tf.matmul(da_prob, d_embedding)
                if forward:
                    selected_attribute_embedding = pred_attribute_embedding
                else:
                    selected_attribute_embedding = attribute_embedding
                dec_inputs = tf.concat(
                    [gen_inputs, selected_attribute_embedding], 1)
            else:
                self.da_logits = tf.zeros((batch_size, self.da_vocab_size))
                dec_inputs = gen_inputs

            # Decoder
            if config.num_layer > 1:
                dec_init_state = [
                    layers.fully_connected(dec_inputs,
                                           self.dec_cell_size,
                                           activation_fn=None,
                                           scope="init_state-%d" % i)
                    for i in range(config.num_layer)
                ]
                dec_init_state = tuple(dec_init_state)
            else:
                dec_init_state = layers.fully_connected(dec_inputs,
                                                        self.dec_cell_size,
                                                        activation_fn=None,
                                                        scope="init_state")

        with variable_scope.variable_scope("decoder"):
            dec_cell = self.get_rnncell(config.cell_type, self.dec_cell_size,
                                        config.keep_prob, config.num_layer)
            dec_cell = OutputProjectionWrapper(dec_cell, self.vocab_size)

            if forward:
                loop_func = decoder_fn_lib.context_decoder_fn_inference(
                    None,
                    dec_init_state,
                    embedding,
                    start_of_sequence_id=self.go_id,
                    end_of_sequence_id=self.eos_id,
                    maximum_length=self.max_utt_len,
                    num_decoder_symbols=self.vocab_size,
                    context_vector=selected_attribute_embedding)
                dec_input_embedding = None
                dec_seq_lens = None
            else:
                loop_func = decoder_fn_lib.context_decoder_fn_train(
                    dec_init_state, selected_attribute_embedding)
                dec_input_embedding = embedding_ops.embedding_lookup(
                    embedding, self.output_tokens)
                dec_input_embedding = dec_input_embedding[:, 0:-1, :]
                dec_seq_lens = self.output_lens - 1

                if config.keep_prob < 1.0:
                    dec_input_embedding = tf.nn.dropout(
                        dec_input_embedding, config.keep_prob)

                # apply word dropping. Set dropped word to 0
                if config.dec_keep_prob < 1.0:
                    keep_mask = tf.less_equal(
                        tf.random_uniform((batch_size, max_out_len - 1),
                                          minval=0.0,
                                          maxval=1.0), config.dec_keep_prob)
                    keep_mask = tf.expand_dims(tf.to_float(keep_mask), 2)
                    dec_input_embedding = dec_input_embedding * keep_mask
                    dec_input_embedding = tf.reshape(
                        dec_input_embedding,
                        [-1, max_out_len - 1, config.embed_size])

            dec_outs, _, final_context_state = dynamic_rnn_decoder(
                dec_cell,
                loop_func,
                inputs=dec_input_embedding,
                sequence_length=dec_seq_lens)
            if final_context_state is not None:
                final_context_state = final_context_state[:, 0:array_ops.
                                                          shape(dec_outs)[1]]
                mask = tf.to_int32(tf.sign(tf.reduce_max(dec_outs, axis=2)))
                self.dec_out_words = tf.multiply(
                    tf.reverse(final_context_state, axis=[1]), mask)
            else:
                self.dec_out_words = tf.argmax(dec_outs, 2)

        if not forward:
            with variable_scope.variable_scope("loss"):
                labels = self.output_tokens[:, 1:]
                label_mask = tf.to_float(tf.sign(labels))

                rc_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=dec_outs, labels=labels)
                rc_loss = tf.reduce_sum(rc_loss * label_mask,
                                        reduction_indices=1)
                self.avg_rc_loss = tf.reduce_mean(rc_loss)
                # used only for perplexity calculation; not used for optimization
                self.rc_ppl = tf.exp(
                    tf.reduce_sum(rc_loss) / tf.reduce_sum(label_mask))
                """ as n-trial multimodal distribution. """
                tile_bow_logits = tf.tile(tf.expand_dims(self.bow_logits, 1),
                                          [1, max_out_len - 1, 1])
                bow_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=tile_bow_logits, labels=labels) * label_mask
                bow_loss = tf.reduce_sum(bow_loss, reduction_indices=1)
                self.avg_bow_loss = tf.reduce_mean(bow_loss)

                # reconstruct the meta info about X
                if config.use_hcf:
                    da_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=self.da_logits, labels=self.output_das)
                    self.avg_da_loss = tf.reduce_mean(da_loss)
                else:
                    self.avg_da_loss = 0.0

                kld = gaussian_kld(recog_mu, recog_logvar, prior_mu,
                                   prior_logvar)
                self.avg_kld = tf.reduce_mean(kld)
                if log_dir is not None:
                    kl_weights = tf.minimum(
                        tf.to_float(self.global_t) / config.full_kl_step, 1.0)
                else:
                    kl_weights = tf.constant(1.0)

                self.kl_w = kl_weights
                self.elbo = self.avg_rc_loss + kl_weights * self.avg_kld
                aug_elbo = self.avg_bow_loss + self.avg_da_loss + self.elbo

                tf.summary.scalar("da_loss", self.avg_da_loss)
                tf.summary.scalar("rc_loss", self.avg_rc_loss)
                tf.summary.scalar("elbo", self.elbo)
                tf.summary.scalar("kld", self.avg_kld)
                tf.summary.scalar("bow_loss", self.avg_bow_loss)

                self.summary_op = tf.summary.merge_all()

                self.log_p_z = norm_log_liklihood(latent_sample, prior_mu,
                                                  prior_logvar)
                self.log_q_z_xy = norm_log_liklihood(latent_sample, recog_mu,
                                                     recog_logvar)
                self.est_marginal = tf.reduce_mean(rc_loss + bow_loss -
                                                   self.log_p_z +
                                                   self.log_q_z_xy)

            self.optimize(sess, config, aug_elbo, log_dir)

        self.saver = tf.train.Saver(tf.global_variables(),
                                    write_version=tf.train.SaverDef.V2)
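
The wordEmbedding block above zeroes the embedding row of the padding token (id 0) by multiplying the table with a 0/1 column mask, so padded positions contribute zero vectors to the sentence encoders. A standalone sketch of that trick (sizes and names are illustrative):

import tensorflow as tf

vocab_size, embed_size = 5, 3
table = tf.Variable(tf.random.normal([vocab_size, embed_size]))

# Row 0 is the padding token: mask it to all zeros before the lookup.
pad_mask = tf.constant([0.0 if i == 0 else 1.0 for i in range(vocab_size)],
                       shape=[vocab_size, 1])
masked_table = table * pad_mask

padded_ids = tf.constant([[4, 2, 0, 0]])              # trailing 0s are padding
vectors = tf.nn.embedding_lookup(masked_table, padded_ids)
# vectors[0, 2] and vectors[0, 3] are zero vectors.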
Example #53
0
 def testRaggedMaxNorm(self):
   embeddings = constant_op.constant([[2.0]])
   ids = ragged_factory_ops.constant([[0, 0], [0]], dtype=dtypes.int32)
   embedding = embedding_ops.embedding_lookup([embeddings], ids, max_norm=1.0)
   self.assertAllEqual(embedding, [[[1.0], [1.0]], [[1.0]]])
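
The test above relies on max_norm, which rescales every looked-up vector whose L2 norm exceeds the limit. A small sketch of the effect (values are illustrative):

import tensorflow as tf

params = tf.constant([[3.0, 4.0]])      # row 0 has L2 norm 5
ids = tf.constant([0, 0])

clipped = tf.nn.embedding_lookup(params, ids, max_norm=1.0)
# Each returned row is rescaled to unit norm: [[0.6, 0.8], [0.6, 0.8]].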
Example #54
0
    def __init__(self,
                 cell,
                 embedding,
                 start_tokens,
                 end_token,
                 initial_state,
                 beam_width,
                 output_layer=None,
                 length_penalty_weight=0.0,
                 coverage_penalty_weight=0.0,
                 reorder_tensor_arrays=True):
        """Initialize the BeamSearchDecoder.

    Args:
      cell: An `RNNCell` instance.
      embedding: A callable that takes a vector tensor of `ids` (argmax ids),
        or the `params` argument for `embedding_lookup`.
      start_tokens: `int32` vector shaped `[batch_size]`, the start tokens.
      end_token: `int32` scalar, the token that marks end of decoding.
      initial_state: A (possibly nested tuple of...) tensors and TensorArrays.
      beam_width:  Python integer, the number of beams.
      output_layer: (Optional) An instance of `tf.layers.Layer`, i.e.,
        `tf.layers.Dense`.  Optional layer to apply to the RNN output prior
        to storing the result or sampling.
      length_penalty_weight: Float weight to penalize length. Disabled with 0.0.
      coverage_penalty_weight: Float weight to penalize the coverage of source
        sentence. Disabled with 0.0.
      reorder_tensor_arrays: If `True`, `TensorArray`s' elements within the cell
        state will be reordered according to the beam search path. If the
        `TensorArray` can be reordered, the stacked form will be returned.
        Otherwise, the `TensorArray` will be returned as is. Set this flag to
        `False` if the cell state contains `TensorArray`s that are not amenable
        to reordering.

    Raises:
      TypeError: if `cell` is not an instance of `RNNCell`,
        or `output_layer` is not an instance of `tf.layers.Layer`.
      ValueError: If `start_tokens` is not a vector or
        `end_token` is not a scalar.
    """
        rnn_cell_impl.assert_like_rnncell("cell", cell)  # pylint: disable=protected-access
        if (output_layer is not None
                and not isinstance(output_layer, layers_base.Layer)):
            raise TypeError("output_layer must be a Layer, received: %s" %
                            type(output_layer))
        self._cell = cell
        self._output_layer = output_layer
        self._reorder_tensor_arrays = reorder_tensor_arrays

        if callable(embedding):
            self._embedding_fn = embedding
        else:
            self._embedding_fn = (
                lambda ids: embedding_ops.embedding_lookup(embedding, ids))

        self._start_tokens = ops.convert_to_tensor(start_tokens,
                                                   dtype=dtypes.int32,
                                                   name="start_tokens")
        if self._start_tokens.get_shape().ndims != 1:
            raise ValueError("start_tokens must be a vector")
        self._end_token = ops.convert_to_tensor(end_token,
                                                dtype=dtypes.int32,
                                                name="end_token")
        if self._end_token.get_shape().ndims != 0:
            raise ValueError("end_token must be a scalar")

        self._batch_size = array_ops.size(start_tokens)
        self._beam_width = beam_width
        self._length_penalty_weight = length_penalty_weight
        self._coverage_penalty_weight = coverage_penalty_weight
        self._initial_cell_state = nest.map_structure(
            self._maybe_split_batch_beams, initial_state,
            self._cell.state_size)
        self._start_tokens = array_ops.tile(
            array_ops.expand_dims(self._start_tokens, 1),
            [1, self._beam_width])
        self._start_inputs = self._embedding_fn(self._start_tokens)

        self._finished = array_ops.one_hot(array_ops.zeros([self._batch_size],
                                                           dtype=dtypes.int32),
                                           depth=self._beam_width,
                                           on_value=False,
                                           off_value=True,
                                           dtype=dtypes.bool)
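
As the constructor above shows, `embedding` may be passed either as a lookup table (the `params` argument for `embedding_lookup`) or as an arbitrary callable from ids to vectors; internally both are normalized to a callable. A minimal, runnable sketch of that normalization (table contents are illustrative):

import tensorflow as tf

embedding_table = tf.constant([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])

def normalize_embedding_arg(embedding):
    # Mirrors the constructor logic above: accept a callable as-is,
    # otherwise wrap the table in an embedding_lookup lambda.
    if callable(embedding):
        return embedding
    return lambda ids: tf.nn.embedding_lookup(embedding, ids)

embedding_fn = normalize_embedding_arg(embedding_table)
start_tokens = tf.fill([4], 1)                 # batch_size = 4, start id = 1
start_inputs = embedding_fn(start_tokens)      # shape [4, 2]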
Example #55
0
def embedding_tied_rnn_seq2seq(encoder_inputs,
                               decoder_inputs,
                               cell,
                               num_symbols,
                               output_projection=None,
                               feed_previous=False,
                               dtype=dtypes.float32,
                               scope=None):
    """Embedding RNN sequence-to-sequence model with tied (shared) parameters.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_symbols x cell.input_size]). Then it runs an RNN to encode embedded
  encoder_inputs into a state vector. Next, it embeds decoder_inputs using
  the same embedding. Then it runs RNN decoder, initialized with the last
  encoder state, on embedded decoder_inputs.

  Args:
    encoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
    decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_symbols: integer; number of symbols for both encoder and decoder.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [cell.output_size x num_symbols] and B has
      shape [num_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype to use for the initial RNN states (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_tied_rnn_seq2seq".

  Returns:
    outputs: A list of the same length as decoder_inputs of 2D Tensors with
      shape [batch_size x num_decoder_symbols] containing the generated outputs.
    states: The state of each decoder cell in each time-step. This is a list
      with length len(decoder_inputs) -- one item for each time-step.
      Each item is a 2D Tensor of shape [batch_size x cell.state_size].

  Raises:
    ValueError: when output_projection has the wrong shape.
  """
    if output_projection is not None:
        proj_weights = ops.convert_to_tensor(output_projection[0], dtype=dtype)
        proj_weights.get_shape().assert_is_compatible_with(
            [cell.output_size, num_symbols])
        proj_biases = ops.convert_to_tensor(output_projection[1], dtype=dtype)
        proj_biases.get_shape().assert_is_compatible_with([num_symbols])

    with vs.variable_scope(scope or "embedding_tied_rnn_seq2seq"):
        with ops.device("/cpu:0"):
            embedding = vs.get_variable("embedding",
                                        [num_symbols, cell.input_size])

        emb_encoder_inputs = [
            embedding_ops.embedding_lookup(embedding, x)
            for x in encoder_inputs
        ]
        emb_decoder_inputs = [
            embedding_ops.embedding_lookup(embedding, x)
            for x in decoder_inputs
        ]

        def extract_argmax_and_embed(prev, _):
            """Loop_function that extracts the symbol from prev and embeds it."""
            if output_projection is not None:
                prev = nn_ops.xw_plus_b(prev, output_projection[0],
                                        output_projection[1])
            prev_symbol = array_ops.stop_gradient(math_ops.argmax(prev, 1))
            return embedding_ops.embedding_lookup(embedding, prev_symbol)

        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_symbols)

        if isinstance(feed_previous, bool):
            loop_function = extract_argmax_and_embed if feed_previous else None
            return tied_rnn_seq2seq(emb_encoder_inputs,
                                    emb_decoder_inputs,
                                    cell,
                                    loop_function=loop_function,
                                    dtype=dtype)
        else:  # If feed_previous is a Tensor, we construct 2 graphs and use cond.
            outputs1, states1 = tied_rnn_seq2seq(
                emb_encoder_inputs,
                emb_decoder_inputs,
                cell,
                loop_function=extract_argmax_and_embed,
                dtype=dtype)
            vs.get_variable_scope().reuse_variables()
            outputs2, states2 = tied_rnn_seq2seq(emb_encoder_inputs,
                                                 emb_decoder_inputs,
                                                 cell,
                                                 dtype=dtype)

            outputs = control_flow_ops.cond(feed_previous, lambda: outputs1,
                                            lambda: outputs2)
            states = control_flow_ops.cond(feed_previous, lambda: states1,
                                           lambda: states2)
            return outputs, states
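
The extract_argmax_and_embed loop function above is what makes the pure-decoding path greedy: at each step it takes the argmax of the previous logits, blocks gradients through that discrete choice, and re-embeds the chosen symbol as the next input. A runnable sketch of one such step (table and logits are illustrative):

import tensorflow as tf

embedding = tf.constant([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])  # 3 symbols

def greedy_feedback(prev_logits):
    # Pick the most likely symbol, stop gradients through the choice,
    # and embed it as the next decoder input.
    prev_symbol = tf.stop_gradient(tf.argmax(prev_logits, axis=1))
    return tf.nn.embedding_lookup(embedding, prev_symbol)

logits = tf.constant([[0.1, 2.0, 0.3]])    # one batch element, one time step
next_input = greedy_feedback(logits)        # -> [[1.0, 1.0]]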
  def __init__(self,
               cell,
               embedding,
               start_tokens,
               end_token,
               initial_state,
               beam_width,
               output_layer=None,
               length_penalty_weight=0.0,
               positional_embedding=None):
    """Initialize the BeamSearchDecoder.

    Args:
      cell: An `RNNCell` instance.
      embedding: A callable that takes a vector tensor of `ids` (argmax ids),
        or the `params` argument for `embedding_lookup`.
      start_tokens: `int32` vector shaped `[batch_size]`, the start tokens.
      end_token: `int32` scalar, the token that marks end of decoding.
      initial_state: A (possibly nested tuple of...) tensors and TensorArrays.
      beam_width:  Python integer, the number of beams.
      output_layer: (Optional) An instance of `tf.layers.Layer`, i.e.,
        `tf.layers.Dense`.  Optional layer to apply to the RNN output prior
        to storing the result or sampling.
      length_penalty_weight: Float weight to penalize length. Disabled with 0.0.
      positional_embedding: A callable to use decoder positional embedding.
      Default is None in which case positional embedding is disabled

    Raises:
      TypeError: if `cell` is not an instance of `RNNCell`,
        or `output_layer` is not an instance of `tf.layers.Layer`.
      ValueError: If `start_tokens` is not a vector or
        `end_token` is not a scalar.
    """
    rnn_cell_impl.assert_like_rnncell("cell", cell)
    if (output_layer is not None and
            not isinstance(output_layer, layers_base.Layer)):
      raise TypeError(
          "output_layer must be a Layer, received: %s" % type(output_layer))
    self._cell = cell
    self._output_layer = output_layer

    if callable(embedding):
      self._embedding_fn = embedding
    else:
      self._embedding_fn = (
          lambda ids: embedding_ops.embedding_lookup(embedding, ids))

    self._use_pos_embedding = False
    if positional_embedding is not None:
      if callable(positional_embedding):
        self._pos_embedding_fn = positional_embedding
      else:
        self._pos_embedding_fn = (
            lambda ids: embedding_ops.embedding_lookup(positional_embedding, ids))
      self._use_pos_embedding = True

    self._start_tokens = ops.convert_to_tensor(
        start_tokens, dtype=dtypes.int32, name="start_tokens")
    if self._start_tokens.get_shape().ndims != 1:
      raise ValueError("start_tokens must be a vector")
    self._end_token = ops.convert_to_tensor(
        end_token, dtype=dtypes.int32, name="end_token")
    if self._end_token.get_shape().ndims != 0:
      raise ValueError("end_token must be a scalar")

    self._batch_size = array_ops.size(start_tokens)
    self._beam_width = beam_width
    self._length_penalty_weight = length_penalty_weight
    self._initial_cell_state = nest.map_structure(
        self._maybe_split_batch_beams, initial_state, self._cell.state_size)
    self._start_tokens = array_ops.tile(
        array_ops.expand_dims(self._start_tokens, 1), [1, self._beam_width])
    self._start_inputs = self._embedding_fn(self._start_tokens)

    if self._use_pos_embedding:
      self._start_inputs += self._pos_embedding_fn(ops.convert_to_tensor(0))

    self._finished = array_ops.one_hot(
        array_ops.zeros([self._batch_size], dtype=dtypes.int32),
        depth=self._beam_width,
        on_value=False,
        off_value=True,
        dtype=dtypes.bool)
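
This variant adds a decoder positional embedding: at step 0 the start inputs are the token embeddings plus the embedding of position 0 (later steps would add the embedding of the current time index). A small sketch of that addition (both tables are illustrative):

import tensorflow as tf

token_table = tf.constant([[1.0, 1.0], [2.0, 2.0]])
position_table = tf.constant([[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]])

token_fn = lambda ids: tf.nn.embedding_lookup(token_table, ids)
pos_fn = lambda positions: tf.nn.embedding_lookup(position_table, positions)

start_tokens = tf.constant([[1, 1]])             # [batch_size, beam_width]
# Token embeddings plus the position-0 embedding, broadcast over batch/beam.
start_inputs = token_fn(start_tokens) + pos_fn(tf.constant(0))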
Example #57
0
def embedding_attention_decoder(decoder_inputs,
                                initial_state,
                                attention_states,
                                cell,
                                num_symbols,
                                num_heads=1,
                                output_size=None,
                                output_projection=None,
                                feed_previous=False,
                                dtype=dtypes.float32,
                                scope=None):
    """RNN decoder with embedding and attention and a pure-decoding option.

  Args:
    decoder_inputs: a list of 1D batch-sized int32-Tensors (decoder inputs).
    initial_state: 2D Tensor [batch_size x cell.state_size].
    attention_states: 3D Tensor [batch_size x attn_length x attn_size].
    cell: rnn_cell.RNNCell defining the cell function.
    num_symbols: integer, how many symbols come into the embedding.
    num_heads: number of attention heads that read from attention_states.
    output_size: size of the output vectors; if None, use cell.output_size.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_symbols] and B has shape
      [num_symbols]; if provided and feed_previous=True, each fed previous
      output will first be multiplied by W and have B added.
    feed_previous: Boolean; if True, only the first of decoder_inputs will be
      used (the "GO" symbol), and all other decoder inputs will be generated by:
        next = embedding_lookup(embedding, argmax(previous_output)),
      In effect, this implements a greedy decoder. It can also be used
      during training to emulate http://arxiv.org/pdf/1506.03099v2.pdf.
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype to use for the RNN initial states (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_decoder".

  Returns:
    outputs: A list of the same length as decoder_inputs of 2D Tensors with
      shape [batch_size x output_size] containing the generated outputs.
    states: The state of each decoder cell in each time-step. This is a list
      with length len(decoder_inputs) -- one item for each time-step.
      Each item is a 2D Tensor of shape [batch_size x cell.state_size].

  Raises:
    ValueError: when output_projection has the wrong shape.
  """
    if output_size is None:
        output_size = cell.output_size
    if output_projection is not None:
        proj_weights = ops.convert_to_tensor(output_projection[0], dtype=dtype)
        proj_weights.get_shape().assert_is_compatible_with(
            [cell.output_size, num_symbols])
        proj_biases = ops.convert_to_tensor(output_projection[1], dtype=dtype)
        proj_biases.get_shape().assert_is_compatible_with([num_symbols])

    with vs.variable_scope(scope or "embedding_attention_decoder"):
        with ops.device("/cpu:0"):
            embedding = vs.get_variable("embedding",
                                        [num_symbols, cell.input_size])

        def extract_argmax_and_embed(prev, _):
            """Loop_function that extracts the symbol from prev and embeds it."""
            if output_projection is not None:
                prev = nn_ops.xw_plus_b(prev, output_projection[0],
                                        output_projection[1])
            prev_symbol = array_ops.stop_gradient(math_ops.argmax(prev, 1))
            emb_prev = embedding_ops.embedding_lookup(embedding, prev_symbol)
            return emb_prev

        loop_function = None
        if feed_previous:
            loop_function = extract_argmax_and_embed

        emb_inp = [
            embedding_ops.embedding_lookup(embedding, i)
            for i in decoder_inputs
        ]
        return attention_decoder(emb_inp,
                                 initial_state,
                                 attention_states,
                                 cell,
                                 output_size=output_size,
                                 num_heads=num_heads,
                                 loop_function=loop_function)
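
When output_projection=(W, B) is given, the fed-back previous output is projected to the symbol space before the argmax, exactly as in the loop function above. A runnable sketch of that projection step (shapes are illustrative; tf.matmul plus a bias add is the same computation as nn_ops.xw_plus_b):

import tensorflow as tf

cell_output = tf.constant([[0.5, -0.5]])                  # [batch, output_size]
W = tf.constant([[1.0, 0.0, 2.0], [0.0, 1.0, -2.0]])      # [output_size, num_symbols]
B = tf.constant([0.0, 0.0, 1.0])                          # [num_symbols]

logits = tf.matmul(cell_output, W) + B          # project to the symbol space
prev_symbol = tf.argmax(logits, axis=1)         # id fed back into the embedding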
Example #58
0
 def loss():
     pred = math_ops.matmul(
         embedding_ops.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
     return pred * pred
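
This loss closure is typical of optimizer tests: only row 0 of var0 participates, so differentiating the lookup yields a sparse IndexedSlices gradient and the optimizer updates just that row. A hedged, runnable sketch with the public TF2 API (variable and input values are illustrative):

import tensorflow as tf

var0 = tf.Variable([[1.0, 2.0], [3.0, 4.0]])
x = tf.constant([[5.0], [6.0]])

def loss():
    pred = tf.matmul(tf.nn.embedding_lookup([var0], [0]), x)
    return pred * pred

opt = tf.keras.optimizers.SGD(learning_rate=0.1)
opt.minimize(loss, var_list=[var0])   # only var0[0] changes; var0[1] is untouched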
Example #59
0
def dynamic_distraction_m2_seq2seq(encoder_inputs,
                                   decoder_inputs,
                                   query_inputs,
                                   cell_encoder_fw,
                                   cell_encoder_bw,
                                   distraction_cell,
                                   num_encoder_symbols,
                                   num_decoder_symbols,
                                   embedding_size,
                                   initial_embedding=None,
                                   num_heads=1,
                                   embedding_trainable=False,
                                   output_projection=None,
                                   feed_previous=False,
                                   dtype=None,
                                   scope=None,
                                   initial_state_attention=False):
    """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  input_size]). Then it runs attention decoder, initialized with the last
  encoder state, on embedded decoder_inputs and attending to encoder outputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    query_inputs: A list of 1D int32 Tensors of shape [batch_size] (the query).
    cell_encoder_fw: rnn_cell.RNNCell used as the forward encoder/query cell.
    cell_encoder_bw: rnn_cell.RNNCell used as the backward encoder/query cell.
    distraction_cell: rnn_cell.RNNCell used by the distraction mechanism.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
    with variable_scope.variable_scope(scope
                                       or "dynamic_distraction_m2_seq2seq",
                                       dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.
        """encoder_cell = rnn_cell.EmbeddingWrapper(
        cell, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    """
        if initial_embedding is not None:
            embedding = variable_scope.get_variable(
                'embedding',
                initializer=initial_embedding,
                trainable=embedding_trainable)

        else:
            embedding = variable_scope.get_variable(
                'embedding', [num_encoder_symbols, embedding_size],
                trainable=embedding_trainable)

        embedded_inputs = embedding_ops.embedding_lookup(
            embedding, encoder_inputs)

        embedded_inputs = array_ops.unpack(embedded_inputs)

        query_embeddings = embedding_ops.embedding_lookup(
            embedding, query_inputs)

        query_embeddings = array_ops.unpack(query_embeddings)

        print("Embedded Inputs length:", len(embedded_inputs))

        print("Shape in embedded inputs:", embedded_inputs[0].get_shape())

        with variable_scope.variable_scope("Encoder_Cell"):
            encoder_outputs, encoder_state_fw, encoder_state_bw = rnn.bidirectional_rnn(
                cell_encoder_fw, cell_encoder_bw, embedded_inputs, dtype=dtype)

        with variable_scope.variable_scope("Query_Cell"):

            query_outputs, query_state_fw, query_state_bw = rnn.bidirectional_rnn(
                cell_encoder_fw,
                cell_encoder_bw,
                query_embeddings,
                dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.

        encoder_state = array_ops.concat(1,
                                         [encoder_state_fw, encoder_state_bw])
        query_state = array_ops.concat(1, [query_state_fw, query_state_bw])

        top_states_encoder = [
            array_ops.reshape(e, [-1, 1, 2 * cell_encoder_fw.output_size])
            for e in encoder_outputs
        ]
        attention_states_encoder = array_ops.concat(1, top_states_encoder)

        top_states_query = [
            array_ops.reshape(e, [-1, 1, 2 * cell_encoder_fw.output_size])
            for e in query_outputs
        ]

        attention_states_query = array_ops.concat(1, top_states_query)

        # Decoder.
        output_size = None
        if output_projection is None:
            cell_encoder_fw = rnn_cell.OutputProjectionWrapper(
                cell_encoder_fw, num_decoder_symbols)
            output_size = num_decoder_symbols

        if isinstance(feed_previous, bool):
            return dynamic_distraction_m2_decoder_wrapper(
                decoder_inputs,
                initial_state=encoder_state,
                attention_state=attention_states_encoder,
                attention_states_query=attention_states_query,
                cell_encoder=cell_encoder_fw,
                num_symbols=num_decoder_symbols,
                embedding_size=embedding_size,
                distract_initial_state=encoder_state,
                num_heads=num_heads,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                embedding_scope=scope,
                initial_state_attention=initial_state_attention)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):

            reuse = None if feed_previous_bool else True

            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse) as scope:

                outputs, state = dynamic_distraction_m2_decoder_wrapper(
                    decoder_inputs,
                    initial_state=encoder_state,
                    attention_states=attention_states_encoder,
                    attention_states_query=attention_states_query,
                    cell_encoder=cell_encoder_fw,
                    num_symbols=num_decoder_symbols,
                    embedding_size=embedding_size,
                    distract_initial_state=encoder_state,
                    distraction_cell=distraction_cell,
                    num_heads=num_heads,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    embedding_scope=scope,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention)

                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)

                return outputs + state_list

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        outputs_len = len(
            decoder_inputs)  # Outputs length same as decoder inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        if nest.is_sequence(encoder_state):
            state = nest.pack_sequence_as(structure=encoder_state,
                                          flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state
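
Note that this example uses pre-1.0 TensorFlow APIs: array_ops.unpack (later renamed unstack) and the old concat(concat_dim, values) argument order. A short sketch of the modern equivalents (tensors are purely illustrative):

import tensorflow as tf

a = tf.ones([2, 3])
b = tf.zeros([2, 3])

c = tf.concat([a, b], axis=1)     # old style: array_ops.concat(1, [a, b])
rows = tf.unstack(a, axis=0)      # old style: array_ops.unpack(a)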
 def func(self, x):
     return embedding_ops.embedding_lookup([self._var0, self._var1],
                                           x)
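
The fragment above passes a list of two variables as params, i.e. a sharded embedding table. A runnable sketch of how a sharded lookup recombines rows under the "div" partition strategy used earlier in this file (shard contents are illustrative):

import tensorflow as tf

# Two shards of a 4-row table, split contiguously ("div"): shard0 holds the
# rows for ids 0-1, shard1 the rows for ids 2-3.
shard0 = tf.constant([[0.0], [1.0]])
shard1 = tf.constant([[2.0], [3.0]])

ids = tf.constant([0, 1, 2, 3])
values = tf.compat.v1.nn.embedding_lookup(
    [shard0, shard1], ids, partition_strategy="div")
# values == [[0.], [1.], [2.], [3.]]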