def test_batch_mean(self):
    correct = np.array([-2. / 3, 1., 21. / 4])

    with clean_session():
        array = tf.constant([
            [1, -8, 5, 4, 9],
            [0, 2, 7, 8, 1],
            [2, -8, 6, 4, 9],
        ], dtype=tf.float32)
        mask = tf.constant([
            [1, 1, 1, 0, 0],
            [1, 1, 0, 0, 0],
            [1, 0, 1, 1, 1],
        ], dtype=tf.float32)
        bad_mask = tf.constant([
            [1, 1, 1, 0, 0],
            [0, 0, 0, 0, 0],
            [1, 0, 1, 1, 1],
        ], dtype=tf.float32)

        bm = reduce_mean(SequenceBatch(array, mask))
        assert_almost_equal(bm.eval(), correct, decimal=5)

        # a fully masked row raises an error by default
        bm2 = reduce_mean(SequenceBatch(array, bad_mask))
        with pytest.raises(InvalidArgumentError):
            bm2.eval()

        # with allow_empty=True, the mean of a fully masked row is 0 instead
        bm3 = reduce_mean(SequenceBatch(array, bad_mask), allow_empty=True)
        assert_almost_equal(bm3.eval(), np.array([-2. / 3, 0., 21. / 4]))
def __init__(self, memory_cells, query, project_query=False):
    """Define Attention.

    Args:
        memory_cells (SequenceBatch): a SequenceBatch containing a Tensor of shape
            (batch_size, num_cells, cell_dim)
        query (Tensor): a tensor of shape (batch_size, query_dim).
        project_query (bool): defaults to False. If True, the query goes through an extra
            projection layer to coerce it to cell_dim.
    """
    cell_dim = memory_cells.values.get_shape().as_list()[2]
    if project_query:
        # project the query up/down to cell_dim
        self._projection_layer = Dense(cell_dim, activation='linear')
        query = self._projection_layer(query)  # (batch_size, cell_dim)

    memory_values, memory_mask = memory_cells.values, memory_cells.mask

    # batch matrix multiply to compute logit scores for all choices in all batches
    query = tf.expand_dims(query, 2)  # (batch_size, cell_dim, 1)
    logit_values = tf.batch_matmul(memory_values, query)  # (batch_size, num_cells, 1)
    logit_values = tf.squeeze(logit_values, [2])  # (batch_size, num_cells)

    # set all pad logits to negative infinity
    logits = SequenceBatch(logit_values, memory_mask)
    logits = logits.with_pad_value(-float('inf'))

    # normalize to get probs
    probs = tf.nn.softmax(logits.values)  # (batch_size, num_cells)

    retrieved = tf.batch_matmul(tf.expand_dims(probs, 1), memory_values)  # (batch_size, 1, cell_dim)
    retrieved = tf.squeeze(retrieved, [1])  # (batch_size, cell_dim)

    self._logits = logits.values
    self._probs = probs
    self._retrieved = retrieved
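# Hedged usage sketch (not part of the original code): drives Attention on a tiny batch.
# Assumes the same imports as the tests in this file (tf, SequenceBatch, clean_session),
# and assumes Attention exposes `probs` and `retrieved` as properties mirroring `logits`;
# the numbers are illustrative only.
def _example_attention_usage():
    with clean_session():
        cells = tf.constant([[[1., 2., 3.], [1., 1., 1.]],
                             [[1., 1.5, 0.], [-0.8, 1., -0.4]]], dtype=tf.float32)
        mask = tf.constant([[1, 0], [1, 1]], dtype=tf.float32)  # cell 1 of row 0 is padding
        query = tf.constant([[1., 0., 0.], [0., 1., 0.]], dtype=tf.float32)
        attn = Attention(SequenceBatch(cells, mask), query)
        print(attn.probs.eval())      # (2, 2); the padded cell gets probability 0
        print(attn.retrieved.eval())  # (2, 3); row 0 recovers cell 0 exactly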
def test(self):
    npa = lambda arr: np.array(arr, dtype=np.float32)
    correct = npa([
        npa([3, 5, 7]),
        npa([3, 5, 7]),
        npa([9, 9, 9]),
    ])

    with clean_session():
        array = tf.constant(
            [[[1., 2., 3.], [3., 5., 7.], [100., 200., 2000.]],
             [[2., 4., 6.], [3., 5., 7.], [3., 5., 7.]],
             [[9., 9., 9.], [3., 5., 7.], [1., 2., 3.]]],
            dtype=tf.float32)
        mask = tf.constant([
            [1, 1, 0],
            [1, 1, 1],
            [1, 1, 1],
        ], dtype=tf.float32)

        bm = reduce_max(SequenceBatch(array, mask))
        assert_almost_equal(bm.eval(), correct, decimal=5)

        bad_mask = tf.constant([
            [0, 0, 0],
            [1, 1, 1],
            [1, 1, 1],
        ], dtype=tf.float32)
        bm2 = reduce_mean(SequenceBatch(array, bad_mask))
        with pytest.raises(InvalidArgumentError):
            bm2.eval()
def embed_sequences(self, embed_sequence_batch):
    """Return sentence embeddings as a tensor with shape [batch_size, hidden_size * 2]."""
    forward_values = embed_sequence_batch.values
    forward_mask = embed_sequence_batch.mask
    backward_values = tf.reverse(forward_values, [False, True, False])
    backward_mask = tf.reverse(forward_mask, [False, True])

    # Initialize LSTMs
    self._forward_lstm = LSTM(self.hidden_size, return_sequences=True)
    self._backward_lstm = LSTM(self.hidden_size, return_sequences=True)

    # Pass input through the LSTMs
    # Shape: (batch_size, seq_length, hidden_size)
    forward_seq = self._forward_lstm(forward_values, forward_mask)
    forward_seq.set_shape((None, self.seq_length, self.hidden_size))
    backward_seq = self._backward_lstm(backward_values, backward_mask)
    backward_seq.set_shape((None, self.seq_length, self.hidden_size))

    # Stitch the outputs together --> hidden states (for computing attention)
    # Final dimension: (batch_size, seq_length, hidden_size * 2)
    lstm_states = tf.concat(2, [forward_seq, tf.reverse(backward_seq, [False, True, False])])
    self._hidden_states = SequenceBatch(lstm_states, forward_mask)

    # Stitch the final outputs together --> sequence embedding
    # Final dimension: (batch_size, hidden_size * 2)
    seq_length = tf.shape(forward_values)[1]
    forward_final = tf.slice(forward_seq, [0, seq_length - 1, 0], [-1, 1, self.hidden_size])
    backward_final = tf.slice(backward_seq, [0, seq_length - 1, 0], [-1, 1, self.hidden_size])
    return tf.squeeze(tf.concat(2, [forward_final, backward_final]), [1])
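# Standalone sketch (illustrative, not original code) of the reverse-and-concat alignment
# used above: the backward LSTM consumes time-reversed input, so reversing its output once
# more restores forward time order, making position t hold [forward_t ; backward_t].
def _example_bidi_alignment():
    with clean_session():
        fwd = tf.constant([[[1.], [2.], [3.]]])     # (1, 3, 1), forward time order
        bwd = tf.constant([[[30.], [20.], [10.]]])  # backward outputs, in reversed time order
        aligned = tf.concat(2, [fwd, tf.reverse(bwd, [False, True, False])])
        print(aligned.eval())  # [[[1., 10.], [2., 20.], [3., 30.]]]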
def test_multidim(self):
    npa = lambda arr: np.array(arr, dtype=np.float32)
    correct = npa([
        npa([4, 7, 10]) / 2,
        npa([8, 14, 20]) / 3,
        npa([13, 16, 19]) / 3,
    ])

    with clean_session():
        array = tf.constant(
            [[[1., 2., 3.], [3., 5., 7.], [0., 0., 0.]],
             [[2., 4., 6.], [3., 5., 7.], [3., 5., 7.]],
             [[9., 9., 9.], [3., 5., 7.], [1., 2., 3.]]],
            dtype=tf.float32)
        mask = tf.constant([
            [1, 1, 0],
            [1, 1, 1],
            [1, 1, 1],
        ], dtype=tf.float32)

        bm = reduce_mean(SequenceBatch(array, mask))
        assert_almost_equal(bm.eval(), correct, decimal=5)
def __init__(self, query, cand_embeds, project_query=False):
    """Create a CandidateScorer.

    Args:
        query (Tensor): of shape (batch_size, query_dim)
        cand_embeds (Tensor): of shape (cand_vocab_size, cand_dim)
        project_query (bool): whether to project the query tensor to match the dimension
            of the cand_embeds
    """
    with tf.name_scope("CandidateScorer"):
        cand_batch = FeedSequenceBatch()
        embedded_cand_batch = embed(cand_batch, cand_embeds)  # (batch_size, num_candidates, cand_dim)
        attention = Attention(embedded_cand_batch, query, project_query=project_query)
        self._attention = attention
        self._cand_batch = cand_batch
        self._scores = SequenceBatch(attention.logits, cand_batch.mask)
        self._probs = SequenceBatch(attention.probs, cand_batch.mask)
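# Plain-numpy sketch (illustrative only) of the score CandidateScorer assigns each
# candidate: the dot product between the query and the candidate's embedding, which
# Attention then masks to -inf on padding and softmax-normalizes.
def _example_candidate_logits():
    query = np.array([[1., 0., 2.]])        # (batch_size=1, query_dim=3)
    cand_embeds = np.array([[1., 1., 1.],
                            [0., 3., 0.]])  # (cand_vocab_size=2, cand_dim=3)
    print(query.dot(cand_embeds.T))         # [[3., 0.]] -- the unmasked logits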
def embed_sequences(self, embed_sequence_batch):
    self._forward_lstm = LSTM(self.hidden_size, return_sequences=True)

    # Pass input through the LSTM
    # Shape: (batch_size, seq_length, hidden_size)
    hidden_state_values = self._forward_lstm(embed_sequence_batch.values, embed_sequence_batch.mask)
    self._hidden_states = SequenceBatch(hidden_state_values, embed_sequence_batch.mask)

    # Embedding dimension: (batch_size, hidden_size)
    shape = tf.shape(embed_sequence_batch.values)
    forward_final = tf.slice(hidden_state_values, [0, shape[1] - 1, 0], [-1, 1, self.hidden_size])
    return tf.squeeze(forward_final, [1])
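# Standalone sketch (illustrative, not original code) of the tf.slice trick above: pull
# out the last timestep of a (batch, time, hidden) tensor, then squeeze away the
# singleton time axis.
def _example_last_timestep():
    with clean_session():
        seq = tf.constant(np.arange(24).reshape((2, 3, 4)), dtype=tf.float32)
        shape = tf.shape(seq)
        last = tf.slice(seq, [0, shape[1] - 1, 0], [-1, 1, 4])  # (2, 1, 4)
        print(tf.squeeze(last, [1]).eval())  # rows [8..11] and [20..23]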
def test(self):
    values = tf.constant([
        [1, -8, 5],
        [0, 2, 7],
        [2, -8, 6],
    ], dtype=tf.float32)
    float_mask = tf.constant([
        [1, 1, 1],
        [0, 0, 1],
        [1, 1, 0],
    ], dtype=tf.float32)
    bool_mask = tf.constant([
        [True, True, True],
        [False, False, True],
        [True, True, False],
    ], dtype=tf.bool)

    ninf = float('-inf')
    correct = np.array([
        [1, -8, 5],
        [ninf, ninf, 7],
        [2, -8, ninf],
    ], dtype=np.float32)

    seq_batch0 = SequenceBatch(values, float_mask)
    seq_batch1 = SequenceBatch(values, bool_mask)

    with tf.Session():
        assert_almost_equal(seq_batch0.with_pad_value(ninf).values.eval(), correct)
        assert_almost_equal(seq_batch1.with_pad_value(ninf).values.eval(), correct)
def memory_cells(self):
    # (batch_size, num_cells, cell_dim)
    values = tf.constant(
        [  # (2, 2, 3)
            [[1., 2., 3.], [1., 1., 1.]],
            [[1., 1.5, 0.], [-0.8, 1., -0.4]]
        ], dtype=tf.float32)
    mask = tf.constant(
        [  # (2, 2)
            [1, 0],
            [1, 1],
        ], dtype=tf.float32)
    return SequenceBatch(values, mask)
def test(self):
    correct = np.array([-2, 2, 21])

    with clean_session():
        array = tf.constant([
            [1, -8, 5, 4, 9],
            [0, 2, 7, 8, 1],
            [2, -8, 6, 4, 9],
        ], dtype=tf.float32)
        mask = tf.constant([
            [1, 1, 1, 0, 0],
            [1, 1, 0, 0, 0],
            [1, 0, 1, 1, 1],
        ], dtype=tf.float32)

        result = reduce_sum(SequenceBatch(array, mask))
        assert_almost_equal(result.eval(), correct, decimal=5)
def test_empty(self):
    with clean_session():
        array = tf.constant(np.empty((0, 10, 20)))
        mask = tf.constant(np.empty((0, 10)))
        bm = reduce_mean(SequenceBatch(array, mask))
        assert bm.eval().shape == (0, 20)