Example #1
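Builds a caption sampler with beam search for an attention-based decoder: the convolutional features are batch-normalized and projected, and beam_decoder is driven by tokens_to_inputs_attention_fn / outputs_to_score_attention_fn callbacks that re-run the attention (and optional selector) layers at every decoding step.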
    def build_sampler_with_beam_search(self, beam_size=10, max_len=20):
        features = self.features

        # batch normalize feature vectors
        features = self._batch_norm(features, mode='test', name='conv_features')
        features_proj = self._project_features(features=features)

        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=self.H)

        def tokens_to_inputs_attention_fn(model, symbols, feats, feats_proj, hidden_state, beam_size):
            embed_symbols = model._word_embedding(inputs=tf.reshape(symbols, [-1]), reuse=True)

            context, alpha = model._attention_layer(feats, feats_proj, hidden_state, reuse=True)

            # beta is only produced when the selector is enabled
            beta = None
            if model.enable_selector:
                context, beta = model._selector(context, hidden_state, reuse=True)

            next_input = tf.concat([embed_symbols, context], 1)
            next_input = tf.reshape(next_input, [-1, beam_size, next_input.shape[-1]])
            return next_input, context, alpha, beta

        def outputs_to_score_attention_fn(model, symbols, outputs, beam_context, beam_size):
            embed_symbols = model._word_embedding(inputs=symbols, reuse=True)
            outputs = tf.reshape(outputs, [-1, outputs.shape[-1]])

            logits = model._decode_lstm(embed_symbols, outputs, beam_context)
            logits = tf.reshape(logits, [-1, beam_size, logits.shape[-1]])
            return tf.nn.log_softmax(logits)

        sampled_captions, logprobs, alphas, betas = beam_decoder(lstm_cell, beam_size, self._start, self._end,
                                                tokens_to_inputs_attention_fn, outputs_to_score_attention_fn,
                                                features=features, features_proj=features_proj,
                                                max_len=max_len, selector=self.enable_selector, output_dense=True, scope='lstm', model=self)

        return alphas, betas, sampled_captions
Example #2
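A unit test that runs beam search over a small Markov chain with each of the three cell_transform modes and checks that the decoder returns the most probable terminating sequence ([2], with probability 0.4).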
    def test1(self):
        """
        Test that beam search decodes the correct sequence.
        """
        with self.test_session() as sess:
            table = np.array(
                [[[0.0, 0.6, 0.4], [0.0, 0.4, 0.6], [0.0, 0.0, 1.0]]] * 3)

            for cell_transform in ['default', 'flatten', 'replicate']:
                cell = MarkovChainCell(table)
                initial_state = cell.zero_state(1, tf.int32)
                initial_input = initial_state[0]

                with tf.variable_scope('test1_{}'.format(cell_transform)):
                    best_sparse, best_logprobs = beam_decoder(
                        cell=cell,
                        beam_size=7,
                        stop_token=2,
                        initial_state=initial_state,
                        initial_input=initial_input,
                        tokens_to_inputs_fn=lambda x: tf.expand_dims(x, -1),
                        max_len=5,
                        cell_transform=cell_transform,
                        output_dense=False,
                    )

                    tf.variables_initializer([cell.log_table_var]).run()
                    assert all(best_sparse.eval().values == [2])
                    assert np.isclose(np.exp(best_logprobs.eval())[0], 0.4)
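MarkovChainCell is used by this test (and the next one) but is not defined in these snippets. Below is a minimal sketch of a compatible cell, assuming numpy is imported as np, the TF1 RNNCell interface, and a (V, V, V) table of transition probabilities p(x_t | x_{t-2}, x_{t-1}); the names and details are illustrative, not the original implementation.

class MarkovChainCell(tf.nn.rnn_cell.RNNCell):
    """Hypothetical sketch: emits log p(x_t | x_{t-2}, x_{t-1}) from a fixed table."""

    def __init__(self, table):
        table = np.asarray(table, dtype=np.float32)
        assert table.ndim == 3
        self._log_table = np.log(table + 1e-20)  # avoid log(0)
        self._vocab_size = table.shape[-1]

    @property
    def state_size(self):
        return (1,)  # the state carries the previously seen symbol, shape [batch, 1]

    @property
    def output_size(self):
        return self._vocab_size

    def zero_state(self, batch_size, dtype):
        return (tf.zeros([batch_size, 1], dtype=tf.int32),)

    def __call__(self, inputs, state, scope=None):
        # inputs: [batch, 1] last sampled symbol; state[0]: [batch, 1] symbol before it
        with tf.variable_scope(scope or type(self).__name__):
            self.log_table_var = tf.get_variable(
                'log_table', initializer=self._log_table, trainable=False)
            indices = tf.concat([state[0], inputs], axis=1)        # [batch, 2]
            log_probs = tf.gather_nd(self.log_table_var, indices)  # [batch, vocab]
        return log_probs, (inputs,)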
Example #3
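A unit test for variable reuse: beam_decoder is called twice in the same variable scope (the second time with reuse=True) and the resulting tokens and log-probabilities are checked to be identical.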
    def test3(self):
        """
        test that variable reuse works as expected
        """
        with self.test_session() as sess:
            table = np.array(
                [[[0.0, 0.6, 0.4], [0.0, 0.4, 0.6], [0.0, 0.0, 1.0]]] * 3)

            for cell_transform in ['default', 'flatten', 'replicate']:
                cell = MarkovChainCell(table)
                initial_state = cell.zero_state(1, tf.int32)
                initial_input = initial_state[0]

                with tf.variable_scope(
                        'test3_{}'.format(cell_transform)) as scope:
                    best_sparse, best_logprobs = beam_decoder(
                        cell=cell,
                        beam_size=7,
                        stop_token=2,
                        initial_state=initial_state,
                        initial_input=initial_input,
                        tokens_to_inputs_fn=lambda x: tf.expand_dims(x, -1),
                        max_len=5,
                        cell_transform=cell_transform,
                        output_dense=False,
                        scope=scope)

                tf.variables_initializer([cell.log_table_var]).run()

                with tf.variable_scope(scope, reuse=True) as varscope:
                    best_sparse_2, best_logprobs_2 = beam_decoder(
                        cell=cell,
                        beam_size=7,
                        stop_token=2,
                        initial_state=initial_state,
                        initial_input=initial_input,
                        tokens_to_inputs_fn=lambda x: tf.expand_dims(x, -1),
                        max_len=5,
                        cell_transform=cell_transform,
                        output_dense=False,
                        scope=varscope)

                assert all(
                    sess.run(tf.equal(best_sparse.values,
                                      best_sparse_2.values)))
                assert np.isclose(*sess.run((best_logprobs, best_logprobs_2)))
Example #4
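Test-time decoder for a GRU seq2seq model: the output projection W, b from training is reused, tokens are embedded with emb_fn, and beam_decoder runs with cell_transform='replicate' and a score upper bound of 0.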
    def add_decoder_test(self):
        print('Adding decoder test')
        scope = 'Decoder'
        with tf.variable_scope(scope, reuse=True):

            # Use the same cell and output projection as in the decoder train case
            cell = tf.nn.rnn_cell.GRUCell(
                num_units=self.config.decoder_hidden_size)
            W = tf.get_variable('W')
            b = tf.get_variable('b')

            def output_fn(inputs):
                original_shape = tf.shape(inputs)
                outputs_flat = tf.reshape(
                    inputs, [-1, self.config.decoder_hidden_size])
                logits_flat = tf.matmul(outputs_flat, W) + b
                logits = tf.reshape(logits_flat, [
                    original_shape[0], original_shape[1],
                    self.config.vocab_size
                ])
                return tf.nn.log_softmax(logits)

            def emb_fn(tokens):
                original_shape = tf.shape(tokens)
                outputs = tf.nn.embedding_lookup(self.L, tokens)
                return tf.reshape(outputs, [
                    original_shape[0], original_shape[1],
                    self.config.embedding_dim
                ])

            start_tokens = tf.nn.embedding_lookup(
                self.L, self.labels_placeholder[:, 0])
            print('Start tokens shape', start_tokens.get_shape())
            self.decoded, _ = beam_decoder(cell=cell,
                                           beam_size=self.config.num_beams,
                                           stop_token=self.config.vocab_size - 1,
                                           initial_state=self.encoded,
                                           initial_input=start_tokens,
                                           tokens_to_inputs_fn=emb_fn,
                                           max_len=self.config.max_out_len,
                                           scope=scope,
                                           outputs_to_score_fn=output_fn,
                                           output_dense=True,
                                           cell_transform='replicate',
                                           score_upper_bound=0.0)
            params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            for param in params:
                print(param)
Example #5
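Same test-time setup as the previous example, but with a multi-layer decoder cell: the initial state is a tuple of the encoder output plus zero states for the extra layers, and a greedy rnn_decoder loop is built alongside the beam search for comparison.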
    def add_decoder_test(self):
        print('Adding decoder test')
        scope = 'Decoder'
        with tf.variable_scope(scope, reuse=True):

            # Use the same output projection as in the decoder train case
            W = tf.get_variable('W')
            b = tf.get_variable('b')

            def output_fn(inputs):
                original_shape = tf.shape(inputs)
                outputs_flat = tf.reshape(
                    inputs, [-1, self.config.decoder_hidden_size])
                logits_flat = tf.matmul(outputs_flat, W) + b
                logits = tf.reshape(logits_flat, [
                    original_shape[0], original_shape[1],
                    self.config.vocab_size
                ])
                return tf.nn.log_softmax(logits)

            def emb_fn(tokens):
                original_shape = tf.shape(tokens)
                outputs = tf.nn.embedding_lookup(self.L, tokens)
                return tf.reshape(outputs, [
                    original_shape[0], original_shape[1],
                    self.config.embedding_dim
                ])

            start_tokens = tf.nn.embedding_lookup(
                self.L, self.labels_placeholder[:, 0])
            init_state = [self.encoded]
            for i in range(self.config.num_dec_layers):
                init_state.append(tf.zeros_like(self.encoded,
                                                dtype=tf.float32))
            init_state = tuple(init_state)
            self.decoded, _ = beam_decoder(cell=self.cell,
                                           beam_size=self.config.num_beams,
                                           stop_token=self.config.vocab_size - 1,
                                           initial_state=init_state,
                                           initial_input=start_tokens,
                                           tokens_to_inputs_fn=emb_fn,
                                           max_len=self.config.max_out_len,
                                           scope=scope,
                                           outputs_to_score_fn=output_fn,
                                           output_dense=True,
                                           cell_transform='replicate',
                                           score_upper_bound=0.0)

            # Greedy decoder
            def loop_fn(prev, i):
                indices = tf.argmax(tf.matmul(prev, W) + b, axis=1)
                return tf.nn.embedding_lookup(self.L, indices)

            decoder_inputs = tf.nn.embedding_lookup(
                self.L, ids=self.labels_placeholder)
            decoder_inputs = tf.unstack(decoder_inputs, axis=1)[:-1]
            outputs, _ = tf.nn.seq2seq.rnn_decoder(
                decoder_inputs=decoder_inputs, initial_state=init_state,
                cell=self.cell, loop_function=loop_fn, scope=scope)

            # Convert back to tensor
            tensor_preds = tf.stack(outputs, axis=1)

            # Compute output_projection
            original_shape = tf.shape(tensor_preds)
            outputs_flat = tf.reshape(tensor_preds,
                                      [-1, self.config.decoder_hidden_size])
            logits_flat = tf.matmul(outputs_flat, W) + b

            # Reshape back to original
            self.test_scores = tf.reshape(
                logits_flat,
                [original_shape[0], original_shape[1], self.config.vocab_size])
            self.greedy_decoded = tf.argmax(self.test_scores, axis=2)
Example #6
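Minimal image-captioning setup: the image embedding primes the RNN state, then beam_decoder generates tokens starting from the embedding of token 0, replacing the commented-out greedy argmax loop.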

beamSz = 1

image_embedding = tf.matmul(img, img_embedding) + img_embedding_bias
with tf.variable_scope("RNN"):
    output, state = rnn(image_embedding, state)
    previous_word = tf.nn.embedding_lookup(E, [0])

    with tf.variable_scope("RNN_beam") as scope:
        best_sparse, best_logprobs = beam_decoder(
            cell=rnn,
            beam_size=beamSz,
            stop_token=0,
            initial_state=state,
            initial_input=previous_word,
            tokens_to_inputs_fn=lambda x: tf.nn.embedding_lookup(E, x),
            outputs_to_score_fn=outputs_to_score_fn,
            max_len=maxlen,
            cell_transform='default',
            output_dense=True,
            scope=scope)

    # for i in range(maxlen):
    # 	tf.get_variable_scope().reuse_variables()

    # 	output, state = rnn(previous_word, state)
    # 	prob = tf.matmul(output, W) + b
    # 	best_word = tf.argmax(prob, 1)
    # 	previous_word = tf.nn.embedding_lookup(E, best_word)
    # 	all_words.append(best_word)
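The outputs_to_score_fn referenced above is not defined in this snippet. A minimal sketch, assuming W and b are the vocabulary projection used in the commented-out greedy loop (so W has shape [hidden_size, vocab_size]) and that the function is defined before the RNN scope, could look like this:

def outputs_to_score_fn(outputs):
    # outputs: [batch, beam_size, hidden_size] RNN outputs for every beam
    original_shape = tf.shape(outputs)
    hidden_size = int(W.get_shape()[0])
    flat = tf.reshape(outputs, [-1, hidden_size])
    logits = tf.reshape(tf.matmul(flat, W) + b,
                        [original_shape[0], original_shape[1], -1])
    # beam_decoder expects log-probabilities over the vocabulary
    return tf.nn.log_softmax(logits)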
Example #7
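A variant of the test-time decoder for a memory-augmented cell: the initial state also includes a memory tensor derived from the summed encoder memory (passed through a sigmoid projection, normalized by sequence length, tiled across cells, and perturbed with Gaussian noise) before the beam search and greedy baseline are built.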
	def add_decoder_test(self):
		print('Adding decoder test')
		scope = 'Decoder'
		with tf.variable_scope(scope, reuse=True):

			# Use the same output projection as in the decoder train case
			W = tf.get_variable('W')
			b = tf.get_variable('b')
			W_ini = tf.get_variable('W_ini')
			def output_fn(inputs):
				original_shape = tf.shape(inputs)
				outputs_flat = tf.reshape(inputs, [-1, self.config.decoder_hidden_size])
				logits_flat = tf.matmul(outputs_flat, W) + b
				logits = tf.reshape(logits_flat, [original_shape[0], original_shape[1], self.config.vocab_size])
				return tf.nn.log_softmax(logits)

			def emb_fn(tokens):
				original_shape = tf.shape(tokens)
				outputs = tf.nn.embedding_lookup(self.L, tokens)
				return tf.reshape(outputs, [original_shape[0], original_shape[1], self.config.embedding_dim])

			start_tokens = tf.nn.embedding_lookup(self.L, self.labels_placeholder[:, 0])
			init_state = list(self.encoded) + \
						[tf.zeros_like(self.encoded[0])] + \
						[tf.zeros(shape=(tf.shape(self.encoded[0])[0], self.config.num_cells), dtype=tf.float32)]*2 

						# + \
						# [tf.zeros(shape=(tf.shape(self.encoded[0])[0], self.config.num_cells, self.config.decoder_hidden_size), dtype=tf.float32)]
			
			# Memory is currently 
			summed_memory_vec = tf.reduce_sum(self.memory, axis=1)
			numer = tf.sigmoid(tf.matmul(summed_memory_vec, W_ini))
			print('Numer is', numer)
			init_memory = numer/tf.expand_dims(tf.cast(self.input_seq_lens, tf.float32), 1)
			init_memory = tf.expand_dims(init_memory, 1)
			init_memory = tf.tile(init_memory, [1, self.config.num_cells, 1])
			init_memory = init_memory + tf.random_normal(shape=tf.shape(init_memory), mean=0.0, stddev=np.sqrt(0.1))
			print('Init memory', init_memory)
			init_state += [init_memory]
			# for i in range(self.config.num_dec_layers):
			# 	init_state.append(tf.zeros_like(self.encoded, dtype=tf.float32))
			init_state = tuple(init_state)
			self.decoded, _ = beam_decoder(
			    cell=self.cell,
			    beam_size=self.config.num_beams,
			    stop_token=self.config.vocab_size - 1,
			    initial_state=init_state,
			    initial_input=start_tokens,
			    tokens_to_inputs_fn=emb_fn,
			    max_len=self.config.max_out_len,
			    scope=scope,
			    outputs_to_score_fn=output_fn,
			    output_dense=True,
			    cell_transform='replicate',
			    score_upper_bound = self.config.beam_threshold
			)


			# Greedy decoder
			def loop_fn(prev, i):
				indices = tf.argmax(tf.matmul(prev, W) + b, axis=1)
				return tf.nn.embedding_lookup(self.L, indices)

			decoder_inputs = tf.nn.embedding_lookup(self.L, ids=self.labels_placeholder)
			decoder_inputs = tf.unstack(decoder_inputs, axis=1)[:-1]
			outputs, _ = tf.nn.seq2seq.rnn_decoder(
				decoder_inputs=decoder_inputs, initial_state=init_state,
				cell=self.cell, loop_function=loop_fn, scope=scope)

			# Convert back to tensor
			tensor_preds = tf.stack(outputs, axis=1)

			# Compute output_projection
			original_shape = tf.shape(tensor_preds)
			outputs_flat = tf.reshape(tensor_preds, [-1, self.config.decoder_hidden_size])
			logits_flat = tf.matmul(outputs_flat, W) + b

			# Reshape back to original
			self.test_scores = tf.reshape(logits_flat, [original_shape[0], original_shape[1], self.config.vocab_size])
			self.greedy_decoded = tf.argmax(self.test_scores, axis=2)