Python Dense.Dense примеры использования

Язык программирования: Python

Пространство имен/Пакет: tensorflow.python.layers.core

Класс/Тип: Dense

Метод/Функция: Dense

Примеров на hotexamples.com: 30

Python Dense.Dense - 30 примеров найдено. Это лучшие примеры Python кода для tensorflow.python.layers.core.Dense.Dense, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Dense(30)

build(6)

apply(2)

get_shape(1)

kernel(1)

Пример #1

Показать файл

def Decoder(args, mode, enc_rnn_out, enc_rnn_state, X, emb_Y, emb_out):
    """Build the decoding graph inside the "Decoder" variable scope.

    Args:
        args: configuration object. This function reads `output_vocab_size`,
            `beam_width` and `maxlen` from it and forwards it to `_decoder`.
        mode: the string "train" selects teacher-forced decoding; any other
            value selects beam-search decoding.
        enc_rnn_out: encoder outputs; the size of its first dimension is
            used as the batch size.
        enc_rnn_state: encoder state, forwarded to `_decoder`.
        X: forwarded unchanged as the `X` keyword of the decoder classes.
        emb_Y: inputs handed to the training helper ("train" mode only).
        emb_out: embedding handed to the beam-search decoder.

    Returns:
        A `(logits, sample_ids)` tuple. In "train" mode these are the
        decoder's `rnn_output` and `sample_id`; otherwise `logits` is a
        `tf.no_op()` placeholder and `sample_ids` is `predicted_ids`.
    """
    with tf.variable_scope("Decoder") as decoder_scope:
        # Output projection (W*x + b) onto the target vocabulary.
        out_layer = Dense(args.output_vocab_size)
        beam_width = args.beam_width
        batch_size = tf.shape(enc_rnn_out)[0]

        cell, initial_state = _decoder(args, enc_rnn_out, enc_rnn_state, mode, beam_width, batch_size)

        if mode == "train":
            # Every sequence in the batch is given the same length, args.maxlen.
            seq_len = tf.tile(tf.constant([args.maxlen], dtype=tf.int32), [batch_size])
            helper = tf.contrib.seq2seq.TrainingHelper(inputs=emb_Y, sequence_length=seq_len)
            decoder = BasicDecoder(cell=cell, helper=helper, initial_state=initial_state, X=X, output_layer=out_layer)
            outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder=decoder, maximum_iterations=args.maxlen, scope=decoder_scope)
            logits = outputs.rnn_output
            sample_ids = outputs.sample_id
        else:
            start_tokens = tf.tile(tf.constant([_GO], dtype=tf.int32), [batch_size])
            end_token = _END
            my_decoder = BeamSearchDecoder(cell=cell,
                                           embedding=emb_out,
                                           start_tokens=start_tokens,
                                           end_token=end_token,
                                           initial_state=initial_state,
                                           beam_width=beam_width,
                                           X=X,
                                           output_layer=out_layer,
                                           length_penalty_weight=0.0)

            outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(my_decoder, maximum_iterations=args.maxlen, scope=decoder_scope)
            # No logits are taken from the beam-search output; a no-op keeps
            # the two-element return shape identical across modes.
            logits = tf.no_op()
            sample_ids = outputs.predicted_ids

    return logits, sample_ids

Пример #2

Показать файл

Файл: skip_thought_model.py Проект: aiweiw/skip_thought

    def _build_decoder(self, dec_scope_name, encoder_output, encoder_state, target_data, target_seq_len):
        """Create the training and greedy-prediction decoders.

        Both decoders share the same attention-wrapped multi-layer cell, the
        same embedding variable and the same bias-free output projection.

        Args:
            dec_scope_name: name used for the enclosing name scope and as the
                prefix of the '_train' / '_predict' variable scopes.
            encoder_output: forwarded to `self._build_attention`.
            encoder_state: forwarded to `self._build_attention`.
            target_data: target token ids looked up in the decoder embedding
                (original note: formatted as <GO>...<PAD>).
            target_seq_len: sequence lengths given to the training helper.

        Returns:
            `(train_decoder_output, predict_decoder_output)` as produced by
            `tf.contrib.seq2seq.dynamic_decode`.
        """
        seq2seq = tf.contrib.seq2seq
        with tf.name_scope(dec_scope_name):
            embedding_shape = [self.tgt_vocab_size, self.embedding_size]
            decoder_embeddings = tf.Variable(tf.random_uniform(embedding_shape))

            # Stacked GRU cells wrapped with the attention mechanism.
            gru_layers = [self.get_gru_cell(self.rnn_size, self.dropout)
                          for _ in range(self.num_layers)]
            stacked_cell = tf.contrib.rnn.MultiRNNCell(gru_layers)
            attn_cell, attn_initial_state = self._build_attention(
                encoder_output, encoder_state, stacked_cell)

            # Projection onto the target vocabulary (no bias term).
            kernel_init = tf.truncated_normal_initializer(mean=0.0, stddev=0.1)
            projection = Dense(self.tgt_vocab_size, use_bias=False,
                               kernel_initializer=kernel_init)

            with tf.variable_scope(dec_scope_name + '_train'):
                # Original note: the embedded input is a float32 tensor of
                # shape [batch_size, max_time, embed_size].
                embedded_targets = tf.nn.embedding_lookup(decoder_embeddings, target_data)
                teacher_helper = seq2seq.TrainingHelper(
                    inputs=embedded_targets,
                    sequence_length=target_seq_len,
                    time_major=False)
                teacher_decoder = seq2seq.BasicDecoder(
                    attn_cell, teacher_helper, attn_initial_state, projection)
                train_decoder_output, _, _ = seq2seq.dynamic_decode(
                    teacher_decoder,
                    impute_finished=True,
                    maximum_iterations=self.max_target_len)

            with tf.variable_scope(dec_scope_name + '_predict', reuse=True):
                go_id = self.start_vocab.index('<go>')
                start_ids = tf.fill([self.batch_size], go_id)
                eos_id = self.start_vocab.index('<eos>')
                greedy_helper = seq2seq.GreedyEmbeddingHelper(
                    decoder_embeddings, start_ids, eos_id)
                greedy_decoder = seq2seq.BasicDecoder(
                    attn_cell, greedy_helper, attn_initial_state, projection)
                predict_decoder_output, _, _ = seq2seq.dynamic_decode(
                    greedy_decoder,
                    impute_finished=True,
                    maximum_iterations=self.max_target_len)

        return train_decoder_output, predict_decoder_output

Пример #3

Показать файл

Файл: decoder_unimodal.py Проект: Wenabi/skip-avsr

def _project_lstm_state_tuple(state_tuple, num_units):
    r"""
    Merge a list of `LSTMStateTuple` objects into one projected state.

    The `c` members are concatenated along the last axis, and likewise the
    `h` members; both concatenations are then passed through the same
    bias-free dense layer named 'state_projection'.

    Args:
        state_tuple: a list of `LSTMStateTuple` objects
        num_units: output dimension of the projection

    Returns:
        projected_state: a single `LSTMStateTuple` whose `c` and `h` are the
        projected concatenations
    """
    projection = Dense(num_units, name='state_projection', use_bias=False)

    merged_c = tf.concat([layer_state.c for layer_state in state_tuple], axis=-1)
    merged_h = tf.concat([layer_state.h for layer_state in state_tuple], axis=-1)

    # The same layer object is applied to both tensors, so `c` and `h`
    # go through one shared projection.
    projected_c = projection(merged_c)
    projected_h = projection(merged_h)

    result = tf.contrib.rnn.LSTMStateTuple(c=projected_c, h=projected_h)
    print('projected_state', result)
    return result

Пример #4

Показать файл

    def add_decoder_for_training(self):
        """Build the teacher-forced decoder and store its logits.

        Calls `self.add_attention_for_training()` first, then creates the
        'decoder_embedding' variable, decodes with a `TrainingHelper`, and
        assigns the decoder's `rnn_output` to `self.training_logits`.
        """
        self.add_attention_for_training()

        vocab_size = len(self.Y_word2idx)
        decoder_embedding = tf.get_variable(
            'decoder_embedding',
            [vocab_size, self.decoder_embedding_dim],
            tf.float32,
            tf.random_uniform_initializer(-1.0, 1.0))

        embedded_inputs = tf.nn.embedding_lookup(
            decoder_embedding, self.processed_decoder_input())
        helper = tf.contrib.seq2seq.TrainingHelper(
            inputs=embedded_inputs,
            sequence_length=self.Y_seq_len,
            time_major=False)

        # Start from the wrapper's zero state, with its cell state replaced
        # by the encoder state.
        zero_state = self.decoder_cell.zero_state(self.batch_size, tf.float32)
        start_state = zero_state.clone(cell_state=self.encoder_state)

        decoder = tf.contrib.seq2seq.BasicDecoder(
            cell=self.decoder_cell,
            helper=helper,
            initial_state=start_state,
            output_layer=Dense(len(self.Y_word2idx)))

        longest_target = tf.reduce_max(self.Y_seq_len)
        decoded, _, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder=decoder,
            impute_finished=True,
            maximum_iterations=longest_target)
        self.training_logits = decoded.rnn_output

Пример #5

Показать файл

Файл: seq2seq_model.py Проект: matteo-pagliari/polispell

    def build_encoder(self):
        """Build the encoder sub-graph under the 'encoder' variable scope.

        Sets the following attributes:
            encoder_cell: result of `self.build_encoder_cell()`.
            encoder_embeddings: the "encoder_embeddings" variable.
            encoder_inputs_embedded: embedded inputs after the dense input
                projection.
            encoder_outputs, encoder_last_state: results of
                `tf.nn.dynamic_rnn` over the projected inputs.
        """
        # Parenthesised form prints the same text on Python 2 and is valid
        # syntax on Python 3.
        print('Building Encoder')

        with tf.variable_scope('encoder'):
            self.encoder_cell = self.build_encoder_cell()

            # Initialize encoder_embeddings to have variance=1
            # (a uniform distribution on [-sqrt(3), sqrt(3)] has unit variance).
            initializer = tf.random_uniform_initializer(-math.sqrt(3),
                                                        math.sqrt(3),
                                                        dtype=tf.float32)
            self.encoder_embeddings = tf.get_variable(
                "encoder_embeddings",
                [self.src_vocab_size, self.input_embedding_size],
                initializer=initializer,
                dtype=tf.float32)

            # [batch_size, time_step, embedding_size]
            self.encoder_inputs_embedded = tf.nn.embedding_lookup(
                params=self.encoder_embeddings, ids=self.encoder_inputs)

            # Input projection layer to feed embedded inputs to the cell
            input_layer = Dense(self.encoder_hidden_units, dtype=tf.float32)

            self.encoder_inputs_embedded = input_layer(
                self.encoder_inputs_embedded)

            # Encode input sequences into context vectors:
            # encoder_outputs: [batch_size, max_time_step, cell_output_size]
            # encoder_state: [batch_size, cell_output_size]
            self.encoder_outputs, self.encoder_last_state = tf.nn.dynamic_rnn(
                cell=self.encoder_cell,
                inputs=self.encoder_inputs_embedded,
                sequence_length=self.encoder_inputs_length,
                dtype=tf.float32,
                time_major=False)

Пример #6

Показать файл

Файл: model.py Проект: zhouleidcc/CRNN_Attention_OCR_Chinese

def decode(helper, memory, scope, enc_state, reuse=None, max_iterations=27):
    """Run an attention GRU decoder driven by `helper`.

    Args:
        helper: seq2seq helper passed to `BasicDecoder`.
        memory: memory attended over by the Luong attention mechanism.
        scope: variable scope (name or object) the decoder is built in.
        enc_state: encoder state; its element `[0]` replaces the cell state
            of the attention wrapper's zero state.
        reuse: forwarded to `tf.variable_scope`.
        max_iterations: upper bound on decoding steps, forwarded to
            `dynamic_decode` as `maximum_iterations`. Defaults to 27, the
            value that was previously hard-coded.

    Returns:
        The value returned by `tf.contrib.seq2seq.dynamic_decode`.
    """
    with tf.variable_scope(scope, reuse=reuse):
        attention_mechanism = tf.contrib.seq2seq.LuongAttention(
            num_units=cfg.RNN_UNITS, memory=memory)
        cell = tf.contrib.rnn.GRUCell(num_units=cfg.RNN_UNITS)
        attn_cell = tf.contrib.seq2seq.AttentionWrapper(
            cell,
            attention_mechanism,
            attention_layer_size=cfg.RNN_UNITS,
            output_attention=True)
        # Projects decoder outputs to cfg.VOCAB_SIZE units.
        output_layer = Dense(units=cfg.VOCAB_SIZE)

        decoder = tf.contrib.seq2seq.BasicDecoder(
            cell=attn_cell,
            helper=helper,
            initial_state=attn_cell.zero_state(
                dtype=tf.float32,
                batch_size=cfg.BATCH_SIZE).clone(cell_state=enc_state[0]),
            output_layer=output_layer)
        outputs = tf.contrib.seq2seq.dynamic_decode(decoder=decoder,
                                                    output_time_major=False,
                                                    impute_finished=True,
                                                    maximum_iterations=max_iterations)
        return outputs

Пример #7

Показать файл

Файл: TULVAE.py Проект: lzzppp/DERT

def decoder(decoder_embed_input,decoder_y,target_length,max_target_length,encode_state,keep_prob,reuse=False):
    """Build a teacher-forced LSTM decoder in the "decoder" variable scope.

    Relies on the module-level names `n_hidden`, `n_input`, `batch_size`,
    `vocab_to_int` and `dic_embeddings`.

    Args:
        decoder_embed_input: token ids; a <GO> id is prepended before the
            embedding lookup, and the ids are also returned as `target`.
        decoder_y: tensor concatenated to the embedded inputs on axis 2.
        target_length: sequence lengths for the training helper and mask.
        max_target_length: maximum number of decoding steps and mask width.
        encode_state: initial state of the decoder.
        keep_prob: output keep probability of the dropout wrapper.
        reuse: forwarded to `tf.variable_scope`.

    Returns:
        `(output, predicting_logits, training_logits, masks, target)` where
        `predicting_logits` is the decoder's `sample_id` (named
        'predictions') and `training_logits` its `rnn_output` (named
        'logits').
    """
    seq2seq = tf.contrib.seq2seq
    with tf.variable_scope("decoder", reuse=reuse):
        lstm = tf.contrib.rnn.LSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)
        dropout_cell = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
        projection = Dense(n_input)

        # Prepend a <GO> column in front of every id sequence.
        go_column = tf.fill([batch_size, 1], vocab_to_int['<GO>'])
        shifted_ids = tf.concat([go_column, decoder_embed_input], 1)
        embedded = tf.nn.embedding_lookup(dic_embeddings, shifted_ids)
        step_inputs = tf.concat([embedded, decoder_y], 2)

        helper = seq2seq.TrainingHelper(inputs=step_inputs,
                                        sequence_length=target_length)
        basic_decoder = seq2seq.BasicDecoder(dropout_cell, helper, encode_state, projection)
        output, _, _ = seq2seq.dynamic_decode(basic_decoder,
                                              impute_finished=True,
                                              maximum_iterations=max_target_length)

        predicting_logits = tf.identity(output.sample_id, name='predictions')
        training_logits = tf.identity(output.rnn_output, 'logits')
        masks = tf.sequence_mask(target_length, max_target_length, dtype=tf.float32, name='masks')
        # The unshifted input ids are returned as the targets.
        return output, predicting_logits, training_logits, masks, decoder_embed_input

Пример #8

Показать файл

Файл: model_base.py Проект: zhuzhutingru123/Self-Critical-SCST-TensorFlow

 def _build_word_projections(self):
     """Create the decoder output projection and the word embedding map.

     Builds a dense layer named 'output_projection' (stored on
     `self.decoder_output_layer`) and the 'embedding_map' variable (stored
     on `self._word_embed_map`). The embedding variable is created on
     '/cpu:0' when the configured token type is 'word'.

     Returns:
         The embedding map variable.
     """
     config = self._config
     hidden_size = config.rnn_size
     embed_size = config.rnn_word_size
     vocab_size = self._softmax_size
     on_cpu = config.token_type == 'word'

     # Build the projection eagerly so the layer is built before it is
     # attached to the model.
     projection = Dense(vocab_size, name='output_projection')
     projection.build(hidden_size)
     self.decoder_output_layer = projection

     print('INFO: Building separate embedding matrix.')
     embed_spec = {
         'name': 'embedding_map',
         'shape': [vocab_size, embed_size],
         'dtype': tf.float32,
         'trainable': True,
     }
     if not on_cpu:
         self._word_embed_map = tf.get_variable(**embed_spec)
         return self._word_embed_map

     with tf.device('/cpu:0'):
         self._word_embed_map = tf.get_variable(**embed_spec)
     return self._word_embed_map

Пример #9

Показать файл

    def _decoder_inference(self, init_state):
        """Decode with beam search and return the first beam's token ids.

        Args:
            init_state: decoder initial state; it is tiled `args.beam_width`
                times with `tile_batch` before decoding.

        Returns:
            `predicted_ids[:, :, 0]` of the beam-search output.
        """
        width = args.beam_width

        # Repeat z along a new axis 1, once per beam.
        z_per_beam = tf.tile(tf.expand_dims(self.z, 1), [1, width, 1])

        layers = [self._rnn_cell(args.rnn_size, reuse=True)
                  for _ in range(args.decoder_layers)]
        stacked_cell = tf.nn.rnn_cell.MultiRNNCell(layers)

        start_id = tf.constant([self._word2idx['<start>']], dtype=tf.int32)
        start_tokens = tf.tile(start_id, [self._batch_size])
        end_token = self._word2idx['<end>']
        beam_state = tf.contrib.seq2seq.tile_batch(init_state, args.beam_width)
        projection = Dense(args.vocab_size, _reuse=True)

        decoder = BeamSearchDecoder(
            cell=stacked_cell,
            embedding=self.tied_embedding,
            start_tokens=start_tokens,
            end_token=end_token,
            initial_state=beam_state,
            beam_width=args.beam_width,
            output_layer=projection,
            length_penalty_weight=0.0,
            z=z_per_beam)

        decoded, _, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder=decoder,
            impute_finished=False,
            maximum_iterations=self.gen_seq_length)
        return decoded.predicted_ids[:, :, 0]

Пример #10

Показать файл

Файл: RNN-TF-dynamic-decode.py Проект: rheehot/hccho2FirstGitProject

def attention_keras_test():
    """Toy experiment: attention decoder built on a Keras SimpleRNNCell.

    Builds a `tf.keras.layers.SimpleRNNCell` wrapped in an `AttentionWrapper`
    with Bahdanau attention over randomly generated encoder outputs, decodes
    three hard-coded sequences, runs 100 Adam steps on the sequence loss and
    prints the outputs, final state, alignment information, projection
    weights and a manually computed loss. Returns nothing.
    """
    # Uses tf.keras.layers.SimpleRNNCell
    vocab_size = 6
    SOS_token = 0
    EOS_token = 5
    
    x_data = np.array([[SOS_token, 3, 1, 4, 3, 2],[SOS_token, 3, 4, 2, 3, 1],[SOS_token, 1, 3, 2, 2, 1]], dtype=np.int32)
    y_data = np.array([[3, 1, 4, 3, 2,EOS_token],[3, 4, 2, 3, 1,EOS_token],[1, 3, 2, 2, 1,EOS_token]],dtype=np.int32)
    print("data shape: ", x_data.shape)
    sess = tf.InteractiveSession()
    
    output_dim = vocab_size
    batch_size = len(x_data)
    hidden_dim =7
    seq_length = x_data.shape[1]
    embedding_dim = 8
    state_tuple_mode = True

    # Deterministic embedding initial values: 0..vocab_size*embedding_dim-1, one row per token.
    init = np.arange(vocab_size*embedding_dim).reshape(vocab_size,-1)
    
    train_mode = True
    alignment_history_flag = True   # When True, initial_state / last state must not be passed to sess.run, because alignment_history is a function (original author's note)...
    with tf.variable_scope('test',reuse=tf.AUTO_REUSE) as scope:
        # Make rnn cell
        cell = tf.keras.layers.SimpleRNNCell(units=hidden_dim)
    
        embedding = tf.get_variable("embedding", initializer=init.astype(np.float32),dtype = tf.float32)
        inputs = tf.nn.embedding_lookup(embedding, x_data) # batch_size  x seq_length x embedding_dim
    
        Y = tf.convert_to_tensor(y_data)
    
        #encoder_outputs = tf.ones([batch_size,20,30])
        encoder_outputs = tf.convert_to_tensor(np.random.normal(0,1,[batch_size,20,30]).astype(np.float32)) # 20: encoder sequence length, 30: encoder hidden dim
        
        #input_lengths = [20]*batch_size
        input_lengths = [5,10,20]  # so that attention is not given to e.g. padding in the encoder
        
        # attention mechanism  # num_units = Na = 11
        attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(num_units=11, memory=encoder_outputs,memory_sequence_length=input_lengths,normalize=False)
        #attention_mechanism = tf.contrib.seq2seq.BahdanauMonotonicAttention(num_units=11, memory=encoder_outputs,memory_sequence_length=input_lengths)
        
        # With LuongAttention, num_units cannot be arbitrary; it must match the decoder's hidden_dim
        #attention_mechanism = tf.contrib.seq2seq.LuongAttention(num_units=hidden_dim, memory=encoder_outputs,memory_sequence_length=input_lengths)
        
        
        # output_attention = True(default) ==> the attention is emitted as the output; if False, the cell's output is emitted
        # attention_layer_size = N_l
        
        attention_initial_state = [cell.get_initial_state(batch_size=batch_size, dtype=tf.float32)]
        
        cell = tf.contrib.seq2seq.AttentionWrapper(cell, attention_mechanism, attention_layer_size=13,initial_cell_state=attention_initial_state,
                                                   alignment_history=alignment_history_flag,output_attention=True)

        # Calling zero_state here picks up the attention_initial_state passed to the AttentionWrapper above, i.e. AttentionWrapperState.cell_state holds the supplied value.
        initial_state = cell.zero_state(batch_size, tf.float32) # AttentionWrapperState
 
        if train_mode:
            helper = tf.contrib.seq2seq.TrainingHelper(inputs, np.array([seq_length]*batch_size,dtype=np.int32))
        else:
            helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(embedding, start_tokens=tf.tile([SOS_token], [batch_size]), end_token=EOS_token)
     
        output_layer = Dense(output_dim, name='output_projection')
        decoder = tf.contrib.seq2seq.BasicDecoder(cell=cell,helper=helper,initial_state=initial_state,output_layer=output_layer)    
        # If maximum_iterations is not set, inference falls into an infinite loop when no EOS token is encountered
        outputs, last_state, last_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(decoder=decoder,output_time_major=False,impute_finished=True,maximum_iterations=10)
     
        # Uniform weights: every position contributes equally to the loss.
        weights = tf.ones(shape=[batch_size,seq_length])
        loss =   tf.contrib.seq2seq.sequence_loss(logits=outputs.rnn_output, targets=Y, weights=weights)
     
     
        
        
        opt = tf.train.AdamOptimizer(0.01).minimize(loss)
        
        sess.run(tf.global_variables_initializer())
        for i in range(100):
            loss_,_ =sess.run([loss,opt])
            print("{} loss: = {}".format(i,loss_))
        
        if alignment_history_flag ==False:
            print("initial_state: ", sess.run(initial_state))
        print("\n\noutputs: ",outputs)
        o = sess.run(outputs.rnn_output)  #batch_size, seq_length, outputs
        o2 = sess.run(tf.argmax(outputs.rnn_output,axis=-1))
        print("\n",o,o2) #batch_size, seq_length, outputs
     
        print("\n\nlast_state: ",last_state)
        if alignment_history_flag == False:
            print(sess.run(last_state)) # batch_size, hidden_dim
        else:
            # With alignment history enabled, the state fields are evaluated one by one.
            print("alignment_history: ", last_state.alignment_history.stack())
            alignment_history_ = sess.run(last_state.alignment_history.stack())
            print(alignment_history_)
            print("alignment_history sum: ",np.sum(alignment_history_,axis=-1))
            
            print("cell_state: ", sess.run(last_state.cell_state))
            print("attention: ", sess.run(last_state.attention))
            print("time: ", sess.run(last_state.time))
            
            alignments_ = sess.run(last_state.alignments)
            print("alignments: ", alignments_)
            print('alignments sum: ', np.sum(alignments_,axis=1))   # check that the alignments sum to 1
            print("attention_state: ", sess.run(last_state.attention_state))

     
        print("\n\nlast_sequence_lengths: ",last_sequence_lengths)
        print(sess.run(last_sequence_lengths)) #  [seq_length]*batch_size    
         
        print("kernel(weight)",sess.run(output_layer.trainable_weights[0]))  # kernel(weight)
        print("bias",sess.run(output_layer.trainable_weights[1]))  # bias
     
        if train_mode:
            # Cross-check the TF loss against a manual average negative log-likelihood.
            p = sess.run(tf.nn.softmax(outputs.rnn_output)).reshape(-1,output_dim)
            print("loss: {:20.6f}".format(sess.run(loss)))
            print("manual cal. loss: {:0.6f} ".format(np.average(-np.log(p[np.arange(y_data.size),y_data.flatten()]))) )

Пример #11

Показать файл

Файл: RNN-TF-dynamic-decode.py Проект: rheehot/hccho2FirstGitProject

def attention_multicell_test():
    """Toy experiment: attention over a stacked BasicRNNCell decoder.

    Builds a two-layer `MultiRNNCell` of `BasicRNNCell`s wrapped in an
    `AttentionWrapper` with Bahdanau attention over constant encoder
    outputs, decodes three hard-coded sequences once (no training step) and
    prints the initial state, outputs, final state, projection weights and
    a manually computed loss. Returns nothing.
    """
    # Stack BasicRNNCells into a multi-layer cell and apply attention. With a multi-layer cell, attention is applied at the bottom layer (original author's note)
    vocab_size = 6
    SOS_token = 0
    EOS_token = 5
    
    x_data = np.array([[SOS_token, 3, 1, 4, 3, 2],[SOS_token, 3, 4, 2, 3, 1],[SOS_token, 1, 3, 2, 2, 1]], dtype=np.int32)
    y_data = np.array([[3, 1, 4, 3, 2,EOS_token],[3, 4, 2, 3, 1,EOS_token],[1, 3, 2, 2, 1,EOS_token]],dtype=np.int32)
    print("data shape: ", x_data.shape)
    sess = tf.InteractiveSession()
    
    output_dim = vocab_size
    batch_size = len(x_data)
    hidden_dim =7
    num_layers = 2
    seq_length = x_data.shape[1]
    embedding_dim = 8
    state_tuple_mode = True
    # Deterministic embedding initial values: 0..vocab_size*embedding_dim-1, one row per token.
    init = np.arange(vocab_size*embedding_dim).reshape(vocab_size,-1)
    
    train_mode = True
    with tf.variable_scope('test',reuse=tf.AUTO_REUSE) as scope:
        # Make multi-rnn cell
        cells = []
        for _ in range(num_layers):
            cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden_dim)
            cells.append(cell)
        cell = tf.contrib.rnn.MultiRNNCell(cells)
    
        embedding = tf.get_variable("embedding", initializer=init.astype(np.float32),dtype = tf.float32)
        inputs = tf.nn.embedding_lookup(embedding, x_data) # batch_size  x seq_length x embedding_dim
    
        Y = tf.convert_to_tensor(y_data)
    
        encoder_outputs = tf.ones([batch_size,20,30])
        input_lengths = [20]*batch_size
        # attention mechanism
        attention_initial_state = cell.zero_state(batch_size, tf.float32)  # a different value could be supplied as well.
        attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(num_units=11, memory=encoder_outputs,memory_sequence_length=input_lengths)
        cell = tf.contrib.seq2seq.AttentionWrapper(cell, attention_mechanism,initial_cell_state=attention_initial_state, attention_layer_size=13)  # returns an AttentionWrapperState.


        initial_state = cell.zero_state(batch_size, tf.float32) #(batch_size x hidden_dim) x number of layers   ==> returns an AttentionWrapperState class object.
  
        if train_mode:
            helper = tf.contrib.seq2seq.TrainingHelper(inputs, np.array([seq_length]*batch_size,dtype=np.int32))
        else:
            helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(embedding, start_tokens=tf.tile([SOS_token], [batch_size]), end_token=EOS_token)
      
        output_layer = Dense(output_dim, name='output_projection')
        decoder = tf.contrib.seq2seq.BasicDecoder(cell=cell,helper=helper,initial_state=initial_state,output_layer=output_layer)    
        # If maximum_iterations is not set, inference falls into an infinite loop when no EOS token is encountered
        outputs, last_state, last_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(decoder=decoder,output_time_major=False,impute_finished=True,maximum_iterations=10)
      
        # Uniform weights: every position contributes equally to the loss.
        weights = tf.ones(shape=[batch_size,seq_length])
        loss =   tf.contrib.seq2seq.sequence_loss(logits=outputs.rnn_output, targets=Y, weights=weights)
      
      
        sess.run(tf.global_variables_initializer())
        print("initial_state: ", sess.run(initial_state))
        print("\n\noutputs: ",outputs)
        o = sess.run(outputs.rnn_output)  #batch_size, seq_length, outputs
        o2 = sess.run(tf.argmax(outputs.rnn_output,axis=-1))
        print("\n",o,o2) #batch_size, seq_length, outputs
      
        print("\n\nlast_state: ",last_state)
        print(sess.run(last_state)) # batch_size, hidden_dim
      
        print("\n\nlast_sequence_lengths: ",last_sequence_lengths)
        print(sess.run(last_sequence_lengths)) #  [seq_length]*batch_size    
          
        print("kernel(weight)",sess.run(output_layer.trainable_weights[0]))  # kernel(weight)
        print("bias",sess.run(output_layer.trainable_weights[1]))  # bias
      
        if train_mode:
            # Cross-check the TF loss against a manual average negative log-likelihood.
            p = sess.run(tf.nn.softmax(outputs.rnn_output)).reshape(-1,output_dim)
            print("loss: {:20.6f}".format(sess.run(loss)))
            print("manual cal. loss: {:0.6f} ".format(np.average(-np.log(p[np.arange(y_data.size),y_data.flatten()]))) )

Пример #12

Показать файл

    def __init__(self, data, args, embed):
        """Build the full model graph: inputs, encoder, latent net, decoders, loss.

        Args:
            data: object providing `vocab_size`, `go_id` and `eos_id`.
            args: configuration object; this constructor reads
                `embedding_size`, `lr`, `lr_decay`, `eh_size`, `dh_size`,
                `z_dim`, `full_kl_step`, `min_kl`, `name`, `momentum`,
                `grad_clip` and `checkpoint_max_to_keep`, and passes `args`
                to `self.create_summary`.
            embed: initial value for the embedding table, or None to let
                `tf.get_variable` initialize a
                `[data.vocab_size, args.embedding_size]` table.
        """

        with tf.variable_scope("input"):
            with tf.variable_scope("embedding"):
                # build the embedding table and embedding input
                if embed is None:
                    # initialize the embedding randomly
                    self.embed = tf.get_variable(
                        'embed', [data.vocab_size, args.embedding_size],
                        tf.float32)
                else:
                    # initialize the embedding by pre-trained word vectors
                    self.embed = tf.get_variable('embed',
                                                 dtype=tf.float32,
                                                 initializer=embed)

            self.sentence = tf.placeholder(tf.int32, (None, None),
                                           'sen_inps')  # batch*len
            self.sentence_length = tf.placeholder(tf.int32, (None, ),
                                                  'sen_lens')  # batch
            # Selects the prior sample instead of the recognition sample (see tf.cond below).
            self.use_prior = tf.placeholder(dtype=tf.bool, name="use_prior")

            batch_size, batch_len = tf.shape(self.sentence)[0], tf.shape(
                self.sentence)[1]
            self.decoder_max_len = batch_len - 1

            self.encoder_input = tf.nn.embedding_lookup(
                self.embed, self.sentence)  # batch*len*unit
            self.encoder_len = self.sentence_length

            # Decoder input drops the last column; decoder target drops the first.
            decoder_input = tf.split(self.sentence, [self.decoder_max_len, 1],
                                     1)[0]  # no eos_id
            self.decoder_input = tf.nn.embedding_lookup(
                self.embed, decoder_input)  # batch*(len-1)*unit
            self.decoder_target = tf.split(self.sentence,
                                           [1, self.decoder_max_len],
                                           1)[1]  # no go_id, batch*(len-1)
            self.decoder_len = self.sentence_length - 1
            self.decoder_mask = tf.sequence_mask(
                self.decoder_len, self.decoder_max_len,
                dtype=tf.float32)  # batch*(len-1)

        # initialize the training process
        self.learning_rate = tf.Variable(float(args.lr),
                                         trainable=False,
                                         dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * args.lr_decay)
        self.global_step = tf.Variable(0, trainable=False)

        # build rnn_cell
        cell_enc = tf.nn.rnn_cell.GRUCell(args.eh_size)
        cell_dec = tf.nn.rnn_cell.GRUCell(args.dh_size)

        # build encoder
        with tf.variable_scope('encoder'):
            encoder_output, encoder_state = dynamic_rnn(cell_enc,
                                                        self.encoder_input,
                                                        self.encoder_len,
                                                        dtype=tf.float32,
                                                        scope="encoder_rnn")

        with tf.variable_scope('recognition_net'):
            # Mean and log-variance of the latent variable, both computed from the encoder state.
            recog_input = encoder_state
            self.recog_mu = tf.layers.dense(inputs=recog_input,
                                            units=args.z_dim,
                                            activation=None,
                                            name='recog_mu')
            self.recog_logvar = tf.layers.dense(inputs=recog_input,
                                                units=args.z_dim,
                                                activation=None,
                                                name='recog_logvar')

            # Sample z = mu + std * epsilon with epsilon drawn from a standard normal.
            epsilon = tf.random_normal(tf.shape(self.recog_logvar),
                                       name="epsilon")
            std = tf.exp(0.5 * self.recog_logvar)
            self.recog_z = tf.add(self.recog_mu,
                                  tf.multiply(std, epsilon),
                                  name='recog_z')

            # Batch mean of 0.5 * sum(exp(logvar) + mu^2 - logvar - 1) over the last axis.
            self.kld = tf.reduce_mean(0.5 * tf.reduce_sum(
                tf.exp(self.recog_logvar) + self.recog_mu * self.recog_mu -
                self.recog_logvar - 1,
                axis=-1))
            self.prior_z = tf.random_normal(tf.shape(self.recog_logvar),
                                            name="prior_z")
            latent_sample = tf.cond(self.use_prior,
                                    lambda: self.prior_z,
                                    lambda: self.recog_z,
                                    name='latent_sample')
            # Map the latent sample to the decoder's initial state size.
            dec_init_state = tf.layers.dense(inputs=latent_sample,
                                             units=args.dh_size,
                                             activation=None)

        with tf.variable_scope("output_layer",
                               initializer=tf.orthogonal_initializer()):
            self.output_layer = Dense(
                data.vocab_size,
                kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
                use_bias=True)

        with tf.variable_scope("decode",
                               initializer=tf.orthogonal_initializer()):
            # Teacher-forced decoding used for the training loss.
            train_helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=self.decoder_input, sequence_length=self.decoder_len)
            train_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=cell_dec,
                helper=train_helper,
                initial_state=dec_init_state,
                output_layer=self.output_layer)
            train_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder=train_decoder,
                maximum_iterations=self.decoder_max_len,
                impute_finished=True)
            logits = train_output.rnn_output

            crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.decoder_target, logits=logits)
            # Masked sum of token losses, normalized per sentence (sen_loss) and per token (ppl_loss).
            crossent = tf.reduce_sum(crossent * self.decoder_mask)
            self.sen_loss = crossent / tf.to_float(batch_size)
            self.ppl_loss = crossent / tf.reduce_sum(self.decoder_mask)

            self.decoder_distribution_teacher = tf.nn.log_softmax(logits)

        with tf.variable_scope("decode", reuse=True):
            # Greedy decoding that reuses the same cell and output layer as training.
            infer_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                self.embed, tf.fill([batch_size], data.go_id), data.eos_id)
            infer_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=cell_dec,
                helper=infer_helper,
                initial_state=dec_init_state,
                output_layer=self.output_layer)
            infer_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder=infer_decoder,
                maximum_iterations=self.decoder_max_len,
                impute_finished=True)
            self.decoder_distribution = infer_output.rnn_output
            # argmax over vocabulary ids >= 2; the first two ids are excluded and the offset restored.
            self.generation_index = tf.argmax(
                tf.split(self.decoder_distribution, [2, data.vocab_size - 2],
                         2)[1], 2) + 2  # for removing UNK

        # KL weight grows linearly with global_step and is capped at 1.0.
        self.kl_weights = tf.minimum(
            tf.to_float(self.global_step) / args.full_kl_step, 1.0)
        self.kl_loss = self.kl_weights * tf.maximum(self.kld, args.min_kl)
        self.loss = self.sen_loss + self.kl_loss

        # calculate the gradient of parameters and update
        # Only trainable variables whose name contains args.name are optimized.
        self.params = [
            k for k in tf.trainable_variables() if args.name in k.name
        ]
        opt = tf.train.MomentumOptimizer(learning_rate=self.learning_rate,
                                         momentum=args.momentum)
        gradients = tf.gradients(self.loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, args.grad_clip)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)

        # save checkpoint
        self.latest_saver = tf.train.Saver(
            write_version=tf.train.SaverDef.V2,
            max_to_keep=args.checkpoint_max_to_keep,
            pad_step_number=True,
            keep_checkpoint_every_n_hours=1.0)
        self.best_saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                         max_to_keep=1,
                                         pad_step_number=True,
                                         keep_checkpoint_every_n_hours=1.0)

        # create summary for tensorboard
        self.create_summary(args)

Пример #13

Показать файл

Файл: model.py Проект: pamasse/instacorrect

def cnnlstm(features, labels, mode, params):
    """
    Model function for a tf.estimator: char-CNN word encoder + attention decoder.

    Pipeline, as built below:
        - Every character id of every word is embedded, and each word is
          turned into a fixed-size vector by a set of convolutions followed
          by max-over-time pooling. (TODO: add the highway network)
        - The word vectors are fed to a multi-layer RNN encoder.
        - A second multi-layer RNN, wrapped with Luong attention over the
          encoder outputs and initialised with the encoder final state,
          decodes the target word sequence.

    Args:
        - features: a dict:
            - sequence: a tensor of shape [batch_size, max_sentence_length,
                                           max_word_size]
            filled with the character ids, and padded with 0
            - sequence_length: a tensor of shape [batch_size] with the original
              length of the sequences.
        - labels: a dict (not read in PREDICT mode):
            - sequence_input: word ids fed to the decoder at each step.
            - sequence_output: word ids the decoder has to predict.
            - sequence_length: a tensor of shape [batch_size] with the original
              length of the target sequences.
        - mode: the mode of the model (given by the estimator)
        - params: a dict with the following keys:
            - char_vocab_size / char_embedding_size: size of the character
              vocabulary and of the character embeddings
            - word_vocab_size / word_embedding_size: size of the word
              vocabulary and of the word embeddings
            - kernels / kernel_features: parallel lists with the width and
              the number of filters of each convolution
            - hidden_size, network_depth: passed to create_cell / number of
              stacked cells
            - dropout: forwarded to create_cell (documented by the original
              author as the keep probability)
            - start_token, end_token: word ids used for greedy decoding
            - learning_rate, decay_steps: exponential decay schedule settings

    Returns:
        A tf.estimator.EstimatorSpec matching `mode`.
    """
    with tf.variable_scope('ModelParams'):
        batch_size = tf.shape(features['sequence'])[0]
        timesteps = tf.shape(features['sequence'])[1]
        maxwordlength = tf.shape(features['sequence'])[2]
        c_embed_s = params['char_embedding_size']
        dropout = params['dropout']
        hidden_size = params['hidden_size']
        network_depth = params['network_depth']
        kernels = params['kernels']
        kernel_features = params['kernel_features']

    with tf.variable_scope('Convolution'):
        ###########
        # ENCODER #
        ###########
        # Characters embeddings matrix. Basically each character id (int)
        # is associated a vector [char_embedding_size]
        embeddings_c = tf.Variable(tf.random_uniform([params['char_vocab_size'],
                                   c_embed_s], -1.0, 1.0))
        # Embed every char id into their embedding. Will go from this dimension
        # [batch_size, max_sequence_length, max_word_size] to this dimension
        # [batch_size, max_sequence_length, max_word_size, char_embedding_size]
        embedded_chars = tf.nn.embedding_lookup(embeddings_c, features['sequence'])

        # Bring every word as an example: reshape the inputs
        # from [batch, timesteps, wordlength, embedsize] to
        # [batch*timesteps, wordlength, embedsize]
        cnn_inputs = tf.reshape(embedded_chars, [batch_size*timesteps,
                                                 maxwordlength,
                                                 c_embed_s])
        # Insert a dimension of size 1 at axis 1 for convolution purposes
        cnn_inputs = tf.expand_dims(cnn_inputs, 1)
        # List holding the pooled result of every convolution
        layers = []
        # For each kernel, tuple of [kernel size, num filters]
        for kernel_size, kernel_feature_size in zip(kernels, kernel_features):
            # Apply the convolution on all of the inputs for this kernel
            conv = conv2d(cnn_inputs, kernel_feature_size, 1, kernel_size, name="kernel_%d" % kernel_size)
            # Max over axis 2 (the word-length axis), then drop the size-1 axes
            pool = tf.reduce_max(tf.tanh(conv), 2, keep_dims=True)
            layers.append(tf.squeeze(pool, [1, 2]))
        cnn_output = tf.concat(layers, 1)
        rnn_inputs = tf.reshape(cnn_output, [batch_size, timesteps, sum(kernel_features)])

    with tf.variable_scope('RNN_Encoder'):
        # The actual encoder: the per-word CNN embeddings computed above are
        # fed to a classical multi-layer RNN.
        # TODO: apply dropout
        cell_list = [create_cell(mode, dropout, hidden_size) for _ in range(network_depth)]
        cell = tf.contrib.rnn.MultiRNNCell(cell_list)
        # Loop over the inputs and apply the previously created cell at every
        # timestep. Returns the output at every step and last hidden state.
        encoder_outputs, encoder_state = tf.nn.dynamic_rnn(cell=cell, dtype=tf.float32,
                                                 inputs=rnn_inputs,
                                                 sequence_length=features['sequence_length'])

    with tf.variable_scope('Decoder'):
        ###########
        # DECODER #
        ###########
        # Words embeddings matrix. Basically every word id (int) in the vocab
        # is associated a vector [word_embedding_size]
        embeddings_w = tf.Variable(tf.random_uniform([params['word_vocab_size'],
                                   params['word_embedding_size']], -1.0, 1.0))
        # Decoder cell: a separate stack of cells that will do the decoding.
        # (The encoder's `cell` name is intentionally not rebound here.)
        cell_list_dec = [create_cell(mode, dropout, hidden_size) for _ in range(network_depth)]
        decoder_cell = tf.contrib.rnn.MultiRNNCell(cell_list_dec)
        # Attention mechanism
        attention_mechanism = LuongAttention(num_units=hidden_size,
                                    memory=encoder_outputs,
                                    memory_sequence_length=features['sequence_length'])
        # Attention Wrapper
        attn_cell = AttentionWrapper(decoder_cell, attention_mechanism)
        # Start decoding from the encoder final state.
        initial_decoder_state = attn_cell.zero_state(batch_size, tf.float32).clone(cell_state=encoder_state)
        # Projection layer. Layer that takes the output of the decoder cell
        # and projects it on the word vocab dimension.
        projection_layer = Dense(params['word_vocab_size'], use_bias=False)
        # When not in inference mode, feed the ground-truth decoder inputs
        # at each time step.
        if mode != tf.estimator.ModeKeys.PREDICT:
            # Decoder outputs, i.e., what we are trying to predict.
            decoder_o = tf.cast(labels['sequence_output'], tf.int32)
            # Embed the decoder input
            decoder_i = tf.nn.embedding_lookup(embeddings_w, labels['sequence_input'])
            # Helper method. Basically a function that "helps" the decoder
            # at each time step by giving it the true input, whatever it computed
            # earlier.
            output_sequence_length = tf.cast(labels['sequence_length'], tf.int32)
            helper = TrainingHelper(decoder_i, output_sequence_length)
        else:
            # Helper method. At inference time it is different, we do not have the
            # true inputs, so this function will take the previously generated output
            # and embed it with the decoder embeddings.
            start_token = tf.fill([batch_size], params['start_token'])
            end_token = tf.cast(params['end_token'], tf.int32)
            helper = GreedyEmbeddingHelper(embeddings_w, start_token, end_token)
        # The final decoder, with its cell, its initial state, its helper function,
        # and its projection layer.
        decoder = BasicDecoder(attn_cell, helper,initial_decoder_state, output_layer=projection_layer)
        # Use this decoder to perform a dynamic decode.
        # Dynamic Decoder: controls the flow of operations and mainly store the outputs
        # and keeps decoding until the decoder is done.
        # Decoder: kind of the cell of the dynamic decoder. It passes the inputs
        # to the RNN, samples the output of the RNN and computes the next input.
        # To sample and compute the next inputs, the decoder uses a Helper function.
        # During training it is a TrainingHelper and during inference it is GreedyEmbeddingHelper
        # In our case the sampling is simply taking the argmax of the output logit.
        # The main difference between the two helpers is on the way they "compute"
        # the next input. TrainingHelper will use the decoder inputs provided while
        # the GreedyEmbeddingHelper will use the sampled RNN output and give it to
        # an embedding function to give it at as the next input.
        # Outputs of the BasicDecoder is a BasicDecoderOutput which holds the logits
        # and the sample_ids.
        if mode != tf.estimator.ModeKeys.PREDICT:
            outputs, _, _ = dynamic_decode(decoder)
        else:
            # Bound greedy decoding to twice the longest input sentence.
            max_iterations = tf.cast(tf.reduce_max(features['sequence_length'])*2, tf.int32)
            outputs, _, _ = dynamic_decode(decoder,
                                           maximum_iterations=max_iterations)
    with tf.variable_scope('Prediction'):
        # Unpack the decoder outputs.
        logits = outputs.rnn_output # output of the projection layer
        sample_id = outputs.sample_id # argmax of the logits
        # If we are INFER time only
        if mode == tf.estimator.ModeKeys.PREDICT:
            # Return a dict with the sample word ids.
            predictions = {"sequence": sample_id}
            export_outputs = {
                'prediction': tf.estimator.export.PredictOutput(predictions)
            }
            return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions,
                                              export_outputs=export_outputs)
    with tf.variable_scope('Loss'):
        # We are not at INFER time. We compute the cross entropy.
        crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=decoder_o,
                                                                  logits=logits)
        # Here we create a mask to "erase" the loss where the sentences are finished
        target_w = tf.sequence_mask(labels['sequence_length'], dtype=logits.dtype)
        # We apply the mask, sum the loss across all the dimensions and divide it
        # by the batch size to make it independent of the batch_size.
        # (The previous denominator was batch_size + timesteps, which is not a
        # meaningful normaliser and contradicted this very comment.)
        batch_size_32 = tf.cast(batch_size, tf.float32)
        loss = tf.reduce_sum(crossent * target_w) / batch_size_32
    with tf.variable_scope('Train'):
        # At train time only.
        if mode == tf.estimator.ModeKeys.TRAIN:
            # Initialize an optimizer whose goal is to minimize the loss
            learning_rate = tf.train.exponential_decay(params['learning_rate'],
                                                       tf.train.get_global_step(),
                                                       params['decay_steps'],
                                                       0.96, staircase=True)
            optimizer = tf.train.AdamOptimizer(learning_rate)
            # Apply gradient clipping
            gradients, variables = zip(*optimizer.compute_gradients(loss))
            gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
            train_op = optimizer.apply_gradients(zip(gradients, variables),
                                                 global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
    with tf.variable_scope('Evaluate'):
        # Token-level accuracy of the sampled ids against the targets,
        # weighted by the mask so that padded positions are ignored.
        eval_metric_ops = {"accuracy": tf.metrics.accuracy(labels=decoder_o,
                                                           predictions=sample_id,
                                                           weights=target_w)}
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

Пример #14

Показать файл

    def build_model(self) -> None:
        """Build the seq2seq graph: placeholders, encoder, attention decoder, saver.

        Depending on ``self.mode`` the decoder branch creates either the
        training ops (``self.loss``, ``self.summary_op``, ``self.train_op``)
        or the inference output (``self.decoder_predict_decode``), the latter
        using beam search when ``self.beam_search`` is set. Everything is
        stored on ``self``; nothing is returned.
        """
        print('building model... ...')
        # ================================= 1. Define the model placeholders
        self.encoder_inputs = tf.placeholder(tf.int32, [None, None],
                                             name='encoder_inputs')
        self.encoder_inputs_length = tf.placeholder(
            tf.int32, [None], name='encoder_inputs_length')

        self.batch_size = tf.placeholder(tf.int32, [], name='batch_size')
        self.keep_prob_placeholder = tf.placeholder(
            tf.float32, name='keep_prob_placeholder')

        self.decoder_targets = tf.placeholder(tf.int32, [None, None],
                                              name='decoder_targets')
        self.decoder_targets_length = tf.placeholder(
            tf.int32, [None], name='decoder_targets_length')
        # Take the maximum of the target sequence lengths and use it to build
        # the sequence-length mask. Example of what sequence_mask produces:
        #  tf.sequence_mask([1, 3, 2], 5)
        #  [[True, False, False, False, False],
        #  [True, True, True, False, False],
        #  [True, True, False, False, False]]
        self.max_target_sequence_length = tf.reduce_max(
            self.decoder_targets_length, name='max_target_len')
        self.mask = tf.sequence_mask(self.decoder_targets_length,
                                     self.max_target_sequence_length,
                                     dtype=tf.float32,
                                     name='masks')

        # ================================= 2. Define the encoder part of the model
        with tf.variable_scope('encoder'):
            # Build the encoder RNN cell via the helper (original author's
            # note: two-layer LSTM with dropout -- helper not visible here).
            encoder_cell = self._create_rnn_cell()
            # Build the embedding matrix; the encoder and the decoder share
            # this same word-vector matrix.
            embedding = tf.get_variable('embedding',
                                        [self.vocab_size, self.embedding_size])
            encoder_inputs_embedded = tf.nn.embedding_lookup(
                embedding, self.encoder_inputs)
            # Use dynamic_rnn to encode the inputs into hidden vectors.
            # encoder_outputs is used by attention (author's shape note:
            # batch_size * encoder_inputs_length * rnn_size);
            # encoder_state initialises the decoder state (author's shape
            # note: batch_size * rnn_size).
            encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
                encoder_cell,
                encoder_inputs_embedded,
                sequence_length=self.encoder_inputs_length,
                dtype=tf.float32)

        # ================================= 3. Define the decoder part of the model
        with tf.variable_scope('decoder'):
            encoder_inputs_length = self.encoder_inputs_length
            if self.beam_search:
                # With beam search the encoder outputs, state and lengths
                # must go through tile_batch, i.e. be replicated beam_size
                # times.
                # NOTE(review): this tiling is keyed on self.beam_search only,
                # not on self.mode -- confirm callers never train with
                # beam_search enabled.
                print("use beamsearch decoding..")
                encoder_outputs = tf.contrib.seq2seq.tile_batch(
                    encoder_outputs, multiplier=self.beam_size)
                encoder_state = nest.map_structure(
                    lambda s: tf.contrib.seq2seq.tile_batch(s, self.beam_size),
                    encoder_state)
                encoder_inputs_length = tf.contrib.seq2seq.tile_batch(
                    self.encoder_inputs_length, multiplier=self.beam_size)

            # Define the attention mechanism to use.
            attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                num_units=self.rnn_size,
                memory=encoder_outputs,
                memory_sequence_length=encoder_inputs_length)
            #attention_mechanism = tf.contrib.seq2seq.LuongAttention(num_units=self.rnn_size, memory=encoder_outputs, memory_sequence_length=encoder_inputs_length)
            # Define the RNN cell used by the decoder, then wrap it with the
            # attention wrapper.
            decoder_cell = self._create_rnn_cell()
            decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
                cell=decoder_cell,
                attention_mechanism=attention_mechanism,
                attention_layer_size=self.rnn_size,
                name='Attention_Wrapper')
            # With beam search the effective batch size is
            # self.batch_size * self.beam_size, because the encoder tensors
            # were replicated above.
            batch_size = self.batch_size if not self.beam_search else self.batch_size * self.beam_size
            # Define the decoder initial state: a zero state whose cell state
            # is replaced by the encoder's final state.
            decoder_initial_state = decoder_cell.zero_state(
                batch_size=batch_size,
                dtype=tf.float32).clone(cell_state=encoder_state)
            #TODO here i DONT CHANGE anything i think
            # output_layer = tf.layers.Dense(self.vocab_size, kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))
            # Projection from the decoder output to vocabulary-sized logits.
            output_layer = Dense(
                self.vocab_size,
                kernel_initializer=tf.truncated_normal_initializer(mean=0.0,
                                                                   stddev=0.1))

            if self.mode == 'train':
                # Define the decoder input: drop the last column of the
                # targets, prepend a <go> id to every row, then embed.
                # decoder_inputs_embedded has shape
                # [batch_size, decoder_targets_length, embedding_size]
                # (author's note).
                ending = tf.strided_slice(self.decoder_targets, [0, 0],
                                          [self.batch_size, -1], [1, 1])
                decoder_input = tf.concat([
                    tf.fill([self.batch_size, 1], self.word_to_idx['<go>']),
                    ending
                ], 1)
                decoder_inputs_embedded = tf.nn.embedding_lookup(
                    embedding, decoder_input)
                # Training uses the TrainingHelper + BasicDecoder combination;
                # a custom Helper class could be substituted for other
                # behaviour.
                training_helper = tf.contrib.seq2seq.TrainingHelper(
                    inputs=decoder_inputs_embedded,
                    sequence_length=self.decoder_targets_length,
                    time_major=False,
                    name='training_helper')
                training_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=decoder_cell,
                    helper=training_helper,
                    initial_state=decoder_initial_state,
                    output_layer=output_layer)
                # Run dynamic_decode. decoder_outputs is a namedtuple with
                # two fields (rnn_output, sample_id).
                # rnn_output: [batch_size, decoder_targets_length, vocab_size],
                #   the per-step, per-word scores used to compute the loss.
                # sample_id: tf.int32 ids chosen at each step (author's note
                #   gives [batch_size] -- TODO confirm the actual shape).
                decoder_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder=training_decoder,
                    impute_finished=True,
                    maximum_iterations=self.max_target_sequence_length)
                # From the outputs compute the loss and the gradients, and
                # define the AdamOptimizer and train_op that apply the update.
                self.decoder_logits_train = tf.identity(
                    decoder_outputs.rnn_output)
                self.decoder_predict_train = tf.argmax(
                    self.decoder_logits_train,
                    axis=-1,
                    name='decoder_pred_train')
                # Compute the loss with sequence_loss, passing in the mask
                # defined above so padded positions are weighted out.
                self.loss = tf.contrib.seq2seq.sequence_loss(
                    logits=self.decoder_logits_train,
                    targets=self.decoder_targets,
                    weights=self.mask)

                # Training summary for the current batch_loss
                tf.summary.scalar('loss', self.loss)
                self.summary_op = tf.summary.merge_all()

                # Clip gradients by global norm before applying them.
                optimizer = tf.train.AdamOptimizer(self.learning_rate)
                trainable_params = tf.trainable_variables()
                gradients = tf.gradients(self.loss, trainable_params)
                clip_gradients, _ = tf.clip_by_global_norm(
                    gradients, self.max_gradient_norm)
                self.train_op = optimizer.apply_gradients(
                    zip(clip_gradients, trainable_params))
            elif self.mode == 'decode':
                # One <go> id per example as the first decoder input.
                start_tokens = tf.ones([
                    self.batch_size,
                ], tf.int32) * self.word_to_idx['<go>']
                end_token = self.word_to_idx['<eos>']
                # The decoder used depends on whether beam search is enabled:
                # with beam search, use BeamSearchDecoder directly (it has its
                # own helper logic built in); otherwise use the
                # GreedyEmbeddingHelper + BasicDecoder combination for greedy
                # decoding.
                if self.beam_search:
                    inference_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                        cell=decoder_cell,
                        embedding=embedding,
                        start_tokens=start_tokens,
                        end_token=end_token,
                        initial_state=decoder_initial_state,
                        beam_width=self.beam_size,
                        output_layer=output_layer)
                else:
                    decoding_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                        embedding=embedding,
                        start_tokens=start_tokens,
                        end_token=end_token)
                    inference_decoder = tf.contrib.seq2seq.BasicDecoder(
                        cell=decoder_cell,
                        helper=decoding_helper,
                        initial_state=decoder_initial_state,
                        output_layer=output_layer)
                # NOTE(review): decoding is capped at a hard-coded 10 steps --
                # confirm this limit is intentional.
                decoder_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder=inference_decoder, maximum_iterations=10)
                # dynamic_decode returns decoder_outputs as a namedtuple.
                # Without beam search it has two fields (rnn_output, sample_id):
                # rnn_output: [batch_size, decoder_targets_length, vocab_size]
                # sample_id: [batch_size, decoder_targets_length], tf.int32

                # With beam search it has two fields
                # (predicted_ids, beam_search_decoder_output):
                # predicted_ids: [batch_size, decoder_targets_length, beam_size], the decoded results
                # beam_search_decoder_output: BeamSearchDecoderOutput instance namedtuple(scores, predicted_ids, parent_ids)
                # So only predicted_ids or sample_id needs to be exposed to
                # translate into the final result.
                if self.beam_search:
                    self.decoder_predict_decode = decoder_outputs.predicted_ids
                else:
                    # Add a trailing axis to sample_id (presumably to mirror
                    # the rank of predicted_ids in the beam-search branch).
                    self.decoder_predict_decode = tf.expand_dims(
                        decoder_outputs.sample_id, -1)
        # ================================= 4. Saver for the model variables
        self.saver = tf.train.Saver(tf.global_variables())

Пример #15

Показать файл

def decoding_layer(target_letter_to_int, decoding_embedding_size, num_layers,
                   rnn_size, target_sequence_length,
                   max_target_sequence_length, encoder_state, decoder_input):
    '''
    Build the training decoder and the greedy inference decoder.

    Both decoders are created under the same "decode" variable scope (the
    second one with reuse=True) and share the cell, the embedding matrix
    and the output projection.

    :param target_letter_to_int: mapping table of the target data; must
        contain the '<GO>' and '<EOS>' keys, and its length is the target
        vocabulary size
    :param decoding_embedding_size: size of the embedding vectors
    :param num_layers: number of stacked RNN cells
    :param rnn_size: number of hidden units in each RNN cell
    :param target_sequence_length: lengths of the target sequences
    :param max_target_sequence_length: maximum target sequence length, used
        as the decoding step limit
    :param encoder_state: state produced by the encoder, used as the
        decoder initial state
    :param decoder_input: input ids for the decoder side
    :return: (training_decoder_output, predicting_decoder_output)
    '''

    # 1. Embedding
    target_vocab_size = len(target_letter_to_int)
    decoder_embeddings = tf.Variable(
        tf.random_uniform([target_vocab_size, decoding_embedding_size]))
    decoder_embed_input = tf.nn.embedding_lookup(decoder_embeddings,
                                                 decoder_input)

    # 2. Build the RNN cells of the decoder
    def get_decoder_cell(rnn_size):
        decoder_cell = tf.contrib.rnn.LSTMCell(
            rnn_size,
            initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
        return decoder_cell

    cell = tf.contrib.rnn.MultiRNNCell(
        [get_decoder_cell(rnn_size) for _ in range(num_layers)])

    # 3. Fully connected output layer
    output_layer = Dense(target_vocab_size,
                         kernel_initializer=tf.truncated_normal_initializer(
                             mean=0.0, stddev=0.1))

    # 4. Training decoder
    with tf.variable_scope("decode"):
        # Build the helper object
        training_helper = tf.contrib.seq2seq.TrainingHelper(
            inputs=decoder_embed_input,
            sequence_length=target_sequence_length,
            time_major=False)
        # Build the decoder
        training_decoder = tf.contrib.seq2seq.BasicDecoder(
            cell, training_helper, encoder_state, output_layer)
        # dynamic_decode returns 2 values on TF 1.1 and 3 values from TF 1.2
        # on; taking element [0] keeps this working on both instead of
        # failing on tuple unpacking.
        training_decoder_output = tf.contrib.seq2seq.dynamic_decode(
            training_decoder,
            impute_finished=True,
            maximum_iterations=max_target_sequence_length)[0]

    # 5. Predicting decoder
    # Shares its parameters with the training decoder
    with tf.variable_scope("decode", reuse=True):
        # Create a constant tensor and tile it to batch_size.
        # NOTE(review): `batch_size` is not a parameter of this function; it
        # is resolved from the enclosing module scope -- confirm it is
        # defined there and matches the fed batch.
        start_tokens = tf.tile(tf.constant([target_letter_to_int['<GO>']],
                                           dtype=tf.int32), [batch_size],
                               name='start_tokens')
        predicting_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            decoder_embeddings, start_tokens, target_letter_to_int['<EOS>'])
        predicting_decoder = tf.contrib.seq2seq.BasicDecoder(
            cell, predicting_helper, encoder_state, output_layer)
        predicting_decoder_output = tf.contrib.seq2seq.dynamic_decode(
            predicting_decoder,
            impute_finished=True,
            maximum_iterations=max_target_sequence_length)[0]

    return training_decoder_output, predicting_decoder_output

Пример #16

Показать файл

Файл: AttentionTransform.py Проект: Judonlee/Project-Python-Total

    def BuildNetwork(self, learningRate):
        """Build the BLSTM encoder / attention / LSTM decoder training graph.

        Creates the input placeholders, a bidirectional multi-layer LSTM
        encoder, the first attention block (``self.firstAttention``), a
        teacher-forced multi-layer LSTM decoder with a Dense projection to
        ``VOCABULAR`` classes, the cross-entropy cost and its Adam training
        op (``self.trainEncoderDecoder``). Intermediate tensors are stored in
        ``self.parameters``. Finally delegates to ``self.DBLSTM_Structure``.

        :param learningRate: learning rate given to the Adam optimizer and
            forwarded to ``self.DBLSTM_Structure``.
        """
        #############################################################################
        # Input Data
        #############################################################################

        self.dataInput = tensorflow.placeholder(
            dtype=tensorflow.float32,
            shape=[None, None, self.featureShape],
            name='dataInput')
        self.dataLenInput = tensorflow.placeholder(dtype=tensorflow.int32,
                                                   shape=[None],
                                                   name='dataLenInput')

        self.labelInputSR = tensorflow.placeholder(dtype=tensorflow.int32,
                                                   shape=[None, None],
                                                   name='labelInput')
        self.labelLenInputSR = tensorflow.placeholder(dtype=tensorflow.int32,
                                                      shape=[None],
                                                      name='labelLenInput')

        self.labelInputDR = tensorflow.placeholder(dtype=tensorflow.float32,
                                                   shape=None,
                                                   name='labelInputDR')

        #############################################################################
        # Batch Parameters
        #############################################################################

        # Dynamic batch size and time steps, read from the input shape.
        self.parameters['BatchSize'], self.parameters[
            'TimeStep'], _ = tensorflow.unstack(
                tensorflow.shape(input=self.dataInput, name='DataShape'))
        self.parameters['LabelStep'] = tensorflow.shape(
            input=self.labelInputSR, name='LabelShape')[1]

        ###################################################################################################
        # Encoder
        ###################################################################################################

        with tensorflow.variable_scope('Encoder'):
            self.parameters[
                'Encoder_Cell_Forward'] = tensorflow.nn.rnn_cell.MultiRNNCell(
                    cells=[
                        rnn.LSTMCell(num_units=self.hiddenNodules)
                        for _ in range(self.rnnLayers)
                    ],
                    state_is_tuple=True)
            self.parameters[
                'Encoder_Cell_Backward'] = tensorflow.nn.rnn_cell.MultiRNNCell(
                    cells=[
                        rnn.LSTMCell(num_units=self.hiddenNodules)
                        for _ in range(self.rnnLayers)
                    ],
                    state_is_tuple=True)

            self.parameters['Encoder_Output'], self.parameters['Encoder_FinalState'] = \
                tensorflow.nn.bidirectional_dynamic_rnn(
                    cell_fw=self.parameters['Encoder_Cell_Forward'], cell_bw=self.parameters['Encoder_Cell_Backward'],
                    inputs=self.dataInput, sequence_length=self.dataLenInput, dtype=tensorflow.float32)

        self.attentionList = self.firstAttention(
            dataInput=self.parameters['Encoder_Output'],
            scopeName=self.firstAttentionName,
            hiddenNoduleNumber=2 * self.hiddenNodules,
            attentionScope=self.firstAttentionScope,
            blstmFlag=True)

        # Build one decoder initial LSTM state per layer: the cell state is
        # the attention result, the hidden state is the concatenation of the
        # forward and backward final hidden states of that same layer.
        # Encoder_FinalState is (forward_states, backward_states), each a
        # per-layer tuple, so the direction is the FIRST index and the layer
        # the SECOND. (Indexing it as [layer][direction] only ran without an
        # IndexError when rnnLayers == 2, and then mixed two layers of one
        # direction.)
        self.parameters['Decoder_InitalState'] = []
        for index in range(self.rnnLayers):
            self.parameters[
                'Encoder_Cell_Layer%d' % index] = rnn.LSTMStateTuple(
                    c=self.attentionList['FinalResult'],
                    h=tensorflow.concat([
                        self.parameters['Encoder_FinalState'][0][index].h,
                        self.parameters['Encoder_FinalState'][1][index].h
                    ],
                                        axis=1))
            self.parameters['Decoder_InitalState'].append(
                self.parameters['Encoder_Cell_Layer%d' % index])
        # MultiRNNCell with state_is_tuple=True expects a tuple of states.
        self.parameters['Decoder_InitalState'] = tuple(
            self.parameters['Decoder_InitalState'])

        #############################################################################
        # Decoder Label Pretreatment
        #############################################################################

        # Label embedding, sized to match the decoder cell (2 * hiddenNodules).
        self.parameters['DecoderEmbedding'] = tensorflow.Variable(
            initial_value=tensorflow.truncated_normal(
                shape=[VOCABULAR, self.hiddenNodules * 2],
                stddev=0.1,
                name='DecoderEmbedding'))

        self.parameters[
            'DecoderEmbeddingResult'] = tensorflow.nn.embedding_lookup(
                params=self.parameters['DecoderEmbedding'],
                ids=self.labelInputSR,
                name='DecoderEmbeddingResult')

        #############################################################################
        # Decoder
        #############################################################################

        # Teacher forcing: the embedded labels are fed as decoder inputs.
        self.parameters['Decoder_Helper'] = seq2seq.TrainingHelper(
            inputs=self.parameters['DecoderEmbeddingResult'],
            sequence_length=self.labelLenInputSR,
            name='Decoder_Helper')
        with tensorflow.variable_scope('Decoder'):
            # Projection of the decoder outputs onto the vocabulary.
            self.parameters['Decoder_FC'] = Dense(VOCABULAR)

            self.parameters[
                'Decoder_Cell'] = tensorflow.nn.rnn_cell.MultiRNNCell(
                    cells=[
                        rnn.LSTMCell(num_units=self.hiddenNodules * 2)
                        for _ in range(self.rnnLayers)
                    ],
                    state_is_tuple=True)

            self.parameters['Decoder'] = seq2seq.BasicDecoder(
                cell=self.parameters['Decoder_Cell'],
                helper=self.parameters['Decoder_Helper'],
                initial_state=self.parameters['Decoder_InitalState'],
                output_layer=self.parameters['Decoder_FC'])

            self.parameters['Decoder_Logits'], self.parameters[
                'Decoder_FinalState'], self.parameters[
                    'Decoder_FinalSeq'] = seq2seq.dynamic_decode(
                        decoder=self.parameters['Decoder'])

        with tensorflow.name_scope('Loss'):
            # Flatten targets and logits so every step is one classification.
            # NOTE(review): no padding mask is applied to this cost -- confirm
            # that is intended.
            self.parameters['TargetsReshape'] = tensorflow.reshape(
                tensor=self.labelInputSR, shape=[-1], name='TargetsReshape')
            self.parameters['Decoder_Reshape'] = tensorflow.reshape(
                self.parameters['Decoder_Logits'].rnn_output, [-1, VOCABULAR],
                name='Decoder_Reshape')
            self.parameters[
                'Cost'] = tensorflow.losses.sparse_softmax_cross_entropy(
                    labels=self.parameters['TargetsReshape'],
                    logits=self.parameters['Decoder_Reshape'])

            self.trainEncoderDecoder = tensorflow.train.AdamOptimizer(
                learning_rate=learningRate).minimize(self.parameters['Cost'])

        #############################################################################
        self.DBLSTM_Structure(learningRate=learningRate)

Пример #17

Показать файл

Файл: run_model.py Проект: paulegradie/QDetect

def model_fn(features, labels, mode, params):
    """Estimator ``model_fn`` building an LSTM encoder-decoder graph.

    Args:
        features: dict read for ``'encoder_inputs'`` and
            ``'encoder_input_lengths'``; in TRAIN mode also for
            ``'decoder_inputs'`` and ``'decoder_input_lengths'``.
        labels: dict read for ``'target_sequences'``. Not accessed in
            PREDICT mode.
        mode: one of ``tf.estimator.ModeKeys``.
        params: dict read for ``'word2index'``, ``'embedding_vectors'``,
            ``'vocab'``, ``'vocab_size'`` and ``'output_max_length'``; it is
            also forwarded to ``get_forget_bias``.

    Returns:
        A ``tf.estimator.EstimatorSpec`` carrying predictions (PREDICT),
        loss plus metrics (EVAL) or loss plus train op (TRAIN).
    """
    # particular to this project
    word2index = params['word2index']

    # NOTE(review): indexing GPUs[1] assumes get_available_gpus() returns at
    # least two devices -- confirm before running on a single-GPU host.
    GPUs = get_available_gpus()
    GPU = {
        'titan': GPUs[1],
        'sidekick': GPUs[0]}

    # Only the embedding matrix is needed here; the first return value of
    # load_embeddings is not used by this function.
    _, emb_vectors = load_embeddings(params['embedding_vectors'], params['vocab'])
    embedded_enc_input = tf.nn.embedding_lookup(emb_vectors, features['encoder_inputs'])
    forget_bias = get_forget_bias(params, mode)

    num_units = [2048, 2048]
    init = tf.initializers.truncated_normal(0.0, 0.01)

    # Encoder model
    with tf.device(GPU['titan']):
        encoder_cells = [tf.nn.rnn_cell.LSTMCell(num_units=num, forget_bias=forget_bias, initializer=init) for num in num_units]
        encoder_stacked_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(encoder_cells)

        enc_outputs, enc_final_state = tf.nn.dynamic_rnn(encoder_stacked_rnn_cell,
                                                         embedded_enc_input,
                                                         sequence_length=features['encoder_input_lengths'],
                                                         dtype=tf.float32)
    # Decoder model
    with tf.device(GPU['sidekick']):
        partial_embedding_helper = partial(embedding_helper, emb_vectors=emb_vectors)
        if mode == tf.estimator.ModeKeys.TRAIN:
            # Teacher forcing: feed the embedded ground-truth decoder inputs.
            embed_dec_inputs = tf.nn.embedding_lookup(emb_vectors, features['decoder_inputs'])
            helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=embed_dec_inputs,
                sequence_length=features['decoder_input_lengths'],
            )
        else:
            # Greedy decoding: start every sequence in the batch with <GO>
            # and stop at <EOS>.
            helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                embedding=partial_embedding_helper,
                start_tokens=tf.tile([word2index['<GO>']],
                                     [tf.shape(features['encoder_inputs'])[0]]),
                end_token=word2index['<EOS>'])

        dec_cell = tf.nn.rnn_cell.LSTMCell(num_units=num_units[-1],  # needs to match size of last layer of encoder
                                           forget_bias=forget_bias,
                                           initializer=init)

        # The decoder is a single layer, so it is seeded with the final state
        # of the last encoder layer only.
        decoder = tf.contrib.seq2seq.BasicDecoder(
            cell=dec_cell,
            helper=helper,
            initial_state=enc_final_state[-1],
            output_layer=Dense(params['vocab_size'], use_bias=False))
        dec_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder=decoder,
            output_time_major=False,
            impute_finished=True,
            maximum_iterations=params['output_max_length'])
    logits = tf.identity(dec_outputs.rnn_output, 'logits')

    if mode == tf.estimator.ModeKeys.PREDICT:
        indices = predict_words(logits)
        predictions = {'sentence_tokens': indices}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    training_labels = labels['target_sequences']
    # Mask out padding positions so they do not contribute to the loss.
    # tf.not_equal already yields a bool tensor, so one cast is enough.
    weights = tf.cast(tf.not_equal(training_labels, tf.constant(word2index['<PAD>'])), tf.float32)
    sequence_loss = tf.contrib.seq2seq.sequence_loss(logits=logits, targets=training_labels, weights=weights)

    tf.summary.scalar('sequence_loss', sequence_loss)

    if mode == tf.estimator.ModeKeys.EVAL:
        # eval_metric_ops values must be (value_op, update_op) pairs, and the
        # key 'loss' is reserved by Estimator for the loss it reports itself;
        # a raw tensor under 'loss' makes evaluation fail.
        metrics = {'mean_sequence_loss': tf.metrics.mean(sequence_loss)}
        return tf.estimator.EstimatorSpec(mode, loss=sequence_loss, eval_metric_ops=metrics)

    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    train_op = optimizer.minimize(sequence_loss, global_step=tf.train.get_global_step())

    return tf.estimator.EstimatorSpec(mode, loss=sequence_loss, train_op=train_op)

Пример #18

Показать файл

    def __init__(self, tfFLAGS, embed=None):
        """Build the full model graph from a flags object.

        The constructor copies hyperparameters from ``tfFLAGS``, picks an
        optimizer, creates the inputs, three encoders, the recognition and
        prior networks, the latent sample and the decoder initial state, then
        calls ``self._build_decoder()`` and creates a ``tf.train.Saver``.

        Args:
            tfFLAGS: object exposing the hyperparameters as attributes
                (``vocab_size``, ``embed_size``, ``num_units``,
                ``num_layers``, ``beam_width``, ``use_lstm``, ``attn_mode``,
                ``keep_prob``, ``max_decode_len``, ``bi_encode``,
                ``recog_hidden_units``, ``prior_hidden_units``, ``z_dim``,
                ``full_kl_step``, ``opt``, ``learning_rate`` and, depending
                on ``opt``, ``learning_rate_decay_factor`` / ``momentum``).
            embed: forwarded unchanged to ``self._make_input``.
        """
        self.vocab_size = tfFLAGS.vocab_size
        self.embed_size = tfFLAGS.embed_size
        self.num_units = tfFLAGS.num_units
        self.num_layers = tfFLAGS.num_layers
        self.beam_width = tfFLAGS.beam_width
        self.use_lstm = tfFLAGS.use_lstm
        self.attn_mode = tfFLAGS.attn_mode
        self.train_keep_prob = tfFLAGS.keep_prob
        self.max_decode_len = tfFLAGS.max_decode_len
        self.bi_encode = tfFLAGS.bi_encode
        self.recog_hidden_units = tfFLAGS.recog_hidden_units
        self.prior_hidden_units = tfFLAGS.prior_hidden_units
        self.z_dim = tfFLAGS.z_dim
        self.full_kl_step = tfFLAGS.full_kl_step

        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        self.max_gradient_norm = 5.0
        if tfFLAGS.opt == 'SGD':
            # The learning rate is a variable here so that it can be decayed
            # in place by running learning_rate_decay_op.
            self.learning_rate = tf.Variable(float(tfFLAGS.learning_rate),
                                             trainable=False,
                                             dtype=tf.float32)
            self.learning_rate_decay_op = self.learning_rate.assign(
                self.learning_rate * tfFLAGS.learning_rate_decay_factor)
            self.opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        elif tfFLAGS.opt == 'Momentum':
            # Expose the attribute in every branch so readers of
            # self.learning_rate do not hit AttributeError.
            self.learning_rate = tfFLAGS.learning_rate
            self.opt = tf.train.MomentumOptimizer(
                learning_rate=tfFLAGS.learning_rate, momentum=tfFLAGS.momentum)
        else:
            self.learning_rate = tfFLAGS.learning_rate
            # NOTE(review): AdamOptimizer is built with its default learning
            # rate; self.learning_rate is stored but not passed in. Left
            # unchanged to keep training behaviour -- confirm whether this is
            # intended.
            self.opt = tf.train.AdamOptimizer()

        self._make_input(embed)

        with tf.variable_scope("output_layer"):
            self.output_layer = Dense(
                self.vocab_size,
                kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))

        with tf.variable_scope("encoders",
                               initializer=tf.orthogonal_initializer()):
            self.enc_post_outputs, self.enc_post_state = self._build_encoder(
                scope='post_encoder',
                inputs=self.enc_post,
                sequence_length=self.post_len)
            self.enc_ref_outputs, self.enc_ref_state = self._build_encoder(
                scope='ref_encoder',
                inputs=self.enc_ref,
                sequence_length=self.ref_len)
            self.enc_response_outputs, self.enc_response_state = self._build_encoder(
                scope='resp_encoder',
                inputs=self.enc_response,
                sequence_length=self.response_len)

            self.post_state = self._get_representation_from_enc_state(
                self.enc_post_state)
            self.ref_state = self._get_representation_from_enc_state(
                self.enc_ref_state)
            self.response_state = self._get_representation_from_enc_state(
                self.enc_response_state)
            # Conditioning vector: post and reference representations joined
            # along the feature axis.
            self.cond_embed = tf.concat([self.post_state, self.ref_state],
                                        axis=-1)

        with tf.variable_scope("RecognitionNetwork"):
            # The recognition network sees the condition and the response.
            recog_input = tf.concat([self.cond_embed, self.response_state],
                                    axis=-1)
            recog_hidden = tf.layers.dense(inputs=recog_input,
                                           units=self.recog_hidden_units,
                                           activation=tf.nn.tanh)
            # One projection emits mean and log-variance side by side; they
            # are split in half below.
            recog_mulogvar = tf.layers.dense(inputs=recog_hidden,
                                             units=self.z_dim * 2,
                                             activation=None)
            recog_mu, recog_logvar = tf.split(recog_mulogvar, 2, axis=-1)

        with tf.variable_scope("PriorNetwork"):
            # The prior network sees the condition only.
            prior_input = self.cond_embed
            prior_hidden = tf.layers.dense(inputs=prior_input,
                                           units=self.prior_hidden_units,
                                           activation=tf.nn.tanh)
            prior_mulogvar = tf.layers.dense(inputs=prior_hidden,
                                             units=self.z_dim * 2,
                                             activation=None)
            prior_mu, prior_logvar = tf.split(prior_mulogvar, 2, axis=-1)

        with tf.variable_scope("GenerationNetwork"):
            # self.use_prior is not assigned in this method; presumably it is
            # created by _make_input -- TODO confirm.
            latent_sample = tf.cond(
                self.use_prior,
                lambda: sample_gaussian(prior_mu, prior_logvar),
                lambda: sample_gaussian(recog_mu, recog_logvar),
                name='latent_sample')

            gen_input = tf.concat([self.cond_embed, latent_sample], axis=-1)
            # Each decoder layer gets its own dense projection(s) of
            # gen_input as its initial state.
            if self.use_lstm:
                self.dec_init_state = tuple([
                    tf.contrib.rnn.LSTMStateTuple(
                        c=tf.layers.dense(inputs=gen_input,
                                          units=self.num_units,
                                          activation=None),
                        h=tf.layers.dense(inputs=gen_input,
                                          units=self.num_units,
                                          activation=None))
                    for _ in range(self.num_layers)
                ])
                # Single-argument call form prints identically on Python 2
                # and is valid syntax on Python 3.
                print(self.dec_init_state)
            else:
                self.dec_init_state = tuple([
                    tf.layers.dense(inputs=gen_input,
                                    units=self.num_units,
                                    activation=None)
                    for _ in range(self.num_layers)
                ])

            kld = gaussian_kld(recog_mu, recog_logvar, prior_mu, prior_logvar)
            self.avg_kld = tf.reduce_mean(kld)
            # KL weight grows linearly with global_step and is capped at 1.0
            # once global_step reaches full_kl_step.
            self.kl_weights = tf.minimum(
                tf.to_float(self.global_step) / self.full_kl_step, 1.0)
            self.kl_loss = self.kl_weights * self.avg_kld

        self._build_decoder()
        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                    max_to_keep=1,
                                    pad_step_number=True,
                                    keep_checkpoint_every_n_hours=1.0)
        for var in tf.trainable_variables():
            print(var)

Пример #19

Показать файл

Файл: model_attention.py Проект: AbhinavDS/BoostingSpeech

def build_decoder(encoder_outputs, encoder_state, input_sequence_length,
                  char_ids, batch_size, num_classes, num_decoder_layers,
                  maximum_iterations):
    """Assemble the training decoder with scheduled sampling.

    Only the training path is built here; greedy or beam-search inference
    decoding is not wired into this function.

    Args:
        encoder_outputs: forwarded to ``build_decoder_cell``.
        encoder_state: forwarded to ``build_decoder_cell``.
        input_sequence_length: forwarded to ``build_decoder_cell`` and also
            used as the helper's ``sequence_length``.
        char_ids: integer ids looked up in the embedding table.
        batch_size: forwarded to ``build_decoder_cell``.
        num_classes: number of output classes; used as the projection width
            and as the number of embedding rows.
        num_decoder_layers: forwarded to ``build_decoder_cell``.
        maximum_iterations: cap on decoding steps for ``dynamic_decode``.

    Returns:
        Tuple ``(logits, sample_id, final_context_state)`` taken from the
        ``dynamic_decode`` result.
    """
    vocab_size = num_classes
    projection = Dense(vocab_size, name='output_projection')

    with tf.variable_scope("decoder") as decoder_scope:

        decoder_cell, start_state = build_decoder_cell(
            encoder_outputs, encoder_state, input_sequence_length,
            num_decoder_layers, batch_size)

        # Embedding table with 300-dimensional rows, one per class.
        embedding = tf.get_variable(
            'embedding',
            shape=[vocab_size, 300],
            dtype=tf.float32)

        embedded_chars = tf.nn.embedding_lookup(
            embedding, char_ids, name='char_embedding')
        # Second argument is passed positionally, exactly as before.
        dropped_chars = tf.nn.dropout(
            embedded_chars, 0.986, name='char_embedding_dropout')

        # With probability 0.5 the helper samples the next input from the
        # model output instead of reading it from the provided inputs.
        sampling_helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
            inputs=dropped_chars,
            sequence_length=input_sequence_length,
            embedding=embedding,
            sampling_probability=0.5,
            time_major=False)

        basic_decoder = tf.contrib.seq2seq.BasicDecoder(
            cell=decoder_cell,
            helper=sampling_helper,
            initial_state=start_state,
            output_layer=projection)

        decoded, final_context_state, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder=basic_decoder,
            output_time_major=False,
            maximum_iterations=maximum_iterations,
            swap_memory=False,
            impute_finished=True,
            scope=decoder_scope)

        sample_id = decoded.sample_id
        logits = decoded.rnn_output

    return logits, sample_id, final_context_state

Пример #20

Показать файл

Файл: seq2seq.py Проект: zhf459/pycadl

def _create_decoder(cells,
                    batch_size,
                    encoder_outputs,
                    encoder_state,
                    encoder_lengths,
                    decoding_inputs,
                    decoding_lengths,
                    embed_matrix,
                    target_vocab_size,
                    scope,
                    max_sequence_size,
                    use_attention=True):
    """Build a training decoder and a greedy inference decoder sharing weights.

    Parameters
    ----------
    cells : RNN cell
        Decoder cell. With attention it is wrapped in an
        ``AttentionWrapper`` sized by ``cells.output_size``.
    batch_size : int or scalar tensor
        Batch size used for the zero state and for tiling the start tokens.
    encoder_outputs : tensor
        Memory attended over by ``LuongAttention``.
    encoder_state : RNN state
        Final encoder state; seeds the decoder.
    encoder_lengths : tensor
        Passed to ``LuongAttention`` as the memory sequence lengths.
    decoding_inputs : tensor
        Inputs fed by the ``TrainingHelper`` (batch-major, since
        ``time_major=False``).
    decoding_lengths : tensor
        Sequence lengths for the ``TrainingHelper``.
    embed_matrix : tensor or callable
        Embedding given to ``GreedyEmbeddingHelper`` at inference.
    target_vocab_size : int
        Width of the output projection.
    scope : variable scope
        ``scope.reuse_variables()`` is called on it before the inference
        decoder is built so both decoders share variables.
    max_sequence_size : int or scalar tensor
        ``maximum_iterations`` for both ``dynamic_decode`` calls.
    use_attention : bool, optional
        Whether to wrap ``cells`` with Luong attention.

    Returns
    -------
    tuple
        ``(train_logits, infer_logits)``. ``train_logits`` is the training
        decoder's ``rnn_output``; ``infer_logits`` is the inference
        decoder's ``sample_id`` (sampled ids, despite the name).
    """
    from tensorflow.python.layers.core import Dense

    # Output projection
    output_layer = Dense(target_vocab_size, name='output_projection')

    # Setup Attention
    if use_attention:
        attn_mech = tf.contrib.seq2seq.LuongAttention(cells.output_size,
                                                      encoder_outputs,
                                                      encoder_lengths,
                                                      scale=True)
        cells = tf.contrib.seq2seq.AttentionWrapper(
            cell=cells,
            attention_mechanism=attn_mech,
            attention_layer_size=cells.output_size,
            alignment_history=False)
        # The wrapper state carries extra attention fields, so start from its
        # zero state and substitute the encoder state for the cell part.
        initial_state = cells.zero_state(dtype=tf.float32,
                                         batch_size=batch_size)
        initial_state = initial_state.clone(cell_state=encoder_state)
    else:
        # Without the wrapper the encoder state is used directly; previously
        # initial_state was left unbound on this path.
        initial_state = encoder_state

    # Setup training and build decoder
    helper = tf.contrib.seq2seq.TrainingHelper(
        inputs=decoding_inputs,
        sequence_length=decoding_lengths,
        time_major=False)
    train_decoder = tf.contrib.seq2seq.BasicDecoder(
        cell=cells,
        helper=helper,
        initial_state=initial_state,
        output_layer=output_layer)
    train_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
        train_decoder,
        output_time_major=False,
        impute_finished=True,
        maximum_iterations=max_sequence_size)
    train_logits = tf.identity(train_outputs.rnn_output, name='train_logits')

    # Setup inference and build decoder
    scope.reuse_variables()
    start_tokens = tf.tile(tf.constant([GO_ID], dtype=tf.int32), [batch_size])
    helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
        embedding=embed_matrix, start_tokens=start_tokens, end_token=EOS_ID)
    infer_decoder = tf.contrib.seq2seq.BasicDecoder(
        cell=cells,
        helper=helper,
        initial_state=initial_state,
        output_layer=output_layer)
    infer_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
        infer_decoder,
        output_time_major=False,
        impute_finished=True,
        maximum_iterations=max_sequence_size)
    infer_logits = tf.identity(infer_outputs.sample_id, name='infer_logits')

    return train_logits, infer_logits

Пример #21

Показать файл

    def Decoder(self, encoder_output, encoder_final_state):
        """Build the attention decoder for the current ``self.mode``.

        In ``'train'`` mode a scheduled-sampling ``BasicDecoder`` is built.
        In ``'test'`` mode either a ``BeamSearchDecoder`` (when ``self.beam``
        is set) or a greedy ``BasicDecoder`` is built.

        Args:
            encoder_output: encoder outputs used as the attention memory.
            encoder_final_state: final encoder state used as the cell state
                of the decoder's initial state.

        Returns:
            The first element of the ``dynamic_decode`` result.

        Raises:
            ValueError: if ``self.mode`` is neither ``'train'`` nor
                ``'test'``.
        """
        # Fail early with a clear message; previously an unknown mode fell
        # through to `return outputs` with `outputs` unbound.
        if self.mode not in ('train', 'test'):
            raise ValueError(
                "Decoder mode must be 'train' or 'test', got %r" %
                (self.mode,))

        with tf.variable_scope('embedding', reuse=tf.AUTO_REUSE):
            emb_w = tf.get_variable("embedding",
                                    shape=[self.voc_size, self.dim_hidden])

        # `== True` is kept on purpose so non-bool flag values behave exactly
        # as before.
        use_beam = self.mode == 'test' and self.beam == True
        if use_beam:
            print("use beamsearch decoding..")
            # Beam search needs the memory and state repeated once per beam.
            encoder_output = tf.contrib.seq2seq.tile_batch(
                encoder_output, multiplier=self.beam_size)
            encoder_final_state = tf.contrib.seq2seq.tile_batch(
                encoder_final_state, multiplier=self.beam_size)

        attention_output = tf.contrib.seq2seq.LuongAttention(
            self.dim_hidden, encoder_output)
        decoder_cell = tf.nn.rnn_cell.MultiRNNCell([
            self.get_a_cell(self.dim_hidden)
            for _ in range(self.lstm_num_layer)
        ])
        decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
            decoder_cell,
            attention_output,
            attention_layer_size=self.dim_hidden)
        projection_layer = Dense(self.voc_size, use_bias=False)

        if self.mode == 'train':
            # Feed every target token except the last one as decoder input.
            decoder_input = tf.nn.embedding_lookup(emb_w, self.ys[:, :-1])
            decoder_seq_length = [self.input_timestep] * self.batch_size
            decoder_init_state = decoder_cell.zero_state(
                self.batch_size,
                self.dtype).clone(cell_state=encoder_final_state)
            # 0.2 is the scheduled-sampling probability.
            helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
                decoder_input,
                decoder_seq_length,
                emb_w,
                0.2,
                time_major=False)
            training_decoder = tf.contrib.seq2seq.BasicDecoder(
                decoder_cell,
                helper,
                decoder_init_state,
                output_layer=projection_layer)

            outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder=training_decoder, maximum_iterations=self.max_len)
        else:
            # Test mode: every sequence starts with token id 1 and decoding
            # stops at token id 2.
            start_tokens = tf.ones([self.batch_size], tf.int32)
            end_token = 2
            if use_beam:
                # The zero state must match the tiled batch dimension.
                decoder_init_state = decoder_cell.zero_state(
                    self.batch_size * self.beam_size,
                    self.dtype).clone(cell_state=encoder_final_state)
                inference_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                    cell=decoder_cell,
                    embedding=emb_w,
                    start_tokens=start_tokens,
                    end_token=end_token,
                    initial_state=decoder_init_state,
                    beam_width=self.beam_size,
                    output_layer=projection_layer)
            else:
                decoder_init_state = decoder_cell.zero_state(
                    self.batch_size,
                    self.dtype).clone(cell_state=encoder_final_state)
                decoding_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                    embedding=emb_w,
                    start_tokens=start_tokens,
                    end_token=end_token)
                inference_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=decoder_cell,
                    helper=decoding_helper,
                    initial_state=decoder_init_state,
                    output_layer=projection_layer)

            outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder=inference_decoder, maximum_iterations=self.max_len)

        return outputs

Пример #22

Показать файл

def seq_to_seq_net(embedding_dim, encoder_size, decoder_size, source_dict_dim,
                   target_dict_dim, is_generating, beam_size,
                   max_generation_length):
    """Build a bidirectional-LSTM encoder / attention-LSTM decoder graph.

    Args:
        embedding_dim: width of the source and target word embeddings.
        encoder_size: number of units of each encoder direction.
        decoder_size: number of units of the decoder LSTM.
        source_dict_dim: number of rows of the source embedding table.
        target_dict_dim: number of rows of the target embedding table and
            width of the output projection.
        is_generating: when true a beam-search inference graph is built,
            otherwise a training graph with a loss.
        beam_size: beam width; only used when ``is_generating``.
        max_generation_length: ``maximum_iterations`` for inference decoding.

    Returns:
        ``(feeds, loss)`` in training mode or ``(feeds, predicted_ids)`` in
        generating mode, where ``feeds`` maps names to the placeholders that
        must be fed.
    """
    src_word_idx = tf.placeholder(tf.int32, shape=[None, None])
    src_sequence_length = tf.placeholder(tf.int32, shape=[
        None,
    ])

    src_embedding_weights = tf.get_variable("source_word_embeddings",
                                            [source_dict_dim, embedding_dim])
    src_embedding = tf.nn.embedding_lookup(src_embedding_weights, src_word_idx)

    src_forward_cell = tf.nn.rnn_cell.BasicLSTMCell(encoder_size)
    src_reversed_cell = tf.nn.rnn_cell.BasicLSTMCell(encoder_size)
    # no peephole
    encoder_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=src_forward_cell,
        cell_bw=src_reversed_cell,
        inputs=src_embedding,
        sequence_length=src_sequence_length,
        dtype=tf.float32)

    # concat the forward outputs and backward outputs
    encoded_vec = tf.concat(encoder_outputs, axis=2)

    # project the encoder outputs to size of decoder lstm
    # Each direction emits encoder_size features, so the concatenation is
    # 2 * encoder_size wide. The previous reshape used embedding_dim, which
    # is only correct when embedding_dim == encoder_size.
    encoded_proj = tf.contrib.layers.fully_connected(inputs=tf.reshape(
        encoded_vec, shape=[-1, encoder_size * 2]),
                                                     num_outputs=decoder_size,
                                                     activation_fn=None,
                                                     biases_initializer=None)
    encoded_proj_reshape = tf.reshape(
        encoded_proj, shape=[-1, tf.shape(encoded_vec)[1], decoder_size])

    # get init state for decoder lstm's H
    # First time step of the backward-direction outputs; its feature width
    # is encoder_size (same fix as above).
    backward_first = tf.slice(encoder_outputs[1], [0, 0, 0], [-1, 1, -1])
    decoder_boot = tf.contrib.layers.fully_connected(inputs=tf.reshape(
        backward_first, shape=[-1, encoder_size]),
                                                     num_outputs=decoder_size,
                                                     activation_fn=tf.nn.tanh,
                                                     biases_initializer=None)

    # prepare the initial state for decoder lstm
    cell_init = tf.zeros(tf.shape(decoder_boot), tf.float32)
    initial_state = LSTMStateTuple(cell_init, decoder_boot)

    # create decoder lstm cell
    # In generating mode the attention inputs are repeated once per beam.
    decoder_cell = LSTMCellWithSimpleAttention(
        decoder_size,
        encoded_vec if not is_generating else seq2seq.tile_batch(
            encoded_vec, beam_size),
        encoded_proj_reshape if not is_generating else seq2seq.tile_batch(
            encoded_proj_reshape, beam_size),
        src_sequence_length if not is_generating else seq2seq.tile_batch(
            src_sequence_length, beam_size),
        forget_bias=0.0)

    output_layer = Dense(target_dict_dim, name='output_projection')

    if not is_generating:
        trg_word_idx = tf.placeholder(tf.int32, shape=[None, None])
        trg_sequence_length = tf.placeholder(tf.int32, shape=[
            None,
        ])
        trg_embedding_weights = tf.get_variable(
            "target_word_embeddings", [target_dict_dim, embedding_dim])
        trg_embedding = tf.nn.embedding_lookup(trg_embedding_weights,
                                               trg_word_idx)

        training_helper = seq2seq.TrainingHelper(
            inputs=trg_embedding,
            sequence_length=trg_sequence_length,
            time_major=False,
            name='training_helper')

        training_decoder = seq2seq.BasicDecoder(cell=decoder_cell,
                                                helper=training_helper,
                                                initial_state=initial_state,
                                                output_layer=output_layer)

        # get the max length of target sequence
        max_decoder_length = tf.reduce_max(trg_sequence_length)

        decoder_outputs_train, _, _ = seq2seq.dynamic_decode(
            decoder=training_decoder,
            output_time_major=False,
            impute_finished=True,
            maximum_iterations=max_decoder_length)

        decoder_logits_train = tf.identity(decoder_outputs_train.rnn_output)
        # Created for its named op only; the value is not returned.
        decoder_pred_train = tf.argmax(decoder_logits_train,
                                       axis=-1,
                                       name='decoder_pred_train')
        # Weights that zero out positions beyond each target length.
        masks = tf.sequence_mask(lengths=trg_sequence_length,
                                 maxlen=max_decoder_length,
                                 dtype=tf.float32,
                                 name='masks')

        # place holder of label sequence
        lbl_word_idx = tf.placeholder(tf.int32, shape=[None, None])

        # compute the loss
        loss = seq2seq.sequence_loss(logits=decoder_logits_train,
                                     targets=lbl_word_idx,
                                     weights=masks,
                                     average_across_timesteps=True,
                                     average_across_batch=True)

        # return feeding list and loss operator
        return {
            'src_word_idx': src_word_idx,
            'src_sequence_length': src_sequence_length,
            'trg_word_idx': trg_word_idx,
            'trg_sequence_length': trg_sequence_length,
            'lbl_word_idx': lbl_word_idx
        }, loss
    else:
        start_tokens = tf.ones([
            tf.shape(src_word_idx)[0],
        ], tf.int32) * START_TOKEN_IDX
        # share the same embedding weights with target word
        trg_embedding_weights = tf.get_variable(
            "target_word_embeddings", [target_dict_dim, embedding_dim])

        inference_decoder = beam_search_decoder.BeamSearchDecoder(
            cell=decoder_cell,
            embedding=lambda tokens: tf.nn.embedding_lookup(
                trg_embedding_weights, tokens),
            start_tokens=start_tokens,
            end_token=END_TOKEN_IDX,
            initial_state=tf.nn.rnn_cell.LSTMStateTuple(
                tf.contrib.seq2seq.tile_batch(initial_state[0], beam_size),
                tf.contrib.seq2seq.tile_batch(initial_state[1], beam_size)),
            beam_width=beam_size,
            output_layer=output_layer)

        # impute_finished is left at its default: the original author noted
        # that enabling it here raises an error.
        decoder_outputs_decode, _, _ = seq2seq.dynamic_decode(
            decoder=inference_decoder,
            output_time_major=False,
            maximum_iterations=max_generation_length)

        predicted_ids = decoder_outputs_decode.predicted_ids

        return {
            'src_word_idx': src_word_idx,
            'src_sequence_length': src_sequence_length
        }, predicted_ids

Пример #23

Показать файл

Файл: SpeechRecognizer.py Проект: gargimahale/Speech-Recognition-Using-Tensorflow

    def build_decoder(self, encoder_outputs, encoder_state):
        """Create the decoder graph for training or inference.

        When ``self.mode`` is not ``'INFER'`` a scheduled-sampling training
        decoder is built. Otherwise a beam-search decoder is used if
        ``self.beam_width > 0`` and a greedy decoder if not.

        Args:
            encoder_outputs: forwarded to ``self.build_decoder_cell``.
            encoder_state: forwarded to ``self.build_decoder_cell``.

        Returns:
            Tuple ``(logits, sample_id, final_context_state)``. In inference
            mode ``logits`` is a ``tf.no_op()`` placeholder and ``sample_id``
            holds the predicted ids (beam search) or sampled ids (greedy).
        """
        # Start / end token ids as int32 tensors.
        sos_id_2 = tf.cast(self.char2ind[self.sos], tf.int32)
        eos_id_2 = tf.cast(self.char2ind[self.eos], tf.int32)
        self.output_layer = Dense(self.vocab_size, name='output_projection')

        with tf.variable_scope("decoder") as decoder_scope:

            cell, decoder_initial_state = self.build_decoder_cell(
                encoder_outputs, encoder_state, self.audio_sequence_lengths)

            if self.mode != 'INFER':
                # Training: half of the time the next input is sampled from
                # the model instead of read from the provided inputs.
                train_helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
                    inputs=self.char_embedding,
                    sequence_length=self.char_sequence_lengths,
                    embedding=self.embedding,
                    sampling_probability=0.5,
                    time_major=False)

                train_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=cell,
                    helper=train_helper,
                    initial_state=decoder_initial_state,
                    output_layer=self.output_layer)

                decoded, final_context_state, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder=train_decoder,
                    output_time_major=False,
                    maximum_iterations=self.maximum_iterations,
                    swap_memory=False,
                    impute_finished=True,
                    scope=decoder_scope)

                sample_id = decoded.sample_id
                logits = decoded.rnn_output

            else:
                # Inference: one start token per batch element.
                start_tokens = tf.fill([self.batch_size], sos_id_2)
                end_token = eos_id_2
                use_beam_search = self.beam_width > 0

                if use_beam_search:
                    infer_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                        cell=cell,
                        embedding=self.embedding,
                        start_tokens=start_tokens,
                        end_token=end_token,
                        initial_state=decoder_initial_state,
                        beam_width=self.beam_width,
                        output_layer=self.output_layer,
                    )
                else:
                    greedy_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                        embedding=self.embedding,
                        start_tokens=start_tokens,
                        end_token=end_token)

                    infer_decoder = tf.contrib.seq2seq.BasicDecoder(
                        cell=cell,
                        helper=greedy_helper,
                        initial_state=decoder_initial_state,
                        output_layer=self.output_layer)

                # Decoding length is only capped when inference targets are
                # available.
                maximum_iterations = (self.maximum_iterations
                                      if self.inference_targets else None)

                decoded, final_context_state, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder=infer_decoder,
                    maximum_iterations=maximum_iterations,
                    output_time_major=False,
                    impute_finished=False,
                    swap_memory=False,
                    scope=decoder_scope)

                # No logits are exposed at inference time on either path.
                logits = tf.no_op()
                if use_beam_search:
                    sample_id = decoded.predicted_ids
                else:
                    sample_id = decoded.sample_id

        return logits, sample_id, final_context_state

Пример #24

Показать файл

Файл: seq2seq_model_no_beam.py Проект: HenryL-study/GAN-for-Question-Rewrite

    def __init__(self,
                 num_emb,
                 batch_size,
                 emb_dim,
                 encoder_num_units,
                 emb_data,
                 ques_length,
                 ans_length,
                 start_token,
                 gen_filter_sizes,
                 gen_num_filters,
                 learning_rate=0.01,
                 reward_gamma=0.95):
        """Build the generator graph: embeddings, encoders, decoders, pretrain loss.

        Args:
            num_emb: number of rows of the embedding matrix; also the number
                of units of the Dense output projection.
            batch_size: fixed batch size used in every placeholder shape.
            emb_dim: number of columns of the embedding matrix.
            encoder_num_units: stored on the instance; not read directly here.
            emb_data: stored on the instance; not read directly here.
            ques_length: second dimension of the question placeholder.
            ans_length: second dimension of the response placeholder.
            start_token: token id repeated batch_size times into
                self.start_token.
            gen_filter_sizes: forwarded to self.getCnnEncoder.
            gen_num_filters: forwarded to self.getCnnEncoder.
            learning_rate: initial value of the non-trainable learning-rate
                variable.
            reward_gamma: stored on the instance; not read directly here.
        """
        # ---- plain hyper-parameters ----
        self.num_emb = num_emb
        self.batch_size = batch_size
        self.emb_dim = emb_dim
        self.emb_data = emb_data
        self.encoder_num_units = encoder_num_units
        self.max_ques_length = ques_length
        self.max_ans_length = ans_length
        self.start_token = tf.constant([start_token] * self.batch_size,
                                       dtype=tf.int32)
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.reward_gamma = reward_gamma
        self.gen_filter_sizes = gen_filter_sizes
        self.gen_num_filters = gen_num_filters
        # Element-wise gradient clipping bound used for the pretrain updates.
        self.grad_clip = 5.0

        self.seq_start_token = None
        self.seq_end_token = None
        self.rnn_size = 50
        self.layer_size = 2
        self.beam_width = 10
        self.atten_depth = 50  # The depth of the query mechanism

        self.g_embeddings = tf.Variable(
            self.init_matrix([self.num_emb, self.emb_dim]))

        # ---- placeholders ----
        self.x = tf.placeholder(tf.int32,
                                shape=[
                                    self.batch_size, self.max_ques_length
                                ])  # sequence of tokens generated by generator
        self.response = tf.placeholder(
            tf.int32, shape=[self.batch_size, self.max_ans_length
                             ])  # get from rollout policy and discriminator
        self.target_sequence_length = tf.placeholder(
            tf.int32, [self.batch_size], name='target_sequence_length')
        self.target_response_length = tf.placeholder(
            tf.int32, [self.batch_size], name='target_response_length')
        self.max_response_length_per_batch = tf.placeholder(tf.int32, shape=())

        # Embedding lookups are pinned to the CPU; results stay batch-major
        # (the ids are [batch_size, length], so lookups add a trailing axis).
        with tf.device("/cpu:0"):
            self.processed_x = tf.nn.embedding_lookup(self.g_embeddings,
                                                      self.x)
            self.processed_response = tf.nn.embedding_lookup(
                self.g_embeddings, self.response)
            print("processed_x shape: ", self.processed_x.shape)
            print("processed_response shape: ", self.processed_response.shape)

        # ---- encoders and decoders ----
        self.add_encoder_layer()
        self.getCnnEncoder(self.gen_filter_sizes, self.gen_num_filters)
        self.output_layer = Dense(
            self.num_emb,
            kernel_initializer=tf.truncated_normal_initializer(mean=0.0,
                                                               stddev=0.1))
        # The inference decoder re-enters the 'decode' scope with reuse=True
        # so that it shares the variables created by the training decoder.
        with tf.variable_scope('decode'):
            training_decoder_output = self.add_decoder_for_training()
        with tf.variable_scope('decode', reuse=True):
            predicting_decoder_output, final_context_state = self.add_decoder_for_inference(
            )

        # attention visualization
        attention_images = (final_context_state.alignment_history.stack())
        print("attention_images shape: ", attention_images.shape)
        # Reshape to (batch, src_seq_len, tgt_seq_len,1)
        attention_images = tf.expand_dims(
            tf.transpose(attention_images, [1, 2, 0]), -1)
        # Scale to range [0, 255]
        attention_images *= 255
        self.infer_summary = tf.summary.image("attention_images",
                                              attention_images)

        #######################################################################################################
        #  Training
        #######################################################################################################
        self.g_pretrain_predictions = training_decoder_output.rnn_output
        self.g_pretrain_sample = training_decoder_output.sample_id
        print("self.g_pretrain_predictions: ", self.g_pretrain_predictions)
        # NOTE(review): the mask is built from target_sequence_length although
        # the loss is taken over self.response, and target_response_length is
        # declared above but not used in this method -- confirm which length
        # is intended.
        masks = tf.sequence_mask(self.target_sequence_length,
                                 self.max_response_length_per_batch,
                                 dtype=tf.float32,
                                 name='masks')
        self.pretrain_loss = tf.contrib.seq2seq.sequence_loss(
            self.g_pretrain_predictions,
            self.response[:, 0:self.max_response_length_per_batch], masks)
        # training updates
        pretrain_opt = self.g_optimizer(self.learning_rate)

        pre_gradients = pretrain_opt.compute_gradients(self.pretrain_loss)
        # Variables that do not influence the loss come back with a None
        # gradient and are skipped; the rest are clipped element-wise.
        self.pretrain_grad_zip = [
            (tf.clip_by_value(grad, -self.grad_clip, self.grad_clip), var)
            for grad, var in pre_gradients
            if grad is not None
        ]
        self.pretrain_updates = pretrain_opt.apply_gradients(
            self.pretrain_grad_zip)

        self.g_samples = predicting_decoder_output.predicted_ids

Пример #25

Показать файл

    def _init_decoder(self, forward_only):
        """Build the Luong-attention decoder graph.

        Args:
            forward_only: if truthy, build greedy inference and expose
                self.predictions; otherwise build the teacher-forced training
                decoder and expose self.logits, self.targets, self.masks and
                self.loss.
        """
        with tf.variable_scope("decoder"):
            # assumes self.encoder_outputs is batch-major
            # [batch_size, max_time, num_units] -- TODO confirm
            self.batch_size = tf.shape(self.encoder_inputs)[0]

            self.attn_mech = tf.contrib.seq2seq.LuongAttention(
                num_units=self.dec_hidden_size,
                memory=self.encoder_outputs,
                memory_sequence_length=self.encoder_inputs_length,
                normalize=False,
                name='LuongAttention')

            self.dec_cell = tf.contrib.seq2seq.DynamicAttentionWrapper(
                cell=self.decoder_cell,
                attention_mechanism=self.attn_mech,
                attention_size=self.dec_hidden_size,
                # attention_history=False (in ver 1.2)
                name='Attention_Wrapper')

            # Start from the encoder's final state with a zero attention
            # vector.
            self.initial_state = tf.contrib.seq2seq.DynamicAttentionWrapperState(
                cell_state=self.encoder_state,
                attention=_zero_state_tensors(self.dec_hidden_size,
                                              self.batch_size, tf.float32))

            # Projection shared by the inference and training decoders.
            self.output_layer = Dense(self.target_vocab_size + 2,
                                      name='output_projection')

            if forward_only:
                # NOTE(review): PAD_ID is fed as the start token -- confirm
                # that this matches the token used at training time.
                start_tokens = tf.tile(tf.constant([model_config.PAD_ID],
                                                   dtype=tf.int32),
                                       [self.batch_size],
                                       name='start_tokens')

                inference_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                    embedding=self.dec_embedding_matrix,
                    start_tokens=start_tokens,
                    end_token=model_config.EOS_ID)

                inference_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=self.dec_cell,
                    helper=inference_helper,
                    initial_state=self.initial_state,
                    output_layer=self.output_layer)

                # NOTE(review): decoding is capped at target_vocab_size steps,
                # i.e. a vocabulary size is used as a length limit -- this
                # looks unintended; confirm the intended maximum length.
                # The two-value unpacking presumably targets the contrib API
                # version that ships DynamicAttentionWrapper -- verify before
                # upgrading TensorFlow.
                infer_dec_outputs, infer_dec_last_state = tf.contrib.seq2seq.dynamic_decode(
                    inference_decoder,
                    output_time_major=False,
                    impute_finished=True,
                    maximum_iterations=self.target_vocab_size)

                # [batch_size x dec_sentence_length], tf.int32
                self.predictions = tf.identity(infer_dec_outputs.sample_id,
                                               name='predictions')
            else:
                # maximum unrollings in current batch = max(dec_sent_len) + 1(GO symbol)
                self.max_dec_len = tf.reduce_max(self.decoder_inputs_length +
                                                 1,
                                                 name='max_dec_len')

                self.training_helper = tf.contrib.seq2seq.TrainingHelper(
                    inputs=self.decoder_inputs_embedded,
                    sequence_length=self.decoder_inputs_length + 1,
                    time_major=False,
                    name='training_helper')

                self.training_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=self.dec_cell,
                    helper=self.training_helper,
                    initial_state=self.initial_state,
                    output_layer=self.output_layer)

                self.decoder_outputs, self.decoder_state = tf.contrib.seq2seq.dynamic_decode(
                    self.training_decoder,
                    output_time_major=False,
                    impute_finished=True,
                    maximum_iterations=self.max_dec_len)

                # logits: [batch_size x max_dec_len x dec_vocab_size+2]
                self.logits = tf.identity(self.decoder_outputs.rnn_output,
                                          name='logits')

                # targets: [batch_size x max_dec_len] token ids (2-D slice of
                # decoder_inputs, truncated to max_dec_len columns)
                self.targets = tf.slice(self.decoder_inputs, [0, 0],
                                        [-1, self.max_dec_len], 'targets')

                # masks: [batch_size x max_dec_len]
                # => ignore outputs after `dec_sequence_length+1` when calculating loss
                self.masks = tf.sequence_mask(self.decoder_inputs_length + 1,
                                              self.max_dec_len,
                                              dtype=tf.float32,
                                              name='masks')

                # internal: `tf.nn.sparse_softmax_cross_entropy_with_logits`
                self.loss = tf.contrib.seq2seq.sequence_loss(
                    logits=self.logits,
                    targets=self.targets,
                    weights=self.masks,
                    name='batch_loss')

Пример #26

Показать файл

Файл: BiAttseq2seq.py Проект: wangcan04/chatbot

    def __init__(self,
                 vocab_size,
                 hidden_size,
                 dropout,
                 num_layers,
                 max_gradient_norm,
                 batch_size,
                 learning_rate,
                 lr_decay_factor,
                 max_target_length,
                 max_source_length,
                 decoder_mode=False):
        '''
        Build a BiLSTM-encoder / Bahdanau-attention-decoder seq2seq graph.

        vocab_size: number of vocab tokens
        hidden_size: dimension of hidden layers (also the embedding width)
        dropout: passed to DropoutWrapper as input_keep_prob, i.e. the
            probability of KEEPING an input
        num_layers: number of hidden layers
        max_gradient_norm: each gradient element is clipped to
            [-max_gradient_norm, max_gradient_norm]
        batch_size: number of examples fed to network at once
        learning_rate: stored on self.learning_rate
        lr_decay_factor: accepted but not used in this constructor
        max_target_length: length the logits are padded to for the loss;
            also caps the number of inference decoding steps
        max_source_length: stored on self.max_source_length
        decoder_mode: when True build beam-search inference only; when False
            build the training decoder, loss, train op and saver
        '''
        GO_ID = config.GO_ID
        EOS_ID = config.EOS_ID
        self.max_source_length = max_source_length
        self.max_target_length = max_target_length
        self.vocab_size = vocab_size
        self.batch_size = batch_size
        self.global_step = tf.Variable(0, trainable=False)
        self.learning_rate = learning_rate
        self.encoder_inputs = tf.placeholder(shape=(None, None),
                                             dtype=tf.int32,
                                             name='encoder_inputs')
        self.source_lengths = tf.placeholder(shape=(None, ),
                                             dtype=tf.int32,
                                             name='source_lengths')

        self.decoder_targets = tf.placeholder(shape=(None, None),
                                              dtype=tf.int32,
                                              name='decoder_targets')
        self.target_lengths = tf.placeholder(shape=(None, ),
                                             dtype=tf.int32,
                                             name="target_lengths")

        # One embedding table is shared by the encoder inputs and the
        # decoder targets.
        with tf.variable_scope('embeddings'):
            embeddings = tf.Variable(tf.random_uniform(
                [vocab_size, hidden_size], -1.0, 1.0),
                                     dtype=tf.float32)
            encoder_inputs_embedded = tf.nn.embedding_lookup(
                embeddings, self.encoder_inputs)
            targets_embedding = tf.nn.embedding_lookup(embeddings,
                                                       self.decoder_targets)

        with tf.variable_scope('encoder'):

            encoder_cell = rnn.LSTMCell(hidden_size)
            encoder_cell = rnn.DropoutWrapper(encoder_cell,
                                              input_keep_prob=dropout)
            # NOTE(review): the same wrapped cell object is repeated
            # num_layers times and is also used for both directions below;
            # confirm this sharing is intended before changing it, since a
            # fix would alter the variable layout of existing checkpoints.
            encoder_cell = tf.nn.rnn_cell.MultiRNNCell(
                [encoder_cell for _ in range(num_layers)], state_is_tuple=True)

            encoder_outputs, encoder_state = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=encoder_cell,
                cell_bw=encoder_cell,
                sequence_length=self.source_lengths,
                inputs=encoder_inputs_embedded,
                dtype=tf.float32,
                time_major=False)  #BiLSTM encoder
            # Only the forward outputs are used as attention memory below.
            encoder_output = encoder_outputs[0]
            encoder_outputs = tf.concat(encoder_outputs, 2)

        with tf.variable_scope('decoder'):
            decoder_cell = rnn.LSTMCell(hidden_size)
            decoder_cell = rnn.DropoutWrapper(decoder_cell,
                                              input_keep_prob=dropout)

            decoder_cell = tf.nn.rnn_cell.MultiRNNCell(
                [decoder_cell for _ in range(num_layers)], state_is_tuple=True)

            attn_mech = seq2seq.BahdanauAttention(
                num_units=hidden_size,  #depth of query mechanism
                memory=encoder_output,  #out of RNN hidden states
                memory_sequence_length=self.source_lengths,
                name='BahdanauAttentiion')
            attn_cell = seq2seq.AttentionWrapper(
                cell=decoder_cell,  #same as encoder
                attention_mechanism=attn_mech,
                attention_layer_size=hidden_size,  #depth of attention tensor
                name='attention_wrapper')  #attention layer

        if decoder_mode:
            beam_width = 1

            attn_zero = attn_cell.zero_state(batch_size=(batch_size *
                                                         beam_width),
                                             dtype=tf.float32)
            # Seed the decoder with the forward encoder state, exactly as the
            # training branch does. The full (forward, backward) pair does
            # not have the structure of the decoder cell's state.
            init_state = attn_zero.clone(cell_state=encoder_state[0])
            decoder = seq2seq.BeamSearchDecoder(
                cell=attn_cell,
                embedding=embeddings,
                # One start token per batch entry, matching the batch size
                # the initial state was built for.
                start_tokens=tf.tile([GO_ID], [batch_size]),
                end_token=EOS_ID,
                initial_state=init_state,
                beam_width=beam_width,
                output_layer=Dense(vocab_size))  #BeamSearch in Decoder
            # Bound the decode loop so it terminates even if EOS is never
            # produced.
            final_outputs, final_state, final_sequence_lengths =\
                            seq2seq.dynamic_decode(
                                decoder=decoder,
                                maximum_iterations=self.max_target_length)
            self.logits = final_outputs.predicted_ids
        else:
            helper = seq2seq.TrainingHelper(
                inputs=targets_embedding, sequence_length=self.target_lengths)
            decoder = seq2seq.BasicDecoder(
                cell=attn_cell,
                helper=helper,
                initial_state=attn_cell.zero_state(
                    batch_size, tf.float32).clone(cell_state=encoder_state[0]),
                output_layer=Dense(vocab_size))
            final_outputs, final_state, final_sequence_lengths =\
                            seq2seq.dynamic_decode(decoder=decoder)

            self.logits = final_outputs.rnn_output

        if not decoder_mode:
            with tf.variable_scope("loss"):
                #have to pad logits, dynamic decode produces results not consistent
                #in shape with targets
                pad_size = self.max_target_length - tf.reduce_max(
                    final_sequence_lengths)
                self.logits = tf.pad(self.logits,
                                     [[0, 0], [0, pad_size], [0, 0]])

                weights = tf.sequence_mask(lengths=final_sequence_lengths,
                                           maxlen=self.max_target_length,
                                           dtype=tf.float32,
                                           name='weights')

                x_entropy_loss = seq2seq.sequence_loss(
                    logits=self.logits,
                    targets=self.decoder_targets,
                    weights=weights)  #cross-entropy loss function

                self.loss = tf.reduce_mean(x_entropy_loss)

            # NOTE(review): the optimizer is built with Adam's default step
            # size; self.learning_rate and lr_decay_factor are not wired in.
            # Left as is because a rate tuned for another optimizer could
            # destabilise training -- confirm before connecting them.
            optimizer = tf.train.AdamOptimizer()  #Adam optimization algorithm
            gradients = optimizer.compute_gradients(x_entropy_loss)
            # compute_gradients yields None for variables the loss does not
            # depend on; clip_by_value cannot take None, so skip those pairs.
            capped_grads = [(tf.clip_by_value(grad, -max_gradient_norm,
                                              max_gradient_norm), var)
                            for grad, var in gradients if grad is not None]
            self.train_op = optimizer.apply_gradients(
                capped_grads, global_step=self.global_step)
            self.saver = tf.train.Saver(tf.global_variables())

Пример #27

Показать файл

Файл: RNN-TF-dynamic-decode.py Проект: rheehot/hccho2FirstGitProject

def dynamic_decode_test():
    """Run a toy BasicDecoder through dynamic_decode and print what it produces.

    Builds a small RNN cell, an embedding and a Dense output projection over
    hard-coded data, decodes with either a TrainingHelper or a
    GreedyEmbeddingHelper (chosen by the local train_mode flag), and prints
    the outputs, final state, sequence lengths and projection weights.
    Returns None.
    """
    vocab_size = 6
    SOS_token = 0
    EOS_token = 5

    x_data = np.array([[SOS_token, 3, 1, 4, 3, 2],[SOS_token, 3, 4, 2, 3, 1],[SOS_token, 1, 3, 2, 2, 1]], dtype=np.int32)
    y_data = np.array([[3, 1, 4, 3, 2,EOS_token],[3, 4, 2, 3, 1,EOS_token],[1, 3, 2, 2, 1,EOS_token]],dtype=np.int32)
    print("data shape: ", x_data.shape)
    sess = tf.InteractiveSession()

    # The session owns resources; release them even if the demo fails
    # part-way through.
    try:
        output_dim = vocab_size
        batch_size = len(x_data)
        hidden_dim = 7
        num_layers = 2
        seq_length = x_data.shape[1]
        embedding_dim = 8
        state_tuple_mode = True
        init_state_flag = 0
        # Deterministic embedding table: row i holds consecutive integers.
        init = np.arange(vocab_size*embedding_dim).reshape(vocab_size,-1)

        train_mode = False
        with tf.variable_scope('test',reuse=tf.AUTO_REUSE):
            # Make rnn: method 0 = stacked BasicRNNCell, otherwise one LSTMCell.
            method = 1
            if method == 0:
                # Alternatives: BasicLSTMCell(state_is_tuple=state_tuple_mode)
                # or GRUCell (the latter requires init_state_flag == 0).
                cells = [tf.contrib.rnn.BasicRNNCell(num_units=hidden_dim)
                         for _ in range(num_layers)]
                cell = tf.contrib.rnn.MultiRNNCell(cells)
            else:
                #cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden_dim)
                cell = tf.contrib.rnn.LSTMCell(num_units=hidden_dim,num_proj=7)

            embedding = tf.get_variable("embedding", initializer=init.astype(np.float32),dtype = tf.float32)
            inputs = tf.nn.embedding_lookup(embedding, x_data) # batch_size  x seq_length x embedding_dim

            Y = tf.convert_to_tensor(y_data)

            # tf.contrib.rnn.OutputProjectionWrapper has the effect of adding
            # one more FC layer at the end. It is applied before the Dense
            # layer used below. Since Dense is available, the projection can
            # be done with either OutputProjectionWrapper or Dense.
            # To stack multiple FC layers, OutputProjectionWrapper must be used.
            if False:
                cell = tf.contrib.rnn.OutputProjectionWrapper(cell,13,activation=tf.nn.relu)
                cell = tf.contrib.rnn.OutputProjectionWrapper(cell,17)

            if init_state_flag==0:
                initial_state = cell.zero_state(batch_size, tf.float32) #(batch_size x hidden_dim) x number of layers
            else:
                if state_tuple_mode:
                    h0 = tf.random_normal([batch_size,hidden_dim]) #h0 = tf.cast(np.random.randn(batch_size,hidden_dim),tf.float32)
                    # first layer: c=0, h=h0; second layer: c=0, h=0; ...
                    initial_state=(tf.contrib.rnn.LSTMStateTuple(tf.zeros_like(h0), h0),) + (tf.contrib.rnn.LSTMStateTuple(tf.zeros_like(h0), tf.zeros_like(h0)),)*(num_layers-1)

                else:
                    h0 = tf.random_normal([batch_size,hidden_dim]) #h0 = tf.cast(np.random.randn(batch_size,hidden_dim),tf.float32)
                    initial_state = (tf.concat((tf.zeros_like(h0),h0), axis=1),) + (tf.concat((tf.zeros_like(h0),tf.zeros_like(h0)), axis=1),) * (num_layers-1)
            if train_mode:
                helper = tf.contrib.seq2seq.TrainingHelper(inputs, np.array([seq_length]*batch_size,dtype=np.int32))
                #helper = tf.contrib.seq2seq.TrainingHelper(inputs, np.array([[2],[4],[6]]).reshape(-1))
            else:
                helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(embedding, start_tokens=tf.tile([SOS_token], [batch_size]), end_token=EOS_token)

            output_layer = Dense(output_dim, name='output_projection')
            #output_layer = None

            decoder = tf.contrib.seq2seq.BasicDecoder(cell=cell,helper=helper,initial_state=initial_state,output_layer=output_layer)
            # Without maximum_iterations, inference loops forever if the EOS
            # token is never produced.
            # last_state has one entry per layer.
            outputs, last_state, last_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(decoder=decoder,output_time_major=False,impute_finished=True,maximum_iterations=10)

            weights = tf.ones(shape=[batch_size,seq_length])
            # The loss op is only evaluated in train_mode (see below).
            loss =   tf.contrib.seq2seq.sequence_loss(logits=outputs.rnn_output, targets=Y, weights=weights)

            sess.run(tf.global_variables_initializer())
            print("initial_state: ", sess.run(initial_state))
            print("\n\noutputs: ",outputs)
            o = sess.run(outputs.rnn_output)  #batch_size, seq_length, outputs
            o2 = sess.run(tf.argmax(outputs.rnn_output,axis=-1))
            print("\n",o,o2) #batch_size, seq_length, outputs

            print("\n\nlast_state: ",last_state)
            print(sess.run(last_state)) # batch_size, hidden_dim

            print("\n\nlast_sequence_lengths: ",last_sequence_lengths)
            print(sess.run(last_sequence_lengths)) #  [seq_length]*batch_size
            if output_layer is not None:
                print("kernel(weight)",sess.run(output_layer.trainable_weights[0]))  # kernel(weight)
                print("bias",sess.run(output_layer.trainable_weights[1]))  # bias

            if train_mode:
                p = sess.run(tf.nn.softmax(outputs.rnn_output)).reshape(-1,output_dim)   #(18, 6) = (batch_size x seq_length, vocab_size)
                print("loss: {:20.6f}".format(sess.run(loss)))
                print("manual cal. loss: {:0.6f} ".format(np.average(-np.log(p[np.arange(y_data.size),y_data.flatten()]))) )
    finally:
        sess.close()

Пример #28

Показать файл

Файл: model.py Проект: jjykim/tf-NMT

    def add_decoder(self):
        """Build the Luong-attention decoder for the mode held in self.mode.

        Always creates self.dec_Wemb. In 'training' mode it additionally sets
        self.batch_loss and self.valid_predictions; in 'inference' mode it
        sets self.predictions. Any other mode builds only the shared pieces.
        """
        with tf.variable_scope('Decoder'):
            # Decoder embedding table, pinned to the CPU.
            with tf.device('/cpu:0'):
                table_shape = [self.dec_vocab_size + 2, self.dec_emb_size]
                table_init = tf.random_uniform(table_shape)
                self.dec_Wemb = tf.get_variable('embedding',
                                                initializer=table_init,
                                                dtype=tf.float32)

            # Batch size is read from the encoder inputs at run time.
            n_batch = tf.shape(self.enc_inputs)[0]

            base_cell = self.cell(self.hidden_size)

            attention = tf.contrib.seq2seq.LuongAttention(
                num_units=self.attn_size,
                memory=self.enc_outputs,
                memory_sequence_length=self.enc_sequence_length,
                name='LuongAttention')

            attn_cell = tf.contrib.seq2seq.AttentionWrapper(
                cell=base_cell,
                attention_mechanism=attention,
                attention_layer_size=self.attn_size,
                name='Attention_Wrapper')

            # Zero attention state whose inner cell state is replaced by the
            # encoder's last state.
            zero_state = attn_cell.zero_state(dtype=tf.float32,
                                              batch_size=n_batch)
            start_state = zero_state.clone(cell_state=self.enc_last_state)

            projection = Dense(self.dec_vocab_size + 2,
                               name='output_projection')

            mode = self.mode
            if mode == 'training':
                # Longest decode length in the batch (lengths + 1).
                max_steps = tf.reduce_max(self.dec_sequence_length + 1,
                                          name='max_dec_len')

                embedded_inputs = tf.nn.embedding_lookup(self.dec_Wemb,
                                                         self.dec_inputs,
                                                         name='emb_inputs')

                helper = tf.contrib.seq2seq.TrainingHelper(
                    inputs=embedded_inputs,
                    sequence_length=self.dec_sequence_length + 1,
                    time_major=False,
                    name='training_helper')

                decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=attn_cell,
                    helper=helper,
                    initial_state=start_state,
                    output_layer=projection)

                decoded, _last_state, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder,
                    output_time_major=False,
                    impute_finished=True,
                    maximum_iterations=max_steps)

                step_logits = tf.identity(decoded.rnn_output, name='logits')

                # Targets are the first max_steps columns of dec_inputs.
                step_targets = tf.slice(self.dec_inputs, [0, 0],
                                        [-1, max_steps], 'targets')

                # Zero weight for steps past each sequence's length + 1.
                step_weights = tf.sequence_mask(self.dec_sequence_length + 1,
                                                max_steps,
                                                dtype=tf.float32,
                                                name='masks')

                self.batch_loss = tf.contrib.seq2seq.sequence_loss(
                    logits=step_logits,
                    targets=step_targets,
                    weights=step_weights,
                    name='batch_loss')

                self.valid_predictions = tf.identity(decoded.sample_id,
                                                     name='valid_preds')

            elif mode == 'inference':
                # One start token per batch entry.
                first_token = tf.constant([self.start_token], dtype=tf.int32)
                go_tokens = tf.tile(first_token, [n_batch],
                                    name='start_tokens')

                helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                    embedding=self.dec_Wemb,
                    start_tokens=go_tokens,
                    end_token=self.end_token)

                decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=attn_cell,
                    helper=helper,
                    initial_state=start_state,
                    output_layer=projection)

                decoded, _last_state, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder,
                    output_time_major=False,
                    impute_finished=True,
                    maximum_iterations=self.dec_sentence_length)

                self.predictions = tf.identity(decoded.sample_id,
                                               name='predictions')

Пример #29

Показать файл

Файл: _model_functions.py Проект: thunlp/HLCP

def _match_model_fn_v6(features, labels, mode, params):
    '''Estimator ``model_fn`` building an attention seq2seq cause decoder.

    According to the original author's note, this version uses the original
    seq2seq layout, merges the cause and word embedding tables through an
    LSTM, and uses the input embedding as the attention query.

    The function always builds both a teacher-forced training decoder and a
    beam-search inference decoder that share the same LSTM cell and the same
    projection layer, then returns the ``EstimatorSpec`` matching ``mode``.

    Args:
        features: mapping read with the keys ``'content'`` and
            ``'content_length'``.
        labels: mapping read with the keys ``'cause_label'`` and
            ``'cause_length'``. NOTE(review): it is indexed unconditionally,
            so it must be provided even when ``mode`` is PREDICT.
        mode: a ``tf.estimator.ModeKeys`` value.
        params: mapping read with the keys ``'embedding_size'``,
            ``'num_units'``, ``'dropout_keep_prob'`` (TRAIN only),
            ``'beam_width'``, ``'EOS'``, ``'SOS'``, ``'max_sequence_length'``,
            ``'max_cause_length'``, ``'vocab_size'``, ``'word2vec_model'``,
            ``'cause_table'`` and ``'hie'``. It is also forwarded as a whole
            to ``CauseEncoder`` and ``encoders``.

    Returns:
        A ``tf.estimator.EstimatorSpec``: predictions only for PREDICT,
        loss and train_op for TRAIN, loss and eval_metric_ops otherwise.
    '''
    '''set parameters'''
    # Everything is pinned to the first GPU; AUTO_REUSE lets repeated calls
    # share the variables created under the 'model' scope.
    with tf.device('/gpu:0'), tf.variable_scope('model',
                                                reuse=tf.AUTO_REUSE) as scope:
        # set hyper parameters
        embedding_size = params['embedding_size']
        num_units = params['num_units']
        # Dropout is only active while training.
        if mode == tf.estimator.ModeKeys.TRAIN:
            dropout_keep_prob = params['dropout_keep_prob']
        else:
            dropout_keep_prob = 1
        beam_width = params['beam_width']
        EOS = params['EOS']
        SOS = params['SOS']
        # set training parameters
        max_sequence_length = params['max_sequence_length']
        max_cause_length = params['max_cause_length']
        vocab_size = params['vocab_size']
        # Size of the projection layer output.
        # presumably EOS is the largest cause id -- TODO confirm
        num_causes = EOS + 1
        '''process input and target'''
        # input layer: force the static layouts expected further down
        input = tf.reshape(features['content'], [-1, max_sequence_length])
        batch_size = tf.shape(input)[0]
        input_length = tf.reshape(features['content_length'], [batch_size])
        cause_label = tf.reshape(labels['cause_label'],
                                 [batch_size, max_cause_length])
        cause_length = tf.reshape(labels['cause_length'], [batch_size])

        # necessary cast: ids and lengths are used as int32 from here on
        input = tf.cast(input, dtype=tf.int32)
        input_length = tf.cast(input_length, dtype=tf.int32)
        cause_label = tf.cast(cause_label, dtype=tf.int32)
        cause_length = tf.cast(cause_length, dtype=tf.int32)

        # word embedding layer
        embeddings_word = load_embedding(params['word2vec_model'], vocab_size,
                                         embedding_size)

        # Look up one embedding row per input token id.
        embedded_input = gen_array_ops.gather_v2(embeddings_word,
                                                 input,
                                                 axis=0)
        # cause-label embedding layer (built on top of the word embeddings)
        cause_encoder = CauseEncoder(word_embeddings=embeddings_word,
                                     params=params)
        embedded_cause = cause_encoder.apply(cause_label)

        # cause lookup table, handed to both decoders below
        cause_table = tf.constant(params['cause_table'], dtype=tf.int32)
        encoder_output = encoders(embedded_input, input_length, params, mode)
        '''hierarchical multilabel decoder'''
        # build lstm cell with attention; this single cell instance is shared
        # by the training decoder and the beam-search decoder
        lstm = rnn.LayerNormBasicLSTMCell(num_units=num_units,
                                          reuse=tf.AUTO_REUSE,
                                          dropout_keep_prob=dropout_keep_prob)
        # lstm = rnn.DropoutWrapper(lstm, output_keep_prob=dropout_keep_prob)

        # Attention over the (untiled) encoder outputs for the training path.
        attention_mechanism = MyBahdanauAttention(
            num_units=embedding_size,
            memory=encoder_output.attention_values,
            memory_sequence_length=encoder_output.attention_values_length)
        # The encoder's initial_state is used for both slots of the LSTM state.
        initial_state = rnn.LSTMStateTuple(encoder_output.initial_state,
                                           encoder_output.initial_state)
        cell = MyAttentionWrapper_v2(lstm,
                                     attention_mechanism,
                                     sot=SOS,
                                     output_attention=False,
                                     name="MyAttentionWrapper")
        # Start from the wrapper's zero state, then override the cell state and
        # the first attention vector with values taken from the encoder.
        cell_state = cell.zero_state(dtype=tf.float32, batch_size=batch_size)
        cell_state = cell_state.clone(cell_state=initial_state,
                                      attention=encoder_output.final_state)

        # extra dense layer to project a rnn output into a classification
        # over num_causes; shared by both decoders
        project_dense = Dense(num_causes,
                              _reuse=tf.AUTO_REUSE,
                              _scope='project_dense_scope',
                              name='project_dense')

        # train_decoder: teacher forcing with the embedded gold cause sequence
        train_helper = MyTrainingHelper(embedded_cause, cause_label,
                                        cause_length)
        train_decoder = MyBasicDecoder(cell,
                                       train_helper,
                                       cell_state,
                                       lookup_table=cause_table,
                                       output_layer=project_dense,
                                       hie=params['hie'])

        decoder_output_train, decoder_state_train, decoder_len_train = dynamic_decode(
            train_decoder,
            maximum_iterations=max_cause_length - 1,
            parallel_iterations=64,
            scope='decoder')

        # Beam search path: every per-example tensor fed to the decoder is
        # repeated beam_width times so it lines up with batch * beam entries.
        # beam_width = 1
        tiled_memory_sequence_length = tile_batch(
            encoder_output.attention_values_length, multiplier=beam_width)
        tiled_memory = tile_batch(encoder_output.attention_values,
                                  multiplier=beam_width)
        tiled_encoder_output_initital_state = tile_batch(
            encoder_output.initial_state, multiplier=beam_width)
        tiled_initial_state = rnn.LSTMStateTuple(
            tiled_encoder_output_initital_state,
            tiled_encoder_output_initital_state)
        tiled_first_attention = tile_batch(encoder_output.final_state,
                                           multiplier=beam_width)

        # Same attention/wrapper construction as above, but over tiled memory.
        # The names attention_mechanism, cell and cell_state are rebound here;
        # the training decoder already holds references to the earlier objects.
        attention_mechanism = MyBahdanauAttention(
            num_units=embedding_size,
            memory=tiled_memory,
            memory_sequence_length=tiled_memory_sequence_length)

        cell = MyAttentionWrapper_v2(lstm,
                                     attention_mechanism,
                                     sot=SOS,
                                     output_attention=False,
                                     name="MyAttentionWrapper")
        cell_state = cell.zero_state(dtype=tf.float32,
                                     batch_size=batch_size * beam_width)
        cell_state = cell_state.clone(cell_state=tiled_initial_state,
                                      attention=tiled_first_attention)
        # Decoding starts from SOS for every example and stops on EOS.
        infer_decoder = MyBeamSearchDecoder(cell,
                                            embedding=cause_encoder,
                                            sots=tf.fill([batch_size], SOS),
                                            start_tokens=tf.fill([batch_size],
                                                                 SOS),
                                            end_token=EOS,
                                            initial_state=cell_state,
                                            beam_width=beam_width,
                                            output_layer=project_dense,
                                            lookup_table=cause_table,
                                            length_penalty_weight=0.7,
                                            hie=params['hie'])

        # Uses the same 'decoder' scope as the training decode above.
        cause_output_infer, cause_state_infer, cause_length_infer = dynamic_decode(
            infer_decoder,
            parallel_iterations=64,
            maximum_iterations=max_cause_length - 1,
            scope='decoder')

        # loss
        # Mask over max_cause_length - 1 steps, one fewer than the label
        # length, matching the decoder's maximum number of iterations.
        mask_for_cause = tf.sequence_mask(cause_length - 1,
                                          max_cause_length - 1,
                                          dtype=tf.float32)
        # loss = sequence_loss(logits=padded_train_output, targets=cause_label, weights=mask_for_cause, name='loss')
        # Zero-pad the decoded steps axis of the training logits up to
        # max_cause_length - 1 (the decoded length may be shorter).
        tmp_padding = tf.pad(decoder_output_train.rnn_output,
                             [[0, 0],
                              [
                                  0, max_cause_length - 1 -
                                  tf.shape(decoder_output_train.rnn_output)[1]
                              ], [0, 0]],
                             constant_values=0)
        loss = _compute_loss(tmp_padding, cause_label, mask_for_cause,
                             batch_size)
        # predicted_ids: [batch_size, max_cause_length, beam_width]
        # (per the original author); the transpose swaps the last two axes.

        predicted_and_cause_ids = tf.transpose(
            cause_output_infer.predicted_ids,
            perm=[0, 2, 1],
            name='predicted_cause_ids')

        # for monitoring: the gold labels without their first position are
        # appended as one extra row after the beam hypotheses
        cause_label_expanded = tf.reshape(cause_label[:, 1:],
                                          [-1, 1, max_cause_length - 1])
        # Pad the last axis of the hypotheses with EOS up to
        # max_cause_length - 1 so they can be concatenated with the labels.
        predicted_and_cause_ids = tf.pad(
            predicted_and_cause_ids,
            [[0, 0], [0, 0],
             [0, max_cause_length - 1 - tf.shape(predicted_and_cause_ids)[2]]],
            constant_values=EOS)
        predicted_and_cause_ids = tf.concat(
            [predicted_and_cause_ids, cause_label_expanded],
            axis=1,
            name='predicted_and_cause_ids')
        # Pin the layout: beam_width hypotheses plus one label row.
        predicted_and_cause_ids = tf.reshape(
            predicted_and_cause_ids,
            [-1, beam_width + 1, max_cause_length - 1])
        # NOTE(review): this tensor is built but never read again in this
        # function.
        predicted_and_cause_ids_train = tf.concat(
            [decoder_output_train.sample_id, cause_label[:, 1:]],
            axis=1,
            name='predicted_and_cause_ids_train')

        predictions = {
            'predicted_and_cause_ids': predicted_and_cause_ids,
        }
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions)

        if mode == tf.estimator.ModeKeys.TRAIN:
            # Disabled warm-up learning-rate schedule kept for reference:
            # warm_up_constant = params['warm_up_steps'] ** (-1.5)
            # embedding_constant = embedding_size ** (-0.5)
            # global_step = tf.to_float(tf.train.get_global_step())
            # learning_rate = tf.minimum(1 / tf.sqrt(global_step),
            #                            warm_up_constant * global_step) * embedding_constant
            # optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.98, epsilon=1e-9)
            # Adam with its default hyper-parameters.
            optimizer = tf.train.AdamOptimizer()
            # train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
            # loss = tf.Print(loss, [loss, 'to be clear, this is the loss'])
            # Gradient clipping: every gradient element is clipped to
            # [-0.1, 0.1]; pairs whose gradient is None are passed through.
            grads_and_vars = optimizer.compute_gradients(loss)
            clipped_gvs = [
                ele if ele[0] is None else
                (tf.clip_by_value(ele[0], -0.1, 0.1), ele[1])
                for ele in grads_and_vars
            ]
            train_op = optimizer.apply_gradients(
                clipped_gvs, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)

        # Remaining case (neither PREDICT nor TRAIN): evaluation.
        # predicted_cause_ids shape = [batch_size, cause_length]
        # cause_label = [batch_size, cause_length]
        # select the predicted cause with the highest probability
        # TODO: evaluation
        # bi_predicted_cause_ids = binarizer(predicted_cause_ids[:, 0, :], num_causes)
        # bi_cause_label = binarizer(cause_label, num_causes)

        # TODO: for now the evaluation itself has to be done outside the
        # estimator; the only "metric" is the concatenation of the
        # predictions and labels over the evaluated batches.
        eval_metric_ops = {
            'predicted_and_cause_ids':
            tf.contrib.metrics.streaming_concat(predicted_and_cause_ids),
            # 'precision': tf.metrics.precision(bi_cause_label, bi_predicted_cause_ids),
            # 'recall': tf.metrics.recall(bi_cause_label, bi_predicted_cause_ids),
            # 'f1-score': f_score(bi_cause_label, bi_predicted_cause_ids),
        }
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)

Пример #30

Показать файл

    def build_model(self, grad_clip, is_train=1):
        """Build the encoder / latent-variable / decoder graph.

        The input sentence is encoded with an LSTM, two ``Dense(1)`` layers
        map the last layer's hidden state to a mean and a scale term, and a
        latent ``Z`` is formed from them. ``Z`` is projected to the initial
        hidden and cell state of a decoding LSTM whose outputs are projected
        to ``self.shape[0]`` logits per step.

        Relies on names defined outside this method: ``lstm_size``,
        ``lstm_layer``, ``LENGTH`` and ``pretrain_lr``, plus
        ``self.embed``, ``self.shape`` and ``self._get_simple_lstm``.

        Args:
            grad_clip: clip norm passed to ``tf.clip_by_global_norm``; only
                used when ``is_train == 1``.
            is_train: ``1`` builds the training graph (teacher forcing, loss
                and optimizer); ``0`` builds the inference graph, in which
                ``self.Z`` is replaced by a placeholder fed by the caller and
                decoding is greedy.

        Returns:
            ``(predict, loss)`` when ``is_train == 0``, where ``loss`` holds
            the per-step softmax probabilities reshaped to
            ``[-1, self.shape[0]]`` and ``predict`` is their argmax.
            ``(train_op, loss, data, train_data, train_label, z_0, epsilon)``
            when ``is_train == 1``.

        Raises:
            ValueError: if ``is_train`` is neither 0 nor 1. Previously such a
                value only failed later, with ``helper`` unbound, after part
                of the graph had already been created.
        """
        # Validate before any op is added to the graph.
        if is_train not in (0, 1):
            raise ValueError("is_train must be 0 or 1, got %r" % (is_train, ))

        data = tf.placeholder(tf.int32, shape=[1, None], name="input_id")
        train_data = tf.placeholder(tf.int32, shape=[1, None], name="train_id")
        # The (misspelled) placeholder name is kept unchanged because callers
        # may address the tensor by name.
        train_label = tf.placeholder(tf.int32,
                                     shape=[1, None],
                                     name="trian_label")
        z_0 = tf.placeholder(tf.float32, shape=[1],
                             name="prior_selection")  # 1 or 0

        decoder_input = tf.nn.embedding_lookup(self.embed, train_data)

        with tf.variable_scope("encoder"):
            encoder = self._get_simple_lstm(lstm_size, lstm_layer)
            words = tf.nn.embedding_lookup(self.embed, data)
        encoder_outputs, encoder_state = tf.nn.dynamic_rnn(encoder,
                                                           words,
                                                           dtype=tf.float32)

        # define the variational approximation
        epsilon = tf.placeholder(tf.float32, shape=[1], name="epsilon")
        with tf.variable_scope("encoder_approx"):
            mean_encode_layer_1 = Dense(1)  # 1
            var_encode_layer = Dense(1)
        # Both layers read element [1] of the last LSTM layer's state tuple.
        last_layer_state = encoder_state[lstm_layer - 1][1]
        mean_approx_1 = mean_encode_layer_1(last_layer_state)
        var_approx = var_encode_layer(last_layer_state)
        # p(Z) = z_0 * N(1, 1) + (1-z_0) * N(-1, 1)
        # z_0 in {0, 1} flips the sign of the mean; epsilon is fed by the
        # caller and scales the learned scale term.
        self.Z = (2 * z_0 - 1) * mean_approx_1 + epsilon * var_approx

        if is_train == 0:
            # do inference: Z is supplied directly by the caller instead of
            # being computed from the encoder.
            self.Z = tf.placeholder(tf.float32, shape=[1, 1], name="Z_input")
            self.start_tokens = tf.placeholder(tf.int32,
                                               shape=[1],
                                               name='start_tokens')
            self.end_tokens = tf.placeholder(tf.int32,
                                             shape=(),
                                             name="end_tokens")
            helper = GreedyEmbeddingHelper(self.embed, self.start_tokens,
                                           self.end_tokens)
        else:
            self.decoder_seq_length = tf.placeholder(tf.int32,
                                                     shape=[None],
                                                     name='decoder_seq_length')
            # NOTICE: since it is an auto-encoder, the input of the
            # TrainingHelper is the first n-1 words and the output is the
            # last n-1 words. Otherwise, it would be just an identity
            # transformation.
            # words' shape: [1, sen_length, vocab_dim]
            helper = TrainingHelper(decoder_input, self.decoder_seq_length)

        with tf.variable_scope("decoder"):
            # decoder, use the latent variable to compute the new initial
            # hidden state and the cell state for the decoding lstm model.
            fc_rec = Dense(lstm_size)
            fc_rec2 = Dense(lstm_size)
            decoder_h = fc_rec(self.Z)
            decoder_c = fc_rec2(self.Z)
            fc_layer = Dense(self.shape[0])
            decoder_cell = self._get_simple_lstm(lstm_size, lstm_layer)
            d_i_s = tf.contrib.rnn.LSTMStateTuple(decoder_c, decoder_h)
            # A one-element tuple of states is passed as the initial state.
            # presumably this requires lstm_layer == 1 -- TODO confirm
            decoder = BasicDecoder(decoder_cell, helper, (d_i_s, ), fc_layer)

        logits, final_state, final_sequence_lengths = dynamic_decode(
            decoder, maximum_iterations=LENGTH)

        if is_train == 0:
            # Despite its name, `loss` here holds softmax probabilities.
            loss = tf.reshape(tf.nn.softmax(logits.rnn_output),
                              [-1, self.shape[0]])  # output shouldn't have SOS
            predict = tf.argmax(loss, axis=1)
            return predict, loss

        # train
        targets = tf.reshape(train_label, [-1])
        logits_flatten = tf.reshape(logits.rnn_output, [-1, self.shape[0]])
        cross_ent = tf.losses.sparse_softmax_cross_entropy(
            targets, logits_flatten)
        # Regularisation term on the approximate posterior.
        # NOTE(review): looks like KL(N(mean, var^2) || N(1, 1)) up to an
        # additive constant -- confirm against the model derivation.
        DL_loss = -(0.5 *
                    (tf.log(tf.square(var_approx)) -
                     tf.square(mean_approx_1) - tf.square(var_approx)) +
                    mean_approx_1)
        loss = DL_loss + cross_ent  # negative ELBO
        tvars = tf.trainable_variables()
        # Clip by global norm before applying the Adam update.
        grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars),
                                          grad_clip)
        optimizer = tf.train.AdamOptimizer(pretrain_lr)
        train_op = optimizer.apply_gradients(zip(
            grads, tvars))  # minimize the loss
        return train_op, loss, data, train_data, train_label, z_0, epsilon