Example #1
  def __init__(self, hidden_num, inputs, 
    cell=None, optimizer=None, reverse=True, 
    decode_without_input=False):
    """
    Args:
      hidden_num : number of hidden elements of each LSTM unit.
      inputs : a list of input tensors with size 
              (batch_num x elem_num)
      cell : an rnn cell object (the default option 
            is `tf.python.ops.rnn_cell.LSTMCell`)
      optimizer : optimizer for rnn (the default option is
              `tf.train.AdamOptimizer`)
      reverse : Option to decode in reverse order.
      decode_without_input : Option to decode without input.
    """

    self.batch_num = inputs[0].get_shape().as_list()[0]
    self.elem_num = inputs[0].get_shape().as_list()[1]

    if cell is None:
      self._enc_cell = LSTMCell(hidden_num)
      self._dec_cell = LSTMCell(hidden_num)
    else :
      self._enc_cell = cell
      self._dec_cell = cell

    with tf.variable_scope('encoder'):
      self.z_codes, self.enc_state = tf.nn.rnn(
        self._enc_cell, inputs, dtype=tf.float32)

    with tf.variable_scope('decoder') as vs:
      dec_weight_ = tf.Variable(
        tf.truncated_normal([hidden_num, self.elem_num], dtype=tf.float32),
        name="dec_weight")
      dec_bias_ = tf.Variable(
        tf.constant(0.1, shape=[self.elem_num], dtype=tf.float32),
        name="dec_bias")

      if decode_without_input:
        dec_inputs = [tf.zeros(tf.shape(inputs[0]), dtype=tf.float32)
                      for _ in range(len(inputs))]
        dec_outputs, dec_state = tf.nn.rnn(
          self._dec_cell, dec_inputs,
          initial_state=self.enc_state, dtype=tf.float32)
        """the shape of each tensor
          dec_output_ : (step_num x hidden_num)
          dec_weight_ : (hidden_num x elem_num)
          dec_bias_ : (elem_num)
          output_ : (step_num x elem_num)
          input_ : (step_num x elem_num)
        """
        if reverse:
          dec_outputs = dec_outputs[::-1]
        dec_output_ = tf.transpose(tf.pack(dec_outputs), [1, 0, 2])
        dec_weight_ = tf.tile(tf.expand_dims(dec_weight_, 0), [self.batch_num, 1, 1])
        self.output_ = tf.batch_matmul(dec_output_, dec_weight_) + dec_bias_

      else:
Example #2
 def __init__(self, num_units, state_is_tuple=True, cell_type='lstm', scope='bi_rnn'):
     self.num_units = num_units
     if cell_type == 'gru':
         self.cell_fw = GRUCell(self.num_units)
         self.cell_bw = GRUCell(self.num_units)
     else:  # default
         self.cell_fw = LSTMCell(self.num_units, state_is_tuple=state_is_tuple)
         self.cell_bw = LSTMCell(self.num_units, state_is_tuple=state_is_tuple)
     self.scope = scope
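A minimal consumption sketch (not from the project above), assuming TF 1.x; the feature size and sequence-length placeholder are illustrative. Forward/backward cells like these are typically handed to tf.nn.bidirectional_dynamic_rnn and the two output directions concatenated.

import tensorflow as tf
from tensorflow.contrib.rnn import LSTMCell

inputs = tf.placeholder(tf.float32, [None, None, 128])    # batch x time x feature
seq_len = tf.placeholder(tf.int32, [None])                 # true length of each sequence
cell_fw, cell_bw = LSTMCell(64), LSTMCell(64)
(out_fw, out_bw), _ = tf.nn.bidirectional_dynamic_rnn(
    cell_fw, cell_bw, inputs, sequence_length=seq_len, dtype=tf.float32)
outputs = tf.concat([out_fw, out_bw], axis=-1)             # batch x time x 2*64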
Example #3
 def __init__(self, num_layers, num_units, cell_type='lstm', scope='stacked_bi_rnn'):
     self.num_layers = num_layers
     self.num_units = num_units
     if cell_type == 'gru':
         self.cells_fw = [GRUCell(self.num_units) for _ in range(self.num_layers)]
         self.cells_bw = [GRUCell(self.num_units) for _ in range(self.num_layers)]
     else:  # default
         self.cells_fw = [LSTMCell(self.num_units) for _ in range(self.num_layers)]
         self.cells_bw = [LSTMCell(self.num_units) for _ in range(self.num_layers)]
     self.scope = scope
Example #4
 def __init__(self, num_layers, num_units, scope='stacked_bi_rnn'):
     self.num_layers = num_layers
     self.num_units = num_units
     self.cells_fw = [
         LSTMCell(self.num_units) for _ in range(self.num_layers)
     ]
     self.cells_bw = [
         LSTMCell(self.num_units) for _ in range(self.num_layers)
     ]
     self.scope = scope
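A hedged sketch of how stacked forward/backward cell lists like these are commonly consumed, assuming TF 1.x with tf.contrib available; the input size and placeholders are illustrative stand-ins.

import tensorflow as tf
from tensorflow.contrib.rnn import LSTMCell

num_layers, num_units = 2, 64
inputs = tf.placeholder(tf.float32, [None, None, 100])
seq_len = tf.placeholder(tf.int32, [None])
cells_fw = [LSTMCell(num_units) for _ in range(num_layers)]
cells_bw = [LSTMCell(num_units) for _ in range(num_layers)]
outputs, state_fw, state_bw = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
    cells_fw, cells_bw, inputs, sequence_length=seq_len, dtype=tf.float32)
# outputs: batch x time x 2*num_units (both directions of the last layer concatenated)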
Example #5
    def getCell(self, is_training, dp, config):
        # code for RNN
        if is_training == True:
            print("==> Construct ", config.cell_type, " graph for training")
        else:
            print("==> Construct ", config.cell_type, " graph for testing")

        if config.cell_type == "LSTM":
            if config.num_layer == 1:
                basicCell = LSTMCell(config.hidden_size, forget_bias=0.0, state_is_tuple=True)
            elif config.num_layer == 2:
                basicCell = LSTMCell(config.hidden_size, forget_bias=0.0, state_is_tuple=True)
                basicCell_2 = LSTMCell(config.hidden_size_2, forget_bias=0.0, state_is_tuple=True)
            else:
                raise ValueError("config.num_layer should be 1:2 ")
        elif config.cell_type == "RNN":
            if config.num_layer == 1:
                basicCell = BasicRNNCell(config.hidden_size)
            elif config.num_layer == 2:
                basicCell = BasicRNNCell(config.hidden_size)
                basicCell_2 = BasicRNNCell(config.hidden_size_2)
            else:
                raise ValueError("config.num_layer should be [1-3] ")
        elif config.cell_type == "GRU":
            if config.num_layer == 1:
                basicCell = GRUCell(config.hidden_size, forget_bias=0.0, state_is_tuple=True)
            elif config.num_layer == 2:
                basicCell = GRUCell(config.hidden_size, forget_bias=0.0, state_is_tuple=True)
                basicCell_2 = GRUCell(config.hidden_size_2, forget_bias=0.0, state_is_tuple=True)
            else:
                raise ValueError("only support 1-2 layers ")
        else:
            raise ValueError("cell_type should be GRU, LSTM, or RNN")

        # add dropout layer between hidden layers
        if is_training and config.keep_prob < 1:
            if config.num_layer == 1:
                basicCell = DropoutWrapper(basicCell, input_keep_prob=config.keep_prob,
                                           output_keep_prob=config.keep_prob)
            elif config.num_layer == 2:
                basicCell = DropoutWrapper(basicCell, input_keep_prob=config.keep_prob,
                                           output_keep_prob=config.keep_prob)
                basicCell_2 = DropoutWrapper(basicCell_2, input_keep_prob=config.keep_prob,
                                             output_keep_prob=config.keep_prob)
            else:
                pass

        if config.num_layer == 1:
            cell = rnn_cell.MultiRNNCell([basicCell], state_is_tuple=True)
        elif config.num_layer == 2:
            cell = rnn_cell.MultiRNNCell([basicCell, basicCell_2], state_is_tuple=True)

        return cell
Example #6
def build_decoder_cell(rank, u_emb, batch_size, depth=2):
  cell = []
  for i in range(depth):
    if i == 0:
      cell.append(LSTMCell(rank, state_is_tuple=True))
    else:
      cell.append(ResidualWrapper(LSTMCell(rank, state_is_tuple=True)))
  initial_state = LSTMStateTuple(tf.zeros_like(u_emb), u_emb)
  initial_state = [initial_state, ]
  for i in range(1, depth):
    initial_state.append(cell[i].zero_state(batch_size, tf.float32))
  return MultiRNNCell(cell), tuple(initial_state)
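A sketch of consuming the (cell, initial_state) pair returned above, assuming TF 1.x; the sizes and the decoder-input placeholder are illustrative stand-ins.

import tensorflow as tf

rank, batch_size = 32, 8
u_emb = tf.placeholder(tf.float32, [batch_size, rank])
dec_inputs = tf.placeholder(tf.float32, [batch_size, None, rank])   # batch x time x rank
cell, initial_state = build_decoder_cell(rank, u_emb, batch_size, depth=2)
outputs, final_state = tf.nn.dynamic_rnn(
    cell, dec_inputs, initial_state=initial_state, dtype=tf.float32)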
Example #7
    def inference_layer(self, inputs):
        if self.dblstm:
            with tf.name_scope('deep_bidirectional_rnn'):
                rnn_outputs, _ = deep_bidirectional_dynamic_rnn(
                    [self._dblstm_cell() for _ in range(self.num_layers)],
                    inputs,
                    sequence_length=self.sequence_lengths)
            state_dim = self.state_dim
        else:
            cell_fw = DropoutWrapper(LSTMCell(num_units=self.state_dim),
                                     variational_recurrent=True,
                                     state_keep_prob=self.dropout_keep_prob,
                                     output_keep_prob=self.dropout_keep_prob,
                                     dtype=tf.float32)
            cell_bw = DropoutWrapper(LSTMCell(num_units=self.state_dim),
                                     variational_recurrent=True,
                                     state_keep_prob=self.dropout_keep_prob,
                                     output_keep_prob=self.dropout_keep_prob,
                                     dtype=tf.float32)

            with tf.name_scope('bidirectional_rnn'):
                rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw,
                    cell_bw,
                    inputs,
                    sequence_length=self.sequence_lengths,
                    dtype=tf.float32)
                rnn_outputs = tf.concat(rnn_outputs, 2)
                state_dim = self.state_dim * 2

        with tf.name_scope('linear_projection'):
            softmax_weights = tf.get_variable(
                'softmax_W', [state_dim, self.num_classes],
                initializer=tf.random_normal_initializer(0, 0.01))
            softmax_bias = tf.get_variable('softmax_b', [self.num_classes],
                                           initializer=tf.zeros_initializer)

            time_steps = tf.shape(rnn_outputs)[1]
            rnn_outputs = tf.reshape(
                rnn_outputs, [-1, state_dim],
                name="flatten_rnn_outputs_for_linear_projection")
            logits = tf.nn.xw_plus_b(x=rnn_outputs,
                                     weights=softmax_weights,
                                     biases=softmax_bias,
                                     name="softmax_projection")
            self.scores = tf.reshape(logits,
                                     [-1, time_steps, self.num_classes],
                                     name="unflatten_logits")

        if self.crf:
            self.transition_params = tf.get_variable(
                "transitions", [self.num_classes, self.num_classes])
Example #8
File: rnn.py Project: ylfzr/rnnprop
 def _build_pre(self):
     self.dimA = 20
     self.cellA = MultiRNNCell([LSTMCell(self.dimA)] * 2)
     self.b1 = 0.95
     self.b2 = 0.95
     self.lr = 0.1
     self.eps = 1e-8
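A cautionary sketch (an observation about the list-multiplication pattern, not part of the project above): [LSTMCell(self.dimA)] * 2 repeats a single cell object, so both layers are backed by the same LSTMCell instance; building one cell per layer, as Example #14 below does, keeps each layer's variables independent.

from tensorflow.contrib.rnn import LSTMCell, MultiRNNCell

dimA, num_layers = 20, 2
cellA = MultiRNNCell([LSTMCell(dimA) for _ in range(num_layers)])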
Example #9
    def impress(self, state_code, pre_impress_states):
        # LSTM, 3 layers
        self.impress_lay_num = 3
        with tf.variable_scope('impress', reuse=tf.AUTO_REUSE):
            def loop_fn(time, cell_output, cell_state, loop_state):
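                # tf.nn.raw_rnn calls loop_fn(time, cell_output, cell_state, loop_state);
                # the first call passes cell_output=None so the branch below supplies the
                # initial input and state, and every call must return
                # (elements_finished, next_input, next_cell_state, emit_output, next_loop_state).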
                if cell_output is None:  # time == 0
                    # initialization
                    input = state_code
                    state = state_
                    emit_output = None
                    loop_state = None
                else:
                    input = cell_output
                    emit_output = cell_output
                    state = cell_state
                    loop_state = None
                    
                elements_finished = (time >= 1)
                return (elements_finished, input, state, emit_output, loop_state)

            multirnn_cell = MultiRNNCell([LSTMCell(self.impress_dim) 
                    for _ in range(self.impress_lay_num)],  state_is_tuple=True) 
            
            if pre_impress_states is None:
                state_ = (multirnn_cell.zero_state(self.batch_size, tf.float32))
            else:
                state_ = pre_impress_states   
    
            emit_ta, states, final_loop_state = tf.nn.raw_rnn(multirnn_cell, loop_fn)
            state_impress_code = tf.transpose(emit_ta.stack(), [1, 0, 2])[0] # transpose for putting batch dimension to first dimension
            
            return state_impress_code, final_loop_state
Example #10
def RNN(_X, _weights, _biases, lens):
    if FLAGS.unit == "PLSTM":
        cell = PhasedLSTMCell(FLAGS.n_hidden,
                              use_peepholes=True,
                              state_is_tuple=True)
    elif FLAGS.unit == "GRU":
        cell = GRUCell(FLAGS.n_hidden)
    elif FLAGS.unit == "LSTM":
        cell = LSTMCell(FLAGS.n_hidden,
                        use_peepholes=True,
                        state_is_tuple=True)
    else:
        raise ValueError("Unit '{}' not implemented.".format(FLAGS.unit))

    outputs = multiPLSTM(_X, lens, FLAGS.n_layers, FLAGS.n_hidden, n_input)

    outputs = tf.slice(outputs, [0, 0, 0], [-1, -1, FLAGS.n_hidden])

    # TODO better (?) in lack of smart indexing
    batch_size = tf.shape(outputs)[0]
    max_len = tf.shape(outputs)[1]
    out_size = int(outputs.get_shape()[2])
    index = tf.range(0, batch_size) * max_len + (lens - 1)
    flat = tf.reshape(outputs, [-1, out_size])
    relevant = tf.gather(flat, index)

    return tf.nn.bias_add(tf.matmul(relevant, _weights['out']), _biases['out'])
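A hedged alternative to the flatten-and-gather indexing above (an illustration, not the original project's code): tf.gather_nd can pick each sequence's last valid output directly.

import tensorflow as tf

def last_relevant(outputs, lens):
    # outputs: batch x time x hidden, lens: int32 vector of true sequence lengths
    batch_size = tf.shape(outputs)[0]
    indices = tf.stack([tf.range(batch_size), lens - 1], axis=1)  # (batch, 2)
    return tf.gather_nd(outputs, indices)                         # batch x hidden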
Example #11
 def __init__(self, num_units, memory, pmemory, cell_type='lstm'):
     super(AttentionCell, self).__init__()
     self._cell = LSTMCell(num_units)
     self.num_units = num_units
     self.memory = memory
     self.pmemory = pmemory
     self.mem_units = memory.get_shape().as_list()[-1]
Example #12
    def Encoder(self, xs):
        encoder_input = tf.one_hot(tf.cast(xs, tf.int32), self.val_size_x) 
    
        encoder_input = self.WordEmb(encoder_input)
        
        if self.args.train:
            inputs_length = self.inputs_length_PH
        elif self.args.test:
            inputs_length = self.inputs_length_test_PH
            
        multirnn_cell = MultiRNNCell([LSTMCell(self.encoder_units) 
            for _ in range(self.encoder_lay_Num)],  state_is_tuple=True)
            
        (fw_outputs, bw_outputs), (fw_final_state, bw_final_state) = (
            tf.nn.bidirectional_dynamic_rnn(cell_fw=multirnn_cell, 
                                            cell_bw=multirnn_cell, inputs=encoder_input,
                                            sequence_length=inputs_length, dtype=self.dtype))
                                            
        sentence_code = tf.concat((fw_outputs, bw_outputs), axis=2)

        sentence_code_ = []
        for i in range(self.batch_size):
            sentence_code_.append(sentence_code[i, inputs_length[i] - 1, :])

        encoder_output = tf.stack(sentence_code_)

        encoder_output = tf.layers.dense(inputs=encoder_output, units=self.encoder_units, activation=tf.nn.relu)

        return encoder_output
Example #13
    def s2v(self):
        sqrt3 = math.sqrt(3.0)
        initializer = tf.random_uniform_initializer(-sqrt3,
                                                    sqrt3,
                                                    dtype=self.dtype)

        # word embedding layer
        if self.pre_trained_word_emb is not None:
            self.word_embeddings = tf.get_variable(
                name='word_embedding',
                initializer=self.pre_trained_word_emb,
                dtype=self.dtype)
        else:
            self.word_embeddings = tf.get_variable(
                name='word_embedding',
                shape=[self.voc_size, self.emb_size],
                initializer=initializer,
                dtype=self.dtype)
        self.embedded_sentence = tf.nn.embedding_lookup(
            self.word_embeddings, self.sentence)
        self.embedded_sentence = tf.nn.dropout(
            self.embedded_sentence, keep_prob=self.keep_word_prob_placeholder)

        # create the rnn cell
        if self.rnn_cell_type.lower() == 'gru':
            rnn_cell = GRUCell
        else:
            rnn_cell = LSTMCell
        rnn_cell = rnn_cell(self.hidden_units)

        if self.use_lstm_dropout:
            rnn_cell = DropoutWrapper(
                rnn_cell,
                dtype=tf.float32,
                output_keep_prob=self.keep_lstm_prob_placeholder)
        if self.rnn_model == 'leap-lstm':
            self.sentence_emb, self.skip_dis_output = self.leap_lstm(rnn_cell)
        elif self.rnn_model == 'rnn':
            if self.rnn_pattern == 1:
                self.sentence_emb = self.general_rnn(rnn_cell, out='LAST')
            else:
                self.sentence_emb = self.general_rnn_for_pattern(
                    rnn_cell, out='LAST')  # for test the training time
        elif self.rnn_model == 'brnn':
            self.sentence_emb = self.general_brnn()
        elif self.rnn_model == 'skip-rnn-2017':
            self.sentence_emb, self.budget_loss, self.updated_states, self.rnn_final_states, self.rnn_outputs = self.skip_rnn_2017(
            )
        elif self.rnn_model == 'skim-rnn':
            small_rnn_cell = LSTMCell(5)  # small size 5
            small_rnn_cell = DropoutWrapper(
                small_rnn_cell,
                dtype=tf.float32,
                output_keep_prob=self.keep_lstm_prob_placeholder)
            self.sentence_emb, self.skip_dis_output, self.skim_loss = self.skim_rnn(
                rnn_cell, small_rnn_cell)  # the skim-rnn setup simply follows the source code on GitHub
        else:
            print("bad rnn model!")
            exit()
Example #14
 def _build_pre(self, size):
     self.dimA = size
     self.num_of_layers = 2
     self.cellA = MultiRNNCell([LSTMCell(num_units=self.dimA) for _ in range(self.num_of_layers)])
     self.b1 = 0.95
     self.b2 = 0.95
     self.lr = 0.1
     self.eps = 1e-8
Example #15
 def _create_rnn_cell(self):
     cell = GRUCell(
         self.cfg.num_units) if self.cfg.cell_type == "gru" else LSTMCell(
             self.cfg.num_units)
     if self.cfg.use_dropout:
         cell = DropoutWrapper(cell, output_keep_prob=self.keep_prob)
     if self.cfg.use_residual:
         cell = ResidualWrapper(cell)
     return cell
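A standalone sketch of the same cell-factory pattern, assuming TF 1.x; the sizes, the keep_prob placeholder, and the stacking step are illustrative rather than taken from the project above.

import tensorflow as tf
from tensorflow.contrib.rnn import GRUCell, LSTMCell, DropoutWrapper, ResidualWrapper, MultiRNNCell

def create_rnn_cell(num_units, cell_type="lstm", keep_prob=1.0,
                    use_dropout=True, use_residual=False):
    cell = GRUCell(num_units) if cell_type == "gru" else LSTMCell(num_units)
    if use_dropout:
        cell = DropoutWrapper(cell, output_keep_prob=keep_prob)
    if use_residual:
        cell = ResidualWrapper(cell)
    return cell

inputs = tf.placeholder(tf.float32, [None, None, 128])
keep_prob = tf.placeholder_with_default(1.0, shape=[])
stacked = MultiRNNCell([create_rnn_cell(128, keep_prob=keep_prob) for _ in range(2)])
outputs, state = tf.nn.dynamic_rnn(stacked, inputs, dtype=tf.float32)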
Example #16
    def Decoder(self, encoder_output):
        def loop_fn(time, cell_output, cell_state, loop_state):
            if cell_output is None:  #time = 0
                # initialization
                input = tf.concat((encoder_output, encoder_output), axis=1)
                state = (multirnn_cell.zero_state(self.batch_size, tf.float32))
                emit_output = None
                loop_state = None
                elements_finished = False
            else:
                emit_output = cell_output
                if self.args.test:
                    #decoder_units to val_size
                    transformed_output = tf.nn.xw_plus_b(
                        cell_output, self.decoder_W,
                        self.decoder_b)  #decoder_units to vac_size
                    #argmax
                    transformed_output = tf.argmax(transformed_output, 1)
                    transformed_output = tf.one_hot(transformed_output,
                                                    self.val_size,
                                                    on_value=1.0,
                                                    off_value=0.0,
                                                    axis=-1)
                    #val_size to decoder_units//2
                    transformed_output = self.WordEmb(transformed_output)
                elif self.args.train:
                    ys_onehot = tf.one_hot(self.ys_PH[:, (time - 1)],
                                           self.val_size,
                                           on_value=1.0,
                                           off_value=0.0,
                                           axis=-1)
                    transformed_output = self.WordEmb(ys_onehot)

                input = tf.concat([transformed_output, encoder_output], axis=1)
                state = cell_state
                loop_state = None
            elements_finished = (time >= self.max_len)
            return (elements_finished, input, state, emit_output, loop_state)

        multirnn_cell = MultiRNNCell(
            [LSTMCell(self.decoder_units) for _ in range(self.lay_num)],
            state_is_tuple=True)
        emit_ta, final_state, final_loop_state = tf.nn.raw_rnn(
            multirnn_cell, loop_fn)

        # transpose for putting batch dimension to first dimension
        outputs = tf.transpose(emit_ta.stack(), [1, 0, 2])

        #transform decoder_units to val_size
        decoder_output_flat = tf.reshape(outputs, [-1, self.decoder_units])
        decoder_output_transform_flat = tf.nn.xw_plus_b(
            decoder_output_flat, self.decoder_W, self.decoder_b)
        decoder_logits = tf.reshape(
            decoder_output_transform_flat,
            (self.batch_size, self.max_len, self.val_size))

        return decoder_logits
Example #17
 def _create_single_rnn_cell(self, num_units):
     cell = GRUCell(
         num_units) if self.cfg["cell_type"] == "gru" else LSTMCell(
             num_units)
     if self.cfg["use_dropout"]:
         cell = DropoutWrapper(cell, output_keep_prob=self.rnn_keep_prob)
     if self.cfg["use_residual"]:
         cell = ResidualWrapper(cell)
     return cell
    def _build_model_op(self):
        with tf.variable_scope('encoder'):
            cell_fw = LSTMCell(num_units=self.cfg.num_units)
            cell_bw = LSTMCell(num_units=self.cfg.num_units)
            outputs, _ = bidirectional_dynamic_rnn(cell_fw, cell_bw,
                                                   self.word_embeddings,
                                                   self.seq_lengths)
            enc_outputs = tf.concat(outputs, axis=-1)
            print('encoder output shape: {}'.format(
                enc_outputs.get_shape().as_list()))
        '''with tf.variable_scope('attention'):
            attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                num_units=self.cfg.num_units, memory=enc_outputs, memory_sequence_length=self.seq_lengths)
            cell_fw = LSTMCell(num_units=self.cfg.num_units)
            cell_bw = LSTMCell(num_units=self.cfg.num_units)
            attn_cell_fw = tf.contrib.seq2seq.AttentionWrapper(cell_fw, attention_mechanism)
            attn_cell_bw = tf.contrib.seq2seq.AttentionWrapper(cell_bw, attention_mechanism)
            outputs, _ = bidirectional_dynamic_rnn(attn_cell_fw, attn_cell_bw, enc_outputs, self.seq_lengths)
            attn_outputs = tf.concat(outputs, axis=-1)
            print('bidirectional attention output shape: {}'.format(attn_outputs.get_shape().as_list()))'''

        with tf.variable_scope('self_attention'):
            self_att = dot_attention(enc_outputs,
                                     enc_outputs,
                                     self.cfg.num_units,
                                     keep_prob=self.keep_prob,
                                     is_train=self.is_train)
            print('self-attention output shape: {}'.format(
                self_att.get_shape().as_list()))
        '''with tf.variable_scope('decoder'):
            cell_fw = LSTMCell(num_units=self.cfg.num_units)
            cell_bw = LSTMCell(num_units=self.cfg.num_units)
            outputs, _ = bidirectional_dynamic_rnn(cell_fw, cell_bw, self_att, self.seq_lengths)
            dec_outputs = tf.concat(outputs, axis=-1)
            print('decoder output shape: {}'.format(dec_outputs.get_shape().as_list()))'''

        with tf.variable_scope('project'):
            self.logits = dense(self_att,
                                self.cfg.tag_vocab_size,
                                use_bias=True)
            print('projected output (logits) shape: {}'.format(
                self.logits.get_shape().as_list()))
Example #19
def generator(z, hidden_units_g, seq_length, batch_size, num_generated_features, reuse=False, parameters=None, cond_dim=0, c=None, learn_scale=True):
    """
    If parameters are supplied, initialise as such
    """
    with tf.variable_scope("generator") as scope:
        if reuse:
            scope.reuse_variables()
        if parameters is None:
            W_out_G_initializer = tf.truncated_normal_initializer()
            b_out_G_initializer = tf.truncated_normal_initializer()
            scale_out_G_initializer = tf.constant_initializer(value=1.0)
            lstm_initializer = None
            bias_start = 1.0
        else:
            W_out_G_initializer = tf.constant_initializer(value=parameters['generator/W_out_G:0'])
            b_out_G_initializer = tf.constant_initializer(value=parameters['generator/b_out_G:0'])
            try:
                scale_out_G_initializer = tf.constant_initializer(value=parameters['generator/scale_out_G:0'])
            except KeyError:
                scale_out_G_initializer = tf.constant_initializer(value=1)
                assert learn_scale
            lstm_initializer = tf.constant_initializer(value=parameters['generator/rnn/lstm_cell/weights:0'])
            bias_start = parameters['generator/rnn/lstm_cell/biases:0']

        W_out_G = tf.get_variable(name='W_out_G', shape=[hidden_units_g, num_generated_features], initializer=W_out_G_initializer)
        b_out_G = tf.get_variable(name='b_out_G', shape=num_generated_features, initializer=b_out_G_initializer)
        scale_out_G = tf.get_variable(name='scale_out_G', shape=1, initializer=scale_out_G_initializer, trainable=learn_scale)
        if cond_dim > 0:
            # CGAN!
            assert c is not None
            repeated_encoding = tf.stack([c]*seq_length, axis=1)
            inputs = tf.concat([z, repeated_encoding], axis=2)

            #repeated_encoding = tf.tile(c, [1, tf.shape(z)[1]])
            #repeated_encoding = tf.reshape(repeated_encoding, [tf.shape(z)[0], tf.shape(z)[1], cond_dim])
            #inputs = tf.concat([repeated_encoding, z], 2)
        else:
            inputs = z

        cell = LSTMCell(num_units=hidden_units_g,
                           state_is_tuple=True,
                           initializer=lstm_initializer,
                           reuse=reuse)
        rnn_outputs, rnn_states = tf.nn.dynamic_rnn(
            cell=cell,
            dtype=tf.float32,
            sequence_length=[seq_length]*batch_size,
            inputs=inputs)
        rnn_outputs_2d = tf.reshape(rnn_outputs, [-1, hidden_units_g])
        logits_2d = tf.matmul(rnn_outputs_2d, W_out_G) + b_out_G
#        output_2d = tf.multiply(tf.nn.tanh(logits_2d), scale_out_G)
        output_2d = tf.nn.tanh(logits_2d)
        output_3d = tf.reshape(output_2d, [-1, seq_length, num_generated_features])
    return output_3d
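A minimal usage sketch for the generator above, assuming TF 1.x; the batch size, sequence length, latent dimension, and hidden size are illustrative values.

import numpy as np
import tensorflow as tf

batch_size, seq_length, latent_dim, num_features, hidden_units_g = 28, 30, 5, 1, 100
z = tf.placeholder(tf.float32, [batch_size, seq_length, latent_dim])
g_sample = generator(z, hidden_units_g, seq_length, batch_size, num_features)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    z_batch = np.random.normal(size=(batch_size, seq_length, latent_dim)).astype(np.float32)
    samples = sess.run(g_sample, feed_dict={z: z_batch})   # batch x time x features, values in (-1, 1)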
Example #20
 def __init__(self,
              num_layers,
              num_units,
              cell_type='lstm',
              scope='stack_bi_rnn'):
     if type(num_units) == list:
         assert len(
             num_units
         ) == num_layers, "if num_units is a list, its length should equal num_layers"
         self.cells_fw = [LSTMCell(num_units[i]) for i in range(num_layers)] if cell_type == 'lstm' else \
             [GRUCell(num_units[i]) for i in range(num_layers)]
         self.cells_bw = [LSTMCell(num_units[i]) for i in range(num_layers)] if cell_type == 'lstm' else \
             [GRUCell(num_units[i]) for i in range(num_layers)]
     else:
         self.cells_fw = [LSTMCell(num_units) for _ in range(num_layers)] if cell_type == 'lstm' else \
             [GRUCell(num_units) for _ in range(num_layers)]
         self.cells_bw = [LSTMCell(num_units) for _ in range(num_layers)] if cell_type == 'lstm' else \
             [GRUCell(num_units) for _ in range(num_layers)]
     self.num_layers = num_layers
     self.scope = scope
Example #21
    def __init__(self, cfg):

        # fed by 'feed_dict'
        self.context = tf.placeholder(name='context', shape=[None, None], dtype=tf.int32)
        self.seq_len = tf.placeholder(name='sequence_length', shape=[None], dtype=tf.int32)
        self.labels = tf.placeholder(name='labels', shape=[None, cfg.num_classes], dtype=tf.float32)
        self.lr = tf.placeholder(name='learning_rate', dtype=tf.float32)

        with tf.device('/gpu:0'):
            with tf.variable_scope('context_lookup_table'):
                with open(params['default_word_emb_pkl_path'], 'rb') as f:
                    word_emb = pickle.load(f)

                word_embeddings = tf.constant(word_emb, dtype=tf.float32)
                # make lookup table for given review context
                context_emb = tf.nn.embedding_lookup(word_embeddings, self.context)

            with tf.variable_scope('context_representation'):
                cell_fw = LSTMCell(num_units = cfg.num_units)
                cell_bw = LSTMCell(num_units = cfg.num_units)

                h,_ = bidirectional_dynamic_rnn(cell_fw, cell_bw, context_emb, sequence_length=self.seq_len, dtype=tf.float32, time_major=False)
                #concat forward and backward hidden states
                h = tf.concat(h, axis=-1)
                h = self.self_attention(h)
                weight = tf.get_variable(name='weight', shape=[2 * cfg.num_units, 2 * cfg.num_units], dtype=tf.float32)
                h = tf.nn.tanh(tf.matmul(h, weight))

            with tf.variable_scope('compute_logits'):
                context_logits = self.ffn_layer(h, cfg.hidden_units, cfg.num_classes, scope='ffn_layer')

            with tf.variable_scope('compute_loss'):
                self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=context_logits, labels=self.labels))
                self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.loss)

            with tf.variable_scope('accuracy'):
                #pred is 0 (neg) or 1 (pos)
                self.pred = tf.argmax(tf.nn.softmax(context_logits),1,name='prediction')
                num_correct_pred = tf.equal(self.pred, tf.argmax(self.labels, 1))
                self.accuracy = tf.reduce_mean(tf.cast(num_correct_pred, tf.float32))
def stacked_rnn_step(input_vocabulary_size,
                     hidden_size=13,
                     emb_dim=11,
                     n_layers=2,
                     variable_scope='encdec'):
    with tf.variable_scope(variable_scope, reuse=None):
        rnn_cell = MultiRNNCell([LSTMCell(hidden_size)] *
                                n_layers)  # stacked LSTM
        proj_wrapper = InputProjectionWrapper(rnn_cell, emb_dim)
    embedding_wrapper = EmbeddingWrapper(proj_wrapper, input_vocabulary_size,
                                         emb_dim)
    return embedding_wrapper
Example #23
 def __init__(self,
              num_layers,
              num_units,
              batch_size,
              input_size,
              keep_prob=1.0,
              is_train=None,
              scope="native_lstm",
              activation=tf.nn.tanh):
     self.num_layers = num_layers
     self.grus = []
     self.inits = []
     self.dropout_mask = []
     self.scope = scope
     for layer in range(num_layers):
         input_size_ = input_size if layer == 0 else 2 * num_units
         gru_fw = LSTMCell(num_units, activation=activation)
         gru_bw = LSTMCell(num_units, activation=activation)
         # init_fw = tf.tile(tf.Variable(
         #     tf.zeros([1, num_units])), [batch_size, 1])
         # init_bw = tf.tile(tf.Variable(
         #     tf.zeros([1, num_units])), [batch_size, 1])
         mask_fw = Dropout(tf.ones([batch_size, 1, input_size_],
                                   dtype=tf.float32),
                           keep_prob=keep_prob,
                           is_train=is_train,
                           mode='')
         mask_bw = Dropout(tf.ones([batch_size, 1, input_size_],
                                   dtype=tf.float32),
                           keep_prob=keep_prob,
                           is_train=is_train,
                           mode='')
         self.grus.append((
             gru_fw,
             gru_bw,
         ))
         self.dropout_mask.append((
             mask_fw,
             mask_bw,
         ))
Example #24
def context_representation(inputs,
                           seq_len,
                           num_units,
                           activation=tf.nn.tanh,
                           use_bias=False,
                           reuse=None,
                           name="context_rep"):
    with tf.variable_scope(name, reuse=reuse, dtype=tf.float32):
        cell_fw = LSTMCell(num_units=num_units)
        cell_bw = LSTMCell(num_units=num_units)

        context_features, _ = bidirectional_dynamic_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=inputs,
            sequence_length=seq_len,
            dtype=tf.float32,
            time_major=False,
            scope="bidirectional_dynamic_rnn")

        context_features = tf.concat(context_features, axis=-1)

        # self-attention
        context_features = self_attention(context_features,
                                          num_units=num_units,
                                          return_alphas=False,
                                          reuse=reuse,
                                          name="self_attention")

        # dense layer project
        context_features = tf.layers.dense(
            context_features,
            units=num_units,
            use_bias=use_bias,
            kernel_initializer=tf.glorot_uniform_initializer(),
            activation=activation,
            name="context_project")

        return context_features
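A usage sketch, assuming TF 1.x and that the project's self_attention and bidirectional_dynamic_rnn imports are available next to the function above; the embedding size is an illustrative stand-in.

import tensorflow as tf

inputs = tf.placeholder(tf.float32, [None, None, 300])   # batch x time x embedding
seq_len = tf.placeholder(tf.int32, [None])
features = context_representation(inputs, seq_len, num_units=128)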
Example #25
    def model(data, weights, biases):
        cell = LSTMCell(NUM_NEURONS)  # Or LSTMCell(num_neurons)
        cell = MultiRNNCell([cell] * NUM_LAYERS)

        output, _ = tf.nn.rnn(cell, train_data_node, dtype=DATA_TYPE)
        output = tf.transpose(output, [1, 0, 2])
        last = tf.gather(output, int(output.get_shape()[0]) - 1)
        out_size = int(train_labels_node.get_shape()[1])

        prediction = tf.nn.softmax(
            tf.matmul(last, weights['out']) + biases['out'])
        # cross_entropy = -tf.reduce_sum(train_labels_node * tf.log(prediction))
        return prediction
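A hedged modern-TF-1.x rewrite of the recurrent part above (an illustration, not the project's code): tf.nn.dynamic_rnn with one LSTMCell per layer replaces the deprecated list-based tf.nn.rnn call and the [cell] * NUM_LAYERS repetition.

import tensorflow as tf
from tensorflow.contrib.rnn import LSTMCell, MultiRNNCell

NUM_NEURONS, NUM_LAYERS = 128, 2
data = tf.placeholder(tf.float32, [None, None, 64])           # batch x time x feature
cell = MultiRNNCell([LSTMCell(NUM_NEURONS) for _ in range(NUM_LAYERS)])
outputs, _ = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)  # batch x time x NUM_NEURONS
last = outputs[:, -1, :]                                      # last time step of each sequence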
Example #26
    def encoder(self):
        ####Encoder
        with tf.variable_scope(self.model_name + "encoder_model"):
            if self.Bidirection == False:
                encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(self.num_units)
                self.encoder_outputs, self.encoder_final_state = tf.nn.dynamic_rnn(
                    cell=encoder_cell,
                    inputs=self.encoder_inputs_embedded,
                    sequence_length=self.encoder_inputs_length,
                    time_major=False,
                    dtype=tf.float32)
                self.hidden_units = self.num_units

            elif self.Bidirection == True:
                encoder_cell_fw = LSTMCell(self.num_units)
                encoder_cell_bw = LSTMCell(self.num_units)
                ((encoder_fw_outputs, encoder_bw_outputs),
                 (encoder_fw_final_state,
                  encoder_bw_final_state)) = (tf.nn.bidirectional_dynamic_rnn(
                      cell_fw=encoder_cell_fw,
                      cell_bw=encoder_cell_bw,
                      inputs=self.encoder_inputs_embedded,
                      sequence_length=self.encoder_inputs_length,
                      dtype=tf.float32,
                      time_major=False))
                # Concatenates tensors along one dimension.
                encoder_outputs = tf.concat(
                    (encoder_fw_outputs, encoder_bw_outputs), 2)

                encoder_final_state_c = tf.concat(
                    (encoder_fw_final_state.c, encoder_bw_final_state.c), 1)
                encoder_final_state_h = tf.concat(
                    (encoder_fw_final_state.h, encoder_bw_final_state.h), 1)

                # TF Tuple used by LSTM Cells for state_size, zero_state, and output state.
                self.encoder_final_state = LSTMStateTuple(
                    c=encoder_final_state_c, h=encoder_final_state_h)
                self.hidden_units = 2 * self.num_units
Example #27
    def __init__(self, feature_size, eb_dim, hidden_size, max_len_item, max_len_user, item_part_fnum, user_part_fnum, use_hist_u, use_hist_i, emb_initializer):
        super(LSTM4Rec, self).__init__(feature_size, eb_dim, hidden_size, max_len_item, max_len_user, item_part_fnum, user_part_fnum, use_hist_u, use_hist_i, emb_initializer)

        # RNN layer
        with tf.name_scope('item_rnn'):
            _, item_part_final_state = tf.nn.dynamic_rnn(LSTMCell(hidden_size, state_is_tuple=False), inputs=self.item_part_emb,
                                        sequence_length=self.item_len_ph, dtype=tf.float32, scope='lstm1')
        item_part = item_part_final_state
        
        with tf.name_scope('user_rnn'):
            _, user_part_final_state = tf.nn.dynamic_rnn(LSTMCell(hidden_size, state_is_tuple=False), inputs=self.user_part_emb,
                                        sequence_length=self.user_len_ph, dtype=tf.float32, scope='lstm2')
        user_part = user_part_final_state
        
        if use_hist_i and use_hist_u:
            inp = tf.concat([item_part, user_part], axis=1)
        elif use_hist_i and not use_hist_u:
            inp = item_part
        elif not use_hist_i and use_hist_u:
            inp = user_part
        # fully connected layer
        self.build_fc_net(inp)
        self.build_loss()
Example #28
 def Encoder(self):
     # a vector of length batch_size; each element holds the number of time steps of the corresponding input
     inputs_length = tf.fill([tf.shape(self.xs)[0]], self.input_timestep)
     rnn_cell = LSTMCell(self.encoder_units)
     # use bidirectional rnn as encoder architecture
     (fw_outputs,
      bw_outputs), (fw_final_state,
                    bw_final_state) = (tf.nn.bidirectional_dynamic_rnn(
                        cell_fw=rnn_cell,
                        cell_bw=rnn_cell,
                        inputs=self.xs,
                        sequence_length=inputs_length,
                        dtype=self.dtype))
     # merge every forward and backward output as total output
     output = tf.add(fw_outputs, bw_outputs) / 2
     # merge every forward and backward final state as final state
     state_c = tf.concat([fw_final_state.c, bw_final_state.c], axis=1)
     state_h = tf.concat([fw_final_state.h, bw_final_state.h], axis=1)
     final_state = LSTMStateTuple(c=state_c, h=state_h)
     return output, final_state
Example #29
    def build_graph(self):
        with tf.variable_scope('lstm'):
            lstm_cell = LSTMCell(self.layer_size)
            rnn_cell = MultiRNNCell([lstm_cell] * self.layers)
            cell_output, self.init_state = rnn_cell(self.model_input,
                                                    self.init_state)
            print("%i layers created" % self.layers)
            self.output_layer = self.__add_output_layer(
                "fc_out", cell_output, self.layer_size, self.output_dim)

            self.output_layer = tf.Print(
                self.output_layer,
                [self.output_layer,
                 tf.convert_to_tensor(self.ground_truth)],
                'Value of output layer and ground truth:',
                summarize=6)

            tf.histogram_summary('lstm_output', self.output_layer)

            return self.output_layer
Example #30
    def build_decoder_cell(self):
        # No beam search currently

        # Attention
        # TODO: other attention mechanism?
        attention_mechanism = BahdanauAttention(
            num_units=self.config.hidden_units,
            memory=self.encoder_outputs,
            memory_sequence_length=self.encoder_inputs_length)

        decoder_cells = [LSTMCell(self.config.hidden_units)
                         ] * self.config.decoder_depth
        decoder_initial_state = list(self.encoder_last_state)

        def attn_decoder_input_fn(inputs, attention):
            if not self.config.attn_input_feeding:
                return inputs

            # Essential when use_residual=True
            _input_layer = Dense(self.config.hidden_units,
                                 dtype=tf.float32,
                                 name='attn_input_feeding')
            return _input_layer(concat([inputs, attention], -1))

        # Add an AttentionWrapper on the last layer of the decoder
        decoder_cells[-1] = AttentionWrapper(
            cell=decoder_cells[-1],
            attention_mechanism=attention_mechanism,
            attention_layer_size=self.config.hidden_units,
            cell_input_fn=attn_decoder_input_fn,
            initial_cell_state=decoder_initial_state[-1],
            alignment_history=False,
            name='Attention_Wrapper')

        decoder_initial_state[-1] = decoder_cells[-1].zero_state(
            batch_size=self.batch_size, dtype=tf.float32)
        decoder_initial_state = tuple(decoder_initial_state)

        return MultiRNNCell(decoder_cells), decoder_initial_state
Example #31
    def __init__(self, data, FLAGS):
        with tf.variable_scope("history_length"):
            history_length = data.train_set["features"].shape[1]

        encoder_lstm_size = 16
        encoder_embedding_size = 16 * 2
        encoder_vocabulary_length = len(data.idx2word_history)
        with tf.variable_scope("encoder_sequence_length"):
            encoder_sequence_length = data.train_set["features"].shape[2]

        decoder_lstm_size = 16
        decoder_embedding_size = 16
        decoder_vocabulary_length = len(data.idx2word_target)
        with tf.variable_scope("decoder_sequence_length"):
            decoder_sequence_length = data.train_set["targets"].shape[1]

        # inference model
        with tf.name_scope("model"):
            features = tf.placeholder("int32", name="features")
            targets = tf.placeholder("int32", name="true_targets")
            use_dropout_prob = tf.placeholder("float32", name="use_dropout_prob")

            with tf.variable_scope("batch_size"):
                batch_size = tf.shape(features)[0]

            encoder_embedding = embedding(
                input=features, length=encoder_vocabulary_length, size=encoder_embedding_size, name="encoder_embedding"
            )

            with tf.name_scope("UtterancesEncoder"):
                with tf.name_scope("RNNForwardUtteranceEncoderCell_1"):
                    cell_fw_1 = LSTMCell(
                        num_units=encoder_lstm_size, input_size=encoder_embedding_size, use_peepholes=True
                    )
                    initial_state_fw_1 = cell_fw_1.zero_state(batch_size, tf.float32)

                with tf.name_scope("RNNBackwardUtteranceEncoderCell_1"):
                    cell_bw_1 = LSTMCell(
                        num_units=encoder_lstm_size, input_size=encoder_embedding_size, use_peepholes=True
                    )
                    initial_state_bw_1 = cell_bw_1.zero_state(batch_size, tf.float32)

                with tf.name_scope("RNNForwardUtteranceEncoderCell_2"):
                    cell_fw_2 = LSTMCell(
                        num_units=encoder_lstm_size,
                        input_size=cell_fw_1.output_size + cell_bw_1.output_size,
                        use_peepholes=True,
                    )
                    initial_state_fw_2 = cell_fw_2.zero_state(batch_size, tf.float32)

                # the input data has these dimensions
                # [
                #   #batch,
                #   #utterance in a history (a dialogue),
                #   #word in an utterance (a sentence),
                #   embedding dimension
                # ]

                # encode all utterances along the word axis
                encoder_states_2d = []

                for utterance in range(history_length):
                    encoder_outputs, _ = brnn(
                        cell_fw=cell_fw_1,
                        cell_bw=cell_bw_1,
                        inputs=[encoder_embedding[:, utterance, word, :] for word in range(encoder_sequence_length)],
                        initial_state_fw=initial_state_fw_1,
                        initial_state_bw=initial_state_bw_1,
                        name="RNNUtteranceBidirectionalLayer",
                        reuse=True if utterance > 0 else None,
                    )

                    _, encoder_states = rnn(
                        cell=cell_fw_2,
                        inputs=encoder_outputs,
                        initial_state=initial_state_fw_2,
                        name="RNNUtteranceForwardEncoder",
                        reuse=True if utterance > 0 else None,
                    )

                    # print(encoder_states[-1])
                    encoder_states = tf.concat(1, tf.expand_dims(encoder_states[-1], 1))
                    # print(encoder_states)
                    encoder_states_2d.append(encoder_states)

                encoder_states_2d = tf.concat(1, encoder_states_2d)
                # print('encoder_states_2d', encoder_states_2d)

            with tf.name_scope("HistoryEncoder"):
                # encode all histories along the utterance axis
                with tf.name_scope("RNNForwardHistoryEncoderCell_1"):
                    cell_fw_1 = LSTMCell(
                        num_units=encoder_lstm_size, input_size=cell_fw_2.state_size, use_peepholes=True
                    )
                    initial_state_fw_1 = cell_fw_1.zero_state(batch_size, tf.float32)

                with tf.name_scope("RNNBackwardHistoryEncoderCell_1"):
                    cell_bw_1 = LSTMCell(
                        num_units=encoder_lstm_size, input_size=cell_fw_2.state_size, use_peepholes=True
                    )
                    initial_state_bw_1 = cell_fw_2.zero_state(batch_size, tf.float32)

                with tf.name_scope("RNNForwardHistoryEncoderCell_2"):
                    cell_fw_2 = LSTMCell(
                        num_units=encoder_lstm_size,
                        input_size=cell_fw_1.output_size + cell_bw_1.output_size,
                        use_peepholes=True,
                    )
                    initial_state_fw_2 = cell_fw_2.zero_state(batch_size, tf.float32)

                encoder_outputs, _ = brnn(
                    cell_fw=cell_fw_1,
                    cell_bw=cell_bw_1,
                    inputs=[encoder_states_2d[:, utterance, :] for utterance in range(history_length)],
                    initial_state_fw=initial_state_fw_1,
                    initial_state_bw=initial_state_bw_1,
                    name="RNNHistoryBidirectionalLayer",
                    reuse=None,
                )

                _, encoder_states = rnn(
                    cell=cell_fw_2,
                    inputs=encoder_outputs,
                    initial_state=initial_state_fw_2,
                    name="RNNHistoryForwardEncoder",
                    reuse=None,
                )

            with tf.name_scope("Decoder"):
                use_inputs_prob = tf.placeholder("float32", name="use_inputs_prob")

                with tf.name_scope("RNNDecoderCell"):
                    cell = LSTMCell(
                        num_units=decoder_lstm_size,
                        input_size=decoder_embedding_size + cell_fw_2.state_size,
                        use_peepholes=True,
                    )
                    initial_state = cell.zero_state(batch_size, tf.float32)

                # decode all histories along the utterance axis
                final_encoder_state = encoder_states[-1]

                decoder_states, decoder_outputs, decoder_outputs_softmax = rnn_decoder(
                    cell=cell,
                    inputs=[targets[:, word] for word in range(decoder_sequence_length)],
                    static_input=final_encoder_state,
                    initial_state=initial_state,  # final_encoder_state,
                    embedding_size=decoder_embedding_size,
                    embedding_length=decoder_vocabulary_length,
                    sequence_length=decoder_sequence_length,
                    name="RNNDecoder",
                    reuse=False,
                    use_inputs_prob=use_inputs_prob,
                )

                targets_given_features = tf.concat(1, decoder_outputs_softmax)
                # print(p_o_i)

        if FLAGS.print_variables:
            for v in tf.trainable_variables():
                print(v.name)

        with tf.name_scope("loss"):
            one_hot_labels = dense_to_one_hot(targets, decoder_vocabulary_length)
            loss = tf.reduce_mean(-one_hot_labels * tf.log(targets_given_features), name="loss")
            for v in tf.trainable_variables():
                for n in ["/W_", "/W:", "/B:"]:
                    if n in v.name:
                        print("Regularization using", v.name)
                        loss += FLAGS.regularization * tf.reduce_mean(tf.pow(v, 2))
            tf.scalar_summary("loss", loss)

        with tf.name_scope("accuracy"):
            correct_prediction = tf.equal(tf.argmax(one_hot_labels, 2), tf.argmax(targets_given_features, 2))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
            tf.scalar_summary("accuracy", accuracy)

        self.data = data
        self.train_set = data.train_set
        self.test_set = data.test_set
        self.idx2word_history = data.idx2word_history
        self.word2idx_history = data.word2idx_history
        self.idx2word_target = data.idx2word_target
        self.word2idx_target = data.word2idx_target

        self.history_length = history_length
        self.encoder_sequence_length = encoder_sequence_length
        self.features = features
        self.targets = targets
        self.batch_size = batch_size
        self.use_inputs_prob = use_inputs_prob
        self.targets_given_features = targets_given_features
        self.loss = loss
        self.accuracy = accuracy
Example #32
def train(train_set, test_set, idx2word_history, word2idx_history, idx2word_target, word2idx_target):
    with tf.variable_scope("history_length"):
        history_length = train_set['features'].shape[1]

    encoder_lstm_size = 16*4
    encoder_embedding_size = 16*8
    encoder_vocabulary_length = len(idx2word_history)
    with tf.variable_scope("encoder_sequence_length"):
        encoder_sequence_length = train_set['features'].shape[2]

    decoder_lstm_size = 16*4
    decoder_embedding_size = 16*4
    decoder_vocabulary_length = len(idx2word_target)
    with tf.variable_scope("decoder_sequence_length"):
        decoder_sequence_length = train_set['targets'].shape[1]

    # inference model
    with tf.name_scope('model'):
        features = tf.placeholder("int32", name='features')
        targets = tf.placeholder("int32", name='true_targets')
        use_dropout_prob = tf.placeholder("float32", name='use_dropout_prob')

        with tf.variable_scope("batch_size"):
            batch_size = tf.shape(features)[0]

        encoder_embedding = embedding(
                input=features,
                length=encoder_vocabulary_length,
                size=encoder_embedding_size,
                name='encoder_embedding'
        )

        with tf.name_scope("UtterancesEncoder"):
            with tf.name_scope("RNNForwardUtteranceEncoderCell_1"):
                cell_fw_1 = LSTMCell(
                        num_units=encoder_lstm_size,
                        input_size=encoder_embedding_size,
                        use_peepholes=True
                )
                initial_state_fw_1 = cell_fw_1.zero_state(batch_size, tf.float32)

            with tf.name_scope("RNNBackwardUtteranceEncoderCell_1"):
                cell_bw_1 = LSTMCell(
                        num_units=encoder_lstm_size,
                        input_size=encoder_embedding_size,
                        use_peepholes=True
                )
                initial_state_bw_1 = cell_bw_1.zero_state(batch_size, tf.float32)

            with tf.name_scope("RNNForwardUtteranceEncoderCell_2"):
                cell_fw_2 = LSTMCell(
                        num_units=encoder_lstm_size,
                        input_size=cell_fw_1.output_size + cell_bw_1.output_size,
                        use_peepholes=True
                )
                initial_state_fw_2 = cell_fw_2.zero_state(batch_size, tf.float32)

            # the input data has these dimensions
            # [
            #   #batch,
            #   #utterance in a history (a dialogue),
            #   #word in an utterance (a sentence),
            #   embedding dimension
            # ]

            # encode all utterances along the word axis
            encoder_states_2d = []

            for utterance in range(history_length):
                encoder_outputs, _ = brnn(
                        cell_fw=cell_fw_1,
                        cell_bw=cell_bw_1,
                        inputs=[encoder_embedding[:, utterance, word, :] for word in range(encoder_sequence_length)],
                        initial_state_fw=initial_state_fw_1,
                        initial_state_bw=initial_state_bw_1,
                        name='RNNUtteranceBidirectionalLayer',
                        reuse=True if utterance > 0 else None
                )

                _, encoder_states = rnn(
                        cell=cell_fw_2,
                        inputs=encoder_outputs,
                        initial_state=initial_state_fw_2,
                        name='RNNUtteranceForwardEncoder',
                        reuse=True if utterance > 0 else None
                )

                # print(encoder_states[-1])
                encoder_states = tf.concat(1, tf.expand_dims(encoder_states[-1], 1))
                # print(encoder_states)
                encoder_states_2d.append(encoder_states)

            encoder_states_2d = tf.concat(1, encoder_states_2d)
            # print('encoder_states_2d', encoder_states_2d)

        with tf.name_scope("HistoryEncoder"):
            # encode all histories along the utterance axis
            with tf.name_scope("RNNFrowardHistoryEncoderCell_1"):
                cell_fw_1 = LSTMCell(
                        num_units=encoder_lstm_size,
                        input_size=cell_fw_2.state_size,
                        use_peepholes=True
                )
                initial_state_fw_1 = cell_fw_1.zero_state(batch_size, tf.float32)

            with tf.name_scope("RNNBackwardHistoryEncoderCell_1"):
                cell_bw_1 = LSTMCell(
                        num_units=encoder_lstm_size,
                        input_size=cell_fw_2.state_size,
                        use_peepholes=True
                )
                initial_state_bw_1 = cell_fw_2.zero_state(batch_size, tf.float32)

            with tf.name_scope("RNNFrowardHistoryEncoderCell_2"):
                cell_fw_2 = LSTMCell(
                        num_units=encoder_lstm_size,
                        input_size=cell_fw_1.output_size + cell_bw_1.output_size,
                        use_peepholes=True
                )
                initial_state_fw_2 = cell_fw_2.zero_state(batch_size, tf.float32)

            encoder_outputs, _ = brnn(
                    cell_fw=cell_fw_1,
                    cell_bw=cell_bw_1,
                    inputs=[encoder_states_2d[:, utterance, :] for utterance in range(history_length)],
                    initial_state_fw=initial_state_fw_1,
                    initial_state_bw=initial_state_bw_1,
                    name='RNNHistoryBidirectionalLayer',
                    reuse=None
            )

            _, encoder_states = rnn(
                    cell=cell_fw_2,
                    inputs=encoder_outputs,
                    initial_state=initial_state_fw_2,
                    name='RNNHistoryForwardEncoder',
                    reuse=None
            )

        with tf.name_scope("Decoder"):
            use_inputs_prob = tf.Variable(1.0, name='use_inputs_prob', trainable=False)
            use_inputs_prob_decay_op = use_inputs_prob.assign(use_inputs_prob * FLAGS.use_inputs_prob_decay)

            with tf.name_scope("RNNDecoderCell"):
                cell = LSTMCell(
                        num_units=decoder_lstm_size,
                        input_size=decoder_embedding_size,
                        use_peepholes=True,
                )

            # decode all histories along the utterance axis
            final_encoder_state = encoder_states[-1]

            decoder_states, decoder_outputs, decoder_outputs_softmax = rnn_decoder(
                    cell=cell,
                    inputs=[targets[:, word] for word in range(decoder_sequence_length)],
                    initial_state=final_encoder_state,
                    embedding_size=decoder_embedding_size,
                    embedding_length=decoder_vocabulary_length,
                    sequence_length=decoder_sequence_length,
                    name='RNNDecoder',
                    reuse=False,
                    use_inputs_prob=use_inputs_prob
            )

            targets_give_features = tf.concat(1, decoder_outputs_softmax)
            # print(p_o_i)

    if FLAGS.print_variables:
        for v in tf.trainable_variables():
            print(v.name)

    with tf.name_scope('loss'):
        one_hot_labels = dense_to_one_hot(targets, decoder_vocabulary_length)
        loss = tf.reduce_mean(- one_hot_labels * tf.log(targets_give_features), name='loss')
        for v in tf.trainable_variables():
            for n in ['/W_', '/W:', '/B:']:
                if n in v.name:
                    print('Regularization using', v.name)
                    loss += FLAGS.regularization * tf.reduce_mean(tf.pow(v, 2))
        tf.scalar_summary('loss', loss)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(one_hot_labels, 2), tf.argmax(targets_given_features, 2))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
        tf.scalar_summary('accuracy', accuracy)

    # with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    with tf.Session() as sess:
        # Merge all the summaries and write them out to ./log
        merged = tf.merge_all_summaries()
        writer = tf.train.SummaryWriter('./log', sess.graph_def)
        saver = tf.train.Saver()

        # training
        tvars = tf.trainable_variables()
        # tvars = [v for v in tvars if 'embedding_table' not in v.name] # all variables except embeddings
        learning_rate = tf.Variable(float(FLAGS.learning_rate), trainable=False)

        # train_op = tf.train.GradientDescentOptimizer(
        train_op = AdamPlusOptimizer(
                learning_rate=learning_rate,
                beta1=FLAGS.beta1,
                beta2=FLAGS.beta2,
                epsilon=FLAGS.epsilon,
                pow=FLAGS.pow,
                use_locking=False,
                name='trainer')

        learning_rate_decay_op = learning_rate.assign(learning_rate * FLAGS.decay)
        global_step = tf.Variable(0, trainable=False)
        gradients = tf.gradients(loss, tvars)

        clipped_gradients, _ = tf.clip_by_global_norm(gradients, FLAGS.max_gradient_norm)
        train_op = train_op.apply_gradients(zip(clipped_gradients, tvars), global_step=global_step)

        tf.initialize_all_variables().run()

        # prepare batch indexes
        train_set_size = train_set['features'].shape[0]
        print('Train set size:', train_set_size)
        batch_size = FLAGS.batch_size
        print('Batch size:', batch_size)
        batch_indexes = [[i, i + batch_size] for i in range(0, train_set_size, batch_size)]
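        # each entry is a [start, end) slice into the training data; the last slice may run past
        # the end of the set, which numpy slicing handles by truncating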
        print('#Batches:', len(batch_indexes))
        # print('Batch indexes', batch_indexes)

        previous_accuracies = []
        previous_losses = []
        for epoch in range(FLAGS.max_epochs):
            print('Batch: ', end=' ', flush=True)
            for b, batch in enumerate(batch_indexes):
                print(b, end=' ', flush=True)
                sess.run(
                        train_op,
                        feed_dict={
                            features: train_set['features'][batch[0]:batch[1]],
                            targets: train_set['targets'][batch[0]:batch[1]],
                        }
                )
            print()
            shuffle(batch_indexes)

            if epoch % max(min(int(FLAGS.max_epochs / 100), 100), 1) == 0:
                summary, lss, acc = sess.run([merged, loss, accuracy],
                                             feed_dict={features: test_set['features'], targets: test_set['targets']})
                writer.add_summary(summary, epoch)
                print()
                print('Epoch: {epoch}'.format(epoch=epoch))
                print(' - accuracy        = {acc:f}'.format(acc=acc))
                print(' - loss            = {lss:f}'.format(lss=lss))
                print(' - learning rate   = {lr:f}'.format(lr=learning_rate.eval()))
                print(' - use inputs prob = {uip:f}'.format(uip=use_inputs_prob.eval()))
                print()

                # decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and lss > max(previous_losses[-3:]):
                    sess.run(learning_rate_decay_op)
                previous_losses.append(lss)

                # stop when accuracy is above the 0.9999 threshold or when there was no improvement over the last 20 evaluations
                previous_accuracies.append(acc)
                if acc > 0.9999 or max(previous_accuracies) > max(previous_accuracies[-20:]):
                    break

            sess.run(use_inputs_prob_decay_op)

        save_path = saver.save(sess, ".rnn-model.ckpt")
        print()
        print("Model saved in file: %s" % save_path)
        print()

        # print('Test features')
        # print(test_set['features'])
        # print('Test targets')
        print('Shape of targets:', test_set['targets'].shape)
        # print(test_set['targets'])
        print('Predictions')
        targets_given_features = sess.run(targets_given_features,
                                          feed_dict={features: test_set['features'], targets: test_set['targets']})
        targets_given_features_argmax = np.argmax(targets_given_features, 2)
        print('Shape of predictions:', targets_given_features.shape)
        print('Argmax predictions')
        # print(targets_given_features_argmax)
        print()
        for history_idx in range(0, targets_given_features_argmax.shape[0], max(int(targets_given_features_argmax.shape[0]/10), 1)):
            print('History', history_idx)

            for j in range(test_set['features'].shape[1]):
                utterance = []
                for k in range(test_set['features'].shape[2]):
                    w = idx2word_history[test_set['features'][history_idx, j, k]]
                    if w not in ['_SOS_', '_EOS_']:
                        utterance.append(w)
                print('U {j}: {c:80}'.format(j=j, c=' '.join(utterance)))

            prediction = []
            for j in range(targets_given_features_argmax.shape[1]):
                w = idx2word_target[targets_given_features_argmax[history_idx, j]]
                if w not in ['_SOS_', '_EOS_']:
                    prediction.append(w)

            print('P  : {t:80}'.format(t=' '.join(prediction)))

            target = []
            for j in range(test_set['targets'].shape[1]):
                w = idx2word_target[test_set['targets'][history_idx, j]]
                if w not in ['_SOS_', '_EOS_']:
                    target.append(w)

            print('T  : {t:80}'.format(t=' '.join(target)))
            print()
Example No. 33
0
    def __init__(self, data, FLAGS):
        with tf.variable_scope("history_length"):
            history_length = data.train_set['features'].shape[1]

        encoder_embedding_size = 32 * 4
        encoder_vocabulary_length = len(data.idx2word_history)
        with tf.variable_scope("encoder_sequence_length"):
            encoder_sequence_length = data.train_set['features'].shape[2]

        decoder_lstm_size = 16 * 2
        decoder_embedding_size = 16 * 2
        decoder_vocabulary_length = len(data.idx2word_target)
        with tf.variable_scope("decoder_sequence_length"):
            decoder_sequence_length = data.train_set['targets'].shape[1]

        # inference model
        with tf.name_scope('model'):
            features = tf.placeholder("int32", name='features')
            targets = tf.placeholder("int32", name='true_targets')
            use_dropout_prob = tf.placeholder("float32", name='use_dropout_prob')

            with tf.variable_scope("batch_size"):
                batch_size = tf.shape(features)[0]

            encoder_embedding = embedding(
                    input=features,
                    length=encoder_vocabulary_length,
                    size=encoder_embedding_size,
                    name='encoder_embedding'
            )

            with tf.name_scope("UtterancesEncoder"):
                conv3 = encoder_embedding
                # conv3 = conv2d(
                #         input=conv3,
                #         filter=[1, 3, encoder_embedding_size, encoder_embedding_size],
                #         name='conv_utt_size_3_layer_1'
                # )
                # conv_s3 = conv2d(
                #         input=conv_s3,
                #         filter=[1, 3, encoder_embedding_size, encoder_embedding_size],
                #         name='conv_utt_size_3_layer_2'
                # )
                # print(conv3)
                # k = encoder_sequence_length
                # mp_s3 = max_pool(conv_s3, ksize=[1, 1, k, 1], strides=[1, 1, k, 1])
                # print(mp_s3)

                # encoded_utterances = mp_s3
                encoded_utterances = tf.reduce_max(conv3, [2], keep_dims=True)
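                # max-pooling over the word axis (axis 2) collapses every utterance into a single
                # embedding-sized vector, i.e. a simple order-insensitive bag-of-words encoding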

            with tf.name_scope("HistoryEncoder"):
                conv3 = encoded_utterances
                # conv3 = conv2d(
                #         input=conv3,
                #         filter=[3, 1, encoder_embedding_size, encoder_embedding_size],
                #         name='conv_hist_size_3_layer_1'
                # )
                # conv_s3 = conv2d(
                #         input=conv_s3,
                #         filter=[3, 1, encoder_embedding_size, encoder_embedding_size],
                #         name='conv_hist_size_3_layer_2'
                # )
                # print(conv3)
                # k = encoder_sequence_length
                # mp_s3 = max_pool(conv_s3, ksize=[1, 1, k, 1], strides=[1, 1, k, 1])
                # print(mp_s3)

                encoded_history = tf.reduce_max(conv3, [1, 2])
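                # a second max-pool over the utterance axis (and the kept singleton word axis)
                # reduces the whole dialogue history to one vector per example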

                # projection = linear(
                #         input=encoded_history,
                #         input_size=encoder_embedding_size,
                #         output_size=encoder_embedding_size,
                #         name='linear_projection_1'
                # )
                # encoded_history = tf.nn.relu(projection)
                # projection = linear(
                #         input=encoded_history,
                #         input_size=encoder_embedding_size,
                #         output_size=encoder_embedding_size,
                #         name='linear_projection_2'
                # )
                # encoded_history = tf.nn.relu(projection)
                # projection = linear(
                #         input=encoded_history,
                #         input_size=encoder_embedding_size,
                #         output_size=decoder_lstm_size * 2,
                #         name='linear_projection_3'
                # )
                # encoded_history = tf.nn.relu(projection)

            with tf.name_scope("Decoder"):
                use_inputs_prob = tf.placeholder("float32", name='use_inputs_prob')

                with tf.name_scope("RNNDecoderCell"):
                    cell = LSTMCell(
                            num_units=decoder_lstm_size,
                            input_size=decoder_embedding_size+encoder_embedding_size,
                            use_peepholes=True,
                    )
                    initial_state = cell.zero_state(batch_size, tf.float32)

                # decode all histories along the utterance axis
                final_encoder_state = encoded_history
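                # the encoded history is handed to the decoder as a static input (presumably
                # concatenated to the word embedding at every step, which matches the cell's
                # input_size above) rather than as its initial state, which stays at zeros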

                decoder_states, decoder_outputs, decoder_outputs_softmax = rnn_decoder(
                        cell=cell,
                        inputs=[targets[:, word] for word in range(decoder_sequence_length)],
                        static_input=final_encoder_state,
                        initial_state=initial_state, #final_encoder_state,
                        embedding_size=decoder_embedding_size,
                        embedding_length=decoder_vocabulary_length,
                        sequence_length=decoder_sequence_length,
                        name='RNNDecoder',
                        reuse=False,
                        use_inputs_prob=use_inputs_prob
                )

                targets_given_features = tf.concat(1, decoder_outputs_softmax)
                # print(p_o_i)

        if FLAGS.print_variables:
            for v in tf.trainable_variables():
                print(v.name)

        with tf.name_scope('loss'):
            one_hot_labels = dense_to_one_hot(targets, decoder_vocabulary_length)
            loss = tf.reduce_mean(- one_hot_labels * tf.log(targets_given_features), name='loss')
            for v in tf.trainable_variables():
                for n in ['/W_', '/W:', '/B:']:
                    if n in v.name:
                        print('Regularization using', v.name)
                        loss += FLAGS.regularization * tf.reduce_mean(tf.pow(v, 2))
            tf.scalar_summary('loss', loss)

        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(tf.argmax(one_hot_labels, 2), tf.argmax(targets_given_features, 2))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
            tf.scalar_summary('accuracy', accuracy)

        self.data = data
        self.train_set = data.train_set
        self.test_set = data.test_set
        self.idx2word_history = data.idx2word_history
        self.word2idx_history = data.word2idx_history
        self.idx2word_target = data.idx2word_target
        self.word2idx_target = data.word2idx_target

        self.history_length = history_length
        self.encoder_sequence_length = encoder_sequence_length
        self.features = features
        self.targets = targets
        self.batch_size = batch_size
        self.use_inputs_prob = use_inputs_prob
        self.targets_given_features = targets_given_features
        self.loss = loss
        self.accuracy = accuracy
Example No. 34
0
	n_steps = 5
	seq_width = 6
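
	#`size` (LSTM hidden units) and `batch_seq_len` (batch size) are not defined in this snippet;
	#the values below are only assumptions so the example can be run stand-alone
	size = 10
	batch_seq_len = 10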

	initializer = tf.random_uniform_initializer(-1,1) 

	#sequence we will provide at runtime  
	seq_input = tf.placeholder(tf.float32, [n_steps, batch_seq_len, seq_width])
	
	#what timestep we want to stop at
	early_stop = tf.placeholder(tf.int32)
	
	#inputs to the rnn need to be a list, with one item per timestep;
	#we split the input into timesteps and reshape each slice, because split keeps dims by default
	inputs = [tf.reshape(i, (batch_seq_len, seq_width)) for i in tf.split(0, n_steps, seq_input)]

	cell = LSTMCell(size, seq_width, initializer=initializer)

	initial_state = cell.zero_state(batch_seq_len, tf.float32)
	outputs, states = rnn.rnn(cell, inputs, initial_state=initial_state, sequence_length=early_stop)
	
	#create the variable-initialization op; this needs to be run by the session
	iop = tf.global_variables_initializer()

	session = tf.Session()
	#actually initialize the variables; if you don't do this you get errors about uninitialized stuff
	session.run(iop)

	#random input batch of shape (n_steps, batch_seq_len, seq_width)
	feed = {early_stop:5, seq_input:np.random.rand(n_steps, batch_seq_len, seq_width).astype('float32')}
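
	#run the unrolled rnn on the random batch (a minimal sketch of how `feed` would be used)
	outs = session.run(outputs, feed_dict=feed)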
	
Example No. 35
0
    def __init__(self, data, FLAGS):
        super(Model, self).__init__(data, FLAGS)

        encoder_embedding_size = 16
        encoder_lstm_size = 16
        encoder_vocabulary_length = len(data.idx2word_history)
        encoder_sequence_length = data.train_set['histories'].shape[2]
        history_length = data.train_set['histories'].shape[1]

        action_templates_vocabulary_length = len(data.idx2word_action_template)

        with tf.name_scope('data'):
            batch_histories = tf.Variable(data.batch_histories, name='histories', trainable=False)
            batch_actions_template = tf.Variable(data.batch_actions_template, name='actions',
                                                 trainable=False)

            histories = tf.gather(batch_histories, self.batch_idx)
            actions_template = tf.gather(batch_actions_template, self.batch_idx)

        with tf.name_scope('model'):
            with tf.variable_scope("batch_size"):
                batch_size = tf.shape(histories)[0]

            encoder_embedding = embedding(
                    input=histories,
                    length=encoder_vocabulary_length,
                    size=encoder_embedding_size,
                    name='encoder_embedding'
            )

            with tf.name_scope("UtterancesEncoder"):
                with tf.name_scope("RNNForwardUtteranceEncoderCell_1"):
                    cell_fw_1 = LSTMCell(
                            num_units=encoder_lstm_size,
                            input_size=encoder_embedding_size,
                            use_peepholes=True
                    )
                    initial_state_fw_1 = cell_fw_1.zero_state(batch_size, tf.float32)

                with tf.name_scope("RNNBackwardUtteranceEncoderCell_1"):
                    cell_bw_1 = LSTMCell(
                            num_units=encoder_lstm_size,
                            input_size=encoder_embedding_size,
                            use_peepholes=True
                    )
                    initial_state_bw_1 = cell_bw_1.zero_state(batch_size, tf.float32)

                with tf.name_scope("RNNForwardUtteranceEncoderCell_2"):
                    cell_fw_2 = LSTMCell(
                            num_units=encoder_lstm_size,
                            input_size=cell_fw_1.output_size + cell_bw_1.output_size,
                            use_peepholes=True
                    )
                    initial_state_fw_2 = cell_fw_2.zero_state(batch_size, tf.float32)

                # the input data has the following dimensions:
                # [
                #   #batch,
                #   #utterance in a history (a dialogue),
                #   #word in an utterance (a sentence),
                #   embedding dimension
                # ]

                # encode all utterances along the word axis
                encoder_states_2d = []
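                # every utterance is encoded with the same bidirectional + forward LSTM stack;
                # the variables are shared by passing reuse=True after the first iteration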

                for utterance in range(history_length):
                    encoder_outputs, _ = brnn(
                            cell_fw=cell_fw_1,
                            cell_bw=cell_bw_1,
                            inputs=[encoder_embedding[:, utterance, word, :] for word in
                                    range(encoder_sequence_length)],
                            initial_state_fw=initial_state_fw_1,
                            initial_state_bw=initial_state_bw_1,
                            name='RNNUtteranceBidirectionalLayer',
                            reuse=True if utterance > 0 else None
                    )

                    _, encoder_states = rnn(
                            cell=cell_fw_2,
                            inputs=encoder_outputs,
                            initial_state=initial_state_fw_2,
                            name='RNNUtteranceForwardEncoder',
                            reuse=True if utterance > 0 else None
                    )

                    # print(encoder_states[-1])
                    encoder_states = tf.concat(1, tf.expand_dims(encoder_states[-1], 1))
                    # print(encoder_states)
                    encoder_states_2d.append(encoder_states)

                encoder_states_2d = tf.concat(1, encoder_states_2d)
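                # stacking the per-utterance final states gives a tensor of shape
                # [batch, history_length, state_size] that the history encoder below consumes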
                # print('encoder_states_2d', encoder_states_2d)

            with tf.name_scope("HistoryEncoder"):
                # encode all histories along the utterance axis
                with tf.name_scope("RNNForwardHistoryEncoderCell_1"):
                    cell_fw_1 = LSTMCell(
                            num_units=encoder_lstm_size,
                            input_size=cell_fw_2.state_size,
                            use_peepholes=True
                    )
                    initial_state_fw_1 = cell_fw_1.zero_state(batch_size, tf.float32)

                with tf.name_scope("RNNBackwardHistoryEncoderCell_1"):
                    cell_bw_1 = LSTMCell(
                            num_units=encoder_lstm_size,
                            input_size=cell_fw_2.state_size,
                            use_peepholes=True
                    )
                    initial_state_bw_1 = cell_bw_1.zero_state(batch_size, tf.float32)

                with tf.name_scope("RNNForwardHistoryEncoderCell_2"):
                    cell_fw_2 = LSTMCell(
                            num_units=encoder_lstm_size,
                            input_size=cell_fw_1.output_size + cell_bw_1.output_size,
                            use_peepholes=True
                    )
                    initial_state_fw_2 = cell_fw_2.zero_state(batch_size, tf.float32)

                encoder_outputs, _ = brnn(
                        cell_fw=cell_fw_1,
                        cell_bw=cell_bw_1,
                        inputs=[encoder_states_2d[:, utterance, :] for utterance in range(history_length)],
                        initial_state_fw=initial_state_fw_1,
                        initial_state_bw=initial_state_bw_1,
                        name='RNNHistoryBidirectionalLayer',
                        reuse=None
                )

                _, encoder_states = rnn(
                        cell=cell_fw_2,
                        inputs=encoder_outputs,
                        initial_state=initial_state_fw_2,
                        name='RNNHistoryForwardEncoder',
                        reuse=None
                )

            with tf.name_scope("Decoder"):
                linear_size = cell_fw_2.state_size

                # decode all histories along the utterance axis
                activation = tf.nn.relu(encoder_states[-1])
                activation = tf.nn.dropout(activation, self.dropout_keep_prob)

                projection = linear(
                        input=activation,
                        input_size=linear_size,
                        output_size=linear_size,
                        name='linear_projection_1'
                )
                activation = tf.nn.relu(projection)
                activation = tf.nn.dropout(activation, self.dropout_keep_prob)

                projection = linear(
                        input=activation,
                        input_size=linear_size,
                        output_size=linear_size,
                        name='linear_projection_2'
                )
                activation = tf.nn.relu(projection)
                activation = tf.nn.dropout(activation, self.dropout_keep_prob)

                projection = linear(
                        input=activation,
                        input_size=linear_size,
                        output_size=action_templates_vocabulary_length,
                        name='linear_projection_3'
                )
                self.predictions = tf.nn.softmax(projection, name="softmax_output")
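                # unlike the seq2seq variants, this model classifies the whole dialogue into one of
                # the action templates: ReLU + dropout layers followed by a final linear projection
                # and a softmax over the template vocabulary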
                # print(self.predictions)

        if FLAGS.print_variables:
            for v in tf.trainable_variables():
                print(v.name)

        with tf.name_scope('loss'):
            one_hot_labels = dense_to_one_hot(actions_template, action_templates_vocabulary_length)
            self.loss = tf.reduce_mean(- one_hot_labels * tf.log(tf.clip_by_value(self.predictions, 1e-10, 1.0)), name='loss')
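            # clipping the predictions keeps tf.log() away from zero and avoids NaN losses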
            tf.scalar_summary('loss', self.loss)

        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(tf.argmax(one_hot_labels, 1), tf.argmax(self.predictions, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
            tf.scalar_summary('accuracy', self.accuracy)
Example No. 36
0
    def __init__(self, data, FLAGS):
        super(Model, self).__init__(data, FLAGS)

        encoder_embedding_size = 32 * 4
        encoder_vocabulary_length = len(data.idx2word_history)

        decoder_lstm_size = 16 * 2
        decoder_embedding_size = 16 * 2
        decoder_sequence_length = data.batch_actions.shape[2]
        decoder_vocabulary_length = len(data.idx2word_action)

        with tf.name_scope('data'):
            batch_histories = tf.Variable(data.batch_histories, name='histories', trainable=False)
            batch_actions = tf.Variable(data.batch_actions, name='actions', trainable=False)

            histories = tf.gather(batch_histories, self.batch_idx)
            actions = tf.gather(batch_actions, self.batch_idx)

        with tf.name_scope('model'):
            batch_size = tf.shape(histories)[0]

            encoder_embedding = embedding(
                input=histories,
                length=encoder_vocabulary_length,
                size=encoder_embedding_size,
                name='encoder_embedding'
            )

            with tf.name_scope("UtterancesEncoder"):
                conv3 = encoder_embedding
                # conv3 = conv2d(
                #         input=conv3,
                #         filter=[1, 3, encoder_embedding_size, encoder_embedding_size],
                #         name='conv_utt_size_3_layer_1'
                # )
                # conv_s3 = conv2d(
                #         input=conv_s3,
                #         filter=[1, 3, encoder_embedding_size, encoder_embedding_size],
                #         name='conv_utt_size_3_layer_2'
                # )
                # print(conv3)
                # k = encoder_sequence_length
                # mp_s3 = max_pool(conv_s3, ksize=[1, 1, k, 1], strides=[1, 1, k, 1])
                # print(mp_s3)

                # encoded_utterances = mp_s3
                encoded_utterances = tf.reduce_max(conv3, [2], keep_dims=True)

            with tf.name_scope("HistoryEncoder"):
                conv3 = encoded_utterances
                # conv3 = conv2d(
                #         input=conv3,
                #         filter=[3, 1, encoder_embedding_size, encoder_embedding_size],
                #         name='conv_hist_size_3_layer_1'
                # )
                # conv_s3 = conv2d(
                #         input=conv_s3,
                #         filter=[3, 1, encoder_embedding_size, encoder_embedding_size],
                #         name='conv_hist_size_3_layer_2'
                # )
                # print(conv3)
                # k = encoder_sequence_length
                # mp_s3 = max_pool(conv_s3, ksize=[1, 1, k, 1], strides=[1, 1, k, 1])
                # print(mp_s3)

                encoded_history = tf.reduce_max(conv3, [1, 2])

                # projection = linear(
                #         input=encoded_history,
                #         input_size=encoder_embedding_size,
                #         output_size=encoder_embedding_size,
                #         name='linear_projection_1'
                # )
                # encoded_history = tf.nn.relu(projection)
                # projection = linear(
                #         input=encoded_history,
                #         input_size=encoder_embedding_size,
                #         output_size=encoder_embedding_size,
                #         name='linear_projection_2'
                # )
                # encoded_history = tf.nn.relu(projection)
                # projection = linear(
                #         input=encoded_history,
                #         input_size=encoder_embedding_size,
                #         output_size=decoder_lstm_size * 2,
                #         name='linear_projection_3'
                # )
                # encoded_history = tf.nn.relu(projection)

            with tf.name_scope("Decoder"):
                with tf.name_scope("RNNDecoderCell"):
                    cell = LSTMCell(
                        num_units=decoder_lstm_size,
                        input_size=decoder_embedding_size + encoder_embedding_size,
                        use_peepholes=True,
                    )
                    initial_state = cell.zero_state(batch_size, tf.float32)

                # decode all histories along the utterance axis
                final_encoder_state = encoded_history

                decoder_states, decoder_outputs, decoder_outputs_softmax = rnn_decoder(
                    cell=cell,
                    inputs=[actions[:, word] for word in range(decoder_sequence_length)],
                    static_input=final_encoder_state,
                    initial_state=initial_state,  # final_encoder_state,
                    embedding_size=decoder_embedding_size,
                    embedding_length=decoder_vocabulary_length,
                    sequence_length=decoder_sequence_length,
                    name='RNNDecoder',
                    reuse=False,
                    use_inputs_prob=self.use_inputs_prob
                )

                self.predictions = tf.concat(1, decoder_outputs_softmax)
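                # the per-step softmax outputs are concatenated along axis 1; the result is treated
                # as [batch, decoder_sequence_length, decoder_vocabulary_length] (see the argmax
                # over axis 2 in the accuracy computation below)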

        if FLAGS.print_variables:
            for v in tf.trainable_variables():
                print(v.name)

        with tf.name_scope('loss'):
            one_hot_labels = dense_to_one_hot(actions, decoder_vocabulary_length)
            self.loss = tf.reduce_mean(- one_hot_labels * tf.log(tf.clip_by_value(self.predictions, 1e-10, 1.0)),
                                       name='loss')
            tf.scalar_summary('loss', self.loss)

        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(tf.argmax(one_hot_labels, 2), tf.argmax(self.predictions, 2))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
            tf.scalar_summary('accuracy', self.accuracy)
Example No. 37
0
def train(train_set, test_set, idx2word, word2idx):
    embedding_size = 5
    vocabulary_length = len(idx2word)
    sequence_size = train_set['features'].shape[1]
    lstm_size = 5

    # inference model
    with tf.name_scope('model'):
        i = tf.placeholder("int32", name='input')
        o = tf.placeholder("int32", name='true_output')

        with tf.variable_scope("batch_size"):
            batch_size = tf.shape(i)[0]

        e = embedding(
                input=i,
                length=vocabulary_length,
                size=embedding_size,
                name='embedding'
        )

        with tf.name_scope("RNNCell"):
            cell = LSTMCell(lstm_size, input_size=embedding_size)
            state = cell.zero_state(batch_size, tf.float32)

        outputs, states = rnn(
                cell=cell,
                inputs=[e[:, j, :] for j in range(sequence_size)],
                initial_state=state,
                name='RNN'
        )

        final_state = states[-1]
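        # the classifier uses only the final LSTM state, projected to the vocabulary size and
        # normalized with a softmax below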

        l = linear(
                input=final_state,
                input_size=cell.state_size,
                output_size=vocabulary_length,
                name='linear'
        )

        p_o_i = tf.nn.softmax(l, name="softmax_output")

    with tf.name_scope('loss'):
        one_hot_labels = dense_to_one_hot(o, vocabulary_length)
        loss = tf.reduce_mean(-one_hot_labels * tf.log(p_o_i), name='loss')
        tf.scalar_summary('loss', loss)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(one_hot_labels, 1), tf.argmax(p_o_i, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
        tf.scalar_summary('accuracy', accuracy)

    with tf.Session() as sess:
        # Merge all the summaries and write them out to ./log
        merged = tf.merge_all_summaries()
        writer = tf.train.SummaryWriter('./log', sess.graph_def)
        saver = tf.train.Saver()

        # training
        train_op = tf.train.AdamOptimizer(FLAGS.learning_rate, name='trainer').minimize(loss)
        tf.initialize_all_variables().run()

        for epoch in range(FLAGS.max_epochs):
            sess.run(train_op, feed_dict={i: train_set['features'], o: train_set['targets']})

            if epoch % max(int(FLAGS.max_epochs / 100), 1) == 0:
                summary, lss, acc = sess.run([merged, loss, accuracy],
                                             feed_dict={i: test_set['features'], o: test_set['targets']})
                writer.add_summary(summary, epoch)
                print()
                print('Epoch: {epoch}'.format(epoch=epoch))
                print(' - accuracy = {acc}'.format(acc=acc))
                print(' - loss     = {lss}'.format(lss=lss))

        save_path = saver.save(sess, "model.ckpt")
        print()
        print("Model saved in file: %s" % save_path)
        print()

        print('Test features')
        print(test_set['features'])
        print('Test targets')
        print(test_set['targets'])
        # print('Predictions')
        p_o_i = sess.run(p_o_i, feed_dict={i: test_set['features'], o: test_set['targets']})
        # print(p_o_i)
        print('Argmax predictions')
        print(np.argmax(p_o_i, 1).reshape((-1, 1)))
Example No. 38
0
def train(train_set, test_set, idx2word, word2idx):
    encoder_lstm_size = 5
    encoder_embedding_size = 5
    encoder_vocabulary_length = len(idx2word)
    encoder_sequence_length = train_set['features'].shape[1]

    decoder_lstm_size = 5
    decoder_embedding_size = 5
    decoder_vocabulary_length = len(idx2word)
    decoder_sequence_length = train_set['targets'].shape[1]

    # inference model
    with tf.name_scope('model'):
        i = tf.placeholder("int32", name='input')
        o = tf.placeholder("int32", name='true_output')

        with tf.variable_scope("batch_size"):
            batch_size = tf.shape(i)[0]

        encoder_embedding = embedding(
                input=i,
                length=encoder_vocabulary_length,
                size=encoder_embedding_size,
                name='encoder_embedding'
        )

        with tf.name_scope("RNNEncoderCell"):
            cell = LSTMCell(
                    num_units=encoder_lstm_size,
                    input_size=encoder_embedding_size,
                    use_peepholes=False
            )
            initial_state = cell.zero_state(batch_size, tf.float32)

        encoder_outputs, encoder_states = rnn(
                cell=cell,
                inputs=[encoder_embedding[:, j, :] for j in range(encoder_sequence_length)],
                initial_state=initial_state,
                name='RNNForwardEncoder'
        )

        final_encoder_state = encoder_states[-1]
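        # plain seq2seq: the final encoder state initializes the decoder; no target inputs are
        # passed to rnn_decoder here, so it presumably feeds back its own predictions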

        with tf.name_scope("RNNDecoderCell"):
            cell = LSTMCell(
                    num_units=decoder_lstm_size,
                    input_size=decoder_embedding_size,
                    use_peepholes=False,
            )

        decoder_states, decoder_outputs, decoder_outputs_softmax = rnn_decoder(
                cell=cell,
                initial_state=final_encoder_state,
                embedding_size=decoder_embedding_size,
                embedding_length=decoder_vocabulary_length,
                sequence_length=decoder_sequence_length,
                name='RNNDecoder'
        )

        p_o_i = tf.concat(1, decoder_outputs_softmax)

    with tf.name_scope('loss'):
        one_hot_labels = dense_to_one_hot(o, decoder_vocabulary_length)
        loss = tf.reduce_mean(-one_hot_labels * tf.log(p_o_i), name='loss')
        # loss = tf.constant(0.0, dtype=tf.float32)
        tf.scalar_summary('loss', loss)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(one_hot_labels, 2), tf.argmax(p_o_i, 2))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
        # accuracy = tf.constant(0.0, dtype=tf.float32)
        tf.scalar_summary('accuracy', accuracy)

    with tf.Session() as sess:
        # Merge all the summaries and write them out to ./log
        merged = tf.merge_all_summaries()
        writer = tf.train.SummaryWriter('./log', sess.graph_def)
        saver = tf.train.Saver()

        # training
        train_op = tf.train.AdamOptimizer(FLAGS.learning_rate, name='trainer').minimize(loss)
        tf.initialize_all_variables().run()

        for epoch in range(FLAGS.max_epochs):
            sess.run(train_op, feed_dict={i: train_set['features'], o: train_set['targets']})

            if epoch % max(int(FLAGS.max_epochs / 100), 1) == 0:
                summary, lss, acc = sess.run([merged, loss, accuracy],
                                             feed_dict={i: test_set['features'], o: test_set['targets']})
                writer.add_summary(summary, epoch)
                print()
                print('Epoch: {epoch}'.format(epoch=epoch))
                print(' - accuracy = {acc}'.format(acc=acc))
                print(' - loss     = {lss}'.format(lss=lss))

        save_path = saver.save(sess, "model.ckpt")
        print()
        print("Model saved in file: %s" % save_path)
        print()

        print('Test features')
        print(test_set['features'])
        print('Test targets')
        print('Shape of targets:', test_set['targets'].shape)
        print(test_set['targets'])
        print('Predictions')
        p_o_i = sess.run(p_o_i, feed_dict={i: test_set['features'], o: test_set['targets']})
        p_o_i_argmax = np.argmax(p_o_i, 2)
        print('Shape of predictions:', p_o_i.shape)
        print('Argmax predictions')
        print(p_o_i_argmax)
        print()
        for row in range(p_o_i_argmax.shape[0]):
            for j in range(p_o_i_argmax.shape[1]):
                w = idx2word[p_o_i_argmax[row, j]]
                if w not in ['_SOS_', '_EOS_']:
                    print(w, end=' ')
            print()