Пример #1
0
 def __init__(self, in_d, out_d, name, activation=tf.nn.tanh, dynamic=True):
     """
     Create an LSTM layer backed by a single `LSTMCell` and register its variables.

     Parameters
     ----------
     in_d : int
         Stored as both `self._num_cells` and `self._input_dim`.
         NOTE(review): earlier documentation described this as the number of neurons in
         the layer, yet the cell below is sized with `out_d` - confirm which is intended.
     out_d : int
         Passed to `LSTMCell` as `num_units` and used as the shape handed to `build`.
         NOTE(review): earlier documentation described this as the dimensionality of the
         input vectors ([batch_size, seq_length, num_features]) - confirm against callers.
     name : str
         Name of the layer. Also used as the prefix of the keys of `named_params_dict`.
     activation : tensorflow function
         Activation function of the layer.
     dynamic : boolean
         Influences whether the layer will be working as dynamic RNN or static. The difference
         between static and dynamic is that in case of static TensorFlow builds static graph and the RNN
         will always go through each time step in the sequence. In case of dynamic TensorFlow will be
         creating RNN `in a while loop`, that is to say that using dynamic RNN you can pass sequences of
         variable length, but you have to provide list of sequences' lengths. Currently API for using
         dynamic RNNs is not provided.
         WARNING! THIS PARAMETER DOESN'T PLAY ANY ROLE IF YOU'RE GONNA STACK RNN LAYERS.
     """
     # NOTE(review): both attributes receive `in_d`; one of them may have been meant to be `out_d`.
     self._num_cells = in_d
     self._input_dim = in_d
     self._f = activation
     self._cell = LSTMCell(num_units=out_d, activation=activation, dtype=tf.float32)
     # Build the cell right away; presumably so that `self._cell.variables` read below is
     # already populated. NOTE(review): the shape passed is `[out_d]`, not `in_d` - confirm.
     self._cell.build(input_shape=[out_d])
     self._dynamic = dynamic
     params = self._cell.variables
     # Parameter names have the form "<name>_<in_d>_<out_d>_<index>".
     param_common_name = name + f'_{in_d}_{out_d}'
     named_params_dict = {(param_common_name + '_' + str(i)): param for i, param in enumerate(params)}
     # Every cell variable is registered both as a parameter and as a regularized parameter.
     super().__init__(
         name=name,
         params=params,
         regularize_params=params,
         named_params_dict=named_params_dict,
         outputs_names=[
             LSTMLayer.OUTPUT_HIDDEN_STATE,
             LSTMLayer.OUTPUT_LAST_CANDIDATE,
             LSTMLayer.OUTPUT_LAST_HIDDEN_STATE
         ]
     )
 def build_cell(self, name=None):
     """Create the recurrent cell selected by ``self.hparams.cell_type``.

     Supported values are ``'linear'``, ``'tanh'`` and ``'relu'`` (a
     ``BasicRNNCell`` with the matching activation), ``'gru'`` and ``'lstm'``.
     Every cell is sized with ``self.hparams.hidden_units``.

     Args:
         name: Optional name forwarded to the cell constructor.

     Returns:
         The newly constructed cell.

     Raises:
         ValueError: If the configured cell type is not one of the above.
     """
     kind = self.hparams.cell_type
     if kind not in ('linear', 'tanh', 'relu', 'gru', 'lstm'):
         raise ValueError('Provided cell type not supported.')

     units = self.hparams.hidden_units
     if kind == 'gru':
         return GRUCell(units, name=name)
     if kind == 'lstm':
         return LSTMCell(units, name=name)

     # The remaining kinds are plain RNN cells that differ only in activation.
     if kind == 'linear':
         activation = tf.identity
     elif kind == 'tanh':
         activation = tf.tanh
     else:
         activation = tf.nn.relu
     return BasicRNNCell(units, activation=activation, name=name)
Пример #3
0
    def calc_pred(self):
        """Build the stacked-LSTM prediction graph.

        Reads ``self.data`` (network input), ``self.target`` (its static
        dimensions 1 and 2 are used as sequence length and number of classes),
        ``self.dropout``, ``self._num_hidden`` and ``self._num_layers``.
        Sets ``self.network``, ``self.weight`` and ``self.bias``.

        Returns:
            Softmax predictions reshaped to ``[-1, max_length, num_classes]``.
        """
        # Recurrent network. Every layer needs its own cell instance: repeating
        # one object (``[cell] * n``) places the very same cell in every layer.
        def _make_layer():
            return DropoutWrapper(LSTMCell(self._num_hidden),
                                  output_keep_prob=self.dropout)

        self.network = MultiRNNCell(
            [_make_layer() for _ in range(self._num_layers)])
        max_length = int(self.target.get_shape()[1])
        # dynamic_rnn runs the stacked cell over the time axis of self.data and
        # returns the per-step outputs together with the final state (unused).
        output, _ = tf.nn.dynamic_rnn(self.network,
                                      self.data,
                                      dtype=tf.float32)

        # Softmax layer.
        num_classes = int(self.target.get_shape()[2])
        self.weight, self.bias = self._weight_and_bias(self._num_hidden,
                                                       num_classes)
        # Flatten to apply same weights to all time steps.
        output = tf.reshape(output, [-1, self._num_hidden])
        predictions = tf.nn.softmax(tf.matmul(output, self.weight) + self.bias)
        predictions = tf.reshape(predictions, [-1, max_length, num_classes])
        return predictions
    def __init__(self, max_len, hidden_units, lr):
        """Build a single-layer LSTM regressor graph.

        Args:
            max_len: Number of time steps in the input placeholder.
            hidden_units: Number of units of the LSTM cell.
            lr: Learning rate handed to the Adam optimizer.

        Attributes set: ``input_node``, ``labels``, ``logits`` (sigmoid of the
        linear projection of the last time step), ``loss`` (mean squared
        error against ``labels``) and ``optimizer`` (the minimize op).
        """
        # Graph inputs: [batch, max_len, 5] features and one label per example.
        self.input_node = tf.placeholder(tf.float32, [None, max_len, 5])
        self.labels = tf.placeholder(tf.float32, [None])

        # Parameters of the final linear projection.
        out_w = tf.Variable(tf.random_normal([hidden_units, 1]))
        out_b = tf.Variable(tf.random_normal([1]))

        rnn_out, _ = tf.nn.dynamic_rnn(LSTMCell(hidden_units),
                                       self.input_node,
                                       dtype=tf.float32,
                                       time_major=False)

        # Move time to the leading axis so the final step can be gathered by index.
        time_first = tf.transpose(rnn_out, [1, 0, 2])
        final_step = int(time_first.get_shape()[0]) - 1
        last = tf.gather(time_first, final_step)

        self.logits = tf.squeeze(tf.nn.sigmoid(tf.matmul(last, out_w) + out_b))
        squared_error = tf.square(self.labels - self.logits)
        self.loss = tf.reduce_mean(squared_error)
        adam = tf.train.AdamOptimizer(learning_rate=lr)
        self.optimizer = adam.minimize(self.loss)
Пример #5
0
 def make_RNN_cell(self, Nneurons, fn=tf.nn.relu):
     """
     Returns a new cell (for deep recurrent networks), with Nneurons,
     and activation function fn.

     The cell class is chosen by ``self.config.cell_type`` ('RNN', 'LSTM' or
     'GRU') and the result is wrapped in a ``DropoutWrapper`` that uses
     ``self.keep_prob`` as the input keep probability.

     Raises:
         ValueError: if ``self.config.cell_type`` is not a supported value
             (previously this failed later with an unbound local variable).
     """
     # Make cell type
     cell_type = self.config.cell_type
     if cell_type == 'RNN':
         cell = BasicRNNCell(num_units=Nneurons, activation=fn)
     elif cell_type == 'LSTM':
         cell = LSTMCell(num_units=Nneurons, activation=fn)
     elif cell_type == 'GRU':
         cell = GRUCell(num_units=Nneurons, activation=fn)
     else:
         raise ValueError('Unsupported cell type: {!r}'.format(cell_type))
     # Include dropout.
     # When training, keep_prob is set by config, and is 1 in eval/predict.
     # NOTE(review): input_size is set to Nneurons; with variational_recurrent=True
     # it should describe the depth of the cell input - confirm for the first layer.
     cell = DropoutWrapper(cell,
                           input_keep_prob=self.keep_prob,
                           variational_recurrent=True,
                           input_size=Nneurons,
                           dtype=tf.float32)
     return cell
Пример #6
0
    def build_rnn(self, rnn_type, hidden_size, num_layes):
        """Stack ``num_layes`` recurrent cells of the requested type.

        Args:
            rnn_type: ``'lstm'`` or ``'gru'``; any other truthy value yields a
                ``BasicRNNCell``.
            hidden_size: Number of units of every cell.
            num_layes: Number of stacked layers.

        Returns:
            A ``MultiRNNCell`` wrapping the freshly created cells.

        Raises:
            NotImplementedError: If ``rnn_type`` is falsy (e.g. ``None`` or ``''``).
        """
        def _new_cell():
            # One independent cell per call so that layers do not share an instance.
            if rnn_type == 'lstm':
                return LSTMCell(
                    num_units=hidden_size,
                    state_is_tuple=True,
                    initializer=tf.random_uniform_initializer(-0.25, 0.25))
            if rnn_type == 'gru':
                return GRUCell(num_units=hidden_size)
            if rnn_type:
                return BasicRNNCell(num_units=hidden_size)
            raise NotImplementedError(
                f'the rnn type is unexist: {rnn_type}')

        stacked = [_new_cell() for _ in range(num_layes)]
        return MultiRNNCell(stacked, state_is_tuple=True)
Пример #7
0
def _new_RNN_cell(memory_dim, num_layers, cell_type, dropout, keep_prob):

    assert memory_dim is not None and num_layers is not None and cell_type is not None and dropout is not None, 'At least one of the arguments is passed as None'

    if cell_type == 'LSTM':
        constituent_cell = LSTMCell(memory_dim)
    elif cell_type == 'GRU':
        constituent_cell = GRUCell(memory_dim)
    else:
        raise Exception('unsupported rnn cell type: %s' % cell_type)

    if dropout != 0:
        constituent_cell = DropoutWrapper(constituent_cell,
                                          input_keep_prob=keep_prob,
                                          output_keep_prob=keep_prob)

    if num_layers > 1:
        return MultiRNNCell([constituent_cell for _ in range(num_layers)])

    return constituent_cell
Пример #8
0
def CreateMultiRNNCell(cell_name, num_units, num_layers=1, output_keep_prob=1.0, reuse=False):
	"""Build a ``MultiRNNCell`` made of ``num_layers`` freshly created cells.

	Args:
		cell_name: ``"GRUCell"`` or ``"LSTMCell"``.
		num_units: Number of units of every cell.
		num_layers: Number of stacked cells.
		output_keep_prob: When below 1.0, every cell is wrapped in a
			``DropoutWrapper`` with this output keep probability.
		reuse: Forwarded to the cell constructors.

	Returns:
		The stacked ``MultiRNNCell``.

	Raises:
		ValueError: If ``cell_name`` is not supported. (The previous
			behaviour was ``exit(0)``, which terminated the whole process
			while reporting success.)
	"""
	cells = []
	for i in range(num_layers):
		if cell_name == "GRUCell":
			single_cell = GRUCell(num_units=num_units, reuse=reuse)
		elif cell_name == "LSTMCell":
			single_cell = LSTMCell(num_units=num_units, reuse=reuse)
		else:
			graphlg.info("Unknown Cell type !")
			raise ValueError("Unknown cell type: %r" % (cell_name,))
		if output_keep_prob < 1.0:
			single_cell = tf.contrib.rnn.DropoutWrapper(single_cell, output_keep_prob=output_keep_prob)
			graphlg.info("Layer %d, Dropout used: output_keep_prob %f" % (i, output_keep_prob))

		cells.append(single_cell)
	return MultiRNNCell(cells)
Пример #9
0
    def _build_lstm(self):
        """Run an LSTM over the concatenated item/category history embeddings.

        Sets ``self.mask``, ``self.sequence_length`` (sum of the mask along
        axis 1) and ``self.history_embedding`` as side effects, and records a
        histogram summary of the RNN outputs.

        Returns:
            obj: Element 1 of the final LSTM state (the hidden state ``h`` when
            the state is an ``LSTMStateTuple``).
        """
        with tf.name_scope("lstm"):
            self.mask = self.iterator.mask
            self.sequence_length = tf.reduce_sum(self.mask, 1)

            history_parts = [
                self.item_history_embedding,
                self.cate_history_embedding,
            ]
            self.history_embedding = tf.concat(history_parts, 2)

            cell = LSTMCell(self.hidden_size)
            rnn_outputs, final_state = dynamic_rnn(
                cell,
                inputs=self.history_embedding,
                sequence_length=self.sequence_length,
                dtype=tf.float32,
                scope="lstm",
            )
            tf.summary.histogram("LSTM_outputs", rnn_outputs)

            last_hidden = final_state[1]
            return last_hidden
Пример #10
0
    def decoder(self, max_twee_len):
        """Build the decoder part of the graph with ``tf.nn.raw_rnn``.

        Side effects: sets ``self.batch_size`` (second entry of the dynamic
        shape of ``self.encoder_input``) and ``self.decoder_length``
        (``max_twee_len + 3``).

        Args:
            max_twee_len: Value from which the decoder length is derived.

        Returns:
            Tuple ``(self.de_out, decoder_logits, decoder_prediction)``.
            NOTE(review): ``self.de_out`` is not assigned in this method;
            presumably it is populated elsewhere (e.g. by ``self.loop_fn``) - confirm.
        """
        scope = 'Decoder'
        with self.graph.as_default():
            with tf.name_scope(scope):
                decoder_cell = LSTMCell(self.decoder_hidden_nodes)

                # The shape of encoder_input is unpacked into exactly two values,
                # i.e. the tensor is rank 2. `encoder_max_time` is not used below.
                encoder_max_time, self.batch_size = tf.unstack(
                    tf.shape(self.encoder_input))

                # self.decoder_length = self.en_in_len + 3
                self.decoder_length = max_twee_len + 3

                # -- Simple RNN --
                # decoder_output, decoder_final_state = tf.nn.dynamic_rnn(decoder_cell,
                #                                                         decoder_input_embed,
                #                                                         'tf.float32',
                #                                                         initial_state = self.encoder_final_state,
                #                                                         time_major = True,
                #                                                         scope="plain_decoder")
                # decoder_logits = tf.contrib.layers.linear(decoder_output, self.vocab_size)
                # decoder_prediction = tf.argmax(decoder_logits, 2)

                # Sanity check on the special token ids this graph is built with.
                assert self.EOS == 1 and self.PAD == 0
                # -- Complex mechanism for decoder: with previous generated tokens, or with attention
                # import pdb; pdb.set_trace()
                # raw_rnn drives the cell with self.loop_fn; the final state is not used below.
                decoder_output_ta, decoder_final_state, _ = \
                    tf.nn.raw_rnn(decoder_cell, self.loop_fn)

                # Turn the TensorArray of per-step outputs into a single tensor.
                decoder_output = decoder_output_ta.stack()

                decoder_max_step, decoder_batch_size, decoder_dim = tf.unstack(
                    tf.shape(decoder_output))
                # Flatten steps and batch so one matmul projects every output with self.W / self.b.
                decoder_output_flat = tf.reshape(decoder_output,
                                                 (-1, decoder_dim))
                decoder_logits_flat = tf.add(
                    tf.matmul(decoder_output_flat, self.W), self.b)
                # Restore the [step, batch, vocab] layout.
                decoder_logits = tf.reshape(
                    decoder_logits_flat,
                    (decoder_max_step, decoder_batch_size, self.vocab_size))
                # Index of the largest logit along the vocabulary axis.
                decoder_prediction = tf.argmax(decoder_logits, 2)

        return self.de_out, decoder_logits, decoder_prediction
    def build_critic(self):
        """Build the critic: conv1d embedding -> LSTM encoder -> linear head.

        Reads ``self.input_description``, ``self.input_new_dimension``,
        ``self.input_embed_c``, ``self.num_neurons_c``, ``self.initializer``,
        ``self.init_bias_c`` and ``self.step``.
        Sets ``self.prediction_c`` (one value per batch element).
        """

        # Embed input sequence (for critic)
        # NOTE(review): this variable is created with tf.Variable outside the "Critic"
        # variable scope, so the reuse_variables() call below does not apply to it.
        W_embed_c = tf.Variable(tf.truncated_normal([1,self.input_new_dimension,self.input_embed_c]), name="critic_W_embed")
        with tf.variable_scope("Critic"):
            # From the second build on (self.step > 0) reuse the variables of this scope.
            if self.step>0:
                tf.get_variable_scope().reuse_variables()
            # Width-1 convolution acts as a per-position linear embedding.
            embeded_input_c = tf.nn.conv1d(self.input_description, W_embed_c, 1, "VALID", name="Critic_EncoderInput")

            # ENCODER LSTM cell
            cell_c = LSTMCell(self.num_neurons_c,initializer=self.initializer)   # cell = DropoutWrapper(cell, output_keep_prob=dropout) or MultiRNNCell([cell] * num_layers)

            # RNN-ENCODER returns the output activations [Batch size, Sequence Length, Num_neurons] and last hidden state as tensors.
            encoder_output_c, encoder_state_c = tf.nn.dynamic_rnn(cell_c, embeded_input_c, dtype=tf.float32)
            encoder_output_c = tf.transpose(encoder_output_c, [1, 0, 2]) # transpose time axis first [time steps x Batch size x num_neurons]
            # The static length of the time axis is required here (int(...) of the static shape).
            last_c = tf.gather(encoder_output_c, int(encoder_output_c.get_shape()[0]) - 1) # select last frame [Batch size x num_neurons]

        ### DO A CONVOLUTION HERE INSTEAD OF A FFN !!! ###
        # Linear head mapping the last encoder output to a single value.
        weight_c = tf.Variable(tf.truncated_normal([self.num_neurons_c, 1], stddev=0.1))
        bias_c = tf.Variable(tf.constant(self.init_bias_c, shape=[1]))
        self.prediction_c = tf.matmul(last_c, weight_c) + bias_c
Пример #12
0
def RNN(_X, _weights, _biases, lens):
    """Run a recurrent cell over ``_X`` and project each sequence's last valid output.

    The cell class is selected by ``FLAGS.unit`` ('PLSTM', 'GRU' or 'LSTM')
    and sized with ``FLAGS.n_hidden``.

    Args:
        _X: Input handed to ``tf.nn.dynamic_rnn``.
        _weights: Mapping whose ``'out'`` entry is the projection matrix.
        _biases: Mapping whose ``'out'`` entry is the projection bias.
        lens: Sequence lengths, forwarded as ``sequence_length``.

    Returns:
        ``matmul(last_output, _weights['out'])`` with ``_biases['out']`` added.

    Raises:
        ValueError: If ``FLAGS.unit`` is not one of the supported units.
    """
    unit = FLAGS.unit
    if unit == 'PLSTM':
        cell = PhasedLSTMCell(FLAGS.n_hidden, use_peepholes=True)
    elif unit == 'GRU':
        cell = GRUCell(FLAGS.n_hidden)
    elif unit == 'LSTM':
        cell = LSTMCell(FLAGS.n_hidden, use_peepholes=True)
    else:
        raise ValueError('Unit {} not implemented.'.format(unit))

    outputs, _ = tf.nn.dynamic_rnn(cell, _X, dtype=tf.float32, sequence_length=lens)

    # Select output number (lens - 1) of every row: flatten [batch, time, depth]
    # to [batch * time, depth] and gather at row * time + (lens - 1).
    n_rows = tf.shape(outputs)[0]
    n_steps = tf.shape(outputs)[1]
    depth = int(outputs.get_shape()[2])
    last_idx = tf.range(0, n_rows) * n_steps + (lens - 1)
    flattened = tf.reshape(outputs, [-1, depth])
    last_outputs = tf.gather(flattened, last_idx)

    projected = tf.matmul(last_outputs, _weights['out'])
    return tf.nn.bias_add(projected, _biases['out'])
Пример #13
0
def CreateMultiRNNCell(cell_name,
                       num_units,
                       num_layers=1,
                       output_keep_prob=1.0,
                       reuse=False):
    """Build a ``MultiRNNCell`` made of ``num_layers`` freshly created cells.

    Args:
        cell_name: ``"GRUCell"`` or ``"LSTMCell"``.
        num_units: Number of units of every cell.
        num_layers: Number of stacked cells.
        output_keep_prob: When below 1.0, every cell is wrapped in a
            ``DropoutWrapper`` with this output keep probability.
        reuse: Forwarded to the cell constructors.

    Returns:
        The stacked ``MultiRNNCell``.

    Raises:
        ValueError: If ``cell_name`` is not supported. (The previous
            behaviour was ``exit(0)``, which terminated the whole process
            while reporting success.)
    """
    cells = []
    for i in range(num_layers):
        if cell_name == "GRUCell":
            single_cell = GRUCell(num_units=num_units, reuse=reuse)
        elif cell_name == "LSTMCell":
            single_cell = LSTMCell(num_units=num_units, reuse=reuse)
        else:
            graphlg.info("Unknown Cell type !")
            raise ValueError("Unknown cell type: %r" % (cell_name,))
        if output_keep_prob < 1.0:
            single_cell = tf.contrib.rnn.DropoutWrapper(
                single_cell, output_keep_prob=output_keep_prob)
            graphlg.info("Layer %d, Dropout used: output_keep_prob %f" %
                         (i, output_keep_prob))
        cells.append(single_cell)
    return MultiRNNCell(cells)
Пример #14
0
def get_rnn_cell_list(config, name, reuse=False, seed=123, dtype=tf.float32):
    """Create one recurrent cell per entry of ``config['num_units']``.

    Args:
        config: Mapping with the keys ``'num_units'`` (iterable of layer
            sizes), ``'cell_type'`` (``'clstm'`` or ``'tflstm'``),
            ``'layer_norm'`` and ``'activation'``.
        name: Prefix of the cell names (used by the ``'clstm'`` cells only).
        reuse: Forwarded to the cell constructors that accept it.
        seed: Forwarded to ``CustomLSTMCell``.
        dtype: Forwarded to ``CustomLSTMCell``.

    Returns:
        List with one cell per layer, in the order of ``config['num_units']``.

    Raises:
        ValueError: If ``config['cell_type']`` is not supported. Previously a
            list of ``None`` entries was returned silently in that case.
    """
    cell_list = []
    for i, units in enumerate(config['num_units']):
        if config['cell_type'] == 'clstm':
            cell = CustomLSTMCell(units, layer_norm=config['layer_norm'], activation=config['activation'], seed=seed,
                                  reuse=reuse, dtype=dtype, name='{}_{}'.format(name, i))
        elif config['cell_type'] == 'tflstm':
            act = get_activation(config['activation'])

            if config['layer_norm']:
                cell = LayerNormBasicLSTMCell(num_units=units, activation=act, layer_norm=config['layer_norm'],
                                              reuse=reuse)
            # The explicit comparison with False is kept on purpose: other falsy
            # values (e.g. None) must keep falling through to LSTMBlockCell.
            elif config['layer_norm'] == False and config['activation'] != 'tanh':
                cell = LSTMCell(num_units=units, activation=act, reuse=reuse)
            else:
                # NOTE(review): neither `act` nor `reuse` is passed to this cell.
                cell = LSTMBlockCell(num_units=units)
        else:
            raise ValueError(
                'Unsupported cell type: {!r}'.format(config['cell_type']))
        cell_list.append(cell)

    return cell_list
Пример #15
0
    def _encode(self, input, seq_actual_len, reuse=None):
        """Encode ``input`` with a bidirectional LSTM.

        Args:
            input: Inputs handed to ``tf.nn.bidirectional_dynamic_rnn``.
            seq_actual_len: Forwarded as ``sequence_length``.
            reuse: ``reuse`` flag of the ``'encoding'`` variable scope.

        Returns:
            Forward and backward outputs concatenated along axis 2.
        """
        with tf.variable_scope('encoding', reuse=reuse):
            # NOTE(review): a single LSTMCell instance backs both directions;
            # confirm that sharing it between forward and backward is intended.
            shared_cell = LSTMCell(self.n_hidden)
            fw_cell = DropoutWrapper(shared_cell,
                                     output_keep_prob=self.dropout_keep_prob)
            bw_cell = DropoutWrapper(shared_cell,
                                     output_keep_prob=self.dropout_keep_prob)

            # The final states are not needed, only the per-step outputs.
            outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                fw_cell,
                bw_cell,
                input,
                sequence_length=seq_actual_len,
                dtype=tf.float32)
        return tf.concat(outputs, axis=2)
Пример #16
0
 def biLSTMBlock(self,
                 inputs,
                 num_units,
                 scope,
                 rnn_type,
                 dropout_keep_prob,
                 seq_len=None,
                 isReuse=None):
     """Run a bidirectional RNN over ``inputs`` inside variable scope ``scope``.

     Args:
         inputs: Inputs handed to ``tf.nn.bidirectional_dynamic_rnn``.
         num_units: Number of units of the recurrent cell.
         scope: Name of the variable scope to build in.
         rnn_type: ``'LSTM'`` or ``'GRU'``.
         dropout_keep_prob: Output keep probability of the dropout wrappers.
         seq_len: Optional sequence lengths, forwarded as ``sequence_length``.
         isReuse: ``reuse`` flag of the variable scope.

     Returns:
         The ``(outputs, output_states)`` tuple returned by
         ``tf.nn.bidirectional_dynamic_rnn``.

     Raises:
         ValueError: If ``rnn_type`` is not supported (previously this
             surfaced as a NameError on the unassigned cell variable).
     """
     # Validate before touching the graph so no half-built scope is left behind.
     if rnn_type not in ('LSTM', 'GRU'):
         raise ValueError('Unsupported rnn_type: {!r}'.format(rnn_type))

     with tf.variable_scope(scope, reuse=isReuse):
         if rnn_type == 'LSTM':
             lstmCell = LSTMCell(num_units=num_units)
         else:
             lstmCell = GRUCell(num_units=num_units)
         # NOTE(review): both directions wrap the same underlying cell instance;
         # this is kept as in the original - confirm the sharing is intended.
         dropLSTMCell = lambda: DropoutWrapper(
             lstmCell, output_keep_prob=dropout_keep_prob)
         fwLSTMCell, bwLSTMCell = dropLSTMCell(), dropLSTMCell()
         output = tf.nn.bidirectional_dynamic_rnn(cell_fw=fwLSTMCell,
                                                  cell_bw=bwLSTMCell,
                                                  inputs=inputs,
                                                  sequence_length=seq_len,
                                                  dtype=tf.float32)
         return output
    def build_permutation(self):
        """Build the encoder (embedding + LSTM) and the pointer decoder.

        Reads ``self.input_``, ``self.input_dimension``, ``self.input_embed``,
        ``self.initializer``, ``self.is_training``, ``self.num_neurons`` and
        ``self.config``.
        Sets ``self.ptr``, ``self.positions``, ``self.log_softmax``,
        ``self.attending`` and ``self.pointing``.
        """

        with tf.variable_scope("encoder"):

            with tf.variable_scope("embedding"):
                # Embed input sequence
                W_embed = tf.get_variable(
                    "weights", [1, self.input_dimension + 2, self.input_embed],
                    initializer=self.initializer)  # +2 for TW feat. here too
                # Width-1 convolution: a per-position linear embedding of the input.
                embedded_input = tf.nn.conv1d(self.input_,
                                              W_embed,
                                              1,
                                              "VALID",
                                              name="embedded_input")
                # Batch Normalization
                # (the op is named 'layer_norm', but it is tf.layers.batch_normalization over axis 2)
                embedded_input = tf.layers.batch_normalization(
                    embedded_input,
                    axis=2,
                    training=self.is_training,
                    name='layer_norm',
                    reuse=None)

            with tf.variable_scope("dynamic_rnn"):
                # Encode input sequence
                cell1 = LSTMCell(
                    self.num_neurons, initializer=self.initializer
                )  # BNLSTMCell(self.num_neurons, self.training) or cell1 = DropoutWrapper(cell1, output_keep_prob=0.9)
                # Return the output activations [Batch size, Sequence Length, Num_neurons] and last hidden state as tensors.
                encoder_output, encoder_state = tf.nn.dynamic_rnn(
                    cell1, embedded_input, dtype=tf.float32)

        with tf.variable_scope('decoder'):
            # Ptr-net returns permutations (self.positions), with their log-probability for backprop
            self.ptr = Pointer_decoder(encoder_output, self.config)
            # Decoding starts from the final encoder state.
            self.positions, self.log_softmax, self.attending, self.pointing = self.ptr.loop_decode(
                encoder_state)
            variable_summaries('log_softmax',
                               self.log_softmax,
                               with_max_min=True)
Пример #18
0
    def build_balancing_representation(self):
        """Build the balancing representation with a dropout-wrapped LSTM.

        The RNN input is the concatenation of the current covariates and the
        previous treatments along the last axis. Sequence lengths are derived
        from ``self.active_entries``. When ``self.b_train_decoder`` is truthy,
        the RNN starts from ``self.init_state`` concatenated with itself.

        Sets ``self.rnn_input`` and ``self.sequence_length`` as side effects.

        Returns:
          - balancing_representation: dense (ELU) projection to ``self.br_size``
            of the RNN outputs flattened to ``[-1, self.rnn_hidden_units]``.
        """
        rnn_inputs = [self.current_covariates, self.previous_treatments]
        self.rnn_input = tf.concat(rnn_inputs, axis=-1)

        # Max over axis 2, then sum over axis 1, cast to int32 for dynamic_rnn.
        active_per_step = tf.reduce_max(self.active_entries, axis=2)
        self.sequence_length = tf.cast(
            tf.reduce_sum(active_per_step, axis=1), tf.int32)

        base_cell = LSTMCell(self.rnn_hidden_units, state_is_tuple=False)
        rnn_cell = DropoutWrapper(base_cell,
                                  output_keep_prob=self.rnn_keep_prob,
                                  state_keep_prob=self.rnn_keep_prob,
                                  variational_recurrent=True,
                                  dtype=tf.float32)

        if self.b_train_decoder:
            # state_is_tuple=False: the state is one tensor, built here from two
            # copies of init_state.
            decoder_init_state = tf.concat(
                [self.init_state, self.init_state], axis=-1)
        else:
            decoder_init_state = None

        rnn_output, _ = rnn.dynamic_rnn(rnn_cell,
                                        self.rnn_input,
                                        initial_state=decoder_init_state,
                                        dtype=tf.float32,
                                        sequence_length=self.sequence_length)

        # Flatten so that the same dense weights are applied to all time steps.
        flat_output = tf.reshape(rnn_output, [-1, self.rnn_hidden_units])
        return tf.layers.dense(flat_output,
                               self.br_size,
                               activation=tf.nn.elu)
Пример #19
0
    def create_critic_network(self, Scope):
        """Build a critic network that emits one prediction row per input position.

        Args:
            Scope: Scope name. Its last character selects the embedding table
                ('e' -> ``self.wordvector``, otherwise ``self.target_wordvector``);
                it is also used as the ``dynamic_rnn`` scope and as the prefix
                of the fully connected layer scope.

        Returns:
            Tuple ``(inputs, lenth, out)``: the two placeholders and the output
            of the fully connected layer.
        """
        # A single sentence (batch of 1) of word ids and its length.
        inputs = tf.placeholder(shape=[1, None], dtype=tf.int32, name="inputs")
        lenth = tf.placeholder(shape=[1], dtype=tf.int32, name="lenth")
        # length = tf.placeholder(shape=[1], dtype=tf.int32, name="length")
        #Lower network
        if Scope[-1] == 'e':  # Active
            vec = tf.nn.embedding_lookup(self.wordvector, inputs)
        else:
            vec = tf.nn.embedding_lookup(self.target_wordvector, inputs)
        cell = LSTMCell(self.dim, initializer=self.init, state_is_tuple=False)

        # reuse=True: the LSTM variables under "Lower/<Scope>" must already exist.
        with tf.variable_scope("Lower", reuse=True):
            out, _ = tf.nn.dynamic_rnn(cell,
                                       vec,
                                       lenth,
                                       dtype=tf.float32,
                                       scope=Scope)
        # out = tf.gather(out[0], lenth-1)
        # out = tflearn.dropout(out, self.keep_prob)
        # Drop the batch axis of size 1 so every time step becomes one row.
        out = tf.squeeze(out, [0])
        out = tflearn.fully_connected(out,
                                      self.grained,
                                      scope=Scope + "/pred",
                                      name="get_pred")
        # Earlier experiment (disabled): truncate `out` to the first `lenth` rows.
        # length =tf.Session().run(lenth)
        # out = out[:length]
        """added by huiyanfei"""
        # log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(out, labels, lenth)
        return inputs, lenth, out
Пример #20
0
    def _rnn_encoder(self, sequence):
        """Encode ``sequence`` with a stack of dropout-wrapped LSTM cells.

        Sets ``self.length`` (placeholder of shape ``(batch_size,)``),
        ``self.enc_outs`` and ``self.enc_state``.

        Args:
            sequence: Inputs handed to ``tf.nn.dynamic_rnn``.

        Returns:
            List with the final hidden state ``h`` of every layer followed by
            the top-layer output at step ``length - 1`` of every sequence.
        """
        with tf.variable_scope("sequence_encoder"):
            in_cell = MultiRNNCell(
                [
                    tf.contrib.rnn.DropoutWrapper(
                        LSTMCell(self.input_size, state_is_tuple=True),
                        output_keep_prob=self.keep_prob,
                        state_keep_prob=self.keep_prob
                    ) for _ in range(self.num_layers)
                ],
                state_is_tuple=True
            )

            # Random (not zero) initial state; the same tensor is used as both
            # c and h of every layer.
            state = tf.random_normal((self.batch_size, self.input_size))
            initial_state = (LSTMStateTuple(state, state),) * self.num_layers
            #initial_state = in_cell.zero_state(self.batch_size, tf.float32)

            # using length we select the last output per sequence which
            # represents the sequence encoding
            self.length = tf.placeholder(tf.int32, shape=(self.batch_size,), name="lengths")
            self.enc_outs, self.enc_state = tf.nn.dynamic_rnn(
                in_cell,
                inputs=sequence,
                initial_state=initial_state,
                sequence_length=self.length,
                dtype=tf.float32
            )

            # self.length already has shape (batch_size,). Squeezing it would
            # turn it into a scalar when batch_size == 1 and break the stack
            # with tf.range(batch_size) below, so it is used as is.
            last_indices = tf.stack(
                [tf.range(self.batch_size), self.length - 1], axis=1)
            last_output = tf.gather_nd(self.enc_outs, last_indices)

            hidden_states = [
                tf.convert_to_tensor(layer_state.h)
                for layer_state in self.enc_state
            ]
            return hidden_states + [last_output]
Пример #21
0
def RNN(X, weights, biases):
    """Run a stacked LSTM over ``X`` with ``static_rnn`` and project the last output.

    Args:
        X: Input tensor of shape (batch_size, n_steps, n_input). It was
            previously ignored in favour of the module-level
            ``batchX_placeholder``; the argument is now actually used.
        weights: Mapping whose ``'out'`` entry is the projection matrix.
        biases: Mapping whose ``'out'`` entry is the projection bias.

    Returns:
        ``matmul(outputs[-1], weights['out']) + biases['out']``.
    """
    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
    # Permuting batch_size and n_steps
    X = tf.transpose(X, [1, 0, 2])
    # Reshaping to (n_steps*batch_size, n_input)
    # NOTE(review): the module-level `truncated_backprop_length` is used as the
    # feature width here and `truncated_num` as the number of steps - confirm.
    X = tf.reshape(X, [-1, truncated_backprop_length])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    X = tf.split(X, truncated_num, 0)

    # One independent dropout-wrapped LSTM cell per layer.
    cell = MultiRNNCell([
        DropoutWrapper(LSTMCell(state_size), output_keep_prob=dropout)
        for _ in range(num_layers)
    ])

    # Forward passes
    outputs, current_state = tf.contrib.rnn.static_rnn(cell,
                                                       X,
                                                       dtype=tf.float32)
    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1],
                     weights['out']) + biases['out']  #, outputs, current_state
Пример #22
0
    def create_critic_network(self, Scope):
        """Build a critic network that emits one prediction for a whole sentence.

        Args:
            Scope: Scope name. Its last character selects the embedding table
                ('e' -> ``self.wordvector``, otherwise ``self.target_wordvector``);
                it is also used as the ``dynamic_rnn`` scope and as the prefix
                of the fully connected layer scope.

        Returns:
            Tuple ``(inputs, lenth, out)``: the two placeholders and the output
            of the fully connected layer for the sentence representation.
        """
        inputs = tf.placeholder(shape=[1, self.max_lenth],
                                dtype=tf.int32,
                                name="inputs")
        # lenth is the number of real words (shorter sentences are padded).
        lenth = tf.placeholder(shape=[1], dtype=tf.int32, name="lenth")

        # Lower network
        if Scope[-1] == 'e':
            vec = tf.nn.embedding_lookup(self.wordvector, inputs)
        else:
            vec = tf.nn.embedding_lookup(self.target_wordvector, inputs)
        cell = LSTMCell(self.dim, initializer=self.init, state_is_tuple=False)

        # The whole (padded) sentence is fed to dynamic_rnn.
        # reuse=True: the LSTM variables under "Lower/<Scope>" must already exist.
        with tf.variable_scope("Lower", reuse=True):
            out, _ = tf.nn.dynamic_rnn(cell,
                                       vec,
                                       lenth,
                                       dtype=tf.float32,
                                       scope=Scope)

        # Use the hidden output of the last *real* word as the sentence
        # representation. dynamic_rnn zeroes the outputs past `lenth`, so taking
        # the last padded position (out[:, -1]) yields an all-zero vector for
        # every sentence shorter than max_lenth. Gathering with the rank-1
        # `lenth` keeps the leading batch axis of size 1.
        # Assumes lenth >= 1 (an empty sentence would index position -1).
        out = tf.gather(out[0], lenth - 1)

        out = tflearn.dropout(out, self.keep_prob)
        out = tflearn.fully_connected(out,
                                      self.grained,
                                      scope=Scope + "/pred",
                                      name="get_pred")
        return inputs, lenth, out
Пример #23
0
def _bilstm_block(inputs,
                  num_units,
                  scope,
                  dropout_keep_prob=1.0,
                  seq_len=None,
                  reuse=False):
    """Run a bidirectional LSTM over ``inputs`` inside variable scope ``scope``.

    :param inputs: input tensor handed to ``tf.nn.bidirectional_dynamic_rnn``
    :param num_units: number of units of the LSTM cell
    :param scope: scope name
    :param dropout_keep_prob: output keep probability of the dropout wrappers
    :param seq_len: optional sequence lengths
    :param reuse: ``reuse`` flag of the variable scope
    :output: tuple (outputs, output_states) where outputs is a tuple (output_fw, output_bw)
    """
    with tf.variable_scope(scope, reuse=reuse):
        # One LSTM cell is deliberately shared by the forward and the backward
        # direction; each direction only gets its own dropout wrapper.
        shared_cell = LSTMCell(num_units=num_units)
        fw_cell = DropoutWrapper(shared_cell,
                                 output_keep_prob=dropout_keep_prob)
        bw_cell = DropoutWrapper(shared_cell,
                                 output_keep_prob=dropout_keep_prob)
        return tf.nn.bidirectional_dynamic_rnn(cell_fw=fw_cell,
                                               cell_bw=bw_cell,
                                               inputs=inputs,
                                               sequence_length=seq_len,
                                               dtype=tf.float32)
Пример #24
0
    def __init__(self, input_dim, lstm_size, max_length, num_classes=3, learning_rate=0.001, num_layers=1, bidirectionoal=False):
        """Build an LSTM sequence classifier graph.

        Args:
            input_dim: Feature size of every time step.
            lstm_size: Number of units of every LSTM cell.
            max_length: Number of time steps of the input placeholder.
            num_classes: Size of the logits layer.
            learning_rate: Learning rate of the Adam optimizer.
            num_layers: Number of stacked LSTM layers (per direction).
            bidirectionoal: When truthy, a bidirectional RNN is used and the
                forward/backward outputs are concatenated along axis 2.

        Attributes set: ``inputs``, ``targets``, ``seq_lens``, ``logits``,
        ``loss`` and ``train_op``.
        """
        def _build_cell():
            # A single LSTM cell, or a stack of them when several layers are requested.
            if num_layers > 1:
                return tf.contrib.rnn.MultiRNNCell(
                    [LSTMCell(lstm_size) for _ in range(num_layers)])
            return LSTMCell(lstm_size)

        with tf.variable_scope('placeholders'):
            self.inputs = tf.placeholder(tf.float32, [None, max_length, input_dim], 'inputs')
            self.targets = tf.placeholder(tf.int32, [None], 'targets')
            self.seq_lens = tf.placeholder(tf.int32, [None], 'seq_lens')

        with tf.variable_scope('lstm'):
            if bidirectionoal:
                fw_cell = _build_cell()
                bw_cell = _build_cell()
                direction_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                    fw_cell, bw_cell, self.inputs,
                    sequence_length=self.seq_lens, dtype=tf.float32)
                # Join forward and backward features, then pick the output
                # selected by last_relevant for every sequence.
                joined = tf.concat(direction_outputs, axis=2)
                lasts = last_relevant(joined, self.seq_lens)
            else:
                step_outputs, _ = tf.nn.dynamic_rnn(
                    _build_cell(), self.inputs,
                    sequence_length=self.seq_lens, dtype=tf.float32)
                lasts = last_relevant(step_outputs, self.seq_lens)

        with tf.variable_scope('dense'):
            self.logits = tf.layers.dense(lasts, num_classes)

        with tf.variable_scope('loss'):
            per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.targets, logits=self.logits)
            self.loss = tf.reduce_mean(per_example)

        with tf.variable_scope('optimizer'):
            self.train_op = tf.train.AdamOptimizer(learning_rate).minimize(self.loss)
Пример #25
0
def train():
    """Train the BiLSTM-CRF sequence tagger, checkpointing after every epoch.

    The function takes no arguments. Its configuration and data come from
    names defined outside this function: ``embeddings``, ``hidden_dim``,
    ``label_num``, ``clip_grad``, ``epoch_num``, ``batch_size``, ``lr``,
    ``dropout_keep_prob``, ``train_data``, ``vocab_index_dict``,
    ``data_helper`` and ``get_logger``.

    Artifacts are written under ``./model_save/<unix timestamp>/``:
    ``summaries`` (event files for the loss scalar and the graph),
    ``checkpoints/`` (saved at the last batch of each epoch) and
    ``results/log.txt`` (training log).
    """
    timestamp = str(int(time.time()))
    output_path = os.path.join('.', "model_save", timestamp)
    summary_path = os.path.join(output_path, "summaries")
    model_path = os.path.join(output_path, "checkpoints/")
    result_path = os.path.join(output_path, "results")
    for directory in (output_path, model_path, result_path):
        if not os.path.exists(directory):
            os.makedirs(directory)
    log_path = os.path.join(result_path, "log.txt")
    logger = get_logger(log_path)

    graph = tf.Graph()
    with graph.as_default():
        # Inputs: padded token ids and tag ids per sentence, plus true lengths.
        word_ids = tf.placeholder(tf.int32,
                                  shape=[None, None],
                                  name="word_ids")
        labels = tf.placeholder(tf.int32, shape=[None, None], name="labels")
        sequence_lengths = tf.placeholder(tf.int32,
                                          shape=[None],
                                          name="sequence_lengths")
        dropout_pl = tf.placeholder(dtype=tf.float32, shape=[], name="dropout")
        lr_pl = tf.placeholder(dtype=tf.float32, shape=[], name="lr")

        with tf.variable_scope("words"):
            _word_embeddings = tf.Variable(embeddings,
                                           dtype=tf.float32,
                                           trainable=True,
                                           name="_word_embeddings")
            word_embeddings = tf.nn.embedding_lookup(params=_word_embeddings,
                                                     ids=word_ids,
                                                     name="word_embeddings")
        word_embeddings = tf.nn.dropout(word_embeddings, dropout_pl)

        # Bidirectional LSTM encoder; both directions are concatenated on the
        # feature axis, hence the 2 * hidden_dim width used below.
        cell_fw = LSTMCell(hidden_dim)
        cell_bw = LSTMCell(hidden_dim)
        (output_fw_seq, output_bw_seq), _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=word_embeddings,
            sequence_length=sequence_lengths,
            dtype=tf.float32)
        output = tf.concat([output_fw_seq, output_bw_seq], axis=-1)
        output = tf.nn.dropout(output, dropout_pl)

        # Per-token linear projection to tag scores: flatten to 2-D for the
        # matmul, then restore the (batch, time, label_num) layout.
        W = tf.get_variable(name="W",
                            shape=[2 * hidden_dim, label_num],
                            initializer=tf.contrib.layers.xavier_initializer(),
                            dtype=tf.float32)
        b = tf.get_variable(name="b",
                            shape=[label_num],
                            initializer=tf.zeros_initializer(),
                            dtype=tf.float32)
        s = tf.shape(output)
        output = tf.reshape(output, [-1, 2 * hidden_dim])
        pred = tf.matmul(output, W) + b
        logits = tf.reshape(pred, [-1, s[1], label_num])

        labels_softmax_ = tf.argmax(logits, axis=-1)
        labels_softmax_ = tf.cast(labels_softmax_, tf.int32)

        # CRF layer: the loss is the negative mean sequence log-likelihood.
        log_likelihood, transition_params = crf_log_likelihood(
            inputs=logits,
            tag_indices=labels,
            sequence_lengths=sequence_lengths)
        loss = -tf.reduce_mean(log_likelihood)
        tf.summary.scalar("loss", loss)

        with tf.variable_scope("train_step"):
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optim = tf.train.AdamOptimizer(learning_rate=lr_pl)
            grads_and_vars = optim.compute_gradients(loss)
            # compute_gradients yields (None, var) for variables the loss does
            # not depend on; clip_by_value cannot take None, so skip those.
            grads_and_vars_clip = [
                (tf.clip_by_value(g, -clip_grad, clip_grad), v)
                for g, v in grads_and_vars
                if g is not None
            ]
            train_op = optim.apply_gradients(grads_and_vars_clip,
                                             global_step=global_step)

        init_op = tf.global_variables_initializer()

        saver = tf.train.Saver(tf.global_variables())

    # Ceiling division: the last batch of an epoch may be smaller.
    num_batches = (len(train_data) + batch_size - 1) // batch_size

    with tf.Session(graph=graph) as sess:
        sess.run(init_op)
        merged = tf.summary.merge_all()
        file_writer = tf.summary.FileWriter(summary_path, sess.graph)
        try:
            for epoch in range(epoch_num):
                start_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                           time.localtime())
                batches = data_helper.batch_yield(train_data, batch_size,
                                                  vocab_index_dict,
                                                  data_helper.tag2label)
                for step, (seqs, labs) in enumerate(batches):
                    batch_no = step + 1
                    sys.stdout.write(
                        ' processing: {} batch / {} batches.'.format(
                            batch_no, num_batches) + '\r')
                    # Running step counter across epochs, used for summaries
                    # and checkpoint names.
                    step_num = epoch * num_batches + batch_no
                    w_ids, seq_len_list = data_helper.pad_sequences(seqs,
                                                                    pad_mark=0)
                    labels_, _ = data_helper.pad_sequences(labs, pad_mark=0)
                    feed_dict = {
                        word_ids: w_ids,
                        sequence_lengths: seq_len_list,
                        labels: labels_,
                        lr_pl: lr,
                        dropout_pl: dropout_keep_prob
                    }
                    _, loss_train, summary = sess.run(
                        [train_op, loss, merged], feed_dict=feed_dict)
                    # Log the first batch, every 300th batch and the last one.
                    if (batch_no == 1 or batch_no % 300 == 0
                            or batch_no == num_batches):
                        logger.info(
                            '{} epoch {}, step {}, loss: {:.4}, global_step: {}'.
                            format(start_time, epoch + 1, batch_no, loss_train,
                                   step_num))
                    file_writer.add_summary(summary, step_num)
                    if batch_no == num_batches:
                        saver.save(sess, model_path, global_step=step_num)
        finally:
            # Flush buffered events and release the event file even when
            # training is interrupted by an exception.
            file_writer.close()
Пример #26
0
def predict():
    """Restore the latest checkpoint and print Viterbi-decoded tags for a sample sentence.

    The function takes no arguments and returns nothing; the decoded label
    sequences are printed. It relies on names defined outside this function:
    ``embeddings``, ``hidden_dim``, ``label_num``, ``clip_grad``,
    ``batch_size``, ``vocab_index_dict``, ``restore_model_path``,
    ``data_helper``, ``crf_log_likelihood`` and ``viterbi_decode``.

    The graph is rebuilt with the same layers and variable names as the
    training graph, including the optimizer.
    NOTE(review): the optimizer is presumably kept so the variable set handed
    to the Saver matches what the checkpoint contains; confirm before
    removing it.
    """
    graph = tf.Graph()
    with graph.as_default():
        word_ids = tf.placeholder(tf.int32,
                                  shape=[None, None],
                                  name="word_ids")
        labels = tf.placeholder(tf.int32, shape=[None, None], name="labels")
        sequence_lengths = tf.placeholder(tf.int32,
                                          shape=[None],
                                          name="sequence_lengths")
        dropout_pl = tf.placeholder(dtype=tf.float32, shape=[], name="dropout")
        lr_pl = tf.placeholder(dtype=tf.float32, shape=[], name="lr")

        with tf.variable_scope("words"):
            _word_embeddings = tf.Variable(embeddings,
                                           dtype=tf.float32,
                                           trainable=True,
                                           name="_word_embeddings")
            word_embeddings = tf.nn.embedding_lookup(params=_word_embeddings,
                                                     ids=word_ids,
                                                     name="word_embeddings")
        word_embeddings = tf.nn.dropout(word_embeddings, dropout_pl)

        # Bidirectional LSTM encoder; both directions are concatenated on the
        # feature axis, hence the 2 * hidden_dim width used below.
        cell_fw = LSTMCell(hidden_dim)
        cell_bw = LSTMCell(hidden_dim)
        (output_fw_seq, output_bw_seq), _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=word_embeddings,
            sequence_length=sequence_lengths,
            dtype=tf.float32)
        output = tf.concat([output_fw_seq, output_bw_seq], axis=-1)
        output = tf.nn.dropout(output, dropout_pl)

        # Per-token linear projection to tag scores: flatten to 2-D for the
        # matmul, then restore the (batch, time, label_num) layout.
        W = tf.get_variable(name="W",
                            shape=[2 * hidden_dim, label_num],
                            initializer=tf.contrib.layers.xavier_initializer(),
                            dtype=tf.float32)
        b = tf.get_variable(name="b",
                            shape=[label_num],
                            initializer=tf.zeros_initializer(),
                            dtype=tf.float32)
        s = tf.shape(output)
        output = tf.reshape(output, [-1, 2 * hidden_dim])
        pred = tf.matmul(output, W) + b
        logits = tf.reshape(pred, [-1, s[1], label_num])

        labels_softmax_ = tf.argmax(logits, axis=-1)
        labels_softmax_ = tf.cast(labels_softmax_, tf.int32)

        log_likelihood, transition_params = crf_log_likelihood(
            inputs=logits,
            tag_indices=labels,
            sequence_lengths=sequence_lengths)
        loss = -tf.reduce_mean(log_likelihood)
        tf.summary.scalar("loss", loss)

        with tf.variable_scope("train_step"):
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optim = tf.train.AdamOptimizer(learning_rate=lr_pl)
            grads_and_vars = optim.compute_gradients(loss)
            grads_and_vars_clip = [[
                tf.clip_by_value(g, -clip_grad, clip_grad), v
            ] for g, v in grads_and_vars]
            train_op = optim.apply_gradients(grads_and_vars_clip,
                                             global_step=global_step)

        saver = tf.train.Saver(tf.global_variables())

    with tf.Session(graph=graph) as sess:
        module_file = tf.train.latest_checkpoint(restore_model_path)
        saver.restore(sess, module_file)
        sent = '我是中国人'
        # Alternative sample input: sent = '深圳市宝安区'
        sent = list(sent.strip())
        # Dummy 'U' tag per character so the sentence fits the (tokens, tags)
        # pairs that batch_yield consumes; the tags are not fed to the graph.
        sent_data = [(sent, ['U'] * len(sent))]

        label_list = []
        # The yielded tag batch is unused here; binding it to `_` avoids
        # shadowing the `labels` placeholder.
        for seqs, _ in data_helper.batch_yield(sent_data,
                                               batch_size,
                                               vocab_index_dict,
                                               data_helper.tag2label,
                                               shuffle=False):
            word_ids_, seq_len_list = data_helper.pad_sequences(seqs,
                                                                pad_mark=0)
            feed_dict = {
                word_ids: word_ids_,
                sequence_lengths: seq_len_list,
                dropout_pl: 1
            }
            # Keep the fetched arrays under their own names: rebinding
            # `logits` / `transition_params` would replace the graph tensors
            # and break sess.run on the next batch.
            batch_logits, batch_transitions = sess.run(
                [logits, transition_params], feed_dict=feed_dict)
            for logit, seq_len in zip(batch_logits, seq_len_list):
                # Decode only the unpadded prefix of each sequence.
                viterbi_seq, _ = viterbi_decode(logit[:seq_len],
                                                batch_transitions)
                label_list.append(viterbi_seq)
        print(label_list)
Пример #27
0
                    tf.float32, initializer=xavier_initializer())
# Builds the remaining model variables, looks up word embeddings, unrolls a
# single LSTM cell by hand over the sequence and turns the per-step outputs
# into vocabulary-sized predictions. `conf`, `data`, `embedding_matrix` and
# `output_matrix` are defined above this excerpt.

# 1-D bias with one entry per vocabulary word.
output_bias = tf.get_variable("bias", [conf.vocab_size], 
                    tf.float32, initializer=xavier_initializer())
# Maps LSTM outputs from num_hidden_state down/up to proj_hidden_state.
projection_matrix = tf.get_variable("projection", [conf.num_hidden_state, conf.proj_hidden_state], 
                    tf.float32, initializer=xavier_initializer())

# embedding lookup
# NOTE(review): the concrete shapes quoted on the next two lines presumably
# come from the author's config (batch 64, 29 steps, 100-d embeddings); confirm.
word_embeddings = tf.nn.embedding_lookup(embedding_matrix, data) # shape: (64, 29, 1, 100)
word_embeddings = tf.reshape(word_embeddings, [conf.batch_size, conf.seq_length -1, conf.embed_size]) #shape: (64, 29, 100)
# Static-shape sanity check on the reshaped embeddings.
assert word_embeddings.shape == (conf.batch_size, conf.seq_length - 1, conf.embed_size)

# RNN unrolling
print("creating RNN")
lstm_outputs = []
with tf.variable_scope("rnn") as scope:
    cell = LSTMCell(conf.num_hidden_state)
    state = cell.zero_state(conf.batch_size, tf.float32)
    for i in range(conf.seq_length - 1):
        # After the first step the scope is switched to reuse mode so that
        # every time step shares the variables created at step 0.
        if i > 0:
            scope.reuse_variables()
        # Feed the i-th time step of every sequence and carry the state on.
        lstm_output, state = cell(word_embeddings[:, i, :], state)
        lstm_outputs.append(lstm_output)

# stack the outputs together, reshape, project, multiply
# Stack the per-step outputs along axis 1 (time), then flatten batch and time
# into one axis so the two matmuls below run on a 2-D tensor.
lstm_outputs = tf.stack(lstm_outputs, axis = 1)
lstm_outputs = tf.reshape(lstm_outputs, [conf.batch_size * (conf.seq_length - 1), conf.num_hidden_state])
assert lstm_outputs.shape == (conf.batch_size * (conf.seq_length - 1), conf.num_hidden_state)
lstm_outputs = tf.matmul(lstm_outputs, projection_matrix)
# One row of scores per (sequence, time step) pair.
predictions = tf.matmul(lstm_outputs, output_matrix) + output_bias

# reshape the labels
Пример #28
0
    def __init__(self, config_dict):
        """Configure the model from ``config_dict`` and create its variables.

        A deep copy of ``config_dict`` is consumed key by key, so the caller's
        dictionary is left untouched. Required keys: ``mode``,
        ``word_vocab_size``, ``word_embedding_dim``, ``fine_tune_embeddings``,
        ``rnn_hidden_size``, ``share_encoder_weights``, ``rnn_output_mode``,
        ``output_keep_prob``, ``num_sentence_words`` and ``contrastive_loss``.
        ``word_embedding_matrix`` is optional and defaults to ``None``.
        Any keys left over after that are reported with a warning.

        Raises
        ------
        KeyError
            If a required key is missing from ``config_dict``.
        """
        remaining = deepcopy(config_dict)
        take = remaining.pop
        super(SiameseBiLSTM, self).__init__(mode=take("mode"))

        # Vocabulary / embedding settings.
        self.word_vocab_size = take("word_vocab_size")
        self.word_embedding_dim = take("word_embedding_dim")
        self.word_embedding_matrix = take("word_embedding_matrix", None)
        self.fine_tune_embeddings = take("fine_tune_embeddings")

        # Encoder settings.
        self.rnn_hidden_size = take("rnn_hidden_size")
        self.share_encoder_weights = take("share_encoder_weights")
        self.rnn_output_mode = take("rnn_output_mode")
        self.output_keep_prob = take("output_keep_prob")

        self.num_sentence_words = take("num_sentence_words")
        # The attention width is tied to the RNN hidden size instead of being
        # read from an "att_dim" config entry.
        self.att_dim = self.rnn_hidden_size

        # Attention weights are only trainable in 'train' mode. Their input
        # width is twice the hidden size.
        is_training = self.mode == 'train'
        bi_hidden = 2 * self.rnn_hidden_size

        self.multi_ATT1 = tf.get_variable(
            name='w1',
            shape=(bi_hidden, self.att_dim),
            trainable=is_training)
        self.multi_ATT2 = tf.get_variable(
            name='w2',
            shape=(self.att_dim, self.num_sentence_words),
            trainable=is_training)

        self.ATT1 = tf.get_variable(
            name='w3',
            shape=(bi_hidden, self.att_dim),
            trainable=is_training)
        self.ATT2 = tf.get_variable(
            name='w4',
            shape=(self.att_dim, 1),
            trainable=is_training)

        self.use_contrastive = take("contrastive_loss")
        self.margin = 1.25  # margin for contrastive loss

        # The embedding variable is declared the same way in every mode; only
        # training seeds it from the matrix passed in. Outside training no
        # initializer is given because a loaded model is expected to already
        # hold the embedding values.
        emb_kwargs = {
            "dtype": "float",
            "shape": [self.word_vocab_size, self.word_embedding_dim],
            "trainable": self.fine_tune_embeddings,
        }
        if self.mode == "train":
            emb_kwargs["initializer"] = tf.constant_initializer(
                self.word_embedding_matrix)
        self.word_emb_mat = tf.get_variable("word_emb_mat", **emb_kwargs)

        # Forward and backward cells of the bidirectional encoder.
        self.rnn_cell_fw = LSTMCell(self.rnn_hidden_size, state_is_tuple=True)
        self.rnn_cell_bw = LSTMCell(self.rnn_hidden_size, state_is_tuple=True)

        if remaining:
            logger.warning(
                "UNUSED VALUES IN CONFIG DICT: {}".format(remaining))
Пример #29
0
 def _single_rnn_cell(self):
     """Return a new LSTM cell of ``self.rnn_size`` units wrapped in output dropout.

     The dropout wrapper keeps outputs with probability ``self.keep_prob``.
     """
     return DropoutWrapper(LSTMCell(self.rnn_size),
                           output_keep_prob=self.keep_prob)
Пример #30
0
    def build_decoder(self,
                      encoder_output,
                      encoder_state,
                      triple_input,
                      decoder_input,
                      train_mode=True):
        """Build the attention decoder for training or for inference.

        A stack of ``self.num_layers`` cells is created; the cell type is GRU
        or LSTM when ``self.cell_class`` is ``'GRU'`` or ``'LSTM'`` and
        ``RNNCell`` otherwise. Everything else is built inside the
        ``'decoder'`` variable scope with ``reuse=tf.AUTO_REUSE``. When
        ``self.use_trans_select`` is set, ``self.transfer_matching`` is applied
        to ``encoder_output`` and ``triple_input`` and the result is passed to
        the attention setup as extra context; otherwise that context is
        ``None``.

        Parameters
        ----------
        encoder_output
            Encoder outputs passed to ``prepare_attention``.
        encoder_state
            Encoder state handed to the decoder function.
        triple_input
            Only used by ``self.transfer_matching``.
        decoder_input
            Decoder inputs; only used when ``train_mode`` is true.
        train_mode : bool
            Selects the training decoder (default) or the inference decoder.

        Returns
        -------
        In training mode, the result of applying the output function to the
        decoder outputs; in inference mode, the first element returned by
        ``dynamic_rnn_decoder``.
        """
        if self.cell_class == 'GRU':
            cell_type = GRUCell
        elif self.cell_class == 'LSTM':
            cell_type = LSTMCell
        else:
            cell_type = RNNCell
        decoder_cell = MultiRNNCell(
            [cell_type(self.num_units) for _ in range(self.num_layers)])

        with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE) as scope:
            # Optional extra attention context.
            kd_context = (self.transfer_matching(encoder_output, triple_input)
                          if self.use_trans_select else None)

            if not train_mode:
                # Inference: the decoder function produces each next input
                # itself, so no decoder inputs or lengths are passed.
                attention_keys, attention_values, attention_construct_fn = \
                    prepare_attention(encoder_output, kd_context, 'bahdanau',
                                      self.num_units, reuse=tf.AUTO_REUSE)
                output_fn = create_output_fn(vocab_size=self.vocab_size)
                decoder_fn_inference = attention_decoder_inference(
                    num_units=self.num_units,
                    num_decoder_symbols=self.vocab_size,
                    output_fn=output_fn,
                    encoder_state=encoder_state,
                    attention_keys=attention_keys,
                    attention_values=attention_values,
                    attention_construct_fn=attention_construct_fn,
                    embeddings=self.word_embed,
                    start_of_sequence_id=GO_ID,
                    end_of_sequence_id=EOS_ID,
                    maximum_length=self.max_length)
                decoder_distribution, _, _ = dynamic_rnn_decoder(
                    cell=decoder_cell,
                    decoder_fn=decoder_fn_inference,
                    scope=scope)
                return decoder_distribution

            # Training: decode over the provided inputs, then project the
            # decoder outputs with the output function.
            attention_keys, attention_values, attention_construct_fn = \
                prepare_attention(encoder_output, kd_context, 'bahdanau',
                                  self.num_units)
            decoder_fn_train = attention_decoder_train(
                encoder_state=encoder_state,
                attention_keys=attention_keys,
                attention_values=attention_values,
                attention_construct_fn=attention_construct_fn)
            decoder_output, _, _ = dynamic_rnn_decoder(
                cell=decoder_cell,
                decoder_fn=decoder_fn_train,
                inputs=decoder_input,
                sequence_length=self.responses_length,
                scope=scope)
            output_fn = create_output_fn(vocab_size=self.vocab_size)
            return output_fn(decoder_output)