Example #1
        def RNN(x, weights, biases):
            # Prepare data shape to match `rnn` function requirements
            # Current data input shape: (batch_size, timesteps, n_input)
            # Required shape: 'timesteps' tensors list of shape (batch_size, n_input)

            # Unstack to get a list of 'timesteps' tensors of shape (batch_size, n_input)
            x = tf.unstack(x, timesteps, 1)

            # Define a lstm cell with tensorflow
            lstm_cell1 = rnn.LSTMBlockCell(num_hidden, forget_bias=1.0)
            #lstm_cell = rnn.BasicRNNCell(num_hidden)
            #lstm_cell = rnn.PhasedLSTMCell(num_hidden)
            #lstm_cell2 = rnn.PhasedLSTMCell(num_hidden)
            lstm_cell1 = tf.nn.rnn_cell.DropoutWrapper(lstm_cell1,
                                                       output_keep_prob=0.75)
            lstm_cell2 = rnn.LSTMBlockCell(num_hidden,
                                           forget_bias=1.0,
                                           use_peephole=True)
            lstm_cell2 = tf.nn.rnn_cell.DropoutWrapper(lstm_cell2,
                                                       output_keep_prob=0.75)
            # Stack the two cells; repeating the same wrapped cell objects four
            # times means the repeated layers share weights.
            lstm_cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell1, lstm_cell2] * 4)
            # Get lstm cell output
            outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

            # Linear activation, using rnn inner loop last output
            return tf.matmul(outputs[-1], weights['out']) + biases['out']
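
A minimal usage sketch for this function, assuming the TF 1.x imports these examples rely on (`import tensorflow as tf`, `from tensorflow.contrib import rnn`) and MNIST-style hyperparameters; every name and value below is illustrative, not taken from the original project:

import tensorflow as tf
from tensorflow.contrib import rnn

# Hypothetical hyperparameters, for illustration only.
timesteps, n_input, num_hidden, num_classes = 28, 28, 128, 10

x_ph = tf.placeholder(tf.float32, [None, timesteps, n_input])
weights = {'out': tf.Variable(tf.random_normal([num_hidden, num_classes]))}
biases = {'out': tf.Variable(tf.random_normal([num_classes]))}
logits = RNN(x_ph, weights, biases)  # shape: (batch_size, num_classes)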
Example #2
def bi_lstm_class(input_,
                  n_hidden=256,
                  n_steps=32,
                  n_input=54,
                  num_class=10,
                  name='class_bi_lstm'):
    with tf.variable_scope(name):

        input_x = tf.unstack(input_, n_steps, 1)
        lstm_fw_cell = rnn.LSTMBlockCell(n_hidden, forget_bias=1.0)
        lstm_bw_cell = rnn.LSTMBlockCell(n_hidden, forget_bias=1.0)

        x = []
        for i in range(n_steps - 1):
            x.append(tf.concat([input_x[i], input_x[i + 1] - input_x[i]], 1))

        try:
            outputs, _, _ = rnn.stack_bidirectional_rnn([lstm_fw_cell],
                                                        [lstm_bw_cell],
                                                        x,
                                                        dtype=tf.float32)
        except Exception:  # Older TensorFlow versions return only the outputs, not the states
            outputs = rnn.stack_bidirectional_rnn([lstm_fw_cell],
                                                  [lstm_bw_cell],
                                                  x,
                                                  dtype=tf.float32)

        h = tf.concat(outputs, 1)

        h, h_w, h_b = linear(h, 1024, 'd_h3_lin', with_w=True)
        h = tf.nn.relu(h)

        h, h_w, h_b = linear(h, num_class, 'd_h4_lin', with_w=True)

        return h
Example #3
    def test_multi_rnn_lstm(self):
        units = 5
        batch_size = 6
        x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]],
                         dtype=np.float32)
        x_val = np.stack([x_val] * batch_size)

        x = tf.placeholder(tf.float32, x_val.shape, name="input_1")

        cell_0 = rnn.LSTMBlockCell(units)

        cell_1 = rnn.LSTMBlockCell(units)

        cell_2 = rnn.LSTMBlockCell(units)

        cells = rnn.MultiRNNCell([cell_0, cell_1, cell_2], state_is_tuple=True)
        outputs, cell_state = tf.nn.dynamic_rnn(cells, x, dtype=tf.float32)

        _ = tf.identity(outputs, name="output")
        _ = tf.identity(cell_state, name="cell_state")

        input_names_with_port = ["input_1:0"]
        feed_dict = {"input_1:0": x_val}

        output_names_with_port = ["output:0", "cell_state:0"]
        self.run_test_case(feed_dict,
                           input_names_with_port,
                           output_names_with_port,
                           rtol=1e-03,
                           atol=1e-06)
Example #4
    def test_dynamic_bilstm_state_consumed_only(self):
        units = 5
        batch_size = 6
        x_val = np.array([[1., 1.], [2., 2.], [3., 3.]], dtype=np.float32)
        x_val = np.stack([x_val] * batch_size)

        x = tf.placeholder(tf.float32, x_val.shape, name="input_1")

        # bilstm, no scope
        cell1 = rnn.LSTMBlockCell(units)
        cell2 = rnn.LSTMBlockCell(units)
        _, cell_state = tf.nn.bidirectional_dynamic_rnn(cell1,
                                                        cell2,
                                                        x,
                                                        dtype=tf.float32)

        _ = tf.identity(cell_state, name="cell_state")

        feed_dict = {"input_1:0": x_val}
        input_names_with_port = ["input_1:0"]
        output_names_with_port = ["cell_state:0"]
        self.run_test_case(feed_dict,
                           input_names_with_port,
                           output_names_with_port,
                           rtol=1e-3,
                           atol=1e-06,
                           graph_validator=lambda g: check_lstm_count(g, 1))
Example #5
def bilstm_filter(input,
                  mask,
                  keep_prob,
                  prefix='lstm',
                  dim=50,
                  is_training=True):
    with tf.variable_scope(name_or_scope=prefix, reuse=tf.AUTO_REUSE):
        sequence = tf.cast(tf.reduce_sum(mask, 1), tf.int32)
        lstm_fw_cell = rnn.LSTMBlockCell(
            dim, forget_bias=1.0
        )  # initializer=tf.orthogonal_initializer(), state_is_tuple=True
        # back directions
        lstm_bw_cell = rnn.LSTMBlockCell(dim, forget_bias=1.0)
        # Use an output keep-probability of 0.8 only when training with
        # dropout enabled; otherwise keep everything.
        keep_rate = tf.cond(keep_prob < 1, lambda: 0.8,
                            lambda: 1.0) if is_training else 1.0
        cell_dp_fw = rnn.DropoutWrapper(cell=lstm_fw_cell,
                                        output_keep_prob=keep_rate)
        cell_dp_bw = rnn.DropoutWrapper(cell=lstm_bw_cell,
                                        output_keep_prob=keep_rate)
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_dp_fw,
            cell_dp_bw,
            input,
            sequence_length=sequence,
            swap_memory=False,
            dtype=tf.float32)  # batch major
    return outputs
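
A hedged usage sketch for this filter, assuming a batch-major float input of shape (batch, time, features) and a 0/1 padding mask; the placeholder names and sizes below are illustrative, not from the original code:

import tensorflow as tf

tokens = tf.placeholder(tf.float32, [None, 40, 300], name="tokens")
mask = tf.placeholder(tf.float32, [None, 40], name="mask")
keep_prob = tf.placeholder_with_default(1.0, shape=[], name="keep_prob")

# `outputs` is a (forward, backward) pair, each of shape (batch, time, dim).
fw_out, bw_out = bilstm_filter(tokens, mask, keep_prob,
                               prefix='lstm', dim=50, is_training=True)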
Example #6
            def BiRNN(x):

                # `bidirectional_dynamic_rnn` consumes the batch-major input
                # directly, shape: (batch_size, n_steps, n_input), so no
                # unstacking into a per-step list is needed here.
                # Define lstm cells with tensorflow
                # Forward direction cell
                #lstm_fw_cell = rnn.DropoutWrapper(tf.nn.rnn_cell.LSTMCell(n_hidden, forget_bias=1.0), self.keep_prob2) #, use_peepholes=True)
                lstm_fw_cell = rnn.DropoutWrapper(
                    rnn.LSTMBlockCell(n_hidden, forget_bias=1.0),
                    self.keep_prob2)
                # Backward direction cell
                #lstm_bw_cell = rnn.DropoutWrapper(tf.nn.rnn_cell.LSTMCell(n_hidden, forget_bias=1.0), self.keep_prob2, use_peepholes=True)
                lstm_bw_cell = rnn.DropoutWrapper(
                    rnn.LSTMBlockCell(n_hidden, forget_bias=1.0),
                    self.keep_prob2)
                # Get lstm cell output
                """
                try:
                    outputs, _, _ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                                          dtype=tf.float32)
                except Exception: # Old TensorFlow version only returns outputs not states
                    outputs,_ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell, lstm_bw_cell,x,
                                                    dtype=tf.float32)"""
                outputs, _ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell,
                                                             lstm_bw_cell,
                                                             x,
                                                             dtype=tf.float32)
                # Linear activation, using rnn inner loop last output
                #return tf.matmul(outputs[-1], weights['out']) + biases['out']
                return tf.concat(outputs, 2)
                """
Example #7
    def test_attention_wrapper_lstm_encoder(self):
        size = 5
        time_step = 3
        input_size = 4
        attn_size = size

        batch_size = 9

        # shape  [batch size, time step, size]
        # attention_state: usually the output of an RNN encoder.
        # This tensor should be shaped `[batch_size, max_time, ...]`
        encoder_time_step = time_step
        encoder_x_val = np.random.randn(encoder_time_step,
                                        input_size).astype('f')
        encoder_x_val = np.stack([encoder_x_val] * batch_size)
        encoder_x = tf.placeholder(tf.float32,
                                   encoder_x_val.shape,
                                   name="input_1")
        encoder_cell = rnn.LSTMBlockCell(size)
        output, attr_state = tf.nn.dynamic_rnn(encoder_cell,
                                               encoder_x,
                                               dtype=tf.float32)
        _ = tf.identity(output, name="output_0")
        attention_states = output
        attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
            attn_size, attention_states)

        match_input_fn = lambda curr_input, state: tf.concat(
            [curr_input, state], axis=-1)
        cell = rnn.LSTMBlockCell(size)
        match_cell_fw = tf.contrib.seq2seq.AttentionWrapper(
            cell,
            attention_mechanism,
            attention_layer_size=attn_size,
            cell_input_fn=match_input_fn,
            output_attention=False)

        decoder_time_step = 6
        decoder_x_val = np.random.randn(decoder_time_step,
                                        input_size).astype('f')
        decoder_x_val = np.stack([decoder_x_val] * batch_size)

        decoder_x = tf.placeholder(tf.float32,
                                   decoder_x_val.shape,
                                   name="input_2")
        output, attr_state = tf.nn.dynamic_rnn(match_cell_fw,
                                               decoder_x,
                                               dtype=tf.float32)

        _ = tf.identity(output, name="output")
        _ = tf.identity(attr_state.cell_state, name="final_state")

        feed_dict = {"input_1:0": encoder_x_val, "input_2:0": decoder_x_val}
        input_names_with_port = ["input_1:0", "input_2:0"]
        output_names_with_port = ["output_0:0", "output:0", "final_state:0"]
        self.run_test_case(feed_dict, input_names_with_port,
                           output_names_with_port, 0.1)
Example #8
def bidirectional_lstm(input_,
                       cond,
                       n_hidden=256,
                       n_steps=32,
                       n_input=54,
                       name='bidirec_lstm'):
    with tf.variable_scope(name):

        print('new_lstm discrim')
        # weights = tf.get_variable('weights', [4096, 1],
        #                     initializer=tf.random_normal_initializer(stddev=0.02))

        # biases = tf.get_variable('biases', [1], initializer=tf.constant_initializer(0.0))

        # Prepare data shape to match `bidirectional_rnn` function requirements
        # Current data input shape: (batch_size, n_steps, n_input)
        # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

        # Unstack to get a list of 'n_steps' tensors of shape (batch_size, n_input)
        input_x = tf.unstack(input_, n_steps, 1)
        # print(image.shape)
        # print('-----------------------------------x shape: ', x[0].get_shape())

        # Calculate shifts
        x = []
        for i in range(n_steps - 1):
            x.append(
                tf.concat([input_x[i], input_x[i + 1] - input_x[i], cond], 1))

        # Define lstm cells with tensorflow
        # Forward direction cell
        lstm_fw_cell = rnn.LSTMBlockCell(n_hidden, forget_bias=1.0)
        # Backward direction cell
        lstm_bw_cell = rnn.LSTMBlockCell(n_hidden, forget_bias=1.0)

        # Get lstm cell output
        try:
            outputs, _, _ = rnn.stack_bidirectional_rnn([lstm_fw_cell],
                                                        [lstm_bw_cell],
                                                        x,
                                                        dtype=tf.float32)
        except Exception:  # Old TensorFlow version only returns outputs not states
            outputs = rnn.stack_bidirectional_rnn([lstm_fw_cell],
                                                  [lstm_bw_cell],
                                                  x,
                                                  dtype=tf.float32)

        h = tf.concat(outputs, 1)

        h, h_w, h_b = linear(h, 1024, 'd_h3_lin', with_w=True)
        h = tf.nn.relu(h)

        h, h_w, h_b = linear(h, 1, 'd_h4_lin', with_w=True)

        return h
Example #9
 def bilstm_layer(self, data, keep_prob):
     x = tf.unstack(data, self.max_seq_len, 1)
     lstm_fw_cell = rnn.LSTMBlockCell(num_units=self.lstm_hidden_unit_size)
     lstm_fw_cell_dropout = rnn.DropoutWrapper(cell=lstm_fw_cell,
                                               input_keep_prob=keep_prob,
                                               output_keep_prob=keep_prob,
                                               state_keep_prob=keep_prob)
     lstm_bw_cell = rnn.LSTMBlockCell(num_units=self.lstm_hidden_unit_size)
     lstm_bw_cell_dropout = rnn.DropoutWrapper(cell=lstm_bw_cell,
                                               input_keep_prob=keep_prob,
                                               output_keep_prob=keep_prob,
                                               state_keep_prob=keep_prob)
     rnn_output, _, _ = rnn.static_bidirectional_rnn(
         lstm_fw_cell_dropout,
         lstm_bw_cell_dropout,
         x,
         sequence_length=self.seq_length_placeholder,
         dtype=tf.float32)
     return rnn_output
Example #10
    def test_single_dynamic_lstm_seq_length_is_not_const(self):
        units = 5
        batch_size = 6
        x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.], [5., 5.]],
                         dtype=np.float32)
        x_val = np.stack([x_val] * batch_size)
        x = tf.placeholder(tf.float32, x_val.shape, name="input_1")

        y_val = np.array([4, 3, 4, 5, 2, 1], dtype=np.int32)
        seq_length = tf.placeholder(tf.int32, y_val.shape, name="input_2")

        # no scope
        cell = rnn.LSTMBlockCell(units)
        outputs, cell_state = tf.nn.dynamic_rnn(
            cell, x, dtype=tf.float32, sequence_length=tf.identity(seq_length))

        _ = tf.identity(outputs, name="output")
        _ = tf.identity(cell_state, name="cell_state")

        feed_dict = {"input_1:0": x_val, "input_2:0": y_val}
        input_names_with_port = ["input_1:0", "input_2:0"]
        output_names_with_port = ["output:0", "cell_state:0"]
        self.run_test_case(feed_dict,
                           input_names_with_port,
                           output_names_with_port,
                           rtol=1e-3,
                           atol=1e-06,
                           graph_validator=lambda g: check_lstm_count(g, 1))
Example #11
    def test_single_dynamic_lstm_with_cell_clip(self):
        units = 5
        batch_size = 6
        x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]],
                         dtype=np.float32)
        x_val = np.stack([x_val] * batch_size)

        x = tf.placeholder(tf.float32, x_val.shape, name="input_1")

        # no scope
        cell = rnn.LSTMBlockCell(units, cell_clip=0.05)
        outputs, cell_state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)

        _ = tf.identity(outputs, name="output")
        _ = tf.identity(cell_state, name="cell_state")

        input_names_with_port = ["input_1:0"]
        feed_dict = {"input_1:0": x_val}

        output_names_with_port = ["output:0", "cell_state:0"]
        self.run_test_case(feed_dict,
                           input_names_with_port,
                           output_names_with_port,
                           rtol=1e-03,
                           atol=1e-06)
Example #12
    def test_single_dynamic_lstm_consume_one_of_ch_tuple(self):
        units = 5
        batch_size = 6
        x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]],
                         dtype=np.float32)
        x_val = np.stack([x_val] * batch_size)

        x = tf.placeholder(tf.float32, x_val.shape, name="input_1")
        # no scope
        cell = rnn.LSTMBlockCell(units)
        outputs, cell_state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)

        _ = tf.identity(outputs, name="output")
        _ = tf.identity(cell_state.c, name="cell_state_c")
        _ = tf.identity(cell_state.h, name="cell_state_h")

        feed_dict = {"input_1:0": x_val}
        input_names_with_port = ["input_1:0"]
        output_names_with_port = [
            "output:0", "cell_state_c:0", "cell_state_h:0"
        ]
        self.run_test_case(feed_dict,
                           input_names_with_port,
                           output_names_with_port,
                           rtol=1e-3,
                           atol=1e-06,
                           graph_validator=lambda g: check_lstm_count(g, 1))
Example #13
    def test_single_dynamic_lstm_time_major(self):
        units = 5
        seq_len = 6
        x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]],
                         dtype=np.float32)
        x_val = np.stack([x_val] * seq_len)

        x = tf.placeholder(tf.float32, x_val.shape, name="input_1")

        # no scope
        cell = rnn.LSTMBlockCell(units)
        outputs, cell_state = tf.nn.dynamic_rnn(cell,
                                                x,
                                                time_major=True,
                                                dtype=tf.float32)

        _ = tf.identity(outputs, name="output")
        _ = tf.identity(cell_state, name="cell_state")

        input_names_with_port = ["input_1:0"]
        feed_dict = {"input_1:0": x_val}

        output_names_with_port = ["output:0", "cell_state:0"]
        self.run_test_case(feed_dict,
                           input_names_with_port,
                           output_names_with_port,
                           rtol=1e-3,
                           atol=1e-06,
                           graph_validator=lambda g: check_lstm_count(g, 1))
Example #14
        def create_lstm_cell(layer):
            if hyperparameters.layer_norm:
                if hyperparameters.num_proj:
                    raise Exception(
                        'No support for layer normalization together with projection layer.'
                    )

                cell = rnn.LayerNormBasicLSTMCell(
                    hyperparameters.lstm_state_size,
                    # here, we use the local variable dropout that is set to 0
                    # if we are evaluating.
                    dropout_keep_prob=1 - dropout,
                    layer_norm=hyperparameters.layer_norm)
            else:
                if hyperparameters.num_proj:
                    cell = rnn.LSTMCell(hyperparameters.lstm_state_size,
                                        num_proj=hyperparameters.num_proj)
                else:
                    cell = rnn.LSTMBlockCell(hyperparameters.lstm_state_size,
                                             forget_bias=0)

                if dropout > 0:
                    cell = rnn.DropoutWrapper(cell,
                                              output_keep_prob=1 - dropout)

            return cell
Example #15
def rnn_cell(rnn_cell_size, dropout_keep_prob):
    """Builds an LSTMBlockCell based on the given parameters."""
    cells = []
    for layer_size in rnn_cell_size:
        cell = rnn.LSTMBlockCell(layer_size)
        cell = rnn.DropoutWrapper(cell, input_keep_prob=dropout_keep_prob)
        cells.append(cell)
    return rnn.MultiRNNCell(cells)
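
A minimal sketch of driving this helper with `tf.nn.dynamic_rnn`, assuming the same TF 1.x imports as above; the layer sizes, input shape, and keep probability are illustrative only:

import tensorflow as tf
from tensorflow.contrib import rnn

inputs = tf.placeholder(tf.float32, [None, 100, 64])  # (batch, time, features)
cell = rnn_cell(rnn_cell_size=[256, 256], dropout_keep_prob=0.9)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)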
Example #16
 def _get_lstm_cell(self, rnn_mode, hidden_size, is_training):
     if rnn_mode == BASIC:
         return tfrnn.LSTMCell(hidden_size,
                               state_is_tuple=True,
                               reuse=not is_training)
     if rnn_mode == BLOCK:
         return tfrnn.LSTMBlockCell(hidden_size)
     raise ValueError('rnn mode {} not supported'.format(rnn_mode))
Example #17
    def test_multiple_dynamic_lstm(self):
        units = 5
        batch_size = 6
        x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]],
                         dtype=np.float32)
        x_val = np.stack([x_val] * batch_size)

        x = tf.placeholder(tf.float32, x_val.shape, name="input_1")
        _ = tf.placeholder(tf.float32, x_val.shape, name="input_2")

        lstm_output_list = []
        lstm_cell_state_list = []
        # no scope
        cell = rnn.LSTMBlockCell(units)
        outputs, cell_state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
        lstm_output_list.append(outputs)
        lstm_cell_state_list.append(cell_state)

        # given scope
        cell = rnn.LSTMBlockCell(units)
        with variable_scope.variable_scope("root1") as scope:
            outputs, cell_state = tf.nn.dynamic_rnn(
                cell,
                x,
                dtype=tf.float32,
                sequence_length=[4, 4, 4, 4, 4, 4],
                scope=scope)
        lstm_output_list.append(outputs)
        lstm_cell_state_list.append(cell_state)

        _ = tf.identity(lstm_output_list, name="output")
        _ = tf.identity(lstm_cell_state_list, name="cell_state")

        feed_dict = {"input_1:0": x_val}
        input_names_with_port = ["input_1:0"]
        output_names_with_port = ["output:0", "cell_state:0"]
        self.run_test_case(feed_dict,
                           input_names_with_port,
                           output_names_with_port,
                           rtol=1e-3,
                           atol=1e-06,
                           graph_validator=lambda g: check_lstm_count(g, 2))
Example #18
File: ptb_word_lm.py  Project: RM1708/PTB
 def _get_lstm_cell(self, config, is_training):
     if config.rnn_mode == BASIC:
         #      return tf.contrib.rnn.BasicLSTMCell(
         return rnn.BasicLSTMCell(config.hidden_size,
                                  forget_bias=0.0,
                                  state_is_tuple=True,
                                  reuse=not is_training)
     if config.rnn_mode == BLOCK:
         #      return tf.contrib.rnn.LSTMBlockCell(
         return rnn.LSTMBlockCell(config.hidden_size, forget_bias=0.0)
     raise ValueError("rnn_mode %s not supported" % config.rnn_mode)
Example #19
def rnn_cell(rnn_cell_size, dropout_keep_prob, residual, is_training=True):
    """Builds an LSTMBlockCell based on the given parameters."""
    dropout_keep_prob = dropout_keep_prob if is_training else 1.0
    cells = []
    for i in range(len(rnn_cell_size)):
        cell = rnn.LSTMBlockCell(rnn_cell_size[i])
        if residual:
            cell = rnn.ResidualWrapper(cell)
            if i == 0 or rnn_cell_size[i] != rnn_cell_size[i - 1]:
                cell = rnn.InputProjectionWrapper(cell, rnn_cell_size[i])
        cell = rnn.DropoutWrapper(cell, input_keep_prob=dropout_keep_prob)
        cells.append(cell)
    return rnn.MultiRNNCell(cells)
Example #20
def RNN(x):
    # Define a lstm cell with tensorflow
    lstm_cell = rnn.LSTMBlockCell(
        num_hidden, forget_bias=1.0)

    # Get lstm cell output
    # outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
    outputs, states = tf.nn.dynamic_rnn(
        cell=lstm_cell, inputs=x, time_major=False, dtype=tf.float32)
    
    output_layer = tf.layers.Dense(
        num_classes, activation=None, 
        kernel_initializer=tf.orthogonal_initializer()
    )
    return output_layer(tf.layers.batch_normalization(outputs[:, -1, :]))
Example #21
def RNN(x, weights, biases):
    
    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

    # Unstack to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.unstack(x, n_steps, 1)

    # Define a lstm cell with tensorflow
    #lstm_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    #lstm_cell = rnn.GRUCell(n_hidden)
    lstm_cell = rnn.BasicLSTMCell(n_hidden,
                                  forget_bias=1.0,
                                  state_is_tuple=True,
                                  reuse=tf.get_variable_scope().reuse)
    #lstm_cell_bk = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True, reuse=tf.get_variable_scope().reuse)
    lstm_cell_bk = rnn.LSTMBlockCell(n_hidden, forget_bias=1.0)

    # make the deep rnn
    no_of_layers = 3  # number of layers in the deep RNN
    stacked_lstm = rnn.MultiRNNCell([
        #rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True, reuse=tf.get_variable_scope().reuse)
        rnn.LSTMCell(n_hidden,
                     use_peepholes=True,
                     forget_bias=1.0,
                     state_is_tuple=True,
                     reuse=tf.get_variable_scope().reuse)
        for _ in range(no_of_layers)
    ])

    stacked_lstm_bk = rnn.MultiRNNCell([
        #rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True, reuse=tf.get_variable_scope().reuse)
        rnn.LSTMCell(n_hidden,
                     use_peepholes=True,
                     forget_bias=1.0,
                     state_is_tuple=True,
                     reuse=tf.get_variable_scope().reuse)
        for _ in range(no_of_layers)
    ])

    # providing the dropout for rnn
    #lstm_cell = rnn.DropoutWrapper(lstm_cell, output_keep_prob=0.5)  # for rnn
    stacked_lstm = rnn.DropoutWrapper(stacked_lstm, output_keep_prob=0.5)  # for deep rnn

    # Get lstm cell output
    #outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)  # single layer rnn
    #outputs, states = rnn.static_rnn(stacked_lstm, x, dtype=tf.float32)  # deep rnn
    #outputs, states, states_bk = rnn.static_bidirectional_rnn(lstm_cell, lstm_cell_bk, x, dtype=tf.float32)  # single layer dirnn
    #outputs, states, states_bk = rnn.static_bidirectional_rnn(stacked_lstm, stacked_lstm_bk, x, dtype=tf.float32)  # deep dirnn
    outputs, states, states_bk = rnn.stack_bidirectional_rnn(
        [rnn.GRUCell(n_hidden) for _ in range(no_of_layers)],
        [rnn.GRUCell(n_hidden) for _ in range(no_of_layers)],
        x,
        dtype=tf.float32)  # deep dirnn
    
    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
Example #22
    def __init__(self,
                 num_actions,
                 observation_names,
                 lstm_num_hiddens=256,
                 feed_action_and_reward=True,
                 max_reward=1.0,
                 name="streetlearn_core"):
        """Initializes an agent core designed to be used with A3C/IMPALA.

    Supports a single visual observation tensor and outputs a single, scalar
    discrete action with policy logits and a baseline value.

    Args:
      num_actions: Number of actions available.
      observation_names: String with observation types separated by semi-colon.
      lstm_num_hiddens: Number of hiddens in the LSTM core.
      feed_action_and_reward: If True, the last action (one hot) and last reward
          (scalar) will be concatenated to the torso.
      max_reward: If `feed_action_and_reward` is True, the last reward will
          be clipped to `[-max_reward, max_reward]`. If `max_reward`
          is None, no clipping will be applied. N.B., this is different from
          reward clipping during gradient descent, or reward clipping by the
          environment.
      name: Optional name for the module.
    """
        super(PlainAgent, self).__init__(name='agent')

        # Policy config
        self._num_actions = num_actions
        tf.logging.info('Agent trained on %d-action policy', self._num_actions)
        # Append last reward (clipped) and last action?
        self._feed_action_and_reward = feed_action_and_reward
        self._max_reward = max_reward
        # Policy LSTM core config
        self._lstm_num_hiddens = lstm_num_hiddens
        # Extract the observation names
        observation_names = observation_names.split(';')
        self._idx_frame = observation_names.index('view_image')

        with self._enter_variable_scope():
            tf.logging.info('LSTM core with %d hiddens',
                            self._lstm_num_hiddens)
            self._core = contrib_rnn.LSTMBlockCell(self._lstm_num_hiddens)
Example #23
    def build_graph(self, input, nextinput):
        # embedding
        embeddingW = tf.get_variable('embedding',
                                     [self.vocab_size, self.num_rnn_unit])
        input_feature = tf.nn.embedding_lookup(embeddingW, input)
        input_list = tf.unstack(input_feature, axis=1)

        # rnn
        cell = rnn.MultiRNNCell([
            rnn.LSTMBlockCell(num_units=self.num_rnn_unit)
            for _ in range(self.num_rnn_layer)
        ])

        def get_v(n):
            ret = tf.get_variable(n + '_unused',
                                  [BATCH_SIZE, self.num_rnn_unit],
                                  trainable=False,
                                  initializer=tf.constant_initializer())
            ret = tf.placeholder_with_default(ret,
                                              shape=[None, self.num_rnn_unit],
                                              name=n)
            return ret

        initial_state = [
            rnn.LSTMStateTuple(get_v('c{}'.format(i)), get_v('h{}'.format(i)))
            for i in range(self.num_rnn_layer)
        ]

        outputs, last_state = rnn.static_rnn(cell, input_list, initial_state)
        last_state = tf.identity(last_state, 'last_state')

        # FC
        output = tf.reshape(tf.concat(outputs, 1), [-1, self.num_rnn_unit])
        logits = FullyConnected('fc', output, self.vocab_size)
        tf.nn.softmax(logits, name='prob')

        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=tf.reshape(nextinput, [-1]))
        cost = tf.reduce_mean(loss, name='cost')
        summary.add_moving_summary(cost)
        return cost
Example #24
    def BuildLSTM(self, name, input_x, initstate, weights, biases, reuse):
        with tf.variable_scope("LSTM") as scope:
            if reuse:
                tf.get_variable_scope().reuse_variables()
            #processing the input tensor from [batch_size,n_steps,n_input] to "time_steps" number of [batch_size,n_input] tensors
            #input_x = tf.unstack(input_x ,self.num_steps,axis = 1)
            cells = []
            for _ in range(self.num_layers):
                cell = rnn.LSTMBlockCell(self.hidden_size, forget_bias=1)
                cell = tf.contrib.rnn.DropoutWrapper(cell,
                                                     output_keep_prob=1.0 -
                                                     self.dropout)
                cells.append(cell)
            cell = tf.contrib.rnn.MultiRNNCell(
                cells
            )  #RNN cell composed sequentially of multiple simple cells.

            outputs, states = tf.nn.dynamic_rnn(
                cell, input_x, dtype="float32")  #dynamic unrolling of inputs
            reuse = True
            tf.get_variable_scope().reuse_variables()

            return outputs
Example #25
    def test_single_dynamic_lstm_placeholder_input(self):
        units = 5
        x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]],
                         dtype=np.float32)
        x_val = np.stack([x_val] * 6)
        x = tf.placeholder(tf.float32, shape=(None, 4, 2), name="input_1")

        # no scope
        cell = rnn.LSTMBlockCell(units)
        outputs, cell_state = tf.nn.dynamic_rnn(
            cell, x, dtype=tf.float32)  # by default zero initializer is used

        _ = tf.identity(outputs, name="output")
        _ = tf.identity(cell_state, name="cell_state")

        feed_dict = {"input_1:0": x_val}
        input_names_with_port = ["input_1:0"]
        output_names_with_port = ["output:0", "cell_state:0"]
        self.run_test_case(feed_dict,
                           input_names_with_port,
                           output_names_with_port,
                           rtol=1e-3,
                           atol=1e-06,
                           graph_validator=lambda g: check_lstm_count(g, 1))
Example #26
def RNN_BLOCK_LSTM(X):
    with tf.variable_scope('RNN'):
        lstm_cell = rnn.LSTMBlockCell(num_hidden, forget_bias=1.0)

        # lstm_cell = rnn.DropoutWrapper(
        #     rnn.LSTMBlockCell(num_hidden, forget_bias=1.0),
        #     input_keep_prob=0.5,
        #     output_keep_prob=0.5,
        #     state_keep_prob=0.5,
        #     dtype=tf.float32
        # )

        outputs, state = tf.nn.dynamic_rnn(cell=lstm_cell,
                                           inputs=X,
                                           dtype=tf.float32)

    batch_norm = tf.layers.batch_normalization(outputs[:, -1, :])

    logits = tf.layers.dense(inputs=batch_norm,
                             units=num_classes,
                             activation=None,
                             kernel_initializer=tf.orthogonal_initializer())

    return logits
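
A hedged usage sketch, assuming the module-level `num_hidden` and `num_classes` this function relies on and a batch-major input; the shapes and names are illustrative, not from the original project:

import tensorflow as tf
from tensorflow.contrib import rnn

num_hidden, num_classes = 128, 10  # hypothetical values for illustration

X = tf.placeholder(tf.float32, [None, 28, 28], name="X")
labels = tf.placeholder(tf.int64, [None], name="labels")

logits = RNN_BLOCK_LSTM(X)
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                   logits=logits))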
Example #27
 def _get_rnn_cell(self):
     with tf.variable_scope('rnn_cell'):
         lstm_cell = rnn.LSTMBlockCell(self.hidden_size, forget_bias=1.0)
         return rnn.DropoutWrapper(cell=lstm_cell,
                                   input_keep_prob=self._keep_prob)
Example #28
  if count > 0:
    print("%s %sdoes NOT match%s, error count = %d (out of %d) AVG=%g ABSAVG=%g"
          % (buf, RED + BOLD, ENDC, count, size, avg_ref, avg_abs_ref_orig))
  else:
    print("%s %sDOES match%s, size = %d AVG=%g ABSAVG=%g"
          % (buf, GREEN + BOLD, ENDC, size, avg_ref, avg_abs_ref_orig))

N = 64
C = 128
K = 192
T = 10
forget_bias = 1.0
tf.set_random_seed(1)
#x = tf.constant(-0.1, shape=[N, C], dtype=tf.float32)
#x2 = tf.constant(0.1, shape=[N, C], dtype=tf.float32)
x = tf.random_normal(shape=[N, C], dtype=tf.float32)  #+ 0.5
x2 = tf.random_normal(shape=[N, C], dtype=tf.float32)  #+ 0.5
lstm_cell_ref = rnn.LSTMBlockCell(K, forget_bias=forget_bias, name='test')
#lstm_cell_ref = rnn.BasicLSTMCell(K, forget_bias=forget_bias, name='test')
#lstm_cell = rnn.LSTMBlockCell(K, forget_bias=forget_bias, name='test', reuse=True)
lstm_cell = xsmm_lstm.XsmmLSTMCell(K,
                                   forget_bias=forget_bias,
                                   name='test',
                                   reuse=True)
init_state = lstm_cell_ref.zero_state(N, dtype=tf.float32)
x_fused = tf.convert_to_tensor([x] + [x2 for _ in range(T - 1)])
print("x_fused is: %s" % x_fused)
outputs_ref, states_ref = tf.nn.dynamic_rnn(lstm_cell_ref,
                                            x_fused,
                                            dtype=tf.float32,
                                            initial_state=init_state,
                                            time_major=True)
outputs, states = tf.nn.dynamic_rnn(lstm_cell,
                                    x_fused,
                                    dtype=tf.float32,
                                    initial_state=init_state,
                                    time_major=True)

init = tf.global_variables_initializer()
W = tf.global_variables()[0]
B = tf.global_variables()[1]

g_ref = tf.gradients(outputs_ref, [x_fused] + [W, B, init_state.c, init_state.h])
g = tf.gradients(outputs, [x_fused] + [W, B, init_state.c, init_state.h])
Example #29
    def __init__(self,
                 nqc,
                 value_encodings,
                 relation_encodings,
                 num_gpus=1,
                 encoder=None):
        """Builds a simple, fully-connected model to predict the outcome set given a query string.

    Args:
      nqc: NeuralQueryContext
      value_encodings: (bert features for values, length of value span)
      relation_encodings: (bert features for relations, length of relation span)
      num_gpus: number of gpus for distributed computation
      encoder: encoder (layers.RNN) for parameter sharing between train and dev

    Needs:
      self.input_ph: input to encoder (either one-hot or BERT layers)
      self.mask_ph: mask for the input
      self.correct_set_ph.name: target labels (if loss or accuracy is computed)
      self.prior_start: sparse matrix for string similarity features
      self.is_training: whether the model is training (for dropout)

    Exposes:
      self.loss: objective for loss
      self.accuracy: mean accuracy metric (P_{predicted}(gold))
      self.accuracy_per_ex: detailed per example accuracy
      self.log_nql_pred_set: predicted entity set (in nql)
      self.log_decoded_relations: predicted relations (as indices)
      self.log_start_values: predicted start values (in nql)
      self.log_start_cmps: components of predicted start values (in nql)
    """
        # Encodings should have the same dimensions
        assert value_encodings[0].shape[-1] == relation_encodings[0].shape[-1]
        self.context = nqc
        self.input_ph = tf.placeholder(tf.float32,
                                       shape=(None, FLAGS.max_query_length,
                                              value_encodings[0].shape[-1]),
                                       name="oh_seq_ph")
        self.mask_ph = tf.placeholder(tf.float32,
                                      shape=(None, FLAGS.max_query_length),
                                      name="oh_mask_ph")
        self.debug = None
        layer_size = FLAGS.layer_size
        num_layers = FLAGS.num_layers
        max_properties = FLAGS.max_properties
        logits_strategy = FLAGS.logits
        dropout_rate = FLAGS.dropout

        inferred_batch_size = tf.shape(self.input_ph)[0]
        self.is_training = tf.placeholder(tf.bool, shape=[])
        value_tensor = util.reshape_to_tensor(value_encodings[0],
                                              value_encodings[1])
        relation_tensor = util.reshape_to_tensor(relation_encodings[0],
                                                 relation_encodings[1])
        # The last state of LSTM encoder is the representation of the input string
        with tf.variable_scope("model"):
            # Build all the model parts:

            #   encoder: LSTM encoder
            #   prior: string features
            #   {value, relation}_similarity: learned embedding similarity
            #   decoder: LSTM decoder
            #   value_model: map from encoder to key for attention
            #   attention: Luong (dot product) attention

            # Builds encoder - note that this is in keras
            self.encoder = self._build_encoder(encoder, layer_size, num_layers)

            # Build module to turn prior (string features) into logits
            self.prior_start = tf.sparse.placeholder(
                tf.float32,
                name="prior_start_ph",
                shape=[inferred_batch_size, value_tensor.shape[1]])

            with tf.variable_scope("prior"):
                prior = Prior()

            # Build similarity module - biaffine qAr
            with tf.variable_scope("value_similarity"):
                value_similarity = Similarity(layer_size, value_tensor,
                                              num_gpus)
            # Build relation decoder
            with tf.variable_scope("relation_decoder"):
                rel_dec_rnn_layers = [
                    contrib_rnn.LSTMBlockCell(layer_size,
                                              name=("attr_lstm_%d" % i))
                    for (i, layer_size) in enumerate([layer_size] * num_layers)
                ]
                relation_decoder_cell = tf.nn.rnn_cell.MultiRNNCell(
                    rel_dec_rnn_layers)
                tf.logging.info(
                    "relation decoder lstm has state of size: {}".format(
                        relation_decoder_cell.state_size))

            # Build similarity module - biaffine qAr
            with tf.variable_scope("relation_similarity"):
                relation_similarity = Similarity(layer_size, relation_tensor,
                                                 1)
            with tf.variable_scope("attention"):
                attention = layers.Attention()
            value_model = tf.get_variable(
                "value_transform",
                shape=[layer_size, relation_decoder_cell.output_size],
                trainable=True)

        # Initialization for logging, variables shouldn't be used elsewhere
        log_decoded_starts = []
        log_start_logits = []
        log_decoded_relations = []

        # Initialization to prepare before first iteration of loop
        prior_logits_0 = prior.compute_logits(
            tf.sparse.to_dense(self.prior_start))
        cumulative_entities = nqc.all("id_t")
        relation_decoder_out = tf.zeros([inferred_batch_size, layer_size])
        encoder_output = self.encoder(self.input_ph, mask=self.mask_ph)
        query_encoder_out = encoder_output[0]
        relation_decoder_state = encoder_output[1:]

        # Initialization for property loss, equal to log vars but separating
        value_dist = []
        relation_dist = []

        for i in range(max_properties):
            prior_logits = tf.layers.dropout(prior_logits_0,
                                             rate=dropout_rate,
                                             training=self.is_training)
            # Use the last state to determine key; more stable than last output
            query_key = tf.nn.relu(
                tf.matmul(
                    tf.expand_dims(relation_decoder_state[-1][-1], axis=1),
                    value_model))

            query_emb = tf.squeeze(attention(
                [query_key, query_encoder_out],
                mask=[None, tf.cast(self.mask_ph, tf.bool)]),
                                   axis=1)

            similarity_logits = value_similarity.compute_logits(query_emb)
            if logits_strategy == "prior":
                total_logits = prior_logits
            elif logits_strategy == "sim":
                total_logits = similarity_logits
            elif logits_strategy == "mixed":
                total_logits = prior_logits + similarity_logits
            total_dist = contrib_layers.softmax(total_logits)
            values_pred = nqc.as_nql(total_dist, "val_g")
            with tf.variable_scope("start_follow_{}".format(i)):
                start_pred = nqc.all("v_t").follow(
                    values_pred)  # find starting nodes

            # Given the previous set of attributes, where are we going?
            (relation_decoder_out,
             relation_decoder_state) = relation_decoder_cell(
                 relation_decoder_out, relation_decoder_state)
            pred_relation = tf.nn.softmax(
                relation_similarity.compute_logits(relation_decoder_out))
            if FLAGS.enforce_type:
                if i == 0:
                    is_adjust = nqc.as_tf(nqc.one(IS_A, "rel_g"))
                else:
                    is_adjust = 1 - nqc.as_tf(nqc.one(IS_A, "rel_g"))
                pred_relation = pred_relation * is_adjust
            nql_pred_relation = nqc.as_nql(pred_relation, "rel_g")
            # Conjunctive (& start.follow() & start.follow()...).
            with tf.variable_scope("relation_follow_{}".format(i)):
                current_entities = start_pred.follow(nql_pred_relation)
            cumulative_entities = cumulative_entities & current_entities

            # For property loss and regularization
            value_dist.append(total_dist)
            relation_dist.append(pred_relation)

            # Store predictions for logging
            log_decoded_starts.append(start_pred)
            log_decoded_relations.append(pred_relation)
            log_start_logits.append([prior_logits, similarity_logits])

        (loss, pred_set_tf,
         pred_set_tf_norm) = self._compute_loss(cumulative_entities)
        property_loss = self._compute_property_loss(value_dist, relation_dist)
        (accuracy_per_ex,
         accuracy) = self._compute_accuracy(cumulative_entities, pred_set_tf)
        value_loss = self._compute_distribution_regularizer(value_dist)
        relation_loss = self._compute_distribution_regularizer(relation_dist)
        self.regularization = FLAGS.time_reg * (value_loss + relation_loss)
        self.loss = loss - self.regularization
        self.property_loss = property_loss
        self.accuracy_per_ex = accuracy_per_ex
        self.accuracy = accuracy

        # Debugging/logging information
        log_decoded_relations = tf.transpose(tf.stack(log_decoded_relations),
                                             [1, 0, 2])
        tf.logging.info("decoded relations has shape: {}".format(
            log_decoded_relations.shape))
        self.log_start_values = log_decoded_starts
        self.log_start_cmps = [[
            nqc.as_nql(logits, "val_g") for logits in comp
        ] for comp in log_start_logits]
        self.log_decoded_relations = tf.nn.top_k(log_decoded_relations, k=5)
        self.log_nql_pred_set = nqc.as_nql(pred_set_tf_norm, "id_t")
Example #30
 def lstm_fw():
     cell_fw = rnn.LSTMBlockCell(self.hidden_size)
     cell_fw = rnn.DropoutWrapper(cell_fw,
                                  output_keep_prob=self.keep_prob)
     return cell_fw