def rnn_estimator(X, y):
    """RNN estimator with target predictor function on top."""
    X = input_op_fn(X)
    if cell_type == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif cell_type == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif cell_type == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    else:
        raise ValueError("cell_type {} is not supported.".format(cell_type))
    if bidirection:
        # forward direction cell
        rnn_fw_cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
        # backward direction cell
        rnn_bw_cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
        # bidirectional_rnn needs the inputs and per-direction initial states
        encoding, _, _ = rnn.bidirectional_rnn(
            rnn_fw_cell, rnn_bw_cell, X, dtype=tf.float32,
            sequence_length=sequence_length,
            initial_state_fw=initial_state,
            initial_state_bw=initial_state)
    else:
        cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
        _, encoding = rnn.rnn(cell, X, dtype=tf.float32,
                              sequence_length=sequence_length,
                              initial_state=initial_state)
    return target_predictor_fn(encoding[-1], y)

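# A minimal standalone sketch of the cell-type dispatch above, assuming the
# same legacy tf.nn.rnn_cell API these snippets use (sizes illustrative):
cell_fns = {'rnn': rnn_cell.BasicRNNCell,
            'gru': rnn_cell.GRUCell,
            'lstm': rnn_cell.BasicLSTMCell}
stacked = rnn_cell.MultiRNNCell([cell_fns['gru'](64)] * 2)  # 2-layer GRU stack
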
def __init__(self, is_training, glove_word_vectors, vocabulary, config):
    self.size = config.hidden_size
    self.config = config
    self.is_training = is_training
    self.word_vec_size = config.word_vec_size
    vocab_size = config.vocab_size
    self.glove_word_vectors = glove_word_vectors
    self.vocabulary = vocabulary

    # Slightly better results can be obtained with forget gate biases
    # initialized to 1, but the hyperparameters of the model would then need
    # to differ from those reported in the paper.
    # TODO: this might be improved by using LSTMCell, which has extra
    # features, but that would require the sentence lengths.
    with tf.variable_scope("LeftLSTM"):
        self.left_lstm_cell = rnn_cell.BasicLSTMCell(self.size,
                                                     forget_bias=1.0)
    with tf.variable_scope("RightLSTM"):
        self.right_lstm_cell = rnn_cell.BasicLSTMCell(self.size,
                                                      forget_bias=1.0)

    if is_training and config.keep_prob < 1:
        with tf.variable_scope("LeftLSTM"):
            self.left_lstm_cell = rnn_cell.DropoutWrapper(
                self.left_lstm_cell, output_keep_prob=config.keep_prob)
        with tf.variable_scope("RightLSTM"):
            self.right_lstm_cell = rnn_cell.DropoutWrapper(
                self.right_lstm_cell, output_keep_prob=config.keep_prob)

    with tf.variable_scope("LeftLSTM"):
        self.left_lstm_cell = rnn_cell.MultiRNNCell(
            [self.left_lstm_cell] * config.num_layers)
    with tf.variable_scope("RightLSTM"):
        self.right_lstm_cell = rnn_cell.MultiRNNCell(
            [self.right_lstm_cell] * config.num_layers)

def build_lm_multicell_rnn(num_layers, hidden_size, word_proj_size,
                           use_lstm=True, hidden_projection=None,
                           input_feeding=False, dropout=0.0):
    if use_lstm:
        print("I'm building the model with LSTM cells")
        cell_class = rnn_cell.LSTMCell
    else:
        print("I'm building the model with GRU cells")
        if hidden_projection is not None:
            print("I'm ignoring the projection size for GRUs.")
            hidden_projection = None
        cell_class = GRUCell

    initializer = tf.random_uniform_initializer(minval=-0.1, maxval=0.1,
                                                seed=1234)

    if input_feeding:
        lm_cell0 = cell_class(num_units=hidden_size,
                              input_size=word_proj_size + hidden_size,
                              initializer=initializer,
                              num_proj=hidden_projection)
    else:
        lm_cell0 = cell_class(num_units=hidden_size,
                              input_size=hidden_size,
                              initializer=initializer,
                              num_proj=hidden_projection)
    lm_cell0 = rnn_cell.DropoutWrapper(lm_cell0,
                                       output_keep_prob=1.0 - dropout)

    if num_layers > 1:
        hidden_input = hidden_size
        if hidden_projection is not None:
            hidden_input = hidden_projection
        lm_cell1 = cell_class(num_units=hidden_size,
                              input_size=hidden_input,
                              initializer=initializer,
                              num_proj=hidden_projection)
        lm_cell1 = rnn_cell.DropoutWrapper(lm_cell1,
                                           output_keep_prob=1.0 - dropout)
        lm_rnncell = rnn_cell.MultiRNNCell([lm_cell0] +
                                           [lm_cell1] * (num_layers - 1))
    else:
        lm_rnncell = rnn_cell.MultiRNNCell([lm_cell0])

    return lm_rnncell

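# Hypothetical usage, assuming the legacy LSTMCell(num_units, input_size,
# initializer, num_proj) signature used above; sizes are illustrative:
lm_cell = build_lm_multicell_rnn(num_layers=2, hidden_size=256,
                                 word_proj_size=128, use_lstm=True,
                                 dropout=0.2)
lm_state = lm_cell.zero_state(32, tf.float32)  # state for a batch of 32
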
def __load_model(self, num_layers):
    # Initial memory value for recurrence.
    self.prev_mem = tf.zeros((self.train_batch_size, self.memory_dim))

    # choose RNN/GRU/LSTM cell
    with tf.variable_scope("forward"):
        fw_single_cell = rnn_cell.LSTMCell(self.memory_dim)
        # Stack layers of RNNs to form a stacked decoder
        self.forward_cell = rnn_cell.MultiRNNCell([fw_single_cell] * num_layers)
    with tf.variable_scope("backward"):
        bw_single_cell = rnn_cell.LSTMCell(self.memory_dim)
        # Stack layers of RNNs to form a stacked decoder
        self.backward_cell = rnn_cell.MultiRNNCell([bw_single_cell] * num_layers)

    # embedding model
    if not self.attention:
        with tf.variable_scope("forward"):
            self.dec_outputs_fwd, _ = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp_fwd, self.dec_inp, self.forward_cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("forward", reuse=True):
            self.dec_outputs_fwd_tst, _ = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp_fwd, self.dec_inp, self.forward_cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)
        with tf.variable_scope("backward"):
            self.dec_outputs_bwd, _ = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp_bwd, self.dec_inp, self.backward_cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("backward", reuse=True):
            self.dec_outputs_bwd_tst, _ = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp_bwd, self.dec_inp, self.backward_cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)
    else:
        with tf.variable_scope("forward"):
            self.dec_outputs_fwd, _ = seq2seq.embedding_attention_seq2seq(
                self.enc_inp_fwd, self.dec_inp, self.forward_cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("forward", reuse=True):
            self.dec_outputs_fwd_tst, _ = seq2seq.embedding_attention_seq2seq(
                self.enc_inp_fwd, self.dec_inp, self.forward_cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)
        with tf.variable_scope("backward"):
            self.dec_outputs_bwd, _ = seq2seq.embedding_attention_seq2seq(
                self.enc_inp_bwd, self.dec_inp, self.backward_cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("backward", reuse=True):
            self.dec_outputs_bwd_tst, _ = seq2seq.embedding_attention_seq2seq(
                self.enc_inp_bwd, self.dec_inp, self.backward_cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)

def build_nmt_multicell_rnn(num_layers_encoder, num_layers_decoder,
                            encoder_size, decoder_size, source_proj_size,
                            use_lstm=True, input_feeding=True, dropout=0.0):
    if use_lstm:
        print("I'm building the model with LSTM cells")
        cell_class = rnn_cell.LSTMCell
    else:
        print("I'm building the model with GRU cells")
        cell_class = GRUCell

    initializer = tf.random_uniform_initializer(minval=-0.1, maxval=0.1,
                                                seed=1234)

    encoder_cell = cell_class(num_units=encoder_size,
                              input_size=source_proj_size,
                              initializer=initializer)
    if input_feeding:
        decoder_cell0 = cell_class(num_units=decoder_size,
                                   input_size=decoder_size * 2,
                                   initializer=initializer)
    else:
        decoder_cell0 = cell_class(num_units=decoder_size,
                                   input_size=decoder_size,
                                   initializer=initializer)

    # if dropout is 0.0, it is turned off
    encoder_cell = rnn_cell.DropoutWrapper(encoder_cell,
                                           output_keep_prob=1.0 - dropout)
    encoder_rnncell = rnn_cell.MultiRNNCell([encoder_cell] * num_layers_encoder)

    decoder_cell0 = rnn_cell.DropoutWrapper(decoder_cell0,
                                            output_keep_prob=1.0 - dropout)
    if num_layers_decoder > 1:
        decoder_cell1 = cell_class(num_units=decoder_size,
                                   input_size=decoder_size,
                                   initializer=initializer)
        decoder_cell1 = rnn_cell.DropoutWrapper(decoder_cell1,
                                                output_keep_prob=1.0 - dropout)
        decoder_rnncell = rnn_cell.MultiRNNCell(
            [decoder_cell0] + [decoder_cell1] * (num_layers_decoder - 1))
    else:
        decoder_rnncell = rnn_cell.MultiRNNCell([decoder_cell0])

    return encoder_rnncell, decoder_rnncell

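# Hypothetical usage with illustrative sizes, assuming the same legacy
# LSTMCell signature as above; returns the stacked encoder and decoder cells
# that a seq2seq graph would consume:
encoder_cell, decoder_cell = build_nmt_multicell_rnn(
    num_layers_encoder=2, num_layers_decoder=2, encoder_size=256,
    decoder_size=256, source_proj_size=128, use_lstm=True, dropout=0.2)
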
def create_model(self):
    self.input_data = tf.placeholder(tf.int32,
                                     [self.batch_size, self.seq_length],
                                     name="input_data")
    self.target_data = tf.placeholder(tf.int32,
                                      [self.batch_size, self.seq_length],
                                      name="target_data")

    # define hyperparameters
    self.keep_prob = tf.Variable(0.3, trainable=False, name='keep_prob')
    self.lr = tf.Variable(0.0, trainable=False, name="lr")

    softmax_weights = tf.get_variable("softmax_weights",
                                      [self.rnn_size, self.vocab_size])
    softmax_biases = tf.get_variable("softmax_biases", [self.vocab_size])

    lstm_cell = rnn_cell.BasicLSTMCell(self.rnn_size)
    # if self.is_training and self.keep_prob < 1:
    #     lstm_cell = rnn_cell.DropoutWrapper(lstm_cell,
    #                                         output_keep_prob=self.keep_prob)
    multilayer_cell = rnn_cell.MultiRNNCell([lstm_cell] * self.num_layers)
    self.initial_state = multilayer_cell.zero_state(self.batch_size,
                                                    tf.float32)

    with tf.device("/cpu:0"):
        # define the embedding matrix for the whole vocabulary
        self.embedding = tf.get_variable("embeddings",
                                         [self.vocab_size, self.rnn_size])
        # look up the vector representation of each word in the input
        embeds = tf.nn.embedding_lookup(self.embedding, self.input_data)

    if self.is_training and self.keep_prob < 1:
        embeds = tf.nn.dropout(embeds, self.keep_prob)

    def loop(prev, _):
        prev = tf.nn.xw_plus_b(prev, softmax_weights, softmax_biases)
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(self.embedding, prev_symbol)

    # convert the input to a list of seq_length steps; each split has shape
    # (batch_size, 1, rnn_size), so squeeze it down to (batch_size, rnn_size)
    inputs = tf.split(1, self.seq_length, embeds)
    inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    output, states = seq2seq.rnn_decoder(
        inputs, self.initial_state, multilayer_cell,
        loop_function=loop if self.infer else None, scope='rnnlm')
    output = tf.reshape(tf.concat(1, output), [-1, self.rnn_size])

    self.logits = tf.nn.xw_plus_b(output, softmax_weights, softmax_biases)
    self.probs = tf.nn.softmax(self.logits, name="probability")
    loss = seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.target_data, [-1])],
        [tf.ones([self.batch_size * self.seq_length])],
        self.vocab_size)
    self.cost = tf.reduce_sum(loss) / (self.batch_size * self.seq_length)
    self.final_state = states[-1]

    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      self.grad_clip)
    optimizer = tf.train.AdamOptimizer(0.01)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

def __init__(self, config):
    lstm_cell = rnn_cell.BasicLSTMCell(config.n_hidden, forget_bias=0.0)
    cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

    self._train_op = tf.no_op()
    self._input_data = tf.placeholder(tf.float32,
                                      [config.batch_size, config.n_features])
    self._targets = tf.placeholder(tf.float32, [config.batch_size, 1])

    # project the raw features into the hidden dimension
    weights_hidden = tf.get_variable("weights_hidden",
                                     [config.n_features, config.n_hidden])
    bias_hidden = tf.get_variable("bias_hidden", [config.n_hidden])
    _X = tf.matmul(self._input_data, weights_hidden) + bias_hidden

    self._initial_state = cell.zero_state(config.batch_size, tf.float32)
    state = self._initial_state
    outputs, states = rnn.rnn(cell, tf.split(0, 1, _X), initial_state=state)

    pred = tf.matmul(outputs[-1],
                     tf.get_variable("weights_out", [config.n_hidden, 1])) + \
        tf.get_variable("bias_out", [1])
    self._final_state = states[-1]
    self._cost = cost = tf.reduce_mean(tf.square(pred - self._targets))

    if not config.is_training:
        return
    # optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=config.learning_rate).minimize(cost)
    self._train_op = optimizer

def __load_model(self, num_layers):
    # Initial memory value for recurrence.
    self.prev_mem = tf.zeros((self.train_batch_size, self.memory_dim))

    # choose RNN/GRU/LSTM cell
    with tf.variable_scope("train_test", reuse=True):
        gru = rnn_cell.GRUCell(self.memory_dim)
        # Stack layers of RNNs to form a stacked decoder
        self.cell = rnn_cell.MultiRNNCell([gru] * num_layers)

    # embedding model
    if not self.attention:
        with tf.variable_scope("train_test"):
            self.dec_outputs, self.dec_memory = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp, self.dec_inp, self.cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("train_test", reuse=True):
            self.dec_outputs_tst, _ = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp, self.dec_inp, self.cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)
    else:
        with tf.variable_scope("train_test"):
            self.dec_outputs, self.dec_memory = seq2seq.embedding_attention_seq2seq(
                self.enc_inp, self.dec_inp, self.cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("train_test", reuse=True):
            self.dec_outputs_tst, _ = seq2seq.embedding_attention_seq2seq(
                self.enc_inp, self.dec_inp, self.cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)

def build_lstm_inner(settings, lstm_input):
    '''build lstm decoder'''
    lstm_cell = rnn_cell.BasicLSTMCell(settings['lstm_size'], forget_bias=0.0,
                                       state_is_tuple=False)
    if settings['num_lstm_layers'] > 1:
        lstm = rnn_cell.MultiRNNCell([lstm_cell] * settings['num_lstm_layers'],
                                     state_is_tuple=False)
    else:
        lstm = lstm_cell

    batch_size = (settings['batch_size'] * settings['grid_height'] *
                  settings['grid_width'])
    state = tf.zeros([batch_size, lstm.state_size])

    outputs = []
    with tf.variable_scope(
            'RNN', initializer=tf.random_uniform_initializer(-0.1, 0.1)):
        for time_step in range(settings['rnn_len']):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            output, state = lstm(lstm_input, state)
            outputs.append(output)
    return outputs

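# Hypothetical call with illustrative settings; lstm_input is one feature
# vector per grid cell, re-fed at every step as in the decoder above:
settings = {'lstm_size': 128, 'num_lstm_layers': 2, 'batch_size': 4,
            'grid_height': 15, 'grid_width': 20, 'rnn_len': 5}
lstm_input = tf.placeholder(tf.float32, [4 * 15 * 20, 128])
decoder_outputs = build_lstm_inner(settings, lstm_input)
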
def __init__(self, rnn_size, rnn_layer, batch_size, input_embedding_size,
             dim_image, dim_hidden, max_words_q, vocabulary_size,
             drop_out_rate):
    self.rnn_size = rnn_size
    self.rnn_layer = rnn_layer
    self.batch_size = batch_size
    self.input_embedding_size = input_embedding_size
    self.dim_image = dim_image
    self.dim_hidden = dim_hidden
    self.max_words_q = max_words_q
    self.vocabulary_size = vocabulary_size
    self.drop_out_rate = drop_out_rate

    # question embedding
    self.embed_ques_W = tf.Variable(
        tf.random_uniform([self.vocabulary_size, self.input_embedding_size],
                          -0.08, 0.08), name='embed_ques_W')

    # RNN encoder: two peephole LSTM layers with output dropout
    self.lstm_1 = rnn_cell.LSTMCell(rnn_size, input_embedding_size,
                                    use_peepholes=True)
    self.lstm_dropout_1 = rnn_cell.DropoutWrapper(
        self.lstm_1, output_keep_prob=1 - self.drop_out_rate)
    self.lstm_2 = rnn_cell.LSTMCell(rnn_size, rnn_size, use_peepholes=True)
    self.lstm_dropout_2 = rnn_cell.DropoutWrapper(
        self.lstm_2, output_keep_prob=1 - self.drop_out_rate)
    self.stacked_lstm = rnn_cell.MultiRNNCell(
        [self.lstm_dropout_1, self.lstm_dropout_2])

    # state embedding
    self.embed_state_W = tf.Variable(
        tf.random_uniform([2 * rnn_size * rnn_layer, self.dim_hidden],
                          -0.08, 0.08), name='embed_state_W')
    self.embed_state_b = tf.Variable(
        tf.random_uniform([self.dim_hidden], -0.08, 0.08),
        name='embed_state_b')
    # image embedding
    self.embed_image_W = tf.Variable(
        tf.random_uniform([dim_image, self.dim_hidden], -0.08, 0.08),
        name='embed_image_W')
    self.embed_image_b = tf.Variable(
        tf.random_uniform([dim_hidden], -0.08, 0.08), name='embed_image_b')
    # score embedding (num_output is a free variable of the enclosing module)
    self.embed_scor_W = tf.Variable(
        tf.random_uniform([dim_hidden, num_output], -0.08, 0.08),
        name='embed_scor_W')
    self.embed_scor_b = tf.Variable(
        tf.random_uniform([num_output], -0.08, 0.08), name='embed_scor_b')

def build_lstm_inner(H, lstm_input):
    '''build lstm decoder'''
    # lstm_cell = rnn_cell.BasicLSTMCell(H['lstm_size'], forget_bias=0.0, state_is_tuple=True)
    if H['num_lstm_layers'] > 1:
        # lstm = rnn_cell.MultiRNNCell([lstm_cell] * H['num_lstm_layers'], state_is_tuple=True)
        lstm = rnn_cell.MultiRNNCell(
            [lstm_cell(H) for _ in range(H['num_lstm_layers'])])
    else:
        # lstm = lstm_cell
        lstm = lstm_cell(H)

    batch_size = H['batch_size'] * H['grid_height'] * H['grid_width']
    state = lstm.zero_state(batch_size, tf.float32)

    outputs = []
    with tf.variable_scope(
            'RNN', initializer=tf.random_uniform_initializer(-0.1, 0.1)):
        for time_step in range(H['rnn_len']):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            output, state = lstm(lstm_input, state)
            outputs.append(output)
    return outputs

def predict_next_frame(H, lstm_input):
    lstm_cell = rnn_cell.BasicLSTMCell(832, forget_bias=0.0,
                                       state_is_tuple=False)
    if H['num_lstm_layers'] > 1:
        lstm = rnn_cell.MultiRNNCell([lstm_cell] * H['num_lstm_layers'],
                                     state_is_tuple=False)
    else:
        lstm = lstm_cell

    batch_size = H['batch_size'] * H['grid_height'] * H['grid_width']
    state = tf.zeros([batch_size, lstm.state_size])

    outputs = []
    with tf.variable_scope(
            'RNN', initializer=tf.random_uniform_initializer(-0.1, 0.1)):
        # feed the 9 input frames in reverse order
        for i in range(9):
            if i > 0:
                tf.get_variable_scope().reuse_variables()
            input_data = tf.reshape(lstm_input[8 - i], [300, 832])
            output, state = lstm(input_data, state)
            output = tf.reshape(output, [1, 15, 20, 832])
            outputs.append(output)
    return outputs

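# Illustrative call: with batch_size=1 and a 15x20 grid, each of the 9 input
# frames reshapes to [300, 832], matching the hard-coded sizes above
# (the H values here are assumptions, not from the original source):
H = {'num_lstm_layers': 1, 'batch_size': 1, 'grid_height': 15,
     'grid_width': 20}
frames = [tf.placeholder(tf.float32, [1, 15, 20, 832]) for _ in range(9)]
predicted = predict_next_frame(H, frames)
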
def build_lstm_inner(H, lstm_input):
    '''build lstm decoder'''
    def lstm_cell():
        return rnn_cell.BasicLSTMCell(H['lstm_size'], forget_bias=0.0,
                                      state_is_tuple=False)

    if H['num_lstm_layers'] > 1:
        # lstm = rnn_cell.MultiRNNCell([lstm_cell] * H['num_lstm_layers'], state_is_tuple=False)
        lstm = rnn_cell.MultiRNNCell(
            [lstm_cell() for _ in range(H['num_lstm_layers'])],
            state_is_tuple=False)
        print(H['num_lstm_layers'])
    else:
        lstm = lstm_cell()
        print('basic')

    batch_size = H['batch_size'] * H['grid_height'] * H['grid_width']
    state = tf.zeros([batch_size, lstm.state_size])

    outputs = []
    with tf.compat.v1.variable_scope(
            'RNN', initializer=tf.random_uniform_initializer(-0.1, 0.1)):
        for time_step in range(H['rnn_len']):
            if time_step > 0:
                tf.compat.v1.get_variable_scope().reuse_variables()
            output, state = lstm(lstm_input, state)
            outputs.append(output)
    return outputs

def __init__(self, params, infer=False):
    self.params = params
    if infer:
        self.batch_size = batch_size = 1
        self.sequence_length = sequence_length = 1
    else:
        self.batch_size = batch_size = self.params.batch_size
        self.sequence_length = sequence_length = self.params.sequence_length

    cell1 = rnn_cell.LSTMCell(self.params.rnn_size,
                              self.params.input_channels,
                              use_peepholes=True)
    cell2 = rnn_cell.LSTMCell(self.params.rnn_size, cell1.output_size,
                              use_peepholes=True,
                              num_proj=params.output_channels)
    self.cell = cell = rnn_cell.MultiRNNCell([cell1, cell2])

    self.data_placeholder = tf.placeholder(
        tf.float32,
        shape=(batch_size, params.input_channels, sequence_length),
        name='data_placeholder')
    self.labels_placeholder = tf.placeholder(
        tf.float32,
        shape=(batch_size, params.input_channels, sequence_length),
        name='labels_placeholder')

    # Initial state of the LSTM memory.
    # To train or to leave as all zeros... that is the question:
    # get_variable means train, zeros means zeros. To make this a trainable
    # variable we'd want it to be *the same* initial state for every
    # sequence in a batch.
    self.initial_state = cell.zero_state(batch_size, dtype=tf.float32)

def _shared_layer(input_data, config):
    """Build the shared encoder layer.

    Args:
        input_data: size batch_size x num_steps x embedding size

    Returns:
        the encoder outputs and the encoder's initial state
    """
    cell = rnn_cell.BasicLSTMCell(config.encoder_size)
    inputs = [
        tf.squeeze(input_, [1])
        for input_ in tf.split(1, config.num_steps, input_data)
    ]

    if is_training and config.keep_prob < 1:
        cell = rnn_cell.DropoutWrapper(cell,
                                       output_keep_prob=config.keep_prob)

    cell = rnn_cell.MultiRNNCell([cell] * config.num_shared_layers)

    initial_state = cell.zero_state(config.batch_size, tf.float32)
    encoder_outputs, encoder_states = rnn.rnn(cell, inputs,
                                              initial_state=initial_state,
                                              scope="encoder_rnn")
    return encoder_outputs, initial_state

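# Hypothetical call; is_training is a free variable of _shared_layer in the
# original, so this sketch sets it at module level (all values illustrative):
class SharedConfig(object):
    encoder_size = 128
    num_steps = 20
    keep_prob = 0.5
    num_shared_layers = 2
    batch_size = 16

is_training = True
word_embeddings = tf.placeholder(tf.float32, [16, 20, 64])
encoder_outputs, initial_state = _shared_layer(word_embeddings, SharedConfig())
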
def __init__(self, vocab_size, size=256, depth=2, learning_rate=1e-4,
             batch_size=32, keep_prob=0.1, num_steps=100,
             checkpoint_dir="checkpoint", forward_only=False):
    """Initialize the parameters for a Deep Bidirectional LSTM model.

    Args:
        vocab_size: int, The dimensionality of the input vocab
        size: int, The dimensionality of the inputs into the Deep LSTM cell [32, 64, 256]
        learning_rate: float, [1e-3, 5e-4, 1e-4, 5e-5]
        batch_size: int, The size of a batch [16, 32]
        keep_prob: unit Tensor or float between 0 and 1 [0.0, 0.1, 0.2]
        num_steps: int, The max time unit [100]
    """
    super(DeepBiLSTM, self).__init__()

    self.vocab_size = int(vocab_size)
    self.size = int(size)
    self.depth = int(depth)
    self.learning_rate = float(learning_rate)
    self.batch_size = int(batch_size)
    self.keep_prob = float(keep_prob)
    self.num_steps = int(num_steps)

    self.inputs = tf.placeholder(tf.int32, [self.batch_size, self.num_steps])
    self.input_lengths = tf.placeholder(tf.int64, [self.batch_size])

    with tf.device("/cpu:0"):
        self.emb = tf.Variable(tf.truncated_normal(
            [self.vocab_size, self.size], -0.1, 0.1), name='emb')
        self.embed_inputs = tf.nn.embedding_lookup(
            self.emb, tf.transpose(self.inputs))

    self.cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
    if not forward_only and self.keep_prob < 1:
        self.cell = rnn_cell.DropoutWrapper(self.cell,
                                            output_keep_prob=keep_prob)
    self.stacked_cell = rnn_cell.MultiRNNCell([self.cell] * depth)

    self.initial_state = self.stacked_cell.zero_state(batch_size, tf.float32)

    self.outputs, self.states = rnn.rnn(self.stacked_cell,
                                        tf.unpack(self.embed_inputs),
                                        dtype=tf.float32,
                                        sequence_length=self.input_lengths,
                                        initial_state=self.initial_state)
    output = tf.reduce_sum(tf.pack(self.outputs), 0)

def GRUSeq2Seq(enc_inp, dec_inp):
    cell = rnn_cell.MultiRNNCell([rnn_cell.GRUCell(24)] * 2)
    return seq2seq.embedding_attention_seq2seq(
        enc_inp, dec_inp, cell, classes, classes, output_projection=(w, b))

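# Wiring sketch: classes, w and b are free variables of GRUSeq2Seq in the
# original test code; they are defined here only to illustrate the shapes
# (the projection maps the 24-wide GRU output onto the symbol classes):
classes = 10
w = tf.get_variable("proj_w", [24, classes])
b = tf.get_variable("proj_b", [classes])
enc_inp = [tf.placeholder(tf.int32, [8]) for _ in range(5)]
dec_inp = [tf.placeholder(tf.int32, [8]) for _ in range(5)]
dec_outputs, dec_state = GRUSeq2Seq(enc_inp, dec_inp)
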
def __init__(self, args, infer=False):
    self.args = args
    self.args.data_dim = 1

    if infer:
        args.batch_size = 1
        args.seq_length = 1

    if args.model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    one_cell = cell_fn(args.rnn_size)
    self.cell = cell = rnn_cell.MultiRNNCell([one_cell] * args.num_layers)

    self.input_data = tf.placeholder(tf.float32,
                                     [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.float32,
                                  [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    inputs = tf.split(1, args.seq_length, self.input_data)
    outputs, last_state = rnn.rnn(cell, inputs, self.initial_state,
                                  dtype=tf.float32)
    self.final_state = last_state

    output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
    softmax_w = tf.get_variable("softmax_w",
                                [args.rnn_size, self.args.data_dim])
    softmax_b = tf.get_variable("softmax_b", [self.args.data_dim])
    self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)

    # flatten the logits to match the flattened targets before the L2 loss
    flat_logits = tf.reshape(self.logits, [-1])
    flat_targets = tf.reshape(self.targets, [-1])
    self.cost = tf.reduce_sum(tf.pow(flat_logits - flat_targets, 2)) / \
        (2 * (args.batch_size * args.seq_length))  # L2 loss

    self.lr = tf.Variable(args.learning_rate, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    # optimizer = tf.train.AdamOptimizer(self.lr)
    # optimizer = tf.train.GradientDescentOptimizer(self.lr)
    optimizer = tf.train.AdagradOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

def prediction(self):
    fw_cell = rnn_cell.LSTMCell(self._num_hidden)
    fw_cell = rnn_cell.DropoutWrapper(fw_cell, output_keep_prob=self.dropout)
    bw_cell = rnn_cell.LSTMCell(self._num_hidden)
    bw_cell = rnn_cell.DropoutWrapper(bw_cell, output_keep_prob=self.dropout)
    if self._num_layers > 1:
        fw_cell = rnn_cell.MultiRNNCell([fw_cell] * self._num_layers)
        bw_cell = rnn_cell.MultiRNNCell([bw_cell] * self._num_layers)

    output, _, _ = rnn.bidirectional_rnn(
        fw_cell, bw_cell,
        tf.unpack(tf.transpose(self.data, perm=[1, 0, 2])),
        dtype=tf.float32, sequence_length=self.length)

    max_length = int(self.target.get_shape()[1])
    num_classes = int(self.target.get_shape()[2])
    weight, bias = self._weight_and_bias(2 * self._num_hidden, num_classes)
    output = tf.reshape(
        tf.transpose(tf.pack(output), perm=[1, 0, 2]),
        [-1, 2 * self._num_hidden])
    prediction = tf.nn.softmax(tf.matmul(output, weight) + bias)
    prediction = tf.reshape(prediction, [-1, max_length, num_classes])
    return prediction

def __init__(self, is_training, config):
    self.batch_size = batch_size = config.batch_size
    size = config.n_hidden
    num_steps = config.num_steps
    self._input_data = tf.placeholder(tf.float32,
                                      (batch_size, config.num_steps))
    self._targets = tf.placeholder(tf.float32, [batch_size, 1])

    lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=2.8)
    # lstm_cell = rnn_cell.LSTMCell(size, 1)
    # cell = lstm_cell
    cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

    self._initial_state = cell.zero_state(batch_size, tf.float32)
    self._train_op = tf.no_op()
    self._result = -1

    weights_hidden = tf.get_variable("weights_hidden",
                                     [config.num_features, config.n_hidden])
    inputs = []
    for k in range(num_steps):
        nextitem = tf.matmul(
            tf.reshape(self._input_data[:, k],
                       [config.batch_size, config.num_features]),
            weights_hidden)
        inputs.append(nextitem)

    outputs, states = rnn.rnn(cell, inputs,
                              initial_state=self._initial_state)

    # output = tf.reshape(tf.concat(1, outputs), [-1, config.n_hidden])
    # pred = tf.matmul(outputs[-1], tf.get_variable("weights_out", [config.n_hidden, 1])) + tf.get_variable("bias_out", [1])
    output = tf.reshape(tf.concat(1, outputs[-1]), [-1, size])
    pred = tf.sigmoid(
        tf.matmul(outputs[-1],
                  tf.get_variable("weights_out", [config.n_hidden, 1])) +
        tf.get_variable("bias_out", [1]))
    self._pred = pred

    self._final_state = states[-1]
    self._cost = cost = tf.square(pred[:, 0] - self.targets[:, 0])
    self._result = tf.abs(pred[0, 0] - self.targets[0, 0])
    # self._cost = cost = tf.abs(pred[0, 0] - self.targets[0, 0])

    if not config.is_training:
        return

    # optimizer = tf.train.GradientDescentOptimizer(learning_rate=config.learning_rate).minimize(cost)
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    self._train_op = optimizer
    print("top ", self._train_op)

def BiRNN(self, _X, _istate_fw, _istate_bw, _weights, _biases):
    # input shape: (batch_size, n_steps, n_input)
    _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
    # Reshape to prepare input to hidden activation:
    # (n_steps*batch_size, n_input)
    _X = tf.reshape(_X, [-1, self.config.num_input])
    # Linear activation
    _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']

    # Forward direction cell
    single_fw_cell = rnn_cell.BasicLSTMCell(self.config.num_hidden)
    single_fw_cell = rnn_cell.DropoutWrapper(single_fw_cell,
                                             self.config.input_keep_prob,
                                             self.config.output_keep_prob)
    rnn_fw_cell = rnn_cell.MultiRNNCell([single_fw_cell] *
                                        self.config.model_depth)
    # Backward direction cell
    single_bw_cell = rnn_cell.BasicLSTMCell(self.config.num_hidden)
    single_bw_cell = rnn_cell.DropoutWrapper(single_bw_cell,
                                             self.config.input_keep_prob,
                                             self.config.output_keep_prob)
    rnn_bw_cell = rnn_cell.MultiRNNCell([single_bw_cell] *
                                        self.config.model_depth)

    # Split data because the rnn cell needs a list of inputs for the RNN
    # inner loop: n_steps * (batch_size, n_hidden)
    _X = tf.split(0, self.config.num_steps, _X)

    # Get lstm cell output
    outputs, final_fw, final_bw = rnn.bidirectional_rnn(
        rnn_fw_cell, rnn_bw_cell, _X,
        initial_state_fw=_istate_fw, initial_state_bw=_istate_bw)

    # Linear activation
    return [
        tf.matmul(output, _weights['out']) + _biases['out']
        for output in outputs
    ], final_fw, final_bw

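# Call sketch with assumed shapes (names illustrative, not from the original):
# the non-tuple LSTM state is 2*num_hidden wide per layer, so each direction's
# initial state is [batch, 2 * num_hidden * model_depth], and the output
# projection sees the fw/bw concatenation, i.e. 2*num_hidden features.
# _weights = {'hidden': ...,   # [num_input, num_hidden]
#             'out': ...}      # [2 * num_hidden, n_classes]
# _istate_fw = tf.placeholder(tf.float32, [None, 2 * num_hidden * model_depth])
# _istate_bw = tf.placeholder(tf.float32, [None, 2 * num_hidden * model_depth])
# logits, s_fw, s_bw = model.BiRNN(x, _istate_fw, _istate_bw, _weights, _biases)
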
def __init__(self, rnn_size, num_layers, vocab_size, grad_clip, batch_size=1,
             seq_length=1):
    cell = rnn_cell.BasicLSTMCell(rnn_size)
    self.cell = cell = rnn_cell.MultiRNNCell([cell] * num_layers)

    self.input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
    self.targets = tf.placeholder(tf.int32, [batch_size, seq_length])
    self.initial_state = cell.zero_state(batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable('softmax_w', [rnn_size, vocab_size])
        softmax_b = tf.get_variable('softmax_b', [vocab_size])
        with tf.device('/cpu:0'):
            embedding = tf.get_variable('embedding', [vocab_size, rnn_size])
            inputs = tf.split(
                1, seq_length,
                tf.nn.embedding_lookup(embedding, self.input_data))
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    # batch_size == seq_length == 1 signals sampling mode, where the decoder
    # feeds its own prediction back in via `loop`
    infer = batch_size == 1 and seq_length == 1
    loop_fn = loop if infer else None
    outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state,
                                              cell, loop_function=loop_fn,
                                              scope='rnnlm')
    output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])
    self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
    self.probs = tf.nn.softmax(self.logits)
    loss = seq2seq.sequence_loss_by_example(
        [self.logits], [tf.reshape(self.targets, [-1])],
        [tf.ones([batch_size * seq_length])], vocab_size)
    self.cost = tf.reduce_sum(loss) / batch_size / seq_length
    self.final_state = last_state

    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

def __init__(self, args, infer=False, loop=0):
    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    if args.model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    cell = cell_fn(args.rnn_size)
    self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

    self.input_data = tf.placeholder(tf.int32,
                                     [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32,
                                  [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm') as scope1:
        # reuse the variables when this model is built more than once
        if loop > 0:
            scope1.reuse_variables()
        softmax_w = tf.get_variable("softmax_w",
                                    [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding",
                                        [args.vocab_size, args.rnn_size])
            inputs = tf.split(
                1, args.seq_length,
                tf.nn.embedding_lookup(embedding, self.input_data))
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = seq2seq.rnn_decoder(
        inputs, self.initial_state, cell,
        loop_function=loop if infer else None, scope='rnnlm')
    output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    loss = seq2seq.sequence_loss_by_example(
        [self.logits], [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])], args.vocab_size)
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state

    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

def __init__(self, args, deterministic=False):
    self.args = args

    if args.model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    elif args.model == 'bn-lstm':
        cell_fn = BNLSTMCell
    else:
        raise Exception('model type not supported: {}'.format(args.model))

    # when training, set to False; when testing, set to True
    deterministic = tf.Variable(deterministic, name='deterministic')
    if args.model == 'bn-lstm':
        cell = cell_fn(args.rnn_size, bn=args.bn_level,
                       deterministic=deterministic)
    else:
        cell = cell_fn(args.rnn_size)
    self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

    self.input_data = tf.placeholder(tf.int64, [None, args.seq_length])
    # self.targets = tf.placeholder(tf.int64, [None, args.seq_length])  # seq2seq model
    self.targets = tf.placeholder(tf.int64, [None, ])  # target is class label
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('embeddingLayer'):
        with tf.device('/cpu:0'):
            W = tf.get_variable('W', [args.vocab_size, args.rnn_size])
            embedded = tf.nn.embedding_lookup(W, self.input_data)
            # shape: (batch_size, seq_length, cell.input_size)
            #     => seq_length * (batch_size, cell.input_size)
            inputs = tf.split(1, args.seq_length, embedded)
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    outputs, last_state = rnn.rnn(cell, inputs, self.initial_state,
                                  scope='rnnLayer')

    with tf.variable_scope('softmaxLayer'):
        softmax_w = tf.get_variable('w', [args.rnn_size, args.label_size])
        softmax_b = tf.get_variable('b', [args.label_size])
        logits = tf.matmul(outputs[-1], softmax_w) + softmax_b
        self.probs = tf.nn.softmax(logits)

    # self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, self.targets))  # Softmax loss
    self.cost = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits, self.targets))  # Softmax loss
    self.final_state = last_state

    self.lr = tf.Variable(0.0, trainable=False)
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.lr).minimize(self.cost)  # Adam Optimizer

    self.correct_pred = tf.equal(tf.argmax(self.probs, 1), self.targets)
    self.correct_num = tf.reduce_sum(tf.cast(self.correct_pred, tf.float32))
    self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32))

def __init__(self, config, is_training):
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    size = config.hidden_size

    lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
    cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)
    if is_training and config.keep_prob < 1:
        cell = rnn_cell.DropoutWrapper(cell,
                                       output_keep_prob=config.keep_prob)
    self.cell = cell

    self.input_data = tf.placeholder(dtype=tf.float32,
                                     shape=[None, num_steps, 1])
    self.target_data = tf.placeholder(dtype=tf.float32,
                                      shape=[None, num_steps, 1])
    self.initial_state = cell.zero_state(batch_size=config.batch_size,
                                         dtype=tf.float32)

    inputs = tf.split(1, num_steps, self.input_data)
    inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    with tf.variable_scope('rnnvm'):
        output_w = tf.get_variable("output_w", [size, 1])
        output_b = tf.get_variable("output_b", [1])

    outputs, states = seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                          scope='rnnvm')
    output = tf.reshape(tf.concat(1, outputs), [-1, size])
    output = tf.nn.xw_plus_b(output, output_w, output_b)

    entropy = tf.nn.sigmoid_cross_entropy_with_logits(
        output,
        tf.reshape(self.target_data, shape=[num_steps * batch_size, 1]))
    self.cost = cost = tf.reduce_mean(entropy)
    self.final_state = states[-1]

    if not is_training:
        return

    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      config.max_grad_norm)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

def RNN(_X, _istate, _weights, _biases):
    global initial_state
    _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']

    # Define an lstm cell with tensorflow
    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=0.0)
    if num_layers > 1:
        lstm_cell = rnn_cell.MultiRNNCell([lstm_cell] * num_layers)
    k = tf.split(0, 1, _X)
    initial_state = lstm_cell.zero_state(batch_size, tf.float32)

    # Get lstm cell output
    outputs, states = rnn.rnn(lstm_cell, k, initial_state=_istate)
    return tf.matmul(outputs[-1], _weights['out']) + _biases['out'], states[-1]

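# Illustrative call; n_hidden, num_layers and batch_size are free variables
# of RNN in the original, set here only for the sketch. The non-tuple LSTM
# state is 2 * n_hidden per layer, so _istate is [batch, 2*n_hidden*layers]:
n_hidden, num_layers, batch_size = 128, 2, 32
_weights = {'hidden': tf.Variable(tf.random_normal([64, n_hidden])),
            'out': tf.Variable(tf.random_normal([n_hidden, 10]))}
_biases = {'hidden': tf.Variable(tf.zeros([n_hidden])),
           'out': tf.Variable(tf.zeros([10]))}
x = tf.placeholder(tf.float32, [batch_size, 64])
istate = tf.placeholder(tf.float32, [batch_size, 2 * n_hidden * num_layers])
pred, final_state = RNN(x, istate, _weights, _biases)
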
def __init__(self, is_training, config):
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    size = config.hidden_size
    self._input_data = tf.placeholder(tf.float32, [batch_size, num_steps])
    self._targets = tf.placeholder(tf.float32, [batch_size, num_steps])

    lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
    if is_training and config.keep_prob < 1:
        lstm_cell = rnn_cell.DropoutWrapper(
            lstm_cell, output_keep_prob=config.keep_prob)
    cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

    self._initial_state = cell.zero_state(batch_size, tf.float32)

    iw = tf.get_variable("input_w", [1, size])
    ib = tf.get_variable("input_b", [size])
    inputs = [
        tf.nn.xw_plus_b(i_, iw, ib)
        for i_ in tf.split(1, num_steps, self._input_data)
    ]
    if is_training and config.keep_prob < 1:
        inputs = [
            tf.nn.dropout(input_, config.keep_prob) for input_ in inputs
        ]

    outputs, states = rnn.rnn(cell, inputs,
                              initial_state=self._initial_state)
    rnn_output = tf.reshape(tf.concat(1, outputs), [-1, size])

    self._output = output = tf.nn.xw_plus_b(
        rnn_output,
        tf.get_variable("out_w", [size, 1]),
        tf.get_variable("out_b", [1]))

    # flatten the [N, 1] output so the difference doesn't broadcast to [N, N]
    self._cost = cost = tf.reduce_mean(
        tf.square(tf.reshape(output, [-1]) -
                  tf.reshape(self._targets, [-1])))
    self._final_state = states[-1]

    if not is_training:
        return

    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      config.max_grad_norm)
    # optimizer = tf.train.GradientDescentOptimizer(self._lr)
    optimizer = tf.train.AdamOptimizer(self._lr)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))

def initialize_model(self):
    self.keep_prob = tf.placeholder(tf.float32)
    sigma = 1e-3
    # embeddings = tf.Variable(tf.convert_to_tensor(wv, dtype=tf.float32), name="Embedding")
    self.x = tf.placeholder(tf.float32,
                            shape=(self.batch_size, wv_dim, self.num_steps))
    self.y = tf.placeholder(tf.int32,
                            shape=(self.batch_size, self.num_steps))
    self.loan_amounts = tf.placeholder(
        tf.float32, shape=(self.batch_size, self.num_steps))

    if self.num_steps > 1:
        # materialize as a list so it can be indexed and len()-ed below
        inputs = [tf.squeeze(x_) for x_ in tf.split(2, self.num_steps, self.x)]
        loans = tf.split(1, self.num_steps, self.loan_amounts)
    else:
        inputs = [self.x[:, :, 0]]
        loans = [self.loan_amounts]

    filter_number_1 = 256
    filter_number_2 = 144
    cell1 = rnn_cell.BasicLSTMCell(filter_number_1, forget_bias=1.0,
                                   input_size=wv_dim)
    cell2 = rnn_cell.BasicLSTMCell(filter_number_2, forget_bias=1.0,
                                   input_size=filter_number_1)
    cell = rnn_cell.MultiRNNCell([cell1, cell2])
    self.initial_state = cell.zero_state(self.batch_size, tf.float32)
    state = self.initial_state

    self.loss = 0
    rnn_outputs = []
    for idx, batch in enumerate(inputs):
        with tf.variable_scope("RNN") as scope:
            if idx > 0:
                scope.reuse_variables()
            wc3 = tf.get_variable(
                "wc3", (filter_number_2 + 1, self.n_classes),
                initializer=tf.random_normal_initializer(
                    mean=0.0, stddev=sigma, seed=None, dtype=tf.float32))
            bc3 = tf.get_variable(
                "bc3", (self.n_classes,),
                initializer=tf.random_normal_initializer(
                    mean=0.0, stddev=sigma, seed=None, dtype=tf.float32))
            output, state = cell(batch, state)
            # prepend the loan amount to the LSTM output before the softmax
            pred = bc3 + tf.matmul(tf.concat(1, [loans[idx], output]), wc3)
            # pred = tf.matmul(output, wc3) + bc3
            rnn_outputs.append(pred)

    self.previous_state = state
    self.output = tf.argmax(rnn_outputs[-1], 1)
    for i in range(len(inputs)):
        self.loss += tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                rnn_outputs[i], self.y[:, i]))
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(self.loss)

def __init__(self, vocabularySize, config_param):
    self.vocabularySize = vocabularySize
    self.config = config_param

    self._inputX = tf.placeholder(
        tf.int32, [self.config.batch_size, self.config.sequence_size],
        "InputsX")
    self._inputTargetsY = tf.placeholder(
        tf.int32, [self.config.batch_size, self.config.sequence_size],
        "InputTargetsY")

    # Convert the input into embedded form; the lookup is pinned to the CPU
    with tf.device("/cpu:0"):
        embedding = tf.get_variable(
            "embedding", [self.vocabularySize, self.config.embeddingSize])
        embeddingLookedUp = tf.nn.embedding_lookup(embedding, self._inputX)
        inputs = tf.split(1, self.config.sequence_size, embeddingLookedUp)
        inputTensorsAsList = [tf.squeeze(input_, [1]) for input_ in inputs]

    # Define the stacked RNN
    singleRNNCell = rnn_cell.BasicRNNCell(self.config.hidden_size)
    self.multilayerRNN = rnn_cell.MultiRNNCell([singleRNNCell] *
                                               self.config.num_layers)
    self._initial_state = self.multilayerRNN.zero_state(
        self.config.batch_size, tf.float32)

    # Define the logits
    hidden_layer_output, states = rnn.rnn(self.multilayerRNN,
                                          inputTensorsAsList,
                                          initial_state=self._initial_state)
    hidden_layer_output = tf.reshape(tf.concat(1, hidden_layer_output),
                                     [-1, self.config.hidden_size])
    self._logits = tf.nn.xw_plus_b(
        hidden_layer_output,
        tf.get_variable("softmax_w",
                        [self.config.hidden_size, self.vocabularySize]),
        tf.get_variable("softmax_b", [self.vocabularySize]))
    self._predictionSoftmax = tf.nn.softmax(self._logits)

    # Define the loss
    loss = seq2seq.sequence_loss_by_example(
        [self._logits], [tf.reshape(self._inputTargetsY, [-1])],
        [tf.ones([self.config.batch_size * self.config.sequence_size])],
        self.vocabularySize)
    self._cost = tf.div(tf.reduce_sum(loss), self.config.batch_size)
    self._final_state = states[-1]

def rnn_estimator(X, y):
    """RNN estimator with target predictor function on top."""
    X = input_op_fn(X)
    if cell_type == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif cell_type == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif cell_type == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    else:
        raise ValueError("cell_type {} is not supported.".format(cell_type))
    if bidirection:
        # forward direction cell
        rnn_fw_cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
        # backward direction cell
        rnn_bw_cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
        # bidirectional_rnn also needs the inputs themselves
        encoding, _, _ = rnn.bidirectional_rnn(rnn_fw_cell, rnn_bw_cell, X,
                                               dtype=tf.float32)
    else:
        cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
        _, encoding = rnn.rnn(cell, X, dtype=tf.float32)
    return target_predictor_fn(encoding[-1], y)

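# A safer variant of the `[cell] * num_layers` stacking used throughout these
# snippets: constructing a fresh cell per layer (as the later build_lstm_inner
# variants above already do) avoids the cell-object reuse that newer TF
# releases reject or turn into unintended weight sharing. Sketch only:
def make_stacked_cell(cell_fn, rnn_size, num_layers):
    # one constructor call per layer, so each layer owns its own variables
    return rnn_cell.MultiRNNCell(
        [cell_fn(rnn_size) for _ in range(num_layers)])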