def get_cell(hiddenSize, dropOutRate):
    """Build a single LSTM cell wrapped with input/output dropout.

    Args:
        hiddenSize: number of units of the LSTM cell.
        dropOutRate: value forwarded to the wrapper as both
            ``input_keep_prob`` and ``output_keep_prob`` (i.e. it is used as
            a keep probability, despite the name).

    Returns:
        The ``DropoutWrapper`` around a ``BasicLSTMCell``.
    """
    print('building ordinary cell!')
    lstm = BasicLSTMCell(num_units=hiddenSize, state_is_tuple=True)
    keep_probs = dict(input_keep_prob=dropOutRate,
                      output_keep_prob=dropOutRate)
    return tf.contrib.rnn.DropoutWrapper(lstm, **keep_probs)
def _lstm_cell(model_opt):
    """Create the base LSTM cell, adding output dropout while training.

    Args:
        model_opt: options object; ``hidden_dims`` and ``is_train`` are read.

    Returns:
        A ``BasicLSTMCell`` (forget bias 2.5); wrapped in a
        ``DropoutWrapper`` when ``model_opt.is_train`` is truthy.
    """
    lstm = BasicLSTMCell(model_opt.hidden_dims, forget_bias=2.5,
                         state_is_tuple=True)
    if not model_opt.is_train:
        return lstm
    # NOTE(review): `self` is not a parameter of this function; this only
    # resolves if the function is nested inside a method -- confirm.
    return DropoutWrapper(lstm, output_keep_prob=self.rnn_keep_prob)
def get_cell(hiddenSize, dropOutRate, scope):
    """Build a dropout-wrapped LSTM cell inside a fresh variable scope.

    Args:
        hiddenSize: number of units of the LSTM cell.
        dropOutRate: value forwarded to the wrapper as both
            ``input_keep_prob`` and ``output_keep_prob``.
        scope: variable scope (opened with ``reuse=False``) in which the
            cell objects are constructed.

    Returns:
        The ``DropoutWrapper`` around a ``BasicLSTMCell``.
    """
    with tf.variable_scope(scope, reuse=False):
        lstm = BasicLSTMCell(num_units=hiddenSize, state_is_tuple=True)
        wrapped = tf.contrib.rnn.DropoutWrapper(
            lstm,
            input_keep_prob=dropOutRate,
            output_keep_prob=dropOutRate)
    return wrapped
def _outputs(self):
    """Run an LSTM over ``self.Input_data`` and score every time step.

    Reads ``hidden_size``, ``batch_size``, ``sequence_length`` and
    ``num_classes`` from the enclosing scope.

    Returns:
        Tensor reshaped to ``[batch_size, sequence_length, num_classes]``.
    """
    lstm = BasicLSTMCell(num_units=hidden_size)
    zero_state = lstm.zero_state(batch_size, tf.float32)
    step_outputs, _ = tf.nn.dynamic_rnn(
        lstm, self.Input_data, initial_state=zero_state, dtype=tf.float32)

    # Collapse the leading dimensions so one dense layer scores each step,
    # then restore the (batch, time, class) layout.
    flat_steps = tf.reshape(step_outputs, [-1, hidden_size])
    flat_scores = fully_connected(
        inputs=flat_steps, num_outputs=num_classes, activation_fn=None)
    return tf.reshape(flat_scores,
                      [batch_size, sequence_length, num_classes])
def new_rnn_layer(prev_layer, weights, biases, hidden_size, timesteps,
                  num_classes, name):
    """Apply a static LSTM plus a linear projection; keep the later steps.

    Args:
        prev_layer: input tensor, unstacked along axis 1 into ``timesteps``
            per-step tensors.
        weights: projection matrix multiplied with the flattened outputs.
        biases: bias added after the projection.
        hidden_size: number of LSTM units.
        timesteps: number of time steps to unroll.
        num_classes: size of the last axis after projection.
        name: name given to the LSTM cell.

    Returns:
        Projected outputs for steps ``timesteps // 2`` onward, transposed
        with permutation ``[1, 0, 2]``.
    """
    step_inputs = tf.unstack(prev_layer, timesteps, 1)
    lstm = BasicLSTMCell(hidden_size, forget_bias=1.0, name=name)
    step_outputs, _ = static_rnn(lstm, step_inputs, dtype=tf.float32)

    flat = tf.reshape(tf.convert_to_tensor(step_outputs),
                      shape=[-1, hidden_size])
    projected = tf.matmul(flat, weights) + biases
    per_step = tf.reshape(projected, shape=[timesteps, -1, num_classes])
    later_steps = per_step[(timesteps // 2):]
    return tf.transpose(later_steps, [1, 0, 2])
def build_rnn(in_layer, nodes, num_layers, batch_size, mode='RNN'):
    """Build a stacked static RNN over ``in_layer``.

    Args:
        in_layer: inputs passed to ``tf.nn.static_rnn``.
        nodes: number of units in every layer.
        num_layers: number of stacked cells.
        batch_size: batch size used to create the zero initial state.
        mode: ``'RNN'`` for ``BasicRNNCell`` layers or ``'LSTM'`` for
            ``BasicLSTMCell`` layers (case-insensitive).

    Returns:
        Tuple ``(initial_state, outputs, state)``.

    Raises:
        ValueError: if ``mode`` is neither ``'RNN'`` nor ``'LSTM'``.
    """
    cell_kind = mode.upper()
    # Validate first: previously an unknown mode left `cell` unbound and
    # failed later with an unrelated UnboundLocalError.
    if cell_kind not in ('RNN', 'LSTM'):
        raise ValueError(
            "Unsupported mode %r; expected 'RNN' or 'LSTM'" % (mode,))

    if cell_kind == 'RNN':
        layers = [BasicRNNCell(nodes) for _ in range(num_layers)]
    else:
        layers = [BasicLSTMCell(nodes) for _ in range(num_layers)]
    cell = MultiRNNCell(layers)

    initial_state = cell.zero_state(batch_size, tf.float32)
    outputs, state = tf.nn.static_rnn(cell, in_layer,
                                      initial_state=initial_state)
    return initial_state, outputs, state
def define_sequence_model(self):
    """Build the sequence model inside ``self.graph``.

    Creates the input placeholder (with optional dropout), the hidden
    layers listed in ``self.hidden_layer_type`` ("tanh" feed-forward layers
    and "lstm"/"gru" recurrent cells), a linear output layer and the
    optimizer, registering the tensors needed later in graph collections.

    Side effects:
        Seeds NumPy's global RNG, adds tensors to the graph collections
        "utt_length", "input_layer", "output_layer" and, when applicable,
        "is_training_drop" / "is_training_batch", and sets
        ``self.training_op`` when ``self.optimizer == "adam"``.
    """
    seed = 12345
    np.random.seed(seed)
    layer_list = []
    use_dropout = self.dropout_rate != 0.0
    with self.graph.as_default() as g:
        utt_length = tf.placeholder(tf.int32, shape=(None))
        g.add_to_collection(name="utt_length", value=utt_length)

        with tf.name_scope("input"):
            input_layer = tf.placeholder(dtype=tf.float32,
                                         shape=(None, None, self.n_in),
                                         name="input_layer")
            if use_dropout:
                # Single pre-formatted argument: prints the same text under
                # Python 2 and Python 3.
                print("Using dropout to avoid overfitting and the dropout rate is %s"
                      % (self.dropout_rate,))
                is_training_drop = tf.placeholder(dtype=tf.bool, shape=(),
                                                  name="is_training_drop")
                input_layer_drop = dropout(input_layer, self.dropout_rate,
                                           is_training=is_training_drop)
                layer_list.append(input_layer_drop)
                g.add_to_collection(name="is_training_drop",
                                    value=is_training_drop)
            else:
                layer_list.append(input_layer)
        g.add_to_collection("input_layer", layer_list[0])

        with tf.name_scope("hidden_layer"):
            basic_cell = []
            if "tanh" in self.hidden_layer_type:
                # Batch-norm settings are only needed by the tanh layers.
                is_training_batch = tf.placeholder(dtype=tf.bool, shape=(),
                                                   name="is_training_batch")
                bn_params = {"is_training": is_training_batch,
                             "decay": 0.99,
                             "updates_collections": None}
                g.add_to_collection("is_training_batch", is_training_batch)

            for i, layer_type in enumerate(self.hidden_layer_type):
                if layer_type == "tanh":
                    new_layer = fully_connected(
                        layer_list[-1], self.hidden_layer_size[i],
                        activation_fn=tf.nn.tanh,
                        normalizer_fn=batch_norm,
                        normalizer_params=bn_params)
                    if use_dropout:
                        new_layer = dropout(new_layer, self.dropout_rate,
                                            is_training=is_training_drop)
                    layer_list.append(new_layer)
                elif layer_type == "lstm":
                    # With dropout: plain cell in a dropout wrapper;
                    # without: layer-normalised cell.
                    if use_dropout:
                        basic_cell.append(MyDropoutWrapper(
                            BasicLSTMCell(num_units=self.hidden_layer_size[i]),
                            self.dropout_rate, self.dropout_rate,
                            is_training=is_training_drop))
                    else:
                        basic_cell.append(LayerNormBasicLSTMCell(
                            num_units=self.hidden_layer_size[i]))
                elif layer_type == "gru":
                    if use_dropout:
                        basic_cell.append(MyDropoutWrapper(
                            GRUCell(num_units=self.hidden_layer_size[i]),
                            self.dropout_rate, self.dropout_rate,
                            is_training=is_training_drop))
                    else:
                        basic_cell.append(LayerNormGRUCell(
                            num_units=self.hidden_layer_size[i]))

            # All recurrent cells are stacked and run after the
            # feed-forward layers, on the last feed-forward output.
            multi_cell = MultiRNNCell(basic_cell)
            rnn_outputs, rnn_states = tf.nn.dynamic_rnn(
                multi_cell, layer_list[-1], dtype=tf.float32,
                sequence_length=utt_length)
            layer_list.append(rnn_outputs)

        with tf.name_scope("output_layer"):
            # NOTE(review): output_layer is only bound for the "linear"
            # output type; any other value fails on the next statement.
            if self.output_type == "linear":
                output_layer = tf.layers.dense(rnn_outputs, self.n_out)
            g.add_to_collection(name="output_layer", value=output_layer)

        with tf.name_scope("training_op"):
            if self.optimizer == "adam":
                self.training_op = tf.train.AdamOptimizer()
def build_sentence_encoder(vocabulary_size, embeddings_matrix):
    """Build the graph of the bidirectional LSTM sentence encoder.

    Only the placeholders and tensors needed by other functions are
    returned.

    Args:
        vocabulary_size: width of the one-hot sentence placeholder.
        embeddings_matrix: initial value of the "W_we" embedding variable.

    Returns:
        Tuple ``(sentence_oh_placeholder, sent_lengths, sentence_words_bow,
        lstm_outputs, e_m_with_bow)``.
    """
    sentence_oh_placeholder = tf.placeholder(
        shape=[None, vocabulary_size], dtype=tf.float32,
        name="sentence_placeholder")

    # presumably shaped [vocabulary_size, WORD_EMB_SIZE] -- TODO confirm
    word_embeddings_matrix = tf.get_variable(
        "W_we",
        initializer=tf.constant(embeddings_matrix, dtype=tf.float32))

    # One-hot rows times the embedding matrix, with a leading axis added at
    # position 0 for the RNN call below.
    embedded_rows = tf.matmul(sentence_oh_placeholder, word_embeddings_matrix)
    sentence_embedded = tf.expand_dims(embedded_rows, 0)

    # Placeholder for the sentence length.
    sent_lengths = tf.placeholder(dtype=tf.int32,
                                  name="sent_length_placeholder")

    # One cell per direction.
    lstm_fw_cell = BasicLSTMCell(LSTM_HIDDEN_SIZE, forget_bias=1.0)
    lstm_bw_cell = BasicLSTMCell(LSTM_HIDDEN_SIZE, forget_bias=1.0)

    outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        lstm_fw_cell, lstm_bw_cell, sentence_embedded, sent_lengths,
        dtype=tf.float32)
    # `outputs` is the (forward, backward) pair of per-step outputs.
    output_fw, output_bw = outputs

    # Concatenate both directions on the last axis; keep the first entry
    # along axis 0.
    lstm_outputs = tf.concat(outputs, 2)[0]

    # Forward output at the last step, backward output at the first step.
    final_fw = output_fw[:, -1, :]
    final_bw = output_bw[:, 0, :]
    e_m = tf.concat((final_fw, final_bw), axis=1)

    sentence_words_bow = tf.placeholder(
        tf.float32, [None, len(words_vocabulary)], name="sentence_words_bow")
    e_m_with_bow = tf.concat([e_m, sentence_words_bow], axis=1)

    return (sentence_oh_placeholder, sent_lengths, sentence_words_bow,
            lstm_outputs, e_m_with_bow)
def alpha(self, inputs, state=None, u=None, buffer=None, reuse=None,
          init_buffer=False, name='alpha'):
    """Dynamics parameter network producing the mixing vector alpha.

    Supports an LSTM network or a feed-forward network fed from a FIFO
    buffer, and can optionally infer ``u`` as well.

    Args:
        inputs: tensor to condition the mixing vector on.
        state: previous state when the RNN variant is used.
        u: pass-through value, overwritten only when ``config.learn_u``.
        buffer: FIFO buffer for the feed-forward variant.
        reuse: `True` or `None`; forwarded to the variable scope (and to
            the LSTM cell).
        init_buffer: when true, replace ``buffer`` with zeros.
        name: name of the variable scope.

    Returns:
        Tuple ``(alpha, state, u, buffer)``.
    """
    cfg = self.config

    # The hidden layer is twice as wide when u is learned too: the first
    # part feeds alpha, the second part feeds u.
    if cfg.learn_u:
        hidden_units = cfg.alpha_units * 2
    else:
        hidden_units = cfg.alpha_units

    # Overwrite the incoming buffer with zeros on request.
    if init_buffer:
        buffer = tf.zeros((tf.shape(inputs)[0], cfg.dim_a, cfg.fifo_size),
                          dtype=tf.float32)

    # With a single mixture component the weights are trivially all ones.
    if cfg.K == 1:
        return tf.ones([cfg.batch_size, cfg.K]), state, u, buffer

    with tf.variable_scope(name, reuse=reuse):
        if cfg.alpha_rnn:
            rnn_cell = BasicLSTMCell(hidden_units, reuse=reuse)
            output, state = rnn_cell(inputs, state)
        else:
            # Shift the buffer: drop the oldest entry, append the input.
            newest = tf.expand_dims(inputs, 2)
            buffer = tf.concat([buffer[:, :, 1:], newest], 2)
            flat_buffer = tf.reshape(
                buffer, (tf.shape(inputs)[0], cfg.dim_a * cfg.fifo_size))
            output = slim.repeat(
                flat_buffer, cfg.alpha_layers, slim.fully_connected,
                hidden_units, get_activation_fn(cfg.alpha_activation),
                scope='hidden')

        # Alpha comes from the first `alpha_units` columns of the output.
        alpha = slim.fully_connected(output[:, :cfg.alpha_units], cfg.K,
                                     activation_fn=tf.nn.softmax,
                                     scope='alpha_var')

        if cfg.learn_u:
            # u comes from the remaining columns of the output.
            u = slim.fully_connected(output[:, cfg.alpha_units:], cfg.dim_u,
                                     activation_fn=None, scope='u_var')
    return alpha, state, u, buffer
def cell():
    """Create one encoder RNN cell from the enclosing ``encoder`` settings.

    Returns:
        A ``BasicLSTMCell`` (non-tuple state) when ``encoder.use_lstm`` is
        set, otherwise an orthogonally initialised ``GRUCell``; wrapped
        with input dropout when ``dropout`` is not ``None``.
    """
    if encoder.use_lstm:
        rnn_cell = BasicLSTMCell(encoder.cell_size, state_is_tuple=False)
    else:
        rnn_cell = GRUCell(encoder.cell_size,
                           initializer=orthogonal_initializer())

    if dropout is None:
        return rnn_cell
    return DropoutWrapper(rnn_cell, input_keep_prob=dropout)
def encoder_rnn(rnn_inputs, rnn_size, num_layers, keep_prob, seq_length):
    """Encode ``rnn_inputs`` with a stacked bidirectional LSTM.

    Args:
        rnn_inputs: input tensor passed to the RNN.
        rnn_size: number of units in every LSTM layer.
        num_layers: number of stacked layers per direction.
        keep_prob: input keep probability for each layer's dropout.
        seq_length: per-example sequence lengths.

    Returns:
        The final state returned by ``tf.nn.bidirectional_dynamic_rnn``.
    """
    def _stacked_cell():
        # A fresh cell object per layer: repeating one instance with
        # `[cell] * n` either raises or silently shares weights between
        # layers, depending on the TensorFlow version.
        layers = [
            DropoutWrapper(cell=BasicLSTMCell(num_units=rnn_size),
                           input_keep_prob=keep_prob)
            for _ in range(num_layers)
        ]
        return MultiRNNCell(cells=layers)

    # Separate stacks so the forward and backward passes do not share one
    # cell object either.
    _, encoder_state = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=_stacked_cell(),
        cell_bw=_stacked_cell(),
        sequence_length=seq_length,
        inputs=rnn_inputs,
        dtype=tf.float32)
    return encoder_state
def rnn_encoder(rnn_inputs, rnn_size, keep_prob, num_of_layers,
                sequence_length):
    """Encode ``rnn_inputs`` with a stacked bidirectional LSTM.

    Args:
        rnn_inputs: input tensor passed to the RNN.
        rnn_size: number of units in every LSTM layer.
        keep_prob: input keep probability for each layer's dropout.
        num_of_layers: number of stacked layers per direction.
        sequence_length: per-example sequence lengths.

    Returns:
        The final state returned by ``tf.nn.bidirectional_dynamic_rnn``.
    """
    def _stacked_cell():
        # `num_units` is the required size argument; the previous
        # `input_size=rnn_size` call never supplied it.
        # One new cell per layer instead of `[cell] * n`, which reuses a
        # single instance for every layer.
        layers = [
            DropoutWrapper(BasicLSTMCell(num_units=rnn_size),
                           input_keep_prob=keep_prob)
            for _ in range(num_of_layers)
        ]
        return MultiRNNCell(layers)

    # Independent stacks for the forward and backward directions.
    _, encoder_state = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=_stacked_cell(),
        cell_bw=_stacked_cell(),
        sequence_length=sequence_length,
        dtype=tf.float32,
        inputs=rnn_inputs)
    return encoder_state
def create_cell():
    """Build the multi-layer cell from the enclosing model settings.

    While training with ``dropout_keep_prob < 1.0`` the stack is made of
    ``LSTMCell`` layers named "basic_lstm_cell" and wrapped with
    input/output dropout; otherwise it is a plain stack of
    ``BasicLSTMCell`` layers.
    """
    use_dropout = (not forward_only) and dropout_keep_prob < 1.0
    if use_dropout:
        layers = [
            tf.nn.rnn_cell.LSTMCell(self.cell_size, name="basic_lstm_cell")
            for _ in range(self.num_layers)
        ]
        return DropoutWrapper(MultiRNNCell(layers),
                              input_keep_prob=dropout_keep_prob,
                              output_keep_prob=dropout_keep_prob)

    layers = [BasicLSTMCell(self.cell_size) for _ in range(self.num_layers)]
    return MultiRNNCell(layers)
def __init__(self, FLAGS, vocab_embed):
    """Build the classifier graph: embedding, BiLSTM, attention, output.

    Args:
        FLAGS: settings object; ``n_class``, ``max_len`` and ``embed_size``
            are read.
        vocab_embed: embedding table, converted to a tensor.
    """
    self.n_class = FLAGS.n_class
    self.max_len = FLAGS.max_len
    self.embed_size = FLAGS.embed_size

    self.vocab_embed = tf.convert_to_tensor(vocab_embed, name='vocab_embed')
    self.global_step = tf.Variable(0, trainable=False, name='global_step')

    with tf.name_scope('input'):
        self.X = tf.placeholder(tf.int32, [None, self.max_len], name='X')
        self.y = tf.placeholder(tf.float32, [None, self.n_class], name='y')

    with tf.name_scope('embed'):
        word_vectors = tf.nn.embedding_lookup(self.vocab_embed, self.X,
                                              name='embed_words')

    with tf.name_scope('encode-word'):
        # Both directions use `max_len` units; their outputs are summed.
        fw_cell = BasicLSTMCell(self.max_len)
        bw_cell = BasicLSTMCell(self.max_len)
        (fw_outputs, bw_outputs), _ = bidirectional_dynamic_rnn(
            fw_cell, bw_cell, inputs=word_vectors, dtype=tf.float32)
        encoded_words = fw_outputs + bw_outputs

    with tf.name_scope('word-attn'):
        attended = self.attention(encoded_words)

    with tf.name_scope('output'):
        self.logits = self.output(attended)

    # Per-example loss and its mean.
    self.loss = tf.nn.softmax_cross_entropy_with_logits(
        logits=self.logits, labels=self.y)
    self.mean_loss = tf.reduce_mean(self.loss, name='mean_loss')
    tf.summary.scalar('mean_loss', self.mean_loss)

    # Accuracy from the arg-max of labels and logits.
    self.target = tf.argmax(self.y, 1, name='target')
    self.prediction = tf.argmax(self.logits, 1, name='prediction')
    hits = tf.equal(self.prediction, tf.argmax(self.y, 1))
    self.accuracy = tf.reduce_mean(tf.cast(hits, 'float'), name='accuracy')
    tf.summary.scalar('accuracy', self.accuracy)
def __init__(self):
    """Build the image-sequence classifier graph.

    Five frames per example go through ``self.get_features``, then a
    static bidirectional LSTM; the last step's output is projected to
    ``classes`` logits. ``hidden_size``, ``batch_size`` and ``classes`` are
    read from the enclosing scope.
    """
    self.training = tf.placeholder(tf.bool, name='training')
    self.inputs = tf.placeholder(dtype=tf.float32,
                                 shape=[None, 5, 224, 224, 3])
    # Split along the frame axis into a list of per-frame tensors.
    self.inputs = tf.unstack(self.inputs, axis=1)
    self.sequence_length = tf.placeholder(dtype=tf.int32, shape=[None])

    # original note: seq_length*32*128 -- TODO confirm the feature shape
    self.LSTM_inputs = [self.get_features(frame) for frame in self.inputs]
    print('Image feature extraction is successful')

    lstm_f_cell = BasicLSTMCell(num_units=hidden_size)
    lstm_b_cell = BasicLSTMCell(num_units=hidden_size)
    init_fw = lstm_f_cell.zero_state(batch_size, dtype=tf.float32)
    init_bw = lstm_b_cell.zero_state(batch_size, dtype=tf.float32)
    outputs, output_state_fw, output_state_bw = static_bidirectional_rnn(
        lstm_f_cell, lstm_b_cell, self.LSTM_inputs,
        initial_state_fw=init_fw, initial_state_bw=init_bw,
        sequence_length=self.sequence_length)

    self.predict = tf.layers.dense(outputs[-1], classes)
    # Predicted class per example. The arg-max must run over the class
    # axis; without `axis` it reduces over axis 0, the batch.
    self.finally_pre = tf.argmax(tf.nn.softmax(self.predict), axis=1)

    self.targets = tf.placeholder(dtype=tf.int32, shape=[None])
    self.loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.targets, logits=self.predict))

    # Run the ops registered under UPDATE_OPS before each training step.
    with tf.control_dependencies(
            tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        self.train_op = tf.train.AdamOptimizer().minimize(self.loss)
def encoder_rnn(rnn_input, rnn_size, num_of_layers, keep_prob,
                sequence_length):
    """Encode ``rnn_input`` with a stacked bidirectional LSTM.

    Args:
        rnn_input: input tensor passed to the RNN.
        rnn_size: number of units in every LSTM layer.
        num_of_layers: number of stacked layers per direction.
        keep_prob: input keep probability for each layer's dropout.
        sequence_length: per-example sequence lengths.

    Returns:
        The final state returned by ``tf.nn.bidirectional_dynamic_rnn``.
    """
    def _stacked_cell():
        # Each layer needs its own cell object; `[cell] * n` repeats one
        # instance, which errors or shares weights across layers.
        layers = [
            DropoutWrapper(BasicLSTMCell(rnn_size),
                           input_keep_prob=keep_prob)
            for _ in range(num_of_layers)
        ]
        return MultiRNNCell(layers)

    # Forward and backward passes get independent stacks.
    _, state = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=_stacked_cell(),
        cell_bw=_stacked_cell(),
        sequence_length=sequence_length,
        inputs=rnn_input,
        dtype=tf.float32)
    return state
def cal_loss_logit(embedded, keep_prob, reuse=True, scope="loss"):
    """Compute logits and mean loss of the attention BiLSTM classifier.

    Reads ``self``, ``W``, ``W_fc`` and ``b_fc`` from the enclosing scope.

    Args:
        embedded: embedded input sequence fed to the bidirectional RNN.
        keep_prob: keep probability for dropout on the attended vector.
        reuse: forwarded to ``tf.variable_scope``.
        scope: variable scope name.

    Returns:
        Tuple ``(y_hat, loss)``: the logits and the mean sparse softmax
        cross-entropy against ``self.label``.
    """
    with tf.variable_scope(scope, reuse=reuse) as scope:
        rnn_outputs, _ = bi_rnn(BasicLSTMCell(self.hidden_size),
                                BasicLSTMCell(self.hidden_size),
                                inputs=embedded, dtype=tf.float32)

        # Attention over the summed forward and backward outputs.
        H = tf.add(rnn_outputs[0], rnn_outputs[1])
        M = tf.tanh(H)

        # One score per time step. The scores are reshaped to
        # (batch, max_len) BEFORE the softmax so it normalises across the
        # steps of a sequence; on the raw (batch * max_len, 1) matrix it
        # runs over a size-1 axis and every weight becomes 1.0.
        scores = tf.matmul(tf.reshape(M, [-1, self.hidden_size]),
                           tf.reshape(W, [-1, 1]))
        alpha = tf.nn.softmax(tf.reshape(scores, [-1, self.max_len]))

        # Weighted sum of the hidden states.
        r = tf.matmul(tf.transpose(H, [0, 2, 1]),
                      tf.reshape(alpha, [-1, self.max_len, 1]))
        # Squeeze only the trailing singleton axis so a batch of one keeps
        # its batch dimension.
        r = tf.squeeze(r, axis=[2])
        h_star = tf.tanh(r)
        drop = tf.nn.dropout(h_star, keep_prob)

        # Fully connected (dense) output layer.
        y_hat = tf.nn.xw_plus_b(drop, W_fc, b_fc)

    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=y_hat, labels=self.label))
    return y_hat, loss
def _make_graph(self):
    """Create the graph (LSTM stack, dense stack, softmax) and a session.

    Layer sizes come from ``self.specification`` (``dims``, ``lstm_units``,
    ``dense_units``). Every created tensor is stored on ``self``.
    """
    spec = self.specification
    self.graph = tf.Graph()
    with self.graph.as_default():
        # Inputs.
        self.batch_size = tf.placeholder(tf.int32, [])
        self.seq_len = tf.placeholder(tf.int32)
        # x: (batch_size, seq_len, dims); y: (batch_size, seq_len)
        self.x = tf.placeholder(tf.float32, shape=(None, None, spec.dims))
        self.y = tf.placeholder(tf.int32, shape=(None, None))
        self.keep_prob = tf.placeholder(tf.float32, shape=())

        # Recurrent part: one BasicLSTMCell per entry of lstm_units.
        lstm_layers = [BasicLSTMCell(units) for units in spec.lstm_units]
        self.lstm_cell = MultiRNNCell(lstm_layers)
        self.initial_hidden_state = self.lstm_cell.zero_state(
            self.batch_size, tf.float32)
        self.rnn_output, self.hidden_state = tf.nn.dynamic_rnn(
            cell=self.lstm_cell,
            inputs=self.x,
            initial_state=self.initial_hidden_state)
        self.rnn_dropout = tf.nn.dropout(self.rnn_output, self.keep_prob)

        # Dense part: ReLU layers, each followed by dropout. The first
        # layer is fed by the RNN output, later ones by the previous layer.
        afunc = tf.nn.relu
        dense_units = spec.dense_units
        self.dense = tf.layers.dense(self.rnn_dropout, dense_units[0],
                                     activation=afunc)
        self.dense_dropout = tf.nn.dropout(self.dense, self.keep_prob)
        for units in dense_units[1:]:
            self.dense = tf.layers.dense(self.dense_dropout, units,
                                         activation=afunc)
            self.dense_dropout = tf.nn.dropout(self.dense, self.keep_prob)

        # Softmax output, loss and optimiser.
        self.activation = tf.layers.dense(self.dense_dropout, spec.dims)
        self.probabilities = tf.nn.softmax(self.activation)
        self.loss = tf.losses.sparse_softmax_cross_entropy(
            labels=self.y, logits=self.activation)
        self.optimizer = tf.train.AdamOptimizer().minimize(self.loss)

        # Open the session bound to this graph.
        self.session = tf.Session(graph=self.graph)
def __init__(self, n_hidden, scope='SimpleLSTM', lstm_activation=tanh,
             initializer=None):
    """Create the LSTM cell and remember the model settings.

    :param n_hidden: size of the hidden layer of the LSTM
    :param scope: stored on the instance as ``self.scope``
    :param lstm_activation: activation function passed to the LSTM cell
    :param initializer: stored on the instance as ``self.initializer``
    """
    self.scope = scope
    self.lstm_activation = lstm_activation
    self.initializer = initializer
    self.cell = BasicLSTMCell(n_hidden,
                              forget_bias=1.0,
                              activation=lstm_activation)
def __init__(self, hidden_size=128, num_layers=2, chars_size=34,
             l2_reg_lambda=0.0):
    """Build an LSTM classifier that predicts one of ``chars_size`` classes.

    Args:
        hidden_size: number of LSTM units.
        num_layers: accepted for compatibility; currently unused because
            only a single LSTM layer is built.
        chars_size: width of the input features and of the output classes
            (now also used for the placeholder shapes, which were fixed
            at 34 before).
        l2_reg_lambda: weight of the L2 term added to the loss (the term
            itself is currently the constant 0.0).
    """
    self.sentence_x = tf.placeholder(tf.float32,
                                     shape=[None, None, chars_size])
    self.sentence_y = tf.placeholder(tf.float32, shape=[None, chars_size])
    l2_loss = tf.constant(0.0)

    with tf.name_scope("lstm"):
        f_cell = BasicLSTMCell(hidden_size)
        lstm_x, _ = tf.nn.dynamic_rnn(f_cell, inputs=self.sentence_x,
                                      dtype=tf.float32)
        W = tf.Variable(
            tf.random_normal(shape=[hidden_size, chars_size], stddev=0.1),
            name="W")
        b = tf.Variable(tf.constant(0.1, shape=[chars_size]), name="b")
        # Max over axis 1 of the LSTM outputs, then a linear layer.
        inp = tf.reduce_max(lstm_x, axis=1)
        # scores: [batch_size, chars_size]
        scores = tf.add(
            tf.matmul(tf.reshape(inp, [-1, hidden_size]), W), b)
        self.predictions = tf.argmax(scores, 1, "predictions")

    with tf.name_scope('loss'):
        losses = tf.nn.softmax_cross_entropy_with_logits(
            labels=self.sentence_y, logits=scores)
        self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

    with tf.name_scope("accuracy"):
        correct_predictions = tf.equal(self.predictions,
                                       tf.argmax(self.sentence_y, 1))
        self.accuracy = tf.reduce_mean(
            tf.cast(correct_predictions, "float"), name="accuracy")
def cal_loss_logit(batch_embedded, keep_prob, reuse=True, scope="loss"):
    """Compute logits and mean loss of the attention BiLSTM classifier.

    Reads ``HIDDEN_SIZE``, ``MAX_DOCUMENT_LENGTH``, ``W``, ``W_fc``,
    ``b_fc`` and ``batch_y`` from the enclosing scope.

    Args:
        batch_embedded: embedded input batch fed to the bidirectional RNN.
        keep_prob: keep probability for dropout on the attended vector.
        reuse: forwarded to ``tf.variable_scope``.
        scope: variable scope name.

    Returns:
        Tuple ``(y_hat, loss)``: the logits and the mean softmax
        cross-entropy against ``batch_y``.
    """
    with tf.variable_scope(scope, reuse=reuse) as scope:
        rnn_outputs, _ = bi_rnn(BasicLSTMCell(HIDDEN_SIZE),
                                BasicLSTMCell(HIDDEN_SIZE),
                                inputs=batch_embedded, dtype=tf.float32)

        # Attention over the summed forward and backward outputs.
        H = tf.add(rnn_outputs[0], rnn_outputs[1])
        M = tf.tanh(H)
        print(M.shape)

        # One score per time step. The scores are reshaped to
        # (batch, MAX_DOCUMENT_LENGTH) BEFORE the softmax so it normalises
        # across the steps of a document; on the raw (batch * length, 1)
        # matrix it runs over a size-1 axis and every weight becomes 1.0.
        scores = tf.matmul(tf.reshape(M, [-1, HIDDEN_SIZE]),
                           tf.reshape(W, [-1, 1]))
        alpha = tf.nn.softmax(
            tf.reshape(scores, [-1, MAX_DOCUMENT_LENGTH]))

        # Weighted sum of the hidden states.
        r = tf.matmul(tf.transpose(H, [0, 2, 1]),
                      tf.reshape(alpha, [-1, MAX_DOCUMENT_LENGTH, 1]))
        print(r.shape)
        # Squeeze only the trailing singleton axis so a batch of one keeps
        # its batch dimension.
        r = tf.squeeze(r, axis=[2])
        h_star = tf.tanh(r)
        drop = tf.nn.dropout(h_star, keep_prob)

        # Fully connected (dense) output layer.
        y_hat = tf.nn.xw_plus_b(drop, W_fc, b_fc)

    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=y_hat,
                                                labels=batch_y))
    return y_hat, loss
def bidirectional_lstm(input_tensor, list_n_hidden=[256, 256],
                       keep_prob_dropout=0.7):
    """Stack bidirectional LSTM layers over ``input_tensor``, then dropout.

    Args:
        input_tensor: input of the first layer.
        list_n_hidden: units per layer; one forward and one backward cell
            is created for each entry.
        keep_prob_dropout: keep probability of the dropout applied to the
            stacked output (always applied).

    Returns:
        The stacked bidirectional output after dropout.
    """
    def _make_cells():
        return [BasicLSTMCell(units, forget_bias=1.0)
                for units in list_n_hidden]

    with tf.name_scope('deep_bidirectional_lstm'):
        forward_cells = _make_cells()
        backward_cells = _make_cells()
        stacked, _, _ = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
            forward_cells, backward_cells, input_tensor, dtype=tf.float32)
        return tf.nn.dropout(stacked, keep_prob=keep_prob_dropout)
class RecurrentController(BaseController):
    """Controller backed by a single 256-unit LSTM cell."""

    def network_vars(self):
        """Create the LSTM cell and its zero state for ``self.batch_size``."""
        lstm = BasicLSTMCell(256)
        self.lstm_cell = lstm
        self.state = lstm.zero_state(self.batch_size, tf.float32)

    def network_op(self, X, state):
        """Run one LSTM step; returns the cell's ``(output, new_state)``."""
        step_input = tf.convert_to_tensor(X)
        return self.lstm_cell(step_input, state)

    def get_state(self):
        """Return the state created by ``network_vars``."""
        return self.state

    def update_state(self, new_state):
        """Ignore ``new_state`` and return a no-op."""
        return tf.no_op()
def recurrent_neural_network(x):
    """Run a static LSTM over ``x`` and project its last output.

    Reads ``rnn_size``, ``n_classes``, ``chunk_size`` and ``n_chunks`` from
    the enclosing scope.

    Returns:
        ``outputs[-1] @ weights + biases`` for freshly created
        random-normal output weights and biases.
    """
    out_weights = tf.Variable(tf.random_normal([rnn_size, n_classes]))
    out_biases = tf.Variable(tf.random_normal([n_classes]))
    layer = {'weights': out_weights, 'biases': out_biases}

    # Transpose with [1, 0, 2], flatten to rows of `chunk_size`, then split
    # into `n_chunks` step inputs.
    steps = tf.transpose(x, [1, 0, 2])
    steps = tf.reshape(steps, [-1, chunk_size])
    steps = tf.split(steps, n_chunks, 0)

    lstm = BasicLSTMCell(rnn_size, state_is_tuple=True, reuse=True)
    outputs, states = static_rnn(lstm, steps, dtype=tf.float32)

    return tf.matmul(outputs[-1], layer['weights']) + layer['biases']
def get_cell(input_size=None, reuse=False):
    """Create one encoder cell according to ``encoder.cell_type``.

    Args:
        input_size: forwarded to ``DropoutGRUCell`` / ``DropoutWrapper``.
        reuse: forwarded to the cell constructor.

    Returns:
        A wrapped ``BasicLSTMCell`` for 'lstm', a ``DropoutGRUCell`` for
        'dropoutgru', otherwise a ``GRUCell``; every kind except
        'dropoutgru' is additionally wrapped in ``DropoutWrapper`` when
        ``encoder.use_dropout`` is set.
    """
    kind = encoder.cell_type.lower()

    # This cell type applies its own dropout and is never wrapped.
    if kind == 'dropoutgru':
        return DropoutGRUCell(encoder.cell_size, reuse=reuse,
                              layer_norm=encoder.layer_norm,
                              input_size=input_size,
                              input_keep_prob=encoder.rnn_input_keep_prob,
                              state_keep_prob=encoder.rnn_state_keep_prob)

    if kind == 'lstm':
        rnn_cell = CellWrapper(BasicLSTMCell(encoder.cell_size, reuse=reuse))
    else:
        rnn_cell = GRUCell(encoder.cell_size, reuse=reuse,
                           layer_norm=encoder.layer_norm)

    if not encoder.use_dropout:
        return rnn_cell
    return DropoutWrapper(rnn_cell,
                          input_keep_prob=encoder.rnn_input_keep_prob,
                          output_keep_prob=encoder.rnn_output_keep_prob,
                          state_keep_prob=encoder.rnn_state_keep_prob,
                          variational_recurrent=encoder.pervasive_dropout,
                          dtype=tf.float32,
                          input_size=input_size)
def get_cell(input_size=None, reuse=False):
    """Create one encoder cell, optionally wrapped with dropout.

    Args:
        input_size: forwarded to ``DropoutWrapper``.
        reuse: forwarded to the cell constructor.

    Returns:
        A wrapped ``BasicLSTMCell`` when ``encoder.use_lstm`` is set,
        otherwise a ``GRUCell``; wrapped in ``DropoutWrapper`` when
        ``encoder.use_dropout`` is set.
    """
    if encoder.use_lstm:
        rnn_cell = CellWrapper(BasicLSTMCell(encoder.cell_size, reuse=reuse))
    else:
        rnn_cell = GRUCell(encoder.cell_size, reuse=reuse)

    if not encoder.use_dropout:
        return rnn_cell

    dropout_options = dict(
        input_keep_prob=encoder.rnn_input_keep_prob,
        output_keep_prob=encoder.rnn_output_keep_prob,
        state_keep_prob=encoder.rnn_state_keep_prob,
        variational_recurrent=encoder.pervasive_dropout,
        dtype=tf.float32,
        input_size=input_size)
    return DropoutWrapper(rnn_cell, **dropout_options)
class DilatedLSTM(object):
    """LSTM whose per-core states are updated round-robin by one shared cell.

    Step ``i`` of the loop updates the state slot ``i % num_cores``; the
    concatenated states are then average-pooled.
    """

    def __init__(self, inputs, initial_state, hidden_state_size, max_steps,
                 num_cores=10, pool_size=10):
        """Store the settings and build the ops.

        Args:
            inputs: input tensor fed to the first loop step.
            initial_state: list of per-core states (indexed by core).
            hidden_state_size: number of units of the shared LSTM cell.
            max_steps: number of loop iterations.
            num_cores: number of state slots cycled through.
            pool_size: window size of the average pooling.
        """
        self.shared_cell = BasicLSTMCell(hidden_state_size)
        self.initial_state = initial_state
        self.max_steps = max_steps
        self.num_cores = num_cores
        self.pool_size = pool_size
        self.inputs = inputs
        self._build_ops()

    def _build_ops(self):
        """Build the while-loop over steps and the pooled output."""
        i0 = tf.constant(0, dtype=tf.int32)
        loop_condition = lambda i, inputs, state: tf.less(i, self.max_steps)

        def body(i, inputs, full_state):
            # NOTE(review): `idx` is a tensor inside tf.while_loop, so
            # indexing a Python list with it may not work -- confirm.
            idx = i % self.num_cores
            prev_state = full_state[idx]
            inputs, full_state[idx] = self.shared_cell(inputs, prev_state)
            return i + 1, inputs, full_state

        _, inputs, full_state = tf.while_loop(
            loop_condition, body,
            loop_vars=[i0, self.inputs, self.initial_state])

        # NOTE(review): 256 is hard-coded here rather than derived from
        # hidden_state_size -- confirm the intended width.
        lstm_outputs = tf.reshape(tf.concat(full_state, 1), [-1, 256])
        # `tf.expand` and `tf.avg_pool` do not exist; the real ops are
        # `tf.expand_dims` and `tf.nn.avg_pool`.
        # NOTE(review): the 4-element window implies a 4-D input, while a
        # single expand_dims yields 3-D -- confirm the intended layout.
        self.outputs = tf.nn.avg_pool(
            tf.expand_dims(lstm_outputs, -1),
            [1, self.pool_size, 1, 1],
            strides=[1, 1, 1, 1],
            padding='SAME')
        # Original (misspelled) attribute name kept for existing callers.
        self.outpus = self.outputs

    def zero_state(self):
        """Return one zero state of the shared cell per core."""
        # `self.stride` was never defined; the state list is indexed by
        # `i % num_cores`, so it needs `num_cores` entries.
        # NOTE(review): the batch size is taken from tf.shape(max_steps),
        # which looks unintended -- confirm the source of the batch size.
        return [
            self.shared_cell.zero_state(
                tf.shape(self.max_steps)[0], tf.float32)
            for _ in range(self.num_cores)
        ]
def __init__(self, batch_size, seq_length, n_layers, rnn_size, vocab_size,
             scope, **ignored_args):
    """Build the training graph and the single-step sampling graph.

    Args:
        batch_size: number of sequences per training batch.
        seq_length: number of unrolled time steps.
        n_layers: number of stacked LSTM layers.
        rnn_size: LSTM units per layer (also the embedding width).
        vocab_size: number of distinct symbols.
        scope: variable scope in which the cell is applied.
        **ignored_args: accepted and ignored.
    """
    self.batch_size = batch_size
    self.seq_length = seq_length
    self.rnn_size = rnn_size
    self.vocab_size = vocab_size
    self.n_layers = n_layers
    self.scope = scope
    self.grad_clip = 5.0

    data_shape = (self.seq_length, self.batch_size)
    self.input_data = tf.placeholder(tf.int32, data_shape)
    self.target_data = tf.placeholder(tf.int32, data_shape)

    self.embedding = tf.get_variable("embedding",
                                     (self.vocab_size, self.rnn_size))
    # embedded: (seq_length, batch_size, rnn_size)
    embedded = tf.nn.embedding_lookup(self.embedding, self.input_data)

    self.softmax_w = tf.get_variable("softmax_w",
                                     (self.rnn_size, self.vocab_size))
    self.softmax_b = tf.get_variable("softmax_b", (self.vocab_size,))
    self.learning_rate = tf.placeholder(tf.float32, ())

    layers = [BasicLSTMCell(self.rnn_size) for _ in range(self.n_layers)]
    cell = MultiRNNCell(layers)
    self.init_state = cell.zero_state(batch_size=self.batch_size,
                                      dtype=tf.float32)
    state = self.init_state

    # Unroll by hand: one cell call and one projection per time step.
    # step_logits collects seq_length tensors of (batch_size, vocab_size).
    step_logits = []
    with tf.variable_scope(self.scope):
        for t in range(self.seq_length):
            # output: (batch_size, rnn_size)
            output, state = cell(embedded[t], state)
            step_logits.append(
                tf.matmul(output, self.softmax_w) + self.softmax_b)
            # Later steps reuse the variables created by the first one.
            tf.get_variable_scope().reuse_variables()
    self.final_state = state

    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.target_data, logits=step_logits)
    self.cost = tf.reduce_mean(loss)

    # Clip gradients by global norm before applying them.
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      self.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    # Sampling model: a single step for a single input, reusing the
    # trained variables.
    self.sample_input_char = tf.placeholder(tf.int32)
    sample_embedded = tf.nn.embedding_lookup(
        self.embedding, tf.reshape(self.sample_input_char, (1,)))
    self.sample_init_state = cell.zero_state(batch_size=1, dtype=tf.float32)
    with tf.variable_scope(self.scope, reuse=True):
        output, self.sample_final_state = cell(sample_embedded,
                                               self.sample_init_state)
        sample_logits = tf.matmul(output, self.softmax_w) + self.softmax_b
        self.sample_output_probs = tf.nn.softmax(sample_logits[0])
def prediction(self):
    """Build the prediction op: stacked LSTM, last relevant output, linear.

    Returns:
        ``last @ weight + bias`` where ``last`` comes from
        ``self._last_relevant(output, self.length)`` and the weight/bias
        pair from ``self._weight_and_bias(self._num_hidden, 1)``.
    """
    # Recurrent network. Each layer gets its own cell object; repeating
    # one instance with `[cell] * n` errors or shares weights across
    # layers, depending on the TensorFlow version.
    layers = [BasicLSTMCell(self._num_hidden)
              for _ in range(self._num_layers)]
    cell = MultiRNNCell(layers)
    output, _ = tf.nn.dynamic_rnn(
        cell,
        self.data,
        dtype=tf.float32,
    )
    last = self._last_relevant(output, self.length)

    # Linear output layer with a single output unit.
    weight, bias = self._weight_and_bias(self._num_hidden, 1)
    prediction = tf.matmul(last, weight) + bias
    return prediction
def __init__(self, hidden_layer_size, output_size,
             activation_fn=tf.nn.sigmoid):
    """Create the output-layer parameters and the recurrent unit.

    Args:
        hidden_layer_size: number of units of the recurrent cell.
        output_size: width of the output layer.
        activation_fn: activation passed to the recurrent cell.
    """
    self.fn = activation_fn
    self.hidden_layer_size = hidden_layer_size
    self.output_size = output_size

    # Output layer parameters.
    output_shape = (hidden_layer_size, output_size)
    self.Wo = self.weight_variable(shape=output_shape)
    self.bo = self.bias_variable(shape=[output_size])

    # Input weights, hidden weights, and hidden biases are created by the
    # rnn unit.
    self.rnn_unit = BasicLSTMCell(num_units=hidden_layer_size,
                                  activation=self.fn,
                                  reuse=None)