def lsrc_sequence_graph_old(self, config, inputs):
    """ Build the recurrence graph of the LSRC model.
    It returns the output and the last hidden layer """
    outputs = inputs
    last_state = []
    for layer in range(self.num_layers):
        inputs, last_local_state_ = legacy_seq2seq.rnn_decoder(
            outputs, self.initial_state[layer].Local,
            self.cell[layer].Local, loop_function=None)
        outputs, last_global_state_ = legacy_seq2seq.rnn_decoder(
            inputs, self.initial_state[layer].Global,
            self.cell[layer].Global, loop_function=None)
        last_lsrc_states_ = LSRCTuple(last_local_state_, last_global_state_)
        last_state.append(last_lsrc_states_)
    output = tf.reshape(tf.concat(outputs, 1), [-1, self.global_state_size])
    return output, last_state

def generate(self):
    inputs = tf.split(axis=1, num_or_size_splits=self.args.seq_length,
                      value=tf.nn.embedding_lookup(self.embedding, self.input_data))
    inputs = [tf.nn.l2_normalize(tf.squeeze(input_, [1]), 1) for input_ in inputs]

    def loop(prev, i):
        return prev

    with tf.variable_scope('GEN', reuse=self.has_init_seq2seq) as scope:
        self.has_init_seq2seq = True
        if self.args.num_layers == 1:
            outputs, last_state = legacy_seq2seq.rnn_decoder(
                inputs, [self.initial_state1], self.cell,
                loop_function=loop, scope=scope)
        elif self.args.num_layers == 2:
            outputs, last_state = legacy_seq2seq.rnn_decoder(
                inputs, [self.initial_state1, self.initial_state2], self.cell,
                loop_function=loop, scope=scope)
        else:
            raise Exception('Unsupported number of layers. Use 1 or 2 layers for now.')
        outputs = [tf.nn.l2_normalize(o, 1) for o in outputs]
        self.outputs = outputs
        return outputs

def __init__(self, args, embedding):
    self.args = args
    if args.model == 'rnn':
        cell_fn = rnn.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    cell = cell_fn(args.rnn_size)
    self.cell = cell = rnn.MultiRNNCell([cell] * args.num_layers)

    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length],
                                     name='STAND_input')
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length],
                                  name='STAND_targets')
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)
    self.embedding = embedding

    with tf.variable_scope('STAND'):
        softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        inputs = tf.split(axis=1, num_or_size_splits=args.seq_length,
                          value=tf.nn.embedding_lookup(self.embedding, self.input_data))
        inputs = [tf.nn.l2_normalize(tf.squeeze(input_, [1]), 1) for input_ in inputs]

    def loop(prev, i):
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.l2_normalize(tf.nn.embedding_lookup(embedding, prev_symbol), 1)

    o, _ = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                      loop_function=None, scope='STAND')
    with tf.variable_scope('STAND', reuse=True) as scope:
        sf_o, _ = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                             loop_function=loop, scope=scope)

    output = tf.reshape(tf.concat(axis=1, values=o), [-1, args.rnn_size])
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)

    sf_output = tf.reshape(tf.concat(axis=1, values=sf_o), [-1, args.rnn_size])
    self_feed_logits = tf.matmul(sf_output, softmax_w) + softmax_b
    self.self_feed_probs = tf.nn.softmax(self_feed_logits)

    loss = legacy_seq2seq.sequence_loss_by_example(
        [self.logits], [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])], args.vocab_size)
    self.loss = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars), args.grad_clip)
    for g, v in zip(grads, tvars):
        print(v.name)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

def createGraph(self):
    self.input = tf.placeholder(tf.int32, [self.batch_size, self.seq_len],
                                name='inputs')
    self.targs = tf.placeholder(tf.int32, [self.batch_size, self.seq_len],
                                name='targets')
    onehot = tf.one_hot(self.input, self.vocab_size, name='input_oh')
    inputs = tf.split(onehot, self.seq_len, 1)
    inputs = [tf.squeeze(i, [1]) for i in inputs]
    targets = tf.split(self.targs, self.seq_len, 1)

    with tf.variable_scope("posRNN"):
        cells = [GRUCell(self.num_hidden) for _ in range(self.num_layers)]
        stacked = MultiRNNCell(cells, state_is_tuple=True)
        self.zero_state = stacked.zero_state(self.batch_size, tf.float32)
        outputs, self.last_state = seq2seq.rnn_decoder(inputs, self.zero_state,
                                                       stacked)
        w = tf.get_variable("w", [self.num_hidden, self.vocab_size], tf.float32,
                            initializer=tf.random_normal_initializer(stddev=0.02))
        b = tf.get_variable("b", [self.vocab_size],
                            initializer=tf.constant_initializer(0.0))
        logits = [tf.matmul(o, w) + b for o in outputs]
        const_weights = [tf.ones([self.batch_size]) for _ in xrange(self.seq_len)]
        self.loss = seq2seq.sequence_loss(logits, targets, const_weights)
        self.opt = tf.train.AdamOptimizer(0.001, beta1=0.5).minimize(self.loss)

    with tf.variable_scope("posRNN", reuse=True):
        batch_size = 1
        self.s_inputs = tf.placeholder(tf.int32, [batch_size], name='s_inputs')
        s_onehot = tf.one_hot(self.s_inputs, self.vocab_size, name='s_input_oh')
        self.s_zero_state = stacked.zero_state(batch_size, tf.float32)
        s_outputs, self.s_last_state = seq2seq.rnn_decoder([s_onehot],
                                                           self.s_zero_state,
                                                           stacked)
        s_outputs = tf.reshape(s_outputs, [1, self.num_hidden])
        self.s_probs = tf.nn.softmax(tf.matmul(s_outputs, w) + b)

def add_rnn_decoder_layer(self):
    # https://stackoverflow.com/questions/36994067/no-feed-previous-argument-for-tensorflow-basic-rnn-seq2seq-function
    weights = tf.Variable(
        tf.random_normal([self.hidden_size, self.output_size],
                         # mean=0.5,
                         stddev=0.5,
                         dtype=tf.float64))
    biases = tf.Variable(
        tf.random_normal([self.output_size],
                         # mean=0.5,
                         stddev=0.5,
                         dtype=tf.float64))

    def inference_loop_function(prev, _):
        return tf.matmul(prev, weights) + biases

    loop_function = inference_loop_function if self.feed_previous else None
    layers = rnn.MultiRNNCell(
        [self.lstm_cell(self.hidden_size) for i in range(self.layer_depth)],
        state_is_tuple=True)
    outputs, self.cell_states = legacy_seq2seq.rnn_decoder(
        tf.unstack(self.ys, axis=1), self.final_enc_states, layers,
        loop_function=loop_function)
    self.cell_outputs = tf.stack(outputs, axis=1)

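# --- Illustrative sketch (not part of the class above) -----------------------
# A self-contained TF 1.x example of the feed_previous pattern that
# add_rnn_decoder_layer relies on: when feed_previous is set, the loop function
# projects the previous cell output back to the input dimension and feeds it in,
# so the decoder runs on its own predictions instead of the ground-truth ys.
# All sizes and variable names below are arbitrary assumptions.
import tensorflow as tf
from tensorflow.contrib import rnn, legacy_seq2seq

batch_size, steps, input_size, hidden_size = 16, 10, 3, 32

ys = tf.placeholder(tf.float32, [batch_size, steps, input_size])
init_state = rnn.LSTMStateTuple(tf.zeros([batch_size, hidden_size]),
                                tf.zeros([batch_size, hidden_size]))

out_w = tf.get_variable("out_w", [hidden_size, input_size])
out_b = tf.get_variable("out_b", [input_size])

def inference_loop(prev, _):
    # Map the previous hidden output back to the input space.
    return tf.matmul(prev, out_w) + out_b

feed_previous = True
outputs, last_state = legacy_seq2seq.rnn_decoder(
    tf.unstack(ys, axis=1), init_state, rnn.BasicLSTMCell(hidden_size),
    loop_function=inference_loop if feed_previous else None)
predictions = tf.stack(outputs, axis=1)  # [batch_size, steps, hidden_size]
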
def Generate(self):
    with tf.variable_scope('gen') as scope:
        inputs = tf.split(
            tf.nn.embedding_lookup(self.emb_matrix, self.input_data),
            self.args.seq_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
        outputs, _ = legacy_seq2seq.rnn_decoder(inputs, self.initial_state,
                                                self.cell_fn, scope='rnn_dec')
        output_ = tf.reshape(tf.concat(outputs, 1), [-1, self.args.rnn_size])
        logits = tf.matmul(tf.cast(output_, tf.float32), self.weight) + self.bias
        probs = tf.nn.softmax(logits)
        pred = tf.multinomial(probs, 1)
        prediction = tf.reshape(pred,
                                [self.args.batch_size, self.args.seq_length])
        # tf.concat takes the tensors to concatenate as a single list argument.
        fake_data = tf.concat([self.input_data, prediction], 1)
        tvars = tf.trainable_variables()
        Gtvars = [v for v in tf.all_variables() if v.name.startswith(scope.name)]
        print(Gtvars)

def __init__(self, args, training=True):
    self.args = args
    if not training:
        args.batch_size = 1
        args.seq_length = 1

    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])

    embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])

    cells = []
    for _ in range(args.rnn_layers):
        cells.append(rnn.BasicLSTMCell(args.rnn_size))
    self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

    dense_layer_w = tf.get_variable("dense_layer_w", [args.rnn_size, args.vocab_size])
    dense_layer_b = tf.get_variable("dense_layer_b", [args.vocab_size])

    inputs = tf.nn.embedding_lookup(embedding, self.input_data)
    inputs = tf.split(inputs, args.seq_length, 1)
    inputs = [tf.squeeze(ip, [1]) for ip in inputs]

    self.initial_state = cell.zero_state(args.batch_size, tf.float32)
    outputs, self.final_state = legacy_seq2seq.rnn_decoder(inputs,
                                                           self.initial_state,
                                                           cell)
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
    logits = tf.matmul(output, dense_layer_w) + dense_layer_b
    self.probs = tf.nn.softmax(logits)
    self.predicted_output = tf.reshape(tf.argmax(self.probs, 1),
                                       [args.batch_size, args.seq_length])

    self.lr = tf.Variable(0.0, trainable=False)
    loss = sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=tf.reshape(self.targets, [-1]))
    self.cost = tf.reduce_mean(loss)
    self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.cost)

def discriminate_wv(self, input_data_wv):
    with tf.variable_scope('DISC', reuse=self.has_init_seq2seq) as scope:
        self.has_init_seq2seq = True
        output_wv, states_wv = legacy_seq2seq.rnn_decoder(
            input_data_wv, self.initial_state, self.cell, scope=scope)
        predicted_classes_wv = tf.matmul(output_wv[-1], self.fc_layer)
    return predicted_classes_wv

def __init__(self, args, training=True):
    self.args = args
    if not training:
        args.batch_size = 1
        args.seq_length = 1

    use_dropout = training and (args.output_keep_prob < 1.0
                                or args.input_keep_prob < 1.0)
    cell_fn = self.select_cell_fn(args.model)
    cells = self.create_cell_stack('hidden', cell_fn, args, use_dropout=use_dropout)
    self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    softmax_w = self.create_var('rnnlm', 'softmax_w', [args.rnn_size, args.vocab_size])
    softmax_b = self.create_var('rnnlm', 'softmax_b', [args.vocab_size])
    embedding = self.create_var('rnnlm', 'embedding', [args.vocab_size, args.rnn_size])

    inputs = tf.nn.embedding_lookup(embedding, self.input_data)
    # dropout beta testing: double check which one should affect next line
    if training and args.output_keep_prob:
        inputs = tf.nn.dropout(inputs, args.output_keep_prob)
    inputs = tf.split(inputs, args.seq_length, 1)
    inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = legacy_seq2seq.rnn_decoder(
        inputs, self.initial_state, cell,
        loop_function=loop if not training else None, scope='rnnlm')
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])

    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    loss = legacy_seq2seq.sequence_loss_by_example(
        [self.logits], [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])])
    with tf.name_scope('cost'):
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    # instrument tensorboard
    tf.summary.histogram('logits', self.logits)
    tf.summary.histogram('loss', loss)
    tf.summary.scalar('train_loss', self.cost)

def custom_rnn_seq2seq(encoder_inputs,
                       decoder_inputs,
                       enc_cell,
                       dec_cell,
                       dtype=dtypes.float32,
                       initial_state=None,
                       use_previous=False,
                       scope=None,
                       num_units=0):
    with variable_scope.variable_scope(scope or "custom_rnn_seq2seq"):
        _, enc_state = core_rnn.static_rnn(enc_cell, encoder_inputs, dtype=dtype,
                                           scope=scope, initial_state=initial_state)
        print(enc_state.get_shape())
        c = tf.tanh(
            tf.matmul(tf.get_variable("v", [dim_hidden, dim_hidden]), enc_state))
        h_prime_init = tf.tanh(
            tf.matmul(tf.get_variable("v_prime", [dim_hidden, dim_hidden]), c))
        if not use_previous:
            return seq2seq.rnn_decoder(decoder_inputs,
                                       LSTMStateTuple(c, h_prime_init),
                                       dec_cell, scope=scope)
        return infer(LSTMStateTuple(c, h_prime_init), dec_cell, num_units)

def advanced_rnn_decoder(decoder_inputs,
                         initial_state,
                         cell,
                         num_symbols,
                         output_projection=None,
                         feed_previous=False,
                         scope=None):
    with variable_scope.variable_scope(scope or "embedding_rnn_decoder") as scope:
        if output_projection is not None:
            dtype = scope.dtype
            proj_weights = ops.convert_to_tensor(output_projection[0], dtype=dtype)
            proj_weights.get_shape().assert_is_compatible_with([None, num_symbols])
            proj_biases = ops.convert_to_tensor(output_projection[1], dtype=dtype)
            proj_biases.get_shape().assert_is_compatible_with([num_symbols])

        if feed_previous:
            loop_function = _extract_last_and_project(output_projection)
        else:
            loop_function = None

        return rnn_decoder(decoder_inputs, initial_state, cell,
                           loop_function=loop_function)

def __init__(self, args, infer=False):
    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    additional_cell_args = {}
    if args.model == 'rnn':
        cell_fn = rnn.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn.BasicLSTMCell
    elif args.model == 'gridlstm':
        cell_fn = grid_rnn.Grid2LSTMCell
        additional_cell_args.update({'use_peepholes': True, 'forget_bias': 1.0})
    elif args.model == 'gridgru':
        cell_fn = grid_rnn.Grid2GRUCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    cell = cell_fn(args.rnn_size, **additional_cell_args)
    self.cell = cell = rnn.MultiRNNCell([cell] * args.num_layers)

    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
            inputs = tf.split(tf.nn.embedding_lookup(embedding, self.input_data),
                              args.seq_length, 1)
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                              loop_function=loop if infer else None,
                                              scope='rnnlm')
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
    self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
    self.probs = tf.nn.softmax(self.logits)
    loss = seq2seq.sequence_loss_by_example(
        [self.logits], [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])], args.vocab_size)
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(list(zip(grads, tvars)))

def __init__(self, vocabulary_size, batch_size, sequence_length, hidden_layer_size,
             cells_size, gradient_clip=5., training=True):
    cells = []
    for _ in range(cells_size):
        cells.append(rnn.LSTMCell(hidden_layer_size))
    self.cell = rnn.MultiRNNCell(cells)

    self.input_data = tf.placeholder(tf.int32, [batch_size, sequence_length])
    self.targets = tf.placeholder(tf.int32, [batch_size, sequence_length])
    self.initial_state = self.cell.zero_state(batch_size, tf.float32)

    with tf.variable_scope("rnn", reuse=tf.AUTO_REUSE):
        softmax_layer = tf.get_variable("softmax_layer",
                                        [hidden_layer_size, vocabulary_size])
        softmax_bias = tf.get_variable("softmax_bias", [vocabulary_size])
    with tf.variable_scope("embedding", reuse=tf.AUTO_REUSE):
        embedding = tf.get_variable("embedding",
                                    [vocabulary_size, hidden_layer_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        inputs = tf.split(inputs, sequence_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(previous, _):
        previous = tf.matmul(previous, softmax_layer) + softmax_bias
        previous_symbol = tf.stop_gradient(tf.argmax(previous, 1))
        return tf.nn.embedding_lookup(embedding, previous_symbol)

    with tf.variable_scope("rnn", reuse=tf.AUTO_REUSE):
        outputs, last_state = legacy_seq2seq.rnn_decoder(
            inputs, self.initial_state, self.cell,
            loop_function=loop if not training else None)
    output = tf.reshape(tf.concat(outputs, 1), [-1, hidden_layer_size])

    self.logits = tf.matmul(output, softmax_layer) + softmax_bias
    self.probabilities = tf.nn.softmax(self.logits)
    loss = legacy_seq2seq.sequence_loss_by_example(
        [self.logits], [tf.reshape(self.targets, [-1])],
        [tf.ones([batch_size * sequence_length])])
    with tf.name_scope("cost"):
        self.cost = tf.reduce_sum(loss) / batch_size / sequence_length
    self.final_state = last_state
    self.learning_rate = tf.Variable(0.0, trainable=False)
    trainable_vars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, trainable_vars),
                                      gradient_clip)
    with tf.variable_scope("optimizer", reuse=tf.AUTO_REUSE):
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(zip(grads, trainable_vars))

    tf.summary.histogram("logits", self.logits)
    tf.summary.histogram("probabilities", self.probabilities)
    tf.summary.histogram("loss", loss)
    tf.summary.scalar("cost", self.cost)
    tf.summary.scalar("learning_rate", self.learning_rate)

def sample(vocab_inv, vocab, model_dir, sample_length=30, prime=None):
    with tf.Session() as sess:
        cell = setup_cell()
        input_data = tf.placeholder(tf.int32, [1, 1])
        initial_state = cell.zero_state(1, tf.float32)
        main_scope = 'light'
        vocab_size = len(vocab)

        with tf.variable_scope(main_scope, reuse=tf.AUTO_REUSE):
            softmax_w = tf.get_variable("softmax_w", [RNN_SIZE, vocab_size])
            softmax_b = tf.get_variable("softmax_b", [vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding", [vocab_size, RNN_SIZE])
                inputs = tf.split(tf.nn.embedding_lookup(embedding, input_data), 1, 1)
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
            outputs, last_state = legacy_seq2seq.rnn_decoder(inputs, initial_state,
                                                             cell, scope=main_scope)
            output = tf.reshape(tf.concat(outputs, 1), [-1, RNN_SIZE])
            logits = tf.matmul(output, softmax_w) + softmax_b
            probs = tf.nn.softmax(logits)
            final_state = last_state

        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(model_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        print('starting sampling')
        state = sess.run(initial_state)
        if not prime:
            prime = random.choice(list(vocab.keys()))
        print('prime is:' + prime)

        x = np.zeros((1, 1))
        x[0, 0] = vocab.get(prime, 0)
        feed = {input_data: x, initial_state: state}
        [state] = sess.run([final_state], feed)

        ret = prime
        word = prime.split()[-1]
        for n in range(sample_length):
            x = np.zeros((1, 1))
            x[0, 0] = vocab.get(word, 0)
            feed = {input_data: x, initial_state: state}
            [state_probs, state] = sess.run([probs, final_state], feed)
            p = state_probs[0]
            t = np.cumsum(p)
            s = np.sum(p)
            sample = int(np.searchsorted(t, np.random.rand(1) * s))
            pred = vocab_inv[sample]
            ret += ' ' + pred
            word = pred
        print('sampling finished')
        print('sampling result: ' + ret)

def __init__(self, args, training=True):
    self.args = args
    # When not training, we take in one character at a time and try to predict the next.
    if not training:
        args.batch_size = 1
        args.seq_length = 1

    # Assign the basic type of RNN unit
    if args.mtype == 'rnn':
        cell_fn = rnn.BasicRNNCell
    elif args.mtype == 'gru':
        cell_fn = rnn.GRUCell
    elif args.mtype == 'lstm':
        cell_fn = rnn.BasicLSTMCell
    elif args.mtype == 'nas':
        cell_fn = rnn.NASCell
    else:
        raise Exception("model type not supported: {}".format(args.mtype))

    cells = []
    for _ in range(args.num_layers):
        cell = cell_fn(args.rnn_size)
        cells.append(cell)
    self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
    softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
    embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])

    inputs = tf.nn.embedding_lookup(embedding, self.input_data)
    inputs = tf.split(inputs, args.seq_length, 1)
    inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    outputs, last_state = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell)
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])

    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    self.predicted_output = tf.reshape(tf.argmax(self.probs, 1),
                                       [args.batch_size, args.seq_length])

    # sparse_softmax_cross_entropy_with_logits expects plain tensors, not lists.
    loss = sparse_softmax_cross_entropy_with_logits(
        logits=self.logits, labels=tf.reshape(self.targets, [-1]))
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.cost)

def model(cell_state_size, rnn_cells_depth, batch_size, batch_len,
          number_of_tokens, reuse):
    input_placeholder = tf.placeholder(tf.int32, shape=(None, batch_len),
                                       name="input")
    target_placeholder = tf.placeholder(tf.int32, shape=(None, batch_len),
                                        name="target")

    # make dictionary for letters (60, 128)
    with tf.variable_scope("rnn") as scope:
        if reuse:
            scope.reuse_variables()
        cell = tf.nn.rnn_cell.BasicLSTMCell(cell_state_size)
        #cell = tf.nn.rnn_cell.BasicRNNCell(cell_state_size)
        #cell = tf.contrib.rnn.IntersectionRNNCell(cell_state_size)
        #cell = tf.contrib.rnn.LSTMCell(cell_state_size)
        #cell = tf.contrib.rnn.TimeFreqLSTMCell(cell_state_size)
        rnn_cell = tf.nn.rnn_cell.MultiRNNCell([cell] * rnn_cells_depth)

        W = tf.get_variable("W", shape=(128, number_of_tokens))
        b = tf.get_variable("b", shape=(number_of_tokens))
        embedding = tf.get_variable("embedding", [number_of_tokens, 128])

        # (60, 50, 128)
        rnn_input = tf.nn.embedding_lookup(embedding, input_placeholder)
        # 50 of (60, 1, 128)
        rnn_input = tf.split(rnn_input, batch_len, axis=1)
        rnn_input = [tf.squeeze(rni, [1]) for rni in rnn_input]

        # map input from id numbers to rnn states
        decoder_initial_state = rnn_cell.zero_state(batch_size, tf.float32)
        # outputs: list of 50 tensors of shape (60, 128)
        outputs, last_state = seq2seq.rnn_decoder(rnn_input, decoder_initial_state,
                                                  rnn_cell, scope="rnn")
        # (60, -1)
        outputs = tf.concat(outputs, 1)
        # (-1, 128), i.e. a list of letters
        outputs = tf.reshape(outputs, [-1, 128])
        # (3000, number_of_tokens)
        logits = tf.matmul(outputs, W) + b
        #probs = tf.nn.softmax(logits, 1, name="probs")
        probs = tf.nn.softmax(logits, -1, name="probs")
        loss = seq2seq.sequence_loss_by_example(
            [logits], [tf.reshape(target_placeholder, [-1])],
            [tf.ones([batch_size * batch_len])], number_of_tokens)

    return ([loss, probs, decoder_initial_state, input_placeholder,
             target_placeholder, last_state, logits])

def time_sequence_graph(self, inputs):
    """ Apply the recurrence cell to an input sequence (each batch entry is a
    sequence of words).
    return: stacked cell outputs of the complete sequence, in addition to the
    last hidden state (and memory for LSTM/LSTMP) obtained after processing the
    last word (in each batch entry).
    """
    outputs, last_state = legacy_seq2seq.rnn_decoder(inputs, self.initial_state,
                                                     self.cell, loop_function=None)
    output = tf.reshape(tf.concat(outputs, 1), [-1, self.recurrent_state_size])
    return output, last_state

def noembedding_rnn_decoder(self, decoder_inputs, init_state, cell):
    loop_function = self._extract_argmax(
        self.fake_embedding,
        self.decoder_output_projection) if self.feed_previous else None
    # Materialize the embedded decoder inputs as a list before handing them
    # to rnn_decoder.
    emb_inp = [tf.nn.embedding_lookup(self.fake_embedding, i)
               for i in decoder_inputs]
    return rnn_decoder(emb_inp, init_state, cell, loop_function=loop_function)

def decode_seq(decoder_inputs, decoder_init_state, hidden_size, vocab_size,
               initializer=tf.contrib.layers.xavier_initializer()):
    with tf.variable_scope("decoder"):
        # Decoder layer (train). LSTM cell: decoder num_neurons = hidden_size = intent dim
        cell_fw2 = tf.nn.rnn_cell.LSTMCell(hidden_size, initializer=initializer)
        decoder_output, _ = legacy_seq2seq.rnn_decoder(
            decoder_inputs=decoder_inputs,
            initial_state=decoder_init_state,
            cell=cell_fw2)
        # [batch_size, pad_len-1, hidden_size]
        decoder_output = tf.stack(decoder_output, axis=1)

    with tf.variable_scope("linear_projection"):
        # Projection layer: hidden_size to vocab_size
        W_proj = tf.get_variable("weights", [1, hidden_size, vocab_size],
                                 initializer=initializer)
        # project to [batch_size, pad_length-1, vocab_size]
        logits = tf.nn.conv1d(decoder_output, W_proj, 1, "VALID", name="logits")
    return logits

def __init__(self, args, mode='TRAIN'):
    '''Create the model.

    Args:
        args: parsed arguments
        mode: TRAIN | EVAL | INFER
    '''
    # When sampling, the batch size and sequence length are 1.
    if mode == 'INFER':
        args.batch_size = 1
        args.seq_length = 1

    cell = rnn.BasicLSTMCell(args.rnn_size, state_is_tuple=True)
    self.cell = cell = rnn.MultiRNNCell([cell] * args.num_layers,
                                        state_is_tuple=True)

    # Build the input and target placeholders
    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, dtype=tf.float32)

    with tf.name_scope('rnn_cells'):
        # final softmax weights
        softmax_w = tf.get_variable('softmax_w', [args.rnn_size, args.vocab_size])
        # final softmax bias
        softmax_b = tf.get_variable('softmax_b', [args.vocab_size])
        with tf.device('/cpu:0'):
            embedding = tf.get_variable('embedding',
                                        [args.vocab_size, args.rnn_size],
                                        dtype=tf.float32)
            inputs = tf.split(tf.nn.embedding_lookup(embedding, self.input_data),
                              args.seq_length, 1)
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    # Using legacy_seq2seq; the loop function feeds the previous prediction
    # back in and is only needed when sampling (INFER mode).
    outputs, last_state = legacy_seq2seq.rnn_decoder(
        inputs, self.initial_state, cell,
        loop_function=loop if mode == 'INFER' else None, scope='rnn_cells')
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])

    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    loss = legacy_seq2seq.sequence_loss_by_example(
        logits=[self.logits],
        targets=[tf.reshape(self.targets, [-1])],
        weights=[tf.ones([args.batch_size * args.seq_length])])
    self.cost = tf.reduce_mean(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

def __init__(self, args, training=True):
    """Initialize RNN model"""
    self.args = args
    if not training:
        args.batch_size = 1
        args.seq_length = 1

    cell_fn = rnn.GRUCell
    cells = [cell_fn(args.rnn_size) for _ in range(args.num_layers)]
    self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnn'):
        softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        inputs = tf.split(inputs, args.seq_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = legacy_seq2seq.rnn_decoder(
        inputs, self.initial_state, cell,
        loop_function=loop if not training else None, scope='rnnlm')
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])

    self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
    self.probs = tf.nn.softmax(self.logits)
    loss = legacy_seq2seq.sequence_loss_by_example(
        [self.logits], [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])])
    with tf.name_scope('loss'):
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.learning_rate = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    with tf.name_scope('optimization'):
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

def __init__(self, args, training=True):
    self.args = args
    if not training:
        args.batch_size = 1
        args.seq_length = 1

    def lstm_cell(lstm_size):
        return tf.contrib.rnn.BasicLSTMCell(lstm_size)

    cells = []
    for i in range(args.num_layers):
        cells.append(lstm_cell(args.lstm_size))
    self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.output_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('lstm'):
        softmax_w = tf.get_variable("softmax_w", [args.lstm_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        embedding = tf.get_variable("embedding", [args.vocab_size, args.lstm_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        # split the input into seq_length subtensors along dimension 1
        inputs = tf.split(inputs, args.seq_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    outputs, last_state = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell)
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.lstm_size])

    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    self.predicted_output = tf.reshape(tf.argmax(self.probs, 1),
                                       [args.batch_size, args.seq_length])

    # loss definition
    loss = legacy_seq2seq.sequence_loss_by_example(
        [self.logits], [tf.reshape(self.output_data, [-1])],
        [tf.ones([args.batch_size * args.seq_length])])
    with tf.name_scope('cost'):
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.eta = tf.Variable(0.0, trainable=False)
    self.optimizer = tf.train.AdamOptimizer(self.eta).minimize(self.cost)

def lsrc_global_sequence_graph(self, config, inputs):
    """ Build the recurrence graph of the global state of the LSRC model.
    It returns a list of the hidden outputs and the last hidden layer """
    outputs, last_state = legacy_seq2seq.rnn_decoder(inputs, self.global_state,
                                                     self.global_cell,
                                                     loop_function=None)
    output = tf.reshape(tf.concat(outputs, 1), [-1, self.global_state_size])
    return output, last_state

def createGraph(self):
    # -------------------------------------------
    # Inputs
    self.in_ph = tf.placeholder(tf.int32, [self.batch_size, self.sequence_length],
                                name='inputs')
    self.target_profile = tf.placeholder(tf.float32,
                                         [self.batch_size, self.profile_size],
                                         name="target")
    in_onehot = tf.one_hot(self.in_ph, self.vocab_size, name="input_onehot")
    inputs = tf.split(in_onehot, self.sequence_length, 1)
    inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    # -------------------------------------------
    # Computation Graph
    with tf.variable_scope("profRNN"):
        cells = [rnn_cell.GRUCell(self.state_dim) for i in range(self.num_layers)]
        # cells = [GORUCell(state_dim, str(i)) for i in range(num_layers)]
        self.stacked_cells = rnn_cell.MultiRNNCell(cells, state_is_tuple=True)
        self.initial_state = self.stacked_cells.zero_state(self.batch_size,
                                                           tf.float32)

        # call seq2seq.rnn_decoder
        outputs, self.final_state = seq2seq.rnn_decoder(inputs, self.initial_state,
                                                        self.stacked_cells)

        # transform the list of state outputs to a list of logits
        # using a linear transformation
        self.W = tf.get_variable("W", [self.state_dim, self.profile_size],
                                 tf.float32,
                                 tf.random_normal_initializer(stddev=0.02))
        self.b = tf.get_variable("b", [self.profile_size],
                                 initializer=tf.constant_initializer(0.0))
        self.logits = tf.nn.softmax(tf.matmul(outputs[-1], self.W) + self.b)

        # loss: L1 distance between the target profile and the prediction
        self.loss = tf.reduce_sum(tf.abs(self.target_profile - self.logits))
        self.loss_summary = tf.summary.scalar("loss", self.loss)

        # create a training op using the Adam optimizer
        self.optim = tf.train.AdamOptimizer(0.001, beta1=0.5).minimize(self.loss)

def _init_seq2seq(self, encoder_inputs, decoder_inputs, cell, feed_previous):

    def inference_loop_function(prev, _):
        prev = tf.nn.xw_plus_b(prev, self.w_softmax, self.b_softmax)
        return tf.to_float(
            tf.equal(prev,
                     tf.reduce_max(prev, reduction_indices=[1], keep_dims=True)))

    loop_function = inference_loop_function if feed_previous else None

    with variable_scope.variable_scope('seq2seq'):
        _, final_enc_state = static_rnn(cell, encoder_inputs, dtype=dtypes.float32)
        return rnn_decoder(decoder_inputs, final_enc_state, cell,
                           loop_function=loop_function)

def model(self):
    """
    Core Network of the RAM
    :return: Sequence of hidden states of the RNN
    """
    self.location_list = []
    self.location_mean_list = []
    self.location_stddev_list = []
    self.glimpses_list = []

    # Create LSTM Cell
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self.hs_size, activation=tf.nn.relu,
                                             state_is_tuple=True)
    initial_state = lstm_cell.zero_state(self.batch_size, tf.float32)

    # Initial location mean generated by initial hidden state of RNN
    initial_loc = self.hard_tanh(tf.matmul(initial_state[0], self.h_l_out))
    #initial_std = self.hard_sigmoid(tf.matmul(initial_state[0], self.h_l_std_out))
    initial_std = tf.nn.sigmoid(tf.matmul(initial_state[0], self.h_l_std_out))
    sample_loc = self.hard_tanh(initial_loc + tf.cond(
        self.training,
        lambda: tf.random_normal(initial_loc.get_shape(), 0, initial_std),
        lambda: 0.))
    loc = sample_loc * self.pixel_scaling

    self.location_mean_list.append(tf.reduce_sum(initial_loc, 1))
    self.location_stddev_list.append(tf.reduce_sum(initial_std, 1))
    self.location_list.append(tf.reduce_sum(sample_loc, 1))
    self.eval_location_list.append(loc)

    # Compute initial glimpse
    initial_glimpse = self.Glimpse_Net(loc)

    # Only the first decoder input has to be a real tensor; the remaining
    # glimpse inputs are produced step by step by the loop function.
    inputs = [initial_glimpse]
    inputs.extend([0] * (self.glimpses - 1))

    outputs, _ = seq2seq.rnn_decoder(inputs, initial_state, lstm_cell,
                                     loop_function=self.get_next_input)
    return outputs

def pre_processing(self, args, cell, training):
    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        if training and args.output_keep_prob:
            inputs = tf.nn.dropout(inputs, args.output_keep_prob)
        inputs = tf.split(inputs, args.seq_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = legacy_seq2seq.rnn_decoder(
        inputs, self.initial_state, cell,
        loop_function=loop if not training else None, scope='rnnlm')
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])

    # output layer
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    loss = legacy_seq2seq.sequence_loss_by_example(
        [self.logits], [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])])
    with tf.name_scope('cost'):
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    tf.summary.histogram('logits', self.logits)
    tf.summary.histogram('loss', loss)
    tf.summary.scalar('train_loss', self.cost)

def compute_profile_from_within(self, x):
    with tf.variable_scope("qualRNN", reuse=True):
        # inputs
        self.s_inputs = x
        s_onehot = tf.one_hot(self.s_inputs, self.vocab_size, name="s_input_onehot")
        s_onehot = tf.split(s_onehot, self.sample_sequence_length, 1)
        s_onehot = [tf.squeeze(input_, [1]) for input_ in s_onehot]

        # initialize
        self.s_initial_state = self.stacked_cells.zero_state(
            self.sample_batch_size, tf.float32)

        # call seq2seq.rnn_decoder
        s_outputs, self.s_final_state = seq2seq.rnn_decoder(
            s_onehot, self.s_initial_state, self.stacked_cells)

        # transform the list of state outputs to a list of logits
        # using a linear transformation
        # s_outputs = tf.reshape(s_outputs, [1, self.state_dim])
        self.s_probs = tf.matmul(s_outputs[-1], self.W) + self.b
        return self.s_probs

def rnn_decoder_custom_embedding(emb_inp,
                                 initial_state,
                                 cell,
                                 embedding,
                                 num_symbols,
                                 output_projection=None,
                                 feed_previous=False,
                                 update_embedding_for_previous=True,
                                 scope=None,
                                 is_fed_h=True):
    with variable_scope.variable_scope(scope or "embedding_rnn_decoder") as scope:
        if output_projection is not None:
            dtype = scope.dtype
            proj_weights = ops.convert_to_tensor(output_projection[0], dtype=dtype)
            proj_weights.get_shape().assert_is_compatible_with([None, num_symbols])
            proj_biases = ops.convert_to_tensor(output_projection[1], dtype=dtype)
            proj_biases.get_shape().assert_is_compatible_with([num_symbols])

        # embedding = variable_scope.get_variable("embedding",
        #                                         [num_symbols, embedding_size])

        loop_function = _extract_argmax_and_embed(
            embedding, initial_state[0], output_projection,
            update_embedding_for_previous,
            is_fed_h=is_fed_h) if feed_previous else None

        return rnn_decoder(emb_inp, initial_state, cell,
                           loop_function=loop_function)

def __init__(self, args, training=True):
    self.args = args
    if not training:
        args.batch_size = 1
        args.seq_length = 1

    if args.model == 'rnn':
        cell_fn = rnn.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn.BasicLSTMCell
    elif args.model == 'nas':
        cell_fn = rnn.NASCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    cells = []
    for _ in range(args.num_layers):
        cell = cell_fn(args.rnn_size)
        if training and (args.output_keep_prob < 1.0 or args.input_keep_prob < 1.0):
            cell = rnn.DropoutWrapper(cell,
                                      input_keep_prob=args.input_keep_prob,
                                      output_keep_prob=args.output_keep_prob)
        cells.append(cell)
    self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])

    embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
    inputs = tf.nn.embedding_lookup(embedding, self.input_data)

    # dropout beta testing: double check which one should affect next line
    if training and args.output_keep_prob:
        inputs = tf.nn.dropout(inputs, args.output_keep_prob)

    inputs = tf.split(inputs, args.seq_length, 1)
    inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = legacy_seq2seq.rnn_decoder(
        inputs, self.initial_state, cell,
        loop_function=loop if not training else None, scope='rnnlm')
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])

    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    loss = legacy_seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])])
    with tf.name_scope('cost'):
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    # instrument tensorboard
    tf.summary.histogram('logits', self.logits)
    tf.summary.histogram('loss', loss)
    tf.summary.scalar('train_loss', self.cost)

def __init__(self, args, infer=False):
    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    additional_cell_args = {}
    if args.model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
        additional_cell_args.update({'state_is_tuple': False})
    elif args.model == 'gridlstm':
        cell_fn = grid_rnn.Grid2LSTMCell
        additional_cell_args.update({'use_peepholes': True, 'forget_bias': 1.0,
                                     'state_is_tuple': False,
                                     'output_is_tuple': False})
    elif args.model == 'gridgru':
        cell_fn = grid_rnn.Grid2GRUCell
        additional_cell_args.update({'state_is_tuple': False,
                                     'output_is_tuple': False})
    else:
        raise Exception("model type not supported: {}".format(args.model))

    cell = cell_fn(args.rnn_size, **additional_cell_args)
    self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers,
                                             state_is_tuple=False)

    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
            inputs = tf.split(tf.nn.embedding_lookup(embedding, self.input_data),
                              num_or_size_splits=args.seq_length, axis=1)
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                              loop_function=loop if infer else None,
                                              scope='rnnlm')
    output = tf.reshape(tf.concat(outputs, axis=1), [-1, args.rnn_size])
    self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
    self.probs = tf.nn.softmax(self.logits)
    loss = seq2seq.sequence_loss_by_example(
        [self.logits], [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])], args.vocab_size)
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

def __init__(self, args, infer=False):
    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    if args.model == 'rnn':
        cell_fn = rnn.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    cells = []
    for _ in range(args.num_layers):
        cell = cell_fn(args.rnn_size)
        cells.append(cell)
    self.cell = cell = rnn.MultiRNNCell(cells)

    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)
    self.batch_pointer = tf.Variable(0, name="batch_pointer", trainable=False,
                                     dtype=tf.int32)
    self.inc_batch_pointer_op = tf.assign(self.batch_pointer,
                                          self.batch_pointer + 1)
    self.epoch_pointer = tf.Variable(0, name="epoch_pointer", trainable=False)
    self.batch_time = tf.Variable(0.0, name="batch_time", trainable=False)
    tf.summary.scalar("time_batch", self.batch_time)

    def variable_summaries(var):
        """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
        with tf.name_scope('summaries'):
            mean = tf.reduce_mean(var)
            tf.summary.scalar('mean', mean)
            #with tf.name_scope('stddev'):
            #    stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
            #tf.summary.scalar('stddev', stddev)
            tf.summary.scalar('max', tf.reduce_max(var))
            tf.summary.scalar('min', tf.reduce_min(var))
            #tf.summary.histogram('histogram', var)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
        variable_summaries(softmax_w)
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        variable_summaries(softmax_b)
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
            inputs = tf.split(tf.nn.embedding_lookup(embedding, self.input_data),
                              args.seq_length, 1)
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = legacy_seq2seq.rnn_decoder(
        inputs, self.initial_state, cell,
        loop_function=loop if infer else None, scope='rnnlm')
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])

    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    loss = legacy_seq2seq.sequence_loss_by_example(
        [self.logits], [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])], args.vocab_size)
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    tf.summary.scalar("cost", self.cost)
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

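# --- Illustrative sketch (not taken from any snippet above) ------------------
# A minimal, self-contained TF 1.x example of the rnn_decoder / loop_function
# pattern the character-level language models above share: during training the
# ground-truth inputs are fed at every step (loop_function=None); at inference
# time the previous output is projected to logits, arg-maxed, and re-embedded.
# batch_size, seq_length, rnn_size and vocab_size are arbitrary assumptions.
import tensorflow as tf
from tensorflow.contrib import rnn, legacy_seq2seq

batch_size, seq_length, rnn_size, vocab_size = 32, 20, 128, 1000
training = True

input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
cell = rnn.BasicLSTMCell(rnn_size)
initial_state = cell.zero_state(batch_size, tf.float32)

embedding = tf.get_variable("embedding", [vocab_size, rnn_size])
softmax_w = tf.get_variable("softmax_w", [rnn_size, vocab_size])
softmax_b = tf.get_variable("softmax_b", [vocab_size])

# rnn_decoder expects a Python list of seq_length tensors of shape
# [batch_size, rnn_size].
inputs = tf.split(tf.nn.embedding_lookup(embedding, input_data), seq_length, 1)
inputs = [tf.squeeze(x, [1]) for x in inputs]

def loop(prev, _):
    # Feed the previous prediction back in as the next input (inference only).
    prev = tf.matmul(prev, softmax_w) + softmax_b
    prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
    return tf.nn.embedding_lookup(embedding, prev_symbol)

outputs, last_state = legacy_seq2seq.rnn_decoder(
    inputs, initial_state, cell,
    loop_function=None if training else loop)
output = tf.reshape(tf.concat(outputs, 1), [-1, rnn_size])
logits = tf.matmul(output, softmax_w) + softmax_b  # [batch*seq_length, vocab]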