def __init__(self, args, training=True):
    """Build a character-level RNN language model graph.

    Args:
        args: hyper-parameters (model, rnn_size, vocab_size, batch_size,
            seq_length, grad_clip, input_keep_prob, output_keep_prob, ...).
        training: when False, batch_size and seq_length are forced to 1 so
            the model can be sampled one symbol at a time, and the decoder
            feeds its own argmax predictions back in via `loop`.
    """
    self.args = args
    if not training:
        args.batch_size = 1
        args.seq_length = 1

    use_dropout = training and (args.output_keep_prob < 1.0 or
                                args.input_keep_prob < 1.0)
    cell_fn = self.select_cell_fn(args.model)
    cells = self.create_cell_stack('hidden', cell_fn, args,
                                   use_dropout=use_dropout)
    self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

    self.input_data = tf.placeholder(
        tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(
        tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    softmax_w = self.create_var('rnnlm', 'softmax_w',
                                [args.rnn_size, args.vocab_size])
    softmax_b = self.create_var('rnnlm', 'softmax_b', [args.vocab_size])
    embedding = self.create_var('rnnlm', 'embedding',
                                [args.vocab_size, args.rnn_size])
    inputs = tf.nn.embedding_lookup(embedding, self.input_data)

    # BUG FIX: dropout on the *inputs* must be driven by input_keep_prob
    # (the old code applied output_keep_prob here, with a "double check"
    # TODO), and should only be added when it actually drops something
    # (keep prob < 1.0; the old truthiness test added a no-op dropout node
    # even at keep_prob == 1.0).
    if training and args.input_keep_prob < 1.0:
        inputs = tf.nn.dropout(inputs, args.input_keep_prob)

    # One tensor per time step along dimension 1.
    inputs = tf.split(inputs, args.seq_length, 1)
    inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        # Sampling mode: embed the argmax of the previous prediction.
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = legacy_seq2seq.rnn_decoder(
        inputs, self.initial_state, cell,
        loop_function=loop if not training else None, scope='rnnlm')
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])

    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    loss = legacy_seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])])
    with tf.name_scope('cost'):
        # Mean loss per token (sum normalized by batch and time).
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    # instrument tensorboard
    tf.summary.histogram('logits', self.logits)
    tf.summary.histogram('loss', loss)
    tf.summary.scalar('train_loss', self.cost)
def cal_loss(out, labels):
    """Return the mean per-token cross-entropy of `out` logits vs `labels`."""
    with tf.name_scope("loss"):
        flat_labels = tf.reshape(labels, [-1])
        token_weights = tf.ones_like(flat_labels, dtype=tf.float32)
        per_token = seq2seq.sequence_loss_by_example(
            [out], [flat_labels], [token_weights])
        mean_loss = tf.reduce_mean(per_token)
    return mean_loss
def __init__(self, config, is_training=False):
    """Build an Adaptive Computation Time (ACT) language-model graph.

    Args:
        config: hyper-parameters (batch_size, num_steps, hidden_size,
            vocab_size, max_grad_norm, use_lstm, epsilon, max_computation,
            ponder_time_penalty, learning_rate).
        is_training: when True, also build the clipped-gradient Adam
            training op.
    """
    self.config = config
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    self.hidden_size = hidden_size = config.hidden_size
    self.num_layers = 1
    vocab_size = config.vocab_size
    self.max_grad_norm = config.max_grad_norm
    self.use_lstm = config.use_lstm

    # Placeholders for inputs.
    self.input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
    self.targets = tf.placeholder(tf.int32, [batch_size, num_steps])

    # BUG FIX: Tensor.set_shape() mutates the tensor in place and returns
    # None, so the old one-liner assigned None to self.initial_state.
    # Build the tensor first, then refine its static shape.
    initial_state = array_ops.zeros(
        tf.stack([self.batch_size, self.num_steps]), dtype=tf.float32)
    initial_state.set_shape([None, self.num_steps])
    self.initial_state = initial_state

    embedding = tf.get_variable(
        'embedding', [self.config.vocab_size, self.config.hidden_size])

    # Set up ACT cell and inner rnn-type cell for use inside the ACT cell.
    with tf.variable_scope("rnn"):
        if self.use_lstm:
            inner_cell = BasicLSTMCell(self.config.hidden_size)
        else:
            inner_cell = GRUCell(self.config.hidden_size)

    with tf.variable_scope("ACT"):
        act = ACTCell(self.config.hidden_size, inner_cell, config.epsilon,
                      max_computation=config.max_computation,
                      batch_size=self.batch_size)

    inputs = tf.nn.embedding_lookup(embedding, self.input_data)
    inputs = [tf.squeeze(single_input, [1])
              for single_input in tf.split(inputs, self.config.num_steps, 1)]

    self.outputs, final_state = static_rnn(act, inputs, dtype=tf.float32)

    # Softmax to get probability distribution over vocab.
    output = tf.reshape(tf.concat(self.outputs, 1), [-1, hidden_size])
    softmax_w = tf.get_variable("softmax_w", [hidden_size, vocab_size])
    softmax_b = tf.get_variable("softmax_b", [vocab_size])
    # dim (numsteps*batchsize, vocabsize)
    self.logits = tf.matmul(output, softmax_w) + softmax_b

    loss = sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([batch_size * num_steps])],
        vocab_size)

    # Add up loss and retrieve batch-normalised ponder cost:
    # sum N + sum Remainder.
    ponder_cost = act.calculate_ponder_cost(
        time_penalty=self.config.ponder_time_penalty)
    self.cost = (tf.reduce_sum(loss) / batch_size) + ponder_cost
    self.final_state = self.outputs[-1]

    if is_training:
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          self.max_grad_norm)
        optimizer = tf.train.AdamOptimizer(self.config.learning_rate)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def build_graph(self):
    """Build the fast-weights RNN graph: embedded inputs, manually
    unrolled cell, softmax readout, loss/accuracy, optimizer, summaries."""
    config = self.configuration
    self.reader = utils.DataReader(seq_len=config.seq_length,
                                   batch_size=config.batch_size,
                                   data_filename=config.data_filename)

    self.cell = FWRNNCell(num_units=config.rnn_size)

    self.input_data = tf.placeholder(tf.int32, [None, config.input_length])
    self.answers = tf.placeholder(tf.int32, [None, 1])
    # Both the regular and fast-weights states are sized from the runtime
    # batch dimension of `answers`.
    self.initial_state = self.cell.zero_state(
        tf.shape(self.answers)[0], tf.float32)
    self.fw_initial = self.cell.fw_zero(
        tf.shape(self.answers)[0], tf.float32)

    with tf.variable_scope("emb_input"):
        embedding = tf.get_variable(
            "emb", [config.size_chars, config.embedding_size])
        inputs = tf.split(
            tf.nn.embedding_lookup(embedding, self.input_data),
            config.input_length, 1)
        # Renamed loop variable so it no longer shadows builtin `input`.
        inputs = [tf.squeeze(step_input, [1]) for step_input in inputs]

    with tf.variable_scope("rnn_desig"):
        state = (self.initial_state, self.fw_initial)
        output = None
        for i, step_input in enumerate(inputs):
            if i > 0:
                # Share the cell's variables across time steps.
                tf.get_variable_scope().reuse_variables()
            output, state = self.cell(step_input, state)

    with tf.variable_scope("softmax"):
        softmax_w = tf.get_variable("softmax_w",
                                    [config.rnn_size, config.size_chars])
        softmax_b = tf.get_variable("softmax_b", [config.size_chars])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.p = tf.nn.softmax(self.logits)
        # BUG FIX: tf.arg_max is deprecated (and removed in later TF
        # releases); tf.argmax is the supported spelling.
        self.output = tf.cast(
            tf.reshape(tf.argmax(self.p, 1), [-1, 1]), tf.int32)

    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.output, self.answers), tf.float32))

    # NOTE(review): the loss weights assume a full batch of
    # config.batch_size even though the placeholders accept a dynamic
    # batch — confirm feed sizes always match.
    loss = legacy_seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.answers, [-1])],
        [tf.ones([config.batch_size])],
        config.size_chars)
    self.cost = tf.reduce_mean(loss)
    self.end_state = state

    train_vars = tf.trainable_variables()
    gradients, _ = tf.clip_by_global_norm(
        tf.gradients(self.cost, train_vars), config.grad_clip)
    optimizer = tf.train.AdamOptimizer()
    self.train_op = optimizer.apply_gradients(zip(gradients, train_vars))

    self.summary_accuracy = tf.summary.scalar('accuracy', self.accuracy)
    tf.summary.scalar('cost', self.cost)
    self.summary_all = tf.summary.merge_all()
def __init__(self, args, infer=False):
    """Build a char-rnn style language model, with optional Grid-LSTM /
    Grid-GRU cells.

    Args:
        args: hyper-parameters (model, rnn_size, num_layers, batch_size,
            seq_length, vocab_size, grad_clip).
        infer: when True, batch size and sequence length collapse to 1 for
            symbol-by-symbol sampling, and the decoder feeds its own
            argmax predictions back in via `loop`.
    """
    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    additional_cell_args = {}
    if args.model == 'rnn':
        cell_fn = rnn.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn.BasicLSTMCell
    elif args.model == 'gridlstm':
        cell_fn = grid_rnn.Grid2LSTMCell
        additional_cell_args.update({'use_peepholes': True,
                                     'forget_bias': 1.0})
    elif args.model == 'gridgru':
        cell_fn = grid_rnn.Grid2GRUCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    cell = cell_fn(args.rnn_size, **additional_cell_args)
    # NOTE(review): [cell] * num_layers stacks the *same* cell object in
    # every layer; newer TF releases share weights or raise a reuse error
    # in that case — confirm against the TF version this targets.
    self.cell = cell = rnn.MultiRNNCell([cell] * args.num_layers)

    self.input_data = tf.placeholder(tf.int32,
                                     [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32,
                                  [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w",
                                    [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        with tf.device("/cpu:0"):
            # Embedding lookups pinned to the CPU.
            embedding = tf.get_variable("embedding",
                                        [args.vocab_size, args.rnn_size])
            inputs = tf.split(
                tf.nn.embedding_lookup(embedding, self.input_data),
                args.seq_length, 1)
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        # Sampling mode: embed the argmax of the previous prediction.
        prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = seq2seq.rnn_decoder(
        inputs, self.initial_state, cell,
        loop_function=loop if infer else None, scope='rnnlm')
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
    self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
    self.probs = tf.nn.softmax(self.logits)
    loss = seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])],
        args.vocab_size)
    # Mean loss per token (sum normalized by batch and time).
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(list(zip(grads, tvars)))
def __init__(self, vocabulary_size, batch_size, sequence_length,
             hidden_layer_size, cells_size, gradient_clip=5.,
             training=True):
    """Build a stacked-LSTM language model.

    Args:
        vocabulary_size: number of distinct symbols.
        batch_size: sequences per batch.
        sequence_length: unrolled time steps.
        hidden_layer_size: LSTM units per layer.
        cells_size: number of stacked LSTM layers.
        gradient_clip: global-norm clipping threshold.
        training: when False, the decoder feeds its own argmax
            predictions back in via `loop` (sampling mode).
    """
    # IDIOM FIX: build the cell list with a real list comprehension
    # instead of a comprehension used only for its .append side effect.
    cells = [rnn.LSTMCell(hidden_layer_size) for _ in range(cells_size)]
    self.cell = rnn.MultiRNNCell(cells)

    self.input_data = tf.placeholder(tf.int32,
                                     [batch_size, sequence_length])
    self.targets = tf.placeholder(tf.int32, [batch_size, sequence_length])
    self.initial_state = self.cell.zero_state(batch_size, tf.float32)

    with tf.variable_scope("rnn", reuse=tf.AUTO_REUSE):
        softmax_layer = tf.get_variable(
            "softmax_layer", [hidden_layer_size, vocabulary_size])
        softmax_bias = tf.get_variable("softmax_bias", [vocabulary_size])
    with tf.variable_scope("embedding", reuse=tf.AUTO_REUSE):
        embedding = tf.get_variable("embedding",
                                    [vocabulary_size, hidden_layer_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        # One tensor per time step along dimension 1.
        inputs = tf.split(inputs, sequence_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(previous, _):
        # Sampling mode: embed the argmax of the previous prediction.
        previous = tf.matmul(previous, softmax_layer) + softmax_bias
        previous_symbol = tf.stop_gradient(tf.argmax(previous, 1))
        return tf.nn.embedding_lookup(embedding, previous_symbol)

    with tf.variable_scope("rnn", reuse=tf.AUTO_REUSE):
        outputs, last_state = legacy_seq2seq.rnn_decoder(
            inputs, self.initial_state, self.cell,
            loop_function=loop if not training else None)

    output = tf.reshape(tf.concat(outputs, 1), [-1, hidden_layer_size])
    self.logits = tf.matmul(output, softmax_layer) + softmax_bias
    self.probabilities = tf.nn.softmax(self.logits)
    loss = legacy_seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([batch_size * sequence_length])])
    with tf.name_scope("cost"):
        # Mean loss per token (sum normalized by batch and time).
        self.cost = tf.reduce_sum(loss) / batch_size / sequence_length
    self.final_state = last_state
    self.learning_rate = tf.Variable(0.0, trainable=False)
    trainable_vars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(
        tf.gradients(self.cost, trainable_vars), gradient_clip)
    with tf.variable_scope("optimizer", reuse=tf.AUTO_REUSE):
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(
            zip(grads, trainable_vars))
    tf.summary.histogram("logits", self.logits)
    tf.summary.histogram("probabilitiess", self.probabilities)
    tf.summary.histogram("loss", loss)
    tf.summary.scalar("cost", self.cost)
    tf.summary.scalar("learning_rate", self.learning_rate)
def compute_cost(self):
    """Set self.cost to the summed sequence loss averaged over the batch,
    using the model's custom `msr_error` loss function."""
    uniform_weights = tf.ones([self.batch_size * self.n_steps],
                              dtype=tf.float32)
    per_step_losses = sequence_loss_by_example(
        [tf.reshape(self.pred, [-1])],
        [tf.reshape(self.ys, [-1])],
        [uniform_weights],
        average_across_timesteps=True,
        softmax_loss_function=self.msr_error)
    batch_size_f = tf.cast(self.batch_size, tf.float32)
    self.cost = tf.div(tf.reduce_sum(per_step_losses), batch_size_f)
def __init__(self, data, model='lstm', infer=False):
    """Build a word-level RNN language model over `data`.

    Args:
        data: dataset object exposing `batch_size` and `words_size`.
        model: cell type, one of 'rnn' | 'gru' | 'lstm'.
        infer: when True, use a batch size of 1 for sampling.
    """
    self.rnn_size = 128
    self.n_layers = 2

    if infer:
        self.batch_size = 1
    else:
        self.batch_size = data.batch_size

    if model == 'rnn':
        cell_rnn = tf.nn.rnn_cell.BasicRNNCell
    elif model == 'gru':
        cell_rnn = tf.nn.rnn_cell.GRUCell
    elif model == 'lstm':
        cell_rnn = tf.nn.rnn_cell.LSTMCell

    # BUG FIX: the old code built one cell and stacked it as
    # [cell] * n_layers, so every layer was the *same* object — TF >= 1.2
    # either shares weights or raises a reuse error. Create an
    # independent cell per layer instead.
    cells = [cell_rnn(self.rnn_size, name='basic_lstm_cell')
             for _ in range(self.n_layers)]
    self.cell = tf.nn.rnn_cell.MultiRNNCell(cells)

    self.x_tf = tf.placeholder(tf.int32, [self.batch_size, None])
    self.y_tf = tf.placeholder(tf.int32, [self.batch_size, None])

    self.initial_state = self.cell.zero_state(self.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w",
                                    [self.rnn_size, data.words_size])
        softmax_b = tf.get_variable("softmax_b", [data.words_size])
        # with tf.device("/gpu:1"):
        embedding = tf.get_variable("embedding",
                                    [data.words_size, self.rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, self.x_tf)

    # tf.nn.dynamic_rnn accepts inputs whose shapes vary between runs,
    # whereas tf.nn.rnn requires every input to have the same shape.
    outputs, final_state = tf.nn.dynamic_rnn(
        self.cell, inputs, initial_state=self.initial_state, scope='rnnlm')
    self.output = tf.reshape(outputs, [-1, self.rnn_size])
    self.logits = tf.matmul(self.output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    self.final_state = final_state
    pred = tf.reshape(self.y_tf, [-1])
    # seq2seq loss: mean cross-entropy per target token.
    loss = seq2seq.sequence_loss_by_example(
        [self.logits], [pred], [tf.ones_like(pred, dtype=tf.float32)])
    self.cost = tf.reduce_mean(loss)
    self.learning_rate = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      clip_norm=5)
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def __init__(self, args, data, infer=False):
    """Build a stacked dropout-LSTM language model with summaries.

    Args:
        args: hyper-parameters (batch_size, seq_length, state_size,
            num_layers, grad_clip).
        data: dataset object exposing `vocab_size`.
        infer: when True, batch size and sequence length collapse to 1.
    """
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    with tf.name_scope('inputs'):
        self.input_data = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length])
        self.target_data = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length])

    with tf.name_scope('model'):
        # BUG FIX: the old code multiplied a single wrapped cell
        # ([cell] * num_layers), making every layer the *same* object —
        # TF >= 1.2 either shares weights or raises a reuse error. Build
        # one independent cell per layer.
        # NOTE(review): dropout (output_keep_prob=0.7) stays active even
        # when infer=True — confirm whether sampling should disable it.
        self.cell = MultiRNNCell([
            tf.nn.rnn_cell.DropoutWrapper(BasicLSTMCell(args.state_size),
                                          output_keep_prob=0.7)
            for _ in range(args.num_layers)
        ])
        self.initial_state = self.cell.zero_state(args.batch_size,
                                                  tf.float32)
        with tf.variable_scope('rnnlm'):
            w = tf.get_variable('softmax_w',
                                [args.state_size, data.vocab_size])
            b = tf.get_variable('softmax_b', [data.vocab_size])
            with tf.device("/cpu:0"):
                # Embedding lookups pinned to the CPU.
                embedding = tf.get_variable(
                    'embedding', [data.vocab_size, args.state_size])
                self.embedding = embedding
                inputs = tf.nn.embedding_lookup(embedding, self.input_data)
                self.in_data = self.input_data
                self.inputs = inputs
        outputs, last_state = tf.nn.dynamic_rnn(
            self.cell, inputs, initial_state=self.initial_state)

    with tf.name_scope('loss'):
        output = tf.reshape(outputs, [-1, args.state_size])
        self.logits = tf.matmul(output, w) + b
        self.probs = tf.nn.softmax(self.logits)
        self.last_state = last_state
        targets = tf.reshape(self.target_data, [-1])
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [targets],
            [tf.ones_like(targets, dtype=tf.float32)])
        # Summed token loss normalized by batch size only.
        self.cost = tf.reduce_sum(loss) / args.batch_size
    tf.summary.scalar('loss', self.cost)

    with tf.name_scope('optimize'):
        self.lr = tf.placeholder(tf.float32, [])
        tf.summary.scalar('learning_rate', self.lr)
        optimizer = tf.train.AdamOptimizer(self.lr)
        tvars = tf.trainable_variables()
        grads = tf.gradients(self.cost, tvars)
        # One histogram per raw (pre-clip) gradient tensor.
        for g in grads:
            tf.summary.histogram(g.name, g)
        grads, _ = tf.clip_by_global_norm(grads, args.grad_clip)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        self.merged_op = tf.summary.merge_all()
def init_ops(self): self._build_placeholder() # Get loss and prediction operations with tf.variable_scope(self.name) as scope: # Reuse variables for validation if self.reuse_params: scope.reuse_variables() # Build model network = self.build_model(input_var=self.input_var) # Softmax linear name = "l{}_softmax_linear".format(self.layer_idx) network = fc(name=name, input_var=network, n_hiddens=self.n_classes, bias=0.0, wd=0) self.activations.append((name, network)) self.layer_idx += 1 # Outputs of softmax linear are logits self.logits = network ######### Compute loss ######### # Weighted cross-entropy loss for a sequence of logits (per example) ''' the old code loss = tf.nn.seq2seq.sequence_loss_by_example( [self.logits], [self.target_var], [tf.ones([self.batch_size * self.seq_length])], name="sequence_loss_by_example" ) ''' loss = legacy_seq2seq.sequence_loss_by_example( logits=[self.logits], targets=[self.target_var], weights=[tf.ones([self.batch_size * self.seq_length])]) loss = tf.reduce_sum(loss) / self.batch_size # Regularization loss regular_loss = tf.add_n(tf.get_collection("losses", scope=scope.name + "\/"), name="regular_loss") # print " " # print "Params to compute regularization loss:" # for p in tf.get_collection("losses", scope=scope.name + "\/"): # print p.name # print " " # Total loss self.loss_op = tf.add(loss, regular_loss) # Predictions self.pred_op = tf.argmax(self.logits, 1)
def generate_sequence_output(
        num_encoder_symbols,
        encoder_outputs,
        encoder_state,
        targets,
        sequence_length,
        num_decoder_symbols,  # vocab_size
        weights,
        buckets,
        softmax_loss_function=None,
        per_example_loss=False,
        name=None,
        use_attention=False):
    """Run the attention decoder over encoder outputs and compute loss.

    Args:
        targets: list of per-step target tensors; must be at least as
            long as the last bucket's decoder length.
        weights: per-step loss weights, same length as targets.
        per_example_loss: when True, return un-averaged per-example loss
            (sequence_loss_by_example); otherwise the averaged
            sequence_loss scalar.

    Returns:
        (logits, crossent): decoder logits and the chosen loss tensor.

    Raises:
        ValueError: if targets is shorter than the last bucket.
    """
    if len(targets) < buckets[-1][1]:  # decoder_output_length
        raise ValueError("Length of targets (%d) must be at least that of last"
                         "bucket (%d)." % (len(targets), buckets[-1][1]))

    # 4-1. Define all input
    all_inputs = encoder_outputs + targets + weights
    with tf.name_scope(name, "model_with_buckets", all_inputs):
        with tf.variable_scope("decoder_sequence_output", reuse=None):
            # 4-2. Get attention_encoder_outputs(=logits) and
            # attention_weights
            logits, attention_weights = attention_RNN(
                encoder_outputs,
                encoder_state,
                num_decoder_symbols,
                sequence_length,
                use_attention=use_attention)

            # BUG FIX: the old test was `per_example_loss is None`, which
            # is never True for the documented boolean values (default
            # False), so the per-example branch was unreachable. Test the
            # flag's truth value instead.
            if per_example_loss:
                assert len(logits) == len(targets)
                # 4-3. Reshape to calculate loss.
                bucket_target = [tf.reshape(tf.to_int64(x), [-1])
                                 for x in targets]
                # 4-4. Get loss function
                crossent = sequence_loss_by_example(
                    logits, bucket_target, weights,
                    softmax_loss_function=softmax_loss_function)
            else:
                assert len(logits) == len(targets)
                bucket_target = [tf.reshape(tf.to_int64(x), [-1])
                                 for x in targets]
                crossent = sequence_loss(
                    logits, bucket_target, weights,
                    softmax_loss_function=softmax_loss_function)

    return logits, crossent
def loss_op(self):
    """Set self.cost to the mean sequence loss of self.logits vs self.pred."""
    uniform_weights = tf.ones_like(self.pred, dtype=tf.float64)
    per_example = seq2seq.sequence_loss_by_example(
        [self.logits],
        [self.pred],
        [uniform_weights],
    )
    self.cost = tf.reduce_mean(per_example)
def model(cell_state_size, rnn_cells_depth, batch_size, batch_len,
          number_of_tokens, reuse):
    """Build an unrolled stacked-LSTM language model.

    Args:
        cell_state_size: units per LSTM cell.
        rnn_cells_depth: number of stacked layers.
        batch_size: sequences per batch (used to size the zero state).
        batch_len: unrolled time steps.
        number_of_tokens: vocabulary size.
        reuse: when True, reuse the variables of a previously built copy.

    Returns:
        [loss, probs, decoder_initial_state, input_placeholder,
         target_placeholder, last_state, logits]
    """
    input_placeholder = tf.placeholder(tf.int32, shape=(None, batch_len),
                                       name="input")
    target_placeholder = tf.placeholder(tf.int32, shape=(None, batch_len),
                                        name="target")

    # make dictionary for letters (vocab, 128)
    with tf.variable_scope("rnn") as scope:
        if reuse:
            scope.reuse_variables()

        cell = tf.nn.rnn_cell.BasicLSTMCell(cell_state_size)
        #cell = tf.nn.rnn_cell.BasicRNNCell(cell_state_size)
        #cell = tf.contrib.rnn.IntersectionRNNCell(cell_state_size)
        #cell = tf.contrib.rnn.LSTMCell(cell_state_size)
        #cell = tf.contrib.rnn.TimeFreqLSTMCell(cell_state_size)
        rnn_cell = tf.nn.rnn_cell.MultiRNNCell([cell] * rnn_cells_depth)

        # BUG FIX / generalization: the readout weight shape and the
        # output reshape were hard-coded to 128, which silently breaks
        # for any cell_state_size != 128. Size them from cell_state_size.
        W = tf.get_variable("W", shape=(cell_state_size, number_of_tokens))
        b = tf.get_variable("b", shape=(number_of_tokens))

        # Input embedding dimension stays 128 (independent of state size).
        embedding = tf.get_variable("embedding", [number_of_tokens, 128])
        # (batch, batch_len, 128)
        rnn_input = tf.nn.embedding_lookup(embedding, input_placeholder)
        # batch_len tensors of (batch, 1, 128)
        rnn_input = tf.split(rnn_input, batch_len, axis=1)
        rnn_input = [tf.squeeze(rni, [1]) for rni in rnn_input]

        # map input from id numbers to rnn states
        decoder_initial_state = rnn_cell.zero_state(batch_size, tf.float32)
        # outputs: list of batch_len tensors of (batch, cell_state_size)
        outputs, last_state = seq2seq.rnn_decoder(
            rnn_input, decoder_initial_state, rnn_cell, scope="rnn")
        # (batch, batch_len * cell_state_size)
        outputs = tf.concat(outputs, 1)
        # (batch * batch_len, cell_state_size) i.e. one row per letter
        outputs = tf.reshape(outputs, [-1, cell_state_size])

        # (batch * batch_len, number_of_tokens)
        logits = tf.matmul(outputs, W) + b
        #probs = tf.nn.softmax(logits, 1, name="probs")
        probs = tf.nn.softmax(logits, -1, name="probs")
        loss = seq2seq.sequence_loss_by_example(
            [logits],
            [tf.reshape(target_placeholder, [-1])],
            [tf.ones([batch_size * batch_len])],
            number_of_tokens)

    return ([loss, probs, decoder_initial_state, input_placeholder,
             target_placeholder, last_state, logits])
def __init__(self, data, model='lstm', infer=False, layers_size=128,
             num_layers=2):
    """Build a word-level RNN language model.

    Args:
        data: dataset object exposing `batch_size` and `words_size`.
        model: cell type, one of 'rnn' | 'gru' | 'lstm'.
        infer: when True, use a batch size of 1 for sampling.
        layers_size: number of units per recurrent layer.
        num_layers: number of stacked recurrent layers.
    """
    self.rnn_size = layers_size
    self.n_layers = num_layers
    print('build model')
    if infer:
        self.batch_size = 1
    else:
        self.batch_size = data.batch_size
    if model == 'rnn':
        cell_rnn = rnn.BasicRNNCell
    elif model == 'gru':
        cell_rnn = rnn.GRUCell
    elif model == 'lstm':
        cell_rnn = rnn.BasicLSTMCell
    # NOTE(review): no else branch — an unknown `model` leaves cell_rnn
    # unbound and fails with NameError on the next line.
    cell = cell_rnn(self.rnn_size, state_is_tuple=False)
    # NOTE(review): [cell] * n_layers stacks the *same* cell object in
    # every layer; newer TF releases share weights or raise a reuse error
    # in that case — confirm against the targeted TF version.
    self.cell = rnn.MultiRNNCell([cell] * self.n_layers,
                                 state_is_tuple=False)

    # Second dimension is None so sequences of any length can be fed.
    self.x_tf = tf.placeholder(tf.int32, [self.batch_size, None])
    self.y_tf = tf.placeholder(tf.int32, [self.batch_size, None])

    self.initial_state = self.cell.zero_state(self.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w",
                                    [self.rnn_size, data.words_size])
        softmax_b = tf.get_variable("softmax_b", [data.words_size])
        with tf.device("/cpu:0"):
            # Embedding lookups pinned to the CPU.
            embedding = tf.get_variable(
                "embedding", [data.words_size, self.rnn_size])
            inputs = tf.nn.embedding_lookup(embedding, self.x_tf)

    outputs, final_state = tf.nn.dynamic_rnn(
        self.cell, inputs, initial_state=self.initial_state, scope='rnnlm')
    self.output = tf.reshape(outputs, [-1, self.rnn_size])
    self.logits = tf.matmul(self.output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    # Most likely next-word id for every flattened position.
    self.pre = tf.argmax(self.probs, 1)
    self.final_state = final_state
    pred = tf.reshape(self.y_tf, [-1])
    # seq2seq
    loss = seq2seq.sequence_loss_by_example(
        [self.logits], [pred],
        [tf.ones_like(pred, dtype=tf.float32)],)
    # Mean cross-entropy per target token.
    self.cost = tf.reduce_mean(loss)
    self.learning_rate = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), 5)
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
    print('build model done')
def __init__(self, args, embedding):
    """Build the STAND sequence model: a teacher-forced decoder plus a
    self-feeding decoder that shares the same weights.

    Args:
        args: hyper-parameters (model, rnn_size, num_layers, batch_size,
            seq_length, vocab_size, grad_clip).
        embedding: externally provided embedding variable/tensor of shape
            [vocab_size, rnn_size].
    """
    self.args = args

    if args.model == 'rnn':
        cell_fn = rnn.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    cell = cell_fn(args.rnn_size)
    self.cell = cell = rnn.MultiRNNCell([cell] * args.num_layers)

    self.input_data = tf.placeholder(tf.int32,
                                     [args.batch_size, args.seq_length],
                                     name='STAND_input')
    self.targets = tf.placeholder(tf.int32,
                                  [args.batch_size, args.seq_length],
                                  name='STAND_targets')
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)
    self.embedding = embedding

    with tf.variable_scope('STAND'):
        softmax_w = tf.get_variable("softmax_w",
                                    [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        inputs = tf.split(axis=1, num_or_size_splits=args.seq_length,
                          value=tf.nn.embedding_lookup(self.embedding,
                                                       self.input_data))
        # BUG FIX: the old `map(...)` returned a one-shot iterator on
        # Python 3, which the first rnn_decoder call consumed, leaving
        # the second (self-feeding) decoder with an empty input list.
        # Materialize a real list instead.
        inputs = [tf.nn.l2_normalize(tf.squeeze(input_, [1]), 1)
                  for input_ in inputs]

    def loop(prev, i):
        # Self-feeding: embed (and re-normalize) the previous argmax.
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.l2_normalize(
            tf.nn.embedding_lookup(embedding, prev_symbol), 1)

    # Teacher-forced pass.
    o, _ = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                      loop_function=None, scope='STAND')
    # Self-feeding pass reusing the same variables.
    with tf.variable_scope('STAND', reuse=True) as scope:
        sf_o, _ = legacy_seq2seq.rnn_decoder(inputs, self.initial_state,
                                             cell, loop_function=loop,
                                             scope=scope)
    output = tf.reshape(tf.concat(axis=1, values=o), [-1, args.rnn_size])
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)

    sf_output = tf.reshape(tf.concat(axis=1, values=sf_o),
                           [-1, args.rnn_size])
    self_feed_logits = tf.matmul(sf_output, softmax_w) + softmax_b
    self.self_feed_probs = tf.nn.softmax(self_feed_logits)

    loss = legacy_seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])],
        args.vocab_size)
    self.loss = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                      args.grad_clip)
    # BUG FIX: Python 3 print function (was Python-2 `print v.name`).
    for g, v in zip(grads, tvars):
        print(v.name)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def build(self):
    """Assemble the RNN graph: placeholders, embedding, stacked cells via
    dynamic_rnn, softmax readout, mean loss, summaries and a
    clipped-gradient Adam training op."""
    self.inputs = tf.placeholder(tf.int32, [self.batch_size, None])
    self.targets = tf.placeholder(tf.int32, [self.batch_size, None])
    self.keep_prob = tf.placeholder(tf.float32)
    self.seq_len = tf.placeholder(tf.int32, [self.batch_size])
    self.learning_rate = tf.placeholder(tf.float64)

    with tf.variable_scope('rnn'):
        proj_w = tf.get_variable("softmax_w",
                                 [self.num_units, self.data.words_size])
        proj_b = tf.get_variable("softmax_b", [self.data.words_size])
        embedding = tf.get_variable(
            "embedding", [self.data.words_size, self.num_units])
        embedded = tf.nn.embedding_lookup(embedding, self.inputs)

    self.cell = tf.nn.rnn_cell.MultiRNNCell(
        [self.unit() for _ in range(self.num_layer)])
    self.init_state = self.cell.zero_state(self.batch_size,
                                           dtype=tf.float32)
    rnn_out, self.final_state = tf.nn.dynamic_rnn(
        self.cell, inputs=embedded, sequence_length=self.seq_len,
        initial_state=self.init_state, scope='rnn')

    with tf.name_scope('fc'):
        flat = tf.reshape(rnn_out, [-1, self.num_units])
        logits = tf.matmul(flat, proj_w) + proj_b

    with tf.name_scope('softmax'):
        prob = tf.nn.softmax(logits)
        self.prob = tf.reshape(prob, [self.batch_size, -1])
        best = tf.argmax(prob, 1)
        self.pre = tf.reshape(best, [self.batch_size, -1])

    flat_targets = tf.reshape(self.targets, [-1])
    with tf.name_scope('loss'):
        token_losses = seq2seq.sequence_loss_by_example(
            [logits], [flat_targets],
            [tf.ones_like(flat_targets, dtype=tf.float32)])
        self.loss = tf.reduce_mean(token_losses)

    with tf.name_scope('summary'):
        tf.summary.scalar('loss', self.loss)
        self.merged_summary = tf.summary.merge_all()

    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        train_vars = tf.trainable_variables()
        clipped, _ = tf.clip_by_global_norm(
            tf.gradients(self.loss, train_vars), 5)
        self.train_op = optimizer.apply_gradients(zip(clipped, train_vars))
def __init__(self, args, mode='TRAIN'):
    '''Create the model.

    Args:
        args: parsed arguments
        mode: TRAIN | EVAL | INFER
    '''
    # When sample, the batch and seq length = 1
    if mode == 'INFER':
        args.batch_size = 1
        args.seq_length = 1

    cell = rnn.BasicLSTMCell(args.rnn_size, state_is_tuple=True)
    self.cell = cell = rnn.MultiRNNCell([cell] * args.num_layers,
                                        state_is_tuple=True)

    # Build the inputs and outputs placeholders
    self.input_data = tf.placeholder(tf.int32,
                                     [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32,
                                  [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, dtype=tf.float32)

    with tf.name_scope('rnn_cells'):
        # final w
        softmax_w = tf.get_variable('softmax_w',
                                    [args.rnn_size, args.vocab_size])
        # final bias
        softmax_b = tf.get_variable('softmax_b', [args.vocab_size])

    with tf.device('/cpu:0'):
        embedding = tf.get_variable('embedding',
                                    [args.vocab_size, args.rnn_size],
                                    dtype=tf.float32)
        inputs = tf.split(
            tf.nn.embedding_lookup(embedding, self.input_data),
            args.seq_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        # Sampling mode: embed the argmax of the previous prediction.
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    ## Using legacy_seq2seq#####################################
    # BUG FIX: the loop_function condition was inverted — it fed the
    # model's own predictions back during TRAIN and teacher-forced during
    # INFER, the opposite of the standard sampling setup.
    outputs, last_state = legacy_seq2seq.rnn_decoder(
        inputs, self.initial_state, cell,
        loop_function=loop if mode == 'INFER' else None,
        scope='rnn_cells')
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    loss = legacy_seq2seq.sequence_loss_by_example(
        logits=[self.logits],
        targets=[tf.reshape(self.targets, [-1])],
        weights=[tf.ones([args.batch_size * args.seq_length])])
    # BUG FIX: the old cost was reduce_mean(loss)/batch/seq_length, which
    # normalizes twice (the loss tensor already has one entry per token).
    # Sum, then divide once — matching the other models in this file.
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def __init__(self, args, training=True):
    """Stacked-LSTM sequence model: embeds input symbols, unrolls the
    stack with rnn_decoder, and minimizes the mean per-token loss with
    Adam.

    Args:
        args: hyper-parameters (num_layers, lstm_size, batch_size,
            seq_length, vocab_size).
        training: when False, batch size and sequence length collapse
            to 1 for step-by-step use.
    """
    self.args = args
    if not training:
        args.batch_size = 1
        args.seq_length = 1

    # One independent BasicLSTMCell per layer.
    layer_cells = [tf.contrib.rnn.BasicLSTMCell(args.lstm_size)
                   for _ in range(args.num_layers)]
    self.cell = cell = rnn.MultiRNNCell(layer_cells, state_is_tuple=True)

    self.input_data = tf.placeholder(tf.int32,
                                     [args.batch_size, args.seq_length])
    self.output_data = tf.placeholder(tf.int32,
                                      [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('lstm'):
        softmax_w = tf.get_variable("softmax_w",
                                    [args.lstm_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        embedding = tf.get_variable("embedding",
                                    [args.vocab_size, args.lstm_size])
        embedded = tf.nn.embedding_lookup(embedding, self.input_data)
        # One tensor per time step, split along dimension 1.
        step_inputs = [tf.squeeze(step, [1])
                       for step in tf.split(embedded, args.seq_length, 1)]

    outputs, last_state = legacy_seq2seq.rnn_decoder(
        step_inputs, self.initial_state, cell)
    flat_output = tf.reshape(tf.concat(outputs, 1), [-1, args.lstm_size])
    self.logits = tf.matmul(flat_output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    self.predicted_output = tf.reshape(tf.argmax(self.probs, 1),
                                       [args.batch_size, args.seq_length])

    ## loss definition
    loss = legacy_seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.output_data, [-1])],
        [tf.ones([args.batch_size * args.seq_length])])
    with tf.name_scope('cost'):
        # Summed token loss normalized by batch and time.
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.eta = tf.Variable(0.0, trainable=False)
    self.optimizer = tf.train.AdamOptimizer(self.eta).minimize(self.cost)
def generate_sequence_output(num_encoder_symbols,
                             encoder_outputs,
                             encoder_state,
                             sequence_length,
                             num_decoder_symbols,
                             targets,
                             weights,
                             buckets,
                             softmax_loss_function=None,
                             per_example_loss=False,
                             name=None,
                             use_attention=False):
    """Run the attention decoder and, when targets are provided, compute
    the sequence loss.

    Args:
        targets: list of per-step target tensors; may be empty (no loss).
        weights: per-step loss weights, same length as targets.
        per_example_loss: when True, return un-averaged per-example loss
            (sequence_loss_by_example); otherwise the averaged
            sequence_loss scalar.

    Returns:
        (logits, crossent) — crossent is None when targets is empty.

    Raises:
        ValueError: if targets is non-empty but shorter than the last
            bucket's decoder length.
    """
    if len(targets) and len(targets) < buckets[-1][1]:
        raise ValueError("Length of targets (%d) must be at least that of last"
                         "bucket (%d)." % (len(targets), buckets[-1][1]))

    all_inputs = encoder_outputs + targets + weights
    with tf.name_scope(name, "model_with_buckets", all_inputs):
        with tf.variable_scope("decoder_sequence_output", reuse=None):
            logits, attention_weights = attention_RNN(
                encoder_outputs,
                encoder_state,
                num_decoder_symbols,
                sequence_length,
                use_attention=use_attention)

            if len(targets):
                # BUG FIX: the old test was `per_example_loss is None`,
                # which is never True for the boolean default (False), so
                # the per-example branch was unreachable. Test the flag's
                # truth value instead.
                if per_example_loss:
                    assert len(logits) == len(targets)
                    # We need to make target an int64-tensor and set its
                    # shape.
                    bucket_target = [tf.reshape(tf.to_int64(x), [-1])
                                     for x in targets]
                    crossent = sequence_loss_by_example(
                        logits, bucket_target, weights,
                        softmax_loss_function=softmax_loss_function)
                else:
                    assert len(logits) == len(targets)
                    bucket_target = [tf.reshape(tf.to_int64(x), [-1])
                                     for x in targets]
                    crossent = sequence_loss(
                        logits, bucket_target, weights,
                        softmax_loss_function=softmax_loss_function)
            else:
                crossent = None

    return logits, crossent
def __init__(self, infer=False):
    """GRU language model configured from the module-level CONFIG dict.

    Args:
        infer: when True, force batch_size and seq_length to 1 so the
            model can be sampled one symbol at a time. Note this mutates
            the shared CONFIG dict.
    """
    if infer:
        CONFIG["batch_size"] = 1
        CONFIG["seq_length"] = 1

    with tf.name_scope('inputs'):
        self.input_data = tf.placeholder(
            tf.int32, [CONFIG["batch_size"], CONFIG["seq_length"]])
        self.target_data = tf.placeholder(
            tf.int32, [CONFIG["batch_size"], CONFIG["seq_length"]])

    with tf.name_scope('model'):
        self.cell = rnn.GRUCell(CONFIG["hidden_size"])
        # NOTE(review): [cell] * num_layers stacks the *same* cell object
        # in every layer; newer TF releases share weights or raise a
        # reuse error in that case — confirm against the targeted TF
        # version.
        self.cell = rnn.MultiRNNCell([self.cell] * CONFIG["num_layers"])
        self.initial_state = self.cell.zero_state(CONFIG["batch_size"],
                                                  tf.float32)
        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable(
                'softmax_w', [CONFIG["hidden_size"], data.vocabulary_size])
            softmax_b = tf.get_variable('softmax_b',
                                        [data.vocabulary_size])
            embedding = tf.get_variable(
                'embedding', [data.vocabulary_size, CONFIG["hidden_size"]])
            inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        outputs, last_state = tf.nn.dynamic_rnn(
            self.cell, inputs, initial_state=self.initial_state)

    with tf.name_scope('loss'):
        output = tf.reshape(outputs, [-1, CONFIG["hidden_size"]])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        self.last_state = last_state
        targets = tf.reshape(self.target_data, [-1])
        loss = legacy_seq2seq.sequence_loss_by_example(
            [self.logits], [targets],
            [tf.ones_like(targets, dtype=tf.float32)])
        # Summed token loss normalized by batch size only.
        self.cost = tf.reduce_sum(loss) / CONFIG["batch_size"]
    tf.summary.scalar('loss', self.cost)

    with tf.name_scope('optimize'):
        self.lr = tf.placeholder(tf.float32, [])
        tf.summary.scalar('learning_rate', self.lr)
        optimizer = tf.train.AdamOptimizer(self.lr)
        tvars = tf.trainable_variables()
        grads = tf.gradients(self.cost, tvars)
        # One histogram per raw (pre-clip) gradient tensor.
        for g in grads:
            tf.summary.histogram(g.name, g)
        grads, _ = tf.clip_by_global_norm(grads, CONFIG["grad_clip"])
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        self.merged_op = tf.summary.merge_all()
def __init__(self, data, model='lstm', infer=False):
    """Build an RNN/GRU/LSTM language model over `data`'s vocabulary.

    Args:
        data: object exposing `batch_size` and `words_size`.
        model: one of 'rnn', 'gru', 'lstm'.
        infer: if True, use batch size 1 for sampling.
    """
    self.rnn_size = 128
    self.n_layers = 2
    if infer:
        self.batch_size = 1
    else:
        self.batch_size = data.batch_size
    if model == 'rnn':
        cell_rnn = rnn.BasicRNNCell
    elif model == 'gru':
        cell_rnn = rnn.GRUCell
    elif model == 'lstm':
        cell_rnn = rnn.BasicLSTMCell
    else:
        # BUG FIX: an unknown model previously fell through and raised a
        # confusing NameError on `cell_rnn`.
        raise ValueError("model type not supported: {}".format(model))

    def _make_cell():
        # BUG FIX: only BasicLSTMCell accepts state_is_tuple; passing it to
        # BasicRNNCell/GRUCell raised a TypeError for those model types.
        if model == 'lstm':
            return cell_rnn(self.rnn_size, state_is_tuple=False)
        return cell_rnn(self.rnn_size)

    # BUG FIX: was MultiRNNCell([cell] * n_layers), which reuses a single
    # cell object in every layer (scope-reuse error / weight sharing in
    # TF >= 1.1). Create one fresh cell per layer.
    self.cell = rnn.MultiRNNCell(
        [_make_cell() for _ in range(self.n_layers)], state_is_tuple=False)
    # Second dim is None: sequence length is variable.
    self.x_tf = tf.placeholder(tf.int32, [self.batch_size, None])
    self.y_tf = tf.placeholder(tf.int32, [self.batch_size, None])
    self.initial_state = self.cell.zero_state(self.batch_size, tf.float32)
    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w",
                                    [self.rnn_size, data.words_size])
        softmax_b = tf.get_variable("softmax_b", [data.words_size])
        # Keep the (large) embedding on CPU.
        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
                "embedding", [data.words_size, self.rnn_size])
            inputs = tf.nn.embedding_lookup(embedding, self.x_tf)
    outputs, final_state = tf.nn.dynamic_rnn(
        self.cell, inputs, initial_state=self.initial_state, scope='rnnlm')
    self.output = tf.reshape(outputs, [-1, self.rnn_size])
    self.logits = tf.matmul(self.output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    self.final_state = final_state
    pred = tf.reshape(self.y_tf, [-1])
    # seq2seq sequence loss, one weight of 1.0 per target symbol.
    loss = seq2seq.sequence_loss_by_example([self.logits],
                                            [pred],
                                            [tf.ones_like(pred,
                                                          dtype=tf.float32)],)
    self.cost = tf.reduce_mean(loss)
    self.learning_rate = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), 5)
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def __init__(self, args, training=True):
    """Build a stacked-GRU character language model.

    In sampling mode (``training=False``) batch size and sequence length
    are forced to 1 and the decoder feeds each step's argmax back in.
    """
    self.args = args
    if not training:
        args.batch_size = 1
        args.seq_length = 1

    # One fresh GRU cell per layer, stacked.
    stacked = rnn.MultiRNNCell(
        [rnn.GRUCell(args.rnn_size) for _ in range(args.num_layers)],
        state_is_tuple=True)
    self.cell = stacked

    self.input_data = tf.placeholder(tf.int32,
                                     [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32,
                                  [args.batch_size, args.seq_length])
    self.initial_state = stacked.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnn'):
        softmax_w = tf.get_variable("softmax_w",
                                    [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        embedding = tf.get_variable("embedding",
                                    [args.vocab_size, args.rnn_size])
        embedded = tf.nn.embedding_lookup(embedding, self.input_data)

    # (batch, seq, rnn_size) -> seq_length-long list of (batch, rnn_size).
    step_inputs = [tf.squeeze(chunk, [1])
                   for chunk in tf.split(embedded, args.seq_length, 1)]

    def loop(prev, _):
        # Sampling: embed the argmax of the previous step's logits.
        prev_logits = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev_logits, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    feedback = None if training else loop
    outputs, last_state = legacy_seq2seq.rnn_decoder(
        step_inputs, self.initial_state, stacked,
        loop_function=feedback, scope='rnnlm')
    flat = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
    self.logits = tf.nn.xw_plus_b(flat, softmax_w, softmax_b)
    self.probs = tf.nn.softmax(self.logits)

    loss = legacy_seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])])
    with tf.name_scope('loss'):
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state

    self.learning_rate = tf.Variable(0.0, trainable=False)
    trainable = tf.trainable_variables()
    clipped, _ = tf.clip_by_global_norm(
        tf.gradients(self.cost, trainable), args.grad_clip)
    with tf.name_scope('optimization'):
        self.train_op = tf.train.AdamOptimizer(
            self.learning_rate).apply_gradients(zip(clipped, trainable))
def loss(self, batch_x, batch_y):
    """Attach the sequence cross-entropy loss for one training batch.

    Runs inference in training mode, then averages the per-symbol losses
    over batch and time. Returns the scalar loss tensor.
    """
    self.inference(batch_x, training=True)
    n_batch = int(batch_x.shape[0])
    n_steps = int(batch_x.shape[1])
    # Uniform weight of 1.0 per target symbol.
    per_symbol = legacy_seq2seq.sequence_loss_by_example(
        [self.logit],
        [tf.reshape(batch_y, [-1])],
        [tf.ones([n_batch * n_steps])])
    with tf.name_scope('loss'):
        self.loss = tf.reduce_sum(per_symbol) / n_batch / n_steps
    return self.loss
def __init__(self, args, data):
    """Build a stacked-LSTM language model with TensorBoard summaries.

    NOTE(review): `scalar_summary`, `histogram_summary` and
    `merge_all_summaries` are not defined in this block — presumably
    project-local wrappers around tf.summary; confirm at the import site.
    """
    with tf.name_scope("inputs"):
        self.input_data = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length])
        self.target_data = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length])
    with tf.name_scope("model"):
        # One fresh LSTM cell per layer.
        self.cell = rnn.MultiRNNCell([
            rnn.BasicLSTMCell(args.state_size) for _ in range(args.num_layer)
        ])
        self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)
        with tf.variable_scope("rnnlm"):
            w = tf.get_variable('softmax_w', [args.state_size, data.vocab_size])
            b = tf.get_variable('softmax_b', [data.vocab_size])
            # Keep the embedding table on CPU.
            with tf.device('/cpu:0'):
                embedding = tf.get_variable(
                    'embedding', [data.vocab_size, args.state_size])
                inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        outputs, last_state = tf.nn.dynamic_rnn(
            self.cell, inputs, initial_state=self.initial_state)
    with tf.name_scope("loss"):
        # Flatten (batch, time) and project to vocabulary logits.
        output = tf.reshape(outputs, [-1, args.state_size])
        self.logits = tf.matmul(output, w) + b
        self.probs = tf.nn.softmax(self.logits)
        self.last_state = last_state
        targets = tf.reshape(self.target_data, [-1])
        loss = legacy_seq2seq.sequence_loss_by_example(
            [self.logits], [targets],
            [tf.ones_like(targets, dtype=tf.float32)])
        # Averaged over the batch only (not over sequence length).
        self.cost = tf.reduce_sum(loss) / args.batch_size
        scalar_summary('loss', self.cost)
    with tf.name_scope('optimize'):
        # Learning rate is fed at run time rather than stored as a Variable.
        self.lr = tf.placeholder(tf.float32, [])
        scalar_summary('learning_rate', self.lr)
        optimizer = tf.train.AdamOptimizer(self.lr)
        train_vars = tf.trainable_variables()
        grads = tf.gradients(self.cost, train_vars)
        for g in grads:
            histogram_summary(g.name, g)
        # Clip by global norm before applying.
        grads, _ = tf.clip_by_global_norm(grads, args.grad_clip)
        self.train_op = optimizer.apply_gradients(zip(grads, train_vars))
    self.merged_op = merge_all_summaries()
def compute_cost(self):
    """Attach the averaged sequence loss and export it as a summary."""
    from tensorflow.contrib import legacy_seq2seq
    # Per-timestep loss using the model's custom error function.
    step_losses = legacy_seq2seq.sequence_loss_by_example(
        [tf.reshape(self.pred, [-1], name='reshape_pred')],
        [tf.reshape(self.ys, [-1], name='reshape_target')],
        [tf.ones([self.batch_size * self.n_steps], dtype=tf.float32)],
        average_across_timesteps=True,
        softmax_loss_function=self.ms_error,
        name='losses')
    with tf.name_scope('average_cost'):
        total = tf.reduce_sum(step_losses, name='losses_sum')
        # Average over the batch.
        self.cost = tf.div(total, self.batch_size, name='average_cost')
    tf.summary.scalar('cost', self.cost)
def __init__(self, args, infer=False):
    """Build a variable-sequence-length RNN language model.

    Args:
        args: config with model, rnn_size, num_layers, vocab_size, grad_clip,
            batch_size.
        infer: if True, use batch size 1 for sampling.
    """
    self.args = args
    if infer:
        args.batch_size = 1
    if args.model == 'rnn':
        cell_fn = rnn.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    def _make_cell():
        # BUG FIX: only BasicLSTMCell accepts state_is_tuple; the original
        # passed it to BasicRNNCell/GRUCell too, raising TypeError for those
        # model types.
        if args.model == 'lstm':
            return cell_fn(args.rnn_size, state_is_tuple=False)
        return cell_fn(args.rnn_size)

    # BUG FIX: was MultiRNNCell([cell] * num_layers), which reuses one cell
    # object in every layer (scope-reuse error / weight sharing in TF >= 1.1).
    self.cell = cell = rnn.MultiRNNCell(
        [_make_cell() for _ in range(args.num_layers)], state_is_tuple=False)
    # The length of the input sequence is variable (second dim is None).
    self.input_data = tf.placeholder(tf.int32, [args.batch_size, None])
    self.targets = tf.placeholder(tf.int32, [args.batch_size, None])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)
    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w",
                                    [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding",
                                        [args.vocab_size, args.rnn_size])
            inputs = tf.nn.embedding_lookup(embedding, self.input_data)
    outputs, last_state = tf.nn.dynamic_rnn(
        cell, inputs, initial_state=self.initial_state, scope='rnnlm')
    output = tf.reshape(outputs, [-1, args.rnn_size])
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    targets = tf.reshape(self.targets, [-1])
    # BUG FIX: the original passed args.vocab_size as a fourth positional
    # argument, which binds to `average_across_timesteps` (a bool) in
    # legacy_seq2seq — a truthy int equals the default True, so dropping it
    # preserves behavior while removing the misleading call.
    loss = legacy_seq2seq.sequence_loss_by_example(
        [self.logits], [targets],
        [tf.ones_like(targets, dtype=tf.float32)])
    self.cost = tf.reduce_mean(loss)
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def train_neural_network():
    """Train the poetry RNN: build loss/optimizer, then run epochs.

    Depends on module-level globals: neural_network, output_targets,
    input_data, words, poetrys_vector, n_chunk, batch_size, DataSet,
    load_model.
    """
    logits, last_state, _, _, _ = neural_network()
    targets = tf.reshape(output_targets, [-1])
    from tensorflow.contrib import legacy_seq2seq
    # NOTE(review): len(words) as the 4th positional arg binds to
    # average_across_timesteps (a bool) in legacy_seq2seq, not a vocab size —
    # a truthy int behaves like the default True; confirm intent.
    loss = legacy_seq2seq.sequence_loss_by_example(
        [logits], [targets], [tf.ones_like(targets, dtype=tf.float32)],
        len(words))
    cost = tf.reduce_mean(loss)
    # Learning rate is assigned per epoch below (exponential decay).
    learning_rate = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    # Clip gradients to a global norm of 5.
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), 5)
    #optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.apply_gradients(zip(grads, tvars))
    Session_config = tf.ConfigProto(allow_soft_placement=True)
    Session_config.gpu_options.allow_growth = True
    trainds = DataSet(len(poetrys_vector))
    with tf.Session(config=Session_config) as sess:
        with tf.device('/gpu:2'):
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver(tf.global_variables())
            # Resume from the last checkpoint, if any.
            last_epoch = load_model(sess, saver, 'model/')
            for epoch in range(last_epoch + 1, 100):
                # Decay: lr = 0.002 * 0.97^epoch.
                sess.run(tf.assign(learning_rate, 0.002 * (0.97**epoch)))
                #sess.run(tf.assign(learning_rate, 0.01))
                all_loss = 0.0
                for batche in range(n_chunk):
                    x, y = trainds.next_batch(batch_size)
                    train_loss, _, _ = sess.run([cost, last_state, train_op],
                                                feed_dict={
                                                    input_data: x,
                                                    output_targets: y
                                                })
                    all_loss = all_loss + train_loss
                    if batche % 50 == 1:
                        #print(epoch, batche, 0.01,train_loss)
                        print(epoch, batche, 0.002 * (0.97**epoch), train_loss)
                # Checkpoint once per epoch.
                saver.save(sess, 'model/poetry.module', global_step=epoch)
                print(epoch, ' Loss: ', all_loss * 1.0 / n_chunk)
def __init__(self, args, text, test=False):
    """Build a stacked-LSTM language model over `text`'s vocabulary.

    Args:
        args: config with batch_size, seq_length, state_size, num_layers,
            grad_clip.
        text: object exposing `vocabulary_size`.
        test: if True, use batch size / sequence length 1 for sampling.
    """
    if test:
        args.batch_size = 1
        args.seq_length = 1
    with tf.name_scope('inputs'):
        self.input_text = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length])
        self.target_text = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length])
    with tf.name_scope('model'):
        # LSTM cell; state_size is the hidden-layer width.
        # BUG FIX: was MultiRNNCell([self.cell] * num_layers), which reuses
        # a single cell object in every layer (scope-reuse error / weight
        # sharing in TF >= 1.1). Build one fresh cell per layer; keep
        # self.cell pointing at the first layer for backward compatibility.
        layer_cells = [rnn.BasicLSTMCell(args.state_size)
                       for _ in range(args.num_layers)]
        self.cell = layer_cells[0]
        self.cells = rnn.MultiRNNCell(layer_cells)
        self.initial_state = self.cells.zero_state(args.batch_size,
                                                   tf.float32)
        with tf.variable_scope('rnnlm'):
            w = tf.get_variable('softmax_w',
                                [args.state_size, text.vocabulary_size])
            b = tf.get_variable('softmax_b', [text.vocabulary_size])
            # Keep the embedding table on CPU.
            with tf.device('/cpu:0'):
                embedding = tf.get_variable(
                    'embedding', [text.vocabulary_size, args.state_size])
                inputs = tf.nn.embedding_lookup(embedding, self.input_text)
        outputs, last_state = tf.nn.dynamic_rnn(
            self.cells, inputs, initial_state=self.initial_state)
    with tf.name_scope('loss'):
        output = tf.reshape(outputs, [-1, args.state_size])
        self.logits = tf.matmul(output, w) + b
        self.probs = tf.nn.softmax(self.logits)
        self.last_state = last_state
        targets = tf.reshape(self.target_text, [-1])
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [targets],
            [tf.ones_like(targets, dtype=tf.float32)])
        # Averaged over the batch only.
        self.loss_avg = tf.reduce_sum(loss) / args.batch_size
        tf.summary.scalar('loss', self.loss_avg)
    with tf.name_scope('optimize'):
        self.lr = tf.placeholder(tf.float32, [])
        tf.summary.scalar('learning_rate', self.lr)
        optimizer = tf.train.AdamOptimizer(self.lr)
        tvars = tf.trainable_variables()
        grads = tf.gradients(self.loss_avg, tvars)
        for g in grads:
            tf.summary.histogram(g.name, g)
        grads, _ = tf.clip_by_global_norm(grads, args.grad_clip)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
    self.merged_op = tf.summary.merge_all()
def loss(self, out):
    """Attach loss, optimizer, prediction and accuracy ops for `out` logits."""
    with tf.name_scope("loss"):
        flat_targets = tf.reshape(self.y, [-1])
        flat_logits = tf.reshape(out, [-1, self.num_class])
        # Uniform weight of 1.0 for every target position.
        per_step = seq2seq.sequence_loss_by_example(
            [flat_logits], [flat_targets],
            [tf.ones_like(flat_targets, dtype=tf.float32)])
        self.loss = tf.reduce_mean(per_step)
        self.optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(
            self.loss, global_step=self.global_step)
    with tf.name_scope("output"):
        # Per-class probabilities reshaped back to (batch, maxlen, classes).
        softmaxed = tf.nn.softmax(flat_logits)
        self.prob = tf.reshape(softmaxed, [-1, self.maxlen, self.num_class],
                               name='prob')
        best = tf.argmax(self.prob, -1, output_type=tf.int32)
        self.predictions = tf.reshape(best, [-1, self.maxlen],
                                      name='predictions')
    with tf.name_scope("accuracy"):
        hits = tf.equal(self.predictions, self.y)
        self.accuracy = tf.reduce_mean(tf.cast(hits, "float"),
                                       name="accuracy")
def pre_processing(self, args, cell, training):
    """Build the decoder, loss, and optimizer on top of `cell`.

    Assumes self.input_data, self.targets and self.initial_state were
    created by the caller before this is invoked.
    """
    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w",
                                    [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        embedding = tf.get_variable("embedding",
                                    [args.vocab_size, args.rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)
    # Input dropout during training (keyed on output_keep_prob — see the
    # similar "dropout beta testing" note elsewhere in this file).
    if training and args.output_keep_prob:
        inputs = tf.nn.dropout(inputs, args.output_keep_prob)
    # (batch, seq, rnn_size) -> seq_length-long list of (batch, rnn_size).
    inputs = tf.split(inputs, args.seq_length, 1)
    inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
    def loop(prev, _):
        # Sampling: feed back the embedding of the previous argmax symbol.
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)
    outputs, last_state = legacy_seq2seq.rnn_decoder(
        inputs, self.initial_state, cell,
        loop_function=loop if not training else None, scope='rnnlm')
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
    # output layer
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    loss = legacy_seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])])
    with tf.name_scope('cost'):
        # Average over both batch and time.
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
    # TensorBoard instrumentation.
    tf.summary.histogram('logits', self.logits)
    tf.summary.histogram('loss', loss)
    tf.summary.scalar('train_loss', self.cost)
def generate_sequence_output(num_encoder_symbols,
                             encoder_outputs,
                             encoder_state,
                             targets,
                             sequence_length,
                             num_decoder_symbols,
                             weights,
                             buckets,
                             softmax_loss_function=None,
                             per_example_loss=False,
                             name=None,
                             use_attention=False):
    """Run the attention decoder over the encoder outputs and attach a loss.

    Args:
        num_encoder_symbols: encoder vocabulary size (kept for interface
            compatibility).
        encoder_outputs: list of per-timestep encoder outputs.
        encoder_state: final encoder state.
        targets: list of per-timestep target tensors.
        sequence_length: tensor of true sequence lengths.
        num_decoder_symbols: decoder vocabulary size.
        weights: per-timestep loss weights.
        buckets: bucket list; targets must cover the last bucket's length.
        softmax_loss_function: optional custom softmax loss.
        per_example_loss: if True, return per-example (unreduced) loss.
        name: optional name_scope name.
        use_attention: whether attention_RNN uses attention.

    Returns:
        (logits, crossent).
    """
    if len(targets) < buckets[-1][1]:
        raise ValueError("Length of targets (%d) must be at least that of last"
                         "bucket (%d)." % (len(targets), buckets[-1][1]))
    all_inputs = encoder_outputs + targets + weights
    with tf.name_scope(name, "model_with_buckets", all_inputs):
        with tf.variable_scope("decoder_sequence_output", reuse=None):
            logits, attention_weights = attention_RNN(
                encoder_outputs,
                encoder_state,
                num_decoder_symbols,
                sequence_length,
                use_attention=use_attention)
            assert len(logits) == len(targets)
            # The loss ops need flat int64 targets.
            bucket_target = [tf.reshape(tf.to_int64(x), [-1]) for x in targets]
            # BUG FIX: the original tested `per_example_loss is None`, which
            # made the per-example branch unreachable (default is False and
            # callers pass booleans). Branch on the flag's truth value.
            if per_example_loss:
                crossent = sequence_loss_by_example(
                    logits, bucket_target, weights,
                    softmax_loss_function=softmax_loss_function)
            else:
                crossent = sequence_loss(
                    logits, bucket_target, weights,
                    softmax_loss_function=softmax_loss_function)
    return logits, crossent
def train_neural_network():
    """Train the pixel-RNN autoencoder on MNIST images.

    Depends on module-level globals: neural_network, output_targets,
    input_data, mnist, batch_size, epochs, logdir.
    """
    logits, last_state, _, _, _ = neural_network()
    targets = tf.reshape(output_targets, [-1])  # [batch_size*28*28,]
    # loss = tf.nn.seq2seq.sequence_loss_by_example([logits], [targets], [tf.ones_like(targets, dtype=tf.float32)],
    # len(words))
    loss = legacy_seq2seq.sequence_loss_by_example(
        [logits], [targets], [tf.ones_like(targets, dtype=tf.float32)])
    # loss = legacy_seq2seq.sequence_loss([logits], [targets], [tf.ones_like(targets, dtype=tf.float32)])
    cost = tf.reduce_mean(loss)
    # Assigned per epoch below with exponential decay.
    learning_rate = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    # Clip gradients to a global norm of 5.
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), 5)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.apply_gradients(zip(grads, tvars))
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        n_chunk = len(mnist.train.images) // batch_size
        for epoch in range(epochs):
            # lr = 0.002 * 0.97^epoch.
            sess.run(tf.assign(learning_rate, 0.002 * (0.97**epoch)))
            # n = 0
            # NOTE(review): iterates a fixed 21 batches per epoch instead of
            # n_chunk — looks like a debugging shortcut; confirm intent.
            for batche in range(21):  # range(n_chunk):
                x_batches, _ = mnist.train.next_batch(batch_size)
                # y_batches=np.round(x_batches).astype(np.uint8)
                # Input is the image; target is the same image (autoencoding).
                train_loss, _, _ = sess.run(
                    [cost, last_state, train_op],
                    feed_dict={
                        input_data: x_batches.reshape([-1, 28, 28, 1]),
                        output_targets: x_batches
                    })
                # n += 1
                if batche % 20 == 0:
                    print(epoch, batche, train_loss)
            if epoch % 1 == 0:
                saver.save(sess, logdir + 'model.ckpt', global_step=epoch)
def __init__(self, args, training=True):
    """Build a char-rnn language model (rnn/gru/lstm/nas) with dropout.

    When ``training`` is False, batch size and sequence length are forced
    to 1 and the decoder feeds each step's argmax prediction back in.
    """
    self.args = args
    if not training:
        args.batch_size = 1
        args.seq_length = 1
    # Select the recurrent cell class from the model name.
    if args.model == 'rnn':
        cell_fn = rnn.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn.BasicLSTMCell
    elif args.model == 'nas':
        cell_fn = rnn.NASCell
    else:
        raise Exception("model type not supported: {}".format(args.model))
    # One fresh cell per layer, optionally wrapped with dropout at train time.
    cells = []
    for _ in range(args.num_layers):
        cell = cell_fn(args.rnn_size)
        if training and (args.output_keep_prob < 1.0
                         or args.input_keep_prob < 1.0):
            cell = rnn.DropoutWrapper(
                cell,
                input_keep_prob=args.input_keep_prob,
                output_keep_prob=args.output_keep_prob)
        cells.append(cell)
    self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)
    self.input_data = tf.placeholder(tf.int32,
                                     [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32,
                                  [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)
    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w",
                                    [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        embedding = tf.get_variable("embedding",
                                    [args.vocab_size, args.rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)
    # dropout beta testing: double check which one should affect next line
    if training and args.output_keep_prob:
        inputs = tf.nn.dropout(inputs, args.output_keep_prob)
    # (batch, seq, rnn_size) -> seq_length-long list of (batch, rnn_size).
    inputs = tf.split(inputs, args.seq_length, 1)
    inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
    def loop(prev, _):
        # Sampling: embed the argmax of the previous step's logits.
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)
    outputs, last_state = legacy_seq2seq.rnn_decoder(
        inputs, self.initial_state, cell,
        loop_function=loop if not training else None, scope='rnnlm')
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    loss = legacy_seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])])
    with tf.name_scope('cost'):
        # Average over batch and time.
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    # Global-norm gradient clipping.
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
    # instrument tensorboard
    tf.summary.histogram('logits', self.logits)
    tf.summary.histogram('loss', loss)
    tf.summary.scalar('train_loss', self.cost)
def __init__(self, args, training=True):
    """Build a char-rnn language model (rnn/gru/lstm/nas) with dropout.

    When ``training`` is False, batch size and sequence length are forced
    to 1 so one symbol at a time can be fed while sampling.
    """
    self.args = args
    if not training:
        args.batch_size = 1
        args.seq_length = 1
    # Select the recurrent cell class from the model name.
    if args.model == 'rnn':
        cell_fn = rnn.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn.BasicLSTMCell
    elif args.model == 'nas':
        cell_fn = rnn.NASCell
    else:
        raise Exception("model type not supported: {}".format(args.model))
    # One fresh cell per layer, optionally wrapped with dropout at train time.
    cells = []
    for _ in range(args.num_layers):
        cell = cell_fn(args.rnn_size)
        if training and (args.output_keep_prob < 1.0
                         or args.input_keep_prob < 1.0):
            cell = rnn.DropoutWrapper(cell,
                                      input_keep_prob=args.input_keep_prob,
                                      output_keep_prob=args.output_keep_prob)
        cells.append(cell)
    self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)
    self.input_data = tf.placeholder(
        tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(
        tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)
    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w",
                                    [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        embedding = tf.get_variable("embedding",
                                    [args.vocab_size, args.rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)
    # dropout beta testing: double check which one should affect next line
    if training and args.output_keep_prob:
        inputs = tf.nn.dropout(inputs, args.output_keep_prob)
    # (batch, seq, rnn_size) -> seq_length-long list of (batch, rnn_size).
    inputs = tf.split(inputs, args.seq_length, 1)
    inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
    def loop(prev, _):
        # Sampling: embed the argmax of the previous step's logits.
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)
    outputs, last_state = legacy_seq2seq.rnn_decoder(
        inputs, self.initial_state, cell,
        loop_function=loop if not training else None, scope='rnnlm')
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    loss = legacy_seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])])
    with tf.name_scope('cost'):
        # Average over batch and time.
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    # Global-norm gradient clipping.
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
    # instrument tensorboard
    tf.summary.histogram('logits', self.logits)
    tf.summary.histogram('loss', loss)
    tf.summary.scalar('train_loss', self.cost)
def __init__(self, args, infer=False):
    """Build a char-rnn model with batch/epoch bookkeeping and summaries.

    When `infer` is True, batch size and sequence length are forced to 1
    and the decoder feeds each step's argmax prediction back in.
    """
    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1
    # Select the recurrent cell class from the model name.
    if args.model == 'rnn':
        cell_fn = rnn.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.model))
    # One fresh cell per layer.
    cells = []
    for _ in range(args.num_layers):
        cell = cell_fn(args.rnn_size)
        cells.append(cell)
    self.cell = cell = rnn.MultiRNNCell(cells)
    self.input_data = tf.placeholder(tf.int32,
                                     [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32,
                                  [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)
    # Non-trainable counters persisted in checkpoints so training can resume.
    self.batch_pointer = tf.Variable(0, name="batch_pointer", trainable=False,
                                     dtype=tf.int32)
    self.inc_batch_pointer_op = tf.assign(self.batch_pointer,
                                          self.batch_pointer + 1)
    self.epoch_pointer = tf.Variable(0, name="epoch_pointer", trainable=False)
    self.batch_time = tf.Variable(0.0, name="batch_time", trainable=False)
    tf.summary.scalar("time_batch", self.batch_time)
    def variable_summaries(var):
        """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
        with tf.name_scope('summaries'):
            mean = tf.reduce_mean(var)
            tf.summary.scalar('mean', mean)
            #with tf.name_scope('stddev'):
            #   stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
            #tf.summary.scalar('stddev', stddev)
            tf.summary.scalar('max', tf.reduce_max(var))
            tf.summary.scalar('min', tf.reduce_min(var))
            #tf.summary.histogram('histogram', var)
    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w",
                                    [args.rnn_size, args.vocab_size])
        variable_summaries(softmax_w)
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        variable_summaries(softmax_b)
        # Keep the embedding table on CPU.
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding",
                                        [args.vocab_size, args.rnn_size])
            # (batch, seq, rnn_size) -> list of (batch, rnn_size) per step.
            inputs = tf.split(
                tf.nn.embedding_lookup(embedding, self.input_data),
                args.seq_length, 1)
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
    def loop(prev, _):
        # Sampling: embed the argmax of the previous step's logits.
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)
    outputs, last_state = legacy_seq2seq.rnn_decoder(
        inputs, self.initial_state, cell,
        loop_function=loop if infer else None, scope='rnnlm')
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    # NOTE(review): args.vocab_size as the 4th positional arg binds to
    # average_across_timesteps (a bool) in legacy_seq2seq — a truthy int
    # behaves like the default True; confirm intent.
    loss = legacy_seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])],
        args.vocab_size)
    # Average over batch and time.
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    tf.summary.scalar("cost", self.cost)
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    # Global-norm gradient clipping.
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def __init__(self, args, infer=False):
    """Build a char-rnn model supporting grid-RNN cells as well.

    When `infer` is True, batch size and sequence length are forced to 1
    and the decoder feeds each step's argmax prediction back in.
    """
    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1
    # Per-cell-type extra constructor kwargs.
    additional_cell_args = {}
    if args.model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
        additional_cell_args.update({'state_is_tuple': False})
    elif args.model == 'gridlstm':
        cell_fn = grid_rnn.Grid2LSTMCell
        additional_cell_args.update({'use_peepholes': True,
                                     'forget_bias': 1.0,
                                     'state_is_tuple': False,
                                     'output_is_tuple': False})
    elif args.model == 'gridgru':
        cell_fn = grid_rnn.Grid2GRUCell
        additional_cell_args.update({'state_is_tuple': False,
                                     'output_is_tuple': False})
    else:
        raise Exception("model type not supported: {}".format(args.model))
    # NOTE(review): [cell] * num_layers shares one cell object across
    # layers — known to break (scope reuse) on TF >= 1.1; confirm the
    # pinned TF version before changing.
    cell = cell_fn(args.rnn_size, **additional_cell_args)
    self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers,
                                             state_is_tuple=False)
    self.input_data = tf.placeholder(tf.int32,
                                     [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32,
                                  [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)
    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w",
                                    [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        # Keep the embedding table on CPU.
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding",
                                        [args.vocab_size, args.rnn_size])
            # (batch, seq, rnn_size) -> list of (batch, rnn_size) per step.
            inputs = tf.split(
                tf.nn.embedding_lookup(embedding, self.input_data),
                num_or_size_splits=args.seq_length, axis=1)
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
    def loop(prev, _):
        # Sampling: embed the argmax of the previous step's logits.
        prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)
    outputs, last_state = seq2seq.rnn_decoder(
        inputs, self.initial_state, cell,
        loop_function=loop if infer else None, scope='rnnlm')
    output = tf.reshape(tf.concat(outputs, axis=1), [-1, args.rnn_size])
    self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
    self.probs = tf.nn.softmax(self.logits)
    # NOTE(review): args.vocab_size as the 4th positional arg binds to
    # average_across_timesteps (a bool) — a truthy int behaves like the
    # default True; confirm intent.
    loss = seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])],
        args.vocab_size)
    # Average over batch and time.
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    # Global-norm gradient clipping.
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))