def _CreateModel(self, rnn_mode, num_layers, num_units, input_size,
                 input_mode="linear_input", dropout=0.):
  if rnn_mode == "lstm":
    model = cudnn_rnn_ops.CudnnLSTM(num_layers, num_units, input_size,
                                    dropout=dropout)
  elif rnn_mode == "gru":
    model = cudnn_rnn_ops.CudnnGRU(num_layers, num_units, input_size,
                                   dropout=dropout)
  elif rnn_mode == "rnn_tanh":
    model = cudnn_rnn_ops.CudnnRNNTanh(num_layers, num_units, input_size,
                                       dropout=dropout)
  elif rnn_mode == "rnn_relu":
    model = cudnn_rnn_ops.CudnnRNNRelu(num_layers, num_units, input_size,
                                       dropout=dropout)
  else:
    raise ValueError("Invalid rnn_mode: %s" % rnn_mode)
  return model
def _CreateModel(self, rnn_mode, num_layers, num_units, input_size,
                 input_mode="linear_input", dropout=0.):
  if rnn_mode == cudnn_rnn_ops.CUDNN_LSTM:
    model = cudnn_rnn_ops.CudnnLSTM(num_layers, num_units, input_size,
                                    dropout=dropout)
  elif rnn_mode == cudnn_rnn_ops.CUDNN_GRU:
    model = cudnn_rnn_ops.CudnnGRU(num_layers, num_units, input_size,
                                   dropout=dropout)
  elif rnn_mode == cudnn_rnn_ops.CUDNN_RNN_TANH:
    model = cudnn_rnn_ops.CudnnRNNTanh(num_layers, num_units, input_size,
                                       dropout=dropout)
  elif rnn_mode == cudnn_rnn_ops.CUDNN_RNN_RELU:
    model = cudnn_rnn_ops.CudnnRNNRelu(num_layers, num_units, input_size,
                                       dropout=dropout)
  else:
    raise ValueError("Invalid rnn_mode: %s" % rnn_mode)
  return model
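# ---------------------------------------------------------------------------
# Illustrative sketch, not from the original files: one way a helper like
# _CreateModel above is typically exercised with the TF 1.x contrib cudnn_rnn
# ops -- build the model, query the size of its opaque parameter buffer, then
# allocate a params variable of that size. The function name
# `_example_build_lstm_params` and the argument values are assumptions made
# only for this example.
def _example_build_lstm_params(num_layers=2, num_units=128, input_size=64):
  from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops
  from tensorflow.python.ops import random_ops
  from tensorflow.python.ops import variables

  # Build the cuDNN LSTM wrapper.
  model = cudnn_rnn_ops.CudnnLSTM(num_layers, num_units, input_size)
  # The flat parameter-buffer size is only known when the graph runs, so the
  # variable is created with validate_shape=False.
  params_size_t = model.params_size()
  params = variables.Variable(
      random_ops.random_uniform([params_size_t]), validate_shape=False)
  return model, params
# ---------------------------------------------------------------------------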
def __init__(self, is_training, batch_size, num_unrollings, vocab_size,
             hidden_size, max_grad_norm, embedding_size, num_layers,
             learning_rate, model, dropout=0.0, input_dropout=0.0,
             use_batch=True):
  self.batch_size = batch_size
  self.num_unrollings = num_unrollings
  if not use_batch:
    self.batch_size = 1
    self.num_unrollings = 1
  self.hidden_size = hidden_size
  self.vocab_size = vocab_size
  self.max_grad_norm = max_grad_norm
  self.num_layers = num_layers
  self.embedding_size = embedding_size
  self.model = model
  self.dropout = dropout
  self.input_dropout = input_dropout
  if embedding_size <= 0:
    self.input_size = vocab_size
    # Don't do dropout on the one-hot representation.
    self.input_dropout = 0.0
  else:
    self.input_size = embedding_size

  self.model_size = (
      embedding_size * vocab_size +  # embedding parameters
      # lstm parameters
      4 * hidden_size * (hidden_size + self.input_size + 1) +
      # softmax parameters
      vocab_size * (hidden_size + 1) +
      # multilayer lstm parameters for extra layers.
      (num_layers - 1) * 4 * hidden_size * (hidden_size + hidden_size + 1))
  # self.decay_rate = decay_rate

  # Placeholders to feed in input and targets/labels data.
  self.input_data = tf.placeholder(
      tf.int64, [self.batch_size, self.num_unrollings], name='inputs')
  self.targets = tf.placeholder(
      tf.int64, [self.batch_size, self.num_unrollings], name='targets')

  #################################################
  # NEED TO REPLACE ALL CELL CODE
  # if self.model == 'rnn':
  #   cell_fn = tf.contrib.rnn.BasicRNNCell
  # elif self.model == 'lstm':
  #   cell_fn = tf.contrib.rnn.BasicLSTMCell
  # elif self.model == 'gru':
  #   cell_fn = tf.contrib.rnn.GRUCell
  #
  # # params = {'input_size': self.input_size}
  # params = {}
  # if self.model == 'lstm':
  #   # add bias to forget gate in lstm.
  #   params['forget_bias'] = 0.0
  #   params['state_is_tuple'] = True
  # # Create multilayer cell.
  # cell = cell_fn(
  #     self.hidden_size, reuse=tf.get_variable_scope().reuse,
  #     **params)
  # cells = [cell]
  # # params['input_size'] = self.hidden_size
  # # more explicit way to create cells for MultiRNNCell than
  # # [higher_layer_cell] * (self.num_layers - 1)
  # for i in range(self.num_layers - 1):
  #   higher_layer_cell = cell_fn(
  #       self.hidden_size, reuse=tf.get_variable_scope().reuse,
  #       **params)
  #   cells.append(higher_layer_cell)
  # if is_training and self.dropout > 0:
  #   cells = [tf.contrib.rnn.DropoutWrapper(
  #       cell,
  #       output_keep_prob=1.0 - self.dropout)
  #       for cell in cells]
  # multi_cell = tf.contrib.rnn.MultiRNNCell(cells)
  # with tf.name_scope('initial_state'):
  #   # zero_state is used to compute the initial state for the cell.
  #   self.zero_state = multi_cell.zero_state(self.batch_size, tf.float32)
  #   # Placeholder to feed in initial state.
  #   # self.initial_state = tf.placeholder(
  #   #     tf.float32,
  #   #     [self.batch_size, multi_cell.state_size],
  #   #     'initial_state')
  #   self.initial_state = create_tuple_placeholders_with_default(
  #       multi_cell.zero_state(batch_size, tf.float32),
  #       extra_dims=(None,),
  #       shape=multi_cell.state_size)
  ######## MIGHT NEED THIS STUFF ##################

  # Embedding layer.
  with tf.name_scope('embedding_layer'):
    if embedding_size > 0:
      self.embedding = tf.get_variable(
          'embedding', [self.vocab_size, self.embedding_size])
    else:
      self.embedding = tf.constant(np.eye(self.vocab_size), dtype=tf.float32)
    inputs = tf.nn.embedding_lookup(self.embedding, self.input_data)
    if is_training and self.input_dropout > 0:
      inputs = tf.nn.dropout(inputs, 1 - self.input_dropout)

  with tf.name_scope('slice_inputs'):
    # Slice inputs into a list of shape [batch_size, 1] data columns.
    sliced_inputs = [
        tf.squeeze(input_, [1])
        for input_ in tf.split(axis=1, num_or_size_splits=self.num_unrollings,
                               value=inputs)
    ]

  # Copy cell to do unrolling and collect outputs.
  # outputs, final_state = tf.contrib.rnn.static_rnn(
  #     multi_cell, sliced_inputs,
  #     initial_state=self.initial_state)

  ########################
  # Insert MIOPEN
  if self.model == 'lstm':
    model = cudnn_rnn_ops.CudnnLSTM(self.num_layers, self.hidden_size,
                                    self.embedding_size, dropout=self.dropout)
  elif self.model == 'gru':
    model = cudnn_rnn_ops.CudnnGRU(self.num_layers, self.hidden_size,
                                   self.embedding_size, dropout=self.dropout)
  elif self.model == 'rnn':
    model = cudnn_rnn_ops.CudnnRNNTanh(self.num_layers, self.hidden_size,
                                       self.embedding_size,
                                       dropout=self.dropout)
  else:
    raise ValueError("Invalid model: %s" % self.model)

  # Zero-initialized input hidden state for the cuDNN RNN.
  input_h = constant_op.constant(
      np.zeros([self.num_layers, self.num_unrollings, self.hidden_size]),
      dtype=tf.float32)
  has_input_c = (self.model == 'lstm')
  if has_input_c:
    # LSTM additionally needs an initial cell state.
    input_c = constant_op.constant(
        np.zeros([self.num_layers, self.num_unrollings, self.hidden_size]),
        dtype=tf.float32)

  # Allocate the opaque cuDNN parameter buffer. Its size is only known at
  # graph-run time, so the variable is created with validate_shape=False.
  params_size_t = model.params_size()
  rand_params = random_ops.random_uniform([params_size_t])
  print("PARAMS size")
  print(params_size_t)
  print(rand_params.shape)
  print("Input sizes")
  print(input_h)
  if has_input_c:
    print(input_c)
  print("Batch size")
  print(batch_size)
  print("Hidden size")
  print(self.hidden_size)
  params = variables.Variable(rand_params, validate_shape=False)

  args = {
      "input_data": inputs,
      "input_h": input_h,
      "params": params,
      "is_training": is_training
  }
  if has_input_c:
    args["input_c"] = input_c

  # Build cell.
  if self.model == 'lstm':
    outputs, final_state, final_cell = model(
        input_data=inputs, input_h=input_h, input_c=input_c, params=params)
  else:
    outputs, final_state = model(
        input_data=inputs, input_h=input_h, params=params)
  # model(**args)

  self.zero_state = state_ops.assign(params,
                                     array_ops.zeros([params_size_t]))
  self.initial_state = create_tuple_placeholders_with_default(
      self.zero_state, extra_dims=(None,), shape=params_size_t.shape)
  print("Initial State")
  print(self.initial_state)
  ########################

  self.final_state = final_state

  with tf.name_scope('flatten_outputs'):
    # Flatten the outputs into one dimension.
    flat_outputs = tf.reshape(tf.concat(axis=1, values=outputs),
                              [-1, hidden_size])

  with tf.name_scope('flatten_targets'):
    # Flatten the targets too.
    flat_targets = tf.reshape(tf.concat(axis=1, values=self.targets), [-1])

  # Create softmax parameters, weights and bias.
  with tf.variable_scope('softmax') as sm_vs:
    softmax_w = tf.get_variable("softmax_w", [hidden_size, vocab_size])
    softmax_b = tf.get_variable("softmax_b", [vocab_size])
    self.logits = tf.matmul(flat_outputs, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)

  with tf.name_scope('loss'):
    # Compute mean cross entropy loss for each output.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.logits, labels=flat_targets)
    self.mean_loss = tf.reduce_mean(loss)

  with tf.name_scope('loss_monitor'):
    # Count the number of elements and the sum of mean_loss
    # from each batch to compute the average loss.
    count = tf.Variable(1.0, name='count')
    sum_mean_loss = tf.Variable(1.0, name='sum_mean_loss')
    self.reset_loss_monitor = tf.group(sum_mean_loss.assign(0.0),
                                       count.assign(0.0),
                                       name='reset_loss_monitor')
    self.update_loss_monitor = tf.group(
        sum_mean_loss.assign(sum_mean_loss + self.mean_loss),
        count.assign(count + 1),
        name='update_loss_monitor')
    with tf.control_dependencies([self.update_loss_monitor]):
      self.average_loss = sum_mean_loss / count
      self.ppl = tf.exp(self.average_loss)

  # Monitor the loss.
  loss_summary_name = "average loss"
  ppl_summary_name = "perplexity"
  average_loss_summary = tf.summary.scalar(loss_summary_name,
                                           self.average_loss)
  ppl_summary = tf.summary.scalar(ppl_summary_name, self.ppl)

  # Merge the loss summaries.
  self.summaries = tf.summary.merge([average_loss_summary, ppl_summary],
                                    name='loss_monitor')

  self.global_step = tf.get_variable(
      'global_step', [], initializer=tf.constant_initializer(0.0))

  self.learning_rate = tf.constant(learning_rate)
  if is_training:
    # learning_rate = tf.train.exponential_decay(1.0, self.global_step,
    #                                            5000, 0.1, staircase=True)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.mean_loss, tvars),
                                      self.max_grad_norm)
    # optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    # optimizer = tf.train.RMSPropOptimizer(learning_rate, decay_rate)
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars),
                                              global_step=self.global_step)
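# ---------------------------------------------------------------------------
# Illustrative sketch, not from the original code: how the graph built in
# __init__ might be driven for one training step. `char_rnn`, `x_batch`, and
# `y_batch` are hypothetical names; the batches are assumed to be int64
# arrays of shape [batch_size, num_unrollings], matching the placeholders.
def _example_train_step(sess, char_rnn, x_batch, y_batch):
  """Run one optimizer step and return the running perplexity and summary."""
  fetches = [char_rnn.train_op, char_rnn.summaries, char_rnn.ppl]
  feed_dict = {char_rnn.input_data: x_batch, char_rnn.targets: y_batch}
  # Fetching ppl also runs update_loss_monitor via its control dependency.
  _, summary_str, ppl = sess.run(fetches, feed_dict=feed_dict)
  return ppl, summary_str
# ---------------------------------------------------------------------------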