def run_epoch(self, session, data, train_op=None, verbose=10):
    """Run the model over `data` once and return ``exp(mean(loss))``.

    When `train_op` is falsy, a `tf.no_op()` is fetched in its place and the
    dropout placeholder is fed 1 instead of `config.dropout`.

    Args:
      session: object whose `run(fetches, feed_dict=...)` executes the graph.
      data: passed unchanged to `ptb_iterator`.
      train_op: optional op fetched on every step alongside the loss.
      verbose: if truthy, a progress line is written to stdout every
        `verbose` steps.

    Returns:
      `np.exp` of the mean of the per-step losses (the value printed as
      "pp" in the progress line).
    """
    settings = self.config
    dropout_value = settings.dropout
    if not train_op:
        train_op = tf.no_op()
        dropout_value = 1

    def batches():
        return ptb_iterator(data, settings.batch_size, settings.num_steps)

    # One extra pass over the iterator, only to know the step count that is
    # shown in the progress line.
    step_count = sum(1 for _ in batches())
    losses = []
    rnn_state = self.initial_state.eval()
    report = '\r{} / {} : pp = {}'
    for step_idx, (inputs, labels) in enumerate(batches()):
        # The state returned by one step is fed into the next one so the RNN
        # keeps its history across consecutive batches.
        feed = {
            self.input_placeholder: inputs,
            self.labels_placeholder: labels,
            self.initial_state: rnn_state,
            self.dropout_placeholder: dropout_value,
        }
        batch_loss, rnn_state, _ = session.run(
            [self.calculate_loss, self.final_state, train_op],
            feed_dict=feed)
        losses.append(batch_loss)
        if verbose and step_idx % verbose == 0:
            running = np.exp(np.mean(losses))
            sys.stdout.write(report.format(step_idx, step_count, running))
            sys.stdout.flush()
    if verbose:
        sys.stdout.write('\r')
    return np.exp(np.mean(losses))
def run_epoch(session, data, train_op=None, verbose=10):
    """Run one pass over `data` using the module-level graph handles.

    Reads `dropout`, `batch_size`, `num_steps`, `initial_state`,
    `final_state`, `loss_op` and the placeholders from the enclosing module.
    When `train_op` is falsy a `tf.no_op()` is fetched instead and the
    dropout placeholder is fed 1.

    Args:
      session: object whose `run(fetches, feed_dict=...)` executes the graph.
      data: passed unchanged to `ptb_iterator`.
      train_op: optional op fetched on every step alongside the loss.
      verbose: if truthy, a progress line is written to stdout every
        `verbose` steps.

    Returns:
      `np.exp` of the mean of the per-step losses (printed as "pp").
    """
    dropout_value = dropout
    if not train_op:
        train_op = tf.no_op()
        dropout_value = 1

    # Extra pass over the iterator purely to know the total for the
    # progress line.
    step_count = sum(1 for _ in ptb_iterator(data, batch_size, num_steps))
    losses = []
    # The initial state is a pair; each element is evaluated separately.
    first_part = initial_state[0].eval()
    second_part = initial_state[1].eval()
    rnn_state = (first_part, second_part)
    report = '\r{} / {} : pp = {}'
    batches = ptb_iterator(data, batch_size, num_steps)
    for step_idx, (inputs, labels) in enumerate(batches):
        feed = {
            input_placeholder: inputs,
            labels_placeholder: labels,
            initial_state: rnn_state,
            dropout_placeholder: dropout_value,
        }
        batch_loss, rnn_state, _ = session.run(
            [loss_op, final_state, train_op], feed_dict=feed)
        losses.append(batch_loss)
        if verbose and step_idx % verbose == 0:
            running = np.exp(np.mean(losses))
            sys.stdout.write(report.format(step_idx, step_count, running))
            sys.stdout.flush()
    if verbose:
        sys.stdout.write('\r')
    return np.exp(np.mean(losses))
def run_epoch(self, session, data, train_op=None, verbose=10):
    """Make one pass over `data` and return ``exp(mean(loss))``.

    With a falsy `train_op` the pass fetches `tf.no_op()` instead and feeds
    1.0 to the dropout placeholder; otherwise `config.dropout` is fed.

    Args:
      session: object whose `run(fetches, feed_dict=...)` executes the graph.
      data: passed unchanged to `ptb_iterator`.
      train_op: optional op fetched on every step alongside the loss.
      verbose: if truthy, a progress line is written to stdout every
        `verbose` steps.

    Returns:
      `np.exp` of the mean of the per-step losses (printed as "pp").
    """
    cfg = self.config
    dropout_feed = cfg.dropout
    if not train_op:
        train_op = tf.no_op()
        dropout_feed = 1.0

    # Count the batches up front so the progress line can show "i / total".
    n_steps = 0
    for _ in ptb_iterator(data, cfg.batch_size, cfg.num_steps):
        n_steps += 1

    loss_history = []
    carried_state = self.initial_state.eval()
    batch_stream = ptb_iterator(data, cfg.batch_size, cfg.num_steps)
    for idx, (inputs, labels) in enumerate(batch_stream):
        # Feed the previous final state as this step's initial state so the
        # RNN sees a continuous history.
        feed = {
            self.input_placeholder: inputs,
            self.labels_placeholder: labels,
            self.initial_state: carried_state,
            self.dropout_placeholder: dropout_feed,
        }
        step_loss, carried_state, _ = session.run(
            [self.calculate_loss, self.final_state, train_op],
            feed_dict=feed)
        loss_history.append(step_loss)
        if verbose and idx % verbose == 0:
            line = '\r{} / {} : pp = {}'.format(
                idx, n_steps, np.exp(np.mean(loss_history)))
            sys.stdout.write(line)
            sys.stdout.flush()
    if verbose:
        sys.stdout.write('\r')
    return np.exp(np.mean(loss_history))
def compute_loss(model, model_info, device, data, loss_fn):
    """Return the mean loss at each time-step position over all mini-batches.

    The model is put in eval mode and run over every batch produced by
    `ptb_iterator(data, model.batch_size, model.seq_len)`. For each batch,
    `loss_fn` is applied separately to every slice along the first axis of
    the (time-major) outputs and targets, and the resulting rows are averaged
    over batches.

    Args:
      model: the network; must expose `batch_size`, `seq_len`, and, for
        non-transformer models, `init_hidden()`.
      model_info: object whose `.model` attribute selects the forward path
        ('TRANSFORMER' or anything else).
      device: torch device the inputs and targets are moved to.
      data: passed unchanged to `ptb_iterator`.
      loss_fn: callable `(output_t, target_t) -> scalar tensor`.

    Returns:
      1-D numpy array of length `model.seq_len` with the per-position mean
      loss (all-NaN if the iterator yields no batch).
    """
    model.eval()
    expected_batches = (len(data) // model.batch_size - 1) // model.seq_len
    batch_rows = []

    # Evaluation only: disabling autograd avoids building a graph per batch.
    with torch.no_grad():
        for x, y in tqdm.tqdm(
                ptb_iterator(data, model.batch_size, model.seq_len),
                total=expected_batches):
            model.zero_grad()
            if model_info.model == 'TRANSFORMER':
                batch = Batch(torch.from_numpy(x).long().to(device))
                outputs = model.forward(batch.data, batch.mask).transpose(1, 0)
            else:
                inputs = torch.from_numpy(x.astype(np.int64)).transpose(
                    0, 1).contiguous().to(device)
                # A fresh hidden state is used for every batch.
                hidden = model.init_hidden().to(device)
                outputs, hidden = model(inputs, hidden)

            targets = torch.from_numpy(y.astype(np.int64)).transpose(
                0, 1).contiguous().to(device)
            outputs = outputs.contiguous()

            # One loss value per slice along the first (time) axis.
            batch_rows.append([
                loss_fn(output_t, target_t).item()
                for output_t, target_t in zip(outputs, targets)
            ])

    # Stack once at the end instead of re-copying the array on every batch.
    # The empty header row pins the width to model.seq_len (previously a
    # hard-coded 35) and keeps the result well-shaped when there is no batch.
    all_losses = np.vstack([np.empty((0, model.seq_len))] + batch_rows)
    return np.mean(all_losses, axis=0)
def run_epoch(self, session, data, train_op=None, verbose=10, writer=None):
    """Run one pass over `data`, optionally logging summaries.

    When `train_op` is falsy, `tf.no_op()` is fetched instead, the dropout
    placeholder is fed 1, and `self.total_train_step` is left untouched.
    Otherwise `self.total_train_step` is incremented once per batch.

    Args:
      session: object whose `run(fetches, feed_dict=...)` executes the graph.
      data: passed unchanged to `ptb_iterator`.
      train_op: optional op fetched on every step alongside the loss.
      verbose: if truthy, a progress line is written to stdout every
        `verbose` steps.
      writer: optional object with `add_summary(summary, step)`. When given
        (and the graph has summaries), the merged summary is fetched and
        written every 5th step, tagged with `self.total_train_step`. When
        None, no summary op is built or fetched.

    Returns:
      `np.exp` of the mean of the per-step losses (printed as "pp").
    """
    config = self.config
    dp = config.dropout
    is_training = True
    if not train_op:
        train_op = tf.no_op()
        dp = 1
        is_training = False

    total_steps = sum(
        1 for _ in ptb_iterator(data, config.batch_size, config.num_steps))
    total_loss = []
    state = self.initial_state.eval()

    # Only build the merged summary op when there is somewhere to write it.
    # merge_all() returns None when the graph has no summaries, and None
    # cannot be fetched, so that case disables logging too.
    merged_summary = tf.summary.merge_all() if writer is not None else None
    log_summaries = merged_summary is not None
    summary_every = 5

    for step, (x, y) in enumerate(
            ptb_iterator(data, config.batch_size, config.num_steps)):
        if is_training:
            self.total_train_step += 1
        # We need to pass in the initial state and retrieve the final state
        # to give the RNN proper history.
        feed = {
            self.input_placeholder: x,
            self.labels_placeholder: y,
            self.initial_state: state,
            self.dropout_placeholder: dp,
        }
        fetches = [self.calculate_loss, self.final_state, train_op]
        if log_summaries and step % summary_every == 0:
            loss, state, _, summary_str = session.run(
                fetches + [merged_summary], feed_dict=feed)
            writer.add_summary(summary_str, self.total_train_step)
        else:
            loss, state, _ = session.run(fetches, feed_dict=feed)
        total_loss.append(loss)
        if verbose and step % verbose == 0:
            sys.stdout.write('\r{} / {} : pp = {}'.format(
                step, total_steps, np.exp(np.mean(total_loss))))
            sys.stdout.flush()
    if verbose:
        sys.stdout.write('\r')
    return np.exp(np.mean(total_loss))
def run_epoch(self, session, data, train_op=None, verbose=10, epoch=0):
    """Prepare a pass over `data` (the visible body stops after building the feed).

    NOTE(review): as it stands this function never calls `session.run`,
    never uses `verbose`, `epoch`, `total_steps` or `total_loss`, and
    returns None. It looks like a truncated copy of a full epoch loop -
    confirm against the complete source before relying on it.

    Args:
      session: unused in the visible body.
      data: passed unchanged to `ptb_iterator`.
      train_op: if falsy, replaced by `tf.no_op()` and dropout is set to 1.
      verbose: unused in the visible body.
      epoch: unused in the visible body.
    """
    config = self.config
    dp = config.dropout
    if not train_op:
        train_op = tf.no_op()
        dp = 1
    # Extra pass over the iterator just to count the batches.
    total_steps = sum(1 for x in ptb_iterator(data, config.batch_size, config.num_steps))
    total_loss = []
    state = self.initial_state.eval()
    for step, (x, y) in enumerate(
            ptb_iterator(data, config.batch_size, config.num_steps)):
        # We need to pass in the initial state and retrieve the final state to give
        # the RNN proper history
        feed = {self.input_placeholder: x,
                self.labels_placeholder: y,
                self.initial_state: state,
                self.dropout_placeholder: dp}
def run_epoch(our_model, config, model_optimizer, criterion, data,
              mode='train', verbose=10):
    """Run for one epoch. Operations are determined by the mode.

    In 'train' mode the model is switched to training mode and, for every
    batch, gradients are cleared, back-propagated and applied with
    `model_optimizer`. In any other mode the model is switched to eval mode
    and the forward pass runs without gradient tracking.

    Args:
      our_model: the network; must expose `init_hidden()` and be callable as
        `our_model(x, state) -> (outputs, state)`.
      config: object with `batch_size` and `num_steps`.
      model_optimizer: optimizer whose `step()` is called after each
        backward pass (train mode only).
      criterion: forwarded to the external `compute_loss(outputs, y, criterion)`.
      data: passed unchanged to `ptb_iterator`.
      mode: 'train' to update the weights, anything else to only evaluate.
      verbose: if truthy, a progress line is written to stdout every
        `verbose` steps.

    Returns:
      `np.exp` of the mean of the per-step losses (printed as "pp").
    """
    training = mode == 'train'
    # Set the mode explicitly in both directions; otherwise a validation
    # pass would leave the model in eval mode for later training epochs.
    if training:
        our_model.train()
    else:
        our_model.eval()

    # Put the batches wherever the model's weights live instead of
    # hard-coding CUDA; fall back to CUDA (the previous behaviour) for a
    # model without parameters.
    first_param = next(our_model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda')

    # Take the init_hidden as the state input for the 1st step.
    state = our_model.init_hidden()
    total_steps = sum(
        1 for _ in ptb_iterator(data, config.batch_size, config.num_steps))
    total_loss = []
    for step, (x, y) in enumerate(
            ptb_iterator(data, config.batch_size, config.num_steps)):
        x = torch.from_numpy(x).type(torch.LongTensor).to(device)
        y = torch.from_numpy(y).type(torch.LongTensor).to(device)

        with torch.set_grad_enabled(training):
            outputs, state = our_model(x, state)
            loss = compute_loss(outputs, y, criterion)

        if training:
            # Clear gradients every step; clearing only once per epoch would
            # make each update use the sum of all earlier batches' gradients.
            our_model.zero_grad()
            loss.backward()
            model_optimizer.step()

        # Carry the state values forward but cut the graph, so back-prop
        # (and memory use) never spans more than one batch.
        state = state.detach()

        total_loss.append(loss.item())
        if verbose and step % verbose == 0:
            sys.stdout.write('\r{} / {} : pp = {}'.format(
                step, total_steps, np.exp(np.mean(total_loss))))
            sys.stdout.flush()
    if verbose:
        sys.stdout.write('\r')
    return np.exp(np.mean(total_loss))
def generate_text(session, model, config, starting_text='<eos>',
                  stop_length=5, stop_tokens=None, temp=0.2):
    """Generate text from the model.

    Starting from the encoded words of `starting_text`, repeatedly runs the
    model over the tokens generated so far, samples the next word index from
    the last prediction, and appends it, until `stop_length` words have been
    tried or a word in `stop_tokens` is produced.

    Args:
      session: tf.Session() object
      model: Object of type RNNLM_Model
      config: A Config() object
      starting_text: Initial text passed to model.
      stop_length: Maximum number of sampling iterations.
      stop_tokens: Optional collection of words that end generation early.
      temp: Temperature forwarded to `sample`.

    Returns:
      output: List of decoded words (the starting text followed by the
        sampled words).

    Raises:
      ValueError: if `ptb_iterator` yields no batch for the current tokens,
        so there is no prediction to sample from.
    """
    state = model.initial_state.eval()
    # Imagine tokens as a batch size of one, length of len(tokens[0])
    tokens = [model.vocab.encode(word) for word in starting_text.split()]
    for _ in range(stop_length):
        y_pred = None
        for x, y in ptb_iterator(tokens, config.batch_size, config.num_steps):
            feed = {
                model.input_placeholder: x,
                model.labels_placeholder: y,
                model.initial_state: state,
                model.dropout_placeholder: 1,
            }
            # Sample from the model's last prediction, not from the RNN
            # state; the state is only carried over to the next run.
            state, y_pred = session.run(
                [model.final_state, model.predictions[-1]], feed_dict=feed)
        if y_pred is None:
            raise ValueError(
                'ptb_iterator produced no batch for {} token(s); cannot '
                'predict the next word'.format(len(tokens)))
        try:
            next_word_idx = sample(y_pred[0], temperature=temp)
        except ValueError:
            # Best effort: report the failed draw and keep the tokens as
            # they are.
            print("Exception on sum(y_pred[0][:-1])>1 {}".format(sum(y_pred[0][:-1])))
        else:
            tokens.append(next_word_idx)
        if stop_tokens and model.vocab.decode(tokens[-1]) in stop_tokens:
            break
    output = [model.vocab.decode(word_idx) for word_idx in tokens]
    return output
def generate_text(session, model, config, starting_text='<eos>',
                  stop_length=100, stop_tokens=None, temp=1.0):
    """Generate text from the model.

    Starting from the encoded words of `starting_text`, repeatedly runs the
    model over the tokens generated so far, samples the next word index from
    the last prediction, and appends it, until `stop_length` words have been
    added or a word in `stop_tokens` is produced.

    Args:
      session: tf.Session() object
      model: Object of type RNNLM_Model
      config: A Config() object
      starting_text: Initial text passed to model.
      stop_length: Maximum number of words to generate.
      stop_tokens: Optional collection of words that end generation early.
      temp: Temperature forwarded to `sample`.

    Returns:
      output: List of decoded words (the starting text followed by the
        sampled words).

    Raises:
      ValueError: if `ptb_iterator` yields no batch for the current tokens,
        so there is no prediction to sample from.
    """
    state = model.initial_state.eval()
    # Imagine tokens as a batch size of one, length of len(tokens[0])
    tokens = [model.vocab.encode(word) for word in starting_text.split()]
    for _ in range(stop_length):
        y_pred = None
        for x, y in ptb_iterator(tokens, config.batch_size, config.num_steps):
            feed = {
                model.initial_state: state,
                model.input_placeholder: x,
                model.labels_placeholder: y,
                model.dropout_placeholder: 1,
            }
            # Carry the final state into the next run and keep the last
            # prediction to sample from.
            state, y_pred = session.run(
                [model.final_state, model.predictions[-1]], feed_dict=feed)
        if y_pred is None:
            raise ValueError(
                'ptb_iterator produced no batch for {} token(s); cannot '
                'predict the next word'.format(len(tokens)))
        next_word_idx = sample(y_pred[0], temperature=temp)
        tokens.append(next_word_idx)
        if stop_tokens and model.vocab.decode(tokens[-1]) in stop_tokens:
            break
    output = [model.vocab.decode(word_idx) for word_idx in tokens]
    return output
def run_epoch(self, session, data, train_op=None, verbose=10):
    """Run the model over `data` once and return ``exp(mean(loss))``.

    Args:
      session: object whose `run(fetches, feed_dict=...)` executes the graph.
      data: passed unchanged to `ptb_iterator`.
      train_op: optional op fetched on every step; when falsy, `tf.no_op()`
        is fetched instead and the dropout placeholder is fed 1.
      verbose: if truthy, a progress line is written to stdout every
        `verbose` steps.

    Returns:
      `np.exp` of the mean of the per-step losses (printed as "pp").
    """
    hyper = self.config
    dropout_feed = hyper.dropout
    # A falsy train_op means "do not train".
    if not train_op:
        train_op = tf.no_op()
        dropout_feed = 1

    def iterate_batches():
        return ptb_iterator(data, hyper.batch_size, hyper.num_steps)

    # Total number of iteration steps (needs its own pass over the data).
    n_steps = sum(1 for _ in iterate_batches())
    # Loss of every step seen so far.
    step_losses = []
    # The state is re-initialised at the start of every pass over the data.
    current_state = self.initial_state.eval()
    # For each batch of data:
    for step_no, (batch_x, batch_y) in enumerate(iterate_batches()):
        feed = {
            self.input_placeholder: batch_x,
            self.labels_placeholder: batch_y,
            self.initial_state: current_state,
            self.dropout_placeholder: dropout_feed,
        }
        # Obtain the loss and the final state, which seeds the next step.
        step_loss, current_state, _ = session.run(
            [self.calculate_loss, self.final_state, train_op],
            feed_dict=feed)
        step_losses.append(step_loss)
        # Show progress if requested.
        if verbose and step_no % verbose == 0:
            so_far = np.exp(np.mean(step_losses))
            sys.stdout.write(
                '\r{} / {} : pp = {}'.format(step_no, n_steps, so_far))
            sys.stdout.flush()
    if verbose:
        sys.stdout.write('\r')
    # Exponentiate the mean loss, which corresponds to the perplexity; the
    # raw loss would do as well, this just makes changes easier to see.
    return np.exp(np.mean(step_losses))
def run_epoch(model, data):
    """
    Run the model in eval mode over `data` for the analysis selected by the
    module-level `task`.

    task == '5.1': accumulates, per time-step position, the loss of every
        mini-batch, then prints and saves the per-position average to
        'seq_loss.npy'.
    task == '5.2': on the first mini-batch only, takes the loss at the last
        time-step, computes its gradient with respect to each layer's stored
        hidden states, prints and saves one norm per time-step to
        'timestep_grads.npy', and stops.

    Relies on names defined outside this function: `args`, `device`, `task`,
    `loss_fn`, `utils`, `os`, `np`, `torch`. Returns None.

    NOTE(review): `args` is read both as `args.model` and as
    `args['experiment_path']`; confirm that its type supports both forms.
    """
    model.eval()
    # Running sum of per-position losses; one entry per time-step.
    seq_losses = np.zeros(model.seq_len)

    # LOOP THROUGH MINIBATCHES
    for step, (x, y) in enumerate(
            utils.ptb_iterator(data, model.batch_size, model.seq_len)):
        if step % 10 == 0:
            print('step', step)
        step_seq_losses = []
        # Recurrent models get a fresh hidden state for every mini-batch.
        if args.model != 'TRANSFORMER':
            hidden = model.init_hidden()
            hidden = hidden.to(device)
        if args.model == 'TRANSFORMER':
            batch = utils.Batch(torch.from_numpy(x).long().to(device))
            model.zero_grad()
            outputs = model.forward(batch.data, batch.mask).transpose(1, 0)
        else:
            inputs = torch.from_numpy(x.astype(np.int64)).transpose(
                0, 1).contiguous().to(device)
            model.zero_grad()
            hidden = utils.repackage_hidden(hidden)
            if task == '5.2':
                # presumably resets the per-time-step hidden states that are
                # read back below via `unit.hiddens` - TODO confirm
                model.init_hidden_state_list()
            outputs, hidden = model(inputs, hidden)

        targets = torch.from_numpy(y.astype(np.int64)).transpose(
            0, 1).contiguous().to(device)

        # LOSS COMPUTATION
        # For task 5.1 the loss is computed separately for each slice along
        # the first axis of outputs/targets (assumed to be time - TODO
        # confirm) rather than averaged over all time-steps.
        if task == '5.1':
            with torch.no_grad():
                for output, target in zip(outputs, targets):
                    l = loss_fn(output, target)
                    step_seq_losses.append(l.data.item())
            # Element-wise add this batch's per-position losses to the total.
            seq_losses = np.sum(
                [seq_losses, np.array(step_seq_losses)], axis=0)
        elif task == '5.2':
            # Loss at the final time-step only.
            loss = loss_fn(outputs[-1], targets[-1])
            layer_grads = []
            for unit in model.hidden_stack:
                # retain_graph=True because the same graph is differentiated
                # again for the next layer.
                ret = torch.autograd.grad(loss, unit.hiddens, retain_graph=True)
                # mean over examples in the minibatch 35x[20x1500] -> 35x[1500]
                # (shapes as noted by the original author; not checked here)
                layer_grads.append([r.mean(dim=0) for r in ret])
            stacked = []
            # iterate over each time-step, stacking that step's gradient
            # from every layer
            for i in range(len(layer_grads[0])):
                stacked.append(
                    torch.stack([
                        layer_grads[j][i] for j in range(len(layer_grads))
                    ]))
            # 34 x 2 x 1500 -> 34 x 1 (original author's shape note)
            ts_grads = [s.norm() for s in stacked]
            print('norms: ', ts_grads)
            ts_path = os.path.join(args['experiment_path'],
                                   'timestep_grads.npy')
            print('\nDONE\n\nSaving timestep_grads to ' + ts_path)
            # NOTE(review): ts_grads is a list of tensors, not numbers;
            # confirm np.save handles it for the device in use.
            np.save(ts_path, ts_grads)
            # Only the first mini-batch is analysed for task 5.2.
            break

    if task == '5.1':
        # Average over the number of mini-batches processed.
        # NOTE(review): `step` is unbound here if the iterator yielded nothing.
        seq_loss = seq_losses / (step + 1)
        log_str = '\nseq_losses (len={}, sum={}): {}'.format(
            len(seq_loss), sum(seq_loss), seq_loss)
        print(log_str)
        sl_path = os.path.join(args['experiment_path'], 'seq_loss.npy')
        print('\nDONE\n\nSaving seq_loss to ' + sl_path)
        np.save(sl_path, seq_loss)
from utils import ptb_iterator, sample

# Toy corpus: the integers 0..1023 stand in for word ids.
train_data = list(range(1024))

# num_steps is how many items of the data are grabbed for one sequence;
# to grab 10 words per sequence, set num_steps to 10.
for batch in ptb_iterator(train_data, batch_size=2, num_steps=1):
    print("Batch")
    # Each batch is an (x, y) pair.
    x, y = batch
    print(x, y)