def validate(self, dev_corpus):
    """Compute the model's accuracy on a development corpus.

    Args:
        dev_corpus: object whose ``data`` attribute is handed to
            ``helper.batchify`` together with ``self.config.batch_size``.

    Returns:
        Accuracy in percent: 100 * (correct predictions) / (examples seen).
    """
    # Evaluation mode switches dropout off.
    self.model.eval()

    dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
    print('number of dev batches = ', len(dev_batches))

    hits = 0
    seen = 0
    for batch in dev_batches:
        # The trailing True is passed positionally; presumably the
        # "is evaluation" flag of helper.batch_to_tensors -- TODO confirm.
        sents1, lens1, sents2, lens2, gold = helper.batch_to_tensors(
            batch, self.dictionary, True)
        if self.config.cuda and torch.cuda.is_available():
            sents1 = sents1.cuda()
            sents2 = sents2.cuda()
            gold = gold.cuda()
        assert sents1.size(0) == sents2.size(0)

        score = self.model(sents1, lens1, sents2, lens2)
        # The index of the largest score along dim 1 is the predicted class.
        predicted = torch.max(score, 1)[1].view(gold.size())
        hits += (predicted.data == gold.data).sum()
        seen += len(batch)

    return 100. * hits / seen
def train(self, train_corpus):
    """Run one training pass over `train_corpus`.

    For every batch the gradients are cleared, the model is run, the loss
    from ``self.criterion`` is back-propagated, gradients are clipped to
    ``self.config.max_norm`` and the optimizer takes one step.  Batch
    accuracies are averaged and printed every ``self.config.print_every``
    batches and appended to ``self.train_accuracies`` every
    ``self.config.plot_every`` batches.

    Args:
        train_corpus: object whose ``data`` attribute is handed to
            ``helper.batchify`` together with ``self.config.batch_size``.
    """
    # Training mode switches dropout on.
    self.model.train()

    train_batches = helper.batchify(train_corpus.data,
                                    self.config.batch_size)
    print('number of train batches = ', len(train_batches))

    start = time.time()
    print_acc_total = 0
    plot_acc_total = 0
    num_batches = len(train_batches)
    for batch_no, batch in enumerate(train_batches, start=1):
        # Start every step from zeroed gradients.
        self.optimizer.zero_grad()

        sents1, lens1, sents2, lens2, gold = helper.batch_to_tensors(
            batch, self.dictionary)
        if self.config.cuda and torch.cuda.is_available():
            sents1 = sents1.cuda()
            sents2 = sents2.cuda()
            gold = gold.cuda()
        assert sents1.size(0) == sents2.size(0)

        score = self.model(sents1, lens1, sents2, lens2)
        predicted = torch.max(score, 1)[1].view(gold.size())
        n_correct = (predicted.data == gold.data).sum()

        loss = self.criterion(score, gold)
        # nn.DataParallel may hand back one loss per replica; average them.
        if loss.size(0) > 1:
            loss = loss.mean()
        loss.backward()

        # Clipping the gradient norm guards against exploding gradients.
        trainable = (p for p in self.model.parameters() if p.requires_grad)
        clip_grad_norm(trainable, self.config.max_norm)
        self.optimizer.step()

        batch_acc = 100. * n_correct / len(batch)
        print_acc_total += batch_acc
        plot_acc_total += batch_acc

        if batch_no % self.config.print_every == 0:
            print_acc_avg = print_acc_total / self.config.print_every
            print_acc_total = 0
            fraction_done = batch_no / num_batches
            print('%s (%d %d%%) %.2f' %
                  (helper.show_progress(start, fraction_done), batch_no,
                   fraction_done * 100, print_acc_avg))

        if batch_no % self.config.plot_every == 0:
            self.train_accuracies.append(
                plot_acc_total / self.config.plot_every)
            plot_acc_total = 0
def evaluate(model, batches, batch_label, dictionary, outfile=None):
    """Evaluate question classifier model on test data.

    Args:
        model: network called as ``model(sent1, len1, sent2, len2,
            batch_label)``; when ``model.config.adversarial`` is set it
            returns a 3-tuple whose first item holds the class scores.
        batches: sequence of batches; every instance needs an ``id``
            attribute when `outfile` is given.
        batch_label: task name, one of ``'quora'``, ``'snli'`` or
            ``'multinli'``; selects the class names.
        dictionary: vocabulary forwarded to the ``helper`` converters.
        outfile: optional path.  When given, predictions are written as
            ``pairID,gold_label`` CSV rows and no metric is computed.

    Returns:
        Accuracy in percent when `outfile` is not given, otherwise ``None``.

    Raises:
        ValueError: if `batch_label` is not a supported task name.
    """
    names_by_task = {
        'quora': ['non_duplicate', 'duplicate'],
        'snli': ['entailment', 'neutral', 'contradiction'],
        'multinli': ['entailment', 'neutral', 'contradiction'],
    }
    # Fail early: previously an unknown label only surfaced as an unbound
    # `target_names` after the whole evaluation had run.
    if batch_label not in names_by_task:
        raise ValueError('unsupported batch_label: %r' % (batch_label,))
    target_names = names_by_task[batch_label]

    # Turn on evaluation mode which disables dropout.
    model.eval()

    n_correct, n_total = 0, 0
    y_preds, y_true, output = [], [], []
    for batch in batches:
        if args.use_elmo:
            converted = helper.batch_to_elmo_input(
                batch, dictionary, iseval=True)
        else:
            converted = helper.batch_to_tensors(
                batch, dictionary, iseval=True)
        (test_sentences1, sent_len1, test_sentences2, sent_len2,
         test_labels) = converted

        if args.cuda:
            test_sentences1 = test_sentences1.cuda()
            test_sentences2 = test_sentences2.cuda()
            test_labels = test_labels.cuda()
        # Both sides of a pair must hold the same number of examples
        # (the original compared the first tensor with itself).
        assert test_sentences1.size(0) == test_sentences2.size(0)

        if model.config.adversarial:
            softmax_prob, _, _ = model(test_sentences1, sent_len1,
                                       test_sentences2, sent_len2,
                                       batch_label)
        else:
            softmax_prob = model(test_sentences1, sent_len1,
                                 test_sentences2, sent_len2, batch_label)

        preds = torch.max(softmax_prob, 1)[1]
        batch_preds = preds.data.cpu().tolist()
        y_preds.extend(batch_preds)
        if outfile:
            for inst, pred in zip(batch, batch_preds):
                output.append([inst.id, pred])
        else:
            y_true.extend(test_labels.data.cpu().tolist())
            n_correct += (preds.view(test_labels.size()).data ==
                          test_labels.data).sum()
            n_total += len(batch)

    if outfile:
        with open(outfile, 'w') as f:
            f.write('pairID,gold_label' + '\n')
            for pair_id, pred in output:
                f.write(str(pair_id) + ',' + target_names[pred] + '\n')
        # No labels were counted in this mode, so there is no accuracy to
        # report (dividing by n_total == 0 would raise).
        return None

    print(classification_report(numpy.asarray(y_true),
                                numpy.asarray(y_preds),
                                target_names=target_names))
    return 100. * n_correct / n_total
def evaluate(model, batches, dictionary):
    """Evaluate question classifier model on test data.

    Runs the model over every batch and collects, for each example, the
    softmax probability of class 0.

    Args:
        model: network called as ``model(sent1, len1, sent2, len2)``; its
            output is indexed as ``[:, 0]`` after the softmax, i.e. it is
            treated as a 2-D ``(examples, classes)`` score matrix.
        batches: sequence of batches accepted by ``helper.batch_to_tensors``.
        dictionary: vocabulary forwarded to ``helper.batch_to_tensors``.

    Returns:
        1-D ``numpy.ndarray`` with one class-0 probability per example, in
        batch order; empty when `batches` is empty.  (The original kept
        only the last batch's scores because the result was overwritten on
        every iteration; for a single batch the result is unchanged.)
    """
    model.eval()  # Turn on evaluation mode which disables dropout.

    collected = []
    for batch in batches:
        test_sentences1, sent_len1, test_sentences2, sent_len2, _ = \
            helper.batch_to_tensors(batch, dictionary)
        if args.cuda:
            test_sentences1 = test_sentences1.cuda()
            test_sentences2 = test_sentences2.cuda()

        scores = model(test_sentences1, sent_len1, test_sentences2,
                       sent_len2)
        # Normalise over the class dimension explicitly; relying on the
        # implicit dimension is deprecated.
        probs = torch.nn.functional.softmax(scores, dim=1)
        collected.append(probs.cpu().data.numpy()[:, 0])

    if not collected:
        return np.array([])
    return np.concatenate(collected)
def evaluate(model, batches, dictionary, outfile=None):
    """Evaluate the model on test batches, or dump its predictions.

    Args:
        model: network called as ``model(sent1, len1, sent2, len2)``;
            ``model.config.log_test`` enables per-example logging.
        batches: sequence of batches; instances need an ``id`` attribute
            when `outfile` is given and ``sentence1_str`` when logging.
        dictionary: vocabulary forwarded to ``helper.batch_to_tensors``.
        outfile: optional path.  When given, predictions are written as
            ``pairID,gold_label`` CSV rows and no metric is computed.

    Returns:
        ``(accuracy, weighted_f1, classification_report)`` with both
        scores in percent when `outfile` is not given, otherwise ``None``.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()

    n_correct, n_total = 0, 0
    y_preds, y_true, output = [], [], []
    for batch in batches:
        (test_sentences1, sent_len1, test_sentences2, sent_len2,
         test_labels) = helper.batch_to_tensors(batch, dictionary, True)
        if args.cuda and torch.cuda.is_available():
            test_sentences1 = test_sentences1.cuda()
            test_sentences2 = test_sentences2.cuda()
            test_labels = test_labels.cuda()
        # Both sides of a pair must hold the same number of examples
        # (the original compared the first tensor with itself).
        assert test_sentences1.size(0) == test_sentences2.size(0)

        score = model(test_sentences1, sent_len1, test_sentences2,
                      sent_len2)
        preds = torch.max(score, 1)[1]
        batch_preds = preds.data.cpu().tolist()

        if outfile:
            for inst, pred in zip(batch, batch_preds):
                output.append([inst.id, pred])
            continue

        batch_true = test_labels.data.cpu().tolist()
        y_preds.extend(batch_preds)
        y_true.extend(batch_true)
        n_correct += (preds.view(test_labels.size()).data ==
                      test_labels.data).sum()
        n_total += len(batch)

        if model.config.log_test:
            # Log this batch's labels next to this batch's sentences.
            # Zipping the cumulative y_true/y_preds lists (as before)
            # paired every batch with the first batch's labels.
            with open('test_log.txt', 'a') as f:
                for gnd_truth, pred, inst in zip(batch_true, batch_preds,
                                                 batch):
                    f.write(str(gnd_truth) + '\t' + str(pred) + '\t' +
                            inst.sentence1_str + '\n')

    if outfile:
        target_names = ['entailment', 'neutral', 'contradiction']
        with open(outfile, 'w') as f:
            f.write('pairID,gold_label' + '\n')
            for pair_id, pred in output:
                f.write(str(pair_id) + ',' + target_names[pred] + '\n')
        return None

    # The report is only meaningful (and only needed) when labels were
    # collected, so it is no longer built in outfile mode.
    clf_report = classification_report(np.array(y_true), np.array(y_preds))
    weighted_f1 = 100. * f1_score(
        np.asarray(y_true), np.asarray(y_preds), average='weighted')
    return 100. * n_correct / n_total, weighted_f1, clf_report
def validate(self):
    """Compute the generator's accuracy over all development tasks.

    Batches of every task in ``self.dev_corpus`` are pooled, shuffled
    together with their task names, and scored one by one.

    Returns:
        Accuracy in percent: 100 * (correct predictions) / (examples seen).
    """
    # Evaluation mode switches dropout off.
    self.generator.eval()

    # Pool the batches of all tasks, remembering which task each came from.
    pooled_batches = []
    pooled_tasks = []
    for task_name, task in self.dev_corpus.items():
        task_batches = helper.batchify(task.data, self.config.batch_size)
        pooled_batches += task_batches
        pooled_tasks += [task_name] * len(task_batches)

    paired = list(zip(pooled_batches, pooled_tasks))
    numpy.random.shuffle(paired)
    pooled_batches, pooled_tasks = (list(col) for col in zip(*paired))
    print('number of dev batches = ', len(pooled_batches))

    hits = 0
    seen = 0
    for batch, task_name in zip(pooled_batches, pooled_tasks):
        to_input = (helper.batch_to_elmo_input
                    if self.config.use_elmo else helper.batch_to_tensors)
        sents1, lens1, sents2, lens2, gold = to_input(
            batch, self.dictionary, iseval=True)
        if self.config.cuda:
            sents1 = sents1.cuda()
            sents2 = sents2.cuda()
            gold = gold.cuda()
        assert sents1.size(0) == sents2.size(0)

        result = self.generator(sents1, lens1, sents2, lens2, task_name)
        if self.config.adversarial:
            # The adversarial generator returns three values here; only
            # the class scores are needed for accuracy.
            scores, adv_loss, diff_loss = result
        else:
            scores = result

        predicted = torch.max(scores, 1)[1].view(gold.size())
        hits += (predicted.data == gold.data).sum()
        seen += len(batch)

    return 100. * hits / seen
def evaluate(model, batches, batch_label, dictionary):
    """Evaluate question classifier model on test data.

    Prints a classification report and returns the accuracy.

    Args:
        model: network called as ``model(sent1, len1, sent2, len2,
            batch_label)``; when ``model.config.adversarial`` is set it
            returns a 3-tuple whose first item holds the class scores.
        batches: sequence of batches accepted by ``helper.batch_to_tensors``.
        batch_label: task name, one of ``'quora'``, ``'snli'`` or
            ``'multinli'``; selects the class names of the report.
        dictionary: vocabulary forwarded to ``helper.batch_to_tensors``.

    Returns:
        Accuracy in percent.

    Raises:
        ValueError: if `batch_label` is not a supported task name.
    """
    names_by_task = {
        'quora': ['non_duplicate', 'duplicate'],
        'snli': ['entailment', 'neutral', 'contradiction'],
        'multinli': ['entailment', 'neutral', 'contradiction'],
    }
    # Fail early: previously an unknown label only surfaced as an unbound
    # `target_names` after the whole evaluation had run.
    if batch_label not in names_by_task:
        raise ValueError('unsupported batch_label: %r' % (batch_label,))
    target_names = names_by_task[batch_label]

    # Turn on evaluation mode which disables dropout.
    model.eval()

    n_correct, n_total = 0, 0
    y_preds, y_true = [], []
    for batch in batches:
        (test_sentences1, sent_len1, test_sentences2, sent_len2,
         test_labels) = helper.batch_to_tensors(batch, dictionary)
        if args.cuda:
            test_sentences1 = test_sentences1.cuda()
            test_sentences2 = test_sentences2.cuda()
            test_labels = test_labels.cuda()
        # Both sides of a pair must hold the same number of examples
        # (the original compared the first tensor with itself).
        assert test_sentences1.size(0) == test_sentences2.size(0)

        if model.config.adversarial:
            softmax_prob, _, _ = model(test_sentences1, sent_len1,
                                       test_sentences2, sent_len2,
                                       batch_label)
        else:
            softmax_prob = model(test_sentences1, sent_len1,
                                 test_sentences2, sent_len2, batch_label)

        preds = torch.max(softmax_prob, 1)[1]
        y_preds.extend(preds.data.cpu().tolist())
        y_true.extend(test_labels.data.cpu().tolist())
        n_correct += (preds.view(test_labels.size()).data ==
                      test_labels.data).sum()
        n_total += len(batch)

    print(classification_report(numpy.asarray(y_true),
                                numpy.asarray(y_preds),
                                target_names=target_names))
    return 100. * n_correct / n_total
def evaluate(model, batches, dictionary, outfile=None):
    """Evaluate the model on test batches, or dump its predictions.

    Args:
        model: network called as ``model(sent1, len1, sent2, len2)``.
        batches: sequence of batches; instances need an ``id`` attribute
            when `outfile` is given.
        dictionary: vocabulary forwarded to ``helper.batch_to_tensors``.
        outfile: optional path.  When given, predictions are written as
            ``pairID,gold_label`` CSV rows and no metric is computed.

    Returns:
        ``(accuracy, weighted_f1)``, both in percent, when `outfile` is
        not given, otherwise ``None``.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()

    n_correct, n_total = 0, 0
    y_preds, y_true, output = [], [], []
    for batch in batches:
        (test_sentences1, sent_len1, test_sentences2, sent_len2,
         test_labels) = helper.batch_to_tensors(batch, dictionary)
        if args.cuda:
            test_sentences1 = test_sentences1.cuda()
            test_sentences2 = test_sentences2.cuda()
            test_labels = test_labels.cuda()
        # Both sides of a pair must hold the same number of examples
        # (the original compared the first tensor with itself).
        assert test_sentences1.size(0) == test_sentences2.size(0)

        score = model(test_sentences1, sent_len1, test_sentences2,
                      sent_len2)
        preds = torch.max(score, 1)[1]
        batch_preds = preds.data.cpu().tolist()
        if outfile:
            for inst, pred in zip(batch, batch_preds):
                output.append([inst.id, pred])
        else:
            y_preds.extend(batch_preds)
            y_true.extend(test_labels.data.cpu().tolist())
            n_correct += (preds.view(test_labels.size()).data ==
                          test_labels.data).sum()
            n_total += len(batch)

    if outfile:
        target_names = ['entailment', 'neutral', 'contradiction']
        with open(outfile, 'w') as f:
            f.write('pairID,gold_label' + '\n')
            for pair_id, pred in output:
                f.write(str(pair_id) + ',' + target_names[pred] + '\n')
        return None

    weighted_f1 = 100. * f1_score(numpy.asarray(y_true),
                                  numpy.asarray(y_preds),
                                  average='weighted')
    return 100. * n_correct / n_total, weighted_f1
def validate(self, dev_corpus):
    """Compute accuracy on a development corpus, printing progress.

    A progress line with the running accuracy is printed every
    ``self.config.print_every`` batches, or after every batch when
    ``self.config.debug`` is truthy.

    Args:
        dev_corpus: object whose ``data`` attribute is handed to
            ``helper.batchify`` together with ``self.config.batch_size``.

    Returns:
        Accuracy in percent: 100 * (correct predictions) / (examples seen).
    """
    # Evaluation mode switches dropout off.
    self.model.eval()

    print_every = self.config.print_every
    start = time.time()
    dev_batches = helper.batchify(dev_corpus.data, self.config.batch_size)
    print('number of dev batches = ', len(dev_batches))

    num_batches = len(dev_batches)
    hits = 0
    seen = 0
    for batch_no, batch in enumerate(dev_batches, start=1):
        sents1, lens1, sents2, lens2, gold = helper.batch_to_tensors(
            batch, self.dictionary, True)
        if self.config.cuda:
            sents1 = sents1.cuda()
            sents2 = sents2.cuda()
            gold = gold.cuda()
        assert sents1.size(0) == sents2.size(0)

        score = self.model(sents1, lens1, sents2, lens2)
        predicted = torch.max(score, 1)[1].view(gold.size())
        hits += (predicted.data == gold.data).sum()
        seen += len(batch)

        running_acc = 100. * hits / seen
        if batch_no % print_every == 0 or self.config.debug:
            fraction_done = batch_no / num_batches
            # The third number in the line is a fixed 100.0 placeholder.
            print('%s (%d %d%%) (%.2f) %.2f' %
                  (helper.show_progress(start, fraction_done), batch_no,
                   fraction_done * 100, 100.0, running_acc))

    return 100. * hits / seen
def train(self, train_corpus):
    """Run one training pass over `train_corpus` with manual clipping.

    After back-propagation every gradient is divided by the batch size and
    the global gradient norm is computed.  If the norm exceeds
    ``self.config.clip`` the learning rate of the optimizer's first
    parameter group is scaled down for that single step instead of
    rescaling the gradients.  Batch accuracies are printed every
    ``self.config.print_every`` batches and appended to
    ``self.train_accuracies`` every ``self.config.plot_every`` batches.

    Args:
        train_corpus: object whose ``data`` attribute is handed to
            ``helper.batchify`` together with ``self.config.batch_size``.
    """
    # Turn on training mode which enables dropout.
    self.model.train()

    # Splitting the data in batches
    train_batches = helper.batchify(train_corpus.data,
                                    self.config.batch_size)
    print('number of train batches = ', len(train_batches))

    start = time.time()
    print_acc_total = 0
    plot_acc_total = 0
    num_batches = len(train_batches)
    for batch_no, batch in enumerate(train_batches, start=1):
        # Clearing out all previous gradient computations.
        self.optimizer.zero_grad()

        (train_sentences1, sent_len1, train_sentences2, sent_len2,
         train_labels) = helper.batch_to_tensors(batch, self.dictionary)
        if self.config.cuda:
            train_sentences1 = train_sentences1.cuda()
            train_sentences2 = train_sentences2.cuda()
            train_labels = train_labels.cuda()
        assert train_sentences1.size(0) == train_sentences2.size(0)

        score = self.model(train_sentences1, sent_len1, train_sentences2,
                           sent_len2)
        n_correct = (torch.max(score, 1)[1].view(
            train_labels.size()).data == train_labels.data).sum()

        loss = self.criterion(score, train_labels)
        # Important if we are using nn.DataParallel()
        if loss.size(0) > 1:
            loss = loss.mean()
        loss.backward()

        # gradient clipping (off by default)
        batch_size = train_sentences1.size(0)
        squared_norm = 0.0
        for p in self.model.parameters():
            # Parameters that took no part in the forward pass have no
            # gradient; skip them instead of failing on `None.data`.
            if not p.requires_grad or p.grad is None:
                continue
            p.grad.data.div_(batch_size)  # divide by the actual batch size
            # float() accepts both a plain number and a one-element tensor.
            squared_norm += float(p.grad.data.norm()) ** 2
        total_norm = squared_norm ** 0.5

        shrink_factor = 1
        if total_norm > self.config.clip:
            shrink_factor = self.config.clip / total_norm

        # NOTE: only the first parameter group is rescaled, as before.
        group = self.optimizer.param_groups[0]
        current_lr = group['lr']  # current lr (no external "lr", for adam)
        group['lr'] = current_lr * shrink_factor  # just for update
        try:
            self.optimizer.step()
        finally:
            # Always restore the learning rate, even if the step fails.
            group['lr'] = current_lr

        batch_acc = 100. * n_correct / len(batch)
        print_acc_total += batch_acc
        plot_acc_total += batch_acc

        if batch_no % self.config.print_every == 0:
            print_acc_avg = print_acc_total / self.config.print_every
            print_acc_total = 0
            print('%s (%d %d%%) %.2f' % (
                helper.show_progress(start, batch_no / num_batches),
                batch_no, batch_no / num_batches * 100, print_acc_avg))

        if batch_no % self.config.plot_every == 0:
            plot_acc_avg = plot_acc_total / self.config.plot_every
            self.train_accuracies.append(plot_acc_avg)
            plot_acc_total = 0
def evaluate(model, batches, dictionary, outfile=None, selection_time=0.9318):
    """Evaluate the model and estimate the speed-up over padded full text.

    Besides accuracy this tracks how many tokens were actually processed
    (sum of the sentence lengths) versus how many a padded full-text run
    would process (``2 * force_min_sen_len * args.eval_batch_size`` per
    batch; ``force_min_sen_len`` is a module-level name not visible here),
    and prints the resulting time and speed-up estimates.

    Args:
        model: network called as ``model(sent1, len1, sent2, len2)``.
        batches: non-empty sequence of batches; instances need an ``id``
            attribute when `outfile` is given.
        dictionary: vocabulary forwarded to ``helper.batch_to_tensors``.
        outfile: optional path.  When given, predictions are written as
            ``pairID,gold_label`` CSV rows and nothing is returned.
        selection_time: seconds added to the measured evaluation time
            (the default 0.9318 is noted in the source as the value for
            IMDB with the budget model).  This argument is now honoured;
            it used to be reset to 0 inside the function.

    Returns:
        ``(accuracy, weighted_f1, seconds)`` when `outfile` is not given,
        with both scores in percent and ``seconds`` the measured time plus
        `selection_time`; otherwise ``None``.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()

    n_correct, n_total = 0, 0
    y_preds, y_true, output = [], [], []
    start = time.time()
    num_batches = len(batches)
    num_tokens_padded = 0
    selected_tokens = 0

    def accuracy():
        # No labelled example is counted while writing predictions to
        # `outfile`, so avoid dividing by zero in that mode.
        return 100. * n_correct / n_total if n_total else 0.0

    for batch_no, batch in enumerate(batches, start=1):
        (test_sentences1, sent_len1, test_sentences2, sent_len2,
         test_labels) = helper.batch_to_tensors(batch, dictionary, True)
        if args.cuda:
            test_sentences1 = test_sentences1.cuda()
            test_sentences2 = test_sentences2.cuda()
            test_labels = test_labels.cuda()
        # Both sides of a pair must hold the same number of examples
        # (the original compared the first tensor with itself).
        assert test_sentences1.size(0) == test_sentences2.size(0)

        selected_tokens += sum(sent_len1) + sum(sent_len2)
        num_tokens_padded += 2 * (force_min_sen_len * args.eval_batch_size)

        score = model(test_sentences1, sent_len1, test_sentences2,
                      sent_len2)
        preds = torch.max(score, 1)[1]
        batch_preds = preds.data.cpu().tolist()
        if outfile:
            for inst, pred in zip(batch, batch_preds):
                output.append([inst.id, pred])
        else:
            y_preds.extend(batch_preds)
            y_true.extend(test_labels.data.cpu().tolist())
            n_correct += (preds.view(test_labels.size()).data ==
                          test_labels.data).sum()
            n_total += len(batch)

        if batch_no % args.print_every == 0:
            padded_p = 100.0 * selected_tokens / num_tokens_padded
            print('%s (%d %d%%) (padded %.2f) %.2f' % (
                helper.show_progress(start, batch_no / num_batches),
                batch_no, batch_no / num_batches * 100, padded_p,
                accuracy()))

    s = time.time() - start
    # Scale the measured time by the padded/processed token ratio.
    estimated_full_text_padded_time = s * num_tokens_padded / selected_tokens
    s += selection_time
    print('estimated full text time padded = %s' %
          (helper.convert_to_minutes(estimated_full_text_padded_time)))

    padded_p = 100.0 * selected_tokens / num_tokens_padded
    padded_speed_up = 1.0 * estimated_full_text_padded_time / s
    # All batches are done at this point: progress is 1.0, i.e. 100%.
    print('total: %s (%d %d%%)(padded %.2f) %.2f' % (
        helper.show_progress(start, 1.0), num_batches, 100, padded_p,
        accuracy()))
    print('estimated padded speed up = %0.2f, selection text percentage spped up padded = %0.2f' %
          (padded_speed_up, 100.0 / padded_p))

    if outfile:
        target_names = ['entailment', 'neutral', 'contradiction']
        with open(outfile, 'w') as f:
            f.write('pairID,gold_label' + '\n')
            for pair_id, pred in output:
                f.write(str(pair_id) + ',' + target_names[pred] + '\n')
        return None

    weighted_f1 = 100. * f1_score(numpy.asarray(y_true),
                                  numpy.asarray(y_preds),
                                  average='weighted')
    return 100. * n_correct / n_total, weighted_f1, s
def train(self, train_corpus, epoch):
    """Run one training pass over shuffled batches of `train_corpus`.

    For every batch the gradients are cleared, the model is run, the loss
    from ``self.criterion`` is back-propagated, gradients are clipped to
    ``self.config.max_norm`` and the optimizer takes one step.  Batch
    accuracies are printed every ``self.config.print_every`` batches and
    appended to ``self.train_accuracies`` every ``self.config.plot_every``
    batches.

    Args:
        train_corpus: object whose ``data`` attribute is handed to
            ``helper.batchify``.
        epoch: accepted for the caller's convenience; not used here.
    """
    # Training mode switches dropout on.
    self.model.train()

    shuffle = True
    print(shuffle)
    train_batches = helper.batchify(train_corpus.data,
                                    self.config.batch_size, shuffle)
    print('number of train batches = ', len(train_batches))

    start = time.time()
    print_acc_total = 0
    plot_acc_total = 0
    num_batches = len(train_batches)
    for batch_no, batch in enumerate(train_batches, start=1):
        # Start every step from zeroed gradients.
        self.optimizer.zero_grad()

        sents1, lens1, sents2, lens2, gold = helper.batch_to_tensors(
            batch, self.dictionary)
        if self.config.cuda:
            sents1 = sents1.cuda()
            sents2 = sents2.cuda()
            gold = gold.cuda()
        assert sents1.size(0) == sents2.size(0)

        score = self.model(sents1, lens1, sents2, lens2)
        predicted = torch.max(score, 1)[1].view(gold.size())
        n_correct = (predicted.data == gold.data).sum()

        loss = self.criterion(score, gold)
        # A loss with more than one element is reduced to its mean.
        if loss.size(0) > 1:
            loss = loss.mean()
        loss.backward()

        # Clipping the gradient norm guards against exploding gradients;
        # the norm returned by the call is not used.
        trainable = (p for p in self.model.parameters() if p.requires_grad)
        clip_grad_norm(trainable, self.config.max_norm)
        self.optimizer.step()

        batch_acc = 100. * n_correct / len(batch)
        print_acc_total += batch_acc
        plot_acc_total += batch_acc

        if batch_no % self.config.print_every == 0:
            print_acc_avg = print_acc_total / self.config.print_every
            print_acc_total = 0
            fraction_done = batch_no / num_batches
            print('%s (%d %d%%) %.2f' %
                  (helper.show_progress(start, fraction_done), batch_no,
                   fraction_done * 100, print_acc_avg))

        if batch_no % self.config.plot_every == 0:
            self.train_accuracies.append(
                plot_acc_total / self.config.plot_every)
            plot_acc_total = 0
def train(self):
    """Run one multi-task training pass, optionally adversarial.

    Batches of every task in ``self.train_corpus`` are pooled and shuffled
    together with their task names.  With ``self.config.adversarial`` set,
    each batch first updates the discriminator on detached shared
    representations and then updates the generator on cross-entropy plus
    ``beta`` * adversarial loss plus ``gamma`` * difference loss.
    Otherwise only the generator is trained on the criterion loss.
    A running accuracy line is rewritten in place on stdout every
    ``self.config.print_every`` batches, and averaged accuracies are
    appended to ``self.train_accuracies`` every ``self.config.plot_every``
    batches.
    """
    # Training mode switches dropout on.
    self.generator.train()

    # Pool the batches of all tasks, remembering which task each came from.
    pooled_batches = []
    pooled_tasks = []
    for task_name, task in self.train_corpus.items():
        task_batches = helper.batchify(task.data, self.config.batch_size)
        pooled_batches += task_batches
        pooled_tasks += [task_name] * len(task_batches)

    paired = list(zip(pooled_batches, pooled_tasks))
    numpy.random.shuffle(paired)
    pooled_batches, pooled_tasks = (list(col) for col in zip(*paired))
    print('number of train batches = ', len(pooled_batches))

    def task_loss(rep1, rep2, task_name):
        # Run the discriminator on both sentence representations and
        # return the negated summed score of the batch's own task.
        task_prob1 = self.discriminator(rep1)  # B X num_tasks
        task_prob2 = self.discriminator(rep2)  # B X num_tasks
        comb_prob = torch.cat((task_prob1, task_prob2), 0)  # 2B X num_tasks
        task_prob = torch.sum(comb_prob, 0).squeeze()  # size = |num_tasks|
        return -1 * task_prob[self.task_ids[task_name]]

    def clip(module):
        # Clipping the gradient norm guards against exploding gradients.
        trainable = (p for p in module.parameters() if p.requires_grad)
        clip_grad_norm(trainable, self.config.max_norm)

    start = time.time()
    num_back = 0
    print_acc_total = 0
    plot_acc_total = 0
    num_batches = len(pooled_batches)
    numbered = enumerate(zip(pooled_batches, pooled_tasks), start=1)
    for batch_no, (batch, task_name) in numbered:
        to_input = (helper.batch_to_elmo_input
                    if self.config.use_elmo else helper.batch_to_tensors)
        sents1, lens1, sents2, lens2, gold = to_input(batch,
                                                      self.dictionary)
        if self.config.cuda:
            sents1 = sents1.cuda()
            sents2 = sents2.cuda()
            gold = gold.cuda()
        assert sents1.size(0) == sents2.size(0)

        if self.config.adversarial:
            # --- discriminator step on detached representations ---
            self.optimizerD.zero_grad()
            scores, diff_loss, shared_rep = self.generator(
                sents1, lens1, sents2, lens2, task_name)
            predicted = torch.max(scores, 1)[1].view(gold.size())
            n_correct = (predicted.data == gold.data).sum()

            shared_rep1 = shared_rep[0]
            shared_rep2 = shared_rep[1]
            adv_loss = task_loss(shared_rep1.detach(),
                                 shared_rep2.detach(), task_name)
            adv_loss.backward()
            clip(self.discriminator)
            self.optimizerD.step()

            # --- generator step, gradients flow through the reps ---
            self.optimizerG.zero_grad()
            cross_entropy_loss = self.criterion(scores, gold)
            adv_loss = task_loss(shared_rep1, shared_rep2, task_name)
            total_loss = (cross_entropy_loss +
                          self.config.beta * adv_loss +
                          self.config.gamma * diff_loss)
            # nn.DataParallel may hand back one loss per replica.
            if total_loss.size(0) > 1:
                total_loss = total_loss.mean()
            total_loss.backward()
            clip(self.generator)
            self.optimizerG.step()
        else:
            self.optimizerG.zero_grad()
            scores = self.generator(sents1, lens1, sents2, lens2,
                                    task_name)
            predicted = torch.max(scores, 1)[1].view(gold.size())
            n_correct = (predicted.data == gold.data).sum()

            loss = self.criterion(scores, gold)
            # nn.DataParallel may hand back one loss per replica.
            if loss.size(0) > 1:
                loss = loss.mean()
            loss.backward()
            clip(self.generator)
            self.optimizerG.step()

        batch_acc = 100. * n_correct / len(batch)
        print_acc_total += batch_acc
        plot_acc_total += batch_acc

        if batch_no % self.config.print_every == 0:
            # Erase the previous progress line before writing the new one.
            for eraser in ("\b", " ", "\b"):
                sys.stdout.write(eraser * num_back)
            fraction_done = batch_no / num_batches
            log_info = '%s (%d %d%%) %.2f%%' % (
                helper.show_progress(start, fraction_done), batch_no,
                fraction_done * 100, print_acc_total / batch_no)
            sys.stdout.write(log_info)
            sys.stdout.flush()
            num_back = len(log_info)

        if batch_no % self.config.plot_every == 0:
            self.train_accuracies.append(
                plot_acc_total / self.config.plot_every)
            plot_acc_total = 0

        # Release cached GPU memory so other applications can see it.
        torch.cuda.empty_cache()
def train(self, train_corpus, epoch):
    """Run one training pass of the single-sentence model.

    Only the first sentence tensor of each batch is fed to
    ``self.model``; the second one is still converted and size-checked.
    Gradients are clipped to ``self.config.max_norm`` before every
    optimizer step.  Batch accuracies are printed every
    ``self.config.print_every`` batches and appended to
    ``self.train_accuracies`` every ``self.config.plot_every`` batches.

    Args:
        train_corpus: object whose ``data`` attribute is handed to
            ``helper.batchify``.
        epoch: accepted for the caller's convenience; not used here.
    """
    # Training mode switches dropout on.
    self.model.train()

    shuffle = True
    print(shuffle)
    train_batches = helper.batchify(train_corpus.data,
                                    self.config.batch_size, shuffle)
    print('number of train batches = ', len(train_batches))

    start = time.time()
    print_acc_total = 0
    plot_acc_total = 0
    num_batches = len(train_batches)
    for batch_no, batch in enumerate(train_batches, start=1):
        # Start every step from zeroed gradients.
        self.optimizer.zero_grad()

        sents1, lens1, sents2, lens2, gold = helper.batch_to_tensors(
            batch, self.dictionary)
        if self.config.cuda:
            sents1 = sents1.cuda()
            sents2 = sents2.cuda()
            gold = gold.cuda()
        assert sents1.size(0) == sents2.size(0)

        score = self.model(sents1)
        predicted = torch.max(score, 1)[1].view(gold.size())
        n_correct = (predicted.data == gold.data).sum()

        loss = self.criterion(score, gold)
        # A loss with more than one element is reduced to its mean.
        if loss.size(0) > 1:
            loss = loss.mean()
        loss.backward()

        # Clipping the gradient norm guards against exploding gradients;
        # the norm returned by the call is not used.
        trainable = (p for p in self.model.parameters() if p.requires_grad)
        clip_grad_norm(trainable, self.config.max_norm)
        self.optimizer.step()

        batch_acc = 100. * n_correct / len(batch)
        print_acc_total += batch_acc
        plot_acc_total += batch_acc

        if batch_no % self.config.print_every == 0:
            print_acc_avg = print_acc_total / self.config.print_every
            print_acc_total = 0
            fraction_done = batch_no / num_batches
            print('%s (%d %d%%) %.2f' %
                  (helper.show_progress(start, fraction_done), batch_no,
                   fraction_done * 100, print_acc_avg))

        if batch_no % self.config.plot_every == 0:
            self.train_accuracies.append(
                plot_acc_total / self.config.plot_every)
            plot_acc_total = 0
#### testing file_path = args.output_base_path + args.task + '/' + args.model_file_name print('loading selector from: ', file_path) helper.load_model(selector, file_path, 'state_dict', args.cuda) selector.eval() dev_batches = helper.batchify(test_corpus.data, args.batch_size) print('number of dev batches = ', len(dev_batches)) num_batches = len(dev_batches) n_correct, n_total = 0, 0 with open('../bcn_output/sst/predicted_text_words_dummy.txt', 'w') as wf: for batch_no in range(1, num_batches + 1): dev_sentences1, sent_len1, dev_sentences2, sent_len2, dev_labels = helper.batch_to_tensors( dev_batches[batch_no - 1], dictionary, True) if args.cuda: dev_sentences1 = dev_sentences1.cuda() dev_sentences2 = dev_sentences2.cuda() dev_labels = dev_labels.cuda() assert dev_sentences1.size(0) == dev_sentences2.size(0) score = selector(dev_sentences1) n_correct += (torch.max(score, 1)[1].view( dev_labels.size()).data == dev_labels.data).sum() n_total += len(dev_batches[batch_no - 1]) for (sent1, sel, tl) in zip(dev_sentences1, torch.max(score, 1)[1].view(dev_labels.size()).data,
def train(self):
    """Run one multi-task training pass over all training corpora.

    Batches of every task in ``self.train_corpus`` are pooled and shuffled
    together with their task names; the task name of each batch is passed
    to the model as its fifth argument.  Gradients are clipped to
    ``self.config.max_norm`` before every optimizer step, and a running
    accuracy line is rewritten in place on stdout every
    ``self.config.print_every`` batches.
    """
    # Training mode switches dropout on.
    self.model.train()

    # Pool the batches of all tasks, remembering which task each came from.
    pooled_batches = []
    pooled_tasks = []
    for task_name, task in self.train_corpus.items():
        task_batches = helper.batchify(task.data, self.config.batch_size)
        pooled_batches += task_batches
        pooled_tasks += [task_name] * len(task_batches)

    paired = list(zip(pooled_batches, pooled_tasks))
    numpy.random.shuffle(paired)
    pooled_batches, pooled_tasks = (list(col) for col in zip(*paired))
    print('number of train batches = ', len(pooled_batches))

    start = time.time()
    print_acc_total = 0
    plot_acc_total = 0
    num_back = 0
    num_batches = len(pooled_batches)
    numbered = enumerate(zip(pooled_batches, pooled_tasks), start=1)
    for batch_no, (batch, task_name) in numbered:
        # Start every step from zeroed gradients.
        self.optimizer.zero_grad()

        if self.config.use_elmo:
            converted = helper.batch_to_elmo_tensors(batch, self.dictionary)
        else:
            converted = helper.batch_to_tensors(batch, self.dictionary)
        sents1, lens1, sents2, lens2, gold = converted

        if self.config.cuda:
            sents1 = sents1.cuda()
            sents2 = sents2.cuda()
            gold = gold.cuda()
        assert sents1.size(0) == sents2.size(0)

        score = self.model(sents1, lens1, sents2, lens2, task_name)
        predicted = torch.max(score, 1)[1].view(gold.size())
        n_correct = (predicted.data == gold.data).sum()

        loss = self.criterion(score, gold)
        loss.backward()

        # Clipping the gradient norm guards against exploding gradients.
        trainable = (p for p in self.model.parameters() if p.requires_grad)
        clip_grad_norm(trainable, self.config.max_norm)
        self.optimizer.step()

        batch_acc = 100. * n_correct / len(batch)
        print_acc_total += batch_acc
        plot_acc_total += batch_acc

        if batch_no % self.config.print_every == 0:
            # Erase the previous progress line before writing the new one.
            for eraser in ("\b", " ", "\b"):
                sys.stdout.write(eraser * num_back)
            fraction_done = batch_no / num_batches
            log_info = '%s (%d %d%%) %.2f' % (
                helper.show_progress(start, fraction_done), batch_no,
                fraction_done * 100, print_acc_total / batch_no)
            sys.stdout.write(log_info)
            sys.stdout.flush()
            num_back = len(log_info)