# Imports assumed by the trainer/evaluator methods below. The repo-local helpers
# (convert_examples_to_features, convert_examples_to_hierarchical_features,
# pad_input_matrix, BertEvaluator, BertHierarchicalEvaluator, the METRIC_*
# constants, etc.) are expected to come from the surrounding package.
import datetime
import os
import pickle
import time
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from sklearn import metrics
from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
                              TensorDataset)
from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm, trange


def train(self):
    if self.args.is_hierarchical:
        train_features = convert_examples_to_hierarchical_features(
            self.train_examples, self.args.max_seq_length, self.tokenizer)
    else:
        train_features = convert_examples_to_features(
            self.train_examples, self.args.max_seq_length, self.tokenizer)

    unpadded_input_ids = [f.input_ids for f in train_features]
    unpadded_input_mask = [f.input_mask for f in train_features]
    unpadded_segment_ids = [f.segment_ids for f in train_features]

    if self.args.is_hierarchical:
        pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
        pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
        pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

    print("Number of examples:", len(self.train_examples))
    print("Batch size:", self.args.batch_size)
    print("Num of steps:", self.num_train_optimization_steps)

    padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
    padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
    padded_segment_ids = torch.tensor(unpadded_segment_ids, dtype=torch.long)
    label_ids = torch.tensor([f.label_id for f in train_features], dtype=torch.long)

    train_data = TensorDataset(padded_input_ids, padded_input_mask,
                               padded_segment_ids, label_ids)
    if self.args.local_rank == -1:
        train_sampler = RandomSampler(train_data)
    else:
        train_sampler = DistributedSampler(train_data)
    train_dataloader = DataLoader(train_data, sampler=train_sampler,
                                  batch_size=self.args.batch_size)

    for epoch in trange(int(self.args.epochs), desc="Epoch"):
        self.train_epoch(train_dataloader)
        dev_evaluator = BertEvaluator(self.model, self.processor, self.args,
                                      split='dev')
        dev_acc, dev_precision, dev_recall, dev_f1, dev_loss = \
            dev_evaluator.get_scores()[0]

        # Print validation results
        tqdm.write(self.log_header)
        tqdm.write(self.log_template.format(epoch + 1, self.iterations, epoch + 1,
                                            self.args.epochs, dev_acc, dev_precision,
                                            dev_recall, dev_f1, dev_loss))

        # Update validation results
        if dev_f1 > self.best_dev_f1:
            self.unimproved_iters = 0
            self.best_dev_f1 = dev_f1
            torch.save(self.model, self.snapshot_path)
        else:
            self.unimproved_iters += 1
            if self.unimproved_iters >= self.args.patience:
                self.early_stop = True
                tqdm.write("Early Stopping. Epoch: {}, Best Dev F1: {}".format(
                    epoch, self.best_dev_f1))
                break
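# `pad_input_matrix` is called throughout but not defined in this file. A
# minimal sketch of what it plausibly does, inferred from the call sites:
# it truncates/pads, in place, each document's list of sentence rows to
# `max_doc_length` rows (row width is already fixed by `max_seq_length`).
# This is an assumption, not the repo's actual implementation.
def pad_input_matrix(unpadded_matrix, max_doc_length):
    for doc in unpadded_matrix:
        # Drop sentence rows beyond the per-document cap.
        del doc[max_doc_length:]
        # Pad short documents with all-zero sentence rows of the same width.
        row_width = len(doc[0]) if doc else 0
        while len(doc) < max_doc_length:
            doc.append([0] * row_width)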
def get_bert_layers(self, silent=False, last_bert_layers=-1):
    if self.args.is_hierarchical:
        eval_features = convert_examples_to_hierarchical_features(
            self.eval_examples, self.args.max_seq_length, self.tokenizer)
    else:
        eval_features = convert_examples_to_features(
            self.eval_examples, self.args.max_seq_length, self.tokenizer)

    unpadded_input_ids = [f.input_ids for f in eval_features]
    unpadded_input_mask = [f.input_mask for f in eval_features]
    unpadded_segment_ids = [f.segment_ids for f in eval_features]

    if self.args.is_hierarchical:
        pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
        pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
        pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

    padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
    padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
    padded_segment_ids = torch.tensor(unpadded_segment_ids, dtype=torch.long)
    label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)

    eval_data = TensorDataset(padded_input_ids, padded_input_mask,
                              padded_segment_ids, label_ids)
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler,
                                 batch_size=self.args.batch_size)

    self.model.eval()
    bert_layers_l, label_ids_l = [], []
    for input_ids, input_mask, segment_ids, label_ids in tqdm(
            eval_dataloader, desc="Evaluating", disable=silent):
        input_ids = input_ids.to(self.args.device)
        input_mask = input_mask.to(self.args.device)
        segment_ids = segment_ids.to(self.args.device)
        label_ids = label_ids.to(self.args.device)

        with torch.no_grad():
            bert_layers = self.model.get_bert_embedding(
                input_ids, segment_ids, input_mask,
                last_bert_layers=last_bert_layers)

        # Labels are one-hot; recover the class index per example.
        label_ids = torch.argmax(label_ids, dim=1).cpu().detach().numpy()
        bert_layers_l.extend(bert_layers)
        label_ids_l.extend(label_ids)

    bert_layers_l = torch.stack(bert_layers_l, dim=0)
    return bert_layers_l, label_ids_l
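# A hedged downstream-use sketch: the stacked embeddings returned by
# get_bert_layers can feed a simple linear probe. Random arrays stand in for
# the real (embeddings, labels) pair here so the snippet runs on its own;
# nothing below is taken from the original repo.
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
embeddings = rng.normal(size=(64, 768))   # stands in for bert_layers_l.cpu().numpy()
labels = rng.integers(0, 2, size=64)      # stands in for label_ids_l
probe = LogisticRegression(max_iter=1000).fit(embeddings, labels)
print("Linear-probe train accuracy:", probe.score(embeddings, labels))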
def train(self, epochs):
    os.makedirs(self.model_outfile, exist_ok=True)
    os.makedirs(os.path.join(self.model_outfile, self.config['dataset'].NAME),
                exist_ok=True)

    if self.config['model'] in {'BERT-Base', 'BERT-Large',
                                'HBERT-Base', 'HBERT-Large'}:
        train_features = convert_examples_to_features(
            self.train_examples, self.config['max_seq_length'], self.tokenizer,
            self.config['is_hierarchical'])

        unpadded_input_ids = [f.input_ids for f in train_features]
        unpadded_input_mask = [f.input_mask for f in train_features]
        unpadded_segment_ids = [f.segment_ids for f in train_features]

        if self.config['is_hierarchical']:
            pad_input_matrix(unpadded_input_ids, self.config['max_doc_length'])
            pad_input_matrix(unpadded_input_mask, self.config['max_doc_length'])
            pad_input_matrix(unpadded_segment_ids, self.config['max_doc_length'])

        padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
        padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
        padded_segment_ids = torch.tensor(unpadded_segment_ids, dtype=torch.long)
        label_ids = torch.tensor([f.label_id for f in train_features],
                                 dtype=torch.long)

        train_data = TensorDataset(padded_input_ids, padded_input_mask,
                                   padded_segment_ids, label_ids)
        train_sampler = RandomSampler(train_data)
        self.train_loader = DataLoader(train_data, sampler=train_sampler,
                                       batch_size=self.config['batch_size'])

    with trange(1, epochs + 1, desc="Epoch") as t_epochs:
        for epoch in t_epochs:
            self.train_epoch()

            # Evaluate performance on the validation set
            dev_acc, dev_precision, dev_ap, dev_f1, dev_loss = \
                self.dev_evaluator.get_scores()[0]
            tqdm.write(self.log_header)
            tqdm.write(self.log_template.format(epoch, self.iterations, epoch,
                                                epochs, dev_acc, dev_precision,
                                                dev_ap, dev_f1, dev_loss))

            # Update validation results. NOTE: despite its name,
            # `best_dev_ap` tracks the best dev F1 here.
            if dev_f1 > self.best_dev_ap:
                self.unimproved_iters = 0
                self.best_dev_ap = dev_f1
                torch.save(self.model, self.snapshot_path)
            else:
                self.unimproved_iters += 1
                if self.unimproved_iters >= self.patience:
                    self.early_stop = True
                    tqdm.write("Early Stopping. Epoch: {}, Best Dev F1: {}".format(
                        epoch, self.best_dev_ap))
                    t_epochs.close()
                    break
def get_scores(self, silent=False, return_indices=False):
    all_indices = []
    if self.args.is_hierarchical:
        eval_features = convert_examples_to_hierarchical_features(
            self.eval_examples, self.args.max_seq_length, self.tokenizer)
    else:
        eval_features = convert_examples_to_features_with_emotion(
            self.eval_examples, self.args.max_seq_length, self.tokenizer,
            self.emotioner)

    unpadded_input_ids = [f.input_ids for f in eval_features]
    unpadded_input_mask = [f.input_mask for f in eval_features]
    unpadded_segment_ids = [f.segment_ids for f in eval_features]
    unpadded_emotion_scores = [f.sentiment_scores for f in eval_features]

    if self.args.is_hierarchical:
        pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
        pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
        pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

    padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
    padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
    padded_segment_ids = torch.tensor(unpadded_segment_ids, dtype=torch.long)
    padded_emotion_ids = torch.tensor(unpadded_emotion_scores, dtype=torch.long)
    label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)

    eval_data = TensorDataset(padded_input_ids, padded_input_mask,
                              padded_segment_ids, padded_emotion_ids, label_ids)
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler,
                                 batch_size=self.args.batch_size)

    self.model.eval()
    total_loss = 0
    nb_eval_steps, nb_eval_examples = 0, 0
    predicted_labels, target_labels = list(), list()

    for input_ids, input_mask, segment_ids, emotion_ids, label_ids in tqdm(
            eval_dataloader, desc="Evaluating", disable=silent):
        input_ids = input_ids.to(self.args.device)
        input_mask = input_mask.to(self.args.device)
        segment_ids = segment_ids.to(self.args.device)
        emotion_ids = emotion_ids.to(self.args.device)
        label_ids = label_ids.to(self.args.device)

        with torch.no_grad():
            if return_indices:
                outs = self.model(input_ids, segment_ids, input_mask,
                                  emotion_ids=emotion_ids,
                                  return_indices=return_indices)
            else:
                outs = self.model(input_ids, segment_ids, input_mask,
                                  emotion_ids=emotion_ids)
            if isinstance(outs, tuple):
                outs, _ = outs
            if return_indices:
                logits, indices = outs
                all_indices.extend(indices.cpu().detach().numpy())
            else:
                logits = outs

        if self.args.is_multilabel:
            predicted_labels.extend(
                torch.sigmoid(logits).round().long().cpu().detach().numpy())
            target_labels.extend(label_ids.cpu().detach().numpy())
            loss = F.binary_cross_entropy_with_logits(logits, label_ids.float(),
                                                      reduction='sum')
            average, average_mac = 'micro', 'macro'
        else:
            predicted_labels.extend(
                torch.argmax(logits, dim=1).cpu().detach().numpy())
            target_labels.extend(
                torch.argmax(label_ids, dim=1).cpu().detach().numpy())
            loss = F.cross_entropy(logits, torch.argmax(label_ids, dim=1))
            average, average_mac = 'binary', 'binary'

        if self.args.n_gpu > 1:
            loss = loss.mean()
        if self.args.gradient_accumulation_steps > 1:
            loss = loss / self.args.gradient_accumulation_steps
        total_loss += loss.item()

        nb_eval_examples += input_ids.size(0)
        nb_eval_steps += 1

    predicted_labels, target_labels = np.array(predicted_labels), np.array(target_labels)
    accuracy = metrics.accuracy_score(target_labels, predicted_labels)
    precision = metrics.precision_score(target_labels, predicted_labels, average=average)
    recall = metrics.recall_score(target_labels, predicted_labels, average=average)
    avg_loss = total_loss / nb_eval_steps
    hamming_loss = metrics.hamming_loss(target_labels, predicted_labels)
    jaccard_score = metrics.jaccard_score(target_labels, predicted_labels,
                                          average=average)
    f1_micro = metrics.f1_score(target_labels, predicted_labels, average=average)
    f1_macro = metrics.f1_score(target_labels, predicted_labels, average=average_mac)

    score_values = [accuracy, precision, recall, f1_micro, avg_loss, f1_macro,
                    hamming_loss, jaccard_score, predicted_labels, target_labels]
    score_names = ['accuracy', 'precision', 'recall', 'f1_micro', 'avg_loss',
                   'f1_macro', 'hamming_loss', 'jaccard', 'predicted_labels',
                   'target_labels']
    if return_indices:
        score_values.append(all_indices)
        score_names.append('all_indices')
    return score_values, score_names
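# Micro vs. macro averaging, illustrated on a toy multilabel case (the arrays
# below are made up for the example): micro-F1 pools all label decisions
# before scoring, while macro-F1 scores each label column separately and
# averages, so rare labels weigh more under macro.
toy_true = np.array([[1, 0, 0], [1, 0, 0], [1, 1, 0], [0, 0, 1]])
toy_pred = np.array([[1, 0, 0], [1, 0, 0], [1, 0, 0], [0, 0, 0]])
print(metrics.f1_score(toy_true, toy_pred, average='micro'))  # 0.75 (pooled counts)
print(metrics.f1_score(toy_true, toy_pred, average='macro'))  # ~0.33 (mean of per-label F1)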
def get_scores(self, silent=False):
    if self.args.is_hierarchical:
        eval_features = convert_examples_to_hierarchical_features(
            self.eval_examples, self.args.max_seq_length, self.tokenizer)
    else:
        eval_features = convert_examples_to_features(
            self.eval_examples, self.args.max_seq_length, self.tokenizer,
            use_guid=True, is_regression=self.args.is_regression)

    unpadded_input_ids = [f.input_ids for f in eval_features]
    unpadded_input_mask = [f.input_mask for f in eval_features]
    unpadded_segment_ids = [f.segment_ids for f in eval_features]

    if self.args.is_hierarchical:
        pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
        pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
        pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

    padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
    padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
    padded_segment_ids = torch.tensor(unpadded_segment_ids, dtype=torch.long)
    if self.args.is_regression:
        label_ids = torch.tensor([f.label_id for f in eval_features],
                                 dtype=torch.float)
    else:
        label_ids = torch.tensor([f.label_id for f in eval_features],
                                 dtype=torch.long)
    doc_ids = torch.tensor([f.guid for f in eval_features], dtype=torch.long)

    eval_data = TensorDataset(padded_input_ids, padded_input_mask,
                              padded_segment_ids, label_ids, doc_ids)
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler,
                                 batch_size=self.args.batch_size)

    self.model.eval()
    total_loss = 0
    nb_eval_steps, nb_eval_examples = 0, 0
    predicted_labels, target_labels, target_doc_ids = list(), list(), list()

    for input_ids, input_mask, segment_ids, label_ids, doc_ids in tqdm(
            eval_dataloader, desc="Evaluating", disable=silent):
        input_ids = input_ids.to(self.args.device)
        input_mask = input_mask.to(self.args.device)
        segment_ids = segment_ids.to(self.args.device)
        label_ids = label_ids.to(self.args.device)
        target_doc_ids.extend(doc_ids.tolist())

        with torch.no_grad():
            logits = self.model(input_ids=input_ids, attention_mask=input_mask,
                                token_type_ids=segment_ids)[0]

        if self.args.is_multilabel:
            predicted_labels.extend(
                torch.sigmoid(logits).round().long().cpu().detach().numpy())
            target_labels.extend(label_ids.cpu().detach().numpy())
            # if self.args.pos_weights:
            #     pos_weights = [float(w) for w in self.args.pos_weights.split(',')]
            #     pos_weight = torch.FloatTensor(pos_weights)
            # else:
            #     pos_weight = torch.ones([self.args.num_labels])
            if self.args.loss == 'cross-entropy':
                criterion = torch.nn.BCEWithLogitsLoss(reduction='sum')
                loss = criterion(logits.cpu(), label_ids.float().cpu())
            elif self.args.loss == 'mse':
                criterion = torch.nn.MSELoss(reduction='sum')
                m = torch.nn.Sigmoid()
                loss = criterion(m(logits.cpu()), label_ids.float().cpu())
        else:
            if self.args.num_labels > 2:
                predicted_labels.extend(
                    torch.argmax(logits, dim=1).cpu().detach().numpy())
                target_labels.extend(label_ids.cpu().detach().numpy())
                loss = F.cross_entropy(logits, torch.argmax(label_ids, dim=1))
            elif self.args.is_regression:
                predicted_labels.extend(logits.view(-1).cpu().detach().numpy())
                target_labels.extend(label_ids.view(-1).cpu().detach().numpy())
                criterion = torch.nn.MSELoss()
                loss = criterion(logits.view(-1).cpu(), label_ids.view(-1).cpu())
            else:
                predicted_labels.extend(
                    torch.argmax(logits, dim=1).cpu().detach().numpy())
                target_labels.extend(label_ids.cpu().detach().numpy())
                loss_fct = torch.nn.CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.args.num_labels),
                                label_ids.view(-1))

        if self.args.n_gpu > 1:
            loss = loss.mean()
        if self.args.gradient_accumulation_steps > 1:
            loss = loss / self.args.gradient_accumulation_steps
        total_loss += loss.item()

        nb_eval_examples += input_ids.size(0)
        nb_eval_steps += 1

    avg_loss = total_loss / nb_eval_steps
    predicted_label_sets = [p.tolist() for p in predicted_labels]
    target_label_sets = [t.tolist() for t in target_labels]

    if self.args.is_regression:
        rmse, kendall, pearson, spearman, pearson_spearman = \
            evaluate_for_regression(target_labels, predicted_labels)
        score_values = [
            rmse.tolist(), kendall, pearson, spearman, pearson_spearman, avg_loss,
            list(zip(target_doc_ids, target_label_sets, predicted_label_sets))
        ]
        score_names = [
            METRIC_RMSE, METRIC_KENDALL, METRIC_PEARSON, METRIC_SPEARMAN,
            METRIC_PEARSON_SPEARMAN, 'avg_loss', 'label_set_info (id/gold/pred)'
        ]
    else:
        hamming_loss = metrics.hamming_loss(target_labels, predicted_labels)
        predicted_labels, target_labels = np.array(predicted_labels), np.array(target_labels)
        cm = metrics.multilabel_confusion_matrix(target_labels, predicted_labels)
        accuracy = metrics.accuracy_score(target_labels, predicted_labels)
        if self.args.num_labels == 2:
            precision = metrics.precision_score(target_labels, predicted_labels,
                                                average='binary')
            recall = metrics.recall_score(target_labels, predicted_labels,
                                          average='binary')
            f1 = evaluate_with_metric(target_labels, predicted_labels,
                                      METRIC_F1_BINARY)
            score_values = [
                precision, recall, f1, accuracy, avg_loss, hamming_loss,
                cm.tolist(),
                list(zip(target_doc_ids, target_label_sets, predicted_label_sets))
            ]
            score_names = [
                'precision', 'recall', 'f1', 'accuracy', 'avg_loss',
                'hamming_loss', 'confusion_matrix', 'label_set_info (id/gold/pred)'
            ]
        else:
            precision_micro = metrics.precision_score(target_labels, predicted_labels,
                                                      average='micro')
            recall_micro = metrics.recall_score(target_labels, predicted_labels,
                                                average='micro')
            f1_micro = metrics.f1_score(target_labels, predicted_labels,
                                        average='micro')
            f1_macro = evaluate_with_metric(target_labels, predicted_labels,
                                            METRIC_F1_MACRO)
            precision_macro = metrics.precision_score(target_labels, predicted_labels,
                                                      average='macro')
            recall_macro = metrics.recall_score(target_labels, predicted_labels,
                                                average='macro')
            precision_class, recall_class, f1_class, support_class = \
                metrics.precision_recall_fscore_support(target_labels, predicted_labels)
            score_values = [
                precision_macro, recall_macro, f1_macro, accuracy, avg_loss,
                hamming_loss, precision_micro, recall_micro, f1_micro,
                precision_class.tolist(), recall_class.tolist(), f1_class.tolist(),
                support_class.tolist(), cm.tolist(),
                list(zip(target_doc_ids, target_label_sets, predicted_label_sets))
            ]
            score_names = [
                'precision_macro', 'recall_macro', METRIC_F1_MACRO, 'accuracy',
                'avg_loss', 'hamming_loss', 'precision_micro', 'recall_micro',
                'f1_micro', 'precision_class', 'recall_class', 'f1_class',
                'support_class', 'confusion_matrix', 'label_set_info (id/gold/pred)'
            ]
    return score_values, score_names
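# `evaluate_for_regression` is imported from elsewhere in the repo. A plausible
# sketch, assuming it returns (rmse, kendall, pearson, spearman,
# mean(pearson, spearman)) as the unpacking above suggests; the scipy calls
# are illustrative, not taken from the original source.
from scipy import stats

def evaluate_for_regression(target, predicted):
    target = np.asarray(target, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    rmse = np.sqrt(np.mean((target - predicted) ** 2))
    kendall, _ = stats.kendalltau(target, predicted)
    pearson, _ = stats.pearsonr(target, predicted)
    spearman, _ = stats.spearmanr(target, predicted)
    return rmse, kendall, pearson, spearman, (pearson + spearman) / 2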
def get_scores(self, silent=False):
    self.model.eval()
    self.y_target = list()
    self.y_pred = list()
    self.docid = list()
    total_loss = 0

    if self.config['model'] in {'BERT-Base', 'BERT-Large',
                                'HBERT-Base', 'HBERT-Large'}:
        eval_features = convert_examples_to_features(
            self.eval_examples, self.config['max_seq_length'], self.tokenizer,
            self.config['is_hierarchical'])

        unpadded_input_ids = [f.input_ids for f in eval_features]
        unpadded_input_mask = [f.input_mask for f in eval_features]
        unpadded_segment_ids = [f.segment_ids for f in eval_features]

        if self.config['is_hierarchical']:
            pad_input_matrix(unpadded_input_ids, self.config['max_doc_length'])
            pad_input_matrix(unpadded_input_mask, self.config['max_doc_length'])
            pad_input_matrix(unpadded_segment_ids, self.config['max_doc_length'])

        padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
        padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
        padded_segment_ids = torch.tensor(unpadded_segment_ids, dtype=torch.long)
        label_ids = torch.tensor([f.label_id for f in eval_features],
                                 dtype=torch.long)
        document_ids = torch.tensor([f.guid for f in eval_features],
                                    dtype=torch.long)

        eval_data = TensorDataset(padded_input_ids, padded_input_mask,
                                  padded_segment_ids, label_ids, document_ids)
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data, sampler=eval_sampler,
                                     batch_size=self.config['batch_size'])

        for input_ids, input_mask, segment_ids, label_ids, document_ids in tqdm(
                eval_dataloader, desc="Evaluating", disable=silent):
            input_ids = input_ids.to(self.config['device'])
            input_mask = input_mask.to(self.config['device'])
            segment_ids = segment_ids.to(self.config['device'])
            label_ids = label_ids.to(self.config['device'])

            with torch.no_grad():
                logits = torch.sigmoid(
                    self.model(input_ids, segment_ids, input_mask)).squeeze(dim=1)

            # Computing loss and storing predictions
            self.docid.extend(document_ids.cpu().detach().numpy())
            self.y_pred.extend(logits.cpu().detach().numpy())
            self.y_target.extend(label_ids.cpu().detach().numpy())

            loss = F.binary_cross_entropy(logits, label_ids.float())
            if self.config['n_gpu'] > 1:
                loss = loss.mean()
            if self.config['gradient_accumulation_steps'] > 1:
                loss = loss / self.config['gradient_accumulation_steps']
            total_loss += loss.item()
    else:
        self.data_loader.init_epoch()
        if hasattr(self.model, 'beta_ema') and self.model.beta_ema > 0:
            # Temporal averaging: evaluate with the EMA weights
            old_params = self.model.get_params()
            self.model.load_ema_params()

        for batch in tqdm(self.data_loader, desc="Evaluating", disable=silent):
            if hasattr(self.model, 'tar') and self.model.tar:
                # The model returns (logits, rnn_outs); the sigmoid applies to
                # the logits only (the original applied it to the whole tuple,
                # which would fail at runtime).
                if self.ignore_lengths:
                    raw_logits, rnn_outs = self.model(batch.text)
                else:
                    raw_logits, rnn_outs = self.model(batch.text[0],
                                                      lengths=batch.text[1])
                logits = torch.sigmoid(raw_logits).squeeze(dim=1)
            else:
                if self.ignore_lengths:
                    logits = torch.sigmoid(self.model(batch.text)).squeeze(dim=1)
                else:
                    logits = torch.sigmoid(
                        self.model(batch.text[0],
                                   lengths=batch.text[1])).squeeze(dim=1)

            total_loss += F.binary_cross_entropy(logits, batch.label.float()).item()
            if hasattr(self.model, 'tar') and self.model.tar:
                # Temporal activation regularization (accumulated as a scalar)
                total_loss += (rnn_outs[1:] - rnn_outs[:-1]).pow(2).mean().item()

            self.docid.extend(batch.docid.cpu().detach().numpy())
            self.y_pred.extend(logits.cpu().detach().numpy())
            self.y_target.extend(batch.label.cpu().detach().numpy())

        if hasattr(self.model, 'beta_ema') and self.model.beta_ema > 0:
            # Temporal averaging: restore the online weights
            self.model.load_params(old_params)

    predicted_labels = np.around(np.array(self.y_pred))
    target_labels = np.array(self.y_target)
    accuracy = metrics.accuracy_score(target_labels, predicted_labels)
    average_precision = metrics.average_precision_score(target_labels,
                                                        predicted_labels,
                                                        average=None)
    f1 = metrics.f1_score(target_labels, predicted_labels, average='macro')
    avg_loss = total_loss / len(predicted_labels)

    try:
        precision = metrics.precision_score(target_labels, predicted_labels,
                                            average=None)[1]
    except IndexError:
        # Handle cases without positive labels
        precision = 0

    return [accuracy, precision, average_precision, f1, avg_loss], \
           ['accuracy', 'precision', 'average_precision', 'f1',
            'cross_entropy_loss']
def train_gradually(self):
    if self.args.is_hierarchical:
        train_features = convert_examples_to_hierarchical_features(
            self.train_examples, self.args.max_seq_length, self.tokenizer)
    else:
        train_features = convert_examples_to_features(
            self.train_examples, self.args.max_seq_length, self.tokenizer)

    unpadded_input_ids = [f.input_ids for f in train_features]
    unpadded_input_mask = [f.input_mask for f in train_features]
    unpadded_segment_ids = [f.segment_ids for f in train_features]

    if self.args.is_hierarchical:
        pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
        pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
        pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

    print("Number of examples:", len(self.train_examples))
    print("Batch size:", self.args.batch_size)
    print("Num of steps:", self.num_train_optimization_steps)

    padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
    padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
    padded_segment_ids = torch.tensor(unpadded_segment_ids, dtype=torch.long)
    label_ids = torch.tensor([f.label_id for f in train_features], dtype=torch.long)

    train_data = TensorDataset(padded_input_ids, padded_input_mask,
                               padded_segment_ids, label_ids)
    if self.args.local_rank == -1:
        train_sampler = RandomSampler(train_data)
    else:
        train_sampler = DistributedSampler(train_data)
    train_dataloader = DataLoader(train_data, sampler=train_sampler,
                                  batch_size=self.args.batch_size)

    # Train gradually: first with everything frozen except the classifier
    # head, then unfreeze the whole model.
    model_path = Path('/'.join(self.snapshot_path.split('/')[:-1]))

    # NOTE: "qroup"/"unfreez" are the spellings of the helpers defined
    # elsewhere in this class.
    self.train_layer_qroup(train_dataloader, to_freeze_layer='classifier',
                           model_path=model_path)
    # Alternative schedule (disabled): unfreeze the pooler, then the encoder
    # layers one by one from the top:
    # self.train_layer_qroup(train_dataloader, to_freeze_layer='bert.pooler',
    #                        model_path=model_path)
    # for i in range(11, -1, -1):
    #     self.train_layer_qroup(train_dataloader,
    #                            to_freeze_layer='bert.encoder.layer.' + str(i),
    #                            model_path=model_path)

    self.unfreez_all()
    for epoch in trange(int(self.args.epochs), desc="Epoch"):
        self.train_epoch(train_dataloader)
        dev_evaluator = BertEvaluator(self.model, self.processor, self.args,
                                      split='dev')
        (dev_acc, dev_precision, dev_recall, dev_f1, dev_loss, dev_f1_macro,
         dev_hamming_loss, dev_jaccard_score, dev_predicted_labels,
         dev_target_labels) = dev_evaluator.get_scores()[0]

        # Print validation results
        tqdm.write(self.log_header)
        tqdm.write(self.log_template.format(epoch + 1, self.iterations, epoch + 1,
                                            self.args.epochs, dev_acc, dev_precision,
                                            dev_recall, dev_f1, dev_loss,
                                            dev_f1_macro, dev_hamming_loss,
                                            dev_jaccard_score))

        # Update validation results
        if dev_f1 > self.best_dev_f1:
            self.unimproved_iters = 0
            self.best_dev_f1 = dev_f1
            torch.save(self.model, self.snapshot_path)
        else:
            self.unimproved_iters += 1
            if self.unimproved_iters >= self.args.patience:
                self.early_stop = True
                tqdm.write("Early Stopping. Epoch: {}, Best Dev F1: {}".format(
                    epoch, self.best_dev_f1))
                break
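# `train_layer_qroup` / `unfreez_all` are defined elsewhere in the class; a
# minimal sketch of the freezing pattern they presumably rely on, assuming
# standard named PyTorch parameters. The function names here are illustrative.
def freeze_all_except(model, trainable_prefix):
    for name, param in model.named_parameters():
        # Only parameters under the given prefix keep receiving gradients.
        param.requires_grad = name.startswith(trainable_prefix)

def unfreeze_all(model):
    for param in model.parameters():
        param.requires_grad = True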
def train(self):
    if self.args.is_hierarchical:
        train_features = convert_examples_to_hierarchical_features(
            self.train_examples, self.args.max_seq_length, self.tokenizer)
    else:
        train_features = convert_examples_to_features(
            self.train_examples, self.args.max_seq_length, self.tokenizer)

    unpadded_input_ids = [f.input_ids for f in train_features]
    unpadded_input_mask = [f.input_mask for f in train_features]
    unpadded_segment_ids = [f.segment_ids for f in train_features]

    if self.args.is_hierarchical:
        pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
        pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
        pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

    print("Number of examples:", len(self.train_examples))
    print("Batch size:", self.args.batch_size)
    print("Num of steps:", self.num_train_optimization_steps)

    padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
    padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
    padded_segment_ids = torch.tensor(unpadded_segment_ids, dtype=torch.long)
    label_ids = torch.tensor([f.label_id for f in train_features], dtype=torch.long)

    train_data = TensorDataset(padded_input_ids, padded_input_mask,
                               padded_segment_ids, label_ids)
    if self.args.local_rank == -1:
        train_sampler = RandomSampler(train_data)
    else:
        train_sampler = DistributedSampler(train_data)
    train_dataloader = DataLoader(train_data, sampler=train_sampler,
                                  batch_size=self.args.batch_size)

    # Results for graphing learning curves
    results = []
    iterator = trange(int(self.args.epochs), desc="Epoch")
    for epoch in iterator:
        self.train_epoch(train_dataloader)
        dev_evaluator = BertEvaluator(self.model, self.processor, self.args,
                                      split='dev')
        dev_acc, dev_precision, dev_recall, dev_f1, dev_loss = \
            dev_evaluator.get_scores()[0]

        # Print validation results
        tqdm.write(self.log_header)
        tqdm.write(self.log_template.format(epoch + 1, self.iterations, epoch + 1,
                                            self.args.epochs, dev_acc, dev_precision,
                                            dev_recall, dev_f1, dev_loss))
        results.append([epoch + 1, dev_acc, dev_precision, dev_recall, dev_f1,
                        dev_loss])

        # Update validation results
        if dev_f1 > self.best_dev_f1:
            self.unimproved_iters = 0
            self.best_dev_f1 = dev_f1
            torch.save(self.model, self.snapshot_path)
        else:
            self.unimproved_iters += 1
            if self.unimproved_iters >= self.args.patience:
                self.early_stop = True
                tqdm.write("Early Stopping. Epoch: {}, Best Dev F1: {}".format(
                    epoch, self.best_dev_f1))
                iterator.close()
                break

    # Create learning curves
    results_frame = pd.DataFrame(
        data=np.array(results),
        columns=['Epoch', 'Accuracy', 'Precision', 'Recall', 'F1', 'Loss']
    ).set_index('Epoch')
    ax_acc = results_frame[['Accuracy', 'Precision', 'Recall', 'F1']].plot()
    ax_loss = results_frame[['Loss']].plot()
    ax_acc.get_figure().savefig('accuracy_curves.png')
    ax_loss.get_figure().savefig('loss_curves.png')
def get_pred(self, silent=False):
    if self.args.is_hierarchical:
        eval_features = convert_examples_to_hierarchical_features(
            self.eval_examples, self.args.max_seq_length, self.tokenizer)
    else:
        eval_features = convert_examples_to_features(
            self.eval_examples, self.args.max_seq_length, self.tokenizer)

    unpadded_input_ids = [f.input_ids for f in eval_features]
    unpadded_input_mask = [f.input_mask for f in eval_features]
    unpadded_segment_ids = [f.segment_ids for f in eval_features]

    if self.args.is_hierarchical:
        pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
        pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
        pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

    padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
    padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
    padded_segment_ids = torch.tensor(unpadded_segment_ids, dtype=torch.long)
    label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)

    eval_data = TensorDataset(padded_input_ids, padded_input_mask,
                              padded_segment_ids, label_ids)
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler,
                                 batch_size=self.args.batch_size)

    self.model.eval()
    target_labels, output_preds = None, None

    for input_ids, input_mask, segment_ids, label_ids in tqdm(
            eval_dataloader, desc="Evaluating", disable=silent):
        input_ids = input_ids.to(self.args.device)
        input_mask = input_mask.to(self.args.device)
        segment_ids = segment_ids.to(self.args.device)
        label_ids = label_ids.to(self.args.device)

        with torch.no_grad():
            logits = self.model(input_ids, input_mask, segment_ids)[0]

        # Concatenate the label ids and logits across all batches.
        batch_preds = logits.cpu().detach().numpy()
        if self.args.is_multilabel:
            batch_targets = label_ids.cpu().detach().numpy()
        else:
            batch_targets = torch.argmax(label_ids, dim=1).cpu().detach().numpy()

        if output_preds is None:
            output_preds, target_labels = batch_preds, batch_targets
        else:
            output_preds = np.append(output_preds, batch_preds, axis=0)
            target_labels = np.append(target_labels, batch_targets, axis=0)

    return target_labels, output_preds
def get_scores(self, silent=False):
    if self.args.is_hierarchical:
        eval_features = convert_examples_to_hierarchical_features(
            self.eval_examples, self.args.max_seq_length, self.tokenizer)
    else:
        if 'longformer' in self.args.model:
            eval_features = convert_examples_to_features_long(
                self.eval_examples, self.args.max_seq_length, self.tokenizer)
        elif 'reformer' in self.args.model:
            eval_features = convert_examples_to_features_long(
                self.eval_examples, self.args.max_seq_length, self.tokenizer,
                'reformer')
        else:
            eval_features = convert_examples_to_features(
                self.eval_examples, self.args.max_seq_length, self.tokenizer)

    unpadded_input_ids = [f.input_ids for f in eval_features]
    unpadded_input_mask = [f.input_mask for f in eval_features]
    unpadded_segment_ids = [f.segment_ids for f in eval_features]

    if self.args.is_hierarchical:
        pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
        pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
        pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

    padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
    padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
    padded_segment_ids = torch.tensor(unpadded_segment_ids, dtype=torch.long)
    label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)

    eval_data = TensorDataset(padded_input_ids, padded_input_mask,
                              padded_segment_ids, label_ids)
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler,
                                 batch_size=self.args.batch_size)

    self.model.eval()
    total_loss = 0
    nb_eval_steps, nb_eval_examples = 0, 0
    predicted_labels, target_labels = list(), list()

    for input_ids, input_mask, segment_ids, label_ids in tqdm(
            eval_dataloader, desc="Evaluating", disable=silent):
        input_ids = input_ids.to(self.args.device)
        input_mask = input_mask.to(self.args.device)
        segment_ids = segment_ids.to(self.args.device)
        label_ids = label_ids.to(self.args.device)

        with torch.no_grad():
            logits = self.model(input_ids, input_mask, segment_ids)[0]

        if self.args.is_multilabel:
            predicted_labels.extend(
                torch.sigmoid(logits).round().long().cpu().detach().numpy())
            target_labels.extend(label_ids.cpu().detach().numpy())
            loss = F.binary_cross_entropy_with_logits(logits, label_ids.float(),
                                                      reduction='sum')
        else:
            predicted_labels.extend(
                torch.argmax(logits, dim=1).cpu().detach().numpy())
            target_labels.extend(
                torch.argmax(label_ids, dim=1).cpu().detach().numpy())
            loss = F.cross_entropy(logits, torch.argmax(label_ids, dim=1))

        if self.args.n_gpu > 1:
            loss = loss.mean()
        if self.args.gradient_accumulation_steps > 1:
            loss = loss / self.args.gradient_accumulation_steps
        total_loss += loss.item()

        nb_eval_examples += input_ids.size(0)
        nb_eval_steps += 1

    predicted_labels, target_labels = np.array(predicted_labels), np.array(target_labels)

    if self.dump_predictions:
        pickle.dump((predicted_labels, target_labels), open(
            os.path.join(self.args.data_dir, self.args.dataset,
                         '{}_{}_{}_{}_predictions.p'.format(
                             self.split, self.args.model,
                             self.args.training_file,
                             self.args.max_seq_length)), 'wb'))

    accuracy = metrics.accuracy_score(target_labels, predicted_labels)
    precision = metrics.precision_score(target_labels, predicted_labels,
                                        average='micro')
    recall = metrics.recall_score(target_labels, predicted_labels,
                                  average='micro')
    f1 = metrics.f1_score(target_labels, predicted_labels, average='micro')
    avg_loss = total_loss / nb_eval_steps

    if self.dump_predictions:
        pickle.dump(([accuracy, precision, recall, f1, avg_loss],
                     ['accuracy', 'precision', 'recall', 'f1', 'avg_loss']),
                    open(os.path.join(
                        self.args.data_dir, self.args.dataset,
                        '{}_{}_{}_{}_metrics.p'.format(
                            self.split, self.args.model,
                            self.args.training_file,
                            self.args.max_seq_length)), 'wb'))

    return ([accuracy, precision, recall, f1, avg_loss],
            ['accuracy', 'precision', 'recall', 'f1', 'avg_loss'])
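# A hedged usage sketch for the prediction dumps written above. The path
# components mirror the format strings in get_scores, with placeholder values
# (data dir, dataset, model, training file, and sequence length are all
# illustrative, not taken from the repo's config).
dump_path = os.path.join('data', 'my_dataset',
                         'test_longformer_train.tsv_4096_predictions.p')
with open(dump_path, 'rb') as f:
    dumped_preds, dumped_targets = pickle.load(f)
print(dumped_preds.shape, dumped_targets.shape)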
def get_scores(self, silent=False):
    if self.args.is_hierarchical:
        eval_features = convert_examples_to_hierarchical_features(
            self.eval_examples, self.args.max_seq_length, self.tokenizer)
    else:
        eval_features = convert_examples_to_features(
            self.eval_examples, self.args.max_seq_length, self.tokenizer)

    unpadded_input_ids = [f.input_ids for f in eval_features]
    unpadded_input_mask = [f.input_mask for f in eval_features]
    unpadded_segment_ids = [f.segment_ids for f in eval_features]

    if self.args.is_hierarchical:
        pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
        pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
        pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

    padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
    padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
    padded_segment_ids = torch.tensor(unpadded_segment_ids, dtype=torch.long)
    label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)

    eval_data = TensorDataset(padded_input_ids, padded_input_mask,
                              padded_segment_ids, label_ids)
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler,
                                 batch_size=self.args.batch_size)

    self.model.eval()
    total_loss = 0
    nb_eval_steps, nb_eval_examples = 0, 0
    predicted_labels, target_labels = list(), list()
    start_time = time.time()

    for input_ids, input_mask, segment_ids, label_ids in tqdm(
            eval_dataloader, desc="Evaluating", disable=silent):
        input_ids = input_ids.to(self.args.device)
        input_mask = input_mask.to(self.args.device)
        segment_ids = segment_ids.to(self.args.device)
        label_ids = label_ids.to(self.args.device)

        with torch.no_grad():
            logits = self.model(input_ids, input_mask, segment_ids)[0]

        if self.args.is_multilabel:
            predicted_labels.extend(
                torch.sigmoid(logits).round().long().cpu().detach().numpy())
            target_labels.extend(label_ids.cpu().detach().numpy())
            loss = F.binary_cross_entropy_with_logits(logits, label_ids.float(),
                                                      reduction='sum')
        else:
            predicted_labels.extend(
                torch.argmax(logits, dim=1).cpu().detach().numpy())
            target_labels.extend(
                torch.argmax(label_ids, dim=1).cpu().detach().numpy())
            loss = F.cross_entropy(logits, torch.argmax(label_ids, dim=1))

        if self.args.n_gpu > 1:
            loss = loss.mean()
        if self.args.gradient_accumulation_steps > 1:
            loss = loss / self.args.gradient_accumulation_steps
        total_loss += loss.item()

        nb_eval_examples += input_ids.size(0)
        nb_eval_steps += 1

    predicted_labels, target_labels = np.array(predicted_labels), np.array(target_labels)
    # Exact accuracy plus off-by-one accuracies (prediction one class above or
    # below the target); their sum is "accuracy within one class".
    accuracy_real = metrics.accuracy_score(target_labels, predicted_labels)
    accuracy_offset1 = metrics.accuracy_score(target_labels + 1, predicted_labels)
    accuracy_offset2 = metrics.accuracy_score(target_labels - 1, predicted_labels)
    all_accuracy = accuracy_real + accuracy_offset1 + accuracy_offset2
    precision = metrics.precision_score(target_labels, predicted_labels,
                                        average='micro')
    recall = metrics.recall_score(target_labels, predicted_labels,
                                  average='micro')
    f1 = metrics.f1_score(target_labels, predicted_labels, average='micro')
    mse = metrics.mean_squared_error(target_labels, predicted_labels)
    avg_loss = total_loss / nb_eval_steps
    print("Evaluation Time: {}".format(time.time() - start_time))

    return ([accuracy_real, precision, recall, f1, avg_loss, mse],
            ['accuracy', 'precision', 'recall', 'f1', 'avg_loss', 'mse'])
def get_scores(self, silent=False):
    if self.args.is_hierarchical:
        eval_features = convert_examples_to_hierarchical_features(
            self.eval_examples, self.args.max_seq_length, self.tokenizer)
    else:
        eval_features = convert_examples_to_features(
            self.eval_examples, self.args.max_seq_length, self.tokenizer)

    unpadded_input_ids = [f.input_ids for f in eval_features]
    unpadded_input_mask = [f.input_mask for f in eval_features]
    unpadded_segment_ids = [f.segment_ids for f in eval_features]

    if self.args.is_hierarchical:
        pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
        pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
        pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

    padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
    padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
    padded_segment_ids = torch.tensor(unpadded_segment_ids, dtype=torch.long)
    label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)

    eval_data = TensorDataset(padded_input_ids, padded_input_mask,
                              padded_segment_ids, label_ids)
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler,
                                 batch_size=self.args.batch_size)

    self.model.eval()
    total_loss = 0
    nb_eval_steps, nb_eval_examples = 0, 0
    predicted_labels, target_labels = list(), list()

    for input_ids, input_mask, segment_ids, label_ids in tqdm(
            eval_dataloader, desc="Evaluating", disable=silent):
        input_ids = input_ids.to(self.args.device)
        input_mask = input_mask.to(self.args.device)
        segment_ids = segment_ids.to(self.args.device)
        label_ids = label_ids.to(self.args.device)

        with torch.no_grad():
            logits = self.model(input_ids, segment_ids, input_mask)

        if self.args.is_multilabel:
            predicted_labels.extend(
                F.softmax(logits, dim=1).cpu().detach().numpy())
            target_labels.extend(label_ids.cpu().detach().numpy())
            loss = F.binary_cross_entropy_with_logits(logits, label_ids.float(),
                                                      reduction='sum')
        else:
            predicted_labels.extend(
                torch.argmax(logits, dim=1).cpu().detach().numpy())
            target_labels.extend(
                torch.argmax(label_ids, dim=1).cpu().detach().numpy())
            loss = F.cross_entropy(logits, torch.argmax(label_ids, dim=1))

        if self.args.n_gpu > 1:
            loss = loss.mean()
        if self.args.gradient_accumulation_steps > 1:
            loss = loss / self.args.gradient_accumulation_steps
        total_loss += loss.item()

        nb_eval_examples += input_ids.size(0)
        nb_eval_steps += 1

    if self.args.is_multilabel:
        score_method = 'weighted'
        pos_label = None
    else:
        score_method = 'binary'
        pos_label = 1

    predicted_labels, target_labels = np.array(predicted_labels), np.array(target_labels)
    # Turn the softmax scores into one-hot predictions (argmax per row).
    predicted_labels = (predicted_labels ==
                        predicted_labels.max(axis=1, keepdims=True)).astype(int)

    accuracy = metrics.accuracy_score(target_labels, predicted_labels)
    precision = metrics.precision_score(target_labels, predicted_labels,
                                        average=score_method, pos_label=pos_label)
    recall = metrics.recall_score(target_labels, predicted_labels,
                                  average=score_method, pos_label=pos_label)
    f1 = metrics.f1_score(target_labels, predicted_labels,
                          average=score_method, pos_label=pos_label)
    avg_loss = total_loss / nb_eval_steps

    # Flatten each one-hot row into a string label for the dump below.
    predicted_labels = np.apply_along_axis(lambda x: ''.join(x), 1,
                                           predicted_labels.astype(str))
    target_labels = np.apply_along_axis(lambda x: ''.join(x), 1,
                                        target_labels.astype(str))

    # Write predictions to a randomly suffixed file to avoid clobbering
    # earlier runs.
    x = np.random.randint(1000)
    with open('predictions_{}.txt'.format(x), 'w') as f:
        pred = pd.DataFrame({'predicted': predicted_labels,
                             'target': target_labels})
        pred.to_csv(f)

    return ([accuracy, precision, recall, f1, avg_loss],
            ['accuracy', 'precision', 'recall', 'f1', 'avg_loss'])
def train(self):
    if self.args.is_hierarchical:
        train_features = convert_examples_to_hierarchical_features(
            self.train_examples, self.args.max_seq_length, self.tokenizer)
    else:
        train_features = convert_examples_to_features(
            self.train_examples, self.args.max_seq_length, self.tokenizer,
            use_guid=True)

    unpadded_input_ids = [f.input_ids for f in train_features]
    unpadded_input_mask = [f.input_mask for f in train_features]
    unpadded_segment_ids = [f.segment_ids for f in train_features]

    if self.args.is_hierarchical:
        pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
        pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
        pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

    print("Number of examples:", len(self.train_examples))
    print("Batch size:", self.args.batch_size)
    print("Num of steps:", self.num_train_optimization_steps)

    padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
    padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
    padded_segment_ids = torch.tensor(unpadded_segment_ids, dtype=torch.long)
    label_ids = torch.tensor([f.label_id for f in train_features], dtype=torch.long)

    train_data = TensorDataset(padded_input_ids, padded_input_mask,
                               padded_segment_ids, label_ids)
    train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(train_data, sampler=train_sampler,
                                  batch_size=self.args.batch_size)

    print('Begin training: ',
          datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
    start_time = time.monotonic()
    for epoch in trange(int(self.args.epochs), desc="Epoch"):
        self.train_epoch(train_dataloader)
        print('COARSE Train loss: ', self.tr_loss_coarse)
        print('FINE Train loss: ', self.tr_loss_fine)
        if epoch == 0:
            self.initial_tr_loss_fine = self.tr_loss_fine

        if self.args.evaluate_dev:
            dev_evaluator = BertHierarchicalEvaluator(self.model, self.processor,
                                                      self.tokenizer, self.args,
                                                      split='dev')
            scores_fine, scores_coarse = dev_evaluator.get_scores(silent=True)
            (dev_precision_fine, dev_recall_fine, dev_f1_fine, dev_acc_fine,
             dev_loss_fine) = scores_fine[0][:5]
            (dev_precision_coarse, dev_recall_coarse, dev_f1_coarse,
             dev_acc_coarse, dev_loss_coarse) = scores_coarse[0][:5]

            # Print validation results
            tqdm.write('COARSE: ' + self.log_header)
            tqdm.write(self.log_template.format(
                epoch + 1, self.iterations, epoch + 1, self.args.epochs,
                dev_acc_coarse, dev_precision_coarse, dev_recall_coarse,
                dev_f1_coarse, dev_loss_coarse))
            tqdm.write('FINE: ' + self.log_header)
            tqdm.write(self.log_template.format(
                epoch + 1, self.iterations, epoch + 1, self.args.epochs,
                dev_acc_fine, dev_precision_fine, dev_recall_fine,
                dev_f1_fine, dev_loss_fine))

            # Update validation results
            if dev_f1_fine > self.best_dev_f1:
                self.unimproved_iters = 0
                self.best_dev_f1 = dev_f1_fine
                torch.save(self.model, self.snapshot_path)
            else:
                self.unimproved_iters += 1
                if self.unimproved_iters >= self.args.patience:
                    self.early_stop = True
                    tqdm.write("Early Stopping. Epoch: {}, Best Dev {}: {}".format(
                        epoch, self.args.eval_metric, self.best_dev_f1))
                    break

        if self.args.evaluate_test:
            # When evaluating on test, dev cannot be used for early stopping,
            # so check instead that the train loss is converging.
            if epoch == self.patience_training:
                loss_percent = (self.initial_tr_loss_fine - self.tr_loss_fine) \
                    / self.initial_tr_loss_fine
                if loss_percent <= self.minimum_loss_percent_decrease:
                    self.training_converged = False
                    tqdm.write("Training failed to converge. Epoch: {}, "
                               "Loss percent: {}".format(epoch, loss_percent))
                    break

    end_time = time.monotonic()
    # Save the model at the end of training when evaluating on test.
    if self.args.evaluate_test:
        torch.save(self.model, self.snapshot_path)
    print('End training: ',
          datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
    print('Time elapsed: ', end_time - start_time)
def train(self):
    if self.args.is_hierarchical:
        train_features = convert_examples_to_hierarchical_features(
            self.train_examples, self.args.max_seq_length, self.tokenizer)
    else:
        train_features = convert_examples_to_features(
            self.train_examples, self.args.max_seq_length, self.tokenizer,
            use_guid=True, is_regression=self.args.is_regression)

    unpadded_input_ids = [f.input_ids for f in train_features]
    unpadded_input_mask = [f.input_mask for f in train_features]
    unpadded_segment_ids = [f.segment_ids for f in train_features]

    if self.args.is_hierarchical:
        pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
        pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
        pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

    print("Number of examples:", len(self.train_examples))
    print("Batch size:", self.args.batch_size)
    print("Num of steps:", self.num_train_optimization_steps)

    padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
    padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
    padded_segment_ids = torch.tensor(unpadded_segment_ids, dtype=torch.long)
    if self.args.is_regression:
        label_ids = torch.tensor([f.label_id for f in train_features],
                                 dtype=torch.float)
    else:
        label_ids = torch.tensor([f.label_id for f in train_features],
                                 dtype=torch.long)

    train_data = TensorDataset(padded_input_ids, padded_input_mask,
                               padded_segment_ids, label_ids)
    train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(train_data, sampler=train_sampler,
                                  batch_size=self.args.batch_size)

    print('Begin training: ',
          datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
    start_time = time.monotonic()
    for epoch in trange(int(self.args.epochs), desc="Epoch"):
        self.train_epoch(train_dataloader)
        print('Train loss: ', self.tr_loss)
        if epoch == 0:
            self.initial_tr_loss = self.tr_loss

        if self.args.evaluate_dev:
            dev_evaluator = BertEvaluator(self.model, self.processor,
                                          self.tokenizer, self.args, split='dev')
            dev_scores, dev_score_names = dev_evaluator.get_scores()
            dev_metric = dev_scores[dev_score_names.index(self.args.eval_metric)]

            if self.args.is_regression:
                (dev_rmse, dev_kendall, dev_pearson, dev_spearman,
                 dev_pearson_spearman, dev_loss) = dev_scores[:6]
                # Print validation results
                tqdm.write(self.log_header_regression)
                tqdm.write(self.log_template_regression.format(
                    epoch + 1, self.iterations, epoch + 1, self.args.epochs,
                    dev_rmse, dev_kendall, dev_pearson, dev_spearman,
                    dev_pearson_spearman, dev_loss))
            else:
                dev_precision, dev_recall, dev_f1, dev_acc, dev_loss = \
                    dev_scores[:5]
                # Print validation results
                tqdm.write(self.log_header_classification)
                tqdm.write(self.log_template_classification.format(
                    epoch + 1, self.iterations, epoch + 1, self.args.epochs,
                    dev_acc, dev_precision, dev_recall, dev_f1, dev_loss))

            # Update validation results
            dev_improved = self.check_dev_improved(dev_metric)
            if dev_improved:
                self.unimproved_iters = 0
                self.best_dev_metric = dev_metric
                torch.save(self.model, self.snapshot_path)
            else:
                self.unimproved_iters += 1
                if self.unimproved_iters >= self.args.patience:
                    self.early_stop = True
                    tqdm.write("Early Stopping. Epoch: {}, Best Dev {}: {}".format(
                        epoch, self.args.eval_metric, self.best_dev_metric))
                    break

        if self.args.evaluate_test:
            # When evaluating on test, dev cannot be used for early stopping,
            # so check instead that the train loss is converging.
            if epoch == self.patience_training:
                loss_percent = (self.initial_tr_loss - self.tr_loss) \
                    / self.initial_tr_loss
                if loss_percent <= self.minimum_loss_percent_decrease:
                    self.training_converged = False
                    tqdm.write("Training failed to converge. Epoch: {}, "
                               "Loss percent: {}".format(epoch, loss_percent))
                    break

    end_time = time.monotonic()
    # Save the model at the end of training when evaluating on test.
    if self.args.evaluate_test:
        torch.save(self.model, self.snapshot_path)
    print('End training: ',
          datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
    print('Time elapsed: ', end_time - start_time)
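# A hedged end-to-end sketch of how these trainer/evaluator classes appear to
# be driven, inferred from the call sites above. The constructor signatures
# vary across the variants in this file, and `get_args`, `model`, `processor`,
# and `tokenizer` are assumed names, not the repo's actual entry point.
args = get_args()
trainer = BertTrainer(model, processor, tokenizer, args)
trainer.train()  # fine-tunes with early stopping on the dev metric

best_model = torch.load(trainer.snapshot_path)  # models are saved whole above
test_evaluator = BertEvaluator(best_model, processor, args, split='test')
scores, score_names = test_evaluator.get_scores(silent=True)
print(dict(zip(score_names, scores)))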