def test(model, args, data, mode='test'):
    """Evaluate on the dev or test split and return (mean loss, MRR)."""
    if mode == 'dev':
        iterator = iter(data.dev_iter)
    else:
        iterator = iter(data.test_iter)
    criterion = nn.CrossEntropyLoss()
    model.eval()
    ids, scores, labels = [], [], []
    losses = []
    for batch in iterator:
        s1, s2 = getattr(batch, 'q1'), getattr(batch, 'q2')
        kwargs = {'p': s1, 'h': s2}
        if args.use_char_emb:
            # Build character-level inputs and move them to the GPU if one is set.
            char_p = Variable(torch.LongTensor(data.characterize(s1)))
            char_h = Variable(torch.LongTensor(data.characterize(s2)))
            if args.gpu > -1:
                char_p = char_p.cuda(args.gpu)
                char_h = char_h.cuda(args.gpu)
            kwargs['char_p'] = char_p
            kwargs['char_h'] = char_h
        pred = model(**kwargs)
        batch_loss = criterion(pred, batch.label)
        losses.append(batch_loss.item())
        # Collect logits, gold labels, and query ids for the ranking metrics.
        ids.extend(getattr(batch, 'id'))
        scores.append(pred.detach().cpu().numpy())
        labels.append(batch.label.to('cpu').numpy())
    labels = np.concatenate(labels, 0)
    scores = np.concatenate(scores, 0)
    eval_DOUBAN_MRR, eval_DOUBAN_mrr, eval_DOUBAN_MAP, eval_Precision1 = compute_DOUBAN(
        ids, scores, labels)
    return np.mean(losses), eval_DOUBAN_MRR
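# --- Hedged example (not from this codebase) ---------------------------------
# `compute_DOUBAN` is imported elsewhere and not shown here. The sketch below
# illustrates the conventional Douban-style ranking metrics it presumably
# computes (MRR, MAP, P@1), assuming `ids` groups candidate responses per
# query, `scores` are 2-class logits with column 1 as the relevance score, and
# `labels` are binary. The original also returns a second value,
# `eval_DOUBAN_mrr`, whose exact definition is not visible in this file.
import numpy as np
from collections import defaultdict


def compute_douban_sketch(ids, scores, labels):
    groups = defaultdict(list)
    for qid, score, label in zip(ids, scores, labels):
        groups[qid].append((float(score[1]), int(label)))
    mrr, mean_ap, p_at_1 = [], [], []
    for cands in groups.values():
        cands.sort(key=lambda x: x[0], reverse=True)  # rank by relevance score
        pos_ranks = [r for r, (_, lab) in enumerate(cands, 1) if lab == 1]
        if not pos_ranks:
            continue  # queries with no positive candidate are skipped
        mrr.append(1.0 / pos_ranks[0])  # reciprocal rank of the first positive
        # average precision = mean of precision at each positive's rank
        mean_ap.append(np.mean([(k + 1) / r for k, r in enumerate(pos_ranks)]))
        p_at_1.append(float(cands[0][1] == 1))
    return np.mean(mrr), np.mean(mean_ap), np.mean(p_at_1)
# ------------------------------------------------------------------------------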
def test_eval(self):
    data = DATAUBUNTU(debug=False, data_dir=self.data_dir)
    test_examples = data.read_examples(
        os.path.join(self.data_dir, 'test.csv'))
    print('number of eval examples:', len(test_examples))
    ID = [x.guid for x in test_examples]
    test_features = data.convert_examples_to_features(
        test_examples, self.tokenizer, self.max_seq_length)
    all_input_ids = torch.tensor(data.select_field(test_features, 'input_ids'),
                                 dtype=torch.long)
    all_input_mask = torch.tensor(data.select_field(test_features, 'input_mask'),
                                  dtype=torch.long)
    all_segment_ids = torch.tensor(data.select_field(test_features, 'segment_ids'),
                                   dtype=torch.long)
    all_utterance_mask = torch.tensor(data.select_field(test_features, 'utterance_mask'),
                                      dtype=torch.long)
    all_response_mask = torch.tensor(data.select_field(test_features, 'response_mask'),
                                     dtype=torch.long)
    all_history_mask = torch.tensor(data.select_field(test_features, 'history_mask'),
                                    dtype=torch.long)
    all_label = torch.tensor([f.label for f in test_features], dtype=torch.long)

    test_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids,
                              all_utterance_mask, all_response_mask,
                              all_history_mask, all_label)
    # Run prediction for the full test set in original order.
    test_sampler = SequentialSampler(test_data)
    test_dataloader = DataLoader(test_data,
                                 sampler=test_sampler,
                                 batch_size=self.eval_batch_size)

    # Load the fine-tuned checkpoint saved during training.
    config = BertConfig.from_pretrained(self.model_name_or_path,
                                        num_labels=self.num_labels)
    model = BertForSequenceClassification.from_pretrained(
        os.path.join(self.output_dir, "pytorch_model.bin"),
        self.args,
        config=config)
    model.to(self.device)
    model.eval()

    inference_labels = []
    gold_labels = []
    scores = []
    for input_ids, input_mask, segment_ids, utterance_mask, response_mask, history_mask, label_ids in test_dataloader:
        input_ids = input_ids.to(self.device)
        input_mask = input_mask.to(self.device)
        segment_ids = segment_ids.to(self.device)
        utterance_mask = utterance_mask.to(self.device)
        response_mask = response_mask.to(self.device)
        history_mask = history_mask.to(self.device)
        label_ids = label_ids.to(self.device)
        with torch.no_grad():
            logits = model(
                input_ids=input_ids,
                token_type_ids=segment_ids,
                attention_mask=input_mask,
                utterance_mask=utterance_mask,
                response_mask=response_mask,
                history_mask=history_mask,
            ).detach().cpu().numpy()
        label_ids = label_ids.to('cpu').numpy()
        scores.append(logits)
        inference_labels.append(np.argmax(logits, axis=1))
        gold_labels.append(label_ids)
    gold_labels = np.concatenate(gold_labels, 0)
    scores = np.concatenate(scores, 0)
    predicted_labels = np.concatenate(inference_labels, 0)
    # Compute evaluation metrics.
    assert len(ID) == scores.shape[0] == gold_labels.shape[0]
    eval_DOUBAN_MRR, eval_DOUBAN_mrr, eval_DOUBAN_MAP, eval_Precision1 = compute_DOUBAN(
        ID, scores, gold_labels)
    r_at_1 = r_at_k(ID, scores, gold_labels, 1)
    r_at_2 = r_at_k(ID, scores, gold_labels, 2)
    r_at_5 = r_at_k(ID, scores, gold_labels, 5)
    print('eval_MRR', eval_DOUBAN_MRR, eval_DOUBAN_mrr,
          'eval_MAP', eval_DOUBAN_MAP,
          'eval_Precision1', eval_Precision1,
          'r10@1', r_at_1, 'r10@2', r_at_2, 'r10@5', r_at_5)
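# --- Hedged example (not from this codebase) ---------------------------------
# `r_at_k` is also imported elsewhere; a minimal sketch of the usual Rn@k
# metric ("r10@1", "r10@2", "r10@5" above), under the same per-query grouping
# assumption as the DOUBAN sketch: the fraction of positives recovered among
# the top-k ranked candidates, averaged over queries.
from collections import defaultdict


def r_at_k_sketch(ids, scores, labels, k):
    groups = defaultdict(list)
    for qid, score, label in zip(ids, scores, labels):
        groups[qid].append((float(score[1]), int(label)))
    recalls = []
    for cands in groups.values():
        n_pos = sum(lab for _, lab in cands)
        if n_pos == 0:
            continue  # skip queries without any positive candidate
        cands.sort(key=lambda x: x[0], reverse=True)
        hits = sum(lab for _, lab in cands[:k])
        recalls.append(hits / n_pos)
    return sum(recalls) / len(recalls) if recalls else 0.0
# ------------------------------------------------------------------------------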
def train(self):
    if not os.path.exists(self.output_dir):
        os.makedirs(self.output_dir)

    train_dataloader, eval_dataloader, train_examples, eval_examples = self.create_dataloader()

    num_train_optimization_steps = self.train_steps

    # Prepare model
    config = BertConfig.from_pretrained(self.model_name_or_path,
                                        num_labels=self.num_labels)
    model = BertForSequenceClassification.from_pretrained(
        self.model_name_or_path, self.args, config=config)
    model.to(self.device)
    model.train()

    # Prepare optimizer: no weight decay for biases and LayerNorm parameters.
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [p for n, p in param_optimizer
                   if not any(nd in n for nd in no_decay)],
        'weight_decay': self.weight_decay
    }, {
        'params': [p for n, p in param_optimizer
                   if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }]
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=self.learning_rate,
                      eps=self.adam_epsilon)
    scheduler = WarmupLinearSchedule(optimizer,
                                     warmup_steps=self.warmup_steps,
                                     t_total=self.train_steps)

    global_step = 0
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_examples))
    logger.info("  Batch size = %d", self.train_batch_size)
    logger.info("  Num steps = %d", num_train_optimization_steps)

    best_MRR = 0
    tr_loss = 0
    nb_tr_examples, nb_tr_steps = 0, 0
    train_dataloader = cycle(train_dataloader)

    for step in range(num_train_optimization_steps):
        batch = next(train_dataloader)
        batch = tuple(t.to(self.device) for t in batch)
        input_ids, input_mask, segment_ids, utterance_mask, response_mask, history_mask, label_ids = batch
        loss = model(input_ids=input_ids,
                     token_type_ids=segment_ids,
                     attention_mask=input_mask,
                     utterance_mask=utterance_mask,
                     response_mask=response_mask,
                     history_mask=history_mask,
                     labels=label_ids)
        tr_loss += loss.item()
        train_loss = round(tr_loss / (nb_tr_steps + 1), 4)
        nb_tr_examples += input_ids.size(0)
        nb_tr_steps += 1

        loss.backward()
        # Step the optimizer only every `gradient_accumulation_steps` batches.
        if (nb_tr_steps + 1) % self.gradient_accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
            scheduler.step()
            global_step += 1

        if (step + 1) % (self.eval_steps * self.gradient_accumulation_steps) == 0:
            tr_loss = 0
            nb_tr_examples, nb_tr_steps = 0, 0
            logger.info("***** Report result *****")
            logger.info("  %s = %s", 'global_step', str(global_step))
            logger.info("  %s = %s", 'train loss', str(train_loss))

        if self.do_eval and (step + 1) % (self.eval_steps * self.gradient_accumulation_steps) == 0:
            for file in ['dev.csv']:
                inference_labels = []
                gold_labels = []
                inference_logits = []
                scores = []
                ID = [x.guid for x in eval_examples]

                logger.info("***** Running evaluation *****")
                logger.info("  Num examples = %d", len(eval_examples))
                logger.info("  Batch size = %d", self.eval_batch_size)

                model.eval()
                eval_loss, eval_accuracy = 0, 0
                nb_eval_steps, nb_eval_examples = 0, 0
                for input_ids, input_mask, segment_ids, utterance_mask, response_mask, history_mask, label_ids in eval_dataloader:
                    input_ids = input_ids.to(self.device)
                    input_mask = input_mask.to(self.device)
                    segment_ids = segment_ids.to(self.device)
                    utterance_mask = utterance_mask.to(self.device)
                    response_mask = response_mask.to(self.device)
                    history_mask = history_mask.to(self.device)
                    label_ids = label_ids.to(self.device)
                    with torch.no_grad():
                        # First forward pass returns the loss (labels given) ...
                        tmp_eval_loss = model(input_ids=input_ids,
                                              token_type_ids=segment_ids,
                                              attention_mask=input_mask,
                                              utterance_mask=utterance_mask,
                                              response_mask=response_mask,
                                              history_mask=history_mask,
                                              labels=label_ids)
                        # ... second pass returns the logits (no labels).
                        logits = model(input_ids=input_ids,
                                       token_type_ids=segment_ids,
                                       attention_mask=input_mask,
                                       utterance_mask=utterance_mask,
                                       response_mask=response_mask,
                                       history_mask=history_mask)
                    logits = logits.detach().cpu().numpy()
                    label_ids = label_ids.to('cpu').numpy()
                    inference_labels.append(np.argmax(logits, axis=1))
                    scores.append(logits)
                    gold_labels.append(label_ids)
                    inference_logits.append(logits)
                    eval_loss += tmp_eval_loss.mean().item()
                    nb_eval_examples += input_ids.size(0)
                    nb_eval_steps += 1

                gold_labels = np.concatenate(gold_labels, 0)
                inference_logits = np.concatenate(inference_logits, 0)
                scores = np.concatenate(scores, 0)
                model.train()
                eval_loss = eval_loss / nb_eval_steps
                eval_accuracy = accuracyCQA(inference_logits, gold_labels)
                eval_DOUBAN_MRR, eval_DOUBAN_mrr, eval_DOUBAN_MAP, eval_Precision1 = compute_DOUBAN(
                    ID, scores, gold_labels)
                r_at_1 = r_at_k(ID, scores, gold_labels, 1)
                r_at_2 = r_at_k(ID, scores, gold_labels, 2)
                r_at_5 = r_at_k(ID, scores, gold_labels, 5)
                print('eval_F1', eval_accuracy,
                      'eval_MRR', eval_DOUBAN_MRR,
                      'eval_MAP', eval_DOUBAN_MAP,
                      'eval_Precision1', eval_Precision1,
                      'r10@1', r_at_1, 'r10@2', r_at_2, 'r10@5', r_at_5,
                      'global_step', global_step,
                      'loss', train_loss)
                result = {
                    'eval_loss': eval_loss,
                    'eval_F1': eval_accuracy,
                    'eval_MRR': eval_DOUBAN_MRR,
                    'eval_MAP': eval_DOUBAN_MAP,
                    'eval_Precision1': eval_Precision1,
                    'r10@1': r_at_1,
                    'r10@2': r_at_2,
                    'r10@5': r_at_5,
                    'global_step': global_step,
                    'loss': train_loss
                }

                output_eval_file = os.path.join(self.output_dir, "eval_results.txt")
                with open(output_eval_file, "a") as writer:
                    for key in sorted(result.keys()):
                        logger.info("  %s = %s", key, str(result[key]))
                        writer.write("%s = %s\n" % (key, str(result[key])))
                    writer.write('*' * 80)
                    writer.write('\n')

                # Keep the checkpoint with the best dev MRR.
                if eval_DOUBAN_MRR > best_MRR:
                    print("=" * 80)
                    print("Best MRR", eval_DOUBAN_MRR)
                    print("Saving Model......")
                    best_MRR = eval_DOUBAN_MRR
                    # Save a trained model (unwrap DataParallel if needed).
                    model_to_save = model.module if hasattr(model, 'module') else model
                    output_model_file = os.path.join(self.output_dir, "pytorch_model.bin")
                    torch.save(model_to_save.state_dict(), output_model_file)
                    print("=" * 80)
                else:
                    print("=" * 80)
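# --- Hedged note on the accumulation logic above ------------------------------
# The training loop backpropagates the raw `loss` on every micro-batch but only
# steps the optimizer every `gradient_accumulation_steps` batches, so gradients
# are summed rather than averaged across the accumulation window. The common
# averaged variant is sketched below (a standalone illustration, not the
# original method; all names are hypothetical):
def accumulated_train_steps(model, optimizer, scheduler, batches,
                            accum_steps, loss_fn):
    optimizer.zero_grad()
    for i, (inputs, targets) in enumerate(batches):
        loss = loss_fn(model(inputs), targets) / accum_steps  # average, not sum
        loss.backward()
        if (i + 1) % accum_steps == 0:
            optimizer.step()       # one real update per accumulation window
            scheduler.step()       # advance the LR schedule once per update
            optimizer.zero_grad()
# ------------------------------------------------------------------------------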
def train(self):
    trainset, train_dataloader, testset, test_dataloader = self.create_dataloader()

    model = Classifier(1, 256, trainset.num_classes).to(self.device)
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    model.train()

    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(trainset.ids))
    logger.info("  Batch size = %d", self.train_batchsize)
    logger.info("  Num steps = %d", self.train_steps)

    global_step, nb_tr_steps, tr_loss = 0, 0, 0
    best_MRR = 0
    train_dataloader = cycle(train_dataloader)
    for each_step in range(self.train_steps):
        bg, label = next(train_dataloader)
        prediction = model(bg)
        loss = loss_func(prediction, label)
        tr_loss += loss.item()
        train_loss = round(tr_loss / (nb_tr_steps + 1), 4)
        nb_tr_steps += 1
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        global_step += 1

        if (each_step + 1) % self.eval_steps == 0:
            tr_loss = 0
            nb_tr_steps = 0
            logger.info("***** Report result *****")
            logger.info("  %s = %s", 'global_step', str(global_step))
            logger.info("  %s = %s", 'train loss', str(train_loss))

            logger.info("***** Running evaluation *****")
            logger.info("  Num examples = %d", len(testset.ids))
            logger.info("  Batch size = %d", self.eval_batchsize)
            scores = []
            labels = []
            ids = testset.ids
            model.eval()
            for bg, label in test_dataloader:
                with torch.no_grad():
                    logits = model(bg).detach().cpu().numpy()
                label = label.detach().cpu().numpy()
                scores.append(logits)
                labels.append(label)
            scores = np.concatenate(scores, 0)
            labels = np.concatenate(labels, 0)
            model.train()
            assert len(ids) == len(scores) == len(labels)
            eval_DOUBAN_MRR, eval_DOUBAN_mrr, eval_DOUBAN_MAP, eval_Precision1 = compute_DOUBAN(
                ids, scores, labels)
            print('eval_MRR', eval_DOUBAN_MRR, eval_DOUBAN_mrr,
                  'eval_MAP', eval_DOUBAN_MAP,
                  'eval_Precision1', eval_Precision1)
            result = {
                'eval_MRR': eval_DOUBAN_MRR,
                'eval_MAP': eval_DOUBAN_MAP,
                'eval_Precision1': eval_Precision1,
                'global_step': global_step,
                'loss': train_loss
            }
            output_eval_file = os.path.join(self.output_dir, "eval_results.txt")
            with open(output_eval_file, "a") as writer:
                for key in sorted(result.keys()):
                    logger.info("  %s = %s", key, str(result[key]))
                    writer.write("%s = %s\n" % (key, str(result[key])))
                writer.write('*' * 80)
                writer.write('\n')
            # Keep the checkpoint with the best test MRR.
            if eval_DOUBAN_MRR > best_MRR:
                print("=" * 80)
                print("Best MRR", eval_DOUBAN_MRR)
                print("Saving Model......")
                best_MRR = eval_DOUBAN_MRR
                model_to_save = model.module if hasattr(model, 'module') else model
                output_model_file = os.path.join(self.output_dir, "pytorch_model.bin")
                torch.save(model_to_save.state_dict(), output_model_file)
                print("=" * 80)
            else:
                print("=" * 80)
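# --- Hedged note on `cycle` ---------------------------------------------------
# Both training loops draw batches via `cycle(train_dataloader)`. The helper is
# defined elsewhere; note that if it were `itertools.cycle`, the batches from
# the first pass would be cached and replayed verbatim, defeating per-epoch
# shuffling. A generator that re-iterates the DataLoader (and therefore
# reshuffles each epoch) looks like this sketch:
def cycle_sketch(dataloader):
    while True:
        for batch in dataloader:
            yield batch
# ------------------------------------------------------------------------------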
def test_eval(self):
    data = DATADOUBAN(debug=False, data_dir=self.data_dir)
    test_examples = data.read_examples(
        os.path.join(self.data_dir, 'test.csv'))
    print('number of eval examples:', len(test_examples))
    questions = [x.text_a for x in test_examples]
    ID = [x.guid for x in test_examples]
    test_features = data.convert_examples_to_features(
        test_examples, self.tokenizer, self.max_seq_length)
    all_input_ids = torch.tensor(data.select_field(test_features, 'input_ids'),
                                 dtype=torch.long)
    all_input_mask = torch.tensor(data.select_field(test_features, 'input_mask'),
                                  dtype=torch.long)
    all_segment_ids = torch.tensor(data.select_field(test_features, 'segment_ids'),
                                   dtype=torch.long)
    all_label = torch.tensor([f.label for f in test_features], dtype=torch.long)

    test_data = TensorDataset(all_input_ids, all_input_mask,
                              all_segment_ids, all_label)
    # Run prediction for the full test set in original order.
    test_sampler = SequentialSampler(test_data)
    test_dataloader = DataLoader(test_data,
                                 sampler=test_sampler,
                                 batch_size=self.eval_batch_size)

    # Load the fine-tuned checkpoint saved during training.
    config = BertConfig.from_pretrained(self.model_name_or_path,
                                        num_labels=self.num_labels)
    model = BertForSequenceClassification.from_pretrained(
        os.path.join(self.output_dir, "pytorch_model.bin"),
        self.args,
        config=config)
    model.to(self.device)
    model.eval()

    inference_labels = []
    gold_labels = []
    scores = []
    for input_ids, input_mask, segment_ids, label_ids in test_dataloader:
        input_ids = input_ids.to(self.device)
        input_mask = input_mask.to(self.device)
        segment_ids = segment_ids.to(self.device)
        label_ids = label_ids.to(self.device)
        with torch.no_grad():
            logits = model(
                input_ids=input_ids,
                token_type_ids=segment_ids,
                attention_mask=input_mask).detach().cpu().numpy()
        label_ids = label_ids.to('cpu').numpy()
        scores.append(logits)
        inference_labels.append(np.argmax(logits, axis=1))
        gold_labels.append(label_ids)
    gold_labels = np.concatenate(gold_labels, 0)
    scores = np.concatenate(scores, 0)
    predicted_labels = np.concatenate(inference_labels, 0)
    # Compute evaluation metrics.
    assert len(ID) == scores.shape[0] == gold_labels.shape[0]
    eval_5R20 = compute_5R20(scores, gold_labels, questions)
    eval_DOUBAN_MRR, eval_DOUBAN_mrr, eval_DOUBAN_MAP, eval_Precision1 = compute_DOUBAN(
        ID, scores, gold_labels)
    print('eval_MRR', eval_DOUBAN_MRR, eval_DOUBAN_mrr,
          'eval_MAP', eval_DOUBAN_MAP,
          'eval_Precision1', eval_Precision1)
    print('eval_5R20', eval_5R20)
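# --- Hedged example (not from this codebase) -----------------------------------
# `select_field` comes from the data module and is not shown here. Given that
# its output feeds `torch.tensor(..., dtype=torch.long)` and yields a
# (num_examples, seq_len) tensor, it presumably gathers one per-example field
# into a list of lists; a minimal sketch under that assumption:
def select_field_sketch(features, field):
    # e.g. field='input_ids' -> [[id, id, ...], [id, id, ...], ...]
    return [getattr(f, field) for f in features]
# --------------------------------------------------------------------------------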