def post_processing_function(examples, features, predictions, stage="eval"):
    # Post-processing: we match the start logits and end logits to answers in the original context.
    predictions = postprocess_qa_predictions(
        examples=examples,
        features=features,
        predictions=predictions,
        version_2_with_negative=args.version_2_with_negative,
        n_best_size=args.n_best_size,
        max_answer_length=args.max_answer_length,
        null_score_diff_threshold=args.null_score_diff_threshold,
        output_dir=args.output_dir,
        prefix=stage,
    )
    # Format the result to the format the metric expects.
    if args.version_2_with_negative:
        formatted_predictions = [
            {"id": k, "prediction_text": v, "no_answer_probability": 0.0}
            for k, v in predictions.items()
        ]
    else:
        formatted_predictions = [
            {"id": k, "prediction_text": v} for k, v in predictions.items()
        ]
    references = [
        {"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples
    ]
    return EvalPrediction(predictions=formatted_predictions, label_ids=references)
def post_processing_function(examples, features, predictions):
    # Post-processing: we match the start logits and end logits to answers in the original context.
    predictions, scores_diff_json = postprocess_qa_predictions_with_beam_search(
        examples=examples,
        features=features,
        predictions=predictions,
        version_2_with_negative=data_args.version_2_with_negative,
        n_best_size=data_args.n_best_size,
        max_answer_length=data_args.max_answer_length,
        start_n_top=model.config.start_n_top,
        end_n_top=model.config.end_n_top,
        output_dir=training_args.output_dir,
        is_world_process_zero=trainer.is_world_process_zero(),
    )
    # Format the result to the format the metric expects.
    if data_args.version_2_with_negative:
        formatted_predictions = [
            {"id": k, "prediction_text": v, "no_answer_probability": scores_diff_json[k]}
            for k, v in predictions.items()
        ]
    else:
        formatted_predictions = [
            {"id": k, "prediction_text": v} for k, v in predictions.items()
        ]
    references = [
        {"id": ex["id"], "answers": ex[answer_column_name]}
        for ex in datasets["validation"]
    ]
    return EvalPrediction(predictions=formatted_predictions, label_ids=references)
def _post_processing_function(self, examples, features, predictions, output_dir):
    # Post-processing: we match the start logits and end logits to answers in the original context.
    data_args = self.data_args
    training_args = self.training_args
    predictions = postprocess_qa_predictions(
        examples=examples,
        features=features,
        predictions=predictions,
        version_2_with_negative=data_args.version_2_with_negative,
        n_best_size=data_args.n_best_size,
        max_answer_length=data_args.max_answer_length,
        null_score_diff_threshold=data_args.null_score_diff_threshold,
        output_dir=output_dir,
        is_world_process_zero=self.trainer.is_world_process_zero(),
    )
    # Format the result to the format the metric expects.
    if data_args.version_2_with_negative:
        formatted_predictions = [
            {"id": k, "prediction_text": v, "no_answer_probability": 0.0}
            for k, v in predictions.items()
        ]
    else:
        formatted_predictions = [
            {"id": k, "prediction_text": v} for k, v in predictions.items()
        ]
    references = [
        {"id": ex["id"], "answers": ex[self.answer_column_name]}
        for ex in self.datasets["validation"]
    ]
    return EvalPrediction(predictions=formatted_predictions, label_ids=references)
def _post_processing_function(self, examples, features, predictions, training_args):
    pred_results = postprocess_qa_predictions(
        examples=examples,
        features=features,
        predictions=predictions,
        training_args=training_args,
        topk=self.args.retriever.topk,
        max_answer_length=self.args.data.max_answer_length,
        output_dir=training_args.output_dir,
        prefix="test" if self.args.train.do_predict else "valid",
    )
    for k in pred_results.keys():
        assert k in ["predictions", "pororo_predictions"]

    formatted_predictions = [
        {"id": k, "prediction_text": v}
        for k, v in pred_results["predictions"].items()
    ]
    if training_args.do_predict:
        return formatted_predictions

    if not training_args.do_eval:
        # Reaching this point with both do_predict and do_eval set to False is unexpected.
        raise KeyError("run evaluate with do_predict or do_eval")

    references = [
        {"id": ex["id"], "answers": ex[self.answer_column_name]}
        for ex in self.eval_answers
    ]
    valid_results = {}
    valid_results["predictions"] = EvalPrediction(
        predictions=formatted_predictions, label_ids=references
    )

    if training_args.pororo_prediction:
        formatted_pororo_predictions = [
            {"id": k, "prediction_text": v}
            for k, v in pred_results["pororo_predictions"].items()
        ]
        valid_results["pororo_predictions"] = EvalPrediction(
            predictions=formatted_pororo_predictions, label_ids=references
        )

    return valid_results
def self_test():
    pred = EvalPrediction(
        label_ids=np.array([
            [-100, 1, -100],
            [2, -100, -100],
            [-100, -100, 3],
            [-100, -100, 4],
        ]),
        predictions=np.array([
            [-100, 1, -100],  # 1 true positive
            [2, -100, -100],  # 1 true positive
            [2, 6, 8],        # 1 missed label (wrong prediction); irrelevant positions are ignored
            [1, 7, 4],        # 1 true positive; irrelevant positions are ignored
        ]),
    )
    m = compute_metrics(pred)
    print(f"recall={m['recall']}")
    assert m['recall'] == 0.75
    print("Looks like it is working!")
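The self-test above assumes a `compute_metrics` that ignores label positions equal to -100 and scores recall as the fraction of the remaining labelled positions predicted exactly. A minimal sketch consistent with that test (hypothetical, not the original implementation):

import numpy as np
from transformers import EvalPrediction

def compute_metrics(p: EvalPrediction) -> dict:
    labels = np.asarray(p.label_ids)
    preds = np.asarray(p.predictions)
    relevant = labels != -100  # positions marked -100 are ignored
    hits = (preds[relevant] == labels[relevant]).sum()
    return {"recall": float(hits / relevant.sum())}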
def post_processing_function_for_eval(examples, features, predictions, train_args):
    # Post-processing: we match the start logits and end logits to answers in the original context.
    predictions = postprocess_qa_predictions(
        examples=examples,
        features=features,
        predictions=predictions,
        max_answer_length=token_args.max_answer_length,
        output_dir=train_args.output_dir,
    )
    # Format the result to the format the metric expects.
    formatted_predictions = [
        {"id": k, "prediction_text": v} for k, v in predictions.items()
    ]
    references = [
        {"id": ex["id"], "answers": ex[answer_column_name]}
        for ex in datasets["validation"]
    ]
    return EvalPrediction(predictions=formatted_predictions, label_ids=references)
def post_proc(self, xs, features, preds, stage="eval"):
    ps = self.params
    ys = proc_tests(
        examples=xs,
        features=features,
        predictions=preds,
        version_2_with_negative=ps.version_2_with_negative,
        n_best_size=ps.n_best_size,
        max_answer_length=ps.max_answer_length,
        null_score_diff_threshold=ps.null_score_diff_threshold,
        out_dir=ps.out_dir,
        prefix=stage,
    )
    if ps.version_2_with_negative:
        ys = [
            {"id": k, "prediction_text": v, "no_answer_probability": 0.0}
            for k, v in ys.items()
        ]
    else:
        ys = [{"id": k, "prediction_text": v} for k, v in ys.items()]
    ids = [{"id": x["id"], "answers": x[self.cols[EACH][2]]} for x in xs]
    return EvalPrediction(predictions=ys, label_ids=ids)
def post_processing_function(examples, features, predictions, text_data, data_args, training_args):
    '''Convert the model's predictions into text form.'''
    predictions = postprocess_qa_predictions(
        examples=examples,
        features=features,
        predictions=predictions,
        max_answer_length=data_args.max_answer_length,
        output_dir=training_args.output_dir,
    )
    formatted_predictions = [
        {"id": k, "prediction_text": last_processing(v)}
        for k, v in predictions.items()
    ]
    if training_args.do_predict:
        return formatted_predictions

    references = [
        {"id": ex["id"], "answers": ex["answers"]}
        for ex in text_data["validation"]
    ]
    return EvalPrediction(predictions=formatted_predictions, label_ids=references)
def post_proc(self, xs, features, preds, stage="eval"):
    ps = self.params
    ys, diff = proc_preds(
        examples=xs,
        features=features,
        predictions=preds,
        version_2_with_negative=ps.version_2_with_negative,
        n_best_size=ps.n_best_size,
        max_answer_length=ps.max_answer_length,
        start_n_top=self.model.config.start_n_top,
        end_n_top=self.model.config.end_n_top,
        out_dir=ps.out_dir,
        prefix=stage,
    )
    if ps.version_2_with_negative:
        ys = [
            {"id": k, "prediction_text": v, "no_answer_probability": diff[k]}
            for k, v in ys.items()
        ]
    else:
        ys = [{"id": k, "prediction_text": v} for k, v in ys.items()]
    ids = [{"id": x["id"], "answers": x[self.cols[EACH][2]]} for x in xs]
    return EvalPrediction(predictions=ys, label_ids=ids)
def prediction_loop(self, dataloader: DataLoader, description: str,
                    prediction_loss_only: Optional[bool] = None,
                    extract_path: Optional[str] = None,
                    cache_path: Optional[str] = None) -> PredictionOutput:
    """
    Prediction/evaluation loop, shared by :obj:`Trainer.evaluate()` and :obj:`Trainer.predict()`.

    Works both with or without labels.
    """
    prediction_loss_only = (prediction_loss_only if prediction_loss_only is not None
                            else self.args.prediction_loss_only)

    model = self.model
    # multi-gpu eval
    if self.args.n_gpu > 1:
        model = torch.nn.DataParallel(model)
    else:
        model = self.model
    # Note: in torch.distributed mode, there's no point in wrapping the model
    # inside a DistributedDataParallel as we'll be under `no_grad` anyways.

    batch_size = dataloader.batch_size
    eval_losses: List[float] = []
    hidden_states: torch.Tensor = None
    preds: torch.Tensor = None
    label_ids: torch.Tensor = None
    model.eval()

    if self.args.past_index >= 0:
        self._past = None

    # Unfortunate, but we'll run through the dataloader once to count the number of tokens
    # (or this could be pre-processed)
    if extract_path is not None:
        stimulus_mask = lambda tokens: (tokens != 101) & (tokens != 102) & (tokens != 0)
        cached_masks = None
        if osp.exists(f"{cache_path}.npy"):
            # np instead of torch, something's funky with Vivek's env.
            cached_masks = torch.from_numpy(np.load(f"{cache_path}.npy"))
        else:
            all_masks = None
            limit_tokens = self.custom_cfg.TASK.EXTRACT_TOKENS_LIMIT
            # Calculate the random ratio of tokens to grab (we specify number of tokens to extract)
            total_tokens = 0
            for inputs in dataloader:
                tokens = inputs["input_ids"]
                total_tokens += stimulus_mask(tokens).sum()
            subset_ratio = torch.true_divide(limit_tokens, total_tokens)
            # Seed, we want to be sure that we're finding the same stimuli

    disable_tqdm = not self.is_local_process_zero() or self.args.disable_tqdm
    samples_count = 0
    for inputs in tqdm(dataloader, desc=description, disable=disable_tqdm):
        loss, logits, labels, states = self.prediction_step(
            model, inputs, prediction_loss_only,
            output_hidden_states=extract_path is not None)
        batch_size = inputs[list(inputs.keys())[0]].shape[0]
        if loss is not None:
            eval_losses.append(loss * batch_size)
        if states is not None:
            # L + 1 [ Batch x Length x Hidden ] (layers and embedding)
            if cached_masks is not None:
                cached_masks = cached_masks.to(logits.device)
                mask = cached_masks[samples_count:samples_count + inputs["input_ids"].shape[0]]  # B x T
                mask = mask[:, :inputs["input_ids"].shape[1]]  # Dynamic padding
            else:
                subset_mask = torch.full(inputs["input_ids"].shape, subset_ratio, device=logits.device)
                mask = (torch.bernoulli(subset_mask).long() & stimulus_mask(inputs["input_ids"])).bool()  # B x T
                if all_masks is None:
                    all_masks = mask
                else:
                    all_masks = nested_concat(all_masks, mask, padding_index=-100)  # B x T
            # [1:] to drop embedding layer
            states = torch.stack(states)[1:].permute(1, 2, 0, 3)  # B x T x L x H
            target_tokens = states[mask]  # M x L x H
            if hidden_states is None:
                hidden_states = target_tokens
            else:
                hidden_states = torch.cat([hidden_states, target_tokens], dim=0)
        samples_count += batch_size
        if logits is not None:
            preds = logits if preds is None else nested_concat(preds, logits, padding_index=-100)
        if labels is not None:
            label_ids = labels if label_ids is None else nested_concat(label_ids, labels, padding_index=-100)

    if extract_path is not None:
        os.makedirs(osp.split(extract_path)[0], exist_ok=True)
        np.save(extract_path, hidden_states.half().cpu().numpy())  # half to save memory
        if cached_masks is None:
            os.makedirs(osp.split(cache_path)[0], exist_ok=True)
            np.save(cache_path, all_masks.cpu().numpy())

    if self.args.past_index and hasattr(self, "_past"):
        # Clean the state at the end of the evaluation loop
        delattr(self, "_past")

    if self.args.local_rank != -1:
        # In distributed mode, concatenate all results from all nodes:
        if preds is not None:
            preds = self.distributed_concat(preds, num_total_examples=self.num_examples(dataloader))
        if label_ids is not None:
            label_ids = self.distributed_concat(label_ids, num_total_examples=self.num_examples(dataloader))

    # Finally, turn the aggregated tensors into numpy arrays.
    if preds is not None:
        preds = preds.cpu().numpy()
    if label_ids is not None:
        label_ids = label_ids.cpu().numpy()

    if self.compute_metrics is not None and preds is not None and label_ids is not None:
        metrics = self.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids))
    else:
        metrics = {}
    if len(eval_losses) > 0:
        metrics["eval_loss"] = np.sum(eval_losses) / samples_count

    # Prefix all keys with eval_
    for key in list(metrics.keys()):
        if not key.startswith("eval_"):
            metrics[f"eval_{key}"] = metrics.pop(key)

    return PredictionOutput(predictions=preds, label_ids=label_ids, metrics=metrics)
def main():
    # _use_cuda()
    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
    model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    config = AutoConfig.from_pretrained(
        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
        num_labels=3,
    )

    # Set seed
    set_seed(training_args.seed)

    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
    )
    model = AutoModelForMultitaskSequenceClassification.from_pretrained(
        model_args.model_name_or_path,
        config=config,
    )
    # print(model.state_dict())

    # Fetch Datasets
    train_set = SarcArgDataset(_load_data(data_args), tokenizer) if training_args.do_train else None
    eval_dataset = SarcArgDataset(_load_data(data_args, evaluate=True), tokenizer) if training_args.do_eval else None

    def compute_metrics(p: EvalPrediction) -> Dict:
        preds = np.argmax(p.predictions, axis=1)
        return f1(preds, p.label_ids)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_set,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
    )

    # Training
    if training_args.do_train:
        trainer.train(
            model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None
        )
        trainer.save_model()
        tokenizer.save_pretrained(training_args.output_dir)

    # Evaluation
    results = {}
    if training_args.do_eval and training_args.local_rank in [-1, 0]:
        logger.info("*** Evaluate ***")

        eval_datasets = [eval_dataset]
        for eval_dataset in eval_datasets:
            result_set = trainer.evaluate(eval_dataset=eval_dataset)
            result = result_set[0].metrics
            output_eval_file = os.path.join(training_args.output_dir, f"eval_results_.txt")
            with open(output_eval_file, "w") as writer:
                logger.info("***** Eval results *****")
                for key, value in result.items():
                    logger.info("  %s = %s", key, value)
                    writer.write("%s = %s\n" % (key, value))
            results.update(result)

            preds_t1, label_ids_t1 = result_set[0].predictions, result_set[0].label_ids
            preds_t2, label_ids_t2 = result_set[1].predictions, result_set[1].label_ids
            preds_t1, labels_t1 = store_preds(EvalPrediction(predictions=preds_t1, label_ids=label_ids_t1))
            preds_t2, labels_t2 = store_preds(EvalPrediction(predictions=preds_t2, label_ids=label_ids_t2))

            data = _load_data(data_args, evaluate=True)
            context, reply = [], []
            for example in data:
                ctx, rpl = example.split('\t')[0:2]
                context.append(ctx)
                reply.append(rpl)

            output_score_file_t1 = os.path.join(training_args.output_dir, f"eval_preds_t1.txt")
            output_score_file_t2 = os.path.join(training_args.output_dir, f"eval_preds_t2.txt")
            with open(output_score_file_t1, "w") as writer:
                for i in range(len(context)):
                    writer.write("%s\t%s\t%s\t%s\n" % (context[i], reply[i], labels_t1[i], preds_t1[i]))
            with open(output_score_file_t2, "w") as writer:
                for i in range(len(context)):
                    writer.write("%s\t%s\t%s\t%s\n" % (context[i], reply[i], labels_t2[i], preds_t2[i]))

    return results
def _prediction_loop(
        self, dataloader: DataLoader, description: str,
        prediction_loss_only: Optional[bool] = None) -> PredictionOutput:
    """
    Prediction/evaluation loop, shared by `evaluate()` and `predict()`.

    Works both with or without labels.
    """
    prediction_loss_only = prediction_loss_only if prediction_loss_only is not None else self.args.prediction_loss_only

    model = self.model
    # multi-gpu eval
    if self.args.n_gpu > 1:
        model = torch.nn.DataParallel(model)
    else:
        model = self.model
    # Note: in torch.distributed mode, there's no point in wrapping the model
    # inside a DistributedDataParallel as we'll be under `no_grad` anyways.

    batch_size = dataloader.batch_size
    LOG.info("***** Running %s *****", description)
    LOG.info("  Num examples = %d", self.num_examples(dataloader))
    LOG.info("  Batch size = %d", batch_size)
    eval_losses: List[float] = []
    preds: torch.Tensor = None
    label_ids: torch.Tensor = None
    model.eval()

    # if is_torch_tpu_available():
    #     dataloader = pl.ParallelLoader(dataloader, [self.args.device]).per_device_loader(self.args.device)

    if self.args.past_index >= 0:
        past = None

    for inputs in tqdm(dataloader, desc=description):
        has_labels = any(
            inputs.get(k) is not None for k in ["labels", "lm_labels", "masked_lm_labels"])

        for k, v in inputs.items():
            if isinstance(v, torch.Tensor):
                inputs[k] = v.to(self.args.device)
        if self.args.past_index >= 0:
            inputs["mems"] = past

        with torch.no_grad():
            # if self.args.predict_from_generate:
            if True:
                max_length = model.config.max_length
                logits_out = model.generate(
                    inputs["input_ids"], attention_mask=inputs["attention_mask"])
                # in case the batch is shorter than max length, the output should be padded
                logits = model.config.eos_token_id * torch.ones(
                    (logits_out.shape[0], max_length),
                    dtype=logits_out.dtype,
                    device=logits_out.device)
                logits[:, :logits_out.shape[-1]] = logits_out
                if has_labels:
                    outputs = model(**inputs)
                    step_eval_loss = outputs[0]
                    eval_losses += [step_eval_loss.mean().item()]
            else:
                outputs = model(**inputs)
                if has_labels:
                    step_eval_loss, logits = outputs[:2]
                    eval_losses += [step_eval_loss.mean().item()]
                else:
                    logits = outputs[0]
                if self.args.past_index >= 0:
                    past = outputs[self.args.past_index if has_labels else self.args.past_index - 1]

        if not prediction_loss_only:
            if preds is None:
                preds = logits.detach()
            else:
                preds = torch.cat((preds, logits.detach()), dim=0)
            if inputs.get("labels") is not None:
                if label_ids is None:
                    label_ids = inputs["labels"].detach()
                else:
                    label_ids = torch.cat((label_ids, inputs["labels"].detach()), dim=0)

    if self.args.local_rank != -1:
        # In distributed mode, concatenate all results from all nodes:
        if preds is not None:
            preds = self.distributed_concat(preds, num_total_examples=self.num_examples(dataloader))
        if label_ids is not None:
            label_ids = self.distributed_concat(label_ids, num_total_examples=self.num_examples(dataloader))
    # elif is_torch_tpu_available():
    #     # tpu-comment: Get all predictions and labels from all worker shards of eval dataset
    #     if preds is not None:
    #         preds = xm.mesh_reduce("eval_preds", preds, torch.cat)
    #     if label_ids is not None:
    #         label_ids = xm.mesh_reduce("eval_label_ids", label_ids, torch.cat)

    # Finally, turn the aggregated tensors into numpy arrays.
    if preds is not None:
        preds = preds.cpu().numpy()
    if label_ids is not None:
        label_ids = label_ids.cpu().numpy()

    if self.compute_metrics is not None and preds is not None and label_ids is not None:
        metrics = self.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids))
    else:
        metrics = {}
    if len(eval_losses) > 0:
        metrics["eval_loss"] = np.mean(eval_losses)

    # Prefix all keys with eval_
    for key in list(metrics.keys()):
        if not key.startswith("eval_"):
            metrics[f"eval_{key}"] = metrics.pop(key)

    return PredictionOutput(predictions=preds, label_ids=label_ids, metrics=metrics)
def train_session(self, model_tts: ForwardTacotron, model_asr: Wav2Vec2ForCTC,
                  optimizer_tts: Optimizer, tts_session: ForwardSession,
                  asr_session: ASRSession, asr_trainer, optimizer_asr) -> None:
    # print(tts_session.path)
    # exit()
    asr_trainer_state = {'logs': []}
    current_step = model_tts.get_step()
    tts_training_steps = tts_session.max_step - current_step
    try:
        _, asr_current_step = get_last_checkpoint(
            './checkpoints/sme_speech_tts.asr_forward/', 'model_at')
        asr_training_steps = tts_session.max_step - asr_current_step
    except:
        asr_current_step = 0
        asr_training_steps = tts_training_steps

    total_iters = len(tts_session.train_set)
    epochs = tts_training_steps // total_iters + 1
    simple_table([
        ('TTS Steps', str(tts_training_steps // 1000) + 'k Steps'),
        ('ASR Steps', str(asr_training_steps // 1000) + 'k Steps'),
        ('Batch Size TTS', tts_session.bs),
        ('Learning Rate', tts_session.lr)
    ])

    for g in optimizer_tts.param_groups:
        g['lr'] = tts_session.lr

    m_loss_avg = Averager()
    dur_loss_avg = Averager()
    duration_avg = Averager()
    device = next(model_tts.parameters()).device  # use same device as model parameters
    warnings.filterwarnings('ignore', category=UserWarning)

    for e in range(1, epochs + 1):
        # tts train loop for epoch
        for i, (x, m, ids, x_lens, mel_lens, dur) in enumerate(tts_session.train_set, 1):
            start = time.time()
            model_tts.train()
            x, m, dur, x_lens, mel_lens = x.to(device), m.to(device), dur.to(device), \
                x_lens.to(device), mel_lens.to(device)

            m1_hat, m2_hat, dur_hat = model_tts(x, m, dur, mel_lens)

            m1_loss = self.l1_loss(m1_hat, m, mel_lens)
            m2_loss = self.l1_loss(m2_hat, m, mel_lens)
            dur_loss = self.l1_loss(dur_hat.unsqueeze(1), dur.unsqueeze(1), x_lens)

            tts_s_loss = m1_loss + m2_loss + 0.1 * dur_loss
            optimizer_tts.zero_grad()
            # tts_s_loss.backward()
            torch.nn.utils.clip_grad_norm_(model_tts.parameters(), hp.tts_clip_grad_norm)
            # optimizer_tts.step()
            m_loss_avg.add(m1_loss.item() + m2_loss.item())
            dur_loss_avg.add(dur_loss.item())
            step = model_tts.get_step()
            k = step // 1000

            duration_avg.add(time.time() - start)
            # pitch_loss_avg.add(pitch_loss.item())
            speed = 1. / duration_avg.get()
            msg_tts = f'| TTS MODEL (supervised training ): ' \
                      f'| Epoch: {e}/{epochs} ({i}/{total_iters}) | Mel Loss: {m_loss_avg.get():#.4} ' \
                      f'| Dur Loss: {dur_loss_avg.get():#.4} ' \
                      f'| {speed:#.2} steps/s | Step: {k}k | '

            if step % hp.forward_checkpoint_every == 0:
                ckpt_name = f'forward_step{k}K'
                save_checkpoint('forward', self.paths, model_tts, optimizer_tts,
                                name=ckpt_name, is_silent=True)

            if step % hp.forward_plot_every == 0:
                self.generate_plots(model_tts, tts_session)

            self.writer.add_scalar('Mel_Loss/train', m1_loss + m2_loss, model_tts.get_step())
            self.writer.add_scalar('Duration_Loss/train', dur_loss, model_tts.get_step())
            self.writer.add_scalar('Params/batch_size', tts_session.bs, model_tts.get_step())
            self.writer.add_scalar('Params/learning_rate', tts_session.lr, model_tts.get_step())

            stream(msg_tts)
            # print(msg_tts)
        # print(torch.cuda.memory_allocated(device=device))
        # model_tts = model_tts.to('cpu')

        for step, inputs in enumerate(asr_session.train_set):
            optimizer_asr.zero_grad()
            model_asr.to(device)
            for k, v in inputs.items():
                if isinstance(v, torch.Tensor):
                    inputs[k] = v.to(device)
            model_asr.train()
            outputs = model_asr(**inputs)
            asr_s_loss = outputs["loss"] if isinstance(outputs, dict) else outputs[0]
            # asr_s_loss = asr_s_loss.mean()
            msg_asr = f'| ASR MODEL (supervised training) : ' \
                      f'| Epoch: {e}/{epochs} ({step}/{len(asr_session.train_set)}) | Loss ASR: {asr_s_loss:#.4} ' \
                      f' ||||||||||||||||||||||'
            stream(msg_asr)
            # # model_asr.to('cuda')

        m_val_loss, dur_val_loss = self.evaluate(model_tts, tts_session.val_set)
        eval_tts_msg = f'| TTS MODEL (supervised eval ): ' \
                       f'| Epoch: {e}/{epochs} | Val Loss: {m_val_loss:#.4} ' \
                       f'| Dur Val Loss: {dur_val_loss:#.4} '
        stream(eval_tts_msg)
        tts_eval_loss = m_val_loss + dur_val_loss
        # print(eval_tts_msg)

        # ASR eval supervised
        print('\nEvaluating ASR model ...')
        # model_asr.to('cpu')
        asr_eval_loss = 0
        eval_wer = 0
        for step, inputs in enumerate(asr_session.test_set):
            asr_eval_loss_i, logits_a, labels_a = asr_trainer.prediction_step(model_asr, inputs, False)
            asr_eval_loss += asr_eval_loss_i
            logits_a.to('cpu')
            eval_wer_i = asr_trainer.compute_metrics(
                EvalPrediction(predictions=logits_a, label_ids=labels_a))
            eval_wer += eval_wer_i['wer']
            # print(eval_wer)
        eval_wer = eval_wer / step
        asr_eval_loss = asr_eval_loss / step
        msg_asr_eval = f'| ASR MODEL (supervised eval) : Epoch {e}/{epochs} | Loss ASR: {asr_eval_loss:#.4} | WER: {eval_wer} |||||||||||||||||||||||||||||||||||||||||||||||||||||'
        stream(msg_asr_eval)

        # dual transformation loop
        # tts_s_loss = 3
        # asr_s_loss = 1
        tts_u_loss, asr_u_loss = self.dual_transform(
            model_tts, model_asr, optimizer_tts, optimizer_asr,
            asr_session.test_set, m_loss_avg, dur_loss_avg, device,
            asr_current_step, e, epochs, duration_avg, total_iters,
            tts_s_loss, asr_s_loss, tts_session.lr, tts_session.path)
        step += 1

        asr_path = f'checkpoint-27364'
        modelasr_folder = './checkpoints/sme_speech_tts.asr_forward/'
        new_check = modelasr_folder + asr_path
        os.makedirs(new_check, exist_ok=True)
        # asr_path, asr_step = get_last_checkpoint(modelasr_folder, modelasr_name)
        save_checkpoint('forward', self.paths, model_tts, optimizer_tts, is_silent=True)
        # asr_u_loss = 2
        if "logs" not in asr_trainer_state:
            asr_trainer_state['logs'] = []
        asr_trainer_state['logs'].append({
            'step': step,
            'epoch': e,
            'asr_s_loss': int(asr_s_loss),
            'asr_u_loss': int(asr_u_loss),
            'tts_s_loss': int(tts_s_loss),
            'tts_u_loss': int(tts_u_loss),
            'tts_eval_loss': int(tts_eval_loss),
            'asr_eval_loss': int(asr_eval_loss),
            'eval_wer': eval_wer
        })
        with open(f'{modelasr_folder + asr_path}/dt_trainer_state.json', 'w') as f:
            json.dump(asr_trainer_state, f)
        model_asr.save_pretrained(f'{new_check}')
        torch.save(optimizer_asr.state_dict(), f'{new_check}/optimizer.pt')
        print("Exiting due to cuda OOM!")
        exit(11)
def _prediction_loop(
    self, dataloader: DataLoader, description: str, prediction_loss_only: Optional[bool] = None
) -> PredictionOutput:
    """
    Prediction/evaluation loop, shared by `evaluate()` and `predict()`.

    Works both with or without labels.
    """
    prediction_loss_only = prediction_loss_only if prediction_loss_only is not None else self.prediction_loss_only

    # multi-gpu eval
    if self.args.n_gpu > 1 and not isinstance(self.model, torch.nn.DataParallel):
        model = torch.nn.DataParallel(self.model)
    else:
        model = self.model
    model.to(self.args.device)

    logger.info("***** Running %s *****", description)
    logger.info("  Num examples = %d", len(dataloader.dataset))
    logger.info("  Batch size = %d", dataloader.batch_size)
    eval_losses: List[float] = []
    preds: np.ndarray = None
    label_ids: np.ndarray = None
    model.eval()

    for inputs in tqdm(dataloader, desc=description):
        has_labels = any(inputs.get(k) is not None for k in ["labels", "masked_lm_labels"])

        for k, v in inputs.items():
            inputs[k] = v.to(self.args.device)

        with torch.no_grad():
            outputs = model(**inputs)
            if has_labels:
                step_eval_loss, logits = outputs[:2]
                eval_losses += [step_eval_loss.mean().item()]
            else:
                logits = outputs[0]

        if not prediction_loss_only:
            if self.args.classify_or_insertion == 'classify':
                padding_max_length = self.model.config.max_position_embeddings
            elif self.args.classify_or_insertion == 'insertion':
                padding_max_length = self.model.encoder.config.max_position_embeddings
            mode = self.args.classify_or_insertion

            if preds is None:
                preds = self.padding_ndarray(
                    ndarray=self.convert_to_np_array(logits, description=mode),
                    padding_max_length=padding_max_length,
                    axis=1,
                    padding_id=-100
                )
            else:
                tmp_preds = self.padding_ndarray(
                    ndarray=self.convert_to_np_array(logits, description=mode),
                    padding_max_length=padding_max_length,
                    axis=1,
                    padding_id=-100
                )
                preds = np.append(preds, tmp_preds, axis=0)

            if inputs.get("labels") is not None:
                if label_ids is None:
                    label_ids = self.padding_ndarray(
                        ndarray=inputs["labels"].detach().cpu().numpy(),
                        padding_max_length=padding_max_length,
                        axis=1,
                        padding_id=-100
                    )
                else:
                    tmp_label_ids = self.padding_ndarray(
                        ndarray=inputs["labels"].detach().cpu().numpy(),
                        padding_max_length=padding_max_length,
                        axis=1,
                        padding_id=-100
                    )
                    label_ids = np.append(label_ids, tmp_label_ids, axis=0)
            elif inputs.get("masked_lm_labels") is not None:
                if label_ids is None:
                    label_ids = self.padding_ndarray(
                        ndarray=inputs["masked_lm_labels"].detach().cpu().numpy(),
                        padding_max_length=padding_max_length,
                        axis=1,
                        padding_id=-100
                    )
                else:
                    tmp_label_ids = self.padding_ndarray(
                        ndarray=inputs["masked_lm_labels"].detach().cpu().numpy(),
                        padding_max_length=padding_max_length,
                        axis=1,
                        padding_id=-100
                    )
                    label_ids = np.append(label_ids, tmp_label_ids, axis=0)

    if self.compute_metrics is not None and preds is not None and label_ids is not None:
        metrics = self.compute_metrics(self.args, EvalPrediction(predictions=preds, label_ids=label_ids))
    else:
        metrics = {}
    if len(eval_losses) > 0:
        metrics["loss"] = np.mean(eval_losses)

    return PredictionOutput(predictions=preds, label_ids=label_ids, metrics=metrics)
def post_processing_function(examples, features, predictions, stage="eval"):
    # Post-processing: we match the start logits and end logits to
    # answers in the original context.
    if data_args.beam_search:
        predictions, scores_diff_json = postprocess_qa_predictions_with_beam_search(
            examples=examples,
            features=features,
            predictions=predictions,
            version_2_with_negative=data_args.version_2_with_negative,
            n_best_size=data_args.n_best_size,
            max_answer_length=data_args.max_answer_length,
            start_n_top=model.config.start_n_top,
            end_n_top=model.config.end_n_top,
            output_dir=training_args.output_dir,
            # log_level=log_level,
            prefix=stage,
        )
    else:
        predictions = postprocess_qa_predictions(
            examples=examples,
            features=features,
            predictions=predictions,
            version_2_with_negative=data_args.version_2_with_negative,
            n_best_size=data_args.n_best_size,
            max_answer_length=data_args.max_answer_length,
            output_dir=training_args.output_dir,
            prefix=stage,
        )

    if data_args.version_2_with_negative:
        if data_args.beam_search:
            formatted_predictions = [
                {"id": k, "prediction_text": v, "no_answer_probability": scores_diff_json[k]}
                for k, v in predictions.items()
            ]
        else:
            formatted_predictions = [
                {"id": k, "prediction_text": v, "no_answer_probability": 0.0}
                for k, v in predictions.items()
            ]
    else:
        formatted_predictions = [
            {"id": k, "prediction_text": v} for k, v in predictions.items()
        ]
    references = [
        {"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples
    ]
    return EvalPrediction(predictions=formatted_predictions, label_ids=references)
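Functions with this signature are typically handed to the `QuestionAnsweringTrainer` defined in the Hugging Face `transformers` question-answering examples (`trainer_qa.py`), together with a `compute_metrics` that feeds the resulting `EvalPrediction` to a SQuAD-style metric. A rough sketch of that wiring, assuming the example's trainer and the `evaluate` library are available and that `model`, `training_args`, `data_args`, `train_dataset`, `eval_dataset`, `eval_examples`, `tokenizer`, and `data_collator` come from the surrounding script:

import evaluate
from transformers import EvalPrediction
from trainer_qa import QuestionAnsweringTrainer  # from the transformers QA example

# SQuAD v2 scoring expects a no_answer_probability field; SQuAD v1 does not.
metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")

def compute_metrics(p: EvalPrediction):
    return metric.compute(predictions=p.predictions, references=p.label_ids)

trainer = QuestionAnsweringTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset if training_args.do_train else None,
    eval_dataset=eval_dataset if training_args.do_eval else None,
    eval_examples=eval_examples if training_args.do_eval else None,
    tokenizer=tokenizer,
    data_collator=data_collator,
    post_process_function=post_processing_function,
    compute_metrics=compute_metrics,
)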
def prediction_loop(
    self,
    dataloader: DataLoader,
    description: str,
    prediction_loss_only: Optional[bool] = None,
    use_tqdm: Optional[bool] = True,
    reduce_other_outputs: Callable[[Tuple[torch.Tensor]], Any] = None,
) -> PredictionOutput:
    """
    Prediction/evaluation loop, shared by :obj:`Trainer.evaluate()` and :obj:`Trainer.predict()`.

    Works both with or without labels.
    """
    if hasattr(self, "_prediction_loop"):
        warnings.warn(
            "The `_prediction_loop` method is deprecated and won't be called in a future version, define `prediction_loop` in your subclass.",
            FutureWarning,
        )
        return self._prediction_loop(dataloader, description, prediction_loss_only=prediction_loss_only)

    prediction_loss_only = prediction_loss_only if prediction_loss_only is not None else self.prediction_loss_only

    model = self.model
    # multi-gpu eval
    if self.args.n_gpu > 1:
        model = torch.nn.DataParallel(model)
    else:
        model = self.model
    # Note: in torch.distributed mode, there's no point in wrapping the model
    # inside a DistributedDataParallel as we'll be under `no_grad` anyways.

    batch_size = dataloader.batch_size
    logger.info("***** Running %s *****", description)
    logger.info("  Num examples = %d", self.num_examples(dataloader))
    logger.info("  Batch size = %d", batch_size)
    eval_losses: List[float] = []
    preds: torch.Tensor = None
    label_ids: torch.Tensor = None
    other_outputs: Tuple[torch.Tensor] = None
    model.eval()

    if is_torch_tpu_available():
        dataloader = pl.ParallelLoader(dataloader, [self.args.device]).per_device_loader(self.args.device)

    if self.args.past_index >= 0:
        self._past = None

    data_iterators = tqdm(dataloader, desc=description) if use_tqdm else dataloader
    reduce_other_outputs = reduce_other_outputs if reduce_other_outputs is not None else self.reduce_other_outputs
    for inputs in data_iterators:
        loss, logits, labels, other_outputs_ = self.prediction_step(model, inputs, prediction_loss_only)
        if loss is not None:
            eval_losses.append(loss)
        if logits is not None:
            preds = logits if preds is None else torch.cat((preds, logits), dim=0)
        if labels is not None:
            label_ids = labels if label_ids is None else torch.cat((label_ids, labels), dim=0)
        if other_outputs_ is not None:
            # print(list(o.size() for o in other_outputs_))
            # if other_outputs is not None:
            #     print(list(o.size() for o in others))
            if reduce_other_outputs is not None:
                other_outputs = other_outputs_ if other_outputs is None else tuple(
                    reduce_other_outputs(output, output_)
                    for output, output_ in zip(other_outputs, other_outputs_)
                )  # without tuple(), the bare parentheses would produce a generator

    if self.args.past_index and hasattr(self, "_past"):
        # Clean the state at the end of the evaluation loop
        delattr(self, "_past")

    if self.args.local_rank != -1:
        # In distributed mode, concatenate all results from all nodes:
        if preds is not None:
            preds = self.distributed_concat(preds, num_total_examples=self.num_examples(dataloader))
        if label_ids is not None:
            label_ids = self.distributed_concat(label_ids, num_total_examples=self.num_examples(dataloader))
        # if other_outputs is not None:
        #     # [TODO] maybe an error!!! not familiar with distributed training
        #     other_outputs = tuple(self.distributed_concat(o, num_total_examples=self.num_examples(dataloader)) for o in other_outputs)
    elif is_torch_tpu_available():
        # tpu-comment: Get all predictions and labels from all worker shards of eval dataset
        if preds is not None:
            preds = xm.mesh_reduce("eval_preds", preds, torch.cat)
        if label_ids is not None:
            label_ids = xm.mesh_reduce("eval_label_ids", label_ids, torch.cat)
        # if others is not None:
        #     # [TODO] maybe an error!!! not familiar with TPU training, so it is not handled here
        #     others = tuple(xm.mesh_reduce("eval_label_ids", label_ids, torch.cat))

    # Finally, turn the aggregated tensors into numpy arrays.
    if preds is not None:
        preds = preds.cpu().numpy()
    if label_ids is not None:
        label_ids = label_ids.cpu().numpy()
    if other_outputs is not None:
        other_outputs = other_outputs  # assume everything was already handled in self.reduce_other_outputs

    if self.compute_metrics is not None and preds is not None and label_ids is not None:
        metrics = self.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids))
    else:
        metrics = {}
    if len(eval_losses) > 0:
        metrics[f"{description}_loss"] = np.mean(eval_losses)

    # Prefix all keys with the description (instead of the usual eval_)
    # for key in list(metrics.keys()):
    #     if not key.startswith("eval_"):
    #         metrics[f"eval_{key}"] = metrics.pop(key)
    for key in list(metrics.keys()):
        if not key.startswith(description):
            tqdm_prefix = ""
            new_key = key
            if key[0] == "_":
                tqdm_prefix = "_"
                new_key = key[1:]
            metrics[tqdm_prefix + description + "_" + new_key] = metrics.pop(key)

    return PredictionOutput(predictions=preds, label_ids=label_ids, metrics=metrics, other_outputs=other_outputs)