def train_one_epoch(model, optimizer, scheduler, train_loader, logger, args):
    device = torch.device(args.device)
    for data_blob in logger.log_every(train_loader):
        optimizer.zero_grad()
        image1, image2, flow_gt, valid_flow_mask = (x.to(device) for x in data_blob)
        flow_predictions = model(image1, image2, num_flow_updates=args.num_flow_updates)

        loss = utils.sequence_loss(flow_predictions, flow_gt, valid_flow_mask, args.gamma)
        metrics, _ = utils.compute_metrics(flow_predictions[-1], flow_gt, valid_flow_mask)
        metrics.pop("f1")
        logger.update(loss=loss, **metrics)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

        optimizer.step()
        scheduler.step()

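# utils.sequence_loss is not shown in this snippet. Below is a minimal sketch of a
# gamma-weighted sequence loss for iterative flow estimators such as RAFT, assuming
# the usual recipe (exponentially larger weights for later refinement iterations,
# L1 distance, invalid pixels masked out). The project's own helper may differ,
# e.g. it may also exclude very large ground-truth flows or average only over
# valid pixels. Hypothetical sketch, not the repository's implementation.
import torch

def sequence_loss_sketch(flow_preds, flow_gt, valid_flow_mask, gamma=0.8):
    # flow_preds: list of (N, 2, H, W) predictions, one per refinement iteration
    num_iters = len(flow_preds)
    loss = flow_gt.new_zeros(())
    for i, pred in enumerate(flow_preds):
        weight = gamma ** (num_iters - i - 1)          # later iterations weigh more
        diff = (pred - flow_gt).abs()                  # per-pixel L1 distance
        if valid_flow_mask is not None:
            diff = diff * valid_flow_mask[:, None, :, :]  # zero out invalid pixels
        loss = loss + weight * diff.mean()
    return loss
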
def evaluate(self, mode):
    # We use test dataset because semeval doesn't have dev dataset
    if mode == 'test':
        dataset = self.test_dataset
    elif mode == 'dev':
        dataset = self.dev_dataset
    else:
        raise Exception("Only dev and test dataset available")

    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.args.batch_size)

    # Eval!
    logger.info("***** Running evaluation on %s dataset *****", mode)
    logger.info("  Num examples = %d", len(dataset))
    logger.info("  Batch size = %d", self.args.batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None

    self.model.eval()

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = tuple(t.to(self.device) for t in batch)
        with torch.no_grad():
            inputs = {'input_ids': batch[0],
                      'attention_mask': batch[1],
                      'token_type_ids': batch[2],
                      'labels': batch[3],
                      'e1_mask': batch[4],
                      'e2_mask': batch[5]}
            outputs = self.model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1

        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs['labels'].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    results = {"loss": eval_loss}
    preds = np.argmax(preds, axis=1)
    result = compute_metrics(preds, out_label_ids)
    results.update(result)

    logger.info("***** Eval results *****")
    for key in sorted(results.keys()):
        logger.info("  %s = %s", key, str(results[key]))

    return results

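# compute_metrics in the classification snippets above is imported from each
# project's utils and is not shown here. A minimal, hypothetical sketch of what
# such a helper often returns for single-label classification (accuracy plus
# macro F1); the real projects may report task-specific scores instead.
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

def classification_metrics_sketch(preds: np.ndarray, labels: np.ndarray) -> dict:
    # preds and labels: 1-D arrays of class indices of equal length
    return {
        "acc": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds, average="macro"),
    }
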
def inner_loop(blob):
    if blob[0].dim() == 3:
        # input is not batched, so we add an extra dim for consistency
        blob = [x[None, :, :, :] if x is not None else None for x in blob]

    image1, image2, flow_gt = blob[:3]
    valid_flow_mask = None if len(blob) == 3 else blob[-1]

    image1, image2 = image1.to(device), image2.to(device)

    padder = utils.InputPadder(image1.shape, mode=padder_mode)
    image1, image2 = padder.pad(image1, image2)

    flow_predictions = model(image1, image2, num_flow_updates=num_flow_updates)
    flow_pred = flow_predictions[-1]
    flow_pred = padder.unpad(flow_pred).cpu()

    metrics, num_pixels_tot = utils.compute_metrics(flow_pred, flow_gt, valid_flow_mask)

    # We compute per-pixel epe (epe) and per-image epe (called f1-epe in the RAFT paper).
    # per-pixel epe: average epe over all pixels of all images
    # per-image epe: average epe of each image independently, then average over images
    for name in ("epe", "1px", "3px", "5px", "f1"):  # f1 is called f1-all in the paper
        logger.meters[name].update(metrics[name], n=num_pixels_tot)

    logger.meters["per_image_epe"].update(metrics["epe"], n=batch_size)

def evaluate_tfidf(index, tokenized_candidates, tfidf_corpus, tokenized_names):
    metrics = []
    for i, example in tqdm(enumerate(tfidf_corpus)):
        # indices of the 5 most similar candidates, most similar first
        # (the original slice [-1:-5:-1] only returned 4 indices)
        top_5_idx = np.argsort(index.get_similarities(example))[-1:-6:-1]
        candidates = [tokenized_candidates[j] for j in top_5_idx]
        metrics.append(compute_metrics(tokenized_names[i], candidates))
    return pd.DataFrame(metrics)

def evaluate(args, model, eval_dataset):
    batch_size = args.batch_size
    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=batch_size)

    logger.info("***** Running normal evaluation *****")
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", batch_size)
    eval_loss = 0.
    eval_steps = 0
    preds = None
    out_label_ids = None

    for batch in tqdm.tqdm(eval_dataloader):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            outputs = _predict(model, args.model_type, batch)
            tmp_eval_loss, logits = outputs[:2]
            eval_loss += np.mean(tmp_eval_loss.tolist())
        eval_steps += 1

        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = batch[3].detach().cpu().numpy() if args.model_type != 'char-cnn' else batch[1].detach().cpu().numpy()
        else:
            label_ids = batch[3].detach().cpu().numpy() if args.model_type != 'char-cnn' else batch[1].detach().cpu().numpy()
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)  # (B, 2)
            out_label_ids = np.append(out_label_ids, label_ids, axis=0)  # (B,)

    preds = np.argmax(preds, axis=1)
    acc = utils.compute_metrics(preds, out_label_ids)
    logger.info("eval result acc={:.4f} loss={:.2f}".format(acc, eval_loss / eval_steps))
    return acc

def evaluate(self, dataset, mode="test"):
    # We use test dataset because semeval doesn't have dev dataset
    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.args.eval_batch_size)

    # Eval!
    logger.info("***** Running evaluation on %s dataset *****", mode)
    logger.info("  Num examples = %d", len(dataset))
    logger.info("  Batch size = %d", self.args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None

    self.model.eval()

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = tuple(t.to(self.device) for t in batch)
        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
                "labels": batch[3],
                "e1_mask": batch[4],
                "e2_mask": batch[5],
            }
            outputs = self.model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1

        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs["labels"].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    results = {"loss": eval_loss}
    preds = np.argmax(preds, axis=1)

    write_prediction(self.args, os.path.join(self.args.eval_dir, "proposed_answers.txt"), preds)

    result = compute_metrics(preds, out_label_ids)
    results.update(result)

    logger.info("***** Eval results *****")
    for key in sorted(results.keys()):
        logger.info("  {:15}: {:.4f}".format(key, results[key]))

    return results

def compute_test_metrics(self, X_test, y_test, models_trained):
    metrics = {}
    for name, model in models_trained:
        metrics[name] = compute_metrics(X_test, y_test, model)
    return metrics

def for_loop(net, data_loader, train_optimizer):
    is_train = train_optimizer is not None
    net.train() if is_train else net.eval()

    total_loss, total_time, total_num, preds, targets = 0.0, 0.0, 0, [], []
    data_bar = tqdm(data_loader, dynamic_ncols=True)
    with (torch.enable_grad() if is_train else torch.no_grad()):
        for data, target, grad, boundary, name in data_bar:
            data, target, grad, boundary = data.cuda(), target.cuda(), grad.cuda(), boundary.cuda()
            torch.cuda.synchronize()
            start_time = time.time()
            seg, edge = net(data, grad)
            prediction = torch.argmax(seg.detach(), dim=1)
            torch.cuda.synchronize()
            end_time = time.time()
            semantic_loss = semantic_criterion(seg, target)
            edge_loss = edge_criterion(edge, target, boundary)
            task_loss = task_criterion(seg, edge, target)
            loss = semantic_loss + 20 * edge_loss + task_loss

            if is_train:
                train_optimizer.zero_grad()
                loss.backward()
                train_optimizer.step()

            total_num += data.size(0)
            total_time += end_time - start_time
            total_loss += loss.item() * data.size(0)
            preds.append(prediction.cpu())
            targets.append(target.cpu())

            if not is_train:
                if data_loader.dataset.split == 'test':
                    # revert train id to regular id
                    for key in sorted(trainId2label.keys(), reverse=True):
                        prediction[prediction == key] = trainId2label[key].id
                # save pred images
                save_root = '{}/{}_{}_{}/{}'.format(save_path, backbone_type, crop_h, crop_w,
                                                    data_loader.dataset.split)
                if not os.path.exists(save_root):
                    os.makedirs(save_root)
                for pred_tensor, pred_name in zip(prediction, name):
                    pred_img = ToPILImage()(pred_tensor.unsqueeze(dim=0).byte().cpu())
                    if data_loader.dataset.split == 'val':
                        pred_img.putpalette(get_palette())
                    pred_name = pred_name.replace('leftImg8bit', 'color')
                    path = '{}/{}'.format(save_root, pred_name)
                    pred_img.save(path)

            data_bar.set_description('{} Epoch: [{}/{}] Loss: {:.4f} FPS: {:.0f}'
                                     .format(data_loader.dataset.split.capitalize(), epoch, epochs,
                                             total_loss / total_num, total_num / total_time))

    # compute metrics
    preds = torch.cat(preds, dim=0)
    targets = torch.cat(targets, dim=0)
    pa, mpa, class_iou, category_iou = compute_metrics(preds, targets)
    print('{} Epoch: [{}/{}] PA: {:.2f}% mPA: {:.2f}% Class_mIOU: {:.2f}% Category_mIOU: {:.2f}%'
          .format(data_loader.dataset.split.capitalize(), epoch, epochs,
                  pa * 100, mpa * 100, class_iou * 100, category_iou * 100))

    return total_loss / total_num, pa * 100, mpa * 100, class_iou * 100, category_iou * 100

def main(args):
    data_config = load_config_from_json(args.data_config_path)
    model_config = load_config_from_json(os.path.join(args.saved_model_path, "config.jsonl"))

    # initialize model
    model = SFNet(model_config["sfnet"])
    model = model.to(device)

    if not os.path.exists(args.saved_model_path):
        raise FileNotFoundError(args.saved_model_path)

    checkpoint = os.path.join(args.saved_model_path, args.checkpoint)
    model.load_state_dict(torch.load(checkpoint, map_location="cpu"))
    print("Model loaded from %s" % (args.saved_model_path))

    # trackers to keep true labels and predicted probabilities
    target_tracker = []
    pred_tracker = []

    print("Preparing test data ...")
    dataset = ModCloth(data_config, split="test")
    data_loader = DataLoader(
        dataset=dataset,
        batch_size=model_config["trainer"]["batch_size"],
        shuffle=False,
    )

    print("Evaluating model on test data ...")
    model.eval()
    with torch.no_grad():
        for iteration, batch in enumerate(data_loader):
            for k, v in batch.items():
                if torch.is_tensor(v):
                    batch[k] = to_var(v)

            # Forward pass
            _, pred_probs = model(batch)

            target_tracker.append(batch["fit"].cpu().numpy())
            pred_tracker.append(pred_probs.cpu().data.numpy())

    # drop the last (possibly smaller) batch so the per-batch arrays stack evenly
    target_tracker = np.stack(target_tracker[:-1]).reshape(-1)
    pred_tracker = np.stack(pred_tracker[:-1], axis=0).reshape(-1, model_config["sfnet"]["num_targets"])

    precision, recall, f1_score, accuracy, auc = compute_metrics(target_tracker, pred_tracker)
    print("-" * 50)
    print("Metrics:\n Precision = {:.3f}\n Recall = {:.3f}\n F1-score = {:.3f}\n Accuracy = {:.3f}\n AUC = {:.3f}\n "
          .format(precision, recall, f1_score, accuracy, auc))
    print("-" * 50)

def main(args):
    video = imageio.get_reader(args.video)
    n_frames = video.count_frames()
    fps = video.get_meta_data()['fps']
    frame_w, frame_h = video.get_meta_data()['size']

    model = load_model(args.model, compile=False)
    input_shape = model.input.shape[1:3]

    # default RoI
    if None in (args.rl, args.rt, args.rr, args.rb):
        side = min(frame_w, frame_h)
        args.rl = (frame_w - side) / 2
        args.rt = (frame_h - side) / 2
        args.rr = (frame_w + side) / 2
        args.rb = (frame_h + side) / 2

    crop = (args.rl, args.rt, args.rr, args.rb)

    def preprocess(frame):
        frame = Image.fromarray(frame)
        eye = frame.crop(crop)
        eye = ImageOps.grayscale(eye)
        eye = eye.resize(input_shape)
        return eye

    def predict(eye):
        eye = np.array(eye).astype(np.float32) / 255.0
        eye = eye[None, :, :, None]
        return model.predict(eye)

    out_video = imageio.get_writer(args.output_video, fps=fps)

    cropped = map(preprocess, video)
    frames_and_predictions = map(lambda x: (x, predict(x)), cropped)

    with open(args.output_csv, 'w') as out_csv:
        print('frame,pupil-area,pupil-x,pupil-y,eye,blink', file=out_csv)
        for idx, (frame, predictions) in enumerate(tqdm(frames_and_predictions, total=n_frames)):
            pupil_map, tags = predictions
            is_eye, is_blink = tags.squeeze()
            (pupil_y, pupil_x), pupil_area = compute_metrics(pupil_map, thr=args.thr, nms=True)

            row = [idx, pupil_area, pupil_x, pupil_y, is_eye, is_blink]
            row = ','.join(list(map(str, row)))
            print(row, file=out_csv)

            img = draw_predictions(frame, predictions, thr=args.thr)
            img = np.array(img)
            out_video.append_data(img)

    out_video.close()

def main():
    with open('config.json', 'r', encoding='utf-8') as f:
        args = AttrDict(json.load(f))
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    tokenizer = load_tokenizer(args)

    checkpoints = sorted([
        dir for dir in glob.glob(f'{args.save_model_dir}/*') if os.path.isdir(dir)
    ])
    if not args.eval_all_ckpts:
        checkpoints = checkpoints[-1:]

    results = {}
    eval_preds, eval_labels = [], []
    for ckpt in checkpoints:
        steps = ckpt.split('-')[-1]
        model = AutoModelForSequenceClassification.from_pretrained(ckpt).to(device)

        test_dataset = DATASET_LIST[args.model_mode](args, tokenizer, "test")
        test_dataloader = DataLoader(dataset=test_dataset,
                                     sampler=SequentialSampler(test_dataset),
                                     batch_size=args.eval_batch_size)

        all_preds, all_out_label_ids, texts = predict(args, model, tokenizer, device, test_dataloader)
        all_preds_argmax = np.argmax(all_preds, axis=1)
        eval_preds.append(all_preds_argmax)
        eval_labels.append(all_out_label_ids)
        results[steps] = compute_metrics(all_preds_argmax, all_out_label_ids)

        result = [{
            "id": idx,
            "text": t[0],
            "label": test_dataset.answer2labels[an]
        } for idx, (t, an) in enumerate(zip(texts, all_preds_argmax))]
        result = {'annotations': result}

        with open(os.path.join(ckpt, 'results.json'), 'w', encoding='utf-8') as f:
            json.dump(result, f, ensure_ascii=False, indent='\t')

    with open(os.path.join(args.save_model_dir, 'eval_results.txt'), 'w', encoding='utf-8') as f:
        for idx, key in enumerate(sorted(results.keys())):
            print(f"{key}: {str(results[key]['acc'])}")
            print(confusion_matrix(eval_labels[idx], eval_preds[idx]).tolist())
            print()
            f.write(f"{key}: {str(results[key]['acc'])}\n")
            f.write(f"{confusion_matrix(eval_labels[idx], eval_preds[idx]).tolist()}\n\n")

def read_metrics_zhou_results(results, record):
    tp, tn, fp, fn = 0, 0, 0, 0
    tp = int(results[record][0])
    tn = int(results[record][1])
    fp = int(results[record][2])
    fn = int(results[record][3])
    se, sp, ppv, acc = utils.compute_metrics(tp, tn, fp, fn)
    mcc = utils.compute_mcc(tp, tn, fp, fn)
    return [record, "ZHOU", tp, tn, fp, fn, se, sp, ppv, acc, mcc]

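# utils.compute_metrics and utils.compute_mcc above are not shown. A minimal sketch
# under the assumption that they implement the standard confusion-matrix formulas
# (sensitivity, specificity, positive predictive value, accuracy, and Matthews
# correlation coefficient); the project's helpers may guard against zero
# denominators differently. Hypothetical sketch, not the repository's code.
import math

def confusion_matrix_metrics_sketch(tp, tn, fp, fn):
    se = tp / (tp + fn) if (tp + fn) else 0.0    # sensitivity / recall
    sp = tn / (tn + fp) if (tn + fp) else 0.0    # specificity
    ppv = tp / (tp + fp) if (tp + fp) else 0.0   # positive predictive value / precision
    acc = (tp + tn) / (tp + tn + fp + fn)        # accuracy
    return se, sp, ppv, acc

def mcc_sketch(tp, tn, fp, fn):
    denom = math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    return (tp * tn - fp * fn) / denom if denom else 0.0
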
def local_train(stemmer=data_provider.NoStemmer(), text_representation='bag-of-words', C=1, max_iter=10000):
    data_provider.STATE['stemmer'] = stemmer
    X, Y = data_provider.get_data(input_format='hot_vector',
                                  output_format='numerical',
                                  ngrams=text_representation == 'ngrams',
                                  all_data=True)
    model = create_model(C, max_iter)

    X_train, X_val = split(X, 0.9)
    Y_train, Y_val = split(Y, 0.9)

    data_provider.STATE = data_provider.initial_state()

    del X, Y
    gc.collect()

    # X_train, X_val, Y_train, Y_val = np.array(X_train), np.array(X_val), np.array(Y_train), np.array(Y_val)
    data_provider.STATE = data_provider.initial_state()

    print(">>> {} {} {} {} {}".format(type(stemmer).__name__, text_representation, 'svm', C, max_iter))
    start = time.time()
    model.fit(X_train, Y_train)
    print(">>> TRAINING TIME: {}s".format(time.time() - start))

    Y_pred = model.predict(X_val)
    compute_metrics(Y_val, Y_pred)

def predict(self):
    logger.info("***** Model Loaded *****")
    test_loader = build_loader(self.args, self.tokenizer, 'test')

    nb_eval_steps = 0
    preds = None
    out_label_ids = None

    self.model.eval()
    for batch in tqdm(test_loader, desc="Predicting"):
        batch = tuple(t.to(self.device) for t in batch)
        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2]
            }
            outputs = self.model(**inputs)
            pooled_output = outputs[1]
            logits = self.classifier(pooled_output)

        nb_eval_steps += 1
        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = batch[3].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, batch[3].detach().cpu().numpy(), axis=0)

    results = {}
    preds = np.argmax(preds, axis=1)
    result = compute_metrics(preds, out_label_ids)
    results.update(result)

    logger.info("***** Eval results *****")
    for key in sorted(results.keys()):
        logger.info("  %s = %s", key, str(results[key]))

    # with open(f'{self.args.test_data_dir}_test.json', 'r', encoding='utf-8') as f, \
    #         open(f'wrong_sports.txt', "w", encoding="utf-8") as fw:
    #     data = json.load(f)
    #     for line, pred in zip(data, preds):
    #         if line['sentiment'] != pred:
    #             fw.write(f"{line['text']}\t{line['sentiment']}\t{pred}\n")

    # # Write to output file
    # with open(self.args.output_file, "w", encoding="utf-8") as f:
    #     for pred in preds:
    #         f.write("{}\n".format(pred))

    logger.info("Prediction Done!")

def evaluate(self, eval_dataloader, mode):
    logger.info("***** Running evaluation on %s dataset *****", mode)
    logger.info("  Num examples = %d", len(eval_dataloader))
    logger.info("  Batch size = %d", self.args.eval_batch_size)
    eval_loss = 0.0
    preds = None
    out_label_ids = None
    loss_fct = nn.CrossEntropyLoss()
    nb_eval_steps = 0

    self.model.eval()
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = tuple(t.to(self.device) for t in batch)
        with torch.no_grad():
            inputs = {
                'input_ids': batch[0],
                'attention_mask': batch[1],
                'token_type_ids': batch[2]
            }
            labels = batch[3]
            outputs = self.model(**inputs)
            pooled_output = outputs[1]
            pooled_output = self.dropout(pooled_output)
            logits = self.classifier(pooled_output)
            loss = loss_fct(logits.view(-1, 2), labels.view(-1))
            eval_loss += loss.item()

        nb_eval_steps += 1
        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = labels.detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, labels.detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    results = {"loss": eval_loss}
    preds = np.argmax(preds, axis=1)
    result = compute_metrics(preds, out_label_ids)
    results.update(result)

    logger.info("***** Eval results *****")
    for key in sorted(results.keys()):
        logger.info("  %s = %s", key, str(results[key]))

    return results['loss'], results['acc']

def evaluate(self, dataset_orig_test, verbose=True):
    assert self.classifier, 'There is no model to use. Please fit the model first.'
    best_ultimate_thres = self.best_ultimate_thres

    # Transform into standardized dataframe
    if not self.is_valid:
        # dataset_orig_test[self.label_name] = self.favorable_classes[0]
        dataset_transf_test = generate_formatted_dataframe(
            dataset_orig_test,
            label_name=self.label_name,
            favorable_classes=self.favorable_classes,
            protected_attribute_names=self.protected_attribute_names,
            privileged_classes=self.privileged_classes,
            categorical_features=self.categorical_features,
            features_to_keep=self.features_to_keep,
            features_to_drop=self.features_to_drop,
            na_values=self.na_values,
            custom_preprocessing=self.custom_preprocessing,
            metadata=self.metadata)
        print("Data has been transformed into standardized dataframe.")
    else:
        dataset_transf_test = dataset_orig_test

    dataset_transf_test_pred = dataset_transf_test.copy(deepcopy=True)
    X_test = dataset_transf_test_pred.features
    y_test = dataset_transf_test_pred.labels

    # Predict_proba on test data
    pos_ind = dataset_transf_test_pred.favorable_label
    y_pred = self.classifier.predict_proba(X_test)[:, int(pos_ind)].reshape(-1, 1)

    fav_inds = y_pred > best_ultimate_thres
    dataset_transf_test_pred.labels[fav_inds] = dataset_transf_test_pred.favorable_label
    dataset_transf_test_pred.labels[~fav_inds] = dataset_transf_test_pred.unfavorable_label

    metric_test_aft = compute_metrics(dataset_transf_test, dataset_transf_test_pred,
                                      self.unprivileged_groups, self.privileged_groups, disp=True)

    if verbose:
        print("Optimal classification threshold (after fairness processing) = %.4f" % best_ultimate_thres)
        display(pd.DataFrame(metric_test_aft, columns=metric_test_aft.keys(), index=[0]))

    return metric_test_aft

def evaluate(self):
    # self.load_model()  # Load model
    eval_sampler = SequentialSampler(self.test_dataset)
    eval_dataloader = DataLoader(self.test_dataset, sampler=eval_sampler, batch_size=self.config.batch_size)

    # Eval!
    logger.info("***** Running evaluation *****")
    logger.info("  Num examples = %d", len(self.test_dataset))
    logger.info("  Batch size = %d", self.config.batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None
    results = {}

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        self.model.eval()
        batch = tuple(t.to(self.device) for t in batch)
        with torch.no_grad():
            inputs = {'input_ids': batch[0],
                      'attention_mask': batch[1],
                      'token_type_ids': batch[2],
                      'labels': batch[3],
                      'e1_mask': batch[4],
                      'e2_mask': batch[5]}
            outputs = self.model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1

        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs['labels'].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    preds = np.argmax(preds, axis=1)
    result = compute_metrics(preds, out_label_ids)
    results.update(result)

    logger.info("***** Eval results *****")
    for key in sorted(result.keys()):
        logger.info("  %s = %s", key, str(result[key]))

    write_prediction(os.path.join(self.config.eval_dir, "proposed_answers.txt"), preds)

    return results

def evaluate(self, eval_dataloader, mode):
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None

    self.model.eval()
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = batch[:-1]
        batch = tuple(t.to(self.device) for t in batch)
        with torch.no_grad():
            inputs = {
                'input_ids': batch[0],
                'attention_mask': batch[1],
                'token_type_ids': batch[2],
                'labels': batch[3]
            }
            outputs = self.model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1

        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs['labels'].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    results = {"loss": eval_loss}
    preds = np.argmax(preds, axis=1)
    result = compute_metrics(preds, out_label_ids)
    results.update(result)

    logger.info("***** Eval results *****")
    for key in sorted(results.keys()):
        logger.info("  %s = %s", key, str(results[key]))

    return results['loss'], results['acc']

def main():
    args = parseArguments()
    data, t = DataLoader.load_data(args['dataFile'], args['trainingPoints'], args['validationPoints'])
    _SVM = SVM(args['B'], args['K'], args['C'], args['gamma'], args['xi'],
               args['trainingPoints'], args['type'])
    alpha, b = _SVM.train_SVM(data, t)
    if args['fig']:
        utils.plot_figure(_SVM, alpha, data, t, b, args['trainingPoints'], args['type'])
    precision, recall, f_score, accuracy = utils.compute_metrics(
        _SVM, alpha, data, t, b, args['trainingPoints'], args['validationPoints'])
    print(f'{precision=} {recall=} {f_score=} {accuracy=}')

def test(self):
    logger.info("***** Model Loaded *****")
    test_loader = build_loader(self.args, self.tokenizer, 'test')

    preds = None
    out_label_ids = None
    nb_eval_steps = 0

    self.model.eval()
    for batch in tqdm(test_loader, desc="Predicting"):
        batch = tuple(t.to(self.device) for t in batch)
        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
                "labels": None
            }
            outputs = self.model(**inputs)
            logits = outputs[0]

        nb_eval_steps += 1
        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = batch[3].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, batch[3].detach().cpu().numpy(), axis=0)

    results = {}
    preds = np.argmax(preds, axis=1)
    result = compute_metrics(preds, out_label_ids)
    results.update(result)

    logger.info("***** Eval results *****")
    for key in sorted(results.keys()):
        logger.info("  %s = %s", key, str(results[key]))

    # Write to output file
    with open(self.args.test_output_file, "w", encoding="utf-8") as f:
        for pred in preds:
            f.write("{}\n".format(pred))

    logger.info("Prediction Done!")

def evaluate(model, data_loader, tokenizer, ignore_pad_token_for_loss, min_target_length, max_target_length):
    model.eval()
    all_preds = []
    all_labels = []
    model = model._layers if isinstance(model, paddle.DataParallel) else model
    for batch in tqdm(data_loader, total=len(data_loader), desc="Eval step"):
        input_ids, _, _, labels = batch
        preds = model.generate(input_ids=input_ids,
                               min_length=min_target_length,
                               max_length=max_target_length,
                               use_cache=True)[0]
        all_preds.extend(preds.numpy())
        all_labels.extend(labels.numpy())
    rouge_result, decoded_preds = compute_metrics(all_preds, all_labels, tokenizer, ignore_pad_token_for_loss)
    logger.info(rouge_result)
    model.train()

def evaluate(self, mode):
    if mode == 'test':
        dataset = self.test_dataset
    elif mode == 'dev':
        dataset = self.dev_dataset
    else:
        raise Exception("Only dev and test dataset available")

    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.args.batch_size)

    # Eval!
    logger.info("***** Running evaluation on %s dataset *****", mode)
    logger.info("  Num examples = %d", len(dataset))
    logger.info("  Batch size = %d", self.args.batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    intent_preds = None
    slot_preds = None
    out_intent_label_ids = None
    out_slot_labels_ids = None

    self.model.eval()

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = tuple(t.to(self.device) for t in batch)
        with torch.no_grad():
            inputs = {
                'input_ids': batch[0],
                'attention_mask': batch[1],
                'intent_label_ids': batch[3],
                'slot_labels_ids': batch[4]
            }
            if self.args.model_type != 'distilbert':
                inputs['token_type_ids'] = batch[2]
            outputs = self.model(**inputs)
            tmp_eval_loss, (intent_logits, slot_logits) = outputs[:2]

            eval_loss += tmp_eval_loss.mean().item()  # average loss over the batch
        nb_eval_steps += 1

        # Intent prediction
        if intent_preds is None:
            intent_preds = intent_logits.detach().cpu().numpy()  # convert intent logits to numpy
            out_intent_label_ids = inputs['intent_label_ids'].detach().cpu().numpy()
        else:
            intent_preds = np.append(intent_preds,
                                     intent_logits.detach().cpu().numpy(),
                                     axis=0)  # np.append() concatenates the two arrays
            out_intent_label_ids = np.append(out_intent_label_ids,
                                             inputs['intent_label_ids'].detach().cpu().numpy(),
                                             axis=0)

        # Slot prediction
        if slot_preds is None:
            if self.args.use_crf:
                # decode() in `torchcrf` returns list with best index directly
                slot_preds = np.array(self.model.crf.decode(slot_logits))
            else:
                slot_preds = slot_logits.detach().cpu().numpy()
            out_slot_labels_ids = inputs["slot_labels_ids"].detach().cpu().numpy()
        else:
            if self.args.use_crf:
                slot_preds = np.append(slot_preds, np.array(self.model.crf.decode(slot_logits)), axis=0)
            else:
                slot_preds = np.append(slot_preds, slot_logits.detach().cpu().numpy(), axis=0)
            out_slot_labels_ids = np.append(out_slot_labels_ids,
                                            inputs["slot_labels_ids"].detach().cpu().numpy(),
                                            axis=0)

    eval_loss = eval_loss / nb_eval_steps
    results = {"loss": eval_loss}

    # Intent result
    intent_preds = np.argmax(intent_preds, axis=1)

    # Slot result
    if not self.args.use_crf:
        slot_preds = np.argmax(slot_preds, axis=2)
    slot_label_map = {i: label for i, label in enumerate(self.slot_label_lst)}
    # one label list per sentence, skipping padded positions
    out_slot_label_list = [[] for _ in range(out_slot_labels_ids.shape[0])]
    slot_preds_list = [[] for _ in range(out_slot_labels_ids.shape[0])]

    for i in range(out_slot_labels_ids.shape[0]):
        for j in range(out_slot_labels_ids.shape[1]):
            if out_slot_labels_ids[i, j] != self.pad_token_label_id:
                out_slot_label_list[i].append(slot_label_map[out_slot_labels_ids[i][j]])
                slot_preds_list[i].append(slot_label_map[slot_preds[i][j]])

    total_result = compute_metrics(intent_preds, out_intent_label_ids, slot_preds_list, out_slot_label_list)
    results.update(total_result)

    logger.info("***** Eval results *****")
    for key in sorted(results.keys()):
        logger.info("  %s = %s", key, str(results[key]))

    return results

# disc.method = Orange.preprocess.discretize.EqualFreq(n=3)
disc_predicted_data_table = disc(predicted_data_table)
# disc_predicted_test_data_table = disc(predicted_test_data_table)
disc_predicted_test_data_table = Orange.data.Table.from_table(disc_predicted_data_table.domain,
                                                              predicted_test_data_table)

from utils import uniform_enlarge_dataset, estimated_enlarge_dataset
rate = 1.0 * 49804 / predicted_data_table.X.shape[0]
# rate = 0
print("sampling rate", rate)
new_predicted_data_table = estimated_enlarge_dataset(predicted_data_table, black_box,
                                                     sampling_rate=rate, random_seed=random_seed)
print(new_predicted_data_table.X.shape)
disc_new_predicted_data_table = Orange.data.Table.from_table(disc_predicted_data_table.domain,
                                                             new_predicted_data_table)

from approach import explain_tabular
explanations, explainer = explain_tabular(disc_new_predicted_data_table, black_box,
                                          target_class_idx=1, random_seed=random_seed,
                                          beta=0, use_pre_mined=True, objective='bayesian')
print(len(explanations))

from utils import rule_to_string, ruleset_predict
our_prediction = ruleset_predict(explanations, disc_predicted_test_data_table.X)

import sklearn
print('Blackbox and our, acc', sklearn.metrics.accuracy_score(predicted_test_data_table.Y, our_prediction))
print('Blackbox and our, f1 score', sklearn.metrics.f1_score(predicted_test_data_table.Y, our_prediction))
print('Blackbox and our, recall', sklearn.metrics.recall_score(predicted_test_data_table.Y, our_prediction))
print('Blackbox and our, precision', sklearn.metrics.precision_score(predicted_test_data_table.Y, our_prediction))

from utils import compute_metrics
compute_metrics(explanations, disc_predicted_data_table.domain)

def evaluate(self, mode, step):
    if mode == 'test':
        dataset = self.test_dataset
    elif mode == 'dev':
        dataset = self.dev_dataset
    elif mode == 'train':
        dataset = self.train_dataset
    else:
        raise Exception("Only train, dev and test dataset available")

    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.args.eval_batch_size)

    # Eval!
    logger.info("***** Running evaluation on %s dataset *****", mode)
    logger.info("  Num examples = %d", len(dataset))
    logger.info("  Batch size = %d", self.args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        self.model.eval()
        batch = tuple(t.to(self.device) for t in batch)
        with torch.no_grad():
            inputs = {
                'word_ids': batch[0],
                'char_ids': batch[1],
                'mask': batch[2],
                'label_ids': batch[3]
            }
            outputs = self.model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1

        # Slot prediction
        if preds is None:
            # decode() in `torchcrf` returns list with best index directly
            preds = np.array(self.model.crf.decode(logits, mask=inputs['mask'].byte()))
            out_label_ids = inputs["label_ids"].detach().cpu().numpy()
        else:
            preds = np.append(preds,
                              np.array(self.model.crf.decode(logits, mask=inputs['mask'].byte())),
                              axis=0)
            out_label_ids = np.append(out_label_ids, inputs["label_ids"].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    results = {"loss": eval_loss}

    # Slot result
    slot_label_map = {i: label for i, label in enumerate(self.label_lst)}
    out_label_list = [[] for _ in range(out_label_ids.shape[0])]
    preds_list = [[] for _ in range(out_label_ids.shape[0])]

    for i in range(out_label_ids.shape[0]):
        for j in range(out_label_ids.shape[1]):
            if out_label_ids[i, j] != self.pad_token_label_id:
                out_label_list[i].append(slot_label_map[out_label_ids[i][j]])
                preds_list[i].append(slot_label_map[preds[i][j]])

    if self.args.write_pred:
        if not os.path.exists(self.args.pred_dir):
            os.mkdir(self.args.pred_dir)

        with open(os.path.join(self.args.pred_dir, "pred_{}.txt".format(step)), "w", encoding="utf-8") as f:
            for text, true_label, pred_label in zip(self.test_texts, out_label_list, preds_list):
                for t, tl, pl in zip(text, true_label, pred_label):
                    f.write("{} {} {}\n".format(t, tl, pl))
                f.write("\n")

    result = compute_metrics(out_label_list, preds_list)
    results.update(result)

    logger.info("***** Eval results *****")
    for key in sorted(results.keys()):
        logger.info("  %s = %s", key, str(results[key]))
    logger.info("\n" + show_report(out_label_list, preds_list))  # Get the report for each tag result

    return results

def evaluate(model, eval_features):
    all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
    all_attention_mask = torch.tensor([f.attention_mask for f in eval_features], dtype=torch.long)
    all_token_type_ids = torch.tensor([f.token_type_ids for f in eval_features], dtype=torch.long)
    all_intent_label_ids = torch.tensor([f.intent_label_id for f in eval_features], dtype=torch.long)
    all_slot_labels_ids = torch.tensor([f.slot_labels_ids for f in eval_features], dtype=torch.long)

    dev_dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids,
                                all_intent_label_ids, all_slot_labels_ids)
    eval_sampler = SequentialSampler(dev_dataset)
    eval_dataloader = DataLoader(dev_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

    # Eval!
    print("***** Running evaluation on dataset *****")
    print("  Num examples = %d" % len(dev_dataset))
    print("  Batch size = %d" % args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    intent_preds = None
    slot_preds = None
    out_intent_label_ids = None
    out_slot_labels_ids = None

    model.eval()

    for batch in tqdm(eval_dataloader, desc='Evaluating'):
        batch = tuple(t.to(device) for t in batch)
        with torch.no_grad():
            inputs = {'input_ids': batch[0],
                      'attention_mask': batch[1],
                      'token_type_ids': batch[2],
                      'intent_label_ids': batch[3],
                      'slot_labels_ids': batch[4]}
            tmp_eval_loss, intent_logits, slot_logits = model(**inputs)

            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1

        # Intent prediction
        if intent_preds is None:
            intent_preds = intent_logits.detach().cpu().numpy()
            out_intent_label_ids = inputs['intent_label_ids'].detach().cpu().numpy()
        else:
            intent_preds = np.append(intent_preds, intent_logits.detach().cpu().numpy(), axis=0)
            out_intent_label_ids = np.append(out_intent_label_ids,
                                             inputs['intent_label_ids'].detach().cpu().numpy(), axis=0)

        # Slot prediction
        if slot_preds is None:
            if args.use_crf:
                # decode() in `torchcrf` returns list with best index directly
                slot_preds = np.array(model.crf.decode(slot_logits))
            else:
                slot_preds = slot_logits.detach().cpu().numpy()
            out_slot_labels_ids = inputs["slot_labels_ids"].detach().cpu().numpy()
        else:
            if args.use_crf:
                slot_preds = np.append(slot_preds, np.array(model.crf.decode(slot_logits)), axis=0)
            else:
                slot_preds = np.append(slot_preds, slot_logits.detach().cpu().numpy(), axis=0)
            out_slot_labels_ids = np.append(out_slot_labels_ids,
                                            inputs["slot_labels_ids"].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    results = {"loss": eval_loss}

    intent_preds = np.argmax(intent_preds, axis=1)
    if not args.use_crf:
        slot_preds = np.argmax(slot_preds, axis=2)
    slot_label_map = {i: label for i, label in enumerate(slot_label_lst)}
    out_slot_label_list = [[] for _ in range(out_slot_labels_ids.shape[0])]
    slot_preds_list = [[] for _ in range(out_slot_labels_ids.shape[0])]

    for i in range(out_slot_labels_ids.shape[0]):
        for j in range(out_slot_labels_ids.shape[1]):
            if out_slot_labels_ids[i, j] != args.ignore_index:
                out_slot_label_list[i].append(slot_label_map[out_slot_labels_ids[i][j]])
                slot_preds_list[i].append(slot_label_map[slot_preds[i][j]])

    total_result = compute_metrics(intent_preds, out_intent_label_ids, slot_preds_list, out_slot_label_list)
    results.update(total_result)

    print("***** Eval results *****")
    for key in sorted(results.keys()):
        # fixed: the original passed the %-format string and its arguments as
        # separate print() arguments instead of formatting them
        print("  %s = %s" % (key, str(results[key])))

    return results['loss']

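# compute_metrics for the joint intent/slot snippets takes intent predictions and
# per-sentence slot tag lists. A hypothetical sketch under the assumption that it
# reports intent accuracy, seqeval-style slot F1, and whole-sentence frame accuracy,
# as is common for this task; the actual helper in these projects may return
# different keys or additional scores.
import numpy as np
from seqeval.metrics import f1_score as slot_f1_score

def joint_metrics_sketch(intent_preds, intent_labels, slot_preds_list, slot_labels_list):
    # intent_preds / intent_labels: 1-D arrays of intent class indices
    # slot_preds_list / slot_labels_list: lists of per-sentence BIO tag lists
    intent_preds = np.asarray(intent_preds)
    intent_labels = np.asarray(intent_labels)
    intent_acc = float((intent_preds == intent_labels).mean())
    slot_f1 = slot_f1_score(slot_labels_list, slot_preds_list)
    # a sentence counts as correct only if both intent and every slot tag match
    frame_acc = float(np.mean([
        p == l and list(sp) == list(sl)
        for p, l, sp, sl in zip(intent_preds, intent_labels, slot_preds_list, slot_labels_list)
    ]))
    return {"intent_acc": intent_acc, "slot_f1": slot_f1, "frame_acc": frame_acc}
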
def evaluate(self, mode):  # test
    # We use test dataset because semeval doesn't have dev dataset
    if mode == 'test':
        dataset = self.test_dataset
    elif mode == 'dev':
        dataset = self.dev_dataset
    else:
        raise Exception("Only dev and test dataset available")

    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.args.batch_size)

    # Eval!
    logger.info("***** Running evaluation on %s dataset *****", mode)
    logger.info("  Num examples = %d", len(dataset))
    logger.info("  Batch size = %d", self.args.batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    intent_preds = None
    slot_preds = None
    out_intent_label_ids = None
    out_slot_labels_ids = None

    self.model.eval()  # switch to evaluation mode

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = tuple(t.to(self.device) for t in batch)
        with torch.no_grad():  # disable gradient computation
            inputs = {
                'input_ids_1': batch[0],
                'attention_mask_1': batch[1],
                'token_type_ids_1': batch[2],
                'input_ids_2': batch[3],       # first row
                'attention_mask_2': batch[4],  # second row
                'token_type_ids_2': batch[5],
                'intent_label_ids': batch[6]
            }
            outputs = self.model(**inputs)
            # the first two model outputs are the loss and the logits
            tmp_eval_loss, (intent_logits, slot_logits,) = outputs[:2]
            # tmp_eval_loss, (slot_logits,), x = outputs[:2]

            eval_loss += tmp_eval_loss.mean().item()  # item() returns a Python scalar
        nb_eval_steps += 1

        # Intent prediction
        if intent_preds is None:
            intent_preds = intent_logits.detach().cpu().numpy()
            out_intent_label_ids = inputs['intent_label_ids'].detach().cpu().numpy()
        else:
            intent_preds = np.append(intent_preds, intent_logits.detach().cpu().numpy(), axis=0)
            out_intent_label_ids = np.append(out_intent_label_ids,
                                             inputs['intent_label_ids'].detach().cpu().numpy(),
                                             axis=0)

    eval_loss = eval_loss / nb_eval_steps  # average loss
    results = {"loss": eval_loss}

    # Intent result
    intent_preds = np.argmax(intent_preds, axis=1)  # axis=1: argmax over classes for each example

    total_result = compute_metrics(intent_preds, out_intent_label_ids)
    # total_result = compute_metrics_slot(slot_preds_list, out_slot_label_list)
    results.update(total_result)

    logger.info("***** Eval results *****")
    for key in sorted(results.keys()):
        logger.info("  %s = %s", key, str(results[key]))
    if mode == 'test':
        f = open('result/result.txt', 'a', encoding='utf-8')
        for key in sorted(results.keys()):
            f.write("  %s = %s" % (key, str(results[key])))
            f.write("\n")
        f.close()

    return results

mask_ts_[mask_tiles == ts5] = 1
mask_ts_[mask_tiles == ts6] = 1
mask_ts_[mask_tiles == ts7] = 1
mask_ts_[mask_tiles == ts8] = 1
mask_ts_[mask_tiles == ts9] = 1

#% Load model
model = load_model(filepath + 'unet_exp_' + str(exp) + '.h5', compile=False)
area = 11

# Prediction
ref_final, pre_final, prob_recontructed, ref_reconstructed, mask_no_considered_, mask_ts, time_ts = prediction(
    model, image_array, image_ref, final_mask, mask_ts_, patch_size, area)

# Metrics
cm = confusion_matrix(ref_final, pre_final)
metrics = compute_metrics(ref_final, pre_final)
print('Confusion matrix \n', cm)
print('Accuracy: ', metrics[0])
print('F1score: ', metrics[1])
print('Recall: ', metrics[2])
print('Precision: ', metrics[3])

# Alarm area
total = (cm[1, 1] + cm[0, 1]) / len(ref_final) * 100
print('Area to be analyzed', total)

print('training time', end_training)
print('test time', time_ts)

#%% Show the results
# prediction of the whole image

def evaluate(args, model, eval_dataset, prefix=""):
    eval_output_dir = args.output_dir
    if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(eval_output_dir)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    eval_sampler = SequentialSampler(eval_dataset) if args.local_rank == -1 else DistributedSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            inputs = {
                'input_ids': batch[0],
                'entity_a': batch[1],
                'entity_b': batch[2],
                'attention_mask': batch[3],
                # XLM and RoBERTa don't use segment_ids
                'token_type_ids': batch[4] if args.model_type in ['bert', 'xlnet'] else None,
                'labels': batch[5]
            }
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1

        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs['labels'].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    if args.output_mode == "classification":
        preds = np.argmax(preds, axis=1)
    elif args.output_mode == "regression":
        preds = np.squeeze(preds)

    with open(args.result_file, "w", encoding="utf-8") as fo:
        for p in preds:
            fo.write(str(p) + "\n")

    result = compute_metrics(args, preds, out_label_ids)

    logger.info("***** Eval results {} *****".format(prefix))
    for key in sorted(result.keys()):
        logger.info("  %s = %s", key, str(result[key]))

    return result, eval_loss

if args.max_level is None:
    max_level = class_tree.get_height()
else:
    max_level = args.max_level

wstc = WSTC(input_shape=x.shape,
            class_tree=class_tree,
            max_level=max_level,
            sup_source=args.sup_source,
            y=y,
            vocab_sz=vocab_sz,
            word_embedding_dim=word_embedding_dim,
            block_thre=args.gamma,
            block_level=args.block_level)

total_counts = sum(word_counts[ele] for ele in word_counts)
total_counts -= word_counts[vocabulary_inv_list[0]]
background_array = np.zeros(vocab_sz)
for i in range(1, vocab_sz):
    background_array[i] = word_counts[vocabulary_inv[i]] / total_counts

for level in range(max_level):
    y_pred = proceed_level(x, sequences, wstc, args, pretrain_epochs, self_lr, decay,
                           update_interval, delta, class_tree, level, expand_num,
                           background_array, max_doc_length, max_sent_length, len_avg,
                           len_std, beta, alpha, vocabulary_inv, common_words)

write_output(y_pred, perm, class_tree, './' + args.dataset)
compute_metrics(y_pred, y)

def evaluate(args, model, tokenizer, processor, prefix="", eval_split=None):
    eval_task_names = (args.task_name,)
    eval_outputs_dirs = (args.output_dir,)

    assert eval_split is not None

    results = {}
    if os.path.exists("/output/metrics.json"):
        with open("/output/metrics.json", "r") as f:
            existing_results = json.loads(f.read())
        f.close()
        results.update(existing_results)

    for eval_task, eval_output_dir in zip(eval_task_names, eval_outputs_dirs):
        eval_dataset, examples = load_and_cache_examples(args, eval_task, tokenizer,
                                                         evaluate=True, eval_split=eval_split)

        if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(eval_output_dir)

        args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
        # Note that DistributedSampler samples randomly
        eval_sampler = SequentialSampler(eval_dataset) if args.local_rank == -1 else DistributedSampler(eval_dataset)
        eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

        # Eval!
        logger.info("***** Running evaluation {} on {} *****".format(prefix, eval_split))
        logger.info("  Num examples = %d", len(eval_dataset))
        logger.info("  Batch size = %d", args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        node_preds = None
        out_label_ids = None
        out_node_label_ids = None

        for batch in tqdm(eval_dataloader, desc="Evaluating", mininterval=10, ncols=100):
            model.eval()
            batch = tuple(t.to(args.device) for t in batch)
            with torch.no_grad():
                inputs = {
                    'input_ids': batch[0],
                    'attention_mask': batch[1],
                    # XLM don't use segment_ids
                    'token_type_ids': batch[2] if args.model_type in ['bert', 'xlnet', 'bert_mc'] else None,
                    'proof_offset': batch[3],
                    'node_label': batch[4],
                    'labels': batch[5]
                }
                outputs = model(**inputs)
                tmp_eval_loss, tmp_qa_loss, tmp_node_loss, logits, node_logits = outputs[:5]

                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1

            if preds is None:
                preds = logits.detach().cpu().numpy()
                node_preds = node_logits.detach().cpu().numpy()
                if not eval_split == "test":
                    out_label_ids = inputs['labels'].detach().cpu().numpy()
                    out_node_label_ids = inputs['node_label'].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                node_preds = np.append(node_preds, node_logits.detach().cpu().numpy(), axis=0)
                if not eval_split == "test":
                    out_label_ids = np.append(out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0)
                    out_node_label_ids = np.append(out_node_label_ids,
                                                   inputs['node_label'].detach().cpu().numpy(), axis=0)

        eval_loss = eval_loss / nb_eval_steps
        preds = np.argmax(preds, axis=1)
        node_preds = np.argmax(node_preds, axis=2)

        if not eval_split == "test":
            result = compute_metrics(eval_task, preds, out_label_ids)
            result_split = {}
            for k, v in result.items():
                result_split[k + "_{}".format(eval_split)] = v
            results.update(result_split)

            output_eval_file = os.path.join(eval_output_dir, "eval_results_{}.txt".format(eval_split))
            with open(output_eval_file, "w") as writer:
                logger.info("***** Eval results {} on {} *****".format(prefix, eval_split))
                for key in sorted(result_split.keys()):
                    logger.info("  %s = %s", key, str(result_split[key]))
                    writer.write("%s = %s\n" % (key, str(result_split[key])))

        # predictions
        output_pred_file = os.path.join(eval_output_dir, "predictions_{}.lst".format(eval_split))
        with open(output_pred_file, "w") as writer:
            logger.info("***** Write predictions {} on {} *****".format(prefix, eval_split))
            for pred in preds:
                writer.write("{}\n".format(processor.get_labels()[pred]))

        # prediction nodes
        output_node_pred_file = os.path.join(eval_output_dir, "prediction_nodes_{}.lst".format(eval_split))
        with open(output_node_pred_file, "w") as writer:
            logger.info("***** Write predictions {} on {} *****".format(prefix, eval_split))
            for node_gold, node_pred in zip(out_node_label_ids, node_preds):
                node_gold = node_gold[np.where(node_gold != -100)[0]]
                node_pred = node_pred[:len(node_gold)]
                writer.write(str(list(node_pred)) + "\n")

    return results