def evaluation(grammars_test, pcky, mode):
    if mode == 'test':
        print("...........Starting evaluation........")
        cfg_test = get_all_trees(grammars_test)
        corpus_test = [' '.join(tree.leaves()) for tree in cfg_test]
        predictions_test = []
        for sentence in corpus_test:
            predictions_test.append(pcky.induce_CYK(sentence, show=False))
        # induce_CYK returns (tree, status) pairs; split them apart.
        status_test = [x[1] for x in predictions_test]
        predictions_test_ = [x[0] for x in predictions_test]
        print('Precision on test :', compute_precision(predictions_test, grammars_test))
        write_file(predictions_test_, corpus_test)
        print("...........Evaluation finished........")
    elif mode == 'eval':
        print('....................Start.................')
        print("To exit, type: exit")
        while True:
            phrase_to_parse = input(">>>>> Please enter a sentence: ")
            if phrase_to_parse == 'exit':
                break
            prediction, status = pcky.induce_CYK(phrase_to_parse, show=True)
            if status == 0:
                print("The sentence could not be parsed")
            else:
                print(prediction)
        print('....................End...................')
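# A minimal sketch of what compute_precision could look like in this parsing
# context, assuming exact-match scoring of predicted parses against the gold
# trees and that failed parses (status == 0) count as misses. Only
# get_all_trees and the (tree, status) convention come from the snippet above;
# the string comparison is an assumption, and the real helper may differ.
def compute_precision(predictions, grammars_test):
    gold_trees = get_all_trees(grammars_test)
    correct = sum(
        1 for (tree, status), gold in zip(predictions, gold_trees)
        if status != 0 and str(tree) == str(gold)  # assumed comparison
    )
    return correct / len(gold_trees)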
import numpy as np
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

def generate_results(gt, pred):
    cm = confusion_matrix(gt, pred, labels=[0, 1])
    my_f1 = compute_f1(cm)
    my_precision = compute_precision(cm)
    my_recall = compute_recall(cm)
    # Cross-check the hand-rolled metrics against scikit-learn; compare
    # absolute differences so a large negative gap cannot slip through.
    prec, rec, f1, _ = precision_recall_fscore_support(gt, pred, labels=[0, 1],
                                                       average=None)
    assert (np.abs(my_precision - prec) < 1e-3).all()
    assert (np.abs(my_recall - rec) < 1e-3).all()
    assert (np.abs(my_f1 - f1) < 1e-3).all()
    return cm, prec, rec, f1
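# The hand-rolled helpers are not shown above; a plausible sketch, assuming
# the scikit-learn convention that rows of the confusion matrix are true
# labels and columns are predictions.
import numpy as np

def compute_precision(cm):
    # Per-class precision: true positives over everything predicted as that
    # class (column sums of the confusion matrix).
    return np.diag(cm) / cm.sum(axis=0)

def compute_recall(cm):
    # Per-class recall: true positives over everything truly of that class
    # (row sums of the confusion matrix).
    return np.diag(cm) / cm.sum(axis=1)

def compute_f1(cm):
    # F1 is the harmonic mean of precision and recall, per class.
    p, r = compute_precision(cm), compute_recall(cm)
    return 2 * p * r / (p + r)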
image_to_gt = utils.load_evaluation_set(hp, files['iota10k'], files['gt_fn'],
                                        args.min_rater_count)

# Arrange metrics for the gt labels in df.
image_metrics = utils.compute_image_metrics(image_to_gt, label_metrics, files,
                                            method=hp['eval_method'],
                                            do_verify=hp['do_verify'],
                                            gt_in_voc=hp['gt_vocab'],
                                            y_force=hp['y_force'])
images = list(set(image_metrics.ImageID.values.tolist()))
raters_ub = utils.raters_performance(images, files['gt_fn'])
print('Raters agreement: %s' % raters_ub)

# Compute precision & recall over all metrics.
precision, sem_p, precision_mat = utils.compute_precision(image_metrics, hp['k'])
recall, sem_r, recall_mat = utils.compute_recall(image_metrics, hp['k'])
vis.print_top_pr(precision, recall)
utils.save_results(hp, files['results_dir'], precision, sem_p, recall, sem_r)

# Plot precision, recall and correlation. Save specific examples to HTML.
if args.plot_figures:
    vis.plot_precision(hp, files, precision, sem_p, raters_ub)
    vis.plot_recall(hp, files, recall, sem_r)
    vis.plot_precision_vs_recall(hp, files, precision, recall, raters_ub)
    # vis.plot_correlation(hp, files, image_metrics)
    vis.write_models_to_html(image_metrics, hp, files)
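# utils.compute_precision above returns a mean, a standard error, and a
# per-image matrix. A minimal sketch of that shape of computation, assuming a
# binary (n_images, max_k) hit matrix rather than the real image_metrics
# dataframe; the function name and input format here are hypothetical.
import numpy as np

def precision_at_k(hits, k):
    # hits[i, j] == 1 iff the j-th ranked prediction for image i matched
    # the ground truth.
    per_image = hits[:, :k].mean(axis=1)                    # precision@k per image
    precision = per_image.mean()                            # mean over images
    sem = per_image.std(ddof=1) / np.sqrt(len(per_image))   # standard error of the mean
    return precision, sem, per_image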
def validate(self):
    n_class = self.train_loader.dataset.n_class
    # os.system('play -nq -t alsa synth {} sine {}'.format(0.3, 440))  # sound an alarm
    val_loss = 0
    prec = 0
    metrics = np.zeros((len(self.val_loader), 4), dtype=np.float64)
    for batch_idx, (rgb_img, ddd_img, target) in tqdm.tqdm(
            enumerate(self.val_loader), total=len(self.val_loader),
            desc=' val %d' % self.epoch, ncols=80, leave=False):
        # Validate.
        with torch.no_grad():
            self.model.eval()
            if self.cuda:
                rgb_img = rgb_img.cuda()
                ddd_img = ddd_img.cuda()
                target = target.cuda()
            output = self.model(rgb_img, ddd_img)
            # Upsample logits to the label resolution before computing the loss.
            if self.val_loader.dataset.encode_label:
                output = F.interpolate(output, size=target.size()[2:],
                                       mode='bilinear', align_corners=False)
            else:
                output = F.interpolate(output, size=target.size()[1:],
                                       mode='bilinear', align_corners=False)
            loss = self.criterion(output, target)
            loss_data = loss.data.item()
            if np.isnan(loss_data):
                raise ValueError('loss is nan while validating')
            val_loss += loss_data / len(rgb_img)

            # Accumulate per-batch statistics.
            lbl_pred = output.data.max(1)[1].cpu().numpy().squeeze()
            lbl_true = target.data.cpu().numpy().squeeze()
            prec += compute_precision(lbl_pred, lbl_true)
            m = label_accuracy_score(lbl_true, lbl_pred, n_class)
            metrics[batch_idx, :] = np.array(m)

    metrics = np.mean(metrics, axis=0)
    val_prec = prec / len(self.val_loader)

    with open(osp.join(self.output_path, 'log.csv'), 'a') as f:
        metrics_str = ['%.10f' % a for a in list(metrics)]
        elapsed_time = (
            datetime.datetime.now(pytz.timezone('Asia/Jakarta')) -
            self.timestamp_start).total_seconds()
        val_loss /= len(self.val_loader)
        log = [self.epoch, self.iteration] + [''] * 5 + \
              ['%.10f' % val_loss] + metrics_str + [elapsed_time]
        log = map(str, log)
        f.write(','.join(log) + '\n')

    mean_iu = metrics[2]
    is_best = mean_iu > self.best_mean_iu
    if is_best:
        self.best_mean_iu = mean_iu
    is_prec_best = val_prec > self.best_prec
    if is_prec_best:
        self.best_prec = val_prec

    checkpoint = {
        'epoch': self.epoch,
        'iteration': self.iteration,
        'arch': self.arch,
        'optim_state_dict': self.optim.state_dict(),
        'model_state_dict': self.model.state_dict(),
        'best_mean_iu': self.best_mean_iu,
        'best_prec': self.best_prec,
    }
    if self.arch == 'rfnet':
        # rfnet trains its decoder with a separate optimizer; save its state too.
        checkpoint['optim_dec_state_dict'] = self.optim_dec.state_dict()
    torch.save(checkpoint, osp.join(self.output_path, 'checkpoint.pth.tar'))

    if is_best:
        shutil.copy(osp.join(self.output_path, 'checkpoint.pth.tar'),
                    osp.join(self.output_path, 'model_best.pth.tar'))
    if is_prec_best:
        shutil.copy(osp.join(self.output_path, 'checkpoint.pth.tar'),
                    osp.join(self.output_path, 'model_prec_best.pth.tar'))

    self.writer.add_scalar('val/loss', val_loss, self.epoch)
    self.writer.add_scalar('val/precision', val_prec, self.epoch)
    self.writer.add_scalar('val/accuracy', metrics[0], self.epoch)
    self.writer.add_scalar('val/acc_class', metrics[1], self.epoch)
    self.writer.add_scalar('val/mean_iu', metrics[2], self.epoch)
    self.writer.add_scalar('val/fwacc', metrics[3], self.epoch)

    if self.scheduler is not None:
        self.scheduler.step(val_prec)
    if self.training:
        self.model.train()
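# compute_precision(lbl_pred, lbl_true) above is summed per batch and then
# averaged over the loader. A minimal sketch of a per-class pixel precision it
# might implement, macro-averaged over classes that were actually predicted;
# the n_class handling is an assumption and the real helper may differ.
import numpy as np

def compute_precision(lbl_pred, lbl_true, n_class=None):
    if n_class is None:
        n_class = int(max(lbl_pred.max(), lbl_true.max())) + 1
    precisions = []
    for c in range(n_class):
        predicted_c = lbl_pred == c
        n_predicted = predicted_c.sum()
        if n_predicted == 0:
            continue  # class never predicted in this batch; avoid 0/0
        tp = np.logical_and(predicted_c, lbl_true == c).sum()
        precisions.append(tp / n_predicted)
    return float(np.mean(precisions)) if precisions else 0.0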
# Save all model hyperparameters to file.
with open("{}/hyper-parameters.txt".format(opts["save_model_path"]),
          "a", encoding="utf8") as hyperParams_file:
    for option, value in opts.items():
        hyperParams_file.write("{}: {}\n".format(option, value))
print("Number of Epochs", opts["epochs"])

# Train model.
trainer.train(train_dataloader, dev_dataloader, opts)

# Evaluate model by computing F-score on test set.
precisions = compute_precision(ner_model, test_dataloader, label_vocabulary,
                               encoded_test_labels, opts)
curr_f1 = precisions["f1"]
print("\n\nF-1 score: {}\n\n".format(curr_f1))
print("Confusion matrix\n", precisions["confusion_matrix"])

# Print, plot, and save the confusion matrix.
file_name = "TestSet_Confusion_Matrix"
plot_conf(file_name, precisions["confusion_matrix"], opts["save_model_path"])

# Add the F-score obtained by this model to the dict storing all F-scores.
F1_scores_dict.update({opts["save_model_path"]: curr_f1})

# Find the model that achieved the highest F-score.
best = max(F1_scores_dict.items(), key=lambda x: x[1])
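# plot_conf is referenced above but not defined here; a minimal matplotlib
# sketch, assuming it just renders the confusion matrix as a heatmap and saves
# a PNG under the model directory. The signature is taken from the call above;
# everything else is an assumption.
import os
import matplotlib.pyplot as plt

def plot_conf(file_name, conf_matrix, save_dir):
    # Render the confusion matrix as a heatmap and save it as a PNG.
    fig, ax = plt.subplots()
    im = ax.imshow(conf_matrix, cmap="Blues")
    fig.colorbar(im, ax=ax)
    ax.set_xlabel("Predicted label")
    ax.set_ylabel("True label")
    ax.set_title(file_name)
    fig.savefig(os.path.join(save_dir, file_name + ".png"), bbox_inches="tight")
    plt.close(fig)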