Example #1
def evaluation(grammars_test, pcky, mode):

	if mode == 'test':
		print("...........Starting evaluation........")
		cfg_test = get_all_trees(grammars_test)
		corpus_test = [' '.join(tree.leaves()) for tree in cfg_test]
		predictions_test = []
		for sentence in corpus_test:
			predictions_test.append(pcky.induce_CYK(sentence, show=False))
		status_test, predictions_test_ = [x[1] for x in predictions_test], [x[0] for x in predictions_test]
		print('Precision on test:', compute_precision(predictions_test, grammars_test))
		write_file(predictions_test_, corpus_test)
		print("...........Evaluation finished........")
	elif mode == 'eval':
		print('....................Start.................')
		print("To quit, enter: exit")
		while True:
			phrase_to_parse = input(">>>>> Please enter a sentence: ")
			if phrase_to_parse == 'exit':
				break
			prediction, status = pcky.induce_CYK(phrase_to_parse, show=True)
			if status == 0:
				print("The sentence could not be parsed")
			else:
				print(prediction)
		print('....................End...................')
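The compute_precision called above is not included in this example. A minimal sketch of what it might look like here, assuming predictions_test holds (parse, status) pairs and grammars_test the gold bracketed strings; the exact-match criterion is only a guess, not the project's actual metric:

def compute_precision(predictions, gold_grammars):
    # Hypothetical metric: share of gold trees whose predicted parse, when one
    # was produced (status == 1), exactly matches the gold bracketed string.
    correct = 0
    for (pred, status), gold in zip(predictions, gold_grammars):
        if status == 1 and pred.strip() == gold.strip():
            correct += 1
    return correct / len(gold_grammars)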
Example #2
import numpy as np
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support


def generate_results(gt, pred):
    cm = confusion_matrix(gt, pred, labels=[0, 1])
    my_f1 = compute_f1(cm)
    my_precision = compute_precision(cm)
    my_recall = compute_recall(cm)

    prec, rec, f1, _ = precision_recall_fscore_support(gt,
                                                       pred,
                                                       labels=[0, 1],
                                                       average=None)
    # Compare against scikit-learn within an absolute tolerance.
    assert (np.abs(my_precision - prec) < 1e-3).all()
    assert (np.abs(my_recall - rec) < 1e-3).all()
    assert (np.abs(my_f1 - f1) < 1e-3).all()

    return cm, prec, rec, f1
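Since the snippet checks its own helpers against scikit-learn, here is a minimal sketch of confusion-matrix-based helpers that would satisfy those assertions (hypothetical implementations, not the ones from the original project):

import numpy as np

def compute_precision(cm):
    # Per-class precision: diagonal over column sums (columns = predicted labels).
    cm = np.asarray(cm, dtype=np.float64)
    return np.diag(cm) / np.maximum(cm.sum(axis=0), 1e-12)

def compute_recall(cm):
    # Per-class recall: diagonal over row sums (rows = true labels).
    cm = np.asarray(cm, dtype=np.float64)
    return np.diag(cm) / np.maximum(cm.sum(axis=1), 1e-12)

def compute_f1(cm):
    # Harmonic mean of per-class precision and recall.
    p, r = compute_precision(cm), compute_recall(cm)
    return 2 * p * r / np.maximum(p + r, 1e-12)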
Example #3
image_to_gt = utils.load_evaluation_set(hp, files['iota10k'], files['gt_fn'],
                                        args.min_rater_count)

# Arrange metrics for the gt labels in df.
image_metrics = utils.compute_image_metrics(image_to_gt,
                                            label_metrics,
                                            files,
                                            method=hp['eval_method'],
                                            do_verify=hp['do_verify'],
                                            gt_in_voc=hp['gt_vocab'],
                                            y_force=hp['y_force'])

images = list(set(image_metrics.ImageID.values.tolist()))
raters_ub = utils.raters_performance(images, files['gt_fn'])
print('Raters agreement: %s' % raters_ub)

# Compute precision & recall over all metrics.
precision, sem_p, precision_mat = utils.compute_precision(
    image_metrics, hp['k'])
recall, sem_r, recall_mat = utils.compute_recall(image_metrics, hp['k'])
vis.print_top_pr(precision, recall)
utils.save_results(hp, files['results_dir'], precision, sem_p, recall, sem_r)

# Plot precision, recall and correlation. Save specific examples to HTML.
if args.plot_figures:
    vis.plot_precision(hp, files, precision, sem_p, raters_ub)
    vis.plot_recall(hp, files, recall, sem_r)
    vis.plot_precision_vs_recall(hp, files, precision, recall, raters_ub)
    # vis.plot_correlation(hp, files, image_metrics)
    vis.write_models_to_html(image_metrics, hp, files)
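utils.compute_precision and utils.compute_recall are not shown in this fragment. As a rough illustration of the kind of quantity being aggregated, here is a hypothetical per-image precision@k plus a mean/standard-error aggregation; the names, shapes, and aggregation are assumptions, not the actual utils implementation:

import numpy as np

def precision_at_k(ranked_labels, gt_labels, k):
    # Fraction of the top-k predicted labels that appear in the ground-truth set.
    top_k = ranked_labels[:k]
    return sum(label in gt_labels for label in top_k) / float(k)

def aggregate(per_image_scores):
    # Mean and standard error over images, analogous to (precision, sem_p) above.
    scores = np.asarray(per_image_scores, dtype=np.float64)
    return scores.mean(), scores.std(ddof=1) / np.sqrt(len(scores))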
Example #4
    def validate(self):
        n_class = self.train_loader.dataset.n_class

        # os.system('play -nq -t alsa synth {} sine {}'.format(0.3, 440)) # sound an alarm

        val_loss = 0
        prec = 0
        metrics = np.zeros((len(self.val_loader), 4), dtype=np.float64)
        for batch_idx, (rgb_img, ddd_img,
                        target) in tqdm.tqdm(enumerate(self.val_loader),
                                             total=len(self.val_loader),
                                             desc='  val %d' % self.epoch,
                                             ncols=80,
                                             leave=False):

            ## validate
            with torch.no_grad():
                self.model.eval()
                if self.cuda:
                    rgb_img = rgb_img.cuda()
                    ddd_img = ddd_img.cuda()
                    target = target.cuda()

                output = self.model(rgb_img, ddd_img)
                if self.val_loader.dataset.encode_label:
                    output = F.interpolate(output,
                                           size=target.size()[2:],
                                           mode='bilinear',
                                           align_corners=False)
                else:
                    output = F.interpolate(output,
                                           size=target.size()[1:],
                                           mode='bilinear',
                                           align_corners=False)

                loss = self.criterion(output, target)
                loss_data = loss.data.item()

                if np.isnan(loss_data):
                    raise ValueError('loss is nan while validating')
                val_loss += loss_data / len(rgb_img)

            ## some stats
            lbl_pred = output.data.max(1)[1].cpu().numpy().squeeze()
            lbl_true = target.data.cpu().numpy().squeeze()
            prec += compute_precision(lbl_pred, lbl_true)
            m = label_accuracy_score(lbl_true, lbl_pred, n_class)
            metrics[batch_idx, :] = np.array(m)

        metrics = np.mean(metrics, axis=0)
        val_prec = prec / len(self.val_loader)

        with open(osp.join(self.output_path, 'log.csv'), 'a') as f:

            metrics_str = ['%.10f' % (a) for a in list(metrics)]
            elapsed_time = (
                datetime.datetime.now(pytz.timezone('Asia/Jakarta')) -
                self.timestamp_start).total_seconds()

            val_loss /= len(self.val_loader)
            log = [self.epoch, self.iteration] + [''] * 5 + \
                ['%.10f' %(val_loss)] + metrics_str + [elapsed_time]
            log = map(str, log)
            f.write(','.join(log) + '\n')

        mean_iu = metrics[2]
        is_best = mean_iu > self.best_mean_iu
        if is_best:
            self.best_mean_iu = mean_iu
        is_prec_best = val_prec > self.best_prec
        if is_prec_best:
            self.best_prec = val_prec
        # Assemble the checkpoint once; rfnet additionally carries a decoder optimizer.
        checkpoint = {
            'epoch': self.epoch,
            'iteration': self.iteration,
            'arch': self.arch,
            'optim_state_dict': self.optim.state_dict(),
            'model_state_dict': self.model.state_dict(),
            'best_mean_iu': self.best_mean_iu,
            'best_prec': self.best_prec,
        }
        if self.arch == 'rfnet':
            checkpoint['optim_dec_state_dict'] = self.optim_dec.state_dict()
        torch.save(checkpoint, osp.join(self.output_path, 'checkpoint.pth.tar'))

        if is_best:
            shutil.copy(osp.join(self.output_path, 'checkpoint.pth.tar'),
                        osp.join(self.output_path, 'model_best.pth.tar'))
        if is_prec_best:
            shutil.copy(osp.join(self.output_path, 'checkpoint.pth.tar'),
                        osp.join(self.output_path, 'model_prec_best.pth.tar'))

        self.writer.add_scalar('val/loss', val_loss, self.epoch)
        self.writer.add_scalar('val/precision', val_prec, self.epoch)
        self.writer.add_scalar('val/accuracy', metrics[0], self.epoch)
        self.writer.add_scalar('val/acc_class', metrics[1], self.epoch)
        self.writer.add_scalar('val/mean_iu', metrics[2], self.epoch)
        self.writer.add_scalar('val/fwacc', metrics[3], self.epoch)

        if self.scheduler is not None:
            self.scheduler.step(val_prec)

        if self.training:
            self.model.train()
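The compute_precision(lbl_pred, lbl_true) used per batch above is defined elsewhere; a plausible sketch of a pixel-level, per-class precision it could compute (an assumption, not the actual implementation):

import numpy as np

def compute_precision(lbl_pred, lbl_true):
    # Mean per-class pixel precision: for each class that was predicted, the
    # fraction of its predicted pixels that truly belong to that class.
    precisions = []
    for c in np.unique(lbl_pred):
        predicted_c = (lbl_pred == c)
        true_positives = np.logical_and(predicted_c, lbl_true == c).sum()
        precisions.append(true_positives / predicted_c.sum())
    return float(np.mean(precisions)) if precisions else 0.0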
Example #5
            # Save all model hyperparameters to file (with-block ensures the file is closed)
            with open("{}/hyper-parameters.txt".format(opts["save_model_path"]),
                      "a", encoding="utf8") as hyperParams_file:
                for option, value in opts.items():
                    hyperParams_file.write("{}: {}\n".format(option, value))

            print("Number of Epochs", opts["epochs"])

            # Train model
            trainer.train(train_dataloader, dev_dataloader, opts)

            # Evaluate model by computing F-score on test set
            precisions = compute_precision(ner_model, test_dataloader,
                                           label_vocabulary,
                                           encoded_test_labels, opts)
            curr_f1 = precisions["f1"]
            print("\n\nF-1 score: {}\n\n".format(curr_f1))
            print("Confusion matrix\n", precisions["confusion_matrix"])

            # Print, plot, and save the confusion matrix
            file_name = "TestSet_Confusion_Matrix"
            plot_conf(file_name, precisions["confusion_matrix"],
                      opts["save_model_path"])

            # Add F-score obtained by this model to dict storing all F-scores
            F1_scores_dict.update({opts["save_model_path"]: curr_f1})

# Find the model that achieved the highest F-score
best = max(F1_scores_dict.items(), key=lambda x: x[1])
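For instance, the winning configuration could then be reported like this (a small illustrative addition, not part of the original script):

best_path, best_f1 = best
print("Best model: {} (F1 = {:.4f})".format(best_path, best_f1))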