Example #1
    def evaluate(self):
        self.clf.eval()
        if self.config[MODEL_TYPE] > 1:
            self.coref_trainer.model.eval()
        with torch.no_grad():
            evaluator = Evaluator(self, self.data_helper, self.config)
            evaluator.eval_parser(self.data_helper.val_trees)
Example #2
    def train_classifier(self, train_loader, dev_loader):
        """
        """
        self.optim = Optimizer(self.clf.parameters(), lr=self.config[LR])
        if self.config[EPOCH_START] != 1:
            self.load('../data/model/' + self.config[MODEL_NAME] + "_" +
                      str(self.config[EPOCH_START]))

        for epoch in range(1, self.config[NUM_EPOCHS] + 1):
            cost_acc = 0
            self.clf.train()
            print("============ epoch: ", epoch, " ============")
            for i, data in enumerate(train_loader):
                docs, gold_actions = data
                cost_acc += self.sr_parse(docs, gold_actions, self.optim)[1]
                if i % 50 == 0:
                    print("Cost on step ", i, "is ", cost_acc)

            print("Total cost for epoch ", epoch, "is ", cost_acc)
            self.clf.eval()
            with torch.no_grad():
                evaluator = Evaluator(self, self.clf.data_helper)
                evaluator.eval_parser(dev_loader, path=None)
            self.save('../data/model/',
                      self.config[MODEL_NAME] + "_" + str(epoch), epoch)
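A note on checkpointing, grounded in the load/save calls above: every epoch is saved under '../data/model/' as '<MODEL_NAME>_<epoch>', and a config[EPOCH_START] other than 1 makes the method load '../data/model/<MODEL_NAME>_<EPOCH_START>' before the loop starts (the loop itself still counts epochs from 1). A minimal call sketch; `trainer` and the loaders are placeholders, and the only assumption about the loaders is that each batch unpacks into a (docs, gold_actions) pair, as the training loop expects:

    # Illustrative example: with config[MODEL_NAME] == 'rst_parser' and
    # config[EPOCH_START] == 5, train_classifier() first loads
    # '../data/model/rst_parser_5', then trains for config[NUM_EPOCHS] epochs,
    # saving '../data/model/rst_parser_<epoch>' after each one.
    trainer.train_classifier(train_loader, dev_loader)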
Example #3
    def evaluate_potential_synonyms(self,
                                    empolis_mapping_path: str,
                                    distance_threshold: float = 0.85):
        """
        Evaluates how well a classifier is able to predict synonyms for the entities of a dataset.
        This has to be done using a different evaluation method because it does not evaluate whether a classifier
        is able to predict a mention correctly but rather whether a classifier is able to predict all synonyms/mentions
        for the Empolis dataset that are known for an entity.

        Because this is a completely different kind of evaluation a separate method has been implemented.
        """
        with open(empolis_mapping_path, 'r') as f:
            empolis_mapping_synonym_to_entity = json.load(f)

        # Remove all entries that are not part of the current split, because they can't be predicted by the
        # classifier and therefore should not be used for the evaluation
        synonyms_to_remove = []
        for synonym, entity_data in empolis_mapping_synonym_to_entity.items():
            if entity_data['entities'][0] not in self._entities:
                synonyms_to_remove.append(synonym)

        for synonym in synonyms_to_remove:
            del empolis_mapping_synonym_to_entity[synonym]

        # Classify all query samples
        res, identified_mentions = self._get_potential_synonyms(
            distance_threshold=distance_threshold)
        relevant_synonyms = {}
        for mention in identified_mentions:
            # Check if the mention is known to the Empolis dataset
            ground_truth_entity = empolis_mapping_synonym_to_entity.get(
                mention, None)
            if ground_truth_entity is not None:
                ground_truth_entity = ground_truth_entity['entities'][0]

                # If it is known, record the mention under its ground-truth entity
                if ground_truth_entity not in relevant_synonyms:
                    relevant_synonyms[ground_truth_entity] = {mention}
                else:
                    relevant_synonyms[ground_truth_entity].add(mention)
            else:
                # Remove mentions that are not relevant for the evaluation (i.e. not known to the Empolis data)
                for entity in res.keys():
                    if mention in res[entity]:
                        del res[entity][mention]

        # Evaluation
        evaluator = Evaluator()
        _, macro, micro = evaluator.evaluate_empolis_synonyms(
            res, relevant_synonyms)

        # Note: top-1 accuracy is not reported here because this is not a top-x evaluation
        print("\nMacro metrics:"
              "\nPrecision: %.2f%%, Recall: %.2f%%, F1-Score: %.2f%%" % macro)
        print("\nMicro metrics:"
              "\nPrecision: %.2f%%, Recall: %.2f%%, F1-Score: %.2f%%" % micro)
Example #4
    def evaluate_datasplit(self,
                           split: str,
                           num_results: int = 1,
                           eval_mode: str = 'mentions',
                           empolis_mapping_path: str = None,
                           empolis_distance_threshold: float = 0.85):
        """
        Evaluate the given datasplit.
        split has to be one of the three: train, test, val.
        """
        assert split in ['train', 'test', 'val'], \
            "The given evaluation split is not a valid split."
        assert split == self._loaded_datasplit, "The evaluation split has not been loaded."

        assert eval_mode in ['mentions', 'samples'], \
            "The evaluation mode is not a valid mode."
        start = datetime.datetime.now()

        empolis_mapping = None
        if empolis_mapping_path is not None:
            with open(empolis_mapping_path, 'r') as f:
                empolis_mapping = json.load(f)

        eval_results = {}
        for sample in self._query_data:
            sentence = sample['sentence']
            mention = sample['mention']

            suggestions = self._classify(mention,
                                         sentence=sentence,
                                         num_results=num_results)
            if mention == "[NIL]" and empolis_mapping is not None:
                eval_results = self._evaluate_empolis(
                    suggestions, sample, empolis_mapping, eval_results,
                    empolis_distance_threshold)
            elif mention != "[NIL]":
                eval_results = self._add_suggestion_to_eval_results(
                    suggestions, sample, eval_results)

        end = datetime.datetime.now()
        print("Classification took: ", end - start)

        # Calculate some metrics
        evaluator = Evaluator()
        top1_accuracy, macro, micro = evaluator.evaluate(
            eval_results, eval_mode)

        print("\nTop1 Accuracy: %.2f%%" % top1_accuracy)
        print("\nMacro metrics:"
              "\nPrecision: %.2f%%, Recall: %.2f%%, F1-Score: %.2f%%" % macro)
        print("\nMicro metrics:"
              "\nPrecision: %.2f%%, Recall: %.2f%%, F1-Score: %.2f%%" % micro)
Example #5
            os.path.join(args.output_dir, args.parse_type, "TRAINING",
                         "data_helper.bin"))
    if args.train:
        data_helper.load_data_helper(
            os.path.join(args.output_dir, args.parse_type, "TRAINING",
                         "data_helper.bin"))
        data_helper.load_train_data(data_dir=args.data_dir,
                                    output_dir=args.output_dir,
                                    parse_type=args.parse_type,
                                    isFlat=args.isFlat)
        train_model(data_helper)
    if args.eval:
        # Evaluate models on the RST-DT test set
        if args.isFlat:
            evaluator = Evaluator(isFlat=args.isFlat,
                                  model_dir=os.path.join(
                                      args.output_dir, "RN~model"))
        else:
            evaluator = Evaluator(isFlat=args.isFlat,
                                  model_dir=os.path.join(
                                      args.output_dir, "N~model"))
        evaluator.eval_parser(data_dir=args.data_dir,
                              output_dir=args.output_dir,
                              report=True,
                              bcvocab=brown_clusters,
                              draw=False,
                              isFlat=args.isFlat)

    if args.pred:
        if args.isFlat:
            evaluator = Evaluator(isFlat=args.isFlat,
Example #6
    train_dirname = (args.train_dir[:-1] if args.train_dir[-1] == os.sep else
                     args.train_dir).split(os.sep)[-1]
    HELPER_PATH = f"..{os.sep}data{os.sep}{train_dirname}_data_helper_rst.bin"
    print("Helper path:", HELPER_PATH)

    if args.prepare:
        # Create training data
        #coref_model = CorefScore(higher_order=True).to(config[DEVICE])
        coref_model = CorefScore().to(config[DEVICE])

        coref_trainer = Trainer(coref_model, [], [], [], debug=False)

        data_helper.create_data_helper(args.train_dir, config, coref_trainer)
        data_helper.save_data_helper(HELPER_PATH)

    if args.train:
        train_model_coref(data_helper, config)

    if args.eval:
        # Evaluate models on the RST-DT test set
        data_helper.load_data_helper(HELPER_PATH)

        parser = get_discourse_parser(data_helper, config)
        parser.load('../data/model/' + config[MODEL_NAME])
        print("Evaluating")
        with torch.no_grad():
            evaluator = Evaluator(parser, data_helper, config)
            evaluator.eval_parser(None,
                                  path=args.eval_dir,
                                  use_parseval=args.use_parseval)
Example #7
            'train_on': 'patch',
            'patch_size': 512,
            'resize_width': 512,
            'resize_height': 512,
            'attention_blocks': False,
            'guided_attention': False,
            'attention_loss': 'dice',
            'attention_weight': 10,
            'apply_attention_mask': True,
            'n_layers': 18,
        }


if __name__ == '__main__':
    if len(sys.argv) < 2:
        config = Main.default_config()
    else:
        path = Path(sys.argv[1])
        config = load_config(path)

    print("Using config:")
    print(f"\t{config}")

    main = Main(config)
    main.train()

    from eval.evaluation import Evaluator

    evaluator = Evaluator(Path(f"../output/{main.name}"), name=config['name'])
    evaluator.evaluate_model(main.model)
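How the entry point above selects its configuration, shown as an invocation sketch. The script name is illustrative, and the on-disk format read by load_config() is not shown in this snippet:

    # python main.py              -> falls back to Main.default_config()
    # python main.py my_config    -> config = load_config(Path('my_config'))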
Example #8
    parser.add_argument('--eval_dir', help='eval data directory')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    # Use brown clusters
    with gzip.open("../data/resources/bc3200.pickle.gz") as fin:
        print('Load Brown clusters for creating features ...')
        brown_clusters = pickle.load(fin)
    data_helper = DataHelper(max_action_feat_num=330000,
                             max_relation_feat_num=300000,
                             min_action_feat_occur=1,
                             min_relation_feat_occur=1,
                             brown_clusters=brown_clusters)
    if args.prepare:
        # Create training data
        data_helper.create_data_helper(data_dir=args.train_dir)
        data_helper.save_data_helper('../data/data_helper.bin')
    if args.train:
        data_helper.load_data_helper('../data/data_helper.bin')
        data_helper.load_train_data(data_dir=args.train_dir)
        train_model(data_helper)
    if args.eval:
        # Evaluate models on the RST-DT test set
        evaluator = Evaluator(model_dir='../data/model')
        evaluator.eval_parser(path=args.eval_dir,
                              report=True,
                              bcvocab=brown_clusters,
                              draw=False)
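The snippet above is truncated and only shows the --eval_dir argument. A parse_args() consistent with the flags the script actually reads (args.prepare, args.train, args.eval, args.train_dir, args.eval_dir) might look like the following sketch; the help texts are illustrative:

    import argparse

    def parse_args():
        parser = argparse.ArgumentParser()
        parser.add_argument('--prepare', action='store_true', help='create the data helper')
        parser.add_argument('--train', action='store_true', help='train the parser')
        parser.add_argument('--eval', action='store_true', help='evaluate on the RST-DT test set')
        parser.add_argument('--train_dir', help='training data directory')
        parser.add_argument('--eval_dir', help='eval data directory')
        return parser.parse_args()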
Example #9
from pathlib import Path

import torch

from eval.evaluation import Evaluator

with torch.no_grad():
    tests = []
    tests.extend(Path(r'../output').glob('*'))

    for path in tests:
        if path.name == 'eval':
            continue

        name = path.name.split('--')[-1]
        evaluator = Evaluator(Path(path), name=name)

        # checkpoint = evaluator.find_best_model('test')
        checkpoint = evaluator.checkpoints[-1]  # Last model

        evaluator.evaluate(checkpoint)
        evaluator.attention_map(checkpoint, 'per_abnormality')
        evaluator.attention_map(checkpoint, 'per_mammogram')