Example #1
0
def evaluate_split(model, topic, split, config, **kwargs):
    """Evaluate *model* on one split of a topic's data and report results.

    Builds an evaluator configuration from *config*, runs a
    ``RelevanceTransferEvaluator``, then either stashes test predictions in
    the module-level ``pred_scores`` dict (for ``split == 'test'``) or prints
    the metrics using the module-level ``LOG_HEADER`` / ``LOG_TEMPLATE``.

    Args:
        model: trained model instance passed to the evaluator.
        topic: topic identifier used as the ``pred_scores`` key and in logs.
        split: name of the data split to evaluate (e.g. 'dev' or 'test').
        config: namespace of run settings (model, batch_size, device, ...).
        **kwargs: must provide 'dataset', 'processor', 'embedding', 'loader'.

    Returns:
        The evaluator's predictions (``evaluator.y_pred``).
    """
    evaluator_config = {
        'model': config.model,
        'topic': topic,
        'split': split,
        'dataset': kwargs['dataset'],
        'batch_size': config.batch_size,
        # HAN / HR-CNN batches carry no usable length info, so the evaluator
        # must ignore lengths for them. Setting this here removes the original
        # post-hoc mutation, which also wrote into an undefined
        # `trainer_config` (a copy-paste bug / NameError).
        'ignore_lengths': config.model in {'HAN', 'HR-CNN'},
        'is_lowercase': True,
        'gradient_accumulation_steps': config.gradient_accumulation_steps,
        'max_seq_length': config.max_seq_length,
        # Was `args.max_doc_length`; read from `config` like every other key.
        # NOTE(review): assumes config mirrors args here — confirm at callers.
        'max_doc_length': config.max_doc_length,
        'data_dir': config.data_dir,
        'n_gpu': n_gpu,  # module-level GPU count
        'device': config.device,
        # Was `True if args.model in {...} else False`; the boolean expression
        # suffices, and `config.model` keeps the source of truth consistent.
        'is_hierarchical': config.model in {'HBERT-Base', 'HBERT-Large'}
    }

    evaluator = RelevanceTransferEvaluator(model,
                                           evaluator_config,
                                           processor=kwargs['processor'],
                                           embedding=kwargs['embedding'],
                                           data_loader=kwargs['loader'],
                                           dataset=kwargs['dataset'])

    accuracy, precision, recall, f1, avg_loss = evaluator.get_scores()[0]

    if split == 'test':
        # Keep raw predictions and doc ids for later aggregation/ranking.
        pred_scores[topic] = (evaluator.y_pred, evaluator.docid)
    else:
        print('\n' + LOG_HEADER)
        print(
            LOG_TEMPLATE.format(topic, accuracy, precision, recall, f1,
                                avg_loss) + '\n')

    return evaluator.y_pred
Example #2
0
                'split': 'dev',
                'batch_size': args.batch_size,
                'ignore_lengths': True,
                'is_lowercase': True,
                'gradient_accumulation_steps':
                args.gradient_accumulation_steps,
                'max_seq_length': args.max_seq_length,
                'max_doc_length': args.max_doc_length,
                'data_dir': args.data_dir,
                'n_gpu': n_gpu,
                'device': args.device
            }

            dev_evaluator = RelevanceTransferEvaluator(model,
                                                       evaluator_config,
                                                       dataset=dataset,
                                                       processor=processor,
                                                       tokenizer=tokenizer)
            trainer = RelevanceTransferTrainer(model,
                                               trainer_config,
                                               optimizer=optimizer,
                                               processor=processor,
                                               tokenizer=tokenizer,
                                               scheduler=scheduler,
                                               dev_evaluator=dev_evaluator)

            trainer.train(args.epochs)
            model = torch.load(trainer.snapshot_path)

            # Calculate dev and test metrics
            evaluate_split(model,
Example #3
0
                    'ignore_lengths': True,
                    'is_lowercase': True,
                    'gradient_accumulation_steps':
                    args.gradient_accumulation_steps,
                    'max_seq_length': args.max_seq_length,
                    'max_doc_length': args.max_doc_length,
                    'data_dir': args.data_dir,
                    'n_gpu': n_gpu,
                    'device': args.device,
                    'is_hierarchical': True
                    if args.model in {'HBERT-Base', 'HBERT-Large'} else False
                }

                dev_evaluator = RelevanceTransferEvaluator(model,
                                                           evaluator_config,
                                                           dataset=dataset,
                                                           embedding=None,
                                                           processor=processor,
                                                           data_loader=None)
                trainer = RelevanceTransferTrainer(model,
                                                   trainer_config,
                                                   processor=processor,
                                                   train_loader=None,
                                                   embedding=None,
                                                   test_evaluator=None,
                                                   dev_evaluator=dev_evaluator)

                trainer.train(args.epochs)
                model = torch.load(trainer.snapshot_path)

                # Calculate dev and test metrics
                evaluate_split(model,