示例#1
0
def fairseq_train_and_evaluate(dataset,
                               metrics_coefs=None,
                               parametrization_budget=64,
                               **kwargs):
    """Train a fairseq model on ``dataset`` and score it on the TurkCorpus validation set.

    Steps, in order: validate the dataset and the keyword arguments, create
    an experiment directory, optionally build a preprocessed copy of the
    dataset, run fairseq preprocessing and training, search for a
    preprocessor parametrization, then evaluate the resulting simplifier.

    Args:
        dataset: Dataset identifier; it is validated with ``check_dataset``.
        metrics_coefs: Three coefficients forwarded to ``combine_metrics``
            together with the BLEU, SARI and FKGL scores. Defaults to
            ``[1, 1, 1]`` when omitted or ``None``.
        parametrization_budget: Budget forwarded to
            ``find_best_parametrization``.
        **kwargs: Extra options. ``preprocessors_kwargs`` (default ``{}``)
            selects the preprocessors; the remaining entries are filtered
            through ``get_allowed_kwargs`` for ``fairseq_train`` and
            ``fairseq_generate``.

    Returns:
        The value of ``combine_metrics`` computed from the validation BLEU,
        SARI and FKGL scores and ``metrics_coefs``.
    """
    # A list literal as a default would be one object shared by every call;
    # build a fresh list per call instead.
    if metrics_coefs is None:
        metrics_coefs = [1, 1, 1]
    check_dataset(dataset)
    kwargs = check_and_resolve_args(kwargs)
    exp_dir = prepare_exp_dir()
    preprocessors_kwargs = kwargs.get('preprocessors_kwargs', {})
    preprocessors = get_preprocessors(preprocessors_kwargs)
    if len(preprocessors) > 0:
        # Train on the preprocessed dataset and keep the pickled
        # preprocessors next to the experiment outputs.
        dataset = create_preprocessed_dataset(dataset, preprocessors, n_jobs=1)
        shutil.copy(get_dataset_dir(dataset) / 'preprocessors.pickle', exp_dir)
    preprocessed_dir = fairseq_preprocess(dataset)
    # NOTE(review): get_allowed_kwargs presumably keeps only the kwargs that
    # fairseq_train accepts beyond the positional ones given here - confirm.
    train_kwargs = get_allowed_kwargs(fairseq_train, preprocessed_dir, exp_dir,
                                      **kwargs)
    fairseq_train(preprocessed_dir, exp_dir=exp_dir, **train_kwargs)
    # Evaluation
    # The two string arguments look like placeholders standing in for
    # fairseq_generate's file path parameters - TODO confirm.
    generate_kwargs = get_allowed_kwargs(fairseq_generate, 'complex_filepath',
                                         'pred_filepath', exp_dir, **kwargs)
    recommended_preprocessors_kwargs = find_best_parametrization(
        exp_dir, metrics_coefs, preprocessors_kwargs, parametrization_budget)
    print(
        f'recommended_preprocessors_kwargs={recommended_preprocessors_kwargs}')
    simplifier = get_simplifier(exp_dir, recommended_preprocessors_kwargs,
                                generate_kwargs)
    scores = evaluate_simplifier_on_turkcorpus(simplifier, phase='valid')
    print(f'scores={scores}')
    score = combine_metrics(scores['BLEU'], scores['SARI'], scores['FKGL'],
                            metrics_coefs)
    return score
示例#2
0
 def evaluate_parametrization(**instru_kwargs):
     """Score one candidate parametrization on the TurkCorpus validation set.

     ``instru_kwargs`` is converted to preprocessor kwargs, a simplifier is
     built from ``exp_dir`` with them, and its BLEU, SARI and FKGL scores
     are combined using ``metrics_coefs``. Both ``exp_dir`` and
     ``metrics_coefs`` are taken from the enclosing scope.
     """
     candidate_kwargs = instru_kwargs_to_preprocessors_kwargs(instru_kwargs)
     # Default generate kwargs are used on purpose instead of the provided
     # ones, because they are faster.
     candidate_simplifier = get_simplifier(
         exp_dir,
         preprocessors_kwargs=candidate_kwargs,
         generate_kwargs={},
     )
     valid_scores = evaluate_simplifier_on_turkcorpus(candidate_simplifier,
                                                      phase='valid')
     bleu = valid_scores['BLEU']
     sari = valid_scores['SARI']
     fkgl = valid_scores['FKGL']
     return combine_metrics(bleu, sari, fkgl, metrics_coefs)
示例#3
0
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#

from access.evaluation.general import evaluate_simplifier_on_turkcorpus
from access.preprocessors import get_preprocessors
from access.resources.prepare import prepare_turkcorpus, prepare_models
from access.simplifiers import get_fairseq_simplifier, get_preprocessed_simplifier


if __name__ == '__main__':
    # Script entry point: evaluate the pretrained model on the TurkCorpus
    # test set and report the resulting scores.
    print('Evaluating pretrained model')
    prepare_turkcorpus()
    # prepare_models() returns the directory of the model evaluated below.
    best_model_dir = prepare_models()
    # Preprocessor settings applied around the pretrained model.
    recommended_preprocessors_kwargs = {
        'LengthRatioPreprocessor': {'target_ratio': 0.95},
        'LevenshteinPreprocessor': {'target_ratio': 0.75},
        'WordRankRatioPreprocessor': {'target_ratio': 0.75},
        'SentencePiecePreprocessor': {'vocab_size': 10000},
    }
    preprocessors = get_preprocessors(recommended_preprocessors_kwargs)
    simplifier = get_fairseq_simplifier(best_model_dir, beam=8)
    simplifier = get_preprocessed_simplifier(simplifier, preprocessors=preprocessors)
    # Print the returned scores; without this the evaluation result is
    # computed and then silently discarded.
    scores = evaluate_simplifier_on_turkcorpus(simplifier, phase='test')
    print(scores)