Example #1
0
# Evaluate the trained model on the dev split of every dataset configured
# for the reader.
# NOTE(review): dev_pred/dev_eval are the same fixed paths on every
# iteration, so with more than one dataset later results overwrite earlier
# ones — confirm whether per-dataset output paths are intended.
dev_pred = os.path.join(serialization_dir, 'dev.conllu')
dev_eval = os.path.join(serialization_dir, 'dev_results.json')
for dataset_config in predict_params['dataset_reader']['datasets'].values():
    util.predict_and_evaluate_model(args.predictor, predict_params,
                                    serialization_dir, dataset_config['dev'],
                                    dev_pred, dev_eval,
                                    dataset_config['evaluation'])

# Clean up intermediate training files, keeping the final model archive
# unless the user explicitly asked to remove it as well.
# NOTE: if trainer>num_serialized_models_to_keep should be honored, this
# automatic cleanup must be disabled.
util.cleanup_training(serialization_dir, keep_archive=not args.cleanup_archive)
Example #2
0
# Train the model; a Ctrl+C (KeyboardInterrupt) aborts training but lets
# the script continue to the prediction/evaluation steps below.
try:
    util.cache_vocab(train_params)
    train_model(train_params, serialization_dir, recover=bool(args.resume))
except KeyboardInterrupt:
    logger.warning("KeyboardInterrupt, skipping training")

# Resolve the dev/test input files from the prediction config, and the
# paths (inside the serialization directory) where predictions and
# evaluation results will be written.
dev_file = predict_params["validation_data_path"]
test_file = predict_params["test_data_path"]

output_names = ["dev.conllu", "dev_results.json", "test.conllu", "test_results.json"]
dev_pred, dev_eval, test_pred, test_eval = (
    os.path.join(serialization_dir, name) for name in output_names
)

# Run a separate dev evaluation only when dev and test are distinct files.
if dev_file != test_file:
    util.predict_and_evaluate_model(args.predictor, predict_params,
                                    serialization_dir, dev_file, dev_pred,
                                    dev_eval)

util.predict_and_evaluate_model(args.predictor, predict_params,
                                serialization_dir, test_file, test_pred,
                                test_eval)

# Optionally bundle the (hard-coded) BERT config into the model archive.
if args.archive_bert:
    # NOTE: English-only BERT config. The multilingual config
    # ("config/archive/bert-base-multilingual-cased/bert_config.json") was
    # used previously — switch back here for multilingual training runs.
    bert_config = "config/archive/bert-base-cased/bert_config.json"
    util.archive_bert_model(serialization_dir, bert_config)

# Clean up intermediate training files, keeping the final model archive
# unless the user explicitly asked to remove it as well.
util.cleanup_training(serialization_dir, keep_archive=not args.cleanup_archive)
Example #3
0
# Prediction-time command-line options.
parser.add_argument("--archive_latest", action="store_true", help="Archive the latest trained model")
parser.add_argument("--device", default=None, type=int, help="CUDA device number; set to -1 for CPU")
parser.add_argument("--lazy", action="store_true", help="Lazy load dataset")
parser.add_argument("--batch_size", default=1, type=int, help="The size of each prediction batch")
parser.add_argument("--sigmorphon", action="store_true", help="Use Sigmorphon evaluation instead of UD")

args = parser.parse_args()

# Register the project's custom AllenNLP components.
import_submodules("udify")

# Command-line flags take precedence over the archived training config.
overrides = {}
if args.device is not None:
    overrides["trainer"] = {"cuda_device": args.device}
if args.lazy:
    overrides["dataset_reader"] = {"lazy": args.lazy}

archived_config = Params.from_file(os.path.join(args.archive_dir, "config.json"))
params = util.merge_configs([Params(overrides), archived_config])

# Optionally (re-)archive the most recent model weights before predicting.
if args.archive_latest:
    archive_model(args.archive_dir)

# Predictions are written inside the archive directory. Evaluation runs
# only when an eval-output file name was supplied on the command line.
pred_file = os.path.join(args.archive_dir, args.pred_file)

if args.eval_file:
    eval_file = os.path.join(args.archive_dir, args.eval_file)
    util.predict_and_evaluate_model("udify_predictor", params, args.archive_dir,
                                    args.input_file, pred_file, eval_file,
                                    batch_size=args.batch_size)
else:
    util.predict_model("udify_predictor", params, args.archive_dir, args.input_file, pred_file)