示例#1
0
def main(cl_arguments):
    """Run CoLA inference as an interactive REPL or over a corpus file.

    Loads the experiment config, builds the task and model, restores weights
    from ``cl_args.model_file_path`` and then dispatches on
    ``cl_args.inference_mode``.

    Args:
        cl_arguments: Raw arguments, forwarded to ``handle_arguments``.

    Raises:
        AssertionError: If ``args.target_tasks`` is not ``"cola"``, or if
            ``input_path`` / ``output_path`` is set in ``"repl"`` mode.
        KeyError: If ``inference_mode`` is neither ``"repl"`` nor ``"corpus"``.
    """

    # Arguments handling #
    cl_args = handle_arguments(cl_arguments)
    args = config.params_from_file(cl_args.config_file, cl_args.overrides)
    check_arg_name(args)

    # Raised explicitly instead of via `assert` so the check is not stripped
    # under `python -O`; AssertionError is kept so the failure type callers
    # see is unchanged.
    if args.target_tasks != "cola":
        raise AssertionError(
            "Currently only supporting CoLA. ({})".format(args.target_tasks)
        )

    if args.cuda >= 0:
        # Any failure while selecting the GPU downgrades the run to CPU
        # (cuda = -1) rather than aborting.
        try:
            if not torch.cuda.is_available():
                raise EnvironmentError("CUDA is not available, or not detected"
                                       " by PyTorch.")
            log.info("Using GPU %d", args.cuda)
            torch.cuda.set_device(args.cuda)
        except Exception:
            log.warning("GPU access failed. You might be using a CPU-only"
                        " installation of PyTorch. Falling back to CPU.")
            args.cuda = -1

    # Resolve "auto" placeholders to concrete settings.
    if args.tokenizer == "auto":
        args.tokenizer = select_tokenizer(args)
    if args.pool_type == "auto":
        args.pool_type = select_pool_type(args)

    # Prepare data #
    _, target_tasks, vocab, word_embs = build_tasks(args)
    tasks = sorted(set(target_tasks), key=lambda x: x.name)

    # Build or load model #
    cuda_device = parse_cuda_list_arg(args.cuda)
    model = build_model(args, vocab, word_embs, tasks, cuda_device)
    log.info("Loading existing model from %s...", cl_args.model_file_path)
    # NOTE(review): this passes args.cuda rather than the parsed cuda_device;
    # confirm that is what load_model_state expects here.
    load_model_state(model,
                     cl_args.model_file_path,
                     args.cuda, [],
                     strict=False)

    # Inference Setup #
    model.eval()
    # The vocabulary is re-read from the experiment directory and replaces the
    # one returned by build_tasks for the inference calls below.
    vocab = Vocabulary.from_files(os.path.join(args.exp_dir, "vocab"))
    indexers = build_indexers(args)
    task = take_one(tasks)
    model_preprocessing_interface = ModelPreprocessingInterface(args)

    # Run Inference #
    if cl_args.inference_mode == "repl":
        # REPL mode takes no file input/output; explicit raises survive `-O`.
        if cl_args.input_path is not None:
            raise AssertionError("input_path must not be set in repl mode.")
        if cl_args.output_path is not None:
            raise AssertionError("output_path must not be set in repl mode.")
        print("Running REPL for task: {}".format(task.name))
        run_repl(model, model_preprocessing_interface, vocab, indexers, task,
                 args)
    elif cl_args.inference_mode == "corpus":
        run_corpus_inference(
            model,
            model_preprocessing_interface,
            vocab,
            indexers,
            task,
            args,
            cl_args.input_path,
            cl_args.input_format,
            cl_args.output_path,
            cl_args.eval_output_path,
        )
    else:
        raise KeyError(cl_args.inference_mode)
示例#2
0
def main(cl_arguments):
    """Run the multitask pipeline: pretrain, target-task train, evaluate.

    Each phase is gated by its config flag (``do_pretrain``,
    ``do_target_task_training``, ``do_full_eval``). Checkpoints are removed
    at the end when ``delete_checkpoints_when_done`` is set and
    ``keep_all_checkpoints`` is not.

    Args:
        cl_arguments: Raw arguments, forwarded to ``handle_arguments``.
    """
    cl_args = handle_arguments(cl_arguments)
    args = config.params_from_file(cl_args.config_file, cl_args.overrides)
    # Check for deprecated arg names
    check_arg_name(args)
    args, _seed = initial_setup(args, cl_args)

    # Load tasks
    log.info("Loading tasks...")
    load_started = time.time()
    cuda_device = parse_cuda_list_arg(args.cuda)
    pretrain_tasks, target_tasks, vocab, word_embs = build_tasks(args)
    unique_tasks = set(pretrain_tasks + target_tasks)
    tasks = sorted(unique_tasks, key=lambda t: t.name)
    log.info("\tFinished loading tasks in %.3fs", time.time() - load_started)
    log.info("\t Tasks: {}".format([task.name for task in tasks]))

    # Build model
    log.info("Building model...")
    build_started = time.time()
    model = build_model(args, vocab, word_embs, tasks, cuda_device)
    log.info("Finished building model in %.3fs", time.time() - build_started)

    # Start Tensorboard if requested
    if cl_args.tensorboard:
        _run_background_tensorboard(
            os.path.join(args.run_dir, "tensorboard"), cl_args.tensorboard_port
        )

    check_configurations(args, pretrain_tasks, target_tasks)

    if args.do_pretrain:
        # Train on pretrain tasks
        log.info("Training...")
        # With exactly one pretrain task its own validation metric drives
        # stopping; otherwise the macro average is used.
        if len(pretrain_tasks) == 1:
            only_task = pretrain_tasks[0]
            stop_metric = only_task.val_metric
            should_decrease = only_task.val_metric_decreases
        else:
            stop_metric = "macro_avg"
            should_decrease = False

        trainer, _, opt_params, schd_params = build_trainer(
            args, cuda_device, [], model, args.run_dir, should_decrease, phase="pretrain"
        )
        trainable = [
            (name, param) for name, param in model.named_parameters() if param.requires_grad
        ]
        trainer.train(
            pretrain_tasks,
            stop_metric,
            args.batch_size,
            args.weighting_method,
            args.scaling_method,
            trainable,
            opt_params,
            schd_params,
            args.load_model,
            phase="pretrain",
        )

    # For checkpointing logic: strict loading only when no target-task
    # training takes place.
    strict = not args.do_target_task_training

    if args.do_target_task_training:
        # Train on target tasks
        pre_target_train_path = setup_target_task_training(args, target_tasks, model, strict)
        target_tasks_to_train = copy.deepcopy(target_tasks)

        # Check for previous target train checkpoints
        task_to_restore, _, _ = check_for_previous_checkpoints(
            args.run_dir, target_tasks_to_train, "target_train", args.load_model
        )
        if task_to_restore is not None:
            # Resume from the restored task: keep it and everything after it.
            names_in_order = [candidate.name for candidate in target_tasks_to_train]
            resume_at = names_in_order.index(task_to_restore)
            target_tasks_to_train = target_tasks_to_train[resume_at:]

        for target_task in target_tasks_to_train:
            # Skip tasks that should not be trained on.
            if target_task.eval_only_task:
                continue

            params_to_train = load_model_for_target_train_run(
                args, pre_target_train_path, model, strict, target_task, cuda_device
            )
            trainer, _, opt_params, schd_params = build_trainer(
                args,
                cuda_device,
                [target_task.name],
                model,
                args.run_dir,
                target_task.val_metric_decreases,
                phase="target_train",
            )

            # Only the restored task resumes from its checkpoint.
            resume_from_checkpoint = target_task.name == task_to_restore
            trainer.train(
                tasks=[target_task],
                stop_metric=target_task.val_metric,
                batch_size=args.batch_size,
                weighting_method=args.weighting_method,
                scaling_method=args.scaling_method,
                train_params=params_to_train,
                optimizer_params=opt_params,
                scheduler_params=schd_params,
                load_model=resume_from_checkpoint,
                phase="target_train",
            )

    if args.do_full_eval:
        log.info("Evaluating...")
        splits_to_write = evaluate.parse_write_preds_arg(args.write_preds)

        # Evaluate on target_tasks.
        for eval_task in target_tasks:
            # Find the task-specific best checkpoint to evaluate on; a task
            # may point at another task's classifier via "use_classifier".
            task_params = get_model_attribute(model, "_get_task_params", cuda_device)
            classifier_task = task_params(eval_task.name).get("use_classifier", eval_task.name)
            ckpt_path = get_best_checkpoint_path(args, "eval", classifier_task)
            assert ckpt_path is not None
            load_model_state(model, ckpt_path, cuda_device, skip_task_models=[], strict=strict)
            evaluate_and_write(args, model, [eval_task], splits_to_write, cuda_device)

    should_delete = args.delete_checkpoints_when_done and not args.keep_all_checkpoints
    if should_delete:
        log.info("Deleting all checkpoints.")
        delete_all_checkpoints(args.run_dir)

    log.info("Done!")
示例#3
0
def infer_jiant(exp_dir, task, items, batch_size=4):
    """Predict labels for ``items`` with the model stored under ``exp_dir``.

    The items are dumped as a mock task into a temporary directory, the
    model is evaluated on its ``'test'`` split, and the written predictions
    are read back.

    Args:
        exp_dir: Experiment directory holding ``transformers_cache`` and the
            per-task run directories.
        task: Task name; LIDIRUS reuses the TERRA run directory.
        items: Records passed to ``dump_task`` as the data to infer on.
        batch_size: Batch size handed to ``evaluate.evaluate``.

    Returns:
        A list built from ``load_preds`` for ``task``.
    """
    # use cached tokenizer
    cache_dir = join(exp_dir, 'transformers_cache')
    with env(PYTORCH_TRANSFORMERS_CACHE=cache_dir):
        reload(transformers.file_utils)

    # use terra model for lidirus
    if task == LIDIRUS:
        run_name = TERRA
    else:
        run_name = task
    run_dir = join(exp_dir, run_name)

    # Loggers handed to no_loggers around the library calls below.
    loggers = [
        LOGGER,
        pytorch_pretrained_bert.modeling.logger,
        transformers.file_utils.logger,
        transformers.configuration_utils.logger,
        transformers.modeling_utils.logger,
        transformers.tokenization_utils.logger,
        allennlp.nn.initializers.logger
    ]
    is_terra_pair = task in (TERRA, LIDIRUS)

    with no_loggers(loggers):
        args = params_from_file(join(run_dir, 'params.conf'))
        cuda_device = parse_cuda_list_arg('auto')

    args.local_log_path = join(run_dir, 'log.log')
    args.exp_dir = exp_dir
    args.project_dir = exp_dir
    args.run_dir = run_dir

    log('Build tasks')
    with no_loggers(loggers), TemporaryDirectory() as scratch_dir:
        # hide pkl, preproc
        args.exp_dir = scratch_dir
        args.data_dir = scratch_dir
        # mock empty train, val, test
        dump_task(scratch_dir, task, items=[])
        if is_terra_pair:
            # Also mock the other member of the TERRA/LIDIRUS pair.
            if task == TERRA:
                counterpart = LIDIRUS
            else:
                counterpart = TERRA
            dump_task(scratch_dir, counterpart, items=[])
        _, tasks, vocab, word_embs = build_tasks(args, cuda_device)

    log('Build model, load transformers pretrain')
    with no_loggers(loggers):
        # use transformers cache
        args.exp_dir = exp_dir
        model = build_model(args, vocab, word_embs, tasks, cuda_device)

    state_path = join(run_dir, 'model.th')
    log(f'Load state {state_path!r}')
    load_model_state(model, state_path, cuda_device)

    log(f'Build mock task, infer via eval, batch_size={batch_size}')
    with no_loggers(loggers), TemporaryDirectory() as infer_dir:
        args.exp_dir = infer_dir
        args.data_dir = infer_dir
        dump_task(infer_dir, task, items)

        if is_terra_pair:
            # choose one at inference
            args.pretrain_tasks = task
            args.target_tasks = task

        _, infer_tasks, _, _ = build_tasks(args, cuda_device)
        _, preds = evaluate.evaluate(
            model, infer_tasks, batch_size, cuda_device, 'test'
        )
        evaluate.write_preds(
            infer_tasks, preds, infer_dir, 'test', args.write_strict_glue_format
        )

        # Read the predictions back before the temporary directory is removed.
        return list(load_preds(infer_dir, task))