Пример #1
0
def main():
    """Load a serialized model and run a decoding evaluation task with it.

    Command-line arguments (all required):
        --src: path of the source file to read from.
        --hyp: path of the file to write the hypothesis to.
        --mod: path of the model file to read.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--src",
                        help="Path of source file to read from.",
                        required=True)
    parser.add_argument("--hyp",
                        help="Path of file to write hypothesis to.",
                        required=True)
    parser.add_argument("--mod",
                        help="Path of model file to read.",
                        required=True)
    args = parser.parse_args()

    # The experiment directory is the directory containing this script.
    exp_dir = os.path.dirname(__file__)
    # NOTE(review): the literal "{EXP}" is passed through as the experiment
    # name; presumably a placeholder resolved elsewhere -- confirm.
    exp = "{EXP}"

    param_collection.ParamManager.init_param_col()

    # TODO: can we avoid the LoadSerialized proxy and load stuff directly?
    load_experiment = persistence.LoadSerialized(filename=args.mod)

    # Preload the serialized experiment, then build the actual objects.
    uninitialized_experiment = persistence.YamlPreloader.preload_obj(
        load_experiment, exp_dir=exp_dir, exp_name=exp)
    loaded_experiment = persistence.initialize_if_needed(
        uninitialized_experiment)
    model = loaded_experiment.model
    inference = model.inference
    param_collection.ParamManager.populate()

    # Read from --src and write to --hyp using the model's own inference object.
    decoding_task = eval_task.DecodingEvalTask(args.src, args.hyp, model,
                                               inference)
    decoding_task.eval()
Пример #2
0
    def setUp(self):
        """Load the serialized tiny ja-en model and the head of the test corpora.

        Sets ``self.model`` to the loaded experiment's model and fills
        ``self.src_data`` / ``self.trg_data`` with the sentences read from
        ``test/data/head.ja`` and ``test/data/head.en``.
        """
        events.clear()
        ParamManager.init_param_col()

        # Load a pre-trained model; the overwrite entries set the "train" and
        # "status" paths of the serialized experiment to None.
        load_experiment = LoadSerialized(
            filename="test/data/tiny_jaen.model",
            overwrite=[
                {
                    "path": "train",
                    "val": None
                },
                {
                    "path": "status",
                    "val": None
                },
            ])
        uninitialized_experiment = YamlPreloader.preload_obj(load_experiment,
                                                             exp_dir='.',
                                                             exp_name="decode")
        loaded_experiment = initialize_if_needed(uninitialized_experiment)
        ParamManager.populate()

        # Pull out the parts we need from the experiment
        self.model = loaded_experiment.model

        # Switch the training flag off before reading data for the tests.
        event_trigger.set_train(False)

        self.src_data = list(
            self.model.src_reader.read_sents("test/data/head.ja"))
        self.trg_data = list(
            self.model.trg_reader.read_sents("test/data/head.en"))
Пример #3
0
 def test_shallow(self):
     """Preload, initialize and save a tree whose arg2 refers to arg1 by id.

     The test has no explicit assertions; it passes when preloading,
     initialization and saving to ``self.model_file`` complete without raising.
     """
     # An explicit Loader is required by PyYAML >= 6 (and avoids the warning
     # on 5.x). The document is a trusted literal, so the full Loader is fine.
     test_obj = yaml.load("""
                      a: !DummyArgClass
                        arg1: !DummyArgClass2
                          _xnmt_id: id1
                          v: some_val
                        arg2: !Ref { name: id1 }
                      """, Loader=yaml.Loader)
     preloaded = persistence.YamlPreloader.preload_obj(root=test_obj,
                                                       exp_name="exp1",
                                                       exp_dir=self.out_dir)
     initialized = persistence.initialize_if_needed(preloaded)
     persistence.save_to_file(self.model_file, initialized)
Пример #4
0
 def initialize_generator(self, **kwargs):
     """Configure the search strategy and report settings from keyword options.

     Recognized keys (each may be absent or None):
         len_norm_type: length normalization spec, passed through
             ``initialize_if_needed``; ``NoNormalization`` is used otherwise.
         max_len: forwarded to the search strategy when given.
         beam: beam size; when missing, greedy search is used instead of
             beam search.
         report_path, report_type: stored on ``self`` unchanged.
     """
     # TODO: refactor?
     norm_spec = kwargs.get("len_norm_type")
     if norm_spec is not None:
         normalizer = initialize_if_needed(norm_spec)
     else:
         normalizer = xnmt.length_normalization.NoNormalization()

     # Only pass max_len on when the caller actually supplied one.
     max_len = kwargs.get("max_len")
     strategy_kwargs = {} if max_len is None else {"max_len": max_len}

     beam = kwargs.get("beam")
     if beam is not None:
         # Length normalization is only handed to beam search.
         strategy_kwargs["beam_size"] = beam
         strategy_kwargs["len_norm"] = normalizer
         self.search_strategy = BeamSearch(**strategy_kwargs)
     else:
         self.search_strategy = GreedySearch(**strategy_kwargs)

     self.report_path = kwargs.get("report_path")
     self.report_type = kwargs.get("report_type")
Пример #5
0
 def test_simple_reference(self):
     """Check that a path-based !Ref survives a dump / reload round trip.

     After initialization the tree is dumped and parsed again; exactly one of
     ``arg1`` / ``arg2`` is expected to come back as a ``Ref`` and the other as
     the full ``DummyArgClass`` subtree.
     """
     # An explicit Loader is required by PyYAML >= 6 (and avoids the warning
     # on 5.x). The document is a trusted literal, so the full Loader is fine.
     test_obj = yaml.load("""
                      !DummyArgClass
                      arg1: !DummyArgClass
                        arg1: !DummyArgClass2 { v: some_val }
                        arg2: !DummyArgClass2 { v: some_other_val }
                      arg2: !Ref { path: arg1 }
                      """, Loader=yaml.Loader)
     preloaded = persistence.YamlPreloader.preload_obj(root=test_obj,
                                                       exp_name="exp1",
                                                       exp_dir=self.out_dir)
     initialized = persistence.initialize_if_needed(preloaded)
     dump = persistence._dump(initialized)
     reloaded = yaml.load(dump, Loader=yaml.Loader)
     # Either attribute may have been written as the reference; normalize so
     # that arg1 holds the object and arg2 holds the Ref before asserting.
     if isinstance(reloaded.arg1, persistence.Ref):
         reloaded.arg1, reloaded.arg2 = reloaded.arg2, reloaded.arg1
     self.assertIsInstance(reloaded.arg1, DummyArgClass)
     self.assertIsInstance(reloaded.arg2, persistence.Ref)
     self.assertIsInstance(reloaded.arg1.arg1, DummyArgClass2)
     self.assertIsInstance(reloaded.arg1.arg2, DummyArgClass2)
Пример #6
0
def main(overwrite_args=None):
    """Parse the command line and run the requested experiments in sequence.

    Args:
        overwrite_args: Optional list of argument strings handed to
            ``ArgumentParser.parse_args``; ``None`` lets argparse read
            ``sys.argv``.

    Raises:
        Exception: If an experiment named on the command line does not appear
            in the experiments file.
    """
    with tee.Tee(), tee.Tee(error=True):
        argparser = argparse.ArgumentParser()
        argparser.add_argument("--dynet-mem", type=str)
        argparser.add_argument("--dynet-seed",
                               type=int,
                               help="set random seed for DyNet and XNMT.")
        argparser.add_argument("--dynet-autobatch", type=int)
        argparser.add_argument("--dynet-devices", type=str)
        argparser.add_argument("--dynet-viz",
                               action='store_true',
                               help="use visualization")
        argparser.add_argument("--dynet-gpu",
                               action='store_true',
                               help="use GPU acceleration")
        argparser.add_argument("--dynet-gpu-ids", type=int)
        argparser.add_argument("--dynet-gpus", type=int)
        argparser.add_argument("--dynet-weight-decay", type=float)
        argparser.add_argument("--dynet-profiling", type=int)
        # The adjacent literals are concatenated, so each continuation needs
        # its own leading space.
        argparser.add_argument("--settings",
                               type=str,
                               default="standard",
                               help="settings (standard, debug, or unittest)"
                               " must be given in '=' syntax, e.g."
                               " --settings=standard")
        argparser.add_argument("experiments_file")
        argparser.add_argument("experiment_name",
                               nargs='*',
                               help="Run only the specified experiments")
        argparser.set_defaults(generate_doc=False)
        args = argparser.parse_args(overwrite_args)

        # Seed Python's and NumPy's generators when a (non-zero) seed is given.
        if args.dynet_seed:
            random.seed(args.dynet_seed)
            np.random.seed(args.dynet_seed)

        if args.dynet_gpu and settings.CHECK_VALIDITY:
            settings.CHECK_VALIDITY = False
            log_preamble(
                "disabling CHECK_VALIDITY because it is not supported on GPU currently",
                logging.WARNING)

        config_experiment_names = YamlPreloader.experiment_names_from_file(
            args.experiments_file)

        results = []

        # Check ahead of time that all experiments exist, to avoid bad surprises
        experiment_names = args.experiment_name or config_experiment_names

        if args.experiment_name:
            nonexistent = set(experiment_names).difference(
                config_experiment_names)
            if nonexistent:
                # Sorted so the error message is deterministic.
                raise Exception("Experiments {} do not exist".format(
                    ",".join(sorted(nonexistent))))

        log_preamble(
            f"running XNMT revision {tee.get_git_revision()} on {socket.gethostname()} on {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        )
        for experiment_name in experiment_names:

            ParamManager.init_param_col()

            uninitialized_exp_args = YamlPreloader.preload_experiment_from_file(
                args.experiments_file, experiment_name)

            logger.info(f"=> Running {experiment_name}")

            glob_args = uninitialized_exp_args.data.exp_global
            log_file = glob_args.log_file

            if os.path.isfile(log_file) and not settings.OVERWRITE_LOG:
                logger.warning(
                    f"log file {log_file} already exists, skipping experiment; please delete log file by hand if you want to overwrite it "
                    "(or activate OVERWRITE_LOG, by either specifying an environment variable as OVERWRITE_LOG=1, "
                    "or specifying --settings=debug, or changing xnmt.settings.Standard.OVERWRITE_LOG manually)"
                )
                continue

            tee.set_out_file(log_file)

            # Always detach the per-experiment log file, even if the
            # experiment raises, so later output is not written to it.
            try:
                model_file = glob_args.model_file

                uninitialized_exp_args.data.exp_global.commandline_args = args

                # Create the model
                experiment = initialize_if_needed(uninitialized_exp_args)
                ParamManager.param_col.model_file = experiment.exp_global.model_file
                ParamManager.param_col.save_num_checkpoints = experiment.exp_global.save_num_checkpoints
                ParamManager.populate()

                # Run the experiment
                eval_scores = experiment(save_fct=lambda: save_to_file(
                    model_file, experiment, ParamManager.param_col))
                results.append((experiment_name, eval_scores))
                print_results(results)
            finally:
                tee.unset_out_file()
Пример #7
0
def main(overwrite_args: Optional[Sequence[str]] = None) -> None:
    """Parse the command line and run the requested experiments in sequence.

    Args:
        overwrite_args: Optional argument strings handed to
            ``ArgumentParser.parse_args``; ``None`` lets argparse read
            ``sys.argv``.

    Raises:
        Exception: If an experiment named on the command line does not appear
            in the experiments file. Exceptions raised while an experiment
            runs are logged to ``file_logger`` and re-raised.
    """
    with tee.Tee(), tee.Tee(error=True):
        argparser = argparse.ArgumentParser()
        utils.add_backend_argparse(argparser)
        # The adjacent literals are concatenated, so each continuation needs
        # its own leading space.
        argparser.add_argument("--settings",
                               type=str,
                               default="standard",
                               help="settings (standard, debug, or unittest)"
                               " must be given in '=' syntax, e.g."
                               " --settings=standard")
        argparser.add_argument(
            "--resume",
            action='store_true',
            help="whether a saved experiment is being resumed, and"
            " locations of output files should be re-used.")
        argparser.add_argument("--backend",
                               type=str,
                               default="dynet",
                               help="backend (dynet or torch)")
        argparser.add_argument("experiments_file")
        argparser.add_argument("experiment_name",
                               nargs='*',
                               help="Run only the specified experiments")
        argparser.set_defaults(generate_doc=False)
        args = argparser.parse_args(overwrite_args)

        # With the DyNet backend, --dynet-seed doubles as the general seed.
        if xnmt.backend_dynet and args.dynet_seed:
            args.seed = args.dynet_seed
        if getattr(args, "seed", None):
            random.seed(args.seed)
            np.random.seed(args.seed)
            if xnmt.backend_torch:
                # Use the requested seed so torch follows the same seed as
                # random and numpy rather than a fixed constant.
                torch.manual_seed(args.seed)

        if xnmt.backend_dynet and args.dynet_gpu and settings.CHECK_VALIDITY:
            settings.CHECK_VALIDITY = False
            log_preamble(
                "disabling CHECK_VALIDITY because it is not supported in the DyNet/GPU setting",
                logging.WARNING)

        config_experiment_names = YamlPreloader.experiment_names_from_file(
            args.experiments_file)

        results = []

        # Check ahead of time that all experiments exist, to avoid bad surprises
        experiment_names = args.experiment_name or config_experiment_names

        if args.experiment_name:
            nonexistent = set(experiment_names).difference(
                config_experiment_names)
            if nonexistent:
                # Sorted so the error message is deterministic.
                raise Exception("Experiments {} do not exist".format(
                    ",".join(sorted(nonexistent))))

        log_preamble(
            f"running XNMT revision {tee.get_git_revision()} on {socket.gethostname()} with {'DyNet' if xnmt.backend_dynet else 'PyTorch'} on {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        )
        for experiment_name in experiment_names:

            ParamManager.init_param_col()

            uninitialized_exp_args = YamlPreloader.preload_experiment_from_file(
                args.experiments_file, experiment_name, resume=args.resume)

            logger.info(f"=> Running {experiment_name}")

            glob_args = uninitialized_exp_args.data.exp_global
            log_file = glob_args.log_file

            if not settings.OVERWRITE_LOG:
                # Skip the experiment rather than clobber existing logs.
                log_files_exist = []
                if os.path.isfile(log_file):
                    log_files_exist.append(log_file)
                if os.path.isdir(log_file + ".tb"):
                    log_files_exist.append(log_file + ".tb/")
                if log_files_exist:
                    logger.warning(
                        f"log file(s) {' '.join(log_files_exist)} already exists, skipping experiment; "
                        "please delete log file by hand if you want to overwrite it "
                        "(or activate OVERWRITE_LOG, by either specifying an environment variable OVERWRITE_LOG=1, "
                        "or specifying --settings=debug, or changing xnmt.settings.Standard.OVERWRITE_LOG manually)"
                    )
                    continue
            elif os.path.isdir(log_file + ".tb"):
                # remove tensorboard logs from previous run that is being overwritten
                shutil.rmtree(log_file + ".tb/")

            tee.set_out_file(log_file, exp_name=experiment_name)

            try:

                model_file = glob_args.model_file

                uninitialized_exp_args.data.exp_global.commandline_args = vars(
                    args)

                # Create the model
                experiment = initialize_if_needed(uninitialized_exp_args)
                ParamManager.param_col.model_file = experiment.exp_global.model_file
                ParamManager.param_col.save_num_checkpoints = experiment.exp_global.save_num_checkpoints
                ParamManager.populate()

                # Run the experiment
                eval_scores = experiment(
                    save_fct=lambda: save_to_file(model_file, experiment))
                results.append((experiment_name, eval_scores))
                print_results(results)

            except Exception:
                # Record the traceback in the log file, then propagate the
                # original exception unchanged.
                file_logger.error(traceback.format_exc())
                raise
            finally:
                tee.unset_out_file()
Пример #8
0
import xnmt.tee
from xnmt.param_collection import ParamManager
from xnmt.persistence import initialize_if_needed, YamlPreloader, LoadSerialized, save_to_file

# Directory containing this script. os.path.dirname() returns "" when the
# script is addressed by a bare relative file name, which would turn the
# f-string paths below into root-anchored ones ("/models/..."), so fall back
# to the current directory in that case.
EXP_DIR = os.path.dirname(__file__) or "."
EXP = "programmatic-load"

model_file = f"{EXP_DIR}/models/{EXP}.mod"
log_file = f"{EXP_DIR}/logs/{EXP}.log"

xnmt.tee.set_out_file(log_file)

ParamManager.init_param_col()

# Load the previously saved "programmatic" model, passing an overwrite entry
# that sets its "train" path to None.
load_experiment = LoadSerialized(
  filename=f"{EXP_DIR}/models/programmatic.mod",
  overwrite=[
    {"path" : "train", "val" : None}
  ]
)

# Preload the serialized experiment, then build the actual objects.
uninitialized_experiment = YamlPreloader.preload_obj(load_experiment, exp_dir=EXP_DIR, exp_name=EXP)
loaded_experiment = initialize_if_needed(uninitialized_experiment)

# if we were to continue training, we would need to set a save model file like this:
# ParamManager.param_col.model_file = model_file
ParamManager.populate()

# run experiment
loaded_experiment(save_fct=lambda: save_to_file(model_file, loaded_experiment))