def test_placeholder_loadserialized(self):
  """A !LoadSerialized node may use {EXP_DIR}/{EXP} placeholders in its filename."""
  # Serialize a dummy object to <out_dir>/tmp1.yaml so the placeholder path resolves.
  with open(f"{self.out_dir}/tmp1.yaml", "w") as outfile:
    yaml.dump(DummyClass(arg1="v1"), outfile)
  # Note the doubled braces: this f-string emits literal '{EXP_DIR}'/'{EXP}'
  # markers, which YamlPreloader substitutes at preload time.
  spec = yaml.load(f"""
                   a: !LoadSerialized
                     filename: '{{EXP_DIR}}/{{EXP}}.yaml'
                   """)
  # Should resolve the placeholders and load without raising.
  YamlPreloader.preload_obj(spec, exp_name = "tmp1", exp_dir=self.out_dir)
def test_inconsistent_loadserialized(self):
  """A !LoadSerialized node with an unknown argument must raise ValueError."""
  # Serialize a dummy object so the filename itself is valid — only
  # 'bad_arg' should be at fault.
  with open(f"{self.out_dir}/tmp1.yaml", "w") as outfile:
    yaml.dump(DummyClass(arg1="v1"), outfile)
  spec = yaml.load(f"""
                   a: !LoadSerialized
                     filename: {self.out_dir}/tmp1.yaml
                     bad_arg: 1
                   """)
  with self.assertRaises(ValueError):
    YamlPreloader.preload_obj(spec, "SOME_EXP_NAME", "SOME_EXP_DIR")
def main() -> None:
  """Load a serialized model and decode a source file into a hypothesis file.

  Command-line arguments (all required):
    --src: path of the source file to read from.
    --hyp: path of the file to write hypotheses to.
    --mod: path of the serialized model file to load.
  """
  parser = argparse.ArgumentParser()
  utils.add_dynet_argparse(parser)
  # Fixed: the --src help string carried a stray f-prefix with no placeholders.
  parser.add_argument("--src", help="Path of source file to read from.", required=True)
  parser.add_argument("--hyp", help="Path of file to write hypothesis to.", required=True)
  parser.add_argument("--mod", help="Path of model file to read.", required=True)
  args = parser.parse_args()

  exp_dir = os.path.dirname(__file__)
  # Literal placeholder string, presumably substituted by the preloader —
  # TODO(review): confirm "{EXP}" is intentional and not a formatting bug.
  exp = "{EXP}"

  param_collections.ParamManager.init_param_col()
  # TODO: can we avoid the LoadSerialized proxy and load stuff directly?
  load_experiment = LoadSerialized(filename=args.mod)
  uninitialized_experiment = YamlPreloader.preload_obj(load_experiment, exp_dir=exp_dir, exp_name=exp)
  loaded_experiment = initialize_if_needed(uninitialized_experiment)
  model = loaded_experiment.model
  inference = model.inference
  # Parameters must be populated after the model graph is built.
  param_collections.ParamManager.populate()

  decoding_task = tasks.DecodingEvalTask(args.src, args.hyp, model, inference)
  decoding_task.eval()
def setUp(self):
  """Load the pre-trained ja->en toy model and the test sentence pairs.

  Sets: self.model, self.src_data, self.trg_data.
  """
  events.clear()
  ParamManager.init_param_col()
  # Load a pre-trained model; 'train' and 'status' are overwritten with None
  # so the loaded experiment can be used for decoding only.
  # Fixed: dropped the pointless f-prefix on the constant filename.
  load_experiment = LoadSerialized(
    filename="test/data/tiny_jaen.model",
    overwrite=[
      {"path": "train", "val": None},
      {"path": "status", "val": None},
    ],
  )
  EXP_DIR = '.'
  EXP = "decode"
  uninitialized_experiment = YamlPreloader.preload_obj(load_experiment, exp_dir=EXP_DIR, exp_name=EXP)
  loaded_experiment = initialize_if_needed(uninitialized_experiment)
  ParamManager.populate()
  # Pull out the parts we need from the experiment.
  # (Removed dead locals src_vocab/trg_vocab — they were assigned but never used.)
  self.model = loaded_experiment.model
  event_trigger.set_train(False)
  self.src_data = list(self.model.src_reader.read_sents("test/data/head.ja"))
  self.trg_data = list(self.model.trg_reader.read_sents("test/data/head.en"))
import xnmt.tee
from xnmt.param_collection import ParamManager
from xnmt.persistence import initialize_if_needed, YamlPreloader, LoadSerialized, save_to_file

# Example: reload the model trained by the 'programmatic' example and rerun it.
EXP_DIR = os.path.dirname(__file__)
EXP = "programmatic-load"

model_file = f"{EXP_DIR}/models/{EXP}.mod"
log_file = f"{EXP_DIR}/logs/{EXP}.log"

# Mirror all output into the log file.
xnmt.tee.set_out_file(log_file)

ParamManager.init_param_col()

# Describe what to load: the previously saved model, with its 'train'
# component stripped out.
load_spec = LoadSerialized(
  filename=f"{EXP_DIR}/models/programmatic.mod",
  overwrite=[
    {"path" : "train", "val" : None}
  ]
)

# Resolve the spec into an initialized experiment object.
preloaded = YamlPreloader.preload_obj(load_spec, exp_dir=EXP_DIR, exp_name=EXP)
experiment = initialize_if_needed(preloaded)

# if we were to continue training, we would need to set a save model file like this:
# ParamManager.param_col.model_file = model_file
ParamManager.populate()

# Run the experiment, saving the result to model_file.
experiment(save_fct=lambda: save_to_file(model_file, experiment))
from xnmt.persistence import initialize_if_needed, YamlPreloader, LoadSerialized, save_to_file

# Export a vocabulary and its embedding table from a saved xnmt model.
parser = argparse.ArgumentParser()
parser.add_argument("filename")        # path of the saved model to load
parser.add_argument("output_vocab")    # where to write the vocabulary, one word per line
parser.add_argument("output_embed")    # where to write the embeddings
parser.add_argument("--embedding", choices=["src", "trg"], default="src")
args = parser.parse_args()

ParamManager.init_param_col()

load_experiment = LoadSerialized(
  filename=args.filename,
)
# exp_dir/exp_name are placeholders only — presumably nothing is written
# under /tmp/dummy since we just load; TODO(review): confirm.
uninitialized_experiment = YamlPreloader.preload_obj(load_experiment, exp_dir="/tmp/dummy", exp_name="dummy")
experiment = initialize_if_needed(uninitialized_experiment)

# Pick the source- or target-side vocabulary and embedding table.
if args.embedding == "src":
  vocab = experiment.model.src_reader.vocab
  tensor = experiment.model.src_embedder.embeddings
else:
  vocab = experiment.model.trg_reader.vocab
  tensor = experiment.model.trg_embedder.embeddings

# Write the vocabulary, one word per line, in index order.
with open(args.output_vocab, mode="w") as fp:
  for word in vocab.i2w:
    print(word, file=fp)

# NOTE(review): the body of this 'with' block lies beyond this excerpt —
# presumably it writes the rows of 'tensor' to the embed file.
with open(args.output_embed, mode="w") as fp: