def main():
    """End-to-end pipeline: train a TDA dialog generator, evaluate it,
    then run generation trials whose synthetic dialogs augment the DST
    training data; DST models are trained/evaluated per trial and all
    results are aggregated under ``args.save_dir``.
    """
    args = utils.parse_args(create_parser())
    if args.logging_config is not None:
        logging.config.dictConfig(utils.load_yaml(args.logging_config))
    save_dir = pathlib.Path(args.save_dir)
    # Refuse to clobber a previous run's outputs unless --overwrite is given.
    if (not args.overwrite and save_dir.exists() and
            utils.has_element(save_dir.glob("*.json"))):
        raise FileExistsError(f"save directory ({save_dir}) is not empty")
    shell = utils.ShellUtils()
    engine = inflect.engine()
    shell.mkdir(save_dir, silent=True)
    logger = logging.getLogger("train")
    utils.seed(args.seed)
    logger.info("loading data...")
    # load_fn: path -> parsed json -> list[Dialog]
    load_fn = utils.chain_func(lambda x: list(map(Dialog.from_json, x)),
                               utils.load_json)
    data_dir = pathlib.Path(args.data_dir)
    train_data = load_fn(str(data_dir.joinpath("train.json")))
    valid_data = load_fn(str(data_dir.joinpath("dev.json")))
    test_data = load_fn(str(data_dir.joinpath("test.json")))
    processor = datasets.DialogProcessor(
        sent_processor=datasets.SentProcessor(
            bos=True, eos=True, lowercase=True, tokenizer="space",
            max_len=30),
        boc=True, eoc=True, state_order="randomized", max_len=30)
    # Vocabularies are built over all three splits.
    processor.prepare_vocabs(
        list(itertools.chain(train_data, valid_data, test_data)))
    utils.save_pickle(processor, save_dir.joinpath("processor.pkl"))
    logger.info("preparing model...")
    utils.save_json(utils.load_yaml(args.gen_model_path),
                    save_dir.joinpath("model.json"))
    torchmodels.register_packages(models)
    model_cls = torchmodels.create_model_cls(models, args.gen_model_path)
    model: models.AbstractTDA = model_cls(processor.vocabs)
    model.reset_parameters()
    utils.report_model(logger, model)
    device = torch.device("cpu")
    if args.gpu is not None:
        device = torch.device(f"cuda:{args.gpu}")
    model = model.to(device)

    def create_scheduler(s):
        # Parse a CLI-supplied coordinate-list string into a piecewise
        # schedule. NOTE(review): uses `eval` -- trusted input assumed.
        return utils.PiecewiseScheduler(
            [utils.Coordinate(*t) for t in eval(s)])

    save_dir = pathlib.Path(args.save_dir)
    train_args = train.TrainArguments(
        model=model,
        train_data=tuple(train_data),
        valid_data=tuple(valid_data),
        processor=processor,
        device=device,
        save_dir=save_dir,
        report_every=args.report_every,
        batch_size=args.batch_size,
        valid_batch_size=args.valid_batch_size,
        optimizer=args.optimizer,
        gradient_clip=args.gradient_clip,
        l2norm_weight=args.l2norm_weight,
        learning_rate=args.learning_rate,
        num_epochs=args.epochs,
        # Constant 1.0 weight unless an explicit schedule string was given.
        kld_schedule=(utils.ConstantScheduler(1.0)
                      if args.kld_schedule is None
                      else create_scheduler(args.kld_schedule)),
        dropout_schedule=(utils.ConstantScheduler(1.0)
                          if args.dropout_schedule is None
                          else create_scheduler(args.dropout_schedule)),
        validate_every=args.validate_every,
        early_stop=args.early_stop,
        early_stop_criterion=args.early_stop_criterion,
        early_stop_patience=args.early_stop_patience,
        disable_kl=args.disable_kl,
        kl_mode=args.kl_mode)
    utils.save_json(train_args.to_json(), save_dir.joinpath("train-args.json"))
    record = train.train(train_args)
    utils.save_json(record.to_json(), save_dir.joinpath("final-summary.json"))
    eval_dir = save_dir.joinpath("eval")
    shell.mkdir(eval_dir, silent=True)
    # Keep only the splits requested via --eval-splits.
    eval_data = dict(list(filter(None, [
        ("train", train_data) if "train" in args.eval_splits else None,
        ("dev", valid_data) if "dev" in args.eval_splits else None,
        ("test", test_data) if "test" in args.eval_splits else None
    ])))
    for split, data in eval_data.items():
        eval_args = evaluate.EvaluateArugments(  # (sic) project-defined name
            model=model,
            train_data=tuple(train_data),
            test_data=tuple(data),
            processor=processor,
            embed_type=args.embed_type,
            embed_path=args.embed_path,
            device=device,
            batch_size=args.valid_batch_size,
            beam_size=args.beam_size,
            max_conv_len=args.max_conv_len,
            max_sent_len=args.max_sent_len)
        utils.save_json(eval_args.to_json(),
                        eval_dir.joinpath(f"eval-{split}-args.json"))
        # FIX: evaluation needs no gradients; the sibling train-only entry
        # point already wraps this call in no_grad.
        with torch.no_grad():
            eval_results = evaluate.evaluate(eval_args)
        save_path = eval_dir.joinpath(f"eval-{split}.json")
        utils.save_json(eval_results, save_path)
        logger.info(f"'{split}' results saved to {save_path}")
    logger.info(f"will run {args.gen_runs} generation trials...")
    gen_summary = []
    dst_summary = []
    for gen_idx in range(1, args.gen_runs + 1):
        logger.info(f"running {engine.ordinal(gen_idx)} generation trial...")
        gen_dir = save_dir.joinpath(f"gen-{gen_idx:03d}")
        shell.mkdir(gen_dir, silent=True)
        gen_args = generate.GenerateArguments(
            model=model,
            processor=processor,
            data=train_data,
            # Number of synthetic dialogs scales with the original set size.
            instances=int(round(len(train_data) * args.multiplier)),
            batch_size=args.valid_batch_size,
            conv_scale=args.conv_scale,
            spkr_scale=args.spkr_scale,
            goal_scale=args.goal_scale,
            state_scale=args.state_scale,
            sent_scale=args.sent_scale,
            validate_dst=True,
            validate_unique=args.validate_unique,
            device=device)
        utils.save_json(gen_args.to_json(), gen_dir.joinpath("gen-args.json"))
        with torch.no_grad():
            samples = generate.generate(gen_args)
        utils.save_json([sample.output.to_json() for sample in samples],
                        gen_dir.joinpath("gen-out.json"))
        utils.save_json([sample.input.to_json() for sample in samples],
                        gen_dir.joinpath("gen-in.json"))
        utils.save_lines([str(sample.log_prob) for sample in samples],
                         gen_dir.joinpath("logprob.txt"))
        da_data = [sample.output for sample in samples]
        # FIX: build the augmented split with `+` instead of `+=`. The old
        # `data["train"] += da_data` mutated `train_data` itself, so every
        # later generation trial silently trained on (and sized itself by)
        # the augmented data accumulated from earlier trials.
        data = {"train": train_data + da_data,
                "dev": valid_data,
                "test": test_data}
        # convert dialogs to dst dialogs
        data = {
            split: list(map(datasets.DSTDialog.from_dialog, dialogs))
            for split, dialogs in data.items()
        }
        for split, dialogs in data.items():
            logger.info(f"verifying '{split}' dataset...")
            for dialog in dialogs:
                dialog.compute_user_goals()
                dialog.validate()
        logger.info("preparing dst environment...")
        dst_processor = dst_datasets.DSTDialogProcessor(
            sent_processor=datasets.SentProcessor(
                bos=True, eos=True, lowercase=True, max_len=30))
        dst_processor.prepare_vocabs(list(itertools.chain(*data.values())))
        train_dataset = dst_datasets.DSTDialogDataset(
            dialogs=data["train"], processor=dst_processor)
        train_dataloader = dst_datasets.create_dataloader(
            train_dataset, batch_size=args.dst_batch_size,
            shuffle=True, pin_memory=True)
        dev_dataloader = dst_run.TestDataloader(
            dialogs=data["dev"], processor=dst_processor,
            max_batch_size=args.dst_batch_size)
        test_dataloader = dst_run.TestDataloader(
            dialogs=data["test"], processor=dst_processor,
            max_batch_size=args.dst_batch_size)
        logger.info("saving dst processor object...")
        utils.save_pickle(dst_processor, gen_dir.joinpath("processor.pkl"))
        torchmodels.register_packages(dst_models)
        dst_model_cls = torchmodels.create_model_cls(dst_pkg,
                                                     args.dst_model_path)
        dst_model = dst_model_cls(dst_processor.vocabs)
        dst_model = dst_model.to(device)
        # FIX: log the DST model just built; the old code logged the
        # generator (`model`) right before reporting DST parameter counts.
        logger.info(str(dst_model))
        logger.info(f"number of parameters DST: "
                    f"{utils.count_parameters(dst_model):,d}")
        logger.info(f"running {args.dst_runs} trials...")
        all_results = []
        for idx in range(1, args.dst_runs + 1):
            logger.info(f"running {engine.ordinal(idx)} dst trial...")
            trial_dir = gen_dir.joinpath(f"dst-{idx:03d}")
            logger.info("resetting parameters...")
            dst_model.reset_parameters()
            logger.info("preparing trainer...")
            runner = dst_run.Runner(
                model=dst_model,
                processor=dst_processor,
                device=device,
                save_dir=trial_dir,
                # Scale epochs down so total optimization steps stay
                # roughly constant despite the enlarged training set.
                epochs=int(round(args.dst_epochs / (1 + args.multiplier))),
                loss="sum",
                l2norm=args.dst_l2norm,
                gradient_clip=args.dst_gradient_clip,
                train_validate=False,
                early_stop=True,
                early_stop_criterion="joint-goal",
                early_stop_patience=None,
                asr_method="scaled",
                asr_sigmoid_sum_order="sigmoid-sum",
                asr_topk=5)
            logger.info("commencing training...")
            record = runner.train(train_dataloader=train_dataloader,
                                  dev_dataloader=dev_dataloader,
                                  test_fn=None)
            logger.info("final summary: ")
            logger.info(pprint.pformat(record.to_json()))
            utils.save_json(record.to_json(),
                            trial_dir.joinpath("summary.json"))
            if not args.dst_test_asr:
                logger.info("commencing testing...")
                with torch.no_grad():
                    eval_results = runner.test(test_dataloader)
                logger.info("test results: ")
                logger.info(pprint.pformat(eval_results))
            else:
                logger.info("commencing testing (asr)...")
                with torch.no_grad():
                    eval_results = runner.test_asr(test_dataloader)
                logger.info("test(asr) results: ")
                logger.info(pprint.pformat(eval_results))
            eval_results["epoch"] = int(record.epoch)
            logger.info("test evaluation: ")
            logger.info(pprint.pformat(eval_results))
            utils.save_json(eval_results, trial_dir.joinpath("eval.json"))
            all_results.append(eval_results)
            dst_summary.append(eval_results)
        logger.info("aggregating results...")
        summary = reduce_json(all_results)
        logger.info("aggregated results: ")
        # FIX: collect the per-trial means as a dict. The old code appended
        # `pprint.pformat(...)` (a string) to `gen_summary` and then ran
        # `reduce_json` over strings; it also pformatted the pformat output.
        agg_results = {k: v["stats"]["mean"] for k, v in summary.items()}
        logger.info(pprint.pformat(agg_results))
        gen_summary.append(agg_results)
        utils.save_json(summary, gen_dir.joinpath("summary.json"))
    gen_summary = reduce_json(gen_summary)
    dst_summary = reduce_json(dst_summary)
    logger.info(f"aggregating generation trials ({args.gen_runs})...")
    logger.info(
        pprint.pformat({k: v["stats"]["mean"]
                        for k, v in gen_summary.items()}))
    logger.info(f"aggregating dst trials ({args.gen_runs * args.dst_runs})...")
    logger.info(
        pprint.pformat({k: v["stats"]["mean"]
                        for k, v in dst_summary.items()}))
    utils.save_json(gen_summary, save_dir.joinpath("gen-summary.json"))
    utils.save_json(dst_summary, save_dir.joinpath("dst-summary.json"))
    logger.info("done!")
def main():
    """Train a TDA dialog model and evaluate it on the selected splits.

    Parses CLI arguments, trains on the data under ``args.data_dir``,
    and writes the processor, model config, training summary, and
    per-split evaluation results into ``args.save_dir``.
    """
    args = utils.parse_args(create_parser())
    if args.logging_config is not None:
        logging.config.dictConfig(utils.load_yaml(args.logging_config))
    save_dir = pathlib.Path(args.save_dir)
    # Refuse to clobber a previous run's outputs unless --overwrite is given.
    if (not args.overwrite and save_dir.exists() and
            utils.has_element(save_dir.glob("*.json"))):
        raise FileExistsError(f"save directory ({save_dir}) is not empty")
    shell = utils.ShellUtils()
    shell.mkdir(save_dir, silent=True)
    logger = logging.getLogger("train")
    utils.seed(args.seed)
    logger.info("loading data...")
    # load_fn: path -> parsed json -> list of Dialog objects
    load_fn = utils.chain_func(lambda x: list(map(Dialog.from_json, x)),
                               utils.load_json)
    data_dir = pathlib.Path(args.data_dir)
    train_data = load_fn(str(data_dir.joinpath("train.json")))
    valid_data = load_fn(str(data_dir.joinpath("dev.json")))
    test_data = load_fn(str(data_dir.joinpath("test.json")))
    processor = datasets.DialogProcessor(
        sent_processor=datasets.SentProcessor(
            bos=True,
            eos=True,
            lowercase=True,
            tokenizer="space",
            max_len=30
        ),
        boc=True,
        eoc=True,
        state_order="randomized",
        max_len=30
    )
    # Vocabularies are built over all three splits.
    processor.prepare_vocabs(
        list(itertools.chain(train_data, valid_data, test_data)))
    utils.save_pickle(processor, save_dir.joinpath("processor.pkl"))
    logger.info("preparing model...")
    # Persist the model configuration alongside the run outputs.
    utils.save_json(utils.load_yaml(args.model_path),
                    save_dir.joinpath("model.json"))
    torchmodels.register_packages(models)
    model_cls = torchmodels.create_model_cls(models, args.model_path)
    model: models.AbstractTDA = model_cls(processor.vocabs)
    model.reset_parameters()
    utils.report_model(logger, model)
    device = torch.device("cpu")
    if args.gpu is not None:
        device = torch.device(f"cuda:{args.gpu}")
    model = model.to(device)

    def create_scheduler(s):
        # Parse a CLI-supplied coordinate-list string into a piecewise
        # schedule. NOTE(review): uses `eval` -- trusted input assumed.
        return utils.PiecewiseScheduler([utils.Coordinate(*t) for t in eval(s)])

    # NOTE(review): redundant -- save_dir was already set above.
    save_dir = pathlib.Path(args.save_dir)
    train_args = train.TrainArguments(
        model=model,
        train_data=tuple(train_data),
        valid_data=tuple(valid_data),
        processor=processor,
        device=device,
        save_dir=save_dir,
        report_every=args.report_every,
        batch_size=args.batch_size,
        valid_batch_size=args.valid_batch_size,
        optimizer=args.optimizer,
        gradient_clip=args.gradient_clip,
        l2norm_weight=args.l2norm_weight,
        learning_rate=args.learning_rate,
        num_epochs=args.epochs,
        # Constant 1.0 weight unless an explicit schedule string was given.
        kld_schedule=(utils.ConstantScheduler(1.0)
                      if args.kld_schedule is None
                      else create_scheduler(args.kld_schedule)),
        dropout_schedule=(utils.ConstantScheduler(1.0)
                          if args.dropout_schedule is None
                          else create_scheduler(args.dropout_schedule)),
        validate_every=args.validate_every,
        early_stop=args.early_stop,
        early_stop_criterion=args.early_stop_criterion,
        early_stop_patience=args.early_stop_patience,
        disable_kl=args.disable_kl,
        kl_mode=args.kl_mode
    )
    utils.save_json(train_args.to_json(), save_dir.joinpath("train-args.json"))
    record = train.train(train_args)
    utils.save_json(record.to_json(), save_dir.joinpath("final-summary.json"))
    eval_dir = save_dir.joinpath("eval")
    shell.mkdir(eval_dir, silent=True)
    # Keep only the splits requested via --eval-splits.
    eval_data = dict(list(filter(None, [
        ("train", train_data) if "train" in args.eval_splits else None,
        ("dev", valid_data) if "dev" in args.eval_splits else None,
        ("test", test_data) if "test" in args.eval_splits else None
    ])))
    for split, data in eval_data.items():
        eval_args = evaluate.EvaluateArugments(  # (sic) project-defined name
            model=model,
            train_data=tuple(train_data),
            test_data=tuple(data),
            processor=processor,
            embed_type=args.embed_type,
            embed_path=args.embed_path,
            device=device,
            batch_size=args.valid_batch_size,
            beam_size=args.beam_size,
            max_conv_len=args.max_conv_len,
            max_sent_len=args.max_sent_len
        )
        utils.save_json(eval_args.to_json(),
                        eval_dir.joinpath(f"eval-{split}-args.json"))
        # Inference only -- no gradients needed during evaluation.
        with torch.no_grad():
            eval_results = evaluate.evaluate(eval_args)
        save_path = eval_dir.joinpath(f"eval-{split}.json")
        utils.save_json(eval_results, save_path)
        logger.info(f"'{split}' results saved to {save_path}")
    logger.info("done!")
def main():
    """Run repeated DST training/evaluation trials on a fixed dataset
    and aggregate the per-trial results into one summary.
    """
    parser = create_parser()
    args = utils.parse_args(parser)
    if args.logging_config is not None:
        logging.config.dictConfig(utils.load_yaml(args.logging_config))
    logger = logging.getLogger("multirun")
    save_dir = pathlib.Path(args.save_dir)
    # Refuse to clobber a previous run's outputs unless --overwrite is given.
    if (not args.overwrite and save_dir.exists() and
            utils.has_element(save_dir.glob("*"))):
        raise FileExistsError(f"save directory ({save_dir}) is not empty")
    save_dir.mkdir(exist_ok=True, parents=True)
    # Record the exact CLI configuration used for this run.
    utils.save_yaml(vars(args), save_dir.joinpath("args.yml"))
    logger.info("preparing dataset...")
    data_dir = pathlib.Path(args.data_dir)
    data = {
        split: utils.load_json(data_dir.joinpath(f"{split}.json"))
        for split in ("train", "dev", "test")
    }
    # json dict -> Dialog -> DSTDialog, per split.
    data = {
        split: [
            datasets.DSTDialog.from_dialog(datasets.Dialog.from_json(d))
            for d in dialogs
        ]
        for split, dialogs in data.items()
    }
    logger.info("verifying dataset...")
    for split, dialogs in data.items():
        for dialog in dialogs:
            dialog.validate()
    processor = dst_datasets.DSTDialogProcessor(
        sent_processor=datasets.SentProcessor(
            bos=True, eos=True, lowercase=True, max_len=30))
    # Vocabularies are built over all three splits.
    processor.prepare_vocabs(
        list(itertools.chain(*(data["train"], data["dev"], data["test"]))))
    logger.info("saving processor object...")
    utils.save_pickle(processor, save_dir.joinpath("processor.pkl"))
    train_dataset = dst_datasets.DSTDialogDataset(dialogs=data["train"],
                                                  processor=processor)
    # NOTE(review): shuffle=False here, unlike the augmentation pipelines
    # in this file which shuffle the DST train loader -- confirm intended.
    train_dataloader = dst_datasets.create_dataloader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        pin_memory=True)
    dev_dataloader = dst_run.TestDataloader(dialogs=data["dev"],
                                            processor=processor,
                                            max_batch_size=args.batch_size)
    test_dataloader = dst_run.TestDataloader(dialogs=data["test"],
                                             processor=processor,
                                             max_batch_size=args.batch_size)
    logger.info("preparing model...")
    torchmodels.register_packages(models)
    torchmodels.register_packages(dst_models)
    model_cls = torchmodels.create_model_cls(dst, args.model_path)
    model: dst.AbstractDialogStateTracker = model_cls(processor.vocabs)
    if args.gpu is None:
        device = torch.device("cpu")
    else:
        device = torch.device(f"cuda:{args.gpu}")
    model = model.to(device)
    logger.info(str(model))
    logger.info(f"number of parameters: {utils.count_parameters(model):,d}")
    logger.info(f"running {args.runs} trials...")
    all_results = []
    for idx in range(args.runs):
        logger.info(f"running trial-{idx + 1}...")
        run_save_dir = save_dir.joinpath(f"run-{idx + 1:03d}")
        logger.info("resetting parameters...")
        # Fresh parameters per trial; the same model object is reused.
        model.reset_parameters()
        logger.info("preparing trainer...")
        runner = dst_run.Runner(
            model=model,
            processor=processor,
            device=device,
            save_dir=run_save_dir,
            epochs=args.epochs,
            # Optional LR scheduler built from a class name and JSON kwargs.
            scheduler=(None if not args.scheduled_lr else functools.partial(
                getattr(op.lr_scheduler, args.scheduler_cls),
                **json.loads(args.scheduler_kwargs))),
            loss=args.loss,
            l2norm=args.l2norm,
            gradient_clip=args.gradient_clip,
            train_validate=args.train_validate,
            early_stop=args.early_stop,
            early_stop_criterion=args.early_stop_criterion,
            early_stop_patience=args.early_stop_patience,
            asr_method=args.asr_method,
            asr_sigmoid_sum_order=args.asr_sigmoid_sum_order,
            asr_topk=args.asr_topk)
        logger.info("commencing training...")
        record = runner.train(
            train_dataloader=train_dataloader,
            dev_dataloader=dev_dataloader,
            test_fn=runner.test_asr if args.validate_asr else None)
        logger.info("final summary: ")
        logger.info(pprint.pformat(record.to_json()))
        utils.save_json(record.to_json(),
                        run_save_dir.joinpath("summary-final.json"))
        logger.info("commencing testing...")
        with torch.no_grad():
            eval_results = runner.test(test_dataloader)
        logger.info("test results: ")
        logger.info(pprint.pformat(eval_results))
        if args.test_asr:
            # When requested, ASR-based test results replace the plain ones.
            logger.info("commencing testing (asr)...")
            with torch.no_grad():
                eval_results = runner.test_asr(test_dataloader)
            logger.info("test(asr) results: ")
            logger.info(pprint.pformat(eval_results))
        eval_results["epoch"] = int(record.epoch)
        eval_results["criterion"] = record.value
        logger.info("test evaluation: ")
        logger.info(pprint.pformat(eval_results))
        if args.save_ckpt:
            logger.info("saving checkpoint...")
            # State-dict tensors are moved to CPU before serialization.
            torch.save({k: v.cpu() for k, v in model.state_dict().items()},
                       run_save_dir.joinpath("ckpt.pth"))
            logger.info("done!")
        utils.save_json(eval_results, run_save_dir.joinpath("eval.json"))
        all_results.append(eval_results)
    logger.info("aggregating results...")
    summary = reduce_json(all_results)
    pprint.pprint({k: v["stats"]["mean"] for k, v in summary.items()})
    utils.save_json(summary, save_dir.joinpath("summary.json"))
    logger.info("done!")
def main(args=None):
    """Run generative data augmentation (GDA): restore a trained dialog
    generator from a checkpoint, synthesize dialogs over several trials,
    and train/evaluate DST models on the augmented data, aggregating all
    results under ``args.save_dir``.

    Args:
        args: optional pre-split argument list forwarded to
            ``utils.parse_args``; ``None`` uses the process arguments.
    """
    args = utils.parse_args(create_parser(), args)
    if args.logging_config is not None:
        logging.config.dictConfig(utils.load_yaml(args.logging_config))
    save_dir = pathlib.Path(args.save_dir)
    # Refuse to clobber a previous run's outputs unless --overwrite is given.
    if (not args.overwrite and save_dir.exists() and
            utils.has_element(save_dir.glob("*"))):
        raise FileExistsError(f"save directory ({save_dir}) is not empty")
    shell = utils.ShellUtils()
    engine = inflect.engine()
    shell.mkdir(save_dir, silent=True)
    logger = logging.getLogger("gda")
    utils.seed(args.seed)
    logger.info("loading data...")
    # load_fn: path -> parsed json -> list[Dialog]
    load_fn = utils.chain_func(lambda data: list(map(Dialog.from_json, data)),
                               utils.load_json)
    # The processor is restored from the generator's training run so the
    # vocabularies match the checkpointed weights.
    processor = utils.load_pickle(args.processor_path)
    data_dir = pathlib.Path(args.data_dir)
    train_data = load_fn(str(data_dir.joinpath("train.json")))
    valid_data = load_fn(str(data_dir.joinpath("dev.json")))
    test_data = load_fn(str(data_dir.joinpath("test.json")))
    data = {"train": train_data, "dev": valid_data, "test": test_data}
    logger.info("preparing model...")
    torchmodels.register_packages(models)
    model_cls = torchmodels.create_model_cls(models, args.gen_model_path)
    model: models.AbstractTDA = model_cls(processor.vocabs)
    model.reset_parameters()
    ckpt = torch.load(args.ckpt_path)
    model.load_state_dict(ckpt)
    device = torch.device("cpu")
    if args.gpu is not None:
        device = torch.device(f"cuda:{args.gpu}")
    model = model.to(device)
    logger.info(f"will run {args.gen_runs} generation trials...")
    gen_summary = []
    dst_summary = []
    for gen_idx in range(1, args.gen_runs + 1):
        logger.info(f"running {engine.ordinal(gen_idx)} generation trial...")
        gen_dir = save_dir.joinpath(f"gen-{gen_idx:03d}")
        shell.mkdir(gen_dir, silent=True)
        gen_args = generate.GenerateArguments(
            model=model,
            processor=processor,
            data=tuple(train_data),
            # Number of synthetic dialogs scales with the original set size.
            instances=int(round(len(train_data) * args.multiplier)),
            batch_size=args.gen_batch_size,
            conv_scale=args.conv_scale,
            spkr_scale=args.spkr_scale,
            goal_scale=args.goal_scale,
            state_scale=args.state_scale,
            sent_scale=args.sent_scale,
            validate_dst=True,
            validate_unique=args.validate_unique,
            device=device)
        utils.save_json(gen_args.to_json(), gen_dir.joinpath("args.json"))
        with torch.no_grad():
            samples = generate.generate(gen_args)
        utils.save_json([sample.output.to_json() for sample in samples],
                        gen_dir.joinpath("out.json"))
        utils.save_json([sample.input.to_json() for sample in samples],
                        gen_dir.joinpath("in.json"))
        utils.save_lines([str(sample.log_prob) for sample in samples],
                         gen_dir.joinpath("logprob.txt"))
        da_data = [sample.output for sample in samples]
        # Augment only the training split; `+` leaves `data` untouched
        # across trials.
        gen_data = {
            "train": data["train"] + da_data,
            "dev": data["dev"],
            "test": data["test"]
        }
        # convert dialogs to dst dialogs
        gen_data = {
            split: list(map(datasets.DSTDialog.from_dialog, dialogs))
            for split, dialogs in gen_data.items()
        }
        for split, dialogs in gen_data.items():
            logger.info(f"verifying '{split}' dataset...")
            for dialog in dialogs:
                dialog.compute_user_goals()
                dialog.validate()
        logger.info("preparing dst environment...")
        dst_processor = dst_datasets.DSTDialogProcessor(
            sent_processor=datasets.SentProcessor(
                bos=True, eos=True, lowercase=True, max_len=30))
        dst_processor.prepare_vocabs(list(itertools.chain(*gen_data.values())))
        train_dataset = dst_datasets.DSTDialogDataset(
            dialogs=gen_data["train"], processor=dst_processor)
        train_dataloader = dst_datasets.create_dataloader(
            train_dataset, batch_size=args.dst_batch_size,
            shuffle=True, pin_memory=True)
        dev_dataloader = dst_run.TestDataloader(
            dialogs=gen_data["dev"], processor=dst_processor,
            max_batch_size=args.dst_batch_size)
        test_dataloader = dst_run.TestDataloader(
            dialogs=gen_data["test"], processor=dst_processor,
            max_batch_size=args.dst_batch_size)
        logger.info("saving dst processor object...")
        utils.save_pickle(dst_processor, gen_dir.joinpath("processor.pkl"))
        torchmodels.register_packages(dst_models)
        dst_model_cls = torchmodels.create_model_cls(dst_pkg,
                                                     args.dst_model_path)
        dst_model = dst_model_cls(dst_processor.vocabs)
        dst_model = dst_model.to(device)
        # FIX: log the DST model just built; the old code logged the
        # generator (`model`) right before reporting DST parameter counts.
        logger.info(str(dst_model))
        logger.info(f"number of parameters DST: "
                    f"{utils.count_parameters(dst_model):,d}")
        logger.info(f"will run {args.dst_runs} trials...")
        all_results = []
        for idx in range(1, args.dst_runs + 1):
            logger.info(f"running {engine.ordinal(idx)} dst trial...")
            trial_dir = gen_dir.joinpath(f"dst-{idx:03d}")
            logger.info("resetting parameters...")
            dst_model.reset_parameters()
            logger.info("preparing trainer...")
            runner = dst_run.Runner(
                model=dst_model,
                processor=dst_processor,
                device=device,
                save_dir=trial_dir,
                # Scale epochs down so total optimization steps stay
                # roughly constant despite the enlarged training set.
                epochs=int(round(args.epochs / (1 + args.multiplier))),
                loss="sum",
                l2norm=args.l2norm,
                gradient_clip=args.gradient_clip,
                train_validate=False,
                early_stop=True,
                early_stop_criterion="joint-goal",
                early_stop_patience=None,
                asr_method="scaled",
                asr_sigmoid_sum_order="sigmoid-sum",
                asr_topk=5)
            logger.info("commencing training...")
            record = runner.train(train_dataloader=train_dataloader,
                                  dev_dataloader=dev_dataloader,
                                  test_fn=None)
            logger.info("final summary: ")
            logger.info(pprint.pformat(record.to_json()))
            utils.save_json(record.to_json(),
                            trial_dir.joinpath("summary.json"))
            if not args.test_asr:
                logger.info("commencing testing...")
                with torch.no_grad():
                    eval_results = runner.test(test_dataloader)
                logger.info("test results: ")
                logger.info(pprint.pformat(eval_results))
            else:
                logger.info("commencing testing (asr)...")
                with torch.no_grad():
                    eval_results = runner.test_asr(test_dataloader)
                logger.info("test(asr) results: ")
                logger.info(pprint.pformat(eval_results))
            eval_results["epoch"] = int(record.epoch)
            eval_results["criterion"] = record.value
            logger.info("test evaluation: ")
            logger.info(pprint.pformat(eval_results))
            utils.save_json(eval_results, trial_dir.joinpath("eval.json"))
            all_results.append(eval_results)
            dst_summary.append(eval_results)
        logger.info("aggregating results...")
        summary = reduce_json(all_results)
        logger.info("aggregated results: ")
        agg_results = {k: v["stats"]["mean"] for k, v in summary.items()}
        gen_summary.append(agg_results)
        logger.info(pprint.pformat(agg_results))
        utils.save_json(summary, gen_dir.joinpath("summary.json"))
    gen_summary = reduce_json(gen_summary)
    dst_summary = reduce_json(dst_summary)
    logger.info(f"aggregating generation trials ({args.gen_runs})...")
    logger.info(
        pprint.pformat({k: v["stats"]["mean"]
                        for k, v in gen_summary.items()}))
    logger.info(f"aggregating dst trials ({args.gen_runs * args.dst_runs})...")
    logger.info(
        pprint.pformat({k: v["stats"]["mean"]
                        for k, v in dst_summary.items()}))
    utils.save_json(gen_summary, save_dir.joinpath("gen-summary.json"))
    utils.save_json(dst_summary, save_dir.joinpath("dst-summary.json"))
    logger.info("done!")