def create_trainer(
    config_dict: Dict[str, Any],
    output: Path,
) -> Trainer:
    """Build a fully-wired ``Trainer`` from a raw config dict.

    The config is parsed, validated, stamped with git info, and persisted to
    ``output/config.yaml`` before any training object is constructed, so the
    run directory always records the exact configuration used.

    :param config_dict: plain-dict form of the experiment configuration
        (parsed via ``Config.from_dict``).
    :param output: run directory; created if missing. Receives
        ``config.yaml``, ``struct.txt``, snapshots, and logs.
    :return: a ``Trainer`` with evaluators, snapshotting, and reporting
        extensions registered.
    """
    # config
    config = Config.from_dict(config_dict)
    config.add_git_info()
    assert_config(config)

    output.mkdir(exist_ok=True, parents=True)
    with (output / "config.yaml").open(mode="w") as f:
        yaml.safe_dump(config.to_dict(), f)

    # model
    predictor = create_predictor(config.network)
    model = Model(
        loss_config=config.loss,
        predictor=predictor,
        local_padding_size=config.dataset.local_padding_size,
    )
    # Optional custom weight init; otherwise the predictor keeps its defaults.
    if config.train.weight_initializer is not None:
        init_weights(model, name=config.train.weight_initializer)

    device = torch.device("cuda")  # GPU-only path; no CPU fallback here
    model.to(device)

    # dataset
    # Common iterator kwargs bound once; per-split flags passed per call.
    _create_iterator = partial(
        create_iterator,
        batch_size=config.train.batchsize,
        eval_batch_size=config.train.eval_batchsize,
        num_processes=config.train.num_processes,
        use_multithread=config.train.use_multithread,
    )

    datasets = create_dataset(config.dataset)
    train_iter = _create_iterator(datasets["train"], for_train=True, for_eval=False)
    test_iter = _create_iterator(datasets["test"], for_train=False, for_eval=False)
    eval_iter = _create_iterator(datasets["eval"], for_train=False, for_eval=True)
    # "valid" split is optional; only build its iterator when present.
    valid_iter = None
    if datasets["valid"] is not None:
        valid_iter = _create_iterator(datasets["valid"], for_train=False, for_eval=True)

    # Escalate iterator timeout warnings to errors so stalls fail fast.
    warnings.simplefilter("error", MultiprocessIterator.TimeoutWarning)

    # optimizer
    # Shallow-copy so popping "name" does not mutate the config object.
    cp: Dict[str, Any] = copy(config.train.optimizer)
    n = cp.pop("name").lower()
    optimizer: Optimizer
    if n == "adam":
        optimizer = optim.Adam(model.parameters(), **cp)
    elif n == "sgd":
        optimizer = optim.SGD(model.parameters(), **cp)
    else:
        raise ValueError(n)

    # updater
    if not config.train.use_amp:
        updater = StandardUpdater(
            iterator=train_iter,
            optimizer=optimizer,
            model=model,
            device=device,
        )
    else:
        updater = AmpUpdater(
            iterator=train_iter,
            optimizer=optimizer,
            model=model,
            device=device,
        )

    # trainer
    trigger_log = (config.train.log_iteration, "iteration")
    trigger_eval = (config.train.eval_iteration, "iteration")
    trigger_snapshot = (config.train.snapshot_iteration, "iteration")
    # None stop trigger means "run until externally stopped".
    trigger_stop = (
        (config.train.stop_iteration, "iteration")
        if config.train.stop_iteration is not None
        else None
    )

    trainer = Trainer(updater, stop_trigger=trigger_stop, out=output)

    # LR schedule: step_shift wins over linear_shift when both are configured,
    # since it is assigned last and only one shift_ext is registered.
    shift_ext = None
    if config.train.linear_shift is not None:
        shift_ext = extensions.LinearShift(**config.train.linear_shift)
    if config.train.step_shift is not None:
        shift_ext = extensions.StepShift(**config.train.step_shift)
    if shift_ext is not None:
        trainer.extend(shift_ext)

    # Loss evaluation on the test split.
    ext = extensions.Evaluator(test_iter, model, device=device)
    trainer.extend(ext, name="test", trigger=trigger_log)

    # Full-generation evaluation (slow path; use_fast_inference disabled).
    generator = Generator(
        config=config,
        predictor=predictor,
        use_gpu=True,
        max_batch_size=(
            config.train.eval_batchsize
            if config.train.eval_batchsize is not None
            else config.train.batchsize
        ),
        use_fast_inference=False,
    )
    generate_evaluator = GenerateEvaluator(
        generator=generator,
        time_length=config.dataset.time_length_evaluate,
        local_padding_time_length=config.dataset.local_padding_time_length_evaluate,
    )
    ext = extensions.Evaluator(eval_iter, generate_evaluator, device=device)
    trainer.extend(ext, name="eval", trigger=trigger_eval)
    if valid_iter is not None:
        ext = extensions.Evaluator(valid_iter, generate_evaluator, device=device)
        trainer.extend(ext, name="valid", trigger=trigger_eval)

    # Keep roughly 10% of the evaluated checkpoints when the run length is
    # known; otherwise retain a fixed 10.
    if config.train.stop_iteration is not None:
        saving_model_num = int(
            config.train.stop_iteration / config.train.eval_iteration / 10
        )
    else:
        saving_model_num = 10
    # Snapshot predictor weights whenever eval MCD reaches a new low.
    ext = extensions.snapshot_object(
        predictor,
        filename="predictor_{.updater.iteration}.pth",
        n_retains=saving_model_num,
    )
    trainer.extend(
        ext,
        trigger=LowValueTrigger("eval/main/mcd", trigger=trigger_eval),
    )

    # Reporting / safety extensions.
    trainer.extend(extensions.FailOnNonNumber(), trigger=trigger_log)
    trainer.extend(extensions.observe_lr(), trigger=trigger_log)
    trainer.extend(extensions.LogReport(trigger=trigger_log))
    trainer.extend(
        extensions.PrintReport(["iteration", "main/loss", "test/main/loss"]),
        trigger=trigger_log,
    )
    trainer.extend(
        TensorboardReport(writer=SummaryWriter(Path(output))), trigger=trigger_log
    )

    # Optional Weights & Biases reporting, enabled by project category.
    if config.project.category is not None:
        ext = WandbReport(
            config_dict=config.to_dict(),
            project_category=config.project.category,
            project_name=config.project.name,
            output_dir=output.joinpath("wandb"),
        )
        trainer.extend(ext, trigger=trigger_log)

    # Human-readable module structure dump for the run directory.
    (output / "struct.txt").write_text(repr(model))

    if trigger_stop is not None:
        trainer.extend(extensions.ProgressBar(trigger_stop))

    # Full-trainer snapshot for resumption (autoload restores on restart).
    ext = extensions.snapshot_object(
        trainer,
        filename="trainer_{.updater.iteration}.pth",
        n_retains=1,
        autoload=True,
    )
    trainer.extend(ext, trigger=trigger_snapshot)

    return trainer
def create_trainer(
    config_dict: Dict[str, Any],
    output: Path,
) -> Trainer:
    """Build a ``Trainer`` from a raw config dict (network-bundle variant).

    Unlike the sibling variants, this one builds a ``networks`` bundle via
    ``create_network`` and delegates optimizer construction to
    ``make_optimizer``. The configuration is written to ``output/config.yaml``
    before construction so the run directory records it.

    :param config_dict: plain-dict experiment configuration.
    :param output: run directory; created if missing.
    :return: a ``Trainer`` with evaluator, snapshot, and reporting extensions.
    """
    # config
    config = Config.from_dict(config_dict)
    config.add_git_info()

    output.mkdir(exist_ok=True, parents=True)
    with (output / "config.yaml").open(mode="w") as f:
        yaml.safe_dump(config.to_dict(), f)

    # model
    networks = create_network(config.network)
    model = Model(model_config=config.model, networks=networks)
    if config.train.weight_initializer is not None:
        init_weights(model, name=config.train.weight_initializer)

    # This variant supports CPU training via config.train.use_gpu.
    device = torch.device("cuda") if config.train.use_gpu else torch.device("cpu")
    model.to(device)

    # dataset
    _create_iterator = partial(
        create_iterator,
        batch_size=config.train.batch_size,
        eval_batch_size=config.train.eval_batch_size,
        num_processes=config.train.num_processes,
        use_multithread=config.train.use_multithread,
    )

    datasets = create_dataset(config.dataset)
    train_iter = _create_iterator(datasets["train"], for_train=True)
    test_iter = _create_iterator(datasets["test"], for_train=False)
    # NOTE(review): eval_iter is created but never registered with an
    # Evaluator below — looks like dead setup (possibly a leftover from a
    # variant that had a generate-evaluator); confirm before removing, since
    # iterator creation may spawn worker processes.
    eval_iter = _create_iterator(datasets["eval"], for_train=False)

    # Escalate iterator timeout warnings to errors so stalls fail fast.
    warnings.simplefilter("error", MultiprocessIterator.TimeoutWarning)

    # optimizer
    optimizer = make_optimizer(config_dict=config.train.optimizer, model=model)

    # updater
    if not config.train.use_amp:
        updater = StandardUpdater(
            iterator=train_iter,
            optimizer=optimizer,
            model=model,
            device=device,
        )
    else:
        updater = AmpUpdater(
            iterator=train_iter,
            optimizer=optimizer,
            model=model,
            device=device,
        )

    # trainer
    trigger_log = (config.train.log_iteration, "iteration")
    trigger_eval = (config.train.eval_iteration, "iteration")
    trigger_snapshot = (config.train.snapshot_iteration, "iteration")
    # None stop trigger means "run until externally stopped".
    trigger_stop = (
        (config.train.stop_iteration, "iteration")
        if config.train.stop_iteration is not None
        else None
    )

    trainer = Trainer(updater, stop_trigger=trigger_stop, out=output)

    # Loss evaluation on the test split.
    ext = extensions.Evaluator(test_iter, model, device=device)
    trainer.extend(ext, name="test", trigger=trigger_log)

    # Keep roughly 10% of the evaluated checkpoints when the run length is
    # known; otherwise retain a fixed 10.
    if config.train.stop_iteration is not None:
        saving_model_num = int(
            config.train.stop_iteration / config.train.eval_iteration / 10
        )
    else:
        saving_model_num = 10
    # Snapshot predictor weights whenever test loss reaches a new low.
    ext = extensions.snapshot_object(
        networks.predictor,
        filename="predictor_{.updater.iteration}.pth",
        n_retains=saving_model_num,
    )
    trainer.extend(
        ext,
        trigger=LowValueTrigger("test/main/loss", trigger=trigger_eval),
    )

    # Reporting / safety extensions.
    trainer.extend(extensions.FailOnNonNumber(), trigger=trigger_log)
    trainer.extend(extensions.observe_lr(), trigger=trigger_log)
    trainer.extend(extensions.LogReport(trigger=trigger_log))
    trainer.extend(
        extensions.PrintReport(["iteration", "main/loss", "test/main/loss"]),
        trigger=trigger_log,
    )

    ext = TensorboardReport(writer=SummaryWriter(Path(output)))
    trainer.extend(ext, trigger=trigger_log)

    # Optional Weights & Biases reporting, enabled by project category.
    if config.project.category is not None:
        ext = WandbReport(
            config_dict=config.to_dict(),
            project_category=config.project.category,
            project_name=config.project.name,
            output_dir=output.joinpath("wandb"),
        )
        trainer.extend(ext, trigger=trigger_log)

    # Human-readable module structure dump for the run directory.
    (output / "struct.txt").write_text(repr(model))

    if trigger_stop is not None:
        trainer.extend(extensions.ProgressBar(trigger_stop))

    # Full-trainer snapshot for resumption (autoload restores on restart).
    ext = extensions.snapshot_object(
        trainer,
        filename="trainer_{.updater.iteration}.pth",
        n_retains=1,
        autoload=True,
    )
    trainer.extend(ext, trigger=trigger_snapshot)

    return trainer
def create_trainer(
    config_dict: Dict[str, Any],
    output: Path,
) -> Trainer:
    """Build a ``Trainer`` from a raw config dict (noise-schedule variant).

    Constructs predictor/model, iterators, optimizer, and updater (AMP when
    configured/available), then registers evaluators, best-checkpoint
    snapshotting, and reporting extensions. The configuration is written to
    ``output/config.yaml`` before construction so the run directory records it.

    :param config_dict: plain-dict experiment configuration.
    :param output: run directory; created if missing.
    :return: a ``Trainer`` ready for ``run()``.
    """
    # config
    config = Config.from_dict(config_dict)
    config.add_git_info()

    output.mkdir(exist_ok=True, parents=True)
    with (output / "config.yaml").open(mode="w") as f:
        yaml.safe_dump(config.to_dict(), f)

    # model
    device = torch.device("cuda")  # GPU-only path; no CPU fallback here
    predictor = create_predictor(config.network)
    model = Model(
        model_config=config.model,
        predictor=predictor,
        local_padding_length=config.dataset.local_padding_length,
    )
    # Fixed orthogonal init in this variant (not configurable).
    init_weights(model, "orthogonal")
    model.to(device)

    # dataset
    _create_iterator = partial(
        create_iterator,
        batch_size=config.train.batchsize,
        eval_batch_size=config.train.eval_batchsize,
        num_processes=config.train.num_processes,
        use_multithread=config.train.use_multithread,
    )

    datasets = create_dataset(config.dataset)
    train_iter = _create_iterator(datasets["train"], for_train=True, for_eval=False)
    test_iter = _create_iterator(datasets["test"], for_train=False, for_eval=False)
    eval_iter = _create_iterator(datasets["eval"], for_train=False, for_eval=True)

    # Escalate iterator timeout warnings to errors so stalls fail fast.
    warnings.simplefilter("error", MultiprocessIterator.TimeoutWarning)

    # optimizer
    # Shallow-copy so popping "name" does not mutate the config object.
    cp: Dict[str, Any] = copy(config.train.optimizer)
    n = cp.pop("name").lower()
    optimizer: Optimizer
    if n == "adam":
        optimizer = optim.Adam(model.parameters(), **cp)
    elif n == "sgd":
        optimizer = optim.SGD(model.parameters(), **cp)
    else:
        raise ValueError(n)

    # updater
    # Explicit config wins; otherwise fall back to AMP availability.
    use_amp = config.train.use_amp if config.train.use_amp is not None else amp_exist
    if use_amp:
        updater = AmpUpdater(
            iterator=train_iter,
            optimizer=optimizer,
            model=model,
            device=device,
        )
    else:
        updater = StandardUpdater(
            iterator=train_iter,
            optimizer=optimizer,
            model=model,
            device=device,
        )

    # trainer
    trigger_log = (config.train.log_iteration, "iteration")
    trigger_eval = (config.train.eval_iteration, "iteration")
    # None stop trigger means "run until externally stopped".
    trigger_stop = (
        (config.train.stop_iteration, "iteration")
        if config.train.stop_iteration is not None
        else None
    )

    trainer = Trainer(updater, stop_trigger=trigger_stop, out=output)
    writer = SummaryWriter(Path(output))

    # # error at randint
    # sample_data = datasets["train"][0]
    # writer.add_graph(
    #     model,
    #     input_to_model=(
    #         sample_data["wave"].unsqueeze(0).to(device),
    #         sample_data["local"].unsqueeze(0).to(device),
    #         sample_data["speaker_id"].unsqueeze(0).to(device)
    #         if predictor.with_speaker
    #         else None,
    #     ),
    # )

    # Optional LR schedules; both may be registered if both are configured.
    if config.train.multistep_shift is not None:
        trainer.extend(extensions.MultistepShift(**config.train.multistep_shift))
    if config.train.step_shift is not None:
        trainer.extend(extensions.StepShift(**config.train.step_shift))

    # Loss evaluation on the test split.
    ext = extensions.Evaluator(test_iter, model, device=device)
    trainer.extend(ext, name="test", trigger=trigger_log)

    # Full-generation evaluation with a fixed noise schedule.
    generator = Generator(
        config=config,
        noise_schedule_config=NoiseScheduleModelConfig(start=1e-4, stop=0.05, num=50),
        predictor=predictor,
        sampling_rate=config.dataset.sampling_rate,
        use_gpu=True,
    )
    generate_evaluator = GenerateEvaluator(
        generator=generator,
        local_padding_time_second=config.dataset.evaluate_local_padding_time_second,
    )
    ext = extensions.Evaluator(eval_iter, generate_evaluator, device=device)
    trainer.extend(ext, name="eval", trigger=trigger_eval)

    # Keep roughly 10% of the evaluated checkpoints when the run length is
    # known; otherwise retain a fixed 10.
    if config.train.stop_iteration is not None:
        saving_model_num = int(
            config.train.stop_iteration / config.train.eval_iteration / 10
        )
    else:
        saving_model_num = 10
    # Snapshot predictor weights whenever eval MCD reaches a new low.
    ext = extensions.snapshot_object(
        predictor,
        filename="predictor_{.updater.iteration}.pth",
        n_retains=saving_model_num,
    )
    trainer.extend(
        ext,
        trigger=LowValueTrigger("eval/main/mcd", trigger=trigger_eval),
    )

    # Reporting / safety extensions.
    trainer.extend(extensions.FailOnNonNumber(), trigger=trigger_log)
    trainer.extend(extensions.observe_lr(), trigger=trigger_log)
    trainer.extend(extensions.LogReport(trigger=trigger_log))
    trainer.extend(
        extensions.PrintReport(["iteration", "main/loss", "test/main/loss"]),
        trigger=trigger_log,
    )
    # FIX: the original called ``trainer.extend(ext, trigger=TensorboardReport(
    # writer=writer))`` — re-registering the predictor snapshot and passing the
    # TensorboardReport *extension* as the trigger, so TensorBoard logging never
    # ran. Register the report as the extension with the log trigger, matching
    # the sibling create_trainer variants.
    trainer.extend(TensorboardReport(writer=writer), trigger=trigger_log)

    # Optional Weights & Biases reporting, enabled by project category.
    if config.project.category is not None:
        ext = WandbReport(
            config_dict=config.to_dict(),
            project_category=config.project.category,
            project_name=config.project.name,
            output_dir=output.joinpath("wandb"),
        )
        trainer.extend(ext, trigger=trigger_log)

    # Human-readable module structure dump for the run directory.
    (output / "struct.txt").write_text(repr(model))

    if trigger_stop is not None:
        trainer.extend(extensions.ProgressBar(trigger_stop))

    # Full-trainer snapshot for resumption (autoload restores on restart).
    # NOTE(review): this variant snapshots the trainer on trigger_eval rather
    # than a dedicated snapshot_iteration — confirm that is intentional.
    ext = extensions.snapshot_object(
        trainer,
        filename="trainer_{.updater.iteration}.pth",
        n_retains=1,
        autoload=True,
    )
    trainer.extend(ext, trigger=trigger_eval)

    return trainer
def create_trainer(
    config_dict: Dict[str, Any],
    output: Path,
) -> Trainer:
    """Build a ``Trainer`` from a raw config dict (list-batch variant).

    This variant batches variable-length samples as Python lists via the
    ``list_concat`` converter, logs the model graph to TensorBoard, and uses
    ``snapshot_iteration`` as its evaluation/snapshot cadence. The
    configuration is written to ``output/config.yaml`` before construction.

    :param config_dict: plain-dict experiment configuration.
    :param output: run directory; created if missing.
    :return: a ``Trainer`` with evaluator, snapshot, and reporting extensions.
    """
    # config
    config = Config.from_dict(config_dict)
    config.add_git_info()

    output.mkdir(exist_ok=True, parents=True)
    with (output / "config.yaml").open(mode="w") as f:
        yaml.safe_dump(config.to_dict(), f)

    # model
    predictor = create_predictor(config.network)
    model = Model(model_config=config.model, predictor=predictor)
    if config.train.weight_initializer is not None:
        init_weights(model, name=config.train.weight_initializer)

    device = torch.device("cuda")  # GPU-only path; no CPU fallback here
    model.to(device)

    # dataset
    _create_iterator = partial(
        create_iterator,
        batch_size=config.train.batch_size,
        num_processes=config.train.num_processes,
        use_multithread=config.train.use_multithread,
    )

    datasets = create_dataset(config.dataset)
    train_iter = _create_iterator(datasets["train"], for_train=True)
    test_iter = _create_iterator(datasets["test"], for_train=False)

    # Escalate iterator timeout warnings to errors so stalls fail fast.
    warnings.simplefilter("error", MultiprocessIterator.TimeoutWarning)

    # optimizer
    # Shallow-copy so popping "name" does not mutate the config object.
    cp: Dict[str, Any] = copy(config.train.optimizer)
    n = cp.pop("name").lower()
    optimizer: Optimizer
    if n == "adam":
        optimizer = optim.Adam(model.parameters(), **cp)
    elif n == "sgd":
        optimizer = optim.SGD(model.parameters(), **cp)
    else:
        raise ValueError(n)

    # updater
    # list_concat keeps each sample as a list element instead of stacking
    # into a single tensor (variable-length inputs).
    updater = StandardUpdater(
        iterator=train_iter,
        optimizer=optimizer,
        model=model,
        converter=list_concat,
        device=device,
    )

    # trainer
    trigger_log = (config.train.log_iteration, "iteration")
    # This variant reuses snapshot_iteration as its "eval" cadence.
    trigger_eval = (config.train.snapshot_iteration, "iteration")
    # None stop trigger means "run until externally stopped".
    trigger_stop = (
        (config.train.stop_iteration, "iteration")
        if config.train.stop_iteration is not None
        else None
    )

    trainer = Trainer(updater, stop_trigger=trigger_stop, out=output)
    writer = SummaryWriter(Path(output))

    # Log the model graph once, using the first training sample as a tracer
    # input. Inputs are single-element lists to match the list_concat batch
    # format; speaker_id is only passed when the predictor is multi-speaker.
    sample_data = datasets["train"][0]
    writer.add_graph(
        model,
        input_to_model=(
            [sample_data["f0"].to(device)],
            [sample_data["phoneme"].to(device)],
            [sample_data["phoneme_list"].to(device)],
            (
                [sample_data["speaker_id"].to(device)]
                if predictor.with_speaker
                else None
            ),
        ),
    )

    # Loss evaluation on the test split (same converter as training).
    ext = extensions.Evaluator(test_iter, model, converter=list_concat, device=device)
    trainer.extend(ext, name="test", trigger=trigger_log)

    # Keep roughly 10% of the snapshot-cadence checkpoints when the run
    # length is known; otherwise retain a fixed 10.
    if config.train.stop_iteration is not None:
        saving_model_num = int(
            config.train.stop_iteration / config.train.snapshot_iteration / 10
        )
    else:
        saving_model_num = 10
    # Snapshot predictor weights whenever test loss reaches a new low.
    ext = extensions.snapshot_object(
        predictor,
        filename="predictor_{.updater.iteration}.pth",
        n_retains=saving_model_num,
    )
    trainer.extend(
        ext,
        trigger=LowValueTrigger("test/main/loss", trigger=trigger_eval),
    )

    # Reporting / safety extensions.
    trainer.extend(extensions.FailOnNonNumber(), trigger=trigger_log)
    trainer.extend(extensions.observe_lr(), trigger=trigger_log)
    trainer.extend(extensions.LogReport(trigger=trigger_log))
    trainer.extend(
        extensions.PrintReport(["iteration", "main/loss", "test/main/loss"]),
        trigger=trigger_log,
    )

    ext = TensorboardReport(writer=writer)
    trainer.extend(ext, trigger=trigger_log)

    # Optional Weights & Biases reporting, enabled by project category.
    if config.project.category is not None:
        ext = WandbReport(
            config_dict=config.to_dict(),
            project_category=config.project.category,
            project_name=config.project.name,
            output_dir=output.joinpath("wandb"),
        )
        trainer.extend(ext, trigger=trigger_log)

    # Human-readable module structure dump for the run directory.
    (output / "struct.txt").write_text(repr(model))

    if trigger_stop is not None:
        trainer.extend(extensions.ProgressBar(trigger_stop))

    # Full-trainer snapshot for resumption (autoload restores on restart).
    ext = extensions.snapshot_object(
        trainer,
        filename="trainer_{.updater.iteration}.pth",
        n_retains=1,
        autoload=True,
    )
    trainer.extend(ext, trigger=trigger_eval)

    return trainer