def synth(self,
          hparams: ExtendedHParams,
          ids_input: Union[str, List[str], Tuple[str, ...], os.PathLike],
          post_processing_mapping: Dict[str, str] = None,
          plotter_configs: List[DataPlotter.Config] = None):
    """Fill in acoustic-model defaults and delegate to the parent synth.

    :param hparams: Hyper-parameter container. It is deep-copied before
                    being modified, so the caller's object is untouched.
    :param ids_input: Forwarded unchanged to the parent implementation.
    :param post_processing_mapping: Defaults to mapping both
                    "pred_acoustic_features" and "acoustic_features" to
                    "cmp_features".
    :param plotter_configs: Defaults to the legacy plotter configs of
                    AcousticModelTrainer.
    :return: Whatever the parent synth returns.
    """
    if post_processing_mapping is None:
        post_processing_mapping = dict.fromkeys(
            ("pred_acoustic_features", "acoustic_features"),
            "cmp_features")

    if plotter_configs is None:
        plotter_configs = \
            AcousticModelTrainer._get_legacy_plotter_configs(hparams)

    # Only add the default feature name when the caller did not set one.
    synth_hparams = hparams
    if not synth_hparams.has_value("synth_feature_names"):
        synth_hparams = copy.deepcopy(hparams)
        synth_hparams.add_hparam("synth_feature_names",
                                 ["pred_acoustic_features"])

    return super().synth(
        hparams=synth_hparams,
        ids_input=ids_input,
        post_processing_mapping=post_processing_mapping,
        plotter_configs=plotter_configs)
def _get_synth_dir(hparams: ExtendedHParams,
                   use_model_name: bool = True,
                   epoch: int = None,
                   step: int = None) -> os.PathLike:
    """Select the synthesis directory, create it and return its path.

    An explicit hparams.synth_dir is used as is. Otherwise the path is
    built as <out_dir or current dir>/[<model_name>/]<SYNTH_SUB_DIR>/
    followed by "e<epoch>" or, if no epoch is given, "s<step>".

    :param hparams: Hyper-parameter container.
    :param use_model_name: Add hparams.model_name to the path if it is set.
    :param epoch: Epoch used for the last path component.
    :param step: Step used for the last path component when epoch is None.
    :return: The selected directory.
    """
    if hparams.has_value("synth_dir"):
        synth_dir = hparams.synth_dir
    else:
        has_out_dir = hparams.has_value("out_dir")
        path_parts = [hparams.out_dir if has_out_dir else os.path.curdir]

        if use_model_name and hparams.has_value("model_name"):
            path_parts.append(hparams.model_name)
        path_parts.append(Synthesiser.SYNTH_SUB_DIR)

        # The epoch takes precedence over the step.
        if epoch is not None:
            path_parts.append("e" + str(epoch))
        elif step is not None:
            path_parts.append("s" + str(step))

        synth_dir = os.path.join(*path_parts)

    makedirs_safe(synth_dir)
    logging.info("Selected {} as synthesis directory.".format(synth_dir))
    return synth_dir
def __init__(self, hparams: ExtendedHParams, id_list: List[str]):
    """Set up the trainer and resolve the "default" scheduler.

    :param hparams: Hyper-parameter container. When None, a new one is
                    created with out_dir set to the current directory.
    :param id_list: List of ids, forwarded to the base class.
    """
    no_hparams_given = hparams is None
    if no_hparams_given:
        hparams = self.create_hparams()
        hparams.out_dir = os.path.curdir

    super().__init__(id_list=id_list, hparams=hparams)

    # "default" is resolved to a verbose Plateau scheduler.
    if hparams.scheduler_type == "default":
        hparams.scheduler_type = "Plateau"
        hparams.scheduler_args["verbose"] = True
def __init__(self,
             hparams: ExtendedHParams,
             id_list: List[str],
             data_reader_configs: List[DataReaderConfig] = None):
    """Set up the trainer, its default scheduler and the batch functions.

    :param hparams: Hyper-parameter container. When None, a new one is
                    created with out_dir set to the current directory.
    :param id_list: List of ids, forwarded to the base class.
    :param data_reader_configs: Forwarded to the base class.
    """
    no_hparams_given = hparams is None
    if no_hparams_given:
        hparams = self.create_hparams()
        hparams.out_dir = os.path.curdir

    super().__init__(data_reader_configs=data_reader_configs,
                     id_list=id_list,
                     hparams=hparams)

    # "default" is resolved to the Noam scheduler.
    if hparams.scheduler_type == "default":
        hparams.scheduler_type = "Noam"
        # NOTE(review): The key is spelled 'wormup_steps'; presumably the
        # consumer of scheduler_args uses the same spelling - verify
        # before renaming.
        hparams.scheduler_args['wormup_steps'] = 4000

    # Override the collate and decollate methods of batches.
    collate_fn = partial(self.prepare_batch,
                         use_cond=hparams.use_cond,
                         one_hot_target=True)
    self.batch_collate_fn = collate_fn
    self.batch_decollate_fn = self.decollate_network_output
def legacy_support_init(dir_world_features: os.PathLike,
                        dir_question_labels: os.PathLike,
                        id_list: List[str],
                        num_questions: int,
                        hparams: ExtendedHParams):
    """Translate the legacy init arguments into arguments of the new init.

    Note that hparams.world_dir is set to dir_world_features as a side
    effect.

    :param dir_world_features: Path to the directory containing the world
                               features.
    :param dir_question_labels: Path to the directory containing the
                               question labels.
    :param id_list: List of ids, can contain a speaker directory.
    :param num_questions: Number of questions in question file (only
                               needed for legacy code).
    :param hparams: Set of hyper parameters.
    :return: Dictionary with the keys data_reader_configs, hparams and
             id_list.
    """
    from idiaptts.src.data_preparation.DataReaderConfig import DataReaderConfig

    # Question labels are matched in length with the acoustic features.
    questions_config = DataReaderConfig(
        name="questions",
        feature_type="QuestionLabelGen",
        directory=dir_question_labels,
        features="questions",
        num_questions=num_questions,
        match_length=["cmp_features"])

    # TODO: Features are always loaded through a single reader; how to
    #       load them separately is still open.
    cmp_feature_name = "cmp_mcep" + str(hparams.num_coded_sps)
    acoustic_config = WorldFeatLabelGen.Config(
        name="cmp_features",
        directory=dir_world_features,
        features=[cmp_feature_name],
        output_names=["acoustic_features"],
        add_deltas=hparams.add_deltas,
        num_coded_sps=hparams.num_coded_sps,
        num_bap=hparams.num_bap,
        sp_type=hparams.sp_type,
        requires_seq_mask=True,
        match_length=["questions"])
    hparams.world_dir = dir_world_features

    return {
        "data_reader_configs": [questions_config, acoustic_config],
        "hparams": hparams,
        "id_list": id_list,
    }
def init(self,
         hparams: ExtendedHParams,
         data_reader_configs: List[DataReader.Config] = None,
         model_config=None,
         loss_configs: List[NamedLoss.Config] = None) -> None:
    """Fill in legacy model and loss configs, then run the parent init.

    :param hparams: Hyper-parameter container.
    :param data_reader_configs: Forwarded unchanged to the parent init.
    :param model_config: When None and hparams.model_type is set, a config
                         is built from the legacy hparams description.
    :param loss_configs: When None, a masked MSE loss between
                         "acoustic_features" and "pred_acoustic_features"
                         is used.
    """
    build_legacy_model = (model_config is None
                          and hparams.has_value("model_type"))
    if build_legacy_model:
        legacy_config = rnn_dyn.convert_legacy_to_config(
            (hparams.num_questions, ), hparams)
        model_config = NamedForwardWrapper.Config(
            wrapped_model_config=legacy_config,
            input_names="questions",
            batch_first=hparams.batch_first,
            name="AcousticModel",
            output_names="pred_acoustic_features")

    if loss_configs is None:
        mse_config = NamedLoss.Config(
            name="MSELoss_acoustic_features",
            type_="MSELoss",
            seq_mask="acoustic_features_mask",
            input_names=["acoustic_features", "pred_acoustic_features"],
            batch_first=hparams.batch_first)
        loss_configs = [mse_config]

    super().init(data_reader_configs=data_reader_configs,
                 hparams=hparams,
                 model_config=model_config,
                 loss_configs=loss_configs)
    self.logger.info("AcousticModelTrainer ready.")
def __init__(self,
             hparams: ExtendedHParams,
             id_list: List[str],
             data_reader_configs: List[DataReader.Config] = None):
    """Set up the trainer and resolve the "default" scheduler.

    :param hparams: Hyper-parameter container. When None, a new one is
                    created with out_dir set to the current directory.
    :param id_list: List of ids, forwarded to the base class.
    :param data_reader_configs: Forwarded to the base class.
    """
    no_hparams_given = hparams is None
    if no_hparams_given:
        hparams = self.create_hparams()
        hparams.out_dir = os.path.curdir

    super(AcousticModelTrainer, self).__init__(
        data_reader_configs=data_reader_configs,
        id_list=id_list,
        hparams=hparams)

    # "default" is resolved to a verbose Plateau scheduler.
    if hparams.scheduler_type == "default":
        hparams.scheduler_type = "Plateau"
        hparams.add_hparams(plateau_verbose=True)
def __init__(self,
             id_list: List[str],
             datareaders: List,
             hparams: ExtendedHParams,
             is_train_set: bool = False,
             is_val_set: bool = False,
             is_test_set: bool = False):
    """Wrap a datareader dataset and store the windowing configuration.

    :param id_list: List of ids handled by this dataset.
    :param datareaders: Datareaders forwarded to the wrapped dataset.
    :param hparams: Hyper-parameter container, has to define
                    windowed_feature_names.
    :param is_train_set: Use hparams.batch_size_train as batch size.
    :param is_val_set: Use hparams.batch_size_val as batch size.
    :param is_test_set: Use hparams.batch_size_test as batch size.
    """
    super().__init__()

    assert hparams.has_value("windowed_feature_names"), \
        "Use hparams.windowed_feature_names to define the features to " \
        "apply the windowing to. Those features have to match in length."

    self.id_list = id_list

    # The first active flag wins (train, then val, then test).
    # NOTE(review): batch_size stays unset when no flag is given -
    # presumably callers always pass exactly one; confirm.
    batch_size_sources = (
        (is_train_set, "batch_size_train"),
        (is_val_set, "batch_size_val"),
        (is_test_set, "batch_size_test"),
    )
    for is_active, hparam_name in batch_size_sources:
        if is_active:
            self.batch_size = getattr(hparams, hparam_name)
            break

    self.windowed_feature_names = hparams.windowed_feature_names

    self.window_size = hparams.get("window_size", 500)
    assert self.window_size > 1
    self.step_size = hparams.get("step_size", 50)

    self.mem_copy = hparams.get("windower_mem_copy", False)
    self.allow_shorter_sequences = hparams.get("allow_shorter_sequences",
                                               True)
    self.random_offset = hparams.get("windower_random_offset", True)

    self.dataset = PyTorchDatareadersDataset(id_list, datareaders, hparams)
    self.length = None
def process_dataloader(self,
                       dataloader: DataLoader,
                       hparams: ExtendedHParams,
                       total_epoch: int,
                       total_steps: int,
                       current_epoch: int = None,
                       training: bool = True):
    """Run the model once over all batches of a dataloader.

    In training mode each batch is followed by a backward pass, optional
    gradient sanitising/clipping, an optimiser step, an optional EMA
    update and a scheduler call. In validation mode the forward pass runs
    without gradients, using the averaged (EMA) model when one exists.

    :param dataloader: Yields (data_dict, lengths) batches.
    :param hparams: Hyper-parameter container.
    :param total_epoch: Forwarded to self._get_current_iteration.
    :param total_steps: Number of steps done so far; incremented locally
                        per training batch and used as Tensorboard step.
    :param current_epoch: Forwarded to self._get_current_iteration.
    :param training: Train the model if True, otherwise only compute the
                     losses.
    :return: Dictionary of loss name to loss (numpy array), each summed
             over the batches and divided by len(dataloader).
    :raises ValueError: If a loss is NaN, or +/-Inf while
                        hparams.replace_inf_grads_by_zero is not set.
    :raises KeyError: If two loss functions return the same loss name.
    """
    if hparams.use_gpu:
        assert (hparams.num_gpus <= torch.cuda.device_count()), \
            "Specified number of GPUs is incorrect."

    # Tensorboard is optional, skip logging to it when it is unavailable.
    try:
        from torch.utils.tensorboard import SummaryWriter

        if hparams.has_value("tensorboard_dir"):
            tensorboard_dir = hparams.tensorboard_dir
        else:
            tensorboard_dir = os.path.join(hparams.out_dir,
                                           hparams.model_name,
                                           "tensorboard")
        tb_writer = SummaryWriter(log_dir=tensorboard_dir)
    except ImportError:
        tb_writer = None

    # A new writer is created on every call, so it has to be closed on
    # every exit path to not leak its event file.
    try:
        model = self.model
        if training:
            model.train()
            msg = "{}: Train with {} on ".format(
                datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                self.optimiser)
            if hparams.use_gpu:
                msg += str(torch.cuda.device_count()) + " GPU(s)."
            else:
                msg += "1 CPU."
            self.logger.info(msg)
        else:
            if self.ema is not None:
                self.logger.info("Using averaged model for validation.")
                model = self.ema.model
            model.eval()
            self.logger.info("{}: Compute loss of validation set.".format(
                datetime.now().strftime("%Y-%m-%d %H:%M:%S")))

        if hparams.log_memory_consumption:
            self.logger.info('CPU: {:.0f} MB, GPU: {} MB'.format(
                resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1e3,
                str(get_gpu_memory_map()) if hparams.use_gpu else "-"))

        # Multi-GPU support.
        if hparams.num_gpus > 1:
            model = DataParallel(model,
                                 dim=0 if hparams.batch_first else 1)
            # Make the init_hidden method directly accessible.
            model.init_hidden = model.module.init_hidden

        # Log loss after each <hparams.logging_batch_index_perc>% of
        # batches.
        logging_batch_index = (len(dataloader)
                               // hparams.logging_batch_index_perc) + 1

        total_losses = dict()

        for batch_index, batch in enumerate(dataloader):

            if hparams.use_gpu:
                batch = self._batch_to_gpu(batch,
                                           hparams.dataset_load_async)

            data_dict, lengths = batch
            batch_size = len(next(iter(lengths.values())))
            model.init_hidden(batch_size)

            # Give max length because DataParallel splits the
            # seq_lengths_input and padding will be done according to the
            # maximum length of that subset. Combining multi GPU output
            # will fail with a size mismatch.
            # https://pytorch.org/docs/stable/notes/faq.html#pack-rnn-unpack-with-data-parallelism
            max_lengths = dict()
            for key in data_dict.keys():
                if key in lengths:
                    l_max = max(lengths[key])
                    if hparams.use_gpu and hparams.num_gpus > 1:
                        l_max = l_max.repeat(hparams.num_gpus)
                    max_lengths[key] = l_max

            # The return value is not used; the loss functions below are
            # called with data_dict instead.
            if training:
                model(data_dict, lengths, max_lengths)
            else:
                with torch.no_grad():
                    model(data_dict, lengths, max_lengths)

            losses = {}
            for loss_fn in self.losses:
                loss_ = loss_fn(data_dict, lengths, total_steps)
                for loss_name, l in loss_.items():
                    if torch.isnan(l):
                        raise ValueError(
                            "Found NaN in {} loss.".format(loss_name))
                    if not hparams.replace_inf_grads_by_zero \
                            and torch.isinf(l):
                        raise ValueError(
                            "Found +/-Inf in {} loss.".format(loss_name))
                    if loss_name in losses:
                        raise KeyError(
                            "Loss with name {} defined twice.".format(
                                loss_name))
                    losses[loss_name] = l

            backprop_loss = self.get_summed_losses_subset(
                loss_names=hparams.backprop_loss_names, losses=losses)
            if hparams.backprop_loss_names is None \
                    and hparams.scheduler_loss_names is None:
                scheduler_loss = backprop_loss.detach()
            else:
                scheduler_loss = self.get_summed_losses_subset(
                    loss_names=hparams.scheduler_loss_names,
                    losses=losses).detach()

            if training:
                self.optimiser.zero_grad()
                backprop_loss.backward(
                    retain_graph=hparams.backward_retain_graph)
                total_steps += 1

                if hparams.replace_inf_grads_by_zero:
                    self._replace_inf_grads_by_zero()
                if hparams.grad_clip_norm_type is not None:
                    # Adds a small bias.
                    torch.nn.utils.clip_grad_norm_(
                        self.model.parameters(),
                        hparams.grad_clip_max_norm,
                        hparams.grad_clip_norm_type)
                if hparams.grad_clip_thresh is not None:
                    # Adds a big bias.
                    torch.nn.utils.clip_grad_value_(
                        self.model.parameters(),
                        hparams.grad_clip_thresh)

                self.optimiser.step()

                # Update exponential moving average.
                if self.ema:
                    self.ema.update_params(model)

                current_iter = self._get_current_iteration(
                    batch_index=batch_index,
                    current_epoch=current_epoch,
                    dataloader_length=len(dataloader),
                    hparams=hparams,
                    total_epoch=total_epoch)
                self.run_scheduler(hparams=hparams,
                                   loss=scheduler_loss,
                                   current_iter=current_iter)

            # Logging current error.
            if batch_index % logging_batch_index == 0:
                log_message = "Train " if training else "Test "
                log_message += "mini batch [{:{front_pad}d}/{}]".format(
                    batch_index + 1,
                    len(dataloader),
                    front_pad=len(str(len(dataloader))))
                log_message += "\tLoss: "
                log_message += " ".join(
                    ["{}: {:.3f}".format(key, loss)
                     for key, loss in losses.items()])
                if hparams.log_memory_consumption:
                    log_message += "\tCPU: {:.0f} MB, ".format(
                        resource.getrusage(
                            resource.RUSAGE_SELF).ru_maxrss / 1e3)
                    if hparams.use_gpu:
                        log_message += "GPU: {} MB".format(
                            str(get_gpu_memory_map()))

                self.logger.info(log_message)

            # Detach before accumulating so that no graph is kept alive.
            losses = {k: l.detach() for k, l in losses.items()}
            for key, loss in losses.items():
                if key not in total_losses:
                    total_losses[key] = loss
                else:
                    total_losses[key] += loss

            # Only training batches belong to the "Train loss" chart.
            # Validation batches share one step value and are reported
            # as average under "Validation loss" below.
            if training and tb_writer is not None:
                tb_writer.add_scalars("Train loss", losses, total_steps)

            del data_dict, lengths, max_lengths, losses, backprop_loss, \
                scheduler_loss

        total_losses = {key: value / len(dataloader)
                        for key, value in total_losses.items()}

        if not training:
            if tb_writer is not None:
                tb_writer.add_scalars("Validation loss", total_losses,
                                      total_steps)

            self.logger.info(
                'Validation set: Total loss: {}\nAverage loss:\n\t{}\n'
                .format(sum(total_losses.values()),
                        "\n\t".join(["{}: {:.3f}".format(key, loss)
                                     for key, loss
                                     in total_losses.items()])))

            # Optional hook of the model, called once per validation run.
            fn_log_per_test = getattr(self.model, "log_per_test", None)
            if callable(fn_log_per_test):
                fn_log_per_test()

        np_total_losses = {key: loss.cpu().numpy()
                           for key, loss in total_losses.items()}
        del total_losses

        return np_total_losses
    finally:
        if tb_writer is not None:
            tb_writer.close()
def load_checkpoint(self,
                    hparams: ExtendedHParams,
                    model_path: Union[str, os.PathLike],
                    epoch: int = None,
                    ignore_layers: bool = True,
                    load_optimiser: bool = True,
                    load_scheduler: bool = True,
                    step: int = None,
                    verbose: bool = True,
                    load_best_model: bool = False):
    """
    Load a trainer and model from a checkpoint.

    :param hparams: Hyper-parameter container
    :type hparams: ExtendedHParams
    :param model_path: Path to folder with save files of the checkpoint
        (config.json, params_*, optimiser_*, scheduler_*)
    :type model_path: String or Path
    :param epoch: Epoch of the checkpoint to load, use -1 to load best
        model, defaults to None
    :type epoch: int, optional
    :param ignore_layers: Whether to ignore layers specified in hparams,
        defaults to True
    :type ignore_layers: bool, optional
    :param load_optimiser: Whether to load the optimiser state, defaults
        to True
    :type load_optimiser: bool, optional
    :param load_scheduler: Whether to load the scheduler state if a
        scheduler file exists for the checkpoint, defaults to True
    :type load_scheduler: bool, optional
    :param step: Step of the checkpoint to load, use -1 to load best
        model, defaults to None
    :type step: int, optional
    :param verbose: Additional logging of checkpoint creation time,
        defaults to True
    :type verbose: bool, optional
    :param load_best_model: If true, epoch and step are ignored and the
        best model is loaded, defaults to False
    :type load_best_model: bool, optional
    :raises FileNotFoundError: If the newest checkpoint is requested but
        model_path contains no params_* file
    :return: (best_loss, epoch, step) tuple of loaded checkpoint
    :type: Tuple[float, int, int]
    """
    assert load_best_model or step is None or epoch is None, \
        "Only epoch ({}) OR step ({}) can be not None".format(epoch, step)

    # Select the file suffix shared by all files of one checkpoint.
    if load_best_model or epoch == -1 or step == -1:
        suffix = "_best"
    elif hparams.load_newest_checkpoint:
        assert step is None and epoch is None, \
            "epoch ({}) and step ({}) need to be None when loading newest "\
            "model.".format(epoch, step)

        file_list = glob.glob(os.path.join(model_path, "params_*"))
        if len(file_list) == 0:
            raise FileNotFoundError("No newest checkpoint found in {}."
                                    .format(model_path))
        elif len(file_list) == 1:
            latest_params = file_list[0]
        else:
            # Ignore the initial state, unless no other checkpoint exists.
            trained_files = [
                f for f in file_list
                if os.path.basename(f) not in ("params_e0", "params_s0")]
            latest_params = max(trained_files or file_list,
                                key=os.path.getctime)
        suffix = "_" + os.path.basename(latest_params).split('_')[1]
    else:
        assert load_best_model or step is not None or epoch is not None, \
            "Either step or epoch is required. Use -1 in one of them to " \
            "load the best model."

        if step is not None:
            suffix = "_s{}".format(step)
        else:
            suffix = "_e{}".format(epoch)

    params_path = os.path.join(model_path, "params" + suffix)
    if verbose:
        mod_time = local_modification_time(params_path)
        message = "Load model state dict from {} (last modified {})".format(
            params_path, mod_time)
        if ignore_layers and hparams.ignore_layers is not None \
                and len(hparams.ignore_layers) > 0:
            message += " ignoring {}".format(hparams.ignore_layers)
        self.logger.info(message)

    # Always load to CPU first, the model is moved to the GPU at the end.
    checkpoint = torch.load(params_path,
                            map_location=lambda storage, loc: storage)
    try:
        params = checkpoint["params"]
    except KeyError:
        params = checkpoint["model_state_dict"]  # Legacy support

    best_loss = np.inf
    # From here on epoch and step are those stored in the checkpoint.
    epoch = checkpoint["epoch"]
    step = checkpoint["step"] if "step" in checkpoint else None
    self.logger.info("Load {}{}".format(
        "epoch {}, ".format(epoch) if epoch is not None else "",
        "step {}".format(step) if step is not None else ""))

    if self.model is None:
        # Recreate the model from the config saved next to the parameters.
        with open(os.path.join(model_path, "config.json"), "r") as f:
            json_str = f.read()
        config_json = jsonpickle.decode(json_str)
        self.model = config_json.create_model()

    if hparams.has_value("layer_map") and len(hparams.layer_map) > 0:
        params = self._map_layer_names(params, hparams.layer_map, verbose)

    if ignore_layers:
        params = self._remove_ignored_layers(params, self.model, hparams)

    missing_keys, unexpected_keys = self.model.load_state_dict(
        params, strict=not hparams.allow_missing_layers)
    if verbose:
        if len(missing_keys) > 0:
            self.logger.warning("Did not load: {}".format(
                ", ".join(missing_keys)))
        if len(unexpected_keys) > 0:
            self.logger.warning("Found unexpected keys: {}".format(
                ", ".join(unexpected_keys)))

    if load_optimiser:
        opt_params_path = os.path.join(model_path, "optimiser" + suffix)
        checkpoint = torch.load(opt_params_path,
                                map_location=lambda storage, loc: storage)

        # The stored best loss is only used when no layers were ignored.
        if "best_loss" in checkpoint and (
                not ignore_layers
                or hparams.ignore_layers is None
                or len(hparams.ignore_layers) == 0):
            best_loss = checkpoint["best_loss"]

        opt_params = checkpoint["params"]
        self._load_optimiser(opt_params, hparams)

    if load_scheduler:
        scheduler_params_path = os.path.join(model_path,
                                             "scheduler" + suffix)
        # The scheduler file is optional.
        if os.path.isfile(scheduler_params_path):
            # Load the scheduler's own file; independent of whether the
            # optimiser was loaded above.
            checkpoint = torch.load(
                scheduler_params_path,
                map_location=lambda storage, loc: storage)
            scheduler_params = checkpoint["params"]
            self._load_scheduler(
                scheduler_params,
                epoch if epoch is not None else checkpoint['epoch'],
                step if step is not None else checkpoint['step'],
                hparams)

    if hparams.use_gpu:
        if hasattr(self.model, "set_gpu_flag") \
                and callable(self.model.set_gpu_flag):
            self.model.set_gpu_flag(hparams.use_gpu)
        self.model = self.model.cuda()
        if self.optimiser is not None:
            self._optimiser_to_gpu()

    return best_loss, epoch, step
def create_hparams(hparams_string=None, verbose=False):
    """Create the hyper-parameter container via ExtendedHParams.

    :param hparams_string: Forwarded to ExtendedHParams.create_hparams;
                           non-default values are parsed from it.
    :param verbose: Forwarded to ExtendedHParams.create_hparams.
    :return: The created hyper-parameter container.
    """
    hparams = ExtendedHParams.create_hparams(hparams_string, verbose)
    return hparams