def load_to_gpu(path: str) -> Dict[str, Any]:
    """Load a checkpoint from *path*, restoring all tensors onto CUDA.

    Mirrors ``load_to_cpu`` but maps every deserialized storage to the
    "cuda" device instead of the CPU.
    """

    def _to_cuda(storage, _location):
        # Ignore the serialized location tag and always restore to CUDA.
        return torch.serialization.default_restore_location(storage, "cuda")

    with PathManager.open(path, "rb") as f:
        return torch.load(f, map_location=_to_cuda)
def load_diverse_ensemble_for_inference(filenames: List[str], task: Optional[
        tasks.FairseqTask] = None):
    """Load an ensemble of diverse models for inference.

    This method is similar to fairseq.utils.load_ensemble_for_inference
    but allows to load diverse models with non-uniform args.

    Args:
        filenames: List of file names to checkpoints
        task: Optional[FairseqTask]. If this isn't provided, we setup the task
            using the first checkpoint's model args loaded from the saved state.

    Return:
        models, args: Tuple of lists. models contains the loaded models, args
            the corresponding configurations.
        task: Either the input task or the task created within this function
            using args
    """

    def _read_checkpoint(path):
        # Deserialize one checkpoint, keeping every tensor on the CPU.
        if not PathManager.exists(path):
            raise IOError("Model file not found: {}".format(path))
        with PathManager.open(path, "rb") as f:
            return torch.load(
                f,
                map_location=lambda storage, _loc: torch.serialization.
                default_restore_location(storage, "cpu"),
            )

    def _cfg_of(state, section):
        # New-style checkpoints carry a structured "cfg" dict; legacy
        # checkpoints store a single flat "args" namespace instead.
        if "cfg" in state:
            return state["cfg"][section]
        return state["args"]

    # Load architectures and weights for every requested checkpoint.
    states = [_read_checkpoint(name) for name in filenames]

    # Build the task from the first checkpoint when none was supplied.
    if task is None:
        task_cfg = _cfg_of(states[0], "task")
        if hasattr(task_cfg, "mode"):
            task_cfg.mode = "eval"
        task = tasks.setup_task(task_cfg)

    # Instantiate each model and restore its parameters.
    models = []
    for state in states:
        model = task.build_model(_cfg_of(state, "model"))
        model.load_state_dict(state["model"])
        models.append(model)

    model_cfgs = [_cfg_of(state, "model") for state in states]
    return models, model_cfgs, task
def load_to_cpu(path: str) -> Dict[str, Any]:
    """Load a checkpoint from *path*, restoring all tensors onto the CPU.

    This is just fairseq's utils.load_checkpoint_to_cpu(), except we don't
    try to upgrade the state dict for backward compatibility - to make
    cases where we only care about loading the model params easier to
    unit test.
    """

    def _to_cpu(storage, _location):
        # Ignore the serialized location tag and always restore to CPU.
        return torch.serialization.default_restore_location(storage, "cpu")

    with PathManager.open(path, "rb") as f:
        return torch.load(f, map_location=_to_cpu)