def load_checkpoint( path_to_checkpoint, model, data_parallel=True, optimizer=None, inflation=False, convert_from_caffe2=False, ): """ Load the checkpoint from the given file. If inflation is True, inflate the 2D Conv weights from the checkpoint to 3D Conv. Args: path_to_checkpoint (string): path to the checkpoint to load. model (model): model to load the weights from the checkpoint. data_parallel (bool): if true, model is wrapped by torch.nn.parallel.DistributedDataParallel. optimizer (optim): optimizer to load the historical state. inflation (bool): if True, inflate the weights from the checkpoint. convert_from_caffe2 (bool): if True, load the model from caffe2 and convert it to pytorch. Returns: (int): the number of training epoch of the checkpoint. """ assert os.path.exists( path_to_checkpoint), "Checkpoint '{}' not found".format( path_to_checkpoint) # Account for the DDP wrapper in the multi-gpu setting. ms = model.module if data_parallel else model if convert_from_caffe2: with open(path_to_checkpoint, "rb") as f: caffe2_checkpoint = pickle.load(f, encoding="latin1") state_dict = OrderedDict() name_convert_func = get_name_convert_func() for key in caffe2_checkpoint["blobs"].keys(): converted_key = name_convert_func(key) if converted_key in ms.state_dict(): if caffe2_checkpoint["blobs"][key].shape == tuple( ms.state_dict()[converted_key].shape): state_dict[converted_key] = torch.tensor( caffe2_checkpoint["blobs"][key]).clone() logger.info("{}: {} => {}: {}".format( key, caffe2_checkpoint["blobs"][key].shape, converted_key, tuple(ms.state_dict()[converted_key].shape), )) else: logger.warn("!! {}: {} does not match {}: {}".format( key, caffe2_checkpoint["blobs"][key].shape, converted_key, tuple(ms.state_dict()[converted_key].shape), )) else: if not any(prefix in key for prefix in ["momentum", "lr", "model_iter"]): logger.warn("!! {}: can not be converted, got {}".format( key, converted_key)) ms.load_state_dict(state_dict, strict=False) epoch = -1 else: # Load the checkpoint on CPU to avoid GPU mem spike. checkpoint = torch.load(path_to_checkpoint, map_location="cpu") if inflation: # Try to inflate the model. model_state_dict_3d = (model.module.state_dict() if data_parallel else model.state_dict()) inflated_model_dict = inflate_weight(checkpoint["model_state"], model_state_dict_3d) ms.load_state_dict(inflated_model_dict, strict=False) else: ms.load_state_dict(checkpoint["model_state"]) # Load the optimizer state (commonly not done when fine-tuning) if optimizer: optimizer.load_state_dict(checkpoint["optimizer_state"]) if "epoch" in checkpoint.keys(): epoch = checkpoint["epoch"] else: epoch = -1 return epoch
from a layer name if it can be matched. Returns: (int): the number of training epoch of the checkpoint. """ assert PathManager.exists( path_to_checkpoint ), "Checkpoint '{}' not found".format(path_to_checkpoint) logger.info("Loading network weights from {}.".format(path_to_checkpoint)) # Account for the DDP wrapper in the multi-gpu setting. ms = model.module if data_parallel else model if convert_from_caffe2: with PathManager.open(path_to_checkpoint, "rb") as f: caffe2_checkpoint = pickle.load(f, encoding="latin1") state_dict = OrderedDict() name_convert_func = get_name_convert_func() for key in caffe2_checkpoint["blobs"].keys(): converted_key = name_convert_func(key) converted_key = c2_normal_to_sub_bn(converted_key, ms.state_dict()) if converted_key in ms.state_dict(): c2_blob_shape = caffe2_checkpoint["blobs"][key].shape model_blob_shape = ms.state_dict()[converted_key].shape # expand shape dims if they differ (eg for converting linear to conv params) if len(c2_blob_shape) < len(model_blob_shape): c2_blob_shape += (1,) * ( len(model_blob_shape) - len(c2_blob_shape) ) caffe2_checkpoint["blobs"][key] = np.reshape( caffe2_checkpoint["blobs"][key], c2_blob_shape )
def load_checkpoint( path_to_checkpoint, model, data_parallel=True, optimizer=None, inflation=False, convert_from_caffe2=False, epoch_reset=False, clear_name_pattern=(), ): """ Load the checkpoint from the given file. If inflation is True, inflate the 2D Conv weights from the checkpoint to 3D Conv. Args: path_to_checkpoint (string): path to the checkpoint to load. model (model): model to load the weights from the checkpoint. data_parallel (bool): if true, model is wrapped by torch.nn.parallel.DistributedDataParallel. optimizer (optim): optimizer to load the historical state. inflation (bool): if True, inflate the weights from the checkpoint. convert_from_caffe2 (bool): if True, load the model from caffe2 and convert it to pytorch. epoch_reset (bool): if True, reset #train iterations from the checkpoint. clear_name_pattern (string): if given, this (sub)string will be cleared from a layer name if it can be matched. Returns: (int): the number of training epoch of the checkpoint. """ assert PathManager.exists( path_to_checkpoint), "Checkpoint '{}' not found".format( path_to_checkpoint) logger.info("Loading network weights from {}.".format(path_to_checkpoint)) # Account for the DDP wrapper in the multi-gpu setting. ms = model.module if data_parallel else model if convert_from_caffe2: with PathManager.open(path_to_checkpoint, "rb") as f: caffe2_checkpoint = pickle.load(f, encoding="latin1") state_dict = OrderedDict() name_convert_func = get_name_convert_func() for key in caffe2_checkpoint["blobs"].keys(): converted_key = name_convert_func(key) converted_key = c2_normal_to_sub_bn(converted_key, ms.state_dict()) if converted_key in ms.state_dict(): c2_blob_shape = caffe2_checkpoint["blobs"][key].shape model_blob_shape = ms.state_dict()[converted_key].shape # expand shape dims if they differ (eg for converting linear to conv params) if len(c2_blob_shape) < len(model_blob_shape): c2_blob_shape += (1, ) * (len(model_blob_shape) - len(c2_blob_shape)) caffe2_checkpoint["blobs"][key] = np.reshape( caffe2_checkpoint["blobs"][key], c2_blob_shape) # Load BN stats to Sub-BN. if (len(model_blob_shape) == 1 and len(c2_blob_shape) == 1 and model_blob_shape[0] > c2_blob_shape[0] and model_blob_shape[0] % c2_blob_shape[0] == 0): caffe2_checkpoint["blobs"][key] = np.concatenate( [caffe2_checkpoint["blobs"][key]] * (model_blob_shape[0] // c2_blob_shape[0])) c2_blob_shape = caffe2_checkpoint["blobs"][key].shape if c2_blob_shape == tuple(model_blob_shape): state_dict[converted_key] = torch.tensor( caffe2_checkpoint["blobs"][key]).clone() logger.info("{}: {} => {}: {}".format( key, c2_blob_shape, converted_key, tuple(model_blob_shape), )) else: logger.warn("!! {}: {} does not match {}: {}".format( key, c2_blob_shape, converted_key, tuple(model_blob_shape), )) else: if not any(prefix in key for prefix in ["momentum", "lr", "model_iter"]): logger.warn("!! {}: can not be converted, got {}".format( key, converted_key)) diff = set(ms.state_dict()) - set(state_dict) diff = {d for d in diff if 'num_batches_tracked' not in d} if len(diff) > 0: logger.warn("Not loaded {}".format(diff)) ms.load_state_dict(state_dict, strict=False) epoch = -1 else: # Load the checkpoint on CPU to avoid GPU mem spike. with PathManager.open(path_to_checkpoint, "rb") as f: checkpoint = torch.load(f, map_location="cpu") model_state_dict_3d = (model.module.state_dict() if data_parallel else model.state_dict()) checkpoint["model_state"] = normal_to_sub_bn(checkpoint["model_state"], model_state_dict_3d) if inflation: # Try to inflate the model. inflated_model_dict = inflate_weight(checkpoint["model_state"], model_state_dict_3d) ms.load_state_dict(inflated_model_dict, strict=False) else: if clear_name_pattern: for item in clear_name_pattern: model_state_dict_new = OrderedDict() for k in checkpoint["model_state"]: if item in k: k_re = k.replace(item, "") model_state_dict_new[k_re] = checkpoint[ "model_state"][k] logger.info("renaming: {} -> {}".format(k, k_re)) else: model_state_dict_new[k] = checkpoint[ "model_state"][k] checkpoint["model_state"] = model_state_dict_new pre_train_dict = checkpoint["model_state"] model_dict = ms.state_dict() # Match pre-trained weights that have same shape as current model. pre_train_dict_match = { k: v for k, v in pre_train_dict.items() if k in model_dict and v.size() == model_dict[k].size() } # Weights that do not have match from the pre-trained model. not_load_layers = [ k for k in model_dict.keys() if k not in pre_train_dict_match.keys() ] # Log weights that are not loaded with the pre-trained weights. if not_load_layers: for k in not_load_layers: logger.info("Network weights {} not loaded.".format(k)) # Load pre-trained weights. ms.load_state_dict(pre_train_dict_match, strict=False) epoch = -1 # Load the optimizer state (commonly not done when fine-tuning) if "epoch" in checkpoint.keys() and not epoch_reset: epoch = checkpoint["epoch"] if optimizer: optimizer.load_state_dict(checkpoint["optimizer_state"]) else: epoch = -1 return epoch
def load_checkpoint( path_to_checkpoint, model, data_parallel=True, optimizer=None, inflation=False, convert_from_caffe2=False, ): """ Load the checkpoint from the given file. If inflation is True, inflate the 2D Conv weights from the checkpoint to 3D Conv. Args: path_to_checkpoint (string): path to the checkpoint to load. model (model): model to load the weights from the checkpoint. data_parallel (bool): if true, model is wrapped by torch.nn.parallel.DistributedDataParallel. optimizer (optim): optimizer to load the historical state. inflation (bool): if True, inflate the weights from the checkpoint. convert_from_caffe2 (bool): if True, load the model from caffe2 and convert it to pytorch. Returns: (int): the number of training epoch of the checkpoint. """ assert PathManager.exists( path_to_checkpoint ), "Checkpoint '{}' not found".format(path_to_checkpoint) # Account for the DDP wrapper in the multi-gpu setting. ms = model.module if data_parallel else model if convert_from_caffe2: with PathManager.open(path_to_checkpoint, "rb") as f: caffe2_checkpoint = pickle.load(f, encoding="latin1") state_dict = OrderedDict() name_convert_func = get_name_convert_func() for key in caffe2_checkpoint["blobs"].keys(): converted_key = name_convert_func(key) converted_key = c2_normal_to_sub_bn(converted_key, ms.state_dict()) if converted_key in ms.state_dict(): c2_blob_shape = caffe2_checkpoint["blobs"][key].shape model_blob_shape = ms.state_dict()[converted_key].shape # Load BN stats to Sub-BN. if ( len(model_blob_shape) == 1 and len(c2_blob_shape) == 1 and model_blob_shape[0] > c2_blob_shape[0] and model_blob_shape[0] % c2_blob_shape[0] == 0 ): caffe2_checkpoint["blobs"][key] = np.concatenate( [caffe2_checkpoint["blobs"][key]] * (model_blob_shape[0] // c2_blob_shape[0]) ) c2_blob_shape = caffe2_checkpoint["blobs"][key].shape if c2_blob_shape == tuple(model_blob_shape): state_dict[converted_key] = torch.tensor( caffe2_checkpoint["blobs"][key] ).clone() logger.info( "{}: {} => {}: {}".format( key, c2_blob_shape, converted_key, tuple(model_blob_shape), ) ) else: logger.warn( "!! {}: {} does not match {}: {}".format( key, c2_blob_shape, converted_key, tuple(model_blob_shape), ) ) else: if not any( prefix in key for prefix in ["momentum", "lr", "model_iter"] ): logger.warn( "!! {}: can not be converted, got {}".format( key, converted_key ) ) ms.load_state_dict(state_dict, strict=False) epoch = -1 else: # Load the checkpoint on CPU to avoid GPU mem spike. with PathManager.open(path_to_checkpoint, "rb") as f: device = torch.device('cpu') #import pickle #obj = f.read() #weights = pickle.loads(obj, encoding='latin1') #torch.save(weights, "temp.pkl") #checkpoint = torch.load('temp.pkl', map_location=device, encoding='latin1') checkpoint = torch.load(f, map_location=device, encoding='latin1') model_state_dict_3d = ( model.module.state_dict() if data_parallel else model.state_dict() ) checkpoint["model_state"] = normal_to_sub_bn( checkpoint["model_state"], model_state_dict_3d ) if inflation: # Try to inflate the model. inflated_model_dict = inflate_weight( checkpoint["model_state"], model_state_dict_3d ) ms.load_state_dict(inflated_model_dict, strict=False) else: ms.load_state_dict(checkpoint["model_state"]) # Load the optimizer state (commonly not done when fine-tuning) if optimizer: optimizer.load_state_dict(checkpoint["optimizer_state"]) if "epoch" in checkpoint.keys(): epoch = checkpoint["epoch"] else: epoch = -1 return epoch