def test_loading_objects_with_expected_shape_mismatches(self) -> None:
    """Loading a checkpoint must tolerate per-channel observer buffers whose
    shapes differ between the saved and the freshly-built model."""

    def _get_model() -> torch.nn.Module:
        # QAT-prepared conv model using the fbgemm qconfig.
        model = nn.Sequential(nn.Conv2d(2, 2, 1))
        model.qconfig = torch.quantization.get_default_qat_qconfig("fbgemm")
        return torch.quantization.prepare_qat(model)

    m1, m2 = _get_model(), _get_model()
    # Calibrate m1 with data to populate the observer stats.
    m1(torch.randn(4, 2, 4, 4))

    # Load m1's checkpoint into m2. This should work without errors even
    # though the shapes of per-channel observer buffers do not match.
    with TemporaryDirectory() as save_dir:
        Checkpointer(m1, save_dir=save_dir).save("checkpoint_file")

        # Reload from the same folder into the uncalibrated model.
        fresh_checkpointer = Checkpointer(m2, save_dir=save_dir)
        self.assertTrue(fresh_checkpointer.has_checkpoint())
        expected_file = os.path.join(save_dir, "checkpoint_file.pth")
        self.assertEqual(fresh_checkpointer.get_checkpoint_file(), expected_file)
        fresh_checkpointer.load(fresh_checkpointer.get_checkpoint_file())

        # Run the expected input through the network with observers
        # disabled and fake_quant enabled. If buffers were loaded correctly
        # into per-channel observers, this forward pass will not crash.
        m2.apply(torch.quantization.disable_observer)
        m2.apply(torch.quantization.enable_fake_quant)
        m2(torch.randn(4, 2, 4, 4))
def test_from_last_checkpoint_model(self) -> None:
    """
    test that loading works even if they differ by a prefix.
    """
    # Every combination of plain / DataParallel-wrapped save and load targets.
    model_pairs = [
        (self._create_model(), self._create_model()),
        (nn.DataParallel(self._create_model()), self._create_model()),
        (self._create_model(), nn.DataParallel(self._create_model())),
        (
            nn.DataParallel(self._create_model()),
            nn.DataParallel(self._create_model()),
        ),
    ]
    for trained_model, fresh_model in model_pairs:
        with TemporaryDirectory() as save_dir:
            Checkpointer(trained_model, save_dir=save_dir).save("checkpoint_file")

            # Reload in the same folder.
            fresh_checkpointer = Checkpointer(fresh_model, save_dir=save_dir)
            self.assertTrue(fresh_checkpointer.has_checkpoint())
            self.assertEqual(
                fresh_checkpointer.get_checkpoint_file(),
                os.path.join(save_dir, "checkpoint_file.pth"),
            )
            fresh_checkpointer.load(fresh_checkpointer.get_checkpoint_file())

            param_pairs = zip(
                trained_model.parameters(), fresh_model.parameters()
            )
            for trained_p, loaded_p in param_pairs:
                # different tensor references
                self.assertFalse(id(trained_p) == id(loaded_p))
                # same content
                self.assertTrue(trained_p.cpu().equal(loaded_p.cpu()))
def test_from_name_file_model(self) -> None:
    """
    test that loading works even if they differ by a prefix.

    Saves in one temporary directory and loads by explicit path from a
    checkpointer rooted in a different directory, so ``has_checkpoint()``
    must be False and the load must go through the given file path.
    """
    for trained_model, fresh_model in [
        (self._create_model(), self._create_model()),
        (nn.DataParallel(self._create_model()), self._create_model()),
        (self._create_model(), nn.DataParallel(self._create_model())),
        (
            nn.DataParallel(self._create_model()),
            nn.DataParallel(self._create_model()),
        ),
    ]:
        with TemporaryDirectory() as f:
            checkpointer = Checkpointer(
                trained_model, save_dir=f, save_to_disk=True
            )
            checkpointer.save("checkpoint_file")

            # on different folders.
            with TemporaryDirectory() as g:
                fresh_checkpointer = Checkpointer(fresh_model, save_dir=g)
                # The fresh directory has no checkpoint of its own.
                self.assertFalse(fresh_checkpointer.has_checkpoint())
                self.assertEqual(fresh_checkpointer.get_checkpoint_file(), "")
                fresh_checkpointer.load(
                    os.path.join(f, "checkpoint_file.pth")
                )

            for trained_p, loaded_p in zip(
                trained_model.parameters(), fresh_model.parameters()
            ):
                # different tensor references.
                self.assertFalse(id(trained_p) == id(loaded_p))
                # same content.
                self.assertTrue(trained_p.equal(loaded_p))
def main():
    """Evaluate a trained model on the test set and save its predictions.

    Loads the config, restores the checkpoint named by
    ``config.test.checkpoint``, runs evaluation, and writes
    ``predictions.npz`` (preds, probs, labels, loss, acc) into the output
    directory — ``config.test.output_dir`` if set, otherwise the
    checkpoint's parent directory.
    """
    config = load_config()

    # Default the output directory to where the checkpoint lives.
    if config.test.output_dir is None:
        output_dir = pathlib.Path(config.test.checkpoint).parent
    else:
        output_dir = pathlib.Path(config.test.output_dir)
    output_dir.mkdir(exist_ok=True, parents=True)

    logger = create_logger(name=__name__, distributed_rank=get_rank())

    model = create_model(config)
    model = apply_data_parallel_wrapper(config, model)
    checkpointer = Checkpointer(
        model,
        checkpoint_dir=output_dir,
        logger=logger,
        distributed_rank=get_rank(),
    )
    checkpointer.load(config.test.checkpoint)

    test_loader = create_dataloader(config, is_train=False)
    _, test_loss = create_loss(config)

    preds, probs, labels, loss, acc = evaluate(
        config, model, test_loader, test_loss, logger
    )

    # Plain string: the filename is constant (the original used a
    # placeholder-free f-string, flagged by ruff F541).
    output_path = output_dir / 'predictions.npz'
    np.savez(
        output_path, preds=preds, probs=probs, labels=labels, loss=loss, acc=acc
    )
def test_load_lazy_module(self) -> None:
    """Loading a checkpoint into an uninitialized lazy module must
    materialize its parameters from the saved state."""

    def _get_model() -> nn.Sequential:
        # pyre-fixme[11]
        return nn.Sequential(nn.LazyLinear(10))

    m1, m2 = _get_model(), _get_model()
    # A forward pass initializes m1's lazy parameters; m2 stays lazy.
    m1(torch.randn(4, 2, 4, 4))

    # Load m1's checkpoint into m2.
    with TemporaryDirectory() as save_dir:
        Checkpointer(m1, save_dir=save_dir).save("checkpoint_file")

        fresh_checkpointer = Checkpointer(m2, save_dir=save_dir)
        self.assertTrue(fresh_checkpointer.has_checkpoint())
        expected_file = os.path.join(save_dir, "checkpoint_file.pth")
        self.assertEqual(fresh_checkpointer.get_checkpoint_file(), expected_file)
        fresh_checkpointer.load(fresh_checkpointer.get_checkpoint_file())

        self.assertTrue(torch.equal(m1[0].weight, m2[0].weight))
def evaluate_on_dataset(
    config_file="../../configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml",
    override_cfg=(),
    test_datasets=(),
):
    """Build a model from the given config, load its weights, and run COCO
    evaluation on each dataset in ``test_datasets``; returns the metrics."""
    # Treat an explicit None the same as "no overrides".
    overrides = [] if override_cfg is None else override_cfg

    cfg = get_cfg()
    cfg.merge_from_file(config_file)
    cfg.merge_from_list(overrides)
    cfg.DATASETS.TEST = test_datasets

    model = build_model(cfg)
    Checkpointer(model).load(cfg.MODEL.WEIGHTS)

    evaluators = [
        COCOEvaluator(dataset_name, cfg, False) for dataset_name in test_datasets
    ]
    return DefaultTrainer.test(cfg, model, evaluators)
def test_checkpointables(self) -> None:
    """
    Test saving and loading checkpointables.

    A dummy object with ``state_dict``/``load_state_dict`` is registered as
    a checkpointable; after a save/load round trip, the loaded checkpoint
    must contain every key/value of the object's state.
    """

    class CheckpointableObj:
        """
        A dummy checkpointableObj class with state_dict and load_state_dict
        methods.
        """

        def __init__(self):
            # 10 random key/value pairs form the object's state.
            self.state = {
                self.random_handle(): self.random_handle() for _ in range(10)
            }

        def random_handle(self, str_len=100) -> str:
            """
            Generate a random string of fixed length.

            Args:
                str_len (int): length of the output string.

            Returns:
                (str): random generated handle.
            """
            letters = string.ascii_uppercase
            return "".join(random.choice(letters) for _ in range(str_len))

        def state_dict(self):
            """
            Return the state.

            Returns:
                (dict): return the state.
            """
            return self.state

        def load_state_dict(self, state) -> None:
            """
            Load the state from a given state.

            Args:
                state (dict): a key value dictionary.
            """
            # Deep-copy so the loaded state is independent of the source.
            self.state = copy.deepcopy(state)

    trained_model, fresh_model = self._create_model(), self._create_model()
    with TemporaryDirectory() as f:
        checkpointables = CheckpointableObj()
        checkpointer = Checkpointer(
            trained_model,
            save_dir=f,
            save_to_disk=True,
            checkpointables=checkpointables,
        )
        checkpointer.save("checkpoint_file")

        # in the same folder
        fresh_checkpointer = Checkpointer(fresh_model, save_dir=f)
        self.assertTrue(fresh_checkpointer.has_checkpoint())
        self.assertEqual(
            fresh_checkpointer.get_checkpoint_file(),
            os.path.join(f, "checkpoint_file.pth"),
        )
        checkpoint = fresh_checkpointer.load(
            fresh_checkpointer.get_checkpoint_file()
        )

        # Every key/value of the checkpointable's state must survive the
        # round trip. Iterate items directly (the original discarded the
        # values and re-indexed the dict).
        state_dict = checkpointables.state_dict()
        for key, value in state_dict.items():
            self.assertIsNotNone(checkpoint["checkpointables"].get(key))
            self.assertEqual(checkpoint["checkpointables"][key], value)