Example #1
    def test_checkpointing(self):
        """
        Tests checkpointing by running train steps to make sure they run the
        same way after loading from a checkpoint.
        """
        config = get_fast_test_task_config()
        task = build_task(config).set_hooks([LossLrMeterLoggingHook()])
        task_2 = build_task(config).set_hooks([LossLrMeterLoggingHook()])

        task.set_use_gpu(torch.cuda.is_available())

        # prepare the task for the right device
        task.prepare()

        # test in both train and test mode
        for _ in range(2):
            task.advance_phase()

            # set task's state as task_2's checkpoint
            task_2.set_checkpoint(get_checkpoint_dict(task, {}, deep_copy=True))
            task_2.prepare()

            # task 2 should have the same state
            self._compare_states(task.get_classy_state(), task_2.get_classy_state())

            # this tests that both states' iterators return the same samples
            sample = next(task.get_data_iterator())
            sample_2 = next(task_2.get_data_iterator())
            self._compare_samples(sample, sample_2)

            # test that the train step runs the same way on both states
            # and the loss remains the same
            task.train_step()
            task_2.train_step()
            self._compare_states(task.get_classy_state(), task_2.get_classy_state())
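The _compare_states and _compare_samples helpers above live in the test base class and are not shown in this snippet. A minimal sketch of what such a comparator could look like, assuming the classy state is a nested structure of dicts, lists, tensors, and plain values (an illustration, not the actual implementation):

    def _compare_states(self, state_1, state_2):
        # illustrative sketch: recursively compare two nested state structures,
        # using torch.allclose for tensors and plain equality for everything else
        if isinstance(state_1, dict):
            self.assertEqual(set(state_1.keys()), set(state_2.keys()))
            for key in state_1:
                self._compare_states(state_1[key], state_2[key])
        elif isinstance(state_1, (list, tuple)):
            self.assertEqual(len(state_1), len(state_2))
            for item_1, item_2 in zip(state_1, state_2):
                self._compare_states(item_1, item_2)
        elif isinstance(state_1, torch.Tensor):
            self.assertTrue(torch.allclose(state_1, state_2))
        else:
            self.assertEqual(state_1, state_2)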
Example #2
    def test_prepare(self):
        pre_train_config = self._get_pre_train_config()
        pre_train_task = build_task(pre_train_config)
        pre_train_task.prepare()
        checkpoint = get_checkpoint_dict(pre_train_task, {})

        fine_tuning_config = self._get_fine_tuning_config()
        fine_tuning_task = build_task(fine_tuning_config)
        # cannot prepare a fine tuning task without a pre training checkpoint
        with self.assertRaises(Exception):
            fine_tuning_task.prepare()

        fine_tuning_task.set_pretrained_checkpoint(checkpoint)
        fine_tuning_task.prepare()

        # test a fine tuning task with incompatible heads
        fine_tuning_config = self._get_fine_tuning_config(head_num_classes=10)
        fine_tuning_task = build_task(fine_tuning_config)
        fine_tuning_task.set_pretrained_checkpoint(checkpoint)
        # cannot prepare a fine tuning task with a pre training checkpoint which
        # has incompatible heads
        with self.assertRaises(Exception):
            fine_tuning_task.prepare()

        fine_tuning_task.set_pretrained_checkpoint(checkpoint).set_reset_heads(
            True)
        fine_tuning_task.prepare()
    def test_checkpointing(self):
        # make checkpoint directory
        checkpoint_folder = self.base_dir + "/checkpoint/"
        os.mkdir(checkpoint_folder)

        config = get_fast_test_task_config()
        cuda_available = torch.cuda.is_available()
        task = build_task(config)

        task.prepare(use_gpu=cuda_available)

        # create a checkpoint hook
        checkpoint_hook = CheckpointHook(checkpoint_folder, {},
                                         phase_types=["train"])

        # call the on end phase function
        checkpoint_hook.on_phase_end(task)

        # we should be able to train a task using the checkpoint on all available
        # devices
        for use_gpu in {False, cuda_available}:
            # load the checkpoint
            checkpoint = load_checkpoint(checkpoint_folder)

            # create a new task
            task = build_task(config)

            # set the checkpoint
            task.set_checkpoint(checkpoint)

            task.prepare(use_gpu=use_gpu)

            # we should be able to run the trainer using the checkpoint
            trainer = LocalTrainer(use_gpu=use_gpu)
            trainer.train(task)
    def test_train_parametric_loss(self):
        heads_num_classes = 100
        pre_train_config = self._get_pre_train_config(
            head_num_classes=heads_num_classes)
        pre_train_config["loss"] = {
            "name": "batchnorm_cross_entropy_loss",
            "num_classes": heads_num_classes,
        }
        pre_train_task = build_task(pre_train_config)
        trainer = LocalTrainer()
        trainer.train(pre_train_task)
        checkpoint = get_checkpoint_dict(pre_train_task, {})

        fine_tuning_config = self._get_fine_tuning_config(
            head_num_classes=heads_num_classes)
        fine_tuning_config["loss"] = {
            "name": "batchnorm_cross_entropy_loss",
            "num_classes": heads_num_classes,
        }

        fine_tuning_task = build_task(fine_tuning_config)
        fine_tuning_task._set_pretrained_checkpoint_dict(
            copy.deepcopy(checkpoint))
        # run in test mode to compare the loss state. Since we have a BatchNorm module in
        # the loss, its moving mean/std should be unchanged when we run in test-only mode
        fine_tuning_task.set_test_only(True)
        loss_state = copy.deepcopy(fine_tuning_task.loss.get_classy_state())
        trainer.train(fine_tuning_task)
        self._compare_state_dict(loss_state,
                                 fine_tuning_task.loss.get_classy_state())
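The batchnorm_cross_entropy_loss named in the configs above is defined in the test module rather than in the library. A minimal sketch of what such a loss might look like (an assumption for illustration; the real test class may differ) is a cross entropy preceded by a BatchNorm1d, so the loss itself carries running mean/var buffers in its classy state:

import torch.nn as nn

from classy_vision.losses import ClassyLoss, register_loss


@register_loss("batchnorm_cross_entropy_loss")
class BatchNormCrossEntropyLoss(ClassyLoss):
    """Cross entropy preceded by BatchNorm1d; the BN running statistics are the
    loss state that the test above snapshots and compares."""

    def __init__(self, num_classes):
        super().__init__()
        self.bn = nn.BatchNorm1d(num_classes)
        self.xent = nn.CrossEntropyLoss()

    @classmethod
    def from_config(cls, config):
        return cls(config["num_classes"])

    def forward(self, output, target):
        return self.xent(self.bn(output), target)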
Example #5
    def test_checkpointing(self):
        """
        Tests checkpointing by running train steps to make sure they run the
        same way after loading from a checkpoint.
        """
        config = get_fast_test_task_config()
        task = build_task(config).set_hooks([LossLrMeterLoggingHook()])
        task_2 = build_task(config).set_hooks([LossLrMeterLoggingHook()])

        task.set_use_gpu(torch.cuda.is_available())

        # only train 1 phase at a time
        trainer = LimitedPhaseTrainer(num_phases=1)

        while not task.done_training():
            # set task's state as task_2's checkpoint
            task_2._set_checkpoint_dict(
                get_checkpoint_dict(task, {}, deep_copy=True))

            # task 2 should have the same state before training
            self._compare_states(task.get_classy_state(),
                                 task_2.get_classy_state())

            # train for one phase
            trainer.train(task)
            trainer.train(task_2)

            # task 2 should have the same state after training
            self._compare_states(task.get_classy_state(),
                                 task_2.get_classy_state())
    def test_update_classy_model(self):
        """
        Tests that update_classy_model successfully updates a model from a
        checkpoint state.
        """
        config = get_fast_test_task_config()
        task = build_task(config)
        trainer = LocalTrainer()
        trainer.train(task)
        for reset_heads in [False, True]:
            task_2 = build_task(config)
            # prepare task_2 for the right device
            task_2.prepare()
            update_classy_model(task_2.model,
                                task.model.get_classy_state(deep_copy=True),
                                reset_heads)
            self._compare_model_state(
                task.model.get_classy_state(),
                task_2.model.get_classy_state(),
                check_heads=not reset_heads,
            )
            if reset_heads:
                # the model head states should be different
                with self.assertRaises(Exception):
                    self._compare_model_state(
                        task.model.get_classy_state(),
                        task_2.model.get_classy_state(),
                        check_heads=True,
                    )
    def test_final_train_checkpoint(self):
        """Test that a train phase checkpoint with a where of 1.0 can be loaded"""

        config = get_fast_test_task_config()
        task = build_task(config).set_hooks(
            [CheckpointHook(self.base_dir, {}, phase_types=["train"])])
        task_2 = build_task(config)

        use_gpu = torch.cuda.is_available()

        trainer = LocalTrainer(use_gpu=use_gpu)
        trainer.train(task)

        # load the final train checkpoint
        checkpoint = load_checkpoint(self.base_dir)

        # make sure fetching the where raises an exception, which means that
        # where is >= 1.0
        with self.assertRaises(Exception):
            task.where

        # set task_2's state as task's final train checkpoint
        task_2.set_checkpoint(checkpoint)
        task_2.prepare(use_gpu=use_gpu)

        # we should be able to train the task
        trainer.train(task_2)
Example #8
    def test_fp16_grad_compression(self):
        # there is no API defined to check that a DDP hook has been enabled, so we just
        # test that we set the right variables
        config = copy.deepcopy(get_fast_test_task_config())
        task = build_task(config)
        self.assertFalse(task.fp16_grad_compress)

        config.setdefault("distributed", {})
        config["distributed"]["fp16_grad_compress"] = True

        task = build_task(config)
        self.assertTrue(task.fp16_grad_compress)
    def test_build_task(self):
        config = get_test_task_config()
        task = build_task(config)
        self.assertTrue(isinstance(task, ClassificationTask))
        # check that AMP is disabled by default
        self.assertIsNone(task.amp_args)

        # test a valid AMP opt level
        config = copy.deepcopy(config)
        config["amp_args"] = {"opt_level": "O1"}
        task = build_task(config)
        self.assertTrue(isinstance(task, ClassificationTask))
    def test_build_task(self):
        config = self._get_fine_tuning_config()
        task = build_task(config)
        self.assertIsInstance(task, FineTuningTask)

        config = self._get_fine_tuning_config(pretrained_checkpoint=True)

        with mock.patch(
                "classy_vision.tasks.FineTuningTask.set_pretrained_checkpoint"
        ):
            task = build_task(config)

        self.assertIsInstance(task, FineTuningTask)
Example #11
    def test_update_classy_state(self):
        """
        Tests that update_classy_state successfully updates a task from a
        checkpoint state.
        """
        config = get_fast_test_task_config()
        task = build_task(config)
        task_2 = build_task(config)
        task_2.prepare()
        trainer = LocalTrainer()
        trainer.train(task)
        update_classy_state(task_2, task.get_classy_state(deep_copy=True))
        self._compare_states(task.get_classy_state(), task_2.get_classy_state())
    def test_checkpointing_different_device(self):
        config = get_fast_test_task_config()
        task = build_task(config)
        task_2 = build_task(config)

        for use_gpu in [True, False]:
            task.prepare(use_gpu=use_gpu)

            # set task's state as task_2's checkpoint
            task_2.set_checkpoint(get_checkpoint_dict(task, {}, deep_copy=True))

            # we should be able to run the trainer using state from a different device
            trainer = LocalTrainer(use_gpu=not use_gpu)
            trainer.train(task_2)
    def test_training(self):
        # Test Apex AMP training
        config = get_fast_test_task_config()
        config["amp_args"] = {"opt_level": "O2"}
        task = build_task(config)
        task.set_use_gpu(True)
        trainer = LocalTrainer()
        trainer.train(task)

        # Test PyTorch AMP training
        config["amp_args"] = {"amp_type": "pytorch"}
        task = build_task(config)
        task.set_use_gpu(True)
        trainer = LocalTrainer()
        trainer.train(task)
Example #14
    def test_from_checkpoint(self):
        config = get_test_task_config()
        for use_head in [True, False]:
            config["model"] = self.get_model_config(use_head)
            task = build_task(config)
            task.prepare()

            checkpoint_folder = f"{self.base_dir}/{use_head}/"
            input_args = {"config": config}

            # Simulate training by setting the model parameters to zero
            for param in task.model.parameters():
                param.data.zero_()

            checkpoint_hook = CheckpointHook(
                checkpoint_folder, input_args, phase_types=["train"]
            )

            # Create checkpoint dir, save checkpoint
            os.mkdir(checkpoint_folder)
            checkpoint_hook.on_start(task)

            task.train = True
            checkpoint_hook.on_phase_end(task)

            # Model should be checkpointed. Load and compare.
            checkpoint = load_checkpoint(checkpoint_folder)

            model = ClassyModel.from_checkpoint(checkpoint)
            self.assertTrue(isinstance(model, MyTestModel))

            # All parameters must be zero
            for param in model.parameters():
                self.assertTrue(torch.all(param.data == 0))
Example #15
    def execute_hook(self, config, torchscript_folder,
                     torchscript_hook) -> None:
        task = build_task(config)
        task.prepare()

        # create checkpoint dir, verify on_start hook runs
        os.mkdir(torchscript_folder)
        torchscript_hook.on_start(task)

        task.train = True
        # call the on end function
        torchscript_hook.on_end(task)

        # load torchscript file
        torchscript_file_name = (
            f"{torchscript_hook.torchscript_folder}/{TORCHSCRIPT_FILE}")
        torchscript = torch.jit.load(torchscript_file_name)
        # compare model load from checkpoint vs torchscript
        with torch.no_grad():
            batchsize = 1
            model = task.model
            input_data = torch.randn((batchsize, ) + model.input_shape,
                                     dtype=torch.float)
            if torch.cuda.is_available():
                input_data = input_data.cuda()
                model = model.cuda()
                torchscript = torchscript.cuda()
            checkpoint_out = model(input_data)
            torchscript_out = torchscript(input_data)
            self.assertTrue(
                torch.allclose(checkpoint_out, torchscript_out, atol=1e-5))
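A driver for execute_hook could look like the following sketch. It assumes a TorchscriptHook whose first constructor argument is the output folder; the exact signature should be checked against classy_vision.hooks before relying on it:

    def test_torchscripting(self):
        # hypothetical caller of execute_hook(); the folder layout mirrors the
        # checkpoint tests above
        config = get_fast_test_task_config()
        torchscript_folder = self.base_dir + "/torchscript_end_test/"
        torchscript_hook = TorchscriptHook(torchscript_folder)
        self.execute_hook(config, torchscript_folder, torchscript_hook)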
Example #16
    def test_training(self):
        config = get_fast_test_task_config()
        config["amp_args"] = {"opt_level": "O2"}
        task = build_task(config)
        task.set_use_gpu(True)
        trainer = LocalTrainer()
        trainer.train(task)
Example #17
    def test_logging(self, mock_get_rank: mock.MagicMock) -> None:
        """
        Test that the logging happens as expected and the loss and lr values are
        correct.
        """
        rank = 5
        mock_get_rank.return_value = rank

        # set up the task and state
        config = get_test_task_config()
        config["dataset"]["train"]["batchsize_per_replica"] = 2
        config["dataset"]["test"]["batchsize_per_replica"] = 5
        task = build_task(config)
        task.prepare()

        losses = [1.2, 2.3, 3.4, 4.5]

        local_variables = {}
        task.phase_idx = 0

        for log_freq in [5, None]:
            # create a loss lr meter hook
            loss_lr_meter_hook = LossLrMeterLoggingHook(log_freq=log_freq)

            # check that _log_loss_meters() is called after on_step() every
            # log_freq batches and after on_phase_end()
            # and _log_lr() is called after on_step() every log_freq batches
            # and after on_phase_end()
            with mock.patch.object(loss_lr_meter_hook,
                                   "_log_loss_meters") as mock_fn:
                with mock.patch.object(loss_lr_meter_hook,
                                       "_log_lr") as mock_lr_fn:
                    num_batches = 20

                    for i in range(num_batches):
                        task.losses = list(range(i))
                        loss_lr_meter_hook.on_step(task, local_variables)
                        if log_freq is not None and i and i % log_freq == 0:
                            mock_fn.assert_called_with(task, local_variables)
                            mock_fn.reset_mock()
                            mock_lr_fn.assert_called_with(
                                task, local_variables)
                            mock_lr_fn.reset_mock()
                            continue
                        mock_fn.assert_not_called()
                        mock_lr_fn.assert_not_called()

                    loss_lr_meter_hook.on_phase_end(task, local_variables)
                    mock_fn.assert_called_with(task, local_variables)
                    if task.train:
                        mock_lr_fn.assert_called_with(task, local_variables)

            # test _log_loss_meters() and _log_lr()
            task.losses = losses

            with self.assertLogs():
                loss_lr_meter_hook._log_loss_meters(task, local_variables)
                loss_lr_meter_hook._log_lr(task, local_variables)

            task.phase_idx += 1
Example #18
    def test_logged_lr(self):
        # Mock LR scheduler
        def scheduler_mock(where):
            return where

        mock_lr_scheduler = mock.Mock(side_effect=scheduler_mock)
        mock_lr_scheduler.update_interval = UpdateInterval.STEP
        config = get_test_mlp_task_config()
        config["num_epochs"] = 3
        config["dataset"]["train"]["batchsize_per_replica"] = 10
        config["dataset"]["test"]["batchsize_per_replica"] = 5
        task = build_task(config)
        task.optimizer.param_schedulers["lr"] = mock_lr_scheduler
        trainer = LocalTrainer()

        # 2 LR updates per epoch x 3 epochs = 6 LR values in total
        lr_order = [0.0, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6]
        lr_list = []

        class LRLoggingHook(ClassyHook):
            on_end = ClassyHook._noop
            on_phase_end = ClassyHook._noop
            on_phase_start = ClassyHook._noop
            on_start = ClassyHook._noop

            def on_step(self, task):
                if task.train:
                    lr_list.append(task.optimizer.parameters.lr)

        hook = LRLoggingHook()
        task.set_hooks([hook])
        trainer.train(task)
        self.assertEqual(lr_list, lr_order)
Example #19
def main(local_rank, c10d_backend, rdzv_init_url, max_world_size, classy_args):
    torch.manual_seed(0)
    set_video_backend(classy_args.video_backend)

    # Loads config, sets up task
    config = load_json(classy_args.config_file)

    task = build_task(config)

    # Load checkpoint, if available
    checkpoint = load_checkpoint(classy_args.checkpoint_folder)
    task.set_checkpoint(checkpoint)

    pretrained_checkpoint = load_checkpoint(classy_args.pretrained_checkpoint_folder)
    if pretrained_checkpoint is not None:
        assert isinstance(
            task, FineTuningTask
        ), "Can only use a pretrained checkpoint for fine tuning tasks"
        task.set_pretrained_checkpoint(pretrained_checkpoint)

    hooks = [
        LossLrMeterLoggingHook(classy_args.log_freq),
        ModelComplexityHook(),
        TimeMetricsHook(),
    ]

    if classy_args.checkpoint_folder != "":
        args_dict = vars(classy_args)
        args_dict["config"] = config
        hooks.append(
            CheckpointHook(
                classy_args.checkpoint_folder,
                args_dict,
                checkpoint_period=classy_args.checkpoint_period,
            )
        )
    if classy_args.profiler:
        hooks.append(ProfilerHook())

    task.set_hooks(hooks)

    assert c10d_backend == Backend.NCCL or c10d_backend == Backend.GLOO
    if c10d_backend == torch.distributed.Backend.NCCL:
        # needed to enable NCCL error handling
        os.environ["NCCL_BLOCKING_WAIT"] = "1"

    coordinator = CoordinatorP2P(
        c10d_backend=c10d_backend,
        init_method=rdzv_init_url,
        max_num_trainers=max_world_size,
        process_group_timeout=60000,
    )
    trainer = ElasticTrainer(
        use_gpu=classy_args.device == "gpu",
        num_dataloader_workers=classy_args.num_workers,
        local_rank=local_rank,
        elastic_coordinator=coordinator,
        input_args={},
    )
    trainer.train(task)
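For illustration, a local multi-process launch of this main() via torch.multiprocessing.spawn could look like the sketch below. The classy_args namespace and its values are assumptions that merely mirror the attributes main() reads; the real script builds them with its own argument parser:

import argparse

import torch.multiprocessing as mp
from torch.distributed import Backend

if __name__ == "__main__":
    # example values only; these fields mirror the attributes accessed in main()
    classy_args = argparse.Namespace(
        video_backend="pyav",
        config_file="configs/template_config.json",
        checkpoint_folder="",
        pretrained_checkpoint_folder="",
        checkpoint_period=1,
        log_freq=100,
        profiler=False,
        device="cpu",
        num_workers=0,
    )
    world_size = 2
    # spawn passes the process index as the first argument (local_rank)
    mp.spawn(
        main,
        args=(Backend.GLOO, "tcp://127.0.0.1:29500", world_size, classy_args),
        nprocs=world_size,
    )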
Example #20
    def test_logged_lr(self):
        class SchedulerMock(ClassyParamScheduler):
            def __call__(self, where):
                return where

        mock_lr_scheduler = SchedulerMock(UpdateInterval.STEP)
        config = get_test_mlp_task_config()
        config["num_epochs"] = 3
        config["dataset"]["train"]["batchsize_per_replica"] = 10
        config["dataset"]["test"]["batchsize_per_replica"] = 5
        task = build_task(config)
        task.set_optimizer_schedulers({"lr": mock_lr_scheduler})
        trainer = LocalTrainer()

        # 2 LR updates per epoch x 3 epochs = 6 LR values in total
        lr_order = [0.0, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6]
        lr_list = []

        class LRLoggingHook(ClassyHook):
            on_end = ClassyHook._noop
            on_phase_end = ClassyHook._noop
            on_phase_start = ClassyHook._noop
            on_start = ClassyHook._noop

            def on_step(self, task):
                if task.train:
                    lr_list.append(task.optimizer.options_view.lr)

        hook = LRLoggingHook()
        task.set_hooks([hook])
        trainer.train(task)
        self.assertEqual(lr_list, lr_order)
Example #21
    def test_streaming_dataset(self):
        """
        Test that streaming datasets return the correct number of batches, and that
        the length is also calculated correctly.
        """
        config = get_test_task_config()
        dataset_config = {
            "name": "synthetic_image_streaming",
            "split": "train",
            "crop_size": 224,
            "class_ratio": 0.5,
            "num_samples": 2000,
            "length": 4000,
            "seed": 0,
            "batchsize_per_replica": 32,
            "use_shuffle": True,
        }
        expected_batches = 62
        config["dataset"]["train"] = dataset_config
        task = build_task(config)
        task.prepare()
        task.advance_phase()
        # test that the number of batches is as expected
        self.assertEqual(task.num_batches_per_phase, expected_batches)

        # test that the data iterator returns the expected number of batches
        data_iterator = task.data_iterator
        self._test_number_of_batches(data_iterator, expected_batches)

        # test that the dataloader can be rebuilt
        task.build_dataloaders_for_current_phase()
        task.create_data_iterators()
        data_iterator = task.data_iterator
        self._test_number_of_batches(data_iterator, expected_batches)
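The _test_number_of_batches helper belongs to the test class and is not shown above; a plausible minimal version (an assumption, not the actual code) simply exhausts the iterator and counts the batches it yields:

    def _test_number_of_batches(self, data_iterator, expected_batches):
        # drain the iterator and count the batches it produces
        num_batches = sum(1 for _ in data_iterator)
        self.assertEqual(num_batches, expected_batches)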
    def test_test_only_task(self):
        """
        Tests the task in test mode by running train steps
        to make sure they run as expected on a
        test_only task.
        """
        test_config = get_fast_test_task_config()
        test_config["test_only"] = True

        # delete train dataset
        del test_config["dataset"]["train"]

        test_only_task = build_task(test_config).set_hooks(
            [LossLrMeterLoggingHook()])

        test_only_task.prepare()
        test_state = test_only_task.get_classy_state()

        # We expect the test-only state to be in test mode, regardless of the train state
        self.assertFalse(test_state["train"])

        # Num updates should be 0
        self.assertEqual(test_state["num_updates"], 0)

        # Verify task will run
        trainer = LocalTrainer()
        trainer.train(test_only_task)
    def test_logged_lr(self):
        # Mock LR scheduler
        def scheduler_mock(where):
            return where

        mock_lr_scheduler = mock.Mock(side_effect=scheduler_mock)
        mock_lr_scheduler.update_interval = UpdateInterval.STEP
        config = get_test_mlp_task_config()
        config["num_epochs"] = 3
        config["dataset"]["train"]["batchsize_per_replica"] = 5
        config["dataset"]["test"]["batchsize_per_replica"] = 5
        task = build_task(config)
        task.optimizer.lr_scheduler = mock_lr_scheduler
        trainer = LocalTrainer()

        # 2 LR updates per epoch; at the end of each train epoch, the LR is
        # logged an additional time
        lr_order = [
            0.0, 1 / 6, 1 / 6, 2 / 6, 3 / 6, 3 / 6, 4 / 6, 5 / 6, 5 / 6
        ]
        lr_list = []

        def mock_log_lr(task: ClassyTask, local_variables) -> None:
            lr_list.append(task.optimizer.lr)

        with mock.patch.object(LossLrMeterLoggingHook,
                               "_log_lr",
                               side_effect=mock_log_lr):
            hook = LossLrMeterLoggingHook(1)
            task.set_hooks([hook])
            trainer.train(task)
            self.assertEqual(lr_list, lr_order)
    def test_test_only_checkpointing(self):
        """
        Tests checkpointing by running train steps to make sure they
        run the same way after loading a training task checkpoint
        into a test_only task.
        """
        train_config = get_fast_test_task_config()
        train_config["num_epochs"] = 10
        test_config = get_fast_test_task_config()
        test_config["test_only"] = True
        train_task = build_task(train_config).set_hooks(
            [LossLrMeterLoggingHook()])
        test_only_task = build_task(test_config).set_hooks(
            [LossLrMeterLoggingHook()])

        use_gpu = torch.cuda.is_available()

        # prepare the train task for the right device
        train_task.prepare(use_gpu=use_gpu)

        # test in both train and test mode
        trainer = LocalTrainer(use_gpu=use_gpu)
        trainer.train(train_task)

        # set task's state as task_2's checkpoint
        test_only_task.set_checkpoint(
            get_checkpoint_dict(train_task, {}, deep_copy=True))
        test_only_task.prepare(use_gpu=use_gpu)
        test_state = test_only_task.get_classy_state()

        # We expect the phase idx to be different for a test only task
        self.assertEqual(test_state["phase_idx"], -1)

        # We expect the test-only state to be in test mode, regardless of the train state
        self.assertFalse(test_state["train"])

        # Num updates should be 0
        self.assertEqual(test_state["num_updates"], 0)

        # train_phase_idx should be -1
        self.assertEqual(test_state["train_phase_idx"], -1)

        # Verify task will run
        trainer = LocalTrainer(use_gpu=use_gpu)
        trainer.train(test_only_task)
    def test_train(self):
        pre_train_config = self._get_pre_train_config(head_num_classes=100)
        pre_train_task = build_task(pre_train_config)
        trainer = LocalTrainer()
        trainer.train(pre_train_task)
        checkpoint = get_checkpoint_dict(pre_train_task, {})

        for reset_heads, heads_num_classes in [(False, 100), (True, 20)]:
            for freeze_trunk in [True, False]:
                fine_tuning_config = self._get_fine_tuning_config(
                    head_num_classes=heads_num_classes)
                fine_tuning_task = build_task(fine_tuning_config)
                fine_tuning_task = (
                    fine_tuning_task._set_pretrained_checkpoint_dict(
                        copy.deepcopy(checkpoint)).set_reset_heads(
                            reset_heads).set_freeze_trunk(freeze_trunk))
                # run in test mode to compare the model state
                fine_tuning_task.set_test_only(True)
                trainer.train(fine_tuning_task)
                self._compare_model_state(
                    pre_train_task.model.get_classy_state(),
                    fine_tuning_task.model.get_classy_state(),
                    check_heads=not reset_heads,
                )
                # run in train mode to check accuracy
                fine_tuning_task.set_test_only(False)
                trainer.train(fine_tuning_task)
                if freeze_trunk:
                    # if the trunk is frozen, the states should be the same
                    self._compare_model_state(
                        pre_train_task.model.get_classy_state(),
                        fine_tuning_task.model.get_classy_state(),
                        check_heads=False,
                    )
                else:
                    # the trunk isn't frozen, so the states should be different
                    with self.assertRaises(Exception):
                        self._compare_model_state(
                            pre_train_task.model.get_classy_state(),
                            fine_tuning_task.model.get_classy_state(),
                            check_heads=False,
                        )

                accuracy = fine_tuning_task.meters[0].value["top_1"]
                self.assertAlmostEqual(accuracy, 1.0)
Example #26
    def test_train(self):
        config = get_test_mlp_task_config()
        task = build_task(config)
        num_samples = 10
        precise_batch_norm_hook = PreciseBatchNormHook(num_samples)
        task.set_hooks([precise_batch_norm_hook])
        task.prepare()
        trainer = ClassyTrainer()
        trainer.train(task)
    def test_build_task(self):
        config = get_test_task_config()
        task = build_task(config)
        self.assertTrue(isinstance(task, ClassificationTask))
        # check that AMP is disabled by default
        self.assertIsNone(task.amp_opt_level)

        # test a valid AMP opt level
        config = copy.deepcopy(config)
        config["amp_opt_level"] = "O1"
        task = build_task(config)
        self.assertTrue(isinstance(task, ClassificationTask))

        # test an invalid AMP opt level
        config = copy.deepcopy(config)
        config["amp_opt_level"] = "O5"
        with self.assertRaises(Exception):
            task = build_task(config)
    def test_train_step(self):
        # test that the model can be run in a train step
        model = models.resnet34(pretrained=False)
        classy_model = ClassyModelWrapper(model)

        config = get_fast_test_task_config()
        task = build_task(config)
        task.set_model(classy_model)
        trainer = LocalTrainer()
        trainer.train(task)
Example #29
    def test_synchronize_losses_when_losses_empty(self):
        config = get_fast_test_task_config()
        task = build_task(config)
        task.prepare()

        task.set_use_gpu(torch.cuda.is_available())

        # Losses should be empty when creating task
        self.assertEqual(len(task.losses), 0)

        task.synchronize_losses()
Example #30
    def test_synchronize_losses_non_distributed(self):
        """
        Tests that synchronize_losses has no side effects in a non-distributed setting.
        """
        test_config = get_fast_test_task_config()
        task = build_task(test_config)
        task.prepare()

        old_losses = copy.deepcopy(task.losses)
        task.synchronize_losses()
        self.assertEqual(old_losses, task.losses)