def test_build_model(self): cfg = self._get_default_cfg() cfg.INPUT.MIN_SIZE_TRAIN = (60,) cfg.MODEL.KMEANS_ANCHORS.KMEANS_ANCHORS_ON = True cfg.MODEL.KMEANS_ANCHORS.NUM_CLUSTERS = 3 cfg.MODEL.KMEANS_ANCHORS.NUM_TRAINING_IMG = 5 cfg.MODEL.KMEANS_ANCHORS.DATASETS = ("toy_dataset",) cfg.MODEL.DEVICE = "cpu" cfg.MODEL.ANCHOR_GENERATOR.NAME = "KMeansAnchorGenerator" with make_temp_directory("detectron2go_tmp_dataset") as dataset_dir: image_dir = os.path.join(dataset_dir, "images") os.makedirs(image_dir) image_generator = LocalImageGenerator(image_dir, width=80, height=60) with register_toy_dataset( "toy_dataset", image_generator, num_images=cfg.MODEL.KMEANS_ANCHORS.NUM_TRAINING_IMG, ): model = self.runner.build_model(cfg) trainer = SimpleTrainer(model, data_loader=[], optimizer=None) trainer_hooks = [compute_kmeans_anchors_hook(self.runner, cfg)] trainer.register_hooks(trainer_hooks) trainer.before_train() anchor_generator = model.proposal_generator.anchor_generator cell_anchors = [x for x in anchor_generator.cell_anchors] gt_anchors = np.array( [ [-20, -15, 20, 15] # toy_dataset's bbox is half size of image for _ in range(cfg.MODEL.KMEANS_ANCHORS.NUM_CLUSTERS) ] ) np.testing.assert_allclose(cell_anchors[0], gt_anchors)
def test_build_model(self): cfg = self._get_default_cfg() cfg.INPUT.MIN_SIZE_TRAIN = (60, ) cfg.MODEL.KMEANS_ANCHORS.KMEANS_ANCHORS_ON = True cfg.MODEL.KMEANS_ANCHORS.NUM_CLUSTERS = 3 cfg.MODEL.KMEANS_ANCHORS.NUM_TRAINING_IMG = 5 cfg.MODEL.KMEANS_ANCHORS.DATASETS = ("toy_dataset", ) cfg.MODEL.DEVICE = "cpu" cfg.MODEL.ANCHOR_GENERATOR.NAME = "KMeansAnchorGenerator" with register_toy_coco_dataset( "toy_dataset", image_size=(80, 60), # w, h num_images=cfg.MODEL.KMEANS_ANCHORS.NUM_TRAINING_IMG, ): model = self.runner.build_model(cfg) trainer = SimpleTrainer(model, data_loader=[], optimizer=None) trainer_hooks = [compute_kmeans_anchors_hook(self.runner, cfg)] trainer.register_hooks(trainer_hooks) trainer.before_train() anchor_generator = model.proposal_generator.anchor_generator cell_anchors = list(anchor_generator.cell_anchors) gt_anchors = np.array([ [-20, -15, 20, 15] # toy_dataset's bbox is half size of image for _ in range(cfg.MODEL.KMEANS_ANCHORS.NUM_CLUSTERS) ]) np.testing.assert_allclose(cell_anchors[0], gt_anchors)
def _get_trainer_hooks(self, cfg, model, optimizer, scheduler, periodic_checkpointer, trainer): return [ hooks.IterationTimer(), model_ema.EMAHook(cfg, model) if cfg.MODEL_EMA.ENABLED else None, self._create_data_loader_hook(cfg), self._create_after_step_hook(cfg, model, optimizer, scheduler, periodic_checkpointer), hooks.EvalHook( cfg.TEST.EVAL_PERIOD, lambda: self.do_test(cfg, model, train_iter=trainer.iter), eval_after_train= False, # done by a separate do_test call in tools/train_net.py ), kmeans_anchors.compute_kmeans_anchors_hook(self, cfg), self._create_qat_hook(cfg) if cfg.QUANTIZATION.QAT.ENABLED else None, ]
def do_train(self, cfg, model, resume): add_print_flops_callback(cfg, model, disable_after_callback=True) optimizer = self.build_optimizer(cfg, model) scheduler = self.build_lr_scheduler(cfg, optimizer) checkpointer = self.build_checkpointer( cfg, model, save_dir=cfg.OUTPUT_DIR, optimizer=optimizer, scheduler=scheduler, ) checkpoint = checkpointer.resume_or_load(cfg.MODEL.WEIGHTS, resume=resume) start_iter = (checkpoint.get("iteration", -1) if resume and checkpointer.has_checkpoint() else -1) # The checkpoint stores the training iteration that just finished, thus we start # at the next iteration (or iter zero if there's no checkpoint). start_iter += 1 max_iter = cfg.SOLVER.MAX_ITER periodic_checkpointer = PeriodicCheckpointer( checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD, max_iter=max_iter) data_loader = self.build_detection_train_loader(cfg) def _get_model_with_abnormal_checker(model): if not cfg.ABNORMAL_CHECKER.ENABLED: return model tbx_writer = _get_tbx_writer( get_tensorboard_log_dir(cfg.OUTPUT_DIR)) writers = abnormal_checker.get_writers(cfg, tbx_writer) checker = abnormal_checker.AbnormalLossChecker(start_iter, writers) ret = abnormal_checker.AbnormalLossCheckerWrapper(model, checker) return ret trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else SimpleTrainer)( _get_model_with_abnormal_checker(model), data_loader, optimizer) trainer_hooks = [ hooks.IterationTimer(), model_ema.EMAHook(cfg, model) if cfg.MODEL_EMA.ENABLED else None, self._create_after_step_hook(cfg, model, optimizer, scheduler, periodic_checkpointer), hooks.EvalHook( cfg.TEST.EVAL_PERIOD, lambda: self.do_test(cfg, model, train_iter=trainer.iter), ), kmeans_anchors.compute_kmeans_anchors_hook(self, cfg), self._create_qat_hook(cfg) if cfg.QUANTIZATION.QAT.ENABLED else None, ] if comm.is_main_process(): tbx_writer = _get_tbx_writer( get_tensorboard_log_dir(cfg.OUTPUT_DIR)) writers = [ CommonMetricPrinter(max_iter), JSONWriter(os.path.join(cfg.OUTPUT_DIR, "metrics.json")), tbx_writer, ] trainer_hooks.append(hooks.PeriodicWriter(writers)) trainer.register_hooks(trainer_hooks) trainer.train(start_iter, max_iter) if hasattr(self, 'original_cfg'): table = get_cfg_diff_table(cfg, self.original_cfg) logger.info( "GeneralizeRCNN Runner ignoring training config change: \n" + table) trained_cfg = self.original_cfg.clone() else: trained_cfg = cfg.clone() with temp_defrost(trained_cfg): trained_cfg.MODEL.WEIGHTS = checkpointer.get_checkpoint_file() return {"model_final": trained_cfg}