def load_models(
    clf_timestamp: str = production_models._CLASSIFIER["timestamp"],
    det_timestamp: str = production_models._DETECTOR["timestamp"],
) -> Tuple[torch.nn.Module, torch.nn.Module]:
    """Loads the given timestamps for the classification and detection models.

    Args:
        clf_timestamp: Which classification model to load.
        det_timestamp: Which detection model to load.

    Returns:
        Both models, ready for inference.
    """
    clf_model = classifier.Classifier(
        timestamp=clf_timestamp, half_precision=torch.cuda.is_available()
    )
    clf_model.eval()

    # TODO(alex): Pass in the confidence for the detector.
    det_model = detector.Detector(
        timestamp=det_timestamp,
        confidence=0.2,
        half_precision=torch.cuda.is_available(),
    )
    det_model.eval()

    # Use FP16 when inferencing on GPU.
    if torch.cuda.is_available():
        det_model.cuda()
        det_model.half()
        clf_model.cuda()
        clf_model.half()

    return clf_model, det_model
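# Example usage (a minimal sketch; assumes the default production timestamps are
# available locally and that `image_batch` is a hypothetical BCHW float tensor
# already on the models' device and dtype):
#
#   clf_model, det_model = load_models()
#   with torch.no_grad():
#       class_logits = clf_model(image_batch)
#       detections = det_model(image_batch)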
def benchmark(timestamp: str, model_type: str, batch_size: int, run_time: float) -> None:
    """Benchmarks a model.

    This function loads the specified model, creates a random tensor from the
    model's internal height and width and the given batch size, then performs
    forward passes through the model for :attr:`run_time` seconds.

    Args:
        timestamp: The model's specific timestamp.
        model_type: Which type of model this is.
        batch_size: The batch size to benchmark the model on.
        run_time: How long to run the benchmark in seconds.
    """
    # Construct the model.
    if model_type == "classifier":
        model = classifier.Classifier(timestamp=timestamp, half_precision=True)
    elif model_type == "detector":
        model = detector.Detector(timestamp=timestamp, half_precision=True)
    else:
        raise ValueError(f"Unsupported model type: {model_type}.")

    # Put the model in inference mode so train-only layers don't skew timings.
    model.eval()

    batch = torch.randn((batch_size, 3, model.image_size, model.image_size))
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model.cuda()
        model.half()
        batch = batch.cuda().half()

    print("Starting inference.")
    start_loop = time.perf_counter()
    times = []
    with torch.no_grad():
        while time.perf_counter() - start_loop < run_time:
            start = time.perf_counter()
            model(batch)
            # CUDA kernels launch asynchronously; wait for the forward pass to
            # finish so the measured latency is accurate.
            if use_cuda:
                torch.cuda.synchronize()
            times.append(time.perf_counter() - start)

    latency = sum(times) / len(times)
    print(
        f"Total time: {sum(times):.4f}s.\n"
        f"Average batch inference time: {latency:.4f}s. FPS: {batch_size / latency:.2f}."
    )
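# Example invocation (a sketch; the timestamp below is a placeholder, not a
# real checkpoint name):
#
#   benchmark(
#       timestamp="2020-09-20T00.00.00",
#       model_type="classifier",
#       batch_size=8,
#       run_time=30.0,
#   )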
def train(
    local_rank: int,
    world_size: int,
    model_cfg: dict,
    train_cfg: dict,
    save_dir: pathlib.Path,
    initial_timestamp: str = None,
) -> None:
    """Entrypoint for training. This is where most of the logic is executed.

    Args:
        local_rank: Which GPU subprocess rank this is executed in. For CPU and
            1 GPU, this is 0.
        world_size: How many processes are being run.
        model_cfg: The model definition dictionary.
        train_cfg: The training config dictionary.
        save_dir: Where to write checkpoints.
        initial_timestamp: Which model to start from.
    """
    # Do some general setup. When using distributed training and Apex, the device
    # needs to be set before loading the model.
    use_cuda = torch.cuda.is_available()
    device = torch.device(f"cuda:{local_rank}" if use_cuda else "cpu")
    if use_cuda:
        torch.cuda.set_device(local_rank)

    is_main = local_rank == 0
    if is_main:
        log = logger.Log(save_dir / "log.txt")

    # If we are using distributed training, initialize the backend through which
    # the processes can communicate with each other.
    if world_size > 1:
        torch.distributed.init_process_group(
            "nccl", world_size=world_size, rank=local_rank
        )

    # TODO(alex): These paths should be in the generate config.
    batch_size = train_cfg.get("batch_size", 4)
    train_loader, train_sampler = create_data_loader(
        batch_size,
        generate_config.DATA_DIR / "clf_train",
        world_size=world_size,
        val=False,
        img_size=model_cfg.get("image_size", 224),
    )
    eval_loader, _ = create_data_loader(
        batch_size,
        generate_config.DATA_DIR / "clf_val",
        world_size=world_size,
        val=True,
        img_size=model_cfg.get("image_size", 224),
    )
    if is_main:
        log.info(f"Train dataset: {train_loader.dataset}")
        log.info(f"Val dataset: {eval_loader.dataset}")

    scores = {"best_model_score": 0, "best_ema_score": 0}
    best_scores_path = save_dir / "best_scores.json"
    best_scores_path.write_text(json.dumps({}))

    clf_model = classifier.Classifier(
        backbone=model_cfg.get("backbone", None),
        num_classes=model_cfg.get("num_classes", 2),
    )
    if initial_timestamp is not None:
        clf_model.load_state_dict(
            torch.load(
                pathlib.Path(initial_timestamp) / "classifier.pt", map_location="cpu"
            )
        )
    clf_model.to(device)
    if is_main:
        log.info(f"Model: \n {clf_model}")

    optimizer = utils.create_optimizer(train_cfg["optimizer"], clf_model)

    use_mixed_precision = train_cfg.get("mixed-precision", True)
    if use_mixed_precision:
        if is_main:
            log.info("Mixed-precision (AMP) enabled.")
        scaler = torch.cuda.amp.GradScaler()

    ema_model = ema.Ema(clf_model)

    if world_size > 1:
        clf_model = torch.nn.parallel.DistributedDataParallel(
            clf_model, device_ids=[local_rank]
        )

    epochs = train_cfg.get("epochs", 0)
    assert epochs > 0, "Please supply epochs > 0."

    # Create the learning rate scheduler.
    lr_scheduler = None
    if train_cfg["optimizer"]["type"].lower() == "sgd":
        lr_config = train_cfg.get("lr_schedule", {})
        warm_up_percent = lr_config.get("warmup_fraction", 0)
        start_lr = float(lr_config.get("start_lr"))
        max_lr = float(lr_config.get("max_lr"))
        end_lr = float(lr_config.get("end_lr"))
        lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer,
            max_lr=max_lr,
            total_steps=len(train_loader) * epochs,
            final_div_factor=start_lr / end_lr,
            div_factor=max_lr / start_lr,
            pct_start=warm_up_percent,
        )

    loss_fn = torch.nn.CrossEntropyLoss()
    global_step = 0

    for epoch in range(epochs):
        all_losses = []

        # Set the train loader's epoch so data will be re-shuffled.
        if world_size > 1:
            train_sampler.set_epoch(epoch)

        for idx, (data, labels) in enumerate(train_loader):
            optimizer.zero_grad()
            global_step += 1

            # BHWC -> BCHW
            data = data.permute(0, 3, 1, 2)
            data = data.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            out = clf_model(data)
            loss = loss_fn(out, labels)
            all_losses.append(loss.item())

            # Propagate the gradients back through the model.
            if use_mixed_precision:
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()
            else:
                loss.backward()
                optimizer.step()

            if lr_scheduler is not None:
                lr_scheduler.step()

            ema_model.update(clf_model)

            if idx % _LOG_INTERVAL == 0 and is_main:
                lr = optimizer.param_groups[0]["lr"]
                log.info(
                    f"Epoch: {epoch} step {idx}, loss "
                    f"{sum(all_losses) / len(all_losses):.5}. lr: {lr:.4}"
                )

        # Call the evaluation function.
        if is_main and epoch >= train_cfg.get("eval_start_epoch", 10):
            improved_scores = set()
            log.info("Starting eval.")
            start_val = time.perf_counter()

            clf_model.eval()
            model_score = evaluate(clf_model, eval_loader, device)
            clf_model.train()

            if model_score > scores["best_model_score"]:
                scores["best_model_score"] = model_score
                improved_scores.add("best_model_score")
                # TODO(alex): Fix this .module
                utils.save_model(clf_model, save_dir / "classifier.pt")

            ema_score = evaluate(ema_model, eval_loader, device)
            if ema_score > scores["best_ema_score"]:
                scores["best_ema_score"] = ema_score
                improved_scores.add("best_ema_score")
                utils.save_model(ema_model.ema_model, save_dir / "ema-classifier.pt")

            # Write the best metrics to a file so we know which model weights to load.
            if improved_scores:
                best_scores = json.loads(best_scores_path.read_text())
                best_scores.update(scores)
                best_scores_path.write_text(json.dumps(best_scores))

            log.info(f"Eval took {time.perf_counter() - start_val:.4f}s.")
            log.info(f"Improved metrics: {improved_scores}.")
            log.info(
                f"Epoch {epoch}, Training loss {sum(all_losses) / len(all_losses):.5f}\n"
                f"Best model accuracy: {scores['best_model_score']:.5f}\n"
                f"Best EMA accuracy: {scores['best_ema_score']:.5f}\n"
            )
            log.metric("Model score", model_score, epoch)
            log.metric("Best model score", scores["best_model_score"], epoch)
            log.metric("EMA score", ema_score, epoch)
            log.metric("Best EMA score", scores["best_ema_score"], epoch)
            log.metric("Training loss", sum(all_losses) / len(all_losses), epoch)
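# Example launch (a sketch; the config values are illustrative assumptions, not
# the project's real defaults). `torch.multiprocessing.spawn` passes the process
# rank as the first argument, matching `local_rank` above. For world_size > 1,
# MASTER_ADDR and MASTER_PORT must be set in the environment for the NCCL
# process-group initialization:
#
#   model_cfg = {"backbone": "vovnet-19-slim-dw", "num_classes": 2, "image_size": 224}
#   train_cfg = {
#       "batch_size": 64,
#       "epochs": 40,
#       "optimizer": {"type": "sgd"},
#       "lr_schedule": {
#           "start_lr": 1e-2, "max_lr": 1e-1, "end_lr": 1e-4, "warmup_fraction": 0.1,
#       },
#   }
#   world_size = max(1, torch.cuda.device_count())
#   torch.multiprocessing.spawn(
#       train,
#       args=(world_size, model_cfg, train_cfg, pathlib.Path("runs/clf")),
#       nprocs=world_size,
#       join=True,
#   )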
    def test_rexnet_lite0(self) -> None:
        model = classifier.Classifier(num_classes=2, backbone="rexnet-lite0")
        self.assertTrue(self._test_model_output(model, 2))

    def test_vovnet_39(self) -> None:
        model = classifier.Classifier(num_classes=2, backbone="vovnet-39")
        self.assertTrue(self._test_model_output(model, 2))

    def test_vovnet_19_slim_dw(self) -> None:
        model = classifier.Classifier(num_classes=2, backbone="vovnet-19-slim-dw")
        self.assertTrue(self._test_model_output(model, 2))
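    # A minimal sketch of what the `_test_model_output` helper used above might
    # check (hypothetical; the real helper is defined elsewhere in this test
    # class). It assumes the classifier takes a BCHW float tensor and exposes
    # its input resolution as `model.image_size`:
    #
    #   def _test_model_output(self, model: torch.nn.Module, num_classes: int) -> bool:
    #       batch = torch.randn(2, 3, model.image_size, model.image_size)
    #       out = model(batch)
    #       return out.shape == (2, num_classes)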