def main(config, model_path: str, output_path: str, input_shape=(320, 320)):
    """Trace a trained detector and save it as a TorchScript module."""
    log = Logger(local_rank=-1, save_dir=config.save_dir, use_tensorboard=False)

    # Build the network and restore the trained weights on CPU.
    net = build_model(config.model)
    state = torch.load(model_path, map_location=lambda storage, loc: storage)
    load_model_weight(net, state, log)

    # RepVGG backbones must be folded into their deploy-time structure
    # before tracing.
    if config.model.arch.backbone.name == "RepVGG":
        deploy_cfg = config.model
        deploy_cfg.arch.backbone.update({"deploy": True})
        from nanodet.model.backbone.repvgg import repvgg_det_model_convert
        net = repvgg_det_model_convert(net, build_model(deploy_cfg))

    # Trace with a single zero image (batch size 1) and serialize.
    with torch.no_grad():
        example = torch.zeros(1, 3, input_shape[0], input_shape[1])
        net.eval().cpu()
        traced = torch.jit.trace(net, example_inputs=example).eval()
        traced.save(output_path)
        print("Finished export to TorchScript")
def main(config, model_path, output_path, input_shape=(320, 320)):
    """Export the model to ONNX (opset 12) with a dynamic batch axis, then simplify it.

    :param config: nanodet config node (must contain model/save_dir)
    :param model_path: path to the trained checkpoint
    :param output_path: destination .onnx file (overwritten by the simplified graph)
    :param input_shape: (height, width) of the dummy input
    """
    logger = Logger(-1, config.save_dir, False)
    model = build_model(config.model)
    checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
    load_model_weight(model, checkpoint, logger)
    if config.model.arch.backbone.name == 'RepVGG':
        # RepVGG must be re-parameterized into its deploy form before export.
        deploy_config = config.model
        deploy_config.arch.backbone.update({'deploy': True})
        deploy_model = build_model(deploy_config)
        from nanodet.model.backbone.repvgg import repvgg_det_model_convert
        model = repvgg_det_model_convert(model, deploy_model)
    # torch.autograd.Variable is a deprecated no-op wrapper; a plain tensor suffices.
    dummy_input = torch.randn(1, 3, input_shape[0], input_shape[1])
    input_names = ['input']
    output_names = [
        'output1', 'output2', 'output3', 'output4', 'output5', 'output6'
    ]
    # Mark dim 0 of every input/output as dynamic so the exported graph
    # accepts arbitrary batch sizes.
    dynamic_axes = {name: {0: "batch_size"}
                    for name in input_names + output_names}
    # BUG FIX: dynamic_axes was previously built but never passed to
    # torch.onnx.export, so the graph silently kept a fixed batch size.
    torch.onnx.export(model,
                      dummy_input,
                      output_path,
                      verbose=True,
                      keep_initializers_as_inputs=True,
                      opset_version=12,
                      input_names=input_names,
                      output_names=output_names,
                      dynamic_axes=dynamic_axes)
    import onnx
    from onnxsim import simplify
    model = onnx.load(output_path)
    # Simplify the exported graph and overwrite the file in place.
    model_simp, check = simplify(model)
    onnx.save(model_simp, output_path)
    print('finished exporting onnx ')
def main(config, model_path, output_path, input_shape=(320, 320)):
    """Restore a checkpoint and export the detector to ONNX (opset 11)."""
    log = Logger(-1, config.save_dir, False)
    net = build_model(config.model)
    state = torch.load(model_path, map_location=lambda storage, loc: storage)
    load_model_weight(net, state, log)
    # Fold RepVGG training branches into the single-branch deploy form.
    if config.model.arch.backbone.name == 'RepVGG':
        deploy_cfg = config.model
        deploy_cfg.arch.backbone.update({'deploy': True})
        from nanodet.model.backbone.repvgg import repvgg_det_model_convert
        net = repvgg_det_model_convert(net, build_model(deploy_cfg))
    # One random dummy image drives the trace-based export.
    example = torch.autograd.Variable(
        torch.randn(1, 3, input_shape[0], input_shape[1]))
    torch.onnx.export(net, example, output_path,
                      verbose=True,
                      keep_initializers_as_inputs=True,
                      opset_version=11)
    print('finished exporting onnx ')
def __init__(self, cfg, model_path, logger, device='cuda:0'):
    """Build the detector, restore weights, and prepare the eval pipeline."""
    self.cfg = cfg
    self.device = device
    net = build_model(cfg.model)
    state = torch.load(model_path, map_location=lambda storage, loc: storage)
    load_model_weight(net, state, logger)
    # Convert RepVGG backbones to their inference (deploy) structure.
    if cfg.model.arch.backbone.name == 'RepVGG':
        deploy_cfg = cfg.model
        deploy_cfg.arch.backbone.update({'deploy': True})
        from nanodet.model.backbone.repvgg import repvgg_det_model_convert
        net = repvgg_det_model_convert(net, build_model(deploy_cfg))
    self.model = net.to(device).eval()
    self.pipeline = Pipeline(cfg.data.val.pipeline, cfg.data.val.keep_ratio)
def main(config, model_path, output_path, input_shape=(320, 320)):
    """Load a checkpoint and export the model to ONNX (opset 11)."""
    log = Logger(-1, config.save_dir, False)
    net = build_model(config.model)
    state = torch.load(model_path, map_location=lambda storage, loc: storage)
    load_model_weight(net, state, log)
    # Single dummy image for the trace-based export.
    example = torch.autograd.Variable(
        torch.randn(1, 3, input_shape[0], input_shape[1]))
    torch.onnx.export(net, example, output_path,
                      verbose=True,
                      keep_initializers_as_inputs=True,
                      opset_version=11)
    print('finished exporting onnx ')
def __init__(self, cfg, model_path, logger, device='cuda:0'):
    """Load the detector from a checkpoint and build the eval pipeline."""
    self.cfg = cfg
    self.device = device
    net = build_model(cfg.model)
    state = torch.load(model_path, map_location=lambda storage, loc: storage)
    load_model_weight(net, state, logger)
    self.model = net.to(device).eval()
    self.pipeline = Pipeline(cfg.data.val.pipeline, cfg.data.val.keep_ratio)
def main(config, input_shape=(320, 320)):
    """Print the model's FLOPs using mobile_cv, skipping if it is absent."""
    model = build_model(config.model)
    try:
        import mobile_cv.lut.lib.pt.flops_utils as flops_utils
    except ImportError:
        # mobile_cv is optional; bail out gracefully when missing.
        print("mobile-cv is not installed. Skip flops calculation.")
        return
    dummy = torch.rand((1, 3, input_shape[0], input_shape[1]))
    flops_utils.print_model_flops(model, (dummy,))
def main(args):
    """Deprecated test entry point: evaluate a checkpoint on the val/test split.

    Expects `args` to carry: config (path), task ('test' or 'val'),
    model (checkpoint path) and save_result (bool, only used for 'val').
    """
    warnings.warn(
        'Warning! Old testing code is deprecated and will be deleted '
        'in next version. Please use tools/test.py')
    load_config(cfg, args.config)
    local_rank = -1  # single-process evaluation; no distributed setup
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    # Results go into a timestamped sub-directory of save_dir.
    cfg.defrost()
    timestr = datetime.datetime.now().__format__('%Y%m%d%H%M%S')
    cfg.save_dir = os.path.join(cfg.save_dir, timestr)
    cfg.freeze()
    mkdir(local_rank, cfg.save_dir)
    logger = Logger(local_rank, cfg.save_dir)
    logger.log('Creating model...')
    model = build_model(cfg.model)
    logger.log('Setting up data...')
    val_dataset = build_dataset(cfg.data.val, args.task)
    val_dataloader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=cfg.device.batchsize_per_gpu,
        shuffle=False,
        num_workers=cfg.device.workers_per_gpu,
        pin_memory=True,
        collate_fn=collate_function,
        drop_last=True)
    # The trainer's epoch loop is reused purely for inference here;
    # the checkpoint is injected through the schedule config.
    trainer = build_trainer(local_rank, cfg, model, logger)
    cfg.schedule.update({'load_model': args.model})
    trainer.load_model(cfg)
    evaluator = build_evaluator(cfg, val_dataset)
    logger.log('Starting testing...')
    with torch.no_grad():
        results, val_loss_dict = trainer.run_epoch(0, val_dataloader, mode=args.task)
    if args.task == 'test':
        # Test split: just dump raw detections as COCO-style JSON.
        res_json = evaluator.results2json(results)
        json_path = os.path.join(cfg.save_dir, 'results{}.json'.format(timestr))
        json.dump(res_json, open(json_path, 'w'))
    elif args.task == 'val':
        # Val split: run the evaluator and optionally persist the metrics.
        eval_results = evaluator.evaluate(results, cfg.save_dir, rank=local_rank)
        if args.save_result:
            txt_path = os.path.join(cfg.save_dir, "eval_results{}.txt".format(timestr))
            with open(txt_path, "a") as f:
                for k, v in eval_results.items():
                    f.write("{}: {}\n".format(k, v))
def main(args):
    """Deprecated training entry point: build datasets/model and run the trainer."""
    warnings.warn('Warning! Old training code is deprecated and will be deleted '
                  'in next version. Please use tools/train.py')
    load_config(cfg, args.config)
    local_rank = int(args.local_rank)
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    # mkdir is wrapped with @rank_filter: only the main process creates save_dir.
    mkdir(local_rank, cfg.save_dir)
    logger = Logger(local_rank, cfg.save_dir)
    if args.seed is not None:
        logger.log('Set random seed to {}'.format(args.seed))
        init_seeds(args.seed)
    logger.log('Creating model...')
    model = build_model(cfg.model)
    logger.log('Setting up data...')
    train_dataset = build_dataset(cfg.data.train, 'train')
    val_dataset = build_dataset(cfg.data.val, 'test')
    # More than one GPU -> NCCL distributed training with a DistributedSampler.
    if len(cfg.device.gpu_ids) > 1:
        print('rank = ', local_rank)
        num_gpus = torch.cuda.device_count()
        torch.cuda.set_device(local_rank % num_gpus)
        dist.init_process_group(backend='nccl')
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                       batch_size=cfg.device.batchsize_per_gpu,
                                                       num_workers=cfg.device.workers_per_gpu,
                                                       pin_memory=True,
                                                       collate_fn=collate_function,
                                                       sampler=train_sampler,
                                                       drop_last=True)
    else:
        train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                       batch_size=cfg.device.batchsize_per_gpu,
                                                       shuffle=True,
                                                       num_workers=cfg.device.workers_per_gpu,
                                                       pin_memory=True,
                                                       collate_fn=collate_function,
                                                       drop_last=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=cfg.device.batchsize_per_gpu,
                                                 shuffle=False,
                                                 num_workers=cfg.device.workers_per_gpu,
                                                 pin_memory=True,
                                                 collate_fn=collate_function,
                                                 drop_last=True)
    trainer = build_trainer(local_rank, cfg, model, logger)
    # Optionally warm-start or resume from a checkpoint named in the schedule.
    if 'load_model' in cfg.schedule:
        trainer.load_model(cfg)
    if 'resume' in cfg.schedule:
        trainer.resume(cfg)
    evaluator = build_evaluator(cfg, val_dataset)
    logger.log('Starting training...')
    trainer.run(train_dataloader, val_dataloader, evaluator)
def main(config, model_path, output_path, input_shape=(320, 320)):
    """Export the detector to ONNX, then run onnx-simplifier on the result."""
    logger = Logger(-1, config.save_dir, False)
    net = build_model(config.model)
    ckpt = torch.load(model_path, map_location=lambda storage, loc: storage)
    load_model_weight(net, ckpt, logger)
    if config.model.arch.backbone.name == "RepVGG":
        # Fold RepVGG training branches into the deploy architecture.
        deploy_cfg = config.model
        deploy_cfg.arch.backbone.update({"deploy": True})
        from nanodet.model.backbone.repvgg import repvgg_det_model_convert
        net = repvgg_det_model_convert(net, build_model(deploy_cfg))
    example = torch.autograd.Variable(
        torch.randn(1, 3, input_shape[0], input_shape[1])
    )
    torch.onnx.export(
        net,
        example,
        output_path,
        verbose=True,
        keep_initializers_as_inputs=True,
        opset_version=11,
        input_names=["data"],
        output_names=["output"],
    )
    logger.log("finished exporting onnx ")
    logger.log("start simplifying onnx ")
    # Feed the same dummy image to the simplifier so shapes can be inferred.
    feed = {"data": example.detach().cpu().numpy()}
    model_sim, flag = onnxsim.simplify(output_path, input_data=feed)
    if flag:
        onnx.save(model_sim, output_path)
        logger.log("simplify onnx successfully")
    else:
        logger.log("simplify onnx failed")
def __init__(self, model_path, cfg_path, *args, **kwargs):
    """Load a NanoDet model for inference with the PyTorch backend."""
    from nanodet.model.arch import build_model
    from nanodet.util import Logger, cfg, load_config, load_model_weight
    super(NanoDetTorch, self).__init__(*args, **kwargs)
    print(f'Using PyTorch as inference backend')
    print(f'Using weight: {model_path}')
    # Remember where the weights/config came from, then build the network.
    self.model_path = model_path
    self.cfg_path = cfg_path
    load_config(cfg, cfg_path)
    self.logger = Logger(-1, cfg.save_dir, False)
    self.model = build_model(cfg.model)
    # NOTE(review): `self.torch` is presumably bound by the base class
    # constructor above — confirm before refactoring to a plain `torch`.
    state = self.torch.load(model_path, map_location=lambda storage, loc: storage)
    load_model_weight(self.model, state, self.logger)
def export_ONNX_model(self, config, model_path, output_path, logger, input_shape=(320, 320)):
    """Export a checkpointed model to ONNX (opset 11) at `output_path`."""
    net = build_model(config.model)
    ckpt = torch.load(model_path, map_location=lambda storage, loc: storage)
    load_model_weight(net, ckpt, logger)
    # Single random dummy image drives the trace-based export.
    example = torch.autograd.Variable(
        torch.randn(1, 3, input_shape[0], input_shape[1]))
    torch.onnx.export(net, example, output_path,
                      verbose=True,
                      keep_initializers_as_inputs=True,
                      opset_version=11)
def __init__(self, cfg, model_path, logger, device='cuda:0'):
    """Set up a TensorRT inference context for the detector.

    :param cfg: nanodet config node
    :param model_path: path to the serialized TensorRT engine
    :param logger: unused here but kept for interface parity with other predictors
    :param device: torch device string for the (unused-weights) torch model
    """
    self.cfg = cfg
    self.device = device
    model = build_model(cfg.model)
    self.model = model.to(device).eval()
    self.pipeline = Pipeline(cfg.data.val.pipeline, cfg.data.val.keep_ratio)
    self.trt_model = model_path
    # BUG FIX: the old check `trt.__version__[0] < '7'` compared only the
    # first character lexicographically, so TensorRT >= 10 ('1' < '7')
    # wrongly took the legacy inference path. Compare the major version
    # numerically instead.
    trt_major = int(trt.__version__.split('.')[0])
    self.inference_fn = common.do_inference if trt_major < 7 \
        else common.do_inference_v2
    self.trt_logger = trt.Logger(trt.Logger.INFO)
    self.engine = self._load_engine()
    try:
        self.context = self.engine.create_execution_context()
        self.inputs, self.outputs, self.bindings, self.stream = \
            common.allocate_buffers(self.engine)
    except Exception as e:
        raise RuntimeError('fail to allocate CUDA resources') from e
def __init__(self, cfg, model_path, logger, device='cuda:0'):
    """Construct the predictor: build the net, restore weights, move to device.

    :param cfg: full nanodet config
    :param model_path: checkpoint path
    :param logger: logger handed to the weight loader
    :param device: torch device string
    """
    self.cfg = cfg
    self.device = device
    net = build_model(cfg.model)
    state = torch.load(model_path, map_location=lambda storage, loc: storage)
    load_model_weight(net, state, logger)
    print('INFO: {:s} loaded.'.format(model_path))
    self.model = net.to(device).eval()
    self.pipeline = Pipeline(cfg.data.val.pipeline, cfg.data.val.keep_ratio)
def test_config_files():
    """Every YAML config under config/ must load and build a matching model."""
    root_path = join(dirname(__file__), "../..")
    cfg_folder = join(root_path, "config")
    if not exists(cfg_folder):
        raise FileNotFoundError("Cannot find config folder.")
    for cfg_path in collect_files(cfg_folder, [".yml", ".yaml"]):
        print(f"Start testing {cfg_path}")
        config = copy.deepcopy(cfg)
        # Loading must succeed and populate the mandatory top-level keys.
        load_config(config, cfg_path)
        assert "save_dir" in config
        assert "model" in config
        assert "data" in config
        assert "device" in config
        assert "schedule" in config
        assert "log" in config
        # The built model's class name must match the declared architecture.
        model = build_model(config.model)
        assert config.model.arch.name == model.__class__.__name__
def main(config, model_path, output_path, input_shape=(320, 320), batch_size=1):
    """Export to ONNX (opset 12) and simplify the graph in place."""
    logger = Logger(-1, config.save_dir, False)
    net = build_model(config.model)
    ckpt = torch.load(model_path, map_location=lambda storage, loc: storage)
    load_model_weight(net, ckpt, logger)
    example = torch.autograd.Variable(
        torch.randn(batch_size, 3, input_shape[0], input_shape[1]))
    torch.onnx.export(net, example, output_path,
                      verbose=True,
                      keep_initializers_as_inputs=True,
                      opset_version=12)
    # Simplify the exported graph and overwrite the original file.
    graph = onnx.load(output_path)
    simplified, ok = simplify(graph)
    assert ok, "Simplified ONNX model could not be validated"
    onnx.save(simplified, output_path)
    print('finished exporting onnx ')
def main(config, input_shape=(3, 320, 320)):
    """Print the model's complexity (FLOPs and parameter count)."""
    net = build_model(config.model)
    flops, params = get_model_complexity_info(net, input_shape)
    divider = '=' * 30
    # NOTE(review): `flops*2` presumably converts the tool's MAC count to
    # FLOPs — confirm against get_model_complexity_info's convention.
    print(f'{divider}\nInput shape: {input_shape}\n'
          f'Flops: {flops*2}\nParams: {params}\n{divider}')
def test_flops():
    """Smoke test: complexity computation runs on the legacy nanodet-m config."""
    load_config(cfg, "./config/legacy_v0.x_configs/nanodet-m.yml")
    net = build_model(cfg.model)
    get_model_complexity_info(net, (3, 320, 320))
def startNanodetTrain(self):
    """Run a full NanoDet training session driven by self.nanoTrainConfig.

    Expects self.nanoTrainConfig to provide: 'cfg' (config file path),
    'local_rank', 'save_dir' and optionally 'seed'.
    """
    # Load the configuration file.
    load_config(cfg, self.nanoTrainConfig['cfg'])
    # Determine this host's role in distributed training.
    local_rank = int(self.nanoTrainConfig["local_rank"])
    # torch.backends.cudnn.enabled = True
    # torch.backends.cudnn.benchmark = True
    mkdir(local_rank, self.nanoTrainConfig["save_dir"])
    logger = Logger(local_rank, self.nanoTrainConfig["save_dir"])
    if self.nanoTrainConfig.keys().__contains__("seed"):
        logger.log('Set random seed to {}'.format(
            self.nanoTrainConfig['seed']))
        self.init_seeds(self.nanoTrainConfig['seed'])
    # 1. Create the model (kept on CPU here).
    model = build_model(cfg.model)
    model = model.cpu()
    # 2. Load the data.
    logger.log('Setting up data...')
    train_dataset = build_dataset(cfg.data.train, 'train', self.nanoTrainConfig)
    val_dataset = build_dataset(cfg.data.val, 'test', self.nanoTrainConfig)
    if len(cfg.device.gpu_ids) > 1:
        # Multi-GPU: NCCL process group plus a distributed sampler.
        print('rank = ', local_rank)
        num_gpus = torch.cuda.device_count()
        torch.cuda.set_device(local_rank % num_gpus)
        dist.init_process_group(backend='nccl')
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
        train_dataloader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=cfg.device.batchsize_per_gpu,
            num_workers=cfg.device.workers_per_gpu,
            pin_memory=True,
            collate_fn=collate_function,
            sampler=train_sampler,
            drop_last=True)
    else:
        print("加载数据...")
        train_dataloader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=cfg.device.batchsize_per_gpu,
            shuffle=True,
            num_workers=cfg.device.workers_per_gpu,
            pin_memory=True,
            collate_fn=collate_function,
            drop_last=True)
    val_dataloader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=1,
        pin_memory=True,
        collate_fn=collate_function,
        drop_last=True)
    trainer = build_trainer(local_rank, cfg, model, logger)
    # Optionally warm-start or resume from a checkpoint named in the schedule.
    if 'load_model' in cfg.schedule:
        trainer.load_model(cfg)
    if 'resume' in cfg.schedule:
        trainer.resume(cfg)
    evaluator = build_evaluator(cfg, val_dataset)
    logger.log('Starting training...')
    trainer.run(train_dataloader, val_dataloader, evaluator, self.nanoTrainConfig)
def run(args):
    """Training entry point: build datasets/model and run the trainer.

    :param args: parsed CLI args; must carry config, local_rank, seed and
        is_debug (debug mode loads data in the main process, num_workers=0).
    :return: None
    """
    load_config(cfg, args.config)
    local_rank = int(args.local_rank)  # this process's rank in distributed training
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    mkdir(local_rank, cfg.save_dir)
    logger = Logger(local_rank, cfg.save_dir)
    if args.seed is not None:
        logger.log('Set random seed to {}'.format(args.seed))
        init_seeds(args.seed)

    logger.log('Creating model...')
    model = build_model(cfg.model)

    logger.log('Setting up data...')
    train_dataset = build_dataset(cfg.data.train, 'train')
    val_dataset = build_dataset(cfg.data.val, 'test')

    # Debug mode loads batches in the main process (num_workers=0) so that
    # breakpoints in dataset code work. This replaces four near-identical
    # DataLoader constructions that differed only in num_workers.
    train_workers = 0 if args.is_debug else cfg.device.workers_per_gpu
    val_workers = 0 if args.is_debug else 1

    if len(cfg.device.gpu_ids) > 1:
        # More than one GPU: NCCL distributed training with a sampler.
        print('rank = ', local_rank)
        num_gpus = torch.cuda.device_count()
        torch.cuda.set_device(local_rank % num_gpus)
        dist.init_process_group(backend='nccl')
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        train_data_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=cfg.device.batchsize_per_gpu,
            num_workers=train_workers,
            pin_memory=True,
            collate_fn=collate_function,
            sampler=train_sampler,
            drop_last=True)
    else:
        train_data_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=cfg.device.batchsize_per_gpu,
            shuffle=True,
            num_workers=train_workers,
            pin_memory=True,
            collate_fn=collate_function,
            drop_last=True)
    val_data_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=val_workers,
        pin_memory=True,
        collate_fn=collate_function,
        drop_last=True)

    trainer = build_trainer(local_rank, cfg, model, logger)
    # Optionally warm-start or resume from a checkpoint named in the schedule.
    if 'load_model' in cfg.schedule:
        trainer.load_model(cfg)
    if 'resume' in cfg.schedule:
        trainer.resume(cfg)

    evaluator = build_evaluator(cfg, val_dataset)
    logger.log('Starting training...')
    trainer.run(train_data_loader, val_data_loader, evaluator)
import torch
from nanodet.model.arch import build_model
from nanodet.util import cfg, load_config
from nni.compression.pytorch import ModelSpeedup
from nni.algorithms.compression.pytorch.pruning import L1FilterPruner
"""
NanoDet model can be installed from https://github.com/RangiLyu/nanodet.git
"""
# Build the NanoDet RepVGG-A0 model on CPU from its YAML config.
cfg_path = r"nanodet/config/nanodet-RepVGG-A0_416.yml"
load_config(cfg, cfg_path)
model = build_model(cfg.model).cpu()
dummy_input = torch.rand(8, 3, 416, 416)
# Collect the Conv2d layers eligible for pruning.
op_names = []
# these three conv layers are followed by reshape-like functions
# that cannot be replaced, so we skip these three conv layers,
# you can also get such layers by `not_safe_to_prune` function
excludes = ['head.gfl_cls.0', 'head.gfl_cls.1', 'head.gfl_cls.2']
for name, module in model.named_modules():
    if isinstance(module, torch.nn.Conv2d):
        if name not in excludes:
            op_names.append(name)
# Prune 50% of the filters in every Conv2d listed above and export
# the pruned weights plus the sparsity masks.
cfg_list = [{'op_types': ['Conv2d'], 'sparsity': 0.5, 'op_names': op_names}]
pruner = L1FilterPruner(model, cfg_list)
pruner.compress()
pruner.export_model('./model', './mask')
# need call _unwrap_model if you want run the speedup on the same model