def test_export_with_pretrained(tmp_path):
    """Check that a pretrained, magnitude-sparsified model exports to ONNX via the sample CLI."""
    sample_config = SampleConfig()
    sample_config.update({
        "model": "resnet18",
        "dataset": "imagenet",
        "input_info": {
            "sample_size": [2, 3, 299, 299]
        },
        "num_classes": 1000,
        "compression": {"algorithm": "magnitude_sparsity"}
    })
    factory = ConfigFactory(sample_config, tmp_path / 'config.json')
    export_path = os.path.join(str(tmp_path), "model.onnx")

    cli_args = {
        "--mode": "test",
        "--config": factory.serialize(),
        "--pretrained": '',
        "--to-onnx": export_path
    }
    command = Command(create_command_line(cli_args, "classification"))
    return_code = command.run()

    assert return_code == 0
    assert os.path.exists(export_path)
def configure_device(current_gpu, config: SampleConfig):
    """Record the worker's GPU on the config and initialize device/distributed state."""
    config.current_gpu = current_gpu

    distributed_modes = (ExecutionMode.DISTRIBUTED,
                         ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    config.distributed = config.execution_mode in distributed_modes
    if config.distributed:
        configure_distributed(config)

    config.device = get_device(config)

    # Pin this process to its assigned GPU in single-GPU mode.
    if config.execution_mode == ExecutionMode.SINGLE_GPU:
        torch.cuda.set_device(config.current_gpu)
def test_model_can_be_loaded_with_resume(_params):
    """Verify a sample-produced checkpoint can be resumed into a freshly built compressed model."""
    params = _params
    sample_config_path = params['sample_config_path']
    checkpoint_path = params['checkpoint_path']

    config = SampleConfig.from_json(str(sample_config_path))
    nncf_config = NNCFConfig.from_json(str(sample_config_path))

    config.execution_mode = params['execution_mode']
    config.current_gpu = 0
    config.device = get_device(config)
    config.distributed = config.execution_mode in (ExecutionMode.DISTRIBUTED,
                                                   ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        # Single-node, single-rank distributed setup for the test.
        config.dist_url = "tcp://127.0.0.1:9898"
        config.dist_backend = "nccl"
        config.rank = 0
        config.world_size = 1
        configure_distributed(config)

    model_name = config['model']
    model = load_model(model_name,
                       pretrained=False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))
    model.to(config.device)

    model, compression_ctrl = create_compressed_model_and_algo_for_test(model, nncf_config)
    model, _ = prepare_model_for_execution(model, config)
    if config.distributed:
        compression_ctrl.distributed()

    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    load_state(model, checkpoint['state_dict'], is_resume=True)
def ssd_vgg_512_test():
    """Build a 21-class SSD-VGG model for 512x512 inputs with the standard prior-box settings."""
    prior_box_params = SampleConfig({
        "steps": [8, 16, 32, 64, 128, 256, 512],
        "min_sizes": [35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8],
        "max_sizes": [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.6],
        "aspect_ratios": [[2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]],
        "variance": [0.1, 0.1, 0.2, 0.2],
        "clip": False,
        "flip": True
    })
    return SSD_VGG(cfg=prior_box_params, size=512, num_classes=21)
def ssd_vgg300():
    """Build a 21-class SSD-VGG model for 300x300 inputs."""
    prior_box_params = SampleConfig({
        "clip": False,
        "variance": [0.1, 0.1, 0.2, 0.2],
        "max_sizes": [60, 111, 162, 213, 264, 315],
        "min_sizes": [30, 60, 111, 162, 213, 264],
        "steps": [8, 16, 32, 64, 100, 300],
        "aspect_ratios": [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
        "flip": True
    })
    # NOTE(review): the trailing True flag differs from the other builders —
    # presumably a variant switch (e.g. batch norm); confirm against SSD_VGG's signature.
    return SSD_VGG(prior_box_params, 300, 21, True)
def start_worker(main_worker, config: SampleConfig):
    """Launch main_worker in the process topology implied by the configured execution mode."""
    mode = config.execution_mode

    if mode == ExecutionMode.CPU_ONLY:
        main_worker(current_gpu=None, config=config)
    elif mode == ExecutionMode.SINGLE_GPU:
        main_worker(current_gpu=config.gpu_id, config=config)
    elif mode == ExecutionMode.GPU_DATAPARALLEL:
        main_worker(current_gpu=None, config=config)
    elif mode == ExecutionMode.MULTIPROCESSING_DISTRIBUTED:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly.
        config.ngpus_per_node = torch.cuda.device_count()
        config.world_size = config.ngpus_per_node * config.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes; spawn
        # prepends the process index, so each worker gets (gpu_index, config).
        mp.spawn(main_worker, nprocs=config.ngpus_per_node, args=(config, ))
def ssd_mobilenet():
    """Build a 21-class MobileNet-SSD model with the standard prior-box settings."""
    prior_box_params = SampleConfig({
        "variance": [0.1, 0.1, 0.2, 0.2],
        "max_sizes": [60, 111, 162, 213, 264, 315],
        "min_sizes": [30, 60, 111, 162, 213, 264],
        "steps": [16, 32, 64, 100, 150, 300],
        "aspect_ratios": [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
        "clip": False,
        "flip": True,
        "top_k": 200
    })
    return MobileNetSSD(21, prior_box_params)
def create_model(config: SampleConfig, resuming_model_sd: dict = None):
    """Build the SSD network, optionally preload weights, wrap it with compression,
    and prepare it for execution.

    Returns a (compression_ctrl, compressed_model) pair with the model in train mode.
    """
    input_infos = create_input_infos(config.nncf_config)
    # Spatial size taken from the last dimension of the sample input shape
    # (assumes square inputs — TODO confirm).
    image_size = input_infos[0].shape[-1]

    ssd_net = build_ssd(config.model, config.ssd_params, image_size, config.num_classes, config)

    weights_path = config.get('weights')
    if weights_path:
        state_dict = torch.load(weights_path, map_location='cpu')
        load_state(ssd_net, state_dict)

    ssd_net.to(config.device)
    compression_ctrl, compressed_model = create_compressed_model(ssd_net, config.nncf_config,
                                                                 resuming_model_sd)
    compressed_model, _ = prepare_model_for_execution(compressed_model, config)
    compressed_model.train()
    return compression_ctrl, compressed_model
def is_pretrained_model_requested(config: SampleConfig) -> bool:
    """Return True if pretrained weights should be downloaded for the model.

    An explicit 'weights' file in the config takes priority and disables the
    pretrained path; otherwise the 'pretrained' flag is honored (default True).
    """
    if config.get('weights') is not None:
        return False
    return config.get('pretrained', True)
def main_worker(current_gpu, config: SampleConfig):
    """Per-process entry point for the classification sample.

    Sets up the device/distributed state, builds the (optionally compressed)
    model, then either exports it to ONNX, validates it, or trains it,
    depending on ``config.mode`` / ``config.to_onnx``.

    :param current_gpu: GPU index assigned to this process (None for CPU/dataparallel).
    :param config: sample configuration object; mutated in place (device,
        distributed flags, start_epoch, etc.).
    """
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (
        ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)
    config.device = get_device(config)

    # Only rank 0 (or the sole process) sets up logging and prints the args.
    if is_main_process():
        configure_logging(logger, config)
        print_args(config)

    if config.seed is not None:
        # Deterministic mode: fixed seed and no cuDNN autotuning.
        manual_seed(config.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False

    # define loss function (criterion)
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(config.device)

    train_loader = train_sampler = val_loader = None
    resuming_checkpoint_path = config.resuming_checkpoint_path
    nncf_config = config.nncf_config
    pretrained = is_pretrained_model_requested(config)

    if config.to_onnx is not None:
        # Export-only run: no data loaders needed, but the model must come from
        # somewhere — either pretrained weights or a resume checkpoint.
        assert pretrained or (resuming_checkpoint_path is not None)
    else:
        # Data loading code
        train_dataset, val_dataset = create_datasets(config)
        train_loader, train_sampler, val_loader = create_data_loaders(
            config, train_dataset, val_dataset)
        # Give NNCF the loss and data it needs for compression initialization.
        nncf_config = register_default_init_args(nncf_config, criterion, train_loader)

    # create model
    model_name = config['model']
    model = load_model(model_name,
                       pretrained=pretrained,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'),
                       weights_path=config.get('weights'))
    model.to(config.device)

    resuming_model_sd = None
    resuming_checkpoint = None
    if resuming_checkpoint_path is not None:
        resuming_checkpoint = load_resuming_checkpoint(resuming_checkpoint_path)
        # Model weights are restored here; optimizer/scheduler state is applied
        # later, only in train mode.
        resuming_model_sd = resuming_checkpoint['state_dict']

    compression_ctrl, model = create_compressed_model(
        model, nncf_config, resuming_state_dict=resuming_model_sd)

    if config.to_onnx:
        # Export and stop — no training/validation in this run.
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    model, _ = prepare_model_for_execution(model, config)
    if config.distributed:
        compression_ctrl.distributed()

    # define optimizer
    params_to_optimize = get_parameter_groups(model, config)
    optimizer, lr_scheduler = make_optimizer(params_to_optimize, config)

    best_acc1 = 0
    # optionally resume from a checkpoint
    if resuming_checkpoint_path is not None:
        if config.mode.lower() == 'train' and config.to_onnx is None:
            # Full resume: restore epoch counter, best metric, and the
            # compression-scheduler/optimizer states.
            config.start_epoch = resuming_checkpoint['epoch']
            best_acc1 = resuming_checkpoint['best_acc1']
            compression_ctrl.scheduler.load_state_dict(
                resuming_checkpoint['scheduler'])
            optimizer.load_state_dict(resuming_checkpoint['optimizer'])
            logger.info(
                "=> loaded checkpoint '{}' (epoch: {}, best_acc1: {:.3f})".
                format(resuming_checkpoint_path, resuming_checkpoint['epoch'],
                       best_acc1))
        else:
            logger.info(
                "=> loaded checkpoint '{}'".format(resuming_checkpoint_path))

    if config.execution_mode != ExecutionMode.CPU_ONLY:
        # Re-enable cuDNN autotuning on GPU runs (overrides the seed-determinism
        # setting above — NOTE(review): intentional? confirm).
        cudnn.benchmark = True

    if config.mode.lower() == 'test':
        print_statistics(compression_ctrl.statistics())
        validate(val_loader, model, criterion, config)

    if config.mode.lower() == 'train':
        is_inception = 'inception' in model_name
        train(config, compression_ctrl, model, criterion, is_inception,
              lr_scheduler, model_name, optimizer, train_loader, train_sampler,
              val_loader, best_acc1)