def test_memory_cost_metric(memory_cost_metric_test_struct):
    """Check that MemoryCostMetric on AlexNet matches the reference table."""
    params = memory_cost_metric_test_struct
    config = get_basic_quantization_config()
    compression_cfg = config['compression']
    compression_cfg['initializer'].update(params.initializers)
    compression_cfg["weights"] = params.weights
    compression_cfg["ignored_scopes"] = params.ignored_scopes
    config['target_device'] = params.target_device

    compression_ctrl, compressed_model = create_compressed_model(test_models.AlexNet(), config)
    metric = MemoryCostMetric(compressed_model,
                              compression_ctrl.weight_quantizers,
                              compression_ctrl.non_weight_quantizers)
    metric.collect()
    # Reference values tolerate 1% relative error.
    assert metric.stat == approx(params.table, rel=1e-2)
def test_share_edges_quantized_data_path(share_edges_quantized_data_path_test_struct):
    """Check ShareEdgesQuantizedDataPath statistics on Inception3 against the reference table."""
    params = share_edges_quantized_data_path_test_struct
    config = get_basic_quantization_config()
    config['compression']["ignored_scopes"] = params.ignored_scopes
    # Inception3 requires a 299x299 input; batch of 2 exercises batched paths.
    config['input_info']['sample_size'] = [2, 3, 299, 299]
    config['quantizer_setup_type'] = params.quantizer_setup_type

    model = test_models.Inception3(aux_logits=True, transform_input=True)
    _, compressed_model = create_compressed_model(model, config)

    metric = ShareEdgesQuantizedDataPath(compressed_model)
    metric.collect()
    # pylint: disable=protected-access
    collected = metric._get_copy_statistics()
    assert collected == approx(params.table, rel=1e-2)
def create_model(config):
    """Build an SSD model, wrap it with NNCF compression, and prepare it for execution.

    Returns a (compression_ctrl, model) pair; the model is left in train mode.
    """
    image_size = create_input_infos(config)[0].shape[-1]
    ssd_net = build_ssd(config.model, config.ssd_params, image_size,
                        config.num_classes, config)
    compression_ctrl, ssd_net = create_compressed_model(ssd_net, config)

    # NOTE(review): weights are loaded *after* compression wrapping here,
    # unlike the SampleConfig variant which loads them before — confirm the
    # checkpoint keys match the compressed model's state dict.
    weights_path = config.get('weights')
    if weights_path:
        state_dict = torch.load(weights_path, map_location='cpu')
        load_state(ssd_net, state_dict)

    ssd_net.train()
    model, _ = prepare_model_for_execution(ssd_net, config)
    return compression_ctrl, model
def export(config):
    """Build a RetinaNet model, optionally restore a checkpoint, and export the compressed model."""
    builder = retinanet_model.RetinanetModel(config)
    model = builder.build_model(pretrained=config.get('pretrained', True),
                                weights=config.get('weights', None),
                                mode=ModeKeys.PREDICT_WITH_GT)
    compression_ctrl, compress_model = create_compressed_model(model, config)

    # Restore trained weights if a checkpoint path was supplied.
    if config.ckpt_path:
        ckpt = tf.train.Checkpoint(model=compress_model)
        load_checkpoint(ckpt, config.ckpt_path)

    save_path, save_format = get_saving_parameters(config)
    compression_ctrl.export_model(save_path, save_format)
    logger.info("Saved to {}".format(save_path))
def create_model(config: SampleConfig, resuming_model_sd: dict = None):
    """Build an SSD model, load optional weights, compress with NNCF, and prepare for execution.

    :param config: sample configuration holding model/ssd params and the NNCF config.
    :param resuming_model_sd: optional state dict to resume the compressed model from.
    :return: (compression_ctrl, compressed_model) with the model in train mode.
    """
    image_size = create_input_infos(config.nncf_config)[0].shape[-1]
    ssd_net = build_ssd(config.model, config.ssd_params, image_size,
                        config.num_classes, config)

    # Pre-compression weights are loaded onto the plain (uncompressed) model.
    weights_path = config.get('weights')
    if weights_path:
        load_state(ssd_net, torch.load(weights_path, map_location='cpu'))
    ssd_net.to(config.device)

    compression_ctrl, compressed_model = create_compressed_model(
        ssd_net, config.nncf_config, resuming_model_sd)
    compressed_model, _ = prepare_model_for_execution(compressed_model, config)
    compressed_model.train()
    return compression_ctrl, compressed_model
def test_network_quantization_share_metric(network_quantization_share_metric_test_struct):
    """Check network quantization share statistics on AlexNet against the reference table."""
    params = network_quantization_share_metric_test_struct
    config = get_basic_quantization_config()
    compression_cfg = config['compression']
    compression_cfg['initializer'].update(params.initializers)
    compression_cfg["activations"] = params.activations
    compression_cfg["weights"] = params.weights
    compression_cfg["ignored_scopes"] = params.ignored_scopes
    config['quantizer_setup_type'] = params.quantizer_setup_type
    config['target_device'] = params.target_device

    cntrl, compressed_model = create_compressed_model(test_models.AlexNet(), config)

    if config['quantizer_setup_type'] == 'pattern_based':
        quantizer_setup_type = QuantizerSetupType.PATTERN_BASED
    else:
        quantizer_setup_type = QuantizerSetupType.PROPAGATION_BASED

    metric = NQSM(compressed_model, cntrl.weight_quantizers,
                  cntrl.non_weight_quantizers, quantizer_setup_type)
    metric.collect()
    # pylint: disable=protected-access
    collected = metric._get_copy_statistics()
    for key, expected in params.table.items():
        assert collected[key] == approx(expected, rel=1e-2)
def test_network_quantization_share_metric(network_quantization_share_metric_test_struct):
    """Check the controller-provided quantization share collector on AlexNet."""
    params = network_quantization_share_metric_test_struct
    config = get_basic_quantization_config()
    compression_cfg = config['compression']
    compression_cfg['initializer'].update(params.initializers)
    compression_cfg["activations"] = params.activations
    compression_cfg["weights"] = params.weights
    compression_cfg["ignored_scopes"] = params.ignored_scopes
    config['quantizer_setup_type'] = params.quantizer_setup_type
    config['target_device'] = params.target_device

    ctrl, _ = create_compressed_model(test_models.AlexNet(), config)

    # The collector comes pre-built from the compression controller here.
    metric = ctrl.non_stable_metric_collectors[0]
    metric.collect()
    # pylint: disable=protected-access
    collected = metric._get_copy_statistics()
    for key, expected in params.table.items():
        assert collected[key] == approx(expected, rel=1e-2)
def export(config):
    """Export a quantized TF-Hub MobileNetV2 classifier to the configured format.

    NOTE: standalone export is not supported yet — the guard below makes this
    function unconditionally raise. Use the combined train + export mode.
    The code after the raise is the intended export flow, kept as scaffold.
    """
    # Fixed typo in the error message: "Experemental" -> "Experimental".
    raise NotImplementedError('Experimental code, please use train + export mode, '
                              'don\'t use only export mode')
    # --- unreachable scaffold: intended standalone-export flow ---
    model = tf.keras.Sequential(
        hub.KerasLayer("https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/classification/4",
                       trainable=True))
    model.build([None, 224, 224, 3])
    compression_ctrl, compress_model = create_compressed_model(model, config)
    metrics = get_metrics()
    loss_obj = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1)
    compress_model.compile(loss=loss_obj, metrics=metrics)
    compress_model.summary()
    if config.ckpt_path is not None:
        load_checkpoint(model=compress_model, ckpt_path=config.ckpt_path)
    save_path, save_format = get_saving_parameters(config)
    compression_ctrl.export_model(save_path, save_format)
    logger.info("Saved to {}".format(save_path))
def train_test_export(config):
    """Train, evaluate, and/or export a compressed RetinaNet model on COCO.

    Which phases run is selected by substrings in ``config.mode``
    ('train' / 'export'); evaluation always runs after the optional training.
    """
    strategy = get_distribution_strategy(config)
    strategy_scope = get_strategy_scope(strategy)

    # Training parameters
    # Hard-coded COCO-2017 split sizes (118287 train / 5000 val images).
    NUM_EXAMPLES_TRAIN = 118287
    NUM_EXAMPLES_EVAL = 5000
    epochs = config.epochs
    batch_size = config.batch_size  # per replica batch size
    num_devices = strategy.num_replicas_in_sync if strategy else 1
    global_batch_size = batch_size * num_devices
    steps_per_epoch = NUM_EXAMPLES_TRAIN // global_batch_size

    # Create Dataset
    train_input_fn = input_reader.InputFn(
        file_pattern=config.train_file_pattern,
        params=config,
        mode=input_reader.ModeKeys.TRAIN,
        batch_size=global_batch_size)
    eval_input_fn = input_reader.InputFn(
        file_pattern=config.eval_file_pattern,
        params=config,
        mode=input_reader.ModeKeys.PREDICT_WITH_GT,
        batch_size=global_batch_size,
        num_examples=NUM_EXAMPLES_EVAL)
    train_dist_dataset = strategy.experimental_distribute_dataset(
        train_input_fn())
    test_dist_dataset = strategy.experimental_distribute_dataset(
        eval_input_fn())

    # Create model builder
    mode = ModeKeys.TRAIN if 'train' in config.mode else ModeKeys.PREDICT_WITH_GT
    model_builder = retinanet_model.RetinanetModel(config)
    eval_metric = model_builder.eval_metrics

    # Model, optimizer, and checkpoint objects must be created under the
    # distribution-strategy scope so variables are mirrored across replicas.
    with strategy_scope:
        model = model_builder.build_model(pretrained=config.get(
            'pretrained', True),
                                          weights=config.get('weights', None),
                                          mode=mode)
        compression_ctrl, compress_model = create_compressed_model(
            model, config)
        # compression_callbacks = create_compression_callbacks(compression_ctrl, config.log_dir)
        scheduler = build_scheduler(config=config,
                                    epoch_size=NUM_EXAMPLES_TRAIN,
                                    batch_size=global_batch_size,
                                    steps=steps_per_epoch)
        optimizer = build_optimizer(config=config, scheduler=scheduler)
        # NOTE(review): this rebinds eval_metric from the bound method above
        # to its call result — presumably the earlier assignment is vestigial.
        eval_metric = model_builder.eval_metrics()
        loss_fn = model_builder.build_loss_fn()
        predict_post_process_fn = model_builder.post_processing
        checkpoint = tf.train.Checkpoint(model=compress_model,
                                         optimizer=optimizer)
        checkpoint_manager = tf.train.CheckpointManager(
            checkpoint, config.checkpoint_save_dir, max_to_keep=None)

    # Range/statistics initialization of the quantizers over the train data.
    logger.info('initialization...')
    compression_ctrl.initialize(dataset=train_input_fn())

    initial_epoch = 0
    if config.ckpt_path:
        initial_epoch = resume_from_checkpoint(checkpoint_manager,
                                               config.ckpt_path,
                                               steps_per_epoch)

    train_step = create_train_step_fn(strategy, compress_model, loss_fn,
                                      optimizer)
    test_step = create_test_step_fn(strategy, compress_model,
                                    predict_post_process_fn)

    if 'train' in config.mode:
        logger.info('Training...')
        train(train_step, test_step, eval_metric, train_dist_dataset,
              test_dist_dataset, initial_epoch, epochs, steps_per_epoch,
              checkpoint_manager, compression_ctrl, config.log_dir, optimizer)

    logger.info('Evaluation...')
    metric_result = evaluate(test_step, eval_metric, test_dist_dataset)
    logger.info('Validation metric = {}'.format(metric_result))

    if 'export' in config.mode:
        save_path, save_format = get_saving_parameters(config)
        compression_ctrl.export_model(save_path, save_format)
        logger.info("Saved to {}".format(save_path))
def main_worker(current_gpu, config):
    """Per-process entry point for segmentation test/train with NNCF compression.

    Handles distributed setup, model creation + compression, optional weight
    loading and checkpoint resume, ONNX export, then dispatches to test or
    train depending on ``config.mode``.
    """
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (
        ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)
    if is_main_process():
        configure_logging(logger, config)
        print_args(config)
        logger.info(config)

    config.device = get_device(config)
    dataset = get_dataset(config.dataset)
    color_encoding = dataset.color_encoding
    num_classes = len(color_encoding)

    # Pre-create the metrics file so downstream tooling sees it even on failure.
    if config.metrics_dump is not None:
        write_metrics(0, config.metrics_dump)

    weights = config.get('weights')
    # Skip downloading pretrained weights when an explicit weight file is given.
    model = load_model(config.model,
                       pretrained=config.get('pretrained', True) if weights is None else False,
                       num_classes=num_classes,
                       model_params=config.get('model_params', {}))
    compression_ctrl, model = create_compressed_model(model, config)
    # NOTE(review): weights are loaded into the already-compressed model here —
    # the checkpoint is presumably saved from a compressed model; confirm.
    if weights:
        sd = torch.load(weights, map_location='cpu')
        load_state(model, sd)

    model, model_without_dp = prepare_model_for_execution(model, config)
    if config.distributed:
        compression_ctrl.distributed()

    resuming_checkpoint = config.resuming_checkpoint
    if resuming_checkpoint is not None:
        if not config.pretrained:
            # Load the previously saved model state
            model, _, _, _, _ = \
                load_checkpoint(model, resuming_checkpoint, config.device,
                                compression_scheduler=compression_ctrl.scheduler)

    # ONNX export short-circuits: export the model and return immediately.
    if config.to_onnx is not None:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    if config.mode.lower() == 'test':
        logger.info(model)
        model_parameters = filter(lambda p: p.requires_grad, model.parameters())
        params = sum([np.prod(p.size()) for p in model_parameters])
        logger.info("Trainable argument count:{params}".format(params=params))
        model = model.to(config.device)
        loaders, w_class = load_dataset(dataset, config)
        _, val_loader = loaders
        test(model, val_loader, w_class, color_encoding, config)
        print_statistics(compression_ctrl.statistics())
    elif config.mode.lower() == 'train':
        loaders, w_class = load_dataset(dataset, config)
        train_loader, val_loader = loaders
        # Initialize quantizer ranges only for a fresh (non-resumed) run.
        if not resuming_checkpoint:
            compression_ctrl.initialize(train_loader)
        train(model, model_without_dp, compression_ctrl, train_loader,
              val_loader, w_class, color_encoding, config)
    else:
        # Should never happen...but just in case it does
        raise RuntimeError(
            "\"{0}\" is not a valid choice for execution mode.".format(
                config.mode))
def process(rank, args, port):
    """Worker entry point: quantize a BERT embedding model and fine-tune it.

    ``rank < 0`` means single-process (CPU or single GPU); otherwise this is
    one rank of an NCCL distributed group. Barriers keep ranks in lockstep
    around model loading, dataset creation, and quantization.
    """
    # init multiprocess
    if rank < 0:
        args.device = torch.device("cpu" if args.n_gpu < 1 else "cuda")
    else:
        # create default process group
        os.environ['MASTER_ADDR'] = 'localhost'
        os.environ['MASTER_PORT'] = str(port)
        torch.distributed.init_process_group("nccl", rank=rank, world_size=args.n_gpu)
        args.device = torch.device("cuda:{}".format(rank))
        torch.cuda.set_device(rank)

    if rank > 0:
        # wait while process 0 loads models
        torch.distributed.barrier()

    printlog("rank", rank, "load tokenizer", args.model_student)
    tokenizer = BertTokenizer.from_pretrained(args.model_student)
    printlog("rank", rank, "load model", args.model_student)
    config = AutoConfig.from_pretrained(args.model_student)
    # Packed checkpoints need the BertPacked wrapper to restore correctly.
    if config.architectures and 'BertBasedClassPacked' in config.architectures:
        model = BertPacked(BertModelEMB).from_pretrained(
            args.model_student).to(args.device)
    else:
        model = BertModelEMB.from_pretrained(args.model_student).to(
            args.device)

    # Teacher model is only needed when distillation supervision is enabled.
    if args.supervision_weight > 0:
        model_t = BertModelEMB.from_pretrained(args.model_teacher).to(
            args.device)
    else:
        model_t = None

    if rank == 0:
        # wait while other processes load models
        torch.distributed.barrier()

    # create train and evaluate datasets
    train_dataset_qc = create_squad_qcemb_dataset(rank, args.device,
                                                  args.squad_train_data,
                                                  tokenizer,
                                                  args.max_seq_length_q,
                                                  args.max_seq_length_c)
    test_dataset_qc = create_squad_qcemb_dataset(rank, args.device,
                                                 args.squad_dev_data,
                                                 tokenizer,
                                                 args.max_seq_length_q,
                                                 args.max_seq_length_c)
    if rank >= 0:
        # let's sync after data is loaded
        torch.distributed.barrier()

    model_controller = None
    if QUANTIZATION:
        if hasattr(model, 'merge_'):
            # if model is packed, then merge some linear transformations before quantization
            model.merge_()

        if rank in [0, -1]:
            # evaluate before quantization
            model.eval()
            result = evaluate(args, model, test_dataset_qc)
            for n, v in result.items():
                logger.info("original {} - {}".format(n, v))
        if rank >= 0:
            torch.distributed.barrier()

        nncf_config = nncf.NNCFConfig.from_json(args.nncf_config)

        # Adapter feeding batches to NNCF's quantization range initializer.
        class SquadInitializingDataloader(
                nncf.initialization.InitializingDataLoader):
            def get_inputs(self, batch):
                return [], get_inputs(batch, args.device)

        train_dataloader = DataLoader(train_dataset_qc.c_dataset,
                                      sampler=RandomSampler(
                                          train_dataset_qc.c_dataset),
                                      batch_size=args.per_gpu_train_batch_size)
        initializing_data_loader = SquadInitializingDataloader(
            train_dataloader)
        init_range = nncf.initialization.QuantizationRangeInitArgs(
            initializing_data_loader)
        nncf_config.register_extra_structs([init_range])
        model_controller, model = nncf.create_compressed_model(
            model, nncf_config, dump_graphs=True)
        if rank > -1:
            model_controller.distributed()
            utils.sync_models(rank, model)

        if rank in [-1, 0]:
            # evaluate pure initialized int8 model
            model.eval()
            result = evaluate(args, model, test_dataset_qc)
            for n, v in result.items():
                logger.info("int8 {} - {}".format(n, v))
        if rank > -1:
            # let's sync after quantization
            torch.distributed.barrier()

    # tune FQ parameters only
    train(rank, args, model, model_t, train_dataset_qc, test_dataset_qc,
          fq_tune_only=True, model_controller=model_controller)
    # tune whole quantized model
    train(rank, args, model, model_t, train_dataset_qc, test_dataset_qc,
          fq_tune_only=False, model_controller=model_controller)

    if rank in [-1, 0]:
        # save and evaluate result
        os.makedirs(args.output_dir, exist_ok=True)
        model.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)
        model.eval()
        # get sample to pass for onnx generation
        with torch.no_grad():
            # Four zero tensors: input_ids, attention_mask, token_type_ids, position_ids.
            torch.onnx.export(model,
                              tuple(
                                  torch.zeros((1, args.max_seq_length_c),
                                              dtype=torch.long,
                                              device=args.device)
                                  for t in range(4)),
                              os.path.join(args.output_dir, "model.onnx"),
                              verbose=False,
                              enable_onnx_checker=False,
                              opset_version=10,
                              input_names=[
                                  'input_ids', 'attention_mask',
                                  'token_type_ids', 'position_ids'
                              ],
                              output_names=['embedding'])
        # Evaluate final model
        result = evaluate(args, model, test_dataset_qc)
        for n, v in result.items():
            logger.info("{} - {}".format(n, v))
        logger.info("checkpoint final result {}".format(result))
def create_compressed_model_and_algo_for_test(model, config):
    """Compress *model* for a test; return (compressed_model, algo_controller)."""
    assert isinstance(config, Config)
    # Reset the TF graph so consecutive tests do not interfere with each other.
    tf.keras.backend.clear_session()
    ctrl, compressed = create_compressed_model(model, config)
    return compressed, ctrl
def train_test_export(config):
    """Train, evaluate, and/or export a compressed Keras classification model.

    Phases are selected by substrings in ``config.mode`` ('train' / 'export');
    evaluation always runs.
    """
    strategy = get_distribution_strategy(config)
    strategy_scope = get_strategy_scope(strategy)

    builders = get_dataset_builders(config, strategy)
    datasets = [builder.build() for builder in builders]
    train_builder, validation_builder = builders
    train_dataset, validation_dataset = datasets
    train_epochs = config.epochs
    train_steps = train_builder.num_steps
    validation_steps = validation_builder.num_steps

    # TF-Hub KerasLayer models need special construction arguments.
    if config.model_type == ModelType.KerasLayer:
        args = get_KerasLayer_model()
    else:
        args = None

    # Model and optimizer must be built under the strategy scope so their
    # variables are mirrored across replicas.
    with strategy_scope:
        from op_insertion import NNCFWrapperCustom
        if not args:
            args = get_model(config.model_type)
        model = tf.keras.Sequential([
            tf.keras.layers.Input(shape=(224, 224, 3)),
            NNCFWrapperCustom(*args)
        ])
        # Workaround: round-trip through SavedModel before compression.
        if SAVE_MODEL_WORKAROUND:
            path = '/tmp/model.pb'
            model.save(path, save_format='tf')
            model = tf.keras.models.load_model(path)

        compression_ctrl, compress_model = create_compressed_model(model, config)
        compression_callbacks = create_compression_callbacks(compression_ctrl,
                                                             config.log_dir)
        scheduler = build_scheduler(
            config=config,
            epoch_size=train_builder.num_examples,
            batch_size=train_builder.global_batch_size,
            steps=train_steps)
        # Optimizer type is forced to SGD regardless of the incoming config.
        config['optimizer'] = {'type': 'sgd'}
        optimizer = build_optimizer(
            config=config,
            scheduler=scheduler)
        metrics = get_metrics()
        loss_obj = get_loss()
        compress_model.compile(optimizer=optimizer,
                               loss=loss_obj,
                               metrics=metrics,
                               run_eagerly=config.get('eager_mode', False))
        compress_model.summary()

        # Quantizer range/statistics initialization over the train data.
        logger.info('initialization...')
        compression_ctrl.initialize(dataset=train_dataset)

        initial_epoch = 0
        if config.ckpt_path is not None:
            initial_epoch = resume_from_checkpoint(model=compress_model,
                                                   ckpt_path=config.ckpt_path,
                                                   train_steps=train_steps)

    callbacks = get_callbacks(
        model_checkpoint=True,
        include_tensorboard=True,
        time_history=True,
        track_lr=True,
        write_model_weights=False,
        initial_step=initial_epoch * train_steps,
        batch_size=train_builder.global_batch_size,
        log_steps=100,
        model_dir=config.log_dir)
    callbacks.extend(compression_callbacks)

    validation_kwargs = {
        'validation_data': validation_dataset,
        'validation_steps': validation_steps,
        'validation_freq': 1,
    }

    if 'train' in config.mode:
        logger.info('training...')
        compress_model.fit(
            train_dataset,
            epochs=train_epochs,
            steps_per_epoch=train_steps,
            initial_epoch=initial_epoch,
            callbacks=callbacks,
            **validation_kwargs)

    logger.info('evaluation...')
    compress_model.evaluate(
        validation_dataset,
        steps=validation_steps,
        verbose=1)

    if 'export' in config.mode:
        save_path, save_format = get_saving_parameters(config)
        compression_ctrl.export_model(save_path, save_format)
        logger.info("Saved to {}".format(save_path))
def main_worker(current_gpu, config):
    """Per-process entry point for segmentation with NNCF (MLflow-enabled variant).

    Sets up logging/MLflow, builds data loaders and the criterion (skipped for
    pure ONNX export), compresses the model with optional resume, then runs
    test or train according to ``config.mode``.
    """
    configure_device(current_gpu, config)
    config.mlflow = SafeMLFLow(config)
    if is_main_process():
        configure_logging(logger, config)
        print_args(config)
        logger.info(config)

    dataset = get_dataset(config.dataset)
    color_encoding = dataset.color_encoding
    num_classes = len(color_encoding)

    # Pre-create the metrics file so downstream tooling sees it even on failure.
    if config.metrics_dump is not None:
        write_metrics(0, config.metrics_dump)

    train_loader = val_loader = criterion = None
    resuming_checkpoint_path = config.resuming_checkpoint_path
    nncf_config = config.nncf_config
    pretrained = is_pretrained_model_requested(config)

    def criterion_fn(model_outputs, target, criterion_):
        # Strip model-specific extras (e.g. aux outputs) before applying the loss.
        labels, loss_outputs, _ = \
            loss_funcs.do_model_specific_postprocessing(config.model, target, model_outputs)
        return criterion_(loss_outputs, labels)

    if config.to_onnx is not None:
        # Pure export: no data loaders needed, but a weight source is mandatory.
        assert pretrained or (resuming_checkpoint_path is not None)
    else:
        loaders, w_class = load_dataset(dataset, config)
        train_loader, val_loader, init_loader = loaders
        criterion = get_criterion(w_class, config)

        def autoq_test_fn(model, eval_loader):
            # Evaluation hook used by AutoQ precision initialization.
            return test(model, eval_loader, criterion, color_encoding, config)

        nncf_config = register_default_init_args(nncf_config, init_loader,
                                                 criterion, criterion_fn,
                                                 autoq_test_fn, val_loader,
                                                 config.device)

    model = load_model(config.model,
                       pretrained=pretrained,
                       num_classes=num_classes,
                       model_params=config.get('model_params', {}),
                       weights_path=config.get('weights'))
    model.to(config.device)

    resuming_model_sd = None
    resuming_checkpoint = None
    if resuming_checkpoint_path is not None:
        resuming_model_sd, resuming_checkpoint = load_resuming_model_state_dict_and_checkpoint_from_path(
            resuming_checkpoint_path)

    compression_ctrl, model = create_compressed_model(
        model, nncf_config, resuming_state_dict=resuming_model_sd)
    model, model_without_dp = prepare_model_for_execution(model, config)

    if config.distributed:
        compression_ctrl.distributed()

    log_common_mlflow_params(config)

    # ONNX export short-circuits: export and return.
    if config.to_onnx:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    if is_main_process():
        print_statistics(compression_ctrl.statistics())

    if config.mode.lower() == 'test':
        logger.info(model)
        model_parameters = filter(lambda p: p.requires_grad, model.parameters())
        params = sum([np.prod(p.size()) for p in model_parameters])
        logger.info("Trainable argument count:{params}".format(params=params))
        model = model.to(config.device)
        test(model, val_loader, criterion, color_encoding, config)
    elif config.mode.lower() == 'train':
        train(model, model_without_dp, compression_ctrl, train_loader,
              val_loader, criterion, color_encoding, config,
              resuming_checkpoint)
    else:
        # Should never happen...but just in case it does
        raise RuntimeError(
            "\"{0}\" is not a valid choice for execution mode.".format(
                config.mode))
def main():
    """Train a ResNet18 on CIFAR-10, optionally with NNCF quantization.

    When compression is enabled, LR milestones are shortened ([5, 10] vs
    [25, 40]) since fine-tuning a quantized model converges faster.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--epochs", type=int, default=50, help="number of epochs to train (default: 50)"
    )
    parser.add_argument(
        "--lr", type=float, default=0.05, help="learning rate (default: 0.05)"
    )
    parser.add_argument(
        "--enable_nncf_compression",
        action="store_true",
        default=False,
        help="nncf compression flag (default: False)",
    )
    parser.add_argument("--seed", type=int, default=1, help="random seed (default: 1)")
    parser.add_argument(
        "--ckpt_filename",
        type=str,
        default="resnet18_cifar10.pth",
        help="file name for model checkpoint (default: resnet18_cifar10.pth)",
    )
    parser.add_argument(
        "--starting_checkpoint",
        type=str,
        default=None,
        help="checkpoint file name to start training from (default: None)",
    )
    args = parser.parse_args()
    print(args)
    torch.manual_seed(args.seed)

    input_size, num_classes, train_dataset, test_dataset = get_CIFAR10()
    kwargs = {"num_workers": 8, "pin_memory": True}
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=512, shuffle=True, **kwargs
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=5000, shuffle=False, **kwargs
    )

    model = Model()
    model = model.cuda()
    if args.starting_checkpoint is not None:
        model.load_state_dict(torch.load(args.starting_checkpoint))

    compression_ctrl = None
    if args.enable_nncf_compression:
        # Minimal quantization config; ranges initialized over 5 train batches.
        nncf_config_dict = {
            "compression": {
                "algorithm": "quantization",
                "initializer": {"range": {"num_init_steps": 5}},
            }
        }
        nncf_config = NNCFConfig(nncf_config_dict)
        nncf_config = register_default_init_args(nncf_config, None, train_loader)
        compression_ctrl, model = create_compressed_model(model, nncf_config)

    if args.enable_nncf_compression:
        milestones = [5, 10]
    else:
        milestones = [25, 40]

    optimizer = torch.optim.SGD(
        model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4
    )
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=milestones, gamma=0.1
    )

    for epoch in range(1, args.epochs + 1):
        train(model, train_loader, optimizer, epoch, compression_ctrl)
        test(model, test_loader)
        scheduler.step()
        # Advance the compression scheduler in lockstep with the epochs.
        if compression_ctrl is not None:
            compression_ctrl.scheduler.epoch_step()

    torch.save(model.state_dict(), args.ckpt_filename)
def main_worker(current_gpu, config):
    """Per-process entry point for segmentation with NNCF (register_default_init_args variant).

    Builds data loaders and criterion (skipped for pure ONNX export),
    compresses the model, optionally restores a checkpoint, then runs test or
    train according to ``config.mode``.
    """
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (
        ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)
    if is_main_process():
        configure_logging(logger, config)
        print_args(config)
        logger.info(config)

    config.device = get_device(config)
    dataset = get_dataset(config.dataset)
    color_encoding = dataset.color_encoding
    num_classes = len(color_encoding)

    # Pre-create the metrics file so downstream tooling sees it even on failure.
    if config.metrics_dump is not None:
        write_metrics(0, config.metrics_dump)

    train_loader = val_loader = criterion = None
    resuming_checkpoint_path = config.resuming_checkpoint_path
    nncf_config = config.nncf_config
    pretrained = is_pretrained_model_requested(config)

    if config.to_onnx is not None:
        # Pure export: no data needed, but a weight source is mandatory.
        assert pretrained or (resuming_checkpoint_path is not None)
    else:
        loaders, w_class = load_dataset(dataset, config)
        train_loader, val_loader = loaders
        criterion = get_criterion(w_class, config)
        # Init args only matter for a fresh run; a resumed model already
        # carries initialized quantizer ranges.
        if not resuming_checkpoint_path:
            nncf_config = register_default_init_args(nncf_config, criterion,
                                                     train_loader)

    model = load_model(config.model,
                       pretrained=pretrained,
                       num_classes=num_classes,
                       model_params=config.get('model_params', {}),
                       weights_path=config.get('weights'))
    model.to(config.device)

    compression_ctrl, model = create_compressed_model(model, nncf_config)
    model, model_without_dp = prepare_model_for_execution(model, config)
    if config.distributed:
        compression_ctrl.distributed()

    if resuming_checkpoint_path:
        if not config.pretrained:
            # Load the previously saved model state
            model, _, _, _, _ = \
                load_checkpoint(model, resuming_checkpoint_path, config.device,
                                compression_scheduler=compression_ctrl.scheduler)

    # ONNX export short-circuits: export and return.
    if config.to_onnx:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    if config.mode.lower() == 'test':
        logger.info(model)
        model_parameters = filter(lambda p: p.requires_grad, model.parameters())
        params = sum([np.prod(p.size()) for p in model_parameters])
        logger.info("Trainable argument count:{params}".format(params=params))
        model = model.to(config.device)
        test(model, val_loader, criterion, color_encoding, config)
        print_statistics(compression_ctrl.statistics())
    elif config.mode.lower() == 'train':
        train(model, model_without_dp, compression_ctrl, train_loader,
              val_loader, criterion, color_encoding, config)
    else:
        # Should never happen...but just in case it does
        raise RuntimeError(
            "\"{0}\" is not a valid choice for execution mode.".format(
                config.mode))
def main_worker(current_gpu, config):
    """Per-process entry point for image classification with NNCF compression.

    Builds and compresses the model, optionally loads weights/resumes a
    checkpoint, supports a pure ONNX-export path, then validates or trains
    according to ``config.mode``.
    """
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (ExecutionMode.DISTRIBUTED,
                                                   ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    config.device = get_device(config)

    if is_main_process():
        configure_logging(logger, config)
        print_args(config)

    # Fixed seed implies fully deterministic cuDNN (benchmark autotuning off).
    if config.seed is not None:
        manual_seed(config.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False

    # create model
    model_name = config['model']
    weights = config.get('weights')
    # Skip downloading pretrained weights when an explicit weight file is given.
    model = load_model(model_name,
                       pretrained=config.get('pretrained', True) if weights is None else False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))
    compression_ctrl, model = create_compressed_model(model, config)
    if weights:
        load_state(model, torch.load(weights, map_location='cpu'))

    model, _ = prepare_model_for_execution(model, config)
    if config.distributed:
        compression_ctrl.distributed()

    # Inception models need special handling (aux logits) in the train loop.
    is_inception = 'inception' in model_name

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(config.device)

    params_to_optimize = get_parameter_groups(model, config)
    optimizer, lr_scheduler = make_optimizer(params_to_optimize, config)

    resuming_checkpoint = config.resuming_checkpoint
    best_acc1 = 0
    # optionally resume from a checkpoint
    if resuming_checkpoint is not None:
        model, config, optimizer, compression_ctrl, best_acc1 = \
            resume_from_checkpoint(resuming_checkpoint, model, config,
                                   optimizer, compression_ctrl)

    # ONNX export short-circuits: export and return.
    if config.to_onnx is not None:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    # Re-enable autotuning for GPU runs (overrides the deterministic setting above).
    if config.execution_mode != ExecutionMode.CPU_ONLY:
        cudnn.benchmark = True

    # Data loading code
    train_dataset, val_dataset = create_datasets(config)
    train_loader, train_sampler, val_loader = create_data_loaders(config,
                                                                  train_dataset,
                                                                  val_dataset)

    if config.mode.lower() == 'test':
        print_statistics(compression_ctrl.statistics())
        validate(val_loader, model, criterion, config)

    if config.mode.lower() == 'train':
        # Initialize quantizer ranges only for a fresh (non-resumed) run.
        if not resuming_checkpoint:
            compression_ctrl.initialize(data_loader=train_loader,
                                        criterion=criterion)
        train(config, compression_ctrl, model, criterion, is_inception,
              lr_scheduler, model_name, optimizer, train_loader, train_sampler,
              val_loader, best_acc1)
def wrap_nncf_model(model, cfg, data_loader_for_init=None, get_fake_input_func=None):
    """
    The function wraps mmdet model by NNCF
    Note that the parameter `get_fake_input_func` should be the function `get_fake_input`
    -- cannot import this function here explicitly

    :param model: mmdet model to be compressed.
    :param cfg: mmdet config; must contain `nncf_config`, `work_dir`, `log_level`,
        and may contain `resume_from` / `load_from` / `nncf_compress_postprocessing`.
    :param data_loader_for_init: data loader used for quantizer range initialization;
        may be omitted only when resuming from an NNCF checkpoint.
    :param get_fake_input_func: callable producing a fake input batch for the
        dummy forward used during graph tracing.
    :return: (compression_ctrl, compressed_model) pair.
    :raises RuntimeError: if neither an init data loader nor an NNCF checkpoint is given.
    """
    check_nncf_is_enabled()
    pathlib.Path(cfg.work_dir).mkdir(parents=True, exist_ok=True)
    nncf_config = NNCFConfig(cfg.nncf_config)
    logger = get_root_logger(cfg.log_level)

    if data_loader_for_init:
        wrapped_loader = MMInitializeDataLoader(data_loader_for_init)
        nncf_config = register_default_init_args(nncf_config, None,
                                                 wrapped_loader)

    # Resolve which checkpoint (if any) we are starting from: `resume_from`
    # must be an NNCF checkpoint; a non-NNCF `load_from` is ignored here.
    if cfg.get('resume_from'):
        checkpoint_path = cfg.get('resume_from')
        assert is_checkpoint_nncf(checkpoint_path), (
            'It is possible to resume training with NNCF compression from NNCF checkpoints only. '
            'Use "load_from" with non-compressed model for further compression by NNCF.'
        )
    elif cfg.get('load_from'):
        checkpoint_path = cfg.get('load_from')
        if not is_checkpoint_nncf(checkpoint_path):
            checkpoint_path = None
            logger.info('Received non-NNCF checkpoint to start training '
                        '-- initialization of NNCF fields will be done')
    else:
        checkpoint_path = None

    if not data_loader_for_init and not checkpoint_path:
        raise RuntimeError('Either data_loader_for_init or NNCF pre-trained '
                           'model checkpoint should be set')

    if checkpoint_path:
        logger.info(f'Loading NNCF checkpoint from {checkpoint_path}')
        logger.info(
            'Please, note that this first loading is made before addition of '
            'NNCF FakeQuantize nodes to the model, so there may be some '
            'warnings on unexpected keys')
        resuming_state_dict = load_checkpoint(model, checkpoint_path)
        logger.info(f'Loaded NNCF checkpoint from {checkpoint_path}')
    else:
        resuming_state_dict = None

    if "nncf_compress_postprocessing" in cfg:
        # NB: This parameter is used to choose if we should try to make NNCF compression
        # for a whole model graph including postprocessing (`nncf_compress_postprocessing=True`),
        # or make NNCF compression of the part of the model without postprocessing
        # (`nncf_compress_postprocessing=False`).
        # Our primary goal is to make NNCF compression of such big part of the model as
        # possible, so `nncf_compress_postprocessing=True` is our primary choice, whereas
        # `nncf_compress_postprocessing=False` is our fallback decision.
        # When we manage to enable NNCF compression for sufficiently many models,
        # we should keep one choice only.
        nncf_compress_postprocessing = cfg.get('nncf_compress_postprocessing')
        logger.debug('set should_compress_postprocessing='
                     f'{nncf_compress_postprocessing}')
    else:
        nncf_compress_postprocessing = True

    def _get_fake_data_for_forward(cfg, nncf_config, get_fake_input_func):
        # Builds one fake image batch matching nncf_config's sample_size.
        input_size = nncf_config.get("input_info").get('sample_size')
        assert get_fake_input_func is not None
        assert len(input_size) == 4 and input_size[0] == 1
        H, W, C = input_size[2], input_size[3], input_size[1]
        device = next(model.parameters()).device
        return get_fake_input_func(cfg, orig_img_shape=tuple([H, W, C]),
                                   device=device)

    def dummy_forward(model):
        # Forward pass used by NNCF to trace the model graph.
        fake_data = _get_fake_data_for_forward(cfg, nncf_config,
                                               get_fake_input_func)
        img, img_metas = fake_data["img"], fake_data["img_metas"]
        img = nncf_model_input(img)
        if nncf_compress_postprocessing:
            ctx = model.forward_export_context(img_metas)
            logger.debug(f"NNCF will compress a postprocessing part of the model")
        else:
            ctx = model.forward_dummy_context(img_metas)
            logger.debug(f"NNCF will NOT compress a postprocessing part of the model")
        with ctx:
            model(img)

    model.dummy_forward_fn = dummy_forward

    compression_ctrl, model = create_compressed_model(
        model,
        nncf_config,
        dummy_forward_fn=dummy_forward,
        resuming_state_dict=resuming_state_dict)
    model = change_export_func_first_conv(model)

    return compression_ctrl, model
def main_worker(current_gpu, config: SampleConfig):
    """Per-process entry point for image classification with NNCF (resume-dict variant).

    Builds data loaders and criterion (skipped for pure ONNX export), loads the
    model, compresses it with an optional resuming state dict, restores
    optimizer/scheduler state for training resume, then validates or trains
    according to ``config.mode``.
    """
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (
        ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    config.device = get_device(config)

    if is_main_process():
        configure_logging(logger, config)
        print_args(config)

    # Fixed seed implies fully deterministic cuDNN (benchmark autotuning off).
    if config.seed is not None:
        manual_seed(config.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False

    # define loss function (criterion)
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(config.device)

    train_loader = train_sampler = val_loader = None
    resuming_checkpoint_path = config.resuming_checkpoint_path
    nncf_config = config.nncf_config
    pretrained = is_pretrained_model_requested(config)

    if config.to_onnx is not None:
        # Pure export: no data needed, but a weight source is mandatory.
        assert pretrained or (resuming_checkpoint_path is not None)
    else:
        # Data loading code
        train_dataset, val_dataset = create_datasets(config)
        train_loader, train_sampler, val_loader = create_data_loaders(
            config, train_dataset, val_dataset)
        nncf_config = register_default_init_args(nncf_config, criterion,
                                                 train_loader)

    # create model
    model_name = config['model']
    model = load_model(model_name,
                       pretrained=pretrained,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'),
                       weights_path=config.get('weights'))
    model.to(config.device)

    resuming_model_sd = None
    resuming_checkpoint = None
    if resuming_checkpoint_path is not None:
        resuming_checkpoint = load_resuming_checkpoint(
            resuming_checkpoint_path)
        resuming_model_sd = resuming_checkpoint['state_dict']

    compression_ctrl, model = create_compressed_model(
        model, nncf_config, resuming_state_dict=resuming_model_sd)

    # ONNX export short-circuits: export and return.
    if config.to_onnx:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    model, _ = prepare_model_for_execution(model, config)
    if config.distributed:
        compression_ctrl.distributed()

    # define optimizer
    params_to_optimize = get_parameter_groups(model, config)
    optimizer, lr_scheduler = make_optimizer(params_to_optimize, config)

    best_acc1 = 0
    # optionally resume from a checkpoint
    if resuming_checkpoint_path is not None:
        # Only a training resume restores epoch/optimizer/scheduler state.
        if config.mode.lower() == 'train' and config.to_onnx is None:
            config.start_epoch = resuming_checkpoint['epoch']
            best_acc1 = resuming_checkpoint['best_acc1']
            compression_ctrl.scheduler.load_state_dict(
                resuming_checkpoint['scheduler'])
            optimizer.load_state_dict(resuming_checkpoint['optimizer'])
            logger.info(
                "=> loaded checkpoint '{}' (epoch: {}, best_acc1: {:.3f})".
                format(resuming_checkpoint_path, resuming_checkpoint['epoch'],
                       best_acc1))
        else:
            logger.info(
                "=> loaded checkpoint '{}'".format(resuming_checkpoint_path))

    # Re-enable autotuning for GPU runs (overrides the deterministic setting above).
    if config.execution_mode != ExecutionMode.CPU_ONLY:
        cudnn.benchmark = True

    if config.mode.lower() == 'test':
        print_statistics(compression_ctrl.statistics())
        validate(val_loader, model, criterion, config)

    if config.mode.lower() == 'train':
        # Inception models need special handling (aux logits) in the train loop.
        is_inception = 'inception' in model_name
        train(config, compression_ctrl, model, criterion, is_inception,
              lr_scheduler, model_name, optimizer, train_loader, train_sampler,
              val_loader, best_acc1)