def create_compressed_model_and_algo_for_test(model: NNCFNetwork, config: NNCFConfig,
                                              dummy_forward_fn: Callable[[Module], Any] = None) \
        -> Tuple[NNCFNetwork, CompressionAlgorithmController]:
    """Wrap a model with compression for testing, skipping graph dumps.

    Validates the config, builds the compressed model, and returns the pair
    in (model, controller) order — note this is the reverse of what
    ``create_compressed_model`` itself returns.

    :param model: the network to compress.
    :param config: an NNCFConfig instance (asserted and validated here).
    :param dummy_forward_fn: optional custom forward used for graph tracing.
    :return: tuple of (compressed model, compression controller).
    """
    assert isinstance(config, NNCFConfig)
    NNCFConfig.validate(config)
    ctrl, compressed_model = create_compressed_model(model, config, dump_graphs=False,
                                                     dummy_forward_fn=dummy_forward_fn)
    return compressed_model, ctrl
def create_compressed_model_and_algo_for_test(model: NNCFNetwork, config: NNCFConfig,
                                              dummy_forward_fn: Callable[[Module], Any] = None,
                                              wrap_inputs_fn: Callable[[Tuple, Dict],
                                                                       Tuple[Tuple, Dict]] = None,
                                              resuming_state_dict: dict = None) \
        -> Tuple[NNCFNetwork, CompressionAlgorithmController]:
    """Wrap a model with compression for testing, skipping graph dumps.

    Validates the config, builds the compressed model (optionally resuming
    from a state dict), and returns (model, controller) — the reverse of the
    ``create_compressed_model`` return order.

    :param model: the network to compress.
    :param config: an NNCFConfig instance (asserted and validated here).
    :param dummy_forward_fn: optional custom forward used for graph tracing.
    :param wrap_inputs_fn: optional callable mapping (args, kwargs) to wrapped (args, kwargs).
    :param resuming_state_dict: optional state dict to load into the compressed model.
    :return: tuple of (compressed model, compression controller).
    """
    assert isinstance(config, NNCFConfig)
    NNCFConfig.validate(config)
    ctrl, compressed_model = create_compressed_model(model, config,
                                                     dump_graphs=False,
                                                     dummy_forward_fn=dummy_forward_fn,
                                                     wrap_inputs_fn=wrap_inputs_fn,
                                                     resuming_state_dict=resuming_state_dict)
    return compressed_model, ctrl
def load_torch_model(config, cuda=False):
    """Build, compress, and optionally CUDA-enable a model from a config.

    If 'weights' is given in the config, pretrained loading is disabled and
    the checkpoint is loaded into the compressed model instead. Compression
    statistics are printed before returning.

    :param config: dict-like config with 'model', optional 'weights',
        'pretrained', 'num_classes', and 'model_params' entries.
    :param cuda: when True, move the model to GPU and wrap in DataParallel.
    :return: the (possibly DataParallel-wrapped) compressed model.
    """
    weights = config.get('weights')
    model = load_model(
        config.get('model'),
        # An explicit weights file overrides any pretrained download.
        pretrained=config.get('pretrained', True) if weights is None else False,
        num_classes=config.get('num_classes', 1000),
        model_params=config.get('model_params', {}))
    compression_ctrl, model = create_compressed_model(model, config)
    if weights:
        checkpoint_sd = torch.load(weights, map_location='cpu')
        load_state(model, checkpoint_sd)
    if cuda:
        model = model.cuda()
        model = torch.nn.DataParallel(model)
    print_statistics(compression_ctrl.statistics())
    return model
def main_worker_binarization(current_gpu, config):
    """Per-process entry point for the binarization sample.

    Sets up (optionally distributed) execution, builds and compresses the
    model, optionally resumes from a checkpoint or exports to ONNX, then
    runs test and/or staged binarization training depending on config.mode.

    :param current_gpu: GPU index assigned to this worker process.
    :param config: sample configuration object (attribute-style access).
    """
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (ExecutionMode.DISTRIBUTED,
                                                   ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    config.device = get_device(config)

    # Only the main process writes logs / prints arguments.
    if is_main_process():
        configure_logging(logger, config)
        print_args(config)

    if config.seed is not None:
        # Reproducibility: fixed seed, deterministic cudnn, no autotuning.
        manual_seed(config.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False

    # Create the model; an explicit weights file disables pretrained loading.
    model_name = config['model']
    weights = config.get('weights')
    model = load_model(model_name,
                       pretrained=config.get('pretrained', True) if weights is None else False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))
    # Keep an uncompressed copy as the knowledge-distillation teacher.
    original_model = copy.deepcopy(model)

    compression_ctrl, model = create_compressed_model(model, config)
    if not isinstance(compression_ctrl, BinarizationController):
        raise RuntimeError("The binarization sample worker may only be run with the binarization algorithm!")

    if weights:
        load_state(model, torch.load(weights, map_location='cpu'))

    model, _ = prepare_model_for_execution(model, config)
    original_model.to(config.device)

    if config.distributed:
        compression_ctrl.distributed()

    is_inception = 'inception' in model_name

    # Loss function (criterion) and optimizer.
    criterion = nn.CrossEntropyLoss().to(config.device)

    params_to_optimize = model.parameters()

    compression_config = config['compression']
    # 'compression' may be a single dict or a list of algorithm configs.
    binarization_config = compression_config if isinstance(compression_config, dict) \
        else compression_config[0]
    optimizer = get_binarization_optimizer(params_to_optimize, binarization_config)
    optimizer_scheduler = BinarizationOptimizerScheduler(optimizer, binarization_config)
    kd_loss_calculator = KDLossCalculator(original_model)

    resuming_checkpoint = config.resuming_checkpoint
    best_acc1 = 0
    # Optionally resume the full training state from a checkpoint.
    if resuming_checkpoint is not None:
        model, config, optimizer, optimizer_scheduler, kd_loss_calculator, compression_ctrl, best_acc1 = \
            resume_from_checkpoint(resuming_checkpoint, model, config, optimizer,
                                   optimizer_scheduler, kd_loss_calculator, compression_ctrl)

    # ONNX export is a terminal action — export and return immediately.
    if config.to_onnx is not None:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    if config.execution_mode != ExecutionMode.CPU_ONLY:
        cudnn.benchmark = True

    # Data loading.
    train_dataset, val_dataset = create_datasets(config)
    train_loader, train_sampler, val_loader = create_data_loaders(config, train_dataset, val_dataset)

    if config.mode.lower() == 'test':
        print_statistics(compression_ctrl.statistics())
        validate(val_loader, model, criterion, config)

    if config.mode.lower() == 'train':
        # Initialization from data is only needed on a fresh (non-resumed) run.
        if not resuming_checkpoint:
            compression_ctrl.initialize(data_loader=train_loader, criterion=criterion)
        batch_multiplier = (binarization_config.get("params", {})).get("batch_multiplier", 1)
        train_bin(config, compression_ctrl, model, criterion, is_inception,
                  optimizer_scheduler, model_name, optimizer, train_loader, train_sampler,
                  val_loader, kd_loss_calculator, batch_multiplier, best_acc1)
def test_number_of_calling_fq_for_gnmt(self):
    """Check fake-quantizer call counts on a quantized GNMT.

    Builds a small GNMT, compresses it with quantization, attaches a
    forward-pre-hook counter to every quantizer, and verifies that
    per-cell quantizers fire once per time step while all others fire
    once per forward pass — for two different sequence lengths.
    """
    torch.cuda.set_device(0)
    device = torch.device('cuda')
    batch_first = False
    vocab_size = 32000
    model_config = {
        'hidden_size': 100,
        'vocab_size': vocab_size,
        'num_layers': 4,
        'dropout': 0.2,
        'batch_first': batch_first,
        'share_embedding': True,
    }
    batch_size = 128
    sequence_size = 50
    input_sample_size = (batch_size, sequence_size) if batch_first \
        else (sequence_size, batch_size)

    config = get_empty_config(input_sample_size=input_sample_size)
    config['compression'] = {
        'algorithm': 'quantization',
        'quantize_inputs': True,
        'quantizable_subgraph_patterns': [["linear", "__add__"],
                                          ["sigmoid", "__mul__", "__add__"],
                                          ["__add__", "tanh", "__mul__"],
                                          ["sigmoid", "__mul__"]],
        'disable_function_quantization_hooks': True}
    config['scopes_without_shape_matching'] = \
        ['GNMT/ResidualRecurrentDecoder[decoder]/RecurrentAttention[att_rnn]/BahdanauAttention[attn]', ]

    model = GNMT(**model_config)
    model = replace_lstm(model)
    model.to(device)

    def dummy_forward_fn(model, seq_len=sequence_size):
        def gen_packed_sequence():
            # Random per-sample lengths in [1, seq_len], sorted descending
            # as pad_sequence/packed-sequence consumers expect.
            seq_lens = torch.LongTensor(batch_size).random_(1, seq_len + 1)
            seq_lens = torch.sort(seq_lens, descending=True).values
            seq_list = [torch.LongTensor(sz.item()).random_(1, vocab_size).to(device)
                        for sz in seq_lens]
            padded = torch.nn.utils.rnn.pad_sequence(seq_list, batch_first=batch_first)
            return padded, seq_lens

        input_encoder, enc_lens = gen_packed_sequence()
        input_enc_len = enc_lens.to(device)
        input_decoder = gen_packed_sequence()[0]
        model(input_encoder, input_enc_len, input_decoder)

    algo, model = create_compressed_model(model, config, dummy_forward_fn, dump_graphs=False)
    model.to(device)

    class Counter:
        def __init__(self):
            self.count = 0

        def next(self):
            self.count += 1

    def hook(model, input_, counter):
        counter.next()

    # One counter per quantizer, bumped on every forward pre-hook call.
    counters = {}
    for name, quantizer in algo.all_quantizations.items():
        counter = Counter()
        counters[str(name)] = counter
        quantizer.register_forward_pre_hook(partial(hook, counter=counter))

    dummy_forward_fn(model)
    # NB: may always fail in debug due to superfluous 'cat' nodes
    assert model.get_graph().get_nodes_count() == 230
    assert len(counters) == 55
    for name, counter in counters.items():
        # Cell-level quantizers run once per time step; the rest once per pass.
        if 'cell' in name or "LSTMCellForwardNNCF" in name:
            assert counter.count == sequence_size, name
        else:
            assert counter.count == 1, name

    # Second pass with a shorter sequence — counts accumulate.
    new_seq_len = int(sequence_size / 2)
    dummy_forward_fn(model, new_seq_len)
    # NB: may always fail in debug due to superfluous 'cat' nodes
    assert model.get_graph().get_nodes_count() == 230
    assert len(counters) == 55
    for name, counter in counters.items():
        if 'cell' in name or "LSTMCellForwardNNCF" in name:
            assert counter.count == sequence_size + new_seq_len, name
        else:
            assert counter.count == 2, name
def create_compressed_model_and_algo_for_test(model: NNCFNetwork, config) -> Tuple[
        NNCFNetwork, CompressionAlgorithmController]:
    """Compress a model for testing without dumping graphs.

    Thin wrapper over ``create_compressed_model`` that swaps the return
    order to (model, controller).

    :param model: the network to compress.
    :param config: compression configuration passed through unchanged.
    :return: tuple of (compressed model, compression controller).
    """
    ctrl, compressed_model = create_compressed_model(model, config, dump_graphs=False)
    return compressed_model, ctrl
def staged_quantization_main_worker(current_gpu, config):
    """Per-process entry point for the staged quantization sample.

    Sets up (optionally distributed) execution, builds data loaders unless
    only an ONNX export is requested, compresses the model, optionally
    restores training state from a checkpoint, and runs test and/or staged
    training depending on config.mode.

    :param current_gpu: GPU index assigned to this worker process.
    :param config: sample configuration object (attribute-style access).
    :raises RuntimeError: if the compression controller is not a
        binarization or quantization controller.
    """
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (ExecutionMode.DISTRIBUTED,
                                                   ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    config.device = get_device(config)

    # Only the main process writes logs / prints arguments.
    if is_main_process():
        configure_logging(logger, config)
        print_args(config)

    if config.seed is not None:
        # Reproducibility: fixed seed, deterministic cudnn, no autotuning.
        manual_seed(config.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False

    # Loss function (criterion).
    criterion = nn.CrossEntropyLoss().to(config.device)

    train_loader = train_sampler = val_loader = None
    resuming_checkpoint_path = config.resuming_checkpoint_path
    nncf_config = config.nncf_config
    pretrained = is_pretrained_model_requested(config)

    if config.to_onnx is not None:
        # Export-only run: data loaders are not needed, but some weights
        # (pretrained or a checkpoint) must be available.
        assert pretrained or (resuming_checkpoint_path is not None)
    else:
        # Data loading; loaders are registered for compression init.
        train_dataset, val_dataset = create_datasets(config)
        train_loader, train_sampler, val_loader = create_data_loaders(config, train_dataset,
                                                                      val_dataset)
        nncf_config = register_default_init_args(nncf_config, criterion, train_loader)

    # Create the model.
    model_name = config['model']
    model = load_model(model_name,
                       pretrained=pretrained,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'),
                       weights_path=config.get('weights'))
    # Keep an uncompressed copy as the knowledge-distillation teacher.
    original_model = copy.deepcopy(model)

    model.to(config.device)

    resuming_model_sd = None
    resuming_checkpoint = None
    if resuming_checkpoint_path is not None:
        resuming_checkpoint = load_resuming_checkpoint(resuming_checkpoint_path)
        resuming_model_sd = resuming_checkpoint['state_dict']

    compression_ctrl, model = create_compressed_model(model, nncf_config, resuming_model_sd)
    if not isinstance(compression_ctrl, (BinarizationController, QuantizationController)):
        raise RuntimeError(
            "The stage quantization sample worker may only be run with the binarization and quantization algorithms!"
        )

    model, _ = prepare_model_for_execution(model, config)
    original_model.to(config.device)

    if config.distributed:
        compression_ctrl.distributed()

    is_inception = 'inception' in model_name

    params_to_optimize = model.parameters()

    compression_config = config['compression']
    # 'compression' may be a single dict or a list of algorithm configs.
    quantization_config = compression_config if isinstance(compression_config, dict) \
        else compression_config[0]
    optimizer = get_quantization_optimizer(params_to_optimize, quantization_config)
    optimizer_scheduler = PolyLRDropScheduler(optimizer, quantization_config)
    kd_loss_calculator = KDLossCalculator(original_model)

    best_acc1 = 0
    # Optionally restore training state from the checkpoint (the model
    # weights themselves were already loaded via create_compressed_model).
    if resuming_checkpoint is not None and config.to_onnx is None:
        config.start_epoch = resuming_checkpoint['epoch']
        best_acc1 = resuming_checkpoint['best_acc1']
        kd_loss_calculator.original_model.load_state_dict(
            resuming_checkpoint['original_model_state_dict'])
        compression_ctrl.scheduler.load_state_dict(resuming_checkpoint['compression_scheduler'])
        optimizer.load_state_dict(resuming_checkpoint['optimizer'])
        optimizer_scheduler.load_state_dict(resuming_checkpoint['optimizer_scheduler'])
        if config.mode.lower() == 'train':
            logger.info("=> loaded checkpoint '{}' (epoch: {}, best_acc1: {:.3f})".format(
                resuming_checkpoint_path, resuming_checkpoint['epoch'], best_acc1))
        else:
            logger.info("=> loaded checkpoint '{}'".format(resuming_checkpoint_path))

    # ONNX export is a terminal action — export and return immediately.
    if config.to_onnx:
        compression_ctrl.export_model(config.to_onnx)
        logger.info("Saved to {}".format(config.to_onnx))
        return

    if config.execution_mode != ExecutionMode.CPU_ONLY:
        cudnn.benchmark = True

    if config.mode.lower() == 'test':
        print_statistics(compression_ctrl.statistics())
        validate(val_loader, model, criterion, config)

    if config.mode.lower() == 'train':
        batch_multiplier = (quantization_config.get("params", {})).get("batch_multiplier", 1)
        train_staged(config, compression_ctrl, model, criterion, is_inception,
                     optimizer_scheduler, model_name, optimizer, train_loader, train_sampler,
                     val_loader, kd_loss_calculator, batch_multiplier, best_acc1)