def scale_signed_dumping_worker(gpu, ngpus_per_node, config, tmp_path):
    """Distributed worker: build a compressed SqueezeNet, check the initialized
    quantizer scales against a reference sum, then dump parameters both right
    after broadcast and after a few training iterations.

    Runs on one GPU of a multi-process group; `config.rank`/`config.gpu` are
    set up by the distributed-init helper.
    """
    distributed_init_test_default(gpu, ngpus_per_node, config)
    loader = create_rank_dataloader(config, gpu)
    base_model = safe_thread_call(partial(squeezenet1_1, pretrained=True))
    config.register_extra_structs([QuantizationRangeInitArgs(loader)])

    compressed_model, ctrl = create_compressed_model_and_algo_for_test(base_model, config)
    scheduler = ctrl.scheduler
    compressed_model = post_compression_test_distr_init(ctrl, config, ngpus_per_node,
                                                        compressed_model)

    loss_fn = torch.nn.MSELoss().cuda(config.gpu)
    opt = torch.optim.Adam(compressed_model.parameters(), lr=0.01)
    torch.backends.cudnn.benchmark = True

    # Eval mode so Dropout is inactive and the scale values stay reproducible.
    compressed_model.eval()

    # Accumulate the scales of every symmetric quantizer and compare against
    # the known-good reference (1% relative tolerance via pytest.approx).
    act_sum = sum(quantizer.scale.sum()
                  for quantizer in get_all_modules_by_type(compressed_model,
                                                           "SymmetricQuantizer").values())
    ref_sum = 4447.291
    assert act_sum.item() == approx(ref_sum, 0.01), \
        'sum of scales is not expected {} vs {} rank {}'.format(act_sum.item(), ref_sum, config.rank)

    # Snapshot #1: parameters right after the post-init broadcast.
    dump_path = get_path_after_broadcast(tmp_path, config.rank)
    save_params(compressed_model, dump_path)

    scheduler.step()
    for step_idx, (batch_input, _) in enumerate(loader):
        if step_idx >= 6:  # only a handful of iterations are needed
            break
        output = compressed_model(batch_input)
        opt.zero_grad()
        # Random target of the classifier's output width (1000 classes);
        # only the resulting gradients matter for this test, not accuracy.
        target = torch.randn(1000).cuda(config.gpu, non_blocking=True)
        loss_value = loss_fn(output, target)
        scheduler.step()
        loss_value.backward()
        opt.step()
    scheduler.step()

    # Snapshot #2: parameters after the short training run.
    # NOTE(review): helper name 'get_path_path_after_train_iters' looks
    # typo'd ("path_path") — it is defined elsewhere; verify before renaming.
    dump_path = get_path_path_after_train_iters(tmp_path, config.rank)
    save_params(compressed_model, dump_path)
def hawq_dumping_worker(gpu, ngpus_per_node, config, tmp_path):
    """Distributed worker: build a compressed MobileNetV2 with default init
    args (including autoq evaluation hooks) and dump the per-scope activation
    bitwidths chosen for this rank to a file under `tmp_path`.
    """
    distributed_init_test_default(gpu, ngpus_per_node, config)
    loader = create_rank_dataloader(config, gpu)

    base_model = safe_thread_call(partial(mobilenet_v2, pretrained=True))
    base_model.eval()

    loss_fn = torch.nn.MSELoss().cuda(config.gpu)
    # The autoq eval fn is a stub (always 0): this test only checks the
    # dumped bitwidth assignment, not the search quality.
    config = register_default_init_args(config, loader, loss_fn,
                                        autoq_eval_fn=lambda *x: 0,
                                        autoq_eval_loader=loader)

    compressed_model, ctrl = create_compressed_model_and_algo_for_test(base_model, config)
    compressed_model = post_compression_test_distr_init(ctrl, config, ngpus_per_node,
                                                        compressed_model)

    # Eval mode so Dropout is inactive and the scale values stay reproducible.
    compressed_model.eval()

    bitwidth_per_scope = get_bitwidth_per_scope(compressed_model.module)
    dump_path = get_path_to_bitwidth_dump(tmp_path, config.rank)
    torch.save(bitwidth_per_scope, str(dump_path))