def test_hawq_on_single_conv_without_quantizers(_seed, dataset_dir, tmp_path, params: HAWQTestParams):
    config = get_squeezenet_quantization_config(batch_size=params.batch_size)
    iter_number = params.iter_number
    tolerance = 4e-4

    model = squeezenet1_1(num_classes=10, dropout=0)
    from torchvision.models.squeezenet import model_urls
    load_state(model, model_zoo.load_url(model_urls['squeezenet1_1']))
    model = model.cuda()

    criterion = nn.CrossEntropyLoss().cuda()

    if not dataset_dir:
        dataset_dir = str(tmp_path)
    data_loader, _ = create_test_dataloaders(config, dataset_dir)
    device = next(model.parameters()).device

    # Freeze everything except the first convolution so that the Hessian trace
    # is estimated for that single layer only.
    for _, param in model.named_parameters():
        param.requires_grad = False
    first_conv = next(iter(get_all_modules_by_type(model, 'Conv2d').values()))
    first_conv.weight.requires_grad = True

    trace_estimator = HessianTraceEstimator(model, default_criterion_fn, criterion, device, data_loader,
                                            params.num_data_points)
    actual_state = trace_estimator.get_average_traces(max_iter=iter_number, tolerance=tolerance)
    assert math.isclose(actual_state.item(), params.ref_trace, rel_tol=1e-09)

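# For reference, a minimal sketch of the Hutchinson-style estimation that a
# Hessian trace estimator like the one above is built around: trace(H) is
# approximated by averaging v^T H v over random Rademacher vectors v, with H v
# obtained as a Hessian-vector product via double backprop. This is an
# illustrative sketch, not HessianTraceEstimator's actual internals; the
# `loss`/`params` arguments and the convergence check are assumptions.
import torch


def hutchinson_trace_sketch(loss, params, max_iter=500, tolerance=4e-4):
    grads = torch.autograd.grad(loss, params, create_graph=True)
    estimates = []
    mean = 0.0
    for i in range(max_iter):
        # Rademacher probe: entries are +1 or -1 with equal probability
        vs = [torch.randint_like(p, high=2) * 2.0 - 1.0 for p in params]
        # Hessian-vector products through the retained graph
        hvs = torch.autograd.grad(grads, params, grad_outputs=vs, retain_graph=True)
        estimates.append(sum((hv * v).sum() for hv, v in zip(hvs, vs)).item())
        prev_mean, mean = mean, sum(estimates) / len(estimates)
        # Stop once the running average stabilizes within the tolerance
        if i > 0 and abs(mean - prev_mean) / (abs(prev_mean) + 1e-6) < tolerance:
            break
    return mean
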
def test_multiprocessing_distributed_shares_init_scales_signedness_across_gpus(tmp_path):
    num_init_steps = 10

    config = get_squeezenet_quantization_config()
    config['compression']['initializer'] = {'range': {'num_init_steps': num_init_steps}}

    ngpus_per_node = torch.cuda.device_count()
    config.world_size = ngpus_per_node
    torch.multiprocessing.spawn(scale_signed_dumping_worker,
                                nprocs=ngpus_per_node,
                                args=(ngpus_per_node, config, tmp_path),
                                join=True)

    # Each worker dumps its quantizer scales and signedness; the dumps must be
    # identical across GPUs both right after the initialized values are
    # broadcast and after the first training iterations.
    assert not compare_multi_gpu_dump(config, tmp_path, get_path_after_broadcast)
    assert not compare_multi_gpu_dump(config, tmp_path, get_path_path_after_train_iters)

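# compare_multi_gpu_dump is expected to be truthy when any per-rank dump
# diverges. A minimal sketch of such a check, assuming each rank saved a dict
# of tensors via torch.save and that the path helper takes (tmp_path, rank) --
# both assumptions about helpers defined elsewhere in the test suite:
import torch


def compare_multi_gpu_dump_sketch(config, tmp_path, get_path):
    reference = None
    for rank in range(config.world_size):
        dump = torch.load(get_path(tmp_path, rank))
        if reference is None:
            reference = dump
            continue
        for key, tensor in dump.items():
            # Any mismatch between ranks means the init state was not shared
            if not torch.equal(tensor, reference[key]):
                return True
    return False
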
def test_hw_config_quantization_can_quantize_squeezenet(hw_config_type):
    config = get_squeezenet_quantization_config()
    config["hw_config"] = hw_config_type.value
    model = squeezenet1_1_custom()
    create_compressed_model_and_algo_for_test(model, config)

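# hw_config_type is assumed to be supplied via pytest parametrization over a
# hardware-config enum. A minimal sketch of such a setup; the HWConfigType
# name and its members are assumptions inferred from the use of `.value`
# above, not the framework's actual definitions.
import enum

import pytest


class HWConfigType(enum.Enum):
    CPU = 'cpu'
    GPU = 'gpu'
    VPU = 'vpu'


@pytest.fixture(params=list(HWConfigType), ids=[t.value for t in HWConfigType])
def hw_config_type(request):
    return request.param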