def test_can_broadcast_initialized_precisions_in_distributed_mode(config_builder, tmp_path):
    """Spawn one worker per visible GPU and verify that the HAWQ-initialized
    bitwidth assignments dumped by every worker are identical, i.e. the
    precision initialization was broadcast consistently across processes.
    """
    config = config_builder.build()
    num_gpus = torch.cuda.device_count()
    config.world_size = num_gpus
    # Each worker dumps its bitwidth state to tmp_path; join=True blocks
    # until every process has finished.
    torch.multiprocessing.spawn(
        hawq_dumping_worker,
        args=(num_gpus, config, tmp_path),
        nprocs=num_gpus,
        join=True,
    )
    # compare_multi_gpu_dump returns a truthy value on mismatch between GPUs.
    assert not compare_multi_gpu_dump(config, tmp_path, get_path_to_bitwidth_dump)
def test_hawq_broadcast_avg_traces_in_distributed_mode(tmp_path):
    """Spawn one worker per visible GPU and verify that averaged Hessian
    traces computed during HAWQ init agree across all processes, by comparing
    the per-GPU bitwidth dumps each worker writes to tmp_path.
    """
    points = 10
    per_batch = 2
    config = HAWQConfigBuilder(
        batch_size=per_batch,
        num_data_points=points,
        image_size=224,
    ).build()
    num_gpus = torch.cuda.device_count()
    config.world_size = num_gpus
    torch.multiprocessing.spawn(
        hawq_dumping_worker,
        args=(num_gpus, config, tmp_path),
        nprocs=num_gpus,
        join=True,
    )
    # A truthy return from compare_multi_gpu_dump means the dumps diverged.
    assert not compare_multi_gpu_dump(config, tmp_path, get_path_to_bitwidth_dump)
def test_multiprocessing_distributed_shares_init_scales_signedness_across_gpus(tmp_path):
    """Verify that quantizer scales and signedness chosen during range
    initialization are shared across GPU workers: dumps taken right after
    broadcast and again after several training iterations must match on
    every process.
    """
    steps = 10
    config = get_squeezenet_quantization_config()
    config['compression']['initializer'] = {'range': {'num_init_steps': steps}}
    num_gpus = torch.cuda.device_count()
    config.world_size = num_gpus
    torch.multiprocessing.spawn(
        scale_signed_dumping_worker,
        args=(num_gpus, config, tmp_path),
        nprocs=num_gpus,
        join=True,
    )
    # Truthy result from compare_multi_gpu_dump signals divergence between GPUs.
    assert not compare_multi_gpu_dump(config, tmp_path, get_path_after_broadcast)
    assert not compare_multi_gpu_dump(config, tmp_path, get_path_path_after_train_iters)