def test_find_layer_groups_to_scale_for_network_with_residuals(self):
    """
    Check the layer groups found on a batch-norm-folded MockMobileNetV2.

    Four groups are expected in total; three of them are verified explicitly
    and every group is printed for inspection.
    """
    input_shape = (1, 3, 224, 224)

    torch.manual_seed(10)
    model = MockMobileNetV2()
    model.eval()
    fold_all_batch_norms(model, input_shape)

    searcher = GraphSearchUtils(model, input_shape)
    layer_groups = searcher.find_layer_groups_to_scale()
    self.assertEqual(4, len(layer_groups))

    features = model.features
    expected_groups = [
        [features[3].conv[0], features[3].conv[3], features[3].conv[6]],
        [features[4].conv[0], features[4].conv[3], features[4].conv[6]],
        # The last verified group also contains the first layer of features[6]
        [features[5].conv[0], features[5].conv[3], features[5].conv[6], features[6][0]],
    ]
    for expected_group in expected_groups:
        self.assertIn(expected_group, layer_groups)

    # Dump the discovered groups by layer name
    for group in layer_groups:
        print("Group ------- ")
        for layer in group:
            print(" " + get_layer_name(model, layer))
def main():
    """
    Build a DeepLab (MobileNet backbone) model, wrap it in a quantization
    simulation and print the score returned by the evaluation function.

    Steps: parse arguments and seed, fold batch norms, replace ReLU6 with ReLU,
    load the checkpoint, compute encodings and evaluate the simulated model.

    :raises ValueError: if no checkpoint path is given, or if ``args.quant_scheme``
        is missing or is not one of the supported scheme names
    :return: None
    """
    args = arguments()
    seed(args)

    model = DeepLab(backbone='mobilenet', output_stride=16, num_classes=21, sync_bn=False)
    model.eval()

    from aimet_torch import batch_norm_fold
    from aimet_torch import utils

    args.input_shape = (1, 3, 513, 513)
    batch_norm_fold.fold_all_batch_norms(model, args.input_shape)
    utils.replace_modules_of_type1_with_type2(model, torch.nn.ReLU6, torch.nn.ReLU)

    # NOTE(review): weights are loaded only after folding/replacement, so the
    # checkpoint presumably stores an already folded model -- confirm.
    if not args.checkpoint_path:
        raise ValueError('checkpoint path {} must be specified'.format(
            args.checkpoint_path))
    model.load_state_dict(torch.load(args.checkpoint_path))

    data_loader_kwargs = {'worker_init_fn': work_init, 'num_workers': 0}
    # Only the validation loader is used by this script
    _, val_loader, _, _ = make_data_loader(args, **data_loader_kwargs)
    eval_func_quant = model_eval(args, val_loader)
    eval_func = model_eval(args, val_loader)

    from aimet_common.defs import QuantScheme
    from aimet_torch.quantsim import QuantizationSimModel

    # Scheme names map to attribute *names* so that only the selected
    # QuantScheme member is ever looked up.
    scheme_attr_by_name = {
        'range_learning_tf': 'training_range_learning_with_tf_init',
        'range_learning_tfe': 'training_range_learning_with_tf_enhanced_init',
        'tf': 'post_training_tf',
        'tf_enhanced': 'post_training_tf_enhanced',
    }
    scheme_name = getattr(args, 'quant_scheme', None)
    if scheme_name is None:
        # Previously this path ended in a NameError (or a TypeError for None)
        raise ValueError("quant_scheme must be specified; expected one of: " +
                         ", ".join(sorted(scheme_attr_by_name)))
    if scheme_name not in scheme_attr_by_name:
        raise ValueError("Got unrecognized quant_scheme: {}".format(scheme_name))
    quant_scheme = getattr(QuantScheme, scheme_attr_by_name[scheme_name])

    kwargs = {
        'quant_scheme': quant_scheme,
        'default_param_bw': args.default_param_bw,
        'default_output_bw': args.default_output_bw,
        'config_file': args.config_file
    }
    print(kwargs)

    sim = QuantizationSimModel(model.cpu(), input_shapes=args.input_shape, **kwargs)
    # NOTE(review): the tuples are forwarded untouched to the evaluation
    # callbacks; presumably (iteration limit, use-cuda flag) -- confirm in model_eval.
    sim.compute_encodings(eval_func_quant, (1024, True))
    post_quant_top1 = eval_func(sim.model.cuda(), (99999999, True))
    print("Post Quant mIoU :", post_quant_top1)
def test_auto_mobilenetv1(self):
    """ Cross-layer scaling of a batch-norm-folded MockMobileNetV1 returns 8 entries """
    input_shape = (1, 3, 224, 224)

    torch.manual_seed(10)
    model = MockMobileNetV1()
    model.eval()

    # Batch norms are folded first; scaling then runs on the folded model
    fold_all_batch_norms(model, input_shape)
    cls_set_infos = CrossLayerScaling.scale_model(model, input_shape)

    self.assertEqual(8, len(cls_set_infos))
def visualize_weight_ranges_model():
    """ Code example: visualize the weight ranges of a batch-norm-folded ResNet18 """
    input_shape = (1, 3, 224, 224)
    cpu = torch.device('cpu')

    # The server process handle is returned as well but is not used by this example
    visualization_url, process = start_bokeh_server_session(8002)

    model = models.resnet18(pretrained=True).to(cpu)
    model = model.eval()

    # Folding batch norms usually widens a layer's weight range, which makes
    # the per-layer weights easier to inspect in the visualization.
    batch_norm_fold.fold_all_batch_norms(model, input_shape)

    visualize_model.visualize_weight_ranges(model, visualization_url)
def visualize_relative_weight_ranges_model():
    """ Code example: visualize relative weight ranges of a batch-norm-folded ResNet18 """
    input_shape = (1, 3, 224, 224)
    cpu = torch.device('cpu')

    # The server process handle is returned as well but is not used by this example
    visualization_url, process = start_bokeh_server_session(8002)

    model = models.resnet18(pretrained=True).to(cpu)
    model = model.eval()

    # Folding batch norms usually widens a layer's weight range; the resulting
    # plot helps to find layers that could be equalized for better performance
    # on hardware.
    batch_norm_fold.fold_all_batch_norms(model, input_shape)

    visualize_model.visualize_relative_weight_ranges_to_identify_problematic_layers(
        model, visualization_url)
def visualize_changes_in_model_after_and_before_cle():
    """
    Code example: visualize a model before and after the Cross Layer
    Equalization optimization
    """
    input_shape = (1, 3, 224, 224)
    cpu = torch.device('cpu')

    # The server process handle is returned as well but is not used by this example
    visualization_url, process = start_bokeh_server_session(8002)

    model = models.resnet18(pretrained=True).to(cpu)
    model = model.eval()

    # Reference model: only batch norm folding is applied to the copy
    model_copy = copy.deepcopy(model)
    batch_norm_fold.fold_all_batch_norms(model_copy, input_shape)

    # Optimized model: the original instance goes through equalize_model
    equalize_model(model, input_shape)

    visualize_model.visualize_changes_after_optimization(
        model_copy, model, visualization_url)
def test_cross_layer_equalization_resnet18_visualize_to_identify_problem_layers(
        self):
    """
    Run the relative-weight-range visualization on a batch-norm-folded ResNet18.

    The bokeh server process group is terminated in a ``finally`` block so that
    a failing step does not leave the server (and its port) behind.
    """
    bokeh_visualizations_url, process = start_bokeh_server_session(6008)
    try:
        torch.manual_seed(10)
        model = models.resnet18()
        model = model.eval()
        batch_norm_fold.fold_all_batch_norms(model, (1, 3, 224, 224))

        bokeh_server_session = \
            visualize_model.visualize_relative_weight_ranges_to_identify_problematic_layers(
                model, bokeh_visualizations_url)
        bokeh_server_session.server_session.close("test complete")
    finally:
        # Always stop the server, even when the visualization raised
        os.killpg(os.getpgid(process.pid), signal.SIGTERM)
def equalize_model(model: torch.nn.Module, input_shapes: Union[Tuple, List[Tuple]]):
    """
    High-level API that applies Cross-Layer Equalization (CLE) to a model in place.

    The model is moved to the CPU while it is processed and is moved back to
    the device it was found on at the end.

    :param model: Model to equalize
    :param input_shapes: Shape of the input (a tuple, or a list of tuples for multiple inputs)
    :return: None
    """
    original_device = get_device(model)
    model.cpu()

    # Fold batchnorm layers and remember, per folded layer, its batchnorm
    bn_dict = {pair[0]: pair[1] for pair in fold_all_batch_norms(model, input_shapes)}

    # ReLU6 layers are swapped for plain ReLU
    utils.replace_modules_of_type1_with_type2(model, torch.nn.ReLU6, torch.nn.ReLU)

    # Cross-layer scaling on the applicable layer sets, followed by high-bias fold
    cls_set_info_list = CrossLayerScaling.scale_model(model, input_shapes)
    HighBiasFold.bias_fold(cls_set_info_list, bn_dict)

    model.to(device=original_device)
def run_pytorch_bn_fold(config, model):
    """
    Fold all batch norms of the model on the CPU.

    :param config: Object providing ``input_shape``
    :param model: Model to fold; it is moved to the CPU first
    :return: Tuple of (model, dict built from the first two items of each folded pair)
    """
    cpu_model = model.cpu()
    pairs = batch_norm_fold.fold_all_batch_norms(cpu_model, config.input_shape)
    conv_bn_pairs = {pair[0]: pair[1] for pair in pairs}
    return model, conv_bn_pairs
def test_auto_custom_model(self):
    """
    Cross-layer scaling of a batch-norm-folded MyModel yields three entries;
    the first two report a ReLU between the layers, the third does not.
    """
    input_shape = (2, 10, 24, 24)

    torch.manual_seed(10)
    model = MyModel()
    model.eval()

    # BN fold
    fold_all_batch_norms(model, input_shape)
    scale_factors = CrossLayerScaling.scale_model(model, input_shape)
    self.assertEqual(3, len(scale_factors))

    expected_relu_flags = (True, True, False)
    for idx, relu_expected in enumerate(expected_relu_flags):
        first_pair = scale_factors[idx].cls_pair_info_list[0]
        if relu_expected:
            self.assertTrue(first_pair.relu_activation_between_layers)
        else:
            self.assertFalse(first_pair.relu_activation_between_layers)
def test_cross_layer_equalization_mobilenet_v2_visualize_after_optimization(
        self):
    """
    Visualize a MobileNetV2 before (batch norm fold only) and after equalize_model.

    The bokeh session and the server process group are released in ``finally``
    blocks so that a failing step does not leave them behind.
    """
    bokeh_visualizations_url, process = start_bokeh_server_session(8006)
    try:
        torch.manual_seed(10)
        model = MobileNetV2().to(torch.device('cpu'))
        bokeh_session = BokehServerSession(bokeh_visualizations_url,
                                           session_id="cle")
        try:
            model = model.eval()

            # Reference copy only gets its batch norms folded
            model_copy = copy.deepcopy(model)
            batch_norm_fold.fold_all_batch_norms(model_copy, (1, 3, 224, 224))

            equalize_model(model, (1, 3, 224, 224))
            visualize_model.visualize_changes_after_optimization(
                model_copy, model, bokeh_visualizations_url)
        finally:
            bokeh_session.server_session.close("test complete")
    finally:
        # Always stop the server, even when an earlier step raised
        os.killpg(os.getpgid(process.pid), signal.SIGTERM)
def test_auto_hbf_transposed_conv2d_model(self):
    """
    High-bias fold on a TransposedConvModel must not increase any bias of conv1.

    The scale factor is drawn from a locally seeded NumPy generator so the test
    is reproducible without touching the global NumPy random state.
    """
    torch.manual_seed(10)
    model = TransposedConvModel()
    model.eval()

    # Captured before folding, while model.bn1 is still the original module
    bn_dict = {model.conv1: model.bn1}
    fold_all_batch_norms(model, (10, 10, 4, 4))

    scale_factor = np.random.RandomState(10).randn(10)
    cls_pair_info = ClsSetInfo.ClsSetLayerPairInfo(model.conv1, model.conv2,
                                                   scale_factor, True)
    cls_set_info = ClsSetInfo(cls_pair_info)

    # Independent snapshot of the bias before the fold
    bias_before = copy.deepcopy(model.conv1.bias.data)
    HighBiasFold.bias_fold([cls_set_info], bn_dict)

    self.assertTrue(torch.all(model.conv1.bias.data <= bias_before))
def test_bias_correction_hybrid(self):
    """
    Run bias correction on a batch-norm-folded MobileNetV2 and check that the
    biases of two early layers match those of an untouched copy, and that
    features[11].conv[0] is a plain Conv2d afterwards.
    """
    input_shape = (1, 3, 224, 224)

    torch.manual_seed(10)
    model = MobileNetV2().to(torch.device('cpu'))
    model.eval()

    # Conv/BN information is collected before the batch norms are folded away
    module_prop_list = aimet_torch.bias_correction.find_all_conv_bn_with_activation(
        model, input_shape=input_shape)
    batch_norm_fold.fold_all_batch_norms(model, input_shape)

    model_copy = copy.deepcopy(model)
    model.eval()
    model_copy.eval()

    data_loader = ImageNetDataLoader('./data/tiny-imagenet-200', 224, 1, 1)
    params = QuantParams(weight_bw=4, act_bw=4, round_mode="nearest",
                         quant_scheme=QuantScheme.post_training_tf)

    # NOTE(review): the positional 1, 1 and False are passed through as-is;
    # presumably sample counts and an "empirical only" flag -- confirm
    # against correct_bias.
    cuda_model = model.to(device="cuda")
    bias_correction.correct_bias(cuda_model, params, 1, data_loader.train_loader,
                                 1, module_prop_list, False)

    corrected_bias_0 = model.features[0][0].bias.detach().cpu().numpy()
    reference_bias_0 = model_copy.features[0][0].bias.detach().cpu().numpy()
    assert np.allclose(corrected_bias_0, reference_bias_0)

    corrected_bias_1 = model.features[1].conv[0].bias.detach().cpu().numpy()
    reference_bias_1 = model_copy.features[1].conv[0].bias.detach().cpu().numpy()
    assert np.allclose(corrected_bias_1, reference_bias_1)

    # To check if wrappers got removed
    assert isinstance(model.features[11].conv[0], nn.Conv2d)
def test_cle_depthwise_transposed_conv2D(self):
    """
    Batch norm fold, cross-layer scaling and high-bias fold on a model with a
    grouped and a regular ConvTranspose2d must keep the weight shapes and
    leave the model output close to the original output.
    """

    class TransposedConvModel(torch.nn.Module):
        """ Conv2d followed by two ConvTranspose2d layers, each with a batch norm """

        def __init__(self):
            super().__init__()
            self.conv = torch.nn.Conv2d(20, 10, 3)
            self.bn = torch.nn.BatchNorm2d(10)
            self.relu = torch.nn.ReLU()

            self.conv1 = torch.nn.ConvTranspose2d(10, 10, 3, groups=10)
            self.bn1 = torch.nn.BatchNorm2d(10)
            self.relu1 = torch.nn.ReLU()

            self.conv2 = torch.nn.ConvTranspose2d(10, 15, 3)
            self.bn2 = torch.nn.BatchNorm2d(15)

        def forward(self, x):
            # Regular case - conv followed by bn
            out = self.relu(self.bn(self.conv(x)))
            out = self.relu1(self.bn1(self.conv1(out)))
            return self.bn2(self.conv2(out))

    torch.manual_seed(10)
    model = TransposedConvModel()

    shape_conv1_before = copy.deepcopy(model.conv1.weight.shape)
    shape_conv2_before = copy.deepcopy(model.conv2.weight.shape)

    model = model.eval()
    input_shapes = (1, 20, 3, 4)
    random_input = torch.rand(input_shapes)
    output_before_cle = model(random_input).detach().numpy()

    pairs = batch_norm_fold.fold_all_batch_norms(model, input_shapes)
    bn_dict = {pair[0]: pair[1] for pair in pairs}

    cls_set_info_list = CrossLayerScaling.scale_model(model, input_shapes)
    HighBiasFold.bias_fold(cls_set_info_list, bn_dict)

    self.assertEqual(shape_conv1_before, model.conv1.weight.shape)
    self.assertEqual(shape_conv2_before, model.conv2.weight.shape)

    output_after_cle = model(random_input).detach().numpy()
    outputs_match = np.allclose(output_before_cle, output_after_cle, rtol=1.e-2)
    self.assertTrue(outputs_match)
def test_find_cls_sets_mobilenetv1(self):
    """
    A batch-norm-folded MockMobileNetV1 yields one layer group holding
    model.model[0][0] followed by entries [0] and [3] of blocks 1 through 8.
    """
    input_shape = (1, 3, 224, 224)

    torch.manual_seed(10)
    model = MockMobileNetV1()
    model.eval()
    fold_all_batch_norms(model, input_shape)

    searcher = GraphSearchUtils(model, input_shape)
    layer_groups = searcher.find_layer_groups_to_scale()
    self.assertEqual(1, len(layer_groups))

    # Expected order: [0][0], then ([i][0], [i][3]) for i = 1..8
    expected_group = [model.model[0][0]]
    for block_idx in range(1, 9):
        expected_group.append(model.model[block_idx][0])
        expected_group.append(model.model[block_idx][3])
    self.assertIn(expected_group, layer_groups)

    # Print the layer tuples derived from the single group
    for layer_tuple in GraphSearchUtils.convert_layer_group_to_cls_sets(layer_groups[0]):
        print(layer_tuple)
def test_fold_bn_after_transposed_conv_depthwise(self):
    """
    Folding a batch norm that follows a grouped ConvTranspose2d replaces the
    batch norm module and keeps the model output within tolerance.
    """

    class MyModel(torch.nn.Module):
        """ Grouped ConvTranspose2d -> BatchNorm2d -> ReLU """

        def __init__(self):
            super().__init__()
            self.conv1 = torch.nn.ConvTranspose2d(10, 10, 3, groups=10)
            self.bn1 = torch.nn.BatchNorm2d(10)
            self.relu1 = torch.nn.ReLU()

        def forward(self, x):
            return self.relu1(self.bn1(self.conv1(x)))

    input_shape = (2, 10, 24, 24)

    torch.manual_seed(10)
    model = MyModel()
    model = model.eval()
    random_input = torch.rand(*input_shape)

    # One forward pass in train mode on a random batch, so the batch norm
    # params are set to something non-zero
    model.train()
    model(torch.randn(input_shape))
    model.eval()

    baseline_output = model(random_input).detach().numpy()
    fold_all_batch_norms(model, input_shape)
    output_after_fold = model(random_input).detach().numpy()

    self.assertFalse(isinstance(model.bn1, torch.nn.BatchNorm2d))
    outputs_match = np.allclose(baseline_output, output_after_fold, rtol=1.e-2)
    self.assertTrue(outputs_match)
def test_bn_fold_auto_mode_transposed_conv2d(self):
    """
    Folding batch norms of TransposedConvModel replaces bn1, reports two folded
    pairs and leaves the model output unchanged within 1e-5 per element.
    """
    input_shape = (10, 10, 4, 4)

    torch.manual_seed(0)
    model = TransposedConvModel()
    model = model.eval()

    random_input = torch.rand(input_shape)
    baseline_output = model(random_input).detach().numpy()

    folded_pairs = fold_all_batch_norms(model, input_shape)
    output_after_fold = model(random_input).detach().numpy()

    self.assertFalse(isinstance(model.bn1, torch.nn.BatchNorm2d))

    # Compare the largest absolute deviation: a signed sum would let errors of
    # opposite sign cancel and would accept any negative deviation.
    max_abs_diff = abs(baseline_output - output_after_fold).max()
    self.assertLess(max_abs_diff, 1e-5)

    self.assertEqual(len(folded_pairs), 2)
def test_bn_fold_auto_mode(self):
    """
    Folding all batch norms of MyModel replaces bn1, reports two folded pairs
    and keeps the model output within tolerance.
    """
    input_shape = (2, 10, 24, 24)

    torch.manual_seed(10)
    model = MyModel()
    model = model.eval()

    random_input = torch.rand(*input_shape)
    baseline_output = model(random_input).detach().numpy()

    folded_pairs = fold_all_batch_norms(model, input_shape)
    output_after_fold = model(random_input).detach().numpy()

    self.assertFalse(isinstance(model.bn1, torch.nn.BatchNorm2d))
    outputs_match = np.allclose(baseline_output, output_after_fold, rtol=1.e-2)
    self.assertTrue(outputs_match)
    self.assertEqual(len(folded_pairs), 2)
def cross_layer_equalization_auto_step_by_step():
    """
    Code example: run the individual Cross Layer Equalization steps (batch norm
    fold, ReLU6 replacement, cross-layer scaling, high-bias fold) on ResNet18
    """
    input_shape = (1, 3, 224, 224)
    model = models.resnet18(pretrained=True).eval()

    # Step 1: fold batchnorm layers and keep the folded pairs as a dict
    pairs = batch_norm_fold.fold_all_batch_norms(model, input_shape)
    bn_dict = {pair[0]: pair[1] for pair in pairs}

    # Step 2: replace any ReLU6 layers with ReLU
    utils.replace_modules_of_type1_with_type2(model, torch.nn.ReLU6, torch.nn.ReLU)

    # Step 3: cross-layer scaling on applicable layer sets
    scaler = cross_layer_equalization.CrossLayerScaling
    cls_set_info_list = scaler.scale_model(model, input_shape)

    # Step 4: high-bias fold
    cross_layer_equalization.HighBiasFold.bias_fold(cls_set_info_list, bn_dict)
def test_cross_layer_equalization_resnet(self):
    """
    Run batch norm fold, cross-layer scaling and high-bias fold on ResNet18.

    Checks that scaling changes the weights of three layer1 convolutions and
    that high-bias fold does not increase the biases of two layer1 convolutions.
    All "before" values are taken as independent copies so the comparisons do
    not depend on whether the library updates parameters in place.
    """
    input_shape = (1, 3, 224, 224)

    torch.manual_seed(10)
    model = models.resnet18(pretrained=True)
    model = model.eval()

    folded_pairs = batch_norm_fold.fold_all_batch_norms(model, input_shape)
    bn_dict = {pair[0]: pair[1] for pair in folded_pairs}

    self.assertFalse(isinstance(model.layer2[0].bn1, torch.nn.BatchNorm2d))

    # .numpy() shares memory with the tensor, hence the explicit .copy()
    w1 = model.layer1[0].conv1.weight.detach().numpy().copy()
    w2 = model.layer1[0].conv2.weight.detach().numpy().copy()
    w3 = model.layer1[1].conv1.weight.detach().numpy().copy()

    cls_set_info_list = CrossLayerScaling.scale_model(model, input_shape)

    # check if weights are updating
    self.assertFalse(np.allclose(model.layer1[0].conv1.weight.detach().numpy(), w1))
    self.assertFalse(np.allclose(model.layer1[0].conv2.weight.detach().numpy(), w2))
    self.assertFalse(np.allclose(model.layer1[1].conv1.weight.detach().numpy(), w3))

    # .data alone would alias the live bias, hence the explicit .clone()
    b1 = model.layer1[0].conv1.bias.data.clone()
    b2 = model.layer1[1].conv2.bias.data.clone()

    HighBiasFold.bias_fold(cls_set_info_list, bn_dict)

    self.assertTrue(torch.all(model.layer1[0].conv1.bias.data <= b1))
    self.assertTrue(torch.all(model.layer1[1].conv2.bias.data <= b2))
def test_fold_auto_mode_with_linear_layer(self):
    """
    Folding a BatchNorm1d that follows a Linear layer replaces the batch norm,
    keeps the output within tolerance and reports the (fc1, original bn) pair.
    """

    class MyModel(torch.nn.Module):
        """ Linear -> BatchNorm1d """

        def __init__(self):
            super().__init__()
            self.fc1 = torch.nn.Linear(10, 20)
            self.bn1 = torch.nn.BatchNorm1d(20)

        def forward(self, x):
            return self.bn1(self.fc1(x))

    input_shape = (32, 10)

    model = MyModel()
    model.eval()
    random_input = torch.randn(input_shape)

    # One forward pass in train mode on a random batch, so the batch norm
    # params are set to something non-zero
    model.train()
    model(torch.randn(input_shape))
    model.eval()

    baseline_output = model(random_input).detach().numpy()

    # Keep a handle on the batch norm before the fold replaces model.bn1
    orig_bn = model.bn1
    bn_pairs = fold_all_batch_norms(model, input_shape)
    output_after_fold = model(random_input).detach().numpy()

    self.assertFalse(isinstance(model.bn1, torch.nn.BatchNorm1d))
    outputs_match = np.allclose(baseline_output, output_after_fold, rtol=1.e-2)
    self.assertTrue(outputs_match)

    self.assertEqual(1, len(bn_pairs))
    self.assertIn((model.fc1, orig_bn), bn_pairs)