Example #1
def test_policy_scheduling():
    model = create_model(False, 'cifar10', 'resnet20_cifar')
    scheduler = distiller.CompressionScheduler(model)
    policy = distiller.PruningPolicy(None, None)
    with pytest.raises(AssertionError):
        scheduler.add_policy(policy)
    with pytest.raises(AssertionError):
        # Test for mutually-exclusive configuration
        scheduler.add_policy(policy,
                             epochs=[1, 2, 3],
                             starting_epoch=4,
                             ending_epoch=5,
                             frequency=1)

    scheduler.add_policy(policy,
                         epochs=None,
                         starting_epoch=4,
                         ending_epoch=5,
                         frequency=1)
    # Regression test for issue #176 - https://github.com/NervanaSystems/distiller/issues/176
    scheduler.add_policy(policy, epochs=[1, 2, 3])
    sched_metadata = scheduler.sched_metadata[policy]
    assert sched_metadata['starting_epoch'] == 1
    assert sched_metadata['ending_epoch'] == 4
    assert sched_metadata['frequency'] is None

    scheduler.add_policy(policy, epochs=[5])
    sched_metadata = scheduler.sched_metadata[policy]
    assert sched_metadata['starting_epoch'] == 5
    assert sched_metadata['ending_epoch'] == 6
    assert sched_metadata['frequency'] is None
Example #2
def dict_config(model, optimizer, sched_dict, scheduler=None):
    app_cfg_logger.debug('Schedule contents:\n' +
                         json.dumps(sched_dict, indent=2))

    if scheduler is None:
        scheduler = distiller.CompressionScheduler(model)

    pruners = __factory('pruners', model, sched_dict)
    regularizers = __factory('regularizers', model, sched_dict)
    quantizers = __factory('quantizers',
                           model,
                           sched_dict,
                           optimizer=optimizer)
    if len(quantizers) > 1:
        raise ValueError("\nError: Multiple Quantizers not supported")
    extensions = __factory('extensions', model, sched_dict)

    try:
        lr_policies = []
        for policy_def in sched_dict['policies']:
            policy = None
            if 'pruner' in policy_def:
                try:
                    instance_name, args = __policy_params(policy_def, 'pruner')
                except TypeError as e:
                    print(
                        '\n\nFatal Error: a policy is defined with a null pruner'
                    )
                    print(
                        'Here\'s the policy definition for your reference:\n{}'
                        .format(json.dumps(policy_def, indent=1)))
                    raise
                assert instance_name in pruners, "Pruner {} was not defined in the list of pruners".format(
                    instance_name)
                pruner = pruners[instance_name]
                policy = distiller.PruningPolicy(pruner, args)

            elif 'regularizer' in policy_def:
                instance_name, args = __policy_params(policy_def,
                                                      'regularizer')
                assert instance_name in regularizers, "Regularizer {} was not defined in the list of regularizers".format(
                    instance_name)
                regularizer = regularizers[instance_name]
                if args is None:
                    policy = distiller.RegularizationPolicy(regularizer)
                else:
                    policy = distiller.RegularizationPolicy(
                        regularizer, **args)

            elif 'quantizer' in policy_def:
                instance_name, args = __policy_params(policy_def, 'quantizer')
                assert instance_name in quantizers, "Quantizer {} was not defined in the list of quantizers".format(
                    instance_name)
                quantizer = quantizers[instance_name]
                policy = distiller.QuantizationPolicy(quantizer)

            elif 'lr_scheduler' in policy_def:
                # LR schedulers take an optimizer in their CTOR, so postpone handling until we're
                # certain a quantization policy was initialized (if one exists)
                lr_policies.append(policy_def)
                continue

            elif 'extension' in policy_def:
                instance_name, args = __policy_params(policy_def, 'extension')
                assert instance_name in extensions, "Extension {} was not defined in the list of extensions".format(
                    instance_name)
                extension = extensions[instance_name]
                policy = extension

            else:
                raise ValueError(
                    "\nFATAL Parsing error while parsing the pruning schedule - unknown policy [{}]"
                    .format(policy_def))

            add_policy_to_scheduler(policy, policy_def, scheduler)

        # Any changes to the optimizer caused by a quantizer have occurred by now, so it is safe to create LR schedulers
        lr_schedulers = __factory('lr_schedulers',
                                  model,
                                  sched_dict,
                                  optimizer=optimizer)
        for policy_def in lr_policies:
            instance_name, args = __policy_params(policy_def, 'lr_scheduler')
            assert instance_name in lr_schedulers, "LR-scheduler {} was not defined in the list of lr-schedulers".format(
                instance_name)
            lr_scheduler = lr_schedulers[instance_name]
            policy = distiller.LRPolicy(lr_scheduler)
            add_policy_to_scheduler(policy, policy_def, scheduler)

    except AssertionError:
        # propagate the assertion information
        raise
    except Exception as exception:
        print("\nFATAL Parsing error!\n%s" % json.dumps(policy_def, indent=1))
        print("Exception: %s %s" % (type(exception), exception))
        raise
    return scheduler
Example #3
def perform_sensitivity_analysis(model, net_params, sparsities, test_func,
                                 group):
    """Perform a sensitivity test for a model's weights parameters.

    The model should be trained to maximum accuracy, because we aim to understand
    the behavior of the model's performance in relation to pruning of a specific
    weights tensor.

    By default this function will test all of the model's parameters.

    The return value is a complex sensitivities dictionary: the dictionary's
    key is the name (string) of the weights tensor.  The value is another dictionary,
    where the tested sparsity-level is the key, and a (top1, top5, loss) tuple
    is the value.
    Below is an example of such a dictionary:

    .. code-block:: python
    {'features.module.6.weight':    {0.0:  (56.518, 79.07,  1.9159),
                                     0.05: (56.492, 79.1,   1.9161),
                                     0.10: (56.212, 78.854, 1.9315),
                                     0.15: (35.424, 60.3,   3.0866)},
     'classifier.module.1.weight':  {0.0:  (56.518, 79.07,  1.9159),
                                     0.05: (56.514, 79.07,  1.9159),
                                     0.10: (56.434, 79.074, 1.9138),
                                     0.15: (54.454, 77.854, 2.3127)} }

    The test_func is expected to execute the model on a test/validation dataset,
    and return the results for top1 and top5 accuracies, and the loss value.
    """
    if group not in ['element', 'filter', 'channel']:
        raise ValueError(
            "group parameter contains an illegal value: {}".format(group))
    sensitivities = OrderedDict()

    for param_name in net_params:
        if model.state_dict()[param_name].dim() not in [2, 4]:
            continue

        # Make a copy of the model, because when we apply the zeros mask (i.e.
        # perform pruning), the model's weights are altered
        model_cpy = deepcopy(model)

        sensitivity = OrderedDict()
        for sparsity_level in sparsities:
            sparsity_level = float(sparsity_level)
            msglogger.info("Testing sensitivity of %s [%0.1f%% sparsity]" %
                           (param_name, sparsity_level * 100))
            # Create the pruner (a level pruner), the pruning policy and the
            # pruning schedule.
            if group == 'element':
                # Element-wise sparsity
                sparsity_levels = {param_name: sparsity_level}
                pruner = distiller.pruning.SparsityLevelParameterPruner(
                    name='sensitivity', levels=sparsity_levels)
            elif group == 'filter':
                # Filter ranking
                if model.state_dict()[param_name].dim() != 4:
                    continue
                regims = {param_name: [sparsity_level, '3D']}
                pruner = distiller.pruning.L1RankedStructureParameterPruner(
                    name='sensitivity', reg_regims=regims)
            elif group == 'channel':
                # Channel ranking
                if model.state_dict()[param_name].dim() != 4:
                    continue
                regims = {param_name: [sparsity_level, 'Channels']}
                pruner = distiller.pruning.L1RankedStructureParameterPruner(
                    name='sensitivity', reg_regims=regims)

            policy = distiller.PruningPolicy(pruner, pruner_args=None)
            scheduler = CompressionScheduler(model_cpy)
            scheduler.add_policy(policy, epochs=[0])

            # Compute the pruning mask per the pruner and apply the mask on the weights
            scheduler.on_epoch_begin(0)
            scheduler.apply_mask()

            # Test and record the performance of the pruned model
            prec1, prec5, loss = test_func(model=model_cpy)
            sensitivity[sparsity_level] = (prec1, prec5, loss)
            sensitivities[param_name] = sensitivity
    return sensitivities
Example #4
def dictConfig(model, optimizer, schedule, sched_dict, logger):
    logger.debug(json.dumps(sched_dict, indent=1))

    pruners = __factory('pruners', model, sched_dict)
    regularizers = __factory('regularizers', model, sched_dict)
    lr_schedulers = __factory('lr_schedulers',
                              model,
                              sched_dict,
                              optimizer=optimizer)
    extensions = __factory('extensions', model, sched_dict)

    try:
        for policy_def in sched_dict['policies']:
            policy = None
            if 'pruner' in policy_def:
                try:
                    instance_name, args = __policy_params(policy_def, 'pruner')
                except TypeError as e:
                    print(
                        '\n\nFatal Error: a policy is defined with a null pruner'
                    )
                    print(
                        'Here\'s the policy definition for your reference:\n{}'
                        .format(json.dumps(policy_def, indent=1)))
                    exit(1)
                assert instance_name in pruners, "Pruner {} was not defined in the list of pruners".format(
                    instance_name)
                pruner = pruners[instance_name]
                policy = distiller.PruningPolicy(pruner, args)

            elif 'regularizer' in policy_def:
                instance_name, args = __policy_params(policy_def,
                                                      'regularizer')
                assert instance_name in regularizers, "Regularizer {} was not defined in the list of regularizers".format(
                    instance_name)
                regularizer = regularizers[instance_name]
                if args is None:
                    policy = distiller.RegularizationPolicy(regularizer)
                else:
                    policy = distiller.RegularizationPolicy(
                        regularizer, **args)

            elif 'lr_scheduler' in policy_def:
                instance_name, args = __policy_params(policy_def,
                                                      'lr_scheduler')
                assert instance_name in lr_schedulers, "LR-scheduler {} was not defined in the list of lr-schedulers".format(
                    instance_name)
                lr_scheduler = lr_schedulers[instance_name]
                policy = distiller.LRPolicy(lr_scheduler)

            elif 'extension' in policy_def:
                instance_name, args = __policy_params(policy_def, 'extension')
                assert instance_name in extensions, "Extension {} was not defined in the list of extensions".format(
                    instance_name)
                extension = extensions[instance_name]
                policy = extension

            else:
                print(
                    "\nFATAL Parsing error while parsing the pruning schedule - unknown policy [%s]"
                    % policy_def)
                exit(1)

            if 'epochs' in policy_def:
                schedule.add_policy(policy, epochs=policy_def['epochs'])
            else:
                schedule.add_policy(
                    policy,
                    starting_epoch=policy_def['starting_epoch'],
                    ending_epoch=policy_def['ending_epoch'],
                    frequency=policy_def['frequency'])
    except AssertionError:
        # propagate the assertion information
        raise
    except Exception as exception:
        print("\nFATAL Parsing error!\n%s" % json.dumps(policy_def, indent=1))
        print("Exception: %s %s" % (type(exception), exception))
        exit(1)

    return schedule
Example #5
def objective(space):
    global model
    global count
    global global_min_score
    
    # Explore a new model
    model = create_model(False, args.dataset, args.arch, device_ids=args.gpus)
    count += 1
    # Objective function: F(Acc, Sparsity) = (1 - Acc) + alpha * (1 - Sparsity)
    accuracy = 0
    alpha = 0.3  # Hyper-parameter: the weight of the sparsity term
    latency = 0.0
    sparsity = 0.0
    # Training hyperparameters

    if args.resume:
        model, compression_scheduler, start_epoch = apputils.load_checkpoint(
            model, chkpt_file=args.resume)
        print('resume mode: {}'.format(args.resume))

    print(global_min_score)
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    """
    distiller/distiller/config.py
        # Element-wise sparsity
        sparsity_levels = {net_param: sparsity_level}
        pruner = distiller.pruning.SparsityLevelParameterPruner(name='sensitivity', levels=sparsity_levels)
        policy = distiller.PruningPolicy(pruner, pruner_args=None)
        scheduler = distiller.CompressionScheduler(model)
        scheduler.add_policy(policy, epochs=[0, 2, 4])
        # Local search:
        add multiple pruners, one for each layer
    """
    sparsity_levels = {}
    for key, value in space.items():
        sparsity_levels[key] = value
    #print(sparsity_levels)

    pruner = distiller.pruning.SparsityLevelParameterPruner(name='sensitivity', levels=sparsity_levels) # for SparsityLevelParameterPruner
    # pruner = distiller.pruning.SensitivityPruner(name='sensitivity', sensitivities=sparsity_levels) # for SensitivityPruner
    policy = distiller.PruningPolicy(pruner, pruner_args=None)
    lrpolicy = distiller.LRPolicy(torch.optim.lr_scheduler.StepLR(optimizer, step_size=6, gamma=0.1))
    compression_scheduler = distiller.CompressionScheduler(model)
    compression_scheduler.add_policy(policy, epochs=[PrunerEpoch])
    # compression_scheduler.add_policy(policy, starting_epoch=0, ending_epoch=38, frequency=2)
    compression_scheduler.add_policy(lrpolicy, starting_epoch=0, ending_epoch=50, frequency=1)
    """
    distiller/example/classifier_compression/compress_classifier.py
    For each epoch:
        compression_scheduler.on_epoch_begin(epoch)
        train()
        save_checkpoint()
        compression_scheduler.on_epoch_end(epoch)

    train():
        For each training step:
            compression_scheduler.on_minibatch_begin(epoch)
            output = model(input)
            loss = criterion(output, target)
            compression_scheduler.before_backward_pass(epoch)
            loss.backward()
            optimizer.step()
            compression_scheduler.on_minibatch_end(epoch)
    """
    
    local_min_score = 2.
    for i in range(args.epochs):
        compression_scheduler.on_epoch_begin(i)
        train_accuracy = train(i, criterion, optimizer, compression_scheduler)
        val_accuracy = validate() # Validate hyperparameter setting
        t, sparsity = distiller.weights_sparsity_tbl_summary(model, return_total_sparsity=True)
        compression_scheduler.on_epoch_end(i, optimizer)
        apputils.save_checkpoint(i, args.arch, model, optimizer, compression_scheduler, train_accuracy, False,
                                         'hyperopt', './')
        print('Epoch: {}, train_acc: {:.4f}, val_acc: {:.4f}, sparsity: {:.4f}'.format(i, train_accuracy, val_accuracy, sparsity))
        
        score = (1 - (val_accuracy / 100.)) + (alpha * (1 - sparsity / 100.))  # objective function
        if score < global_min_score:
            global_min_score = score
            apputils.save_checkpoint(i, args.arch, model, optimizer, compression_scheduler, train_accuracy, True, 'best', './')

        if score < local_min_score:
            local_min_score = score

        if PrunerConstraint and i >= PrunerEpoch and (sparsity < Expected_Sparsity_Level_Low or sparsity > Expected_Sparsity_Level_High):
            break

    test_accuracy = test() # Validate hyperparameter setting

    print('{} trials: score: {:.4f}, train_acc:{:.4f}, val_acc:{:.4f}, test_acc:{:.4f}, sparsity:{:.4f}'.format(count, 
                                      local_min_score, 
                                      train_accuracy, 
                                      val_accuracy, 
                                      test_accuracy,
                                      sparsity))

    return local_min_score
Example #6
def objective(space):
    global model
    global count
    global best_dict
    
    # Explore a new model
    model = create_model(False, args.dataset, args.arch, device_ids=args.gpus)
    if args.resume:
        model, _, _ = apputils.load_checkpoint(
            model, chkpt_file=args.resume)
    
    count += 1
    print('{} trial starting...'.format(count))
    # Objective function: combines validation accuracy and sparsity (see the score computation below)
    accuracy = 0
    # alpha = 0.2
    alpha = 1.0  # Hyper-parameter: the weight of the sparsity term
    sparsity = 0.0
    # Training hyperparameters
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    """
    distiller/distiller/config.py
        # Element-wise sparsity
        sparsity_levels = {net_param: sparsity_level}
        pruner = distiller.pruning.SparsityLevelParameterPruner(name='sensitivity', levels=sparsity_levels)
        policy = distiller.PruningPolicy(pruner, pruner_args=None)
        scheduler = distiller.CompressionScheduler(model)
        scheduler.add_policy(policy, epochs=[0, 2, 4])
        # Local search:
        add multiple pruners, one for each layer
    """
    sparsity_levels = {}
    for key, value in space.items():
        sparsity_levels[key] = value
    pruner = distiller.pruning.SparsityLevelParameterPruner(name='sensitivity', levels=sparsity_levels)
    policy = distiller.PruningPolicy(pruner, pruner_args=None)
    lrpolicy = distiller.LRPolicy(torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1))
    compression_scheduler = distiller.CompressionScheduler(model)
    #compression_scheduler.add_policy(policy, epochs=[90])
    compression_scheduler.add_policy(policy, epochs=[0])
    compression_scheduler.add_policy(lrpolicy, starting_epoch=0, ending_epoch=90, frequency=1)
    """
    distiller/example/classifier_compression/compress_classifier.py
    For each epoch:
        compression_scheduler.on_epoch_begin(epoch)
        train()
        save_checkpoint()
        compression_scheduler.on_epoch_end(epoch)

    train():
        For each training step:
            compression_scheduler.on_minibatch_begin(epoch)
            output = model(input)
            loss = criterion(output, target)
            compression_scheduler.before_backward_pass(epoch)
            loss.backward()
            optimizer.step()
            compression_scheduler.on_minibatch_end(epoch)
    """
    for i in range(args.epochs):
        compression_scheduler.on_epoch_begin(i)
        train_accuracy = train(i, criterion, optimizer, compression_scheduler)
        val_accuracy = validate() # Validate hyperparameter setting
        t, sparsity = distiller.weights_sparsity_tbl_summary(model, return_total_sparsity=True)
        compression_scheduler.on_epoch_end(i, optimizer)
        apputils.save_checkpoint(i, args.arch, model, optimizer, compression_scheduler, train_accuracy, False,
                                         'hyperopt', './')
        print('{} epochs => train acc:{:.2f}%,  val acc:{:.2f}%'.format(i, train_accuracy, val_accuracy))
        
    test_accuracy = validate(test_loader) # Validate hyperparameter setting
    # score = (1 - (val_accuracy / 100.)) + (alpha * (1 - sparsity / 100.))  # previous objective function

    # objective function
    # accuracy: 98~90%, sparsity: 80%~50%
    score = -((val_accuracy / 100.)**2 - 0.9**2 + alpha * ((sparsity / 100.)**2 - 0.5**2))
    print('{} trials: score: {:.2f}\ttrain acc:{:.2f}%\tval acc:{:.2f}%\ttest acc:{:.2f}%\tsparsity:{:.2f}%'.format(count, 
                                      score, 
                                      train_accuracy, 
                                      val_accuracy, 
                                      test_accuracy,
                                      sparsity))
    if score < best_dict['score']:
        best_dict['trial'] = count
        best_dict['score'] = score
        best_dict['tr_acc'] = train_accuracy        
        best_dict['v_acc'] = val_accuracy
        best_dict['te_acc'] = test_accuracy
        best_dict['sparsity'] = sparsity
        best_dict['model_best'] = copy.deepcopy(model)

    return score
Example #7
def perform_sensitivity_analysis(model, net_params, sparsities, args):

    sensitivities = OrderedDict()
    print('Testing the accuracy of the original model')
    accuracy = test(args, model)

    print('Original model accuracy: {}'.format(accuracy))

    if args.fpgm:
        print('Using geometric-median (FPGM) pruning to generate the sensitivity curves')
    conv_dict = {}
    if args.hrank:
        print('Using HRank pruning')
        cnt = 1
        layer_name = 'conv1.conv.weight'
        conv_dict[layer_name] = np.load(args.rank_path + 'rank_conv' +
                                        str(cnt) + '.npy')
        cnt += 1
        for key, value in model.block_info.items():
            if value == 1:
                layer_name = '{}.conv.conv.weight'.format(key)
                conv_dict[layer_name] = np.load(args.rank_path + 'rank_conv' +
                                                str(cnt) + '.npy')
                cnt += 1
                layer_name = '{}.conv_dw.conv.weight'.format(key)
                conv_dict[layer_name] = np.load(args.rank_path + 'rank_conv' +
                                                str(cnt) + '.npy')
                cnt += 1
                layer_name = '{}.project.conv.weight'.format(key)
                conv_dict[layer_name] = np.load(args.rank_path + 'rank_conv' +
                                                str(cnt) + '.npy')
                cnt += 1
            else:
                for j in range(value):
                    layer_name = '{}.model.{}.conv.conv.weight'.format(key, j)
                    conv_dict[layer_name] = np.load(args.rank_path +
                                                    'rank_conv' + str(cnt) +
                                                    '.npy')
                    cnt += 1
                    layer_name = '{}.model.{}.conv_dw.conv.weight'.format(
                        key, j)
                    conv_dict[layer_name] = np.load(args.rank_path +
                                                    'rank_conv' + str(cnt) +
                                                    '.npy')
                    cnt += 1
                    layer_name = '{}.model.{}.project.conv.weight'.format(
                        key, j)
                    conv_dict[layer_name] = np.load(args.rank_path +
                                                    'rank_conv' + str(cnt) +
                                                    '.npy')
                    cnt += 1
        layer_name = 'conv_6_sep.conv.weight'
        conv_dict[layer_name] = np.load(args.rank_path + 'rank_conv' +
                                        str(cnt) + '.npy')
        cnt += 1
        layer_name = 'conv_6_dw.conv.weight'
        conv_dict[layer_name] = np.load(args.rank_path + 'rank_conv' +
                                        str(cnt) + '.npy')
        cnt += 1
        print(len(conv_dict))

    for param_name in net_params:
        if model.state_dict()[param_name].dim() not in [4]:
            continue

        model_cpy = deepcopy(model)

        sensitivity = OrderedDict()

        # Prune each layer in turn and test accuracy (sparsity from 0.05 -> 0.95)
        for sparsity_level in sparsities:

            sparsity_level = float(sparsity_level)

            print(param_name, sparsity_level)

            pruner = distiller.pruning.L1RankedStructureParameterPruner(
                "sensitivity",
                group_type="Filters",
                desired_sparsity=sparsity_level,
                weights=param_name)

            policy = distiller.PruningPolicy(pruner, pruner_args=None)
            scheduler = CompressionScheduler(model_cpy)
            scheduler.add_policy(policy, epochs=[0])

            scheduler.on_epoch_begin(0,
                                     fpgm=args.fpgm,
                                     HRank=args.hrank,
                                     conv_index=conv_dict)

            scheduler.mask_all_weights()

            accuracy = test(args, model_cpy)

            print('Accuracy after pruning at {}: {}'.format(sparsity_level, accuracy))

            sensitivity[sparsity_level] = (accuracy, 0, 0)
            sensitivities[param_name] = sensitivity

    return sensitivities
Example #8
        initial_sparsity=0.05,
        final_sparsity=0.20,
        weights=['module.conv2.weight'])

    gemm_pruner_agp = distiller.AutomatedGradualPruner(
        name='gemm_pruner_agp',
        initial_sparsity=0.02,
        final_sparsity=0.15,
        weights=['module.fc2.weight'])

    net_thinner = distiller.FilterRemover("remove_filters",
                                          arch='simplenet_mnist',
                                          dataset='mnist')

    # Policies
    policy1 = distiller.PruningPolicy(filter_pruner, pruner_args=None)
    compression_scheduler.add_policy(policy1, epochs=(0, 1))

    policy2 = distiller.PruningPolicy(filter_pruner_agp, pruner_args=None)
    compression_scheduler.add_policy(policy2,
                                     starting_epoch=0,
                                     ending_epoch=2,
                                     frequency=1)

    policy3 = distiller.PruningPolicy(gemm_pruner_agp, pruner_args=None)
    compression_scheduler.add_policy(policy3,
                                     starting_epoch=0,
                                     ending_epoch=2,
                                     frequency=1)

    compression_scheduler.add_policy(net_thinner, epochs=(2, ))
    def perform_model_pruning(model, sparsity_level, group):
        all_params = [param_name for param_name, param in model.named_parameters()]
        pruner_list = []
        first_layer_name = all_params[0]
        last_layer_name = all_params[-2]
        if group == 'element':
            # Element-wise sparsity
            # This can be applied to every layer of the deep neural network.
            # sparsity_level is a per-parameter dict; collect the levels for
            # all prunable parameters into a single pruner.
            sparsity_levels = {}
            for param_name in all_params:
                if model.state_dict()[param_name].dim() not in [2, 4]:
                    continue
                sparsity_levels[param_name] = sparsity_level[param_name]
            pruner = distiller.pruning.SparsityLevelParameterPruner(name="sensitivity", levels=sparsity_levels)
            pruner_list.append(pruner)

        elif group == 'filter':
            # Note: in this case, the output layer is pruned channel-wise rather than filter-wise.
            for param_name in all_params:
                if model.state_dict()[param_name].dim() not in [2,4]:
                    continue
                #sparsity_levels = {param_name: sparsity_level[param_name]}
                desired_sparsity = sparsity_level[param_name]
                if not desired_sparsity:
                    continue 

                if param_name == last_layer_name:
                    pruner = L1RankedStructureParameterPruner("sensitivity",
                                                            group_type="Channels",
                                                            desired_sparsity=desired_sparsity,
                                                            weights=last_layer_name)
                else:
                    pruner = L1RankedStructureParameterPruner("sensitivity",
                                                            group_type="Filters",
                                                            desired_sparsity=desired_sparsity,
                                                            weights=param_name)
                pruner_list.append(pruner)

        elif group == 'channel':
            for param_name in all_params:
                # The first layer's input channels cannot be pruned.
                if param_name == first_layer_name:
                    continue

                if model.state_dict()[param_name].dim() not in [2, 4]:
                    continue
                desired_sparsity = sparsity_level[param_name]
                
                if not desired_sparsity:
                    continue
                pruner = distiller.pruning.L1RankedStructureParameterPruner("sensitivity",
                                                                            group_type="Channels",
                                                                            desired_sparsity=desired_sparsity,
                                                                            weights=param_name)
                pruner_list.append(pruner)

        # Build scheduler to set zero mask dictionary.
        scheduler = CompressionScheduler(model)
        for pruner in pruner_list:
            policy = distiller.PruningPolicy(pruner, pruner_args=None)
            scheduler.add_policy(policy, epochs=[0])

        # Compute the pruning mask per the pruner and apply the mask on the weights
        scheduler.on_epoch_begin(0)
        scheduler.mask_all_weights(epoch=0)
        # Build a fake optimizer; we call it fake because there is no training loop here.
        optimizer = optim.SGD(model.parameters(), lr=0.001,
                              momentum=0.9, weight_decay=1e-4)
        device = 'cpu'
        criterion = nn.CrossEntropyLoss().to(device)
        model.to(device)
        input_shape = (1, 3, 32, 32)
        #print(compress_scheduler.zeros_mask_dict['basic_model.fc2.weight'].mask)
        dummy_input = utility.get_dummy_input('cifar10',  # Dataset should be specified.
                                            utility.model_device(model), 
                                            input_shape=input_shape)    
        sgraph = SummaryGraph(model, dummy_input)
        if group == 'filter':
            # First remove filter 
            thinning_recipe = thinning.create_thinning_recipe_filters(sgraph, model, scheduler.zeros_mask_dict, prune_output_layer=None)
            thinning.apply_and_save_recipe(model, scheduler.zeros_mask_dict, thinning_recipe, optimizer)
            # Second remove channel from last layer.
            zeros_mask_dict = create_model_masks_dict(model)
            zeros_mask_dict[last_layer_name].mask = scheduler.zeros_mask_dict[last_layer_name].mask
            #print(zeros_mask_dict[last_layer_name].mask)    
            thinning_recipe = thinning.create_thinning_recipe_channels(sgraph, model, zeros_mask_dict)
            thinning.apply_and_save_recipe(model, scheduler.zeros_mask_dict, thinning_recipe, optimizer) 
        elif group == 'channel':
            thinning_recipe = thinning.create_thinning_recipe_channels(sgraph, model, scheduler.zeros_mask_dict)
            thinning.apply_and_save_recipe(model, scheduler.zeros_mask_dict, thinning_recipe, optimizer)
        else:
            raise ValueError("Cannot execute a thinning recipe on a model pruned element-wise.")
        return model 
Example #10
def perform_sensitivity_analysis(model, net_params, sparsities, test_func,
                                 group):
    """Perform a sensitivity test for a model's weights parameters.
    The model should be trained to maximum accuracy, because we aim to understand
    the behavior of the model's performance in relation to pruning of a specific
    weights tensor.
    By default this function will test all of the model's parameters.
    The return value is a complex sensitivities dictionary: the dictionary's
    key is the name (string) of the weights tensor.  The value is another dictionary,
    where the tested sparsity-level is the key, and a (top1, top5, loss) tuple
    is the value.
    Below is an example of such a dictionary:
    .. code-block:: python
    {'features.module.6.weight':    {0.0:  (56.518, 79.07,  1.9159),
                                     0.05: (56.492, 79.1,   1.9161),
                                     0.10: (56.212, 78.854, 1.9315),
                                     0.15: (35.424, 60.3,   3.0866)},
     'classifier.module.1.weight':  {0.0:  (56.518, 79.07,  1.9159),
                                     0.05: (56.514, 79.07,  1.9159),
                                     0.10: (56.434, 79.074, 1.9138),
                                     0.15: (54.454, 77.854, 2.3127)} }

    The test_func is expected to execute the model on a test/validation dataset,
    and return the results for top1 and top5 accuracies, and the loss value.
    """
    if group not in ['element', 'filter', 'channel']:
        raise ValueError(
            "group parameter contains an illegal value: {}".format(group))
    sensitivities = OrderedDict()
    eval_scores_dict = OrderedDict()  # per-layer {1 - sparsity_level: top1} curves, returned alongside sensitivities

    # The output layer (for filter pruning) and the input layer (for channel pruning) need special handling
    last_layer_name = net_params[-2][0]
    first_layer_name = net_params[0][0]
    for param_name, param_var in net_params:

        # Ignore bias analysis.
        if model.state_dict()[param_name].dim() not in [2, 4]:
            continue
        # Make a copy of the model, because when we apply the zeros mask (i.e.
        # perform pruning), the model's weights are altered
        model_cpy = deepcopy(model)
        layer_wise_eval_scores_dict = {}
        sensitivity = OrderedDict()
        for sparsity_level in sparsities:
            sparsity_level = float(sparsity_level)
            msglogger.info("Testing sensitivity of %s [%0.1f%% sparsity]" %
                           (param_name, sparsity_level * 100))

            # **************
            # Should we specify the pruner here or not? Qualcomm uses the SVD pruner directly to get
            # the optimal permutation of filters, so could the sensitivity table also be made optimal?
            # **************

            # Create the pruner (a level pruner), the pruning policy and the
            # pruning schedule.
            if group == 'element':
                # Element-wise sparsity
                # This can be applied to every layer of the deep neural network.
                sparsity_levels = {param_name: sparsity_level}
                pruner = distiller.pruning.SparsityLevelParameterPruner(
                    name="sensitivity", levels=sparsity_levels)

            elif group == 'filter':
                # Output filter ranking
                # So far this is restricted to convolution layers.

                # *********************
                # My idea:
                # The last (output) layer cannot be examined and pruned filter-wise, so distiller's
                # authors dropped it from the analysis to avoid trivial extra code. How should the
                # termination condition be defined, and how can this mechanism be applied to other
                # fully-connected layers? This is important, since those layers hold so many weights.
                # *********************

                if param_name == last_layer_name:
                    # Note: the output layer is analyzed channel-wise rather than filter-wise.
                    pruner = L1RankedStructureParameterPruner(
                        "sensitivity",
                        group_type="Channels",
                        desired_sparsity=sparsity_level,
                        weights=param_name)
                else:
                    pruner = L1RankedStructureParameterPruner(
                        "sensitivity",
                        group_type="Filters",
                        desired_sparsity=sparsity_level,
                        weights=param_name)
                """
                if model.state_dict()[param_name].dim() != 4:
                    continue
                pruner = distiller.pruning.L1RankedStructureParameterPruner("sensitivity",
                                                                            group_type="Filters",
                                                                            desired_sparsity=sparsity_level,
                                                                            weights=param_name)
                """
            elif group == 'channel':
                # Input channel ranking
                # So far this is restricted to convolution layers.
                # *********************
                # My idea:
                # Channel pruning is widely applicable and easier to deploy (it does not change the
                # size of the output logits), but distiller does not support analyzing it either.
                # The reason is that the pruner's group type must be specified via the YAML file, so
                # the authors dropped this code to avoid trivial code generation.
                # *Should I add a new sensitivity analysis policy or pruner?*
                # *********************
                """
                if model.state_dict()[param_name].dim() != 4:
                    continue
                """
                if param_name == first_layer_name:
                    continue
                pruner = distiller.pruning.L1RankedStructureParameterPruner(
                    "sensitivity",
                    group_type="Channels",
                    desired_sparsity=sparsity_level,
                    weights=param_name)

            policy = distiller.PruningPolicy(pruner, pruner_args=None)
            scheduler = CompressionScheduler(model_cpy)
            scheduler.add_policy(policy, epochs=[0])

            # Compute the pruning mask per the pruner and apply the mask on the weights
            scheduler.on_epoch_begin(0)
            scheduler.mask_all_weights(epoch=0)

            # Test and record the performance of the pruned model
            record = test_func(model=model_cpy, parameter_name=param_name)
            layer_wise_eval_scores_dict[1 - sparsity_level] = record[0]
            sensitivity[sparsity_level] = record + (param_var, )
            sensitivities[param_name] = sensitivity
            # Our testing function's output consists of nat_top1, nat_top5, nat_loss, adv_top1, adv_top5, adv_loss.
        eval_scores_dict[param_name] = layer_wise_eval_scores_dict

    return sensitivities, eval_scores_dict