Example #1
    def step(self, action):
        # Pseudo-prune and collect the corresponding statistics. The real pruning only happens after all pseudo pruning is done.
        if self.visited[self.cur_ind]:
            action = self.strategy_dict[self.prunable_idx[self.cur_ind]][0]
            preserve_idx = self.index_buffer[self.cur_ind]
        else:
            action = self._action_wall(action)  # percentage to preserve
            preserve_idx = None

        # prune and update action
        # After this call the weights of this layer have already been pruned:
        # the weights of the pruned channels are set to 0.
        # Returns:
        #   action: the preserve ratio actually applied to this layer
        #   d_prime: the number of channels remaining after compression
        #   preserve_idx: the indices of the preserved channels
        # (A simplified sketch of this channel selection follows this example.)
        action, d_prime, preserve_idx = self.prune_kernel(
            self.prunable_idx[self.cur_ind], action, preserve_idx)

        # This block never executes here, because self.shared_idx is always empty
        if not self.visited[self.cur_ind]:
            for group in self.shared_idx:
                if self.cur_ind in group:  # set the shared ones
                    for g_idx in group:
                        self.strategy_dict[
                            self.prunable_idx[g_idx]][0] = action
                        self.strategy_dict[self.prunable_idx[g_idx -
                                                             1]][1] = action
                        self.visited[g_idx] = True
                        self.index_buffer[g_idx] = preserve_idx.copy()

        # To see more information you could also just change this to 'if True:'
        if self.export_model:  # export checkpoint
            print('# Pruning {}: ratio: {}, d_prime: {}'.format(
                self.cur_ind, action, d_prime))

        # Save this layer's compression ratio and the number of preserved channels
        self.strategy.append(action)  # save action to strategy
        self.d_prime_list.append(d_prime)

        self.strategy_dict[self.prunable_idx[self.cur_ind]][0] = action
        if self.cur_ind > 0:
            self.strategy_dict[self.prunable_idx[self.cur_ind - 1]][1] = action

        # all actions have been taken
        if self._is_final_layer():
            assert len(self.strategy) == len(self.prunable_idx)
            current_flops = self._cur_flops()
            acc_t1 = time.time()
            acc = self._validate(self.val_loader, self.model)
            acc_t2 = time.time()
            self.val_time = acc_t2 - acc_t1
            compress_ratio = current_flops * 1. / self.org_flops
            info_set = {
                'compress_ratio': compress_ratio,
                'accuracy': acc,
                'strategy': self.strategy.copy()
            }
            reward = self.reward(self, acc, current_flops)

            if reward > self.best_reward:
                self.best_reward = reward
                self.best_strategy = self.strategy.copy()
                self.best_d_prime_list = self.d_prime_list.copy()
                prGreen(
                    'New best reward: {:.4f}, acc: {:.4f}, compress: {:.4f}'.
                    format(self.best_reward, acc, compress_ratio))
                prGreen('New best policy: {}'.format(self.best_strategy))
                prGreen('New best d primes: {}'.format(self.best_d_prime_list))

            obs = self.layer_embedding[
                self.cur_ind, :].copy()  # actually the same as the last state
            done = True
            if self.export_model:  # export state dict
                torch.save(self.model.state_dict(), self.export_path)
                return None, None, None, None
            return obs, reward, done, info_set

        info_set = None
        reward = 0
        done = False
        self.visited[self.cur_ind] = True  # set to visited
        self.cur_ind += 1  # the index of next layer
        # build next state (in-place modify)
        self.layer_embedding[self.cur_ind][-3] = self._cur_reduced(
        ) * 1. / self.org_flops  # reduced
        self.layer_embedding[self.cur_ind][-2] = sum(
            self.flops_list[self.cur_ind + 1:]) * 1. / self.org_flops  # rest
        self.layer_embedding[self.cur_ind][-1] = self.strategy[
            -1]  # last action
        obs = self.layer_embedding[self.cur_ind, :].copy()

        return obs, reward, done, info_set
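
The comments above describe what prune_kernel returns but not how the channels are picked. The sketch below is a simplified, hypothetical illustration of magnitude-based channel selection and weight zeroing; the function and parameter names are mine, and the repo's prune_kernel can additionally use collected input feature maps when selecting channels:

import numpy as np

def magnitude_channel_prune(weight, preserve_ratio):
    # weight: conv weight as a numpy array of shape (n_out, n_in, kh, kw).
    # Keep the d_prime input channels with the largest L1 importance, zero the rest.
    n_in = weight.shape[1]
    d_prime = max(int(round(n_in * preserve_ratio)), 1)
    importance = np.abs(weight).sum(axis=(0, 2, 3))    # one L1 score per input channel
    preserve_idx = np.sort(np.argsort(-importance)[:d_prime])
    mask = np.zeros(n_in, dtype=bool)
    mask[preserve_idx] = True
    pruned = weight.copy()
    pruned[:, ~mask, :, :] = 0.                        # pruned channels become all-zero
    real_action = d_prime / n_in                       # preserve ratio actually achieved
    return real_action, d_prime, preserve_idx, pruned
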
Example #2
    def step(self, action):
        # Pseudo-prune and collect the corresponding statistics. The real pruning only happens after all pseudo pruning is done.
        if self.visited[self.cur_ind]:
            action = self.strategy_dict[self.prunable_idx[self.cur_ind]][0]
            preserve_idx = self.index_buffer[self.cur_ind]
        else:
            action = self._action_wall(action)  # percentage to preserve; _action_wall bounds the action using the FLOPs already reduced and the FLOPs of the remaining layers, matching the pseudocode in the paper (a sketch follows this function)
            preserve_idx = None

        # prune and update action: run the pruning for this layer, i.e. obtain the mask and apply it to the weights; internally the channels are selected directly by weight magnitude
        action, d_prime, preserve_idx = self.prune_kernel(self.prunable_idx[self.cur_ind], action, preserve_idx)

        if not self.visited[self.cur_ind]:
            for group in self.shared_idx:  # for MobileNetV1 self.shared_idx is empty
                if self.cur_ind in group:  # set the shared ones
                    for g_idx in group:
                        self.strategy_dict[self.prunable_idx[g_idx]][0] = action
                        self.strategy_dict[self.prunable_idx[g_idx - 1]][1] = action
                        self.visited[g_idx] = True
                        self.index_buffer[g_idx] = preserve_idx.copy()

        if self.export_model:  # export checkpoint
            print('# Pruning {}: ratio: {}, d_prime: {}'.format(self.cur_ind, action, d_prime))

        self.strategy.append(action)  # save action to strategy
        self.d_prime_list.append(d_prime)

        self.strategy_dict[self.prunable_idx[self.cur_ind]][0] = action
        if self.cur_ind > 0:
                self.strategy_dict[self.prunable_idx[self.cur_ind - 1]][1] = action  # the channel counts of adjacent layers must line up; this follows the author's pruning scheme

        # all actions have been taken
        if self._is_final_layer():
            assert len(self.strategy) == len(self.prunable_idx)
            current_flops = self._cur_flops()
            acc_t1 = time.time()
            acc = self._validate(self.val_loader, self.model)
            acc_t2 = time.time()
            self.val_time = acc_t2 - acc_t1
            compress_ratio = current_flops * 1. / self.org_flops
            info_set = {'compress_ratio': compress_ratio, 'accuracy': acc, 'strategy': self.strategy.copy()}
            reward = self.reward(self, acc, current_flops)  # compute the overall reward; self.reward is acc_reward defined in rewards.py (a sketch follows this function)

            if reward > self.best_reward:
                self.best_reward = reward
                self.best_strategy = self.strategy.copy()
                self.best_d_prime_list = self.d_prime_list.copy()
                prGreen('New best reward: {:.4f}, acc: {:.4f}, compress: {:.4f}'.format(self.best_reward, acc, compress_ratio))
                prGreen('New best policy: {}'.format(self.best_strategy))
                prGreen('New best d primes: {}'.format(self.best_d_prime_list))

            obs = self.layer_embedding[self.cur_ind, :].copy()  # actually the same as the last state
            done = True
            if self.export_model:  # export state dict
                torch.save(self.model.state_dict(), self.export_path)
                return None, None, None, None
            return obs, reward, done, info_set

        info_set = None
        reward = 0
        done = False
        self.visited[self.cur_ind] = True  # set to visited
        self.cur_ind += 1  # the index of next layer
        # build next state (in-place modify)
        self.layer_embedding[self.cur_ind][-3] = self._cur_reduced() * 1. / self.org_flops  # reduced
        self.layer_embedding[self.cur_ind][-2] = sum(self.flops_list[self.cur_ind + 1:]) * 1. / self.org_flops  # rest
        self.layer_embedding[self.cur_ind][-1] = self.strategy[-1]  # last action
        obs = self.layer_embedding[self.cur_ind, :].copy()

        return obs, reward, done, info_set
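
As noted at the _action_wall call above, the preserve ratio proposed by the agent is clipped so that the overall FLOPs budget remains reachable. The sketch below only illustrates the idea under simplifying assumptions (a single preserve ratio per layer, no buffer layers); the parameter names are illustrative and the real _action_wall bookkeeping is more involved:

import numpy as np

def action_wall_sketch(action, preserve_budget_flops, this_layer_flops,
                       other_layers_preserved_flops):
    # action: preserve ratio proposed by the agent for the current layer.
    # Cap it so that the FLOPs kept by this layer plus the FLOPs the other
    # layers are expected to keep do not exceed the overall preserve budget.
    action = float(np.clip(action, 0.0, 1.0))
    max_preserve = (preserve_budget_flops - other_layers_preserved_flops) / this_layer_flops
    max_preserve = float(np.clip(max_preserve, 0.0, 1.0))  # keep the cap in [0, 1]
    return min(action, max_preserve)
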
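The reward comment above points at acc_reward in rewards.py. The call site fixes its signature as (env, acc, flops); the placeholder below is merely consistent with that call, not a copy of the repo's code, and the exact body in your version of rewards.py may differ (for example, it may also penalize FLOPs, as in the second variant shown here):

import numpy as np

def acc_reward(env, acc, flops):
    # Reward depends on validation accuracy only; the FLOPs constraint is
    # already enforced through the action wall, so flops is unused here.
    return acc * 0.01

def acc_flops_reward(env, acc, flops):
    # Alternative form that additionally penalizes remaining FLOPs
    # via a log-scaled error term.
    error = (100 - acc) * 0.01
    return -error * np.log(flops)
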
    def step(self, action):

        action = self._action_wall(action)  # percentage to preserve

        # virtually conduct the pruning process
        action = self.shrink_action(action, self.cur_ind)

        self.strategy.append(action)
        self.strategy_dict[self.prunable_idx[self.cur_ind]] = action

        if self._is_final_layer():
            assert len(self.strategy) == len(self.prunable_idx)
            current_flops = self._cur_flops()
            acc_t1 = time.time()
            # mask_t1 = time
            self.model_masked = self.vgg_masked(self.strategy)
            acc = self._validate(self.val_loader, self.model_masked)

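            # Update the exponential-moving-average accuracy baseline for the
            # current beta; the reward below is the scaled advantage of this
            # episode over the baseline (see the sketch at the end of this section).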
            self.acc_baseline[self.cur_beta_idx] = self.gama * (
                self.acc_baseline[self.cur_beta_idx]) + (1 - self.gama) * acc

            acc_t2 = time.time()
            self.val_time = acc_t2 - acc_t1
            compress_ratio = current_flops * 1. / self.org_flops
            info_set = {
                'compress_ratio': compress_ratio,
                'accuracy': acc,
                'strategy': self.strategy.copy(),
                'd_prime': self.d_prime_list.copy()
            }

            reward = (acc - self.acc_baseline[self.cur_beta_idx]) * 0.01
            # reward = acc*0.01
            # reward = self.reward(self, self.beta,acc, current_flops)

            if reward > self.best_reward[self.cur_beta_idx]:
                self.best_reward[self.cur_beta_idx] = reward
                self.best_strategy[self.cur_beta_idx] = self.strategy.copy()
                self.best_d_prime_list[
                    self.cur_beta_idx] = self.d_prime_list.copy()
                prGreen('best action for beta={}'.format(self.beta))
                prGreen(
                    'New best reward: {:.4f}, acc: {:.4f}, compress: {:.4f}'.
                    format(reward, acc, compress_ratio))
                prGreen('New best policy: {}'.format(self.strategy))
                prGreen('New best d primes: {}'.format(self.d_prime_list))

            obs = self.layer_embedding[
                self.cur_ind, :].copy()  # actually the same as the last state
            done = True
            # if self.export_model:  # export state dict
            #     torch.save(self.model.state_dict(), self.export_path)
            #     return None, None, None, None
            return obs, reward, done, info_set
        info_set = None
        reward = 0
        done = False
        self.cur_ind += 1  # the index of next layer

        # build next state (in-place modify)
        self.layer_embedding[self.cur_ind][-4] = self._cur_reduced(
        ) * 1. / self.org_flops  # reduced
        # if self._is_final_layer:
        #     self.layer_embedding[self.cur_ind][-3] = 0.0  # rest
        # else:
        #     self.layer_embedding[self.cur_ind][-3] = sum(self.flops_list[self.prunable_idx[self.cur_ind]+1:]) * 1. / self.org_flops  # rest
        self.layer_embedding[self.cur_ind][-3] = self.following_changeable(
            self.prunable_idx[
                self.cur_ind]) / self.org_flops  # following changeable flops
        self.layer_embedding[self.cur_ind][-2] = self.strategy[
            -1]  # last action
        obs = self.layer_embedding[self.cur_ind, :].copy()

        return obs, reward, done, info_set
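
This third variant replaces the fixed reward function with an advantage over an exponential-moving-average accuracy baseline (self.gama is the smoothing factor, self.acc_baseline is kept per beta). The standalone sketch below mirrors that update; the initial baseline of 0 and the smoothing factor of 0.95 are assumptions for illustration, not values read from the code:

def ema_baseline_reward(acc, baseline, gamma=0.95):
    # Exponential moving average of past accuracies serves as a baseline;
    # the reward is the scaled advantage of the current accuracy over it.
    baseline = gamma * baseline + (1.0 - gamma) * acc
    reward = (acc - baseline) * 0.01
    return reward, baseline

# Usage: episodes that beat the running baseline get a positive reward.
baseline = 0.0
for acc in (60.0, 62.0, 61.0):
    reward, baseline = ema_baseline_reward(acc, baseline)

Subtracting a running baseline in this way keeps the reward roughly zero-centered as accuracy improves over training, a common trick to stabilize reinforcement-learning updates.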