Example #1
def profiling(model, use_cuda):
    """Profile the model on either GPU or CPU."""
    print('Start model profiling, use_cuda: {}.'.format(use_cuda))
    if getattr(FLAGS, 'autoslim', False):
        # AutoSlim: profile once at the model's current width.
        flops, params = model_profiling(model,
                                        FLAGS.image_size,
                                        FLAGS.image_size,
                                        use_cuda=use_cuda,
                                        verbose=getattr(
                                            FLAGS, 'profiling_verbose', False))
    elif getattr(FLAGS, 'slimmable_training', False):
        # Slimmable training: profile each width multiplier separately.
        for width_mult in sorted(FLAGS.width_mult_list, reverse=True):
            model.apply(lambda m: setattr(m, 'width_mult', width_mult))
            print('Model profiling with width mult {}x:'.format(width_mult))
            flops, params = model_profiling(model,
                                            FLAGS.image_size,
                                            FLAGS.image_size,
                                            use_cuda=use_cuda,
                                            verbose=getattr(
                                                FLAGS, 'profiling_verbose',
                                                False))
    else:
        flops, params = model_profiling(model,
                                        FLAGS.image_size,
                                        FLAGS.image_size,
                                        use_cuda=use_cuda,
                                        verbose=getattr(
                                            FLAGS, 'profiling_verbose', True))
    return flops, params
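These snippets assume a module-level `FLAGS` config object and a `model_profiling` helper from the surrounding repo. Below is a minimal, hypothetical sketch of how the function above might be driven, assuming `profiling` and a `SimpleNamespace` stand-in for `FLAGS` live in the same module; the field values are illustrative only.

import torch
import torchvision.models as models
from types import SimpleNamespace

# Hypothetical stand-in for the repo's FLAGS config; the field names
# mirror the attributes read by profiling() above.
FLAGS = SimpleNamespace(image_size=224,
                        autoslim=False,
                        slimmable_training=False,
                        profiling_verbose=False)

model = models.resnet18()
flops, params = profiling(model, use_cuda=torch.cuda.is_available())
print('FLOPs: {}, params: {}'.format(flops, params))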
Example #2
def profiling(model, use_cuda):
    """Profile the model on either GPU or CPU."""
    logging.info('Start model profiling, use_cuda: {}.'.format(use_cuda))
    model_profiling(model,
                    FLAGS.image_size,
                    FLAGS.image_size,
                    verbose=getattr(FLAGS, 'model_profiling_verbose', True)
                    and is_root_rank)  # verbose output only on the root rank
Example #3
def shrink_model(model_wrapper,
                 ema,
                 optimizer,
                 prune_info,
                 threshold=1e-3,
                 ema_only=False):
    r"""Dynamic network shrinkage to discard dead atomic blocks.

    Args:
        model_wrapper: model to be shrunk.
        ema: An instance of `ExponentialMovingAverage`, could be None.
        optimizer: Global optimizer.
        prune_info: An instance of `PruneInfo`, could be None.
        threshold: A small enough constant.
        ema_only: If `True`, regard an atomic block as dead only when
            `$$\hat{\alpha} \le \text{threshold}$$`. Otherwise use both the
            current value and the momentum (EMA) version.
    """
    model = unwrap_model(model_wrapper)
    for block_name, block in model.get_named_block_list().items():
        assert isinstance(block, mb.InvertedResidualChannels)
        masks = [
            bn.weight.detach().abs() > threshold
            for bn in block.get_depthwise_bn()
        ]
        if ema is not None:
            masks_ema = [
                ema.average('{}.{}.weight'.format(
                    block_name, name)).detach().abs() > threshold
                for name in block.get_named_depthwise_bn().keys()
            ]
            if not ema_only:
                masks = [
                    mask0 | mask1 for mask0, mask1 in zip(masks, masks_ema)
                ]
            else:
                masks = masks_ema
        block.compress_by_mask(masks,
                               ema=ema,
                               optimizer=optimizer,
                               prune_info=prune_info,
                               prefix=block_name,
                               verbose=False)

    if optimizer is not None:
        assert set(optimizer.param_groups[0]['params']) == set(
            model.parameters())

    model_profiling(model,
                    FLAGS.image_size,
                    FLAGS.image_size,
                    num_forwards=0,
                    verbose=False)
    logging.info('Model shrunk to FLOPs: {}'.format(model.n_macs))
    logging.info('Current model: {}'.format(mb.output_network(model)))
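A toy sketch of the mask logic above, using hypothetical tensors rather than the repo's `ExponentialMovingAverage` class: a depthwise channel survives if its BatchNorm scale, or (with `ema_only=False`) its EMA counterpart, exceeds the threshold.

import torch

threshold = 1e-3
bn_weight = torch.tensor([0.5, 2e-4, 0.03])   # current BN scales (illustrative)
ema_weight = torch.tensor([0.4, 5e-4, 8e-4])  # momentum (EMA) versions

mask_now = bn_weight.abs() > threshold    # [True, False, True]
mask_ema = ema_weight.abs() > threshold   # [True, False, False]

# ema_only=False: keep a channel if either version clears the threshold.
mask = mask_now | mask_ema                # [True, False, True]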
Example #4
def profiling(model, use_cuda):
    """Profile the model on either GPU or CPU."""
    print('Start model profiling, use_cuda: {}.'.format(use_cuda))
    for width_mult in sorted(FLAGS.width_mult_list, reverse=True):
        model.apply(
            lambda m: setattr(m, 'width_mult', width_mult))
        print('Model profiling with width mult {}x:'.format(width_mult))
        # Be verbose only for the widest network unless FLAGS overrides it.
        verbose = width_mult == max(FLAGS.width_mult_list)
        model_profiling(
            model, FLAGS.image_size, FLAGS.image_size,
            verbose=getattr(FLAGS, 'model_profiling_verbose', verbose))
Example #5
    def __init__(self, opt):
        super(SPADEModel, self).__init__(opt)
        self.model_names = ['G_student', 'G_teacher', 'D']
        self.visual_names = ['labels', 'Tfake_B', 'Sfake_B', 'real_B']
        self.loss_names = [
            'G_gan', 'G_feat', 'G_vgg', 'G_distill', 'D_real', 'D_fake'
        ]
        if hasattr(opt, 'distiller'):
            self.modules = SPADEDistillerModules(opt).to(self.device)
            if len(opt.gpu_ids) > 0:
                self.modules = DataParallelWithCallback(self.modules,
                                                        device_ids=opt.gpu_ids)
                self.modules_on_one_gpu = self.modules.module
            else:
                self.modules_on_one_gpu = self.modules
        for i in range(len(self.modules_on_one_gpu.mapping_layers)):
            self.loss_names.append('G_distill%d' % i)
        self.optimizer_G, self.optimizer_D = \
            self.modules_on_one_gpu.create_optimizers()
        self.optimizers = [self.optimizer_G, self.optimizer_D]
        if not opt.no_fid:
            block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[2048]
            self.inception_model = InceptionV3([block_idx])
            self.inception_model.to(self.device)
            self.inception_model.eval()
        if 'cityscapes' in opt.dataroot and not opt.no_mIoU:
            self.drn_model = DRNSeg('drn_d_105', 19, pretrained=False)
            util.load_network(self.drn_model, opt.drn_path, verbose=False)
            self.drn_model.to(self.device)
            self.drn_model.eval()
        self.eval_dataloader = create_eval_dataloader(self.opt)
        self.best_fid = 1e9
        self.best_mIoU = -1e9
        self.fids, self.mIoUs = [], []
        self.is_best = False
        self.npz = np.load(opt.real_stat_path)

        model_profiling(self.modules_on_one_gpu.netG_teacher,
                        self.opt.data_height,
                        self.opt.data_width,
                        channel=self.opt.data_channel,
                        num_forwards=0,
                        verbose=False)
        model_profiling(self.modules_on_one_gpu.netG_student,
                        self.opt.data_height,
                        self.opt.data_width,
                        channel=self.opt.data_channel,
                        num_forwards=0,
                        verbose=False)
        print(f'Teacher FLOPs: {self.modules_on_one_gpu.netG_teacher.n_macs}, '
              f'Student FLOPs: {self.modules_on_one_gpu.netG_student.n_macs}.')
Example #6
File: train.py Project: phuocphn/AdaBits
def profiling(model, use_cuda):
    """Profile the model on either GPU or CPU."""
    mprint('Start model profiling, use_cuda: {}.'.format(use_cuda))
    if getattr(FLAGS, 'adaptive_training', False):
        for bits in FLAGS.bits_list:
            model.apply(lambda m: setattr(m, 'bits', bits))
            mprint('Model profiling with {} bits.'.format(bits))
            flops, params, bitops, bytesize, energy, latency = model_profiling(
                model, FLAGS.image_size, FLAGS.image_size,
                verbose=getattr(FLAGS, 'model_profiling_verbose', False))
    else:
        flops, params, bitops, bytesize, energy, latency = model_profiling(
            model, FLAGS.image_size, FLAGS.image_size,
            verbose=getattr(FLAGS, 'model_profiling_verbose', False))
    return flops, params
Example #7
File: train.py Project: oj9040/FracBits
def profiling(model, use_cuda):
    """Profile the model on either GPU or CPU."""
    mprint('Start model profiling, use_cuda: {}.'.format(use_cuda))
    flops, params, bitops, bitops_max, bytesize, energy, latency = model_profiling(
        model, FLAGS.image_size, FLAGS.image_size,
        verbose=getattr(FLAGS, 'model_profiling_verbose', False))
    return bitops, bytesize
Example #8
def slimming(loader, model, criterion):
    """Greedy network slimming with a slimmable network."""
    model.eval()
    bn_calibration_init(model)
    model.apply(lambda m: setattr(m, 'width_mult', 1.0))
    if getattr(FLAGS, 'distributed', False):
        layers = get_conv_layers(model.module)
    else:
        raise NotImplementedError
    print('{} layers to slim in total.'.format(len(layers)))
    error = np.zeros(len(layers))
    # get data
    if getattr(FLAGS, 'distributed', False):
        loader.sampler.set_epoch(0)
    input, target = next(iter(loader))
    input = input.cuda()
    target = target.cuda()
    # start to slim
    print('Start to slim...')
    flops = 10e10
    FLAGS.autoslim_target_flops = sorted(FLAGS.autoslim_target_flops)
    autoslim_target_flop = FLAGS.autoslim_target_flops.pop()
    while True:
        flops, params = model_profiling(model,
                                        FLAGS.image_size,
                                        FLAGS.image_size,
                                        verbose=getattr(
                                            FLAGS, 'profiling_verbose', False))
        if flops < autoslim_target_flop:
            if len(FLAGS.autoslim_target_flops) == 0:
                break
            else:
                print('Found AutoSlim net at FLOPs {}'.format(
                    autoslim_target_flop))
                autoslim_target_flop = FLAGS.autoslim_target_flops.pop()
        for i in range(len(layers)):
            torch.cuda.empty_cache()
            error[i] = 0.
            outc = layers[i].out_channels - layers[i].divisor
            if outc <= 0 or outc > layers[i].out_channels_max:
                error[i] += 1.
                continue
            layers[i].out_channels -= layers[i].divisor
            loss, error_batch = forward_loss(model,
                                             criterion,
                                             input,
                                             target,
                                             None,
                                             return_acc=True)
            error[i] += error_batch
            layers[i].out_channels += layers[i].divisor
        best_index = np.argmin(error)
        print(*[f'{element:.4f}' for element in error])
        layers[best_index].out_channels -= layers[best_index].divisor
        print('Adjust layer {} by {} to {} channels, error: {}.'.format(
            best_index, -layers[best_index].divisor,
            layers[best_index].out_channels, error[best_index]))
    return
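The loop above is a greedy search: each iteration tentatively removes `divisor` output channels from every conv layer in turn, measures the one-batch error, restores the layer, and then permanently shrinks the layer whose removal hurt accuracy least. A minimal sketch of that selection step, with hypothetical error values:

import numpy as np

# Hypothetical one-batch errors after tentatively shrinking each of
# four conv layers by its divisor.
error = np.array([0.312, 0.298, 0.355, 0.301])

best_index = int(np.argmin(error))  # layer 1 hurts accuracy least
# The real loop then does layers[best_index].out_channels -= divisor
# and re-profiles until FLOPs fall below each autoslim_target_flop.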
Example #9
    def profile(self, input_semantics):
        netG = self.netG
        if isinstance(netG, nn.DataParallel):
            netG = netG.module
        batch_, channel_, height_, width_ = input_semantics.shape
        macs, params = model_profiling(netG,
                                       height_,
                                       width_,
                                       batch_,
                                       channel_,
                                       verbose=False)
        return macs, params
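Note the argument order: unlike the earlier examples, this call passes batch and channel explicitly after the spatial dimensions. A quick sketch of how the shapes map; the label-map shape is illustrative, not taken from the repo.

import torch

input_semantics = torch.zeros(1, 35, 256, 512)  # hypothetical SPADE label map
batch_, channel_, height_, width_ = input_semantics.shape
# model_profiling(netG, height_, width_, batch_, channel_, verbose=False)
# i.e. height=256, width=512, batch=1, channel=35 for this input.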