# Note: this snippet relies on args, custom_resnet, custom_inception, layers
# and replacement_factory being defined in the enclosing scope.
def eval_pnorm(p):
    args.qtype = 'lp_norm'
    args.lp = p
    # Fix the seed
    random.seed(args.seed)
    if not args.dont_fix_np_seed:
        np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    inf_model = CnnModel(args.arch, custom_resnet, custom_inception, args.pretrained, args.dataset, args.gpu_ids, args.datapath,
                         batch_size=args.batch_size, shuffle=True, workers=args.workers, print_freq=args.print_freq,
                         cal_batch_size=args.cal_batch_size, cal_set_size=args.cal_set_size, args=args)

    mq = ModelQuantizer(inf_model.model, args, layers, replacement_factory)
    loss = inf_model.evaluate_calibration()
    point = mq.get_clipping()

    # evaluate
    acc = inf_model.validate()

    del inf_model
    del mq

    return point, loss, acc
# Note: this variant builds its own layer list and returns only the
# calibration loss (no validation pass).
def eval_pnorm(p):
    args.qtype = 'lp_norm'
    args.lp = p
    # Fix the seed
    random.seed(args.seed)
    if not args.dont_fix_np_seed:
        np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    inf_model = CnnModel(args.arch, custom_resnet, custom_inception, args.pretrained, args.dataset, args.gpu_ids, args.datapath,
                         batch_size=args.batch_size, shuffle=True, workers=args.workers, print_freq=args.print_freq,
                         cal_batch_size=args.cal_batch_size, cal_set_size=args.cal_set_size, args=args)

    all_layers = []
    if args.bit_weights is not None:
        all_layers += [n for n, m in inf_model.model.named_modules() if isinstance(m, nn.Conv2d)][1:-1]
    if args.bit_act is not None:
        all_layers += [n for n, m in inf_model.model.named_modules() if isinstance(m, nn.ReLU)][1:-1]
    if args.bit_act is not None and 'mobilenet' in args.arch:
        all_layers += [n for n, m in inf_model.model.named_modules() if isinstance(m, nn.ReLU6)][1:-1]

    mq = ModelQuantizer(inf_model.model, args, all_layers, replacement_factory)
    loss = inf_model.evaluate_calibration()
    point = mq.get_clipping()

    del inf_model
    del mq

    return point, loss
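
# The seed-fixing block above repeats verbatim in every snippet in this file.
# A minimal sketch of a shared helper (hypothetical name fix_seed, not part of
# the original code) that would capture it once, assuming the same args fields:
def fix_seed(args):
    # Seed every RNG the calibration pipeline touches so results are reproducible.
    random.seed(args.seed)
    if not args.dont_fix_np_seed:
        np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    # Trade cuDNN autotuning for deterministic kernels.
    cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False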
def main(args, ml_logger):
    # Fix the seed
    random.seed(args.seed)
    if not args.dont_fix_np_seed:
        np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    if args.tag is not None:
        ml_logger.mlflow.log_param('tag', args.tag)

    # enable_bcorr = False
    # if args.bcorr_w:
    #     args.bcorr_w = False
    #     enable_bcorr = True

    args.qtype = 'max_static'
    # create model
    # Always enable shuffling to avoid issues where we get bad results due to weak statistics
    custom_resnet = True
    custom_inception = True
    inf_model = CnnModel(args.arch, custom_resnet, custom_inception, args.pretrained, args.dataset, args.gpu_ids, args.datapath,
                         batch_size=args.batch_size, shuffle=True, workers=args.workers, print_freq=args.print_freq,
                         cal_batch_size=args.cal_batch_size, cal_set_size=args.cal_set_size, args=args)

    layers = []
    # TODO: make it more generic
    if 'inception' in args.arch and args.custom_inception:
        first = 3
        last = -1
    else:
        first = 1
        last = -1
    if args.bit_weights is not None:
        layers += [n for n, m in inf_model.model.named_modules() if isinstance(m, nn.Conv2d)][first:last]
    if args.bit_act is not None:
        layers += [n for n, m in inf_model.model.named_modules() if isinstance(m, nn.ReLU)][first:last]
    if args.bit_act is not None and 'mobilenet' in args.arch:
        layers += [n for n, m in inf_model.model.named_modules() if isinstance(m, nn.ReLU6)][first:last]

    replacement_factory = {nn.ReLU: ActivationModuleWrapperPost,
                           nn.ReLU6: ActivationModuleWrapperPost,
                           nn.Conv2d: ParameterModuleWrapperPost}

    mq = ModelQuantizer(inf_model.model, args, layers, replacement_factory)
    maxabs_loss = inf_model.evaluate_calibration()
    print("max loss: {:.4f}".format(maxabs_loss.item()))
    max_point = mq.get_clipping()
    ml_logger.log_metric('Loss max', maxabs_loss.item(), step='auto')

    # evaluate
    maxabs_acc = 0  # inf_model.validate()
    ml_logger.log_metric('Acc maxabs', maxabs_acc, step='auto')
    data = {'max': {'alpha': max_point.cpu().numpy(), 'loss': maxabs_loss.item(), 'acc': maxabs_acc}}

    del inf_model
    del mq

    def eval_pnorm(p):
        args.qtype = 'lp_norm'
        args.lp = p
        # Fix the seed
        random.seed(args.seed)
        if not args.dont_fix_np_seed:
            np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)
        cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        inf_model = CnnModel(args.arch, custom_resnet, custom_inception, args.pretrained, args.dataset, args.gpu_ids, args.datapath,
                             batch_size=args.batch_size, shuffle=True, workers=args.workers, print_freq=args.print_freq,
                             cal_batch_size=args.cal_batch_size, cal_set_size=args.cal_set_size, args=args)

        mq = ModelQuantizer(inf_model.model, args, layers, replacement_factory)
        loss = inf_model.evaluate_calibration()
        point = mq.get_clipping()

        # evaluate
        acc = inf_model.validate()

        del inf_model
        del mq

        return point, loss, acc

    def eval_pnorm_on_calibration(p):
        args.qtype = 'lp_norm'
        args.lp = p
        # Fix the seed
        random.seed(args.seed)
        if not args.dont_fix_np_seed:
            np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)
        cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        inf_model = CnnModel(args.arch, custom_resnet, custom_inception, args.pretrained, args.dataset, args.gpu_ids, args.datapath,
                             batch_size=args.batch_size, shuffle=True, workers=args.workers, print_freq=args.print_freq,
                             cal_batch_size=args.cal_batch_size, cal_set_size=args.cal_set_size, args=args)

        mq = ModelQuantizer(inf_model.model, args, layers, replacement_factory)
        loss = inf_model.evaluate_calibration()
        point = mq.get_clipping()

        del inf_model
        del mq

        return point, loss

    ps = np.linspace(2, 4, 10)
    losses = []
    for p in tqdm(ps):
        point, loss = eval_pnorm_on_calibration(p)
        losses.append(loss.item())
        print("(p, loss) - ({}, {})".format(p, loss.item()))

    # Interpolate optimal p
    z = np.polyfit(ps, losses, 2)
    y = np.poly1d(z)
    p_intr = y.deriv().roots[0]
    # loss_opt = y(p_intr)
    print("p intr: {:.2f}".format(p_intr))
    ml_logger.log_metric('p intr', p_intr, step='auto')

    lp_point, lp_loss, lp_acc = eval_pnorm(p_intr)

    print("loss p intr: {:.4f}".format(lp_loss.item()))
    print("acc p intr: {:.4f}".format(lp_acc))
    ml_logger.log_metric('Init loss', lp_loss.item(), step='auto')
    ml_logger.log_metric('Acc init', lp_acc, step='auto')

    global _eval_count, _min_loss
    _min_loss = lp_loss.item()

    # loss_best = np.min(losses)
    # if loss_best < lp_loss:
    #     p_intr = ps[np.argmin(losses)]
    #     print("p best: {:.2f}".format(p_intr))
    #     ml_logger.log_metric('p best', p_intr, step='auto')
    #     lp_point, lp_loss, lp_acc = eval_pnorm(p_intr)
    #     print("loss p best: {:.4f}".format(lp_loss.item()))
    #     print("acc p best: {:.4f}".format(lp_acc))
    #     ml_logger.log_metric('Loss p best', lp_loss.item(), step='auto')
    #     ml_logger.log_metric('Acc p best', lp_acc, step='auto')

    # idx = np.argmin([maxabs_loss, lp_loss])
    # init = [max_point, lp_point][idx]

    init = lp_point

    args.qtype = 'lp_norm'
    args.lp = p_intr
    # Fix the seed
    random.seed(args.seed)
    if not args.dont_fix_np_seed:
        np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # if enable_bcorr:
    #     args.bcorr_w = True
    inf_model = CnnModel(args.arch, custom_resnet, custom_inception, args.pretrained, args.dataset, args.gpu_ids, args.datapath,
                         batch_size=args.batch_size, shuffle=True, workers=args.workers, print_freq=args.print_freq,
                         cal_batch_size=args.cal_batch_size, cal_set_size=args.cal_set_size, args=args)

    mq = ModelQuantizer(inf_model.model, args, layers, replacement_factory)

    # run optimizer
    min_options = {}
    if args.maxiter is not None:
        min_options['maxiter'] = args.maxiter
    if args.maxfev is not None:
        min_options['maxfev'] = args.maxfev

    _iter = count(0)

    def local_search_callback(x):
        it = next(_iter)
        mq.set_clipping(x, inf_model.device)
        loss = inf_model.evaluate_calibration()
        print("\n[{}]: Local search callback".format(it))
        print("loss: {:.4f}\n".format(loss.item()))
        print(x)
        ml_logger.log_metric('Loss {}'.format(args.min_method), loss.item(), step='auto')

        # evaluate
        acc = inf_model.validate()
        ml_logger.log_metric('Acc {}'.format(args.min_method), acc, step='auto')

    args.min_method = "Powell"
    method = coord_descent if args.min_method == 'CD' else args.min_method
    res = opt.minimize(lambda scales: evaluate_calibration_clipped(scales, inf_model, mq), init.cpu().numpy(),
                       method=method, options=min_options, callback=local_search_callback)

    print(res)

    scales = res.x
    mq.set_clipping(scales, inf_model.device)
    loss = inf_model.evaluate_calibration()
    ml_logger.log_metric('Loss {}'.format(args.min_method), loss.item(), step='auto')

    # evaluate
    acc = inf_model.validate()
    ml_logger.log_metric('Acc {}'.format(args.min_method), acc, step='auto')
    data['powell'] = {'alpha': scales, 'loss': loss.item(), 'acc': acc}

    # save scales
    f_name = "scales_{}_W{}A{}.pkl".format(args.arch, args.bit_weights, args.bit_act)
    f = open(os.path.join(proj_root_dir, 'data', f_name), 'wb')
    pickle.dump(data, f)
    f.close()
    print("Data saved to {}".format(f_name))
def main(args, ml_logger):
    # Fix the seed
    random.seed(args.seed)
    if not args.dont_fix_np_seed:
        np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    args.qtype = 'max_static'
    # create model
    # Always enable shuffling to avoid issues where we get bad results due to weak statistics
    inf_model = CnnModel(args.arch,
                         args.custom_resnet,
                         args.pretrained,
                         args.dataset,
                         args.gpu_ids,
                         args.datapath,
                         batch_size=args.batch_size,
                         shuffle=True,
                         workers=args.workers,
                         print_freq=args.print_freq,
                         cal_batch_size=args.cal_batch_size,
                         cal_set_size=args.cal_set_size,
                         args=args)

    all_layers = []
    if args.bit_weights is not None:
        all_layers += [
            n for n, m in inf_model.model.named_modules()
            if isinstance(m, nn.Conv2d)
        ][1:-1]
    if args.bit_act is not None:
        all_layers += [
            n for n, m in inf_model.model.named_modules()
            if isinstance(m, nn.ReLU)
        ][1:-1]
    if args.bit_act is not None and 'mobilenet' in args.arch:
        all_layers += [
            n for n, m in inf_model.model.named_modules()
            if isinstance(m, nn.ReLU6)
        ][1:-1]

    replacement_factory = {
        nn.ReLU: ActivationModuleWrapperPost,
        nn.ReLU6: ActivationModuleWrapperPost,
        nn.Conv2d: ParameterModuleWrapperPost
    }
    mq = ModelQuantizer(inf_model.model, args, all_layers, replacement_factory)

    loss = inf_model.evaluate_calibration()
    print("loss: {:.4f}".format(loss.item()))
    ml_logger.log_metric('loss', loss.item(), step='auto')

    # get clipping values
    p_max = mq.get_clipping()
    # print(init)

    args.qtype = 'l2_norm'
    inf_model = CnnModel(args.arch,
                         args.custom_resnet,
                         args.pretrained,
                         args.dataset,
                         args.gpu_ids,
                         args.datapath,
                         batch_size=args.batch_size,
                         shuffle=True,
                         workers=args.workers,
                         print_freq=args.print_freq,
                         cal_batch_size=args.cal_batch_size,
                         cal_set_size=args.cal_set_size,
                         args=args)
    mq = ModelQuantizer(inf_model.model, args, all_layers, replacement_factory)
    loss = inf_model.evaluate_calibration()
    print("loss l2: {:.4f}".format(loss.item()))
    p_l2 = mq.get_clipping()

    args.qtype = 'l3_norm'
    inf_model = CnnModel(args.arch,
                         args.custom_resnet,
                         args.pretrained,
                         args.dataset,
                         args.gpu_ids,
                         args.datapath,
                         batch_size=args.batch_size,
                         shuffle=True,
                         workers=args.workers,
                         print_freq=args.print_freq,
                         cal_batch_size=args.cal_batch_size,
                         cal_set_size=args.cal_set_size,
                         args=args)
    mq = ModelQuantizer(inf_model.model, args, all_layers, replacement_factory)
    loss = inf_model.evaluate_calibration()
    print("loss l2: {:.4f}".format(loss.item()))
    p_l3 = mq.get_clipping()

    # gamma_avg = 0
    # T_avg = 0
    num_iter = args.num_iter
    n = args.num_points

    def status_callback(i, gamma, T, f_max):
        T = T.item()
        gamma = gamma.item()
        f_max = f_max.item()

        print("gamma^2: {}, T: {}, max: {}".format(gamma, T, f_max))
        ml_logger.log_metric('gamma', gamma, step='auto')
        ml_logger.log_metric('T', T, step='auto')
        ml_logger.log_metric('f_max', f_max, step='auto')
        T_norm = T / np.sqrt(i + 1)
        ml_logger.log_metric('T_norm', T_norm, step='auto')
        gamma_norm = gamma / f_max**2
        ml_logger.log_metric('gamma_norm', gamma_norm, step='auto')

    gamma_, T_, f_max = separability_index(
        lambda x: model_func(x, p_max, inf_model, mq, p_l2, p_l3),
        len(p_max),
        n,
        num_iter,
        gpu=True,
        status_callback=status_callback)

    gamma_norm = np.mean(np.array(gamma_) / f_max.item()**2)
    T_norm = np.mean(np.array(T_) / np.sqrt(np.arange(1, num_iter + 1)))

    print("gamma^2 norm: {}, T norm: {}".format(gamma_norm, T_norm))
    ml_logger.log_metric('gamma_tot', gamma_norm, step='auto')
    ml_logger.log_metric('T_tot', T_norm, step='auto')
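
# A toy illustration (not from the original code) of the layer-name collection
# pattern used throughout these examples: walk named_modules(), keep modules of
# a given type, and slice off the first and last occurrences so the input and
# output layers stay unquantized.
def demo_collect_layers():
    import torch.nn as nn
    model = nn.Sequential(
        nn.Conv2d(3, 8, 3), nn.ReLU(),
        nn.Conv2d(8, 8, 3), nn.ReLU(),
        nn.Conv2d(8, 8, 3), nn.ReLU(),
        nn.Conv2d(8, 10, 1))
    convs = [n for n, m in model.named_modules() if isinstance(m, nn.Conv2d)][1:-1]
    print(convs)  # ['2', '4'] - only the inner conv layers remain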
def main(args, ml_logger):
    # Fix the seed
    random.seed(args.seed)
    if not args.dont_fix_np_seed:
        np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    if args.tag is not None:
        ml_logger.mlflow.log_param('tag', args.tag)

    args.qtype = 'max_static'
    # create model
    # Always enable shuffling to avoid issues where we get bad results due to weak statistics
    custom_resnet = True
    custom_inception = True
    inf_model = CnnModel(args.arch,
                         custom_resnet,
                         custom_inception,
                         args.pretrained,
                         args.dataset,
                         args.gpu_ids,
                         args.datapath,
                         batch_size=args.batch_size,
                         shuffle=True,
                         workers=args.workers,
                         print_freq=args.print_freq,
                         cal_batch_size=args.cal_batch_size,
                         cal_set_size=args.cal_set_size,
                         args=args)

    layers = []
    # TODO: make it more generic
    if args.bit_weights is not None:
        layers += [
            n for n, m in inf_model.model.named_modules()
            if isinstance(m, nn.Conv2d)
        ][1:-1]
    if args.bit_act is not None:
        layers += [
            n for n, m in inf_model.model.named_modules()
            if isinstance(m, nn.ReLU)
        ][1:-1]
    if args.bit_act is not None and 'mobilenet' in args.arch:
        layers += [
            n for n, m in inf_model.model.named_modules()
            if isinstance(m, nn.ReLU6)
        ][1:-1]

    replacement_factory = {
        nn.ReLU: ActivationModuleWrapperPost,
        nn.ReLU6: ActivationModuleWrapperPost,
        nn.Conv2d: ParameterModuleWrapperPost
    }

    mq = ModelQuantizer(inf_model.model, args, layers, replacement_factory)
    loss = inf_model.evaluate_calibration()

    # evaluate
    max_acc = inf_model.validate()
    max_point = mq.get_clipping()
    ml_logger.log_metric('Loss max', loss.item(), step='auto')
    ml_logger.log_metric('Acc max', max_acc, step='auto')
    data = {'max': {'alpha': max_point.cpu().numpy(), 'loss': loss.item()}}
    print("max loss: {:.4f}, max_acc: {:.4f}".format(loss.item(), max_acc))

    def eval_pnorm(p):
        args.qtype = 'lp_norm'
        args.lp = p
        # Fix the seed
        random.seed(args.seed)
        if not args.dont_fix_np_seed:
            np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)
        cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        inf_model = CnnModel(
            args.arch,
            custom_resnet,
            custom_inception,
            args.pretrained,
            args.dataset,
            args.gpu_ids,
            args.datapath,
            batch_size=args.batch_size,
            shuffle=True,
            workers=args.workers,
            print_freq=args.print_freq,
            cal_batch_size=args.cal_batch_size,
            cal_set_size=args.cal_set_size,
            args=args,
        )

        mq = ModelQuantizer(inf_model.model, args, layers, replacement_factory)
        loss = inf_model.evaluate_calibration()
        point = mq.get_clipping()

        # evaluate
        acc = inf_model.validate()

        del inf_model
        del mq

        return point, loss, acc

    del inf_model
    del mq

    print("Evaluate L2 norm optimization")
    l2_point, l2_loss, l2_acc = eval_pnorm(2.)
    print("loss l2: {:.4f}".format(l2_loss.item()))
    ml_logger.log_metric('Loss l2', l2_loss.item(), step='auto')
    ml_logger.log_metric('Acc l2', l2_acc, step='auto')
    data['l2'] = {
        'alpha': l2_point.cpu().numpy(),
        'loss': l2_loss.item(),
        'acc': l2_acc
    }

    print("Evaluate L2.5 norm optimization")
    l25_point, l25_loss, l25_acc = eval_pnorm(2.5)
    print("loss l2.5: {:.4f}".format(l25_loss.item()))
    ml_logger.log_metric('Loss l2.5', l25_loss.item(), step='auto')
    ml_logger.log_metric('Acc l2.5', l25_acc, step='auto')
    data['l2.5'] = {
        'alpha': l25_point.cpu().numpy(),
        'loss': l25_loss.item(),
        'acc': l25_acc
    }

    print("Evaluate L3 norm optimization")
    l3_point, l3_loss, l3_acc = eval_pnorm(3.)
    print("loss l3: {:.4f}".format(l3_loss.item()))
    ml_logger.log_metric('Loss l3', l3_loss.item(), step='auto')
    ml_logger.log_metric('Acc l3', l3_acc, step='auto')
    data['l3'] = {
        'alpha': l3_point.cpu().numpy(),
        'loss': l3_loss.item(),
        'acc': l3_acc
    }

    # Interpolate optimal p
    xp = np.linspace(1, 5, 50)
    z = np.polyfit([2, 2.5, 3], [l2_acc, l25_acc, l3_acc], 2)
    y = np.poly1d(z)
    p_intr = xp[np.argmax(y(xp))]
    print("p intr: {:.2f}".format(p_intr))
    ml_logger.log_metric('p intr', p_intr, step='auto')

    args.qtype = 'lp_norm'
    args.lp = p_intr
    # Fix the seed
    random.seed(args.seed)
    if not args.dont_fix_np_seed:
        np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    inf_model = CnnModel(args.arch,
                         custom_resnet,
                         custom_inception,
                         args.pretrained,
                         args.dataset,
                         args.gpu_ids,
                         args.datapath,
                         batch_size=args.batch_size,
                         shuffle=True,
                         workers=args.workers,
                         print_freq=args.print_freq,
                         cal_batch_size=args.cal_batch_size,
                         cal_set_size=args.cal_set_size,
                         args=args)

    if args.bn_folding:
        print(
            "Applying batch-norm folding ahead of post-training quantization")
        # pdb.set_trace()
        from utils.absorb_bn import search_absorbe_bn
        search_absorbe_bn(inf_model.model)
    mq = ModelQuantizer(inf_model.model, args, layers, replacement_factory)

    # Evaluate with optimal p
    lp_loss = inf_model.evaluate_calibration()
    lp_point = mq.get_clipping()
    # evaluate
    lp_acc = inf_model.validate()

    print("loss p intr: {:.4f}".format(lp_loss.item()))
    ml_logger.log_metric('Loss p intr', lp_loss.item(), step='auto')
    ml_logger.log_metric('Acc p intr', lp_acc, step='auto')

    global _eval_count, _min_loss
    _min_loss = lp_loss.item()

    idx = np.argmax([l2_acc, l25_acc, l3_acc, lp_acc])
    init = [l2_point, l25_point, l3_point, lp_point][idx]

    # run optimizer
    min_options = {}
    if args.maxiter is not None:
        min_options['maxiter'] = args.maxiter
    if args.maxfev is not None:
        min_options['maxfev'] = args.maxfev

    _iter = count(0)

    def local_search_callback(x):
        it = next(_iter)
        mq.set_clipping(x, inf_model.device)
        loss = inf_model.evaluate_calibration()
        print("\n[{}]: Local search callback".format(it))
        print("loss: {:.4f}\n".format(loss.item()))
        print(x)
        ml_logger.log_metric('Loss {}'.format(args.min_method),
                             loss.item(),
                             step='auto')

        # evaluate
        acc = inf_model.validate()
        ml_logger.log_metric('Acc {}'.format(args.min_method),
                             acc,
                             step='auto')

    args.min_method = "Powell"
    method = coord_descent if args.min_method == 'CD' else args.min_method
    res = opt.minimize(
        lambda scales: evaluate_calibration_clipped(scales, inf_model, mq),
        init.cpu().numpy(),
        method=method,
        options=min_options,
        callback=local_search_callback)

    print(res)

    scales = res.x
    mq.set_clipping(scales, inf_model.device)
    loss = inf_model.evaluate_calibration()
    ml_logger.log_metric('Loss {}'.format(args.min_method),
                         loss.item(),
                         step='auto')

    # evaluate
    acc = inf_model.validate()
    ml_logger.log_metric('Acc {}'.format(args.min_method), acc, step='auto')
    data['powell'] = {'alpha': scales, 'loss': loss.item(), 'acc': acc}

    print("Starting coordinate descent")
    args.min_method = "CD"
    # Perform only one iteration of coordinate descent to avoid divergence
    min_options['maxiter'] = 1
    _iter = count(0)
    global _eval_count
    _eval_count = count(0)
    _min_loss = lp_loss.item()
    mq.set_clipping(init, inf_model.device)
    # Run coordinate descent for comparison
    method = coord_descent
    res = opt.minimize(
        lambda scales: evaluate_calibration_clipped(scales, inf_model, mq),
        init.cpu().numpy(),
        method=method,
        options=min_options,
        callback=local_search_callback)

    print(res)

    scales = res.x
    mq.set_clipping(scales, inf_model.device)
    loss = inf_model.evaluate_calibration()
    ml_logger.log_metric('Loss {}'.format("CD"), loss.item(), step='auto')

    # evaluate
    acc = inf_model.validate()
    ml_logger.log_metric('Acc {}'.format("CD"), acc, step='auto')
    data['cd'] = {'alpha': scales, 'loss': loss.item(), 'acc': acc}

    # save scales
    f_name = "scales_{}_W{}A{}.pkl".format(args.arch, args.bit_weights,
                                           args.bit_act)
    f = open(os.path.join(proj_root_dir, 'data', f_name), 'wb')
    pickle.dump(data, f)
    f.close()
    print("Data saved to {}".format(f_name))
Example #6
def main(args):
    # Fix the seed
    random.seed(args.seed)
    if not args.dont_fix_np_seed:
        np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    args.qtype = 'max_static'
    # create model
    # Always enable shuffling to avoid issues where we get bad results due to weak statistics
    custom_resnet = True
    inf_model = CnnModel(args.arch,
                         custom_resnet,
                         args.pretrained,
                         args.dataset,
                         args.gpu_ids,
                         args.datapath,
                         batch_size=args.batch_size,
                         shuffle=True,
                         workers=args.workers,
                         print_freq=args.print_freq,
                         cal_batch_size=args.cal_batch_size,
                         cal_set_size=args.cal_set_size,
                         args=args)

    all_layers = []
    if args.bit_weights is not None:
        all_layers += [
            n for n, m in inf_model.model.named_modules()
            if isinstance(m, nn.Conv2d)
        ][1:-1]
    if args.bit_act is not None:
        all_layers += [
            n for n, m in inf_model.model.named_modules()
            if isinstance(m, nn.ReLU)
        ][1:-1]
    if args.bit_act is not None and 'mobilenet' in args.arch:
        all_layers += [
            n for n, m in inf_model.model.named_modules()
            if isinstance(m, nn.ReLU6)
        ][1:-1]

    id1 = 0
    id2 = 1
    layers = [all_layers[id1], all_layers[id2]]
    replacement_factory = {
        nn.ReLU: ActivationModuleWrapperPost,
        nn.ReLU6: ActivationModuleWrapperPost,
        nn.Conv2d: ParameterModuleWrapperPost
    }

    mq = ModelQuantizer(inf_model.model, args, layers, replacement_factory)

    loss = inf_model.evaluate_calibration()
    print("loss: {:.4f}".format(loss.item()))
    max_point = mq.get_clipping()

    n = args.grid_resolution
    x = np.linspace(0.01, max_point[0].item(), n)
    y = np.linspace(0.01, max_point[1].item(), n)
    X, Y = np.meshgrid(x, y)
    Z = np.empty((n, n))
    for i, x_ in enumerate(tqdm(x)):
        for j, y_ in enumerate(y):
            # set clip value to qwrappers
            scales = np.array([X[i, j], Y[i, j]])
            mq.set_clipping(scales, inf_model.device)

            # evaluate with clipping
            loss = inf_model.evaluate_calibration()
            Z[i][j] = loss.item()

    max_point = np.concatenate([max_point.cpu().numpy(), loss.cpu().numpy()])

    def eval_pnorm(p):
        args.qtype = 'lp_norm'
        args.lp = p
        # Fix the seed
        random.seed(args.seed)
        if not args.dont_fix_np_seed:
            np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)
        cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        inf_model = CnnModel(args.arch,
                             custom_resnet,
                             args.pretrained,
                             args.dataset,
                             args.gpu_ids,
                             args.datapath,
                             batch_size=args.batch_size,
                             shuffle=True,
                             workers=args.workers,
                             print_freq=args.print_freq,
                             cal_batch_size=args.cal_batch_size,
                             cal_set_size=args.cal_set_size,
                             args=args)

        mq = ModelQuantizer(inf_model.model, args, layers, replacement_factory)
        loss = inf_model.evaluate_calibration()
        point = mq.get_clipping()
        point = np.concatenate([point.cpu().numpy(), loss.cpu().numpy()])

        del inf_model
        del mq
        return point

    del inf_model
    del mq
    l1_point = eval_pnorm(1.)
    print("loss l1: {:.4f}".format(l1_point[2]))

    l1_5_point = eval_pnorm(1.5)
    print("loss l1.5: {:.4f}".format(l1_5_point[2]))

    l2_point = eval_pnorm(2.)
    print("loss l2: {:.4f}".format(l2_point[2]))

    l2_5_point = eval_pnorm(2.5)
    print("loss l2.5: {:.4f}".format(l2_5_point[2]))

    l3_point = eval_pnorm(3.)
    print("loss l3: {:.4f}".format(l3_point[2]))

    f_name = "{}_l{}l{}_W{}A{}.pkl".format(args.arch, id1, id2,
                                           args.bit_weights, args.bit_act)
    f = open(os.path.join(proj_root_dir, 'data', f_name), 'wb')
    data = {
        'X': X,
        'Y': Y,
        'Z': Z,
        'max_point': max_point,
        'l1_point': l1_point,
        'l1.5_point': l1_5_point,
        'l2_point': l2_point,
        'l2.5_point': l2_5_point,
        'l3_point': l3_point
    }
    pickle.dump(data, f)
    f.close()
    print("Data saved to {}".format(f_name))
def main(args, ml_logger):
    # Fix the seed
    random.seed(args.seed)
    if not args.dont_fix_np_seed:
        np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    if args.tag is not None:
        ml_logger.mlflow.log_param('tag', args.tag)

    enable_bcorr = False
    if args.bcorr_w:
        args.bcorr_w = False
        enable_bcorr = True

    args.qtype = 'max_static'
    # create model
    # Always enable shuffling to avoid issues where we get bad results due to weak statistics
    custom_resnet = True
    custom_inception = True
    inf_model = CnnModel(args.arch, custom_resnet, custom_inception, args.pretrained, args.dataset, args.gpu_ids, args.datapath,
                         batch_size=args.batch_size, shuffle=True, workers=args.workers, print_freq=args.print_freq,
                         cal_batch_size=args.cal_batch_size, cal_set_size=args.cal_set_size, args=args)

    layers = []
    # TODO: make it more generic
    if args.bit_weights is not None:
        layers += [n for n, m in inf_model.model.named_modules() if isinstance(m, nn.Conv2d)][1:-1]
    if args.bit_act is not None:
        layers += [n for n, m in inf_model.model.named_modules() if isinstance(m, nn.ReLU)][1:-1]
    if args.bit_act is not None and 'mobilenet' in args.arch:
        layers += [n for n, m in inf_model.model.named_modules() if isinstance(m, nn.ReLU6)][1:-1]

    replacement_factory = {nn.ReLU: ActivationModuleWrapperPost,
                           nn.ReLU6: ActivationModuleWrapperPost,
                           nn.Conv2d: ParameterModuleWrapperPost}

    mq = ModelQuantizer(inf_model.model, args, layers, replacement_factory)
    maxabs_loss = inf_model.evaluate_calibration()
    print("max loss: {:.4f}".format(maxabs_loss.item()))
    max_point = mq.get_clipping()
    ml_logger.log_metric('Loss max', maxabs_loss.item(), step='auto')

    # evaluate
    maxabs_acc = 0  # inf_model.validate()
    ml_logger.log_metric('Acc maxabs', maxabs_acc, step='auto')
    data = {'max': {'alpha': max_point.cpu().numpy(), 'loss': maxabs_loss.item(), 'acc': maxabs_acc}}

    del inf_model
    del mq

    def eval_pnorm(p):
        args.qtype = 'lp_norm'
        args.lp = p
        # Fix the seed
        random.seed(args.seed)
        if not args.dont_fix_np_seed:
            np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)
        cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        inf_model = CnnModel(args.arch, custom_resnet, custom_inception, args.pretrained, args.dataset, args.gpu_ids, args.datapath,
                             batch_size=args.batch_size, shuffle=True, workers=args.workers, print_freq=args.print_freq,
                             cal_batch_size=args.cal_batch_size, cal_set_size=args.cal_set_size, args=args)

        mq = ModelQuantizer(inf_model.model, args, layers, replacement_factory)
        loss = inf_model.evaluate_calibration()
        point = mq.get_clipping()

        # evaluate
        acc = inf_model.validate()

        del inf_model
        del mq

        return point, loss, acc

    def eval_pnorm_on_calibration(p):
        args.qtype = 'lp_norm'
        args.lp = p
        # Fix the seed
        random.seed(args.seed)
        if not args.dont_fix_np_seed:
            np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)
        cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        inf_model = CnnModel(args.arch, custom_resnet, custom_inception, args.pretrained, args.dataset, args.gpu_ids, args.datapath,
                             batch_size=args.batch_size, shuffle=True, workers=args.workers, print_freq=args.print_freq,
                             cal_batch_size=args.cal_batch_size, cal_set_size=args.cal_set_size, args=args)

        mq = ModelQuantizer(inf_model.model, args, layers, replacement_factory)
        loss = inf_model.evaluate_calibration()
        point = mq.get_clipping()

        del inf_model
        del mq

        return point, loss

    # l2_point, l2_loss = eval_pnorm_on_calibration(2)
    # print("loss l2: {:.4f}".format(l2_loss.item()))
    # ml_logger.log_metric('Loss l2', l2_loss.item(), step='auto')
    # # l4_point, l4_loss = eval_pnorm_on_calibration(4)
    # print("loss l4: {:.4f}".format(l4_loss.item()))
    # ml_logger.log_metric('Loss l4', l4_loss.item(), step='auto')

    # args.qtype = 'lp_norm'
    # args.lp = p_intr
    # Fix the seed
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    inf_model = CnnModel(args.arch, custom_resnet, custom_inception, args.pretrained, args.dataset, args.gpu_ids, args.datapath,
                         batch_size=args.batch_size, shuffle=True, workers=args.workers, print_freq=args.print_freq,
                         cal_batch_size=args.cal_batch_size, cal_set_size=args.cal_set_size, args=args)

    mq = ModelQuantizer(inf_model.model, args, layers, replacement_factory)

    opt_point = np.array([0.42811054, 1.27721779, 0.53149996, 1.51492159, 0.91115569,
                          1.17987683, 1.13352566, 1.5227828, 0.67026185, 0.75535328,
                          0.54173654, 0.70824616, 0.44899457, 1.25257411, 0.68778409])
    start_point = 0.8*opt_point
    end_point = 1.5*opt_point
    k = 100
    step = (end_point - start_point) / k
    print("start")
    print(start_point)
    print("end")
    print(end_point)

    losses = []
    points = []
    for i in range(k+1):
        point = start_point + i * step
        mq.set_clipping(point, inf_model.device)
        loss = inf_model.evaluate_calibration()
        losses.append(loss.item())
        points.append(point)
        print("({}: loss) - {}".format(i, loss.item()))

    data = {'opt': opt_point, 'points': points, 'loss': losses}

    # save scales
    f_name = "quadratic_loss_{}_W{}A{}.pkl".format(args.arch, args.bit_weights, args.bit_act)
    f = open(os.path.join(proj_root_dir, 'data', f_name), 'wb')
    pickle.dump(data, f)
    f.close()
    print("Data saved to {}".format(f_name))
Example #8
def main(args):
    # Fix the seed
    random.seed(args.seed)
    if not args.dont_fix_np_seed:
        np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    args.qtype = 'max_static'
    # create model
    # Always enable shuffling to avoid issues where we get bad results due to weak statistics
    custom_resnet = True
    custom_inception = True
    replacement_factory = {
        nn.ReLU: ActivationModuleWrapperPost,
        nn.ReLU6: ActivationModuleWrapperPost,
        nn.Conv2d: ParameterModuleWrapperPost
    }

    def eval_pnorm(p):
        args.qtype = 'lp_norm'
        args.lp = p
        # Fix the seed
        random.seed(args.seed)
        if not args.dont_fix_np_seed:
            np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)
        cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

        inf_model = CnnModel(args.arch,
                             custom_resnet,
                             custom_inception,
                             args.pretrained,
                             args.dataset,
                             args.gpu_ids,
                             args.datapath,
                             batch_size=args.batch_size,
                             shuffle=True,
                             workers=args.workers,
                             print_freq=args.print_freq,
                             cal_batch_size=args.cal_batch_size,
                             cal_set_size=args.cal_set_size,
                             args=args)

        all_layers = []
        if args.bit_weights is not None:
            all_layers += [
                n for n, m in inf_model.model.named_modules()
                if isinstance(m, nn.Conv2d)
            ][1:-1]
        if args.bit_act is not None:
            all_layers += [
                n for n, m in inf_model.model.named_modules()
                if isinstance(m, nn.ReLU)
            ][1:-1]
        if args.bit_act is not None and 'mobilenet' in args.arch:
            all_layers += [
                n for n, m in inf_model.model.named_modules()
                if isinstance(m, nn.ReLU6)
            ][1:-1]

        mq = ModelQuantizer(inf_model.model, args, all_layers,
                            replacement_factory)
        loss = inf_model.evaluate_calibration()
        point = mq.get_clipping()

        del inf_model
        del mq

        return point, loss

    random.seed(args.seed)
    if not args.dont_fix_np_seed:
        np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    inf_model = CnnModel(args.arch,
                         custom_resnet,
                         custom_inception,
                         args.pretrained,
                         args.dataset,
                         args.gpu_ids,
                         args.datapath,
                         batch_size=args.batch_size,
                         shuffle=True,
                         workers=args.workers,
                         print_freq=args.print_freq,
                         cal_batch_size=args.cal_batch_size,
                         cal_set_size=args.cal_set_size,
                         args=args)

    all_layers = []
    if args.bit_weights is not None:
        all_layers += [
            n for n, m in inf_model.model.named_modules()
            if isinstance(m, nn.Conv2d)
        ][1:-1]
    if args.bit_act is not None:
        all_layers += [
            n for n, m in inf_model.model.named_modules()
            if isinstance(m, nn.ReLU)
        ][1:-1]
    if args.bit_act is not None and 'mobilenet' in args.arch:
        all_layers += [
            n for n, m in inf_model.model.named_modules()
            if isinstance(m, nn.ReLU6)
        ][1:-1]

    mq = ModelQuantizer(inf_model.model, args, all_layers, replacement_factory)

    p1 = torch.tensor([
        0.7677084, 1.7640269, 0.80914754, 2.044024, 0.87229156, 1.2659631,
        0.78454655, 1.3018194, 0.7894693, 0.92967707, 0.5754433, 0.9115604,
        0.5689196, 1.2382566, 0.601773
    ])
    p2 = torch.tensor([
        0.8135005, 1.7248632, 0.8009758, 2.005755, 0.83956134, 1.2431265,
        0.7720454, 1.3013302, 0.76733077, 0.96402454, 0.5914314, 0.9579072,
        0.56543064, 1.2535284, 0.6261679
    ])

    k = 50
    step = p1 - p2
    losses = []
    points = []
    for i in range(k + 1):
        point = p1 + 0.4 * i * step - 10 * step
        mq.set_clipping(point, inf_model.device)
        loss = inf_model.evaluate_calibration()
        losses.append(loss.item())
        points.append(point.cpu().numpy())
        print("({}: loss) - {}".format(i, loss.item()))

    f_name = "{}_W{}A{}_loss_conjugate_dir.pkl".format(args.arch,
                                                       args.bit_weights,
                                                       args.bit_act)
    f = open(os.path.join(proj_root_dir, 'data', f_name), 'wb')
    data = {'start': p1.cpu().numpy(), 'loss': losses, 'points': points}
    pickle.dump(data, f)
    f.close()
    print("Data saved to {}".format(f_name))
Example #9
def main_ratio(args, ml_logger):
    # Fix the seed
    random.seed(args.seed)
    if not args.dont_fix_np_seed:
        np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    curr_best_acc = 0
    curr_best_scale_point = None

    args.qtype = 'max_static'
    # create model
    # Always enable shuffling to avoid issues where we get bad results due to weak statistics
    custom_resnet = True
    custom_inception = True
    inf_model = CnnModel(args.arch,
                         custom_resnet,
                         custom_inception,
                         args.pretrained,
                         args.dataset,
                         args.gpu_ids,
                         args.datapath,
                         batch_size=args.batch_size,
                         shuffle=True,
                         workers=args.workers,
                         print_freq=args.print_freq,
                         cal_batch_size=args.cal_batch_size,
                         cal_set_size=args.cal_set_size,
                         args=args)

    # pdb.set_trace()
    if args.bn_folding:
        print(
            "Applying batch-norm folding ahead of post-training quantization")
        # pdb.set_trace()
        from utils.absorb_bn import search_absorbe_bn
        search_absorbe_bn(inf_model.model)
    # pdb.set_trace()

    layers = []
    # TODO: make it more generic
    if args.bit_weights is not None:
        layers += [
            n for n, m in inf_model.model.named_modules()
            if isinstance(m, nn.Conv2d)
        ][1:-1]
    if args.bit_act is not None:
        layers += [
            n for n, m in inf_model.model.named_modules()
            if isinstance(m, nn.ReLU)
        ][1:-1]
    if args.bit_act is not None and 'mobilenet' in args.arch:
        layers += [
            n for n, m in inf_model.model.named_modules()
            if isinstance(m, nn.ReLU6)
        ][1:-1]

    replacement_factory = {
        nn.ReLU: ActivationModuleWrapperPost,
        nn.ReLU6: ActivationModuleWrapperPost,
        nn.Conv2d: ParameterModuleWrapperPost
    }

    mq = ModelQuantizer(inf_model.model, args, layers, replacement_factory)
    loss = inf_model.evaluate_calibration()

    # evaluate
    max_acc = inf_model.validate()
    max_point = mq.get_clipping()
    # pdb.set_trace()
    if max_acc > curr_best_acc:
        curr_best_acc = max_acc
        curr_best_scale_point = max_point
    ml_logger.log_metric('Loss max', loss.item(), step='auto')
    ml_logger.log_metric('Acc max', max_acc, step='auto')
    data = {'max': {'alpha': max_point.cpu().numpy(), 'loss': loss.item()}}
    print("max loss: {:.4f}, max_acc: {:.4f}".format(loss.item(), max_acc))

    def eval_pnorm(p):
        args.qtype = 'lp_norm'
        args.lp = p
        # Fix the seed
        random.seed(args.seed)
        if not args.dont_fix_np_seed:
            np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)
        cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        inf_model = CnnModel(args.arch,
                             custom_resnet,
                             custom_inception,
                             args.pretrained,
                             args.dataset,
                             args.gpu_ids,
                             args.datapath,
                             batch_size=args.batch_size,
                             shuffle=True,
                             workers=args.workers,
                             print_freq=args.print_freq,
                             cal_batch_size=args.cal_batch_size,
                             cal_set_size=args.cal_set_size,
                             args=args)

        if args.bn_folding:
            print(
                "Applying batch-norm folding ahead of post-training quantization"
            )
            # pdb.set_trace()
            from utils.absorb_bn import search_absorbe_bn
            search_absorbe_bn(inf_model.model)
        mq = ModelQuantizer(inf_model.model, args, layers, replacement_factory)
        loss = inf_model.evaluate_calibration()
        point = mq.get_clipping()

        # evaluate
        acc = inf_model.validate()

        del inf_model
        del mq

        return point, loss, acc

    del inf_model
    del mq

    l2_point, l2_loss, l2_acc = eval_pnorm(2.)
    print("loss l2: {:.4f}".format(l2_loss.item()))
    ml_logger.log_metric('Loss l2', l2_loss.item(), step='auto')
    ml_logger.log_metric('Acc l2', l2_acc, step='auto')
    data['l2'] = {
        'alpha': l2_point.cpu().numpy(),
        'loss': l2_loss.item(),
        'acc': l2_acc
    }
    if l2_acc > curr_best_acc:
        curr_best_acc = l2_acc
        curr_best_scale_point = l2_point

    l25_point, l25_loss, l25_acc = eval_pnorm(2.5)
    print("loss l2.5: {:.4f}".format(l25_loss.item()))
    ml_logger.log_metric('Loss l2.5', l25_loss.item(), step='auto')
    ml_logger.log_metric('Acc l2.5', l25_acc, step='auto')
    data['l2.5'] = {
        'alpha': l25_point.cpu().numpy(),
        'loss': l25_loss.item(),
        'acc': l25_acc
    }
    if l25_acc > curr_best_acc:
        curr_best_acc = l25_acc
        curr_best_scale_point = l25_point

    l3_point, l3_loss, l3_acc = eval_pnorm(3.)
    print("loss l3: {:.4f}".format(l3_loss.item()))
    ml_logger.log_metric('Loss l3', l3_loss.item(), step='auto')
    ml_logger.log_metric('Acc l3', l3_acc, step='auto')
    data['l3'] = {
        'alpha': l3_point.cpu().numpy(),
        'loss': l3_loss.item(),
        'acc': l3_acc
    }
    if l3_acc > curr_best_acc:
        curr_best_acc = l3_acc
        curr_best_scale_point = l3_point

    # Interpolate optimal p
    xp = np.linspace(1, 5, 50)
    z = np.polyfit([2, 2.5, 3], [l2_acc, l25_acc, l3_acc], 2)
    y = np.poly1d(z)
    p_intr = xp[np.argmax(y(xp))]
    print("p intr: {:.2f}".format(p_intr))
    ml_logger.log_metric('p intr', p_intr, step='auto')

    args.qtype = 'lp_norm'
    args.lp = p_intr
    # Fix the seed
    random.seed(args.seed)
    if not args.dont_fix_np_seed:
        np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    inf_model = CnnModel(args.arch,
                         custom_resnet,
                         custom_inception,
                         args.pretrained,
                         args.dataset,
                         args.gpu_ids,
                         args.datapath,
                         batch_size=args.batch_size,
                         shuffle=True,
                         workers=args.workers,
                         print_freq=args.print_freq,
                         cal_batch_size=args.cal_batch_size,
                         cal_set_size=args.cal_set_size,
                         args=args)

    if args.bn_folding:
        print(
            "Applying batch-norm folding ahead of post-training quantization")
        # pdb.set_trace()
        from utils.absorb_bn import search_absorbe_bn
        search_absorbe_bn(inf_model.model)
    mq = ModelQuantizer(inf_model.model, args, layers, replacement_factory)

    # Evaluate with optimal p
    lp_loss = inf_model.evaluate_calibration()
    lp_point = mq.get_clipping()
    # evaluate
    lp_acc = inf_model.validate()

    print("loss p intr: {:.4f}".format(lp_loss.item()))
    ml_logger.log_metric('Loss p intr', lp_loss.item(), step='auto')
    ml_logger.log_metric('Acc p intr', lp_acc, step='auto')
    if lp_acc > curr_best_acc:
        curr_best_acc = lp_acc
        curr_best_scale_point = lp_point

    global _eval_count, _min_loss
    _min_loss = lp_loss.item()

    idx = np.argmax([l2_acc, l25_acc, l3_acc, lp_acc])
    init = [l2_point, l25_point, l3_point, lp_point][idx]

    # run optimizer
    min_options = {}
    if args.maxiter is not None:
        min_options['maxiter'] = args.maxiter
    if args.maxfev is not None:
        min_options['maxfev'] = args.maxfev

    _iter = count(0)

    def local_search_callback(x):
        it = next(_iter)
        mq.set_clipping(x, inf_model.device)
        loss = inf_model.evaluate_calibration()
        print("\n[{}]: Local search callback".format(it))
        print("loss: {:.4f}\n".format(loss.item()))
        print(x)
        ml_logger.log_metric('Loss {}'.format(args.min_method),
                             loss.item(),
                             step='auto')

        # evaluate
        acc = inf_model.validate()
        ml_logger.log_metric('Acc {}'.format(args.min_method),
                             acc,
                             step='auto')

    args.min_method = "Powell"
    method = coord_descent if args.min_method == 'CD' else args.min_method
    res = opt.minimize(
        lambda scales: evaluate_calibration_clipped(scales, inf_model, mq),
        init.cpu().numpy(),
        method=method,
        options=min_options,
        callback=local_search_callback)

    print(res)

    scales = res.x
    mq.set_clipping(scales, inf_model.device)
    loss = inf_model.evaluate_calibration()
    ml_logger.log_metric('Loss {}'.format(args.min_method),
                         loss.item(),
                         step='auto')

    # evaluate
    acc = inf_model.validate()
    ml_logger.log_metric('Acc {}'.format(args.min_method), acc, step='auto')
    data['powell'] = {'alpha': scales, 'loss': loss.item(), 'acc': acc}
    if acc > curr_best_acc:
        curr_best_acc = acc
        curr_best_scale_point = scales

    print("Starting coordinate descent")
    args.min_method = "CD"
    _iter = count(0)
    global _eval_count
    _eval_count = count(0)
    _min_loss = lp_loss.item()
    mq.set_clipping(init, inf_model.device)
    # Run coordinate descent for comparison
    method = coord_descent
    res = opt.minimize(
        lambda scales: evaluate_calibration_clipped(scales, inf_model, mq),
        init.cpu().numpy(),
        method=method,
        options=min_options,
        callback=local_search_callback)

    print(res)

    scales = res.x
    mq.set_clipping(scales, inf_model.device)
    loss = inf_model.evaluate_calibration()
    ml_logger.log_metric('Loss {}'.format("CD"), loss.item(), step='auto')

    # evaluate
    acc = inf_model.validate()
    ml_logger.log_metric('Acc {}'.format("CD"), acc, step='auto')
    data['cd'] = {'alpha': scales, 'loss': loss.item(), 'acc': acc}
    if acc > curr_best_acc:
        curr_best_acc = acc
        curr_best_scale_point = scales

    # pdb.set_trace()
    if curr_best_scale_point.is_cuda:
        curr_best_scale_point = curr_best_scale_point.cpu()
    best_point = np.concatenate(
        [curr_best_scale_point,
         torch.tensor([curr_best_acc])])
    print("**** START LOSS GENERATION ****")
    print("best point:" + str(best_point))
    best_point_values = best_point[:-1]
    mq.set_clipping(best_point_values, inf_model.device)
    loss = inf_model.evaluate_calibration()
    # evaluate
    top1 = inf_model.validate()
    print("best point: loss, top1: {:.4f}, {}".format(loss.item(), top1))

    # best_point = curr_best_scale_point
    # best_point = mq.get_clipping()
    # best_point_values = curr_best_scale_point[:-1]
    # pdb.set_trace()
    n = args.grid_resolution

    min_ratio = args.min_ratio  # 0.8
    max_ratio = args.max_ratio  # 1.2

    x = np.linspace(min_ratio, max_ratio, n)
    # y = np.linspace(min_ratio, max_ratio, n)

    loss_best = loss
    # X, Y = np.meshgrid(x, y)
    Z_loss = np.empty(n)
    Z_top1 = np.empty(n)
    for i, x_ in enumerate(tqdm(x)):
        # set clip value to qwrappers
        scales_ratio = x_
        mq.set_clipping((best_point_values * scales_ratio), inf_model.device)

        if scales_ratio == 1.0:
            print(best_point_values * scales_ratio)
        # evaluate with clipping
        loss = inf_model.evaluate_calibration()
        Z_loss[i] = loss.item()
        Z_top1[i] = inf_model.validate()

        str1 = "[x, loss, top1] = [{}, {}, {}]".format(x[i], Z_loss[i],
                                                       Z_top1[i])
        print(str1)

    # pdb.set_trace()
    # best_point = np.concatenate([1.0, loss_best.cpu().numpy()])
    best_point_ratio = [1.0, loss_best.cpu().numpy()]
    print("best_point_ratio: " + str(best_point_ratio))
    # best_point = [best_point_values, loss_best.cpu().numpy()]
    # print("best point: " + str(best_point))
    print("best point values: " + str(best_point_values))

    f_name = "loss_generation_lapq_{}_W{}A{}.pkl".format(
        args.arch, 'ALL', None)
    dir_fullname = os.path.join(os.getcwd(), args.experiment)
    if not os.path.exists(dir_fullname):
        os.makedirs(dir_fullname)
    f = open(os.path.join(dir_fullname, f_name), 'wb')
    data = {
        'X': x,
        'Z_loss': Z_loss,
        'Z_top1': Z_top1,
        'best_point_ratio': best_point_ratio,
        'best_point': best_point_values
    }
    pickle.dump(data, f)
    f.close()
    print("Data saved to {}".format(f_name))
def main(args, ml_logger):
    # Fix the seed
    random.seed(args.seed)
    if not args.dont_fix_np_seed:
        np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    if args.tag is not None:
        ml_logger.mlflow.log_param('tag', args.tag)

    enable_bcorr = False
    if args.bcorr_w:
        args.bcorr_w = False
        enable_bcorr = True

    if args.init_method == 'random':
        args.qtype = 'max_static'

    # create model
    # Always enable shuffling to avoid issues where we get bad results due to weak statistics
    custom_resnet = True
    custom_inception = True
    inf_model = CnnModel(args.arch,
                         custom_resnet,
                         custom_inception,
                         args.pretrained,
                         args.dataset,
                         args.gpu_ids,
                         args.datapath,
                         batch_size=args.batch_size,
                         shuffle=True,
                         workers=args.workers,
                         print_freq=args.print_freq,
                         cal_batch_size=args.cal_batch_size,
                         cal_set_size=args.cal_set_size,
                         args=args)

    layers = []
    # TODO: make it more generic
    if args.bit_weights is not None:
        layers += [
            n for n, m in inf_model.model.named_modules()
            if isinstance(m, nn.Conv2d)
        ][1:-1]
    if args.bit_act is not None:
        layers += [
            n for n, m in inf_model.model.named_modules()
            if isinstance(m, nn.ReLU)
        ][1:-1]
    if args.bit_act is not None and 'mobilenet' in args.arch:
        layers += [
            n for n, m in inf_model.model.named_modules()
            if isinstance(m, nn.ReLU6)
        ][1:-1]

    replacement_factory = {
        nn.ReLU: ActivationModuleWrapperPost,
        nn.ReLU6: ActivationModuleWrapperPost,
        nn.Conv2d: ParameterModuleWrapperPost
    }
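    # Presumably each wrapper quantizes what its name suggests: activation
    # outputs for ReLU/ReLU6, weight parameters for Conv2d.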

    mq = ModelQuantizer(inf_model.model, args, layers, replacement_factory)
    init_loss = inf_model.evaluate_calibration()

    if args.init_method == 'random':
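        # Randomize the starting point: draw each per-layer clipping value
        # uniformly from [0, c), where c is its max_static clipping value.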
        clip = mq.get_clipping()
        for i, c in enumerate(clip.cpu()):
            clip[i] = np.random.uniform(0, c)
        print("Randomize initial clipping")
        print(clip)
        mq.set_clipping(clip, inf_model.device)
        init_loss = inf_model.evaluate_calibration()

    print("init loss: {:.4f}".format(init_loss.item()))
    ml_logger.log_metric('Init loss', init_loss.item(), step='auto')

    acc = inf_model.validate()
    ml_logger.log_metric('Acc init', acc, step='auto')

    init = mq.get_clipping()

    global _eval_count, _min_loss
    _min_loss = init_loss.item()

    # If bias correction was deferred above (enable_bcorr), the model and
    # quantizer can be rebuilt here with args.bcorr_w restored before running
    # the optimizer.

    # run optimizer
    min_options = {}
    if args.maxiter is not None:
        min_options['maxiter'] = args.maxiter
    if args.maxfev is not None:
        min_options['maxfev'] = args.maxfev

    _iter = count(0)

    def local_search_callback(x):
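        # Called by scipy.optimize.minimize once per iteration with the current
        # point x: re-apply it as clipping values, then log the calibration
        # loss and validation accuracy.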
        it = next(_iter)
        mq.set_clipping(x, inf_model.device)
        loss = inf_model.evaluate_calibration()
        print("\n[{}]: Local search callback".format(it))
        print("loss: {:.4f}\n".format(loss.item()))
        print(x)
        ml_logger.log_metric('Loss {}'.format(args.min_method),
                             loss.item(),
                             step='auto')

        # evaluate
        acc = inf_model.validate()
        ml_logger.log_metric('Acc {}'.format(args.min_method),
                             acc,
                             step='auto')

    args.min_method = "Powell"  # NOTE: overrides any --min_method passed in; the CD branch below is unreachable
    method = coord_descent if args.min_method == 'CD' else args.min_method
    res = opt.minimize(
        lambda scales: evaluate_calibration_clipped(scales, inf_model, mq),
        init.cpu().numpy(),
        method=method,
        options=min_options,
        callback=local_search_callback)
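    # evaluate_calibration_clipped (defined elsewhere in this module) is
    # assumed to apply `scales` via mq.set_clipping and return the calibration
    # loss as a scalar for the optimizer.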

    print(res)

    scales = res.x
    mq.set_clipping(scales, inf_model.device)
    loss = inf_model.evaluate_calibration()
    ml_logger.log_metric('Loss {}'.format(args.min_method),
                         loss.item(),
                         step='auto')

    # evaluate
    acc = inf_model.validate()
    ml_logger.log_metric('Acc {}'.format(args.min_method), acc, step='auto')
def main(args):
    # Fix the seed
    random.seed(args.seed)
    if not args.dont_fix_np_seed:
        np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    args.qtype = 'max_static'
    # create model
    # Always enable shuffling to avoid issues where we get bad results due to weak statistics
    custom_resnet = True
    custom_inception = True
    replacement_factory = {nn.ReLU: ActivationModuleWrapperPost,
                           nn.ReLU6: ActivationModuleWrapperPost,
                           nn.Conv2d: ParameterModuleWrapperPost}

    def eval_pnorm(p):
        args.qtype = 'lp_norm'
        args.lp = p
        # Fix the seed
        random.seed(args.seed)
        if not args.dont_fix_np_seed:
            np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)
        cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

        inf_model = CnnModel(args.arch, custom_resnet, custom_inception, args.pretrained, args.dataset, args.gpu_ids, args.datapath,
                             batch_size=args.batch_size, shuffle=True, workers=args.workers, print_freq=args.print_freq,
                             cal_batch_size=args.cal_batch_size, cal_set_size=args.cal_set_size, args=args)

        all_layers = []
        if args.bit_weights is not None:
            all_layers += [n for n, m in inf_model.model.named_modules() if isinstance(m, nn.Conv2d)][1:-1]
        if args.bit_act is not None:
            all_layers += [n for n, m in inf_model.model.named_modules() if isinstance(m, nn.ReLU)][1:-1]
        if args.bit_act is not None and 'mobilenet' in args.arch:
            all_layers += [n for n, m in inf_model.model.named_modules() if isinstance(m, nn.ReLU6)][1:-1]

        mq = ModelQuantizer(inf_model.model, args, all_layers, replacement_factory)
        loss = inf_model.evaluate_calibration()
        point = mq.get_clipping()

        del inf_model
        del mq

        return point, loss

    random.seed(args.seed)
    if not args.dont_fix_np_seed:
        np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    inf_model = CnnModel(args.arch, custom_resnet, custom_inception, args.pretrained, args.dataset, args.gpu_ids,
                         args.datapath,
                         batch_size=args.batch_size, shuffle=True, workers=args.workers, print_freq=args.print_freq,
                         cal_batch_size=args.cal_batch_size, cal_set_size=args.cal_set_size, args=args)

    all_layers = []
    if args.bit_weights is not None:
        all_layers += [n for n, m in inf_model.model.named_modules() if isinstance(m, nn.Conv2d)][1:-1]
    if args.bit_act is not None:
        all_layers += [n for n, m in inf_model.model.named_modules() if isinstance(m, nn.ReLU)][1:-1]
    if args.bit_act is not None and 'mobilenet' in args.arch:
        all_layers += [n for n, m in inf_model.model.named_modules() if isinstance(m, nn.ReLU6)][1:-1]

    mq = ModelQuantizer(inf_model.model, args, all_layers, replacement_factory)

    start_point, start_loss = eval_pnorm(2)
    end_point, end_loss = eval_pnorm(4.5)
    k = 50
    step = (end_point - start_point) / k
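    # Walk the straight line between the p=2 and p=4.5 clipping solutions in k
    # equal steps, measuring the calibration loss at each interpolated point.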

    print("start")
    print(start_point)
    print("end")
    print(end_point)
    losses = []
    points = []
    for i in range(k+1):
        point = start_point + i * step
        mq.set_clipping(point, inf_model.device)
        loss = inf_model.evaluate_calibration()
        losses.append(loss.item())
        points.append(point.cpu().numpy())
        print("({}: loss) - {}".format(i, loss.item()))

    f_name = "{}_W{}A{}_loss_vs_clipping.pkl".format(args.arch, args.bit_weights, args.bit_act)
    data_dir = os.path.join(proj_root_dir, 'data')
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    with open(os.path.join(data_dir, f_name), 'wb') as f:
        data = {'start': start_point.cpu().numpy(), 'end': end_point.cpu().numpy(), 'loss': losses, 'points': points}
        pickle.dump(data, f)
    print("Data saved to {}".format(f_name))
def main(args, ml_logger):
    # Fix the seed
    random.seed(args.seed)
    if not args.dont_fix_np_seed:
        np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # create model
    # Always enable shuffling to avoid issues where we get bad results due to weak statistics
    inf_model = CnnModel(args.arch, args.custom_resnet, args.custom_inception, args.pretrained, args.dataset, args.gpu_ids, args.datapath,
                         batch_size=args.batch_size, shuffle=True, workers=args.workers, print_freq=args.print_freq,
                         cal_batch_size=args.cal_batch_size, cal_set_size=args.cal_set_size, args=args)

    layers = []
    # TODO: make it more generic
    if args.bit_weights is not None:
        layers += [n for n, m in inf_model.model.named_modules() if isinstance(m, nn.Conv2d)][1:-1]
    if args.bit_act is not None:
        layers += [n for n, m in inf_model.model.named_modules() if isinstance(m, nn.ReLU)][1:-1]
    if args.bit_act is not None and 'mobilenet' in args.arch:
        layers += [n for n, m in inf_model.model.named_modules() if isinstance(m, nn.ReLU6)][1:-1]

    replacement_factory = {nn.ReLU: ActivationModuleWrapperPost,
                           nn.ReLU6: ActivationModuleWrapperPost,
                           nn.Conv2d: ParameterModuleWrapperPost}
    mq = ModelQuantizer(inf_model.model, args, layers, replacement_factory)
    # mq.log_quantizer_state(ml_logger, -1)

    print("init_method: {}, qtype {}".format(args.init_method, args.qtype))
    # initialize scales
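    # 'dynamic' derives the initial clipping from calibration statistics;
    # 'static' and 'random' build explicit values and push them into the
    # quantization wrappers before measuring the initial loss.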
    if args.init_method == 'dynamic':
        # evaluate to initialize dynamic clipping
        loss = inf_model.evaluate_calibration()
        print("Initial loss: {:.4f}".format(loss.item()))

        # get clipping values
        init = mq.get_clipping()
    else:
        if args.init_method == 'static':
            # use tensors so init.cpu().numpy() below works for every branch
            init = torch.tensor([args.siv] * len(layers), dtype=torch.float32)
        elif args.init_method == 'random':
            # TODO: pass range by argument
            init = torch.from_numpy(np.random.uniform(0.5, 1., size=len(layers))).float()
        else:
            raise RuntimeError("Invalid argument init_method {}".format(args.init_method))

        # set clip value to qwrappers
        mq.set_clipping(init, inf_model.device)
        print("scales initialization: {}".format(str(init)))

        # evaluate with clipping
        loss = inf_model.evaluate_calibration()
        print("Initial loss: {:.4f}".format(loss.item()))

    ml_logger.log_metric('Loss init', loss.item(), step='auto')

    global _min_loss
    _min_loss = loss.item()

    # evaluate
    acc = inf_model.validate()
    ml_logger.log_metric('Acc init', acc, step='auto')

    # run optimizer
    min_options = {}
    if args.maxiter is not None:
        min_options['maxiter'] = args.maxiter
    if args.maxfev is not None:
        min_options['maxfev'] = args.maxfev

    _iter = count(0)

    def local_search_callback(x):
        it = next(_iter)
        mq.set_clipping(x, inf_model.device)
        loss = inf_model.evaluate_calibration()
        print("\n[{}]: Local search callback".format(it))
        print("loss: {:.4f}\n".format(loss.item()))
        print(x)
        ml_logger.log_metric('Loss {}'.format(args.min_method), loss.item(), step='auto')

        # evaluate
        acc = inf_model.validate()
        ml_logger.log_metric('Acc {}'.format(args.min_method), acc, step='auto')

    method = coord_descent if args.min_method == 'CD' else args.min_method
    res = opt.minimize(lambda scales: evaluate_calibration_clipped(scales, inf_model, mq), init.cpu().numpy(),
                       method=method, options=min_options, callback=local_search_callback)

    print(res)

    scales = res.x
    mq.set_clipping(scales, inf_model.device)
    loss = inf_model.evaluate_calibration()
    ml_logger.log_metric('Loss {}'.format(args.min_method), loss.item(), step='auto')

    # evaluate
    acc = inf_model.validate()
    ml_logger.log_metric('Acc {}'.format(args.min_method), acc, step='auto')