def fit_stump_universal(self, X, X_proj, y, gamma_1, gamma_infty, model, eps, coord, precision=0.001, lambda_1=0.7):
        assert isinstance(eps, tuple) and len(eps) == 2

        trees_current_coord = self.coords_trees[coord] if coord in self.coords_trees else []
        w_rs, bs = np.zeros(len(trees_current_coord)), np.zeros(len(trees_current_coord))

        for i in range(len(trees_current_coord)):
            w_rs[i] = trees_current_coord[i].w_r
            bs[i] = trees_current_coord[i].b

        cumu_threshold_value = self.build_cumu_threshold_value()

        if gamma_1 is None:
            gamma_1 = self.certify_Lp_bound(X, y, eps[0], cumu_threshold_value, (coord,), 1, precision)

        intervals_j = cumu_threshold_value[coord] if coord in cumu_threshold_value else [(-np.inf, np.inf, 0)]
        n_bins = self.n_bins

        b_vals_universal = np.arange(1, n_bins) / n_bins
        # to have some margin to make the thresholds not adversarially reachable from 0 or 1
        b_vals_universal[b_vals_universal < 0.5] += 0.1 * 1 / n_bins
        b_vals_universal[b_vals_universal > 0.5] -= 0.1 * 1 / n_bins

        intervals = tuple([(i[0], i[1]) for i in intervals_j])
        values = tuple([i[2] for i in intervals_j])
        losses_universal, w_l_vals_universal, w_r_vals_universal, b_vals_universal = fit_robust_exact_stumps_universal(X_proj, y, gamma_1, intervals, values, gamma_infty, b_vals_universal, eps, w_rs, bs, self.max_weight, precision, lambda_1, verbose=False)

        min_loss = np.min(losses_universal)
        # np.where should already return sorted indices, but sort explicitly to be safe since the docs do not guarantee it
        indices_opt_init = np.sort(np.where(losses_universal == min_loss)[0])
        indices_opt = get_contiguous_indices(indices_opt_init)

        id_opt = indices_opt[len(indices_opt) // 2]

        loss = losses_universal[id_opt]

        w_l, w_r, b = w_l_vals_universal[id_opt], w_r_vals_universal[id_opt], b_vals_universal[id_opt]

        if np.abs(loss - min_loss) > 1e-7:
            print('New loss: {:.5f}, min loss before: {:.5f}'.format(loss, min_loss))


        return [loss, w_l, w_r, b, coord]
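# All of the fitters in this file pick the threshold from the middle of the plateau of
# minimal losses via get_contiguous_indices. A minimal sketch of its assumed semantics
# (the real helper lives elsewhere in this codebase): given the sorted indices where the
# loss attains its minimum, return the first contiguous run of them.
import numpy as np

def get_contiguous_indices_sketch(indices):
    run = [indices[0]]
    for idx in indices[1:]:
        if idx != run[-1] + 1:
            break
        run.append(idx)
    return np.array(run)

# toy usage: a flat minimum over thresholds 2..4 -> the middle index 3 is selected
losses = np.array([0.9, 0.5, 0.3, 0.3, 0.3, 0.7])
indices_opt = get_contiguous_indices_sketch(np.sort(np.where(losses == losses.min())[0]))
id_opt = indices_opt[len(indices_opt) // 2]
assert id_opt == 3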
    def fit_stump(self, X, y, gamma, model, eps):
        n_trials_coord = self.n_trials_coord
        X, y, gamma = X.astype(dtype), y.astype(dtype), gamma.astype(dtype)

        num, dim = X.shape
        params, min_losses = np.zeros((n_trials_coord, 4)), np.full(n_trials_coord, np.inf)

        # 151 features are always 0.0 on MNIST 2 vs 6, so it doesn't even make sense to consider them.
        idx_non_trivial = np.abs(X).sum(axis=0) > 0.0
        features_to_check = list(np.arange(dim)[idx_non_trivial])
        np.random.shuffle(features_to_check)  # shuffles in-place
        for trial in prange(n_trials_coord):
            if len(features_to_check) > 0:
                coord = features_to_check.pop()  # takes the last element
            else:
                n_trials_coord = trial
                break
            X_proj = X[:, coord]

            min_val = 1e-7
            threshold_candidates = np.sort(np.copy(X_proj))
            if self.min_samples_leaf > 0:
                threshold_candidates = threshold_candidates[self.min_samples_leaf:-self.min_samples_leaf]
            if len(threshold_candidates) == 0:  # no samples left after applying min_samples_leaf
                min_losses[trial] = np.inf
                params[trial, :] = [0.0, 0.0, 0.0, -1]
                continue

            if model not in ['robust_bound'] or eps == 0.0:  # plain training
                b_vals = np.copy(threshold_candidates)
                b_vals += min_val  # to break the ties
            else:  # robust training
                b_vals = np.concatenate(
                    (threshold_candidates - eps, threshold_candidates + eps),
                    axis=0)
                # to make in the overlapping case |---x-|--|-x---| output 2 different losses in the middle
                n_bs = len(threshold_candidates)
                b_vals += np.concatenate(
                    (-np.full(n_bs, min_val), np.full(n_bs, min_val)), axis=0)
            b_vals = np.unique(b_vals)  # use only unique b's
            b_vals = np.sort(b_vals)  # still important to sort because of the final threshold selection

            if model == 'plain':
                losses, w_l_vals, w_r_vals, b_vals = self.fit_plain_stumps(
                    X_proj, y, gamma, b_vals)
            elif model == 'robust_bound':
                losses, w_l_vals, w_r_vals, b_vals = self.fit_robust_bound_stumps(
                    X_proj, y, gamma, b_vals, eps)
            else:
                raise ValueError('wrong model')

            min_loss = np.min(losses)
            # np.where should already return sorted indices, but sort explicitly to be safe since the docs do not guarantee it
            indices_opt_init = np.sort(np.where(losses == min_loss)[0])
            indices_opt = get_contiguous_indices(indices_opt_init)
            id_opt = indices_opt[len(indices_opt) // 2]

            idx_prev = np.clip(indices_opt[0] - 1, 0, len(b_vals) - 1)  # to prevent stepping out of the array
            idx_next = np.clip(indices_opt[-1] + 1, 0, len(b_vals) - 1)  # to prevent stepping out of the array
            b_prev, w_l_prev, w_r_prev = b_vals[idx_prev], w_l_vals[idx_prev], w_r_vals[idx_prev]
            b_next, w_l_next, w_r_next = b_vals[idx_next], w_l_vals[idx_next], w_r_vals[idx_next]
            # initialization
            b_leftmost, b_rightmost = b_vals[indices_opt[0]], b_vals[indices_opt[-1]]
            # more involved, since with +-eps, an additional check of the loss is needed
            if model == 'plain':
                b_rightmost = b_next
            elif model in ['robust_bound']:
                b_prev_half = (b_prev + b_vals[indices_opt[0]]) / 2
                loss_prev_half = exp_loss_robust(X_proj, y, gamma, w_l_prev,
                                                 w_r_prev, [], [], b_prev_half,
                                                 eps, False)

                b_next_half = (b_vals[indices_opt[-1]] + b_next) / 2
                loss_next_half = exp_loss_robust(X_proj, y, gamma, w_l_next,
                                                 w_r_next, [], [], b_next_half,
                                                 eps, False)

                # we extend the constant-loss interval to the left or right if the loss at
                # b_prev_half or b_next_half is the same as at the optimum
                if loss_prev_half == losses[id_opt]:
                    b_leftmost = b_prev
                if loss_next_half == losses[id_opt]:
                    b_rightmost = b_next
            else:
                raise ValueError('wrong model')
            # we put in the middle of the interval of the constant loss
            b_opt = (b_leftmost + b_rightmost) / 2
            # note: inf can easily happen here, e.g. if all examples in some subtree are < eps (happens on MNIST)

            # For the chosen threshold, we need to calculate w_l, w_r
            # Some of w_l, w_r that correspond to min_loss may not be optimal anymore
            b_val_final = np.array([b_opt])
            if model == 'plain':
                loss, w_l_opt, w_r_opt, _ = self.fit_plain_stumps(
                    X_proj, y, gamma, b_val_final)
            elif model == 'robust_bound':
                loss, w_l_opt, w_r_opt, _ = self.fit_robust_bound_stumps(
                    X_proj, y, gamma, b_val_final, eps)
            else:
                raise ValueError('wrong model')
            loss, w_l_opt, w_r_opt = loss[0], w_l_opt[0], w_r_opt[0]

            # recalculation of w_l, w_r shouldn't change the min loss
            if np.abs(loss - min_loss) > 1e-7:
                print('New loss: {:.5f}, min loss before: {:.5f}'.format(
                    loss, min_loss))

            min_losses[trial] = losses[id_opt]
            params[trial, :] = [w_l_opt, w_r_opt, b_opt, coord]

        id_best_coord = min_losses[:n_trials_coord].argmin()
        best_coord = int(params[id_best_coord][3])  # float to int is necessary for a coordinate
        return params[id_best_coord][0], params[id_best_coord][1], params[id_best_coord][2], best_coord
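# Illustration (not from the original class) of the threshold-candidate construction used in
# the robust branch above: each sample contributes two candidates at +-eps, nudged by a tiny
# min_val so that in the overlapping case |---x-|--|-x---| the two middle thresholds produce
# two distinct losses.
import numpy as np

X_proj = np.array([0.2, 0.5, 0.8])
eps, min_val = 0.1, 1e-7
n_bs = len(X_proj)
b_vals = np.concatenate((X_proj - eps, X_proj + eps), axis=0)
b_vals += np.concatenate((-np.full(n_bs, min_val), np.full(n_bs, min_val)), axis=0)
b_vals = np.sort(np.unique(b_vals))  # unique b's; sorting matters for the final threshold selection
print(b_vals)  # [0.1 0.3 0.4 0.6 0.7 0.9] up to the +-1e-7 nudges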
    def fit_stump(self, X, y, gamma_global, model, eps):
        n_trials_coord = self.n_trials_coord
        X, y, gamma_global = X.astype(dtype), y.astype(dtype), gamma_global.astype(dtype)

        num, dim = X.shape
        params, min_losses = np.zeros((n_trials_coord, 4)), np.full(n_trials_coord, np.inf)

        # 151 features are always 0.0 on MNIST 2 vs 6, so it doesn't even make sense to consider them.
        idx_non_trivial = np.abs(X).sum(axis=0) > 0.0
        features_to_check = list(np.arange(dim)[idx_non_trivial])
        np.random.shuffle(features_to_check)  # shuffles in-place
        for trial in prange(n_trials_coord):
            if len(features_to_check) > 0:
                coord = features_to_check.pop()  # takes the last element
            else:
                self.n_trials_coord = trial
                break
            X_proj = X[:, coord]

            # Needed for exact robust optimization with stumps
            trees_current_coord = self.coords_trees[coord] if coord in self.coords_trees else []
            w_rs, bs = np.zeros(len(trees_current_coord)), np.zeros(len(trees_current_coord))
            for i in range(len(trees_current_coord)):
                w_rs[i] = trees_current_coord[i].w_r
                bs[i] = trees_current_coord[i].b

            if model == 'robust_exact' and trees_current_coord != []:  # note: the previous gamma is just ignored
                min_Fx_y_exact_without_j = self.certify_exact(
                    X, y, eps, coords_to_ignore=(coord, ))
                w_ls = np.sum([tree.w_l for tree in trees_current_coord])
                gamma = np.exp(-min_Fx_y_exact_without_j - y * w_ls)
            else:
                gamma = gamma_global

            min_val = 1e-7
            if model not in ['robust_exact', 'robust_bound'] or eps == 0.0:  # plain training
                b_vals = np.copy(X_proj)
                b_vals += min_val  # to break the ties
            else:  # robust training
                b_vals = np.concatenate((X_proj - eps, X_proj + eps),
                                        axis=0)  # 2n thresholds
                # to make in the overlapping case |---x-|--|-x---| output 2 different losses in the middle
                b_vals += np.concatenate(
                    (-np.full(num, min_val), np.full(num, min_val)), axis=0)
            b_vals = np.unique(b_vals)  # use only unique b's
            b_vals = np.sort(b_vals)  # still important to sort because of the final threshold selection

            if model == 'plain':
                losses, w_l_vals, w_r_vals, b_vals = self.fit_plain_stumps(
                    X_proj, y, gamma, b_vals)
            elif model == 'robust_bound':
                losses, w_l_vals, w_r_vals, b_vals = self.fit_robust_bound_stumps(
                    X_proj, y, gamma, b_vals, eps)
            elif model == 'robust_exact':
                losses, w_l_vals, w_r_vals, b_vals = self.fit_robust_exact_stumps(
                    X_proj, y, gamma, b_vals, eps, w_rs, bs)
            else:
                raise ValueError('wrong model')

            min_loss = np.min(losses)
            # np.where should already return sorted indices, but sort explicitly to be safe since the docs do not guarantee it
            indices_opt_init = np.sort(np.where(losses == min_loss)[0])
            indices_opt = get_contiguous_indices(indices_opt_init)
            id_opt = indices_opt[len(indices_opt) // 2]

            idx_prev = np.clip(indices_opt[0] - 1, 0, len(b_vals) - 1)  # to prevent stepping out of the array
            idx_next = np.clip(indices_opt[-1] + 1, 0, len(b_vals) - 1)  # to prevent stepping out of the array
            b_prev, w_l_prev, w_r_prev = b_vals[idx_prev], w_l_vals[idx_prev], w_r_vals[idx_prev]
            b_next, w_l_next, w_r_next = b_vals[idx_next], w_l_vals[idx_next], w_r_vals[idx_next]
            # initialization
            b_leftmost, b_rightmost = b_vals[indices_opt[0]], b_vals[indices_opt[-1]]
            # more involved, since with +-eps, an additional check of the loss is needed
            if model == 'plain':
                b_rightmost = b_next
            elif model in ['robust_bound', 'robust_exact']:
                h_flag = (model == 'robust_exact')

                b_prev_half = (b_prev + b_vals[indices_opt[0]]) / 2
                loss_prev_half = exp_loss_robust(X_proj, y, gamma, w_l_prev,
                                                 w_r_prev, w_rs, bs,
                                                 b_prev_half, eps, h_flag)

                b_next_half = (b_vals[indices_opt[-1]] + b_next) / 2
                loss_next_half = exp_loss_robust(X_proj, y, gamma, w_l_next,
                                                 w_r_next, w_rs, bs,
                                                 b_next_half, eps, h_flag)

                # we extend the constant-loss interval to the left or right if the loss at
                # b_prev_half or b_next_half is the same as at the optimum
                if loss_prev_half == losses[id_opt]:
                    b_leftmost = b_prev
                if loss_next_half == losses[id_opt]:
                    b_rightmost = b_next
            else:
                raise ValueError('wrong model')
            # we put in the middle of the interval of the constant loss
            b_opt = (b_leftmost + b_rightmost) / 2

            # For the chosen threshold, we need to calculate w_l, w_r
            # Some of w_l, w_r that correspond to min_loss may not be optimal anymore
            b_val_final = np.array([b_opt])
            if model == 'plain':
                loss, w_l_opt, w_r_opt, _ = self.fit_plain_stumps(
                    X_proj, y, gamma, b_val_final)
            elif model == 'robust_bound':
                loss, w_l_opt, w_r_opt, _ = self.fit_robust_bound_stumps(
                    X_proj, y, gamma, b_val_final, eps)
            elif model == 'robust_exact':
                loss, w_l_opt, w_r_opt, _ = self.fit_robust_exact_stumps(
                    X_proj, y, gamma, b_val_final, eps, w_rs, bs)
            else:
                raise ValueError('wrong model')
            loss, w_l_opt, w_r_opt = loss[0], w_l_opt[0], w_r_opt[0]
            # recalculation of w_l, w_r shouldn't change the min loss

            if np.abs(loss - min_loss) > 1e-7:
                print('New loss: {:.5f}, min loss before: {:.5f}'.format(
                    loss, min_loss))

            min_losses[trial] = losses[id_opt]
            params[trial, :] = [w_l_opt, w_r_opt, b_opt, coord]

        id_best_coord = min_losses[:n_trials_coord].argmin()
        best_coord = int(params[id_best_coord][3])  # float to int is necessary for a coordinate
        w_l, w_r, b, coord = params[id_best_coord][0], params[id_best_coord][1], params[id_best_coord][2], best_coord
        stump = Stump(w_l, w_r, b, coord)
        return stump
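# The Stump object returned above is constructed from (w_l, w_r, b, coord). Judging by how
# w_l and w_r are accumulated elsewhere in this file (sum_w_l plus w_r per crossed threshold
# in fit_stump_L0, and the gamma update in the 'robust_exact' branch), its prediction is
# assumed to be w_l plus w_r once the coordinate crosses the threshold. A self-contained sketch:
import numpy as np

class StumpSketch:
    def __init__(self, w_l, w_r, b, coord):
        self.w_l, self.w_r, self.b, self.coord = w_l, w_r, b, coord

    def predict(self, X):
        # w_l is added unconditionally, w_r only on the right side of the threshold
        return self.w_l + self.w_r * (X[:, self.coord] >= self.b)

stump = StumpSketch(w_l=-0.3, w_r=0.9, b=0.5, coord=1)
print(stump.predict(np.array([[0.1, 0.2], [0.1, 0.7]])))  # [-0.3  0.6]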
def fit_stump(X_proj, y, gamma, model, eps, coord, n_bins, min_samples_leaf, max_weight):
    min_prec_val = 1e-7
    min_val, max_val = 0.0, 1.0  # can be changed if the features are in a different range

    if n_bins > 0:
        if model == 'robust_bound':
            # e.g. with n_bins=10 and eps=0.3 this gives the thresholds [0.31, 0.41, 0.5, 0.59, 0.69]
            b_vals = np.arange(eps*n_bins, n_bins - eps*n_bins + 1) / n_bins
            # to have some margin to make the thresholds not adversarially reachable from 0 or 1
            b_vals[b_vals < 0.5] += 0.1 * 1/n_bins
            b_vals[b_vals > 0.5] -= 0.1 * 1/n_bins
        else:
            b_vals = np.arange(1, n_bins) / n_bins
    else:
        threshold_candidates = np.sort(X_proj)
        if min_samples_leaf > 0:
            threshold_candidates = threshold_candidates[min_samples_leaf:-min_samples_leaf]
        if len(threshold_candidates) == 0:  # if no samples left according to min_samples_leaf
            return [np.inf, 0.0, 0.0, 0.0, -1]
        if model not in ['robust_bound'] or eps == 0.0:  # plain or da_uniform training
            b_vals = np.copy(threshold_candidates)
            b_vals += min_prec_val  # to break the ties
        else:  # robust training
            b_vals = np.concatenate((threshold_candidates - eps, threshold_candidates + eps), axis=0)
            b_vals = np.clip(b_vals, min_val, max_val)  # save computations (often goes 512 -> 360 thresholds on MNIST)
            # to make in the overlapping case |---x-|--|-x---| output 2 different losses in the middle
            n_bs = len(threshold_candidates)
            b_vals += np.concatenate((-np.full(n_bs, min_prec_val), np.full(n_bs, min_prec_val)), axis=0)
        b_vals = np.unique(b_vals)  # use only unique b's
        b_vals = np.sort(b_vals)  # still important to sort because of the final threshold selection

    if model in ['plain', 'da_uniform', 'at_cube']:
        losses, w_l_vals, w_r_vals, b_vals = fit_plain_stumps(X_proj, y, gamma, b_vals, max_weight)
    elif model == 'robust_bound':
        losses, w_l_vals, w_r_vals, b_vals = fit_robust_bound_stumps(X_proj, y, gamma, b_vals, eps, max_weight)
    else:
        raise ValueError('wrong model')

    min_loss = np.min(losses)
    # np.where should already return sorted indices, but sort explicitly to be safe since the docs do not guarantee it
    indices_opt_init = np.sort(np.where(losses == min_loss)[0])
    indices_opt = get_contiguous_indices(indices_opt_init)
    id_opt = indices_opt[len(indices_opt) // 2]

    idx_prev = np.clip(indices_opt[0] - 1, 0, len(b_vals) - 1)  # to prevent stepping out of the array
    idx_next = np.clip(indices_opt[-1] + 1, 0, len(b_vals) - 1)  # to prevent stepping out of the array
    b_prev, w_l_prev, w_r_prev = b_vals[idx_prev], w_l_vals[idx_prev], w_r_vals[idx_prev]
    b_next, w_l_next, w_r_next = b_vals[idx_next], w_l_vals[idx_next], w_r_vals[idx_next]
    # initialization
    b_leftmost, b_rightmost = b_vals[indices_opt[0]], b_vals[indices_opt[-1]]

    if n_bins > 0:  # note that one shouldn't average thresholds since it's unpredictable what is in between
        return [min_loss, w_l_vals[id_opt], w_r_vals[id_opt], b_vals[id_opt], coord]

    # more involved, since with +-eps, an additional check of the loss is needed
    if model in ['plain', 'da_uniform', 'at_cube']:
        b_rightmost = b_next
    elif model in ['robust_bound']:
        b_prev_half = (b_prev + b_vals[indices_opt[0]]) / 2
        loss_prev_half = exp_loss_robust(X_proj, y, gamma, w_l_prev, w_r_prev, [], [], b_prev_half, eps, False)

        b_next_half = (b_vals[indices_opt[-1]] + b_next) / 2
        loss_next_half = exp_loss_robust(X_proj, y, gamma, w_l_next, w_r_next, [], [], b_next_half, eps, False)

        # we extend the constant-loss interval to the left or right if the loss at
        # b_prev_half or b_next_half is the same as at the optimum
        if loss_prev_half == losses[id_opt]:
            b_leftmost = b_prev
        if loss_next_half == losses[id_opt]:
            b_rightmost = b_next
    else:
        raise ValueError('wrong model')
    # we put in the middle of the interval of the constant loss
    b_opt = (b_leftmost + b_rightmost) / 2

    # For the chosen threshold, we need to calculate w_l, w_r
    # Some of w_l, w_r that correspond to min_loss may not be optimal anymore
    b_val_final = np.array([b_opt])
    if model in ['plain', 'da_uniform', 'at_cube']:
        loss, w_l_opt, w_r_opt, _ = fit_plain_stumps(X_proj, y, gamma, b_val_final, max_weight)
    elif model == 'robust_bound':
        loss, w_l_opt, w_r_opt, _ = fit_robust_bound_stumps(X_proj, y, gamma, b_val_final, eps, max_weight)
    else:
        raise ValueError('wrong model')
    loss, w_l_opt, w_r_opt = loss[0], w_l_opt[0], w_r_opt[0]

    # recalculation of w_l, w_r shouldn't change the min loss
    if np.abs(loss - min_loss) > 1e-7:
        print('New loss: {:.5f}, min loss before: {:.5f}'.format(loss, min_loss))

    best_loss = losses[id_opt]
    return [best_loss, w_l_opt, w_r_opt, b_opt, coord]
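# Sanity check (illustrative) of the 'robust_bound' grid built above: with n_bins=10 and
# eps=0.3, the grid plus the 0.1/n_bins safety margin reproduces the thresholds from the
# comment, [0.31, 0.41, 0.5, 0.59, 0.69], none of which is adversarially reachable from 0 or 1.
import numpy as np

n_bins, eps = 10, 0.3
b_vals = np.arange(eps * n_bins, n_bins - eps * n_bins + 1) / n_bins
b_vals[b_vals < 0.5] += 0.1 * 1 / n_bins  # push the low thresholds away from 0
b_vals[b_vals > 0.5] -= 0.1 * 1 / n_bins  # push the high thresholds away from 1
print(b_vals)  # [0.31 0.41 0.5  0.59 0.69]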
def fit_stump_Lp(X_index, X, X_proj, y, leaf_nodes, eps, max_eps, budget, coord, order, n_bins, min_samples_leaf, max_weight, box):
    min_prec_val = 1e-7
    min_val, max_val = 0.0, 1.0

    if n_bins > 0:
        b_vals = np.arange(0, n_bins) / n_bins
        # to have some margin to make the thresholds not adversarially reachable from 0 or 1
        b_vals[b_vals < 0.5] += 0.1 * 1/n_bins
        b_vals[b_vals > 0.5] -= 0.1 * 1/n_bins

    losses, w_l_vals, w_r_vals, b_vals = fit_robust_bound_stumps_tree_Lp(X_index, X, X_proj, y, budget, coord, leaf_nodes, b_vals, eps, max_eps, order, max_weight, box)
    
    min_loss = np.min(losses)
    # np.where should already return sorted indices, but sort explicitly to be safe since the docs do not guarantee it
    indices_opt_init = np.sort(np.where(losses == min_loss)[0])
    indices_opt = get_contiguous_indices(indices_opt_init)
    id_opt = indices_opt[len(indices_opt) // 2]

    idx_prev = np.clip(indices_opt[0] - 1, 0, len(b_vals) - 1)  # to prevent stepping out of the array
    idx_next = np.clip(indices_opt[-1] + 1, 0, len(b_vals) - 1)  # to prevent stepping out of the array
    b_prev, w_l_prev, w_r_prev = b_vals[idx_prev], w_l_vals[idx_prev], w_r_vals[idx_prev]
    b_next, w_l_next, w_r_next = b_vals[idx_next], w_l_vals[idx_next], w_r_vals[idx_next]
    # initialization
    b_leftmost, b_rightmost = b_vals[indices_opt[0]], b_vals[indices_opt[-1]]

    if n_bins > 0:  # note that one shouldn't average thresholds since it's unpredictable what is in between
        return [min_loss, w_l_vals[id_opt], w_r_vals[id_opt], b_vals[id_opt], coord]

    # we put in the middle of the interval of the constant loss
    b_opt = (b_leftmost + b_rightmost) / 2

    # For the chosen threshold, we need to calculate w_l, w_r
    # Some of w_l, w_r that correspond to min_loss may not be optimal anymore
    b_val_final = np.array([b_opt])
    loss, w_l_opt, w_r_opt, _ = fit_robust_bound_stumps_tree_Lp(X_index, X, X_proj, y, budget, coord, leaf_nodes, b_val_final, eps, max_eps, order, max_weight, box)
    loss, w_l_opt, w_r_opt = loss[0], w_l_opt[0], w_r_opt[0]

    # recalculation of w_l, w_r shouldn't change the min loss
    if np.abs(loss - min_loss) > 1e-7:
        print('New loss: {:.5f}, min loss before: {:.5f}'.format(loss, min_loss))

    best_loss = losses[id_opt]
    return [best_loss, w_l_opt, w_r_opt, b_opt, coord]
    def fit_stump_Lp(self, X, X_proj, y, gamma_global, model, eps, coord, order=1, precision=0.02):
        trees_current_coord = self.coords_trees[coord] if coord in self.coords_trees else []
        w_rs, bs = np.zeros(len(trees_current_coord)), np.zeros(len(trees_current_coord))

        for i in range(len(trees_current_coord)):
            w_rs[i] = trees_current_coord[i].w_r
            bs[i] = trees_current_coord[i].b

        cumu_threshold_value = self.build_cumu_threshold_value()

        if gamma_global is None:
            gamma_global = self.certify_Lp_bound(X, y, eps, cumu_threshold_value, (coord,), order, precision)

        intervals_j = cumu_threshold_value[coord] if coord in cumu_threshold_value else [(-np.inf, np.inf, 0)]
        
        n_bins = self.n_bins

        if eps < 0.5:
            b_vals = np.arange(eps * n_bins, n_bins - eps * n_bins + 1) / n_bins
        else:
            b_vals = np.arange(1, n_bins) / n_bins
        # to have some margin to make the thresholds not adversarially reachable from 0 or 1
        b_vals[b_vals < 0.5] += 0.1 * 1 / n_bins
        b_vals[b_vals > 0.5] -= 0.1 * 1 / n_bins

        intervals = tuple([(i[0], i[1]) for i in intervals_j])
        values = tuple([i[2] for i in intervals_j])

        losses, w_l_vals, w_r_vals, b_vals = fit_robust_exact_stumps_Lp(X_proj, y, gamma_global, intervals, values, b_vals, eps, w_rs, bs, self.max_weight, order, precision)

        min_loss = np.min(losses)
        # np.where should already return sorted indices, but sort explicitly to be safe since the docs do not guarantee it
        indices_opt_init = np.sort(np.where(losses == min_loss)[0])
        indices_opt = get_contiguous_indices(indices_opt_init)
        id_opt = indices_opt[len(indices_opt) // 2]

        # initialization
        b_leftmost, b_rightmost = b_vals[indices_opt[0]], b_vals[indices_opt[-1]]
        b_opt = (b_leftmost + b_rightmost) / 2

        # For the chosen threshold, we need to calculate w_l, w_r
        # Some of w_l, w_r that correspond to min_loss may not be optimal anymore
        b_val_final = np.array([b_opt])
        loss, w_l_opt, w_r_opt, _ = fit_robust_exact_stumps_Lp(X_proj, y, gamma_global, intervals, values, b_val_final, eps, w_rs, bs, self.max_weight, order, precision, verbose=False)
        loss, w_l_opt, w_r_opt = loss[0], w_l_opt[0], w_r_opt[0]
        # recalculation of w_l, w_r shouldn't change the min loss

        if np.abs(loss - min_loss) > 1e-7:
            print('New loss: {:.5f}, min loss before: {:.5f}'.format(loss, min_loss))

        best_loss = losses[id_opt]
        return [best_loss, w_l_opt, w_r_opt, b_opt, coord]
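# The gamma arrays threaded through these fitters act as per-example boosting weights under
# the exponential loss: gamma_i = exp(-margin_i), where margin_i is (a lower bound on)
# y_i * F(x_i) for the rest of the ensemble, cf. the 'robust_exact' branch above. A toy
# scoring of one candidate stump under such weights, assuming the prediction form
# w_l + w_r * 1[x >= b] (hypothetical helper, not from this codebase):
import numpy as np

def stump_exp_loss(X_proj, y, gamma, w_l, w_r, b):
    h = w_l + w_r * (X_proj >= b)  # stump output on each example
    return np.mean(gamma * np.exp(-y * h))

X_proj = np.array([0.2, 0.4, 0.6, 0.8])
y = np.array([-1.0, -1.0, 1.0, 1.0])
gamma = np.ones_like(y)  # uniform weights, i.e. an empty ensemble with zero margins
print(stump_exp_loss(X_proj, y, gamma, w_l=-0.5, w_r=1.0, b=0.5))  # ~0.607, i.e. exp(-0.5)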
    def fit_stump_L0(self, X, X_proj, y, eps, coord):
        trees_current_coord = self.coords_trees[coord] if coord in self.coords_trees else []
        threshold_value = {}

        pre_y_min_1 = self.certify_exact_norm_zero(X, y, eps - 1, (coord,))
        pre_y_min_0 = self.certify_exact_norm_zero(X, y, eps, (coord,))

        sum_w_l = 0
        for tree in trees_current_coord:
            sum_w_l += tree.w_l
            threshold_value[tree.b] = tree.w_r

        sorted_thresholds = sorted(list(threshold_value.items()), key=lambda x: x[0])
        sorted_thresholds.insert(0, (-np.inf, 0))
        sorted_thresholds.append((np.inf, np.inf))

        interval_value = []
        pre_value = 0
        ori_value = np.zeros_like(y)
        for i, (b, value) in enumerate(sorted_thresholds[:-1]):
            pre_value += value
            interval_value.append((b, sorted_thresholds[i + 1][0], pre_value + sum_w_l))
            ori_value[b <= X_proj] = y[b <= X_proj] * pre_value

        n_bins = self.n_bins
        b_vals = np.arange(0, n_bins) / n_bins
        # to have some margin to make the thresholds not adversarially reachable from 0 or 1
        b_vals[b_vals < 0.5] += 0.1 * 1 / n_bins
        b_vals[b_vals > 0.5] -= 0.1 * 1 / n_bins

        losses, w_l_vals, w_r_vals, b_vals = fit_robust_exact_stumps_L0(X_proj, y, pre_y_min_0, pre_y_min_1, interval_value, ori_value, b_vals, self.max_weight)

        min_loss = np.min(losses)
        # np.where should already return sorted indices, but sort explicitly to be safe since the docs do not guarantee it
        indices_opt_init = np.sort(np.where(losses == min_loss)[0])
        indices_opt = get_contiguous_indices(indices_opt_init)
        id_opt = indices_opt[len(indices_opt) // 2]

        # initialization
        b_leftmost, b_rightmost = b_vals[indices_opt[0]], b_vals[indices_opt[-1]]

        # we put in the middle of the interval of the constant loss
        b_opt = (b_leftmost + b_rightmost) / 2

        # For the chosen threshold, we need to calculate w_l, w_r
        # Some of w_l, w_r that correspond to min_loss may not be optimal anymore
        b_val_final = np.array([b_opt])
        loss, w_l_opt, w_r_opt, _ = fit_robust_exact_stumps_L0(X_proj, y, pre_y_min_0, pre_y_min_1, interval_value, ori_value, b_val_final, self.max_weight)
        loss, w_l_opt, w_r_opt = loss[0], w_l_opt[0], w_r_opt[0]
        # recalculation of w_l, w_r shouldn't change the min loss

        if np.abs(loss - min_loss) > 1e-7:
            print('New loss: {:.5f}, min loss before: {:.5f}'.format(loss, min_loss))

        best_loss = losses[id_opt]
        return [best_loss, w_l_opt, w_r_opt, b_opt, coord]
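# Illustration of the interval bookkeeping in fit_stump_L0 above: the stumps already fitted
# on a coordinate induce a piecewise-constant function of x[coord]; sorting their thresholds
# and accumulating w_r (plus the unconditional sum of the w_l's) yields one
# (lower, upper, value) triple per interval, as in interval_value.
import numpy as np

sum_w_l = 0.25
threshold_value = {0.5: 0.5, 0.3: -0.25}  # b -> w_r for the existing stumps on this coordinate
sorted_thresholds = sorted(threshold_value.items(), key=lambda x: x[0])
sorted_thresholds.insert(0, (-np.inf, 0))
sorted_thresholds.append((np.inf, np.inf))

interval_value, pre_value = [], 0
for i, (b, value) in enumerate(sorted_thresholds[:-1]):
    pre_value += value
    interval_value.append((b, sorted_thresholds[i + 1][0], pre_value + sum_w_l))
print(interval_value)  # [(-inf, 0.3, 0.25), (0.3, 0.5, 0.0), (0.5, inf, 0.5)]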