示例#1
0
    def __init__(self, loss, penalty, active_groups, inactive_groups, randomization, solve_args={'min_its':50, 'tol':1.e-10},
                 beta_active=None):
        """
        penalty is a group_lasso object that assigns weights to groups
        """

        (self.loss,
         self.penalty,
         self.active_groups,
         self.inactive_groups,
         self.randomization,
         self.solve_args,
         self.beta_active) = (loss,
                              penalty,
                              active_groups,
                              inactive_groups,
                              randomization,
                              solve_args,
                              beta_active)
         
        self.active = np.zeros(self.loss.shape, np.bool)
        for i, g in enumerate(np.unique(self.penalty.groups)):
            if self.active_groups[i]:
                self.active[self.penalty.groups == g] = True

        self.inactive = ~self.active

        # we form a dual group lasso object
        # to compute the max score

        new_groups = penalty.groups[self.inactive]
        new_weights = dict([(g,penalty.weights[g]) for g in penalty.weights.keys() if g in np.unique(new_groups)])

        self.group_lasso_dual = rr.group_lasso_dual(new_groups, weights=new_weights, lagrange=1.)
示例#2
0
    def solve(self, scaling=1, solve_args={'min_its': 20, 'tol': 1.e-10}):

        self.randomize()

        (loss, randomized_loss, epsilon, penalty, randomization,
         solve_args) = (self.loss, self.randomized_loss, self.epsilon,
                        self.penalty, self.randomization, self.solve_args)

        # initial solution

        problem = rr.simple_problem(randomized_loss, penalty)
        self.initial_soln = problem.solve(**solve_args)

        # find the active groups and their direction vectors
        # as well as unpenalized groups

        groups = np.unique(penalty.groups)
        active_groups = np.zeros(len(groups), np.bool)
        unpenalized_groups = np.zeros(len(groups), np.bool)

        active_directions = []
        active = np.zeros(loss.shape, np.bool)
        unpenalized = np.zeros(loss.shape, np.bool)

        initial_scalings = []

        for i, g in enumerate(groups):
            group = penalty.groups == g
            active_groups[i] = (np.linalg.norm(self.initial_soln[group]) >
                                1.e-6 * penalty.weights[g]) and (
                                    penalty.weights[g] > 0)
            unpenalized_groups[i] = (penalty.weights[g] == 0)
            if active_groups[i]:
                active[group] = True
                z = np.zeros(active.shape, np.float)
                z[group] = self.initial_soln[group] / np.linalg.norm(
                    self.initial_soln[group])
                active_directions.append(z)
                initial_scalings.append(
                    np.linalg.norm(self.initial_soln[group]))
            if unpenalized_groups[i]:
                unpenalized[group] = True

        # solve the restricted problem

        self._overall = active + unpenalized
        self._inactive = ~self._overall
        self._unpenalized = unpenalized
        self._active_directions = np.array(active_directions).T
        self._active_groups = np.array(active_groups, np.bool)
        self._unpenalized_groups = np.array(unpenalized_groups, np.bool)

        self.selection_variable = {
            'groups': self._active_groups,
            'variables': self._overall,
            'directions': self._active_directions
        }

        # initial state for opt variables

        initial_subgrad = -(
            self.randomized_loss.smooth_objective(self.initial_soln, 'grad') +
            self.randomized_loss.quadratic.objective(self.initial_soln,
                                                     'grad'))
        # the quadratic of a smooth_atom is not included in computing the smooth_objective

        initial_subgrad = initial_subgrad[self._inactive]
        initial_unpenalized = self.initial_soln[self._unpenalized]
        self.observed_opt_state = np.concatenate(
            [initial_scalings, initial_unpenalized, initial_subgrad], axis=0)

        # set the _solved bit

        self._solved = True

        # Now setup the pieces for linear decomposition

        (loss, epsilon, penalty, initial_soln, overall, inactive, unpenalized,
         active_groups,
         active_directions) = (self.loss, self.epsilon, self.penalty,
                               self.initial_soln, self._overall,
                               self._inactive, self._unpenalized,
                               self._active_groups, self._active_directions)

        # scaling should be chosen to be Lipschitz constant for gradient of Gaussian part

        # we are implicitly assuming that
        # loss is a pairs model

        _sqrt_scaling = np.sqrt(scaling)

        _beta_unpenalized = restricted_Mest(loss,
                                            overall,
                                            solve_args=solve_args)

        beta_full = np.zeros(overall.shape)
        beta_full[overall] = _beta_unpenalized
        _hessian = loss.hessian(beta_full)
        self._beta_full = beta_full

        # observed state for score

        self.observed_score_state = np.hstack([
            _beta_unpenalized * _sqrt_scaling,
            -loss.smooth_objective(beta_full, 'grad')[inactive] / _sqrt_scaling
        ])

        # form linear part

        self.num_opt_var = p = loss.shape[0]  # shorthand for p

        # (\bar{\beta}_{E \cup U}, N_{-E}, c_E, \beta_U, z_{-E})
        # E for active
        # U for unpenalized
        # -E for inactive

        _opt_linear_term = np.zeros(
            (p,
             self._active_groups.sum() + unpenalized.sum() + inactive.sum()))
        _score_linear_term = np.zeros((p, p))

        # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator

        Mest_slice = slice(0, overall.sum())
        _Mest_hessian = _hessian[:, overall]
        _score_linear_term[:, Mest_slice] = -_Mest_hessian / _sqrt_scaling

        # N_{-(E \cup U)} piece -- inactive coordinates of score of M estimator at unpenalized solution

        null_idx = range(overall.sum(), p)
        inactive_idx = np.nonzero(inactive)[0]
        for _i, _n in zip(inactive_idx, null_idx):
            _score_linear_term[_i, _n] = -_sqrt_scaling

        # c_E piece

        scaling_slice = slice(0, active_groups.sum())
        if len(active_directions) == 0:
            _opt_hessian = 0
        else:
            _opt_hessian = (_hessian +
                            epsilon * np.identity(p)).dot(active_directions)
        _opt_linear_term[:, scaling_slice] = _opt_hessian / _sqrt_scaling

        self.observed_opt_state[scaling_slice] *= _sqrt_scaling

        # beta_U piece

        unpenalized_slice = slice(active_groups.sum(),
                                  active_groups.sum() + unpenalized.sum())
        unpenalized_directions = np.identity(p)[:, unpenalized]
        if unpenalized.sum():
            _opt_linear_term[:, unpenalized_slice] = (
                _hessian + epsilon *
                np.identity(p)).dot(unpenalized_directions) / _sqrt_scaling

        self.observed_opt_state[unpenalized_slice] *= _sqrt_scaling

        # subgrad piece

        subgrad_idx = range(
            active_groups.sum() + unpenalized.sum(),
            active_groups.sum() + inactive.sum() + unpenalized.sum())
        subgrad_slice = slice(
            active_groups.sum() + unpenalized.sum(),
            active_groups.sum() + inactive.sum() + unpenalized.sum())
        for _i, _s in zip(inactive_idx, subgrad_idx):
            _opt_linear_term[_i, _s] = _sqrt_scaling

        self.observed_opt_state[subgrad_slice] /= _sqrt_scaling

        # form affine part

        _opt_affine_term = np.zeros(p)
        idx = 0
        groups = np.unique(penalty.groups)
        for i, g in enumerate(groups):
            if active_groups[i]:
                group = penalty.groups == g
                _opt_affine_term[group] = active_directions[:, idx][
                    group] * penalty.weights[g]
                idx += 1

        # two transforms that encode score and optimization
        # variable roles

        # later, we will modify `score_transform`
        # in `linear_decomposition`

        self.opt_transform = (_opt_linear_term, _opt_affine_term)
        self.score_transform = (_score_linear_term,
                                np.zeros(_score_linear_term.shape[0]))

        # now store everything needed for the projections
        # the projection acts only on the optimization
        # variables

        self.scaling_slice = scaling_slice

        # weights are scaled here because the linear terms scales them by scaling

        new_groups = penalty.groups[inactive]
        new_weights = dict([(g, penalty.weights[g] / _sqrt_scaling)
                            for g in penalty.weights.keys()
                            if g in np.unique(new_groups)])

        # we form a dual group lasso object
        # to do the projection

        self.group_lasso_dual = rr.group_lasso_dual(new_groups,
                                                    weights=new_weights,
                                                    bound=1.)
        self.subgrad_slice = subgrad_slice

        self._setup = True
    def decompose_subgradient(self,
                              conditioning_groups=None,
                              marginalizing_groups=None):
        """
        ADD DOCSTRING

        conditioning_groups and marginalizing_groups should be disjoint
        """

        groups = np.unique(self.penalty.groups)
        condition_inactive_groups = np.zeros_like(groups, dtype=bool)

        if conditioning_groups is None:
            conditioning_groups = np.zeros_like(groups, dtype=np.bool)

        if marginalizing_groups is None:
            marginalizing_groups = np.zeros_like(groups, dtype=np.bool)

        if np.any(conditioning_groups * marginalizing_groups):
            raise ValueError(
                "cannot simultaneously condition and marginalize over a group's subgradient"
            )

        if not self._setup:
            raise ValueError(
                'setup_sampler should be called before using this function')

        condition_inactive_variables = np.zeros_like(self._inactive,
                                                     dtype=bool)
        moving_inactive_groups = np.zeros_like(groups, dtype=bool)
        moving_inactive_variables = np.zeros_like(self._inactive, dtype=bool)
        _inactive_groups = ~(self._active_groups + self._unpenalized)

        inactive_marginal_groups = np.zeros_like(self._inactive, dtype=bool)
        limits_marginal_groups = np.zeros_like(self._inactive, np.float)

        for i, g in enumerate(groups):
            if (_inactive_groups[i]) and conditioning_groups[i]:
                group = self.penalty.groups == g
                condition_inactive_groups[i] = True
                condition_inactive_variables[group] = True
            elif (_inactive_groups[i]) and (~conditioning_groups[i]) and (
                    ~marginalizing_groups[i]):
                group = self.penalty.groups == g
                moving_inactive_groups[i] = True
                moving_inactive_variables[group] = True
            if (_inactive_groups[i]) and marginalizing_groups[i]:
                group = self.penalty.groups == g
                inactive_marginal_groups[i] = True
                limits_marginal_groups[i] = self.penalty.weights[g]

        opt_linear, opt_offset = self.opt_transform

        new_linear = np.zeros(
            (opt_linear.shape[0],
             (self._active_groups.sum() + self._unpenalized_groups.sum() +
              moving_inactive_variables.sum())))
        new_linear[:, self.scaling_slice] = opt_linear[:, self.scaling_slice]
        new_linear[:,
                   self.unpenalized_slice] = opt_linear[:,
                                                        self.unpenalized_slice]

        inactive_moving_idx = np.nonzero(moving_inactive_variables)[0]
        subgrad_idx = range(
            self._active_groups.sum() + self._unpenalized.sum(),
            self._active_groups.sum() + self._unpenalized.sum() +
            moving_inactive_variables.sum())
        subgrad_slice = subgrad_idx
        for _i, _s in zip(inactive_moving_idx, subgrad_idx):
            new_linear[_i, _s] = 1.

        observed_opt_state = self.observed_opt_state[:(
            self._active_groups.sum() + self._unpenalized_groups.sum() +
            moving_inactive_variables.sum())]
        observed_opt_state[subgrad_idx] = self.initial_subgrad[
            moving_inactive_variables]

        condition_linear = np.zeros(
            (opt_linear.shape[0],
             (self._active_groups.sum() + self._unpenalized_groups.sum() +
              condition_inactive_variables.sum())))
        inactive_condition_idx = np.nonzero(condition_inactive_variables)[0]
        subgrad_condition_idx = range(
            self._active_groups.sum() + self._unpenalized.sum(),
            self._active_groups.sum() + self._unpenalized.sum() +
            condition_inactive_variables.sum())

        for _i, _s in zip(inactive_condition_idx, subgrad_condition_idx):
            condition_linear[_i, _s] = 1.

        new_offset = condition_linear[:, subgrad_condition_idx].dot(
            self.initial_subgrad[condition_inactive_variables]) + opt_offset

        new_opt_transform = (new_linear, new_offset)

        print("limits marginal groups", limits_marginal_groups)
        print("inactive marginal groups", inactive_marginal_groups)

        def _fraction(_cdf, _pdf, full_state_plus, full_state_minus,
                      inactive_marginal_groups):
            return (np.divide(
                _pdf(full_state_plus) - _pdf(full_state_minus),
                _cdf(full_state_plus) -
                _cdf(full_state_minus)))[inactive_marginal_groups]

        def new_grad_log_density(query, limits_marginal_groups,
                                 inactive_marginal_groups, _cdf, _pdf,
                                 opt_linear, deriv_log_dens, internal_state,
                                 opt_state):

            full_state = reconstruct_full_from_internal(
                new_opt_transform, query.score_transform, internal_state,
                opt_state)

            p = query.penalty.shape[0]
            weights = np.zeros(p)

            if inactive_marginal_groups.sum() > 0:
                full_state_plus = full_state + np.multiply(
                    limits_marginal_groups,
                    np.array(inactive_marginal_groups, np.float))
                full_state_minus = full_state - np.multiply(
                    limits_marginal_groups,
                    np.array(inactive_marginal_groups, np.float))
                weights[inactive_marginal_groups] = _fraction(
                    _cdf, _pdf, full_state_plus, full_state_minus,
                    inactive_marginal_groups)
            weights[~inactive_marginal_groups] = deriv_log_dens(
                full_state)[~inactive_marginal_groups]
            return -opt_linear.T.dot(weights)

        new_grad_log_density = functools.partial(
            new_grad_log_density, self, limits_marginal_groups,
            inactive_marginal_groups, self.randomization._cdf,
            self.randomization._pdf, new_opt_transform[0],
            self.randomization._derivative_log_density)

        def new_log_density(query, limits_marginal_groups,
                            inactive_marginal_groups, _cdf, _pdf, opt_linear,
                            log_dens, internal_state, opt_state):

            full_state = reconstruct_full_from_internal(
                new_opt_transform, query.score_transform, internal_state,
                opt_state)
            full_state = np.atleast_2d(full_state)
            p = query.penalty.shape[0]
            logdens = np.zeros(full_state.shape[0])

            if inactive_marginal_groups.sum() > 0:
                full_state_plus = full_state + np.multiply(
                    limits_marginal_groups,
                    np.array(inactive_marginal_groups, np.float))
                full_state_minus = full_state - np.multiply(
                    limits_marginal_groups,
                    np.array(inactive_marginal_groups, np.float))
                logdens += np.sum(
                    np.log(_cdf(full_state_plus) -
                           _cdf(full_state_minus))[:,
                                                   inactive_marginal_groups],
                    axis=1)

            logdens += log_dens(full_state[:, ~inactive_marginal_groups])

            return np.squeeze(
                logdens
            )  # should this be negative to match the gradient log density?

        new_log_density = functools.partial(
            new_log_density, self, limits_marginal_groups,
            inactive_marginal_groups, self.randomization._cdf,
            self.randomization._pdf, self.opt_transform[0],
            self.randomization._log_density)

        new_groups = self.penalty.groups[moving_inactive_groups]
        _sqrt_scaling = np.sqrt(self.scaling)
        new_weights = dict([(g, self.penalty.weights[g] / _sqrt_scaling)
                            for g in self.penalty.weights.keys()
                            if g in np.unique(new_groups)])
        new_group_lasso_dual = rr.group_lasso_dual(new_groups,
                                                   weights=new_weights,
                                                   bound=1.)

        def new_projection(group_lasso_dual, noverall, opt_state):
            new_state = opt_state.copy()
            new_state[self.scaling_slice] = np.maximum(
                opt_state[self.scaling_slice], 0)
            new_state[noverall:] = group_lasso_dual.bound_prox(
                opt_state[noverall:])
            return new_state

        new_projection = functools.partial(new_projection,
                                           new_group_lasso_dual,
                                           self._overall.sum())

        new_selection_variable = copy(self.selection_variable)
        new_selection_variable['subgradient'] = self.observed_opt_state[
            self.subgrad_slice]

        self.sampler = optimization_sampler(
            observed_opt_state,
            self.observed_internal_state.copy(),
            self.score_transform,
            new_opt_transform,
            new_projection,
            new_grad_log_density,
            new_log_density,
            selection_info=(self, new_selection_variable))
示例#4
0
    def setup_sampler(self, solve_args={'min_its':50, 'tol':1.e-10}):

        """
        Should return a bootstrap_score
        """

        (loss,
         epsilon,
         penalty,
         randomization,
         initial_soln,
         overall,
         inactive,
         unpenalized,
         active_groups,
         active_directions) = (self.loss,
                               self.epsilon,
                               self.penalty,
                               self.randomization,
                               self.initial_soln,
                               self.overall,
                               self.inactive,
                               self.unpenalized,
                               self.active_groups,
                               self.active_directions)

        # we are implicitly assuming that
        # loss is a pairs model

        _beta_unpenalized = restricted_Mest(loss, overall, solve_args=solve_args)

        beta_full = np.zeros(overall.shape)
        beta_full[overall] = _beta_unpenalized
        _hessian = loss.hessian(beta_full)
        self._beta_full = beta_full

        # observed state for score

        self.observed_score_state = np.hstack([_beta_unpenalized,
                                               -loss.smooth_objective(beta_full, 'grad')[inactive]])

        # form linear part

        self.num_opt_var = p = loss.shape[0] # shorthand for p

        # (\bar{\beta}_{E \cup U}, N_{-E}, c_E, \beta_U, z_{-E})
        # E for active
        # U for unpenalized
        # -E for inactive

        _opt_linear_term = np.zeros((p, self.active_groups.sum() + unpenalized.sum() + inactive.sum()))
        _score_linear_term = np.zeros((p, p))

        # \bar{\beta}_{E \cup U} piece -- the unpenalized M estimator

        Mest_slice = slice(0, overall.sum())
        _Mest_hessian = _hessian[:,overall]
        _score_linear_term[:,Mest_slice] = -_Mest_hessian

        # N_{-(E \cup U)} piece -- inactive coordinates of score of M estimator at unpenalized solution

        null_idx = range(overall.sum(), p)
        inactive_idx = np.nonzero(inactive)[0]
        for _i, _n in zip(inactive_idx, null_idx):
            _score_linear_term[_i,_n] = -1.

        # c_E piece 

        scaling_slice = slice(0, active_groups.sum())
        _opt_hessian = (_hessian + epsilon * np.identity(p)).dot(active_directions)
        _opt_linear_term[:,scaling_slice] = _opt_hessian

        # beta_U piece

        unpenalized_slice = slice(active_groups.sum(), active_groups.sum() + unpenalized.sum())
        unpenalized_directions = np.identity(p)[:,unpenalized]
        if unpenalized.sum():
            _opt_linear_term[:,unpenalized_slice] = (_hessian + epsilon * np.identity(p)).dot(unpenalized_directions)

        # subgrad piece
        subgrad_idx = range(active_groups.sum() + unpenalized.sum(), active_groups.sum() + inactive.sum() + unpenalized.sum())
        subgrad_slice = slice(active_groups.sum() + unpenalized.sum(), active_groups.sum() + inactive.sum() + unpenalized.sum())
        for _i, _s in zip(inactive_idx, subgrad_idx):
            _opt_linear_term[_i,_s] = 1.

        # form affine part

        _opt_affine_term = np.zeros(p)
        idx = 0
        groups = np.unique(penalty.groups) 
        for i, g in enumerate(groups):
            if active_groups[i]:
                group = penalty.groups == g
                _opt_affine_term[group] = active_directions[:,idx][group] * penalty.weights[g]
                idx += 1

        # two transforms that encode score and optimization
        # variable roles 

        # later, conditioning will modify `score_transform`

        self.opt_transform = (_opt_linear_term, _opt_affine_term)
        self.score_transform = (_score_linear_term, np.zeros(_score_linear_term.shape[0]))

        # now store everything needed for the projections
        # the projection acts only on the optimization
        # variables

        self.scaling_slice = scaling_slice

        new_groups = penalty.groups[inactive]
        new_weights = dict([(g,penalty.weights[g]) for g in penalty.weights.keys() if g in np.unique(new_groups)])

        # we form a dual group lasso object
        # to do the projection

        self.group_lasso_dual = rr.group_lasso_dual(new_groups, weights=new_weights, bound=1.)
        self.subgrad_slice = subgrad_slice