Пример #1
0
def percent_build(er: np.ndarray,
                  percent: float,
                  groups: np.ndarray = None,
                  masks: np.ndarray = None) -> np.ndarray:
    """Flag the highest-scoring ``percent`` share of ``er`` with weight 1.

    :param er: scores to select from; a copy is taken, so the caller's array
        is never modified.
    :param percent: share to select; the number of picks is
        ``int(percent * count)``, i.e. truncated toward zero.
    :param groups: optional labels; when given, the selection is made
        separately inside every group reported by ``groupby``.
    :param masks: optional boolean array; entries where it is false are set
        to ``-inf`` before sorting so they come after every finite score.
    :return: array shaped like ``er``; it starts from ``zeros`` /
        ``zeros_like`` and the picked positions are set to 1.
    """
    scores = er.copy()
    if masks is not None:
        # After negation these become +inf and therefore sort last.
        scores[~masks] = -np.inf

    is_single_column = (scores.ndim == 1
                        or scores.shape[0] == 1
                        or scores.shape[1] == 1)

    if not is_single_column:
        # General path: argsort along axis 0; the write is delegated to the
        # external ``set_value`` helper.
        # NOTE(review): presumably ``set_value`` writes per column - confirm.
        descending = -scores
        picks = zeros_like(scores)

        if groups is None:
            col_order = descending.argsort(axis=0)
            top_n = int(percent * len(descending))
            set_value(picks, col_order[:top_n], 1.)
            return picks

        boundaries, sorted_rows = groupby(groups)
        begin = 0
        for boundary in boundaries:
            stop = boundary + 1
            members = sorted_rows[begin:stop]
            local_order = descending[members].argsort(axis=0)
            top_n = int(percent * len(members))
            set_value(picks, members[local_order[:top_n]], 1)
            begin = stop
        return picks

    # Fast path for a single column of scores: work on a flat vector and
    # restore the input shape at the very end.
    descending = -scores.flatten()
    count = len(descending)
    picks = zeros((count, 1))

    if groups is None:
        flat_order = descending.argsort()
        top_n = int(percent * len(descending))
        picks[flat_order[:top_n]] = 1.
    else:
        boundaries, sorted_rows = groupby(groups)
        begin = 0
        for boundary in boundaries:
            stop = boundary + 1
            members = sorted_rows[begin:stop]
            # Column-shaped ordering, so ``set_value`` receives a 2-D index.
            local_order = descending[members].argsort().reshape(-1, 1)
            top_n = int(percent * len(members))
            set_value(picks, members[local_order[:top_n]], 1.)
            begin = stop

    return picks.reshape(scores.shape)
Пример #2
0
def _train(
    x: np.ndarray,
    y: np.ndarray,
    groups: np.ndarray = None
) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    """Fit ``y`` on ``x``, either on the whole sample or group by group.

    :param x: regressors, forwarded unchanged to the fitting helpers.
    :param y: targets, forwarded unchanged to the fitting helpers.
    :param groups: optional labels; when omitted a single ``ls_fit`` is run.
    :return: the ``ls_fit`` result when ``groups`` is ``None``; otherwise a
        pair ``(np.unique(groups), result of _train_loop)``.
    """
    if groups is not None:
        boundaries, sort_order = groupby(groups)
        group_betas = _train_loop(boundaries, sort_order, x, y)
        return np.unique(groups), group_betas

    return ls_fit(x, y)
Пример #3
0
def neutralize(x: np.ndarray,
               y: np.ndarray,
               groups: np.ndarray = None,
               detail: bool = False,
               weights: np.ndarray = None) \
        -> Union[np.ndarray, Tuple[np.ndarray, Dict]]:
    """Return the residual of ``y`` after a (weighted) fit on ``x``.

    :param x: regressors; its shape also sizes the detail arrays.
    :param y: targets; a 1-D input is reshaped to a single column.
    :param groups: optional labels; when given, the fit is done separately
        for every group reported by ``utils.groupby`` via ``_sub_step``.
    :param detail: when true, also return a dict with ``'exposure'`` and
        ``'explained'`` arrays of shape ``x.shape + (y.shape[1],)``.
    :param weights: optional observation weights; defaults to all ones.
    :return: the residual array, or ``(residual, detail_dict)`` when
        ``detail`` is true.
    """
    if y.ndim == 1:
        y = y.reshape((-1, 1))

    if weights is None:
        weights = np.ones(len(y), dtype=float)

    output_dict = {}

    if detail:
        exposure = np.zeros(x.shape + (y.shape[1], ))
        explained = np.zeros(x.shape + (y.shape[1], ))
        output_dict['exposure'] = exposure
        output_dict['explained'] = explained

    if groups is not None:
        res = np.zeros(y.shape)
        index_diff, order = utils.groupby(groups)
        start = 0
        for diff_loc in index_diff:
            curr_idx = order[start:diff_loc + 1]
            # NOTE(review): _sub_step is expected to fill ``res`` for the
            # rows in ``curr_idx`` in place - confirm against its definition.
            curr_x, b = _sub_step(x, y, weights, curr_idx, res)
            if detail:
                exposure[curr_idx, :, :] = b
                explained[curr_idx] = ls_explain(curr_x, b)
            start = diff_loc + 1
    else:
        try:
            b = ls_fit(x, y, weights)
        except np.linalg.LinAlgError:
            # Use the public exception name: the ``numpy.linalg.linalg``
            # submodule is private/deprecated in NumPy 2.x.
            b = ls_fit_pinv(x, y, weights)

        res = ls_res(x, y, b)

        if detail:
            explained[:, :, :] = ls_explain(x, b)
            exposure[:] = b

    if output_dict:
        return res, output_dict
    else:
        return res
Пример #4
0
def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_explained=False, output_exposure=False) \
        -> Union[np.ndarray, Tuple[np.ndarray, Dict]]:
    """Return the residual of ``y`` after a fit on ``x``.

    :param x: regressors.
    :param y: targets; a 1-D input is reshaped to a single column.
    :param groups: optional labels; when given, the fit is done separately
        for every group reported by ``utils.groupby`` via ``_sub_step``.
    :param output_explained: when true, add ``'explained'`` to the details.
    :param output_exposure: when true, add ``'exposure'`` to the details.
    :return: the residual array, or ``(residual, detail_dict)`` when at least
        one detail output was requested. In the grouped case the detail
        arrays are allocated as ``x.shape + (n_targets,)``; in the ungrouped
        case ``'explained'`` is the raw ``ls_explain`` result and
        ``'exposure'`` is the fitted ``b`` itself.
    """
    if y.ndim == 1:
        y = y.reshape((-1, 1))

    if groups is not None:
        res = np.zeros(y.shape)

        # Trailing dimension of the detail arrays: one slot per target column.
        n_targets = y.shape[1] if y.ndim == 2 else 1
        if output_explained:
            explained = np.zeros(x.shape + (n_targets, ))
        if output_exposure:
            exposure = np.zeros(x.shape + (n_targets, ))

        index_diff, order = utils.groupby(groups)

        start = 0
        for diff_loc in index_diff:
            curr_idx = order[start:diff_loc + 1]
            # NOTE(review): _sub_step is expected to fill ``res`` for the
            # rows in ``curr_idx`` in place - confirm against its definition.
            curr_x, b = _sub_step(x, y, curr_idx, res)
            if output_exposure:
                for i in range(exposure.shape[2]):
                    exposure[curr_idx, :, i] = b[:, i]
            if output_explained:
                # One assignment covers every target column; the previous
                # per-column loop recomputed the identical slice each pass.
                explained[curr_idx] = ls_explain(curr_x, b)
            start = diff_loc + 1
    else:
        b = ls_fit(x, y)
        res = ls_res(x, y, b)

        if output_explained:
            explained = ls_explain(x, b)
        if output_exposure:
            exposure = b

    output_dict = {}
    if output_explained:
        output_dict['explained'] = explained
    if output_exposure:
        output_dict['exposure'] = exposure

    if output_dict:
        return res, output_dict
    else:
        return res
Пример #5
0
def rank(x: np.ndarray, groups: Optional[np.ndarray] = None) -> np.ndarray:
    """Return the column-wise ``argsort`` of ``x``, optionally per group.

    :param x: values to order; a 1-D input is reshaped to a single column.
    :param groups: optional labels; when given, ``argsort`` is applied
        separately to the rows of every group reported by ``utils.groupby``
        and the result is written back at those rows.
    :return: integer array shaped like the (possibly reshaped) ``x``. Note
        the entries are ``argsort`` indices (position of the k-th smallest
        element), local to each group when ``groups`` is given.
    """
    if x.ndim == 1:
        x = x.reshape((-1, 1))

    if groups is None:
        return x.argsort(axis=0)

    res = np.zeros(x.shape, dtype=int)
    index_diff, order = utils.groupby(groups)

    start = 0
    for diff_loc in index_diff:
        curr_idx = order[start:diff_loc + 1]
        res[curr_idx] = x[curr_idx].argsort(axis=0)
        start = diff_loc + 1

    # The grouped branch used to fall off the end and return None.
    return res
Пример #6
0
def rank(x: np.ndarray, groups: Optional[np.ndarray] = None) -> np.ndarray:
    """Return zero-based ranks of ``x`` as a float column, optionally per group.

    Ranks come from ``rankdata`` (1-based) shifted down by one. ``rankdata``
    is called without an axis, so it ranks the flattened input.
    NOTE(review): this presumes ``x`` is effectively a single column - confirm.

    :param x: values to rank; a 1-D input is reshaped to a single column.
    :param groups: optional labels; when given, ranks are computed separately
        inside every group reported by ``utils.groupby``.
    :return: float array with one column holding the zero-based ranks.
    """
    if x.ndim == 1:
        x = x.reshape((-1, 1))

    if groups is None:
        return (rankdata(x).astype(float) - 1.).reshape((-1, 1))

    # Float storage: ranks may be fractional, and an int buffer would
    # silently truncate them (the ungrouped path already returns floats).
    res = np.zeros(x.shape, dtype=float)
    index_diff, order = utils.groupby(groups)

    start = 0
    for diff_loc in index_diff:
        curr_idx = order[start:diff_loc + 1]
        curr_rank = rankdata(x[curr_idx]).astype(float) - 1.
        # rankdata returns a flat vector; shape it as a column so it can be
        # assigned into the (n, 1) slice instead of failing to broadcast.
        res[curr_idx] = curr_rank.reshape((-1, 1))
        start = diff_loc + 1
    return res
Пример #7
0
def percentile(x: np.ndarray,
               groups: Optional[np.ndarray] = None) -> np.ndarray:
    """Return the zero-based rank of ``x`` scaled by ``count - 1``.

    The smallest value maps to 0 and the largest to 1; a sample of length one
    divides by 1 instead of 0 and therefore maps to 0. ``rankdata`` is called
    without an axis, so it ranks the flattened input.
    NOTE(review): this presumes ``x`` is effectively a single column - confirm.

    :param x: values to rank; a 1-D input is reshaped to a single column.
    :param groups: optional labels; when given, the scaling is done
        separately inside every group reported by ``utils.groupby``.
    :return: float array with one column holding the scaled ranks.
    """
    if x.ndim == 1:
        x = x.reshape((-1, 1))

    if groups is None:
        length = len(x) - 1. if len(x) > 1 else 1.
        return ((rankdata(x).astype(float) - 1.) / length).reshape((-1, 1))

    # Float storage is essential here: an int buffer would truncate every
    # fractional percentile to 0.
    res = np.zeros(x.shape, dtype=float)
    index_diff, order = utils.groupby(groups)

    start = 0
    for diff_loc in index_diff:
        curr_idx = order[start:diff_loc + 1]
        curr_values = x[curr_idx]
        length = len(curr_values) - 1. if len(curr_values) > 1 else 1.
        curr_pct = (rankdata(curr_values).astype(float) - 1.) / length
        # rankdata returns a flat vector; shape it as a column so it can be
        # assigned into the (n, 1) slice instead of failing to broadcast.
        res[curr_idx] = curr_pct.reshape((-1, 1))
        start = diff_loc + 1
    return res