from typing import Dict, Optional, Tuple, Union

import numpy as np
from numpy import zeros, zeros_like
from scipy.stats import rankdata

# groupby, set_value, ls_fit, ls_fit_pinv, ls_res, ls_explain, _train_loop and _sub_step
# (as well as the `utils` module referenced below) are package-internal helpers defined elsewhere.


def percent_build(er: np.ndarray,
                  percent: float,
                  groups: np.ndarray = None,
                  masks: np.ndarray = None) -> np.ndarray:
    er = er.copy()

    if masks is not None:
        # masked-out names can never be selected
        er[~masks] = -np.inf

    if er.ndim == 1 or (er.shape[0] == 1 or er.shape[1] == 1):
        # fast path methods for single column er
        neg_er = -er.flatten()
        length = len(neg_er)
        weights = zeros((length, 1))
        if groups is not None:
            index_diff, order = groupby(groups)
            start = 0
            for diff_loc in index_diff:
                current_index = order[start:diff_loc + 1]
                current_ordering = neg_er[current_index].argsort()
                current_ordering.shape = -1, 1
                use_rank = int(percent * len(current_index))
                set_value(weights, current_index[current_ordering[:use_rank]], 1.)
                start = diff_loc + 1
        else:
            ordering = neg_er.argsort()
            use_rank = int(percent * len(neg_er))
            weights[ordering[:use_rank]] = 1.
        return weights.reshape(er.shape)
    else:
        neg_er = -er
        weights = zeros_like(er)
        if groups is not None:
            index_diff, order = groupby(groups)
            start = 0
            for diff_loc in index_diff:
                current_index = order[start:diff_loc + 1]
                current_ordering = neg_er[current_index].argsort(axis=0)
                use_rank = int(percent * len(current_index))
                set_value(weights, current_index[current_ordering[:use_rank]], 1.)
                start = diff_loc + 1
        else:
            ordering = neg_er.argsort(axis=0)
            use_rank = int(percent * len(neg_er))
            set_value(weights, ordering[:use_rank], 1.)
        return weights
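# For reference, a minimal self-contained sketch (plain numpy, not the package helpers above)
# of what the ungrouped single-column fast path computes: the top `percent` fraction of names,
# ranked by expected return, receives weight 1 and everything else 0.
import numpy as np

er = np.array([0.3, -0.1, 0.25, 0.05, -0.4])
percent = 0.4

ordering = (-er).argsort()             # descending order of expected return
use_rank = int(percent * len(er))      # number of names to pick
weights = np.zeros((len(er), 1))
weights[ordering[:use_rank]] = 1.
print(weights.ravel())                 # [1. 0. 1. 0. 0.]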
def _train(x: np.ndarray,
           y: np.ndarray,
           groups: np.ndarray = None) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    if groups is None:
        return ls_fit(x, y)
    else:
        index_diff, order = groupby(groups)
        res_beta = _train_loop(index_diff, order, x, y)
        return np.unique(groups), res_beta
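# _train_loop is not shown here. Under the assumption that it simply fits one least-squares
# beta per group, a rough plain-numpy equivalent of the grouped branch would be:
import numpy as np


def grouped_ls_fit(x: np.ndarray, y: np.ndarray, groups: np.ndarray) -> np.ndarray:
    # one row of betas per group code (y assumed one-dimensional)
    codes = np.unique(groups)
    betas = np.zeros((len(codes), x.shape[1]))
    for i, code in enumerate(codes):
        idx = groups == code
        betas[i] = np.linalg.lstsq(x[idx], y[idx], rcond=None)[0]
    return betas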
def neutralize(x: np.ndarray,
               y: np.ndarray,
               groups: np.ndarray = None,
               detail: bool = False,
               weights: np.ndarray = None) -> Union[np.ndarray, Tuple[np.ndarray, Dict]]:
    if y.ndim == 1:
        y = y.reshape((-1, 1))

    if weights is None:
        weights = np.ones(len(y), dtype=float)

    output_dict = {}

    if detail:
        exposure = np.zeros(x.shape + (y.shape[1],))
        explained = np.zeros(x.shape + (y.shape[1],))
        output_dict['exposure'] = exposure
        output_dict['explained'] = explained

    if groups is not None:
        res = np.zeros(y.shape)
        index_diff, order = utils.groupby(groups)
        start = 0
        if detail:
            for diff_loc in index_diff:
                curr_idx = order[start:diff_loc + 1]
                curr_x, b = _sub_step(x, y, weights, curr_idx, res)
                exposure[curr_idx, :, :] = b
                explained[curr_idx] = ls_explain(curr_x, b)
                start = diff_loc + 1
        else:
            for diff_loc in index_diff:
                curr_idx = order[start:diff_loc + 1]
                _sub_step(x, y, weights, curr_idx, res)
                start = diff_loc + 1
    else:
        try:
            b = ls_fit(x, y, weights)
        except np.linalg.LinAlgError:
            # fall back to a pseudo-inverse fit when x is singular
            b = ls_fit_pinv(x, y, weights)

        res = ls_res(x, y, b)

        if detail:
            explained[:, :, :] = ls_explain(x, b)
            exposure[:] = b

    if output_dict:
        return res, output_dict
    else:
        return res
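# Sketch of what the ungrouped branch is assumed to do with ls_fit / ls_res / ls_explain:
# regress y on the risk factors x, keep the residual as the neutralized series, and
# attribute the fitted part to the individual factors.
import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((100, 3))                                              # factor exposures
y = x @ np.array([[0.5], [-0.2], [0.1]]) + 0.01 * rng.standard_normal((100, 1))

b = np.linalg.lstsq(x, y, rcond=None)[0]   # plain fit, i.e. all observation weights equal
res = y - x @ b                            # neutralized (factor-free) series
explained = x * b.T                        # per-factor explained part, shape (100, 3)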
def neutralize(x: np.ndarray,
               y: np.ndarray,
               groups: np.ndarray = None,
               output_explained=False,
               output_exposure=False) -> Union[np.ndarray, Tuple[np.ndarray, Dict]]:
    if y.ndim == 1:
        y = y.reshape((-1, 1))

    if groups is not None:
        res = np.zeros(y.shape)

        if y.ndim == 2:
            if output_explained:
                explained = np.zeros(x.shape + (y.shape[1],))
            if output_exposure:
                exposure = np.zeros(x.shape + (y.shape[1],))
        else:
            if output_explained:
                explained = np.zeros(x.shape + (1,))
            if output_exposure:
                exposure = np.zeros(x.shape + (1,))

        index_diff, order = utils.groupby(groups)
        start = 0
        for diff_loc in index_diff:
            curr_idx = order[start:diff_loc + 1]
            curr_x, b = _sub_step(x, y, curr_idx, res)
            if output_exposure:
                for i in range(exposure.shape[2]):
                    exposure[curr_idx, :, i] = b[:, i]
            if output_explained:
                for i in range(explained.shape[2]):
                    explained[curr_idx] = ls_explain(curr_x, b)
            start = diff_loc + 1
    else:
        b = ls_fit(x, y)
        res = ls_res(x, y, b)

        if output_explained:
            explained = ls_explain(x, b)
        if output_exposure:
            exposure = b

    output_dict = {}
    if output_explained:
        output_dict['explained'] = explained
    if output_exposure:
        output_dict['exposure'] = exposure

    if output_dict:
        return res, output_dict
    else:
        return res
def rank(x: np.ndarray, groups: Optional[np.ndarray] = None) -> np.ndarray:
    if x.ndim == 1:
        x = x.reshape((-1, 1))

    if groups is not None:
        res = np.zeros(x.shape, dtype=int)
        index_diff, order = utils.groupby(groups)
        start = 0
        for diff_loc in index_diff:
            curr_idx = order[start:diff_loc + 1]
            res[curr_idx] = x[curr_idx].argsort(axis=0)
            start = diff_loc + 1
        return res
    else:
        return x.argsort(axis=0)
def rank(x: np.ndarray, groups: Optional[np.ndarray] = None) -> np.ndarray:
    if x.ndim == 1:
        x = x.reshape((-1, 1))

    if groups is not None:
        res = np.zeros(x.shape, dtype=int)
        index_diff, order = utils.groupby(groups)
        start = 0
        for diff_loc in index_diff:
            curr_idx = order[start:diff_loc + 1]
            # zero-based ranks within the group, shaped as a column to match res
            res[curr_idx] = (rankdata(x[curr_idx]).astype(float) - 1.).reshape((-1, 1))
            start = diff_loc + 1
        return res
    else:
        return (rankdata(x).astype(float) - 1.).reshape((-1, 1))
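# The two rank versions above differ in what they store: argsort returns the ordering
# permutation (which index comes first), while rankdata returns the rank of each element.
import numpy as np
from scipy.stats import rankdata

x = np.array([0.3, -0.1, 0.25])
print(x.argsort(axis=0))     # [1 2 0] -> ordering permutation
print(rankdata(x) - 1.)      # [2. 0. 1.] -> zero-based rank of each element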
def percentile(x: np.ndarray, groups: Optional[np.ndarray] = None) -> np.ndarray:
    if x.ndim == 1:
        x = x.reshape((-1, 1))

    if groups is not None:
        # percentiles are fractional, so the result buffer must be float
        res = np.zeros(x.shape, dtype=float)
        index_diff, order = utils.groupby(groups)
        start = 0
        for diff_loc in index_diff:
            curr_idx = order[start:diff_loc + 1]
            curr_values = x[curr_idx]
            length = len(curr_values) - 1. if len(curr_values) > 1 else 1.
            res[curr_idx] = ((rankdata(curr_values).astype(float) - 1.) / length).reshape((-1, 1))
            start = diff_loc + 1
        return res
    else:
        length = len(x) - 1. if len(x) > 1 else 1.
        return ((rankdata(x).astype(float) - 1.) / length).reshape((-1, 1))
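# percentile rescales the zero-based ranks by (n - 1), mapping each value onto [0, 1].
import numpy as np
from scipy.stats import rankdata

x = np.array([0.3, -0.1, 0.25, 0.05, -0.4])
print((rankdata(x) - 1.) / (len(x) - 1.))   # [1.   0.25 0.75 0.5  0.  ]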