def days_to_first_event(df, groupby, time_col):
    """
    Calculate days to the first date for each group, in a time series.

    The ``time_col`` column is cast to ``datetime64[ns]`` and handed, together
    with the ``groupby`` column(s) as group ids, to ``wnp.group_apply`` with
    ``_time_to_min_date`` as the per-group function. The grouped output is
    then passed through ``_convert_ns_to_days`` before being returned.
    """
    timestamps = df[time_col].astype("datetime64[ns]").values
    group_ids = df[groupby].values
    elapsed = wnp.group_apply(timestamps, group_ids, _time_to_min_date)
    return _convert_ns_to_days(elapsed)
def test_vector_group_apply_works_with_2dims():
    """group_apply accepts a 2-D ids array, where each row is one group key."""
    data = np.array([1, 1, 1, 2, 2, 2])
    group_keys = np.array([[1, 0], [1, 0], [1, 0], [1, 1], [2, 2], [2, 2]])
    # Each element is expected to hold the sum of its own group.
    want = np.array([3, 3, 3, 2, 4, 4])

    got = utils_np.group_apply(data, group_keys, np.sum)

    assert np.all(got == want)
def grouped_lagged_decay(df, groupby, col, fillna=0, decay=1):
    """
    Grouped lagged decay.

    Applies ``lagged_decay`` (with the given ``decay``) to ``df[col]`` within
    each group defined by ``df[groupby]``. The input values are passed through
    ``wnp.fillna(..., 0)`` first, and the grouped result is passed through
    ``wnp.fillna(..., fillna)`` before being returned.
    """
    cleaned_input = wnp.fillna(df[col].values, 0)
    decay_func = partial(lagged_decay, decay=decay)
    decayed = wnp.group_apply(cleaned_input, df[groupby].values, decay_func)
    return wnp.fillna(decayed, fillna)
def grouped_days_since_result(
    df, groupby, col="win_flag", value=1, fillna=-1, coldate="scheduled_time"
):
    """
    Apply ``days_since_result`` within each group of ``df``.

    Parameters
    ----------
    df : DataFrame providing the ``col``, ``coldate`` and ``groupby`` columns.
    groupby : column name(s) whose values identify the groups.
    col : name of the result column (default ``"win_flag"``).
    value : forwarded to ``days_since_result`` as its ``value`` keyword.
    fillna : fill value handed to ``wnp.fillna`` for the grouped result.
    coldate : name of the date column (default ``"scheduled_time"``).

    Returns
    -------
    The output of ``wnp.group_apply`` after ``wnp.fillna(result, fillna)``.
    """
    # Forward the caller's `value`; it used to be hard-coded to 1, which
    # silently ignored the argument.
    func = partial(days_since_result, value=value)
    # Both columns are passed together; multiarg=True is handed to group_apply
    # as-is (presumably so each column reaches `func` as a separate argument
    # -- confirm against group_apply).
    result = wnp.group_apply(
        df[[col, coldate]].values, df[groupby].values, func, multiarg=True
    )
    return wnp.fillna(result, fillna)
def test_vector_group_apply():
    """group_apply with np.cumsum yields a per-group running sum for int and str ids."""
    data = np.array([1, 0, 0, 1, 1, 0])
    # Each case: (values, ids, expected)
    cases = [
        (data, np.array([1, 2, 1, 2, 1, 2]), np.array([1, 0, 1, 1, 2, 1])),
        (
            data,
            np.array(["A", "B", "A", "B", "A", "B"]),
            np.array([1, 0, 1, 1, 2, 1]),
        ),
    ]
    for case_values, case_ids, want in cases:
        got = utils_np.group_apply(case_values, case_ids, np.cumsum)
        assert np.all(got == want)
def test_vector_group_apply_works_with_tuple_ids():
    """group_apply accepts ids built from zipped pairs (int/int, int/str, str/str)."""

    def paired(first, second):
        # Build the ids array from zipped column pairs.
        return np.array(list(zip(first, second)))

    data = np.array([1, 1, 1, 2, 2, 2])

    int_major = np.array([1, 1, 1, 2, 2, 2])
    int_minor = np.array([0, 0, 0, 1, 2, 2])
    str_major = np.array(["a", "a", "a", "b", "b", "b"])
    str_minor = np.array(["A", "A", "A", "B", "C", "C"])

    id_variants = [
        paired(int_major, int_minor),
        paired(int_major, str_minor),
        paired(str_major, str_minor),
    ]

    # Each element is expected to hold the sum of its own group.
    want = np.array([3, 3, 3, 2, 4, 4])
    for group_keys in id_variants:
        got = utils_np.group_apply(data, group_keys, np.sum)
        assert np.all(got == want)
def grouped_ema(df, col, alpha, groupby):
    """
    Apply ``ema`` (with the given ``alpha``) to ``df[col]`` within each group.

    Groups are identified by the values of ``df[groupby]``; the grouped
    application is delegated to ``wnp.group_apply`` and its output is
    returned unchanged.
    """
    series_values = df[col].values
    ema_func = partial(ema, alpha=alpha)
    return wnp.group_apply(series_values, df[groupby].values, ema_func)