def work(table, keys):
    """Count rows per key combination and per individual key value.

    Args:
        table: Table identifier, forwarded unchanged to
            ``data_utils.load_table``.
        keys: List of column names. Only these columns are requested from
            the loader, and each is read with the nullable ``Int64`` dtype.

    Returns:
        A frame with one row per distinct combination of ``keys`` holding
        the key columns, a ``weight`` column (rows having that combination)
        and one ``"<key>.cnt"`` column per key (rows having that key value),
        all cast to ``np.int64``.
    """
    # presumably load_table returns a pandas DataFrame -- it is used as one below
    nullable_ints = {col: pd.Int64Dtype() for col in keys}
    frame = data_utils.load_table(table, usecols=keys, dtype=nullable_ints)

    # Size of every group formed by the full key tuple, flattened back into
    # ordinary columns so the per-key counts can be merged on.
    combos = frame.groupby(keys).size().to_frame(name="weight").reset_index()

    for col in keys:
        # Row count for each value of this single key, indexed by that value.
        per_key = frame.groupby(col).size().rename(f"{col}.cnt")
        combos = combos.merge(per_key, how="left", left_on=col, right_index=True)

    return combos.astype(np.int64, copy=False)
def load_data_table(table, join_keys):
    """Load ``table`` with every join key read as nullable ``Int64``.

    Args:
        table: Table identifier, forwarded unchanged to
            ``data_utils.load_table``.
        join_keys: Iterable of column names to read with ``pd.Int64Dtype()``.

    Returns:
        Whatever ``data_utils.load_table`` returns for these arguments.
    """
    # NOTE(review): a later ``def load_data_table`` in this file rebinds this
    # name; that version reads the keys as ``np.int64`` instead -- confirm
    # which definition callers are meant to get.
    key_dtypes = {column: pd.Int64Dtype() for column in join_keys}
    return data_utils.load_table(table, dtype=key_dtypes)
def load_data_table(table, join_keys, usecols=None):
    """Load ``table`` with every join key read as ``np.int64``.

    This definition rebinds an earlier two-argument ``load_data_table`` in
    the same file. ``usecols`` is optional so that calls written against the
    two-argument form do not fail with ``TypeError``; calls that pass
    ``usecols`` forward exactly the same arguments as before.

    Args:
        table: Table identifier, forwarded unchanged to
            ``data_utils.load_table``.
        join_keys: Iterable of column names to read as ``np.int64``.
        usecols: Column selection forwarded to the loader as ``usecols``.
            Defaults to ``None``.

    Returns:
        Whatever ``data_utils.load_table`` returns for these arguments.
    """
    # NOTE(review): assumes the loader treats ``usecols=None`` as "all
    # columns" (as pandas readers do) -- confirm against data_utils.
    # NOTE(review): the earlier definition used nullable ``pd.Int64Dtype()``;
    # ``np.int64`` is kept here, so confirm which dtype callers expect.
    key_dtypes = {column: np.int64 for column in join_keys}
    return data_utils.load_table(table, usecols=usecols, dtype=key_dtypes)