def buc(df, pre, df2):
    """Append every cube row derived from ``df`` to ``df2``.

    Each emitted row is ``pre`` followed by one cell per column of ``df``.
    A dimension cell is either a concrete value or the string ``'ALL'``;
    the last column is treated as the measure and is never replaced.

    Args:
        df: DataFrame holding the remaining dimension columns followed by
            the measure column.
        pre: list of cells already fixed by the enclosing calls. It is not
            mutated.
        df2: output DataFrame; rows are appended in place through
            ``df2.loc[len(df2)]``.
    """
    if df.shape[0] == 1:
        # Single-tuple shortcut: with one row left, every combination of
        # "keep the value" / "ALL" per dimension can be emitted directly.
        from itertools import product

        # Materialise the row once instead of once per emitted cell.
        row = list(df.iloc[0])
        dim_cells, measure = row[:-1], row[-1:]
        # product() varies the last dimension fastest, which matches the
        # order of the plain recursion (value first, then 'ALL').
        for combo in product(*((cell, 'ALL') for cell in dim_cells)):
            df2.loc[len(df2)] = pre + list(combo) + measure
    elif df.shape[1] == 1:
        # Only the measure column is left: aggregate it.
        df2.loc[len(df2)] = pre + [sum(helper.project_data(df, 0))]
    else:
        # General case: one sub-cube per distinct value of the first
        # dimension (in sorted order), then the 'ALL' sub-cube.
        # NOTE(review): presumably slice_data_dim0 keeps the rows matching
        # the value and drops the first column -- confirm in helper.
        for val in sorted(set(helper.project_data(df, 0).values)):
            buc(helper.slice_data_dim0(df, val), pre + [val], df2)
        buc(helper.remove_first_dim(df), pre + ['ALL'], df2)
def buc_rec(input, l=None):
    """Recursively compute cube rows and collect them in ``ll``.

    Rows are appended to the name ``ll``, which is not defined in this
    function and must exist at module level. Every row is the prefix ``l``
    followed by dimension cells (a value or ``"ALL"``) and the measure
    formatted with ``"%.1f"``.

    Args:
        input: DataFrame (note: the parameter name shadows the builtin, but
            it is part of the public signature and therefore kept).
        l: list of cells fixed by the enclosing calls; defaults to an empty
            prefix. It is never mutated.
    """
    prefix = [] if l is None else l
    dims = input.shape[1]
    # Note that input is a DataFrame
    if input.shape[0] == 1 and dims >= 2:
        # Single-tuple shortcut, valid for any number of dimensions: emit
        # all value/"ALL" combinations directly instead of recursing.
        from itertools import product

        cells = [input.iloc[0, j] for j in range(dims)]
        # Format the measure exactly like the aggregated branch below so
        # both paths yield the same cell type.
        measure = "%.1f" % cells[-1]
        ll.extend(
            prefix + list(combo) + [measure]
            for combo in product(*((cell, "ALL") for cell in cells[:-1]))
        )
        return
    if dims == 1:
        # only the measure dim
        input_sum = "%.1f" % sum(helper.project_data(input, 0))
        ll.append(prefix + [input_sum])
    else:
        # the general case
        for dim0_v in set(helper.project_data(input, 0).values):
            sub_data = helper.slice_data_dim0(input, dim0_v)
            buc_rec(sub_data, prefix + [dim0_v])
        # for R_{ALL}
        sub_data = helper.remove_first_dim(input)
        buc_rec(sub_data, prefix + ['ALL'])
def _buc_rec_optimized(df, pre_num, df_out):
    """Recursive helper that appends cube rows to ``df_out``.

    Args:
        df: DataFrame with the remaining dimension columns followed by the
            measure column.
        pre_num: cells fixed by the enclosing calls. It is left untouched;
            each recursive call receives a freshly built list.
        df_out: output DataFrame; rows are appended in place through
            ``df_out.loc[len(df_out)]``.
    """
    if df.shape[1] == 1:
        # only the measure dim: aggregate it and emit one row
        df_out.loc[len(df_out)] = [*pre_num, sum(helper.project_data(df, 0))]
        return

    # the general case: one sub-cube per distinct first-dimension value
    # NOTE(review): presumably slice_data_dim0 also drops the first column,
    # otherwise the recursion would not reach the base case -- confirm.
    for dim0_v in set(helper.project_data(df, 0).values):
        sub_data = helper.slice_data_dim0(df, dim0_v)
        _buc_rec_optimized(sub_data, [*pre_num, dim0_v], df_out)

    # for R_{ALL}
    sub_data = helper.remove_first_dim(df)
    _buc_rec_optimized(sub_data, [*pre_num, "ALL"], df_out)
def buc_rec_3_params(input, result, pre=None):
    """Recursively compute cube rows and accumulate them in ``result``.

    Args:
        input: DataFrame (the name shadows the builtin but is part of the
            public signature and therefore kept).
        result: list that receives the generated rows; it is extended in
            place and also returned.
        pre: cells fixed by the enclosing calls; defaults to an empty
            prefix. It is never mutated.

    Returns:
        The same ``result`` list that was passed in.
    """
    prefix = [] if pre is None else list(pre)
    rows = input.shape[0]
    dims = input.shape[1]

    if dims >= 2 and rows == 1:
        # Single-tuple shortcut: hand the full row (prefix + remaining
        # cells) to single_line_opt, which is defined elsewhere.
        # NOTE(review): it presumably returns the list of expanded rows.
        full_row = prefix + input.iloc[0].tolist()
        result += single_line_opt(tuple(full_row), dims)
        return result

    if dims == 1:
        # only the measure dim
        result.append(prefix + [sum(helper.project_data(input, 0))])
        return result

    # the general case
    for dim0_v in set(helper.project_data(input, 0).values):
        sub_data = helper.slice_data_dim0(input, dim0_v)
        result = buc_rec_3_params(sub_data, result, prefix + [dim0_v])

    # for R_{ALL}
    sub_data = helper.remove_first_dim(input)
    result = buc_rec_3_params(sub_data, result, prefix + ['ALL'])
    return result
def buc(df, result, prefix=None):
    """Recursively compute cube rows and append them to ``result``.

    Args:
        df: DataFrame with the remaining dimension columns followed by the
            measure column.
        result: list that receives one list per generated row; mutated in
            place and also returned.
        prefix: cells fixed by the enclosing calls; defaults to an empty
            prefix. It is never mutated.

    Returns:
        The same ``result`` list that was passed in.
    """
    base = [] if prefix is None else list(prefix)

    if df.shape[1] == 1:
        # Only the measure column is left: aggregate it.
        result.append(base + [sum(helper.project_data(df, 0))])
        return result

    # One sub-cube per distinct value of the first dimension ...
    for value in set(helper.project_data(df, 0).values):
        rest_data = helper.slice_data_dim0(df, value)
        buc(rest_data, result, base + [value])

    # ... followed by the sub-cube that aggregates the first dimension away.
    rest_data = helper.remove_first_dim(df)
    buc(rest_data, result, base + ['ALL'])
    return result
def my_buc_rec_optimized(df, pre, res):
    """Recursive helper that appends cube rows to ``res``.

    Args:
        df: DataFrame with the remaining dimension columns followed by the
            measure column.
        pre: cells fixed by the enclosing calls. This function no longer
            appends to it; recursive calls receive freshly built lists.
        res: output DataFrame; rows are appended in place through
            ``res.loc[len(res)]``.
    """
    if df.shape[0] == 1:
        # Single-tuple case is delegated to single_tuple (defined
        # elsewhere), which receives the prefix and the output frame.
        single_tuple(df, pre, res)
        return

    if df.shape[1] == 1:
        # Only the measure column is left: aggregate it.
        res.loc[len(res)] = [*pre, sum(helper.project_data(df, 0))]
        return

    # General case: one sub-cube per distinct first-dimension value.
    for val in set(helper.project_data(df, 0).values):
        sub_data = helper.slice_data_dim0(df, val)
        my_buc_rec_optimized(sub_data, [*pre, val], res)

    # Sub-cube with the first dimension aggregated away.
    sub_data = helper.remove_first_dim(df)
    my_buc_rec_optimized(sub_data, [*pre, "ALL"], res)
def cal_mul_tuple(df, result, check):
    """Recursively compute cube rows and append them to ``result``.

    Args:
        df: DataFrame with the remaining dimension columns followed by the
            measure column.
        result: list that receives one list per generated row; mutated in
            place and also returned.
        check: cells fixed by the enclosing calls. Working copies are made
            with ``copy_list`` (defined elsewhere; presumably it returns a
            fresh list -- confirm).

    Returns:
        The same ``result`` list that was passed in.
    """
    row = copy_list(check)

    if df.shape[1] == 1:
        # only the measure dim: aggregate and emit a single row
        total = sum(helper.project_data(df, 0))
        row.append(total)
        result.append(row)
        return result

    # the general case: recurse once per distinct first-dimension value
    for value in set(helper.project_data(df, 0).values):
        row.append(value)
        cal_mul_tuple(helper.slice_data_dim0(df, value), result, row)
        # start the next branch from a clean copy of the incoming prefix
        row = copy_list(check)

    # for R_{ALL}
    without_first = helper.remove_first_dim(df)
    row.append('ALL')
    cal_mul_tuple(without_first, result, row)
    return result
def buc_rec_general(df, result, prefix=None):
    """Recursively compute cube rows and accumulate them in ``result``.

    Args:
        df: DataFrame with the remaining dimension columns followed by the
            measure column.
        result: list that receives the generated rows; mutated in place and
            also returned.
        prefix: cells fixed by the enclosing calls; defaults to an empty
            prefix. It is never mutated.

    Returns:
        The same ``result`` list that was passed in.
    """
    base = [] if prefix is None else list(prefix)
    dims = df.shape[1]
    rows = df.shape[0]

    if dims == 1:
        # Only the measure column is left: aggregate it.
        result.append(base + [sum(helper.project_data(df, 0))])
        return result

    if rows == 1:
        # Single-tuple shortcut, delegated to buc_single (defined elsewhere).
        # Its return value is used as a list of rows: [[...], [...], ...].
        s_tuple = list(df.iloc[0])
        result += buc_single(base, s_tuple, dims)
        return result

    # General case: one sub-cube per distinct first-dimension value.
    for dim0_v in set(helper.project_data(df, 0).values):
        sub_data = helper.slice_data_dim0(df, dim0_v)
        buc_rec_general(sub_data, result, base + [dim0_v])

    # Sub-cube with the first dimension aggregated away.
    sub_data = helper.remove_first_dim(df)
    buc_rec_general(sub_data, result, base + ['ALL'])
    return result
def buc(df, df_out, pre=None):
    """Recursively compute cube rows and append them to ``df_out``.

    Args:
        df: DataFrame with the remaining dimension columns followed by the
            measure column.
        df_out: output DataFrame; rows are appended in place through
            ``df_out.loc[len(df_out)]``.
        pre: cells fixed by the enclosing calls; defaults to an empty
            prefix. A fresh list is built for every recursive call, so a
            failure part-way through cannot leave stale cells behind in a
            shared list.

    Returns:
        Always ``0``.
    """
    prefix = [] if pre is None else pre
    dims = df.shape[1]

    if dims == 1:
        # One column left: sum it, converting each value to int first.
        total = sum(int(x) for x in helper.project_data(df, 0))
        df_out.loc[len(df_out)] = prefix + [total]
        return 0

    if df.shape[0] == 1:
        # Single-tuple case is delegated to st_opt (defined elsewhere).
        st_opt(df, df_out, prefix)
        return 0

    # General case: distinct first-dimension values in sorted order.
    for dim0_v in sorted(set(helper.project_data(df, 0).values)):
        sub_data = helper.slice_data_dim0(df, dim0_v)
        buc(sub_data, df_out, prefix + [dim0_v])

    # Sub-cube with the first dimension aggregated away.
    sub_data = helper.remove_first_dim(df)
    buc(sub_data, df_out, prefix + ["ALL"])
    return 0
import pandas as pd