def __result_parser(self, result):
    '''Flatten a list of per-test result dictionaries into one summary.

    input example:
    [{'num_variants_CFisher': 3, 'pvalue_CFisher': 0.06156133406564546,
      'total_mac_CFisher': 4, 'sample_size_CFisher': 400,
      'statistic_CFisher': 1.7976931348623157e+308},
     {'statistic_WSSRankTest': 1784.0, 'total_mac_WSSRankTest': 4,
      'pvalue_WSSRankTest': 0.027761334758504563,
      'num_variants_WSSRankTest': 3, 'sample_size_WSSRankTest': 400}]

    output format:
    a dictionary holding ``num_variants_analyzed``, ``total_mac_analyzed``
    and ``sample_size_analyzed`` (the first value encountered for each is
    kept) plus one ``power_<name>`` entry per ``pvalue_<name>`` key.

    Raises NullResultException when ``result`` is empty, or when every
    collected value is null according to ``is_null`` (which includes the
    case where nothing was collected at all).
    '''
    if len(result) == 0:
        raise NullResultException
    stat_prefixes = ('num_variants', 'total_mac', 'sample_size')
    pvalue_prefix = 'pvalue_'
    parsed = {}
    seen_names = []
    for record in result:
        for field, value in record.items():
            # record statistics: only the first occurrence of each is kept
            for prefix in stat_prefixes:
                target = '{}_analyzed'.format(prefix)
                if field.startswith(prefix) and target not in parsed:
                    parsed[target] = value
            # everything below only concerns p-values
            if not field.startswith(pvalue_prefix):
                continue
            label = field[len(pvalue_prefix):]
            if label in seen_names:
                # resolve name conflict: append the first unused numeric
                # suffix, counting from 1
                suffix = 1
                while '{}_{}'.format(label, suffix) in seen_names:
                    suffix += 1
                label = '{}_{}'.format(label, suffix)
            seen_names.append(label)
            parsed['power_{}'.format(label)] = value
    null_flags = [is_null(value) for value in parsed.values()]
    if null_flags.count(True) == len(parsed):
        raise NullResultException
    return parsed
def __result_parser(self, result):
    '''Merge per-test result dictionaries into a single flat dictionary.

    input example:
    [{'num_variants_CFisher': 3, 'pvalue_CFisher': 0.06156133406564546,
      'total_mac_CFisher': 4, 'sample_size_CFisher': 400,
      'statistic_CFisher': 1.7976931348623157e+308},
     {'statistic_WSSRankTest': 1784.0, 'total_mac_WSSRankTest': 4,
      'pvalue_WSSRankTest': 0.027761334758504563,
      'num_variants_WSSRankTest': 3, 'sample_size_WSSRankTest': 400}]

    output format:
    a dictionary with ``<statistic>_analyzed`` entries for ``num_variants``,
    ``total_mac`` and ``sample_size`` (first value seen wins) and one
    ``power_<name>`` entry for every ``pvalue_<name>`` key.

    Raises NullResultException if ``result`` is empty or if all collected
    values are null as judged by ``is_null``.
    '''
    if len(result) == 0:
        raise NullResultException
    tracked = ('num_variants', 'total_mac', 'sample_size')
    summary = {}
    used = []
    for entry in result:
        for key, val in entry.items():
            # record statistics (never overwrite one already recorded)
            for stat in tracked:
                out_key = '{}_analyzed'.format(stat)
                if key.startswith(stat) and out_key not in summary:
                    summary[out_key] = val
            # record p-values
            if key.startswith('pvalue_'):
                method = key[7:]
                if method in used:
                    # resolve name conflict with the smallest free suffix
                    n = 1
                    while '{}_{}'.format(method, n) in used:
                        n += 1
                    method = '{}_{}'.format(method, n)
                used.append(method)
                summary['power_{}'.format(method)] = val
    flags = [is_null(val) for val in summary.values()]
    if flags.count(True) == len(summary):
        raise NullResultException
    return summary
def preprocess(self, data, exclude=(), hide=()):
    """Format a result dictionary for tabular output.

    ``data`` is modified in place and is also returned.

    Processing steps:

    1. Normalize entries (the ``pool`` key is left untouched): an
       ``L.RunningStat`` value is replaced by its ``mean()``, with the
       results of ``left()`` and ``sd()`` stored under ``<key>_median`` and
       ``<key>_std``; values for which ``is_null`` is true are removed; keys
       listed in ``hide`` are renamed to ``_<key>``; remaining list-valued
       keys are recorded as multi-row columns.
    2. When at least one key starts with ``power``, fold the per-method
       ``power_<method>``, ``power_<method>_std`` and
       ``power_<method>_median`` entries into the parallel lists ``power``,
       ``method``, ``power_std`` and ``power_median``.
    3. Repeat every value other than those lists, ``model`` and the
       multi-row columns so that it has one entry per power row.
    4. Build the ordered column names.

    Args:
        data: dictionary of results keyed by column name.
        exclude: keys to leave out of the returned column names.
        hide: keys to rename with a leading underscore.

    Returns:
        A tuple ``(data, single_row_colnames, multi_row_colnames)``.

    Raises:
        KeyError: if a ``power_*_std`` entry without ``default`` in its name
            has to be rescaled and ``data`` has no ``_replicates`` entry.
    """
    multicols = []
    for key in list(data):
        if key == "pool":
            continue
        value = data[key]
        # collect results from RunningStat
        if isinstance(value, L.RunningStat):
            data[key], data[key + "_median"], data[key + "_std"] = (
                value.mean(),
                value.left(),
                value.sd(),
            )
            continue
        if is_null(value):
            # delete trivial data
            del data[key]
        elif key in hide:
            # adjust key names
            data["_" + key] = data.pop(key)
        elif isinstance(value, list):
            # multi column information
            multicols.append(key)

    if any(name.startswith("power") for name in data):
        # manually combine multiple power analysis methods into single column
        combined = ("power", "power_std", "power_median")
        data["power"] = []
        data["method"] = []
        for key in list(data):
            if not key.startswith("power") or key in combined:
                continue
            value = data[key]
            if key.endswith("_std"):
                if "default" not in key:
                    # adjust standard error
                    value = value / np.sqrt(data["_replicates"])
                data.setdefault("power_std", []).append(value)
            elif key.endswith("_median"):
                data.setdefault("power_median", []).append(value)
            else:
                data["power"].append(value)
                # Strip only the leading prefix: an unanchored substitution
                # would also eat "power_" inside the method name itself.
                data["method"].append(re.sub(r"^power_", "", key))
            del data[key]

    # expand table: one entry per power row (at least one row)
    n_rows = max(len(data["power"]), 1) if "power" in data else 1
    untouched = {"power", "power_std", "power_median", "method", "model"}
    untouched.update(multicols)
    for key in list(data):
        if key not in untouched:
            data[key] = [data[key]] * n_rows

    # manually create ordered column names: "method" sorts first, then
    # "power*", then the rest; "_" is mapped to "|" so it sorts after letters
    def order(name):
        return name.replace("_", "|").replace("method", "AAA").replace("power", "AAB")

    colnames = ["title", "name"] + sorted(
        (name for name in data if name not in exclude), key=order
    )
    # return: data, single row colnames, multi row colnames
    single_row = [name for name in colnames if name not in multicols]
    multi_row = [name for name in colnames if name in multicols]
    return data, single_row, multi_row
def preprocess(self, data, exclude=(), hide=()):
    '''Format a result dictionary for tabular output.

    ``data`` is modified in place and is also returned.

    Processing steps:

    1. Normalize entries (the ``pool`` key is left untouched): an
       ``L.RunningStat`` value is replaced by its ``mean()``, with the
       results of ``left()`` and ``sd()`` stored under ``<key>_median`` and
       ``<key>_std``; values for which ``is_null`` is true are removed; keys
       listed in ``hide`` are renamed to ``_<key>``; remaining list-valued
       keys are recorded as multi-row columns.
    2. When at least one key starts with ``power``, fold the per-method
       ``power_<method>``, ``power_<method>_std`` and
       ``power_<method>_median`` entries into the parallel lists ``power``,
       ``method``, ``power_std`` and ``power_median``.
    3. Repeat every value other than those lists, ``model`` and the
       multi-row columns so that it has one entry per power row.
    4. Build the ordered column names.

    Args:
        data: dictionary of results keyed by column name.
        exclude: keys to leave out of the returned column names.
        hide: keys to rename with a leading underscore.

    Returns:
        A tuple ``(data, single_row_colnames, multi_row_colnames)``.

    Raises:
        KeyError: if a ``power_*_std`` entry without ``default`` in its name
            has to be rescaled and ``data`` has no ``_replicates`` entry.
    '''
    multicols = []
    for key in list(data):
        if key == 'pool':
            continue
        value = data[key]
        # collect results from RunningStat
        if isinstance(value, L.RunningStat):
            data[key], data[key + '_median'], data[key + '_std'] = \
                value.mean(), value.left(), value.sd()
            continue
        if is_null(value):
            # delete trivial data
            del data[key]
        elif key in hide:
            # adjust key names
            data['_' + key] = data.pop(key)
        elif isinstance(value, list):
            # multi column information
            multicols.append(key)

    if any(name.startswith('power') for name in data):
        # manually combine multiple power analysis methods into single column
        combined = ('power', 'power_std', 'power_median')
        data['power'] = []
        data['method'] = []
        for key in list(data):
            if not key.startswith('power') or key in combined:
                continue
            value = data[key]
            if key.endswith('_std'):
                if 'default' not in key:
                    # adjust standard error
                    value = value / np.sqrt(data['_replicates'])
                data.setdefault('power_std', []).append(value)
            elif key.endswith('_median'):
                data.setdefault('power_median', []).append(value)
            else:
                data['power'].append(value)
                # Strip only the leading prefix: an unanchored substitution
                # would also eat 'power_' inside the method name itself.
                data['method'].append(re.sub(r'^power_', '', key))
            del data[key]

    # expand table: one entry per power row (at least one row)
    n_rows = max(len(data['power']), 1) if 'power' in data else 1
    untouched = {'power', 'power_std', 'power_median', 'method', 'model'}
    untouched.update(multicols)
    for key in list(data):
        if key not in untouched:
            data[key] = [data[key]] * n_rows

    # manually create ordered column names: 'method' sorts first, then
    # 'power*', then the rest; '_' is mapped to '|' so it sorts after letters
    def order(name):
        return name.replace('_', '|').replace('method', 'AAA').replace('power', 'AAB')

    colnames = ['title', 'name'] + sorted(
        (name for name in data if name not in exclude), key=order)
    # return: data, single row colnames, multi row colnames
    single_row = [name for name in colnames if name not in multicols]
    multi_row = [name for name in colnames if name in multicols]
    return data, single_row, multi_row
def __get_type(self, values):
    '''Classify every entry of ``values`` as 'number' or 'string'.

    An entry is a 'number' when ``float()`` accepts it and a 'string'
    otherwise. For an entry that is a list, its first element that is not
    null (per ``is_null``) is classified in its place; a list without such
    an element is classified as 'string'.

    Returns a list of 'number' / 'string' labels, one per entry and in the
    same order as ``values``.
    '''
    types = []
    for item in values:
        # Here assume the input list has the same type on each element
        # Which is true in simulation data
        if isinstance(item, list):
            for element in item:
                if not is_null(element):
                    item = element
                    break
        try:
            float(item)
        except (TypeError, ValueError, OverflowError):
            # Only conversion failures mean "not a number"; anything else
            # (KeyboardInterrupt, SystemExit, ...) must keep propagating.
            types.append('string')
        else:
            types.append('number')
    return types
def preprocess(self, data, exclude=(), hide=()):
    '''Format a result dictionary for tabular output.

    ``data`` is modified in place and is also returned.

    Processing steps:

    1. Normalize entries (the ``pool`` key is left untouched): an
       ``L.RunningStat`` value is replaced by its ``mean()``, with the
       results of ``left()`` and ``sd()`` stored under ``<key>_median`` and
       ``<key>_std``; values for which ``is_null`` is true are removed; keys
       listed in ``hide`` are renamed to ``_<key>``; remaining list-valued
       keys are recorded as multi-row columns.
    2. When at least one key starts with ``power``, fold the per-method
       ``power_<method>``, ``power_<method>_std`` and
       ``power_<method>_median`` entries into the parallel lists ``power``,
       ``method``, ``power_std`` and ``power_median``.
    3. Repeat every value other than those lists, ``model`` and the
       multi-row columns so that it has one entry per power row.
    4. Build the ordered column names.

    Args:
        data: dictionary of results keyed by column name.
        exclude: keys to leave out of the returned column names.
        hide: keys to rename with a leading underscore.

    Returns:
        A tuple ``(data, single_row_colnames, multi_row_colnames)``.

    Raises:
        KeyError: if a ``power_*_std`` entry without ``default`` in its name
            has to be rescaled and ``data`` has no ``_replicates`` entry.
    '''
    multicols = []
    for key in list(data):
        if key == 'pool':
            continue
        current = data[key]
        # collect results from RunningStat
        if isinstance(current, L.RunningStat):
            data[key], data[key + '_median'], data[key + '_std'] = \
                current.mean(), current.left(), current.sd()
            continue
        if is_null(current):
            # delete trivial data
            del data[key]
        elif key in hide:
            # adjust key names
            data['_' + key] = data.pop(key)
        elif isinstance(current, list):
            # multi column information
            multicols.append(key)

    if any(name.startswith('power') for name in data):
        # manually combine multiple power analysis methods into single column
        merged_keys = ('power', 'power_std', 'power_median')
        data['power'] = []
        data['method'] = []
        for key in list(data):
            if not key.startswith('power') or key in merged_keys:
                continue
            current = data[key]
            if key.endswith('_std'):
                if 'default' not in key:
                    # adjust standard error
                    current = current / np.sqrt(data['_replicates'])
                data.setdefault('power_std', []).append(current)
            elif key.endswith('_median'):
                data.setdefault('power_median', []).append(current)
            else:
                data['power'].append(current)
                # Strip only the leading prefix: an unanchored substitution
                # would also eat 'power_' inside the method name itself.
                data['method'].append(re.sub(r'^power_', '', key))
            del data[key]

    # expand table: one entry per power row (at least one row)
    n_rows = max(len(data['power']), 1) if 'power' in data else 1
    fixed = {'power', 'power_std', 'power_median', 'method', 'model'}
    fixed.update(multicols)
    for key in list(data):
        if key not in fixed:
            data[key] = [data[key]] * n_rows

    # manually create ordered column names: 'method' sorts first, then
    # 'power*', then the rest; '_' is mapped to '|' so it sorts after letters
    def order(name):
        return name.replace('_', '|').replace('method', 'AAA').replace(
            'power', 'AAB')

    colnames = ['title', 'name'] + sorted(
        (name for name in data if name not in exclude), key=order)
    # return: data, single row colnames, multi row colnames
    single_row = [name for name in colnames if name not in multicols]
    multi_row = [name for name in colnames if name in multicols]
    return data, single_row, multi_row