import os

import numpy as np
import scipy.stats as st

# ColorPrint and latex_printer are project-level utilities assumed to be imported elsewhere in this module.


def seq95d(a):
    """Return the lower bound of the 95% confidence interval of the values in `a` (a pandas Series)."""
    a = a.values
    result = st.t.interval(0.95, len(a) - 1, loc=np.mean(a), scale=st.sem(a))[0]
    if np.isnan(result):
        # CI is undefined (e.g., a single sample); fall back to the first value
        ColorPrint.print_red(f'CI failed on array {a}')
        return a[0]
    return result

def abs95u(a):
    """Return the upper bound of the 95% confidence interval of the values in `a` (a pandas Series)."""
    a = a.values
    result = st.t.interval(0.95, len(a) - 1, loc=np.mean(a), scale=st.sem(a))[1]
    if np.isnan(result):
        # CI is undefined (e.g., a single sample); fall back to the first value
        ColorPrint.print_red(f'CI failed on array {a} with type {type(a)}')
        return a[0]
    return result
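
# Usage sketch (assumption, not part of the original module): these helpers appear to be
# intended as aggregation functions over a pandas Series, e.g. in a groupby().agg() call.
# The frame, column, and group names below are made up for illustration.
#
#   import pandas as pd
#   df = pd.DataFrame({'model': ['A', 'A', 'A', 'B', 'B'],
#                      'score': [0.91, 0.88, 0.93, 0.75, 0.78]})
#   summary = df.groupby('model')['score'].agg(['mean', seq95d, abs95u])
#   print(summary)  # one row per model: mean plus the lower/upper 95% CI bounds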

def main():
    """Walk the dataframes directory and run the LaTeX printer on every CSV file found."""
    df_path = './dataframes/'
    for subdir, dirs, files in os.walk(df_path):
        for filename in files:
            if filename.split('.')[-1] == 'csv':
                path = os.path.join(df_path, filename)
                print(filename)
                latex_printer(path)
            else:
                ColorPrint.print_red(f'CAUTION: Skipped {filename}')
    return

def pgd_graphlet_counts(self, n_threads=4) -> Dict:
    """
    Return the dictionary of graphlets and their counts - based on Neville's PGD
    :return:
    """
    pgd_path = Path(get_imt_input_directory()).parent / 'src' / 'PGD'
    graphlet_counts = {}

    if 'Linux' in platform.platform() and (pgd_path / 'pgd_0').exists():
        edgelist = '\n'.join(nx.generate_edgelist(self.graph, data=False))
        edgelist += '\nX'  # add the X dummy character to terminate the edge list
        dummy_path = f'{pgd_path}/dummy.txt'

        try:
            bash_script = f'{pgd_path}/pgd_0 -w {n_threads} -f {dummy_path} -c {dummy_path}'
            # pipe = sub.run(bash_script, shell=True, capture_output=True, input=edgelist.encode(), check=True, timeout=30000)
            pipe = sub.run(bash_script, shell=True, capture_output=True, input=edgelist.encode(), check=True)
            output_data = pipe.stdout.decode()
        except sub.TimeoutExpired as e:
            CP.print_blue(f'PGD timeout! {e.stderr}')
            graphlet_counts = {}
        except sub.CalledProcessError as e:
            CP.print_blue(f'PGD error {e.stderr}')
            graphlet_counts = {}
        except Exception as e:
            CP.print_blue(str(e))
            graphlet_counts = {}
        else:  # PGD ran successfully; parse its "name = count" output lines
            for line in output_data.split('\n')[:-1]:  # last line is blank
                graphlet_name, count = map(str.strip, line.split('='))
                graphlet_counts[graphlet_name] = int(count)
    else:
        CP.print_red(f'PGD executable not found at {pgd_path}/pgd_0')
        graphlet_counts = {}

    self.stats['pgd_graphlet_counts'] = graphlet_counts
    return graphlet_counts
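
# For reference, a self-contained sketch of the parsing step above (assumption: PGD
# writes one "name = count" pair per line, followed by a blank line; the graphlet
# names here are made up, not actual PGD output):
#
#   sample_output = "total_2_1edge = 120\ntotal_3_tris = 45\n"
#   counts = {}
#   for line in sample_output.split('\n')[:-1]:   # drop the trailing blank line
#       name, count = map(str.strip, line.split('='))
#       counts[name] = int(count)
#   # counts == {'total_2_1edge': 120, 'total_3_tris': 45}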

def write_stats_jsons(self, stats: Union[str, list], overwrite: bool = False) -> None:
    """
    write the stats dictionary as a compressed json
    :return:
    """
    # standardize incoming type
    if isinstance(stats, str):
        stats = [stats]

    for statistic in stats:
        # each requested name must correspond to a public callable on this object
        assert statistic in [
            method_name for method_name in dir(self)
            if callable(getattr(self, method_name)) and not method_name.startswith('_')
        ]

        output_directory = get_imt_output_directory()
        file_output_directory = os.path.join(output_directory, 'graph_stats', self.dataset, self.model, statistic)
        ensure_dir(file_output_directory, recursive=True)

        filename = os.path.join(output_directory, 'graph_stats', self.dataset, self.model, statistic,
                                f'gs_{self.trial}_{self.iteration}.json.gz')

        # if the file already exists and the overwrite flag is not set, don't rework it;
        # move on to the next requested statistic
        if not overwrite and verify_file(filename):
            CP.print_green(
                f'Statistic: {statistic} output file for {self.model}-{self.dataset}-{self.trial} already exists. Skipping.'
            )
            continue

        try:
            data = self[statistic]  # todo : maybe there's a better way?!
            save_zipped_json(data, filename)
            CP.print_blue(f'Stats json stored at {filename}')
        except Exception as e:
            CP.print_red(f'Exception occurred on {filename}!')
            CP.print_red(str(e))
            if statistic == 'netlsd':
                # note: `data` is only defined here if self[statistic] succeeded and the failure
                # happened while writing the file
                save_zipped_json(data, filename + '.failed')
    return
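
# Usage sketch (not from the original source): assuming `gs` is an instance of the
# surrounding stats class, constructed elsewhere in the project, the method accepts
# either a single statistic name or a list of names:
#
#   gs.write_stats_jsons('pgd_graphlet_counts')                # single name is wrapped in a list
#   gs.write_stats_jsons(['pgd_graphlet_counts', 'netlsd'],
#                        overwrite=True)                       # overwrite=True reworks existing files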

for dataset in datasets:
    for model in models:
        for stat in stats:
            ColorPrint.print_green(f'computing {stat} distances for {dataset} {model}')
            trials = walker_texas_ranger(dataset, model, stat=implemented_metrics[stat], unique=True)
            args = [[dataset, model, trial, stat] for trial in trials]
            print(args[:5])
            # exit(-1)

            try:
                results = parallel_async(distance_computation, args, num_workers=10)
                df = pd.concat(results)
            except Exception as e:
                ColorPrint.print_red(f'Error, for {dataset!r} {model!r} {stat!r}')
                continue

            # output_dir = f'/data/infinity-mirror/output/distances/{dataset}/{model}/{stat}/'
            output_dir = Path(get_imt_output_directory()) / 'distances' / dataset
            ensure_dir(output_dir, recursive=True)
            df.to_csv(output_dir / f'{dataset}_{model}_{stat}.csv')

            # for arg in args:
            #     distance_computation(*arg)