Example #1
import numpy as np
import scipy.stats as st

def seq95d(a):
    """Lower bound of the 95% t-confidence interval of a pandas Series."""
    a = a.values
    result = st.t.interval(0.95,
                           len(a) - 1,
                           loc=np.mean(a),
                           scale=st.sem(a))[0]
    if np.isnan(result):  # e.g. a single observation makes sem() NaN
        ColorPrint.print_red(f'CI failed on array {a}')  # project-local logger
        return a[0]  # fall back to the lone value
    return result
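A minimal usage sketch (the Series values are illustrative):

    import pandas as pd

    s = pd.Series([4.1, 3.8, 4.4, 4.0])
    print(seq95d(s))  # lower bound of the 95% CI around the mean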
Example #2
import numpy as np
import scipy.stats as st

def abs95u(a):
    """Upper bound of the 95% t-confidence interval of a pandas Series."""
    a = a.values
    result = st.t.interval(0.95,
                           len(a) - 1,
                           loc=np.mean(a),
                           scale=st.sem(a))[1]  # index 1 selects the upper bound
    if np.isnan(result):
        ColorPrint.print_red(
            f'CI failed on array {a} with type {type(a)}')
        return a[0]  # fall back to the lone value
    return result
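Why the NaN guard: for a single observation, st.sem returns NaN (the sample standard error is undefined for n = 1), so t.interval yields (nan, nan) and both helpers fall back to the lone value. A quick check, which may also trigger a SciPy RuntimeWarning:

    import pandas as pd

    one = pd.Series([7.0])
    print(abs95u(one))  # prints the red warning, then 7.0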
Example #3
import os

def main():
    df_path = './dataframes/'
    for subdir, dirs, files in os.walk(df_path):
        for filename in files:
            if filename.endswith('.csv'):
                # join with subdir (not df_path) so files found in nested
                # directories resolve to their correct full path
                path = os.path.join(subdir, filename)
                print(filename)
                latex_printer(path)
            else:
                ColorPrint.print_red(f'CAUTION: Skipped {filename}')
    return
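An equivalent scan with pathlib, shown only as a sketch (latex_printer and ColorPrint are the same project helpers assumed above):

    from pathlib import Path

    def main_pathlib():
        for path in Path('./dataframes/').rglob('*'):
            if path.suffix == '.csv':
                print(path.name)
                latex_printer(str(path))
            elif path.is_file():
                ColorPrint.print_red(f'CAUTION: Skipped {path.name}')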
Example #4
    def pgd_graphlet_counts(self, n_threads=4) -> Dict:
        """
        Return the dictionary of graphlets and their counts - based on Neville's PGD
        :return:
        """
        pgd_path = Path(get_imt_input_directory()).parent / 'src' / 'PGD'
        graphlet_counts = {}

        if 'Linux' in platform.platform() and (pgd_path / 'pgd_0').exists():
            edgelist = '\n'.join(nx.generate_edgelist(self.graph, data=False))
            edgelist += '\nX'  # terminating sentinel expected by the pgd_0 binary
            dummy_path = f'{pgd_path}/dummy.txt'

            try:
                bash_script = f'{pgd_path}/pgd_0 -w {n_threads} -f {dummy_path} -c {dummy_path}'

                # without a timeout, the TimeoutExpired handler below could never fire
                pipe = sub.run(bash_script,
                               shell=True,
                               capture_output=True,
                               input=edgelist.encode(),
                               check=True,
                               timeout=30000)

                output_data = pipe.stdout.decode()

            except sub.TimeoutExpired as e:
                CP.print_blue(f'PGD timeout! {e.stderr}')
                graphlet_counts = {}

            except sub.CalledProcessError as e:
                CP.print_blue(f'PGD error {e.stderr}')
                graphlet_counts = {}
            except Exception as e:
                CP.print_blue(str(e))
                graphlet_counts = {}
            else:  # pgd is successfully run
                for line in output_data.split('\n')[:-1]:  # last line blank
                    graphlet_name, count = map(str.strip, line.split('='))
                    graphlet_counts[graphlet_name] = int(count)
        else:
            CP.print_red(f'PGD executable not found at {pgd_path}/pgd_0 '
                         f'(or platform is not Linux)')
            graphlet_counts = {}
        self.stats['pgd_graphlet_counts'] = graphlet_counts

        return graphlet_counts
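The parsing loop assumes PGD writes one 'name = count' pair per line plus a trailing blank line; a self-contained illustration of that shape (the graphlet names here are made up, not guaranteed PGD output):

    output_data = 'total_2_1edge = 88\ntotal_3_tris = 75\n'
    counts = {}
    for line in output_data.split('\n')[:-1]:  # drop the trailing blank entry
        name, count = map(str.strip, line.split('='))
        counts[name] = int(count)
    assert counts == {'total_2_1edge': 88, 'total_3_tris': 75}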
Example #5
    def write_stats_jsons(self,
                          stats: Union[str, list],
                          overwrite: bool = False) -> None:
        """
        write the stats dictionary as a compressed json
        :return:
        """
        # standardize incoming type
        if isinstance(stats, str):
            stats = [stats]

        for statistic in stats:
            # every requested statistic must name a public callable on self
            assert statistic in [
                method_name for method_name in dir(self)
                if callable(getattr(self, method_name))
                and not method_name.startswith('_')
            ]
            output_directory = get_imt_output_directory()

            file_output_directory = os.path.join(output_directory,
                                                 'graph_stats', self.dataset,
                                                 self.model, statistic)
            ensure_dir(file_output_directory, recursive=True)

            filename = os.path.join(
                output_directory, 'graph_stats', self.dataset, self.model,
                statistic, f'gs_{self.trial}_{self.iteration}.json.gz')

            # if the file already exists and the overwrite flag is not set,
            # skip this statistic but keep processing the rest of the list
            if not overwrite and verify_file(filename):
                CP.print_green(
                    f'Statistic: {statistic} output file for {self.model}-{self.dataset}-{self.trial} already exists. Skipping.'
                )
                continue

            data = None  # ensure the failure path below never hits an unbound name
            try:
                data = self[statistic]  # todo : maybe there's a better way?!
                save_zipped_json(data, filename)
                CP.print_blue(f'Stats json stored at {filename}')
            except Exception as e:
                CP.print_red(f'Exception occurred on {filename}!')
                CP.print_red(str(e))
                if statistic == 'netlsd':
                    save_zipped_json(data, filename + '.failed')
        return
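A hedged usage sketch (gs stands in for an instance of the enclosing stats class; the statistic names are hypothetical, any public method name qualifies):

    gs.write_stats_jsons('degree_dist')             # single statistic
    gs.write_stats_jsons(['degree_dist', 'netlsd'],
                         overwrite=True)            # force recompute of both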
Example #6
    for dataset in datasets:
        for model in models:
            for stat in stats:
                ColorPrint.print_green(
                    f'computing {stat} distances for {dataset} {model}')
                trials = walker_texas_ranger(dataset,
                                             model,
                                             stat=implemented_metrics[stat],
                                             unique=True)
                args = [[dataset, model, trial, stat] for trial in trials]
                print(args[:5])
                # exit(-1)
                try:
                    results = parallel_async(distance_computation,
                                             args,
                                             num_workers=10)
                    df = pd.concat(results)
                except Exception as e:
                    ColorPrint.print_red(
                        f'Error for {dataset!r} {model!r} {stat!r}: {e}')
                    continue

                # output_dir = f'/data/infinity-mirror/output/distances/{dataset}/{model}/{stat}/'
                output_dir = Path(
                    get_imt_output_directory()) / 'distances' / dataset
                ensure_dir(output_dir, recursive=True)
                df.to_csv(output_dir / f'{dataset}_{model}_{stat}.csv')
                # for arg in args:
                #     distance_computation(*arg)
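parallel_async is a project-local helper; a minimal stand-in with the same call shape, built on multiprocessing (an assumption about its semantics, not the project's actual implementation):

    from multiprocessing import Pool

    def parallel_async(func, args, num_workers=10):
        # args is a list of argument lists; starmap_async unpacks each one
        with Pool(processes=num_workers) as pool:
            result = pool.starmap_async(func, args)
            return result.get()  # block until every worker has finished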