def prep_environment(self) -> None:
    """
    Prepare the Python environment for HRG
    :return:
    """
    if check_file_exists('./envs/hrg'):
        return

    CP.print_blue('Making virtual environment for HRG')
    sub.run(
        'python2 -m pip install --user virtualenv; '
        'python2 -m virtualenv -p python2 ./envs/hrg; '
        '. ./envs/hrg/bin/activate; which python2;',
        shell=True,
        stdout=sub.DEVNULL)  # create and activate environment

    if 'Linux' not in platform.platform():
        completed_process = sub.run(
            'export CC=gcc-9; export CXX=g++-9; . ./envs/hrg/bin/activate; '
            'python2 -m pip install -r ./envs/requirements_hrg.txt',
            shell=True,
            stdout=sub.DEVNULL)  # install requirements for HRG
    else:
        completed_process = sub.run(
            '. ./envs/hrg/bin/activate; '
            'python2 -m pip install -r ./envs/requirements_hrg.txt',
            shell=True,
            stdout=sub.DEVNULL)  # install requirements for HRG

    assert completed_process.returncode == 0, 'Error while creating environment for HRG'
    return
def parallel_computation(input_path, dataset, model):
    path = os.path.join(input_path, dataset, model)
    input_filenames = [f for f in listdir(path) if isfile(join(path, f))]
    number_of_files = len(input_filenames)
    n_threads = 2

    pbar_inner = tqdm(total=number_of_files)

    def pbar_update(result):
        pbar_inner.update()
        pbar_inner.set_postfix_str(result)

    asyncResults = []
    with mp.Pool(n_threads) as innerPool:
        ColorPrint.print_green(
            f"Starting Pool with {n_threads} threads with {number_of_files} tasks."
        )
        for idx in range(number_of_files):
            r = innerPool.apply_async(sublevel_parallel_computation,
                                      [input_path, dataset, model, idx],
                                      callback=pbar_update)
            asyncResults.append(r)
        for r in asyncResults:
            try:
                r.wait()
            except Exception:
                continue
    return model, dataset
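# sublevel_parallel_computation is referenced above but not shown in this
# section. A minimal placeholder sketch, with the signature assumed from the
# apply_async call (input_path, dataset, model, idx); the real worker
# presumably loads and processes the idx-th graph file for (dataset, model).
def sublevel_parallel_computation(input_path, dataset, model, idx):
    return f'{dataset}/{model}: file {idx}'  # return value feeds pbar_update's postfix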
def diameter(self) -> float:
    CP.print_none('Calculating Diameter')
    diam = nx.diameter(self.graph)
    self.stats['diameter'] = diam
    return diam
def k_hop_reach(self) -> np.ndarray:
    """
    Returns the average number of nodes reachable from any node in k hops
    Two levels of aggregation:
        1. _k_hop_reachability_counter gives the absolute count of nodes reachable within k hops of a node
        2. overall_k_hop_dict aggregates the sum of all absolute counts for all nodes
    Normalizing factor: n ** 2 (once for each step)
    Then convert to a cumulative distribution
    :return:
    """
    CP.print_none('Calculating hop-plot')

    overall_k_hop_dict = Counter()
    for node in self.graph.nodes():
        k_hop_dict = self._k_hop_reachability_counter(node)
        overall_k_hop_dict += Counter(k_hop_dict)

    k_hop_vec = np.array([
        v for k, v in sorted(overall_k_hop_dict.items(), key=lambda x: x[0])
    ])
    k_hop_vec = k_hop_vec / (self.graph.order() ** 2)

    self.stats['k_hop_reach'] = np.cumsum(k_hop_vec)
    return self.stats['k_hop_reach']
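# _k_hop_reachability_counter is not shown in this section. A minimal
# BFS-based sketch, assuming it maps each hop count k to the number of nodes
# at exactly k hops from the source (the source itself excluded), so that the
# np.cumsum above yields the "reachable within k hops" distribution; the real
# implementation may differ.
def _k_hop_reachability_counter(self, node) -> Dict[int, int]:
    # hop distance from `node` to every reachable node
    lengths = nx.single_source_shortest_path_length(self.graph, node)
    counts = Counter(lengths.values())
    counts.pop(0, None)  # drop the source itself (distance 0)
    return dict(counts)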
def clustering_coefficients_by_degree(self) -> Dict[int, float]:
    """
    Returns the average clustering coefficient by degree
    :return:
    """
    CP.print_none('Calculating Clustering Coefficients and CC by degree')

    clustering_coeffs = nx.clustering(self.graph)
    self.stats['clustering_coeffs'] = clustering_coeffs

    clustering_by_degree = {}  # clustering coefficients grouped by degree

    # get the sums
    for node, cc in clustering_coeffs.items():
        deg = self.graph.degree[node]
        if deg not in clustering_by_degree:
            clustering_by_degree[deg] = []
        clustering_by_degree[deg].append(cc)

    avg_clustering_by_degree = {
        deg: np.mean(ccs)
        for deg, ccs in clustering_by_degree.items()
    }
    self.stats['clustering_coefficients_by_degree'] = avg_clustering_by_degree
    return avg_clustering_by_degree
def __init__(self, input_graph: nx.Graph, trial: int, **kwargs) -> None:
    super().__init__(model_name='BUGGE',
                     input_graph=input_graph,
                     trial=trial)
    self.rule_min = 2
    self.rule_max = 5
    CP.print_blue(f'Rule sizes: min: {self.rule_min}, max: {self.rule_max}')
    return
def degree_centrality(self) -> Dict[int, float]:
    """
    Degree centrality
    """
    CP.print_none('Calculating Degree Centrality')
    degree_centrality = nx.degree_centrality(self.graph)
    self.stats['degree_centrality'] = degree_centrality
    return degree_centrality
def seq95d(a):
    a = a.values
    # lower endpoint of the 95% confidence interval around the mean
    result = st.t.interval(0.95, len(a) - 1, loc=np.mean(a), scale=st.sem(a))[0]
    if np.isnan(result):
        ColorPrint.print_red(f'CI failed on array {a}')
        return a[0]
    return result
def closeness_centrality(self) -> Dict[int, float]:
    """
    Closeness centrality
    """
    CP.print_none('Calculating Closeness Centrality')
    closeness = nx.closeness_centrality(self.graph)
    self.stats['closeness_centrality'] = closeness
    return closeness
def adj_eigenvalues(self) -> np.ndarray:
    """
    Returns the eigenvalues of the Adjacency matrix
    :return:
    """
    CP.print_none('Calculating eigenvalues of Adjacency Matrix')
    adj_eigenvalues = nx.adjacency_spectrum(self.graph)
    self.stats['adj_eigenvalues'] = adj_eigenvalues
    return adj_eigenvalues
def pagerank(self) -> Dict[int, float]:
    """
    PageRank centrality
    """
    CP.print_none('Calculating PageRank')
    # note: nx.pagerank_scipy was removed in NetworkX 3.0; nx.pagerank is the modern equivalent
    pagerank = nx.pagerank_scipy(self.graph)
    pagerank = {int(k): v for k, v in pagerank.items()}
    self.stats['pagerank'] = pagerank
    return pagerank
def update(self, new_input_graph: nx.Graph) -> None:
    """
    Update the model to (a) update the input graph, (b) fit the parameters
    :return:
    """
    CP.print_none('Updating graph')
    self.input_graph = new_input_graph
    self._fit()  # re-fit the parameters
    return
def abs95u(a):
    a = a.values
    # upper endpoint of the 95% confidence interval around the mean
    result = st.t.interval(0.95, len(a) - 1, loc=np.mean(a), scale=st.sem(a))[1]
    if np.isnan(result):
        ColorPrint.print_red(f'CI failed on array {a} with type {type(a)}')
        return a[0]
    return result
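# A small usage sketch of the two CI helpers above (seq95d and abs95u),
# assuming they are fed pandas Series, since both access a.values; the
# numbers are illustrative.
import pandas as pd

col = pd.Series([0.91, 0.88, 0.95, 0.90])
lo, hi = seq95d(col), abs95u(col)  # lower / upper 95% CI endpoints
assert lo <= col.mean() <= hi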
def assortativity(self) -> float:
    """
    Returns the assortativity of the network
    :return:
    """
    CP.print_none('Calculating Degree Assortativity')
    assortativity = nx.degree_assortativity_coefficient(self.graph)
    self.stats['assortativity'] = assortativity
    return assortativity
def main():
    df_path = './dataframes/'
    for subdir, dirs, files in os.walk(df_path):
        for filename in files:
            if filename.endswith('.csv'):
                path = os.path.join(subdir, filename)  # join with subdir so nested CSVs resolve
                print(filename)
                latex_printer(path)
            else:
                ColorPrint.print_red(f'CAUTION: Skipped {filename}')
    return
def write_stats_pickle(self, base_path: Union[str, Path]):
    """
    Write the stats dictionary as a pickle
    :return:
    """
    filename = os.path.join(base_path, 'graph_stats', self.dataset,
                            self.model,
                            f'gs_{self.trial}_{self.iteration}.pkl.gz')
    CP.print_blue(f'Stats pickle stored at {filename}')
    save_pickle(self.stats, filename)
    return
def get_filenames(base_path, dataset, models):
    filenames = []
    for model in models:
        path = os.path.join(base_path, dataset, model)
        for subdir, dirs, files in os.walk(path):
            for filename in files:
                if 'seq' not in filename and 'rob' not in filename:
                    # print(f'loading {filename}')
                    filenames.append(os.path.join(subdir, filename))
                    # yield load_pickle(os.path.join(subdir, filename))
    ColorPrint.print_bold(f"Found {len(filenames)} graph files to be loaded.")
    return filenames
def _fit(self) -> None:
    from src.netgan.fit import fit
    # note: nx.to_scipy_sparse_matrix was removed in NetworkX 3.0 in favor of nx.to_scipy_sparse_array
    sparse_adj = nx.to_scipy_sparse_matrix(self.input_graph)
    try:
        scores, tg_sum = fit(sparse_adj)
    except Exception as e:
        CP.print_orange(f'NetGAN fit failed\n{e}')
        scores, tg_sum = None, None
    self.params['scores'] = scores
    self.params['tg_sum'] = tg_sum
    return
def main() -> None:
    args = parse_args()
    num_jobs, num_trials = int(args.cores[0]), int(args.trials[0])
    CP.print_green(
        f'Running infinity mirror on {num_jobs} cores for {num_trials} trials')

    Parallel(n_jobs=num_jobs, backend='multiprocessing')(
        delayed(run_infinity_mirror)(trial=i + 1, args=args)
        for i in range(num_trials))
    return
def make_dirs(output_dir: str, gname: str, model: str) -> None:
    """
    Makes input and output directories if they do not exist already
    :return:
    """
    output_dir = Path(output_dir)
    for dirname in ('pickles', f'pickles/{gname}', f'pickles/{gname}/{model}',
                    'features', f'features/{gname}',
                    f'features/{gname}/{model}'):
        dir_ = output_dir / dirname
        if not dir_.exists():
            CP.print_blue(f'Making dir {dir_!r}')
            os.makedirs(dir_, exist_ok=True)
    return
def component_size_distribution(self) -> List[Tuple[int, float]]:
    """
    Returns the distribution of component sizes and fraction of nodes in each component, largest first
    :return:
    """
    CP.print_none('Calculating Component Size Distribution')

    component_size_ratio_list = [
        (len(c), len(c) / self.graph.order()) for c in sorted(
            nx.connected_components(self.graph), key=len, reverse=True)
    ]
    self.stats['component_size_distribution'] = component_size_ratio_list
    return component_size_ratio_list
def _gen(self, gname: str, gen_id: int) -> nx.Graph:
    from src.netgan.netgan.utils import graph_from_scores
    assert 'scores' in self.params
    assert 'tg_sum' in self.params

    if self.params['scores'] is None or self.params['tg_sum'] is None:
        CP.print_orange('NetGAN gen failed')
        raise Exception('Generation failed!')

    gen_mat = graph_from_scores(self.params['scores'], self.params['tg_sum'])
    g = nx.from_numpy_array(gen_mat, create_using=nx.Graph())
    g.name = gname
    g.gen_id = gen_id
    return g
def prep_environment(self) -> None:
    proc = sub.run('conda init bash; . ~/.bashrc; conda activate netgan',
                   shell=True,
                   stdout=sub.DEVNULL)
    os.makedirs('./src/netgan/dumps', exist_ok=True)  # make the directory to store the dumps

    if proc.returncode == 0:  # conda environment exists
        return

    CP.print_blue('Making conda environment for NetGAN')
    proc = sub.run('conda env create -f ./envs/netgan.yml',
                   shell=True,
                   stdout=sub.DEVNULL)  # create and activate environment
    assert proc.returncode == 0, 'Error while creating env for NetGAN'
    return
def laplacian_eigenvalues(self) -> np.ndarray:
    """
    Returns eigenvalues of the Laplacian
    :return:
    """
    CP.print_none('Calculating Laplacian Eigenvalues')
    if self.graph.order() == 0 or self.graph.size() == 0:
        CP.print_orange(
            f'Graph has {self.graph.order()} nodes and {self.graph.size()} edges!')
        laplacian_eigs = []
    else:
        laplacian_eigs = nx.laplacian_spectrum(self.graph)
    self.stats['laplacian_eigenvalues'] = laplacian_eigs
    return laplacian_eigs
def _calculate_all_stats(self):
    """
    Calculate all stats
    """
    CP.print_orange('Calculating all stats')

    object_methods = [
        method_name for method_name in dir(self)
        if callable(getattr(self, method_name))
        and not method_name.startswith('_')
    ]
    for method_name in object_methods:
        method = getattr(self, method_name)
        try:
            method()
        except NotImplementedError:
            pass
def degree_dist(self, normalized=True) -> Dict[int, float]:
    """
    Returns the degrees counter - keys: degrees, values: #nodes with that degree
    :return:
    """
    CP.print_none('Calculating Degree Distribution')

    degree_seq = sorted(deg for _, deg in self.graph.degree())
    self.stats['degree_seq'] = degree_seq

    degree_counts = Counter(degree_seq)
    if normalized:
        for deg, count in degree_counts.items():
            degree_counts[deg] /= self.graph.order()
    self.stats['degree_dist'] = dict(degree_counts)
    return dict(degree_counts)
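# A tiny worked example of the normalized distribution above, assuming the
# GraphStats constructor used elsewhere in this section: a 3-node path graph
# has degrees [1, 2, 1], so the normalized degree_dist is {1: 2/3, 2: 1/3}.
gs = GraphStats(graph=nx.path_graph(3), dataset='toy', model='toy', trial=1, iteration=0)
assert gs.degree_dist() == {1: 2 / 3, 2: 1 / 3}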
def __getitem__(self, item):
    """
    Allows square-bracket indexing for stats; falls back to fuzzy matching
    on method names when the exact key is missing
    """
    if item in self.stats:  # the stat has already been calculated
        return self.stats[item]

    # fuzzy match the requested key against public method names
    object_methods = [
        method_name for method_name in dir(self)
        if callable(getattr(self, method_name))
        and not method_name.startswith('_')
    ]
    best_match_func = ''
    best_match_score = float('inf')
    for method in object_methods:
        dist = ed.eval(method, item)
        if dist < best_match_score:
            best_match_score = dist
            best_match_func = method
        if dist == 0:  # exact match
            break
    assert best_match_func != '', 'edit distance did not work'

    if best_match_score != 0:
        CP.print_orange(
            f'Best matching function found for "{item}": "{best_match_func}()", '
            f'edit distance: {best_match_score}')
    item = best_match_func

    if best_match_func not in self.stats:
        getattr(self, best_match_func)()  # call the best-matching method
    assert item in self.stats, f'stat: {item} is not updated after function call'
    return self.stats[item]
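# Usage sketch of the fuzzy indexing above; the misspelled key is purely
# illustrative, and the constructor arguments follow the GraphStats call seen
# elsewhere in this section.
gs = GraphStats(graph=nx.karate_club_graph(), dataset='karate', model='BUGGE', trial=1, iteration=0)
dd = gs['degree_dist']    # exact key: computed (or fetched from cache) and returned
dd = gs['degree_distro']  # fuzzy key: nearest method by edit distance is degree_dist()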
def stats_computation(dataset, model, trial, filename, stats):
    path = Path(get_imt_output_directory()) / 'pickles' / dataset / model / filename
    graph_list = load_pickle(path)
    assert isinstance(graph_list, list), \
        f'Expected type "list" and got type {type(graph_list)}.'
    assert all(isinstance(g, nx.Graph) for g in graph_list), \
        'Expected a list of nx.Graph objects.'
    ColorPrint.print_orange(f'{filename} has length {len(graph_list)}')

    for idx, G in enumerate(graph_list):
        gs_obj = GraphStats(graph=G,
                            dataset=dataset,
                            model=model,
                            trial=trial,
                            iteration=idx)
        gs_obj.write_stats_jsons(stats=stats)
    return None
def _gen(self, gname: str, gen_id: int) -> nx.Graph:
    """
    Call KronGen
    """
    orig_n = self.input_graph.order()
    # floor of log2 gives a lower bound on the Kronecker iteration count
    kron_iters = int(math.log2(orig_n))
    # bump up by one if the next power of 2 is closer to the original size
    if math.fabs(2 ** kron_iters - orig_n) > math.fabs(2 ** (kron_iters + 1) - orig_n):
        kron_iters += 1

    assert 'initiator_matrix' in self.params, 'Initiator matrix not found'
    matrix = self.params['initiator_matrix']
    output_file = f'./src/kronecker/{self.initial_gname}_{self.trial}_kron.txt'

    if len(matrix) == 0:  # KronFit failed
        CP.print_blue(f'Error in KronGen: "{self.input_graph.name}"')
        raise Exception('Generation failed!')

    bash_code = (f'cd src/kronecker; ./{self.krongen_exec} '
                 f'-o:{self.initial_gname}_{self.trial}_kron.txt '
                 f'-m:"{matrix}" -i:{kron_iters}')
    completed_process = sub.run(bash_code, shell=True, stdout=sub.PIPE)

    if completed_process.returncode != 0 or not check_file_exists(output_file):
        CP.print_blue(f'Error in KronGen: "{self.input_graph.name}"')
        raise Exception('Generation failed!')

    graph = nx.read_edgelist(output_file, nodetype=int, create_using=nx.Graph())
    graph.name = gname
    delete_files(output_file)
    graph.gen_id = gen_id
    return graph
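# Worked example of the iteration-count rule above, restated as a standalone
# helper (hypothetical name, for illustration only): KronGen produces 2**k
# nodes after k iterations, so pick whichever power of 2 lands closer to the
# original node count.
import math

def nearest_kron_iters(orig_n: int) -> int:
    k = int(math.log2(orig_n))
    return k + 1 if abs(2 ** k - orig_n) > abs(2 ** (k + 1) - orig_n) else k

assert nearest_kron_iters(1000) == 10  # |1024 - 1000| = 24 beats |512 - 1000| = 488
assert nearest_kron_iters(520) == 9    # |512 - 520| = 8 beats |1024 - 520| = 504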
def generate(self, num_graphs: int, gen_id: int) -> Union[List[nx.Graph], None]:
    edgelist_path = f'./src/hrg/{self.initial_gname}_{self.trial}.g'
    nx.write_edgelist(self.input_graph, edgelist_path, data=False)

    output_pickle_path = f'./src/hrg/Results/{self.initial_gname}_{self.trial}_hstars.pickle'
    completed_process = sub.run(
        f'. ./envs/hrg/bin/activate; cd src/hrg; '
        f'python2 exact_phrg.py --orig {self.initial_gname}_{self.trial}.g '
        f'--trials {num_graphs}; deactivate;',
        shell=True,
        stdout=sub.DEVNULL)

    if completed_process.returncode != 0 or not check_file_exists(output_pickle_path):
        CP.print_blue(f'Error in HRG: "{self.input_graph.name}"')
        raise Exception('Generation failed!')

    generated_graphs = []
    gen_graphs = load_pickle(output_pickle_path)
    if not isinstance(gen_graphs, list) or len(gen_graphs) != num_graphs:
        raise Exception('Generation failed!')

    for i, gen_graph in enumerate(gen_graphs):
        gen_graph = self._make_graph(gen_graph)
        gen_graph.name = f'{self.input_graph.name}_{self.trial}_{i + 1}'  # append the graph index
        gen_graph.gen_id = gen_id
        generated_graphs.append(gen_graph)

    if not isinstance(generated_graphs, list) or len(generated_graphs) != num_graphs:
        print('HRG failed')
        raise Exception('Generation failed!')

    # delete_files(edgelist_path, output_pickle_path)
    return generated_graphs