def prep_environment(self) -> None:
    """
    Set up the python2 virtual environment used by HRG, unless it already exists

    Nothing is done when ``./envs/hrg`` is present. Otherwise virtualenv is
    installed, the environment is created under ``./envs/hrg`` and the packages
    listed in ``./envs/requirements_hrg.txt`` are installed into it.
    :return:
    """
    if check_file_exists('./envs/hrg'):
        return

    CP.print_blue('Making virtual environment for HRG')

    # create and activate environment; the exit status of this step is not inspected
    sub.run(
        'python2 -m pip install --user virtualenv; python2 -m virtualenv -p python2 ./envs/hrg;. ./envs/hrg/bin/activate; which python2;',
        shell=True,
        stdout=sub.DEVNULL)

    # install requirements inside the freshly created environment
    install_cmd = '. ./envs/hrg/bin/activate; python2 -m pip install -r ./envs/requirements_hrg.txt'
    running_on_linux = 'Linux' in platform.platform()
    if not running_on_linux:
        # outside Linux the compilers are pinned to gcc-9 / g++-9 before installing
        install_cmd = 'export CC=gcc-9; export CXX=g++-9;' + install_cmd

    completed_process = sub.run(install_cmd, shell=True, stdout=sub.DEVNULL)
    assert completed_process.returncode == 0, 'Error while creating environment for HRG'
    return
def __init__(self, input_graph: nx.Graph, trial: int, **kwargs) -> None:
    """
    Initialise the parent model under the name 'BUGGE' and fix the rule sizes

    :param input_graph: graph forwarded to the parent constructor
    :param trial: trial number forwarded to the parent constructor
    :param kwargs: accepted but not used here
    """
    super().__init__(model_name='BUGGE', input_graph=input_graph, trial=trial)

    # bounds on the rule size, reported right away
    self.rule_min, self.rule_max = 2, 5
    CP.print_blue(f'Rule sizes: min: {self.rule_min}, max: {self.rule_max}')
def write_stats_pickle(self, base_path: Union[str, Path]):
    """
    Pickle ``self.stats`` below ``base_path``

    The file is placed at
    ``<base_path>/graph_stats/<dataset>/<model>/gs_<trial>_<iteration>.pkl.gz``.
    :param base_path: root directory under which the pickle is stored
    :return:
    """
    stats_dir = os.path.join(base_path, 'graph_stats', self.dataset, self.model)
    filename = os.path.join(stats_dir, f'gs_{self.trial}_{self.iteration}.pkl.gz')

    # the location is announced before the pickle is written
    CP.print_blue(f'Stats pickle stored at {filename}')
    save_pickle(self.stats, filename)
def make_dirs(output_dir: str, gname: str, model: str) -> None:
    """
    Create the pickle and feature directory trees if they are missing

    For both ``pickles`` and ``features`` the directories ``<top>``,
    ``<top>/<gname>`` and ``<top>/<gname>/<model>`` are created below
    ``output_dir``; a message is printed for each directory that had to be made.
    :param output_dir: root directory of the outputs
    :param gname: name of the graph
    :param model: name of the model
    :return:
    """
    root = Path(output_dir)
    for top_level in ('pickles', 'features'):
        for relative in (top_level, f'{top_level}/{gname}', f'{top_level}/{gname}/{model}'):
            target = root / relative
            if target.exists():
                continue
            CP.print_blue(f'Making dir {target!r}')
            os.makedirs(target, exist_ok=True)
    return
def prep_environment(self) -> None:
    """
    Make sure the 'netgan' conda environment and the NetGAN dump directory exist

    The environment is probed by trying to activate it; it is created from
    ``./envs/netgan.yml`` only when that probe fails.
    :return:
    """
    probe = sub.run('conda init bash; . ~/.bashrc; conda activate netgan', shell=True, stdout=sub.DEVNULL)

    # make the directory to store the dumps
    os.makedirs('./src/netgan/dumps', exist_ok=True)

    if probe.returncode != 0:  # conda environment does not exist yet
        CP.print_blue('Making conda environment for NetGAN')
        creation = sub.run('conda env create -f ./envs/netgan.yml', shell=True, stdout=sub.DEVNULL)
        assert creation.returncode == 0, 'Error while creating env for NetGAN'
    return
def write_stats_jsons(self, stats: Union[str, list], overwrite: bool = False) -> None:
    """
    write the requested statistics as compressed json files, one file per statistic

    Each statistic is stored at
    ``<imt output dir>/graph_stats/<dataset>/<model>/<statistic>/gs_<trial>_<iteration>.json.gz``.
    A statistic whose file already exists is skipped unless ``overwrite`` is set;
    the remaining statistics are still processed. Failures while computing or
    saving a statistic are reported and do not abort the loop.
    :param stats: a single statistic name or a list of names; every name must be a
        public callable attribute of this object
    :param overwrite: rewrite files that already exist
    :return:
    """
    # standardize incoming type
    if isinstance(stats, str):
        stats = [stats]

    for statistic in stats:
        assert statistic in [
            method_name for method_name in dir(self)
            if callable(getattr(self, method_name)) and not method_name.startswith('_')
        ]

        output_directory = get_imt_output_directory()
        file_output_directory = os.path.join(output_directory, 'graph_stats', self.dataset, self.model, statistic)
        ensure_dir(file_output_directory, recursive=True)
        filename = os.path.join(file_output_directory, f'gs_{self.trial}_{self.iteration}.json.gz')

        # if the file already exists and overwrite flag is not set, then don't rework.
        if not overwrite and verify_file(filename):
            CP.print_green(
                f'Statistic: {statistic} output file for {self.model}-{self.dataset}-{self.trial} already exists. Skipping.'
            )
            # skip only this statistic; the others in the list are still written
            continue

        # stays None when looking up the statistic itself fails, so the handler
        # below never touches an unbound (or stale) value
        data = None
        try:
            data = self[statistic]  # todo : maybe there's a better way?!
            save_zipped_json(data, filename)
            CP.print_blue(f'Stats json stored at {filename}')
        except Exception as e:
            CP.print_red(f'Exception occurred on {filename}!')
            CP.print_red(str(e))
            if statistic == 'netlsd':
                # leave a marker file next to the expected output
                save_zipped_json(data, filename + '.failed')
    return
def _gen(self, gname: str, gen_id: int) -> nx.Graph:
    """
    call KronGen

    Runs the KronGen executable with the fitted initiator matrix and reads the
    edge list it writes back as an undirected graph.
    :param gname: name given to the generated graph
    :param gen_id: stored on the generated graph as ``gen_id``
    :return: the generated graph
    """
    orig_n = self.input_graph.order()

    # floor of log2 gives a bound on kronecker iteration count; move one up when
    # the next power of two is closer to the original node count
    kron_iters = int(math.log2(orig_n))
    gap_below = math.fabs(2**kron_iters - orig_n)
    gap_above = math.fabs(2**(kron_iters + 1) - orig_n)
    if gap_below > gap_above:
        kron_iters += 1

    assert 'initiator_matrix' in self.params, 'Initiator matrix not found'
    matrix = self.params['initiator_matrix']

    output_name = f'{self.initial_gname}_{self.trial}_kron.txt'
    output_file = f'./src/kronecker/{output_name}'

    if len(matrix) == 0:  # KronFit failed
        CP.print_blue(f'Error in KronGen: "{self.input_graph.name}"')
        raise Exception('Generation failed!')

    bash_code = f'cd src/kronecker; ./{self.krongen_exec} -o:{output_name} -m:"{matrix}" -i:{kron_iters}'
    completed_process = sub.run(bash_code, shell=True, stdout=sub.PIPE)

    # the output file is only looked for when the process reported success
    krongen_succeeded = completed_process.returncode == 0 and check_file_exists(output_file)
    if not krongen_succeeded:
        CP.print_blue(f'Error in KronGen: "{self.input_graph.name}"')
        raise Exception('Generation failed!')

    graph = nx.read_edgelist(output_file, nodetype=int, create_using=nx.Graph())
    graph.name = gname
    delete_files(output_file)
    graph.gen_id = gen_id
    return graph
def generate(self, num_graphs: int, gen_id: int) -> Union[List[nx.Graph], None]:
    """
    Generate graphs by running the python2 HRG script on the input graph

    The input graph is written as an edge list, ``exact_phrg.py`` is run inside
    the HRG virtual environment, and the pickle it leaves in ``Results`` is
    converted with ``self._make_graph``.
    :param num_graphs: number of graphs requested from the script
    :param gen_id: stored on every generated graph as ``gen_id``
    :return: list with ``num_graphs`` generated graphs
    """
    stem = f'{self.initial_gname}_{self.trial}'

    edgelist_path = f'./src/hrg/{stem}.g'
    nx.write_edgelist(self.input_graph, edgelist_path, data=False)
    output_pickle_path = f'./src/hrg/Results/{stem}_hstars.pickle'

    # NOTE(review): the command list ends with `deactivate`, so the exit status seen
    # here is presumably not the one of the python2 run - confirm
    completed_process = sub.run(
        f'. ./envs/hrg/bin/activate; cd src/hrg; python2 exact_phrg.py --orig {stem}.g --trials {num_graphs}; deactivate;',
        shell=True,
        stdout=sub.DEVNULL)

    hrg_succeeded = completed_process.returncode == 0 and check_file_exists(output_pickle_path)
    if not hrg_succeeded:
        CP.print_blue(f'Error in HRG: "{self.input_graph.name}"')
        raise Exception('Generation failed!')

    gen_graphs = load_pickle(output_pickle_path)
    if not isinstance(gen_graphs, list) or len(gen_graphs) != num_graphs:
        raise Exception('Generation failed!')

    generated_graphs = []
    for number, raw_graph in enumerate(gen_graphs, start=1):
        converted = self._make_graph(raw_graph)
        converted.name = f'{self.input_graph.name}_{self.trial}_{number}'  # adding the number of graph
        converted.gen_id = gen_id
        generated_graphs.append(converted)

    # final sanity check on what is handed back
    if not isinstance(generated_graphs, list) or len(generated_graphs) != num_graphs:
        print('HRG failed')
        raise Exception('Generation failed!')

    # delete_files(edgelist_path, output_pickle_path)
    return generated_graphs
def _fit(self) -> None:
    """
    call KronFit

    Writes the input graph as a directed edge list, runs the KronFit executable
    on it and stores the initiator matrix found on the last line of its output
    file in ``self.params['initiator_matrix']``.
    :return:
    """
    base_name = f'{self.initial_gname}_{self.trial}'
    output_file = f'./src/kronecker/{base_name}-fit'

    # write edgelist to the path, but graph needs to start from 1
    relabelled = nx.convert_node_labels_to_integers(self.input_graph, first_label=1, label_attribute='old_label')
    directed_g = relabelled.to_directed()  # kronecker expects a directed graph

    edgelist_path = f'./src/kronecker/{base_name}.txt'
    nx.write_edgelist(directed_g, edgelist_path, data=False)

    bash_code = f'cd src/kronecker; {self.kronfit_exec} -i:{base_name}.txt -o:{base_name}-fit -s:50000'
    completed_process = sub.run(bash_code, shell=True)  # , stdout=sub.PIPE)

    # a failing exit status and a missing output file are reported the same way
    if completed_process.returncode != 0 or not check_file_exists(output_file):
        CP.print_blue(f'Error in KronFit: "{self.input_graph.name}"')
        raise Exception('Generation failed!')

    with open(output_file) as fit_file:
        last_line = fit_file.readlines()[-1]

    # keep what follows the first '[' once every ']' has been removed
    without_closing = last_line.replace(']', '')
    matrix = without_closing[without_closing.find('[') + 1:]
    # CP.print_blue('Initiator matrix:', matrix)

    self.params['initiator_matrix'] = matrix
    return
def pgd_graphlet_counts(self, n_threads=4) -> Dict:
    """
    Return the dictionary of graphlets and their counts - based on Neville's PGD

    The result is also stored in ``self.stats['pgd_graphlet_counts']``. An empty
    dictionary is produced when the platform is not Linux, the ``pgd_0`` binary is
    missing, or running the binary raises.
    :param n_threads: value passed to the ``-w`` flag of the PGD binary
    :return: mapping of graphlet name to integer count
    """
    pgd_path = Path(get_imt_input_directory()).parent / 'src' / 'PGD'
    graphlet_counts = {}

    pgd_usable = 'Linux' in platform.platform() and (pgd_path / 'pgd_0').exists()
    if not pgd_usable:
        CP.print_red(f'PGD executable not found at {pgd_path}/pgd')
    else:
        # the edge list is fed through stdin, terminated by a line holding X
        edge_text = '\n'.join(nx.generate_edgelist(self.graph, data=False))
        edgelist = edge_text + '\nX'  # add the X

        dummy_path = f'{pgd_path}/dummy.txt'
        try:
            bash_script = f'{pgd_path}/pgd_0 -w {n_threads} -f {dummy_path} -c {dummy_path}'
            pipe = sub.run(bash_script, shell=True, capture_output=True, input=edgelist.encode(), check=True)
            output_data = pipe.stdout.decode()
        except sub.TimeoutExpired as e:
            CP.print_blue(f'PGD timeout!{e.stderr}')
        except sub.CalledProcessError as e:
            CP.print_blue(f'PGD error {e.stderr}')
        except Exception as e:
            CP.print_blue(str(e))
        else:  # pgd is successfully run
            for line in output_data.split('\n')[:-1]:  # last line blank
                graphlet_name, count = (piece.strip() for piece in line.split('='))
                graphlet_counts[graphlet_name] = int(count)

    self.stats['pgd_graphlet_counts'] = graphlet_counts
    return graphlet_counts
def _gen(self, gname: str, gen_id: int) -> nx.Graph:
    """
    Generate one graph with BTER by driving MATLAB through a generated script

    The adjacency matrix of the input graph is written to ``./src/bter``, a MATLAB
    script is generated next to it and piped into ``matlab``, and the adjacency
    matrix MATLAB writes back is loaded as an undirected graph. The three
    intermediate files are deleted on success.
    :param gname: name given to the generated graph
    :param gen_id: stored on the generated graph as ``gen_id``
    :return: the generated graph
    :raises Exception: when MATLAB exits with a non-zero status or the output file is missing
    """
    g = self.input_graph

    # fix BTER to use the directory..
    CP.print_blue('Starting BTER...')

    graph_path = f'./src/bter/{g.name}_{self.trial}.mat'
    np.savetxt(graph_path, nx.to_numpy_matrix(g), fmt='%d')

    matlab_code = [
        'mex -largeArrayDims tricnt_mex.c;',
        'mex -largeArrayDims ccperdegest_mex.c;',
        f"G = dlmread('{g.name}_{self.trial}.mat');",
        'G = sparse(G);',
        f"graphname = '{g.name}_{self.trial}';",
        '',
        'nnodes = size(G, 1);',
        'nedges = nnz(G) / 2;',
        r"fprintf('nodes: %d edges: %d\n', nnodes, nedges);",
        '',
        'nd = accumarray(nonzeros(sum(G,2)),1);',
        "maxdegree = find(nd>0,1,'last');",
        r"fprintf('Maximum degree: %d\n', maxdegree);",
        '',
        '[ccd,gcc] = ccperdeg(G);',
        r"fprintf('Global clustering coefficient: %.2f\n', gcc);",
        '',
        r"fprintf('Running BTER...\n');",
        't1=tic;',
        '[E1,E2] = bter(nd,ccd);',
        'toc(t1);',
        r"fprintf('Number of edges created by BTER: %d\n', size(E1,1) + size(E2,1));",
        '',
        "fprintf('Turning edge list into adjacency matrix (including dedup)...');",
        't2=tic;',
        'G_bter = bter_edges2graph(E1,E2);',
        'toc(t2);',
        r"fprintf('Number of edges in dedup''d graph: %d\n', nnz(G)/2);",
        '',
        'G_bter = full(G_bter);',
        r"dlmwrite('{}_{}_bter.mat', G_bter, ' ');".format(g.name, self.trial),
        'quit;',
    ]

    matlab_code_filename = f'{g.name}_{self.trial}_code.m'
    matlab_code_path = f'./src/bter/{matlab_code_filename}'

    # the script must be flushed and closed before MATLAB reads it, so it is
    # written inside a context manager instead of through a dangling open()
    with open(matlab_code_path, 'w') as matlab_code_file:
        print('\n'.join(matlab_code), file=matlab_code_file)

    output_path = f'./src/bter/{g.name}_{self.trial}_bter.mat'

    start_time = time()
    completed_process = sub.run(
        f'cd src/bter; cat {matlab_code_filename} | matlab -nosplash -nodesktop',
        shell=True,
        stdout=sub.DEVNULL,
        stderr=sub.DEVNULL)
    CP.print_blue(f'BTER ran in {round(time() - start_time, 3)} secs')

    if completed_process.returncode != 0 or not check_file_exists(output_path):
        CP.print_blue('BTER failed!')
        raise Exception('Generation failed!')

    bter_mat = np.loadtxt(output_path, dtype=int)
    g_bter = nx.from_numpy_matrix(bter_mat, create_using=nx.Graph())
    g_bter.name = gname
    g_bter.gen_id = gen_id

    # intermediate files are only removed after a successful run
    delete_files(graph_path, output_path, matlab_code_path)
    return g_bter
# Fragment of a larger routine whose header is not part of this excerpt. The
# original indentation was lost, so the nesting of the statements below has to be
# confirmed against the full source before relying on these notes.
# Visible steps: two pools of size `num` are created (work_pool and read_pool).
# While `filenames` or `graphs_list` is non-empty, file names are handed to
# load_graph through read_pool.apply_async (callback read_update) and graphs to
# parallel_thing through work_pool.apply_async (callback work_update); the
# counters are printed and time.sleep(10) is called. Afterwards the code sleeps
# `num` seconds at a time while active_work > 0 and then closes work_pool.
# NOTE(review): graphs_list.pop(idx) is called while enumerate(graphs_list) walks
# the same list - this looks like it can skip entries; confirm.
# NOTE(review): presumably the counters are updated by the read_update /
# work_update callbacks, which are not visible here - confirm.
work_pool = mp.Pool(num) with mp.Pool(num) as read_pool: while filenames or graphs_list: if active_reads + pending_work + active_work <= num: if filenames: filename = filenames.pop(0) # take the first item active_reads += 1 read_pool.apply_async(load_graph, [filename], callback=read_update) # graphs_list.append(read_update(load_graph(filename))) for idx, graph in enumerate(graphs_list): active_work += 1 # work_update(parallel_thing(graph)) work_pool.apply_async(parallel_thing, [graph], callback=work_update) graphs_list.pop(idx) pending_work -= 1 else: for idx, graph in enumerate(graphs_list): active_work += 1 # work_update(parallel_thing(graph)) work_pool.apply_async(parallel_thing, [graph], callback=work_update) graphs_list.pop(idx) pending_work -= 1 ColorPrint.print_blue(f'Sleeping {active_reads}, {pending_work}, {active_work}') time.sleep(10) # wait until everything is off of the queue while active_work > 0: time.sleep(num) work_pool.close()
def run(self, use_pickle: bool) -> None:
    """
    New runner - uses list of graphs

    Starting from the initial graph, or from a partial pickle when ``use_pickle``
    is set and one is found, the model is repeatedly updated on the latest graph
    and asked to generate the next one until ``self.num_generations`` generations
    exist. After every generation the graphs and features are pickled to a
    temporary file and the timings are written; when the last generation is
    reached the temporary files are replaced by the final pickles. A failing fit
    or generation stops the loop and leaves the last temporary pickles in place.
    :param use_pickle: when True, a completed pickle ends the run right away and a
        partial pickle is used as the starting point
    :return:
    """
    pickle_ext = '.pkl.gz'
    self.graphs = []

    if use_pickle:
        complete_pickle_path = self.graphs_pickle_path + pickle_ext
        if check_file_exists(complete_pickle_path):  # the whole pickle exists
            graphs = load_pickle(complete_pickle_path)
            expected_count = self.num_generations + 1  # initial graph + one per generation
            assert len(graphs) == expected_count, f'Expected {expected_count} graphs, found {len(graphs)}'
            CP.print_green(f'Using completed pickle at {complete_pickle_path!r}. Loaded {len(graphs)} graphs')
            # NOTE(review): the loaded graphs are only validated, self.graphs stays
            # empty on this path - confirm that callers do not rely on it
            return

        # raw pattern with the literal parts escaped; the same full-name match is
        # used for filtering and for extracting the two numbers
        temp_file_pattern = re.compile(
            rf'list_(\d+)_{re.escape(str(self.trial))}_temp_(\d+){re.escape(pickle_ext)}')
        dir_name = os.path.dirname(self.graphs_pickle_path) or '.'

        input_files = [f for f in os.listdir(dir_name) if temp_file_pattern.fullmatch(f)]
        if len(input_files) > 0:
            assert len(input_files) == 1, f'More than one matches found: {input_files}'

            input_file = input_files[0]
            total_generations, progress = map(int, temp_file_pattern.fullmatch(input_file).groups())
            graphs = load_pickle(join(dir_name, input_file))
            assert len(graphs) == progress + 1, f'Found {len(graphs)}, expected: {progress + 1}'
            CP.print_blue(
                f'Partial pickle found at {input_file!r} trial: {self.trial} progress: {progress}/{total_generations}'
            )
            self.graphs = graphs
            # NOTE(review): self.features is not restored from the matching temporary
            # features pickle here - presumably it is initialised elsewhere; confirm

    # the first entry of self.graphs is the initial graph, so it does not count as
    # a finished generation
    finished_generations = max(len(self.graphs) - 1, 0)
    remaining_generations = self.num_generations - finished_generations

    tqdm.write(
        f'Running Infinity Mirror on {self.initial_graph.name!r} {self.initial_graph.order(), self.initial_graph.size()} {self.model.model_name!r} {remaining_generations} generations'
    )
    pbar = tqdm(total=remaining_generations, bar_format='{l_bar}{bar}|[{elapsed}<{remaining}]', ncols=50)

    if len(self.graphs) == 0:
        self.initial_graph.level = 0
        self.graphs = [self.initial_graph]
        self.features = [None]

    completed_trial = False
    for level in range(len(self.graphs), self.num_generations + 1):
        curr_graph = self.graphs[-1]  # use the last graph

        try:
            fit_time_start = time.perf_counter()
            self.model.update(new_input_graph=curr_graph)  # update the model
            fit_time = time.perf_counter() - fit_time_start
        except Exception as e:
            print(f'Model fit failed {e}')
            break

        try:
            gen_time_start = time.perf_counter()
            generated_graphs = self.model.generate(num_graphs=self.num_graphs, gen_id=level)  # generate a new set of graphs
            gen_time = time.perf_counter() - gen_time_start
        except Exception as e:
            print(f'Generation failed {e}')
            break

        if self.features:
            self.features.append(self.model.params)

        curr_graph = generated_graphs[0]  # we are only generating one graph
        curr_graph.name = f'{self.initial_graph.name}_{level}_{self.trial}'
        curr_graph.gen = level
        self.graphs.append(curr_graph)

        temp_pickle_path = self.graphs_pickle_path + f'_temp_{level}{pickle_ext}'
        prev_temp_pickle_path = self.graphs_pickle_path + f'_temp_{level-1}{pickle_ext}'
        temp_features_path = self.graphs_features_path + f'_temp_{level}{pickle_ext}'
        prev_temp_features_path = self.graphs_features_path + f'_temp_{level-1}{pickle_ext}'

        # write the new checkpoint before dropping the previous one
        save_pickle(obj=self.graphs, path=temp_pickle_path)
        save_pickle(obj=self.features, path=temp_features_path)
        delete_files(prev_temp_pickle_path)
        delete_files(prev_temp_features_path)

        self.write_timing_csv(iter_=level, fit_time=fit_time, gen_time=gen_time)

        if level == self.num_generations:
            completed_trial = True
        pbar.update(1)
    pbar.close()

    if completed_trial:  # only delete the temp pickle if the trial finishes successfully
        delete_files(temp_pickle_path)  # delete the temp file if the loop finishes normally
        delete_files(temp_features_path)  # delete the temp file if the loop finishes normally
        CP.print_green(f'List of {len(self.graphs)} Graphs is pickled at "{self.graphs_pickle_path + pickle_ext}"')
        save_pickle(obj=self.graphs, path=self.graphs_pickle_path + pickle_ext)
        save_pickle(obj=self.features, path=self.graphs_features_path + pickle_ext)
    return