def reduce_graph(graph, oct_reductions, vc_reductions):
    """Reduce ``graph`` by alternating OCT and VC reductions to a fixpoint.

    Parameters
    ----------
    graph : the graph object being reduced (returned as-is; the reduction
        callables are assumed to mutate it — TODO confirm against callers)
    oct_reductions : zero-argument callable returning True iff it changed
        the graph
    vc_reductions : zero-argument callable returning True iff it changed
        the graph

    Returns
    -------
    The (reduced) graph.
    """
    # Run each type of reduction at least once.
    # BUGFIX: the original discarded the result of the first OCT pass
    # (`changed = oct_reductions(); changed = vc_reductions()`), so a change
    # made only by that pass would never trigger the loop below. Keep both
    # results — `or` after the call so vc_reductions() always executes.
    changed = oct_reductions()
    changed = vc_reductions() or changed

    # Reduce until a round produces no further change.
    while changed:
        changed = oct_reductions()
        if changed:
            changed = vc_reductions()
    return graph
def convert_huffner():
    """Preprocess every non-blacklisted Huffner graph and write the results."""
    # Directories of interest
    original_dir = Path('.') / 'data' / 'original'
    preprocessed_dir = Path('.') / 'data' / 'preprocessed'

    # Huffner files we don't preprocess
    blacklist = ['aa12', 'j12', 'j27']

    # Identify the Huffner data
    data_names = sorted(
        name for name in names_in_dir(original_dir / 'huffner', '.graph')
        if name not in blacklist)
    print('Identified {} Huffner files'.format(len(data_names)))

    # Convert datasets
    for dataset in data_names:
        print('Processing', dataset)
        start_time = time.time()

        # Load the graph; alternate OCT and VC reductions to a fixpoint.
        graph = read_huffner(original_dir / 'huffner', dataset)
        oct_set = set()
        while True:
            made_progress = False

            print("- Computing OCT reduction")
            graph = reset_labels(graph)
            changed, graph, oct_set = oct_reductions(graph, oct_set)
            if changed:
                print("-- OCT reduced graph")
                made_progress = True

            print("- Computing VC reduction")
            graph = reset_labels(graph)
            # snap file is written before the VC pass, mirroring the other
            # converters — presumably vc_reductions reads it; TODO confirm
            write_snap(graph, preprocessed_dir / 'snap')
            changed, graph, oct_set = vc_reductions(graph, oct_set)
            if changed:
                print("-- VC reduced graph")
                made_progress = True

            if not made_progress:
                break

        total_time = time.time() - start_time
        print('Preprocessing `{}` took {} seconds'.format(
            dataset, round(total_time, 1)))

        # Write the results
        graph = reset_labels(graph)
        write_summary(graph, preprocessed_dir / 'summary', 'huffner.csv')
        write_oct_set(graph, oct_set, preprocessed_dir / 'oct')
        write_name_lookup(graph, preprocessed_dir / 'lookup')
        write_edgelist(graph, preprocessed_dir / 'edgelist')
        write_huffner(graph, preprocessed_dir / 'huffner')
        write_snap(graph, preprocessed_dir / 'snap')
    print('Preprocessed Huffner data')
def convert_select_gka(data_names):
    """Preprocess the named GKA/Beasley graphs and write the results."""
    # Directories of interest
    original_dir = Path('.') / 'data' / 'original'
    preprocessed_dir = Path('.') / 'data' / 'preprocessed'

    # Remove the old statistics CSV so this run starts fresh
    summary_csv = preprocessed_dir / 'summary' / 'gka.csv'
    if Path(summary_csv).is_file():
        Path(summary_csv).unlink()

    # Convert datasets
    for dataset in data_names:
        print('Processing', dataset)
        start_time = time.time()

        # Load the graph; alternate OCT and VC reductions to a fixpoint.
        graph = read_beasley(original_dir / 'gka', dataset)
        oct_set = set()
        while True:
            made_progress = False

            print("- Computing OCT reduction")
            graph = reset_labels(graph)
            changed, graph, oct_set = oct_reductions(graph, oct_set)
            if changed:
                print("-- OCT reduced graph")
                made_progress = True

            print("- Computing VC reduction")
            graph = reset_labels(graph)
            # snap file is written before the VC pass, mirroring the other
            # converters — presumably vc_reductions reads it; TODO confirm
            write_snap(graph, preprocessed_dir / 'snap')
            changed, graph, oct_set = vc_reductions(graph, oct_set)
            if changed:
                print("-- VC reduced graph")
                made_progress = True

            if not made_progress:
                break

        # Write the results
        total_time = time.time() - start_time
        print('Preprocessing `{}` took {} seconds'.format(
            dataset, round(total_time, 1)))
        graph = reset_labels(graph)
        write_summary(graph, preprocessed_dir / 'summary', 'gka.csv')
        write_oct_set(graph, oct_set, preprocessed_dir / 'oct')
        write_name_lookup(graph, preprocessed_dir / 'lookup')
        write_edgelist(graph, preprocessed_dir / 'edgelist')
        write_huffner(graph, preprocessed_dir / 'huffner')
        write_snap(graph, preprocessed_dir / 'snap')
    print('Preprocessed GKA data')
def _convert_quantum(data_names):
    """Preprocess the named quantum edgelist graphs and write the results."""
    # Directories of interest
    input_dir = Path('.') / 'data' / 'sanitized'
    output_dir = Path('.') / 'data' / 'preprocessed'

    # Start from a fresh statistics CSV containing only the header row
    summary_dir = Path(output_dir / 'summary')
    summary_filename = summary_dir / 'quantum.csv'
    if summary_filename.is_file():
        Path(summary_filename).unlink()
    else:
        summary_dir.mkdir(exist_ok=True, parents=True)
    _write_summary_header(summary_filename)

    # Convert datasets
    for dataset in data_names:
        timestamp = datetime.datetime.fromtimestamp(
            time.time()).strftime('%Y/%m/%d-%H:%M:%S:')
        print('{} Processing {}'.format(timestamp, dataset))

        # Load and relabel the graph, recording its pre-reduction size
        graph = read_edgelist(input_dir / 'edgelist', dataset)
        graph = reset_labels(graph)
        graph.graph['original_vertices'] = graph.order()
        graph.graph['original_edges'] = graph.size()
        oct_set = set()

        # Alternate OCT and VC reductions until neither makes progress
        while True:
            made_progress = False

            print("- Computing OCT reduction")
            graph = reset_labels(graph)
            changed, graph, oct_set = oct_reductions(graph, oct_set)
            if changed:
                print("-- OCT reduced graph")
                made_progress = True

            print("- Computing VC reduction")
            graph = reset_labels(graph)
            # snap file is written before the VC pass, mirroring the other
            # converters — presumably vc_reductions reads it; TODO confirm
            write_snap(graph, output_dir / 'snap')
            changed, graph, oct_set = vc_reductions(graph, oct_set)
            if changed:
                print("-- VC reduced graph")
                made_progress = True

            if not made_progress:
                break

        # Write the results
        graph = reset_labels(graph)
        _write_summary(graph, output_dir / 'summary', 'quantum.csv')
        _write_oct_set(graph, oct_set, output_dir / 'oct')
        _write_name_lookup(graph, output_dir / 'lookup')
        write_edgelist(graph, output_dir / 'edgelist')
        write_huffner(graph, output_dir / 'huffner')
        write_snap(graph, output_dir / 'snap')
    print('Finished preprocessing quantum data')