Example #1
def convert_huffner():
    # Define some directories-of-interest paths
    original_dir = Path('.') / 'data' / 'original'
    preprocessed_dir = Path('.') / 'data' / 'preprocessed'

    # Huffner files we don't preprocess
    blacklist = ['aa12', 'j12', 'j27']

    # Identify the Huffner data
    data_names = sorted(
        filter(lambda n: n not in blacklist,
               names_in_dir(original_dir / 'huffner', '.graph')))
    print('Identified {} Huffner files'.format(len(data_names)))

    # Convert datasets
    for dataset in data_names:
        print('Processing', dataset)
        start_time = time.time()

        # Process the graph
        graph = read_huffner(original_dir / 'huffner', dataset)
        oct_set = set()
        graph_reduced = True
        while graph_reduced:
            # Require a change for graph_reduced to be triggered again
            graph_reduced = False

            # Compute OCT reductions
            print("- Computing OCT reduction")
            graph = reset_labels(graph)
            changed, graph, oct_set = oct_reductions(graph, oct_set)

            if changed:
                print("-- OCT reduced graph")
                graph_reduced = True

            # Compute VC reductions
            print("- Computing VC reduction")
            graph = reset_labels(graph)
            # Snapshot the current graph in SNAP format before reducing
            write_snap(graph, preprocessed_dir / 'snap')
            changed, graph, oct_set = vc_reductions(graph, oct_set)
            if changed:
                print("-- VC reduced graph")
                graph_reduced = True

        total_time = time.time() - start_time
        print('Preprocessing `{}` took {} seconds'.format(
            dataset, round(total_time, 1)))
        # Write the results
        graph = reset_labels(graph)
        write_summary(graph, preprocessed_dir / 'summary', 'huffner.csv')
        write_oct_set(graph, oct_set, preprocessed_dir / 'oct')
        write_name_lookup(graph, preprocessed_dir / 'lookup')
        write_edgelist(graph, preprocessed_dir / 'edgelist')
        write_huffner(graph, preprocessed_dir / 'huffner')
        write_snap(graph, preprocessed_dir / 'snap')
    print('Preprocessed Huffner data')
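
The `names_in_dir` helper is not defined in these examples. A minimal sketch of what it might look like, assuming it returns file base names (without the extension) so they can be sorted and filtered against the blacklist above:

from pathlib import Path

def names_in_dir(directory, extension):
    # Hypothetical helper: list base names of files ending in `extension`
    return [path.stem for path in Path(directory).glob('*' + extension)]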
Example #2
def _sanitize_select_gka(original_dir, sanitized_dir, data_names):
    for dataset in data_names:
        # Sanitize the graph and write
        print('Sanitizing', dataset)
        graph = read_beasley(original_dir / 'gka', dataset + '.txt')
        graph = reset_labels(graph)
        write_edgelist(graph, sanitized_dir / 'edgelist')
        write_huffner(graph, sanitized_dir / 'huffner')
        write_snap(graph, sanitized_dir / 'snap')
    print('Sanitized GKA data')
Example #3
def convert_select_gka(data_names):
    # Define some directories-of-interest paths
    original_dir = Path('.') / 'data' / 'original'
    preprocessed_dir = Path('.') / 'data' / 'preprocessed'

    # Remove the old statistics CSV
    summary_filename = preprocessed_dir / 'summary' / 'gka.csv'
    if summary_filename.is_file():
        summary_filename.unlink()

    # Convert datasets
    for dataset in data_names:
        print('Processing', dataset)
        start_time = time.time()

        # Process the graph
        graph = read_beasley(original_dir / 'gka', dataset)
        oct_set = set()
        graph_reduced = True
        while graph_reduced:
            # Require a change for graph_reduced to be triggered again
            graph_reduced = False

            # Compute OCT reductions
            print("- Computing OCT reduction")
            graph = reset_labels(graph)
            changed, graph, oct_set = oct_reductions(graph, oct_set)

            if changed:
                print("-- OCT reduced graph")
                graph_reduced = True

            # Compute VC reductions
            print("- Computing VC reduction")
            graph = reset_labels(graph)
            # Snapshot the current graph in SNAP format before reducing
            write_snap(graph, preprocessed_dir / 'snap')
            changed, graph, oct_set = vc_reductions(graph, oct_set)
            if changed:
                print("-- VC reduced graph")
                graph_reduced = True

        total_time = time.time() - start_time
        print('Preprocessing `{}` took {} seconds'.format(
            dataset, round(total_time, 1)))
        # Write the results
        graph = reset_labels(graph)
        write_summary(graph, preprocessed_dir / 'summary', 'gka.csv')
        write_oct_set(graph, oct_set, preprocessed_dir / 'oct')
        write_name_lookup(graph, preprocessed_dir / 'lookup')
        write_edgelist(graph, preprocessed_dir / 'edgelist')
        write_huffner(graph, preprocessed_dir / 'huffner')
        write_snap(graph, preprocessed_dir / 'snap')
    print('Preprocessed GKA data')
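
The same reduce-until-fixed-point loop appears in examples #1, #3, and #6. A sketch of that pattern factored into a shared helper, assuming the `oct_reductions`/`vc_reductions` signatures shown above (each returns a `(changed, graph, oct_set)` tuple) and a hypothetical `snap_dir` parameter standing in for the per-caller SNAP output directory:

def _reduce_to_fixed_point(graph, oct_set, snap_dir):
    # Alternate OCT and VC reductions until neither changes the graph
    graph_reduced = True
    while graph_reduced:
        graph_reduced = False

        # Compute OCT reductions
        graph = reset_labels(graph)
        changed, graph, oct_set = oct_reductions(graph, oct_set)
        graph_reduced = graph_reduced or changed

        # Compute VC reductions
        graph = reset_labels(graph)
        write_snap(graph, snap_dir)
        changed, graph, oct_set = vc_reductions(graph, oct_set)
        graph_reduced = graph_reduced or changed
    return graph, oct_set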
Example #4
def _sanitize_select_beasley(original_dir, sanitized_dir, data_names):
    """
    Sanitize select graphs in the original/beasley/ directory.
    """
    for dataset in data_names:
        # Sanitize the graph and write
        print('Sanitizing', dataset)
        graph = read_beasley(original_dir / 'beasley', dataset + '.txt')
        graph = reset_labels(graph)
        write_edgelist(graph, sanitized_dir / 'edgelist')
        write_huffner(graph, sanitized_dir / 'huffner')
        write_snap(graph, sanitized_dir / 'snap')
    print('Sanitized Beasley data')
Example #5
def _sanitize_huffner(original_dir, sanitized_dir):
    """
    Sanitize all graphs in the original/huffner/ directory.
    """
    # Identify the Huffner data
    data_names = sorted(names_in_dir(original_dir / 'huffner', '.graph'))
    print('Identified {} Huffner files'.format(len(data_names)))

    # Convert datasets
    for dataset in data_names:
        # Sanitize the graph and write
        print('Sanitizing', dataset)
        graph = read_huffner(original_dir / 'huffner', dataset + '.graph')
        graph = reset_labels(graph)
        write_edgelist(graph, sanitized_dir / 'edgelist')
        write_huffner(graph, sanitized_dir / 'huffner')
        write_snap(graph, sanitized_dir / 'snap')
    print('Sanitized Huffner data')
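
`reset_labels` is also not shown. Every call site reassigns its return value, which is consistent with it returning a relabeled copy of the graph. A sketch under the assumption that these are networkx graphs (suggested by the `graph.order()`, `graph.size()`, and `graph.graph[...]` usage in example #6) and that labels are normalized to consecutive integers:

import networkx as nx

def reset_labels(graph):
    # Hypothetical: relabel nodes to 0..n-1, preserving graph-level attributes
    return nx.convert_node_labels_to_integers(graph, first_label=0)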
Example #6
def _convert_quantum(data_names):
    # Define some directories-of-interest paths
    input_dir = Path('.') / 'data' / 'sanitized'
    output_dir = Path('.') / 'data' / 'preprocessed'

    # Remove the old statistics CSV
    summary_dir = output_dir / 'summary'
    summary_filename = summary_dir / 'quantum.csv'
    if summary_filename.is_file():
        summary_filename.unlink()
    else:
        summary_dir.mkdir(exist_ok=True, parents=True)

    _write_summary_header(summary_filename)

    # Convert datasets
    for dataset in data_names:
        timestamp = datetime.datetime.fromtimestamp(
            time.time()).strftime('%Y/%m/%d-%H:%M:%S:')
        print('{} Processing {}'.format(timestamp, dataset))

        # Process the graph
        graph = read_edgelist(input_dir / 'edgelist', dataset)
        graph = reset_labels(graph)
        graph.graph['original_vertices'] = graph.order()
        graph.graph['original_edges'] = graph.size()

        oct_set = set()
        graph_reduced = True
        while graph_reduced:
            # Require a change for graph_reduced to be triggered again
            graph_reduced = False

            # Compute OCT reductions
            print("- Computing OCT reduction")
            graph = reset_labels(graph)
            changed, graph, oct_set = oct_reductions(graph, oct_set)

            if changed:
                print("-- OCT reduced graph")
                graph_reduced = True

            # Compute VC reductions
            print("- Computing VC reduction")
            graph = reset_labels(graph)
            # Snapshot the current graph in SNAP format before reducing
            write_snap(graph, output_dir / 'snap')
            changed, graph, oct_set = vc_reductions(graph, oct_set)
            if changed:
                print("-- VC reduced graph")
                graph_reduced = True

        # Write the results
        graph = reset_labels(graph)
        _write_summary(graph, output_dir / 'summary', 'quantum.csv')
        _write_oct_set(graph, oct_set, output_dir / 'oct')
        _write_name_lookup(graph, output_dir / 'lookup')
        write_edgelist(graph, output_dir / 'edgelist')
        write_huffner(graph, output_dir / 'huffner')
        write_snap(graph, output_dir / 'snap')
    print('Finished preprocessing quantum data')
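
`_write_summary_header` and `_write_summary` are private helpers that are not shown. Since `_convert_quantum` stores `original_vertices` and `original_edges` on the graph before reducing it, a plausible sketch records those against the reduced size; the exact column set here is an assumption:

import csv

def _write_summary_header(summary_filename):
    # Hypothetical column set for the preprocessing summary CSV
    with open(summary_filename, 'w', newline='') as csvfile:
        csv.writer(csvfile).writerow(
            ['name', 'original_vertices', 'original_edges',
             'reduced_vertices', 'reduced_edges'])

def _write_summary(graph, summary_dir, filename):
    # Append one row per dataset to the existing summary CSV
    with open(summary_dir / filename, 'a', newline='') as csvfile:
        csv.writer(csvfile).writerow(
            [graph.graph.get('name'),
             graph.graph.get('original_vertices'),
             graph.graph.get('original_edges'),
             graph.order(), graph.size()])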
Example #7
    # Keep only the non-synthetic data
    datasets = sorted(filter(lambda x: '-' not in x, datasets))

    # Read in the pre-computed optimal OCT sizes
    oct_upper_bound = _populate_oct_upper_bound_lookup()

    # For every dataset and seed, generate a synthetic graph with each model
    for dataset, seed in product(datasets, args.seeds):
        print('For {} and seed {}'.format(dataset, seed))
        # Generate the sanitized ER random graph
        print('- Generating Erdos-Renyi')
        graph = read_edgelist(input_dir, dataset + '.edgelist')
        er_graph = _generate_er(graph, seed)
        er_graph = reset_labels(er_graph)
        # Write the graph
        write_edgelist(er_graph, sanitized_dir / 'edgelist')
        write_huffner(er_graph, sanitized_dir / 'huffner')
        write_snap(er_graph, sanitized_dir / 'snap')

        # Generate the sanitized CL random graph
        print('- Generating Chung-Lu')
        graph = read_edgelist(input_dir, dataset + '.edgelist')
        cl_graph = _generate_cl(graph, seed)
        cl_graph = reset_labels(cl_graph)
        # Write the graph
        write_edgelist(cl_graph, sanitized_dir / 'edgelist')
        write_huffner(cl_graph, sanitized_dir / 'huffner')
        write_snap(cl_graph, sanitized_dir / 'snap')

        # Generate the sanitized BA random graph
        print('- Generating Barabasi-Albert')
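
This example is cut off before the Barabasi-Albert call, and the `_generate_er` and `_generate_cl` helpers are not shown. Sketches assuming networkx, with the ER model matching the input graph's vertex and edge counts and the Chung-Lu model matching its degree sequence (the usual setup for these null models); the real helpers presumably also attach a synthetic name containing '-', given the filter at the top of this example:

import networkx as nx

def _generate_er(graph, seed):
    # Hypothetical: Erdos-Renyi G(n, m) graph with matching vertex/edge counts
    return nx.gnm_random_graph(graph.order(), graph.size(), seed=seed)

def _generate_cl(graph, seed):
    # Hypothetical: Chung-Lu graph whose expected degrees match `graph`
    degrees = [degree for _, degree in graph.degree()]
    return nx.expected_degree_graph(degrees, seed=seed, selfloops=False)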