示例#1
0
def convert_huffner():
    """
    Preprocess every non-blacklisted Huffner graph in data/original/huffner.

    For each dataset the graph is read, then OCT and VC reductions are
    applied in alternation until a full pass reports no change from either
    reduction. The elapsed time is printed and the relabelled result is
    handed to the summary, OCT-set, name-lookup, edgelist, Huffner and SNAP
    writers, all targeting subdirectories of data/preprocessed.
    """
    # Directories of interest
    data_root = Path('.') / 'data'
    huffner_dir = data_root / 'original' / 'huffner'
    preprocessed_dir = data_root / 'preprocessed'
    snap_dir = preprocessed_dir / 'snap'

    # Huffner files we don't preprocess
    blacklist = ['aa12', 'j12', 'j27']

    # Collect the dataset names, dropping the blacklisted ones
    data_names = sorted(
        name for name in names_in_dir(huffner_dir, '.graph')
        if name not in blacklist)
    print('Identified {} Huffner files'.format(len(data_names)))

    for dataset in data_names:
        print('Processing', dataset)
        started = time.time()

        graph = read_huffner(huffner_dir, dataset)
        oct_set = set()

        # Keep alternating the two reductions until a pass changes nothing
        while True:
            print("- Computing OCT reduction")
            graph = reset_labels(graph)
            oct_changed, graph, oct_set = oct_reductions(graph, oct_set)
            if oct_changed:
                print("-- OCT reduced graph")

            print("- Computing VC reduction")
            graph = reset_labels(graph)
            # The SNAP file is written right before the VC step; presumably
            # vc_reductions reads it from disk -- TODO confirm.
            write_snap(graph, snap_dir)
            vc_changed, graph, oct_set = vc_reductions(graph, oct_set)
            if vc_changed:
                print("-- VC reduced graph")

            if not (oct_changed or vc_changed):
                break

        elapsed = time.time() - started
        print('Preprocessing `{}` took {} seconds'.format(
            dataset, round(elapsed, 1)))

        # Write the results
        graph = reset_labels(graph)
        write_summary(graph, preprocessed_dir / 'summary', 'huffner.csv')
        write_oct_set(graph, oct_set, preprocessed_dir / 'oct')
        write_name_lookup(graph, preprocessed_dir / 'lookup')
        write_edgelist(graph, preprocessed_dir / 'edgelist')
        write_huffner(graph, preprocessed_dir / 'huffner')
        write_snap(graph, snap_dir)
    print('Preprocessed Huffner data')
示例#2
0
def convert_select_gka(data_names):
    """
    Preprocess the named GKA graphs found in data/original/gka.

    An existing data/preprocessed/summary/gka.csv is deleted first. Each
    dataset is then read with ``read_beasley`` and reduced by alternating
    OCT and VC reductions until a full pass reports no change. The elapsed
    time is printed and the relabelled result is handed to the summary,
    OCT-set, name-lookup, edgelist, Huffner and SNAP writers.

    Parameters
    ----------
    data_names : iterable
        Dataset names, passed one at a time to ``read_beasley``.
    """
    # Directories of interest
    data_root = Path('.') / 'data'
    gka_dir = data_root / 'original' / 'gka'
    preprocessed_dir = data_root / 'preprocessed'
    summary_dir = preprocessed_dir / 'summary'
    snap_dir = preprocessed_dir / 'snap'

    # Remove the old statistics CSV
    old_summary = summary_dir / 'gka.csv'
    if old_summary.is_file():
        old_summary.unlink()

    for dataset in data_names:
        print('Processing', dataset)
        started = time.time()

        graph = read_beasley(gka_dir, dataset)
        oct_set = set()

        # Keep alternating the two reductions until a pass changes nothing
        while True:
            print("- Computing OCT reduction")
            graph = reset_labels(graph)
            oct_changed, graph, oct_set = oct_reductions(graph, oct_set)
            if oct_changed:
                print("-- OCT reduced graph")

            print("- Computing VC reduction")
            graph = reset_labels(graph)
            # The SNAP file is written right before the VC step; presumably
            # vc_reductions reads it from disk -- TODO confirm.
            write_snap(graph, snap_dir)
            vc_changed, graph, oct_set = vc_reductions(graph, oct_set)
            if vc_changed:
                print("-- VC reduced graph")

            if not (oct_changed or vc_changed):
                break

        # Report timing, then write the results
        elapsed = time.time() - started
        print('Preprocessing `{}` took {} seconds'.format(
            dataset, round(elapsed, 1)))
        graph = reset_labels(graph)
        write_summary(graph, summary_dir, 'gka.csv')
        write_oct_set(graph, oct_set, preprocessed_dir / 'oct')
        write_name_lookup(graph, preprocessed_dir / 'lookup')
        write_edgelist(graph, preprocessed_dir / 'edgelist')
        write_huffner(graph, preprocessed_dir / 'huffner')
        write_snap(graph, snap_dir)
    print('Preprocessed GKA data')
示例#3
0
def _generate_to(qubo, seed, oct_upper_bound, bias=0.5):
    """
    Generate a random graph with a planted OCT set, modelled on a QUBO graph.

    An Erdos-Renyi graph is generated with the same number of vertices as
    ``qubo`` and, in expectation, the same number of edges. The first
    ``oct_upper_bound`` vertices (in node order) are left untouched and form
    the OCT set. Each remaining vertex is placed in the left partite set with
    probability ``bias`` and in the right partite set otherwise. Every edge
    whose two endpoints lie in the *same* partite set is then removed, so the
    non-OCT vertices induce a bipartite graph.

    Parameters
    ----------
    qubo : graph
        Source graph; only ``order()``, ``size()`` and ``graph['name']`` are
        read.
    seed : hashable
        Seed for both the Erdos-Renyi generator and the partite assignment.
        Note that this reseeds the module-level ``random`` generator.
    oct_upper_bound : int
        Number of leading vertices excluded from the partite assignment.
    bias : float, optional
        Probability that a non-OCT vertex lands in the left partite set.

    Returns
    -------
    graph
        The result of ``reset_labels`` applied to the generated graph, whose
        ``graph['name']`` was set to ``'<qubo name>-to-<seed>'``.
    """
    # Compute parameters needed for ER
    n = qubo.order()
    possible_edges = scipy.special.binom(n, 2)
    # Fewer than two vertices means no possible edge; avoid dividing by zero.
    p = qubo.size() / possible_edges if possible_edges > 0 else 0.0
    # Generate graph
    graph = nx.erdos_renyi_graph(n=n, p=p, seed=seed)
    random.seed(seed)
    # Compute partite sets on the remaining vertices
    partite1 = set()
    partite2 = set()
    for node in list(graph.nodes())[oct_upper_bound:]:
        if random.random() < bias:
            partite1.add(node)
        else:
            partite2.add(node)
    # Remove edges within a partite set. Only existing edges are scanned
    # (O(m)) instead of testing every vertex pair of each set (O(n^2)).
    # The list is built first so the edge view is not mutated while iterated.
    same_side_edges = [
        (u, v) for u, v in graph.edges()
        if (u in partite1 and v in partite1)
        or (u in partite2 and v in partite2)
    ]
    graph.remove_edges_from(same_side_edges)
    # Name the graph
    graph.graph['name'] = '{}-{}-{}'.format(qubo.graph['name'], 'to', seed)
    # Sanitize the graph and return
    graph = reset_labels(graph)
    return graph
示例#4
0
def _sanitize_select_gka(original_dir, sanitized_dir, data_names):
    """
    Sanitize select graphs in the original/gka/ directory.

    Each ``<name>.txt`` file is read with ``read_beasley``, relabelled with
    ``reset_labels`` and written in edgelist, Huffner and SNAP form to the
    matching subdirectory of ``sanitized_dir``.
    """
    # Output writers paired with the subdirectory each one targets
    writers = (
        (write_edgelist, 'edgelist'),
        (write_huffner, 'huffner'),
        (write_snap, 'snap'),
    )
    for name in data_names:
        print('Sanitizing', name)
        sanitized = reset_labels(
            read_beasley(original_dir / 'gka', name + '.txt'))
        for write, subdir in writers:
            write(sanitized, sanitized_dir / subdir)
    # NOTE(review): the message says "Preprocessed" although this function
    # only sanitizes -- confirm the intended wording.
    print('Preprocessed GKA data')
示例#5
0
def _sanitize_select_beasley(original_dir, sanitized_dir, data_names):
    """
    Sanitize select graphs in the original/beasley/ directory.

    Each ``<name>.txt`` file is read with ``read_beasley``, relabelled with
    ``reset_labels`` and written in edgelist, Huffner and SNAP form to the
    matching subdirectory of ``sanitized_dir``.
    """
    # Output writers paired with the subdirectory each one targets
    writers = (
        (write_edgelist, 'edgelist'),
        (write_huffner, 'huffner'),
        (write_snap, 'snap'),
    )
    for name in data_names:
        print('Sanitizing', name)
        sanitized = reset_labels(
            read_beasley(original_dir / 'beasley', name + '.txt'))
        for write, subdir in writers:
            write(sanitized, sanitized_dir / subdir)
    # NOTE(review): the message says "Preprocessed" although this function
    # only sanitizes -- confirm the intended wording.
    print('Preprocessed Beasley data')
示例#6
0
def _generate_cl(qubo, seed):
    """
    Generate a Chung-Lu graph matching a graph's degree distribution.

    The sorted degree sequence of ``qubo`` is used as the expected-degree
    weights. The generated graph is named ``'<qubo name>-cl-<seed>'`` and
    returned after being passed through ``reset_labels``.
    """
    # Expected-degree weights: the source graph's degrees in ascending order
    weights = sorted(qubo.degree(vertex) for vertex in qubo.nodes())
    cl_graph = nx.expected_degree_graph(
        w=weights, selfloops=False, seed=seed)
    # Name the graph before sanitizing it
    cl_graph.graph['name'] = '{}-{}-{}'.format(
        qubo.graph['name'], 'cl', seed)
    return reset_labels(cl_graph)
示例#7
0
def _generate_ba(qubo, seed):
    """
    Generate a Barabasi-Albert graph modelled on a QUBO graph.

    The graph has as many vertices as ``qubo``, and each new vertex attaches
    with ``ceil(edges / vertices)`` edges. The generated graph is named
    ``'<qubo name>-ba-<seed>'`` and returned after being passed through
    ``reset_labels``.
    """
    # Parameters needed for BA
    vertex_count = qubo.order()
    edges_per_vertex = math.ceil(qubo.size() / vertex_count)
    ba_graph = nx.barabasi_albert_graph(
        n=vertex_count, m=edges_per_vertex, seed=seed)
    # Name the graph before sanitizing it
    ba_graph.graph['name'] = '{}-{}-{}'.format(
        qubo.graph['name'], 'ba', seed)
    return reset_labels(ba_graph)
示例#8
0
def _generate_er(qubo, seed):
    """
    Generate an Erdos-Renyi graph modelled on a QUBO graph.

    The graph has as many vertices as ``qubo`` and, in expectation, as many
    edges: the edge probability is the QUBO's edge count divided by the
    number of vertex pairs. The generated graph is named
    ``'<qubo name>-er-<seed>'`` and returned after being passed through
    ``reset_labels``.
    """
    # Parameters needed for the model
    vertex_count = qubo.order()
    edge_count = qubo.size()
    edge_probability = edge_count / scipy.special.binom(vertex_count, 2)
    er_graph = nx.erdos_renyi_graph(
        n=vertex_count, p=edge_probability, seed=seed)
    # Name the graph before sanitizing it
    er_graph.graph['name'] = '{}-{}-{}'.format(
        qubo.graph['name'], 'er', seed)
    return reset_labels(er_graph)
示例#9
0
def _sanitize_huffner(original_dir, sanitized_dir):
    """
    Sanitize all graphs in the original/huffner/ directory.

    Every name reported by ``names_in_dir`` for the ``.graph`` extension is
    read with ``read_huffner``, relabelled with ``reset_labels`` and written
    in edgelist, Huffner and SNAP form to the matching subdirectory of
    ``sanitized_dir``.
    """
    # Output writers paired with the subdirectory each one targets
    writers = (
        (write_edgelist, 'edgelist'),
        (write_huffner, 'huffner'),
        (write_snap, 'snap'),
    )

    # Identify the Huffner data
    names = sorted(names_in_dir(original_dir / 'huffner', '.graph'))
    print('Identified {} Huffner files'.format(len(names)))

    for name in names:
        print('Sanitizing', name)
        sanitized = reset_labels(
            read_huffner(original_dir / 'huffner', name + '.graph'))
        for write, subdir in writers:
            write(sanitized, sanitized_dir / subdir)
    print('Sanitized Huffner data')
def _convert_quantum(data_names):
    """
    Preprocess the named quantum graphs found in data/sanitized/edgelist.

    The summary file data/preprocessed/summary/quantum.csv is reset: an
    existing file is deleted, otherwise its directory is created, and a
    fresh header is written. Each dataset is then read, its original vertex
    and edge counts are recorded on the graph, and OCT and VC reductions are
    alternated until a full pass reports no change. The relabelled result is
    handed to the summary, OCT-set, name-lookup, edgelist, Huffner and SNAP
    writers.

    Parameters
    ----------
    data_names : iterable
        Dataset names, passed one at a time to ``read_edgelist``.
    """
    # Directories of interest
    data_root = Path('.') / 'data'
    input_dir = data_root / 'sanitized'
    output_dir = data_root / 'preprocessed'
    summary_dir = output_dir / 'summary'
    snap_dir = output_dir / 'snap'

    # Remove the old statistics CSV, or make sure its directory exists
    summary_filename = summary_dir / 'quantum.csv'
    if not summary_filename.is_file():
        summary_dir.mkdir(exist_ok=True, parents=True)
    else:
        summary_filename.unlink()

    _write_summary_header(summary_filename)

    for dataset in data_names:
        now = datetime.datetime.fromtimestamp(time.time())
        timestamp = now.strftime('%Y/%m/%d-%H:%M:%S:')
        print('{} Processing {}'.format(timestamp, dataset))

        # Load the graph and remember its size before any reduction
        graph = reset_labels(read_edgelist(input_dir / 'edgelist', dataset))
        graph.graph['original_vertices'] = graph.order()
        graph.graph['original_edges'] = graph.size()

        oct_set = set()

        # Keep alternating the two reductions until a pass changes nothing
        while True:
            print("- Computing OCT reduction")
            graph = reset_labels(graph)
            oct_changed, graph, oct_set = oct_reductions(graph, oct_set)
            if oct_changed:
                print("-- OCT reduced graph")

            print("- Computing VC reduction")
            graph = reset_labels(graph)
            # The SNAP file is written right before the VC step; presumably
            # vc_reductions reads it from disk -- TODO confirm.
            write_snap(graph, snap_dir)
            vc_changed, graph, oct_set = vc_reductions(graph, oct_set)
            if vc_changed:
                print("-- VC reduced graph")

            if not (oct_changed or vc_changed):
                break

        # Write the results
        graph = reset_labels(graph)
        _write_summary(graph, summary_dir, 'quantum.csv')
        _write_oct_set(graph, oct_set, output_dir / 'oct')
        _write_name_lookup(graph, output_dir / 'lookup')
        write_edgelist(graph, output_dir / 'edgelist')
        write_huffner(graph, output_dir / 'huffner')
        write_snap(graph, snap_dir)
    print('Finished preprocessing quantum data')
示例#11
0
    # Obtain the names of the quantum graphs already sanitized
    datasets = names_in_dir(input_dir, '.edgelist')
    # Keep only the non-synthetic data
    datasets = sorted(list(filter(lambda x: '-' not in x, datasets)))

    # Read in the pre-computed optimal OCT sizes
    oct_upper_bound = _populate_oct_upper_bound_lookup()

    # For every dataset and seed, generate a synthetic graph with each model
    for dataset, seed in product(datasets, args.seeds):
        print('For {} and seed {}'.format(dataset, seed))
        # Generate the sanitized ER random graph
        print('- Generating Erdos-Renyi')
        graph = read_edgelist(input_dir, dataset + '.edgelist')
        er_graph = _generate_er(graph, seed)
        reset_labels(er_graph)
        # Write the graph
        write_edgelist(er_graph, sanitized_dir / 'edgelist')
        write_huffner(er_graph, sanitized_dir / 'huffner')
        write_snap(er_graph, sanitized_dir / 'snap')

        # Generate the sanitized CL random graph
        print('- Generating Chung-Lu')
        graph = read_edgelist(input_dir, dataset + '.edgelist')
        cl_graph = _generate_cl(graph, seed)
        reset_labels(cl_graph)
        # Write the graph
        write_edgelist(cl_graph, sanitized_dir / 'edgelist')
        write_huffner(cl_graph, sanitized_dir / 'huffner')
        write_snap(cl_graph, sanitized_dir / 'snap')