Example #1
    def predict_and_compute(self):
        assert self.predictor is not None
        input_instances, output_graphs = self.manager.run()

        if len(output_graphs) > 0 and isinstance(output_graphs[0], tuple):
            # ignore conllu graphs here
            output_graphs = [x[0] for x in output_graphs]

        input_graphs = [
            inst.fields['graph'].metadata for inst in input_instances
        ]
        input_sents = [
            inst.fields['src_tokens_str'].metadata for inst in input_instances
        ]

        if self.pred_args.save_pred_path is not None:
            # save serialized graphs to pkl file
            try:
                self.save_graphs([nx.adjacency_data(x) for x in input_graphs],
                                 [nx.adjacency_data(x) for x in output_graphs],
                                 self.pred_args.save_pred_path)
            except AttributeError:
                self.save_graphs(
                    [nx.adjacency_data(x) for x in input_graphs],
                    [nx.adjacency_data(x[0])
                     for x in output_graphs], self.pred_args.save_pred_path)

        return compute_s_metric(
            input_graphs,
            output_graphs,
            input_sents,
            semantics_only=self.semantics_only,
            drop_syntax=self.drop_syntax,
            include_attribute_scores=self.include_attribute_scores)
Example #2
def save_qrep(fn, cur_qrep):
    assert ".pkl" in fn
    qrep = copy.deepcopy(cur_qrep)
    qrep["join_graph"] = nx.adjacency_data(qrep["join_graph"])
    qrep["subset_graph"] = nx.adjacency_data(qrep["subset_graph"])

    with open(fn, "wb") as f:
        pickle.dump(qrep, f)
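
Since nx.adjacency_data returns a plain dict, the pickled qrep can be
restored with nx.adjacency_graph. A minimal counterpart sketch (the
load_qrep name is assumed, not taken from the source):

import pickle

import networkx as nx

def load_qrep(fn):
    # hypothetical inverse of save_qrep: unpickle, then rebuild the
    # networkx graphs from their adjacency-data dicts
    assert ".pkl" in fn
    with open(fn, "rb") as f:
        qrep = pickle.load(f)
    qrep["join_graph"] = nx.adjacency_graph(qrep["join_graph"])
    qrep["subset_graph"] = nx.adjacency_graph(qrep["subset_graph"])
    return qrep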
Example #3
def parse_sql(sql,
              user,
              db_name,
              db_host,
              port,
              pwd,
              timeout=False,
              compute_ground_truth=True,
              subset_cache_dir="./subset_cache/"):
    '''
    @sql: sql query string.

    @ret: python dict with the keys:
        sql: original sql string
        join_graph: networkX graph representing the query and its
        join edges. Properties include:
            Nodes:
                - table
                - alias
                - predicate matches
            Edges:
                - join_condition

            Note: This is the only place where these strings will be stored.
            Each of the subplans will be represented by their nodes within
            the join_graph, and we can use these properties to reconstruct the
            appropriate query for each subplan.

        subset_graph: networkX graph representing each subplan as a node.

        Properties of each subplan will include all the cardinality data that
        will need to be computed:
            - true_count
            - pg_count
            - total_count
    '''
    start = time.time()
    join_graph = extract_join_graph(sql)
    subset_graph = generate_subset_graph(join_graph)

    print("query has", len(join_graph.nodes), "relations,",
          len(join_graph.edges), "joins, and", len(subset_graph),
          "possible subplans.", "took:",
          time.time() - start)

    ret = {}
    ret["sql"] = sql
    ret["join_graph"] = join_graph
    ret["subset_graph"] = subset_graph

    ret["join_graph"] = nx.adjacency_data(ret["join_graph"])
    ret["subset_graph"] = nx.adjacency_data(ret["subset_graph"])
    return ret
Example #4
def write_nx_to_file(nx_graph, filename):

    print(f"writing networkx output file: {filename}")
    json_out = nx.adjacency_data(nx_graph)

    with open(filename, "w", encoding="utf-8") as f:
        json.dump(json_out, f)
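
A file written this way can be read back with json.load followed by
nx.adjacency_graph, which honors the "directed"/"multigraph" flags
stored in the dict. A sketch of a counterpart (function name assumed):

import json

import networkx as nx

def read_nx_from_file(filename):
    # hypothetical inverse of write_nx_to_file: load the adjacency-data
    # dict and rebuild the graph from it
    with open(filename, "r", encoding="utf-8") as f:
        json_in = json.load(f)
    return nx.adjacency_graph(json_in)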
Example #5
    def from_ud_lines(cls, path):
        with open(path) as f1:
            lines = f1.readlines()

        graphs = {}
        for i, line in enumerate(lines):
            try:
                sent, tags = line.split("\t")
            except ValueError:
                pdb.set_trace()
            tags = tags.strip()
            tags = tags.split(",")
            sentence = sent
            empty_graph = nx.DiGraph()
            empty_graph.add_node("-root-0")
            empty_graph.nodes["-root-0"]['type'] = 'root'
            empty_graph.nodes["-root-0"]['domain'] = 'semantics'
            empty_graph.nodes["-root-0"]['frompredpatt'] = False
            empty_graph.nodes["-root-0"]['sentence'] = sentence
            empty_graph.nodes["-root-0"]['pos_tags'] = tags

            name = f"test_graph_{i}"
            graph_data = nx.adjacency_data(empty_graph)
            g = UDSSentenceGraph.from_dict(graph_data, name)
            graphs[name] = g

        return cls(graphs)
Example #6
def ba_graph(name, n):
    print("***")
    # n = random.randint(10,15)
    G = nx.barabasi_albert_graph(
        n, 2
    )  # n: number of nodes; m=2: edges to attach from each new node

    for l in G.edges():
        G.edges[l]["weight"] = random.randint(weight[0], weight[1])
        G.edges[l]["sp"] = 1
    mapping = dict(zip(G.nodes(), range(1, n + 1)))
    G1 = nx.relabel_nodes(G, mapping)  # relabel nodes as 1..n
    nx.draw(G1, with_labels=True, font_weight='bold')
    plt.savefig("graph_" + name + ".png")  # save as png
    plt.close()
    print(G1.nodes())
    print(G1.nodes().data())
    G1_nl_format = nx.node_link_data(G1)
    G1_ad_format = nx.adjacency_data(
        G1)  # returns the graph in adjacency format

    print("G1_nl_format:", G1_nl_format)
    print("G1_adj_format:", G1_ad_format)
    with open('topo_' + name + '.json', 'w') as json_file:
        json.dump(G1_ad_format, json_file)
    return G1
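
The two serializations printed above differ in shape: node_link_data
keeps a flat "links" list of edges, while adjacency_data nests each
node's neighbors under "adjacency". A tiny self-contained comparison
(toy graph, not from the source; exact keys can vary by networkx
version):

import networkx as nx

G = nx.Graph([(1, 2)])
# flat edge list, roughly:
# {'directed': False, 'multigraph': False, 'graph': {},
#  'nodes': [{'id': 1}, {'id': 2}], 'links': [{'source': 1, 'target': 2}]}
print(nx.node_link_data(G))
# per-node neighbor lists, roughly:
# {'directed': False, 'multigraph': False, 'graph': [],
#  'nodes': [{'id': 1}, {'id': 2}], 'adjacency': [[{'id': 2}], [{'id': 1}]]}
print(nx.adjacency_data(G))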
Example #7
def fix_qrep(qrep):
    # json-ify the graphs
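    # note: nx.OrderedDiGraph was removed in networkx 3.0; on modern
    # versions plain nx.DiGraph (which preserves insertion order) plays
    # the same role here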
    qrep["subset_graph"] = nx.adjacency_data(
        nx.OrderedDiGraph(qrep["subset_graph"]))
    for nd in qrep["join_graph"].nodes(data=True):
        data = nd[1]
        for i, col in enumerate(data["pred_cols"]):
            # add pred related feature
            cmp_op = data["pred_types"][i]
            if cmp_op == "in" or \
                    "like" in cmp_op or \
                    cmp_op == "eq":
                val = data["pred_vals"][i]
                if isinstance(val, dict):
                    val = [val["literal"]]
                elif not hasattr(val, "__len__"):
                    val = [val]
                elif isinstance(val[0], dict):
                    val = val[0]["literal"]
                val = set(val)
                data["pred_vals"][i] = val
    qrep["join_graph"] = nx.adjacency_data(qrep["join_graph"])
Example #8
 def save_file(self, path: str, file_type: str):
     logger.debug(locals())
     if not self.graph:
         raise ValueError("No graph to save!")
     if "json" in file_type.lower():
         if "node link graph" in file_type.lower():
             with pathlib.Path(path).open("w") as file_p:
                 data = networkx.node_link_data(self.graph)
                 json.dump(data, file_p)
                 del data
         elif "adjacency graph" in file_type.lower():
             with pathlib.Path(path).open("w") as file_p:
                 data = networkx.adjacency_data(self.graph)
                 json.dump(data, file_p)
                 del data
         else:
             raise NotImplementedError()
     else:
         raise NotImplementedError()
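
A load counterpart would dispatch on the same file_type strings to the
matching json_graph loader; a hedged sketch (load_file is assumed, not
part of the original class):

 def load_file(self, path: str, file_type: str):
     # hypothetical inverse of save_file: json.load the dict, then
     # rebuild the graph with the loader matching the writer used above
     if "json" not in file_type.lower():
         raise NotImplementedError()
     with pathlib.Path(path).open("r") as file_p:
         data = json.load(file_p)
     if "node link graph" in file_type.lower():
         self.graph = networkx.node_link_graph(data)
     elif "adjacency graph" in file_type.lower():
         self.graph = networkx.adjacency_graph(data)
     else:
         raise NotImplementedError()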
Example #9
    def from_single_line(cls, line):
        def tokenize(sent):
            # TODO: real tokenization here
            return sent.split(" ")

        if isinstance(line, str):
            lines = [line]
        else:
            lines = line

        graphs = {}
        for i, line in enumerate(lines):
            sentence = line.strip()
            empty_graph = nx.DiGraph()
            empty_graph.add_node("test-root-0")
            empty_graph.nodes["test-root-0"]['type'] = 'root'
            empty_graph.nodes["test-root-0"]['domain'] = 'semantics'
            empty_graph.nodes["test-root-0"]['frompredpatt'] = False
            empty_graph.nodes["test-root-0"]['sentence'] = sentence
            # use a separate index for tokens so the outer `i` (the line
            # index) is not shadowed when naming the graph below
            for j, node_name in enumerate(tokenize(sentence)):
                empty_graph.add_node(f"test-syntax-{j+1}")
                empty_graph.nodes[f"test-syntax-{j+1}"]["form"] = node_name
                empty_graph.nodes[f"test-syntax-{j+1}"]["domain"] = 'syntax'
                empty_graph.nodes[f"test-syntax-{j+1}"]["type"] = 'token'
                empty_graph.nodes[f"test-syntax-{j+1}"]["position"] = j + 1
            name = f"test_graph_{i}"
            graph_data = nx.adjacency_data(empty_graph)
            g = UDSSentenceGraph.from_dict(graph_data, name)
            for node in g.nodes:
                if 'type' not in g.nodes[node].keys():
                    g.nodes[node]['type'] = None
                if 'domain' not in g.nodes[node].keys():
                    g.nodes[node]['domain'] = 'syntax'

            g.nodes["test-root-0"]['type'] = 'root'
            g.nodes["test-root-0"]['sentence'] = sentence
            g.nodes["test-root-0"]['domain'] = "semantics"

            graphs[name] = g

        return cls(graphs)
Example #10
 def serialize(self):
     return nx.adjacency_data(self.arbor_graph)
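
The inverse is a single call as well; a minimal sketch (deserialize is
an assumed name, not part of the original class):

import networkx as nx

def deserialize(data):
    # hypothetical inverse of serialize(): rebuild the graph from its
    # adjacency-data dict
    return nx.adjacency_graph(data)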
Example #11
    def to_dict(self) -> Dict:
        """Convert the graph to a dictionary"""

        return adjacency_data(self.graph)
Example #12
def before_request():
    # store empty graph
    if 'graph' not in session:
        session['graph'] = nx.adjacency_data(nx.Graph())
Example #13
def setSessionGraph(graph):
    session['graph'] = nx.adjacency_data(graph)
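
A matching getter (name assumed) rebuilds the graph from the dict kept
in the Flask session:

def getSessionGraph():
    # hypothetical counterpart to setSessionGraph: the session stores an
    # adjacency-data dict, so convert it back to a networkx graph
    return nx.adjacency_graph(session['graph'])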
Example #14
import torch
import os.path as osp
import torch.nn.functional as F
from torch.nn import ModuleList
from torch_geometric.datasets import KarateClub
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, ChebConv  # noqa
from torch_geometric.utils import convert
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import networkx as nx

G = nx.karate_club_graph()
adj = nx.adjacency_data(G)

colors = [
    '#ffc0cb', '#bada55', '#008080', '#420420', '#7fe5f0', '#065535', '#ffd700'
]
real_label = []
label = [
    0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1
]

for c in adj['nodes']:
    if c['club'] == 'Mr. Hi':
        real_label += [0]
    else:
        real_label += [1]
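
The loop above works because each element of adj['nodes'] merges the
node's attribute dict with its id. A minimal self-contained check (toy
snippet, not from the source):

import networkx as nx

G = nx.karate_club_graph()
adj = nx.adjacency_data(G)
# prints something like {'club': 'Mr. Hi', 'id': 0}, so the 'club'
# attribute can be read off each node entry directly
print(adj['nodes'][0])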
Example #15
import networkx as nx
import matplotlib.pyplot as plt

G = nx.Graph()
G.add_edge("a", "b", weight=0.31)
G.add_edge("q", "b", weight=0.50)

print(G.get_edge_data("a", "b"))

nx.complete_graph(3)
print(nx.adjacency_data(G))

nx.draw(G, with_labels=True)  # nx.draw has no with_weights option; see the sketch below

plt.show()
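
nx.draw does not render edge weights by itself; labeling edges takes
nx.draw_networkx_edge_labels with an explicit layout. A minimal sketch
of the same graph with weight labels:

import matplotlib.pyplot as plt
import networkx as nx

G = nx.Graph()
G.add_edge("a", "b", weight=0.31)
G.add_edge("q", "b", weight=0.50)

pos = nx.spring_layout(G)  # one layout shared by nodes and edge labels
nx.draw(G, pos, with_labels=True)
nx.draw_networkx_edge_labels(
    G, pos, edge_labels=nx.get_edge_attributes(G, "weight"))
plt.show()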
Example #16
def parse_sql(sql,
              user,
              db_name,
              db_host,
              port,
              pwd,
              timeout=False,
              compute_ground_truth=True,
              subset_cache_dir="./subset_cache/"):
    '''
    @sql: sql query string.

    @ret: python dict with the keys:
        sql: original sql string
        join_graph: networkX graph representing the query and its
        join edges. Properties include:
            Nodes:
                - table
                - alias
                # FIXME: matches, or separate it out into ops AND predicates
                - matches
            Edges:
                - join_condition

            Note: This is the only place where these strings will be stored.
            Each of the subqueries will be represented by their nodes within
            the join_graph, and we can use these properties to reconstruct the
            appropriate query for the subsets.

        subset_graph: networkX graph representing each subquery.
        Properties include all the ground truth data that will need to be
        computed:
            - true_count
            - pg_count
            - total_count
    '''
    start = time.time()
    join_graph = extract_join_graph(sql)
    subset_graph = generate_subset_graph(join_graph)

    print("query has", len(join_graph.nodes), "relations,",
          len(join_graph.edges), "joins, and", len(subset_graph),
          "possible subsets.", "took:",
          time.time() - start)

    ret = {}
    ret["sql"] = sql
    ret["join_graph"] = join_graph
    ret["subset_graph"] = subset_graph

    if not compute_ground_truth:
        ret["join_graph"] = nx.adjacency_data(ret["join_graph"])
        ret["subset_graph"] = nx.adjacency_data(ret["subset_graph"])
        return ret

    assert user is not None
    make_dir(subset_cache_dir)
    subset_cache_file = subset_cache_dir + get_subset_cache_name(sql)
    # we should check and see which cardinalities of the subset graph
    # we already know. Note that we have to cache at this level because
    # the maximal matching might make arbitrary choices each time.
    with shelve.open(subset_cache_file) as cache:
        if sql in cache:
            currently_stored = cache[sql]
        else:
            currently_stored = {}

    unknown_subsets = subset_graph.copy()
    unknown_subsets = unknown_subsets.subgraph(subset_graph.nodes -
                                               currently_stored.keys())

    print(len(unknown_subsets.nodes), "/", len(subset_graph.nodes),
          "subsets still unknown (", len(currently_stored), "known )")

    # let us update the ground truth values
    edges = get_optimal_edges(unknown_subsets)
    paths = list(reconstruct_paths(edges))
    for p in paths:
        for el1, el2 in zip(p, p[1:]):
            assert len(el1) > len(el2)

    # ensure the paths we constructed cover every possible path
    sanity_check_unknown_subsets = unknown_subsets.copy()
    for n1, n2 in edges.items():
        if n1 in sanity_check_unknown_subsets.nodes:
            sanity_check_unknown_subsets.remove_node(n1)
        if n2 in sanity_check_unknown_subsets.nodes:
            sanity_check_unknown_subsets.remove_node(n2)

    assert len(sanity_check_unknown_subsets.nodes) == 0

    subset_sqls = []

    for path in paths:
        join_order = [tuple(sorted(x)) for x in path_to_join_order(path)]
        join_order.reverse()
        sql_to_exec = nodes_to_sql(join_order, join_graph)
        if compute_ground_truth:
            prefix = "explain (analyze, timing off, format json) "
        else:
            prefix = "explain (analyze off, timing off, format json) "
        sql_to_exec = prefix + sql_to_exec
        subset_sqls.append(sql_to_exec)

    print("computing all", len(unknown_subsets),
          "unknown subset cardinalities with", len(subset_sqls), "queries")

    pre_exec_sqls = []

    # TODO: if we use the min #queries approach, maybe greedy approach and
    # letting pg choose join order is better?
    pre_exec_sqls.append("set join_collapse_limit to 1")
    pre_exec_sqls.append("set from_collapse_limit to 1")
    if timeout:
        pre_exec_sqls.append("set statement_timeout = {}".format(timeout))

    sanity_check_unknown_subsets = unknown_subsets.copy()
    for idx, path_sql in enumerate(bar(subset_sqls)):
        res = execute_query(path_sql, user, db_host, port, pwd, db_name,
                            pre_exec_sqls)
        if res is None:
            print("Query failed to execute, ignoring.")
            breakpoint()
            continue

        plan = res[0][0][0]
        plan_tree = plan["Plan"]
        results = list(analyze_plan(plan_tree))
        for result in results:
            # this assertion is invalid because PG may choose to use an implicit join predicate,
            # for example, if a.c1 = b.c1 and b.c1 = c.c1, then PG may choose to join on a.c1 = c.c1
            # assert nx.is_connected(join_graph.subgraph(result["aliases"])), (result["aliases"], plan_tree)
            aliases_key = tuple(sorted(result["aliases"]))
            if compute_ground_truth:
                currently_stored[aliases_key] = {
                    "expected": result["expected"],
                    "actual": result["actual"]
                }
            else:
                currently_stored[aliases_key] = {
                    "expected": result["expected"]
                }

            if aliases_key in sanity_check_unknown_subsets.nodes:
                sanity_check_unknown_subsets.remove_node(aliases_key)

        if idx % 5 == 0:
            with shelve.open(subset_cache_file) as cache:
                cache[sql] = currently_stored

    print(len(currently_stored), "total subsets now known")

    assert len(sanity_check_unknown_subsets.nodes) == 0

    with shelve.open(subset_cache_file) as cache:
        cache[sql] = currently_stored

    for node in subset_graph.nodes:
        subset_graph.nodes[node]["cardinality"] = currently_stored[node]

    print("total time:", time.time() - start)

    # json-ify the graphs
    ret["join_graph"] = nx.adjacency_data(ret["join_graph"])
    ret["subset_graph"] = nx.adjacency_data(ret["subset_graph"])

    return ret
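
Because parse_sql returns the graphs already json-ified, callers that
need networkx objects again rebuild them with nx.adjacency_graph; a
short sketch (helper name assumed):

import networkx as nx

def unjsonify_qrep(qrep):
    # hypothetical helper: invert the adjacency_data calls at the end of
    # parse_sql so both graphs are networkx objects again
    qrep["join_graph"] = nx.adjacency_graph(qrep["join_graph"])
    qrep["subset_graph"] = nx.adjacency_graph(qrep["subset_graph"])
    return qrep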