Exemplo n.º 1
0
    def from_json(obj: dict, ont: Ontology) -> 'SSD':
        """Build an SSD from its dict (parsed JSON) representation.

        Reads ``obj['mappings']``, ``obj['attributes']``,
        ``obj['semanticModel']['nodes']``, ``obj['semanticModel']['links']``
        and ``obj['name']``.

        Data nodes are labelled with the name of the attribute they are mapped
        to; every other node, and every link, is labelled with
        ``ont.simplify_uri(prefix + label)``. All labels are stored encoded.

        Raises:
            AssertionError: if an attribute does not have exactly one column id
                equal to its own id.
        """
        graph = Graph(True, True, True)
        attr_id_of_node = {m['node']: m['attribute'] for m in obj['mappings']}

        # index the raw attributes by id, validating each one on the way
        attr_by_id = {}
        for raw in obj['attributes']:
            assert len(raw['columnIds']) == 1 \
                and raw['columnIds'][0] == raw['id']
            attr_by_id[raw['id']] = raw

        ssd_attrs = []
        # JSON node id -> id of the node created in `graph`
        graph_id_of = {}
        for node in obj['semanticModel']['nodes']:
            json_id = node['id']
            if node['type'] == 'DataNode':
                kind = GraphNodeType.DATA_NODE
                label = attr_by_id[attr_id_of_node[json_id]]['name']
                ssd_attrs.append(SSDAttribute(json_id, label))
            else:
                kind = GraphNodeType.CLASS_NODE
                label = ont.simplify_uri(node['prefix'] + node['label'])

            graph_id_of[json_id] = graph.add_new_node(kind, label.encode()).id

        for link in obj['semanticModel']['links']:
            label = ont.simplify_uri(link['prefix'] + link['label'])
            graph.add_new_link(GraphLinkType.UNSPECIFIED, label.encode(),
                               graph_id_of[link['source']],
                               graph_id_of[link['target']])

        return SSD(obj['name'], ssd_attrs, graph, ont)
Exemplo n.º 2
0
 def mask_dnode(self, g: Graph) -> Graph:
     """deprecated

     Return a copy of `g` in which every node that is not a class node has its
     label replaced by ``b"DataNode"``; class nodes and all links are copied
     unchanged.

     Raises:
         AssertionError: if a copied node or link does not receive the same id
             as its original (only checked when assertions are enabled).
     """
     g2 = Graph(True, True, True, g.get_n_nodes(), g.get_n_links())
     for n in g.iter_nodes():
         label = n.label if n.type == GraphNodeType.CLASS_NODE else b"DataNode"
         # The mutation must live outside the assert: `python -O` strips assert
         # statements, which previously meant no node was copied at all.
         new_node = g2.add_new_node(n.type, label)
         assert new_node.id == n.id
     for e in g.iter_links():
         # Same reasoning as above: add the link first, then sanity-check it.
         new_link = g2.add_new_link(e.type, e.label, e.source_id, e.target_id)
         assert new_link.id == e.id
     return g2
Exemplo n.º 3
0
 def to_graph(self) -> Graph:
     """Copy this object's nodes and links into a new, fully indexed Graph.

     The new graph carries this object's name and is pre-sized with the
     current node/link counts. Links are re-created with the original
     source/target ids.
     NOTE(review): this presumably relies on the new graph handing out node
     ids in the same order as ``iter_nodes`` -- confirm.
     """
     n_nodes = self.get_n_nodes()
     n_links = self.get_n_links()
     copy = Graph(index_node_type=True, index_node_label=True,
                  index_link_label=True, estimated_n_nodes=n_nodes,
                  estimated_n_links=n_links, name=self.name)

     for node in self.iter_nodes():
         copy.add_new_node(node.type, node.label)

     for link in self.iter_links():
         copy.add_new_link(link.type, link.label, link.source_id,
                           link.target_id)

     return copy
Exemplo n.º 4
0
def generate_children(root, opened, closed, algorithm):
    """Expand `root` into the child states that have not been seen yet.

    One candidate is built per entry of ``root.dots`` (except the position
    `root` itself was produced by touching) by copying the root's state and
    touching that position. Candidates reported by
    ``is_in_opened_closed_lists`` are dropped; the rest get their ``depth``
    and ``parent`` set and are returned in iteration order.
    """
    successors = []
    for dot in root.dots:
        if position_was_just_touched := (dot == root.touched):
            # touching the same position twice only reproduces a known state
            continue

        candidate = Graph(root.n, root.max_d, root.max_l, root.state)
        candidate.touch(dot)

        seen_before = is_in_opened_closed_lists(candidate, opened, closed,
                                                algorithm, root)
        if seen_before:
            continue

        candidate.depth = root.depth + 1
        candidate.parent = root
        successors.append(candidate)
    return successors
Exemplo n.º 5
0
    def clear_serene_footprint(self, remove_unknown: bool = True) -> 'SSD':
        """Rebuild ``self.graph`` without the serene bookkeeping nodes/links.

        The ``serene:All`` node is always dropped. When `remove_unknown` is
        true, the ``serene:Unknown`` node, every node it links to and every
        ``serene:unknown`` link are dropped as well. ``serene:connect`` links
        are never copied. If nothing has to be dropped the graph is left
        untouched.

        Returns:
            This same object (the graph is replaced in place).

        Raises:
            AssertionError: if ``serene:Unknown`` links to a non-data node.
        """
        cleaned = Graph(True, True, True)
        new_id_of = {}

        # locate the two special nodes (the last match wins for each label)
        all_node = None
        unknown_node = None
        for node in self.graph.iter_nodes():
            if node.label == b"serene:All":
                all_node = node
            elif node.label == b"serene:Unknown":
                unknown_node = node

        dropped_ids = set()
        if all_node is not None:
            dropped_ids.add(all_node.id)

        if remove_unknown and unknown_node is not None:
            dropped_ids.add(unknown_node.id)
            # everything hanging off serene:Unknown goes away with it
            for link in self.graph.iter_links():
                if link.source_id != unknown_node.id:
                    continue
                assert link.get_target_node().is_data_node()
                dropped_ids.add(link.target_id)

        if not dropped_ids:
            # no serene footprint to remove
            return self

        for node in self.graph.iter_nodes():
            if node.id not in dropped_ids:
                new_id_of[node.id] = cleaned.add_new_node(node.type,
                                                          node.label).id

        for link in self.graph.iter_links():
            is_connect = link.label == b"serene:connect"
            is_unknown = remove_unknown and link.label == b"serene:unknown"
            if is_connect or is_unknown:
                continue
            cleaned.add_new_link(link.type, link.label,
                                 new_id_of[link.source_id],
                                 new_id_of[link.target_id])

        self.graph = cleaned
        return self
Exemplo n.º 6
0
def make_ssd(sm: SemanticModel, keys: Set[str], ont: Ontology) -> SSD:
    """Convert a semantic model into an SSD.

    One SSDAttribute is created per attribute of `sm`, keeping its id and
    label. The graph is copied node by node; data nodes are relabelled with
    the (encoded) name of the SSDAttribute that shares their id, other nodes
    keep their label. Links are copied with their original endpoints.

    Raises:
        AssertionError: if an attribute label is not contained in `keys`.
    """
    ssd_attr_by_id = {}
    for sm_attr in sm.attrs:
        label = sm_attr.label
        ssd_attr_by_id[sm_attr.id] = SSDAttribute(sm_attr.id, label)
        assert label in keys

    graph = Graph()
    for node in sm.graph.iter_nodes():
        node_label = (ssd_attr_by_id[node.id].name.encode()
                      if node.is_data_node() else node.label)
        graph.add_new_node(node.type, node_label)

    for link in sm.graph.iter_links():
        graph.add_new_link(link.type, link.label, link.source_id,
                           link.target_id)

    return SSD(sm.id, list(ssd_attr_by_id.values()), graph, ont)
Exemplo n.º 7
0
    def apply_cmds(self, tbl: DataTable) -> SemanticModel:
        """Run every command in ``self.commands`` against `tbl` and build its semantic model.

        Commands are processed in order. Some of them transform the table
        (its rows and/or ``tbl.schema`` are mutated in place), others add
        nodes and links to the graph of the resulting model.

        Args:
            tbl: the table the commands operate on; it is modified in place.

        Returns:
            A SemanticModel built from ``tbl.id``, the attributes collected
            from the SetSemanticTypeCmd commands and the constructed graph.

        Raises:
            NotImplementedError: for a command class that is not handled here.
            AssertionError: if an attribute path receives a semantic type
                twice, if the target of an internal link already has an
                incoming link, or if UnpackOneElementListCmd is applied to an
                attribute that is not a list value.
        """
        g = Graph(index_node_type=True,
                  index_node_label=True,
                  index_link_label=True,
                  name=tbl.id.encode("utf-8"))
        attrs: List[Attribute] = []
        # maps both attribute paths and command-level node ids to graph node ids
        id_map: Dict[str, int] = {}

        for cmd in self.commands:
            if isinstance(cmd, PyTransformNewColumnCmd):
                # TODO: fix me! currently the new attr_path is generated from first input_attr_path
                # we should be explicitly about the output, since the first input attr path can be different
                # may be it should be the deepest attr path
                # new path = parent of the first input path + the new attribute name
                new_attr_path = Schema.PATH_DELIMITER.join(
                    cmd.input_attr_paths[0].split(Schema.PATH_DELIMITER)[:-1] +
                    [cmd.new_attr_name])
                # assert not tbl.schema.has_attr_path(new_attr_path)
                # TODO: fix me!! not handle list of input attr path properly (cmd.input_attr_paths[0])
                # the new attribute inherits the type of the first input path
                tbl.schema.add_new_attr_path(
                    new_attr_path,
                    tbl.schema.get_attr_type(cmd.input_attr_paths[0]),
                    cmd.input_attr_paths[-1])
                self.pytransform(tbl, cmd)
            elif isinstance(cmd, SetSemanticTypeCmd):
                lbl = cmd.input_attr_path.encode("utf-8")
                # each attribute path may be given a semantic type only once
                assert cmd.input_attr_path not in id_map
                id_map[cmd.input_attr_path] = g.add_new_node(
                    GraphNodeType.DATA_NODE, lbl).id
                # create the class node on first use, reuse it afterwards
                if cmd.node_id not in id_map:
                    id_map[cmd.node_id] = g.add_new_node(
                        GraphNodeType.CLASS_NODE,
                        cmd.domain.encode("utf-8")).id

                attrs.append(
                    Attribute(id_map[cmd.input_attr_path], cmd.input_attr_path,
                              []))
                # link: class node --(semantic type)--> data node
                g.add_new_link(GraphLinkType.UNSPECIFIED,
                               cmd.type.encode("utf-8"), id_map[cmd.node_id],
                               id_map[cmd.input_attr_path])
            elif isinstance(cmd, SetInternalLinkCmd):
                # both endpoints are class nodes, created lazily
                if cmd.source_id not in id_map:
                    id_map[cmd.source_id] = g.add_new_node(
                        GraphNodeType.CLASS_NODE,
                        cmd.source_uri.encode('utf-8')).id
                if cmd.target_id not in id_map:
                    id_map[cmd.target_id] = g.add_new_node(
                        GraphNodeType.CLASS_NODE,
                        cmd.target_uri.encode('utf-8')).id

                # the target must not have an incoming link yet
                assert g.get_node_by_id(
                    id_map[cmd.target_id]).n_incoming_links == 0
                g.add_new_link(GraphLinkType.UNSPECIFIED,
                               cmd.link_lbl.encode("utf-8"),
                               id_map[cmd.source_id], id_map[cmd.target_id])
            elif isinstance(cmd, ZipAttributesCmd):
                for row in tbl.rows:
                    cmd.zip_attributes(row)
                # TODO: fix me!! re-build schema, which is very expensive
                tbl.rebuild_schema()
            elif isinstance(cmd, UnpackOneElementListCmd):
                assert tbl.schema.get_attr_type(
                    cmd.input_attr) == Schema.LIST_VALUE
                for row in tbl.rows:
                    cmd.unpack(row)
                # after unpacking, the attribute is recorded as a single value
                tbl.schema.update_attr_path(cmd.input_attr,
                                            Schema.SINGLE_VALUE)
            elif isinstance(cmd, AddLiteralColumnCmd):
                # register the new column in the schema, then fill it row by row
                tbl.schema.add_new_attr_path(cmd.input_attr_path,
                                             tbl.schema.SINGLE_VALUE)
                for row in tbl.rows:
                    cmd.add_literal(row)
            elif isinstance(cmd, JoinListCmd):
                for row in tbl.rows:
                    cmd.execute(row)
                # after joining, the attribute is recorded as a single value
                tbl.schema.update_attr_path(cmd.input_attr_path,
                                            Schema.SINGLE_VALUE)
            else:
                raise NotImplementedError(cmd.__class__.__name__)

        return SemanticModel(tbl.id, attrs, g)
Exemplo n.º 8
0
 def __init__(self):
     """Load ``mecca-map.json`` from the working directory and build the graph from it."""
     # A context manager closes the file handle as soon as the JSON is read;
     # the bare `open(...).read()` left that to the garbage collector.
     with open('mecca-map.json') as map_file:
         map_data = json.load(map_file)
     self.graph = Graph(map_data)
Exemplo n.º 9
0
def discovering_func(search_nodes: List[Union[PGMStartSearchNode, PGMSearchNode]],
                     args: PGMBeamSearchArgs) -> List[PGMSearchNode]:
    """Expand the given beam-search nodes into the next set of search nodes.

    Candidate merges between terminal graphs are enumerated, optionally
    filtered with ``args.pre_filter_func``, scored with
    ``args.predict_graph_prob_func``, and the ``args.beam_width`` highest
    scoring ones are turned into new PGMSearchNode objects.

    Two cases are distinguished by the type of ``search_nodes[0]``:

    * PGMStartSearchNode: a one-node graph (plus explorer and score 1) is
      created for every remaining terminal and stored on the start node;
      each terminal in ``args.top_attributes`` is then paired with every
      other remaining terminal.
    * otherwise: for every search node, its working terminal is paired with
      each distinct remaining terminal.

    Args:
        search_nodes: nodes of the current beam; indexed at 0, so it must not
            be empty.
        args: search configuration (explorer builder, filters, scoring
            function, beam width, id generator).

    Returns:
        The newly created search nodes, best score first.
    """
    global _logger
    next_nodes: List[PGMSearchNode] = []
    # each entry: (T_i, T_j, plan, originating search node, MergeGraph)
    merged_plans = []

    if isinstance(search_nodes[0], PGMStartSearchNode):
        # can only have one starter node
        search_node = search_nodes[0]
        G_explorers: Dict[bytes, GraphExplorer] = {}
        G_terminals: Dict[bytes, Graph] = {}
        G_scored: Dict[bytes, float] = {}

        # create graph & graph explorer for each terminals
        for terminal in search_node.remained_terminals:
            g: Graph = Graph(index_node_type=True, index_node_label=True)
            g.add_new_node(GraphNodeType.DATA_NODE, terminal)

            G_terminals[terminal] = g
            G_scored[terminal] = 1
            G_explorers[terminal] = args.graph_explorer_builder.build(g)

        # NOTE(review): pmap/pvector are presumably pyrsistent's persistent
        # containers, so later .set()/.remove() calls return new objects -- confirm
        search_node.G_terminals = pmap(G_terminals)
        search_node.G_scored = pmap(G_scored)
        search_node.G_explorers = pmap(G_explorers)

        search_node.remained_terminals = pvector(search_node.remained_terminals)

        # find all possible merge points between every terminal pair & release them as terminal nodes
        # TOO EXPENSIVE
        # for T_i, T_j in (
        #         tuple(c)
        #         for c in unique_values(frozenset(c) if c[0] != c[1] else c for c in combinations(search_node.remained_terminals, 2))):
        #     G_ti, G_tj = G_terminals[T_i], G_terminals[T_j]
        # cheaper alternative: only pair the top attributes with the remaining terminals
        for T_i in args.top_attributes:
            for T_j in search_node.remained_terminals:
                if T_i == T_j:
                    continue
                G_ti, G_tj = G_terminals[T_i], G_terminals[T_j]

                merged_plans += [(T_i, T_j, plan, search_node,
                                  MergeGraph.create(G_ti, G_tj, plan.int_tree, plan.int_a, plan.int_b))
                                 for plan in py_make_plan4case1(G_ti, G_tj, G_explorers[T_i], G_explorers[T_j])]

        # filter to speed things up: keep only merged graphs with exactly 3 nodes (because the good result is
        # usually two data nodes connected to one single class node)
        merged_plans = [x for x in merged_plans if x[-1].get_n_nodes() == 3]
    else:
        for search_node in search_nodes:
            T_i = search_node.working_terminal
            G_ti_explorer = search_node.G_explorers[T_i]
            G_ti = search_node.G_terminals[T_i]
            # merge the working terminal's graph with each distinct remaining terminal
            for T_j in unique_values(search_node.remained_terminals):
                G_tj = search_node.G_terminals[T_j]
                merged_plans += [(T_i, T_j, plan, search_node,
                                  MergeGraph.create(G_ti, G_tj, plan.int_tree, plan.int_a, plan.int_b))
                                 for plan in make_merge_plans(G_ti, G_tj, G_ti_explorer, search_node.G_explorers[T_j])]

    # optional user-supplied filter, applied to the merge graph of each plan
    if args.pre_filter_func is not None:
        n_next_states = len(merged_plans)
        filtered_merged_plans = []
        for merged_plan in merged_plans:
            if args.pre_filter_func(merged_plan[-1]):
                filtered_merged_plans.append(merged_plan)

        merged_plans = filtered_merged_plans
        _logger.debug("(%s) #possible next states: %s (filtered down to: %s)", args.source_id, n_next_states, len(merged_plans))
    else:
        _logger.debug("(%s) #possible next states: %s", args.source_id, len(merged_plans))

    # score every candidate merge graph in one call
    merged_graphs = [x[-1] for x in merged_plans]
    merged_probs = args.predict_graph_prob_func(merged_graphs)

    # keep the beam_width candidates with the highest score
    best_plans = sorted(
        zip(merged_plans, merged_graphs, merged_probs), key=lambda x: x[-1], reverse=True)[:args.beam_width]

    # from the start node both merged terminals are consumed, so T_i has to be
    # removed from the remaining terminals as well
    need_remove_T_i: bool = isinstance(search_nodes[0], PGMStartSearchNode)

    for merged_plan, merged_graph, score, in best_plans:
        T_i, T_j, __, search_node, __ = merged_plan
        # the merged graph is registered under a key combining both terminals
        working_terminal = b'%b---%b' % (T_i, T_j)
        remained_terminals = search_node.remained_terminals.remove(T_j)
        if need_remove_T_i:
            remained_terminals = remained_terminals.remove(T_i)

        g: Graph = merged_graph.proceed_merging()
        current_G_explorers = search_node.G_explorers.set(working_terminal, args.graph_explorer_builder.build(g))
        current_G_terminals = search_node.G_terminals.set(working_terminal, g)
        current_G_scored = search_node.G_scored.set(working_terminal, score)
        next_nodes.append(
            PGMSearchNode(args.get_and_increment_id(), args, working_terminal, remained_terminals, current_G_explorers,
                          current_G_terminals, current_G_scored))

    return next_nodes
Exemplo n.º 10
0
class FindRoute:
    """a class to find route between 2 cities using depth first search

    Creating an instance runs the whole interactive flow: the graph is set
    up, the user is asked for a start and a target city, and the search is
    executed while its progress is printed.
    """
    start_city = None
    target_city = None

    def __init__(self):
        # The graph and the stack are per-instance state. As class attributes
        # they were shared by every FindRoute, so a second instance started
        # with whatever the previous search had left on the stack.
        self.graph = Graph()
        self.stack = Stack()
        self.setup_graph()
        self.set_starting_city()
        self.set_target_city()
        self.depth_first_search()

    def setup_graph(self):
        """fill the graph with the cities and their neighbouring cities"""
        self.graph.add_node("Buraydah",
                            ["Unayzah", "Riyadh-Alkhabra", "Al-Bukayriyah"])
        self.graph.add_node("Unayzah", ["AlZulfi", "Al-Badai", "Buraydah"])
        self.graph.add_node("Riyadh-Alkhabra", ["Buraydah", "Al-Badai"])
        self.graph.add_node("Al-Bukayriyah", ["Buraydah", "Sheehyah"])
        self.graph.add_node("AlZulfi", ["Unayzah", "UmSedrah"])
        self.graph.add_node("Al-Badai",
                            ["Unayzah", "Riyadh-Alkhabra", "AlRass"])
        self.graph.add_node("Sheehyah", ["Al-Bukayriyah", "Dhalfa"])
        self.graph.add_node("UmSedrah", ["AlZulfi", "Shakra"])
        self.graph.add_node("AlRass", ["Al-Badai"])
        self.graph.add_node("Dhalfa", ["Sheehyah", "Mulaida"])
        self.graph.add_node("Shakra", ["UmSedrah"])
        self.graph.add_node("Mulaida", ["Dhalfa"])
        print("graph has been setup.")

    def set_starting_city(self):
        """set the starting city as a string"""
        cities = self.graph.get_all_nodes()
        print("Choose a city number to start with:" + "\n")
        self.start_city = cities[get_user_choice(cities)]
        print("you will start at", self.start_city, "city")

    def set_target_city(self):
        """set the target city as a string"""
        cities = self.graph.get_all_nodes()
        print("Choose a city number as a target:" + "\n")
        self.target_city = cities[get_user_choice(cities)]
        print("your target city is", self.target_city, "city")

    def depth_first_search(self):
        """travel from start_city to target_city from left to right while logging the traveled cities"""
        visited_cities = []
        print("----------Depth First Search Traverse-------------")
        self.stack.unique_push(self.start_city)

        while not self.stack.is_empty():
            current_city = self.stack.pop()
            visited_cities.append(current_city)
            print("I am at", current_city, "city")
            self.stack.display()
            print("Visited cities are:", visited_cities)
            # Compare by value: `is` only matched when both names happened to
            # reference the very same string object.
            if current_city == self.target_city:
                print("I have reached the target city")
                break
            else:
                children_of_city = self.graph.get_children_of_node(
                    current_city)
                print("The children cities are:", children_of_city)
                for city in children_of_city:
                    # push to stack if not visited and not in stack
                    if city not in visited_cities:
                        self.stack.unique_push(city)

                self.stack.display()
            print("-----------------------------------------")
0
def render_factor_graph(model_or_factors: Union[LogLinearModel, List[Factor]],
                        vars: List[TripleLabel], fpath: str):
    """Render factor graph for debugging

    Every variable and every factor becomes a node of a throw-away graph that
    is rendered to `fpath` as a PDF. A factor is connected to each of its
    unobserved variables; a variable whose triple has a parent is connected
    to the parent's label.

    `model_or_factors` is either the list of factors itself or a
    LogLinearModel, in which case the factors are obtained with
    ``get_factors(vars)``.

    Side effect: a ``myid`` attribute is set on every variable and factor.
    """
    factors = (model_or_factors.get_factors(vars)
               if isinstance(model_or_factors, LogLinearModel) else
               model_or_factors)

    def get_fnode_lbl(fnode: Union[TripleLabel, Factor]) -> bytes:
        # factors are labelled with their class name
        if isinstance(fnode, Factor):
            return fnode.__class__.__name__.encode('utf-8')

        # variables are labelled with the two endpoints of their link
        src = fnode.triple.link.get_source_node()
        tgt = fnode.triple.link.get_target_node()
        text = "%s:%s--%s:%s" % (src.id, src.label.decode('utf-8'), tgt.id,
                                 tgt.label.decode('utf-8'))
        return text.encode('utf-8')

    class Node(GraphNode):
        def __init__(self, fnode: Union[TripleLabel, Factor]) -> None:
            super().__init__()
            self.fnode = fnode

        def get_dot_format(self, max_text_width: int):
            printed = self.get_printed_label(max_text_width)
            label = printed.encode('unicode_escape').decode()
            if isinstance(self.fnode, Variable):
                template = '"%s"[style="filled",color="white",fillcolor="gold",label="%s"];'
            else:
                template = '"%s"[shape="plaintext",style="filled",fillcolor="lightgray",label="%s"];'
            return template % (self.id, label)

    class Link(GraphLink):
        var2factor = "var2factor"
        var2var = "var2var"

        def __init__(self, link_type: str) -> None:
            super().__init__()
            self.link_type = link_type

        def get_dot_format(self, max_text_width: int):
            printed = self.get_printed_label(max_text_width)
            label = printed.encode('unicode_escape').decode()
            if self.link_type == Link.var2factor:
                template = '"%s" -> "%s"[dir=none,color="brown",fontcolor="black",label="%s"];'
            else:
                template = '"%s" -> "%s"[color="brown",style="dashed",fontcolor="black",label="%s"];'
            return template % (self.source_id, self.target_id, label)

    g = Graph()

    # build graphs
    # position of each variable/factor in the combined sequence -> its node id
    fnode2id: Dict[Union[Variable, Factor], int] = _(
        vars, factors).enumerate().imap(
            lambda pair: (pair[1], pair[0])).todict()

    def add_node(fnode):
        return g.real_add_new_node(Node(fnode), GraphNodeType.CLASS_NODE,
                                   get_fnode_lbl(fnode))

    _(vars, factors).forall(lambda fnode: add_node(fnode))

    for factor in factors:
        factor_id = fnode2id[factor]
        for var in factor.unobserved_variables:
            g.real_add_new_link(Link(Link.var2factor),
                                GraphLinkType.UNSPECIFIED, b"", fnode2id[var],
                                factor_id)

    for var in vars:
        parent = var.triple.parent
        if parent is None:
            continue
        g.real_add_new_link(Link(Link.var2var), GraphLinkType.UNSPECIFIED,
                            b"", fnode2id[parent.label], fnode2id[var])

    for var in vars:
        var_id = fnode2id[var]
        var.myid = "%s: %s" % (var_id, g.get_node_by_id(var_id).label)
    for factor in factors:
        factor.myid = fnode2id[factor]

    g.render2pdf(fpath)