Пример #1
0
def create_rule(subtree: Set[int], g: LightMultiGraph, mode: str) -> Tuple[PartRule, List[Tuple[int, int]]]:
    """
    Build a grammar rule whose right-hand side is the subgraph of ``g`` induced by ``subtree``.

    :param subtree: nodes of ``g`` that make up the rule's right-hand side
    :param g: the host graph the subtree lives in
    :param mode: 'full' builds a FullRule, 'part' builds a PartRule, any other value builds a NoRule
    :return: the pair (rule, boundary_edges), where boundary_edges is the result of
        find_boundary_edges(g, subtree) and the rule's lhs is the number of those edges
    """
    induced = g.subgraph(subtree).copy()
    assert isinstance(induced, LightMultiGraph)

    boundary_edges = find_boundary_edges(g, subtree)
    lhs_size = len(boundary_edges)

    if mode == 'full':
        # full information: the boundary edges are added to the RHS (flagged with b=True)
        # and the RHS is then contracted
        rule = FullRule(lhs=lhs_size, internal_nodes=subtree, graph=induced)

        for edge in boundary_edges:
            n_fields = len(edge)
            if n_fields == 2:
                src, dst = edge
                rule.graph.add_edge(src, dst, b=True)
            elif n_fields == 3:
                src, dst, data = edge
                rule.graph.add_edge(src, dst, attr_dict=data, b=True)

        rule.contract_rhs()  # contract and generalize
    else:
        if mode == 'part':
            # partial boundary information: only the boundary degrees are recorded
            rule = PartRule(lhs=lhs_size, graph=induced)
            set_boundary_degrees(g, rule.graph)
        else:
            rule = NoRule(lhs=lhs_size, graph=induced)
        rule.generalize_rhs()

    return rule, boundary_edges
Пример #2
0
def compress_graph(g: LightMultiGraph, subtree: Set[int], boundary_edges: Any,
                   permanent: bool) -> Union[None, float]:
    """
    Collapse ``subtree`` into a single node of ``g``.

    The nodes of ``subtree`` are removed, a node with id ``min(subtree)`` and
    ``label=len(boundary_edges)`` is added, and every boundary edge is
    re-attached to that node.

    :param g: the graph; it is modified in place
    :param subtree: the set of nodes that's compressed (must be non-empty)
    :param boundary_edges: edges crossing the subtree boundary, each either ``(u, v)`` or
        ``(u, v, data)``; when None they are computed with find_boundary_edges(g, subtree)
    :param permanent: if False, graph_dl(g) is evaluated on the compressed graph and the
        compression is then undone
    :return: None when ``permanent`` is True, otherwise the value of graph_dl(g) measured on
        the compressed graph
    """
    assert len(
        subtree
    ) > 0, f'Empty subtree g:{g.order(), g.size()}, bound: {boundary_edges}'
    before = (g.order(), g.size())

    if not isinstance(subtree, set):
        subtree = set(subtree)

    if boundary_edges is None:
        # compute the boundary edges
        boundary_edges = find_boundary_edges(g, subtree)

    # step 1: remove the nodes from subtree; remember them only if we have to undo later
    removed_edges = []
    removed_nodes = []
    if not permanent:
        induced = g.subgraph(subtree)
        removed_edges = list(induced.edges(data=True))
        removed_nodes = list(induced.nodes(data=True))
    g.remove_nodes_from(subtree)
    new_node = min(subtree)

    # step 2: replace subtree with new_node
    g.add_node(new_node, label=len(boundary_edges))

    # step 3: rewire new_node
    for edge in boundary_edges:
        u, v = edge[0], edge[1]
        if u in subtree:
            u = new_node
        if v in subtree:
            v = new_node
        if len(edge) == 3:
            # boundary edge given with a data dict: keep its weight, as the undo step below does
            g.add_edge(u, v, weight=edge[2].get('weight', 1))
        else:
            g.add_edge(u, v)

    if permanent:
        return None

    # temporary compression: measure the compressed graph, then restore the original one
    compressed_graph_dl = graph_dl(g)
    g.remove_node(new_node)  # and the boundary edges
    g.add_nodes_from(removed_nodes)  # add the subtree

    # only the 'weight' of each edge is restored; edges without one count as weight 1
    for edge in itertools.chain(removed_edges, boundary_edges):
        u, v = edge[0], edge[1]
        weight = edge[2].get('weight', 1) if len(edge) == 3 else 1
        g.add_edge(u, v, weight=weight)

    after = (g.order(), g.size())
    assert before == after, 'Decompression did not work'
    return compressed_graph_dl
Пример #3
0
def generate_graph(rule_dict, rule_list):
    """
    Create a new graph from the VRG at random.

    Starting from a single non-terminal node 0 with label 0, a non-terminal is
    picked at random, a rule with a matching LHS is drawn with probability
    proportional to its frequency, and the node is replaced by the rule's RHS.
    This repeats until no non-terminal is left.

    In an RHS graph, string-named nodes are treated as internal nodes and edges
    carrying a 'b' attribute as boundary edges. An RHS containing an 'Iso' node
    is rewritten IN PLACE: every edge touching 'Iso' is moved to a fresh integer
    node and 'Iso' is removed, so the rule object itself is modified.

    NOTE(review): this variant uses the networkx 1.x API (``nodes_iter``,
    ``edges_iter``, ``Graph.node``, ``attr_dict=``).

    :param rule_dict: maps an LHS size to the list of candidate rules for it
    :param rule_list: list of all rules; only used to record the index of each fired rule
    :return: tuple (newly generated graph, list of fired rule indices in firing order)
    """
    node_counter = 1
    new_g = nx.MultiGraph()
    new_g.add_node(0, attr_dict={'label': 0})
    non_terminals = {0}

    rule_ordering = []

    while non_terminals:  # continue until no more non-terminal nodes exist
        # choose a non terminal node at random; random.sample needs a sequence
        # (sets are rejected from Python 3.11 on)
        node_sample = random.sample(list(non_terminals), 1)[0]
        lhs = new_g.node[node_sample]['label']

        rhs_candidates = rule_dict[lhs]
        if len(rhs_candidates) == 1:
            rhs = rhs_candidates[0]
        else:
            weights = np.array([rule.frequency for rule in rhs_candidates])
            weights = weights / np.sum(weights)  # normalize into probabilities
            # pick based on probability; index the size-1 result instead of calling int() on an array
            idx = int(np.random.choice(range(len(rhs_candidates)), size=1, p=weights)[0])
            rhs = rhs_candidates[idx]

        rule_ordering.append(rule_list.index(rhs))

        # first integer id not yet used by the RHS
        max_v = max((v for v in rhs.graph.nodes_iter() if isinstance(v, int)), default=-1) + 1

        # expanding the 'Iso' nodes into separate integer labeled nodes
        if rhs.graph.has_node('Iso'):
            # iterate over a snapshot because the edge set is modified inside the loop
            for u, v in list(rhs.graph.edges()):
                if u == 'Iso':
                    rhs.graph.remove_edge(u, v)
                    rhs.graph.add_edge(max_v, v, attr_dict={'b': True})
                    max_v += 1

                elif v == 'Iso':
                    rhs.graph.remove_edge(u, v)
                    rhs.graph.add_edge(u, max_v, attr_dict={'b': True})
                    max_v += 1

            assert rhs.graph.degree('Iso') == 0
            rhs.graph.remove_node('Iso')

        broken_edges = find_boundary_edges(new_g, [node_sample])

        assert len(broken_edges) == lhs, 'expected {}, got {}'.format(
            lhs, len(broken_edges))

        new_g.remove_node(node_sample)
        non_terminals.remove(node_sample)

        nodes = {}  # RHS node name -> node id in new_g

        for n, d in rhs.graph.nodes_iter(data=True):
            if isinstance(n, str):
                new_node = node_counter
                nodes[n] = new_node
                new_g.add_node(new_node, attr_dict=d)
                if 'label' in d:  # if it's a new non-terminal add it to the set of non-terminals
                    non_terminals.add(new_node)
                node_counter += 1

        for u, v, d in rhs.graph.edges_iter(data=True):
            if 'b' not in d:  # (u, v) is not a boundary edge
                new_g.add_edge(nodes[u], nodes[v])

        # randomly assign broken edges to boundary edges
        random.shuffle(broken_edges)

        boundary_edge_count = sum(
            1 for _, _, d in rhs.graph.edges_iter(data=True) if 'b' in d)

        assert len(
            broken_edges
        ) >= boundary_edge_count, 'broken edges {}, boundary edges {}'.format(
            len(broken_edges), boundary_edge_count)

        for u, v, d in rhs.graph.edges_iter(data=True):
            if 'b' not in d:  # (u, v) is not a boundary edge
                continue

            b_u, b_v = broken_edges.pop()
            # the internal endpoint of the boundary edge is the string-named one
            internal = nodes[u] if isinstance(u, str) else nodes[v]
            # the endpoint of the broken edge that is not the replaced node stays in place
            outside = b_v if b_u == node_sample else b_u
            new_g.add_edge(internal, outside)

    return new_g, rule_ordering
Пример #4
0
def generate_graph(rule_dict, rule_list):
    """
    Create a new graph from the VRG at random.

    Starting from a single non-terminal node 0 of size 0, a non-terminal is
    picked at random, a rule with a matching LHS is drawn with probability
    proportional to its frequency, and the node is replaced by the rule's RHS.
    The edges that were incident to the replaced node are re-attached to RHS
    nodes: first to new non-terminals that still have spare degree, then to
    terminal RHS nodes.

    NOTE(review): this variant uses the networkx 1.x API (``nodes_iter``,
    ``edges_iter``, ``attr_dict=``).

    :param rule_dict: maps an LHS size to the list of candidate rules for it
    :param rule_list: list of all rules; only used to record the index of each fired rule
    :return: tuple (newly generated graph, list of fired rule indices in firing order)
    """
    node_counter = 1
    non_terminals = {0: 0}  # key: non-terminal id, val: size of lhs (node 0 has size 0)

    new_g = nx.MultiGraph()
    new_g.add_node(0, attr_dict={'label': 0})

    rule_ordering = []

    while non_terminals:  # continue until no more non-terminal nodes
        # choose a non terminal node at random; random.sample needs a sequence
        # (dict views and sets are rejected from Python 3.11 on)
        sampled_node = random.sample(list(non_terminals), 1)[0]
        lhs = non_terminals[sampled_node]

        rhs_candidates = rule_dict[lhs]
        if len(rhs_candidates) == 1:
            rhs = rhs_candidates[0]
        else:
            weights = np.array([rule.frequency for rule in rhs_candidates])
            weights = weights / np.sum(weights)  # normalize into probabilities
            # pick based on probability; index the size-1 result instead of calling int() on an array
            idx = int(np.random.choice(range(len(rhs_candidates)), size=1, p=weights)[0])
            rhs = rhs_candidates[idx]

        rule_ordering.append(rule_list.index(rhs))

        # find the broken edges
        broken_edges = find_boundary_edges(new_g, [sampled_node])

        assert len(broken_edges) == lhs, "node: {}, expected degree: {}, got: {}".format(sampled_node, lhs, new_g.degree(sampled_node))

        # remove the sampled node
        new_g.remove_node(sampled_node)
        del non_terminals[sampled_node]

        nodes = {}  # RHS node -> node id in new_g

        for n, d in rhs.graph.nodes_iter(data=True):  # all the nodes are internal
            new_node = node_counter
            nodes[n] = new_node
            new_g.add_node(new_node, attr_dict=d)
            if 'label' in d:  # if it's a new non-terminal add it to the dictionary of non-terminals
                non_terminals[new_node] = d['label']

            node_counter += 1

        # adding the internal edges in the RHS
        # (original author's note: doing this without checking the effective degree was marked "WRONG")
        for u, v in rhs.graph.edges_iter():
            new_g.add_edge(nodes[u], nodes[v])

        # randomly assign broken edges to boundary edges
        random.shuffle(broken_edges)

        # new non-terminals that can still accommodate more edges
        possible_nonterminals = set()
        for rhs_node in rhs.graph.nodes_iter():
            node = nodes[rhs_node]
            if node in non_terminals and non_terminals[node] - new_g.degree(node) > 0:
                possible_nonterminals.add(node)

        # terminal RHS nodes; neither `nodes` nor `non_terminals` changes in the loop below,
        # so this is computed once
        terminal_nodes = list({nodes[n]
                               for n in rhs.graph.nodes_iter()
                               if nodes[n] not in non_terminals})

        for u, v in broken_edges:  # either u == sampled_node or v is
            if possible_nonterminals:  # try the unfulfilled nonterminals first
                n = random.sample(list(possible_nonterminals), 1)[0]
                effective_degree = non_terminals[n] - new_g.degree(n)

                if effective_degree == 1:  # it cannot take more edges after this one
                    possible_nonterminals.remove(n)

            else:  # fall back to the internal terminal nodes
                n = random.sample(terminal_nodes, 1)[0]

            if u == sampled_node:
                u = n
            else:
                v = n
            new_g.add_edge(u, v)

    return new_g, rule_ordering
Пример #5
0
def _generate_graph(rule_dict: Dict[int, List[PartRule]],
                    upper_bound: int) -> Any:
    """
    Create a new graph from the VRG at random.

    Starting from a single non-terminal node 0 with label 0, a non-terminal is
    picked at random, a rule with a matching LHS is drawn with probability
    proportional to its frequency, and the node is replaced by the rule's RHS.
    Each RHS node receives ``b_deg`` of the (shuffled) edges that were incident
    to the replaced node.

    :param rule_dict: maps an LHS size to the list of candidate rules for it
    :param upper_bound: generation is abandoned once the graph has more nodes than this
    :return: tuple (newly generated graph, list of fired rule ids in firing order),
        or (None, None) if the number of nodes exceeds ``upper_bound``
    """
    node_counter = 1

    new_g = LightMultiGraph()
    new_g.add_node(0, label=0)

    non_terminals = {0}
    rule_ordering = []  # list of rule ids in the order they were fired

    while non_terminals:  # continue until no more non-terminal nodes
        if new_g.order() > upper_bound:  # early stopping
            return None, None

        # choose a non terminal node at random; random.sample needs a sequence
        # (sets are rejected from Python 3.11 on)
        node_sample = random.sample(list(non_terminals), 1)[0]
        lhs = new_g.nodes[node_sample]['label']

        rhs_candidates = rule_dict[lhs]

        if len(rhs_candidates) == 1:
            rhs = rhs_candidates[0]
        else:
            weights = np.array([rule.frequency for rule in rhs_candidates])
            weights = weights / np.sum(weights)  # normalize into probabilities
            # pick based on probability; index the size-1 result instead of calling int() on an array
            idx = int(np.random.choice(range(len(rhs_candidates)), size=1, p=weights)[0])
            rhs = rhs_candidates[idx]

        logging.debug(
            f'firing rule {rhs.id}, selecting node {node_sample} with label: {lhs}'
        )
        rule_ordering.append(rhs.id)

        broken_edges = find_boundary_edges(new_g, {node_sample})
        assert len(broken_edges) == lhs

        new_g.remove_node(node_sample)
        non_terminals.remove(node_sample)

        nodes = {}  # RHS node -> node id in new_g

        for n, d in rhs.graph.nodes(data=True):  # all the nodes are internal
            new_node = node_counter
            nodes[n] = new_node

            label = None
            if 'label' in d:  # if it's a new non-terminal add it to the set of non-terminals
                non_terminals.add(new_node)
                label = d['label']

            if label is None:
                new_g.add_node(new_node, b_deg=d['b_deg'])
            else:
                new_g.add_node(new_node, b_deg=d['b_deg'], label=label)
            node_counter += 1

        # randomly assign broken edges to boundary edges
        random.shuffle(broken_edges)

        # randomly joining the new boundary edges from the RHS to the rest of the graph - uniformly at random
        for n, d in rhs.graph.nodes(data=True):
            num_boundary_edges = d['b_deg']
            if num_boundary_edges == 0:  # there are no boundary edges incident to that node
                continue

            assert len(broken_edges) >= num_boundary_edges

            # take the first num_boundary_edges broken edges and drop them from future consideration
            edge_candidates = broken_edges[:num_boundary_edges]
            broken_edges = broken_edges[num_boundary_edges:]

            for u, v in edge_candidates:  # each edge is either (node_sample, v) or (u, node_sample)
                if u == node_sample:
                    u = nodes[n]
                else:
                    v = nodes[n]
                logging.debug(f'adding broken edge ({u}, {v})')
                new_g.add_edge(u, v)

        # adding the rhs to the new graph
        for u, v in rhs.graph.edges():
            edge_multiplicity = rhs.graph[u][v]['weight']
            new_g.add_edge(nodes[u], nodes[v], weight=edge_multiplicity)
            logging.debug(
                f'adding RHS internal edge ({nodes[u]}, {nodes[v]}) wt: {edge_multiplicity}'
            )

    return new_g, rule_ordering
Пример #6
0
def generate_graph(rule_dict, rule_list):
    """
    Create a new graph from the VRG at random, firing only active rules.

    Starting from a single non-terminal node 0 with label 0, a non-terminal is
    picked at random, an active rule (``rule.is_active``) with a matching LHS is
    drawn with probability proportional to its frequency, and the node is
    replaced by the rule's RHS. Each RHS node receives ``b_deg`` of the
    (shuffled) edges that were incident to the replaced node.

    :param rule_dict: maps an LHS size to the list of candidate rules for it
    :param rule_list: list of all rules; not used by this implementation
    :return: tuple (newly generated graph, rule_ordering); rule_ordering is always an
        empty list here because nothing in this function appends to it
    """
    node_counter = 1
    new_g = LightMultiGraph()
    new_g.add_node(0, label=0)
    non_terminals = {0}

    rule_ordering = []  # meant to hold the fired rule ids; currently never filled

    while non_terminals:  # continue until no more non-terminal nodes
        # choose a non terminal node at random; random.sample needs a sequence
        # (sets are rejected from Python 3.11 on)
        node_sample = random.sample(list(non_terminals), 1)[0]
        lhs = new_g.nodes[node_sample]['label']

        # consider only active rules
        rhs_candidates = [rule for rule in rule_dict[lhs] if rule.is_active]

        if len(rhs_candidates) == 1:
            rhs = rhs_candidates[0]
        else:
            weights = np.array([rule.frequency for rule in rhs_candidates])
            weights = weights / np.sum(weights)  # normalize into probabilities
            # pick based on probability; index the size-1 result instead of calling int() on an array
            idx = int(np.random.choice(range(len(rhs_candidates)), size=1, p=weights)[0])
            rhs = rhs_candidates[idx]

        broken_edges = find_boundary_edges(new_g, [node_sample])

        assert len(broken_edges) == lhs

        new_g.remove_node(node_sample)
        non_terminals.remove(node_sample)

        nodes = {}  # RHS node -> node id in new_g

        for n, d in rhs.graph.nodes(data=True):  # all the nodes are internal
            new_node = node_counter
            nodes[n] = new_node
            # spread the attributes as keywords (as done for node 0 above) so that
            # 'label' is readable via new_g.nodes[...]['label'] when this node is sampled
            new_g.add_node(new_node, **d)
            if 'label' in d:  # if it's a new non-terminal add it to the set of non-terminals
                non_terminals.add(new_node)
            node_counter += 1

        # randomly assign broken edges to boundary edges
        random.shuffle(broken_edges)

        # randomly joining the new boundary edges from the RHS to the rest of the graph - uniformly at random
        for n, d in rhs.graph.nodes(data=True):
            num_boundary_edges = d['b_deg']
            if num_boundary_edges == 0:  # there are no boundary edges incident to that node
                continue

            assert len(broken_edges) >= num_boundary_edges

            # take the first num_boundary_edges broken edges and drop them from future consideration
            edge_candidates = broken_edges[:num_boundary_edges]
            broken_edges = broken_edges[num_boundary_edges:]

            for u, v in edge_candidates:  # each edge is either (node_sample, v) or (u, node_sample)
                if u == node_sample:
                    u = nodes[n]
                else:
                    v = nodes[n]
                new_g.add_edge(u, v)

        # adding the rhs to the new graph, one add_edge call per unit of edge weight
        for u, v in rhs.graph.edges():
            edge_multiplicity = rhs.graph[u][v]['weight']
            for _ in range(edge_multiplicity):
                new_g.add_edge(nodes[u], nodes[v])
    return new_g, rule_ordering