def create_rule(subtree: Set[int], g: LightMultiGraph, mode: str) -> Tuple[PartRule, List[Tuple[int, int]]]:
    """
    Build the grammar rule whose right-hand side is the subgraph of ``g``
    induced by ``subtree``.

    :param subtree: nodes of ``g`` that form the right-hand side
    :param g: graph the rule is extracted from
    :param mode: 'full' builds a FullRule, 'part' builds a PartRule, any other
        value builds a NoRule
    :return: the rule together with the boundary edges of ``subtree`` in ``g``
    """
    rhs_graph = g.subgraph(subtree).copy()
    assert isinstance(rhs_graph, LightMultiGraph)

    boundary_edges = find_boundary_edges(g, subtree)
    lhs_size = len(boundary_edges)

    if mode == 'full':
        # full information: the boundary edges become part of the RHS, flagged
        # with b=True, and the RHS is then contracted
        rule = FullRule(lhs=lhs_size, internal_nodes=subtree, graph=rhs_graph)
        for edge in boundary_edges:
            arity = len(edge)
            if arity == 3:
                u, v, data = edge
                rule.graph.add_edge(u, v, attr_dict=data, b=True)
            elif arity == 2:
                u, v = edge
                rule.graph.add_edge(u, v, b=True)
        rule.contract_rhs()  # contract and generalize
        return rule, boundary_edges

    is_partial = mode == 'part'
    rule_cls = PartRule if is_partial else NoRule
    rule = rule_cls(lhs=lhs_size, graph=rhs_graph)
    if is_partial:
        # partial boundary information: only the boundary degrees are recorded
        set_boundary_degrees(g, rule.graph)
    rule.generalize_rhs()
    return rule, boundary_edges
def compress_graph(g: 'LightMultiGraph', subtree: Set[int], boundary_edges: Any, permanent: bool) -> Union[None, float]:
    """
    Collapse ``subtree`` into a single non-terminal node of ``g``.

    The nodes in ``subtree`` are removed, the node ``min(subtree)`` is re-added
    with ``label`` set to the number of boundary edges, and every boundary edge
    is re-attached to that node.

    :param g: the graph; modified in place
    :param subtree: the set of nodes that is compressed (any sized iterable is
        accepted and converted to a set)
    :param boundary_edges: boundary edges as ``(u, v)`` pairs or
        ``(u, v, data)`` triples where ``data['weight']`` is the edge weight;
        computed with find_boundary_edges when None
    :param permanent: if False, compute the description length of the
        compressed graph, undo the compression and return that value
    :return: None when ``permanent`` is True, otherwise ``graph_dl`` of the
        compressed graph
    """
    assert len(subtree) > 0, f'Empty subtree g:{g.order(), g.size()}, bound: {boundary_edges}'
    before = (g.order(), g.size())

    if not isinstance(subtree, set):
        subtree = set(subtree)

    if boundary_edges is None:
        boundary_edges = find_boundary_edges(g, subtree)

    # step 1: remove the nodes from subtree; remember them (and their edges)
    # only when the compression has to be undone afterwards
    removed_edges = []
    removed_nodes = []
    if not permanent:
        induced = g.subgraph(subtree)
        removed_edges = list(induced.edges(data=True))
        removed_nodes = list(induced.nodes(data=True))
    g.remove_nodes_from(subtree)

    # step 2: replace subtree with new_node
    new_node = min(subtree)
    g.add_node(new_node, label=len(boundary_edges))

    # step 3: rewire new_node; edges may be pairs or (u, v, data) triples
    for edge in boundary_edges:
        u, v = edge[0], edge[1]
        if u in subtree:
            u = new_node
        if v in subtree:
            v = new_node
        if len(edge) == 3:
            g.add_edge(u, v, weight=edge[2]['weight'])
        else:
            g.add_edge(u, v)

    if permanent:
        return None

    compressed_graph_dl = graph_dl(g)

    # undo: dropping new_node also drops the rewired boundary edges
    g.remove_node(new_node)
    g.add_nodes_from(removed_nodes)
    for edge in itertools.chain(removed_edges, boundary_edges):
        if len(edge) == 3:
            u, v, data = edge
        else:
            u, v = edge
            data = {'weight': 1}
        g.add_edge(u, v, weight=data['weight'])

    after = (g.order(), g.size())
    assert before == after, 'Decompression did not work'
    return compressed_graph_dl
def generate_graph(rule_dict, rule_list):
    """
    Create a new graph from the VRG at random

    Generation starts from a single non-terminal node labelled 0 and repeatedly
    replaces a randomly chosen non-terminal with the right-hand side of a rule
    whose LHS size equals the node's label, until no non-terminals remain.

    Rules containing the placeholder node 'Iso' are normalised in place the
    first time they fire: every edge incident to 'Iso' is re-attached to a
    fresh integer node and flagged ``b=True`` (a boundary edge).

    :param rule_dict: rules grouped by LHS size; ``rule_dict[lhs]`` is the
        sequence of candidate rules, each exposing ``frequency`` and ``graph``
    :param rule_list: list of rules; used to report the index of each fired rule
    :return: (newly generated graph, indices into ``rule_list`` in firing order)
    """
    node_counter = 1
    non_terminals = {0}
    new_g = nx.MultiGraph()
    new_g.add_node(0, label=0)
    rule_ordering = []

    while non_terminals:  # continue until no more non-terminal nodes exist
        # random.sample() no longer accepts sets (Python 3.11+); choosing from
        # a tuple of the set draws the same element for a given seed
        node_sample = random.choice(tuple(non_terminals))
        lhs = new_g.nodes[node_sample]['label']

        rhs_candidates = rule_dict[lhs]
        if len(rhs_candidates) == 1:
            rhs = rhs_candidates[0]
        else:
            weights = np.array([rule.frequency for rule in rhs_candidates])
            weights = weights / np.sum(weights)  # normalize into probabilities
            idx = int(np.random.choice(len(rhs_candidates), p=weights))
            rhs = rhs_candidates[idx]
        rule_ordering.append(rule_list.index(rhs))

        # first unused integer id among the RHS nodes
        max_v = max((v for v in rhs.graph.nodes() if isinstance(v, int)), default=-1) + 1

        # expanding the 'Iso' node into separate integer labeled nodes
        if rhs.graph.has_node('Iso'):
            # iterate over a snapshot: the loop adds and removes edges
            for u, v in list(rhs.graph.edges()):
                if u == 'Iso':
                    rhs.graph.remove_edge(u, v)
                    rhs.graph.add_edge(max_v, v, b=True)
                    max_v += 1
                elif v == 'Iso':
                    rhs.graph.remove_edge(u, v)
                    rhs.graph.add_edge(u, max_v, b=True)
                    max_v += 1
            assert rhs.graph.degree('Iso') == 0
            rhs.graph.remove_node('Iso')

        broken_edges = find_boundary_edges(new_g, [node_sample])
        assert len(broken_edges) == lhs, 'expected {}, got {}'.format(lhs, len(broken_edges))

        new_g.remove_node(node_sample)
        non_terminals.remove(node_sample)

        # string-named RHS nodes are internal: copy them under fresh integer ids
        nodes = {}
        for n, d in rhs.graph.nodes(data=True):
            if isinstance(n, str):
                new_node = node_counter
                nodes[n] = new_node
                new_g.add_node(new_node, **d)
                if 'label' in d:  # a new non-terminal
                    non_terminals.add(new_node)
                node_counter += 1

        # internal (non-boundary) RHS edges
        for u, v, d in rhs.graph.edges(data=True):
            if 'b' not in d:
                new_g.add_edge(nodes[u], nodes[v])

        # randomly assign broken edges to boundary edges
        random.shuffle(broken_edges)

        boundary_edge_count = sum(1 for _, _, d in rhs.graph.edges(data=True) if 'b' in d)
        assert len(broken_edges) >= boundary_edge_count, 'broken edges {}, boundary edges {}'.format(
            len(broken_edges), boundary_edge_count)

        for u, v, d in rhs.graph.edges(data=True):
            if 'b' not in d:  # (u, v) is not a boundary edge
                continue
            b_u, b_v = broken_edges.pop()
            # the string-named endpoint is the internal one
            internal = nodes[u] if isinstance(u, str) else nodes[v]
            # keep the endpoint of the broken edge that is not the replaced node
            outside = b_v if b_u == node_sample else b_u
            new_g.add_edge(internal, outside)

    return new_g, rule_ordering
def generate_graph(rule_dict, rule_list):
    """
    Create a new graph from the VRG at random

    Generation starts from a single non-terminal node and repeatedly replaces
    a randomly chosen non-terminal with the right-hand side of a rule of the
    matching LHS size. The edges broken by removing the non-terminal are
    re-attached to new non-terminals that can still accommodate edges, and to
    random new terminal nodes once none are left.

    :param rule_dict: rules grouped by LHS size; ``rule_dict[lhs]`` is the
        sequence of candidate rules, each exposing ``frequency`` and ``graph``
    :param rule_list: list of rules; used to report the index of each fired rule
    :return: (newly generated graph, indices into ``rule_list`` in firing order)
    :raises ValueError: if a broken edge has to go to a terminal node but the
        fired rule has none
    """
    node_counter = 1
    non_terminals = {0: 0}  # key: non-terminal id, val: size of lhs
    new_g = nx.MultiGraph()
    new_g.add_node(0, label=0)
    rule_ordering = []

    while non_terminals:  # continue until no more non-terminal nodes
        # random.sample() no longer accepts sets or dict views (Python 3.11+);
        # choosing from a tuple of the keys draws the same element for a seed
        sampled_node = random.choice(tuple(non_terminals))
        lhs = non_terminals[sampled_node]

        rhs_candidates = rule_dict[lhs]
        if len(rhs_candidates) == 1:
            rhs = rhs_candidates[0]
        else:
            weights = np.array([rule.frequency for rule in rhs_candidates])
            weights = weights / np.sum(weights)  # normalize into probabilities
            idx = int(np.random.choice(len(rhs_candidates), p=weights))
            rhs = rhs_candidates[idx]
        rule_ordering.append(rule_list.index(rhs))

        # find the broken edges
        broken_edges = find_boundary_edges(new_g, [sampled_node])
        assert len(broken_edges) == lhs, "node: {}, expected degree: {}, got: {}".format(sampled_node, lhs, new_g.degree(sampled_node))

        # remove the sampled node
        new_g.remove_node(sampled_node)
        del non_terminals[sampled_node]

        # all RHS nodes are internal: copy them under fresh integer ids
        nodes = {}
        for n, d in rhs.graph.nodes(data=True):
            new_node = node_counter
            nodes[n] = new_node
            new_g.add_node(new_node, **d)
            if 'label' in d:  # a new non-terminal
                non_terminals[new_node] = d['label']
            node_counter += 1

        # adding the internal edges in the RHS
        # NOTE: the original author flagged the assumption that no effective
        # degree check is needed here as WRONG
        for u, v in rhs.graph.edges():
            new_g.add_edge(nodes[u], nodes[v])

        # randomly assign broken edges to boundary edges
        random.shuffle(broken_edges)

        # new non-terminals whose label exceeds their current degree can still
        # accommodate boundary edges
        possible_nonterminals = set()
        for node in rhs.graph.nodes():
            node = nodes[node]
            if node in non_terminals:
                effective_degree = non_terminals[node] - new_g.degree(node)
                if effective_degree > 0:
                    possible_nonterminals.add(node)

        for u, v in broken_edges:  # either u = sampled_node or v is
            if possible_nonterminals:
                # try the unfulfilled non-terminals first
                n = random.choice(tuple(possible_nonterminals))
                effective_degree = non_terminals[n] - new_g.degree(n)
                if effective_degree == 1:
                    # this edge fills it up, it cannot take more in the future
                    possible_nonterminals.remove(n)
            else:
                # fall back to the internal terminal nodes
                terminal_nodes = tuple({nodes[n] for n in rhs.graph.nodes()
                                        if nodes[n] not in non_terminals})
                if not terminal_nodes:
                    raise ValueError('no terminal node available to receive a broken edge')
                n = random.choice(terminal_nodes)

            if u == sampled_node:
                u = n
            else:
                v = n
            new_g.add_edge(u, v)

    return new_g, rule_ordering
def _generate_graph(rule_dict: Dict[int, List[PartRule]], upper_bound: int) -> Any:
    """
    Create a new graph from the VRG at random

    Generation starts from a single non-terminal node labelled 0 and repeatedly
    replaces a randomly chosen non-terminal with the right-hand side of a rule
    whose LHS size equals the node's label. Each new node receives as many of
    the broken edges as its ``b_deg`` attribute states.

    :param rule_dict: rules grouped by LHS size; each rule exposes
        ``frequency``, ``id`` and ``graph``
    :param upper_bound: generation is abandoned once the graph has more nodes
        than this
    :return: (newly generated graph, ids of the fired rules in firing order),
        or ``(None, None)`` if the node count exceeded ``upper_bound``
    """
    node_counter = 1

    new_g = LightMultiGraph()
    new_g.add_node(0, label=0)

    non_terminals = {0}
    rule_ordering = []  # list of rule ids in the order they were fired

    while non_terminals:  # continue until no more non-terminal nodes
        if new_g.order() > upper_bound:  # early stopping
            return None, None

        # random.sample() no longer accepts sets (Python 3.11+); choosing from
        # a tuple of the set draws the same element for a given seed
        node_sample = random.choice(tuple(non_terminals))
        lhs = new_g.nodes[node_sample]['label']

        rhs_candidates = rule_dict[lhs]
        if len(rhs_candidates) == 1:
            rhs = rhs_candidates[0]
        else:
            weights = np.array([rule.frequency for rule in rhs_candidates])
            weights = weights / np.sum(weights)  # normalize into probabilities
            idx = int(np.random.choice(len(rhs_candidates), p=weights))
            rhs = rhs_candidates[idx]

        logging.debug(f'firing rule {rhs.id}, selecting node {node_sample} with label: {lhs}')
        rule_ordering.append(rhs.id)

        broken_edges = find_boundary_edges(new_g, {node_sample})
        assert len(broken_edges) == lhs

        new_g.remove_node(node_sample)
        non_terminals.remove(node_sample)

        # all RHS nodes are internal: copy them under fresh integer ids
        nodes = {}
        for n, d in rhs.graph.nodes(data=True):
            new_node = node_counter
            nodes[n] = new_node

            label = None
            if 'label' in d:  # a new non-terminal
                non_terminals.add(new_node)
                label = d['label']

            if label is None:
                new_g.add_node(new_node, b_deg=d['b_deg'])
            else:
                new_g.add_node(new_node, b_deg=d['b_deg'], label=label)
            node_counter += 1

        # randomly assign broken edges to boundary edges
        random.shuffle(broken_edges)

        # join the broken edges to the new nodes, b_deg edges per node
        for n, d in rhs.graph.nodes(data=True):
            num_boundary_edges = d['b_deg']
            if num_boundary_edges == 0:  # no boundary edges incident to that node
                continue

            assert len(broken_edges) >= num_boundary_edges

            # take the first num_boundary_edges and drop them from future consideration
            edge_candidates = broken_edges[:num_boundary_edges]
            broken_edges = broken_edges[num_boundary_edges:]

            for u, v in edge_candidates:  # each edge is either (node_sample, v) or (u, node_sample)
                if u == node_sample:
                    u = nodes[n]
                else:
                    v = nodes[n]
                logging.debug(f'adding broken edge ({u}, {v})')
                new_g.add_edge(u, v)

        # adding the rhs to the new graph
        for u, v in rhs.graph.edges():
            edge_multiplicity = rhs.graph[u][v]['weight']
            new_g.add_edge(nodes[u], nodes[v], weight=edge_multiplicity)
            logging.debug(f'adding RHS internal edge ({nodes[u]}, {nodes[v]}) wt: {edge_multiplicity}')

    return new_g, rule_ordering
def generate_graph(rule_dict, rule_list):
    """
    Create a new graph from the VRG at random

    Generation starts from a single non-terminal node labelled 0 and repeatedly
    replaces a randomly chosen non-terminal with the right-hand side of an
    active rule whose LHS size equals the node's label. Each new node receives
    as many of the broken edges as its ``b_deg`` attribute states.

    :param rule_dict: rules grouped by LHS size; each rule exposes
        ``is_active``, ``frequency`` and ``graph``
    :param rule_list: not used by this implementation
    :return: (newly generated graph, rule ordering); the ordering is not
        recorded by this implementation, so it is always an empty list
    """
    node_counter = 1
    non_terminals = {0}

    new_g = LightMultiGraph()
    new_g.add_node(0, label=0)

    rule_ordering = []  # never filled in; kept for the return shape

    while non_terminals:  # continue until no more non-terminal nodes
        # random.sample() no longer accepts sets (Python 3.11+); choosing from
        # a tuple of the set draws the same element for a given seed
        node_sample = random.choice(tuple(non_terminals))
        lhs = new_g.nodes[node_sample]['label']

        # consider only active rules
        rhs_candidates = [rule for rule in rule_dict[lhs] if rule.is_active]

        if len(rhs_candidates) == 1:
            rhs = rhs_candidates[0]
        else:
            weights = np.array([rule.frequency for rule in rhs_candidates])
            weights = weights / np.sum(weights)  # normalize into probabilities
            idx = int(np.random.choice(len(rhs_candidates), p=weights))
            rhs = rhs_candidates[idx]

        broken_edges = find_boundary_edges(new_g, [node_sample])
        assert len(broken_edges) == lhs

        new_g.remove_node(node_sample)
        non_terminals.remove(node_sample)

        # all RHS nodes are internal: copy them under fresh integer ids
        nodes = {}
        for n, d in rhs.graph.nodes(data=True):
            new_node = node_counter
            nodes[n] = new_node
            # pass the attributes as keywords, like node 0 above, so that
            # 'label' can be read back through new_g.nodes[...]
            new_g.add_node(new_node, **d)
            if 'label' in d:  # a new non-terminal
                non_terminals.add(new_node)
            node_counter += 1

        # randomly assign broken edges to boundary edges
        random.shuffle(broken_edges)

        # join the broken edges to the new nodes, b_deg edges per node
        for n, d in rhs.graph.nodes(data=True):
            num_boundary_edges = d['b_deg']
            if num_boundary_edges == 0:  # no boundary edges incident to that node
                continue

            assert len(broken_edges) >= num_boundary_edges

            # take the first num_boundary_edges and drop them from future consideration
            edge_candidates = broken_edges[:num_boundary_edges]
            broken_edges = broken_edges[num_boundary_edges:]

            for u, v in edge_candidates:  # each edge is either (node_sample, v) or (u, node_sample)
                if u == node_sample:
                    u = nodes[n]
                else:
                    v = nodes[n]
                new_g.add_edge(u, v)

        # adding the rhs to the new graph, one add_edge call per unit of weight
        for u, v in rhs.graph.edges():
            edge_multiplicity = rhs.graph[u][v]['weight']
            for _ in range(edge_multiplicity):
                new_g.add_edge(nodes[u], nodes[v])

    return new_g, rule_ordering