def find_label(
    graph: MultiDiGraph, query: str, verbose: bool = True
) -> Optional[Tuple[Any, Any]]:
    """Returns the edge label containing the specified string.

    Parameters
    ----------
    graph : MultiDiGraph
        Initial graph.

    query : str
        String to find.

    verbose : bool
        If true, a progress bar will be displayed.

    Examples
    --------
    >>> import cfpq_data
    >>> g = cfpq_data.graph_from_dataset("foaf", verbose=False)
    >>> cfpq_data.find_label(g, "subClassOf", verbose=False)
    ('label', rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#subClassOf'))

    Returns
    -------
    response : Optional[Tuple[Any, Any]]
        Pair (``edge label key``, ``edge label value``) where ``edge label value``
        contains ``query``. None if the required ``edge label value`` is not found.
    """
    res = None
    for _, _, edge_labels in tqdm(
        graph.edges(data=True), disable=not verbose, desc="Searching..."
    ):
        for k, v in edge_labels.items():
            if query in str(v):
                res = (k, v)
                break
    return res
def get_subgraph_by_edge_type(self, graph: nx.MultiDiGraph, relationship: str) -> nx.DiGraph:
    """Get a subgraph containing all edges of a given type (aka relationship).

    Args:
        graph: input multi digraph (aka hypergraph)
        relationship: edge / link relationship type, with possible values as described in the docs above.

    Returns:
        Directed graph containing only edges of the given type (aka relationship)
    """
    # prune the metadata model graph so as to include only those edges that match the relationship type
    rel_edges = []
    for (u, v, key, c) in graph.edges(data=True, keys=True):
        if key == relationship:
            rel_edges.append((u, v))

    relationship_subgraph = nx.DiGraph()
    relationship_subgraph.add_edges_from(rel_edges)
    return relationship_subgraph
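# Hedged stand-alone sketch of the same pruning step (toy graph; the relationship
# keys below are invented): edges are keyed by their relationship type, so
# filtering on the key yields the per-relationship subgraph.
import networkx as nx

mdg = nx.MultiDiGraph()
mdg.add_edge("a", "b", key="parent-of")
mdg.add_edge("a", "b", key="knows")
mdg.add_edge("b", "c", key="parent-of")

parent_edges = [(u, v) for u, v, k in mdg.edges(keys=True) if k == "parent-of"]
subgraph = nx.DiGraph()
subgraph.add_edges_from(parent_edges)
# subgraph now has edges [("a", "b"), ("b", "c")]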
def AGraphComputerSetMultiDiGraph(spsg: nx.MultiDiGraph, cf: Callable) -> AGraph:
    # build the AGraph from scratch; nodes and edges are added explicitly below
    A = AGraph(directed=True)
    A.node_attr["style"] = "filled"
    A.node_attr["shape"] = "rectangle"
    A.node_attr["fixedsize"] = "false"
    A.node_attr["fontcolor"] = "black"

    for node in spsg.nodes:
        A.add_node(node_2_string(node))
    edges = spsg.edges(data=True)
    for edge in edges:
        s, t, data_dict = edge
        computer_set = data_dict["computers"]
        ss, st = tuple(map(node_2_string, (s, t)))
        A.add_edge(ss, st)
        Ae = A.get_edge(ss, st)
        Ae.attr["label"] = "\n".join([c.__name__ for c in computer_set])
    return A
def create_in_out_dict(g: nx.MultiDiGraph):
    """Creates a dictionary where the keys (nodes in a graph) are associated with
    another dictionary that records which nodes can be reached by following edges
    "out" of the key node and which nodes lead "in" to the key node.

    Args:
        g (nx.MultiDiGraph): input graph whose nodes and edges are used to create the in_out dict

    Returns:
        dict: in/out dictionary of node traversals

    TODO: make if statement clearer - remove single loops in graph
    """
    in_out = defaultdict(lambda: {"in": [], "out": []})
    for start, end in g.edges():
        # skip self-loops
        if start == end:
            continue
        in_out[end]["in"].append(start)
        in_out[start]["out"].append(end)
    return in_out
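# Illustrative sketch for create_in_out_dict (toy graph; assumes the function and
# its defaultdict import above are in scope): self-loops are skipped, everything
# else is recorded per direction.
import networkx as nx

g = nx.MultiDiGraph()
g.add_edges_from([("a", "b"), ("b", "c"), ("c", "c")])
in_out = create_in_out_dict(g)
# in_out["b"] == {"in": ["a"], "out": ["c"]}; the "c" -> "c" self-loop is ignored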
def map_graph(graph: nx.MultiDiGraph, mapping: Dict, preserve: bool = True) -> nx.MultiDiGraph:
    """
    Remap node identifiers in a networkx.MultiDiGraph based on a provided mapping.

    For nodes, the old identifier is saved as `source_curie` attribute.
    In case of edges,
    - if the node is the `subject` then the old identifier is saved as `source_subject`
    - if the node is the `object` then the old identifier is saved as `source_object`

    Parameters
    ----------
    graph: networkx.MultiDiGraph
        A graph
    mapping: dict
        Dictionary containing node identifier mappings
    preserve: bool
        Preserve the old identifier before remapping.

    Returns
    -------
    networkx.MultiDiGraph
        The graph with its nodes remapped

    """
    if preserve:
        for nid in graph.nodes():
            if nid in mapping:
                # add_node will append attributes
                graph.add_node(nid, source_curie=nid)
        for oid, sid in graph.edges():
            if oid in mapping:
                for ex in graph[oid][sid]:
                    graph[oid][sid][ex].update(source_object=oid)
            if sid in mapping:
                for ex in graph[oid][sid]:
                    graph[oid][sid][ex].update(source_subject=sid)
    nx.relabel_nodes(graph, mapping, copy=False)
    return graph
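# Hedged usage sketch of map_graph (the identifiers and mapping are made up):
# remap one node id while keeping the old identifier on the node and edge data.
import networkx as nx

g = nx.MultiDiGraph()
g.add_edge("OLD:1", "X:2")
g = map_graph(g, {"OLD:1": "NEW:1"})
# g now has node "NEW:1" carrying source_curie == "OLD:1", and the edge data
# records the pre-mapping identifier as well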
def build_cliques(self, target_graph: nx.MultiDiGraph):
    """
    Builds a clique graph from ``same_as`` edges in ``target_graph``.

    Parameters
    ----------
    target_graph: networkx.MultiDiGraph
        A MultiDiGraph that contains nodes and edges

    Returns
    -------
    networkx.Graph
        The clique graph with only ``same_as`` edges

    """
    self.target_graph = target_graph
    for u, v, data in target_graph.edges(data=True):
        if 'edge_label' in data and data['edge_label'] == SAME_AS:
            # load all same_as edges to self.clique_graph
            self.clique_graph.add_node(u, **target_graph.nodes[u])
            self.clique_graph.add_node(v, **target_graph.nodes[v])
            self.clique_graph.add_edge(u, v, **data)
    return self.clique_graph
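# Hedged sketch of the same idea outside the class (toy node ids and labels;
# kgx uses the SAME_AS constant, here spelled literally): only same_as edges are
# copied into the clique graph, together with the node attribute dicts.
import networkx as nx

target = nx.MultiDiGraph()
target.add_node("HGNC:1", name="example gene")
target.add_node("ENSEMBL:1", name="example gene")
target.add_edge("HGNC:1", "ENSEMBL:1", edge_label="same_as")
target.add_edge("HGNC:1", "ENSEMBL:1", edge_label="interacts_with")

clique_graph = nx.MultiDiGraph()
for u, v, data in target.edges(data=True):
    if data.get("edge_label") == "same_as":
        clique_graph.add_node(u, **target.nodes[u])
        clique_graph.add_node(v, **target.nodes[v])
        clique_graph.add_edge(u, v, **data)
# clique_graph now holds a single same_as edge between the two identifiers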
def get_factors(G: nx.MultiDiGraph) -> Generator[Tuple[List, List], None, None]: fg = nx.DiGraph() variables = set() for source, target, edge_data in G.edges(data=True): source_port, target_port = get_ports(edge_data) source_var = (source, source_port) target_var = (target, target_port) variables.add(source_var) variables.add(target_var) fg.add_node(source) fg.add_node(source_var) fg.add_node(target) fg.add_node(target_var) fg.add_edge(source, source_var) fg.add_edge(target, target_var) fg.add_edge(source_var, target_var) return split(fg.subgraph(variables))
def get_category_from_equivalence( target_graph: BaseGraph, clique_graph: nx.MultiDiGraph, node: str, attributes: Dict ) -> List: """ Get category for a node based on its equivalent nodes in a graph. Parameters ---------- target_graph: kgx.graph.base_graph.BaseGraph The original graph clique_graph: networkx.MultiDiGraph The clique graph node: str Node identifier attributes: Dict Node's attributes Returns ------- List Category for the node """ category: List = [] for u, v, data in clique_graph.edges(node, data=True): if data["predicate"] == SAME_AS: if u == node: if "category" in clique_graph.nodes()[v]: category = clique_graph.nodes()[v]["category"] break elif v == node: if "category" in clique_graph.nodes()[u]: category = clique_graph.nodes()[u]["category"] break update = {node: {"category": category}} nx.set_node_attributes(clique_graph, update) return category
def add_reshapes_for_tf_subgraph_calls(graph: nx.MultiDiGraph):
    """
    Input and output tensors of the TFCustomSubgraphCall must be 4D because IE layer accepts and produces only 4D
    tensors. This function adds reshape operations where it is necessary.
    :param graph: graph to operate on.
    :return: None.
    """
    for src_node_name, dst_node_name, edge_attrs in list(graph.edges(data=True)):
        src_node = Node(graph, src_node_name)
        dst_node = Node(graph, dst_node_name)
        if dst_node.kind == 'op' and dst_node.has_valid('type') and dst_node.type == 'TFCustomSubgraphCall' and \
                src_node.has_valid('shape') and len(src_node.shape) != 4:
            log.info("There is a data tensor of shape '{}' which goes into '{}' node".format(
                src_node.shape, dst_node.type))
            add_reshape_before_op_node(graph, src_node_name, dst_node_name, edge_attrs)

    for node_name in list(graph.nodes()):
        node = Node(graph, node_name)
        if node['kind'] == 'op' and node.has_and_set('type') and node.type == 'TFCustomSubgraphCall':
            for index, data_node in node.out_nodes().items():
                real_dims_count = len(data_node.shape)
                if real_dims_count != 4:
                    log.info(
                        "There is a data tensor of shape '{}' with real dims count '{}' which goes out of '{}' "
                        "node".format(data_node.shape, real_dims_count, node.name))
                    add_reshape_after_data_node(graph, data_node.id)

                    # need to update shape of the op so IE generates XML with 4D tensors
                    out_shape = make_shape_4d(data_node['shape'])
                    data_node['shape'] = out_shape
def update_custom_replacement_attributes(self, graph: nx.MultiDiGraph): if not self.has('instances'): raise Error( "No instance(s) is(are) defined for the custom replacement '{}'. " .format(self.replacement_id) + refer_to_faq_msg(66)) if not isinstance(self.instances, dict): raise Error( "The instance must be a single dictionary for the custom replacement with id '{}'. " .format(self.replacement_id) + refer_to_faq_msg(67)) start_points = self.get_internal_input_nodes(graph) end_points = self.get_internal_output_nodes(graph) matched_nodes = sub_graph_between_nodes(graph, start_points, end_points) output_tensors = set() input_nodes_mapping = dict( ) # key is the input tensor name, value is the pair: (input_port, output_node_name) for src_node_name, dst_node_name, edge_attrs in graph.edges(data=True): dst_node = graph.node[dst_node_name] # edge outside sub-graph into sub-graph if (src_node_name not in matched_nodes) and (dst_node_name in matched_nodes): tensor_name = src_node_name + ":" + str(edge_attrs['out']) if tensor_name not in input_nodes_mapping: input_nodes_mapping[tensor_name] = list() input_nodes_mapping[tensor_name].append( ('^' + dst_node_name + '$', edge_attrs['in'])) # edge from inside sub-graph to outside sub-graph if (src_node_name in matched_nodes) and (dst_node_name not in matched_nodes): output_tensors.add( ('^' + dst_node['pb'].input[edge_attrs['in']] + '$', edge_attrs['out'])) for node_name in graph.nodes(): node = Node(graph, node_name) if node_name in matched_nodes and len( node.out_nodes()) == 0 and node['pb'].op != 'Const': log.debug( "Node {} doesn't have output edges. Consider it output". format(node_name)) output_tensors.add(('^' + node_name + '$', 0)) if not self.has('inputs'): self._replacement_desc['inputs'] = [[{ 'node': desc[0], 'port': desc[1] } for desc in inp] for inp in sorted(input_nodes_mapping.values())] log.debug('Updated inputs of sub-graph for instance "{}"'.format( self.instances)) if not self.has('outputs'): self._replacement_desc['outputs'] = [{ 'node': node, 'port': port } for node, port in sorted(output_tensors)] log.debug('Updated outputs of sub-graph for instance "{}"'.format( self.instances))
class Conversations(list): def __init__(self): list.__init__(self) self.graph = Graph() self.concepts = {} self.features = [] self.edgeindices = [] self.queries = [] self.target_classes = [] self.features_tensors = [] self.edges_tensors = [] self.queries_tensors = [] self.targets_tensors = [] def compile(self): print('\nGraph compilation...', end='') for i, conversation in enumerate(self): conversation.compile() for source, target, label in conversation.graph.edges(keys=True): self.graph.add_edge(source, target, label) if i % 100 == 0: print('.', end='', flush=True) print() def _edges_to_tensors(self, edges, previous_edges, supporting_edges, concept_to_features_function): features = [] edge_idices = [] indices = {} for source, target, label in edges: for node in (source, target, label): if node not in indices: indices[node] = len(indices) features.append( np.concatenate((concept_to_features_function(node), np.array([1, 1])))) edge_idices.append([indices[target], indices[label]]) edge_idices.append([indices[label], indices[source]]) for source, target, label in previous_edges: for node in (source, target, label): if node not in indices: indices[node] = len(indices) features.append( np.concatenate((concept_to_features_function(node), np.array([1, 0])))) edge_idices.append([indices[target], indices[label]]) edge_idices.append([indices[label], indices[source]]) for source, target, label in supporting_edges: for node in (source, target, label): if node not in indices: indices[node] = len(indices) features.append( np.concatenate((concept_to_features_function(node), np.array([0, 0])))) edge_idices.append([indices[target], indices[label]]) edge_idices.append([indices[label], indices[source]]) return features, edge_idices, indices def _nodes_to_id(self): for edge in self.graph.edges(keys=True): for node in edge: if node not in self.concepts: self.concepts[node] = len(self.concepts) def _get_supporting_edges(self, edges, limit=1, step=1): supporting = set() for i in range(step): for s, _, l in edges | supporting: try: n1 = next(iter(self.graph.edges(s, keys=True))) supporting.add(n1) n2 = next(iter(self.graph.edges(n1, keys=True))) supporting.add(n2) except StopIteration: pass try: n1 = next(iter(self.graph.in_edges(l, keys=True))) supporting.add(n1) n2 = next(iter(self.graph.in_edges(n1, keys=True))) supporting.add(n2) except StopIteration: pass return supporting - edges def compile_matrix_data(self, concept_to_features_function): self.features = [] self.edgeindices = [] self.target_classes = [] self.queries = [] self._nodes_to_id() for i, conversation in enumerate(self): edges_by_turn = [ set(t.graph.edges(keys=True)) for t in conversation.turns ] for j in range(3, len(conversation.turns) - 1): context_edges = set().union(*edges_by_turn[:j - 1]) previous_edges = set(edges_by_turn[j]) supporting_edges = self._get_supporting_edges(context_edges | previous_edges) continuation_edges = set().union( *edges_by_turn[j:j + 1]) - context_edges features, edge_indices, indices = self._edges_to_tensors( context_edges, previous_edges, supporting_edges, concept_to_features_function) for source, target, label in continuation_edges: self.queries.append([ concept_to_features_function(source), concept_to_features_function(label) ]) target_class = self.concepts[target] self.features.append(features) self.edgeindices.append(edge_indices) self.target_classes.append(target_class) if i % 100 == 0: print('.', end='', flush=True) def compile_matrices(self): self.features_tensors = [] self.edges_tensors = [] 
self.queries_tensors = [] self.targets_tensors = [] print('Compiling {} samples to tensors...'.format(len(self.features))) for i in range(len(self.features)): self.features_tensors.append( torch.from_numpy(np.array(self.features[i]))) self.edges_tensors.append( torch.from_numpy(np.array(self.edgeindices[i]))) self.queries_tensors.append( torch.from_numpy(np.array(self.queries[i]))) self.targets_tensors.append( torch.from_numpy(np.array(self.target_classes[i]))) if i % 10000 == 0: print('.', end='', flush=True) def save(self, filename): with open(filename, 'wb') as f: pickle.dump(self, f) return True @classmethod def load(cls, filename): with open(filename, 'rb') as f: return pickle.load(f)
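# Hedged persistence sketch for the Conversations container defined above
# (the file name is illustrative; the class's module-level imports such as
# Graph and pickle are assumed): save pickles the whole object, load restores it.
conversations = Conversations()
conversations.save("conversations.pkl")
restored = Conversations.load("conversations.pkl")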
def run(self): isp_topology_path = self.options.get('-i', None) output_directory = self.options.get('-o', None) link_capacity = self.options.get('--link', None) medium = self.options.get('--medium', None) medium_speed = 0. if medium == 'fiber': medium_speed = 300*10**6 elif medium == 'copper': medium_speed = 210*10**6 else: try: medium_speed = float(medium) except ValueError as ex: print ex print '--medium is either (1) Medium type (fiber or copper) or (2) Medium speed (float)' exit(1) assert medium_speed > 0 link = parse_capacities(link_capacity) link = 10000000000 # special case isp_graph = read_isp_graph(isp_topology_path) if not isp_graph: print 'Can\'t read the input graph file...' exit(1) if not isinstance(isp_graph, MultiDiGraph): isp_graph = MultiDiGraph(isp_graph) has_zero_id = False for node, node_data in isp_graph.nodes_iter(data=True): try: node_data['id'] = int(node) if node_data['id'] == 0: has_zero_id = True except ValueError as ex: print ex print 'Node ID should be integer...' exit(1) for node, node_data in isp_graph.nodes_iter(data=True): node_data['id'] += int(has_zero_id) relabel_nodes(isp_graph, {node: str(node_data['id']) for node, node_data in isp_graph.nodes_iter(data=True)}, copy=False) sorted_nodes = map(str, sorted([int(node) for node in isp_graph])) ports = {} for node in sorted_nodes: s_port = 2 sorted_neighbors = map(str, sorted([int(x[1]) for x in isp_graph.edges(node)])) for neighbor in sorted_neighbors: if (node, neighbor) not in ports: ports[node, neighbor] = (s_port, -1) s_port += 1 for node in sorted_nodes: sorted_neighbors = map(str, sorted([int(x[1]) for x in isp_graph.edges(node)])) for neighbor in sorted_neighbors: if (neighbor, node) in ports: ports[node, neighbor] = (ports[node, neighbor][0], ports[neighbor, node][0]) max_dist = 0. traversed_edges = {} for node, node_data in isp_graph.nodes_iter(data=True): for n, neighbor, key, data in isp_graph.edges(node, data=True, keys=True): edge_key = n, neighbor if edge_key not in traversed_edges: src_lat = isp_graph.node[n].get('Latitude', -1) src_lon = isp_graph.node[n].get('Longitude', -1) dst_lat = isp_graph.node[neighbor].get('Latitude', -1) dst_lon = isp_graph.node[neighbor].get('Longitude', -1) if src_lat == -1 or src_lon == -1 or dst_lat == -1 or dst_lon == -1: distance = 1 else: distance = 1000 * vincenty((src_lat, src_lon), (dst_lat, dst_lon)) delay = 1000.0 * distance / medium_speed if max_dist < distance: max_dist = distance isp_graph.edge[n][neighbor][key] = {'Capacity': link, 'Distance': distance, 'Delay': delay, 'src_port': ports[n, neighbor][0], 'dst_port': ports[n, neighbor][1], 'BandwidthCost': 1} traversed_edges[edge_key] = key # traversed_edges[edge_key] = neighbor, n else: pass # isp_graph.edge[n][neighbor][traversed_edges[edge_key]]['Capacity'] += link for node, node_data in isp_graph.nodes_iter(data=True): for n, neighbor, key, data in isp_graph.edges(node, data=True, keys=True): if 'Capacity' not in data: del isp_graph.edge[n][neighbor][key] write_graphml(isp_graph, get_resources_file(isp_topology_path, output_directory)) print "GetIsp DONE!"
class NxGraph(BaseGraph): """ NxGraph is a wrapper that provides methods to interact with a networkx.MultiDiGraph. NxGraph extends kgx.graph.base_graph.BaseGraph and implements all the methods from BaseGraph. """ def __init__(self): super().__init__() self.graph = MultiDiGraph() self.name = None def add_node(self, node: str, **kwargs: Any) -> None: """ Add a node to the graph. Parameters ---------- node: str Node identifier **kwargs: Any Any additional node properties """ if "data" in kwargs: data = kwargs["data"] else: data = kwargs self.graph.add_node(node, **data) def add_edge(self, subject_node: str, object_node: str, edge_key: str = None, **kwargs: Any) -> None: """ Add an edge to the graph. Parameters ---------- subject_node: str The subject (source) node object_node: str The object (target) node edge_key: Optional[str] The edge key kwargs: Any Any additional edge properties """ if "data" in kwargs: data = kwargs["data"] else: data = kwargs return self.graph.add_edge(subject_node, object_node, key=edge_key, **data) def add_node_attribute(self, node: str, attr_key: str, attr_value: Any) -> None: """ Add an attribute to a given node. Parameters ---------- node: str The node identifier attr_key: str The key for an attribute attr_value: Any The value corresponding to the key """ self.graph.add_node(node, **{attr_key: attr_value}) def add_edge_attribute( self, subject_node: str, object_node: str, edge_key: Optional[str], attr_key: str, attr_value: Any, ) -> None: """ Add an attribute to a given edge. Parameters ---------- subject_node: str The subject (source) node object_node: str The object (target) node edge_key: Optional[str] The edge key attr_key: str The attribute key attr_value: Any The attribute value """ self.graph.add_edge(subject_node, object_node, key=edge_key, **{attr_key: attr_value}) def update_node_attribute(self, node: str, attr_key: str, attr_value: Any, preserve: bool = False) -> Dict: """ Update an attribute of a given node. Parameters ---------- node: str The node identifier attr_key: str The key for an attribute attr_value: Any The value corresponding to the key preserve: bool Whether or not to preserve existing values for the given attr_key Returns ------- Dict A dictionary corresponding to the updated node properties """ node_data = self.graph.nodes[node] updated = prepare_data_dict(node_data, {attr_key: attr_value}, preserve=preserve) self.graph.add_node(node, **updated) return updated def update_edge_attribute( self, subject_node: str, object_node: str, edge_key: Optional[str], attr_key: str, attr_value: Any, preserve: bool = False, ) -> Dict: """ Update an attribute of a given edge. Parameters ---------- subject_node: str The subject (source) node object_node: str The object (target) node edge_key: Optional[str] The edge key attr_key: str The attribute key attr_value: Any The attribute value preserve: bool Whether or not to preserve existing values for the given attr_key Returns ------- Dict A dictionary corresponding to the updated edge properties """ e = self.graph.edges((subject_node, object_node, edge_key), keys=True, data=True) edge_data = list(e)[0][3] updated = prepare_data_dict(edge_data, {attr_key: attr_value}, preserve) self.graph.add_edge(subject_node, object_node, key=edge_key, **updated) return updated def get_node(self, node: str) -> Dict: """ Get a node and its properties. 
Parameters ---------- node: str The node identifier Returns ------- Dict The node dictionary """ n = {} if self.graph.has_node(node): n = self.graph.nodes[node] return n def get_edge(self, subject_node: str, object_node: str, edge_key: Optional[str] = None) -> Dict: """ Get an edge and its properties. Parameters ---------- subject_node: str The subject (source) node object_node: str The object (target) node edge_key: Optional[str] The edge key Returns ------- Dict The edge dictionary """ e = {} if self.graph.has_edge(subject_node, object_node, edge_key): e = self.graph.get_edge_data(subject_node, object_node, edge_key) return e def nodes(self, data: bool = True) -> Dict: """ Get all nodes in a graph. Parameters ---------- data: bool Whether or not to fetch node properties Returns ------- Dict A dictionary of nodes """ return self.graph.nodes(data) def edges(self, keys: bool = False, data: bool = True) -> Dict: """ Get all edges in a graph. Parameters ---------- keys: bool Whether or not to include edge keys data: bool Whether or not to fetch node properties Returns ------- Dict A dictionary of edges """ return self.graph.edges(keys=keys, data=data) def in_edges(self, node: str, keys: bool = False, data: bool = False) -> List: """ Get all incoming edges for a given node. Parameters ---------- node: str The node identifier keys: bool Whether or not to include edge keys data: bool Whether or not to fetch node properties Returns ------- List A list of edges """ return self.graph.in_edges(node, keys=keys, data=data) def out_edges(self, node: str, keys: bool = False, data: bool = False) -> List: """ Get all outgoing edges for a given node. Parameters ---------- node: str The node identifier keys: bool Whether or not to include edge keys data: bool Whether or not to fetch node properties Returns ------- List A list of edges """ return self.graph.out_edges(node, keys=keys, data=data) def nodes_iter(self) -> Generator: """ Get an iterable to traverse through all the nodes in a graph. Returns ------- Generator A generator for nodes where each element is a Tuple that contains (node_id, node_data) """ for n in self.graph.nodes(data=True): yield n def edges_iter(self) -> Generator: """ Get an iterable to traverse through all the edges in a graph. Returns ------- Generator A generator for edges where each element is a 4-tuple that contains (subject, object, edge_key, edge_data) """ for u, v, k, data in self.graph.edges(keys=True, data=True): yield u, v, k, data def remove_node(self, node: str) -> None: """ Remove a given node from the graph. Parameters ---------- node: str The node identifier """ self.graph.remove_node(node) def remove_edge(self, subject_node: str, object_node: str, edge_key: Optional[str] = None) -> None: """ Remove a given edge from the graph. Parameters ---------- subject_node: str The subject (source) node object_node: str The object (target) node edge_key: Optional[str] The edge key """ self.graph.remove_edge(subject_node, object_node, edge_key) def has_node(self, node: str) -> bool: """ Check whether a given node exists in the graph. Parameters ---------- node: str The node identifier Returns ------- bool Whether or not the given node exists """ return self.graph.has_node(node) def has_edge(self, subject_node: str, object_node: str, edge_key: Optional[str] = None) -> bool: """ Check whether a given edge exists in the graph. 
Parameters ---------- subject_node: str The subject (source) node object_node: str The object (target) node edge_key: Optional[str] The edge key Returns ------- bool Whether or not the given edge exists """ return self.graph.has_edge(subject_node, object_node, key=edge_key) def number_of_nodes(self) -> int: """ Returns the number of nodes in a graph. Returns ------- int """ return self.graph.number_of_nodes() def number_of_edges(self) -> int: """ Returns the number of edges in a graph. Returns ------- int """ return self.graph.number_of_edges() def degree(self): """ Get the degree of all the nodes in a graph. """ return self.graph.degree() def clear(self) -> None: """ Remove all the nodes and edges in the graph. """ self.graph.clear() @staticmethod def set_node_attributes(graph: BaseGraph, attributes: Dict) -> None: """ Set nodes attributes from a dictionary of key-values. Parameters ---------- graph: kgx.graph.base_graph.BaseGraph The graph to modify attributes: Dict A dictionary of node identifier to key-value pairs """ return set_node_attributes(graph.graph, attributes) @staticmethod def set_edge_attributes(graph: BaseGraph, attributes: Dict) -> None: """ Set nodes attributes from a dictionary of key-values. Parameters ---------- graph: kgx.graph.base_graph.BaseGraph The graph to modify attributes: Dict A dictionary of node identifier to key-value pairs Returns ------- Any """ return set_edge_attributes(graph.graph, attributes) @staticmethod def get_node_attributes(graph: BaseGraph, attr_key: str) -> Dict: """ Get all nodes that have a value for the given attribute ``attr_key``. Parameters ---------- graph: kgx.graph.base_graph.BaseGraph The graph to modify attr_key: str The attribute key Returns ------- Dict A dictionary where nodes are the keys and the values are the attribute values for ``key`` """ return get_node_attributes(graph.graph, attr_key) @staticmethod def get_edge_attributes(graph: BaseGraph, attr_key: str) -> Dict: """ Get all edges that have a value for the given attribute ``attr_key``. Parameters ---------- graph: kgx.graph.base_graph.BaseGraph The graph to modify attr_key: str The attribute key Returns ------- Dict A dictionary where edges are the keys and the values are the attribute values for ``attr_key`` """ return get_edge_attributes(graph.graph, attr_key) @staticmethod def relabel_nodes(graph: BaseGraph, mapping: Dict) -> None: """ Relabel identifiers for a series of nodes based on mappings. Parameters ---------- graph: kgx.graph.base_graph.BaseGraph The graph to modify mapping: Dict A dictionary of mapping where the key is the old identifier and the value is the new identifier. """ relabel_nodes(graph.graph, mapping, copy=False)
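# Hedged usage sketch of the NxGraph wrapper (node ids, edge key and properties
# are invented; the module-level helpers such as prepare_data_dict are assumed
# to be in scope).
g = NxGraph()
g.add_node("A", name="node A")
g.add_node("B", name="node B")
g.add_edge("A", "B", edge_key="A-related_to-B", predicate="related_to")
g.update_edge_attribute("A", "B", "A-related_to-B", "provided_by", "example-source")
print(g.number_of_nodes(), g.number_of_edges())  # 2 1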
def collapse_multi(mdg: nx.MultiDiGraph) -> nx.DiGraph:
    # collapse parallel edges of the multigraph into single edges whose weight is the multiplicity
    g = nx.DiGraph()
    for ((u, v), group) in groupby(sorted(mdg.edges())):
        g.add_edge(u, v, weight=len(list(group)))
    return g
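# Toy example for collapse_multi (edges are made up; assumes the function and
# its groupby import above are in scope): three parallel a -> b edges become one
# edge with weight 3.
import networkx as nx

mdg = nx.MultiDiGraph()
mdg.add_edges_from([("a", "b"), ("a", "b"), ("a", "b"), ("b", "c")])
g = collapse_multi(mdg)
# g["a"]["b"]["weight"] == 3 and g["b"]["c"]["weight"] == 1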
class GraphBuilder: def __init__(self, model: Model): self.model = model self.graph = MultiDiGraph() self.nodes = 0 self.edges = 0 def add_from_json(self, js: Json) -> None: if "id" in js and Section.reported in js: self.add_node( js["id"], js[Section.reported], js.get(Section.desired, None), js.get(Section.metadata, None), js.get("search", None), js.get("replace", False) is True, ) elif "from" in js and "to" in js: self.add_edge(js["from"], js["to"], js.get("edge_type", EdgeType.default)) else: raise AttributeError(f"Format not understood! Got {json.dumps(js)} which is neither vertex nor edge.") def add_node( self, node_id: str, reported: Json, desired: Optional[Json] = None, metadata: Optional[Json] = None, search: Optional[str] = None, replace: bool = False, ) -> None: self.nodes += 1 # validate kind of this reported json coerced = self.model.check_valid(reported) reported = reported if coerced is None else coerced kind = self.model[reported] # create content hash sha = GraphBuilder.content_hash(reported, desired, metadata) # flat all properties into a single string for search flat = search if isinstance(search, str) else (GraphBuilder.flatten(reported, kind)) self.graph.add_node( node_id, id=node_id, reported=reported, desired=desired, metadata=metadata, hash=sha, kind=kind, kinds=list(kind.kind_hierarchy()), kinds_set=kind.kind_hierarchy(), flat=flat, replace=replace | metadata.get("replace", False) is True if metadata else False, ) def add_edge(self, from_node: str, to_node: str, edge_type: str) -> None: self.edges += 1 key = GraphAccess.edge_key(from_node, to_node, edge_type) self.graph.add_edge(from_node, to_node, key, edge_type=edge_type) @staticmethod def content_hash(js: Json, desired: Optional[Json] = None, metadata: Optional[Json] = None) -> str: sha256 = hashlib.sha256() # all content hashes will be different, when the version changes sha256.update(ContentHashVersion.to_bytes(2, "big")) sha256.update(json.dumps(js, sort_keys=True).encode("utf-8")) if desired: sha256.update(json.dumps(desired, sort_keys=True).encode("utf-8")) if metadata: sha256.update(json.dumps(metadata, sort_keys=True).encode("utf-8")) return sha256.hexdigest() @staticmethod def flatten(js: Json, kind: Kind) -> str: result = "" def dispatch(value: Any, k: Kind) -> None: nonlocal result if isinstance(value, dict): for prop, elem in value.items(): sub = ( k.property_kind_of(prop, AnyKind()) if isinstance(k, ComplexKind) else (k.value_kind if isinstance(k, DictionaryKind) else AnyKind()) ) dispatch(elem, sub) elif isinstance(value, list): sub = k.inner if isinstance(k, ArrayKind) else AnyKind() for elem in value: dispatch(elem, sub) elif value is None or isinstance(value, bool): pass else: # in case of date time: "2017-05-30T22:04:34Z" -> "2017-05-30 22:04:34" if isinstance(k, DateTimeKind): value = re.sub("[ZT]", " ", value) if result: result += " " result += str(value).strip() dispatch(js, kind) return result def check_complete(self) -> None: # check that all vertices are given, that were defined in any edge definition # note: DiGraph will create an empty vertex node automatically for node_id, node in self.graph.nodes(data=True): assert node.get(Section.reported), f"{node_id} was used in an edge definition but not provided as vertex!" edge_types = {edge[2] for edge in self.graph.edges(data="edge_type")} al = EdgeType.all assert not edge_types.difference(al), f"Graph contains unknown edge types! Given: {edge_types}. 
Known: {al}"

        # make sure there is only one root node
        rid = GraphAccess.root_id(self.graph)
        root_node = self.graph.nodes[rid]

        # make sure the root node has the expected id: if the root is a generic
        # graph_root under a different id, re-create it (and its outgoing edges) as "root"
        if value_in_path(root_node, NodePath.reported_kind) == "graph_root" and rid != "root":
            root_node["id"] = "root"
            self.graph.add_node("root", **root_node)
            for succ in list(self.graph.successors(rid)):
                for edge_type in EdgeType.all:
                    key = GraphAccess.edge_key(rid, succ, edge_type)
                    if self.graph.has_edge(rid, succ, key):
                        self.graph.remove_edge(rid, succ, key)
                        self.add_edge("root", succ, edge_type)
            self.graph.remove_node(rid)
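# Hedged sketch of the keyed-edge convention GraphBuilder relies on: one explicit
# key per (from, to, edge_type) triple keeps parallel edges of different types
# apart (the key strings below are illustrative; the builder derives them via
# GraphAccess.edge_key).
import networkx as nx

g = nx.MultiDiGraph()
g.add_edge("root", "child", key="root_child_default", edge_type="default")
g.add_edge("root", "child", key="root_child_delete", edge_type="delete")
assert g.has_edge("root", "child", "root_child_default")
assert g.number_of_edges() == 2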
def NetworkXToProgramGraph( g: nx.MultiDiGraph, proto: Optional[programl_pb2.ProgramGraph] = None, **proto_fields, ) -> programl_pb2.ProgramGraph: """Perform the inverse transformation from networkx graph -> protobuf. See ProgramGraphToNetworkX() for details. Arguments: g: A networkx graph. proto: An optional protocol buffer instance to use. Else a new one is created. Calling code is reponsible for clearning the protocol buffer. **proto_fields: Optional keyword arguments to use when constructing a proto. Has no effect if proto argument is set. Returns: A ProgramGraph proto instance. """ proto = proto or programl_pb2.ProgramGraph(**proto_fields) # Create a map from function name to function ID. function_names = list( sorted(set([fn for _, fn in g.nodes(data="function") if fn]))) function_to_idx_map = {fn: i for i, fn in enumerate(function_names)} # Create the function protos. for function_name in function_names: function_proto = proto.function.add() function_proto.name = function_name # Function-level LLVM profiling info. if ("llvm_function_entry_count" in g.graph and function_name in g.graph["llvm_function_entry_count"]): function_proto.llvm_entry_count = g.graph[ "llvm_function_entry_count"][function_name] # Set the graph-level features and labels. proto.x[:] = np.array(g.graph["x"], dtype=np.int64).tolist() proto.y[:] = np.array(g.graph["y"], dtype=np.int64).tolist() if "data_flow_root_node" in g.graph: proto.data_flow_root_node = g.graph["data_flow_root_node"] if "data_flow_steps" in g.graph: proto.data_flow_steps = g.graph["data_flow_steps"] if "data_flow_positive_node_count" in g.graph: proto.data_flow_positive_node_count = g.graph[ "data_flow_positive_node_count"] # Graph-level LLVM profiling info. if "llvm_profile_num_functions" in g.graph: proto.llvm_profile.num_functions = g.graph[ "llvm_profile_num_functions"] if "llvm_profile_max_function_count" in g.graph: proto.llvm_profile.max_function_count = g.graph[ "llvm_profile_max_function_count"] if "llvm_profile_num_counts" in g.graph: proto.llvm_profile.num_counts = g.graph["llvm_profile_num_counts"] if "llvm_profile_total_count" in g.graph: proto.llvm_profile.total_count = g.graph["llvm_profile_total_count"] if "llvm_profile_max_count" in g.graph: proto.llvm_profile.max_count = g.graph["llvm_profile_max_count"] if "llvm_profile_max_internal_count" in g.graph: proto.llvm_profile.max_internal_count = g.graph[ "llvm_profile_max_internal_count"] # Create the node list. for node, data in g.nodes(data=True): node_proto = proto.node.add() node_proto.type = data["type"] node_proto.text = data["text"] node_proto.preprocessed_text = data["preprocessed_text"] if data["function"] is not None: node_proto.function = function_to_idx_map[data["function"]] node_proto.x[:] = np.array(data["x"], dtype=np.int64).tolist() node_proto.y[:] = np.array(data["y"], dtype=np.int64).tolist() # Node-level LLVM profiling info. if data.get("llvm_profile_true_weight") is not None: node_proto.llvm_profile_true_weight = data[ "llvm_profile_true_weight"] if data.get("llvm_profile_false_weight") is not None: node_proto.llvm_profile_false_weight = data[ "llvm_profile_false_weight"] if data.get("llvm_profile_total_weight") is not None: node_proto.llvm_profile_total_weight = data[ "llvm_profile_total_weight"] # Create the edge list. for src, dst, data in g.edges(data=True): edge_proto = proto.edge.add() edge_proto.source_node = src edge_proto.destination_node = dst edge_proto.flow = data["flow"] edge_proto.position = data["position"] return proto
def delete_control_flow_edges(graph: nx.MultiDiGraph):
    for u, v, k, attrs in list(graph.edges(keys=True, data=True)):
        if 'control_flow_edge' in attrs and attrs['control_flow_edge']:
            graph.remove_edge(u, v, k)
            log.debug('Removing control flow edge from {} to {}'.format(u, v))
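# Toy usage sketch; `log` stands in for the module-level logger the function
# above expects, and the edges are made up.
import logging
import networkx as nx

log = logging.getLogger(__name__)
g = nx.MultiDiGraph()
g.add_edge("a", "b", control_flow_edge=True)
g.add_edge("a", "b", control_flow_edge=False)
delete_control_flow_edges(g)
assert g.number_of_edges() == 1  # only the data edge remains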
class GraphData(SaveLoad):
    """
    The store of graph data.
    Each node is represented as a dict of node info named 'node_json',
    Example Format for 'node_json':

    {
        "id": 1,
        "properties": {"name":"bob","age":1},
        "labels": ["entity","man"]
    }

    >>> graphdata=GraphData()
    graphdata.create_index_on_property("name","aliases","qualified_name")
    # save a graphdata to disk
    graphdata.save("test.v1.graph")
    # load a graphdata from disk
    graphdata=GraphData.load("test.v1.graph")
    >>>
    """

    DEFAULT_KEY_NODE_ID = "id"  # the key name for the node id, every node must have it.
    DEFAULT_KEY_NODE_PROPERTIES = "properties"  # the key name for the node properties, every node must have it.
    DEFAULT_KEY_NODE_LABELS = "labels"  # the key name for the node labels, every node must have it.

    DEFAULT_KEYS = [DEFAULT_KEY_NODE_ID, DEFAULT_KEY_NODE_PROPERTIES, DEFAULT_KEY_NODE_LABELS]

    UNASSIGNED_NODE_ID = -1  # a node without an id specified, i.e. a newly created node, has id -1

    DEFAULT_KEY_RELATION_START_ID = "startId"
    DEFAULT_KEY_RELATION_TYPE = "relationType"
    DEFAULT_KEY_RELATION_END_ID = "endId"

    def __init__(self):
        # two map for
        self.__init_graph()

    def clear(self):
        self.__init_graph()

    def __init_graph(self):
        self.graph = MultiDiGraph()
        self.max_node_id = 0
        self.label_to_ids_map = {}
        self.index_collection = GraphIndexCollection()
        self.relation_type_to_num_map = {}

    def create_index_on_property(self, *property_name_list):
        """
        create index on some properties. It makes the query on the corresponding property faster.
        :param property_name_list: one or more property names.
        :return:
        """
        self.index_collection.create_index_on_property(*property_name_list)

    def find_all_shortest_paths(self, startId, endId):
        """
        find all shortest paths between two nodes
        :param startId:
        :param endId:
        :return:
        """
        shortest_paths = all_shortest_paths(self.graph, startId, endId)
        return shortest_paths

    def find_shortest_path(self, startId, endId):
        """
        find one shortest path between two nodes
        :param startId:
        :param endId:
        :return:
        """
        shortest_paths = shortest_path(self.graph, startId, endId)
        return shortest_paths

    def set_nodes(self, nodes):
        for n in nodes:
            self.add_node(node_id=n[self.DEFAULT_KEY_NODE_ID],
                          node_properties=n[self.DEFAULT_KEY_NODE_PROPERTIES],
                          node_labels=n[self.DEFAULT_KEY_NODE_LABELS])

    def add_labels(self, *labels):
        """
        add a list of labels to the graph
        :param labels:
        :return:
        """
        for label in labels:
            if not label:
                return
            if label not in self.label_to_ids_map.keys():
                self.label_to_ids_map[label] = set([])

    def add_label_by_node_id(self, node_id, label):
        """
        add a label to a node
        :param node_id: the node id to which the label will be added
        :param label: the label to add
        :return: True if added successfully, False otherwise.
""" if not label: return False node_json = self.get_node_info_dict(node_id) if not node_json: return False node_json[GraphData.DEFAULT_KEY_NODE_LABELS].add(label) self.label_to_ids_map[label].add(node_id) return True def get_node_ids_by_label(self, label): if label not in self.label_to_ids_map.keys(): return set([]) return self.label_to_ids_map[label] def add_label_by_label(self, label, new_label): """ add a label to node in graph, the node must has the specific label :param new_label: the new_label add to node :param label: the node must has the label :return: """ for node_id in self.get_node_ids_by_label(label): self.add_label_by_node_id(node_id, new_label) def add_label_to_all(self, label): """ add a label to node in graph :param label: :return: """ if not label: return self.add_labels(label) for node_id in self.get_node_ids(): self.add_label_by_node_id(node_id, label) def add_node(self, node_labels, node_properties, node_id=UNASSIGNED_NODE_ID, primary_property_name=""): """ add a node json to the graph :param node_id: the node_id to identify the node, if not given, it will be add as new node and give a node id :param node_properties: a dict of node properties, key-value pair :param node_labels: a set of node labels :param primary_property_name:make sure the node_json["properties"][primary_property_name] is unique in GraphData. if no passing, the node json will be add to graph without check. otherwise, only the node json with unique property value ( property value is got by primary_property_name ) will be added to the GraphData. :return:-1, means that adding node json fail. otherwise, return the id of the newly added node """ if primary_property_name: if primary_property_name not in node_properties: print( "node json must have a primary_property_name ( %r ) in properties " % primary_property_name) return self.UNASSIGNED_NODE_ID node_json = self.find_one_node_by_property( property_name=primary_property_name, property_value=node_properties[primary_property_name]) if node_json: return node_json[self.DEFAULT_KEY_NODE_ID] if node_id == self.UNASSIGNED_NODE_ID: node_id = self.max_node_id + 1 self.max_node_id = self.max_node_id + 1 new_node_json = { self.DEFAULT_KEY_NODE_ID: node_id, self.DEFAULT_KEY_NODE_PROPERTIES: node_properties, self.DEFAULT_KEY_NODE_LABELS: set(node_labels) } self.graph.add_node(node_id, **new_node_json) if self.max_node_id < node_id: self.max_node_id = node_id self.add_labels(*new_node_json[self.DEFAULT_KEY_NODE_LABELS]) for label in new_node_json[self.DEFAULT_KEY_NODE_LABELS]: self.label_to_ids_map[label].add(node_id) self.index_collection.add_node( node_id=node_id, node_properties=new_node_json[ GraphData.DEFAULT_KEY_NODE_PROPERTIES]) return node_id def update_node_property_by_node_id(self, node_id, node_properties): if not node_id in list(self.get_node_ids()): return self.UNASSIGNED_NODE_ID node_json = self.get_node_info_dict(node_id) update_node_id = node_json[self.DEFAULT_KEY_NODE_ID] update_node_properties = node_json[self.DEFAULT_KEY_NODE_PROPERTIES] update_node_labels = node_json[self.DEFAULT_KEY_NODE_LABELS] for k, v in node_properties.items(): update_node_properties[k] = v update_node_json = { self.DEFAULT_KEY_NODE_ID: update_node_id, self.DEFAULT_KEY_NODE_PROPERTIES: update_node_properties, self.DEFAULT_KEY_NODE_LABELS: update_node_labels } self.graph.add_node(update_node_id, **update_node_json) self.index_collection.add_node(node_id=update_node_id, node_properties=update_node_properties) return update_node_id def update_node_by_node_id(self, node_id, 
node_labels, node_properties): if not node_id in list(self.get_node_ids()): return self.UNASSIGNED_NODE_ID node_json = self.get_node_info_dict(node_id) update_node_id = node_json[self.DEFAULT_KEY_NODE_ID] update_node_properties = node_json[self.DEFAULT_KEY_NODE_PROPERTIES] update_node_labels = node_json[self.DEFAULT_KEY_NODE_LABELS] for k, v in node_properties.items(): update_node_properties[k] = v for label in node_labels: update_node_labels.add(label) update_node_json = { self.DEFAULT_KEY_NODE_ID: update_node_id, self.DEFAULT_KEY_NODE_PROPERTIES: update_node_properties, self.DEFAULT_KEY_NODE_LABELS: update_node_labels } self.graph.add_node(update_node_id, **update_node_json) self.add_labels(*update_node_labels) for label in update_node_labels: self.label_to_ids_map[label].add(node_id) self.index_collection.add_node(node_id=update_node_id, node_properties=update_node_properties) return update_node_id def update_node_property_value_by_node_id(self, node_id, node_property_name, node_proprty_value): if not node_id in list(self.get_node_ids()): return self.UNASSIGNED_NODE_ID if node_property_name == "": return node_id node_property = {node_property_name: node_proprty_value} return self.update_node_property_by_node_id(node_id, node_property) def remove_node(self, node_id): if node_id not in self.graph.nodes: return None # print(type(self.graph.nodes)) node_json = self.graph.nodes[node_id] in_relations = set(self.graph.in_edges(node_id, keys=True)) out_relations = set(self.graph.out_edges(node_id, keys=True)) self.graph.remove_node(node_id) for label in node_json[self.DEFAULT_KEY_NODE_LABELS]: self.label_to_ids_map[label].remove(node_id) self.index_collection.remove_node(node_id) return node_json, out_relations, in_relations def remove_all_nodes(self): ids = self.get_node_ids() for id in ids: self.remove_node(id) return True def merge_node(self, node_labels, node_properties, primary_property_name): """ merge a node json to the graph, that is if we can't not find the node with primary_property_value match the given node. we will add a new node, if we found, we will add copy all properties given to the exist node, copy all labels to the exist node. properties will be updated by this merge. That is, if the node to be merged has the same attributes as the existing node, the attributes of the new node are used. :param node_properties: a dict of node properties, key-value pair :param node_labels: a set of node labels :param primary_property_name: The name of the property to check, the merged node and the new node are the same on this property. :return:-1, means that adding node json fail. otherwise, return the id of the newly added(merged) node.If it already exists, the id of this merged node will not change. 
""" if not primary_property_name: print("primary_property_name must given on merge") return GraphData.UNASSIGNED_NODE_ID if primary_property_name not in node_properties: print( "node json must have a primary_property_name ( %r ) in properties " % primary_property_name) return self.UNASSIGNED_NODE_ID node_json = self.find_one_node_by_property( property_name=primary_property_name, property_value=node_properties[primary_property_name]) if not node_json: return self.add_node(node_labels=node_labels, node_properties=node_properties, node_id=GraphData.UNASSIGNED_NODE_ID) merge_node_id = node_json[self.DEFAULT_KEY_NODE_ID] merge_properties = node_json[self.DEFAULT_KEY_NODE_PROPERTIES] for k, v in node_properties.items(): merge_properties[k] = v merge_labels = set(node_json[self.DEFAULT_KEY_NODE_LABELS]) for label in node_labels: merge_labels.add(label) return self.add_node(node_labels=merge_labels, node_properties=merge_properties, node_id=merge_node_id) def add_node_with_multi_primary_property(self, node_labels, node_properties, node_id=UNASSIGNED_NODE_ID, primary_property_names=None): """ add a node json to the graph :param node_id: the node_id to identify the node, if not given, it will be add as new node and give a node id :param node_properties: a dict of node properties, key-value pair :param node_labels: a set of node labels :param primary_property_names:a list of primary properties. make sure the node_json["properties"][primary_property_name] is unique in GraphData. if no passing, the node json will be add to graph without check. otherwise, only the node json with unique property value ( property value is got by primary_property_name ) will be added to the GraphData. :return:-1, means that adding node json fail. otherwise, return the id of the newly added node """ if primary_property_names is None: primary_property_names = [] match_properties = {} for primary_property_name in primary_property_names: if primary_property_name not in node_properties: print( "node json must have a primary_property_name ( %r ) in properties " % primary_property_name) return self.UNASSIGNED_NODE_ID match_properties[primary_property_name] = node_properties[ primary_property_name] node_json = self.find_one_node_by_properties(**match_properties) if node_json: return node_json[self.DEFAULT_KEY_NODE_ID] if node_id == self.UNASSIGNED_NODE_ID: node_id = self.max_node_id + 1 new_node_json = { self.DEFAULT_KEY_NODE_ID: node_id, self.DEFAULT_KEY_NODE_PROPERTIES: node_properties, self.DEFAULT_KEY_NODE_LABELS: set(node_labels) } self.graph.add_node(node_id, **new_node_json) if self.max_node_id < node_id: self.max_node_id = node_id self.add_labels(*new_node_json[self.DEFAULT_KEY_NODE_LABELS]) for label in new_node_json[self.DEFAULT_KEY_NODE_LABELS]: self.label_to_ids_map[label].add(node_id) self.index_collection.add_node( node_id=node_id, node_properties=new_node_json[ GraphData.DEFAULT_KEY_NODE_PROPERTIES]) return node_id def merge_node_with_multi_primary_property(self, node_labels, node_properties, primary_property_names=None): """ merge a node json to the graph, that is if we can't not find the node with primary_property_value match the given node. we will add a new node, if we found, we will add copy all properties given to the exist node, copy all labels to the exist node. properties will be updated by this merge. That is, if the node to be merged has the same attributes as the existing node, the attributes of the new node are used. 
:param node_properties: a dict of node properties, key-value pair :param node_labels: a set of node labels :param primary_property_names: The list of name of the property to check, the merged node and the new node are the same on this property. :return:-1, means that adding node json fail. otherwise, return the id of the newly added(merged) node.If it already exists, the id of this merged node will not change. """ if not primary_property_names: print("primary_property_names must given on merge") return GraphData.UNASSIGNED_NODE_ID match_properties = {} for primary_property_name in primary_property_names: if primary_property_name not in node_properties: print( "node json must have a primary_property_name ( %r ) in properties " % primary_property_name) return self.UNASSIGNED_NODE_ID match_properties[primary_property_name] = node_properties[ primary_property_name] node_json = self.find_one_node_by_properties(**match_properties) if not node_json: return self.add_node(node_labels=node_labels, node_properties=node_properties, node_id=GraphData.UNASSIGNED_NODE_ID) merge_node_id = node_json[self.DEFAULT_KEY_NODE_ID] merge_properties = node_json[self.DEFAULT_KEY_NODE_PROPERTIES] for k, v in node_properties.items(): merge_properties[k] = v merge_labels = set(node_json[self.DEFAULT_KEY_NODE_LABELS]) for label in node_labels: merge_labels.add(label) return self.add_node(node_labels=merge_labels, node_properties=merge_properties, node_id=merge_node_id) def refresh_indexer(self): """ refresh the index on all properties. :return: """ index_properties = self.index_collection.get_index_property() index_properties = list(index_properties) del self.index_collection self.index_collection = GraphIndexCollection() self.create_index_on_property(*index_properties) for node_id, node_json in self.graph.nodes(data=True): if node_json is None: continue node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES] self.index_collection.add_node(node_id, node_properties_json) def find_one_node_by_property(self, property_name, property_value): if self.index_collection.is_property_indexed(property_name): candidate_node_ids = list( self.index_collection.find_ids(property_name, property_value=property_value)) if len(candidate_node_ids) == 0: return None return self.get_node_info_dict(candidate_node_ids[0]) for node_id, node_json in self.graph.nodes(data=True): node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES] if property_name in node_properties_json.keys( ) and node_properties_json[property_name] == property_value: return node_json return None def find_nodes_by_ids(self, *ids): result = [] for node_id in ids: node_json = self.get_node_info_dict(node_id) if node_json: result.append(node_json) return result def find_nodes_by_property(self, property_name, property_value): if self.index_collection.is_property_indexed(property_name): candidate_node_ids = list( self.index_collection.find_ids(property_name, property_value=property_value)) return self.find_nodes_by_ids(*candidate_node_ids) nodes = [] for node_id, node_json in self.graph.nodes(data=True): node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES] if property_name in node_properties_json.keys( ) and node_properties_json[property_name] == property_value: nodes.append(node_json) return nodes def find_one_node_by_property_value_starts_with(self, property_name, property_value_starter): """ find a node which its property value is string and the string is startswith a given string :param property_name: :param property_value_starter: :return: 
""" for node_id, node_json in self.graph.nodes(data=True): node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES] if property_name not in node_properties_json.keys(): continue property_value = node_properties_json[property_name] if type(property_value) != str: continue if property_value.startswith(property_value_starter): return node_json return None def find_nodes_by_property_value_starts_with(self, property_name, property_value_starter): """ find all nodes which its property value is string and the string is startswith a given string :param property_name: :param property_value_starter: :return: """ nodes = [] for node_id, node_json in self.graph.nodes(data=True): node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES] if property_name not in node_properties_json.keys(): continue property_value = node_properties_json[property_name] if type(property_value) != str: continue if property_value.startswith(property_value_starter): nodes.append(node_json) return nodes def __find_node_ids_by_index_properties(self, **index_properties): result_ids = self.get_node_ids() for property_name, property_value in index_properties.items(): result_ids = result_ids.intersection( self.index_collection.find_ids(property_name=property_name, property_value=property_value)) return result_ids def find_one_node_by_properties(self, **properties): indexed_properties = {} unindexed_properties = {} for property_name, property_value in properties.items(): if self.index_collection.is_property_indexed( property_name=property_name): indexed_properties[property_name] = property_value else: unindexed_properties[property_name] = property_value candidate_node_ids = self.__find_node_ids_by_index_properties( **indexed_properties) if len(candidate_node_ids) == 0: return None if len(unindexed_properties) == 0: return self.get_node_info_dict(list(candidate_node_ids)[0]) for node_id in candidate_node_ids: node_json = self.get_node_info_dict(node_id=node_id) node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES] is_match = True for property_name, property_value in unindexed_properties.items(): if property_name not in node_properties_json.keys( ) or node_properties_json[property_name] != property_value: is_match = False break if is_match: return node_json return None def set_relations(self, relations): for t in relations: self.add_relation(startId=t[self.DEFAULT_KEY_RELATION_START_ID], relationType=t[self.DEFAULT_KEY_RELATION_TYPE], endId=t[self.DEFAULT_KEY_RELATION_END_ID]) def add_relation(self, startId, relationType, endId): """ add a new relation to graphData, if exist, not add. 
:param startId: :param relationType: :param endId: :return:False, the relation is already exist adding fail, True, add the relation successsful """ # if startId == GraphData.UNASSIGNED_NODE_ID: # return False # if endId == GraphData.UNASSIGNED_NODE_ID: # return False if startId not in self.graph.nodes or endId not in self.graph.nodes: return False if self.exist_relation(startId=startId, relationType=relationType, endId=endId): return False self.__add_one_relation_count(relationType) self.graph.add_edge(startId, endId, relationType) return True def __add_one_relation_count(self, relation_type): relation_type_to_num_map = self.get_relation_type_to_num_map() relation_type_to_num_map[relation_type] = relation_type_to_num_map.get( relation_type, 0) + 1 def __remove_one_relation_count(self, relation_type): relation_type_to_num_map = self.get_relation_type_to_num_map() relation_type_to_num_map[relation_type] = max( 0, relation_type_to_num_map.get(relation_type, 0) - 1) def add_relation_with_property(self, startId, relationType, endId, **kwargs): if startId not in self.graph.nodes or endId not in self.graph.nodes: return False if self.exist_relation(startId=startId, relationType=relationType, endId=endId): return False self.__add_one_relation_count(relationType) self.graph.add_edge(startId, endId, relationType, **kwargs) return True def remove_relation(self, startId, relationType, endId): if not self.exist_relation( startId=startId, relationType=relationType, endId=endId): return False self.__remove_one_relation_count(relationType) self.graph.remove_edge(startId, endId, relationType) return True def remove_all_relations(self): relation_pairs = self.get_relation_pairs() for relation_pair in relation_pairs: relations = self.get_relations(start_id=relation_pair[0], end_id=relation_pair[1]) for relation in relations: self.remove_relation(relation[0], relation[1], relation[2]) return True def exist_relation(self, startId, relationType, endId): return self.graph.has_edge(startId, endId, relationType) def exist_any_relation(self, startId, endId): return self.graph.has_edge(startId, endId) def get_relations(self, start_id=None, relation_type=None, end_id=None): candidates = None if start_id is not None: candidates = self.get_all_out_relations(start_id) if end_id is not None: tmp = self.get_all_in_relations(end_id) if candidates is not None: candidates &= tmp else: candidates = tmp candidates = self.get_relation_pairs_with_type( ) if candidates is None else candidates if relation_type is not None: candidates = set( filter(lambda r: r[1] == relation_type, candidates)) return candidates def get_all_relations(self, id_1, id_2): result = set([]) result = result | self.get_relations(start_id=id_1, end_id=id_2) result = result | self.get_relations(start_id=id_2, end_id=id_1) return result def get_edge_extra_info(self, start_id, end_id, relation_name, extra_key): relation_dict = self.graph.get_edge_data(start_id, end_id) if relation_name in relation_dict: if extra_key in relation_dict[relation_name]: return relation_dict[relation_name][extra_key] return "" def get_node_num(self): return len(self.graph.nodes) def get_relation_num(self): return len(self.graph.edges) def get_node_ids(self): return set(self.graph.nodes) def get_relation_pairs(self): # todo:cache the result? 
""" get the relation list in [(startId,endId)] format :return: """ pairs = set(self.graph.edges(keys=False)) return pairs def get_relation_pairs_with_type(self): """ get the relation list in [(startId,endId)] format :return: """ pairs = {(r[0], r[2], r[1]) for r in self.graph.edges(keys=True)} return pairs def get_all_out_relations(self, node_id): if node_id not in self.graph.nodes: return set() return {(r[0], r[2], r[1]) for r in self.graph.out_edges(node_id, keys=True)} def get_all_in_relations(self, node_id): if node_id not in self.graph.nodes: return set() return {(r[0], r[2], r[1]) for r in self.graph.in_edges(node_id, keys=True)} def update_node_index(self, node_id): node_info = self.get_node_info_dict(node_id=node_id) node_properties = node_info[self.DEFAULT_KEY_NODE_PROPERTIES] self.index_collection.add_node(node_id=node_id, node_properties=node_properties) def get_node_info_dict(self, node_id): """ get the node info dict, :param node_id: the node id :return: """ return self.graph.nodes.get(node_id, None) def get_properties_for_node(self, node_id, key_node_properties=DEFAULT_KEY_NODE_PROPERTIES ): """ get the node properties part from node info dict :param key_node_properties: specify the key of key_node_properties, default is "properties" :param node_id: the node id :return: {} if the node not exist """ node_info_dict = self.get_node_info_dict(node_id) if node_info_dict is None: return {} return node_info_dict[key_node_properties] def get_labels_for_node(self, node_id, key_node_labels=DEFAULT_KEY_NODE_LABELS): """ get the node properties part from node info dict :param key_node_labels: specify the key of node_labels, default is "labels" :param node_id: the node id :return: [] if the node not exist """ node_info_dict = self.get_node_info_dict(node_id) if node_info_dict is None: return [] return node_info_dict[key_node_labels] def get_all_labels(self): """ get all labels as set for current node. :return: a set of labels. 
""" return set(self.label_to_ids_map.keys()) def get_all_relation_types(self): """ get all relation types in graph data :return: a set of relation type strings """ return set(self.get_relation_type_to_num_map().keys()) def get_relation_count_by_type(self, relation_type): relation_type_to_num_map = self.get_relation_type_to_num_map() return relation_type_to_num_map.get(relation_type, 0) def get_relation_type_to_num_map(self): return self.relation_type_to_num_map def __count_relation_type_to_num_map(self): relation_type_to_num_map = {} relation_type_to_relation = {} for r in self.get_relation_pairs_with_type(): if r[1] not in relation_type_to_relation: relation_type_to_relation[r[1]] = set() relation_type_to_relation[r[1]].add(r) for k, v in relation_type_to_relation.items(): relation_type_to_num_map[k] = len(v) return relation_type_to_num_map def print_label_count(self): print("Label Num=%d" % len(self.label_to_ids_map.keys())) for k, v in self.label_to_ids_map.items(): print("<Label:%r Num:%d>" % (k, len(v))) def print_graph_info(self): print("----- Graph Info ------") print(self) self.print_label_count() self.print_relation_info() print("-----------------------") def print_relation_info(self): relation_type_to_num_map = self.get_relation_type_to_num_map() print("Relation Num=%d" % len(relation_type_to_num_map.keys())) for k, v in relation_type_to_num_map.items(): print("<Relation:%r Num:%d>" % (k, v)) def __repr__(self): return "<GraphData nodeNum=%d relNum=%d maxNodeId=%d>" % ( self.get_node_num(), self.get_relation_num(), self.max_node_id) def subgraph(self, node_ids): """ get a sub graph of graph data which keep only given nodes and relations between nodes :param node_ids: the kept node ids in graph :return: a graph that keep all things. """ graph_data = deepcopy(self) remove_nodes = set(self.get_node_ids()) - node_ids for node_id in remove_nodes: graph_data.remove_node(node_id) return graph_data
class FeatureConcatenator: def __init__(self, g_input: Graph) -> None: log.info( f"FeatureConcatenator: Initiating with a graph of {g_input.number_of_nodes()} nodes " f"and {g_input.number_of_edges()} edges" ) self.g_input = g_input self.g = MultiDiGraph(deepcopy(g_input)) self._obtain_attrs() self._init_feat_attrs() def _obtain_attrs(self) -> None: """ Obtains a list-formatted set of node and edge attributes """ self.n_targets: Set[str] = set(self.g.graph["n_targets"]) if self.g.graph[ "n_targets" ] else set() self.n_attrs: Set[str] = { k_attr for nid, attrs in self.g.nodes.data() for k_attr in attrs.keys() if k_attr not in self.n_targets } if "label" in self.n_attrs: self.n_attrs.remove("label") log.info( "FeatureConcatenator: The following set of node attributes " f"is present in the graph:\n{self.n_attrs}" ) self.e_targets: Set[str] = set(self.g.graph["e_targets"]) if self.g.graph[ "e_targets" ] else set() self.e_attrs: Set[str] = { k_attr for u, v, k, attrs in self.g.edges.data(keys=True) for k_attr in attrs.keys() if k_attr not in self.e_targets } if "label" in self.e_attrs: self.e_attrs.remove("label") log.info( "FeatureConcatenator: The following set of edge attributes " f"is present in the graph:\n{self.e_attrs}" ) def _init_feat_attrs(self) -> None: """ Creates an empty node attribute "nfeat" and an empty edge attribute "efeat" """ log.info("FeatureConcatenator: Initiating target nfeat attribute with nulls") mapping_nfeat: Dict[Union[int, str], None] = {nid: None for nid in self.g.nodes} nx.set_node_attributes(self.g, mapping_nfeat, "nfeat") log.info("FeatureConcatenator: Initiating target efeat attribute with nulls") mapping_efeat: Dict[Any, Any] = { (u, v, k): None for u, v, k in self.g.edges(keys=True) } nx.set_edge_attributes(self.g, mapping_efeat, "efeat") def concat_n_attrs(self) -> None: """ Encodes all node attributes as continous variables into attribute "nfeat" """ log.info( f"FeatureConcatenator: Concatenating the following node attributes:\n{self.n_attrs}" ) mapping_attrs: Dict[Union[int, str], ndarray] = { k: np.array([v[attr] for attr in self.n_attrs]).reshape(1, -1) for k, v in self.g.nodes.data() } nx.set_node_attributes(self.g, mapping_attrs, "nfeat") def concat_e_attrs(self) -> None: """ Encodes all edge attributes as continous variables into attribute "efeat" """ log.info( f"FeatureConcatenator: Concatenating the following edge attributes:\n{self.e_attrs}" ) mapping_attrs: Dict[Tuple[Any, Any, Any], ndarray] = { (u, v, k): np.array([e[attr] for attr in self.e_attrs]).reshape(1, -1) for u, v, k, e in self.g.edges.data(keys=True) } nx.set_edge_attributes(self.g, mapping_attrs, "efeat") def delete_originals(self) -> None: """ Deletes original node and edge attributes after they have been concatenated """ log.info( "FeatureConcatenator: Deleting original node attributes " f"{self.n_attrs.union(self.n_targets)} and edge attributes {self.e_attrs.union(self.e_targets)}", ) for n_attr in self.n_attrs.union(self.n_targets): for nid in self.g.nodes: del self.g.nodes[nid][n_attr] for e_attr in self.e_attrs.union(self.e_targets): for u, v, k in self.g.edges(keys=True): del self.g.edges[u, v, k][e_attr] @property def graph(self) -> Graph: return self.g
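# Minimal standalone sketch (separate from FeatureConcatenator) of the concat_n_attrs
# idea: collect every numeric node attribute into a single "nfeat" row vector per node
# via nx.set_node_attributes. The attribute names here are made up; the class above
# iterates its attribute set directly, while this sketch sorts the names only to make
# the column order deterministic.
import networkx as nx
import numpy as np

g = nx.MultiDiGraph()
g.add_node("a", degree_norm=0.5, age=3.0)
g.add_node("b", degree_norm=0.1, age=7.0)

n_attrs = {"degree_norm", "age"}
mapping_attrs = {
    nid: np.array([attrs[a] for a in sorted(n_attrs)]).reshape(1, -1)
    for nid, attrs in g.nodes.data()
}
nx.set_node_attributes(g, mapping_attrs, "nfeat")
# g.nodes["a"]["nfeat"] -> array([[3. , 0.5]])  (columns follow sorted(n_attrs))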
def plot_graph(graph: nx.MultiDiGraph) -> None: """ Plot the Multiple Directed graph using the plotly library. """ # Choosing the spring layout to position the vertices of the graph. pos = nx.spring_layout(graph) # Creating the edge trace. edge_x = [] edge_y = [] xtext = [] ytext = [] edge_values_text = [] for edge in graph.edges(): # Determine the start and end coordinates of the edge on the graph. x0, y0 = pos[edge[0]] x1, y1 = pos[edge[1]] # Add all x coordinates to list of x_edge data. edge_x.append(x0) edge_x.append(x1) edge_x.append(None) # Add all y coordinates to list of y_edge data. edge_y.append(y0) edge_y.append(y1) edge_y.append(None) # Add x midpoint coordinates to list of xtext data. xtext.append((x0 + x1) / 2) # Add y midpoint coordinates to list of ytext data. ytext.append((y0 + y1) / 2) # Add transaction value to list of edge_values data. value = graph.get_edge_data(edge[0], edge[1])[0]['weight'] edge_values_text.append(f"Transaction Value: {value}") # Plotting the edges. edge_trace = go.Scatter(x=edge_x, y=edge_y, line=dict(width=1, color='black'), mode='lines') # Plotting the edge transaction text. edge_values_trace = go.Scatter(x=xtext, y=ytext, mode='none', text=edge_values_text, textposition='top center', hovertemplate='%{text}<extra></extra>') # Creating the node trace. node_x = [] node_y = [] node_size = [] for node in graph.nodes(): # Determine the coordinates of each node (using the spring layout defined earlier) x, y = pos[node] node_x.append(x) node_y.append(y) size = 10 if graph.nodes[node] != {}: size = graph.nodes[node]['size'] node_size.append(size) node_trace = go.Scatter(x=node_x, y=node_y, mode='markers', hoverinfo='text', marker=dict(showscale=True, colorscale='Hot', color=[], size=node_size, colorbar=dict( thickness=10, title='# of Transactions (degree)', xanchor='left', titleside='right'), line_width=2)) # Setting the text of each node to its address. node_text = [] for node in graph.nodes(): node_desc = f"Address: {node}" # If the account doesn't have an empty representation # in the graph, get its balance. if graph.nodes[node] != {}: balance = graph.nodes[node]['balance'] node_desc = f"Address: {node}\nBalance: {balance}" # Add the description of the node to the list (which # will get added to the trace, updating it). node_text.append(node_desc) # Update the text and size attributes of the node trace. node_trace.text = node_text node_neighbours = [] for node in graph.adjacency(): # To find the neighbours of this node (accounts who either # sent or received transactions from this current account) # we must access the second item of a tuple, which contains # a dictionary representation of its neighbours (addresses # mapped to neighbours = len(node[1]) node_neighbours.append(neighbours) node_trace.marker.color = node_neighbours # Setting up the layout here. layout = go.Layout( title='Ethereum Transaction Graph', showlegend=False, hovermode='closest', xaxis=dict(showgrid=False, zeroline=False), yaxis=dict(showgrid=False, zeroline=False), margin=dict(b=20, l=15, r=15, t=50), # Setting up the margins around the graph ) # Plot the graph figure. fig = go.Figure(data=[edge_trace, node_trace, edge_values_trace], layout=layout) # update layout fig.update_layout(title_font_size=15) fig.show()
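# Hedged usage sketch for plot_graph above: it reads a 'weight' attribute from the
# first parallel edge between two accounts and, when present, 'size' and 'balance'
# node attributes. The addresses and values below are invented for illustration.
import networkx as nx

tx_graph = nx.MultiDiGraph()
tx_graph.add_node("0xabc", size=20, balance=4.2)
tx_graph.add_node("0xdef", size=12, balance=1.7)
tx_graph.add_edge("0xabc", "0xdef", weight=0.5)   # transaction value
tx_graph.add_edge("0xdef", "0xabc", weight=1.25)

# plot_graph(tx_graph)  # opens an interactive plotly figure in a browser/notebook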
def dump_graph_for_graphviz(graph: nx.MultiDiGraph, node_attrs: list = ['kind', 'op', 'shape'], edge_attrs: list = ['in', 'out'], nodes_to_dump: list = None, save_to_svg=False): log.debug("---- GRAPHVIZ OUTPUT STARTS ----") if nodes_to_dump is None: nodes_to_dump = graph.nodes() string = '\ndigraph {\n' visited_nodes = set() for src_node_name, dst_node_name, attrs in graph.edges(data=True): visited_nodes.add(src_node_name) visited_nodes.add(dst_node_name) if src_node_name not in nodes_to_dump or dst_node_name not in nodes_to_dump: continue src_node = graph.node[src_node_name] dst_node = graph.node[dst_node_name] src_node_string = str(src_node_name) + '\\n' + '\\n'.join([ str(key) + '=' + str(src_node.get(key, 'None')) for key in node_attrs if key in src_node ]) dst_node_string = str(dst_node_name) + '\\n' + '\\n'.join([ str(key) + '=' + str(dst_node.get(key, 'None')) for key in node_attrs if key in dst_node ]) edge_string = ' '.join([ str(key) + '=' + str(attrs.get(key, 'None')) for key in edge_attrs if key in attrs ]) string += '"{}" -> "{}" [label = "{}"];\n'.format( src_node_string, dst_node_string, edge_string) for node in nodes_to_dump: if node not in visited_nodes: string += '"{}"'.format( node ) # TODO: add attributes like it was done in the loop above visited_nodes.add(node) string += '}' log.debug(string) log.debug("---- GRAPHVIZ OUTPUT ENDS ----") if save_to_svg: try: import graphviz import os file_name = "{}_{}.txt".format(graph.name.replace('/', '_'), 0) id = 1 while os.path.exists(file_name): file_name = "{}_{}.txt".format(graph.name.replace('/', '_'), id) id += 1 with open(file_name, "w") as f: f.write(string) graphviz.render('dot', 'svg', file_name) print('Graph was saved to {}.{}'.format(file_name, 'svg')) except ImportError: raise ImportError('Can\'t import graphviz') except Exception as e: raise Error('Can\'t save graph to svg') from e return string
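# Hedged usage sketch for dump_graph_for_graphviz above. The function reads
# graph.node[...], the pre-NetworkX-2.4 attribute view, so this assumes a NetworkX
# version that still provides it. The node/edge attribute names follow the function's
# own defaults ('kind', 'op', 'shape' and 'in', 'out'); the graph itself is invented.
import networkx as nx

op_graph = nx.MultiDiGraph(name="toy_model")
op_graph.add_node("conv1", kind="op", op="Conv2D", shape=[1, 3, 224, 224])
op_graph.add_node("relu1", kind="op", op="ReLU")
# 'in' is a Python keyword, so the edge attributes are passed via a dict
op_graph.add_edge("conv1", "relu1", **{"in": 0, "out": 0})

# dot_text = dump_graph_for_graphviz(op_graph)  # logs and returns the DOT string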
def remap_node_identifier(graph: nx.MultiDiGraph, category: str, alternative_property: str, prefix=None) -> nx.MultiDiGraph:
    """
    Remap a node's 'id' attribute with a value from the node's ``alternative_property`` attribute.

    Parameters
    ----------
    graph: networkx.MultiDiGraph
        The graph
    category: string
        category referring to nodes whose 'id' needs to be remapped
    alternative_property: string
        property name from which the new value is pulled
    prefix: string
        signifies that the value for ``alternative_property`` is a list and the
        ``prefix`` indicates which value to pick from the list

    Returns
    -------
    networkx.MultiDiGraph
        The modified graph
    """
    mapping = {}
    for nid, data in graph.nodes(data=True):
        node_data = data.copy()
        if 'category' in node_data and category not in node_data['category']:
            continue
        if alternative_property in node_data:
            alternative_values = node_data[alternative_property]
            if isinstance(alternative_values, (list, set, tuple)):
                if prefix:
                    for v in alternative_values:
                        if prefix in v:
                            # take the first occurring value that contains the given prefix
                            mapping[nid] = v
                            break
                else:
                    # no prefix defined; pick the first one from the list
                    mapping[nid] = alternative_values[0]
            elif isinstance(alternative_values, str):
                if prefix:
                    if alternative_values.startswith(prefix):
                        mapping[nid] = alternative_values
                else:
                    # no prefix defined
                    mapping[nid] = alternative_values
            else:
                logging.error(f"Cannot use {alternative_values} from alternative_property {alternative_property}")

    nx.set_node_attributes(graph, values=mapping, name='id')
    nx.relabel_nodes(graph, mapping, copy=False)

    # update 'subject' and 'object' of all edges whose endpoints were remapped
    update_edge_keys = {}
    updated_subject_values = {}
    updated_object_values = {}
    for u, v, k, edge_data in graph.edges(keys=True, data=True):
        # compare by value (not identity) to detect remapped endpoints
        if u != edge_data['subject']:
            updated_subject_values[(u, v, k)] = u
            update_edge_keys[(u, v, k)] = generate_edge_key(u, edge_data['edge_label'], v)
        if v != edge_data['object']:
            updated_object_values[(u, v, k)] = v
            update_edge_keys[(u, v, k)] = generate_edge_key(u, edge_data['edge_label'], v)
    nx.set_edge_attributes(graph, values=updated_subject_values, name='subject')
    nx.set_edge_attributes(graph, values=updated_object_values, name='object')
    nx.set_edge_attributes(graph, values=update_edge_keys, name='edge_key')

    return graph
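# Hedged illustration of the prefix-based selection used by remap_node_identifier
# above: given a list-valued alternative property, the first value carrying the
# requested prefix becomes the node's new identifier; without a match, the node keeps
# its old id. The CURIEs below are invented for illustration.
alternative_values = ["ENSEMBL:ENSG00000141510", "HGNC:11998", "NCBIGene:7157"]
prefix = "HGNC:"
new_id = next((v for v in alternative_values if prefix in v), None)
# new_id == "HGNC:11998"; if no value carried the prefix, new_id stays None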
def all_edges(g: nx.MultiDiGraph, pre, post): return { key: data for _, tgt, key, data in g.edges(pre, data=True, keys=True) if tgt == post }
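# Small usage sketch for all_edges above: it returns {key: data} for every parallel
# edge from `pre` to `post`. The keys and weights below are arbitrary.
import networkx as nx

g = nx.MultiDiGraph()
g.add_edge("a", "b", key="first", weight=1)
g.add_edge("a", "b", key="second", weight=2)
g.add_edge("a", "c", key="other", weight=3)

all_edges(g, "a", "b")
# -> {'first': {'weight': 1}, 'second': {'weight': 2}}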
def get_eulerian_circuit(g: nx.MultiDiGraph) -> tuple: ans = [] g = deepcopy(g) first_edge = list(g.edges(data=True))[0] u, v, ABdata = first_edge for key, data in g[u][v].items(): if data == ABdata: g.remove_edge(u, v, key=key) break ans.append(first_edge) now = v now_parity = g.nodes[now]["parity"] now_type = ABdata["Tv"] def get_next_type(now_parity, now_type): if now_parity == "Odd": if now_type == "A": return "B" elif now_type == "B": return "A" elif now_parity == "Even": if now_type == "A": return "A" elif now_type == "B": return "B" else: return None next_type = get_next_type(now_parity, now_type) while g.edges: flg = False for next, atlas_view in list(g.succ[now].items()): for key, data in list(atlas_view.items()): if data["Tu"] == next_type: ans.append((now, next, data)) g.remove_edge(now, next, key=key) now = next now_parity = g.nodes[now]["parity"] now_type = data["Tv"] next_type = get_next_type(now_parity, now_type) flg = True break if flg: break else: print("miss!", ans) sleep(1) ok = False while not ok: edge = ans.pop(-1) u, v, edge_data = edge for next, atlas_view in list(g.succ[u].items()): for key, data in list(atlas_view.items()): if data["Tu"] == edge_data["Tu"]: ans.append((u, next, data)) g.remove_edge(u, next, key=key) now = next now_parity = g.nodes[now]["parity"] now_type = data["Tv"] next_type = get_next_type(now_parity, now_type) ok = True g.add_edge(u, v, **edge_data) return ans
def NetworkXGraphToProgramGraphProto( g: nx.MultiDiGraph, ) -> programl_pb2.ProgramGraph: """Convert a networkx graph constructed using the old control-and-data-flow graph builder to a ProGraML graph proto.""" proto = programl_pb2.ProgramGraph() # Create the map from function IDs to function names. function_names = list( sorted(set([fn for _, fn in g.nodes(data="function") if fn]))) function_to_idx_map = {fn: i for i, fn in enumerate(function_names)} # Create the function list. for function_name in function_names: function_proto = proto.function.add() function_proto.name = function_name # Build a translation map from node names to node list indices. if "root" not in g.nodes: raise ValueError(f"Graph has no root node: {g.nodes}") node_to_idx_map = {"root": 0} for node in [node for node in g.nodes if node != "root"]: node_to_idx_map[node] = len(node_to_idx_map) # Create the node list. idx_to_node_map = {v: k for k, v in node_to_idx_map.items()} for node_idx in range(len(node_to_idx_map)): node = g.nodes[idx_to_node_map[node_idx]] node_proto = proto.node.add() # Translate node attributes. node_type = node.get("type") if not node_type: raise ValueError(f"Node has no type: {node_type}") node_proto.type = { "statement": programl_pb2.Node.STATEMENT, "identifier": programl_pb2.Node.IDENTIFIER, "immediate": programl_pb2.Node.IMMEDIATE, # We are removing the "magic" node type, replacing them with a regular # statement of unknown type. "magic": programl_pb2.Node.STATEMENT, }[node_type] # Get the text of the node. if "original_text" in node: node_proto.text = node["original_text"] node_proto.preprocessed_text = node["text"] elif "text" in node: node_proto.text = node["text"] node_proto.preprocessed_text = node["text"] elif "name" in node: node_proto.text = node["name"] node_proto.preprocessed_text = node["name"] else: raise ValueError(f"Node has no original_text or name: {node}") # Set the encoded representation of the node. x = node.get("x", None) if x is not None: node_proto.x.extend([x]) # Set the node function. function = node.get("function") if function: node_proto.function = function_to_idx_map[function] # Create the edge list. for src, dst, data in g.edges(data=True): edge = proto.edge.add() edge.flow = { "call": programl_pb2.Edge.CALL, "control": programl_pb2.Edge.CONTROL, "data": programl_pb2.Edge.DATA, }[data["flow"]] edge.source_node = node_to_idx_map[src] edge.destination_node = node_to_idx_map[dst] edge.position = data.get("position", 0) return proto
class MossNet: def __init__(self, moss_results_dict): '''Create a ``MossNet`` object from a 3D dictionary of downloaded MOSS results Args: ``moss_results_dict`` (``dict``): A 3D dictionary of downloaded MOSS results Returns: ``MossNet``: A ``MossNet`` object ''' if isinstance(moss_results_dict, MultiDiGraph): self.graph = moss_results_dict; return if isinstance(moss_results_dict, str): try: if moss_results_dict.lower().endswith('.gz'): moss_results_dict = load(gopen(moss_results_dict)) else: moss_results_dict = load(open(moss_results_dict,'rb')) except: raise ValueError("Unable to load dictionary: %s" % moss_results_dict) if not isinstance(moss_results_dict, dict): raise TypeError("moss_results_dict must be a 3D dictionary of MOSS results") self.graph = MultiDiGraph() for u in moss_results_dict: u_edges = moss_results_dict[u] if not isinstance(u_edges, dict): raise TypeError("moss_results_dict must be a 3D dictionary of MOSS results") for v in u_edges: u_v_links = u_edges[v] if not isinstance(u_edges[v], dict): raise TypeError("moss_results_dict must be a 3D dictionary of MOSS results") for f in u_v_links: try: left, right = u_v_links[f] except: raise TypeError("moss_results_dict must be a 3D dictionary of MOSS results") self.graph.add_edge(u, v, attr_dict = {'files':f, 'left':left, 'right':right}) def save(self, outfile): '''Save this ``MossNet`` object as a 3D dictionary of MOSS results Args: ``outfile`` (``str``): The desired output file's path ''' out = dict() for u in self.graph.nodes: u_edges = dict(); out[u] = u_edges for v in self.graph.neighbors(u): u_v_links = dict(); u_edges[v] = u_v_links; u_v_edge_data = self.graph.get_edge_data(u,v) for k in u_v_edge_data: edge = u_v_edge_data[k]['attr_dict']; u_v_links[edge['files']] = (edge['left'], edge['right']) if outfile.lower().endswith('.gz'): f = gopen(outfile, mode='wb', compresslevel=9) else: f = open(outfile, 'wb') pkldump(out, f); f.close() def __add__(self, o): if not isinstance(o, MossNet): raise TypeError("unsupported operand type(s) for +: 'MossNet' and '%s'" % type(o).__name__) g = MultiDiGraph() g.add_edges_from(list(self.graph.edges(data=True)) + list(o.graph.edges(data=True))) g.add_nodes_from(list(self.graph.nodes(data=True)) + list(o.graph.nodes(data=True))) return MossNet(g) def get_networkx(self): '''Return a NetworkX ``MultiDiGraph`` equivalent to this ``MossNet`` object Returns: ``MultiDiGraph``: A NetworkX ``DiGraph`` equivalent to this ``MossNet`` object ''' return self.graph.copy() def get_nodes(self): '''Returns a ``set`` of node labels in this ``MossNet`` object Returns: ``set``: The node labels in this ``MossNet`` object ''' return set(self.graph.nodes) def get_pair(self, u, v, style='tuples'): '''Returns the links between nodes ``u`` and ``v`` Args: ``u`` (``str``): A node label ``v`` (``str``): A node label not equal to ``u`` ``style`` (``str``): The representation of a given link * ``"tuples"``: Links are ``((u_percent, u_html), (v_percent, v_html))`` tuples * ``"html"``: Links are HTML representation (one HTML for all links) * ``"htmls"``: Links are HTML representations (one HTML per link) Returns: ``dict``: The links between ``u`` and ``v`` (keys are filenames) ''' if style not in {'tuples', 'html', 'htmls'}: raise ValueError("Invalid link style: %s" % style) if u == v: raise ValueError("u and v cannot be equal: %s" % u) for node in [u,v]: if not self.graph.has_node(node): raise ValueError("Nonexistant node: %s" % node) links = self.graph.get_edge_data(u,v) out = dict() for k in sorted(links.keys(), 
key=lambda x: links[x]['attr_dict']['files']): d = links[k]['attr_dict'] u_fn, v_fn = d['files'] u_percent, u_html = d['left'] v_percent, v_html = d['right'] if style == 'tuples': out[(u_fn, v_fn)] = ((u_percent, u_html), (v_percent, v_html)) elif style in {'html', 'htmls'}: out[(u_fn, v_fn)] = '<html><table style="width:100%%" border="1"><tr><td colspan="2"><center><b>%s/%s --- %s/%s</b></center></td></tr><tr><td>%s (%d%%)</td><td>%s (%d%%)</td></tr><tr><td><pre>%s</pre></td><td><pre>%s</pre></td></tr></table></html>' % (u, u_fn, v, v_fn, u, u_percent, v, v_percent, u_html, v_html) if style == 'html': out = '<html>' + '<br>'.join(out[fns].replace('<html>','').replace('</html>','') for fns in sorted(out.keys())) + '</html>' return out def get_summary(self, style='html'): '''Returns a summary of this ``MossNet`` Args: ``style`` (``str``): The representation of this ``MossNet`` Returns: ``dict``: A summary of this ``MossNet``, where keys are filenames ''' if style not in {'html'}: raise ValueError("Invalid summary style: %s" % style) matches = list() # list of (u_path, u_percent, v_path, v_percent) tuples for u,v in self.traverse_pairs(order=None): links = self.graph.get_edge_data(u,v) for k in links: d = links[k]['attr_dict'] u_fn, v_fn = d['files'] u_percent, u_html = d['left'] v_percent, v_html = d['right'] matches.append(('%s/%s' % (u,u_fn), u_percent, '%s/%s' % (v,v_fn), v_percent)) matches.sort(reverse=True, key=lambda x: max(x[1],x[3])) return '<html><table style="width:100%%" border="1">%s</table></html>' % ''.join(('<tr><td>%s (%d%%)</td><td>%s (%d%%)</td></tr>' % tup) for tup in matches) def num_links(self, u, v): '''Returns the number of links between ``u`` and ``v`` Args: ``u`` (``str``): A node label ``v`` (``str``): A node label not equal to ``u`` Returns: ``int``: The number of links between ``u`` and ``v`` ''' for node in [u,v]: if not self.graph.has_node(node): raise ValueError("Nonexistant node: %s" % node) return len(self.graph.get_edge_data(u,v)) def num_nodes(self): '''Returns the number of nodes in this ``MossNet`` object Returns: ``int``: The number of nodes in this ``MossNet`` object ''' return self.graph.number_of_nodes() def num_edges(self): '''Returns the number of (undirected) edges in this ``MossNet`` object (including parallel edges) Returns: ``int``: The number of (undirected) edges in this ``MossNet`` object (including parallel edges) ''' return int(self.graph.number_of_edges()/2) def outlier_pairs(self): '''Predict which student pairs are outliers (i.e., too many problem similarities). 
The distribution of number of links between student pairs (i.e., histogram) is modeled as y = A/(B^x), where x = a number of links, and y = the number of student pairs with that many links Returns: ``list`` of ``tuple``: The student pairs expected to be outliers (in decreasing order of significance) ''' links = dict() # key = number of links; value = set of student pairs that have that number of links for u,v in self.traverse_pairs(): n = self.num_links(u,v) if n not in links: links[n] = set() links[n].add((u,v)) mult = list(); min_links = min(len(s) for s in links.values()); max_links = max(len(s) for s in links.values()) for i in range(min_links, max_links): if i not in links or i+1 not in links or len(links[i+1]) > len(links[i]): break mult.append(float(len(links[i]))/len(links[i+1])) B = sum(mult)/len(mult) A = len(links[min_links]) * (B**min_links) n_cutoff = log(A)/log(B) out = list() for n in sorted(links.keys(), reverse=True): if n < n_cutoff: break for u,v in links[n]: out.append((n,u,v)) return out def traverse_pairs(self, order='descending'): '''Iterate over student pairs Args: ``order`` (``str``): Order to sort pairs in iteration * ``None`` to not sort (may be faster for large/dense graphs) * ``"ascending"`` to sort in ascending order of number of links * ``"descending"`` to sort in descending order of number of links ''' if order not in {None, 'None', 'none', 'ascending', 'descending'}: raise ValueError("Invalid order: %s" % order) nodes = list(self.graph.nodes) pairs = [(u,v) for u in self.graph.nodes for v in self.graph.neighbors(u) if u < v] if order == 'ascending': pairs.sort(key=lambda x: len(self.graph.get_edge_data(x[0],x[1]))) elif order == 'descending': pairs.sort(key=lambda x: len(self.graph.get_edge_data(x[0],x[1])), reverse=True) for pair in pairs: yield pair def export(self, outpath, style='html', gte=0, verbose=False): '''Export the links in this ``MossNet`` in the specified style Args: ``outpath`` (``str``): Path to desired output folder/file ``style`` (``str``): Desired output style ``gte`` (``int``): The minimum number of links for an edge to be exported * ``"dot"`` to export as a GraphViz DOT file * ``"gexf"`` to export as a Graph Exchange XML Format (GEXF) file * ``"html"`` to export one HTML file per pair ``verbose`` (``bool``): ``True`` to show verbose messages, otherwise ``False`` ''' if style not in {'dot', 'gexf', 'html'}: raise ValueError("Invalid export style: %s" % style) if isdir(outpath) or isfile(outpath): raise ValueError("Output path exists: %s" % outpath) if not isinstance(gte, int): raise TypeError("'gte' must be an 'int', but you provided a '%s'" % type(gte).__name__) if gte < 0: raise ValueError("'gte' must be non-negative, but yours was %d" % gte) # export as folder of HTML files if style == 'html': summary = self.get_summary(style='html') pairs = list(self.traverse_pairs(order=None)) makedirs(outpath) f = open('%s/summary.html' % outpath, 'w'); f.write(summary); f.close() for i,pair in enumerate(pairs): if verbose: print("Exporting pair %d of %d..." 
% (i+1, len(pairs)), end='\r') u,v = pair if self.num_links(u,v) < gte: continue if style == 'html': f = open("%s/%d_%s_%s.html" % (outpath, self.num_links(u,v), u, v), 'w') f.write(self.get_pair(u, v, style='html')) f.close() if verbose: print("Successfully exported %d pairs" % len(pairs)) # export as GraphViz DOT or a GEXF file elif style in {'dot', 'gexf'}: if verbose: print("Computing colors...", end='') max_links = max(self.num_links(u,v) for u,v in self.traverse_pairs()) try: from seaborn import color_palette except: raise RuntimeError("Exporting as a DOT or GEXF file currently requires seaborn") pal = color_palette("Reds", max_links) if verbose: print(" done") print("Computing node information...", end='') nodes = list(self.get_nodes()) index = {u:i for i,u in enumerate(nodes)} if verbose: print(" done") print("Writing output file...", end='') outfile = open(outpath, 'w') if style == 'dot': pal = [str(c).upper() for c in pal.as_hex()] outfile.write("graph G {\n") for u in nodes: outfile.write(' node%d[label="%s"]\n' % (index[u], u)) for u,v in self.traverse_pairs(): curr_num_links = self.num_links(u,v) if curr_num_links < gte: continue outfile.write(' node%d -- node%d[color="%s"]\n' % (index[u], index[v], pal[curr_num_links-1])) outfile.write('}\n') elif style == 'gexf': from datetime import datetime pal = [(int(255*c[0]), int(255*c[1]), int(255*c[2])) for c in pal] outfile.write('<?xml version="1.0" encoding="UTF-8"?>\n') outfile.write('<gexf xmlns="http://www.gexf.net/1.3draft" xmlns:viz="http://www.gexf.net/1.3draft/viz">\n') outfile.write(' <meta lastmodifieddate="%s">\n' % datetime.today().strftime('%Y-%m-%d')) outfile.write(' <creator>MossNet</creator>\n') outfile.write(' <description>A MossNet network exported to GEXF</description>\n') outfile.write(' </meta>\n') outfile.write(' <graph mode="static" defaultedgetype="undirected">\n') outfile.write(' <nodes>\n') for u in nodes: outfile.write(' <node id="%d" label="%s"/>\n' % (index[u], u)) outfile.write(' </nodes>\n') outfile.write(' <edges>\n') for i,pair in enumerate(self.traverse_pairs()): u,v = pair curr_num_links = self.num_links(u,v) if curr_num_links == 0: continue color = pal[curr_num_links-1] outfile.write(' <edge id="%d" source="%d" target="%d">\n' % (i, index[u], index[v])) outfile.write(' <viz:color r="%d" g="%d" b="%d"/>\n' % (color[0], color[1], color[2])) outfile.write(' </edge>\n') outfile.write(' </edges>\n') outfile.write(' </graph>\n') outfile.write('</gexf>\n') outfile.close() if verbose: print(" done")
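# Hedged usage sketch for MossNet above. The constructor expects a "3D" dict of the
# form results[u][v][(u_file, v_file)] = ((u_percent, u_html), (v_percent, v_html)),
# and the methods assume each link is stored in both directions. The student names,
# filenames, and percentages below are invented.
demo_results = {
    "alice": {"bob": {("hw1.py", "hw1.py"): ((35, "<pre>...</pre>"), (40, "<pre>...</pre>"))}},
    "bob": {"alice": {("hw1.py", "hw1.py"): ((40, "<pre>...</pre>"), (35, "<pre>...</pre>"))}},
}
net = MossNet(demo_results)
net.num_nodes()                 # 2
net.num_links("alice", "bob")   # 1
net.get_pair("alice", "bob")
# -> {('hw1.py', 'hw1.py'): ((35, '<pre>...</pre>'), (40, '<pre>...</pre>'))}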
def graph_to_json(graph: MultiDiGraph) -> List[rc.JsObject]: ga: List[rc.JsValue] = [{**node, "type": "node"} for _, node in graph.nodes(data=True)] for from_node, to_node, data in graph.edges(data=True): ga.append({"type": "edge", "from": from_node, "to": to_node, "edge_type": data["edge_type"]}) return ga
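# Short usage sketch for graph_to_json above: every node's attribute dict is emitted
# with "type": "node", and every edge must carry an "edge_type" attribute. The field
# names in the node payload are arbitrary.
from networkx import MultiDiGraph

g = MultiDiGraph()
g.add_node("n1", id="n1", label="start")
g.add_node("n2", id="n2", label="end")
g.add_edge("n1", "n2", edge_type="delegate")

graph_to_json(g)
# [{'id': 'n1', 'label': 'start', 'type': 'node'},
#  {'id': 'n2', 'label': 'end', 'type': 'node'},
#  {'type': 'edge', 'from': 'n1', 'to': 'n2', 'edge_type': 'delegate'}]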
def backtracking_1(multigraph: nx.MultiDiGraph, x_edges, y_edges, vertex, timeout: tuple = None, global_timeout: tuple = None) -> bool: """ Args: multigraph: x_edges: y_edges: vertex: timeout: global_timeout: Returns: """ for u, v, key, attrs in filter( utils.is_non_fixed_edge, multigraph.edges(vertex, data=True, keys=True)): # checking for cycle in z if ('included_in_z' in multigraph.nodes[v] and multigraph.graph['length_z'] + 1 != len(x_edges)): continue multigraph.edges[u, v, key]['fixed_z'] = True multigraph.nodes[v]['included_in_z'] = True multigraph.graph['length_z'] += 1 added_to_w = [] for source in (multigraph.edges(vertex, data=True, keys=True), multigraph.in_edges(vertex, data=True, keys=True)): for u_w, v_w, key_w, _ in filter(utils.is_non_fixed_edge, source): multigraph.edges[u_w, v_w, key_w]['fixed_w'] = True added_to_w.append((u_w, v_w, key_w)) multigraph.graph['length_w'] += 1 multigraph.graph['w'].add((u_w, v_w)) if added_to_w and utils.has_cycle(multigraph.graph['w']) and ( multigraph.graph['length_w'] != len(x_edges) or not utils.is_hamiltonian_cycle(multigraph.graph['w'])): step_back(multigraph, u, v, key, added_to_w) continue if multigraph.graph['length_z'] == len( x_edges) and multigraph.graph['length_w'] == len(x_edges): z_edges = set( item[:2] for item in nx.get_edge_attributes(multigraph, 'fixed_z')) if z_edges != x_edges and z_edges != y_edges and multigraph.graph['w'] != x_edges and multigraph.graph[ 'w'] != y_edges and \ utils.is_hamiltonian_cycle(z_edges) and utils.is_hamiltonian_cycle(multigraph.graph['w']): return True step_back(multigraph, u, v, key, added_to_w) continue if backtracking_1(multigraph, x_edges, y_edges, v, timeout=timeout, global_timeout=global_timeout): return True step_back(multigraph, u, v, key, added_to_w) return False
def plot_bank2bank_count(_g: nx.MultiDiGraph, _plot_img: str): acct_bank = nx.get_node_attributes(_g, "bank_id") bank_list = sorted(set(acct_bank.values())) bank2bank_all = Counter() bank2bank_sar = Counter() for orig, bene, attr in _g.edges(data=True): orig_bank = acct_bank[orig] bene_bank = acct_bank[bene] is_sar = attr["is_sar"] bank_pair = (orig_bank, bene_bank) bank2bank_all[bank_pair] += 1 if is_sar: bank2bank_sar[bank_pair] += 1 total_num = _g.number_of_edges() internal_num = sum([num for pair, num in bank2bank_all.items() if pair[0] == pair[1]]) external_num = total_num - internal_num internal_ratio = internal_num / total_num * 100 external_ratio = external_num / total_num * 100 internal_sar_num = sum([num for pair, num in bank2bank_sar.items() if pair[0] == pair[1]]) external_sar_num = sum([num for pair, num in bank2bank_sar.items() if pair[0] != pair[1]]) all_count_data = list() sar_count_data = list() for orig_bank in bank_list: all_count_row = [bank2bank_all[(orig_bank, bene_bank)] for bene_bank in bank_list] all_count_total = sum(all_count_row) all_count_data.append(all_count_row + [all_count_total]) sar_count_row = [bank2bank_sar[(orig_bank, bene_bank)] for bene_bank in bank_list] sar_count_total = sum(sar_count_row) sar_count_data.append(sar_count_row + [sar_count_total]) all_count_total = list() sar_count_total = list() for bene_bank in bank_list: all_count_total.append(sum([bank2bank_all[(orig_bank, bene_bank)] for orig_bank in bank_list])) sar_count_total.append(sum([bank2bank_sar[(orig_bank, bene_bank)] for orig_bank in bank_list])) all_count_total.append(sum(all_count_total)) sar_count_total.append(sum(sar_count_total)) all_count_data.append(all_count_total) sar_count_data.append(sar_count_total) all_count_csv = list() sar_count_csv = list() for row in all_count_data: all_count_csv.append(["{:,}".format(num) for num in row]) for row in sar_count_data: sar_count_csv.append(["{:,}".format(num) for num in row]) cols = ["To: %s" % bank for bank in bank_list] + ["Total"] rows = ["From: %s" % bank for bank in bank_list] + ["Total"] fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(9, 6)) table_attr = {"rowLabels": rows, "colLabels": cols, "colWidths": [0.15 for _ in cols], "loc": "center", "bbox": [0.15, 0.3, 0.75, 0.6]} ax1.axis("off") ax1.table(cellText=all_count_csv, **table_attr) ax1.set_title("Number of all bank-to-bank transactions") ax2.axis("off") ax2.table(cellText=sar_count_csv, **table_attr) ax2.set_title("Number of SAR bank-to-bank transactions") fig.suptitle("Internal bank transactions: Total = {:,} ({:.2f}%), SAR = {:,}". format(internal_num, internal_ratio, internal_sar_num) + "\n" + "External bank transactions: Total = {:,} ({:.2f}%), SAR = {:,}" .format(external_num, external_ratio, external_sar_num), y=0.1) plt.tight_layout() fig.savefig(_plot_img)
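# Hedged usage sketch for plot_bank2bank_count above: accounts carry a "bank_id"
# node attribute and every transaction edge carries an "is_sar" flag. The account
# ids, bank names, and output path below are invented.
import networkx as nx

txg = nx.MultiDiGraph()
txg.add_node(1, bank_id="bank_a")
txg.add_node(2, bank_id="bank_a")
txg.add_node(3, bank_id="bank_b")
txg.add_edge(1, 2, is_sar=False)   # internal transaction
txg.add_edge(1, 3, is_sar=True)    # external SAR transaction

# plot_bank2bank_count(txg, "bank2bank_count.png")  # writes the two summary tables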
def CreateFromNetworkX(cls, g: nx.MultiDiGraph) -> "GraphTuple": """Construct a graph tuple from a networkx graph. Args: g: The graph to convert to a graph. See deeplearning.ml4pl.graphs.programl.ProgramGraphToNetworkX() for a description of the networkx format. Returns: A GraphTuple instance. """ # Create an adjacency list for each edge type. # {control, data, call} types. adjacencies: List[List[Tuple[int, int]]] = [ [], [], [], ] # Create an edge position list for each edge type. # {control, data, call} types. edge_positions: List[List[int]] = [ [], [], [], ] # Build the adjacency and positions lists. for src, dst, data in g.edges(data=True): adjacencies[data["flow"]].append((src, dst)) edge_positions[data["flow"]].append(data["position"]) # Convert the edge lists to numpy arrays. # Shape (edge_flow_count, edge_count, 2): for i in range(len(adjacencies)): if len(adjacencies[i]): adjacencies[i] = np.array(adjacencies[i], dtype=np.int32) else: adjacencies[i] = np.zeros((0, 2), dtype=np.int32) # Shape (edge_flow_count, edge_count): edge_positions = np.array([ np.array(edge_position, dtype=np.int32) for edge_position in edge_positions ]) # Set the node features. node_x = [None] * g.number_of_nodes() for node, x in g.nodes(data="x"): node_x[node] = np.array(x, dtype=np.int64) # Shape (node_count, node_x_dimensionality): node_x = np.vstack(node_x) # Set the node labels. node_targets = [None] * g.number_of_nodes() node_y = None for node, y in g.nodes(data="y"): # Node labels are optional. If there are no labels, break. if not y: break node_targets[node] = y else: # Shape (node_count, node_y_dimensionality): node_y = np.vstack(node_targets).astype(np.int64) # Get the optional graph-level features and labels. graph_x = np.array(g.graph["x"], dtype=np.int64) if g.graph["x"] else None graph_y = np.array(g.graph["y"], dtype=np.int64) if g.graph["y"] else None return GraphTuple( adjacencies=np.array(adjacencies), edge_positions=edge_positions, node_x=node_x, node_y=node_y, graph_x=graph_x, graph_y=graph_y, )
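# Hedged usage sketch for CreateFromNetworkX above. It assumes nodes are integers
# 0..n-1, each node carries an integer-list "x" feature, each edge carries an integer
# "flow" index (per the comment above: {control, data, call}) plus a "position", and
# the graph itself has "x"/"y" keys (possibly empty). All values below are invented.
import networkx as nx

g = nx.MultiDiGraph(x=[], y=[])
g.add_node(0, x=[4, 0])
g.add_node(1, x=[7, 1])
g.add_edge(0, 1, flow=0, position=0)   # a control-flow edge

# graph_tuple = GraphTuple.CreateFromNetworkX(g)
# graph_tuple.node_x.shape -> (2, 2)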