Пример #1
0
def find_label(
    graph: MultiDiGraph, query: str, verbose: bool = True
) -> Optional[Tuple[Any, Any]]:
    """Returns the first edge label
    containing the specified string.

    Edges are scanned in the order produced by ``graph.edges(data=True)``
    and the search stops at the first label whose string representation
    contains ``query``.

    Parameters
    ----------
    graph : MultiDiGraph
        Initial graph.

    query : str
        String to find.

    verbose : bool
        If true, a progress bar will be displayed.

    Examples
    --------
    >>> import cfpq_data
    >>> g = cfpq_data.graph_from_dataset("foaf", verbose=False)
    >>> cfpq_data.find_label(g, "subClassOf", verbose=False)
    ('label', rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#subClassOf'))

    Returns
    -------
    response : Optional[Tuple[Any, Any]]
        Pair (``edge label key``, ``edge label value``)
        where ``edge label value`` contains ``query``.
        None if the required ``edge label value`` is not found.
    """
    for _, _, edge_labels in tqdm(
        graph.edges(data=True), disable=not verbose, desc="Searching..."
    ):
        for k, v in edge_labels.items():
            if query in str(v):
                # Return immediately: a plain ``break`` would only leave the
                # inner loop, and the remaining edges would still be scanned
                # (and could overwrite the result with a later match).
                return k, v

    return None
Пример #2
0
    def get_subgraph_by_edge_type(self, graph: nx.MultiDiGraph,
                                  relationship: str) -> nx.DiGraph:
        """Get a subgraph containing all edges of a given type (aka relationship).

        The relationship type of an edge is taken from its edge *key* in the
        multi digraph; edge attributes are not inspected or copied.

        Args:
            graph: input multi digraph (aka hypergraph)
            relationship: edge / link relationship type; compared for equality
                against every edge key of ``graph``.

        Returns:
            Directed graph on edges of a particular type (aka relationship).
            It is empty when no edge key equals ``relationship``.
        """

        # prune the metadata model graph so as to include only those edges that match the relationship type
        rel_edges = [(u, v)
                     for u, v, key in graph.edges(keys=True)
                     if key == relationship]

        relationship_subgraph = nx.DiGraph()
        relationship_subgraph.add_edges_from(rel_edges)
        # The graph was built but never handed back, despite the declared
        # return type and the documented contract.
        return relationship_subgraph
Пример #3
0
def AGraphComputerSetMultiDiGraph(spsg: nx.MultiDiGraph,
                                  cf: Callable) -> AGraph:
    """Render ``spsg`` as a directed ``AGraph`` whose edges are labelled with computer names.

    Every node of ``spsg`` is added under the name produced by
    ``node_2_string``. Every edge must carry a ``"computers"`` attribute (an
    iterable of objects with a ``__name__``); the edge label is the
    newline-joined list of those names.

    :param spsg: graph whose edges carry a ``"computers"`` attribute.
    :param cf: unused by this function; kept so the signature stays compatible with callers.
    :return: the populated ``AGraph``.
    """
    # The previous version first converted ``spsg`` with
    # ``nx.nx_agraph.to_agraph`` and then immediately discarded the result;
    # the graph is built from scratch here, so that conversion was dead work.
    A = AGraph(directed=True)
    A.node_attr["style"] = "filled"
    A.node_attr["shape"] = "rectangle"
    A.node_attr["fixedsize"] = "false"
    A.node_attr["fontcolor"] = "black"

    for node in spsg.nodes:
        A.add_node(node_2_string(node))

    for s, t, data_dict in spsg.edges(data=True):
        computer_set = data_dict["computers"]
        ss, st = node_2_string(s), node_2_string(t)
        A.add_edge(ss, st)
        # NOTE(review): the edge is looked up by its endpoints only, so parallel
        # edges between the same pair presumably end up sharing one label
        # (the last one written) - confirm whether that is intended.
        Ae = A.get_edge(ss, st)
        Ae.attr["label"] = "\n".join([c.__name__ for c in computer_set])
    return A
Пример #4
0
def create_in_out_dict(g: nx.MultiDiGraph):
    """Map every node to the nodes reachable over its outgoing edges and the
    nodes that reach it over its incoming edges.

    Self-loops (edges whose start and end are the same node) are ignored.
    Parallel edges are not collapsed: a neighbour appears once per edge.

    Args:
        g (nx.MultiDiGraph): input graph whose edges are used to create the in_out dict

    Returns:
        dict: a ``defaultdict`` of ``{"in": [...], "out": [...]}`` entries keyed by node;
        looking up an unknown node creates an entry with two empty lists.

    TODO: make if statement clearer - remove single loops in graph
    """
    def _empty_entry():
        return {"in": [], "out": []}

    traversals = defaultdict(_empty_entry)
    for tail, head in g.edges():
        if tail != head:
            traversals[head]["in"].append(tail)
            traversals[tail]["out"].append(head)
    return traversals
Пример #5
0
def map_graph(graph: nx.MultiDiGraph,
              mapping: Dict,
              preserve: bool = True) -> nx.MultiDiGraph:
    """
    Remap node identifiers in a networkx.MultiDiGraph based on a provided mapping.

    For nodes, the old identifier is saved as `source_curie` attribute.
    In case of edges,
    - if the node is the `subject` then the old identifier is saved as `source_subject`
    - if the node is the `object` then the old identifier is saved as `source_object`

    An edge ``(u, v)`` is read as ``subject -> object``: ``u`` is the subject
    and ``v`` is the object.

    Parameters
    ----------
    graph: networkx.MultiDiGraph
        A graph (relabelled in place)
    mapping: dict
        Dictionary containing node identifier mappings
    preserve: bool
        Preserve the old identifier before remapping.

    Returns
    -------
    networkx.MultiDiGraph
        The graph with its nodes remapped (the same object as ``graph``)

    """
    if preserve:
        for nid in graph.nodes():
            if nid in mapping:
                # add_node will append attributes
                graph.add_node(nid, source_curie=nid)
        # ``data=True`` yields the live attribute dict of every parallel edge,
        # so each edge is annotated exactly once.
        for subject_id, object_id, edge_data in graph.edges(data=True):
            if subject_id in mapping:
                edge_data.update(source_subject=subject_id)
            if object_id in mapping:
                # The previous code stored the subject id here and used the
                # swapped attribute names, contradicting the contract above.
                edge_data.update(source_object=object_id)
    nx.relabel_nodes(graph, mapping, copy=False)
    return graph
Пример #6
0
    def build_cliques(self, target_graph: nx.MultiDiGraph):
        """
        Load the ``same_as`` edges of ``target_graph`` into ``self.clique_graph``.

        ``target_graph`` is stored on the instance. Every edge whose
        ``edge_label`` attribute equals ``SAME_AS`` is copied, with its
        attributes, into ``self.clique_graph``; both endpoints are added
        together with their node attributes from ``target_graph``.

        Parameters
        ----------
        target_graph: networkx.MultiDiGraph
            A MultiDiGraph that contains nodes and edges

        Notes
        -----
        The code shown here returns nothing; the result is available as
        ``self.clique_graph``.

        """
        self.target_graph = target_graph
        for subject, obj, edge_props in target_graph.edges(data=True):
            is_same_as = ('edge_label' in edge_props
                          and edge_props['edge_label'] == SAME_AS)
            if not is_same_as:
                continue
            # copy both endpoints first, then the same_as edge itself
            for endpoint in (subject, obj):
                self.clique_graph.add_node(endpoint,
                                           **target_graph.nodes[endpoint])
            self.clique_graph.add_edge(subject, obj, **edge_props)
Пример #7
0
def get_factors(G: nx.MultiDiGraph) -> Generator[Tuple[List, List], None, None]:
    """Build an auxiliary directed graph over ``(node, port)`` pairs and split it.

    For every edge of ``G`` the source and target ports are obtained from
    ``get_ports(edge_data)``. Each endpoint contributes a ``(node, port)``
    variable; the auxiliary graph links every node to its variable and the
    source variable to the target variable. The result is ``split`` applied to
    the subgraph induced by the variables only.
    """
    aux = nx.DiGraph()
    port_vars = set()
    for src, dst, attrs in G.edges(data=True):
        src_port, dst_port = get_ports(attrs)
        src_var, dst_var = (src, src_port), (dst, dst_port)

        port_vars.update((src_var, dst_var))

        for vertex in (src, src_var, dst, dst_var):
            aux.add_node(vertex)

        for tail, head in ((src, src_var), (dst, dst_var), (src_var, dst_var)):
            aux.add_edge(tail, head)

    return split(aux.subgraph(port_vars))
Пример #8
0
def get_category_from_equivalence(
    target_graph: BaseGraph, clique_graph: nx.MultiDiGraph, node: str, attributes: Dict
) -> List:
    """
    Look up a category for a node from its ``SAME_AS`` neighbours in the clique graph.

    The edges reported by ``clique_graph.edges(node, data=True)`` are scanned
    in order. The search stops at the first ``SAME_AS`` edge whose other
    endpoint has a ``category`` attribute, and that value is returned. For
    every ``SAME_AS`` edge inspected before that point, the ``category``
    attribute of ``node`` in ``clique_graph`` is set to the (still empty)
    result list.

    Parameters
    ----------
    target_graph: kgx.graph.base_graph.BaseGraph
        The original graph (not used by this function)
    clique_graph: networkx.MultiDiGraph
        The clique graph
    node: str
        Node identifier
    attributes: Dict
        Node's attributes (not used by this function)

    Returns
    -------
    List
        Category for the node; an empty list when no equivalent node
        provides one

    """
    category: List = []
    for src, dst, edge_props in clique_graph.edges(node, data=True):
        if edge_props["predicate"] != SAME_AS:
            continue
        # pick the attributes of the endpoint opposite to ``node``
        if src == node:
            peer_attrs = clique_graph.nodes()[dst]
        elif dst == node:
            peer_attrs = clique_graph.nodes()[src]
        else:
            peer_attrs = {}
        if "category" in peer_attrs:
            category = peer_attrs["category"]
            break
        nx.set_node_attributes(clique_graph, {node: {"category": category}})
    return category
Пример #9
0
def add_reshapes_for_tf_subgraph_calls(graph: nx.MultiDiGraph):
    """
    Input and output tensors of the TFCustomSubgraphCall must be 4D because IE layer accepts and produces only 4D
    tensors. This function adds reshape operations where it is necessary: before the operation for every input whose
    shape is known and not 4D, and after every output data node whose shape is not 4D (that shape is then replaced by
    its 4D form).
    :param graph: graph to operate on.
    :return: None.
    """
    # Iterate over a snapshot of the edges (presumably because the reshape helper changes the graph).
    edge_snapshot = list(graph.edges(data=True))
    for producer_name, consumer_name, attrs in edge_snapshot:
        producer = Node(graph, producer_name)
        consumer = Node(graph, consumer_name)
        feeds_subgraph_call = consumer.kind == 'op' and consumer.has_valid('type') and \
            consumer.type == 'TFCustomSubgraphCall'
        if not feeds_subgraph_call:
            continue
        if not producer.has_valid('shape') or len(producer.shape) == 4:
            continue
        log.info(
            "There is an data tensor of shape '{}' which goes into '{}' node"
            .format(producer.shape, consumer.type))
        add_reshape_before_op_node(graph, producer_name, consumer_name,
                                   attrs)

    # Same snapshot approach for the nodes.
    for name in list(graph.nodes()):
        op = Node(graph, name)
        is_subgraph_call = op['kind'] == 'op' and op.has_and_set(
            'type') and op.type == 'TFCustomSubgraphCall'
        if not is_subgraph_call:
            continue
        for _, out_data in op.out_nodes().items():
            rank = len(out_data.shape)
            if rank == 4:
                continue
            log.info(
                "There is an data tensor of shape '{}' with real dims count '{}' which goes out of '{}' "
                "node".format(out_data.shape, rank, op.name))
            add_reshape_after_data_node(graph, out_data.id)

            # need to update shape of the op so IE generates XML with 4D tensors
            out_data['shape'] = make_shape_4d(out_data['shape'])
Пример #10
0
    def update_custom_replacement_attributes(self, graph: nx.MultiDiGraph):
        """
        Fill in the 'inputs' and 'outputs' of the replacement description from the matched sub-graph.

        The sub-graph is the set of nodes between the internal input and output nodes of this replacement. Edges
        entering it define the inputs, edges leaving it define the outputs, and matched nodes without output nodes
        (other than 'Const' ones) are also treated as outputs. Descriptions that already exist are left untouched.

        :param graph: graph to inspect.
        :return: None.
        """
        if not self.has('instances'):
            no_instances_msg = "No instance(s) is(are) defined for the custom replacement '{}'. ".format(
                self.replacement_id)
            raise Error(no_instances_msg + refer_to_faq_msg(66))
        if not isinstance(self.instances, dict):
            not_a_dict_msg = "The instance must be a single dictionary for the custom replacement with id '{}'. ".format(
                self.replacement_id)
            raise Error(not_a_dict_msg + refer_to_faq_msg(67))

        start_points = self.get_internal_input_nodes(graph)
        end_points = self.get_internal_output_nodes(graph)
        matched_nodes = sub_graph_between_nodes(graph, start_points,
                                                end_points)

        output_tensors = set()
        # key is the input tensor name ("<producer>:<out port>"), value is the list of
        # (consumer node name pattern, consumer input port) pairs fed by that tensor
        input_nodes_mapping = dict()
        for src_name, dst_name, attrs in graph.edges(data=True):
            dst_attrs = graph.node[dst_name]
            src_inside = src_name in matched_nodes
            dst_inside = dst_name in matched_nodes

            # edge outside sub-graph into sub-graph
            if dst_inside and not src_inside:
                tensor_name = src_name + ":" + str(attrs['out'])
                consumers = input_nodes_mapping.setdefault(tensor_name, list())
                consumers.append(('^' + dst_name + '$', attrs['in']))

            # edge from inside sub-graph to outside sub-graph
            if src_inside and not dst_inside:
                produced_by = dst_attrs['pb'].input[attrs['in']]
                output_tensors.add(('^' + produced_by + '$', attrs['out']))

        for node_name in graph.nodes():
            candidate = Node(graph, node_name)
            if node_name not in matched_nodes:
                continue
            if len(candidate.out_nodes()) != 0:
                continue
            if candidate['pb'].op == 'Const':
                continue
            log.debug(
                "Node {} doesn't have output edges. Consider it output".
                format(node_name))
            output_tensors.add(('^' + node_name + '$', 0))

        if not self.has('inputs'):
            inputs_desc = []
            for consumers in sorted(input_nodes_mapping.values()):
                inputs_desc.append([{'node': pattern, 'port': port}
                                    for pattern, port in consumers])
            self._replacement_desc['inputs'] = inputs_desc
            log.debug('Updated inputs of sub-graph for instance "{}"'.format(
                self.instances))

        if not self.has('outputs'):
            outputs_desc = []
            for pattern, port in sorted(output_tensors):
                outputs_desc.append({'node': pattern, 'port': port})
            self._replacement_desc['outputs'] = outputs_desc
            log.debug('Updated outputs of sub-graph for instance "{}"'.format(
                self.instances))
Пример #11
0
class Conversations(list):
    """A list of conversations together with the data compiled from them.

    The class merges the graphs of all conversations into one graph and turns
    the per-turn edges into feature / edge-index / query / target samples,
    first as Python and NumPy objects and then as torch tensors.
    """

    def __init__(self):
        super().__init__()
        self.graph = Graph()
        # concept -> integer id, filled by _nodes_to_id()
        self.concepts = {}
        # per-sample data, filled by compile_matrix_data()
        self.features = []
        self.edgeindices = []
        self.queries = []
        self.target_classes = []
        # tensor versions of the lists above, filled by compile_matrices()
        self.features_tensors = []
        self.edges_tensors = []
        self.queries_tensors = []
        self.targets_tensors = []

    def compile(self):
        """Compile every conversation and merge its edges into ``self.graph``."""
        print('\nGraph compilation...', end='')
        for position, conversation in enumerate(self):
            conversation.compile()
            for source, target, label in conversation.graph.edges(keys=True):
                self.graph.add_edge(source, target, label)
            # progress dot every 100 conversations
            if not position % 100:
                print('.', end='', flush=True)
        print()

    def _edges_to_tensors(self, edges, previous_edges, supporting_edges,
                          concept_to_features_function):
        """Turn three groups of ``(source, target, label)`` edges into features and index pairs.

        Every concept (source, target or label) gets an index on first sight.
        Its feature vector is ``concept_to_features_function(concept)`` followed
        by two flags that depend on the group in which it was first seen:
        ``[1, 1]`` for ``edges``, ``[1, 0]`` for ``previous_edges`` and
        ``[0, 0]`` for ``supporting_edges``. Each edge contributes the index
        pairs ``[target, label]`` and ``[label, source]``.

        Returns ``(features, edge_indices, indices)``.
        """
        features = []
        edge_indices = []
        indices = {}
        flagged_groups = (
            (edges, [1, 1]),
            (previous_edges, [1, 0]),
            (supporting_edges, [0, 0]),
        )
        for group, flags in flagged_groups:
            for source, target, label in group:
                for concept in (source, target, label):
                    if concept in indices:
                        continue
                    indices[concept] = len(indices)
                    features.append(
                        np.concatenate((concept_to_features_function(concept),
                                        np.array(flags))))
                edge_indices.append([indices[target], indices[label]])
                edge_indices.append([indices[label], indices[source]])
        return features, edge_indices, indices

    def _nodes_to_id(self):
        """Assign an integer id to every source, target and label of ``self.graph``."""
        for edge in self.graph.edges(keys=True):
            for concept in edge:
                if concept in self.concepts:
                    continue
                self.concepts[concept] = len(self.concepts)

    def _get_supporting_edges(self, edges, limit=1, step=1):
        """Collect edges of ``self.graph`` that surround the given edges.

        For each edge, the first edge reported for its source (and the first
        one reported for that edge in turn) is collected, and likewise for the
        incoming edges of its label. The pass is repeated ``step`` times over
        the given edges plus what has been collected so far. ``limit`` is not
        used. The given edges themselves are excluded from the result.
        """
        supporting = set()

        def _follow_two(edge_view, start):
            # take the first edge for ``start`` and then the first edge for
            # that edge; stop quietly as soon as a lookup yields nothing
            try:
                first = next(iter(edge_view(start, keys=True)))
                supporting.add(first)
                second = next(iter(edge_view(first, keys=True)))
                supporting.add(second)
            except StopIteration:
                pass

        for _ in range(step):
            for source, _target, label in edges | supporting:
                _follow_two(self.graph.edges, source)
                _follow_two(self.graph.in_edges, label)
        return supporting - edges

    def compile_matrix_data(self, concept_to_features_function):
        """Rebuild the per-sample features, edge indices, queries and target classes.

        For every conversation and every turn index ``j`` from 3 up to (but
        excluding) the last turn, one sample is produced per edge of turn
        ``j`` that is not already part of the context.
        """
        self.features = []
        self.edgeindices = []
        self.target_classes = []
        self.queries = []
        self._nodes_to_id()
        for position, conversation in enumerate(self):
            turn_edges = [
                set(turn.graph.edges(keys=True)) for turn in conversation.turns
            ]
            for j in range(3, len(conversation.turns) - 1):
                context_edges = set().union(*turn_edges[:j - 1])
                # NOTE(review): previous_edges and continuation_edges are both
                # taken from turn j - confirm that this overlap is intended.
                previous_edges = set(turn_edges[j])
                supporting_edges = self._get_supporting_edges(
                    context_edges | previous_edges)
                continuation_edges = turn_edges[j] - context_edges
                features, edge_indices, _ = self._edges_to_tensors(
                    context_edges, previous_edges, supporting_edges,
                    concept_to_features_function)
                for source, target, label in continuation_edges:
                    query = [
                        concept_to_features_function(source),
                        concept_to_features_function(label)
                    ]
                    self.queries.append(query)
                    target_class = self.concepts[target]
                    self.features.append(features)
                    self.edgeindices.append(edge_indices)
                    self.target_classes.append(target_class)
            # progress dot every 100 conversations
            if not position % 100:
                print('.', end='', flush=True)

    def compile_matrices(self):
        """Convert the data built by ``compile_matrix_data`` into torch tensors."""
        self.features_tensors = []
        self.edges_tensors = []
        self.queries_tensors = []
        self.targets_tensors = []
        print('Compiling {} samples to tensors...'.format(len(self.features)))
        for i, sample_features in enumerate(self.features):
            self.features_tensors.append(
                torch.from_numpy(np.array(sample_features)))
            self.edges_tensors.append(
                torch.from_numpy(np.array(self.edgeindices[i])))
            self.queries_tensors.append(
                torch.from_numpy(np.array(self.queries[i])))
            self.targets_tensors.append(
                torch.from_numpy(np.array(self.target_classes[i])))
            # progress dot every 10000 samples
            if not i % 10000:
                print('.', end='', flush=True)

    def save(self, filename):
        """Pickle this object to ``filename`` and return True."""
        with open(filename, 'wb') as out_file:
            pickle.dump(self, out_file)
        return True

    @classmethod
    def load(cls, filename):
        """Return the object unpickled from ``filename``.

        NOTE(review): unpickling executes code embedded in the file, so this
        must only be used on files from a trusted source.
        """
        with open(filename, 'rb') as in_file:
            restored = pickle.load(in_file)
        return restored
Пример #12
0
    def run(self):
        """Annotate an ISP topology graph with link attributes and write it out as GraphML.

        Reads the options '-i' (input topology path), '-o' (output directory),
        '--link' (link capacity) and '--medium' (medium type or speed) from
        self.options. Node ids are normalised to integer strings, a port pair
        is assigned to every (node, neighbor) pair, and one edge per
        (node, neighbor) pair receives the attributes Capacity, Distance,
        Delay, src_port, dst_port and BandwidthCost. Edges left without a
        'Capacity' attribute are removed before the graph is written.

        The process exits with status 1 when '--medium' cannot be parsed, the
        input graph cannot be read, or a node id is not an integer.

        NOTE: this is Python 2 code (print statements) and uses the legacy
        networkx accessors nodes_iter(), .node and .edge.
        """
        isp_topology_path = self.options.get('-i', None)
        output_directory = self.options.get('-o', None)
        link_capacity = self.options.get('--link', None)
        medium = self.options.get('--medium', None)

        # Propagation speed of the medium (presumably in m/s - TODO confirm):
        # a fixed value for 'fiber' / 'copper', otherwise the option itself
        # parsed as a float.
        medium_speed = 0.
        if medium == 'fiber':
            medium_speed = 300*10**6
        elif medium == 'copper':
            medium_speed = 210*10**6
        else:
            try:
                # NOTE(review): a missing '--medium' (None) raises TypeError
                # here, which is not caught by the handler below.
                medium_speed = float(medium)
            except ValueError as ex:
                print ex
                print '--medium is either (1) Medium type (fiber or copper) or (2) Medium speed (float)'
                exit(1)
        assert medium_speed > 0

        # The parsed capacity is overwritten by a hard-coded value on the next line.
        link = parse_capacities(link_capacity)
        link = 10000000000 # special case

        isp_graph = read_isp_graph(isp_topology_path)
        if not isp_graph:
            print 'Can\'t read the input graph file...'
            exit(1)

        if not isinstance(isp_graph, MultiDiGraph):
            isp_graph = MultiDiGraph(isp_graph)

        # Store the integer form of every node name as its 'id' attribute and
        # remember whether any id is 0.
        has_zero_id = False
        for node, node_data in isp_graph.nodes_iter(data=True):
            try:
                node_data['id'] = int(node)
                if node_data['id'] == 0:
                    has_zero_id = True
            except ValueError as ex:
                print ex
                print 'Node ID should be integer...'
                exit(1)

        # If an id 0 exists, shift all ids up by one (int(True) == 1).
        for node, node_data in isp_graph.nodes_iter(data=True):
            node_data['id'] += int(has_zero_id)

        # Rename the nodes in place to the string form of their (shifted) ids.
        relabel_nodes(isp_graph, {node: str(node_data['id']) for node, node_data in isp_graph.nodes_iter(data=True)}, copy=False)

        # First pass: walking nodes and their neighbors in ascending numeric
        # order, give every (node, neighbor) pair a local port number that
        # starts at 2 for each node; the peer port is still unknown (-1).
        sorted_nodes = map(str, sorted([int(node) for node in isp_graph]))
        ports = {}
        for node in sorted_nodes:
            s_port = 2
            sorted_neighbors = map(str, sorted([int(x[1]) for x in isp_graph.edges(node)]))
            for neighbor in sorted_neighbors:
                if (node, neighbor) not in ports:
                    ports[node, neighbor] = (s_port, -1)
                    s_port += 1

        # Second pass: when the reverse pair also has a port, record it as the
        # peer port of this pair.
        for node in sorted_nodes:
            sorted_neighbors = map(str, sorted([int(x[1]) for x in isp_graph.edges(node)]))
            for neighbor in sorted_neighbors:
                if (neighbor, node) in ports:
                    ports[node, neighbor] = (ports[node, neighbor][0], ports[neighbor, node][0])

        # Replace the data of the first edge seen for each (n, neighbor) pair
        # with the computed link attributes; further parallel edges of the
        # same pair are left as they are.
        max_dist = 0.
        traversed_edges = {}
        for node, node_data in isp_graph.nodes_iter(data=True):
            for n, neighbor, key, data in isp_graph.edges(node, data=True, keys=True):
                edge_key = n, neighbor
                if edge_key not in traversed_edges:
                    # -1 marks a missing coordinate
                    src_lat = isp_graph.node[n].get('Latitude', -1)
                    src_lon = isp_graph.node[n].get('Longitude', -1)
                    dst_lat = isp_graph.node[neighbor].get('Latitude', -1)
                    dst_lon = isp_graph.node[neighbor].get('Longitude', -1)
                    if src_lat == -1 or src_lon == -1 or dst_lat == -1 or dst_lon == -1:
                        # fall back to a distance of 1 when any coordinate is missing
                        distance = 1
                    else:
                        # presumably converts the vincenty() result from km to m - TODO confirm
                        distance = 1000 * vincenty((src_lat, src_lon), (dst_lat, dst_lon))
                    # presumably the propagation delay in milliseconds - TODO confirm units
                    delay = 1000.0 * distance / medium_speed
                    # max_dist tracks the largest distance seen; it is not used further in this method
                    if max_dist < distance:
                        max_dist = distance
                    isp_graph.edge[n][neighbor][key] = {'Capacity': link,
                                                        'Distance': distance,
                                                        'Delay': delay,
                                                        'src_port': ports[n, neighbor][0],
                                                        'dst_port': ports[n, neighbor][1],
                                                        'BandwidthCost': 1}
                    traversed_edges[edge_key] = key
                    # traversed_edges[edge_key] = neighbor, n
                else:
                    pass
                    # isp_graph.edge[n][neighbor][traversed_edges[edge_key]]['Capacity'] += link

        # Drop every edge whose data has no 'Capacity' attribute.
        for node, node_data in isp_graph.nodes_iter(data=True):
            for n, neighbor, key, data in isp_graph.edges(node, data=True, keys=True):
                if 'Capacity' not in data:
                    del isp_graph.edge[n][neighbor][key]

        write_graphml(isp_graph, get_resources_file(isp_topology_path, output_directory))
        print "GetIsp DONE!"
Пример #13
0
class NxGraph(BaseGraph):
    """
    NxGraph is a wrapper that provides methods to interact with a networkx.MultiDiGraph.

    NxGraph extends kgx.graph.base_graph.BaseGraph and implements all the methods from BaseGraph.
    """
    def __init__(self) -> None:
        """Create a wrapper around a new, empty MultiDiGraph with no name."""
        super().__init__()
        # underlying graph that all other methods delegate to
        self.graph = MultiDiGraph()
        self.name = None

    def add_node(self, node: str, **kwargs: Any) -> None:
        """
        Add a node to the graph.

        Parameters
        ----------
        node: str
            Node identifier
        **kwargs: Any
            Node properties. If a ``data`` keyword is given, its value is
            used as the property dictionary and all other keywords are
            ignored.

        """
        properties = kwargs.get("data", kwargs)
        self.graph.add_node(node, **properties)

    def add_edge(self,
                 subject_node: str,
                 object_node: str,
                 edge_key: str = None,
                 **kwargs: Any) -> None:
        """
        Add an edge to the graph.

        Parameters
        ----------
        subject_node: str
            The subject (source) node
        object_node: str
            The object (target) node
        edge_key: Optional[str]
            The edge key
        kwargs: Any
            Edge properties. If a ``data`` keyword is given, its value is
            used as the property dictionary and all other keywords are
            ignored.

        Notes
        -----
        The value returned by the underlying graph's ``add_edge`` is passed
        back to the caller.

        """
        properties = kwargs.get("data", kwargs)
        added = self.graph.add_edge(subject_node,
                                    object_node,
                                    key=edge_key,
                                    **properties)
        return added

    def add_node_attribute(self, node: str, attr_key: str,
                           attr_value: Any) -> None:
        """
        Set a single attribute on a node.

        The attribute is applied through the underlying graph's ``add_node``.

        Parameters
        ----------
        node: str
            The node identifier
        attr_key: str
            The attribute key
        attr_value: Any
            The attribute value

        """
        attribute = {attr_key: attr_value}
        self.graph.add_node(node, **attribute)

    def add_edge_attribute(
        self,
        subject_node: str,
        object_node: str,
        edge_key: Optional[str],
        attr_key: str,
        attr_value: Any,
    ) -> None:
        """
        Set a single attribute on an edge.

        The attribute is applied through the underlying graph's ``add_edge``
        with the given edge key.

        Parameters
        ----------
        subject_node: str
            The subject (source) node
        object_node: str
            The object (target) node
        edge_key: Optional[str]
            The edge key
        attr_key: str
            The attribute key
        attr_value: Any
            The attribute value

        """
        attribute = {attr_key: attr_value}
        self.graph.add_edge(subject_node,
                            object_node,
                            key=edge_key,
                            **attribute)

    def update_node_attribute(self,
                              node: str,
                              attr_key: str,
                              attr_value: Any,
                              preserve: bool = False) -> Dict:
        """
        Update one attribute of an existing node.

        The node's current properties and the new key/value pair are combined
        by ``prepare_data_dict``; the result is written back to the node.

        Parameters
        ----------
        node: str
            The node identifier
        attr_key: str
            The attribute key
        attr_value: Any
            The attribute value
        preserve: bool
            Whether or not to preserve existing values for the given attr_key

        Returns
        -------
        Dict
            A dictionary corresponding to the updated node properties

        """
        current = self.graph.nodes[node]
        change = {attr_key: attr_value}
        merged = prepare_data_dict(current, change, preserve=preserve)
        self.graph.add_node(node, **merged)
        return merged

    def update_edge_attribute(
        self,
        subject_node: str,
        object_node: str,
        edge_key: Optional[str],
        attr_key: str,
        attr_value: Any,
        preserve: bool = False,
    ) -> Dict:
        """
        Update an attribute of a given edge.

        The current properties of the edge identified by
        ``(subject_node, object_node, edge_key)`` and the new key/value pair
        are combined by ``prepare_data_dict``; the result is written back to
        that edge.

        Parameters
        ----------
        subject_node: str
            The subject (source) node
        object_node: str
            The object (target) node
        edge_key: Optional[str]
            The edge key
        attr_key: str
            The attribute key
        attr_value: Any
            The attribute value
        preserve: bool
            Whether or not to preserve existing values for the given attr_key

        Returns
        -------
        Dict
            A dictionary corresponding to the updated edge properties

        Raises
        ------
        KeyError
            If the graph has no edge with the given subject, object and key

        """
        # Look the edge up by its exact (subject, object, key) triple.
        # Passing that triple to ``graph.edges(...)`` treats it as a bunch of
        # *nodes* and yields their out-edges, so the first result is not
        # necessarily the requested edge.
        edge_data = self.graph[subject_node][object_node][edge_key]
        updated = prepare_data_dict(edge_data, {attr_key: attr_value},
                                    preserve)
        self.graph.add_edge(subject_node, object_node, key=edge_key, **updated)
        return updated

    def get_node(self, node: str) -> Dict:
        """
        Look up a node's properties.

        Parameters
        ----------
        node: str
            The node identifier

        Returns
        -------
        Dict
            The node's property dictionary, or an empty dictionary if the
            node is not in the graph

        """
        return self.graph.nodes[node] if self.graph.has_node(node) else {}

    def get_edge(self,
                 subject_node: str,
                 object_node: str,
                 edge_key: Optional[str] = None) -> Dict:
        """
        Look up an edge's properties.

        Parameters
        ----------
        subject_node: str
            The subject (source) node
        object_node: str
            The object (target) node
        edge_key: Optional[str]
            The edge key

        Returns
        -------
        Dict
            The data reported by the underlying graph's ``get_edge_data``,
            or an empty dictionary if there is no such edge

        """
        if not self.graph.has_edge(subject_node, object_node, edge_key):
            return {}
        return self.graph.get_edge_data(subject_node, object_node, edge_key)

    def nodes(self, data: bool = True) -> Dict:
        """
        Return the nodes of the graph.

        Parameters
        ----------
        data: bool
            Whether or not to fetch node properties

        Returns
        -------
        Dict
            The result of calling ``nodes(data)`` on the underlying graph

        """
        node_view = self.graph.nodes(data)
        return node_view

    def edges(self, keys: bool = False, data: bool = True) -> Dict:
        """
        Return the edges of the graph.

        Parameters
        ----------
        keys: bool
            Whether or not to include edge keys
        data: bool
            Whether or not to fetch edge properties

        Returns
        -------
        Dict
            The result of calling ``edges(keys=..., data=...)`` on the
            underlying graph

        """
        edge_view = self.graph.edges(keys=keys, data=data)
        return edge_view

    def in_edges(self,
                 node: str,
                 keys: bool = False,
                 data: bool = False) -> List:
        """
        Return the incoming edges of a node.

        Parameters
        ----------
        node: str
            The node identifier
        keys: bool
            Whether or not to include edge keys
        data: bool
            Whether or not to fetch edge properties

        Returns
        -------
        List
            The result of calling ``in_edges`` on the underlying graph

        """
        incoming = self.graph.in_edges(node, keys=keys, data=data)
        return incoming

    def out_edges(self,
                  node: str,
                  keys: bool = False,
                  data: bool = False) -> List:
        """
        Return the outgoing edges of a node.

        Parameters
        ----------
        node: str
            The node identifier
        keys: bool
            Whether or not to include edge keys
        data: bool
            Whether or not to fetch edge properties

        Returns
        -------
        List
            The result of calling ``out_edges`` on the underlying graph

        """
        outgoing = self.graph.out_edges(node, keys=keys, data=data)
        return outgoing

    def nodes_iter(self) -> Generator:
        """
        Iterate over all the nodes in the graph.

        Returns
        -------
        Generator
            A generator for nodes where each element is a Tuple that
            contains (node_id, node_data)

        """
        yield from self.graph.nodes(data=True)

    def edges_iter(self) -> Generator:
        """
        Iterate over all the edges in the graph.

        Returns
        -------
        Generator
            A generator for edges where each element is a 4-tuple that
            contains (subject, object, edge_key, edge_data)

        """
        yield from self.graph.edges(keys=True, data=True)

    def remove_node(self, node: str) -> None:
        """
        Delete a node from the graph.

        Parameters
        ----------
        node: str
            The node identifier

        """
        backing_graph = self.graph
        backing_graph.remove_node(node)

    def remove_edge(self,
                    subject_node: str,
                    object_node: str,
                    edge_key: Optional[str] = None) -> None:
        """
        Delete an edge from the graph.

        Parameters
        ----------
        subject_node: str
            The subject (source) node
        object_node: str
            The object (target) node
        edge_key: Optional[str]
            The edge key; passed through unchanged (``None`` when omitted)

        """
        backing_graph = self.graph
        backing_graph.remove_edge(subject_node, object_node, edge_key)

    def has_node(self, node: str) -> bool:
        """
        Tell whether the graph contains a given node.

        Parameters
        ----------
        node: str
            The node identifier

        Returns
        -------
        bool
            Whether or not the given node exists

        """
        node_present = self.graph.has_node(node)
        return node_present

    def has_edge(self,
                 subject_node: str,
                 object_node: str,
                 edge_key: Optional[str] = None) -> bool:
        """
        Tell whether the graph contains a given edge.

        Parameters
        ----------
        subject_node: str
            The subject (source) node
        object_node: str
            The object (target) node
        edge_key: Optional[str]
            The edge key; forwarded as ``key`` to the underlying graph

        Returns
        -------
        bool
            Whether or not the given edge exists

        """
        backing_graph = self.graph
        edge_present = backing_graph.has_edge(subject_node, object_node, key=edge_key)
        return edge_present

    def number_of_nodes(self) -> int:
        """
        Count the nodes in the graph.

        Returns
        -------
        int
            The node count reported by the underlying graph

        """
        node_count = self.graph.number_of_nodes()
        return node_count

    def number_of_edges(self) -> int:
        """
        Count the edges in the graph.

        Returns
        -------
        int
            The edge count reported by the underlying graph

        """
        edge_count = self.graph.number_of_edges()
        return edge_count

    def degree(self):
        """
        Get the degree of all the nodes in a graph.

        Returns
        -------
        The result of ``self.graph.degree()``, returned unchanged.

        """
        degrees = self.graph.degree()
        return degrees

    def clear(self) -> None:
        """
        Empty the graph by delegating to ``self.graph.clear()``.
        """
        backing_graph = self.graph
        backing_graph.clear()

    @staticmethod
    def set_node_attributes(graph: BaseGraph, attributes: Dict) -> None:
        """
        Apply a dictionary of attributes to the nodes of a graph.

        Parameters
        ----------
        graph: kgx.graph.base_graph.BaseGraph
            The graph to modify
        attributes: Dict
            A dictionary of node identifier to key-value pairs

        """
        # The call below resolves to the module-level function of the same
        # name, not to this static method.
        wrapped_graph = graph.graph
        return set_node_attributes(wrapped_graph, attributes)

    @staticmethod
    def set_edge_attributes(graph: BaseGraph, attributes: Dict) -> None:
        """
        Apply a dictionary of attributes to the edges of a graph.

        Parameters
        ----------
        graph: kgx.graph.base_graph.BaseGraph
            The graph to modify
        attributes: Dict
            A dictionary of edge to key-value pairs; it is handed
            unchanged to the module-level ``set_edge_attributes``

        Returns
        -------
        Any
            Whatever the module-level ``set_edge_attributes`` returns

        """
        # The call below resolves to the module-level function of the same
        # name, not to this static method.
        wrapped_graph = graph.graph
        return set_edge_attributes(wrapped_graph, attributes)

    @staticmethod
    def get_node_attributes(graph: BaseGraph, attr_key: str) -> Dict:
        """
        Collect the value of attribute ``attr_key`` for the nodes that have it.

        Parameters
        ----------
        graph: kgx.graph.base_graph.BaseGraph
            The graph to read from
        attr_key: str
            The attribute key

        Returns
        -------
        Dict
            A dictionary where nodes are the keys and the values
            are the attribute values for ``attr_key``

        """
        # The call below resolves to the module-level function of the same
        # name, not to this static method.
        wrapped_graph = graph.graph
        return get_node_attributes(wrapped_graph, attr_key)

    @staticmethod
    def get_edge_attributes(graph: BaseGraph, attr_key: str) -> Dict:
        """
        Collect the value of attribute ``attr_key`` for the edges that have it.

        Parameters
        ----------
        graph: kgx.graph.base_graph.BaseGraph
            The graph to read from
        attr_key: str
            The attribute key

        Returns
        -------
        Dict
            A dictionary where edges are the keys and the values
            are the attribute values for ``attr_key``

        """
        # The call below resolves to the module-level function of the same
        # name, not to this static method.
        wrapped_graph = graph.graph
        return get_edge_attributes(wrapped_graph, attr_key)

    @staticmethod
    def relabel_nodes(graph: BaseGraph, mapping: Dict) -> None:
        """
        Rename node identifiers of a graph according to a mapping.

        Parameters
        ----------
        graph: kgx.graph.base_graph.BaseGraph
            The graph to modify
        mapping: Dict
            A dictionary of mapping where the key is the old identifier
            and the value is the new identifier.

        """
        # ``copy=False`` is passed so the wrapped graph itself is relabelled;
        # the call resolves to the module-level ``relabel_nodes``.
        wrapped_graph = graph.graph
        relabel_nodes(wrapped_graph, mapping, copy=False)
Пример #14
0
def collapse_multi(mdg: nx.MultiDiGraph) -> nx.DiGraph:
    """Collapse parallel edges of ``mdg`` into single weighted edges.

    Every distinct (source, target) pair becomes one edge of the returned
    ``DiGraph`` whose ``weight`` is the number of parallel edges between
    that pair. Only nodes that take part in an edge end up in the result,
    and because the edges are sorted the node identifiers must be mutually
    orderable.
    """
    collapsed = nx.DiGraph()
    # groupby only merges adjacent equal items, hence the sort
    ordered_pairs = sorted(mdg.edges())
    for (src, dst), parallel in groupby(ordered_pairs):
        collapsed.add_edge(src, dst, weight=sum(1 for _ in parallel))
    return collapsed
Пример #15
0
class GraphBuilder:
    """Incrementally assemble a ``MultiDiGraph`` from JSON node and edge documents.

    ``nodes`` and ``edges`` count how many nodes and edges were added through
    ``add_node`` / ``add_edge``.
    """

    def __init__(self, model: Model):
        self.model = model
        self.graph = MultiDiGraph()
        self.nodes = 0
        self.edges = 0

    def add_from_json(self, js: Json) -> None:
        """Add the node or edge described by ``js``.

        A document with ``id`` and a reported section is treated as a node,
        a document with ``from`` and ``to`` as an edge.

        Raises:
            AttributeError: if ``js`` is neither a node nor an edge document.
        """
        if "id" in js and Section.reported in js:
            self.add_node(
                js["id"],
                js[Section.reported],
                js.get(Section.desired, None),
                js.get(Section.metadata, None),
                js.get("search", None),
                js.get("replace", False) is True,
            )
        elif "from" in js and "to" in js:
            self.add_edge(js["from"], js["to"], js.get("edge_type", EdgeType.default))
        else:
            raise AttributeError(f"Format not understood! Got {json.dumps(js)} which is neither vertex nor edge.")

    def add_node(
        self,
        node_id: str,
        reported: Json,
        desired: Optional[Json] = None,
        metadata: Optional[Json] = None,
        search: Optional[str] = None,
        replace: bool = False,
    ) -> None:
        """Validate ``reported`` against the model and add the node to the graph.

        Args:
            node_id: identifier of the node.
            reported: the reported section; replaced by the coerced version
                when ``model.check_valid`` returns one.
            desired: optional desired section (part of the content hash).
            metadata: optional metadata section (part of the content hash).
            search: precomputed search string; when it is not a string the
                reported section is flattened instead.
            replace: marks the node as a replace node. The node is also marked
                when ``metadata["replace"]`` is literally ``True``.
        """
        self.nodes += 1
        # validate kind of this reported json
        coerced = self.model.check_valid(reported)
        reported = reported if coerced is None else coerced
        kind = self.model[reported]
        # create content hash
        sha = GraphBuilder.content_hash(reported, desired, metadata)
        # flat all properties into a single string for search
        flat = search if isinstance(search, str) else (GraphBuilder.flatten(reported, kind))
        # The former one-liner `replace | metadata.get("replace", False) is True if metadata else False`
        # parsed as `((replace | ...) is True) if metadata else False` and therefore dropped an
        # explicit replace=True whenever no metadata was given. Evaluate both sources separately.
        replace_from_metadata = bool(metadata) and metadata.get("replace", False) is True
        self.graph.add_node(
            node_id,
            id=node_id,
            reported=reported,
            desired=desired,
            metadata=metadata,
            hash=sha,
            kind=kind,
            kinds=list(kind.kind_hierarchy()),
            kinds_set=kind.kind_hierarchy(),
            flat=flat,
            replace=bool(replace) or replace_from_metadata,
        )

    def add_edge(self, from_node: str, to_node: str, edge_type: str) -> None:
        """Add an edge of ``edge_type``, keyed by ``GraphAccess.edge_key``."""
        self.edges += 1
        key = GraphAccess.edge_key(from_node, to_node, edge_type)
        self.graph.add_edge(from_node, to_node, key, edge_type=edge_type)

    @staticmethod
    def content_hash(js: Json, desired: Optional[Json] = None, metadata: Optional[Json] = None) -> str:
        """Return the hex SHA-256 over the version marker and the given sections.

        ``desired`` and ``metadata`` only contribute when they are truthy.
        """
        sha256 = hashlib.sha256()
        # all content hashes will be different, when the version changes
        sha256.update(ContentHashVersion.to_bytes(2, "big"))
        sha256.update(json.dumps(js, sort_keys=True).encode("utf-8"))
        if desired:
            sha256.update(json.dumps(desired, sort_keys=True).encode("utf-8"))
        if metadata:
            sha256.update(json.dumps(metadata, sort_keys=True).encode("utf-8"))
        return sha256.hexdigest()

    @staticmethod
    def flatten(js: Json, kind: Kind) -> str:
        """Flatten all leaf values of ``js`` into one space separated string.

        ``None`` and boolean values are skipped. The kind is followed along
        the structure so that date time values can be normalized.
        """
        result = ""

        def dispatch(value: Any, k: Kind) -> None:
            nonlocal result
            if isinstance(value, dict):
                for prop, elem in value.items():
                    sub = (
                        k.property_kind_of(prop, AnyKind())
                        if isinstance(k, ComplexKind)
                        else (k.value_kind if isinstance(k, DictionaryKind) else AnyKind())
                    )
                    dispatch(elem, sub)
            elif isinstance(value, list):
                sub = k.inner if isinstance(k, ArrayKind) else AnyKind()
                for elem in value:
                    dispatch(elem, sub)
            elif value is None or isinstance(value, bool):
                pass
            else:
                # in case of date time: "2017-05-30T22:04:34Z" -> "2017-05-30 22:04:34"
                if isinstance(k, DateTimeKind):
                    value = re.sub("[ZT]", " ", value)
                if result:
                    result += " "
                result += str(value).strip()

        dispatch(js, kind)
        return result

    def check_complete(self) -> None:
        """Check the graph for consistency and normalize the id of the root node.

        Asserts that every node has a reported section and that only known
        edge types are used. A root of kind ``graph_root`` whose id is not
        ``"root"`` is re-added under the id ``"root"`` together with its
        outgoing edges.
        """
        # check that all vertices are given, that were defined in any edge definition
        # note: DiGraph will create an empty vertex node automatically
        for node_id, node in self.graph.nodes(data=True):
            assert node.get(Section.reported), f"{node_id} was used in an edge definition but not provided as vertex!"

        edge_types = {edge[2] for edge in self.graph.edges(data="edge_type")}
        al = EdgeType.all
        assert not edge_types.difference(al), f"Graph contains unknown edge types! Given: {edge_types}. Known: {al}"
        # make sure there is only one root node
        rid = GraphAccess.root_id(self.graph)
        root_node = self.graph.nodes[rid]

        # make sure a graph_root node is always stored under the id "root"
        if value_in_path(root_node, NodePath.reported_kind) == "graph_root" and rid != "root":
            # add the node under the new id, move its outgoing edges, then drop the old node
            root_node["id"] = "root"
            self.graph.add_node("root", **root_node)

            for succ in list(self.graph.successors(rid)):
                for edge_type in EdgeType.all:
                    key = GraphAccess.edge_key(rid, succ, edge_type)
                    if self.graph.has_edge(rid, succ, key):
                        self.graph.remove_edge(rid, succ, key)
                        self.add_edge("root", succ, edge_type)
            self.graph.remove_node(rid)
Пример #16
0
def NetworkXToProgramGraph(
    g: nx.MultiDiGraph,
    proto: Optional[programl_pb2.ProgramGraph] = None,
    **proto_fields,
) -> programl_pb2.ProgramGraph:
    """Perform the inverse transformation from networkx graph -> protobuf.

    See ProgramGraphToNetworkX() for details.

    Arguments:
      g: A networkx graph.
      proto: An optional protocol buffer instance to use. Else a new one is
        created. Calling code is responsible for clearing the protocol buffer.
      **proto_fields: Optional keyword arguments to use when constructing a
        proto. Has no effect if proto argument is set.

    Returns:
      A ProgramGraph proto instance.
    """
    if not proto:
        proto = programl_pb2.ProgramGraph(**proto_fields)

    # Functions get their index from the sorted order of their names.
    function_names = sorted({fn for _, fn in g.nodes(data="function") if fn})
    function_to_idx_map = {fn: idx for idx, fn in enumerate(function_names)}

    # One function proto per name, with its LLVM entry count when known.
    has_entry_counts = "llvm_function_entry_count" in g.graph
    for function_name in function_names:
        function_proto = proto.function.add()
        function_proto.name = function_name
        if has_entry_counts:
            entry_counts = g.graph["llvm_function_entry_count"]
            if function_name in entry_counts:
                function_proto.llvm_entry_count = entry_counts[function_name]

    # Graph-level features and labels.
    proto.x[:] = np.array(g.graph["x"], dtype=np.int64).tolist()
    proto.y[:] = np.array(g.graph["y"], dtype=np.int64).tolist()

    # Optional graph-level fields: copied only when present on the graph.
    for graph_key in (
        "data_flow_root_node",
        "data_flow_steps",
        "data_flow_positive_node_count",
    ):
        if graph_key in g.graph:
            setattr(proto, graph_key, g.graph[graph_key])

    # Optional graph-level LLVM profiling info: (graph key, profile field).
    for graph_key, profile_field in (
        ("llvm_profile_num_functions", "num_functions"),
        ("llvm_profile_max_function_count", "max_function_count"),
        ("llvm_profile_num_counts", "num_counts"),
        ("llvm_profile_total_count", "total_count"),
        ("llvm_profile_max_count", "max_count"),
        ("llvm_profile_max_internal_count", "max_internal_count"),
    ):
        if graph_key in g.graph:
            setattr(proto.llvm_profile, profile_field, g.graph[graph_key])

    # Node list.
    for _, data in g.nodes(data=True):
        node_proto = proto.node.add()
        node_proto.type = data["type"]
        node_proto.text = data["text"]
        node_proto.preprocessed_text = data["preprocessed_text"]
        if data["function"] is not None:
            node_proto.function = function_to_idx_map[data["function"]]
        node_proto.x[:] = np.array(data["x"], dtype=np.int64).tolist()
        node_proto.y[:] = np.array(data["y"], dtype=np.int64).tolist()
        # Node-level LLVM profiling info, only for weights that are set.
        for weight_key in (
            "llvm_profile_true_weight",
            "llvm_profile_false_weight",
            "llvm_profile_total_weight",
        ):
            weight = data.get(weight_key)
            if weight is not None:
                setattr(node_proto, weight_key, weight)

    # Edge list.
    for src, dst, data in g.edges(data=True):
        edge_proto = proto.edge.add()
        edge_proto.source_node = src
        edge_proto.destination_node = dst
        edge_proto.flow = data["flow"]
        edge_proto.position = data["position"]

    return proto
Пример #17
0
def delete_control_flow_edges(graph: nx.MultiDiGraph):
    """Remove every edge whose ``control_flow_edge`` attribute is truthy.

    The graph is modified in place and each removal is logged at debug level.
    """
    # Snapshot the edges first: the graph is mutated while walking them.
    snapshot = list(graph.edges(keys=True, data=True))
    for src, dst, key, attrs in snapshot:
        if not attrs.get('control_flow_edge'):
            continue
        graph.remove_edge(src, dst, key)
        log.debug('Removing control flow edge from {} to {}'.format(src, dst))
Пример #18
0
class GraphData(SaveLoad):
    """
    In-memory store for graph data.

    Each node is represented as a dict of node info called ``node_json``.
    Example format of a ``node_json``::

        {
            "id": 1,
            "properties": {"name": "bob", "age": 1},
            "labels": ["entity", "man"]
        }

    Example usage::

        graphdata = GraphData()
        graphdata.create_index_on_property("name", "aliases", "qualified_name")

        # save a graphdata to disk
        graphdata.save("test.v1.graph")

        # load a graphdata from disk
        graphdata = GraphData.load("test.v1.graph")
    """

    DEFAULT_KEY_NODE_ID = "id"  # key of the node id in a node_json; every node must have it
    DEFAULT_KEY_NODE_PROPERTIES = "properties"  # key of the node properties in a node_json; every node must have it
    DEFAULT_KEY_NODE_LABELS = "labels"  # key of the node labels in a node_json; every node must have it

    DEFAULT_KEYS = [
        DEFAULT_KEY_NODE_ID, DEFAULT_KEY_NODE_PROPERTIES,
        DEFAULT_KEY_NODE_LABELS
    ]
    UNASSIGNED_NODE_ID = -1  # id of a node that has no id assigned yet (a newly created node)

    # key names used for relations
    DEFAULT_KEY_RELATION_START_ID = "startId"
    DEFAULT_KEY_RELATION_TYPE = "relationType"
    DEFAULT_KEY_RELATION_END_ID = "endId"

    def __init__(self):
        """Create an empty graph store."""
        # all instance state is created by the shared initializer
        self.__init_graph()

    def clear(self):
        """Reset the store by re-running the shared initializer."""
        self.__init_graph()

    def __init_graph(self):
        """(Re)create every piece of instance state with empty values."""
        # bookkeeping maps and counters
        self.max_node_id = 0
        self.label_to_ids_map = {}
        self.relation_type_to_num_map = {}
        # the graph itself and the property indexes built on top of it
        self.graph = MultiDiGraph()
        self.index_collection = GraphIndexCollection()

    def create_index_on_property(self, *property_name_list):
        """
        create index on some properties. It makes the query on the corresponding property faster.
        :param property_name_list: one or one more property names.
        :return:
        """
        self.index_collection.create_index_on_property(*property_name_list)

    def find_all_shortest_paths(self, startId, endId):
        """
        Find all shortest paths between two nodes.
        :param startId: id of the start node
        :param endId: id of the end node
        :return: the result of ``all_shortest_paths`` on the graph
        """
        return all_shortest_paths(self.graph, startId, endId)

    def find_shortest_path(self, startId, endId):
        """
        Find one shortest path between two nodes.
        :param startId: id of the start node
        :param endId: id of the end node
        :return: the result of ``shortest_path`` on the graph
        """
        return shortest_path(self.graph, startId, endId)

    def set_nodes(self, nodes):
        for n in nodes:
            self.add_node(node_id=n[self.DEFAULT_KEY_NODE_ID],
                          node_properties=n[self.DEFAULT_KEY_NODE_PROPERTIES],
                          node_labels=n[self.DEFAULT_KEY_NODE_LABELS])

    def add_labels(self, *labels):
        """
        add a list of label to the graph
        :param labels:
        :return:
        """

        for label in labels:
            if not label:
                return
            if label not in self.label_to_ids_map.keys():
                self.label_to_ids_map[label] = set([])

    def add_label_by_node_id(self, node_id, label):
        """
        add a label to a node
        :param node_id: the node id which the label need to add
        :param label: the label that need to added
        :return: True, add successful.False, add fail.
        """
        if not label:
            return False
        node_json = self.get_node_info_dict(node_id)
        if not node_json:
            return False
        node_json[GraphData.DEFAULT_KEY_NODE_LABELS].add(label)
        self.label_to_ids_map[label].add(node_id)
        return True

    def get_node_ids_by_label(self, label):
        if label not in self.label_to_ids_map.keys():
            return set([])
        return self.label_to_ids_map[label]

    def add_label_by_label(self, label, new_label):
        """
        add a label to node in graph, the node must has the specific label
        :param new_label: the new_label add to node
        :param label: the node must has the label
        :return:
        """

        for node_id in self.get_node_ids_by_label(label):
            self.add_label_by_node_id(node_id, new_label)

    def add_label_to_all(self, label):
        """
        add a label to node in graph
        :param label:
        :return:
        """
        if not label:
            return
        self.add_labels(label)
        for node_id in self.get_node_ids():
            self.add_label_by_node_id(node_id, label)

    def add_node(self,
                 node_labels,
                 node_properties,
                 node_id=UNASSIGNED_NODE_ID,
                 primary_property_name=""):
        """
        add a node json to the graph
        :param node_id: the node_id to identify the node, if not given, it will be add as new node and give a node id
        :param node_properties: a dict of node properties, key-value pair
        :param node_labels: a set of node labels
        :param primary_property_name:make sure the node_json["properties"][primary_property_name] is unique in GraphData.
         if no passing, the node json will be add to graph without check. otherwise, only the node json
        with unique property value ( property value is got by primary_property_name ) will be added to the GraphData.
                :return:-1, means that adding node json fail. otherwise, return the id of the newly added node
        """
        if primary_property_name:
            if primary_property_name not in node_properties:
                print(
                    "node json must have a primary_property_name ( %r ) in properties "
                    % primary_property_name)
                return self.UNASSIGNED_NODE_ID

            node_json = self.find_one_node_by_property(
                property_name=primary_property_name,
                property_value=node_properties[primary_property_name])
            if node_json:
                return node_json[self.DEFAULT_KEY_NODE_ID]

        if node_id == self.UNASSIGNED_NODE_ID:
            node_id = self.max_node_id + 1
            self.max_node_id = self.max_node_id + 1

        new_node_json = {
            self.DEFAULT_KEY_NODE_ID: node_id,
            self.DEFAULT_KEY_NODE_PROPERTIES: node_properties,
            self.DEFAULT_KEY_NODE_LABELS: set(node_labels)
        }

        self.graph.add_node(node_id, **new_node_json)

        if self.max_node_id < node_id:
            self.max_node_id = node_id

        self.add_labels(*new_node_json[self.DEFAULT_KEY_NODE_LABELS])
        for label in new_node_json[self.DEFAULT_KEY_NODE_LABELS]:
            self.label_to_ids_map[label].add(node_id)
        self.index_collection.add_node(
            node_id=node_id,
            node_properties=new_node_json[
                GraphData.DEFAULT_KEY_NODE_PROPERTIES])
        return node_id

    def update_node_property_by_node_id(self, node_id, node_properties):
        if not node_id in list(self.get_node_ids()):
            return self.UNASSIGNED_NODE_ID

        node_json = self.get_node_info_dict(node_id)
        update_node_id = node_json[self.DEFAULT_KEY_NODE_ID]
        update_node_properties = node_json[self.DEFAULT_KEY_NODE_PROPERTIES]
        update_node_labels = node_json[self.DEFAULT_KEY_NODE_LABELS]
        for k, v in node_properties.items():
            update_node_properties[k] = v
        update_node_json = {
            self.DEFAULT_KEY_NODE_ID: update_node_id,
            self.DEFAULT_KEY_NODE_PROPERTIES: update_node_properties,
            self.DEFAULT_KEY_NODE_LABELS: update_node_labels
        }
        self.graph.add_node(update_node_id, **update_node_json)
        self.index_collection.add_node(node_id=update_node_id,
                                       node_properties=update_node_properties)
        return update_node_id

    def update_node_by_node_id(self, node_id, node_labels, node_properties):
        if not node_id in list(self.get_node_ids()):
            return self.UNASSIGNED_NODE_ID

        node_json = self.get_node_info_dict(node_id)
        update_node_id = node_json[self.DEFAULT_KEY_NODE_ID]
        update_node_properties = node_json[self.DEFAULT_KEY_NODE_PROPERTIES]
        update_node_labels = node_json[self.DEFAULT_KEY_NODE_LABELS]
        for k, v in node_properties.items():
            update_node_properties[k] = v
        for label in node_labels:
            update_node_labels.add(label)
        update_node_json = {
            self.DEFAULT_KEY_NODE_ID: update_node_id,
            self.DEFAULT_KEY_NODE_PROPERTIES: update_node_properties,
            self.DEFAULT_KEY_NODE_LABELS: update_node_labels
        }
        self.graph.add_node(update_node_id, **update_node_json)
        self.add_labels(*update_node_labels)
        for label in update_node_labels:
            self.label_to_ids_map[label].add(node_id)
        self.index_collection.add_node(node_id=update_node_id,
                                       node_properties=update_node_properties)
        return update_node_id

    def update_node_property_value_by_node_id(self, node_id,
                                              node_property_name,
                                              node_proprty_value):
        if not node_id in list(self.get_node_ids()):
            return self.UNASSIGNED_NODE_ID
        if node_property_name == "":
            return node_id
        node_property = {node_property_name: node_proprty_value}
        return self.update_node_property_by_node_id(node_id, node_property)

    def remove_node(self, node_id):
        if node_id not in self.graph.nodes:
            return None
        # print(type(self.graph.nodes))
        node_json = self.graph.nodes[node_id]
        in_relations = set(self.graph.in_edges(node_id, keys=True))
        out_relations = set(self.graph.out_edges(node_id, keys=True))
        self.graph.remove_node(node_id)

        for label in node_json[self.DEFAULT_KEY_NODE_LABELS]:
            self.label_to_ids_map[label].remove(node_id)

        self.index_collection.remove_node(node_id)

        return node_json, out_relations, in_relations

    def remove_all_nodes(self):
        ids = self.get_node_ids()
        for id in ids:
            self.remove_node(id)
        return True

    def merge_node(self, node_labels, node_properties, primary_property_name):
        """
        merge a node json to the graph, that is if we can't not find the node with primary_property_value match the given node.
        we will add a new node, if we found, we will add copy all properties given to the exist node, copy all labels to the exist node.
        properties will be updated by this merge. That is, if the node to be merged has the same attributes as the existing node, the attributes of the new node are used.

        :param node_properties: a dict of node properties, key-value pair
        :param node_labels: a set of node labels
        :param primary_property_name: The name of the property to check, the merged node and the new node are the same on this property.
        :return:-1, means that adding node json fail. otherwise, return the id of the newly added(merged) node.If it already exists, the id of this merged node will not change.
        """

        if not primary_property_name:
            print("primary_property_name must given on merge")
            return GraphData.UNASSIGNED_NODE_ID

        if primary_property_name not in node_properties:
            print(
                "node json must have a primary_property_name ( %r ) in properties "
                % primary_property_name)
            return self.UNASSIGNED_NODE_ID

        node_json = self.find_one_node_by_property(
            property_name=primary_property_name,
            property_value=node_properties[primary_property_name])

        if not node_json:
            return self.add_node(node_labels=node_labels,
                                 node_properties=node_properties,
                                 node_id=GraphData.UNASSIGNED_NODE_ID)

        merge_node_id = node_json[self.DEFAULT_KEY_NODE_ID]
        merge_properties = node_json[self.DEFAULT_KEY_NODE_PROPERTIES]
        for k, v in node_properties.items():
            merge_properties[k] = v

        merge_labels = set(node_json[self.DEFAULT_KEY_NODE_LABELS])
        for label in node_labels:
            merge_labels.add(label)

        return self.add_node(node_labels=merge_labels,
                             node_properties=merge_properties,
                             node_id=merge_node_id)

    def add_node_with_multi_primary_property(self,
                                             node_labels,
                                             node_properties,
                                             node_id=UNASSIGNED_NODE_ID,
                                             primary_property_names=None):
        """
        add a node json to the graph
        :param node_id: the node_id to identify the node, if not given, it will be add as new node and give a node id
        :param node_properties: a dict of node properties, key-value pair
        :param node_labels: a set of node labels
        :param primary_property_names:a list of primary properties. make sure the node_json["properties"][primary_property_name] is unique in GraphData.
         if no passing, the node json will be add to graph without check. otherwise, only the node json
        with unique property value ( property value is got by primary_property_name ) will be added to the GraphData.
                :return:-1, means that adding node json fail. otherwise, return the id of the newly added node
        """

        if primary_property_names is None:
            primary_property_names = []

        match_properties = {}

        for primary_property_name in primary_property_names:
            if primary_property_name not in node_properties:
                print(
                    "node json must have a primary_property_name ( %r ) in properties "
                    % primary_property_name)
                return self.UNASSIGNED_NODE_ID
            match_properties[primary_property_name] = node_properties[
                primary_property_name]

        node_json = self.find_one_node_by_properties(**match_properties)
        if node_json:
            return node_json[self.DEFAULT_KEY_NODE_ID]

        if node_id == self.UNASSIGNED_NODE_ID:
            node_id = self.max_node_id + 1

        new_node_json = {
            self.DEFAULT_KEY_NODE_ID: node_id,
            self.DEFAULT_KEY_NODE_PROPERTIES: node_properties,
            self.DEFAULT_KEY_NODE_LABELS: set(node_labels)
        }

        self.graph.add_node(node_id, **new_node_json)
        if self.max_node_id < node_id:
            self.max_node_id = node_id

        self.add_labels(*new_node_json[self.DEFAULT_KEY_NODE_LABELS])
        for label in new_node_json[self.DEFAULT_KEY_NODE_LABELS]:
            self.label_to_ids_map[label].add(node_id)
        self.index_collection.add_node(
            node_id=node_id,
            node_properties=new_node_json[
                GraphData.DEFAULT_KEY_NODE_PROPERTIES])

        return node_id

    def merge_node_with_multi_primary_property(self,
                                               node_labels,
                                               node_properties,
                                               primary_property_names=None):
        """
        merge a node json to the graph, that is if we can't not find the node with primary_property_value match the given node.
        we will add a new node, if we found, we will add copy all properties given to the exist node, copy all labels to the exist node.
        properties will be updated by this merge. That is, if the node to be merged has the same attributes as the existing node, the attributes of the new node are used.

        :param node_properties: a dict of node properties, key-value pair
        :param node_labels: a set of node labels
        :param primary_property_names: The list of name of the property to check, the merged node and the new node are the same on this property.
        :return:-1, means that adding node json fail. otherwise, return the id of the newly added(merged) node.If it already exists, the id of this merged node will not change.
        """

        if not primary_property_names:
            print("primary_property_names must given on merge")
            return GraphData.UNASSIGNED_NODE_ID
        match_properties = {}

        for primary_property_name in primary_property_names:
            if primary_property_name not in node_properties:
                print(
                    "node json must have a primary_property_name ( %r ) in properties "
                    % primary_property_name)
                return self.UNASSIGNED_NODE_ID
            match_properties[primary_property_name] = node_properties[
                primary_property_name]

        node_json = self.find_one_node_by_properties(**match_properties)
        if not node_json:
            return self.add_node(node_labels=node_labels,
                                 node_properties=node_properties,
                                 node_id=GraphData.UNASSIGNED_NODE_ID)

        merge_node_id = node_json[self.DEFAULT_KEY_NODE_ID]
        merge_properties = node_json[self.DEFAULT_KEY_NODE_PROPERTIES]
        for k, v in node_properties.items():
            merge_properties[k] = v

        merge_labels = set(node_json[self.DEFAULT_KEY_NODE_LABELS])
        for label in node_labels:
            merge_labels.add(label)

        return self.add_node(node_labels=merge_labels,
                             node_properties=merge_properties,
                             node_id=merge_node_id)

    def refresh_indexer(self):
        """
        Rebuild the property index from scratch.

        The names of the currently indexed properties are remembered, the index collection is
        replaced by a fresh one, the same properties are indexed again and every node of the
        graph is re-added to the new index.
        :return:
        """
        indexed_names = list(self.index_collection.get_index_property())

        del self.index_collection
        self.index_collection = GraphIndexCollection()
        self.create_index_on_property(*indexed_names)

        properties_key = self.DEFAULT_KEY_NODE_PROPERTIES
        for node_id, node_json in self.graph.nodes(data=True):
            if node_json is not None:
                self.index_collection.add_node(node_id,
                                               node_json[properties_key])

    def find_one_node_by_property(self, property_name, property_value):
        if self.index_collection.is_property_indexed(property_name):
            candidate_node_ids = list(
                self.index_collection.find_ids(property_name,
                                               property_value=property_value))
            if len(candidate_node_ids) == 0:
                return None
            return self.get_node_info_dict(candidate_node_ids[0])

        for node_id, node_json in self.graph.nodes(data=True):
            node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES]
            if property_name in node_properties_json.keys(
            ) and node_properties_json[property_name] == property_value:
                return node_json
        return None

    def find_nodes_by_ids(self, *ids):
        result = []
        for node_id in ids:
            node_json = self.get_node_info_dict(node_id)
            if node_json:
                result.append(node_json)
        return result

    def find_nodes_by_property(self, property_name, property_value):
        if self.index_collection.is_property_indexed(property_name):
            candidate_node_ids = list(
                self.index_collection.find_ids(property_name,
                                               property_value=property_value))

            return self.find_nodes_by_ids(*candidate_node_ids)

        nodes = []
        for node_id, node_json in self.graph.nodes(data=True):
            node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES]
            if property_name in node_properties_json.keys(
            ) and node_properties_json[property_name] == property_value:
                nodes.append(node_json)
        return nodes

    def find_one_node_by_property_value_starts_with(self, property_name,
                                                    property_value_starter):
        """
        find a node which its property value is string and the string is startswith a given string
        :param property_name:
        :param property_value_starter:
        :return:
        """
        for node_id, node_json in self.graph.nodes(data=True):
            node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES]
            if property_name not in node_properties_json.keys():
                continue

            property_value = node_properties_json[property_name]
            if type(property_value) != str:
                continue
            if property_value.startswith(property_value_starter):
                return node_json
        return None

    def find_nodes_by_property_value_starts_with(self, property_name,
                                                 property_value_starter):
        """
        find all nodes which its property value is string and the string is startswith a given string
        :param property_name:
        :param property_value_starter:
        :return:
        """
        nodes = []
        for node_id, node_json in self.graph.nodes(data=True):
            node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES]
            if property_name not in node_properties_json.keys():
                continue

            property_value = node_properties_json[property_name]
            if type(property_value) != str:
                continue
            if property_value.startswith(property_value_starter):
                nodes.append(node_json)
        return nodes

    def __find_node_ids_by_index_properties(self, **index_properties):
        result_ids = self.get_node_ids()

        for property_name, property_value in index_properties.items():
            result_ids = result_ids.intersection(
                self.index_collection.find_ids(property_name=property_name,
                                               property_value=property_value))

        return result_ids

    def find_one_node_by_properties(self, **properties):
        indexed_properties = {}
        unindexed_properties = {}
        for property_name, property_value in properties.items():
            if self.index_collection.is_property_indexed(
                    property_name=property_name):
                indexed_properties[property_name] = property_value
            else:
                unindexed_properties[property_name] = property_value

        candidate_node_ids = self.__find_node_ids_by_index_properties(
            **indexed_properties)

        if len(candidate_node_ids) == 0:
            return None

        if len(unindexed_properties) == 0:
            return self.get_node_info_dict(list(candidate_node_ids)[0])

        for node_id in candidate_node_ids:
            node_json = self.get_node_info_dict(node_id=node_id)
            node_properties_json = node_json[self.DEFAULT_KEY_NODE_PROPERTIES]

            is_match = True
            for property_name, property_value in unindexed_properties.items():
                if property_name not in node_properties_json.keys(
                ) or node_properties_json[property_name] != property_value:
                    is_match = False
                    break
            if is_match:
                return node_json

        return None

    def set_relations(self, relations):
        for t in relations:
            self.add_relation(startId=t[self.DEFAULT_KEY_RELATION_START_ID],
                              relationType=t[self.DEFAULT_KEY_RELATION_TYPE],
                              endId=t[self.DEFAULT_KEY_RELATION_END_ID])

    def add_relation(self, startId, relationType, endId):
        """
        add a new relation to graphData, if exist, not add.
        :param startId:
        :param relationType:
        :param endId:
        :return:False, the relation is already exist adding fail, True, add the relation successsful
        """
        # if startId == GraphData.UNASSIGNED_NODE_ID:
        #     return False
        # if endId == GraphData.UNASSIGNED_NODE_ID:
        #     return False

        if startId not in self.graph.nodes or endId not in self.graph.nodes:
            return False

        if self.exist_relation(startId=startId,
                               relationType=relationType,
                               endId=endId):
            return False

        self.__add_one_relation_count(relationType)

        self.graph.add_edge(startId, endId, relationType)
        return True

    def __add_one_relation_count(self, relation_type):
        relation_type_to_num_map = self.get_relation_type_to_num_map()
        relation_type_to_num_map[relation_type] = relation_type_to_num_map.get(
            relation_type, 0) + 1

    def __remove_one_relation_count(self, relation_type):
        relation_type_to_num_map = self.get_relation_type_to_num_map()
        relation_type_to_num_map[relation_type] = max(
            0,
            relation_type_to_num_map.get(relation_type, 0) - 1)

    def add_relation_with_property(self, startId, relationType, endId,
                                   **kwargs):
        if startId not in self.graph.nodes or endId not in self.graph.nodes:
            return False

        if self.exist_relation(startId=startId,
                               relationType=relationType,
                               endId=endId):
            return False

        self.__add_one_relation_count(relationType)
        self.graph.add_edge(startId, endId, relationType, **kwargs)
        return True

    def remove_relation(self, startId, relationType, endId):
        if not self.exist_relation(
                startId=startId, relationType=relationType, endId=endId):
            return False
        self.__remove_one_relation_count(relationType)

        self.graph.remove_edge(startId, endId, relationType)
        return True

    def remove_all_relations(self):
        relation_pairs = self.get_relation_pairs()
        for relation_pair in relation_pairs:
            relations = self.get_relations(start_id=relation_pair[0],
                                           end_id=relation_pair[1])
            for relation in relations:
                self.remove_relation(relation[0], relation[1], relation[2])
        return True

    def exist_relation(self, startId, relationType, endId):
        return self.graph.has_edge(startId, endId, relationType)

    def exist_any_relation(self, startId, endId):
        return self.graph.has_edge(startId, endId)

    def get_relations(self, start_id=None, relation_type=None, end_id=None):
        candidates = None
        if start_id is not None:
            candidates = self.get_all_out_relations(start_id)
        if end_id is not None:
            tmp = self.get_all_in_relations(end_id)
            if candidates is not None:
                candidates &= tmp
            else:
                candidates = tmp
        candidates = self.get_relation_pairs_with_type(
        ) if candidates is None else candidates

        if relation_type is not None:
            candidates = set(
                filter(lambda r: r[1] == relation_type, candidates))
        return candidates

    def get_all_relations(self, id_1, id_2):
        result = set([])
        result = result | self.get_relations(start_id=id_1, end_id=id_2)
        result = result | self.get_relations(start_id=id_2, end_id=id_1)
        return result

    def get_edge_extra_info(self, start_id, end_id, relation_name, extra_key):
        relation_dict = self.graph.get_edge_data(start_id, end_id)
        if relation_name in relation_dict:
            if extra_key in relation_dict[relation_name]:
                return relation_dict[relation_name][extra_key]
        return ""

    def get_node_num(self):
        return len(self.graph.nodes)

    def get_relation_num(self):
        return len(self.graph.edges)

    def get_node_ids(self):
        return set(self.graph.nodes)

    def get_relation_pairs(self):
        # todo:cache the result?
        """
        get the relation list in [(startId,endId)] format
        :return:
        """
        pairs = set(self.graph.edges(keys=False))

        return pairs

    def get_relation_pairs_with_type(self):
        """
        get the relation list in [(startId,endId)] format
        :return:
        """
        pairs = {(r[0], r[2], r[1]) for r in self.graph.edges(keys=True)}
        return pairs

    def get_all_out_relations(self, node_id):
        if node_id not in self.graph.nodes:
            return set()
        return {(r[0], r[2], r[1])
                for r in self.graph.out_edges(node_id, keys=True)}

    def get_all_in_relations(self, node_id):
        if node_id not in self.graph.nodes:
            return set()
        return {(r[0], r[2], r[1])
                for r in self.graph.in_edges(node_id, keys=True)}

    def update_node_index(self, node_id):

        node_info = self.get_node_info_dict(node_id=node_id)
        node_properties = node_info[self.DEFAULT_KEY_NODE_PROPERTIES]
        self.index_collection.add_node(node_id=node_id,
                                       node_properties=node_properties)

    def get_node_info_dict(self, node_id):
        """
        get the node info dict,
        :param node_id: the node id
        :return:
        """
        return self.graph.nodes.get(node_id, None)

    def get_properties_for_node(self,
                                node_id,
                                key_node_properties=DEFAULT_KEY_NODE_PROPERTIES
                                ):
        """
        get the node properties part from node info dict
        :param key_node_properties: specify the key of key_node_properties, default is "properties"
        :param node_id: the node id
        :return: {} if the node not exist
        """
        node_info_dict = self.get_node_info_dict(node_id)
        if node_info_dict is None:
            return {}

        return node_info_dict[key_node_properties]

    def get_labels_for_node(self,
                            node_id,
                            key_node_labels=DEFAULT_KEY_NODE_LABELS):
        """
        get the node properties part from node info dict
        :param key_node_labels: specify the key of node_labels, default is "labels"
        :param node_id: the node id
        :return: [] if the node not exist
        """
        node_info_dict = self.get_node_info_dict(node_id)
        if node_info_dict is None:
            return []

        return node_info_dict[key_node_labels]

    def get_all_labels(self):
        """
        get all labels as set for current node.
        :return: a set of labels.
        """
        return set(self.label_to_ids_map.keys())

    def get_all_relation_types(self):
        """
        get all relation types in graph data
        :return: a set of relation type strings
        """

        return set(self.get_relation_type_to_num_map().keys())

    def get_relation_count_by_type(self, relation_type):
        relation_type_to_num_map = self.get_relation_type_to_num_map()
        return relation_type_to_num_map.get(relation_type, 0)

    def get_relation_type_to_num_map(self):
        return self.relation_type_to_num_map

    def __count_relation_type_to_num_map(self):
        relation_type_to_num_map = {}
        relation_type_to_relation = {}
        for r in self.get_relation_pairs_with_type():
            if r[1] not in relation_type_to_relation:
                relation_type_to_relation[r[1]] = set()
            relation_type_to_relation[r[1]].add(r)
        for k, v in relation_type_to_relation.items():
            relation_type_to_num_map[k] = len(v)
        return relation_type_to_num_map

    def print_label_count(self):
        print("Label Num=%d" % len(self.label_to_ids_map.keys()))
        for k, v in self.label_to_ids_map.items():
            print("<Label:%r Num:%d>" % (k, len(v)))

    def print_graph_info(self):
        print("----- Graph Info ------")
        print(self)
        self.print_label_count()
        self.print_relation_info()
        print("-----------------------")

    def print_relation_info(self):
        relation_type_to_num_map = self.get_relation_type_to_num_map()
        print("Relation Num=%d" % len(relation_type_to_num_map.keys()))
        for k, v in relation_type_to_num_map.items():
            print("<Relation:%r Num:%d>" % (k, v))

    def __repr__(self):
        return "<GraphData nodeNum=%d relNum=%d maxNodeId=%d>" % (
            self.get_node_num(), self.get_relation_num(), self.max_node_id)

    def subgraph(self, node_ids):
        """
        get a sub graph of graph data which keep only given nodes and relations between nodes
        :param node_ids: the kept node ids in graph
        :return: a graph that keep all things.
        """
        graph_data = deepcopy(self)

        remove_nodes = set(self.get_node_ids()) - node_ids
        for node_id in remove_nodes:
            graph_data.remove_node(node_id)

        return graph_data
Пример #19
0
class FeatureConcatenator:
    """
    Collapses the individual node / edge attributes of a graph into single feature arrays.

    The input graph is deep-copied into a MultiDiGraph. Node attributes are concatenated
    into the node attribute "nfeat" and edge attributes into the edge attribute "efeat".
    Attributes named "label" and the attributes listed under the graph-level keys
    "n_targets" / "e_targets" are not part of the features.
    """

    def __init__(self, g_input: Graph) -> None:
        log.info(
            f"FeatureConcatenator: Initiating with a graph of {g_input.number_of_nodes()} nodes "
            f"and {g_input.number_of_edges()} edges"
        )
        self.g_input = g_input

        # All changes are applied to a deep copy, never to the caller's graph.
        self.g = MultiDiGraph(deepcopy(g_input))
        self._obtain_attrs()
        self._init_feat_attrs()

    def _obtain_attrs(self) -> None:
        """
        Collects the sets of target and feature attribute names for nodes and edges.

        A missing or empty "n_targets" / "e_targets" graph attribute is treated as "no targets".
        """
        self.n_targets: Set[str] = set(self.g.graph.get("n_targets") or ())
        self.n_attrs: Set[str] = {
            k_attr
            for nid, attrs in self.g.nodes.data()
            for k_attr in attrs.keys()
            if k_attr not in self.n_targets
        }
        self.n_attrs.discard("label")
        log.info(
            "FeatureConcatenator: The following set of node attributes "
            f"is present in the graph:\n{self.n_attrs}"
        )

        self.e_targets: Set[str] = set(self.g.graph.get("e_targets") or ())
        self.e_attrs: Set[str] = {
            k_attr
            for u, v, k, attrs in self.g.edges.data(keys=True)
            for k_attr in attrs.keys()
            if k_attr not in self.e_targets
        }
        self.e_attrs.discard("label")
        log.info(
            "FeatureConcatenator: The following set of edge attributes "
            f"is present in the graph:\n{self.e_attrs}"
        )

    def _init_feat_attrs(self) -> None:
        """
        Creates an empty node attribute "nfeat" and an empty edge attribute "efeat"
        """
        log.info("FeatureConcatenator: Initiating target nfeat attribute with nulls")
        mapping_nfeat: Dict[Union[int, str], None] = {nid: None for nid in self.g.nodes}
        nx.set_node_attributes(self.g, mapping_nfeat, "nfeat")

        log.info("FeatureConcatenator: Initiating target efeat attribute with nulls")
        mapping_efeat: Dict[Any, Any] = {
            (u, v, k): None for u, v, k in self.g.edges(keys=True)
        }
        nx.set_edge_attributes(self.g, mapping_efeat, "efeat")

    def concat_n_attrs(self) -> None:
        """
        Encodes all node attributes as continuous variables into attribute "nfeat"

        Each node gets a (1, number of attributes) array. The columns follow the sorted
        attribute names so that the feature layout is the same on every run. Every node
        must carry every collected attribute, otherwise a KeyError is raised.
        """
        log.info(
            f"FeatureConcatenator: Concatenating the following node attributes:\n{self.n_attrs}"
        )
        # Iterating the set directly would give a column order that can differ between runs.
        ordered_attrs = sorted(self.n_attrs, key=str)
        mapping_attrs: Dict[Union[int, str], ndarray] = {
            k: np.array([v[attr] for attr in ordered_attrs]).reshape(1, -1)
            for k, v in self.g.nodes.data()
        }
        nx.set_node_attributes(self.g, mapping_attrs, "nfeat")

    def concat_e_attrs(self) -> None:
        """
        Encodes all edge attributes as continuous variables into attribute "efeat"

        Each edge gets a (1, number of attributes) array. The columns follow the sorted
        attribute names so that the feature layout is the same on every run. Every edge
        must carry every collected attribute, otherwise a KeyError is raised.
        """
        log.info(
            f"FeatureConcatenator: Concatenating the following edge attributes:\n{self.e_attrs}"
        )
        # Iterating the set directly would give a column order that can differ between runs.
        ordered_attrs = sorted(self.e_attrs, key=str)
        mapping_attrs: Dict[Tuple[Any, Any, Any], ndarray] = {
            (u, v, k): np.array([e[attr] for attr in ordered_attrs]).reshape(1, -1)
            for u, v, k, e in self.g.edges.data(keys=True)
        }
        nx.set_edge_attributes(self.g, mapping_attrs, "efeat")

    def delete_originals(self) -> None:
        """
        Deletes original node and edge attributes after they have been concatenated

        Attributes that a particular node or edge does not carry are skipped.
        """
        node_attrs_to_drop = self.n_attrs.union(self.n_targets)
        edge_attrs_to_drop = self.e_attrs.union(self.e_targets)
        log.info(
            "FeatureConcatenator: Deleting original node attributes "
            f"{node_attrs_to_drop} and edge attributes {edge_attrs_to_drop}",
        )
        for nid in self.g.nodes:
            node_data = self.g.nodes[nid]
            for n_attr in node_attrs_to_drop:
                # The names are a union over all nodes, so a given node may lack some of them.
                node_data.pop(n_attr, None)
        for u, v, k in self.g.edges(keys=True):
            edge_data = self.g.edges[u, v, k]
            for e_attr in edge_attrs_to_drop:
                edge_data.pop(e_attr, None)

    @property
    def graph(self) -> Graph:
        """The working copy of the graph, including any "nfeat" / "efeat" attributes."""
        return self.g
Пример #20
0
def plot_graph(graph: nx.MultiDiGraph) -> None:
    """
    Plot the Multiple Directed graph using the plotly library.

    Edges are drawn as black lines with a hover text showing the edge's own 'weight'.
    Nodes are drawn as markers; a node that carries attributes must provide 'size'
    and 'balance', a node without attributes is drawn with size 10. Marker colours
    come from the number of entries in each node's adjacency mapping.
    """
    # Choosing the spring layout to position the vertices of the graph.
    pos = nx.spring_layout(graph)

    # Creating the edge trace.
    edge_x = []
    edge_y = []

    xtext = []
    ytext = []
    edge_values_text = []
    # Iterate with the edge data attached so that every parallel edge reports its own
    # weight instead of the weight of whichever edge happens to be stored under key 0.
    for source, target, edge_data in graph.edges(data=True):
        # Determine the start and end coordinates of the edge on the graph.
        x0, y0 = pos[source]
        x1, y1 = pos[target]

        # A None entry separates consecutive line segments in the trace.
        edge_x.extend((x0, x1, None))
        edge_y.extend((y0, y1, None))

        # The hover text is anchored at the midpoint of the edge.
        xtext.append((x0 + x1) / 2)
        ytext.append((y0 + y1) / 2)

        # Add transaction value to list of edge_values data.
        value = edge_data['weight']
        edge_values_text.append(f"Transaction Value: {value}")

    # Plotting the edges.
    edge_trace = go.Scatter(x=edge_x,
                            y=edge_y,
                            line=dict(width=1, color='black'),
                            mode='lines')

    # Plotting the edge transaction text.
    edge_values_trace = go.Scatter(x=xtext,
                                   y=ytext,
                                   mode='none',
                                   text=edge_values_text,
                                   textposition='top center',
                                   hovertemplate='%{text}<extra></extra>')

    # Creating the node trace.
    node_x = []
    node_y = []
    node_size = []
    for node in graph.nodes():
        # Determine the coordinates of each node (using the spring layout defined earlier)
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)

        # Nodes without any attributes get a default marker size.
        size = 10
        if graph.nodes[node] != {}:
            size = graph.nodes[node]['size']

        node_size.append(size)

    node_trace = go.Scatter(x=node_x,
                            y=node_y,
                            mode='markers',
                            hoverinfo='text',
                            marker=dict(showscale=True,
                                        colorscale='Hot',
                                        color=[],
                                        size=node_size,
                                        colorbar=dict(
                                            thickness=10,
                                            title='# of Transactions (degree)',
                                            xanchor='left',
                                            titleside='right'),
                                        line_width=2))

    # Setting the text of each node to its address.
    node_text = []
    for node in graph.nodes():
        node_desc = f"Address: {node}"

        # If the account doesn't have an empty representation
        # in the graph, get its balance.
        if graph.nodes[node] != {}:
            balance = graph.nodes[node]['balance']
            node_desc = f"Address: {node}\nBalance: {balance}"

        # Add the description of the node to the list (which
        # will get added to the trace, updating it).
        node_text.append(node_desc)

    # Update the text attribute of the node trace.
    node_trace.text = node_text

    node_neighbours = []
    for node in graph.adjacency():
        # Each adjacency item is a (node, neighbours) tuple; the second item is a
        # dictionary keyed by neighbour address, so its length is the number of
        # distinct neighbours listed for this node.
        # NOTE(review): for a directed graph this presumably covers successors only,
        # while the colour bar is titled "degree" - confirm which one is intended.
        neighbours = len(node[1])
        node_neighbours.append(neighbours)

    node_trace.marker.color = node_neighbours

    # Setting up the layout here.
    layout = go.Layout(
        title='Ethereum Transaction Graph',
        showlegend=False,
        hovermode='closest',
        xaxis=dict(showgrid=False, zeroline=False),
        yaxis=dict(showgrid=False, zeroline=False),
        margin=dict(b=20, l=15, r=15,
                    t=50),  # Setting up the margins around the graph
    )

    # Plot the graph figure.
    fig = go.Figure(data=[edge_trace, node_trace, edge_values_trace],
                    layout=layout)

    # update layout
    fig.update_layout(title_font_size=15)

    fig.show()
Пример #21
0
def dump_graph_for_graphviz(graph: nx.MultiDiGraph,
                            node_attrs: list = ['kind', 'op', 'shape'],
                            edge_attrs: list = ['in', 'out'],
                            nodes_to_dump: list = None,
                            save_to_svg=False):
    """
    Build a Graphviz (dot) description of the graph and write it to the debug log.

    Every edge whose two end nodes are both selected becomes one '"src" -> "dst"' line.
    The node texts consist of the node name followed by the selected node attributes, the
    edge label consists of the selected edge attributes. Selected nodes that do not take
    part in any edge are appended by name only.

    :param graph: the graph to dump
    :param node_attrs: names of the node attributes to show (attributes a node lacks are
                       skipped); the default list is shared between calls and is never modified here
    :param edge_attrs: names of the edge attributes to show (attributes an edge lacks are skipped)
    :param nodes_to_dump: the nodes to include, all nodes of the graph when None
    :param save_to_svg: when True, the text is also written to a not yet existing
                        '<graph name>_<number>.txt' file and rendered to svg with graphviz
    :return: the dot text
    :raises ImportError: when save_to_svg is set and graphviz can not be imported
    :raises Error: when save_to_svg is set and saving or rendering fails
    """
    log.debug("---- GRAPHVIZ OUTPUT STARTS ----")
    if nodes_to_dump is None:
        nodes_to_dump = graph.nodes()
    # Set for constant-time membership tests; nodes_to_dump itself keeps the output order.
    selected = set(nodes_to_dump)

    def describe_node(node_name):
        # Node name followed by one "key=value" line per requested attribute the node has.
        # graph.nodes is used because the graph.node alias no longer exists in current networkx.
        node = graph.nodes[node_name]
        return str(node_name) + '\\n' + '\\n'.join([
            str(key) + '=' + str(node.get(key, 'None'))
            for key in node_attrs if key in node
        ])

    parts = ['\ndigraph {\n']
    visited_nodes = set()
    for src_node_name, dst_node_name, attrs in graph.edges(data=True):
        visited_nodes.add(src_node_name)
        visited_nodes.add(dst_node_name)
        if src_node_name not in selected or dst_node_name not in selected:
            continue
        src_node_string = describe_node(src_node_name)
        dst_node_string = describe_node(dst_node_name)
        edge_string = ' '.join([
            str(key) + '=' + str(attrs.get(key, 'None')) for key in edge_attrs
            if key in attrs
        ])
        parts.append('"{}" -> "{}" [label = "{}"];\n'.format(
            src_node_string, dst_node_string, edge_string))
    for node in nodes_to_dump:
        if node not in visited_nodes:
            parts.append('"{}"'.format(
                node
            ))  # TODO: add attributes like it was done in the loop above
            visited_nodes.add(node)
    parts.append('}')
    string = ''.join(parts)
    log.debug(string)
    log.debug("---- GRAPHVIZ OUTPUT ENDS ----")

    if save_to_svg:
        try:
            import graphviz
            import os
            base_name = graph.name.replace('/', '_')
            file_name = "{}_{}.txt".format(base_name, 0)
            # Try increasing numbers until a file name is found that is not taken yet.
            suffix = 1
            while os.path.exists(file_name):
                file_name = "{}_{}.txt".format(base_name, suffix)
                suffix += 1
            with open(file_name, "w") as f:
                f.write(string)
            graphviz.render('dot', 'svg', file_name)
            print('Graph was saved to {}.{}'.format(file_name, 'svg'))
        except ImportError as e:
            raise ImportError('Can\'t import graphviz') from e
        except Exception as e:
            raise Error('Can\'t save graph to svg') from e

    return string
Пример #22
0
def remap_node_identifier(graph: nx.MultiDiGraph, category: str, alternative_property: str, prefix=None) -> nx.MultiDiGraph:
    """
    Remap a node's 'id' attribute with value from a node's ``alternative_property`` attribute.

    Nodes are relabelled in place. Afterwards the 'subject', 'object' and 'edge_key'
    attributes of every edge whose end points no longer match are brought up to date.

    Parameters
    ----------
    graph: networkx.MultiDiGraph
        The graph
    category: string
        category referring to nodes whose 'id' needs to be remapped;
        nodes that have no 'category' attribute at all are remapped as well
    alternative_property: string
        property name from which the new value is pulled from
    prefix: string
        signifies that the value for ``alternative_property`` is a list
        and the ``prefix`` indicates which value to pick from the list

    Returns
    -------
    networkx.MultiDiGraph
        The modified graph

    """
    mapping = {}
    for nid, node_data in graph.nodes(data=True):
        if 'category' in node_data and category not in node_data['category']:
            continue
        if alternative_property not in node_data:
            continue

        alternative_values = node_data[alternative_property]
        if isinstance(alternative_values, (list, set, tuple)):
            if prefix:
                for v in alternative_values:
                    if prefix in v:
                        # take the first occurring value that contains the given prefix
                        mapping[nid] = v
                        break
            elif alternative_values:
                # no prefix defined; pick the 1st one. next(iter(...)) also works for sets,
                # which can not be indexed (for a set "first" is an arbitrary element).
                # An empty collection leaves the node unmapped.
                mapping[nid] = next(iter(alternative_values))
        elif isinstance(alternative_values, str):
            if prefix:
                if alternative_values.startswith(prefix):
                    mapping[nid] = alternative_values
            else:
                # no prefix defined
                mapping[nid] = alternative_values
        else:
            logging.error(f"Cannot use {alternative_values} from alternative_property {alternative_property}")

    nx.set_node_attributes(graph, values=mapping, name='id')
    nx.relabel_nodes(graph, mapping, copy=False)

    # update 'subject' / 'object' of all edges whose end points were relabelled
    update_edge_keys = {}
    updated_subject_values = {}
    updated_object_values = {}
    for u, v, k, edge_data in graph.edges(keys=True, data=True):
        # Compare by value: two equal identifiers are not necessarily the same object.
        subject_changed = u != edge_data['subject']
        object_changed = v != edge_data['object']
        if subject_changed:
            updated_subject_values[(u, v, k)] = u
        if object_changed:
            updated_object_values[(u, v, k)] = v
        if subject_changed or object_changed:
            update_edge_keys[(u, v, k)] = generate_edge_key(u, edge_data['edge_label'], v)

    nx.set_edge_attributes(graph, values=updated_subject_values, name='subject')
    nx.set_edge_attributes(graph, values=updated_object_values, name='object')
    nx.set_edge_attributes(graph, values=update_edge_keys, name='edge_key')

    return graph
Пример #23
0
def all_edges(g: nx.MultiDiGraph, pre, post):
    """
    Collect the parallel edges that lead from ``pre`` to ``post``.

    Returns a dict mapping each edge key to that edge's data dict; the dict is
    empty when no edge connects the two nodes.
    """
    parallel_edges = {}
    for _, target, edge_key, edge_data in g.edges(pre, data=True, keys=True):
        if target == post:
            parallel_edges[edge_key] = edge_data
    return parallel_edges
Пример #24
0
def get_eulerian_circuit(g: nx.MultiDiGraph) -> tuple:
    """
    Walk over all edges of ``g``, choosing each next edge by node parity and edge type.

    The walk works on a deep copy of ``g`` and removes every edge it uses from that copy,
    so the caller's graph is not modified. It reads the node attribute "parity"
    ("Odd" / "Even") and the edge attributes "Tu" and "Tv" (compared against "A" / "B").
    After arriving at a node through an edge, the "Tu" required of the next edge is the
    previous edge's "Tv", flipped when the node's parity is "Odd" and kept when it is "Even".

    Returns the list of traversed edges as ``(u, v, data)`` tuples.
    NOTE(review): the return annotation says ``tuple`` but a list is returned.
    NOTE(review): an empty graph raises IndexError when the first edge is picked.
    """
    ans = []
    # Work on a private copy; edges are removed from it as they are consumed.
    g = deepcopy(g)
    first_edge = list(g.edges(data=True))[0]
    u, v, ABdata = first_edge
    # Remove the first parallel u->v edge whose data equals the data of the chosen edge.
    for key, data in g[u][v].items():
        if data == ABdata:
            g.remove_edge(u, v, key=key)
            break
    ans.append(first_edge)
    now = v

    now_parity = g.nodes[now]["parity"]
    now_type = ABdata["Tv"]

    def get_next_type(now_parity, now_type):
        # "Odd" swaps A and B, "Even" keeps the type. Any other parity returns None
        # explicitly; any other type falls through and returns None implicitly.
        if now_parity == "Odd":
            if now_type == "A":
                return "B"
            elif now_type == "B":
                return "A"

        elif now_parity == "Even":
            if now_type == "A":
                return "A"
            elif now_type == "B":
                return "B"

        else:
            return None

    next_type = get_next_type(now_parity, now_type)
    # Keep walking until every edge of the copy has been consumed.
    while g.edges:
        flg = False
        # NOTE: the loop variable "next" shadows the builtin of the same name.
        for next, atlas_view in list(g.succ[now].items()):
            for key, data in list(atlas_view.items()):
                # Take the first outgoing edge whose "Tu" matches the required type.
                if data["Tu"] == next_type:
                    ans.append((now, next, data))
                    g.remove_edge(now, next, key=key)

                    now = next
                    now_parity = g.nodes[now]["parity"]
                    now_type = data["Tv"]
                    next_type = get_next_type(now_parity, now_type)
                    flg = True
                    break
            if flg:
                break
        else:
            # for/else: reached only when the loop above ended without a break,
            # i.e. no outgoing edge of the current node had the required type.
            print("miss!", ans)
            sleep(1)
            ok = False
            # Backtrack: undo edges from the end of the path until a replacement is found.
            # NOTE(review): ans.pop raises IndexError once the path is empty.
            while not ok:
                edge = ans.pop(-1)
                u, v, edge_data = edge
                for next, atlas_view in list(g.succ[u].items()):
                    for key, data in list(atlas_view.items()):
                        # A replacement leaves the same node with the same "Tu" as the undone edge.
                        if data["Tu"] == edge_data["Tu"]:
                            ans.append((u, next, data))
                            g.remove_edge(u, next, key=key)

                            now = next
                            now_parity = g.nodes[now]["parity"]
                            now_type = data["Tv"]
                            next_type = get_next_type(now_parity, now_type)

                            # NOTE(review): there is no break here, so every matching edge
                            # is appended and removed, not only the first - confirm intent.
                            ok = True
                # The undone edge goes back into the graph (under a newly assigned key).
                g.add_edge(u, v, **edge_data)

    return ans
Пример #25
0
def NetworkXGraphToProgramGraphProto(
    g: nx.MultiDiGraph, ) -> programl_pb2.ProgramGraph:
    """Convert a networkx graph constructed using the old control-and-data-flow
    graph builder to a ProGraML graph proto.

    Args:
      g: The graph to convert. It must contain a node named "root". Every node
        needs a "type" attribute (one of "statement", "identifier",
        "immediate", "magic") and at least one of the "original_text", "text"
        or "name" attributes. Every edge needs a "flow" attribute (one of
        "call", "control", "data"); "position" is optional and defaults to 0.

    Returns:
      The populated ProgramGraph proto. The "root" node is always node 0; the
      remaining nodes follow in the iteration order of g.nodes.

    Raises:
      ValueError: If the graph has no "root" node, or if a node has no type or
        no text attribute.
      KeyError: If a node type or edge flow is not one of the values above.
    """
    proto = programl_pb2.ProgramGraph()

    # Translation tables from attribute strings to proto enum values. They do
    # not depend on the node/edge being translated, so build them once.
    node_type_map = {
        "statement": programl_pb2.Node.STATEMENT,
        "identifier": programl_pb2.Node.IDENTIFIER,
        "immediate": programl_pb2.Node.IMMEDIATE,
        # We are removing the "magic" node type, replacing them with a regular
        # statement of unknown type.
        "magic": programl_pb2.Node.STATEMENT,
    }
    edge_flow_map = {
        "call": programl_pb2.Edge.CALL,
        "control": programl_pb2.Edge.CONTROL,
        "data": programl_pb2.Edge.DATA,
    }

    # Create the map from function names to function list indices.
    function_names = sorted({fn for _, fn in g.nodes(data="function") if fn})
    function_to_idx_map = {fn: i for i, fn in enumerate(function_names)}

    # Create the function list.
    for function_name in function_names:
        function_proto = proto.function.add()
        function_proto.name = function_name

    # Fix the node order ("root" first) and build the translation map from
    # node names to node list indices.
    if "root" not in g.nodes:
        raise ValueError(f"Graph has no root node: {g.nodes}")
    node_order = ["root"] + [name for name in g.nodes if name != "root"]
    node_to_idx_map = {name: idx for idx, name in enumerate(node_order)}

    # Create the node list.
    for node_name in node_order:
        node = g.nodes[node_name]
        node_proto = proto.node.add()

        # Translate node attributes.
        node_type = node.get("type")
        if not node_type:
            # Report the node itself: node_type is always empty/None here.
            raise ValueError(f"Node has no type: {node}")
        node_proto.type = node_type_map[node_type]

        # Get the text of the node.
        if "original_text" in node:
            node_proto.text = node["original_text"]
            node_proto.preprocessed_text = node["text"]
        elif "text" in node:
            node_proto.text = node["text"]
            node_proto.preprocessed_text = node["text"]
        elif "name" in node:
            node_proto.text = node["name"]
            node_proto.preprocessed_text = node["name"]
        else:
            raise ValueError(
                f"Node has no original_text, text, or name: {node}")

        # Set the encoded representation of the node.
        x = node.get("x", None)
        if x is not None:
            node_proto.x.extend([x])

        # Set the node function.
        function = node.get("function")
        if function:
            node_proto.function = function_to_idx_map[function]

    # Create the edge list.
    for src, dst, data in g.edges(data=True):
        edge = proto.edge.add()
        edge.flow = edge_flow_map[data["flow"]]
        edge.source_node = node_to_idx_map[src]
        edge.destination_node = node_to_idx_map[dst]
        edge.position = data.get("position", 0)

    return proto
Пример #26
0
class MossNet:
    '''A network of MOSS results stored as a NetworkX ``MultiDiGraph``

    Every edge ``(u, v)`` carries one ``'attr_dict'`` attribute of the form
    ``{'files': files, 'left': left, 'right': right}``. The accessor methods unpack
    ``files`` as ``(u_filename, v_filename)`` and ``left``/``right`` as ``(percent, html)``.
    '''

    def __init__(self, moss_results_dict):
        '''Create a ``MossNet`` object from a 3D dictionary of downloaded MOSS results

        Args:
            ``moss_results_dict`` (``dict``): A 3D dictionary of downloaded MOSS results, i.e.
            ``moss_results_dict[u][v][files] = (left, right)``. A ``MultiDiGraph`` is adopted
            as-is (not copied). A ``str`` is treated as the path of a serialized dictionary
            (opened with ``gopen`` if the path ends in ``.gz``).

        Returns:
            ``MossNet``: A ``MossNet`` object

        Raises:
            ``ValueError``: If a dictionary could not be loaded from the given path

            ``TypeError``: If the input is not a 3D dictionary of MOSS results
        '''
        if isinstance(moss_results_dict, MultiDiGraph):
            self.graph = moss_results_dict
            return
        if isinstance(moss_results_dict, str):
            path = moss_results_dict
            try:
                # NOTE(review): ``load`` is presumably ``pickle.load``, which can execute
                # arbitrary code -- only load files from trusted sources.
                if path.lower().endswith('.gz'):
                    with gopen(path) as infile:
                        moss_results_dict = load(infile)
                else:
                    with open(path, 'rb') as infile:
                        moss_results_dict = load(infile)
            except Exception as err:
                raise ValueError("Unable to load dictionary: %s" % path) from err
        if not isinstance(moss_results_dict, dict):
            raise TypeError("moss_results_dict must be a 3D dictionary of MOSS results")
        self.graph = MultiDiGraph()
        for u, u_edges in moss_results_dict.items():
            if not isinstance(u_edges, dict):
                raise TypeError("moss_results_dict must be a 3D dictionary of MOSS results")
            for v, u_v_links in u_edges.items():
                if not isinstance(u_v_links, dict):
                    raise TypeError("moss_results_dict must be a 3D dictionary of MOSS results")
                for f, sides in u_v_links.items():
                    try:
                        left, right = sides
                    except (TypeError, ValueError) as err:
                        raise TypeError("moss_results_dict must be a 3D dictionary of MOSS results") from err
                    self.graph.add_edge(u, v, attr_dict = {'files':f, 'left':left, 'right':right})

    def save(self, outfile):
        '''Save this ``MossNet`` object as a 3D dictionary of MOSS results

        Args:
            ``outfile`` (``str``): The desired output file's path (compressed with ``gopen`` if it ends in ``.gz``)
        '''
        out = dict()
        for u in self.graph.nodes:
            u_edges = dict()
            out[u] = u_edges
            for v in self.graph.neighbors(u):
                u_v_links = dict()
                u_edges[v] = u_v_links
                for link in self.graph.get_edge_data(u,v).values():
                    edge = link['attr_dict']
                    u_v_links[edge['files']] = (edge['left'], edge['right'])
        # the context managers close the file even if dumping fails
        if outfile.lower().endswith('.gz'):
            with gopen(outfile, mode='wb', compresslevel=9) as f:
                pkldump(out, f)
        else:
            with open(outfile, 'wb') as f:
                pkldump(out, f)

    def __add__(self, o):
        '''Return a new ``MossNet`` containing the nodes and edges of both operands

        Raises:
            ``TypeError``: If ``o`` is not a ``MossNet``
        '''
        if not isinstance(o, MossNet):
            raise TypeError("unsupported operand type(s) for +: 'MossNet' and '%s'" % type(o).__name__)
        g = MultiDiGraph()
        g.add_edges_from(list(self.graph.edges(data=True)) + list(o.graph.edges(data=True)))
        g.add_nodes_from(list(self.graph.nodes(data=True)) + list(o.graph.nodes(data=True)))
        return MossNet(g)

    def get_networkx(self):
        '''Return a NetworkX ``MultiDiGraph`` equivalent to this ``MossNet`` object

        Returns:
            ``MultiDiGraph``: A copy of the NetworkX ``MultiDiGraph`` underlying this ``MossNet`` object
        '''
        return self.graph.copy()

    def get_nodes(self):
        '''Returns a ``set`` of node labels in this ``MossNet`` object

        Returns:
            ``set``: The node labels in this ``MossNet`` object
        '''
        return set(self.graph.nodes)

    def get_pair(self, u, v, style='tuples'):
        '''Returns the links between nodes ``u`` and ``v``

        Args:
            ``u`` (``str``): A node label

            ``v`` (``str``): A node label not equal to ``u``

            ``style`` (``str``): The representation of a given link

            * ``"tuples"``: Links are ``((u_percent, u_html), (v_percent, v_html))`` tuples

            * ``"html"``: Links are HTML representation (one HTML for all links)

            * ``"htmls"``: Links are HTML representations (one HTML per link)

        Returns:
            ``dict``: The links between ``u`` and ``v`` (keys are ``(u_filename, v_filename)`` tuples) for the
            ``"tuples"`` and ``"htmls"`` styles, or a single HTML ``str`` for the ``"html"`` style. If there is
            no edge from ``u`` to ``v``, the result contains no links.

        Raises:
            ``ValueError``: If ``style`` is invalid, ``u == v``, or either node does not exist
        '''
        if style not in {'tuples', 'html', 'htmls'}:
            raise ValueError("Invalid link style: %s" % style)
        if u == v:
            raise ValueError("u and v cannot be equal: %s" % u)
        for node in [u,v]:
            if not self.graph.has_node(node):
                raise ValueError("Nonexistant node: %s" % node)
        # get_edge_data returns None (not an empty dict) when there is no u -> v edge
        links = self.graph.get_edge_data(u,v) or dict()
        out = dict()
        for k in sorted(links.keys(), key=lambda x: links[x]['attr_dict']['files']):
            d = links[k]['attr_dict']
            u_fn, v_fn = d['files']
            u_percent, u_html = d['left']
            v_percent, v_html = d['right']
            if style == 'tuples':
                out[(u_fn, v_fn)] = ((u_percent, u_html), (v_percent, v_html))
            else:
                out[(u_fn, v_fn)] = '<html><table style="width:100%%" border="1"><tr><td colspan="2"><center><b>%s/%s --- %s/%s</b></center></td></tr><tr><td>%s (%d%%)</td><td>%s (%d%%)</td></tr><tr><td><pre>%s</pre></td><td><pre>%s</pre></td></tr></table></html>' % (u, u_fn, v, v_fn, u, u_percent, v, v_percent, u_html, v_html)
        if style == 'html':
            # merge the per-link documents into a single HTML document
            out = '<html>' + '<br>'.join(out[fns].replace('<html>','').replace('</html>','') for fns in sorted(out.keys())) + '</html>'
        return out

    def get_summary(self, style='html'):
        '''Returns a summary of this ``MossNet``

        Args:
            ``style`` (``str``): The representation of this ``MossNet`` (only ``"html"`` is supported)

        Returns:
            ``str``: An HTML table with one row per link, sorted in descending order of the larger of the two percentages

        Raises:
            ``ValueError``: If ``style`` is invalid
        '''
        if style not in {'html'}:
            raise ValueError("Invalid summary style: %s" % style)
        matches = list() # list of (u_path, u_percent, v_path, v_percent) tuples
        for u,v in self.traverse_pairs(order=None):
            links = self.graph.get_edge_data(u,v)
            for k in links:
                d = links[k]['attr_dict']
                u_fn, v_fn = d['files']
                u_percent, _ = d['left']
                v_percent, _ = d['right']
                matches.append(('%s/%s' % (u,u_fn), u_percent, '%s/%s' % (v,v_fn), v_percent))
        matches.sort(reverse=True, key=lambda x: max(x[1],x[3]))
        return '<html><table style="width:100%%" border="1">%s</table></html>' % ''.join(('<tr><td>%s (%d%%)</td><td>%s (%d%%)</td></tr>' % tup) for tup in matches)

    def num_links(self, u, v):
        '''Returns the number of links between ``u`` and ``v``

        Args:
            ``u`` (``str``): A node label

            ``v`` (``str``): A node label not equal to ``u``

        Returns:
            ``int``: The number of links between ``u`` and ``v`` (0 if there is no edge from ``u`` to ``v``)

        Raises:
            ``ValueError``: If either node does not exist
        '''
        for node in [u,v]:
            if not self.graph.has_node(node):
                raise ValueError("Nonexistant node: %s" % node)
        # get_edge_data returns None (not an empty dict) when there is no u -> v edge
        links = self.graph.get_edge_data(u,v)
        return 0 if links is None else len(links)

    def num_nodes(self):
        '''Returns the number of nodes in this ``MossNet`` object

        Returns:
            ``int``: The number of nodes in this ``MossNet`` object
        '''
        return self.graph.number_of_nodes()

    def num_edges(self):
        '''Returns the number of (undirected) edges in this ``MossNet`` object (including parallel edges)

        Returns:
            ``int``: The number of (undirected) edges in this ``MossNet`` object (including parallel edges)
        '''
        # halves the directed edge count of the underlying graph
        return self.graph.number_of_edges() // 2

    def outlier_pairs(self):
        '''Predict which student pairs are outliers (i.e., too many problem similarities).
        The distribution of number of links between student pairs (i.e., histogram) is modeled as y = A/(B^x),
        where x = a number of links, and y = the number of student pairs with that many links

        Returns:
            ``list`` of ``tuple``: The ``(number of links, u, v)`` tuples of the student pairs expected to be
            outliers (in decreasing order of number of links). The list is empty if there are no pairs or if
            the model cannot be fitted (fewer than two consecutive link counts, or a flat histogram).
        '''
        links = dict() # key = number of links; value = set of student pairs that have that number of links
        for u,v in self.traverse_pairs():
            links.setdefault(self.num_links(u,v), set()).add((u,v))
        if len(links) == 0:
            return list()
        # the histogram's x-axis is the number of links, i.e. the KEYS of `links` (not the set sizes)
        min_links = min(links)
        max_links = max(links)
        mult = list() # ratios between consecutive histogram bars while the histogram is non-increasing
        for i in range(min_links, max_links):
            if i not in links or i+1 not in links or len(links[i+1]) > len(links[i]):
                break
            mult.append(float(len(links[i]))/len(links[i+1]))
        if len(mult) == 0:
            return list() # no two consecutive link counts, so B cannot be estimated
        B = sum(mult)/len(mult)
        if B <= 1:
            return list() # flat histogram: log(B) == 0, so the model has no finite cutoff
        A = len(links[min_links]) * (B**min_links)
        n_cutoff = log(A)/log(B) # the x at which the model predicts y = 1 pair
        out = list()
        for n in sorted(links.keys(), reverse=True):
            if n < n_cutoff:
                break
            for u,v in links[n]:
                out.append((n,u,v))
        return out

    def traverse_pairs(self, order='descending'):
        '''Iterate over student pairs

        Each adjacent pair is yielded once, as ``(u, v)`` with ``u < v``. Because this is a generator,
        an invalid ``order`` is only reported once iteration starts.

        Args:
            ``order`` (``str``): Order to sort pairs in iteration

            * ``None`` to not sort (may be faster for large/dense graphs)

            * ``"ascending"`` to sort in ascending order of number of links

            * ``"descending"`` to sort in descending order of number of links

        Raises:
            ``ValueError``: If ``order`` is invalid
        '''
        if order not in {None, 'None', 'none', 'ascending', 'descending'}:
            raise ValueError("Invalid order: %s" % order)
        pairs = [(u,v) for u in self.graph.nodes for v in self.graph.neighbors(u) if u < v]
        if order == 'ascending':
            pairs.sort(key=lambda x: len(self.graph.get_edge_data(x[0],x[1])))
        elif order == 'descending':
            pairs.sort(key=lambda x: len(self.graph.get_edge_data(x[0],x[1])), reverse=True)
        for pair in pairs:
            yield pair

    def export(self, outpath, style='html', gte=0, verbose=False):
        '''Export the links in this ``MossNet`` in the specified style

        Args:
            ``outpath`` (``str``): Path to desired output folder/file (must not exist yet)

            ``style`` (``str``): Desired output style

            * ``"dot"`` to export as a GraphViz DOT file

            * ``"gexf"`` to export as a Graph Exchange XML Format (GEXF) file

            * ``"html"`` to export one HTML file per pair (plus ``summary.html``)

            ``gte`` (``int``): The minimum number of links for an edge to be exported

            ``verbose`` (``bool``): ``True`` to show verbose messages, otherwise ``False``

        Raises:
            ``ValueError``: If ``style`` is invalid, ``outpath`` already exists, or ``gte`` is negative

            ``TypeError``: If ``gte`` is not an ``int``

            ``RuntimeError``: If ``style`` is ``"dot"`` or ``"gexf"`` and seaborn cannot be imported
        '''
        if style not in {'dot', 'gexf', 'html'}:
            raise ValueError("Invalid export style: %s" % style)
        if isdir(outpath) or isfile(outpath):
            raise ValueError("Output path exists: %s" % outpath)
        if not isinstance(gte, int):
            raise TypeError("'gte' must be an 'int', but you provided a '%s'" % type(gte).__name__)
        if gte < 0:
            raise ValueError("'gte' must be non-negative, but yours was %d" % gte)

        # export as folder of HTML files
        if style == 'html':
            summary = self.get_summary(style='html')
            pairs = list(self.traverse_pairs(order=None))
            makedirs(outpath)
            with open('%s/summary.html' % outpath, 'w') as f:
                f.write(summary)
            num_exported = 0
            for i,pair in enumerate(pairs):
                if verbose:
                    print("Exporting pair %d of %d..." % (i+1, len(pairs)), end='\r')
                u,v = pair
                curr_num_links = self.num_links(u,v)
                if curr_num_links < gte:
                    continue
                with open("%s/%d_%s_%s.html" % (outpath, curr_num_links, u, v), 'w') as f:
                    f.write(self.get_pair(u, v, style='html'))
                num_exported += 1
            if verbose:
                # report the pairs actually written, not the ones skipped by `gte`
                print("Successfully exported %d pairs" % num_exported)

        # export as GraphViz DOT or a GEXF file
        else:
            if verbose:
                print("Computing colors...", end='')
            # one color per possible number of links; a graph without pairs still gets a valid 1-color palette
            max_links = max((self.num_links(u,v) for u,v in self.traverse_pairs(order=None)), default=1)
            try:
                from seaborn import color_palette
            except Exception as err:
                raise RuntimeError("Exporting as a DOT or GEXF file currently requires seaborn") from err
            pal = color_palette("Reds", max_links)
            if verbose:
                print(" done")
                print("Computing node information...", end='')
            nodes = list(self.get_nodes())
            index = {u:i for i,u in enumerate(nodes)}
            if verbose:
                print(" done")
                print("Writing output file...", end='')
            with open(outpath, 'w') as outfile:
                if style == 'dot':
                    pal = [str(c).upper() for c in pal.as_hex()]
                    outfile.write("graph G {\n")
                    for u in nodes:
                        outfile.write('  node%d[label="%s"]\n' % (index[u], u))
                    for u,v in self.traverse_pairs():
                        curr_num_links = self.num_links(u,v)
                        if curr_num_links < gte:
                            continue
                        outfile.write('  node%d -- node%d[color="%s"]\n' % (index[u], index[v], pal[curr_num_links-1]))
                    outfile.write('}\n')
                else:
                    from datetime import datetime
                    pal = [(int(255*c[0]), int(255*c[1]), int(255*c[2])) for c in pal]
                    outfile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
                    outfile.write('<gexf xmlns="http://www.gexf.net/1.3draft" xmlns:viz="http://www.gexf.net/1.3draft/viz">\n')
                    outfile.write('  <meta lastmodifieddate="%s">\n' % datetime.today().strftime('%Y-%m-%d'))
                    outfile.write('    <creator>MossNet</creator>\n')
                    outfile.write('    <description>A MossNet network exported to GEXF</description>\n')
                    outfile.write('  </meta>\n')
                    outfile.write('  <graph mode="static" defaultedgetype="undirected">\n')
                    outfile.write('    <nodes>\n')
                    for u in nodes:
                        outfile.write('      <node id="%d" label="%s"/>\n' % (index[u], u))
                    outfile.write('    </nodes>\n')
                    outfile.write('    <edges>\n')
                    for i,pair in enumerate(self.traverse_pairs()):
                        u,v = pair
                        curr_num_links = self.num_links(u,v)
                        # honor `gte` like the other export styles do
                        if curr_num_links < gte or curr_num_links == 0:
                            continue
                        color = pal[curr_num_links-1]
                        outfile.write('      <edge id="%d" source="%d" target="%d">\n' % (i, index[u], index[v]))
                        outfile.write('        <viz:color r="%d" g="%d" b="%d"/>\n' % (color[0], color[1], color[2]))
                        outfile.write('      </edge>\n')
                    outfile.write('    </edges>\n')
                    outfile.write('  </graph>\n')
                    outfile.write('</gexf>\n')
            if verbose:
                print(" done")
Пример #27
0
def graph_to_json(graph: MultiDiGraph) -> List[rc.JsObject]:
    """Flatten a graph into a list of JSON-style dictionaries.

    The node entries come first: each is a copy of the node's attribute
    dictionary with ``"type"`` set to ``"node"``. They are followed by one
    entry per edge holding ``"type": "edge"``, the endpoints (``"from"`` and
    ``"to"``) and the edge's ``"edge_type"`` attribute.
    """
    node_entries: List[rc.JsValue] = []
    for _, attributes in graph.nodes(data=True):
        entry = dict(attributes)
        entry["type"] = "node"
        node_entries.append(entry)

    edge_entries = [
        {"type": "edge", "from": source, "to": target, "edge_type": attributes["edge_type"]}
        for source, target, attributes in graph.edges(data=True)
    ]
    return node_entries + edge_entries
Пример #28
0
def backtracking_1(multigraph: nx.MultiDiGraph,
                   x_edges,
                   y_edges,
                   vertex,
                   timeout: tuple = None,
                   global_timeout: tuple = None) -> bool:
    """Recursively extend the partial edge sets ``z`` and ``w`` starting at ``vertex``.

    Each non-fixed edge leaving ``vertex`` is tentatively assigned to ``z``;
    every other still non-fixed edge incident to ``vertex`` is then assigned
    to ``w``. The search recurses from the head of the chosen ``z`` edge and
    calls ``step_back`` whenever a choice is rejected.

    Args:
        multigraph: Graph that is searched and mutated in place. This function
            uses the edge attributes ``'fixed_z'`` / ``'fixed_w'``, the node
            attribute ``'included_in_z'`` and the graph attributes
            ``'length_z'``, ``'length_w'`` and ``'w'``.
        x_edges: Compared for equality with sets of ``(u, v)`` tuples, and its
            length is the target size for both ``z`` and ``w`` -- presumably
            the edge set of a known cycle; confirm against the caller.
        y_edges: Compared for equality with sets of ``(u, v)`` tuples --
            presumably the edge set of a second known cycle; confirm against
            the caller.
        vertex: Node whose incident edges are explored in this call.
        timeout: Not used here other than being passed on to recursive calls.
        global_timeout: Not used here other than being passed on to recursive
            calls.

    Returns:
        True as soon as ``z`` and ``w`` both hold ``len(x_edges)`` edges, both
        differ from ``x_edges`` and ``y_edges``, and both pass
        ``utils.is_hamiltonian_cycle``; the marks made along the successful
        path are left in ``multigraph``. False if no edge leaving ``vertex``
        leads to such a pair.
    """

    # NOTE: filter() is lazy, so the predicate sees the attribute changes made
    # (and undone) inside the loop body for edges it has not reached yet.
    for u, v, key, attrs in filter(
            utils.is_non_fixed_edge,
            multigraph.edges(vertex, data=True, keys=True)):
        # checking for cycle in z
        # (a node already in z may only be revisited by the edge that brings z
        # to its full length of len(x_edges))
        if ('included_in_z' in multigraph.nodes[v]
                and multigraph.graph['length_z'] + 1 != len(x_edges)):
            continue

        # Tentatively put edge (u, v, key) into z.
        multigraph.edges[u, v, key]['fixed_z'] = True
        multigraph.nodes[v]['included_in_z'] = True
        multigraph.graph['length_z'] += 1

        # Every other still non-fixed edge touching `vertex` (outgoing first,
        # then incoming) goes into w; remember them for step_back().
        added_to_w = []
        for source in (multigraph.edges(vertex, data=True, keys=True),
                       multigraph.in_edges(vertex, data=True, keys=True)):
            for u_w, v_w, key_w, _ in filter(utils.is_non_fixed_edge, source):
                multigraph.edges[u_w, v_w, key_w]['fixed_w'] = True
                added_to_w.append((u_w, v_w, key_w))
                multigraph.graph['length_w'] += 1
                multigraph.graph['w'].add((u_w, v_w))

        # Reject the choice if w now has a cycle, unless w is complete and
        # that cycle is Hamiltonian.
        if added_to_w and utils.has_cycle(multigraph.graph['w']) and (
                multigraph.graph['length_w'] != len(x_edges)
                or not utils.is_hamiltonian_cycle(multigraph.graph['w'])):
            # presumably reverts the marks made above for this edge -- see step_back()
            step_back(multigraph, u, v, key, added_to_w)
            continue

        # Both edge sets are complete: check whether they form a solution.
        if multigraph.graph['length_z'] == len(
                x_edges) and multigraph.graph['length_w'] == len(x_edges):
            # (u, v) pairs of every edge that carries a 'fixed_z' attribute
            z_edges = set(
                item[:2]
                for item in nx.get_edge_attributes(multigraph, 'fixed_z'))
            # z and w must both differ from x and y and both be Hamiltonian cycles.
            if z_edges != x_edges and z_edges != y_edges and multigraph.graph['w'] != x_edges and multigraph.graph[
                'w'] != y_edges and \
                    utils.is_hamiltonian_cycle(z_edges) and utils.is_hamiltonian_cycle(multigraph.graph['w']):
                return True

            step_back(multigraph, u, v, key, added_to_w)
            continue

        # Still incomplete: continue the search from the head of the z edge.
        if backtracking_1(multigraph,
                          x_edges,
                          y_edges,
                          v,
                          timeout=timeout,
                          global_timeout=global_timeout):
            return True

        # Dead end below v: undo this choice and try the next edge.
        step_back(multigraph, u, v, key, added_to_w)

    return False
Пример #29
0
def plot_bank2bank_count(_g: nx.MultiDiGraph, _plot_img: str):
    """Plot bank-to-bank transaction count tables and save them as an image.

    Two tables are drawn: one counting all transactions and one counting only
    SAR transactions. Rows are originator banks, columns are beneficiary banks,
    and both tables have a "Total" row and column. The figure title reports
    the number and share of internal (same bank) and external transactions.

    :param _g: Transaction graph; every node that is an edge endpoint needs a
        "bank_id" attribute and every edge needs an "is_sar" attribute
    :param _plot_img: Path of the output image file
    """
    acct_bank = nx.get_node_attributes(_g, "bank_id")
    bank_list = sorted(set(acct_bank.values()))
    bank2bank_all = Counter()
    bank2bank_sar = Counter()

    for orig, bene, attr in _g.edges(data=True):
        bank_pair = (acct_bank[orig], acct_bank[bene])
        is_sar = attr["is_sar"]
        bank2bank_all[bank_pair] += 1
        if is_sar:
            bank2bank_sar[bank_pair] += 1

    total_num = _g.number_of_edges()
    internal_num = sum(num for pair, num in bank2bank_all.items() if pair[0] == pair[1])
    external_num = total_num - internal_num
    # A graph without transactions has no meaningful ratio; report 0% instead
    # of failing with ZeroDivisionError.
    internal_ratio = internal_num / total_num * 100 if total_num else 0.0
    external_ratio = external_num / total_num * 100 if total_num else 0.0
    internal_sar_num = sum(num for pair, num in bank2bank_sar.items() if pair[0] == pair[1])
    external_sar_num = sum(num for pair, num in bank2bank_sar.items() if pair[0] != pair[1])

    all_count_csv = _bank_count_table(bank2bank_all, bank_list)
    sar_count_csv = _bank_count_table(bank2bank_sar, bank_list)

    cols = ["To: %s" % bank for bank in bank_list] + ["Total"]
    rows = ["From: %s" % bank for bank in bank_list] + ["Total"]

    fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(9, 6))
    table_attr = {"rowLabels": rows, "colLabels": cols, "colWidths": [0.15 for _ in cols],
                  "loc": "center", "bbox": [0.15, 0.3, 0.75, 0.6]}
    ax1.axis("off")
    ax1.table(cellText=all_count_csv, **table_attr)
    ax1.set_title("Number of all bank-to-bank transactions")

    ax2.axis("off")
    ax2.table(cellText=sar_count_csv, **table_attr)
    ax2.set_title("Number of SAR bank-to-bank transactions")

    fig.suptitle("Internal bank transactions: Total = {:,} ({:.2f}%), SAR = {:,}".
                 format(internal_num, internal_ratio, internal_sar_num) + "\n" +
                 "External bank transactions: Total = {:,} ({:.2f}%), SAR = {:,}"
                 .format(external_num, external_ratio, external_sar_num),
                 y=0.1)
    plt.tight_layout()
    fig.savefig(_plot_img)


def _bank_count_table(pair_counts, bank_list):
    """Format per-bank-pair counts as table cell text with row/column totals.

    :param pair_counts: Counter keyed by (originator bank, beneficiary bank)
    :param bank_list: Banks in the order of the table rows and columns
    :return: One list of "{:,}"-formatted strings per originator bank (its
        per-beneficiary counts followed by the row total), plus a final row of
        column totals ending with the grand total
    """
    table = []
    for orig_bank in bank_list:
        row = [pair_counts[(orig_bank, bene_bank)] for bene_bank in bank_list]
        table.append(row + [sum(row)])

    column_totals = [sum(pair_counts[(orig_bank, bene_bank)] for orig_bank in bank_list)
                     for bene_bank in bank_list]
    table.append(column_totals + [sum(column_totals)])

    return [["{:,}".format(num) for num in row] for row in table]
Пример #30
0
    def CreateFromNetworkX(cls, g: nx.MultiDiGraph) -> "GraphTuple":
        """Construct a graph tuple from a networkx graph.

        Args:
          g: The graph to convert to a graph tuple. See
            deeplearning.ml4pl.graphs.programl.ProgramGraphToNetworkX() for a
            description of the networkx format. Node identifiers are used
            directly as list indices, and each edge's "flow" value is used as
            an index into the per-flow lists.

        Returns:
          A GraphTuple instance. Its adjacencies and edge_positions are regular
          numpy arrays when every edge flow has the same number of edges, and
          1-D object arrays holding one array per flow otherwise.
        """

        def pack_per_flow(arrays):
            """Pack one array per edge flow into a single numpy array."""
            if len({array.shape for array in arrays}) == 1:
                return np.array(arrays)
            # The flows have different edge counts. np.array() on such a ragged
            # list raises ValueError on NumPy >= 1.24, so fill an object array
            # explicitly to get the same result on every NumPy version.
            packed = np.empty(len(arrays), dtype=object)
            for i, array in enumerate(arrays):
                packed[i] = array
            return packed

        # Create an adjacency list and an edge position list for each edge
        # type: {control, data, call}.
        edge_flow_count = 3
        adjacencies = [[] for _ in range(edge_flow_count)]
        edge_positions = [[] for _ in range(edge_flow_count)]

        # Build the adjacency and positions lists.
        for src, dst, data in g.edges(data=True):
            adjacencies[data["flow"]].append((src, dst))
            edge_positions[data["flow"]].append(data["position"])

        # Convert the edge lists to numpy arrays.
        # Shape (edge_count, 2) per edge flow; an empty flow still gets two
        # columns.
        adjacencies = [
            np.array(adjacency, dtype=np.int32)
            if adjacency else np.zeros((0, 2), dtype=np.int32)
            for adjacency in adjacencies
        ]

        # Shape (edge_count) per edge flow.
        edge_positions = pack_per_flow([
            np.array(edge_position, dtype=np.int32)
            for edge_position in edge_positions
        ])

        # Set the node features.
        node_x = [None] * g.number_of_nodes()
        for node, x in g.nodes(data="x"):
            node_x[node] = np.array(x, dtype=np.int64)
        # Shape (node_count, node_x_dimensionality):
        node_x = np.vstack(node_x)

        # Set the node labels.
        node_targets = [None] * g.number_of_nodes()
        node_y = None
        for node, y in g.nodes(data="y"):
            # Node labels are optional. If there are no labels, break.
            if not y:
                break
            node_targets[node] = y
        else:
            # Only reached when every node has a label.
            # Shape (node_count, node_y_dimensionality):
            node_y = np.vstack(node_targets).astype(np.int64)

        # Get the optional graph-level features and labels.
        graph_x = np.array(g.graph["x"],
                           dtype=np.int64) if g.graph["x"] else None
        graph_y = np.array(g.graph["y"],
                           dtype=np.int64) if g.graph["y"] else None

        return GraphTuple(
            adjacencies=pack_per_flow(adjacencies),
            edge_positions=edge_positions,
            node_x=node_x,
            node_y=node_y,
            graph_x=graph_x,
            graph_y=graph_y,
        )