Example #1
def build_reeb_graph(adj_gr: nx.Graph, adjs: Sequence[AdjacencyList]):
    """
    Construct the Reeb graph from the adjacency lists.
    """
    # Newly-created object()s are inserted into the graph as dummy nodes. Those
    # dummy objects are then inserted into the frontier. That way, we don't
    # have to make a decision on which critical point index to assign them, or
    # even what kind of critical point should be assigned.
    # (A newly-created object() will only ever compare equal to itself.)

    # Why MultiDiGraph? In the adjacency graph, there can't be more than one
    # edge between nodes. Equivalently, there can't be more than one way to
    # reach a neighbour of any cell. (The boustrophedon decomposition
    # guarantees that, since cells don't overlap.) However, there isn't such a
    # guarantee for the Reeb graph, and indeed there can be more than one path
    # to reach two critical points next to each other.
    reeb_gr = nx.MultiDiGraph()
    frontier = []

    # next available critical point index
    node_i = 0

    # next available cell index
    edge_i = 0

    # The order of assigning free indices to edges is exactly the same as the
    # order of assigning to nodes in the adjacency graph. So the edges in the
    # Reeb graph have correspondence with the cells.
    for adj in adjs:
        events = find_events_from_adjlist(adj)
        for event_name, event_adj in events.items():
            # assert len(event_adj) == 1, "too many conn. changes"

            if event_name == "split":
                [[left, right]] = event_adj.items()

                pred = frontier.pop(left)
                # assert type(pred) == object
                # Replace the dummy object with the next available node number
                nx.relabel_nodes(reeb_gr, {pred: node_i}, copy=False)

                for succ in right:
                    obj = object()
                    reeb_gr.add_edge(node_i, obj, cell=edge_i)
                    frontier.insert(succ, obj)

                    edge_i += 1

                node_i += 1

            elif event_name == "merge":
                # The order is inverted for the adjacency relation
                # for easier representation
                [[right, left]] = event_adj.items()

                # need to delete from the frontier in reverse order,
                # so that we don't disturb the other elements as we delete
                # the ones in front
                preds = [
                    frontier.pop(pred) for pred in sorted(left, reverse=True)
                ]
                # order is backward now...
                preds.reverse()

                relabel_inplace_fixed(reeb_gr,
                                      {pred: node_i
                                       for pred in preds})

                # Only one new node is created, and only one descendant cell
                # is inserted into the frontier
                obj = object()
                reeb_gr.add_edge(node_i, obj, cell=edge_i)
                frontier.insert(right, obj)

                node_i += 1
                edge_i += 1

            elif event_name == "gain":
                [[_, right]] = event_adj.items()

                reeb_gr.add_node(node_i)
                for succ in sorted(right):
                    obj = object()
                    reeb_gr.add_edge(node_i, obj, cell=edge_i)
                    frontier.insert(succ, obj)

                    edge_i += 1

                node_i += 1

            elif event_name == "loss":
                [[left, _]] = event_adj.items()

                obj = frontier.pop(left)
                nx.relabel_nodes(reeb_gr, {obj: node_i}, copy=False)
                node_i += 1

            else:
                raise NotImplementedError()

    reeb_edge_weight(reeb_gr, adj_gr)

    # Postcondition
    for cell in adj_gr.nodes():
        assert any(cell == attrs["cell"]
                   for _, _, attrs in reeb_gr.edges(data=True)), \
            f"Cell {cell} not in Reeb graph!"

    return reeb_gr
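A quick standalone illustration (not part of the original module) of the MultiDiGraph comment above: parallel edges between the same pair of critical points stay distinct, one per cell, where a plain DiGraph would collapse them.

import networkx as nx

# Two cells connect the same pair of critical points; a MultiDiGraph
# keeps both edges, each carrying its own cell index.
g = nx.MultiDiGraph()
g.add_edge(0, 1, cell=0)
g.add_edge(0, 1, cell=1)
assert g.number_of_edges(0, 1) == 2  # nx.DiGraph would report 1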
Example #2
    def __mul__(self, scaling_matrix):
        """
        Replicates the graph, creating a supercell,
        intelligently joining together
        edges that lie on periodic boundaries.
        In principle, any operations on the expanded
        graph could also be done on the original
        graph, but a larger graph can be easier to
        visualize and reason about.
        :param scaling_matrix: same as Structure.__mul__
        :return: a new StructureGraph for the supercell
        """

        # Developer note: a different approach was also trialed, using
        # a simple Graph (instead of MultiDiGraph), with node indices
        # representing both site index and periodic image. Here, the
        # number of nodes != number of sites in the Structure. This
        # approach has many benefits, but made it more difficult to
        # keep the graph in sync with its corresponding Structure.

        # Broadly, it would be easier to multiply the Structure
        # *before* generating the StructureGraph, but this isn't
        # possible when generating the graph using critic2 from
        # charge density.

        # Multiplication works by looking for the expected position
        # of an image node, and seeing if that node exists in the
        # supercell. If it does, the edge is updated. This is more
        # computationally expensive than just keeping track of
        # which new lattice images are present, but should hopefully be
        # easier to extend to a general 3x3 scaling matrix.

        # code adapted from Structure.__mul__
        scale_matrix = np.array(scaling_matrix, np.int16)
        if scale_matrix.shape != (3, 3):
            scale_matrix = np.array(scale_matrix * np.eye(3), np.int16)
        else:
            # TODO: test __mul__ with full 3x3 scaling matrices
            raise NotImplementedError(
                'Not tested with 3x3 scaling matrices yet.')
        new_lattice = Lattice(
            np.dot(scale_matrix, self.structure.lattice.matrix))

        f_lat = lattice_points_in_supercell(scale_matrix)
        c_lat = new_lattice.get_cartesian_coords(f_lat)

        new_sites = []
        new_graphs = []

        for v in c_lat:

            # create a map of nodes from original graph to its image
            mapping = {
                n: n + len(new_sites)
                for n in range(len(self.structure))
            }

            for idx, site in enumerate(self.structure):

                s = PeriodicSite(site.species_and_occu,
                                 site.coords + v,
                                 new_lattice,
                                 properties=site.properties,
                                 coords_are_cartesian=True,
                                 to_unit_cell=False)

                new_sites.append(s)

            new_graphs.append(nx.relabel_nodes(self.graph, mapping, copy=True))

        new_structure = Structure.from_sites(new_sites)

        # merge all graphs into one big graph
        new_g = nx.MultiDiGraph()
        for new_graph in new_graphs:
            new_g = nx.union(new_g, new_graph)

        edges_to_remove = []  # tuple of (u, v, k)
        edges_to_add = []  # tuple of (u, v, attr_dict)

        # list of new edges inside supercell
        # for duplicate checking
        edges_inside_supercell = [{u, v} for u, v, d in new_g.edges(data=True)
                                  if d['to_jimage'] == (0, 0, 0)]
        new_periodic_images = []

        orig_lattice = self.structure.lattice

        # use k-d tree to match given position to an
        # existing Site in Structure
        kd_tree = KDTree(new_structure.cart_coords)

        # tolerance in Å for sites to be considered equal
        # this could probably be a lot smaller
        tol = 0.05

        for u, v, k, d in new_g.edges(keys=True, data=True):

            to_jimage = d['to_jimage']  # for node v

            # reduce unnecessary checking
            if to_jimage != (0, 0, 0):

                # get index in original site
                n_u = u % len(self.structure)
                n_v = v % len(self.structure)

                # get fractional co-ordinates of where atoms defined
                # by edge are expected to be, relative to original
                # lattice (keeping original lattice has
                # significant benefits)
                v_image_frac = np.add(self.structure[n_v].frac_coords,
                                      to_jimage)
                u_frac = self.structure[n_u].frac_coords

                # using the position of node u as a reference,
                # get relative Cartesian co-ordinates of where
                # atoms defined by edge are expected to be
                v_image_cart = orig_lattice.get_cartesian_coords(v_image_frac)
                u_cart = orig_lattice.get_cartesian_coords(u_frac)
                v_rel = np.subtract(v_image_cart, u_cart)

                # now retrieve position of node v in
                # new supercell, and get absolute Cartesian
                # co-ordinates of where atoms defined by edge
                # are expected to be
                v_expec = new_structure[u].coords + v_rel

                # now search in new structure for these atoms
                # query returns (distance, index)
                v_present = kd_tree.query(v_expec)
                v_present = v_present[1] if v_present[0] <= tol else None

                # check if image sites now present in supercell
                # and if so, delete old edge that went through
                # periodic boundary
                if v_present is not None:

                    new_u = u
                    new_v = v_present
                    new_d = d.copy()

                    # node now inside supercell
                    new_d['to_jimage'] = (0, 0, 0)

                    edges_to_remove.append((u, v, k))

                    # make sure we don't try to add duplicate edges
                    # will remove two edges for every one we add
                    if {new_u, new_v} not in edges_inside_supercell:

                        # normalize direction
                        if new_v < new_u:
                            new_u, new_v = new_v, new_u

                        edges_inside_supercell.append({new_u, new_v})
                        edges_to_add.append((new_u, new_v, new_d))

                else:

                    # want to find new_v such that we have
                    # full periodic boundary conditions
                    # so that nodes on one side of supercell
                    # are connected to nodes on opposite side

                    v_expec_frac = new_structure.lattice.get_fractional_coords(
                        v_expec)

                    # find new to_jimage
                    # use np.around to fix issues with finite precision leading to incorrect image
                    v_expec_image = np.around(v_expec_frac, decimals=3)
                    v_expec_image = v_expec_image - v_expec_image % 1

                    v_expec_frac = np.subtract(v_expec_frac, v_expec_image)
                    v_expec = new_structure.lattice.get_cartesian_coords(
                        v_expec_frac)
                    v_present = kd_tree.query(v_expec)
                    v_present = v_present[1] if v_present[0] <= tol else None

                    if v_present is not None:

                        new_u = u
                        new_v = v_present
                        new_d = d.copy()
                        new_to_jimage = tuple(map(int, v_expec_image))

                        # normalize direction
                        if new_v < new_u:
                            new_u, new_v = new_v, new_u
                            new_to_jimage = tuple(
                                np.multiply(-1, d['to_jimage']).astype(int))

                        new_d['to_jimage'] = new_to_jimage

                        edges_to_remove.append((u, v, k))

                        if (new_u, new_v,
                                new_to_jimage) not in new_periodic_images:
                            edges_to_add.append((new_u, new_v, new_d))
                            new_periodic_images.append(
                                (new_u, new_v, new_to_jimage))

        logger.debug("Removing {} edges, adding {} new edges.".format(
            len(edges_to_remove), len(edges_to_add)))

        # add/delete marked edges
        for edge in edges_to_remove:
            new_g.remove_edge(*edge)
        for (u, v, d) in edges_to_add:
            new_g.add_edge(u, v, **d)

        # return new instance of StructureGraph with supercell
        d = {
            "@module": self.__class__.__module__,
            "@class": self.__class__.__name__,
            "structure": new_structure.as_dict(),
            "graphs": json_graph.adjacency_data(new_g)
        }

        sg = StructureGraph.from_dict(d)

        return sg
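Assuming this method belongs to pymatgen's StructureGraph (the surrounding identifiers suggest so), a hedged usage sketch; the NaCl-like structure and the scaling factors are illustrative only:

# Sketch under the assumption of a pymatgen installation that provides
# StructureGraph.with_local_env_strategy and this __mul__.
from pymatgen.core import Lattice, Structure
from pymatgen.analysis.graphs import StructureGraph
from pymatgen.analysis.local_env import MinimumDistanceNN

structure = Structure(Lattice.cubic(4.2), ["Na", "Cl"],
                      [[0, 0, 0], [0.5, 0.5, 0.5]])
sg = StructureGraph.with_local_env_strategy(structure, MinimumDistanceNN())
supercell_sg = sg * (2, 1, 1)  # diagonal scaling; a full 3x3 matrix raises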
Example #3
def directed_configuration_model(in_degree_sequence,
                                 out_degree_sequence,
                                 create_using=None, seed=None):
    """Return a directed_random graph with the given degree sequences.

    The configuration model generates a random directed pseudograph
    (graph with parallel edges and self loops) by randomly assigning
    edges to match the given degree sequences.

    Parameters
    ----------
    in_degree_sequence :  list of nonnegative integers
       Each list entry corresponds to the in-degree of a node.
    out_degree_sequence :  list of nonnegative integers
       Each list entry corresponds to the out-degree of a node.
    create_using : graph, optional (default MultiDiGraph)
       Return graph of this type. The instance will be cleared.
    seed : hashable object, optional
        Seed for random number generator.

    Returns
    -------
    G : MultiDiGraph
        A graph with the specified degree sequences.
        Nodes are labeled starting at 0 with an index
        corresponding to the position in the degree sequences.

    Raises
    ------
    NetworkXError
        If the degree sequences do not have the same sum.

    See Also
    --------
    configuration_model

    Notes
    -----
    Algorithm as described by Newman [1]_.

    A non-graphical degree sequence (not realizable by some simple
    graph) is allowed since this function returns graphs with self
    loops and parallel edges.  An exception is raised if the degree
    sequences do not have the same sum.

    This configuration model construction process can lead to
    duplicate edges and loops.  You can remove the self-loops and
    parallel edges (see below) which will likely result in a graph
    that doesn't have the exact degree sequence specified.  This
    "finite-size effect" decreases as the size of the graph increases.

    References
    ----------
    .. [1] Newman, M. E. J. and Strogatz, S. H. and Watts, D. J.
       Random graphs with arbitrary degree distributions and their applications
       Phys. Rev. E, 64, 026118 (2001)

    Examples
    --------
    One can modify the in- and out-degree sequences from an existing
    directed graph in order to create a new directed graph. For example,
    here we modify the directed path graph:

    >>> D = nx.DiGraph([(0, 1), (1, 2), (2, 3)])
    >>> din = list(d for n, d in D.in_degree())
    >>> dout = list(d for n, d in D.out_degree())
    >>> din.append(1)
    >>> dout[0] = 2
    >>> # We now expect an edge from node 0 to a new node, node 4.
    ... D = nx.directed_configuration_model(din, dout)

    The returned graph is a directed multigraph, which may have parallel
    edges. To remove any parallel edges from the returned graph:

    >>> D = nx.DiGraph(D)

    Similarly, to remove self-loops:

    >>> D.remove_edges_from(nx.selfloop_edges(D))

    """
    if sum(in_degree_sequence) != sum(out_degree_sequence):
        msg = 'Invalid degree sequences: sequences must have equal sums'
        raise nx.NetworkXError(msg)

    if create_using is None:
        create_using = nx.MultiDiGraph()

    G = _configuration_model(out_degree_sequence, create_using, directed=True,
                             in_deg_sequence=in_degree_sequence, seed=seed)

    name = "directed configuration_model {} nodes {} edges"
    return G
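A short sanity check of the documented contract (not from the original docstring): with equal sums, every node receives exactly the requested in- and out-degree, since each stub is wired exactly once.

import networkx as nx

din, dout = [1, 2, 0], [2, 0, 1]  # unequal sums would raise NetworkXError
G = nx.directed_configuration_model(din, dout, seed=42)
assert [d for _, d in G.in_degree()] == din
assert [d for _, d in G.out_degree()] == dout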
def main(argv):
    GList = []
    n = int(argv[0])
    outdir_pickle = 'DATA_motif_finding_template_list'
    if not os.path.exists(outdir_pickle):
        os.makedirs(outdir_pickle)
    if os.path.exists(outdir_pickle + "/{}nodes_template.pickle".format(n)):
        sys.exit(1)

    numPermu = len(list(itertools.permutations(range(n), 2)))
    numCombi = len(list(itertools.combinations(range(n), 2)))
    numLoops = n
    # numCasesTotal = numPermu + numCombi + n
    numCasesTotal = numPermu + numCombi

    # the candidate edge list depends on 'n'
    if n == 2:
        edgeList = [(0, 1), (1, 0), (0, 1)]
    elif n == 3:
        edgeList = [(0, 1), (1, 0), (1, 2), (2, 1), (0, 2), (2, 0), (0, 1),
                    (1, 2), (0, 2)]
    elif n == 4:
        edgeList = [(0, 1), (1, 0), (0, 2), (2, 0), (0, 3), (3, 0), (1, 2),
                    (2, 1), (1, 3), (3, 1), (2, 3), (3, 2), (0, 1), (0, 2),
                    (0, 3), (1, 2), (1, 3), (2, 3)]
    else:
        sys.exit(1)

    for numSeledge in range(1, numCasesTotal + 1):
        totalCombiSeledge = itertools.combinations(range(numCasesTotal),
                                                   numSeledge)
        for eachCase in totalCombiSeledge:
            hereG = nx.MultiDiGraph()
            hereOnlyRTG = nx.Graph()
            for smallerEachCase in eachCase:
                if smallerEachCase < numPermu:
                    # default color is red
                    hereG.add_edge(*edgeList[smallerEachCase])
                elif numPermu <= smallerEachCase < numPermu + numCombi:
                    nodeIndices = edgeList[smallerEachCase]
                    swappedNodeIndices = (nodeIndices[1], nodeIndices[0])
                    hereG.add_edge(*nodeIndices, color='black')
                    hereG.add_edge(*swappedNodeIndices, color='black')
                    hereOnlyRTG.add_edge(*nodeIndices)
                else:
                    # hereG.add_edge(*edgeList[smallerEachCase])
                    pass
            if len(hereG.nodes()) != n:
                totalNodes = list(range(n))
                for nowNode in hereG.nodes():
                    totalNodes.remove(nowNode)
                hereG.add_nodes_from(totalNodes)
            if len(hereOnlyRTG.nodes()) != n:
                totalNodes = list(range(n))
                for nowNode in hereOnlyRTG.nodes():
                    totalNodes.remove(nowNode)
                hereOnlyRTG.add_nodes_from(totalNodes)
            if nx.is_connected(hereOnlyRTG):
                GList.append(hereG)

    duplicatedCase = set()
    em = iso.categorical_multiedge_match('color', 'red')
    for rawI in range(len(GList)):
        if rawI not in duplicatedCase:
            for rawJ in range(rawI + 1, len(GList)):
                if rawJ not in duplicatedCase:
                    if nx.is_isomorphic(GList[rawI],
                                        GList[rawJ],
                                        edge_match=em):
                        duplicatedCase.add(rawJ)
    notDuplicatedCase = set(range(len(GList)))
    notDuplicatedCase -= duplicatedCase
    notDuplicatedCase = list(notDuplicatedCase)
    notDuplicatedCase.sort()

    selectedGList = [
        i for rawI, i in enumerate(GList) if rawI in notDuplicatedCase
    ]

    pickle.dump(
        selectedGList,
        open(outdir_pickle + '/{}nodes_template.pickle'.format(n), 'wb'))

    outdir = 'MotifCountingTemplates/{}nodes_template'.format(n)
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    templates = pickle.load(
        open(outdir_pickle + '/{}nodes_template.pickle'.format(n), 'rb'))
    for rawI, i in enumerate(templates):
        # extracted undirected graph
        extractedUndir = []
        for eachEdge in i.edges(data=True):
            if 'color' in eachEdge[2]:
                extractedUndir.append(eachEdge)
        extractedUndirGraph = nx.Graph()
        extractedUndirGraph.add_edges_from(extractedUndir)
        extractedDir = []
        for eachEdge in i.edges(data=True):
            if 'color' not in eachEdge[2]:
                extractedDir.append((eachEdge[0], eachEdge[1]))
        ###
        hereG = nx.MultiDiGraph()
        for j in extractedUndirGraph.edges():
            hereG.add_edge(j[0], j[1], color='black', arrowhead='none')
        for j in extractedDir:
            hereG.add_edge(*j, color='red')
        graphOutName = outdir + '/{}nodes_{}.pdf'.format(n, rawI)
        dotName = outdir + '/{}nodes_{}.dot'.format(n, rawI)
        nx.nx_agraph.write_dot(hereG, dotName)
        graph = pydot.graph_from_dot_file(dotName)
        if isinstance(graph, list):
            graph[0].write_pdf(graphOutName, prog='neato')
        else:
            graph.write_pdf(graphOutName, prog='neato')
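The deduplication pass above hinges on colour-aware multiedge isomorphism; here is the matcher in isolation on toy graphs (not taken from the script). With 'red' as the declared default, an edge with no colour attribute only matches another red edge:

import networkx as nx
import networkx.algorithms.isomorphism as iso

em = iso.categorical_multiedge_match('color', 'red')
g1, g2 = nx.MultiDiGraph(), nx.MultiDiGraph()
g1.add_edge(0, 1)                  # no attribute: treated as 'red'
g2.add_edge(0, 1, color='black')
assert not nx.is_isomorphic(g1, g2, edge_match=em)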
Example #5
def parse_pajek(lines):
    """Parse Pajek format graph from string or iterable.

    Parameters
    ----------
    lines : string or iterable
       Data in Pajek format.

    Returns
    -------
    G : NetworkX graph

    See Also
    --------
    read_pajek()

    """
    import shlex
    # multigraph=False
    if is_string_like(lines):
        lines = iter(lines.split('\n'))
    lines = iter([line.rstrip('\n') for line in lines])
    G = nx.MultiDiGraph()  # are multiedges allowed in Pajek? assume yes
    while lines:
        try:
            l = next(lines)
        except StopIteration:  # EOF
            break
        if l.lower().startswith("*network"):
            try:
                label, name = l.split()
            except ValueError:
                # Line was not of the form:  *network NAME
                pass
            else:
                G.graph['name'] = name
        elif l.lower().startswith("*vertices"):
            nodelabels = {}
            l, nnodes = l.split()
            for i in range(int(nnodes)):
                splitline = shlex.split(str(next(lines)))
                id, label = splitline[0:2]
                G.add_node(label)
                nodelabels[id] = label
                G.node[label] = {'id': id}
                try:
                    x, y, shape = splitline[2:5]
                    G.node[label].update({
                        'x': float(x),
                        'y': float(y),
                        'shape': shape
                    })
                except ValueError:
                    pass
                extra_attr = zip(splitline[5::2], splitline[6::2])
                G.node[label].update(extra_attr)
        elif l.lower().startswith("*edges") or l.lower().startswith("*arcs"):
            if l.lower().startswith("*edge"):
                # switch from multidigraph to multigraph
                G = nx.MultiGraph(G)
            if l.lower().startswith("*arcs"):
                # switch to directed with multiple arcs for each existing edge
                G = G.to_directed()
            for l in lines:
                splitline = shlex.split(str(l))
                if len(splitline) < 2:
                    continue
                ui, vi = splitline[0:2]
                u = nodelabels.get(ui, ui)
                v = nodelabels.get(vi, vi)
                # parse the data attached to this edge and put in a dictionary
                edge_data = {}
                try:
                    # there should always be a single value on the edge?
                    w = splitline[2:3]
                    edge_data.update({'weight': float(w[0])})
                except (IndexError, ValueError):
                    # if there isn't, just assign a 1
                    # edge_data.update({'value': 1})
                    pass
                extra_attr = zip(splitline[3::2], splitline[4::2])
                edge_data.update(extra_attr)
                # if G.has_edge(u,v):
                #     multigraph=True
                G.add_edge(u, v, **edge_data)
    return G
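A minimal round trip through the parser (illustrative two-node network): *Vertices supplies the labels and *Arcs keeps the MultiDiGraph directed.

data = '*Vertices 2\n1 "a"\n2 "b"\n*Arcs\n1 2 1.0'
G = parse_pajek(data)
assert G['a']['b'][0]['weight'] == 1.0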
Example #6
 def __init__(
     self,
     arrival_rate=1,
     success_prob=lambda: 1,
     max_agents=1000,
     graph=True,
     plots=False,
     plot_time=0.5,
     selfMatch=False,
 ):
     """
     Generate new market object
     Arguments
     -----------
      arrival_rate: int or f()->int
          rate of arrival in the Market
          parameter passed to a statistical distribution when advancing time
      success_prob: float or f()->float
          probability of match success
          parameter passed to a match probability function
     max_agents: int
         maximum number of agents over all periods in Market
      graph: bool
          store network of potential matches in a NetworkX DiGraph object
          (necessary for graph plotting); nodes are agents, directed edges
          are compatibility, and edge weight is expected match utility
          (match utility * match success probability)
     plots: bool
         output network graph plots 3 times per update
     plot_time: float
         time per frame on plot
      selfMatch: bool
          if True, an agent may be matched with itself and the network
          is stored as a MultiDiGraph
     """
     self.Agents = list()
     self.arrival_rate = arrival_rate
     self.acceptable_prob = success_prob
     self.max_agents = max_agents
     self.perished = list()
     self.matched = list()
     self.matched_dict = dict()
     self.time = 0
     self.welfare = 0
     self.total_agents = 0
     self.loss = 0
     self.has_graph = graph
     self.plots_on = plots
     self.selfMatch = selfMatch
     if self.has_graph:
         if self.selfMatch:
             self.Graph = nx.MultiDiGraph()
         else:
             self.Graph = nx.DiGraph()
      if self.plots_on and CANT_PLOT:
          print("WARNING: Can't plot due to qt5agg backend import error")
          self.plots_on = False
      if self.plots_on:
          plt.ion()  # Interactive plotting
          self.has_graph = True
          self.Graph = nx.DiGraph()
         self.graph_labels = dict()
         self.color_map = dict()
         self.graph_colors = list()
         self.plot_time = plot_time
         self.graph_pos = dict()
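The docstring's weight convention (edge weight = match utility x success probability) in isolation; the Market class itself is not shown here, so the agent names and numbers below are hypothetical:

import networkx as nx

compat = nx.DiGraph()
utility, prob = 3.0, 0.5  # hypothetical values
# Directed edge: agent_a finds agent_b compatible, weighted by the
# expected match utility.
compat.add_edge("agent_a", "agent_b", weight=utility * prob)
assert compat["agent_a"]["agent_b"]["weight"] == 1.5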
Example #7
 def test_astar_multigraph(self):
      G = nx.MultiDiGraph(self.XG)
      assert_raises((TypeError, nx.NetworkXError),
                    nx.astar_path, G, 's', 'v')
      assert_raises((TypeError, nx.NetworkXError),
                    nx.astar_path_length, G, 's', 'v')
Example #8
    def __init__(self, conf_file):
        """Initialize transaction network from parameter files.
        :param conf_file: JSON file as configurations
        """
        self.g = nx.MultiDiGraph()  # Transaction graph object
        self.num_accounts = 0  # Number of total accounts
        self.degrees = dict()  # Degree distribution
        self.hubs = list()  # Hub vertices
        self.subject_candidates = set()
        self.attr_names = list()  # Additional account attribute names

        with open(conf_file, "r") as rf:
            self.conf = json.load(rf)

        general_conf = self.conf["general"]

        # Set random seed
        seed = general_conf.get("random_seed")
        self.seed = seed if seed is None else int(seed)
        np.random.seed(self.seed)
        random.seed(self.seed)

        self.total_steps = parse_int(general_conf["total_steps"])

        # Set default amounts, steps and model ID
        default_conf = self.conf["default"]
        self.default_min_amount = parse_amount(default_conf.get("min_amount"))
        self.default_max_amount = parse_amount(default_conf.get("max_amount"))
        self.default_min_balance = parse_amount(
            default_conf.get("min_balance"))
        self.default_max_balance = parse_amount(
            default_conf.get("max_balance"))
        self.default_start_step = parse_int(default_conf.get("start_step"))
        self.default_end_step = parse_int(default_conf.get("end_step"))
        self.default_start_range = parse_int(default_conf.get("start_range"))
        self.default_end_range = parse_int(default_conf.get("end_range"))
        self.default_model = parse_int(default_conf.get("transaction_model"))

        # Get input file names and properties
        input_conf = self.conf["input"]
        self.input_dir = input_conf["directory"]  # Directory name of input files
        self.account_file = input_conf["accounts"]  # Account list file
        self.alert_file = input_conf["alert_patterns"]
        self.degree_file = input_conf["degree"]
        self.type_file = input_conf["transaction_type"]
        self.is_aggregated = input_conf["is_aggregated_accounts"]

        # Get output file names
        output_conf = self.conf["temporal"]  # The destination directory is temporary
        self.output_dir = output_conf["directory"]
        self.out_tx_file = output_conf["transactions"]
        self.out_account_file = output_conf["accounts"]
        self.out_alert_file = output_conf["alert_members"]

        # Other properties for the transaction graph generator
        other_conf = self.conf["graph_generator"]
        self.degree_threshold = parse_int(other_conf["degree_threshold"])
        highrisk_countries_str = other_conf.get("high_risk_countries", "")
        highrisk_business_str = other_conf.get("high_risk_business", "")
        self.highrisk_countries = set(highrisk_countries_str.split(","))
        self.highrisk_business = set(highrisk_business_str.split(","))

        self.tx_id = 0  # Transaction ID
        self.alert_id = 0  # Alert ID from the alert parameter file
        self.alert_groups = dict()  # Alert ID and alert transaction subgraph
        self.alert_types = {
            "fan_out": 1,
            "fan_in": 2,
            "cycle": 3,
            "bipartite": 4,
            "stack": 5,
            "dense": 6
        }  # Pattern name and model ID

        def get_types(type_csv):
            tx_types = list()
            with open(type_csv, "r") as _rf:
                reader = csv.reader(_rf)
                next(reader)
                for row in reader:
                    if row[0].startswith("#"):
                        continue
                    ttype = row[0]
                    tx_types.extend([ttype] * int(row[1]))
            return tx_types

        self.tx_types = get_types(os.path.join(self.input_dir, self.type_file))
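get_types above expands each (type, count) CSV row into a weighted list for later sampling; the same idea on an in-memory CSV (the header and rows are invented for illustration):

import csv
import io

sample = io.StringIO("type,count\nWIRE,2\nCHECK,1\n")
reader = csv.reader(sample)
next(reader)  # skip the header row, as get_types does
tx_types = []
for row in reader:
    tx_types.extend([row[0]] * int(row[1]))
assert tx_types == ["WIRE", "WIRE", "CHECK"]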
Example #9
    def add_alert_pattern(self,
                          is_fraud,
                          pattern_type,
                          accounts,
                          schedule_id=1,
                          individual_amount=None,
                          aggregated_amount=None,
                          transaction_freq=None,
                          amount_difference=None,
                          period=None,
                          amount_rounded=None,
                          orig_country=False,
                          bene_country=False,
                          orig_business=False,
                          bene_business=False):
        """Add an AML rule transaction set
        :param is_fraud: Whether the transaction set is fraud or alert
        :param pattern_type: Pattern type ("fan_in", "fan_out", "bipartite", "mixed", "stack", "dense" or "cycle")
        :param accounts: Number of transaction members (accounts)
        :param schedule_id: AML pattern transaction schedule model ID
        :param individual_amount: Minimum individual amount
        :param aggregated_amount: Minimum aggregated amount
        :param transaction_freq: Minimum transaction frequency
        :param amount_difference: Proportion of maximum transaction difference
        :param period: Lookback period (days)
        :param amount_rounded: Proportion of rounded amounts
        :param orig_country: Whether the originator country is suspicious
        :param bene_country: Whether the beneficiary country is suspicious
        :param orig_business: Whether the originator business type is suspicious
        :param bene_business: Whether the beneficiary business type is suspicious
        :return:
        """
        subject, members = self.get_alert_members(accounts, is_fraud)

        # Prepare parameters
        if individual_amount is None:
            min_amount = self.default_min_amount
            max_amount = self.default_max_amount
        else:
            min_amount = individual_amount
            max_amount = individual_amount * 2

        if aggregated_amount is None:
            aggregated_amount = 0

        start_day = 0
        end_day = self.total_steps

        # Create subgraph structure with transaction attributes
        modelID = self.alert_types[pattern_type]  # alert model ID
        sub_g = nx.MultiDiGraph(
            modelID=modelID,
            reason=pattern_type,
            scheduleID=schedule_id,
            start=start_day,
            end=end_day)  # Transaction subgraph for an alert
        num_members = len(members)  # Number of accounts
        total_amount = 0
        transaction_count = 0

        if pattern_type == "fan_in":  # fan_in pattern (multiple accounts --> single (subject) account)
            src_list = [n for n in members if n != subject]
            dst = subject
            if transaction_freq is None:
                transaction_freq = num_members - 1
            # Generate transactions for the specified number
            for src in itertools.cycle(src_list):
                amount = random.uniform(min_amount, max_amount)
                date = random.randrange(start_day, end_day)
                sub_g.add_edge(src, dst, amount=amount, date=date)
                self.g.add_edge(src, dst, amount=amount, date=date)
                transaction_count += 1
                total_amount += amount
                if transaction_count >= transaction_freq and total_amount >= aggregated_amount:
                    break

        elif pattern_type == "fan_out":  # fan_out pattern (single (subject) account --> multiple accounts)
            src = subject
            dst_list = [n for n in members if n != subject]
            if transaction_freq is None:
                transaction_freq = num_members - 1
            # Generate transactions for the specified number
            for dst in itertools.cycle(dst_list):
                amount = random.uniform(min_amount, max_amount)
                date = random.randrange(start_day, end_day)
                sub_g.add_edge(src, dst, amount=amount, date=date)
                self.g.add_edge(src, dst, amount=amount, date=date)

                transaction_count += 1
                total_amount += amount
                if transaction_count >= transaction_freq and total_amount >= aggregated_amount:
                    break

        elif pattern_type == "bipartite":  # bipartite (sender accounts --> all-to-all --> receiver accounts)
            src_list = members[:(num_members // 2)]  # The former half members are sender accounts
            dst_list = members[(num_members // 2):]  # The latter half members are receiver accounts
            if transaction_freq is None:  # Number of transactions
                transaction_freq = len(src_list) * len(dst_list)
            for src, dst in itertools.product(
                    src_list, dst_list):  # All-to-all transactions
                amount = random.uniform(min_amount, max_amount)
                date = random.randrange(start_day, end_day)
                sub_g.add_edge(src, dst, amount=amount, date=date)
                self.g.add_edge(src, dst, amount=amount, date=date)

                transaction_count += 1
                total_amount += amount
                if transaction_count > transaction_freq and total_amount >= aggregated_amount:
                    break

        elif pattern_type == "mixed":  # fan_out -> bipartite -> fan_in
            src = members[0]  # Source account
            dst = members[num_members - 1]  # Destination account
            src_list = members[1:(num_members // 2)]  # First intermediate accounts
            dst_list = members[(num_members // 2):num_members - 1]  # Second intermediate accounts

            if transaction_freq is None:
                transaction_freq = len(src_list) + len(
                    dst_list) + len(src_list) * len(dst_list)

            for _dst in src_list:  # Fan-out
                amount = random.uniform(min_amount, max_amount)
                date = random.randrange(start_day, end_day)
                sub_g.add_edge(src, _dst, amount=amount, date=date)
                self.g.add_edge(src, _dst, amount=amount, date=date)
                transaction_count += 1
                total_amount += amount

            for _src, _dst in itertools.product(src_list,
                                                dst_list):  # Bipartite
                amount = random.uniform(min_amount, max_amount)
                date = random.randrange(start_day, end_day)
                sub_g.add_edge(_src, _dst, amount=amount, date=date)
                self.g.add_edge(_src, _dst, amount=amount, date=date)
                transaction_count += 1
                total_amount += amount

            for _src in itertools.cycle(dst_list):  # Fan-in
                amount = random.uniform(min_amount, max_amount)
                date = random.randrange(start_day, end_day)
                sub_g.add_edge(_src, dst, amount=amount, date=date)
                self.g.add_edge(_src, dst, amount=amount, date=date)
                transaction_count += 1
                total_amount += amount
                if transaction_count >= transaction_freq and total_amount >= aggregated_amount:
                    break

        elif pattern_type == "stack":  # two dense bipartite layers
            src_list = members[:num_members // 3]  # First 1/3 of members are source accounts
            mid_list = members[num_members // 3:num_members * 2 // 3]  # Second 1/3 are intermediate accounts
            dst_list = members[num_members * 2 // 3:]  # Last 1/3 are destination accounts
            if transaction_freq is None:  # Total number of transactions
                transaction_freq = len(src_list) * len(mid_list) + len(
                    mid_list) * len(dst_list)

            for src, dst in itertools.product(
                    src_list, mid_list):  # all-to-all transactions
                amount = random.uniform(min_amount, max_amount)
                date = random.randrange(start_day, end_day)
                sub_g.add_edge(src, dst, amount=amount, date=date)
                self.g.add_edge(src, dst, amount=amount, date=date)
                transaction_count += 1
                total_amount += amount
                if transaction_count > transaction_freq and total_amount >= aggregated_amount:
                    break
            for src, dst in itertools.product(
                    mid_list, dst_list):  # all-to-all transactions
                amount = random.uniform(min_amount, max_amount)
                date = random.randrange(start_day, end_day)
                sub_g.add_edge(src, dst, amount=amount, date=date)
                self.g.add_edge(src, dst, amount=amount, date=date)
                transaction_count += 1
                total_amount += amount
                if transaction_count > transaction_freq and total_amount >= aggregated_amount:
                    break

        elif pattern_type == "dense":  # Dense alert accounts (all-to-all)
            dsts = [n for n in members if n != subject]
            for dst in dsts:
                amount = random.uniform(min_amount, max_amount)
                date = random.randrange(start_day, end_day)
                sub_g.add_edge(subject, dst, amount=amount, date=date)
                self.g.add_edge(subject, dst, amount=amount, date=date)
            for dst in dsts:
                nb1 = random.choice(dsts)
                if dst != nb1:
                    amount = random.uniform(min_amount, max_amount)
                    date = random.randrange(start_day, end_day)
                    sub_g.add_edge(dst, nb1, amount=amount, date=date)
                    self.g.add_edge(dst, nb1, amount=amount, date=date)
                nb2 = random.choice(dsts)
                if dst != nb2:
                    amount = random.uniform(min_amount, max_amount)
                    date = random.randrange(start_day, end_day)
                    sub_g.add_edge(nb2, dst, amount=amount, date=date)
                    self.g.add_edge(nb2, dst, amount=amount, date=date)

        elif pattern_type == "cycle":  # Cycle transactions
            subject_index = list(members).index(subject)  # Index in the member list indicates the subject account
            num = len(members)  # Number of involved accounts
            amount = random.uniform(min_amount, max_amount)  # Transaction amount
            dates = sorted([random.randrange(start_day, end_day) for _ in range(num)])  # Transaction dates (in order)
            for i in range(num):
                src_i = (subject_index + i) % num
                dst_i = (src_i + 1) % num
                src = members[src_i]  # Source account ID
                dst = members[dst_i]  # Destination account ID
                date = dates[i]  # Transaction date (timestamp)

                sub_g.add_edge(src, dst, amount=amount, date=date)
                self.g.add_edge(src, dst, amount=amount, date=date)

        else:
            print("Warning: unknown pattern type: %s" % pattern_type)
            return

        # Add the generated transaction edges to whole transaction graph
        sub_g.graph["subject"] = subject if is_fraud else None
        self.alert_groups[self.alert_id] = sub_g

        # Add the fraud flag to the subject account vertex
        if is_fraud:
            self.g.node[subject]["isFraud"] = True
        # for n in sub_g.nodes():
        #     self.g.node[n]["isFraud"] = True
        self.alert_id += 1
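The fan_in and fan_out branches above share one idiom: cycle endlessly over the counterpart accounts and break once both the frequency and aggregated-amount thresholds are met. The idiom in isolation, with a fixed amount so the arithmetic is checkable (thresholds are illustrative):

import itertools

transaction_freq, aggregated_amount = 5, 40.0
count, total = 0, 0.0
for dst in itertools.cycle(["acct1", "acct2"]):
    total += 10.0  # a real run draws random.uniform(min_amount, max_amount)
    count += 1
    if count >= transaction_freq and total >= aggregated_amount:
        break
assert count == 5 and total == 50.0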
 def setup(self):
     self.G = nx.path_graph(9, nx.MultiDiGraph())
     self.G.add_edge(1, 2, key=3, foo='bar')
     self.eview = nx.reportviews.InMultiEdgeView
class DAG(object):
    __preprocessedInput = [] #Original input as a sequence of integers
    __dic = {} #Dictionary for correspondence of integers to original chars (only when charSeq = 'c','s')
    __DAG = {} #Adjacency list of DAG
    __DAGGraph = nx.MultiDiGraph()
    __DAGStrings = {} #Strings corresponding to each node in DAG

    __concatenatedDAG = [] #Concatenated DAG nodes with separatorInts
    __concatenatedNTs = [] #For each DAG node, alongside the concatenated DAG
    __separatorInts = set([]) #Used for separating DAG nodes in the concatenatedDAG
    __separatorIntsIndices = set([]) #Indices of separatorInts in the concatenated DAG
    __nextNewInt = 0 #Ints for repeat symbols and separators are allocated as odd numbers

    __quietLog = False #if true, disables logging
    __iterations = 0

    def __init__(self, inputFile, loadDAGFlag, chFlag = SequenceType.Character, noNewLineFlag = True):
        if loadDAGFlag:
            self.__initFromDAG(inputFile)
        else:
            self.__initFromStrings(inputFile, chFlag, noNewLineFlag)
    #Initializes (an unoptimized) DAG from inputFile. charSeq tells if inputFile is a char sequence, int sequence or space-separated sequence
    def __initFromStrings(self, inputFile, chFlag = SequenceType.Character, noNewLineFlag = True):
        (self.__preprocessedInput, self.__dic) = self.__preprocessInput(inputFile, charSeq = chFlag, noNewLineFlag = noNewLineFlag)
        allLetters = set(map(int,self.__preprocessedInput.split()))
        #Setting an odd starting value for __nextNewInt
        self.__nextNewInt = max(allLetters)+1
        if self.__nextNewInt % 2 == 0:
            self.__nextNewInt += 1
        #Initializing the concatenated DAG
        for line in self.__preprocessedInput.split('\n'):
            line = line.rstrip('\n')
            self.__concatenatedDAG.extend(map(int,line.split()))
            self.__concatenatedDAG.append(self.__nextNewInt)
            self.__concatenatedNTs.extend(0 for j in range(len(line.split())))
            self.__concatenatedNTs.append(self.__nextNewInt)
            self.__separatorInts.add(self.__nextNewInt)
            self.__separatorIntsIndices.add(len(self.__concatenatedDAG)-1)
            self.__nextNewInt += 2
    #Loads the DAG from an external file (The file should start from 'N0' line, without cost logs)
    def __initFromDAG(self, inputFile):
        textFile = inputFile.read().splitlines()
        maxInt = -1
        for line in textFile:
            nt = int(line.split(' ->  ')[0][1:])
            self.__dic[nt] = nt
            rhs = line.split(' ->  ')[1].split()
            for w in rhs:
                # sys.stderr.write(w + "\n")
                try:
                    word = int(w)
                except ValueError:
                    word = int(w[1:])
                if maxInt < word:
                    maxInt = word
                self.__dic[word] = word
                self.__concatenatedDAG.append(word)
                self.__concatenatedNTs.append(nt)
            self.__concatenatedDAG.append(-1)
            self.__concatenatedNTs.append(-1)
            self.__separatorIntsIndices.add(len(self.__concatenatedDAG) - 1)
        self.__nextNewInt = maxInt + 1
        for i in self.__separatorIntsIndices:
            self.__concatenatedDAG[i] = self.__nextNewInt
            self.__concatenatedNTs[i] = self.__nextNewInt
            self.__separatorInts.add(self.__nextNewInt)
            self.__nextNewInt += 1
        # wordDict = {}
        # counterDict = {}
        # counter = 0
        # textFile = inputFile.read().splitlines()
        # tmpnode = []
        # for line in textFile:
        #     # if len(line.split(' ->  ')) < 2:
        #     #     tmpnode = ['\n'] + line.split(' ')
        #     #     newnode = []
        #     #     for w in tmpnode:
        #     #         if w not in counterDict:
        #     #             wordDict[counter] = w
        #     #             counterDict[w] = counter
        #     #             counter += 1
        #     #         newnode.append(counterDict[w])
        #     #     self.__DAG[newNt] += newnode
        #     #     continue
        #     # else:
        #     nt = int(line.split(' ->  ')[0][1:])
        #     if counter % 2 == 0:
        #         if counter != 0:
        #             counter += 1
        #     if nt not in counterDict:
        #         wordDict[counter] = nt
        #         counterDict[nt] = counter
        #         counter += 1
        #     newNt = counterDict[nt]
        #     node = line.split(' ->  ')[1].split(' ')
        #     newnode = []
        #     for w in node:
        #         if w[0] == 'N':
        #             if w not in counterDict:
        #                 wordDict[counter] = w[1:]
        #                 counterDict[w[1:]] = counter
        #                 counter += 1
        #             newnode.append(counterDict[w[1:]])
        #         else:
        #             if w not in counterDict:
        #                 wordDict[counter] = w
        #                 counterDict[w] = counter
        #                 counter += 1
        #             newnode.append(counterDict[w])
        #     if newNt == 0:
        #         if newNt in self.__DAG:
        #             self.__DAG[newNt].append(newnode)
        #         else:
        #             self.__DAG[newNt] = [newnode]
        #     else:
        #         self.__DAG[newNt] = newnode
        # self.__dic = wordDict
        # self.__nextNewInt = counter
        # if self.__nextNewInt % 2 == 0:
        #     self.__nextNewContextInt = self.__nextNewInt
        #     self.__nextNewInt += 1
        # else:
        #     self.__nextNewContextInt = self.__nextNewInt + 1
        # for nt in self.__DAG:
        #     self.__concatenatedDAG.extend(self.__DAG[nt])
        #     self.__concatenatedDAG.append(self.__nextNewInt)
        #     self.__concatenatedNTs.extend(nt for j in range(len(self.__DAG[nt])))
        #     self.__concatenatedNTs.append(self.__nextNewInt)
        #     self.__separatorInts.add(self.__nextNewInt)
        #     self.__separatorIntsIndices.add(len(self.__concatenatedDAG)-1)
        #     self.__nextNewInt += 2
        # print self.__DAG
        # print self.__dic
        self.__createAdjacencyList()
        # print 'self dag'
        # print self.__DAG
        self.__createDAGGraph()
        # print 'self graph'
        # print self.__DAGGraph
        # print self.__DAGGraph.nodes()
        # print self.__DAGGraph.edges()
        self.__nodeStringsGenerate()
        # print 'self strings'
        # print self.__DAGStrings

    #...........Main G-Lexis Algorithm Functions........
    def GLexis(self, quiet, normalRepeatType, costFunction):
        self.__quietLog = quiet
        while True: #Main loop
            #Logging DAG Cost
            self.__logViaFlag(LogFlag.ConcatenationCostLog)
            self.__logViaFlag(LogFlag.EdgeCostLog)

            #Extracting Maximum-Gain Repeat
            (maximumRepeatGainValue, selectedRepeatOccs) = self.__retrieveMaximumGainRepeat(normalRepeatType, CostFunction.EdgeCost)
            if maximumRepeatGainValue == -1:
                break #No repeats, hence terminate

            self.__logMessage('maxR ' + str(maximumRepeatGainValue) + ' : ' + str(self.__concatenatedDAG[selectedRepeatOccs[1][0]:selectedRepeatOccs[1][0]+selectedRepeatOccs[0]]) + '\n')
            if maximumRepeatGainValue > 0:
                self.__replaceRepeat(selectedRepeatOccs) #Replacing the chosen repeat
                self.__iterations += 1
        self.__logMessage('---------------')
        self.__logMessage('Number of Iterations: ' + str(self.__iterations))
        self.__createAdjacencyList()
        self.__createDAGGraph()
        self.__nodeStringsGenerate()
    #Returns the cost of the DAG according to the selected costFunction
    def DAGCost(self, costFunction):
        if costFunction == CostFunction.ConcatenationCost:
            return len(self.__concatenatedDAG)-2*len(self.__separatorInts)
        if costFunction == CostFunction.EdgeCost:
            return len(self.__concatenatedDAG)-len(self.__separatorInts)
    #Replaces a repeat's occurrences with a new symbol and creates a new node in the DAG
    def __replaceRepeat(self, repeatInfo):
        (repeatLength, (repeatOccs)) = repeatInfo

        repeat = self.__concatenatedDAG[repeatOccs[0]:repeatOccs[0]+repeatLength]
        newTmpConcatenatedDAG = []
        newTmpConcatenatedNTs = []
        prevIndex = 0
        for i in repeatOccs:
            newTmpConcatenatedDAG += self.__concatenatedDAG[prevIndex:i] + [self.__nextNewInt]
            newTmpConcatenatedNTs += self.__concatenatedNTs[prevIndex:i] + [self.__concatenatedNTs[i]]
            prevIndex = i+repeatLength
        self.__concatenatedDAG = newTmpConcatenatedDAG + self.__concatenatedDAG[prevIndex:]
        self.__concatenatedNTs = newTmpConcatenatedNTs + self.__concatenatedNTs[prevIndex:]
        self.__concatenatedDAG = self.__concatenatedDAG + repeat
        self.__concatenatedNTs = self.__concatenatedNTs + [self.__nextNewInt for j in range(repeatLength)]
        self.__logMessage('Added Node: ' +  str(self.__nextNewInt))
        self.__nextNewInt += 2
        self.__concatenatedDAG = self.__concatenatedDAG + [self.__nextNewInt]
        self.__concatenatedNTs = self.__concatenatedNTs + [self.__nextNewInt]
        self.__separatorInts.add(self.__nextNewInt)
        self.__separatorIntsIndices = set([])
        for i in range(len(self.__concatenatedDAG)):
            if self.__concatenatedDAG[i] in self.__separatorInts:
                self.__separatorIntsIndices.add(i)
        self.__nextNewInt += 2
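    # Worked illustration (comment only): replacing repeat [1, 2] of length 2
    # at occurrences [0, 3] in [1, 2, 3, 1, 2, 4] with fresh symbol 9 turns
    # the body into [9, 3, 9, 4]; [1, 2] is then appended as the new node's
    # right-hand side, with NT 9 for each appended position.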
    #Retrieves the maximum-gain repeat (randomizes within ties).
    #Output is a tuple: "(RepeatGain, (RepeatLength, (RepeatOccurrences)))"
    #1st entry of output is the maximum repeat gain value
    #2nd entry of output is a tuple of form: "(selectedRepeatLength, selectedRepeatOccsList)"
    def __retrieveMaximumGainRepeat(self, repeatClass, costFunction):
        repeats = self.__extractRepeats(repeatClass)
        maxRepeatGain = 0
        candidateRepeats = []
        for r in repeats: #Extracting maximum repeat
            repeatStats = r.split()
            repeatOccs = self.__extractNonoverlappingRepeatOccurrences(int(repeatStats[0]), list(map(int, repeatStats[2][1:-1].split(','))))
            gain = self.__repeatGain(int(repeatStats[0]), len(repeatOccs), costFunction)
            if maxRepeatGain < gain:
                maxRepeatGain = gain
                candidateRepeats = [(int(repeatStats[0]), len(repeatOccs), repeatOccs)]
            elif maxRepeatGain > 0 and maxRepeatGain == gain:
                candidateRepeats.append((int(repeatStats[0]), len(repeatOccs), repeatOccs))
        if len(candidateRepeats) == 0:
            return (-1, (0, []))
        #Randomization between maximum-gain candidates is disabled; take the first
        #selectedRepeatStats = candidateRepeats[random.randrange(len(candidateRepeats))]
        selectedRepeatStats = candidateRepeats[0]
        selectedRepeatLength = selectedRepeatStats[0]
        selectedRepeatOccs = sorted(selectedRepeatStats[2])
        return (maxRepeatGain, (selectedRepeatLength, selectedRepeatOccs))
    #Returns the repeat gain, according to the chosen cost function
    def __repeatGain(self, repeatLength, repeatOccsLength, costFunction):
        # if costFunction == CostFunction.ConcatenationCost:
        return (repeatLength-1)*(repeatOccsLength-1)
        # if costFunction == CostFunction.EdgeCost:
        #     return (repeatLength-1)*(repeatOccsLength-1)-1
    #Extracts the designated class of repeats (Assumes ./repeats binary being in the same directory)
    #Output is a string, each line containing: "RepeatLength    NumberOfOccurrence  (CommaSeparatedOccurrenceIndices)"
    def __extractRepeats(self, repeatClass):
        process = subprocess.Popen(["./repeats1/repeats11", "-i", "-r"+repeatClass, "-n2", "-psol"],
                                   stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.STDOUT)
        # Feed the concatenated DAG on stdin and read everything back;
        # communicate() waits for the subprocess to exit.
        text_file = process.communicate(' '.join(map(str, self.__concatenatedDAG)))[0].rstrip()
        repeats=[]
        firstLine = False
        for line in text_file.splitlines():
            if firstLine == False:
                firstLine = True
                continue
            repeats.append(line.rstrip('\n'))
        return repeats
    #Extracts the non-overlapping occurrences of a repeat from a list of occurrences (scans from left to right)
    def __extractNonoverlappingRepeatOccurrences(self, repeatLength, occurrencesList):
        nonoverlappingIndices = []
        for i in range(len(occurrencesList)):
            if len(nonoverlappingIndices) > 0:
                if (nonoverlappingIndices[-1] + repeatLength <= occurrencesList[i]):#Not already covered
                    nonoverlappingIndices += [occurrencesList[i]]
            else:
                nonoverlappingIndices += [occurrencesList[i]]
        return  nonoverlappingIndices
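    # Illustration (comment only): the greedy scan keeps an occurrence only if
    # it starts at or after the end of the last kept one, so for repeat
    # length 3 the occurrence list [0, 2, 4] reduces to [0, 4].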
    #Creates the adjacency list
    def __createAdjacencyList(self):
        separatorPassed = False
        for i in range(len(self.__concatenatedDAG)):
            if i not in self.__separatorIntsIndices:
                node = self.__concatenatedNTs[i]
                if separatorPassed and node == 0:
                    self.__DAG[node].append([])
                    separatorPassed = False
                if node not in self.__DAG:
                    if node == 0:#Target node
                        self.__DAG[node] = [[self.__concatenatedDAG[i]]]
                    else:
                        self.__DAG[node] = [self.__concatenatedDAG[i]]
                else:
                    if node == 0:#Target node
                        self.__DAG[node][-1].append(self.__concatenatedDAG[i])
                    else:
                        self.__DAG[node].append(self.__concatenatedDAG[i])
            else:
                separatorPassed = True
    #Creates the DAG graph object (adjacency list should already be processed)
    def __createDAGGraph(self):
        for node in self.__DAG:
            self.__DAGGraph.add_node(node)
            if node == 0:
                for l in self.__DAG[node]:
                    for n in l:
                        self.__DAGGraph.add_node(n)
                        self.__DAGGraph.add_edge(n, node)
            else:
                for n in self.__DAG[node]:
                    self.__DAGGraph.add_node(n)
                    self.__DAGGraph.add_edge(n, node)
    #Stores the strings corresponding to each DAG node
    def __nodeStringsGenerate(self):
        for node in nx.nodes(self.__DAGGraph):
            if self.__DAGGraph.in_degree(node) == 0:
                # if self.__dic == {}:
                self.__DAGStrings[node] = str(node)
                # else:
                #     self.__DAGStrings[node] = str(self.__dic[node])
            else:
                if node == 0:
                    self.__DAGStrings[node] = []
                else:
                    self.__DAGStrings[node] = ''
        self.__nodeStringsHelper(0)
    # Helper recursive function
    def __nodeStringsHelper(self, n):
        if self.__DAGStrings[n] != [] and self.__DAGStrings[n] != '':
            return
        if n == 0:
            for l in self.__DAG[n]:
                self.__DAGStrings[n].append('')
                for i in range(len(l)):
                    subnode = l[i]
                    self.__nodeStringsHelper(subnode)
                    # if self.__dic == {}:
                    self.__DAGStrings[n][-1] += ' ' + self.__DAGStrings[subnode]
                    # else:
                    #     self.__DAGStrings[n][-1] += self.__DAGStrings[subnode] + ' '
        else:
            for i in range(len(self.__DAG[n])):
                subnode = self.__DAG[n][i]
                self.__nodeStringsHelper(subnode)
                # if self.__dic == {}:
                self.__DAGStrings[n] += ' ' + self.__DAGStrings[subnode]
                # else:
                #     self.__DAGStrings[n] += self.__DAGStrings[subnode] + ' '
    #Returns node's corresponding string
    def __getNodeString(self, n):
        if n == 0:
            result = []
            for l in self.__DAGStrings[n]:
                result.append(' '.join(l.split()))
            return result
        return ' '.join(self.__DAGStrings[n].split())

    # ...........Path-Centrality Functions........
    #Returns a list of strings corresponding to the nodes removed from the DAG by the greedy core-identification algorithm, stopping once the fraction of removed paths reaches the threshold tau
    def greedyCoreID_ByTau(self, tau):
        numberOfUpwardPaths = {}
        numberOfDownwardPaths = {}
        sources = []
        targets = []
        for node in nx.nodes(self.__DAGGraph):
            if self.__DAGGraph.in_degree(node) == 0:
                sources.append(node)
            if self.__DAGGraph.out_degree(node) == 0:
                targets.append(node)
            numberOfUpwardPaths[node] = 0
            numberOfDownwardPaths[node] = 0
        self.__calculateNumberOfUpwardPaths(sources, targets, numberOfUpwardPaths)
        self.__calculateNumberOfDownwardPaths(sources, targets, numberOfDownwardPaths)
        for t in targets:
            numberOfUpwardPaths[t] = 0
        for s in sources:
            numberOfDownwardPaths[s] = 0
        number_of_initial_paths = numberOfDownwardPaths[0]
        number_of_current_paths = numberOfDownwardPaths[0]
        listOfCentralNodes = []
        centralities = self.__calculateCentralities(numberOfUpwardPaths, numberOfDownwardPaths)
        topCentralNodeInfo = max(centralities, key=lambda x: x[1])
        allMaxes = [c for c in centralities if c[1] == topCentralNodeInfo[1]]
        while topCentralNodeInfo[1] > 0 and float(number_of_current_paths)/float(number_of_initial_paths) > 1-tau:  # a node with positive centrality exists
            for nodeToBeRemoved in allMaxes:
                nodeToBeRemoved = nodeToBeRemoved[0]
                self.__DAGGraph.remove_node(nodeToBeRemoved)
                listOfCentralNodes.append(nodeToBeRemoved)
            numberOfUpwardPaths = {}
            numberOfDownwardPaths = {}
            for node in nx.nodes(self.__DAGGraph):
                numberOfUpwardPaths[node] = 0
                numberOfDownwardPaths[node] = 0
            self.__calculateNumberOfUpwardPaths(sources, targets, numberOfUpwardPaths)
            self.__calculateNumberOfDownwardPaths(sources, targets, numberOfDownwardPaths)
            for t in targets:
                numberOfUpwardPaths[t] = 0
            for s in sources:
                numberOfDownwardPaths[s] = 0
            centralities = self.__calculateCentralities(numberOfUpwardPaths, numberOfDownwardPaths)
            topCentralNodeInfo = max(centralities, key=lambda x: x[1])
            allMaxes = [c for c in centralities if c[1] == topCentralNodeInfo[1]]
            number_of_current_paths = numberOfDownwardPaths[0]
        self.__DAGGraph = nx.MultiDiGraph()  # the DAG is directed; an undirected graph would break in_degree/out_degree
        self.__createDAGGraph()  # reconstructing the DAG graph
        core = []
        for i in range(len(listOfCentralNodes)):
            core.append(self.__getNodeString(listOfCentralNodes[i]))
        return core
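    # e.g. tau=0.3 keeps removing the most central nodes until at most 70 % of
    # the original source-to-target paths survive (or until no node has
    # positive centrality).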
    # Returns a list of strings corresponding to the nodes removed from the DAG by the greedy core-identification algorithm, bounded by the cardinality k of the extracted set
    def greedyCoreID_ByCardinality(self, k):
        numberOfUpwardPaths = {}
        numberOfDownwardPaths = {}
        sources = []
        targets = []
        for node in nx.nodes(self.__DAGGraph):
            if self.__DAGGraph.in_degree(node) == 0:
                sources.append(node)
            if self.__DAGGraph.out_degree(node) == 0:
                targets.append(node)
            numberOfUpwardPaths[node] = 0
            numberOfDownwardPaths[node] = 0
        self.__calculateNumberOfUpwardPaths(sources, targets, numberOfUpwardPaths)
        self.__calculateNumberOfDownwardPaths(sources, targets, numberOfDownwardPaths)
        for t in targets:
            numberOfUpwardPaths[t] = 0
        for s in sources:
            numberOfDownwardPaths[s] = 0
        number_of_initial_paths = numberOfDownwardPaths[0]
        number_of_current_paths = numberOfDownwardPaths[0]
        listOfCentralNodes = []
        centralities = self.__calculateCentralities(numberOfUpwardPaths, numberOfDownwardPaths)
        topCentralNodeInfo = max(centralities, key=lambda x: x[1])
        allMaxes = [c for c in centralities if c[1] == topCentralNodeInfo[1]]
        while topCentralNodeInfo[1] > 0 and len(listOfCentralNodes) <= k:  # a node with positive centrality exists
            for nodeToBeRemoved in allMaxes:
                nodeToBeRemoved = nodeToBeRemoved[0]
                self.__DAGGraph.remove_node(nodeToBeRemoved)
                listOfCentralNodes.append(nodeToBeRemoved)
            numberOfUpwardPaths = {}
            numberOfDownwardPaths = {}
            for node in nx.nodes(self.__DAGGraph):
                numberOfUpwardPaths[node] = 0
                numberOfDownwardPaths[node] = 0
            self.__calculateNumberOfUpwardPaths(sources, targets, numberOfUpwardPaths)
            self.__calculateNumberOfDownwardPaths(sources, targets, numberOfDownwardPaths)
            for t in targets:
                numberOfUpwardPaths[t] = 0
            for s in sources:
                numberOfDownwardPaths[s] = 0
            centralities = self.__calculateCentralities(numberOfUpwardPaths, numberOfDownwardPaths)
            topCentralNodeInfo = max(centralities, key=lambda x: x[1])
            allMaxes = [c for c in centralities if c[1] == topCentralNodeInfo[1]]
            number_of_current_paths = numberOfDownwardPaths[0]
        self.__DAGGraph = nx.MultiDiGraph()  # the DAG is directed; an undirected graph would break in_degree/out_degree
        self.__createDAGGraph()  # reconstructing the DAG graph
        core = []
        for i in range(len(listOfCentralNodes)):
            core.append(self.__getNodeString(listOfCentralNodes[i]))
        return core
    #Calculates the centralities for all nodes
    def __calculateCentralities(self, numberOfUpwardPaths, numberOfDownwardPaths):
        result = []
        for node in nx.nodes(self.__DAGGraph):
            result.append((node, numberOfUpwardPaths[node] * numberOfDownwardPaths[node]))
        return result
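    # e.g. in the DAG a -> b -> 0 (plus an edge a -> 0), node b lies on exactly
    # one source-to-target path, so its centrality is 1 * 1 = 1; the callers
    # zero out sources and targets beforehand, so endpoints are never selected.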
    #Calculates the number of Upward paths for all nodes
    def __calculateNumberOfUpwardPaths(self, sources, targets, numberOfUpwardPaths):
        for n in sources:
            self.__dfsUpward(n, sources, targets, numberOfUpwardPaths)
    # Helper recursive function
    def __dfsUpward(self, n, sources, targets, numberOfUpwardPaths):
        if self.__DAGGraph.out_degree(n) == 0:
            numberOfUpwardPaths[n] = 1
            return
        elif numberOfUpwardPaths[n] > 0:
            return
        else:
            for o in self.__DAGGraph.out_edges(n):
                self.__dfsUpward(o[1], sources, targets, numberOfUpwardPaths)
                numberOfUpwardPaths[n] += numberOfUpwardPaths[o[1]]
    # Calculates the number of Downward paths for all nodes
    def __calculateNumberOfDownwardPaths(self, sources, targets, numberOfDownwardPaths):
        for n in targets:
            self.__dfsDownward(n, sources, targets, numberOfDownwardPaths)
    # Helper recursive function
    def __dfsDownward(self, n, sources, targets, numberOfDownwardPaths):
        if self.__DAGGraph.in_degree(n) == 0:
            numberOfDownwardPaths[n] = 1
            return
        elif numberOfDownwardPaths[n] > 0:
            return
        else:
            for o in self.__DAGGraph.in_edges(n):
                self.__dfsDownward(o[0], sources, targets, numberOfDownwardPaths)
                numberOfDownwardPaths[n] += numberOfDownwardPaths[o[0]]

    # ...........Printing Functions........
    # Prints the DAG, optionally in integer form if intDAGPrint==True
    def printDAG(self, intDAGPrint):
        self.__logMessage('DAGCost(Concats): ' + str(self.DAGCost(CostFunction.ConcatenationCost)))
        self.__logMessage('DAGCost(Edges): ' + str(self.DAGCost(CostFunction.EdgeCost)))
        DAG = self.__concatenatedDAG
        # print 'dag'
        # print DAG
        NTs = self.__concatenatedNTs
        # print 'nts'
        # print NTs
        separatorInts = self.__separatorInts
        Dic = self.__dic
        nodes = {}
        ntDic = {}
        counter = 1
        NTsSorted = set()
        for i in range(len(NTs)):
            if NTs[i] not in ntDic and NTs[i] not in separatorInts:
                NTsSorted.add(NTs[i])
                # ntDic[NTs[i]] = 'N'+str(counter)
                # nodes['N'+str(counter)] = ''
                ntDic[NTs[i]] = 'N' + str(NTs[i])
                nodes['N' + str(NTs[i])] = ''
                counter += 1
        for i in range(len(DAG)):
            if DAG[i] not in NTsSorted:
                if DAG[i] not in separatorInts:
                    if not intDAGPrint:
                        try:
                            nodes[ntDic[NTs[i]]] = str(nodes[ntDic[NTs[i]]]) + ' ' + str(Dic[DAG[i]])
                        except KeyError:
                            print(DAG[i], NTs[i])
                            raise
                    else:
                        nodes[ntDic[NTs[i]]] = str(nodes[ntDic[NTs[i]]]) + ' ' + str(DAG[i])
                else:
                    nodes[ntDic[NTs[i - 1]]] = str(nodes[ntDic[NTs[i - 1]]]) + ' ||'
            else:
                if not intDAGPrint:
                    try:
                        nodes[ntDic[NTs[i]]] = str(nodes[ntDic[NTs[i]]]) + ' ' + str(ntDic[DAG[i]])
                    except KeyError:
                        print(DAG[i], NTs[i])
                        raise
                else:
                    nodes[ntDic[NTs[i]]] = str(nodes[ntDic[NTs[i]]]) + ' ' + str(ntDic[DAG[i]])
        NTsSorted = sorted(list(NTsSorted))
        nodeCounter = 0
        for nt in NTsSorted:
            subnodes = nodes[ntDic[nt]].rstrip(' ||').split(' ||')
            for s in subnodes:
                # each subnode keeps its leading space from the ' ||' join
                print(ntDic[nt] + ' ->' + s)
            nodeCounter += 1
    # Log via flags
    def __logViaFlag(self, flag):
        if not self.__quietLog:
            if flag == LogFlag.ConcatenationCostLog:
                sys.stderr.write('DAGCost(Concats): ' + str(self.DAGCost(CostFunction.ConcatenationCost)) + '\n')
                print('DAGCost(Concats): ' + str(self.DAGCost(CostFunction.ConcatenationCost)))
            if flag == LogFlag.EdgeCostLog:
                sys.stderr.write('DAGCost(Edges): ' + str(self.DAGCost(CostFunction.EdgeCost)) + '\n')
                print('DAGCost(Edges): ' + str(self.DAGCost(CostFunction.EdgeCost)))
    # Log custom message
    def __logMessage(self, message):
        if not self.__quietLog:
            sys.stderr.write(message + '\n')
            print(message)

    # ...........Utility Functions........
    # Converts the input data into an integer sequence; returns the integer sequence and a dictionary for recovering the original letters
    def __preprocessInput(self, inputFile, charSeq=SequenceType.Character, noNewLineFlag=True):
        if charSeq == SequenceType.Character:  # Building an integer-spaced sequence from the input string
            letterDict = {}
            counterDict = {}
            i = 0
            counter = 1
            newContents = ''
            if noNewLineFlag:
                line = inputFile.read()
                for i in range(len(line)):
                    if line[i] not in counterDict:
                        letterDict[counter] = line[i]
                        counterDict[line[i]] = counter
                        counter += 1
                    newContents += str(counterDict[line[i]]) + ' '
            else:
                for line in inputFile:
                    line = line.rstrip('\n')
                    for i in range(len(line)):
                        if line[i] not in counterDict:
                            letterDict[counter] = line[i]
                            counterDict[line[i]] = counter
                            counter += 1
                        newContents += str(counterDict[line[i]]) + ' '
                    newContents += '\n'
            return (newContents.rstrip('\n'), letterDict)
        if charSeq == SequenceType.Integer:  # input is space-separated integers
            newContents = ''
            intDict = {}
            for l in inputFile.read().splitlines():
                line = l.split()
                for i in range(len(line)):
                    # int() raises ValueError on any non-integer token
                    try:
                        intDict[int(line[i])] = line[i]
                    except ValueError:
                        raise ValueError('Input file is not in space-separated integer form.')
                newContents += l + '\n'
            return (newContents.rstrip('\n'), intDict)
        if charSeq == SequenceType.SpaceSeparated:  # input is space-separated words
            wordDict = {}
            counterDict = {}
            i = 0
            counter = 1
            newContents = ''
            for line in inputFile:
                line = line.rstrip('\n')
                for w in line.split():
                    if w not in counterDict:
                        wordDict[counter] = w
                        counterDict[w] = counter
                        counter += 1
                    newContents += str(counterDict[w]) + ' '
                newContents += '\n'
            return (newContents.rstrip('\n'), wordDict)
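A standalone sketch of the Character-mode mapping above (to_int_sequence is a hypothetical name; the method additionally handles files and newlines): every unseen character gets the next free integer, and repeats reuse their code.

def to_int_sequence(text):
    letterDict, counterDict, counter, out = {}, {}, 1, ''
    for ch in text:
        if ch not in counterDict:
            letterDict[counter] = ch   # integer -> original character
            counterDict[ch] = counter  # original character -> integer
            counter += 1
        out += str(counterDict[ch]) + ' '
    return out, letterDict

print(to_int_sequence('abab'))  # ('1 2 1 2 ', {1: 'a', 2: 'b'})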
    def setUp(self):
        self.G = nx.path_graph(9, create_using=nx.MultiDiGraph())
        self.eview = nx.reportviews.InMultiEdgeView
Example #13
    def setUp(self):
        super(TopologyLayer2TestCase, self).setUp()

        self.model_id = 1
        self.nav_graph = nx.MultiDiGraph()

        self.a = a = self._netbox_factory('a')
        self.b = b = self._netbox_factory('b')
        self.c = c = self._netbox_factory('c')
        self.d = d = self._netbox_factory('d')

        self.a1 = a1 = self._interface_factory('a1', a)
        self.a2 = a2 = self._interface_factory('a2', a)
        self.a3 = a3 = self._interface_factory('a3', a)
        self.b1 = b1 = self._interface_factory('b1', b)
        self.b2 = b2 = self._interface_factory('b2', b)
        self.c3 = c3 = self._interface_factory('c3', c)
        self.c4 = c4 = self._interface_factory('c4', c)
        self.d4 = d4 = self._interface_factory('d4', d)

        self._add_edge(self.nav_graph, a1.netbox, a1, b1.netbox, b1)
        self._add_edge(self.nav_graph, b1.netbox, b1, a1.netbox, a1)
        self._add_edge(self.nav_graph, a2.netbox, a2, b2.netbox, b2)
        self._add_edge(self.nav_graph, b2.netbox, b2, a2.netbox, a2)
        self._add_edge(self.nav_graph, a3.netbox, a3, c3.netbox, c3)
        self._add_edge(self.nav_graph, d4.netbox, d4, c4.netbox, c4)

        self.vlan__a1_b1 = a_vlan_between_a1_and_b1 = SwPortVlan(
            id=self._next_id(), interface=self.a1, vlan=Vlan(id=201, vlan=2)
        )

        self.vlans = patch.object(
            topology,
            '_get_vlans_map_layer2',
            return_value=(
                {
                    self.a1: [a_vlan_between_a1_and_b1],
                    self.b1: [a_vlan_between_a1_and_b1],
                    self.a2: [],
                    self.b2: [],
                    self.a3: [],
                    self.c3: [],
                },
                {
                    self.a: {201: a_vlan_between_a1_and_b1},
                    self.b: {201: a_vlan_between_a1_and_b1},
                    self.c: {},
                },
            ),
        )
        self.vlans.start()

        self.build_l2 = patch.object(
            vlan, 'build_layer2_graph', return_value=self.nav_graph
        )
        self.build_l2.start()

        bar = vlan.build_layer2_graph()
        # foo = topology._get_vlans_map_layer2(bar)

        vlan_by_interfaces, vlan_by_netbox = topology._get_vlans_map_layer2(
            self.nav_graph
        )

        self.netmap_graph = topology.build_netmap_layer2_graph(
            vlan.build_layer2_graph(), vlan_by_interfaces, vlan_by_netbox, None
        )
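Stripped of the NAV-specific factories, the wiring pattern above is just a pair of directed edges per physical link; a rough sketch (the real _add_edge presumably also attaches interface data):

import networkx as nx

nav_graph = nx.MultiDiGraph()
# each cable, e.g. a1 <-> b1, is stored as two directed edges
nav_graph.add_edge('a', 'b', key=('a1', 'b1'))
nav_graph.add_edge('b', 'a', key=('b1', 'a1'))
print(nav_graph.number_of_edges())  # 2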
Example #14
def parse_gml_lines(lines, label, destringizer):
    """Parse GML `lines` into a graph.
    """
    def tokenize():
        patterns = [
            r'[A-Za-z][0-9A-Za-z_]*\b',  # keys
            r'[+-]?(?:[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)(?:[Ee][+-]?[0-9]+)?',  # reals
            r'[+-]?[0-9]+',  # ints
            r'".*?"',  # strings
            r'\[',  # dict start
            r'\]',  # dict end
            r'#.*$|\s+'  # comments and whitespaces
        ]
        tokens = re.compile('|'.join('(' + pattern + ')'
                                     for pattern in patterns))
        lineno = 0
        for line in lines:
            length = len(line)
            pos = 0
            while pos < length:
                match = tokens.match(line, pos)
                if match is not None:
                    for i in range(len(patterns)):
                        group = match.group(i + 1)
                        if group is not None:
                            if i == 0:  # keys
                                value = group.rstrip()
                            elif i == 1:  # reals
                                value = float(group)
                            elif i == 2:  # ints
                                value = int(group)
                            else:
                                value = group
                            if i != 6:  # comments and whitespaces
                                yield (i, value, lineno + 1, pos + 1)
                            pos += len(group)
                            break
                else:
                    raise NetworkXError('cannot tokenize %r at (%d, %d)' %
                                        (line[pos:], lineno + 1, pos + 1))
            lineno += 1
        yield (None, None, lineno + 1, 1)  # EOF

    def unexpected(curr_token, expected):
        category, value, lineno, pos = curr_token
        raise NetworkXError(
            'expected %s, found %s at (%d, %d)' %
            (expected, repr(value) if value is not None else 'EOF', lineno,
             pos))

    def consume(curr_token, category, expected):
        if curr_token[0] == category:
            return next(tokens)
        unexpected(curr_token, expected)

    def parse_kv(curr_token):
        dct = defaultdict(list)
        while curr_token[0] == 0:  # keys
            key = curr_token[1]
            curr_token = next(tokens)
            category = curr_token[0]
            if category == 1 or category == 2:  # reals or ints
                value = curr_token[1]
                curr_token = next(tokens)
            elif category == 3:  # strings
                value = unescape(curr_token[1][1:-1])
                if destringizer:
                    try:
                        value = destringizer(value)
                    except ValueError:
                        pass
                curr_token = next(tokens)
            elif category == 4:  # dict start
                curr_token, value = parse_dict(curr_token)
            else:
                unexpected(curr_token, "an int, float, string or '['")
            dct[key].append(value)
        dct = {
            key: (value if not isinstance(value, list) or len(value) != 1 else
                  value[0])
            for key, value in dct.items()
        }
        return curr_token, dct

    def parse_dict(curr_token):
        curr_token = consume(curr_token, 4, "'['")  # dict start
        curr_token, dct = parse_kv(curr_token)
        curr_token = consume(curr_token, 5, "']'")  # dict end
        return curr_token, dct

    def parse_graph():
        curr_token, dct = parse_kv(next(tokens))
        if curr_token[0] is not None:  # EOF
            unexpected(curr_token, 'EOF')
        if 'graph' not in dct:
            raise NetworkXError('input contains no graph')
        graph = dct['graph']
        if isinstance(graph, list):
            raise NetworkXError('input contains more than one graph')
        return graph

    tokens = tokenize()
    graph = parse_graph()

    directed = graph.pop('directed', False)
    multigraph = graph.pop('multigraph', False)
    if not multigraph:
        G = nx.DiGraph() if directed else nx.Graph()
    else:
        G = nx.MultiDiGraph() if directed else nx.MultiGraph()
    G.graph.update((key, value) for key, value in graph.items()
                   if key != 'node' and key != 'edge')

    def pop_attr(dct, category, attr, i):
        try:
            return dct.pop(attr)
        except KeyError:
            raise NetworkXError("%s #%d has no '%s' attribute" %
                                (category, i, attr))

    nodes = graph.get('node', [])
    mapping = {}
    node_labels = set()
    for i, node in enumerate(nodes if isinstance(nodes, list) else [nodes]):
        id = pop_attr(node, 'node', 'id', i)
        if id in G:
            raise NetworkXError('node id %r is duplicated' % (id, ))
        if label is not None and label != 'id':
            node_label = pop_attr(node, 'node', label, i)
            if node_label in node_labels:
                raise NetworkXError('node label %r is duplicated' %
                                    (node_label, ))
            node_labels.add(node_label)
            mapping[id] = node_label
        G.add_node(id, **node)

    edges = graph.get('edge', [])
    for i, edge in enumerate(edges if isinstance(edges, list) else [edges]):
        source = pop_attr(edge, 'edge', 'source', i)
        target = pop_attr(edge, 'edge', 'target', i)
        if source not in G:
            raise NetworkXError('edge #%d has an undefined source %r' %
                                (i, source))
        if target not in G:
            raise NetworkXError('edge #%d has an undefined target %r' %
                                (i, target))
        if not multigraph:
            if not G.has_edge(source, target):
                G.add_edge(source, target, **edge)
            else:
                raise nx.NetworkXError(
                    """edge #%d (%r%s%r) is duplicated

Hint:  If this is a multigraph, add "multigraph 1" to the header of the file."""
                    % (i, source, '->' if directed else '--', target))
        else:
            key = edge.pop('key', None)
            if key is not None and G.has_edge(source, target, key):
                raise nx.NetworkXError(
                    'edge #%d (%r%s%r, %r) is duplicated' %
                    (i, source, '->' if directed else '--', target, key))
            G.add_edge(source, target, key, **edge)

    if label is not None and label != 'id':
        G = nx.relabel_nodes(G, mapping)
    return G
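parse_gml_lines is the workhorse behind networkx's public nx.parse_gml; a quick round trip:

import networkx as nx

gml = """graph [
  directed 1
  node [ id 0 label "a" ]
  node [ id 1 label "b" ]
  edge [ source 0 target 1 ]
]"""
G = nx.parse_gml(gml)
print(list(G.edges()))  # [('a', 'b')]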
Example #15
    EdgeList_Original = []
    conf_file = open('Configuration.txt', "r")
    lineList = conf_file.readlines()
    conf_file.close()

    #Find input folder name
    input_folder = lineList[-1].rstrip('\n') + "/input_files/"  # readlines() keeps the newline
    allFiles = find_all_filenames(input_folder)
    data_file = input_folder + allFiles[0]  #Read first input file

    #with open('/work/fz56/LANS-6.0/input_files/8.binetflow') as csvfile:
    with open(data_file) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            EdgeList_Original.append((row["SrcAddr"], row["DstAddr"]))
    GT = nx.MultiDiGraph()
    GT.add_edges_from(EdgeList_Original)
    #GT = nx.read_graphml("CTU13_4_Original.graphml")
    originalPropertyGT = Property(GT)
    original_in_degree = originalPropertyGT.getInDegree()
    Original_Node_In_Degree = open('Original_Node_In_Degree.txt', "w")
    for e in original_in_degree:
        Original_Node_In_Degree.write(str(e) + "\n")
    Original_Node_In_Degree.close()

    original_out_degree = originalPropertyGT.getOutDegree()
    Original_Node_Out_Degree = open('Original_Node_Out_Degree.txt', "w")
    for e in original_out_degree:
        Original_Node_Out_Degree.write(str(e) + "\n")
    Original_Node_Out_Degree.close()
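Property here is project-specific, so the following only mirrors the networkx side of the dump, with made-up addresses in place of the CSV input:

import networkx as nx

GT = nx.MultiDiGraph([('10.0.0.1', '10.0.0.2'),
                      ('10.0.0.1', '10.0.0.2'),  # parallel flows are kept
                      ('10.0.0.2', '10.0.0.3')])
with open('Original_Node_In_Degree.txt', 'w') as f:
    for node, deg in GT.in_degree():
        f.write(str(deg) + '\n')  # writes 0, 2, 1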
Example #16
def blockmodel(G, partitions, multigraph=False):
    """Returns a reduced graph constructed using the generalized block modeling
    technique.

    The blockmodel technique collapses nodes into blocks based on a
    given partitioning of the node set.  Each partition of nodes
    (block) is represented as a single node in the reduced graph.

    Edges between nodes in the block graph are added according to the
    edges in the original graph.  If the parameter multigraph is False
    (the default) a single edge is added with a weight equal to the
    sum of the edge weights between nodes in the original graph.
    Unweighted edges count as weight 1.  If the parameter multigraph
    is True then multiple edges are added, each with the edge data
    from the original graph.

    Parameters
    ----------
    G : graph
        A networkx Graph or DiGraph
    partitions : list of lists, or list of sets 
        The partition of the nodes.  Must be non-overlapping.
    multigraph : bool, optional
        If True return a MultiGraph with the edge data of the original
        graph applied to each corresponding edge in the new graph.
        If False return a Graph with the sum of the edge weights, or a
        count of the edges if the original graph is unweighted.

    Returns
    -------
    blockmodel : a Networkx graph object
    
    Examples
    --------
    >>> G=nx.path_graph(6)
    >>> partition=[[0,1],[2,3],[4,5]]
    >>> M=nx.blockmodel(G,partition)

    References
    ----------
    .. [1] Patrick Doreian, Vladimir Batagelj, and Anuska Ferligoj
    	"Generalized Blockmodeling",Cambridge University Press, 2004.
    """
    # Create sets of node partitions
    part = list(map(set, partitions))

    # Check for overlapping node partitions
    u = set()
    for p1, p2 in zip(part[:-1], part[1:]):
        u.update(p1)
        #if not u.isdisjoint(p2):  # Python 2.6 required
        if len(u.intersection(p2)) > 0:
            raise nx.NetworkXException("Overlapping node partitions.")

    # Initialize blockmodel graph
    if multigraph:
        if G.is_directed():
            M = nx.MultiDiGraph()
        else:
            M = nx.MultiGraph()
    else:
        if G.is_directed():
            M = nx.DiGraph()
        else:
            M = nx.Graph()

    # Add nodes and properties to blockmodel
    # The blockmodel nodes are node-induced subgraphs of G
    # Label them with integers starting at 0
    for i, p in enumerate(part):
        M.add_node(i)
        # The node-induced subgraph is stored as the node 'graph' attribute
        SG = G.subgraph(p)
        M.nodes[i]['graph'] = SG
        M.nodes[i]['nnodes'] = SG.number_of_nodes()
        M.nodes[i]['nedges'] = SG.number_of_edges()
        M.nodes[i]['density'] = nx.density(SG)

    # Create mapping between original node labels and new blockmodel node labels
    block_mapping = {}
    for n in M:
        nodes_in_block = M.nodes[n]['graph'].nodes()
        block_mapping.update(dict.fromkeys(nodes_in_block, n))

    # Add edges to block graph
    for u, v, d in G.edges(data=True):
        bmu = block_mapping[u]
        bmv = block_mapping[v]
        if bmu == bmv:  # no self loops
            continue
        if multigraph:
            # For multigraphs add an edge for each edge in original graph
            M.add_edge(bmu, bmv, **d)  # networkx 2.x passes edge attributes as keyword args
        else:
            # For graphs and digraphs add single weighted edge
            weight = d.get('weight',
                           1.0)  # default to 1 if no weight specified
            if M.has_edge(bmu, bmv):
                M[bmu][bmv]['weight'] += weight
            else:
                M.add_edge(bmu, bmv, weight=weight)
    return M
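A quick check of the weighted collapse on the docstring's example (recent networkx releases expose the same idea as nx.quotient_graph):

import networkx as nx

G = nx.path_graph(6)
M = blockmodel(G, [[0, 1], [2, 3], [4, 5]])
print(sorted(M.edges(data=True)))
# [(0, 1, {'weight': 1.0}), (1, 2, {'weight': 1.0})]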
Example #17
def main(
    original_in_degree, original_out_degree, parent_dir
    # original_average_neighbor_degree, \
    # original_pageRank, \
    # original_triangle, \
    # original_local_clustering_coefficient,\
    # original_core_number
):
    #############################Random Node###############################################################################
    #GraphGT = nx.read_graphml("Simulation.graphml")
    EdgeList_Simulation = []

    simulated_file = parent_dir + "/SimulatedGraph/localgen_0.csv"
    #with open('/work/fz56/LANS-6.0/SimulatedGraph/localgen_0.csv') as csvfile:
    with open(simulated_file) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            EdgeList_Simulation.append((row["source"], row["destination"]))
    GraphGT = nx.MultiDiGraph()
    GraphGT.add_edges_from(EdgeList_Simulation)

    PropertyGT = Property(GraphGT)
    propertyDistance = [0.0] * 7

    in_degree = PropertyGT.getInDegree()
    New_Node_In_Degree = open('New_Node_In_Degree.txt', "w")
    for e in in_degree:
        New_Node_In_Degree.write(str(e) + "\n")
    New_Node_In_Degree.close()

    out_degree = PropertyGT.getOutDegree()
    New_Node_Out_Degree = open('New_Node_Out_Degree.txt', "w")
    for e in out_degree:
        New_Node_Out_Degree.write(str(e) + "\n")
    New_Node_Out_Degree.close()

    # average_neighbor_degree = PropertyGT.getAverageNeighborDegree()
    # New_Average_Neighbor_Degree = open('New_Average_Neighbor_Degree.txt', "w")
    # for e in average_neighbor_degree:
    #     New_Average_Neighbor_Degree.write(str(e) + "\n")
    # New_Average_Neighbor_Degree.close()
    #
    # pageRank = PropertyGT.getPageRank()
    # New_pageRank = open('New_pageRank.txt', "w")
    # for e in pageRank:
    #     New_pageRank.write(str(e) + "\n")
    # New_pageRank.close()
    #
    # triangle = PropertyGT.getTriangles()
    # New_triangle = open('New_triangle.txt', "w")
    # for e in triangle:
    #     New_triangle.write(str(e) + "\n")
    # New_triangle.close()
    #
    # local_clustering_coefficient = PropertyGT.getLocalClusteringCoefficient()
    # New_local_clustering_coefficient = open('New_local_clustering_coefficient.txt', "w")
    # for e in local_clustering_coefficient:
    #     New_local_clustering_coefficient.write(str(e) + "\n")
    # New_local_clustering_coefficient.close()
    #
    # core_number = PropertyGT.getCoreNumber()
    # New_core_number = open('New_core_number.txt', "w")
    # for e in core_number:
    #     New_core_number.write(str(e) + "\n")
    # New_core_number.close()

    propertyDistance[0] = originalPropertyGT.averageKL(original_in_degree,
                                                       in_degree)
    propertyDistance[1] = originalPropertyGT.averageKL(original_out_degree,
                                                       out_degree)
    # propertyDistance[2] = originalPropertyGT.averageKL(original_average_neighbor_degree, average_neighbor_degree)
    # propertyDistance[3] = originalPropertyGT.averageKL(original_pageRank, pageRank)
    # propertyDistance[4] = originalPropertyGT.averageKL(original_triangle, triangle)
    # propertyDistance[5] = originalPropertyGT.averageKL(original_local_clustering_coefficient, local_clustering_coefficient)
    # propertyDistance[6] = originalPropertyGT.averageKL(original_core_number, core_number)

    print('\n')
    for i in range(len(propertyDistance)):
        print(propertyDistance[i], end="\t")
    print('\n')
Example #18
def degreeMessageNumberCSV(log_directory, channel_name, output_directory,
                           startingDate, startingMonth, endingDate,
                           endingMonth):
    """ creates two csv files having no. of nodes with a certain in and out-degree for number of messages respectively

    Args:
        log_directory (str): Location of the logs (Assumed to be arranged in directory structure as : <year>/<month>/<day>/<log-file-for-channel>.txt)
        channel_name (str): Channel to be perform analysis on
        output_directory (str): Location of output directory
        startingDate (int): Date to start the analysis (in conjunction with startingMonth)
        startingMonth (int): Date to start the analysis (in conjunction with startingDate)
        endingDate (int): Date to end the analysis (in conjunction with endingMonth)
        endingMonth (int): Date to end the analysis (in conjunction with endingDate)

    Returns:
       None

    """

    nodes_with_OUT_degree_per_day = []
    nodes_with_IN_degree_per_day = []
    nodes_with_TOTAL_degree_per_day = []

    max_degree_possible = 1000

    # output_dir_degree = output_directory+"degreeMessageNumberCSV/"
    output_dir_degree = output_directory
    output_file_out_degree = output_dir_degree + "msg_no_out_degree.csv"
    output_file_in_degree = output_dir_degree + "msg_no_in_degree.csv"
    output_file_total_degree = output_dir_degree + "msg_no_total_degree.csv"

    # print "Creating a new output folder"
    # os.system("rm -rf "+output_dir_degree)
    # os.system("mkdir "+output_dir_degree)

    if not os.path.exists(os.path.dirname(output_dir_degree)):
        try:
            os.makedirs(os.path.dirname(output_dir_degree))
            os.system("rm " + output_file_out_degree)
            os.system("touch " + output_file_out_degree)
            os.system("rm " + output_file_in_degree)
            os.system("touch " + output_file_in_degree)
            os.system("rm " + output_file_total_degree)
            os.system("touch " + output_file_total_degree)
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

    rem_time = None  #remembers the time of the last message of the file parsed before the current file

    for folderiterator in range(startingMonth, endingMonth + 1):
        temp1 = "0" if folderiterator < 10 else ""
        for fileiterator in range(
                startingDate if folderiterator == startingMonth else 1,
                endingDate + 1 if folderiterator == endingMonth else 32):
            temp2 = "0" if fileiterator < 10 else ""
            filePath = log_directory + temp1 + str(
                folderiterator) + "/" + temp2 + str(
                    fileiterator) + "/" + channel_name + ".txt"
            if not os.path.exists(filePath):
                if not ((folderiterator == 2 and
                         (fileiterator == 29 or fileiterator == 30
                          or fileiterator == 31)) or
                        ((folderiterator == 4 or folderiterator == 6
                          or folderiterator == 9 or folderiterator == 11)
                         and fileiterator == 31)):
                    print "[Error] Path " + filePath + " doesn't exist"
                continue
            with open(filePath) as f:
                content = f.readlines()  # content stores all the lines of the file channel_name

            nicks = []  #list of all the nicknames
            '''
				Getting all the nicknames in a list nicks[]
			'''
            for i in content:
                if (i[0] != '=' and "] <" in i and "> " in i):
                    m = re.search(r"\<(.*?)\>", i)
                    if m.group(0) not in nicks:
                        nicks.append(
                            m.group(0)
                        )  #used regex to get the string between <> and appended it to the nicks list

            for i in range(0, len(nicks)):
                nicks[i] = nicks[i][1:-1]  #removed <> from the nicknames

            for i in range(0, len(nicks)):
                nicks[i] = ext.util.correctLastCharCR(nicks[i])

            for line in content:
                if (
                        line[0] == '=' and "changed the topic of" not in line
                ):  #excluding the condition when user changes the topic. Search for only nick changes
                    nick1 = ext.util.correctLastCharCR(
                        line[line.find("=") + 1:line.find(" is")][3:])
                    nick2 = ext.util.correctLastCharCR(
                        line[line.find("wn as") + 1:line.find("\n")][5:])
                    if nick1 not in nicks:
                        nicks.append(nick1)
                    if nick2 not in nicks:
                        nicks.append(nick2)

            #print("printing nicks***********************************")
            #print(nicks)
            '''
				Forming list of lists for avoiding nickname duplicacy
			'''
            nick_same_list = [
                [] for i in range(len(nicks))
            ]  #list of list with each list having all the nicks for that particular person

            for line in content:
                if (line[0] == '=' and "changed the topic of" not in line):
                    line1 = line[line.find("=") + 1:line.find(" is")][3:]
                    line2 = line[line.find("wn as") + 1:line.find("\n")][5:]
                    line1 = ext.util.correctLastCharCR(line1)
                    line2 = ext.util.correctLastCharCR(line2)
                    for i in range(len(nick_same_list)):  # one slot per known nick is enough
                        if line1 in nick_same_list[
                                i] or line2 in nick_same_list[i]:
                            nick_same_list[i].append(line1)
                            nick_same_list[i].append(line2)
                            break
                        if not nick_same_list[i]:
                            nick_same_list[i].append(line1)
                            nick_same_list[i].append(line2)
                            break

            #print("printing nick_same_list****************************")
            #print(nick_same_list)
            '''=========================== Plotting the conversation graph =========================== '''

            graph_conversation = nx.MultiDiGraph()  # graph with multiple directed edges between clients
            for line in content:
                flag_comma = 0
                if (line[0] != '=' and "] <" in line and "> " in line):
                    m = re.search(r"\<(.*?)\>", line)
                    var = m.group(0)[1:-1]
                    var = ext.util.correctLastCharCR(var)
                    for d in range(len(nicks)):
                        if var in nick_same_list[d]:
                            nick_sender = nick_same_list[d][0]
                            break
                        else:
                            nick_sender = var

                    for i in nicks:
                        rec_list = [e.strip() for e in line.split(':')
                                    ]  #receiver list splited about :
                        rec_list[1] = rec_list[1][rec_list[1].find(">") +
                                                  1:len(rec_list[1])]
                        rec_list[1] = rec_list[1][1:]
                        if not rec_list[1]:  #index 0 will contain time 14:02
                            break
                        for k in range(0, len(rec_list)):
                            if (rec_list[k]):  #checking for \
                                rec_list[k] = ext.util.correctLastCharCR(
                                    rec_list[k])
                        for z in rec_list:
                            if (z == i):
                                if (var != i):
                                    for d in range(len(nicks)):
                                        if i in nick_same_list[d]:
                                            nick_receiver = nick_same_list[d][
                                                0]
                                            break
                                        else:
                                            nick_receiver = i

                                    graph_conversation.add_edge(
                                        nick_sender,
                                        nick_receiver,
                                        weight=line[1:6])

                        if "," in rec_list[
                                1]:  #receiver list may of the form <Dhruv> Rohan, Ram :
                            flag_comma = 1
                            rec_list_2 = [
                                e.strip() for e in rec_list[1].split(',')
                            ]
                            for y in range(0, len(rec_list_2)):
                                if (rec_list_2[y]):  #checking for \
                                    rec_list_2[y] = ext.util.correctLastCharCR(
                                        rec_list_2[y])
                            for j in rec_list_2:
                                if (j == i):
                                    if (var != i):
                                        for d in range(len(nicks)):
                                            if i in nick_same_list[d]:
                                                nick_receiver = nick_same_list[
                                                    d][0]
                                                break
                                            else:
                                                nick_receiver = i

                                        graph_conversation.add_edge(
                                            nick_sender,
                                            nick_receiver,
                                            weight=line[1:6])

                        if (flag_comma == 0):  # receiver list can be <Dhruv> Rohan, Hi!
                            rec = line[line.find(">") + 1:line.find(", ")]
                            rec = rec[1:]
                            rec = ext.util.correctLastCharCR(rec)
                            if (rec == i):
                                if (var != i):
                                    for d in range(len(nicks)):
                                        if i in nick_same_list[d]:
                                            nick_receiver = nick_same_list[d][
                                                0]
                                            break
                                        else:
                                            nick_receiver = i

                                    graph_conversation.add_edge(
                                        nick_sender,
                                        nick_receiver,
                                        weight=line[1:6])

            for u, v, d in graph_conversation.edges(data=True):
                d['label'] = d.get('weight', '')
            # output_file_out_degree=out_dir_msg_time+channel_name+"_2013_"+str(folderiterator)+"_"+str(fileiterator)+"_msg_time.png"
            # print "Generated " + output_file_out_degree
            # A = nx.drawing.nx_agraph.to_agraph(graph_conversation)
            # A.layout(prog='dot')
            # A.draw(output_file_out_degree)
            nodes_with_OUT_degree = [0] * max_degree_possible
            nodes_with_IN_degree = [0] * max_degree_possible
            nodes_with_TOTAL_degree = [0] * max_degree_possible

            print(dict(graph_conversation.out_degree()),
                  dict(graph_conversation.in_degree()),
                  dict(graph_conversation.degree()))
            print(dict(graph_conversation.out_degree()).values())
            print(dict(graph_conversation.in_degree()).values())
            print(dict(graph_conversation.degree()).values())

            for degree in dict(graph_conversation.out_degree()).values():
                nodes_with_OUT_degree[degree] += 1

            for degree in dict(graph_conversation.in_degree()).values():
                nodes_with_IN_degree[degree] += 1

            for degree in dict(graph_conversation.degree()).values():
                nodes_with_TOTAL_degree[degree] += 1

            print "\n"
            nodes_with_OUT_degree.insert(0, sum(nodes_with_OUT_degree))
            nodes_with_OUT_degree.insert(
                0,
                str(folderiterator) + "-" + str(fileiterator))
            nodes_with_OUT_degree_per_day.append(nodes_with_OUT_degree)

            nodes_with_IN_degree.insert(0, sum(nodes_with_IN_degree))
            nodes_with_IN_degree.insert(
                0,
                str(folderiterator) + "-" + str(fileiterator))
            nodes_with_IN_degree_per_day.append(nodes_with_IN_degree)

            nodes_with_TOTAL_degree.insert(0, sum(nodes_with_TOTAL_degree))
            nodes_with_TOTAL_degree.insert(
                0,
                str(folderiterator) + "-" + str(fileiterator))
            nodes_with_TOTAL_degree_per_day.append(nodes_with_TOTAL_degree)

    # print nodes_with_OUT_degree_per_day
    # print nodes_with_IN_degree_per_day
    # print nodes_with_TOTAL_degree_per_day

    temp = ['deg' + str(i) for i in range(max_degree_possible)]
    temp.insert(0, 'total')
    temp.insert(0, 'out-degree/day>')

    nodes_with_OUT_degree_per_day.insert(0, temp)
    column_wise = zip(*nodes_with_OUT_degree_per_day)
    with open(output_file_out_degree, 'w', newline='') as myfile:
        wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
        for col in column_wise:
            wr.writerow(col)

    temp = ['deg' + str(i) for i in range(max_degree_possible)]
    temp.insert(0, 'total')
    temp.insert(0, 'in-degree/day>')

    nodes_with_IN_degree_per_day.insert(0, temp)
    column_wise = zip(*nodes_with_IN_degree_per_day)
    with open(output_file_in_degree, 'w', newline='') as myfile2:
        wr = csv.writer(myfile2, quoting=csv.QUOTE_ALL)
        for col in column_wise:
            wr.writerow(col)

    temp = ['deg' + str(i) for i in range(max_degree_possible)]
    temp.insert(0, 'total')
    temp.insert(0, 'degree/day>')

    nodes_with_TOTAL_degree_per_day.insert(0, temp)
    column_wise = zip(*nodes_with_TOTAL_degree_per_day)
    with open(output_file_total_degree, 'w', newline='') as myfile3:
        wr = csv.writer(myfile3, quoting=csv.QUOTE_ALL)
        for col in column_wise:
            wr.writerow(col)
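The zip(*rows) transpose used for the column-wise CSV dumps, in isolation (rows made up for illustration):

import csv

rows = [['out-degree/day>', 'total', 'deg0', 'deg1'],
        ['3-1', 5, 2, 3],
        ['3-2', 4, 1, 3]]
with open('msg_no_out_degree.csv', 'w', newline='') as f:
    wr = csv.writer(f, quoting=csv.QUOTE_ALL)
    for col in zip(*rows):  # transpose: each day becomes a column
        wr.writerow(col)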
Example #19
    def query(self,
              topic,
              max_depth=4,
              config=None,
              pivot_on=list(),
              dont_pivot_on=list(['enrichment', 'classification']),
              direction='successors'):
        """

            :param topic: a graph to return the context of.  At least one node ID in topic \
             must be in full graph g to return any context.
            :param max_depth: The maximum distance from the topic to search
            :param config: The titanDB configuration to use if not using the one configured with the plugin
            :param pivot_on: A list of attribute types to pivot on.
            :param dont_pivot_on: A list of attribute types to not pivot on.
            :param direction: The direction to traverse the graph
            :return: subgraph in networkx format

            NOTE: If an attribute is in both pivot_on and dont_pivot_on it will not be pivoted on
        """
        if config is None:
            config = self.titandb_config

        # Connect to TitanDB Database
        titan_graph = TITAN_Graph(config)

        # Convert the topic nodes into titanDB eids
        current_nodes = set()
        eid_uri_map = {}
        # Validate the node URI
        for node in topic.nodes():
            titan_node = titan_graph.vertices.index.get_unique(
                "uri", topic.node[node]["uri"])
            if titan_node:
                current_nodes.add(titan_node.eid)
                eid_uri_map[titan_node.eid] = node
        topic_nodes = frozenset(current_nodes)
        subgraph_nodes = current_nodes
        #sg = copy.deepcopy(topic)
        sg = nx.MultiDiGraph()
        sg.add_nodes_from(topic.nodes(data=True))
        sg.add_edges_from(topic.edges(data=True))
        distances = {node: 0 for node in topic.nodes()}
        #    Below 1 line is probably not necessary
        #    pivot_edges = list()
        #    print "Initial current Nodes: {0}".format(current_nodes)  # DEBUG
        for i in range(1, max_depth + 1):
            new_nodes = set()
            new_out_edges = set()
            new_in_edges = set()
            for eid in current_nodes:
                #            properties = og.node[node]
                node = titan_graph.vertices.get(eid)
                # If all directions, get all neighbors
                if direction == 'all' or eid in topic_nodes:
                    try:
                        new_nodes = new_nodes.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).both".format(eid))
                        })
                    except:
                        pass
                    try:
                        new_out_edges = new_out_edges.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).outE".format(eid))
                        })
                    except:
                        pass
                    try:
                        new_in_edges = new_in_edges.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).inE".format(eid))
                        })
                    except:
                        pass
                # If there is a list of things to NOT pivot on, pivot on everything else
                elif dont_pivot_on and 'attribute' in node and node.map(
                )['attribute'] not in dont_pivot_on:
                    try:
                        new_nodes = new_nodes.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).both".format(eid))
                        })
                    except:
                        pass
                    try:
                        new_out_edges = new_out_edges.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).outE".format(eid))
                        })
                    except:
                        pass
                    try:
                        new_in_edges = new_in_edges.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).inE".format(eid))
                        })
                    except:
                        pass
                # Otherwise, only get all neighbors if the node is to be pivoted on.
                elif 'attribute' in node and \
                      node['attribute'] in pivot_on and \
                      node['attribute'] not in dont_pivot_on:
                    try:
                        new_nodes = new_nodes.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).both".format(eid))
                        })
                    except:
                        pass
                    try:
                        new_out_edges = new_out_edges.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).outE".format(eid))
                        })
                    except:
                        pass
                    try:
                        new_in_edges = new_in_edges.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).inE".format(eid))
                        })
                    except:
                        pass
                # If not all neighbors and not in pivot, if we are traversing up, get predecessors
                elif direction == 'predecessors':
                    # add edges to make predecessors successors for later probability calculation
                    try:
                        new_nodes = new_nodes.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).out".format(eid))
                        })
                    except:
                        pass
                    # add the reverse edges. These opposite of these edges will get placed in the subgraph
                    try:
                        new_in_edges = new_in_edges.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).inE".format(eid))
                        })
                    except:
                        pass
                # Otherwise assume we are traversing down and get all successors
                else:  # default to successors
                    try:
                        new_nodes = new_nodes.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).both".format(eid))
                        })
                    except:
                        pass
                    try:
                        new_out_edges = new_out_edges.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).outE".format(eid))
                        })
                    except:
                        pass

            # Remove nodes from new_nodes that are already in the subgraph so we don't overwrite their topic distance
            current_nodes = new_nodes - subgraph_nodes
            # combine the new nodes into the subgraph nodes set
            subgraph_nodes = subgraph_nodes.union(current_nodes)

            # Copy nodes, out-edges, in-edges, and reverse in-edges into subgraph
            # Add nodes
            for neighbor_eid in new_nodes:
                attr = titan_graph.vertices.get(neighbor_eid).map()
                sg.add_node(attr['uri'], attr)
                eid_uri_map[neighbor_eid] = attr['uri']
            # Add predecessor edges
            for out_eid in new_out_edges:
                out_edge = titan_graph.edges.get(out_eid)
                attr = out_edge.map()
                sg.add_edge(eid_uri_map[out_edge._outV],
                            eid_uri_map[out_edge._inV], out_eid, attr)
            # Add successor edges & reverse pivot edges
            for in_eid in new_in_edges:
                in_edge = titan_graph.edges.get(in_eid)
                attr = in_edge.map()
                attr['origin'] = "subgraph_creation_pivot"
                sg.add_edge(eid_uri_map[in_edge._inV],
                            eid_uri_map[in_edge._outV], in_eid, attr)

            # Set the distance from the topic on the nodes in the graph
            for eid in current_nodes:
                if eid_uri_map[eid] not in distances:
                    distances[eid_uri_map[eid]] = i

            # logging.debug("Current nodes: {0}".format(current_nodes))  # DEBUG

        # add the distances to the subgraph
        nx.set_node_attributes(sg, "topic_distance", distances)

        logging.debug(nx.info(sg))  # DEBUG
        # Return the subgraph
        return sg
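For contrast, the hop-distance bookkeeping that the loop above maintains by hand can be reproduced with a plain breadth-first search on an ordinary networkx graph. A minimal sketch, assuming a hypothetical seed set topic_uris and a maximum pivot depth:

import networkx as nx


def topic_distances(g, topic_uris, max_depth):
    # BFS outward from the topic seeds, recording the first hop count seen
    distances = {uri: 0 for uri in topic_uris}
    frontier = set(topic_uris)
    for depth in range(1, max_depth + 1):
        frontier = {n for u in frontier for n in g.neighbors(u)} - distances.keys()
        for n in frontier:
            distances[n] = depth
    return distances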
Example #20
0
 def __init__(self):
     Visitor.__init__(self)
     self.edge_types = ["ast"]
     self.G = nx.MultiDiGraph()
Example #21
0
def strongly_connected_comp_splitter(graph, edge_weight):
    """
    Written by JC
    Breaks graph into strongly connected components. Removes components with no edges.
    Adds all edges and nodes contained by strongly connected components to a new graph. Labels nodes and edges by a component ID.
    Since this function substitutes for a function that contracted edges in addition to splitting components,
    this version continues to add a 'path' attribute that is used in subsequent functions.
    Args:
        graph (Networkx DiGraph): graph of required edges
        edge_weight (str): edge attribute to designate as a weight
    Returns:
        MultiDiGraph of strongly connected components with edge attributes tailored to subsequent functions
    """
    graph_split = nx.MultiDiGraph()
    removed_comps = []
    comp_list = []
    print('\nInitial Strongly Connected Component Breakdown')
    for i, comp in enumerate([
            graph.subgraph(c).copy()
            for c in nx.strongly_connected_components(graph)
    ]):
        print('Comp: {}'.format(i))
        print('\tEdges: {}'.format(len(comp.edges())))
        print('\tNodes: {}'.format(len(comp.nodes())))
        # for n in comp.nodes():
        #     print(comp.degree(n))
        # exclude components with only one node
        if len(comp.nodes()) > 1:
            comp_list += [i]
            for cc in connected_comp_edge_handler(comp, edge_weight):
                start_node, end_node, attr = cc

                attr['comp'] = i
                graph_split.add_edge(start_node, end_node, **attr)

                graph_split.nodes[start_node]['comp'] = i
                graph_split.nodes[end_node]['comp'] = i
                graph_split.nodes[start_node]['y'] = graph.nodes[start_node][
                    'y']
                graph_split.nodes[start_node]['x'] = graph.nodes[start_node][
                    'x']
                graph_split.nodes[end_node]['y'] = graph.nodes[end_node]['y']
                graph_split.nodes[end_node]['x'] = graph.nodes[end_node]['x']
        else:
            removed_comps += [i]
    for comp in removed_comps:
        print('Comp {} removed'.format(comp))
    comp_corrector_dic = {comp: i for i, comp in enumerate(comp_list)}
    graph_split_copy = graph_split.copy()
    for edge in graph_split.edges(data=True):
        # rename comp
        graph_split_copy[edge[0]][edge[1]][0]['comp'] = comp_corrector_dic[
            edge[2]['comp']]
        # remove parallel edges
        if 1 in graph_split_copy[edge[0]][edge[1]]:
            graph_split_copy.remove_edge(edge[0], edge[1], key=1)
    print('Comp numbers reordered')
    print('Parallel edges removed')
    graph_split = graph_split_copy
    return graph_split
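Stripped of the attribute bookkeeping, the core of the routine above is the standard strongly-connected-component decomposition plus the single-node filter. A minimal sketch of that step alone:

import networkx as nx

g = nx.DiGraph([(1, 2), (2, 1), (3, 4)])  # one 2-cycle plus a stray edge
comps = [g.subgraph(c).copy() for c in nx.strongly_connected_components(g)]
kept = [c for c in comps if len(c) > 1]   # drop single-node components
print(len(comps), len(kept))              # 3 components found, 1 kept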
Example #22
0
 def __init__(self):
     Visitor.__init__(self)
     self.edge_types = ["ast", "cfg", "in", "data"]
     self.G = nx.MultiDiGraph()
Example #23
0
 def clear(self):
     self._function_map.clear()
     self.callgraph = networkx.MultiDiGraph()
     self.block_map.clear()
Example #24
0
 def test_multidigraph(self):
     G = nx.MultiDiGraph(self.edges)
     x = list(find_cycle(G, self.nodes))
     x_ = [(0, 1, 0), (1, 0, 0)]  # (1, 0, 1)
     assert_equal(x[0], x_[0])
     assert_equal(x[1][:2], x_[1][:2])
Example #25
0
	def parseJsonToNx(self):
			command = "curl --max-time 30 -s http://"+self.IpPort+"/stats/switches"
			result = os.popen(command).read()
			if result != "":
				try:
					switches = json.loads(result)
				except ValueError: 
					print 'Decoding JSON has failed'
					print "Error: something does not work in getting info from ryu controller"
					sys.exit(-2)

				for switch in switches:
					command = "curl --max-time 30 -s http://"+self.IpPort+"/stats/portdesc/"+str(switch)
					result = os.popen(command).read()
					if result != "":
						try:
							switch_ports = json.loads(result)
						except ValueError: 
							print 'Decoding JSON has failed'
							print "Error: something does not work in getting info from ryu controller"
							sys.exit(-2)
						self.ports[str(switch)] = switch_ports[str(switch)]

				command = "curl --max-time 30 -s http://"+self.IpPort+"/v1.0/topology/links" 
				result = os.popen(command).read()

				if result != "":
					try:
						self.topology = json.loads(result)
					except ValueError: 
						print 'Decoding JSON has failed'
						print "Error: something does not work in getting info from ryu controller"
						sys.exit(-2)

					self.nx_topology = nx.MultiDiGraph()
					self.nx_topology.clear()

					index = 0

					for link in self.topology:
						src = link['src']['dpid']
						dst = link['dst']['dpid']
						src_port = link['src']['name']
						dst_port = link['dst']['name']
						src_port_no = link['src']['port_no']
						dst_port_no = link['dst']['port_no']
						src_mac = link['src']['hw_addr'].replace(":","")
						dst_mac = link['dst']['hw_addr'].replace(":","")				

						src_capacity = 0.0
						src_ports = self.ports[str(int(src,16))]
						for port in src_ports:
							if port['name'] == src_port:
								src_capacity = int(port['curr_speed'])/1000						
								break
						if src_capacity == 0.0:
							print "Error - SRC Capacity cannot be 0.0"
							sys.exit(-1)
					
						dst_capacity = 0.0
						dst_ports = self.ports[str(int(dst,16))]
						for port in dst_ports:
							if port['name'] == dst_port:
								dst_capacity = int(port['curr_speed'])/1000						
								break
						if dst_capacity == 0.0:
							print "Error - DST Capacity cannot be 0.0"
							sys.exit(-1)

						if src_capacity <= dst_capacity:
							capacity = src_capacity
						else:
							capacity = dst_capacity	

						if capacity >= self.max_capacity:
							self.max_capacity = capacity		

						self.sip.update(str(index))
						id_ = str(self.sip.hash())

						self.nx_topology.add_edge(src, dst, capacity=capacity, allocated=0.0, src_port=src_port, dst_port=dst_port, src_port_no=src_port_no, dst_port_no=dst_port_no, src_mac=src_mac, dst_mac=dst_mac, flows=[], id=id_) 

						index = index + 1

				else:
					print "Error: something does not work in getting info from ryu controller"
					sys.exit(-2)
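The same topology pull can be done without shelling out to curl. A sketch using the requests library against the same Ryu REST endpoint as above (ip_port is a placeholder, and only the link query is shown):

import requests
import networkx as nx


def fetch_links(ip_port, timeout=30):
    # query the Ryu topology REST API and build a directed multigraph
    links = requests.get(
        "http://{0}/v1.0/topology/links".format(ip_port),
        timeout=timeout).json()
    topo = nx.MultiDiGraph()
    for link in links:
        topo.add_edge(link['src']['dpid'], link['dst']['dpid'],
                      src_port=link['src']['name'],
                      dst_port=link['dst']['name'])
    return topo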
Example #26
0
 def test_multidigraph_ignore2(self):
     # Loop traversed an edge while ignoring its orientation.
     G = nx.MultiDiGraph([(0, 1), (1, 2), (1, 2)])
     x = list(find_cycle(G, [0, 1, 2], orientation='ignore'))
     x_ = [(1, 2, 0, FORWARD), (1, 2, 1, REVERSE)]
     assert_equal(x, x_)
Example #27
0
# coding=UTF-8
import networkx as nx
import matplotlib.pyplot as plt
import math
import re as re
from matplotlib.patches import FancyArrowPatch, Circle
import numpy as np
import matplotlib.patches as mpatches
import Analyser
from netwulf import visualize

G = nx.Graph()  # overall graph
H = nx.Graph()  # connected subgraph
J = nx.Graph()  # undirected graph
L = nx.MultiDiGraph()  # multigraph
weight = []
abnormal_IP_list = []


# populate the overall graph with nodes and edges
def create_wholeGraph():
    global G, sql
    sql = "select * from Link"
    results = Analyser.get_data(sql)
    i = 0
    for r in results:
        G.add_edge(r[1], r[2], weight=r[3])
        i = i + 1


def list_subGraph():
    pass  # body truncated in the source
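list_subGraph is cut off in the source. Judging by the name, and by H being labelled a connected subgraph, it presumably enumerates connected components; a minimal sketch of that idea with a hypothetical helper name, reusing the nx import above:

def connected_subgraphs(g):
    # one subgraph per connected component, largest first
    comps = sorted(nx.connected_components(g), key=len, reverse=True)
    return [g.subgraph(c).copy() for c in comps]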
Example #28
0
###########################################################################
# vladShortestPath by Teague Forren
# takes in proprietary train graph *_in.txt file
# prints out to system how many bags of blood vlad will need for each graph
# example: python vladShortestPath.py examples/sample_in.txt
###########################################################################
import networkx as nx
import sys

#globals
trainGraph = nx.MultiDiGraph()  #create networkx graph
hourRange = range(18, 25) + range(1, 7)
f = open(sys.argv[1])
routesFile = f.readlines()
f.close()


#validateTime
# takes start and duration hours and returns whether Vlad can travel in that window
def validateTime(start, duration):
    result = False

    end = start + duration
    if end > 24:
        end -= 24
    if start >= 18 and end >= 18:  # starts in pm and ends in pm
        result = True
    if start <= 6 and end <= 6:  # starts and ends in am
        result = True
    if start >= 18 and end <= 6:  # starts in pm and ends in am
        result = True
    return result
Example #29
0
import random

import networkx as nx


def directed_configuration_model(in_degree_sequence,
                                 out_degree_sequence,
                                 create_using=None,
                                 seed=None):
    """Return a directed_random graph with the given degree sequences.

    The configuration model generates a random directed pseudograph
    (graph with parallel edges and self loops) by randomly assigning
    edges to match the given degree sequences.

    Parameters
    ----------
    in_degree_sequence :  list of integers 
       Each list entry corresponds to the in-degree of a node.
    out_degree_sequence :  list of integers 
       Each list entry corresponds to the out-degree of a node.
    create_using : graph, optional (default MultiDiGraph)
       Return graph of this type. The instance will be cleared.
    seed : hashable object, optional
        Seed for random number generator.   

    Returns
    -------
    G : MultiDiGraph
        A graph with the specified degree sequences.
        Nodes are labeled starting at 0 with an index
        corresponding to the position in the degree sequences.

    Raises
    ------
    NetworkXError
        If the degree sequences do not have the same sum.

    See Also
    --------
    configuration_model
    
    Notes
    -----
    Algorithm as described by Newman [1]_.

    A non-graphical degree sequence (not realizable by some simple
    graph) is allowed since this function returns graphs with self
    loops and parallel edges.  An exception is raised if the degree
    sequences do not have the same sum.

    This configuration model construction process can lead to
    duplicate edges and loops.  You can remove the self-loops and
    parallel edges (see below) which will likely result in a graph
    that doesn't have the exact degree sequence specified.  This
    "finite-size effect" decreases as the size of the graph increases.

    References
    ----------
    .. [1] Newman, M. E. J. and Strogatz, S. H. and Watts, D. J.
       Random graphs with arbitrary degree distributions and their applications
       Phys. Rev. E, 64, 026118 (2001)
        
    Examples
    --------
    >>> D=nx.DiGraph([(0,1),(1,2),(2,3)]) # directed path graph
    >>> din=list(D.in_degree().values())
    >>> dout=list(D.out_degree().values())
    >>> din.append(1) 
    >>> dout[0]=2
    >>> D=nx.directed_configuration_model(din,dout)

    To remove parallel edges:

    >>> D=nx.DiGraph(D)

    To remove self loops:
    
    >>> D.remove_edges_from(D.selfloop_edges())
    """
    if sum(in_degree_sequence) != sum(out_degree_sequence):
        raise nx.NetworkXError(
            'Invalid degree sequences. Sequences must have equal sums.')

    if create_using is None:
        create_using = nx.MultiDiGraph()

    if seed is not None:
        random.seed(seed)

    nin = len(in_degree_sequence)
    nout = len(out_degree_sequence)

    # pad in- or out-degree sequence with zeros to match lengths
    if nin > nout:
        out_degree_sequence.extend((nin - nout) * [0])
    else:
        in_degree_sequence.extend((nout - nin) * [0])

    # start with empty N-node graph
    N = len(in_degree_sequence)

    # allow multiedges and selfloops
    G = nx.empty_graph(N, create_using)

    if N == 0 or max(in_degree_sequence) == 0:  # done if no edges
        return G

    # build stublists of available degree-repeated stubs
    # e.g. for degree_sequence=[3,2,1,1,1]
    # initially, stublist=[1,1,1,2,2,3,4,5]
    # i.e., node 1 has degree=3 and is repeated 3 times, etc.
    in_stublist = []
    for n in G:
        for i in range(in_degree_sequence[n]):
            in_stublist.append(n)

    out_stublist = []
    for n in G:
        for i in range(out_degree_sequence[n]):
            out_stublist.append(n)

    # shuffle stublists and assign pairs by removing 2 elements at a time
    random.shuffle(in_stublist)
    random.shuffle(out_stublist)
    while in_stublist and out_stublist:
        source = out_stublist.pop()
        target = in_stublist.pop()
        G.add_edge(source, target)

    G.name = "directed configuration_model %d nodes %d edges" % (G.order(),
                                                                 G.size())
    return G
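A minimal smoke test of the generator above (the degree values and the seed are arbitrary choices):

din = [1, 1, 1, 1]   # in-degrees sum to 4
dout = [2, 2, 0, 0]  # out-degrees also sum to 4
D = directed_configuration_model(din, dout, seed=42)
print(D.order(), D.size())  # 4 nodes, 4 edges: one edge per matched stub pair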
Example #30
0
    def ActiveUsersNotes(self, list1):
        edgesInNotes = []
        nodesInNotes = []
        temp = []
        temp2 = []
        Nusers = numpy.zeros(shape=(len(list1), 4), dtype=int)  # numpy.int was removed from NumPy

        # fill in matrix for JOURNAL USERS
        for row in dataTables.TblN:  # userID; #notes; friends; commenters
            poster = row[0]  # poster handle
            if poster == "creator":
                pass
            else:
                #UserID and times written a note
                u = list1.index(poster)  # user's ID
                Nusers[u][0] = poster
                Nusers[u][2] = row[1]  # times the poster received notes
                Nusers[u][3] = row[2]  # number of people received from

                temp = row[3].split(';')
                set1 = []
                for p in temp:
                    if (p != ''):
                        set1.append(Prepare().slice(p, "id"))

                        # sender's position in array
                        mX = list1.index(Prepare().slice(p, "id"))

                        # times a user wrote notes
                        Nusers[mX][0] = Prepare().slice(p, "id")
                        Nusers[mX][1] = Nusers[mX][1] + 1

                        # calculate number of people written to
                        # [u][4]

                # build pairs of communication
                set2 = set1  # NB: alias, so the insert below also prepends poster to set1
                set2.insert(0, poster)

                ## algorithm for creating pairs
                partialPairs = [(x, y) for y in set1 for x in set2
                                if set2.index(x) > set1.index(y)]
                if len(partialPairs) > 0:
                    for x in partialPairs:
                        edgesInNotes.append(x)
                        print(x)
        ## save edges in text file
        numpy.savetxt(foLN + ".pairs", edgesInNotes, fmt='%s')

        # completed row => times wrote a note; times received a note; # of people received from; # of people written to
        # filter: remove blank rows
        for row in Nusers:
            if row[1] >= 5:  # limit to active users, 5+ notes written
                # if ((row[0] >= 5)) | (row[1] >= 5) | (row[2] >= 5):
                temp2.append(row)
                nodesInNotes.append(row[0])

        numpy.savetxt(foLN, temp2, fmt='%s')
        print(str(len(nodesInNotes)) + " active notes users")

        ## make matrix with data
        matrixN = numpy.zeros(shape=(len(nodesInNotes), len(nodesInNotes)),
                              dtype=int)
        nG = networkx.MultiDiGraph()
        for sender, target in edgesInNotes:
            try:
                x = nodesInNotes.index(int(target))
                y = nodesInNotes.index(int(sender))
                weight = matrixN[x][y]
                matrixN[x][y] = weight + 1
                nG.add_edge(sender, target)
            except ValueError:
                pass  # sender or target is not an active user
        numpy.savetxt(foMN, matrixN, fmt='%s')
        print("saved matrix for active notes users of this community.")

        print("\tdegree centrality")
        print(networkx.degree_centrality(nG))
        print("\tin degree centrality")
        print(networkx.in_degree_centrality(nG))
        print("\tout degree centrality")
        print(networkx.out_degree_centrality(nG))
        return edgesInNotes
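For reference, the same centrality read-outs can be produced on a toy MultiDiGraph; a sketch with hypothetical user handles:

import networkx

nG = networkx.MultiDiGraph([("alice", "bob"), ("bob", "carol"), ("alice", "bob")])
print(networkx.degree_centrality(nG))     # the parallel edge counts twice
print(networkx.in_degree_centrality(nG))
print(networkx.out_degree_centrality(nG))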