def build_reeb_graph(adj_gr: nx.Graph, adjs: Sequence[AdjacencyList]):
    """Construct the Reeb graph from the adjacency lists."""
    # Newly-created object()s are inserted into the graph as dummy nodes.
    # Those dummy objects are then inserted into the frontier. That way, we
    # don't have to decide which critical point index to assign them, or even
    # what kind of critical point should be assigned.
    # (A newly-created object() will only ever compare equal to itself.)

    # Why MultiDiGraph? In the adjacency graph, there can't be more than one
    # edge between nodes. Equivalently, there can't be more than one way to
    # reach a neighbour of any cell. (The boustrophedon decomposition
    # guarantees that, since cells don't overlap.) However, there is no such
    # guarantee for the Reeb graph, and indeed there can be more than one
    # path between two critical points next to each other.
    reeb_gr = nx.MultiDiGraph()
    frontier = []
    # next available critical point index
    node_i = 0
    # next available cell index
    edge_i = 0
    # The order of assigning free indices to edges is exactly the same as the
    # order of assigning to nodes in the adjacency graph. So the edges in the
    # Reeb graph correspond to the cells.
    for adj in adjs:
        events = find_events_from_adjlist(adj)
        for event_name, event_adj in events.items():
            # assert len(event_adj) == 1, "too many conn. changes"
            if event_name == "split":
                [[left, right]] = event_adj.items()
                pred = frontier.pop(left)
                # assert type(pred) == object
                # Replace the dummy object with the next available node number
                nx.relabel_nodes(reeb_gr, {pred: node_i}, copy=False)
                for succ in right:
                    obj = object()
                    reeb_gr.add_edge(node_i, obj, cell=edge_i)
                    frontier.insert(succ, obj)
                    edge_i += 1
                node_i += 1
            elif event_name == "merge":
                # The order is inverted for the adjacency relation
                # for easier representation
                [[right, left]] = event_adj.items()
                # Delete from the frontier in reverse order, so that we don't
                # disturb the other elements as we delete the ones in front.
                preds = [frontier.pop(pred) for pred in sorted(left, reverse=True)]
                # order is backward now...
                preds.reverse()
                relabel_inplace_fixed(reeb_gr, {pred: node_i for pred in preds})
                # Only one new node is created, and only one descendant cell
                # is inserted into the frontier
                obj = object()
                reeb_gr.add_edge(node_i, obj, cell=edge_i)
                frontier.insert(right, obj)
                node_i += 1
                edge_i += 1
            elif event_name == "gain":
                [[_, right]] = event_adj.items()
                reeb_gr.add_node(node_i)
                for succ in sorted(right):
                    obj = object()
                    reeb_gr.add_edge(node_i, obj, cell=edge_i)
                    frontier.insert(succ, obj)
                    edge_i += 1
                node_i += 1
            elif event_name == "loss":
                [[left, _]] = event_adj.items()
                obj = frontier.pop(left)
                nx.relabel_nodes(reeb_gr, {obj: node_i}, copy=False)
                node_i += 1
            else:
                raise NotImplementedError()
    reeb_edge_weight(reeb_gr, adj_gr)
    # Postcondition
    for cell in adj_gr.nodes():
        assert any(cell == attrs["cell"]
                   for _, _, attrs in reeb_gr.edges(data=True)), \
            f"Cell {cell} not in Reeb graph!"
    return reeb_gr
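# The dummy-node trick used above is easy to demonstrate in isolation: a
# fresh object() is a unique, hashable placeholder that compares equal only
# to itself, and nx.relabel_nodes(..., copy=False) later swaps it for the
# real index once that index is known. A minimal, self-contained sketch (the
# names here are illustrative, not taken from the module above):
import networkx as nx

g = nx.MultiDiGraph()
placeholder = object()                 # unique sentinel node
g.add_edge(0, placeholder, cell=0)     # edge to a not-yet-numbered critical point

# Once the critical point receives its index, relabel in place:
nx.relabel_nodes(g, {placeholder: 1}, copy=False)
assert g.has_edge(0, 1)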
def __mul__(self, scaling_matrix):
    """
    Replicates the graph, creating a supercell, intelligently joining
    together edges that lie on periodic boundaries. In principle, any
    operations on the expanded graph could also be done on the original
    graph, but a larger graph can be easier to visualize and reason about.

    :param scaling_matrix: same as Structure.__mul__
    :return:
    """
    # Developer note: a different approach was also trialed, using a simple
    # Graph (instead of MultiDiGraph), with node indices representing both
    # site index and periodic image. Here, the number of nodes != number of
    # sites in the Structure. This approach has many benefits, but made it
    # more difficult to keep the graph in sync with its corresponding
    # Structure.
    # Broadly, it would be easier to multiply the Structure *before*
    # generating the StructureGraph, but this isn't possible when generating
    # the graph using critic2 from charge density.
    # Multiplication works by looking for the expected position of an image
    # node, and seeing if that node exists in the supercell. If it does, the
    # edge is updated. This is more computationally expensive than just
    # keeping track of which new lattice images are present, but should
    # hopefully be easier to extend to a general 3x3 scaling matrix.

    # code adapted from Structure.__mul__
    scale_matrix = np.array(scaling_matrix, np.int16)
    if scale_matrix.shape != (3, 3):
        scale_matrix = np.array(scale_matrix * np.eye(3), np.int16)
    else:
        # TODO: test __mul__ with full 3x3 scaling matrices
        raise NotImplementedError('Not tested with 3x3 scaling matrices yet.')
    new_lattice = Lattice(np.dot(scale_matrix, self.structure.lattice.matrix))

    f_lat = lattice_points_in_supercell(scale_matrix)
    c_lat = new_lattice.get_cartesian_coords(f_lat)

    new_sites = []
    new_graphs = []
    for v in c_lat:
        # create a map of nodes from original graph to its image
        mapping = {n: n + len(new_sites) for n in range(len(self.structure))}
        for idx, site in enumerate(self.structure):
            s = PeriodicSite(site.species_and_occu, site.coords + v,
                             new_lattice, properties=site.properties,
                             coords_are_cartesian=True, to_unit_cell=False)
            new_sites.append(s)
        new_graphs.append(nx.relabel_nodes(self.graph, mapping, copy=True))

    new_structure = Structure.from_sites(new_sites)

    # merge all graphs into one big graph
    new_g = nx.MultiDiGraph()
    for new_graph in new_graphs:
        new_g = nx.union(new_g, new_graph)

    edges_to_remove = []  # tuple of (u, v, k)
    edges_to_add = []  # tuple of (u, v, attr_dict)

    # list of new edges inside supercell, for duplicate checking
    edges_inside_supercell = [{u, v} for u, v, d in new_g.edges(data=True)
                              if d['to_jimage'] == (0, 0, 0)]
    new_periodic_images = []

    orig_lattice = self.structure.lattice

    # use k-d tree to match given position to an existing Site in Structure
    kd_tree = KDTree(new_structure.cart_coords)
    # tolerance in Å for sites to be considered equal
    # this could probably be a lot smaller
    tol = 0.05

    for u, v, k, d in new_g.edges(keys=True, data=True):
        to_jimage = d['to_jimage']  # for node v
        # reduce unnecessary checking
        if to_jimage != (0, 0, 0):
            # get index in original site
            n_u = u % len(self.structure)
            n_v = v % len(self.structure)
            # get fractional co-ordinates of where atoms defined by edge are
            # expected to be, relative to original lattice (keeping the
            # original lattice has significant benefits)
            v_image_frac = np.add(self.structure[n_v].frac_coords, to_jimage)
            u_frac = self.structure[n_u].frac_coords
            # using the position of node u as a reference, get relative
            # Cartesian co-ordinates of where atoms defined by edge are
            # expected to be
            v_image_cart = orig_lattice.get_cartesian_coords(v_image_frac)
            u_cart = orig_lattice.get_cartesian_coords(u_frac)
            v_rel = np.subtract(v_image_cart, u_cart)
            # now retrieve position of node v in new supercell, and get
            # absolute Cartesian co-ordinates of where atoms defined by edge
            # are expected to be
            v_expec = new_structure[u].coords + v_rel
            # now search in new structure for these atoms
            # query returns (distance, index)
            v_present = kd_tree.query(v_expec)
            v_present = v_present[1] if v_present[0] <= tol else None
            # check if image sites now present in supercell, and if so,
            # delete old edge that went through periodic boundary
            if v_present is not None:
                new_u = u
                new_v = v_present
                new_d = d.copy()
                # node now inside supercell
                new_d['to_jimage'] = (0, 0, 0)
                edges_to_remove.append((u, v, k))
                # make sure we don't try to add duplicate edges
                # will remove two edges for every one we add
                if {new_u, new_v} not in edges_inside_supercell:
                    # normalize direction
                    if new_v < new_u:
                        new_u, new_v = new_v, new_u
                    edges_inside_supercell.append({new_u, new_v})
                    edges_to_add.append((new_u, new_v, new_d))
            else:
                # want to find new_v such that we have full periodic boundary
                # conditions, so that nodes on one side of the supercell are
                # connected to nodes on the opposite side
                v_expec_frac = new_structure.lattice.get_fractional_coords(v_expec)
                # find new to_jimage
                # use np.around to fix issues with finite precision leading
                # to incorrect image
                v_expec_image = np.around(v_expec_frac, decimals=3)
                v_expec_image = v_expec_image - v_expec_image % 1
                v_expec_frac = np.subtract(v_expec_frac, v_expec_image)
                v_expec = new_structure.lattice.get_cartesian_coords(v_expec_frac)
                v_present = kd_tree.query(v_expec)
                v_present = v_present[1] if v_present[0] <= tol else None
                if v_present is not None:
                    new_u = u
                    new_v = v_present
                    new_d = d.copy()
                    new_to_jimage = tuple(map(int, v_expec_image))
                    # normalize direction
                    if new_v < new_u:
                        new_u, new_v = new_v, new_u
                        new_to_jimage = tuple(np.multiply(-1, d['to_jimage']).astype(int))
                    new_d['to_jimage'] = new_to_jimage
                    edges_to_remove.append((u, v, k))
                    if (new_u, new_v, new_to_jimage) not in new_periodic_images:
                        edges_to_add.append((new_u, new_v, new_d))
                        new_periodic_images.append((new_u, new_v, new_to_jimage))

    logger.debug("Removing {} edges, adding {} new edges.".format(
        len(edges_to_remove), len(edges_to_add)))

    # add/delete marked edges
    for edge_to_remove in edges_to_remove:
        new_g.remove_edge(*edge_to_remove)
    for (u, v, d) in edges_to_add:
        new_g.add_edge(u, v, **d)

    # return new instance of StructureGraph with supercell
    d = {
        "@module": self.__class__.__module__,
        "@class": self.__class__.__name__,
        "structure": new_structure.as_dict(),
        "graphs": json_graph.adjacency_data(new_g)
    }
    sg = StructureGraph.from_dict(d)
    return sg
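# A hedged usage sketch of the multiplication above: it assumes pymatgen's
# public Structure/Lattice classes and the with_local_env_strategy
# constructor (real pymatgen API, but none of it shown in this snippet); the
# toy structure and the scaling vector are illustrative only.
from pymatgen.core import Structure, Lattice
from pymatgen.analysis.graphs import StructureGraph
from pymatgen.analysis.local_env import MinimumDistanceNN

structure = Structure(Lattice.cubic(3.0), ["Na", "Cl"],
                      [[0, 0, 0], [0.5, 0.5, 0.5]])
sg = StructureGraph.with_local_env_strategy(structure, MinimumDistanceNN())

# scalar or 3-vector scaling is supported; full 3x3 matrices raise
# NotImplementedError in the method above
sg_supercell = sg * (2, 1, 1)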
def directed_configuration_model(in_degree_sequence, out_degree_sequence,
                                 create_using=None, seed=None):
    """Return a directed random graph with the given degree sequences.

    The configuration model generates a random directed pseudograph (graph
    with parallel edges and self loops) by randomly assigning edges to match
    the given degree sequences.

    Parameters
    ----------
    in_degree_sequence : list of nonnegative integers
       Each list entry corresponds to the in-degree of a node.
    out_degree_sequence : list of nonnegative integers
       Each list entry corresponds to the out-degree of a node.
    create_using : graph, optional (default MultiDiGraph)
       Return graph of this type. The instance will be cleared.
    seed : hashable object, optional
        Seed for random number generator.

    Returns
    -------
    G : MultiDiGraph
        A graph with the specified degree sequences. Nodes are labeled
        starting at 0 with an index corresponding to the position in
        deg_sequence.

    Raises
    ------
    NetworkXError
        If the degree sequences do not have the same sum.

    See Also
    --------
    configuration_model

    Notes
    -----
    Algorithm as described by Newman [1]_.

    A non-graphical degree sequence (not realizable by some simple graph) is
    allowed since this function returns graphs with self loops and parallel
    edges. An exception is raised if the degree sequences do not have the
    same sum.

    This configuration model construction process can lead to duplicate edges
    and loops. You can remove the self-loops and parallel edges (see below)
    which will likely result in a graph that doesn't have the exact degree
    sequence specified. This "finite-size effect" decreases as the size of
    the graph increases.

    References
    ----------
    .. [1] Newman, M. E. J. and Strogatz, S. H. and Watts, D. J.
       Random graphs with arbitrary degree distributions and their
       applications. Phys. Rev. E, 64, 026118 (2001)

    Examples
    --------
    One can modify the in- and out-degree sequences from an existing directed
    graph in order to create a new directed graph. For example, here we
    modify the directed path graph:

    >>> D = nx.DiGraph([(0, 1), (1, 2), (2, 3)])
    >>> din = list(d for n, d in D.in_degree())
    >>> dout = list(d for n, d in D.out_degree())
    >>> din.append(1)
    >>> dout[0] = 2
    >>> # We now expect an edge from node 0 to a new node, node 3.
    ... D = nx.directed_configuration_model(din, dout)

    The returned graph is a directed multigraph, which may have parallel
    edges. To remove any parallel edges from the returned graph:

    >>> D = nx.DiGraph(D)

    Similarly, to remove self-loops:

    >>> D.remove_edges_from(nx.selfloop_edges(D))

    """
    if sum(in_degree_sequence) != sum(out_degree_sequence):
        msg = 'Invalid degree sequences: sequences must have equal sums'
        raise nx.NetworkXError(msg)

    if create_using is None:
        create_using = nx.MultiDiGraph()

    G = _configuration_model(out_degree_sequence, create_using, directed=True,
                             in_deg_sequence=in_degree_sequence, seed=seed)
    return G
def main(argv):
    GList = []
    n = int(argv[0])
    outdir_pickle = 'DATA_motif_finding_template_list'
    if not os.path.exists(outdir_pickle):
        os.makedirs(outdir_pickle)
    if os.path.exists(outdir_pickle + "/{}nodes_template.pickle".format(n)):
        sys.exit(1)

    numPermu = len(list(itertools.permutations(range(n), 2)))
    numCombi = len(list(itertools.combinations(range(n), 2)))
    numLoops = n
    # numCasesTotal = numPermu + numCombi + n
    numCasesTotal = numPermu + numCombi

    # the candidate edge list differs based on 'n'
    if n == 2:
        edgeList = [(0, 1), (1, 0), (0, 1)]
    elif n == 3:
        edgeList = [(0, 1), (1, 0), (1, 2), (2, 1), (0, 2), (2, 0),
                    (0, 1), (1, 2), (0, 2)]
    elif n == 4:
        edgeList = [(0, 1), (1, 0), (0, 2), (2, 0), (0, 3), (3, 0),
                    (1, 2), (2, 1), (1, 3), (3, 1), (2, 3), (3, 2),
                    (0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]
    else:
        sys.exit(1)

    for numSeledge in range(1, numCasesTotal + 1):
        totalCombiSeledge = itertools.combinations(range(numCasesTotal), numSeledge)
        for eachCase in totalCombiSeledge:
            hereG = nx.MultiDiGraph()
            hereOnlyRTG = nx.Graph()
            for smallerEachCase in eachCase:
                if smallerEachCase < numPermu:
                    # default color is red
                    hereG.add_edge(*edgeList[smallerEachCase])
                elif numPermu <= smallerEachCase < numPermu + numCombi:
                    nodeIndices = edgeList[smallerEachCase]
                    swappedNodeIndices = (nodeIndices[1], nodeIndices[0])
                    hereG.add_edge(*nodeIndices, color='black')
                    hereG.add_edge(*swappedNodeIndices, color='black')
                    hereOnlyRTG.add_edge(*nodeIndices)
                else:
                    # hereG.add_edge(*edgeList[smallerEachCase])
                    pass
            if len(hereG.nodes()) != n:
                totalNodes = list(range(n))  # list() needed on Python 3
                for nowNode in hereG.nodes():
                    totalNodes.remove(nowNode)
                hereG.add_nodes_from(totalNodes)
            if len(hereOnlyRTG.nodes()) != n:
                totalNodes = list(range(n))
                for nowNode in hereOnlyRTG.nodes():
                    totalNodes.remove(nowNode)
                hereOnlyRTG.add_nodes_from(totalNodes)
            if nx.is_connected(hereOnlyRTG):
                GList.append(hereG)

    duplicatedCase = set()
    em = iso.categorical_multiedge_match('color', 'red')
    for rawI in range(len(GList)):
        if rawI not in duplicatedCase:
            for rawJ in range(rawI + 1, len(GList)):
                if rawJ not in duplicatedCase:
                    if nx.is_isomorphic(GList[rawI], GList[rawJ], edge_match=em):
                        duplicatedCase.add(rawJ)
    notDuplicatedCase = set(range(len(GList)))
    notDuplicatedCase -= duplicatedCase
    notDuplicatedCase = sorted(notDuplicatedCase)
    selectedGList = [i for rawI, i in enumerate(GList)
                     if rawI in notDuplicatedCase]
    # pickle files must be opened in binary mode
    pickle.dump(selectedGList,
                open(outdir_pickle + '/{}nodes_template.pickle'.format(n), 'wb'))

    outdir = 'MotifCountingTemplates/{}nodes_template'.format(n)
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    templates = pickle.load(
        open(outdir_pickle + '/{}nodes_template.pickle'.format(n), 'rb'))
    for rawI, i in enumerate(templates):
        # extract the undirected (black, reciprocal) edges
        extractedUndir = []
        for eachEdge in i.edges(data=True):
            if 'color' in eachEdge[2]:
                extractedUndir.append(eachEdge)
        extractedUndirGraph = nx.Graph()
        extractedUndirGraph.add_edges_from(extractedUndir)
        # extract the directed (red) edges
        extractedDir = []
        for eachEdge in i.edges(data=True):
            if 'color' not in eachEdge[2]:
                extractedDir.append((eachEdge[0], eachEdge[1]))
        hereG = nx.MultiDiGraph()
        for j in extractedUndirGraph.edges():
            hereG.add_edge(j[0], j[1], color='black', arrowhead='none')
        for j in extractedDir:
            hereG.add_edge(*j, color='red')
        graphOutName = outdir + '/{}nodes_{}.pdf'.format(n, rawI)
        dotName = outdir + '/{}nodes_{}.dot'.format(n, rawI)
        nx.nx_agraph.write_dot(hereG, dotName)
        graph = pydot.graph_from_dot_file(dotName)
        # newer pydot returns a list of graphs
        if isinstance(graph, list):
            graph[0].write_pdf(graphOutName, prog='neato')
        else:
            graph.write_pdf(graphOutName, prog='neato')
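# Usage (illustrative): main() expects the node count as its single argument,
# so with the usual main(sys.argv[1:]) entry point and a hypothetical file
# name make_templates.py, the 3-node templates are generated with:
#
#     python make_templates.py 3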
def parse_pajek(lines):
    """Parse Pajek format graph from string or iterable.

    Parameters
    ----------
    lines : string or iterable
       Data in Pajek format.

    Returns
    -------
    G : NetworkX graph

    See Also
    --------
    read_pajek()

    """
    import shlex
    # multigraph=False
    if is_string_like(lines):
        lines = iter(lines.split('\n'))
    lines = iter([line.rstrip('\n') for line in lines])
    G = nx.MultiDiGraph()  # are multiedges allowed in Pajek? assume yes
    while lines:
        try:
            l = next(lines)
        except StopIteration:  # EOF
            break
        if l.lower().startswith("*network"):
            try:
                label, name = l.split()
            except ValueError:
                # Line was not of the form:  *network NAME
                pass
            else:
                G.graph['name'] = name
        elif l.lower().startswith("*vertices"):
            nodelabels = {}
            l, nnodes = l.split()
            for i in range(int(nnodes)):
                splitline = shlex.split(str(next(lines)))
                id, label = splitline[0:2]
                G.add_node(label)
                nodelabels[id] = label
                G.node[label] = {'id': id}
                try:
                    x, y, shape = splitline[2:5]
                    G.node[label].update({'x': float(x),
                                          'y': float(y),
                                          'shape': shape})
                except (ValueError, IndexError):
                    pass
                extra_attr = zip(splitline[5::2], splitline[6::2])
                G.node[label].update(extra_attr)
        elif l.lower().startswith("*edges") or l.lower().startswith("*arcs"):
            if l.lower().startswith("*edge"):
                # switch from multidigraph to multigraph
                G = nx.MultiGraph(G)
            if l.lower().startswith("*arcs"):
                # switch to directed with multiple arcs for each existing edge
                G = G.to_directed()
            for l in lines:
                splitline = shlex.split(str(l))
                if len(splitline) < 2:
                    continue
                ui, vi = splitline[0:2]
                u = nodelabels.get(ui, ui)
                v = nodelabels.get(vi, vi)
                # parse the data attached to this edge and put in a dictionary
                edge_data = {}
                try:
                    # there should always be a single value on the edge?
                    w = splitline[2:3]
                    edge_data.update({'weight': float(w[0])})
                except (ValueError, IndexError):
                    # if there isn't, just assign a 1
                    # edge_data.update({'value':1})
                    pass
                extra_attr = zip(splitline[3::2], splitline[4::2])
                edge_data.update(extra_attr)
                # if G.has_edge(u,v):
                #     multigraph=True
                G.add_edge(u, v, **edge_data)
    return G
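# Usage sketch: under the NetworkX 1.x API this snippet targets (note the
# G.node attribute access), a two-node network parses like this; the labels
# and the weight are illustrative:
#
#     data = '\n'.join(['*Vertices 2',
#                       '1 "alpha"',
#                       '2 "beta"',
#                       '*Arcs',
#                       '1 2 1.0'])
#     G = parse_pajek(data)
#     assert G.has_edge('alpha', 'beta')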
def __init__(
        self,
        arrival_rate=1,
        success_prob=lambda: 1,
        max_agents=1000,
        graph=True,
        plots=False,
        plot_time=0.5,
        selfMatch=False,
):
    """
    Generate new market object

    Arguments
    -----------
    arrival_rate: int or f()->int
        rate of arrival in the Market;
        parameter passed in a statistical distribution when advancing time
    success_prob: int or f()->int
        probability of match success;
        parameter passed in a match probability function
    max_agents: int
        maximum number of agents over all periods in Market
    graph: bool
        if True, market maintains a NetworkX DiGraph object of potential
        matches; nodes are agents, directed edges are compatibility, and
        edge weight is expected match utility, i.e. match utility * match
        success probability; necessary for graph plotting
    plots: bool
        output network graph plots 3 times per update
    plot_time: float
        time per frame on plot
    selfMatch: bool
        if True, the match graph is a MultiDiGraph (self-matches allowed)
    """
    self.Agents = list()
    self.arrival_rate = arrival_rate
    self.acceptable_prob = success_prob
    self.max_agents = max_agents
    self.perished = list()
    self.matched = list()
    self.matched_dict = dict()
    self.time = 0
    self.welfare = 0
    self.total_agents = 0
    self.loss = 0
    self.has_graph = graph
    self.plots_on = plots
    self.selfMatch = selfMatch
    if self.has_graph:
        if self.selfMatch:
            self.Graph = nx.MultiDiGraph()
        else:
            self.Graph = nx.DiGraph()
    if self.plots_on:
        if CANT_PLOT:
            print("WARNING: Can't plot due to qt5agg backend import error")
            self.plots_on = False
        plt.ion()  # Interactive plotting
        self.has_graph = True
        self.Graph = nx.DiGraph()
        self.graph_labels = dict()
        self.color_map = dict()
        self.graph_colors = list()
        self.plot_time = plot_time
        self.graph_pos = dict()
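# Usage sketch (hedged): assuming the enclosing class is called Market (the
# class name is not visible in this snippet), construction looks like:
#
#     m = Market(arrival_rate=5, max_agents=200, graph=True)
#     m.Graph.add_node("agent-0")    # plain nx.DiGraph in this configuration
#
# With selfMatch=True the match graph is an nx.MultiDiGraph instead, so
# parallel and self-directed compatibility edges can be stored.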
def test_astar_multigraph(self):
    G = nx.MultiDiGraph(self.XG)
    assert_raises((TypeError, nx.NetworkXError),
                  nx.astar_path, [G, 's', 'v'])
    assert_raises((TypeError, nx.NetworkXError),
                  nx.astar_path_length, [G, 's', 'v'])
def __init__(self, conf_file):
    """Initialize transaction network from parameter files.

    :param conf_file: JSON file as configurations
    """
    self.g = nx.MultiDiGraph()  # Transaction graph object
    self.num_accounts = 0  # Number of total accounts
    self.degrees = dict()  # Degree distribution
    self.hubs = list()  # Hub vertices
    self.subject_candidates = set()
    self.attr_names = list()  # Additional account attribute names

    with open(conf_file, "r") as rf:
        self.conf = json.load(rf)

    general_conf = self.conf["general"]

    # Set random seed
    seed = general_conf.get("random_seed")
    self.seed = seed if seed is None else int(seed)
    np.random.seed(self.seed)
    random.seed(self.seed)

    self.total_steps = parse_int(general_conf["total_steps"])

    # Set default amounts, steps and model ID
    default_conf = self.conf["default"]
    self.default_min_amount = parse_amount(default_conf.get("min_amount"))
    self.default_max_amount = parse_amount(default_conf.get("max_amount"))
    self.default_min_balance = parse_amount(default_conf.get("min_balance"))
    self.default_max_balance = parse_amount(default_conf.get("max_balance"))
    self.default_start_step = parse_int(default_conf.get("start_step"))
    self.default_end_step = parse_int(default_conf.get("end_step"))
    self.default_start_range = parse_int(default_conf.get("start_range"))
    self.default_end_range = parse_int(default_conf.get("end_range"))
    self.default_model = parse_int(default_conf.get("transaction_model"))

    # Get input file names and properties
    input_conf = self.conf["input"]
    self.input_dir = input_conf["directory"]  # Directory name of input files
    self.account_file = input_conf["accounts"]  # Account list file
    self.alert_file = input_conf["alert_patterns"]
    self.degree_file = input_conf["degree"]
    self.type_file = input_conf["transaction_type"]
    self.is_aggregated = input_conf["is_aggregated_accounts"]

    # Get output file names
    output_conf = self.conf["temporal"]  # The destination directory is temporal
    self.output_dir = output_conf["directory"]
    self.out_tx_file = output_conf["transactions"]
    self.out_account_file = output_conf["accounts"]
    self.out_alert_file = output_conf["alert_members"]

    # Other properties for the transaction graph generator
    other_conf = self.conf["graph_generator"]
    self.degree_threshold = parse_int(other_conf["degree_threshold"])
    highrisk_countries_str = other_conf.get("high_risk_countries", "")
    highrisk_business_str = other_conf.get("high_risk_business", "")
    self.highrisk_countries = set(highrisk_countries_str.split(","))
    self.highrisk_business = set(highrisk_business_str.split(","))

    self.tx_id = 0  # Transaction ID
    self.alert_id = 0  # Alert ID from the alert parameter file
    self.alert_groups = dict()  # Alert ID and alert transaction subgraph
    # Pattern name and model ID
    self.alert_types = {"fan_out": 1, "fan_in": 2, "cycle": 3,
                        "bipartite": 4, "stack": 5, "dense": 6}

    def get_types(type_csv):
        tx_types = list()
        with open(type_csv, "r") as _rf:
            reader = csv.reader(_rf)
            next(reader)
            for row in reader:
                if row[0].startswith("#"):
                    continue
                ttype = row[0]
                tx_types.extend([ttype] * int(row[1]))
        return tx_types

    self.tx_types = get_types(os.path.join(self.input_dir, self.type_file))
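# A minimal configuration sketch: the keys below are exactly the ones this
# __init__ reads; every value is illustrative, not a project default.
#
#     {
#       "general": {"random_seed": 0, "total_steps": "720"},
#       "default": {"min_amount": "100", "max_amount": "1000",
#                   "min_balance": "1000", "max_balance": "100000",
#                   "start_step": "0", "end_step": "720",
#                   "start_range": "0", "end_range": "720",
#                   "transaction_model": "1"},
#       "input": {"directory": "paramFiles", "accounts": "accounts.csv",
#                 "alert_patterns": "alertPatterns.csv",
#                 "degree": "degree.csv",
#                 "transaction_type": "transactionType.csv",
#                 "is_aggregated_accounts": true},
#       "temporal": {"directory": "tmp", "transactions": "transactions.csv",
#                    "accounts": "accounts.csv",
#                    "alert_members": "alert_members.csv"},
#       "graph_generator": {"degree_threshold": "10",
#                           "high_risk_countries": "",
#                           "high_risk_business": ""}
#     }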
def add_alert_pattern(self, is_fraud, pattern_type, accounts, schedule_id=1,
                      individual_amount=None, aggregated_amount=None,
                      transaction_freq=None, amount_difference=None,
                      period=None, amount_rounded=None,
                      orig_country=False, bene_country=False,
                      orig_business=False, bene_business=False):
    """Add an AML rule transaction set

    :param is_fraud: Whether the transaction set is fraud or alert
    :param pattern_type: Pattern type ("fan_in", "fan_out", "bipartite",
        "mixed", "stack", "dense" or "cycle")
    :param accounts: Number of transaction members (accounts)
    :param schedule_id: AML pattern transaction schedule model ID
    :param individual_amount: Minimum individual amount
    :param aggregated_amount: Minimum aggregated amount
    :param transaction_freq: Minimum transaction frequency
    :param amount_difference: Proportion of maximum transaction difference
    :param period: Lookback period (days)
    :param amount_rounded: Proportion of rounded amounts
    :param orig_country: Whether the originator country is suspicious
    :param bene_country: Whether the beneficiary country is suspicious
    :param orig_business: Whether the originator business type is suspicious
    :param bene_business: Whether the beneficiary business type is suspicious
    :return:
    """
    subject, members = self.get_alert_members(accounts, is_fraud)

    # Prepare parameters
    if individual_amount is None:
        min_amount = self.default_min_amount
        max_amount = self.default_max_amount
    else:
        min_amount = individual_amount
        max_amount = individual_amount * 2

    if aggregated_amount is None:
        aggregated_amount = 0

    start_day = 0
    end_day = self.total_steps

    # Create subgraph structure with transaction attributes
    modelID = self.alert_types[pattern_type]  # alert model ID
    # Transaction subgraph for an alert
    sub_g = nx.MultiDiGraph(modelID=modelID, reason=pattern_type,
                            scheduleID=schedule_id,
                            start=start_day, end=end_day)
    num_members = len(members)  # Number of accounts
    total_amount = 0
    transaction_count = 0

    if pattern_type == "fan_in":
        # fan_in pattern (multiple accounts --> single (subject) account)
        src_list = [n for n in members if n != subject]
        dst = subject
        if transaction_freq is None:
            transaction_freq = num_members - 1
        # Generate transactions for the specified number
        for src in itertools.cycle(src_list):
            amount = random.uniform(min_amount, max_amount)
            date = random.randrange(start_day, end_day)
            sub_g.add_edge(src, dst, amount=amount, date=date)
            self.g.add_edge(src, dst, amount=amount, date=date)
            transaction_count += 1
            total_amount += amount
            if transaction_count >= transaction_freq and total_amount >= aggregated_amount:
                break

    elif pattern_type == "fan_out":
        # fan_out pattern (single (subject) account --> multiple accounts)
        src = subject
        dst_list = [n for n in members if n != subject]
        if transaction_freq is None:
            transaction_freq = num_members - 1
        # Generate transactions for the specified number
        for dst in itertools.cycle(dst_list):
            amount = random.uniform(min_amount, max_amount)
            date = random.randrange(start_day, end_day)
            sub_g.add_edge(src, dst, amount=amount, date=date)
            self.g.add_edge(src, dst, amount=amount, date=date)
            transaction_count += 1
            total_amount += amount
            if transaction_count >= transaction_freq and total_amount >= aggregated_amount:
                break

    elif pattern_type == "bipartite":
        # bipartite (sender accounts --> all-to-all --> receiver accounts)
        src_list = members[:(num_members // 2)]  # The former half members are sender accounts
        dst_list = members[(num_members // 2):]  # The latter half members are receiver accounts
        if transaction_freq is None:  # Number of transactions
            transaction_freq = len(src_list) * len(dst_list)
        for src, dst in itertools.product(src_list, dst_list):  # All-to-all transactions
            amount = random.uniform(min_amount, max_amount)
            date = random.randrange(start_day, end_day)
            sub_g.add_edge(src, dst, amount=amount, date=date)
            self.g.add_edge(src, dst, amount=amount, date=date)
            transaction_count += 1
            total_amount += amount
            if transaction_count > transaction_freq and total_amount >= aggregated_amount:
                break

    elif pattern_type == "mixed":
        # fan_out -> bipartite -> fan_in
        src = members[0]  # Source account
        dst = members[num_members - 1]  # Destination account
        src_list = members[1:(num_members // 2)]  # First intermediate accounts
        dst_list = members[(num_members // 2):num_members - 1]  # Second intermediate accounts
        if transaction_freq is None:
            transaction_freq = len(src_list) + len(dst_list) + len(src_list) * len(dst_list)

        for _dst in src_list:  # Fan-out
            amount = random.uniform(min_amount, max_amount)
            date = random.randrange(start_day, end_day)
            sub_g.add_edge(src, _dst, amount=amount, date=date)
            self.g.add_edge(src, _dst, amount=amount, date=date)
            transaction_count += 1
            total_amount += amount
        for _src, _dst in itertools.product(src_list, dst_list):  # Bipartite
            amount = random.uniform(min_amount, max_amount)
            date = random.randrange(start_day, end_day)
            sub_g.add_edge(_src, _dst, amount=amount, date=date)
            self.g.add_edge(_src, _dst, amount=amount, date=date)
            transaction_count += 1
            total_amount += amount
        for _src in itertools.cycle(dst_list):  # Fan-in
            amount = random.uniform(min_amount, max_amount)
            date = random.randrange(start_day, end_day)
            sub_g.add_edge(_src, dst, amount=amount, date=date)
            self.g.add_edge(_src, dst, amount=amount, date=date)
            transaction_count += 1
            total_amount += amount
            if transaction_count >= transaction_freq and total_amount >= aggregated_amount:
                break

    elif pattern_type == "stack":
        # two dense bipartite layers
        src_list = members[:num_members // 3]  # First 1/3 of members are source accounts
        mid_list = members[num_members // 3:num_members * 2 // 3]  # Second 1/3 are intermediate accounts
        dst_list = members[num_members * 2 // 3:]  # Last 1/3 are destination accounts
        if transaction_freq is None:  # Total number of transactions
            transaction_freq = len(src_list) * len(mid_list) + len(mid_list) * len(dst_list)

        for src, dst in itertools.product(src_list, mid_list):  # all-to-all transactions
            amount = random.uniform(min_amount, max_amount)
            date = random.randrange(start_day, end_day)
            sub_g.add_edge(src, dst, amount=amount, date=date)
            self.g.add_edge(src, dst, amount=amount, date=date)
            transaction_count += 1
            total_amount += amount
            if transaction_count > transaction_freq and total_amount >= aggregated_amount:
                break
        for src, dst in itertools.product(mid_list, dst_list):  # all-to-all transactions
            amount = random.uniform(min_amount, max_amount)
            date = random.randrange(start_day, end_day)
            sub_g.add_edge(src, dst, amount=amount, date=date)
            self.g.add_edge(src, dst, amount=amount, date=date)
            transaction_count += 1
            total_amount += amount
            if transaction_count > transaction_freq and total_amount >= aggregated_amount:
                break

    elif pattern_type == "dense":
        # Dense alert accounts (all-to-all)
        dsts = [n for n in members if n != subject]
        for dst in dsts:
            amount = random.uniform(min_amount, max_amount)
            date = random.randrange(start_day, end_day)
            sub_g.add_edge(subject, dst, amount=amount, date=date)
            self.g.add_edge(subject, dst, amount=amount, date=date)
        for dst in dsts:
            nb1 = random.choice(dsts)
            if dst != nb1:
                amount = random.uniform(min_amount, max_amount)
                date = random.randrange(start_day, end_day)
                sub_g.add_edge(dst, nb1, amount=amount, date=date)
                self.g.add_edge(dst, nb1, amount=amount, date=date)
            nb2 = random.choice(dsts)
            if dst != nb2:
                amount = random.uniform(min_amount, max_amount)
                date = random.randrange(start_day, end_day)
                sub_g.add_edge(nb2, dst, amount=amount, date=date)
                self.g.add_edge(nb2, dst, amount=amount, date=date)

    elif pattern_type == "cycle":
        # Cycle transactions
        subject_index = list(members).index(subject)  # Index of the subject account in the member list
        num = len(members)  # Number of involved accounts
        amount = random.uniform(min_amount, max_amount)  # Transaction amount
        dates = sorted([random.randrange(start_day, end_day) for _ in range(num)])  # Transaction dates (in order)
        for i in range(num):
            src_i = (subject_index + i) % num
            dst_i = (src_i + 1) % num
            src = members[src_i]  # Source account ID
            dst = members[dst_i]  # Destination account ID
            date = dates[i]  # Transaction date (timestamp)
            sub_g.add_edge(src, dst, amount=amount, date=date)
            self.g.add_edge(src, dst, amount=amount, date=date)

    else:
        print("Warning: unknown pattern type: %s" % pattern_type)
        return

    # Record the generated transaction subgraph for this alert
    sub_g.graph["subject"] = subject if is_fraud else None
    self.alert_groups[self.alert_id] = sub_g

    # Add the fraud flag to the subject account vertex
    if is_fraud:
        self.g.node[subject]["isFraud"] = True
    # for n in sub_g.nodes():
    #     self.g.node[n]["isFraud"] = True
    self.alert_id += 1
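# Usage sketch (hedged): get_alert_members() and the default amounts come
# from the same generator class, so on a constructed generator txg a fan-in
# alert over ten accounts might be added as:
#
#     txg.add_alert_pattern(is_fraud=True, pattern_type="fan_in", accounts=10,
#                           individual_amount=500, transaction_freq=20)
#
# The subgraph is recorded in txg.alert_groups and every edge is mirrored
# into the whole transaction graph txg.g.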
def setup(self):
    self.G = nx.path_graph(9, nx.MultiDiGraph())
    self.G.add_edge(1, 2, key=3, foo='bar')
    self.eview = nx.reportviews.InMultiEdgeView
class DAG(object):
    __preprocessedInput = []  # Original input as a sequence of integers
    __dic = {}  # Correspondence of integers to original chars (only when charSeq = 'c','s')
    __DAG = {}  # Adjacency list of DAG
    __DAGGraph = nx.MultiDiGraph()
    __DAGStrings = {}  # Strings corresponding to each node in DAG
    __concatenatedDAG = []  # Concatenated DAG nodes with separatorInts
    __concatenatedNTs = []  # For each DAG node, alongside the concatenated DAG
    __separatorInts = set([])  # Used for separating DAG nodes in the concatenatedDAG
    __separatorIntsIndices = set([])  # Indices of separatorInts in the concatenated DAG
    __nextNewInt = 0  # Used for storing ints of repeat symbols and separators in odd numbers
    __quietLog = False  # if true, disables logging
    __iterations = 0

    def __init__(self, inputFile, loadDAGFlag, chFlag=SequenceType.Character, noNewLineFlag=True):
        if loadDAGFlag:
            self.__initFromDAG(inputFile)
        else:
            self.__initFromStrings(inputFile, chFlag, noNewLineFlag)

    # Initializes (an unoptimized) DAG from inputFile. charSeq tells if
    # inputFile is a char sequence, int sequence or space-separated sequence
    def __initFromStrings(self, inputFile, chFlag=SequenceType.Character, noNewLineFlag=True):
        (self.__preprocessedInput, self.__dic) = self.__preprocessInput(
            inputFile, charSeq=chFlag, noNewLineFlag=noNewLineFlag)
        allLetters = set(map(int, self.__preprocessedInput.split()))
        # Setting odd and even values for __nextNewInt and __nextNewContextInt
        self.__nextNewInt = max(allLetters) + 1
        if self.__nextNewInt % 2 == 0:
            self.__nextNewInt += 1
        # Initializing the concatenated DAG
        for line in self.__preprocessedInput.split('\n'):
            line = line.rstrip('\n')
            self.__concatenatedDAG.extend(map(int, line.split()))
            self.__concatenatedDAG.append(self.__nextNewInt)
            self.__concatenatedNTs.extend(0 for j in range(len(line.split())))
            self.__concatenatedNTs.append(self.__nextNewInt)
            self.__separatorInts.add(self.__nextNewInt)
            self.__separatorIntsIndices.add(len(self.__concatenatedDAG) - 1)
            self.__nextNewInt += 2

    # Loads the DAG from an external file
    # (The file should start from the 'N0' line, without cost logs)
    def __initFromDAG(self, inputFile):
        textFile = inputFile.read().splitlines()
        maxInt = -1
        for line in textFile:
            nt = int(line.split(' -> ')[0][1:])
            self.__dic[nt] = nt
            rhs = line.split(' -> ')[1].split()
            for w in rhs:
                try:
                    word = int(w)
                except ValueError:
                    word = int(w[1:])  # strip the leading 'N' of a node symbol
                if maxInt < word:
                    maxInt = word
                self.__dic[word] = word
                self.__concatenatedDAG.append(word)
                self.__concatenatedNTs.append(nt)
            # placeholder separator after each DAG node
            self.__concatenatedDAG.append(-1)
            self.__concatenatedNTs.append(-1)
            self.__separatorIntsIndices.add(len(self.__concatenatedDAG) - 1)
        self.__nextNewInt = maxInt + 1
        for i in self.__separatorIntsIndices:
            self.__concatenatedDAG[i] = self.__nextNewInt
            self.__concatenatedNTs[i] = self.__nextNewInt
            self.__separatorInts.add(self.__nextNewInt)
            self.__nextNewInt += 1
        self.__createAdjacencyList()
        self.__createDAGGraph()
        self.__nodeStringsGenerate()

    # ...........Main G-Lexis Algorithm Functions........

    def GLexis(self, quiet, normalRepeatType, costFunction):
        self.__quietLog = quiet
        while True:  # Main loop
            # Logging DAG Cost
            self.__logViaFlag(LogFlag.ConcatenationCostLog)
            self.__logViaFlag(LogFlag.EdgeCostLog)
            # Extracting Maximum-Gain Repeat
            (maximumRepeatGainValue, selectedRepeatOccs) = \
                self.__retrieveMaximumGainRepeat(normalRepeatType, CostFunction.EdgeCost)
            if maximumRepeatGainValue == -1:
                break  # No repeats, hence terminate
            self.__logMessage('maxR ' + str(maximumRepeatGainValue) + ' : ' + str(
                self.__concatenatedDAG[selectedRepeatOccs[1][0]:
                                       selectedRepeatOccs[1][0] + selectedRepeatOccs[0]]) + '\n')
            if maximumRepeatGainValue > 0:
                self.__replaceRepeat(selectedRepeatOccs)  # Replacing the chosen repeat
            self.__iterations += 1
        self.__logMessage('---------------')
        self.__logMessage('Number of Iterations: ' + str(self.__iterations))
        self.__createAdjacencyList()
        self.__createDAGGraph()
        self.__nodeStringsGenerate()

    # Returns the cost of the DAG according to the selected costFunction
    def DAGCost(self, costFunction):
        if costFunction == CostFunction.ConcatenationCost:
            return len(self.__concatenatedDAG) - 2 * len(self.__separatorInts)
        if costFunction == CostFunction.EdgeCost:
            return len(self.__concatenatedDAG) - len(self.__separatorInts)

    # Replaces a repeat's occurrences with a new symbol and creates a new
    # node in the DAG
    def __replaceRepeat(self, repeatInfo):
        (repeatLength, (repeatOccs)) = repeatInfo
        repeat = self.__concatenatedDAG[repeatOccs[0]:repeatOccs[0] + repeatLength]
        newTmpConcatenatedDAG = []
        newTmpConcatenatedNTs = []
        prevIndex = 0
        for i in repeatOccs:
            newTmpConcatenatedDAG += self.__concatenatedDAG[prevIndex:i] + [self.__nextNewInt]
            newTmpConcatenatedNTs += self.__concatenatedNTs[prevIndex:i] + [self.__concatenatedNTs[i]]
            prevIndex = i + repeatLength
        self.__concatenatedDAG = newTmpConcatenatedDAG + self.__concatenatedDAG[prevIndex:]
        self.__concatenatedNTs = newTmpConcatenatedNTs + self.__concatenatedNTs[prevIndex:]
        self.__concatenatedDAG = self.__concatenatedDAG + repeat
        self.__concatenatedNTs = self.__concatenatedNTs + \
            [self.__nextNewInt for j in range(repeatLength)]
        self.__logMessage('Added Node: ' + str(self.__nextNewInt))
        self.__nextNewInt += 2
        self.__concatenatedDAG = self.__concatenatedDAG + [self.__nextNewInt]
        self.__concatenatedNTs = self.__concatenatedNTs + [self.__nextNewInt]
        self.__separatorInts.add(self.__nextNewInt)
        self.__separatorIntsIndices = set([])
        for i in range(len(self.__concatenatedDAG)):
            if self.__concatenatedDAG[i] in self.__separatorInts:
                self.__separatorIntsIndices.add(i)
        self.__nextNewInt += 2

    # Retrieves the maximum-gain repeat (randomizes within ties).
    # Output is a tuple: "(RepeatGain, (RepeatLength, (RepeatOccurrences)))"
    # 1st entry of output is the maximum repeat gain value
    # 2nd entry of output is a tuple of form:
    # "(selectedRepeatLength, selectedRepeatOccsList)"
    def __retrieveMaximumGainRepeat(self, repeatClass, costFunction):
        repeats = self.__extractRepeats(repeatClass)
        maxRepeatGain = 0
        candidateRepeats = []
        for r in repeats:  # Extracting maximum repeat
            repeatStats = r.split()
            repeatOccs = self.__extractNonoverlappingRepeatOccurrences(
                int(repeatStats[0]),
                list(map(int, repeatStats[2][1:-1].split(','))))
            gain = self.__repeatGain(int(repeatStats[0]), len(repeatOccs), costFunction)
            if maxRepeatGain < gain:
                maxRepeatGain = gain
                candidateRepeats = [(int(repeatStats[0]), len(repeatOccs), repeatOccs)]
            elif maxRepeatGain > 0 and maxRepeatGain == gain:
                candidateRepeats.append((int(repeatStats[0]), len(repeatOccs), repeatOccs))
        if len(candidateRepeats) == 0:
            return (-1, (0, []))
        # Randomizing between candidates with maximum gain
        # selectedRepeatStats = candidateRepeats[random.randrange(len(candidateRepeats))]
        selectedRepeatStats = candidateRepeats[0]
        selectedRepeatLength = selectedRepeatStats[0]
        selectedRepeatOccs = sorted(selectedRepeatStats[2])
        return (maxRepeatGain, (selectedRepeatLength, selectedRepeatOccs))

    # Returns the repeat gain, according to the chosen cost function
    def __repeatGain(self, repeatLength, repeatOccsLength, costFunction):
        # if costFunction == CostFunction.ConcatenationCost:
        return (repeatLength - 1) * (repeatOccsLength - 1)
        # if costFunction == CostFunction.EdgeCost:
        #     return (repeatLength-1)*(repeatOccsLength-1)-1

    # Extracts the designated class of repeats (assumes the ./repeats binary
    # being in the same directory).
    # Output is a string, each line containing:
    # "RepeatLength NumberOfOccurrence (CommaSeparatedOccurrenceIndices)"
    def __extractRepeats(self, repeatClass):
        process = subprocess.Popen(
            ["./repeats1/repeats11", "-i", "-r" + repeatClass, "-n2", "-psol"],
            stdout=subprocess.PIPE, stdin=subprocess.PIPE,
            stderr=subprocess.STDOUT, universal_newlines=True)
        # feed the concatenated DAG to the repeats binary and read its report
        text_file = process.communicate(
            ' '.join(map(str, self.__concatenatedDAG)))[0].rstrip()
        repeats = []
        firstLine = False
        for line in text_file.splitlines():
            if not firstLine:  # skip the header line
                firstLine = True
                continue
            repeats.append(line.rstrip('\n'))
        return repeats

    # Extracts the non-overlapping occurrences of a repeat from a list of
    # occurrences (scans from left to right)
    def __extractNonoverlappingRepeatOccurrences(self, repeatLength, occurrencesList):
        nonoverlappingIndices = []
        for i in range(len(occurrencesList)):
            if len(nonoverlappingIndices) > 0:
                if nonoverlappingIndices[-1] + repeatLength <= occurrencesList[i]:  # Not already covered
                    nonoverlappingIndices += [occurrencesList[i]]
            else:
                nonoverlappingIndices += [occurrencesList[i]]
        return nonoverlappingIndices
    # Creates the adjacency list
    def __createAdjacencyList(self):
        separatorPassed = False
        for i in range(len(self.__concatenatedDAG)):
            if i not in self.__separatorIntsIndices:
                node = self.__concatenatedNTs[i]
                if separatorPassed and node == 0:
                    self.__DAG[node].append([])
                    separatorPassed = False
                if node not in self.__DAG:
                    if node == 0:  # Target node
                        self.__DAG[node] = [[self.__concatenatedDAG[i]]]
                    else:
                        self.__DAG[node] = [self.__concatenatedDAG[i]]
                else:
                    if node == 0:  # Target node
                        self.__DAG[node][-1].append(self.__concatenatedDAG[i])
                    else:
                        self.__DAG[node].append(self.__concatenatedDAG[i])
            else:
                separatorPassed = True

    # Creates the DAG graph object (adjacency list should already be processed)
    def __createDAGGraph(self):
        for node in self.__DAG:
            self.__DAGGraph.add_node(node)
            if node == 0:
                for l in self.__DAG[node]:
                    for n in l:
                        self.__DAGGraph.add_node(n)
                        self.__DAGGraph.add_edge(n, node)
            else:
                for n in self.__DAG[node]:
                    self.__DAGGraph.add_node(n)
                    self.__DAGGraph.add_edge(n, node)

    # Stores the strings corresponding to each DAG node
    def __nodeStringsGenerate(self):
        for node in nx.nodes(self.__DAGGraph):
            if self.__DAGGraph.in_degree(node) == 0:
                self.__DAGStrings[node] = str(node)
            else:
                if node == 0:
                    self.__DAGStrings[node] = []
                else:
                    self.__DAGStrings[node] = ''
        self.__nodeStringsHelper(0)

    # Helper recursive function
    def __nodeStringsHelper(self, n):
        if self.__DAGStrings[n] != [] and self.__DAGStrings[n] != '':
            return
        if n == 0:
            for l in self.__DAG[n]:
                self.__DAGStrings[n].append('')
                for i in range(len(l)):
                    subnode = l[i]
                    self.__nodeStringsHelper(subnode)
                    self.__DAGStrings[n][-1] += ' ' + self.__DAGStrings[subnode]
        else:
            for i in range(len(self.__DAG[n])):
                subnode = self.__DAG[n][i]
                self.__nodeStringsHelper(subnode)
                self.__DAGStrings[n] += ' ' + self.__DAGStrings[subnode]

    # Returns node's corresponding string
    def __getNodeString(self, n):
        if n == 0:
            result = []
            for l in self.__DAGStrings[n]:
                result.append(' '.join(l.split()))
            return result
        return ' '.join(self.__DAGStrings[n].split())

    # ...........Path-Centrality Functions........
    # Returns a list of strings, corresponding to the nodes removed from the
    # DAG, according to the greedy core identification algorithm, based on
    # the threshold of edge removal tau
    def greedyCoreID_ByTau(self, tau):
        numberOfUpwardPaths = {}
        numberOfDownwardPaths = {}
        sources = []
        targets = []
        for node in nx.nodes(self.__DAGGraph):
            if self.__DAGGraph.in_degree(node) == 0:
                sources.append(node)
            if self.__DAGGraph.out_degree(node) == 0:
                targets.append(node)
            numberOfUpwardPaths[node] = 0
            numberOfDownwardPaths[node] = 0
        self.__calculateNumberOfUpwardPaths(sources, targets, numberOfUpwardPaths)
        self.__calculateNumberOfDownwardPaths(sources, targets, numberOfDownwardPaths)
        for t in targets:
            numberOfUpwardPaths[t] = 0
        for s in sources:
            numberOfDownwardPaths[s] = 0
        number_of_initial_paths = numberOfDownwardPaths[0]
        number_of_current_paths = numberOfDownwardPaths[0]
        listOfCentralNodes = []
        centralities = self.__calculateCentralities(numberOfUpwardPaths, numberOfDownwardPaths)
        topCentralNodeInfo = max(centralities, key=lambda x: x[1])
        allMaxes = [c for c in centralities if c[1] == topCentralNodeInfo[1]]
        # While a node with positive centrality exists and the fraction of
        # removed paths stays below tau
        while topCentralNodeInfo[1] > 0 and \
                float(number_of_current_paths) / float(number_of_initial_paths) > 1 - tau:
            for nodeToBeRemoved in allMaxes:
                nodeToBeRemoved = nodeToBeRemoved[0]
                self.__DAGGraph.remove_node(nodeToBeRemoved)
                listOfCentralNodes.append(nodeToBeRemoved)
            numberOfUpwardPaths = {}
            numberOfDownwardPaths = {}
            for node in nx.nodes(self.__DAGGraph):
                numberOfUpwardPaths[node] = 0
                numberOfDownwardPaths[node] = 0
            self.__calculateNumberOfUpwardPaths(sources, targets, numberOfUpwardPaths)
            self.__calculateNumberOfDownwardPaths(sources, targets, numberOfDownwardPaths)
            for t in targets:
                numberOfUpwardPaths[t] = 0
            for s in sources:
                numberOfDownwardPaths[s] = 0
            centralities = self.__calculateCentralities(numberOfUpwardPaths, numberOfDownwardPaths)
            topCentralNodeInfo = max(centralities, key=lambda x: x[1])
            allMaxes = [c for c in centralities if c[1] == topCentralNodeInfo[1]]
            number_of_current_paths = numberOfDownwardPaths[0]
        self.__DAGGraph = nx.MultiDiGraph()  # directed, matching __createDAGGraph
        self.__createDAGGraph()  # Reconstructing the DAG graph
        core = []
        for i in range(len(listOfCentralNodes)):
            core.append(self.__getNodeString(listOfCentralNodes[i]))
        return core

    # Returns a list of strings, corresponding to the nodes removed from the
    # DAG, according to the greedy core identification algorithm, based on
    # the cardinality of the extracted set
    def greedyCoreID_ByCardinality(self, k):
        numberOfUpwardPaths = {}
        numberOfDownwardPaths = {}
        sources = []
        targets = []
        for node in nx.nodes(self.__DAGGraph):
            if self.__DAGGraph.in_degree(node) == 0:
                sources.append(node)
            if self.__DAGGraph.out_degree(node) == 0:
                targets.append(node)
            numberOfUpwardPaths[node] = 0
            numberOfDownwardPaths[node] = 0
        self.__calculateNumberOfUpwardPaths(sources, targets, numberOfUpwardPaths)
        self.__calculateNumberOfDownwardPaths(sources, targets, numberOfDownwardPaths)
        for t in targets:
            numberOfUpwardPaths[t] = 0
        for s in sources:
            numberOfDownwardPaths[s] = 0
        number_of_initial_paths = numberOfDownwardPaths[0]
        number_of_current_paths = numberOfDownwardPaths[0]
        listOfCentralNodes = []
        centralities = self.__calculateCentralities(numberOfUpwardPaths, numberOfDownwardPaths)
        topCentralNodeInfo = max(centralities, key=lambda x: x[1])
        allMaxes = [c for c in centralities if c[1] == topCentralNodeInfo[1]]
        # While a node with positive centrality exists
        while topCentralNodeInfo[1] > 0 and len(listOfCentralNodes) <= k:
            for nodeToBeRemoved in allMaxes:
                nodeToBeRemoved = nodeToBeRemoved[0]
                self.__DAGGraph.remove_node(nodeToBeRemoved)
                listOfCentralNodes.append(nodeToBeRemoved)
            numberOfUpwardPaths = {}
            numberOfDownwardPaths = {}
            for node in nx.nodes(self.__DAGGraph):
                numberOfUpwardPaths[node] = 0
                numberOfDownwardPaths[node] = 0
            self.__calculateNumberOfUpwardPaths(sources, targets, numberOfUpwardPaths)
            self.__calculateNumberOfDownwardPaths(sources, targets, numberOfDownwardPaths)
            for t in targets:
                numberOfUpwardPaths[t] = 0
            for s in sources:
                numberOfDownwardPaths[s] = 0
            centralities = self.__calculateCentralities(numberOfUpwardPaths, numberOfDownwardPaths)
            topCentralNodeInfo = max(centralities, key=lambda x: x[1])
            allMaxes = [c for c in centralities if c[1] == topCentralNodeInfo[1]]
            number_of_current_paths = numberOfDownwardPaths[0]
        self.__DAGGraph = nx.MultiDiGraph()  # directed, matching __createDAGGraph
        self.__createDAGGraph()  # Reconstructing the DAG graph
        core = []
        for i in range(len(listOfCentralNodes)):
            core.append(self.__getNodeString(listOfCentralNodes[i]))
        return core

    # Calculates the centralities for all nodes
    def __calculateCentralities(self, numberOfUpwardPaths, numberOfDownwardPaths):
        result = []
        for node in nx.nodes(self.__DAGGraph):
            result.append((node, numberOfUpwardPaths[node] * numberOfDownwardPaths[node]))
        return result

    # Calculates the number of upward paths for all nodes
    def __calculateNumberOfUpwardPaths(self, sources, targets, numberOfUpwardPaths):
        for n in sources:
            self.__dfsUpward(n, sources, targets, numberOfUpwardPaths)

    # Helper recursive function
    def __dfsUpward(self, n, sources, targets, numberOfUpwardPaths):
        if self.__DAGGraph.out_degree(n) == 0:
            numberOfUpwardPaths[n] = 1
            return
        elif numberOfUpwardPaths[n] > 0:
            return
        else:
            for o in self.__DAGGraph.out_edges(n):
                self.__dfsUpward(o[1], sources, targets, numberOfUpwardPaths)
                numberOfUpwardPaths[n] += numberOfUpwardPaths[o[1]]

    # Calculates the number of downward paths for all nodes
    def __calculateNumberOfDownwardPaths(self, sources, targets, numberOfDownwardPaths):
        for n in targets:
            self.__dfsDownward(n, sources, targets, numberOfDownwardPaths)

    # Helper recursive function
    def __dfsDownward(self, n, sources, targets, numberOfDownwardPaths):
        if self.__DAGGraph.in_degree(n) == 0:
            numberOfDownwardPaths[n] = 1
            return
        elif numberOfDownwardPaths[n] > 0:
            return
        else:
            for o in self.__DAGGraph.in_edges(n):
                self.__dfsDownward(o[0], sources, targets, numberOfDownwardPaths)
                numberOfDownwardPaths[n] += numberOfDownwardPaths[o[0]]

    # ...........Printing Functions........
    # Prints the DAG, optionally in integer form if intDAGPrint==True
    def printDAG(self, intDAGPrint):
        self.__logMessage('DAGCost(Concats): ' + str(self.DAGCost(CostFunction.ConcatenationCost)))
        self.__logMessage('DAGCost(Edges): ' + str(self.DAGCost(CostFunction.EdgeCost)))
        DAG = self.__concatenatedDAG
        NTs = self.__concatenatedNTs
        separatorInts = self.__separatorInts
        Dic = self.__dic
        nodes = {}
        ntDic = {}
        counter = 1
        NTsSorted = set([])
        for i in range(len(NTs)):
            if NTs[i] not in ntDic and NTs[i] not in separatorInts:
                NTsSorted.add(NTs[i])
                ntDic[NTs[i]] = 'N' + str(NTs[i])
                nodes['N' + str(NTs[i])] = ''
                counter += 1
        for i in range(len(DAG)):
            if DAG[i] not in NTsSorted:
                if DAG[i] not in separatorInts:
                    if not intDAGPrint:
                        try:
                            nodes[ntDic[NTs[i]]] = str(nodes[ntDic[NTs[i]]]) + ' ' + str(Dic[DAG[i]])
                        except:
                            print(DAG[i], NTs[i])
                            raise
                    else:
                        nodes[ntDic[NTs[i]]] = str(nodes[ntDic[NTs[i]]]) + ' ' + str(DAG[i])
                else:
                    nodes[ntDic[NTs[i - 1]]] = str(nodes[ntDic[NTs[i - 1]]]) + ' ||'
            else:
                if not intDAGPrint:
                    try:
                        nodes[ntDic[NTs[i]]] = str(nodes[ntDic[NTs[i]]]) + ' ' + str(ntDic[DAG[i]])
                    except:
                        print(DAG[i], NTs[i])
                        raise
                else:
                    nodes[ntDic[NTs[i]]] = str(nodes[ntDic[NTs[i]]]) + ' ' + str(ntDic[DAG[i]])
        NTsSorted = sorted(list(NTsSorted))
        nodeCounter = 0
        for nt in NTsSorted:
            subnodes = nodes[ntDic[nt]].rstrip(' ||').split(' ||')
            if intDAGPrint:
                for s in subnodes:
                    print(ntDic[nt] + ' ->' + s)
            else:
                for s in subnodes:
                    print(ntDic[nt] + ' -> ' + s)
            nodeCounter += 1

    # Log via flags
    def __logViaFlag(self, flag):
        if not self.__quietLog:
            if flag == LogFlag.ConcatenationCostLog:
                sys.stderr.write('DAGCost(Concats): ' + str(self.DAGCost(CostFunction.ConcatenationCost)) + '\n')
                print(str('DAGCost(Concats): ' + str(self.DAGCost(CostFunction.ConcatenationCost))))
            if flag == LogFlag.EdgeCostLog:
                sys.stderr.write('DAGCost(Edges): ' + str(self.DAGCost(CostFunction.EdgeCost)) + '\n')
                print(str('DAGCost(Edges): ' + str(self.DAGCost(CostFunction.EdgeCost))))

    # Log custom message
    def __logMessage(self, message):
        if not self.__quietLog:
            sys.stderr.write(message + '\n')
            print(str(message))

    # ...........Utility Functions........
    # Converts the input data into an integer sequence; returns the integer
    # sequence and the dictionary for recovering the original letters
    def __preprocessInput(self, inputFile, charSeq=SequenceType.Character, noNewLineFlag=True):
        if charSeq == SequenceType.Character:
            # Building an integer-spaced sequence from the input string
            letterDict = {}
            counterDict = {}
            i = 0
            counter = 1
            newContents = ''
            if noNewLineFlag:
                line = inputFile.read()
                for i in range(len(line)):
                    if line[i] not in counterDict:
                        letterDict[counter] = line[i]
                        counterDict[line[i]] = counter
                        counter += 1
                    newContents += str(counterDict[line[i]]) + ' '
            else:
                for line in inputFile:
                    line = line.rstrip('\n')
                    for i in range(len(line)):
                        if line[i] not in counterDict:
                            letterDict[counter] = line[i]
                            counterDict[line[i]] = counter
                            counter += 1
                        newContents += str(counterDict[line[i]]) + ' '
                    newContents += '\n'
            return (newContents.rstrip('\n'), letterDict)
        if charSeq == SequenceType.Integer:
            # input is space-separated integers
            newContents = ''
            intDict = {}
            for l in inputFile.read().splitlines():
                line = l.split()
                for i in range(len(line)):
                    if not isinstance(int(line[i]), int) or line[i] == ' ':
                        raise ValueError('Input file is not in space-separated integer form.')
                    else:
                        intDict[int(line[i])] = line[i]
                newContents += l + '\n'
            return (newContents.rstrip('\n'), intDict)
        if charSeq == SequenceType.SpaceSeparated:
            # input is space-separated words
            wordDict = {}
            counterDict = {}
            i = 0
            counter = 1
            newContents = ''
            for line in inputFile:
                line = line.rstrip('\n')
                for w in line.split():
                    if w not in counterDict:
                        wordDict[counter] = w
                        counterDict[w] = counter
                        counter += 1
                    newContents += str(counterDict[w]) + ' '
                newContents += '\n'
            return (newContents.rstrip('\n'), wordDict)
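# Usage sketch (hedged): SequenceType, LogFlag and CostFunction are enums
# defined elsewhere in this module, the repeats binary must be present, and
# the repeat-class flag is passed straight through to that binary ('mr'
# below is illustrative), so this only shows the intended call sequence:
#
#     with open('corpus.txt') as f:      # hypothetical input file
#         dag = DAG(f, loadDAGFlag=False, chFlag=SequenceType.Character)
#     dag.GLexis(quiet=True, normalRepeatType='mr',
#                costFunction=CostFunction.EdgeCost)
#     core = dag.greedyCoreID_ByTau(0.9)
#     dag.printDAG(intDAGPrint=False)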
def setUp(self):
    self.G = nx.path_graph(9, create_using=nx.MultiDiGraph())
    self.eview = nx.reportviews.InMultiEdgeView
def setUp(self):
    super(TopologyLayer2TestCase, self).setUp()
    self.model_id = 1
    self.nav_graph = nx.MultiDiGraph()

    self.a = a = self._netbox_factory('a')
    self.b = b = self._netbox_factory('b')
    self.c = c = self._netbox_factory('c')
    self.d = d = self._netbox_factory('d')

    self.a1 = a1 = self._interface_factory('a1', a)
    self.a2 = a2 = self._interface_factory('a2', a)
    self.a3 = a3 = self._interface_factory('a3', a)
    self.b1 = b1 = self._interface_factory('b1', b)
    self.b2 = b2 = self._interface_factory('b2', b)
    self.c3 = c3 = self._interface_factory('c3', c)
    self.c4 = c4 = self._interface_factory('c4', c)
    self.d4 = d4 = self._interface_factory('d4', d)

    self._add_edge(self.nav_graph, a1.netbox, a1, b1.netbox, b1)
    self._add_edge(self.nav_graph, b1.netbox, b1, a1.netbox, a1)
    self._add_edge(self.nav_graph, a2.netbox, a2, b2.netbox, b2)
    self._add_edge(self.nav_graph, b2.netbox, b2, a2.netbox, a2)
    self._add_edge(self.nav_graph, a3.netbox, a3, c3.netbox, c3)
    self._add_edge(self.nav_graph, d4.netbox, d4, c4.netbox, c4)

    self.vlan__a1_b1 = a_vlan_between_a1_and_b1 = SwPortVlan(
        id=self._next_id(), interface=self.a1, vlan=Vlan(id=201, vlan=2)
    )

    self.vlans = patch.object(
        topology,
        '_get_vlans_map_layer2',
        return_value=(
            {
                self.a1: [a_vlan_between_a1_and_b1],
                self.b1: [a_vlan_between_a1_and_b1],
                self.a2: [],
                self.b2: [],
                self.a3: [],
                self.c3: [],
            },
            {
                self.a: {201: a_vlan_between_a1_and_b1},
                self.b: {201: a_vlan_between_a1_and_b1},
                self.c: {},
            },
        ),
    )
    self.vlans.start()

    self.build_l2 = patch.object(
        vlan, 'build_layer2_graph', return_value=self.nav_graph
    )
    self.build_l2.start()

    vlan_by_interfaces, vlan_by_netbox = topology._get_vlans_map_layer2(
        self.nav_graph
    )

    self.netmap_graph = topology.build_netmap_layer2_graph(
        vlan.build_layer2_graph(), vlan_by_interfaces, vlan_by_netbox, None
    )
def parse_gml_lines(lines, label, destringizer): """Parse GML `lines` into a graph. """ def tokenize(): patterns = [ r'[A-Za-z][0-9A-Za-z_]*\b', # keys r'[+-]?(?:[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)(?:[Ee][+-]?[0-9]+)?', # reals r'[+-]?[0-9]+', # ints r'".*?"', # strings r'\[', # dict start r'\]', # dict end r'#.*$|\s+' # comments and whitespaces ] tokens = re.compile('|'.join('(' + pattern + ')' for pattern in patterns)) lineno = 0 for line in lines: length = len(line) pos = 0 while pos < length: match = tokens.match(line, pos) if match is not None: for i in range(len(patterns)): group = match.group(i + 1) if group is not None: if i == 0: # keys value = group.rstrip() elif i == 1: # reals value = float(group) elif i == 2: # ints value = int(group) else: value = group if i != 6: # comments and whitespaces yield (i, value, lineno + 1, pos + 1) pos += len(group) break else: raise NetworkXError('cannot tokenize %r at (%d, %d)' % (line[pos:], lineno + 1, pos + 1)) lineno += 1 yield (None, None, lineno + 1, 1) # EOF def unexpected(curr_token, expected): category, value, lineno, pos = curr_token raise NetworkXError( 'expected %s, found %s at (%d, %d)' % (expected, repr(value) if value is not None else 'EOF', lineno, pos)) def consume(curr_token, category, expected): if curr_token[0] == category: return next(tokens) unexpected(curr_token, expected) def parse_kv(curr_token): dct = defaultdict(list) while curr_token[0] == 0: # keys key = curr_token[1] curr_token = next(tokens) category = curr_token[0] if category == 1 or category == 2: # reals or ints value = curr_token[1] curr_token = next(tokens) elif category == 3: # strings value = unescape(curr_token[1][1:-1]) if destringizer: try: value = destringizer(value) except ValueError: pass curr_token = next(tokens) elif category == 4: # dict start curr_token, value = parse_dict(curr_token) else: unexpected(curr_token, "an int, float, string or '['") dct[key].append(value) dct = { key: (value if not isinstance(value, list) or len(value) != 1 else value[0]) for key, value in dct.items() } return curr_token, dct def parse_dict(curr_token): curr_token = consume(curr_token, 4, "'['") # dict start curr_token, dct = parse_kv(curr_token) curr_token = consume(curr_token, 5, "']'") # dict end return curr_token, dct def parse_graph(): curr_token, dct = parse_kv(next(tokens)) if curr_token[0] is not None: # EOF unexpected(curr_token, 'EOF') if 'graph' not in dct: raise NetworkXError('input contains no graph') graph = dct['graph'] if isinstance(graph, list): raise NetworkXError('input contains more than one graph') return graph tokens = tokenize() graph = parse_graph() directed = graph.pop('directed', False) multigraph = graph.pop('multigraph', False) if not multigraph: G = nx.DiGraph() if directed else nx.Graph() else: G = nx.MultiDiGraph() if directed else nx.MultiGraph() G.graph.update((key, value) for key, value in graph.items() if key != 'node' and key != 'edge') def pop_attr(dct, category, attr, i): try: return dct.pop(attr) except KeyError: raise NetworkXError("%s #%d has no '%s' attribute" % (category, i, attr)) nodes = graph.get('node', []) mapping = {} node_labels = set() for i, node in enumerate(nodes if isinstance(nodes, list) else [nodes]): id = pop_attr(node, 'node', 'id', i) if id in G: raise NetworkXError('node id %r is duplicated' % (id, )) if label is not None and label != 'id': node_label = pop_attr(node, 'node', label, i) if node_label in node_labels: raise NetworkXError('node label %r is duplicated' % (node_label, )) node_labels.add(node_label) 
mapping[id] = node_label G.add_node(id, **node) edges = graph.get('edge', []) for i, edge in enumerate(edges if isinstance(edges, list) else [edges]): source = pop_attr(edge, 'edge', 'source', i) target = pop_attr(edge, 'edge', 'target', i) if source not in G: raise NetworkXError('edge #%d has an undefined source %r' % (i, source)) if target not in G: raise NetworkXError('edge #%d has an undefined target %r' % (i, target)) if not multigraph: if not G.has_edge(source, target): G.add_edge(source, target, **edge) else: raise nx.NetworkXError( """edge #%d (%r%s%r) is duplicated Hint: If this is a multigraph, add "multigraph 1" to the header of the file.""" % (i, source, '->' if directed else '--', target)) else: key = edge.pop('key', None) if key is not None and G.has_edge(source, target, key): raise nx.NetworkXError( 'edge #%d (%r%s%r, %r) is duplicated' % (i, source, '->' if directed else '--', target, key)) G.add_edge(source, target, key, **edge) if label is not None and label != 'id': G = nx.relabel_nodes(G, mapping) return G
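# Usage sketch (illustrative, not part of the parser above): parse_gml_lines
# takes an iterable of GML lines, the node attribute to use as the label, and
# an optional destringizer. Assumes the helpers the function relies on
# (nx, re, defaultdict, NetworkXError, unescape) are in scope.
gml = [
    'graph [',
    '  directed 1',
    '  node [ id 0 label "a" ]',
    '  node [ id 1 label "b" ]',
    '  edge [ source 0 target 1 ]',
    ']',
]
G = parse_gml_lines(gml, label='label', destringizer=None)
assert list(G.edges()) == [('a', 'b')]  # nodes relabeled from ids to labels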
import csv

import networkx as nx

# find_all_filenames and Property are project-local helpers

EdgeList_Original = []
with open('Configuration.txt', "r") as conf_file:
    lineList = conf_file.readlines()

# Find the input folder name (last line of the configuration file);
# strip the trailing newline so the path is built correctly
input_folder = lineList[-1].rstrip() + "/input_files/"
allFiles = find_all_filenames(input_folder)
data_file = input_folder + allFiles[0]

# Read the first input file
#with open('/work/fz56/LANS-6.0/input_files/8.binetflow') as csvfile:
with open(data_file) as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        EdgeList_Original.append((row["SrcAddr"], row["DstAddr"]))

GT = nx.MultiDiGraph()
GT.add_edges_from(EdgeList_Original)
#GT = nx.read_graphml("CTU13_4_Original.graphml")

originalPropertyGT = Property(GT)

original_in_degree = originalPropertyGT.getInDegree()
with open('Original_Node_In_Degree.txt', "w") as Original_Node_In_Degree:
    for e in original_in_degree:
        Original_Node_In_Degree.write(str(e) + "\n")

original_out_degree = originalPropertyGT.getOutDegree()
with open('Original_Node_Out_Degree.txt', "w") as Original_Node_Out_Degree:
    for e in original_out_degree:
        Original_Node_Out_Degree.write(str(e) + "\n")
def blockmodel(G, partitions, multigraph=False):
    """Returns a reduced graph constructed using the generalized block
    modeling technique.

    The blockmodel technique collapses nodes into blocks based on a given
    partitioning of the node set. Each partition of nodes (block) is
    represented as a single node in the reduced graph. Edges between nodes
    in the block graph are added according to the edges in the original
    graph. If the parameter multigraph is False (the default) a single edge
    is added with a weight equal to the sum of the edge weights between
    nodes in the original graph. The default weight is 1 if no weights are
    specified. If the parameter multigraph is True then multiple edges are
    added, each with the edge data from the original graph.

    Parameters
    ----------
    G : graph
        A networkx Graph or DiGraph
    partitions : list of lists, or list of sets
        The partition of the nodes. Must be non-overlapping.
    multigraph : bool, optional
        If True return a MultiGraph with the edge data of the original
        graph applied to each corresponding edge in the new graph.
        If False return a Graph with the sum of the edge weights, or a
        count of the edges if the original graph is unweighted.

    Returns
    -------
    blockmodel : a Networkx graph object

    Examples
    --------
    >>> G = nx.path_graph(6)
    >>> partition = [[0, 1], [2, 3], [4, 5]]
    >>> M = blockmodel(G, partition)

    References
    ----------
    .. [1] Patrick Doreian, Vladimir Batagelj, and Anuska Ferligoj
        "Generalized Blockmodeling", Cambridge University Press, 2004.
    """
    # Create sets of node partitions
    part = list(map(set, partitions))

    # Check for overlapping node partitions: each partition must be
    # disjoint from the union of all earlier partitions
    u = set()
    for p1, p2 in zip(part[:-1], part[1:]):
        u.update(p1)
        if not u.isdisjoint(p2):
            raise nx.NetworkXException("Overlapping node partitions.")

    # Initialize blockmodel graph
    if multigraph:
        if G.is_directed():
            M = nx.MultiDiGraph()
        else:
            M = nx.MultiGraph()
    else:
        if G.is_directed():
            M = nx.DiGraph()
        else:
            M = nx.Graph()

    # Add nodes and properties to blockmodel.
    # The blockmodel nodes are node-induced subgraphs of G,
    # labeled with integers starting at 0.
    for i, p in enumerate(part):
        M.add_node(i)
        # The node-induced subgraph is stored as the node 'graph' attribute
        SG = G.subgraph(p)
        M.nodes[i]['graph'] = SG
        M.nodes[i]['nnodes'] = SG.number_of_nodes()
        M.nodes[i]['nedges'] = SG.number_of_edges()
        M.nodes[i]['density'] = nx.density(SG)

    # Create mapping between original node labels and new blockmodel node labels
    block_mapping = {}
    for n in M:
        nodes_in_block = M.nodes[n]['graph'].nodes()
        block_mapping.update(dict.fromkeys(nodes_in_block, n))

    # Add edges to block graph
    for u, v, d in G.edges(data=True):
        bmu = block_mapping[u]
        bmv = block_mapping[v]
        if bmu == bmv:  # no self loops
            continue
        if multigraph:
            # For multigraphs add an edge for each edge in the original graph
            M.add_edge(bmu, bmv, **d)
        else:
            # For graphs and digraphs add a single weighted edge
            weight = d.get('weight', 1.0)  # default to 1 if no weight specified
            if M.has_edge(bmu, bmv):
                M[bmu][bmv]['weight'] += weight
            else:
                M.add_edge(bmu, bmv, weight=weight)
    return M
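# Usage sketch (illustrative, not from the original module): collapsing a
# 6-node path into three 2-node blocks. Assumes networkx is imported as nx
# and blockmodel() above is in scope.
G = nx.path_graph(6)
M = blockmodel(G, [[0, 1], [2, 3], [4, 5]])
# Edges (1, 2) and (3, 4) cross block boundaries, so blocks 0-1 and 1-2 are
# each joined by a single edge of weight 1.0; edges inside a block vanish.
assert M.number_of_nodes() == 3
assert M[0][1]['weight'] == 1.0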
def main(
        original_in_degree,
        original_out_degree,
        parent_dir
        # original_average_neighbor_degree, \
        # original_pageRank, \
        # original_triangle, \
        # original_local_clustering_coefficient, \
        # original_core_number
):
    ############################# Random Node #############################
    #GraphGT = nx.read_graphml("Simulation.graphml")
    EdgeList_Simulation = []
    simulated_file = parent_dir + "/SimulatedGraph/localgen_0.csv"
    #with open('/work/fz56/LANS-6.0/SimulatedGraph/localgen_0.csv') as csvfile:
    with open(simulated_file) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            EdgeList_Simulation.append((row["source"], row["destination"]))

    GraphGT = nx.MultiDiGraph()
    GraphGT.add_edges_from(EdgeList_Simulation)

    PropertyGT = Property(GraphGT)
    propertyDistance = [0.0] * 7

    in_degree = PropertyGT.getInDegree()
    with open('New_Node_In_Degree.txt', "w") as New_Node_In_Degree:
        for e in in_degree:
            New_Node_In_Degree.write(str(e) + "\n")

    out_degree = PropertyGT.getOutDegree()
    with open('New_Node_Out_Degree.txt', "w") as New_Node_Out_Degree:
        for e in out_degree:
            New_Node_Out_Degree.write(str(e) + "\n")

    # average_neighbor_degree = PropertyGT.getAverageNeighborDegree()
    # New_Average_Neighbor_Degree = open('New_Average_Neighbor_Degree.txt', "w")
    # for e in average_neighbor_degree:
    #     New_Average_Neighbor_Degree.write(str(e) + "\n")
    # New_Average_Neighbor_Degree.close()
    #
    # pageRank = PropertyGT.getPageRank()
    # New_pageRank = open('New_pageRank.txt', "w")
    # for e in pageRank:
    #     New_pageRank.write(str(e) + "\n")
    # New_pageRank.close()
    #
    # triangle = PropertyGT.getTriangles()
    # New_triangle = open('New_triangle.txt', "w")
    # for e in triangle:
    #     New_triangle.write(str(e) + "\n")
    # New_triangle.close()
    #
    # local_clustering_coefficient = PropertyGT.getLocalClusteringCoefficient()
    # New_local_clustering_coefficient = open('New_local_clustering_coefficient.txt', "w")
    # for e in local_clustering_coefficient:
    #     New_local_clustering_coefficient.write(str(e) + "\n")
    # New_local_clustering_coefficient.close()
    #
    # core_number = PropertyGT.getCoreNumber()
    # New_core_number = open('New_core_number.txt', "w")
    # for e in core_number:
    #     New_core_number.write(str(e) + "\n")
    # New_core_number.close()

    # originalPropertyGT is the module-level Property object built from the
    # original input graph; averageKL compares each original distribution
    # with its simulated counterpart
    propertyDistance[0] = originalPropertyGT.averageKL(original_in_degree,
                                                       in_degree)
    propertyDistance[1] = originalPropertyGT.averageKL(original_out_degree,
                                                       out_degree)
    # propertyDistance[2] = originalPropertyGT.averageKL(original_average_neighbor_degree, average_neighbor_degree)
    # propertyDistance[3] = originalPropertyGT.averageKL(original_pageRank, pageRank)
    # propertyDistance[4] = originalPropertyGT.averageKL(original_triangle, triangle)
    # propertyDistance[5] = originalPropertyGT.averageKL(original_local_clustering_coefficient, local_clustering_coefficient)
    # propertyDistance[6] = originalPropertyGT.averageKL(original_core_number, core_number)

    print('\n')
    for i in range(len(propertyDistance)):
        print(propertyDistance[i], end="\t")
    print('\n')
def degreeMessageNumberCSV(log_directory, channel_name, output_directory, startingDate, startingMonth, endingDate, endingMonth): """ creates two csv files having no. of nodes with a certain in and out-degree for number of messages respectively Args: log_directory (str): Location of the logs (Assumed to be arranged in directory structure as : <year>/<month>/<day>/<log-file-for-channel>.txt) channel_name (str): Channel to be perform analysis on output_directory (str): Location of output directory startingDate (int): Date to start the analysis (in conjunction with startingMonth) startingMonth (int): Date to start the analysis (in conjunction with startingDate) endingDate (int): Date to end the analysis (in conjunction with endingMonth) endingMonth (int): Date to end the analysis (in conjunction with endingDate) Returns: null """ nodes_with_OUT_degree_per_day = [] nodes_with_IN_degree_per_day = [] nodes_with_TOTAL_degree_per_day = [] max_degree_possible = 1000 # output_dir_degree = output_directory+"degreeMessageNumberCSV/" output_dir_degree = output_directory output_file_out_degree = output_dir_degree + "msg_no_out_degree.csv" output_file_in_degree = output_dir_degree + "msg_no_in_degree.csv" output_file_total_degree = output_dir_degree + "msg_no_total_degree.csv" # print "Creating a new output folder" # os.system("rm -rf "+output_dir_degree) # os.system("mkdir "+output_dir_degree) if not os.path.exists(os.path.dirname(output_dir_degree)): try: os.makedirs(os.path.dirname(output_dir_degree)) os.system("rm " + output_file_out_degree) os.system("touch " + output_file_out_degree) os.system("rm " + output_file_in_degree) os.system("touch " + output_file_in_degree) os.system("rm " + output_file_total_degree) os.system("touch " + output_file_total_degree) except OSError as exc: # Guard against race condition if exc.errno != errno.EEXIST: raise rem_time = None #remembers the time of the last message of the file parsed before the current file for folderiterator in range(startingMonth, endingMonth + 1): temp1 = "0" if folderiterator < 10 else "" for fileiterator in range( startingDate if folderiterator == startingMonth else 1, endingDate + 1 if folderiterator == endingMonth else 32): temp2 = "0" if fileiterator < 10 else "" filePath = log_directory + temp1 + str( folderiterator) + "/" + temp2 + str( fileiterator) + "/" + channel_name + ".txt" if not os.path.exists(filePath): if not ((folderiterator == 2 and (fileiterator == 29 or fileiterator == 30 or fileiterator == 31)) or ((folderiterator == 4 or folderiterator == 6 or folderiterator == 9 or folderiterator == 11) and fileiterator == 31)): print "[Error] Path " + filePath + " doesn't exist" continue with open(filePath) as f: content = f.readlines( ) #contents stores all the lines of the file channel_name nicks = [] #list of all the nicknames ''' Getting all the nicknames in a list nicks[] ''' for i in content: if (i[0] != '=' and "] <" in i and "> " in i): m = re.search(r"\<(.*?)\>", i) if m.group(0) not in nicks: nicks.append( m.group(0) ) #used regex to get the string between <> and appended it to the nicks list for i in xrange(0, len(nicks)): nicks[i] = nicks[i][1:-1] #removed <> from the nicknames for i in xrange(0, len(nicks)): nicks[i] = ext.util.correctLastCharCR(nicks[i]) for line in content: if ( line[0] == '=' and "changed the topic of" not in line ): #excluding the condition when user changes the topic. 
                        # Search for only nick changes
                        nick1 = ext.util.correctLastCharCR(
                            line[line.find("=") + 1:line.find(" is")][3:])
                        nick2 = ext.util.correctLastCharCR(
                            line[line.find("wn as") + 1:line.find("\n")][5:])
                        if nick1 not in nicks:
                            nicks.append(nick1)
                        if nick2 not in nicks:
                            nicks.append(nick2)

                #print("printing nicks***********************************")
                #print(nicks)
                '''
                    Forming list of lists for avoiding nickname duplication
                '''
                nick_same_list = [
                    [] for i in range(len(nicks))
                ]  #list of lists, each inner list holding all the nicks of one particular person
                for line in content:
                    if (line[0] == '=' and "changed the topic of" not in line):
                        line1 = line[line.find("=") + 1:line.find(" is")][3:]
                        line2 = line[line.find("wn as") + 1:line.find("\n")][5:]
                        line1 = ext.util.correctLastCharCR(line1)
                        line2 = ext.util.correctLastCharCR(line2)
                        # iterate over the slots (one per nick) rather than a
                        # magic constant, so the index can never run past the
                        # end of nick_same_list
                        for i in range(len(nick_same_list)):
                            if line1 in nick_same_list[i] or \
                                    line2 in nick_same_list[i]:
                                nick_same_list[i].append(line1)
                                nick_same_list[i].append(line2)
                                break
                            if not nick_same_list[i]:
                                nick_same_list[i].append(line1)
                                nick_same_list[i].append(line2)
                                break

                #print("printing nick_same_list****************************")
                #print(nick_same_list)
                '''===========================
                    Plotting the conversation graph
                   ==========================='''
                graph_conversation = nx.MultiDiGraph(
                )  #graph with multiple directed edges between clients
                for line in content:
                    flag_comma = 0
                    if (line[0] != '=' and "] <" in line and "> " in line):
                        m = re.search(r"\<(.*?)\>", line)
                        var = m.group(0)[1:-1]
                        var = ext.util.correctLastCharCR(var)
                        # map the sender to the canonical nick for that person
                        for d in range(len(nicks)):
                            if var in nick_same_list[d]:
                                nick_sender = nick_same_list[d][0]
                                break
                        else:
                            nick_sender = var
                        for i in nicks:
                            rec_list = [e.strip() for e in line.split(':')
                                        ]  #receiver list split about ':'
                            rec_list[1] = rec_list[1][rec_list[1].find(">") +
                                                      1:len(rec_list[1])]
                            rec_list[1] = rec_list[1][1:]
                            if not rec_list[1]:  #index 0 will contain time 14:02
                                break
                            for k in xrange(0, len(rec_list)):
                                if (rec_list[k]):  #checking for \
                                    rec_list[k] = ext.util.correctLastCharCR(
                                        rec_list[k])
                            for z in rec_list:
                                if (z == i):
                                    if (var != i):
                                        for d in range(len(nicks)):
                                            if i in nick_same_list[d]:
                                                nick_receiver = nick_same_list[
                                                    d][0]
                                                break
                                        else:
                                            nick_receiver = i
                                        graph_conversation.add_edge(
                                            nick_sender,
                                            nick_receiver,
                                            weight=line[1:6])
                            if "," in rec_list[
                                    1]:  #receiver list may be of the form <Dhruv> Rohan, Ram :
                                flag_comma = 1
                                rec_list_2 = [
                                    e.strip() for e in rec_list[1].split(',')
                                ]
                                for y in xrange(0, len(rec_list_2)):
                                    if (rec_list_2[y]):  #checking for \
                                        rec_list_2[y] = ext.util.correctLastCharCR(
                                            rec_list_2[y])
                                for j in rec_list_2:
                                    if (j == i):
                                        if (var != i):
                                            for d in range(len(nicks)):
                                                if i in nick_same_list[d]:
                                                    nick_receiver = nick_same_list[
                                                        d][0]
                                                    break
                                            else:
                                                nick_receiver = i
                                            graph_conversation.add_edge(
                                                nick_sender,
                                                nick_receiver,
                                                weight=line[1:6])
                            if (flag_comma == 0):
                                #receiver list can be <Dhruv> Rohan, Hi!
rec = line[line.find(">") + 1:line.find(", ")] rec = rec[1:] rec = ext.util.correctLastCharCR(rec) if (rec == i): if (var != i): for d in range(len(nicks)): if i in nick_same_list[d]: nick_receiver = nick_same_list[d][ 0] break else: nick_receiver = i graph_conversation.add_edge( nick_sender, nick_receiver, weight=line[1:6]) for u, v, d in graph_conversation.edges(data=True): d['label'] = d.get('weight', '') # output_file_out_degree=out_dir_msg_time+channel_name+"_2013_"+str(folderiterator)+"_"+str(fileiterator)+"_msg_time.png" # print "Generated " + output_file_out_degree # A = nx.drawing.nx_agraph.to_agraph(graph_conversation) # A.layout(prog='dot') # A.draw(output_file_out_degree) nodes_with_OUT_degree = [0] * max_degree_possible nodes_with_IN_degree = [0] * max_degree_possible nodes_with_TOTAL_degree = [0] * max_degree_possible print graph_conversation.out_degree( ), graph_conversation.in_degree(), graph_conversation.degree() print graph_conversation.out_degree().values() print graph_conversation.in_degree().values() print graph_conversation.degree().values() for degree in graph_conversation.out_degree().values(): nodes_with_OUT_degree[degree] += 1 for degree in graph_conversation.in_degree().values(): nodes_with_IN_degree[degree] += 1 for degree in graph_conversation.degree().values(): nodes_with_TOTAL_degree[degree] += 1 print "\n" nodes_with_OUT_degree.insert(0, sum(nodes_with_OUT_degree)) nodes_with_OUT_degree.insert( 0, str(folderiterator) + "-" + str(fileiterator)) nodes_with_OUT_degree_per_day.append(nodes_with_OUT_degree) nodes_with_IN_degree.insert(0, sum(nodes_with_IN_degree)) nodes_with_IN_degree.insert( 0, str(folderiterator) + "-" + str(fileiterator)) nodes_with_IN_degree_per_day.append(nodes_with_IN_degree) nodes_with_TOTAL_degree.insert(0, sum(nodes_with_TOTAL_degree)) nodes_with_TOTAL_degree.insert( 0, str(folderiterator) + "-" + str(fileiterator)) nodes_with_TOTAL_degree_per_day.append(nodes_with_TOTAL_degree) # print nodes_with_OUT_degree_per_day # print nodes_with_IN_degree_per_day # print nodes_with_TOTAL_degree_per_day temp = ['deg' + str(i) for i in xrange(max_degree_possible)] temp.insert(0, 'total') temp.insert(0, 'out-degree/day>') nodes_with_OUT_degree_per_day.insert(0, temp) column_wise = zip(*nodes_with_OUT_degree_per_day) with open(output_file_out_degree, 'wb') as myfile: wr = csv.writer(myfile, quoting=csv.QUOTE_ALL) for col in column_wise: wr.writerow(col) temp = ['deg' + str(i) for i in xrange(max_degree_possible)] temp.insert(0, 'total') temp.insert(0, 'in-degree/day>') nodes_with_IN_degree_per_day.insert(0, temp) column_wise = zip(*nodes_with_IN_degree_per_day) with open(output_file_in_degree, 'wb') as myfile2: wr = csv.writer(myfile2, quoting=csv.QUOTE_ALL) for col in column_wise: wr.writerow(col) temp = ['deg' + str(i) for i in xrange(max_degree_possible)] temp.insert(0, 'total') temp.insert(0, 'degree/day>') nodes_with_TOTAL_degree_per_day.insert(0, temp) column_wise = zip(*nodes_with_TOTAL_degree_per_day) with open(output_file_total_degree, 'wb') as myfile3: wr = csv.writer(myfile3, quoting=csv.QUOTE_ALL) for col in column_wise: wr.writerow(col)
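# Hypothetical invocation (all paths and the channel name are placeholders):
# logs are expected at <log_directory>/<month>/<day>/<channel_name>.txt, and
# the function writes msg_no_out_degree.csv, msg_no_in_degree.csv and
# msg_no_total_degree.csv into the output directory.
degreeMessageNumberCSV("/path/to/logs/2013/", "channel", "/path/to/output/",
                       1, 1, 31, 12)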
def query(self, topic, max_depth=4, config=None, pivot_on=list(),
          dont_pivot_on=list(['enrichment', 'classification']),
          direction='successors'):
    """Return the context subgraph of a topic graph.

    :param topic: a graph to return the context of. At least one node ID in
        topic must be in the full graph g to return any context.
    :param max_depth: The maximum distance from the topic to search
    :param config: The titanDB configuration to use if not using the one
        configured with the plugin
    :param pivot_on: A list of attribute types to pivot on.
    :param dont_pivot_on: A list of attribute types to not pivot on.
    :param direction: The direction to traverse the graph
    :return: subgraph in networkx format

    NOTE: If an attribute is in both pivot_on and dont_pivot_on it will
    not be pivoted on.
    """
    if config is None:
        config = self.titandb_config

    # Connect to TitanDB Database
    titan_graph = TITAN_Graph(config)

    # Convert the topic nodes into titanDB eids
    current_nodes = set()
    eid_uri_map = {}
    # Validate the node URI
    for node in topic.nodes():
        titan_node = titan_graph.vertices.index.get_unique(
            "uri", topic.node[node]["uri"])
        if titan_node:
            current_nodes.add(titan_node.eid)
            eid_uri_map[titan_node.eid] = node
    topic_nodes = frozenset(current_nodes)
    subgraph_nodes = current_nodes

    #sg = copy.deepcopy(topic)
    sg = nx.MultiDiGraph()
    sg.add_nodes_from(topic.nodes(data=True))
    sg.add_edges_from(topic.edges(data=True))

    distances = {node: 0 for node in topic.nodes()}

    # print "Initial current Nodes: {0}".format(current_nodes)  # DEBUG

    for i in range(1, max_depth + 1):
        new_nodes = set()
        new_out_edges = set()
        new_in_edges = set()
        for eid in current_nodes:
            node = titan_graph.vertices.get(eid)
            # If all directions, get all neighbors
            if direction == 'all' or eid in topic_nodes:
                try:
                    new_nodes = new_nodes.union({
                        n.eid
                        for n in titan_graph.gremlin.query(
                            "g.v({0}).both".format(eid))
                    })
                except Exception:
                    pass
                try:
                    new_out_edges = new_out_edges.union({
                        n.eid
                        for n in titan_graph.gremlin.query(
                            "g.v({0}).outE".format(eid))
                    })
                except Exception:
                    pass
                try:
                    new_in_edges = new_in_edges.union({
                        n.eid
                        for n in titan_graph.gremlin.query(
                            "g.v({0}).inE".format(eid))
                    })
                except Exception:
                    pass
            # If there is a list of things to NOT pivot on, pivot on
            # everything else
            elif dont_pivot_on and 'attribute' in node and node.map(
            )['attribute'] not in dont_pivot_on:
                try:
                    new_nodes = new_nodes.union({
                        n.eid
                        for n in titan_graph.gremlin.query(
                            "g.v({0}).both".format(eid))
                    })
                except Exception:
                    pass
                try:
                    new_out_edges = new_out_edges.union({
                        n.eid
                        for n in titan_graph.gremlin.query(
                            "g.v({0}).outE".format(eid))
                    })
                except Exception:
                    pass
                try:
                    new_in_edges = new_in_edges.union({
                        n.eid
                        for n in titan_graph.gremlin.query(
                            "g.v({0}).inE".format(eid))
                    })
                except Exception:
                    pass
            # Otherwise, only get all neighbors if the node is to be
            # pivoted on.
            elif 'attribute' in node and \
                    node['attribute'] in pivot_on and \
                    node['attribute'] not in dont_pivot_on:
                try:
                    new_nodes = new_nodes.union({
                        n.eid
                        for n in titan_graph.gremlin.query(
                            "g.v({0}).both".format(eid))
                    })
                except Exception:
                    pass
                try:
                    new_out_edges = new_out_edges.union({
                        n.eid
                        for n in titan_graph.gremlin.query(
                            "g.v({0}).outE".format(eid))
                    })
                except Exception:
                    pass
                try:
                    new_in_edges = new_in_edges.union({
                        n.eid
                        for n in titan_graph.gremlin.query(
                            "g.v({0}).inE".format(eid))
                    })
                except Exception:
                    pass
            # If not all neighbors and not pivoting, and we are traversing
            # up, get predecessors
            elif direction == 'predecessors':
                # add edges to make predecessors successors for later
                # probability calculation
                try:
                    new_nodes = new_nodes.union({
                        n.eid
                        for n in titan_graph.gremlin.query(
                            "g.v({0}).out".format(eid))
                    })
                except Exception:
                    pass
                # add the reverse edges. The opposite of these edges will
                # get placed in the subgraph
                try:
                    new_in_edges = new_in_edges.union({
                        n.eid
                        for n in titan_graph.gremlin.query(
                            "g.v({0}).inE".format(eid))
                    })
                except Exception:
                    pass
            # Otherwise assume we are traversing down and get all successors
            else:  # default to successors
                try:
                    new_nodes = new_nodes.union({
                        n.eid
                        for n in titan_graph.gremlin.query(
                            "g.v({0}).both".format(eid))
                    })
                except Exception:
                    pass
                try:
                    new_out_edges = new_out_edges.union({
                        n.eid
                        for n in titan_graph.gremlin.query(
                            "g.v({0}).outE".format(eid))
                    })
                except Exception:
                    pass

        # Remove nodes from new_nodes that are already in the subgraph so
        # we don't overwrite their topic distance
        current_nodes = new_nodes - subgraph_nodes
        # combine the new nodes into the subgraph nodes set
        subgraph_nodes = subgraph_nodes.union(current_nodes)

        # Copy nodes, out-edges, in-edges, and reverse in-edges into subgraph
        # Add nodes
        for neighbor_eid in new_nodes:
            attr = titan_graph.vertices.get(neighbor_eid).map()
            sg.add_node(attr['uri'], attr)
            eid_uri_map[neighbor_eid] = attr['uri']
        # Add predecessor edges
        for out_eid in new_out_edges:
            out_edge = titan_graph.edges.get(out_eid)
            attr = out_edge.map()
            sg.add_edge(eid_uri_map[out_edge._outV],
                        eid_uri_map[out_edge._inV], out_eid, attr)
        # Add successor edges & reverse pivot edges
        for in_eid in new_in_edges:
            in_edge = titan_graph.edges.get(in_eid)
            attr = in_edge.map()
            attr['origin'] = "subgraph_creation_pivot"
            sg.add_edge(eid_uri_map[in_edge._inV],
                        eid_uri_map[in_edge._outV], in_eid, attr)

        # Set the distance from the topic on the nodes in the graph
        for eid in current_nodes:
            if eid_uri_map[eid] not in distances:
                distances[eid_uri_map[eid]] = i

        # logging.debug("Current nodes: {0}".format(current_nodes))  # DEBUG

    # add the distances to the subgraph
    nx.set_node_attributes(sg, "topic_distance", distances)

    logging.debug(nx.info(sg))  # DEBUG

    # Return the subgraph
    return sg
def __init__(self): Visitor.__init__(self) self.edge_types = ["ast"] self.G = nx.MultiDiGraph()
def strongly_connected_comp_splitter(graph, edge_weight): """ Written by JC Breaks graph into strongly connected components. Removes components with no edges. Adds all edges and nodes contained by strongly connected components to a new graph. Labels nodes and edges by a component ID. Since this function substitutes for a function that contracted edges in addition to splitting components, this version continues to add a 'path' attribute that is used in subsequent functions. Args: graph (Networkx DiGraph): graph of required edges edge_weight (str): edge attribute to designate as a weight Returns: MultiDiGraph of strongly connected components with edge attributes catered for subsequent functions """ graph_split = nx.MultiDiGraph() removed_comps = [] comp_list = [] print('\nInitial Strongly Connected Component Breakdown') for i, comp in enumerate([ graph.subgraph(c).copy() for c in nx.strongly_connected_components(graph) ]): print('Comp: {}'.format(i)) print('\tEdges: {}'.format(len(comp.edges()))) print('\tNodes: {}'.format(len(comp.nodes()))) #for n in comp.nodes(): #print(comp.degree(n)) #exclude components with only 1 node if len(comp.nodes()) > 1: comp_list += [i] for cc in connected_comp_edge_handler(comp, edge_weight): start_node, end_node, attr = cc attr['comp'] = i graph_split.add_edge(start_node, end_node, **attr) graph_split.nodes[start_node]['comp'] = i graph_split.nodes[end_node]['comp'] = i graph_split.nodes[start_node]['y'] = graph.nodes[start_node][ 'y'] graph_split.nodes[start_node]['x'] = graph.nodes[start_node][ 'x'] graph_split.nodes[end_node]['y'] = graph.nodes[end_node]['y'] graph_split.nodes[end_node]['x'] = graph.nodes[end_node]['x'] else: removed_comps += [i] for comp in removed_comps: print('Comp {} removed'.format(comp)) comp_corrector_dic = {} for i in range(0, len(comp_list)): comp_corrector_dic[comp_list[i]] = i graph_split_copy = graph_split.copy() for edge in graph_split.edges(data=True): #rename comp graph_split_copy[edge[0]][edge[1]][0]['comp'] = comp_corrector_dic[ edge[2]['comp']] #remove parallel edges if 1 in graph_split_copy[edge[0]][edge[1]]: graph_split_copy.remove_edge(edge[0], edge[1], key=1) print('Comp numbers reordered') print('Parallel edges removed') graph_split = graph_split_copy return graph_split
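# Usage sketch (illustrative): two directed triangles joined by a one-way
# bridge yield two strongly connected components with more than one node.
# Assumes networkx is imported as nx, every edge carries the attribute named
# by edge_weight, every node has 'x'/'y' coordinates (the splitter copies
# them), and connected_comp_edge_handler() from the original codebase is in
# scope and yields each component's (start, end, attr) edges.
import networkx as nx

g = nx.DiGraph()
g.add_weighted_edges_from([(0, 1, 1), (1, 2, 1), (2, 0, 1),   # component A
                           (3, 4, 1), (4, 5, 1), (5, 3, 1),   # component B
                           (2, 3, 1)])                        # bridge A -> B
for n in g.nodes():
    g.nodes[n]['x'] = g.nodes[n]['y'] = float(n)

split = strongly_connected_comp_splitter(g, 'weight')
# The bridge edge crosses components, so it is dropped; the two kept
# components are renumbered 0 and 1.
print({d['comp'] for _, _, d in split.edges(data=True)})  # expected: {0, 1}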
def __init__(self): Visitor.__init__(self) self.edge_types = ["ast", "cfg", "in", "data"] self.G = nx.MultiDiGraph()
def clear(self): self._function_map.clear() self.callgraph = networkx.MultiDiGraph() self.block_map.clear()
def test_multidigraph(self): G = nx.MultiDiGraph(self.edges) x = list(find_cycle(G, self.nodes)) x_ = [(0, 1, 0), (1, 0, 0)] # (1, 0, 1) assert_equal(x[0], x_[0]) assert_equal(x[1][:2], x_[1][:2])
def parseJsonToNx(self): command = "curl --max-time 30 -s http://"+self.IpPort+"/stats/switches" result = os.popen(command).read() if result != "": try: switches = json.loads(result) except ValueError: print 'Decoding JSON has failed' print "Error: something does not work in getting info from ryu controller" sys.exit(-2) for switch in switches: command = "curl --max-time 30 -s http://"+self.IpPort+"/stats/portdesc/"+str(switch) result = os.popen(command).read() if result != "": try: switch_ports = json.loads(result) except ValueError: print 'Decoding JSON has failed' print "Error: something does not work in getting info from ryu controller" sys.exit(-2) self.ports[str(switch)] = switch_ports[str(switch)] command = "curl --max-time 30 -s http://"+self.IpPort+"/v1.0/topology/links" result = os.popen(command).read() if result != "": try: self.topology = json.loads(result) except ValueError: print 'Decoding JSON has failed' print "Error: something does not work in getting info from ryu controller" sys.exit(-2) self.nx_topology = nx.MultiDiGraph() self.nx_topology.clear() index = 0 for link in self.topology: src = link['src']['dpid'] dst = link['dst']['dpid'] src_port = link['src']['name'] dst_port = link['dst']['name'] src_port_no = link['src']['port_no'] dst_port_no = link['dst']['port_no'] src_mac = link['src']['hw_addr'].replace(":","") dst_mac = link['dst']['hw_addr'].replace(":","") src_capacity = 0.0 src_ports = self.ports[str(int(src,16))] for port in src_ports: if port['name'] == src_port: src_capacity = int(port['curr_speed'])/1000 break if src_capacity == 0.0: print "Error - SRC Capacity cannot be 0.0" sys.exit(-1) dst_capacity = 0.0 dst_ports = self.ports[str(int(dst,16))] for port in dst_ports: if port['name'] == dst_port: dst_capacity = int(port['curr_speed'])/1000 break if dst_capacity == 0.0: print "Error - DST Capacity cannot be 0.0" sys.exit(-1) if src_capacity <= dst_capacity: capacity = src_capacity else: capacity = dst_capacity if capacity >= self.max_capacity: self.max_capacity = capacity self.sip.update(str(index)) id_ = str(self.sip.hash()) self.nx_topology.add_edge(src, dst, capacity=capacity, allocated=0.0, src_port=src_port, dst_port=dst_port, src_port_no=src_port_no, dst_port_no=dst_port_no, src_mac=src_mac, dst_mac=dst_mac, flows=[], id=id_) index = index + 1 else: print "Error: something does not work in getting info from ryu controller" sys.exit(-2)
def test_multidigraph_ignore2(self): # Loop traversed an edge while ignoring its orientation. G = nx.MultiDiGraph([(0, 1), (1, 2), (1, 2)]) x = list(find_cycle(G, [0, 1, 2], orientation='ignore')) x_ = [(1, 2, 0, FORWARD), (1, 2, 1, REVERSE)] assert_equal(x, x_)
# coding=UTF-8
import networkx as nx
import matplotlib.pyplot as plt
import math
import re
from matplotlib.patches import FancyArrowPatch, Circle
import numpy as np
import matplotlib.patches as mpatches
import Analyser
from netwulf import visualize

G = nx.Graph()  # whole graph
H = nx.Graph()  # connected subgraph
J = nx.Graph()  # undirected graph
L = nx.MultiDiGraph()  # multigraph
weight = []
abnormal_IP_list = []


# populate the whole graph with nodes and edges
def create_wholeGraph():
    global G, sql
    sql = "select * from Link"
    results = Analyser.get_data(sql)
    i = 0  # counts the rows read
    for r in results:
        G.add_edge(r[1], r[2], weight=r[3])
        i = i + 1


def list_subGraph():
###########################################################################
# vladShortestPath by Teague Forren
# takes in proprietary train graph *_in.txt file
# prints out to system how many bags of blood vlad will need for each graph
# example: python vladShortestPath.py examples/sample_in.txt
###########################################################################

import networkx as nx
import sys

# globals
trainGraph = nx.MultiDiGraph()  # create networkx graph
hourRange = range(18, 25) + range(1, 7)  # Python 2: list concatenation

f = open(sys.argv[1])
routesFile = f.readlines()
f.close()


# validateTime
# takes in start and duration hours and returns a boolean saying whether
# vlad can travel during that window (night hours only, 18:00 to 06:00)
def validateTime(start, duration):
    result = False
    end = start + duration
    if end > 24:
        end -= 24
    if start >= 18 and end >= 18:  # starts in pm and ends in pm
        result = True
    if start <= 6 and end <= 6:  # starts in am and ends in am
        result = True
    if start >= 18 and end <= 6:  # starts in pm and ends in am
        result = True
    return result
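# Spot-checks for validateTime (illustrative, not part of the original script):
assert validateTime(19, 3)       # 19:00 -> 22:00, entirely within the night
assert validateTime(23, 5)       # 23:00 -> 04:00, wraps past midnight
assert not validateTime(10, 2)   # 10:00 -> 12:00, daytime travel is rejected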
def directed_configuration_model(in_degree_sequence,
                                 out_degree_sequence,
                                 create_using=None,
                                 seed=None):
    """Return a directed random graph with the given degree sequences.

    The configuration model generates a random directed pseudograph
    (graph with parallel edges and self loops) by randomly assigning
    edges to match the given degree sequences.

    Parameters
    ----------
    in_degree_sequence :  list of integers
        Each list entry corresponds to the in-degree of a node.
    out_degree_sequence :  list of integers
        Each list entry corresponds to the out-degree of a node.
    create_using : graph, optional (default MultiDiGraph)
        Return graph of this type. The instance will be cleared.
    seed : hashable object, optional
        Seed for random number generator.

    Returns
    -------
    G : MultiDiGraph
        A graph with the specified degree sequences.
        Nodes are labeled starting at 0 with an index
        corresponding to the position in deg_sequence.

    Raises
    ------
    NetworkXError
        If the degree sequences do not have the same sum.

    See Also
    --------
    configuration_model

    Notes
    -----
    Algorithm as described by Newman [1]_.

    A non-graphical degree sequence (not realizable by some simple
    graph) is allowed since this function returns graphs with self
    loops and parallel edges.  An exception is raised if the degree
    sequences do not have the same sum.

    This configuration model construction process can lead to
    duplicate edges and loops.  You can remove the self-loops and
    parallel edges (see below) which will likely result in a graph
    that doesn't have the exact degree sequence specified.  This
    "finite-size effect" decreases as the size of the graph increases.

    References
    ----------
    .. [1] Newman, M. E. J. and Strogatz, S. H. and Watts, D. J.
       Random graphs with arbitrary degree distributions and their applications
       Phys. Rev. E, 64, 026118 (2001)

    Examples
    --------
    >>> D=nx.DiGraph([(0,1),(1,2),(2,3)]) # directed path graph
    >>> din=list(D.in_degree().values())
    >>> dout=list(D.out_degree().values())
    >>> din.append(1)
    >>> dout[0]=2
    >>> D=nx.directed_configuration_model(din,dout)

    To remove parallel edges:

    >>> D=nx.DiGraph(D)

    To remove self loops:

    >>> D.remove_edges_from(D.selfloop_edges())
    """
    # the two sequences must place the same total number of stubs
    if not sum(in_degree_sequence) == sum(out_degree_sequence):
        raise nx.NetworkXError(
            'Invalid degree sequences. Sequences must have equal sums.')

    if create_using is None:
        create_using = nx.MultiDiGraph()

    if seed is not None:
        random.seed(seed)

    nin = len(in_degree_sequence)
    nout = len(out_degree_sequence)

    # pad in- or out-degree sequence with zeros to match lengths
    if nin > nout:
        out_degree_sequence.extend((nin - nout) * [0])
    else:
        in_degree_sequence.extend((nout - nin) * [0])

    # start with empty N-node graph
    N = len(in_degree_sequence)

    # allow multiedges and selfloops
    G = nx.empty_graph(N, create_using)

    if N == 0 or max(in_degree_sequence) == 0:  # done if no edges
        return G

    # build stublists of available degree-repeated stubs
    # e.g. for degree_sequence=[3,2,1,1,1]
    # initially, stublist=[0,0,0,1,1,2,3,4]
    # i.e., node 0 has degree=3 and is repeated 3 times, etc.
    in_stublist = []
    for n in G:
        for i in range(in_degree_sequence[n]):
            in_stublist.append(n)

    out_stublist = []
    for n in G:
        for i in range(out_degree_sequence[n]):
            out_stublist.append(n)

    # shuffle stublists and assign pairs by removing 2 elements at a time
    random.shuffle(in_stublist)
    random.shuffle(out_stublist)
    while in_stublist and out_stublist:
        source = out_stublist.pop()
        target = in_stublist.pop()
        G.add_edge(source, target)

    G.name = "directed configuration_model %d nodes %d edges" % (G.order(),
                                                                 G.size())
    return G
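# Usage sketch (illustrative): since the stub lists are fully paired when the
# sums match, the generated multigraph realizes the degree sequences exactly.
# Written against the same networkx 1.x API the function above uses
# (dict-returning in_degree()/out_degree()).
D = directed_configuration_model([1, 1, 1], [2, 1, 0], seed=42)
assert sorted(D.in_degree().values()) == [1, 1, 1]
assert sorted(D.out_degree().values()) == [0, 1, 2]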
def ActiveUsersNotes(self, list1):
    edgesInNotes = []
    nodesInNotes = []
    temp = []
    temp2 = []
    # one row per user; columns: 0 = userID, 1 = # notes written,
    # 2 = # notes received, 3 = # of people received from
    Nusers = numpy.zeros(shape=(len(list1), 4), dtype=numpy.int)

    # fill in matrix for JOURNAL USERS
    for row in dataTables.TblN:
        poster = row[0]  # poster handle
        if poster == "creator":
            pass
        else:
            # UserID and times written a note
            u = list1.index(poster)  # user's ID
            Nusers[u][0] = poster
            Nusers[u][2] = row[1]  # the times poster received notes
            Nusers[u][3] = row[2]  # number of people received from
            temp = row[3].split(';')
            set1 = []
            for p in temp:
                if (p != ''):
                    set1.append(Prepare().slice(p, "id"))
                    # sender's position in array
                    mX = list1.index(Prepare().slice(p, "id"))
                    # times a user wrote notes
                    Nusers[mX][0] = Prepare().slice(p, "id")
                    Nusers[mX][1] = Nusers[mX][1] + 1
                    # number of ppl written to would be [u][4]
            # build pairs of communication. Note that set2 aliases set1, so
            # inserting the poster puts it into both lists and the pairs are
            # formed among the poster and all senders together.
            set2 = set1
            set2.insert(0, poster)
            ## algorithm for creating pairs
            partialPairs = [(x, y) for y in set1 for x in set2
                            if set2.index(x) > set1.index(y)]
            if len(partialPairs) > 0:
                for x in partialPairs:
                    edgesInNotes.append(x)
                    print(x)

    ## save edges in text file
    numpy.savetxt(foLN + ".pairs", edgesInNotes, fmt='%s')
    # completed row =====> # times WRITE a note; # times receive a note;
    # # OF people receive from; # of ppl write to

    # filter: remove blank rows
    for row in Nusers:
        if ((row[1] >= 5)):  # limit to active users, 5+ notes written
            # if (((row[0] >= 5)) | (row[1] >= 5) | (row[2] >= 5)):
            temp2.append(row)
            nodesInNotes.append(row[0])
    numpy.savetxt(foLN, temp2, fmt='%s')
    print(str(len(nodesInNotes)) + " active notes users")

    ## make matrix with data
    matrixN = numpy.zeros(shape=(len(nodesInNotes), len(nodesInNotes)),
                          dtype=numpy.int)
    nG = networkx.MultiDiGraph()
    for sender, target in edgesInNotes:
        try:
            x = nodesInNotes.index(int(target))
            y = nodesInNotes.index(int(sender))
            weight = matrixN[x][y]
            matrixN[x][y] = weight + 1
            nG.add_edge(sender, target)
        except ValueError:
            # sender or target is not an active user (or not numeric);
            # skip the edge
            pass
    numpy.savetxt(foMN, matrixN, fmt='%s')
    print("saved matrix for active notes users of this community.")
    print("\tdegree centrality")
    print(networkx.degree_centrality(nG))
    print("\tin degree centrality")
    print(networkx.in_degree_centrality(nG))
    print("\tout degree centrality")
    print(networkx.out_degree_centrality(nG))
    return edgesInNotes