def test_has_eulerian_path_not_weakly_connected(self):
    """Graphs whose edges fall into two separate pieces have no Eulerian path.

    The same edge list is loaded into a directed and an undirected graph;
    nodes 0 and 1 share no edge with nodes 2 and 3 in either of them.
    """
    edges = [(0, 1), (2, 3), (3, 2)]
    for graph_type in (nx.DiGraph, nx.Graph):
        graph = graph_type()
        graph.add_edges_from(edges)
        assert not nx.has_eulerian_path(graph)
def test_has_eulerian_path_unbalancedins_more_than_one(self):
    """Two disjoint single edges must not be reported as an Eulerian path.

    Checked for both the directed and the undirected graph built from the
    same two edges.
    """
    edges = [(0, 1), (2, 3)]
    for graph_type in (nx.DiGraph, nx.Graph):
        graph = graph_type()
        graph.add_edges_from(edges)
        assert not nx.has_eulerian_path(graph)
def test_has_eulerian_path_isolated_node(self):
    """Check a directed 3-cycle before and after adding an isolated node."""
    cycle = nx.DiGraph([(0, 1), (1, 2), (2, 0)])

    # Without an isolated node the directed cycle must have an Eulerian path.
    assert nx.has_eulerian_path(cycle)

    # Node 3 has no incident edges; this test expects the answer to stay True.
    # NOTE(review): the documented treatment of isolated vertices appears to
    # differ between NetworkX releases - confirm against the pinned version.
    cycle.add_node(3)
    assert nx.has_eulerian_path(cycle)
def compute_features(self):
    """Register the Eulerian features through ``self.add_feature``.

    Three features are added, each computed as a NetworkX boolean predicate
    multiplied by 1:

    * ``eulerian`` - ``nx.is_eulerian``
    * ``semi_eulerian`` - ``nx.is_semieulerian``
    * ``has_eulerian_path`` - ``nx.has_eulerian_path``

    The third feature used to be registered as ``semi_eulerian`` as well,
    which duplicated the name of the second feature; it now has its own name.
    """
    # checking if eulerian
    # The description is assembled with implicit string concatenation; the
    # previous backslash continuation inside the literal leaked stray
    # characters into the text.
    self.add_feature(
        "eulerian",
        lambda graph: nx.is_eulerian(graph) * 1,
        "A graph is eulerian if it has a eulerian circuit: a closed walk that includes "
        "each edges of the graph exactly once",
        InterpretabilityScore(3),
    )

    # checking if semi eulerian
    self.add_feature(
        "semi_eulerian",
        lambda graph: nx.is_semieulerian(graph) * 1,
        "A graph is semi eulerian if it has a eulerian path but no eulerian circuit",
        InterpretabilityScore(3),
    )

    # checking if eulerian path exists
    self.add_feature(
        "has_eulerian_path",
        lambda graph: nx.has_eulerian_path(graph) * 1,
        "Whether a eulerian path exists in the network",
        InterpretabilityScore(3),
    )
import networkx as nx

# Undirected example graph on nodes 1..7.
arestas = [
    (1, 2), (1, 3), (2, 3), (2, 4), (2, 6), (3, 4),
    (3, 5), (4, 5), (4, 6), (5, 6), (5, 7), (6, 7),
]
G = nx.Graph()
G.add_edges_from(arestas)

# Fixed drawing coordinates for every node.
posicoes = {
    1: (0.5, 1),
    2: (0, 0.75),
    3: (1, 0.75),
    4: (0.5, 0.5),
    5: (1, 0.25),
    6: (0, 0.25),
    7: (0.5, 0),
}
nx.draw_networkx(G, pos=posicoes)

print("Grafo G")
print("-------")

# Each question is answered right before it is printed, in the listed order.
perguntas = (
    ("É Euleriano?", nx.is_eulerian),
    ("É Semieuleriano?", nx.is_semieulerian),
    ("É Tem caminho Euliriano?", nx.has_eulerian_path),
)
for pergunta, verifica in perguntas:
    if verifica(G):
        print(pergunta, "Sim")
    else:
        print(pergunta, "Não")
def test_has_eulerian_path_non_cyclic(self):
    """Path graphs (no cycles) must still be reported as having an Eulerian path."""
    undirected_path = nx.path_graph(4)
    assert nx.has_eulerian_path(undirected_path)

    directed_path = nx.path_graph(6, create_using=nx.DiGraph)
    assert nx.has_eulerian_path(directed_path)
def test_has_eulerian_path_cyclic(self):
    """Graphs with Eulerian cycles must also be reported as having an Eulerian path."""
    # (generator, size) pairs; each graph is built right before it is checked.
    cases = (
        (nx.complete_graph, 5),
        (nx.complete_graph, 7),
        (nx.hypercube_graph, 4),
        (nx.hypercube_graph, 6),
    )
    for build, size in cases:
        assert nx.has_eulerian_path(build(size))
# Report the average graph-generation time per test run.
# time_p_slow / time_r_slow / time_p_fast / time_r_fast and num_test are
# defined before this fragment (not visible here).
print("Time for graph generation")
print("Slow generetion P: ", time_p_slow / num_test, " R: ", time_r_slow / num_test)
print("Fast generetion P: ", time_p_fast / num_test, " R: ", time_r_fast / num_test)
''' Plot of two graph only with the last graph generated '''
# Plotting is optional and controlled by the plot_graph_P_R flag.
if plot_graph_P_R:
    plot_graph(P, name="graph_P", layout="spring", path="img/")
    plot_graph(R, name="graph_R", layout="random", path="img/")
''' TEST ON A SIMPLE TOY '''
print("\nHierholzer eulerian path test on toy example")
toy = nx.Graph()
toy.add_nodes_from(['A', 'B', 'C', 'D', 'E'])
# Two triangles, A-B-C and C-D-E, sharing node C.
toy.add_edges_from([('A', 'B'), ('A', 'C'), ('B', 'C'), ('C', 'D'), ('C', 'E'), ('D', 'E')])
# Print NetworkX's verdict first, then the result of the local hierholzer().
print(nx.is_eulerian(toy), nx.has_eulerian_path(toy))
print(hierholzer(toy))
''' Eulerian Path test i made the test on R and on a toy example because eulerian path on graph of provinces dosen't exist because the graph of provinces in not strongly connected'''
# Timing accumulators; presumably one list for hierholzer() and one for the
# NetworkX implementation - the code that fills them is outside this fragment.
time_eulerian = []
time_eulerian_nx = []
# Sizes of the complete graphs used as benchmark inputs.
num_node = [3, 9, 19, 29, 39, 49]
for test in range(len(num_node)):
    # Build the complete graph of the current size and pass it through
    # nx.eulerize before timing.
    toy = nx.complete_graph(num_node[test])
    toy = nx.eulerize(toy)
    # print(nx.is_eulerian(toy))
    # print(nx.has_eulerian_path(toy))
    # print(nx.has_eulerian_path(R))
    # print(nx.has_eulerian_path(P))
    '''EULERIAN PATH ON A TOY EXAMPLE USING NETWORKX'''
    # Start of the timed section; the rest of the loop body is not visible here.
    start = time.time()
def test_has_eulerian_path_directed_graph(self):
    """A directed graph with badly unbalanced degrees has no Eulerian path.

    Node 0 only has outgoing edges (two of them) and node 2 only has
    incoming edges (two of them).
    """
    digraph = nx.DiGraph([(0, 1), (1, 2), (0, 2)])
    assert not nx.has_eulerian_path(digraph)
# on the shortest paths Networkx documentation as
# "https://networkx.github.io/documentation/networkx-1.10/reference/algorithms.shortest_paths.html".
# reference: https://networkx.github.io/documentation/networkx-1.10/reference/algorithms.shortest_paths.html
shortest_header = startBlue + '\nShortest path from Pensacola to Phoenix:\n' + endColor
shortest_route = nx.shortest_path(G, 'Pensacola', 'Phoenix')
print(shortest_header, shortest_route)

dijkstra_header = startBlue + '\nDijkstra path from Pensacola to Phoenix:\n' + endColor
dijkstra_route = nx.dijkstra_path(G, 'Pensacola', 'Phoenix')
print(dijkstra_header, dijkstra_route)

# Eulerian:
# reference: https://networkx.github.io/documentation/stable/reference/algorithms/euler.html
# Use-case example: "The purpose of the proposed new roads is to make the town mailman-friendly. In graph theory terms,
# we want to change the graph so it contains an Euler circuit. This is also referred to as Eulerizing a graph. The
# most mailman-friendly graph is the one with an Euler circuit since it takes the mailman back to the starting point.
# This means that the mailman can leave his car at one intersection, walk the route hitting all the streets just once,
# and end up where he began. There is no backtracking or walking of streets twice. This saves him time."
# reference: https://study.com/academy/lesson/eulerizing-graphs-in-math.html
euler_path_header = startBlue + '\nHas Eulerian path:\n' + endColor
euler_path_exists = nx.has_eulerian_path(G)
print(euler_path_header, euler_path_exists)

semi_euler_header = startBlue + '\nIs semi-Eulerian:\n' + endColor
semi_eulerian = nx.is_semieulerian(G)
print(semi_euler_header, semi_eulerian)

# Shortest paths (Bellman-Ford):
# reference: https://networkx.github.io/documentation/stable/reference/algorithms/shortest_paths.html
# Definition: the Bellman-Ford algorithm is a graph search algorithm that finds the shortest path between a given
# source vertex and all other vertices in the graph. It can be used on both weighted and unweighted graphs.
# reference: https://brilliant.org/wiki/bellman-ford-algorithm/
bellman_header = startBlue + '\nBellman Ford path from Los Angeles:\n' + endColor
bellman_result = nx.bellman_ford_predecessor_and_distance(G, 'Los Angeles')
print(bellman_header, bellman_result)

# Linear Algebra (Eigenvalues):
# reference: https://networkx.github.io/documentation/stable/reference/linalg.html
# Definition: using scalar multiplication (matrix multiplication = scalar multiplication) to create a new figure,
# utilizing eigenvalues and eigenvectors.
# reference: https://www.youtube.com/watch?v=vs2sRvSzA3o
# Real-world use case: scaling a model to a real-world dataset or graph.
# reference: http://barabasi.com/f/94.pdf
def importar_texto_2(self):
    """Load a text file chosen by the user and fill the "_2" analysis widgets.

    The text is cleaned with ``limpiar_texto`` and turned into bigrams
    (``crear_bigramas``) and tags (``crear_tags``). An undirected graph is
    built with one edge per bigram, labelled with the matching tag. The
    widgets then receive:

    * the values returned by ``get_nodes``, ``get_edges`` and ``get_degree``;
    * the Eulerian circuit and the Eulerian path as lists of edges, or a
      "not found" message;
    * whether every token of the cleaned text is reachable from the first
      token by following the bigrams in their stored direction;
    * the value returned by ``coloreo_grafos``.

    Does nothing when the file dialog is cancelled.
    """
    from collections import defaultdict, deque

    options = QFileDialog.Options()
    fileName, _ = QFileDialog.getOpenFileName(
        self.MainWindow, "Elige ejemplo a importar", "",
        "Text Files (*.txt)", options=options)
    if not fileName:
        return

    self.text_path_2.setText(str(fileName))
    # Read everything up front so the handle is closed even if the analysis
    # below raises.
    with open(fileName, encoding="utf8") as archivo:
        texto = archivo.read()

    # Build bigrams and tags from the cleaned text.
    texto_limpio = limpiar_texto(texto)
    bigramas_texto = crear_bigramas(texto_limpio)
    partag = crear_tags(texto_limpio)
    # crear_tags' result is consumed below; keep a copy for coloreo_grafos.
    auxpartag = partag[:]

    g = nx.Graph()
    # Successor lists for the reachability search, collected while the graph
    # is built so the bigrams are only iterated once here.
    sucesores = defaultdict(list)
    for a, b in bigramas_texto:
        subtag = partag.pop(0)
        g.add_edge(str(a), str(b), label=subtag)
        sucesores[a].append(b)

    # Basic graph figures.
    nodos = get_nodes(g)
    self.no_nodos_2.setText(str(nodos))
    arcos = get_edges(g)
    self.no_arcos_2.setText(str(arcos))
    grados = get_degree(g)
    self.grados_2.setText(str(grados))

    if nx.is_eulerian(g):
        # eulerian_circuit returns an iterator; materialise it so the widget
        # shows the edges instead of a generator repr.
        self.eulerian_2.setText(str(list(nx.eulerian_circuit(g))))
    else:
        self.eulerian_2.setText("No tiene circuito")

    if nx.has_eulerian_path(g):
        lista = list(nx.eulerian_path(g))
        self.eulerian_path_2.setText(str(lista))
    else:
        self.eulerian_path_2.setText("No tiene camino")

    # Breadth-first search from the first token over the bigram edges.
    # The previous version iterated over the last tag instead of the edges,
    # so it never followed any edge.
    inicial = texto_limpio[0]
    visitados = {inicial}
    pendientes = deque([inicial])
    while pendientes:
        elem = pendientes.popleft()
        for destino in sucesores.get(elem, ()):
            if destino not in visitados:
                visitados.add(destino)
                pendientes.append(destino)

    novisitados = set(texto_limpio) - visitados
    if novisitados:
        self.conexo_2.setText("No es conexo")
    else:
        self.conexo_2.setText("Si es conexo")

    numero_cromatico = coloreo_grafos(g, auxpartag, bigramas_texto, 'grafo2.gv')
    self.cromatico_2.setText(str(numero_cromatico))
def test_has_eulerian_path_unbalancedins_more_than_one(self, G):
    """Two disjoint single edges added to ``G`` must not form an Eulerian path.

    ``G`` is supplied by the caller and is modified in place.
    """
    disjoint_edges = [(0, 1), (2, 3)]
    G.add_edges_from(disjoint_edges)
    path_exists = nx.has_eulerian_path(G)
    assert not path_exists
def test_has_eulerian_path_not_weakly_connected(self, G):
    """Edges that leave ``G`` in two separate pieces must not form an Eulerian path.

    ``G`` is supplied by the caller and is modified in place.
    """
    split_edges = [(0, 1), (2, 3), (3, 2)]
    G.add_edges_from(split_edges)
    path_exists = nx.has_eulerian_path(G)
    assert not path_exists
def main():
    """Run parts A-I of the Bison (directed) / Kangaroo (undirected) analysis.

    Both networks are loaded with ``create_network``. The function then
    prints connected-component figures (A), plots degree histograms (B),
    simple-path length histograms (C) and cycle-length histograms (D),
    reports Eulerian properties (E), shows adjacency matrices (F), compares
    graph copies (G), builds a minimum spanning tree and removes one of its
    edges (H), and finally plots Dijkstra path lengths between adjacent
    node pairs (I).

    Nothing is returned; all results are printed or plotted.
    """
    # Directed Bison Network
    bison_file = 'moreno_bison/out.moreno_bison_bison'
    bison_graph = nx.DiGraph()
    create_network(bison_graph, bison_file, True)

    # Undirected Kangaroo Network
    kangaroo_file = 'moreno_kangaroo/out.moreno_kangaroo_kangaroo'
    kangaroo_graph = nx.Graph()
    create_network(kangaroo_graph, kangaroo_file, False)

    # Part A: Connected Component Analysis
    # Connected Component Analysis of Bison Directed Graph
    bison_cc_sizes = [
        len(component)
        for component in nx.weakly_connected_components(bison_graph)
    ]
    print("PART A:\n")
    print("Bison Directed Graph Connected Component Analysis",
          "\nWeakly connected: ", nx.is_weakly_connected(bison_graph),
          "\nNumber of Weakly CCs: ",
          nx.number_weakly_connected_components(bison_graph),
          "\nSize of largest CC: ", max(bison_cc_sizes),
          "\nSize of smallest CC: ", min(bison_cc_sizes))

    # Connected Component Analysis of Kangaroo Undirected Graph
    kangaroo_cc_sizes = [
        len(component)
        for component in nx.connected_components(kangaroo_graph)
    ]
    print("\nKangaroo Undirected Graph Connected Component Analysis",
          "\nConnected: ", nx.is_connected(kangaroo_graph),
          "\nNumber of CCs: ",
          nx.number_connected_components(kangaroo_graph),
          "\nSize of largest CC: ", max(kangaroo_cc_sizes),
          "\nSize of smallest CC: ", min(kangaroo_cc_sizes))

    # Part B: Computing degrees and their distribution
    # NOTE(review): node ids are hard-coded (1..25 here, 1..26 in Part C);
    # confirm against the data set whether the last node is skipped on purpose.
    bison_degrees = [bison_graph.degree(node) for node in range(1, 26)]

    # Computing Mean and Standard Deviation for Directed
    x_label = stats(bison_degrees)

    # Histogram of the Bison degrees
    plt.figure(3)
    plt.title('Part B: Histogram Directed Bison')
    plt.xlabel(x_label)
    plt.hist(bison_degrees, bins='auto')

    # NOTE(review): same hard-coded id range question as above (1..16).
    kangaroo_degrees = [kangaroo_graph.degree(node) for node in range(1, 17)]

    # Computing Mean and Standard Deviation for Undirected
    x_label = stats(kangaroo_degrees)

    # Histogram of the Kangaroo degrees
    plt.figure(4)
    plt.title('Part B: Histogram Undirected Kangaroo')
    plt.xlabel(x_label)
    plt.hist(kangaroo_degrees, bins='auto')

    # Part C: Paths between two arbitrary vertices in the largest CC
    # Pick two distinct random nodes.
    node1 = random.randrange(1, 27, 1)
    node2 = random.randrange(1, 27, 1)
    while node1 == node2:
        node1 = random.randrange(1, 27, 1)

    # Only paths with at most 5 edges are enumerated (cutoff) so the search
    # finishes in reasonable time.
    bison_paths = list(
        nx.all_simple_paths(bison_graph, node1, node2, cutoff=5))
    # Every path is counted; the old index loop dropped the last one.
    bison_p_lengths = [len(path) for path in bison_paths]

    x_label = stats(bison_p_lengths)

    plt.figure(5)
    plt.title('Part C: Histogram Directed Bison Paths')
    plt.xlabel(x_label)
    plt.hist(bison_p_lengths, bins='auto')

    # Pick two distinct random nodes for the Kangaroo network.
    node1 = random.randrange(1, 17, 1)
    node2 = random.randrange(1, 17, 1)
    while node1 == node2:
        node1 = random.randrange(1, 17, 1)

    kangaroo_paths = list(
        nx.all_simple_paths(kangaroo_graph, node1, node2, cutoff=5))
    kangaroo_p_lengths = [len(path) for path in kangaroo_paths]

    x_label = stats(kangaroo_p_lengths)

    plt.figure(6)
    plt.title('Part C: Histogram Undirected Kangaroo Paths')
    plt.xlabel(x_label)
    plt.hist(kangaroo_p_lengths, bins='auto')

    # Part D: Simple circuits
    # The directed Bison circuits are skipped on purpose:
    # nx.simple_cycles(bison_graph) produces too many cycles to enumerate on
    # a laptop and no cutoff could be applied.

    # simple_cycles cannot be used on undirected graphs, so the cycle basis
    # is used for the Kangaroo network instead.
    kangaroo_circuits = nx.cycle_basis(kangaroo_graph)
    kangaroo_c_lengths = [len(circuit) for circuit in kangaroo_circuits]

    x_label = stats(kangaroo_c_lengths)

    plt.figure(7)
    plt.title('PART D: Histogram Undirected Kangaroo Circuits')
    plt.xlabel(x_label)
    plt.hist(kangaroo_c_lengths, bins='auto')

    # Part E: Check if Eulerian, find a Eulerian path
    print("\nPART E:")
    print("\nDirected Bison Graph")
    print("Euelerian: ", nx.is_eulerian(bison_graph))
    print("Has a Eulerian Path: ", nx.has_eulerian_path(bison_graph))

    print("\nUndirected Kangaroo Graph")
    print("Euelerian: ", nx.is_eulerian(kangaroo_graph))
    print("Has a Eulerian Path: ", nx.has_eulerian_path(kangaroo_graph))

    # Part F: Convert to matrix
    # to_numpy_array replaces to_numpy_matrix, which NetworkX 3.0 removed.
    bison_matrix = nx.to_numpy_array(bison_graph)
    plt.matshow(bison_matrix)

    kangaroo_matrix = nx.to_numpy_array(kangaroo_graph)
    plt.matshow(kangaroo_matrix)

    # Part G: Compare the largest CC with a copy and a slightly different graph
    print("\nPart G:\n")
    # NOTE(review): this links every node i of the largest component to
    # i + 1 rather than copying the component's real edges - confirm intent.
    bison_n1 = nx.Graph()
    largest_cc_bison = list(
        max(nx.weakly_connected_components(bison_graph), key=len))
    for i in largest_cc_bison:
        bison_n1.add_edge(i, i + 1)

    bison_n2 = bison_n1.copy()

    # Checking equivalence between copied graphs
    print("Is bison_n1 Equivalent to bison_n2?")
    compare(bison_n1, bison_n2)

    # Checking equivalence when one copy has 10 extra edges
    print("\nIs bison_n1 Equivalent to N3?")
    bison_n3 = bison_n2.copy()
    add_10_edges(bison_n3, len(bison_n3))
    compare(bison_n1, bison_n3)

    # Repeat for Kangaroo Undirected Network
    kangaroo_n1 = nx.Graph()
    largest_cc_kangaroo = list(
        max(nx.connected_components(kangaroo_graph), key=len))
    for i in largest_cc_kangaroo:
        kangaroo_n1.add_edge(i, i + 1)

    kangaroo_n2 = kangaroo_n1.copy()

    print("\nIs kangaroo_n1 Equivalent to kangaroo_n2?")
    compare(kangaroo_n1, kangaroo_n2)

    print("\nIs kangaroo_n1 Equivalent to N3?")
    kangaroo_n3 = kangaroo_n2.copy()
    add_10_edges(kangaroo_n3, len(kangaroo_n3))
    compare(kangaroo_n1, kangaroo_n3)

    # Part H: Generate Minimum Spanning Tree
    print("\nPart H:\n")
    # A spanning tree cannot be generated for the directed network, so only
    # the undirected Kangaroo network is used.
    kangaroo_min_tree = nx.minimum_spanning_tree(kangaroo_graph)
    print(
        "~A Minimum Spanning Tree was created for the Undirected Kangaroo Graph~"
    )
    tree_or_forest(kangaroo_min_tree)

    # Draw random node pairs until one of them is an edge of the tree.
    x = 0
    y = 0
    while not kangaroo_min_tree.has_edge(x, y):
        x = random.randrange(1, 17, 1)
        y = random.randrange(1, 17, 1)
        while x == y:
            x = random.randrange(1, 17, 1)

    # Removing the found edge
    print("\nAn edge from the spanning tree was removed")
    kangaroo_min_tree.remove_edge(x, y)
    tree_or_forest(kangaroo_min_tree)

    # Part I: Dijkstra's Algorithm
    # Only the node list is needed here. It was previously taken from the
    # keys of nx.all_pairs_node_connectivity(), which computes a full
    # connectivity table just to be discarded.
    bison_nodes = list(bison_graph.nodes)
    # NOTE(review): the "j + 1" offset is kept from the original code; it
    # assumes integer node ids - confirm it is intended.
    connected_nodes = [
        (i, j + 1)
        for i in bison_nodes
        for j in bison_nodes
        if bison_graph.has_edge(i, j + 1)
    ]

    # One path length per connected pair (the last pair used to be skipped).
    dijkstra_paths = [
        int(nx.dijkstra_path_length(bison_graph, source, target))
        for source, target in connected_nodes
    ]

    x_label = stats(dijkstra_paths)

    plt.figure()
    plt.xlabel(x_label)
    plt.title('Directed Bison Dijkstra Path Lengths')
    plt.hist(dijkstra_paths)

    # Temporary graph whose edge weights are the Dijkstra distances.
    temp_bison = nx.DiGraph()
    for (source, target), distance in zip(connected_nodes, dijkstra_paths):
        temp_bison.add_edge(source, target, weight=distance)

    bison_distance_matrix = nx.to_numpy_array(temp_bison)
    plt.matshow(bison_distance_matrix)
    plt.show()

    # Repeat for Kangaroo Undirected
    # This half referenced the undefined names KangarooGraph and Stats; it
    # now uses kangaroo_graph and stats like the rest of the function.
    kangaroo_nodes = list(kangaroo_graph.nodes)
    connected_nodes_k = [
        (i, j + 1)
        for i in kangaroo_nodes
        for j in kangaroo_nodes
        if kangaroo_graph.has_edge(i, j + 1)
    ]

    dijkstra_paths_k = [
        int(nx.dijkstra_path_length(kangaroo_graph, source, target))
        for source, target in connected_nodes_k
    ]

    x_label = stats(dijkstra_paths_k)

    plt.figure()
    plt.xlabel(x_label)
    plt.title('Undirected Kangaroo Dijkstra Path Lengths')
    plt.hist(dijkstra_paths_k)
    plt.show()

    temp_kangaroo = nx.Graph()
    for (source, target), distance in zip(connected_nodes_k,
                                          dijkstra_paths_k):
        temp_kangaroo.add_edge(source, target, weight=distance)

    kangaroo_distance_matrix = nx.to_numpy_array(temp_kangaroo)
    plt.matshow(kangaroo_distance_matrix)
    plt.show()
import random
import networkx as nx
import pygraphviz as pgv
from nxpd import draw, nxpdParams

nxpdParams['show'] = 'ipynb'


def random_dna():
    """Return a random 3-letter string drawn from the letters 'agct'."""
    letters = [random.choice('agct') for _ in range(3)]
    return ''.join(letters)


# Keep drawing sets of 8 random triplets until the graph linking each
# triplet's first two letters to its last two letters has an Eulerian path,
# then display that graph and stop.
attempt_counts = 10000
for _ in range(attempt_counts):
    dnas = [random_dna() for _ in range(8)]
    edges = [(dna[:2], dna[1:]) for dna in dnas]
    G = nx.DiGraph(edges)
    if not nx.has_eulerian_path(G):
        continue
    nx.nx_agraph.view_pygraphviz(G, prog='circo')
    break
def _order_edges_in_block(self, block_data, drop_augmented):
    """Produce an edge sequence for all edges in a single block.

    The block's edges are loaded into a NetworkX graph and each weakly
    connected component is sequenced in turn: as an Eulerian circuit when
    one exists, otherwise as an Eulerian path, otherwise by eulerizing an
    undirected copy of the component and walking that circuit. The sequence
    number is stored on each edge under ``self.eo_name`` and the strategy
    used ('circuit', 'path' or 'augmented') under ``self.path_indicator``.

    Parameters
    ----------
    block_data : pandas.DataFrame
        A DataFrame representing all the edges within a single block.
    drop_augmented : bool
        When true, edges that were added while eulerizing (recognised by a
        missing ``self.edge_uid_field`` attribute) are skipped and consume
        no sequence number.

    Returns
    -------
    edges : pandas.DataFrame or None
        The same edges that were input with the edge order and route type
        as new columns. ``None`` is returned (after logging an error) when
        the block contains no edges.
    """

    logger.debug("order_edges_by_block started")
    logger.debug("Received edge data of shape %s", block_data.shape)

    # Sort the DataFrame to load right hand arcs into NetworkX first.
    # Note that Eulerian paths work in reverse order.
    block_data = block_data.sort_values(self.leftrightflag_field,
                                        ascending=False)
    block_g = nx.from_pandas_edgelist(block_data,
                                      source=self.source_field,
                                      target=self.target_field,
                                      edge_attr=True,
                                      create_using=self.graph_type)
    logger.debug("Block contains %s edges and %s nodes",
                 block_g.number_of_edges(), block_g.number_of_nodes())

    # if the graph is empty it means there is a problem with the source data
    # an error is logged, but other blocks are still processed
    if nx.is_empty(block_g):
        logger.error("Block contains no edges and cannot be sequenced")
        return

    # Scale nodes that are mid-segment by looking for duplicated ngd_str_uid values
    logger.debug(
        "Looking for nodes that fall in the middle of a road segment")
    block_data['same_ngd_str_uid'] = block_data.duplicated(
        subset=[self.struid_field], keep=False)
    # A node counts as mid-segment when it is both the source of one
    # duplicated-uid row and the target of another.
    mid_arc_start_nodes = set(
        block_data.loc[block_data['same_ngd_str_uid'] == True,
                       self.source_field])
    mid_arc_end_nodes = set(
        block_data.loc[block_data['same_ngd_str_uid'] == True,
                       self.target_field])
    mid_arc_nodes = mid_arc_start_nodes.intersection(mid_arc_end_nodes)
    if mid_arc_nodes:
        logger.debug("Found mid-segment nodes: %s", mid_arc_nodes)
        self._apply_node_scaling_factor(mid_arc_nodes, factor=-0.5)

    # initialize the edge sequence counter
    # (it keeps counting across all components of the block)
    edge_sequence = 0

    # record what type of path was used to determine the circuit
    path_indicator_name = self.path_indicator
    path_indicator_edges = {}

    # blocks don't necessarily form fully connected graphs, so cycle through the components
    # (largest component first)
    logger.debug("Block contains %s connected components",
                 nx.number_weakly_connected_components(block_g))
    for block_comp in sorted(nx.weakly_connected_components(block_g),
                             key=len,
                             reverse=True):
        logger.debug(
            "Creating subgraph from connected component with %s nodes",
            len(block_comp))
        block_g_comp = block_g.subgraph(block_comp)

        # determine the preferred start node for this block component
        preferred_sp = self._get_preferred_start_node(block_g_comp.nodes)
        logger.debug("Preferred start node for this block: %s",
                     preferred_sp)

        logger.debug("Component contains %s edges and %s nodes",
                     block_g_comp.number_of_edges(), len(block_g_comp))

        # Need to pick an approach to processing this component depending on what type of circuit it forms.
        # Ideally things are a Eulerian circuit that can be walked and return to start, but not all blocks form
        # these nice circuits. If no good circuit can be found, then sequence numbers are just applied but may
        # not form a logical order.

        # Track the sequence value in case the enumeration method needs to be reset. This gets used when using
        # the preferred start point fails, and also controls if the start node for this component is marked as a
        # point we want to cluster on.
        seq_val_at_start = edge_sequence

        # the preferred option is a Eulerian circuit, so try that first
        # logger.debug("Available edges: %s", block_g_comp.edges)
        if nx.is_eulerian(block_g_comp):
            logger.debug("Block component is eulerian.")
            # record all these edges as being eulerian
            indicator = dict(
                zip(block_g_comp.edges,
                    ['circuit'] * block_g_comp.size()))
            path_indicator_edges.update(indicator)

            # enumerate the edges and order them directly
            logger.debug("Creating Eulerian circuit from node %s",
                         preferred_sp)
            for u, v, k in nx.eulerian_circuit(block_g_comp,
                                               source=preferred_sp,
                                               keys=True):
                edge_sequence += 1
                block_g.edges[u, v, k][self.eo_name] = edge_sequence
                # logger.debug("Sequence applied: (%s, %s, %s) = %s", u, v, k, edge_sequence)

        # next best option is a path that stops at a different location from the start point
        elif nx.has_eulerian_path(block_g_comp):
            logger.debug("Block component forms Eulerian path")
            # record all these edges as being a eulerian path
            indicator = dict(
                zip(block_g_comp.edges, ['path'] * block_g_comp.size()))
            path_indicator_edges.update(indicator)

            # KeyError doubles as the "preferred start point did not work"
            # signal; the except branch below retries without a source.
            try:
                logger.debug(
                    "Trying to create path from preferred start node %s",
                    preferred_sp)
                for u, v, k in nx.eulerian_path(block_g_comp,
                                                source=preferred_sp,
                                                keys=True):
                    edge_sequence += 1

                    # check if the start point is actually in the first edge
                    # NOTE(review): edge_sequence is not reset per component, so
                    # this comparison with 1 can only be true for the first edge
                    # of the whole block - confirm that is intended.
                    if edge_sequence == 1 and not (preferred_sp == u
                                                   or preferred_sp == v):
                        logger.debug(
                            "Preferred start point not present on starting edge, throwing KeyError."
                        )
                        raise KeyError("Invalid starting edge")

                    # Sometimes the preferred start point means walking over the same edge twice, which will leave
                    # a data gap (the previous edge order value will be overwritten). If this happens, throw a
                    # KeyError
                    if block_g.edges[u, v, k].get(self.eo_name):
                        logger.debug("Edge already sequenced.")
                        raise KeyError(
                            "Preferred start point results in backtracking."
                        )

                    block_g.edges[u, v, k][self.eo_name] = edge_sequence
                    # logger.debug("Sequence applied: (%s, %s, %s) = %s", u, v, k, edge_sequence)

                # NOTE(review): edge_sequence is cumulative over the block
                # while the right-hand side counts this component's edges only.
                if edge_sequence < block_g_comp.number_of_edges():
                    logger.debug("It looks like some edges got missed")
                    raise KeyError("Missing edges on path")

                logger.debug(
                    "Path was created from desired start point %s",
                    preferred_sp)

            except KeyError:
                # preferred start point failed; let networkx pick and start over
                logger.debug(
                    "Preferred start node did not create a path. Trying a different one."
                )

                # reset the path listing since a new point will be picked
                logger.debug("Reset edge_sequence value to %s",
                             seq_val_at_start)
                edge_sequence = seq_val_at_start

                # Values written by the failed attempt are overwritten here
                # without any further checks.
                for u, v, k in nx.eulerian_path(block_g_comp, keys=True):
                    edge_sequence += 1
                    block_g.edges[u, v, k][self.eo_name] = edge_sequence
                    # logger.debug("Sequence applied: (%s, %s, %s) = %s", u, v, k, edge_sequence)

        # No good path exists, which means someone will have to backtrack
        else:
            logger.debug(
                "Non-eulerian block is not easily traversable. Eulerizing it."
            )

            # Record all these edges as being augmented.
            indicator = dict(
                zip(block_g_comp.edges,
                    ['augmented'] * block_g_comp.size()))
            path_indicator_edges.update(indicator)

            # Send this data to the anomaly folder so that it can be investigated later. It could have addressable
            # issues that operations can correct for the next run.
            logger.debug("Writing anomaly set for this block")
            # The file name uses the uid attribute of one of the component's
            # edges (the last value returned by get_edge_attributes).
            bf_uid_set = list(
                nx.get_edge_attributes(
                    block_g_comp, self.edge_uid_field).values()).pop()
            anomaly_file_name = f"anomaly_block_component.{bf_uid_set}.yaml"
            # NOTE(review): nx.write_yaml is not available in newer NetworkX
            # releases - confirm the pinned version still provides it.
            nx.write_yaml(block_g_comp,
                          (self.anomaly_folder /
                           anomaly_file_name).as_posix())

            # You cannot eulerize a directed graph, so create an undirected one
            logger.debug("Creating MultiGraph from directed graph.")
            temp_graph = nx.MultiGraph()
            for u, v, data in block_g_comp.edges(data=True):
                # key is only referenced by the disabled debug line below
                key = temp_graph.add_edge(u, v, **data)
                # logger.debug("Adding edge (%s, %s, %s) to temporary graph.", u, v, key)
            logger.debug("Created temporary MultiGraph with %s edges",
                         temp_graph.number_of_edges())

            # Convert the temporary graph to a proper Euler circuit so that it can be traversed.
            logger.debug("Eulerizing MultiGraph")
            euler_block = nx.eulerize(temp_graph)
            logger.debug("Added %s edges to the block",
                         (euler_block.size() - temp_graph.size()))
            logger.debug("Number of vertices in eulerized graph: %s",
                         euler_block.number_of_nodes())

            # As we try to traverse the undirected graph, we need to keep track of places already visited to make
            # sure arcs are not skipped.
            visited_edges = Counter()

            # augmented edges will throw the node weights off, so don't bother trying the preferred start node
            # NOTE(review): the call below does pass preferred_sp as the
            # circuit source, which contradicts the comment above - confirm.
            logger.debug("Generating path through augmented block")
            for u, v, k in nx.eulerian_circuit(euler_block,
                                               preferred_sp,
                                               keys=True):
                # augmented edges have no attributes, so look for one and skip the edge if nothing is returned
                if drop_augmented and not euler_block.edges[u, v, k].get(
                        self.edge_uid_field):
                    logger.debug("Ignoring augmented edge (%s, %s, %s)", u,
                                 v, k)
                    continue

                # Increment the sequence value for each edge we see.
                edge_sequence += 1

                # Since we formed an undirected MultiGraph we need to check the orientation of the nodes on the
                # edge to assign the sequence back to the directed graph.
                start_node = u
                end_node = v
                available_edge_count = block_g.number_of_edges(
                    start_node, end_node)

                # If no edges exist, invert the nodes and check again.
                # This also checks to see if we've already encountered all the edges between these nodes, indicating
                # we need to process the inverse of the start and end values
                if available_edge_count == 0 or (
                    ((start_node, end_node) in visited_edges) and
                    (available_edge_count
                     == visited_edges[(start_node, end_node)])):
                    logger.debug(
                        "Nothing to process between (%s, %s), inverting nodes.",
                        start_node, end_node)
                    start_node = v
                    end_node = u
                    available_edge_count = block_g.number_of_edges(
                        start_node, end_node)

                logger.debug(
                    "Number of edges available between (%s, %s): %s",
                    start_node, end_node, available_edge_count)

                # Apply the edge_sequence to the first edge that hasn't received one yet
                # assumes the parallel-edge keys are 0..count-1 - TODO confirm
                for ki in range(available_edge_count):
                    if not block_g.edges[start_node, end_node, ki].get(
                            self.eo_name):
                        logger.debug(
                            "Edge sequence applied: (%s, %s, %s) = %s",
                            start_node, end_node, ki, edge_sequence)
                        block_g.edges[start_node, end_node,
                                      ki][self.eo_name] = edge_sequence
                        visited_edges[(start_node, end_node)] += 1
                        break

        # At this point every edge should be accounted for, but in case something somehow slips through the cracks
        # it needs to be given a sequence label. The label almost certainly won't make much sense in terms of a
        # logical ordering, but this is just trying to make sure it is counted.
        logger.debug("Looking for any missed edges in block component")
        for u, v, k in block_g_comp.edges:
            if not block_g.edges[u, v, k].get(self.eo_name):
                edge_sequence += 1
                block_g.edges[u, v, k][self.eo_name] = edge_sequence
                logger.warning(
                    "Applied out of order sequence to component edge (%s, %s, %s): %s",
                    u, v, k, edge_sequence)

        # just log the last sequence value to make tracing easier
        logger.debug("Final edge sequence value for component: %s",
                     edge_sequence)

        # log the sequence values of all the edges that were discovered
        logger.debug("Edge order results: %s",
                     nx.get_edge_attributes(block_g_comp, self.eo_name))

        # To help cluster the start nodes, mark which node was used as the start point in this block
        # NOTE(review): edge_sequence starts at 0, so seq_val_at_start is 0
        # for the first component; the comparison with 1 only matches when
        # exactly one edge was sequenced before this component - confirm.
        if seq_val_at_start == 1:
            self._mark_chosen_start_node(block_g_comp, preferred_sp)

        logger.debug("Finished processing component")

    # record that block processing is finished
    logger.debug("Block processing complete")

    # nx.set_edge_attributes(block_g, block_sequence_labels, self.eo_name)
    nx.set_edge_attributes(block_g, path_indicator_edges,
                           path_indicator_name)

    # check to see if the counts line up (a mismatch is only logged)
    if not block_g.number_of_edges() == edge_sequence:
        logger.debug(
            "Edge sequence (%s) and edge count (%s) do not match in block",
            edge_sequence, block_g.number_of_edges())

    # help start point clustering by apply a scaling factor to all nodes that were touched
    logger.debug(
        "Applying scaling factor to nodes in this block, except start point"
    )
    nodes_in_block = set(block_g.nodes())
    # preferred_sp still holds the value from the last component processed
    nodes_in_block.remove(
        preferred_sp)  # don't scale the preferred start point
    self._apply_node_scaling_factor(nodes_in_block)

    logger.debug("Final node data for block: %s",
                 self.graph.subgraph(block_g.nodes).nodes(data=True))

    logger.debug("Returning pandas DataFrame from block graph.")
    return nx.to_pandas_edgelist(block_g,
                                 source=self.source_field,
                                 target=self.target_field)