def split_subslice_into_putative_modules(G_optimized, improvement_delta,
                                         modularity_score_objective,
                                         best_modularity):
    """Apply one Girvan-Newman split to ``G_optimized`` (modified in place).

    Args:
        G_optimized: graph holding the current sub-slice; nodes and edges are
            removed from it in place.
        improvement_delta: minimal modularity gain (over ``best_modularity``)
            required for the split to be accepted.
        modularity_score_objective: modularity at which no further splitting
            is attempted.
        best_modularity: best modularity obtained by previous calls.

    Returns:
        tuple: ``(stop, modularity)``. ``stop`` is True when the caller should
        stop iterating (objective reached, graph emptied, or the split did not
        improve enough), in which case ``best_modularity`` is returned
        unchanged. Otherwise ``(False, modularity_of_the_new_split)``.
    """
    cur_components = [
        G_optimized.subgraph(c) for c in connected_components(G_optimized)
    ]
    cur_modularity = modularity(G_optimized, cur_components, weight='weight')
    if cur_modularity >= modularity_score_objective:
        return True, best_modularity

    # Drop components with fewer than 4 nodes. The node list is fully
    # materialised before removal so the graph is never mutated while its
    # components are being enumerated.
    small_component_nodes = [
        node
        for component in connected_components(G_optimized)
        if len(component) < 4
        for node in component
    ]
    G_optimized.remove_nodes_from(small_component_nodes)

    # A graph has no connected components exactly when it has no nodes.
    if G_optimized.number_of_nodes() == 0:
        return True, best_modularity

    # First level of the Girvan-Newman hierarchy = one additional split.
    optimized_connected_components = girvan_newman(G_optimized)
    cur_components = sorted(next(optimized_connected_components))
    cur_modularity = modularity(G_optimized, cur_components, weight='weight')
    if cur_modularity <= best_modularity + improvement_delta:
        return True, best_modularity

    # Keep only edges whose endpoints fall in the same community. The
    # communities partition the nodes, so a node -> community index map is
    # equivalent to testing membership in every community for every edge.
    community_of = {
        node: idx
        for idx, community_nodes in enumerate(cur_components)
        for node in community_nodes
    }
    edges_to_remove = [
        cur_edge for cur_edge in G_optimized.edges
        if community_of.get(cur_edge[0]) is None
        or community_of.get(cur_edge[0]) != community_of.get(cur_edge[1])
    ]
    G_optimized.remove_edges_from(edges_to_remove)
    return False, cur_modularity
def compute_molecule(universe):
    '''
    Group atoms into molecules and build the molecule table.

    Bonded atom pairs become the edges of a graph; each connected component
    of that graph is one molecule. Atoms whose bond count is zero are added
    afterwards as single-atom molecules.

    Args:
        universe (:class:`~exatomic.universe.Universe`): Atomic universe

    Returns:
        Molecule: per-molecule symbol counts, frame and total mass

    Warning:
        This function modifies the universe's atom table in place (a
        ``molecule`` column is added, and ``bond_count`` is computed when
        missing).
    '''
    # Bond counts identify the lone atoms handled further below.
    if 'bond_count' not in universe.atom:
        universe.compute_bond_count()

    bonded = universe.two[universe.two['bond'] == True]
    if universe.is_periodic:
        projected_to_atom = universe.projected_atom['atom']
        first = bonded['prjd_atom0'].map(projected_to_atom)
        second = bonded['prjd_atom1'].map(projected_to_atom)
    else:
        first = bonded['atom0']
        second = bonded['atom1']

    graph = Graph()
    graph.add_edges_from(zip(first.values, second.values))

    mapper = {}
    for label, members in enumerate(connected_components(graph)):
        for atom in members:
            mapper[atom] = label

    # Lone atoms continue the numbering right after the bonded molecules.
    next_label = max(mapper.values()) + 1 if len(mapper.values()) > 0 else 0
    lone_atoms = universe.atom[universe.atom['bond_count'] == 0].index
    for offset, atom in enumerate(lone_atoms):
        mapper[atom] = offset + next_label

    # Set the molecule indices
    universe.atom['molecule'] = universe.atom.index.map(lambda atom: mapper[atom])

    # Now compute molecule table; 'mass' is only a temporary column.
    universe.atom['mass'] = universe.atom['symbol'].map(symbol_to_element_mass)
    grouped = universe.atom.groupby('molecule')
    counts = grouped['symbol'].value_counts()
    molecule = counts.unstack().fillna(0).astype(np.int64)
    molecule.columns.name = None
    first_atoms = universe.atom.drop_duplicates('molecule')
    molecule['frame'] = first_atoms.set_index('molecule')['frame']
    molecule['mass'] = grouped['mass'].sum()
    del universe.atom['mass']

    frame = universe.atom[['molecule', 'frame']].drop_duplicates('molecule')
    frame = frame.set_index('molecule')['frame'].astype(np.int64)
    molecule['frame'] = frame.astype('category')
    return Molecule(molecule)
def helper2(G):
    """Return the better of the MST of ``G`` and a dominating-set based tree.

    A dominating set of the MST is computed, its connected pieces are joined
    through Dijkstra paths in ``G``, and the MST of the resulting induced
    subgraph is kept only if ``is_valid_network`` accepts it and its average
    pairwise distance is lower than that of the plain MST.

    NOTE(review): this block was reconstructed from flattened source; the
    rebuild of ``newG``/``newT`` is assumed to happen once, after all
    components were joined - confirm against the original layout.
    """
    T = nx.minimum_spanning_tree(G)
    # Baseline candidate: the minimum spanning tree itself.
    curr_lowest = average_pairwise_distance(T)
    curr_lowest_tree = T
    S = min_weighted_dominating_set(T)
    newG = nx.subgraph(T, S)
    ncc = nx.number_connected_components(newG)  # not used below
    ccs = list(connected_components(newG))
    for i in range(len(ccs) - 1):
        # pop() followed by add() peeks at an arbitrary member of each set
        # without changing the set.
        curr_node = ccs[i].pop()
        ccs[i].add(curr_node)
        next_node = ccs[i + 1].pop()
        ccs[i + 1].add(next_node)
        # Join consecutive components through a shortest path in G.
        path = nx.dijkstra_path(G, curr_node, next_node)
        for n in path:
            if (n not in list(newG.nodes)):
                S.add(n)
    newG = nx.subgraph(G, S)
    newT = nx.minimum_spanning_tree(newG)
    if (is_valid_network(G, newT)):
        apd = average_pairwise_distance(newT)
        if (apd < curr_lowest):
            curr_lowest = apd
            curr_lowest_tree = newT
    return curr_lowest_tree
def is_bipartite_node_set(G, nodes):
    """Returns True if nodes and G/nodes are a bipartition of G.

    Parameters
    ----------
    G : NetworkX graph

    nodes: list or container
        Candidate for one side of a bipartition of `G`.

    Examples
    --------
    >>> from networkx.algorithms import bipartite
    >>> G = nx.path_graph(4)
    >>> X = set([1, 3])
    >>> bipartite.is_bipartite_node_set(G, X)
    True

    Notes
    -----
    For connected graphs the bipartite sets are unique. Disconnected graphs
    are handled by checking every connected component separately: within
    each component, exactly one of its two colour classes must lie entirely
    inside `nodes` while the other one shares no node with it.
    """
    candidate = set(nodes)
    for component_nodes in connected_components(G):
        part_a, part_b = sets(G.subgraph(component_nodes).copy())
        a_inside = part_a.issubset(candidate) and part_b.isdisjoint(candidate)
        b_inside = part_b.issubset(candidate) and part_a.isdisjoint(candidate)
        if not (a_inside or b_inside):
            return False
    return True
def approximate_steiner(graph, terminals):
    """Build an approximate Steiner tree spanning ``terminals``.

    Least-cost paths between every pair of terminals are added, cheapest
    first, until the terminals are connected. Remaining components are then
    bridged with edges of ``graph`` and cycles are broken afterwards.

    Args:
        graph: graph whose nodes carry a ``'weight'`` attribute.
        terminals: indexable sequence of terminal nodes.

    Returns:
        tuple: ``(steiner_tree, steiner_cost)`` where ``steiner_cost`` is the
        sum of the ``'weight'`` attribute over the nodes of the tree.

    Raises:
        KeyError: if a node of the tree has no ``'weight'`` attribute.

    Note:
        If no edge of ``graph`` joins any two remaining components, bridging
        stops and the returned structure may be disconnected.
    """
    steiner_tree = nx.Graph()
    num_terminals = len(terminals)

    all_terminal_paths = []
    for i in range(num_terminals):
        for j in range(i + 1, num_terminals):
            paths = get_paths(graph, terminals[i], terminals[j])
            least_cost_path, least_cost = get_least_cost_path(graph, paths)
            path = {'cost': least_cost, 'path': least_cost_path}
            print("Path" + str(path['path']))
            all_terminal_paths.append(path)
    all_terminal_paths.sort(key=lambda x: x['cost'])

    for t_path in all_terminal_paths:
        # nx.add_path is the function form of the removed Graph.add_path.
        nx.add_path(steiner_tree, t_path['path'])
        if check_terminals_connected(steiner_tree, terminals):
            break

    conn_components = list(comp.connected_components(steiner_tree))
    while len(conn_components) > 1:
        bridged = False
        # Components are sets, so they are iterated rather than indexed.
        for idx, comp1 in enumerate(conn_components):
            for comp2 in conn_components[idx + 1:]:
                for u in comp1:
                    for v in comp2:
                        if graph.has_edge(u, v):
                            steiner_tree.add_edge(u, v)
                            bridged = True
                            break
                if bridged:
                    break
            if bridged:
                break
        if not bridged:
            # No edge joins any two components: stop instead of spinning.
            break
        conn_components = list(comp.connected_components(steiner_tree))

    # Break cycles one edge at a time until find_cycle reports none.
    while True:
        try:
            cycle = nx.find_cycle(steiner_tree)
        except nx.NetworkXNoCycle:
            break
        print('Cycle found')
        edge = cycle[0]
        steiner_tree.remove_edge(edge[0], edge[1])

    weights = nx.get_node_attributes(graph, 'weight')
    steiner_cost = 0
    for node in list(steiner_tree.nodes):
        steiner_cost = steiner_cost + weights[node]
    return steiner_tree, steiner_cost
def retain_relevant_slices(G_original, module_sig_th):
    """Filter the slices of the global ``G_modularity`` graph by significance.

    Every connected component (slice) of ``G_modularity`` is scored by
    ``pf_filter`` in a worker pool; the slices it keeps are then corrected
    with Benjamini-Hochberg FDR at level ``module_sig_th``.

    Args:
        G_original: the original network; only its size is used.
        module_sig_th: FDR threshold passed as ``alpha`` to ``fdrcorrection0``.

    Returns:
        tuple: ``(graph, modules, qvalues)`` - the union graph of the passing
        slices (an empty graph if none pass), their node lists, and the
        corrected p-values. ``(nx.Graph(), [], [])`` when ``pf_filter``
        keeps no slice.
    """
    global G_modularity

    pertubed_nodes = [
        cur_node for cur_node in G_modularity.nodes()
        if G_modularity.nodes[cur_node]["pertubed_node"]
    ]
    ccs = [
        G_modularity.subgraph(c) for c in connected_components(G_modularity)
    ]
    n_G_original = len(G_original)
    n_pertubed_nodes = len(pertubed_nodes)
    print(f"number of slices: {len(ccs)}")

    perturbation_factor = min(
        0.7,
        (float(n_pertubed_nodes) / n_G_original) *
        (1 + 100 / n_G_original**0.5))

    params = [[
        n_G_original, cur_cc, i_cur_cc, n_pertubed_nodes, perturbation_factor
    ] for i_cur_cc, cur_cc in enumerate(ccs)]

    p = multiprocessing.Pool(constants.N_OF_THREADS)
    try:
        res = [a for a in p.map(pf_filter, params) if a is not None]
    finally:
        # Release the workers even when a task raises.
        p.close()
    print(f'# of slices after perturbation TH: {len(res)}/{len(params)}')

    if len(res) == 0:
        return nx.Graph(), [], []

    large_modules, sig_scores = zip(*res)
    fdr_bh_results = fdrcorrection0(sig_scores,
                                    alpha=module_sig_th,
                                    method='indep',
                                    is_sorted=False)
    passed_modules = [
        cur_cc
        for cur_cc, is_passed_th in zip(large_modules, fdr_bh_results[0])
        if is_passed_th
    ]
    if len(passed_modules) > 0:
        merged = nx.algorithms.operators.union_all(passed_modules)
    else:
        merged = nx.Graph()
    return merged, [list(m.nodes) for m in passed_modules], fdr_bh_results[1]
def get_diameter(graph):
    """Return the largest diameter among the connected components of ``graph``.

    ``graph`` is converted with ``to_networkx`` and made undirected first.
    """
    undirected = to_networkx(graph).to_undirected()
    components = [
        undirected.subgraph(nodes)
        for nodes in connected_components(undirected)
    ]
    return max([diameter(component) for component in components])
def kdconnect(root_nodes, trees=None, tol=0.75):
    """Connect node trees whose points lie within ``tol`` of each other.

    For every pair of trees that has points within ``tol``, the closest pair
    of such points is located and the corresponding nodes are connected with
    ``connect_to(..., is_pipe=True)``.

    Args:
        root_nodes: sequence of root nodes, one per tree.
        trees: optional KD-tree indexes matching ``root_nodes``; built with
            ``gp.KDTreeIndex`` when omitted.
        tol: search radius passed to ``query_ball_tree``.

    Returns:
        list: one root node per connected group of trees. Roots that were not
        connected to anything are returned as their own group.

    Raises:
        AssertionError: if the total node count changed during merging.
    """
    from networkx.algorithms.components import connected_components
    import networkx as nx

    cnt1, cnt2 = 0, 0
    # make trees if they were not added before
    if trees is None:
        trees = [gp.KDTreeIndex(fwd=True, bkwd=True)(node)
                 for node in root_nodes]

    # get a node count for validation
    for node in root_nodes:
        cnt1 += len(list(node.__iter__(fwd=True, bkwd=True)))

    gg = nx.Graph()
    # Register every root so that unconnected ones form their own component
    # and are not lost from the result.
    gg.add_nodes_from(range(len(root_nodes)))

    for i in range(len(trees)):
        ti = trees[i]
        for j in range(i + 1, len(trees)):
            tj = trees[j]
            res_ij = ti.query_ball_tree(tj, tol)
            adj = [(di, hits) for di, hits in enumerate(res_ij)
                   if len(hits) > 0]
            if not adj:
                continue

            # unique indices from tree_i and tree_j; the hit lists can have
            # different lengths, so they are flattened explicitly
            un_tix = np.unique([di for di, _ in adj])
            un_tjx = np.unique([hit for _, hits in adj for hit in hits])

            # closest points between unique positions
            dists = distance.cdist(ti.data[un_tix], tj.data[un_tjx])
            armi = np.unravel_index(np.argmin(dists, axis=None), dists.shape)

            # retrieve node ids from respective trees, and connect
            node_id1 = ti[un_tix[armi[0]]]
            node_id2 = tj[un_tjx[armi[1]]]
            ndi = gutil.node_with_id(root_nodes[i], node_id1)
            ndj = gutil.node_with_id(root_nodes[j], node_id2)
            ndi.connect_to(ndj, is_pipe=True)

            # add root indexes to component graph
            gg.add_edge(i, j)

    final_roots = []
    # gather new root nodes using connected_component algorithm
    for component in connected_components(gg):
        # any member of the component can act as the new root
        ix = list(component)[0]
        a_root = root_nodes[ix]
        cnt2 += len(list(a_root.__iter__(fwd=True, bkwd=True)))
        final_roots.append(a_root)

    # sanity check - number of nodes should not have changed
    assert cnt1 == cnt2, 'unequal number of nodes before and after merge'
    return final_roots
def get_communities_fluid(G):
    """Partition ``G`` into modules using asynchronous fluid communities.

    Connected components with fewer than 50 nodes are kept as single
    modules; larger components are split with ``community.asyn_fluidc``
    (fixed seed 123).

    Args:
        G: undirected NetworkX graph.

    Returns:
        list: node collections, one per module.
    """
    min_size = 50
    coef = 1. / min_size
    # NOTE(review): the community count is derived from the size of the whole
    # graph, not of the component being split - confirm this is intended.
    # len(G) is the node count and replaces the removed ``G.node`` attribute.
    k = int(np.ceil(coef * len(G)))

    modules = []
    for component in components.connected_components(G):
        if len(component) < min_size:
            modules.append(component)
            continue
        modules.extend(
            community.asyn_fluidc(G.subgraph(component), k, seed=123))
    return modules
def get_putative_modules(G,
                         full_G=None,
                         improvement_delta=0,
                         modularity_score_objective=1,
                         module_threshold=0.05,
                         n_cc=1.0):
    """Split a sub-slice into putative modules.

    A copy of ``G`` is cleaned of self-loops and isolated nodes. If it has at
    least 100 nodes it is repeatedly split with
    ``split_subslice_into_putative_modules`` until that function signals to
    stop; smaller sub-slices are reported as they are.

    Args:
        G: the sub-slice graph (left untouched; a copy is processed).
        full_G: accepted for backward compatibility; currently unused.
        improvement_delta: minimal modularity gain for a split to be kept.
        modularity_score_objective: modularity at which splitting stops.
        module_threshold: accepted for backward compatibility; currently
            unused (the enrichment test it belonged to is disabled).
        n_cc: accepted for backward compatibility; currently unused.

    Returns:
        tuple: ``(G_optimized, cc_optimized)`` - the processed graph and the
        list of subgraphs of its connected components (empty list when the
        graph has no nodes left).
    """
    if full_G is None:
        full_G = G
    G_optimized = G.copy()

    # clean subslice from self-loops and isolated nodes
    G_optimized.remove_edges_from(list(nx.selfloop_edges(G_optimized)))
    G_optimized.remove_nodes_from(list(nx.isolates(G_optimized)))

    # Only the size criterion decides whether the sub-slice is split; the
    # hypergeometric enrichment score computed here previously was never
    # used in the decision.
    break_loop = len(G_optimized.nodes) < 100
    best_modularity = -1
    while not break_loop:
        break_loop, best_modularity = split_subslice_into_putative_modules(
            G_optimized, improvement_delta, modularity_score_objective,
            best_modularity)

    G_optimized.remove_nodes_from(list(nx.isolates(G_optimized)))
    if len(G_optimized.nodes) == 0:
        cc_optimized = []
    else:
        cc_optimized = [
            G_optimized.subgraph(c) for c in connected_components(G_optimized)
        ]
    return G_optimized, cc_optimized
def process_graph(graph):
    """
    Summarise the side-chains of a graph as a table.

    Nodes carrying an ``is_scaffold`` key are removed from ``graph`` (in
    place); every remaining connected component is one side-chain and yields
    one row: the id stored under ``anchor``, the number of nodes, and 0/1
    flags for the presence of ``is_hbd``, ``is_hba`` and both.

    Returns the table as an ``int32`` array together with a mapping from
    column name to column position. Raises ``ValueError`` when a side-chain
    has no anchor and ``NoSubstitutionException`` when there is no side-chain
    at all.
    """
    # Remove scaffold
    scaffold = [
        node_id for node_id, attrs in graph.nodes.items()
        if 'is_scaffold' in attrs
    ]
    graph.remove_nodes_from(scaffold)

    # One row per disconnected subgraph
    rows = []
    for component in connected_components(graph):
        members = [graph.nodes[node_id] for node_id in component]

        # First usable anchor among the members of the side-chain.
        anchor = None
        for attrs in members:
            if 'anchor' in attrs:
                anchor = attrs['anchor']
                if anchor is not None:
                    break
        if anchor is None:
            raise ValueError

        donor = any('is_hbd' in attrs for attrs in members)
        acceptor = any('is_hba' in attrs for attrs in members)
        rows.append([anchor, len(members), int(donor), int(acceptor),
                     int(donor and acceptor)])

    if not rows:
        raise NoSubstitutionException()

    # Convert to array plus column lookup
    graph_info = np.array(rows, dtype=np.int32)
    columns = ['attached_atom_id', 'num_heavy_atoms', 'is_hbd', 'is_hba',
               'is_hbd_and_hba']
    col_names = {name: position for position, name in enumerate(columns)}
    return graph_info, col_names
def get_graph_diameter(data):
    """Attach the graph diameter to ``data`` and return it.

    The diameter stored in ``data.diameter`` is the maximum over the
    connected components of the undirected graph. When ``data.x`` is None it
    is replaced by a ``(num_nodes, 1)`` tensor of ones.
    """
    undirected = to_networkx(data).to_undirected()
    pieces = [
        undirected.subgraph(nodes)
        for nodes in connected_components(undirected)
    ]
    data.diameter = max([diameter(piece) for piece in pieces])
    if data.x is None:
        data.x = torch.ones(data.num_nodes, 1)
    return data
def build_giant_component(self, return_component=False):
    '''
    Copy the largest connected component of ``self.graph`` into
    ``self.giant_component``.

    :param return_component: Bool, when true the stored component is also
        returned
    :return: nx.Graph instance, or None when ``return_component`` is false
    '''
    from networkx.algorithms.components import connected_components

    largest_nodes = max(connected_components(self.graph), key=len)
    self.giant_component = self.graph.subgraph(largest_nodes).copy()
    if not return_component:
        return None
    return self.giant_component
def post_process(self):
    """Cluster harmonically linked trackings and store the clusters.

    Every tracking is a node; it is linked to the trackings returned by its
    ``harmo_link``. Each connected component is wrapped in ``Cluster`` and
    the items of its ``harmo_sub()`` are collected into a new global result.
    """
    parent = self.parents['irit_harmo_tracking']
    trackings = parent.results['irit_harmo_tracking'].data_object.value

    # All links are computed before the graph is populated.
    links = [track.harmo_link(trackings) for track in trackings]

    graph = Graph()
    for track, linked in zip(trackings, links):
        graph.add_node(track)
        if len(linked) > 0:
            graph.add_edges_from([(track, other) for other in linked])

    res = self.new_result(time_mode='global')
    clusters = []
    for component in connected_components(graph):
        for sub in Cluster(component).harmo_sub():
            clusters.append(sub)
    res.data_object.value = clusters
    self.add_result(res)
    return
def compute_molecule(universe):
    """
    Label every atom with a molecule index and build the molecule table.

    Atoms are nodes and bonds are edges of a graph; each connected component
    is one molecule.

    Args:
        universe: Atomic universe

    Returns:
        molecule: table with per-symbol atom counts and total mass for every
        molecule

    Warning:
        The universe's atom table is modified in place (a categorical
        ``molecule`` column is written).
    """
    atom_ids = universe.atom.index.values
    is_bond = universe.atom_two['bond'] == True
    pairs = universe.atom_two.loc[is_bond, ['atom0', 'atom1']]

    g = nx.Graph()
    g.add_nodes_from(atom_ids)
    g.add_edges_from(zip(pairs['atom0'].astype(np.int64),
                         pairs['atom1'].astype(np.int64)))

    # Molecule labels start after a block of indices as large as the number
    # of unbonded atoms; components (single atoms included) are then
    # numbered consecutively from there.
    n_unbonded = sum(1 for _, degree in g.degree() if degree == 0)
    mapper = {
        atom: label
        for label, members in enumerate(connected_components(g),
                                        start=n_unbonded)
        for atom in members
    }

    universe.atom['molecule'] = universe.atom.index.map(
        lambda atom: mapper[atom])
    # 'mass' is a temporary column, removed again below.
    universe.atom['mass'] = universe.atom['symbol'].map(sym2mass).astype(float)

    by_molecule = universe.atom.groupby('molecule')
    counts = by_molecule['symbol'].value_counts()
    molecule = counts.unstack().fillna(0).astype(np.int64)
    molecule.columns.name = None
    molecule['mass'] = by_molecule['mass'].sum()

    universe.atom['molecule'] = universe.atom['molecule'].astype('category')
    del universe.atom['mass']
    return molecule
def compute_molecule(universe):
    """
    Cluster atoms into molecules and create the
    :class:`~exatomic.molecule.Molecule` table.

    Args:
        universe: Atomic universe

    Returns:
        molecule: Molecule table (per-symbol atom counts and total mass)

    Warning:
        This function modifies the universe's atom
        (:class:`~exatomic.atom.Atom`) table in place!
    """
    nodes = universe.atom.index.values
    # ``.loc`` replaces the removed ``.ix`` indexer; a boolean row mask with
    # column labels selects the same data.
    bonded = universe.atom_two.loc[universe.atom_two["bond"] == True,
                                   ["atom0", "atom1"]]
    edges = zip(bonded["atom0"].astype(np.int64),
                bonded["atom1"].astype(np.int64))
    g = nx.Graph()
    g.add_nodes_from(nodes)
    g.add_edges_from(edges)

    # generate molecule indices for the atom table
    mapper = {}
    i = 0
    # dict(...) accepts both the dict returned by networkx 1.x and the
    # degree view of (node, degree) pairs returned by networkx 2.x.
    for k, v in dict(g.degree()).items():
        # First handle single atom "molecules"
        if v == 0:
            mapper[k] = i
            i += 1
    for seht in connected_components(g):
        # Second handle multi atom molecules
        for adx in seht:
            mapper[adx] = i
        i += 1

    universe.atom["molecule"] = universe.atom.index.map(lambda x: mapper[x])
    sym2mass = symbol_to_element_mass()
    # 'mass' is a temporary column, removed again below.
    universe.atom["mass"] = universe.atom["symbol"].map(sym2mass)
    grps = universe.atom.groupby("molecule")
    molecule = grps["symbol"].value_counts().unstack().fillna(0).astype(
        np.int64)
    molecule.columns.name = None
    molecule["mass"] = grps["mass"].sum()
    universe.atom["molecule"] = universe.atom["molecule"].astype("category")
    del universe.atom["mass"]
    return molecule
def get_graph_diameter(data):
    '''
    Compute the graph diameter and store it on the data object.

    The value is the largest diameter found among the connected components
    of the undirected version of the graph. A missing feature matrix
    (``data.x is None``) is filled with a column of ones.

    :param data: the graph
    :return: the same object, augmented with the ``diameter`` attribute
    '''
    nx_graph = to_networkx(data).to_undirected()
    component_graphs = [
        nx_graph.subgraph(node_set)
        for node_set in connected_components(nx_graph)
    ]
    diameters = [diameter(sub) for sub in component_graphs]
    data.diameter = max(diameters)

    if data.x is None:
        data.x = torch.ones(data.num_nodes, 1)
    return data
def chordal_graph_cliques(G):
    """Returns the set of maximal cliques of a chordal graph.

    The graph is split into its connected components and the cliques of
    every component are collected into one set.

    Parameters
    ----------
    G : graph
      A NetworkX graph

    Returns
    -------
    cliques : A set containing the maximal cliques in G.

    Raises
    ------
    NetworkXError
        The algorithm does not support DiGraph, MultiGraph and MultiDiGraph.
        If the input graph is an instance of one of these classes, a
        :exc:`NetworkXError` is raised.
        The algorithm can only be applied to chordal graphs. If the input
        graph is found to be non-chordal, a :exc:`NetworkXError` is raised.

    Examples
    --------
    >>> import networkx as nx
    >>> e= [(1,2),(1,3),(2,3),(2,4),(3,4),(3,5),(3,6),(4,5),(4,6),(5,6),(7,8)]
    >>> G = nx.Graph(e)
    >>> G.add_node(9)
    >>> setlist = nx.chordal_graph_cliques(G)
    """
    if not is_chordal(G):
        raise nx.NetworkXError("Input graph is not chordal.")

    found = set()
    for component_nodes in connected_components(G):
        component = G.subgraph(component_nodes).copy()
        found |= _connected_chordal_graph_cliques(component)
    return found
def resolve_duplicate_clusters(clusters, pairs):
    """Map duplicate clusters onto one representative per group.

    ``clusters`` are nodes and ``pairs`` are edges of a graph; within every
    connected group the smallest cluster becomes the representative.

    Args:
        clusters: collection of cluster identifiers (left unmodified).
        pairs: iterable of ``(cluster, cluster)`` duplicate relations.

    Returns:
        tuple: ``(translation_dict, duplicate_weights)`` where
        ``translation_dict`` maps every non-representative cluster to its
        representative and ``duplicate_weights`` maps "number of duplicates
        in a group" to the number of groups of that size.
    """
    if not clusters:
        summary.add('Duplicate components', 0)
        summary.add('Clusters truely duplicate', 0)
        return dict(), dict()

    G = nx.Graph()
    G.add_nodes_from(clusters)
    G.add_edges_from(pairs)

    if components.is_connected(G):
        # Work on a copy: the representative is removed from the group
        # below, which must not alter the caller's collection.
        components_list = [list(clusters)]
    else:
        components_list = list(components.connected_components(G))

    summary.add('Duplicate components',
                components.number_connected_components(G))

    translation_dict = {}
    duplicate_weights = {}
    for component in components_list:
        main_node = min(component)
        component.remove(main_node)
        n_duplicates = len(component)
        duplicate_weights[n_duplicates] = \
            duplicate_weights.get(n_duplicates, 0) + 1
        for node in component:
            translation_dict[node] = main_node

    summary.add('Clusters truely duplicate')
    return translation_dict, duplicate_weights
def post_process(self):
    """Group trackings by harmonic links and publish the resulting clusters.

    A graph is built with one node per tracking and one edge per link
    reported by ``harmo_link``; the ``harmo_sub()`` items of the ``Cluster``
    built from each connected component form the value of a new global
    result.
    """
    source = self.parents['irit_harmo_tracking']
    trackings = source.results['irit_harmo_tracking'].data_object.value

    # Links are gathered for every tracking before the graph is filled.
    linked_pairs = [(track, track.harmo_link(trackings))
                    for track in trackings]

    graph = Graph()
    for track, partners in linked_pairs:
        graph.add_node(track)
        if len(partners) == 0:
            continue
        graph.add_edges_from([(track, partner) for partner in partners])

    res = self.new_result(time_mode='global')
    value = []
    for component in connected_components(graph):
        value.extend(Cluster(component).harmo_sub())
    res.data_object.value = value
    self.add_result(res)
    return
def report_connectedness(G, save_img_path=None):
    """
    Report whether the graph is connected, together with its components.

    Args:
        G (nx.Graph): graph to inspect.
        save_img_path (str): optional path; when given, a PNG drawing of the
            graph with one colour per component is saved there.

    Returns:
        tuple: ``(is_connected, connected_components)`` where
        ``connected_components`` is a list of sets of nodes.
    """
    # aggregate connectedness metrics
    is_connected = components.is_connected(G)

    # Materialise the components: they are counted, iterated and returned,
    # which a one-shot generator cannot support.
    connected_components = list(components.connected_components(G))

    # save the components visualisation if the path is given
    if save_img_path:
        colors = np.linspace(0, 1, len(connected_components))
        com_color_map = {
            node: colors[idx]
            for idx, com in enumerate(connected_components)
            for node in com
        }
        # A layout has to be computed here; none is passed in.
        pos = nx.spring_layout(G)
        nx.draw_networkx_labels(G, pos=pos)
        # Colours are listed in node order so each node gets its own colour.
        nx.draw(G, pos, node_color=[com_color_map[node] for node in G])
        plt.savefig(save_img_path, format="PNG")

    return (is_connected, connected_components)
for j, pp in enumerate(kept_i2pp): seg_j = segments[pp2i[pp]] if isinstance(seg_j, Nphthong) and len(seg_j) == 2 and kept_dist_mat[i, j] == insert_cost: g.add_edge(i, j) elif isinstance(seg, Nphthong): for j, pp in enumerate(kept_i2pp): seg_j = segments[pp2i[pp]] if isinstance(seg_j, Segment) and seg_j.is_vowel() and kept_dist_mat[i, j] == insert_cost: g.add_edge(i, j) elif isinstance(seg_j, Nphthong) and abs(len(seg) - len(seg_j)) == 1 and kept_dist_mat[i, j] == insert_cost: g.add_edge(i, j) query_sound = st.selectbox('Query sound', sorted(kept_i2pp)) st.write(get_connected_sounds(query_sound, g, kept_dist_mat, kept_i2pp, kept_pp2i)) cc = list(connected_components(g)) assert len(cc) == 1 # Compute average number of connected sounds. cnt = dict() for i in kept_ids: cnt[i2pp[i]] = len(g.edges(i)) st.write(f'Average number of connected sounds: {(sum(cnt.values()) / len(kept_ids)):.3f}') if should_proceed('about_to_save'): proto_ph_map = dict() for i in kept_ids: ph = i2pp[i] proto_ph_map[ph] = ph lengths = [len(pp) for pp in kept_i2pp]
def _chordal_graph_cliques(G):
    """Returns all maximal cliques of a chordal graph.

    The algorithm breaks the graph in connected components and performs a
    maximum cardinality search in each component to get the cliques.

    Parameters
    ----------
    G : graph
        A NetworkX graph

    Returns
    -------
    iterator
        An iterator over maximal cliques, each of which is a frozenset of
        nodes in `G`. The order of cliques is arbitrary.

    Raises
    ------
    NetworkXError
        The algorithm does not support DiGraph, MultiGraph and MultiDiGraph.
        If the input graph is an instance of one of these classes, a
        :exc:`NetworkXError` is raised.
        The algorithm can only be applied to chordal graphs. If the input
        graph is found to be non-chordal, a :exc:`NetworkXError` is raised.
        Because this is a generator, the error surfaces when iteration
        starts (or continues), not when the function is called.

    Examples
    --------
    >>> e = [
    ...     (1, 2),
    ...     (1, 3),
    ...     (2, 3),
    ...     (2, 4),
    ...     (3, 4),
    ...     (3, 5),
    ...     (3, 6),
    ...     (4, 5),
    ...     (4, 6),
    ...     (5, 6),
    ...     (7, 8),
    ... ]
    >>> G = nx.Graph(e)
    >>> G.add_node(9)
    >>> cliques = [c for c in _chordal_graph_cliques(G)]
    >>> cliques[0]
    frozenset({1, 2, 3})
    """
    if not is_chordal(G):
        raise nx.NetworkXError("Input graph is not chordal.")

    for C in (G.subgraph(c).copy() for c in connected_components(G)):
        if C.number_of_nodes() == 1:
            # A single node is its own maximal clique.
            yield frozenset(C.nodes())
        else:
            unnumbered = set(C.nodes())
            v = arbitrary_element(C)
            unnumbered.remove(v)
            numbered = {v}
            clique_wanna_be = {v}
            while unnumbered:
                v = _max_cardinality_node(C, unnumbered, numbered)
                unnumbered.remove(v)
                numbered.add(v)
                # Already-numbered neighbours of v are the candidate members
                # of the next clique.
                new_clique_wanna_be = set(C.neighbors(v)) & numbered
                sg = C.subgraph(clique_wanna_be)
                if _is_complete_graph(sg):
                    new_clique_wanna_be.add(v)
                    # The current candidate is emitted only when the new one
                    # does not contain it.
                    if not new_clique_wanna_be >= clique_wanna_be:
                        yield frozenset(clique_wanna_be)
                    clique_wanna_be = new_clique_wanna_be
                else:
                    raise nx.NetworkXError("Input graph is not chordal.")
            # The last candidate of the component has not been emitted yet.
            yield frozenset(clique_wanna_be)
# Handle cliques try: signal.signal(signal.SIGALRM, percolate.clique_handler) if CCid in ['03ae', '03b0', '03b2', '03b5', '03b7', '0893']: # Skip histones and other complex OGs raise percolate.CliqueError signal.alarm(90) cliques = list(find_cliques(G)) signal.alarm(0) except percolate.CliqueError: print(f'CliqueError: {CCid}') for k in ks: subOGs = set() core = k_core(G, k) for component in connected_components(core): subOGs.add( frozenset( [frozenset(edge) for edge in core.edges(component)])) OGs_ks[k].append(subOGs) classify_CC(CCtypes_ks[k], subOGs) continue # Continue to next OG # Handle percolation for k in ks: try: signal.signal(signal.SIGALRM, percolate.percolate_handler) signal.alarm(90) subOGs = list( percolate.k_clique_communities_progressive(G, k, cliques)) signal.alarm(0)
def partition_reads(tint, maximum_ilp_size):
    """Partition the reads of ``tint`` and store the result in place.

    Reads with identical data rows are merged into one graph node; nodes are
    linked when their rows are similar enough on the overlapping interval.
    Every connected component of the pruned graph produces one entry
    ``(rids, incomp)`` in ``tint['partitions']``.

    NOTE(review): the nesting of the final loop was reconstructed from
    flattened source; the ``append`` is assumed to run once per connected
    component - confirm against the original layout.
    """
    reads = tint['reads']
    read_reps = tint['read_reps']
    I = tint['ilp_data']['I']
    FL = tint['ilp_data']['FL']
    tint['partitions'] = list()
    rids = sorted(I.keys())
    unique_data = dict()
    edges = list()
    # Group read ids by (data row, (first, last, poly tail category)).
    for i in rids:
        d = (tuple(I[i]), (FL[i][0], FL[i][1], reads[read_reps[i][0]]['poly_tail_category']))
        if d in unique_data:
            unique_data[d].append(i)
        else:
            unique_data[d] = [i]
    unique_data = list(unique_data.items())
    N = len(unique_data)
    for i in range(N):
        for j in range(i+1, N):
            d1, (f1, l1, t1) = unique_data[i][0]
            d2, (f2, l2, t2) = unique_data[j][0]
            # [f, l] is the interval covered by both rows; o is its length.
            f = max(f1, f2)
            l = min(l1, l2)
            o = l-f+1
            # d: mismatching positions, w: positions where both rows are 1.
            d = sum(x != y for x, y in zip(d1[f:l+1], d2[f:l+1]))
            w = sum(x == y == 1 for x, y in zip(d1[f:l+1], d2[f:l+1]))
            # Two different categories, neither of them 'N', never link.
            if t1 != 'N' and t2 != 'N' and t1 != t2:
                continue
            if w < 1:
                continue
            if (o > 3 and d < 3) or (1 <= o <= 3 and d == 0):
                edges.append((i, j))
    G = Graph()
    G.add_nodes_from(range(N))
    G.add_edges_from(edges)
    # Repeatedly drop edges whose endpoints both have more than one neighbour
    # and share no common neighbour, until nothing changes.
    while True:
        edges_to_remove = list()
        for i, j in G.edges:
            n1 = set(G.neighbors(i))
            n2 = set(G.neighbors(j))
            if len(n1) == 1 or len(n2) == 1 or len(n1 & n2) > 0:
                continue
            edges_to_remove.append((i, j))
        G.remove_edges_from(edges_to_remove)
        if len(edges_to_remove) == 0:
            break
    for c in components.connected_components(G):
        rids = list()
        incomp = list()
        # NOTE: the inner loop reuses the name ``c`` for each chunk.
        for c in split_list_evenly(list(c), maximum_ilp_size):
            for idx, i in enumerate(c):
                rids.extend(unique_data[i][1])
                for j in c[idx+1:]:
                    # NOTE: ``i`` is rebound here and keeps its new value for
                    # the remaining iterations of this inner loop.
                    i,j = min(i,j),max(i,j)
                    assert i<j
                    if G.has_edge(i,j):
                        continue
                    # Nodes of one chunk without an edge: all their read
                    # pairs are recorded in ``incomp``.
                    for rid_1 in unique_data[i][1]:
                        for rid_2 in unique_data[j][1]:
                            incomp.append((rid_1,rid_2))
        tint['partitions'].append((rids, incomp))
def compute_volumes(self, queries=None, evidence=None, cache=True):
    """Computes the unnormalized probabilities of univariate and
    bivariate literals in 'queries' associated to univariate
    literals and a list of uni/bivariate clauses representing the
    'evidence'.

    Returns (Z given evidence, list[volumes of queries given evidence]).

    Raises NotImplementedError if the primal graph is not a forest, if a
    query contains non-real variables, or if a query has more than two
    variables.

    Parameters
    ----------
    queries : list of pysmt.FNode instances (optional)
        Uni/bivariate literals
    evidence : iterable of pysmt.FNode instances (optional)
        Uni/bivariate clauses, default: None
    cache : bool (optional)
        If True, integrals are cached, default: True
    """
    if not nx.is_forest(self.primal.G):
        raise NotImplementedError("MP requires a forest-shaped primal graph")

    if queries is None:
        queries = []
    else:
        queries = [flip_negated_literals_cnf(q) for q in queries]

    # An existing cache is kept when cache is True; it is only created here
    # when missing, and dropped when cache is False.
    if cache is True and self.cache is None:
        self.cache = dict()
        self.cache_hit = [0, 0]
    elif cache is False:
        self.cache = None

    # send around messages, possibly accounting for 'evidence'
    self._compute_marginals(evidence=evidence)

    # compute the partition function as the product of the marginals of any node
    # for each connected component in the primal graph
    components = list(connected_components(self.primal.G))
    Z_components = []
    for comp_vars in components:
        x = list(comp_vars)[0]
        full_marginal = self._get_full_marginal(x)
        comp_Z = self.piecewise_symbolic_integral(full_marginal, x)
        Z_components.append(comp_Z)

    query_volumes = []
    for q in queries:
        q_vars = list(q.get_free_variables())
        if not all([qv.symbol_type() == REAL for qv in q_vars]):
            raise NotImplementedError("Supporting lra queries only")

        x = q_vars[0].symbol_name()
        if len(q_vars) == 1:
            # univariate query
            l, u = domains_to_intervals(q)[0]
            q_msg = [(l, u, 1)]
            # intersecting with the node symbolic marginal
            q_marginal = self._get_msgs_intersection(
                [self._get_full_marginal(x), q_msg]
            )
            q_vol = self.piecewise_symbolic_integral(q_marginal, x)
            # account for the volume of unconnected variables
            for i, comp_vars in enumerate(components):
                if x not in comp_vars:
                    q_vol *= Z_components[i]
            query_volumes.append(q_vol)

        elif len(q_vars) == 2:
            # bivariate query
            y = q_vars[1].symbol_name()
            # creates a new message using the query 'q' as evidence
            q_marginal = self._compute_message(x, y, evidence=[q])
            # combine with the messages reaching y from every node but x
            q_marginal = self._get_msgs_intersection([q_marginal] + [self.marginals[y][z] for z in self.marginals[y] if z != x])
            y_potentials = self.primal.nodes()[y]['potentials']
            if len(y_potentials) > 0:
                potential_msgs = self._parse_potentials(
                    y_potentials, self.primal.nodes()[y]['var']
                )
                q_marginal = self._get_msgs_intersection(
                    potential_msgs + [q_marginal]
                )
            q_vol = self.piecewise_symbolic_integral(q_marginal, y)
            # account for the volume of unconnected variables
            for i, comp_vars in enumerate(components):
                if x not in comp_vars:
                    q_vol *= Z_components[i]
            query_volumes.append(q_vol)

        else:
            raise NotImplementedError(
                "Queries of ariety > 2 aren't supported")

    # Z is the product of the per-component partition functions.
    Z = 1.0
    for Z_comp in Z_components:
        Z *= Z_comp

    # NOTE(review): cache_hit is assigned in this method only when a new
    # cache is created; presumably it is initialised elsewhere - confirm.
    if self.cache_hit is not None:
        # TODO: check if cache_hit index should be True or False
        print("\tHITS: {}/{} (ratio {})".format(self.cache_hit[True], sum(self.cache_hit), self.cache_hit[True] / sum(self.cache_hit)))

    Z = float(Z.as_expr())
    query_volumes = [float(qv.as_expr()) for qv in query_volumes]
    return Z, query_volumes
def compute_undirected_graph_metrics(G):
    """Compute a dictionary of structural metrics for an undirected graph.

    Covers degree statistics, clustering, connectivity of node pairs,
    centralization, modularity, the largest connected component, connected
    component counts per size threshold, and k-core / k-truss summaries.

    Args:
        G (nx.Graph): the graph; must be exactly of type ``nx.Graph``.

    Returns:
        dict: metric name -> value. ``algebraic_connectivity_CC1`` is None
        when the largest component has at most 2 nodes or when its
        computation fails.

    Raises:
        AssertionError: if ``G`` is not exactly an ``nx.Graph``.
    """
    assert type(G) is nx.Graph

    # degrees stats
    degrees = np.array([i for _, i in G.degree])
    degrees_k_freq = np.unique(degrees, return_counts=True)[1]
    degrees_corr = numeric_attribute_correlation(G, dict(G.degree),
                                                 dict(G.degree))

    # clustering
    global_clustering = transitivity(G)
    local_clustering_mean = average_clustering(G)

    # fraction of connected node pairs (any path len)
    f_connected_node_pairs = fraction_of_connected_node_pairs(G)

    # centralization
    cent_metrics = centralization_metrics(G, prefix="_ud")

    # modularity
    modularity_metrics = compute_modularity_metrics(G)

    # largest CC
    CC1_nodes = max(connected_components(G), key=len)
    CC1 = G.subgraph(CC1_nodes).copy()
    f_CC1_nodes = len(CC1) / len(G)

    # algebraic_connectivity of the largest CC
    algebraic_connectivity_CC1 = None
    if len(CC1) > 2:
        try:
            algebraic_connectivity_CC1 = algebraic_connectivity(CC1, seed=0)
        except Exception:
            # Best effort: a failed computation is reported as None, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            algebraic_connectivity_CC1 = None

    # connected components
    CC_sizes = np.array([len(cc_i) for cc_i in connected_components(G)])
    CC_metrics = {}
    for k in CC_k_thresholds:
        CC_metrics[f"n_CC_{k}"] = np.sum(CC_sizes >= k)

    # k-core
    k_core_metrics = {}
    G_core_number = core_number(G)
    for k in k_core_ks:
        k_core_subgraph = k_core(G, k=k, core_number=G_core_number)
        k_core_metrics[f"core_{k}_n_nodes"] = len(k_core_subgraph.nodes)
        k_core_metrics[f"core_{k}_n_edges"] = len(k_core_subgraph.edges)
        k_core_metrics[f"core_{k}_density"] = density(k_core_subgraph)
        k_core_metrics[f"core_{k}_n_CC"] = len(
            list(connected_components(k_core_subgraph)))

    # k-truss
    k_truss_metrics = {}
    for k in k_truss_ks:
        k_truss_subgraph = k_truss(G, k=k)
        k_truss_metrics[f"truss_{k}_n_nodes"] = len(k_truss_subgraph.nodes)
        k_truss_metrics[f"truss_{k}_n_edges"] = len(k_truss_subgraph.edges)
        k_truss_metrics[f"truss_{k}_density"] = density(k_truss_subgraph)
        k_truss_metrics[f"truss_{k}_n_CC"] = len(
            list(connected_components(k_truss_subgraph)))

    metrics = {
        "n_edges_ud": len(G.edges()),
        "density_ud": density(G),
        # degree stats
        "degrees_mean": safe(np.mean, degrees),
        "degrees_var": safe(np.var, degrees),
        "degrees_hidx": safe(h_index, degrees),
        "degrees_gini": safe(gini, degrees + eps),
        "degrees_f0": safe(np.mean, (degrees == 0)),
        "degrees_corr": degrees_corr,
        "degrees_pk_ent": entropy(degrees_k_freq),
        "degrees_pk_gini": gini(degrees_k_freq),
        # fraction of connected node pairs with path of any length
        "f_connected_node_pairs_ud": f_connected_node_pairs,
        # clustering coefficients
        "global_clustering_ud": global_clustering,
        "local_clustering_mean_ud": local_clustering_mean,
        # centralization
        **cent_metrics,
        # modularity
        **modularity_metrics,
        # fraction of nodes in the largest CC
        "f_CC1_nodes": f_CC1_nodes,
        # algebraic connectivity of the largest CC
        "algebraic_connectivity_CC1": algebraic_connectivity_CC1,
        # connected components
        **CC_metrics,
        # k-core
        **k_core_metrics,
        # k-truss
        **k_truss_metrics
    }
    return metrics
def compute_volumes(self, queries=None, evidence=None, cache=True):
    """Computes the unnormalized probabilities of univariate and
    bivariate literals in 'queries' associated to univariate
    literals and a list of uni/bivariate clauses representing the
    'evidence'.

    Marginals are computed per connected component of the primal graph
    (in a process pool) and merged into ``self.marginals``. When caching is
    enabled, ``self.cache`` and ``self.cache_hit`` are updated and the hit
    statistics are printed.

    Parameters
    ----------
    queries : list of pysmt.FNode instances (optional)
        Uni/bivariate literals
    evidence : iterable of pysmt.FNode instances (optional)
        Uni/bivariate clauses, default: None
    cache : bool (optional)
        If True, integrals are cached, default: True

    Returns
    -------
    tuple
        (Z given evidence, list[volumes of queries given evidence]),
        both converted to ``float``.

    Raises
    ------
    NotImplementedError
        If the primal graph is not a forest, if a query contains non-REAL
        variables, or if a query has more than two variables.
    """
    if not nx.is_forest(self.primal.G):
        raise NotImplementedError(
            "MP requires a forest-shaped primal graph")

    if queries is None:
        queries = []
    else:
        queries = [flip_negated_literals_cnf(q) for q in queries]

    # 'evidence' is filtered once per connected component below; materialise
    # it so a one-shot iterable (e.g. a generator) is not exhausted after
    # the first component.
    if evidence is not None:
        evidence = list(evidence)

    if cache is False:
        self.cache = None
    elif cache is True and self.cache is None:
        self.cache = Manager().dict()
        self.cache_hit = [0, 0]
    else:
        self.cache = Manager().dict(self.cache)  # needed?

    def record_hits(ch):
        # Accumulate hit/miss counters; True/False index positions 1/0.
        if self.cache is not None:
            self.cache_hit[True] += ch[True]
            self.cache_hit[False] += ch[False]

    # compute the partition function as the product of the marginals of any
    # node for each connected component in the primal graph
    components = list(connected_components(self.primal.G))
    subproblems = []
    pysmt_env = get_env()
    for comp_vars in components:
        subprimal = PrimalGraph.from_graph(
            self.primal.G.subgraph(comp_vars))
        subvars = {subprimal.nodes()[n]['var'] for n in subprimal.nodes()}
        submarginals = {
            k: v for k, v in self.marginals.items() if k in comp_vars
        }

        if evidence is None:
            subevidence = None
        else:
            # keep only the clauses fully contained in this component
            subevidence = [
                e for e in evidence
                if set(e.get_free_symbols()).issubset(subvars)
            ]

        subproblems.append(
            (subprimal, submarginals, self.smt_solver, self.cache,
             self.tolerance, self.rand_gen, pysmt_env, subevidence))

    with Pool(processes=self.n_processes) as pool:
        results = pool.starmap(MP2WMI._compute_marginals, subproblems)

    for comp_marginals, ch in results:
        self.marginals.update(comp_marginals)
        record_hits(ch)

    # one partition function per component, integrating the full marginal
    # of an arbitrary node of that component
    Z_components = []
    for comp_vars in components:
        x = list(comp_vars)[0]
        full_marginal = MP2WMI._get_full_marginal(
            self.primal, self.marginals, self.tolerance, x)
        comp_Z, ch = MP2WMI._piecewise_symbolic_integral(
            self.cache, full_marginal, x)
        record_hits(ch)
        Z_components.append(comp_Z)

    query_volumes = []
    for q in queries:
        q_vars = list(q.get_free_variables())
        if not all(qv.symbol_type() == REAL for qv in q_vars):
            raise NotImplementedError("Supporting lra queries only")

        x = q_vars[0].symbol_name()
        if len(q_vars) == 1:
            # univariate query
            l, u = domains_to_intervals(q)[0]
            q_msg = [(l, u, 1)]

            # intersecting with the node symbolic marginal
            q_marginal = MP2WMI._get_msgs_intersection([
                MP2WMI._get_full_marginal(self.primal, self.marginals,
                                          self.tolerance, x),
                q_msg
            ], self.tolerance)
            q_vol, ch = MP2WMI._piecewise_symbolic_integral(
                self.cache, q_marginal, x)
            record_hits(ch)

        elif len(q_vars) == 2:
            # bivariate query
            y = q_vars[1].symbol_name()

            # creates a new message using the query 'q' as evidence
            q_marginal, ch = MP2WMI._compute_message(
                self.primal, self.marginals, self.smt_solver, self.cache,
                self.tolerance, x, y, evidence=[q])
            record_hits(ch)

            # combine with the messages sent to y by its other neighbours
            marg_not_x = [
                self.marginals[y][z] for z in self.marginals[y] if z != x
            ]
            q_marginal = MP2WMI._get_msgs_intersection(
                [q_marginal] + marg_not_x, self.tolerance)

            y_potentials = self.primal.nodes()[y]['potentials']
            if len(y_potentials) > 0:
                potential_msgs = MP2WMI._parse_potentials(
                    y_potentials, self.primal.nodes()[y]['var'])
                q_marginal = self._get_msgs_intersection(
                    potential_msgs + [q_marginal], self.tolerance)

            q_vol, ch = MP2WMI._piecewise_symbolic_integral(
                self.cache, q_marginal, y)
            record_hits(ch)

        else:
            raise NotImplementedError(
                "Queries of ariety > 2 aren't supported")

        # account for the volume of unconnected variables
        for i, comp_vars in enumerate(components):
            if x not in comp_vars:
                q_vol *= Z_components[i]

        query_volumes.append(q_vol)

    Z = 1.0
    for Z_comp in Z_components:
        Z *= Z_comp

    if self.cache is not None:
        # TODO: check if cache_hit index should be True or False
        hits = self.cache_hit[True]
        total = sum(self.cache_hit)
        # avoid ZeroDivisionError when no hit/miss has been recorded
        ratio = hits / total if total else 0.0
        print("\tHITS: {}/{} (ratio {})".format(hits, total, ratio))

    Z = float(Z.as_expr())
    query_volumes = [float(qv.as_expr()) for qv in query_volumes]

    return Z, query_volumes
def to_bayesian_model(self):
    """
    Build a Bayesian Model that is a minimum I-Map for this markov model.

    The ordering of parents is not guaranteed to be stable: it follows the
    ordering of variables in the junction tree, which is itself not
    constant. If the model is not connected, each connected component is
    converted separately and the results are joined into one model.

    Examples
    --------
    >>> from ProbabilityModel.models import MarkovModel
    >>> from ProbabilityModel.factors.discrete import DiscreteFactor
    >>> mm = MarkovModel()
    >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
    >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
    ...                    ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
    ...                    ('x4', 'x7'), ('x5', 'x7')])
    >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
    >>> mm.add_factors(*phi)
    >>> bm = mm.to_bayesian_model()
    """
    from ProbabilityModel.models import BayesianModel

    # One Bayesian model per connected component; merged at the end.
    component_models = []
    for component in connected_components(self):
        component_bm = BayesianModel()
        first_clique = defaultdict(tuple)
        ordering = []

        # Junction tree of the component: triangulation, maximal cliques,
        # then a tree built over those cliques.
        induced = self.subgraph(component)
        junction_tree = MarkovModel(induced.edges()).to_junction_tree()

        # Order variables by the first clique in which they are met during
        # a breadth-first walk of the junction tree.
        root_clique = next(iter(junction_tree.nodes()))
        traversal = nx.bfs_edges(junction_tree, root_clique)
        for variable in root_clique:
            first_clique[variable] = root_clique
            ordering.append(variable)
        for _, clique in traversal:
            for variable in clique:
                if first_clique[variable]:
                    continue  # already assigned to an earlier clique
                first_clique[variable] = clique
                ordering.append(variable)

        # par(x_i) = (var(c_k) - x_i) \cap {x_1, ..., x_{i-1}}
        for position, variable in enumerate(ordering):
            clique_mates = set(first_clique[variable]) - {variable}
            parents = clique_mates.intersection(set(ordering[:position]))
            component_bm.add_edges_from(
                [(parent, variable) for parent in parents])
        # TODO : Convert factor into CPDs
        component_models.append(component_bm)

    # Merge the per-component models into a single model.
    final_bm = BayesianModel()
    for component_bm in component_models:
        final_bm.add_edges_from(component_bm.edges())
        final_bm.add_nodes_from(component_bm.nodes())

    return final_bm