Example #1
    def _good_k_break(self, old_edges, new_edges):
        """
        Checks that the break does not change chromosome structure significantly
        """
        MIN_OVLP_SCORE = 0.9
        MAX_K_BREAK = 4
        if len(old_edges) > MAX_K_BREAK:
            return False

        new_adj_graph = self.adj_graph.copy()
        for u, v in old_edges:
            new_adj_graph.remove_edge(u, v)
        for u, v in new_edges:
            new_adj_graph.add_edge(u, v)

        all_nodes = new_adj_graph.nodes()
        old_sets = list(map(lambda g: set(g.nodes()),
                            nx.connected_component_subgraphs(self.adj_graph)))
        new_sets = list(map(lambda g: set(g.nodes()),
                            nx.connected_component_subgraphs(new_adj_graph)))
        if len(old_sets) != len(new_sets):
            return False

        for old_set in old_sets:
            max_overlap = 0
            best_score = 0
            for new_set in new_sets:
                overlap = len(old_set & new_set)
                if overlap > max_overlap:
                    max_overlap = overlap
                    best_score = float(overlap) / len(old_set | new_set)
            if best_score < MIN_OVLP_SCORE:
                return False

        return True
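Note: every snippet on this page uses nx.connected_component_subgraphs, which was deprecated in NetworkX 2.1 and removed in 2.4. If you need to run these examples on a current NetworkX release, a minimal shim (an adaptation added here, not part of any of the original projects) could look like this:

import networkx as nx

def connected_component_subgraphs(G):
    # yield one subgraph copy per connected component, like the old helper did
    for c in nx.connected_components(G):
        yield G.subgraph(c).copy()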
Example #2
def zc(G, list_G1, list_G2, f):  # compute z values
    """
    Inputs: original network G, a list of shuffled networks that do not preserve
            connectivity (list_G1), a list of shuffled networks that preserve
            connectivity (list_G2), and the metric function f that requires a
            connected network
    Returns: the z value without preserving connectivity (z1) and the z value
             with connectivity preserved (z2)
    """
    list_G_l0 = []; list_G_l1 = []; list_G_l2 = []
    for g in nx.connected_component_subgraphs(G):
        list_G_l0.append(f(g))
    for G1 in list_G1:
        for g1 in nx.connected_component_subgraphs(G1):
            list_G_l1.append(f(g1))  # list of metric values
    for G2 in list_G2:
        for g2 in nx.connected_component_subgraphs(G2):
            list_G_l2.append(f(g2))
    #print list_G_l0, list_G_l1, list_G_l2
    G_l0 = np.mean(list_G_l0)
    G_l1 = np.mean(list_G_l1)  # means
    G_l2 = np.mean(list_G_l2)
    var_z1 = np.var(list_G_l1)  # variances
    var_z2 = np.var(list_G_l2)
    if var_z1 == 0:  # if the variance is 0, the z value is set to 0
        z1 = 0
    else:
        z1 = (G_l0 - G_l1) / var_z1  # z value
    if var_z2 == 0:
        z2 = 0
    else:
        z2 = (G_l0 - G_l2) / var_z2  # z value
    return z1, z2
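A minimal usage sketch (an assumption about how zc is meant to be called, not taken from the original source): the two lists hold randomized copies of G, e.g. built with nx.double_edge_swap (connectivity not preserved) and nx.connected_double_edge_swap (connectivity preserved), and f is any NetworkX metric such as nx.average_clustering.

import networkx as nx
import numpy as np

G = nx.karate_club_graph()
list_G1, list_G2 = [], []
for _ in range(20):
    g1 = G.copy()
    nx.double_edge_swap(g1, nswap=50, max_tries=5000)   # may break connectivity
    list_G1.append(g1)
    g2 = G.copy()
    nx.connected_double_edge_swap(g2, nswap=50)          # keeps the graph connected
    list_G2.append(g2)

z1, z2 = zc(G, list_G1, list_G2, nx.average_clustering)
print z1, z2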
Example #3
def main():
    tempo_dir = "../corpus-local/tempo-txt"
    file_regex = ".*\.txt"

    G = build_graph(tempo_dir, file_regex)
    """
  ccs = nx.clustering(G)
  avg_clust = sum(ccs.values()) / len(ccs)
  """
    print tempo_dir
    print "\tAda " + str(len(G.nodes())) + " node."
    print "\tAda " + str(len(G.edges())) + " edge."
    print "\tClustering coefficient      : " + str(nx.average_clustering(G))
    print "\tAverage shortest path length"
    for g in nx.connected_component_subgraphs(G):
        print "\t\t" + str(nx.average_shortest_path_length(g))

    kompas_dir = "../corpus-local/kompas-txt"
    G = build_graph(kompas_dir, file_regex)
    print kompas_dir
    print "\tAda " + str(len(G.nodes())) + " node."
    print "\tAda " + str(len(G.edges())) + " edge."
    print "\tClustering coefficient      : " + str(nx.average_clustering(G))
    print "\tAverage shortest path length"
    for g in nx.connected_component_subgraphs(G):
        print "\t\t" + str(nx.average_shortest_path_length(g))
Example #4
    def _extract_ccomponents(self, graph, threshold=0, min_size=2):
        # remove all vertices that have a score less than the threshold
        cc_list = []

        if self.less_then:
            less_component_graph = graph.copy()
            for v, d in less_component_graph.nodes_iter(data=True):
                if d.get(self.attribute, False):
                    if d[self.attribute] < threshold:
                        less_component_graph.remove_node(v)
            for cc in nx.connected_component_subgraphs(less_component_graph):
                if len(cc) >= min_size:
                    cc_list.append(cc)

        # remove all vertices that have a score greater than or equal to the threshold
        if self.more_than:
            more_component_graph = graph.copy()
            for v, d in more_component_graph.nodes_iter(data=True):
                if d.get(self.attribute, False):
                    if d[self.attribute] >= threshold:
                        more_component_graph.remove_node(v)

            for cc in nx.connected_component_subgraphs(more_component_graph):
                if len(cc) >= min_size:
                    cc_list.append(cc)
        return cc_list
Example #5
def printStats(filename):
	'''
	Converts json adjacency list into networkx to calculate and print the
	graph's
	  - average clustering coefficient
	  - overall clustering coefficient
	  - maximum diameter
	  - average diameter
	  - number of partitions using community.best_partition
	  - modularity of community.best_partition
	'''
	g = makeGraphFromJSON(filename)
	
	print "Average Clustering Coefficient: %f" % nx.average_clustering(g)
	print "Overall Clustering Coefficient: %f" % nx.transitivity(g)
	
	connected_subgraphs = list(nx.connected_component_subgraphs(g))
	largest = max(nx.connected_component_subgraphs(g), key=len)
	print "# Connected Components: %d" % len(connected_subgraphs)
	print "    Maximal Diameter: %d" % nx.diameter(largest)
	print "    Average Diameter: %f" % nx.average_shortest_path_length(largest)

	# Find partition that maximizes modularity using Louvain's algorithm
	part = community.best_partition(g)	
	print "# Paritions: %d" % (max(part.values()) + 1)
	print "Louvain Modularity: %f" % community.modularity(part, g)
Example #6
    def _extract_ccomponents(self, graph, threshold=0, min_size=2, max_size=20):
        # remove all vertices that have a score less than the threshold
        cc_list = []

        if self.less_then:
            less_component_graph = graph.copy()
            for v, d in less_component_graph.nodes_iter(data=True):
                if self.get_attr_from_noded(d):
                    if self.get_attr_from_noded(d) < threshold:
                        less_component_graph.remove_node(v)
            for cc in nx.connected_component_subgraphs(less_component_graph):
                if len(cc) >= min_size and len(cc) <= max_size:
                    cc_list.append(cc)
                if len(cc) > max_size and self.shrink_graphs:
                    cc_list += list(self.enforce_max_size(cc, min_size, max_size))

        # remove all vertices that have a score greater than or equal to the threshold
        if self.more_than:
            more_component_graph = graph.copy()
            for v, d in more_component_graph.nodes_iter(data=True):
                if self.get_attr_from_noded(d):
                    if self.get_attr_from_noded(d) >= threshold:
                        more_component_graph.remove_node(v)

            for cc in nx.connected_component_subgraphs(more_component_graph):
                if len(cc) >= min_size and len(cc) <= max_size:
                    cc_list.append(cc)

                if len(cc) > max_size and self.shrink_graphs:
                    cc_list += list(self.enforce_max_size(cc, min_size, max_size, choose_cut_node=max))

        return cc_list
Example #7
def get_boundary_for_label(data, classifier, num_label, step):
    # See
    # http://en.wikipedia.org/wiki/Postcodes_in_the_United_Kingdom#Operation_and_application
    # for the various divisions.
    t_start = time.time()
    district = data[data[:,0] == num_label, 1:]

    # Align grid to nearest "step". Also grow the border by 25 grid steps
    # to make sure the marching squares can build a full loop.
    x0, y0 = np.floor(district.min(0) / step - 25) * step
    x1, y1 = np.ceil(district.max(0) / step + 25) * step

    # Use KNN to colour a grid that covers the district
    xx, yy = np.mgrid[x0:x1:step, y0:y1:step]
    prediction = classifier.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    # Split predicted labels into inside/outside
    prediction = (prediction == num_label).astype('u1')

    # We transpose to make reasoning about the lookups easier.
    prediction = prediction.transpose()

    # zero-pad predictions to make sure marching squares creates
    # closed outlines.
    tmp = np.zeros((prediction.shape[0] + 2, prediction.shape[1] + 2), dtype='u1')
    tmp[1:-1,1:-1] = prediction
    prediction = tmp

    outline = networkx.Graph()

    h, w = prediction.shape
    
    for i, j in np.ndindex(h - 1, w - 1):
        # We use tostring() as a cheap, hashable lookup type for the
        # marching squares implementation.

        # Dimension 0 ~ y ~ i, dim 1 ~ x ~ j:
        piter = iter(MARCHING_SQUARE_LOOKUP[prediction[i:i+2,j:j+2].tostring()])

        for rel1, rel2 in zip(piter, piter):
            p1 = int(x0 + step * (j + rel1[0])), int(y0 + step * (i + rel1[1]))
            p2 = int(x0 + step * (j + rel2[0])), int(y0 + step * (i + rel2[1]))

            outline.add_node(p1)
            outline.add_node(p2)
            outline.add_edge(p1, p2)

    # Pick the largest subgraph, other graphs are most likely outliers.
    logging.info(
        "%s: Found %s connected graphs in %.2fs",
        num_label,
        len(networkx.connected_component_subgraphs(outline)),
        time.time() - t_start,
    )
    largest = max(
        networkx.connected_component_subgraphs(outline),
        key=lambda x: x.size()
    )
    return list(shapely.ops.polygonize(largest.edges()))[0]
Example #8
def get_small_worldness(filename):
  import networkx as nx
  threshold = 0
  f = open(filename[:-4]+'_small_worldness.dat','w')
  for i in range(0,101):
    threshold = float(i)/100
    G = get_threshold_matrix(filename, threshold)
    ER_graph = nx.erdos_renyi_graph(nx.number_of_nodes(G), nx.density(G))

    cluster = nx.average_clustering(G)
    ER_cluster = nx.average_clustering(ER_graph)
    
    transi = nx.transitivity(G)
    ER_transi = nx.transitivity(ER_graph)

    print 'threshold: %f, average cluster coefficient: %f, random nw: %f, transitivity: %f, random nw: %f' %(threshold, cluster, ER_cluster, transi, ER_transi)

    f.write("%f\t%f\t%f" % (threshold, cluster, ER_cluster))
    components = nx.connected_component_subgraphs(G)
    ER_components = nx.connected_component_subgraphs(ER_graph)

    values = []
    ER_values = []
    for i in range(len(components)):
      if nx.number_of_nodes(components[i]) > 1:
        values.append(nx.average_shortest_path_length(components[i]))
    for i in range(len(ER_components)):
      if nx.number_of_nodes(ER_components[i]) > 1:
        ER_values.append(nx.average_shortest_path_length(ER_components[i]))
    if len(values) == 0:
      f.write("\t0.")
    else:
      f.write("\t%f" % (sum(values)/len(values)))

    if len(ER_values) == 0:
      f.write("\t0.")
    else:
      f.write("\t%f" % (sum(ER_values)/len(ER_values)))
    
    f.write("\t%f\t%f" % (transi, ER_transi))  
    
    if (ER_cluster*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 :
      S_WS = (cluster/ER_cluster) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))
    else:
      S_WS = 0.
    if (ER_transi*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 :
      S_Delta = (transi/ER_transi) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))
    else:
      S_Delta = 0.
    
    f.write("\t%f\t%f" % (S_WS, S_Delta))  
    f.write("\n")
    
  f.close()  
  print "1:threshold 2:cluster-coefficient 3:random-cluster-coefficient 4:shortest-pathlength 5:random-shortest-pathlength 6:transitivity 7:random-transitivity 8:S-Watts-Strogatz 9:S-transitivity" 
Example #9
def hemst(G, k):
    nc = 1
    mst = G
    point_set = {}
    while nc != k:
        nc = 1
        mst = nx.minimum_spanning_tree(mst)
        weights = np.array([attrs['weight'] for _,_,attrs in mst.edges(data=True)])
        mean_w = weights.mean()
        std = weights.std()

        for a,b,attrs in mst.edges(data=True):
            w = attrs['weight']
            if w > mean_w + std:
                mst.remove_edge(a,b)
                nc+=1

        if nc < k:
            while nc != k:
                remove_longest_edge(mst)
                nc+=1
            break

        if nc > k:
            sG = nx.connected_component_subgraphs(mst)
            centroid_nodes = []
            for g in sG:
                cl = nx.closeness_centrality(g)
                sorted_set_nodes = sorted(cl.items(), key=lambda a: a[1])
                closest_to_c = sorted_set_nodes[0][0]

                point_set[closest_to_c] = g.nodes()
                for p, _ in sorted_set_nodes[1:]:
                    if p in point_set:
                        point_set[closest_to_c]+= point_set[p]

                centroid_nodes.append(closest_to_c)

            edges=itertools.combinations(centroid_nodes,2)
            mst.clear()
            mst.add_nodes_from(centroid_nodes)
            mst.add_edges_from(edges)
            for u,v in mst.edges():
                weight = G.get_edge_data(u,v)["weight"]
                nx.set_edge_attributes(mst, "weight", {(u,v):weight})
                
    sG = nx.connected_component_subgraphs(mst)
    if point_set:
        for g in sG:
            for node in g.nodes():
                if node in point_set:
                    g.add_nodes_from(point_set[node])

    return sG
Example #10
def eigenvector_apl(g, recalculate=False):
    """
    Performs robustness analysis based on eigenvector centrality,
    on the network specified by infile using sequential (recalculate = True)
    or simultaneous (recalculate = False) approach. Returns a list
    with fraction of nodes removed, a list with the corresponding sizes of
    the largest component of the network, and the overall vulnerability
    of the network.
    """

    m = networkx.eigenvector_centrality(g)
    l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
    x = []
    y = []

    average_path_length = 0.0
    number_of_components = 0
    n = len(g.nodes())

    for sg in networkx.connected_component_subgraphs(g):
        average_path_length += networkx.average_shortest_path_length(sg)
        number_of_components += 1

    average_path_length /= number_of_components
    initial_apl = average_path_length

    r = 0.0
    for i in range(1, n - 1):
        g.remove_node(l.pop(0)[0])
        if recalculate:

            try:
                m = networkx.eigenvector_centrality(g, max_iter=5000)
            except networkx.NetworkXError:
                break

            l = sorted(m.items(), key=operator.itemgetter(1),
                       reverse=True)
        average_path_length = 0.0
        number_of_components = 0

        for sg in networkx.connected_component_subgraphs(g):
            if len(sg.nodes()) > 1:
                average_path_length += networkx.average_shortest_path_length(sg)
            number_of_components += 1

        average_path_length = average_path_length / number_of_components

        x.append(i * 1. / initial_apl)
        r += average_path_length * 1. / initial_apl
        y.append(average_path_length * 1. / initial_apl)
    return x, y, r / initial_apl
Example #11
def get_small_worldness(G, thr):
	f = open(out_prfx + 'small_worldness.dat', 'a')
	g = open(out_prfx + 'cc_trans_ER.dat', 'a')
	#g.write('r(thre.)\t\cc_A\tcc_ER\ttran_A\ttran_ER\n')
	ER_graph = nx.erdos_renyi_graph(nx.number_of_nodes(G), nx.density(G))
	# erdos-renyi, binomial random graph generator ...(N,D:density)	
	cluster = nx.average_clustering(G)   # clustering coef. of whole network
	ER_cluster = nx.average_clustering(ER_graph)	#cc of random graph
	
	transi = nx.transitivity(G)
	ER_transi = nx.transitivity(ER_graph)

	g.write("%f\t%f\t%f\t%f\t%f\n" % (thr, cluster,ER_cluster,transi,ER_transi ))
	
	f.write("%f\t%f\t%f" % (thr, cluster, ER_cluster))
	components = nx.connected_component_subgraphs(G)
	ER_components = nx.connected_component_subgraphs(ER_graph)

	values = []
	ER_values = []
	for i in range(len(components)):
		if nx.number_of_nodes(components[i]) > 1:
			values.append(nx.average_shortest_path_length(components[i]))
	for i in range(len(ER_components)):
		if nx.number_of_nodes(ER_components[i]) > 1:
			ER_values.append(nx.average_shortest_path_length(ER_components[i]))
	if len(values) == 0:
		f.write("\t0.")
	else:
		f.write("\t%f" % (sum(values)/len(values))) # pathlenght

	if len(ER_values) == 0:
		f.write("\t0.")
	else:
		f.write("\t%f" % (sum(ER_values)/len(ER_values)))

	f.write("\t%f\t%f" % (transi, ER_transi))  

	if (ER_cluster*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 :
		S_WS = (cluster/ER_cluster) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))  
	else:
		S_WS = 0.
	if (ER_transi*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 :
		S_Delta = (transi/ER_transi) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))
	else:
		S_Delta = 0.

	f.write("\t%f\t%f" % (S_WS, S_Delta)) # S_WS ~ small worldness 
	f.write("\n")

	f.close() 
	g.close()	 
Example #12
def process_network(G, namespace):
    print 'Nodes:', len(G)
    print 'Edges:', G.number_of_edges()
    if namespace.clustering_coefficient:
        print 'Clustering Coefficient:', nx.average_clustering(G)
    if namespace.components:
        components = nx.connected_component_subgraphs(G)
        print 'Number of Components:', len(components)
        isles = [c for c in components if len(c) == 1]
        print 'Isles:', len(isles)
        print 'Largest Component Size:', len(components[0])
    else: components = None
    if namespace.cpl:
        if namespace.approximate_cpl:
            average_shortest_path_length = approximate_cpl
        else:
            print 'Using full slow CPL'
            average_shortest_path_length = nx.average_shortest_path_length
        if components is None:
            components = nx.connected_component_subgraphs(G)
        for i, g in enumerate(g for g in components if
                float(len(g))/float(len(G)) >
                namespace.component_size):
            print 'CPL %d: (%f)' % (i, float(len(g))/float(len(G)))
            print average_shortest_path_length(g)
    if namespace.assortativity:
        print 'Assortativity: NOT IMPLEMENTED.'
    if namespace.degree_distribution:
        hst = nx.degree_histogram(G)

        plt.subplot(121)
        plt.xscale('log')
        plt.yscale('log')
        plt.title("Degree Distribution")
        plt.ylabel("Occurrencies")
        plt.xlabel("Degree")
        plt.plot(range(len(hst)), hst, marker='+')

        plt.subplot(122)
        ccdf = pynetsym.mathutil.ccdf(hst)
        plt.xscale('log')
        plt.yscale('log')
        plt.title("CCDF Degree Distribution")
        plt.ylabel("$P(X>x)$")
        plt.xlabel("Degree")
        plt.plot(range(len(ccdf)), ccdf, color='red')

        if namespace.degree_distribution_out is None:
            plt.show()
        else:
            plt.savefig(namespace.degree_distribution_out)
Example #13
    def one_girvan_newman(self,G):
        def find_best_edge(G0):
            eb = nx.edge_betweenness_centrality(G0)
            eb_il = eb.items()
            eb_il.sort(key=lambda x: x[1], reverse=True)
            return eb_il[0][0]

        num_clusters = len(sorted(nx.connected_component_subgraphs(G),key=len,reverse=True))
        caused_split = False
        while not caused_split:
            G.remove_edge(*find_best_edge(G))
            components = sorted(nx.connected_component_subgraphs(G),key=len,reverse=True)
            if len(components) == num_clusters+1:
                caused_split = True
Example #14
 def _plot_graphs(self):
     self.f,self.ax = plt.subplots(len(self.transition['all']),4,figsize=(14,10)) # first col motion , second distance
     self.f.suptitle('Scene : '+str(self.scene), fontsize=20)
     for feature in [0,2]:
         # plot the different graphs of motion and distance
         for sub,T in enumerate(self.transition['all']):
             plt.sca(self.ax[sub,feature])
             print 'plotting graph : '+str(sub+1)+' from '+str(len(self.transition['all']))
             if feature == 0: 
                 if T not in self.transition['motion']:
                     for i in self.transition['motion']:
                         if i<T: t=i
                 else: t=T
                 G=self.G_motion[t]['graph']
             elif feature == 2: 
                 if T not in self.transition['touch']:
                     for i in self.transition['touch']:
                         if i<T: t=i
                 else: t=T
                 G=self.G_touch[t]['graph']
             # layout graphs with positions using graphviz neato
             pos=nx.graphviz_layout(G,prog="neato")
             # color nodes the same in each connected subgraph
             C=nx.connected_component_subgraphs(G)
             cK = 0
             for i in C:  cK += 1
             C=nx.connected_component_subgraphs(G)
             colors = np.linspace(.2,.6,cK)
             for count,g in enumerate(C):
                 c=[colors[count]]*nx.number_of_nodes(g) # same color...
                 nx.draw(g,pos,node_size=80,node_color=c,vmin=0.0,vmax=1.0,with_labels=False)
                 #nx.draw_networkx_edges(g,pos, with_labels=False, edge_color=c[0], width=6.0, alpha=0.5)
             nx.draw_networkx_nodes(self.G,pos, node_color='b', node_size=100, alpha=1)
             nx.draw_networkx_nodes(self.G,pos, nodelist=['G'], node_color='r', node_size=100, alpha=1)
             nx.draw_networkx_nodes(self.G,pos, nodelist=[str(self.m_obj)], node_color='c', node_size=100, alpha=1)
             nx.draw_networkx_edges(G,pos, alpha=0.8)
             #nx.draw(G)  # networkx draw()
             self.ax[sub,feature].axis('on')
             self.ax[sub,feature].axis('equal')
             plt.tick_params(axis='x',which='both',bottom='off',top='off',labelbottom='off')
             plt.tick_params(axis='y',which='both',right='off',left='off',labelleft='off')
             if feature == 0:
                 self.ax[sub,feature].set_ylabel('frame : '+str(T))
                 if sub == 0:
                     self.ax[sub,feature].set_title('motion')
             if feature == 2:
                 self.ax[sub,feature].set_ylabel('frame : '+str(T))
                 if sub == 0:
                     self.ax[sub,feature].set_title('connectivity')
Example #15
File: rig.py Project: mortonjt/chemifrac
def rig(x, y, G, labs=None, res=1e-9):
    """ Compute the RIG metric on all components.

    Parameters
    ----------
    x : pd.Series or array_like
       Vector of nodes and their abundance in sample x
    y : pd.Series or array_like
       Vector of nodes and their abundance in sample y
    G : nx.Graph
       A connected graph of weighted edges

    Returns
    -------
    float :
       Distance between sample x and sample y

    Note
    ----
    If x or y is None, then 1 will be added to the total distance.
    If they are both None, then the distance will be zero.

    """
    if labs is not None:
        x = pd.Series(x, index=labs)
        y = pd.Series(y, index=labs)

    cost = 0
    _G = copy.deepcopy(G)
    # This converts all of the weights to integers
    for u, v, d in _G.edges(data=True):
        d["weight"] = int(d["weight"] / res)

    # This calculates the largest edge set to offset the insertion cost.
    weights = []
    for comp in nx.connected_component_subgraphs(_G):
        edges = list(comp.edges(data="weight"))
        if len(edges) > 0:
            weights.append(sum(list(zip(*edges))[2]))
    maxW = max(weights) + 1

    for comp in nx.connected_component_subgraphs(_G):
        nodes = set(comp.nodes())
        subx = x[nodes & set(x.keys())]
        suby = y[nodes & set(y.keys())]

        c = rig_component(comp, subx, suby, maxW)
        cost += c
    return cost * res
Example #16
def sensi_diameter(G):
    import networkx as nx
    
    """
    Compute graph sensitivity to node removal, in terms of
    the difference in graph diameter on the removal of each
    node in turn.
     
    This uses local function x_diameter(G), which is modified
    from networkx.diameter(G) to work on XGraphs.
    
    DL Urban (9 Feb 2007)
    """
    
    # Starting diameter for full graph:
    
    if nx.is_connected(G):
        d0 = x_diameter(G)
    else:
        G0 = nx.connected_component_subgraphs(G) [0] # the largest subgraph
        d0 = x_diameter(G0)
        nc = nx.number_connected_components(G)	     # how many are there?
    
    sensi = {}
    
    for node in G.nodes():
        ex = G.edges(node) 		# a set of edges adjacent to node; 
        G.delete_edges_from(ex)		# remove all of these,
        G.delete_node(node)		# and then kill the node, too
        if nx.is_connected(G):
            dx = x_diameter(G)
            cuts = 0
        else:
            Gx = nx.connected_component_subgraphs(G) [0]	# the biggest
            ncx = nx.number_connected_components(G)
            if nc == ncx:
                cuts = 0
            else:
                cuts = 1
            dx = x_diameter(Gx)
        delta = d0 - dx
        G.add_node(node)		# put the node and edges back again
        G.add_edges_from(ex)
        sensi[node] = (cuts, delta)
 

    # create and return a tuple (cuts, delta)
    return sensi
Example #17
    def analyze_graph(self, graph):
        start_time = time.time()
        self.clear_stats()
        self._graph = graph

        self.node_count = nx.number_of_nodes(graph)
        self.edge_count = nx.number_of_edges(graph)

        degree_list = nx.degree(graph).values()

        self.connected_component_count = \
            sum(1 for cx in nx.connected_components(graph))
        if self.connected_component_count == 0:
            return

        self._connected_component_graphs = \
            nx.connected_component_subgraphs(graph)
        self._largest_component_graph = \
            max(nx.connected_component_subgraphs(graph), key=len)

        self.average_degree = sum(degree_list) / float(len(degree_list))
        self._degree_histogram = nx.degree_histogram(graph)
        spc = self.shortest_paths(graph)
        self.shortest_path_count = len(spc)
        self.maximum_shortest_path_length = \
            self.max_shortest_path_length(graph)

        if self.connected_component_count == 1:

            self.diameter = nx.diameter(graph)

            if self.node_count > 1:
                self.average_shortest_path_length = \
                    nx.average_shortest_path_length(graph)
                self.minimum_connectivity = self.min_connectivity(graph)

        if self.node_count > 0:
            self.maximum_degree = max(degree_list)
            self.minimum_degree = min(degree_list)

        if self.node_count > 1:
            dg = nx.degree_centrality(graph)
            self.maximum_degree_centrality = max(list(dg.values()))

        bc = nx.betweenness_centrality(graph)
        self.maximum_between_centrality = max(list(bc.values()))

        self.elapsed_time = time.time() - start_time
Example #18
File: repair.py Project: Marviel/trimesh
def fix_face_winding(mesh):
    '''
    Traverse and change mesh faces in-place to make sure winding is coherent, 
    or that edges on adjacent faces are in opposite directions
    '''
    # we create the face adjacency graph: 
    # every node in g is an index of mesh.faces
    # every edge in g represents two faces which are connected
    graph_all = nx.from_edgelist(mesh.face_adjacency)
    flipped   = 0
    # we are going to traverse the graph using BFS, so we have to start
    # a traversal for every connected component
    for graph in nx.connected_component_subgraphs(graph_all):
        start = graph.nodes()[0]
        # we traverse every pair of faces in the graph
        # we modify mesh.faces and mesh.face_normals in place 
        for face_pair in nx.bfs_edges(graph, start):
            # for each pair of faces, we convert them into edges,
            # find the edge that both faces share, and then see if the edges
            # are reversed in order as you would expect in a well constructed mesh
            pair    = mesh.faces[[face_pair]]            
            edges   = faces_to_edges(pair)
            overlap = group_rows(np.sort(edges,axis=1), require_count=2)
            if len(overlap) == 0:
                # only happens on non-watertight meshes
                continue
            edge_pair = edges[[overlap[0]]]
            if edge_pair[0][0] == edge_pair[1][0]:
                # if the edges aren't reversed, invert the order of one of the faces
                flipped += 1
                mesh.faces[face_pair[1]] = mesh.faces[face_pair[1]][::-1]
    log.info('Flipped %d/%d edges', flipped, len(mesh.faces)*3)
Example #19
def get_giant_component(g):
    """
    Take only the largest connected component of the graph.
    """
    graphs = nx.connected_component_subgraphs(g)
    graphs.sort(key=lambda x: -x.number_of_nodes())
    return graphs[0]
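On NetworkX releases where connected_component_subgraphs returns a generator rather than a list (or no longer exists at all), the sort call above fails; an equivalent sketch using the current API (an adaptation added here, not part of the original project) is:

import networkx as nx

def get_giant_component(g):
    # take the node set of the largest connected component and copy its induced subgraph
    largest_cc = max(nx.connected_components(g), key=len)
    return g.subgraph(largest_cc).copy()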
Example #20
def start(G, name):
    # keep only the largest subgraph
    if not nx.is_connected(G):
        G = nx.connected_component_subgraphs(G)[0]

    # tuple of all parallel python servers to connect with
    ppservers = ()
    #ppservers = ("a3.ft.unicamp.br","a9.ft.unicamp.br","a7.ft.unicamp.br","a8.ft.unicamp.br","a10.ft.unicamp.br")
    job_server = pp.Server(ppservers=ppservers)
    job_server.set_ncpus(1)

    job = []
    capacities = []
    damage = []
    ran = 30  # range
    print "server e variaveis carregados"

    for i in xrange(1, ran):
        # sweep over a range of different initial capacity values for the network
        capacity = 1.0 + (1.0 / float(ran) * float(i))
        job.append(job_server.submit(Attack, (cp.copy(G), capacity), (GlobalEfficiency, setCapacity), ("networkx as nx",)))
        capacities.append(capacity)

    job_server.wait()

    for i in xrange(len(job)):
        damage.append(job[i]())

    # save the results of the tested strategy
    res = (capacities, damage)
    pickle.dump(res, open("dados/planejada/"+name+".pickle", "w"))
    job_server.print_stats()
Example #21
def main():
    files = []
    for i in range(1,26): 
        files.append("db/Minna_no_nihongo_1.%02d.txt" % i)
    for i in range(26,51): 
        files.append("db/Minna_no_nihongo_2.%02d.txt" % i)


    words = get_words_from_files(files)

    G=nx.Graph()

    for w in words:
        G.add_node(w)
        G.node[w]['chapter'] = words[w]['chapter']
        G.node[w]['kana'] = words[w]['kana']
        G.node[w]['meaning'] = words[w]['meaning'][:-1]

    for word1, word2 in itertools.combinations(words,2):
        for w1 in word1[:-1]:
            #print w1.encode('utf-8')
            #print ud.name(w1)
            if "CJK UNIFIED" in ud.name(w1) and w1 in word2:
                #print word1.encode('utf-8'), word2.encode('utf-8')
                G.add_edge(word1, word2)
                break
    
    #G = nx.connected_component_subgraphs(G)
    G = sorted(nx.connected_component_subgraphs(G), key = len, reverse=True)
    #print len(G)
    #nx.draw(G)
    nx.write_graphml(G[0], "kanjis.graphml", encoding='utf-8', prettyprint=True)
Example #22
File: ca.py Project: arvin580/jcvi
def unitigs(args):
    """
    %prog unitigs best.edges

    Reads Celera Assembler's "best.edges" and extract all unitigs.
    """
    p = OptionParser(unitigs.__doc__)
    p.add_option("--maxerr", default=2, type="int", help="Maximum error rate")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    bestedges, = args
    G = read_graph(bestedges, maxerr=opts.maxerr, directed=True)
    H = nx.Graph()
    intconv = lambda x: int(x.split("-")[0])
    for k, v in G.iteritems():
        if k == G.get(v, None):
            H.add_edge(intconv(k), intconv(v))

    nunitigs = nreads = 0
    for h in nx.connected_component_subgraphs(H, copy=False):
        st = [x for x in h if h.degree(x) == 1]
        if len(st) != 2:
            continue
        src, target = st
        path = list(nx.all_simple_paths(h, src, target))
        assert len(path) == 1
        path, = path
        print "|".join(str(x) for x in path)
        nunitigs += 1
        nreads += len(path)
    logging.debug("A total of {0} unitigs built from {1} reads.".format(nunitigs, nreads))
Example #23
def brute_rule(g, *args):
    subgraphs = sorted(nx.connected_component_subgraphs(g), key=len)
    if len(subgraphs) == 1: return er_rule(g)
    # connect different subgraphs of smallest size
    edge = (subgraphs[0].nodes()[0], subgraphs[1].nodes()[0])
    g.add_edge(*edge)
    return edge
Example #24
def get_sim_setting( N=10, p=.3, mu=1., K=5, lam=1. ) :
    """
    get largest connected component of Erdos(N,p) graph
    with exponentially distr. road lengths (avg. mu);
    Choose k road pairs randomly and assign intensity randomly,
    exponential lam
    """
    g = nx.erdos_renyi_graph( N, p )
    g = nx.connected_component_subgraphs( g )[0]
    
    roadnet = nx.MultiDiGraph()
    
    def roadmaker() :
        for i in itertools.count() : yield 'road%d' % i, np.random.exponential( mu )
    road_iter = roadmaker()
    
    for i, ( u,v,data ) in enumerate( g.edges_iter( data=True ) ) :
        label, length = road_iter.next()
        roadnet.add_edge( u, v, label, length=length )
    
    rates = nx.DiGraph()
    ROADS = [ key for u,v,key in roadnet.edges_iter( keys=True ) ]
    for i in range( K ) :
        r1 = random.choice( ROADS )
        r2 = random.choice( ROADS )
        if not rates.has_edge( r1, r2 ) :
            rates.add_edge( r1, r2, rate=0. )
        
        data = rates.get_edge_data( r1, r2 )
        data['rate'] += np.random.exponential( lam )
        
    return roadnet, rates
Example #25
def main():
    """
    Pre-processing: 
        load data, compute centrality measures, write files with node data
    """
    print(nx.__version__)
    # Load network data, create storage dict, and extract main component
    depends=nx.read_edgelist("data/depends.csv",delimiter=",",create_using=nx.DiGraph(),nodetype=str,data=(("weight",time_from_today),))
    depends.name="depends"
    suggests=nx.read_edgelist("data/suggests.csv",delimiter=",",create_using=nx.DiGraph(),nodetype=str,data=(("weight",time_from_today),))
    suggests.name="suggests"
    imports=nx.read_edgelist("data/imports.csv",delimiter=",",create_using=nx.DiGraph(),nodetype=str,data=(("weight",time_from_today),))
    imports.name="imports"
    nets_dict={"depends":depends,"suggests":suggests,"imports":imports}
    for k in nets_dict.keys():
        main_component=nx.connected_component_subgraphs(nets_dict[k].to_undirected())[0].nodes()
        nets_dict[k]=nx.subgraph(nets_dict[k],main_component)
    
    # Run multiple measures on graphs and normalize weights
    measure_list=[nx.in_degree_centrality,nx.betweenness_centrality,nx.pagerank]
    for g in nets_dict.values():
        multiple_measures(g,measure_list)
        normalize_weights(g)
        
    # Output networks in GraphML format (to store node attributes)
    for i in nets_dict.items():
        # print(i[1].edges(data=True))
        nx.write_graphml(i[1],"data/"+i[0]+"_data.graphml")
        print("")
    print("All files written with data")
    
    """Visualization:
Example #26
    def get_underlying_tree(self, connected_component):
        # Find the root (color with only one occurrence)
        root = None
        colors = [self.coloring[node] for node in connected_component.nodes()]
        for index, color in enumerate(colors):
            colors[index] = 'Not a color'
            if color not in colors:
                root = connected_component.nodes()[index]
                break
            colors[index] = color

        # If we can't find a root, something's wrong!
        if root == None:
            print 'WARNING: this coloring has no root', colors
            return connected_component

        # Create a new NetworkX graph to represent the tree
        tree = nx.Graph()
        tree.add_node(root)

        # Remove the root from the connected component
        connected_component = nx.Graph(connected_component)
        connected_component.remove_node(root)

        # Every new connected component is a subtree
        for sub_cc in nx.connected_component_subgraphs(connected_component):
            subtree = self.get_underlying_tree(sub_cc)
            tree = nx.compose(tree, subtree)
            tree.add_edge(root, subtree.root)

        # Root field for use in recursive case to connect tree and subtree
        tree.root = root
        return tree
Example #27
    def get_connected_components(self, color_set):
        """
        A generator for connected components given a specific color set

        :param color_set: The color set
        :return: A generator for connected components (subgraphs) induced by
                 color_set
        """

        # Make an empty set to store vertices
        v_set = set()

        # Find vertices that are colored with colors in color_set
        for index, color in enumerate(self.coloring):
            if color in color_set:
                v_set.add(index)

        cc_list = []
        for new_cc in nx.connected_component_subgraphs(self.graph.subgraph(v_set)):
            found = False
            for n in new_cc.node:
                new_cc.node[n]['color'] = self.coloring[n]
            for i, cc in enumerate(cc_list):
                if nx.is_isomorphic(new_cc, cc,
                        node_match=lambda n1, n2: n1['color'] == n2['color']):
                    cc_list[i].occ += 1
                    found = True
                    break
            if not found:
                new_cc.occ = 1
                cc_list.append(new_cc)
        return cc_list
Example #28
File: Sample.py Project: kamir/NGA
def lanl_graph():
    """ Return the lanl internet view graph from lanl.edges
    """
    import networkx as nx
    try:
        fh=open('lanl_routes.edgelist','r')
    except IOError:
        print "lanl.edges not found"
        raise

    G=nx.Graph()

    time={}
    time[0]=0 # assign 0 to center node
    for line in fh.readlines():
        (head,tail,rtt)=line.split()
        G.add_edge(int(head),int(tail))
        time[int(head)]=float(rtt)

    # get largest component and assign ping times to G0time dictionary
    G0=nx.connected_component_subgraphs(G)[0]
    G0.rtt={}
    for n in G0:
        G0.rtt[n]=time[n]

    return G0
Example #29
def atlas6():
    """ Return the atlas of all connected graphs of 6 nodes or less.
        Attempt to check for isomorphisms and remove.
    """

    Atlas = graph_atlas_g()[0:208]  # 208
    # remove isolated nodes, only connected graphs are left
    U = nx.Graph()  # graph for union of all graphs in atlas
    for G in Atlas:
        zerodegree = [n for n in G if G.degree(n) == 0]
        for n in zerodegree:
            G.remove_node(n)
        U = nx.disjoint_union(U, G)

    # list of graphs of all connected components
    C = nx.connected_component_subgraphs(U)

    UU = nx.Graph()
    # do quick isomorphic-like check, not a true isomorphism checker
    nlist = []  # list of nonisomorphic graphs
    for G in C:
        # check against all nonisomorphic graphs so far
        if not iso(G, nlist):
            nlist.append(G)
            UU = nx.disjoint_union(UU, G)  # union the nonisomorphic graphs
    return UU
Example #30
def is_bipartite_node_set(G,nodes):
    """Returns True if nodes and G/nodes are a bipartition of G.

    Parameters
    ----------
    G : NetworkX graph 

    nodes: list or container
      Check whether nodes form one of the two bipartite sets.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> X = set([1,3])
    >>> nx.is_bipartite_node_set(G,X)
    True

    Notes
    -----
    For connected graphs the bipartite sets are unique.  This function handles
    disconnected graphs.
    """
    S=set(nodes)
    for CC in nx.connected_component_subgraphs(G):
        X,Y=bipartite_sets(CC)
        if not ( (X.issubset(S) and Y.isdisjoint(S)) or 
                 (Y.issubset(S) and X.isdisjoint(S)) ):
            return False
    return True
Example #31
    )

    if args.subsample_graph:
        # subsample g_nx
        nodes = g_nx.nodes(data=False)
        np.random.shuffle(nodes)
        subgraph_num_nodes = int(len(nodes) * subgraph_size)
        g_nx = g_nx.subgraph(nodes[0:subgraph_num_nodes])

    # Check if graph is connected; if not, then select the largest subgraph to continue
    if nx.is_connected(g_nx):
        print("Graph is connected")
    else:
        print("Graph is not connected")
        # take the largest connected component as the data
        g_nx = max(nx.connected_component_subgraphs(g_nx, copy=True), key=len)
        print("Largest subgraph statistics: {} nodes, {} edges".format(
            g_nx.number_of_nodes(), g_nx.number_of_edges()))

    # From the original graph, extract E_test and G_test
    edge_splitter_test = EdgeSplitter(g_nx)
    if args.hin:
        g_test, edge_data_ids_test, edge_data_labels_test = edge_splitter_test.train_test_split(
            p=p,
            edge_label=args.edge_type,
            edge_attribute_label=args.edge_attribute_label,
            edge_attribute_threshold=args.edge_attribute_threshold,
            attribute_is_datetime=args.attribute_is_datetime,
            method=args.sampling_method,
            probs=sampling_probs,
        )
Example #32
def scenario_geant(net_cache=[0.05], n_contents=100000, alpha=[0.6, 0.8, 1.0]):
    """
    Return a scenario based on GEANT topology
    
    Parameters
    ----------
    scenario_id : str
        String identifying the scenario (will be in the filename)
    net_cache : float
        Size of network cache (sum of all caches) normalized by size of content
        population
    n_contents : int
        Size of content population
    alpha : float
        List of alpha of Zipf content distribution
    """
    rate = 12.0
    warmup = 9000
    duration = 36000

    T = 'GEANT'  # name of the topology
    # 240 nodes in the main component
    topology = fnss.parse_topology_zoo(
        path.join(scenarios_dir,
                  'resources/Geant2012.graphml')).to_undirected()
    topology = list(nx.connected_component_subgraphs(topology))[0]

    deg = nx.degree(topology)

    receivers = [v for v in topology.nodes() if deg[v] == 1]  # 8 nodes

    caches = [v for v in topology.nodes() if deg[v] > 2]  # 19 nodes

    # attach sources to topology
    source_attachments = [v for v in topology.nodes()
                          if deg[v] == 2]  # 13 nodes
    sources = []
    for v in source_attachments:
        u = v + 1000  # node ID of source
        topology.add_edge(v, u)
        sources.append(u)

    routers = [
        v for v in topology.nodes() if v not in caches + sources + receivers
    ]

    # randomly allocate contents to sources
    contents = dict([(v, []) for v in sources])
    for c in range(1, n_contents + 1):
        s = choice(sources)
        contents[s].append(c)

    for v in sources:
        fnss.add_stack(topology, v, 'source', {'contents': contents[v]})
    for v in receivers:
        fnss.add_stack(topology, v, 'receiver', {})
    for v in routers:
        fnss.add_stack(topology, v, 'router', {})

    # set weights and delays on all links
    fnss.set_weights_constant(topology, 1.0)
    fnss.set_delays_constant(topology, internal_link_delay, 'ms')

    # label links as internal or external
    for u, v in topology.edges():
        if u in sources or v in sources:
            topology.edge[u][v]['type'] = 'external'
            # this prevents sources to be used to route traffic
            fnss.set_weights_constant(topology, 1000.0, [(u, v)])
            fnss.set_delays_constant(topology, external_link_delay, 'ms',
                                     [(u, v)])
        else:
            topology.edge[u][v]['type'] = 'internal'

    for nc in net_cache:
        size = (float(nc) * n_contents) / len(caches)  # size of a single cache
        C = str(nc)
        for v in caches:
            fnss.add_stack(topology, v, 'cache', {'size': size})
        fnss.write_topology(
            topology,
            path.join(scenarios_dir,
                      topo_prefix + 'T=%s@C=%s' % (T, C) + '.xml'))
        print('[WROTE TOPOLOGY] T: %s, C: %s' % (T, C))

    for a in alpha:
        event_schedule = gen_req_schedule(receivers, rate, warmup, duration,
                                          n_contents, a)
        fnss.write_event_schedule(
            event_schedule,
            path.join(scenarios_dir,
                      es_prefix + 'T=%s@A=%s' % (T, str(a)) + '.xml'))
        print('[WROTE SCHEDULE] T: %s, Alpha: %s, Events: %d' %
              (T, str(a), len(event_schedule)))
Example #33
def probabilistic_hrg(G, num_samples=1, n=None):
    '''
	Args:
	------------
	G: input graph (nx obj)
	num_samples:	 (int) in the 'grow' process, this is number of
								 synthetic graphs to generate
	n: (int) num_nodes; number of nodes in the resulting graphs
	Returns: List of synthetic graphs (H^stars)
	'''
    graphletG = []

    if DEBUG: print G.number_of_nodes()
    if DEBUG: print G.number_of_edges()
    start_time = time.time()
    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    if n is None:
        num_nodes = G.number_of_nodes()
    else:
        num_nodes = n

    if DEBUG: print G.number_of_nodes()
    if DEBUG: print G.number_of_edges()

    graph_checks(G)

    if DEBUG: print
    if DEBUG: print "--------------------"
    if DEBUG: print "-Tree Decomposition-"
    if DEBUG: print "--------------------"

    prod_rules = {}
    if num_nodes >= 500:
        print '  -- subgraphs'
        for Gprime in gs.rwr_sample(G, 2, 300):
            T = td.quickbb(Gprime)
            root = list(T)[0]
            T = td.make_rooted(T, root)
            T = binarize(T)
            root = list(T)[0]
            root, children = T
            #td.new_visit(T, G, prod_rules, TD)
            td.new_visit(T, G, prod_rules)
    else:
        T = td.quickbb(G)
        root = list(T)[0]
        T = td.make_rooted(T, root)
        T = binarize(T)
        root = list(T)[0]
        root, children = T

        # td.new_visit(T, G, prod_rules, TD)
        td.new_visit(T, G, prod_rules)

    if DEBUG: print
    if DEBUG: print "--------------------"
    if DEBUG: print "- Production Rules -"
    if DEBUG: print "--------------------"

    for k in prod_rules.iterkeys():
        if DEBUG: print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            prod_rules[k][d] = float(prod_rules[k][d]) / float(
                s)  # normalization step to create probs not counts.
            if DEBUG: print '\t -> ', d, prod_rules[k][d]

    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(
                ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs,
                 prod_rules[k][x]))
            if DEBUG:
                print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0],
                      rhs, prod_rules[k][x])
            sid += 1
        id += 1
    # print rules
    #print 'P. Rules'
    if DEBUG:
        print("  --- Inference (PHRG) %s seconds ---" %
              (time.time() - start_time))
    start_time = time.time()
    g = pcfg.Grammar('S')
    for (id, lhs, rhs, prob) in rules:
        #print type(id), type(lhs), type(rhs), type(prob)
        if DEBUG: print ' ', id, lhs, rhs, prob
        g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

    if DEBUG: print "Starting max size"
    num_nodes = num_nodes
    num_samples = num_samples

    g.set_max_size(num_nodes)

    if DEBUG: print "Done with max size"

    Hstars = []

    for i in range(0, num_samples):
        rule_list = g.sample(num_nodes)
        if DEBUG: pp.pprint(rule_list)
        hstar = grow(rule_list, g)[0]
        # print "H* nodes: " + str(hstar.number_of_nodes())
        # print "H* edges: " + str(hstar.number_of_edges())
        Hstars.append(hstar)

    if DEBUG:
        print("  --- Graph gen (Fixed-size) %s seconds ---" %
              (time.time() - start_time))
    return Hstars
Example #34
def probabilistic_hrg_deriving_prod_rules(G, n=None):
    '''
	Rule extraction procedure

		'''
    if G is None: return

    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    if n is None:
        num_nodes = G.number_of_nodes()
    else:
        num_nodes = n

    graph_checks(G)

    if DEBUG: print
    if DEBUG: print "--------------------"
    if DEBUG: print "-Tree Decomposition-"
    if DEBUG: print "--------------------"
    prod_rules = {}
    if num_nodes >= 500:
        for Gprime in gs.rwr_sample(G, 2, 300):
            T = td.quickbb(Gprime)
            root = list(T)[0]
            T = td.make_rooted(T, root)
            T = binarize(T)
            root = list(T)[0]
            root, children = T
            td.new_visit(T, G, prod_rules)
    else:
        T = td.quickbb(G)
        root = list(T)[0]
        T = td.make_rooted(T, root)
        T = binarize(T)
        root = list(T)[0]
        root, children = T
        td.new_visit(T, G, prod_rules)
    # print (T)
    # print type(root), type(children)
    # print type(T), len(T), len(T[1])
    # print [type(x) for x in T[1]]
    # print (prod_rules)

    #TODO from enumhrgtree import enum_hrg_tree
    # enum_hrg_tree(T)
    # exit()

    if DEBUG: print
    if DEBUG: print "--------------------"
    if DEBUG: print "- Production Rules -"
    if DEBUG: print "--------------------"

    for k in prod_rules.iterkeys():
        if DEBUG: print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            prod_rules[k][d] = float(prod_rules[k][d]) / float(
                s)  # normalization step to create probs not counts.
            if DEBUG: print '\t -> ', d, prod_rules[k][d]

    # pp.pprint(prod_rules)

    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(
                ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs,
                 prod_rules[k][x]))
            if DEBUG:
                print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0],
                      rhs, prod_rules[k][x])
            sid += 1
        id += 1

    return rules
Example #35
                             source='author1',
                             target='author2',
                             edge_attr='title')
degree_sequence = sorted([d for n, d in GG.degree()],
                         reverse=True)  # degree sequence
degreeCount = collections.Counter(degree_sequence)
deg, cnt = zip(*degreeCount.items())
fig, ax = plt.subplots()
plt.bar(deg, cnt, width=0.80, color='b')
plt.title("Degree Histogram")
plt.ylabel("Count")
plt.xlabel("Degree")
ax.set_xticks([d + 0.4 for d in deg])
ax.set_xticklabels(deg)
plt.axes([0.4, 0.4, 0.5, 0.5])
Gcc = sorted(nx.connected_component_subgraphs(GG), key=len, reverse=True)[0]
pos = nx.spring_layout(GG)
plt.axis('off')
nx.draw_networkx_nodes(GG, pos, node_size=20)
nx.draw_networkx_edges(GG, pos, alpha=0.4)
plt.show()
print('Degree Distribution Histogram done')

#FIRST NETWORK MEASURE
print("first network measure")
print("transitivity:")
print(nx.transitivity(G))

#SECOND NETWORK MEASURE
print("second network measure")
print("reciprocity:")
Example #36
def get_single_data(trial_i, root_input):
    import scipy.io as sio
    cancer_related_genes = {
        4288: 'MKI67',
        1026: 'CDKN1A',
        472: 'ATM',
        7033: 'TFF3',
        2203: 'FBP1',
        7494: 'XBP1',
        1824: 'DSC2',
        1001: 'CDH3',
        11200: 'CHEK2',
        7153: 'TOP2A',
        672: 'BRCA1',
        675: 'BRCA2',
        580: 'BARD1',
        9: 'NAT1',
        771: 'CA12',
        367: 'AR',
        7084: 'TK2',
        5892: 'RAD51D',
        2625: 'GATA3',
        7155: 'TOP2B',
        896: 'CCND3',
        894: 'CCND2',
        10551: 'AGR2',
        3169: 'FOXA1',
        2296: 'FOXC1'
    }
    data = dict()
    f_name = 'overlap_data_%02d.mat' % trial_i
    re = sio.loadmat(root_input + f_name)['save_data'][0][0]
    data['data_X'] = np.asarray(re['data_X'], dtype=np.float64)
    data_y = [_[0] for _ in re['data_Y']]
    data['data_Y'] = np.asarray(data_y, dtype=np.float64)
    data_edges = [[_[0] - 1, _[1] - 1] for _ in re['data_edges']]
    data['data_edges'] = np.asarray(data_edges, dtype=int)
    data_pathways = [[_[0], _[1]] for _ in re['data_pathways']]
    data['data_pathways'] = np.asarray(data_pathways, dtype=int)
    data_entrez = [_[0] for _ in re['data_entrez']]
    data['data_entrez'] = np.asarray(data_entrez, dtype=int)
    data['data_splits'] = {i: dict() for i in range(5)}
    data['data_subsplits'] = {
        i: {j: dict()
            for j in range(5)}
        for i in range(5)
    }
    for i in range(5):
        xx = re['data_splits'][0][i][0][0]['train']
        data['data_splits'][i]['train'] = [_ - 1 for _ in xx[0]]
        xx = re['data_splits'][0][i][0][0]['test']
        data['data_splits'][i]['test'] = [_ - 1 for _ in xx[0]]
        for j in range(5):
            xx = re['data_subsplits'][0][i][0][j]['train'][0][0]
            data['data_subsplits'][i][j]['train'] = [_ - 1 for _ in xx[0]]
            xx = re['data_subsplits'][0][i][0][j]['test'][0][0]
            data['data_subsplits'][i][j]['test'] = [_ - 1 for _ in xx[0]]
    re_path = [_[0] for _ in re['re_path_varInPath']]
    data['re_path_varInPath'] = np.asarray(re_path)
    re_path_entrez = [_[0] for _ in re['re_path_entrez']]
    data['re_path_entrez'] = np.asarray(re_path_entrez)
    re_path_ids = [_[0] for _ in re['re_path_ids']]
    data['re_path_ids'] = np.asarray(re_path_ids)
    re_path_lambdas = [_ for _ in re['re_path_lambdas'][0]]
    data['re_path_lambdas'] = np.asarray(re_path_lambdas)
    re_path_groups = [_[0][0] for _ in re['re_path_groups_lasso'][0]]
    data['re_path_groups_lasso'] = np.asarray(re_path_groups)
    re_path_groups_overlap = [_[0][0] for _ in re['re_path_groups_overlap'][0]]
    data['re_path_groups_overlap'] = np.asarray(re_path_groups_overlap)
    re_edge = [_[0] for _ in re['re_edge_varInGraph']]
    data['re_edge_varInGraph'] = np.asarray(re_edge)
    re_edge_entrez = [_[0] for _ in re['re_edge_entrez']]
    data['re_edge_entrez'] = np.asarray(re_edge_entrez)
    data['re_edge_groups_lasso'] = np.asarray(re['re_edge_groups_lasso'])
    data['re_edge_groups_overlap'] = np.asarray(re['re_edge_groups_overlap'])
    for method in [
            're_path_re_lasso', 're_path_re_overlap', 're_edge_re_lasso',
            're_edge_re_overlap'
    ]:
        res = {fold_i: dict() for fold_i in range(5)}
        for fold_ind, fold_i in enumerate(range(5)):
            res[fold_i]['lambdas'] = re[method][0][fold_i]['lambdas'][0][0][0]
            res[fold_i]['kidx'] = re[method][0][fold_i]['kidx'][0][0][0]
            res[fold_i]['kgroups'] = re[method][0][fold_i]['kgroups'][0][0][0]
            res[fold_i]['kgroupidx'] = re[method][0][fold_i]['kgroupidx'][0][0]
            res[fold_i]['groups'] = re[method][0][fold_i]['groups'][0]
            res[fold_i]['sbacc'] = re[method][0][fold_i]['sbacc'][0]
            res[fold_i]['AS'] = re[method][0][fold_i]['AS'][0]
            res[fold_i]['completeAS'] = re[method][0][fold_i]['completeAS'][0]
            res[fold_i]['lstar'] = re[method][0][fold_i]['lstar'][0][0][0][0]
            res[fold_i]['auc'] = re[method][0][fold_i]['auc'][0]
            res[fold_i]['acc'] = re[method][0][fold_i]['acc'][0]
            res[fold_i]['bacc'] = re[method][0][fold_i]['bacc'][0]
            res[fold_i]['perf'] = re[method][0][fold_i]['perf'][0][0]
            res[fold_i]['pred'] = re[method][0][fold_i]['pred']
            res[fold_i]['Ws'] = re[method][0][fold_i]['Ws'][0][0]
            res[fold_i]['oWs'] = re[method][0][fold_i]['oWs'][0][0]
            res[fold_i]['nextGrad'] = re[method][0][fold_i]['nextGrad'][0]
        data[method] = res
    import networkx as nx
    g = nx.Graph()
    ind_pathways = {_: i for i, _ in enumerate(data['data_entrez'])}
    all_nodes = {ind_pathways[_]: '' for _ in data['re_path_entrez']}
    maximum_nodes, maximum_list_edges = set(), []
    for edge in data['data_edges']:
        if edge[0] in all_nodes and edge[1] in all_nodes:
            g.add_edge(edge[0], edge[1])
    isolated_genes = set()
    maximum_genes = set()
    for cc in nx.connected_component_subgraphs(g):
        if len(cc) <= 5:
            for item in list(cc):
                isolated_genes.add(data['data_entrez'][item])
        else:
            for item in list(cc):
                maximum_nodes = set(list(cc))
                maximum_genes.add(data['data_entrez'][item])
    maximum_nodes = np.asarray(list(maximum_nodes))
    subgraph = nx.Graph()
    for edge in data['data_edges']:
        if edge[0] in maximum_nodes and edge[1] in maximum_nodes:
            if edge[0] != edge[1]:  # remove some self-loops
                maximum_list_edges.append(edge)
            subgraph.add_edge(edge[0], edge[1])
    data['map_entrez'] = np.asarray(
        [data['data_entrez'][_] for _ in maximum_nodes])
    data['edges'] = np.asarray(maximum_list_edges, dtype=int)
    data['costs'] = np.asarray([1.] * len(maximum_list_edges),
                               dtype=np.float64)
    data['x'] = data['data_X'][:, maximum_nodes]
    data['y'] = data['data_Y']
    data['nodes'] = np.asarray(range(len(maximum_nodes)), dtype=int)
    data['cancer_related_genes'] = cancer_related_genes
    for edge_ind, edge in enumerate(data['edges']):
        uu = list(maximum_nodes).index(edge[0])
        vv = list(maximum_nodes).index(edge[1])
        data['edges'][edge_ind][0] = uu
        data['edges'][edge_ind][1] = vv
    method_list = [
        're_path_re_lasso', 're_path_re_overlap', 're_edge_re_lasso',
        're_edge_re_overlap'
    ]
    found_set = {method: set() for method in method_list}
    for method in method_list:
        for fold_i in range(5):
            best_lambda = data[method][fold_i]['lstar']
            kidx = data[method][fold_i]['kidx']
            re = list(data[method][fold_i]['lambdas']).index(best_lambda)
            ws = data[method][fold_i]['oWs'][:, re]
            for item in [kidx[_] for _ in np.nonzero(ws[1:])[0]]:
                if item in cancer_related_genes:
                    found_set[method].add(cancer_related_genes[item])
    data['found_related_genes'] = found_set
    return data
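The largest-component extraction above relies on nx.connected_component_subgraphs, which was removed in networkx 2.4. A minimal sketch of a comparable node split using the surviving nx.connected_components API (assuming networkx >= 2.4; the function and variable names are illustrative, not from the original):

import networkx as nx

def split_components(g, min_size=5):
    """Separate nodes in small components from nodes in the largest component."""
    small_nodes, largest_nodes = set(), set()
    # nx.connected_components yields node sets instead of subgraph copies
    for cc in nx.connected_components(g):
        if len(cc) <= min_size:
            small_nodes |= cc
        elif len(cc) > len(largest_nodes):
            largest_nodes = set(cc)
    return small_nodes, largest_nodes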
Example #37
0
def topology_tiscali2(**kwargs):
    """Return a scenario based on Tiscali topology, parsed from RocketFuel dataset
    Differently from plain Tiscali, in this topology some receivers are appended
    to routers, and only a subset of the routers that are actually on the path of
    some traffic are selected to become ICN routers. These changes make this
    topology more realistic.
    Parameters
    ----------
    seed : int, optional
        The seed used for random number generation
    Returns
    -------
    topology : fnss.Topology
        The topology object
    """
    # 240 nodes in the main component
    topology = fnss.parse_rocketfuel_isp_map(
        path.join(TOPOLOGY_RESOURCES_DIR, '3257.r0.cch')).to_undirected()
    topology = list(nx.connected_component_subgraphs(topology))[0]
    # degree of nodes
    deg = nx.degree(topology)
    # nodes with degree = 1
    onedeg = [v for v in topology.nodes() if deg[v] == 1]  # they are 80
    # we select as caches nodes with highest degrees
    # we use as min degree 6 --> 36 nodes
    # If we changed min degrees, that would be the number of caches we would have:
    # Min degree    N caches
    #  2               160
    #  3               102
    #  4                75
    #  5                50
    #  6                36
    #  7                30
    #  8                26
    #  9                19
    # 10                16
    # 11                12
    # 12                11
    # 13                 7
    # 14                 3
    # 15                 3
    # 16                 2
    icr_candidates = [v for v in topology.nodes() if deg[v] >= 6]  # 36 nodes
    # Add/remove caches to adapt the betweenness centrality of caches
    for i in [181, 208, 211, 220, 222, 250, 257]:
        icr_candidates.remove(i)
    icr_candidates.extend([232, 303, 326, 363, 378])
    # sources are nodes with degree 1 whose neighbor has degree at least equal to 5
    # we assume that sources are nodes connected to a hub
    # they are 44
    sources = [
        v for v in onedeg if deg[list(topology.edge[v].keys())[0]] > 4.5
    ]  # they are 44
    # receivers are nodes with degree 1 whose neighbor has degree at most equal to 4
    # we assume that receivers are nodes not well connected to the network
    # they are 36
    receivers = [
        v for v in onedeg if deg[list(topology.edge[v].keys())[0]] < 4.5
    ]
    # we set router stacks because some strategies will fail if no stacks
    # are deployed
    routers = [v for v in topology.nodes() if v not in sources + receivers]

    # set weights and delays on all links
    fnss.set_weights_constant(topology, 1.0)
    fnss.set_delays_constant(topology, INTERNAL_LINK_DELAY, 'ms')

    # deploy stacks
    topology.graph['icr_candidates'] = set(icr_candidates)
    for v in sources:
        fnss.add_stack(topology, v, 'source')
    for v in receivers:
        fnss.add_stack(topology, v, 'receiver')
    for v in routers:
        fnss.add_stack(topology, v, 'router')

    # label links as internal or external
    for u, v in topology.edges():
        if u in sources or v in sources:
            topology.edge[u][v]['type'] = 'external'
            # this prevents sources to be used to route traffic
            fnss.set_weights_constant(topology, 1000.0, [(u, v)])
            fnss.set_delays_constant(topology, EXTERNAL_LINK_DELAY, 'ms',
                                     [(u, v)])
        else:
            topology.edge[u][v]['type'] = 'internal'
    return IcnTopology(topology)
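The min-degree/cache-count table in the comments above can be recomputed directly from the parsed topology; a short hedged sketch, assuming the same fnss-parsed graph is bound to topology and networkx is imported as nx:

deg = dict(nx.degree(topology))
for min_deg in range(2, 17):
    n_caches = sum(1 for v in topology.nodes() if deg[v] >= min_deg)
    print(min_deg, n_caches)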
Example #38
0
def partition_girvan_newman(graph, max_depth):
    """
    Use your approximate_betweenness implementation to partition a graph.
    Unlike in class, here you will not implement this recursively. Instead,
    just remove edges until more than one component is created, then return
    those components.
    That is, compute the approximate betweenness of all edges, and remove
    them until multiple components are created.

    You only need to compute the betweenness once.
    If there are ties in edge betweenness, break by edge name (e.g.,
    (('A', 'B'), 1.0) comes before (('B', 'C'), 1.0)).

    Note: the original graph variable should not be modified. Instead,
    make a copy of the original graph prior to removing edges.
    See the Graph.copy method https://networkx.github.io/documentation/development/reference/generated/networkx.Graph.copy.html
    Params:
      graph.......A networkx Graph
      max_depth...An integer representing the maximum depth to search.

    Returns:
      A list of networkx Graph objects, one per partition.

    >>> components = partition_girvan_newman(example_graph(), 5)
    >>> components = sorted(components, key=lambda x: sorted(x.nodes())[0])
    >>> sorted(components[0].nodes())
    ['A', 'B', 'C']
    >>> sorted(components[1].nodes())
    ['D', 'E', 'F', 'G']
    """
    ###TODO

    #approximate_betweenness(graph,max_depth)

    graph_new = graph.copy()

    betweenness = approximate_betweenness(graph_new, max_depth)

    #print("betweenness_result::",betweenness)

    def getKey_one(item1):
        return item1[1]

    sort_betweenness = sorted(betweenness.items(),
                              key=getKey_one,
                              reverse=True)

    list_graphs = []

    tuple_list = 0

    while len(list_graphs) <= 1:

        graph_new.remove_edge(sort_betweenness[tuple_list][0][1],
                              sort_betweenness[tuple_list][0][0])
        tuple_to_list = nx.connected_component_subgraphs(graph_new)
        #print("printlist::",tuple_to_list)
        list_graphs = list(tuple_to_list)
        tuple_list = tuple_list + 1
        #print("chk len::",len(list_graphs))

    return list_graphs

def get_components(graph):
    """
    A helper function you may use below.
    Returns the list of all connected components in the given graph.
    """
    return [c for c in nx.connected_component_subgraphs(graph)]
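For comparison, a hedged sketch of the same splitting step using networkx's built-in Girvan-Newman routine (networkx.algorithms.community.girvan_newman, available in networkx >= 2.0). It removes highest-betweenness edges until the graph splits, like the loop above, but uses exact edge betweenness rather than the assignment's approximate_betweenness:

import networkx as nx
from networkx.algorithms.community import girvan_newman

def partition_once(graph):
    # girvan_newman yields successive partitions as tuples of node sets;
    # the first yielded partition is the first split into more than one part
    first_split = next(girvan_newman(graph))
    return [graph.subgraph(nodes).copy() for nodes in first_split]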
Example #40
0
    def fold(self, reverse=False):
        """ fold edges of the PlanarNet

        Returns
        -------

        A solid or a compound of faces

        Notes
        -----

        This method fold the planar net w.r.t to the edge angles.
        It yields a shell member

        """

        for edge in list(self.edges()):
            if0 = edge[0]
            if1 = edge[1]
            ag = self[if0][if1]['angle']
            # handle folding direction
            if reverse:
                angle = -ag
            else:
                angle = ag
            iedge = self[if0][if1]['iedge']

            ed = self.lfaces[if0].subshapes('Edge')[iedge]
            points = ed.poly()
            pdir = np.array(points[1]) - np.array(points[0])
            pabout = ed.center()

            # create 2 subgraphs
            self.remove_edge(if0, if1)
            #  DEPRECATED function in networkx
            lgraphs = list(nx.connected_component_subgraphs(nx.Graph(self)))
            #lgraphs = [ nx.Graph(self).subgraph(c).copy() for c in nx.connected_components(nx.Graph(self)) ] 

            ln0 = lgraphs[0].node.keys()
            ln1 = lgraphs[1].node.keys()
            self.add_edge(if0, if1, angle=ag, iedge=iedge)

            if if1 in ln1:
                lfaces1 = ln1
            else:
                lfaces1 = ln0

            # fold all faces in set lfaces1
            for f in lfaces1:
                self.lfaces[f] = cm.rotated(self.lfaces[f], pabout, pdir, angle)

        # update faces centroid in the Graph

        for iface in self.node:
            face = self.lfaces[iface]
            self.pos[iface] = face.center()[0:2]

        # creates the shell
        self.shell = cm.Shell(self.lfaces)
        if reverse:
            self.folded = False
        else:
            self.folded = True
            asolid = cm.Solid([self.shell])
            vertices = asolid.subshapes('Vertex')
            edges = asolid.subshapes('Edge')
            faces = asolid.subshapes('Face')

            Euler = len(vertices)-len(edges)+len(faces)

            print("V", len(vertices))
            print("E", len(edges))
            print("F", len(faces))
            print("Euler check (2): V-E+F :", Euler)

            if asolid.check():
                print("closed shape")
                # update the graph
            else:
                print("open shape")

            return asolid
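The Euler check printed above is the polyhedron formula V - E + F = 2 for a closed, genus-0 shell; for instance a cube gives 8 - 12 + 6 = 2, while an open (partially folded) surface will generally miss that value.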
Example #41
0
    def getOldDataSummary(self, ls, data):
        """ dump the graphs from a database. This is for legacy
        compatibility with the communityNetworkMonitor project """

        scanQuery = "SELECT * from scan"
        QUERY = """select snode.Id AS sid, dnode.Id AS did, etx.etx_value AS etxv from \
               link, scan, node as snode, node as dnode, etx \
               WHERE link.scan_Id = scan.Id AND snode.Id = link.from_node_Id \
               AND dnode.Id = link.to_node_Id AND etx.link_Id = link.Id \
               AND dnode.scan_Id = scan.Id AND snode.scan_Id = scan.Id AND \
               scan.Id= %d"""

        try:
            q = ls.query("Id", "time", "scan_type",
                         "network").from_statement(scanQuery)
            if len(q.all()) == 0:
                raise
        except:
            print "something went wrong opening the db"
            import sys
            sys.exit(1)

        numScan = len(q.all())
        scanCounter = 0
        data.etxThreshold = 10
        for [scanId, scanTime, scanType, scanNetwork] in q:
            data.scanTree[scanNetwork][scanType].append([scanId, scanTime])

        for net in data.scanTree:
            counter = 0
            # for graz I have one sample every 10 minutes,
            # for ninux/Wien I have one sample every 5 minutes
            if net == "FFGraz":
                networkPenalty = 2
            else:
                networkPenalty = 1
            for scanId in data.scanTree[net]['ETX']:
                queryString = QUERY % scanId[0]
                q = ls.query("sid", "did", "etxv").\
                    from_statement(queryString)
                dirtyG = nx.Graph()
                for s, d, e in q:
                    if e < data.etxThreshold:
                        dirtyG.add_edge(s, d, weight=float(e))

                if len(dirtyG) != 0:
                    G = max(nx.connected_component_subgraphs(dirtyG,
                                                             copy=True),
                            key=len)
                    componentSize = len(G)
                    G.graph = {
                        "network": net,
                        "scan_time": scanId[1],
                        "scan_id": scanId[0]
                    }
                else:
                    G = nx.Graph()
                    componentSize = 0
                if componentSize < 10:
                    continue
                counter += 1

                etxV = [e[2]['weight'] for e in G.edges(data=True)]
                data.routeData[net][scanId[0]]["Graph"] = G
                weightedPaths = nx.shortest_path(G, weight="weight")
                for s in G.nodes():
                    for d in G.nodes():
                        if s == d:
                            continue
                        if d in data.routeData[net][scanId[0]]["data"] and \
                                s in data.routeData[net][scanId[0]]["data"][d]:
                            continue
                        currPath = weightedPaths[s][d]
                        pathWeight = 0
                        for i in range(len(currPath) - 1):
                            pathWeight += G[currPath[i]][currPath[i +
                                                                  1]]["weight"]
                        data.routeData[net][scanId[0]]["data"][s][d] = \
                                      [len(weightedPaths[s][d])-1, pathWeight]
                data.routeData[net][scanId[0]]["Graph"] = G
                nd = filter(lambda x: x == 1, dirtyG.degree().values())
                nl = len(nd)
                nn = len(dirtyG)
                le = len(etxV)
                data.dataSummary[net][scanId[0]][("numLeaves", 9)] = nl
                data.dataSummary[net][scanId[0]][("time", 30)] = scanId[1]
                data.dataSummary[net][scanId[0]][("numNodes", 9)] = nn
                data.dataSummary[net][scanId[0]][("numEdges", 9)] = le
                data.dataSummary[net][scanId[0]][("largestComponent", 16)] = \
                    componentSize
                scanCounter += 1
                if int((100000 * 1.0 * scanCounter / numScan)) % 10000 == 0:
                    print int(
                        (100 * 1.0 * scanCounter / numScan)), "% complete"
Example #42
0
            if is_node_class(t):
                for o in model[t]:
                    if o in G.nodes():
                        G.nodes()[o]['nclass'] = t
                        G.nodes()[o]['ndata'] = model[t][o]
                    else:
                        print('orphaned node', t, o)

        swing_node = ''
        for n1, data in G.nodes(data=True):
            if 'nclass' in data:
                if 'bustype' in data['ndata']:
                    if data['ndata']['bustype'] == 'SWING':
                        swing_node = n1

        sub_graphs = nx.connected_component_subgraphs(G)
        seg_loads = {}  # [name][kva, phases]
        total_kva = 0.0
        #       for sg in sub_graphs:
        #           print (sg.number_of_nodes())
        #           if sg.number_of_nodes() < 10:
        #               print(sg.nodes)
        #               print(sg.edges)
        for n1, data in G.nodes(data=True):
            if 'ndata' in data:
                kva = accumulate_load_kva(data['ndata'])
                if kva > 0:
                    total_kva += kva
                    nodes = nx.shortest_path(G, n1, swing_node)
                    edges = zip(nodes[0:], nodes[1:])
                    #                    print (n1, '{:.2f}'.format(kva), 'kva on', data['ndata']['phases'])
Example #43
0
def main(graph_name):

    G = nx.read_gml(graph_name)
    G = nx.connected_component_subgraphs(G)[0]  # Giant component

    #dir=graph_name.split("fr")[0]
    dir = graph_name.split("mast")[0]
    dir = dir + "roles/"
    dir2 = graph_name.split("mast")[0]

    time_in_system = 100  #minimum amount of time in the system for a user to be included in the statistics

    # clustering, vitality, activity, betweenness,weigh_ins,degree,time_in_system

    top_ten_feature = 'activity'

    print "\n\n", top_ten_feature

    name0 = dir + "overlap_top_ten_" + str(
        top_ten_feature) + "_averages_" + str(
            time_in_system) + "days_excluding_themselves_15.dat"
    file0 = open(name0, 'wt')
    file0.close()

    list_top_tens = []  # collect the top_tens of the system
    list_top_tens_percent_weight_change = []

    #   for node in G.nodes():
    #      print G.node[node]["activity"],G.node[node]["time_in_system"]
    #     G.node[node]["activity"]=float(G.node[node]["activity"])/float(G.node[node]["time_in_system"])
    #    print G.node[node]["activity"]

    f = lambda x: x[1][top_ten_feature]
    membership = map(f, G.nodes(data=True))
    membership.sort()
    top_ten_values = membership[-10:]  #TOP TEN

    # print top_ten_values # the sorted top-tens: from smallest to largest
    #print membership  #the whole sorted list

    cont = 0
    for value in top_ten_values:
        for node in G.nodes():
            if (G.node[node][top_ten_feature]
                    == value) and (node not in list_top_tens):
                list_top_tens.append(node)

                list_top_tens_percent_weight_change.append(
                    float(G.node[node]['percentage_weight_change']))

                name260 = dir2 + "scatter_plot_roles_top_ten_" + str(
                    top_ten_feature) + ".dat"
                file260 = open(name260, 'at')
                print >> file260, G.node[node]['Pi'], G.node[node][
                    'zi'], top_ten_feature
                file260.close()

                break
# if there are more than 10, it will pick just the first 10 according to their id

    name00 = dir + "R6s_and_top_tens_averages_" + str(
        time_in_system) + "days_exclude_R6s.dat"

    file0 = open(name00, 'at')
    print >> file0, top_ten_feature, numpy.mean(
        list_top_tens_percent_weight_change), numpy.std(
            list_top_tens_percent_weight_change)
    file0.close()

    file260.close()

    for node in list_top_tens:
        print G.node[node]['label'], G.node[node][top_ten_feature], len(
            G.neighbors(node)), G.node[node]['Pi'], G.node[node]['zi']

# studying the possible cumulative effect of more than one R6 on the population:
    for node in G.nodes():
        cont = 0
        for n in G.neighbors(node):
            if (n in list_top_tens):
                cont += 1

        G.node[node]["top_ten_overlap"] = int(cont)

    for r in range(len(list_top_tens) + 1):

        list_BMI_changes = []
        list_weight_changes = []
        list_percentage_weight_changes = []
        list_activities = []

        for node in G.nodes():

            if int(G.node[node]["top_ten_overlap"]) == r:

                if node in list_top_tens:  # i exclude the top_tens per se

                    pass
                else:

                    if int(G.node[node]['time_in_system']) > time_in_system:

                        list_BMI_changes.append(
                            float(G.node[node]['final_BMI']) -
                            float(G.node[node]['initial_BMI']))
                        list_weight_changes.append(
                            float(G.node[node]['weight_change']))
                        list_percentage_weight_changes.append(
                            float(G.node[node]['percentage_weight_change']))
                        list_activities.append(
                            float(G.node[node]['activity']) /
                            float(G.node[node]['time_in_system']))

        if len(list_BMI_changes) > 0:
            average_BMI_change = numpy.mean(list_BMI_changes)
            average_weight_change = numpy.mean(list_weight_changes)
            average_percentage_weight_change = numpy.mean(
                list_percentage_weight_changes)
            average_activity = numpy.mean(list_activities)

            deviation_BMI = numpy.std(list_BMI_changes)
            deviation_weight = numpy.std(list_weight_changes)
            deviation_percentage_weight = numpy.std(
                list_percentage_weight_changes)
            deviation_activity = numpy.std(list_activities)

            #print out

            file0 = open(name0, 'at')
            print >> file0, r, len(
                list_BMI_changes
            ), average_percentage_weight_change, deviation_percentage_weight, average_BMI_change, deviation_BMI, average_weight_change, deviation_weight, average_activity, deviation_activity
            file0.close()

#  averages for the neighbors of a given top-ten ########

    for node in list_top_tens:
        neighbors = G.neighbors(node)  #a list of nodes

        average_BMI_change = 0.0
        list_BMI_changes = []

        average_weight_change = 0.0
        list_weight_changes = []

        average_percentage_weight_change = 0.0
        list_percentage_weight_changes = []

        average_activity = 0.0  # note: this will be divided by the number of days!
        list_activities = []

        eff_degree = 0

        for n in G.neighbors(node):

            if int(G.node[n]['time_in_system']) > time_in_system:

                eff_degree = eff_degree + 1.0

                list_BMI_changes.append(
                    float(G.node[n]['final_BMI']) -
                    float(G.node[n]['initial_BMI']))

                list_weight_changes.append(float(G.node[n]['weight_change']))

                list_percentage_weight_changes.append(
                    float(G.node[n]['percentage_weight_change']))

                list_activities.append(
                    float(G.node[n]['activity']) /
                    float(G.node[n]['time_in_system']))

#averages
        average_weight_change = numpy.mean(list_weight_changes)
        average_percentage_weight_change = numpy.mean(
            list_percentage_weight_changes)
        average_BMI_change = numpy.mean(list_BMI_changes)
        average_activity = numpy.mean(list_activities)

        #standard deviation
        deviation_BMI = numpy.std(list_BMI_changes)
        deviation_weight = numpy.std(list_weight_changes)
        deviation_percentage_weight = numpy.std(list_percentage_weight_changes)
        deviation_activity = numpy.std(list_activities)

        # print cont,"R6: ",average_weight_change,deviation_weight,average_BMI_change,deviation_BMI,average_activity,deviation_activity

        #print out
        name1 = dir + "ego_top_ten_" + str(
            top_ten_feature) + "_average_BMI_change_" + str(
                time_in_system) + "days.dat"
        file1 = open(name1, 'at')
        print >> file1, cont, G.node[node]['role'], G.node[node]['label'], len(
            G.neighbors(node)
        ), eff_degree, average_BMI_change, deviation_BMI  #,list_BMI_changes
        file1.close()

        name2 = dir + "ego_top_ten_" + str(
            time_in_system) + "_average_weight_change_" + str(
                time_in_system) + "days.dat"
        file2 = open(name2, 'at')
        print >> file2, cont, G.node[node]['role'], G.node[node]['label'], len(
            G.neighbors(node)
        ), eff_degree, average_weight_change, deviation_weight  #,list_weight_changes
        file2.close()

        name3 = dir + "ego_top_ten_" + str(
            top_ten_feature) + "_average_activity_" + str(
                time_in_system) + "days.dat"
        file3 = open(name3, 'at')
        print >> file3, cont, G.node[node]['role'], G.node[node]['label'], len(
            G.neighbors(node)
        ), eff_degree, average_activity, deviation_activity  #,list_activities
        file3.close()

        name4 = dir + "ego_top_ten_" + str(
            top_ten_feature) + "_dispersions_" + str(
                time_in_system) + "days.dat"
        file4 = open(name4, 'at')
        for i in range(len(list_activities)):
            print >> file4, cont, list_BMI_changes[i], list_weight_changes[
                i], list_activities[i]
        print >> file4, "\n\n"  #to separate roles
        file4.close()

        cont = cont + 1
Example #44
0
def get_inter_cluster_relation(seq_records, geo_id):
    logging.debug('Calculating inter cluster relations on geo_record "%s"..' %
                  (geo_id))
    data = []
    full_g = nx.Graph()
    cluster_genes = {}
    bio_genes = set()
    cur_cluster1 = 0
    # First, inspect all cluster to get cluster_genes
    for record in seq_records:
        for cluster in utils.get_cluster_features(record):
            cur_cluster1 += 1
            cluster_genes[cur_cluster1] = set()

            for cluster_gene in utils.get_cluster_cds_features(
                    cluster, record):
                # We only care about cluster_genes that have a geo match
                for cluster_gene_geo in utils.parse_geo_feature(cluster_gene):
                    # We only care about data from the current geo_id
                    if cluster_gene_geo['rec_id'] == geo_id:
                        cur_gene1 = utils.get_gene_id(cluster_gene)
                        cur_gene1_distances = cluster_gene_geo['dist']
                        cur_gene1_neighbors = set(cur_gene1_distances)

                        # Add each gene to cluster_genes, and to the full_g(raph) and to bio_genes
                        cluster_genes[cur_cluster1].add(cur_gene1)
                        full_g.add_node(cur_gene1)
                        if 'sec_met' in cluster_gene.qualifiers:
                            bio_genes.add(cur_gene1)

                        # Get intra-cluster edges
                        interactions = cur_gene1_neighbors.intersection(
                            cluster_genes[cur_cluster1])
                        update_g(cur_gene1, interactions, cur_gene1_distances,
                                 full_g)

                        # From the second cluster onwards, we'll add inter-cluster edges backwards, i.e.: 2-1, 3-1, 3-2, 4-1, 4-2, etc...
                        if cur_cluster1 != 1:
                            for cur_cluster2 in cluster_genes:
                                if cur_cluster1 != cur_cluster2:
                                    interactions = cur_gene1_neighbors.intersection(
                                        cluster_genes[cur_cluster2])
                                    update_g(cur_gene1, interactions,
                                             cur_gene1_distances, full_g)

    # Remove single nodes
    for node in list(full_g.nodes()):
        if full_g.degree(node) == 0:
            full_g.remove_node(node)

    # Get communities
    community_dict = community.best_partition(full_g)

    number_of_clusters = len(cluster_genes)

    # Now check inter-cluster interactions
    for i in range(1, number_of_clusters + 1):
        cluster1 = cluster_genes[i]

        for j in range(i + 1, number_of_clusters + 1):
            cluster2 = cluster_genes[j]
            cluster3 = cluster1.union(cluster2)

            cluster_pair_g = full_g.subgraph(cluster3)

            communities_present = np.unique(
                [community_dict[n] for n in cluster3 if n in community_dict])

            # CRITERIA 1 = only intra-community edges
            for cur_community in communities_present:
                cur_community_nodes = [
                    n for n in cluster3 if n in community_dict
                    and community_dict[n] == cur_community
                ]
                cur_community_g = cluster_pair_g.subgraph(cur_community_nodes)

                decomposed_g = list(
                    nx.connected_component_subgraphs(cur_community_g))
                for cur_g in decomposed_g:
                    # CRITERIA 2 = no isolates. anything with a clustering_coefficient=0 will be pruned out.
                    clustering_coefficient = nx.clustering(cur_g)

                    pred_nodes = [
                        n for n in clustering_coefficient
                        if clustering_coefficient[n] > 0
                    ]
                    pred_g = cur_g.subgraph(pred_nodes)
                    pred_edges = pred_g.edges()

                    prediction = set(pred_g.nodes())
                    prediction_cluster1 = prediction.intersection(cluster1)
                    prediction_cluster2 = prediction.intersection(cluster2)

                    bio_prediction = prediction.intersection(bio_genes)
                    bio_prediction_cluster1 = prediction_cluster1.intersection(
                        bio_genes)
                    bio_prediction_cluster2 = prediction_cluster2.intersection(
                        bio_genes)

                    #CRITERIA 3 = at least 2 genes per cluster
                    #CRITERIA 5 = at least 1 bio per cluster
                    #CRITERIA 4 = at least 3 bio
                    if (len(prediction_cluster1) >= 2
                            and len(prediction_cluster2) >= 2
                            and len(bio_prediction_cluster1) >= 1
                            and len(bio_prediction_cluster2) >= 1
                            and len(bio_prediction) >= 3):

                        pred_edges1 = [
                            n for n in pred_edges
                            if n[0] in cluster1 and n[1] in cluster1
                        ]
                        pred_edges2 = [
                            n for n in pred_edges
                            if n[0] in cluster2 and n[1] in cluster2
                        ]

                        pred_edges12 = [
                            n for n in pred_edges
                            if n[0] in cluster1 and n[1] in cluster2
                        ]
                        pred_edges21 = [
                            n for n in pred_edges
                            if n[0] in cluster2 and n[1] in cluster1
                        ]
                        inter_cluster_edges = pred_edges12 + pred_edges21

                        data.append({})
                        data[-1]['source'] = {}
                        data[-1]['source']['id'] = i
                        data[-1]['source']['links'] = pred_edges1

                        data[-1]['target'] = {}
                        data[-1]['target']['id'] = j
                        data[-1]['target']['links'] = pred_edges2

                        data[-1]['links'] = inter_cluster_edges
    return data
Example #45
0
def get_nk_lcc_undirected(G):
    G2 = max(nx.connected_component_subgraphs(G), key=len)
    tdl_nodes = G2.nodes()
    nodeListMap = dict(zip(tdl_nodes, range(len(tdl_nodes))))
    G2 = nx.relabel_nodes(G2, nodeListMap, copy=True)
    return G2, nodeListMap
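A hedged equivalent for networkx >= 2.4, where connected_component_subgraphs is no longer available: the largest component is taken as a node set, copied as a subgraph, and relabelled to contiguous integer ids exactly as above (the function name is illustrative):

import networkx as nx

def get_nk_lcc_undirected_nx2(G):
    lcc_nodes = max(nx.connected_components(G), key=len)
    G2 = G.subgraph(lcc_nodes).copy()
    nodeListMap = dict(zip(G2.nodes(), range(G2.number_of_nodes())))
    G2 = nx.relabel_nodes(G2, nodeListMap, copy=True)
    return G2, nodeListMap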
Example #46
0
            R_GC[i].nodes[n]['degree'] = deg
            R_GC[i].nodes[n]['eigenvector'] = eigen
            R_GC[i].nodes[n]['betweenness'] = between
            R_GC[i].nodes[n]['current'] = current
    return R_GC


#%%
R_GC = centralidades2(redes_analisis)
#%%
nodes = np.empty(
    (4, len(R_GC)), dtype=object
)  # the names of the nodes to remove in each case (one row per centrality) are stored here
for i in range(len(R_GC)):
    largo = max(
        nx.connected_component_subgraphs(R_GC[i]), key=len
    ).number_of_nodes(
    )  #int(np.sum([c[1]['Esencialidad'] for c in list(R_GC[i].nodes.data())]))
    nodes[0, i] = [
        b[0] for b in list(
            sorted(R_GC[i].nodes.data(), key=lambda x: -x[1]['degree']))
    ][0:largo]
    nodes[1, i] = [
        b[0] for b in list(
            sorted(R_GC[i].nodes.data(), key=lambda x: -x[1]['eigenvector']))
    ][0:largo]
    nodes[2, i] = [
        b[0] for b in list(
            sorted(R_GC[i].nodes.data(), key=lambda x: -x[1]['betweenness']))
    ][0:largo]
    nodes[3, i] = [
Example #47
0
def S2(A):
    # Returns the size of the largest component that is already thresholded
    #from adjacency matrix A
    G = nx.from_numpy_matrix(A)
    S = nx.number_of_nodes(max(nx.connected_component_subgraphs(G), key=len))
    return S
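On recent networkx the same quantity can be computed without materialising subgraphs; a minimal sketch, assuming networkx >= 3.0 (where from_numpy_matrix was replaced by from_numpy_array):

import networkx as nx

def S2_nx3(A):
    # size of the largest connected component of the thresholded adjacency matrix A
    G = nx.from_numpy_array(A)
    return max((len(c) for c in nx.connected_components(G)), default=0)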
Example #48
0
def graph_stats(G, diameter=False):
    logging.debug("Graph stats: |V|={0}, |E|={1}".format(len(G), G.size()))
    if diameter:
        d = max(nx.diameter(H) for H in nx.connected_component_subgraphs(G))
        logging.debug("Graph diameter: {0}".format(d))
Example #49
0
def cond_test_generation(args, train_loader, test_loader, model, decoder, decoder_name,
        flow_model=None, epoch=-1, oracle=None):

    node_dist = args.node_dist
    edge_index = None
    flow_name = ''
    model.eval()
    if args.flow_model:
        flow_name = args.flow_model

    if not decoder_name:
        decoder_name = ''

    save_gen_base = plots = './visualization/gen_plots/' + args.dataset + '/'
    save_gen_plots = save_gen_base + args.model + str(args.z_dim) + '_' \
        + str(flow_name) + '_' + decoder_name + '/'
    gen_graph_list, gen_graph_copy_list = [], []
    avg_connected_components, avg_triangles, avg_transitivity = [], [], []
    raw_triangles = []
    A_list = []
    test_recon_loss_avg = []
    test_recon_loss_avg.append(0)
    for i, data_batch in enumerate(test_loader):
        batch = extract_batch(args, data_batch)
        # Correct shapes for VGAE processing
        if len(batch[0]['adj'].shape) > 2:
            # We give the full Adj to the encoder
            adj = batch[0]['adj'] + batch[0]['adj'].transpose(2,3)
            node_feats = adj.view(-1, args.max_nodes)
        else:
            node_feats = batch[0]['adj']

        if batch[0]['edges'].shape[0] != 2:
            edge_index = batch[0]['encoder_edges'].long()
        else:
            edge_index = batch[0]['edges']

        z, z_k = model.encode(node_feats, edge_index)
        batch[0]['node_latents'] = z_k
        test_recon_loss = model.decode(batch)
        test_recon_loss_avg[-1] += test_recon_loss.sum(dim=-1).item()

        if args.decoder == 'gran':
            decoder.eval()
            num_nodes_pmf = train_loader.dataset.num_nodes_pmf
            num_adj_batch = batch[0]['adj'].size(0)
            A = decoder._sampling(num_adj_batch, enc_node_feats=z_k)
            A_list += [A[ii, :batch[0]['num_nodes_gt'][ii],
                         :batch[0]['num_nodes_gt'][ii]] for ii in
                       range(num_adj_batch)]
            # This is only needed for constraint-sat eval, padded rows will be
            # masked out again. We have to check for 0-rows before Max Nodes
            # though, which is why we can't just not-pad.
            adj_mats_padded = pad_adj_mat(args, A_list)
            decoder.train()
        else:
            num_nodes = None
            decoder.eval()
            adj_mats = decoder(z_k, edge_index, return_adj=True)[-1]
            decoder.train()
            if args.deterministic_decoding:
                adj_mats = (adj_mats > 0.5).float()
            else:
                adj_mats = torch.bernoulli(adj_mats)

    num_nodes = []
    for adj_mat in A_list:
        g = nx.from_numpy_matrix(adj_mat.detach().cpu().numpy())
        g.remove_edges_from(list(nx.selfloop_edges(g)))
        g_copy = copy.deepcopy(g)
        gen_graph_copy_list.append(g_copy)
        num_nodes.append(g.number_of_nodes())

        if len(g) > 0:
            # process the graphs
            if args.better_vis:
                g = max(nx.connected_component_subgraphs(g), key=len)
            num_connected_components = nx.number_connected_components(g)
            avg_connected_components.append(num_connected_components)
            num_triangles = list(nx.triangles(g).values())
            avg_triangles.append(sum(num_triangles) /
                                 float(len(num_triangles)))
            avg_transitivity.append(nx.transitivity(g))
            raw_triangles.append([num_triangles, len(g.nodes)])
            gen_graph_list.append(g)

    # once graphs are generated
    model.train()
    total = len(gen_graph_list)  # min(3, len(vis_graphs))

    draw_graph_list(gen_graph_list[:args.num_gen_samples], 3, int(total // 3),
                    fname='./visualization/sample/{}/Cond_{}_{}.png'.format(args.namestr,
                                                                       constraint_str,
                                                                       epoch),
                    layout='spring')

    # Evaluate Generated Graphs using GraphRNN metrics
    if args.decoder == 'gran' or args.model == 'gran':
        test_dataset = [test_G for test_G in test_loader.dataset.graphs]
    else:
        test_dataset = [to_networkx(test_G).to_undirected()
                        for test_G in test_loader]
    metrics = evaluate_generated(
        test_dataset, gen_graph_list, args.dataset)
    metrics_copy = evaluate_generated(
        test_dataset, gen_graph_copy_list, args.dataset)
    # Original graphs with nodes removed
    mmd_degree, mmd_clustering, mmd_4orbits = metrics[0], metrics[1], metrics[2]
    mmd_spectral, accuracy = metrics[3], metrics[4]
    mean_connected_comps = sum(
        avg_connected_components) / float(len(avg_connected_components))
    mean_triangles = sum(avg_triangles) / float(len(avg_triangles))
    mean_transitivity = sum(avg_transitivity) / \
        float(len(avg_transitivity))

    # Copied Graphs with nodes not removed
    mmd_degree_copy, mmd_clustering_copy, mmd_4orbits_copy = metrics_copy[
        0], metrics_copy[1], metrics_copy[2]
    mmd_spectral_copy, accuracy_copy = metrics_copy[3], metrics_copy[4]
    test_recon_loss_avg[-1] /= len(test_loader.dataset)
    if args.wandb:
        wandb.log({"Cond Deg": mmd_degree, "Cond Clus": mmd_clustering,
                   "Cond Orb": mmd_4orbits, "Cond Acc": accuracy, "Cond Spec.":
                   mmd_spectral, "Cond Avg_CC": mean_connected_comps,
                   "Cond Avg_Tri": mean_triangles, "Cond Avg_transitivity":
                   mean_transitivity, "Cond Raw_triangles": raw_triangles,
                   "Cond Deg_copy": mmd_degree_copy, "Cond Clus_copy":
                   mmd_clustering_copy, "Cond Orb_copy": mmd_4orbits_copy,
                   "Cond Acc_copy": accuracy_copy, "Cond Spec_copy":
                   mmd_spectral_copy, "Cond Test Constr Loss": constraint_loss,
                   "Cond Test Constr Sat": constr_sat, "Test Recon Loss":
                   test_recon_loss_avg[-1], "test_step": epoch})

    print('Cond. Deg: {:.4f}, Clus.: {:.4f}, Orbit: {:.4f}, Spec.:{:.4f}, Acc: {:.4f}'.format(mmd_degree,
                                                                                        mmd_clustering,
                                                                                        mmd_4orbits,
                                                                                        mmd_spectral,
                                                                                        accuracy))
    print('Cond. Avg CC: {:.4f}, Avg. Tri: {:.4f}, Avg. Trans: {:.4f}'.format(mean_connected_comps, mean_triangles,
                                                                        mean_transitivity))
    return [mmd_degree, mmd_clustering, mmd_4orbits, mmd_spectral, accuracy]
Example #50
0
def analyze(G):
    components = []

    components = nx.connected_component_subgraphs(G)

    i = 0

    for cc in components:
        #Set the connected component for each group
        for node in cc:
            G.node[node]['component'] = i

        #Calculate the in component betweeness, closeness and eigenvector centralities
        cent_betweenness = nx.betweenness_centrality(cc)
        # cent_eigenvector = nx.eigenvector_centrality_numpy(cc)
        cent_eigenvector = nx.eigenvector_centrality(cc)
        cent_closeness = nx.closeness_centrality(cc)

        for name in cc.nodes():
            G.node[name]['cc-betweenness'] = cent_betweenness[name]
            G.node[name]['cc-eigenvector'] = cent_eigenvector[name]
            G.node[name]['cc-closeness'] = cent_closeness[name]

        i += 1

    # Calculate cliques
    cliques = list(nx.find_cliques(G))
    j = 0
    processed_members = []
    for clique in cliques:
        for member in clique:
            if not member in processed_members:
                G.node[member]['cliques'] = []
                processed_members.append(member)
            G.node[member]['cliques'].append(j)
        j += 1

    #calculate degree
    degrees = G.degree()
    for name in degrees:
        G.node[name]['degree'] = degrees[name]

    betweenness = nx.betweenness_centrality(G)
    eigenvector = nx.eigenvector_centrality_numpy(G)
    closeness = nx.closeness_centrality(G)
    pagerank = nx.pagerank(G)
    k_cliques = nx.k_clique_communities(G, 3)

    for name in G.nodes():
        G.node[name]['betweenness'] = betweenness[name]
        G.node[name]['eigenvector'] = eigenvector[name]
        G.node[name]['closeness'] = closeness[name]
        G.node[name]['pagerank'] = pagerank[name]

    for pos, k_clique in enumerate(k_cliques):
        for member in k_clique:
            G.node[member]['k-clique'] = pos

    partitions = community.best_partition(G)

    for key in partitions:
        G.node[key]['modularity'] = partitions[key]

    return G
Example #51
0
nx.draw_networkx_labels(G2, pos=position)
plt.draw()
# But maybe rendering the labels wasn't a very good idea? See how cluttered it became.

connectedComp = nx.connected_components(G2)
connectedComp = list(connectedComp)
countComp = len(connectedComp)
print("This graph has ", countComp, " many connected components")

# It is already sorted but just to make sure, we get the largest component
compLengths = []
for i in range(0, countComp):
    compLengths.append(len(connectedComp[i]))

highestIndex = np.argmax(compLengths)
componentGraphs = list(nx.connected_component_subgraphs(G2))
largestComponent = componentGraphs[highestIndex]
# This is a subgraph with 379 nodes and 914 edges

#nx.draw(largestComponent)
#plt.draw()

betweenValues = nx.betweenness_centrality(largestComponent)

# betweenValues is a dictionary, let's get the values and keys in separate lists
values = list(betweenValues.values())
keys = list(betweenValues.keys())

# find the index of the node with highest betweeness centrality
highestIndex = np.argmax(values)
print("The node id ", keys[highestIndex], " has the centrality degree of ",
Example #52
0
def test_generation(args, config, train_loader, test_loader, decoder, decoder_name,
        flow_model=None, epoch=-1):

    node_dist = args.node_dist
    edge_index = None
    flow_name = ''

    if args.flow_model:
        flow_name = args.flow_model

    if not decoder_name:
        decoder_name = ''

    save_gen_base = plots = './visualization/gen_plots/' + config.dataset.name + '/'
    save_gen_plots = save_gen_base + args.model + str(args.z_dim) + '_' \
        + str(flow_name) + '_' + decoder_name + '/'
    gen_graph_list, gen_graph_copy_list = [], []
    avg_connected_components, avg_triangles, avg_transitivity = [], [], []
    raw_triangles = []

    if args.model == 'gran':
        num_nodes_pmf = train_loader.dataset.num_nodes_pmf
        model = decoder
        model.eval()
        A = decoder._sampling(args.num_gen_samples)
        num_nodes_pmf = torch.from_numpy(num_nodes_pmf).to(model.device)
        num_nodes = torch.multinomial(num_nodes_pmf.float(),
                args.num_gen_samples, replacement=True)  # shape B X 1

        A_list = [ A[ii, :num_nodes[ii], :num_nodes[ii]] for ii in
                  range(args.num_gen_samples) ]
        adj_mats_padded = pad_adj_mat(args, A_list)
        model.train()
    else:  # VAE
        # sample lengths of graphs
        if args.decoder == 'gran':
            num_nodes_pmf = train_loader.dataset.num_nodes_pmf
            num_nodes_pmf = torch.from_numpy(num_nodes_pmf).to(args.dev)
            num_nodes = torch.multinomial(num_nodes_pmf.float(),
                    args.num_gen_samples, replacement=True)  # shape B X 1
        else:
            num_nodes = np.random.choice(node_dist, args.num_gen_samples)

        batch_all_nodes = []
        for i, len_ in enumerate(num_nodes):
            fully_connected = nx.complete_graph(len_)
            edge_index_i = torch.tensor(list(fully_connected.edges)).to(args.dev).t().contiguous()
            batch_all_nodes += [edge_index_i + i * args.max_nodes]

        edge_index = torch.cat(batch_all_nodes, 1).to(args.dev)

        if flow_model is None:
            z_0 = torch.FloatTensor( args.num_gen_samples * args.max_nodes,
                                    args.z_dim).to(args.dev).normal_()
            z_k = z_0
        else:
            z_0 = flow_model.base_dist.sample((args.num_gen_samples,
                                               flow_model.n_components)).squeeze()
            # z_0 = z_0.view(args.num_gen_samples, -1)
            z_k, _ = flow_model.inverse(z_0, edge_index=edge_index)
            z_k = z_k.view(args.num_gen_samples * args.max_nodes, args.z_dim)

        if args.decoder == 'gran':
            decoder.eval()
            A = decoder._sampling(
                args.num_gen_samples, enc_node_feats=z_k)

            A_list = [ A[ii, :num_nodes[ii], :num_nodes[ii]] for ii in
                      range(args.num_gen_samples) ]
            # This is only needed for constraint-sat eval, padded rows will be
            # masked out again. We have to check for 0-rows before Max Nodes
            # though, which is why we can't just not-pad.
            adj_mats_padded = pad_adj_mat(args, A_list)
            decoder.train()
        else:
            num_nodes = None
            decoder.eval()
            adj_mats = decoder(z_k, edge_index, return_adj=True)[-1]
            decoder.train()
            if args.deterministic_decoding:
                adj_mats = (adj_mats > 0.5).float()
            else:
                adj_mats = torch.bernoulli(adj_mats)

    for adj_mat in A_list:
        g = nx.from_numpy_matrix(adj_mat.detach().cpu().numpy())
        g.remove_edges_from(list(nx.selfloop_edges(g)))
        g_copy = copy.deepcopy(g)
        gen_graph_copy_list.append(g_copy)

        if len(g) > 0:
            # process the graphs
            if config.test.better_vis:
                g = max(nx.connected_component_subgraphs(g), key=len)
            num_connected_components = nx.number_connected_components(g)
            avg_connected_components.append(num_connected_components)
            num_triangles = list(nx.triangles(g).values())
            avg_triangles.append(sum(num_triangles) /
                                 float(len(num_triangles)))
            avg_transitivity.append(nx.transitivity(g))
            raw_triangles.append([num_triangles, len(g.nodes)])
            gen_graph_list.append(g)

    # once graphs are generated
    total = len(gen_graph_list)  # min(3, len(vis_graphs))

    draw_graph_list(gen_graph_list[:total], 3, int(total // 3),
                    fname='./visualization/sample/{}/{}_{}.png'.format(args.namestr,
                                                                       constraint_str,
                                                                       epoch),
                    layout='spring')

    # Evaluate Generated Graphs using GraphRNN metrics
    if args.decoder == 'gran' or args.model == 'gran':
        test_dataset = [test_G for test_G in test_loader.dataset.graphs]
    else:
        test_dataset = [to_networkx(test_G).to_undirected()
                        for test_G in test_loader]
    metrics = evaluate_generated(
        test_dataset, gen_graph_list, args.dataset)
    metrics_copy = evaluate_generated(
        test_dataset, gen_graph_copy_list, args.dataset)
    # Original graphs with nodes removed
    mmd_degree, mmd_clustering, mmd_4orbits = metrics[0], metrics[1], metrics[2]
    mmd_spectral, accuracy = metrics[3], metrics[4]
    mean_connected_comps = sum(
        avg_connected_components) / float(len(avg_connected_components))
    mean_triangles = sum(avg_triangles) / float(len(avg_triangles))
    mean_transitivity = sum(avg_transitivity) / \
        float(len(avg_transitivity))

    # Copied Graphs with nodes not removed
    mmd_degree_copy, mmd_clustering_copy, mmd_4orbits_copy = metrics_copy[
        0], metrics_copy[1], metrics_copy[2]
    mmd_spectral_copy, accuracy_copy = metrics_copy[3], metrics_copy[4]
    if args.wandb:
        wandb.log({"Deg": mmd_degree, "Clus": mmd_clustering, "Orb":
                   mmd_4orbits, "Acc": accuracy, "Spec.": mmd_spectral,
                   "Avg_CC": mean_connected_comps, "Avg_Tri": mean_triangles,
                   "Avg_transitivity": mean_transitivity, "Raw_triangles":
                   raw_triangles, "Deg_copy": mmd_degree_copy, "Clus_copy":
                   mmd_clustering_copy, "Orb_copy": mmd_4orbits_copy,
                   "Acc_copy": accuracy_copy, "Spec_copy": mmd_spectral_copy,
                   "Test Constr Loss": constraint_loss, "Test Constr Sat":
                   constr_sat, "test_step": epoch})

    print('Deg: {:.4f}, Clus.: {:.4f}, Orbit: {:.4f}, Spec.:{:.4f}, Acc: {:.4f}'.format(mmd_degree,
                                                                                        mmd_clustering,
                                                                                        mmd_4orbits,
                                                                                        mmd_spectral,
                                                                                        accuracy))
    print('Avg CC: {:.4f}, Avg. Tri: {:.4f}, Avg. Trans: {:.4f}'.format(mean_connected_comps, mean_triangles,
                                                                        mean_transitivity))
    return [mmd_degree, mmd_clustering, mmd_4orbits, mmd_spectral, accuracy]
Example #53
0
# Example From KDD Paper
# Graph is undirected
# G = nx.Graph()
# G.add_edge(1, 2)
# G.add_edge(2, 3)
# G.add_edge(2, 4)
# G.add_edge(3, 4)
# G.add_edge(3, 5)
# G.add_edge(4, 6)
# G.add_edge(5, 6)
# G.add_edge(1, 5)

# Graph must be connected
if not nx.is_connected(G):
    print "Graph must be connected"
    G = list(nx.connected_component_subgraphs(G))[0]

# Graph must be simple
G.remove_edges_from(G.selfloop_edges())
if G.number_of_selfloops() > 0:
    print "Graph must be not contain self-loops"
    exit()

num_nodes = G.number_of_nodes()
print "Number of Nodes:\t" + str(num_nodes)

num_edges = G.number_of_edges()
print "Number of Edges:\t" + str(num_edges)

# To parse a large graph we use 10 samples of size 500 each. It is
# possible to parse the whole graph, but the approximate
Example #54
0
def count_inf_links_nodes_GC(G_period):

    H_period_aux = G_period.copy(
    )  # make a copy to get only the infected links and nodes (for cluster distribution)

    ##### i count inf. nodes and links
    fract_inf_nodes = 0.
    fract_inf_links = 0.
    for node in G_period.nodes():
        if G_period.node[node]["status"] == "I":
            fract_inf_nodes += 1.

            for neighbor in G_period.neighbors(node):

                if G_period.node[neighbor]["status"] == "I":

                    if node < neighbor:  # so i dont count each link twice
                        fract_inf_links += 1.

                ###### i remove non-infected links from the aux subgraph
                else:
                    try:
                        H_period_aux.remove_edge(node, neighbor)
                    except:
                        try:
                            H_period_aux.remove_edge(neighbor, node)
                        except:
                            pass

        else:
            H_period_aux.remove_node(
                node)  # with this, i remove the node and all its links

    try:
        fract_inf_nodes = fract_inf_nodes / float(len(G_period.nodes()))
    except ZeroDivisionError:
        fract_inf_nodes = float('NaN')

    try:
        fract_inf_links = fract_inf_links / float(len(G_period.edges()))
    except ZeroDivisionError:
        fract_inf_links = float('NaN')

    ##### i remove the isolates from aux graph:
    list_to_remove = []
    for node in H_period_aux.nodes():
        if H_period_aux.degree(node) == 0:
            list_to_remove.append(node)

    H_period_aux.remove_nodes_from(list_to_remove)

    # print " # nodes:  in G:", len(G_period.nodes()), " in H_aux:", len(H_period_aux.nodes())
    #print " # edges:  in G:", len(G_period.edges()), " in H_aux:", len(H_period_aux.edges())

    lista_components = []
    ##### i calculate components on the infected subgraph
    #print "components of Infected subgraph:"
    for item in nx.connected_component_subgraphs(H_period_aux):
        try:
            #       print "comp. size:",len(item.nodes()),  "  avg.path lenght within component:",nx.average_shortest_path_length(item)
            lista_components.append(len(item.nodes()))

        except ZeroDivisionError:
            pass
        #print "comp. size:",len(item.nodes())

    ####### i get the GC of the infected subgraph
    try:
        Gc = len(max(nx.connected_component_subgraphs(H_period_aux),
                     key=len)) / float(len(G_period.edges()))
    # print "GC:", Gc, "\n"
    except ValueError:
        Gc = float('NaN')

    return fract_inf_links, fract_inf_nodes, Gc, lista_components
Example #55
0
def scenario_tiscali(net_cache=[0.05],
                     n_contents=100000,
                     alpha=[0.6, 0.8, 1.0]):
    """
    Return a scenario based on Tiscali topology, parsed from RocketFuel dataset
    
    Parameters
    ----------
    scenario_id : str
        String identifying the scenario (will be in the filename)
    net_cache : float
        Size of network cache (sum of all caches) normalized by size of content
        population
    n_contents : int
        Size of content population
    alpha : float
        List of alpha of Zipf content distribution
    """
    rate = 12.0
    warmup = 9000
    duration = 36000

    T = 'TISCALI'  # name of the topology
    # 240 nodes in the main component
    topology = fnss.parse_rocketfuel_isp_map(
        path.join(scenarios_dir, 'resources/3257.r0.cch')).to_undirected()
    topology = list(nx.connected_component_subgraphs(topology))[0]

    deg = nx.degree(topology)
    onedeg = [v for v in topology.nodes() if deg[v] == 1]  # they are 80

    # we select as caches nodes with highest degrees
    # we use as min degree 6 --> 36 nodes
    # If we changed min degrees, that would be the number of caches we would have:
    # Min degree    N caches
    #  2               160
    #  3               102
    #  4                75
    #  5                50
    #  6                36
    #  7                30
    #  8                26
    #  9                19
    # 10                16
    # 11                12
    # 12                11
    # 13                 7
    # 14                 3
    # 15                 3
    # 16                 2
    caches = [v for v in topology.nodes() if deg[v] >= 6]  # 36 nodes

    # sources are nodes with degree 1 whose neighbor has degree at least equal to 5
    # we assume that sources are nodes connected to a hub
    # they are 44
    sources = [
        v for v in onedeg if deg[list(topology.edge[v].keys())[0]] > 4.5
    ]  # they are 44

    # receivers are nodes with degree 1 whose neighbor has degree at most equal to 4
    # we assume that receivers are nodes not well connected to the network
    # they are 36
    receivers = [
        v for v in onedeg if deg[list(topology.edge[v].keys())[0]] < 4.5
    ]

    # we set router stacks because some strategies will fail if no stacks
    # are deployed
    routers = [
        v for v in topology.nodes() if v not in caches + sources + receivers
    ]

    # set weights and delays on all links
    fnss.set_weights_constant(topology, 1.0)
    fnss.set_delays_constant(topology, internal_link_delay, 'ms')

    # randomly allocate contents to sources
    contents = dict([(v, []) for v in sources])
    for c in range(1, n_contents + 1):
        s = choice(sources)
        contents[s].append(c)

    for v in sources:
        fnss.add_stack(topology, v, 'source', {'contents': contents[v]})
    for v in receivers:
        fnss.add_stack(topology, v, 'receiver', {})
    for v in routers:
        fnss.add_stack(topology, v, 'router', {})

    # label links as internal or external
    for u, v in topology.edges():
        if u in sources or v in sources:
            topology.edge[u][v]['type'] = 'external'
            # this prevents sources to be used to route traffic
            fnss.set_weights_constant(topology, 1000.0, [(u, v)])
            fnss.set_delays_constant(topology, external_link_delay, 'ms',
                                     [(u, v)])
        else:
            topology.edge[u][v]['type'] = 'internal'

    for nc in net_cache:
        size = (float(nc) * n_contents) / len(caches)  # size of a single cache
        C = str(nc)
        for v in caches:
            fnss.add_stack(topology, v, 'cache', {'size': size})
        fnss.write_topology(
            topology,
            path.join(scenarios_dir,
                      topo_prefix + 'T=%s@C=%s' % (T, C) + '.xml'))
        print('[WROTE TOPOLOGY] T: %s, C: %s' % (T, C))

    for a in alpha:
        event_schedule = gen_req_schedule(receivers, rate, warmup, duration,
                                          n_contents, a)
        fnss.write_event_schedule(
            event_schedule,
            path.join(scenarios_dir,
                      es_prefix + 'T=%s@A=%s' % (T, str(a)) + '.xml'))
        print('[WROTE SCHEDULE] T: %s, Alpha: %s, Events: %d' %
              (T, str(a), len(event_schedule)))
Example #56
0
def graph_clus(df, DIST_THRESH=DIST_THRESH):
    topics = []
    node_lens = []
    all_dict = {}
    counter = 0
    G = nx.Graph()
    for each in df["Topics"]:
        counter += 1
        topics_here = each.split("\n")
        topics.extend(topics_here)

        all_dict[str(counter)] = topics_here
        node_lens.append(len(topics_here))
        G.add_node(counter)

    topics = set(topics)

    node_list = range(1, counter + 1)

    # for i in range(len(node_list)):
    # 	for j in range(len(node_list)):
    # 		node_list[i].append(-1)

    tot_count = len(node_list)

    for i in range(tot_count):
        for j in range(i + 1, tot_count):
            dist_here = part1.jaccard_coef(all_dict[str(i + 1)],
                                           all_dict[str(j + 1)])
            # print(dist_here)
            if (dist_here > DIST_THRESH):
                G.add_edge(i + 1, j + 1)

    def second_elem(a):
        return a[1]

    # pu.db
    iter_ = 1
    print()
    while (1):
        clusters = list(nx.connected_component_subgraphs(G))
        # pu.db
        num_clusters = len(clusters)

        print("iter_no: " + str(iter_) + ", clusters: " + str(num_clusters),
              end="\r")
        iter_ += 1
        if num_clusters >= 9:
            break

        centralities = list(nx.edge_betweenness_centrality(G).items())
        # pu.db
        centralities.sort(key=second_elem, reverse=True)
        req_edges = centralities[0][0]

        # print(req_edges)
        G.remove_edge(req_edges[0], req_edges[1])
    print()
    nx.draw(G)
    plt.title("Clusters map")
    plt.show()

    clusters = list(nx.connected_component_subgraphs(G))
    all_nodes = []
    for each in clusters:
        all_nodes.append(list(each.nodes))
    # pu.db
    return all_nodes
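
The loop above is a hand-rolled Girvan-Newman split: it repeatedly removes the edge with the highest betweenness centrality until the graph breaks into at least 9 connected components. A minimal sketch of the same idea using the girvan_newman generator shipped with newer networkx releases (the function name graph_clus_gn and the target_clusters parameter are illustrative, not taken from the original code):

import networkx as nx
from networkx.algorithms.community import girvan_newman

def graph_clus_gn(G, target_clusters=9):
    """Return node lists of the first partition with at least target_clusters parts."""
    parts = [list(c) for c in nx.connected_components(G)]
    if len(parts) >= target_clusters:
        return parts
    for communities in girvan_newman(G):  # each step removes the highest-betweenness edge
        parts = [list(c) for c in communities]
        if len(parts) >= target_clusters:
            break
    return parts
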
Example #57
0
def draw_colocalization(G,
                        seed_nodes_1,
                        seed_nodes_2,
                        edge_cmap=plt.cm.autumn_r,
                        export_file='colocalization.json',
                        export_network=False,
                        highlight_nodes=None,
                        k=None,
                        largest_connected_component=False,
                        node_cmap=plt.cm.autumn_r,
                        node_size=10,
                        num_nodes=None,
                        physics_enabled=False,
                        Wprime=None,
                        **kwargs):
    '''
    Implements and displays the network propagation for a given graph and two
    sets of seed nodes. Additional kwargs are passed to visJS_module.

    Inputs:
        - G: a networkX graph
        - seed_nodes_1: first set of nodes on which to initialize the simulation
        - seed_nodes_2: second set of nodes on which to initialize the simulation
        - edge_cmap: matplotlib colormap for edges, optional, default: matplotlib.cm.autumn_r
        - export_file: JSON file to export graph data, default: 'colocalization.json'
        - export_network: export network to Cytoscape, default: False
        - highlight_nodes: list of nodes to place borders around, default: None
        - k: float, optional, optimal distance between nodes for nx.spring_layout(), default: None
        - largest_connected_component: boolean, optional, whether or not to display largest_connected_component,
                                       default: False
        - node_cmap: matplotlib colormap for nodes, optional, default: matplotlib.cm.autumn_r
        - node_size: size of nodes, default: 10
        - num_nodes: the number of the hottest nodes to graph, default: None (all nodes will be graphed)
        - physics_enabled: enable physics simulation, default: False
        - Wprime:  Normalized adjacency matrix (from normalized_adj_matrix)

    Returns:
        - VisJS html network plot (iframe) of the colocalization.
    '''

    # check for invalid nodes in seed_nodes
    invalid_nodes = [(node, 'seed_nodes_1') for node in seed_nodes_1
                     if node not in G.nodes()]
    invalid_nodes.extend([(node, 'seed_nodes_2') for node in seed_nodes_2
                          if node not in G.nodes()])
    for node in invalid_nodes:
        print('Node {} in {} not in graph'.format(node[0], node[1]))
    if invalid_nodes:
        return

    # perform the colocalization
    if Wprime is None:
        Wprime = normalized_adj_matrix(G)
    prop_graph_1 = network_propagation(G, Wprime, seed_nodes_1).to_dict()
    prop_graph_2 = network_propagation(G, Wprime, seed_nodes_2).to_dict()
    prop_graph = {
        node: (prop_graph_1[node] * prop_graph_2[node])
        for node in prop_graph_1
    }
    nx.set_node_attributes(G, name='node_heat', values=prop_graph)

    # find top num_nodes hottest nodes and connected component if requested
    G = set_num_nodes(G, num_nodes)
    if largest_connected_component:
        G = max(nx.connected_component_subgraphs(G), key=len)
    nodes = list(G.nodes())
    edges = list(G.edges())

    # check for empty nodes and edges after getting subgraph of G
    if not nodes:
        print('There are no nodes in the graph. Try increasing num_nodes.')
        return
    if not edges:
        print('There are no edges in the graph. Try increasing num_nodes.')
        return

    # set position of each node
    if k is None:
        pos = nx.spring_layout(G)
    else:
        pos = nx.spring_layout(G, k=k)

    xpos, ypos = zip(*pos.values())
    nx.set_node_attributes(G,
                           name='xpos',
                           values=dict(
                               zip(pos.keys(), [x * 1000 for x in xpos])))
    nx.set_node_attributes(G,
                           name='ypos',
                           values=dict(
                               zip(pos.keys(), [y * 1000 for y in ypos])))

    # set the border width of nodes
    if 'node_border_width' not in kwargs.keys():
        kwargs['node_border_width'] = 2

    border_width = {}
    for n in nodes:
        if n in seed_nodes_1 or n in seed_nodes_2:
            border_width[n] = kwargs['node_border_width']
        elif highlight_nodes is not None and n in highlight_nodes:
            border_width[n] = kwargs['node_border_width']
        else:
            border_width[n] = 0

    nx.set_node_attributes(G, name='nodeOutline', values=border_width)

    # set the shape of each node
    nodes_shape = []
    for node in G.nodes():
        if node in seed_nodes_1:
            nodes_shape.append('triangle')
        elif node in seed_nodes_2:
            nodes_shape.append('square')
        else:
            nodes_shape.append('dot')
    node_to_shape = dict(zip(G.nodes(), nodes_shape))
    nx.set_node_attributes(G, name='nodeShape', values=node_to_shape)

    # add a field for node labels
    if highlight_nodes:
        node_labels = {}
        for node in nodes:
            if node in seed_nodes_1 or node in seed_nodes_2:
                node_labels[node] = str(node)
            elif node in highlight_nodes:
                node_labels[node] = str(node)
            else:
                node_labels[node] = ''
    else:
        node_labels = {n: str(n) for n in nodes}

    nx.set_node_attributes(G, name='nodeLabel', values=node_labels)

    # set the title of each node
    node_titles = [
        str(node[0]) + '<br/>heat = ' + str(round(node[1]['node_heat'], 10))
        for node in G.nodes(data=True)
    ]
    node_titles = dict(zip(nodes, node_titles))
    nx.set_node_attributes(G, name='nodeTitle', values=node_titles)

    # set the color of each node
    node_to_color = visJS_module.return_node_to_color(
        G,
        field_to_map='node_heat',
        cmap=node_cmap,
        color_vals_transform='log')

    # set heat value of edge based off hottest connecting node's value
    node_attr = nx.get_node_attributes(G, 'node_heat')
    edge_weights = {}
    for e in edges:
        if node_attr[e[0]] > node_attr[e[1]]:
            edge_weights[e] = node_attr[e[0]]
        else:
            edge_weights[e] = node_attr[e[1]]

    nx.set_edge_attributes(G, name='edge_weight', values=edge_weights)

    # set the color of each edge
    edge_to_color = visJS_module.return_edge_to_color(
        G,
        field_to_map='edge_weight',
        cmap=edge_cmap,
        color_vals_transform='log')

    # create the nodes_dict with all relevant fields
    nodes_dict = [{
        'id': str(n),
        'border_width': border_width[n],
        'degree': G.degree(n),
        'color': node_to_color[n],
        'node_label': node_labels[n],
        'node_size': node_size,
        'node_shape': node_to_shape[n],
        'title': node_titles[n],
        'x': np.float64(pos[n][0]).item() * 1000,
        'y': np.float64(pos[n][1]).item() * 1000
    } for n in nodes]

    # map nodes to indices for source/target in edges
    node_map = dict(zip(nodes, range(len(nodes))))

    # create the edges_dict with all relevant fields
    edges_dict = [{
        'source': node_map[edges[i][0]],
        'target': node_map[edges[i][1]],
        'color': edge_to_color[edges[i]]
    } for i in range(len(edges))]

    # set node_size_multiplier to increase node size as graph gets smaller
    if 'node_size_multiplier' not in kwargs.keys():
        if len(nodes) > 500:
            kwargs['node_size_multiplier'] = 1
        elif len(nodes) > 200:
            kwargs['node_size_multiplier'] = 3
        else:
            kwargs['node_size_multiplier'] = 5

    kwargs['physics_enabled'] = physics_enabled

    # if node hovering color not set, set default to black
    if 'node_color_hover_background' not in kwargs.keys():
        kwargs['node_color_hover_background'] = 'black'

    # node size determined by size in nodes_dict, not by id
    if 'node_size_field' not in kwargs.keys():
        kwargs['node_size_field'] = 'node_size'

    # node label determined by value in nodes_dict
    if 'node_label_field' not in kwargs.keys():
        kwargs['node_label_field'] = 'node_label'

    # export the network to JSON for Cytoscape
    if export_network:
        node_colors = map_node_to_color(G, 'node_heat', True)
        nx.set_node_attributes(G, name='nodeColor', values=node_colors)
        edge_colors = map_edge_to_color(G, 'edge_weight', True)
        nx.set_edge_attributes(G, name='edgeColor', values=edge_colors)
        visJS_module.export_to_cytoscape(G=G, export_file=export_file)

    return visJS_module.visjs_network(nodes_dict, edges_dict, **kwargs)
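
The colocalization heat above is the element-wise product of two propagation runs, with the propagation itself delegated to network_propagation. For context, here is a minimal sketch of a generic random-walk-with-restart propagation of the kind such functions typically implement (this is an assumption about the general form, not the exact implementation used above; propagate_heat, alpha and n_iter are illustrative names):

import numpy as np
import networkx as nx

def propagate_heat(G, seed_nodes, alpha=0.5, n_iter=100):
    """Generic random-walk-with-restart heat propagation over G."""
    nodes = list(G.nodes())
    idx = {n: i for i, n in enumerate(nodes)}
    A = nx.to_numpy_array(G, nodelist=nodes)
    col_sums = A.sum(axis=0, keepdims=True)
    col_sums[col_sums == 0] = 1.0                      # guard isolated nodes
    Wprime = A / col_sums                              # column-normalized adjacency
    Y = np.zeros(len(nodes))
    Y[[idx[s] for s in seed_nodes]] = 1.0 / len(seed_nodes)
    F = Y.copy()
    for _ in range(n_iter):
        F = alpha * Wprime.dot(F) + (1 - alpha) * Y    # restart at the seed nodes
    return dict(zip(nodes, F))
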
def detect_communities(g=None, comm_opt=None):
    maybe_print(" Detecting communities.", 2, 'i')
    ENABLE_DETECTION = False
    ALGORITHM = 'fluid'
    graph = g
    if not graph:
        maybe_print("   Can't detect community because the graph is undefined (value is None).\n "
                    "      Trying to load from tmp/pruned_graph.gpickle",1,'E')
        try:
            graph = nx.read_gpickle("tmp/pruned_graph.gpickle")
        except Exception:
            raise RuntimeError("Unable to detect communities. Invalid input graph.")

    if not comm_opt:
        raise ValueError("Invalid community detection options.")
    else:
        ENABLE_DETECTION = comm_opt['enable_community_detection'] if 'enable_community_detection' in comm_opt else False
        ALGORITHM = comm_opt['method']['algorithm'] \
            if 'method' in comm_opt and 'algorithm' in comm_opt['method'] else 'fluid_communities'
        LABEL_DETECTION_METHOD = comm_opt['community_label_inference']['method'] \
            if 'community_label_inference' in comm_opt and 'method' in comm_opt['community_label_inference']\
            else 'distributed_semantic'

    # Convert directed graph to undirected graph
    undir_graph = graph.to_undirected()

    if not undir_graph:
        raise ValueError("Unable to perform community detection! Perhaps due to the malformed graph.")
    if ENABLE_DETECTION:
        # Load model for  inferring cluster name using Glove
        glove_model = functions.glove_model
        try:
            if not glove_model:
                GLOVE_MODEL_FILE = config.uni_options['unify_semantic_similarity']['glove_model_file']
                maybe_print("   + Glove model is undefined. Trying to load from " + GLOVE_MODEL_FILE, 2, "i")
                glove_model = Glove.load_stanford(GLOVE_MODEL_FILE)
                maybe_print("   + Model loading completed :)", 2)
        except Exception as inst:
            maybe_print("   + Error while detecting group names. Check whether the Glove model was correctly loaded.", 2,
                        "E")
            print(inst)
        # Run algorithm
        try:
            if ALGORITHM == "fluid_communities":
                # get the largest connected component
                # Get number of communities to be detected
                n_com = comm_opt['method']['params']['n_communities'] \
                    if 'n_communities' in comm_opt['method']['params'] else 4
                enable_pagerank = comm_opt['method']['params']['enable_pagerank_initialization'] \
                    if 'enable_pagerank_initialization' in comm_opt['method']['params'] else True

                gc = max(nx.connected_component_subgraphs(undir_graph), key=len)
                # list of list. Each sublist contain ID of nodes in the same community
                communities = list(asyn_fluidc(gc, n_com,enable_pr=enable_pagerank))
                maybe_print("Detected communities: {0}".format(communities,'i'))
                com_index = -1
                for com in communities:
                    com_index += 1
                    # SVM One class classifier for outlier detection.
                    clf = OneClassSVM(nu=0.90 * outliers_fraction + 0.01, kernel="poly", gamma=0.03, degree=3)
                    #####################
                    # How this works: the program computes a weighted sum over the vectors of all community
                    # members that DO EXIST in the Glove vector space. The scale factor is the ratio between
                    # the node's frequency (the 'weight' attribute) and the sum of the weights of all keywords
                    # that DO EXIST in the vector space; keywords missing from the space are disregarded.
                    # It first extracts the vector representation of each community member: keywords that are
                    # successfully extracted (i.e. exist in the Glove vector space) get a non-zero vector, and
                    # the weights are then computed for the original keywords of these non-zero vectors.
                    # Suggest a label for the community
                    comm_labels = [graph.node[node_id]['label'] for node_id in com]
                    comm_labels_array = np.array(comm_labels)
                    # Now run abstraction by different method
                    suggested_labels = None
                    if LABEL_DETECTION_METHOD == 'distributed_semantic':
                        WINDOW = comm_opt['community_label_inference']['params']['window'] \
                            if 'window' in comm_opt['community_label_inference']['params'] else 3
                        V_WEIGHTS = comm_opt['community_label_inference']['params']['weight_ls'] \
                            if 'weight_ls' in comm_opt['community_label_inference']['params'] else 3
                        COMPOSITION_METHOD = comm_opt['community_label_inference']['params']['composition_method'] \
                            if 'composition_method' in comm_opt['community_label_inference']['params'] else 3
                        words_matrix = extract_vector_from_text_list(comm_labels,
                                                                     model=glove_model,
                                                                     window=WINDOW,
                                                                     vector_weights=V_WEIGHTS)
                        # get indices of rows that are all zeros
                        zeros_indices = np.where(~words_matrix.any(axis=1))[0]
                        maybe_print(' --> Community ' + str(com_index) + ' has ' + str(len(zeros_indices))
                                    + " zero key(s) out of "+ str(len(com)),2,'i')
                        # remove zero rows from words_matrix
                        words_matrix = np.delete(words_matrix, zeros_indices, axis=0)
                        # remaining labels
                        comm_labels_array = np.delete(comm_labels_array,zeros_indices)
                        maybe_print(' --> Remaining labels: {0}'.format(', '.join(comm_labels_array)))

                        # get all the weights in the community, then convert to float by multiplying by 1.0
                        # Compute vector weights according to the composition method
                        vector_weight = None
                        if COMPOSITION_METHOD == 'weighted_average':
                            vector_weight = np.array([graph.node[n]['weight'] for n in com]) * 1.0
                            vector_weight = np.delete(vector_weight, zeros_indices, axis=0)  # remove zero rows
                            # Compute weights -> this is a kind of weighted sum
                            vector_weight = vector_weight/np.sum(vector_weight)  # normalize to obtain scale coefficients
                            vector_weight = vector_weight.reshape((len(vector_weight), 1))  # Transpose to column vector
                        elif COMPOSITION_METHOD == 'average':
                            n_row = len(com) - len(zeros_indices)
                            vector_weight = np.full((n_row,1),1.0/n_row,dtype=np.float)
                        elif COMPOSITION_METHOD == 'vec_sum':
                            vector_weight = np.ones((len(com) - len(zeros_indices), 1))
                        else:
                            raise ValueError('Invalid vector composition method')
                        # print words_matrix.shape, vector_weight.shape
                        assert words_matrix.shape[0] == vector_weight.shape[0], \
                            'Mismatched matrix sizes for community {0} with {1} members and its weight matrix.\n'\
                            .format(com_index, len(com))
                        # Multiply the matrices, then sum the rows to form the representative vector for the community
                        # composition_matrix = np.multiply(words_matrix,vector_weight)
                        # Remove outliers
                        # clf.fit(X=composition_matrix)  # fit the model
                        y_pred = None
                        print words_matrix.shape, vector_weight.flatten().shape
                        if len(comm_labels) < 15:
                            maybe_print("Community {0} has less than 10 members, outliner removal skipped!".format(com))
                            y_pred = np.ones(words_matrix.shape[0])
                        else:
                            clf.fit(X=words_matrix,sample_weight=vector_weight.flatten())  # fit the model
                            # predict with the model. The outcome is an array, each element is the predicted value of
                            # the word/row. It can be 1 (inlier) or -1 (outlier)
                            # y_pred = clf.predict(composition_matrix)
                            y_pred = clf.predict(words_matrix)
                            print y_pred

                        # Weighted AVERAGE composition
                        composition_matrix = np.multiply(words_matrix, vector_weight)
                        # Now keep inliers only
                        filtered_composition_vector = composition_matrix[np.where(y_pred == 1)]
                        # filtered_composition_vector = words_matrix[np.where(y_pred == 1)]
                        # Remove predicted outlier
                        comm_labels_array = np.delete(comm_labels_array, np.where(y_pred == -1))
                        maybe_print('  --> Outlier removal discarded {0} words. Remaining words: {1}'
                                    .format(len(np.where(y_pred == -1)[0]), str(comm_labels_array)))
                        # Sum the matrix by row to form one vector
                        composition_vector = np.sum(filtered_composition_vector, axis=0)
                        # print composition_vector
                        # Search the Glove vector space for the words closest to the composition vector
                        dst = (np.dot(glove_model.word_vectors, composition_vector)
                               / np.linalg.norm(glove_model.word_vectors, axis=1)
                               / np.linalg.norm(composition_vector))
                        word_ids = np.argsort(-dst)
                        # Get the most similar words (top 50 candidates)
                        raw_suggested_labels = [glove_model.inverse_dictionary[x] for x in word_ids[:50]
                                                if x in glove_model.inverse_dictionary]
                        suggested_labels = []
                        # Filter result by POS
                        for w in raw_suggested_labels:
                            if len(w)>2:
                                related_pos = set([syn.pos() for syn in wn.synsets(w)])
                                if related_pos and len(set([u'v',u'a',u's',u'r']) & related_pos) == 0:  # Filter: exclude some pos
                                    suggested_labels.append(w)
                    # Get the most frequent words (sorted by node weight, descending)
                    freqs = [w for w,_ in sorted([(graph.node[n]['label'],graph.node[n]['weight']) for n in com],
                                                 key=lambda e: int(e[1]),reverse=True)]
                    # suggested_labels = glove_model.most_similar_paragraph(comm_labels)
                    if len(suggested_labels) > 5:
                        suggested_labels = suggested_labels[:5]
                    '''
                    # Apply DBPedia Labeler
                    # top10 =[subword for word in freqs[:5] for subword in word.split('_') if en.is_noun(subword)] + freqs[:5]
                    top10 = freqs[:10]
                    print "---> ",top10
                    # DB_labels = DbpediaLabeller.DBPprocess(top10)
                    DB_labels = DbpediaLabeller.DBPprocess(top10)
                    # print 'ZZZZZZZZZzzzzz',comm_labels_array
                    # DB_labels = DbpediaLabeller.DBPprocess(comm_labels_array)
                    print DB_labels
                    if len(DB_labels) >5:
                        DB_labels = DB_labels[:5]
                    for node_id in com:  # sample_community_names[com_index]
                        graph.node[node_id]['cluster_id'] = u'[{0}] Top: {1} \nV.Comp: {2} \nDbpedia: {3}'\
                                                                .format(sample_community_names[com_index],
                                                                        ', '.join(freqs[:5]),
                                                                        ' - '.join(suggested_labels).upper(),
                                                                        ' - '.join(DB_labels).upper())
                     '''
                    for node_id in com:  # sample_community_names[com_index]
                        graph.node[node_id]['cluster_id'] = u'[{0}] Top: {1} \nV.Comp: {2}' \
                            .format(sample_community_names[com_index],
                                    ', '.join(freqs[:5]),
                                    ' - '.join(suggested_labels).upper())
                return graph
        except Exception as inst:
            maybe_print(" Error while running algorithm {0} to detect communities. Error name: {1}. \n"
                        "Perhaps incorrect algorithm name of parameters. Community detection is skipped and community "
                        "label for all nodes is set to be \'unknown\'.".format(ALGORITHM,inst.message), 2, 'E')
            traceback.print_exc()
            return g
    else:
        return g
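
For reference, detect_communities above relies on a patched asyn_fluidc that accepts an enable_pr flag; stock networkx exposes the same fluid-communities algorithm without it. A minimal sketch of the standard call on the largest connected component (the helper name fluid_communities and its parameters are illustrative):

import networkx as nx
from networkx.algorithms.community import asyn_fluidc

def fluid_communities(graph, n_com=4, seed=None):
    """Run fluid communities on the largest connected component of an undirected copy of graph."""
    undirected = graph.to_undirected()
    # asyn_fluidc requires a connected graph, so restrict it to the giant component
    giant = undirected.subgraph(max(nx.connected_components(undirected), key=len)).copy()
    return [set(c) for c in asyn_fluidc(giant, n_com, seed=seed)]
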
def main(graph_name):
  
    H = nx.read_gml(graph_name)
    

  #dir=graph_name.split("fr")[0]
   # dir=graph_name.split("mas")[0]

    name00=graph_name.split(".gml")[0]
    print type(name00)
    name00=name00+"_average_percent_weight_change_per_kshell_clinically_signif.dat"

    list_conn=[]
    for node in H.nodes():  # i remove self loops
        if node in H.neighbors(node):          
            if len(H.neighbors(node))>1:
                H.remove_edge(node,node)             
            else:
                H.remove_node(node)              
        try:
            list_conn.append(len(H.neighbors(node)))
        except:
            pass

    max_connect=max(list_conn)

    for node in H.nodes():        
        if H.node[node]['weigh_ins'] <5: #Adherent filter
            H.remove_node(node)
           # print node, "is going down"

    G = nx.connected_component_subgraphs(H)[0] # Giant component

    print "final size of the GC:",len(G.nodes())

    cum_size_set=float(len(G.nodes()))

    list_percent_weight_change_k_shell=[]
    for index in range (max_connect+1):     
         
        k_core=nx.algorithms.core.k_shell(G,k=index)
        if len (k_core)>0:

            num_users_set=cum_size_set


            num_users_clinically_signif=0.0     

           
           
            for node in k_core:           
                list_percent_weight_change_k_shell.append(float(G.node[node]['percentage_weight_change']))

                if int(index)==12:#inner core
                    G.node[node]['role']="inner_core"


                G.node[node]['kshell_index']=int(index)
                #print node, G.node[node]['kshell_index']
               

                cum_size_set-=1.0


                if G.node [node]['percentage_weight_change']<=-5.0:
                    num_users_clinically_signif+=1.0

               
            print "\n",index,len(k_core),num_users_set/float(len(G.nodes())),num_users_clinically_signif/len(list_percent_weight_change_k_shell),numpy.mean(list_percent_weight_change_k_shell),numpy.std(list_percent_weight_change_k_shell)

            file0=open(name00, 'at')
            print >> file0,index,len(k_core),num_users_set/float(len(G.nodes())),num_users_clinically_signif/len(list_percent_weight_change_k_shell),numpy.mean(list_percent_weight_change_k_shell),numpy.std(list_percent_weight_change_k_shell),
                                                                 
            print  >> file0,stats.shapiro(list_percent_weight_change_k_shell)
# W ranges between 0 and 1 (close to 1 suggests normality); p < 0.05 rejects normality
            

            file0.close()

   # print "size main k-core:",len(nx.algorithms.core.k_shell(G))

    list_nodes_kindex=[]
    for index in range (max_connect+1):  
        list=[]
        for node in G.nodes():
            if  G.node[node]['kshell_index']==index:
                list.append(node)
        if len(list)>0:
            list_nodes_kindex.append(list)

    name1=graph_name.split(".gml")[0]
    name=name1+"_list_of_lists_kshells.dat"        
    file=open(name, 'wt')
    print >> file,list_nodes_kindex   
    file.close()
    #print list_nodes_kindex   
        

    nx.write_gml(G,name1+"_inner_core.gml")
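
As an aside, the per-shell grouping loop above can be written in one pass with nx.core_number, since a node's k-shell index equals its core number; a minimal sketch (the helper name nodes_by_kshell is illustrative, not from the original script):

import networkx as nx
from collections import defaultdict

def nodes_by_kshell(G):
    """Group node ids by k-shell index (core_number requires a graph without self-loops)."""
    shells = defaultdict(list)
    for node, core in nx.core_number(G).items():
        shells[core].append(node)
    # return the groups ordered by increasing k-shell index
    return [shells[k] for k in sorted(shells)]
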
Example #60
0
def topology_rocketfuel_latency(asn,
                                source_ratio=0.1,
                                ext_delay=EXTERNAL_LINK_DELAY,
                                **kwargs):
    """Parse a generic RocketFuel topology with annotated latencies
    To each node of the parsed topology it is attached an artificial receiver
    node. To the routers with highest degree it is also attached a source node.
    Parameters
    ----------
    asn : int
        AS number
    source_ratio : float
        Ratio between number of source nodes (artificially attached) and routers
    ext_delay : float
        Delay on external nodes
    """
    if source_ratio < 0 or source_ratio > 1:
        raise ValueError('source_ratio must be between 0 and 1')
    f_topo = path.join(TOPOLOGY_RESOURCES_DIR, 'rocketfuel-latency', str(asn),
                       'latencies.intra')
    topology = fnss.parse_rocketfuel_isp_latency(f_topo).to_undirected()
    topology = list(nx.connected_component_subgraphs(topology))[0]
    # First mark all current links as internal
    for u, v in topology.edges_iter():
        topology.edge[u][v]['type'] = 'internal'
    # Note: there is no need to filter out degree-1 nodes because every node has a
    # higher degree; degrees are computed only to decide where to attach sources
    routers = topology.nodes()
    # Source attachment
    n_sources = int(source_ratio * len(routers))
    sources = ['src_%d' % i for i in range(n_sources)]
    deg = nx.degree(topology)

    # Attach sources based purely on degree; note that they may end up quite clustered
    routers = sorted(routers, key=lambda k: deg[k], reverse=True)
    for i in range(len(sources)):
        topology.add_edge(sources[i],
                          routers[i],
                          delay=ext_delay,
                          type='external')

    # Here we could instead try to attach them per cluster:
#     clusters = compute_clusters(topology, n_sources, distance=None, n_iter=1000)
#     source_attachments = [max(cluster, key=lambda k: deg[k]) for cluster in clusters]
#     for i in range(len(sources)):
#         topology.add_edge(sources[i], source_attachments[i], delay=ext_delay, type='external')

    # attach artificial receiver nodes to ICR candidates
    receivers = ['rec_%d' % i for i in range(len(routers))]
    for i in range(len(routers)):
        topology.add_edge(receivers[i], routers[i], delay=0, type='internal')
    # Set weights to latency values
    for u, v in topology.edges_iter():
        topology.edge[u][v]['weight'] = topology.edge[u][v]['delay']
    # Deploy stacks on nodes
    topology.graph['icr_candidates'] = set(routers)
    for v in sources:
        fnss.add_stack(topology, v, 'source')
    for v in receivers:
        fnss.add_stack(topology, v, 'receiver')
    for v in routers:
        fnss.add_stack(topology, v, 'router')
    return IcnTopology(topology)
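
A hedged usage sketch (illustrative only: AS 3257 matches the Tiscali dataset referenced earlier, and the call assumes TOPOLOGY_RESOURCES_DIR contains the corresponding rocketfuel-latency files):

# Build the annotated AS 3257 topology with 10% of routers backed by a source node
topology = topology_rocketfuel_latency(3257, source_ratio=0.1)
print('%d nodes, %d edges' % (topology.number_of_nodes(), topology.number_of_edges()))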