Example #1
def allRW(G, p):
	if p > 1:
		p = 1
	n = int(p * nx.number_of_nodes(G))
	G1 = nx.Graph()
	while nx.number_of_nodes(G1) < n:
		count = 0
		start_id = random.randint(0, nx.number_of_nodes(G) - 1)
		s_node = list(G.nodes())[start_id]
		now_node = s_node
		# walk for at most 5*n steps, then restart from a fresh random node
		while count < 5 * n:
			neighbor_list = list(G.neighbors(now_node))
			for node in neighbor_list:
				G1.add_edge(node, now_node)
			next_id = random.randint(0, len(neighbor_list) - 1)
			next_node = neighbor_list[next_id]
			count += 1
			now_node = next_node
			if nx.number_of_nodes(G1) >= n:
				break
	return G1
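A minimal usage sketch for allRW (the Barabási–Albert test graph and the 0.3 sampling fraction are illustrative choices, not part of the original):

import random
import networkx as nx

G = nx.barabasi_albert_graph(100, 3)   # hypothetical test graph
sample = allRW(G, 0.3)                 # walk until roughly 30% of the nodes are collected
print(nx.number_of_nodes(sample), nx.number_of_edges(sample))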
Example #2
def RWall2(G,p):
    check = False
    while not check:
        count = 0
        G1 = nx.Graph()
        n = int(p*nx.number_of_nodes(G))
        start_id = random.randint(0,nx.number_of_nodes(G)-1)
        s_node = list(G.nodes())[start_id]
        G1.add_node(s_node)

        now_node = s_node
        while True:
            neighbor_list = list(G.neighbors(now_node))
            next_id = random.randint(0, len(neighbor_list) - 1)
            next_node = neighbor_list[next_id]

            G1.add_node(next_node)
            count += 1

            if random.random() < 0:  # jump probability is 0 as written, so this restart branch never fires
                next_id = random.randint(0, nx.number_of_nodes(G1) - 1)
                next_node = list(G1.nodes())[next_id]
            now_node = next_node
            if nx.number_of_nodes(G1) >= n:
                check = True
                break
            if count > 50*n:
                break

    return NodeConnect(G,G1)
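NodeConnect is not shown in these snippets; a plausible stand-in, under the assumption that its job is to connect the sampled nodes with the edges they share in the original graph:

def NodeConnect(G, G1):
    # assumption: induce the original graph's edges on the sampled node set
    return G.subgraph(G1.nodes()).copy()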
Example #3
def BFS(G, p):
    if p > 1:
        p = 1
    n = int(p * nx.number_of_nodes(G))
    G1 = nx.Graph()
    check = False
    while nx.number_of_nodes(G1) < n:
        process = []
        start_id = random.randint(0, nx.number_of_nodes(G) - 1)
        s_node = list(G.nodes())[start_id]
        process.append(s_node)
        while True:
            now_node = process[0]
            for next_node in G.neighbors(now_node):
                if next_node not in G1.nodes():
                    process.append(next_node)
                G1.add_node(now_node)
                G1.add_node(next_node)
            process.remove(now_node)
            if nx.number_of_nodes(G1) >= n:
                check = True
                break
            if len(process) == 0:
                break
        if check:
            break
    return NodeConnect(G, G1)
Example #4
def get_global_efficiency(filename):
    import networkx as nx

    threshold = 0
    f = open(filename[:-4] + "_global_efficiency.dat", "w")
    g = open(filename[:-4] + "_node_global_efficiency.dat", "w")
    for i in range(0, 101):
        threshold = float(i) / 100
        G = get_threshold_matrix(filename, threshold)
        global_efficiency = 0.0
        for node_i in G:
            sum_inverse_dist = 0.0
            for node_j in G:
                if node_i != node_j:
                    if nx.has_path(G, node_i, node_j) == True:
                        sum_inverse_dist += 1.0 / nx.shortest_path_length(G, node_i, node_j)
            g.write("%d\t%f\t%f\n" % ((node_i + 1), threshold, (sum_inverse_dist / nx.number_of_nodes(G))))
            global_efficiency += sum_inverse_dist / (nx.number_of_nodes(G) - 1.0)
        g.write("\n")
        global_efficiency = global_efficiency / nx.number_of_nodes(G)
        f.write("%f\t%f\n" % (threshold, global_efficiency))
        print "global efficiency for threshold %f: %f " % (threshold, global_efficiency)

    f.close()
    g.close()
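For reference, NetworkX 2.0+ ships this measure as a built-in; a minimal sketch of the per-threshold value (get_threshold_matrix is the helper the snippet already relies on):

G = get_threshold_matrix(filename, threshold)
eff = nx.global_efficiency(G)  # average of 1/d(u, v) over all node pairs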
def eccentricityAttributes(graph):
	return_values = []
	#Average effective eccentricity
	eccVals = []
	e = 0
	for n in graph.nodes():
		try: 
			eccVals.append(nx.eccentricity(graph, v=n))	
		except nx.NetworkXError:
			eccVals.append(0)
	eccSum = 0
	center_nodes = 0
	phobic = 0
	diameter = max(eccVals)
	radius = min(eccVals)
	for i in range(len(eccVals)):
		if eccVals[i] ==  radius:
			center_nodes += 1
			if graph.nodes[i]['hydro'] == 'phobic':  # assumes nodes are labeled 0..N-1
				phobic += 1
		eccSum += eccVals[i]
	return_values.append(eccSum / float(nx.number_of_nodes(graph)))	
	#Effective diameter
	return_values.append(diameter)
	#Effective radius
	return_values.append(radius)
	#Percentage central nodes
	return_values.append(center_nodes / float(nx.number_of_nodes(graph)))
	#Percentage central nodes that are hydrophobic
	return_values.append(phobic / float(center_nodes))
	return return_values
def fast_search(G, F, k, n, ind):
	start = numpy.ones(networkx.number_of_nodes(G))
	C = laplacian_complete(networkx.number_of_nodes(G))
	A = weighted_adjacency_complete(G, F, ind)
	CAC = fast_cac(G, F, ind)

	return simple_spectral_cut(CAC, start, F, G, 1., k, n, ind)
Example #7
def algorithm(w1,w2,w3,w4,G1,G2,G3,G4):
	try:
		cc = np.array([nx.average_clustering(G1, weight='weight'),
		               nx.average_clustering(G2, weight='weight'),
		               nx.average_clustering(G3, weight='weight'),
		               nx.average_clustering(G4, weight='weight')])
		spl = np.array([nx.average_shortest_path_length(G1, weight='weight'),
		                nx.average_shortest_path_length(G2, weight='weight'),
		                nx.average_shortest_path_length(G3, weight='weight'),
		                nx.average_shortest_path_length(G4, weight='weight')])
		nds = np.array([nx.number_of_nodes(G1), nx.number_of_nodes(G2),
		                nx.number_of_nodes(G3), nx.number_of_nodes(G4)])
		edgs = np.array([nx.number_of_edges(G1), nx.number_of_edges(G2),
		                 nx.number_of_edges(G3), nx.number_of_edges(G4)])
		if valid(cc):
			cc=stats.zscore(cc)
		else:
			cc=np.array([.1,.1,.1,.1])
		cc= cc-min(cc)+.1
		if valid(spl):
			spl=stats.zscore(spl)
		else:
			spl=np.array([.1,.1,.1,.1])
		spl= spl-min(spl)+.1
		if valid(nds):
			nds=stats.zscore(nds)
		else:
			nds=np.array([.1,.1,.1,.1])
		nds = nds-min(nds)+.1
		if valid(edgs):
			edgs=stats.zscore(edgs)
		else:
			edgs=np.array([.1,.1,.1,.1])
		edgs=edgs-min(edgs)+.1
		r1=(w1*cc[0]+w2*spl[0]+w3*nds[0]+w4*edgs[0])*1000
		r2=(w1*cc[1]+w2*spl[1]+w3*nds[1]+w4*edgs[1])*1000
		r3=(w1*cc[2]+w2*spl[2]+w3*nds[2]+w4*edgs[2])*1000
		r4=(w1*cc[3]+w2*spl[3]+w3*nds[3]+w4*edgs[3])*1000
		d={'Player 1:': r1, 'Player 2:': r2,'Player 3:': r3, 'Player 4:': r4}
		rank = sorted(d.items(), key=lambda x: x[1], reverse=True)
		return ["USAU RANKINGS",str(rank[0][0])+ " " + str(int(rank[0][1])),str(rank[1][0])+" "+ str(int(rank[1][1])),str(rank[2][0])+" "+ str(int(rank[2][1])),str(rank[3][0])+" "+str(int(rank[3][1]))]
	except Exception:
		return ["Unable to compute rankings!  Need data", "Player 1", "Player 2", "Player 3", "Player 4"]
def nc_recursive(node, G, ind):
	if networkx.number_of_nodes(G) < 3:
		n = Node(None)
		n.add_child(Node(ind[list(G.nodes())[0]]))
		n.add_child(Node(ind[list(G.nodes())[1]]))
		node.add_child(n)
	else:
		C = normalized_cut(G)
		(G1, G2) = get_subgraphs(G, C)

		if networkx.number_of_nodes(G1) > 1:
			l = Node(None)
			nc_recursive(l, G1, ind)
			node.add_child(l)
		else:
			l = Node(ind[list(G1.nodes())[0]])
			node.add_child(l)

		if networkx.number_of_nodes(G2) > 1:
			r = Node(None)
			nc_recursive(r, G2, ind)
			node.add_child(r)
		else:
			r = Node(ind[list(G2.nodes())[0]])
			node.add_child(r)
Example #9
def create_graph_of_agent(agent):
    # takes a string as input and outputs a Networkx graph object
    (nodes, edges) = parse(agent)
    G = nx.MultiDiGraph()
    G.add_nodes_from(nodes)
    G.add_edges_from(edges)
    active_nodes = [1]  # node 1 is assumed to be the start node
    next_nodes = []

    # propagate reachability from node 1; |V| rounds are enough to converge
    for i in range(nx.number_of_nodes(G)):
        active_nodes = list(set(active_nodes + next_nodes))
        next_nodes = []
        for node in active_nodes:
            next_nodes += G.successors(node)

    # remove every node that was never reached
    extra_nodes = sorted(list(set(range(1, nx.number_of_nodes(G) + 1))
                              - set(active_nodes)), reverse=True)
    for i in extra_nodes:
        G.remove_node(i)

    return G
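The reachability pass can also be written with NetworkX's built-in descendants; a sketch under the snippet's assumption that nodes are numbered from 1 and traversal starts at node 1:

reachable = {1} | nx.descendants(G, 1)           # everything reachable from node 1
G.remove_nodes_from(set(G.nodes()) - reachable)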
Example #10
def get_my_global_efficiency(filename) :
	threshold = 0
	f = open(filename[:-4]+'_global_efficiency.dat','w')
	g = open(filename[:-4]+'_node_global_efficiency.dat','w')
	print(f)
	print(g)
	f.write('threshold\tglob_effic\n')
	g.write('node\tthreshold\tnode_glob_effc\n')	
	for i in range(0,101):
		threshold = float(i)/100
		G = get_my_threshold_matrix(filename, threshold)
		global_efficiency = 0.
		for node_i in G :
			sum_inverse_dist = 0.
			for node_j in G :
				if node_i != node_j :
					if nx.has_path(G, node_i, node_j) == True :
						sum_inverse_dist += 1. / nx.shortest_path_length(G, node_i, node_j) 
			g.write('%d\t%f\t%f\n' % ((node_i+1), threshold, (sum_inverse_dist / nx.number_of_nodes(G))))  # note: normalizes by N rather than N-1
			global_efficiency += sum_inverse_dist / (nx.number_of_nodes(G) -1.)
		g.write("\n")
		global_efficiency = global_efficiency / nx.number_of_nodes(G)
		f.write("%f\t%f\n" % (threshold, global_efficiency))
	f.close()
	g.close()
Example #11
def get_single_network_measures(G, thr):
	f = open(out_prfx + 'single_network_measures.dat', 'a')
	N = nx.number_of_nodes(G)
	L = nx.number_of_edges(G)
	D = nx.density(G)
	cc = nx.average_clustering(G)
	compon = nx.number_connected_components(G)
	Con_sub = list(nx.connected_component_subgraphs(G))

	values = []
	values_2 =[]

	for node in G:
		values.append(G.degree(node))
	ave_deg = float(sum(values)) / float(N)
	
	f.write("%f\t%d\t%f\t%f\t%f\t%f\t" % (thr, L, D, cc, ave_deg, compon))
	#1. threshold, 2. edges, 3. density 4.clustering coefficient
	#5. average degree, 6. number of connected components
	
	for i in range(len(Con_sub)):
		if nx.number_of_nodes(Con_sub[i])>1:
			values_2.append(nx.average_shortest_path_length(Con_sub[i]))

	if len(values_2)==0:
		f.write("0.\n")
	else:
		f.write("%f\n" % (sum(values_2)/len(values_2)))
	#7. shortest pathway
	f.close()
def failure(compagnia):
    adiacenzaFinal = numpy.genfromtxt(("/home/protoss/Documenti/Siscomp_datas/data/AdiacenzaEuclidea_{0}.csv".format(compagnia)),delimiter=',',dtype='int')
    grafoFinal = networkx.Graph(adiacenzaFinal)

    graphSize = networkx.number_of_nodes(grafoFinal)
    steps = graphSize
    passo = 1
    i = 0
    ascisse.append(i)
    aziendaFinal.append(compagnia)
    diametro.append(2)
    relSizeGC.append(1)
    
    while (networkx.number_of_nodes(grafoFinal) > passo):
        gradiFinal = pandas.DataFrame(list(dict(grafoFinal.degree()).items()), columns=['index', 'grado'])
        randomante = gradiFinal['index'].values
        randomante = numpy.random.permutation(randomante)

        grafoFinal.remove_node(randomante[0])
    
        giantCluster = max(networkx.connected_component_subgraphs(grafoFinal), key = len)
                            
        i += 100 / float(steps)
        ascisse.append(i)
        aziendaFinal.append(compagnia)

        graphSize = networkx.number_of_nodes(grafoFinal)
        diametro.append(networkx.diameter(giantCluster, e=None))
        relSizeGC.append((networkx.number_of_nodes(giantCluster))/(float(graphSize)))
    def bfs(self, Asmall, start, path=True):
        queue = [start]
        chain = []
        extra_nodes_search = np.ones((nx.number_of_nodes(self.graph),), dtype=int)
        node_list = -1 * np.ones((nx.number_of_nodes(self.graph),), dtype=int)
        node_list[start] = start

        while queue:
            neighbors = np.array(np.nonzero(Asmall[queue[0], :])[1])  # get neighbors as numpy array
            for i in range(0, np.size(neighbors)):
                if node_list[neighbors[i]] == -1:  # Simon's computer dies here
                    node_list[neighbors[i]] = queue[0]
                    queue.append(neighbors[i])
            qsize = len(queue)
            furthest_node = queue[qsize - 1]
            queue = queue[1:]
        if path:
            curr = furthest_node
            while curr != start:
                chain.append(curr)
                extra_nodes_search[curr] = 0
                curr = node_list[curr]
            chain.append(start)
            chain = list(reversed(chain))
            extra_nodes_search[start] = 0

        return furthest_node, chain
def main():
    timeStart = time.time()
    if rank == 0:
        proc = random.sample(range(1, nx.number_of_nodes(G)), size)
        for i in range(1, size):
            comm.send(proc[i], dest=i)
        starting_node = proc[0]
        if check_neighbours(starting_node, None):
            print("Graph is hamiltonian! (process", rank, "starting node", starting_node, ")")
        else:
            print("Graph is not hamiltonian (process", rank, "starting node", starting_node, ")")
        timeEnd = time.time() - timeStart
        print(timeEnd)
        comm.Abort()
    elif rank < nx.number_of_nodes(G):
        starting_node = comm.recv(source=0)
        if check_neighbours(starting_node, None):
            print("Graph is hamiltonian! (process", rank, "starting node", starting_node, ")")
        else:
            print("Graph is not hamiltonian (process", rank, "starting node", starting_node, ")")
        timeEnd = time.time() - timeStart
        print(timeEnd)
        comm.Abort()
    else:
        MPI.Finalize()        
Example #15
def reduceGraph(read_g, write_g, minEdgeWeight, minNodeDegree, Lp, Sp):
    """
    Simplify the undirected graph and then update the 3 undirected weight properties.
    :param read_g: is the graph pickle to read
    :param write_g: is the updated graph pickle to write
    :param minEdgeWeight: the original weight of each edge should be >= minEdgeWeight
    :param minNodeDegree: the degree of each node should be >= minNodeDegree. The degree here is G.degree(node), NOT G.degree(node, weight='weight')
    :return: None
    """
    G=nx.read_gpickle(read_g)
    print('number of original nodes: ', nx.number_of_nodes(G))
    print('number of original edges: ', nx.number_of_edges(G))

    # materialize the views so nodes/edges can be removed while iterating
    for (u, v, w) in list(G.edges(data='weight')):
        if w < minEdgeWeight:
            G.remove_edge(u, v)

    for n in list(G.nodes()):
        if G.degree(n) < minNodeDegree:
            G.remove_node(n)

    print('number of new nodes: ', nx.number_of_nodes(G))
    print('number of new edges: ', nx.number_of_edges(G))

    for (a, b, w) in G.edges(data='weight'):
        unweight_allocation(G, a, b, w, Lp, Sp)

    print('update weight ok')
    nx.write_gpickle(G, write_g)

    return
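A hypothetical invocation of reduceGraph (the file names and cutoff values are illustrative only):

reduceGraph('graph_full.gpickle', 'graph_reduced.gpickle',
            minEdgeWeight=3, minNodeDegree=2, Lp=0.5, Sp=0.5)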
Example #16
def get_degree_distr(filename):
  import networkx as nx
  threshold = 0
  f = open(filename[:-4]+'_degreedistr.dat','w')
  for i in range(0,101):
    threshold = float(i)/100
    G = get_threshold_matrix(filename, threshold)
    check_sum = 0.
    degree_hist = {}
    for node in G:
      if G.degree(node) not in degree_hist:
        degree_hist[G.degree(node)] = 1
      else:
        degree_hist[G.degree(node)] += 1
    degrees = range(0, nx.number_of_nodes(G)+1, 1)
    keys = sorted(degree_hist.keys())
    for item in degrees:
      if item in keys:
        check_sum += float(degree_hist[item])/float(nx.number_of_nodes(G))
        f.write('%d\t%f\t%d\t%f\n' % (item, threshold, degree_hist[item], float(degree_hist[item])/float(nx.number_of_nodes(G))))
        #print item, degree_hist[item], float(degree_hist[item])/float(nx.number_of_nodes(G)))
      else:
        f.write('%d\t%f\t0\t0.\n' % (item, threshold))
    f.write("\n")
    print('degree distribution for threshold: %f, check sum: %f' % (threshold, check_sum))
  f.close()
Example #17
def attacco(compagnia):
    adiacenzaFinal = numpy.genfromtxt(
        ("/home/protoss/Documenti/Siscomp_datas/data/AdiacenzaEuclidea_{0}.csv".format(compagnia)),
        delimiter=",",
        dtype="int",
    )
    grafoFinal = networkx.Graph(adiacenzaFinal)

    graphSize = networkx.number_of_nodes(grafoFinal)
    steps = graphSize
    passo = 1
    i = 0
    ascisse.append(i)
    aziendaFinal.append(compagnia)
    diametro.append(2)
    relSizeGC.append(1)

    while networkx.number_of_nodes(grafoFinal) > passo:
        gradiFinal = pandas.DataFrame(list(dict(grafoFinal.degree()).items()), columns=["index", "grado"])
        gradiFinal.sort_values(["grado"], ascending=[False], inplace=True)
        sortedIDnode = gradiFinal["index"].values

        grafoFinal.remove_node(sortedIDnode[0])

        giantCluster = max(networkx.connected_component_subgraphs(grafoFinal), key=len)

        i += 100 / float(steps)
        ascisse.append(i)
        aziendaFinal.append(compagnia)

        newGraphSize = networkx.number_of_nodes(grafoFinal)
        #        diametro.append(networkx.diameter(giantCluster, e=None))
        relSizeGC.append((networkx.number_of_nodes(giantCluster)) / (float(newGraphSize)))
Example #18
def get_my_degree_distribution(filename) :
	threshold = 0
	f = open(filename[:-4]+'_degree_distr.dat','w')
	print(f)
	for i in range(0,101) :
		threshold = float(i)/100
		G = get_my_threshold_matrix(filename,threshold)
		check_sum = 0
		degree_hist = {}
		for node in G :
			if G.degree(node) not in degree_hist :		
				degree_hist[G.degree(node)] = 1
			else :
				degree_hist[G.degree(node)] += 1
		keys = sorted(degree_hist.keys())
		degrees = range(0, nx.number_of_nodes(G)+1 , 1) #?
		for item in degrees :
			if item in keys :
				prob = float(degree_hist[item])/float(nx.number_of_nodes(G))
				check_sum += prob
				f.write('%d\t%f\t%d\t%f\n'%(item, threshold, degree_hist[item], prob))
			else :
				f.write('%d\t%f\t0\t0.\n' % (item, threshold))
    		f.write("\n")
    	print 'degree distr of threshold: %f, check sum: %f' % (threshold, check_sum)
	f.close()
Example #19
def init():
    global projectname
    global version_array
    global pos
    global x
    global y
    global size_array
    global numframes
    global sg
    for i in range(6):
        data_directory = projectname + "_history/" + projectname + version_array[i] + "/" + projectname
        [g, lines] = creategraph.readfile(data_directory)
        if i == 0:
            sg = creategraph.refine(g, 45)
            [pos, x, y] = creategraph.coordinate(sg)
            size = creategraph.point_sizes(sg, lines)
            zeros = np.array([0] * len(size))
            print('len(size) = ', len(size))
            print('zeros = ', zeros)
            size_array.append(zeros)
            size_array.append(size)
        else:
            # create the graph induced by nodes from sg
            subg = nx.subgraph(g, nx.nodes(sg))
            print(subg, sg)
            if nx.number_of_nodes(subg) != nx.number_of_nodes(sg):
                print('panic at 34')
            else:  # this looks like an error, but is not
                size = creategraph.point_sizes(sg, lines)
                size_array.append(size)


    x = np.array(x)
    y = np.array(y)
    size_array = np.array(size_array)
Example #20
def run_main():
    file = str(sys.argv[1])
    f = open(file, 'r')
    print "\nReading inputfile:", file, "..."
    
    edgelist = []
    for line in f.readlines():
        edgelist.append((int(line.split()[0]), int(line.split()[1])))
    
    
    Directed_G = nx.DiGraph(edgelist)
    Undirected_G = Directed_G.to_undirected()
    #plt.figure(figsize=(8,8))
    #nx.draw(Directed_G,pos=nx.spring_layout(Directed_G))
    #plt.draw()
    #time.sleep(0.1)

    # compute other things
    print "Number of nodes involved in network:", nx.number_of_nodes(Undirected_G)
    print "Number of edges:", nx.number_of_edges(Undirected_G)
    print "Average degree:", nx.number_of_edges(Undirected_G) / float(nx.number_of_nodes(Undirected_G))
    t0 = time.clock()
    print "Average clustering coefficient:", compute_clustering_coefficient(Directed_G, Undirected_G)
    print "Took:", time.clock() - t0, "seconds"
    t1 = time.clock()
    print "Average path length:", average_shortest_path(Directed_G, Undirected_G)
    print "Took:", time.clock() - t1, "seconds"
    print "Total time:", time.clock() - t0, "seconds"
           
    report_final_stats()
    counter += 1
    second_counter += 1
Example #21
def _draw(
        self,
        layout='spring',
        n_color='blue',
        n_size=15,
        n_alpha=0.5,
        e_alpha=0.1,
        e_arrows=False,
        scale_by_degree=False):
    """
    Draw topology using the draw() command from networkx.

    USAGE: topology.draw(layout = "spring", n_size = 15, scale_by_degree = False, n_color = 'blue', n_alpha = 0.5, e_alpha = 0.1, e_arrows=False)

    * layout: Network layout. Can be 'spring' or 'circular'.
    * n_size: The size of nodes. Becomes scaling factor when scale_by_degree=True.
    * scale_by_degree: When True, nodes will be sized proportional to their degree.
    * n_color: Node color.
    * n_alpha: Transparency of nodes. Takes value between 0 and 1.
    * e_alpha: Transparency of edges. Takes value between 0 and 1.
    * e_arrows: Plots arrows on the edges for directed graphs
    """
    try:
        import networkx as nx
    except ImportError:
        raise ImportError('Could not import the networkx module.')
    try:
        import matplotlib.pyplot as pl
    except ImportError:
        raise ImportError('Could not import the MatPlotLib module.')
    if self.number_of_vertices == 0 or self.number_of_vertices == 1:
        raise ValueError(
            'Cannot draw topology with one single vertex or less.')

    G = self.to_networkx()
    node_sizes = list(range(nx.number_of_nodes(G)))
    for i in range(nx.number_of_nodes(G)):
        if scale_by_degree:
            node_sizes[i] = nx.degree(G, i) * n_size
        else:
            node_sizes[i] = n_size

    if layout == "spring":
        pos = nx.spring_layout(self.to_networkx())
    if layout == "circular":
        pos = nx.circular_layout(self.to_networkx())

    pl.figure()
    nx.draw_networkx_edges(
        self.to_networkx(), pos, alpha=e_alpha, arrows=e_arrows)
    nx.draw_networkx_nodes(
        self.to_networkx(),
        pos,
        node_size=node_sizes,
        node_color=n_color,
        alpha=n_alpha)
    pl.axis('off')
    pl.show()
Example #22
def test_strong_product_size():
    K5=nx.complete_graph(5)
    P5=nx.path_graph(5)
    K3 = nx.complete_graph(3)
    G=strong_product(P5,K3)
    assert_equal(nx.number_of_nodes(G),5*3)
    G=strong_product(K3,K5)
    assert_equal(nx.number_of_nodes(G),3*5)
Example #23
def test_tensor_product_size():
    P5 = nx.path_graph(5)
    K3 = nx.complete_graph(3)
    K5 = nx.complete_graph(5)

    G = nx.tensor_product(P5, K3)
    assert_equal(nx.number_of_nodes(G), 5 * 3)
    G = nx.tensor_product(K3, K5)
    assert_equal(nx.number_of_nodes(G), 3 * 5)
    def find_largest_component(self):
        G = self.graph
        # weakly_connected_component_subgraphs returns a generator in NetworkX 2.x
        list_Graphs = list(nx.weakly_connected_component_subgraphs(G))
        max_component = list_Graphs[0]
        for g in list_Graphs:
            if nx.number_of_nodes(g) > nx.number_of_nodes(max_component):
                max_component = g

        return max_component
Example #25
def test_strong_product():
    null=nx.null_graph()
    empty1=nx.empty_graph(1)
    empty10=nx.empty_graph(10)
    K2=nx.complete_graph(2)
    K3=nx.complete_graph(3)
    K5=nx.complete_graph(5)
    K10=nx.complete_graph(10)
    P2=nx.path_graph(2)
    P3=nx.path_graph(3)
    P5=nx.path_graph(5)
    P10=nx.path_graph(10)
    # null graph
    G=strong_product(null,null)
    assert_true(nx.is_isomorphic(G,null))
    # null_graph X anything = null_graph and v.v.
    G=strong_product(null,empty10)
    assert_true(nx.is_isomorphic(G,null))
    G=strong_product(null,K3)
    assert_true(nx.is_isomorphic(G,null))
    G=strong_product(null,K10)
    assert_true(nx.is_isomorphic(G,null))
    G=strong_product(null,P3)
    assert_true(nx.is_isomorphic(G,null))
    G=strong_product(null,P10)
    assert_true(nx.is_isomorphic(G,null))
    G=strong_product(empty10,null)
    assert_true(nx.is_isomorphic(G,null))
    G=strong_product(K3,null)
    assert_true(nx.is_isomorphic(G,null))
    G=strong_product(K10,null)
    assert_true(nx.is_isomorphic(G,null))
    G=strong_product(P3,null)
    assert_true(nx.is_isomorphic(G,null))
    G=strong_product(P10,null)
    assert_true(nx.is_isomorphic(G,null))

    G=strong_product(P5,K3)
    assert_equal(nx.number_of_nodes(G),5*3)
    G=strong_product(K3,K5)
    assert_equal(nx.number_of_nodes(G),3*5)

    # No classic results are easily found for the strong product

    G = nx.erdos_renyi_graph(10,2/10.)
    H = nx.erdos_renyi_graph(10,2/10.)
    GH = strong_product(G,H)

    for (u_G,u_H) in GH.nodes():
        for (v_G,v_H) in GH.nodes():
            if (u_G==v_G and H.has_edge(u_H,v_H)) or \
               (u_H==v_H and G.has_edge(u_G,v_G)) or \
               (G.has_edge(u_G,v_G) and H.has_edge(u_H,v_H)):
                assert_true(GH.has_edge((u_G,u_H),(v_G,v_H)))
            else:
                assert_true(not GH.has_edge((u_G,u_H),(v_G,v_H)))
Example #26
def validate_constituency_parse(tokenization):
    """
    Args:
      tokenization (concrete.structure.ttypes.Tokenization)

    Returns:
      bool: True if tokenization's constituency parse is valid, False otherwise
    """
    valid = True

    if tokenization.parse:
        total_constituents = len(tokenization.parse.constituentList)
        logging.debug(ilm(6, "tokenization '%s' has %d constituents" % (tokenization.uuid, total_constituents)))

        total_uuid_mismatches = 0
        constituent_id_set = set()
        constituent_parse_tree = nx.DiGraph()

        for constituent in tokenization.parse.constituentList:
            # Add nodes to parse tree
            constituent_parse_tree.add_node(constituent.id)

            if constituent.id not in constituent_id_set:
                constituent_id_set.add(constituent.id)
            else:
                valid = False
                logging.error(ilm(7, "constituent ID %d has already been used in this sentence's tokenization" % constituent.id))

            # Per the Concrete 'structure.thrift' file, tokenSequence may not be defined:
            #   "Typically, this field will only be defined for leaf constituents (i.e., constituents with no children)."
            if constituent.tokenSequence and constituent.tokenSequence.tokenizationId != tokenization.uuid:
                total_uuid_mismatches += 1

        if total_uuid_mismatches > 0:
            valid = False
            logging.error(ilm(6, "tokenization '%s' has UUID mismatch for %d/%d constituents" %
                              (tokenization.uuid, total_uuid_mismatches, total_constituents)))

        # Add edges to constituent parse tree
        for constituent in tokenization.parse.constituentList:
            if constituent.childList:
                for child_id in constituent.childList:
                    constituent_parse_tree.add_edge(constituent.id, child_id)

        # Check if constituent parse "tree" is actually a tree
        undirected_graph = constituent_parse_tree.to_undirected()
        if not nx.is_connected(undirected_graph):
            valid = False
            logging.error(ilm(6, "The constituent parse \"tree\" is not a fully connected graph - the graph has %d components" %
                len(nx.connected_components(undirected_graph))))
        if nx.number_of_nodes(constituent_parse_tree) != nx.number_of_edges(constituent_parse_tree) + 1:
            valid = False
            logging.error(ilm(6, "The constituent parse \"tree\" is not a tree.  |V| != |E|+1  (|V|=%d, |E|=%d)" %
                (nx.number_of_nodes(constituent_parse_tree), nx.number_of_edges(constituent_parse_tree))))

    return valid
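For the final check, NetworkX also offers nx.is_tree, which combines the connectivity and |V| = |E| + 1 conditions; a sketch of an equivalent test:

if not nx.is_tree(constituent_parse_tree.to_undirected()):
    valid = False  # disconnected, or contains a cycle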
Example #27
def redirect(graph, component):
    current_nodes = list(graph.nodes())
    retained_nodes = set(component.nodes())
    out.write('\n' + 'There are %d nodes in retained_nodes' % len(retained_nodes))
    for entry in current_nodes:
        if entry not in retained_nodes:
            graph.remove_node(entry)
    out.write('\n' + 'There are %d nodes in component' %nx.number_of_nodes(component)) 
    out.write('\n' + 'There are %d nodes in graph' %nx.number_of_nodes(graph))
    return graph
Example #28
def one_d_search(G, F, k, ind):
	"""
		Cut computation. Perform 1-D search for beta using golden search.
		Input:
			* G: graph
			* F: graph signal
			* k: max edges to be cut
			* ind: vertex index map: vertex -> unique integer
		Output:
			* cut
	"""
	C = laplacian_complete(networkx.number_of_nodes(G))
	A = weighted_adjacency_complete(G,F, ind)
	CAC = numpy.dot(numpy.dot(C,A), C)
	start = numpy.ones(networkx.number_of_nodes(G))
	L = networkx.laplacian_matrix(G).todense()

	#Upper and lower bounds for search
	a = 0.
	b = 1000.
	c=b-gr*(b-a)
	d=a+gr*(b-a)
	
	#Tolerance
	tol = 1.

	resab = {}
	resab["size"] = k + 1
	
	#golden search
	while abs(c-d)>tol or resab["size"] > k:      
		resc = spectral_cut(CAC, L, C, A, start, F, G, c, k, ind)
		resd = spectral_cut(CAC, L, C, A, start, F, G, d, k, ind)
		
		if resc["size"] <= k: 
			if resc["score"] > resd["score"]: 
				start = numpy.array(resc["x"])
				b = d
				d = c
				c=b-gr*(b-a)
			else:
				start = numpy.array(resd["x"])
				a=c
				c=d  
				d=a+gr*(b-a)
		else:
				start = numpy.array(resc["x"])
				a=c
				c=d  
				d=a+gr*(b-a)
		
		resab = spectral_cut(CAC, L, C, A, start, F, G, (b+a) / 2, k, ind)
	
	return resab
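The loop above is a golden-section search over the penalty beta; the same bracketing scheme in isolation, as a minimal sketch for minimizing a one-dimensional function f (gr is the inverse golden ratio, matching the snippet's constant; the snippet adapts this to its score/feasibility criteria):

import math

def golden_section_min(f, a, b, tol=1e-6):
    gr = (math.sqrt(5) - 1) / 2  # ~0.618
    c = b - gr * (b - a)
    d = a + gr * (b - a)
    while abs(c - d) > tol:
        if f(c) < f(d):
            b = d  # minimum lies in [a, d]
        else:
            a = c  # minimum lies in [c, b]
        c = b - gr * (b - a)
        d = a + gr * (b - a)
    return (a + b) / 2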
Example #29
def get_small_worldness(filename):
  import networkx as nx
  threshold = 0
  f = open(filename[:-4]+'_small_worldness.dat','w')
  for i in range(0,101):
    threshold = float(i)/100
    G = get_threshold_matrix(filename, threshold)
    ER_graph = nx.erdos_renyi_graph(nx.number_of_nodes(G), nx.density(G))

    cluster = nx.average_clustering(G)
    ER_cluster = nx.average_clustering(ER_graph)
    
    transi = nx.transitivity(G)
    ER_transi = nx.transitivity(ER_graph)

    print('threshold: %f, average cluster coefficient: %f, random nw: %f, transitivity: %f, random nw: %f' % (threshold, cluster, ER_cluster, transi, ER_transi))

    f.write("%f\t%f\t%f" % (threshold, cluster, ER_cluster))
    components = list(nx.connected_component_subgraphs(G))
    ER_components = list(nx.connected_component_subgraphs(ER_graph))

    values = []
    ER_values = []
    for i in range(len(components)):
      if nx.number_of_nodes(components[i]) > 1:
        values.append(nx.average_shortest_path_length(components[i]))
    for i in range(len(ER_components)):
      if nx.number_of_nodes(ER_components[i]) > 1:
        ER_values.append(nx.average_shortest_path_length(ER_components[i]))
    if len(values) == 0:
      f.write("\t0.")
    else:
      f.write("\t%f" % (sum(values)/len(values)))

    if len(ER_values) == 0:
      f.write("\t0.")
    else:
      f.write("\t%f" % (sum(ER_values)/len(ER_values)))
    
    f.write("\t%f\t%f" % (transi, ER_transi))  
    
    if (ER_cluster*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 :
      S_WS = (cluster/ER_cluster) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))
    else:
      S_WS = 0.
    if (ER_transi*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 :
      S_Delta = (transi/ER_transi) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))
    else:
      S_Delta = 0.
    
    f.write("\t%f\t%f" % (S_WS, S_Delta))  
    f.write("\n")
    
  f.close()  
  print "1:threshold 2:cluster-coefficient 3:random-cluster-coefficient 4:shortest-pathlength 5:random-shortest-pathlength 6:transitivity 7:random-transitivity 8:S-Watts-Strogatz 9:S-transitivity" 
Example #30
def test_strong_product_combinations():
    P5 = nx.path_graph(5)
    K3 = nx.complete_graph(3)
    G = strong_product(P5, K3)
    assert_equal(nx.number_of_nodes(G), 5 * 3)
    G = strong_product(nx.MultiGraph(P5), K3)
    assert_equal(nx.number_of_nodes(G), 5 * 3)
    G = strong_product(P5, nx.MultiGraph(K3))
    assert_equal(nx.number_of_nodes(G), 5 * 3)
    G = strong_product(nx.MultiGraph(P5), nx.MultiGraph(K3))
    assert_equal(nx.number_of_nodes(G), 5 * 3)
Example #31
    def test_number_of_nodes(self):
        assert self.G.number_of_nodes() == nx.number_of_nodes(self.G)
        assert self.DG.number_of_nodes() == nx.number_of_nodes(self.DG)
Example #32
    test_name = '-'.join([opt['g_type'], opt['test_min_n'], opt['test_max_n']])
    result_file = '%s/test-%s-gnn-%s-%s.csv' % (opt['save_dir'], test_name,
                                                opt['min_n'], opt['max_n'])

    n_test = 1000
    frac = 0.0
    with open(result_file, 'w') as f_out:
        print('testing')
        sys.stdout.flush()
        idx = 0
        prepared_train_data = []
        train_opt_tours = []
        for g in tqdm(TestSet()):
            api.InsertGraph(g, is_test=True)
            t1 = time.time()
            val, sol = api.GetSol(idx, nx.number_of_nodes(g))
            prepared_train_data.append((g, val))
            train_opt_tours.append(sol[1:])
            t2 = time.time()
            f_out.write('%.8f,' % val)
            f_out.write('%d' % sol[0])
            #print "Num of nodes in graph: ",nx.number_of_nodes(g)," solution len: ",sol[0]
            if nx.number_of_nodes(g) != sol[0]:
                print("Problem")
            for i in range(sol[0]):
                f_out.write(' %d' % sol[i + 1])
            f_out.write(',%.6f\n' % (t2 - t1))
            frac += val

            idx += 1
        with open("prepared_train_data.pkl", 'wb') as f:
Example #33
    #roadSegGraph = nx.read_graphml("roadSegGraph_110.graphml")
    #roadSegGraph = nx.read_graphml("roadSegGraph.graphml")
    nLayers = 4

    largeNumber = 10**5
    filename = 'example_all_routes.dat'

    #number of routes
    num_routes = nsamples
    pathNumber = 1
    data, data_var = init_dataFile(nLayers, budget, num_routes, largeNumber)

    rand_routes = random_routes(roadSegGraph)

    non_subroute = {}
    nnodes = nx.number_of_nodes(roadSegGraph)
    node_list = roadSegGraph.nodes()
    node_indx = {}
    count = 0
    for i in node_list:
        node_indx[i] = count
        count += 1

    for u in range(1, nnodes * nnodes + 1):
        non_subroute[u] = True

    for i, j in rand_routes:
        if i != j:
            shortest_path = nx.shortest_path(roadSegGraph,
                                             source=i,
                                             target=j,
Example #34
import sys
import networkx as nx
import random as rand
from numpy import random
from collections import deque

g = nx.read_edgelist('cambridge_net.txt', create_using=nx.DiGraph(), nodetype=int)

total_nodes = nx.number_of_nodes(g)
target_nodes = 0.15 * total_nodes

print "Total Nodes:", total_nodes
print "Target Nodes:", target_nodes

print nx.info(g)

to_burn = deque()

finished = False
p = 2.0 / 3.0
print(p)

while not(finished):
    to_burn.clear()

    to_burn.append(rand.choice(list(g.nodes())))

    while to_burn:                
        burning = to_burn.popleft()   
        nodes = list(g.neighbors(burning))
        g.remove_node(burning)
Example #35
def hierarchical_clustering(G, resolution=1):
    for n in G.nodes():
        G.nodes[n]['ancIdxs'] = []

    # A dendrogram is a tree and each level is a partition of the graph nodes
    dendo = community.generate_dendrogram(G, resolution=resolution)

    num_levels = len(dendo)
    clusters_per_level = []
    for level in range(num_levels):
        partition = community.partition_at_level(dendo, level)
        clusters = list(set(partition.values()))
        clusters_per_level.append(clusters)
        print('clusters at level', level, 'are', clusters)
        for n, c in partition.items():
            G.nodes[n]['ancIdxs'].append(c)

    num_nodes = nx.number_of_nodes(G)

    def get_cluster_idx(level, idx):
        offset = num_nodes
        for i in range(level):
            offset += len(clusters_per_level[i])
        return offset + idx

    cluster_list = []
    for n in G.nodes():
        node = G.nodes[n]
        node_clusters = node['ancIdxs']
        for level in range(len(node['ancIdxs'])):
            node_clusters[level] = get_cluster_idx(level, node_clusters[level])

        cluster_list.append({
            'idx': n,
            'nodeIdx': n,
            'parentIdx': node_clusters[0],
            'height': 0
        })

    for level, clusters in enumerate(clusters_per_level):
        for c in clusters:
            cluster_list.append({
                'idx': get_cluster_idx(level, c),
                'height': level + 1
            })

    for n in G.nodes():
        node = G.nodes[n]
        node_clusters = node['ancIdxs']
        for level in range(len(node_clusters) - 1):
            cluster_list[node_clusters[level]]['parentIdx'] = node_clusters[
                level + 1]

    # Root
    root_cluster_idx = len(cluster_list)
    cluster_list.append({
        'idx': root_cluster_idx,
        'height': len(clusters_per_level) + 1
    })

    for c in clusters_per_level[-1]:
        cluster_list[get_cluster_idx(num_levels - 1,
                                     c)]['parentIdx'] = root_cluster_idx

    return cluster_list
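A hypothetical invocation, assuming the python-louvain package (imported as community, as the snippet does) and a small built-in test graph:

import networkx as nx
import community  # python-louvain

G = nx.karate_club_graph()
cluster_list = hierarchical_clustering(G, resolution=1)
print(len(cluster_list), 'entries; root height =', cluster_list[-1]['height'])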
Example #36
    return UU


def iso(G1, glist):
    """Quick and dirty nonisomorphism checker used to check isomorphisms."""
    for G2 in glist:
        if isomorphic(G1, G2):
            return True
    return False


if __name__ == '__main__':
    G = atlas6()

    print("graph has %d nodes with %d edges"\
          %(nx.number_of_nodes(G), nx.number_of_edges(G)))
    print(nx.number_connected_components(G), "connected components")

    try:
        import pygraphviz
        from networkx.drawing.nx_agraph import graphviz_layout
    except ImportError:
        try:
            import pydot
            from networkx.drawing.nx_pydot import graphviz_layout
        except ImportError:
            raise ImportError("This example needs Graphviz and either "
                              "PyGraphviz or pydot")

    import matplotlib.pyplot as plt
    plt.figure(1, figsize=(8, 8))
Example #37
"""
run考虑到7月9日、10日的跌停股票=1或0,这两天考察停牌股票,要单独运行(先把这两天的文件挪到另一个文件夹)
"""	
	
w_threshold = 0.95

os.chdir('/Users/shine/work_hard/financial_network/data/threshold/stock_alpha')
V = read_edgelist('2015-1.edgelist')

w = weight(V)
w = pd.DataFrame(w)
beta = w.quantile(q = w_threshold)
print(beta)

V = remove_edges(V, beta)
print("after remove edges, stock network nodes and edges are", nx.number_of_nodes(V), nx.number_of_edges(V))

lower = []
sus = []
sus_lower = []
os.chdir('/Users/shine/work_hard/financial_network/data/status_wind_612_710')
FileList = glob.glob('*.xlsx') #for workingfile in filelist
print(FileList)
print("FileList length is", len(FileList))  # 20 files in total
for workingfile in FileList:
	print(workingfile, 'is working now')
	filename  = workingfile.split('.')[0]
	print('filename is', filename)
	os.chdir('/Users/shine/work_hard/financial_network/data/status_wind_612_710')
	status = pd.ExcelFile(workingfile)
	status = pd.read_excel(status, 'Wind资讯')
Example #38
""" Test Fragmentation index with data from Borgatti """
import networkx as nx

#
#
# 2 complete graphs a 5 nodes
A = nx.to_undirected(nx.complete_graph([1,2,3,4,5]))
B = nx.to_undirected(nx.complete_graph([6,7,8,9,10]))
c = nx.union(A,B)
ch = nx.harmonic_centrality(c)
print(ch)
print(sum(ch.values())/(nx.number_of_nodes(c)*(nx.number_of_nodes(c)-1)))
print(1. - (sum(ch.values())/(nx.number_of_nodes(c)*(nx.number_of_nodes(c)-1))))

#
#
# 2 path 1->2->3->4->5 and 6->7->8->9->10
a = nx.to_undirected(nx.path_graph([1,2,3,4,5]))
b = nx.to_undirected(nx.path_graph([6,7,8,9,10]))
c=nx.union(a,b)
ch = nx.harmonic_centrality(c)
print(ch)
print(sum(ch.values())/(nx.number_of_nodes(c)*(nx.number_of_nodes(c)-1)))
print(1. - (sum(ch.values())/(nx.number_of_nodes(c)*(nx.number_of_nodes(c)-1))))
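The repeated expression is Borgatti's distance-weighted fragmentation; a small helper to avoid retyping it (same formula as the prints above):

def distance_weighted_fragmentation(G):
    # 1 - (average reciprocal distance over ordered node pairs)
    n = nx.number_of_nodes(G)
    ch = nx.harmonic_centrality(G)
    return 1.0 - sum(ch.values()) / (n * (n - 1))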



    def populate_instance_and_run_as_util(self, g, h):  #, lsape_instance):
        """
	/*!
	 * @brief Runs the method with options specified by set_options() and provides access to constructed LSAPE instance.
	 * @param[in] g Input graph.
	 * @param[in] h Input graph.
	 * @param[out] result Result variable.
	 * @param[out] lsape_instance LSAPE instance.
	 */
		"""
        result = {'node_maps': [], 'lower_bound': 0, 'upper_bound': np.inf}

        # Populate the LSAPE instance and set up the solver.
        nb1, nb2 = nx.number_of_nodes(g), nx.number_of_nodes(h)
        lsape_instance = np.ones((nb1 + nb2, nb1 + nb2)) * np.inf
        # 		lsape_instance = np.empty((nx.number_of_nodes(g) + 1, nx.number_of_nodes(h) + 1))
        self.populate_instance(g, h, lsape_instance)


        lsape_solver = LSAPESolver(lsape_instance)

        # Solve the LSAPE instance.
        if self._solve_optimally:
            lsape_solver.set_model(self._lsape_model)
        else:
            lsape_solver.set_greedy_method(self._greedy_method)
        lsape_solver.solve(self._max_num_solutions)

        # Compute and store lower and upper bound.
        if self._compute_lower_bound and self._solve_optimally:
            result['lower_bound'] = lsape_solver.minimal_cost(
            ) * self._lsape_lower_bound_scaling_factor(g, h)  # @todo: test

        for solution_id in range(0, lsape_solver.num_solutions()):
            result['node_maps'].append(
                NodeMap(nx.number_of_nodes(g), nx.number_of_nodes(h)))
            misc.construct_node_map_from_solver(lsape_solver,
                                                result['node_maps'][-1],
                                                solution_id)
            self._ged_data.compute_induced_cost(g, h, result['node_maps'][-1])

        # Add centralities and reoptimize.
        if self._centrality_weight > 0 and self._centrality_method != 'NODE':
            print('This is not implemented.')
            pass  # @todo

        # Sort the node maps and set the upper bound.
        if len(result['node_maps']) > 1 or len(
                result['node_maps']) > self._max_num_solutions:
            print('This is not implemented.')  # @todo:
            pass
        if len(result['node_maps']) == 0:
            result['upper_bound'] = np.inf
        else:
            result['upper_bound'] = result['node_maps'][0].induced_cost()

        return result
Example #40
import sys
import networkx as nx
import random as rand
from numpy import random

print "Reading Graph"
sys.stdout.flush()
g = nx.read_edgelist('cambridge_net.txt',
                     create_using=nx.Graph(),
                     nodetype=int)

target_nodes = 0.15 * nx.number_of_nodes(g)

print "Target Nodes:", target_nodes
sys.stdout.flush()

print "Attempting with limit of len(g)"
sys.stdout.flush()

g_sample = nx.Graph()
p = 0.15
steps = 0
limit = nx.number_of_nodes(g)

finished = False
while not (finished):
    source = rand.choice(list(g.nodes()))
    node = source
    steps = 0
    while (steps < limit):
        steps = steps + 1
Example #41
    words = set()
    for line in fh.readlines():
        line = line.decode()
        if line.startswith('*'):
            continue
        w = str(line[0:5])
        words.add(w)
    return generate_graph(words)


if __name__ == '__main__':
    G = words_graph()
    print("Loaded words_dat.txt containing 5757 five-letter English words.")
    print("Two words are connected if they differ in one letter.")
    print("Graph has %d nodes with %d edges" %
          (nx.number_of_nodes(G), nx.number_of_edges(G)))
    print("%d connected components" % nx.number_connected_components(G))

    for (source, target) in [
        ('chaos', 'order'),
        ('nodes', 'graph'),
        ('moron', 'smart'),
        ('flies', 'swims'),
        ('mango', 'peach'),
        ('pound', 'marks'),
    ]:
        print("Shortest path between %s and %s is" % (source, target))
        try:
            sp = nx.shortest_path(G, source, target)
            for n in sp:
                print(n)
Example #42
N = 1000
k = 190
disease_largest_cc = 35
second_largest = 14

fh = open(os.path.join('./PPI_Graphs', 'reactomefi2015.tsv'), 'rb')
G = nx.read_edgelist(fh, delimiter='\t')
nodes_all = list(G.nodes())

for i in range(0, N):
    genes = []
    genes = random.sample(nodes_all, k)
    shortest_length = sys.maxsize

    H = G.subgraph(genes)
    nodes = nx.number_of_nodes(H)

    if (nodes != 0):
        #Get All Components
        Gcc = sorted(nx.connected_component_subgraphs(H),
                     key=len,
                     reverse=True)
        first = Gcc[0]
        second = Gcc[1]
        G0 = nx.number_of_nodes(first)
        G1 = nx.number_of_nodes(second)

        #Diameter
        c1_diam.append(nx.diameter(first))
        if nx.diameter(first) > 12:
            c1d += 1
Example #43
    except IndexError:
        sys.exit(
            "Motif graph properties\npython %s <tsv file generated by edges2table.pl> similarity_cutoff zscore_cutoff"
            % (sys.argv[0]))

    opr_nodes = operon_nodes(crs_f, similarity_cutoff, zscore_cutoff)
    #pprint(opr_nodes)
    G = motif_graph(crs_f, similarity_cutoff, zscore_cutoff)

    loo_f = "../../data/LOO_per_matrix_site.tsv"
    regulon_f = "../../data/regulon_by_first_gene.txt"
    LOO = read_LOO(loo_f)
    regulon = read_regulon(regulon_f)

    print("#graph has %d nodes with %d edges, edges to nodes ratio: %f, edge average zscore: %f, %f transitivity"\
            %(nx.number_of_nodes(G),
                nx.number_of_edges(G),
                edge_density(nx.number_of_edges(G), nx.number_of_nodes(G)),
                edge_average_zscore(G),
                nx.transitivity(G)))
    print("#", nx.number_connected_components(G), "connected components")

    print(
        "reg\tLOO\tsize\tnodes\tedges\tratio\tavg_zscore\tnumber_connected_components\tlargest_comp_size\ttransitivity"
    )

    for reg in regulon.keys():
        if len(regulon[reg]) > 2:
            nodes = list()
            for gi in regulon[reg]:
                for n in opr_nodes[gi]:
Example #44
import networkx as nx
import random

from bokeh.io import show, curdoc
from bokeh.layouts import Column
from bokeh.models import Plot, Range1d, MultiLine, Circle
from bokeh.models.graphs import from_networkx
from bokeh.models.widgets import Button
from bokeh.palettes import Spectral4, Spectral10

L = 78

G=nx.karate_club_graph()

N = nx.number_of_nodes(G)

plot = Plot(plot_width=400, plot_height=400,
            x_range=Range1d(-1.1,1.1), y_range=Range1d(-1.1,1.1))

graph_renderer = from_networkx(G, nx.circular_layout, scale=1, center=(0,0))

graph_renderer.node_renderer.glyph = Circle(size=15, fill_color="colors", line_width=1.0)
graph_renderer.node_renderer.data_source.data["colors"] = [Spectral4[0] ]* N

graph_renderer.edge_renderer.glyph = MultiLine(line_color="colors", line_alpha=0.8, line_width="widths")

graph_renderer.edge_renderer.data_source.data["colors"] = ["black"] * L
graph_renderer.edge_renderer.data_source.data["widths"] = [2] * L

plot.renderers.append(graph_renderer)
def calc(G, invariant):
    if invariant == 'domination_number':
        return domination_number(G)
    elif invariant == 'chromatic_number':
        return chromatic_number(G)
    elif invariant == 'total_domination_number':
        return total_domination_number(G)
    elif invariant == 'connected_domination_number':
        return GPY.connected_domination_number(G)
    elif invariant == 'independent_domination_number':
        return independent_domination_number(G)
    elif invariant == 'slater':
        return GPY.slater(G)
    elif invariant == 'sub_total_domination_number':
        return GPY.sub_total_domination_number(G)
    elif invariant == 'annihilation_number':
        return GPY.annihilation_number(G)
    elif invariant == 'independence_number':
        return independence_number(G)
    elif invariant == 'power_domination_number':
        return GPY.power_domination_number(G)
    elif invariant == 'residue':
        return GPY.residue(G)
    elif invariant == 'k_residual_index':
        return GPY.k_residual_index(G)

    elif invariant == 'connected_zero_forcing_number':
        return GPY.connected_zero_forcing_number(G)
    elif invariant == 'total_zero_forcing_number':
        return GPY.total_zero_forcing_number(G)
    elif invariant == 'zero_forcing_number':
        return GPY.zero_forcing_number(G)

    elif invariant == 'diameter':
        return nx.diameter(G)
    elif invariant == 'radius':
        return nx.radius(G)

    elif invariant == 'order':
        return nx.number_of_nodes(G)
    elif invariant == 'size':
        return nx.number_of_edges(G)

    elif invariant == 'min_degree':
        return GPY.min_degree(G)
    elif invariant == 'max_degree':
        return GPY.max_degree(G)

    elif invariant == 'number_of_min_degree_nodes':
        return GPY.number_of_min_degree_nodes(G)
    elif invariant == 'number_of_degree_one_nodes':
        return GPY.number_of_degree_one_nodes(G)
    elif invariant == 'number_of_max_degree_nodes':
        return GPY.number_of_max_degree_nodes(G)
    elif invariant == 'clique_number':
        return GPY.clique_number(G)
    elif invariant == 'min_maximal_matching_number':
        return GPY.min_maximal_matching_number(G)
    elif invariant == 'matching_number':
        return matching_number(G)
    elif invariant == 'triameter':
        return triameter(G)
    elif invariant == 'vertex_cover_number':
        return vertex_cover_number(G)

    elif invariant == 'randic_index':
        return randic_index(G)
    elif invariant == 'augmented_randic_index':
        return augmented_randic_index(G)
    elif invariant == 'harmonic_index':
        return harmonic_index(G)
    elif invariant == 'atom_bond_connectivity_index':
        return atom_bond_connectivity_index(G)

    elif invariant == 'sum_connectivity_index':
        return sum_connectivity_index(G)

    else:
        print('ERROR')
        return False
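The chain above is a dispatch table; the same idea can be sketched with a dict (only the plain NetworkX invariants are shown here, the GPY-backed ones follow the same pattern):

import networkx as nx

INVARIANTS = {
    'diameter': nx.diameter,
    'radius': nx.radius,
    'order': nx.number_of_nodes,
    'size': nx.number_of_edges,
}

def calc_dict(G, invariant):
    try:
        return INVARIANTS[invariant](G)
    except KeyError:
        print('ERROR')
        return False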
for key in result:
    if len(result[key]) != 0:
        for friend_id in result[key]:
            Network.add_node(friend_id)
for key in result:
    if len(result[key]) != 0:
        for friend_id in result[key]:
            Network.add_edge(key, friend_id)

# draw the network, store the graph as png file and show the graph
nx.draw(Network)
plt.savefig("Network.png")
plt.show()

# print the number of nodes and edges, diameter and average distance of the network
print("The number of nodes is: ")
print(nx.number_of_nodes(Network))
print("The number of edges is: ")
print(nx.number_of_edges(Network))
print("The diameter is: ")
print(nx.diameter(Network))
print("The average distance of the graph is: ")
print(nx.average_shortest_path_length(Network))

# this txt file is used to store the number of nodes and edges, diameter and average distance information
f1 = open("stats.txt", "w")
f1.write("The number of nodes is: ")
f1.write(str(nx.number_of_nodes(Network)))
f1.write("\nThe number of edges is: ")
f1.write(str(nx.number_of_edges(Network)))
f1.write("\nThe diameter is: ")
f1.write(str(nx.diameter(Network)))
Example #47
    def show_info(self):
        for node, attr in dict(self.bw.nodes).items():
            print('{0} -> {1}'.format(node, attr))
        print('Number of Nodes: {0}'.format(nx.number_of_nodes(self.bw)))
        print('Number of Edges: {0}'.format(nx.number_of_edges(self.bw)))
Example #48
for c, v in p.items():
    if (v != ville):
        G.remove_node(c)


# second clustering to determine group inside a city
p = community.best_partition(G, weight='weight')
for c, v in p.items():
    color_map.append(v)


# export clusters for the city
f = open("data/cluster_" + str(ville) + ".csv", "w")
for c, v in p.items():
    f.write(str(c) + ";" + str(v) + ";" + str(ville) + "\n")
f.close()


# export gexf format for gephi
# nx.write_gexf(G, "graph.gexf", prettyprint=True)

print ("Number of edges : ", i)
print ("Number of nodes : ", nx.number_of_nodes(G))

weights = [G[u][v]['weight']/7 for u,v in G.edges] # take weight into account for edges

# draw and show
nx.draw(G, node_size=50, width=weights, node_color=color_map)
plt.show()

Example #49
def weight(c):
    # average degree of component c: 2|E| / |V|
    return float(2 * nx.number_of_edges(c) / nx.number_of_nodes(c))
Example #50
def _commonwalkkernel_exp(g1, g2, node_label, edge_label, beta):
    """Calculate walk graph kernels up to n between 2 graphs using exponential 
    series.

    Parameters
    ----------
    g1, g2 : NetworkX graph
        The two graphs between which the kernel is calculated.
    node_label : string
        Node attribute used as label.
    edge_label : string
        Edge attribute used as label.
    beta : integer
        Weight.

    Return
    ------
    kernel : float
        The common walk Kernel between 2 graphs.
    """

    # get tensor product / direct product
    gp = direct_product(g1, g2, node_label, edge_label)
    # return 0 if the direct product graph has no more than 1 node
    if nx.number_of_nodes(gp) < 2:
        return 0
    A = nx.adjacency_matrix(gp).todense()

    # diagonalize A and exponentiate the spectrum: exp(beta*A) = V diag(exp(beta*ew)) V^T
    ew, ev = np.linalg.eig(A)
    D = np.zeros((len(ew), len(ew)))
    for i in range(len(ew)):
        D[i][i] = np.exp(beta * ew[i])
    exp_D = ev * D * ev.T

    return exp_D.sum()
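Since the direct product graph is undirected, the eigendecomposition above is just the matrix exponential of beta * A; assuming SciPy is available, the same kernel value can be sketched more directly:

from scipy.linalg import expm

kernel = expm(beta * np.asarray(A)).sum()  # sum over all entries of exp(beta * A)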
def fit_spearmans(mass_boxes, range):
    ln_box_number = np.log(mass_boxes)
    ln_range = np.log(range)
    gradient = stats.spearmanr(ln_range, ln_box_number)[0] * (
        np.std(ln_box_number) / np.std(ln_range))
    constant = ln_box_number[0] - gradient * ln_range[0]
    return (gradient, constant)


if __name__ == "__main__":
    graph = graph_magic.get_graph_from_file(
        "../../BIOGRID-ORGANISM-3.5.165.tab2/BIOGRID-ORGANISM-Human_Immunodeficiency_Virus_1-3.5.165.tab2.txt"
    )
    # graph = graph_magic.get_graph_from_file(
    #     "../../BIOGRID-ORGANISM-3.5.165.tab2/BIOGRID-ORGANISM-Escherichia_coli_K12_MC4100_BW2952-3.5.165.tab2.txt")
    node_number = nx.number_of_nodes(graph)
    print(node_number)
    mass_boxes = fd.compact_box_burning(graph)
    print(mass_boxes)
    # plt.subplot(121)
    # nx.draw(graph, with_labels=True, font_weight='bold')
    # plt.show()
    max_length = len(mass_boxes)
    lengths = np.arange(1, max_length + 1)
    (spearman_gradient, spearman_constant) = fit_spearmans(mass_boxes, lengths)
    (pearson_gradient, pearson_constant) = np.polyfit(np.log(lengths),
                                                      np.log(mass_boxes),
                                                      deg=1)
    boxes_spearman_theory = list(
        map(box_theory, lengths, [spearman_gradient] * max_length,
            [node_number] * max_length))
Example #52
print(nx.is_connected(g))

# next, see how many pieces the graph is actually in
print(nx.number_connected_components(g))

# then, pull out each of these connected components
# and sort them by the number of nodes in them
graphs = list(nx.connected_component_subgraphs(g))
graphsSorted = sorted(graphs, key=len, reverse=True)

# for the top five largest graphs, print the number of nodes
# and draw the graph in a file
i = 0
for graph in graphsSorted[0:5]:
    i += 1
    print("num nodes in graph", i, ":", nx.number_of_nodes(graph))
    graphDegree = nx.degree(graph)

    # draw one set with name labels
    f1 = plt.figure()
    nx.draw(graph,
            node_size=[v * 10 for v in graphDegree.values()],
            with_labels=True,
            font_size=8)
    filename1 = 'graphLabels' + str(i) + '.png'
    f1.savefig(filename1)

    # draw one set without name labels
    f2 = plt.figure()
    nx.draw(graph, node_size=[v * 10 for v in graphDegree.values()])
    filename2 = 'graph' + str(i) + '.png'
    f2.savefig(filename2)
Example #53
0
def l_Connection_strength(G):
    l_Connection_strength_Dic = {}
    node_set = G.nodes()

    # number of l-order connected components
    for nid in node_set:
        Neighbor_Set = list(G.neighbors(nid))

        if len(Neighbor_Set) == 1:
            l_Connection_strength_Dic[nid] = 1.0
        elif len(Neighbor_Set) == 0:
            l_Connection_strength_Dic[nid] = 0.0
        elif len(Neighbor_Set) > 1:
            G_conn = nx.Graph()
            # pairs (vi, vj) of neighbors of nid
            Cluster_head_connection_set = []
            for i in range(0, len(Neighbor_Set)):
                # vi is a neighbor of the target node
                vi = Neighbor_Set[i]
                n_vi_2 = []
                # n_vi is a neighbor of vi
                for n_vi in G.neighbors(vi):
                    n_vi_2 = list(set(n_vi_2).union(set(G.neighbors(n_vi))))
                n_vi_2 = list(
                    set(n_vi_2).difference(
                        set(G.neighbors(vi)).union(set([nid]))))
                for j in range(i + 1, len(Neighbor_Set)):
                    vj = Neighbor_Set[j]
                    fai_ij = list(
                        set(n_vi_2).intersection(set(G.neighbors(vj))))
                    if fai_ij:
                        Cluster_head_connection_set.append(list([vi, vj]))
            for k in Cluster_head_connection_set:
                G_conn.add_edge(k[0], k[1])
            H = len(list(nx.connected_components(G_conn)))
            G_conn_nodenums = int(nx.number_of_nodes(G_conn))
            # number of independent clusters
            independent_cluster_num = int(
                len(Neighbor_Set)) - int(G_conn_nodenums)
            # l-order connection count
            Connection_num = int(H) + int(independent_cluster_num)
            l_Connection_strength_Dic[nid] = round(
                float(Connection_num) / float(len(Neighbor_Set)), 3)
    return l_Connection_strength_Dic
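
# A minimal usage sketch (the graph choice is illustrative): the returned dict
# maps each node to the ratio of its l-order connection count to its degree.
import networkx as nx

G = nx.karate_club_graph()
strength = l_Connection_strength(G)
print(sorted(strength.items(), key=lambda kv: kv[1], reverse=True)[:5])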
Example #54
0
def get_stats(g, name):
    print("\n\nGetting Statistics for: "+name)
    print("Number of Nodes: "+str(networkx.number_of_nodes(g)))
    print("Number of Edges: "+str(networkx.number_of_edges(g)))
    print("Avg Clustering Coefficient: "+str(networkx.average_clustering(g)))
Example #55
0
def commonwalkkernel(*args,
                     node_label='atom',
                     edge_label='bond_type',
#                     n=None,
                     weight=1,
                     compute_method=None,
                     n_jobs=None,
                     verbose=True):
    """Calculate common walk graph kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    
    G1, G2 : NetworkX graphs
        Two graphs between which the kernel is calculated.
    node_label : string
        Node attribute used as symbolic label. The default node label is 'atom'.
    edge_label : string
        Edge attribute used as symbolic label. The default edge label is 'bond_type'.
    weight: integer
        Weight coefficient for walks of different lengths, which represents
        beta in the 'exp' method and gamma in the 'geo' method.
    compute_method : string
        Method used to compute the walk kernel. The following choices are
        available:

        'exp': method based on the exponential series applied to the direct
        product graph, as shown in reference [1]. The time complexity is
        O(n^6) for graphs with n vertices.

        'geo': method based on the geometric series applied to the direct
        product graph, as shown in reference [1]. The time complexity is
        O(n^6) for graphs with n vertices.

    n_jobs : int
        Number of jobs for parallelization. 

    Return
    ------
    Kmatrix : Numpy matrix
        Kernel matrix, each element of which is a common walk kernel between 2 
        graphs.
    """
#    n : integer
#        Longest length of walks. Only useful when applying the 'brute' method.
#        'brute': brute force, simply search for all walks and compare them.
    compute_method = compute_method.lower()
    # arrange all graphs in a list
    Gn = args[0] if len(args) == 1 else [args[0], args[1]]
    
    # remove graphs with only 1 node; the kernel is degenerate for them
    len_gn = len(Gn)
    Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_nodes(G) != 1]
    idx = [G[0] for G in Gn]
    Gn = [G[1] for G in Gn]
    if len(Gn) != len_gn:
        if verbose:
            print('\n %d graphs are removed as they have only 1 node.\n' %
                  (len_gn - len(Gn)))
        
    ds_attrs = get_dataset_attributes(
        Gn,
        attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
        node_label=node_label, edge_label=edge_label)
    if not ds_attrs['node_labeled']:
        for G in Gn:
            nx.set_node_attributes(G, '0', 'atom')
    if not ds_attrs['edge_labeled']:
        for G in Gn:
            nx.set_edge_attributes(G, '0', 'bond_type')
    if not ds_attrs['is_directed']:  # convert undirected graphs to directed
        Gn = [G.to_directed() for G in Gn]

    start_time = time.time()
    
    Kmatrix = np.zeros((len(Gn), len(Gn)))

    # ---- use pool.imap_unordered to parallel and track progress. ----
    def init_worker(gn_toshare):
        global G_gn
        G_gn = gn_toshare
    # direct product graph method - exponential
    if compute_method == 'exp':
        do_partial = partial(wrapper_cw_exp, node_label, edge_label, weight)
    # direct product graph method - geometric
    elif compute_method == 'geo':
        do_partial = partial(wrapper_cw_geo, node_label, edge_label, weight)  
    parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, 
                glbv=(Gn,), n_jobs=n_jobs, verbose=verbose)  
    
    
#    pool = Pool(n_jobs)
#    itr = zip(combinations_with_replacement(Gn, 2),
#              combinations_with_replacement(range(0, len(Gn)), 2))
#    len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
#    if len_itr < 1000 * n_jobs:
#        chunksize = int(len_itr / n_jobs) + 1
#    else:
#        chunksize = 1000
#
#    # direct product graph method - exponential
#    if compute_method == 'exp':
#        do_partial = partial(wrapper_cw_exp, node_label, edge_label, weight)
#    # direct product graph method - geometric
#    elif compute_method == 'geo':
#        do_partial = partial(wrapper_cw_geo, node_label, edge_label, weight)
#
#    for i, j, kernel in tqdm(
#            pool.imap_unordered(do_partial, itr, chunksize),
#            desc='calculating kernels',
#            file=sys.stdout):
#        Kmatrix[i][j] = kernel
#        Kmatrix[j][i] = kernel
#    pool.close()
#    pool.join()


#    # ---- direct running, normally use single CPU core. ----
#    # direct product graph method - exponential
#    itr = combinations_with_replacement(range(0, len(Gn)), 2)
#    if compute_method == 'exp':
#        for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout):
#            Kmatrix[i][j] = _commonwalkkernel_exp(Gn[i], Gn[j], node_label,
#                                                      edge_label, weight)
#            Kmatrix[j][i] = Kmatrix[i][j]
#
#    # direct product graph method - geometric
#    elif compute_method == 'geo':
#        for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout):
#            Kmatrix[i][j] = _commonwalkkernel_geo(Gn[i], Gn[j], node_label,
#                                                      edge_label, weight)
#            Kmatrix[j][i] = Kmatrix[i][j]


#    # search all paths use brute force.
#    elif compute_method == 'brute':
#        n = int(n)
#        # get all paths of all graphs before calculating kernels to save time, but this may cost a lot of memory for large dataset.
#        all_walks = [
#            find_all_walks_until_length(Gn[i], n, node_label, edge_label)
#                for i in range(0, len(Gn))
#        ]
#
#        for i in range(0, len(Gn)):
#            for j in range(i, len(Gn)):
#                Kmatrix[i][j] = _commonwalkkernel_brute(
#                    all_walks[i],
#                    all_walks[j],
#                    node_label=node_label,
#                    edge_label=edge_label)
#                Kmatrix[j][i] = Kmatrix[i][j]

    run_time = time.time() - start_time
    if verbose:
        print("\n --- kernel matrix of common walk kernel of size %d built in %s seconds ---"
              % (len(Gn), run_time))

    return Kmatrix, run_time, idx
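
# A hedged usage sketch: commonwalkkernel depends on helpers from the
# surrounding library (direct_product, get_dataset_attributes, parallel_gm),
# so this only illustrates the intended call shape, not a standalone run.
import networkx as nx

G1 = nx.path_graph(4)
G2 = nx.cycle_graph(4)
Kmatrix, run_time, idx = commonwalkkernel(
    [G1, G2], node_label='atom', edge_label='bond_type',
    weight=0.1, compute_method='exp', n_jobs=1, verbose=False)
print(Kmatrix)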
Example #56
0
def greedy(G):
    # opening assumed (the source is truncated here): greedily cover every
    # edge by repeatedly taking the highest-degree node
    covered_set = set()
    numCoveredEdges = 0
    idxes = list(G.nodes())
    idxes = sorted(idxes, key=lambda x: len(list(nx.neighbors(G, x))), reverse=True)
    pos = 0
    while numCoveredEdges < nx.number_of_edges(G):
        new_action = idxes[pos]
        covered_set.add(new_action)
        for neigh in nx.neighbors(G, new_action):
            if neigh not in covered_set:
                numCoveredEdges += 1
        pos += 1
    print('done')
    return len(covered_set)

if __name__ == '__main__':
    B = mmread('bcsstk01.mtx')
    g_undirected = nx.from_scipy_sparse_matrix(B)
    print(nx.number_of_nodes(g_undirected))
    print(nx.number_of_edges(g_undirected))

    api = VerelLib(sys.argv)

    opt = {}
    for i in range(1, len(sys.argv), 2):
        opt[sys.argv[i][1:]] = sys.argv[i + 1]

    # print(greedy(g_undirected))
    api.InsertGraph(g_undirected, is_test=True)

    # startup
    gen_new_graphs(opt)
    for i in range(10):
        api.lib.PlayGame(100, ctypes.c_double(1.0))
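
# For comparison, a hedged sketch using NetworkX's built-in 2-approximation
# of a minimum (weighted) vertex cover, the same covering problem the greedy
# routine above targets:
import networkx as nx
from networkx.algorithms.approximation import min_weighted_vertex_cover

g = nx.erdos_renyi_graph(30, 0.2, seed=1)
print(len(min_weighted_vertex_cover(g)))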
Example #57
0
# node_list = discussion_graph.nodes()
with open("graph_edges.csv", "r") as edge_file:
    for pair in edge_file:
        edge = pair.split(';')
        edge[1] = edge[1].strip()
        try:
            discussion_graph.node[edge[0]]['sender']
            discussion_graph.node[edge[1]]['sender']
            discussion_graph.add_edge(*edge)
        except KeyError:
            pass
print("Edges added.")

print("No. of Nodes: ", nx.number_of_nodes(discussion_graph))
print("No. of Edges: ", nx.number_of_edges(discussion_graph))
print("No. of Weakly Connected Components: ",
      nx.number_weakly_connected_components(discussion_graph))

# Uncomment the lines below to save the graph as a GEXF file
# nx.write_gexf(discussion_graph, "gexf/master_disc_graph.gexf")
# print("GEXF file generated.")

# Uncomment the lines below to read the graph from a GEXF file
# discussion_graph = nx.read_gexf("gexf/master_disc_graph.gexf", node_type=int)
# print("Graph loaded from GEXF file.")

for conn_subgraph in nx.weakly_connected_component_subgraphs(discussion_graph):
    sender_color_map = {}
    node_list = [int(x) for x in conn_subgraph.nodes()]
Example #58
0
    def read_graph(self,
                   filename,
                   file_type='edgelist',
                   separator='\t',
                   remove_whitespace=False):
        """
        Reads the graph from an edgelist, gml or graphml file and initializes the class attribute adjacency_matrix.

        Parameters
        ----------
        filename : string
            Name of the file, for example 'JohnsHopkins.edgelist', 'JohnsHopkins.gml', 'JohnsHopkins.graphml'.

        file_type : string
            Type of file. Currently only 'edgelist', 'gml' and 'graphml' are supported.
            Default = 'edgelist'

        separator : string
            used if file_type = 'edgelist'
            Default = '\t'

        remove_whitespace : bool
            Set to True when there is more than one kind of separator in the file.
            Default = False
        """
        if file_type == 'edgelist':

            dtype = {0: 'int32', 1: 'int32', 2: 'float64'}
            if remove_whitespace:
                df = pd.read_csv(filename,
                                 header=None,
                                 dtype=dtype,
                                 delim_whitespace=True)
            else:
                df = pd.read_csv(filename,
                                 sep=separator,
                                 header=None,
                                 dtype=dtype)

            source = df[0].values
            target = df[1].values
            if df.shape[1] == 2:
                weights = np.ones(source.shape[0])
            elif df.shape[1] == 3:
                weights = df[2].values
            else:
                raise Exception(
                    'graph_class_local.read_graph: df.shape[1] not in (2, 3)')

            self._num_vertices = max(source.max() + 1, target.max() + 1)
            #self.adjacency_matrix = source, target, weights

            self.adjacency_matrix = sp.csr_matrix(
                (weights, (source, target)),
                shape=(self._num_vertices, self._num_vertices))
            is_symmetric = (self.adjacency_matrix !=
                            self.adjacency_matrix.T).sum() == 0
            if not is_symmetric:
                # Symmetrize matrix, choosing larger weight
                sel = self.adjacency_matrix.T > self.adjacency_matrix
                self.adjacency_matrix = self.adjacency_matrix - self.adjacency_matrix.multiply(
                    sel) + self.adjacency_matrix.T.multiply(sel)
                assert (self.adjacency_matrix !=
                        self.adjacency_matrix.T).sum() == 0

            self._num_edges = self.adjacency_matrix.nnz

        elif file_type == 'gml':
            warnings.warn(
                "Loading a gml is not efficient, we suggest using an edgelist format for this API."
            )
            G = nx.read_gml(filename).to_undirected()
            self.adjacency_matrix = nx.adjacency_matrix(G).astype(np.float64)
            self._num_edges = nx.number_of_edges(G)
            self._num_vertices = nx.number_of_nodes(G)

        elif file_type == 'graphml':
            warnings.warn(
                "Loading a graphml is not efficient, we suggest using an edgelist format for this API."
            )
            G = nx.read_graphml(filename).to_undirected()
            self.adjacency_matrix = nx.adjacency_matrix(G).astype(np.float64)
            self._num_edges = nx.number_of_edges(G)
            self._num_vertices = nx.number_of_nodes(G)

        else:
            print('This file type is not supported')
            return

        self.compute_statistics()
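
# The max-symmetrization step above, as a self-contained sketch (illustrative
# 3-node matrix; sp is scipy.sparse, as in the class):
import numpy as np
import scipy.sparse as sp

A = sp.csr_matrix(np.array([[0., 2., 0.],
                            [1., 0., 3.],
                            [0., 0., 0.]]))
sel = A.T > A                    # entries where the transpose is larger
A_sym = A - A.multiply(sel) + A.T.multiply(sel)
assert (A_sym != A_sym.T).sum() == 0
print(A_sym.toarray())           # each pair keeps the larger weight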
Example #59
0
G, top_nodes = bipartite_graph(df)
V = stock_network(G, top_nodes)
V = remove_edges(V, 0)

w = weight(V)
w = pd.DataFrame(w)
beta = w.quantile(q=0.1)
alpha = w.quantile(q=0.9)

print('before removing any edges', nx.number_of_edges(V))
for u, v, data in list(V.edges(data=True)):  # copy: edges are removed inside
    #if data['weight'] < float(alpha):
    if data['weight'] > float(beta):
        V.remove_edge(u, v)
print('after removing edges', nx.number_of_edges(V), nx.number_of_nodes(V))
remove = [node for node, degree in V.degree() if degree == 0]
V.remove_nodes_from(remove)
print('after removing nodes with degree 0', nx.number_of_nodes(V))

df = pd.DataFrame()  # not the same df as above
for nodes1, nodes2, data in V.edges(data=True):
    w = 0
    p = 0
    w_df = []
    if nodes1 != nodes2:
        if len(list(nx.common_neighbors(G, nodes1, nodes2))) != 0:
            for nbr in nx.common_neighbors(G, nodes1, nodes2):
                p = p + 1
                w1 = G[nodes1][nbr]['weight']
                #print w1
Example #60
0
def g_preprocess(G, alpha=1,
                 measures=["D1", "D2", "D3", "D4", "D5"]):

    if isinstance(alpha, list) and len(alpha) == 5:
        alphalist = alpha
    elif isinstance(alpha, (int, float)):
        alphalist = [alpha] * 5
    else:
        print(
            "Error in the choice of alpha. "
            "Please specify a single number or a list of 5 values."
        )
        return (np.nan,) * 12

    # Make an independent copy of the graph
    G = G.copy()

    if G.number_of_nodes() < 3:
        print("Graph must have at least 3 nodes.")
        return (np.nan,) * 12

    # From multigraph to graph
    if type(G) == nx.MultiGraph:
        print("MultiGraph converted to Graph")
        G1 = nx.Graph()
        G1.add_nodes_from(G.nodes(data=True))
        for u, v, data in G.edges(data=True):
            w = data["weight"] if "weight" in data else 1.0
            if G1.has_edge(u, v):
                G1[u][v]["weight"] += w
            else:
                G1.add_edge(u, v, weight=w)
        G = G1.copy()
    elif type(G) == nx.MultiDiGraph:
        print("MultiDiGraph converted to DiGraph")
        G1 = nx.DiGraph()
        G1.add_nodes_from(G.nodes(data=True))
        for u, v, data in G.edges(data=True):
            w = data["weight"] if "weight" in data else 1.0
            if G1.has_edge(u, v):
                G1[u][v]["weight"] += w
            else:
                G1.add_edge(u, v, weight=w)
        G = G1.copy()

    # Remove loops (materialize the generator before removing edges)
    loops = list(nx.selfloop_edges(G))
    if loops:
        print("WARNING: Loops will be ignored.")
        G.remove_edges_from(loops)

    # Check if all existing arcs have weights, otherwise assign value of 1
    # Check for negative weights, zero weights and weight lower than 1
    arcweights = [e[2]["weight"] for e in G.edges.data() if "weight" in e[2]]
    numweights = len(arcweights)
    arcweights = set(arcweights)
    if any(w < 1 for w in arcweights):
        print(
            "Graph contains arcs with negative or zero weights,"
            " or weights lower than 1. Weights must be >= 1."
        )
    if numweights != len(G.edges):
        print(
            "WARNING: weights are not specified for all arcs."
            " Each arc must have a weight >= 1.\n"
            "Missing weights are automatically set equal to 1."
        )
        for u, v, data in G.edges(data=True):
            if "weight" not in data:
                data["weight"] = 1

    # Sums the weight of all arcs
    totalWEI = 0
    if "D3" in measures:
        for u, v, data in G.edges(data=True):
            totalWEI += data["weight"]

    n1 = nx.number_of_nodes(G) - 1

    # Calculates degree and weighted degree, taking alpha into account
    if type(G) == nx.Graph:
        if any(m in measures for m in ["D1", "D2", "D5"]):
            deg = dict(nx.degree(G))
        else:
            deg = np.nan
        indeg = outdeg = wei_insum_alpha_list = wei_outsum_alpha_list = np.nan

        # Calculate weighted degree, taking alpha into account
        # case of different alphas for each metric
        if len(set(alphalist)) != 1:
            wei_sum_alpha_list = [0, 0]
            
            # Only needed for D3 and D4
            if "D3" in measures:
                wei_sum_alpha_list.append(weisumalpha(G, alphalist[2]))
            else:
                wei_sum_alpha_list += [0]
                
            if "D4" in measures:
                wei_sum_alpha_list.append(weisumalpha(G, alphalist[3]))
            else:
                wei_sum_alpha_list += [0]

            wei_sum_alpha_list += [0]
        else:
            if any(m in measures for m in ["D3", "D4"]):
                if alphalist[0] != 1:
                    wei_sum_alpha = {}
                    for node in G.nodes():
                        wei_sum_alpha[node] = sum(
                            [
                                e[2]["weight"] ** alphalist[0]
                                for e in list(G.edges(node, data=True))
                            ]
                        )
                else:
                    wei_sum_alpha = dict(nx.degree(G, weight="weight"))
            else:
                wei_sum_alpha = 0

            wei_sum_alpha_list = (
                [0, 0] + [wei_sum_alpha] * 2 + [0]
            )  # Only needed for D3 and D4

    elif type(G) == nx.DiGraph:
        deg = wei_sum_alpha_list = np.nan
        if any(m in measures for m in ["D1", "D2", "D5"]):
            indeg = dict(G.in_degree())
            outdeg = dict(G.out_degree())
        else:
            indeg = outdeg = np.nan

        if len(set(alphalist)) != 1:
            wei_insum_alpha_list = [0, 0]
            wei_outsum_alpha_list = [0, 0]
            
            # Only needed for D3 and D4
            if "D3" in measures:
                insum, outsum = weiinoutsumalpha(G, alphalist[2])
                wei_insum_alpha_list.append(insum)
                wei_outsum_alpha_list.append(outsum)
            else:
                wei_insum_alpha_list.append(0)
                wei_outsum_alpha_list.append(0)
                
            if "D4" in measures:
                insum, outsum = weiinoutsumalpha(G, alphalist[3])
                wei_insum_alpha_list.append(insum)
                wei_outsum_alpha_list.append(outsum)
            else:
                wei_insum_alpha_list.append(0)
                wei_outsum_alpha_list.append(0)

            wei_insum_alpha_list += [0]
            wei_outsum_alpha_list += [0]
        else:
            if any(m in measures for m in ["D3", "D4"]):
                if alphalist[0] != 1:
                    wei_outsum_alpha = {}
                    wei_insum_alpha = {}
                    for node in G.nodes():
                        wei_outsum_alpha[node] = sum(
                            [
                                e[2]["weight"] ** alphalist[0]
                                for e in list(G.out_edges(node, data=True))
                            ]
                        )
                        wei_insum_alpha[node] = sum(
                            [
                                e[2]["weight"] ** alphalist[0]
                                for e in list(G.in_edges(node, data=True))
                            ]
                        )
                else:
                    wei_insum_alpha = dict(G.in_degree(weight="weight"))
                    wei_outsum_alpha = dict(G.out_degree(weight="weight"))
            else:
                wei_insum_alpha = wei_outsum_alpha = 0

            wei_insum_alpha_list = [0, 0] + [wei_insum_alpha] * 2 + [0]
            wei_outsum_alpha_list = [0, 0] + [wei_outsum_alpha] * 2 + [0]

    # Calculate max and min arc weight
    if G.number_of_edges() > 0:
        hasedges = True
        if any(m in measures for m in ["D1", "D3", "D4"]):
            maxwij = max(dict(G.edges).items(),
                         key=lambda x: x[1]["weight"])[1]["weight"]
        else:
            maxwij = np.nan
        if "D3" in measures:
            minwij = min(dict(G.edges).items(),
                         key=lambda x: x[1]["weight"])[1]["weight"]
        else:
            minwij = np.nan
    else:
        print(
            "Graph has no edges (remember that loops have been removed). "
            "The function will return all zeros, regardless of normalization."
        )
        hasedges = False
        maxwij = np.nan
        minwij = np.nan

    return (
        G,
        n1,
        deg,
        indeg,
        outdeg,
        wei_insum_alpha_list,
        wei_outsum_alpha_list,
        wei_sum_alpha_list,
        totalWEI,
        maxwij,
        minwij,
        hasedges,
    )
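
# A hedged usage sketch: weisumalpha / weiinoutsumalpha are helpers from the
# same module (not shown here), so a uniform alpha is used to avoid them.
import networkx as nx

G = nx.Graph()
G.add_weighted_edges_from([(1, 2, 2.0), (2, 3, 1.0), (1, 3, 3.0), (3, 4, 1.0)])
(G2, n1, deg, indeg, outdeg, wins, wouts, wsums,
 totalWEI, maxwij, minwij, hasedges) = g_preprocess(G, alpha=1)
print(n1, deg, totalWEI, maxwij, minwij, hasedges)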