def correlation_betweenness_degree_on_ER():
    N = 1000
    p = 0.004
    G = nx.erdos_renyi_graph(N, p)
    print nx.info(G)
    ND, ND_lambda = ECT.get_number_of_driver_nodes(G)
    ND, driverNodes = ECT.get_driver_nodes(G)

    degrees = []
    betweenness = []
    tot_degree = nx.degree_centrality(G)
    tot_betweenness = nx.betweenness_centrality(G,weight=None)

    for node in driverNodes:
        degrees.append(tot_degree[node])
        betweenness.append(tot_betweenness[node])

    with open("results/driver_degree_ER.txt", "w") as f:
        for x in degrees:
            print >> f, x
    with open("results/driver_betweenness_ER.txt", "w") as f:
        for x in betweenness:
            print >> f, x

    with open("results/tot_degree_ER.txt", "w") as f:
        for key, value in tot_degree.iteritems():
            print >> f, value

    with open("results/tot_betweenness_ER.txt", "w") as f:
        for key, value in tot_betweenness.iteritems():
            print >> f, value
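# Hedged follow-up sketch (not part of the original example): the function above
# only writes raw centrality values to disk, so a small helper like this one,
# assuming SciPy is available, could report the actual degree-betweenness
# correlation of the driver nodes collected above.
def report_driver_correlation(degrees, betweenness):
    from scipy.stats import pearsonr
    r, p_value = pearsonr(degrees, betweenness)
    print "Pearson r = %.3f (p = %.3g)" % (r, p_value)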
Example #2
def get_community_biconnections(commid, df, graph):

    print "Find biconnections in the community :", commid
    
    print nx.info(graph)

    biconnected_nodes = []
    for e in graph.edges():
        a, b = e
        if graph.has_edge(b,a) and a != b:
            # check if already there in the list
            if (a,b) in biconnected_nodes or (b,a) in biconnected_nodes:
                pass
            else:
                biconnected_nodes.append((a,b))

    print "number of biconnected edges:", len(biconnected_nodes)

    source_nodes, target_nodes = zip(*biconnected_nodes)
    all_subgraph_nodes = set(source_nodes).union(set(target_nodes))
    print "Unique nodes in the biconnections", len(all_subgraph_nodes)

    # get the subgraph of all biconnected edges 
    # plot 
    dfname = biconnbase+ str(commid) + '_biz_info.csv'
    bicon_df = df.loc[all_subgraph_nodes]
    print bicon_df.shape
    bicon_df.to_csv(dfname)

    # subgraph generated from the coordinates
    sgname = biconnbase+ str(commid) + '_sg_edgelist.ntx'
    sg = graph.subgraph(list(all_subgraph_nodes))
    print nx.info(sg)
    nx.write_edgelist(sg, sgname, data=False)
def correlation_betweenness_degree_on_ErdosNetwork():
    G = nx.read_pajek("dataset/Erdos971.net")
    isolated_nodes = nx.isolates(G)
    G.remove_nodes_from(isolated_nodes)

    print nx.info(G)
    ND, ND_lambda = ECT.get_number_of_driver_nodes(G)
    print "ND = ", ND
    print "ND lambda:", ND_lambda
    ND, driverNodes = ECT.get_driver_nodes(G)
    print "ND =", ND

    degrees = []
    betweenness = []
    tot_degree = nx.degree_centrality(G)
    tot_betweenness = nx.betweenness_centrality(G,weight=None)

    for node in driverNodes:
        degrees.append(tot_degree[node])
        betweenness.append(tot_betweenness[node])

    with open("results/driver_degree_Erdos.txt", "w") as f:
        for x in degrees:
            print >> f, x
    with open("results/driver_betweenness_Erdos.txt", "w") as f:
        for x in betweenness:
            print >> f, x
    with open("results/tot_degree_Erdos.txt", "w") as f:
        for key, value in tot_degree.iteritems():
            print >> f, value

    with open("results/tot_betweenness_Erdos.txt", "w") as f:
        for key, value in tot_betweenness.iteritems():
            print >> f, value
def correlation_betweenness_degree_on_BA():
    n = 1000
    m = 2
    G = nx.barabasi_albert_graph(n, m)

    print nx.info(G)
    ND, ND_lambda = ECT.get_number_of_driver_nodes(G)
    print "ND = ", ND
    print "ND lambda:", ND_lambda
    ND, driverNodes = ECT.get_driver_nodes(G)
    print "ND =", ND

    degrees = []
    betweenness = []
    tot_degree = nx.degree_centrality(G)
    tot_betweenness = nx.betweenness_centrality(G,weight=None)

    for node in driverNodes:
        degrees.append(tot_degree[node])
        betweenness.append(tot_betweenness[node])

    with open("results/driver_degree_BA.txt", "w") as f:
        for x in degrees:
            print >> f, x
    with open("results/driver_betweenness_BA.txt", "w") as f:
        for x in betweenness:
            print >> f, x
    with open("results/tot_degree_BA.txt", "w") as f:
        for key, value in tot_degree.iteritems():
            print >> f, value

    with open("results/tot_betweenness_BA.txt", "w") as f:
        for key, value in tot_betweenness.iteritems():
            print >> f, value
def correlation_betweenness_degree_on_WS():
    n = 1000
    k = 4
    p = 0.01
    G = nx.watts_strogatz_graph(n, k, p)

    print nx.info(G)
    ND, ND_lambda = ECT.get_number_of_driver_nodes(G)
    ND, driverNodes = ECT.get_driver_nodes(G)

    degrees = []
    betweenness = []

    tot_degree = nx.degree_centrality(G)
    tot_betweenness = nx.betweenness_centrality(G,weight=None)

    for node in driverNodes:
        degrees.append(tot_degree[node])
        betweenness.append(tot_betweenness[node])

    with open("results/driver_degree_WS.txt", "w") as f:
        for x in degrees:
            print >> f, x
    with open("results/driver_betweenness_WS.txt", "w") as f:
        for x in betweenness:
            print >> f, x

    with open("results/tot_degree_WS.txt", "w") as f:
        for key, value in tot_degree.iteritems():
            print >> f, value

    with open("results/tot_betweenness_WS.txt", "w") as f:
        for key, value in tot_betweenness.iteritems():
            print >> f, value
Example #6
 def build_graph(self):
     
     '''
     Build a networkx graph from WordNet
     '''
     
     for synset in list(self.wordnet.all_synsets()):
     #for synset in list(self.wordnet.all_synsets('n'))[:10]:
         self.G.add_node(synset.name)
         self.add_edges(synset, synset.hypernyms())
         self.add_edges(synset, synset.hyponyms())
         self.add_edges(synset, synset.instance_hypernyms())
         self.add_edges(synset, synset.instance_hyponyms())
         self.add_edges(synset, synset.member_holonyms())
         self.add_edges(synset, synset.substance_holonyms())
         self.add_edges(synset, synset.part_holonyms())
         self.add_edges(synset, synset.member_meronyms())
         self.add_edges(synset, synset.substance_meronyms())
         self.add_edges(synset, synset.part_meronyms())
         self.add_edges(synset, synset.attributes())
         self.add_edges(synset, synset.entailments())
         self.add_edges(synset, synset.causes())
         self.add_edges(synset, synset.also_sees())
         self.add_edges(synset, synset.verb_groups())
         self.add_edges(synset, synset.similar_tos())
         
     print nx.info(self.G)
Example #7
 def summary(self):
     """
     User friendly wrapping and display of graph properties
     """
     print "\n Graph Summary:"
     print nx.info(self.g)
     pass
def EigenvectorCentralityExperiment(G, min_target, max_target, filename):
    print nx.info(G)
    print 'average eigenvector centrality: ', UF.average_eigenvector_centrality(G)

    X_eigenvector_centrality = []
    Y_nD = []

    target = min_target
    while target <= max_target:       
        copyG = G.copy()
        new_G = SimulatedAnnealing(copyG, target, eigenvector_centrality_cost_function)
        eigenvector_centrality = UF.average_eigenvector_centrality(new_G)
        nD = SCT.controllability(new_G)

        X_eigenvector_centrality.append(eigenvector_centrality)
        Y_nD.append(nD)

        print "target = ", target, " EC = ", eigenvector_centrality, 'nD = ', nD
        
        target += 0.01

    s = 'results/' + filename
    with open(s, "w") as f:
        for i in range(len(Y_nD)):
            print >> f, "%f %f"%(X_eigenvector_centrality[i], Y_nD[i])

    return (X_eigenvector_centrality, Y_nD)
Example #9
def computemaxweight(graph,path,protlist,path_lenght,alone):
	elements=[]
	nodes=[]
	ess=[]
	print "------Starting Graph------"
	print nx.info(graph)
	
	for i in path:
		max=0
		for j in path[i]:
			count=0
			for k in range(0,len(j)-1,1):
				count=count+float(graph.edge[j[k]][j[k+1]]["weight"])
			if count>max:
				max=count
				elements=j
		
		ess.extend(elements[1:len(elements)-1])
	ess=list(set(ess))
	H=graph.subgraph(ess+protlist)
	#H.add_nodes_from(protlist)
	graphred=check(H,path_lenght,ess,protlist,path)
	nx.write_gpickle(graphred,"weightmaxfilter.gpickle")
	f1=open("weightproteins.txt","w")
	for i in graphred.nodes():
		if i in alone:
			pass
		else:	
			f1.write(i+"\n")
Example #10
def main():
	test_user = '******'
	test_graph = buildSocialGraph(test_user, twitter_only=True)
	
	print nx.info(test_graph)
	
	"""
Example #11
def simpleDisplay(ipaddress = "localhost",port = "9999"):
    '''
    Restore and display results using the graph saved after each processing run
    :return:
    '''
    # client,repo,stargazers,user = getRespond()
    # g = addTOGraph(repo,stargazers,user)
    # addEdge(stargazers,client,g)
    # getPopular(g)
    # savaGraph1(g)
    # top10(g)
    g = nx.read_gpickle("data/github.1")
    print nx.info(g)
    print

    mtsw_users = [n for n in g if g.node[n]['type'] == 'user']
    h = g.subgraph(mtsw_users)

    print nx.info(h)
    print
    d = json_graph.node_link_data(h)
    json.dump(d, open('data/githubRec.json', 'w'))
    cmdstr = "python3 -m http.server %s" % port
    webbrowser.open_new_tab("http://%s:%s/%s.html"%(ipaddress,port, "display_githubRec"))
    os.system(cmdstr)
Example #12
    def splitGraphs(self,labels):
        """
        split the graph into several subgraphs by labels
        """
       
        id_label = []

        ## load labels
        ## Node id start from 0
        fid = open('labels','r')
        for line in fid:
            field = line.strip()
            id_label.append(int(field))
        fid.close()
    
        ## calculate the number of different labels
        nodup_labels = set(id_label)
        K = len(nodup_labels)

        for i in range(0,K):
            f = open('subgraph_' + str(i) +'.sub','w')
            
            subG = []
            
            for j in range(0,len(id_label)):
                if id_label[j] == i:
                    subG.append(str(j))

            
            G = self.G.subgraph(subG)
            print nx.info(G)

            nx.write_edgelist(G,f)
Example #13
def get_distance_dict(filename):
    g = nx.read_edgelist(filename)
    print "Read in edgelist file ", filename
    print nx.info(g)
    path_length = nx.all_pairs_shortest_path_length(g)
    print len(path_length.keys())
    print path_length
Example #14
def data_prep(infofile, graphfile):
    # read in the total biz file
    # Preparing the data files 
    df = pd.read_csv(infofile)

    #removing duplicate records
    df = df.groupby('pageid').first()
    print df.columns
    print df.index
    print df.shape
    print df.isnull().sum()
    df = df[df['latitude'] != 'N']
    print "Dropping loc, lat = N: ", df.shape
    df = df.dropna() #df[df['latitude'] != 'N']
    print "Dropping NA", df.shape #df.isnull().sum()


    # read in the original edgelist as a directed graph
    globalgraph= nx.read_edgelist(graphfile, create_using=nx.DiGraph(), nodetype=int)
    print "Original Graph:", nx.info(globalgraph)

    print "Keeping it consistent, removing all nodes not in database:"
    pageids = list(df.index)
    prunedglobalgraph = globalgraph.subgraph(pageids)
    print nx.info(prunedglobalgraph)
    return df, globalgraph
Example #15
def check(graph,path_lenght,removable,protlist,path):
	rem=[]
	ess=[]

	for i in removable:
		count=0
		flag=0
		rem.append(i)
		H=graph.copy()
		H.remove_nodes_from(rem)
		for j in path:
			try:
				lenght=nx.shortest_path_length(H, j[0], j[1])
			except:
				lenght=-1
			
			if lenght==-1 or (lenght+1)!=path_lenght[j]:
				ess.append(i)
				flag=1
				break
			else:
				count=count+1
		if count==len(path):
			rem.append(i)
		elif flag==1:
			rem.remove(i)
	
	graph.add_nodes_from(protlist)
	graph.remove_nodes_from(rem)
	print nx.info(graph)
	#drawgraph(graph,protlist)	
	return graph
Example #16
    def __init__(
        self,
        celltypes_file="nx_celltype_graph.edgelist",
        cells_file="nx_cell_graph.edgelist",
        format="edgelist",
        scale=1.0,
    ):
        """
        celltypes_file -- file containing celltype-celltype connectivity graph
        
        cells_file -- file containing cell-cell connectivity graph

        format -- string representation of file format of the celltypes_file and cells_file.

        """
        self.__celltype_graph = self._read_celltype_graph(celltypes_file, format=format)
        if not self.__celltype_graph:
            self.__celltype_graph = self._make_celltype_graph("connmatrix.txt", "cells.txt", scale=scale)
        self.__cell_graph = self._read_cell_graph(cells_file, format=format)
        if not self.__cell_graph:
            self.__cell_graph = self._make_cell_graph()
        start = datetime.now()
        print nx.info(self.__cell_graph)
        end = datetime.now()
        delta = end - start
        config.BENCHMARK_LOGGER.info("Computed Graph info in: %g" % (delta.seconds + 1e-6 * delta.microseconds))
Example #17
 def info(self , verbose = False):
     print "--------------Cloud_Reg_graph info:-----------------"
     print nx.info(self)
     ncloud = 0
     nreg = 0
     for node in self.nodes_iter():
         if isinstance(node, nx.DiGraph): 
             ncloud += 1
             if node.number_of_nodes() == 0:
                 if verbose: print "cloud ::\n empty cloud\n"
                 continue
             if verbose: print "cloud ::"
             for prim in node.nodes_iter():
                 assert isinstance(prim, cc.circut_module), "cloud type %s " % str(prim.__class__)              
                 if verbose: prim.__print__()
         else:
             assert isinstance(node ,cc.circut_module) ,"reg type %s " % str(node.__class__)
             if verbose:
                 print "fd ::"                
                 node.__print__()                
             nreg += 1
     assert  len(self.big_clouds) == ncloud ,"%d %d"%(len(self.big_clouds),ncloud)
     print "Number of cloud:%d " % ncloud
     print "Number of register:%d" % nreg
     print "--------------------------------------"
def add_partitions_to_digraph(graph, partitiondict):
	''' Add the partition numbers to a graph - in this case, using this to update the digraph, with partitions calc'd off the undirected graph. Yes, it's a bad hack.
	'''
	g = graph
	nx.set_node_attributes(g, 'partition', partitiondict)
	nx.info(g)
	return
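# Hedged usage sketch (not part of the original): the partition dict typically
# comes from the python-louvain package, computed on the undirected projection
# of the digraph and then written back onto the digraph with the helper above.
# The random digraph below is only a stand-in.
import networkx as nx
import community  # python-louvain

dg = nx.gnp_random_graph(50, 0.1, directed=True)
partitiondict = community.best_partition(dg.to_undirected())
add_partitions_to_digraph(dg, partitiondict)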
Example #19
def main():
    
    ### Undirected graph ###
    
    # Initialize model using the Petersen graph
    model=gmm.gmm(nx.petersen_graph())
    old_graph=model.get_base()
    model.set_termination(node_ceiling)
    model.set_rule(rand_add)
    
    # Run simulation with tau=4 and Poisson density for motifs
    gmm.algorithms.simulate(model,4)   

    # View results
    new_graph=model.get_base()
    print(nx.info(new_graph))
    
    # Draw graphs
    old_pos=nx.spring_layout(old_graph)
    new_pos=nx.spring_layout(new_graph,iterations=2000)
    fig1=plt.figure(figsize=(15,7))
    fig1.add_subplot(121)
    #fig1.text(0.1,0.9,"Base Graph")
    nx.draw(old_graph,pos=old_pos,node_size=25,with_labels=False)
    fig1.add_subplot(122)
    #fig1.text(0.1,0.45,"Simulation Results")
    nx.draw(new_graph,pos=new_pos,node_size=20,with_labels=False)
    fig1.savefig("undirected_model.png")
    
    ### Directed graph ###
    
    # Initialize model using random directed Barabasi-Albert model
    directed_base=nx.barabasi_albert_graph(25,2).to_directed()
    directed_model=gmm.gmm(directed_base)
    directed_model.set_termination(node_ceiling)
    directed_model.set_rule(rand_add)
    
    # Run simulation with tau=4 and Poisson density for motifs
    gmm.algorithms.simulate(directed_model,4)
    
    # View results
    new_directed=directed_model.get_base()
    print(nx.info(new_directed))
    
    # Draw directed graphs
    old_dir_pos=nx.spring_layout(directed_base)
    new_dir_pos=nx.spring_layout(new_directed,iterations=2000)
    fig2=plt.figure(figsize=(7,10))
    fig2.add_subplot(211)
    fig2.text(0.1,0.9,"Base Directed Graph")
    nx.draw(directed_base,pos=old_dir_pos,node_size=25,with_labels=False)
    fig2.add_subplot(212)
    fig2.text(0.1,0.45, "Simulation Results")
    nx.draw(new_directed,pos=new_dir_pos,node_size=20,with_labels=False)
    fig2.savefig("directed_model.png")
    
    # Export files
    nx.write_graphml(model.get_base(), "base_model.graphml")
    nx.write_graphml(directed_model.get_base(), "directed_model.graphml")
    nx.write_graphml(nx.petersen_graph(), "petersen_graph.graphml")
def main111():
  if 1:
    G = nx.read_edgelist(infname)
    print nx.info(G)
    # Graph adj matix
    A = nx.to_scipy_sparse_matrix(G)
    print type(A)
    from scipy import sparse, io
    io.mmwrite("Results/test.mtx", A)
    exit()
    # write to disk clustering coeffs for this graph
    snm.get_clust_coeff([G], 'orig', 'mmonth')
    # write to disk egienvalue
    snm.network_value_distribution([G], [], 'origMmonth')

  if 0:
    edgelist = np.loadtxt(infname, dtype=str, delimiter='\t')
    print edgelist[:4]
    idx = np.arange(len(edgelist))
    np.random.shuffle(idx)
    subsamp_edgelist = edgelist[idx[:100]]
    G = nx.Graph()
    G.add_edges_from([(long(x), long(y)) for x, y in subsamp_edgelist])

  # visualize this graph
  # visualize_graph(G)
  exit()

  G = nx.Graph()
  G.add_edges_from([(long(x), long(y)) for x, y in edgelist])
  print nx.info(G)
  print 'Done'
def draw_citing_users_follower_count():
  df = pd.read_csv('Results/twtrs_follower_network.tsv', sep='\t', header=None)
  df.columns = ['src', 'followers']

  count_followers = lambda row: len(row[1].split(','))
  df['fCnt'] = df.apply(count_followers, axis=1)

  edglstdf = pd.read_csv('Results/clustered_relevant_users.tsv', sep='\t', header=None)
  eldf = edglstdf.apply(lambda row: [x.lstrip('[').rstrip(']') for x in row])
  eldf.columns = ['src','trg']


  eldf[['src']] = eldf[['src']].apply(pd.to_numeric)
  df = pd.merge(eldf,df, on='src')
  df[['src','trg','fCnt']].to_csv('Results/procjson_edglst.tsv', sep='\t', header=False, index=False)

  g=nx.Graph()
  g.add_edges_from(df[['src','trg']].values)
  print nx.info(g)

  f, axs = plt.subplots(1, 1, figsize=(1.6*6., 1*6.))
  # nx.draw_networkx(g, pos=nx.spring_layout(g), ax=axs,  with_labels=False, node_size=df[['fCnt']]/float(len(df)), alpha=.5)
  pos=nx.spring_layout(g)
  # nx.draw_networkx(g, pos=pos, ax=axs, with_labels=False, alpha=.5, node_size=30)
  nx.draw_networkx_edges(g, pos=pos, ax=axs, alpha=0.5, width=0.8)
  nx.draw_networkx_nodes(g, pos=pos, ax=axs, nodelist=list(df['src'].values), node_color='#7A83AC', node_size=30, alpha=0.5)
  nx.draw_networkx_nodes(g, pos=pos, ax=axs, nodelist=list(df['trg'].values), node_color='k', node_size=20, alpha=0.8)

  axs.patch.set_facecolor('None')
  axs.set_xticks([]) #[None]# grid(True, which='both')
  axs.set_yticks([]) #[None]# grid(True, which='both')
  plt.savefig('figures/outfig', bbox_inches='tight', pad_inches=0)

  return
def phone_or_postid_pruning_network_construction(edge_list=
                path+'connected-component-analysis/network-profiling-data/cid6_analysis/cid6-edge-list',
            edge_phone_count=path+'connected-component-analysis/network-profiling-data/cid6_analysis/edge-count-phone.jl',
            phone_edge_list=path+'connected-component-analysis/network-profiling-data/cid6_analysis/cid6-edge-list-tagged-phone'):
    G = nx.read_edgelist(edge_list, delimiter='\t')
    print nx.info(G)
    threshold = 50
    count = 0
    forbidden_phones = set()
    with codecs.open(edge_phone_count, 'r', 'utf-8') as f:
        for line in f:
            obj = json.loads(line[0:-1])
            if int(obj.keys()[0]) >= threshold:
                forbidden_phones = forbidden_phones.union(set(obj[obj.keys()[0]]))
    with codecs.open(phone_edge_list, 'r', 'utf-8') as f:
        for line in f:
            fields = re.split('\t', line[0:-1])
            phones = set(fields[2:])
            if len(phones.intersection(forbidden_phones)) != 0:
                count += 1
                G.remove_edge(fields[0], fields[1])
    print str(count),' edges pruned from graph'
    ccs = sorted(nx.connected_components(G), key=len, reverse=True)
    print len(ccs)
    print len(ccs[0])
Example #23
def simplify_edges(G):
    nodes = []
    print "Compacting nodes of degree 2"

    for n in G.nodes():
        if G.degree(n) == 2:
            nodes.append(n)
        G.node[n]['pos'] = n

    nodes = list(set(nodes))

    print "Simplifying an estimated %i nodes...."%len(nodes)
    while nodes:
        while nodes:
            nodes = list(set(nodes))
            n = nodes.pop()
            neighbors = G.neighbors(n)
            G.remove_node(n)
            G.add_path(neighbors)
            for nn in neighbors:
                if G.degree(nn) == 2:
                    nodes.append(nn)

        for n in G.nodes():
            if G.degree(n) == 2:
                nodes.append(n)
        nodes = list(set(nodes))

    G = max(nx.connected_component_subgraphs(G), key=len)
    print nx.info(G)
    #return G
    for n in G.nodes():
        G.node[n]['pos'] = n
Example #24
def Gephi_Graph(r_serv, graphpath, mincard, maxcard, insert_type):
    """Create Gephi Graph by calling a "Sub function": Create_Graph

    :param r_serv: -- connection to the Redis database
    :param graphpath: -- the absolute path of the .gephi graph created.
    :param mincard: -- the minimum links between 2 nodes to be created
    :param maxcard: -- the maximum links between 2 nodes to be created
    :param insert_type: -- the type of datastructure used to create the graph.

    In fact, this function is just here to be able to choose between two kinds of
    Redis database structure: one which is a sorted set and the other a simple
    set.

    """
    g = nx.Graph()

    if (insert_type == 0):

        for h in r_serv.smembers("hash"):
            Create_Graph(r_serv, g, h, graphpath, mincard, maxcard)

    elif (insert_type == 2):

        for h in r_serv.zrange("hash", 0, -1):
            Create_Graph(r_serv, g, h, graphpath, mincard, maxcard)

    nx.write_gexf(g,graphpath)
    print nx.info(g)
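# Hedged usage sketch (not part of the original): a running local Redis instance
# and the Create_Graph helper defined elsewhere in the module are assumed; the
# output path and the min/max link thresholds below are made up.
import redis

r_serv = redis.StrictRedis(host="localhost", port=6379, db=0)
Gephi_Graph(r_serv, "graphs/hashes.gexf", mincard=2, maxcard=500, insert_type=0)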
def cid6_phone_edges_not_in_postid(tagged_phone_edge_list=path + 'connected-component-analysis/network-profiling-data/cid6_analysis/cid6-edge-list-tagged-phone',
        tagged_postid_edge_list=path + 'connected-component-analysis/network-profiling-data/cid6_analysis/cid6-edge-list-tagged-postid',
        out_file=path + 'connected-component-analysis/network-profiling-data/cid6_analysis/cid6-edge-list-tagged-phone-minus-postid'
):

    postid_edges = set()
    G = nx.Graph()
    with codecs.open(tagged_postid_edge_list, 'r', 'utf-8') as f:
        for line in f:
            fields = re.split('\t', line[0:-1])
            postid_edges.add(tuple(sorted([fields[0], fields[1]])))
    # ccs = sorted(nx.connected_components(G), key=len, reverse=True)
    # print len(ccs)
    # print len(ccs[0])
    # print nx.info(G)
    out = codecs.open(out_file, 'w', 'utf-8')
    with codecs.open(tagged_phone_edge_list, 'r', 'utf-8') as f:
        for line in f:
            fields = re.split('\t', line[0:-1])
            edge = sorted([fields[0], fields[1]])
            if tuple(edge) not in postid_edges:
                out.write(line)
                G.add_edge(fields[0], fields[1])

    out.close()
    print nx.info(G)
    ccs = sorted(nx.connected_components(G), key=len, reverse=True)
    print len(ccs)
    print len(ccs[0])
Example #26
def get_k_core(reviews_path,k_val):
	# Report start of process
	print "=================================="
	print "EXTRACTING K-CORE OF PID GRAPH    "
	print "=================================="

	print "AT STEP #1: Determine which reviewer reviewed which products"
#	with ufora.remotely.downloadAll():
	(PID_to_lines,PID_to_reviewerID) = get_PID_facts(reviews_path)	

	print "At STEP #2: Created weighted edges"
#	with ufora.remotely.downloadAll():
	weighted_edges = get_weighted_edges(PID_to_reviewerID)

	print "AT STEP #3: Create PID graph structure"
#	with ufora.remotely.downloadAll():
	PID_graph = create_graph(PID_to_reviewerID,weighted_edges)	
	print nx.info(PID_graph)	

	print "AT STEP #4: Extracting K-core"
#	with ufora.remotely.downloadAll():
	k_core_graph = nx.k_core(PID_graph,k_val)
	print nx.info(k_core_graph)
	pickle.dump(k_core_graph, open("graph", 'w'))
	
	print "DONE!"
Example #27
def test_traversals():

    name = 'ca-CondMat.edges.gz'
    G = nx.read_edgelist(name, comments='#', create_using=nx.Graph(), nodetype=int, data=False, edgetype=None)
    G.name = name
    print nx.info(G)
    
    N = G.number_of_nodes()
    f = 0.1
    
    pk_real = degrees2pk([G.degree(v) for v in G])   # real node degree distribution
    qk_expected = qk_f(pk_real, f)         # expected node degree distribution in a BFS sample of fraction f 
    
    start_node =  random.choice(G.nodes())
    S = bfs(G, start_node, int(N*f))
    qk_sampled = degrees2pk([G.degree(v) for v in S])
    pk_corrected = estimate_pk(qk_sampled, f)
    
    print """
%2.1f  - mean degree in raw BFS, actually sampled
%2.1f  - mean degree in raw BFS, expected by the RG(pk) model.
----
%2.1f  - mean degree in G, calculated from the BFS sample according to the RG(pk) model.
%2.1f  - real mean degree in G
""" % (mean_degree(qk_sampled), mean_degree(qk_expected), mean_degree(pk_corrected),  mean_degree(pk_real))
Example #28
def main():
    # Load Zachary data, randomly delete nodes, and report
    zachary=nx.Graph(nx.read_pajek("karate.net")) # Do not want graph in default MultiGraph format
    zachary.name="Original Zachary Data"
    print(nx.info(zachary))
    zachary_subset=rand_delete(zachary, 15) # Remove half of the structure
    zachary_subset.name="Randomly Deleted Zachary Data"
    print(nx.info(zachary_subset))
    
    # Create model, and simulate
    zachary_model=gmm.gmm(zachary_subset,R=karate_rule,T=node_ceiling_34)
    gmm.algorithms.simulate(zachary_model,4,poisson=False,new_name="Simulation from sample")  # Use tau=4 because data is so small (it's fun!)
    
    # Report and visualize
    print(nx.info(zachary_model.get_base()))
    fig=plt.figure(figsize=(30,10))
    fig.add_subplot(131)
    nx.draw_spring(zachary,with_labels=False,node_size=45,iterations=5000)
    plt.text(0.01,-0.1,"Original Karate Club",color="darkblue",size=20)
    fig.add_subplot(132)
    nx.draw_spring(zachary_subset,with_labels=False,node_size=45,iterations=5000)
    plt.text(0.01,-0.1,"Random sample of Karate Club",color="darkblue",size=20)
    fig.add_subplot(133)
    nx.draw_spring(zachary_model.get_base(),with_labels=False,node_size=45,iterations=5000)
    plt.text(0.01,-0.1,"Simulation from random sample",color="darkblue",size=20)
    plt.savefig("zachary_simulation.png")
Example #29
def calGraph(infile, mode = 1):
	#init Parameter
	inputpath = 'edge_list/'
	outputpath = 'network_output/'
	n = mode
	Data_G = inputpath+infile+'_'+str(n)+'.edgelist'
	
	#init Graph
	G = nx.read_edgelist(Data_G, create_using=nx.DiGraph())
	GU = nx.read_edgelist(Data_G)
	#basic info
	print nx.info(G),'\n', nx.info(GU) 
	average_degree = float(sum(nx.degree(G).values()))/len(G.nodes())
	print 'average degree :', average_degree 
	degree_histogram = nx.degree_histogram(G)
	print 'degree histogram max :', degree_histogram[1]
	density = nx.density(G)
	print 'density :', density

	#Approximation
	#Centrality
	degree_centrality = nx.degree_centrality(G)
	print 'degree centrality top 10 !', sorted_dict(degree_centrality)[:2]
	out_degree_centrality = nx.out_degree_centrality(G)
	print 'out degree centrality top 10 !', sorted_dict(out_degree_centrality)[:2]
Example #30
def kinetic(fileName='P2_1_9_p07', M=None, N=None, axis=None):
	#FILE  = "/home/xingzhong/MicrosoftGestureDataset-RC/data/%s"%fileName
	FILE  = "/Users/xingzhong/Downloads/MicrosoftGestureDataset-RC/data/%s"%fileName
	truth = np.genfromtxt(FILE+'.tagstream', delimiter=';', skiprows=1, dtype=None, converters={0: lambda x: (int(x) *1000 + 49875/2)/49875})
	nd = np.loadtxt(FILE+'.csv')
	nd = nd[np.where(nd[:,80]!=0)]# remove empty rows
	idx, ndd = map(int, nd[:,0]), nd[:, 1:] # unpack index and data
	m, n = ndd.shape
	panel = pd.Panel( ndd.reshape((m, 20, 4)), items=idx, major_axis=AGENTS, minor_axis=['x','y','z','v'] ).transpose(2, 0, 1)
	panel['dx'] = 1000* panel['x'].diff().fillna(0)
	panel['dy'] = 1000* panel['y'].diff().fillna(0)
	panel['dz'] = 1000* panel['z'].diff().fillna(0)
	panel = panel.transpose(2, 1, 0)
	samples =  [s for s in instance_generator(panel, truth)] 
	g = EventGraph()
	X = [np.array([0])]
	for aid, seq in enumerate (samples[0]):
		if M is not None and aid > M :
			break
		for t, atom in enumerate (seq):
			if N is not None and t > N:
				break
			elif not atom is None and t!=0:
				if axis:
					g.addEvent( Event(t, aid, atom ))
					X.append(atom)
				else:
					g.addEvent( Event(t, aid, np.array([atom[axis]]) ))
					X.append( np.array([atom[axis]]) )

	g.buildEdges(delta = 1)
	print nx.info(g)
	return g, X
Example #31
    def query(self,
              topic,
              max_depth=4,
              config=None,
              pivot_on=list(),
              dont_pivot_on=list(['enrichment', 'classification']),
              direction='successors'):
        """

            :param topic: a graph to return the context of.  At least one node ID in topic \
             must be in full graph g to return any context.
            :param max_depth: The maximum distance from the topic to search
            :param config: The titanDB configuration to use if not using the one configured with the plugin
            :param pivot_on: A list of attribute types to pivot on.
            :param dont_pivot_on: A list of attribute types to not pivot on.
            :param direction: The direction to traverse the graph
            :return: subgraph in networkx format

            NOTE: If an attribute is in both pivot_on and dont_pivot_on it will not be pivoted on
        """
        if config is None:
            config = self.titandb_config

        # Connect to TitanDB Database
        titan_graph = TITAN_Graph(config)

        # Convert the topic nodes into titanDB eids
        current_nodes = set()
        eid_uri_map = {}
        # Validate the node URI
        for node in topic.nodes():
            titan_node = titan_graph.vertices.index.get_unique(
                "uri", topic.node[node]["uri"])
            if titan_node:
                current_nodes.add(titan_node.eid)
                eid_uri_map[titan_node.eid] = node
        topic_nodes = frozenset(current_nodes)
        subgraph_nodes = current_nodes
        #sg = copy.deepcopy(topic)
        sg = nx.MultiDiGraph()
        sg.add_nodes_from(topic.nodes(data=True))
        sg.add_edges_from(topic.edges(data=True))
        distances = {node: 0 for node in topic.nodes()}
        #    Below 1 line is probably not necessary
        #    pivot_edges = list()
        #    print "Initial current Nodes: {0}".format(current_nodes)  # DEBUG
        for i in range(1, max_depth + 1):
            new_nodes = set()
            new_out_edges = set()
            new_in_edges = set()
            for eid in current_nodes:
                #            properties = og.node[node]
                node = titan_graph.vertices.get(eid)
                # If all directions, get all neighbors
                if direction == 'all' or eid in topic_nodes:
                    try:
                        new_nodes = new_nodes.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).both".format(eid))
                        })
                    except:
                        pass
                    try:
                        new_out_edges = new_out_edges.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).outE".format(eid))
                        })
                    except:
                        pass
                    try:
                        new_in_edges = new_in_edges.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).inE".format(eid))
                        })
                    except:
                        pass
                # If there is a list of things to NOT pivot on, pivot on everything else
                elif dont_pivot_on and 'attribute' in node and node.map(
                )['attribute'] not in dont_pivot_on:
                    try:
                        new_nodes = new_nodes.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).both".format(eid))
                        })
                    except:
                        pass
                    try:
                        new_out_edges = new_out_edges.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).outE".format(eid))
                        })
                    except:
                        pass
                    try:
                        new_in_edges = new_in_edges.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).inE".format(eid))
                        })
                    except:
                        pass
                # Otherwise, only get all neighbors if the node is to be pivoted on.
                elif 'attribute' in node and \
                      node['attribute'] in pivot_on and \
                      node['attribute'] not in dont_pivot_on:
                    try:
                        new_nodes = new_nodes.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).both".format(eid))
                        })
                    except:
                        pass
                    try:
                        new_out_edges = new_out_edges.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).outE".format(eid))
                        })
                    except:
                        pass
                    try:
                        new_in_edges = new_in_edges.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).inE".format(eid))
                        })
                    except:
                        pass
                # If not all neighbors and not in pivot, if we are traversing up, get predecessors
                elif direction == 'predecessors':
                    # add edges to make predecessors successors for later probability calculation
                    try:
                        new_nodes = new_nodes.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).out".format(eid))
                        })
                    except:
                        pass
                    # add the reverse edges. These opposite of these edges will get placed in the subgraph
                    try:
                        new_in_edges = new_in_edges.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).inE".format(eid))
                        })
                    except:
                        pass
                # Otherwise assume we are traversing down and get all successors
                else:  # default to successors
                    try:
                        new_nodes = new_nodes.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).both".format(eid))
                        })
                    except:
                        pass
                    try:
                        new_out_edges = new_out_edges.union({
                            n.eid
                            for n in titan_graph.gremlin.query(
                                "g.v({0}).outE".format(eid))
                        })
                    except:
                        pass

            # Remove nodes from new_nodes that are already in the subgraph so we don't overwrite their topic distance
            current_nodes = new_nodes - subgraph_nodes
            # combine the new nodes into the subgraph nodes set
            subgraph_nodes = subgraph_nodes.union(current_nodes)

            # Copy nodes, out-edges, in-edges, and reverse in-edges into subgraph
            # Add nodes
            for neighbor_eid in new_nodes:
                attr = titan_graph.vertices.get(neighbor_eid).map()
                sg.add_node(attr['uri'], attr)
                eid_uri_map[neighbor_eid] = attr['uri']
            # Add predecessor edges
            for out_eid in new_out_edges:
                out_edge = titan_graph.edges.get(out_eid)
                attr = out_edge.map()
                sg.add_edge(eid_uri_map[out_edge._outV],
                            eid_uri_map[out_edge._inV], out_eid, attr)
            # Add successor edges & reverse pivot edges
            for in_eid in new_in_edges:
                in_edge = titan_graph.edges.get(in_eid)
                attr = in_edge.map()
                attr['origin'] = "subgraph_creation_pivot"
                sg.add_edge(eid_uri_map[in_edge._inV],
                            eid_uri_map[in_edge._outV], in_eid, attr)

            # Set the distance from the topic on the nodes in the graph
            for eid in current_nodes:
                if eid_uri_map[eid] not in distances:
                    distances[eid_uri_map[eid]] = i

    #        logging.debug("Current nodes: {0}".format(current_nodes))  # DEBUG

    # add the distances to the subgraph
        nx.set_node_attributes(sg, "topic_distance", distances)

        logging.debug(nx.info(sg))  # DEBUG
        # Return the subgraph
        return sg
Example #32
import csv
import networkx as nx
from operator import itemgetter
import community

# Read in the nodelist file
with open('edge_graph.csv', 'r') as nodecsv:
    nodereader = csv.reader(nodecsv)
    nodes = [n for n in nodereader][1:]

# Get a list of just the node names (the first item in each row)
node_names = [n[0] for n in nodes]

# Read in the edgelist file
with open('edge_graph.csv', 'r') as edgecsv:
    edgereader = csv.reader(edgecsv)
    edges = [tuple(e) for e in edgereader][1:]

# Print the number of nodes and edges in our two lists
print(len(node_names))
print(len(edges))

G = nx.Graph()  # Initialize a Graph object
G.add_nodes_from(node_names)  # Add nodes to the Graph
G.add_edges_from(edges)  # Add edges to the Graph
print(nx.info(G))  # Print information about the Graph
print(G)
nx.draw(G)
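# The `community` import above (python-louvain) is unused in this excerpt; a
# hedged sketch of the community-detection step it usually supports:
partition = community.best_partition(G)
print('Detected %d communities' % len(set(partition.values())))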
Example #33
    def load_network(self):
        # read METIS file
        self.G = utils.read_metis(self.DATA_FILENAME)
        self.initial_number_of_nodes = self.G.number_of_nodes(
        )  # used for computing metrics

        # Alpha value used in prediction model
        self.prediction_model_alpha = self.G.number_of_edges() * (
            self.num_partitions / self.G.number_of_nodes()**2)

        if self.use_one_shot_alpha:
            self.prediction_model_alpha = self.one_shot_alpha

        # Order of nodes arriving
        self.arrival_order = list(range(0, self.G.number_of_nodes()))

        if self.SIMULATED_ARRIVAL_FILE == "":
            # mark all nodes as needing a shelter
            self.simulated_arrival_list = [1] * self.G.number_of_nodes()
        else:
            with open(self.SIMULATED_ARRIVAL_FILE, "r") as ar:
                self.simulated_arrival_list = [
                    int(line.rstrip('\n')) for line in ar
                ]

        # count the number of people arriving in the simulation
        self.number_simulated_arrivals = 0
        for arrival in self.simulated_arrival_list:
            self.number_simulated_arrivals += arrival

        if self.verbose > 0:
            print("Graph loaded...")
            print(nx.info(self.G))
            if nx.is_directed(self.G):
                print("Graph is directed")
            else:
                print("Graph is undirected")

        self.reset()

        # load displacement prediction weights
        if (self.PREDICTION_LIST_FILE == ""):
            self.predicted_displacement_weights = [
                1
            ] * self.G.number_of_nodes()
        else:
            with open(self.PREDICTION_LIST_FILE, 'r') as plf:
                self.predicted_displacement_weights = [
                    float(line.rstrip('\n')) for line in plf
                ]

        # preserve original node/edge weight when modification functions are applied
        if self.graph_modification_functions:
            node_weights = {
                n[0]: n[1]['weight']
                for n in self.G.nodes_iter(data=True)
            }
            nx.set_node_attributes(self.G, 'weight_orig', node_weights)

            edge_weights = {(e[0], e[1]): e[2]['weight']
                            for e in self.G.edges_iter(data=True)}
            nx.set_edge_attributes(self.G, 'weight_orig', edge_weights)

        # make a copy of the original graph since we may alter the graph later on
        # and may want to refer to information from the original graph
        self.originalG = self.G.copy()
Example #34
def label_prop():
    G = nx.read_edgelist("facebook_combined.txt",
                         create_using=nx.Graph(),
                         nodetype=int)
    print nx.info(G)

    for i in G.nodes():
        G.node[i]['label'] = i
        G.node[i]['ID'] = i
        G.node[i]['l_1'] = 0
        G.node[i]['l_2'] = 0
        G.node[i]['l_next'] = 0
    '''
    for n,nbrs in G.adjacency_iter():
        for nbr,edict in nbrs.items():
            if nbr==200:
                print n, nbrs, G.node[nbr]['label']
    '''
    mainStop = False
    i = 0
    while (i < 100):
        if i == 99:
            set_communities = set()
            for n in G.nodes():
                set_communities.add(G.node[n]['label'])
            print "the number of communities after 100 iterations==", len(
                set_communities)

        i += 1
        mainStop = False
        l1_stop = True
        l2_stop = True
        for n in G.nodes():
            if (not (G.node[n]['label'] == G.node[n]['l_1'])):
                l1_stop = False
        for n in G.nodes():
            if (not (G.node[n]['label'] == G.node[n]['l_2'])):
                l2_stop = False

        #print l1_stop, l2_stop
        if (not (l1_stop or l2_stop)):
            #print "in not loop"
            for n, nbrs in G.adjacency_iter():
                dict = {}
                dict.clear()
                for nbr, d in nbrs.items():
                    temp = G.node[nbr]['label']
                    if not dict.has_key(temp):
                        dict[temp] = 1
                    else:
                        dict[temp] += 1
                max_key = 0
                max_key = max(dict, key=dict.get)
                G.node[n]['l_next'] = max_key
                G.node[n]['l_2'] = G.node[n]['l_1']
                G.node[n]['l_1'] = G.node[n]['label']
                G.node[n]['label'] = max_key
            '''
            for n in G.nodes():
                G.node[n]['l_2']=G.node[n]['l_1']
                G.node[n]['l_1']=G.node[n]['label']
                G.node[n]['label']=G.node[n]['l_next']
            '''

        else:
            print "The Community converges"
            mainStop = True

            print i
            return i
Example #35
def main(adminIsPoint=False):
    path = os.path.realpath(
        os.path.abspath(
            os.path.split(inspect.getfile(inspect.currentframe()))[0]))
    path = os.path.split(path)[0]
    dash = os.path.join(path, r'dashboard.xlsm')
    ctrl = pd.read_excel(dash, sheetname="AGGREGATE", index_col=0)
    district = ctrl['Weight'].loc['DISTRICT']

    logging.basicConfig(filename=os.path.join(path, 'runtime', district,
                                              "PCS_Criticality_log.log"),
                        level=logging.INFO,
                        format="%(asctime)s-%(levelname)s: %(message)s")
    logging.info("Starting Criticality Process")
    print "Running: Criticality Analysis on %s. Do not interrupt" % district
    # Path Settings
    outpath = os.path.join(path, 'Outputs', '%s' % district)
    runtime = os.path.join(path, r'PCS\Criticality\runtime\%s\\' % district)
    for d in [outpath, runtime]:
        if not os.path.isdir(d):
            os.mkdir(d)
    NETWORK_IN = os.path.join(path, r'runtime\%s\\' % district)
    OD_IN = os.path.join(path, 'PCS\Criticality\input', '%s' % district)
    DATA_IN = os.path.join(path, 'PCS\Criticality\Vietnam_Data_Layers')
    inAdmin = os.path.join(DATA_IN, 'Poverty_Communes_2009.shp')
    inNetworkFile = os.path.join(NETWORK_IN, 'Network.csv')

    crs_in = {'init': 'epsg:4326'}  #WGS 84

    #Create folders for analysis
    for d in [outpath, runtime, OD_IN]:
        if not os.path.isdir(d):
            os.mkdir(d)
    #Error checking - Check input data
    for curFile in [dash, inNetworkFile, inAdmin, DATA_IN, OD_IN, NETWORK_IN]:
        if not os.path.exists(curFile):
            logging.error("No input found: %s" % curFile)
            raise ValueError("No input found: %s" % curFile)

    inNetwork = pd.read_csv(inNetworkFile)
    ctrldf = pd.read_excel(dash, sheetname="CRITICALITY", index_col='COL_ID')
    #Inputs
    network = os.path.join(runtime, 'Network.shp')

    #Network Prep
    fillvalue = inNetwork['iri_med'].mean()
    inNetwork['TC_iri_med'] = inNetwork['iri_med'].fillna(fillvalue)
    inNetwork['total_cost'] = inNetwork['length'] * (
        ctrldf['Base_cost_km'][0] +
        (ctrldf['IRI_Coeff'][0] * inNetwork['TC_iri_med']))
    ginNetwork = gpd.GeoDataFrame(inNetwork,
                                  crs=crs_in,
                                  geometry=inNetwork['Line_Geometry'].map(
                                      shapely.wkt.loads))
    ginNetwork.to_file(network, driver='ESRI Shapefile')
    logging.info("Successfully loaded data")
    if not adminIsPoint:
        prepareAdminCentroids(ginNetwork, inAdmin, crs_in,
                              os.path.join(OD_IN, 'adm_centroids.shp'))
        logging.info("Created admin centroids")

    def makeOrigin(n, ctrldf):
        origindict = {
            'name':
            ctrldf['OName'][n],
            'file':
            os.path.join(path, 'PCS', 'Criticality', 'input', district,
                         '%s.shp' % ctrldf['OName'][n]),
            'scalar_column':
            ctrldf['OScalar'][n]
        }
        return origindict

    def makeDestination(n, ctrldf):
        destdict = {
            'name':
            ctrldf['DName'][n],
            'file':
            os.path.join(path, 'PCS', 'Criticality', 'input', district,
                         '%s.shp' % ctrldf['DName'][n]),
            'penalty':
            ctrldf['DPenalty'][n],
            'importance':
            ctrldf['DImportance'][n],
            'annual':
            ctrldf['DAnnual'][n],
            'scalar_column':
            ctrldf['DScalar'][n]
        }
        return destdict

    origin_1, origin_2, origin_3, origin_4, origin_5 = makeOrigin(
        0, ctrldf), makeOrigin(1, ctrldf), makeOrigin(2, ctrldf), makeOrigin(
            3, ctrldf), makeOrigin(4, ctrldf)
    originlist = {
        '%s' % ctrldf['OName'][0]: origin_1,
        '%s' % ctrldf['OName'][1]: origin_2,
        '%s' % ctrldf['OName'][2]: origin_3,
        '%s' % ctrldf['OName'][3]: origin_4,
        '%s' % ctrldf['OName'][4]: origin_5,
    }
    destination_1, destination_2, destination_3, destination_4, destination_5 = makeDestination(
        0, ctrldf), makeDestination(1, ctrldf), makeDestination(
            2, ctrldf), makeDestination(3, ctrldf), makeDestination(4, ctrldf)
    destinationlist = {
        '%s' % ctrldf['DName'][0]: destination_1,
        '%s' % ctrldf['DName'][1]: destination_2,
        '%s' % ctrldf['DName'][2]: destination_3,
        '%s' % ctrldf['DName'][3]: destination_4,
        '%s' % ctrldf['DName'][4]: destination_5,
    }
    logging.debug("Opened origins and destinations")
    # Preparation of the network
    gdf_points, gdf_node_pos, gdf = net_p.prepare_centroids_network(
        origin_1['file'], network)
    # Create Networkx MultiGraph object from the GeoDataFrame
    G = net_p.gdf_to_simplified_multidigraph(gdf_node_pos, gdf, simplify=False)
    # Change the MultiGraph object to Graph object to reduce computation cost
    G_tograph = net_p.multigraph_to_graph(G)
    logging.debug(
        'Loaded road network: number of disconnected components is: %d' %
        nx.number_connected_components(G_tograph))
    # Observe the properties of the Graph object
    print nx.info(G_tograph)
    # Take only the largest subgraph with all connected links
    len_old = 0
    for g in nx.connected_component_subgraphs(G_tograph):
        if len(list(g.edges())) > len_old:
            G1 = g
            len_old = len(list(g.edges()))
    G_sub = G1.copy()

    print nx.info(G_sub)

    # Save the simplified transport network into a GeoDataFrame
    gdf_sub = net_p.graph_to_df(G_sub)
    blank, gdf_node_pos2, gdf_new = net_p.prepare_newOD(
        origin_1['file'], gdf_sub)

    #Road Network Graph prep
    G2_multi = net_p.gdf_to_simplified_multidigraph(gdf_node_pos2,
                                                    gdf_new,
                                                    simplify=False)
    Filedump(gdf_new, 'Road_Lines', runtime)
    Filedump(gdf_node_pos2, 'Road_Nodes', runtime)
    G2 = net_p.multigraph_to_graph(G2_multi)
    gdf2 = net_p.graph_to_df(G2)
    nLink = len(G2.edges())

    Outputs, cost_list, iso_list = [], [], []

    for z in ctrldf.index:
        if (((ctrldf['ComboO'][z]) != 0) & ((ctrldf['ComboD'][z]) != 0) &
            (pd.notnull(ctrldf['ComboO'][z])) &
            (pd.notnull(ctrldf['ComboD'][z]))):
            Q = int(ctrldf['ComboNumber'][z])
            logging.info(
                'Computing | combination %s as origin and %s as destination ' %
                (ctrldf['ComboO'][z], ctrldf['ComboD'][z]))
            xx = calculateOD(originlist['%s' % ctrldf['ComboO'][z]],
                             destinationlist['%s' % ctrldf['ComboD'][z]], Q,
                             gdf_sub, G2, nLink, gdf2, runtime, ctrldf)
            Outputs.append(xx)
            cost_list.append("Social_Cost_%s" % Q)
            iso_list.append("Isolated_Trips_%s" % Q)

    Output = inNetwork.drop(["geometry", 'TC_iri_med', 'total_cost'], axis=1)
    for o_d_calc in range(0, len(Outputs)):
        Output = Output.merge(Outputs[o_d_calc]['summary'],
                              how='left',
                              on='ID')

    Output['Cost_total'] = Output[cost_list].sum(axis=1)
    Output['Iso_total'] = Output[iso_list].sum(axis=1)
    Output['CRIT_SCORE'] = (
        ctrldf['Disrupt_Weight'][0] * Output['Cost_total'] +
        ctrldf['Isolate_Weight'][0] * Output['Iso_total'])
    Output['CRIT_SCORE'] = (
        (Output['CRIT_SCORE'] - Output['CRIT_SCORE'].min()) /
        (Output['CRIT_SCORE'].max() - Output['CRIT_SCORE'].min()))
    logging.info("Calculated PCS Criticality")
    FileOut(Output, 'criticality_output', outpath)
Example #36
pSelection = 0.5
# When do add-ons start
groupPercent = 0.5
#Intervals
intervalPrimary = 10
intervalSecondary = 10
#Percentage to decide about Hubs
percentageP = 0.07
percentageS = 0.15
#Get Node Lists
nodeList = readNodes(nodeList, fileName)
#Create the graph
generateHInGBg(G_hybrid_btG, nodeList, kAverage, pSelection, groupPercent,
               intervalPrimary, intervalSecondary, percentageP, percentageS)

print nx.info(G_hybrid_btG)

#Hubs
percentage = 0.07
print getHubsAndDegrees(G_hybrid_btG, percentage)
print getHubs(G_hybrid_btG, percentage)

#Degree
PlotDegreeDistribution(G_hybrid_btG, loglogplot=False)
print 'Average Network Degree', np.average(G_hybrid_btG.degree().values())

#Clusters
PlotClusteringDistribution(G_hybrid_btG, loglogplot=False)
print nx.average_clustering(G_hybrid_btG)
print nx.clustering(G_hybrid_btG)
Example #37
def get_basic_info(graph_dict_list):
    for item in graph_dict_list:
        print(item["name"])
        print(nx.info(item["graph"]))
Example #38
def n2v_embedding(G,
                  targets,
                  verbose=False,
                  sample_size=0.5,
                  outfile_name="test.emb",
                  p=-100,
                  q=-100,
                  binary_path="./node2vec",
                  parameter_range=[0.25, 0.50, 1, 2, 4],
                  embedding_dimension=128):

    ## construct the embedding and return the binary..
    #./node2vec -i:graph/karate.edgelist -o:emb/karate.emb -l:3 -d:24 -p:0.3 -dr -v

    clf = OneVsRestClassifier(linear_model.LogisticRegression(),
                              n_jobs=mp.cpu_count())
    if verbose:
        print(nx.info(G))

    N = len(G.nodes())

    ## get the graph..
    if not os.path.exists("tmp"):
        os.makedirs("tmp")

    tmp_graph = "tmp/tmpgraph.edges"
    out_graph = "tmp/tmpgraph.emb"

    number_of_nodes = len(G.nodes())
    number_of_edges = len(G.edges())

    if verbose:
        print("Graph has {} edges and {} nodes.".format(
            number_of_edges, number_of_nodes))

    f = open(tmp_graph, "w+")

    #f.write(str(number_of_nodes)+" "+str(number_of_edges)+"\n")
    for e in G.edges(data=True):
        f.write(
            str(e[0]) + " " + str(e[1]) + " " + str(float(e[2]['weight'])) +
            "\n")
    f.close()

    if verbose:
        print("N2V training phase..")

    vals = parameter_range
    copt = 0
    cset = [0, 0]
    dim = embedding_dimension

    if float(p) > -100 and float(q) > -100:
        print("Runing specific config of N2V.")
        call_node2vec_binary(tmp_graph,
                             outfile_name,
                             p=p,
                             q=q,
                             directed=False,
                             weighted=True)

    else:

        ## commence the grid search
        for x in vals:
            for y in vals:
                call_node2vec_binary(tmp_graph,
                                     outfile_name,
                                     p=x,
                                     q=y,
                                     directed=False,
                                     weighted=True,
                                     binary=binary_path)
                print("parsing {}".format(outfile_name))
                rdict = benchmark_node_classification(
                    outfile_name, G, targets, percent=float(sample_size))

                mi, ma, misd, masd = rdict[float(sample_size)]
                if ma > copt:
                    if verbose:
                        print("Updating the parameters: {} {}".format(
                            ma, cset))

                    cset = [x, y]
                    copt = ma
                else:
                    print("Current optimum {}".format(ma))

                call(["rm", "-rf",
                      outfile_name])  ## when updated, delete the file

        print("Final iteration phase..")

        call_node2vec_binary(tmp_graph,
                             outfile_name,
                             p=cset[0],
                             q=cset[1],
                             directed=False,
                             weighted=True,
                             binary="./node2vec")

        with open(outfile_name, 'r') as f:
            fl = f.readline()
            print("Resulting dimensions:{}".format(fl))

        call(["rm", "-rf", "tmp"])
Example #39
def topographic_metrics(wn):
    # Get a copy of the graph
    G = wn.get_graph()

    # Print general topographic information
    print(nx.info(G))

    # Plot node and edge attributes.
    junction_attr = wn.query_node_attribute('elevation',
                                            node_type=wntr.network.Junction)
    pipe_attr = wn.query_link_attribute('length', link_type=wntr.network.Pipe)
    wntr.graphics.plot_network(wn,
                               node_attribute=junction_attr,
                               link_attribute=pipe_attr,
                               title='Node elevation and pipe length',
                               node_size=40,
                               link_width=2)

    # Compute link density
    print("Link density: " + str(nx.density(G)))

    # Compute node degree
    node_degree = dict(G.degree())
    wntr.graphics.plot_network(wn,
                               node_attribute=node_degree,
                               title='Node Degree',
                               node_size=40,
                               node_range=[1, 5])

    # Compute number of terminal nodes
    terminal_nodes = G.terminal_nodes()
    wntr.graphics.plot_network(wn,
                               node_attribute=terminal_nodes,
                               title='Terminal nodes',
                               node_size=40,
                               node_range=[0, 1])
    print("Number of terminal nodes: " + str(len(terminal_nodes)))
    print("   " + str(terminal_nodes))

    # Compute pipes with diameter > threshold
    diameter = 0.508  # m (20 inches)
    pipes = wn.query_link_attribute('diameter', np.greater, diameter)
    wntr.graphics.plot_network(wn,
                               link_attribute=list(pipes.keys()),
                               title='Pipes > 20 inches',
                               link_width=2,
                               link_range=[0, 1])
    print("Number of pipes > 20 inches: " + str(len(pipes)))
    print("   " + str(pipes))

    # Compute nodes with elevation <= threshold
    elevation = 1.524  # m (5 feet)
    nodes = wn.query_node_attribute('elevation', np.less_equal, elevation)
    wntr.graphics.plot_network(wn,
                               node_attribute=list(nodes.keys()),
                               title='Nodes <= 5 ft elevation',
                               node_size=40,
                               node_range=[0, 1])
    print("Number of nodes <= 5 ft elevation: " + str(len(nodes)))
    print("   " + str(nodes))

    # Compute eccentricity, diameter, and average shortest path length
    # These all use an undirected graph
    uG = G.to_undirected()  # undirected graph
    if nx.is_connected(uG):
        ecc = nx.eccentricity(uG)
        wntr.graphics.plot_network(wn,
                                   node_attribute=ecc,
                                   title='Eccentricity',
                                   node_size=40,
                                   node_range=[15, 30])

        print("Diameter: " + str(nx.diameter(uG)))

        ASPL = nx.average_shortest_path_length(uG)
        print("Average shortest path length: " + str(ASPL))

    # Compute cluster coefficient
    clust_coefficients = nx.clustering(nx.Graph(G))
    wntr.graphics.plot_network(wn,
                               node_attribute=clust_coefficients,
                               title='Clustering Coefficient',
                               node_size=40)

    # Compute betweenness centrality
    bet_cen = nx.betweenness_centrality(G)
    wntr.graphics.plot_network(wn,
                               node_attribute=bet_cen,
                               title='Betweenness Centrality',
                               node_size=40,
                               node_range=[0, 0.4])
    central_pt_dom = G.central_point_dominance()
    print("Central point dominance: " + str(central_pt_dom))

    # Compute articulation points
    Nap = list(nx.articulation_points(uG))
    Nap = list(set(Nap))  # get the unique nodes in Nap
    Nap_density = float(len(Nap)) / uG.number_of_nodes()
    print("Density of articulation points: " + str(Nap_density))
    wntr.graphics.plot_network(wn,
                               node_attribute=Nap,
                               title='Articulation Point',
                               node_size=40,
                               node_range=[0, 1])

    # Compute bridges
    bridges = G.bridges()
    wntr.graphics.plot_network(wn,
                               link_attribute=bridges,
                               title='Bridges',
                               link_width=2,
                               link_range=[0, 1])
    Nbr_density = float(len(bridges)) / G.number_of_edges()
    print("Density of bridges: " + str(Nbr_density))

    # Compute spectral gap
    spectral_gap = G.spectral_gap()
    print("Spectral gap: " + str(spectral_gap))

    # Compute algebraic connectivity
    alg_con = G.algebraic_connectivity()
    print("Algebraic connectivity: " + str(alg_con))

    # Critical ratio of defragmentation
    fc = G.critical_ratio_defrag()
    print("Critical ratio of defragmentation: " + str(fc))

    # Compute closeness centrality
    clo_cen = nx.closeness_centrality(G)
    wntr.graphics.plot_network(wn,
                               node_attribute=clo_cen,
                               title='Closeness Centrality',
                               node_size=40)
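# Minimal usage sketch (assumptions: WNTR is installed and an EPANET INP file
# is available; the 'networks/Net3.inp' path below is a placeholder).
import wntr

wn = wntr.network.WaterNetworkModel('networks/Net3.inp')
topographic_metrics(wn)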
Exemplo n.º 40
0
import networkx as nx

g = nx.read_gpickle("data/github.gpickle.1")
print(nx.info(g))
Exemplo n.º 41
0
edgelist_format = []
for index, row in edgelist.iterrows():  # to get row indices of the data frame
    inside = []
    inside.append(edgelist.at[index, 'V1'])
    inside.append(edgelist.at[index, 'V2'])
    edgelist_format.append(inside)  # appending pairs to main list iteratively
edgelist_format[:10]
# note: `inside` itself only holds the pair from the most recent iteration; edgelist_format accumulates all pairs

len(edgelist_format)
# length is correct and last entry is correct - verified with data frame/csv file
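# Equivalent one-liner (a sketch, assuming edgelist is a pandas DataFrame with
# columns 'V1' and 'V2'); it should yield the same pair list as the loop above.
edgelist_pairs = edgelist[['V1', 'V2']].values.tolist()
edgelist_pairs[:10]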

# Adding edges to the graph
G.add_edges_from(edgelist_format)
nx.info(
    G
)  # getting the correct number of edges in the graph object (number of nodes also corresponds to the max value
# from the csv file)

# degree sequence sorted in descending order
degree_sequence = sorted([d for n, d in G.degree()], reverse=True)
degree_sequence[:50]

# Sampling 1000 nodes from the degree sequence
degree_sequence_sample = random.sample(degree_sequence, 1000)
degree_sequence_sample.sort(reverse=True)

# degree distribution for the true network
degreeCount1 = collections.Counter(degree_sequence)
degr, cont = zip(*degreeCount1.items())
plt.bar(degr, cont, width=0.80, color='b')
Exemplo n.º 42
0
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import sys
sys.path.append('./src')
from dijkstra2 import dijkstra_2_src

A = np.loadtxt('./WG59/wg59_dist.txt')
G = nx.from_numpy_matrix(A)

T = dijkstra_2_src(G, 1, 5)
#a = np.array([v for v in G.nodes()])

print(nx.info(T))

nx.draw(G)
nx.draw(T)
plt.show()
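# For comparison (a sketch): networkx's built-in Dijkstra on the same weighted
# graph; dijkstra_2_src is a project-local routine, so its exact output format
# is an assumption, but both should describe the same route from node 1 to 5.
print("nx.dijkstra_path 1 -> 5:", nx.dijkstra_path(G, 1, 5))
print("path length:", nx.dijkstra_path_length(G, 1, 5))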
Exemplo n.º 43
0
def isomorphic_test_on_prod_rules(orig, tdfname, gname=""):
	""""
	orig: path to original/refernce input graph
	tdfname: path fragment for a set of td pro rules
	gname: graph name (str)
	returns:
    """

	# if whole tree path
	# else, assume a path fragment
	print '... input graph  :', os.path.basename(orig)
	print '... prod rules path frag :', tdfname

	G = load_edgelist(orig)  # load edgelist into a graph obj
	N = G.number_of_nodes()
	M = G.number_of_edges()
	# +++ Graph Checks
	if G is None: sys.exit(1)
	G.remove_edges_from(G.selfloop_edges())
	giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
	G = nx.subgraph(G, giant_nodes)
	graph_checks(G)
	# --- graph checks
	G.name = gname
	print "\t", nx.info(G)

	files = glob(tdfname + "*.prs")
	stacked_df = pd.DataFrame()
	# mat_dict = {}
	# for i, x in enumerate(sorted(files)):
	# 	mat_dict[os.path.basename(x).split(".")[0].split("_")[-1]] = i
	# 	if DBG: print os.path.basename(x).split(".")[0].split("_")[-1]
	for prs in sorted(files):
		tname = os.path.basename(prs).split(".")
		tname = "_".join(tname[:2])
		# print prs
		# with open(prs, 'r') as f:  # read tree decomp from inddgo
		# 	lines = f.readlines()
		# 	lines = [x.rstrip('\r\n') for x in lines]
		df = pd.read_csv(prs, sep="\t", header=None)
		print tname
		df['cate'] = tname
		stacked_df = pd.concat([df, stacked_df])
	# print df.shape
	print "\nStacked prod rules\n", "~" * 20
	print "  ", stacked_df.shape
	if args['verb']: print stacked_df.to_string()
	stacked_df.to_csv("../Results/{}_stacked_df.tsv".format(gname), sep="\t")
	if os.path.exists(
			"../Results/{}_stacked_df.tsv".format(gname)): print 'Wrote:', "../Results/{}_stacked_df.tsv".format(gname)

	print "\nisomorphic union of the rules (_mod probs)\n", "~" * 20
	stacked_df.columns = ['rnbr', 'lhs', 'rhs', 'pr', df['cate'].name]
	iso_union, iso_interx = isomorph_intersection_2dfstacked(stacked_df)
	print "  ", iso_union.shape
	if args['verb']: print iso_union.to_string()

	print "\nIsomorphic intersection of the prod rules\n", "~" * 20
	iso_interx = iso_interx[[1,2,3,4]]
	#  print iso_interx.head(); exit()
	iso_interx.to_csv('../Results/{}_isom_interxn.tsv'.format(gname), header=False, index=False, sep="\t")
	if os.path.exists('../Results/{}_isom_interxn.tsv'.format(gname)):
		print 'Wrote:', '../Results/{}_isom_interxn.tsv'.format(gname)
Exemplo n.º 44
0
def build_directed_graph(df,
                         path=work_david,
                         year=2000,
                         level='district',
                         graph_type='directed',
                         return_json=False):
    """Build a directed graph with a specific level hierarchy and year/s.
    Input:  df: original pandas DataFrame
            year: selected year/s
            level: district, county or city
            return_json: convert networkx DiGraph object to JSON and return it.
    Output: G: networkx DiGraph object or JSON object"""
    import networkx as nx
    import numpy as np
    from networkx import NetworkXNotImplemented
    print('Building {} graph with {} hierarchy level'.format(
        graph_type, level))
    #    source = level_dict.get(level)['source']
    #    target = level_dict.get(level)['target']
    df_sliced = choose_year(df, year=year, dropna=True)
    node_sizes = node_sizes_source_target(df, year=year, level=level)
    #    node_geo = get_lat_lon_from_df_per_year(df, year=year, level=level)
    #    if weight_col is not None:
    #        df['weights'] = normalize(df[weight_col], 1, 10)
    #    else:
    #        df['weights'] = np.ones(len(df))
    # df = df[df['Percent-migrants'] != 0]
    G = create_G_with_df(df_sliced, level=level, graph_type=graph_type)
    #    G = nx.from_pandas_edgelist(
    #        df_sliced,
    #        source=source,
    #        target=target,
    #        edge_attr=[
    #            source,
    #            'Percent-migrants',
    #            'Direction',
    #            'Number',
    #            'Total',
    #            'Distance',
    #            'Angle'],
    #        create_using=Graph)
    # enter geographical coords as node attrs:
    geo = read_geo_name_cities(path=path)
    for col in geo.columns:
        dict_like = dict(
            zip([x for x in G.nodes()], [geo.loc[x, col] for x in G.nodes()]))
        nx.set_node_attributes(G, dict_like, name=col)
    # slice df for just inflow:
    df_in = df_sliced[df_sliced['Direction'] == 'inflow']
    # calculate popularity index:
    pi_dict = calculate_poplarity_index_for_InID(df_in)
    total_dict_in = get_total_number_of_migrants(G, df_in, direction='In')
    total_dict_out = get_total_number_of_migrants(G, df_in, direction='Out')
    # set some node attrs:
    nx.set_node_attributes(G, total_dict_in, 'total_in')
    nx.set_node_attributes(G, total_dict_out, 'total_out')
    total_net = {}
    for (key1, val1), (key2, val2) in zip(total_dict_in.items(),
                                          total_dict_out.items()):
        assert key1 == key2
        total_net[key1] = val1 - val2
    nx.set_node_attributes(G, total_net, 'total_net')
    # check that the net total is zero across the network (conservation of people :-):
    nets = []
    for node in G.nodes():
        nets.append(G.nodes()[node]['total_net'])
    assert sum(nets) == 0
    nx.set_node_attributes(G, pi_dict, 'popularity')
    nx.set_node_attributes(G, node_sizes, 'size')
    #    nx.set_node_attributes(G, node_geo, 'coords_lat_lon')
    G.name = 'Israeli migration network'
    G.graph['level'] = level
    G.graph['year'] = year
    G.graph['density'] = nx.density(G)
    try:
        G.graph['triadic_closure'] = nx.transitivity(G)
    except NetworkXNotImplemented as e:
        print('nx.transitivity {}'.format(e))
    # G.graph['global_reaching_centrality'] = nx.global_reaching_centrality(G, weight=weight_col)
    # G.graph['average_clustering'] = nx.average_clustering(G, weight=weight_col)


#    if weight_col is not None:
#        print('adding {} as weights'.format(weight_col))
#        # add weights:
#        edgelist = [x for x in nx.to_edgelist(G)]
#        weighted_edges = [
#            (edgelist[x][0],
#             edgelist[x][1],
#             edgelist[x][2][weight_col]) for x in range(
#                len(edgelist))]
#        G.add_weighted_edges_from(weighted_edges)
    print(nx.info(G))
    for key, val in G.graph.items():
        if isinstance(val, float):
            print(key + ' : {:.2f}'.format(val))
        else:
            print(key + ' :', val)
    # G, metdf = calculate_metrics(G, weight_col=weight_col)
    if return_json:
        return nx.node_link_data(G)
    else:
        return G
Exemplo n.º 45
0
# Good for retaking the course
# Good at debating, but
print("Cloud from: Low rec teachers")
generate_word_cloud(low_rec_common)

#tokens = [t for t in low_rec_common.split()]
#freq = nltk.FreqDist(tokens)
#freq.plot(20, cumulative=False)
print("Cloud from: Medium rec teachers")
generate_word_cloud(medium_rec_common)

print("Cloud from: High rec teachers")
generate_word_cloud(high_rec_common)

#Compare the relevant words from each comment to every other comment to check for duplicates
#if a duplicate exists, then add an edge between those two nodes.
list_check = []
counter = 1
for i in range(0, len(comment_list) - 1):
    for j in range(counter, len(comment_list)):
        list_check = str(set(comment_list[i].split())) + str(
            set(comment_list[j].split()))
        if len(list_check) != len(set(list_check)):
            for k in range(0, len(list_check) - len(set(list_check))):
                graph.add_edge(comment_list[i], comment_list[j])
    counter += 1

print(nx.info(graph))

nx.draw_random(graph)
plt.show()
Exemplo n.º 46
0
# =============================================================================
# Prepare edges and nodes
# =============================================================================

edges = corrMatrix.stack().reset_index()
edges.columns = ['theOne', 'theOther', 'correlation']
# remove self correlations
# long-format list containing pairwise correlation info
edges = edges.loc[edges['theOne'] != edges['theOther']].copy()
# undirected graph with weights corresponding to the correlation magnitude
G0 = nx.from_pandas_edgelist(edges,
                             'theOne',
                             'theOther',
                             edge_attr=['correlation'])

print(nx.info(G0))


#%%
# =============================================================================
# Density
# =============================================================================
def get_density(G):
    # How many possible edges?
    possible_edges = len(G.nodes) * (len(G.nodes) - 1) / 2
    actual_edges = len(G.edges)
    return actual_edges / possible_edges


print('density: ', get_density(G0))
print('node connectivity: ', nx.node_connectivity(G0))
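# Sanity check (a sketch): for a simple undirected graph the hand-rolled
# density above should agree with networkx's built-in helper.
print('nx.density: ', nx.density(G0))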
Exemplo n.º 47
0
# Add multiple edges at once
# G.add_edges_from([("A", "B"), ("B", "C"), ("B", "F"),("C", "D"), ("C", "E"), ("C", "F"), ("B", "F")])

# Add an edge. Since this is a directed graph, the first argument is the start node and the second is the target
for node_start, node_target in nodes_list:
    print("node_start: %s, node_target: %s" % (node_start, node_target))
    G.add_edge(node_start, node_target)

print("Edges: ", G.edges())

# G.remove_node(nodes_list[0][0])
# G.remove_node(nodes_list[0][1])

# Information about the Graph object
print("Info: ", nx.info(G))
# Total number of nodes
print("number of nodes:", G.number_of_nodes())
# List of nodes
print("Nodes:", G.nodes())
# Total number of edges
print("Number of edges:", G.number_of_edges())
# List of edges
print("Edges:", G.edges())
# Degree (the number of edges attached to each node)
print("Degrees:", G.degree())

# Number of nodes adjacent to the specified node
print("Node: %s, Degree: %d" % ('0', G.degree('0')))
# List of nodes adjacent to the specified node
print("nx.all_neighbors: ", list(nx.all_neighbors(G, '0')))
Exemplo n.º 48
0
user = client.get_user(USER)
repo = user.get_repo(REPO)
stargazers = list(repo.get_stargazers())  # the set of users who starred the repo

print repo
g = nx.DiGraph()
g.add_node(repo.name + '(repo)',
           type='repo',
           lang=repo.language,
           owner=user.login)

for sg in stargazers:
    g.add_node(sg.login + '(user)', type='user')
    g.add_edge(sg.login + '(user)', repo.name + '(repo)', type='gazes')
# Print the basic properties of the graph
print(nx.info(g), '\n')
# Print the basic attributes of the repo node and a user node
print(g.node['findspark(repo)'])
print(g.node['luzhijun(user)'], '\n')
# Print the attributes of this edge
print(g['luzhijun(user)']['findspark(repo)'])
# Print the adjacency information for a given start node
print(g['luzhijun(user)'])
print(g['findspark(repo)'])
# Print the user's incoming and outgoing edges
print(g.in_edges(['luzhijun(user)']))
print(g.out_edges(['luzhijun(user)']))
# Print the repo's incoming and outgoing edges
print(g.in_edges(['findspark(repo)']))
print(g.out_edges(['findspark(repo)']))
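# Quick consistency check (a sketch): every stargazer contributes one edge into
# the repo node, so its in-degree should equal the number of stargazers.
print g.in_degree(repo.name + '(repo)'), len(stargazers)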
Exemplo n.º 49
0
            CDR.append(cdr)
            #print(c.msisdn,contact.msisdn,random_date().strftime("%d-%m-%y %H:%M"),random.randint(0,120*60))

network_dict = {}
for cdr in CDR:
    connection = cdr.caller.msisdn + "->" + cdr.called.msisdn
    if connection not in network_dict:
        network_dict[connection] = 1
    else:
        network_dict[connection] += 1

G_weighted = nx.Graph()
for c in network_dict:
    G_weighted.add_edge(c.split("->")[0],
                        c.split("->")[1],
                        weight=network_dict[c])

print(nx.info(G_weighted))

pos = nx.spectral_layout(G_weighted)
betCent = nx.betweenness_centrality(G_weighted,
                                    normalized=True,
                                    endpoints=True)
node_color = [20000.0 * G_weighted.degree(v) for v in G_weighted]
node_size = [v * 1000 for v in betCent.values()]
plt.figure(figsize=(20, 20))
nx.draw_networkx(G_weighted,
                 pos=pos,
                 with_labels=False,
                 node_color=node_color,
                 node_size=node_size)
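# Follow-up sketch: list the five numbers with the highest betweenness
# centrality, i.e. the likeliest "bridge" subscribers in this CDR graph.
top_between = sorted(betCent.items(), key=lambda kv: kv[1], reverse=True)[:5]
for msisdn, score in top_between:
    print(msisdn, round(score, 4))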
Exemplo n.º 50
0
print "Importing Libraries"
import sys
import networkx as nx

print "Reading in Full Graph."
g = nx.read_edgelist('data/wiki-Talk.txt',
                     create_using=nx.DiGraph(),
                     nodetype=int)
print "Graph Imported, analysing basic info."

file = open("results/basic_info.txt", "w+")

file.write("*** Full Graph ***\n")
print "Basic Graph Info."
sys.stdout.flush()
file.write(nx.info(g))

print "Reciprocity."
sys.stdout.flush()
reciprocated = 0
for (u, v) in g.edges_iter():
    if g.has_edge(v, u):
        reciprocated = reciprocated + 1
file.write("\nReciprocity: {}\n".format(
    float(reciprocated) / nx.number_of_edges(g)))
file.flush()
print "Clustering."
sys.stdout.flush()
file.write("Clustering: {}\n".format(nx.average_clustering(g.to_undirected())))

file.close()
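# Alternative (an assumption about the installed version): on NetworkX 2.x,
# where edges_iter() no longer exists, the loop above can be replaced by the
# built-in helper, which computes the same reciprocated/total edge ratio.
if hasattr(nx, "overall_reciprocity"):
    print "Reciprocity (built-in):", nx.overall_reciprocity(g)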
Exemplo n.º 51
0
'''
 Create the graph from the file data.txt, which contains the value of each
 node and the weight of the edges
'''
GRAFO = nx.read_edgelist('data.txt',
                         create_using=tipo_grafo,
                         data=(('weight', float), ))

# Define the source and destination nodes to find the shortest path in the graph
origen = 'A'
destino = 'D'
'''
Print the most important information about the graph: the number of nodes,
the number of edges, and the average degree
'''
print nx.info(GRAFO)

print("\n Ruta mas corta")
# Método que nos permite calcular la ruta más corta del grafo usando Dijkstra
ruta_mas_corta = nx.dijkstra_path(GRAFO, origen, destino)
print(' -> '.join(ruta_mas_corta))

print("Longitud de la ruta mas corta")
# Método que nos permite calcular la longitud de la ruta más corta del grafo
print(nx.dijkstra_path_length(GRAFO, origen, destino))

# Draw the graph in 2D with the name of each node
nx.draw(GRAFO, with_labels=True)
plt.show()

# reorder nodes from 0,len(G)-1
Exemplo n.º 52
0
            node_id += 1
        if edge1 in edges_map.keys():
            edges_map[edge1].append(edge2)
            nr_edges += 1
        else:
            edges_map[edge1] = [edge2]
            nr_edges += 1

G = nx.Graph()
for n in nodes.keys():
    G.add_node(nodes[n], id=nodes[n], predicate='user')

for e in edges_map.keys():
    for e1 in edges_map[e]:
        G.add_edge(nodes[e], nodes[e1])

pickle.dump(G, open(FILE_NAME, 'wb'))

data = nx.read_gpickle(FILE_NAME)
print("Nr nodes AMAZON: ", len(data.nodes()))
print("Nr edges AMAZON: ", len(data.edges()))
print("Max degree AMAZON: ", an.get_maximum_node_degree(data))
print("Density AMAZON: ", nx.density(data))
print("INFO AMAZON:", nx.info(data))
#print an.get_maximum_node_degree(graph)

number_of_pages = 0
for node in data.nodes():
    if data.node[node]['predicate'] == 'page':
        number_of_pages += 1
vis.visualize_graph_standard(data)
Exemplo n.º 53
0
edges = [tuple(e) for e in df.values]

# print('Nodes:', len(node_names))
# print('Edges:', len(edges))
# resultFile.write('Nodes: {} '.format(len(node_names)) + '\n')
# print('\n')

# This will create a new Graph object
G = nx.Graph()

# add your lists of nodes and edges like so:
G.add_nodes_from(node_names)
G.add_edges_from(edges)

# get basic information about your newly-created network using the info function:
print(nx.info(G), '\n')
resultFile.write('# Information about the newly-created network is:\n{}'.
                 format(nx.info(G)) + '\n\n')

# Compute the following node measures:
print('# Compute the following node measures: \n')
resultFile.write('# Compute the following node measures:' + '\n\n')

# a. Degree Centrality (normalized)
degree_centrality = degree_centrality(G)
sorted_degree = sorted(degree_centrality.items(),
                       key=itemgetter(1),
                       reverse=True)
print("# Top 10 nodes by degree centrality:")
resultFile.write('# Top 10 nodes by degree centrality:' + '\n')
Exemplo n.º 54
0
'''building graph from dataset using networkx'''

import networkx as nx
import pickle
import json


class Graph(object):
    def __init__(self, edges, author, w, trainingSet, testingSet):
        self.train = nx.Graph()
        self.test = nx.Graph()
        for (node1, node2), year in edges:
            weightEdge = 1 / w[(node1, node2)]
            if year >= trainingSet[0] and year <= trainingSet[1]:
                self.train.add_edge(node1, node2, weight=weightEdge)
            elif year >= testingSet[0] and year <= testingSet[1]:
                self.test.add_edge(node1, node2, weight=weightEdge)

        self.train = max(nx.connected_component_subgraphs(self.train), key=len)


if __name__ == "__main__":
    edges = pickle.load(open("newEdges.p", "rb"))
    authors = pickle.load(open("newAuthor.p", "rb"))
    w = pickle.load(open("weights.p", "rb"))

    dataGraph = Graph(edges, authors, w, [1967, 2006], [2007, 2017])
    print(nx.info(dataGraph.train))
    pickle.dump(dataGraph, open('graph_data.p', "wb"))
Exemplo n.º 55
0
    except:
        try:
            with open(datafn, 'rb') as f:
                G = pickle.load(f, encoding='latin1')
        except Exception as ex:
            print(ex)
            print('Could not open graph: {}'.format(datafn))
            sys.exit(0)

    for n in G.nodes():
        G.node[n] = {}

    if method == 'percolation':
        G = max(nx.connected_component_subgraphs(G), key=len)

    print(nx.info(G))

    ns = NodeRanking.get_instance(method, G)

    flag = 0
    for s in ['asc', 'desc']:
        fn = os.path.join(output, '{}_{}_{}.pickle'.format(name, method, s))
        if os.path.exists(fn):
            flag += 1

    if flag < 2:
        ns.compute_node_scores()

        for s in ['asc', 'desc']:
            fn = os.path.join(output,
                              '{}_{}_{}.pickle'.format(name, method, s))
Exemplo n.º 56
0
    def corpus_graph(self,
                     language_file,
                     limit_range=3000000,
                     verbose=False,
                     lemmatizer=None,
                     stopwords=None,
                     min_char=4,
                     stemmer=None,
                     input_type="file"):

        G = nx.DiGraph()
        ctx = 0
        reps = False
        dictionary_with_counts_of_pairs = {}

        def process_line(line):

            nonlocal G
            nonlocal ctx
            nonlocal reps
            nonlocal dictionary_with_counts_of_pairs

            stop = list(string.punctuation)
            line = line.strip()
            line = [i for i in word_tokenize(line.lower()) if i not in stop]

            if stopwords is not None:
                line = [w for w in line if not w in stopwords]

            if stemmer is not None:
                line = [stemmer.stem(w) for w in line]

            if lemmatizer is not None:
                new_line = []
                for x in line:
                    lemma = lemmatizer.lemmatize(x)
                    if not (lemma in self.inverse_lemmatizer_mapping):
                        self.inverse_lemmatizer_mapping[lemma] = set()
                    self.inverse_lemmatizer_mapping[lemma].add(x)
                    new_line.append(lemma)
                line = new_line

            line = [x for x in line if len(x) > min_char]
            if len(line) > 1:
                ctx += 1
                if ctx % 15000 == 0:
                    logging.info("Processed {} sentences.".format(ctx))
                if ctx % limit_range == 0:
                    return True
                for enx, el in enumerate(line):
                    if enx > 0:
                        edge_directed = (line[enx - 1], el)
                        if edge_directed[0] != edge_directed[1]:
                            G.add_edge(edge_directed[0], edge_directed[1])
                        else:
                            edge_directed = None
                    if enx < len(line) - 1:
                        edge_directed = (el, line[enx + 1])
                        if edge_directed[0] != edge_directed[1]:
                            G.add_edge(edge_directed[0], edge_directed[1])
                        else:
                            edge_directed = None
                    if edge_directed:
                        if edge_directed in dictionary_with_counts_of_pairs:
                            dictionary_with_counts_of_pairs[edge_directed] += 1
                            reps = True
                        else:
                            dictionary_with_counts_of_pairs[edge_directed] = 1
            return False

        if input_type == "file":
            with open(language_file) as lf:
                for line in lf:
                    breakBool = process_line(line)
                    if breakBool:
                        break

        elif input_type == "text":
            lines = language_file.split("\n")
            for line in lines:
                breakBool = process_line(line)
                if breakBool:
                    break

        ## assign edge properties.
        for edge in G.edges(data=True):
            try:
                edge[2]['weight'] = dictionary_with_counts_of_pairs[(edge[0],
                                                                     edge[1])]
            except Exception as es:
                raise es
        if verbose:
            print(nx.info(G))

        return (G, reps)
Exemplo n.º 57
0
            prev_state = state

    if bystate is True:
        # now simplify the multigraph by adding up the weights of
        # multi-edges between any two nodes
        for e in mg1.edges():
            if g1.has_edge(*e):
                continue
            #print "Sum: ", sum([i['weight'] for i in mg1[e[0]][e[1]].values()])
            g1.add_edge(e[0],
                        e[1],
                        weight=sum(
                            [i['weight'] for i in mg1[e[0]][e[1]].values()]))

    print nx.info(g1)
    #print "g1 nodes: ", str(g1.nodes())
    #print "g1 edges: ", str(g1.edges(data=True))
    #print "mg1 edges: ", str(mg1.edges(data=True))
    #print "mg1 edges: ", str(mg1.edges(data=True))

    g2 = nx.Graph(name="g2")

    for n1 in senators:
        for n2 in senators:
            if n1 == n2:
                continue
            n1_bills = g1.neighbors(n1)
            n2_bills = g1.neighbors(n2)
            common_bills = list(set(n1_bills) & set(n2_bills))
            if len(common_bills) == 0:
Exemplo n.º 58
0
File: ntwk.py Project: fsxchen/netx
import networkx as nx

G = nx.Graph()

G.add_node(1)

G.add_nodes_from([2, 3])

G.add_edge(1, 2)

print G.nodes()


nx.draw(G)

print nx.info(G)
Exemplo n.º 59
0
print(path)

g1 = nx.scale_free_graph(size)

cyjs1 = test_tools.TestTools.networkx_to_cyjs(g1)

tt = test_tools.TestTools(BASE, VERBOSE)

print(json.dumps(cyjs1))

# job_id = tt.post_job(path, json.dumps(cyjs1) )
job_id = tt.submit_network_to_service(path, cyjs1)
res = tt.get_result(job_id, SLEEP_INTERVAL, 10)

# print( res.json() )
cyjs2 = res.json()
print(json.dumps(cyjs2, indent=4))
# print( 'Result is at http://192.168.59.103/v1/jobs/' + job_id )

g2 = tt.cyjs_to_networkx(cyjs2)
print(nx.info(g2))
print(g2.nodes())
print(g2.edges())

print(tt.is_isomorphic(g1, g2))

# res = tt.delete_job( job_id )
# print(res)

# print(json.dumps(res.json(), indent=4))
Exemplo n.º 60
0
 'source': np.array([ 0,  0.5])}

width=[float(d['weight']*1.2) for (u,v,d) in gd.edges(data=True)]
edge_labels=dict([((u,v,),d['weight']) for u,v,d in gd.edges(data=True)])

nx.draw_networkx_edge_labels(gd,pos,edge_labels=edge_labels, font_size = 15, alpha = .5)
nx.draw(gd, pos, node_size = 3000, node_color = 'orange',
        alpha = 0.2, width = width, edge_color='orange',style='solid')
nx.draw_networkx_labels(gd,pos,font_size=18)
plt.show()


# In[10]:


nx.info(gd)


# In[11]:


# flow matrix
m = fn.getFlowMatrix(gd)
m


# In[12]:


fn.networkDissipate(gd)