Example No. 1
def find_common_clique(Gd, curr, wk, conceptnet_words, Tdict, Tnode_dict):
    gr = nx.Graph(Gd[curr])  # convert to undirected graph
    w1, w2 = min(wk), max(wk)  # normalize the pair so that w1 <= w2
    wk = (w1, w2)
    cli1 = sorted(nx.cliques_containing_node(gr, w1),
                  key=lambda s: len(s),
                  reverse=True)
    cli2 = sorted(nx.cliques_containing_node(gr, w2),
                  key=lambda s: len(s),
                  reverse=True)

    if wk in Tdict[curr]:
        print " graph already exist: %s, %s" % (curr, repr(wk))
        return [], [], []

    # find the largest common clique, size >= 3
    cnodes = []
    for i1 in range(len(cli1)):
        if len(cli1[i1]) < 3:
            break
        for i2 in range(len(cli2)):
            if len(cli2[i2]) == len(cli1[i1]) and sorted(cli2[i2]) == sorted(
                    cli1[i1]):
                cnodes = sorted(cli2[i2])
                break
        if len(cnodes) > 0:
            break

    if len(cnodes) > 0:
        all_nodes = tuple(sorted(cnodes))
        if all_nodes in Tnode_dict:
            print " node dup exists: %s, %s" % (repr(wk), curr)
            return [], [], []  # already exist
        else:
            Tnode_dict[all_nodes] = 'C'

        curg = nx.DiGraph()

        for u in range(len(cnodes)):
            un = cnodes[u]
            add_conceptnet_edges(curg, conceptnet_words, un, un)
            for v in range(len(cnodes)):
                vn = cnodes[v]
                if vn != un:
                    add_conceptnet_edges(curg, conceptnet_words, un, vn)
    else:
        print " no good cliques found: %s, %s" % (repr(wk), curr)
        return [], [], []

    return curg, wk, cnodes
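
All of the examples on this page target the pre-3.0 API: nx.cliques_containing_node was deprecated in NetworkX 2.6 and removed in 3.0. On current releases the same result can be recovered by filtering nx.find_cliques; a minimal sketch:

import networkx as nx

def cliques_containing_node(G, node, cliques=None):
    # Drop-in helper for NetworkX >= 3.0: maximal cliques that contain `node`.
    if cliques is None:
        cliques = nx.find_cliques(G)  # generator of maximal cliques
    return [c for c in cliques if node in c]

G = nx.complete_graph(4)
print(cliques_containing_node(G, 0))  # [[0, 1, 2, 3]]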
Example No. 3
 def cliqueCounts(self, cdata, k):
     tdata = []
     cliques = nx.cliques_containing_node(self.graph, [d[0] for d in cdata])
     for d in cdata:
         cl = len([c for c in cliques[d[0]] if len(c) >= k])
         tdata.append((d[1], cl))
     return tdata
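
cliqueCounts relies on the old API's dual return shape: passing a list of nodes makes nx.cliques_containing_node return a dict keyed by node, which is why the cliques[d[0]] lookup works. A small sketch of both shapes, assuming a pre-3.0 NetworkX:

import networkx as nx

G = nx.complete_graph(3)
print(nx.cliques_containing_node(G, 0))       # single node -> list: [[0, 1, 2]]
print(nx.cliques_containing_node(G, [0, 1]))  # list of nodes -> dict:
                                              # {0: [[0, 1, 2]], 1: [[0, 1, 2]]}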
Example No. 4
    def get_node_cliques_metrics(self, node) -> dict:
        """

        :param node:
        :return:
        """

        out = {
            'node_cliques_size_avg': 0,
            'node_cliques_size_std': 0,
            'node_cliques_size_max': 0,
            'node_number_of_cliques': 0
        }

        cliques = nx.cliques_containing_node(self.undirected_graph, nodes=node)

        if cliques:
            clique_sizes = [len(c) for c in cliques]

            out['node_cliques_size_avg'] = np.mean(clique_sizes)
            out['node_cliques_size_std'] = np.std(clique_sizes)
            out['node_cliques_size_max'] = max(clique_sizes)
            out['node_number_of_cliques'] = len(cliques)

        return out
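
A tiny harness for get_node_cliques_metrics, assuming only that the enclosing object (not shown in the snippet) exposes an undirected_graph attribute and that the method is reachable as a plain function:

import networkx as nx
import numpy as np
from types import SimpleNamespace

# Hypothetical host object standing in for the class this method belongs to.
host = SimpleNamespace(undirected_graph=nx.complete_graph(4))
print(get_node_cliques_metrics(host, 0))
# {'node_cliques_size_avg': 4.0, 'node_cliques_size_std': 0.0,
#  'node_cliques_size_max': 4, 'node_number_of_cliques': 1}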
Example No. 5
def main():
	titles = open("fox titles.txt")
	titles = fileToList(titles)
	names = open("fox vertex.txt")
	names = fileToList(names)
	d1,d2 = todic(titles,names)	
	
	ylvis = "jofNR_WkoCE"
	g = nx.read_graphml("foxsay.graphml")
	
	outdeg = g.degree()
	rm = [n for n in outdeg if outdeg[n] ==0]
	
	g.remove_nodes_from(rm)
	
	jaccard(g,d1,d2)
	print compute_jaccard_index(finn_nabo(g,"xO_a7OKmh7Q"),finn_nabo(g,"xiKiAlv9wWw"))
	q = nx.cliques_containing_node(g,ylvis)
	
	
	print "cliques containing the fox: "
	d2["jofNR_WkoCE"]= "ylvis original fox -tvn"
	for clique in q:
		print len(clique)
		for node in clique:
			print d2[str(node)[:40]]
		print "\n"
	

	return 0
Example No. 6
def cliques(data, subreddit):
    subreddits, authors, M = data
    d = cdist(M, M)
    adj = d < np.mean(d)
    G = nx.convert_matrix.from_numpy_matrix(adj)
    cliques = nx.cliques_containing_node(G, list(subreddits).index(subreddit))
    for c in cliques:
        print(np.array(subreddits)[c])
Example No. 7
def tree_decomposition(input_graph):

    current_graph = input_graph.copy()
    current_graph.remove_edges_from(current_graph.selfloop_edges())

    decomposition_tree = nx.Graph()
    tree_connectivity_dictionary = dict()
    for graph_vertex in current_graph.nodes():
        tree_connectivity_dictionary[graph_vertex] = []

    while current_graph.order() > 0:
        nodes_sorted_by_degree = sort_by_degree(current_graph)
        minimum_degree_vertex = nodes_sorted_by_degree[0][0]
        cliques_of_minimum_degree_vertex = nx.cliques_containing_node(current_graph, minimum_degree_vertex)
        number_of_cliques_containing_vertex = len(cliques_of_minimum_degree_vertex)
        minimum_degree_vertex_neighbors = current_graph.neighbors(minimum_degree_vertex)
        new_tree_vertex = [minimum_degree_vertex]
        new_tree_vertex.extend(minimum_degree_vertex_neighbors)
        new_tree_vertex = tuple(new_tree_vertex)
        decomposition_tree.add_node(new_tree_vertex)
        if number_of_cliques_containing_vertex > 1:
            # not simplicial: turn the neighborhood into a clique, remove only the vertex
            pairs_of_neighbors = make_pairs(minimum_degree_vertex_neighbors)
            for additional_edge in pairs_of_neighbors:
                current_graph.add_edge(additional_edge[0], additional_edge[1])
            toberemoved = [minimum_degree_vertex]
        else:
            # simplicial: also remove neighbors whose edges all lie inside this clique
            toberemoved = [minimum_degree_vertex]
            number_of_clique_edges_per_vertex = len(minimum_degree_vertex_neighbors)
            for temp_vertex in minimum_degree_vertex_neighbors:
                if current_graph.degree(temp_vertex) == number_of_clique_edges_per_vertex:
                    toberemoved.append(temp_vertex)

        for graph_vertex in new_tree_vertex:
            if graph_vertex in toberemoved:
                current_graph.remove_node(graph_vertex)
                tree_vertices_waiting = tree_connectivity_dictionary[graph_vertex]
                for tree_vertex_waiting in tree_vertices_waiting:
                    decomposition_tree.add_edge(new_tree_vertex, tree_vertex_waiting)

                temp_copy_tree_vertices_waiting = tree_vertices_waiting[:]
                for tree_vertex_waiting in temp_copy_tree_vertices_waiting:
                    common_graph_nodes_between_tree_vertices = my_very_simple_tuple_intersection(new_tree_vertex, tree_vertex_waiting)
                    for candidate in common_graph_nodes_between_tree_vertices:
                        if tree_vertex_waiting in tree_connectivity_dictionary[candidate]:
                            tree_connectivity_dictionary[candidate].remove(tree_vertex_waiting)

                del tree_connectivity_dictionary[graph_vertex]
            else:
                tree_connectivity_dictionary[graph_vertex].append(new_tree_vertex)

    if (decomposition_tree.number_of_nodes() - decomposition_tree.number_of_edges()) < 1:
        print 'WARNING WARNING WARNING: THE OUTPUT GRAPH IS ****NOT**** A TREE, IT INCLUDES CYCLES'
    elif (decomposition_tree.number_of_nodes() - decomposition_tree.number_of_edges()) > 1:
        print 'WARNING WARNING WARNING: THE OUTPUT GRAPH IS ****NOT**** A TREE, IT IS DISCONNECTED'
    return decomposition_tree
Example No. 8
 def _annotate_cliques(self, graph, min_clique_size=3):
     cliques = list(nx.find_cliques(graph))
     for u in graph.nodes():
         cliques_list = nx.cliques_containing_node(
             graph, nodes=u, cliques=cliques)
         trimmed_cliques_list = [c for c in cliques_list
                                 if len(c) >= min_clique_size]
         if len(trimmed_cliques_list) > 0:
             graph.node[u]['cliques'] = trimmed_cliques_list
Example No. 9
def Cliques(G):
    """
    Return, for each node, the list of maximal cliques that node belongs to
    """
    all_cliques = list(nx.find_cliques(G))  # compute the maximal cliques once
    cliques = []
    for i in G.nodes():
        cliques.append(nx.cliques_containing_node(G, i, cliques=all_cliques))
    return cliques
Example No. 10
        def _get_cliques_dict(node):
            """
            Returns a dictionary in the form of {node: cliques_formed} of the
            node along with its neighboring nodes.

            clique_dict_removed would be containing the cliques created
            after deletion of the node
            clique_dict_node would be containing the cliques created before
            deletion of the node
            """
            graph_working_copy = nx.Graph(graph_copy.edges())
            neighbors = graph_working_copy.neighbors(node)
            graph_working_copy.add_edges_from(itertools.combinations(neighbors, 2))
            clique_dict = nx.cliques_containing_node(graph_working_copy,
                                                     nodes=([node] + neighbors))
            graph_working_copy.remove_node(node)
            clique_dict_removed = nx.cliques_containing_node(graph_working_copy,
                                                             nodes=neighbors)
            return clique_dict, clique_dict_removed
Example No. 12
def findPerfectGroup(Event, User):
    User = UserHandler(User)
    PerfectGroups = nx.cliques_containing_node(Event.G(), User.id)
    for i in PerfectGroups:
        print(i)
        if len(i) == Event.groupSize:
            for j in i:
                Event._removeUserFromGraph(j)
            return GroupHandler.createGroup(i, Event)
    return None
Example No. 13
def create_edge(G, K, node_1, node_2):
    if node_1 != node_2:
        G.add_edge(node_1, node_2)

    for clique in nx.cliques_containing_node(G, nodes=node_1):
        if node_2 in clique and len(clique) > K:
            G.remove_edge(node_1, node_2)
            break

    return G
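
A hypothetical driver for create_edge showing the clique-size cap in action: with K = 2 the edge that would close a 3-clique is rejected. Assumes a pre-3.0 NetworkX:

import networkx as nx

G = nx.Graph()
for u, v in [(1, 2), (2, 3), (1, 3)]:
    G = create_edge(G, K=2, node_1=u, node_2=v)
print(sorted(G.edges()))  # [(1, 2), (2, 3)]: the edge (1, 3) was removed again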
Example No. 14
    def all_cliques(self, spectrum):
        """
        Returns all cliques (based on peaks in given spectrum).
        """

        peaks = self[spectrum].peaks
        cliques = []
        for peak in peaks:
            current = nx.cliques_containing_node(self.G, peak)
            cliques += [self._clique(clique) for clique in current]
        return cliques
Example No. 16
    def generate_subgraph(self):
        v = random.randint(0, self.num_vertices - 1)
        s = networkx.cliques_containing_node(self.G, [v])

        for q in s[v]:
            if len(q) >= self.size_subgraph:
                value = random.uniform(0, self.max_value)
                for v in q:
                    self.values[v] = value
                self.subgraph = q
                print q
                break
Example No. 17
    def clique_generator(self, spectrum):
        """
        Returns all cliques (based on peaks in given spectrum).
        """

        peaks = self[spectrum].peaks
        order = np.random.permutation(len(peaks))
        for i in order:
            peak = peaks[i]
            cliques = nx.cliques_containing_node(self.G, peak)
            for clique in cliques:
                yield self._clique(clique)
Example No. 18
def init(G):
    """
    Algorithm 2: initializes the key variables for the RMC algorithm.
    Checks whether the max core of the input graph is already a clique;
    if so, returns that maximum clique.

    This is a line-by-line implementation of the algorithm in the paper.
    :param G:
    :return: r, wc_min, wc_max, and the initial max_clique
    """
    wc_min = 0
    max_core_number = -100
    core_number_dict = nx.core_number(G)
    for k, v in core_number_dict.items():
        if v > max_core_number:
            max_core_number = v
            print(max_core_number)

    wc_max = max_core_number + 1
    max_core = nx.k_core(G, max_core_number)

    # check if max-core is a clique
    num_nodes = len(max_core.nodes())
    num_edges = max_core.number_of_edges()
    if num_edges == (num_nodes * (num_nodes - 1)) // 2:
        print("max_core is a clique, hence must be maximum clique")
        print("returning maximum clique")
        wc_min = num_nodes
        wc_max = num_nodes + 1
        return (1, wc_min, wc_max, max_core)

    for k, v in core_number_dict.items():
        if v > wc_max:
            # find the largest maximal clique containing that node
            cliques = nx.cliques_containing_node(G, k)
            node_maximal_clique = max(cliques, key=len)  # cliques are node lists
            if len(node_maximal_clique) > wc_min:
                cmax = node_maximal_clique
                wc_min = len(cmax)
    if wc_min == wc_max:
        return 1, wc_min, wc_max, max_core
    d = nx.coloring.greedy_color(G, strategy='largest_first')  # color number of graph through graph coloring - line 12
    color_set = set()
    for _, v in d.items():
        color_set.add(v)
    cn = len(color_set)
    if wc_max > cn:
        wc_max = cn
    if wc_max == wc_min:
        return 1, wc_min, wc_max, max_core
    return 0, wc_min, wc_max, max_core
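
A minimal smoke test for init, assuming networkx is imported as nx: on a complete graph the maximum core is itself a clique, so the early-return branch fires.

import networkx as nx

r, wc_min, wc_max, clique = init(nx.complete_graph(5))
print(r, wc_min, wc_max, sorted(clique.nodes()))  # 1 5 6 [0, 1, 2, 3, 4]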
Example No. 19
def find_clique(uid,p_ave_degree=50):
    buffered = get_buffer_clique(uid)
    if(buffered != False):
        return buffered
    G = nx.DiGraph()
    con = mdb.connect('localhost','root','root','weibo_ranker')
    cur = con.cursor()
    sql = "select uid from (select `to` from vip_relation where `from`=%d) as vip left join people_rank4 on vip.`to`=people_rank4.uid where flag < 1"%(uid)
    
    cur.execute(sql)
    users = cur.fetchall()
    user_list = [uid]
    for user in users:
        G.add_edge(uid,user[0])
        user_list.append(user[0])
    for user in user_list:
        sql = "select uid from (select `to` from vip_relation where `from`=%d) as vip left join people_rank4 on vip.`to`=people_rank4.uid where flag < 1"%(user)
        cur.execute(sql)
        results = cur.fetchall()
        for result in results:
            if(result[0] in user_list):
                G.add_edge(user,result[0])
    if(len(G.nodes()) == 0):
        save_clique(uid,[])
        return []
    
    #decomplex by deleting nodes
    decomplex(G,uid,200)
    
    dG = G.to_undirected(True)
    sub_graph_node_list = dG.neighbors(uid)
    sub_graph_node_list.append(uid)
    dG = dG.subgraph(sub_graph_node_list)
    
    dG_node_number = len(dG.nodes())
    cliques = nx.cliques_containing_node(dG,uid)
    clique_user_dic = {}
    for clique in cliques:
        if(len(clique) > 2):
            for user in clique:
                if(user == uid ):
                    continue
                if(isset(clique_user_dic,user)):
                    clique_user_dic[user] += 1
                else:
                    clique_user_dic[user] = 1
    clique_user_arr = sorted(clique_user_dic.iteritems(), key=itemgetter(1), reverse=True)
    save_clique(uid,clique_user_arr)
    return clique_user_arr
Example No. 20
 def test_cliques_containing_node(self):
     G=self.G
     assert_equal(nx.cliques_containing_node(G,1),
                  [[2, 6, 1, 3]])
     assert_equal(list(nx.cliques_containing_node(G,[1]).values()),
                  [[[2, 6, 1, 3]]])
     assert_equal(list(nx.cliques_containing_node(G,[1,2]).values()),
                  [[[2, 6, 1, 3]], [[2, 6, 1, 3], [2, 6, 4]]])
     assert_equal(nx.cliques_containing_node(G,[1,2]),
                  {1: [[2, 6, 1, 3]], 2: [[2, 6, 1, 3], [2, 6, 4]]})
     assert_equal(nx.cliques_containing_node(G,1),
                  [[2, 6, 1, 3]])
     assert_equal(nx.cliques_containing_node(G,2),
                  [[2, 6, 1, 3], [2, 6, 4]])
     assert_equal(nx.cliques_containing_node(G,2,cliques=self.cl),
                  [[2, 6, 1, 3], [2, 6, 4]])
     assert_equal(len(nx.cliques_containing_node(G)),11)
Example No. 21
   def graph_iter(self, G, **kwargs):
      sv = kwargs['shadow_val']
      for root_node in G.nodes_iter():
         n_freq = defaultdict(int)

         if G.node[root_node][kwargs['attr_to_del']] != sv:
            continue

         cliques = []

         for aclique_raw in nx.cliques_containing_node(G, nodes=root_node):
            cliques.append(add_node_attrs(aclique_raw, G))

         yield { 'subgraph': cliques,
                 'root_node': root_node }
Example No. 22
def tree_decomposition(input_graph):

    current_graph = input_graph.copy()
    decomposition_tree = nx.Graph()
    tree_connectivity_dictionary = dict()
    for graph_vertex in current_graph.nodes():
        tree_connectivity_dictionary[graph_vertex] = []

    while current_graph.order() > 0:
        nodes_sorted_by_degree = sort_by_degree(current_graph)
        minimum_degree_vertex = nodes_sorted_by_degree[0][0]
        cliques_of_minimum_degree_vertex = nx.cliques_containing_node(current_graph, minimum_degree_vertex)
        number_of_cliques_containing_vertex = len(cliques_of_minimum_degree_vertex)
        minimum_degree_vertex_neighbors = current_graph.neighbors(minimum_degree_vertex)
        new_tree_vertex = [minimum_degree_vertex]
        new_tree_vertex.extend(minimum_degree_vertex_neighbors)
        new_tree_vertex = tuple(new_tree_vertex)
        decomposition_tree.add_node(new_tree_vertex)
        if number_of_cliques_containing_vertex > 1:
            pairs_of_neighbors = make_pairs(minimum_degree_vertex_neighbors)
            for additional_edge in pairs_of_neighbors:
                current_graph.add_edge(additional_edge[0], additional_edge[1])
            toberemoved = [minimum_degree_vertex]
        else:
            toberemoved = [minimum_degree_vertex]
            number_of_clique_edges_per_vertex = len(minimum_degree_vertex_neighbors)
            for temp_vertex in minimum_degree_vertex_neighbors:
                if current_graph.degree(temp_vertex) == number_of_clique_edges_per_vertex:
                    toberemoved.append(temp_vertex)
        for graph_vertex in new_tree_vertex:
            if graph_vertex in toberemoved:
                current_graph.delete_node(graph_vertex)
                tree_vertices_waiting = tree_connectivity_dictionary[graph_vertex]
                for tree_vertex_waiting in tree_vertices_waiting:
                    decomposition_tree.add_edge(new_tree_vertex, tree_vertex_waiting)
                for tree_vertex_waiting in tree_vertices_waiting:
                    common_graph_nodes_between_tree_vertices = list(my_very_simple_tuple_intersection(new_tree_vertex, tree_vertex_waiting))
                    # renamed from graph_vertex to avoid shadowing the enclosing loop variable
                    for common_vertex in common_graph_nodes_between_tree_vertices:
                        tree_connectivity_dictionary[common_vertex].remove(tree_vertex_waiting)
            else:
                tree_connectivity_dictionary[graph_vertex].append(new_tree_vertex)

    return decomposition_tree
Example No. 23
    def test_cliques_containing_node(self):
        G = self.G
        assert nx.cliques_containing_node(G, 1) == [[2, 6, 1, 3]]
        assert list(nx.cliques_containing_node(G, [1]).values()) == [[[2, 6, 1, 3]]]
        assert [
            sorted(c) for c in list(nx.cliques_containing_node(G, [1, 2]).values())
        ] == [[[2, 6, 1, 3]], [[2, 6, 1, 3], [2, 6, 4]]]
        result = nx.cliques_containing_node(G, [1, 2])
        for k, v in result.items():
            result[k] = sorted(v)
        assert result == {1: [[2, 6, 1, 3]], 2: [[2, 6, 1, 3], [2, 6, 4]]}
        assert nx.cliques_containing_node(G, 1) == [[2, 6, 1, 3]]
        expected = [{2, 6, 1, 3}, {2, 6, 4}]
        answer = [set(c) for c in nx.cliques_containing_node(G, 2)]
        assert answer in (expected, list(reversed(expected)))

        answer = [set(c) for c in nx.cliques_containing_node(G, 2, cliques=self.cl)]
        assert answer in (expected, list(reversed(expected)))
        assert len(nx.cliques_containing_node(G)) == 11
Example No. 24
def create_group(session, graph, group_limit, group_size_limit):
    nodes = dict()
    while (len(graph) > 1):
        node = choice(graph.nodes())
        node_reached_group_limit = False
        long_cliques = [
            c for c in nx.cliques_containing_node(graph, nodes=[node])[node]
            if len(c) > 1
        ]
        if len(long_cliques) == 0:
            graph.remove_node(node)
            continue
        cliques = sorted(long_cliques, key=lambda x: len(x), reverse=True)
        for clique in cliques:
            if len(clique) < 2:
                break
            l = min(len(clique), group_size_limit)
            clique.remove(node)
            group = sample(clique, l - 1) + [node]
            available = True
            for n in group:
                nodes.setdefault(n, 0)
                if nodes[n] == group_limit:
                    available = False
                    break
            if available:
                new_group = Group(group_limit=group_limit,
                                  group_size_limit=group_size_limit)
                session.add(new_group)
                session.commit()
                for n in group:
                    membership = Membership(group_id=new_group.id, node_id=n)
                    session.add(membership)
                    session.commit()
                    nodes[n] += 1
                    if nodes[n] == group_limit:
                        graph.remove_node(n)
                        if n == node:
                            node_reached_group_limit = True
            if node_reached_group_limit:
                break
    return
Example No. 25
    def compute_cliques(self, node_id=None):  # pragma: no cover
        """
        Computes all maximal complete subgraphs (cliques) for the given graph.
        If a node_id is given, the method will return only the maximal complete
        subgraphs that contain that node.

        Parameters
        ----------
        node_id : int
                       Integer value for a given node

        Returns
        -------
        : list
          A list of lists of node ids that make up maximal complete subgraphs of the given graph
        """
        if node_id is not None:
            return list(nx.cliques_containing_node(self, nodes=node_id))
        else:
            return list(nx.find_cliques(self))
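
A sketch of how compute_cliques behaves, assuming the method lives on an nx.Graph subclass (the surrounding class is not shown) and a pre-3.0 NetworkX:

import networkx as nx

class CliqueGraph(nx.Graph):
    compute_cliques = compute_cliques  # reuse the method defined above

g = CliqueGraph([(0, 1), (1, 2), (0, 2), (2, 3)])
print(g.compute_cliques())           # all maximal cliques, e.g. [[0, 1, 2], [2, 3]]
print(g.compute_cliques(node_id=3))  # only those containing node 3: [[2, 3]]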
Example No. 26
    def test_cliques_containing_node(self):
        G = self.G
        assert_equal(nx.cliques_containing_node(G, 1), [[2, 6, 1, 3]])
        assert_equal(list(nx.cliques_containing_node(G, [1]).values()),
                     [[[2, 6, 1, 3]]])
        assert_equal([
            sorted(c)
            for c in list(nx.cliques_containing_node(G, [1, 2]).values())
        ], [[[2, 6, 1, 3]], [[2, 6, 1, 3], [2, 6, 4]]])
        result = nx.cliques_containing_node(G, [1, 2])
        for k, v in result.items():
            result[k] = sorted(v)
        assert_equal(result, {1: [[2, 6, 1, 3]], 2: [[2, 6, 1, 3], [2, 6, 4]]})
        assert_equal(nx.cliques_containing_node(G, 1), [[2, 6, 1, 3]])
        expected = [{2, 6, 1, 3}, {2, 6, 4}]
        answer = [set(c) for c in nx.cliques_containing_node(G, 2)]
        assert_in(answer, (expected, list(reversed(expected))))

        answer = [
            set(c) for c in nx.cliques_containing_node(G, 2, cliques=self.cl)
        ]
        assert_in(answer, (expected, list(reversed(expected))))
        assert_equal(len(nx.cliques_containing_node(G)), 11)
Example No. 27
def iter_feature1(G,node1,node2):
    # Extract graph features for a given pair of nodes
    # return a data frame of features


    feat_out=[]
    try:
        # Length of the shortest path
        feat = nx.shortest_path_length(G,node1,node2)
        feat_out.append(feat)

        # Number of shortest path
        feat = len(list(nx.all_shortest_paths(G,node1,node2)))
        feat_out.append(feat)

    except nx.NetworkXNoPath:
        feat_out.extend([0,0])

    # first-order neighborhood overlap
    feat1,feat2 = neighbor_overlap_orderK(G,node1,node2,1)
    feat_out.extend([feat1,feat2])

    # second-order neighborhood overlap
    feat1,feat2 = neighbor_overlap_orderK(G,node1,node2,2)
    feat_out.extend([feat1,feat2])

    # average neighbor degree
    #feat1,feat2 = nx.average_neighbor_degree(G,nodes=[node1,node2]).values()
    #feat_out.extend([feat1,feat2])

    # Connectivity
    feat = nx.node_connectivity(G,node1,node2)
    feat_out.append(feat)

    # whether the nodes are in the same cluster
    feat = in_same_cluster(nx.cliques_containing_node(G,node1),node2)
    feat_out.append(feat)

    return feat_out
Example No. 28
def dist_graph(pipe: Pipe, threshold: float, layout: str, cliques: bool):
    """Build a threshold graph, presumes pairwise_distances. """
    import networkx as nx
    assert pipe.matrix.index.values.shape == pipe.matrix.columns.values.shape, "call pdist first"
    samples = pipe.matrix.index.values
    weights = pipe.matrix.values
    n_samples = samples.shape[0]
    max_w = np.max(weights)
    min_w = np.min(weights + np.eye(n_samples) * max_w)
    graph = nx.Graph()
    weight_values = []
    for (i, sa), (j, sb) in product(enumerate(samples), enumerate(samples)):
        if i != j:
            w = 1 - (weights[i, j] - min_w) / (max_w - min_w)
            graph.add_edge(sa, sb, weight=w)
            weight_values.append(w)

    weight_values = np.array(weight_values)
    if threshold == -1:
        threshold = np.median(weight_values) - np.nextafter(0., 1)
    print(np.min(weight_values), np.median(weight_values),
          np.max(weight_values))
    under_threshold_edges = [(u, v) for (u, v, d) in graph.edges(data=True)
                             if d['weight'] < threshold]

    graph.remove_edges_from(under_threshold_edges)
    if cliques:
        cliques = list(nx.find_cliques(graph))
        node_cliques = nx.cliques_containing_node(graph, list(graph.nodes()),
                                                  cliques)
    else:
        node_cliques = defaultdict(list)
    layout_fn = getattr(nx, layout + "_layout", nx.fruchterman_reingold_layout)
    pos = layout_fn(graph, weight="weight")
    fig = _mk_networkx_figure(graph,
                              pos,
                              use_weights=True,
                              node_cliques=node_cliques)
    oplot(fig)
Example No. 29
def get_triangles(ring, G):
    """Given the set of vertices return triangle entirely contained within the set"""

    cliques = nx.cliques_containing_node(G, list(ring))
    ring_set = set(ring)
    triangles = []
    triangles_sets = []
    # each clique is found once per member node, so duplicates are filtered below

    for key in cliques.keys():

        # all cliques where vertex "key" is present
        for triangle in cliques[key]:
            triangle_set = set(triangle)
            if triangle_set.issubset(ring_set):
                if triangle_set in triangles_sets:
                    pass
                else:
                    triangles_sets.append(set(triangle))
                    triangles.append(triangle)

    return triangles
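
A quick check of get_triangles on a wheel graph (pre-3.0 NetworkX assumed). Despite the name, it returns every maximal clique that fits inside ring, deduplicated:

import networkx as nx

G = nx.wheel_graph(6)  # hub 0 joined to the cycle 1-2-3-4-5
print(get_triangles([0, 1, 2, 3], G))  # [[0, 1, 2], [0, 2, 3]] (order may vary)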
Example No. 30
	print "-------------------------------------"
	print "Compute size of largest maximal clique containing a given node"
	print "-------------------------------------"
	maximalCliqueSizePerNode = nx.node_clique_number(G)
	print maximalCliqueSizePerNode

	print "-------------------------------------"
	print "Compute number of maximal cliques for each node"
	print "-------------------------------------"
	noOfMaximalCliquesPerNode = nx.number_of_cliques(G)
	print noOfMaximalCliquesPerNode

	print "-------------------------------------"
	print "Compute list of cliques containing  a given node"
	print "-------------------------------------"
	lcliques = nx.cliques_containing_node(G)
	print lcliques

	print "-------------------------------------"
	print "Writing data into global file"
	print "-------------------------------------"

	globalCliqueFile = 'data/globalCliqueFile.csv'
	mode = ''
	if os.path.isfile(globalCliqueFile):
		mode = 'a'
	else:
		mode = 'wb'
		with open(globalCliqueFile, mode) as csvfile:
			sw = csv.writer(csvfile, delimiter=',')
			data = ['cuisine', 'edgeWtThreshold', 'NumberOfCliques', 'Size of Largest Maximal Clique'];
Example No. 31
e_c = nx.eigenvector_centrality(G)
print e_c

# ### Clique

# In[36]:

nx.draw(G, with_labels=True)
cliques = list(nx.enumerate_all_cliques(G))
print cliques
for i in range(1, len(max(cliques, key=len)) + 1):
    print i, "clique:", [x for x in cliques if len(x) == i]

# In[37]:

node_cliques = nx.cliques_containing_node(G, nodes=1)
print(node_cliques)

# ### Triangle, Transitivity, Clustering Coefficient

# In[38]:

print nx.triangles(G)
print nx.transitivity(G)
print nx.clustering(G)

# ## Community

# https://github.com/taynaud/python-louvain

# In[39]:
Example No. 32
#     lvl2.append(graphs_len[i])

print 'Η λίστα των μεγεθών των κλικών είναι:'
# print 'The list of clique sizes is:'
print lvl2
print str(" ")

print 'Ο αριθμός κλίκας (το μέγεθος της μεγαλύτερης κλίκας) του G είναι:', nx.graph_clique_number(
    G)
# print 'The clique number (size of the largest clique) for G is:', nx.graph_clique_number(G)
# print sorted(nx.connected_components(G), key = len, reverse=True)
print str(" ")

print 'Το λεξικό των κλικών που περιέχουν κάθε κόμβο είναι:'
# print 'The dictionary of the lists of cliques containing each node:'
print nx.cliques_containing_node(G)
print str(" ")

print 'Το λεξικό του πλήθους κλικών που περιέχουν κάθε κόμβο είναι:'
# print 'The dictionary of the numbers of maximal cliques for each node:'
print nx.number_of_cliques(G)
print str(" ")

print 'Το λεξικό του μεγέθους των μεγαλύτερων κλικών που περιέχουν κάθε κόμβο είναι:'
# print 'The dictionary of the sizes of the largest maximal cliques containing each given node:'
print nx.node_clique_number(G)
print str(" ")

maxclique = [
    clq for clq in nx.find_cliques(G) if len(clq) == nx.graph_clique_number(G)
]
Example No. 33
def create_clique_top_node(graphml_filename):
    g = read_graphml(graphml_filename)
    top_node = get_top_node(g)
    print('before finding cliques for ' + top_node[0])
    cliques = nx.cliques_containing_node(g, nodes=[top_node])
    write_cliques_to_csv(cliques=cliques)
Example No. 34
diameter

# Center
# DONT EXECUTE--TAKES TOO LONG
center = nx.center(friendship_graph)
center

# Eccentricity
# DONT EXECUTE--TAKES TOO LONG
eccentricity = nx.eccentricity(friendship_graph)
eccentricity

# Clique
clique = nx.max_clique(friendship_graph)

threezeroeight_cliques = nx.cliques_containing_node(friendship_graph,308)
len(threezeroeight_cliques)

# Transitivity = 0.02348
transitivity = nx.transitivity(friendship_graph)

# Return density of graph
nx.density(friendship_graph)

# Return Connectivity of a graph
# DONT EXECUTE--TAKES TOO LONG
connectivity_dict = nx.node_connectivity(friendship_graph)

connectivity = nx.node_connectivity(friendship_graph)

# Shortest Path
Example No. 35
def main():

    slicing="monthly"    #"monthly"  # or "yearly"


    flag_cohort = 1  # 1: only consider firms from the cohort; 0: every firm


    string_cohort=""
    if flag_cohort==1:
        string_cohort="_cohort_"



    num_periods=253
    ii=0
    while ii <= num_periods:

      ii+=1
      

      try:

        ####  i read pickles for networks

        #network_period="../Results/Simplified_supply_network_slicing_"+str(slicing)+"_period_"+str(ii)+"_.pickle"
        network_period="../Results/Simplified_supply_network_slicing_"+str(slicing)+"_period_"+str(ii)+string_cohort+".pickle"
        G_period = pickle.load(open(network_period, 'rb'))  
        N_period = len(G_period.nodes())
                       
        #network_accumulate_year="../Results/Simplified_supply_network_yearly_acummlate_until_period_"+str(ii)+"_.pickle"
        network_accumulate_year="../Results/Simplified_supply_network_yearly_acummlate_until_period_"+str(ii)+string_cohort+".pickle"
        G = pickle.load(open(network_accumulate_year, 'rb'))  
        
        
        print ii,"N",N_period
        if N_period >0:  # to avoid the couple of empty networks around period 120
            
         
          list_networks=[(network_period,G_period), (network_accumulate_year,G)]          
          
          for tupla in list_networks:

            Gi= tupla[1]
            network_name= tupla[0]

            dict_clustering=nx.clustering(Gi)                   
            dict_betweenness_nodes=nx.betweenness_centrality(Gi)

            max_k=0
            list_k=[]
            for node in Gi.nodes():  
                k=Gi.degree(node)           
                Gi.node[node]["degree"]=k
                list_k.append(k)
                max_k=max(list_k)            
                
                Gi.node[node]["CC"]=dict_clustering[node]            
                Gi.node[node]["betweeness"]= dict_betweenness_nodes[node]            
                                

            dict_betweenness_edges = nx.edge_betweenness_centrality(Gi, normalized=True, weight=None)
            # returns a dictionary keyed by edge (tuple) with betweenness centrality as the value;
            # the edges' betweenness could also be computed taking their weights into account


            
            for edge in Gi.edges():  
                try:                
                    Gi.edge[edge[0]][edge[1]]["link_betweeness"]=dict_betweenness_edges[edge]                    
                except TypeError:                             
                    Gi.edge[edge[0]][edge[1]]["link_betweeness"]="NA"
                    print "edge",edge, "not found"
                


            for node in Gi.nodes():              
                maximo=1     
                lista = nx.cliques_containing_node(Gi, node)  # list of lists, e.g. [[207925, 203592], [207925, 10500761], [207925, 200554], [207925, 202587]]
            
                for elem in lista:               
                    if len(elem) > maximo:
                        maximo=len(elem)      
                Gi.node[node]['max_clique_size']=maximo                                  
            calculate_kshell(Gi, max_k)






            ########  i (over)write pickles for networks including the new attributes
            pickle.dump(Gi, open(network_name, 'wb'))
            print "  written", network_name


      except IOError: pass   # if network pickle not found
Example No. 36
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from time import time

MNDR = nx.Graph()  # assumed: the original snippet omits the imports and graph construction
MNDR.add_nodes_from(["1","5"])
MNDR.add_edges_from([("1","2"),("2","3"),("2","4"),("2","5"),("3","5"),("4","5"),("3","5"),
                     ("4","5"),("5","5")])

color_map = []
for node in MNDR:
    if (node == "5"):
        color_map.append('yellow')
    else:
        color_map.append('red')

replicas_5 = []
for j in range(30):
    start_time = time()
    for i in range (100000):
        cliques = nx.cliques_containing_node(MNDR)
    elapsed_time = time() - start_time
    replicas_5.append(elapsed_time)
print(replicas_5)
print(len(replicas_5))

normality_test=stats.shapiro(replicas_5)
print(normality_test)

hist, bin_edges = np.histogram(replicas_5, density=True)
first_edge, last_edge = np.min(replicas_5), np.max(replicas_5)

n_equal_bins = 10
bin_edges = np.linspace(start=first_edge, stop=last_edge,num=n_equal_bins + 1, endpoint=True)

plt.hist(replicas_5, bins=bin_edges, rwidth=0.8, color='orange')
Example No. 37
    def create_partial_ktrees(self, n, k, p):
        """
        Generates partial k-trees using the parametric model: (n,k,p)
        """
        # Seed with a single clique on k + 1 nodes (a caveman graph with l = 1 clique)
        G = nx.caveman_graph(1, k + 1)
        # Insert new nodes
        for i in range(0, n - k - 1):
            G.add_node(i + k + 1)
            adjacent_nodes = set()

            while len(adjacent_nodes) < k:

                cliques_of_G = nx.cliques_containing_node(G)

                temp = int(
                    np.round(
                        np.random.uniform(low=0.0,
                                          high=len(cliques_of_G) - 1,
                                          size=None)))
                temp_list = cliques_of_G.get(temp)
                temp2 = int(
                    np.round(
                        np.random.uniform(low=0.0,
                                          high=len(temp_list) - 1,
                                          size=None)))
                temp_list = temp_list[temp2]
                if len(temp_list) >= k:
                    while (len(temp_list) != k):
                        del temp_list[0]
                    [adjacent_nodes.add(i) for i in temp_list]
            for j in list(adjacent_nodes):
                G.add_edge(i + k + 1, j)

        a = set()
        b = set()
        pairs = set()
        # Remove p percent edges from the k-tree uniformly at random
        if p > 0:
            num = G.number_of_edges()
            num = int(num * (p / 100))
            # Finding the edges to remove without replacement
            while len(pairs) < num:
                temp1 = 0
                temp2 = 0
                while temp1 == temp2:
                    temp1 = int(
                        np.round(np.random.uniform(low=0.0, high=n,
                                                   size=None)))
                    temp2 = int(
                        np.round(np.random.uniform(low=0.0, high=n,
                                                   size=None)))
                temp_pairs = set()
                temp_pairs.add(temp1)
                temp_pairs.add(temp2)
                if temp_pairs not in pairs:
                    pairs.add(frozenset(temp_pairs))

            pairs = list(pairs)
            pairs_new = []
            [pairs_new.append(list(i)) for i in pairs]
            for i in range(0, num):
                if G.has_edge(pairs_new[i][0], pairs_new[i][1]):
                    G.remove_edge(pairs_new[i][0], pairs_new[i][1])
        return G
Example No. 38
def main():

    path = "../Data/95_05NYCgamentdata/"

    initial_year = 1985
    final_year = 2005

    first_day = dt.datetime(1985, 1, 1)

    y = initial_year

    flag_hist = "NO"
    flag_network_metrics = "NO"
    flag_remove_selfloops = "YES"

    string_filename_loops = ""
    if flag_remove_selfloops == "YES":
        string_filename_loops = "_no_self_loops"

    string_filename = ""
    if flag_network_metrics == "NO":
        string_filename = "_no_network_metrics"

    name1 = "../Results/Time_evol_num_transactions.dat"
    file1 = open(name1, 'wt')
    file1.close()

    name2 = "../Results/Time_evol_num_firms.dat"
    file2 = open(name2, 'wt')
    file2.close()

    name3 = "../Results/Time_evol_num_neg_transactions.dat"
    file3 = open(name3, 'wt')
    file3.close()

    name4 = "../Results/Time_evol_num_self_transactions.dat"
    file4 = open(name4, 'wt')
    file4.close()

    name5 = "../Results/Time_evol_num_self_neg_transactions.dat"
    file5 = open(name5, 'wt')
    file5.close()

    master_name = "../Results/Master_file_transactions" + string_filename + string_filename_loops + ".csv"
    file_master = open(master_name, 'wt')
    print >> file_master, "Mi Cj Mi_Cj tot_adj_gross_ij pos_adj_gross_ij neg_adj_gross_ij pos_adj_gross_i neg_adj_gross_i pos_adj_gross_j neg_adj_gross_j P_inf P_inf_previous_year Cumulat_num_inf_years frac_previous_inf_years year ki kj K_nn_i K_nn_j HHIi_as_manuf HHIi_as_contr HHIj_as_manuf HHIj_as_contr betweenness_i betweenness_j link_betweenness_ij max_clique_i max_clique_j kshell_i kshell_j num_manuf_i num_contr_i num_manuf_j num_contr_j num_transact_ij num_transact_i num_transact_j amount_pos_self_trans_i amount_pos_self_trans_j amount_neg_self_trans_i amount_neg_self_trans_j num_pos_self_trans_i num_pos_self_trans_j num_neg_self_trans_i num_neg_self_trans_j fract_pos_bussiness_of_M_with_C fract_pos_bussiness_of_C_with_M degree_asymmetry_ij business_asymmetry_ij size_tot_errors_ij"

    file_master.close()

    G = nx.Graph()

    print
    print

    list_periods = []

    list_firm_ids = []

    cont_transactions = 0.
    cont_neg_transactions = 0.
    cont_self_transactions = 0.
    cont_self_neg_transactions = 0.

    dict_firm_id_active_years = {}
    dict_firm_total_trans_volum = {}

    dict_firm_num_pos_trans = {}
    dict_firm_num_neg_trans = {}

    dict_tuple_link_cumulat_previous_Pinf = {}
    dict_tuple_link_fract_previous_inf_years = {}
    dict_tuple_link_Pinf_previous_year = {}

    dict_manuf_dict_contr_amounts = {
    }  # for each manufact., dict of its contractors and total amounts
    dict_contr_dict_manuf_amounts = {
    }  # for each contract., dict of its manuf. and total amounts

    dict_link_num_pos_trans = {}
    dict_link_num_neg_trans = {}

    list_neg_adj_gross = []
    list_pos_adj_gross = []

    list_tuplas = []

    while y <= final_year:

        list_neg_adj_gross_year = []
        list_pos_adj_gross_year = []

        list_firm_ids_year = []

        list_tuplas_year = []

        dict_firm_total_trans_volum_year = {}

        dict_firm_tot_pos_trans_year = {}
        dict_firm_tot_neg_trans_year = {}

        dict_firm_num_pos_trans_year = {}
        dict_firm_num_neg_trans_year = {}

        dict_link_num_pos_trans_year = {}
        dict_link_num_neg_trans_year = {}

        dict_firm_amount_pos_self_trans_year = {}
        dict_firm_amount_neg_self_trans_year = {}

        dict_firm_num_pos_self_trans_year = {}
        dict_firm_num_neg_self_trans_year = {}

        dict_manuf_dict_contr_amounts_year = {
        }  # for each manufact., dict of its contractors and total amounts
        dict_contr_dict_manuf_amounts_year = {
        }  # for each contract., dict of its manuf. and total amounts

        cont_transactions_year = 0.
        cont_neg_transactions_year = 0.
        cont_self_transactions_year = 0.
        cont_self_neg_transactions_year = 0.

        G_year = nx.Graph()

        list_manuf_year = []
        list_contr_year = []
        list_non_self_contractors = []

        ##################
        ####### input datafile:
        name0 = "fhistory" + str(y) + ".csv"
        print "\nreading: ", path + name0, "......."

        ####  paidbyfi,paidforf,periodfr,periodto,adjgr,gross,net,caf,liqdmg,cafper,rateper,ratecode

        cont = 1
        csvfile = open(path + name0, 'rb')
        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
        next(reader, None)  # to skip the header

        for list_row in reader:
            cont += 1

            try:  #  some lines are missing the contractor or manufacturer: skip

                manufacturer = int(list_row[0])  ##paidbyfirm
                contractor = int(list_row[1])  ## paidforfirm

                list_manuf_year.append(manufacturer)
                list_contr_year.append(contractor)

                cont_transactions += 1
                cont_transactions_year += 1

                ############## for now i deal with integers Dollars !!  (easier for histograms)
                adj_gross = int(round(float((list_row[4]))))

                flag_ignore_row = 0
                if manufacturer == contractor:
                    cont_self_transactions += 1.
                    cont_self_transactions_year += 1.
                    if flag_remove_selfloops == "YES":
                        flag_ignore_row = 1

                    ####### if it is a self-transaction, i only record this (no network variables nor HHI etc)
                    if adj_gross < 0.:
                        cont_self_neg_transactions += 1.
                        cont_self_neg_transactions_year += 1.

                        try:
                            dict_firm_amount_neg_self_trans_year[
                                manufacturer] += adj_gross
                        except KeyError:
                            dict_firm_amount_neg_self_trans_year[
                                manufacturer] = adj_gross

                        try:
                            dict_firm_num_neg_self_trans_year[
                                manufacturer] += 1.
                        except KeyError:
                            dict_firm_num_neg_self_trans_year[
                                manufacturer] = 1.

                    else:
                        try:
                            dict_firm_amount_pos_self_trans_year[
                                manufacturer] += adj_gross
                        except KeyError:
                            dict_firm_amount_pos_self_trans_year[
                                manufacturer] = adj_gross

                        try:
                            dict_firm_num_pos_self_trans_year[
                                manufacturer] += 1.
                        except KeyError:
                            dict_firm_num_pos_self_trans_year[
                                manufacturer] = 1.

                else:
                    list_non_self_contractors.append(contractor)

                if flag_ignore_row == 0:  # in general i do not count the self-transactions (for network metrics nor HHI etc)

                    tupla_link = (manufacturer, contractor)

                    list_tuplas_year.append(tupla_link)
                    list_tuplas.append(tupla_link)

                    list_firm_ids.append(manufacturer)
                    list_firm_ids.append(contractor)

                    list_firm_ids_year.append(manufacturer)
                    list_firm_ids_year.append(contractor)

                    ########### initial date for transaction period
                    from_date = list_row[
                        2]  # format examples  040185         #010185
                    if len(from_date) < 6:
                        from_date = "0" + from_date  # when the date is 40185  instead of 040185

                    month = int(from_date[:2])
                    day = int(from_date[-4:-2])
                    year = int(from_date[-2:])

                    if year > 80:
                        year += 1900
                    else:
                        year += 2000
                    try:
                        initial_date = dt.datetime(year, month, day)
                    except ValueError:  # there are a bunch of incorrect dates!!!  043185   063185     113185   023185
                        day = 1
                        month += 1
                        initial_date = dt.datetime(year, month, day)

                    ########### final date for transaction period
                    to_date = list_row[3]
                    if len(to_date
                           ) < 6:  # when the date is 40185  instead of 040185
                        to_date = "0" + to_date

                    month = int(to_date[:2])
                    day = int(to_date[-4:-2])
                    year = int(to_date[-2:])

                    if year > 80:
                        year += 1900
                    else:
                        year += 2000

                    try:
                        final_date = dt.datetime(year, month, day)
                    except ValueError:  # there are a bunch of incorrect dates!!!  043185   063185     113185   023185
                        day = 1
                        month += 1
                        final_date = dt.datetime(year, month, day)

                    period = (final_date - initial_date).days
                    if period < 0:
                        old_initial = initial_date
                        old_final = final_date  # some time periods have inverted TO and FROM dates!
                        initial_date = final_date
                        final_date = old_initial
                        period = (final_date - initial_date).days
                    list_periods.append(period)

                    ########### list of active years for firms
                    try:
                        dict_firm_id_active_years[manufacturer].append(year)
                    except KeyError:
                        dict_firm_id_active_years[manufacturer] = []
                        dict_firm_id_active_years[manufacturer].append(year)

                    if manufacturer != contractor:
                        try:
                            dict_firm_id_active_years[contractor].append(year)
                        except KeyError:
                            dict_firm_id_active_years[contractor] = []
                            dict_firm_id_active_years[contractor].append(year)

                    ###########  i need to initialize dicts
                    try:
                        dict_firm_num_neg_trans_year[manufacturer]
                    except KeyError:
                        dict_firm_num_neg_trans_year[manufacturer] = 0.

                    try:
                        dict_firm_num_neg_trans_year[contractor]
                    except KeyError:
                        dict_firm_num_neg_trans_year[contractor] = 0.

                    try:
                        dict_firm_num_neg_trans[manufacturer]
                    except KeyError:
                        dict_firm_num_neg_trans[manufacturer] = 0.

                    try:
                        dict_firm_num_neg_trans[contractor]
                    except KeyError:
                        dict_firm_num_neg_trans[contractor] = 0.

                    try:
                        dict_firm_num_pos_trans_year[manufacturer]
                    except KeyError:
                        dict_firm_num_pos_trans_year[manufacturer] = 0.

                    try:
                        dict_firm_num_pos_trans_year[contractor]
                    except KeyError:
                        dict_firm_num_pos_trans_year[contractor] = 0.

                    try:
                        dict_firm_num_pos_trans[manufacturer]
                    except KeyError:
                        dict_firm_num_pos_trans[manufacturer] = 0.

                    try:
                        dict_firm_num_pos_trans[contractor]
                    except KeyError:
                        dict_firm_num_pos_trans[contractor] = 0.

                    #########

                    ########### initialize the same for links
                    try:
                        dict_link_num_pos_trans[tupla_link]
                    except KeyError:
                        dict_link_num_pos_trans[tupla_link] = 0.

                    try:
                        dict_link_num_neg_trans[tupla_link]
                    except KeyError:
                        dict_link_num_neg_trans[tupla_link] = 0.

                    try:
                        dict_link_num_pos_trans_year[tupla_link]
                    except KeyError:
                        dict_link_num_pos_trans_year[tupla_link] = 0.

                    try:
                        dict_link_num_neg_trans_year[tupla_link]
                    except KeyError:
                        dict_link_num_neg_trans_year[tupla_link] = 0.
                    ############

                    ######## CAREFUL: NOT SURE ABOUT THIS!!  do I want total, or only positive?
                    ######## for the HHI index of manuf. and contr.
                    # HHI is a measure of the size of firms in an industry, and an indicator of the
                    # amount of competition among them. Bounded by 1/N (equally distributed industry)
                    # and 1 (total monopoly).
                    try:
                        dict_manuf_dict_contr_amounts[manufacturer]
                    except KeyError:
                        dict_manuf_dict_contr_amounts[manufacturer] = {}
                    try:
                        dict_manuf_dict_contr_amounts[manufacturer][contractor]
                    except KeyError:
                        dict_manuf_dict_contr_amounts[manufacturer][
                            contractor] = 0.
                    dict_manuf_dict_contr_amounts[manufacturer][
                        contractor] += adj_gross

                    try:
                        dict_manuf_dict_contr_amounts_year[manufacturer]
                    except KeyError:
                        dict_manuf_dict_contr_amounts_year[manufacturer] = {}
                    try:
                        dict_manuf_dict_contr_amounts_year[manufacturer][
                            contractor]
                    except KeyError:
                        dict_manuf_dict_contr_amounts_year[manufacturer][
                            contractor] = 0.
                    dict_manuf_dict_contr_amounts_year[manufacturer][
                        contractor] += adj_gross

                    try:
                        dict_contr_dict_manuf_amounts[contractor]
                    except KeyError:
                        dict_contr_dict_manuf_amounts[contractor] = {}
                    try:
                        dict_contr_dict_manuf_amounts[contractor][manufacturer]
                    except KeyError:
                        dict_contr_dict_manuf_amounts[contractor][
                            manufacturer] = 0.
                    dict_contr_dict_manuf_amounts[contractor][
                        manufacturer] += adj_gross

                    try:
                        dict_contr_dict_manuf_amounts_year[contractor]
                    except KeyError:
                        dict_contr_dict_manuf_amounts_year[contractor] = {}
                    try:
                        dict_contr_dict_manuf_amounts_year[contractor][
                            manufacturer]
                    except KeyError:
                        dict_contr_dict_manuf_amounts_year[contractor][
                            manufacturer] = 0.
                    dict_contr_dict_manuf_amounts_year[contractor][
                        manufacturer] += adj_gross

                    ########## total volume of (net) transactions
                    try:
                        dict_firm_total_trans_volum[manufacturer] += adj_gross
                    except KeyError:
                        dict_firm_total_trans_volum[manufacturer] = 0.
                        dict_firm_total_trans_volum[manufacturer] += adj_gross

                    try:
                        dict_firm_total_trans_volum[contractor] += adj_gross
                    except KeyError:
                        dict_firm_total_trans_volum[contractor] = 0.
                        dict_firm_total_trans_volum[contractor] += adj_gross

                    ##########   volume of (net) transactions YEARLY
                    try:
                        dict_firm_total_trans_volum_year[
                            manufacturer] += adj_gross
                    except KeyError:
                        dict_firm_total_trans_volum_year[manufacturer] = 0.
                        dict_firm_total_trans_volum_year[
                            manufacturer] += adj_gross

                    try:
                        dict_firm_total_trans_volum_year[
                            contractor] += adj_gross
                    except KeyError:
                        dict_firm_total_trans_volum_year[contractor] = 0.
                        dict_firm_total_trans_volum_year[
                            contractor] += adj_gross

                    ##########
                    if adj_gross < 0:

                        list_neg_adj_gross.append(-1 * adj_gross)
                        list_neg_adj_gross_year.append(-1 * adj_gross)

                        cont_neg_transactions += 1
                        cont_neg_transactions_year += 1

                        ##### num of neg. transactions YEARLY
                        dict_firm_num_neg_trans_year[manufacturer] += 1.
                        dict_firm_num_neg_trans_year[contractor] += 1.

                        ##### tot num of neg. transactions
                        dict_firm_num_neg_trans[manufacturer] += 1.
                        dict_firm_num_neg_trans[contractor] += 1.

                        #### same for the link
                        dict_link_num_neg_trans_year[tupla_link] += 1.
                        dict_link_num_neg_trans[tupla_link] += 1.

                        G_year.add_node(manufacturer)
                        G_year.add_node(contractor)

                        G.add_node(manufacturer)
                        G.add_node(contractor)

                        try:
                            G.edge[manufacturer][contractor][
                                "neg_weight"] += adj_gross
                        except KeyError:
                            G.add_edge(manufacturer,
                                       contractor,
                                       neg_weight=adj_gross)

                        try:

                            G_year.edge[manufacturer][contractor][
                                "neg_weight"] += adj_gross
                        except KeyError:
                            G_year.add_edge(manufacturer,
                                            contractor,
                                            neg_weight=adj_gross)

                        #### add up to the total neg. yearly amount for each firm
                        try:
                            dict_firm_tot_neg_trans_year[
                                manufacturer] += adj_gross
                        except KeyError:
                            dict_firm_tot_neg_trans_year[manufacturer] = 0.
                            dict_firm_tot_neg_trans_year[
                                manufacturer] += adj_gross
                        try:
                            dict_firm_tot_neg_trans_year[
                                contractor] += adj_gross
                        except KeyError:
                            dict_firm_tot_neg_trans_year[contractor] = 0.
                            dict_firm_tot_neg_trans_year[
                                contractor] += adj_gross

                    else:

                        list_pos_adj_gross.append(adj_gross)
                        list_pos_adj_gross_year.append(adj_gross)

                        ##### num of posit. transactions YEARLY
                        dict_firm_num_pos_trans_year[manufacturer] += 1.
                        dict_firm_num_pos_trans_year[contractor] += 1.

                        ##### tot. num of posit. transactions
                        dict_firm_num_pos_trans[manufacturer] += 1.
                        dict_firm_num_pos_trans[contractor] += 1.

                        #### same for the link
                        dict_link_num_pos_trans_year[tupla_link] += 1.
                        dict_link_num_pos_trans[tupla_link] += 1.

                        try:
                            G.edge[manufacturer][contractor][
                                "pos_weight"] += adj_gross
                        except KeyError:
                            G.add_edge(manufacturer,
                                       contractor,
                                       pos_weight=adj_gross)

                        try:

                            G_year.edge[manufacturer][contractor][
                                "pos_weight"] += adj_gross
                        except KeyError:
                            G_year.add_edge(manufacturer,
                                            contractor,
                                            pos_weight=adj_gross)

                        #### add up to the total pos. yearly amount for each firm
                        try:
                            dict_firm_tot_pos_trans_year[
                                manufacturer] += adj_gross
                        except KeyError:
                            dict_firm_tot_pos_trans_year[manufacturer] = 0.
                            dict_firm_tot_pos_trans_year[
                                manufacturer] += adj_gross
                        try:
                            dict_firm_tot_pos_trans_year[
                                contractor] += adj_gross
                        except KeyError:
                            dict_firm_tot_pos_trans_year[contractor] = 0.
                            dict_firm_tot_pos_trans_year[
                                contractor] += adj_gross

            except ValueError:
                pass  #  some lines (very rare, one single instance) are missing the contractor or manufacturer

        #############################  end of loop to read year file
        ###########################

        if flag_hist == "YES":
            try:
                name_h = "../Results/histogram_pos_adj_gross_values_years_" + str(
                    y) + ".dat"
                histograma_gral.histogram(list_pos_adj_gross_year, name_h)
            except:
                pass

            try:
                name_h = "../Results/histogram_neg_adj_gross_values_years_" + str(
                    y) + ".dat"
                histograma_gral.histogram(list_neg_adj_gross_year, name_h)
            except:
                pass

        for link in dict_link_num_pos_trans_year:
            G_year[link[0]][
                link[1]]['num_pos_trans'] = dict_link_num_pos_trans_year[link]
        for link in dict_link_num_neg_trans_year:
            G_year[link[0]][
                link[1]]['num_neg_trans'] = dict_link_num_neg_trans_year[link]
            G_year[link[0]][link[1]][
                'fract_neg_trans'] = dict_link_num_neg_trans_year[link] / (
                    dict_link_num_neg_trans_year[link] +
                    dict_link_num_pos_trans_year[link])

            if G_year[link[0]][link[1]]['num_neg_trans'] == 0.:
                G_year[link[0]][link[1]]['neg_weight'] = 0.

            if G_year[link[0]][link[1]]['num_pos_trans'] == 0.:
                G_year[link[0]][link[1]]['pos_weight'] = 0.

        for firm in G_year.nodes():

            G_year.node[firm][
                'vol_transct'] = dict_firm_total_trans_volum_year[firm]
            G_year.node[firm][
                'fract_neg_transct'] = dict_firm_num_neg_trans_year[firm] / (
                    dict_firm_num_neg_trans_year[firm] +
                    dict_firm_num_pos_trans_year[firm])

            G_year.node[firm]['num_transact'] = dict_firm_num_neg_trans_year[
                firm] + dict_firm_num_pos_trans_year[firm]

        for firm in dict_firm_tot_pos_trans_year:
            G_year.node[firm][
                'vol_pos_transct'] = dict_firm_tot_pos_trans_year[firm]

        for firm in dict_firm_tot_neg_trans_year:
            G_year.node[firm][
                'vol_neg_transct'] = dict_firm_tot_neg_trans_year[firm]

        ###### fill in the gaps for those firms without positive or neg. transactions
        for node in G_year.nodes():
            try:
                G_year.node[node]['vol_pos_transct']
            except KeyError:
                G_year.node[node]['vol_pos_transct'] = 0.

            try:
                G_year.node[node]['vol_neg_transct']
            except KeyError:
                G_year.node[node]['vol_neg_transct'] = 0.

        print " year", y
        print "  # firms:", len(set(list_firm_ids_year))
        print "  # manufacturers:", len(set(list_manuf_year))
        print "  # contractors", len(
            set(list_contr_year)), "(firms can act as both in general)"
        print "  # non-self contractors", len(set(list_non_self_contractors))

        print "  # transactions:", int(cont_transactions_year)
        print "  # negative transactions:", int(
            cont_neg_transactions_year
        ), "  ", cont_neg_transactions_year / cont_transactions_year * 100., "%"
        print "  # self-transactions:", int(
            cont_self_transactions_year
        ), "  ", cont_self_transactions_year / cont_transactions_year * 100., "%"
        print "  # self-neg-transactions:", int(
            cont_self_neg_transactions_year
        ), "  ", cont_self_neg_transactions_year / cont_transactions_year * 100., "%"

        print "  # unique tuples:", len(set(list_tuplas_year))

        print "row count:", cont - 1

        file1 = open(name1, 'at')
        print >> file1, y, int(cont_transactions_year)
        file1.close()

        file2 = open(name2, 'at')
        print >> file2, y, len(set(list_firm_ids_year))
        file2.close()

        file3 = open(name3, 'at')
        print >> file3, y, int(
            cont_neg_transactions_year
        ), cont_neg_transactions_year / cont_transactions_year * 100.
        file3.close()

        file4 = open(name4, 'at')
        print >> file4, y, int(
            cont_self_transactions_year
        ), cont_self_transactions_year / cont_transactions_year * 100.
        file4.close()

        print "#  manuf. in dict", len(dict_manuf_dict_contr_amounts_year)
        print "#  contr. in dict", len(dict_contr_dict_manuf_amounts_year)

        ########  HHI index as manufacturers and contractors

        for node in G_year.nodes():
            G_year.node[node]['num_manuf'] = 0
            G_year.node[node]['num_contractors'] = 0

        for manufacturer in dict_manuf_dict_contr_amounts_year:
            lista = list(
                dict_manuf_dict_contr_amounts_year[manufacturer].values())
            HHI = Herfindahl_index.calculate_HHI(
                lista)  ## returns a tuple (H, H_normalized)
            G_year.node[manufacturer]['HHI_as_manuf'] = HHI[0]
            G_year.node[manufacturer]['num_contractors'] = len(lista)

        for contractor in dict_contr_dict_manuf_amounts_year:
            lista = list(
                dict_contr_dict_manuf_amounts_year[contractor].values())
            HHI = Herfindahl_index.calculate_HHI(
                lista)  ## returns a tuple (H, H_normalized)
            G_year.node[contractor]['HHI_as_contr'] = HHI[0]
            G_year.node[contractor]['num_manuf'] = len(lista)

        ################  i add topological attributes to the nodes
        #############################
        print "calculating network metrics....."
        print "  CC..."

        if flag_network_metrics == "YES":
            dict_clustering = nx.clustering(G_year)
        print "  node betweenness..."

        if flag_network_metrics == "YES":
            dict_betweenness_nodes = nx.betweenness_centrality(G_year)
        list_k = []
        for node in G_year.nodes():
            k = G_year.degree(node)
            G_year.node[node]["degree"] = k
            list_k.append(k)

            if flag_network_metrics == "YES":
                G_year.node[node]["CC"] = dict_clustering[node]
                G_year.node[node]["betweeness"] = dict_betweenness_nodes[node]
            else:
                G_year.node[node]["CC"] = 0.
                G_year.node[node]["betweeness"] = 0.

        max_k = max(list_k)

        print "  edge betweenness..."
        if flag_network_metrics == "YES":
            dict_betweenness_edges = nx.edge_betweenness_centrality(
                G_year, normalized=True, weight=None
            )  # returns a dictionary keyed by edge tuples with betweenness centrality as the value; the betweenness could also be computed taking edge weights into account
        for edge in G_year.edges():
            # print edge
            try:
                if flag_network_metrics == "YES":
                    G_year.edge[edge[0]][edge[1]][
                        "link_betweeness"] = dict_betweenness_edges[edge]
                else:
                    G_year.edge[edge[0]][edge[1]]["link_betweeness"] = 0.
            except KeyError:
                G_year.edge[edge[0]][edge[1]]["link_betweeness"] = "NA"
                print "edge", edge, "not found"

        for node in G_year.nodes():

            try:
                G_year.node[node]['HHI_as_manuf']
            except KeyError:
                G_year.node[node]['HHI_as_manuf'] = "NA"

            try:
                G_year.node[node]['HHI_as_contr']
            except KeyError:
                G_year.node[node]['HHI_as_contr'] = "NA"

        ####### kshell structure
        print "  kshell..."
        calculate_kshell(G_year, max_k)

        ####### max clique size
        print "  max-clique..."
        for i in G_year.nodes():

            if flag_network_metrics == "YES":
                maximo = 1
                lista = nx.cliques_containing_node(
                    G_year, i
                )  # list of lists, e.g.: [[207925, 203592], [207925, 10500761], [207925, 200554], [207925, 202587]]
                #  print i, lista

                for elem in lista:
                    if len(elem) > maximo:
                        maximo = len(elem)
                G_year.node[i]['max_clique_size'] = maximo
            else:
                G_year.node[i]['max_clique_size'] = 0

        ############## i write the corresponding rows in the master file (one line per link and per year)
        #########################
        file_master = open(master_name, 'at')
        for edge in G_year.edges():

            try:
                dict_tuple_link_Pinf_previous_year[edge]
            except KeyError:
                dict_tuple_link_Pinf_previous_year[edge] = 0.
            if y == initial_year:
                dict_tuple_link_Pinf_previous_year[edge] = "NA"

            manufacturer = edge[0]
            contractor = edge[1]

            P_inf = 0.  #  1: if there has been at least one neg. transaction between manuf. and contr. during the year, 0 otherwise
            if G_year[manufacturer][contractor]['neg_weight'] != 0:
                P_inf = 1.

            try:
                dict_tuple_link_cumulat_previous_Pinf[edge]
            except KeyError:
                dict_tuple_link_cumulat_previous_Pinf[edge] = 0.
            if y == initial_year:
                dict_tuple_link_cumulat_previous_Pinf[edge] = "NA"

            dict_tuple_link_fract_previous_inf_years[edge] = 0.
            if y > initial_year:
                dict_tuple_link_fract_previous_inf_years[
                    edge] = dict_tuple_link_cumulat_previous_Pinf[edge] / (
                        y - 1985.)
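                # note: the denominator hardcodes 1985 as the first year instead of using initial_year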
            else:
                dict_tuple_link_fract_previous_inf_years[edge] = "NA"

            lista = []  # avg degree of the manufacturer's neighbours
            for n in G.neighbors(manufacturer):
                lista.append(float(G.degree(n)))

            K_nn_i = 0.
            try:
                K_nn_i = numpy.mean(lista)
            except:
                pass

            lista = []  # avg degree of the contractor's neighbours
            for n in G.neighbors(contractor):
                lista.append(float(G.degree(n)))

            K_nn_j = 0.
            try:
                K_nn_j = numpy.mean(lista)
            except:
                pass

            amount_pos_self_trans_i = 0.
            amount_pos_self_trans_j = 0.
            try:
                amount_pos_self_trans_i = dict_firm_amount_pos_self_trans_year[
                    manufacturer]
            except KeyError:
                pass
            try:
                amount_pos_self_trans_j = dict_firm_amount_pos_self_trans_year[
                    contractor]
            except KeyError:
                pass

            amount_neg_self_trans_i = 0.
            amount_neg_self_trans_j = 0.
            try:
                amount_neg_self_trans_i = dict_firm_amount_neg_self_trans_year[
                    manufacturer]
            except KeyError:
                pass
            try:
                amount_neg_self_trans_j = dict_firm_amount_neg_self_trans_year[
                    contractor]
            except KeyError:
                pass

            num_pos_self_trans_i = 0.
            num_pos_self_trans_j = 0.
            try:
                num_pos_self_trans_i = dict_firm_num_pos_self_trans_year[
                    manufacturer]
            except KeyError:
                pass
            try:
                num_pos_self_trans_j = dict_firm_num_pos_self_trans_year[
                    contractor]
            except KeyError:
                pass

            num_neg_self_trans_i = 0.
            num_neg_self_trans_j = 0.
            try:
                num_neg_self_trans_i = dict_firm_num_neg_self_trans_year[
                    manufacturer]
            except KeyError:
                pass
            try:
                num_neg_self_trans_j = dict_firm_num_neg_self_trans_year[
                    contractor]
            except KeyError:
                pass

            try:
                fract_pos_bussiness_of_M_with_C = G_year[manufacturer][
                    contractor]['pos_weight'] / G_year.node[manufacturer][
                        'vol_pos_transct']
            except ZeroDivisionError:
                fract_pos_bussiness_of_M_with_C = "NA"  #  (otherwise i cant define business asymmetry)

            try:
                fract_pos_bussiness_of_C_with_M = G_year[manufacturer][
                    contractor]['pos_weight'] / G_year.node[contractor][
                        'vol_pos_transct']
            except ZeroDivisionError:
                fract_pos_bussiness_of_C_with_M = "NA"  #(otherwise i cant define business asymmetry)

            try:
                error_size_ij = -1. * G_year[manufacturer][contractor][
                    'neg_weight'] / G_year[manufacturer][contractor][
                        'pos_weight']
            except ZeroDivisionError:
                error_size_ij = "NA"
                if G_year[manufacturer][contractor]['neg_weight'] != 0.:
                    error_size_ij = -1.

            degree_asymmetry_ij = float(
                (G_year.degree(manufacturer) - G_year.degree(contractor)) *
                (G_year.degree(manufacturer) - G_year.degree(contractor))
            ) / float(
                (G_year.degree(manufacturer) + G_year.degree(contractor)) *
                (G_year.degree(manufacturer) + G_year.degree(contractor)))
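            # i.e. ((k_i - k_j) / (k_i + k_j))**2, a value in [0, 1]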

            business_asymmetry_ij = "NA"
            try:
                business_asymmetry_ij = (
                    (fract_pos_bussiness_of_M_with_C -
                     fract_pos_bussiness_of_C_with_M) *
                    (fract_pos_bussiness_of_M_with_C -
                     fract_pos_bussiness_of_C_with_M)) / (
                         (fract_pos_bussiness_of_M_with_C +
                          fract_pos_bussiness_of_C_with_M) *
                         (fract_pos_bussiness_of_M_with_C +
                          fract_pos_bussiness_of_C_with_M))
            except:
                pass  # either a ZeroDivisionError, or a TypeError because one of the terms is "NA"

            #     print >> file_master, "Mi Cj Mi_Cj tot_adj_gross_ij pos_adj_gross_ij neg_adj_gross_ij pos_adj_gross_i neg_adj_gross_i pos_adj_gross_j neg_adj_gross_j P_inf P_inf_previous_year Cumulat_num_inf_years frac_previous_inf_years year ki kj K_nn_i K_nn_j HHIi_as_manuf HHIi_as_contr HHIj_as_manuf HHIj_as_contr betweenness_i betweenness_j link_betweenness_ij max_clique_i max_clique_j kshell_i kshell_j num_manuf_i num_contr_i num_manuf_j num_contr_j num_transact_ij num_transact_i num_transact_j amount_pos_self_trans_i amount_pos_self_trans_j amount_neg_self_trans_i amount_neg_self_trans_j num_pos_self_trans_i num_pos_self_trans_j num_neg_self_trans_i num_neg_self_trans_j fract_pos_bussiness_of_M_with_C fract_pos_bussiness_of_C_with_M degree_asymmetry_ij business_asymmetry_ij size_tot_errors_ij"

            print >> file_master, manufacturer, contractor, str(
                manufacturer
            ) + str(
                contractor
            ), G_year[manufacturer][contractor]['pos_weight'] + G_year[manufacturer][contractor][
                'neg_weight'], G_year[manufacturer][contractor]['pos_weight'], G_year[manufacturer][
                    contractor]['neg_weight'], G_year.node[manufacturer]['vol_pos_transct'], G_year.node[
                        manufacturer]['vol_neg_transct'], G_year.node[contractor][
                            'vol_pos_transct'], G_year.node[contractor]['vol_neg_transct'], P_inf, dict_tuple_link_Pinf_previous_year[
                                edge], dict_tuple_link_cumulat_previous_Pinf[edge], dict_tuple_link_fract_previous_inf_years[
                                    edge], y, G_year.degree(
                                        manufacturer
                                    ), G_year.degree(
                                        contractor
                                    ), K_nn_i, K_nn_j, G_year.node[manufacturer][
                                        'HHI_as_manuf'], G_year.node[manufacturer][
                                            'HHI_as_contr'], G_year.node[contractor]['HHI_as_manuf'], G_year.node[contractor][
                                                'HHI_as_contr'], G_year.node[manufacturer]['betweeness'], G_year.node[contractor]['betweeness'], G_year[
                                                    manufacturer][contractor]['link_betweeness'], G_year.node[
                                                        manufacturer]['max_clique_size'], G_year.node[contractor][
                                                            'max_clique_size'], G_year.node[manufacturer][
                                                                'kshell'], G_year.node[contractor]['kshell'], G_year.node[
                                                                    manufacturer]['num_manuf'], G_year.node[manufacturer][
                                                                        'num_contractors'], G_year.node[contractor][
                                                                            'num_manuf'], G_year.node[contractor]['num_contractors'], G_year[
                                                                                manufacturer][contractor]['num_pos_trans'] + G_year[
                                                                                    manufacturer][contractor][
                                                                                        'num_neg_trans'], G_year.node[manufacturer][
                                                                                            'num_transact'], G_year.node[contractor][
                                                                                                'num_transact'], amount_pos_self_trans_i, amount_pos_self_trans_j, amount_neg_self_trans_i, amount_neg_self_trans_j, num_pos_self_trans_i, num_pos_self_trans_j, num_neg_self_trans_i, num_neg_self_trans_j, fract_pos_bussiness_of_M_with_C, fract_pos_bussiness_of_C_with_M, degree_asymmetry_ij, business_asymmetry_ij, error_size_ij

            ### for next year
            try:
                dict_tuple_link_Pinf_previous_year[edge] = P_inf
            except TypeError:
                dict_tuple_link_Pinf_previous_year[edge] = 0.

            try:
                dict_tuple_link_cumulat_previous_Pinf[edge] += P_inf
            except TypeError:
                dict_tuple_link_cumulat_previous_Pinf[edge] = 0.

        file_master.close()

        ########  write the yearly  network
        filename_network = "../Results/Supply_network_year_" + str(y)
        pickle.dump(G_year, open(filename_network + ".pickle", 'wb'))
        print "  written", filename_network + ".pickle"

        nx.write_gml(G_year, filename_network + ".gml")
        print "  written", filename_network + ".gml"
        print "  N:", len(G_year.nodes()), " L:", len(G_year.edges())

        G_no_loops = remove_self_loops(G_year)
        print "   without self-loops:", len(G_no_loops.nodes()), " L:", len(
            G_no_loops.edges())

        print "# nodes (aggregated so far):", len(
            G.nodes()), " # links (id):", len(G.edges())

        y += 1
        ################  new year file
        ##################################################
        ##################################################
        ##################################################

    print "written:", master_name

    print "\n\nAggregated network:"
    print "tot. # firms:", len(set(list_firm_ids))
    print "tot. # transactions:", int(cont_transactions)
    print "tot. # negative transactions:", int(
        cont_neg_transactions
    ), "  ", cont_neg_transactions / cont_transactions * 100., "%"
    print "tot. # self-transactions:", int(
        cont_self_transactions
    ), "  ", cont_self_transactions / cont_transactions * 100., "%"
    print "tot. # self-neg-transactions:", int(
        cont_self_neg_transactions
    ), "  ", cont_self_neg_transactions / cont_transactions * 100., "%"

    print "  # unique tuples:", len(set(list_tuplas))

    for firm in G.nodes():
        G.node[firm]['vol_transct'] = dict_firm_total_trans_volum[firm]
        G.node[firm]['fract_neg_transct'] = dict_firm_num_neg_trans[firm] / (
            dict_firm_num_neg_trans[firm] + dict_firm_num_pos_trans[firm])
        G.node[firm]['num_transact'] = dict_firm_num_neg_trans[
            firm] + dict_firm_num_pos_trans[firm]

        G.node[firm]['num_manuf'] = 0
        G.node[firm]['num_contractors'] = 0

    for link in dict_link_num_pos_trans:
        G[link[0]][link[1]]['num_pos_trans'] = dict_link_num_pos_trans[link]
        G[link[0]][link[1]]['num_neg_trans'] = dict_link_num_neg_trans[link]
        G[link[0]][
            link[1]]['fract_neg_trans'] = dict_link_num_neg_trans[link] / (
                dict_link_num_neg_trans[link] + dict_link_num_pos_trans[link])

        if G[link[0]][link[1]]['num_neg_trans'] == 0.:
            G[link[0]][link[1]]['neg_weight'] = 0.

        if G[link[0]][link[1]]['num_pos_trans'] == 0.:
            G[link[0]][link[1]]['pos_weight'] = 0.

#    print sorted(list_periods)
    name_h = "../Results/histogram_period_lengths.dat"
    histograma_gral.histogram(list_periods, name_h)

    ################  i add topological attributes to the nodes
    ###########################
    print "calculating network metrics:"
    print "  CC..."
    if flag_network_metrics == "YES":
        dict_clustering = nx.clustering(G)

    print "  node betweenness..."
    if flag_network_metrics == "YES":
        dict_betweenness_nodes = nx.betweenness_centrality(G)
    list_k = []
    for node in G.nodes():
        k = G.degree(node)
        G.node[node]["degree"] = k
        list_k.append(k)
        if flag_network_metrics == "YES":
            G.node[node]["CC"] = dict_clustering[node]
            G.node[node]["betweeness"] = dict_betweenness_nodes[node]
        else:
            G.node[node]["CC"] = 0.
            G.node[node]["betweeness"] = 0.
    max_k = max(list_k)

    print "  edge betweenness..."
    if flag_network_metrics == "YES":
        dict_betweenness_edges = nx.edge_betweenness_centrality(
            G, normalized=True, weight=None
        )  # returns a dictionary keyed by edge tuples with betweenness centrality as the value; the betweenness could also be computed taking edge weights into account
    for edge in G.edges():
        # print edge
        try:
            if flag_network_metrics == "YES":
                G.edge[edge[0]][
                    edge[1]]["link_betweeness"] = dict_betweenness_edges[edge]
            else:
                G.edge[edge[0]][edge[1]]["link_betweeness"] = 0
        except KeyError:
            G.edge[edge[0]][edge[1]]["link_betweeness"] = "NA"
            print "edge", edge, "not found"

    #######  k-shell decomposition   (i need to make a copy and remove the self-loops from that before i can proceed)
    print "  kshell..."
    if flag_network_metrics == "YES":
        calculate_kshell(G, max_k)

    ####### max clique size
    print "  max-clique..."
    for node in G.nodes():

        if flag_network_metrics == "YES":
            maximo = 1
            lista = nx.cliques_containing_node(
                G, node
            )  # list of lists, e.g.: [[207925, 203592], [207925, 10500761], [207925, 200554], [207925, 202587]]
            #  print node, lista

            for elem in lista:
                if len(elem) > maximo:
                    maximo = len(elem)
            G.node[node]['max_clique_size'] = maximo
        else:
            G.node[node]['max_clique_size'] = 0

    #######  HHI index as manufacturer and as contractor
    for manufact in dict_manuf_dict_contr_amounts:
        lista = list(dict_manuf_dict_contr_amounts[manufact].values())
        HHI = Herfindahl_index.calculate_HHI(lista)
        G.node[manufact]['HHI_as_manuf'] = HHI[0]
        G.node[manufact]['num_contractors'] = len(lista)

    for contr in dict_contr_dict_manuf_amounts:
        lista = list(dict_contr_dict_manuf_amounts[contr].values())
        HHI = Herfindahl_index.calculate_HHI(lista)
        G.node[contr]['HHI_as_contr'] = HHI[0]
        G.node[contr]['num_manuf'] = len(lista)

    ########  write the aggregated network
    filename_network = "../Results/Supply_network_" + str(
        initial_year) + "_" + str(final_year)
    pickle.dump(G, open(filename_network + ".pickle", 'wb'))
    print "written", filename_network + ".pickle"

    nx.write_gml(G, filename_network + ".gml")
    print "written", filename_network + ".gml"

    print "N:", len(G.nodes()), " L:", len(G.edges())

    G_no_loops = remove_self_loops(G)
    print "   without self-loops:", len(G_no_loops.nodes()), " L:", len(
        G_no_loops.edges())

    print

    if flag_hist == "YES":
        name_h = "../Results/histogram_pos_adj_gross_values_years_" + str(
            initial_year) + "_" + str(final_year) + ".dat"
        histograma_gral.histogram(list_pos_adj_gross, name_h)
#    print "# obsrv:",len(list_pos_adj_gross), "  max.", max(list_pos_adj_gross), "  min.", min(list_pos_adj_gross), "  avg:", numpy.mean(list_pos_adj_gross), "  sd:", numpy.std(list_pos_adj_gross)

    print

    if flag_hist == "YES":
        name_h = "../Results/histogram_neg_adj_gross_values_years_" + str(
            initial_year) + "_" + str(final_year) + ".dat"
        histograma_gral.histogram(list_neg_adj_gross, name_h)
# print "# obsrv:",len(list_neg_adj_gross), "  max.", -1.*max(list_neg_adj_gross), "  min.", -1.*min(list_neg_adj_gross), "  avg:", -1.*numpy.mean(list_neg_adj_gross), "  sd:", numpy.std(list_neg_adj_gross)

    print "written:", name1
    print "written:", name2
    print "written:", name3
    print "written:", name4
Exemplo n.º 39
0
def FindCliquesAndButterflies():

    #This will compute a list of maximal cliques.
    #We are using NetworkX's implementation of clique finding.
    #reciprocal=True makes sure that only pairs of nodes connected by edges in both directions are kept when searching for cliques
    listOfCliques = list(
        nx.find_cliques(DirectedEmailGraph.to_undirected(reciprocal=True)))

    #This will find the maximum clique size.
    #reciprocal=True again keeps only the reciprocated edges
    #cliques=listOfCliques reuses the list of cliques already computed
    maxCliqueSize = nx.graph_clique_number(
        DirectedEmailGraph.to_undirected(reciprocal=True),
        cliques=listOfCliques)

    #This list will hold all of the cliques whose size matches maxCliqueSize
    listOfMaximumCliques = []

    #This will compute listOfMaximumCliques
    for i in range(len(listOfCliques)):
        if len(listOfCliques[i]) == maxCliqueSize:
            listOfMaximumCliques.append(listOfCliques[i])

    #This will contain the nodes from listOfMaximumCliques; a set is chosen because it has no duplicate elements
    setOfNodes = set()

    #This will compute the setOfNodes
    for i in range(len(listOfMaximumCliques)):
        for j in range(maxCliqueSize):
            setOfNodes.add(listOfMaximumCliques[i][j])

    #This will hold a temporary list of butterfly candidates, since we have not yet verified that the cliques are connected by one node and one node only.
    tempListOfButterflies = []
    maxSizeOfTempListOfButterflies = 0

    #Computes tempListOfButterflies
    for i in setOfNodes:
        tempList = nx.cliques_containing_node(
            DirectedEmailGraph.to_undirected(reciprocal=True),
            nodes=i,
            cliques=listOfMaximumCliques)
        if (len(tempList) >= 2):
            tempListOfButterflies.append(tempList)

            #This is going to be used to calculate butterflies
            if maxSizeOfTempListOfButterflies < len(tempList):
                maxSizeOfTempListOfButterflies = len(tempList)

    #This will hold a list of list of lists that contain {{{LeftWing}{RightWing}},{{LeftWing}{RightWing}}} and so on
    RealListOfButterflies = []

    #This Computes RealListOfButterflies
    for i in range(len(tempListOfButterflies)):

        #If the list holds more than two cliques, there may be several pairs of
        #cliques with one or more nodes in common, so every pair has to be checked.
        if len(tempListOfButterflies[i]) > 2:
            for j in range(len(tempListOfButterflies[i])):
                for k in range(len(tempListOfButterflies[i])):
                    if k > j:
                        tempSet = set.intersection(
                            *map(set, (tempListOfButterflies[i][j],
                                       tempListOfButterflies[i][k])))
                        if len(tempSet) == 1:
                            RealListOfButterflies.append(
                                (tempListOfButterflies[i][j],
                                 tempListOfButterflies[i][k]))

        #If there are only two cliques and only one node in common then we will add them to the list of butterflies
        if (len(tempListOfButterflies[i]) == 2):
            tempSet = set.intersection(*map(set, tempListOfButterflies[i]))
            if len(tempSet) == 1:
                RealListOfButterflies.append(tempListOfButterflies[i])

    #Here we will start listing cliques of largest size.
    print "##########################################################"
    print "The largest size clique in the graph was: ", maxCliqueSize
    print "##########################################################"
    print "List of Butterflies: \n"

    for i in range(len(RealListOfButterflies)):
        print RealListOfButterflies[i]
    return
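
For reference, the pairwise test above can be condensed into a few lines; a sketch (not the original code) that pairs up the maximum-size maximal cliques sharing exactly one node, assuming an undirected graph G:

from itertools import combinations
import networkx as nx

def find_butterflies(G):
    # all maximal cliques, keeping only those of maximum size
    cliques = list(nx.find_cliques(G))
    max_size = max(len(c) for c in cliques)
    biggest = [set(c) for c in cliques if len(c) == max_size]
    # a "butterfly" is a pair of maximum cliques sharing exactly one node
    return [(sorted(a), sorted(b)) for a, b in combinations(biggest, 2)
            if len(a & b) == 1]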
Exemplo n.º 40
0
        "follower_user_data"]  #followers of followers screen_names
    followers = graph_data["followers"]  #my followers
    followers_screen_names = graph_data[
        "followers_screen_names"]  #screen_names of my followers
    follower_data = graph_data["follower_data"]  #followers of followers

for follower in followers_screen_names:
    twitter_graph.add_edge(main_user, follower["screen_name"].lower())
    for each, value in follower_user_data.items():
        for name in value:
            twitter_graph.add_edge(follower["screen_name"].lower(),
                                   name["screen_name"].lower())

# set positions
pos = nx.random_layout(twitter_graph)

plt.figure(figsize=(16, 10))

nx.draw_networkx_nodes(twitter_graph, pos, node_size=30)
nx.draw_networkx_edges(twitter_graph, pos, alpha=0.01)

#get the nodes that we want to draw labels for
clique = nx.cliques_containing_node(twitter_graph, nodes=[main_user])
clique = clique[main_user][0]
clique.append(main_user)
labels = {}
for name in clique:
    labels[name] = name
nx.draw_networkx_labels(twitter_graph, pos, font_size=16, labels=labels)
plt.show()
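
Note that nx.cliques_containing_node belongs to the old NetworkX 1.x API used in these examples and is no longer available in recent releases. An equivalent helper is a one-line filter over nx.find_cliques (a sketch, not part of the original code):

import networkx as nx

def cliques_containing_node(G, node):
    # every maximal clique that contains the given node
    return [c for c in nx.find_cliques(G) if node in c]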
def create_clique_top_node(graphml_filename):
    g = read_graphml(graphml_filename)
    top_node = get_top_node(g)
    print('before finding cliques for ' + top_node[0])
    cliques = nx.cliques_containing_node(g, nodes=[top_node])
    write_cliques_to_csv(cliques=cliques)
def main(graph_name):

    H = nx.read_gml(graph_name)

    for node in H.nodes():  # i remove self loops
        if node in H.neighbors(node):
            if len(H.neighbors(node)) > 1:
                H.remove_edge(node, node)
            else:
                H.remove_node(node)

# for node in H.nodes():
#    if H.node[node]['weigh_ins'] <5: #Adherent filter
#       H.remove_node(node)
# print node, "is going down"

    G = nx.connected_component_subgraphs(H)[0]  # Giant component

    print "size of the GC:", len(
        G.nodes())  #, "after filtering for adherence!!"

    #dir=graph_name.split("full_")[0]
    #dir=graph_name.split("master")[0]
    #dir=graph_name.split("method3_")[0]
    #dir=graph_name.split("method3_adh")[0]
    dir = graph_name.split("friends")[0]

    dir = dir + "roles/"

    time_in_system = 50  #minimum amount of time in the system for a user to be included in the statistics

    #name=graph_name.split('data/')[1]
    #name=graph_name.split('method3_50/interim/')[1]
    #name=graph_name.split('network_all_users/')[1]
    name = graph_name.split('5_points_network_2010/data/')[1]

    name = name.split('.gml')[0]

    name0 = dir + name + "_overlap_R6s_averages_" + str(
        time_in_system) + "days_exclude_R6s.dat"
    file0 = open(name0, 'wt')
    file0.close()

    contador = 0
    name12 = dir + name + "_slopes_for_the_fits_average_weight_change.dat"
    file = open(name12, 'wt')
    file.close()

    ####for the Isolated Clusters:
    list_GC_nodes = []
    for n in G.nodes():
        list_GC_nodes.append(n)
    # print G.node[n]['percentage_weight_change']

# print "# users GC:",len(list_GC_nodes),"total:",len(H.nodes())

    list_weight_changes_not_GC = []
    for n in H.nodes():
        if n not in list_GC_nodes:
            #print n,"not in GC"
            list_weight_changes_not_GC.append(
                float(H.node[n]['percentage_weight_change']))

    #print  "# users not in GC:",len(list_weight_changes_not_GC)

# who="not_GC"
#Nbins=18
#histograma(list_weight_changes_not_GC,Nbins,dir,name,who)

###########################

    list_R6s = []  # collect the R6 of the system
    list_R6s_label = []
    list_R6s_percent_weight_change = []
    for node in G.nodes():
        if str(G.node[node]['role']) == "R6":
            list_R6s.append(node)
            list_R6s_label.append(G.node[node]['label'])
            list_R6s_percent_weight_change.append(
                float(G.node[node]['percentage_weight_change']))

    name00 = dir + name + "R6s_and_top_tens_averages_" + str(
        time_in_system) + "days_exclude_R6s.dat"

    file0 = open(name00, 'at')
    print >> file0, "R6s", numpy.mean(
        list_R6s_percent_weight_change), numpy.std(
            list_R6s_percent_weight_change)
    file0.close()

    #  print "\n\n R6s:\n"
    # for i in  list_R6s_label:
    #    print i

    # studying the possible cumulative effect of more than one R6 on the population:
    for node in G.nodes():
        cont = 0
        for n in G.neighbors(node):
            if str(G.node[n]['role']) == "R6":
                cont += 1

        G.node[node]["R6_overlap"] = int(cont)

    ##### weight change for people not connected to any R6s:####

    list_weight_changes_no_neighbors = []
    for node in G.nodes():
        interseccion = list(set(G.neighbors(node)) & set(list_R6s))

        # print node, "intersection:",intersection,len(intersection)
        #   print "because", list_R6s, "and ",G.neighbors(node)
        #  raw_input()
        if len(interseccion) == 0:
            list_weight_changes_no_neighbors.append(
                G.node[node]['percentage_weight_change'])

#  print len(list_weight_changes_no_neighbors),"no_neighbors"

    who = "no_neigbors_R6s"
    Nbins = 18
    histograma(list_weight_changes_no_neighbors, Nbins, dir, name, who)

    # mood test
    mood = stats.mood(list_weight_changes_no_neighbors,
                      list_weight_changes_not_GC)
    print "mood test for", who, "against not_GC:", mood

    ########
    # K-S test:
    ks = stats.ks_2samp(list_weight_changes_no_neighbors,
                        list_weight_changes_not_GC)
    print "KS test for", who, "against not_GC:", ks

    name00 = "ks_results.dat"

    file0 = open(dir + name00, 'at')
    print >> file0, "KS test for", who, "of", graph_name, "against not_GC:", ks
    file0.close()
    #############################################

    #average percentage weight change as a function of the size of the largest CLIQUE the node belongs to:

    absolute_max = 1
    for i in G.nodes():

        maximo = 1
        list2 = nx.cliques_containing_node(G, i)
        # print i, list2

        for elem in list2:
            # print elem,len(elem,)
            if len(elem) > maximo:
                maximo = len(elem)
    # print "\n",maximo
        G.node[i]['max_clique_size'] = maximo

        if absolute_max < maximo:
            absolute_max = maximo

    #print absolute_max

    lista = list(
        nx.find_cliques(G))  # creates a list of cliques (a list of lists)
    max_clique = nx.graph_clique_number(G)  #finds out max size clique
    num_tot_clique = nx.graph_number_of_cliques(
        G)  #finds out total number of cliques

    # count number of 2, 3, 4, 5, 6  and 7cliques:

    num_2cliques = 0
    num_3cliques = 0
    num_4cliques = 0
    num_5cliques = 0
    num_6cliques = 0
    num_7cliques = 0
    num_8cliques = 0
    num_9cliques = 0

    for element in lista:
        if len(element) == 2:
            num_2cliques = num_2cliques + 1

        elif len(element) == 3:
            num_3cliques = num_3cliques + 1

        elif len(element) == 4:
            num_4cliques = num_4cliques + 1

        elif len(element) == 5:
            num_5cliques = num_5cliques + 1

        elif len(element) == 6:
            num_6cliques = num_6cliques + 1

        elif len(element) == 7:
            num_7cliques = num_7cliques + 1

        elif len(element) == 8:
            num_8cliques = num_8cliques + 1

        elif len(element) == 9:
            num_9cliques = num_9cliques + 1

#   print " 2: ",num_2cliques, "     3: ",num_3cliques, "   4: ",num_4cliques, "     5: ",num_5cliques, "   6: ",num_6cliques, "   7: ",num_7cliques, "   8: ",num_8cliques, "   9: ",num_9cliques, "   max_clique_size:",max_clique, "   num_tot_cliques:", num_tot_clique

    name33 = dir + name + "_percent_weight_change_vs_largest_clique_size.dat"
    file11 = open(name33, 'wt')
    file11.close()

    list_of_lists_for_bootstrap = []

    x_positions_fit = []
    y_positions_fit = []
    cum_size_set = float(len(G.nodes()))

    tot_nodes = []

    for clique_size in range(1, max_clique):

        clique_size = clique_size + 1
        print clique_size

        num_users_set = cum_size_set

        percent_weight_change_that_clique_size = []
        for n in G.nodes():

            if G.node[n]['max_clique_size'] == clique_size:
                percent_weight_change_that_clique_size.append(
                    float(G.node[n]['percentage_weight_change']))

                tot_nodes.append(float(G.node[n]['percentage_weight_change']))

                cum_size_set -= 1.0

        file11 = open(name33, 'at')
        print >> file11, clique_size, len(
            percent_weight_change_that_clique_size), num_users_set / float(
                len(G.nodes())), numpy.mean(
                    percent_weight_change_that_clique_size), numpy.std(
                        percent_weight_change_that_clique_size)
        file11.close()

        if len(x_positions_fit) <= 7:
            x_positions_fit.append(clique_size)
            y_positions_fit.append(
                numpy.mean(percent_weight_change_that_clique_size))

            list_of_lists_for_bootstrap.append(
                percent_weight_change_that_clique_size)

    slope, intercept, Corr_coef, p_value, std_err = stats.linregress(
        x_positions_fit, y_positions_fit)  # least-squares linear fit

    print "result linear. fit for clique size dependency:"

    print "slope:", slope, "intercept:", intercept, "Corr_coef:", Corr_coef, "p_value:", p_value, "std_err:", std_err

    name11 = dir + name + "_fits_clique_size.dat"

    file11 = open(name11, 'wt')
    for i in range(len(x_positions_fit)):
        print >> file11, x_positions_fit[
            i], intercept + x_positions_fit[i] * slope

    print >> file11, "\n\n", "y=", intercept, "+", slope, "*x",
    print "Bootstrap for clique size:\n"

    mean_slope, standard_dev = bootstrap(x_positions_fit[0],
                                         x_positions_fit[-1],
                                         list_of_lists_for_bootstrap)
    zscore = (slope - mean_slope) / standard_dev
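    # z-score of the observed slope against the bootstrap distribution of slopes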

    print >> file11, "bootstrap:\n", "actual slope:", slope, "mean_slope:", mean_slope, "standard_dev:", standard_dev, "\n zscore:", zscore

    print x_positions_fit[0], x_positions_fit[
        -1], "actual slope:", slope, "mean_slope:", mean_slope, "standard_dev:", standard_dev, "\n zscore:", zscore

    file11.close()

    contador += 1
    file = open(name12, 'at')
    print >> file, contador, mean_slope, standard_dev, "largest_clique_size"
    file.close()

    #######################################

    #####dose effect of the R6s independently########

    name11 = dir + name + "_dose_eff_indepently_only_one_R6_" + str(
        time_in_system) + "days_exclude_R6s.dat"
    file11 = open(name11, 'at')
    print >> file11, 0, "average_no_neighbors", "average_no_neighbors", "average_no_neighbors", len(
        list_weight_changes_no_neighbors
    ), numpy.mean(list_weight_changes_no_neighbors), numpy.std(
        list_weight_changes_no_neighbors
    )  # the first line of the file is actually for no_neighbors, the rest, for one_and_only_one
    file11.close()

    file11 = open(name11, 'wt')
    file11.close()

    cont = 1
    list_all = []
    list_all_nodes = []
    for R6 in list_R6s:
        list_weight_changes = []
        for n in G.neighbors(R6):
            if (G.node[n]['role'] != "R6") and (G.node[n]["R6_overlap"] == 1):
                list_weight_changes.append(
                    float(G.node[n]['percentage_weight_change']))

                if n not in list_all_nodes:
                    list_all_nodes.append(n)
                    list_all.append(
                        float(G.node[n]['percentage_weight_change']))

        if len(list_weight_changes) > 0:

            file11 = open(name11, 'at')
            print >> file11, cont, G.node[R6]['role'], G.node[R6][
                'label'], len(
                    G.neighbors(R6)), len(list_weight_changes), numpy.mean(
                        list_weight_changes), numpy.std(list_weight_changes)
            file11.close()
            # print cont,G.node[R6]['role'],G.node[R6]['label'], len(G.neighbors(R6)),len(list_weight_changes),numpy.mean(list_weight_changes),numpy.std(list_weight_changes)
            cont = cont + 1

        else:
            # file11=open(name11, 'at')
            #print >> file11,cont,G.node[R6]['role'],G.node[R6]['label'],len(G.neighbors(R6)),len(list_weight_changes)
            #file11.close()
            # print cont,G.node[R6]['role'],G.node[R6]['label'],len(G.neighbors(R6)),len(list_weight_changes)
            cont = cont + 1

    who = "one_and_only_one_R6s"
    Nbins = 18
    histograma(list_all, Nbins, dir, name, who)

    ####################################

    print "\n\n"

    list_of_lists_for_bootstrap = []

    x_positions_fit = []
    y_positions_fit = []

    averages_larger5_x = []
    averages_larger5_y = []
    norm = 0.0

    cum_size_set = float(len(G.nodes())) - float(len(list_R6s))
    for r in range(len(list_R6s) + 1):

        # list_BMI_changes=[]
        list_weight_changes = []
        list_percentage_weight_changes = []
        list_activities = []

        num_users_set = cum_size_set
        for node in G.nodes():

            if int(G.node[node]["R6_overlap"]) == r:

                if G.node[node]["role"] == "R6":  # i exclude the R6s

                    pass
                else:

                    if int(G.node[node]['time_in_system']) > time_in_system:

                        #   list_BMI_changes.append(float(G.node[node]['final_BMI'])-float(G.node[node]['initial_BMI']))
                        list_weight_changes.append(
                            float(G.node[node]['weight_change']))
                        list_percentage_weight_changes.append(
                            float(G.node[node]['percentage_weight_change']))
                        list_activities.append(
                            float(G.node[node]['activity']) /
                            float(G.node[node]['time_in_system']))
                        cum_size_set -= 1.0

        if len(list_percentage_weight_changes) > 0:
            # average_BMI_change=numpy.mean(list_BMI_changes)
            average_weight_change = numpy.mean(list_weight_changes)
            average_percentage_weight_change = numpy.mean(
                list_percentage_weight_changes)
            average_activity = numpy.mean(list_activities)

            #deviation_BMI=numpy.std(list_BMI_changes)
            deviation_weight = numpy.std(list_weight_changes)
            deviation_percentage_weight = numpy.std(
                list_percentage_weight_changes)
            deviation_activity = numpy.std(list_activities)

            #print out

            file0 = open(name0, 'at')
            print >> file0, r, len(
                list_percentage_weight_changes
            ), num_users_set / float(
                len(G.nodes())
            ), average_percentage_weight_change, deviation_percentage_weight, average_weight_change, deviation_weight, average_activity, deviation_activity
            file0.close()

            if r <= 5:
                x_positions_fit.append(r)
                y_positions_fit.append(average_percentage_weight_change)

                list_of_lists_for_bootstrap.append(
                    list_percentage_weight_changes)

        # else:
        #    aux_x=r*len(list_percentage_weight_changes)
        #   averages_larger5_x.append(aux_x)

        #  aux_y=average_percentage_weight_change*len(list_percentage_weight_changes)
        # averages_larger5_y.append(aux_y)
        #norm+=float(len(list_percentage_weight_changes))

#    x_positions_fit.append(numpy.mean(averages_larger5_x)/norm)
#   y_positions_fit.append(numpy.mean(averages_larger5_y)/norm)

    slope, intercept, Corr_coef, p_value, std_err = stats.linregress(
        x_positions_fit, y_positions_fit)  # least-squares linear fit

    print "result of linear fit for dose effect:"
    print "slope:", slope, "intercept:", intercept, "Corr_coef:", Corr_coef, "p_value:", p_value, "std_err:", std_err

    name11 = dir + name + "_fits_dose_eff_R6.dat"

    file11 = open(name11, 'wt')
    for i in range(len(x_positions_fit)):
        print >> file11, x_positions_fit[
            i], intercept + x_positions_fit[i] * slope

    print >> file11, "\n\n", "y=", intercept, "+", slope, "*x",
    print "Bootstrap for dose eff. R6s:\n"

    mean_slope, standard_dev = bootstrap(x_positions_fit[0],
                                         x_positions_fit[-1],
                                         list_of_lists_for_bootstrap)
    zscore = (slope - mean_slope) / standard_dev

    print >> file11, "bootstrap:\n", "actual slope:", slope, "mean_slope:", mean_slope, "standard_dev:", standard_dev, "\n zscore:", zscore

    print x_positions_fit[0], x_positions_fit[
        -1], "actual slope:", slope, "mean_slope:", mean_slope, "standard_dev:", standard_dev, "\n zscore:", zscore

    file11.close()

    contador += 1
    file = open(name12, 'at')
    print >> file, contador, mean_slope, standard_dev, "dose_eff"
    file.close()

    #### averages for every R6's egonetwork:#########
    cont = 1
    list_all_ = []
    list_all_nodes_ = []
    for node in list_R6s:
        neighbors = G.neighbors(node)  #a list of nodes

        average_BMI_change = 0.0
        list_BMI_changes = []

        average_weight_change = 0.0
        list_weight_changes = []

        average_percentage_weight_change = 0.0
        list_percentage_weight_changes = []

        average_activity = 0.0  # note: it will be divided by the number of days
        list_activities = []

        for n in G.neighbors(node):

            if int(G.node[n]['time_in_system']) > time_in_system:

                # list_BMI_changes.append(float(G.node[n]['final_BMI'])-float(G.node[n]['initial_BMI']))

                list_weight_changes.append(float(G.node[n]['weight_change']))

                list_percentage_weight_changes.append(
                    float(G.node[n]['percentage_weight_change']))

                list_activities.append(
                    float(G.node[n]['activity']) /
                    float(G.node[n]['time_in_system']))

                if n not in list_all_nodes_:
                    list_all_nodes_.append(n)
                    list_all_.append(
                        float(G.node[n]['percentage_weight_change']))

#averages
        average_weight_change = numpy.mean(list_weight_changes)
        #  average_BMI_change=numpy.mean(list_BMI_changes)
        average_activity = numpy.mean(list_activities)
        average_percentage_weight_change = numpy.mean(
            list_percentage_weight_changes)

        #standard deviation
        #deviation_BMI=numpy.std(list_BMI_changes)
        deviation_weight = numpy.std(list_weight_changes)
        deviation_percentage_weight = numpy.std(list_percentage_weight_changes)
        deviation_activity = numpy.std(list_activities)

        #print out
        name2 = dir + name + "_ego_R6s_average_weight_change_" + str(
            time_in_system) + "days.dat"
        file2 = open(name2, 'at')
        print >> file2, cont, G.node[node]['role'], G.node[node]['label'], len(
            G.neighbors(node)), average_weight_change, deviation_weight
        file2.close()

        name22 = dir + name + "_ego_R6s_average_percentage_weight_change_" + str(
            time_in_system) + "days.dat"
        file22 = open(name22, 'at')
        print >> file22, cont, G.node[node]['role'], G.node[node][
            'label'], len(
                G.neighbors(node)
            ), average_percentage_weight_change, deviation_percentage_weight
        file22.close()

        name3 = dir + name + "_ego_R6s_average_activity_" + str(
            time_in_system) + "days.dat"
        file3 = open(name3, 'at')
        print >> file3, cont, G.node[node]['role'], G.node[node]['label'], len(
            G.neighbors(node)), average_activity, deviation_activity
        file3.close()

        cont = cont + 1

    who = "R6s_egonetworks_all"
    Nbins = 18
    histograma(list_all_, Nbins, dir, name, who)

    #  print "intersection:",len(set(list_all_)&set(list_all)),len(list_all_),len(list_all)
    #############just checking what happens if we remove the 40155 guy

    ##### percent weight change vs. role:

    list_roles = ["R1", "R2", "R3", "R4", "R5", "R6", "R7"]

    file = open(dir + name + "_percentage_weight_change_vs_role", 'wt')
    cont = 1
    for role in list_roles:

        list_weight_changes_role = []
        for n in G.nodes():
            if G.node[n]['role'] == role:
                list_weight_changes_role.append(
                    G.node[n]['percentage_weight_change'])

        print >> file, cont, role, len(list_weight_changes_role), numpy.mean(
            list_weight_changes_role), numpy.std(list_weight_changes_role)

        cont += 1

    file.close()

    #############################

    ############## percentage weight change vs k
    x_positions_fit = []
    y_positions_fit = []

    cum_size_set = float(len(G.nodes()))

    list_of_lists_for_bootstrap = []

    list_k = []
    for n in G.nodes():
        list_k.append(len(G.neighbors(n)))

    max_k = max(list_k)

    file = open(dir + name + "_percentage_weight_change_vs_k.dat", 'wt')
    max_k = max_k + 1
    for k in range(1, max_k):

        num_users_set = cum_size_set

        list_percent_weight_change_k = []
        for n in G.nodes():
            if len(G.neighbors(n)) == k:
                list_percent_weight_change_k.append(
                    G.node[n]['percentage_weight_change'])
                cum_size_set -= 1.0

        if len(list_percent_weight_change_k) > 0:
            print >> file, k, len(list_percent_weight_change_k), \
                num_users_set / float(len(G.nodes())), \
                numpy.mean(list_percent_weight_change_k), \
                numpy.std(list_percent_weight_change_k)

            if len(x_positions_fit) <= 7:
                x_positions_fit.append(k)
                y_positions_fit.append(
                    numpy.mean(list_percent_weight_change_k))

                list_of_lists_for_bootstrap.append(
                    list_percent_weight_change_k)

    slope, intercept, Corr_coef, p_value, std_err = stats.linregress(
        x_positions_fit, y_positions_fit)  # least-squares linear fit

    print "result linear. fit for degree dependency:"
    print "slope:", slope, "intercept:", intercept, "Corr_coef:", Corr_coef, "p_value:", p_value, "std_err:", std_err

    file.close()

    name11 = dir + name + "_fits_degree.dat"

    file11 = open(name11, 'wt')
    for i in range(len(x_positions_fit)):
        print >> file11, x_positions_fit[i], intercept + x_positions_fit[i] * slope

    print >> file11, "\n\n", "y=", intercept, "+", slope, "*x",

    print "Bootstrap for degree:\n"

    mean_slope, standard_dev = bootstrap(x_positions_fit[0],
                                         x_positions_fit[-1],
                                         list_of_lists_for_bootstrap)
    zscore = (slope - mean_slope) / standard_dev

    print >> file11, "bootstrap:\n", "actual slope:", slope, "mean_slope:", mean_slope, "standard_dev:", standard_dev, "\n zscore:", zscore

    print x_positions_fit[0], x_positions_fit[-1], "actual slope:", slope, \
        "mean_slope:", mean_slope, "standard_dev:", standard_dev, "\n zscore:", zscore

    file11.close()

    contador += 1
    file = open(name12, 'at')
    print >> file, contador, mean_slope, standard_dev, "degree"
    file.close()

    ########################################

    new_name = graph_name.split(".gml")[0]

    new_name = new_name + "_adherent_num_R6s_largest_clique.gml"

    nx.write_gml(G, new_name)
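
The bootstrap helper called above is not shown in this snippet. A minimal
sketch of what such a routine might look like, assuming it resamples each
per-degree list with replacement, refits the line, and returns the mean and
standard deviation of the bootstrapped slopes (only the name and signature
come from the call site; the body is an assumption):

import random
import numpy
from scipy import stats

def bootstrap(k_first, k_last, list_of_lists, num_iter=1000):
    # assumes the lists correspond to consecutive degrees starting at k_first
    # (k_last is then implied by the length of list_of_lists)
    xs = range(k_first, k_first + len(list_of_lists))
    slopes = []
    for _ in range(num_iter):
        # resample each per-degree list with replacement, then refit the line
        ys = [numpy.mean([random.choice(values) for _ in values])
              for values in list_of_lists]
        slopes.append(stats.linregress(xs, ys)[0])
    return numpy.mean(slopes), numpy.std(slopes)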
Exemplo n.º 43
0
    def triangulate(self, heuristic='H6', order=None, inplace=False):
        """
        Triangulate the graph.

        If the order of deletion is given, the heuristic algorithm is not used.

        Parameters
        ----------
        heuristic: H1 | H2 | H3 | H4 | H5 | H6
            The heuristic algorithm to use to decide the deletion order of
            the variables to compute the triangulated graph.
            Let X be the set of variables and let X(i) denote the i-th variable.
            S(i): The size of the clique created by deleting the variable.
            E(i): Cardinality of variable X(i).
            M(i): The maximum size of the cliques of the subgraph given by
                    X(i) and its adjacent nodes.
            C(i): The sum of the size of cliques of the subgraph given by X(i)
                    and its adjacent nodes.
            The heuristic algorithm decides the deletion order as follows:
            H1: Delete the variable with minimal S(i).
            H2: Delete the variable with minimal S(i)/E(i).
            H3: Delete the variable with minimal S(i) - M(i).
            H4: Delete the variable with minimal S(i) - C(i).
            H5: Delete the variable with minimal S(i)/M(i).
            H6: Delete the variable with minimal S(i)/C(i).

        order: list, tuple (array-like)
            The order of deletion of the variables to compute the triangulated
            graph. If order is given, the heuristic algorithm is not used.

        inplace: True | False
            If inplace is True, the edges are added to the object from which
            the method is called; otherwise a new object is returned.

        References
        ----------
        http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.56.3607
        """
        graph_copy = nx.Graph(self.edges())
        edge_set = set()
        if not order:
            order = []
            for index in range(self.number_of_nodes()):
                # S[node]: size measure of the clique created by deleting node
                S = {}
                # consider only nodes not yet placed in the deletion order
                for node in set(graph_copy.nodes()) - set(order):
                    graph_working_copy = nx.Graph(graph_copy.edges())
                    graph_working_copy.add_edges_from(itertools.combinations(graph_working_copy.neighbors(node), 2))
                    graph_working_copy.remove_node(node)
                    clique_dict = nx.cliques_containing_node(graph_working_copy, nodes=graph_copy.neighbors(node))

                    def _common_list(*lists):
                        common = [sorted(li) for li in lists[0]]
                        for i in range(1, len(lists)):
                            list1 = [sorted(li) for li in lists[i]]
                            for list2 in common:
                                if list2 not in list1:
                                    common.remove(list2)
                        return common

                    common = _common_list(*list(clique_dict.values()))
                    # per the docstring, S(i) is the size of the clique created
                    # by deleting X(i); measure it, as for M and C below, by the
                    # product of the cardinalities of the clique's variables
                    S[node] = np.prod([self.cardinality[var] for var in common[0]])

                if heuristic == 'H1':
                    node_to_delete = min(S, key=S.get)

                elif heuristic == 'H2':
                    S_by_E = {key: S[key] / self.cardinality[key] for key in S}
                    node_to_delete = min(S_by_E, key=S_by_E.get)

                elif heuristic in ('H3', 'H5'):
                    M = {}
                    for node in set(graph_copy.nodes()) - set(order):
                        graph_working_copy = nx.Graph(graph_copy.edges())
                        neighbors = graph_working_copy.neighbors(node)
                        graph_working_copy.add_edges_from(itertools.combinations(neighbors, 2))
                        graph_working_copy.remove_node(node)
                        cliques = nx.cliques_containing_node(graph_working_copy, nodes=neighbors)

                        common_clique = list(cliques.values())[0]
                        for values in cliques.values():
                            common_clique = [value for value in common_clique if value in values]

                        M[node] = np.prod([self.cardinality[var] for var in common_clique[0]])

                    if heuristic == 'H3':
                        S_minus_M = {key: S[key] - M[key] for key in S}
                        node_to_delete = min(S_minus_M, key=S_minus_M.get)

                    else:
                        S_by_M = {key: S[key] / M[key] for key in S}
                        node_to_delete = min(S_by_M, key=S_by_M.get)

                else:
                    C = {}
                    for node in set(graph_copy.nodes()) - set(order):
                        graph_working_copy = nx.Graph(graph_copy.edges())
                        neighbors = graph_working_copy.neighbors(node)
                        graph_working_copy.add_edges_from(itertools.combinations(neighbors, 2))
                        graph_working_copy.remove_node(node)
                        cliques = nx.cliques_containing_node(graph_working_copy, nodes=neighbors)

                        common_clique = list(cliques.values())[0]
                        for values in cliques.values():
                            common_clique = [value for value in common_clique if value in values]

                        clique_size_sum = 0
                        # sum the sizes of all sub-cliques of the common clique
                        for r in range(1, len(common_clique[0]) + 1):
                            for clique in itertools.combinations(common_clique[0], r):
                                clique_size_sum += np.prod([self.cardinality[var] for var in clique])

                        C[node] = clique_size_sum

                    if heuristic == 'H4':
                        S_minus_C = {key: S[key] - C[key] for key in S}
                        node_to_delete = min(S_minus_C, key=S_minus_C.get)

                    else:
                        S_by_C = {key: S[key] / C[key] for key in S}
                        node_to_delete = min(S_by_C, key=S_by_C.get)

                order.append(node_to_delete)

        graph_copy = nx.Graph(self.edges())
        for node in order:
            for edge in itertools.combinations(graph_copy.neighbors(node), 2):
                graph_copy.add_edge(edge[0], edge[1])
                edge_set.add(edge)
            graph_copy.remove_node(node)

        if inplace:
            for edge in edge_set:
                self.add_edge(edge[0], edge[1])
            return self

        else:
            # add the fill-in edges to a copy, leaving self untouched
            graph_copy = self.copy()
            for edge in edge_set:
                graph_copy.add_edge(edge[0], edge[1])
            return graph_copy
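
The heart of the method above is the final fill-in loop: eliminate the nodes
in the chosen order and connect the neighbors of each eliminated node. A
self-contained sketch of just that step on a plain networkx graph (the
4-cycle, which needs exactly one chord to become triangulated):

import itertools
import networkx as nx

def fill_in(graph, order):
    # eliminate nodes in the given order, recording the fill edges that
    # connect the neighbors of each eliminated node
    working = graph.copy()
    fill_edges = set()
    for node in order:
        for u, v in itertools.combinations(list(working.neighbors(node)), 2):
            if not working.has_edge(u, v):
                working.add_edge(u, v)
                fill_edges.add((u, v))
        working.remove_node(node)
    return fill_edges

print(fill_in(nx.cycle_graph(4), [0, 1, 2, 3]))  # one chord, e.g. set([(1, 3)])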
Exemplo n.º 44
0
#     lvl2.append(graphs_len[i])


print "Η λίστα των μεγεθών των κλικών είναι:"
# print 'The list of clique sizes is:'
print lvl2
print str(" ")

print "Ο αριθμός κλίκας (το μέγεθος της μεγαλύτερης κλίκας) του G είναι:", nx.graph_clique_number(G)
# print 'The clique number (size of the largest clique) for G is:', nx.graph_clique_number(G)
# print sorted(nx.connected_components(G), key = len, reverse=True)
print str(" ")

print "Το λεξικό των κλικών που περιέχουν κάθε κόμβο είναι:"
# print 'The dictionary of the lists of cliques containing each node:'
print nx.cliques_containing_node(G)
print str(" ")

print "Το λεξικό του πλήθους κλικών που περιέχουν κάθε κόμβο είναι:"
# print 'The dictionary of the numbers of maximal cliques for each node:'
print nx.number_of_cliques(G)
print str(" ")

print "Το λεξικό του μεγέθους των μεγαλύτερων κλικών που περιέχουν κάθε κόμβο είναι:"
# print 'The dictionary of the sizes of the largest maximal cliques containing each given node:'
print nx.node_clique_number(G)
print str(" ")

maxclique = [clq for clq in nx.find_cliques(G) if len(clq) == nx.graph_clique_number(G)]
nodes = [n for clq in maxclique for n in clq]
H = G.subgraph(nodes)
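
A self-contained toy run of the same clique statistics, assuming a NetworkX
version that still ships nx.cliques_containing_node (it was removed in 3.0):

import networkx as nx

G = nx.Graph([(1, 2), (1, 3), (2, 3), (3, 4)])  # a triangle plus a pendant edge

print(nx.graph_clique_number(G))      # 3, from the triangle {1, 2, 3}
print(nx.cliques_containing_node(G))  # {1: [[1, 2, 3]], ..., 4: [[3, 4]]}
print(nx.number_of_cliques(G))        # {1: 1, 2: 1, 3: 2, 4: 1}
print(nx.node_clique_number(G))       # {1: 3, 2: 3, 3: 3, 4: 2}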
Exemplo n.º 45
0
# setup assumed by this snippet (not shown in the original): the imports,
# an empty graph, and the screen name of the account under study
import pickle
import networkx as nx
import matplotlib.pyplot as plt

main_user = "my_screen_name"  # hypothetical placeholder handle
twitter_graph = nx.Graph()

with open("twitter_network.dat",mode="rb") as my_file:
    graph_data = pickle.load(my_file)
    follower_user_data = graph_data["follower_user_data"]#followers of followers screen_names
    followers = graph_data["followers"] #my followers
    followers_screen_names = graph_data["followers_screen_names"] #screen_names of my followers
    follower_data = graph_data["follower_data"]  #followers of followers

for follower in followers_screen_names:
    twitter_graph.add_edge(main_user,follower["screen_name"].lower())
    for each,value in follower_user_data.items():
        for name in value:
            twitter_graph.add_edge(follower["screen_name"].lower(),name["screen_name"].lower())

# #set positions
pos = nx.random_layout(twitter_graph)

plt.figure(figsize=(16,10))

nx.draw_networkx_nodes(twitter_graph,pos,node_size=30)
nx.draw_networkx_edges(twitter_graph,pos,alpha=0.01)

#get the nodes that we want to draw labels for
clique = nx.cliques_containing_node(twitter_graph,nodes=[main_user])
clique = clique[main_user][0]
clique.append(main_user)
labels = {}
for name in clique:
    labels[name] = name
nx.draw_networkx_labels(twitter_graph,pos,font_size=16,labels=labels)
plt.show()
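
Note the clique[main_user][0] indexing above: when cliques_containing_node is
given a list of nodes it returns a dict keyed by node, while a single node
yields a plain list of cliques. A quick sketch of the difference, again on a
pre-3.0 NetworkX:

import networkx as nx

G = nx.complete_graph(3)                         # nodes 0, 1, 2
print(nx.cliques_containing_node(G, 0))          # [[0, 1, 2]], a list
print(nx.cliques_containing_node(G, nodes=[0]))  # {0: [[0, 1, 2]]}, a dict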
def tree_decomposition(input_graph):

    current_graph = input_graph.copy()
    decomposition_tree = nx.Graph()
    tree_connectivity_dictionary = dict()
    for graph_vertex in current_graph.nodes():
        tree_connectivity_dictionary[graph_vertex] = []


    while current_graph.order()>0:
	print current_graph.order()
	nodes_sorted_by_degree=sort_by_degree(current_graph)
	print 'nodes_sorted_by_degree', nodes_sorted_by_degree
	minimum_degree_vertex=nodes_sorted_by_degree[0][0]
	print 'Minimum Degree_vertex' , minimum_degree_vertex
	cliques_of_minimum_degree_vertex=nx.cliques_containing_node(current_graph,minimum_degree_vertex)
	print 'cliques_of_minimum_degree_vertex',cliques_of_minimum_degree_vertex
	number_of_cliques_containing_vertex=len(cliques_of_minimum_degree_vertex)
	print 'number_of_cliques_containing_vertex', number_of_cliques_containing_vertex
	minimum_degree_vertex_neighbors=current_graph.neighbors(minimum_degree_vertex)
	print 'minimum_degree_vertex_neighbors', minimum_degree_vertex_neighbors
	new_tree_vertex=[minimum_degree_vertex]
	print 'new_tree_vertex First element: ',new_tree_vertex
	new_tree_vertex.extend(minimum_degree_vertex_neighbors)
	new_tree_vertex=tuple(new_tree_vertex)
	decomposition_tree.add_node(new_tree_vertex)
	print 'decomposition_tree_vertices',decomposition_tree.nodes()
	if number_of_cliques_containing_vertex>1:
	    print 'Not Clique, will remove only one vertex'
	    pairs_of_neighbors=make_pairs(minimum_degree_vertex_neighbors)
	    print 'pairs_of_neighbors',pairs_of_neighbors
	    for additional_edge in pairs_of_neighbors:
	        current_graph.add_edge(additional_edge[0], additional_edge[1])
	    toberemoved=[minimum_degree_vertex]
	    print 'toberemoved ', toberemoved
	else:
	    toberemoved=[minimum_degree_vertex]
	    print 'Clique detected, will try to remove more than one vertex'
	    number_of_clique_edges_per_vertex=len(minimum_degree_vertex_neighbors)
	    print 'number_of_clique_edges_per_vertex',number_of_clique_edges_per_vertex
	    print "Checking all the vertex's neighbors..."
	    print 'minimum_degree_vertex_neighbors', minimum_degree_vertex_neighbors
	    for temp_vertex in minimum_degree_vertex_neighbors:
		if current_graph.degree(temp_vertex)==number_of_clique_edges_per_vertex:
		    toberemoved.append(temp_vertex)
		    print 'Will ALSO remove vertex  ', temp_vertex
	for graph_vertex in new_tree_vertex:
	    if graph_vertex in toberemoved:
		current_graph.remove_node(graph_vertex)
		print 'Removed original graph vertex', graph_vertex
		tree_vertices_waiting=tree_connectivity_dictionary[graph_vertex]
		print 'For the removed node, tree_vertices_waiting: ' , tree_vertices_waiting
		for tree_vertex_waiting in tree_vertices_waiting:
		    print 'New Tree vertex:  ' , new_tree_vertex
		    print 'Tree Vertex waiting:', tree_vertex_waiting
		    decomposition_tree.add_edge(new_tree_vertex,tree_vertex_waiting)
		    print 'Connected tree vertices', new_tree_vertex, 'and   ' ,   tree_vertex_waiting
		    print 'The tree edges are now:    ', decomposition_tree.edges()
		    print 'THE NUMBER OF TREE EDGES ARE NOW:   ', len(decomposition_tree.edges())
		for tree_vertex_waiting in tree_vertices_waiting:
		    common_graph_nodes_between_tree_vertices=list(my_very_simple_tuple_intersection(new_tree_vertex,tree_vertex_waiting))
		    for graph_vertex in common_graph_nodes_between_tree_vertices:
			tree_connectivity_dictionary[graph_vertex].remove(tree_vertex_waiting)
			print 'Removed from dictionary entry', graph_vertex , 'tree node ', tree_vertex_waiting
			print 'Now the new dictionary is:  ' , tree_connectivity_dictionary


	    else:
		tree_connectivity_dictionary[graph_vertex].append(new_tree_vertex)
		print 'New tree_connectivity_dictionary node appended. New tree_connectivity_dictionary ', tree_connectivity_dictionary
	print 'tree_connectivity_dictionary:  '	 , tree_connectivity_dictionary
	print 'decomposition_tree.nodes:     ', decomposition_tree.nodes()
	print 'decomposition_tree.edges:     ', decomposition_tree.edges()



    return decomposition_tree
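
sort_by_degree, make_pairs and my_very_simple_tuple_intersection are not
defined in this snippet. Plausible minimal versions, reconstructed from how
they are used above (assumptions, not the original author's code), which make
the function runnable on a Python 2 / pre-3.0 NetworkX setup:

import itertools
import networkx as nx

def sort_by_degree(graph):
    # (node, degree) pairs, lowest degree first; dict() accepts both the
    # old dict and the newer DegreeView return types of graph.degree()
    return sorted(dict(graph.degree()).items(), key=lambda pair: pair[1])

def make_pairs(nodes):
    # all unordered pairs of the given nodes
    return list(itertools.combinations(nodes, 2))

def my_very_simple_tuple_intersection(tuple_a, tuple_b):
    # graph vertices shared by two tree vertices (bags)
    return set(tuple_a) & set(tuple_b)

tree = tree_decomposition(nx.cycle_graph(5))
print(tree.nodes())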