示例#1
0
def main(argv):
    #Standardvalues
    partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
    project = "584"
    to_pajek = False
    try:
      opts, args = getopt.getopt(argv,"p:s:o")
    except getopt.GetoptError:
      print 'group_bridging.py -p <project_name> -s <partitionfile> -o [if you want pajek output]'
      sys.exit(2)
    for opt, arg in opts:
        if opt in ("-p"):
            project = arg
        elif opt in ("-s"):
            partitionfile = arg
        elif opt in ("-o"):
             to_pajek = True
        else:
            print 'group_bridging.py -p <project_name> -s <partitionfile> -o [if you want pajek output]'
    
    print "##################### GROUP BRIDGING ########################"
    print "Project %s " % project
    print "Partition %s" % partitionfile
    
    ff_edges_writer = csv.writer(open("results/%s_ff_bridging_edges.csv" % project, "wb"))
    at_edges_writer = csv.writer(open("results/%s_at_bridging_edges.csv" % project, "wb"))
    rt_edges_writer = csv.writer(open("results/%s_rt_bridging_edges.csv" % project, "wb"))
    
    csv_bridging_writer = csv.writer(open('results/spss/group bridging/%s_group_bridging.csv' % project , 'wb'))
    
    csv_bridging_writer.writerow(["Project", "Name", "Member_count", "Competing_Lists",
                                "FF_bin_degree", "FF_bin_in_degree", "FF_bin_out_degree",
                                "FF_volume_in","FF_volume_out",
                                "FF_bin_betweeness","FF_bin_closeness", "FF_bin_pagerank", #"FF_bin_eigenvector",
                                "FF_bin_c_size","FF_bin_c_density","FF_bin_c_hierarchy","FF_bin_c_index",
                                "AT_bin_degree", "AT_bin_in_degree", "AT_bin_out_degree",
                                "AT_bin_betweeness", "AT_bin_closeness", "AT_bin_pagerank", #"AT_bin_eigenvector",                            
                                "AT_bin_c_size","AT_bin_c_density","AT_bin_c_hierarchy","AT_bin_c_index",
                                "AT_volume_in", "AT_volume_out",
                                "RT_volume_in", "RT_volume_out",
                                "FF_rec", "AT_rec", "AT_avg", "FF_avg"])    
    
    # Get the overall network from disk    
    FF = nx.read_edgelist('data/networks/%s_FF.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph()) 
    AT = nx.read_edgelist('data/networks/%s_solr_AT.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph()) 
    RT = nx.read_edgelist('data/networks/%s_solr_RT.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph())
        
    # Read in the partition
    tmp = hp.get_partition(partitionfile)
    partitions = tmp[0]
    groups = tmp[1]
    
    #Read in members count for each project
    reader = csv.reader(open("results/stats/%s_lists_stats.csv" % project, "rb"), delimiter=",")
    temp  = {}
    reader.next() # Skip first row
    for row in reader:        
            temp[row[0]] = {"name":row[0],"member_count":int(row[3])}
    
    #Read in the list-listings for individuals
    listings = {}
    indiv_reader = csv.reader(open(partitionfile))
    for row in indiv_reader:                
            if listings.has_key(row[1]):
                listings[row[1]]["competing_lists"] += int(row[3])
            else:
                listings[row[1]] = {"competing_lists": int(row[3])}                            
           
    # Add dummy nodes if they are missing in the networks
    for partition in partitions:
            for node in partition:
                FF.add_node(node)
                AT.add_node(node)
                RT.add_node(node)
            
    #Blockmodel the networks into groups according to the partition
    P_FF = nx.blockmodel(FF,partitions)
    P_AT = nx.blockmodel(AT,partitions)
    P_RT = nx.blockmodel(RT,partitions)
    
    #Name the nodes in the network
    #TODO check: How do I know that the names really match?
    mapping = {}
    mapping_pajek = {}
    i = 0
    for group in groups:
        mapping_pajek[i] = "\"%s\"" % group # mapping for pajek
        mapping[i] = "%s" % group 
        i += 1
    
    H_FF = nx.relabel_nodes(P_FF,mapping)
    H_AT = nx.relabel_nodes(P_AT,mapping)
    H_RT = nx.relabel_nodes(P_RT,mapping)
    
    #Outpt the networks to pajek if needed
    if to_pajek:
        OUT_FF = nx.relabel_nodes(P_FF,mapping_pajek)
        OUT_AT = nx.relabel_nodes(P_AT,mapping_pajek)
        OUT_RT = nx.relabel_nodes(P_RT,mapping_pajek)
        
        #Write the blocked network out to disk
        nx.write_pajek(OUT_FF,"results/networks/%s_grouped_FF.net" % project)
        nx.write_pajek(OUT_AT,"results/networks/%s_grouped_AT.net" % project)
        nx.write_pajek(OUT_RT,"results/networks/%s_grouped_RT.net" % project)
    
    ########## Output the Edges between groups to csv ##############
    # Needed for the computation of individual bridging
    # Edges in both directions between the groups are addded up
    
    processed_edges = []
    for (u,v,attrib) in H_FF.edges(data=True):
        if "%s%s" %(u,v)  not in processed_edges:
            processed_edges.append("%s%s" % (u,v))            
            if H_FF.has_edge(v,u):
                processed_edges.append("%s%s" % (v,u))
                ff_edges_writer.writerow([u,v,attrib["weight"]+H_FF[v][u]["weight"]])
            else:
                ff_edges_writer.writerow([u,v,attrib["weight"]])
                
    processed_edges = []
    for (u,v,attrib) in H_AT.edges(data=True):
        if "%s%s" %(u,v)  not in processed_edges:
            processed_edges.append("%s%s" % (u,v))            
            if H_AT.has_edge(v,u):
                processed_edges.append("%s%s" % (v,u))
                at_edges_writer.writerow([u,v,attrib["weight"]+H_AT[v][u]["weight"]])
            else:
                at_edges_writer.writerow([u,v,attrib["weight"]])
                    
    processed_edges = []
    for (u,v,attrib) in H_RT.edges(data=True):
        if "%s%s" %(u,v)  not in processed_edges:
            processed_edges.append("%s%s" % (u,v))            
            if H_RT.has_edge(v,u):
                processed_edges.append("%s%s" % (v,u))
                rt_edges_writer.writerow([u,v,attrib["weight"]+H_RT[v][u]["weight"]])
            else:
                rt_edges_writer.writerow([u,v,attrib["weight"]])                

    
    ########## TRIM EDGES ################
    # For meaningfull results we have to trim edges in the AT and FF network so the whole network just doesnt look like a blob            
    # It is chosen this way so the network remains as one component
    
    THRESHOLD = min([hp.min_threshold(H_AT),hp.min_threshold(H_FF)])-1    
    H_FF = hp.trim_edges(H_FF, THRESHOLD)
    H_AT = hp.trim_edges(H_AT, THRESHOLD)    

    ########## MEASURES ##############
    
    #Get the number of nodes in the aggregated networks
    #FF_nodes = {}
    #for node in H_FF.nodes(data=True):
    #        FF_nodes[node[0]] = node[1]["nnodes"]
    
    
    #Get the FF network measures of the nodes
    # Works fine on binarized Data
    FF_bin_degree = nx.degree_centrality(H_FF) 
    FF_bin_in_degree = nx.in_degree_centrality(H_FF) # The attention paid towards this group
    FF_bin_out_degree = nx.out_degree_centrality(H_FF) # The attention that this group pays towards other people
    FF_bin_betweenness = nx.betweenness_centrality(H_FF,weight="weight") # How often is the group between other groups
    FF_bin_closeness = nx.closeness_centrality(H_FF) #FF_bin_eigenvector = nx.eigenvector_centrality(H_FF)
    FF_bin_pagerank = nx.pagerank(H_FF)        
    FF_bin_struc = sx.structural_holes(H_FF)
    
    # AT network measures of the nodes
    AT_bin_degree = nx.degree_centrality(H_AT)
    AT_bin_in_degree = nx.in_degree_centrality(H_AT)
    AT_bin_out_degree = nx.out_degree_centrality(H_AT)
    AT_bin_betweenness = nx.betweenness_centrality(H_AT,weight="weight") 
    AT_bin_closeness = nx.closeness_centrality(H_AT) #AT_bin_eigenvector = nx.eigenvector_centrality(H_AT)
    AT_bin_pagerank = nx.pagerank(H_AT)        
    AT_bin_struc = sx.structural_holes(H_AT)
    
    # Tie strengths
    dAT_avg_tie = hp.individual_average_tie_strength(H_AT)
    dFF_avg_tie = hp.individual_average_tie_strength(H_FF)
    dAT_rec = hp.individual_reciprocity(H_AT)    
    dFF_rec = hp.individual_reciprocity(H_FF)
    
    # Dependent Variable see csv
    # TODO A measure that calculates how often Tweets travel through this group: Eventually betweeness in the RT graph
    
    #Arrange it in a list and output
    for node in FF_bin_degree.keys():                
                csv_bridging_writer.writerow([project, node, int(temp[node]["member_count"]), listings[node]["competing_lists"],
                                                FF_bin_degree[node], FF_bin_in_degree[node], FF_bin_out_degree[node],
                                                H_FF.in_degree(node,weight="weight"), H_FF.out_degree(node,weight="weight"),
                                                FF_bin_betweenness[node],FF_bin_closeness[node],FF_bin_pagerank[node], #FF_bin_eigenvector[node],
                                                FF_bin_struc[node]['C-Size'],FF_bin_struc[node]['C-Density'],FF_bin_struc[node]['C-Hierarchy'],FF_bin_struc[node]['C-Index'],
                                                AT_bin_degree[node], AT_bin_in_degree[node], AT_bin_out_degree[node],
                                                AT_bin_betweenness[node], AT_bin_closeness[node], AT_bin_pagerank[node], #AT_bin_eigenvector[node],
                                                AT_bin_struc[node]['C-Size'],AT_bin_struc[node]['C-Density'],AT_bin_struc[node]['C-Hierarchy'],AT_bin_struc[node]['C-Index'],
                                                H_AT.in_degree(node,weight="weight"), H_AT.out_degree(node,weight="weight"),
                                                H_RT.in_degree(node,weight="weight"), H_RT.out_degree(node,weight="weight"),
                                                dFF_rec[node],dAT_rec[node],dAT_avg_tie[node],dFF_avg_tie[node]
                                            ])        
        if hp.reciprocity(FF) == hp.reciprocity(FF_bin): print "NOTICE: Group Reciprocity is BINARY"
        if nx.average_clustering(FF.to_undirected()) == nx.average_clustering(FF_bin.to_undirected()): print "NOTICE: Average Clustering is BINARY and UNDIRECTED"
        if nx.average_shortest_path_length(FF) == nx.average_shortest_path_length(FF_bin): print "NOTICE: Group Average path length is is BINARY"
        if hp.average_tie_strength(FF) == hp.average_tie_strength(FF_bin): print "NOTICE: Group Average tie strength path is is BINARY"

            
    ################# Individual Bonding measures ################
    
    #dFF_degree = FF.degree("a1")
    dFF = nx.degree_centrality(FF) # Binarized undirected
    dFF_in = nx.in_degree_centrality(FF)  #People that follow me in the network binarized 
    dFF_out = nx.out_degree_centrality(FF) #People that I follow in the network binarized 
    dFF_closeness = nx.closeness_centrality(FF) # Non-directed and binarized
    #dFF_pagerank = nx.pagerank(FF)
    dFF_eigenvector = nx.eigenvector_centrality(FF.to_undirected()) # Undirected and binarized
    dFF_rec = hp.individual_reciprocity(FF) # Individual Reciprocity
    dFF_avg_tie = hp.individual_average_tie_strength(FF) # Individual average tie strength        
    dFF_in_volume = hp.individual_in_volume(FF) #compute the volume of all incoming ties
    dFF_out_volume = hp.individual_out_volume(FF) #compute the volume of all outgoing ties
    
    # Test the output of NetworkX against UCINET
    if names[i]  == "a":
        print "######################## INDIVIDUAL MEASURES TEST of BINARY #####################"
        a2 = "a2"
        if nx.degree_centrality(FF) == nx.degree_centrality(FF_bin): print "NOTICE: Degree centrality is BINARY"
        if nx.in_degree_centrality(FF) == nx.in_degree_centrality(FF_bin): print "NOTICE: in_degree_centrality is BINARY"
        if nx.out_degree_centrality(FF) == nx.out_degree_centrality(FF_bin): print "NOTICE: out_degree_centrality is BINARY"
        if nx.closeness_centrality(FF) == nx.closeness_centrality(FF_bin): print "NOTICE: closeness_centrality is BINARY"
        # Eigenvector Centrality makes problems upon converging
        #if nx.eigenvector_centrality(FF) == nx.eigenvector_centrality(FF_bin): print "NOTICE: eigenvector_centrality is BINARY"
        if hp.individual_reciprocity(FF) == hp.individual_reciprocity(FF_bin): print "NOTICE: individual_reciprocity is BINARY"
def main(argv):
   #Standardvalues
   partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
   project = "584"
   to_pajek = False
   try:
     opts, args = getopt.getopt(argv,"p:s:o")
   except getopt.GetoptError:
     print 'individual_bridging_2.py -p <project_name> -s <partitionfile> '
     sys.exit(2)
   for opt, arg in opts:
       if opt in ("-p"):
           project = arg
       elif opt in ("-s"):
           partitionfile = arg
       else:
         print 'individual_bridging_2.py -p <project_name> -s <partitionfile> '
   
   print "##################### INDIVIDUAL BRIDGING 2 (Working on whole network) ########################"
   print "Project %s " % project
   print "Partition %s" % partitionfile
   
   csv_bridging_writer = csv.writer(open('results/spss/individual bridging/%s_individual_bridging_3.csv' % project, 'wb'))
   csv_bridging_writer.writerow(["Project", "Community", "Person_ID",
                                 "Competing_lists",
                                 "FF_bin_degree", "FF_bin_in_degree", "FF_bin_out_degree",
                                 "FF_vol_in", "FF_vol_out",
                                 "FF_groups_in", "FF_groups_out",
                                 "FF_rec",
                                 "FF_bin_betweeness", #"FF_bin_closeness", "FF_bin_pagerank",
                                  #"FF_c_size", "FF_c_density", "FF_c_hierarchy", "FF_c_index",
                                 "AT_bin_degree", "AT_bin_in_degree", "AT_bin_out_degree",
                                 "AT_vol_in", "AT_vol_out",
                                 "AT_groups_in", "AT_groups_out",
                                 "AT_rec",
                                 "AT_bin_betweeness",#, "AT_bin_closeness", "AT_bin_pagerank",
                                 # FF_c_size, FF_c_density, FF_c_hierarchy, FF_c_index,
                                 "AT_avg_tie_strength","AT_strength_centrality_in",
                                 "RT_bin_in_degree", "RT_bin_out_degree",
                                 "RT_vol_in", "RT_vol_out"])
   
   #Read in the list-listings for individuals
   listings = {}
   indiv_reader = csv.reader(open(partitionfile))
   for row in indiv_reader:        
           listings[row[0]] = {"group":row[1],"place":int(row[2]), "competing_lists": int(row[3])}
   
   # Read in the centralities of nodes in their corresponding community
   centralities = {}
   centrality_reader = csv.reader(open('results/spss/individual bonding/%s_individual_bonding.csv' % project))
   for row in centrality_reader:
      centralities[row[2]] = {"ff_in_degree":row[5]}
   
   # Read in the partition
   tmp = hp.get_partition(partitionfile)
   partitions = tmp[0]
   groups = tmp[1]
   
   # Read in the networks   
   FF_all = nx.read_edgelist('data/networks/%s_FF.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph()) 
   AT_all = nx.read_edgelist('data/networks/%s_solr_AT.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph()) 
   RT_all = nx.read_edgelist('data/networks/%s_solr_RT.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph())
   print "Done reading in Networks"
   
   #Determine the Maximum subset of nodes present in all Networks   
   maximum_subset = []
   for node in FF_all.nodes():
      if AT_all.has_node(node) and RT_all.has_node(node):
         maximum_subset.append(node)
   
   i = 0
   for partition in partitions:
       for node in partition:
           FF_all.add_node(node, group =  groups[i]) # Add nodes 
           AT_all.add_node(node, group =  groups[i])
           RT_all.add_node(node, group =  groups[i])
       i += 1

   i = 0
   
   #These measures are computed only once on the graph (we are making an error since the internal group structure is considered to load up those values)
   if len(maximum_subset) < 1000:
      scaling_k = len(maximum_subset)
   else:
      scaling_k = len(maximum_subset)/100
   dFF_bin_betweeness = nx.betweenness_centrality(FF_all,k=scaling_k)
   dAT_bin_betweeness = nx.betweenness_centrality(AT_all,k=scaling_k)
   #dFF_struc = sx.structural_holes(FF_all)
   
   for partition in partitions:      
      project_name = groups[i]
      
      #Determine the groups that are not in the partition
      all_other_groups = groups[:]
      group = groups[i]
      all_other_groups.remove(group)
      
      # Get all the partitions without the current partition
      partitions_without_partition = partitions[:]
      partitions_without_partition.remove(partition)
      
      #Remove the nodes that are in this partition
      remaining_nodes = [item for sublist in partitions for item in sublist] #flatlist of all nodes
      for nodes_to_be_deleted in partition:
         remaining_nodes.remove(nodes_to_be_deleted)
      
      #Create Subgraphs that contain all nodes except the ones that are in the partition
      S_FF = FF_all.subgraph(remaining_nodes)
      S_AT = AT_all.subgraph(remaining_nodes)
      S_RT = RT_all.subgraph(remaining_nodes)
      
      i += 1
      for node in partition:
         if node in maximum_subset:            
            t0 = time.time() 
            
            #Add FF nodes and edges
            S_FF.add_node(node, group = group)            
            S_FF.add_edges_from(FF_all.in_edges(node,data=True)) # in edges 
            S_FF.add_edges_from(FF_all.out_edges(node,data=True)) #out edges               
            # Delete the nodes that we again accidentally added by importing all of the node's edges
            for tmp_node in partition:
               if tmp_node != node and tmp_node in S_FF:
                  S_FF.remove_node(tmp_node)
                        
            # Add AT nodes and edges
            S_AT.add_node(node, group = group)
            S_AT.add_edges_from(AT_all.in_edges(node,data=True)) # in edges 
            S_AT.add_edges_from(AT_all.out_edges(node,data=True)) #out edges
            # Delete the nodes that we again accidentally added by importing all of the node's edges
            for tmp_node in partition:
               if tmp_node != node and tmp_node in S_AT:
                  S_AT.remove_node(tmp_node)
                  
            S_RT.add_node(node, group = group)
            S_RT.add_edges_from(RT_all.in_edges(node,data=True)) # in edges 
            S_RT.add_edges_from(RT_all.out_edges(node,data=True)) #out edges   
            # Delete the nodes that we again accidentally added by importing all of the node's edges
            for tmp_node in partition:
               if tmp_node != node and tmp_node in S_RT:
                  S_RT.remove_node(tmp_node)
                  
            print "Done creating Subgraphs"
            
            ## FF measures
            dFF_bin = nx.degree_centrality(S_FF)
            dFF_bin_in = nx.in_degree_centrality(S_FF)
            dFF_bin_out = nx.out_degree_centrality(S_FF)            
            #nx.load_centrality(S_FF,v=node, weight="weight")
            #dFF_bin_closeness = nx.closeness_centrality(S_FF,v=node)
            #dFF_bin_pagerank = nx.pagerank(S_FF, weight="weight")            
            dFF_total_in_groups = hp.filtered_group_volume(hp.incoming_group_volume(S_FF,node,all_other_groups),0)
            dFF_total_out_groups = hp.filtered_group_volume(hp.outgoing_group_volume(S_FF,node,all_other_groups),0)            
            dFF_rec = hp.individual_reciprocity(S_FF,node)   #number of reciprocated ties            
            
            ## AT Measures
            dAT_bin = nx.degree_centrality(S_AT)
            dAT_bin_in = nx.in_degree_centrality(S_AT)
            dAT_bin_out = nx.out_degree_centrality(S_AT)
            #dAT_bin_betweeness = nx.betweenness_centrality(S_AT, k=100) #nx.load_centrality(S_AT,v=node,weight="weight")
            #dAT_bin_closeness = nx.closeness_centrality(S_AT,v=node) 
            #dAT_bin_pagerank = nx.pagerank(S_AT,weight="weight")
            dAT_total_in_groups = hp.filtered_group_volume(hp.incoming_group_volume(S_AT,node,all_other_groups),0)
            dAT_total_out_groups = hp.filtered_group_volume(hp.outgoing_group_volume(S_AT,node,all_other_groups),0)
            dAT_rec = hp.individual_reciprocity(S_AT,node)   #number of @reciprocated ties
            dAT_avg_tie = hp.individual_average_tie_strength(S_AT,node)
            
            #Compute a combined measure which multiplies the strength of incoming ties times the centrality of that person
            dAT_strength_centrality = 0
            for edge in S_AT.in_edges(node,data=True):
               if edge[0] in maximum_subset:
                  dAT_strength_centrality += edge[2]["weight"]*float(centralities[edge[0]]["ff_in_degree"]) #get the centrality of the node that the tie is incoming from
            
            ############### DEPENDENT VARIABLES ###########
            
            dRT_in = nx.in_degree_centrality(S_RT) # At least once a retweets that a person has received 
            dRT_out = nx.out_degree_centrality(S_RT) # At least one retweets that a person has made            
            print "Done computing Measures"
            
            try:
               c_size = dFF_struc[node]['C-Size']
               c_dens = dFF_struc[node]['C-Density']
               c_hierarch = dFF_struc[node]['C-Hierarchy']
               c_index = dFF_struc[node]['C-Index']
            except:
               c_size = "NaN"
               c_dens = "NaN"
               c_hierarch = "NaN"
               c_index = "NaN"
               
            csv_bridging_writer.writerow([project, project_name, node, 
                                          listings[node]["competing_lists"],
                                          dFF_bin[node], dFF_bin_in[node], dFF_bin_out[node],
                                          S_FF.in_degree(node,weight="weight"), S_FF.out_degree(node,weight="weight"),
                                          dFF_total_in_groups, dFF_total_out_groups,
                                          dFF_rec[node],
                                          dFF_bin_betweeness[node],#dFF_bin_closeness[node],dFF_bin_pagerank[node],                                                                                    
                                          #c_size,c_dens,c_hierarch,c_index,                                                                                    
                                          dAT_bin[node], dAT_bin_in[node], dAT_bin_out[node],
                                          S_AT.in_degree(node,weight="weight"), S_AT.out_degree(node, weight="weight"),
                                          dAT_total_in_groups, dAT_total_out_groups,
                                          dAT_rec[node],
                                          dAT_bin_betweeness[node],#dAT_bin_closeness[node], dAT_bin_pagerank[node],                                       
                                          #dAT_struc[node]['C-Size'],dAT_struc[node]['C-Density'],dAT_struc[node]['C-Hierarchy'],dAT_struc[node]['C-Index'],                                          
                                          dAT_avg_tie[node],dAT_strength_centrality,
                                          dRT_in[node],dRT_out[node],   
                                          S_RT.in_degree(node,weight="weight"), S_RT.out_degree(node,weight="weight")
                                         ])
            t_delta = (time.time() - t0)
            print "Count: %s Node: %s Time: %s" % (i,node,t_delta)
            
            #Remove the nodes again
            S_FF.remove_node(node)
            S_AT.remove_node(node)
            S_RT.remove_node(node)
示例#4
0
def main(argv):
    # Standardvalues
    partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
    project = "584"
    try:
        opts, args = getopt.getopt(argv, "p:s:")
    except getopt.GetoptError:
        print "individual_bonding.py -p <project_name> -s <partitionfile>"
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-p"):
            project = arg
        elif opt in ("-s"):
            partitionfile = arg
        else:
            print "individual_bonding.py -p <project_name> -s <partitionfile>"

    print "##################### INDIVIDUAL BONDING ########################"
    print "Project %s " % project
    print "Partition %s" % partitionfile

    csv_writer = csv.writer(open("results/spss/individual bonding/%s_individual_bonding.csv" % project, "wb"))
    csv_writer.writerow(
        [
            "Project",
            "Community",
            "Person_ID",
            "Place_on_list",
            "FF_bin_deg",
            "FF_bin_in_deg",
            "FF_bin_out_deg",
            "FF_vol_in",
            "FF_vol_out",
            "FF_bin_close",
            "FF_bin_page",
            "FF_rec",
            "AT_bin_deg",
            "AT_bin_in_deg",
            "AT_bin_out_deg",
            "AT_bin_close",
            "AT_bin_page",
            "AT_rec",
            "AT_avg",
            "AT_vol_in",
            "AT_vol_out",
            "RT_bin_deg_in",
            "RT_bin_deg_out",
            "RT_vol_in",
            "RT_vol_out",
            "RT_global_vol_in",
            "RT_global_vol_out",
        ]
    )

    # Read in the list-listings for individuals
    listings = {}
    indiv_reader = csv.reader(open(partitionfile))
    i = 0
    for row in indiv_reader:
        if i > int(row[2]):  # in case there are less than 101 entries for a group for some reason
            i = 0
        i += 1
        listings[row[0]] = {"group": row[1], "place": i, "competing_lists": int(row[3]), "original_place": int(row[2])}
        if i == 101:  # Some of the original places have shifted because of the regrouping
            i = 0

    # Read in Networks
    FF_all = nx.read_edgelist(
        "data/networks/%s_FF.edgelist" % project, nodetype=str, data=(("weight", float),), create_using=nx.DiGraph()
    )
    AT_all = nx.read_edgelist(
        "data/networks/%s_solr_AT.edgelist" % project,
        nodetype=str,
        data=(("weight", float),),
        create_using=nx.DiGraph(),
    )
    RT_all = nx.read_edgelist(
        "data/networks/%s_solr_RT.edgelist" % project,
        nodetype=str,
        data=(("weight", float),),
        create_using=nx.DiGraph(),
    )

    # Read in the partitions
    tmp = hp.get_partition(partitionfile)
    partitions = tmp[0]
    groups = tmp[1]

    # Add missing nodes
    # We are limiting the analysis to only the maximal subset of nodes that are present in all networks
    maximum_subset = []
    for node in FF_all.nodes():
        if AT_all.has_node(node) and RT_all.has_node(node):
            maximum_subset.append(node)
        else:
            print node
    print "Maximum Subset of nodes %s" % len(maximum_subset)

    # In this case we are not adding missing nodes to the network, to produce a smaller error in the final regressions, but use the subset method.
    # i = 0
    # for partition in partitions:
    #    for node in partition:
    #        FF_all.add_node(node, group =  groups[i])
    #        AT_all.add_node(node, group =  groups[i])
    #        RT_all.add_node(node, group =  groups[i])
    #    i += 1

    i = 0

    for partition in partitions:

        project_name = groups[i]
        print "############ Calculating Project %s ############### " % project_name
        # Generate a subgraph according to the partition
        FF = FF_all.subgraph(partition)
        AT = AT_all.subgraph(partition)
        RT = RT_all.subgraph(partition)

        # Additional Info for each project
        FF.name = "FF_%s " % project_name
        AT.name = "AT_%s " % project_name
        RT.name = "RT_%s " % project_name

        # hp.draw_graph(FF)
        # hp.draw_graph(AT)
        # hp.draw_graph(RT)

        ############### Compute Individual measures ################

        # Compute FF Centralities
        # Works fine on binary data
        dFF_bin = nx.degree_centrality(FF)
        dFF_bin_in = nx.in_degree_centrality(FF)  # People that follow me in the network
        dFF_bin_out = nx.out_degree_centrality(FF)  # People that I follow in the network
        dFF_bin_closeness = nx.closeness_centrality(FF)
        dFF_bin_pagerank = nx.pagerank(FF)
        try:
            dFF_bin_eigenvector = nx.eigenvector_centrality(FF, 10000)
        except:
            print "Failed to compute for FF %s " % FF.name

        # if len(nx.weakly_connected_components(FF)) > 1:
        #    FF_comp = FF.subgraph(nx.weakly_connected_components(FF)[0])
        #    dFF_bin_eigenvector = nx.eigenvector_centrality(FF_comp)
        # else:

        # Compute AT Centralities
        # Centralities are problematic on weighted data, since we are losing all the information
        dAT_bin = nx.degree_centrality(AT)  # binary
        dAT_bin_in = nx.in_degree_centrality(AT)  # binary
        dAT_bin_out = nx.out_degree_centrality(AT)  # binary
        dAT_bin_closeness = nx.closeness_centrality(AT)  # binary
        dAT_bin_pagerank = nx.pagerank(AT)
        try:
            dAT_bin_eigenvector = nx.eigenvector_centrality(AT, 10000)
        except:
            print "Failed to compute for AT %s " % AT.name
        # if len(nx.weakly_connected_components(AT)) > 1:
        #    AT_comp = AT.subgraph(nx.weakly_connected_components(AT)[0])
        #    dFF_bin_eigenvector = nx.eigenvector_centrality(AT_comp)
        # else:
        #

        # Tie strengths
        dAT_avg_tie = hp.individual_average_tie_strength(AT)
        dAT_rec = hp.individual_reciprocity(AT)
        dFF_rec = hp.individual_reciprocity(FF)

        # Dependent Variable see csv below
        # Deprecated since in networkx centrality works only on binary edges
        dRT_in = nx.in_degree_centrality(RT)  # At least once a retweets that a person has received
        dRT_out = nx.out_degree_centrality(RT)  # At least one retweets that a person has made

        ############### Output ################
        for node in dFF_bin.keys():
            if node in maximum_subset:
                csv_writer.writerow(
                    [
                        project,
                        project_name,
                        node,
                        listings[node]["place"],
                        dFF_bin[node],
                        dFF_bin_in[node],
                        dFF_bin_out[node],
                        FF.in_degree(node, weight="weight"),
                        FF.out_degree(node, weight="weight"),
                        dFF_bin_closeness[node],
                        dFF_bin_pagerank[node],
                        dFF_rec[node],
                        dAT_bin[node],
                        dAT_bin_in[node],
                        dAT_bin_out[node],
                        dAT_bin_closeness[node],
                        dAT_bin_pagerank[node],
                        dAT_rec[node],
                        dAT_avg_tie[node],
                        AT.in_degree(node, weight="weight"),
                        AT.out_degree(node, weight="weight"),
                        dRT_in[node],
                        dRT_out[node],
                        RT.in_degree(node, weight="weight"),
                        RT.out_degree(node, weight="weight"),
                        RT_all.in_degree(node, weight="weight"),
                        RT_all.out_degree(node, weight="weight"),
                    ]
                )

        i += 1