# Example #1
# 0
def _read_listings(partitionfile):
    """Map each person id (column 0 of *partitionfile*) to its listing record.

    Columns 2 and 3 are converted with ``int``; a row that cannot be converted
    (for example a header row) raises ``ValueError``.
    """
    listings = {}
    with open(partitionfile) as handle:
        for row in csv.reader(handle):
            listings[row[0]] = {"group": row[1],
                                "place": int(row[2]),
                                "competing_lists": int(row[3])}
    return listings


def _read_centralities(project):
    """Map person id (column 2) to column 5 of the project's bonding results CSV."""
    centralities = {}
    path = 'results/spss/individual bonding/%s_individual_bonding.csv' % project
    with open(path) as handle:
        for row in csv.reader(handle):
            # Kept as the raw string; converted with float() where it is used.
            centralities[row[2]] = {"ff_in_degree": row[5]}
    return centralities


def _attach_node(subgraph, full_graph, node, group, partition):
    """Add *node* and every edge it has in *full_graph* to *subgraph* (in place)."""
    subgraph.add_node(node, group=group)
    subgraph.add_edges_from(full_graph.in_edges(node, data=True))
    subgraph.add_edges_from(full_graph.out_edges(node, data=True))
    # Importing all edges of the node re-creates its fellow partition members
    # as edge endpoints; they must not be part of the subgraph, so drop them.
    for member in partition:
        if member != node and member in subgraph:
            subgraph.remove_node(member)


def _write_bridging_rows(csv_bridging_writer, project, partitionfile):
    """Compute the per-person measures and write one CSV row per person.

    Only persons that occur in all three networks (FF, AT and RT) get a row.
    """
    listings = _read_listings(partitionfile)
    centralities = _read_centralities(project)

    # Read in the partition
    tmp = hp.get_partition(partitionfile)
    partitions = tmp[0]
    groups = tmp[1]

    # Read in the networks
    FF_all = nx.read_edgelist('data/networks/%s_FF.edgelist' % project, nodetype=str,
                              data=(('weight', float),), create_using=nx.DiGraph())
    AT_all = nx.read_edgelist('data/networks/%s_solr_AT.edgelist' % project, nodetype=str,
                              data=(('weight', float),), create_using=nx.DiGraph())
    RT_all = nx.read_edgelist('data/networks/%s_solr_RT.edgelist' % project, nodetype=str,
                              data=(('weight', float),), create_using=nx.DiGraph())
    print("Done reading in Networks")

    # Nodes present in all three networks. Kept as a set because it is only
    # used for its size and for membership tests inside the innermost loops.
    maximum_subset = set(node for node in FF_all.nodes()
                         if AT_all.has_node(node) and RT_all.has_node(node))

    # Tag every partition member with its group in each network (this also
    # creates the node in a network where it did not exist yet).
    for index, partition in enumerate(partitions):
        for node in partition:
            FF_all.add_node(node, group=groups[index])
            AT_all.add_node(node, group=groups[index])
            RT_all.add_node(node, group=groups[index])

    # Betweenness is computed only once, on the whole graph. The original
    # author notes this is an approximation, because the internal group
    # structure contributes to the values.
    subset_size = len(maximum_subset)
    # Sample every node on small networks, one percent of them otherwise.
    # `//` keeps the sample size an integer on every Python version.
    scaling_k = subset_size if subset_size < 1000 else subset_size // 100
    dFF_bin_betweeness = nx.betweenness_centrality(FF_all, k=scaling_k)
    dAT_bin_betweeness = nx.betweenness_centrality(AT_all, k=scaling_k)

    for index, partition in enumerate(partitions):
        group = groups[index]
        project_name = group
        count = index + 1  # 1-based partition counter shown in the progress output

        # Determine the groups that are not in the partition
        all_other_groups = groups[:]
        all_other_groups.remove(group)

        # All nodes that belong to any other partition, collected in one pass.
        remaining_nodes = [member
                           for other_index, other in enumerate(partitions)
                           if other_index != index
                           for member in other]

        # Subgraphs that contain every partitioned node except this partition's.
        # NOTE(review): the subgraphs are mutated below, which assumes a
        # networkx version where subgraph() returns an independent graph
        # rather than a read-only view -- TODO confirm.
        S_FF = FF_all.subgraph(remaining_nodes)
        S_AT = AT_all.subgraph(remaining_nodes)
        S_RT = RT_all.subgraph(remaining_nodes)

        for node in partition:
            if node not in maximum_subset:
                continue
            t0 = time.time()

            # Put this one node, with all its ties, into each subgraph.
            _attach_node(S_FF, FF_all, node, group, partition)
            _attach_node(S_AT, AT_all, node, group, partition)
            _attach_node(S_RT, RT_all, node, group, partition)
            print("Done creating Subgraphs")

            ## FF measures
            dFF_bin = nx.degree_centrality(S_FF)
            dFF_bin_in = nx.in_degree_centrality(S_FF)
            dFF_bin_out = nx.out_degree_centrality(S_FF)
            dFF_total_in_groups = hp.filtered_group_volume(
                hp.incoming_group_volume(S_FF, node, all_other_groups), 0)
            dFF_total_out_groups = hp.filtered_group_volume(
                hp.outgoing_group_volume(S_FF, node, all_other_groups), 0)
            dFF_rec = hp.individual_reciprocity(S_FF, node)  # number of reciprocated ties

            ## AT measures
            dAT_bin = nx.degree_centrality(S_AT)
            dAT_bin_in = nx.in_degree_centrality(S_AT)
            dAT_bin_out = nx.out_degree_centrality(S_AT)
            dAT_total_in_groups = hp.filtered_group_volume(
                hp.incoming_group_volume(S_AT, node, all_other_groups), 0)
            dAT_total_out_groups = hp.filtered_group_volume(
                hp.outgoing_group_volume(S_AT, node, all_other_groups), 0)
            dAT_rec = hp.individual_reciprocity(S_AT, node)  # number of reciprocated ties
            dAT_avg_tie = hp.individual_average_tie_strength(S_AT, node)

            # Combined measure: weight of each incoming tie multiplied by the
            # stored centrality of the person the tie comes from.
            # NOTE(review): a sender missing from the bonding file raises
            # KeyError here, as it did before.
            dAT_strength_centrality = 0
            for source, _target, attributes in S_AT.in_edges(node, data=True):
                if source in maximum_subset:
                    dAT_strength_centrality += (
                        attributes["weight"] * float(centralities[source]["ff_in_degree"]))

            ############### DEPENDENT VARIABLES ###########
            dRT_in = nx.in_degree_centrality(S_RT)    # retweets received (binary)
            dRT_out = nx.out_degree_centrality(S_RT)  # retweets made (binary)
            print("Done computing Measures")

            csv_bridging_writer.writerow([
                project, project_name, node,
                listings[node]["competing_lists"],
                dFF_bin[node], dFF_bin_in[node], dFF_bin_out[node],
                S_FF.in_degree(node, weight="weight"), S_FF.out_degree(node, weight="weight"),
                dFF_total_in_groups, dFF_total_out_groups,
                dFF_rec[node],
                dFF_bin_betweeness[node],
                dAT_bin[node], dAT_bin_in[node], dAT_bin_out[node],
                S_AT.in_degree(node, weight="weight"), S_AT.out_degree(node, weight="weight"),
                dAT_total_in_groups, dAT_total_out_groups,
                dAT_rec[node],
                dAT_bin_betweeness[node],
                dAT_avg_tie[node], dAT_strength_centrality,
                dRT_in[node], dRT_out[node],
                S_RT.in_degree(node, weight="weight"), S_RT.out_degree(node, weight="weight"),
            ])
            t_delta = (time.time() - t0)
            print("Count: %s Node: %s Time: %s" % (count, node, t_delta))

            # Take the node out again so the next person starts from the same base.
            S_FF.remove_node(node)
            S_AT.remove_node(node)
            S_RT.remove_node(node)


def main(argv):
    """Compute individual bridging measures for one project and write them to CSV.

    Options read from *argv*:
      -p <project_name>   project id used in all input/output file names
      -s <partitionfile>  CSV file with the partition / list listings

    The result is written to
    ``results/spss/individual bridging/<project>_individual_bridging_3.csv``.
    Exits with status 2 when the options cannot be parsed.
    """
    usage = 'individual_bridging_2.py -p <project_name> -s <partitionfile> '

    # Standard values
    partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
    project = "584"
    try:
        opts, args = getopt.getopt(argv, "p:s:o")
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == "-p":
            project = arg
        elif opt == "-s":
            partitionfile = arg
        else:
            # NOTE(review): "-o" is accepted by the option string but has no
            # effect other than printing the usage line -- confirm intent.
            print(usage)

    print("##################### INDIVIDUAL BRIDGING 2 (Working on whole network) ########################")
    print("Project %s " % project)
    print("Partition %s" % partitionfile)

    # The output file is opened before the long computation, as before, so a
    # bad output path fails immediately. The with-block guarantees that
    # buffered rows are flushed and the handle is closed, even on an error.
    output_path = 'results/spss/individual bridging/%s_individual_bridging_3.csv' % project
    with open(output_path, 'wb') as output_file:
        csv_bridging_writer = csv.writer(output_file)
        csv_bridging_writer.writerow(["Project", "Community", "Person_ID",
                                      "Competing_lists",
                                      "FF_bin_degree", "FF_bin_in_degree", "FF_bin_out_degree",
                                      "FF_vol_in", "FF_vol_out",
                                      "FF_groups_in", "FF_groups_out",
                                      "FF_rec",
                                      "FF_bin_betweeness",
                                      "AT_bin_degree", "AT_bin_in_degree", "AT_bin_out_degree",
                                      "AT_vol_in", "AT_vol_out",
                                      "AT_groups_in", "AT_groups_out",
                                      "AT_rec",
                                      "AT_bin_betweeness",
                                      "AT_avg_tie_strength", "AT_strength_centrality_in",
                                      "RT_bin_in_degree", "RT_bin_out_degree",
                                      "RT_vol_in", "RT_vol_out"])
        _write_bridging_rows(csv_bridging_writer, project, partitionfile)
 group = groups[i]
 all_other_groups.remove(group)
 i += 1
 for node in partition:
         t0 = time.time()
         temp = partition[:] #create a copy
         temp.remove(node)
         
         remaining_nodes = [item for sublist in partitions for item in sublist]
         for temp_node in temp:      
            remaining_nodes.remove(temp_node)
         
         # Make temporary copies of the networks that contain all but the nodes from the group
         S_G = G.subgraph(remaining_nodes)
         
         dG_bin = nx.degree_centrality(S_G)
         dG_bin_in = nx.in_degree_centrality(S_G)
         dG_bin_out = nx.out_degree_centrality(S_G)
         dG_bin_betweeness = nx.betweenness_centrality(S_G, k=10)
         dG_bin_pagerank = nx.pagerank(S_G, weight="weight")
         
         dG_in_group_volume = hp.incoming_group_volume(S_G,node, all_other_groups)
         dG_total_group_in_volume = hp.filtered_group_volume(dG_in_group_volume,0)
         
         dG_out_group_volume = hp.outgoing_group_volume(S_G,node, all_other_groups)
         dG_total_group_out_volume = hp.filtered_group_volume(dG_out_group_volume,0)
         
         csv_bridging_writer.writerow([project, project_name, node,                               
                           dG_bin[node], dG_bin_in[node], dG_bin_out[node],
                           S_G.in_degree(node,weight="weight"), S_G.out_degree(node,weight="weight"),
                           dG_total_group_in_volume, dG_total_group_out_volume])