def main(argv):
    # Standard values
    partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
    project = "584"
    to_pajek = False

    try:
        opts, args = getopt.getopt(argv, "p:s:o")
    except getopt.GetoptError:
        print 'group_bridging.py -p <project_name> -s <partitionfile> -o [if you want pajek output]'
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-p"):
            project = arg
        elif opt in ("-s"):
            partitionfile = arg
        elif opt in ("-o"):
            to_pajek = True
        else:
            print 'group_bridging.py -p <project_name> -s <partitionfile> -o [if you want pajek output]'

    print "##################### GROUP BRIDGING ########################"
    print "Project %s " % project
    print "Partition %s" % partitionfile

    ff_edges_writer = csv.writer(open("results/%s_ff_bridging_edges.csv" % project, "wb"))
    at_edges_writer = csv.writer(open("results/%s_at_bridging_edges.csv" % project, "wb"))
    rt_edges_writer = csv.writer(open("results/%s_rt_bridging_edges.csv" % project, "wb"))

    csv_bridging_writer = csv.writer(open('results/spss/group bridging/%s_group_bridging.csv' % project, 'wb'))
    csv_bridging_writer.writerow(["Project", "Name", "Member_count", "Competing_Lists",
                                  "FF_bin_degree", "FF_bin_in_degree", "FF_bin_out_degree",
                                  "FF_volume_in", "FF_volume_out",
                                  "FF_bin_betweeness", "FF_bin_closeness", "FF_bin_pagerank",
                                  #"FF_bin_eigenvector",
                                  "FF_bin_c_size", "FF_bin_c_density", "FF_bin_c_hierarchy", "FF_bin_c_index",
                                  "AT_bin_degree", "AT_bin_in_degree", "AT_bin_out_degree",
                                  "AT_bin_betweeness", "AT_bin_closeness", "AT_bin_pagerank",
                                  #"AT_bin_eigenvector",
                                  "AT_bin_c_size", "AT_bin_c_density", "AT_bin_c_hierarchy", "AT_bin_c_index",
                                  "AT_volume_in", "AT_volume_out",
                                  "RT_volume_in", "RT_volume_out",
                                  "FF_rec", "AT_rec", "AT_avg", "FF_avg"])

    # Get the overall networks from disk
    FF = nx.read_edgelist('data/networks/%s_FF.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())
    AT = nx.read_edgelist('data/networks/%s_solr_AT.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())
    RT = nx.read_edgelist('data/networks/%s_solr_RT.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())

    # Read in the partition
    tmp = hp.get_partition(partitionfile)
    partitions = tmp[0]
    groups = tmp[1]

    # Read in the member count for each project
    reader = csv.reader(open("results/stats/%s_lists_stats.csv" % project, "rb"), delimiter=",")
    temp = {}
    reader.next()  # Skip the header row
    for row in reader:
        temp[row[0]] = {"name": row[0], "member_count": int(row[3])}

    # Read in the list listings for individuals
    listings = {}
    indiv_reader = csv.reader(open(partitionfile))
    for row in indiv_reader:
        if listings.has_key(row[1]):
            listings[row[1]]["competing_lists"] += int(row[3])
        else:
            listings[row[1]] = {"competing_lists": int(row[3])}

    # Add dummy nodes if they are missing in the networks
    for partition in partitions:
        for node in partition:
            FF.add_node(node)
            AT.add_node(node)
            RT.add_node(node)

    # Blockmodel the networks into groups according to the partition
    P_FF = nx.blockmodel(FF, partitions)
    P_AT = nx.blockmodel(AT, partitions)
    P_RT = nx.blockmodel(RT, partitions)

    # Name the nodes in the network
    # TODO check: how do I know that the names really match?
    mapping = {}
    mapping_pajek = {}
    i = 0
    for group in groups:
        mapping_pajek[i] = "\"%s\"" % group  # mapping for pajek
        mapping[i] = "%s" % group
        i += 1

    H_FF = nx.relabel_nodes(P_FF, mapping)
    H_AT = nx.relabel_nodes(P_AT, mapping)
    H_RT = nx.relabel_nodes(P_RT, mapping)

    # Output the networks to pajek if needed
    if to_pajek:
        OUT_FF = nx.relabel_nodes(P_FF, mapping_pajek)
        OUT_AT = nx.relabel_nodes(P_AT, mapping_pajek)
        OUT_RT = nx.relabel_nodes(P_RT, mapping_pajek)

        # Write the blocked networks out to disk
        nx.write_pajek(OUT_FF, "results/networks/%s_grouped_FF.net" % project)
        nx.write_pajek(OUT_AT, "results/networks/%s_grouped_AT.net" % project)
        nx.write_pajek(OUT_RT, "results/networks/%s_grouped_RT.net" % project)

    ########## Output the edges between groups to csv ##############
    # Needed for the computation of individual bridging
    # Edges in both directions between two groups are added up
    processed_edges = []
    for (u, v, attrib) in H_FF.edges(data=True):
        if "%s%s" % (u, v) not in processed_edges:
            processed_edges.append("%s%s" % (u, v))
            if H_FF.has_edge(v, u):
                processed_edges.append("%s%s" % (v, u))
                ff_edges_writer.writerow([u, v, attrib["weight"] + H_FF[v][u]["weight"]])
            else:
                ff_edges_writer.writerow([u, v, attrib["weight"]])

    processed_edges = []
    for (u, v, attrib) in H_AT.edges(data=True):
        if "%s%s" % (u, v) not in processed_edges:
            processed_edges.append("%s%s" % (u, v))
            if H_AT.has_edge(v, u):
                processed_edges.append("%s%s" % (v, u))
                at_edges_writer.writerow([u, v, attrib["weight"] + H_AT[v][u]["weight"]])
            else:
                at_edges_writer.writerow([u, v, attrib["weight"]])

    processed_edges = []
    for (u, v, attrib) in H_RT.edges(data=True):
        if "%s%s" % (u, v) not in processed_edges:
            processed_edges.append("%s%s" % (u, v))
            if H_RT.has_edge(v, u):
                processed_edges.append("%s%s" % (v, u))
                rt_edges_writer.writerow([u, v, attrib["weight"] + H_RT[v][u]["weight"]])
            else:
                rt_edges_writer.writerow([u, v, attrib["weight"]])

    ########## TRIM EDGES ################
    # For meaningful results we have to trim edges in the AT and FF networks, otherwise the whole network just looks like a blob.
    # The threshold is chosen so that the network remains a single component.
    THRESHOLD = min([hp.min_threshold(H_AT), hp.min_threshold(H_FF)]) - 1
    H_FF = hp.trim_edges(H_FF, THRESHOLD)
    H_AT = hp.trim_edges(H_AT, THRESHOLD)

    ########## MEASURES ##############
    # Get the number of nodes in the aggregated networks
    #FF_nodes = {}
    #for node in H_FF.nodes(data=True):
    #    FF_nodes[node[0]] = node[1]["nnodes"]

    # FF network measures of the nodes
    # Works fine on binarized data
    FF_bin_degree = nx.degree_centrality(H_FF)
    FF_bin_in_degree = nx.in_degree_centrality(H_FF)    # The attention paid towards this group
    FF_bin_out_degree = nx.out_degree_centrality(H_FF)  # The attention that this group pays towards other groups
    FF_bin_betweenness = nx.betweenness_centrality(H_FF, weight="weight")  # How often the group lies between other groups
    FF_bin_closeness = nx.closeness_centrality(H_FF)
    #FF_bin_eigenvector = nx.eigenvector_centrality(H_FF)
    FF_bin_pagerank = nx.pagerank(H_FF)
    FF_bin_struc = sx.structural_holes(H_FF)

    # AT network measures of the nodes
    AT_bin_degree = nx.degree_centrality(H_AT)
    AT_bin_in_degree = nx.in_degree_centrality(H_AT)
    AT_bin_out_degree = nx.out_degree_centrality(H_AT)
    AT_bin_betweenness = nx.betweenness_centrality(H_AT, weight="weight")
    AT_bin_closeness = nx.closeness_centrality(H_AT)
    #AT_bin_eigenvector = nx.eigenvector_centrality(H_AT)
    AT_bin_pagerank = nx.pagerank(H_AT)
    AT_bin_struc = sx.structural_holes(H_AT)

    # Tie strengths
    dAT_avg_tie = hp.individual_average_tie_strength(H_AT)
    dFF_avg_tie = hp.individual_average_tie_strength(H_FF)
    dAT_rec = hp.individual_reciprocity(H_AT)
    dFF_rec = hp.individual_reciprocity(H_FF)

    # Dependent variable: see csv
    # TODO: a measure of how often tweets travel through this group, eventually betweenness in the RT graph

    # Arrange it in a list and output
    for node in FF_bin_degree.keys():
        csv_bridging_writer.writerow([project, node, int(temp[node]["member_count"]), listings[node]["competing_lists"],
                                      FF_bin_degree[node], FF_bin_in_degree[node], FF_bin_out_degree[node],
                                      H_FF.in_degree(node, weight="weight"), H_FF.out_degree(node, weight="weight"),
                                      FF_bin_betweenness[node], FF_bin_closeness[node], FF_bin_pagerank[node],
                                      #FF_bin_eigenvector[node],
                                      FF_bin_struc[node]['C-Size'], FF_bin_struc[node]['C-Density'],
                                      FF_bin_struc[node]['C-Hierarchy'], FF_bin_struc[node]['C-Index'],
                                      AT_bin_degree[node], AT_bin_in_degree[node], AT_bin_out_degree[node],
                                      AT_bin_betweenness[node], AT_bin_closeness[node], AT_bin_pagerank[node],
                                      #AT_bin_eigenvector[node],
                                      AT_bin_struc[node]['C-Size'], AT_bin_struc[node]['C-Density'],
                                      AT_bin_struc[node]['C-Hierarchy'], AT_bin_struc[node]['C-Index'],
                                      H_AT.in_degree(node, weight="weight"), H_AT.out_degree(node, weight="weight"),
                                      H_RT.in_degree(node, weight="weight"), H_RT.out_degree(node, weight="weight"),
                                      dFF_rec[node], dAT_rec[node], dAT_avg_tie[node], dFF_avg_tie[node]])
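

# The trimming step above relies on two helpers from the local "helper" module (hp) that are not
# shown in this file. The following is a minimal sketch of what they could look like, assuming
# min_threshold() searches for the largest edge-weight cutoff that still leaves the graph weakly
# connected and trim_edges() drops all edges at or below a given weight. The function names come
# from the calls above; the bodies and the "_sketch" suffix are assumptions.
import networkx as nx

def trim_edges_sketch(g, threshold):
    # Keep only edges whose weight is strictly above the threshold (assumption).
    h = nx.DiGraph()
    h.add_nodes_from(g.nodes(data=True))
    for u, v, data in g.edges(data=True):
        if data.get("weight", 1) > threshold:
            h.add_edge(u, v, **data)
    return h

def min_threshold_sketch(g):
    # Raise the cutoff until the trimmed graph falls apart; return the last cutoff that kept it
    # weakly connected (assumption).
    threshold = 0
    while True:
        trimmed = trim_edges_sketch(g, threshold + 1)
        if trimmed.number_of_edges() == 0 or not nx.is_weakly_connected(trimmed):
            return threshold
        threshold += 1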
def main(argv):
    # Partition file
    partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
    project = "584"
    reverse = False

    # Read in the networks
    FF_all = nx.read_edgelist("data/networks/%s_FF.edgelist" % project, nodetype=str, data=(("weight", float),), create_using=nx.DiGraph())
    AT_all = nx.read_edgelist("data/networks/%s_solr_AT.edgelist" % project, nodetype=str, data=(("weight", float),), create_using=nx.DiGraph())
    RT_all = nx.read_edgelist("data/networks/%s_solr_RT.edgelist" % project, nodetype=str, data=(("weight", float),), create_using=nx.DiGraph())

    try:
        opts, args = getopt.getopt(argv, "r")
    except getopt.GetoptError:
        print "edges.py -r [if you want to reverse the AT<-->RT tie direction]"
    for opt, arg in opts:
        if opt in ("-r"):
            print "Calculating the influence of outgoing AT ties on incoming RT ties"
            reverse = True

    # Output
    summary_csv_writer = csv.writer(open("results/spss/edges/%s_edges_summary.csv" % project, "wb"))
    summary_csv_writer.writerow(["Community", "Retweets Inside Community", "Retweets between Communities"])
    if reverse:
        bridging_csv_writer = csv.writer(open("results/spss/edges/%s_reverse_bridging_edges.csv" % project, "wb"))
        bonding_csv_writer = csv.writer(open("results/spss/edges/%s_reverse_bonding_edges.csv" % project, "wb"))
    else:
        bridging_csv_writer = csv.writer(open("results/spss/edges/%s_bridging_edges.csv" % project, "wb"))
        bonding_csv_writer = csv.writer(open("results/spss/edges/%s_bonding_edges.csv" % project, "wb"))

    # Read in the partitions
    tmp = hp.get_partition(partitionfile)
    partitions = tmp[0]
    groups = tmp[1]

    ff_bridging_edges = defaultdict(dict)
    ff_bonding_edges = defaultdict(dict)
    at_bridging_edges = defaultdict(dict)
    at_bonding_edges = defaultdict(dict)
    rt_bridging_edges = defaultdict(list)
    rt_bonding_edges = defaultdict(list)

    total_bridging_edges = 0
    total_bonding_edges = 0

    i = 0
    for partition in partitions:
        ################ FF Edges ######################
        # Collect the FF edges between groups
        for edge in nx.edge_boundary(FF_all, partition):
            if FF_all.has_edge(edge[1], edge[0]):
                ff_bridging_edges[edge[0]][edge[1]] = "ff_recip"
            else:
                ff_bridging_edges[edge[0]][edge[1]] = "ff_non_recip"

        # Collect the FF edges inside the group
        for edge in FF_all.subgraph(partition).edges():
            if FF_all.has_edge(edge[1], edge[0]):
                ff_bonding_edges[edge[0]][edge[1]] = "ff_recip"
            else:
                ff_bonding_edges[edge[0]][edge[1]] = "ff_non_recip"

        ################ AT Edges ######################
        # TODO: reciprocated edges with a weight > 1 are still missing
        # Idea 1: we might simply add up the incoming and outgoing edges to a total weight

        # Collect the AT edges that run between groups
        for edge in nx.edge_boundary(AT_all, partition):
            if AT_all.has_edge(edge[1], edge[0]):
                if AT_all.get_edge_data(*edge)["weight"] == 1:
                    at_bridging_edges[edge[0]][edge[1]] = "at_recip"
            else:
                if AT_all.get_edge_data(*edge)["weight"] == 1:
                    at_bridging_edges[edge[0]][edge[1]] = "at_non_recip_w1"
                else:
                    at_bridging_edges[edge[0]][edge[1]] = AT_all.get_edge_data(*edge)["weight"]

        # Collect the AT edges that stay inside the group
        for edge in AT_all.subgraph(partition).edges():
            if AT_all.has_edge(edge[1], edge[0]):
                if AT_all.get_edge_data(*edge)["weight"] == 1:
                    at_bonding_edges[edge[0]][edge[1]] = "at_recip"
            else:
                if AT_all.get_edge_data(*edge)["weight"] == 1:
                    at_bonding_edges[edge[0]][edge[1]] = "at_non_recip_w1"
                else:
                    at_bonding_edges[edge[0]][edge[1]] = AT_all.get_edge_data(*edge)["weight"]

        ################ RT Edges ######################
        # Collect the RT edges between groups
        tmp_rt_bridging_edges = 0
        for edge in nx.edge_boundary(RT_all, partition):
            tmp_rt_bridging_edges += RT_all.get_edge_data(*edge)["weight"]
            rt_bridging_edges[RT_all.get_edge_data(*edge)["weight"]].append((edge[0], edge[1]))
        total_bridging_edges += tmp_rt_bridging_edges

        # Collect the RT edges inside the group
        tmp_rt_bonding_edges = 0
        for edge in RT_all.subgraph(partition).edges():
            tmp_rt_bonding_edges += RT_all.get_edge_data(*edge)["weight"]
            rt_bonding_edges[RT_all.get_edge_data(*edge)["weight"]].append((edge[0], edge[1]))
        total_bonding_edges += tmp_rt_bonding_edges

        summary_csv_writer.writerow([groups[i], tmp_rt_bonding_edges, tmp_rt_bridging_edges])
        print "Community %s, Total Retweets inside: %s, Total Retweets between: %s" % (groups[i], tmp_rt_bonding_edges, tmp_rt_bridging_edges)
        i += 1

    print "Total Bonding Edges %s" % total_bonding_edges
    print "Total Bridging Edges %s" % total_bridging_edges

    ################## BONDING: Influence of AT strengths on bonding retweets ##############################
    bonding_flow = defaultdict(list)
    for rt_strength, retweets in rt_bonding_edges.iteritems():
        for retweet in retweets:
            value = None
            try:
                if reverse:
                    value = at_bonding_edges[retweet[1]][retweet[0]]  # Reverse direction
                    del at_bonding_edges[retweet[1]][retweet[0]]      # Delete that entry (reverse)
                else:
                    value = at_bonding_edges[retweet[0]][retweet[1]]  # Same direction
                    del at_bonding_edges[retweet[0]][retweet[1]]      # Delete that entry (same direction)
            except KeyError:
                pass
            if value is None:
                # Only if the AT network led to no diffusion do we check the FF network
                try:
                    if reverse:
                        value = ff_bonding_edges[retweet[1]][retweet[0]]  # Reverse direction
                        del ff_bonding_edges[retweet[1]][retweet[0]]      # Delete that entry (reverse)
                    else:
                        value = ff_bonding_edges[retweet[0]][retweet[1]]  # Same direction
                        del ff_bonding_edges[retweet[0]][retweet[1]]      # Delete that entry (same direction)
                except KeyError:
                    pass
            if value is None:
                # A retweet happened despite there being no ties at all
                value = "no_tie"
            bonding_flow[value].append(rt_strength)

    bonding_no_flow = {}
    # Count the AT ties that led to no diffusion
    for k, v1 in at_bonding_edges.iteritems():
        for k, value in v1.iteritems():
            if bonding_no_flow.has_key(value):
                bonding_no_flow[value] += 1
            else:
                bonding_no_flow[value] = 1
    # Count the FF ties that led to no diffusion
    for k, v1 in ff_bonding_edges.iteritems():
        for k, value in v1.iteritems():
            if bonding_no_flow.has_key(value):
                bonding_no_flow[value] += 1
            else:
                bonding_no_flow[value] = 1

    ################## BRIDGING: Influence of AT strengths on bridging retweets ##############################
    bridging_flow = defaultdict(list)
    for rt_strength, retweets in rt_bridging_edges.iteritems():
        for retweet in retweets:
            value = None
            try:
                if reverse:
                    value = at_bridging_edges[retweet[1]][retweet[0]]  # Reverse direction
                    del at_bridging_edges[retweet[1]][retweet[0]]      # Delete that entry (reverse)
                else:
                    value = at_bridging_edges[retweet[0]][retweet[1]]  # Same direction
                    del at_bridging_edges[retweet[0]][retweet[1]]      # Delete that entry (same direction)
            except KeyError:
                pass
            if value is None:
                # Only if the AT network led to no diffusion do we check the FF network
                try:
                    if reverse:
                        value = ff_bridging_edges[retweet[1]][retweet[0]]  # Reverse direction
                        del ff_bridging_edges[retweet[1]][retweet[0]]      # Delete that entry (reverse)
                    else:
                        value = ff_bridging_edges[retweet[0]][retweet[1]]  # Same direction
                        del ff_bridging_edges[retweet[0]][retweet[1]]      # Delete that entry (same direction)
                except KeyError:
                    pass
            if value is None:
                # A retweet happened despite there being no ties at all
                value = "no_tie"
            bridging_flow[value].append(rt_strength)

    bridging_no_flow = {}
    # Count the AT ties that led to no diffusion
    for k, v1 in at_bridging_edges.iteritems():
        for k, value in v1.iteritems():
            if bridging_no_flow.has_key(value):
                bridging_no_flow[value] += 1
            else:
                bridging_no_flow[value] = 1
    # Count the FF ties that led to no diffusion
    for k, v1 in ff_bridging_edges.iteritems():
        for k, value in v1.iteritems():
            if bridging_no_flow.has_key(value):
                bridging_no_flow[value] += 1
            else:
                bridging_no_flow[value] = 1

    ########################### Output ###########################
    bridging_csv_writer.writerow(["bridging_tie_type", "#_ties_w_retweets", "#_ties_w_o_retweets", "#_retweets",
                                  "%_of_total", "retweets/#_ties_w_o_retweets", "retweets/#_ties_w_retweets", "std"])
    bonding_csv_writer.writerow(["bonding_tie_type", "#_ties_w_retweets", "#_ties_w_o_retweets", "#_retweets",
                                 "%_of_total", "retweets/#_ties_w_o_retweets", "retweets/#_ties_w_retweets", "std"])

    # BRIDGING TIES
    bridging_total = [val for subl in bridging_flow.values() for val in subl]
    bridging_noflow_total = sum(bridging_no_flow.values())
    for k, v in bridging_flow.iteritems():
        if bridging_no_flow.has_key(k) and bridging_no_flow[k] != 0 and len(bridging_flow[k]) > 5:
            ratio = sum(bridging_flow[k]) / bridging_no_flow[k]
            of_total = sum(bridging_flow[k]) / float(sum(bridging_total))
            std = np.std(bridging_flow[k])
            average = np.average(bridging_flow[k])
            bridging_csv_writer.writerow([k, len(bridging_flow[k]), bridging_no_flow[k], sum(bridging_flow[k]),
                                          of_total, ratio, average, std])
        if k == "no_tie":
            std = np.std(bridging_flow[k])
            average = np.average(bridging_flow[k])
            bridging_csv_writer.writerow([k, len(bridging_flow[k]), 0, sum(bridging_flow[k]), 0, 0, average, std])

    std = np.std(bridging_total)
    average = np.average(bridging_total)
    bridging_csv_writer.writerow(["total", len(bridging_total), bridging_noflow_total, sum(bridging_total), 1,
                                  sum(bridging_total) / float(bridging_noflow_total), average, std])

    # BONDING TIES
    bonding_total = [val for subl in bonding_flow.values() for val in subl]
    bonding_noflow_total = sum(bonding_no_flow.values())
    for k, v in bonding_flow.iteritems():
        if bonding_no_flow.has_key(k) and bonding_no_flow[k] != 0 and len(bonding_flow[k]) > 5:
            ratio = sum(bonding_flow[k]) / bonding_no_flow[k]
            of_total = sum(bonding_flow[k]) / float(sum(bonding_total))
            std = np.std(bonding_flow[k])
            average = np.average(bonding_flow[k])
            bonding_csv_writer.writerow([k, len(bonding_flow[k]), bonding_no_flow[k], sum(bonding_flow[k]),
                                         of_total, ratio, average, std])
        if k == "no_tie":
            std = np.std(bonding_flow[k])
            average = np.average(bonding_flow[k])
            bonding_csv_writer.writerow([k, len(bonding_flow[k]), 0, sum(bonding_flow[k]), 0, 0, average, std])

    std = np.std(bonding_total)
    average = np.average(bonding_total)
    bonding_csv_writer.writerow(["total", len(bonding_total), bonding_noflow_total, sum(bonding_total), 1,
                                 sum(bonding_total) / float(bonding_noflow_total), average, std])
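

# A small, self-contained illustration (not part of the pipeline) of how the loops above separate
# bridging from bonding ties: for a directed graph, nx.edge_boundary() yields the edges that leave
# a node set, while subgraph().edges() yields the edges that stay inside it. The toy graph and the
# group assignment below are made up purely for demonstration.
import networkx as nx

toy = nx.DiGraph()
toy.add_weighted_edges_from([("a", "b", 1.0), ("b", "a", 2.0), ("a", "x", 3.0)])
toy_group = ["a", "b"]  # hypothetical community

toy_bridging = list(nx.edge_boundary(toy, toy_group))  # [("a", "x")] -> leaves the group
toy_bonding = toy.subgraph(toy_group).edges()          # [("a", "b"), ("b", "a")] -> stays inside
print toy_bridging, toy_bonding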
import networkx as nx
import csv
import helper as hp
import sys
import getopt

partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
project = "584"

tmp = hp.get_partition(partitionfile)
partitions = tmp[0]
groups = tmp[1]

FF_all = nx.read_edgelist('data/networks/%s_FF.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())
AT_all = nx.read_edgelist('data/networks/%s_solr_AT.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())
RT_all = nx.read_edgelist('data/networks/%s_solr_RT.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())


# Strip NUL bytes from a raw csv stream
def nonull(stream):
    for line in stream:
        yield line.replace('\x00', '')


# Read in the network as a dict of dicts: source -> target -> list of values
def read_in_net(edges_file):
    net_hash = {}
    for row in edges_file:
        if not net_hash.has_key(row[0]):
            net_hash[row[0]] = {row[1]: []}
        if not net_hash[row[0]].has_key(row[1]):
            net_hash[row[0]] = dict(net_hash[row[0]].items() + {row[1]: []}.items())
        net_hash[row[0]][row[1]].append(row[3])
    return net_hash


f1 = open("data/solr_584_at_connections.csv", "rb")
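
# hp.get_partition() comes from the local helper module and is not shown in this file. Below is a
# sketch under the assumption that the partition CSV holds one row per user of the form
# user_id, group_name, place_on_list, competing_lists, and that the function returns a list of node
# lists plus the index-aligned list of group names. The "_sketch" name and body are assumptions.
def get_partition_sketch(partitionfile):
    import csv
    partitions, groups = [], []
    for row in csv.reader(open(partitionfile)):
        user, group = row[0], row[1]
        if group not in groups:
            groups.append(group)
            partitions.append([])
        partitions[groups.index(group)].append(user)
    return partitions, groups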
def main(argv):
    # Standard values
    partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
    project = "584"
    to_pajek = False

    try:
        opts, args = getopt.getopt(argv, "p:s:o")
    except getopt.GetoptError:
        print 'individual_bridging.py -p <project_name> -s <partitionfile> -o [if you want pajek output]'
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-p"):
            project = arg
        elif opt in ("-s"):
            partitionfile = arg
        elif opt in ("-o"):
            to_pajek = True
        else:
            print 'individual_bridging.py -p <project_name> -s <partitionfile> -o [if you want pajek output]'

    print "##################### INDIVIDUAL BRIDGING ########################"
    print "Project %s " % project
    print "Partition %s" % partitionfile

    csv_bridging_writer = csv.writer(open('results/spss/individual bridging/%s_individual_bridging.csv' % project, 'wb'))
    csv_bridging_writer.writerow(["Name", "Group1", "Group2", "Number_between_ties", "Competing_lists",
                                  "FF_bin_degree", "FF_bin_in_degree", "FF_bin_out_degree", "FF_bin_betweeness",
                                  #"FF_c_size", "FF_c_density", "FF_c_hierarchy", "FF_c_index",
                                  "FF_own_group_in_volume", "FF_other_group_in_volume",
                                  "FF_own_group_out_volume", "FF_other_group_out_volume",
                                  "AT_bin_degree", "AT_bin_in_degree", "AT_bin_out_degree", "AT_bin_betweeness",
                                  "AT_volume_in", "AT_volume_out",
                                  #"AT_c_size", "AT_c_density", "AT_c_hierarchy", "AT_c_index",
                                  "AT_own_group_in_volume", "AT_other_group_in_volume",
                                  "AT_own_group_out_volume", "AT_other_group_out_volume",
                                  "RT_total_volume_in", "RT_total_volume_out",
                                  "RT_own_group_in_volume", "RT_other_group_in_volume",
                                  "RT_own_group_out_volume", "RT_other_group_out_volume"])

    # Read in the list listings for individuals
    listings = {}
    indiv_reader = csv.reader(open(partitionfile))
    for row in indiv_reader:
        listings[row[0]] = {"group": row[1], "place": int(row[2]), "competing_lists": int(row[3])}

    # Read in the edges between the groups and sort them
    GROUPS = 80  # 80 x 200 ~ 16000 individuals for analysis
    reader = csv.reader(open("results/%s_bridging_edges.csv" % project, "rb"), delimiter=",")
    edges = []
    for row in reader:
        edges.append({"group1": row[0], "group2": row[1], "count": float(row[2])})
    edges_sorted = sorted(edges, key=lambda k: k["count"])
    distance_between_samples = int(float(len(edges_sorted)) / GROUPS)
    if distance_between_samples == 0:
        distance_between_samples = 1  # Minimal distance
    iterator = 0

    # Read in the partition
    tmp = hp.get_partition(partitionfile)
    partitions = tmp[0]
    groups = tmp[1]

    # Read in the networks
    FF_all = nx.read_edgelist('data/networks/%s_FF.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())
    AT_all = nx.read_edgelist('data/networks/%s_solr_AT.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())
    RT_all = nx.read_edgelist('data/networks/%s_solr_RT.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())

    i = 0
    for partition in partitions:
        for node in partition:
            FF_all.add_node(node, group=groups[i])  # Add nodes
            AT_all.add_node(node, group=groups[i])
            RT_all.add_node(node, group=groups[i])
        i += 1

    while iterator < len(edges_sorted):
        # Generate a subgraph consisting of two partitions
        # Problem: with pairs of 2 and k ~ 200 groups we could generate roughly 200 * 200 / 2 combinations. How to pick the pairs?
        # Solution 1: at random
        # Solution 2: based on the ordered tie strength between groups from the group bridging step,
        #             e.g. [10,9,8,7,6,5,0] take every xth element to create a set of this size [10,8,6,0]
        # TODO: bin edges with the same weight into the same category and then select a group at random
        selected_edge = edges_sorted[iterator]
        group1 = selected_edge["group1"]
        group2 = selected_edge["group2"]
        index1 = groups.index(group1)
        index2 = groups.index(group2)
        print "%s : %s with %s of strength %s" % (iterator, group1, group2, selected_edge["count"])

        # Create subgraphs
        S_FF = FF_all.subgraph(partitions[index1] + partitions[index2])
        S_FF.name = "%s_%s" % (group1, group2)
        S_AT = AT_all.subgraph(partitions[index1] + partitions[index2])
        S_AT.name = "%s_%s" % (group1, group2)
        S_RT = RT_all.subgraph(partitions[index1] + partitions[index2])
        S_RT.name = "%s_%s" % (group1, group2)

        iterator += distance_between_samples  # Make equidistant steps with the iterator

        # Optional output to pajek
        if to_pajek:
            print "Generating pajek output for %s %s" % (groups[index1], groups[index2])

            # Relabel for pajek
            def mapping(x):
                return "\"%s\"" % x

            H_FF = nx.relabel_nodes(S_FF, mapping)
            H_AT = nx.relabel_nodes(S_AT, mapping)
            H_RT = nx.relabel_nodes(S_RT, mapping)

            # Write it to disk
            nx.write_pajek(H_FF, "results/networks/pairs/%s_%s_%s_pair_FF.net" % (project, groups[index1], groups[index2]))
            nx.write_pajek(H_AT, "results/networks/pairs/%s_%s_%s_pair_AT.net" % (project, groups[index1], groups[index2]))
            nx.write_pajek(H_RT, "results/networks/pairs/%s_%s_%s_pair_RT.net" % (project, groups[index1], groups[index2]))

        ################ MEASURES ################

        ## FF measures
        dFF_bin = nx.degree_centrality(S_FF)
        dFF_bin_in = nx.in_degree_centrality(S_FF)
        dFF_bin_out = nx.out_degree_centrality(S_FF)
        dFF_bin_betweeness = nx.betweenness_centrality(S_FF)
        # Structural holes has problems, probably with disconnected networks (eventually compute the biggest component first)
        #dFF_struc = sx.structural_holes(S_FF)

        # Which one is the own group, which one the other?
        dFF_group1_vol_in = hp.individual_in_volume(S_FF, group1)
        dFF_group2_vol_in = hp.individual_in_volume(S_FF, group2)
        dFF_group1_vol_out = hp.individual_out_volume(S_FF, group1)
        dFF_group2_vol_out = hp.individual_out_volume(S_FF, group2)

        ## AT measures
        dAT_bin = nx.degree_centrality(S_AT)
        dAT_bin_in = nx.in_degree_centrality(S_AT)
        dAT_bin_out = nx.out_degree_centrality(S_AT)
        dAT_bin_betweeness = nx.betweenness_centrality(S_AT)
        # Why can the structural holes not be computed here?
        #dAT_struc = sx.structural_holes(S_AT)
        dAT_group1_vol_in = hp.individual_in_volume(S_AT, group1)
        dAT_group2_vol_in = hp.individual_in_volume(S_AT, group2)
        dAT_group1_vol_out = hp.individual_out_volume(S_AT, group1)
        dAT_group2_vol_out = hp.individual_out_volume(S_AT, group2)

        ############### DEPENDENT VARIABLES ###########
        dRT_group1_vol_in = hp.individual_in_volume(S_RT, group1)
        dRT_group2_vol_in = hp.individual_in_volume(S_RT, group2)
        dRT_group1_vol_out = hp.individual_out_volume(S_RT, group1)
        dRT_group2_vol_out = hp.individual_out_volume(S_RT, group2)

        ############ OUTPUT ###########################
        # Arrange it in a list and output
        for node in dFF_bin.keys():
            # Depending on whether the node is in partition 1 or 2, the definition of "own" and "other" changes.
            if node in partitions[index1]:
                #FF
                FF_own_group_in_volume = dFF_group1_vol_in[node]
                FF_own_group_out_volume = dFF_group1_vol_out[node]
                FF_other_group_in_volume = dFF_group2_vol_in[node]
                FF_other_group_out_volume = dFF_group2_vol_out[node]
                #AT
                AT_own_group_in_volume = dAT_group1_vol_in[node]
                AT_own_group_out_volume = dAT_group1_vol_out[node]
                AT_other_group_in_volume = dAT_group2_vol_in[node]
                AT_other_group_out_volume = dAT_group2_vol_out[node]
                #RT
                RT_own_group_in_volume = dRT_group1_vol_in[node]
                RT_own_group_out_volume = dRT_group1_vol_out[node]
                RT_other_group_in_volume = dRT_group2_vol_in[node]
                RT_other_group_out_volume = dRT_group2_vol_out[node]
            else:
                #FF
                FF_own_group_in_volume = dFF_group2_vol_in[node]
                FF_own_group_out_volume = dFF_group2_vol_out[node]
                FF_other_group_in_volume = dFF_group1_vol_in[node]
                FF_other_group_out_volume = dFF_group1_vol_out[node]
                #AT
                AT_own_group_in_volume = dAT_group2_vol_in[node]
                AT_own_group_out_volume = dAT_group2_vol_out[node]
                AT_other_group_in_volume = dAT_group1_vol_in[node]
                AT_other_group_out_volume = dAT_group1_vol_out[node]
                #RT
                RT_own_group_in_volume = dRT_group2_vol_in[node]
                RT_own_group_out_volume = dRT_group2_vol_out[node]
                RT_other_group_in_volume = dRT_group1_vol_in[node]
                RT_other_group_out_volume = dRT_group1_vol_out[node]

            csv_bridging_writer.writerow([node, group1, group2, selected_edge["count"], listings[node]["competing_lists"],
                                          dFF_bin[node], dFF_bin_in[node], dFF_bin_out[node], dFF_bin_betweeness[node],
                                          #dFF_struc[node]['C-Size'], dFF_struc[node]['C-Density'], dFF_struc[node]['C-Hierarchy'], dFF_struc[node]['C-Index'],
                                          FF_own_group_in_volume, FF_other_group_in_volume,
                                          FF_own_group_out_volume, FF_other_group_out_volume,
                                          dAT_bin[node], dAT_bin_in[node], dAT_bin_out[node], dAT_bin_betweeness[node],
                                          S_AT.in_degree(node, weight="weight"), S_AT.out_degree(node, weight="weight"),
                                          #dAT_struc[node]['C-Size'], dAT_struc[node]['C-Density'], dAT_struc[node]['C-Hierarchy'], dAT_struc[node]['C-Index'],
                                          AT_own_group_in_volume, AT_other_group_in_volume,
                                          AT_own_group_out_volume, AT_other_group_out_volume,
                                          S_RT.in_degree(node, weight="weight"), S_RT.out_degree(node, weight="weight"),
                                          RT_own_group_in_volume, RT_other_group_in_volume,
                                          RT_own_group_out_volume, RT_other_group_out_volume])
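

# hp.individual_in_volume() / hp.individual_out_volume() are helper-module functions whose source is
# not included here. A plausible reading, given that every node carries a "group" attribute, is the
# summed edge weight a node receives from (or sends to) members of one named group. The sketches
# below are assumptions; the real helpers may differ.
def individual_in_volume_sketch(g, group_name):
    volume = {}
    for node in g.nodes():
        volume[node] = sum(data.get("weight", 1)
                           for u, _, data in g.in_edges(node, data=True)
                           if g.node[u].get("group") == group_name)
    return volume

def individual_out_volume_sketch(g, group_name):
    volume = {}
    for node in g.nodes():
        volume[node] = sum(data.get("weight", 1)
                           for _, v, data in g.out_edges(node, data=True)
                           if g.node[v].get("group") == group_name)
    return volume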
def main(argv):
    # Standard values
    partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
    project = "584"
    to_pajek = False

    try:
        opts, args = getopt.getopt(argv, "p:s:o")
    except getopt.GetoptError:
        print 'individual_bridging_2.py -p <project_name> -s <partitionfile>'
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-p"):
            project = arg
        elif opt in ("-s"):
            partitionfile = arg
        else:
            print 'individual_bridging_2.py -p <project_name> -s <partitionfile>'

    print "##################### INDIVIDUAL BRIDGING 2 (Working on whole network) ########################"
    print "Project %s " % project
    print "Partition %s" % partitionfile

    csv_bridging_writer = csv.writer(open('results/spss/individual bridging/%s_individual_bridging_3.csv' % project, 'wb'))
    csv_bridging_writer.writerow(["Project", "Community", "Person_ID", "Competing_lists",
                                  "FF_bin_degree", "FF_bin_in_degree", "FF_bin_out_degree",
                                  "FF_vol_in", "FF_vol_out", "FF_groups_in", "FF_groups_out", "FF_rec",
                                  "FF_bin_betweeness",  #"FF_bin_closeness", "FF_bin_pagerank",
                                  #"FF_c_size", "FF_c_density", "FF_c_hierarchy", "FF_c_index",
                                  "AT_bin_degree", "AT_bin_in_degree", "AT_bin_out_degree",
                                  "AT_vol_in", "AT_vol_out", "AT_groups_in", "AT_groups_out", "AT_rec",
                                  "AT_bin_betweeness",  #"AT_bin_closeness", "AT_bin_pagerank",
                                  #"AT_c_size", "AT_c_density", "AT_c_hierarchy", "AT_c_index",
                                  "AT_avg_tie_strength", "AT_strength_centrality_in",
                                  "RT_bin_in_degree", "RT_bin_out_degree",
                                  "RT_vol_in", "RT_vol_out"])

    # Read in the list listings for individuals
    listings = {}
    indiv_reader = csv.reader(open(partitionfile))
    for row in indiv_reader:
        listings[row[0]] = {"group": row[1], "place": int(row[2]), "competing_lists": int(row[3])}

    # Read in the centralities of nodes in their corresponding community
    centralities = {}
    centrality_reader = csv.reader(open('results/spss/individual bonding/%s_individual_bonding.csv' % project))
    for row in centrality_reader:
        centralities[row[2]] = {"ff_in_degree": row[5]}

    # Read in the partition
    tmp = hp.get_partition(partitionfile)
    partitions = tmp[0]
    groups = tmp[1]

    # Read in the networks
    FF_all = nx.read_edgelist('data/networks/%s_FF.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())
    AT_all = nx.read_edgelist('data/networks/%s_solr_AT.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())
    RT_all = nx.read_edgelist('data/networks/%s_solr_RT.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())
    print "Done reading in Networks"

    # Determine the maximum subset of nodes present in all networks
    maximum_subset = []
    for node in FF_all.nodes():
        if AT_all.has_node(node) and RT_all.has_node(node):
            maximum_subset.append(node)

    i = 0
    for partition in partitions:
        for node in partition:
            FF_all.add_node(node, group=groups[i])  # Add nodes
            AT_all.add_node(node, group=groups[i])
            RT_all.add_node(node, group=groups[i])
        i += 1

    i = 0
    # These measures are computed only once on the whole graph
    # (we are making an error since the internal group structure also loads up those values)
    if len(maximum_subset) < 1000:
        scaling_k = len(maximum_subset)
    else:
        scaling_k = len(maximum_subset) / 100
    dFF_bin_betweeness = nx.betweenness_centrality(FF_all, k=scaling_k)
    dAT_bin_betweeness = nx.betweenness_centrality(AT_all, k=scaling_k)
    #dFF_struc = sx.structural_holes(FF_all)

    for partition in partitions:
        project_name = groups[i]

        # Determine the groups that are not in the partition
        all_other_groups = groups[:]
        group = groups[i]
        all_other_groups.remove(group)

        # Get all the partitions without the current partition
        partitions_without_partition = partitions[:]
        partitions_without_partition.remove(partition)

        # Remove the nodes that are in this partition
        remaining_nodes = [item for sublist in partitions for item in sublist]  # flat list of all nodes
        for nodes_to_be_deleted in partition:
            remaining_nodes.remove(nodes_to_be_deleted)

        # Create subgraphs that contain all nodes except the ones that are in the partition
        S_FF = FF_all.subgraph(remaining_nodes)
        S_AT = AT_all.subgraph(remaining_nodes)
        S_RT = RT_all.subgraph(remaining_nodes)
        i += 1

        for node in partition:
            if node in maximum_subset:
                t0 = time.time()

                # Add FF nodes and edges
                S_FF.add_node(node, group=group)
                S_FF.add_edges_from(FF_all.in_edges(node, data=True))   # in edges
                S_FF.add_edges_from(FF_all.out_edges(node, data=True))  # out edges
                # Delete the nodes that we accidentally added again by importing all of the node's edges
                for tmp_node in partition:
                    if tmp_node != node and tmp_node in S_FF:
                        S_FF.remove_node(tmp_node)

                # Add AT nodes and edges
                S_AT.add_node(node, group=group)
                S_AT.add_edges_from(AT_all.in_edges(node, data=True))   # in edges
                S_AT.add_edges_from(AT_all.out_edges(node, data=True))  # out edges
                # Delete the nodes that we accidentally added again by importing all of the node's edges
                for tmp_node in partition:
                    if tmp_node != node and tmp_node in S_AT:
                        S_AT.remove_node(tmp_node)

                # Add RT nodes and edges
                S_RT.add_node(node, group=group)
                S_RT.add_edges_from(RT_all.in_edges(node, data=True))   # in edges
                S_RT.add_edges_from(RT_all.out_edges(node, data=True))  # out edges
                # Delete the nodes that we accidentally added again by importing all of the node's edges
                for tmp_node in partition:
                    if tmp_node != node and tmp_node in S_RT:
                        S_RT.remove_node(tmp_node)

                print "Done creating Subgraphs"

                ## FF measures
                dFF_bin = nx.degree_centrality(S_FF)
                dFF_bin_in = nx.in_degree_centrality(S_FF)
                dFF_bin_out = nx.out_degree_centrality(S_FF)
                #nx.load_centrality(S_FF, v=node, weight="weight")
                #dFF_bin_closeness = nx.closeness_centrality(S_FF, v=node)
                #dFF_bin_pagerank = nx.pagerank(S_FF, weight="weight")
                dFF_total_in_groups = hp.filtered_group_volume(hp.incoming_group_volume(S_FF, node, all_other_groups), 0)
                dFF_total_out_groups = hp.filtered_group_volume(hp.outgoing_group_volume(S_FF, node, all_other_groups), 0)
                dFF_rec = hp.individual_reciprocity(S_FF, node)  # number of reciprocated ties

                ## AT measures
                dAT_bin = nx.degree_centrality(S_AT)
                dAT_bin_in = nx.in_degree_centrality(S_AT)
                dAT_bin_out = nx.out_degree_centrality(S_AT)
                #dAT_bin_betweeness = nx.betweenness_centrality(S_AT, k=100)
                #nx.load_centrality(S_AT, v=node, weight="weight")
                #dAT_bin_closeness = nx.closeness_centrality(S_AT, v=node)
                #dAT_bin_pagerank = nx.pagerank(S_AT, weight="weight")
                dAT_total_in_groups = hp.filtered_group_volume(hp.incoming_group_volume(S_AT, node, all_other_groups), 0)
                dAT_total_out_groups = hp.filtered_group_volume(hp.outgoing_group_volume(S_AT, node, all_other_groups), 0)
                dAT_rec = hp.individual_reciprocity(S_AT, node)  # number of reciprocated @ ties
                dAT_avg_tie = hp.individual_average_tie_strength(S_AT, node)

                # Compute a combined measure which multiplies the strength of incoming ties
                # by the centrality of the person they come from
                dAT_strength_centrality = 0
                for edge in S_AT.in_edges(node, data=True):
                    if edge[0] in maximum_subset:
                        # get the centrality of the node that the tie is incoming from
                        dAT_strength_centrality += edge[2]["weight"] * float(centralities[edge[0]]["ff_in_degree"])

                ############### DEPENDENT VARIABLES ###########
                dRT_in = nx.in_degree_centrality(S_RT)    # At least one retweet that a person has received
                dRT_out = nx.out_degree_centrality(S_RT)  # At least one retweet that a person has made
                print "Done computing Measures"

                try:
                    c_size = dFF_struc[node]['C-Size']
                    c_dens = dFF_struc[node]['C-Density']
                    c_hierarch = dFF_struc[node]['C-Hierarchy']
                    c_index = dFF_struc[node]['C-Index']
                except:
                    c_size = "NaN"
                    c_dens = "NaN"
                    c_hierarch = "NaN"
                    c_index = "NaN"

                csv_bridging_writer.writerow([project, project_name, node, listings[node]["competing_lists"],
                                              dFF_bin[node], dFF_bin_in[node], dFF_bin_out[node],
                                              S_FF.in_degree(node, weight="weight"), S_FF.out_degree(node, weight="weight"),
                                              dFF_total_in_groups, dFF_total_out_groups, dFF_rec[node],
                                              dFF_bin_betweeness[node],  #dFF_bin_closeness[node], dFF_bin_pagerank[node],
                                              #c_size, c_dens, c_hierarch, c_index,
                                              dAT_bin[node], dAT_bin_in[node], dAT_bin_out[node],
                                              S_AT.in_degree(node, weight="weight"), S_AT.out_degree(node, weight="weight"),
                                              dAT_total_in_groups, dAT_total_out_groups, dAT_rec[node],
                                              dAT_bin_betweeness[node],  #dAT_bin_closeness[node], dAT_bin_pagerank[node],
                                              #dAT_struc[node]['C-Size'], dAT_struc[node]['C-Density'], dAT_struc[node]['C-Hierarchy'], dAT_struc[node]['C-Index'],
                                              dAT_avg_tie[node], dAT_strength_centrality,
                                              dRT_in[node], dRT_out[node],
                                              S_RT.in_degree(node, weight="weight"), S_RT.out_degree(node, weight="weight")])

                t_delta = (time.time() - t0)
                print "Count: %s Node: %s Time: %s" % (i, node, t_delta)

                # Remove the node again
                S_FF.remove_node(node)
                S_AT.remove_node(node)
                S_RT.remove_node(node)
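

# hp.incoming_group_volume(), hp.outgoing_group_volume() and hp.filtered_group_volume() are not
# shown in this file. A plausible sketch, assuming the first two return a per-group weighted volume
# for a single node and the third counts how many groups exceed a cutoff (which would make
# "FF_groups_in"/"FF_groups_out" the number of distinct groups a person hears from or talks to).
# Names with "_sketch" and the bodies are assumptions.
def incoming_group_volume_sketch(g, node, group_names):
    volume = dict.fromkeys(group_names, 0.0)
    for u, _, data in g.in_edges(node, data=True):
        group = g.node[u].get("group")
        if group in volume:
            volume[group] += data.get("weight", 1)
    return volume

def filtered_group_volume_sketch(group_volume, cutoff):
    return len([v for v in group_volume.values() if v > cutoff])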
def main(argv):
    # Standard values
    partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
    project = "584"
    to_pajek = True

    try:
        opts, args = getopt.getopt(argv, "p:s:o")
    except getopt.GetoptError:
        print 'group_bonding.py -p <project_name> -s <partitionfile> -o [if you want pajek output]'
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-p"):
            project = arg
        elif opt in ("-s"):
            partitionfile = arg
        elif opt in ("-o"):
            to_pajek = True
        else:
            print 'group_bonding.py -p <project_name> -s <partitionfile> -o [if you want pajek output]'

    print "##################### GROUP BONDING ########################"
    print "Project %s " % project
    print "Partition %s" % partitionfile

    csv_writer = csv.writer(open('results/spss/group bonding/%s_group_bonding.csv' % project, 'wb'))
    # Attributes for Gephi
    csv_attributes = csv.writer(open('results/networks/%s_at_node_attributes.csv' % project, 'wb'))
    csv_writer.writerow(["Project", "Name", "Member_count", "Competing_Lists",
                         "FF_Nodes", "AT_Nodes", "RT_Nodes",
                         "FF_Edges", "AT_Edges", "RT_Edges",
                         "FF_bin_density", "AT_density",
                         "FF_bin_avg_path_length", "AT_bin_avg_path_length",
                         "FF_bin_clustering", "AT_bin_clustering",
                         "FF_reciprocity", "AT_reciprocity",
                         "FF_bin_transitivity", "AT_bin_transitivity",
                         "RT_density", "RT_total_volume"])

    # Read in the networks
    FF_all = nx.read_edgelist('data/networks/%s_FF.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())
    AT_all = nx.read_edgelist('data/networks/%s_solr_AT.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())
    RT_all = nx.read_edgelist('data/networks/%s_solr_RT.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())

    # Read in the partition
    tmp = hp.get_partition(partitionfile)
    partitions = tmp[0]
    groups = tmp[1]

    # Read in the member count for each project
    reader = csv.reader(open("results/stats/%s_lists_stats.csv" % project, "rb"), delimiter=",")
    temp = {}
    reader.next()  # Skip the header row
    for row in reader:
        temp[row[0]] = {"name": row[0], "member_count": int(row[3])}

    # Read in the list listings for individuals
    listings = {}
    indiv_reader = csv.reader(open(partitionfile))
    for row in indiv_reader:
        if listings.has_key(row[1]):
            listings[row[1]]["competing_lists"] += int(row[3])
        else:
            listings[row[1]] = {"competing_lists": int(row[3])}

    i = 0
    for partition in partitions:
        for node in partition:
            FF_all.add_node(node, group=groups[i])
            AT_all.add_node(node, group=groups[i])
            RT_all.add_node(node, group=groups[i])
        i += 1

    # Write out to pajek for gephi visualization
    if to_pajek:
        # Write the attributes file
        i = 0
        csv_attributes.writerow(["id", "name", "type"])
        for node in AT_all.nodes():
            i += 1
            csv_attributes.writerow([i, node, AT_all.node[node]["group"]])

        nx.write_pajek(FF_all, "results/networks/%s_FF.net" % project)
        nx.write_pajek(AT_all, "results/networks/%s_AT.net" % project)
        nx.write_pajek(RT_all, "results/networks/%s_RT.net" % project)

    i = 0
    for partition in partitions:
        project_name = groups[i]

        # Add up total members
        member_count = int(temp[project_name]["member_count"])

        print "############ Calculating Project %s ############### " % project_name

        # Generate a subgraph according to the partition
        FF = FF_all.subgraph(partition)
        AT = AT_all.subgraph(partition)
        RT = RT_all.subgraph(partition)

        # Additional info for each project
        FF.name = "FF_%s " % project_name
        AT.name = "AT_%s " % project_name
        RT.name = "RT_%s " % project_name

        ############### Compute Group measures ################

        # Measures FF
        FF_bin_density = nx.density(FF)
        FF_bin_transitivity = nx.transitivity(FF)
        FF_reciprocity = hp.reciprocity(FF)  # Share of reciprocated ties among all ties

        # Measures that need a connected graph:
        # in case the graph is split into multiple components, take the biggest weakly connected component
        FF_partition = nx.weakly_connected_components(FF)[0]
        FF_comp = FF.subgraph(FF_partition)
        FF_bin_avg_path_length = nx.average_shortest_path_length(FF_comp)
        FF_bin_clustering = nx.average_clustering(FF_comp.to_undirected(), count_zeros=False)
        # Networks with a lot of mutual trust have a high clustering coefficient.
        # Star networks with a single broadcast node and passive listeners have a low clustering coefficient.

        # Measures AT
        #AT_density = nx.density(AT)  # deprecated since it treats the network as binarized and we lose all the interaction information
        AT_density = hp.average_tie_strength(AT)
        AT_bin_transitivity = nx.transitivity(AT)
        AT_reciprocity = hp.reciprocity(AT)
        #AT_avg_volume = hp.average_tie_strength(AT)
        AT_partition = nx.weakly_connected_components(AT)[0]
        AT_comp = AT.subgraph(AT_partition)
        AT_bin_avg_path_length = nx.average_shortest_path_length(AT_comp)
        AT_bin_clustering = nx.average_clustering(AT_comp.to_undirected())

        # Dependent variable
        #RT_density = nx.density(RT)  # Danger: this works on the binarized graph!
        # TODO: we need a weighted density for RT
        RT_density = hp.average_tie_strength(RT)
        RT_total_volume = hp.total_edge_weight(RT)

        ############### Output ################
        csv_writer.writerow([project, project_name, member_count, listings[project_name]["competing_lists"],
                             len(FF.nodes()), len(AT.nodes()), len(RT.nodes()),
                             len(FF.edges()), len(AT.edges()), len(RT.edges()),
                             FF_bin_density, AT_density,
                             FF_bin_avg_path_length, AT_bin_avg_path_length,
                             FF_bin_clustering, AT_bin_clustering,
                             FF_reciprocity, AT_reciprocity,
                             FF_bin_transitivity, AT_bin_transitivity,
                             RT_density, RT_total_volume])
        i += 1
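

# hp.reciprocity(), hp.average_tie_strength() and hp.total_edge_weight() live in the helper module
# and are not shown here. Minimal sketches, assuming they mean the share of reciprocated directed
# ties, the mean edge weight, and the summed edge weight respectively (the inline comments above
# suggest these readings, but the real implementations may differ):
def reciprocity_sketch(g):
    if g.number_of_edges() == 0:
        return 0.0
    reciprocated = sum(1 for u, v in g.edges() if g.has_edge(v, u))
    return reciprocated / float(g.number_of_edges())

def average_tie_strength_sketch(g):
    weights = [data.get("weight", 1) for _, _, data in g.edges(data=True)]
    return sum(weights) / float(len(weights)) if weights else 0.0

def total_edge_weight_sketch(g):
    return sum(data.get("weight", 1) for _, _, data in g.edges(data=True))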
import networkx as nx
import csv
import helper as hp

csv_writer = csv.writer(open('results/spss/whole network/whole_network.csv', 'wb'))
csv_writer.writerow(["FF_assortativity", "AT_assortativity", "RT_assortativity"])

# Read in the partition
tmp = hp.get_partition()
partitions = tmp[0]
groups = tmp[1]

# Read in the networks
project = "584"
FF_all = nx.read_edgelist('data/networks/%s_FF.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())
AT_all = nx.read_edgelist('data/networks/%s_AT.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())
RT_all = nx.read_edgelist('data/networks/%s_RT.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())

# Add dummy nodes if they are missing in the networks
i = 0
for partition in partitions:
    for node in partition:
        FF_all.add_node(node, group=groups[i])
        AT_all.add_node(node, group=groups[i])
        RT_all.add_node(node, group=groups[i])
    i += 1

# Compute Assortativity in Friendships
aFF = nx.attribute_assortativity_coefficient(FF_all, 'group')
aAT = nx.attribute_assortativity_coefficient(AT_all, 'group')
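# The script breaks off after the AT assortativity; judging from the header row written above, it
# presumably finishes by computing the RT assortativity and writing the three values out.
# A plausible completion (assumption):
aRT = nx.attribute_assortativity_coefficient(RT_all, 'group')
csv_writer.writerow([aFF, aAT, aRT])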
def main(argv):
    # Standard values
    partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
    project = "584"

    try:
        opts, args = getopt.getopt(argv, "p:s:")
    except getopt.GetoptError:
        print "individual_bonding.py -p <project_name> -s <partitionfile>"
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-p"):
            project = arg
        elif opt in ("-s"):
            partitionfile = arg
        else:
            print "individual_bonding.py -p <project_name> -s <partitionfile>"

    print "##################### INDIVIDUAL BONDING ########################"
    print "Project %s " % project
    print "Partition %s" % partitionfile

    csv_writer = csv.writer(open("results/spss/individual bonding/%s_individual_bonding.csv" % project, "wb"))
    csv_writer.writerow(["Project", "Community", "Person_ID", "Place_on_list",
                         "FF_bin_deg", "FF_bin_in_deg", "FF_bin_out_deg",
                         "FF_vol_in", "FF_vol_out",
                         "FF_bin_close", "FF_bin_page", "FF_rec",
                         "AT_bin_deg", "AT_bin_in_deg", "AT_bin_out_deg",
                         "AT_bin_close", "AT_bin_page", "AT_rec", "AT_avg",
                         "AT_vol_in", "AT_vol_out",
                         "RT_bin_deg_in", "RT_bin_deg_out",
                         "RT_vol_in", "RT_vol_out",
                         "RT_global_vol_in", "RT_global_vol_out"])

    # Read in the list listings for individuals
    listings = {}
    indiv_reader = csv.reader(open(partitionfile))
    i = 0
    for row in indiv_reader:
        if i > int(row[2]):  # in case there are fewer than 101 entries for a group for some reason
            i = 0
        i += 1
        listings[row[0]] = {"group": row[1], "place": i, "competing_lists": int(row[3]), "original_place": int(row[2])}
        if i == 101:  # Some of the original places have shifted because of the regrouping
            i = 0

    # Read in the networks
    FF_all = nx.read_edgelist("data/networks/%s_FF.edgelist" % project, nodetype=str, data=(("weight", float),), create_using=nx.DiGraph())
    AT_all = nx.read_edgelist("data/networks/%s_solr_AT.edgelist" % project, nodetype=str, data=(("weight", float),), create_using=nx.DiGraph())
    RT_all = nx.read_edgelist("data/networks/%s_solr_RT.edgelist" % project, nodetype=str, data=(("weight", float),), create_using=nx.DiGraph())

    # Read in the partitions
    tmp = hp.get_partition(partitionfile)
    partitions = tmp[0]
    groups = tmp[1]

    # Add missing nodes
    # We are limiting the analysis to the maximal subset of nodes that are present in all networks.
    maximum_subset = []
    for node in FF_all.nodes():
        if AT_all.has_node(node) and RT_all.has_node(node):
            maximum_subset.append(node)
        else:
            print node
    print "Maximum Subset of nodes %s" % len(maximum_subset)

    # In this case we are not adding missing nodes to the networks; to keep the error in the final
    # regressions small we use the subset method instead.
    # i = 0
    # for partition in partitions:
    #     for node in partition:
    #         FF_all.add_node(node, group = groups[i])
    #         AT_all.add_node(node, group = groups[i])
    #         RT_all.add_node(node, group = groups[i])
    #     i += 1

    i = 0
    for partition in partitions:
        project_name = groups[i]
        print "############ Calculating Project %s ############### " % project_name

        # Generate a subgraph according to the partition
        FF = FF_all.subgraph(partition)
        AT = AT_all.subgraph(partition)
        RT = RT_all.subgraph(partition)

        # Additional info for each project
        FF.name = "FF_%s " % project_name
        AT.name = "AT_%s " % project_name
        RT.name = "RT_%s " % project_name

        # hp.draw_graph(FF)
        # hp.draw_graph(AT)
        # hp.draw_graph(RT)

        ############### Compute Individual measures ################

        # Compute FF centralities
        # Works fine on binary data
        dFF_bin = nx.degree_centrality(FF)
        dFF_bin_in = nx.in_degree_centrality(FF)    # People that follow me in the network
        dFF_bin_out = nx.out_degree_centrality(FF)  # People that I follow in the network
        dFF_bin_closeness = nx.closeness_centrality(FF)
        dFF_bin_pagerank = nx.pagerank(FF)
        try:
            dFF_bin_eigenvector = nx.eigenvector_centrality(FF, 10000)
        except:
            print "Failed to compute for FF %s " % FF.name
        # if len(nx.weakly_connected_components(FF)) > 1:
        #     FF_comp = FF.subgraph(nx.weakly_connected_components(FF)[0])
        #     dFF_bin_eigenvector = nx.eigenvector_centrality(FF_comp)
        # else:

        # Compute AT centralities
        # Centralities are problematic on weighted data, since we lose all the interaction information
        dAT_bin = nx.degree_centrality(AT)               # binary
        dAT_bin_in = nx.in_degree_centrality(AT)         # binary
        dAT_bin_out = nx.out_degree_centrality(AT)       # binary
        dAT_bin_closeness = nx.closeness_centrality(AT)  # binary
        dAT_bin_pagerank = nx.pagerank(AT)
        try:
            dAT_bin_eigenvector = nx.eigenvector_centrality(AT, 10000)
        except:
            print "Failed to compute for AT %s " % AT.name
        # if len(nx.weakly_connected_components(AT)) > 1:
        #     AT_comp = AT.subgraph(nx.weakly_connected_components(AT)[0])
        #     dFF_bin_eigenvector = nx.eigenvector_centrality(AT_comp)
        # else:

        # Tie strengths
        dAT_avg_tie = hp.individual_average_tie_strength(AT)
        dAT_rec = hp.individual_reciprocity(AT)
        dFF_rec = hp.individual_reciprocity(FF)

        # Dependent variable: see csv below
        # Deprecated since in networkx centrality works only on binary edges
        dRT_in = nx.in_degree_centrality(RT)    # At least one retweet that a person has received
        dRT_out = nx.out_degree_centrality(RT)  # At least one retweet that a person has made

        ############### Output ################
        for node in dFF_bin.keys():
            if node in maximum_subset:
                csv_writer.writerow([project, project_name, node, listings[node]["place"],
                                     dFF_bin[node], dFF_bin_in[node], dFF_bin_out[node],
                                     FF.in_degree(node, weight="weight"), FF.out_degree(node, weight="weight"),
                                     dFF_bin_closeness[node], dFF_bin_pagerank[node], dFF_rec[node],
                                     dAT_bin[node], dAT_bin_in[node], dAT_bin_out[node],
                                     dAT_bin_closeness[node], dAT_bin_pagerank[node], dAT_rec[node], dAT_avg_tie[node],
                                     AT.in_degree(node, weight="weight"), AT.out_degree(node, weight="weight"),
                                     dRT_in[node], dRT_out[node],
                                     RT.in_degree(node, weight="weight"), RT.out_degree(node, weight="weight"),
                                     RT_all.in_degree(node, weight="weight"), RT_all.out_degree(node, weight="weight")])
        i += 1
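

# None of the scripts in this file show their entry point; each defines main(argv) and parses
# options with getopt, which suggests the usual boilerplate below (assumption):
if __name__ == "__main__":
    main(sys.argv[1:])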