def network_measures(graphname):
    """Annotate every node of a GML graph with basic centrality measures.

    The graph stored at ``graphname`` is read, each node receives the
    attributes ``degree`` (int), ``betweenness_centrality`` (float) and
    ``kshell_index`` (int), and the annotated graph is written back to the
    same path, overwriting the input file.

    Self-loops are removed before the k-shell step, so they are also absent
    from the file that is written back.

    Args:
        graphname: path of the GML file to read and overwrite.
    """
    H = nx_old.read_gml(graphname)

    # Degree and betweenness are computed before self-loops are removed.
    for node, deg in nx.degree(H).items():
        H.node[node]['degree'] = int(deg)

    for node, bc in nx.betweenness_centrality(H).items():
        H.node[node]['betweenness_centrality'] = float(bc)

    H.remove_edges_from(H.selfloop_edges())

    # The k-shell a node belongs to is exactly its core number, so a single
    # core_number() call replaces the former loop that extracted
    # k_shell(H, k) for every k up to the maximum degree.  It also works on
    # an empty graph, where the former max() over an empty list raised.
    for node, shell in nx.algorithms.core.core_number(H).items():
        H.node[node]['kshell_index'] = int(shell)

    nx_old.write_gml(H, str(graphname))
def spatio_temporal_network_neighbor_ego_effect(gml_file):
    '''
    Compute, for every node, the mean contribution of its neighbours and of
    its ego network (lines of code added / removed, number of commits).

    For each attribute ``x`` in the list below, two node attributes are set:
    ``neigh_<x>`` (mean of ``x`` over the node's neighbours) and ``ego_<x>``
    (mean of ``x`` over ``nx.ego_graph(G, n)``, which includes the node
    itself).  When a node involved lacks ``x`` (KeyError), the node id, its
    neighbours and their count are printed and the node is skipped.

    The annotated graph is returned; the GML file itself is not rewritten.
    (Previously the function returned nothing, so the computed attributes
    were lost.)

    NOTE: a later definition in this file reuses this function name with a
    (sourcedir, destdir) signature and therefore rebinds it at import time.
    '''
    G = nx_old.read_gml(str(gml_file))
    list_attr_dv = [
        'lines_of_code_added_sum',
        'lines_of_code_added_avg',
        'lines_of_code_removed_sum',
        'lines_of_code_removed_mean',
        'total_num_committs',
    ]

    for x in list_attr_dv:
        print(x)
        for n in G.nodes():
            try:
                G.node[n]['neigh_' + x] = np.mean(
                    [G.node[k][x] for k in G.neighbors(n)])
                G.node[n]['ego_' + x] = np.mean(
                    [G.node[k][x] for k in nx.ego_graph(G, n)])
            except KeyError:
                neighbours = G.neighbors(n)
                print(' '.join(
                    str(item) for item in (n, neighbours, len(neighbours))))
    return G
def network_attribute_node_type(files_csv, gml_file, col_id):
    """Tag every node of a GML graph with its contributor type.

    The tab-separated file ``files_csv`` (first line is a header) is read;
    column ``col_id`` supplies the contributor id and column 5 supplies the
    type string (double quotes are stripped).  Every node whose ``label``
    matches an id receives ``node_type``; the types "firm", "independent"
    and "distributor" additionally receive ``node_color_id`` 1, 2 and 3.
    The graph is written back to ``gml_file``.

    Args:
        files_csv: path of the tab-separated input file.
        gml_file: path of the GML file to read and overwrite.
        col_id: index of the id column (the original note says column G for
            author id and column I for committer id).
    """
    node_color_ids = {"firm": 1, "independent": 2, "distributor": 3}
    id_column = int(col_id)

    H = nx_old.read_gml(gml_file)

    type_by_id = {}
    # ``with`` closes the handle; it used to be left open.
    with open(files_csv, 'r') as f1:
        next(f1, None)  # skip the header line
        for line in f1:
            fields = line.strip().split("\t")
            type_by_id[str(fields[id_column])] = str(
                fields[5].replace('"', ''))

    for n in H.nodes():
        node_type = type_by_id.get(str(H.node[n]['label']))
        if node_type is None:
            # Unknown label: keep the empty-list placeholder that the former
            # defaultdict(list) lookup produced, and assign no colour.
            H.node[n]['node_type'] = []
            continue
        H.node[n]['node_type'] = node_type
        if node_type in node_color_ids:
            H.node[n]['node_color_id'] = node_color_ids[node_type]

    nx_old.write_gml(H, str(gml_file))
def write_network_attr_gml(sourcedir, destdir, n1, n2):
    """Enrich a batch of GML graphs with node and edge attributes.

    Every ``*.gml`` file matched by ``sourcedir + '*.gml'`` is listed in
    sorted order; the slice ``[n1:n2]`` of that list is processed and each
    result is written to ``destdir`` under the same file name.

    Args:
        sourcedir: directory prefix of the input GML files (directed graphs
            of file dependencies).  It is concatenated with ``'*.gml'``, so
            it must end with a path separator.
        destdir: directory prefix for the output files (same convention).
        n1, n2: start and stop index of the slice of files to process.

    Each graph is passed, in order, through
    ``nx_code_complexity.directed_network_measures``, ``check_cycles``,
    ``check_Motifs(G, 3)`` and
    ``nx_comm_carto.communityroledetectionInfomap(G, 'Infomap')``.
    According to the original notes these add, per node, roles and
    cartographic measures from community detection, the number of motifs a
    node is part of and the frequency of motif types; and, per edge, the
    frequency of motif types the edge is part of.
    """
    # glob gives no ordering guarantee; sort so that a given (n1, n2) always
    # selects the same files and disjoint ranges never overlap between runs.
    gml_files = sorted(glob.glob(sourcedir + '*.gml'))

    for gml_file in gml_files[int(n1):int(n2)]:
        print(gml_file)
        outname = gml_file.split('/')[-1]
        G = nx_old.read_gml(gml_file)
        G = nx_code_complexity.directed_network_measures(G)
        G = check_cycles(G)
        G = check_Motifs(G, 3)
        G = nx_comm_carto.communityroledetectionInfomap(G, 'Infomap')
        nx_old.write_gml(G, destdir + str(outname))
def spatio_temporal_network_neighbor_ego_effect(sourcedir, destdir):
    """Add neighbour and ego-network means to every GML graph in a folder.

    For each file matched by ``sourcedir + '*.gml'`` and each attribute in
    the list below, every node gets ``neigh_<attr>`` (mean over its
    neighbours) and ``ego_<attr>`` (mean over ``nx.ego_graph``).  A KeyError
    while collecting the values prints the file, the node, its neighbours
    and their count, and leaves that node as it is.  The graph is then
    written to ``destdir`` under the same file name.
    """
    attribute_names = ['lines_of_code_added_sum']

    for path in glob.glob(sourcedir + '*.gml'):
        filename = path.split('/')[-1]
        graph = nx_old.read_gml(path)

        for attr in attribute_names:
            source_key = str(attr)
            neigh_key = 'neigh' + '_' + source_key
            ego_key = 'ego' + '_' + source_key

            for node in graph.nodes():
                try:
                    around = [
                        graph.node[k][source_key]
                        for k in graph.neighbors(node)
                    ]
                    graph.node[node][neigh_key] = np.mean(around)
                    within_ego = [
                        graph.node[k][source_key]
                        for k in nx.ego_graph(graph, node)
                    ]
                    graph.node[node][ego_key] = np.mean(within_ego)
                except KeyError:
                    report = (path, node, graph.neighbors(node),
                              len(graph.neighbors(node)))
                    print(' '.join(str(part) for part in report))

        graph = nx_old.write_gml(graph, destdir + str(filename))
def neighbor_attribute_node_type(files_csv, gml_file, output_txt):
    """Write mean / std / median of six metrics over each node's neighbours.

    ``files_csv`` is a pipe-separated file (first line is a header) with one
    row per committer: column 0 is the committer id, columns 1, 2 and 3 are
    parsed as ints (files, files added, code added) and columns 4, 5 and 8
    as floats (code total, net contribution total, net contribution
    average).

    For every node of the committer network ``gml_file``, the metrics of
    those neighbours whose ``label`` appears in the CSV are collected; nodes
    without any such neighbour are omitted.  One pipe-separated row per
    remaining node is written to ``output_txt`` after the header line.

    Args:
        files_csv: path of the per-committer data file.
        gml_file: path of the committer network (GML).
        output_txt: path of the text file to create.
    """
    header = (
        'committer_id|neighbor_mean_files|neighbor_std_files|neighbor_med_files|'
        'neighbor_mean_files_added|neighbor_std_files_added|neighbor_med_files_added|'
        'neighbor_mean_codes_tot|neighbor_std_codes_tot|neighbor_med_codes_tot|'
        'neighbor_mean_codes_added|neighbor_std_codes_added|neighbor_med_codes_added|'
        'neighbor_mean_net_contr_total|neighbor_std_net_contr_total|neighbor_median_net_contr_total|'
        'neighbor_mean_net_contr_avg|neighbor_std_net_contr_avg|neighbor_median_net_contr_avg'
    )

    H = nx_old.read_gml(gml_file)

    # committer id -> metrics, already in output-column order:
    # (files, files_added, codes_total, codes_added,
    #  net_contr_total, net_contr_avg)
    metrics_by_committer = {}
    with open(files_csv, 'r') as f1:
        next(f1, None)  # skip the header line
        for line in f1:
            fields = line.strip().split("|")
            metrics_by_committer[str(fields[0])] = (
                float(int(fields[1])),
                float(int(fields[2])),
                float(fields[4]),
                float(int(fields[3])),
                float(fields[5]),
                float(fields[8]),
            )

    # Replacing H.neighbors(n) with nx.ego_graph(H, n) gives the ego-network
    # variant mentioned in the original notes.
    neighbour_metrics = defaultdict(list)
    for n in H.nodes():
        for node in H.neighbors(n):
            neighbour_label = str(H.node[node]['label'])
            if neighbour_label in metrics_by_committer:
                neighbour_metrics[str(H.node[n]['label'])].append(
                    metrics_by_committer[neighbour_label])

    # ``with`` guarantees the output is flushed and closed; the handles
    # were previously never closed.
    with open(str(output_txt), 'w') as outf:
        outf.write(header + '\n')
        for label, rows in neighbour_metrics.items():
            cells = [label]
            # zip(*rows) turns the per-neighbour records into one sequence
            # per metric.
            for column in zip(*rows):
                cells.extend(
                    [np.mean(column), np.std(column), np.median(column)])
            outf.write('|'.join(['%s'] * len(cells)) % tuple(cells) + '\n')
def findCommunitiesInfomap(
        n1, n2, detectionalgo,
        source_pattern='/media/mukherjee/My Book/OpenStack/WeightedDSM/Idea5_weighted_dsm/nova-*.gml',
        destdir='/media/mukherjee/My Book/OpenStack/WeightedDSM/Idea5_weighted_dsm_cartography/'):
    """Partition a batch of GML networks with Infomap and save the result.

    For each selected file the edges are fed to Infomap, and every node
    found in the resulting tree gets two attributes:
    ``community__<detectionalgo>`` (module index) and
    ``community_index_label__<detectionalgo>`` (``"<module>__<node>"``).
    The graph is then passed through ``rolescartography`` and written to
    ``destdir`` under its original file name.  Nothing is returned.

    Args:
        n1, n2: start and stop index of the slice of (sorted) input files.
        detectionalgo: tag appended to the attribute names and forwarded to
            ``rolescartography``.
        source_pattern: glob pattern of the input files.  Defaults to the
            previously hard-coded location.
        destdir: output directory prefix (concatenated with the file name).
            Defaults to the previously hard-coded location.
    """
    algo = str(detectionalgo)

    # Sorted so that a given (n1, n2) always selects the same files; glob
    # itself gives no ordering guarantee.
    gml_files = sorted(glob.glob(source_pattern))

    for gml_file in gml_files[int(n1):int(n2)]:
        print(gml_file)
        G = nx_old.read_gml(gml_file)

        # NOTE(review): the flags presumably request a directed network,
        # 10 trials and no console output -- confirm against the Infomap
        # version in use.
        infomapWrapper = infomap.Infomap("-d -N10 --silent")
        for e in G.edges_iter():
            infomapWrapper.addLink(*e)
        infomapWrapper.run()
        tree = infomapWrapper.tree

        communities = {}
        for node in tree.leafIter(1):
            communities[node.originalLeafIndex] = node.moduleIndex()

        nx.set_node_attributes(G, 'community' + '__' + algo, communities)
        for n in G.nodes():
            if n in communities:
                G.node[n]['community_index_label' + '__' + algo] = (
                    str(communities[n]) + '__' + str(n))

        G = rolescartography(G, algo)
        nx_old.write_gml(G, destdir + str(gml_file.split('/')[-1]))
def gen_null_network_from_raw_data(f1, destdir, n0, n1, n2, n3, nsim,
                                   yearstr, gml1):
    """Collect per-committer attributes from a GML graph.

    Builds a mapping from each node's ``label`` to the string
    ``"<lines_of_code_added_sum>|<tenure_committer>|<avg_MI_committer>"``.
    Nodes lacking any of those attributes are skipped.

    NOTE(review): as it stands the function only builds this mapping and
    then returns ``None``.  ``destdir``, ``n0`` .. ``n3``, ``nsim`` and
    ``yearstr`` are not used, and ``f1`` is only opened, never read.  The
    null-network generation suggested by the name appears to be missing --
    confirm with the author.
    """
    # The handle on f1 used to be opened and then leaked.  It is still
    # opened, so a missing file fails exactly as before, but is now closed
    # straight away.
    with open(f1, 'r'):
        pass

    dict_id_name = defaultdict(list)
    G = nx_old.read_gml(gml1)
    for n in G.nodes():
        attrs = G.node[n]
        try:
            dict_id_name[str(attrs['label'])] = '|'.join([
                str(attrs['lines_of_code_added_sum']),
                str(attrs['tenure_committer']),
                str(attrs['avg_MI_committer']),
            ])
        except KeyError:
            continue
def create_output_from_gml(input_gml_file, output_txt):
    """Export selected node attributes of a GML graph as pipe-separated text.

    Eigenvector centrality is computed here and stored on each node.  The
    other exported attributes (``label``, ``nrole``, ``degree``,
    ``betweenness_centrality``, ``community_index_infm``, ``kshell_index``)
    must already be present on every node, otherwise a KeyError is raised.

    Args:
        input_gml_file: path of the GML file to read.
        output_txt: path of the text file to create.
    """
    H = nx_old.read_gml(input_gml_file)

    for node, centrality in nx.eigenvector_centrality(H).items():
        H.node[node]['eigenvector_centrality'] = float(centrality)

    # Attribute names in output-column order (the first is written as "id").
    columns = ('label', 'nrole', 'degree', 'betweenness_centrality',
               'community_index_infm', 'kshell_index',
               'eigenvector_centrality')

    # ``with`` guarantees the output is flushed and closed; the handle was
    # previously never closed.
    with open(str(output_txt), 'w') as outf:
        outf.write('id|nrole|degree|betweenness_centrality|'
                   'community_index_infm|kshell_index|'
                   'eigenvector_centrality\n')
        for n in H.nodes():
            attrs = H.node[n]
            outf.write('%s|%s|%s|%s|%s|%s|%s\n'
                       % tuple(attrs[name] for name in columns))
def measure_developer_attributes(fadd, frem, fcompl, yearstr, gml1):
    '''
    Attach per-developer contribution and complexity measures to a network.

    Usage:
    fadd : pipe-separated file with lines of code added by developers
    frem : pipe-separated file with lines of code removed by developers
    fcompl : pipe-separated file with code complexity measures
    yearstr : only records from this year are used
    gml1 : GML file holding the network; it is read and then overwritten

    In ``fadd`` / ``frem`` column 0 is the line count, column 6 the
    committer name and column 8 the commit date.  In ``fcompl`` column 0 is
    the commit id, column 4 the committer name, column 9 the year, and
    columns 11, 18 and 19 are stored as MI, HV and CC.  The first line of
    every file is treated as a header.

    A committer is annotated only when present in all three files for the
    given year.  Matching nodes (by ``label``) receive
    ``lines_of_code_added_sum``, ``lines_of_code_added_avg``,
    ``lines_of_code_removed_sum``, ``lines_of_code_removed_mean``,
    ``tenure_committer`` (days between first and last commit),
    ``total_num_committs``, ``avg_MI_committer``, ``avg_HV_committer`` and
    ``avg_CC_committer``.  Every node additionally receives ``triangle``
    and one ``LCC_<weight>`` clustering coefficient per edge weight listed
    below.

    Changes with respect to the previous implementation:
    - ``lines_of_code_removed_mean`` is now a mean (it was a sum);
    - first / last commit are chosen chronologically (the date strings were
      compared lexicographically);
    - rows whose date carries no " +" / " -" UTC offset are skipped instead
      of silently reusing the previous row's date;
    - values are passed on as numbers instead of through a lossy string
      round trip, and the input files are closed after reading.
    '''
    from datetime import datetime
    import time

    time_format = '%d %b %Y %H:%M:%S'
    target_year = int(yearstr)

    def committer_key(raw_name):
        # Same normalisation for all three input files.
        return str(raw_name.replace(' ', '_').replace('-', '_').replace(
            '__', '_').split('_(')[0])

    def commit_time(date_field):
        # Keeps the text between the first ', ' and the UTC offset;
        # presumably turns 'Tue, 12 Mar 2013 10:00:00 -0700' into
        # '12 Mar 2013 10:00:00' (confirm against the data).  ' +' is tried
        # first because it took precedence in the original code.
        date_field = str(date_field)
        for offset_marker in (' +', ' -'):
            if offset_marker in date_field:
                return str(
                    date_field.split(', ')[1].split(offset_marker)[0])
        return None

    def read_rows(path):
        # Return the data rows of a pipe-separated file, header skipped.
        with open(path, 'r') as handle:
            next(handle, None)
            return [line.strip().split('|') for line in handle]

    G = nx_old.read_gml(gml1)

    ### file with lines of code added info
    commit_times = defaultdict(list)
    lines_added = defaultdict(list)
    for fields in read_rows(fadd):
        name = committer_key(fields[6])
        stamp = commit_time(fields[8])
        if stamp is None:
            continue
        if int(stamp.split(' ')[2]) == target_year:
            commit_times[name].append(stamp)
            lines_added[name].append(int(fields[0]))

    ### file with lines of code removed info
    lines_removed = defaultdict(list)
    for fields in read_rows(frem):
        name = committer_key(fields[6])
        stamp = commit_time(fields[8])
        if stamp is None:
            continue
        if int(stamp.split(' ')[2]) == target_year:
            lines_removed[name].append(int(fields[0]))

    ### committers present in both of the above datasets
    contribution = {}
    for name, added in lines_added.items():
        if name not in lines_removed:
            continue
        removed = lines_removed[name]
        # Parse before taking min / max so the comparison is chronological.
        stamps = [datetime.strptime(s, time_format)
                  for s in commit_times[name]]
        first_commit = min(stamps)
        last_commit = max(stamps)
        tenure_days = abs(
            time.mktime(first_commit.timetuple()) -
            time.mktime(last_commit.timetuple())) * 1.0 / (60 * 60 * 24)
        contribution[name] = (
            np.sum(added),
            np.mean(added),
            np.sum(removed),
            np.mean(removed),
            tenure_days,
            len(stamps),
        )

    ### complexity file: the last row wins for each (commit, committer) pair
    complexity_by_commit = {}
    for fields in read_rows(fcompl):
        if int(fields[9]) == target_year:
            key = (str(fields[0]), committer_key(fields[4]))
            complexity_by_commit[key] = (fields[11], fields[18], fields[19])

    complexity_by_committer = defaultdict(list)
    for (_commit_id, name), (mi, hv, cc) in complexity_by_commit.items():
        complexity_by_committer[name].append(
            (float(mi), float(hv), float(cc)))

    ### merge: contribution figures followed by mean MI, HV, CC
    committer_info = {}
    for name, rows in complexity_by_committer.items():
        if name in contribution:
            mean_complexity = tuple(
                np.mean(column) for column in zip(*rows))
            committer_info[name] = contribution[name] + mean_complexity

    attribute_names = (
        'lines_of_code_added_sum',
        'lines_of_code_added_avg',
        'lines_of_code_removed_sum',
        'lines_of_code_removed_mean',
        'tenure_committer',
        'total_num_committs',
        'avg_MI_committer',
        'avg_HV_committer',
        'avg_CC_committer',
    )
    for node in G.nodes():
        label = str(G.node[node]['label'])
        if label in committer_info:
            for attribute, value in zip(attribute_names,
                                        committer_info[label]):
                G.node[node][attribute] = float(value)

    ### triangles ###
    for node, val in nx.triangles(G).items():
        G.node[node]['triangle'] = float(val)

    ### local clustering coefficient, once per edge weight ###
    weightlists = [
        'wt_n_com_code', 'mean_spatial_inter', 'std_spatial_inter',
        'median_spatial_inter', 'diff_90_10_spatial',
        'wt_hm_diff_first_commit_time', 'wt_hsum_diff_first_commit_time',
        'wt_hm_diff_last_commit_time', 'wt_hsum_diff_last_commit_time',
        'wt_mean_joint_commit', 'wt_sum_joint_commit',
        'wt_mu_inter_commit_time', 'wt_std_inter_commit_time',
    ]
    for wtl in weightlists:
        for node, val in nx.clustering(G, weight=str(wtl)).items():
            G.node[node]['LCC' + '_' + str(wtl)] = float(val)

    nx_old.write_gml(G, str(gml1))
def _sum_of_matrix_powers(matrix, highest_power):
    """Return I + M + M^2 + ... + M^highest_power as a float ndarray."""
    base = np.asarray(matrix, dtype=float)
    term = np.identity(base.shape[0])
    total = term.copy()
    for _ in range(highest_power):
        term = np.dot(term, base)
        total += term
    return total


def gen_visibility_matrix_dsm_adjacency(sourcedir, destdir, n1, n2):
    """Compute the propagation cost of a batch of DSM graphs.

    Generates the visibility matrix of each DSM and evaluates the
    propagation cost as defined by Baldwin (per the original note).  For
    each selected ``nova-<commitid>.gml`` file the powers of the adjacency
    matrix are summed from 0 up to the largest number of descendants any
    node has, and the sum of all entries divided by ``n_nodes ** 2`` is
    reported, once for the unweighted and once for the ``'weight'``-weighted
    adjacency matrix.  Graphs without nodes are skipped.

    Output: ``destdir + 'commitID_propagation_costs__2012__<n1>__<n2>.txt'``
    with one line ``commitid|propagation_cost|weighted_propagation_cost``
    per graph.

    Args:
        sourcedir: directory prefix of the input files (concatenated with
            ``'nova-*.gml'``).
        destdir: directory prefix of the output file.
        n1, n2: start and stop index of the slice of (sorted) input files.

    Changes with respect to the previous implementation, which failed with
    a NameError on the first graph because its output line referenced five
    undefined ``propagation_cost_M*_col`` names:
    - only the values actually computed are written, now including the
      weighted cost;
    - the power series includes the highest power.  It used to stop one
      short, so the longest indirect dependency was missed (for a -> b -> c
      the a -> c entry comes from M^2) and a graph without edges crashed.

    NOTE(review): entries of the summed matrix count walks and can exceed
    1.  Propagation cost is usually stated on a binary visibility matrix --
    confirm whether the unweighted sum should be clipped to 0/1.
    """
    # Sorted so that a given (n1, n2) always selects the same files; glob
    # itself gives no ordering guarantee.
    gml_files = sorted(glob.glob(sourcedir + 'nova-*.gml'))
    out_path = (destdir + 'commitID_propagation_costs__2012' + '__' +
                str(n1) + '__' + str(n2) + '.txt')

    with open(out_path, 'w') as outfile:
        for gml_file in gml_files[int(n1):int(n2)]:
            commitid = gml_file.split('/')[-1][:-4].split('-')[1]
            G = nx_old.read_gml(gml_file)

            nodes = G.nodes()
            n_nodes = len(nodes)
            if n_nodes == 0:
                continue

            # No node reaches more nodes than this, so no shortest
            # dependency chain is longer than this many steps.
            power = max(len(nx.descendants(G, n)) for n in nodes)

            adjacency_matrix = nx.to_numpy_matrix(G, weight=None)
            adjacency_matrix_wt = nx.to_numpy_matrix(G, weight='weight')

            visibility = _sum_of_matrix_powers(adjacency_matrix, power)
            visibility_w = _sum_of_matrix_powers(adjacency_matrix_wt, power)

            propagation_cost_dsm = np.sum(visibility) * 1.0 / n_nodes ** 2
            propagation_cost_dsm_wc = (
                np.sum(visibility_w) * 1.0 / n_nodes ** 2)

            outfile.write('%s|%s|%s\n' % (
                commitid, propagation_cost_dsm, propagation_cost_dsm_wc))
def plot_network_nx(graphname, iter1, sizeres, edgewidth):
    """Draw a committer network coloured by k-shell index.

    Nodes are laid out with ``nx.spring_layout``.  Only nodes whose
    ``node_type`` is "firm", "independent" or "distributor" are drawn, in
    that order.  Node size is ``sizeres * log10(1 + degree)`` and node
    colour is the k-shell index rescaled to [0, 1] and mapped through the
    ``jet`` colormap.  The figure is shown with ``plt.show()``.

    Every node must carry ``id``, ``degree``, ``kshell_index`` and
    ``node_type``.  A graph without nodes raises ValueError (min() of an
    empty list).

    Args:
        graphname: path of the GML file to draw.
        iter1: number of spring-layout iterations.
        sizeres: node-size scale factor (converted with ``int``).
        edgewidth: edge line width (converted with ``int``).
    """
    # The former function-level import of graphviz_layout was unused and
    # has been dropped.
    G = nx_old.read_gml(graphname)
    fig = plt.figure()
    fig.add_subplot(111)
    pos2 = nx.spring_layout(G, iterations=int(iter1))

    shells = []
    records = []  # (id, size, kshell_index, node_type) per node
    for n in G.nodes():
        attrs = G.node[n]
        shells.append(int(attrs['kshell_index']))
        size = int(sizeres) * (np.log10(1 + float(attrs['degree'])))
        records.append(
            (attrs['id'], size, attrs['kshell_index'], attrs['node_type']))

    lowest, highest = min(shells), max(shells)
    print(' '.join(str(item) for item in (lowest, highest, shells)))

    # When every node has the same k-shell index the span is zero; use 0.0
    # instead of dividing by zero.
    span = float(highest - lowest)

    draw_order = ("firm", "independent", "distributor")
    groups = dict((node_type, []) for node_type in draw_order)
    for ident, size, shell, node_type in records:
        members = groups.get(str(node_type))
        if members is None:
            continue  # other node types are not drawn
        scaled = float(shell - lowest) * 1.0 / span if span else 0.0
        members.append((ident, size, plt.cm.jet(scaled)))

    for node_type in draw_order:
        members = groups[node_type]
        nx.draw_networkx_nodes(
            G, pos2,
            nodelist=[m[0] for m in members],
            node_size=[m[1] for m in members],
            node_shape='o',
            node_color=[m[2] for m in members],
            cmap=plt.cm.jet,
            linewidths=None)

    nx.draw_networkx_edges(
        G, pos2, edgelist=None, width=int(edgewidth), edge_color='k',
        style='solid', alpha=0.35)
    plt.axis('off')
    plt.show()
def null_model_shuffle_edge_weights(gml1, year, nsim, destdir=None):
    """Generate null networks by shuffling edge weights among the edges.

    The spatial (``mean_spatial_inter``) and temporal
    (``wt_mu_inter_commit_time``) edge weights are shuffled independently
    across the edges of the observed network, so two edges (u1, v1, w1) and
    (u2, v2, w2) may become (u1, v1, w2) and (u2, v2, w1).  Topology is
    kept, but the null network is an undirected ``nx.Graph`` keyed by node
    ``label``.  Each null node receives its weighted degree for both weights
    (``s_<weight>``) and, when available on the observed node, ``label``,
    ``lines_of_code_added_sum``, ``tenure_committer`` and
    ``avg_MI_committer``.

    Files ``sim__<i>.gml`` are written for i = 0 .. nsim inclusive, i.e.
    ``nsim + 1`` networks, as in the original loop.

    Args:
        gml1: path of the observed network (GML).
        year: used to build the default destination directory.
        nsim: highest simulation index.
        destdir: output directory prefix.  Defaults to the previously
            hard-coded
            ``'../../Idea03_06/Data/Null/networks_from_edge_val_shuffle/<year>/'``.
            It is created when missing.
    """
    import os

    if destdir is None:
        destdir = ('../../Idea03_06/Data/Null/networks_from_edge_val_shuffle/'
                   + str(year) + '/')
    # The original comment announced the creation of the destination folder
    # but nothing created it.
    if not os.path.isdir(destdir):
        os.makedirs(destdir)

    # The observed network is never modified, so it is read once instead of
    # once per simulation.
    G = nx_old.read_gml(gml1)

    edge_list = []
    spatial_weights = []
    temporal_weights = []
    for (u, v, d) in G.edges(data=True):
        spatial_weights.append(d['mean_spatial_inter'])
        temporal_weights.append(d['wt_mu_inter_commit_time'])
        edge_list.append((G.node[u]['label'], G.node[v]['label']))

    # label -> raw attribute values.  Nodes lacking an attribute are
    # skipped, as gen_null_network_from_raw_data does, instead of aborting
    # the run with a KeyError.
    attribute_names = ('lines_of_code_added_sum', 'tenure_committer',
                       'avg_MI_committer')
    attrs_by_label = {}
    for n in G.nodes():
        data = G.node[n]
        try:
            attrs_by_label[str(data['label'])] = tuple(
                data[name] for name in attribute_names)
        except KeyError:
            continue

    weightlists = ["mean_spatial_inter", "wt_mu_inter_commit_time"]

    for i in range(0, int(nsim) + 1, 1):
        shuffled_spatial = list(spatial_weights)
        shuffled_temporal = list(temporal_weights)
        # One-argument form: the optional second argument of shuffle() no
        # longer exists on recent Python versions.
        shuffle(shuffled_spatial)
        shuffle(shuffled_temporal)

        # Materialised because it is iterated twice below.
        weighted_edges = list(
            zip(edge_list, shuffled_spatial, shuffled_temporal))

        G_null = nx.Graph()
        G_null.add_weighted_edges_from(
            [(u, v, spatial) for ((u, v), spatial, _t) in weighted_edges],
            weight="mean_spatial_inter")
        G_null.add_weighted_edges_from(
            [(u, v, temporal) for ((u, v), _s, temporal) in weighted_edges],
            weight="wt_mu_inter_commit_time")

        # Node strength = weighted degree, once per weight.
        for wtl in weightlists:
            for node, strength in nx.degree(G_null, weight=wtl).items():
                G_null.node[node]['s' + '_' + wtl] = float(strength)

        for n in G_null.nodes():
            values = attrs_by_label.get(str(n))
            if values is None:
                continue
            G_null.node[n]['label'] = str(n)
            for name, value in zip(attribute_names, values):
                G_null.node[n][name] = float(value)

        nx_old.write_gml(G_null, str(destdir + 'sim__' + str(i) + '.gml'))