def analysis(graph, seed, calc_nrd=True, calc_ncc=True, calc_depth=True):
    """Annotate every node of *graph* with selected statistics and return it.

    Parameters
    ----------
    graph : bipartite networkx graph; node attributes are updated in place.
    seed : node to start the BFS depth computation from.
    calc_nrd, calc_ncc, calc_depth : bool
        Flags selecting which metrics to compute ('nrd' = node redundancy,
        'ncc' = bipartite clustering with mode='min', 'depth' = BFS depth).

    Returns
    -------
    The same graph object, with the selected node attributes set.
    """
    logging.info('Computing Statistics')
    depth, nrd, ncc = {}, {}, {}
    if calc_depth:
        depth = _bfs_depth(graph, seed)
    if calc_nrd:
        nrd = bipartite.node_redundancy(graph)
    if calc_ncc:
        ncc = bipartite.clustering(graph, mode='min')
    # Renamed loop variable: the original shadowed the builtin ``id``.
    for node_id in graph.nodes():
        node = graph.node[node_id]
        if calc_depth:
            node['depth'] = depth[node_id]
        if calc_nrd:
            node['nrd'] = nrd[node_id]
        if calc_ncc:
            node['ncc'] = ncc[node_id]
        # ``node`` is the live attribute dict, so the original's
        # ``graph.node[node_id] = node`` write-back was redundant.
    return graph
def analysis(graph, seed, calc_nrd=True, calc_ncc=True, calc_depth=True):
    """Compute per-node statistics and store them as node attributes.

    Depending on the flags, computes BFS depth from *seed*, bipartite node
    redundancy ('nrd') and bipartite clustering with mode='min' ('ncc'),
    then writes each selected value onto the corresponding node of *graph*.
    Returns the annotated graph.
    """
    logging.info('Computing Statistics')
    # Gather each requested metric into one mapping: attribute -> value dict.
    # Insertion order (depth, nrd, ncc) mirrors the original assignment order.
    metrics = {}
    if calc_depth:
        metrics['depth'] = _bfs_depth(graph, seed)
    if calc_nrd:
        metrics['nrd'] = bipartite.node_redundancy(graph)
    if calc_ncc:
        metrics['ncc'] = bipartite.clustering(graph, mode='min')
    for id in graph.nodes():
        node = graph.node[id]
        for attr, values in metrics.items():
            node[attr] = values[id]
        graph.node[id] = node
    return graph
def analyze_graph(G):
    """Annotate every node of *G* with centrality metrics and a community id.

    Computes degree-, path- and spectral-based centralities plus bipartite
    node redundancy, stores each as a node attribute, then assigns each node
    a 'community' label via python-louvain's best_partition. Returns *G*.
    """
    # Centralities and per-node metrics.
    out_degrees = G.out_degree()
    in_degrees = G.in_degree()
    betweenness = nx.betweenness_centrality(G)
    eigenvector = nx.eigenvector_centrality_numpy(G)
    closeness = nx.closeness_centrality(G)
    pagerank = nx.pagerank(G)
    avg_neighbour_degree = nx.average_neighbor_degree(G)
    redundancy = bipartite.node_redundancy(G)
    load = nx.load_centrality(G)
    # BUG FIX: nx.hits() returns a (hubs, authorities) pair of dicts; the
    # original indexed that tuple by node name, which raises at runtime.
    # We store the hub score under the existing 'hits' attribute key;
    # authorities[name] is also available if the callers need it.
    hubs, authorities = nx.hits(G)
    vitality = nx.closeness_vitality(G)
    for name in G.nodes():
        G.node[name]['out_degree'] = out_degrees[name]
        G.node[name]['in_degree'] = in_degrees[name]
        G.node[name]['betweenness'] = betweenness[name]
        G.node[name]['eigenvector'] = eigenvector[name]
        G.node[name]['closeness'] = closeness[name]
        G.node[name]['pagerank'] = pagerank[name]
        G.node[name]['avg-neigh-degree'] = avg_neighbour_degree[name]
        G.node[name]['redundancy'] = redundancy[name]
        G.node[name]['load'] = load[name]
        G.node[name]['hits'] = hubs[name]
        G.node[name]['vitality'] = vitality[name]
    # Communities (python-louvain).
    partitions = community.best_partition(G)
    for member, c in partitions.items():
        G.node[member]['community'] = c
    return G
def test_redundant_nodes():
    """Adding a chord to C_6 lowers redundancy only at the chord's endpoints."""
    G = cycle_graph(6)
    chord = {0, 3}
    G.add_edge(*chord)
    rc = node_redundancy(G)
    for node in chord:
        assert rc[node] == 2 / 3
    for node in set(G) - chord:
        assert rc[node] == 1
def test_redundant_nodes():
    """Endpoints of the added chord have redundancy 2/3; all other nodes 1."""
    G = cycle_graph(6)
    extra = {0, 3}
    G.add_edge(*extra)
    rc = node_redundancy(G)
    untouched = set(G) - extra
    for v in extra:
        assert_equal(rc[v], 2 / 3)
    for v in untouched:
        assert_equal(rc[v], 1)
def test_not_enough_neighbors():
    """node_redundancy must reject a graph containing a degree-1 node."""
    with pytest.raises(NetworkXError):
        # K_{1,2}: the two degree-1 nodes make redundancy undefined.
        node_redundancy(complete_bipartite_graph(1, 2))
def test_no_redundant_nodes():
    """In K_{2,2} each node's neighbours are fully shared, so redundancy is 1."""
    G = complete_bipartite_graph(2, 2)
    for score in node_redundancy(G).values():
        assert score == 1
def test_not_enough_neighbors():
    # NOTE(review): node_redundancy raises NetworkXError on a graph with a
    # degree-1 node, and K_{1,2} has two such nodes — yet nothing here catches
    # or asserts that exception.  This looks like a nose-style test whose
    # ``@raises(NetworkXError)`` decorator is not visible in this chunk;
    # confirm against the original file, because as written the call below
    # would simply error the test out.
    G = complete_bipartite_graph(1, 2)
    node_redundancy(G)
# NOTE(review): fragment — the statements up to ``edgeArr.append`` appear to
# be the body of a per-line loop over the input file; the loop header is not
# visible in this chunk.
lineArr.append(line)  # NOTE(review): appended BEFORE the field-count check
                      # below, so malformed lines are buffered and written
                      # back out later — confirm this is intended.
length = len(line.split(","))
if length != 8: continue;
srcIp = line.split(",")[1]   # second CSV field is the source IP
destIp = line.split(",")[length -2]  # second-to-last field is the destination IP
edgeArr.append((srcIp,destIp))
print('File Read , Now Creating Graph for Day val = ',dayVal)
# Build the (bipartite src/dest) graph from the collected IP pairs.
G.add_edges_from(edgeArr)
print('Edges created')
# redundancyMap = bipartite.node_redundancy(G)
redundancyMap = bipartite.node_redundancy(G)
# Keep only IPs whose bipartite node redundancy is non-zero.
validIps = {}
for el in redundancyMap:
    if redundancyMap[el] != 0.0: validIps[el] = True
# print(redundancyMap)
print('Redundancy done for Day = ',dayVal)
print('Now writing into the file for day = ',dayVal)
# Route each buffered line to writeFile (some endpoint redundant) or
# writeFile2 (neither endpoint redundant).
for line in lineArr:
    srcIp = line.split(",")[1]
    # NOTE(review): ``length`` is stale here — it still holds the field count
    # of the LAST line read in the loop above, not this line's; lines with a
    # different field count are indexed at the wrong position (or IndexError).
    destIp = line.split(",")[length -2]
    if srcIp in validIps or destIp in validIps:
        writeFile.write(line);
    else:
        writeFile2.write(line)
def write_developer_contrib_df(fname='data/developer_contributions_df.csv'):
    """Write one CSV row per (year, developer) with contribution metrics.

    For every yearly collaboration network starting at 1992, computes PEP
    authorship counts, degree/weighted-degree contributions, connectivity
    (k-component) numbers, centralities and clustering scores, and appends
    a row per active developer to *fname*.
    """
    # NOTE(review): 'wb' mode with csv.writer plus dev.encode('utf8') below is
    # Python 2 csv convention; under Python 3 this would need mode 'w' with
    # newline='' and no manual encoding — confirm the target interpreter.
    ids = utils.UniqueIdGenerator()
    # Only PEPs with a known creation date can be bucketed by year.
    peps = [pep for pep in get_peps() if pep.created is not None]
    connectivity = utils.load_result_pkl(connectivity_file)
    centrality = utils.load_result_pkl(centrality_file)
    networks_gen = networks_by_year()
    skip = next(networks_gen)  # drop the first network (pre-1992)
    networks = list(networks_gen)
    years = range(1992, 2015)
    devs_by_year = get_developers_by_years(networks=networks)
    with open(fname, 'wb') as f:
        out = csv.writer(f)
        out.writerow([
            'id',
            'year',
            'dev',
            'has_written_peps',
            'has_written_acc_peps',
            'is_delegate',
            'peps_this_year',
            'total_peps',
            'accepted_peps_year',
            'total_accepted_peps',
            'degree',
            'contributions_sc',
            'contributions_edits',
            'contributions_added',
            'contributions_deleted',
            'collaborators',
            'knum',
            'aknum',
            'top',
            'top2',
            'tenure',
            'betweenness',
            'closeness',
            'degree_cent',
            'file_mean_degree',
            'clus_sq',
            'clus_dot',
            'clus_red',
        ])
        for year, G in zip(years, networks):
            print("Analyzing {}".format(G.name))
            # Per-year PEP authorship and delegate lookups.
            bdfl_delegates = get_delegates_by_year(year, peps=peps)
            peps_this_year = peps_by_developer_that_year(year, peps=peps)
            peps_until_year = peps_by_developer_until_year(year, peps=peps)
            acc_peps_this_year = accepted_peps_by_developer_that_year(year, peps=peps)
            acc_peps_until_year = accepted_peps_by_developer_until_year(year, peps=peps)
            top = get_developers_top_connectivity_by_year(G, year, connectivity=connectivity)
            top2 = get_developers_top_connectivity_by_year_new(G, year, connectivity=connectivity)
            devs = devs_by_year[year]
            tenure = compute_tenure_by_year(year, networks=networks)
            k_num = connectivity[year]['k_num']
            # Normalized centrality vectors for this year's network.
            bet = normalize(centrality[year]['bet'])
            clos = normalize(centrality[year]['clos'])
            deg = normalize(centrality[year]['deg'])
            clus_sq = nx.square_clustering(G)
            clus_dot = bp.clustering(G)
            clus_red = bp.node_redundancy(G)
            for dev in devs:
                out.writerow([
                    ids[dev],
                    year,
                    dev.encode('utf8'),
                    1 if dev in peps_until_year else 0,  # developer has written at least a pep
                    1 if dev in acc_peps_until_year else 0,  # developer has written at least an acc. pep
                    1 if dev in bdfl_delegates else 0,  # developer has been BDFL delegate
                    peps_this_year[dev] if dev in peps_this_year else 0,  # peps written this year
                    peps_until_year[dev] if dev in peps_until_year else 0,  # peps written until this year
                    acc_peps_this_year[dev] if dev in acc_peps_this_year else 0,  # peps acc. this year
                    acc_peps_until_year[dev] if dev in acc_peps_until_year else 0,  # total peps acc.
                    len(G[dev]),  #G.degree(dev, weight=None),
                    G.degree(dev, weight='weight'),  # lines of code added plus deleted
                    G.degree(dev, weight='edits'),  # number files edit
                    G.degree(dev, weight='added'),  # lines of code added
                    G.degree(dev, weight='deleted'),  # lines of code removed
                    second_order_nbrs(G, dev),  # second order neighbors
                    k_num[dev][0],  # k-component number
                    k_num[dev][1],  # Average k-component number
                    1 if dev in top else 0,  # top connectivity level
                    1 if dev in top2 else 0,  # top 2 connectivity level
                    tenure[dev],
                    bet[dev],
                    clos[dev],
                    deg[dev],
                    sum(len(G[n]) for n in G[dev]) / float(len(G[dev])),
                    clus_sq[dev],
                    clus_dot[dev],
                    clus_red[dev],
                ])
def build_survival_data_frame(fname=survival_file):
    """Write a survival-analysis CSV: one row per developer per period.

    For each year from 1993 on, emits one row per developer active in the
    PREVIOUS year's network, with status = 1 (event) if the developer never
    appears again in any later year, plus PEP authorship, connectivity,
    clustering and centrality covariates measured on the previous network.
    """
    # NOTE(review): 'wb' mode with csv.writer plus dev.encode('utf8') below is
    # Python 2 csv convention; under Python 3 use mode 'w' with newline=''.
    nan = float('nan')
    ids = utils.UniqueIdGenerator()
    connectivity = utils.load_result_pkl(connectivity_file)
    centrality = utils.load_result_pkl(centrality_file)
    peps = [pep for pep in get_peps() if pep.created is not None]
    networks = list(networks_by_year())
    devs = get_developers_by_years(networks=networks)
    skip = networks.pop(0)  # skip 1991
    G_start = networks.pop(0)  # start with 1992
    devs_start = set(n for n, d in G_start.nodes(data=True) if d['bipartite']==1)
    years = range(1993, 2015)
    with open(fname, 'wb') as f:
        out = csv.writer(f)
        out.writerow([
            'id',
            'dev',
            'period',
            'rstart',
            'rstop',
            'status',
            'has_written_peps',
            'has_written_acc_peps',
            'peps_this_year',
            'total_peps',
            'accepted_peps_year',
            'total_accepted_peps',
            'biconnected',
            'top',
            'tenure',
            'colaborators',
            'knum',
            'aknum',
            'clus_sq',
            'clus_dot',
            'clus_red',
            'degree',
            'contributions',
            'dcentrality',
            'betweenness',
            'closeness',
        ])
        previous_devs = devs_start
        previous_year = 1992
        previous_G = G_start
        for i, (year, G) in enumerate(zip(years, networks)):
            print("processing year {}".format(previous_year))
            these_devs = devs[year]
            # Developers seen in ANY later year: still "alive" (censored).
            remaining_devs = get_all_remaining_devs(devs, years[i:])
            top_devs = get_developers_top_connectivity(
                connectivity[previous_year]['k_components'], previous_devs)
            tenure = compute_tenure_by_year(previous_year)
            bet = normalize(centrality[previous_year]['bet'])
            # BUG FIX: closeness was normalized from the 'bet' (betweenness)
            # vector; use 'clos', consistent with write_developer_contrib_df.
            clos = normalize(centrality[previous_year]['clos'])
            deg = normalize(centrality[previous_year]['deg'])
            # Computed once per iteration (the original computed square
            # clustering twice — an expensive duplicate with no effect).
            clus_sq = nx.square_clustering(previous_G)
            clus_dot = bp.clustering(previous_G)
            clus_red = bp.node_redundancy(previous_G)
            peps_this_year = peps_by_developer_that_year(previous_year, peps=peps)
            peps_until_year = peps_by_developer_until_year(previous_year, peps=peps)
            acc_peps_this_year = accepted_peps_by_developer_that_year(previous_year, peps=peps)
            acc_peps_until_year = accepted_peps_by_developer_until_year(previous_year, peps=peps)
            for dev in previous_devs:
                out.writerow([
                    ids[dev],  # developer numerical ID
                    dev.encode('utf8'),  # developer name
                    i + 1,  # period
                    i,  # start
                    i + 1,  # stop
                    0 if dev in remaining_devs else 1,  # status (censored)
                    1 if dev in peps_until_year else 0,  # developer has written at least a pep
                    1 if dev in acc_peps_until_year else 0,  # developer has written at least an acc. pep
                    peps_this_year[dev] if dev in peps_this_year else 0,  # peps written this year
                    peps_until_year[dev] if dev in peps_until_year else 0,  # peps written until this year
                    acc_peps_this_year[dev] if dev in acc_peps_this_year else 0,  # peps acc. this year
                    acc_peps_until_year[dev] if dev in acc_peps_until_year else 0,  # total peps acc.
                    0 if connectivity[previous_year]['k_num'][dev][0] < 2 else 1,  # biconnected
                    0 if dev not in top_devs else 1,  # member of the top connectivity level
                    tenure[dev],  # tenure in years
                    second_order_nbrs(previous_G, dev),  # collaborators
                    connectivity[previous_year]['k_num'].get(dev, (nan, nan))[0],  # knum
                    connectivity[previous_year]['k_num'].get(dev, (nan, nan))[1],  # aknum
                    clus_sq.get(dev, nan),
                    clus_dot.get(dev, nan),
                    clus_red.get(dev, nan),
                    previous_G.degree(dev),  # degree
                    previous_G.degree(dev, weight='weight'),  # contributions
                    deg.get(dev, nan),
                    bet.get(dev, nan),
                    clos.get(dev, nan),
                ])
            previous_devs = these_devs
            previous_year = year
            previous_G = G
# NOTE(review): fragment — the statements up to ``edgeArr.append`` appear to
# be the body of a per-line loop over the input file; the loop header is not
# visible in this chunk.
lineArr.append(line)  # NOTE(review): appended BEFORE the field-count check
                      # below, so malformed lines are buffered and written
                      # back out later — confirm this is intended.
length = len(line.split(","))
if length != 8: continue
srcIp = line.split(",")[1]   # second CSV field is the source IP
destIp = line.split(",")[length - 2]  # second-to-last field is the destination IP
edgeArr.append((srcIp, destIp))
print('File Read , Now Creating Graph for Day val = ', dayVal)
# Build the (bipartite src/dest) graph from the collected IP pairs.
G.add_edges_from(edgeArr)
print('Edges created')
# redundancyMap = bipartite.node_redundancy(G)
redundancyMap = bipartite.node_redundancy(G)
# Keep only IPs whose bipartite node redundancy is non-zero.
validIps = {}
for el in redundancyMap:
    if redundancyMap[el] != 0.0: validIps[el] = True
# print(redundancyMap)
print('Redundancy done for Day = ', dayVal)
print('Now writing into the file for day = ', dayVal)
# Route each buffered line to writeFile (some endpoint redundant) or
# writeFile2 (neither endpoint redundant).
for line in lineArr:
    srcIp = line.split(",")[1]
    # NOTE(review): ``length`` is stale here — it still holds the field count
    # of the LAST line read in the loop above, not this line's; lines with a
    # different field count are indexed at the wrong position (or IndexError).
    destIp = line.split(",")[length - 2]
    if srcIp in validIps or destIp in validIps:
        writeFile.write(line)
    else:
        writeFile2.write(line)