示例#1
0
def contribution_percentage(result=None):
    """Summarise developer contributions per year and per k-component level.

    Parameters
    ----------
    result : dict, optional
        Mapping ``year -> {'k_components': {k: [component, ...]}}`` in which
        the node set of each component sits at index 1.  Loaded from
        ``connectivity_file`` when omitted.

    Returns
    -------
    dict
        ``{year: {'total': row, k: row, ...}}``.  A row for level ``k`` is
        ``(developers, % of developers, contributions, % of contributions,
        contributions per developer)``.  The ``'total'`` row is
        ``(developers, 1, contributions, 1, contributions per developer)``.
        Levels absent from a year get an all-zero row; levels that exist but
        contain no developers get no entry at all (a message is printed).
    """
    if result is None:
        result = utils.load_result_pkl(connectivity_file)
    # Largest k observed in any year; every year is reported up to this level.
    max_k = max(flatten([result[year]['k_components'].keys()
                         for year in result]))
    contributions = {}
    for G in networks_by_year():
        year = G.graph['year']
        year_rows = contributions[year] = {}
        # Developers are the nodes flagged with bipartite == 1.
        devs = {node for node, attrs in G.nodes(data=True)
                if attrs['bipartite'] == 1}
        total = float(sum(G.degree(devs, weight='weight').values()))
        n_devs = float(len(devs))
        year_rows['total'] = (n_devs, 1, total, 1, total / n_devs)
        kcomps = result[year]['k_components']
        for k in range(2, max_k + 1):
            if k not in kcomps:
                year_rows[k] = (0, 0, 0, 0, 0)
                continue
            members = set.union(*[component[1] for component in kcomps[k]])
            devs_at_k = members & devs
            if not devs_at_k:
                print("No developers at level {0} in year {1}".format(k, year))
                continue
            count = len(devs_at_k)
            contrib = sum(G.degree(devs_at_k, weight='weight').values())
            year_rows[k] = (
                count,
                (count / n_devs) * 100,
                contrib,
                (contrib / total) * 100,
                contrib / float(count),
            )
    return contributions
示例#2
0
def get_developers_top_connectivity_by_year(G, year, connectivity=None):
    """Return the developers found in the highest k-components of a year.

    Parameters
    ----------
    G : graph
        Network whose nodes carry a ``bipartite`` attribute; nodes where it
        equals 1 are treated as developers.
    year : hashable
        Key into ``connectivity``.
    connectivity : dict, optional
        Mapping ``year -> {'k_components': {k: [component, ...]}}`` in which
        the node set of each component sits at index 1.  Loaded from
        ``connectivity_file`` when omitted.

    Returns
    -------
    set
        Developers that belong to any component at the largest ``k``.
    """
    if connectivity is None:
        connectivity = utils.load_result_pkl(connectivity_file)
    developers = {node for node, attrs in G.nodes(data=True)
                  if attrs['bipartite'] == 1}
    components_by_k = connectivity[year]['k_components']
    top_level = components_by_k[max(components_by_k)]
    top_nodes = set.union(*[component[1] for component in top_level])
    return top_nodes & developers
示例#3
0
def get_layouts(project_name, kind):
    """Load the pickled layouts for a project and a time granularity.

    Parameters
    ----------
    project_name : str
        Either ``'python'`` or ``'debian'``.
    kind : str
        Either ``'years'`` or ``'releases'``.

    Returns
    -------
    object
        Whatever ``utils.load_result_pkl`` returns for the selected file.

    Raises
    ------
    Exception
        If ``project_name`` or ``kind`` is not one of the supported values.
        The project name is validated first.
    """
    if project_name not in ('python', 'debian'):
        raise Exception('Unknown project name {}'.format(project_name))
    if kind not in ('years', 'releases'):
        raise Exception('Unknown kind {}'.format(kind))

    # Only the selected file name is looked up, as in a plain if/elif chain.
    by_years = kind == 'years'
    if project_name == 'python':
        path = (python_layouts_years_file if by_years
                else python_layouts_releases_file)
    else:
        path = (debian_layouts_years_file if by_years
                else debian_layouts_releases_file)
    return utils.load_result_pkl(path)
示例#4
0
def get_structural_cohesion_results(project_name, kind):
    """Load the pickled connectivity results for a project and granularity.

    Parameters
    ----------
    project_name : str
        Either ``'python'`` or ``'debian'``.
    kind : str
        Either ``'years'`` or ``'releases'``.

    Returns
    -------
    object
        Whatever ``utils.load_result_pkl`` returns for the selected file.

    Raises
    ------
    Exception
        If ``project_name`` or ``kind`` is not one of the supported values.
        The project name is validated first.
    """
    if project_name not in ('python', 'debian'):
        raise Exception('Unknown project name {}'.format(project_name))
    if kind not in ('years', 'releases'):
        raise Exception('Unknown kind {}'.format(kind))

    # Only the selected file name is looked up, as in a plain if/elif chain.
    by_years = kind == 'years'
    if project_name == 'python':
        path = (python_connectivity_years_file if by_years
                else python_connectivity_releases_file)
    else:
        path = (debian_connectivity_years_file if by_years
                else debian_connectivity_releases_file)
    return utils.load_result_pkl(path)
示例#5
0
def get_all_developers_top_connectivity(devs_by_year=None, connectivity=None):
    """Collect every developer that reached the top k-component in any year.

    Parameters
    ----------
    devs_by_year : dict, optional
        Mapping ``year -> set of developers``.  Obtained from
        ``get_developers_by_years()`` when omitted.
    connectivity : dict, optional
        Mapping ``year -> {'k_components': {k: [component, ...]}}`` in which
        the node set of each component sits at index 1.  Loaded from
        ``connectivity_file`` when omitted.

    Returns
    -------
    set
        Developers (from any year of ``devs_by_year``) that appear in a
        component at the largest ``k`` of at least one year.
    """
    if devs_by_year is None:
        devs_by_year = get_developers_by_years()
    if connectivity is None:
        connectivity = utils.load_result_pkl(connectivity_file)
    every_dev = set.union(*devs_by_year.values())
    top_devs = set()
    for year in connectivity:
        components_by_k = connectivity[year]['k_components']
        top_level = components_by_k[max(components_by_k)]
        top_nodes = set.union(*[component[1] for component in top_level])
        # Components also contain non-developer nodes; keep developers only.
        top_devs |= top_nodes & every_dev
    return top_devs
示例#6
0
def write_developer_contrib_df(fname='data/developer_contributions_df.csv'):
    """Write one CSV row per developer per year with contribution metrics.

    The first network yielded by ``networks_by_year()`` is discarded; the
    remaining ones are paired, in order, with the years 1992..2014.

    Parameters
    ----------
    fname : str
        Path of the CSV file to create (opened in ``'wb'`` mode).
    """
    def count_or_zero(table, key):
        # Value stored for ``key`` or 0 when the key is absent.
        return table[key] if key in table else 0

    header = [
        'id', 'year', 'dev', 'has_written_peps', 'has_written_acc_peps',
        'is_delegate', 'peps_this_year', 'total_peps',
        'accepted_peps_year', 'total_accepted_peps',
        'degree', 'contributions_sc', 'contributions_edits',
        'contributions_added', 'contributions_deleted',
        'collaborators', 'knum', 'aknum', 'top', 'top2',
        'tenure', 'betweenness', 'closeness', 'degree_cent',
        'file_mean_degree', 'clus_sq', 'clus_dot', 'clus_red',
    ]
    ids = utils.UniqueIdGenerator()
    peps = [p for p in get_peps() if p.created is not None]
    connectivity = utils.load_result_pkl(connectivity_file)
    centrality = utils.load_result_pkl(centrality_file)
    yearly_networks = networks_by_year()
    next(yearly_networks)  # the first network is not analysed
    networks = list(yearly_networks)
    devs_by_year = get_developers_by_years(networks=networks)
    with open(fname, 'wb') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        for year, G in zip(range(1992, 2015), networks):
            print("Analyzing {}".format(G.name))
            bdfl_delegates = get_delegates_by_year(year, peps=peps)
            peps_this_year = peps_by_developer_that_year(year, peps=peps)
            peps_until_year = peps_by_developer_until_year(year, peps=peps)
            acc_peps_this_year = accepted_peps_by_developer_that_year(
                year, peps=peps)
            acc_peps_until_year = accepted_peps_by_developer_until_year(
                year, peps=peps)
            top = get_developers_top_connectivity_by_year(
                G, year, connectivity=connectivity)
            top2 = get_developers_top_connectivity_by_year_new(
                G, year, connectivity=connectivity)
            devs = devs_by_year[year]
            tenure = compute_tenure_by_year(year, networks=networks)
            k_num = connectivity[year]['k_num']
            year_centrality = centrality[year]
            bet = normalize(year_centrality['bet'])
            clos = normalize(year_centrality['clos'])
            deg = normalize(year_centrality['deg'])
            clus_sq = nx.square_clustering(G)
            clus_dot = bp.clustering(G)
            clus_red = bp.node_redundancy(G)
            for dev in devs:
                row = [ids[dev], year, dev.encode('utf8')]
                # PEP authorship flags and counts.
                row += [
                    1 if dev in peps_until_year else 0,      # wrote at least one PEP
                    1 if dev in acc_peps_until_year else 0,  # wrote at least one accepted PEP
                    1 if dev in bdfl_delegates else 0,       # has been BDFL delegate
                    count_or_zero(peps_this_year, dev),       # PEPs written this year
                    count_or_zero(peps_until_year, dev),      # PEPs written until this year
                    count_or_zero(acc_peps_this_year, dev),   # PEPs accepted this year
                    count_or_zero(acc_peps_until_year, dev),  # total PEPs accepted
                ]
                # Degree and weighted contribution measures.
                neighbors = G[dev]
                row += [
                    len(neighbors),                   # unweighted degree
                    G.degree(dev, weight='weight'),   # lines of code added plus deleted
                    G.degree(dev, weight='edits'),    # number of file edits
                    G.degree(dev, weight='added'),    # lines of code added
                    G.degree(dev, weight='deleted'),  # lines of code removed
                    second_order_nbrs(G, dev),        # second order neighbors
                ]
                # Connectivity, tenure and centrality.
                dev_k_num = k_num[dev]
                row += [
                    dev_k_num[0],              # k-component number
                    dev_k_num[1],              # average k-component number
                    1 if dev in top else 0,    # top connectivity level
                    1 if dev in top2 else 0,   # top 2 connectivity level
                    tenure[dev],
                    bet[dev],
                    clos[dev],
                    deg[dev],
                ]
                # Mean degree of the developer's neighbors, then clustering.
                row += [
                    sum(len(G[n]) for n in neighbors) / float(len(neighbors)),
                    clus_sq[dev],
                    clus_dot[dev],
                    clus_red[dev],
                ]
                writer.writerow(row)
示例#7
0
def build_survival_data_frame(fname=survival_file):
    """Write the per-period developer records used for survival analysis.

    The first two networks returned by ``networks_by_year()`` are removed
    from the list: the first is discarded and the second (1992) seeds the
    loop.  Each iteration then writes one row per developer of the
    *previous* year, with covariates computed on the previous year's network
    and a status flag derived from ``get_all_remaining_devs`` over the years
    from the current one onwards.

    Parameters
    ----------
    fname : str
        Path of the CSV file to create (opened in ``'wb'`` mode).

    Notes
    -----
    The ``closeness`` column is read from ``centrality[year]['clos']``.  It
    used to be read from the ``'bet'`` entry, which made it a copy of the
    ``betweenness`` column.
    """
    nan = float('nan')
    ids = utils.UniqueIdGenerator()
    connectivity = utils.load_result_pkl(connectivity_file)
    centrality = utils.load_result_pkl(centrality_file)
    peps = [pep for pep in get_peps() if pep.created is not None]
    networks = list(networks_by_year())
    devs = get_developers_by_years(networks=networks)
    networks.pop(0)  # skip 1991
    G_start = networks.pop(0)  # start with 1992
    devs_start = set(n for n, d in G_start.nodes(data=True) if d['bipartite']==1)
    years = range(1993, 2015)
    with open(fname, 'wb') as f:
        out = csv.writer(f)
        out.writerow([
            'id', 'dev', 'period', 'rstart', 'rstop', 'status',
            'has_written_peps', 'has_written_acc_peps',
            'peps_this_year', 'total_peps',
            'accepted_peps_year', 'total_accepted_peps',
            'biconnected', 'top', 'tenure', 'colaborators',
            'knum', 'aknum', 'clus_sq', 'clus_dot', 'clus_red',
            'degree', 'contributions', 'dcentrality',
            'betweenness', 'closeness',
        ])
        previous_devs = devs_start
        previous_year = 1992
        previous_G = G_start
        for i, (year, G) in enumerate(zip(years, networks)):
            print("processing year {}".format(previous_year))
            these_devs = devs[year]
            remaining_devs = get_all_remaining_devs(devs, years[i:])
            year_connectivity = connectivity[previous_year]
            k_num = year_connectivity['k_num']
            top_devs = get_developers_top_connectivity(
                year_connectivity['k_components'],
                previous_devs)
            tenure = compute_tenure_by_year(previous_year)
            year_centrality = centrality[previous_year]
            bet = normalize(year_centrality['bet'])
            # Closeness has its own entry; reading 'bet' here duplicated
            # the betweenness column.
            clos = normalize(year_centrality['clos'])
            deg = normalize(year_centrality['deg'])
            # Computed once per year (it was previously evaluated twice).
            clus_sq = nx.square_clustering(previous_G)
            clus_dot = bp.clustering(previous_G)
            clus_red = bp.node_redundancy(previous_G)
            peps_this_year = peps_by_developer_that_year(previous_year, peps=peps)
            peps_until_year = peps_by_developer_until_year(previous_year, peps=peps)
            acc_peps_this_year = accepted_peps_by_developer_that_year(previous_year, peps=peps)
            acc_peps_until_year = accepted_peps_by_developer_until_year(previous_year, peps=peps)
            for dev in previous_devs:
                out.writerow([
                    ids[dev], # developer numerical ID
                    dev.encode('utf8'), # developer name
                    i + 1, # period
                    i, # start
                    i + 1, # stop
                    0 if dev in remaining_devs else 1, # status (censored)
                    1 if dev in peps_until_year else 0, # developer has written at least a pep
                    1 if dev in acc_peps_until_year else 0, # developer has written at least an acc. pep
                    peps_this_year[dev] if dev in peps_this_year else 0, # peps written this year
                    peps_until_year[dev] if dev in peps_until_year else 0, # peps written until this year
                    acc_peps_this_year[dev] if dev in acc_peps_this_year else 0, # peps acc. this year
                    acc_peps_until_year[dev] if dev in acc_peps_until_year else 0, # total peps acc.
                    # NOTE(review): this lookup raises KeyError for a developer
                    # missing from k_num, so the nan defaults used for
                    # knum/aknum below are never reached in that case.
                    0 if k_num[dev][0] < 2 else 1, # biconnected
                    0 if dev not in top_devs else 1, # member of the top connectivity level
                    tenure[dev], # tenure in years
                    second_order_nbrs(previous_G, dev), # collaborators
                    k_num.get(dev, (nan, nan))[0], # knum
                    k_num.get(dev, (nan, nan))[1], # aknum
                    clus_sq.get(dev, nan),
                    clus_dot.get(dev, nan),
                    clus_red.get(dev, nan),
                    previous_G.degree(dev), # degree
                    previous_G.degree(dev, weight='weight'), # contributions
                    deg.get(dev, nan),
                    bet.get(dev, nan),
                    clos.get(dev, nan),
                ])
            # The current year becomes the covariate year of the next period.
            previous_devs = these_devs
            previous_year = year
            previous_G = G