Пример #1
0
def project_participation_evolution(
        pm_frame, all_authors, n=2, skip_anon=True, research_only=False):
    """Assemble data on participation to projects with n as threshold.

    Args:
        pm_frame: frame handed to get_last to obtain one row per project.
        all_authors: iterable of author names to report on; ignored when
            research_only is true (the authors are then collected from the
            accumulated research-thread authors instead).
        n: minimal number of projects an author must appear in to be
            flagged in the returned selection.
        skip_anon: if true, the "Anonymous" row is left out of the result.
        research_only: if true, use 'research threads' instead of
            'all threads'.

    Returns:
        Tuple (author_project, index, select, title) for use by stacked
        bar-plot and heatmap functions:
        author_project is a boolean DataFrame (authors as rows, projects as
        columns), index is the project index, select is a boolean Series
        marking authors present in at least n projects, and title is the
        plot title.
    """
    thread_type = 'research threads' if research_only else 'all threads'
    # NOTE(review): get_last is assumed to return (frame, positions) with a
    # two-level row index whose first level identifies the project.
    data, _ = get_last(pm_frame, thread_type)
    data.index = data.index.droplevel(1)
    accumulated = data[thread_type, 'authors (accumulated)']
    # Implicit string concatenation keeps the title on one line without
    # embedding the source indentation in it (a backslash continuation
    # inside the literal would).
    if research_only:
        all_authors = list(set().union(*accumulated))
        title = ("Participation per project in Polymath "
                 "(threshold = {}, only research-threads)").format(n)
    else:
        all_authors = list(all_authors)
        title = ("Participation per project in Polymath "
                 "(threshold = {})").format(n)
    # Build all membership columns in one go; the explicit index keeps the
    # project labels even when there are no authors at all.
    author_project = DataFrame(
        {author: accumulated.apply(
            lambda project, author=author: author in project)
         for author in all_authors},
        index=accumulated.index, columns=all_authors)
    author_project = author_project.T.sort_values(
        by=data.index.tolist(), ascending=False)
    if skip_anon:
        # errors='ignore': not every selection contains an "Anonymous" row.
        author_project = author_project.drop("Anonymous", errors='ignore')
    select = author_project.sum(axis=1) >= n
    return author_project, data.index, select, title
Пример #2
0
def plot_community_evolution(pm_frame, project, thread_type):
    """Area-plot of current, joined and left authors per project or thread.

    Args:
        pm_frame: frame with the thread data; indexed by project when a
            single project is plotted, otherwise handed to get_last.
        project: name used in the plot title. If its last whitespace-
            separated token is an integer, the threads of that project are
            plotted; otherwise one data point per project is plotted.
        thread_type: 'all threads', 'research threads', or 'discussion
            threads'. A falsy value logs a warning and nothing is plotted.
    """
    if not thread_type:
        logging.warning("Need explicit thread type")
        return
    try:
        int(project.split()[-1])
        as_threads = True
    except (ValueError, IndexError):
        # IndexError: empty or whitespace-only name has no last token.
        as_threads = False
    if as_threads:
        data = pm_frame[['basic', thread_type]].loc[project].dropna()
        added, removed = _added_removed(data, thread_type, 'authors')
        size = data[thread_type, 'authors'].apply(len) - added
        df = DataFrame({'joined': added, 'left': removed, 'current': size},
                       columns=["joined", "current", "left"], index=data.index)
        df.index = range(len(df))
        # Explicit check instead of asserting on `df == df.dropna()`: that
        # comparison raises ValueError (differently-labeled frames) exactly
        # when NaNs are present, so the warning could never be reached.
        if df.isnull().values.any():
            logging.warning("Some nan-values still present")
        df.index.name = "Threads"
    else:
        data, positions = get_last(pm_frame, thread_type)
        added, removed = _added_removed(
            data, thread_type, 'authors (accumulated)')
        size = data[thread_type, 'authors (accumulated)'].dropna().apply(
            len) - added
        df = DataFrame({'joined': added, 'left': removed, 'current': size},
                       columns=["joined", "current", "left"])
        # Number the projects from 1 for the x-axis.
        df.index = range(1, len(positions) + 1)
    mpl.style.use(SBSTYLE)
    # NOTE(review): 'sage' is not one of the CSS4 color names; confirm that
    # the installed matplotlib version accepts it.
    axes = df.plot(
        kind="area",
        title="Community Evolution in {} ({})".format(project, thread_type),
        color=['sage', 'lightgrey', 'indianred'], stacked=True)
    axes.set_xticks(df.index)
    axes.xaxis.set_ticks_position('bottom')
    axes.yaxis.set_ticks_position('left')
    if as_threads:
        axes.set_xticklabels(data['basic', 'title'], rotation=90,
                             fontsize='small')
    else:
        xlabels = data.index.droplevel(1)
        axes.set_xticklabels(xlabels, rotation=90, fontsize='small')
    axes.set_ylabel('number of active commenters')