Exemplo n.º 1
0
 def plot_centre_dist(self, thresh=2, show_threads=True, **kwargs):
     """Draw, per participant, the days elapsed since their last comment.

     The two frames returned by ``self.__get_centre_distances(thresh,
     split=True)`` share one set of axes: the first is drawn opaque, the
     second almost transparent (alpha .1).  Line colours are looked up in
     the 'color' column of ``self.author_frame``.  With ``show_threads``
     set, ``self.__show_threads`` is applied to the axes as well.  Extra
     keyword arguments are handed to ``ac.handle_kwargs``.
     """
     project, show, _ = ac.handle_kwargs(**kwargs)
     data_high, data_low = self.__get_centre_distances(thresh, split=True)

     def colors_for(frame):
         """Return the plot colours of the authors heading frame's columns."""
         return ac.color_list(
             self.author_frame.loc[frame.columns, 'color'],
             SETTINGS['vmin'], SETTINGS['vmax'],
             cmap=CMAP)

     # style first, so the freshly created axes pick it up
     plt.style.use(SETTINGS['style'])
     _, axes = plt.subplots()
     colors_high = colors_for(data_high)
     colors_low = colors_for(data_low)
     data_high.plot(ax=axes, color=colors_high, legend=False)
     data_low.plot(ax=axes, alpha=.1, color=colors_low, legend=False)
     title = "Distance from centre of discussion\n{}".format(project)
     axes.set_ylabel("Days elapsed since last comment")
     axes.set_title(title)
     for axis, side in ((axes.xaxis, 'bottom'), (axes.yaxis, 'left')):
         axis.set_ticks_position(side)
     if show_threads:
         self.__show_threads(axes)
     ac.show_or_save(show)
Exemplo n.º 2
0
 def plot_activity_prop(self, **kwargs):
     """Plot comment counts per author with the share of higher-level ones.

     Stacked bars (left axis) show the number of comments per author, one
     segment per column of ``self.author_frame`` whose name starts with
     'level'.  A dotted line on a twin right axis shows, per author, the
     sum of all level columns but the first divided by the sum of all
     level columns (NaN for an author whose level columns sum to zero).

     Keyword arguments are interpreted by ``ac.handle_kwargs``, which
     supplies the project name, the show/save switch and the font size.
     """
     project, show, fontsize = ac.handle_kwargs(**kwargs)
     plt.style.use(SETTINGS['style'])
     is_level = self.author_frame.columns.str.startswith('level')
     cols = self.author_frame.columns[is_level].tolist()
     data = self.author_frame[cols].copy()
     data['proportion'] = (data[cols[1:]].sum(axis=1) /
                           data[cols].sum(axis=1))
     colors = [plt.cm.Set1(20 * i) for i in range(len(data.index))]
     axes = data[cols].plot(
         kind='bar', stacked=True, color=colors,
         title="Commenting activity and proportion of higher-level comments for {}".format(project).title(),
         fontsize=fontsize)
     axes.set_ylabel("Number of comments")
     axes.legend(bbox_to_anchor=(0.165, 1))
     axes2 = axes.twinx()
     axes2.set_ylabel("Proportion of Higher-level comments")
     # No fontsize here: Axes.plot forwards unknown keywords to Line2D,
     # which has no such property and rejects it.
     axes2.plot(axes.get_xticks(), data['proportion'].values,
                linestyle=':', marker='.', markersize=10, linewidth=.7,
                color='darkgrey')
     # Proxy artist for the legend; same colour and style as the line above.
     the_lines = [mlines.Line2D([], [], color='darkgrey', linestyle=':',
                                marker='.', markersize=10,
                                label="Proportion")]
     axes2.legend(handles=the_lines,
                  bbox_to_anchor=(1, 1))
     ac.show_or_save(show)
Exemplo n.º 3
0
 def plot_centre_closeness(self, thresh=10, ylim=None, **kwargs):
     """Boxplot of the delays between successive comments per author.

     Args:
         thresh: minimal number of comments an author needs to be
             included.  Authors with fewer than two comments are always
             left out because they have no delay to plot.
         ylim: optional upper limit of the y-axis (the lower limit is 0).
         **kwargs: interpreted by ``ac.handle_kwargs``.

     The entry labelled "Anonymous" is removed when present.  Delays are
     converted to days via ``total_seconds()``.
     """
     project, show, _ = ac.handle_kwargs(**kwargs)
     timestamps = self.author_frame['timestamps'].apply(np.array)
     # A missing label makes drop raise KeyError in current pandas, which
     # the former ``except ValueError`` did not catch; errors='ignore'
     # makes the removal optional on every version.
     timestamps = timestamps.drop("Anonymous", errors='ignore')
     # Filter on the number of comments, not of delays (one fewer), so that
     # the selection matches the axis label below.
     n_comments = timestamps.apply(len)
     timestamps = timestamps[(n_comments >= thresh) & (n_comments > 1)]
     delays = timestamps.apply(np.diff)
     to_days = np.vectorize(lambda x: x.total_seconds() / (60**2 * 24),
                            otypes=[float])
     delays = delays.map(to_days)
     plt.style.use(SETTINGS['style'])
     _, axes = plt.subplots()
     bplot = plt.boxplot(delays, sym='.',
                         showmeans=True, meanline=True)
     for key in ['whiskers', 'boxes', 'caps']:
         plt.setp(bplot[key], color='steelblue')
     plt.setp(bplot['means'], color="firebrick")
     axes.set_xticklabels(delays.index, rotation=40, ha='right')
     axes.set_xlabel("Participants with at least {} comments".format(
         thresh))
     axes.set_yticks(np.logspace(-1, 3, num=5, base=2))
     axes.set_ylabel("Delay in days")
     if ylim:
         axes.set_ylim(0, ylim)
     axes.set_title("Delays between comments in {}".format(project))
     axes.xaxis.set_ticks_position('bottom')
     axes.yaxis.set_ticks_position('left')
     ac.show_or_save(show)
Exemplo n.º 4
0
    def scatter_authors(self,
                        measure="betweenness centrality",
                        thresh=15, **kwargs):
        """Scatter the authors by one measure in two networks.

        The x-position is the "interaction <measure>" column and the
        y-position the "cluster <measure>" column of ``self.author_frame``.
        Marker colour encodes 'total comments'; marker size is
        'word counts' divided by 'total comments'.  Authors with at least
        ``thresh`` comments get their name written next to their marker.
        """
        project, show, _ = ac.handle_kwargs(**kwargs)
        x_measure = " ".join(["interaction", measure])
        y_measure = " ".join(["cluster", measure])
        frame = self.author_frame
        avg_length = frame['word counts'] / frame['total comments']
        plot_title = "Author-activity and centrality in {}".format(project)
        axes = frame.plot(
            kind='scatter',
            x=x_measure, y=y_measure,
            c='total comments',
            s=avg_length,
            cmap="viridis_r",
            sharex=False,
            title=plot_title)

        # name only the sufficiently active authors
        labelled = zip(frame.index, frame[x_measure], frame[y_measure],
                       frame['total comments'])
        for name, x_pos, y_pos, total in labelled:
            if total >= thresh:
                axes.text(x_pos, y_pos, name, fontsize=6)

        ac.fake_legend([50, 100, 250], title="Average wordcount of comments")
        ac.show_or_save(show)
Exemplo n.º 5
0
 def plot_centrality_measures(self,
                              g_type="interaction", measures=None,
                              delete_on=None, thresh=0, **kwargs):
     """Bar-plot of centrality measures per author, with a line per mean.

     Args:
         g_type: name of the graph the measures are taken from; it is also
             stripped (with a trailing space) from the column names used
             in the legend.
         measures: measures to show; ``self.centr_measures`` when empty.
         delete_on: optional position (into the measure columns) of the
             measure used for filtering; only authors whose value exceeds
             ``thresh`` are kept.  No filtering when None.
         thresh: threshold used together with ``delete_on``.
         **kwargs: interpreted by ``ac.handle_kwargs``.
     """
     project, show, fontsize = ac.handle_kwargs(**kwargs)
     if not measures:
         measures = self.centr_measures
     centr_cols, centrality, means = self.__get_centrality_measures(
         g_type, measures)
     if delete_on is not None:
         centrality = centrality[centrality[centr_cols[delete_on]] > thresh]
     colors = ac.color_list(len(measures),
                            SETTINGS['vmin'], SETTINGS['vmax'],
                            factor=15)
     full_measure_names = centrality.columns
     # rename() returns a new frame, so the frame handed out by the helper
     # keeps its original column names
     centrality = centrality.rename(
         columns=lambda col: col.replace(g_type + " ", ""))
     plt.style.use(SETTINGS['style'])
     axes = centrality.plot(
         kind='bar', color=colors,
         title="Centrality-measures for {} ({}-graph)".format(
             project, g_type).title())
     for measure, color in zip(full_measure_names, colors):
         the_mean = means[measure]
         # Axes.lines cannot be appended to in current matplotlib;
         # add_line is the supported way to attach an artist.
         axes.add_line(
             mlines.Line2D(
                 [-.5, len(centrality.index) - .5],
                 [the_mean, the_mean],
                 linestyle='-', linewidth=.5,
                 color=color, zorder=1,
                 transform=axes.transData))
     axes.set_xticklabels(centrality.index, fontsize=fontsize)
     ac.show_or_save(show)
Exemplo n.º 6
0
 def plot_author_activity_bar(self, what='by level', **kwargs):
     """Bar-plot of the commenting activity per author.

     Args:
         what: 'by level' draws horizontal stacked bars of the comment
             counts returned by ``self.__get_author_activity_bylevel``;
             'word counts' draws vertical bars (log-scaled y-axis) of the
             'word counts' column of ``self.author_frame``, sorted in
             descending order.
         **kwargs: interpreted by ``ac.handle_kwargs``.

     Raises:
         ValueError: if ``what`` is neither 'by level' nor 'word counts'.
     """
     # validate before touching any global plotting state
     if what not in ('by level', 'word counts'):
         raise ValueError(
             "what must be 'by level' or 'word counts', got {!r}".format(
                 what))
     project, show, fontsize = ac.handle_kwargs(**kwargs)
     plt.style.use(SETTINGS['style'])
     if what == "by level":
         levels, colors = self.__get_author_activity_bylevel()
         total_num_of_comments = int(levels.sum().sum())
         axes = levels.plot(
             kind='barh', stacked=True, color=colors,
             title='Comments per author (total: {})'.format(
                 total_num_of_comments),
             fontsize=fontsize)
         axes.set_yticklabels(levels.index, fontsize=fontsize)
     else:
         word_counts = self.author_frame[what].sort_values(ascending=False)
         total_word_count = int(word_counts.sum())
         axes = word_counts.plot(
             kind='bar', logy=True,
             title='Word-count per author in {} (total: {})'.format(
                 project, total_word_count),
             fontsize=fontsize)
         axes.xaxis.set_ticks_position('bottom')
         axes.yaxis.set_ticks_position('left')
     ac.show_or_save(show)
Exemplo n.º 7
0
 def scatter_comments_replies(self, **kwargs):
     """Scatter each author's total comments against direct replies received.

     Both values are read from ``self.author_frame``; keyword arguments are
     interpreted by ``ac.handle_kwargs``.
     """
     project, show, _ = ac.handle_kwargs(**kwargs)
     x_col, y_col = 'total comments', 'replies (direct)'
     plot_title = "total comments vs replies in {}".format(project)
     self.author_frame[[x_col, y_col]].plot(
         kind='scatter', x=x_col, y=y_col,
         sharex=False, title=plot_title)
     ac.show_or_save(show)
Exemplo n.º 8
0
 def plot_author_activity_hist(self,
                               what='total comments', bins=10,
                               **kwargs):
     """Histogram of the commenting activity of the authors.

     Args:
         what: column of ``self.author_frame`` to bin; either
             'total comments' (default) or 'word counts'.
         bins: passed on to the pandas ``hist`` call.
         **kwargs: interpreted by ``ac.handle_kwargs``.

     Raises:
         ValueError: if ``what`` is not one of the two supported columns.
     """
     # validate before touching any global plotting state
     if what not in ('total comments', 'word counts'):
         raise ValueError(
             "what must be 'total comments' or 'word counts', "
             "got {!r}".format(what))
     project, show, _ = ac.handle_kwargs(**kwargs)
     comments = self.author_frame[what]
     plt.style.use(SETTINGS['style'])
     _, axes = plt.subplots()
     comments.hist(bins=bins, grid=False, ax=axes)
     axes.set_title("Histogram of {} for {}".format(what, project))
     # x-axis starts at 1; the first y-tick is dropped
     axes.set_xlim(1)
     axes.set_yticks(axes.get_yticks()[1:])
     ac.show_or_save(show)
Exemplo n.º 9
0
 def draw_graph(self,
                graph_type="interaction",
                k=None, reset=False, **kwargs):
     """Draw the interaction or co-location network of the authors.

     Args:
         graph_type: "interaction" (draws ``self.i_graph``) or "cluster"
             (draws ``self.c_graph``).
         k: passed on to ``nx.spring_layout``.
         reset: recompute the layout even when ``self.positions`` is
             already set; the layout is stored in ``self.positions``.
         **kwargs: interpreted by ``ac.handle_kwargs``.

     Raises:
         ValueError: if ``graph_type`` is not one of the two known names.
     """
     if graph_type == "cluster":
         graph = self.c_graph
         graph_type = "Co-location Network"
     elif graph_type == "interaction":
         graph = self.i_graph
         graph_type = "Interaction Network"
     else:
         # previously fell through and failed later on the unbound `graph`
         raise ValueError(
             "graph_type must be 'interaction' or 'cluster', "
             "got {!r}".format(graph_type))
     project, show, fontsize = ac.handle_kwargs(**kwargs)
     # attributing widths and colors to edges; the list fixes the edge
     # order so that colours and edges stay aligned when drawing
     edges = list(graph.edges())
     weights = [graph[source][dest]['weight'] * 15 for
                source, dest in edges]
     edge_colors = [plt.cm.Blues(weight) for weight in weights]
     # attributes sizes to nodes (counts are fetched once, not per author)
     author_count = self.author_count()
     sizes = [(log(author_count[author], 4) + 1) * 300
              for author in self.author_frame.index]
     # positions with spring
     if reset or not self.positions:
         self.positions = nx.spring_layout(graph, k=k, scale=1)
     # creating title and axes
     figure = plt.figure()
     figure.suptitle("{} for {}".format(graph_type, project).title(),
                     fontsize=12)
     axes = figure.add_subplot(111)
     axes.xaxis.set_ticks([])
     axes.yaxis.set_ticks([])
     # actual drawing
     # consider adding legend
     # NOTE(review): the style is applied after the figure was created,
     # unlike in the sibling plot methods; ordering kept as it was.
     plt.style.use(SETTINGS['style'])
     # `edgelist` is the keyword draw_networkx understands; the former
     # `edges=` is not a recognised argument.
     nx.draw_networkx(graph, self.positions,
                      with_labels=SETTINGS['show_labels_authors'],
                      font_size=fontsize,
                      node_size=sizes,
                      nodelist=self.author_frame.index.tolist(),
                      node_color=self.author_frame['color'].tolist(),
                      edgelist=edges,
                      width=1,
                      edge_color=edge_colors,
                      vmin=SETTINGS['vmin'],
                      vmax=SETTINGS['vmax'],
                      cmap=CMAP,
                      ax=axes)
     ac.show_or_save(show)
Exemplo n.º 10
0
 def plot_activity_degree(self,
                          g_type='interaction', measures=None,
                          delete_on=None, thresh=0, **kwargs):
     """Plot comment counts (bars) together with network measures (lines).

     Args:
         g_type: name of the graph the measures are taken from; it is also
             stripped (with a trailing space) from the legend labels.
         measures: measures to show; ``self.centr_measures`` when empty.
             The special value ``['hits']`` shows the 'hubs' and
             'authorities' columns of ``self.__hits()``, sorted by hubs.
         delete_on: optional position (into the measure columns) of the
             measure used for filtering; only authors whose value exceeds
             ``thresh`` are kept.  No filtering when None.
         thresh: threshold used together with ``delete_on``.
         **kwargs: interpreted by ``ac.handle_kwargs``.
     """
     from itertools import cycle

     project, show, fontsize = ac.handle_kwargs(**kwargs)
     # data for centrality measures
     if not measures:
         measures = self.centr_measures
     if measures == ['hits']:
         centr_cols = ['hubs', 'authorities']
         centrality = self.__hits()[centr_cols].sort_values(
             centr_cols[0], ascending=False)
     else:
         centr_cols, centrality, _ = self.__get_centrality_measures(
             g_type, measures)
     if delete_on is not None:
         centrality = centrality[centrality[centr_cols[delete_on]] > thresh]
     # data for commenting-activity (limited to index of centrality)
     comments, colors = self.__get_author_activity_bylevel()
     comments = comments.loc[centrality.index]
     plt.style.use(SETTINGS['style'])
     axes = comments.plot(
         kind='bar', stacked=True, color=colors,
         title="Commenting activity and {} for {}".format(
             ", ".join(measures), project).title(),
         fontsize=fontsize)
     axes.set_ylabel("Number of comments")
     axes.xaxis.set_ticks_position('bottom')
     axes2 = axes.twinx()
     axes2.set_ylabel("Measures")
     # Markers repeat after the fifth column; a plain zip with the marker
     # string would silently drop every further measure.
     col_marker = list(zip(centr_cols, cycle("oDsv^")))
     for col, marker in col_marker:
         axes2.plot(axes.get_xticks(), centrality[col].values,
                    linestyle=':', marker=marker, markersize=5,
                    linewidth=.7, color='darkgray')
     # proxy artists for the legend, styled like the plotted lines
     the_lines = [mlines.Line2D([], [], color='darkgray',
                                linestyle=':',
                                linewidth=.7,
                                marker=marker,
                                markersize=5,
                                label=col.replace(g_type + " ", ""))
                  for (col, marker) in col_marker]
     axes2.legend(handles=the_lines,
                  bbox_to_anchor=(.83, 1))
     ac.show_or_save(show)
Exemplo n.º 11
0
 def plot_author_activity_pie(self, what='total comments', **kwargs):
     """Pie-chart of the commenting activity per author.

     Authors whose value stays below one percent of the total (rounded up)
     are merged into a single "fewer than ..." slice; the title reports
     the total and the number of merged authors.

     Args:
         what: column of ``self.author_frame`` to chart; either
             'total comments' (default) or 'word counts'.
         **kwargs: interpreted by ``ac.handle_kwargs``.

     Raises:
         ValueError: if ``what`` is not one of the two supported columns.
     """
     # validate before doing any work
     if what not in ('total comments', 'word counts'):
         raise ValueError(
             "what must be 'total comments' or 'word counts', "
             "got {!r}".format(what))
     project, show, fontsize = ac.handle_kwargs(**kwargs)
     comments = self.author_frame[[what, 'color']].sort_values(
         what, ascending=False)
     thresh = int(np.ceil(comments[what].sum() / 100))
     whatcounted = 'comments' if what == 'total comments' else 'words'
     merged_label = "fewer than {} {}".format(thresh, whatcounted)
     keeps_name = comments[what] >= thresh
     # Counted directly from the mask: the former value_counts()[0] lookup
     # reported 1 when no author at all fell below the threshold.
     merged_commenters = int((~keeps_name).sum())
     # flat list of labels (a nested list would build a MultiIndex)
     comments.index = [name if keep else merged_label
                       for (name, keep) in keeps_name.items()]
     comments = DataFrame({
         'totals': comments[what].groupby(comments.index).sum(),
         'maxs': comments[what].groupby(comments.index).max(),
         'color': comments['color'].groupby(
             comments.index).max()}).sort_values(
                 'maxs', ascending=False)
     # empty name so that the pie gets no axis label
     for_pie = comments['totals'].rename("")
     colors = ac.color_list(comments['color'],
                            SETTINGS['vmin'], SETTINGS['vmax'],
                            cmap=CMAP)
     plt.style.use(SETTINGS['style'])
     title = "Activity per author for {}".format(project).title()
     title += ' ({total} {unit}, {merged} with fewer than {thresh} {unit})'.format(
         total=int(comments['totals'].sum()),
         unit=whatcounted,
         merged=merged_commenters,
         thresh=thresh)
     for_pie.plot(
         kind='pie', autopct='%.2f %%', figsize=(6, 6),
         labels=for_pie.index,
         colors=colors,
         title=('\n'.join(wrap(title, 60))),
         fontsize=fontsize)
     ac.show_or_save(show)
Exemplo n.º 12
0
    def scatter_authors_hits(self, thresh=10, **kwargs):
        """Scatter the authors by their hub and authority values.

        The frame returned by ``self.__hits()`` supplies the positions
        ('hubs' on x, 'authorities' on y), the marker colour
        ('total comments') and the marker size ('word counts' divided by
        'total comments').  Authors with at least ``thresh`` comments get
        their name written next to their marker.
        """
        project, show, _ = ac.handle_kwargs(**kwargs)
        hits = self.__hits()
        avg_length = hits['word counts'] / hits['total comments']
        plot_title = "Hubs and Authorities in {}".format(project)
        axes = hits.plot(
            kind='scatter',
            x='hubs', y='authorities',
            c='total comments',
            s=avg_length,
            cmap="viridis_r",
            sharex=False,
            title=plot_title)

        # name only the sufficiently active authors
        labelled = zip(hits.index, hits['hubs'], hits['authorities'],
                       hits['total comments'])
        for name, hub, authority, total in labelled:
            if total >= thresh:
                axes.text(hub, authority, name, fontsize=6)
        ac.fake_legend([50, 100, 250], title="Average wordcount of comments")
        ac.show_or_save(show)
Exemplo n.º 13
0
 def plot_centre_crowd(self, thresh=2, show_threads=False, **kwargs):
     """Stacked area-plot of how many participants are close to the centre.

     The distances returned by ``self.__get_centre_distances(thresh,
     split=False)`` are counted per row in three bands: at most .25, above
     .25 up to .5, and above .5 up to 1 (labelled 6, 12 and 24 hours).
     With ``show_threads`` set, ``self.__show_threads`` is applied to the
     axes as well.  Extra keyword arguments go to ``ac.handle_kwargs``.
     """
     project, show, _ = ac.handle_kwargs(**kwargs)
     data = self.__get_centre_distances(thresh, split=False)

     def count_within(lower, upper):
         """Count per row the values in the half-open band (lower, upper]."""
         selected = data <= upper
         if lower is not None:
             selected = selected & (data > lower)
         return data[selected].count(axis=1)

     bands = ['6 hours', '12 hours', '24 hours']
     data_close = DataFrame(
         {'6 hours': count_within(None, .25),
          '12 hours': count_within(.25, .5),
          '24 hours': count_within(.5, 1)},
         columns=bands)
     plt.style.use(SETTINGS['style'])
     # highest stacked total fixes the range of the y-ticks
     y_max = data_close.sum(axis=1).max()
     _, axes = plt.subplots()
     data_close.plot(kind="area", ax=axes, stacked=True,
                     color=['darkslategray', 'steelblue', 'lightgray'])
     axes.set_yticks(range(1, y_max + 1))
     axes.set_ylabel("Number of participants")
     title = "Crowd close to the centre of discussion in {}".format(project)
     axes.set_title(title)
     for axis, side in ((axes.xaxis, 'bottom'), (axes.yaxis, 'left')):
         axis.set_ticks_position(side)
     if show_threads:
         self.__show_threads(axes)
     ac.show_or_save(show)
Exemplo n.º 14
0
 def plot_i_trajectories(self,
                         thresh=None, select=None, l_thresh=5,
                         **kwargs):
     """Plot the cumulative number of interactions per pair of authors.

     Every edge of ``self.i_graph`` becomes one line, built from the
     counted 'timestamps' stored on the edge.  Lines are ordered by their
     final value, largest first.

     Args:
         thresh: keep only pairs that reach at least this many
             interactions (ignored when ``select`` is given).
         select: keep only this many of the largest pairs.
         l_thresh: number of lines (from the top) that get a legend entry.
         **kwargs: interpreted by ``ac.handle_kwargs``.

     When the frame cannot be built (ValueError), a message is printed
     and nothing is plotted.
     """
     project, show, _ = ac.handle_kwargs(**kwargs)
     trajectories = {}
     # edges(data=True) replaces edges_iter, which networkx 2 removed
     for (source, dest, data) in self.i_graph.edges(data=True):
         name = " / ".join([source, dest])
         trajectories[name] = Series(Counter(data['timestamps']),
                                     name=name)
     try:
         tr_data = DataFrame(trajectories)
     except ValueError as err:
         print("Could not create DataFrame: ", err)
         return
     tr_data = tr_data.fillna(0).cumsum().sort_index()
     col_order = tr_data.iloc[-1].sort_values(ascending=False).index
     tr_data = tr_data[col_order]
     title = "Interaction trajectories for {}".format(project)
     if select:
         tr_data = tr_data.iloc[:, :select]
         title += " ({} largest)".format(select)
     elif thresh:
         tr_data = tr_data.loc[:, ~(tr_data < thresh).all(axis=0)]
         title += " (minimally {} interactions)".format(thresh)
     # Only the columns that survived the filtering can be plotted; the
     # unfiltered order would raise KeyError on the dropped ones.
     col_order = tr_data.columns
     plt.style.use(SETTINGS['style'])
     _, axes = plt.subplots()
     for col in col_order[:l_thresh]:
         tr_data[col].plot(ax=axes, label=col)
     for col in col_order[l_thresh:]:
         tr_data[col].plot(ax=axes, label=None)
     axes.legend(labels=list(col_order[:l_thresh]), loc='best')
     # use the title built above so the select/thresh suffix is shown
     axes.set_title(title)
     axes.xaxis.set_ticks_position('bottom')
     axes.yaxis.set_ticks_position('left')
     ac.show_or_save(show)
Exemplo n.º 15
0
    def draw_centre_discussion(self,
                               regular_intervals=False,
                               skips=2, zoom=12, **kwargs):
        """Draw a sequence of figures showing who is close to the centre.

        For every chosen point in time each author is placed at the angle
        stored in the 'angle' column of ``self.author_frame`` and at a
        distance equal to the seconds elapsed since their last comment up
        to that point (NaN when they have not commented yet).  Each figure
        has a full view on the left and a ten-fold zoom on the right, both
        with circles for one day, one week and one month.

        Args:
            regular_intervals: when false, the points in time are the
                sorted comment timestamps of all authors; otherwise a
                ``date_range`` from the first to the last timestamp.
                In both cases only every ``skips``-th entry is used.
            skips: step used to thin out the points in time.
            zoom: either a number by which the largest coordinate is
                divided to get the half-width of the left view, or one of
                the names 'day', '2 days', 'week', '1 week', '2 weeks',
                '3 weeks', 'month' giving that half-width directly.  Any
                other non-numeric value falls back to the largest
                coordinate.
            **kwargs: interpreted by ``ac.handle_kwargs``; only the
                show/save switch is used.  When showing, each figure is
                displayed for a second; otherwise it is saved as
                "FIGS/img<number>.png".
        """
        _, show, _ = ac.handle_kwargs(**kwargs)
        activity_df = self.author_frame[
            ['color', 'angle', 'timestamps']].copy()
        if not regular_intervals:
            intervals = np.concatenate(activity_df['timestamps'].values)
            intervals.sort(kind='mergesort')
            intervals = intervals[::skips]
        else:
            start = np.min(activity_df['timestamps'].apply(np.min))
            stop = np.max(activity_df['timestamps'].apply(np.max))
            intervals = date_range(start, stop)[::skips]
        # largest absolute coordinates seen over all points in time
        x_max, y_max = 0, 0
        for interval in intervals:
            # timestamps of each author up to the current point in time
            interval_data = activity_df['timestamps'].apply(
                # TODO: fix - does not work
                lambda x, intv=interval: x[x <= intv])
            # seconds since the last of those timestamps; the fallback
            # handles differences that lack total_seconds()
            try:
                interval_data = interval_data.apply(
                    lambda x, intv=interval: (intv - x[-1]).total_seconds()
                    if x.size else np.nan)
            except AttributeError:
                interval_data = interval_data.apply(
                    lambda x, intv=interval:
                    (intv - x[-1]) / np.timedelta64(1, 's')
                    if x.size else np.nan)
            x_coord = interval_data * np.cos(activity_df['angle'])
            the_min, the_max = np.min(x_coord), np.max(x_coord)
            x_max = max(abs(the_max), abs(the_min), x_max)
            y_coord = interval_data * np.sin(activity_df['angle'])
            the_min, the_max = np.min(y_coord), np.max(y_coord)
            y_max = max(abs(the_max), abs(the_min), y_max)
            coords = DataFrame({"x": x_coord, "y": y_coord})
            assert interval not in activity_df.columns
            # one column per point in time, holding [x, y] per author
            activity_df[interval] = [list(x) for x in coords.values]
        in_secs = {'day': 86400, '2 days': 172800, 'week': 604800,
                   '1 week': 604800, '2 weeks': 1209600, '3 weeks': 1814400,
                   'month': 2635200}
        largest = max(x_max, y_max)
        try:
            xy_max = largest / zoom
        except TypeError:
            # Non-numeric zoom: look the name up, falling back to the
            # unzoomed extent.  (A KeyError raised inside this handler
            # would not be caught by a sibling except-clause.)
            xy_max = in_secs.get(zoom, largest)

        def get_fig(activity_df, col_name):
            """Helper-function returning a fig based on DataFrame and col."""
            plt.style.use(SETTINGS['style'])
            coord = activity_df[col_name].to_dict()
            dists = pd.DataFrame(activity_df[col_name].tolist(),
                                 columns=['x', 'y'],
                                 index=activity_df.index)
            dists = np.sqrt(dists['x']**2 + dists['y']**2)
            # number of authors within each circle, for the legend
            in_day = dists[dists < in_secs['day']].count()
            in_week = dists[dists < in_secs['week']].count()
            in_month = dists[dists < in_secs['month']].count()
            fig = plt.figure()
            # left plot (full)
            axes1 = fig.add_subplot(121, aspect='equal')
            axes1.set_xlim([-xy_max, xy_max])
            axes1.set_ylim([-xy_max, xy_max])
            axes1.xaxis.set_ticks([])
            axes1.yaxis.set_ticks([])
            day = plt.Circle((0, 0), in_secs['day'], color='darkslategray')
            week = plt.Circle((0, 0), in_secs['week'], color='slategray')
            month = plt.Circle((0, 0), in_secs['month'], color="lightblue")
            # largest circle first so that the smaller ones stay visible
            axes1.add_artist(month)
            axes1.add_artist(week)
            axes1.add_artist(day)
            the_date = pd.to_datetime(str(col_name)).strftime(
                '%Y.%m.%d\n%H:%M')
            axes1.text(-xy_max / 1.07, xy_max / 1.30, the_date,
                       bbox=dict(facecolor='slategray', alpha=0.5))
            # axes.text(in_secs['day'], -100, '1 day', fontsize=10)
            # axes.text(in_secs['week'], -100, '1 week', fontsize=10)
            # axes.text(in_secs['month'], -100, '1 month', fontsize=10)
            nx.draw_networkx_nodes(self.graph, coord,
                                   nodelist=activity_df.index.tolist(),
                                   node_color=activity_df['color'],
                                   node_size=20,
                                   cmap=CMAP,
                                   ax=axes1)
            # right plot: zoomed
            axes2 = fig.add_subplot(122, aspect='equal')
            axes2.set_xlim([-xy_max / 10, xy_max / 10])
            axes2.set_ylim([-xy_max / 10, xy_max / 10])
            axes2.xaxis.set_ticks([])
            axes2.yaxis.set_ticks([])
            # fresh circles: the ones above already belong to axes1
            day = plt.Circle((0, 0), in_secs['day'], color='darkslategray')
            week = plt.Circle((0, 0), in_secs['week'], color='slategray')
            month = plt.Circle((0, 0), in_secs['month'], color="lightblue")
            axes2.add_artist(month)
            axes2.add_artist(week)
            axes2.add_artist(day)
            day_patch = mpatches.Patch(
                color='darkslategray',
                label="{: <3} active in last day".format(in_day).ljust(25))
            week_patch = mpatches.Patch(
                color='slategray',
                label="{: <3} active in last week".format(in_week).ljust(25))
            month_patch = mpatches.Patch(
                color='lightblue',
                label="{: <3} active in last month".format(in_month).ljust(25))
            plt.legend(handles=[day_patch, week_patch, month_patch])
            nx.draw_networkx_nodes(self.graph, coord,
                                   nodelist=activity_df.index.tolist(),
                                   node_color=activity_df['color'],
                                   node_size=20,
                                   cmap=CMAP,
                                   ax=axes2)
            return fig

        ion()
        # one figure per point in time, shown briefly or saved to disk
        for (num, interval) in enumerate(intervals):
            fig = get_fig(activity_df, interval)
            if show:
                fig.canvas.draw()
                plt.draw()
                plt.pause(1)
                plt.close(fig)
            else:
                plt.savefig("FIGS/img{0:0>5}.png".format(num))
                plt.close(fig)