def plot_centre_dist(self, thresh=2, show_threads=True, **kwargs):
    """Draw one line per participant of the days since their last comment.

    The centre-distance helper is called with `thresh` and `split=True`
    and hands back two frames: the first is drawn at full opacity, the
    second at alpha .1.  Line colors are derived from the 'color' column
    of the author frame.  Keyword arguments are forwarded to
    `ac.handle_kwargs`; the thread overlay is added when `show_threads`
    is true.
    """
    project, show, _ = ac.handle_kwargs(**kwargs)
    foreground, background = self.__get_centre_distances(thresh, split=True)

    plt.style.use(SETTINGS['style'])
    _, axes = plt.subplots()

    # one color list per frame, looked up through the frame's column labels
    palettes = [
        ac.color_list(self.author_frame.loc[frame.columns, 'color'],
                      SETTINGS['vmin'], SETTINGS['vmax'], cmap=CMAP)
        for frame in (foreground, background)]
    foreground.plot(ax=axes, color=palettes[0], legend=False)
    background.plot(ax=axes, alpha=.1, color=palettes[1], legend=False)

    axes.set_ylabel("Days elapsed since last comment")
    axes.set_title(
        "Distance from centre of discussion\n{}".format(project))
    axes.xaxis.set_ticks_position('bottom')
    axes.yaxis.set_ticks_position('left')
    if show_threads:
        self.__show_threads(axes)
    ac.show_or_save(show)
def plot_activity_prop(self, **kwargs):
    """Plot comment counts (bars) and share of higher-level comments (line).

    The bars stack every column of the author frame whose name starts with
    'level'.  The dotted line, drawn on a twin y-axis, is the sum of all
    but the first of those columns divided by the sum of all of them, per
    author.  Keyword arguments are forwarded to `ac.handle_kwargs`.
    """
    project, show, fontsize = ac.handle_kwargs(**kwargs)
    plt.style.use(SETTINGS['style'])
    cols = self.author_frame.columns[
        self.author_frame.columns.str.startswith('level')].tolist()
    data = self.author_frame[cols].copy()
    # everything past the first level column counts as "higher-level"
    data['proportion'] = (data[cols[1:]].sum(axis=1) /
                          data[cols].sum(axis=1))
    colors = [plt.cm.Set1(20 * i) for i in range(len(data.index))]
    axes = data[cols].plot(
        kind='bar', stacked=True, color=colors,
        title="Commenting activity and proportion of higher-level "
              "comments for {}".format(project).title(),
        fontsize=fontsize)
    axes.set_ylabel("Number of comments")
    axes.legend(bbox_to_anchor=(0.165, 1))

    axes2 = axes.twinx()
    axes2.set_ylabel("Proportion of Higher-level comments")
    # NOTE: `fontsize` is not a Line2D property, so it must not be passed
    # to Axes.plot (matplotlib rejects unknown line properties).
    axes2.plot(axes.get_xticks(), data['proportion'].values,
               linestyle=':', marker='.', markersize=10, linewidth=.7,
               color='darkgrey')
    # proxy artist so the twin axis gets its own legend entry
    the_lines = [mlines.Line2D([], [], color='gray', linestyle=':',
                               marker='.', markersize=10,
                               label="Proportion")]
    axes2.legend(handles=the_lines, bbox_to_anchor=(1, 1))
    ac.show_or_save(show)
def plot_centre_closeness(self, thresh=10, ylim=None, **kwargs):
    """Boxplot of the delays between consecutive comments per author.

    For every author the differences between successive entries of the
    'timestamps' column are computed and converted to days.  Only authors
    with at least `thresh` such delays are plotted, and the "Anonymous"
    entry is left out when present.

    Args:
        thresh: minimum number of delays an author needs to be shown.
        ylim: optional upper limit for the y-axis (lower limit is 0).
        **kwargs: forwarded to `ac.handle_kwargs`.
    """
    project, show, _ = ac.handle_kwargs(**kwargs)
    timestamps = self.author_frame['timestamps'].apply(np.array)
    # errors="ignore" covers the "no Anonymous author" case regardless of
    # which exception type the installed pandas raises for a missing label
    timestamps = timestamps.drop("Anonymous", errors="ignore")
    delays = timestamps.apply(np.diff)
    delays = delays[delays.apply(len) >= thresh]
    # assumes the differences expose total_seconds() (timedelta-like)
    to_days = np.vectorize(lambda x: x.total_seconds() / (60**2 * 24))
    delays = delays.map(to_days)

    plt.style.use(SETTINGS['style'])
    _, axes = plt.subplots()
    bplot = axes.boxplot(delays, sym='.', showmeans=True, meanline=True)
    for key in ['whiskers', 'boxes', 'caps']:
        plt.setp(bplot[key], color='steelblue')
    plt.setp(bplot['means'], color="firebrick")
    axes.set_xticklabels(delays.index, rotation=40, ha='right')
    axes.set_xlabel(
        "Participants with at least {} comments".format(thresh))
    # ticks at 0.5, 1, 2, 4 and 8 days
    axes.set_yticks(np.logspace(-1, 3, num=5, base=2))
    axes.set_ylabel("Delay in days")
    if ylim:
        axes.set_ylim(0, ylim)
    axes.set_title("Delays between comments in {}".format(project))
    axes.xaxis.set_ticks_position('bottom')
    axes.yaxis.set_ticks_position('left')
    ac.show_or_save(show)
def scatter_authors(self, measure="betweenness centrality", thresh=15,
                    **kwargs):
    """Scatter authors by a measure on the interaction and cluster graphs.

    The x/y columns are "interaction <measure>" and "cluster <measure>".
    Marker color follows 'total comments' and marker size the ratio of
    'word counts' to 'total comments'.  Authors with at least `thresh`
    comments get a text label next to their marker.
    """
    project, show, _ = ac.handle_kwargs(**kwargs)
    x_measure = " ".join(["interaction", measure])
    y_measure = " ".join(["cluster", measure])
    frame = self.author_frame
    avg_words = frame['word counts'] / frame['total comments']
    axes = frame.plot(
        kind='scatter',
        x=x_measure, y=y_measure,
        c='total comments', s=avg_words,
        cmap="viridis_r", sharex=False,
        title="Author-activity and centrality in {}".format(project))
    for name, data in frame.iterrows():
        if data['total comments'] < thresh:
            continue
        axes.text(data[x_measure], data[y_measure], name, fontsize=6)
    ac.fake_legend([50, 100, 250], title="Average wordcount of comments")
    ac.show_or_save(show)
def plot_centrality_measures(self, g_type="interaction", measures=None,
                             delete_on=None, thresh=0, **kwargs):
    """Bar plot of centrality measures per author, with a line per mean.

    Args:
        g_type: graph name; it is passed to the centrality helper and its
            "<g_type> " prefix is stripped from the column labels.
        measures: measures to plot; falls back to `self.centr_measures`
            when empty or None.
        delete_on: optional position in the list of centrality columns;
            when given, only rows whose value in that column exceeds
            `thresh` are kept.
        thresh: cut-off used together with `delete_on`.
        **kwargs: forwarded to `ac.handle_kwargs`.
    """
    project, show, fontsize = ac.handle_kwargs(**kwargs)
    if not measures:
        measures = self.centr_measures
    centr_cols, centrality, means = self.__get_centrality_measures(
        g_type, measures)
    if delete_on is not None:
        centrality = centrality[centrality[centr_cols[delete_on]] > thresh]
    colors = ac.color_list(len(measures),
                           SETTINGS['vmin'], SETTINGS['vmax'],
                           factor=15)
    # `means` is keyed by the full column names, so remember them before
    # shortening the labels; rename on a copy so the frame handed back by
    # the helper is not modified
    full_measure_names = centrality.columns
    centrality = centrality.copy()
    centrality.columns = [
        col.replace(g_type + " ", "") for col in full_measure_names]

    plt.style.use(SETTINGS['style'])
    axes = centrality.plot(
        kind='bar', color=colors,
        title="Centrality-measures for {} ({}-graph)".format(
            project, g_type).title())
    for measure, color in zip(full_measure_names, colors):
        the_mean = means[measure]
        # add_line is the supported way to attach an artist; Axes.lines
        # is not a mutable list on current matplotlib
        axes.add_line(
            mlines.Line2D(
                [-.5, len(centrality.index) - .5],
                [the_mean, the_mean],
                linestyle='-', linewidth=.5,
                color=color, zorder=1,
                transform=axes.transData))
    axes.set_xticklabels(centrality.index, fontsize=fontsize)
    ac.show_or_save(show)
def plot_author_activity_bar(self, what='by level', **kwargs):
    """Bar plot of per-author activity.

    `what` selects the plot: 'by level' gives stacked horizontal bars of
    comment counts, 'word counts' gives log-scaled vertical bars sorted in
    descending order.  Any other value raises ValueError.
    """
    project, show, fontsize = ac.handle_kwargs(**kwargs)
    plt.style.use(SETTINGS['style'])
    if what == "word counts":
        per_author = self.author_frame[what].sort_values(ascending=False)
        heading = 'Word-count per author in {} (total: {})'.format(
            project, int(per_author.sum()))
        axes = per_author.plot(
            kind='bar', logy=True, title=heading, fontsize=fontsize)
        axes.xaxis.set_ticks_position('bottom')
        axes.yaxis.set_ticks_position('left')
    elif what == "by level":
        levels, colors = self.__get_author_activity_bylevel()
        heading = 'Comments per author (total: {})'.format(
            int(levels.sum().sum()))
        axes = levels.plot(
            kind='barh', stacked=True, color=colors,
            title=heading, fontsize=fontsize)
        axes.set_yticklabels(levels.index, fontsize=fontsize)
    else:
        raise ValueError
    ac.show_or_save(show)
def scatter_comments_replies(self, **kwargs):
    """Scatter 'total comments' against 'replies (direct)' per author."""
    project, show, _ = ac.handle_kwargs(**kwargs)
    x_col, y_col = 'total comments', 'replies (direct)'
    subset = self.author_frame[[x_col, y_col]]
    heading = "total comments vs replies in {}".format(project)
    subset.plot(kind='scatter', x=x_col, y=y_col,
                sharex=False, title=heading)
    ac.show_or_save(show)
def plot_author_activity_hist(self, what='total comments', bins=10,
                              **kwargs):
    """Histogram of one activity column of the author frame.

    `what` must be 'total comments' (default) or 'word counts'; anything
    else raises ValueError.  `bins` is handed to the histogram call.
    """
    project, show, _ = ac.handle_kwargs(**kwargs)
    if what not in {'total comments', 'word counts'}:
        raise ValueError
    activity = self.author_frame[what]
    plt.style.use(SETTINGS['style'])
    _, axes = plt.subplots()
    activity.hist(bins=bins, grid=False, ax=axes)
    axes.set_title("Histogram of {} for {}".format(what, project))
    # x-axis starts at 1
    axes.set_xlim(1)
    # drop the first y-tick
    y_ticks = axes.get_yticks()
    axes.set_yticks(y_ticks[1:])
    ac.show_or_save(show)
def draw_graph(self, graph_type="interaction", k=None, reset=False,
               **kwargs):
    """Draw the interaction or cluster graph with a spring layout.

    Args:
        graph_type: "interaction" (uses `self.i_graph`) or "cluster"
            (uses `self.c_graph`).
        k: passed to `nx.spring_layout`.
        reset: recompute the layout even when `self.positions` is set.
        **kwargs: forwarded to `ac.handle_kwargs`.

    Raises:
        ValueError: if `graph_type` is neither "interaction" nor "cluster".
    """
    project, show, fontsize = ac.handle_kwargs(**kwargs)
    if graph_type == "cluster":
        graph = self.c_graph
        graph_type = "Co-location Network"
    elif graph_type == "interaction":
        graph = self.i_graph
        graph_type = "Interaction Network"
    else:
        raise ValueError(
            "graph_type must be 'interaction' or 'cluster', "
            "got {!r}".format(graph_type))
    # attributing widths and colors to edges; the edge list is frozen so
    # that the colors line up with the edges handed to the drawing call
    edges = list(graph.edges())
    weights = [graph[source][dest]['weight'] * 15
               for source, dest in edges]
    edge_colors = [plt.cm.Blues(weight) for weight in weights]
    # attributes sizes to nodes (author_count is looked up once, not once
    # per author)
    counts = self.author_count()
    sizes = [(log(counts[author], 4) + 1) * 300
             for author in self.author_frame.index]
    # positions with spring
    if reset or not self.positions:
        self.positions = nx.spring_layout(graph, k=k, scale=1)
    # apply the style before the figure exists, as the other plots do
    plt.style.use(SETTINGS['style'])
    # creating title and axes
    figure = plt.figure()
    figure.suptitle("{} for {}".format(graph_type, project).title(),
                    fontsize=12)
    axes = figure.add_subplot(111)
    axes.xaxis.set_ticks([])
    axes.yaxis.set_ticks([])
    # actual drawing
    # TODO: consider adding legend
    nx.draw_networkx(graph, self.positions,
                     with_labels=SETTINGS['show_labels_authors'],
                     font_size=fontsize,
                     node_size=sizes,
                     nodelist=self.author_frame.index.tolist(),
                     node_color=self.author_frame['color'].tolist(),
                     # the keyword is `edgelist`; `edges` is not accepted
                     edgelist=edges, width=1, edge_color=edge_colors,
                     vmin=SETTINGS['vmin'], vmax=SETTINGS['vmax'],
                     cmap=CMAP, ax=axes)
    ac.show_or_save(show)
def plot_activity_degree(self, g_type='interaction', measures=None,
                         delete_on=None, thresh=0, **kwargs):
    """Stacked comment counts (bars) with network measures (dotted lines).

    `measures` falls back to `self.centr_measures` when empty.  The special
    value ['hits'] plots the 'hubs' and 'authorities' columns of the hits
    helper; any other value goes through the centrality helper for
    `g_type`.  When `delete_on` is given, only rows whose value in that
    centrality column exceeds `thresh` are kept.  The bars are restricted
    to the authors left in the centrality data.
    """
    project, show, fontsize = ac.handle_kwargs(**kwargs)

    # data for centrality measures
    measures = measures or self.centr_measures
    if measures == ['hits']:
        centr_cols = ['hubs', 'authorities']
        hits = self.__hits()
        centrality = hits[centr_cols].sort_values(
            centr_cols[0], ascending=False)
    else:
        centr_cols, centrality, _ = self.__get_centrality_measures(
            g_type, measures)
    if delete_on is not None:
        keep = centrality[centr_cols[delete_on]] > thresh
        centrality = centrality[keep]

    # data for commenting-activity (limited to index of centrality)
    comments, colors = self.__get_author_activity_bylevel()
    comments = comments.loc[centrality.index]

    plt.style.use(SETTINGS['style'])
    heading = "Commenting activity and {} for {}".format(
        ", ".join(measures), project).title()
    axes = comments.plot(kind='bar', stacked=True, color=colors,
                         title=heading, fontsize=fontsize)
    axes.set_ylabel("Number of comments")
    axes.xaxis.set_ticks_position('bottom')

    axes2 = axes.twinx()
    axes2.set_ylabel("Measures")
    # zip stops at the shorter input: at most five columns get a marker
    the_lines = []
    for col, marker in zip(centr_cols, "oDsv^"):
        axes2.plot(axes.get_xticks(), centrality[col].values,
                   linestyle=':', marker=marker, markersize=5,
                   linewidth=.7, color='darkgray')
        # matching proxy artist for the legend
        the_lines.append(
            mlines.Line2D([], [], color='darkgray', linewidth=.7,
                          marker=marker, markersize=5,
                          label=col.replace(g_type + " ", "")))
    axes2.legend(handles=the_lines, bbox_to_anchor=(.83, 1))
    ac.show_or_save(show)
def plot_author_activity_pie(self, what='total comments', **kwargs):
    """Pie chart of per-author activity, merging the smallest contributors.

    Authors whose value falls below 1% of the total (rounded up) are
    merged into a single "fewer than N comments/words" slice.

    Args:
        what: 'total comments' (default) or 'word counts'.
        **kwargs: forwarded to `ac.handle_kwargs`.

    Raises:
        ValueError: if `what` is not one of the two supported columns.
    """
    project, show, fontsize = ac.handle_kwargs(**kwargs)
    if what not in ('total comments', 'word counts'):
        raise ValueError(
            "what must be 'total comments' or 'word counts', "
            "got {!r}".format(what))
    comments = self.author_frame[[what, 'color']].sort_values(
        what, ascending=False)
    thresh = int(np.ceil(comments[what].sum() / 100))
    whatcounted = 'comments' if what == 'total comments' else 'words'
    merged_label = "fewer than {} {}".format(thresh, whatcounted)

    # count the merged authors directly: this stays 0 when nobody falls
    # below the threshold and does not rely on positional Series indexing
    keeps_name = comments[what] >= thresh
    merged_commenters = int((~keeps_name).sum())
    # flat list of labels (a nested list would build a MultiIndex)
    comments.index = [
        name if keep else merged_label
        for name, keep in zip(comments.index, keeps_name)]

    grouped = comments.groupby(level=0)
    comments = DataFrame({
        'totals': grouped[what].sum(),
        'maxs': grouped[what].max(),
        'color': grouped['color'].max()}).sort_values(
            'maxs', ascending=False)
    # empty name so the pie gets no y-label
    for_pie = comments['totals'].rename("")
    colors = ac.color_list(comments['color'],
                           SETTINGS['vmin'], SETTINGS['vmax'],
                           cmap=CMAP)

    plt.style.use(SETTINGS['style'])
    title = "Activity per author for {}".format(project).title()
    title += ' ({} {}, {} with fewer than {} {})'.format(
        int(comments['totals'].sum()), whatcounted,
        merged_commenters, thresh, whatcounted)
    for_pie.plot(
        kind='pie', autopct='%.2f %%', figsize=(6, 6),
        labels=for_pie.index, colors=colors,
        title=('\n'.join(wrap(title, 60))),
        fontsize=fontsize)
    ac.show_or_save(show)
def scatter_authors_hits(self, thresh=10, **kwargs):
    """Scatter authors by their 'hubs' and 'authorities' values.

    The frame comes from the hits helper.  Marker color follows
    'total comments' and marker size the ratio of 'word counts' to
    'total comments'.  Authors with at least `thresh` comments are
    labelled by name.
    """
    project, show, _ = ac.handle_kwargs(**kwargs)
    hits = self.__hits()
    avg_words = hits['word counts'] / hits['total comments']
    axes = hits.plot(
        kind='scatter',
        x='hubs', y='authorities',
        c='total comments', s=avg_words,
        cmap="viridis_r", sharex=False,
        title="Hubs and Authorities in {}".format(project))
    for name, data in hits.iterrows():
        if data['total comments'] < thresh:
            continue
        axes.text(data['hubs'], data['authorities'], name, fontsize=6)
    ac.fake_legend([50, 100, 250], title="Average wordcount of comments")
    ac.show_or_save(show)
def plot_centre_crowd(self, thresh=2, show_threads=False, **kwargs):
    """Stacked area plot of how many participants are near the centre.

    The centre-distance helper is called with `thresh` and `split=False`.
    Per row, values are counted in three bands: up to .25 ('6 hours'),
    above .25 up to .5 ('12 hours') and above .5 up to 1 ('24 hours') --
    presumably the distances are in days.  The thread overlay is added
    when `show_threads` is true.
    """
    project, show, _ = ac.handle_kwargs(**kwargs)
    data = self.__get_centre_distances(thresh, split=False)
    band_names = ['6 hours', '12 hours', '24 hours']
    within_6h = data[data <= .25].count(axis=1)
    within_12h = data[(data <= .5) & (data > .25)].count(axis=1)
    within_24h = data[(data <= 1) & (data > .5)].count(axis=1)
    data_close = DataFrame(
        dict(zip(band_names, [within_6h, within_12h, within_24h])),
        columns=band_names)

    plt.style.use(SETTINGS['style'])
    # highest stacked total, used for the integer y-ticks below
    y_max = data_close.sum(axis=1).max()
    _, axes = plt.subplots()
    data_close.plot(kind="area", ax=axes, stacked=True,
                    color=['darkslategray', 'steelblue', 'lightgray'])
    axes.set_yticks(range(1, y_max + 1))
    axes.set_ylabel("Number of participants")
    axes.set_title(
        "Crowd close to the centre of discussion in {}".format(project))
    axes.xaxis.set_ticks_position('bottom')
    axes.yaxis.set_ticks_position('left')
    if show_threads:
        self.__show_threads(axes)
    ac.show_or_save(show)
def plot_i_trajectories(self, thresh=None, select=None, l_thresh=5,
                        **kwargs):
    """Plot cumulative interaction counts over time per pair of authors.

    Every edge of `self.i_graph` contributes one trajectory, built by
    counting the entries of its 'timestamps' attribute and accumulating
    them in index order.

    Args:
        thresh: when given (and `select` is not), drop trajectories that
            never reach this many interactions.
        select: when given, keep only this many trajectories, taking the
            ones with the highest final count.
        l_thresh: number of trajectories that get a legend entry.
        **kwargs: forwarded to `ac.handle_kwargs`.

    Nothing is plotted (a message is printed instead) when the graph has
    no edges or the trajectories cannot be combined into a DataFrame.
    """
    project, show, _ = ac.handle_kwargs(**kwargs)
    trajectories = {}
    # edges(data=True) replaces edges_iter, which networkx 2.0 removed
    for (source, dest, data) in self.i_graph.edges(data=True):
        name = " / ".join([source, dest])
        trajectories[name] = Series(Counter(data['timestamps']),
                                    name=name)
    if not trajectories:
        print("Could not create DataFrame: no interactions found")
        return
    try:
        tr_data = DataFrame(trajectories)
    except ValueError as err:
        print("Could not create DataFrame: ", err)
        return

    # sort before accumulating so the running totals follow index order
    tr_data = tr_data.sort_index().fillna(0).cumsum()
    col_order = tr_data.iloc[-1].sort_values(ascending=False).index
    tr_data = tr_data[col_order]
    title = "Interaction trajectories for {}".format(project)
    if select:
        tr_data = tr_data.iloc[:, :select]
        title += " ({} largest)".format(select)
    elif thresh:
        tr_data = tr_data.loc[:, ~(tr_data < thresh).all(axis=0)]
        title += " (minimally {} interactions)".format(thresh)
    # only plot columns that survived the filtering above
    col_order = tr_data.columns

    plt.style.use(SETTINGS['style'])
    _, axes = plt.subplots()
    # labelled trajectories first, so the legend pairs with these lines
    for col in col_order[:l_thresh]:
        tr_data[col].plot(ax=axes, label=col)
    for col in col_order[l_thresh:]:
        tr_data[col].plot(ax=axes, label=None)
    axes.legend(labels=list(col_order[:l_thresh]), loc='best')
    axes.set_title(title)
    axes.xaxis.set_ticks_position('bottom')
    axes.yaxis.set_ticks_position('left')
    ac.show_or_save(show)