def main():
    """Plot the average within-day rank of every super-community (fig4cd).

    The data returned by ``read_plot_data`` is ranked per ``centroid_day``:
    ``h_index`` and ``num_retweets`` are replaced by their descending rank
    inside each day (rank 1 is the largest value, ties are broken by order of
    appearance). The ranks are then averaged per
    ``(centroid_day, super_community)`` and drawn as one line per
    super-community in two side-by-side panels (left: h-index rank, right:
    retweet rank). The figure is handed to ``save_fig`` under the name
    ``'fig4cd'``.
    """
    rank_cols = ['h_index', 'num_retweets']

    df = read_plot_data(overwrite=False)
    df = df.reset_index()

    # Rank inside each day in a single grouped call instead of looping over
    # the groups and writing the ranks back with .loc.
    df[rank_cols] = df.groupby('centroid_day')[rank_cols].rank(
        method='first', ascending=False)

    # Average only the two rank columns. Taking the mean of every column
    # raises on non-numeric columns with pandas >= 2.0, and nothing else is
    # plotted below anyway.
    df = (df.groupby(['centroid_day', 'super_community'])[rank_cols]
            .mean()
            .reset_index())

    palette = [
        '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
        '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'
    ]
    color_dict = {
        'International expert': palette[4],
        'National expert': palette[1],
        'Political': palette[2],
        'Other': '.5'
    }

    fig, axes = plt.subplots(1, 2, figsize=(3.3, 1.4))
    lw = .8
    ms = 4
    for sc, grp in df.groupby('super_community'):
        for ax, col in zip(axes, rank_cols):
            ax.plot(grp.centroid_day.values, grp[col], c=color_dict[sc],
                    label=sc, ms=ms, lw=lw, marker='.')

    # Limits are passed as (max, min) so the y-axis is inverted and the best
    # (smallest) rank ends up at the top.
    _min = 600
    _max = 3200
    axes[0].set_ylim((_max, _min))
    axes[1].set_ylim((_max, _min))
    axes[0].set_ylabel('Avg. rank h-index $r_h$')
    axes[1].set_ylabel('Avg. rank retweets $r_{rt}$')

    for ax in axes:
        ax.grid()
        ax.xaxis.set_major_locator(mdates.MonthLocator())
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%b'))
        ax.tick_params(axis='x', direction='out', which='minor', size=2)
        ax.tick_params(axis='x', direction='out', which='major', size=2)
        ax.tick_params(axis='y', which='major', direction='out', size=2)
        ax.locator_params(nbins=4)

    sns.despine()
    fig.tight_layout()
    save_fig(fig, 'fig4cd', 1, dpi=600, plot_formats=['png', 'pdf', 'svg'])
def fig4a():
    """Draw the h-index rank vs. retweet rank joint scatter plot (fig4a).

    Every row returned by ``read_data`` becomes one point, coloured by its
    ``super_community``. Both axes of the joint panel are inverted, a dashed
    diagonal marks equal ranks, and the resulting grid is passed to
    ``save_fig`` under the name ``'fig4a'``.
    """
    use_log_axes = False

    tab_colors = [
        '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
        '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'
    ]
    community_colors = {
        'International expert': tab_colors[4],
        'National expert': tab_colors[1],
        'Political': tab_colors[2],
        'Other': '.5'
    }

    ranks = read_data(overwrite=False)
    grid = sns.jointplot(
        data=ranks,
        kind='scatter',
        x='h_index_rank',
        y='num_retweets_rank',
        hue='super_community',
        palette=community_colors.values(),
        hue_order=community_colors.keys(),
        alpha=.8,
        s=4,
        ec=None,
        height=2,
        marginal_ticks=False,
        space=.1)
    joint_ax = grid.ax_joint

    # Inverted limits: the upper bound comes first. The far end is 1 on log
    # axes and -30 on linear axes.
    upper = len(ranks) + 50
    far_end = 1 if use_log_axes else -30
    joint_ax.set_xlim((upper, far_end))
    joint_ax.set_ylim((upper, far_end))

    joint_ax.grid()
    joint_ax.get_legend().remove()
    joint_ax.set_xlabel(r'Rank $h$-index $r_h$')
    joint_ax.set_ylabel('Rank retweets $r_{rt}$')

    # Marginal panels: hide the tick marks and add 10% headroom.
    grid.ax_marg_x.tick_params(axis='x', direction='out', which='major', size=0)
    grid.ax_marg_y.tick_params(axis='y', direction='out', which='major', size=0)
    marg_y_top = grid.ax_marg_y.get_xlim()[1]
    grid.ax_marg_y.set_xlim((None, marg_y_top * 1.1))
    marg_x_top = grid.ax_marg_x.get_ylim()[1]
    grid.ax_marg_x.set_ylim((None, marg_x_top * 1.1))

    # log scale
    if use_log_axes:
        joint_ax.set_xscale('log')
        joint_ax.set_yscale('log')

    # diagonal line
    diagonal = [0, upper]
    joint_ax.plot(diagonal, diagonal, color='0.15', ls='dashed', lw=.5)

    # num ticks
    max_ticks = 5
    for axis in (joint_ax.xaxis, joint_ax.yaxis):
        axis.set_major_locator(plt.MaxNLocator(max_ticks))

    save_fig(grid, 'fig4a', 1, dpi=600, plot_formats=['png', 'pdf', 'svg'])
def main():
    """Cluster communities by category profile and draw the fig2 clustermap.

    Steps, in order:

    1. Load the category table, transpose it, divide every row by its row
       sum, attach the ``'Internationality'`` column from
       ``get_geo_entropy`` and keep the columns listed in ``usecols``.
       The resulting table is written to ``'df_cat.csv'``.
    2. Z-score the columns, set every value below 0.5 to zero, save an
       annotated heatmap to ``'../plots/Zmatrix.png'`` and print which
       community exceeds which category.
    3. For the ``'average'`` and ``'ward'`` linkages, draw a scree plot with
       the second difference of the merge distances and two candidate
       partitions, saved to ``'../plots/linkage.png'``.
    4. Print a few flat clusterings of the average linkage for inspection.
    5. Draw the seaborn clustermap with a row-colour strip for the
       super-community and pass it to ``save_fig`` as ``'fig2'``.
    """
    df = get_category_data(without_other=False)
    df_entropy = get_geo_entropy()

    df = df.T
    df = df.divide(df.sum(axis=1), axis=0)
    df.columns = df.columns.tolist()
    df['Internationality'] = df_entropy['Internationality']
    df = df[[c for c in df.columns if c != 'Other']]
    usecols = [
        'Political Supporter', 'Public Services', 'Politics & Government',
        'Media', 'Healthcare', 'Science', 'Internationality'
    ]
    df = df[usecols]

    width = 2.2
    height = 3.7
    cmap = 'RdBu_r'
    palette = [c['color'] for c in matplotlib.rcParams['axes.prop_cycle']]
    colors = {
        'International expert': palette[4],
        'National elites': palette[1],
        'Political': palette[2],
        'Other': '.5'
    }
    super_communities = [community_to_types[com] for com in df.index]
    super_community_colors = [colors[sc] for sc in super_communities]
    df.to_csv('df_cat.csv')

    # Column-wise z-scores; everything below half a standard deviation above
    # the mean is zeroed so only clear excesses remain.
    Z = scipy.stats.zscore(df.values, axis=0)
    Z[Z < .5] = 0

    plt.figure(dpi=300)
    sns.heatmap(Z, annot=True, xticklabels=usecols, yticklabels=alphab[:15])
    plt.savefig('../plots/Zmatrix.png')
    print(Z)
    for i, com in enumerate(df.index):
        for j, cat in enumerate(usecols):
            if Z[i, j] > 0:
                print('Com {} exceed of {}'.format(com, cat))

    Xpca = PCA().fit_transform(Z)

    # One colour per flat-cluster id; fcluster ids are used as list indices.
    clr = [
        '#2200CC', '#D9007E', '#FF6600', '#FFCC00', '#ACE600', '#0099CC',
        '#8900CC', '#FF0000', '#FF9900', '#FFFF00', '#00CC01', '#0055CC',
        'red'
    ]

    fig, axes23 = plt.subplots(2, 3, figsize=(6, 4))
    for method, axes in zip(['average', 'ward'], axes23):
        z = hierarchy.linkage(Z, method=method)

        # Scree plot: merge distances (largest first) and their second
        # difference, whose two largest values give the candidate cluster
        # counts.
        axes[0].plot(range(1, len(z) + 1), z[::-1, 2], label='distance')
        knee = np.diff(z[::-1, 2], 2)
        axes[0].plot(range(2, len(z)), knee, label='2nd deriv of distance')
        axes[0].legend(fontsize=4)

        num_clust1 = knee.argmax() + 2
        knee[knee.argmax()] = 0  # mask the best knee to find the runner-up
        num_clust2 = knee.argmax() + 2
        print(num_clust1)
        print(num_clust2)

        axes[0].text(num_clust1, z[::-1, 2][num_clust1 - 1],
                     'possible\n<- knee point')

        part1 = hierarchy.fcluster(z, num_clust1, 'maxclust')
        part2 = hierarchy.fcluster(z, num_clust2, 'maxclust')

        for part, ax in zip([part1, part2], axes[1:]):
            # NOTE(review): columns -2 and -1 are the LAST two PCA
            # components, not the first two. Confirm this is intended.
            for cluster in set(part):
                ax.scatter(Xpca[part == cluster, -2],
                           Xpca[part == cluster, -1],
                           color=clr[cluster])
            for i in range(Xpca.shape[0]):
                ax.annotate(alphab[i], (Xpca[i, -2], Xpca[i, -1]))

        m = '\n(method: {})'.format(method)
        plt.setp(axes[0], title='Screeplot{}'.format(m), xlabel='partition',
                 ylabel='{}\ncluster distance'.format(m))
        plt.setp(axes[1], title='{} Clusters'.format(num_clust1))
        plt.setp(axes[2], title='{} Clusters'.format(num_clust2))

    plt.tight_layout()
    plt.savefig('../plots/linkage.png')

    # Print a handful of flat clusterings of the average linkage. The first
    # two calls rely on fcluster's default criterion.
    row_linkage = hierarchy.linkage(Z, method='average', metric='euclidean')
    fcluster_settings = [
        dict(t=1.1, depth=2),
        dict(t=1.2, depth=3),
        dict(t=3.1, criterion='distance'),
        dict(t=3.3, criterion='distance'),
    ]
    for settings in fcluster_settings:
        clus = hierarchy.fcluster(row_linkage, **settings)
        print('Nclus: ', len(set(clus)), clus)
    print(df.index)

    g = sns.clustermap(
        df,
        method='ward',
        dendrogram_ratio=.3,
        colors_ratio=.04,
        cmap=cmap,
        center=0,
        lw=0.4,
        col_cluster=False,
        z_score=1,
        figsize=(width, height),
        cbar_pos=(1.1, .33, .02, .2),
        row_colors=super_community_colors)
    fig = plt.gcf()
    fig.delaxes(g.ax_col_dendrogram)
    g.ax_row_colors.get_xaxis().set_visible(False)

    # colorbar formatting
    g.ax_cbar.set_ylabel('Z-score', rotation=0, ha='left', va='center')
    g.ax_cbar.locator_params(nbins=5)
    g.ax_cbar.tick_params(length=2, axis='both', which='major',
                          direction='out')

    # create type legend
    type_legend_patches = [
        mpatches.Patch(color=c, label=l) for l, c in colors.items()
    ]
    labels = list(colors.keys())
    g.ax_heatmap.legend(labels=labels, loc='center left',
                        bbox_to_anchor=(1.3, .85),
                        handles=type_legend_patches, frameon=False,
                        title='Super-community', handlelength=.6,
                        handleheight=1)

    # move axis labels
    g.ax_heatmap.set_xticklabels(g.ax_heatmap.get_xticklabels(),
                                 rotation=75, ha='right')
    g.ax_heatmap.set_yticklabels(g.ax_heatmap.get_yticklabels(), rotation=0)
    g.ax_row_colors.set_xticklabels(g.ax_row_colors.get_xticklabels(),
                                    rotation=75, ha='right')
    # Shift the rotated x tick labels slightly to the right.
    offset = matplotlib.transforms.ScaledTranslation(.05, 0,
                                                     fig.dpi_scale_trans)
    for ax in [g.ax_heatmap, g.ax_row_colors]:
        for label in ax.xaxis.get_majorticklabels():
            label.set_transform(label.get_transform() + offset)

    # other cosmetics
    g.ax_heatmap.tick_params(axis='both', which='both', bottom=False,
                             right=False)
    g.ax_row_colors.tick_params(axis='both', which='both', bottom=False,
                                right=False)
    g.ax_heatmap.set_ylabel('Community')

    save_fig(fig, 'fig2', 1, plot_formats=['png', 'pdf'], dpi=600)
def fig4b():
    """Plot the mean rank of each super-community with bootstrap CIs (fig4b).

    For every ``super_community`` in the data returned by ``read_data`` the
    mean ``h_index_rank`` (x) and mean ``num_retweets_rank`` (y) are drawn as
    a single point. Error bars are the absolute distances between the mean
    and the two bounds returned by ``bootstrap_ci``. Both axes are inverted,
    a dashed diagonal marks equal ranks and two arrows label the over- and
    under-ranked sides. The figure is passed to ``save_fig`` as ``'fig4b'``.
    """
    rank_cols = ['h_index_rank', 'num_retweets_rank']

    palette = [
        '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
        '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'
    ]
    color_dict = {
        'International expert': palette[4],
        'National expert': palette[1],
        'Political': palette[2],
        'Other': '.5'
    }

    df = read_data()
    fig, ax = plt.subplots(1, 1, figsize=(2, 2), sharex=False, sharey=False)

    for _t, grp in df.groupby('super_community'):
        # Restrict the mean to the rank columns. The group still contains
        # the string column 'super_community', and averaging it raises a
        # TypeError with pandas >= 2.0.
        means = grp[rank_cols].mean()
        ci = bootstrap_ci(grp, rank_cols)

        # errorbar expects offsets from the point as [[lower], [upper]];
        # built in a new dict so the CI dict is not rewritten while iterating.
        err = {
            col: [[abs(means[col] - ci[col][0])],
                  [abs(means[col] - ci[col][1])]]
            for col in rank_cols
        }
        ax.errorbar(means['h_index_rank'], means['num_retweets_rank'],
                    xerr=err['h_index_rank'], yerr=err['num_retweets_rank'],
                    label=_t, c=color_dict[_t], ms=5, marker='.',
                    capsize=.8, elinewidth=.5, markeredgewidth=.5)

    # Inverted axes: limits are given as (max, 0).
    num_ranks = len(df)
    ax.set_xlim((num_ranks, 0))
    ax.set_ylim((num_ranks, 0))
    ax.grid(True)

    # num ticks
    num_ticks = 4
    ax.xaxis.set_major_locator(plt.MaxNLocator(num_ticks))
    ax.yaxis.set_major_locator(plt.MaxNLocator(num_ticks))

    # annotate: two arrows from the centre, perpendicular to the diagonal
    len_arr = 500
    center = num_ranks / 2
    arrow_props = dict(fc='.15', shrink=.1, width=.15, headwidth=2,
                       headlength=2)
    ax.annotate('', xy=(center - len_arr, center + len_arr),
                xytext=(center, center), ha='left', va='top',
                arrowprops=arrow_props)
    ax.annotate('', xy=(center + len_arr, center - len_arr),
                xytext=(center, center), ha='left', va='top',
                arrowprops=arrow_props)
    ax.text(center + len_arr - 200, center - len_arr,
            'Over-ranked\nby retweets\n(high virality)',
            ha='right', va='bottom', fontsize=6)
    ax.text(center - len_arr, center + len_arr,
            'Under-ranked\nby retweets\n(low virality)',
            ha='left', va='top', fontsize=6)

    # axis labels
    ax.set_xlabel(r'Avg. rank $h$-index $r_h$')
    ax.set_ylabel('Avg. rank retweets $r_{rt}$')
    sns.despine()
    ax.set_aspect('equal')

    # diagonal line
    ax.plot([0, num_ranks], [0, num_ranks], color='0.15', ls='dashed',
            lw=.5, zorder=0)

    save_fig(fig, 'fig4b', 2, dpi=600, plot_formats=['png', 'pdf', 'svg'])
def main():
    """Draw the four-panel attention figure (fig3).

    Panels, each with one line per super-community plus a dotted ``'Total'``:

    * A (top left): number of users, from ``get_community_size_data``.
    * B (top right): the ``received`` interaction columns divided by the
      community sizes.
    * C (bottom left): the ``inter_cluster`` interaction columns.
    * D (bottom right): the ``intra_cluster`` interaction columns.

    Dashed vertical lines labelled ``a`` and ``b`` are drawn in every panel
    at index positions 2 and 9 of the interaction data. The figure is passed
    to ``save_fig`` under the name ``'fig3'``.
    """
    # read data
    df_int = get_interaction_data()
    df_size = get_community_size_data()
    # Copy the row slice before adding a column so the write goes to an
    # independent frame (avoids pandas' chained-assignment warning).
    df_size = df_size[df_int.index.min():df_int.index.max()].copy()
    df_size['Total'] = df_size.sum(axis=1)

    # Split the interaction table by column prefix. Each column is renamed
    # to the part after its last underscore.
    dfs = {}
    for _type in ['inter_cluster', 'intra_cluster', 'received']:
        df = df_int[df_int.columns[df_int.columns.str.startswith(_type)]].copy()
        df = df.rename(columns={col: col.split('_')[-1] for col in df.columns})
        df['Total'] = df.sum(axis=1)
        dfs[_type] = df

    col_order = ['Total', 'International expert', 'National expert',
                 'Political', 'Other']
    palette = [c['color'] for c in matplotlib.rcParams['axes.prop_cycle']]
    color_dict = {'International expert': palette[4],
                  'National expert': palette[1],
                  'Political': palette[2],
                  'Other': '.5',
                  'Total': '0.15'}
    ls_dict = {'International expert': '-', 'National expert': '-',
               'Political': '-', 'Other': '-', 'Total': 'dotted'}
    m_dict = {'International expert': '.', 'National expert': '.',
              'Political': '.', 'Other': '.', 'Total': None}
    z_order = {'International expert': 4, 'National expert': 3,
               'Political': 2, 'Other': 1, 'Total': 0}

    # plot
    fig, all_axes = plt.subplots(2, 2, figsize=(4.5, 3.5), sharex=False,
                                 sharey=False)

    # specs
    lw = 1
    ms = 5

    def plot_columns(ax, frame):
        """Draw one styled line per entry of ``col_order`` from ``frame``."""
        for col in col_order:
            ax.plot(frame.index.values, frame[col].values, label=col,
                    color=color_dict[col], marker=m_dict[col], ms=ms, lw=lw,
                    ls=ls_dict[col], zorder=z_order[col])

    # panels A
    ax = all_axes[0][0]
    plot_columns(ax, df_size)
    ax.ticklabel_format(axis="y", style="sci", scilimits=(0, 0))
    ax.set_ylabel('$N$')
    ax.set_title('Number of users', fontsize=7)
    ax.set_ylim((0, 0.6*10**7))
    ax.grid()

    # panels B
    ax = all_axes[0][1]
    attention = dfs['received'] / df_size  # normalize by size of type
    plot_columns(ax, attention)
    ax.set_ylabel('$A_u$')
    ax.set_title('Avg. attention per user', fontsize=7)
    ax.set_ylim((0, 8))
    ax.grid()

    # panels C
    ax = all_axes[1][0]
    plot_columns(ax, dfs['inter_cluster'])
    ax.set_title('External attention component', fontsize=7)
    ax.set_ylabel(r'$a^{ext}$')
    ax.set_xlabel('')
    ax.grid()
    ax.set_ylim((0, .35))

    # panels D
    ax = all_axes[1][1]
    plot_columns(ax, dfs['intra_cluster'])
    ax.set_title('Internal attention component', fontsize=7)
    ax.set_ylabel(r'$a^{int}$')
    ax.set_xlabel('')
    ax.grid()
    ax.set_ylim((0, .95))

    # vertical lines, labelled 'a', 'b' (chr(97) == 'a')
    time_index = dfs['intra_cluster'].index
    for i, time_pos in enumerate([2, 9]):
        for ax in all_axes.flat:
            ax.axvline(time_index[time_pos], ls='dashed', lw=.5, c='.15',
                       zorder=0)
            ax.annotate(chr(97 + i),
                        (time_index[time_pos], ax.get_ylim()[1]*.95),
                        xytext=(2, 0), textcoords='offset points',
                        ha='left', va='center')

    # tick style
    for ax in all_axes.flat:
        ax.xaxis.set_major_locator(mdates.MonthLocator())
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%b'))
        ax.tick_params(axis='x', direction='out', which='minor', size=2)
        ax.tick_params(axis='x', direction='out', which='major', size=2)
        ax.tick_params(axis='y', which='major', direction='out', size=2)

    sns.despine()
    fig.tight_layout()
    fig.subplots_adjust(hspace=.5)
    save_fig(fig, 'fig3', 1, dpi=600, plot_formats=['png', 'pdf'])