def draw_main(source, target, pred, field, itr):
    """Write the images that describe one sample to disk.

    The first element of ``source``, ``target`` and ``pred`` is saved with
    ``save_fig`` and the first element of ``field`` with ``save_field``.
    Two absolute-difference images follow: ``|pred - target|`` (suffix
    ``_mydiff``) and ``|source - target|`` (suffix ``_ogdiff``).

    Every output name is ``log_path + str(itr)`` followed by a suffix.
    """
    prefix = log_path + str(itr)

    # Raw inputs / outputs of the sample.
    for images, suffix in ((source, '_source'),
                           (target, '_target'),
                           (pred, '_pred')):
        save_fig(images[0], prefix + suffix)
    save_field(field[0], prefix + '_field')

    # Absolute error of the prediction, then of the untouched source.
    for suffix, reference in (('_mydiff', pred), ('_ogdiff', source)):
        save_fig(torch.abs(reference[0] - target[0]), prefix + suffix)
def plot_fig3abc():
    """Draw the three stacked heatmaps of figure 3 (panels a-c) and save them.

    Three CSV files are read from ``DATA_DIR``:

    * ``fig3_label_imbalance.csv`` -> top panel ("Label imbalance")
    * ``fig3_fleiss.csv``          -> middle panel ("Annotator agreement")
    * ``fig3_variability.csv``     -> bottom panel ("Corpus variability")

    The panels share the x axis. Each heatmap gets its own colorbar, which is
    styled in a common loop at the end. The figure is written through
    ``save_fig`` as PNG and PDF.
    """
    # read data
    df_fleiss = pd.read_csv(os.path.join(DATA_DIR, 'fig3_fleiss.csv'))
    df_var = pd.read_csv(os.path.join(DATA_DIR, 'fig3_variability.csv'),
                         index_col='label')
    df_dis = pd.read_csv(os.path.join(DATA_DIR, 'fig3_label_imbalance.csv'),
                         index_col='label')

    # plot stuff
    height_ratios = [2.5, .6, 3]
    fig, axes = plt.subplots(
        3, 1, figsize=(2.5, 4.5), sharex=True,
        gridspec_kw=dict(hspace=0, height_ratios=height_ratios))
    # Colorbar shrink is inversely proportional to the row's height ratio.
    cbar_opts = [
        dict(pad=.08, shrink=.4 * 2.5 / hr, aspect=15)
        for hr in height_ratios
    ]
    plot_cbar = True
    # NOTE: annot=False disables cell annotations, so the fmt / annot_kws
    # arguments passed below currently have no visible effect.
    common_args = dict(lw=.2, ec='white', annot=False, square=True)

    # plot distribution
    ax = axes[0]
    sns.heatmap(data=df_dis, cmap='Purples', cbar=plot_cbar,
                cbar_kws=dict(label="Corpus size", **cbar_opts[0]),
                ax=ax, fmt='d', annot_kws=dict(fontsize=5), **common_args)
    ax.set_xticklabels(df_dis.columns.tolist())
    ax.tick_params(axis='both', direction='out')
    ax.set_title('Label imbalance', fontsize=7)

    # plot fleiss (a single-row heatmap built from the kappa column)
    ax = axes[1]
    sns.heatmap(data=[df_fleiss.fleiss_kappa], cbar=plot_cbar, cmap='Reds',
                cbar_kws=dict(label="Fleiss' Kappa", **cbar_opts[1]),
                ax=ax, fmt='', annot_kws=dict(fontsize=5), **common_args)
    ax.tick_params(axis='both', direction='out')
    ax.set_yticklabels(['all'], rotation=0)
    ax.set_title('Annotator agreement', fontsize=7)

    # plot variance; the colormap is centred on the mean of the 'all' row
    ax = axes[2]
    sns.heatmap(data=df_var, cmap='Greens', cbar=plot_cbar,
                center=df_var.loc['all'].mean(),
                cbar_kws=dict(label='Embedding variance', **cbar_opts[2]),
                ax=ax, **common_args)
    ax.set_title('Corpus variability', fontsize=7)

    # ticks formatting: rotate the x labels and nudge them to the right
    ax.tick_params(axis='both', direction='out')
    ax.set_xticklabels(ax.get_xticklabels(), rotation=75, ha='right')
    offset = matplotlib.transforms.ScaledTranslation(.06, 0,
                                                     fig.dpi_scale_trans)
    for label in ax.xaxis.get_majorticklabels():
        label.set_transform(label.get_transform() + offset)

    # common cosmetics
    for ax, cbar_ticks in zip(axes, [3, 3, 4]):
        # set labels invisible
        ax.xaxis.label.set_visible(False)
        ax.yaxis.label.set_visible(False)
        # colorbar cosmetics
        cbar = ax.collections[0].colorbar
        cbar.ax.tick_params(axis='y', direction='out')
        cbar.ax.yaxis.label.set_rotation(90)
        cbar.ax.yaxis.label.set_ha('center')
        cbar.ax.yaxis.label.set_va('top')
        cbar.ax.yaxis.set_major_locator(
            matplotlib.ticker.MaxNLocator(cbar_ticks))

    save_fig(fig, 'fig3abc', version=1, plot_formats=['png', 'pdf'], dpi=800)
def plot_fig3d(cached=True):
    """Draw the 2x2 grid of similarity heatmaps (figure 3d) and save it.

    One CSV per key (``all``, ``positive``, ``neutral``, ``negative``) is read
    from ``DATA_DIR`` as ``fig3d_<key>.csv``. All matrices are min-max
    normalised with a *shared* minimum and maximum taken over the strict
    upper triangles, so the panels are comparable. Only the upper triangle
    (including the diagonal) of each matrix is drawn.

    Args:
        cached: Unused by this function; kept so existing callers keep
            working.
    """
    keys = ['all', 'positive', 'neutral', 'negative']
    sim_matrix = {
        k: pd.read_csv(os.path.join(DATA_DIR, f'fig3d_{k}.csv'), index_col=0)
        for k in keys
    }

    fig, axes_grid = plt.subplots(2, 2, figsize=(4, 4), sharex=True,
                                  sharey=True)
    # Row-major flattening: top-left, top-right, bottom-left, bottom-right.
    axes = list(axes_grid.flat)

    # Shared normalisation range over the strict upper triangles (k=1 skips
    # the diagonal).
    upper_values = []
    for df in sim_matrix.values():
        values = df.values
        upper_values.append(values[np.triu_indices_from(values, k=1)])
    min_val = min(vals.min() for vals in upper_values)
    max_val = max(vals.max() for vals in upper_values)

    cmap = 'Blues_r'
    for key, ax in zip(keys, axes):
        # normalize
        df = (sim_matrix[key] - min_val) / (max_val - min_val)
        # hide the strict lower triangle
        mask = np.zeros(df.shape, dtype=bool)
        mask[np.tril_indices(len(df), k=-1)] = True
        sns.heatmap(data=df, mask=mask, cmap=cmap, cbar=False,
                    cbar_kws=dict(label='Normalized\ncosine similarity',
                                  fraction=.025, pad=.08),
                    vmax=1, center=.5, lw=.2, ec='white', ax=ax, square=True)
        # move axis labels
        ax.tick_params(axis='both', direction='out')
        ax.set_xticklabels(ax.get_xticklabels(), rotation=75, ha='right')
        offset = matplotlib.transforms.ScaledTranslation(
            .05, 0, fig.dpi_scale_trans)
        for label in ax.xaxis.get_majorticklabels():
            label.set_transform(label.get_transform() + offset)
        # title
        ax.set_title(key, fontsize=7)

    # add colorbar (one for the whole figure; `ax` is the last panel drawn)
    cbar = add_colorbar(fig, ax, x=.9, y=.42, length=.013, width=.2,
                        vmin=0, vmax=1,
                        label='Normalized\ncosine similarity',
                        cmap=cmap, orientation='vertical')
    cbar.ax.tick_params(axis='y', direction='out')
    cbar.ax.yaxis.label.set_ha('center')
    cbar.ax.yaxis.label.set_va('top')
    cbar.ax.yaxis.set_major_locator(matplotlib.ticker.MaxNLocator(5))

    fig.subplots_adjust(hspace=.2, wspace=0)
    fig.suptitle('Corpus similarity', fontsize=7, y=.96)

    # save
    save_fig(fig, 'fig3d', version=1, plot_formats=['png', 'pdf'], dpi=800)
def main():
    """Plot the sentiment index over time for FastText and BERT (figure 4).

    Both data frames are restricted to the window 2017-07-01 .. 2020-10-24
    (UTC), melted to long form (one row per ``created_at`` / ``trained_at``
    pair) and passed through ``select_intervals``. Each model gets its own
    row in a two-panel figure with one line per ``trained_at`` value; only
    the top panel carries the legend. The figure is saved through
    ``save_fig`` as PNG and PDF.
    """
    df_bert = read_bert_data()
    df_fasttext = read_fasttext_data()

    s_date = datetime(2017, 7, 1, tzinfo=pytz.utc)
    e_date = datetime(2020, 10, 24, tzinfo=pytz.utc)

    fig, axes = plt.subplots(2, 1, figsize=(3.5, 3.5), sharex=True,
                             sharey=True)
    panels = zip(axes, [df_fasttext, df_bert], ['FastText', 'BERT'])
    for ax, df, title in panels:
        with_legend = ax == axes[0]

        # wide -> long, restricted to the plotting window
        long_df = (
            df[s_date:e_date]
            .reset_index()
            .melt(id_vars=['created_at'], var_name='trained_at',
                  value_name='sentiment')
        )
        long_df = select_intervals(long_df, s_date, e_date)
        palette = sns.color_palette('inferno',
                                    n_colors=long_df.trained_at.nunique())

        # plot
        sns.lineplot(x='created_at', y='sentiment', hue='trained_at',
                     palette=palette, data=long_df,
                     hue_order=sorted(long_df.trained_at.unique()),
                     solid_capstyle='round', legend=with_legend, ax=ax)

        # legend: rebuilt to the right of the axes
        if with_legend:
            handles, labels = ax.get_legend_handles_labels()
            leg = ax.legend(handles, labels, loc='center left',
                            bbox_to_anchor=(1.05, 0), borderaxespad=0.,
                            frameon=False, title='Trained at')
            leg._legend_box.align = "left"

        # formatting
        ax.grid(True)
        x_axis = ax.xaxis
        x_axis.set_minor_locator(mdates.MonthLocator())
        x_axis.set_major_locator(mdates.MonthLocator(bymonth=[1, 7]))
        x_axis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
        ax.set_title(title, pad=4)

        # labels
        x_axis.label.set_visible(False)
        ax.yaxis.label.set_visible(False)
        ax.set_ylim((0, .83))

    fig.subplots_adjust(hspace=.15)
    # single y label shared by both panels
    fig.text(.02, .5, 'Sentiment index $s$', rotation=90, ha='left',
             va='center')

    # save
    save_fig(plt.gcf(), 'fig4', version=1, plot_formats=['png', 'pdf'])
def plot_fig2():
    """Plot F1-macro and relative performance change per model (figure 2).

    The figure is a 2x2 grid: one column per model (FastText, BERT), with the
    top row showing ``f1_macro`` over ``centroid_day`` and the bottom row
    showing ``rel_concept_drift``. One line is drawn per training date.
    Square markers highlight the points where the training date equals the
    evaluation date. Legends are attached to the right-hand column only.
    The figure is saved through ``save_fig`` as PNG and PDF.
    """
    df_fasttext = read_data_fasttext()
    df_bert = read_data_bert()

    # constants
    ms = 2
    lw = .8
    ms_square = 2.5
    metric = 'f1_macro'

    # plot stuff
    fig, all_axes = plt.subplots(2, 2, sharex=True, figsize=(3.5, 3))
    # One (top, bottom) axes pair per column of the grid.
    axes_by_column = zip(all_axes[0], all_axes[1])
    panels = zip([df_fasttext, df_bert], axes_by_column, ['FastText', 'BERT'])
    for i, (df, (ax1, ax2), title) in enumerate(panels):
        palette = sns.color_palette('inferno',
                                    n_colors=df.centroid_day_train.nunique())

        # compute drift score
        df = compute_concept_drift_score(df, metric=metric)

        # train markers: mean per (train day, eval day), keeping only the
        # rows where both days coincide
        df_markers = df.groupby(['centroid_day_train', 'centroid_day'])[
            [metric, 'rel_concept_drift']].mean().reset_index().copy()
        df_markers = df_markers[
            df_markers.centroid_day_train == df_markers.centroid_day]

        # performance score
        # .copy() so the column assignment below works on an independent
        # frame instead of a slice (avoids SettingWithCopyWarning).
        df = df[[
            'centroid_day_train', 'centroid_day', metric, 'repeat',
            'rel_concept_drift'
        ]].copy()
        df['train_day'] = df['centroid_day_train'].apply(
            lambda s: s.strftime('%Y-%m-%d'))
        df = df.sort_values(['train_day', 'centroid_day'])

        # NOTE(review): `ci` is deprecated in recent seaborn releases in
        # favour of `errorbar`; kept as-is for the seaborn version this
        # script was written against - confirm before upgrading.
        sns.lineplot(x='centroid_day', y=metric, hue='train_day', ci=95,
                     err_style='band', marker='o', lw=lw, ms=ms, mec='none',
                     palette=palette, data=df, legend=i == 1, ax=ax1)
        sns.lineplot(x='centroid_day', y=metric, hue='centroid_day_train',
                     ci=None, lw=0, marker='s', mec='none', ms=ms_square,
                     palette=palette, data=df_markers, ax=ax1, legend=False)

        # model drift score
        sns.lineplot(x='centroid_day', y='rel_concept_drift',
                     hue='centroid_day_train', data=df, palette=palette,
                     marker='o', lw=lw, ms=ms, mec='none', legend=False,
                     ax=ax2)
        sns.lineplot(x='centroid_day', y='rel_concept_drift',
                     hue='centroid_day_train', ci=None, lw=0, marker='s',
                     mec='none', ms=ms_square, palette=palette,
                     data=df_markers, ax=ax2, legend=False)

        # axis labels: only the left column keeps y labels / tick labels
        if i == 0:
            ax1.set_ylabel('F1-macro')
            ax2.set_ylabel('% Relative\nperformance change')
        else:
            ax1.yaxis.label.set_visible(False)
            ax2.yaxis.label.set_visible(False)
            ax1.yaxis.set_ticklabels([])
            ax2.yaxis.set_ticklabels([])
        ax2.locator_params(axis='y', nbins=5)
        # The y axes are not shared, so fix identical limits per row.
        ax1.set_ylim((.32, .65))
        ax2.set_ylim((-28, 13))

        # titles
        ax1.set_title(title)

        # common formatting
        for ax in [ax1, ax2]:
            ax.grid(True)
            ax.xaxis.set_minor_locator(mdates.MonthLocator())
            ax.xaxis.set_major_locator(mdates.MonthLocator(bymonth=[1]))
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
            # lims
            ax.set_xlim((datetime(2018, 7, 1), datetime(2020, 9, 1)))
            ax.xaxis.label.set_visible(False)

        # create legends
        if i == 1:
            handles, labels = ax1.get_legend_handles_labels()
            legend_opts = dict(loc='center left', bbox_to_anchor=(1.1, .2),
                               borderaxespad=0., handlelength=1,
                               handletextpad=.8, frameon=False)
            leg = ax1.legend(handles, labels, title='Trained on data up to',
                             **legend_opts)
            leg._legend_box.align = "left"
            # Hand-built legend explaining the two marker shapes.
            leg2 = [
                Line2D([0], [0], lw=0, marker='s', mec='none', ms=3,
                       color=palette[0], label='Train & evaluate'),
                Line2D([0], [0], lw=0, marker='o', mec='none', ms=3,
                       color=palette[0], label='Evaluate')
            ]
            ax2.legend(handles=leg2, **legend_opts)

    fig.subplots_adjust(hspace=.1, wspace=.08)

    # save
    save_fig(fig, 'fig2', version=1, plot_formats=['png', 'pdf'])
def main():
    """Draw the stacked bar chart of annotation counts per bin (figure 1).

    The data frame from ``read_data`` is indexed by ``centroid_day`` and its
    first row is dropped. Four helper columns (``b1_train``, ``b1_test``,
    ``train``, ``test``) mark which bins hold training / evaluation samples;
    whatever remains of ``all`` after subtracting them is drawn as "Unused".
    Three bins are annotated ($b_0$, $b_1$, $b_{8}$) and a rotated curly
    brace marks the training window for $b_1$. The figure is saved through
    ``save_fig`` as PNG and PDF.
    """
    # read data
    df = read_data().set_index('centroid_day')
    # first datapoint was skipped (by accident)
    df = df.iloc[1:]

    # plot
    fig, ax = plt.subplots(1, 1, figsize=(3, 1.8))

    # compute baselines: every helper column starts at zero ...
    for column in ('b1_train', 'b1_test', 'train', 'test'):
        df[column] = 0
    # ... and is then filled for the positional bin ranges it covers
    fills = (
        ('b1_train', slice(1, 5), num_train_samples_per_bin),   # b1 train
        ('b1_test', slice(4, None), num_test_samples_per_bin),  # b1 test
        ('train', slice(None, 1), num_train_samples_per_bin),   # train
        ('train', slice(5, None), num_train_samples_per_bin),
        ('test', slice(3, 4), num_test_samples_per_bin),        # test
    )
    for column, positions, count in fills:
        df.loc[df.index[positions], column] = count

    df['other'] = df['b1_train'] + df['b1_test'] + df['train'] + df['test']
    # keep the original totals for the annotations; 'all' becomes "unused"
    df['total'] = df['all'].copy()
    df['all'] -= df['other']

    # plot (order of the specs fixes both draw order and legend order)
    width = 40
    days = df.index.to_pydatetime()
    hatching = 6 * '/'
    bar_specs = (
        ('train', dict(color='C0', label='Train')),
        ('test', dict(bottom=df['train'] + df['b1_train'], color='C3',
                      label='Eval')),
        ('b1_train', dict(color='C0', hatch=hatching,
                          label='Training data $b_1$', ec='white')),
        ('b1_test', dict(bottom=df['b1_train'] + df['train'], color='C3',
                         hatch=hatching, label='Eval datasets $b_1$',
                         ec='white')),
        ('all', dict(bottom=df['other'].values.tolist(), color='.8',
                     label='Unused')),
    )
    for column, style in bar_specs:
        ax.bar(days, df[column].values.tolist(), width=width, **style)

    # annotate selected bins just above their total height
    for text, pos in (('$b_0$', 3), ('$b_1$', 4), ('$b_{8}$', 11)):
        ax.annotate(text, (df.index[pos], df.iloc[pos].total), ha='center',
                    va='bottom', xytext=(0, 1), textcoords='offset points')

    # legend
    leg = plt.legend(loc='center left', title='Annotation type',
                     bbox_to_anchor=(1.05, .5), frameon=False,
                     handleheight=.4, handlelength=1.2)
    leg._legend_box.align = "left"

    # tick frequency
    ax.xaxis.set_minor_locator(mdates.MonthLocator())
    ax.xaxis.set_major_locator(mdates.MonthLocator(bymonth=[1]))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    ax.yaxis.set_major_locator(matplotlib.ticker.MultipleLocator(base=400))

    # tick direction
    for which, size in (('minor', 2), ('major', 4)):
        ax.tick_params(axis='x', direction='out', which=which, zorder=2,
                       size=size)
    ax.set_ylim((0, 1500))
    ax.set_xlim((datetime(2017, 10, 1), datetime(2020, 9, 1)))
    ax.grid(True)

    # annotations: curly brace drawn in axes coordinates, rotated by 90
    # degrees around a fixed pivot
    ts = ax.transAxes
    pivot = ts.transform([-0.092, -0.065])
    rotation = mpl.transforms.Affine2D().rotate_deg_around(*pivot, 90)
    brace = curly_brace(x=.1, y=.1, width=.03, height=.54, lw=.5,
                        pointing='right', transform=ts + rotation,
                        color='.15')
    ax.add_artist(brace)
    ax.text(.25, .85, 'training window\nfor $b_1$', ha='center', va='bottom',
            transform=ts, fontsize=7)

    # labels
    ax.set_ylabel('Number of annotations')

    # cosmetics
    sns.despine()

    # save
    save_fig(fig, 'fig1', version=1, plot_formats=['png', 'pdf'])