# Plot the MTLD score of each age-bin partition of the corpus, with a
# best-fit line drawn through the per-partition scores.

# NOTE(review): NUM_PARTS and the spacing/padding constants further down are
# not referenced anywhere in this section; they are kept unchanged in case
# other code reads them.
NUM_PARTS = 16

# make equal-sized partitions corresponding to approximately equal sized age bins
age_bin2tokens_ = make_age_bin2data(CORPUS_NAME)
age_bin2tokens = make_age_bin2data_with_min_size(age_bin2tokens_)
num_bins = len(age_bin2tokens)

WSPACE = 0.0
HSPACE = 0.0
WPAD = 0.0
HPAD = 0.0
PAD = 0.2

# apply mtld() to every partition, in the mapping's iteration order
y = [mtld(part) for part in age_bin2tokens.values()]

# fig
_, ax = plt.subplots(figsize=(6, 4), dpi=configs.Fig.dpi)
# MTLD expands to "Measure of Textual Lexical Diversity"; the previous label
# had the words "Textual" and "Lexical" swapped.
ax.set_ylabel('Measure of\nTextual Lexical Diversity', fontsize=configs.Fig.ax_fontsize)
ax.set_xlabel('Partition', fontsize=configs.Fig.ax_fontsize)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.tick_params(axis='both', which='both', top=False, right=False)

# plot: partitions are numbered 1..num_bins on the x-axis
x = np.arange(1, num_bins + 1)
ax.plot(x, y, '-')
plot_best_fit_line(ax, x, y, x_pos=0.70, y_pos=0.1)
plt.show()
# NOTE(review): the statements down to `taylor_exponents.append(alpha)` read
# part_id, num_types, alpha, amp, x and y, none of which are assigned in this
# section -- presumably they are the tail of a per-partition loop whose header
# lies above this point; confirm the intended indentation before running.
plt.title(f'{CORPUS_NAME}\nnum_types={num_types:,}, part {part_id + 1} of {num_bins}')
ax.set_xlabel('mean')
ax.set_ylabel('std')
for side in ('right', 'top'):
    ax.spines[side].set_visible(False)
ax.tick_params(axis='both', which='both', top=False, right=False)

# plot: raw (mean, std) points plus the fitted power law amp * x ** alpha,
# both on log-log axes
annotation = 'Taylor\'s exponent: {:.3f}'.format(alpha)
ax.text(x=1.0, y=0.3, s=annotation)
ax.loglog(x, y, '.', markersize=2)
fitted = amp * (x ** alpha) + 0
ax.loglog(x, fitted, '.', markersize=2)
plt.show()

# remember this partition's exponent for the summary figure below
taylor_exponents.append(alpha)

# fig: collected Taylor exponents, one per corpus partition
_, ax = plt.subplots(figsize=(6, 4), dpi=configs.Fig.dpi)
ax.set_xlabel('Corpus Partition', fontsize=configs.Fig.ax_fontsize)
ax.set_ylabel('Taylor Exponent', fontsize=configs.Fig.ax_fontsize)
for side in ('right', 'top'):
    ax.spines[side].set_visible(False)
ax.tick_params(axis='both', which='both', top=False, right=False)

# plot: partitions are numbered from 1 on the x-axis
x = np.arange(num_bins) + 1
ax.plot(x, taylor_exponents, '-')
plot_best_fit_line(ax, x, taylor_exponents, x_pos=0.70, y_pos=0.1)
plt.show()