def plot_selections_per_epoch(
        data_file='results/update_grades_over_bach_chorales.csv',
        plt_dir='plots/augmented-generation/'):
    """
    Plot, as a bar chart, the number of generations selected in each epoch.

    A generation counts as selected when its grade is strictly below the
    threshold returned by get_threshold() for the 'grade' column of the Bach
    grades file with aggregate='75p'.

    Arguments
        data_file: file containing the grades, read through read_training_data
        plt_dir: directory to save the plot (created through ensure_dir)

    The figure is saved as
    <plt_dir>/generations_passing_threshold_per_epoch.png and then closed.
    """
    thres = get_threshold(
        data_file='experiments/ablations/reg_pe_no_oe/bach_grades.csv',
        column='grade',
        aggregate='75p',
    )
    data_dict = read_training_data(data_file=data_file, feature='grade')
    # one count per entry of data_dict, in iteration order
    picked = [
        np.sum([1 for x in data if x < thres]) for data in data_dict.values()
    ]

    # plt.subplots() already creates the figure; a preceding plt.figure()
    # would only leave an extra, empty figure open.
    fig, ax = plt.subplots()
    plt.style.use('seaborn-whitegrid')
    ax.grid(False)
    # bars are positioned at 1..len(picked)
    rects = plt.bar(range(1, len(picked) + 1), picked)
    label_bars(rects)
    plt.xlabel('Epoch')
    plt.ylabel('Number of generations passing threshold')
    plt.title('Number of Generations Passing Threshold in Each Epoch')

    # make sure the output directory exists, as the sibling plot functions do
    ensure_dir(plt_dir)
    plt.savefig(
        os.path.join(plt_dir, 'generations_passing_threshold_per_epoch.png'))
    # release the figure so repeated calls do not accumulate open figures
    plt.close(fig)
def plot_histogram_per_iteration(data_file,
                                 feature='grade',
                                 plt_dir='plots/augmented-generation/',
                                 threshold=None):
    """
    Visualize model updates by drawing one histogram of `feature` per
    training iteration, all in a single 2x5 grid of subplots.

    Arguments
        data_file: file containing the update data
        feature: feature to plot; also used as x-label and in the file name
        plt_dir: directory to save the plot
        threshold: forwarded unchanged to read_training_data

    The figure is saved as <plt_dir>/<feature>_update_dist.png and closed.
    """
    # update data as a dictionary: iteration -> values of `feature`
    per_iteration = read_training_data(data_file=data_file,
                                       feature=feature,
                                       threshold=threshold)

    plt.figure(figsize=(20, 10))
    plt.style.use('seaborn-whitegrid')

    # NOTE(review): the iteration key is used directly as the subplot index,
    # so keys are presumably expected to lie in 1..10 -- confirm with the data.
    for iteration, values in per_iteration.items():
        panel = plt.subplot(2, 5, iteration)
        panel.hist(values, alpha=0.7)
        panel.set_xlabel(feature)
        panel.set_title(f'Iteration {iteration}')

    # the rect passed to tight_layout keeps the subplots out of the top strip,
    # where the super-title is drawn next
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    heading = (
        f'{feature} Distribution of Generations at Each Iteration of Training')
    plt.suptitle(heading, fontsize=20)

    ensure_dir(plt_dir)
    out_path = os.path.join(plt_dir, f'{feature}_update_dist.png')
    plt.savefig(out_path)
    plt.close()
def main():
    """
    Compare the grade distributions of Bach chorales and of generations from
    three models: print the median and standard deviation of each, then draw
    violin plots of all four distributions into 'plots/'.
    """
    grade_cap = 50

    bach_grades = list(pd.read_csv(f'{bach_dir}/grades.csv')['grade'])

    # NOTE: an earlier variant took the Aug-Gen and base grades from
    # read_training_data (entries 20 and 17 respectively) instead of the
    # 351_mocks files read below.
    baseline_all = list(
        read_training_data(f'{baseline_dir}/grades.csv', feature='grade')[17])

    aug_gen_grades = pd.read_csv(f'{aug_gen_dir}/351_mocks/grades.csv')['grade']
    base_grades = pd.read_csv(f'{base_dir}/351_mocks/grades.csv')['grade']

    # keep only grades strictly below the cap for the two baselines
    # (the Bach and Aug-Gen grades are left unfiltered)
    base_grades = [g for g in base_grades if g < grade_cap]
    baseline_all = [g for g in baseline_all if g < grade_cap]

    data_dict = {}
    data_dict['Bach'] = bach_grades
    data_dict['Aug-Gen\n' + r'($t=Q_3$ of Bach grades)'] = aug_gen_grades
    data_dict['Baseline-none\n' + r'($t=-\infty$)'] = base_grades
    data_dict['Baseline-all\n' + r'($t=\infty$)'] = baseline_all

    # summary statistics, one model after the other
    for name in data_dict:
        grades = data_dict[name]
        print(name)
        print(np.median(grades))
        print(np.std(grades))

    plot_violinplots(
        data_dict=data_dict,
        plt_title='Grade Distribution of Generations from Different Models',
        plt_dir='plots/')
def plot_boxplot_per_epoch(
        data_file='results/update_grades_over_bach_chorales.csv',
        feature='grade',
        plt_dir='plots/augmented-generation/',
        threshold=None):
    """
    Visualize model updates by plotting a boxplot of the `feature`
    distribution at each epoch, with a horizontal line at the Bach threshold.

    Arguments
        data_file: file containing update grades
        feature: feature of interest (either overall grade or a feature
            distance)
        plt_dir: directory to save plots
        threshold: lower threshold for inclusion; when not None it is
            forwarded to read_training_data

    The figure is saved as <plt_dir>/<feature>_update_boxplots.png and closed.
    """
    # Forward the caller's threshold to the reader (it used to be accepted but
    # never applied). It is only passed when given, so the default call is
    # unchanged.
    read_kwargs = {'data_file': data_file, 'feature': feature}
    if threshold is not None:
        read_kwargs['threshold'] = threshold
    # read update data as dictionary
    data_dict = read_training_data(**read_kwargs)

    # plot -- plt.subplots() creates the figure, no separate plt.figure() needed
    plt.style.use('seaborn-whitegrid')
    plt.rc('xtick', labelsize=11)
    plt.rc('ytick', labelsize=11)
    plt.rc('axes', titlesize=13)
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.xaxis.grid(False)
    ax.boxplot(list(data_dict.values()))

    # boxplot draws the boxes at positions 1..n; label each with its key
    # NOTE(review): this assumes the keys are the consecutive ints 0..n-1
    ax.set_xticks([i + 1 for i in data_dict.keys()])
    ax.set_xticklabels([str(i) for i in data_dict.keys()])
    # hide every other x tick label to avoid clutter
    for label in ax.get_xaxis().get_ticklabels()[1::2]:
        label.set_visible(False)
    # hide the first y tick label
    ylabel0 = ax.get_yaxis().get_ticklabels()[0]
    ylabel0.set_visible(False)

    plt.xlabel('Epoch')
    plt.title(
        f'{feature.capitalize()} Distribution of Generations During Aug-Gen Training'
    )
    # annotations placed left of the y axis, in data coordinates
    plt.text(-2.2, 1, 'better')
    plt.text(-2.2, 47, 'worse')
    plt.ylabel(feature.capitalize())
    plt.ylim([0, 49.15])

    # Reference line for the Bach grades; kept in its own name so it does not
    # overwrite the `threshold` argument.
    bach_threshold = get_threshold(
        data_file='experiments/ablations/reg_pe_no_oe/bach_grades.csv',
        column='grade',
        aggregate='75p',
    )
    plt.axhline(y=bach_threshold,
                color='steelblue',
                linestyle='-.',
                label=r'$Q_3$' + ' of Bach grades')
    plt.legend(loc='upper right')

    ensure_dir(plt_dir)
    fig.tight_layout()
    plt.savefig(os.path.join(plt_dir, f'{feature}_update_boxplots.png'))
    # release the figure so repeated calls do not accumulate open figures
    plt.close(fig)
def plot_median_grade_per_epoch(dir_dict, num_epochs):
    """
    Plot the median grade of generations at each epoch, one line per model,
    together with a horizontal line at the Bach threshold.

    Arguments
        dir_dict: maps a model label (used as key into PLOT_LABELS and
            PLOT_COLORS) to the directory holding that model's grades.csv
        num_epochs: number of epochs to plot; entries whose epoch is not below
            num_epochs are ignored, and epochs without data keep a median of 0

    The figure is saved as plots/median_grades_per_epoch.png and closed.
    """
    # model label -> list of medians indexed by epoch (0 where no data)
    median_dict = defaultdict(lambda: [0] * num_epochs)
    for model_label, model_path in dir_dict.items():
        data_dict = read_training_data(data_file=f'{model_path}/grades.csv',
                                       feature='grade')
        for epoch, grades in data_dict.items():
            if epoch < num_epochs:
                median_dict[model_label][epoch] = np.median(grades)

    # plt.subplots() creates the figure, no separate plt.figure() needed
    plt.style.use('seaborn-whitegrid')
    fig, ax = plt.subplots()
    ax.grid(False)

    thres = get_threshold(
        data_file='experiments/ablations/reg_pe_no_oe/bach_grades.csv',
        column='grade',
        aggregate='75p',
    )
    plt.axhline(y=thres,
                dashes=(2, 2),
                label='Lowest Bach\ngrade threshold',
                color=PLOT_COLORS['bach'])

    epochs = range(num_epochs)
    for model_label, median_grades in median_dict.items():
        plt.plot(epochs,
                 median_grades[:num_epochs],
                 label=PLOT_LABELS[model_label],
                 color=PLOT_COLORS[model_label])
    plt.title('Median Grade of Generations During Training')

    # The lines are drawn at x = 0..num_epochs-1, so the ticks go at those same
    # positions; placing them at i + 1 would shift every label by one epoch.
    ax.set_xticks(list(epochs))
    ax.set_xticklabels([str(i) for i in epochs])
    # hide every other x tick label to avoid clutter
    for label in ax.get_xaxis().get_ticklabels()[1::2]:
        label.set_visible(False)

    # legend placed to the left, outside the axes; bbox_inches='tight' below
    # keeps it inside the saved image
    ax.legend(loc='upper center', bbox_to_anchor=(-0.2, 0.5))
    plt.ylabel('Grade')
    plt.xlabel('Epoch')

    # make sure the output directory exists, as the sibling plot functions do
    ensure_dir('plots/')
    plt.savefig('plots/median_grades_per_epoch.png', bbox_inches='tight')
    # release the figure so repeated calls do not accumulate open figures
    plt.close(fig)