import os
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
from matplotlib.ticker import MaxNLocator
import seaborn as sns
from scipy import ndimage
from tqdm import tqdm
import plotly.plotly as py  # legacy (pre-v4) plotly API, matching py.sign_in below
from plotly import tools as tls
# Project-local modules; the exact import paths are assumptions.
from config import Config
import credentials
import experiments
from db import db
from utils import py_utils


def main(
        experiment_name,
        im_ext='.pdf',
        transform_loss=None,  # 'log',
        colors='Paired',
        flip_axis=False,
        port_fwd=False,
        num_steps=np.inf,
        exclude=None,
        list_experiments=False,
        out_dir='analysis_data'):
    """Plot results of provided experiment name."""
    config = Config()
    if list_experiments:
        db.list_experiments()
        return
    if port_fwd:
        config.db_ssh_forward = True
    py_utils.make_dir(out_dir)

    # Get experiment data; a comma-separated name pulls multiple experiments
    if ',' in experiment_name:
        exps = experiment_name.split(',')
        perf = []
        for exp in exps:
            perf += db.get_performance(experiment_name=exp)
        experiment_name = exps[0]
    else:
        perf = db.get_performance(experiment_name=experiment_name)
    if len(perf) == 0:
        raise RuntimeError('Could not find any results.')
    structure_names = [x['model'].split('/')[-1] for x in perf]
    datasets = [x['val_dataset'] for x in perf]
    steps = [float(x['step']) for x in perf]
    training_loss = [float(x['train_loss']) for x in perf]
    validation_loss = [float(x['val_loss']) for x in perf]
    training_score = [float(x['train_score']) for x in perf]
    validation_score = [float(x['val_score']) for x in perf]
    summary_dirs = [x['summary_path'] for x in perf]
    ckpts = [x['ckpt_path'] for x in perf]
    params = [x['num_params'] for x in perf]
    lrs = [x['lr'] for x in perf]

    # Pass data into a pandas DF
    df = pd.DataFrame(
        np.vstack((
            structure_names,
            datasets,
            steps,
            params,
            training_loss,
            training_score,
            validation_loss,
            validation_score,
            summary_dirs,
            ckpts,
            lrs)).transpose(),
        columns=[
            'model names',
            'datasets',
            'training iteration',
            'params',
            'training loss',
            'training accuracy',
            'validation loss',
            'validation accuracy',
            'summary_dirs',
            'checkpoints',
            'lrs'])
    df['training loss'] = pd.to_numeric(df['training loss'], errors='coerce')
    df['validation accuracy'] = pd.to_numeric(
        df['validation accuracy'], errors='coerce')
    df['training accuracy'] = pd.to_numeric(
        df['training accuracy'], errors='coerce')
    df['training iteration'] = pd.to_numeric(
        df['training iteration'], errors='coerce')
    df['params'] = pd.to_numeric(df['params'], errors='coerce')
    df['lrs'] = pd.to_numeric(df['lrs'], errors='coerce')

    # Plot TTA
    dfs = []
    print('Found %s performance rows.' % len(df))
    uni_structure_names = np.unique(structure_names)
    max_num_steps = num_steps  # (20000 / 32) * num_epochs
    for m in tqdm(uni_structure_names, total=len(uni_structure_names)):
        it_df = df[df['model names'] == m]
        it_df = it_df[it_df['training iteration'] < max_num_steps]
        sorted_df = []
        different_models = np.unique(it_df['summary_dirs'])
        num_models = len(different_models)
        for model in different_models:
            # Grab each model then sort by training iteration
            sel_data = it_df[it_df['summary_dirs'] == model]
            sel_data = sel_data.sort_values('training iteration')
            # Smooth the sorted validation scores for tta
            sel_data['tta'] = ndimage.gaussian_filter1d(
                sel_data['validation accuracy'], 3)
            sel_data['num_runs'] = num_models
            sorted_df += [sel_data]
        sorted_df = pd.concat(sorted_df)
        dfs += [sorted_df]

    # Get max scores and TTAs
    dfs = pd.concat(dfs)
    scores = dfs.groupby(
        ['lrs', 'datasets', 'model names'], as_index=False).max()
    losses = dfs.groupby(
        ['lrs', 'datasets', 'model names'], as_index=False).min()
    ttas = dfs.groupby(
        ['lrs', 'datasets', 'model names'], as_index=False).mean()

    # Combine into a single DF
    # TODO: sort by val loss, then validate each (make a new dataloader).
    scores['tta'] = ttas['validation accuracy']
    scores['validation loss'] = losses['validation loss']
    # Save datasets to csv
    filename = 'raw_data_%s.csv' % experiment_name
    dfs.to_csv(os.path.join(out_dir, filename))
    filename = 'scores_%s.csv' % experiment_name
    scores.to_csv(os.path.join(out_dir, filename))

    # Save easy-to-parse csvs of checkpoints and models for automated
    # processing of test datasets
    trim_ckpts, trim_models = [], []
    for idx in range(len(scores)):
        ckpt = scores.iloc[idx]['checkpoints']
        ckpt = '%s-%s' % (ckpt, ckpt.split('.')[0].split('_')[-1])
        model = scores.iloc[idx]['model names']
        trim_ckpts += [ckpt]
        trim_models += [model]
    trimmed_ckpts = pd.DataFrame(trim_ckpts, columns=['checkpoints'])
    trimmed_models = pd.DataFrame(trim_models, columns=['model'])
    trimmed_ckpts.to_csv(
        os.path.join(out_dir, 'checkpoints_%s.csv' % experiment_name))
    trimmed_models.to_csv(
        os.path.join(out_dir, 'models_%s.csv' % experiment_name))

    # Add indicator variable to group different model types during plotting
    scores['model_idx'] = 0
    model_groups = ['fgru', 'resnet', 'unet', 'hgru']
    for idx, m in enumerate(model_groups):
        # .loc avoids pandas chained-assignment warnings
        scores.loc[
            scores['model names'].str.contains(m, regex=False),
            'model_idx'] = idx
    keep_groups = np.where(~np.in1d(model_groups, 'hgru'))[0]
    scores = scores[scores['model_idx'].isin(keep_groups)]

    # Print scores to console
    print(scores)

    # Create max accuracy plots
    num_groups = len(keep_groups)
    f = plt.figure()
    sns.set(context='paper', font='Arial', font_scale=.5)
    sns.set_style('white')
    sns.despine()
    count = 1
    for idx in keep_groups:
        plt.subplot(1, num_groups, count)
        sel_df = scores[scores['model_idx'] == idx]
        sns.pointplot(
            data=sel_df,
            x='datasets',
            y='validation accuracy',
            hue='model names')
        plt.ylim([0.4, 1.1])
        count += 1
    plt.savefig(
        os.path.join(out_dir, 'max_%s.png' % experiment_name), dpi=300)
    plt.close(f)

    # Create tta plots
    f = plt.figure()
    sns.set(context='paper', font='Arial', font_scale=.5)
    sns.set_style('white')
    sns.despine()
    count = 1
    for idx in keep_groups:
        plt.subplot(1, num_groups, count)
        sel_df = scores[scores['model_idx'] == idx]
        sns.pointplot(data=sel_df, x='datasets', y='tta', hue='model names')
        plt.ylim([0.4, 1.1])
        count += 1
    plt.savefig(
        os.path.join(out_dir, 'tta_%s.png' % experiment_name), dpi=300)
    plt.close(f)
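
# plot_with_plotly is called by the plotting routine below but is not defined
# in this file. The following is a minimal sketch, assuming the legacy
# (pre-v4) plotly.plotly upload API that py.sign_in implies; the filename
# scheme is an assumption, not the original implementation.
def plot_with_plotly(fig, chart='line'):
    """Upload a converted matplotlib figure to plotly (hypothetical helper)."""
    try:
        # py.plot uploads the figure to the signed-in account and returns
        # the hosted chart's URL
        url = py.plot(
            fig,
            filename='experiment_%s_chart' % chart,  # assumed naming scheme
            auto_open=False)
        print('Plotly chart at: %s' % url)
    except Exception as e:
        print('Plotly upload failed: %s' % e)
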
def main(
        experiment_name,
        im_ext='.pdf',
        transform_loss=None,  # 'log',
        colors='Paired',
        flip_axis=False,
        exclude=None):
    """Plot results of provided experiment name."""
    config = Config()
    pl_creds = credentials.plotly_credentials()
    py.sign_in(
        pl_creds['username'],
        pl_creds['api_key'])

    # Get experiment data
    perf = db.get_performance(experiment_name=experiment_name)
    if len(perf) == 0:
        raise RuntimeError('Could not find any results.')
    structure_names = [x['model_struct'].split('/')[-1] for x in perf]
    optimizers = [x['optimizer'] for x in perf]
    lrs = [x['lr'] for x in perf]
    datasets = [x['dataset'] for x in perf]
    loss_funs = [x['loss_function'] for x in perf]
    wd_types = [x['regularization_type'] for x in perf]
    wd_penalties = [x['regularization_strength'] for x in perf]
    steps = [float(x['training_step']) for x in perf]
    training_loss = [float(x['training_loss']) for x in perf]
    validation_loss = [float(x['validation_loss']) for x in perf]
    timesteps = [
        0. if x['timesteps'] is None else float(x['timesteps'])
        for x in perf]
    u_t = [0. if x['u_t'] is None else float(x['u_t']) for x in perf]
    q_t = [0. if x['q_t'] is None else float(x['q_t']) for x in perf]
    p_t = [0. if x['p_t'] is None else float(x['p_t']) for x in perf]
    t_t = [0. if x['t_t'] is None else float(x['t_t']) for x in perf]

    # Concatenate each run's hyperparameters into a single label
    model_params = [
        ' | '.join(str(v) for v in row)
        for row in zip(
            structure_names,
            optimizers,
            lrs,
            loss_funs,
            wd_types,
            wd_penalties,
            datasets,
            timesteps,
            u_t,
            q_t,
            p_t,
            t_t)]

    # Pass data into a pandas DF and plot
    df = pd.DataFrame(
        np.vstack((
            model_params,
            steps,
            training_loss,
            validation_loss)).transpose(),
        columns=[
            'model parameters',
            'training iteration',
            'training loss',
            'validation loss'])
    df['training iteration'] = pd.to_numeric(
        df['training iteration'], errors='coerce')
    df['training loss'] = pd.to_numeric(df['training loss'], errors='coerce')
    if exclude is not None:
        exclusion_search = df['model parameters'].str.contains(exclude)
        df = df[~exclusion_search]
        print('Removed %s rows.' % exclusion_search.sum())

    # Start plotting
    experiment_dict = experiments.experiments()[experiment_name]()
    print('Plotting results for dataset: %s.' % experiment_dict['dataset'][0])
    dataset_module = py_utils.import_module(
        model_dir=config.dataset_info,
        dataset=experiment_dict['dataset'][0])
    dataset_module = dataset_module.data_processing()  # hardcoded class name
    if transform_loss is None:
        loss_label = ''
    elif transform_loss == 'log':
        loss_label = ' log loss'
        df['training loss'] = np.log(df['training loss'])
    elif transform_loss == 'max':
        loss_label = ' normalized (x / max(x)) '
        df['training loss'] /= df.groupby(
            'model parameters')['training loss'].transform(max)
    if 'loss_function' in experiment_dict:
        loss_metric = experiment_dict['loss_function'][0]
    else:
        loss_metric = dataset_module.default_loss_function
    df['validation loss'] = pd.to_numeric(
        df['validation loss'], errors='coerce')
    if loss_metric == 'pearson':
        loss_label = 'Pearson correlation' + loss_label
    elif loss_metric == 'l2':
        loss_label = 'L2' + loss_label
    else:
        loss_label = 'Classification accuracy (%)'
        df['validation loss'] *= 100.
    if 'score_metric' in experiment_dict:
        score_metric = experiment_dict['score_metric']
    else:
        score_metric = dataset_module.score_metric
    if score_metric == 'pearson':
        y_lab = 'Pearson correlation'
    elif score_metric == 'l2':
        y_lab = 'L2'
    else:
        # Default label, mirroring the loss_label logic above, so that
        # y_lab is always defined
        y_lab = 'Classification accuracy (%)'

    matplotlib.style.use('ggplot')
    plt.rc('font', size=6)
    plt.rc('legend', fontsize=8, labelspacing=3)
    f, axs = plt.subplots(2, figsize=(20, 30))
    ax = axs[1]
    num_colors = len(df['model parameters'].unique())
    cm = plt.get_cmap('gist_rainbow')
    # Cycle a distinct color for each model-parameter group
    ax.set_prop_cycle(
        color=[cm(1. * i / num_colors) for i in range(num_colors)])
    for k in df['model parameters'].unique():
        tmp = df[df['model parameters'] == k]
        tmp = tmp.sort_values('training iteration')
        ax = tmp.plot(
            x='training iteration',
            y='training loss',
            label=k,
            kind='line',
            ax=ax,
            logy=False)
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=30)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.set_title('Training')
    ax.set_ylabel(loss_label)
    ax = axs[0]
    ax.set_prop_cycle(
        color=[cm(1. * i / num_colors) for i in range(num_colors)])
    for k in df['model parameters'].unique():
        tmp = df[df['model parameters'] == k]
        tmp = tmp.sort_values('training iteration')
        ax = tmp.plot(
            x='training iteration',
            y='validation loss',
            label=k,
            kind='line',
            ax=ax,
            logy=False)
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=30)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.set_title('Validation')
    # TODO: Mine the experiment declarations for the appropriate metric name.
    ax.set_ylabel(y_lab)
    out_name = os.path.join(
        config.plots,
        '%s_%s%s' % (
            experiment_name,
            py_utils.get_dt_stamp(),
            im_ext))
    plt.savefig(out_name)
    print('Saved to: %s' % out_name)
    plotly_fig = tls.mpl_to_plotly(f)
    plotly_fig['layout']['autosize'] = True
    plot_with_plotly(plotly_fig, 'line')
    plt.close(f)

    # Plot max performance bar graph
    f = plt.figure()
    max_perf = df.groupby(
        ['model parameters'], as_index=False)['validation loss'].max()
    plt.rc('xtick', labelsize=2)
    ax = max_perf.plot.bar(
        x='model parameters',
        y='validation loss',
        legend=False)
    plt.tight_layout()
    ax.set_title('Max validation value')
    ax.set_ylabel(y_lab)
    out_name = os.path.join(
        config.plots,
        '%s_%s_bar%s' % (
            experiment_name,
            py_utils.get_dt_stamp(),
            im_ext))
    plt.savefig(out_name)
    print('Saved to: %s' % out_name)
    try:
        plotly_fig = tls.mpl_to_plotly(f)
        plot_with_plotly(plotly_fig, chart='bar')
    except Exception as e:
        print('Failed to plot bar chart in plotly: %s' % e)
    plt.close(f)
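
# Hypothetical CLI entry point (not part of the original file): a minimal
# argparse wrapper around main(). With two main() definitions in this module,
# the call below resolves to the second one; argument names mirror its
# signature.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Plot experiment results.')
    parser.add_argument(
        '--experiment',
        dest='experiment_name',
        type=str,
        required=True,
        help='Name of the experiment to plot.')
    parser.add_argument(
        '--transform_loss',
        type=str,
        default=None,
        help='Optional training-loss transform: log or max.')
    parser.add_argument(
        '--exclude',
        type=str,
        default=None,
        help='Drop rows whose model parameters contain this string.')
    args = parser.parse_args()
    main(**vars(args))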