def save_trial_log(dest_dir, sim_conf_filename, model_name, specific_params, is_baseline=False):
    """
    Find the last DTR log produced in the trial (if any exist)
    and move it to the destination directory.
    """
    all_logs = glob.glob(os.path.join(os.getcwd(), '*.log'))
    if not all_logs:
        return

    # if we delete all logs in advance, there should be at most one log
    assert len(all_logs) == 1
    most_recent = all_logs[0]

    # rename and move
    # (new name just appends info to the old one)
    batch_size = specific_params['batch_size']
    budget = specific_params['memory_budget']
    if budget < 0:
        budget = 'inf'
    new_name = '{}-{}-{}-{}'.format(model_name, batch_size, budget,
                                    os.path.basename(most_recent))
    filename = prepare_out_file(dest_dir, new_name)
    os.rename(most_recent, filename)
    if is_baseline and sim_conf_filename is not None:
        extend_simrd_config(dest_dir, sim_conf_filename, model_name,
                            specific_params, filename)
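# Illustrative usage sketch (not part of the original pipeline): how save_trial_log
# might be invoked after a DTR trial finishes. The destination directory, config
# filename, model name, and parameter values below are hypothetical; a negative
# 'memory_budget' is treated as unlimited and rendered as 'inf' in the log's new name.
def _example_save_trial_log():
    hypothetical_params = {'batch_size': 32, 'memory_budget': -1}
    save_trial_log('/tmp/dtr-logs', 'simrd-config.json', 'resnet32',
                   hypothetical_params, is_baseline=True)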
def run_baseline(model, exp_config, config, config_dir, output_dir):
    '''
    Run a baseline trial and obtain its memory usage.
    This is used for getting a reference memory usage for DTR `ratio` commands.
    '''
    baseline_config = {
        'batch_size': exp_config['batch_size'],
        'timeout': exp_config.get('timeout', 60),
        # only doing a minimal number of runs because we are only getting the memory usage,
        # which should be identical between runs
        'n_reps': 10,
        'extra_params': exp_config.get('extra_params', {})
    }
    if 'input_params' in exp_config:
        baseline_config['input_params'] = exp_config['input_params']

    filename = str(time.time()) + '.json'
    temp_file = prepare_out_file(os.getcwd(), filename)
    success, msg = run_trials(config_dir,
                              python_command('baseline', config),
                              'baseline', model, baseline_config,
                              exp_config.get('n_inputs', config['n_inputs']),
                              output_dir,
                              report_errors=config['report_errors'],
                              append_to_csv=False,
                              trial_run=True,
                              trial_run_outfile=temp_file,
                              sync_gpu=config['sync_gpu'])
    if not success:
        return False, 'Error while running baseline trial: \n{}'.format(msg)

    mem_usage = read_json(output_dir, temp_file)
    os.remove(temp_file)
    if 'mem' not in mem_usage:
        return False, 'failed to get baseline memory usage'
    return True, mem_usage['mem']
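# Hedged sketch of the handshake above: the baseline trial run writes its memory
# usage to the timestamp-named temporary JSON file, and run_baseline reads it back.
# The contents shown here are a hypothetical example; only the 'mem' key is relied
# upon above (the unit is assumed to be MB, based on how it is scaled elsewhere).
_EXAMPLE_TRIAL_RUN_OUTFILE = {
    'mem': 2048.0  # peak memory usage reported by the baseline trial (MB assumed)
}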
def run_baseline(model, exp_config, config, config_dir, output_dir):
    '''
    Run a baseline trial and obtain its memory usage.
    This is used for getting a reference memory usage for DTR `ratio` commands.
    '''
    baseline_config = {'batch_size': exp_config['batch_size']}
    if 'extra_params' in exp_config:
        baseline_config['extra_params'] = exp_config['extra_params']

    filename = str(time.time()) + '.json'
    temp_file = prepare_out_file(os.getcwd(), filename)
    success, msg = run_trials(config_dir,
                              python_command('baseline', config),
                              'baseline', model, baseline_config,
                              config['n_inputs'], config['n_reps'],
                              output_dir,
                              report_errors=config['report_errors'],
                              append_to_csv=False,
                              trial_run=True,
                              trial_run_outfile=temp_file)
    if not success:
        return False, 'Error while running baseline trial: \n{}'.format(msg)

    mem_usage = read_json(output_dir, temp_file)
    os.remove(temp_file)
    if 'mem' not in mem_usage:
        return False, 'failed to get baseline memory usage'
    return True, mem_usage['mem']
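# Hedged sketch: how a caller might derive concrete memory budgets from the
# baseline usage returned above for DTR `ratio` experiments. Only the 'mem' key is
# guaranteed by the check in run_baseline; the ratio values here are hypothetical.
def _example_budgets_from_baseline(model, exp_config, config, config_dir, output_dir):
    success, result = run_baseline(model, exp_config, config, config_dir, output_dir)
    if not success:
        return []
    baseline_mem = result  # memory usage reported by the baseline trial
    return [int(baseline_mem * ratio) for ratio in (0.9, 0.8, 0.5)]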
def render_fixed(model_name, output_dir, x_axis, dtr_entries, failed_trials):
    if not (dtr_entries or failed_trials):
        return (True, 'nothing to render')
    filename = prepare_out_file(
        output_dir,
        f'{name_dict.get(model_name, model_name)}-fixed-gpu-time.png')
    try:
        plt.clf()
        plt.style.use('seaborn-paper')
        plt.rcParams["font.size"] = 30
        fig = plt.figure()
        fig.add_subplot(111, frameon=False)
        fig.set_size_inches(12, 7)
        plt.xticks(fontsize=13)
        plt.yticks(fontsize=13)
        plt.xlabel('Memory Budget (MB)', fontsize=15, labelpad=10)
        plt.ylabel(r'Compute Time (ms)', fontsize=15, labelpad=10)
        plt.title(f'{name_dict.get(model_name, model_name)} GPU Time', fontsize=18)
        plt.grid(True)
        ax = plt.gca()
        if dtr_entries:
            lin, = ax.plot(x_axis, dtr_entries,
                           color=color_scheme.get(model_name, 'black'), linewidth=4)
            mk, = ax.plot(x_axis, dtr_entries,
                          label=name_dict.get(model_name, model_name),
                          linewidth=4, marker=marker_scheme.get(model_name, '+'),
                          ms=12, alpha=.6,
                          color=color_scheme.get(model_name, 'black'))
            ax.legend([(lin, mk)], ['merged'])
        if failed_trials:
            plt.axvline(x=max(failed_trials),
                        color=color_scheme.get(model_name, 'black'),
                        linestyle='dashed')
        plt.legend(bbox_to_anchor=(0.5, 0.01),
                   loc='lower center',
                   bbox_transform=fig.transFigure,
                   ncol=7,
                   borderaxespad=0,
                   prop={'size': 15})
        plt.tight_layout()
        plt.savefig(filename, bbox_inches='tight')
        return (True, 'success')
    except Exception as e:
        return (False, render_exception(e))
def render_field(model_name, output_dir, title, filename, x_label, y_label,
                 x_axis, baseline_entries, dtr_entries, failed_trials,
                 confidence=None, suptitle=''):
    if not (dtr_entries or baseline_entries or failed_trials):
        return (True, 'nothing to render')
    file = prepare_out_file(output_dir, filename)
    try:
        # min_x = min(*(x_axis + failed_trials))
        # max_x = max(*(x_axis + failed_trials))
        ax = plt.gca()
        if dtr_entries:
            lin, = ax.plot(x_axis, dtr_entries,
                           color=COLOR_SCHEME.get(model_name, 'black'), linewidth=4)
            mk, = ax.plot(x_axis, dtr_entries,
                          label=NAME_DICT.get(model_name, model_name),
                          linewidth=4, marker=MARKER_SCHEME.get(model_name, '+'),
                          ms=12, alpha=.6,
                          color=COLOR_SCHEME.get(model_name, 'black'))
            if confidence:
                render_errorbars(ax, x_axis, dtr_entries, confidence)
            ax.legend([(lin, mk)], ['merged'])
        # if baseline_entries:
        #     plt.hlines(y=baseline_entries[0], xmin=min_x, xmax=max_x, linewidth=3,
        #                label='Baseline', color='blue', linestyles='dashed')
        if failed_trials:
            plt.axvline(x=max(failed_trials),
                        color=COLOR_SCHEME.get(model_name, 'black'),
                        linestyle='dashed')
        # fig = plt.legend().figure
        # fig.savefig(file)
        return (True, 'success')
    except Exception as e:
        return (False, 'Exception encountered while rendering graph: {}'.format(
            render_exception(e)))
def extend_simrd_config(dest_dir, sim_conf_filename, model_name, specific_params, log_name):
    if not check_file_exists(dest_dir, sim_conf_filename):
        prepare_out_file(dest_dir, sim_conf_filename)
        write_json(dest_dir, sim_conf_filename, dict())
    conf = read_json(dest_dir, sim_conf_filename)
    if model_name not in conf:
        conf[model_name] = []
    conf[model_name].append({
        'name': model_util.get_model_family(model_name),
        'batch_size': str(specific_params['batch_size']),
        'layers': specific_params.get('layers', model_util.get_model_layers(model_name)),
        'type': model_util.get_model_type(model_name),
        'log': log_name,
        'has_start': True
    })
    write_json(dest_dir, sim_conf_filename, conf)
def extend_simrd_config(dest_dir, sim_conf_filename, model_name, specific_params, log_name):
    import model_util
    if not check_file_exists(dest_dir, sim_conf_filename):
        prepare_out_file(dest_dir, sim_conf_filename)
        write_json(dest_dir, sim_conf_filename, dict())
    conf = read_json(dest_dir, sim_conf_filename)
    if model_name not in conf:
        conf[model_name] = []
    name = model_util.format_model_name(model_name, specific_params)
    conf[model_name].append({
        'name': name,
        'title': name,
        'desc': model_util.format_input_description(model_name, specific_params),
        'log': log_name,
        'has_start': True
    })
    write_json(dest_dir, sim_conf_filename, conf)
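# Hedged sketch of the structure extend_simrd_config builds up in sim_conf_filename:
# a mapping from model name to a list of log descriptors that the simrd simulator can
# replay. All concrete values below are hypothetical examples, not outputs of a real run.
_EXAMPLE_SIMRD_CONF = {
    'resnet32': [{
        'name': 'resnet32 (example)',
        'title': 'resnet32 (example)',
        'desc': 'example input description',
        'log': 'resnet32-32-inf-example.log',
        'has_start': True,
    }]
}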
def run_trials(config_dir, python_cmd,
               experiment_name, model_name,
               specific_params,
               n_inputs, n_reps,
               path_prefix,
               report_errors=False,
               append_to_csv=False,
               trial_run=False,
               trial_run_outfile='',
               cmd_id=0,
               conf_cnt=0):
    """
    Responsible for recording the time and max memory usage from running a model
    (the user must provide a lambda for actually running the model, because different
    kinds of models need different kinds of setup, and a lambda that generates an
    input for running that model).

    :params:
        trial_run: When set to True, no persistent experiment data will be saved.
            It is used to run a baseline trial and record how much memory is used,
            then set the memory budget for `ratio` commands of DTR experiments.
        trial_run_outfile: the temporary file that stores the memory usage data
            of the baseline run
        cmd_id: the command id for the current model, starting from 0 by default
        conf_cnt: the id of the configuration generated from `unfold_settings`;
            this is used for tracking exactly which configuration caused errors
    """
    try:
        cwd = os.getcwd()
        params_file = 'specific_params.json'
        try:
            write_json(cwd, params_file, specific_params)
            if not trial_run:
                filename = prepare_out_file(
                    path_prefix,
                    '{}-{}.csv'.format(
                        get_report_prefix(experiment_name, specific_params, cmd_id),
                        model_name))
                mode = 'a' if append_to_csv else 'w'
                with open(filename, mode, newline='') as csvfile:
                    writer = create_csv_writer(csvfile, specific_params)
                    if not append_to_csv:
                        writer.writeheader()
            else:
                filename = ''

            shared_dir = os.path.dirname(os.path.abspath(__file__))
            run_script = os.path.join(shared_dir, 'run_torch_trial.py')

            for i in range(n_inputs):
                try:
                    subprocess.run([
                        python_cmd, run_script,
                        '--config-dir', config_dir,
                        '--experiment-mode', experiment_name,
                        '--model-name', model_name,
                        '--input-idx', str(i),
                        '--params-file', params_file,
                        '--out-file', filename,
                        '--trial-run', str(trial_run),
                        '--trial-run-outfile', trial_run_outfile
                    ], check=True, timeout=specific_params.get('timeout', 60))
                except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
                    if not report_errors:
                        raise e
                    if trial_run:
                        return (False, 'Baseline failed: {}'.format(render_exception(e)))
                    log_error(experiment_name, model_name, specific_params, i,
                              render_exception(e), path_prefix)
                    return (True, 'successfully caught error')
                time.sleep(4)
            return (True, 'success')
        finally:
            os.remove(params_file)
    except Exception as e:
        return (False,
                'Encountered exception on ({}, {}, {}):\n'.format(
                    experiment_name, model_name, specific_params)
                + render_exception(e))
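# Hedged usage sketch: how run_trials might be invoked for a DTR experiment with a
# fixed memory budget. The experiment name 'dtr', the model name, and all parameter
# values below are illustrative assumptions, not taken from a real config.
def _example_run_trials(config_dir, config, output_dir):
    specific_params = {
        'batch_size': 32,
        'memory_budget': 4 * 10**9,  # bytes
        'timeout': 60,
    }
    return run_trials(config_dir, python_command('dtr', config),
                      'dtr', 'resnet32', specific_params,
                      n_inputs=1, n_reps=10, path_prefix=output_dir,
                      report_errors=True)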
def save(self, dirname, filename):
    outfile = prepare_out_file(dirname, filename)
    plt.savefig(outfile, dpi=500, bbox_inches='tight')
    plt.close()
def render_graph(config, data, output_dir):
    try:
        plt.style.use('seaborn-paper')
        plt.rcParams["font.size"] = 30
        fig = plt.figure()
        fig.add_subplot(111, frameon=False)
        fig.set_size_inches(12, 7)
        plt.xticks(fontsize=13)
        plt.yticks(fontsize=13)
        plt.xlabel('Memory Budget (Ratio)', fontsize=15, labelpad=10)
        plt.ylabel(r'Overhead Slow Down ($\times$)', fontsize=15, labelpad=10)
        plt.title('GPU Time Comparisons', fontsize=18)
        plt.grid(True)
        filename = prepare_out_file(output_dir, 'combined-comparison-ratio.png')
        metadata = {}
        for model in config['models']:
            dtr_dict = {}
            baseline_dict = {}
            stats = data[model]
            for stat in stats:
                if stat['specific_params']['type'] == 'baseline':
                    baseline_dict = fill_data(baseline_dict, stat)
                else:
                    dtr_dict = fill_data(dtr_dict, stat)
            metadata[model] = {'baseline': baseline_dict, 'dtr': dtr_dict}

        success, msg = traverse_field(
            metadata, 'ratio',
            lambda model, batch_size, dtr_dict, baseline_dict, output_dir:
                render_time_comparison(model, batch_size, 'ratio',
                                       dtr_dict[batch_size]['ratio'],
                                       baseline_dict.get(batch_size, {}),
                                       output_dir),
            output_dir)
        if not success:
            return (False, msg)

        plt.hlines(y=1, xmin=0.0, xmax=1.0, linewidth=3,
                   label='Baseline', color='blue', linestyles='dashed')
        plt.legend(bbox_to_anchor=(0.5, 0.01),
                   loc='lower center',
                   bbox_transform=fig.transFigure,
                   ncol=7,
                   borderaxespad=0,
                   prop={'size': 15})
        plt.tight_layout()
        # plt.savefig(filename, bbox_inches='tight')

        plt.clf()
        plt.rcParams["font.size"] = 30
        figure, axs = plt.subplots(2, 4, figsize=(20, 8))
        # figure.set_size_inches(24, 12)
        axs = reversed(flatten(axs))
        success, msg = traverse_field(
            metadata, 'fixed',
            lambda model, batch_size, dtr_dict, baseline_dict, output_dir:
                render_time_comparison(model, batch_size, 'fixed',
                                       dtr_dict[batch_size]['fixed'],
                                       baseline_dict.get(batch_size, {}),
                                       output_dir, plt_ax=next(axs)),
            output_dir)
        filename = prepare_out_file(output_dir, 'combined-breakdown-comparison.png')
        # figure.tight_layout()
        # plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
        # plt.xlabel('Memory Budget (GiB)')
        # plt.ylabel("Time (ms)")
        figure.text(0.5, 0.02, r'\textbf{\Huge Memory Budget (GiB)}', ha='center')
        figure.text(0.09, 0.5, r'\textbf{\Huge Time (ms) / Batch}',
                    ha='center', va='center', rotation='vertical')
        plt.legend(bbox_to_anchor=(0.17, 0.075),
                   loc='upper left',
                   bbox_transform=fig.transFigure,
                   ncol=6,
                   borderaxespad=0,
                   prop={'size': 15})
        # figure.tight_layout()
        # plt.tight_layout()
        # plt.tight_layout(h_pad=0.3)
        plt.subplots_adjust(hspace=0.4)
        plt.savefig(filename, bbox_inches='tight', pad_inches=0.4)
        if not success:
            return (False, msg)

        success, msg = render_throughput_breakdown(metadata, output_dir)
        if not success:
            return False, msg
        return (True, 'success')
    except Exception as e:
        return (False, 'Exception encountered while rendering graphs: {}'.format(
            render_exception(e)))
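# Hedged sketch of the `data` argument consumed by render_graph above: a map from
# model name to a list of per-configuration stat records, each carrying its
# 'specific_params' (including the experiment 'type' used to separate baseline runs
# from DTR runs). Field names beyond those read above and all values are hypothetical.
_EXAMPLE_RENDER_GRAPH_DATA = {
    'resnet32': [
        {'specific_params': {'type': 'baseline', 'batch_size': 32}},
        {'specific_params': {'type': 'fixed', 'batch_size': 32,
                             'memory_budget': 4 * 10**9}},
    ]
}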
def plot_model(model):
    filename = prepare_out_file(output_dir, f'throughput-comparison-{model}.png')
    plt.clf()
    plt.grid(True)
    plt.title(f'Throughput Comparison of {NAME_DICT.get(model, model)}')
    plt.xlabel('Batch Size', fontsize=15, labelpad=10)
    plt.ylabel('Throughput (Batch Size / Avg GPU Time (s))')
    num_batch_size = len(throughput_metadata[model]['dtr'].keys())
    baseline_data = metadata[model]['baseline']
    width = 0.15
    ind = np.arange(num_batch_size)
    x_axis = list(sorted(throughput_metadata[model]['dtr'].keys()))

    # Wish we had currying!
    # If baseline data does not contain a batch size, then we fill 0 into the data,
    # since it means the baseline failed (OOMed)
    baseline_data = list(
        map(flip(throughput_metadata[model]['baseline'].get)(0), x_axis))

    # Bar for baseline
    plt.bar(ind, [datum['throughput'] for datum in baseline_data],
            width, label='Baseline')

    dtr_data = {'throughput': {}, 'breakdown': {}}
    # Gather information collected.
    # The structure of dtr_data:
    #   Level 0: 'breakdown' | 'throughput'
    #   Level 1: data dictionary | computed throughput (float)
    #   Level 2: same as the dictionaries processed in fill_data
    for x in x_axis:
        for datum in throughput_metadata[model]['dtr'][x]:
            if datum['memory_budget'] not in dtr_data['throughput']:
                dtr_data['throughput'][datum['memory_budget']] = []
                dtr_data['breakdown'][datum['memory_budget']] = []
            dtr_data['throughput'][datum['memory_budget']].append(
                datum['throughput'] if not datum['error'] else 0)
            dtr_data['breakdown'][datum['memory_budget']].append(
                dict(filter(lambda x: x[0] != 'throughput', datum.items()))
                if not datum['error'] else None)

    num_budget = len(dtr_data['throughput'].keys())
    plt.xticks(ind + width * (num_budget / 2), map(str, x_axis))
    for (i, (budget, throughput)) in enumerate(
            sorted(dtr_data['throughput'].items(), key=lambda x: -x[0])):
        plt.bar(ind + width * (i + 1), throughput, width,
                label=f'{round(budget * 1e-9, 1)} GiB')
    plt.legend(loc='best')
    plt.tight_layout()
    plt.savefig(filename, bbox_inches='tight')

    # Plot runtime profiling breakdown
    filename = prepare_out_file(output_dir, f'time-breakdown-{model}.png')
    plt.clf()
    plt.title(f'Runtime Breakdown of {NAME_DICT.get(model, model)}')
    plt.xlabel('Batch Size')
    plt.ylabel('Time / Batch (ms)')
    x_ticks_loc = {
        ind[i] + width * (num_budget / 2): '\n\n' + str(x_axis[i])
        for i in range(num_batch_size)
    }
    plt.grid(True, axis='y')
    for (i, (budget, datum)) in enumerate(
            sorted(dtr_data['breakdown'].items(), key=lambda x: -x[0])):
        locs = ind + width * (i + 1)
        for loc in locs:
            x_tick = f'{round(budget * 1e-9, 1)}\nGiB'
            if loc in x_ticks_loc.keys():
                x_tick += f'\n{x_ticks_loc[loc]}'
            x_ticks_loc[loc] = x_tick
        if datum is None:
            continue
        gathered_data = {key: [] for key in (timed_keys + ['cpu_time'])}
        gathered_data['dispatch_overhead'] = []
        for e in datum:
            time_acc = 0
            for key in gathered_data.keys():
                if key != 'dispatch_overhead':
                    if e is None:
                        gathered_data[key].append(0)
                    else:
                        gathered_data[key].append(e[key])
                    if key != 'cpu_time' and e is not None:
                        time_acc += e[key]
            if e is not None:
                gathered_data['dispatch_overhead'].append(
                    gathered_data['cpu_time'][-1] - time_acc)
            else:
                gathered_data['dispatch_overhead'].append(0)
        height_acc = np.zeros(len(datum))
        for key in timed_keys:  # + ['dispatch_overhead']
            if i == 0:
                plt.bar(ind + width * (i + 1), gathered_data[key], width=width,
                        label=breakdown_namedict[key],
                        color=breakdown_color_scheme[key],
                        bottom=height_acc)
            else:
                plt.bar(ind + width * (i + 1), gathered_data[key], width=width,
                        color=breakdown_color_scheme[key],
                        bottom=height_acc)
            height_acc += gathered_data[key]

    xticks_data = list(sorted(x_ticks_loc.items(), key=lambda x: -x[0]))
    ticks = list(map(lambda x: x[0], xticks_data))
    labels = list(map(lambda x: x[1], xticks_data))
    plt.xticks(ticks, labels)
    plt.legend(loc='best')
    plt.tight_layout()
    plt.savefig(filename, bbox_inches='tight')
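# The call `flip(d.get)(0)` in plot_model presumably relies on a small currying helper:
# it flips the two arguments of a binary function and curries them, so that
# flip(d.get)(default) becomes a one-argument lookup with a default. A minimal sketch
# of such a helper (the repo's actual definition may differ):
def _flip_sketch(f):
    return lambda y: lambda x: f(x, y)

# e.g. list(map(_flip_sketch(d.get)(0), keys)) == [d.get(k, 0) for k in keys]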
def render_fixed(ax, model_name, output_dir, x_axis, dtr_entries, baseline_data,
                 failed_trials, batch_size=None, confidence=None):
    if not (dtr_entries or failed_trials):
        return (True, 'nothing to render')
    filename = prepare_out_file(
        output_dir,
        f'{NAME_DICT.get(model_name, model_name)}-fixed-gpu-time.png')
    try:
        # plt.style.use('seaborn-paper')
        # plt.rcParams["font.size"] = 30
        # fig = plt.figure()
        # fig.add_subplot(111, frameon=False)
        # fig.set_size_inches(12, 7)
        # plt.xticks(fontsize=13)
        # plt.yticks(fontsize=13)
        # plt.xlabel('Memory Budget (MB)', fontsize=15, labelpad=10)
        # plt.ylabel(r'Compute Time (ms)', fontsize=15, labelpad=10)
        # plt.title(f'{NAME_DICT.get(model_name, model_name)} GPU Time', fontsize=18)
        # plt.grid(True)
        # ax = plt.gca()
        width = 0.0
        all_axis = sorted(x_axis + failed_trials)
        ind = np.arange(len(all_axis) + 1)
        ind_index = dict(zip(all_axis, ind))
        ind_pos = dict([(ind[i], i) for i in range(len(ind))])
        ax.set_xticks(ind + width / 2)
        ax.set_xticklabels(
            map(lambda x: f'{round(x * 1e-9, 1)}',
                all_axis + [baseline_data['mem'] * 1e+6]))
        ax.tick_params(axis='both', labelsize=20)

        filtered_entries = []
        if baseline_data and 'cpu_time' in baseline_data:
            for (x, datum) in zip(x_axis, dtr_entries):
                if (not datum.get('error', False) and 'cpu_time' in datum
                        and datum['cpu_time'] > 3 * baseline_data['cpu_time']):
                    failed_trials.append(x)
                    filtered_entries.append({key: 0 for key in datum.keys()})
                else:
                    filtered_entries.append(datum)
        dtr_entries = filtered_entries

        if failed_trials:
            for x in failed_trials:
                ax.axvline(x=ind_index[x], color='red', linestyle='dashed', label='OOM')

        new_ind = []
        for x in x_axis:
            new_ind.append(ind_index[x])
        new_ind.append(ind[-1])
        ind = np.array(new_ind)

        ax.grid(True, axis='y')
        ax.set_title(
            f'{NAME_DICT.get(model_name, model_name)} ({batch_size})\n{input_sizes.get(model_name, "")}',
            fontsize=15)
        for x in failed_trials:
            ax.bar(ind_index[x], 0)

        if dtr_entries:
            # lin, = ax.plot(x_axis, dtr_entries, color=COLOR_SCHEME.get(model_name, 'black'), linewidth=4)
            # mk, = ax.plot(x_axis, dtr_entries, label=NAME_DICT.get(model_name, model_name),
            #               linewidth=4, marker=MARKER_SCHEME.get(model_name, '+'), ms=12,
            #               alpha=.6, color=COLOR_SCHEME.get(model_name, 'black'))
            data_collection = {key: [] for key in timed_keys}
            data_collection['dispatch_overhead'] = []
            for entry in dtr_entries:
                acc = 0
                for (k, v) in entry.items():
                    if k != 'cpu_time':
                        data_collection[k].append(v)
                        acc += v
                data_collection['dispatch_overhead'].append(entry['cpu_time'] - acc)
            acc = np.zeros(len(x_axis))
            for k in timed_keys + ['dispatch_overhead']:
                # print(ind[:-1], data_collection[k])
                ax.bar(ind[:-1], data_collection[k],
                       label=breakdown_namedict.get(k, k),
                       color=breakdown_color_scheme.get(k, 'red'),
                       bottom=acc)
                acc = acc + data_collection[k]
            if baseline_data and 'cpu_time' in baseline_data:
                ax.bar([ind[-1]], baseline_data['cpu_time'],
                       label='Unmodified\nPyTorch', color='blue')
            else:
                ax.bar([ind[-1]], 0, label='Unmodified PyTorch', color='blue')
                ax.axvline(ind[-1], color='red', linestyle='dashed', label='OOM')
            if confidence and False:
                render_errorbars(ax, x_axis, dtr_entries, confidence)
        ax.invert_xaxis()
        # ax.legend([(lin, mk)], ['merged'])
        # plt.legend(
        #     bbox_to_anchor=(0.5, 0.01),
        #     loc='lower center',
        #     bbox_transform=fig.transFigure,
        #     ncol=7,
        #     borderaxespad=0,
        #     prop={'size': 15}
        # )
        # plt.tight_layout()
        # plt.savefig(filename, bbox_inches='tight')
        return (True, 'success')
    except Exception as e:
        return (False, render_exception(e))
def render_graph(config, data, output_dir):
    try:
        plt.style.use('seaborn-paper')
        plt.rcParams["font.size"] = 30
        fig = plt.figure()
        fig.add_subplot(111, frameon=False)
        fig.set_size_inches(12, 7)
        plt.xticks(fontsize=13)
        plt.yticks(fontsize=13)
        plt.xlabel('Memory Budget (Ratio)', fontsize=15, labelpad=10)
        plt.ylabel(r'Overhead Slow Down ($\times$)', fontsize=15, labelpad=10)
        plt.title('GPU Time Comparisons', fontsize=18)
        plt.grid(True)
        filename = prepare_out_file(output_dir, 'combined-comparison-ratio.png')
        metadata = {}
        for model in config['models']:
            dtr_dict = {}
            baseline_dict = {}
            stats = data[model]
            for stat in stats:
                if stat['specific_params']['type'] == 'baseline':
                    baseline_dict = fill_data(baseline_dict, stat)
                else:
                    dtr_dict = fill_data(dtr_dict, stat)
            metadata[model] = {'baseline': baseline_dict, 'dtr': dtr_dict}
            for batch_size in dtr_dict:
                baseline_data = baseline_dict.get(batch_size)
                for exp_kind in dtr_dict[batch_size]:
                    if exp_kind == 'ratio':
                        success, msg = render_time_comparison(
                            model, batch_size, exp_kind, baseline_data,
                            dtr_dict[batch_size][exp_kind], output_dir)
                        if not success:
                            return (False, msg)

        plt.hlines(y=1, xmin=0.0, xmax=1.0, linewidth=3,
                   label='Baseline', color='blue', linestyles='dashed')
        plt.legend(bbox_to_anchor=(0.5, 0.01),
                   loc='lower center',
                   bbox_transform=fig.transFigure,
                   ncol=7,
                   borderaxespad=0,
                   prop={'size': 15})
        plt.tight_layout()
        plt.savefig(filename, bbox_inches='tight')

        for model in metadata:
            dtr_dict = metadata[model]['dtr']
            baseline_dict = metadata[model]['baseline']
            for batch_size in dtr_dict:
                baseline_data = baseline_dict.get(batch_size)
                for exp_kind in dtr_dict[batch_size]:
                    if exp_kind == 'fixed':
                        success, msg = render_time_comparison(
                            model, batch_size, exp_kind, baseline_data,
                            dtr_dict[batch_size][exp_kind], output_dir)
                        if not success:
                            return (False, msg)
        return (True, 'success')
    except Exception as e:
        return (False, 'Exception encountered while rendering graphs: {}'.format(
            render_exception(e)))
def render_fixed(ax, model_name, output_dir, x_axis, dtr_entries, baseline_data,
                 failed_trials, batch_size=None, confidence=None,
                 render_confidence=False):
    if not (dtr_entries or failed_trials):
        return (True, 'nothing to render')
    filename = prepare_out_file(output_dir, f'{model_name}-fixed-gpu-time.png')
    try:
        if render_confidence:
            plt.clf()
            plt.style.use('seaborn-paper')
            plt.rcParams["font.size"] = 30
            fig = plt.figure()
            fig.add_subplot(111, frameon=False)
            fig.set_size_inches(12, 7)
            plt.xticks(fontsize=13)
            plt.yticks(fontsize=13)
            plt.xlabel('Memory Budget (GiB)', fontsize=15, labelpad=10)
            plt.ylabel(r'Compute Time (ms)', fontsize=15, labelpad=10)
            plt.title(f'{NAME_DICT.get(model_name, model_name)} GPU Time', fontsize=18)
            plt.grid(True)
            ax = plt.gca()
            budgets = list(map(lambda x: x * 1e-9, x_axis))
            y_value = list(map(lambda x: x['cpu_time'], dtr_entries))
            if dtr_entries:
                if model_name == 'unroll_gan':
                    print('Unroll GAN:')
                    print(budgets, y_value)
                upper = list(map(lambda x: abs(x[1]), confidence))
                lower = list(map(lambda x: abs(x[0]), confidence))
                plt.errorbar(budgets, y_value, yerr=upper, uplims=True, lolims=False)
                plt.errorbar(budgets, y_value, yerr=lower, lolims=True, uplims=False)
                plt.tight_layout()
                plt.savefig(filename, bbox_inches='tight')
        else:
            width = 0.0
            all_axis = sorted(x_axis + failed_trials)
            ind = np.arange(len(all_axis) + 1)
            ind_index = dict(zip(all_axis, ind))
            ind_pos = dict([(ind[i], i) for i in range(len(ind))])
            ax.set_xticks(ind + width / 2)

            filtered_entries = []
            if baseline_data and 'cpu_time' in baseline_data:
                for (x, datum) in zip(x_axis, dtr_entries):
                    if (not datum.get('error', False) and 'cpu_time' in datum
                            and datum['cpu_time'] > 3 * baseline_data['cpu_time']):
                        failed_trials.append(x)
                        filtered_entries.append({key: 0 for key in datum.keys()})
                    else:
                        filtered_entries.append(datum)

            failed_trials_str = list(map(lambda x: f'{round(x * 1e-9, 1)}', failed_trials))
            labels = list(map(lambda x: f'{round(x * 1e-9, 1)}',
                              all_axis + [baseline_data.get('mem', 12000) * 1e+6]))
            if model_name in LOWEST_BUDGET_NON_SAMPLED:
                for i in range(len(labels)):
                    if labels[i] not in failed_trials_str:
                        labels[i] = f'{labels[i]}$^*$'
                        break
            ax.set_xticklabels(labels)
            ax.tick_params(axis='both', labelsize=20)
            dtr_entries = filtered_entries

            if failed_trials:
                for x in failed_trials:
                    ax.axvline(x=ind_index[x], color='red', linestyle='dashed', label='OOM')

            new_ind = []
            for x in x_axis:
                new_ind.append(ind_index[x])
            new_ind.append(ind[-1])
            ind = np.array(new_ind)

            ax.grid(True, axis='y')
            ax.set_title(
                f'{NAME_DICT.get(model_name, model_name)} ({batch_size})\n{input_sizes.get(model_name, "")}',
                fontsize=15)
            for x in failed_trials:
                ax.bar(ind_index[x], 0)

            if dtr_entries:
                data_collection = {key: [] for key in timed_keys}
                data_collection['dispatch_overhead'] = []
                for entry in dtr_entries:
                    acc = 0
                    for (k, v) in entry.items():
                        if k != 'cpu_time':
                            data_collection[k].append(v)
                            acc += v
                    data_collection['dispatch_overhead'].append(entry['cpu_time'] - acc)
                acc = np.zeros(len(x_axis))
                for k in timed_keys + ['dispatch_overhead']:
                    ax.bar(ind[:-1], data_collection[k],
                           label=breakdown_namedict.get(k, k),
                           color=breakdown_color_scheme.get(k, 'red'),
                           bottom=acc)
                    acc = acc + data_collection[k]
                if baseline_data and 'cpu_time' in baseline_data:
                    ax.bar([ind[-1]], baseline_data['cpu_time'],
                           label='Unmodified\nPyTorch', color='blue')
                else:
                    ax.bar([ind[-1]], 0, label='Unmodified PyTorch', color='blue')
                    ax.axvline(ind[-1], color='red', linestyle='dashed', label='OOM')
                if confidence and False:
                    render_errorbars(ax, x_axis, dtr_entries, confidence)
            ax.invert_xaxis()
        return (True, 'success')
    except Exception as e:
        return (False, render_exception(e))
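# Hedged sketch of a single dtr_entries record consumed by render_fixed above: a
# per-budget breakdown of batch time into the components listed in timed_keys plus
# the total 'cpu_time' (dispatch overhead is derived as the difference). The component
# key names and values below are illustrative assumptions, not the actual timed_keys.
_EXAMPLE_DTR_ENTRY = {
    'cpu_time': 120.0,            # total time per batch (ms)
    'base_compute_time': 90.0,    # assumed component key
    'remat_compute_time': 20.0,   # assumed component key
    'search_time': 5.0,           # assumed component key
}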