Example #1
def save_trial_log(dest_dir,
                   sim_conf_filename,
                   model_name,
                   specific_params,
                   is_baseline=False):
    """
    Find the last DTR log produced in the trial (if any exist)
    and move it to the directory
    """
    all_logs = glob.glob(os.path.join(os.getcwd(), '*.log'))
    if not all_logs:
        return

    # if we delete all logs in advance, there should be at most one log
    assert len(all_logs) == 1
    most_recent = all_logs[0]

    # rename and move
    # (new name just appends info to the old one)
    batch_size = specific_params['batch_size']
    budget = specific_params['memory_budget']
    if budget < 0:
        budget = 'inf'
    new_name = '{}-{}-{}-{}'.format(model_name, batch_size, budget,
                                    os.path.basename(most_recent))
    filename = prepare_out_file(dest_dir, new_name)
    os.rename(most_recent, filename)
    if is_baseline and sim_conf_filename is not None:
        extend_simrd_config(dest_dir, sim_conf_filename, model_name,
                            specific_params, filename)
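A minimal sketch of the renaming scheme used above, with hypothetical values (only the name construction is shown; prepare_out_file and the actual move are omitted):

import os

model_name, batch_size, budget = 'resnet32', 32, 'inf'   # a negative budget is rendered as 'inf' above
most_recent = '/tmp/dtr-1626299461.log'                   # hypothetical log path
new_name = '{}-{}-{}-{}'.format(model_name, batch_size, budget,
                                os.path.basename(most_recent))
print(new_name)  # resnet32-32-inf-dtr-1626299461.log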
Example #2
def run_baseline(model, exp_config, config, config_dir, output_dir):
    '''
        Run a baseline trial and obtain its memory usage.
        This is used for getting a reference memory usage for
        DTR `ratio` commands.
    '''
    baseline_config = {
        'batch_size': exp_config['batch_size'],
        'timeout': exp_config.get('timeout', 60),
        # only doing a minimal number of runs because we are only getting the memory usage,
        # which should be identical between runs
        'n_reps': 10,
        'extra_params': exp_config.get('extra_params', {})
    }
    if 'input_params' in exp_config:
        baseline_config['input_params'] = exp_config['input_params']
    filename = str(time.time()) + '.json'
    temp_file = prepare_out_file(os.getcwd(), filename)
    success, msg = run_trials(config_dir,
                              python_command('baseline', config),
                              'baseline', model, baseline_config,
                              exp_config.get('n_inputs', config['n_inputs']),
                              output_dir,
                              report_errors=config['report_errors'],
                              append_to_csv=False,
                              trial_run=True,
                              trial_run_outfile=temp_file,
                              sync_gpu=config['sync_gpu'])
    if not success:
        return False, 'Error while running baseline trial: \n{}'.format(msg)

    mem_usage = read_json(output_dir, temp_file)
    os.remove(temp_file)
    if 'mem' not in mem_usage:
        return False, 'failed to get baseline memory usage'
    return True, mem_usage['mem']
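A hedged sketch of how the returned memory figure might be consumed when deriving a budget for a DTR `ratio` trial (the `ratio` value and the surrounding driver variables are assumptions, not part of the original):

success, result = run_baseline(model, exp_config, config, config_dir, output_dir)
if not success:
    print(result)                        # error message from run_baseline
else:
    ratio = 0.5                          # hypothetical `ratio` setting
    memory_budget = int(result * ratio)  # budget derived from the baseline usage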
Example #3
def run_baseline(model, exp_config, config, config_dir, output_dir):
    '''
        Run a baseline trial and obtain its memory usage.
        This is used for getting a reference memory usage for
        DTR `ratio` commands.
    '''
    baseline_config = {'batch_size': exp_config['batch_size']}
    if 'extra_params' in exp_config:
        baseline_config['extra_params'] = exp_config['extra_params']
    filename = str(time.time()) + '.json'
    temp_file = prepare_out_file(os.getcwd(), filename)
    success, msg = run_trials(config_dir,
                              python_command('baseline', config),
                              'baseline',
                              model,
                              baseline_config,
                              config['n_inputs'],
                              config['n_reps'],
                              output_dir,
                              report_errors=config['report_errors'],
                              append_to_csv=False,
                              trial_run=True,
                              trial_run_outfile=temp_file)
    if not success:
        return False, 'Error while running baseline trial: \n{}'.format(msg)

    mem_usage = read_json(output_dir, temp_file)
    os.remove(temp_file)
    if 'mem' not in mem_usage:
        return False, 'failed to get baseline memory usage'
    return True, mem_usage['mem']
Example #4
def render_fixed(model_name, output_dir, x_axis, dtr_entries, failed_trials):
    if not (dtr_entries or failed_trials):
        return (True, 'nothing to render')
    filename = prepare_out_file(
        output_dir,
        f'{name_dict.get(model_name, model_name)}-fixed-gpu-time.png')
    try:
        plt.clf()
        plt.style.use('seaborn-paper')
        plt.rcParams["font.size"] = 30
        fig = plt.figure()
        fig.add_subplot(111, frameon=False)
        fig.set_size_inches(12, 7)
        plt.xticks(fontsize=13)
        plt.yticks(fontsize=13)
        plt.xlabel('Memory Budget (MB)', fontsize=15, labelpad=10)
        plt.ylabel(r'Compute Time (ms)', fontsize=15, labelpad=10)
        plt.title(f'{name_dict.get(model_name, model_name)} GPU Time',
                  fontsize=18)
        plt.grid(True)

        ax = plt.gca()
        if dtr_entries:
            lin, = ax.plot(x_axis,
                           dtr_entries,
                           color=color_scheme.get(model_name, 'black'),
                           linewidth=4)
            mk, = ax.plot(x_axis,
                          dtr_entries,
                          label=name_dict.get(model_name, model_name),
                          linewidth=4,
                          marker=marker_scheme.get(model_name, '+'),
                          ms=12,
                          alpha=.6,
                          color=color_scheme.get(model_name, 'black'))
            ax.legend([(lin, mk)], ['merged'])

        if failed_trials:
            plt.axvline(x=max(failed_trials),
                        color=color_scheme.get(model_name, 'black'),
                        linestyle='dashed')

        plt.legend(bbox_to_anchor=(0.5, 0.01),
                   loc='lower center',
                   bbox_transform=fig.transFigure,
                   ncol=7,
                   borderaxespad=0,
                   prop={'size': 15})
        plt.tight_layout()
        plt.savefig(filename, bbox_inches='tight')
        return (True, 'success')
    except Exception as e:
        return (False, render_exception(e))
Example #5
def render_field(model_name,
                 output_dir,
                 title,
                 filename,
                 x_label,
                 y_label,
                 x_axis,
                 baseline_entries,
                 dtr_entries,
                 failed_trials,
                 confidence=None,
                 suptitle=''):
    if not (dtr_entries or baseline_entries or failed_trials):
        return (True, 'nothing to render')
    file = prepare_out_file(output_dir, filename)
    try:
        # min_x = min(*(x_axis + failed_trials))
        # max_x = max(*(x_axis + failed_trials))
        ax = plt.gca()
        if dtr_entries:
            lin, = ax.plot(x_axis,
                           dtr_entries,
                           color=COLOR_SCHEME.get(model_name, 'black'),
                           linewidth=4)
            mk, = ax.plot(x_axis,
                          dtr_entries,
                          label=NAME_DICT.get(model_name, model_name),
                          linewidth=4,
                          marker=MARKER_SCHEME.get(model_name, '+'),
                          ms=12,
                          alpha=.6,
                          color=COLOR_SCHEME.get(model_name, 'black'))
            if confidence:
                render_errorbars(ax, x_axis, dtr_entries, confidence)
            ax.legend([(lin, mk)], ['merged'])
        # if baseline_entries:
        #     plt.hlines(y=baseline_entries[0], xmin=min_x, xmax=max_x, linewidth=3,
        #                label='Baseline', color='blue', linestyles='dashed')

        if failed_trials:
            plt.axvline(x=max(failed_trials),
                        color=COLOR_SCHEME.get(model_name, 'black'),
                        linestyle='dashed')

        # fig = plt.legend().figure
        # fig.savefig(file)
        return (True, 'success')
    except Exception as e:
        return (False,
                'Exception encountered while rendering graph: {}'.format(
                    render_exception(e)))
Example #6
def extend_simrd_config(dest_dir, sim_conf_filename, model_name,
                        specific_params, log_name):
    if not check_file_exists(dest_dir, sim_conf_filename):
        prepare_out_file(dest_dir, sim_conf_filename)
        write_json(dest_dir, sim_conf_filename, dict())

    conf = read_json(dest_dir, sim_conf_filename)
    if model_name not in conf:
        conf[model_name] = []
    conf[model_name].append({
        'name': model_util.get_model_family(model_name),
        'batch_size': str(specific_params['batch_size']),
        'layers': specific_params.get('layers',
                                      model_util.get_model_layers(model_name)),
        'type': model_util.get_model_type(model_name),
        'log': log_name,
        'has_start': True
    })
    write_json(dest_dir, sim_conf_filename, conf)
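For reference, a hypothetical example of what conf looks like after one append in this version (values are illustrative; the 'type' value in particular is an assumption):

conf = {
    'resnet32': [{
        'name': 'resnet',        # model_util.get_model_family(...)
        'batch_size': '32',
        'layers': 32,            # specific_params.get('layers', ...)
        'type': 'static',        # model_util.get_model_type(...); value hypothetical
        'log': 'resnet32-32-inf-dtr-1626299461.log',
        'has_start': True
    }]
}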
Example #7
def extend_simrd_config(dest_dir, sim_conf_filename, model_name,
                        specific_params, log_name):
    import model_util
    if not check_file_exists(dest_dir, sim_conf_filename):
        prepare_out_file(dest_dir, sim_conf_filename)
        write_json(dest_dir, sim_conf_filename, dict())

    conf = read_json(dest_dir, sim_conf_filename)
    if model_name not in conf:
        conf[model_name] = []
    name = model_util.format_model_name(model_name, specific_params)
    conf[model_name].append({
        'name': name,
        'title': name,
        'desc': model_util.format_input_description(model_name, specific_params),
        'log': log_name,
        'has_start': True
    })
    write_json(dest_dir, sim_conf_filename, conf)
Example #8
def run_trials(config_dir,
               python_cmd,
               experiment_name,
               model_name,
               specific_params,
               n_inputs,
               n_reps,
               path_prefix,
               report_errors=False,
               append_to_csv=False,
               trial_run=False,
               trial_run_outfile='',
               cmd_id=0,
               conf_cnt=0):
    """
    Responsible for recording the time and max memory usage
    from running a model (the user must provide a lambda for
    actually running the model because different kinds of models
    need different kinds of setup and a lambda that generates an
    input for running that model)

    :params:
        trial_run: When set to True, no persistent experiment data is saved. This is used to
                   run a baseline trial and record how much memory it uses, which then sets the
                   memory budget for `ratio` commands in DTR experiments.

        trial_run_outfile: the temporary file that stores the memory usage data of the baseline run

        cmd_id: the command id for the current model, starting from 0 by default
        conf_cnt: the id of the configuration generated from `unfold_settings`; this is used to
                  track exactly which configuration caused errors.
    """
    try:
        cwd = os.getcwd()
        params_file = 'specific_params.json'
        try:
            write_json(cwd, params_file, specific_params)
            if not trial_run:
                filename = prepare_out_file(
                    path_prefix, '{}-{}.csv'.format(
                        get_report_prefix(experiment_name, specific_params,
                                          cmd_id), model_name))
                mode = 'a' if append_to_csv else 'w'
                with open(filename, mode, newline='') as csvfile:
                    writer = create_csv_writer(csvfile, specific_params)
                    if not append_to_csv:
                        writer.writeheader()
            else:
                filename = ''

            shared_dir = os.path.dirname(os.path.abspath(__file__))
            run_script = os.path.join(shared_dir, 'run_torch_trial.py')

            for i in range(n_inputs):
                try:
                    subprocess.run(
                        [
                            python_cmd, run_script,
                            '--config-dir', config_dir,
                            '--experiment-mode', experiment_name,
                            '--model-name', model_name,
                            '--input-idx', str(i),
                            '--params-file', params_file,
                            '--out-file', filename,
                            '--trial-run', str(trial_run),
                            '--trial-run-outfile', trial_run_outfile
                        ],
                        check=True,
                        timeout=specific_params.get('timeout', 60))
                except (subprocess.CalledProcessError,
                        subprocess.TimeoutExpired) as e:
                    if not report_errors:
                        raise e
                    if trial_run:
                        return (False, 'Baseline failed: {}'.format(
                            render_exception(e)))
                    log_error(experiment_name, model_name, specific_params, i,
                              render_exception(e), path_prefix)
                    return (True, 'successfully caught error')
                time.sleep(4)
            return (True, 'success')
        finally:
            os.remove(params_file)
    except Exception as e:
        return (False, 'Encountered exception on ({}, {}, {}):\n'.format(
            experiment_name, model_name, specific_params) +
                render_exception(e))
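A hedged usage sketch of the trial-run path described in the docstring: a single baseline run whose memory figure is written to trial_run_outfile (the python command, model name, and file name are hypothetical; config_dir and output_dir come from the surrounding driver):

temp_file = prepare_out_file(os.getcwd(), 'baseline-mem.json')
success, msg = run_trials(config_dir,
                          'python3',             # hypothetical python_cmd
                          'baseline',
                          'resnet32',            # hypothetical model_name
                          {'batch_size': 32},
                          1,                     # n_inputs
                          1,                     # n_reps
                          output_dir,
                          trial_run=True,
                          trial_run_outfile=temp_file)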
Example #9
    def save(self, dirname, filename):
        outfile = prepare_out_file(dirname, filename)
        plt.savefig(outfile, dpi=500, bbox_inches='tight')
        plt.close()
Example #10
def render_graph(config, data, output_dir):
    try:
        plt.style.use('seaborn-paper')
        plt.rcParams["font.size"] = 30
        fig = plt.figure()
        fig.add_subplot(111, frameon=False)
        fig.set_size_inches(12, 7)
        plt.xticks(fontsize=13)
        plt.yticks(fontsize=13)
        plt.xlabel('Memory Budget (Ratio)', fontsize=15, labelpad=10)
        plt.ylabel(r'Overhead Slow Down ($\times$)', fontsize=15, labelpad=10)
        plt.title('GPU Time Comparisons', fontsize=18)
        plt.grid(True)
        filename = prepare_out_file(output_dir,
                                    'combined-comparison-ratio.png')

        metadata = {}
        for model in config['models']:
            dtr_dict = {}
            baseline_dict = {}
            stats = data[model]
            for stat in stats:
                if stat['specific_params']['type'] == 'baseline':
                    baseline_dict = fill_data(baseline_dict, stat)
                else:
                    dtr_dict = fill_data(dtr_dict, stat)

            metadata[model] = {'baseline': baseline_dict, 'dtr': dtr_dict}

        success, msg = traverse_field(metadata, 'ratio',
                lambda model, batch_size, dtr_dict, baseline_dict, output_dir:\
                        render_time_comparison(model, batch_size, 'ratio',
                                                dtr_dict[batch_size]['ratio'],
                                                baseline_dict.get(batch_size, {}),
                                                output_dir), output_dir)

        if not success:
            return (False, msg)

        plt.hlines(y=1,
                   xmin=0.0,
                   xmax=1.0,
                   linewidth=3,
                   label='Baseline',
                   color='blue',
                   linestyles='dashed')
        plt.legend(bbox_to_anchor=(0.5, 0.01),
                   loc='lower center',
                   bbox_transform=fig.transFigure,
                   ncol=7,
                   borderaxespad=0,
                   prop={'size': 15})
        plt.tight_layout()
        # plt.savefig(filename, bbox_inches = 'tight')

        plt.clf()
        plt.rcParams["font.size"] = 30

        figure, axs = plt.subplots(2, 4, figsize=(20, 8))
        # figure.set_size_inches(24, 12)
        axs = reversed(flatten(axs))
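        # Note: flatten(...) is assumed to yield the 2x4 axes as a flat list;
        # reversed(...) then gives an iterator over them, so each next(axs) call
        # in the lambda below consumes one subplot axis in reverse order.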

        success, msg = traverse_field(metadata, 'fixed',
                lambda model, batch_size, dtr_dict, baseline_dict, output_dir:\
                        render_time_comparison(model, batch_size, 'fixed',
                                                dtr_dict[batch_size]['fixed'],
                                                baseline_dict.get(batch_size, {}), output_dir, plt_ax=next(axs)),
                                                output_dir)

        filename = prepare_out_file(output_dir,
                                    'combined-breakdown-comparison.png')
        # figure.tight_layout()
        # plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
        # plt.xlabel('Memory Budget (GiB)')
        # plt.ylabel("Time (ms)")
        figure.text(0.5,
                    0.02,
                    r'\textbf{\Huge Memory Budget (GiB)}',
                    ha='center')
        figure.text(0.09,
                    0.5,
                    r'\textbf{\Huge Time (ms) / Batch}',
                    ha='center',
                    va='center',
                    rotation='vertical')
        plt.legend(bbox_to_anchor=(0.17, 0.075),
                   loc='upper left',
                   bbox_transform=fig.transFigure,
                   ncol=6,
                   borderaxespad=0,
                   prop={'size': 15})
        # figure.tight_layout()
        # plt.tight_layout()
        # plt.tight_layout(h_pad=0.3)
        plt.subplots_adjust(hspace=0.4)
        plt.savefig(filename, bbox_inches='tight', pad_inches=0.4)

        if not success:
            return (False, msg)

        success, msg = render_throughput_breakdown(metadata, output_dir)
        if not success:
            return False, msg
        return (True, 'success')
    except Exception as e:
        return (False,
                'Exception encountered while rendering graphs: {}'.format(
                    render_exception(e)))
Example #11
    def plot_model(model):
        filename = prepare_out_file(output_dir,
                                    f'throughput-comparison-{model}.png')
        plt.clf()
        plt.grid(True)
        plt.title(f'Throughput Comparison of {NAME_DICT.get(model, model)}')
        plt.xlabel('Batch Size', fontsize=15, labelpad=10)
        plt.ylabel('Throughput (Batch Size / Avg GPU Time (s))')
        num_batch_size = len(throughput_metadata[model]['dtr'].keys())
        baseline_data = metadata[model]['baseline']
        width = 0.15
        ind = np.arange(num_batch_size)
        x_axis = list(sorted(throughput_metadata[model]['dtr'].keys()))

        # Wish we had currying !!!
        # If baseline data does not contain a batch size, then we fill 0 into the data, since it means baseline failed (OOMed)
        baseline_data = list(
            map(flip(throughput_metadata[model]['baseline'].get)(0), x_axis))

        # Bar for baseline
        plt.bar(ind, [datum['throughput'] for datum in baseline_data],
                width,
                label='Baseline')
        dtr_data = {'throughput': {}, 'breakdown': {}}

        # Gather information collected
        # the structure of dtr_data:
        # Level 0: 'breakdown'      | 'throughput'
        # Level 1: data dictionary  | computed throughput (float)
        # Level 2: same as dictionaries processed in fill_data
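        # Hypothetical example of the filled-in structure (budgets in bytes):
        # dtr_data = {
        #     'throughput': {8e9: [t1, t2], 6e9: [t3, 0]},        # 0 marks an errored run
        #     'breakdown':  {8e9: [{...}, {...}], 6e9: [{...}, None]}
        # }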
        for x in x_axis:
            for datum in throughput_metadata[model]['dtr'][x]:
                if datum['memory_budget'] not in dtr_data['throughput']:
                    dtr_data['throughput'][datum['memory_budget']] = []
                    dtr_data['breakdown'][datum['memory_budget']] = []
                dtr_data['throughput'][datum['memory_budget']].append(
                    datum['throughput'] if not datum['error'] else 0)
                dtr_data['breakdown'][datum['memory_budget']].append(
                    dict(filter(lambda x: x[0] != 'throughput', datum.items())
                         ) if not datum['error'] else None)

        num_budget = len(dtr_data['throughput'].keys())
        plt.xticks(ind + width * (num_budget / 2), map(str, x_axis))

        for (i, (budget, throughput)) in enumerate(
                sorted(dtr_data['throughput'].items(), key=lambda x: -x[0])):
            plt.bar(ind + width * (i + 1),
                    throughput,
                    width,
                    label=f'{round(budget * 1e-9, 1)} GiB')

        plt.legend(loc='best')
        plt.tight_layout()
        plt.savefig(filename, bbox_inches='tight')

        # Plot runtime profiling breakdown
        filename = prepare_out_file(output_dir, f'time-breakdown-{model}.png')
        plt.clf()
        plt.title(f'Runtime Breakdown of {NAME_DICT.get(model, model)}')
        plt.xlabel('Batch Size')
        plt.ylabel('Time / Batch (ms)')
        x_ticks_loc = {
            ind[i] + width * (num_budget / 2): '\n\n' + str(x_axis[i])
            for i in range(num_batch_size)
        }
        plt.grid(True, axis='y')
        for (i, (budget, datum)) in enumerate(
                sorted(dtr_data['breakdown'].items(), key=lambda x: -x[0])):
            locs = ind + width * (i + 1)
            for loc in locs:
                x_tick = f'{round(budget * 1e-9, 1)}\nGiB'
                if loc in x_ticks_loc.keys():
                    x_tick += f'\n{x_ticks_loc[loc]}'
                x_ticks_loc[loc] = x_tick

            if datum is None:
                continue
            gathered_data = {key: [] for key in (timed_keys + ['cpu_time'])}
            gathered_data['dispatch_overhead'] = []
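            # dispatch_overhead is the part of cpu_time not attributed to any
            # timed component: cpu_time - sum of the timed_keys values per entry.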
            for e in datum:
                time_acc = 0
                for key in gathered_data.keys():
                    if key != 'dispatch_overhead':
                        if e is None:
                            gathered_data[key].append(0)
                        else:
                            gathered_data[key].append(e[key])
                        if key != 'cpu_time' and e is not None:
                            time_acc += e[key]
                if e is not None:
                    gathered_data['dispatch_overhead'].append(
                        gathered_data['cpu_time'][-1] - time_acc)
                else:
                    gathered_data['dispatch_overhead'].append(0)

            height_acc = np.zeros(len(datum))
            for key in timed_keys:  # + ['dispatch_overhead']:
                if i == 0:
                    plt.bar(ind + width * (i + 1),
                            gathered_data[key],
                            width=width,
                            label=breakdown_namedict[key],
                            color=breakdown_color_scheme[key],
                            bottom=height_acc)
                else:
                    plt.bar(ind + width * (i + 1),
                            gathered_data[key],
                            width=width,
                            color=breakdown_color_scheme[key],
                            bottom=height_acc)

                height_acc += gathered_data[key]
        xticks_data = list(sorted(x_ticks_loc.items(), key=lambda x: -x[0]))
        ticks = list(map(lambda x: x[0], xticks_data))
        labels = list(map(lambda x: x[1], xticks_data))
        plt.xticks(ticks, labels)
        plt.legend(loc='best')
        plt.tight_layout()
        plt.savefig(filename, bbox_inches='tight')
Example #12
def render_fixed(ax,
                 model_name,
                 output_dir,
                 x_axis,
                 dtr_entries,
                 baseline_data,
                 failed_trials,
                 batch_size=None,
                 confidence=None):
    if not (dtr_entries or failed_trials):
        return (True, 'nothing to render')
    filename = prepare_out_file(
        output_dir,
        f'{NAME_DICT.get(model_name, model_name)}-fixed-gpu-time.png')
    try:
        # plt.style.use('seaborn-paper')
        # plt.rcParams["font.size"] = 30
        # fig = plt.figure()
        # fig.add_subplot(111, frameon=False)
        # fig.set_size_inches(12, 7)
        # plt.xticks(fontsize=13)
        # plt.yticks(fontsize=13)
        # plt.xlabel('Memory Budget (MB)', fontsize=15, labelpad=10)
        # plt.ylabel(r'Compute Time (ms)', fontsize=15, labelpad=10)
        # plt.title(f'{NAME_DICT.get(model_name, model_name)} GPU Time', fontsize=18)
        # plt.grid(True)

        # ax = plt.gca()
        width = 0.0
        all_axis = sorted(x_axis + failed_trials)
        ind = np.arange(len(all_axis) + 1)
        ind_index = dict(zip(all_axis, ind))
        ind_pos = dict([(ind[i], i) for i in range(len(ind))])
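        # Bar-slot layout: one slot per budget (successful or failed), plus one
        # extra slot at the end (ind[-1]) reserved for the unmodified-PyTorch bar;
        # ind_index maps each memory budget to its slot position on the x-axis.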
        ax.set_xticks(ind + width / 2)
        ax.set_xticklabels(
            map(lambda x: f'{round(x * 1e-9, 1)}',
                all_axis + [baseline_data['mem'] * 1e+6]))

        ax.tick_params(axis='both', labelsize=20)

        filtered_entries = []

        if baseline_data and 'cpu_time' in baseline_data:
            for (x, datum) in zip(x_axis, dtr_entries):
                if not datum.get(
                        'error', False) and 'cpu_time' in datum and datum[
                            'cpu_time'] > 3 * baseline_data['cpu_time']:
                    failed_trials.append(x)
                    filtered_entries.append({key: 0 for key in datum.keys()})
                else:
                    filtered_entries.append(datum)

        dtr_entries = filtered_entries

        if failed_trials:
            for x in failed_trials:
                ax.axvline(x=ind_index[x],
                           color='red',
                           linestyle='dashed',
                           label='OOM')
        new_ind = []
        for x in x_axis:
            new_ind.append(ind_index[x])
        new_ind.append(ind[-1])
        ind = np.array(new_ind)
        ax.grid(True, axis='y')
        ax.set_title(
            f'{NAME_DICT.get(model_name, model_name)} ({batch_size})\n{input_sizes.get(model_name, "")}',
            fontsize=15)

        for x in failed_trials:
            ax.bar(ind_index[x], 0)
        if dtr_entries:
            # lin, = ax.plot(x_axis, dtr_entries, color=COLOR_SCHEME.get(model_name, 'black'), linewidth=4)
            # mk,  = ax.plot(x_axis, dtr_entries, label=NAME_DICT.get(model_name, model_name),
            #               linewidth=4, marker=MARKER_SCHEME.get(model_name, '+'), ms=12,
            #               alpha=.6, color=COLOR_SCHEME.get(model_name, 'black'))
            data_collection = {key: [] for key in timed_keys}
            data_collection['dispatch_overhead'] = []
            for entry in dtr_entries:
                acc = 0
                for (k, v) in entry.items():
                    if k != 'cpu_time':
                        data_collection[k].append(v)
                        acc += v
                data_collection['dispatch_overhead'].append(entry['cpu_time'] -
                                                            acc)

            acc = np.zeros(len(x_axis))
            for k in timed_keys + ['dispatch_overhead']:
                # print(ind[:-1], data_collection[k])
                ax.bar(ind[:-1],
                       data_collection[k],
                       label=breakdown_namedict.get(k, k),
                       color=breakdown_color_scheme.get(k, 'red'),
                       bottom=acc)
                acc = acc + data_collection[k]

            if baseline_data and 'cpu_time' in baseline_data:
                ax.bar([ind[-1]],
                       baseline_data['cpu_time'],
                       label='Unmodified\nPyTorch',
                       color='blue')
            else:
                ax.bar([ind[-1]], 0, label='Unmodified PyTorch', color='blue')
                ax.axvline(ind[-1],
                           color='red',
                           linestyle='dashed',
                           label='OOM')

            if confidence and False:
                render_errorbars(ax, x_axis, dtr_entries, confidence)

            ax.invert_xaxis()
            # ax.legend([(lin, mk)], ['merged'])

            # plt.legend(
        #         bbox_to_anchor=(0.5,0.01),
        #         loc='lower center',
        #         bbox_transform=fig.transFigure,
        #         ncol=7,
        #         borderaxespad=0,
        #         prop={'size': 15}
        #     )
        # plt.tight_layout()
        # plt.savefig(filename, bbox_inches = 'tight')
        return (True, 'success')
    except Exception as e:
        return (False, render_exception(e))
Example #13
def render_graph(config, data, output_dir):
    try:
        plt.style.use('seaborn-paper')
        plt.rcParams["font.size"] = 30
        fig = plt.figure()
        fig.add_subplot(111, frameon=False)
        fig.set_size_inches(12, 7)
        plt.xticks(fontsize=13)
        plt.yticks(fontsize=13)
        plt.xlabel('Memory Budget (Ratio)', fontsize=15, labelpad=10)
        plt.ylabel(r'Overhead Slow Down ($\times$)', fontsize=15, labelpad=10)
        plt.title('GPU Time Comparisons', fontsize=18)
        plt.grid(True)
        filename = prepare_out_file(output_dir,
                                    'combined-comparison-ratio.png')

        metadata = {}
        for model in config['models']:
            dtr_dict = {}
            baseline_dict = {}
            stats = data[model]
            for stat in stats:
                if stat['specific_params']['type'] == 'baseline':
                    baseline_dict = fill_data(baseline_dict, stat)
                else:
                    dtr_dict = fill_data(dtr_dict, stat)

            metadata[model] = {'baseline': baseline_dict, 'dtr': dtr_dict}

            for batch_size in dtr_dict:
                baseline_data = baseline_dict.get(batch_size)
                for exp_kind in dtr_dict[batch_size]:
                    if exp_kind == 'ratio':
                        success, msg = render_time_comparison(
                            model, batch_size, exp_kind, baseline_data,
                            dtr_dict[batch_size][exp_kind], output_dir)
                        if not success:
                            return (False, msg)
        plt.hlines(y=1,
                   xmin=0.0,
                   xmax=1.0,
                   linewidth=3,
                   label='Baseline',
                   color='blue',
                   linestyles='dashed')
        plt.legend(bbox_to_anchor=(0.5, 0.01),
                   loc='lower center',
                   bbox_transform=fig.transFigure,
                   ncol=7,
                   borderaxespad=0,
                   prop={'size': 15})
        plt.tight_layout()
        plt.savefig(filename, bbox_inches='tight')

        for model in metadata:
            dtr_dict = metadata[model]['dtr']
            baseline_dict = metadata[model]['baseline']
            for batch_size in dtr_dict:
                baseline_data = baseline_dict.get(batch_size)
                for exp_kind in dtr_dict[batch_size]:
                    if exp_kind == 'fixed':
                        success, msg = render_time_comparison(
                            model, batch_size, exp_kind, baseline_data,
                            dtr_dict[batch_size][exp_kind], output_dir)
                        if not success:
                            return (False, msg)
        return (True, 'success')
    except Exception as e:
        return (False,
                'Exception encountered while rendering graphs: {}'.format(
                    render_exception(e)))
Example #14
def render_fixed(ax,
                 model_name,
                 output_dir,
                 x_axis,
                 dtr_entries,
                 baseline_data,
                 failed_trials,
                 batch_size=None,
                 confidence=None,
                 render_confidence=False):
    if not (dtr_entries or failed_trials):
        return (True, 'nothing to render')
    filename = prepare_out_file(output_dir, f'{model_name}-fixed-gpu-time.png')
    try:
        if render_confidence:
            plt.clf()
            plt.style.use('seaborn-paper')
            plt.rcParams["font.size"] = 30
            fig = plt.figure()
            fig.add_subplot(111, frameon=False)
            fig.set_size_inches(12, 7)
            plt.xticks(fontsize=13)
            plt.yticks(fontsize=13)
            plt.xlabel('Memory Budget (GiB)', fontsize=15, labelpad=10)
            plt.ylabel(r'Compute Time (ms)', fontsize=15, labelpad=10)
            plt.title(f'{NAME_DICT.get(model_name, model_name)} GPU Time', fontsize=18)
            plt.grid(True)

            ax = plt.gca()
            budgets = list(map(lambda x: x * 1e-9, x_axis))
            y_value = list(map(lambda x: x['cpu_time'], dtr_entries))
            if dtr_entries:
                if model_name == 'unroll_gan':
                    print('Unroll GAN:')
                    print(budgets, y_value)
                upper = list(map(lambda x: abs(x[1]), confidence))
                lower = list(map(lambda x: abs(x[0]), confidence))
                plt.errorbar(budgets, y_value, yerr=upper, uplims=True, lolims=False)
                plt.errorbar(budgets, y_value, yerr=lower, lolims=True, uplims=False)

            plt.tight_layout()
            plt.savefig(filename, bbox_inches='tight')
        else:
            width = 0.0
            all_axis = sorted(x_axis + failed_trials)
            ind = np.arange(len(all_axis) + 1)
            ind_index = dict(zip(all_axis, ind))
            ind_pos = dict([(ind[i], i) for i in range(len(ind))])
            ax.set_xticks(ind + width / 2)
            filtered_entries = []

            if baseline_data and 'cpu_time' in baseline_data:
                for (x, datum) in zip(x_axis, dtr_entries):
                    if not datum.get('error', False) and 'cpu_time' in datum and datum['cpu_time'] > 3 * baseline_data['cpu_time']:
                        failed_trials.append(x)
                        filtered_entries.append({key : 0 for key in datum.keys()})
                    else:
                        filtered_entries.append(datum)

            failed_trials_str = list(map(lambda x: f'{round(x * 1e-9, 1)}', failed_trials))
            labels = list(map(lambda x: f'{round(x * 1e-9, 1)}', all_axis + [baseline_data.get('mem', 12000) * 1e+6]))
            if model_name in LOWEST_BUDGET_NON_SAMPLED:
                for i in range(len(labels)):
                    if labels[i] not in failed_trials_str:
                        labels[i] = f'{labels[i]}$^*$'
                        break
            ax.set_xticklabels(labels)
            ax.tick_params(axis='both', labelsize=20)

            dtr_entries = filtered_entries

            if failed_trials:
                for x in failed_trials:
                    ax.axvline(x=ind_index[x], color='red', linestyle='dashed', label='OOM')
            new_ind = []
            for x in x_axis:
                new_ind.append(ind_index[x])
            new_ind.append(ind[-1])
            ind = np.array(new_ind)
            ax.grid(True, axis='y')
            ax.set_title(f'{NAME_DICT.get(model_name, model_name)} ({batch_size})\n{input_sizes.get(model_name, "")}', fontsize=15)

            for x in failed_trials:
                ax.bar(ind_index[x], 0)
            if dtr_entries:
                data_collection = { key : [] for key in timed_keys }
                data_collection['dispatch_overhead'] = []
                for entry in dtr_entries:
                    acc = 0
                    for (k, v) in entry.items():
                        if k != 'cpu_time':
                            data_collection[k].append(v)
                            acc += v
                    data_collection['dispatch_overhead'].append(entry['cpu_time'] - acc)

                acc = np.zeros(len(x_axis))
                for k in timed_keys + ['dispatch_overhead']:
                    ax.bar(ind[:-1],
                           data_collection[k],
                           label=breakdown_namedict.get(k, k),
                           color=breakdown_color_scheme.get(k, 'red'),
                           bottom=acc)
                    acc = acc + data_collection[k]

                if baseline_data and 'cpu_time' in baseline_data:
                    ax.bar([ind[-1]], baseline_data['cpu_time'], label='Unmodified\nPyTorch', color='blue')
                else:
                    ax.bar([ind[-1]], 0, label='Unmodified PyTorch', color='blue')
                    ax.axvline(ind[-1], color='red', linestyle='dashed', label='OOM')

                if confidence and False:
                    render_errorbars(ax, x_axis, dtr_entries, confidence)

                ax.invert_xaxis()

        return (True, 'success')
    except Exception as e:
        return (False, render_exception(e))