Пример #1
0
def main(data_dir, config_dir, output_dir):
    """
    Write a text summary of the most recent data entry plus a status.

    The config in config_dir is validated first. The last entry returned
    by sort_data(data_dir) is then summarized per model: one block of
    statistics for every (target, device) among SIM_TARGETS, and one
    mean-time line for every (target, device) among PHYS_TARGETS.

    Returns 1 (after writing a failure status) when the config is invalid
    or there is no data; otherwise writes the summary and a success
    status and returns None.
    """
    config, msg = validate(config_dir)
    if config is None:
        write_status(output_dir, False, msg)
        return 1

    all_data = sort_data(data_dir)
    # Without this guard an empty data directory raises IndexError on
    # all_data[-1] and the run ends with no status written at all.
    if not all_data:
        write_status(output_dir, False,
                     'No data found in {}'.format(data_dir))
        return 1

    most_recent = {k: v for (k, v) in all_data[-1].items()
                   if k not in METADATA_KEYS}

    # Collect the pieces and join once instead of growing a string with +=.
    pieces = []
    for (model, targets) in most_recent.items():
        # simulated target summary
        for (target, devices) in targets.items():
            if target not in SIM_TARGETS:
                continue
            for (device, stats) in devices.items():
                pieces.append('_Stats on ({}, {}, {}) & _\n'.format(
                    model, target.upper(), device.upper()))
                for (stat, val) in stats.items():
                    pieces.append('{}: {:.2E}\n'.format(stat, Decimal(val)))
        # physical target summary
        for (target, devices) in targets.items():
            if target not in PHYS_TARGETS:
                continue
            for (device, mean_time) in devices.items():
                pieces.append('Time on ({}, {}, {}): {:.2f}\n'.format(
                    model, target.upper(), device.upper(), mean_time))

    write_summary(output_dir, config['title'], ''.join(pieces))
    write_status(output_dir, True, 'success')
Пример #2
0
def write_generic_summary(data_dir,
                          output_dir,
                          title,
                          devices,
                          networks=None,
                          use_networks=False):
    """
    Write a generic summary and a status for the newest entry in data_dir.

    The data is assumed to have a field keyed by device (cpu/gpu) and,
    when use_networks is set, by network as well. Any exception raised
    along the way is reported through a failure status instead of
    propagating to the caller.
    """
    try:
        latest = sort_data(data_dir)[-1]

        if use_networks:
            text = summary_by_dev_and_network(latest, devices, networks)
        else:
            text = summary_by_dev(latest, devices)

        write_summary(output_dir, title, text)
        write_status(output_dir, True, 'success')

        # TODO do something about comparisons to previous days
    except Exception as err:
        failure = 'Exception encountered:\n' + render_exception(err)
        write_status(output_dir, False, failure)
Пример #3
0
def main(config_dir, home_dir, output_dir):
    """
    Render CPU and GPU telemetry graphs for the experiments under home_dir.

    An experiment is processed only when its prerequisites check returns
    (True, 'success') and its run.json status reports success; others are
    skipped. A success status is written after each processed experiment.
    The function stops early, after writing a failure status, at the
    first experiment whose telemetry folder is missing or whose graphs
    raise. config_dir is not used here.
    """
    info = DashboardInfo(home_dir)
    idemp_mkdir(output_dir)

    for exp_name in info.all_present_experiments():
        status_dir = info.exp_status_dir(exp_name)
        run_status = validate_json(status_dir,
                                   'run_cpu_telemetry',
                                   'run_gpu_telemetry',
                                   filename='run.json')
        prereqs_ok = check_prerequisites(
            info, {exp_name: {}}) == (True, 'success')
        if not (prereqs_ok and run_status.get('success', False)):
            continue

        telemetry_folder = info.subsys_telemetry_dir(exp_name)
        if not os.path.exists(telemetry_folder):
            write_status(output_dir, False, 'No telemetry data found')
            return

        exp_graph_folder = os.path.join(telemetry_folder, 'graph')
        cpu_dir = info.exp_cpu_telemetry(exp_name)
        gpu_dir = info.exp_gpu_telemetry(exp_name)
        cpu_entries = sort_data(cpu_dir)
        gpu_entries = sort_data(gpu_dir)
        include_root = os.path.join(info.exp_graph_dir(exp_name))

        try:
            want_cpu = cpu_entries and run_status.get('run_cpu_telemetry',
                                                      False)
            if want_cpu:
                visualize(
                    'cpu', process_cpu_telemetry(cpu_entries[-1]),
                    exp_graph_folder,
                    os.path.join(include_root, 'cpu_telemetry'),
                    f'Visualizing CPU telemetry for {exp_name}',
                    lambda adapter, title, *rest: f'{adapter}-{title}')

            want_gpu = gpu_entries and run_status.get('run_gpu_telemetry',
                                                      False)
            if want_gpu:
                visualize(
                    'gpu', process_gpu_telemetry(gpu_entries[-1]),
                    exp_graph_folder,
                    os.path.join(include_root, 'gpu_telemetry'),
                    f'Visualizing GPU telemetry for {exp_name}',
                    lambda _, title, *rest: title)
        except Exception as err:
            write_status(
                output_dir, False,
                f'Encountered err while generating graphs: {err}')
            return

        write_status(output_dir, True, 'success')
Пример #4
0
def main(config_dir, home_dir, output_dir):
    """
    Plot per-device pass-comparison graphs from the latest data entry.

    For every device key in the newest 'pass_comparison' entry, the value
    plotted for each (pass combination, network) is the baseline ('0;')
    measurement divided by the measurement for that combination. One
    image named pass-comp-<dev>.png is generated per device.

    Returns 1 after writing a failure status when the prerequisites are
    not met or anything raises while loading or plotting; otherwise
    writes a success status and returns None. config_dir is not used.
    """
    info = DashboardInfo(home_dir)
    networks = ['resnet-18', 'mobilenet', 'nature-dqn', 'vgg-16']
    pass_spec_name_map = {
        '3;FuseOps': 'Op Fusion',
        '3;FoldConstant|FuseOps': '... + Constant Folding',
        '3;EliminateCommonSubexpr|FoldConstant|FuseOps': '... + Common Subexpr Elim',
        '3;EliminateCommonSubexpr|CombineParallelConv2D|FoldConstant|FuseOps': '... + Parallel Conv Comb',
        '3;EliminateCommonSubexpr|CombineParallelConv2D|FoldScaleAxis|FoldConstant|FuseOps': '... + Axis Scale Folding',
        '3;EliminateCommonSubexpr|CombineParallelConv2D|FoldScaleAxis|CanonicalizeCast|FoldConstant|FuseOps': '... + Cast Canonicalization',
        '3;EliminateCommonSubexpr|CombineParallelConv2D|FoldScaleAxis|CanonicalizeCast|CanonicalizeOps|FoldConstant|FuseOps': '... + Op Canonicalization',
        '3;EliminateCommonSubexpr|CombineParallelConv2D|FoldScaleAxis|CanonicalizeCast|CanonicalizeOps|AlterOpLayout|FoldConstant|FuseOps': '... + Op Layout Alteration'
    }
    network_name_map = {
        'resnet-18': 'ResNet-18',
        'mobilenet': 'MobileNet V2',
        'nature-dqn': 'DQN',
        'vgg-16': 'VGG-16'
    }
    baseline = '0;'
    # top-level keys of a data entry that are not devices
    metadata_keys = ('timestamp', 'tvm_hash')

    prereqs, msg = check_prerequisites(info, {
        'pass_comparison': {
            'networks': networks,
            'passes': [
                parse_combo(combo) for combo in pass_spec_name_map
            ]
        }
    })
    # The result used to be computed and then ignored; report it instead
    # of carrying on with data that may be missing or incomplete.
    if not prereqs:
        write_status(output_dir, False, msg)
        return 1

    try:
        # Loading happens inside the try so that empty or malformed data
        # is reported through the status file rather than as a crash.
        all_data = sort_data(info.exp_data_dir('pass_comparison'))
        raw_data = all_data[-1]

        for (dev, raw_dev_data) in raw_data.items():
            # skip metadata instead of deleting it from the loaded entry
            if dev in metadata_keys:
                continue
            plot_data = OrderedDict([
                (pass_name, {
                    network_name_map[network]:
                    raw_dev_data[baseline][network] / raw_dev_data[pass_spec][network]
                    for network in networks})
                for (pass_spec, pass_name) in pass_spec_name_map.items()
            ])
            generate_pass_comparisons(plot_data, output_dir, f'pass-comp-{dev}.png')
    except Exception as e:
        write_status(output_dir, False, 'Exception encountered:\n' + render_exception(e))
        return 1

    write_status(output_dir, True, 'success')
Пример #5
0
def main(config_dir, home_dir, output_dir):
    """
    Plot GPU comparison graphs of other frameworks against Relay.

    For every other framework and network, the plotted value is that
    framework's measurement divided by the 'Relay' measurement, taken
    from the 'gpu' field of the newest 'cnn_comp' entry. The config may
    override the displayed name of our framework via 'our_name'.

    Returns 1 after writing a failure status when the prerequisites are
    not met or anything raises while loading, computing or plotting;
    otherwise writes a success status and returns None.
    """
    info = DashboardInfo(home_dir)
    conf = read_config(config_dir)
    our_name = conf['our_name'] if 'our_name' in conf else 'Relay'

    conf_fws = ['relay', 'pt', 'tf', 'mxnet', 'nnvm']
    # defined once; the original re-declared the identical list later on
    networks = ['resnet-18', 'mobilenet', 'nature-dqn', 'vgg-16']
    prereqs, msg = check_prerequisites(
        info, {
            'cnn_comp': {
                'devices': ['gpu'],
                'use_xla': True,
                'networks': networks,
                'frameworks': conf_fws
            }
        })
    if not prereqs:
        write_status(output_dir, False, msg)
        return 1

    our_fw = 'Relay'
    other_fws = ['TensorFlow', 'Pytorch', 'MxNet', 'NNVM', 'TF XLA']
    # display names equal the data keys except for PyTorch's capitalization
    fw_name_map = {fw: fw for fw in other_fws}
    fw_name_map['Pytorch'] = 'PyTorch'

    network_name_map = {
        'resnet-18': 'ResNet-18',
        'mobilenet': 'MobileNet V2',
        'nature-dqn': 'DQN',
        'vgg-16': 'VGG-16'
    }

    try:
        # Loading and the ratio computation are inside the try so that a
        # missing key, empty data or a zero measurement is reported via
        # the status file instead of aborting without any status.
        all_data = sort_data(info.exp_data_dir('cnn_comp'))
        raw_data = all_data[-1]['gpu']

        plot_data = OrderedDict([(fw_name_map[fw], {
            network_name_map[network]:
            raw_data[fw][network] / raw_data[our_fw][network]
            for network in networks
        }) for fw in other_fws])

        generate_vision_comparisons(our_name, plot_data, output_dir)
    except Exception as e:
        write_status(output_dir, False,
                     'Exception encountered:\n' + render_exception(e))
        return 1

    write_status(output_dir, True, 'success')
Пример #6
0
def prepare_exp_data_pages(info, out_dir):
    """
    Dump the data of every successfully analyzed experiment to JSON.

    For each experiment whose 'analysis' stage status exists and reports
    success, writes <out_dir>/<exp>.json containing the entries returned
    by sort_data in reversed order. Other experiments are skipped.
    """
    idemp_mkdir(out_dir)
    for exp in info.all_present_experiments():
        statuses = info.exp_stage_statuses(exp)
        analysis_ok = ('analysis' in statuses
                       and statuses['analysis']['success'])
        if not analysis_ok:
            continue

        entries = sort_data(info.exp_data_dir(exp))
        page_path = os.path.join(out_dir, '{}.json'.format(exp))

        # indent the output so that it's at least somewhat human-readable
        with open(page_path, 'w') as f:
            json.dump(entries[::-1], f, indent=1)
Пример #7
0
def process_score(info, score_metric, data_dir, graph_dir, timestamp):
    """
    Compute a score, persist it, try to graph it, and return its text.

    The score computed by score_metric is stamped with timestamp and
    written to data_dir as data_<timestamp>.json. Graph generation is
    best-effort: an Exception raised there is printed, not propagated.

    Returns whatever score_metric.score_text(data) returns.
    """
    data = score_metric.compute_score(info)
    data['timestamp'] = timestamp
    write_json(data_dir, 'data_{}.json'.format(timestamp), data)

    # graphs failing is not a fatal error, just an inconvenience
    try:
        score_metric.score_graph(data, graph_dir)
        all_data = sort_data(data_dir)
        score_metric.longitudinal_graphs(all_data, graph_dir)
    except Exception as e:
        print(render_exception(e))

    # Returning from a `finally` clause (as this used to) discards any
    # in-flight exception, including KeyboardInterrupt and SystemExit.
    # Ordinary exceptions are already handled above, so a plain return
    # keeps the best-effort behavior without swallowing interrupts.
    return score_metric.score_text(data)
Пример #8
0
def main(data_dir, config_dir, output_dir):
    """
    Render a graph for the newest data entry and record the outcome.

    Returns 1 after writing a failure status when the config is invalid
    or an exception is raised; otherwise writes the status reported by
    render_graph and returns None. plt.close() is called on every path.
    """
    try:
        config, config_msg = validate_trials_config(config_dir)
        if config is None:
            write_status(output_dir, False, config_msg)
            return 1

        latest = sort_data(data_dir)[-1]
        graph_ok, graph_msg = render_graph(config, latest, output_dir)
        write_status(output_dir, graph_ok, graph_msg)
    except Exception as err:
        failure = 'Exception encountered: ' + render_exception(err)
        write_status(output_dir, False, failure)
        return 1
    finally:
        plt.close()
Пример #9
0
def main(data_dir, config_dir, output_dir):
    """
    Write a 'Pareto Curve Trial' summary for the newest data entry.

    Returns 1 after writing a failure status when the config is invalid
    or an exception is raised; otherwise writes the summary and a success
    status and returns None.
    """
    try:
        config, config_msg = validate_trials_config(config_dir)
        if config is None:
            write_status(output_dir, False, config_msg)
            return 1

        latest = sort_data(data_dir)[-1]
        text = summarize(config, latest)

        write_summary(output_dir, 'Pareto Curve Trial', text)
        write_status(output_dir, True, 'success')
    except Exception as err:
        failure = 'Exception encountered: ' + render_exception(err)
        write_status(output_dir, False, failure)
        return 1
Пример #10
0
def main(data_dir, config_dir, output_dir):
    """
    Generate longitudinal graphs and graphs of the most recent data.

    Returns 1 after writing a failure status when the config is invalid
    or anything raises while loading data or generating graphs;
    otherwise writes a success status and returns None.
    """
    config, msg = validate(config_dir)
    if config is None:
        write_status(output_dir, False, msg)
        return 1

    try:
        # Read the data inside the try: with an empty data directory
        # all_data[-1] raises IndexError, which used to abort the run
        # without writing any status.
        all_data = sort_data(data_dir)
        most_recent = all_data[-1]

        generate_longitudinal_comparisons(all_data, output_dir)
        generate_arm_vta_comparisons(most_recent, output_dir)
    except Exception as e:
        write_status(output_dir, False, 'Exception encountered:\n' + render_exception(e))
        return 1

    write_status(output_dir, True, 'success')
Пример #11
0
def main(config_dir, home_dir, output_dir):
    """
    Plot GPU comparison graphs of optimization levels against O0.

    For every level in O1..O4 and every network, the plotted value is the
    'O0' measurement divided by that level's measurement, taken from the
    'gpu' field of the newest 'relay_opt' entry.

    Returns 1 after writing a failure status when the prerequisites are
    not met or anything raises while loading, computing or plotting;
    otherwise writes a success status and returns None. config_dir is
    not used.
    """
    info = DashboardInfo(home_dir)
    networks = ['resnet-18', 'mobilenet', 'nature-dqn', 'vgg-16']
    prereqs, msg = check_prerequisites(
        info, {
            'relay_opt': {
                'devices': ['gpu'],
                'opt_levels': [0, 1, 2, 3, 4],
                'networks': networks
            }
        })
    if not prereqs:
        write_status(output_dir, False, msg)
        return 1

    baseline = 'O0'
    opts = ['O1', 'O2', 'O3', 'O4']

    network_name_map = {
        'resnet-18': 'ResNet-18',
        'mobilenet': 'MobileNet V2',
        'nature-dqn': 'DQN',
        'vgg-16': 'VGG-16'
    }

    try:
        # Loading and the ratio computation are inside the try so that a
        # missing key, empty data or a zero measurement is reported via
        # the status file instead of aborting without any status.
        all_data = sort_data(info.exp_data_dir('relay_opt'))
        raw_data = all_data[-1]['gpu']

        plot_data = OrderedDict([(opt, {
            network_name_map[network]:
            raw_data[baseline][network] / raw_data[opt][network]
            for network in networks
        }) for opt in opts])

        generate_opt_comparisons(plot_data, output_dir)
    except Exception as e:
        write_status(output_dir, False,
                     'Exception encountered:\n' + render_exception(e))
        return 1

    write_status(output_dir, True, 'success')
Пример #12
0
    def main(data_dir, config_dir, output_dir):
        """
        Generate all-time, two-week and individual comparison graphs.

        The two-week set holds the entries whose time_difference from the
        newest entry is under 14 days. Returns 1 after writing a failure
        status when the config is invalid or an exception is raised;
        otherwise writes a success status and returns None.
        """
        try:
            config, config_msg = validate_config(config_dir)
            if config is None:
                write_status(output_dir, False, config_msg)
                return 1

            all_data = sort_data(data_dir)
            newest = all_data[-1]

            last_two_weeks = []
            for entry in all_data:
                if time_difference(newest, entry).days < 14:
                    last_two_weeks.append(entry)

            generate_longitudinal_comparisons(all_data, output_dir, 'all_time')
            generate_longitudinal_comparisons(last_two_weeks, output_dir,
                                              'two_weeks')
            generate_individual_comparisons(config, newest, output_dir)
        except Exception as err:
            failure = 'Exception encountered:\n' + render_exception(err)
            write_status(output_dir, False, failure)
            return 1

        write_status(output_dir, True, 'success')
Пример #13
0
def main(config_dir, home_dir, output_dir):
    """
    Plot CPU comparison graphs of MxNet and PyTorch against Relay AoT.

    Each plotted value is the other framework's measurement divided by
    the 'Aot' measurement from the newest entry of the relevant
    experiment (gluon_rnns for MxNet; char_rnn and treelstm for PyTorch).
    Combinations with no measurement are plotted as 0.0. The config may
    override the displayed name of our framework via 'our_name'.

    Returns 1 after writing a failure status when the prerequisites are
    not met or anything raises while loading, computing or plotting;
    otherwise writes a success status and returns None.
    """
    info = DashboardInfo(home_dir)
    conf = read_config(config_dir)
    our_name = conf['our_name'] if 'our_name' in conf else 'Relay'

    prereqs, msg = check_prerequisites(
        info, {
            'treelstm': {
                'devices': ['cpu'],
                'frameworks': ['relay', 'pt'],
                'relay_methods': ['aot']
            },
            'char_rnn': {
                'devices': ['cpu'],
                'frameworks': ['relay', 'pt'],
                'relay_methods': ['aot'],
                'relay_configs': ['loop']
            },
            'gluon_rnns': {
                'devices': ['cpu'],
                'frameworks': ['relay', 'mxnet'],
                'networks': ['rnn', 'lstm', 'gru'],
                'relay_methods': ['aot']
            }
        })
    if not prereqs:
        write_status(output_dir, False, msg)
        return 1

    try:
        # Loading and the ratio computation are inside the try so that a
        # missing key, empty data or a zero measurement is reported via
        # the status file instead of aborting without any status.
        raw_data = {}
        for exp in ['treelstm', 'char_rnn', 'gluon_rnns']:
            all_data = sort_data(info.exp_data_dir(exp))
            raw_data[exp] = all_data[-1]

        gluon = raw_data['gluon_rnns']['cpu']
        char_rnn = raw_data['char_rnn']['cpu']
        treelstm = raw_data['treelstm']['cpu']

        plot_data = OrderedDict([
            ('MxNet', {
                'RNN': gluon['MxNet']['rnn'] / gluon['Aot']['rnn'],
                'GRU': gluon['MxNet']['gru'] / gluon['Aot']['gru'],
                'LSTM': gluon['MxNet']['lstm'] / gluon['Aot']['lstm'],
                'CharRNN': 0.0,
                'TreeLSTM': 0.0,
            }),
            ('PyTorch', {
                'RNN': 0.0,
                'GRU': 0.0,
                'LSTM': 0.0,
                'CharRNN': char_rnn['Pytorch'] / char_rnn['Aot'],
                'TreeLSTM': treelstm['Pytorch'] / treelstm['Aot'],
            }),
        ])

        generate_nlp_comparisons(our_name, plot_data, output_dir)
    except Exception as e:
        write_status(output_dir, False,
                     'Exception encountered:\n' + render_exception(e))
        return 1

    write_status(output_dir, True, 'success')
Пример #14
0
def latest_data(info, exp, dev):
    """Return the dev field of the last entry sort_data yields for exp."""
    entries = sort_data(info.exp_data_dir(exp))
    newest = entries[-1]
    return newest[dev]
Пример #15
0
def main(config_dir, home_dir, output_dir):
    """
    Report fields whose newest value deviates strongly from past values.

    For every active experiment with 'run' and 'analysis' stage statuses
    and a successful analysis, compares each field combination of the
    newest data entry with the same field in earlier entries (optionally
    limited to the last time_window days, from the config). A field is
    flagged when the newest value differs from the past mean by more
    than one past standard deviation.

    Writes report.json to output_dir when at least one field is flagged,
    and always finishes by writing a success status.
    """
    info = DashboardInfo(home_dir)
    conf = read_config(config_dir)

    # delete old report so it doesn't hang around if we exit
    # without a new one; done in-process rather than by spawning `rm`.
    # A report that is already absent is fine; any other failure to
    # delete propagates, since it would leave a stale report behind.
    try:
        os.remove(os.path.join(output_dir, 'report.json'))
    except FileNotFoundError:
        pass

    time_window = int(conf['time_window']) if 'time_window' in conf else -1
    pings = conf['notify'] if 'notify' in conf else []

    # map: exp -> [(fields w/ high SD, historic mean, SD, current)]
    exp_alerts = {}
    for exp in info.all_present_experiments():
        if not info.exp_active(exp):
            continue

        # not this subsystem's job to report on failures
        stage_statuses = info.exp_stage_statuses(exp)
        if 'run' not in stage_statuses or 'analysis' not in stage_statuses:
            continue
        if not stage_statuses['analysis']['success']:
            continue

        all_data = sort_data(info.exp_data_dir(exp))
        if len(all_data) <= 1:
            continue

        most_recent = all_data[-1]
        past_data = all_data[:-1]
        if time_window >= 1:
            past_data = [
                entry for entry in past_data
                if time_difference(most_recent, entry).days <= time_window
            ]
        # With nothing left in the window there is no history to compare
        # against; np.std/np.mean of empty input would only yield NaN
        # (which never triggers an alert) plus a RuntimeWarning.
        if len(past_data) == 0:
            continue

        alerts = []
        field_values = traverse_fields(most_recent)
        for fields in itertools.product(*field_values):
            current_stat, _ = gather_stats([most_recent], fields)
            current = current_stat[0]
            past_stats, _ = gather_stats(past_data, fields)

            past_sd = np.std(past_stats)
            past_mean = np.mean(past_stats)
            if abs(current - past_mean) > past_sd:
                alerts.append((fields, past_mean, past_sd, current))

        # only experiments with at least one alert appear in the report
        if alerts:
            exp_alerts[exp] = alerts

    if exp_alerts:
        report = {
            'title': 'High SD Alerts',
            'value': format_report(info, exp_alerts, pings)
        }
        write_json(output_dir, 'report.json', report)

    write_status(output_dir, True, 'success')