Example #1
def write_generic_summary(data_dir,
                          output_dir,
                          title,
                          devices,
                          networks=None,
                          use_networks=False):
    """
    Given a data directory and output directory, this function writes
    a generic summary assuming that the data has a field keyed by device
    (cpu/gpu) and optionally by network. It writes a summary and status to the output dir.
    """
    try:
        all_data = sort_data(data_dir)
        most_recent = all_data[-1]

        summary = None
        if use_networks:
            summary = summary_by_dev_and_network(most_recent, devices,
                                                 networks)
        else:
            summary = summary_by_dev(most_recent, devices)
        write_summary(output_dir, title, summary)
        write_status(output_dir, True, 'success')

        # TODO do something about comparisons to previous days
    except Exception as e:
        write_status(output_dir, False,
                     'Exception encountered:\n' + render_exception(e))
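
Every example on this page reports its outcome through a small set of dashboard I/O helpers (`write_status`, `write_summary`, `render_exception`) whose definitions are not shown. A minimal sketch consistent with how they are called (the file names and JSON layout are assumptions):

import json
import os
import traceback

def write_status(output_dir, success, message):
    # Assumed layout: a status.json recording whether the stage succeeded.
    with open(os.path.join(output_dir, 'status.json'), 'w') as f:
        json.dump({'success': success, 'message': message}, f)

def write_summary(output_dir, title, value):
    # Assumed layout: a summary.json with a title and a text body.
    with open(os.path.join(output_dir, 'summary.json'), 'w') as f:
        json.dump({'title': title, 'value': value}, f)

def render_exception(e):
    # Render an exception together with its traceback as a string.
    return ''.join(traceback.format_exception(type(e), e, e.__traceback__))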
Example #2
def main(data_dir, config_dir, output_dir):
    config, msg = validate(config_dir)
    if config is None:
        write_status(output_dir, False, msg)
        return 1

    all_data = sort_data(data_dir)
    most_recent = all_data[-1]
    most_recent = {k: v for (k, v) in most_recent.items() if k not in METADATA_KEYS}
    summary = ''

    for (model, targets) in most_recent.items():
        # simulated target summary
        sim_targets = {target: targets[target] for target in targets if target in SIM_TARGETS}
        for (target, devices) in sim_targets.items():
            for (device, stats) in devices.items():
                summary += '_Stats on ({}, {}, {}) & _\n'.format(model, target.upper(), device.upper())
                for (stat, val) in stats.items():
                    summary += '{}: {:.2E}\n'.format(stat, Decimal(val))
        # physical target summary
        phys_targets = {target: v for (target, v) in targets.items() if target in PHYS_TARGETS}
        for (target, devices) in phys_targets.items():
            for (device, mean_time) in devices.items():
                summary += 'Time on ({}, {}, {}): {:.2f}\n'.format(
                        model, target.upper(), device.upper(), mean_time)

    write_summary(output_dir, config['title'], summary)
    write_status(output_dir, True, 'success')
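
The nested loops above assume each entry of `most_recent` maps a model to targets, each target to devices, and each device either to a dict of stats (targets in `SIM_TARGETS`) or to a single mean time (targets in `PHYS_TARGETS`). A hypothetical entry illustrating the expected shape (all names and numbers are made up):

# Hypothetical shape of one data entry for the loop above.
most_recent = {
    'resnet-18': {
        'sim': {                     # simulated target: dict of stats per device
            'cpu': {'cycles': 1.2e7, 'stalls': 3.4e5},
        },
        'arm': {                     # physical target: mean time per device
            'cpu': 42.17,
        },
    },
}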
Example #3
def main(config_dir, output_dir):
    try:
        config, msg = validate_config(config_dir)
        if config is None:
            write_status(output_dir, False, msg)
            return 1

        if check_early_exit is not None:
            early_exit, msg = check_early_exit(config)
            if early_exit:
                write_status(output_dir, True, msg)
                return 0

        configure_seed(config)

        if gen_trial_params is None:
            write_status(output_dir, True, 'No trial to run')
            return 0

        trial_params = gen_trial_params(config)
        success, msg = run_trials(*trial_params, path_prefix=output_dir)
        write_status(output_dir, success, msg)
        return 0 if success else 1
    except Exception as e:
        write_status(output_dir, False, render_exception(e))
        return 1
Example #4
def main(config_dir, output_dir):
    config, msg = validate_trials_config(config_dir)
    if config is None:
        write_status(output_dir, False, msg)
        return 1

    if not config['models']:
        write_status(output_dir, True, 'Nothing run')
        return 0

    for model in sorted(config['models']):
        cmd_id = 0
        for success, msg, processed_command in parse_commands(model, config):
            if not success:
                write_status(output_dir, False, msg)
                return 1
            else:
                print(f'Running on command: {model}: {processed_command}')
                success, msg = eval_command(model, processed_command, config,
                                            config_dir, output_dir, cmd_id)
                if not success:
                    write_status(output_dir, False, msg)
                    return 1
                cmd_id += 1

    write_status(output_dir, True, 'success')
Example #5
def main(config_dir, setup_dir):
    try:
        export_mxnet_model('rnn', setup_dir)
        export_mxnet_model('gru', setup_dir)
        export_mxnet_model('lstm', setup_dir)
        write_status(setup_dir, True, 'success')
    except Exception as e:
        write_status(setup_dir, False, render_exception(e))
Example #6
def main(data_dir, config_dir, output_dir):
    config, msg = validate(config_dir)
    if config is None:
        write_status(output_dir, False, msg)
        return 1

    # No further analysis is required beyond the raw stats reported by the VTA
    # simulator, so we just propagate the data to the next stage of the
    # pipeline.
    data = read_json(data_dir, 'data.json')
    write_json(output_dir, 'data.json', data)
    write_status(output_dir, True, 'success')
Example #7
def main(config_dir, home_dir, output_dir):
    info = DashboardInfo(home_dir)
    networks = ['resnet-18', 'mobilenet', 'nature-dqn', 'vgg-16']
    pass_spec_name_map = {
        '3;FuseOps': 'Op Fusion',
        '3;FoldConstant|FuseOps': '... + Constant Folding',
        '3;EliminateCommonSubexpr|FoldConstant|FuseOps': '... + Common Subexpr Elim',
        '3;EliminateCommonSubexpr|CombineParallelConv2D|FoldConstant|FuseOps': '... + Parallel Conv Comb',
        '3;EliminateCommonSubexpr|CombineParallelConv2D|FoldScaleAxis|FoldConstant|FuseOps': '... + Axis Scale Folding',
        '3;EliminateCommonSubexpr|CombineParallelConv2D|FoldScaleAxis|CanonicalizeCast|FoldConstant|FuseOps': '... + Cast Canonicalization',
        '3;EliminateCommonSubexpr|CombineParallelConv2D|FoldScaleAxis|CanonicalizeCast|CanonicalizeOps|FoldConstant|FuseOps': '... + Op Canonicalization',
        '3;EliminateCommonSubexpr|CombineParallelConv2D|FoldScaleAxis|CanonicalizeCast|CanonicalizeOps|AlterOpLayout|FoldConstant|FuseOps': '... + Op Layout Alteration'
    }

    prereqs, msg = check_prerequisites(info, {
        'pass_comparison': {
            'networks': networks,
            'passes': [
                parse_combo(combo) for combo in pass_spec_name_map.keys()
            ]
        }
    })

    all_data = sort_data(info.exp_data_dir('pass_comparison'))
    raw_data = all_data[-1]

    baseline = '0;'

    network_name_map = {
        'resnet-18': 'ResNet-18',
        'mobilenet': 'MobileNet V2',
        'nature-dqn': 'DQN',
        'vgg-16': 'VGG-16'
    }

    del raw_data['timestamp']
    del raw_data['tvm_hash']

    try:
        for (dev, raw_dev_data) in raw_data.items():
            plot_data = OrderedDict([
                (pass_spec_name_map[pass_spec], {
                    network_name_map[network]:
                    raw_dev_data[baseline][network] / raw_dev_data[pass_spec][network]
                    for network in networks})
                for pass_spec in pass_spec_name_map.keys()
            ])
            generate_pass_comparisons(plot_data, output_dir, f'pass-comp-{dev}.png')
    except Exception as e:
        write_status(output_dir, False, 'Exception encountered:\n' + render_exception(e))
        return 1

    write_status(output_dir, True, 'success')
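
The keys of `pass_spec_name_map` encode an optimization level and a `|`-separated pass list: `'3;FoldConstant|FuseOps'` is level 3 with two passes, and the baseline `'0;'` is level 0 with none. `parse_combo` is not shown on this page; a minimal sketch under that reading of the format:

def parse_combo(combo):
    # Assumed format: '<opt_level>;<Pass>|<Pass>|...', e.g. '3;FoldConstant|FuseOps'.
    # The baseline '0;' yields (0, []).
    level, _, passes = combo.partition(';')
    return (int(level), passes.split('|') if passes else [])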
Example #8
def main(config_dir, output_dir):
    config, msg = validate(config_dir)
    if config is None:
        write_status(output_dir, False, msg)
        return 1

    if 'pt' not in config['frameworks']:
        write_status(output_dir, True, 'PT not run')
        return 0

    datasets = config['datasets']
    for dataset, max_idx in datasets:
        success, msg = run_trials(
            'pt',
            'treelstm',
            config['dry_run'],
            config['n_times_per_input'],
            config['n_inputs'],
            treelstm_trial,
            treelstm_setup,
            treelstm_teardown, ['device', 'dataset', 'idx'],
            [config['devices'], [dataset], [i for i in range(max_idx)]],
            path_prefix=output_dir,
            append_to_csv=True)
        if not success:
            write_status(output_dir, success, msg)
            return 1
    write_status(output_dir, True, 'success')
Example #9
def main(data_dir, config_dir, output_dir):
    try:
        config, msg = validate(config_dir)
        if config is None:
            write_status(output_dir, False, msg)
            return 1

        summary = {}
        for model in sorted(config['models']):
            summary[model] = []
            # the script will not be run if there is an error
            cmd_id = 0
            for _, _, exp_config in parse_commands(model, config):
                for combo in unfold_settings(exp_config):
                    stats, msg = parse_data_file(exp_config['type'], model,
                                                 config, combo, data_dir,
                                                 cmd_id)
                    if stats is None:
                        write_status(output_dir, False, msg)
                        return 1
                    stats['command_id'] = cmd_id
                    summary[model].append(stats)
                cmd_id += 1
        write_json(output_dir, 'data.json', summary)
        write_status(output_dir, True, 'success')
    except Exception as e:
        write_status(output_dir, False, render_exception(e))
Example #10
def main(data_dir, config_dir, output_dir):
    config, msg = validate_config(config_dir)
    if config is None:
        write_status(output_dir, False, msg)
        return 1

    devs = config['devices']
    networks = []
    if use_networks:
        networks = config['networks']
    write_generic_summary(data_dir,
                          output_dir,
                          config['title'],
                          devs,
                          networks,
                          use_networks=use_networks)
Example #11
def main(data_dir, config_dir, output_dir):
    try:
        config, msg = validate_trials_config(config_dir)
        if config is None:
            write_status(output_dir, False, msg)
            return 1

        summary = {}

        baseline_dict = {}

        for model in sorted(config['models']):
            summary[model] = []
            baseline_dict[model] = {}
            # the script will not be run if there is an error
            cmd_id = 0
            for _, _, exp_config in parse_commands(model, config):
                baseline_params = None
                for specific_params in unfold_settings(exp_config):
                    batch_size = specific_params['batch_size']
                    if specific_params['type'] == 'baseline':
                        baseline_dict[model][batch_size] = {
                            'type': 'baseline',
                            'specific_params': specific_params,
                            'cmd_id': cmd_id
                        }

                    # if there is a corresponding baseline,
                    # let's match using the dict
                    baseline_params = None
                    if (batch_size in baseline_dict[model]
                            and specific_params['type'] != 'baseline'):
                        baseline_params = baseline_dict[model][batch_size]

                    stats, msg = parse_data_file(
                        exp_config['type'],
                        model,
                        config,
                        specific_params,
                        data_dir,
                        cmd_id,
                        baseline_params=baseline_params)
                    if stats is None:
                        write_status(output_dir, False, msg)
                        return 1
                    stats['command_id'] = cmd_id
                    summary[model].append(stats)
                cmd_id += 1
        write_json(output_dir, 'data.json', summary)
        write_status(output_dir, True, 'success')
    except Exception as e:
        write_status(output_dir, False, render_exception(e))
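
The baseline matching above keys entries by batch size: when a run of type `'baseline'` is seen, it is recorded under `baseline_dict[model][batch_size]`, and any later non-baseline run at the same batch size is analyzed against it. Hypothetical contents after one baseline run at batch size 32:

baseline_dict = {
    'resnet-18': {
        32: {
            'type': 'baseline',
            'specific_params': {'type': 'baseline', 'batch_size': 32},
            'cmd_id': 0,
        },
    },
}
# A later setting with batch_size 32 and a non-'baseline' type is passed
# this entry as baseline_params to parse_data_file.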
Example #12
def main(config_dir, output_dir):
    """Run the experiment."""
    config, msg = validate(config_dir)
    if config is None:
        write_status(output_dir, False, msg)
        return 1

    try:
        # The experiment involves RPC calls that could hang, so we also
        # enforce a timeout on our end.
        killswitch = Timer(config.get('timeout', 300),
                           lambda: timeout_failure(output_dir))
        killswitch.start()

        result = {}
        config_iter = itertools.product(config['models'], config['targets'],
                                        config['devices'])
        for (model, target, device) in config_iter:
            # TODO(weberlo): There has to be some idiom to get rid of this boilerplate.
            if model not in result:
                result[model] = {}
            if target not in result[model]:
                result[model][target] = {}
            if device not in result[model][target]:
                result[model][target][device] = {}
            result[model][target][device] = run_single(model, target, device,
                                                       config)

        killswitch.cancel()
    except Exception as e:
        write_status(output_dir, False,
                     'Exception encountered:\n' + render_exception(e))
        return 1

    write_json(output_dir, 'data.json', result)
    write_status(output_dir, True, 'success')
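
`timeout_failure` is not shown on this page. Since the main thread may be blocked in a hung RPC call when the `Timer` fires, a plausible minimal sketch (its actual behavior is an assumption) records the failure and force-exits:

import os
from threading import Timer

def timeout_failure(output_dir):
    # Assumed behavior: write a failure status, then kill the process,
    # because the main thread may never return from a hung RPC call.
    write_status(output_dir, False, 'Experiment timed out')
    os._exit(1)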
Example #13
def main(config_dir, output_dir):
    """Run the experiment."""
    config, msg = validate(config_dir)
    if config is None:
        write_status(output_dir, False, msg)
        return 1

    try:
        result = {}
        config_iter = itertools.product(
                config['models'],
                config['targets'],
                config['devices'])
        for (model, target, device) in config_iter:
            # TODO(weberlo): There has to be some idiom to get rid of this boilerplate.
            if model not in result:
                result[model] = {}
            if target not in result[model]:
                result[model][target] = {}
            if device not in result[model][target]:
                result[model][target][device] = {}
            result[model][target][device] = run_single(model, target, device, config)
    except Exception as e:
        write_status(output_dir, False, 'Exception encountered:\n' + render_exception(e))
        return 1

    write_json(output_dir, 'data.json', result)
    write_status(output_dir, True, 'success')
Example #14
def main(config_dir, home_dir, output_dir):
    info = DashboardInfo(home_dir)
    conf = read_config(config_dir)

    data_dir = os.path.join(output_dir, 'data')
    graph_dir = os.path.join(output_dir, 'graphs')
    idemp_mkdir(data_dir)
    idemp_mkdir(graph_dir)

    timestamp = get_timestamp()

    score_confs = conf['score_confs']
    metrics = set(score_confs.keys())
    metrics = metrics.intersection(set(SCORE_METRICS.keys()))

    if not metrics:
        write_status(output_dir, True, 'No scores to report')
        return 0

    score_data = {}
    score_reports = {}
    for metric in metrics:
        score_metric = SCORE_METRICS[metric](score_confs[metric])
        valid, msg = check_prerequisites(info, score_metric.prereq())
        if not valid:
            write_status(output_dir, False, msg)
            return 1

        score_data_dir = os.path.join(data_dir, metric)
        score_graph_dir = os.path.join(graph_dir, metric)
        idemp_mkdir(score_data_dir)
        idemp_mkdir(score_graph_dir)

        try:
            report = process_score(info, score_metric, score_data_dir,
                                   score_graph_dir, timestamp)
            score_reports[metric] = report
        except Exception as e:
            write_status(
                output_dir, False,
                'Encountered exception while scoring {}:\n{}'.format(
                    metric, render_exception(e)))
            return 1

    report = {'title': 'Metric Scores', 'value': format_scores(score_reports)}
    write_json(output_dir, 'report.json', report)
    write_status(output_dir, True, 'success')
Example #15
def main(config_dir, home_dir, out_dir):
    config = read_config(config_dir)
    info = DashboardInfo(home_dir)
    exp_titles = get_exp_titles(info)
    score_titles = get_score_titles(info)

    deadline_config = None
    if info.subsys_config_valid('deadline'):
        deadline_config = info.read_subsys_config('deadline')

    set_up_out_dir(info, out_dir)
    # Switch to the output directory, so we don't need to keep track of
    # separate paths for loading images while the script is running and loading
    # images when viewing the generated webpage.
    os.chdir(out_dir)

    page_prefix = init_page_prefix_template(deadline_config)
    page_body = gen_page_body(exp_titles, score_titles)
    page_suffix = init_page_suffix_template(deadline_config)
    with open(os.path.join(out_dir, 'index.html'), 'w') as f:
        f.write(page_prefix)
        f.write(page_body)
        f.write(page_suffix)
    write_status(out_dir, True, 'success')
Example #16
def main(config_dir, home_dir, output_dir):
    info = DashboardInfo(home_dir)
    conf = read_config(config_dir)
    our_name = 'Relay'
    if 'our_name' in conf:
        our_name = conf['our_name']

    conf_fws = ['relay', 'pt', 'tf', 'mxnet', 'nnvm']
    networks = ['resnet-18', 'mobilenet', 'nature-dqn', 'vgg-16']
    prereqs, msg = check_prerequisites(
        info, {
            'cnn_comp': {
                'devices': ['gpu'],
                'use_xla': True,
                'networks': networks,
                'frameworks': conf_fws
            }
        })
    if not prereqs:
        write_status(output_dir, False, msg)
        return 1

    all_data = sort_data(info.exp_data_dir('cnn_comp'))
    raw_data = all_data[-1]['gpu']

    our_fw = 'Relay'
    other_fws = ['TensorFlow', 'Pytorch', 'MxNet', 'NNVM', 'TF XLA']
    fw_name_map = {fw: fw for fw in other_fws}
    fw_name_map['Pytorch'] = 'PyTorch'

    networks = ['resnet-18', 'mobilenet', 'nature-dqn', 'vgg-16']
    network_name_map = {
        'resnet-18': 'ResNet-18',
        'mobilenet': 'MobileNet V2',
        'nature-dqn': 'DQN',
        'vgg-16': 'VGG-16'
    }

    plot_data = OrderedDict([(fw_name_map[fw], {
        network_name_map[network]:
        raw_data[fw][network] / raw_data[our_fw][network]
        for network in networks
    }) for fw in other_fws])

    try:
        generate_vision_comparisons(our_name, plot_data, output_dir)
    except Exception as e:
        write_status(output_dir, False,
                     'Exception encountered:\n' + render_exception(e))
        return 1

    write_status(output_dir, True, 'success')
Example #17
def main(data_dir, config_dir, output_dir):
    config, msg = validate_config(config_dir)
    if config is None:
        write_status(output_dir, False, msg)
        return 1

    devs = config['devices']
    listing_settings = generate_listing_settings(config)

    ret = {}
    for dev in devs:
        ret[dev] = {}
        for listing, settings in listing_settings.items():
            if not use_networks:
                query = generate_data_query(config, dev, settings)
                summary, success, msg = trials_stat_summary(data_dir, *query)
                if not success:
                    write_status(output_dir, False, msg)
                    return 1

                ret[dev][listing] = summary['mean']
                add_detailed_summary(ret, summary, dev, listing)
                continue

            ret[dev][listing] = {}
            for network in config['networks']:
                query = generate_data_query(config, dev, network, settings)
                summary, success, msg = trials_stat_summary(data_dir, *query)
                if not success:
                    write_status(output_dir, False, msg)
                    return 1

                ret[dev][listing][network] = summary['mean']
                add_detailed_summary(ret, summary, dev, listing, network)
    write_json(output_dir, 'data.json', ret)
    write_status(output_dir, True, 'success')
Example #18
def main(data_dir, config_dir, output_dir):
    try:
        config, msg = validate_trials_config(config_dir)
        if config is None:
            write_status(output_dir, False, msg)
            return 1

        all_data = sort_data(data_dir)
        most_recent = all_data[-1]
        success, msg = render_graph(config, most_recent, output_dir)
        write_status(output_dir, success, msg)
    except Exception as e:
        write_status(output_dir, False, 'Exception encountered: ' + render_exception(e))
        return 1
    finally:
        plt.close()
Example #19
def main(data_dir, config_dir, output_dir):
    try:
        config, msg = validate_trials_config(config_dir)
        if config is None:
            write_status(output_dir, False, msg)
            return 1

        all_data = sort_data(data_dir)
        most_recent = all_data[-1]

        summary = summarize(config, most_recent)
        write_summary(output_dir, 'Pareto Curve Trial', summary)
        write_status(output_dir, True, 'success')

    except Exception as e:
        write_status(output_dir, False, 'Exception encountered: ' + render_exception(e))
        return 1
Example #20
def main(config_dir, output_dir, device):
    config, msg = validate(config_dir)
    if config is None:
        write_status(output_dir, False, msg)
        return 1

    if 'tf' not in config['frameworks']:
        write_status(output_dir, True, 'TF not run')
        return 0

    if device not in config['devices']:
        write_status(output_dir, True, 'TF not run on {}'.format(device))
        return 0

    configure_seed(config)

    enable_xla = [False]
    if config['use_xla']:
        enable_xla.append(True)

    success, msg = run_trials(
        'tf',
        'cnn_comp',
        config['dry_run'],
        config['n_times_per_input'],
        config['n_inputs'],
        cnn_trial,
        cnn_setup,
        cnn_teardown, ['network', 'device', 'batch_size', 'enable_xla'],
        [config['networks'], [device], config['batch_sizes'], enable_xla],
        path_prefix=output_dir,
        append_to_csv=True)

    write_status(output_dir, success, msg)
    if not success:
        return 1
Example #21
def main(config_dir, home_dir, output_dir):
    info = DashboardInfo(home_dir)
    idemp_mkdir(output_dir)
    for exp_name in info.all_present_experiments():
        exp_status = info.exp_status_dir(exp_name)
        run_status = validate_json(exp_status,
                                   'run_cpu_telemetry',
                                   'run_gpu_telemetry',
                                   filename='run.json')
        if check_prerequisites(
                info, {exp_name: {}}) == (True, 'success') and run_status.get(
                    'success', False):
            telemetry_folder = info.subsys_telemetry_dir(exp_name)
            if os.path.exists(telemetry_folder):
                exp_graph_folder = os.path.join(telemetry_folder, 'graph')
                cpu_stat = info.exp_cpu_telemetry(exp_name)
                gpu_stat = info.exp_gpu_telemetry(exp_name)
                cpu_data = sort_data(cpu_stat)
                gpu_data = sort_data(gpu_stat)
                graph_folder = info.exp_graph_dir(exp_name)
                website_include_dir = graph_folder
                try:
                    if cpu_data and run_status.get('run_cpu_telemetry', False):
                        visualize(
                            'cpu', process_cpu_telemetry(cpu_data[-1]),
                            exp_graph_folder,
                            os.path.join(website_include_dir, 'cpu_telemetry'),
                            f'Visualizing CPU telemetry for {exp_name}',
                            lambda adapter, title, *rest: f'{adapter}-{title}')

                    if gpu_data and run_status.get('run_gpu_telemetry', False):
                        visualize(
                            'gpu', process_gpu_telemetry(gpu_data[-1]),
                            exp_graph_folder,
                            os.path.join(website_include_dir, 'gpu_telemetry'),
                            f'Visualizing GPU telemetry for {exp_name}',
                            lambda _, title, *rest: title)
                except Exception as e:
                    write_status(
                        output_dir, False,
                        f'Encountered err while generating graphs: {e}')
                    return
                write_status(output_dir, True, 'success')
            else:
                write_status(output_dir, False, 'No telemetry data found')
                return
Example #22
def main(data_dir, config_dir, output_dir):
    config, msg = validate(config_dir)
    if config is None:
        write_status(output_dir, False, msg)
        return 1

    # read in data, output graphs of most recent data, and output longitudinal graphs
    all_data = sort_data(data_dir)
    most_recent = all_data[-1]

    try:
        generate_longitudinal_comparisons(all_data, output_dir)
        generate_arm_vta_comparisons(most_recent, output_dir)
    except Exception as e:
        write_status(output_dir, False, 'Exception encountered:\n' + render_exception(e))
        return 1

    write_status(output_dir, True, 'success')
Example #23
def main(config_dir, home_dir, output_dir):
    info = DashboardInfo(home_dir)
    networks = ['resnet-18', 'mobilenet', 'nature-dqn', 'vgg-16']
    prereqs, msg = check_prerequisites(
        info, {
            'relay_opt': {
                'devices': ['gpu'],
                'opt_levels': [0, 1, 2, 3, 4],
                'networks': networks
            }
        })
    if not prereqs:
        write_status(output_dir, False, msg)
        return 1

    all_data = sort_data(info.exp_data_dir('relay_opt'))
    raw_data = all_data[-1]['gpu']

    baseline = 'O0'
    opts = ['O1', 'O2', 'O3', 'O4']

    network_name_map = {
        'resnet-18': 'ResNet-18',
        'mobilenet': 'MobileNet V2',
        'nature-dqn': 'DQN',
        'vgg-16': 'VGG-16'
    }

    plot_data = OrderedDict([(opt, {
        network_name_map[network]:
        raw_data[baseline][network] / raw_data[opt][network]
        for network in networks
    }) for opt in opts])

    try:
        generate_opt_comparisons(plot_data, output_dir)
    except Exception as e:
        write_status(output_dir, False,
                     'Exception encountered:\n' + render_exception(e))
        return 1

    write_status(output_dir, True, 'success')
Example #24
def main(data_dir, config_dir, output_dir):
    try:
        config, msg = validate_config(config_dir)
        if config is None:
            write_status(output_dir, False, msg)
            return 1

        all_data = sort_data(data_dir)
        most_recent = all_data[-1]
        last_two_weeks = [
            entry for entry in all_data
            if time_difference(most_recent, entry).days < 14
        ]

        generate_longitudinal_comparisons(all_data, output_dir, 'all_time')
        generate_longitudinal_comparisons(last_two_weeks, output_dir,
                                          'two_weeks')
        generate_individual_comparisons(config, most_recent, output_dir)
    except Exception as e:
        write_status(output_dir, False,
                     'Exception encountered:\n' + render_exception(e))
        return 1

    write_status(output_dir, True, 'success')
Example #25
def main(config_dir, home_dir, output_dir):
    info = DashboardInfo(home_dir)
    conf = read_config(config_dir)

    # delete old report so it doesn't hang around if we exit
    # without a new one
    if check_file_exists(output_dir, 'report.json'):
        subprocess.call(['rm', '-f', os.path.join(output_dir, 'report.json')])

    time_window = -1
    if 'time_window' in conf:
        time_window = int(conf['time_window'])
    pings = conf['notify'] if 'notify' in conf else []

    # map: exp -> [(fields w/ high SD, historic mean, SD, current)]
    exp_alerts = {}
    for exp in info.all_present_experiments():
        if not info.exp_active(exp):
            continue

        # not this subsystem's job to report on failures
        stage_statuses = info.exp_stage_statuses(exp)
        if 'run' not in stage_statuses or 'analysis' not in stage_statuses:
            continue
        if not stage_statuses['analysis']['success']:
            continue

        all_data = sort_data(info.exp_data_dir(exp))
        if len(all_data) <= 1:
            continue

        exp_alerts[exp] = []
        most_recent = all_data[-1]
        past_data = all_data[:-1]
        if time_window >= 1:
            past_data = [
                entry for entry in past_data
                if time_difference(most_recent, entry).days <= time_window
            ]

        field_values = traverse_fields(most_recent)
        for fields in itertools.product(*field_values):
            current_stat, _ = gather_stats([most_recent], fields)
            current = current_stat[0]
            past_stats, _ = gather_stats(past_data, fields)

            past_sd = np.std(past_stats)
            past_mean = np.mean(past_stats)
            if abs(current - past_mean) > past_sd:
                exp_alerts[exp].append((fields, past_mean, past_sd, current))

        if not exp_alerts[exp]:
            del exp_alerts[exp]

    if exp_alerts:
        report = {
            'title': 'High SD Alerts',
            'value': format_report(info, exp_alerts, pings)
        }
        write_json(output_dir, 'report.json', report)

    write_status(output_dir, True, 'success')
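
The alert condition flags any field whose most recent value sits more than one standard deviation away from the historical mean. A quick numeric check of the test used above (values are made up):

import numpy as np

past_stats = [10.0, 10.2, 9.9, 10.1]       # hypothetical historical values
current = 11.0
past_mean = np.mean(past_stats)            # 10.05
past_sd = np.std(past_stats)               # ~0.112
print(abs(current - past_mean) > past_sd)  # True: 0.95 > 0.112, so this alerts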
Example #26
def main(config_dir, home_dir, output_dir):
    info = DashboardInfo(home_dir)
    conf = read_config(config_dir)
    our_name = 'Relay'
    if 'our_name' in conf:
        our_name = conf['our_name']

    prereqs, msg = check_prerequisites(
        info, {
            'treelstm': {
                'devices': ['cpu'],
                'frameworks': ['relay', 'pt'],
                'relay_methods': ['aot']
            },
            'char_rnn': {
                'devices': ['cpu'],
                'frameworks': ['relay', 'pt'],
                'relay_methods': ['aot'],
                'relay_configs': ['loop']
            },
            'gluon_rnns': {
                'devices': ['cpu'],
                'frameworks': ['relay', 'mxnet'],
                'networks': ['rnn', 'lstm', 'gru'],
                'relay_methods': ['aot']
            }
        })
    if not prereqs:
        write_status(output_dir, False, msg)
        return 1

    raw_data = {}
    for exp in ['treelstm', 'char_rnn', 'gluon_rnns']:
        all_data = sort_data(info.exp_data_dir(exp))
        raw_data[exp] = all_data[-1]

    plot_data = OrderedDict([
        ('MxNet', {
            'RNN':
            raw_data['gluon_rnns']['cpu']['MxNet']['rnn'] /
            raw_data['gluon_rnns']['cpu']['Aot']['rnn'],
            'GRU':
            raw_data['gluon_rnns']['cpu']['MxNet']['gru'] /
            raw_data['gluon_rnns']['cpu']['Aot']['gru'],
            'LSTM':
            raw_data['gluon_rnns']['cpu']['MxNet']['lstm'] /
            raw_data['gluon_rnns']['cpu']['Aot']['lstm'],
            'CharRNN':
            0.0,
            'TreeLSTM':
            0.0,
        }),
        ('PyTorch', {
            'RNN':
            0.0,
            'GRU':
            0.0,
            'LSTM':
            0.0,
            'CharRNN':
            raw_data['char_rnn']['cpu']['Pytorch'] /
            raw_data['char_rnn']['cpu']['Aot'],
            'TreeLSTM':
            raw_data['treelstm']['cpu']['Pytorch'] /
            raw_data['treelstm']['cpu']['Aot'],
        }),
    ])

    try:
        generate_nlp_comparisons(our_name, plot_data, output_dir)
    except Exception as e:
        write_status(output_dir, False,
                     'Exception encountered:\n' + render_exception(e))
        return 1

    write_status(output_dir, True, 'success')
Example #27
def main(config_dir, output_dir, method, dataset):
    config, msg = validate(config_dir)
    if config is None:
        write_status(output_dir, False, msg)
        return 1

    if 'relay' not in config['frameworks']:
        write_status(output_dir, True, 'Relay not run')
        return 0

    if method not in config['relay_methods']:
        write_status(output_dir, True, '{} not run'.format(method))
        return 0

    datasets = config['datasets']
    max_idx = -1
    for pair in datasets:
        if pair[0] == dataset:
            max_idx = pair[1]
            break

    # dataset is not included in the config, so skip
    if max_idx == -1:
        write_status(output_dir, True, 'Dataset {} not run'.format(dataset))
        return 0

    success, msg = run_trials(
        'relay',
        'treelstm',
        config['dry_run'],
        config['n_times_per_input'],
        config['n_inputs'],
        treelstm_trial,
        treelstm_setup,
        treelstm_teardown, ['device', 'method', 'dataset', 'idx'],
        [config['devices'], [method], [dataset], [i for i in range(max_idx)]],
        path_prefix=output_dir,
        append_to_csv=True)
    if not success:
        write_status(output_dir, success, msg)
        return 1
    write_status(output_dir, True, 'success')
Example #28
def main(config_dir, home_dir, output_dir):
    config = read_config(config_dir)
    if 'webhook_url' not in config:
        write_status(output_dir, False, 'No webhook URL given')
        return 1

    webhook = config['webhook_url']
    description = ''
    if 'description' in config:
        description = config['description']

    info = DashboardInfo(home_dir)

    inactive_experiments = []     # list of titles
    failed_experiments = []       # list of slack fields
    successful_experiments = []   # list of slack fields
    failed_graphs = []            # list of titles

    for exp_name in info.all_present_experiments():
        stage_statuses = info.exp_stage_statuses(exp_name)
        if not stage_statuses['precheck']['success']:
            failed_experiments.append(
                failed_experiment_field(exp_name, stage_statuses, 'precheck'))
            continue

        exp_conf = info.read_exp_config(exp_name)

        exp_title = exp_name if 'title' not in exp_conf else exp_conf['title']
        notify = exp_conf['notify']
        if not exp_conf['active']:
            inactive_experiments.append(exp_title)
            continue

        failure = False
        for stage in ['setup', 'run', 'analysis', 'summary']:
            if stage not in stage_statuses:
                # setup is the only stage that's optional
                assert stage == 'setup'
                continue
            if not stage_statuses[stage]['success']:
                failed_experiments.append(
                    failed_experiment_field(exp_title, stage_statuses,
                                            stage, notify))
                failure = True
                break

        if failure:
            continue

        # failure to visualize is not as big a deal as failing to
        # run or analyze the experiment, so we only report it but
        # don't fail to report the summary
        if not stage_statuses['visualization']['success']:
            failed_graphs.append(exp_title)

        summary = info.read_exp_summary(exp_name)
        successful_experiments.append(
            build_field(summary['title'], summary['value']))

    # produce messages
    attachments = []
    if successful_experiments:
        attachments.append(
            build_attachment(
                title='Successful benchmarks',
                pretext=description,
                fields=successful_experiments))
    if failed_experiments:
        attachments.append(
            build_attachment(
                color='#fa0000',
                title='Failed benchmarks',
                fields=failed_experiments))
    if inactive_experiments:
        attachments.append(
            build_attachment(
                color='#616161',
                title='Inactive benchmarks',
                text=', '.join(inactive_experiments)))
    if failed_graphs:
        attachments.append(
            build_attachment(
                color='#fa0000',
                title='Failed to Visualize',
                text=', '.join(failed_graphs)))

    success, report = post_message(
        webhook,
        build_message(
            text='Dashboard Results',
            attachments=attachments))
    write_status(output_dir, success, report)
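
`build_field`, `build_attachment`, `build_message`, and `post_message` wrap Slack's incoming-webhook API; only their call sites appear on this page. A minimal sketch consistent with the webhook variant used above (the payload shape and return values are assumptions):

import json
import requests

def build_field(title='', value=''):
    return {'title': title, 'value': value, 'short': False}

def build_attachment(title='', text='', pretext='', color='#36a64f', fields=None):
    return {'title': title, 'text': text, 'pretext': pretext,
            'color': color, 'fields': fields if fields is not None else []}

def build_message(text='', attachments=None):
    return {'text': text, 'attachments': attachments or []}

def post_message(webhook_url, message):
    # Returns (success, report) to match how the callers above use it.
    r = requests.post(webhook_url, data=json.dumps(message),
                      headers={'Content-Type': 'application/json'})
    return r.status_code == 200, 'HTTP {}: {}'.format(r.status_code, r.text)

Note that Example #29 uses a client-based variant with a different signature (`post_message(client, channel, message)` returning three values), presumably built on the Slack Web API rather than a webhook.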
Example #29
def main(config_dir, home_dir, output_dir):
    config = read_config(config_dir)
    if 'channel_id' not in config:
        write_status(output_dir, False, 'No channel token given')
        return 1

    channel = config['channel_id']

    success, msg, client = new_client(config)

    if not success:
        write_status(output_dir, False, msg)
        return 1

    info = DashboardInfo(home_dir)

    failed_subsys = []
    reports = []
    failed_reports = []

    for subsys in info.all_present_subsystems():
        # ignore self
        if subsys == 'subsys_reporter':
            continue

        if not info.subsys_active(subsys):
            continue

        status = info.subsys_stage_status(subsys, 'run')
        if not status['success']:
            failed_subsys.append(failed_subsys_field(subsys, status))
            continue

        report_present = check_file_exists(info.subsys_output_dir(subsys),
                                           'report.json')
        if not report_present:
            continue

        try:
            report = read_json(info.subsys_output_dir(subsys), 'report.json')
            reports.append(
                build_field(title=report['title'], value=report['value']))
        except Exception:
            failed_reports.append(subsys)

    attachments = []
    if reports:
        attachments.append(build_attachment(title='Reports', fields=reports))
    if failed_reports or failed_subsys:
        failure_text = ''
        if failed_reports:
            failure_text = 'Failed to parse reports: {}'.format(
                ', '.join(failed_reports))
        attachments.append(
            build_attachment(title='Errors',
                             text=failure_text,
                             color='#fa0000',
                             fields=failed_subsys))

    if not attachments:
        write_status(output_dir, True, 'Nothing to report')
        return 0

    success, _, msg = post_message(
        client, channel,
        build_message(text='Subsystem Results', attachments=attachments))
    write_status(output_dir, success, msg)
Example #30
def main(config_dir, home_dir, output_dir):
    config = read_config(config_dir)
    if 'webhook_url' not in config:
        write_status(output_dir, False, 'No webhook URL given')
        return 1

    webhook = config['webhook_url']

    if 'deadlines' not in config:
        write_status(output_dir, True, 'No deadlines to report')
        return 0

    deadlines = config['deadlines']
    if not isinstance(deadlines, dict):
        write_status(output_dir, False, 'Invalid deadlines structure')
        return 1

    attachments = []
    present = datetime.datetime.now()
    for (name, info) in deadlines.items():
        if 'date' not in info:
            write_status(output_dir, False,
                         'Date missing in entry {} under {}'.format(info, name))
            return 1

        date = None
        try:
            date = datetime.datetime.strptime(info['date'], '%Y-%m-%d %H:%M:%S')
        except Exception as e:
            write_status(output_dir, False,
                         'Could not parse date {}'.format(info['date']))
            return 1

        diff = date - present
        days_left = diff.days
        if days_left < 0:
            # elapsed, so forget it
            continue
        alert = days_left <= 7

        time_left_msg = '{} days, {:.2f} hours left'.format(diff.days, diff.seconds/3600)
        fields = [build_field(value=time_left_msg)]
        if alert and 'ping' in info:
            pings = generate_ping_list(info['ping'])
            fields.append(build_field(value='Beware {}!'.format(pings)))
        attachments.append(build_attachment(
            title=name,
            text='Deadline: {}'.format(info['date']),
            fields=fields,
            color='#fa0000' if alert else '#0fbf24'))

    if not attachments:
        write_status(output_dir, True, 'All deadlines elapsed')
        return 0

    success, report = post_message(
        webhook,
        build_message(
            text='*Upcoming Deadlines*',
            attachments=attachments))
    write_status(output_dir, success, report)
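
Note that `timedelta.seconds` holds only the sub-day remainder (0–86399), which is why the message above reports days and hours separately rather than total hours:

import datetime

diff = datetime.timedelta(days=2, hours=5, minutes=30)
print('{} days, {:.2f} hours left'.format(diff.days, diff.seconds / 3600))
# -> '2 days, 5.50 hours left'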