Exemplo n.º 1
0
def main(argv):
    """Run a preemption experiment under the Profiling preset.

    The scheduler is set to ``'preempt'`` and ``disable_wc`` is enabled.
    When ``argv`` is non-empty, only the actions parsed from it are run.
    Otherwise an inception3 job, a ``Pause(60)`` and an alexnet job are run
    as one sequence; if ``FLAGS.with_ref`` is set, a reference sequence is
    additionally run with the MostEfficient preset into a ``reference``
    sub-directory.
    """
    scfg = maybe_forced_preset(presets.Profiling)
    scfg.scheduler = 'preempt'
    scfg.disable_wc = True

    run_cfg = scfg.copy(output_dir=FLAGS.save_dir)

    if argv:
        # Actions given on the command line replace the built-in sequence.
        cmd_actions = parse_actions_from_cmd(argv)
        run_seq(run_cfg, *cmd_actions)
        return

    default_actions = [
        WTL.create("inception3", 25, 1298),
        Pause(60),
        WTL.create("alexnet", 100, 508),
    ]
    run_seq(run_cfg, *default_actions)

    if FLAGS.with_ref:
        # Reference data: alexnet with the default executor, then on TF.
        ref_cfg = presets.MostEfficient(output_dir=FLAGS.save_dir / 'reference')
        ref_actions = [
            WTL.create("alexnet", 100, 508),
            Pause.Wait,
            WTL.create("alexnet", 100, 508, executor=Executor.TF),
        ]
        run_seq(ref_cfg, *ref_actions)
Exemplo n.º 2
0
def case5(argv):
    """Run the event loop under AllocProf with the 'preempt' scheduler.

    ``SALUS_DISABLE_LANEMGR`` is set to ``'1'`` in the config's env. The
    function's own name is used both for the sub-directory of
    ``FLAGS.save_dir`` and for the ``<name>.output`` argument handed to
    ``eventloop``.

    Returns:
        Whatever ``eventloop`` returns.
    """
    scfg = maybe_forced_preset(presets.AllocProf)
    scfg.scheduler = 'preempt'
    scfg.env['SALUS_DISABLE_LANEMGR'] = '1'

    # Must be evaluated in this frame so that it yields this function's name.
    case_name = inspect.currentframe().f_code.co_name
    case_dir = FLAGS.save_dir / case_name
    output_name = case_name + '.output'
    return eventloop(argv, scfg, case_dir, output_name)
Exemplo n.º 3
0
def main(argv):
    """Trace one workload on TensorFlow and then on Salus (OpTracing preset).

    Args:
        argv: Optional positional overrides. ``argv[0]`` is the model name
            (default ``'vgg11'``); ``argv[1]`` is the second argument passed
            to ``WTL.create`` (default ``25``) and is parsed with ``int``.

    Raises:
        ValueError: If ``argv[1]`` is present but is not an integer literal.

    Note:
        A third step that ran two concurrent Salus workloads used to follow
        an unconditional ``return`` and therefore never executed. That
        unreachable code has been removed; behaviour is unchanged.
    """
    scfg = maybe_forced_preset(presets.OpTracing)

    name = argv[0] if len(argv) > 0 else 'vgg11'
    bs = int(argv[1]) if len(argv) > 1 else 25

    def create_wl(ex):
        """Create the workload (10 as the third ``WTL.create`` argument) on executor ``ex``."""
        return WTL.create(name, bs, 10, executor=ex)

    # Run on TF with verbose logging enabled through the TF_CPP_* variables.
    wl = create_wl(Executor.TF)
    wl.env['TF_CPP_MIN_VLOG_LEVEL'] = '2'
    wl.env['TF_CPP_MIN_LOG_LEVEL'] = ''
    run_tf(FLAGS.save_dir / 'tf', wl)

    # Run on Salus
    wl = create_wl(Executor.Salus)
    run_seq(scfg.copy(output_dir=FLAGS.save_dir / "salus" / '1'), wl)
Exemplo n.º 4
0
def case1():
    """Run three rounds of every ``*eval`` workload on Salus in one sequence.

    Each workload is created with ``1`` as its second ``WTL.create``
    argument, except seq2seq which uses ``'small'``. All workloads get the
    same ``SALUS_TFBENCH_EVAL_*`` environment settings. Results are written
    under ``FLAGS.save_dir/case1``.
    """
    scfg = maybe_forced_preset(presets.MostEfficient)

    models = (
        'vgg19',
        'vgg16',
        'vgg11',
        'resnet50',
        'resnet101',
        'resnet152',
        'overfeat',
        'inception3',
        'inception4',
        'googlenet',
        'alexnet',
        'seq2seq',
        'vae',
        'superres',
    )

    # Three full passes over the model list, in list order each time.
    wls = []
    for _ in range(3):
        for model in models:
            size = 'small' if model == 'seq2seq' else 1
            wls.append(WTL.create(model + 'eval', size, 500, executor=Executor.Salus))

    eval_env = {
        'SALUS_TFBENCH_EVAL_INTERVAL': '10',
        'SALUS_TFBENCH_EVAL_RAND_FACTOR': '3',
        'SALUS_TFBENCH_EVAL_BLOCK': 'true',
    }
    for wl in wls:
        for key, value in eval_env.items():
            wl.env[key] = value

    scfg.env['SALUS_DISABLE_LANEMGR'] = '1'
    scfg.logconf = 'log'
    run_cfg = scfg.copy(output_dir=FLAGS.save_dir / 'case1')
    run_seq(run_cfg, *wls)
Exemplo n.º 5
0
def main(argv):
    """Run five alexnet jobs on Salus, then five on TF, under AllocProf.

    When ``argv`` is non-empty, only the actions parsed from it are run.
    Otherwise five identical alexnet workloads are passed to ``run_seq``
    back to back, followed by five TF alexnet workloads passed to ``run_tf``
    with ``Pause.Wait`` between consecutive ones.
    """
    scfg = maybe_forced_preset(presets.AllocProf)
    salus_cfg = scfg.copy(output_dir=FLAGS.save_dir)

    if argv:
        run_seq(salus_cfg, *parse_actions_from_cmd(argv))
        return

    repeats = 5

    salus_jobs = [WTL.create("alexnet", 25, 200) for _ in range(repeats)]
    run_seq(salus_cfg, *salus_jobs)

    tf_actions = []
    for i in range(repeats):
        if i > 0:
            # Separate consecutive TF jobs with a wait.
            tf_actions.append(Pause.Wait)
        tf_actions.append(WTL.create("alexnet", 25, 200, executor=Executor.TF))
    run_tf(FLAGS.save_dir, *tf_actions)
Exemplo n.º 6
0
def main(argv):
    """Trace one workload on Salus and, optionally, on TensorFlow.

    Args:
        argv: Optional positional overrides: ``argv[0]`` model name (default
            ``'vgg11'``), ``argv[1]`` second ``WTL.create`` argument (default
            ``25``), ``argv[2]`` third ``WTL.create`` argument (default
            ``10``, parsed with ``int``).

    When ``FLAGS.also_tf`` is set, the workload is also run on TF and the
    first file found in the TF output directory is renamed to
    ``perf.output``.
    """
    scfg = maybe_forced_preset(presets.OpTracing)

    name = argv[0] if len(argv) > 0 else 'vgg11'
    if len(argv) > 1:
        raw_bs = argv[1]
        # presumably falls back to the raw string when int() raises
        # ValueError -- confirm against try_with_default
        bs = try_with_default(int, raw_bs, ValueError)(raw_bs)
    else:
        bs = 25
    bn = int(argv[2]) if len(argv) > 2 else 10

    # Run on Salus
    salus_wl = WTL.create(name, bs, bn, executor=Executor.Salus)
    run_seq(scfg.copy(output_dir=FLAGS.save_dir / "salus" / '1'), salus_wl)

    if not FLAGS.also_tf:
        return

    tf_wl = WTL.create(name, bs, bn, executor=Executor.TF)
    tf_wl.env['TF_CPP_MIN_VLOG_LEVEL'] = '1'
    tf_wl.env['TF_CPP_MIN_LOG_LEVEL'] = ''
    tf_dir = FLAGS.save_dir / "tf"
    run_tf(tf_dir, tf_wl)

    # Move the first file found to a more convenient name.
    for produced in tf_dir.iterdir():
        produced.rename(produced.with_name('perf.output'))
        break
Exemplo n.º 7
0
def main(argv):
    """Run mnistsf workloads one at a time, then with increasing concurrency.

    Uses the MostEfficient preset with the ``'pack'`` scheduler and
    ``logconf`` set to ``'disable'``. When ``argv`` is non-empty, only the
    actions parsed from it are run. Otherwise:

    1. three workloads (100/200/300 as the third ``WTL.create`` argument)
       are run with ``Pause.Wait`` in between, output under ``salus/1``;
    2. the same three are run with ``executor=Executor.TF``, output under
       ``tf``;
    3. for each ``conc`` in 2..9, ``conc`` identical workloads are run in
       one sequence, output under ``salus/<conc>``.
    """
    scfg = maybe_forced_preset(presets.MostEfficient)
    scfg.scheduler = 'pack'
    scfg.logconf = 'disable'

    if argv:
        run_seq(scfg.copy(output_dir=FLAGS.save_dir),
                *parse_actions_from_cmd(argv))
        return

    def one_by_one(**create_kwargs):
        """Build the 100/200/300 sequence with a wait between workloads."""
        actions = []
        for bn in (100, 200, 300):
            if actions:
                actions.append(Pause.Wait)
            actions.append(WTL.create("mnistsf", 25, bn, **create_kwargs))
        return actions

    run_seq(scfg.copy(output_dir=FLAGS.save_dir / "salus" / "1"),
            *one_by_one())
    # The original author annotated each TF workload below as "1min".
    run_seq(scfg.copy(output_dir=FLAGS.save_dir / "tf"),
            *one_by_one(executor=Executor.TF))

    for conc in range(2, 10):
        concurrent = [WTL.create("mnistsf", 25, 100) for _ in range(conc)]
        conc_cfg = scfg.copy(output_dir=FLAGS.save_dir / "salus" / str(conc))
        run_seq(conc_cfg, *concurrent)
Exemplo n.º 8
0
def do_mem(logdir, network, batch_size):
    """Record memory-allocation logs for one workload on TF or on Salus.

    ``FLAGS.use_salus`` selects the executor. The workload runs for 20
    batches, or 5 when ``network`` is ``"speech"``.

    Args:
        logdir: Base directory; results go under ``logdir/<tf|salus>/<name>``
            where ``<name>`` comes from the workload's ``canonical_name``.
        network: Workload name.
        batch_size: Batch size for the run.

    Returns:
        The final destination directory of the results.
    """
    batch_num = 5 if network == "speech" else 20

    logger.info(f'Measuring memory for {network}_{batch_size} for {batch_num} iter')

    ex = "salus" if FLAGS.use_salus else "tf"
    final_dst = logdir / ex / WTL.from_name(network).canonical_name(RunConfig(batch_size, batch_num, None))
    with atomic_directory(final_dst) as outputdir:
        if FLAGS.use_salus:
            scfg = maybe_forced_preset(presets.AllocProf)
            scfg.logconf = "memop"
            scfg.output_dir = outputdir
            server = SalusServer(scfg)
            with server.run():
                logger.info('    Running on Salus')
                WTL.block_run(network, batch_size, batch_num, Executor.Salus, outputdir / 'rpc.output')
        else:
            logger.info('    Running on TF')
            tf_wl = WTL.create(network, batch_size, batch_num, Executor.TF)
            tf_wl.env['TF_CPP_MIN_VLOG_LEVEL'] = '1'
            tf_wl.env['TF_CPP_MIN_LOG_LEVEL'] = ''
            run_tf(outputdir, tf_wl)
            # Take the first file found, keep only its lines matching
            # "] +" or "] -" in alloc.output, and delete the raw file.
            for raw_log in pathlib.Path(outputdir).iterdir():
                filtered_path = raw_log.with_name('alloc.output')
                with filtered_path.open('w') as file:
                    grep = execute(['egrep', r"] (\+|-)", raw_log.name], stdout=file, cwd=str(raw_log.parent))
                    grep.wait()
                raw_log.unlink()
                break

    return final_dst
Exemplo n.º 9
0
def do_jct(logdir, network, batch_size):
    """Measure basic job completion time (JCT) of one workload.

    Runs the workload for 20 batches on TF, optionally on TFDist
    (``FLAGS.do_tfdist``), and on Salus unless ``FLAGS.is_mps`` is set. A
    run is skipped only when ``FLAGS.resume`` is set and its output file
    already exists in the final destination.

    Args:
        logdir: Base directory; results go under ``logdir/<canonical name>``.
        network: Workload name.
        batch_size: Batch size for the run.

    Returns:
        The final destination directory of the results.
    """
    batch_num = 20

    run_name = WTL.from_name(network).canonical_name(RunConfig(batch_size, batch_num, None))
    final_dst = logdir / run_name

    def already_done(filename):
        """Tell whether ``filename`` can be reused from a previous run."""
        return (final_dst / filename).exists() and FLAGS.resume

    with atomic_directory(final_dst) as outputdir:
        logger.info(f'Measuring basic JCT for {batch_num} iterations')
        mps_name = '-mps' if FLAGS.is_mps else ''
        gpu_output = 'gpu{}.output'.format(mps_name)
        tfdist_output = 'tfdist{}.output'.format(mps_name)

        if not already_done(gpu_output):
            logger.info('    Running on TF')
            WTL.block_run(network, batch_size, batch_num, Executor.TF, outputdir / gpu_output)

        if FLAGS.do_tfdist and not already_done(tfdist_output):
            with TFDistServer().run():
                logger.info('    Running on TFDist')
                WTL.block_run(network, batch_size, batch_num, Executor.TFDist, outputdir / tfdist_output)

        if FLAGS.is_mps:
            logger.info('    Skipping Salus jct when MPS is on')
            return final_dst

        if not already_done('rpc.output'):
            scfg = maybe_forced_preset(presets.MostEfficient)
            scfg.output_dir = outputdir
            server = SalusServer(scfg)
            with server.run():
                logger.info('    Warming up Salus')
                # The warm-up always uses 20 batches, independent of batch_num.
                WTL.block_run(network, batch_size, 20, Executor.Salus, outputdir / 'rpc-warm.output')

                logger.info('    Running on Salus')
                WTL.block_run(network, batch_size, batch_num, Executor.Salus, outputdir / 'rpc.output')

    return final_dst
Exemplo n.º 10
0
def twoinfer(argv):
    # type: (Sequence[str]) -> None
    """Run two Salus inference jobs side by side for each SM factor.

    For every factor a fresh config is derived from the MostEfficient
    preset, ``--sm-factor <factor>`` is appended to its extra arguments, and
    two inference workloads are started and then coordinated through their
    pipes.

    Args:
        argv: SM factors as strings, each parsed with ``float``. When empty,
            the factors 1.0, 1.5, 2.0, 2.5 and 3.0 are used.

    Raises:
        ValueError: If an element of ``argv`` cannot be parsed as a float.
    """
    base_cfg = maybe_forced_preset(presets.MostEfficient)

    sm_factors = [float(v) for v in argv]
    if not sm_factors:
        sm_factors = [1.0, 1.5, 2.0, 2.5, 3.0]

    for factor in sm_factors:
        factor_str = f"{factor:.2f}"
        scfg = base_cfg.copy(output_dir=FLAGS.save_dir / "twoinfer" / "salus" / factor_str)
        # Rebind instead of using `+=`: if copy() were to share the list with
        # base_cfg, an in-place extend would pile up one --sm-factor flag per
        # iteration. Building a new list is safe either way.
        scfg.extra_args = scfg.extra_args + ['--sm-factor', factor_str]
        with tempfile.TemporaryDirectory() as td:
            # create the first inference job
            wl1, pipe1 = create_infer(Executor.Salus, 10, td)

            # create the second inference job
            wl2, pipe2 = create_infer(Executor.Salus, 10, td)

            # The lambdas are invoked by run_seq within this same iteration,
            # so capturing pipe1/pipe2 by name is safe here.
            run_seq(scfg,
                    wl1,  # start the first job
                    wl2,  # start the second job
                    # wait for both jobs to be ready
                    RunFn(lambda *args, **kwargs: wait_on_pipe(pipe1)),
                    RunFn(lambda *args, **kwargs: wait_on_pipe(pipe2)),
                    # release 1st job
                    RunFn(lambda *args, **kwargs: release_on_pipe(pipe1)),
                    # release 2nd job
                    RunFn(lambda *args, **kwargs: release_on_pipe(pipe2)),
                    # run_seq automatically join all jobs at the end of the sequence
                    )
Exemplo n.º 11
0
def case4():
    """Run 1, 2, 4 and 8 concurrent copies of each eval model on Salus.

    For every model in ``inception3eval`` and ``vgg19eval`` and every
    concurrency level, one sequence is run with that many identical
    workloads. Output goes to ``FLAGS.save_dir/case4/<model>-<count>``.
    """
    scfg = maybe_forced_preset(presets.MostEfficient)

    for model in ['inception3eval', 'vgg19eval']:
        for i in [1, 2, 4, 8]:
            run_seq(scfg.copy(output_dir=FLAGS.save_dir/'case4'/f'{model}-{i}'),
                    # Create the workload for the model being iterated. The
                    # name used to be hard-coded to "inception3eval", so the
                    # vgg19eval-* directories held inception3eval results.
                    *[WTL.create(model, 1, 1000) for _ in range(i)]
                    )
Exemplo n.º 12
0
def case1():
    """Run inception3eval once on TF and once via ``run_seq``.

    Both runs write to ``FLAGS.save_dir/case1``; the second uses the
    MostEfficient preset.
    """
    scfg = maybe_forced_preset(presets.MostEfficient)
    out_dir = FLAGS.save_dir / 'case1'

    tf_wl = WTL.create("inception3eval", 1, 1000, executor=Executor.TF)
    run_tf(out_dir, tf_wl)

    salus_cfg = scfg.copy(output_dir=out_dir)
    run_seq(salus_cfg, WTL.create("inception3eval", 1, 1000))
Exemplo n.º 13
0
def case2():
    """Run super_res on TF, wait, then run it with the default executor.

    Output goes to ``FLAGS.save_dir/case2`` using the MostEfficient preset.
    """
    scfg = maybe_forced_preset(presets.MostEfficient)
    run_cfg = scfg.copy(output_dir=FLAGS.save_dir / 'case2')

    # Known issue noted by the original author: a single job apparently has
    # to run first, otherwise the sequence hangs.
    actions = [
        WTL.create("super_res", 128, 20, executor=Executor.TF),
        Pause.Wait,
        WTL.create("super_res", 128, 20),
    ]
    run_seq(run_cfg, *actions)
Exemplo n.º 14
0
def case1(argv):
    """Run the event loop under MostEfficient with the 'pack' scheduler.

    ``logconf`` is set to ``'log'``. The function's own name is used both
    for the sub-directory of ``FLAGS.save_dir`` and for the
    ``<name>.output`` argument handed to ``eventloop``.

    Returns:
        Whatever ``eventloop`` returns.
    """
    scfg = maybe_forced_preset(presets.MostEfficient)
    scfg.scheduler = 'pack'
    scfg.logconf = 'log'
    # NOTE: SALUS_DISABLE_LANEMGR is left unset in this case.

    # Must be evaluated in this frame so that it yields this function's name.
    case_name = inspect.currentframe().f_code.co_name
    case_dir = FLAGS.save_dir / case_name
    return eventloop(argv, scfg, case_dir, case_name + '.output')
Exemplo n.º 15
0
def case2():
    """Run two identical inception3 workloads in one sequence under AllocProf.

    ``SALUS_DISABLE_LANEMGR`` is set to ``'1'``; output goes to
    ``FLAGS.save_dir/case2``.
    """
    scfg = maybe_forced_preset(presets.AllocProf)
    scfg.env['SALUS_DISABLE_LANEMGR'] = '1'

    run_cfg = scfg.copy(output_dir=FLAGS.save_dir / 'case2')
    pair = [WTL.create("inception3", 100, 20) for _ in range(2)]
    run_seq(run_cfg, *pair)
Exemplo n.º 16
0
def test():
    """Run alexnet on TF, wait, then run it with the default executor.

    Output goes to ``FLAGS.save_dir/test`` using the MostEfficient preset.
    """
    scfg = maybe_forced_preset(presets.MostEfficient)
    run_cfg = scfg.copy(output_dir=FLAGS.save_dir / 'test')

    # Known issue noted by the original author: a single job apparently has
    # to run first, otherwise the sequence hangs.
    actions = [
        WTL.create("alexnet", 25, 20, executor=Executor.TF),
        Pause.Wait,
        WTL.create("alexnet", 25, 20),
    ]
    run_seq(run_cfg, *actions)
Exemplo n.º 17
0
def case2():
    """Use OpTracing to check whether each iteration runs exclusively.

    Three different workloads are run in one sequence with ``logconf`` set
    to ``'memop'``; output goes to ``FLAGS.save_dir``.
    """
    scfg = maybe_forced_preset(presets.OpTracing)
    scfg.logconf = 'memop'

    run_cfg = scfg.copy(output_dir=FLAGS.save_dir)
    workloads = [
        WTL.create("inception3", 100, 165),
        WTL.create("resnet50", 50, 798),
        WTL.create("resnet152", 75, 19),
    ]
    run_seq(run_cfg, *workloads)
Exemplo n.º 18
0
def main(argv):
    """Run two identical resnet101 workloads in one Salus sequence.

    Uses the OpTracing preset with the ``'pack'`` scheduler; output goes to
    ``FLAGS.save_dir/salus``. ``argv`` is not used.
    """
    scfg = maybe_forced_preset(presets.OpTracing)
    scfg.scheduler = 'pack'

    # First, run concurrently on Salus.
    salus_cfg = scfg.copy(output_dir=FLAGS.save_dir / "salus")
    pair = [WTL.create("resnet101", 50, 47) for _ in range(2)]
    run_seq(salus_cfg, *pair)
Exemplo n.º 19
0
def main(argv):
    """Dispatch to one of the sub-commands with an AllocProf config.

    Args:
        argv: ``argv[0]`` selects ``"plotrun"``, ``"single"`` or ``"all3"``;
            ``"single"`` is used when ``argv`` is empty.

    Raises:
        KeyError: If the command is not one of the known names.
    """
    scfg = maybe_forced_preset(presets.AllocProf)

    command = "single"
    if argv:
        command = argv[0]

    handlers = {
        "plotrun": plotrun,
        "single": single,
        "all3": all3,
    }
    handler = handlers[command]
    handler(scfg)
Exemplo n.º 20
0
def case1(argv):
    """Run a single inception3 workload under AllocProf with 'pack'.

    Output goes to a sub-directory of ``FLAGS.save_dir`` named after this
    function. ``argv`` is not used.
    """
    # Must be evaluated in this frame so that it yields this function's name.
    case_name = inspect.currentframe().f_code.co_name

    scfg = maybe_forced_preset(presets.AllocProf)
    scfg.scheduler = 'pack'

    workload = WTL.create('inception3', 50, 10)
    run_cfg = scfg.copy(output_dir=FLAGS.save_dir / case_name)
    run_seq(run_cfg, workload)
Exemplo n.º 21
0
def case2():
    """Run inception3eval on Salus once per request rate in ``rates``.

    For each rate the eval interval is set to ``1 / rate`` and the run is
    written to ``FLAGS.save_dir/case2/<rate>``.
    """
    scfg = maybe_forced_preset(presets.MostEfficient)

    for rate in rates:
        wl = WTL.create("inception3eval", 1, 500, executor=Executor.Salus)

        interval = str(1 / rate)
        wl.env['SALUS_TFBENCH_EVAL_INTERVAL'] = interval
        print("using interval " + interval)
        wl.env['SALUS_TFBENCH_EVAL_RAND_FACTOR'] = '1'
        wl.env['SALUS_TFBENCH_EVAL_BLOCK'] = 'false'

        rate_cfg = scfg.copy(output_dir=FLAGS.save_dir / 'case2' / str(rate))
        run_seq(rate_cfg, wl)
Exemplo n.º 22
0
def case2():
    """Run one alexnet job, wait, then two alexnet jobs in the same sequence.

    Uses the OpTracing preset with ``logconf`` set to ``'memop'``; output
    goes to ``FLAGS.save_dir/case2``.
    """
    scfg = maybe_forced_preset(presets.OpTracing)
    scfg.logconf = 'memop'

    run_cfg = scfg.copy(output_dir=FLAGS.save_dir / 'case2')
    # Known issue noted by the original author: a single job apparently has
    # to run first, otherwise the sequence hangs.
    actions = [
        WTL.create("alexnet", 25, 10),
        Pause.Wait,
        WTL.create("alexnet", 25, 50),
        WTL.create("alexnet", 25, 50),
    ]
    run_seq(run_cfg, *actions)
Exemplo n.º 23
0
def test():
    """Run the same group of three workloads twice with a wait in between.

    Uses the Debugging preset; output goes to ``FLAGS.save_dir``.
    """
    scfg = maybe_forced_preset(presets.Debugging)

    def trio():
        """Create a fresh inception3 / resnet50 / resnet152 group."""
        return [
            WTL.create("inception3", 100, 165),
            WTL.create("resnet50", 50, 798),
            WTL.create("resnet152", 75, 19),
        ]

    run_cfg = scfg.copy(output_dir=FLAGS.save_dir)
    actions = trio()
    actions.append(Pause.Wait)
    actions.extend(trio())
    run_seq(run_cfg, *actions)
Exemplo n.º 24
0
def main(argv):
    """Dispatch to ``case1``, ``case2`` or ``test``.

    Args:
        argv: ``argv[0]`` selects the case; ``"test"`` is used when ``argv``
            is empty.

    Raises:
        KeyError: If the command is not one of the known names.
    """
    # The resulting config is not used here; the call is kept as is.
    scfg = maybe_forced_preset(presets.Debugging)

    command = "test"
    if argv:
        command = argv[0]

    handlers = {
        "case1": case1,
        "case2": case2,
        "test": test,
    }
    handler = handlers[command]
    handler()
Exemplo n.º 25
0
def case1():
    """Run one inception3 job, wait, then inception3 and resnet50 together.

    Uses the OpTracing preset with ``logconf`` set to ``'memop'``; output
    goes to ``FLAGS.save_dir/case1``.
    """
    scfg = maybe_forced_preset(presets.OpTracing)
    scfg.logconf = 'memop'

    run_cfg = scfg.copy(output_dir=FLAGS.save_dir / 'case1')
    # Known issue noted by the original author: a single job apparently has
    # to run first, otherwise the sequence hangs.
    actions = [
        WTL.create("inception3", 100, 20),
        Pause.Wait,
        WTL.create("inception3", 100, 20),
        WTL.create("resnet50", 50, 20),
    ]
    run_seq(run_cfg, *actions)
Exemplo n.º 26
0
def test(argv):
    """Run vae workloads under MostEfficient with the 'pack' scheduler.

    Output goes to a sub-directory of ``FLAGS.save_dir`` named after this
    function. ``argv`` is not used.
    """
    # Must be evaluated in this frame so that it yields this function's name.
    case_name = inspect.currentframe().f_code.co_name

    scfg = maybe_forced_preset(presets.MostEfficient)
    scfg.scheduler = 'pack'

    # Number of vae workloads to launch; currently a single one.
    count = 1
    wls = []
    for _ in range(count):
        wls.append(WTL.create('vae', 64, 500))

    run_cfg = scfg.copy(output_dir=FLAGS.save_dir / case_name)
    run_seq(run_cfg, *wls)
Exemplo n.º 27
0
def case3():
    """Collect memory and fragmentation data from two inception3 jobs.

    Intended for a specially compiled Salus that places no restriction on
    how iterations run, i.e. multiple iterations can run together.
    ``SALUS_DISABLE_LANEMGR`` is set to ``'1'``; output goes to
    ``FLAGS.save_dir/case3``.
    """
    scfg = maybe_forced_preset(presets.AllocProf)
    scfg.env['SALUS_DISABLE_LANEMGR'] = '1'

    run_cfg = scfg.copy(output_dir=FLAGS.save_dir / 'case3')
    pair = [WTL.create("inception3", 25, 20) for _ in range(2)]
    run_seq(run_cfg, *pair)
Exemplo n.º 28
0
def case2():
    """Run super_res on TF with verbose logging, then via ``run_seq``.

    TF output goes to ``FLAGS.save_dir/case2/tf``. The second run uses the
    OpTracing preset with ``logconf`` set to ``'memop'`` and writes to
    ``FLAGS.save_dir/case2/salus``.
    """
    case_dir_name = 'case2'

    # Run on TF
    tf_wl = WTL.create("super_res", 128, 20, executor=Executor.TF)
    tf_env = {
        'TF_CPP_MIN_VLOG_LEVEL': '2',
        'TF_CPP_MIN_LOG_LEVEL': '',
    }
    for key, value in tf_env.items():
        tf_wl.env[key] = value
    run_tf(FLAGS.save_dir / case_dir_name / 'tf', tf_wl)

    # Run through run_seq with memory-op logging.
    scfg = maybe_forced_preset(presets.OpTracing)
    scfg.logconf = 'memop'
    salus_cfg = scfg.copy(output_dir=FLAGS.save_dir / case_dir_name / 'salus')
    run_seq(salus_cfg, WTL.create("super_res", 128, 20))
Exemplo n.º 29
0
def main(argv):
    """Run two identical resnet50 workloads under the Profiling preset.

    When ``argv`` is non-empty, the actions parsed from it are run instead.
    Output goes to ``FLAGS.save_dir`` in both cases.
    """
    scfg = maybe_forced_preset(presets.Profiling)
    run_cfg = scfg.copy(output_dir=FLAGS.save_dir)

    if argv:
        actions = parse_actions_from_cmd(argv)
    else:
        actions = [WTL.create("resnet50", 50, 265) for _ in range(2)]

    run_seq(run_cfg, *actions)
Exemplo n.º 30
0
def case2():
    """Reproduce the missing dealloc log entries seen with inception3_100.

    Runs inception3 with 100, 50 and 25 as the second ``WTL.create``
    argument, each followed by ``Pause.Wait``, under the Debugging preset.
    Output goes to ``FLAGS.save_dir``.
    """
    scfg = maybe_forced_preset(presets.Debugging)
    run_cfg = scfg.copy(output_dir=FLAGS.save_dir)

    actions = []
    for size in (100, 50, 25):
        actions.append(WTL.create("inception3", size, 10))
        actions.append(Pause.Wait)

    run_seq(run_cfg, *actions)