示例#1
0
def main(argv):
    settings = mlperf_loadgen.TestSettings()
    settings.scenario = mlperf_loadgen.TestScenario.SingleStream
    settings.mode = mlperf_loadgen.TestMode.PerformanceOnly

    sut = mlperf_loadgen.ConstructSUT(
        issue_query, flush_queries, process_latencies)
    qsl = mlperf_loadgen.ConstructQSL(
        1024 * 1024, 1024, load_samples_to_ram, unload_samples_from_ram)
    mlperf_loadgen.StartTest(sut, qsl, settings)
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)
示例#2
0
def main(argv):
    settings = mlperf_loadgen.TestSettings()
    settings.scenario = mlperf_loadgen.TestScenario.Offline
    settings.mode = mlperf_loadgen.TestMode.PerformanceOnly
    settings.offline_expected_qps = 1000

    sut = mlperf_loadgen.ConstructSUT(
        issue_query, flush_queries, process_latencies)
    qsl = mlperf_loadgen.ConstructQSL(
        1024, 128, load_samples_to_ram, unload_samples_from_ram)
    mlperf_loadgen.StartTest(sut, qsl, settings)
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)
示例#3
0
def main(argv):
    settings = mlperf_loadgen.TestSettings()
    settings.scenario = mlperf_loadgen.TestScenario.MultiStream
    settings.mode = mlperf_loadgen.TestMode.SubmissionRun
    settings.samples_per_query = 4
    settings.target_qps = 1000
    settings.target_latency_ns = 1000000000

    sut = mlperf_loadgen.ConstructSUT(issue_query)
    qsl = mlperf_loadgen.ConstructQSL(1024, 128, load_samples_to_ram,
                                      unload_samples_from_ram)
    mlperf_loadgen.StartTest(sut, qsl, settings)
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)
示例#4
0
def main(argv):
    settings = mlperf_loadgen.TestSettings()
    settings.scenario = mlperf_loadgen.TestScenario.SingleStream
    settings.mode = mlperf_loadgen.TestMode.AccuracyOnly
    settings.single_stream_expected_latency_ns = 1000000
    settings.min_query_count = 100
    settings.min_duration_ms = 10000

    sut = mlperf_loadgen.ConstructSUT(issue_query, flush_queries,
                                      process_latencies)
    qsl = mlperf_loadgen.ConstructQSL(1024, 128, load_samples_to_ram,
                                      unload_samples_from_ram)
    mlperf_loadgen.StartTest(sut, qsl, settings)
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)
示例#5
0
def main(argv):
    settings = mlperf_loadgen.TestSettings()
    settings.scenario = mlperf_loadgen.TestScenario.Server
    settings.mode = mlperf_loadgen.TestMode.PerformanceOnly
    settings.server_target_qps = 100
    settings.server_target_latency_ns = 100000000
    settings.min_query_count = 100
    settings.min_duration_ms = 10000

    sut = mlperf_loadgen.ConstructSUT(issue_query, flush_queries,
                                      process_latencies)
    qsl = mlperf_loadgen.ConstructQSL(1024, 128, load_samples_to_ram,
                                      unload_samples_from_ram)
    mlperf_loadgen.StartTest(sut, qsl, settings)
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)
def main(argv):
    settings = mlperf_loadgen.TestSettings()
    settings.scenario = mlperf_loadgen.TestScenario.SingleStream
    settings.mode = mlperf_loadgen.TestMode.PerformanceOnly
    settings.single_stream_expected_latency_ns = 1000000
    settings.enable_spec_overrides = True
    settings.override_target_latency_ns = 100000000
    settings.override_min_query_count = 100
    settings.override_min_duration_ms = 10000

    sut = mlperf_loadgen.ConstructSUT(issue_query, process_latencies)
    qsl = mlperf_loadgen.ConstructQSL(1024, 128, load_samples_to_ram,
                                      unload_samples_from_ram)
    mlperf_loadgen.StartTest(sut, qsl, settings)
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)
def main(argv):
    del argv
    settings = mlperf_loadgen.TestSettings()
    settings.scenario = mlperf_loadgen.TestScenario.MultiStreamFree
    settings.mode = mlperf_loadgen.TestMode.PerformanceOnly
    settings.multi_stream_target_latency_ns = 100000000
    settings.multi_stream_samples_per_query = 4
    settings.multi_stream_max_async_queries = 2
    settings.min_query_count = 100
    settings.min_duration_ms = 10000

    sut = mlperf_loadgen.ConstructSUT(issue_query, flush_queries,
                                      process_latencies)
    qsl = mlperf_loadgen.ConstructQSL(1024, 128, load_samples_to_ram,
                                      unload_samples_from_ram)
    mlperf_loadgen.StartTest(sut, qsl, settings)
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # --count applies to accuracy mode only and can be used to limit the number of images
    # for testing. For perf model we always limit count to 200.
    count_override = False
    count = args.count
    if count:
        count_override = True

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count, **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    config = os.path.abspath(args.config)
    if not os.path.exists(config):
        log.error("{} not found".format(config))
        sys.exit(1)

    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        os.chdir(output_dir)

    #
    # make one pass over the dataset to validate accuracy
    #
    count = ds.get_item_count()

    # warmup
    warmup_queries = range(args.max_batchsize)
    ds.load_query_samples(warmup_queries)
    for _ in range(2):
        img, _ = ds.get_samples(warmup_queries)
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }
    runner = runner_map[scenario](model, ds, args.threads, post_proc=post_proc, max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    settings = lg.TestSettings()
    settings.FromConfig(config, args.model_name, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        settings.max_duration_ms = args.time * MILLI_SEC

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if count_override:
        settings.min_query_count = count
        settings.max_query_count = count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query
    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency * NANO_SEC)

    # override target latency when it needs to be less than 1ms
    if args.model_name == "mobilenet":
        settings.single_stream_expected_latency_ns =  200000
    elif args.model_name == "resnet50":
        settings.single_stream_expected_latency_ns =  900000
    elif args.model_name == "ssd-mobilenet":
        settings.single_stream_expected_latency_ns = 1000000

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    #qsl = lg.ConstructQSL(count, min(count, 500), ds.load_query_samples, ds.unload_query_samples)
    qsl = lg.ConstructQSL(count, min(count, 1024), ds.load_query_samples, ds.unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
    runner.start_run(result_dict, args.accuracy)
    if args.enable_trace:
        lg.StartTest(sut, qsl, settings)
    else:
        logsettings = lg.LogSettings()
        logsettings.enable_trace = False
        lg.StartTestWithLogSettings(sut, qsl, settings, logsettings)

    if not last_timeing:
        last_timeing = runner.result_timing
    if args.accuracy:
        post_proc.finalize(result_dict, ds, output_dir=args.output)
    add_results(final_results, "{}".format(scenario),
                result_dict, last_timeing, time.time() - ds.last_loaded, args.accuracy)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
示例#9
0
文件: main.py 项目: tjyhlt/inference
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format(
    )

    # --count applies to accuracy mode only and can be used to limit the number of images
    # for testing. For perf model we always limit count to 200.
    count = args.count
    if not count:
        if not args.accuracy:
            count = 200

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[
        args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count,
                        **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    count = ds.get_item_count()

    # warmup
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    for scenario in args.scenario:
        runner_map = {
            lg.TestScenario.SingleStream: RunnerBase,
            lg.TestScenario.MultiStream: QueueRunner,
            lg.TestScenario.Server: QueueRunner,
            lg.TestScenario.Offline: QueueRunner
        }
        runner = runner_map[scenario](model,
                                      ds,
                                      args.threads,
                                      post_proc=post_proc,
                                      max_batchsize=args.max_batchsize)

        def issue_queries(query_samples):
            runner.enqueue(query_samples)

        def flush_queries():
            pass

        def process_latencies(latencies_ns):
            # called by loadgen to show us the recorded latencies
            global last_timeing
            last_timeing = [t / NANO_SEC for t in latencies_ns]

        settings = lg.TestSettings()
        settings.scenario = scenario
        settings.mode = lg.TestMode.PerformanceOnly
        if args.accuracy:
            settings.mode = lg.TestMode.AccuracyOnly

        if args.time:
            # override the time we want to run
            settings.min_duration_ms = args.time * MILLI_SEC
            settings.max_duration_ms = args.time * MILLI_SEC

        if args.qps:
            qps = float(args.qps)
            settings.server_target_qps = qps
            settings.offline_expected_qps = qps

        if scenario == lg.TestScenario.SingleStream:
            settings.min_query_count = args.queries_single
            settings.max_query_count = args.queries_single
        elif scenario == lg.TestScenario.MultiStream:
            settings.min_query_count = args.queries_multi
            settings.max_query_count = args.queries_multi
            settings.multi_stream_samples_per_query = 4
        elif scenario == lg.TestScenario.Server:
            max_latency = args.max_latency
        elif scenario == lg.TestScenario.Offline:
            settings.min_query_count = args.queries_offline
            settings.max_query_count = args.queries_offline

        sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
        qsl = lg.ConstructQSL(count, min(count, 1000), ds.load_query_samples,
                              ds.unload_query_samples)

        if scenario == lg.TestScenario.Server:
            for target_latency in max_latency:
                log.info("starting {}, latency={}".format(
                    scenario, target_latency))
                settings.server_target_latency_ns = int(target_latency *
                                                        NANO_SEC)

                result_dict = {
                    "good": 0,
                    "total": 0,
                    "scenario": str(scenario)
                }
                runner.start_run(result_dict, args.accuracy)
                lg.StartTest(sut, qsl, settings)

                if not last_timeing:
                    last_timeing = runner.result_timing
                if args.accuracy:
                    post_proc.finalize(result_dict,
                                       ds,
                                       output_dir=os.path.dirname(args.output))
                add_results(final_results,
                            "{}-{}".format(scenario, target_latency),
                            result_dict, last_timeing,
                            time.time() - ds.last_loaded, args.accuracy)
        else:
            log.info("starting {}".format(scenario))
            result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
            runner.start_run(result_dict, args.accuracy)
            lg.StartTest(sut, qsl, settings)

            if not last_timeing:
                last_timeing = runner.result_timing
            if args.accuracy:
                post_proc.finalize(result_dict,
                                   ds,
                                   output_dir=os.path.dirname(args.output))
            add_results(final_results, "{}".format(scenario), result_dict,
                        last_timeing,
                        time.time() - ds.last_loaded, args.accuracy)

        runner.finish()
        lg.DestroyQSL(qsl)
        lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
示例#10
0
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format(
    )

    # --count applies to accuracy mode only and can be used to limit the number of images
    # for testing. For perf model we always limit count to 200.
    count_override = False
    count = args.count
    if count:
        count_override = True

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[
        args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count,
                        **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    config = os.path.abspath(args.config)
    if not os.path.exists(config):
        log.error("{} not found".format(config))
        sys.exit(1)

    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        os.chdir(output_dir)

    #
    # make one pass over the dataset to validate accuracy
    #
    count = ds.get_item_count()

    # warmup
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }
    runner = runner_map[scenario](model,
                                  ds,
                                  args.threads,
                                  post_proc=post_proc,
                                  max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    settings = lg.TestSettings()
    settings.FromConfig(config, args.model_name, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        #settings.max_duration_ms = args.time * MILLI_SEC

    if count_override:
        settings.min_query_count = count


#        settings.max_query_count = count

    if args.min_query_count:
        settings.min_query_count = args.min_query_count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query

    if args.max_latency:
        settings.single_stream_expected_latency_ns = int(args.max_latency *
                                                         NANO_SEC)
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency *
                                                      NANO_SEC)

    def set_qps(current_qps):
        settings.server_target_qps = current_qps
        settings.offline_expected_qps = current_qps
        settings.multi_stream_target_qps = current_qps
        return current_qps

    if args.qps:
        qps = set_qps(args.qps)

    lower_qps = -1
    upper_qps = -1
    qps_passed = {}
    while True:
        print("schedual qps:", qps)
        sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
        qsl = lg.ConstructQSL(count, min(count, 500), ds.load_query_samples,
                              ds.unload_query_samples)

        log.info("starting {}".format(scenario))
        result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
        runner.start_run(result_dict, args.accuracy)
        print("max query count:", settings.max_query_count)
        lg.StartTest(sut, qsl, settings)

        if not last_timeing:
            last_timeing = runner.result_timing
        if args.accuracy:
            post_proc.finalize(result_dict, ds, output_dir=args.output)
        took = time.time() - ds.last_loaded
        add_results(final_results, "{}".format(scenario), result_dict,
                    last_timeing, took, args.accuracy)

        lg.DestroyQSL(qsl)
        lg.DestroySUT(sut)

        #
        # write final results
        #
        if args.output:
            with open("results.json", "w") as f:
                json.dump(final_results, f, sort_keys=True, indent=4)

        if args.scenario != 'Server':
            break

        if args.auto_qps is False:
            break

        if lower_qps == -1 or upper_qps == -1:
            base_qps = len(last_timeing) / took
            upper_qps = base_qps * 1.5
            lower_qps = base_qps * 0.5
            qps = set_qps(lower_qps)
            continue

        latency_percentile_ns = np.percentile(
            last_timeing,
            settings.server_target_latency_percentile * 100) * NANO_SEC
        if latency_percentile_ns < settings.server_target_latency_ns and qps > upper_qps * 0.98:
            print("target qps:", qps)
            break

        if upper_qps - lower_qps < 1 and lower_qps in qps_passed:
            print("target qps:", lower_qps)
            break

        if latency_percentile_ns > settings.server_target_latency_ns:
            #reduce qps
            print("reduce qps, bound:[%d, %d]" % (lower_qps, upper_qps))
            upper_qps = qps
            if qps == lower_qps:
                lower_qps = lower_qps * 0.5
                qps = set_qps(lower_qps)
                continue
            if qps > lower_qps:
                qps = set_qps((lower_qps + upper_qps) / 2)
                continue

        if latency_percentile_ns < settings.server_target_latency_ns:
            #increase qps
            qps_passed[qps] = None
            print("increase qps, bound:[%d, %d]" % (lower_qps, upper_qps))
            lower_qps = qps
            if qps == upper_qps:
                upper_qps = upper_qps * 1.5
                qps = set_qps(upper_qps)
                continue
            if qps < upper_qps:
                qps = set_qps((lower_qps + upper_qps) / 2)
                continue

        runner.finish()
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)
    if getattr(backend, "max_batchsize", -1) != -1:
        backend.max_batchsize = args.max_batchsize

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format(
    )

    # --count applies to accuracy mode only and can be used to limit the number of images
    # for testing. For perf model we always limit count to 200.
    count_override = False
    count = args.count
    if count:
        count_override = True

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[
        args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count,
                        **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    audit_config_cp_loc = None
    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)

        # Check if audit.config file is used, copy to output directory before
        # we chdir to that location so loadgen can find it
        audit_files = glob.glob(
            "ncoresw/mlperf/vision/classification_and_detection/*audit.config")
        if len(audit_files):
            log.info("Found audit.config (" + audit_files[0] + ")")
            audit_config_cp_loc = os.path.join(output_dir, "audit.config")
            # If user already put audit.config at `output` directory, then use
            # that one. Otherwise, copy the one we found in the current
            # directory (before chdir to new output directory).
            if os.path.exists(audit_config_cp_loc):
                log.info(
                    "WARNING: audit.config already exists, so cannot copy over new audit file!"
                )
                log.info(audit_config_cp_loc)
                audit_config_cp_loc = None
            else:
                shutil.copy(audit_files[0], audit_config_cp_loc)

        os.chdir(output_dir)

    #
    # make one pass over the dataset to validate accuracy
    #
    count = ds.get_item_count()

    # warmup
    warmup_queries = range(args.max_batchsize)
    ds.load_query_samples(warmup_queries)
    for _ in range(2):
        img, _ = ds.get_samples(warmup_queries)
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }
    runner = runner_map[scenario](model,
                                  ds,
                                  args.threads,
                                  post_proc=post_proc,
                                  max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    settings = lg.TestSettings()
    settings.FromConfig(mlperf_conf, args.model_name, args.scenario)
    settings.FromConfig(user_conf, args.model_name, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        settings.max_duration_ms = args.time * MILLI_SEC

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if count_override:
        settings.min_query_count = count
        settings.max_query_count = count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query
    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency *
                                                      NANO_SEC)

    # override target latency when it needs to be less than 1ms
    if args.model_name == "mobilenet":
        settings.single_stream_expected_latency_ns = 200000
    elif args.model_name == "resnet50":
        settings.single_stream_expected_latency_ns = 900000
    elif args.model_name == "ssd-mobilenet":
        settings.single_stream_expected_latency_ns = 900000

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 1024), ds.load_query_samples,
                          ds.unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
    runner.start_run(result_dict, args.accuracy)
    lg.StartTest(sut, qsl, settings)

    if not last_timeing:
        last_timeing = runner.result_timing
    if args.accuracy:
        post_proc.finalize(result_dict, ds, output_dir=args.output)
    add_results(final_results, "{}".format(scenario), result_dict,
                last_timeing,
                time.time() - ds.last_loaded, args.accuracy)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    # Dump the summary logs to stdout for convenience
    log.info("Output dir: " + os.path.abspath(output_dir))
    with open(os.path.join(output_dir, "mlperf_log_summary.txt"), 'r') as f:
        log.info(f.read())

    # Output accuracy txt file
    if args.accuracy:
        with open(os.path.join(output_dir, "accuracy.txt"), "w") as f_acc:
            # SSD accuracy calculation
            #----------------------------------------
            # The mAP is already stored in result_dict["mAP"], but we'll call
            # `accuracy_coco()` just to keep the submission process consistent.
            if args.model_name == "ssd-mobilenet":
                accuracy_str = accuracy.CocoAcc(
                    mlperf_accuracy_file=os.path.join(
                        output_dir, "mlperf_log_accuracy.json"),
                    coco_dir=args.dataset_path).get_accuracy() + "\n"
                f_acc.write(accuracy_str)
                log.info(accuracy_str)

            if args.model_name == "ssd-resnet34":
                accuracy_str = accuracy.CocoAcc(
                    mlperf_accuracy_file=os.path.join(
                        output_dir, "mlperf_log_accuracy.json"),
                    coco_dir=args.dataset_path,
                    use_inv_map=True,
                    remove_48_empty_images=False).get_accuracy() + "\n"
                f_acc.write(accuracy_str)
                log.info(accuracy_str)

            # ImageNet accuracy calculation
            #----------------------------------------
            # The good / total values are already stored in result_dict["good"]
            # and result_dict["total"], but we'll call `accuracy_imagenet()`
            # just to keep the submission process consistent.
            else:
                accuracy_str = accuracy.ImagenetAcc(
                    mlperf_accuracy_file=os.path.join(
                        output_dir, "mlperf_log_accuracy.json"),
                    imagenet_val_file=os.path.join(
                        args.dataset_path,
                        "val_map.txt")).get_accuracy() + "\n"
                f_acc.write(accuracy_str)
                log.info(accuracy_str)

    #
    # write final results
    #
    if args.output:
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
        if audit_config_cp_loc != None:
            os.remove(audit_config_cp_loc)

    backend_destroy = getattr(backend, "destroy", None)
    if callable(backend_destroy):
        backend.destroy()
示例#12
0
def main():
  global last_timeing
  args = get_args()

  log.info(args)

  backend = BackendTensorRT()

  ds = Imagenet(
      data_path=args.dataset_path,
      use_cache=args.cache,
      batch_size=args.batch_size,
      image_size=args.image_size,
      calib_file='cal_image_list_option_%d.txt' % args.calib_file)

  model = backend.load(args, ds=ds)

  final_results = {
      "runtime": model.name(),
      "version": model.version(),
      "time": int(time.time()),
      "cmdline": str(args),
  }

  config = os.path.abspath(args.config)
  assert(os.path.exists(config)), "%s not existed!" % config
  user_config = os.path.abspath(args.user_config)
  assert(os.path.exists(user_config)), "%s not existed!" % user_config

  base_path = os.path.dirname(os.path.realpath(__file__))
  if args.output:
    output_dir = os.path.abspath(args.output)
    os.makedirs(output_dir, exist_ok=True)
    os.chdir(output_dir)

  post_proc = PostProcessCommon(offset=0)
  runner = QueueRunner(
      model, ds, args.threads, post_proc=post_proc, batch_size=args.batch_size)

  def issue_queries(ids, indices):
    runner.enqueue(ids, indices)

  def flush_queries():
    pass

  def process_latencies(latencies_ns):
    global last_timeing
    last_timeing = [t / NANO_SEC for t in latencies_ns]

  settings = lg.TestSettings()
  model_name = 'OFAnet-AutoSinian'
  settings.FromConfig(config, model_name, args.scenario)
  settings.FromConfig(user_config, model_name, args.scenario)

  if args.audit_test:
    audit_config_path = base_path + '/audit%s.config' % args.audit_test
    settings.FromConfig(audit_config_path, model_name, args.scenario)

  scenario = SCENARIO_MAP[args.scenario]
  settings.scenario = scenario
  settings.mode = lg.TestMode.PerformanceOnly
  if args.accuracy:
    settings.mode = lg.TestMode.AccuracyOnly

  sut = lg.ConstructFastSUT(issue_queries, flush_queries, process_latencies)
  qsl = lg.ConstructQSL(ds.get_item_count(), args.batch_size,
                        ds.load_query_samples, ds.unload_query_samples)

  log.info("starting {}".format(scenario))
  result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
  runner.start_run(result_dict, args.accuracy)
  start = time.time()
  lg.StartTest(sut, qsl, settings)
  post_proc.finalize(result_dict)
  add_results(final_results, "{}".format(scenario), result_dict, last_timeing,
              runner.finishTime - ds.last_loaded, args)

  runner.finish()
  lg.DestroyQSL(qsl)
  lg.DestroyFastSUT(sut)

  if args.output:
    with open("results.json", "w") as f:
      json.dump(final_results, f, sort_keys=True, indent=2)
示例#13
0
            .format(self.count, qitem.sample_id[0]))
        self.count += 1

        return self.count


if __name__ == "__main__":
    runner = DummyRunner()

    runner.start_worker()

    settings = mlperf_loadgen.TestSettings()
    settings.scenario = mlperf_loadgen.TestScenario.SingleStream
    settings.mode = mlperf_loadgen.TestMode.PerformanceOnly

    # Specify exactly how many queries need to be made
    settings.min_query_count = 3003
    settings.max_query_count = 3003

    total_queries = 256  # Maximum sample ID + 1
    perf_queries = 8  # TBD: Doesn't seem to have an effect

    sut = mlperf_loadgen.ConstructSUT(runner.enqueue, flush_queries,
                                      process_latencies)
    qsl = mlperf_loadgen.ConstructQSL(total_queries, perf_queries,
                                      runner.load_samples_to_ram,
                                      runner.unload_samples_from_ram)
    mlperf_loadgen.StartTest(sut, qsl, settings)
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)
示例#14
0
文件: main.py 项目: sf-wind/inference
def main():
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format(
    )

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[
        args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=args.count,
                        **kwargs)

    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)

    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    count = args.count if args.count else ds.get_item_count()

    runner = Runner(model, ds, args.threads, post_proc=post_proc)
    runner.start_pool()

    # warmup
    log.info("warmup ...")
    ds.load_query_samples([0])
    for _ in range(50):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    def issue_query(query_samples):
        idx = [q.index for q in query_samples]
        query_id = [q.id for q in query_samples]
        data, label = ds.get_samples(idx)
        runner.enqueue(query_id, idx, data, label)

    def process_latencies(latencies_ns):
        global last_timeing
        last_timeing = [t / 10000000. for t in latencies_ns]

    sut = lg.ConstructSUT(issue_query, process_latencies)
    qsl = lg.ConstructQSL(count, count, ds.load_query_samples,
                          ds.unload_query_samples)

    scenarios = [
        lg.TestScenario.SingleStream,
        lg.TestScenario.MultiStream,
        lg.TestScenario.Server,
        # lg.TestScenario.Offline,
    ]
    for scenario in scenarios:
        for target_latency in args.max_latency:
            log.info("starting {}, latency={}".format(scenario,
                                                      target_latency))
            settings = lg.TestSettings()
            settings.scenario = scenario

            if args.qps:
                settings.enable_spec_overrides = True
                qps = float(args.qps)
                settings.server_target_qps = qps
                settings.offline_expected_qps = qps
            if args.time:
                settings.enable_spec_overrides = True
                settings.override_min_duration_ms = args.time * MILLI_SEC
                settings.override_max_duration_ms = args.time * MILLI_SEC
                qps = args.qps or 100
                settings.override_min_query_count = qps * args.time
                settings.override_max_query_count = qps * args.time

            if args.time or args.qps:
                settings.mode = lg.TestMode.PerformanceOnly
            # FIXME: add SubmissionRun once available

            settings.enable_spec_overrides = True  # FIXME: needed because of override_target_latency_ns
            settings.single_stream_expected_latency_ns = int(target_latency *
                                                             NANO_SEC)
            settings.override_target_latency_ns = int(target_latency *
                                                      NANO_SEC)

            # reset result capture
            result_dict = {"good": 0, "total": 0}
            runner.start_run(result_dict, True)
            start = time.time()
            lg.StartTest(sut, qsl, settings)

            # aggregate results
            post_proc.finalize(result_dict, ds)

            add_results(final_results, "{}-{}".format(scenario,
                                                      target_latency),
                        result_dict, last_timeing,
                        time.time() - start)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
示例#15
0
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format(
    )

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[
        args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=args.count,
                        **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    count = args.count if args.count else ds.get_item_count()

    runner = Runner(model, ds, args.threads, post_proc=post_proc)

    #
    # warmup
    #
    log.info("warmup ...")
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    if args.accuracy:
        #
        # accuracy pass
        #
        log.info("starting accuracy pass on {} items".format(count))
        runner.start_pool(nolg=True)
        result_dict = {
            "good": 0,
            "total": 0,
            "scenario": "Accuracy",
            "timing": []
        }
        runner.start_run(result_dict, True)
        start = time.time()
        for idx in range(0, count):
            ds.load_query_samples([idx])
            data, label = ds.get_samples([idx])
            runner.enqueue([idx], [idx], data, label)
        runner.finish()
        # aggregate results
        post_proc.finalize(result_dict,
                           ds,
                           output_dir=os.path.dirname(args.output))
        last_timeing = result_dict["timing"]
        del result_dict["timing"]
        add_results(final_results, "Accuracy", result_dict, last_timeing,
                    time.time() - start)

    #
    # run the benchmark with timing
    #
    runner.start_pool()

    def issue_query(query_samples):
        idx = [q.index for q in query_samples]
        query_id = [q.id for q in query_samples]
        data, label = ds.get_samples(idx)
        runner.enqueue(query_id, idx, data, label)

    def process_latencies(latencies_ns):
        global last_timeing
        last_timeing = [t / 1e9 for t in latencies_ns]

    sut = lg.ConstructSUT(issue_query, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 1000), ds.load_query_samples,
                          ds.unload_query_samples)

    for scenario in args.scenario:
        for target_latency in args.max_latency:
            log.info("starting {}, latency={}".format(scenario,
                                                      target_latency))
            settings = lg.TestSettings()
            log.info(scenario)
            if str(scenario) == 'TestMode.AccuracyOnly':
                settings.mode = scenario
            else:
                settings.scenario = scenario

            if args.qps:
                settings.enable_spec_overrides = True
                qps = float(args.qps)
                settings.server_target_qps = qps
                settings.offline_expected_qps = qps

            if args.time:
                settings.enable_spec_overrides = True
                settings.override_min_duration_ms = args.time * MILLI_SEC
                settings.override_max_duration_ms = args.time * MILLI_SEC
                qps = args.qps or 100
                settings.override_min_query_count = qps * args.time
                settings.override_max_query_count = qps * args.time

            if args.time or args.qps and str(
                    scenario) != 'TestMode.AccuracyOnly':
                settings.mode = lg.TestMode.PerformanceOnly
            # FIXME: add SubmissionRun once available

            settings.enable_spec_overrides = True
            settings.single_stream_expected_latency_ns = int(target_latency *
                                                             NANO_SEC)
            settings.override_target_latency_ns = int(target_latency *
                                                      NANO_SEC)

            result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
            runner.start_run(result_dict, False)
            lg.StartTest(sut, qsl, settings)

            add_results(final_results, "{}-{}".format(scenario,
                                                      target_latency),
                        result_dict, last_timeing,
                        time.time() - ds.last_loaded)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
示例#16
0
def main():
    global qcount
    global num_sockets
    global cpus_per_socket
    global cpus_per_process
    global cpus_per_instance
    global total_instances
    global start_time
    global item_total
    global last_timeing

    args = get_args()
    log.info(args)
    config = os.path.abspath(args.config)
    user_config = os.path.abspath(args.user_config)

    if not os.path.exists(config):
        log.error("{} not found".format(config))
        sys.exit(1)

    if not os.path.exists(user_config):
        log.error("{} not found".format(user_config))
        sys.exit(1)

    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        if os.path.exists("./audit.config"):
            copyfile("./audit.config", output_dir + "/audit.config")
        os.chdir(output_dir)

    settings = lg.TestSettings()
    settings.FromConfig(config, args.model, args.scenario)
    settings.FromConfig(user_config, args.model, args.scenario)
    settings.mode = lg.TestMode.PerformanceOnly

    cpus_for_loadgen = 1
    left_cores = cpus_per_socket * num_sockets - total_procs * cpus_per_process
    first_instance_start_core = cpus_for_loadgen 
    if left_cores > cpus_for_loadgen:
        first_instance_start_core = 0
        cpus_for_loadgen = left_cores

    total_instances = 0
    instances_per_proc = (cpus_per_process // cpus_per_instance)
    for i in range(total_procs):
        if i == 0 and first_instance_start_core > 0:
            total_instances = total_instances + ((cpus_per_process - first_instance_start_core) // cpus_per_instance)
            if (cpus_per_instance - first_instance_start_core) >= (cpus_per_instance // 2):
                total_instances = total_instances + 1
        else:
            total_instances = total_instances + instances_per_proc
    #print("Setup {} Instances !!".format(total_instances))

    lock = multiprocessing.Lock()
    barrier = multiprocessing.Barrier(total_instances)
    init_counter = multiprocessing.Value("i", 0)
    total_samples = multiprocessing.Value("i", 0)
    finished_samples = multiprocessing.Value("i", 0)
    dsQueue = multiprocessing.Queue()
    numOutQ = num_sockets
    outQueues = [multiprocessing.Queue() for i in range(numOutQ)]
    #inQueue = multiprocessing.JoinableQueue()
    inQueue = multiprocessing.Queue()
    consumers = [Consumer(inQueue, outQueues[i%numOutQ], dsQueue, lock, init_counter, finished_samples, barrier, i, args, settings.min_query_count, first_instance_start_core)
                 for i in range(total_procs)]
    for c in consumers:
        c.start()

    # Wait until subprocess ready
    while init_counter.value < total_procs: time.sleep(2)

    import torch
    import criteo
    torch.set_num_threads(cpus_per_socket * num_sockets)

    dlrm_dataset = get_dataset(args)
    total_samples.value = dlrm_dataset.get_item_count()
    scenario = SCENARIO_MAP[args.scenario]
    runner_map = {
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }

    settings.scenario = scenario
    runner = runner_map[scenario](inQueue, dlrm_dataset, total_samples.value, args.max_sample_size, args.max_batchsize)

    # Start response thread
    response_workers = [threading.Thread(
        target=response_loadgen, args=(outQueues[i], args.accuracy)) for i in range(numOutQ)]
    for response_worker in response_workers:
       response_worker.daemon = True
       response_worker.start()


    def issue_queries(response_ids, query_sample_indexes):
        runner.enqueue(response_ids, query_sample_indexes)

    def flush_queries():
        runner.unload_query_samples()

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
        settings.performance_sample_count_override = total_samples.value

    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.duration:
        settings.min_duration_ms = args.duration
        settings.max_duration_ms = args.duration

    if args.target_qps:
        settings.server_target_qps = float(args.target_qps)
        settings.offline_expected_qps = float(args.target_qps)

    if args.count_queries:
        settings.min_query_count = args.count_queries
        settings.max_query_count = args.count_queries

    if args.samples_per_query_multistream:
        settings.multi_stream_samples_per_query = args.samples_per_query_multistream

    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency * NANO_SEC)

    if args.accuracy:
        qcount = total_samples.value
    else:
        qcount = settings.min_query_count

    def load_query_samples(sample_list):
        # Wait until subprocess ready
        global start_time
        global total_instances
        for _ in range(total_instances):
            dsQueue.put(sample_list)
        while init_counter.value < total_procs + total_instances: time.sleep(2)
        start_time = time.time()

    def unload_query_samples(sample_list):
        pass

    sut = lg.ConstructFastSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(total_samples.value, min(total_samples.value, args.samples_per_query_offline), load_query_samples, unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {"good": 0, "total": 0, "roc_auc": 0, "scenario": str(scenario)}

    torch.set_num_threads(cpus_for_loadgen)
    lg.StartTest(sut, qsl, settings)

    if not last_timeing:
        last_timeing = item_timing
    if args.accuracy:
        result_dict["good"] = item_good
        result_dict["total"] = item_total
        result_dict["roc_auc"] = criteo.auc_score(item_results)

    final_results = {
        "runtime": "pytorch-native-dlrm",
        "version": torch.__version__,
        "time": int(time.time()),
        "cmdline": str(args),
    }

    add_results(final_results, "{}".format(scenario),
                result_dict, last_timeing, time.time() - start_time, args.accuracy)

    #inQueue.join()
    for _ in range(total_instances):
        inQueue.put(None)
    for c in consumers:
        c.join()
    for i in range(numOutQ):
        outQueues[i].put(None)

    lg.DestroyQSL(qsl)
    lg.DestroyFastSUT(sut)

    # write final results
    if args.output:
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
示例#17
0
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=args.count, **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    count = args.count if args.count else ds.get_item_count()

    if args.accuracy:
        #
        # accuracy pass
        #
        log.info("starting accuracy pass on {} items".format(count))
        last_timeing = []
        runner = RunnerBase(model, ds, args.threads, post_proc=post_proc)
        result_dict = {"good": 0, "total": 0, "scenario": "Accuracy"}
        runner.start_run(result_dict, True)
        start = time.time()
        for idx in range(0, count):
            ds.load_query_samples([idx])
            data, label = ds.get_samples([idx])
            start_one = time.time()
            runner.enqueue([idx], [idx], data, label)
            last_timeing.append(time.time() - start_one)
        runner.finish()
        # aggregate results
        post_proc.finalize(result_dict, ds, output_dir=os.path.dirname(args.output))
        add_results(final_results, "Accuracy", result_dict, last_timeing, time.time() - start)

    # warmup
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    for scenario in args.scenario:
        runner_map = {
            lg.TestScenario.SingleStream: RunnerBase,
            lg.TestScenario.MultiStream: QueueRunner,
            lg.TestScenario.Server: QueueRunner,
            lg.TestScenario.Offline: QueueRunner
        }
        runner = runner_map[scenario](model, ds, args.threads, post_proc=post_proc)

        def issue_query(query_samples):
            # called by loadgen to issue queries
            idx = [q.index for q in query_samples]
            query_id = [q.id for q in query_samples]
            data, label = ds.get_samples(idx)
            runner.enqueue(query_id, idx, data, label)

        def process_latencies(latencies_ns):
            # called by loadgen to show us the recorded latencies
            global last_timeing
            last_timeing = [t / 1e9 for t in latencies_ns]

        settings = lg.TestSettings()
        settings.enable_spec_overrides = True
        settings.scenario = scenario
        settings.mode = lg.TestMode.PerformanceOnly
        settings.multi_stream_samples_per_query = 8

        if args.time:
            # override the time we want to run
            settings.enable_spec_overrides = True
            settings.override_min_duration_ms = args.time * MILLI_SEC
            settings.override_max_duration_ms = args.time * MILLI_SEC

        if args.qps:
            qps = float(args.qps)
            settings.server_target_qps = qps
            settings.offline_expected_qps = qps

        # mlperf rules - min queries
        if scenario == lg.TestScenario.SingleStream:
            settings.override_min_query_count = args.queries_single
            settings.override_max_query_count = args.queries_single
        else:
            settings.override_min_query_count = args.queries_multi
            settings.override_max_query_count = args.queries_multi

        sut = lg.ConstructSUT(issue_query, process_latencies)
        qsl = lg.ConstructQSL(count, min(count, 1000), ds.load_query_samples, ds.unload_query_samples)

        for target_latency in args.max_latency:
            log.info("starting {}, latency={}".format(scenario, target_latency))

            settings.single_stream_expected_latency_ns = int(target_latency * NANO_SEC)
            settings.override_target_latency_ns = int(target_latency * NANO_SEC)

            result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
            runner.start_run(result_dict, False)
            lg.StartTest(sut, qsl, settings)

            add_results(final_results, "{}-{}".format(scenario, target_latency),
                        result_dict, last_timeing, time.time() - ds.last_loaded)

        runner.finish()
        lg.DestroyQSL(qsl)
        lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
示例#18
0
def main():
    args = get_args()

    print(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format(
    )

    # dataset to use
    wanted_dataset, preprocessor, postprocessor, kwargs = SUPPORTED_DATASETS[
        args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=preprocessor,
                        use_cache=args.cache,
                        count=args.count,
                        **kwargs)

    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)

    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    count = args.count if args.count else ds.get_item_count()

    runner = Runner(model, ds, args.threads, post_process=postprocessor)
    runner.start_pool()

    # warmup
    log.info("warmup ...")
    ds.load_query_samples([0])
    for _ in range(100):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})

    def issue_query(query_samples):
        idx = [q.index for q in query_samples]
        query_id = [q.id for q in query_samples]
        data, label = ds.get_samples(idx)
        runner.enqueue(query_id, data, label)

    sut = lg.ConstructSUT(issue_query)
    qsl = lg.ConstructQSL(count, args.time, ds.load_query_samples,
                          ds.unload_query_samples)
    scenarios = [
        # lg.TestScenario.SingleStream,
        lg.TestScenario.MultiStream,
        # lg.TestScenario.Cloud,
        # lg.TestScenario.Offline,
    ]
    for scenario in scenarios:
        for target_latency in args.max_latency:
            log.info("starting {}, latency={}".format(scenario,
                                                      target_latency))
            settings = lg.TestSettings()
            settings.scenario = scenario
            settings.mode = lg.TestMode.SubmissionRun
            settings.samples_per_query = 4  # FIXME: we don't want to know about this
            settings.target_qps = 1000  # FIXME: we don't want to know about this
            settings.target_latency_ns = int(target_latency * 1000000000)

            result_list = []
            result_dict = {"good": 0, "total": 0}
            runner.start_run(result_list, result_dict)
            start = time.time()
            lg.StartTest(sut, qsl, settings)
            add_results(final_results, "{}-{}".format(scenario,
                                                      target_latency),
                        result_dict, result_list,
                        time.time() - start)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
示例#19
0
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend, args.dataset, args.max_ind_range, args.data_sub_sample_rate, args.use_gpu)

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]

    # --count-samples can be used to limit the number of samples used for testing
    ds = wanted_dataset(data_path=args.dataset_path,
                        name=args.dataset,
                        pre_process=pre_proc,  # currently an identity function
                        use_cache=args.cache,  # currently not used
                        count=args.count_samples,
                        samples_to_aggregate_fix=args.samples_to_aggregate_fix,
                        samples_to_aggregate_min=args.samples_to_aggregate_min,
                        samples_to_aggregate_max=args.samples_to_aggregate_max,
                        samples_to_aggregate_quantile_file=args.samples_to_aggregate_quantile_file,
                        samples_to_aggregate_trace_file=args.samples_to_aggregate_trace_file,
                        test_num_workers=args.test_num_workers,
                        max_ind_range=args.max_ind_range,
                        sub_sample_rate=args.data_sub_sample_rate,
                        mlperf_bin_loader=args.mlperf_bin_loader,
                        **kwargs)
    # load model to backend
    model = backend.load(args.model_path, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        os.chdir(output_dir)

    #
    # make one pass over the dataset to validate accuracy
    #
    count = ds.get_item_count()
    # warmup
    ds.load_query_samples([0])

    for _ in range(5):
        batch_dense_X, batch_lS_o, batch_lS_i, _, _ = ds.get_samples([0])
        _ = backend.predict(batch_dense_X, batch_lS_o, batch_lS_i)

    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }

    runner = runner_map[scenario](model, ds, args.threads, post_proc=post_proc, max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    settings = lg.TestSettings()
    settings.FromConfig(mlperf_conf, args.model_path, args.scenario)
    settings.FromConfig(user_conf, args.model_path, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly

    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.duration:
        settings.min_duration_ms = args.duration
        settings.max_duration_ms = args.duration

    if args.target_qps:
        settings.server_target_qps = float(args.target_qps)
        settings.offline_expected_qps = float(args.target_qps)

    if args.count_queries:
        settings.min_query_count = args.count_queries
        settings.max_query_count = args.count_queries

    if args.samples_per_query_multistream:
        settings.multi_stream_samples_per_query = args.samples_per_query_multistream

    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency * NANO_SEC)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, args.samples_per_query_offline), ds.load_query_samples, ds.unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {"good": 0, "total": 0, "roc_auc": 0, "scenario": str(scenario)}
    runner.start_run(result_dict, args.accuracy)
    lg.StartTest(sut, qsl, settings)

    if not last_timeing:
        last_timeing = runner.result_timing
    if args.accuracy:
        post_proc.finalize(result_dict, ds, output_dir=args.output)
    add_results(final_results, "{}".format(scenario),
                result_dict, last_timeing, time.time() - ds.last_loaded, args.accuracy)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
示例#20
0
def main():
    global num_sockets
    global start_time
    global item_total
    global last_timeing

    args = get_args()
    log.info(args)
    config = os.path.abspath(args.config)
    user_config = os.path.abspath(args.user_config)

    if not os.path.exists(config):
        log.error("{} not found".format(config))
        sys.exit(1)

    if not os.path.exists(user_config):
        log.error("{} not found".format(user_config))
        sys.exit(1)

    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        os.chdir(output_dir)

    lock = multiprocessing.Lock()
    init_counter = multiprocessing.Value("i", 0)
    total_samples = multiprocessing.Value("i", 0)
    dsQueue = multiprocessing.Queue()
    outQueue = multiprocessing.Queue()
    inQueue = multiprocessing.JoinableQueue(num_sockets * 4)
    consumers = [
        Consumer(inQueue, outQueue, dsQueue, lock, init_counter, total_samples,
                 i, args) for i in range(num_sockets)
    ]
    for c in consumers:
        c.start()

    # Wait until subprocess ready
    while init_counter.value < num_sockets:
        time.sleep(2)

    # Start response thread
    response_worker = threading.Thread(target=response_loadgen,
                                       args=(outQueue, args.accuracy))
    response_worker.daemon = True
    response_worker.start()

    scenario = SCENARIO_MAP[args.scenario]
    runner_map = {
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }

    runner = runner_map[scenario](inQueue, max_batchsize=args.max_batchsize)

    def issue_queries(response_ids, query_sample_indexes):
        runner.enqueue(response_ids, query_sample_indexes)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    settings = lg.TestSettings()
    settings.FromConfig(config, args.model, args.scenario)
    settings.FromConfig(user_config, args.model, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
        settings.performance_sample_count_override = total_samples.value

    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.duration:
        settings.min_duration_ms = args.duration
        settings.max_duration_ms = args.duration

    if args.target_qps:
        settings.server_target_qps = float(args.target_qps)
        settings.offline_expected_qps = float(args.target_qps)

    if args.count_queries:
        settings.min_query_count = args.count_queries
        settings.max_query_count = args.count_queries

    if args.samples_per_query_multistream:
        settings.multi_stream_samples_per_query = args.samples_per_query_multistream

    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency *
                                                      NANO_SEC)

    def load_query_samples(sample_list):
        # Wait until subprocess ready
        global start_time
        for _ in range(num_sockets):
            dsQueue.put(sample_list)
        while init_counter.value < 2 * num_sockets:
            time.sleep(2)
        start_time = time.time()

    def unload_query_samples(sample_list):
        pass

    import torch
    import criteo

    sut = lg.ConstructFastSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(
        total_samples.value,
        min(total_samples.value, args.samples_per_query_offline),
        load_query_samples, unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {
        "good": 0,
        "total": 0,
        "roc_auc": 0,
        "scenario": str(scenario)
    }

    lg.StartTest(sut, qsl, settings)

    if not last_timeing:
        last_timeing = item_timing
    if args.accuracy:
        result_dict["good"] = item_good
        result_dict["total"] = item_total
        result_dict["roc_auc"] = criteo.auc_score(item_results)

    final_results = {
        "runtime": "pytorch-native-dlrm",
        "version": torch.__version__,
        "time": int(time.time()),
        "cmdline": str(args),
    }

    add_results(final_results, "{}".format(scenario), result_dict,
                last_timeing,
                time.time() - start_time, args.accuracy)

    inQueue.join()
    for _ in consumers:
        inQueue.put(None)
    for c in consumers:
        c.join()
    outQueue.put(None)

    lg.DestroyQSL(qsl)
    lg.DestroyFastSUT(sut)

    # write final results
    if args.output:
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
示例#21
0
def main():
    global num_ins
    global num_phy_cpus
    global in_queue_cnt
    global out_queue_cnt

    args = get_args()
    log.info(args)

    num_ins = args.num_instance
    num_phy_cpus = args.num_phy_cpus
    log.info('Run with {} instance on {} cpus'.format(num_ins, num_phy_cpus))

    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    image_format = 'NCHW'
    dataset = "imagenet"
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[dataset]

    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        cache_dir=args.cache_dir,
                        count=args.count,
                        use_int8=args.use_int8_dataset,
                        num_workers=num_phy_cpus,
                        **kwargs)

    # Establish communication queues
    log.info('Start comsumer queue and response thread')
    lock = multiprocessing.Lock()
    init_counter = multiprocessing.Value("i", 0)
    in_queue = multiprocessing.JoinableQueue()
    out_queue = multiprocessing.Queue()
    ds_queue = multiprocessing.Queue()

    # Start consumers
    consumers = [Consumer(in_queue, out_queue, ds_queue, lock, init_counter, i, args)
                 for i in range(num_ins)]
    for c in consumers:
        c.start()

    # Wait until all sub-processors are ready
    block_until(init_counter, num_ins, 2)

    # Start response thread
    response_worker = threading.Thread(
        target=response_loadgen, args=(out_queue,))
    response_worker.daemon = True
    response_worker.start()

    scenario = SCENARIO_MAP[args.scenario]
    runner = QueueRunner(in_queue, args.batch_size)

    def issue_queries(response_ids, query_sample_indexes):
        runner.put(response_ids, query_sample_indexes)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        log.info("Average latency: {}".format(np.mean(latencies_ns)))
        log.info("Median latency: {}".format(np.percentile(latencies_ns, 50)))
        log.info("90 percentile latency: {}".format(np.percentile(latencies_ns, 90)))

    def load_query_samples(sample_list):
        for _ in range(num_ins):
            ds_queue.put(sample_list)
        block_until(init_counter, 2 * num_ins, 2)

    def unload_query_samples(sample_list):
        pass

    settings = lg.TestSettings()
    settings.FromConfig(mlperf_conf, "resnet50", args.scenario)
    settings.FromConfig(user_conf, "resnet50", args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    count = ds.get_item_count()
    perf_count = 1024
    if args.accuracy:
        perf_count = count
    sut = lg.ConstructFastSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, perf_count, load_query_samples, unload_query_samples)

    log.info("starting {}".format(scenario))
    lg.StartTest(sut, qsl, settings)

    # Wait until outQueue done
    while out_queue_cnt < in_queue_cnt:
        time.sleep(0.2)

    in_queue.join()
    for i in range(num_ins):
        in_queue.put('DONE')
    for c in consumers:
        c.join()
    out_queue.put('DONE')

    if args.accuracy:
        output_file = 'accuracy.txt'
        if args.output_file:
            output_file = args.output_file
        cmd = "python tools/accuracy-imagenet.py " \
              "--mlperf-accuracy-file=mlperf_log_accuracy.json " \
              "--imagenet-val-file=val_map.txt --output-file={}".format(output_file)
        cmd = cmd.split(' ')
        subprocess.check_call(cmd)

    lg.DestroyQSL(qsl)
    lg.DestroyFastSUT(sut)

    log.info('Test done.')