Example #1
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend, args.dataset_path,
                          args.dataset_calibration_list)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # --count applies to accuracy mode only and can be used to limit the number of images
    # used for testing. In performance mode the count is always limited to 200.
    count_override = False
    count = args.count
    if count:
        count_override = True

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[
        args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count,
                        **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        os.chdir(output_dir)

    #
    # make one pass over the dataset to validate accuracy
    #
    count = ds.get_item_count()

    # warmup
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }
    runner = runner_map[scenario](model,
                                  ds,
                                  args.threads,
                                  post_proc=post_proc,
                                  max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = output_dir
    log_output_settings.copy_summary_to_stdout = False
    log_settings = lg.LogSettings()
    log_settings.enable_trace = args.debug
    log_settings.log_output = log_output_settings

    settings = lg.TestSettings()
    settings.FromConfig(mlperf_conf, args.model_name, args.scenario)
    settings.FromConfig(user_conf, args.model_name, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        settings.max_duration_ms = args.time * MILLI_SEC

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if count_override:
        settings.min_query_count = count
        settings.max_query_count = count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query
    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency *
                                                      NANO_SEC)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 500), ds.load_query_samples,
                          ds.unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
    runner.start_run(result_dict, args.accuracy)

    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    if not last_timeing:
        last_timeing = runner.result_timing
    if args.accuracy:
        post_proc.finalize(result_dict, ds, output_dir=args.output)

    add_results(final_results, "{}".format(scenario), result_dict,
                last_timeing,
                time.time() - ds.last_loaded, args.accuracy)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
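
Most of the harnesses in this section share one loadgen skeleton: build TestSettings, wrap the model in a SUT via issue_queries/flush_queries/process_latencies callbacks, wrap the dataset in a QSL via load/unload callbacks, run the test, and destroy both handles. Below is a minimal self-contained sketch of that flow, assuming `lg` is the `mlperf_loadgen` Python module (the import is not shown in these examples) and using trivial stand-in callbacks instead of a real backend and dataset:

import array

import mlperf_loadgen as lg  # assumed to be the `lg` used throughout

NANO_SEC = 1e9    # implied by `t / NANO_SEC` converting ns to seconds
MILLI_SEC = 1000  # implied by `args.time * MILLI_SEC` converting s to ms


def issue_queries(query_samples):
    # Answer every sample with a dummy one-byte result.
    responses, buffers = [], []
    for q in query_samples:
        buf = array.array("B", b"\x00")
        buffers.append(buf)  # keep buffers alive until QuerySamplesComplete
        addr, length = buf.buffer_info()
        responses.append(lg.QuerySampleResponse(q.id, addr, length))
    lg.QuerySamplesComplete(responses)


def flush_queries():
    pass


def process_latencies(latencies_ns):
    print([t / NANO_SEC for t in latencies_ns])


def load_samples(indices):
    pass


def unload_samples(indices):
    pass


settings = lg.TestSettings()
settings.scenario = lg.TestScenario.SingleStream
settings.mode = lg.TestMode.PerformanceOnly
settings.min_query_count = 8   # keep the sketch short
settings.max_query_count = 8
settings.min_duration_ms = 0

sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
qsl = lg.ConstructQSL(8, 8, load_samples, unload_samples)  # total, perf counts
lg.StartTest(sut, qsl, settings)
lg.DestroyQSL(qsl)
lg.DestroySUT(sut)

The `buffers` list mirrors the `response_array_refs` pattern in Example #5: loadgen reads the response memory during QuerySamplesComplete, so the arrays must not be garbage-collected before that call returns.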
Example #2
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend, args.dataset, args.max_ind_range,
                          args.data_sub_sample_rate, args.use_gpu)

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[
        args.dataset]

    # --count-samples can be used to limit the number of samples used for testing
    ds = wanted_dataset(
        data_path=args.dataset_path,
        name=args.dataset,
        pre_process=pre_proc,  # currently an identity function
        use_cache=args.cache,  # currently not used
        count=args.count_samples,
        samples_to_aggregate_fix=args.samples_to_aggregate_fix,
        samples_to_aggregate_min=args.samples_to_aggregate_min,
        samples_to_aggregate_max=args.samples_to_aggregate_max,
        samples_to_aggregate_quantile_file=args.samples_to_aggregate_quantile_file,
        samples_to_aggregate_trace_file=args.samples_to_aggregate_trace_file,
        test_num_workers=args.test_num_workers,
        max_ind_range=args.max_ind_range,
        sub_sample_rate=args.data_sub_sample_rate,
        mlperf_bin_loader=args.mlperf_bin_loader,
        **kwargs)
    # load model to backend
    model = backend.load(args.model_path,
                         inputs=args.inputs,
                         outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    config = os.path.abspath(args.config)
    if not os.path.exists(config):
        log.error("{} not found".format(config))
        sys.exit(1)

    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        os.chdir(output_dir)

    #
    # make one pass over the dataset to validate accuracy
    #
    count = ds.get_item_count()
    # warmup
    ds.load_query_samples([0])

    for _ in range(5):
        batch_dense_X, batch_lS_o, batch_lS_i, _, _ = ds.get_samples([0])
        _ = backend.predict(batch_dense_X, batch_lS_o, batch_lS_i)

    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }

    runner = runner_map[scenario](model,
                                  ds,
                                  args.threads,
                                  post_proc=post_proc,
                                  max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    settings = lg.TestSettings()
    settings.FromConfig(config, args.model, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly

    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.duration:
        settings.min_duration_ms = args.duration
        settings.max_duration_ms = args.duration

    if args.target_qps:
        settings.server_target_qps = float(args.target_qps)
        settings.offline_expected_qps = float(args.target_qps)

    if args.count_queries:
        settings.min_query_count = args.count_queries
        settings.max_query_count = args.count_queries

    if args.samples_per_query_multistream:
        settings.multi_stream_samples_per_query = args.samples_per_query_multistream

    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency *
                                                      NANO_SEC)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, args.samples_per_query_offline),
                          ds.load_query_samples, ds.unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {
        "good": 0,
        "total": 0,
        "roc_auc": 0,
        "scenario": str(scenario)
    }
    runner.start_run(result_dict, args.accuracy)
    lg.StartTest(sut, qsl, settings)

    if not last_timeing:
        last_timeing = runner.result_timing
    if args.accuracy:
        post_proc.finalize(result_dict, ds, output_dir=args.output)
    add_results(final_results, "{}".format(scenario), result_dict,
                last_timeing,
                time.time() - ds.last_loaded, args.accuracy)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
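
Unlike Example #1, which layers user.conf on top of mlperf.conf, this harness loads a single config file. A rough sketch of the layering idiom, assuming `lg` is `mlperf_loadgen` and assuming the reference `<model>.<scenario>.<key> = <value>` file format (neither is shown in this code):

import mlperf_loadgen as lg  # assumed import behind the `lg` alias

settings = lg.TestSettings()
# Benchmark-wide defaults, e.g. a line such as: dlrm.Server.target_latency = 30
settings.FromConfig("mlperf.conf", "dlrm", "Server")
# Submitter overrides loaded second, so keys set here win.
settings.FromConfig("user.conf", "dlrm", "Server")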
Example #3
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)
    if getattr(backend, "max_batchsize", -1) != -1:
        backend.max_batchsize = args.max_batchsize

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # --count applies to accuracy mode only and can be used to limit the number of images
    # used for testing. In performance mode the count is always limited to 200.
    count_override = False
    count = args.count
    if count:
        count_override = True

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[
        args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count,
                        **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    audit_config_cp_loc = None
    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)

        # Check if audit.config file is used, copy to output directory before
        # we chdir to that location so loadgen can find it
        audit_files = glob.glob(
            "ncoresw/mlperf/vision/classification_and_detection/*audit.config")
        if len(audit_files):
            log.info("Found audit.config (" + audit_files[0] + ")")
            audit_config_cp_loc = os.path.join(output_dir, "audit.config")
            # If user already put audit.config at `output` directory, then use
            # that one. Otherwise, copy the one we found in the current
            # directory (before chdir to new output directory).
            if os.path.exists(audit_config_cp_loc):
                log.warning(
                    "audit.config already exists, so cannot copy over new audit file!"
                )
                log.warning(audit_config_cp_loc)
                audit_config_cp_loc = None
            else:
                shutil.copy(audit_files[0], audit_config_cp_loc)

        os.chdir(output_dir)

    #
    # make one pass over the dataset to validate accuracy
    #
    count = ds.get_item_count()

    # warmup
    warmup_queries = range(args.max_batchsize)
    ds.load_query_samples(warmup_queries)
    for _ in range(2):
        img, _ = ds.get_samples(warmup_queries)
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }
    runner = runner_map[scenario](model,
                                  ds,
                                  args.threads,
                                  post_proc=post_proc,
                                  max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    settings = lg.TestSettings()
    settings.FromConfig(mlperf_conf, args.model_name, args.scenario)
    settings.FromConfig(user_conf, args.model_name, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        settings.max_duration_ms = args.time * MILLI_SEC

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if count_override:
        settings.min_query_count = count
        settings.max_query_count = count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query
    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency *
                                                      NANO_SEC)

    # override target latency when it needs to be less than 1ms
    if args.model_name == "mobilenet":
        settings.single_stream_expected_latency_ns = 200000
    elif args.model_name == "resnet50":
        settings.single_stream_expected_latency_ns = 900000
    elif args.model_name == "ssd-mobilenet":
        settings.single_stream_expected_latency_ns = 900000

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 1024), ds.load_query_samples,
                          ds.unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
    runner.start_run(result_dict, args.accuracy)
    lg.StartTest(sut, qsl, settings)

    if not last_timeing:
        last_timeing = runner.result_timing
    if args.accuracy:
        post_proc.finalize(result_dict, ds, output_dir=args.output)
    add_results(final_results, "{}".format(scenario), result_dict,
                last_timeing,
                time.time() - ds.last_loaded, args.accuracy)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    # Dump the summary logs to stdout for convenience
    log.info("Output dir: " + os.path.abspath(output_dir))
    with open(os.path.join(output_dir, "mlperf_log_summary.txt"), 'r') as f:
        log.info(f.read())

    # Output accuracy txt file
    if args.accuracy:
        with open(os.path.join(output_dir, "accuracy.txt"), "w") as f_acc:
            # SSD accuracy calculation
            #----------------------------------------
            # The mAP is already stored in result_dict["mAP"], but we call
            # `accuracy.CocoAcc()` anyway to keep the submission process consistent.
            if args.model_name == "ssd-mobilenet":
                accuracy_str = accuracy.CocoAcc(
                    mlperf_accuracy_file=os.path.join(
                        output_dir, "mlperf_log_accuracy.json"),
                    coco_dir=args.dataset_path).get_accuracy() + "\n"
                f_acc.write(accuracy_str)
                log.info(accuracy_str)

            if args.model_name == "ssd-resnet34":
                accuracy_str = accuracy.CocoAcc(
                    mlperf_accuracy_file=os.path.join(
                        output_dir, "mlperf_log_accuracy.json"),
                    coco_dir=args.dataset_path,
                    use_inv_map=True,
                    remove_48_empty_images=False).get_accuracy() + "\n"
                f_acc.write(accuracy_str)
                log.info(accuracy_str)

            # ImageNet accuracy calculation
            #----------------------------------------
            # The good / total values are already stored in result_dict["good"]
            # and result_dict["total"], but we call `accuracy.ImagenetAcc()`
            # anyway to keep the submission process consistent.
            else:
                accuracy_str = accuracy.ImagenetAcc(
                    mlperf_accuracy_file=os.path.join(
                        output_dir, "mlperf_log_accuracy.json"),
                    imagenet_val_file=os.path.join(
                        args.dataset_path,
                        "val_map.txt")).get_accuracy() + "\n"
                f_acc.write(accuracy_str)
                log.info(accuracy_str)

    #
    # write final results
    #
    if args.output:
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
        if audit_config_cp_loc is not None:
            os.remove(audit_config_cp_loc)

    backend_destroy = getattr(backend, "destroy", None)
    if callable(backend_destroy):
        backend.destroy()
Example #4
def main():
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[
        args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=args.count,
                        **kwargs)

    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)

    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    count = args.count if args.count else ds.get_item_count()

    runner = Runner(model, ds, args.threads, post_proc=post_proc)
    runner.start_pool()

    # warmup
    log.info("warmup ...")
    ds.load_query_samples([0])
    for _ in range(50):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    def issue_query(query_samples):
        idx = [q.index for q in query_samples]
        query_id = [q.id for q in query_samples]
        data, label = ds.get_samples(idx)
        runner.enqueue(query_id, idx, data, label)

    def process_latencies(latencies_ns):
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    sut = lg.ConstructSUT(issue_query, process_latencies)
    qsl = lg.ConstructQSL(count, count, ds.load_query_samples,
                          ds.unload_query_samples)

    scenarios = [
        lg.TestScenario.SingleStream,
        lg.TestScenario.MultiStream,
        lg.TestScenario.Server,
        # lg.TestScenario.Offline,
    ]
    for scenario in scenarios:
        for target_latency in args.max_latency:
            log.info("starting {}, latency={}".format(scenario,
                                                      target_latency))
            settings = lg.TestSettings()
            settings.scenario = scenario

            if args.qps:
                settings.enable_spec_overrides = True
                qps = float(args.qps)
                settings.server_target_qps = qps
                settings.offline_expected_qps = qps
            if args.time:
                settings.enable_spec_overrides = True
                settings.override_min_duration_ms = args.time * MILLI_SEC
                settings.override_max_duration_ms = args.time * MILLI_SEC
                qps = args.qps or 100
                settings.override_min_query_count = qps * args.time
                settings.override_max_query_count = qps * args.time

            if args.time or args.qps:
                settings.mode = lg.TestMode.PerformanceOnly
            # FIXME: add SubmissionRun once available

            settings.enable_spec_overrides = True  # FIXME: needed because of override_target_latency_ns
            settings.single_stream_expected_latency_ns = int(target_latency *
                                                             NANO_SEC)
            settings.override_target_latency_ns = int(target_latency *
                                                      NANO_SEC)

            # reset result capture
            result_dict = {"good": 0, "total": 0}
            runner.start_run(result_dict, True)
            start = time.time()
            lg.StartTest(sut, qsl, settings)

            # aggregate results
            post_proc.finalize(result_dict, ds)

            add_results(final_results, "{}-{}".format(scenario,
                                                      target_latency),
                        result_dict, last_timeing,
                        time.time() - start)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
Пример #5
0
def main():

    global so
    global last_timeing
    global last_loaded
    global result_timeing

    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # --count applies to accuracy mode only and can be used to limit the number of images
    # used for testing. In performance mode the count is always limited to 200.
    count_override = False
    count = args.count
    if count:
        count_override = True
    """
    Python signature
    go_initialize(backend, model_path, dataset_path, count, use_gpu, gpu_id, trace_level, max_batchsize)
    """

    count, err = go_initialize(backend, args.model_path, args.dataset_path,
                               count, args.use_gpu, args.gpu_id,
                               args.trace_level, args.max_batchsize)

    if err != 'nil':
        print(err)
        raise RuntimeError('initialization in go failed')

    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    log_dir = None

    if args.log_dir:
        log_dir = os.path.abspath(args.log_dir)
        os.makedirs(log_dir, exist_ok=True)

    scenario = SCENARIO_MAP[args.scenario]

    def issue_queries(query_samples):
        global so
        global last_timeing
        global result_timeing
        idx = np.array([q.index for q in query_samples]).astype(np.int32)
        query_id = [q.id for q in query_samples]
        if args.dataset == 'brats2019':
            start = time.time()
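            # Keep references to the response buffers so they stay alive
            # until lg.QuerySamplesComplete has consumed them.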
            response_array_refs = []
            response = []
            for i, qid in enumerate(query_id):
                processed_results = so.IssueQuery(1, idx[i][np.newaxis])
                processed_results = json.loads(
                    processed_results.decode('utf-8'))
                response_array = array.array(
                    "B",
                    np.array(processed_results[0], np.float16).tobytes())
                response_array_refs.append(response_array)
                bi = response_array.buffer_info()
                response.append(lg.QuerySampleResponse(qid, bi[0], bi[1]))
            result_timeing.append(time.time() - start)
            lg.QuerySamplesComplete(response)
        else:
            start = time.time()
            processed_results = so.IssueQuery(len(idx), idx)
            result_timeing.append(time.time() - start)
            processed_results = json.loads(processed_results.decode('utf-8'))
            response_array_refs = []
            response = []
            for i, qid in enumerate(query_id):
                response_array = array.array(
                    "B",
                    np.array(processed_results[i], np.float32).tobytes())
                response_array_refs.append(response_array)
                bi = response_array.buffer_info()
                response.append(lg.QuerySampleResponse(qid, bi[0], bi[1]))
            lg.QuerySamplesComplete(response)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    def load_query_samples(sample_list):
        global so
        global last_loaded
        err = go_load_query_samples(sample_list, so)
        last_loaded = time.time()
        if err != '':
            print(err)
            raise RuntimeError('load query samples failed')

    def unload_query_samples(sample_list):
        global so
        err = go_unload_query_samples(sample_list, so)
        if err != '':
            print(err)
            raise RuntimeError('unload query samples failed')

    settings = lg.TestSettings()
    if args.model_name != "":
        settings.FromConfig(mlperf_conf, args.model_name, args.scenario)
        settings.FromConfig(user_conf, args.model_name, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        settings.max_duration_ms = args.time * MILLI_SEC

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if count_override:
        settings.min_query_count = count
        settings.max_query_count = count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query
    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency *
                                                      NANO_SEC)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 500), load_query_samples,
                          unload_query_samples)

    log.info("starting {}".format(scenario))

    log_path = os.path.realpath(args.log_dir)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings
    # log_settings.enable_trace = True
    # lg.StartTest(sut, qsl, settings)
    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    if not last_timeing:
        last_timeing = result_timeing

    if args.accuracy:
        accuracy_script_paths = {
            'coco':
            os.path.realpath(
                '../inference/vision/classification_and_detection/tools/accuracy-coco.py'
            ),
            'imagenet':
            os.path.realpath(
                '../inference/vision/classification_and_detection/tools/accuracy-imagenet.py'
            ),
            'squad':
            os.path.realpath('../inference/language/bert/accuracy-squad.py'),
            'brats2019':
            os.path.realpath(
                '../inference/vision/medical_imaging/3d-unet/accuracy-brats.py'
            ),
        }
        accuracy_script_path = accuracy_script_paths[args.dataset]
        accuracy_file_path = os.path.join(log_dir, 'mlperf_log_accuracy.json')
        data_dir = os.environ['DATA_DIR']
        if args.dataset == 'coco':
            if args.use_inv_map:
                subprocess.check_call(
                    'python3 {} --mlperf-accuracy-file {} --coco-dir {} --use-inv-map'
                    .format(accuracy_script_path, accuracy_file_path,
                            data_dir),
                    shell=True)
            else:
                subprocess.check_call(
                    'python3 {} --mlperf-accuracy-file {} --coco-dir {}'.
                    format(accuracy_script_path, accuracy_file_path, data_dir),
                    shell=True)
        elif args.dataset == 'imagenet':  # imagenet
            subprocess.check_call(
                'python3 {} --mlperf-accuracy-file {} --imagenet-val-file {}'.
                format(accuracy_script_path, accuracy_file_path,
                       os.path.join(data_dir, 'val_map.txt')),
                shell=True)
        elif args.dataset == 'squad':  # squad
            vocab_path = os.path.join(data_dir, 'vocab.txt')
            val_path = os.path.join(data_dir, 'dev-v1.1.json')
            out_path = os.path.join(log_dir, 'predictions.json')
            cache_path = os.path.join(data_dir, 'eval_features.pickle')
            subprocess.check_call(
                'python3 {} --vocab_file {} --val_data {} --log_file {} --out_file {} --features_cache_file {} --max_examples {}'
                .format(accuracy_script_path, vocab_path, val_path,
                        accuracy_file_path, out_path, cache_path, count),
                shell=True)
        elif args.dataset == 'brats2019':  # brats2019
            base_dir = os.path.realpath(
                '../inference/vision/medical_imaging/3d-unet/build')
            post_dir = os.path.join(base_dir, 'postprocessed_data')
            label_dir = os.path.join(
                base_dir,
                'raw_data/nnUNet_raw_data/Task043_BraTS2019/labelsTr')
            os.makedirs(post_dir, exist_ok=True)
            subprocess.check_call(
                'python3 {} --log_file {} --preprocessed_data_dir {} --postprocessed_data_dir {} --label_data_dir {}'
                .format(accuracy_script_path, accuracy_file_path, data_dir,
                        post_dir, label_dir),
                shell=True)
        else:
            raise RuntimeError('Dataset not implemented.')

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
    """
    Python signature
    go_finalize(so)
    """
    err = go_finalize(so)
    if err != '':
        print(err)
        raise RuntimeError('finalize in go failed')
Example #6
if __name__ == "__main__":
  runner = DummyRunner()

  runner.start_worker()

  settings = mlperf_loadgen.TestSettings()
  settings.scenario = mlperf_loadgen.TestScenario.SingleStream
  settings.mode = mlperf_loadgen.TestMode.PerformanceOnly

  # Specify exactly how many queries need to be made
  settings.min_query_count = 3003
  settings.max_query_count = 3003
  total_queries = 256  # Maximum sample ID + 1
  perf_queries = 8  # TBD: Doesn't seem to have an effect

  sut = mlperf_loadgen.ConstructSUT(runner.enqueue, runner.flush_queries,
                                    process_latencies)
  qsl = mlperf_loadgen.ConstructQSL(total_queries, perf_queries,
                                    runner.load_samples_to_ram,
                                    runner.unload_samples_from_ram)

  log_settings = mlperf_loadgen.LogSettings()
  log_settings.log_output.copy_detail_to_stdout = True
  log_settings.log_output.copy_summary_to_stdout = True
  log_settings.enable_trace = False
  mlperf_loadgen.StartTestWithLogSettings(sut, qsl, settings, log_settings)

  runner.finish()
  mlperf_loadgen.DestroyQSL(qsl)
  mlperf_loadgen.DestroySUT(sut)
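
DummyRunner and process_latencies are not defined in this fragment. A hypothetical stand-in matching the method names used above (the real class may differ) could look like:

import array
import queue
import threading

import mlperf_loadgen


def process_latencies(latencies_ns):
    # Stub for the latency callback referenced above.
    pass


class DummyRunner:
    # Hypothetical runner that answers every query with a dummy byte.

    def __init__(self):
        self.q = queue.Queue()

    def start_worker(self):
        self.worker = threading.Thread(target=self._loop, daemon=True)
        self.worker.start()

    def _loop(self):
        while True:
            samples = self.q.get()
            if samples is None:
                return
            responses, buffers = [], []
            for s in samples:
                buf = array.array("B", b"\x00")
                buffers.append(buf)  # keep alive until QuerySamplesComplete
                addr, length = buf.buffer_info()
                responses.append(
                    mlperf_loadgen.QuerySampleResponse(s.id, addr, length))
            mlperf_loadgen.QuerySamplesComplete(responses)

    def enqueue(self, query_samples):
        self.q.put(query_samples)

    def flush_queries(self):
        pass

    def load_samples_to_ram(self, sample_indices):
        pass

    def unload_samples_from_ram(self, sample_indices):
        pass

    def finish(self):
        self.q.put(None)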
Example #7
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # --count applies to accuracy mode only and can be used to limit the number of images
    # used for testing. In performance mode the count is always limited to 200.
    count = args.count
    if not count and not args.accuracy:
        count = 200

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[
        args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count,
                        **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    count = ds.get_item_count()

    # warmup
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    for scenario in args.scenario:
        runner_map = {
            lg.TestScenario.SingleStream: RunnerBase,
            lg.TestScenario.MultiStream: QueueRunner,
            lg.TestScenario.Server: QueueRunner,
            lg.TestScenario.Offline: QueueRunner
        }
        runner = runner_map[scenario](model,
                                      ds,
                                      args.threads,
                                      post_proc=post_proc,
                                      max_batchsize=args.max_batchsize)

        def issue_queries(query_samples):
            runner.enqueue(query_samples)

        def flush_queries():
            pass

        def process_latencies(latencies_ns):
            # called by loadgen to show us the recorded latencies
            global last_timeing
            last_timeing = [t / NANO_SEC for t in latencies_ns]

        settings = lg.TestSettings()
        settings.scenario = scenario
        settings.mode = lg.TestMode.PerformanceOnly
        if args.accuracy:
            settings.mode = lg.TestMode.AccuracyOnly

        if args.time:
            # override the time we want to run
            settings.min_duration_ms = args.time * MILLI_SEC
            settings.max_duration_ms = args.time * MILLI_SEC

        if args.qps:
            qps = float(args.qps)
            settings.server_target_qps = qps
            settings.offline_expected_qps = qps

        if scenario == lg.TestScenario.SingleStream:
            settings.min_query_count = args.queries_single
            settings.max_query_count = args.queries_single
        elif scenario == lg.TestScenario.MultiStream:
            settings.min_query_count = args.queries_multi
            settings.max_query_count = args.queries_multi
            settings.multi_stream_samples_per_query = 4
        elif scenario == lg.TestScenario.Server:
            max_latency = args.max_latency
        elif scenario == lg.TestScenario.Offline:
            settings.min_query_count = args.queries_offline
            settings.max_query_count = args.queries_offline

        sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
        qsl = lg.ConstructQSL(count, min(count, 1000), ds.load_query_samples,
                              ds.unload_query_samples)

        if scenario == lg.TestScenario.Server:
            for target_latency in max_latency:
                log.info("starting {}, latency={}".format(
                    scenario, target_latency))
                settings.server_target_latency_ns = int(target_latency *
                                                        NANO_SEC)

                result_dict = {
                    "good": 0,
                    "total": 0,
                    "scenario": str(scenario)
                }
                runner.start_run(result_dict, args.accuracy)
                lg.StartTest(sut, qsl, settings)

                if not last_timeing:
                    last_timeing = runner.result_timing
                if args.accuracy:
                    post_proc.finalize(result_dict,
                                       ds,
                                       output_dir=os.path.dirname(args.output))
                add_results(final_results,
                            "{}-{}".format(scenario, target_latency),
                            result_dict, last_timeing,
                            time.time() - ds.last_loaded, args.accuracy)
        else:
            log.info("starting {}".format(scenario))
            result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
            runner.start_run(result_dict, args.accuracy)
            lg.StartTest(sut, qsl, settings)

            if not last_timeing:
                last_timeing = runner.result_timing
            if args.accuracy:
                post_proc.finalize(result_dict,
                                   ds,
                                   output_dir=os.path.dirname(args.output))
            add_results(final_results, "{}".format(scenario), result_dict,
                        last_timeing,
                        time.time() - ds.last_loaded, args.accuracy)

        runner.finish()
        lg.DestroyQSL(qsl)
        lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
Example #8
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=args.count, **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    count = args.count if args.count else ds.get_item_count()

    if args.accuracy:
        #
        # accuracy pass
        #
        log.info("starting accuracy pass on {} items".format(count))
        last_timeing = []
        runner = RunnerBase(model, ds, args.threads, post_proc=post_proc)
        result_dict = {"good": 0, "total": 0, "scenario": "Accuracy"}
        runner.start_run(result_dict, True)
        start = time.time()
        for idx in range(0, count):
            ds.load_query_samples([idx])
            data, label = ds.get_samples([idx])
            start_one = time.time()
            runner.enqueue([idx], [idx], data, label)
            last_timeing.append(time.time() - start_one)
        runner.finish()
        # aggregate results
        post_proc.finalize(result_dict, ds, output_dir=os.path.dirname(args.output))
        add_results(final_results, "Accuracy", result_dict, last_timeing, time.time() - start)

    # warmup
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    for scenario in args.scenario:
        runner_map = {
            lg.TestScenario.SingleStream: RunnerBase,
            lg.TestScenario.MultiStream: QueueRunner,
            lg.TestScenario.Server: QueueRunner,
            lg.TestScenario.Offline: QueueRunner
        }
        runner = runner_map[scenario](model, ds, args.threads, post_proc=post_proc)

        def issue_query(query_samples):
            # called by loadgen to issue queries
            idx = [q.index for q in query_samples]
            query_id = [q.id for q in query_samples]
            data, label = ds.get_samples(idx)
            runner.enqueue(query_id, idx, data, label)

        def process_latencies(latencies_ns):
            # called by loadgen to show us the recorded latencies
            global last_timeing
            last_timeing = [t / 1e9 for t in latencies_ns]

        settings = lg.TestSettings()
        settings.enable_spec_overrides = True
        settings.scenario = scenario
        settings.mode = lg.TestMode.PerformanceOnly
        settings.multi_stream_samples_per_query = 8

        if args.time:
            # override the time we want to run
            settings.enable_spec_overrides = True
            settings.override_min_duration_ms = args.time * MILLI_SEC
            settings.override_max_duration_ms = args.time * MILLI_SEC

        if args.qps:
            qps = float(args.qps)
            settings.server_target_qps = qps
            settings.offline_expected_qps = qps

        # mlperf rules - min queries
        if scenario == lg.TestScenario.SingleStream:
            settings.override_min_query_count = args.queries_single
            settings.override_max_query_count = args.queries_single
        else:
            settings.override_min_query_count = args.queries_multi
            settings.override_max_query_count = args.queries_multi

        sut = lg.ConstructSUT(issue_query, process_latencies)
        qsl = lg.ConstructQSL(count, min(count, 1000), ds.load_query_samples, ds.unload_query_samples)

        for target_latency in args.max_latency:
            log.info("starting {}, latency={}".format(scenario, target_latency))

            settings.single_stream_expected_latency_ns = int(target_latency * NANO_SEC)
            settings.override_target_latency_ns = int(target_latency * NANO_SEC)

            result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
            runner.start_run(result_dict, False)
            lg.StartTest(sut, qsl, settings)

            add_results(final_results, "{}-{}".format(scenario, target_latency),
                        result_dict, last_timeing, time.time() - ds.last_loaded)

        runner.finish()
        lg.DestroyQSL(qsl)
        lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
Example #9
def main():
    global num_ins
    global num_cpus
    global in_queue_cnt
    global out_queue_cnt
    global batching
    global bs_step

    args = get_args()
    log.info(args)
    scenario = args.scenario
    accuracy_mode = args.accuracy
    perf_count = args.perf_count
    batch_size = args.batch_size
    num_ins = args.num_instance
    num_cpus = args.num_phy_cpus
    batching = args.batching

    ## TODO, remove
    log.info('Run with {} instance on {} cpus: '.format(num_ins, num_cpus))

    # Establish communication queues
    lock = multiprocessing.Lock()
    init_counter = multiprocessing.Value("i", 0)
    calibrate_counter = multiprocessing.Value("i", 0)
    out_queue = multiprocessing.Queue()
    in_queue = MultiprocessShapeBasedQueue()

    if args.perf_calibrate:
        with open('prof_new.py', 'w') as f:
            print('prof_bs_step = {}'.format(bs_step), file=f)
            print('prof_map = {', file=f)

    # Start consumers
    consumers = [
        Consumer(in_queue, out_queue, lock, init_counter, calibrate_counter, i,
                 num_ins, args) for i in range(num_ins)
    ]
    for c in consumers:
        c.start()

    # used by constructQSL
    data_set = BERTDataSet(args.vocab, args.perf_count)
    issue_queue = InQueue(in_queue, batch_size, data_set)

    # Wait until all sub-processors ready to do calibration
    block_until(calibrate_counter, num_ins)
    # Wait until all sub-processors done calibration
    block_until(calibrate_counter, 2 * num_ins)
    if args.perf_calibrate:
        with open('prof_new.py', 'a') as f:
            print('}', file=f)
        sys.exit(0)
    # Wait until all sub-processors are ready
    block_until(init_counter, num_ins)

    # Start response thread
    response_worker = threading.Thread(target=response_loadgen,
                                       args=(out_queue, ))
    response_worker.daemon = True
    response_worker.start()

    # Start loadgen
    settings = lg.TestSettings()
    settings.scenario = scenario_map[scenario]
    settings.FromConfig(args.mlperf_conf, "bert", scenario)
    settings.FromConfig(args.user_conf, "bert", scenario)
    settings.mode = lg.TestMode.AccuracyOnly if accuracy_mode else lg.TestMode.PerformanceOnly

    # TODO, for debug, remove
    #settings.server_target_qps = 40
    #settings.server_target_latency_ns = 100000000
    #settings.min_query_count = 100
    #settings.min_duration_ms = 10000

    def issue_queries(query_samples):
        # It's called by loadgen to send query to SUT
        issue_queue.put(query_samples)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(data_set.count, data_set.perf_count,
                          load_query_samples, unload_query_samples)

    log_path = "build/logs"
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    #lg.StartTest(sut, qsl, settings)
    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    # Wait until outQueue done
    while out_queue_cnt < in_queue_cnt:
        time.sleep(0.2)

    in_queue.join()
    for i in range(num_ins):
        in_queue.put(None)
    for c in consumers:
        c.join()
    out_queue.put(None)

    if accuracy_mode:
        cmd = "python accuracy-squad.py --log_file={}/mlperf_log_accuracy.json".format(
            log_path)
        subprocess.check_call(cmd, shell=True)

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
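
Example #9 relies on module-level helpers (block_until, flush_queries, process_latencies, load_query_samples, unload_query_samples) defined elsewhere. A plausible sketch of the synchronization helper, assuming the counters are the multiprocessing.Value("i", 0) objects constructed above:

import time


def block_until(counter, target, interval_s=0.05):
    # Poll the shared counter until `target` workers have checked in;
    # plain reads of Value.value are adequate for a monotonically
    # increasing progress counter.
    while counter.value < target:
        time.sleep(interval_s)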
Example #10
def main():
    args = get_args()

    print(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # dataset to use
    wanted_dataset, preprocessor, postprocessor, kwargs = SUPPORTED_DATASETS[
        args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=preprocessor,
                        use_cache=args.cache,
                        count=args.count,
                        **kwargs)

    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)

    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    count = args.count if args.count else ds.get_item_count()

    runner = Runner(model, ds, args.threads, post_process=postprocessor)
    runner.start_pool()

    # warmup
    log.info("warmup ...")
    ds.load_query_samples([0])
    for _ in range(100):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})

    def issue_query(query_samples):
        idx = [q.index for q in query_samples]
        query_id = [q.id for q in query_samples]
        data, label = ds.get_samples(idx)
        runner.enqueue(query_id, data, label)

    sut = lg.ConstructSUT(issue_query)
    qsl = lg.ConstructQSL(count, args.time, ds.load_query_samples,
                          ds.unload_query_samples)
    scenarios = [
        # lg.TestScenario.SingleStream,
        lg.TestScenario.MultiStream,
        # lg.TestScenario.Cloud,
        # lg.TestScenario.Offline,
    ]
    for scenario in scenarios:
        for target_latency in args.max_latency:
            log.info("starting {}, latency={}".format(scenario,
                                                      target_latency))
            settings = lg.TestSettings()
            settings.scenario = scenario
            settings.mode = lg.TestMode.SubmissionRun
            settings.samples_per_query = 4  # FIXME: we don't want to know about this
            settings.target_qps = 1000  # FIXME: we don't want to know about this
            settings.target_latency_ns = int(target_latency * 1000000000)

            result_list = []
            result_dict = {"good": 0, "total": 0}
            runner.start_run(result_list, result_dict)
            start = time.time()
            lg.StartTest(sut, qsl, settings)
            add_results(final_results, "{}-{}".format(scenario,
                                                      target_latency),
                        result_dict, result_list,
                        time.time() - start)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
Example #11
def main(argv):
    del argv

    global last_timeing

    if FLAGS.scenario == "Server":
        # Disable garbage collection for realtime performance.
        gc.disable()

    # define backend
    backend = BackendTensorflow()

    # override image format if given
    image_format = FLAGS.data_format if FLAGS.data_format else backend.image_format()

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[
        FLAGS.dataset]
    ds = wanted_dataset(data_path=FLAGS.dataset_path,
                        image_list=FLAGS.dataset_list,
                        name=FLAGS.dataset,
                        image_format=image_format,
                        use_cache=FLAGS.cache,
                        count=FLAGS.count,
                        cache_dir=FLAGS.cache_dir,
                        annotation_file=FLAGS.annotation_file,
                        use_space_to_depth=FLAGS.use_space_to_depth)
    # load model to backend
    # TODO(wangtao): parse flags to params.
    params = dict(ssd_model.default_hparams().values())
    params["conv0_space_to_depth"] = FLAGS.use_space_to_depth
    params["use_bfloat16"] = FLAGS.use_bfloat16
    params["use_fused_bn"] = FLAGS.use_fused_bn

    masters = []
    tpu_names = FLAGS.tpu_name
    tpu_names = tpu_names.split(",")
    for tpu_name in tpu_names:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
        masters.append(tpu_cluster_resolver.get_master())

    #
    # make one pass over the dataset to validate accuracy
    #
    count = FLAGS.count if FLAGS.count else ds.get_item_count()

    #
    # warmup
    #
    log.info("warmup ...")

    batch_size = FLAGS.batch_size[0] if FLAGS.scenario == "Offline" else 1
    backend_lists = []
    for _ in range(len(tpu_names)):
        backend = BackendTensorflow()
        backend_lists.append(backend)
    runner = QueueRunner(backend_lists,
                         ds,
                         FLAGS.threads,
                         post_proc=post_proc,
                         max_batchsize=batch_size)

    runner.start_run({}, FLAGS.accuracy)

    def issue_queries(query_samples):
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    tf.logging.info("starting {}, latency={}".format(FLAGS.scenario,
                                                     FLAGS.max_latency))
    settings = lg.TestSettings()
    tf.logging.info(FLAGS.scenario)
    settings.scenario = SCENARIO_MAP[FLAGS.scenario]
    settings.qsl_rng_seed = FLAGS.qsl_rng_seed
    settings.sample_index_rng_seed = FLAGS.sample_index_rng_seed
    settings.schedule_rng_seed = FLAGS.schedule_rng_seed

    if FLAGS.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    else:
        settings.mode = lg.TestMode.PerformanceOnly

    if FLAGS.qps:
        qps = float(FLAGS.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if FLAGS.time:
        settings.min_duration_ms = FLAGS.time * MILLI_SEC
        settings.max_duration_ms = 0
        qps = FLAGS.qps or 100
        settings.min_query_count = qps * FLAGS.time
        settings.max_query_count = 0
    else:
        settings.min_query_count = 270336
        settings.max_query_count = 0

    target_latency_ns = int(float(FLAGS.max_latency) * NANO_SEC)
    settings.single_stream_expected_latency_ns = target_latency_ns
    settings.multi_stream_target_latency_ns = target_latency_ns
    settings.server_target_latency_ns = target_latency_ns

    log_settings = lg.LogSettings()
    log_settings.log_output.outdir = tempfile.mkdtemp()
    log_settings.log_output.copy_detail_to_stdout = True
    log_settings.log_output.copy_summary_to_stdout = True
    log_settings.enable_trace = False

    def load_query_samples(sample_list):
        """Load query samples and warmup the model."""
        ds.load_query_samples(sample_list)
        data = ds.get_image_list_inmemory()

        def init_fn(cloud_tpu_id):
            tf.logging.info("Load model for %dth cloud tpu", cloud_tpu_id)
            runner.models[cloud_tpu_id].load(
                FLAGS.model,
                FLAGS.output_model_dir,
                data,
                params,
                batch_size=FLAGS.batch_size,
                master=masters[cloud_tpu_id],
                scenario=FLAGS.scenario,
                batch_timeout_micros=FLAGS.batch_timeout_micros)

            # Init TPU.
            for it in range(FLAGS.init_iterations):
                tf.logging.info("Initialize cloud tpu at iteration %d", it)
                for batch_size in FLAGS.batch_size:
                    example, _ = ds.get_indices([sample_list[0]] * batch_size)
                    _ = runner.models[cloud_tpu_id].predict(example)

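        # Initialize all TPUs in parallel, one loader thread per device.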
        threads = []
        for i in range(len(tpu_names)):
            thread = threading.Thread(target=init_fn, args=(i, ))
            threads.append(thread)
            thread.start()

        for thread in threads:
            thread.join()

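    # Wire the callbacks into loadgen; min(count, 350) is the number of
    # samples kept resident for the performance run.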
    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 350), load_query_samples,
                          ds.unload_query_samples)

    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

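    # Copy the loadgen logs from the temporary directory into FLAGS.outdir.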
    tf.io.gfile.mkdir(FLAGS.outdir)

    for oldfile in tf.gfile.Glob(
            os.path.join(log_settings.log_output.outdir, "*")):
        basename = os.path.basename(oldfile)
        newfile = os.path.join(FLAGS.outdir, basename)
        tf.gfile.Copy(oldfile, newfile, overwrite=True)

    if FLAGS.accuracy:
        with tf.gfile.Open(os.path.join(FLAGS.outdir, "results.txt"),
                           "w") as f:
            results = {"mAP": accuracy_coco.main()}
            json.dump(results, f, sort_keys=True, indent=4)
Example #12
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[
        args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=args.count,
                        **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    count = args.count if args.count else ds.get_item_count()

    runner = Runner(model, ds, args.threads, post_proc=post_proc)

    #
    # warmup
    #
    log.info("warmup ...")
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    if args.accuracy:
        #
        # accuracy pass
        #
        log.info("starting accuracy pass on {} items".format(count))
        runner.start_pool(nolg=True)
        result_dict = {
            "good": 0,
            "total": 0,
            "scenario": "Accuracy",
            "timing": []
        }
        runner.start_run(result_dict, True)
        start = time.time()
        for idx in range(0, count):
            ds.load_query_samples([idx])
            data, label = ds.get_samples([idx])
            runner.enqueue([idx], [idx], data, label)
        runner.finish()
        # aggregate results
        post_proc.finalize(result_dict,
                           ds,
                           output_dir=os.path.dirname(args.output))
        last_timeing = result_dict["timing"]
        del result_dict["timing"]
        add_results(final_results, "Accuracy", result_dict, last_timeing,
                    time.time() - start)

    #
    # run the benchmark with timing
    #
    runner.start_pool()

    def issue_query(query_samples):
        idx = [q.index for q in query_samples]
        query_id = [q.id for q in query_samples]
        data, label = ds.get_samples(idx)
        runner.enqueue(query_id, idx, data, label)

    def process_latencies(latencies_ns):
        global last_timeing
        last_timeing = [t / 1e9 for t in latencies_ns]

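    # Note: this example appears to target an older loadgen API in which
    # ConstructSUT took only the issue and latency callbacks.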
    sut = lg.ConstructSUT(issue_query, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 1000), ds.load_query_samples,
                          ds.unload_query_samples)

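    # Sweep every scenario/latency combination given on the command line.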
    for scenario in args.scenario:
        for target_latency in args.max_latency:
            log.info("starting {}, latency={}".format(scenario,
                                                      target_latency))
            settings = lg.TestSettings()
            log.info(scenario)
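            # The --scenario list can apparently mix in TestMode values;
            # the string check routes those to settings.mode instead.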
            if str(scenario) == 'TestMode.AccuracyOnly':
                settings.mode = scenario
            else:
                settings.scenario = scenario

            if args.qps:
                settings.enable_spec_overrides = True
                qps = float(args.qps)
                settings.server_target_qps = qps
                settings.offline_expected_qps = qps

            if args.time:
                settings.enable_spec_overrides = True
                settings.override_min_duration_ms = args.time * MILLI_SEC
                settings.override_max_duration_ms = args.time * MILLI_SEC
                qps = args.qps or 100
                settings.override_min_query_count = qps * args.time
                settings.override_max_query_count = qps * args.time

            # Both overrides force a performance run unless the scenario
            # entry is the accuracy mode.
            if (args.time or args.qps) and str(
                    scenario) != 'TestMode.AccuracyOnly':
                settings.mode = lg.TestMode.PerformanceOnly
            # FIXME: add SubmissionRun once available

            settings.enable_spec_overrides = True
            settings.single_stream_expected_latency_ns = int(target_latency *
                                                             NANO_SEC)
            settings.override_target_latency_ns = int(target_latency *
                                                      NANO_SEC)

            result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
            runner.start_run(result_dict, False)
            lg.StartTest(sut, qsl, settings)

            add_results(final_results, "{}-{}".format(scenario,
                                                      target_latency),
                        result_dict, last_timeing,
                        time.time() - ds.last_loaded)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
Example #13
    def __init__(self,
                 config_toml,
                 checkpoint_path,
                 dataset_dir,
                 manifest_filepath,
                 perf_count,
                 total_query_count,
                 scenario,
                 machine_conf,
                 batch_size=1,
                 cores_for_loadgen=0,
                 cores_per_instance=1,
                 enable_debug=False,
                 cosim=False,
                 profile=False,
                 ipex=False,
                 bf16=False,
                 warmup=False):
        ### multi instance attributes
        self.batch_size = batch_size
        self.cores_for_loadgen = cores_for_loadgen
        self.cores_per_instance = cores_per_instance
        self.num_cores = get_num_cores()
        self.lock = mp.Lock()
        self.init_counter = mp.Value("i", 0)
        self.output_queue = mp.Queue()
        self.input_queue = mp.JoinableQueue()
        self.cosim = cosim
        self.ipex = ipex
        self.bf16 = bf16
        self.warmup = warmup
        self.scenario = scenario

        # server-specific
        self.num_queues = None
        self.core_count_list = []
        self.num_instance_list = []
        self.seq_cutoff_list = []
        self.batch_size_list = []
        self.input_queue_list = []
        self.total_query_count = total_query_count

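        # In Server mode the machine config defines per-bucket core counts,
        # instance counts, sequence-length cutoffs and batch sizes, and each
        # bucket gets its own input queue (inferred from the field names).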
        if self.scenario == "Server":
            # read config
            self.read_machine_conf(machine_conf)
            # create queue list
            for _ in range(self.num_queues):
                self.input_queue_list.append(mp.JoinableQueue())

        config = toml.load(config_toml)

        dataset_vocab = config['labels']['labels']
        rnnt_vocab = add_blank_label(dataset_vocab)
        featurizer_config = config['input_eval']

        self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries,
                                   self.process_latencies)
        self.qsl = AudioQSLInMemory(dataset_dir, manifest_filepath,
                                    dataset_vocab,
                                    featurizer_config["sample_rate"],
                                    perf_count)

        if self.scenario == "Offline":
            self.issue_queue = InQueue(self.input_queue, batch_size)
        elif self.scenario == "Server":
            self.issue_queue = InQueueServer(self.input_queue_list, self.qsl,
                                             self.seq_cutoff_list,
                                             self.batch_size_list,
                                             self.total_query_count)

        ### worker process
        self.consumers = []
        cur_core_idx = self.cores_for_loadgen
        rank = 0
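        # Pin each consumer to a contiguous range of cores, starting after
        # the cores reserved for loadgen itself.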
        if self.scenario == "Offline":
            while cur_core_idx + self.cores_per_instance <= self.num_cores:
                self.consumers.append(
                    Consumer(self.input_queue, self.output_queue, self.lock,
                             self.init_counter, rank, cur_core_idx,
                             cur_core_idx + self.cores_per_instance - 1,
                             self.num_cores, self.qsl, config_toml,
                             checkpoint_path, dataset_dir, manifest_filepath,
                             perf_count, cosim, profile, ipex, bf16, warmup))
                rank += 1
                cur_core_idx += self.cores_per_instance
        elif self.scenario == "Server":
            for i in range(self.num_queues):
                curr_cores_per_instance = self.core_count_list[i]
                for _ in range(self.num_instance_list[i]):
                    self.consumers.append(
                        Consumer(self.input_queue_list[i], self.output_queue,
                                 self.lock, self.init_counter, rank,
                                 cur_core_idx,
                                 cur_core_idx + curr_cores_per_instance - 1,
                                 self.num_cores, self.qsl, config_toml,
                                 checkpoint_path, dataset_dir,
                                 manifest_filepath, perf_count, cosim, profile,
                                 ipex, bf16, warmup))
                    rank += 1
                    cur_core_idx += curr_cores_per_instance
        self.num_instances = len(self.consumers)

        ### start worker process
        for c in self.consumers:
            c.start()

        ### wait until all sub processes are ready
        block_until(self.init_counter, self.num_instances, 2)

        ### start response thread
        self.response_worker = threading.Thread(target=response_loadgen,
                                                args=(self.output_queue, ))
        self.response_worker.daemon = True
        self.response_worker.start()

        ### debug
        global debug
        debug = enable_debug
Example #14
def main(argv):
    del argv

    settings = mlperf_loadgen.TestSettings()
    settings.qsl_rng_seed = FLAGS.qsl_rng_seed
    settings.sample_index_rng_seed = FLAGS.sample_index_rng_seed
    settings.schedule_rng_seed = FLAGS.schedule_rng_seed
    if FLAGS.accuracy_mode:
        settings.mode = mlperf_loadgen.TestMode.AccuracyOnly
    else:
        settings.mode = mlperf_loadgen.TestMode.PerformanceOnly
    settings.scenario = SCENARIO_MAP[FLAGS.scenario]
    if FLAGS.qps:
        qps = float(FLAGS.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if FLAGS.scenario == "Offline" or FLAGS.scenario == "Server":
        # FLAGS.master may hold a comma-separated list of master addresses.
        masters = FLAGS.master.split(",")

        runner = loadgen_gnmt.GNMTRunner(input_file=FLAGS.input_file,
                                         ckpt_path=FLAGS.ckpt_path,
                                         hparams_path=FLAGS.hparams_path,
                                         vocab_prefix=FLAGS.vocab_prefix,
                                         outdir=FLAGS.outdir,
                                         batch_size=FLAGS.batch_size,
                                         verbose=FLAGS.verbose,
                                         masters=masters,
                                         scenario=FLAGS.scenario)

        runner.load(FLAGS.batch_timeout_micros)

        # Specify exactly how many queries need to be made
        settings.min_query_count = FLAGS.qps * FLAGS.time
        settings.max_query_count = 0
        settings.min_duration_ms = 60 * MILLI_SEC
        settings.max_duration_ms = 0
        settings.server_target_latency_ns = int(0.25 * NANO_SEC)
        settings.server_target_latency_percentile = 0.97
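        # Server constraint: 250 ms target latency at the 97th percentile.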

    else:
        # Only Offline and Server scenarios are wired up in this example.
        raise ValueError("Invalid scenario selected")

    # Create a thread in the GNMTRunner to start accepting work
    runner.start_worker()

    # Maximum sample ID + 1
    total_queries = FLAGS.query_count
    # Select the same subset of $perf_queries samples
    perf_queries = FLAGS.query_count

    sut = mlperf_loadgen.ConstructSUT(runner.enqueue, flush_queries,
                                      generic_loadgen.process_latencies)
    qsl = mlperf_loadgen.ConstructQSL(total_queries, perf_queries,
                                      runner.load_samples_to_ram,
                                      runner.unload_samples_from_ram)

    log_settings = mlperf_loadgen.LogSettings()
    log_settings.log_output.outdir = tempfile.mkdtemp()
    # Disable detail logs to prevent it from stepping on the summary
    # log in stdout on some systems.
    log_settings.log_output.copy_detail_to_stdout = False
    log_settings.log_output.copy_summary_to_stdout = True
    log_settings.enable_trace = False
    mlperf_loadgen.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    runner.finish()
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)

    for oldfile in tf.gfile.Glob(
            os.path.join(log_settings.log_output.outdir, "*")):
        basename = os.path.basename(oldfile)
        newfile = os.path.join(FLAGS.outdir, basename)
        tf.gfile.Copy(oldfile, newfile, overwrite=True)

    if FLAGS.accuracy_mode:
        log_accuracy = os.path.join(log_settings.log_output.outdir,
                                    "mlperf_log_accuracy.json")
        tf.gfile.Copy(FLAGS.reference, "/tmp/reference")
        bleu = process_accuracy.get_accuracy("/tmp/reference", log_accuracy)
        print("BLEU: %.2f" % (bleu * 100))  # pylint: disable=superfluous-parens