def main():
    global last_timeing
    args = get_args()
    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # --count applies to accuracy mode only and can be used to limit the number of images
    # for testing. For perf model we always limit count to 200.
    count_override = False
    count = args.count
    if count:
        count_override = True

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count, **kwargs)

    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        os.chdir(output_dir)

    #
    # make one pass over the dataset to validate accuracy
    #
    count = ds.get_item_count()

    # warmup
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }
    runner = runner_map[scenario](model, ds, args.threads, post_proc=post_proc,
                                  max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = output_dir
    log_output_settings.copy_summary_to_stdout = False
    log_settings = lg.LogSettings()
    log_settings.enable_trace = args.debug
    log_settings.log_output = log_output_settings

    settings = lg.TestSettings()
    settings.FromConfig(mlperf_conf, args.model_name, args.scenario)
    settings.FromConfig(user_conf, args.model_name, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        settings.max_duration_ms = args.time * MILLI_SEC

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if count_override:
        settings.min_query_count = count
        settings.max_query_count = count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query
    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency * NANO_SEC)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 500),
                          ds.load_query_samples, ds.unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
    runner.start_run(result_dict, args.accuracy)
    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    if not last_timeing:
        last_timeing = runner.result_timing
    if args.accuracy:
        post_proc.finalize(result_dict, ds, output_dir=args.output)
    add_results(final_results, "{}".format(scenario),
                result_dict, last_timeing, time.time() - ds.last_loaded, args.accuracy)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
def main():
    global last_timeing
    args = get_args()
    log.info(args)

    # find backend
    backend = get_backend(args.backend, args.dataset, args.max_ind_range,
                          args.data_sub_sample_rate, args.use_gpu)

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]

    # --count-samples can be used to limit the number of samples used for testing
    ds = wanted_dataset(
        data_path=args.dataset_path,
        name=args.dataset,
        pre_process=pre_proc,  # currently an identity function
        use_cache=args.cache,  # currently not used
        count=args.count_samples,
        samples_to_aggregate_fix=args.samples_to_aggregate_fix,
        samples_to_aggregate_min=args.samples_to_aggregate_min,
        samples_to_aggregate_max=args.samples_to_aggregate_max,
        samples_to_aggregate_quantile_file=args.samples_to_aggregate_quantile_file,
        samples_to_aggregate_trace_file=args.samples_to_aggregate_trace_file,
        test_num_workers=args.test_num_workers,
        max_ind_range=args.max_ind_range,
        sub_sample_rate=args.data_sub_sample_rate,
        mlperf_bin_loader=args.mlperf_bin_loader,
        **kwargs)

    # load model to backend
    model = backend.load(args.model_path, inputs=args.inputs, outputs=args.outputs)

    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    config = os.path.abspath(args.config)
    if not os.path.exists(config):
        log.error("{} not found".format(config))
        sys.exit(1)

    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        os.chdir(output_dir)

    #
    # make one pass over the dataset to validate accuracy
    #
    count = ds.get_item_count()

    # warmup
    ds.load_query_samples([0])
    for _ in range(5):
        batch_dense_X, batch_lS_o, batch_lS_i, _, _ = ds.get_samples([0])
        _ = backend.predict(batch_dense_X, batch_lS_o, batch_lS_i)
    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }
    runner = runner_map[scenario](model, ds, args.threads, post_proc=post_proc,
                                  max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    settings = lg.TestSettings()
    settings.FromConfig(config, args.model, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.duration:
        settings.min_duration_ms = args.duration
        settings.max_duration_ms = args.duration

    if args.target_qps:
        settings.server_target_qps = float(args.target_qps)
        settings.offline_expected_qps = float(args.target_qps)

    if args.count_queries:
        settings.min_query_count = args.count_queries
        settings.max_query_count = args.count_queries

    if args.samples_per_query_multistream:
        settings.multi_stream_samples_per_query = args.samples_per_query_multistream

    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency * NANO_SEC)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, args.samples_per_query_offline),
                          ds.load_query_samples, ds.unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {"good": 0, "total": 0, "roc_auc": 0, "scenario": str(scenario)}
    runner.start_run(result_dict, args.accuracy)
    lg.StartTest(sut, qsl, settings)

    if not last_timeing:
        last_timeing = runner.result_timing
    if args.accuracy:
        post_proc.finalize(result_dict, ds, output_dir=args.output)
    add_results(final_results, "{}".format(scenario),
                result_dict, last_timeing, time.time() - ds.last_loaded, args.accuracy)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
def main():
    global last_timeing
    args = get_args()
    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=args.count, **kwargs)

    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    count = args.count if args.count else ds.get_item_count()

    if args.accuracy:
        #
        # accuracy pass
        #
        log.info("starting accuracy pass on {} items".format(count))
        last_timeing = []
        runner = RunnerBase(model, ds, args.threads, post_proc=post_proc)
        result_dict = {"good": 0, "total": 0, "scenario": "Accuracy"}
        runner.start_run(result_dict, True)
        start = time.time()
        for idx in range(0, count):
            ds.load_query_samples([idx])
            data, label = ds.get_samples([idx])
            start_one = time.time()
            runner.enqueue([idx], [idx], data, label)
            last_timeing.append(time.time() - start_one)
        runner.finish()
        # aggregate results
        post_proc.finalize(result_dict, ds, output_dir=os.path.dirname(args.output))
        add_results(final_results, "Accuracy", result_dict, last_timeing,
                    time.time() - start)

    # warmup
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    for scenario in args.scenario:
        runner_map = {
            lg.TestScenario.SingleStream: RunnerBase,
            lg.TestScenario.MultiStream: QueueRunner,
            lg.TestScenario.Server: QueueRunner,
            lg.TestScenario.Offline: QueueRunner
        }
        runner = runner_map[scenario](model, ds, args.threads, post_proc=post_proc)

        def issue_query(query_samples):
            # called by loadgen to issue queries
            idx = [q.index for q in query_samples]
            query_id = [q.id for q in query_samples]
            data, label = ds.get_samples(idx)
            runner.enqueue(query_id, idx, data, label)

        def process_latencies(latencies_ns):
            # called by loadgen to show us the recorded latencies
            global last_timeing
            last_timeing = [t / 1e9 for t in latencies_ns]

        settings = lg.TestSettings()
        settings.enable_spec_overrides = True
        settings.scenario = scenario
        settings.mode = lg.TestMode.PerformanceOnly
        settings.multi_stream_samples_per_query = 8

        if args.time:
            # override the time we want to run
            settings.enable_spec_overrides = True
            settings.override_min_duration_ms = args.time * MILLI_SEC
            settings.override_max_duration_ms = args.time * MILLI_SEC

        if args.qps:
            qps = float(args.qps)
            settings.server_target_qps = qps
            settings.offline_expected_qps = qps

        # mlperf rules - min queries
        if scenario == lg.TestScenario.SingleStream:
            settings.override_min_query_count = args.queries_single
            settings.override_max_query_count = args.queries_single
        else:
            settings.override_min_query_count = args.queries_multi
            settings.override_max_query_count = args.queries_multi

        sut = lg.ConstructSUT(issue_query, process_latencies)
        qsl = lg.ConstructQSL(count, min(count, 1000),
                              ds.load_query_samples, ds.unload_query_samples)

        for target_latency in args.max_latency:
            log.info("starting {}, latency={}".format(scenario, target_latency))
            settings.single_stream_expected_latency_ns = int(target_latency * NANO_SEC)
            settings.override_target_latency_ns = int(target_latency * NANO_SEC)

            result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
            runner.start_run(result_dict, False)
            lg.StartTest(sut, qsl, settings)

            add_results(final_results, "{}-{}".format(scenario, target_latency),
                        result_dict, last_timeing, time.time() - ds.last_loaded)

        runner.finish()
        lg.DestroyQSL(qsl)
        lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
                  .format(self.count, qitem.sample_id[0]))
        self.count += 1
        return self.count


if __name__ == "__main__":
    runner = DummyRunner()
    runner.start_worker()

    settings = mlperf_loadgen.TestSettings()
    settings.scenario = mlperf_loadgen.TestScenario.SingleStream
    settings.mode = mlperf_loadgen.TestMode.PerformanceOnly

    # Specify exactly how many queries need to be made
    settings.min_query_count = 3003
    settings.max_query_count = 3003

    total_queries = 256  # Maximum sample ID + 1
    perf_queries = 8     # TBD: Doesn't seem to have an effect

    sut = mlperf_loadgen.ConstructSUT(runner.enqueue, flush_queries, process_latencies)
    qsl = mlperf_loadgen.ConstructQSL(total_queries, perf_queries,
                                      runner.load_samples_to_ram,
                                      runner.unload_samples_from_ram)
    mlperf_loadgen.StartTest(sut, qsl, settings)
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)
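# Note: the block above passes flush_queries and process_latencies to
# ConstructSUT without showing their definitions. A minimal sketch of what
# such callbacks could look like is given below; the names match the calls
# above, but the bodies are assumptions for illustration, not the original
# implementations.
def flush_queries():
    # loadgen asks the SUT to flush any outstanding queries; nothing queued here
    pass


def process_latencies(latencies_ns):
    # loadgen reports the recorded per-query latencies in nanoseconds
    avg_s = sum(latencies_ns) / (len(latencies_ns) * 1e9)
    print("average latency (s): {:.6f}".format(avg_s))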
def main():
    args = get_args()
    print(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # dataset to use
    wanted_dataset, preprocessor, postprocessor, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=preprocessor,
                        use_cache=args.cache,
                        count=args.count, **kwargs)

    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)

    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    count = args.count if args.count else ds.get_item_count()

    runner = Runner(model, ds, args.threads, post_process=postprocessor)
    runner.start_pool()

    # warmup
    log.info("warmup ...")
    ds.load_query_samples([0])
    for _ in range(100):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})

    def issue_query(query_samples):
        idx = [q.index for q in query_samples]
        query_id = [q.id for q in query_samples]
        data, label = ds.get_samples(idx)
        runner.enqueue(query_id, data, label)

    sut = lg.ConstructSUT(issue_query)
    qsl = lg.ConstructQSL(count, args.time, ds.load_query_samples, ds.unload_query_samples)

    scenarios = [
        # lg.TestScenario.SingleStream,
        lg.TestScenario.MultiStream,
        # lg.TestScenario.Cloud,
        # lg.TestScenario.Offline,
    ]
    for scenario in scenarios:
        for target_latency in args.max_latency:
            log.info("starting {}, latency={}".format(scenario, target_latency))

            settings = lg.TestSettings()
            settings.scenario = scenario
            settings.mode = lg.TestMode.SubmissionRun
            settings.samples_per_query = 4  # FIXME: we don't want to know about this
            settings.target_qps = 1000      # FIXME: we don't want to know about this
            settings.target_latency_ns = int(target_latency * 1000000000)

            result_list = []
            result_dict = {"good": 0, "total": 0}
            runner.start_run(result_list, result_dict)
            start = time.time()
            lg.StartTest(sut, qsl, settings)
            add_results(final_results, "{}-{}".format(scenario, target_latency),
                        result_dict, result_list, time.time() - start)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
def main():
    global num_ins
    global num_phy_cpus
    global in_queue_cnt
    global out_queue_cnt

    args = get_args()
    log.info(args)

    num_ins = args.num_instance
    num_phy_cpus = args.num_phy_cpus
    log.info('Run with {} instance on {} cpus'.format(num_ins, num_phy_cpus))

    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    image_format = 'NCHW'
    dataset = "imagenet"
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        cache_dir=args.cache_dir,
                        count=args.count,
                        use_int8=args.use_int8_dataset,
                        num_workers=num_phy_cpus,
                        **kwargs)

    # Establish communication queues
    log.info('Start consumer queue and response thread')
    lock = multiprocessing.Lock()
    init_counter = multiprocessing.Value("i", 0)
    in_queue = multiprocessing.JoinableQueue()
    out_queue = multiprocessing.Queue()
    ds_queue = multiprocessing.Queue()

    # Start consumers
    consumers = [Consumer(in_queue, out_queue, ds_queue, lock, init_counter, i, args)
                 for i in range(num_ins)]
    for c in consumers:
        c.start()

    # Wait until all sub-processors are ready
    block_until(init_counter, num_ins, 2)

    # Start response thread
    response_worker = threading.Thread(
        target=response_loadgen, args=(out_queue,))
    response_worker.daemon = True
    response_worker.start()

    scenario = SCENARIO_MAP[args.scenario]
    runner = QueueRunner(in_queue, args.batch_size)

    def issue_queries(response_ids, query_sample_indexes):
        runner.put(response_ids, query_sample_indexes)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        log.info("Average latency: {}".format(np.mean(latencies_ns)))
        log.info("Median latency: {}".format(np.percentile(latencies_ns, 50)))
        log.info("90 percentile latency: {}".format(np.percentile(latencies_ns, 90)))

    def load_query_samples(sample_list):
        for _ in range(num_ins):
            ds_queue.put(sample_list)
        block_until(init_counter, 2 * num_ins, 2)

    def unload_query_samples(sample_list):
        pass

    settings = lg.TestSettings()
    settings.FromConfig(mlperf_conf, "resnet50", args.scenario)
    settings.FromConfig(user_conf, "resnet50", args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    count = ds.get_item_count()
    perf_count = 1024
    if args.accuracy:
        perf_count = count

    sut = lg.ConstructFastSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, perf_count, load_query_samples, unload_query_samples)

    log.info("starting {}".format(scenario))
    lg.StartTest(sut, qsl, settings)

    # Wait until outQueue done
    while out_queue_cnt < in_queue_cnt:
        time.sleep(0.2)

    in_queue.join()
    for i in range(num_ins):
        in_queue.put('DONE')
    for c in consumers:
        c.join()
    out_queue.put('DONE')

    if args.accuracy:
        output_file = 'accuracy.txt'
        if args.output_file:
            output_file = args.output_file
        cmd = "python tools/accuracy-imagenet.py " \
              "--mlperf-accuracy-file=mlperf_log_accuracy.json " \
              "--imagenet-val-file=val_map.txt --output-file={}".format(output_file)
        cmd = cmd.split(' ')
        subprocess.check_call(cmd)

    lg.DestroyQSL(qsl)
    lg.DestroyFastSUT(sut)
    log.info('Test done.')
def main(argv):
    del argv
    global last_timeing

    if FLAGS.scenario == "Server":
        # Disable garbage collection for realtime performance.
        gc.disable()

    # define backend
    backend = BackendTensorflow()

    # override image format if given
    image_format = FLAGS.data_format if FLAGS.data_format else backend.image_format()

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[FLAGS.dataset]
    ds = wanted_dataset(data_path=FLAGS.dataset_path,
                        image_list=FLAGS.dataset_list,
                        name=FLAGS.dataset,
                        image_format=image_format,
                        use_cache=FLAGS.cache,
                        count=FLAGS.count,
                        cache_dir=FLAGS.cache_dir,
                        annotation_file=FLAGS.annotation_file,
                        use_space_to_depth=FLAGS.use_space_to_depth)

    # load model to backend
    # TODO(wangtao): parse flags to params.
    params = dict(ssd_model.default_hparams().values())
    params["conv0_space_to_depth"] = FLAGS.use_space_to_depth
    params["use_bfloat16"] = FLAGS.use_bfloat16
    params["use_fused_bn"] = FLAGS.use_fused_bn

    masters = []
    tpu_names = FLAGS.tpu_name
    tpu_names = tpu_names.split(",")
    for tpu_name in tpu_names:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
        masters.append(tpu_cluster_resolver.get_master())

    #
    # make one pass over the dataset to validate accuracy
    #
    count = FLAGS.count if FLAGS.count else ds.get_item_count()

    #
    # warmup
    #
    log.info("warmup ...")

    batch_size = FLAGS.batch_size[0] if FLAGS.scenario == "Offline" else 1
    backend_lists = []
    for _ in range(len(tpu_names)):
        backend = BackendTensorflow()
        backend_lists.append(backend)
    runner = QueueRunner(backend_lists, ds, FLAGS.threads, post_proc=post_proc,
                         max_batchsize=batch_size)
    runner.start_run({}, FLAGS.accuracy)

    def issue_queries(query_samples):
        for i in [1]:
            runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    tf.logging.info("starting {}, latency={}".format(FLAGS.scenario, FLAGS.max_latency))
    settings = lg.TestSettings()

    tf.logging.info(FLAGS.scenario)
    settings.scenario = SCENARIO_MAP[FLAGS.scenario]
    settings.qsl_rng_seed = FLAGS.qsl_rng_seed
    settings.sample_index_rng_seed = FLAGS.sample_index_rng_seed
    settings.schedule_rng_seed = FLAGS.schedule_rng_seed

    if FLAGS.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    else:
        settings.mode = lg.TestMode.PerformanceOnly

    if FLAGS.qps:
        qps = float(FLAGS.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if FLAGS.time:
        settings.min_duration_ms = FLAGS.time * MILLI_SEC
        settings.max_duration_ms = 0
        qps = FLAGS.qps or 100
        settings.min_query_count = qps * FLAGS.time
        settings.max_query_count = 0
    else:
        settings.min_query_count = 270336
        settings.max_query_count = 0

    target_latency_ns = int(float(FLAGS.max_latency) * NANO_SEC)
    settings.single_stream_expected_latency_ns = target_latency_ns
    settings.multi_stream_target_latency_ns = target_latency_ns
    settings.server_target_latency_ns = target_latency_ns

    log_settings = lg.LogSettings()
    log_settings.log_output.outdir = tempfile.mkdtemp()
    log_settings.log_output.copy_detail_to_stdout = True
    log_settings.log_output.copy_summary_to_stdout = True
    log_settings.enable_trace = False

    def load_query_samples(sample_list):
        """Load query samples and warmup the model."""
        ds.load_query_samples(sample_list)
        data = ds.get_image_list_inmemory()

        def init_fn(cloud_tpu_id):
            tf.logging.info("Load model for %dth cloud tpu", cloud_tpu_id)
            runner.models[cloud_tpu_id].load(
                FLAGS.model,
                FLAGS.output_model_dir,
                data,
                params,
                batch_size=FLAGS.batch_size,
                master=masters[cloud_tpu_id],
                scenario=FLAGS.scenario,
                batch_timeout_micros=FLAGS.batch_timeout_micros)

            # Init TPU.
            for it in range(FLAGS.init_iterations):
                tf.logging.info("Initialize cloud tpu at iteration %d", it)
                for batch_size in FLAGS.batch_size:
                    example, _ = ds.get_indices([sample_list[0]] * batch_size)
                    _ = runner.models[cloud_tpu_id].predict(example)

        threads = []
        for i in range(len(tpu_names)):
            thread = threading.Thread(target=init_fn, args=(i,))
            threads.append(thread)
            thread.start()
        for thread in threads:
            thread.join()

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 350),
                          load_query_samples, ds.unload_query_samples)

    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    tf.io.gfile.mkdir(FLAGS.outdir)
    for oldfile in tf.gfile.Glob(
            os.path.join(log_settings.log_output.outdir, "*")):
        basename = os.path.basename(oldfile)
        newfile = os.path.join(FLAGS.outdir, basename)
        tf.gfile.Copy(oldfile, newfile, overwrite=True)

    if FLAGS.accuracy:
        with tf.gfile.Open(os.path.join(FLAGS.outdir, "results.txt"), "w") as f:
            results = {"mAP": accuracy_coco.main()}
            json.dump(results, f, sort_keys=True, indent=4)
def main():
    global last_timeing
    args = get_args()
    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=args.count, **kwargs)

    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    count = args.count if args.count else ds.get_item_count()

    runner = Runner(model, ds, args.threads, post_proc=post_proc)

    #
    # warmup
    #
    log.info("warmup ...")
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    if args.accuracy:
        #
        # accuracy pass
        #
        log.info("starting accuracy pass on {} items".format(count))
        runner.start_pool(nolg=True)
        result_dict = {"good": 0, "total": 0, "scenario": "Accuracy", "timing": []}
        runner.start_run(result_dict, True)
        start = time.time()
        for idx in range(0, count):
            ds.load_query_samples([idx])
            data, label = ds.get_samples([idx])
            runner.enqueue([idx], [idx], data, label)
        runner.finish()
        # aggregate results
        post_proc.finalize(result_dict, ds, output_dir=os.path.dirname(args.output))
        last_timeing = result_dict["timing"]
        del result_dict["timing"]
        add_results(final_results, "Accuracy", result_dict, last_timeing,
                    time.time() - start)

    #
    # run the benchmark with timing
    #
    runner.start_pool()

    def issue_query(query_samples):
        idx = [q.index for q in query_samples]
        query_id = [q.id for q in query_samples]
        data, label = ds.get_samples(idx)
        runner.enqueue(query_id, idx, data, label)

    def process_latencies(latencies_ns):
        global last_timeing
        last_timeing = [t / 1e9 for t in latencies_ns]

    sut = lg.ConstructSUT(issue_query, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 1000),
                          ds.load_query_samples, ds.unload_query_samples)

    for scenario in args.scenario:
        for target_latency in args.max_latency:
            log.info("starting {}, latency={}".format(scenario, target_latency))

            settings = lg.TestSettings()
            log.info(scenario)
            if str(scenario) == 'TestMode.AccuracyOnly':
                settings.mode = scenario
            else:
                settings.scenario = scenario

            if args.qps:
                settings.enable_spec_overrides = True
                qps = float(args.qps)
                settings.server_target_qps = qps
                settings.offline_expected_qps = qps

            if args.time:
                settings.enable_spec_overrides = True
                settings.override_min_duration_ms = args.time * MILLI_SEC
                settings.override_max_duration_ms = args.time * MILLI_SEC
                qps = args.qps or 100
                settings.override_min_query_count = qps * args.time
                settings.override_max_query_count = qps * args.time

            if args.time or args.qps and str(scenario) != 'TestMode.AccuracyOnly':
                settings.mode = lg.TestMode.PerformanceOnly
                # FIXME: add SubmissionRun once available

            settings.enable_spec_overrides = True
            settings.single_stream_expected_latency_ns = int(target_latency * NANO_SEC)
            settings.override_target_latency_ns = int(target_latency * NANO_SEC)

            result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
            runner.start_run(result_dict, False)
            lg.StartTest(sut, qsl, settings)
            add_results(final_results, "{}-{}".format(scenario, target_latency),
                        result_dict, last_timeing, time.time() - ds.last_loaded)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
def main():
    global last_timeing
    args = get_args()
    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # --count applies to accuracy mode only and can be used to limit the number of images
    # for testing. For perf model we always limit count to 200.
    count = args.count
    if not count:
        if not args.accuracy:
            count = 200

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count, **kwargs)

    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    count = ds.get_item_count()

    # warmup
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    for scenario in args.scenario:
        runner_map = {
            lg.TestScenario.SingleStream: RunnerBase,
            lg.TestScenario.MultiStream: QueueRunner,
            lg.TestScenario.Server: QueueRunner,
            lg.TestScenario.Offline: QueueRunner
        }
        runner = runner_map[scenario](model, ds, args.threads, post_proc=post_proc,
                                      max_batchsize=args.max_batchsize)

        def issue_queries(query_samples):
            runner.enqueue(query_samples)

        def flush_queries():
            pass

        def process_latencies(latencies_ns):
            # called by loadgen to show us the recorded latencies
            global last_timeing
            last_timeing = [t / NANO_SEC for t in latencies_ns]

        settings = lg.TestSettings()
        settings.scenario = scenario
        settings.mode = lg.TestMode.PerformanceOnly
        if args.accuracy:
            settings.mode = lg.TestMode.AccuracyOnly
        if args.find_peak_performance:
            settings.mode = lg.TestMode.FindPeakPerformance

        if args.time:
            # override the time we want to run
            settings.min_duration_ms = args.time * MILLI_SEC
            settings.max_duration_ms = args.time * MILLI_SEC

        if args.qps:
            qps = float(args.qps)
            settings.server_target_qps = qps
            settings.offline_expected_qps = qps

        if scenario == lg.TestScenario.SingleStream:
            settings.min_query_count = args.queries_single
            settings.max_query_count = args.queries_single
        elif scenario == lg.TestScenario.MultiStream:
            settings.min_query_count = args.queries_multi
            settings.max_query_count = args.queries_multi
            settings.multi_stream_samples_per_query = 4
        elif scenario == lg.TestScenario.Server:
            max_latency = args.max_latency
        elif scenario == lg.TestScenario.Offline:
            settings.min_query_count = args.queries_offline
            settings.max_query_count = args.queries_offline

        sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
        qsl = lg.ConstructQSL(count, min(count, 1000),
                              ds.load_query_samples, ds.unload_query_samples)

        if scenario == lg.TestScenario.Server:
            for target_latency in max_latency:
                log.info("starting {}, latency={}".format(scenario, target_latency))
                settings.server_target_latency_ns = int(target_latency * NANO_SEC)

                result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
                runner.start_run(result_dict, args.accuracy)
                lg.StartTest(sut, qsl, settings)

                if not last_timeing:
                    last_timeing = runner.result_timing
                if args.accuracy:
                    post_proc.finalize(result_dict, ds,
                                       output_dir=os.path.dirname(args.output))
                add_results(final_results, "{}-{}".format(scenario, target_latency),
                            result_dict, last_timeing,
                            time.time() - ds.last_loaded, args.accuracy)
        else:
            log.info("starting {}".format(scenario))
            result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
            runner.start_run(result_dict, args.accuracy)
            lg.StartTest(sut, qsl, settings)

            if not last_timeing:
                last_timeing = runner.result_timing
            if args.accuracy:
                post_proc.finalize(result_dict, ds,
                                   output_dir=os.path.dirname(args.output))
            add_results(final_results, "{}".format(scenario),
                        result_dict, last_timeing,
                        time.time() - ds.last_loaded, args.accuracy)

        runner.finish()
        lg.DestroyQSL(qsl)
        lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
    def __del__(self):
        lg.DestroyQSL(self.qsl)
        print("Finished destroying QSL.")
def main():
    global last_timeing
    args = get_args()
    log.info(args)

    backend = BackendTensorRT()
    ds = Imagenet(
        data_path=args.dataset_path,
        use_cache=args.cache,
        batch_size=args.batch_size,
        image_size=args.image_size,
        calib_file='cal_image_list_option_%d.txt' % args.calib_file)

    model = backend.load(args, ds=ds)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    config = os.path.abspath(args.config)
    assert os.path.exists(config), "%s not existed!" % config
    user_config = os.path.abspath(args.user_config)
    assert os.path.exists(user_config), "%s not existed!" % user_config

    base_path = os.path.dirname(os.path.realpath(__file__))
    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        os.chdir(output_dir)

    post_proc = PostProcessCommon(offset=0)
    runner = QueueRunner(
        model, ds, args.threads, post_proc=post_proc, batch_size=args.batch_size)

    def issue_queries(ids, indices):
        runner.enqueue(ids, indices)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    settings = lg.TestSettings()
    model_name = 'OFAnet-AutoSinian'
    settings.FromConfig(config, model_name, args.scenario)
    settings.FromConfig(user_config, model_name, args.scenario)

    if args.audit_test:
        audit_config_path = base_path + '/audit%s.config' % args.audit_test
        settings.FromConfig(audit_config_path, model_name, args.scenario)

    scenario = SCENARIO_MAP[args.scenario]
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly

    sut = lg.ConstructFastSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(ds.get_item_count(), args.batch_size,
                          ds.load_query_samples, ds.unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
    runner.start_run(result_dict, args.accuracy)
    start = time.time()
    lg.StartTest(sut, qsl, settings)
    post_proc.finalize(result_dict)
    add_results(final_results, "{}".format(scenario),
                result_dict, last_timeing, runner.finishTime - ds.last_loaded, args)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroyFastSUT(sut)

    if args.output:
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=2)
def main(argv):
    del argv

    settings = mlperf_loadgen.TestSettings()
    settings.qsl_rng_seed = FLAGS.qsl_rng_seed
    settings.sample_index_rng_seed = FLAGS.sample_index_rng_seed
    settings.schedule_rng_seed = FLAGS.schedule_rng_seed
    if FLAGS.accuracy_mode:
        settings.mode = mlperf_loadgen.TestMode.AccuracyOnly
    else:
        settings.mode = mlperf_loadgen.TestMode.PerformanceOnly
    settings.scenario = SCENARIO_MAP[FLAGS.scenario]

    if FLAGS.qps:
        qps = float(FLAGS.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if FLAGS.scenario == "Offline" or FLAGS.scenario == "Server":
        masters = FLAGS.master
        masters = masters.split(",")
        if len(masters) < 1:
            masters = [FLAGS.master]

        runner = loadgen_gnmt.GNMTRunner(input_file=FLAGS.input_file,
                                         ckpt_path=FLAGS.ckpt_path,
                                         hparams_path=FLAGS.hparams_path,
                                         vocab_prefix=FLAGS.vocab_prefix,
                                         outdir=FLAGS.outdir,
                                         batch_size=FLAGS.batch_size,
                                         verbose=FLAGS.verbose,
                                         masters=masters,
                                         scenario=FLAGS.scenario)
        runner.load(FLAGS.batch_timeout_micros)

        # Specify exactly how many queries need to be made
        settings.min_query_count = FLAGS.qps * FLAGS.time
        settings.max_query_count = 0
        settings.min_duration_ms = 60 * MILLI_SEC
        settings.max_duration_ms = 0
        settings.server_target_latency_ns = int(0.25 * NANO_SEC)
        settings.server_target_latency_percentile = 0.97
    else:
        print("Invalid scenario selected")
        assert False

    # Create a thread in the GNMTRunner to start accepting work
    runner.start_worker()

    # Maximum sample ID + 1
    total_queries = FLAGS.query_count
    # Select the same subset of $perf_queries samples
    perf_queries = FLAGS.query_count

    sut = mlperf_loadgen.ConstructSUT(runner.enqueue, flush_queries,
                                      generic_loadgen.process_latencies)
    qsl = mlperf_loadgen.ConstructQSL(total_queries, perf_queries,
                                      runner.load_samples_to_ram,
                                      runner.unload_samples_from_ram)

    log_settings = mlperf_loadgen.LogSettings()
    log_settings.log_output.outdir = tempfile.mkdtemp()
    # Disable detail logs to prevent it from stepping on the summary
    # log in stdout on some systems.
    log_settings.log_output.copy_detail_to_stdout = False
    log_settings.log_output.copy_summary_to_stdout = True
    log_settings.enable_trace = False
    mlperf_loadgen.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    runner.finish()
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)

    for oldfile in tf.gfile.Glob(
            os.path.join(log_settings.log_output.outdir, "*")):
        basename = os.path.basename(oldfile)
        newfile = os.path.join(FLAGS.outdir, basename)
        tf.gfile.Copy(oldfile, newfile, overwrite=True)

    if FLAGS.accuracy_mode:
        log_accuracy = os.path.join(log_settings.log_output.outdir,
                                    "mlperf_log_accuracy.json")
        tf.gfile.Copy(FLAGS.reference, "/tmp/reference")
        bleu = process_accuracy.get_accuracy("/tmp/reference", log_accuracy)
        print("BLEU: %.2f" % (bleu * 100))  # pylint: disable=superfluous-parens