def main():
    """Run one MLPerf-loadgen scenario end to end.

    Reads CLI args, builds the backend/dataset/runner, configures loadgen from
    mlperf.conf + user.conf, runs the test via StartTestWithLogSettings, and
    writes aggregate results to results.json in the output directory.
    Mutates the module-level `last_timeing` with per-query latencies (seconds).
    """
    global last_timeing
    args = get_args()
    log.info(args)

    # find backend
    backend = get_backend(args.backend, args.dataset_path,
                          args.dataset_calibration_list)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # --count applies to accuracy mode only and can be used to limit the number
    # of images for testing.
    # NOTE(review): the original comment claims a 200-image cap for perf mode,
    # but no such cap is applied in this variant — confirm intent.
    count_override = False
    count = args.count
    if count:
        count_override = True

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count,
                        **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    # both config files must exist; fail fast otherwise
    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        # chdir so loadgen log files and results.json land in the output dir
        os.chdir(output_dir)

    #
    # make one pass over the dataset to validate accuracy
    #
    # `count` is re-bound here to the full item count for QSL construction;
    # the CLI --count value only survives via count_override below.
    count = ds.get_item_count()

    # warmup: run a few predictions on sample 0 so lazy backend init/JIT
    # does not pollute measured latencies
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    # SingleStream issues queries synchronously; the others go through a queue
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }
    runner = runner_map[scenario](model, ds, args.threads,
                                  post_proc=post_proc,
                                  max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        # loadgen callback: hand queries to the runner
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]  # ns -> seconds

    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = output_dir
    log_output_settings.copy_summary_to_stdout = False
    log_settings = lg.LogSettings()
    log_settings.enable_trace = args.debug
    log_settings.log_output = log_output_settings

    settings = lg.TestSettings()
    # user_conf is applied second so it can override mlperf_conf values
    settings.FromConfig(mlperf_conf, args.model_name, args.scenario)
    settings.FromConfig(user_conf, args.model_name, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        settings.max_duration_ms = args.time * MILLI_SEC

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if count_override:
        settings.min_query_count = count
        settings.max_query_count = count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query
    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency * NANO_SEC)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    # performance set is capped at 500 samples resident in memory
    qsl = lg.ConstructQSL(count, min(count, 500), ds.load_query_samples,
                          ds.unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
    runner.start_run(result_dict, args.accuracy)

    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    # fall back to runner-side timings if loadgen did not report latencies
    if not last_timeing:
        last_timeing = runner.result_timing
    if args.accuracy:
        post_proc.finalize(result_dict, ds, output_dir=args.output)
    add_results(final_results, "{}".format(scenario), result_dict,
                last_timeing, time.time() - ds.last_loaded, args.accuracy)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        # cwd is output_dir at this point (chdir above)
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
def main():
    """Run one MLPerf-loadgen scenario for the recommendation (DLRM-style) model.

    Builds the backend and aggregated-sample dataset, configures loadgen from a
    single config file, runs the test, and writes results.json to the output
    directory. Mutates module-level `last_timeing` (per-query latency, seconds).
    """
    global last_timeing
    args = get_args()
    log.info(args)

    # find backend
    backend = get_backend(args.backend, args.dataset, args.max_ind_range,
                          args.data_sub_sample_rate, args.use_gpu)

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]

    # --count-samples can be used to limit the number of samples used for testing
    ds = wanted_dataset(
        data_path=args.dataset_path,
        name=args.dataset,
        pre_process=pre_proc,  # currently an identity function
        use_cache=args.cache,  # currently not used
        count=args.count_samples,
        # sample-aggregation knobs control how many raw rows make one query
        samples_to_aggregate_fix=args.samples_to_aggregate_fix,
        samples_to_aggregate_min=args.samples_to_aggregate_min,
        samples_to_aggregate_max=args.samples_to_aggregate_max,
        samples_to_aggregate_quantile_file=args.samples_to_aggregate_quantile_file,
        samples_to_aggregate_trace_file=args.samples_to_aggregate_trace_file,
        test_num_workers=args.test_num_workers,
        max_ind_range=args.max_ind_range,
        sub_sample_rate=args.data_sub_sample_rate,
        mlperf_bin_loader=args.mlperf_bin_loader,
        **kwargs)
    # load model to backend
    model = backend.load(args.model_path, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    # single loadgen config file; fail fast if missing
    config = os.path.abspath(args.config)
    if not os.path.exists(config):
        log.error("{} not found".format(config))
        sys.exit(1)

    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        # chdir so loadgen logs and results.json land in the output dir
        os.chdir(output_dir)

    #
    # make one pass over the dataset to validate accuracy
    #
    count = ds.get_item_count()

    # warmup: exercise the backend once so lazy init does not skew timings
    ds.load_query_samples([0])
    for _ in range(5):
        batch_dense_X, batch_lS_o, batch_lS_i, _, _ = ds.get_samples([0])
        _ = backend.predict(batch_dense_X, batch_lS_o, batch_lS_i)
    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    # SingleStream is synchronous; other scenarios go through a queue
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }
    runner = runner_map[scenario](model, ds, args.threads,
                                  post_proc=post_proc,
                                  max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        # loadgen callback: hand queries to the runner
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]  # ns -> seconds

    settings = lg.TestSettings()
    settings.FromConfig(config, args.model, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.duration:
        # NOTE(review): --duration appears to be taken in ms directly here
        # (no MILLI_SEC scaling, unlike the --time path in other variants) —
        # confirm the CLI contract.
        settings.min_duration_ms = args.duration
        settings.max_duration_ms = args.duration

    if args.target_qps:
        settings.server_target_qps = float(args.target_qps)
        settings.offline_expected_qps = float(args.target_qps)

    if args.count_queries:
        settings.min_query_count = args.count_queries
        settings.max_query_count = args.count_queries

    if args.samples_per_query_multistream:
        settings.multi_stream_samples_per_query = args.samples_per_query_multistream

    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency * NANO_SEC)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, args.samples_per_query_offline),
                          ds.load_query_samples, ds.unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {
        "good": 0,
        "total": 0,
        "roc_auc": 0,
        "scenario": str(scenario)
    }
    runner.start_run(result_dict, args.accuracy)

    lg.StartTest(sut, qsl, settings)

    # fall back to runner-side timings if loadgen did not report latencies
    if not last_timeing:
        last_timeing = runner.result_timing
    if args.accuracy:
        post_proc.finalize(result_dict, ds, output_dir=args.output)
    add_results(final_results, "{}".format(scenario), result_dict,
                last_timeing, time.time() - ds.last_loaded, args.accuracy)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        # cwd is output_dir at this point (chdir above)
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
def main():
    """Run one MLPerf-loadgen scenario with audit.config support and an
    accuracy.txt dump.

    Besides the standard flow (backend -> dataset -> loadgen -> results.json),
    this variant copies any found audit.config into the output directory for
    loadgen to pick up, echoes the loadgen summary to the log, and, in accuracy
    mode, writes the computed accuracy string to accuracy.txt.

    Fix vs. original: the accuracy branch used
    ``if ssd-mobilenet: ...  if ssd-resnet34: ... else: ...`` so an
    ssd-mobilenet run executed BOTH the COCO branch and the ImageNet ``else``
    branch (feeding ImagenetAcc a COCO dataset path). The chain is now a
    single if/elif/else so exactly one accuracy calculation runs.
    """
    global last_timeing
    args = get_args()
    log.info(args)

    # find backend
    backend = get_backend(args.backend)
    # propagate batch size to backends that expose a max_batchsize attribute
    if getattr(backend, "max_batchsize", -1) != -1:
        backend.max_batchsize = args.max_batchsize

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # --count applies to accuracy mode only and can be used to limit the number
    # of images for testing.
    count_override = False
    count = args.count
    if count:
        count_override = True

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count,
                        **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    # both config files must exist; fail fast otherwise
    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    audit_config_cp_loc = None
    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        # Check if audit.config file is used, copy to output directory before
        # we chdir to that location so loadgen can find it
        audit_files = glob.glob(
            "ncoresw/mlperf/vision/classification_and_detection/*audit.config")
        if len(audit_files):
            log.info("Found audit.config (" + audit_files[0] + ")")
            audit_config_cp_loc = os.path.join(output_dir, "audit.config")
            # If user already put audit.config at `output` directory, then use
            # that one. Otherwise, copy the one we found in the current
            # directory (before chdir to new output directory).
            if os.path.exists(audit_config_cp_loc):
                log.info(
                    "WARNING: audit.config already exists, so cannot copy over new audit file!"
                )
                log.info(audit_config_cp_loc)
                # None also means: do not delete the pre-existing file later
                audit_config_cp_loc = None
            else:
                shutil.copy(audit_files[0], audit_config_cp_loc)
        os.chdir(output_dir)

    #
    # make one pass over the dataset to validate accuracy
    #
    # re-bind count to the full item count for QSL construction; the CLI
    # --count value only survives via count_override below
    count = ds.get_item_count()

    # warmup with a full batch so kernels are compiled for the real batch size
    warmup_queries = range(args.max_batchsize)
    ds.load_query_samples(warmup_queries)
    for _ in range(2):
        img, _ = ds.get_samples(warmup_queries)
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }
    runner = runner_map[scenario](model, ds, args.threads,
                                  post_proc=post_proc,
                                  max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        # loadgen callback: hand queries to the runner
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]  # ns -> seconds

    settings = lg.TestSettings()
    # user_conf applied second so it can override mlperf_conf values
    settings.FromConfig(mlperf_conf, args.model_name, args.scenario)
    settings.FromConfig(user_conf, args.model_name, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        settings.max_duration_ms = args.time * MILLI_SEC

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if count_override:
        settings.min_query_count = count
        settings.max_query_count = count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query
    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency * NANO_SEC)

    # override target latency when it needs to be less than 1ms
    if args.model_name == "mobilenet":
        settings.single_stream_expected_latency_ns = 200000
    elif args.model_name == "resnet50":
        settings.single_stream_expected_latency_ns = 900000
    elif args.model_name == "ssd-mobilenet":
        settings.single_stream_expected_latency_ns = 900000

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 1024), ds.load_query_samples,
                          ds.unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
    runner.start_run(result_dict, args.accuracy)

    lg.StartTest(sut, qsl, settings)

    # fall back to runner-side timings if loadgen did not report latencies
    if not last_timeing:
        last_timeing = runner.result_timing
    if args.accuracy:
        post_proc.finalize(result_dict, ds, output_dir=args.output)
    add_results(final_results, "{}".format(scenario), result_dict,
                last_timeing, time.time() - ds.last_loaded, args.accuracy)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    # Dump the summary logs to stdout for convenience
    log.info("Output dir: " + os.path.abspath(output_dir))
    with open(os.path.join(output_dir, "mlperf_log_summary.txt"), 'r') as f:
        log.info(f.read())

    # Output accuracy txt file
    if args.accuracy:
        with open(os.path.join(output_dir, "accuracy.txt"), "w") as f_acc:
            # SSD accuracy calculation
            #----------------------------------------
            # The mAP is already stored in result_dict["mAP"], but we'll call
            # `accuracy_coco()` just to keep the submission process consistent.
            # BUGFIX: chained as if/elif/else — previously ssd-mobilenet fell
            # through into the ImageNet `else` branch as well.
            if args.model_name == "ssd-mobilenet":
                accuracy_str = accuracy.CocoAcc(
                    mlperf_accuracy_file=os.path.join(
                        output_dir, "mlperf_log_accuracy.json"),
                    coco_dir=args.dataset_path).get_accuracy() + "\n"
                f_acc.write(accuracy_str)
                log.info(accuracy_str)
            elif args.model_name == "ssd-resnet34":
                accuracy_str = accuracy.CocoAcc(
                    mlperf_accuracy_file=os.path.join(
                        output_dir, "mlperf_log_accuracy.json"),
                    coco_dir=args.dataset_path,
                    use_inv_map=True,
                    remove_48_empty_images=False).get_accuracy() + "\n"
                f_acc.write(accuracy_str)
                log.info(accuracy_str)
            # ImageNet accuracy calculation
            #----------------------------------------
            # The good / total values are already stored in result_dict["good"]
            # and result_dict["total"], but we'll call `accuracy_imagenet()`
            # just to keep the submission process consistent.
            else:
                accuracy_str = accuracy.ImagenetAcc(
                    mlperf_accuracy_file=os.path.join(
                        output_dir, "mlperf_log_accuracy.json"),
                    imagenet_val_file=os.path.join(
                        args.dataset_path, "val_map.txt")).get_accuracy() + "\n"
                f_acc.write(accuracy_str)
                log.info(accuracy_str)

    #
    # write final results
    #
    if args.output:
        # cwd is output_dir at this point (chdir above)
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)

    # remove the audit.config we copied in (only if we created it)
    if audit_config_cp_loc is not None:
        os.remove(audit_config_cp_loc)

    # let backends that hold device resources tear down cleanly
    backend_destroy = getattr(backend, "destroy", None)
    if callable(backend_destroy):
        backend.destroy()
def main():
    """Sweep scenarios x target latencies through an early loadgen API.

    Builds the backend/dataset/runner once, then for each scenario in
    {SingleStream, MultiStream, Server} and each --max-latency value runs a
    loadgen test, finalizes accuracy, and accumulates results; results are
    written to --output as JSON after the sweep.

    Fix vs. original: `process_latencies` divided nanosecond latencies by
    10000000. (1e7) instead of 1e9, inflating every reported latency by 100x.
    It now uses NANO_SEC like the rest of the file.
    """
    args = get_args()
    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=args.count,
                        **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    count = args.count if args.count else ds.get_item_count()

    runner = Runner(model, ds, args.threads, post_proc=post_proc)
    runner.start_pool()

    # warmup: exercise the backend so lazy init does not skew timings
    log.info("warmup ...")
    ds.load_query_samples([0])
    for _ in range(50):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    def issue_query(query_samples):
        # loadgen callback: fetch sample data and hand it to the runner
        idx = [q.index for q in query_samples]
        query_id = [q.id for q in query_samples]
        data, label = ds.get_samples(idx)
        runner.enqueue(query_id, idx, data, label)

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        # BUGFIX: was `t / 10000000.` (1e7); latencies are in ns, so convert
        # with NANO_SEC (1e9) to get seconds.
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    sut = lg.ConstructSUT(issue_query, process_latencies)
    qsl = lg.ConstructQSL(count, count, ds.load_query_samples,
                          ds.unload_query_samples)

    scenarios = [
        lg.TestScenario.SingleStream,
        lg.TestScenario.MultiStream,
        lg.TestScenario.Server,
        # lg.TestScenario.Offline,
    ]
    for scenario in scenarios:
        for target_latency in args.max_latency:
            log.info("starting {}, latency={}".format(scenario, target_latency))
            settings = lg.TestSettings()
            settings.scenario = scenario

            if args.qps:
                settings.enable_spec_overrides = True
                qps = float(args.qps)
                settings.server_target_qps = qps
                settings.offline_expected_qps = qps

            if args.time:
                settings.enable_spec_overrides = True
                settings.override_min_duration_ms = args.time * MILLI_SEC
                settings.override_max_duration_ms = args.time * MILLI_SEC
                # NOTE(review): if args.qps comes from argparse as a string,
                # `qps * args.time` would be string repetition — confirm
                # get_args() parses --qps numerically.
                qps = args.qps or 100
                settings.override_min_query_count = qps * args.time
                settings.override_max_query_count = qps * args.time

            if args.time or args.qps:
                settings.mode = lg.TestMode.PerformanceOnly
            # FIXME: add SubmissionRun once available

            settings.enable_spec_overrides = True  # FIXME: needed because of override_target_latency_ns
            settings.single_stream_expected_latency_ns = int(target_latency * NANO_SEC)
            settings.override_target_latency_ns = int(target_latency * NANO_SEC)

            # reset result capture
            result_dict = {"good": 0, "total": 0}
            runner.start_run(result_dict, True)
            start = time.time()
            lg.StartTest(sut, qsl, settings)

            # aggregate results
            post_proc.finalize(result_dict, ds)
            add_results(final_results, "{}-{}".format(scenario, target_latency),
                        result_dict, last_timeing, time.time() - start)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
def main():
    """Run an MLPerf-loadgen scenario against a Go-backed SUT.

    The model/dataset live behind a Go shared object accessed via the
    go_initialize / go_load_query_samples / go_unload_query_samples /
    go_finalize bridge and the module-level handle `so`. Accuracy is computed
    by shelling out to the reference accuracy scripts from the mlperf
    inference repo.
    """
    # module-level state shared with the loadgen callbacks below
    global so
    global last_timeing
    global last_loaded
    global result_timeing
    args = get_args()
    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # --count applies to accuracy mode only and can be used to limit the number
    # of images for testing.
    count_override = False
    count = args.count
    if count:
        count_override = True
    """
    Python signature
    go_initialize(backend, model_path, dataset_path, count, use_gpu, gpu_id, trace_level, max_batchsize)
    """
    # go_initialize returns the effective sample count and an error string
    count, err = go_initialize(backend, args.model_path, args.dataset_path,
                               count, args.use_gpu, args.gpu_id,
                               args.trace_level, args.max_batchsize)
    # NOTE(review): this checks for the string 'nil' while the other bridge
    # calls below compare against '' — confirm which sentinel the Go side
    # actually returns on success.
    if (err != 'nil'):
        print(err)
        raise RuntimeError('initialization in go failed')

    # both config files must exist; fail fast otherwise
    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    log_dir = None
    if args.log_dir:
        log_dir = os.path.abspath(args.log_dir)
        os.makedirs(log_dir, exist_ok=True)

    scenario = SCENARIO_MAP[args.scenario]

    def issue_queries(query_samples):
        # loadgen callback: run inference through the Go SUT and report
        # responses back to loadgen
        global so
        global last_timeing
        global result_timeing
        idx = np.array([q.index for q in query_samples]).astype(np.int32)
        query_id = [q.id for q in query_samples]
        if args.dataset == 'brats2019':
            # 3D-UNet: issue one sample at a time; results are float16
            start = time.time()
            response_array_refs = []  # keep buffers alive until loadgen copies them
            response = []
            for i, qid in enumerate(query_id):
                processed_results = so.IssueQuery(1, idx[i][np.newaxis])
                processed_results = json.loads(
                    processed_results.decode('utf-8'))
                response_array = array.array(
                    "B",
                    np.array(processed_results[0], np.float16).tobytes())
                response_array_refs.append(response_array)
                bi = response_array.buffer_info()
                response.append(lg.QuerySampleResponse(qid, bi[0], bi[1]))
            result_timeing.append(time.time() - start)
            lg.QuerySamplesComplete(response)
        else:
            # all other datasets: issue the whole batch at once; float32 results
            start = time.time()
            processed_results = so.IssueQuery(len(idx), idx)
            result_timeing.append(time.time() - start)
            processed_results = json.loads(processed_results.decode('utf-8'))
            response_array_refs = []  # keep buffers alive until loadgen copies them
            response = []
            # NOTE(review): this loop variable shadows the outer `idx` array
            for idx, qid in enumerate(query_id):
                response_array = array.array(
                    "B",
                    np.array(processed_results[idx], np.float32).tobytes())
                response_array_refs.append(response_array)
                bi = response_array.buffer_info()
                response.append(lg.QuerySampleResponse(qid, bi[0], bi[1]))
            lg.QuerySamplesComplete(response)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]  # ns -> seconds

    def load_query_samples(sample_list):
        # QSL callback: delegate sample loading to the Go side
        global so
        global last_loaded
        err = go_load_query_samples(sample_list, so)
        last_loaded = time.time()
        if (err != ''):
            print(err)
            raise RuntimeError('load query samples failed')

    def unload_query_samples(sample_list):
        # QSL callback: delegate sample unloading to the Go side
        global so
        err = go_unload_query_samples(sample_list, so)
        if (err != ''):
            print(err)
            raise RuntimeError('unload query samples failed')

    settings = lg.TestSettings()
    # configs are only applied when a model name is given; user_conf second so
    # it can override mlperf_conf
    if args.model_name != "":
        settings.FromConfig(mlperf_conf, args.model_name, args.scenario)
        settings.FromConfig(user_conf, args.model_name, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        settings.max_duration_ms = args.time * MILLI_SEC

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if count_override:
        settings.min_query_count = count
        settings.max_query_count = count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query
    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency * NANO_SEC)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    # performance set is capped at 500 samples resident in memory
    qsl = lg.ConstructQSL(count, min(count, 500), load_query_samples,
                          unload_query_samples)

    log.info("starting {}".format(scenario))
    log_path = os.path.realpath(args.log_dir)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings
    # log_settings.enable_trace = True
    # lg.StartTest(sut, qsl, settings)
    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    # fall back to SUT-side timings if loadgen did not report latencies
    if not last_timeing:
        last_timeing = result_timeing

    if args.accuracy:
        # each dataset has a reference accuracy script in the mlperf
        # inference repo, invoked as a subprocess on the loadgen accuracy log
        accuracy_script_paths = {
            'coco':
                os.path.realpath(
                    '../inference/vision/classification_and_detection/tools/accuracy-coco.py'
                ),
            'imagenet':
                os.path.realpath(
                    '../inference/vision/classification_and_detection/tools/accuracy-imagenet.py'
                ),
            'squad':
                os.path.realpath('../inference/language/bert/accuracy-squad.py'),
            'brats2019':
                os.path.realpath(
                    '../inference/vision/medical_imaging/3d-unet/accuracy-brats.py'
                ),
        }
        accuracy_script_path = accuracy_script_paths[args.dataset]
        accuracy_file_path = os.path.join(log_dir, 'mlperf_log_accuracy.json')
        data_dir = os.environ['DATA_DIR']
        if args.dataset == 'coco':
            if args.use_inv_map:
                subprocess.check_call(
                    'python3 {} --mlperf-accuracy-file {} --coco-dir {} --use-inv-map'
                    .format(accuracy_script_path, accuracy_file_path, data_dir),
                    shell=True)
            else:
                subprocess.check_call(
                    'python3 {} --mlperf-accuracy-file {} --coco-dir {}'.
                    format(accuracy_script_path, accuracy_file_path, data_dir),
                    shell=True)
        elif args.dataset == 'imagenet':  # imagenet
            subprocess.check_call(
                'python3 {} --mlperf-accuracy-file {} --imagenet-val-file {}'.
                format(accuracy_script_path, accuracy_file_path,
                       os.path.join(data_dir, 'val_map.txt')),
                shell=True)
        elif args.dataset == 'squad':  # squad
            vocab_path = os.path.join(data_dir, 'vocab.txt')
            val_path = os.path.join(data_dir, 'dev-v1.1.json')
            out_path = os.path.join(log_dir, 'predictions.json')
            cache_path = os.path.join(data_dir, 'eval_features.pickle')
            subprocess.check_call(
                'python3 {} --vocab_file {} --val_data {} --log_file {} --out_file {} --features_cache_file {} --max_examples {}'
                .format(accuracy_script_path, vocab_path, val_path,
                        accuracy_file_path, out_path, cache_path, count),
                shell=True)
        elif args.dataset == 'brats2019':  # brats2019
            base_dir = os.path.realpath(
                '../inference/vision/medical_imaging/3d-unet/build')
            post_dir = os.path.join(base_dir, 'postprocessed_data')
            label_dir = os.path.join(
                base_dir, 'raw_data/nnUNet_raw_data/Task043_BraTS2019/labelsTr')
            os.makedirs(post_dir, exist_ok=True)
            subprocess.check_call(
                'python3 {} --log_file {} --preprocessed_data_dir {} --postprocessed_data_dir {} --label_data_dir {}'
                .format(accuracy_script_path, accuracy_file_path, data_dir,
                        post_dir, label_dir),
                shell=True)
        else:
            raise RuntimeError('Dataset not Implemented.')

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
    """
    Python signature
    go_finalize(so)
    """
    err = go_finalize(so)
    if (err != ''):
        print(err)
        raise RuntimeError('finialize in go failed')
# Standalone smoke test: drive loadgen SingleStream against a DummyRunner.
#
# Fix vs. original: the block mixed two aliases for the loadgen module —
# most calls used `mlperf_loadgen.` but the log-settings section and
# StartTestWithLogSettings used `lg.`, which raises NameError wherever only
# `mlperf_loadgen` is imported. All calls now use `mlperf_loadgen`
# consistently (behavior is unchanged when both aliases refer to the same
# module).
if __name__ == "__main__":
    runner = DummyRunner()
    runner.start_worker()

    settings = mlperf_loadgen.TestSettings()
    settings.scenario = mlperf_loadgen.TestScenario.SingleStream
    settings.mode = mlperf_loadgen.TestMode.PerformanceOnly
    # Specify exactly how many queries need to be made
    settings.min_query_count = 3003
    settings.max_query_count = 3003

    total_queries = 256  # Maximum sample ID + 1
    perf_queries = 8  # TBD: Doesn't seem to have an effect

    # NOTE(review): process_latencies is not defined in this block — it is
    # expected to exist at module level; confirm.
    sut = mlperf_loadgen.ConstructSUT(runner.enqueue, runner.flush_queries,
                                      process_latencies)
    qsl = mlperf_loadgen.ConstructQSL(total_queries, perf_queries,
                                      runner.load_samples_to_ram,
                                      runner.unload_samples_from_ram)

    log_settings = mlperf_loadgen.LogSettings()
    log_settings.log_output.copy_detail_to_stdout = True
    log_settings.log_output.copy_summary_to_stdout = True
    log_settings.enable_trace = False

    mlperf_loadgen.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    runner.finish()
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)
def main():
    """Run every requested MLPerf-loadgen scenario in sequence.

    Unlike the single-scenario variants, `args.scenario` is iterable here; for
    the Server scenario each value in `args.max_latency` gets its own run.
    Results accumulate into one dict written to --output as JSON.
    """
    global last_timeing
    args = get_args()
    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # --count applies to accuracy mode only and can be used to limit the number
    # of images for testing. For perf mode we always limit count to 200.
    count = args.count
    if not count:
        if not args.accuracy:
            count = 200

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count,
                        **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    # re-bound to the full item count for QSL construction
    count = ds.get_item_count()

    # warmup: exercise the backend once so lazy init does not skew timings
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    for scenario in args.scenario:
        # SingleStream is synchronous; other scenarios go through a queue
        runner_map = {
            lg.TestScenario.SingleStream: RunnerBase,
            lg.TestScenario.MultiStream: QueueRunner,
            lg.TestScenario.Server: QueueRunner,
            lg.TestScenario.Offline: QueueRunner
        }
        runner = runner_map[scenario](model, ds, args.threads,
                                      post_proc=post_proc,
                                      max_batchsize=args.max_batchsize)

        def issue_queries(query_samples):
            # loadgen callback: hand queries to the runner
            runner.enqueue(query_samples)

        def flush_queries():
            pass

        def process_latencies(latencies_ns):
            # called by loadgen to show us the recorded latencies
            global last_timeing
            last_timeing = [t / NANO_SEC for t in latencies_ns]  # ns -> seconds

        settings = lg.TestSettings()
        settings.scenario = scenario
        settings.mode = lg.TestMode.PerformanceOnly
        if args.accuracy:
            settings.mode = lg.TestMode.AccuracyOnly

        if args.time:
            # override the time we want to run
            settings.min_duration_ms = args.time * MILLI_SEC
            settings.max_duration_ms = args.time * MILLI_SEC

        if args.qps:
            qps = float(args.qps)
            settings.server_target_qps = qps
            settings.offline_expected_qps = qps

        # per-scenario query counts; Server instead records the latency sweep
        if scenario == lg.TestScenario.SingleStream:
            settings.min_query_count = args.queries_single
            settings.max_query_count = args.queries_single
        elif scenario == lg.TestScenario.MultiStream:
            settings.min_query_count = args.queries_multi
            settings.max_query_count = args.queries_multi
            settings.multi_stream_samples_per_query = 4
        elif scenario == lg.TestScenario.Server:
            max_latency = args.max_latency
        elif scenario == lg.TestScenario.Offline:
            settings.min_query_count = args.queries_offline
            settings.max_query_count = args.queries_offline

        sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
        # performance set is capped at 1000 samples resident in memory
        qsl = lg.ConstructQSL(count, min(count, 1000), ds.load_query_samples,
                              ds.unload_query_samples)

        if scenario == lg.TestScenario.Server:
            # one run per requested server target latency
            for target_latency in max_latency:
                log.info("starting {}, latency={}".format(
                    scenario, target_latency))
                settings.server_target_latency_ns = int(target_latency * NANO_SEC)

                result_dict = {
                    "good": 0,
                    "total": 0,
                    "scenario": str(scenario)
                }
                runner.start_run(result_dict, args.accuracy)
                lg.StartTest(sut, qsl, settings)

                # fall back to runner-side timings if loadgen reported none
                if not last_timeing:
                    last_timeing = runner.result_timing
                if args.accuracy:
                    post_proc.finalize(result_dict, ds,
                                       output_dir=os.path.dirname(args.output))
                add_results(final_results,
                            "{}-{}".format(scenario, target_latency),
                            result_dict, last_timeing,
                            time.time() - ds.last_loaded, args.accuracy)
        else:
            log.info("starting {}".format(scenario))
            result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
            runner.start_run(result_dict, args.accuracy)
            lg.StartTest(sut, qsl, settings)

            # fall back to runner-side timings if loadgen reported none
            if not last_timeing:
                last_timeing = runner.result_timing
            if args.accuracy:
                post_proc.finalize(result_dict, ds,
                                   output_dir=os.path.dirname(args.output))
            add_results(final_results, "{}".format(scenario), result_dict,
                        last_timeing, time.time() - ds.last_loaded,
                        args.accuracy)

        runner.finish()
        lg.DestroyQSL(qsl)
        lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
def main():
    """Benchmark entry point: build the dataset and backend model, optionally run
    a full accuracy pass, then drive loadgen for every requested scenario and
    latency target, and finally dump aggregated results as JSON."""
    global last_timeing
    args = get_args()
    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=args.count, **kwargs)

    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)

    # metadata recorded alongside every per-scenario result
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    count = args.count if args.count else ds.get_item_count()

    if args.accuracy:
        #
        # accuracy pass: feed every sample one at a time through a RunnerBase
        # (no loadgen involved) and time each enqueue individually
        #
        log.info("starting accuracy pass on {} items".format(count))
        last_timeing = []
        runner = RunnerBase(model, ds, args.threads, post_proc=post_proc)
        result_dict = {"good": 0, "total": 0, "scenario": "Accuracy"}
        runner.start_run(result_dict, True)
        start = time.time()
        for idx in range(0, count):
            ds.load_query_samples([idx])
            data, label = ds.get_samples([idx])
            start_one = time.time()
            runner.enqueue([idx], [idx], data, label)
            last_timeing.append(time.time() - start_one)
        runner.finish()
        # aggregate results
        post_proc.finalize(result_dict, ds,
                           output_dir=os.path.dirname(args.output))
        add_results(final_results, "Accuracy", result_dict, last_timeing,
                    time.time() - start)

    # warmup: one sample pushed through the backend a few times
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    for scenario in args.scenario:
        # SingleStream uses the synchronous runner; all batched scenarios
        # share the queue-based runner
        runner_map = {
            lg.TestScenario.SingleStream: RunnerBase,
            lg.TestScenario.MultiStream: QueueRunner,
            lg.TestScenario.Server: QueueRunner,
            lg.TestScenario.Offline: QueueRunner
        }
        runner = runner_map[scenario](model, ds, args.threads,
                                      post_proc=post_proc)

        def issue_query(query_samples):
            # called by loadgen to issue queries
            idx = [q.index for q in query_samples]
            query_id = [q.id for q in query_samples]
            data, label = ds.get_samples(idx)
            runner.enqueue(query_id, idx, data, label)

        def process_latencies(latencies_ns):
            # called by loadgen to show us the recorded latencies
            global last_timeing
            last_timeing = [t / 1e9 for t in latencies_ns]

        settings = lg.TestSettings()
        settings.enable_spec_overrides = True
        settings.scenario = scenario
        settings.mode = lg.TestMode.PerformanceOnly
        settings.multi_stream_samples_per_query = 8
        if args.time:
            # override the time we want to run
            settings.enable_spec_overrides = True
            settings.override_min_duration_ms = args.time * MILLI_SEC
            settings.override_max_duration_ms = args.time * MILLI_SEC
        if args.qps:
            qps = float(args.qps)
            settings.server_target_qps = qps
            settings.offline_expected_qps = qps

        # mlperf rules - min queries
        if scenario == lg.TestScenario.SingleStream:
            settings.override_min_query_count = args.queries_single
            settings.override_max_query_count = args.queries_single
        else:
            settings.override_min_query_count = args.queries_multi
            settings.override_max_query_count = args.queries_multi

        sut = lg.ConstructSUT(issue_query, process_latencies)
        qsl = lg.ConstructQSL(count, min(count, 1000),
                              ds.load_query_samples, ds.unload_query_samples)

        # run the benchmark once per requested latency target
        for target_latency in args.max_latency:
            log.info("starting {}, latency={}".format(scenario, target_latency))
            settings.single_stream_expected_latency_ns = int(target_latency * NANO_SEC)
            settings.override_target_latency_ns = int(target_latency * NANO_SEC)

            result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
            runner.start_run(result_dict, False)
            lg.StartTest(sut, qsl, settings)

            add_results(final_results,
                        "{}-{}".format(scenario, target_latency),
                        result_dict, last_timeing,
                        time.time() - ds.last_loaded)

        runner.finish()
        lg.DestroyQSL(qsl)
        lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
def main():
    """BERT multi-instance harness entry point.

    Spawns one Consumer worker process per instance, waits through two
    calibration barriers and a readiness barrier, starts a response thread
    that feeds loadgen completions, runs the loadgen test, then drains the
    queues and tears down the workers.
    """
    global num_ins
    global num_cpus
    global in_queue_cnt
    global out_queue_cnt
    global batching
    global bs_step

    args = get_args()
    log.info(args)
    scenario = args.scenario
    accuracy_mode = args.accuracy
    perf_count = args.perf_count
    batch_size = args.batch_size
    num_ins = args.num_instance
    num_cpus = args.num_phy_cpus
    batching = args.batching
    ## TODO, remove
    log.info('Run with {} instance on {} cpus: '.format(num_ins, num_cpus))

    # Establish communication queues
    lock = multiprocessing.Lock()
    init_counter = multiprocessing.Value("i", 0)
    calibrate_counter = multiprocessing.Value("i", 0)
    out_queue = multiprocessing.Queue()
    in_queue = MultiprocessShapeBasedQueue()

    # In calibration mode, write the header of the generated profile module;
    # the workers presumably append entries between header and footer —
    # TODO confirm against Consumer implementation.
    if args.perf_calibrate:
        with open('prof_new.py', 'w') as f:
            print('prof_bs_step = {}'.format(bs_step), file=f)
            print('prof_map = {', file=f)

    # Start consumers
    consumers = [Consumer(in_queue, out_queue, lock, init_counter,
                          calibrate_counter, i, num_ins, args)
                 for i in range(num_ins)]
    for c in consumers:
        c.start()

    # used by constructQSL
    data_set = BERTDataSet(args.vocab, args.perf_count)
    issue_queue = InQueue(in_queue, batch_size, data_set)

    # Wait until all sub-processors ready to do calibration
    block_until(calibrate_counter, num_ins)
    # Wait until all sub-processors done calibration
    block_until(calibrate_counter, 2 * num_ins)
    if args.perf_calibrate:
        # close the generated profile dict and stop — calibration-only run
        with open('prof_new.py', 'a') as f:
            print('}', file=f)
        sys.exit(0)

    # Wait until all sub-processors are ready
    block_until(init_counter, num_ins)

    # Start response thread (daemon so it does not block interpreter exit)
    response_worker = threading.Thread(target=response_loadgen,
                                       args=(out_queue, ))
    response_worker.daemon = True
    response_worker.start()

    # Start loadgen
    settings = lg.TestSettings()
    settings.scenario = scenario_map[scenario]
    settings.FromConfig(args.mlperf_conf, "bert", scenario)
    settings.FromConfig(args.user_conf, "bert", scenario)
    settings.mode = lg.TestMode.AccuracyOnly if accuracy_mode else lg.TestMode.PerformanceOnly
    # TODO, for debug, remove
    #settings.server_target_qps = 40
    #settings.server_target_latency_ns = 100000000
    #settings.min_query_count = 100
    #settings.min_duration_ms = 10000

    def issue_queries(query_samples):
        # It's called by loadgen to send query to SUT
        issue_queue.put(query_samples)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(data_set.count, data_set.perf_count,
                          load_query_samples, unload_query_samples)

    log_path = "build/logs"
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    #lg.StartTest(sut, qsl, settings)
    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    # Wait until outQueue done (workers have responded to every issued query)
    while out_queue_cnt < in_queue_cnt:
        time.sleep(0.2)

    # Shutdown: one None sentinel per worker, then join everything
    in_queue.join()
    for i in range(num_ins):
        in_queue.put(None)
    for c in consumers:
        c.join()
    out_queue.put(None)

    if accuracy_mode:
        cmd = "python accuracy-squad.py --log_file={}/mlperf_log_accuracy.json".format(
            log_path)
        subprocess.check_call(cmd, shell=True)

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
def main():
    """Early Runner-based harness: load dataset/model, warm up the backend,
    then run loadgen SubmissionRun tests over a hard-coded scenario list and
    every requested latency target, writing aggregated JSON results."""
    args = get_args()
    print(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # dataset to use
    wanted_dataset, preprocessor, postprocessor, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=preprocessor,
                        use_cache=args.cache,
                        count=args.count, **kwargs)

    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)

    # metadata recorded alongside every per-scenario result
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    count = args.count if args.count else ds.get_item_count()

    runner = Runner(model, ds, args.threads, post_process=postprocessor)
    runner.start_pool()

    # warmup
    log.info("warmup ...")
    ds.load_query_samples([0])
    for _ in range(100):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})

    def issue_query(query_samples):
        # called by loadgen to issue queries
        idx = [q.index for q in query_samples]
        query_id = [q.id for q in query_samples]
        data, label = ds.get_samples(idx)
        runner.enqueue(query_id, data, label)

    sut = lg.ConstructSUT(issue_query)
    # NOTE(review): args.time is passed as the QSL performance-sample count
    # here, unlike the other harnesses which use min(count, N) — confirm
    # this is intentional.
    qsl = lg.ConstructQSL(count, args.time,
                          ds.load_query_samples, ds.unload_query_samples)

    # only MultiStream is currently enabled
    scenarios = [
        # lg.TestScenario.SingleStream,
        lg.TestScenario.MultiStream,
        # lg.TestScenario.Cloud,
        # lg.TestScenario.Offline,
    ]
    for scenario in scenarios:
        for target_latency in args.max_latency:
            log.info("starting {}, latency={}".format(scenario, target_latency))
            settings = lg.TestSettings()
            settings.scenario = scenario
            settings.mode = lg.TestMode.SubmissionRun
            settings.samples_per_query = 4  # FIXME: we don't want to know about this
            settings.target_qps = 1000  # FIXME: we don't want to know about this
            settings.target_latency_ns = int(target_latency * 1000000000)

            result_list = []
            result_dict = {"good": 0, "total": 0}
            runner.start_run(result_list, result_dict)
            start = time.time()
            lg.StartTest(sut, qsl, settings)
            add_results(final_results,
                        "{}-{}".format(scenario, target_latency),
                        result_dict, result_list, time.time() - start)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
def main(argv):
    """Cloud-TPU SSD harness entry point.

    Builds one Tensorflow backend per TPU in FLAGS.tpu_name, runs the loadgen
    test with per-TPU model loading/warmup done lazily inside the QSL load
    callback, then copies the loadgen logs (and, in accuracy mode, the mAP
    result) to FLAGS.outdir.
    """
    del argv
    global last_timeing

    if FLAGS.scenario == "Server":
        # Disable garbage collection for realtime performance.
        gc.disable()

    # define backend
    backend = BackendTensorflow()

    # override image format if given
    image_format = FLAGS.data_format if FLAGS.data_format else backend.image_format()

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[FLAGS.dataset]
    ds = wanted_dataset(data_path=FLAGS.dataset_path,
                        image_list=FLAGS.dataset_list,
                        name=FLAGS.dataset,
                        image_format=image_format,
                        use_cache=FLAGS.cache,
                        count=FLAGS.count,
                        cache_dir=FLAGS.cache_dir,
                        annotation_file=FLAGS.annotation_file,
                        use_space_to_depth=FLAGS.use_space_to_depth)

    # load model to backend
    # TODO(wangtao): parse flags to params.
    params = dict(ssd_model.default_hparams().values())
    params["conv0_space_to_depth"] = FLAGS.use_space_to_depth
    params["use_bfloat16"] = FLAGS.use_bfloat16
    params["use_fused_bn"] = FLAGS.use_fused_bn

    # resolve one master address per comma-separated TPU name
    masters = []
    tpu_names = FLAGS.tpu_name
    tpu_names = tpu_names.split(",")
    for tpu_name in tpu_names:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
        masters.append(tpu_cluster_resolver.get_master())

    #
    # make one pass over the dataset to validate accuracy
    #
    count = FLAGS.count if FLAGS.count else ds.get_item_count()

    #
    # warmup
    #
    log.info("warmup ...")

    # Offline batches per FLAGS.batch_size[0]; other scenarios run batch 1
    batch_size = FLAGS.batch_size[0] if FLAGS.scenario == "Offline" else 1
    backend_lists = []
    for _ in range(len(tpu_names)):
        backend = BackendTensorflow()
        backend_lists.append(backend)
    runner = QueueRunner(backend_lists, ds, FLAGS.threads,
                         post_proc=post_proc, max_batchsize=batch_size)
    runner.start_run({}, FLAGS.accuracy)

    def issue_queries(query_samples):
        # called by loadgen to issue queries
        for i in [1]:
            runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    tf.logging.info("starting {}, latency={}".format(FLAGS.scenario, FLAGS.max_latency))

    settings = lg.TestSettings()
    tf.logging.info(FLAGS.scenario)
    settings.scenario = SCENARIO_MAP[FLAGS.scenario]
    settings.qsl_rng_seed = FLAGS.qsl_rng_seed
    settings.sample_index_rng_seed = FLAGS.sample_index_rng_seed
    settings.schedule_rng_seed = FLAGS.schedule_rng_seed

    if FLAGS.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    else:
        settings.mode = lg.TestMode.PerformanceOnly

    if FLAGS.qps:
        qps = float(FLAGS.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if FLAGS.time:
        # duration-bounded run; max_* of 0 leaves the upper bound to loadgen
        settings.min_duration_ms = FLAGS.time * MILLI_SEC
        settings.max_duration_ms = 0
        qps = FLAGS.qps or 100
        settings.min_query_count = qps * FLAGS.time
        settings.max_query_count = 0
    else:
        settings.min_query_count = 270336
        settings.max_query_count = 0

    # apply the same latency target to every scenario's knob
    target_latency_ns = int(float(FLAGS.max_latency) * NANO_SEC)
    settings.single_stream_expected_latency_ns = target_latency_ns
    settings.multi_stream_target_latency_ns = target_latency_ns
    settings.server_target_latency_ns = target_latency_ns

    log_settings = lg.LogSettings()
    log_settings.log_output.outdir = tempfile.mkdtemp()
    log_settings.log_output.copy_detail_to_stdout = True
    log_settings.log_output.copy_summary_to_stdout = True
    log_settings.enable_trace = False

    def load_query_samples(sample_list):
        """Load query samples and warmup the model."""
        ds.load_query_samples(sample_list)
        data = ds.get_image_list_inmemory()

        def init_fn(cloud_tpu_id):
            # load and warm the model assigned to this TPU
            tf.logging.info("Load model for %dth cloud tpu", cloud_tpu_id)
            runner.models[cloud_tpu_id].load(
                FLAGS.model,
                FLAGS.output_model_dir,
                data,
                params,
                batch_size=FLAGS.batch_size,
                master=masters[cloud_tpu_id],
                scenario=FLAGS.scenario,
                batch_timeout_micros=FLAGS.batch_timeout_micros)
            # Init TPU.
            for it in range(FLAGS.init_iterations):
                tf.logging.info("Initialize cloud tpu at iteration %d", it)
                for batch_size in FLAGS.batch_size:
                    example, _ = ds.get_indices([sample_list[0]] * batch_size)
                    _ = runner.models[cloud_tpu_id].predict(example)

        # initialize all TPUs in parallel, one thread each
        threads = []
        for i in range(len(tpu_names)):
            thread = threading.Thread(target=init_fn, args=(i, ))
            threads.append(thread)
            thread.start()
        for thread in threads:
            thread.join()

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 350), load_query_samples,
                          ds.unload_query_samples)

    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    # copy loadgen logs from the temp dir to the requested output dir
    tf.io.gfile.mkdir(FLAGS.outdir)
    for oldfile in tf.gfile.Glob(
            os.path.join(log_settings.log_output.outdir, "*")):
        basename = os.path.basename(oldfile)
        newfile = os.path.join(FLAGS.outdir, basename)
        tf.gfile.Copy(oldfile, newfile, overwrite=True)

    if FLAGS.accuracy:
        with tf.gfile.Open(os.path.join(FLAGS.outdir, "results.txt"), "w") as f:
            results = {"mAP": accuracy_coco.main()}
            json.dump(results, f, sort_keys=True, indent=4)
def main():
    """Harness entry point: build dataset/model, warm up, optionally run an
    accuracy pass outside loadgen, then run loadgen for each scenario and
    latency target and write aggregated JSON results."""
    global last_timeing
    args = get_args()
    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=args.count, **kwargs)

    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)

    # metadata recorded alongside every per-scenario result
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    count = args.count if args.count else ds.get_item_count()

    runner = Runner(model, ds, args.threads, post_proc=post_proc)

    #
    # warmup
    #
    log.info("warmup ...")
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    if args.accuracy:
        #
        # accuracy pass: every sample fed one at a time, timings collected
        # by the runner in result_dict["timing"]
        #
        log.info("starting accuracy pass on {} items".format(count))
        runner.start_pool(nolg=True)
        result_dict = {
            "good": 0,
            "total": 0,
            "scenario": "Accuracy",
            "timing": []
        }
        runner.start_run(result_dict, True)
        start = time.time()
        for idx in range(0, count):
            ds.load_query_samples([idx])
            data, label = ds.get_samples([idx])
            runner.enqueue([idx], [idx], data, label)
        runner.finish()
        # aggregate results
        post_proc.finalize(result_dict, ds,
                           output_dir=os.path.dirname(args.output))
        # pull the per-sample timings out before reporting the dict
        last_timeing = result_dict["timing"]
        del result_dict["timing"]
        add_results(final_results, "Accuracy", result_dict, last_timeing,
                    time.time() - start)

    #
    # run the benchmark with timing
    #
    runner.start_pool()

    def issue_query(query_samples):
        # called by loadgen to issue queries
        idx = [q.index for q in query_samples]
        query_id = [q.id for q in query_samples]
        data, label = ds.get_samples(idx)
        runner.enqueue(query_id, idx, data, label)

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / 1e9 for t in latencies_ns]

    sut = lg.ConstructSUT(issue_query, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 1000),
                          ds.load_query_samples, ds.unload_query_samples)

    for scenario in args.scenario:
        for target_latency in args.max_latency:
            log.info("starting {}, latency={}".format(scenario, target_latency))
            settings = lg.TestSettings()
            log.info(scenario)
            # args.scenario may carry a TestMode value instead of a
            # TestScenario; route it to the right settings field
            if str(scenario) == 'TestMode.AccuracyOnly':
                settings.mode = scenario
            else:
                settings.scenario = scenario

            if args.qps:
                settings.enable_spec_overrides = True
                qps = float(args.qps)
                settings.server_target_qps = qps
                settings.offline_expected_qps = qps

            if args.time:
                settings.enable_spec_overrides = True
                settings.override_min_duration_ms = args.time * MILLI_SEC
                settings.override_max_duration_ms = args.time * MILLI_SEC
                qps = args.qps or 100
                settings.override_min_query_count = qps * args.time
                settings.override_max_query_count = qps * args.time

            # NOTE(review): `or` binds looser than `and`, so this reads as
            # args.time or (args.qps and scenario != AccuracyOnly) — confirm
            # that is the intended grouping.
            if args.time or args.qps and str(
                    scenario) != 'TestMode.AccuracyOnly':
                settings.mode = lg.TestMode.PerformanceOnly
            # FIXME: add SubmissionRun once available

            settings.enable_spec_overrides = True
            settings.single_stream_expected_latency_ns = int(target_latency * NANO_SEC)
            settings.override_target_latency_ns = int(target_latency * NANO_SEC)

            result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
            runner.start_run(result_dict, False)
            lg.StartTest(sut, qsl, settings)

            add_results(final_results,
                        "{}-{}".format(scenario, target_latency),
                        result_dict, last_timeing,
                        time.time() - ds.last_loaded)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
def __init__(self,
             config_toml,
             checkpoint_path,
             dataset_dir,
             manifest_filepath,
             perf_count,
             total_query_count,
             scenario,
             machine_conf,
             batch_size=1,
             cores_for_loadgen=0,
             cores_per_instance=1,
             enable_debug=False,
             cosim=False,
             profile=False,
             ipex=False,
             bf16=False,
             warmup=False):
    """Multi-instance RNN-T SUT.

    Partitions the machine's cores into Consumer worker processes (a single
    shared input queue for Offline; one queue per sequence-length bucket for
    Server, driven by `machine_conf`), constructs the loadgen SUT/QSL pair,
    starts the workers plus a daemon response thread, and blocks until all
    workers report ready.
    """
    ### multi instance attributes
    self.batch_size = batch_size
    self.cores_for_loadgen = cores_for_loadgen
    self.cores_per_instance = cores_per_instance
    self.num_cores = get_num_cores()
    self.lock = mp.Lock()
    self.init_counter = mp.Value("i", 0)
    self.output_queue = mp.Queue()
    self.input_queue = mp.JoinableQueue()
    self.cosim = cosim
    self.ipex = ipex
    self.bf16 = bf16
    self.warmup = warmup
    self.scenario = scenario
    # server-specific attributes; num_queues is filled in by
    # read_machine_conf() when scenario == "Server"
    self.num_queues = None
    self.core_count_list = []
    self.num_instance_list = []
    self.seq_cutoff_list = []
    self.batch_size_list = []
    self.input_queue_list = []
    self.total_query_count = total_query_count
    if self.scenario == "Server":
        # read config
        self.read_machine_conf(machine_conf)
        # create queue list (one JoinableQueue per server bucket)
        for _ in range(self.num_queues):
            self.input_queue_list.append(mp.JoinableQueue())

    # parse the model/featurizer TOML config
    config = toml.load(config_toml)
    dataset_vocab = config['labels']['labels']
    rnnt_vocab = add_blank_label(dataset_vocab)
    featurizer_config = config['input_eval']
    self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries,
                               self.process_latencies)
    self.qsl = AudioQSLInMemory(dataset_dir, manifest_filepath,
                                dataset_vocab,
                                featurizer_config["sample_rate"], perf_count)
    if self.scenario == "Offline":
        self.issue_queue = InQueue(self.input_queue, batch_size)
    elif self.scenario == "Server":
        self.issue_queue = InQueueServer(self.input_queue_list, self.qsl,
                                         self.seq_cutoff_list,
                                         self.batch_size_list,
                                         self.total_query_count)

    ### worker process
    # Assign each Consumer an inclusive [start, end] core range, starting
    # after the cores reserved for loadgen itself.
    self.consumers = []
    cur_core_idx = self.cores_for_loadgen
    rank = 0
    if self.scenario == "Offline":
        # as many fixed-size instances as fit on the remaining cores
        while cur_core_idx + self.cores_per_instance <= self.num_cores:
            self.consumers.append(
                Consumer(self.input_queue, self.output_queue, self.lock,
                         self.init_counter, rank, cur_core_idx,
                         cur_core_idx + self.cores_per_instance - 1,
                         self.num_cores, self.qsl, config_toml,
                         checkpoint_path, dataset_dir, manifest_filepath,
                         perf_count, cosim, profile, ipex, bf16, warmup))
            rank += 1
            cur_core_idx += self.cores_per_instance
    elif self.scenario == "Server":
        # per-bucket instance counts and core widths come from machine_conf
        for i in range(self.num_queues):
            curr_cores_per_instance = self.core_count_list[i]
            for _ in range(self.num_instance_list[i]):
                self.consumers.append(
                    Consumer(self.input_queue_list[i], self.output_queue,
                             self.lock, self.init_counter, rank,
                             cur_core_idx,
                             cur_core_idx + curr_cores_per_instance - 1,
                             self.num_cores, self.qsl, config_toml,
                             checkpoint_path, dataset_dir,
                             manifest_filepath, perf_count, cosim, profile,
                             ipex, bf16, warmup))
                rank += 1
                cur_core_idx += curr_cores_per_instance
    self.num_instances = len(self.consumers)

    ### start worker process
    for c in self.consumers:
        c.start()

    ### wait until all sub processes are ready
    block_until(self.init_counter, self.num_instances, 2)

    ### start response thread (daemon so it never blocks process exit)
    self.response_worker = threading.Thread(target=response_loadgen,
                                            args=(self.output_queue, ))
    self.response_worker.daemon = True
    self.response_worker.start()

    ### debug
    global debug
    debug = enable_debug
def main(argv):
    """GNMT harness entry point: configure loadgen, build and load the
    GNMTRunner (Offline/Server only), run the test, copy the loadgen logs to
    FLAGS.outdir, and in accuracy mode compute and print the BLEU score."""
    del argv
    settings = mlperf_loadgen.TestSettings()
    settings.qsl_rng_seed = FLAGS.qsl_rng_seed
    settings.sample_index_rng_seed = FLAGS.sample_index_rng_seed
    settings.schedule_rng_seed = FLAGS.schedule_rng_seed
    if FLAGS.accuracy_mode:
        settings.mode = mlperf_loadgen.TestMode.AccuracyOnly
    else:
        settings.mode = mlperf_loadgen.TestMode.PerformanceOnly
    settings.scenario = SCENARIO_MAP[FLAGS.scenario]
    if FLAGS.qps:
        qps = float(FLAGS.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if FLAGS.scenario == "Offline" or FLAGS.scenario == "Server":
        # one master address per comma-separated entry in FLAGS.master
        masters = FLAGS.master
        masters = masters.split(",")
        if len(masters) < 1:
            masters = [FLAGS.master]

        runner = loadgen_gnmt.GNMTRunner(input_file=FLAGS.input_file,
                                         ckpt_path=FLAGS.ckpt_path,
                                         hparams_path=FLAGS.hparams_path,
                                         vocab_prefix=FLAGS.vocab_prefix,
                                         outdir=FLAGS.outdir,
                                         batch_size=FLAGS.batch_size,
                                         verbose=FLAGS.verbose,
                                         masters=masters,
                                         scenario=FLAGS.scenario)

        runner.load(FLAGS.batch_timeout_micros)

        # Specify exactly how many queries need to be made
        settings.min_query_count = FLAGS.qps * FLAGS.time
        settings.max_query_count = 0
        settings.min_duration_ms = 60 * MILLI_SEC
        settings.max_duration_ms = 0
        settings.server_target_latency_ns = int(0.25 * NANO_SEC)
        settings.server_target_latency_percentile = 0.97
    else:
        print("Invalid scenario selected")
        assert False

    # Create a thread in the GNMTRunner to start accepting work
    runner.start_worker()

    # Maximum sample ID + 1
    total_queries = FLAGS.query_count
    # Select the same subset of $perf_queries samples
    perf_queries = FLAGS.query_count

    sut = mlperf_loadgen.ConstructSUT(runner.enqueue, flush_queries,
                                      generic_loadgen.process_latencies)
    qsl = mlperf_loadgen.ConstructQSL(total_queries, perf_queries,
                                      runner.load_samples_to_ram,
                                      runner.unload_samples_from_ram)

    log_settings = mlperf_loadgen.LogSettings()
    log_settings.log_output.outdir = tempfile.mkdtemp()
    # Disable detail logs to prevent it from stepping on the summary
    # log in stdout on some systems.
    log_settings.log_output.copy_detail_to_stdout = False
    log_settings.log_output.copy_summary_to_stdout = True
    log_settings.enable_trace = False
    mlperf_loadgen.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    runner.finish()
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)

    # copy the loadgen logs from the temp dir to the requested output dir
    for oldfile in tf.gfile.Glob(
            os.path.join(log_settings.log_output.outdir, "*")):
        basename = os.path.basename(oldfile)
        newfile = os.path.join(FLAGS.outdir, basename)
        tf.gfile.Copy(oldfile, newfile, overwrite=True)

    if FLAGS.accuracy_mode:
        # score the accuracy log against the reference translations
        log_accuracy = os.path.join(log_settings.log_output.outdir,
                                    "mlperf_log_accuracy.json")
        tf.gfile.Copy(FLAGS.reference, "/tmp/reference")
        bleu = process_accuracy.get_accuracy("/tmp/reference", log_accuracy)
        print("BLEU: %.2f" % (bleu * 100))  # pylint: disable=superfluous-parens