    def __init__(self, mxnet_vocab=None, perf_count=None, logger=None):
        self.logger = logger
        if self.logger:
            self.logger.info("Constructing QSL...")
        test_batch_size = 1
        eval_features = []

        if self.logger:
            self.logger.info("Creating tokenizer...")
        with open(mxnet_vocab, 'r') as f:
            vocab = nlp.vocab.BERTVocab.from_json(f.read())
        tokenizer = nlp.data.BERTTokenizer(vocab=vocab, lower=True)

        round_to = None
        if self.logger:
            self.logger.info("Reading examples...")
        dev_path = os.path.join(os.getcwd(), 'build/data')
        dev_data = SQuAD('dev', version='1.1', root=dev_path)
        dev_data_transform = preprocess_dataset(
            tokenizer,
            dev_data,
            max_seq_length=max_seq_length,
            doc_stride=doc_stride,
            max_query_length=max_query_length,
            input_features=True)

        self.eval_features = dev_data_transform
        self.count = len(self.eval_features)
        self.perf_count = perf_count if perf_count is not None else self.count
        self.qsl = lg.ConstructQSL(self.count, self.perf_count,
                                   self.load_query_samples,
                                   self.unload_query_samples)
        if self.logger:
            self.logger.info("Finished constructing QSL.")
def benchmark_using_loadgen(scenario_str, mode_str, samples_in_mem,
                            config_filepath):
    "Perform the benchmark using python API for the LoadGen librar"

    scenario = {
        'SingleStream': lg.TestScenario.SingleStream,
        'MultiStream': lg.TestScenario.MultiStream,
        'Server': lg.TestScenario.Server,
        'Offline': lg.TestScenario.Offline,
    }[scenario_str]

    mode = {
        'AccuracyOnly': lg.TestMode.AccuracyOnly,
        'PerformanceOnly': lg.TestMode.PerformanceOnly,
        'SubmissionRun': lg.TestMode.SubmissionRun,
    }[mode_str]

    ts = lg.TestSettings()
    if config_filepath:
        ts.FromConfig(config_filepath, 'random_model_name', scenario_str)
    ts.scenario = scenario
    ts.mode = mode

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(dataset_size, samples_in_mem, load_query_samples,
                          unload_query_samples)

    log_settings = lg.LogSettings()
    log_settings.enable_trace = False
    lg.StartTestWithLogSettings(sut, qsl, ts, log_settings)

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
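lg.ConstructSUT expects three module-level callbacks that the function above assumes already exist. A hedged sketch of their usual shape (predict_for_sample is a hypothetical helper returning raw result bytes for one sample index):

import array

def issue_queries(query_samples):
    # Run inference and hand each completed sample back to LoadGen.
    responses = []
    for qs in query_samples:
        data = predict_for_sample(qs.index)  # hypothetical helper
        response_array = array.array('B', data)
        bi = response_array.buffer_info()
        responses.append(lg.QuerySampleResponse(qs.id, bi[0], bi[1]))
    lg.QuerySamplesComplete(responses)

def flush_queries():
    # No batching buffer in this sketch, so nothing to flush.
    pass

def process_latencies(latencies_ns):
    # LoadGen reports raw latencies here; this sketch ignores them.
    pass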
Example #3
    def __init__(self, perf_count=None):
        print("Creating tokenizer...")
        tokenizer = BertTokenizer("build/data/bert_tf_v1_1_large_fp32_384_v2/vocab.txt")

        print("Reading examples...")
        eval_examples = read_squad_examples(input_file="build/data/dev-v1.1.json",
            is_training=False, version_2_with_negative=False)

        print("Converting examples to features...")
        eval_features = []
        def append_feature(feature):
            eval_features.append(feature)

        convert_examples_to_features(
            examples=eval_examples,
            tokenizer=tokenizer,
            max_seq_length=max_seq_length,
            doc_stride=doc_stride,
            max_query_length=max_query_length,
            is_training=False,
            output_fn=append_feature,
            verbose_logging=False)

        print("Constructing QSL...")
        self.eval_features = eval_features
        self.count = len(self.eval_features)
        self.perf_count = perf_count if perf_count is not None else self.count
        self.qsl = lg.ConstructQSL(self.count, self.perf_count, self.load_query_samples, self.unload_query_samples)
        print("Finished constructing QSL.")
Example #4
    def __init__(self, preprocessed_data_dir, perf_count):
        """
        Constructs all the necessary attributes for QSL

        Parameters
        ----------
            preprocessed_data_dir: str or PosixPath
                path to directory containing preprocessed data
            perf_count: int
                number of query samples guaranteed to fit in memory
        """
        print("Constructing QSL...")
        self.preprocessed_data_dir = preprocessed_data_dir
        with open(Path(self.preprocessed_data_dir, "preprocessed_files.pkl"),
                  "rb") as f:
            self.preprocess_files = pickle.load(f)['file_list']

        self.count = len(self.preprocess_files)
        self.perf_count = perf_count if perf_count is not None else self.count
        print("Found {:d} preprocessed files".format(self.count))
        print("Using performance count = {:d}".format(self.perf_count))

        self.loaded_files = {}
        self.qsl = lg.ConstructQSL(self.count, self.perf_count,
                                   self.load_query_samples,
                                   self.unload_query_samples)
        print("Finished constructing QSL.")
def benchmark_using_loadgen():
    "Perform the benchmark using python API for the LoadGen library"

    scenario = {
        'SingleStream': lg.TestScenario.SingleStream,
        'MultiStream': lg.TestScenario.MultiStream,
        'Server': lg.TestScenario.Server,
        'Offline': lg.TestScenario.Offline,
    }[LOADGEN_SCENARIO]

    mode = {
        'AccuracyOnly': lg.TestMode.AccuracyOnly,
        'PerformanceOnly': lg.TestMode.PerformanceOnly,
        'SubmissionRun': lg.TestMode.SubmissionRun,
    }[LOADGEN_MODE]

    ts = lg.TestSettings()
    ts.FromConfig(MLPERF_CONF_PATH, MODEL_NAME, LOADGEN_SCENARIO)
    ts.FromConfig(USER_CONF_PATH, MODEL_NAME, LOADGEN_SCENARIO)
    ts.scenario = scenario
    ts.mode = mode

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(LOADGEN_DATASET_SIZE, LOADGEN_BUFFER_SIZE,
                          load_query_samples, unload_query_samples)

    log_settings = lg.LogSettings()
    log_settings.enable_trace = False
    lg.StartTestWithLogSettings(sut, qsl, ts, log_settings)

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
Example #6
def benchmark_using_loadgen():
    "Perform the benchmark using python API for the LoadGen library"

    global model

    # Load the [cached] Torch model
    torchvision_version = ''  # master by default
    try:
        import torchvision
        torchvision_version = ':v' + torchvision.__version__
    except Exception:
        pass

    model = torch.hub.load('pytorch/vision' + torchvision_version,
                           MODEL_NAME,
                           pretrained=True)
    model.eval()

    # move the model to GPU for speed if available
    if USE_CUDA:
        model.to('cuda')

    scenario = {
        'SingleStream': lg.TestScenario.SingleStream,
        'MultiStream': lg.TestScenario.MultiStream,
        'Server': lg.TestScenario.Server,
        'Offline': lg.TestScenario.Offline,
    }[LOADGEN_SCENARIO]

    mode = {
        'AccuracyOnly': lg.TestMode.AccuracyOnly,
        'PerformanceOnly': lg.TestMode.PerformanceOnly,
        'SubmissionRun': lg.TestMode.SubmissionRun,
    }[LOADGEN_MODE]

    ts = lg.TestSettings()
    ts.FromConfig(MLPERF_CONF_PATH, MODEL_NAME, LOADGEN_SCENARIO)
    ts.FromConfig(USER_CONF_PATH, MODEL_NAME, LOADGEN_SCENARIO)
    ts.scenario = scenario
    ts.mode = mode

    if LOADGEN_MULTISTREAMNESS:
        ts.multi_stream_samples_per_query = int(LOADGEN_MULTISTREAMNESS)

    if LOADGEN_COUNT_OVERRIDE:
        ts.min_query_count = int(LOADGEN_COUNT_OVERRIDE)
        ts.max_query_count = int(LOADGEN_COUNT_OVERRIDE)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(LOADGEN_DATASET_SIZE, LOADGEN_BUFFER_SIZE,
                          load_query_samples, unload_query_samples)

    log_settings = lg.LogSettings()
    log_settings.enable_trace = False
    lg.StartTestWithLogSettings(sut, qsl, ts, log_settings)

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
Example #7
def main(argv):
    settings = mlperf_loadgen.TestSettings()
    settings.scenario = mlperf_loadgen.TestScenario.SingleStream
    settings.mode = mlperf_loadgen.TestMode.PerformanceOnly

    sut = mlperf_loadgen.ConstructSUT(
        issue_query, flush_queries, process_latencies)
    qsl = mlperf_loadgen.ConstructQSL(
        1024 * 1024, 1024, load_samples_to_ram, unload_samples_from_ram)
    mlperf_loadgen.StartTest(sut, qsl, settings)
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)
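These small demos assume module-level callbacks as well. In the upstream LoadGen demos they are essentially no-ops that complete each query immediately; a sketch along those lines:

def load_samples_to_ram(query_samples):
    del query_samples  # dummy workload: nothing to stage
    return

def unload_samples_from_ram(query_samples):
    del query_samples
    return

def issue_query(query_samples):
    # Complete every query right away with an empty response.
    responses = [mlperf_loadgen.QuerySampleResponse(s.id, 0, 0)
                 for s in query_samples]
    mlperf_loadgen.QuerySamplesComplete(responses)

def flush_queries():
    pass

def process_latencies(latencies_ns):
    del latencies_ns
    return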
Example #8
def main(argv):
    settings = mlperf_loadgen.TestSettings()
    settings.scenario = mlperf_loadgen.TestScenario.Offline
    settings.mode = mlperf_loadgen.TestMode.PerformanceOnly
    settings.offline_expected_qps = 1000

    sut = mlperf_loadgen.ConstructSUT(
        issue_query, flush_queries, process_latencies)
    qsl = mlperf_loadgen.ConstructQSL(
        1024, 128, load_samples_to_ram, unload_samples_from_ram)
    mlperf_loadgen.StartTest(sut, qsl, settings)
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)
Example #9
def main(argv):
    settings = mlperf_loadgen.TestSettings()
    settings.scenario = mlperf_loadgen.TestScenario.MultiStream
    settings.mode = mlperf_loadgen.TestMode.SubmissionRun
    settings.samples_per_query = 4
    settings.target_qps = 1000
    settings.target_latency_ns = 1000000000

    sut = mlperf_loadgen.ConstructSUT(issue_query, flush_queries,
                                      process_latencies)
    qsl = mlperf_loadgen.ConstructQSL(1024, 128, load_samples_to_ram,
                                      unload_samples_from_ram)
    mlperf_loadgen.StartTest(sut, qsl, settings)
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)
Example #10
    def __init__(self, preprocessed_data_dir, perf_count):
        print("Constructing QSL...")
        self.preprocessed_data_dir = preprocessed_data_dir
        with open(os.path.join(self.preprocessed_data_dir, "preprocessed_files.pkl"), "rb") as f:
            self.preprocess_files = pickle.load(f)

        self.count = len(self.preprocess_files)
        self.perf_count = perf_count if perf_count is not None else self.count
        print("Found {:d} preprocessed files".format(self.count))
        print("Using performance count = {:d}".format(self.perf_count))

        self.loaded_files = {}
        self.qsl = lg.ConstructQSL(self.count, self.perf_count, self.load_query_samples, self.unload_query_samples)
        print("Finished constructing QSL.")
Example #11
def main(argv):
    settings = mlperf_loadgen.TestSettings()
    settings.scenario = mlperf_loadgen.TestScenario.SingleStream
    settings.mode = mlperf_loadgen.TestMode.AccuracyOnly
    settings.single_stream_expected_latency_ns = 1000000
    settings.min_query_count = 100
    settings.min_duration_ms = 10000

    sut = mlperf_loadgen.ConstructSUT(issue_query, flush_queries,
                                      process_latencies)
    qsl = mlperf_loadgen.ConstructQSL(1024, 128, load_samples_to_ram,
                                      unload_samples_from_ram)
    mlperf_loadgen.StartTest(sut, qsl, settings)
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)
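Example #12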
def main(argv):
    settings = mlperf_loadgen.TestSettings()
    settings.scenario = mlperf_loadgen.TestScenario.SingleStream
    settings.mode = mlperf_loadgen.TestMode.PerformanceOnly
    settings.single_stream_expected_latency_ns = 1000000
    settings.enable_spec_overrides = True
    settings.override_target_latency_ns = 100000000
    settings.override_min_query_count = 100
    settings.override_min_duration_ms = 10000

    sut = mlperf_loadgen.ConstructSUT(issue_query, flush_queries,
                                      process_latencies)
    qsl = mlperf_loadgen.ConstructQSL(1024, 128, load_samples_to_ram,
                                      unload_samples_from_ram)
    mlperf_loadgen.StartTest(sut, qsl, settings)
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)
Example #13
def main(argv):
    settings = mlperf_loadgen.TestSettings()
    settings.scenario = mlperf_loadgen.TestScenario.Server
    settings.mode = mlperf_loadgen.TestMode.PerformanceOnly
    settings.server_target_qps = 100
    settings.server_target_latency_ns = 100000000
    settings.min_query_count = 100
    settings.min_duration_ms = 10000

    sut = mlperf_loadgen.ConstructSUT(issue_query, flush_queries,
                                      process_latencies)
    qsl = mlperf_loadgen.ConstructQSL(1024, 128, load_samples_to_ram,
                                      unload_samples_from_ram)
    mlperf_loadgen.StartTest(sut, qsl, settings)
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)
Example #14
    def __init__(self, perf_count=None, cache_path='eval_features.pickle'):
        print("Constructing QSL...")
        eval_features = []
        # Load features if cached, convert from examples otherwise.
        if os.path.exists(cache_path):
            print("Loading cached features from '%s'..." % cache_path)
            with open(cache_path, 'rb') as cache_file:
                eval_features = pickle.load(cache_file)
        else:
            print("No cached features at '%s'... converting from examples..." %
                  cache_path)

            print("Creating tokenizer...")
            tokenizer = BertTokenizer(
                "build/data/bert_tf_v1_1_large_fp32_384_v2/vocab.txt")

            print("Reading examples...")
            eval_examples = read_squad_examples(
                input_file="build/data/dev-v1.1.json",
                is_training=False,
                version_2_with_negative=False)

            print("Converting examples to features...")

            def append_feature(feature):
                eval_features.append(feature)

            convert_examples_to_features(examples=eval_examples,
                                         tokenizer=tokenizer,
                                         max_seq_length=max_seq_length,
                                         doc_stride=doc_stride,
                                         max_query_length=max_query_length,
                                         is_training=False,
                                         output_fn=append_feature,
                                         verbose_logging=False)

            print("Caching features at '%s'..." % cache_path)
            with open(cache_path, 'wb') as cache_file:
                pickle.dump(eval_features, cache_file)

        self.eval_features = eval_features
        self.count = len(self.eval_features)
        self.perf_count = perf_count if perf_count is not None else self.count
        self.qsl = lg.ConstructQSL(self.count, self.perf_count,
                                   self.load_query_samples,
                                   self.unload_query_samples)
        print("Finished constructing QSL.")
Example #15
def benchmark_using_loadgen():
    "Perform the benchmark using python API for the LoadGen library"

    global num_classes
    global model_output_volume

    (pycuda_context, max_batch_size, input_volume,
     model_output_volume, num_layers) = initialize_predictor()
    num_classes = len(class_labels)

    scenario = {
        'SingleStream': lg.TestScenario.SingleStream,
        'MultiStream': lg.TestScenario.MultiStream,
        'Server': lg.TestScenario.Server,
        'Offline': lg.TestScenario.Offline,
    }[LOADGEN_SCENARIO]

    mode = {
        'AccuracyOnly': lg.TestMode.AccuracyOnly,
        'PerformanceOnly': lg.TestMode.PerformanceOnly,
        'SubmissionRun': lg.TestMode.SubmissionRun,
    }[LOADGEN_MODE]

    ts = lg.TestSettings()
    ts.FromConfig(MLPERF_CONF_PATH, MODEL_NAME, LOADGEN_SCENARIO)
    ts.FromConfig(USER_CONF_PATH, MODEL_NAME, LOADGEN_SCENARIO)
    ts.scenario = scenario
    ts.mode = mode

    if LOADGEN_MULTISTREAMNESS:
        ts.multi_stream_samples_per_query = int(LOADGEN_MULTISTREAMNESS)

    if LOADGEN_COUNT_OVERRIDE:
        ts.min_query_count = int(LOADGEN_COUNT_OVERRIDE)
        ts.max_query_count = int(LOADGEN_COUNT_OVERRIDE)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(LOADGEN_DATASET_SIZE, LOADGEN_BUFFER_SIZE,
                          load_query_samples, unload_query_samples)

    log_settings = lg.LogSettings()
    log_settings.enable_trace = False
    lg.StartTestWithLogSettings(sut, qsl, ts, log_settings)

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
    pycuda_context.pop()
Example #16
File: QSL.py  Project: xz10620/inference
    def __init__(self, dataset_dir, manifest_filepath, labels,
                 sample_rate=16000, perf_count=None):
        m_paths = [manifest_filepath]
        self.manifest = Manifest(dataset_dir, m_paths, labels, len(labels),
                                 normalize=True, max_duration=15.0)
        self.sample_rate = sample_rate
        self.count = len(self.manifest)
        perf_count = self.count if perf_count is None else perf_count
        self.sample_id_to_sample = {}
        self.qsl = lg.ConstructQSL(self.count, perf_count,
                                   self.load_query_samples,
                                   self.unload_query_samples)
        print(
            "Dataset loaded with {0:.2f} hours. Filtered {1:.2f} hours. Number of samples: {2}".format(
                self.manifest.duration / 3600,
                self.manifest.filtered_duration / 3600,
                self.count))
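A sketch of the matching callbacks, populating the sample_id_to_sample cache declared above (_load_sample is a hypothetical helper that decodes one manifest entry at self.sample_rate):

    def load_query_samples(self, sample_list):
        for sample_id in sample_list:
            self.sample_id_to_sample[sample_id] = self._load_sample(sample_id)

    def unload_query_samples(self, sample_list):
        for sample_id in sample_list:
            del self.sample_id_to_sample[sample_id]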
Example #17
def main(argv):
    del argv
    settings = mlperf_loadgen.TestSettings()
    settings.scenario = mlperf_loadgen.TestScenario.MultiStreamFree
    settings.mode = mlperf_loadgen.TestMode.PerformanceOnly
    settings.multi_stream_target_latency_ns = 100000000
    settings.multi_stream_samples_per_query = 4
    settings.multi_stream_max_async_queries = 2
    settings.min_query_count = 100
    settings.min_duration_ms = 10000

    sut = mlperf_loadgen.ConstructSUT(issue_query, flush_queries,
                                      process_latencies)
    qsl = mlperf_loadgen.ConstructQSL(1024, 128, load_samples_to_ram,
                                      unload_samples_from_ram)
    mlperf_loadgen.StartTest(sut, qsl, settings)
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)
Example #18
    def __init__(self, session, ds, optimization_config, onnx_output_names):

        self.session = session
        self.threads = optimization_config.threads_num
        self.max_batchsize = optimization_config.dynamic_batching_size
        self.ds = ds
        self.onnx_output_names = onnx_output_names
        self.guess = None

        self.cv = threading.Condition()
        self.done = False
        self.q_idx = []
        self.q_query_id = []
        self.workers = []

        self.settings = lg.TestSettings()
        self.settings.scenario = lg.TestScenario.Server
        self.settings.mode = lg.TestMode.FindPeakPerformance

        log_output_settings = lg.LogOutputSettings()
        log_output_settings.outdir = optimization_config.result_path
        log_output_settings.copy_summary_to_stdout = False
        self.log_settings = lg.LogSettings()
        self.log_settings.enable_trace = False
        self.log_settings.log_output = log_output_settings

        self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries, self.process_latencies)
        self.qsl = lg.ConstructQSL(QUERY_COUNT, QUERY_COUNT, ds.load_query_samples, ds.unload_query_samples)

        self.settings.server_coalesce_queries = True
        self.settings.server_target_latency_ns = int(optimization_config.max_latency_ms * NANO_SEC / MILLI_SEC)
        self.settings.server_target_latency_percentile = optimization_config.max_latency_percentile
        self.settings.min_duration_ms = optimization_config.min_duration_sec * MILLI_SEC

        # start all threads
        for _ in range(self.threads):
            worker = threading.Thread(target=self.handle_tasks, args=(self.cv,))
            worker.daemon = True
            self.workers.append(worker)
            worker.start()
        time.sleep(1)
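The worker threads started above run handle_tasks, which is not shown. A hedged sketch of such a loop (make_batch is a hypothetical dataset helper):

    def handle_tasks(self, cv):
        # Drain queued sample indices in batches and answer LoadGen.
        while True:
            with cv:
                cv.wait_for(lambda: self.q_idx or self.done)
                if self.done and not self.q_idx:
                    break
                take = min(len(self.q_idx), self.max_batchsize)
                idx, self.q_idx = self.q_idx[:take], self.q_idx[take:]
                ids, self.q_query_id = (self.q_query_id[:take],
                                        self.q_query_id[take:])
            feed = self.ds.make_batch(idx)  # hypothetical helper
            self.session.run(self.onnx_output_names, feed)
            lg.QuerySamplesComplete(
                [lg.QuerySampleResponse(qid, 0, 0) for qid in ids])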
Example #19
    def start(self):
        """Starts the load test."""
        settings = self.get_test_settings()

        log_settings = lg.LogSettings()
        log_settings.log_output.outdir = tempfile.mkdtemp()
        log_settings.log_output.copy_detail_to_stdout = True
        log_settings.log_output.copy_summary_to_stdout = True
        log_settings.enable_trace = False

        logging.info("Constructing SUT.")
        sut = lg.ConstructSUT(self.issue_queries, self.flush_queries,
                              self.process_metrics)
        logging.info("Constructing QSL.")
        qsl = lg.ConstructQSL(self.total_sample_count,
                              self.performance_sample_count, self.load_samples,
                              self.unload_samples)
        logging.info("Starting test.")
        lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)
        lg.DestroyQSL(qsl)
        lg.DestroySUT(sut)
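get_test_settings is not shown in this listing; a minimal hypothetical sketch of what it could return for a Server run (all values are placeholders):

    def get_test_settings(self):
        settings = lg.TestSettings()
        settings.scenario = lg.TestScenario.Server
        settings.mode = lg.TestMode.PerformanceOnly
        settings.server_target_qps = 100  # placeholder
        return settings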
Example #20
def benchmark_using_loadgen():
    "Perform the benchmark using python API for the LoadGen library"

    global pycuda_context
    initialize_predictor()

    scenario = {
        'SingleStream': lg.TestScenario.SingleStream,
        'MultiStream': lg.TestScenario.MultiStream,
        'Server': lg.TestScenario.Server,
        'Offline': lg.TestScenario.Offline,
    }[LOADGEN_SCENARIO]

    mode = {
        'AccuracyOnly': lg.TestMode.AccuracyOnly,
        'PerformanceOnly': lg.TestMode.PerformanceOnly,
        'SubmissionRun': lg.TestMode.SubmissionRun,
    }[LOADGEN_MODE]

    ts = lg.TestSettings()
    if LOADGEN_CONF_FILE:
        ts.FromConfig(LOADGEN_CONF_FILE, 'random_model_name', LOADGEN_SCENARIO)
    ts.scenario = scenario
    ts.mode = mode

    if LOADGEN_MULTISTREAMNESS:
        ts.multi_stream_samples_per_query = int(LOADGEN_MULTISTREAMNESS)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(LOADGEN_DATASET_SIZE, LOADGEN_BUFFER_SIZE,
                          load_query_samples, unload_query_samples)

    log_settings = lg.LogSettings()
    log_settings.enable_trace = False
    lg.StartTestWithLogSettings(sut, qsl, ts, log_settings)

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
    pycuda_context.pop()
Example #21
File: main.py  Project: tjyhlt/inference
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # --count applies to accuracy mode only and can be used to limit the number of images
    # for testing. For perf model we always limit count to 200.
    count = args.count
    if not count:
        if not args.accuracy:
            count = 200

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[
        args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count,
                        **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    count = ds.get_item_count()

    # warmup
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    for scenario in args.scenario:
        runner_map = {
            lg.TestScenario.SingleStream: RunnerBase,
            lg.TestScenario.MultiStream: QueueRunner,
            lg.TestScenario.Server: QueueRunner,
            lg.TestScenario.Offline: QueueRunner
        }
        runner = runner_map[scenario](model,
                                      ds,
                                      args.threads,
                                      post_proc=post_proc,
                                      max_batchsize=args.max_batchsize)

        def issue_queries(query_samples):
            runner.enqueue(query_samples)

        def flush_queries():
            pass

        def process_latencies(latencies_ns):
            # called by loadgen to show us the recorded latencies
            global last_timeing
            last_timeing = [t / NANO_SEC for t in latencies_ns]

        settings = lg.TestSettings()
        settings.scenario = scenario
        settings.mode = lg.TestMode.PerformanceOnly
        if args.accuracy:
            settings.mode = lg.TestMode.AccuracyOnly

        if args.time:
            # override the time we want to run
            settings.min_duration_ms = args.time * MILLI_SEC
            settings.max_duration_ms = args.time * MILLI_SEC

        if args.qps:
            qps = float(args.qps)
            settings.server_target_qps = qps
            settings.offline_expected_qps = qps

        if scenario == lg.TestScenario.SingleStream:
            settings.min_query_count = args.queries_single
            settings.max_query_count = args.queries_single
        elif scenario == lg.TestScenario.MultiStream:
            settings.min_query_count = args.queries_multi
            settings.max_query_count = args.queries_multi
            settings.multi_stream_samples_per_query = 4
        elif scenario == lg.TestScenario.Server:
            max_latency = args.max_latency
        elif scenario == lg.TestScenario.Offline:
            settings.min_query_count = args.queries_offline
            settings.max_query_count = args.queries_offline

        sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
        qsl = lg.ConstructQSL(count, min(count, 1000), ds.load_query_samples,
                              ds.unload_query_samples)

        if scenario == lg.TestScenario.Server:
            for target_latency in max_latency:
                log.info("starting {}, latency={}".format(
                    scenario, target_latency))
                settings.server_target_latency_ns = int(target_latency *
                                                        NANO_SEC)

                result_dict = {
                    "good": 0,
                    "total": 0,
                    "scenario": str(scenario)
                }
                runner.start_run(result_dict, args.accuracy)
                lg.StartTest(sut, qsl, settings)

                if not last_timeing:
                    last_timeing = runner.result_timing
                if args.accuracy:
                    post_proc.finalize(result_dict,
                                       ds,
                                       output_dir=os.path.dirname(args.output))
                add_results(final_results,
                            "{}-{}".format(scenario, target_latency),
                            result_dict, last_timeing,
                            time.time() - ds.last_loaded, args.accuracy)
        else:
            log.info("starting {}".format(scenario))
            result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
            runner.start_run(result_dict, args.accuracy)
            lg.StartTest(sut, qsl, settings)

            if not last_timeing:
                last_timeing = runner.result_timing
            if args.accuracy:
                post_proc.finalize(result_dict,
                                   ds,
                                   output_dir=os.path.dirname(args.output))
            add_results(final_results, "{}".format(scenario), result_dict,
                        last_timeing,
                        time.time() - ds.last_loaded, args.accuracy)

        runner.finish()
        lg.DestroyQSL(qsl)
        lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
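add_results is called by several of these mains but never defined in the listing. In the reference vision app it summarizes the recorded latencies roughly as below (a distilled sketch, not the exact upstream body):

import numpy as np

def add_results(final_results, name, result_dict, result_list, took,
                show_accuracy=False):
    # Summarize recorded latencies (seconds) into the results dict.
    percentiles = [50., 80., 90., 95., 99., 99.9]
    buckets = np.percentile(result_list, percentiles).tolist()
    result = {
        "took": took,
        "count": len(result_list),
        "qps": len(result_list) / took if took else 0,
        "mean": float(np.mean(result_list)),
        "percentiles": {str(k): v for k, v in zip(percentiles, buckets)},
    }
    if show_accuracy:
        result["accuracy"] = 100.0 * result_dict["good"] / result_dict["total"]
    final_results[name] = result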
Example #22
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # --count applies to accuracy mode only and can be used to limit the number of images
    # for testing. For perf model we always limit count to 200.
    count_override = False
    count = args.count
    if count:
        count_override = True

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[
        args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count,
                        **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    config = os.path.abspath(args.config)
    if not os.path.exists(config):
        log.error("{} not found".format(config))
        sys.exit(1)

    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        os.chdir(output_dir)

    #
    # make one pass over the dataset to validate accuracy
    #
    count = ds.get_item_count()

    # warmup
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }
    runner = runner_map[scenario](model,
                                  ds,
                                  args.threads,
                                  post_proc=post_proc,
                                  max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    settings = lg.TestSettings()
    settings.FromConfig(config, args.model_name, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        #settings.max_duration_ms = args.time * MILLI_SEC

    if count_override:
        settings.min_query_count = count
        # settings.max_query_count = count

    if args.min_query_count:
        settings.min_query_count = args.min_query_count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query

    if args.max_latency:
        settings.single_stream_expected_latency_ns = int(args.max_latency *
                                                         NANO_SEC)
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency *
                                                      NANO_SEC)

    def set_qps(current_qps):
        settings.server_target_qps = current_qps
        settings.offline_expected_qps = current_qps
        settings.multi_stream_target_qps = current_qps
        return current_qps

    # Default to None so the first loop iteration below does not hit a
    # NameError when --qps is not given.
    qps = set_qps(args.qps) if args.qps else None

    lower_qps = -1
    upper_qps = -1
    qps_passed = {}
    while True:
        print("schedual qps:", qps)
        sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
        qsl = lg.ConstructQSL(count, min(count, 500), ds.load_query_samples,
                              ds.unload_query_samples)

        log.info("starting {}".format(scenario))
        result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
        runner.start_run(result_dict, args.accuracy)
        print("max query count:", settings.max_query_count)
        lg.StartTest(sut, qsl, settings)

        if not last_timeing:
            last_timeing = runner.result_timing
        if args.accuracy:
            post_proc.finalize(result_dict, ds, output_dir=args.output)
        took = time.time() - ds.last_loaded
        add_results(final_results, "{}".format(scenario), result_dict,
                    last_timeing, took, args.accuracy)

        lg.DestroyQSL(qsl)
        lg.DestroySUT(sut)

        #
        # write final results
        #
        if args.output:
            with open("results.json", "w") as f:
                json.dump(final_results, f, sort_keys=True, indent=4)

        if args.scenario != 'Server':
            break

        if args.auto_qps is False:
            break

        if lower_qps == -1 or upper_qps == -1:
            base_qps = len(last_timeing) / took
            upper_qps = base_qps * 1.5
            lower_qps = base_qps * 0.5
            qps = set_qps(lower_qps)
            continue

        latency_percentile_ns = np.percentile(
            last_timeing,
            settings.server_target_latency_percentile * 100) * NANO_SEC
        if latency_percentile_ns < settings.server_target_latency_ns and qps > upper_qps * 0.98:
            print("target qps:", qps)
            break

        if upper_qps - lower_qps < 1 and lower_qps in qps_passed:
            print("target qps:", lower_qps)
            break

        if latency_percentile_ns > settings.server_target_latency_ns:
            #reduce qps
            print("reduce qps, bound:[%d, %d]" % (lower_qps, upper_qps))
            upper_qps = qps
            if qps == lower_qps:
                lower_qps = lower_qps * 0.5
                qps = set_qps(lower_qps)
                continue
            if qps > lower_qps:
                qps = set_qps((lower_qps + upper_qps) / 2)
                continue

        if latency_percentile_ns < settings.server_target_latency_ns:
            #increase qps
            qps_passed[qps] = None
            print("increase qps, bound:[%d, %d]" % (lower_qps, upper_qps))
            lower_qps = qps
            if qps == upper_qps:
                upper_qps = upper_qps * 1.5
                qps = set_qps(upper_qps)
                continue
            if qps < upper_qps:
                qps = set_qps((lower_qps + upper_qps) / 2)
                continue

        runner.finish()
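Several of these mains also reference SCENARIO_MAP, NANO_SEC, and MILLI_SEC without defining them; in the MLPerf reference implementations they are module-level constants along these lines:

NANO_SEC = 1e9
MILLI_SEC = 1000

SCENARIO_MAP = {
    "SingleStream": lg.TestScenario.SingleStream,
    "MultiStream": lg.TestScenario.MultiStream,
    "Server": lg.TestScenario.Server,
    "Offline": lg.TestScenario.Offline,
}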
Example #23
def main():
    global num_sockets
    global start_time
    global item_total
    global last_timeing

    args = get_args()
    log.info(args)
    config = os.path.abspath(args.config)
    user_config = os.path.abspath(args.user_config)

    if not os.path.exists(config):
        log.error("{} not found".format(config))
        sys.exit(1)

    if not os.path.exists(user_config):
        log.error("{} not found".format(user_config))
        sys.exit(1)

    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        os.chdir(output_dir)

    lock = multiprocessing.Lock()
    init_counter = multiprocessing.Value("i", 0)
    total_samples = multiprocessing.Value("i", 0)
    dsQueue = multiprocessing.Queue()
    outQueue = multiprocessing.Queue()
    inQueue = multiprocessing.JoinableQueue(num_sockets * 4)
    consumers = [
        Consumer(inQueue, outQueue, dsQueue, lock, init_counter, total_samples,
                 i, args) for i in range(num_sockets)
    ]
    for c in consumers:
        c.start()

    # Wait until subprocess ready
    while init_counter.value < num_sockets:
        time.sleep(2)

    # Start response thread
    response_worker = threading.Thread(target=response_loadgen,
                                       args=(outQueue, args.accuracy))
    response_worker.daemon = True
    response_worker.start()

    scenario = SCENARIO_MAP[args.scenario]
    runner_map = {
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }

    runner = runner_map[scenario](inQueue, max_batchsize=args.max_batchsize)

    def issue_queries(response_ids, query_sample_indexes):
        runner.enqueue(response_ids, query_sample_indexes)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    settings = lg.TestSettings()
    settings.FromConfig(config, args.model, args.scenario)
    settings.FromConfig(user_config, args.model, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
        settings.performance_sample_count_override = total_samples.value

    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.duration:
        settings.min_duration_ms = args.duration
        settings.max_duration_ms = args.duration

    if args.target_qps:
        settings.server_target_qps = float(args.target_qps)
        settings.offline_expected_qps = float(args.target_qps)

    if args.count_queries:
        settings.min_query_count = args.count_queries
        settings.max_query_count = args.count_queries

    if args.samples_per_query_multistream:
        settings.multi_stream_samples_per_query = args.samples_per_query_multistream

    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency *
                                                      NANO_SEC)

    def load_query_samples(sample_list):
        # Wait until subprocess ready
        global start_time
        for _ in range(num_sockets):
            dsQueue.put(sample_list)
        while init_counter.value < 2 * num_sockets:
            time.sleep(2)
        start_time = time.time()

    def unload_query_samples(sample_list):
        pass

    import torch
    import criteo

    sut = lg.ConstructFastSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(
        total_samples.value,
        min(total_samples.value, args.samples_per_query_offline),
        load_query_samples, unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {
        "good": 0,
        "total": 0,
        "roc_auc": 0,
        "scenario": str(scenario)
    }

    lg.StartTest(sut, qsl, settings)

    if not last_timeing:
        last_timeing = item_timing
    if args.accuracy:
        result_dict["good"] = item_good
        result_dict["total"] = item_total
        result_dict["roc_auc"] = criteo.auc_score(item_results)

    final_results = {
        "runtime": "pytorch-native-dlrm",
        "version": torch.__version__,
        "time": int(time.time()),
        "cmdline": str(args),
    }

    add_results(final_results, "{}".format(scenario), result_dict,
                last_timeing,
                time.time() - start_time, args.accuracy)

    inQueue.join()
    for _ in consumers:
        inQueue.put(None)
    for c in consumers:
        c.join()
    outQueue.put(None)

    lg.DestroyQSL(qsl)
    lg.DestroyFastSUT(sut)

    # write final results
    if args.output:
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
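The response thread target response_loadgen is not shown. A hedged sketch consistent with the producer/consumer pattern above (the queue item layout is an assumption; a None sentinel ends the loop):

import array
import numpy as np

def response_loadgen(out_queue, accuracy=False):
    while True:
        item = out_queue.get()
        if item is None:
            break
        query_id, result = item  # assumed item layout
        response_array = array.array(
            "B", np.array(result, np.float32).tobytes())
        bi = response_array.buffer_info()
        lg.QuerySamplesComplete(
            [lg.QuerySampleResponse(query_id, bi[0], bi[1])])

Example #24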
def main():
    global num_ins
    global num_cpus
    global in_queue_cnt
    global out_queue_cnt
    global batching
    global queries_so_far
    global Latencies

    queries_so_far = 0

    args = get_args()
    log.info(args)
    scenario = args.scenario
    accuracy_mode = args.accuracy
    perf_count = args.perf_count
    batch_size = args.batch_size
    num_ins = args.num_instance
    num_cpus = args.num_phy_cpus
    batching = args.batching

    # Read Loadgen and workload config parameters
    settings = lg.TestSettings()
    settings.scenario = scenario_map[scenario]
    settings.FromConfig(args.mlperf_conf, "bert", scenario)
    settings.FromConfig(args.user_conf, "bert", scenario)
    settings.mode = lg.TestMode.AccuracyOnly if accuracy_mode else lg.TestMode.PerformanceOnly

    # Establish communication queues
    lock = multiprocessing.Lock()
    init_counter = multiprocessing.Value("i", 0)
    calibrate_counter = multiprocessing.Value("i", 0)
    out_queue = multiprocessing.Queue()

    # Create consumers
    consumers = []
    if scenario == "Server":
        from parse_server_config import configParser

        buckets = configParser("machine_conf.json")
        cutoffs = list(buckets.keys())
        batch_sizes = {}

        in_queue = {j: multiprocessing.JoinableQueue() for j in buckets}
        proc_idx = 0
        num_cpus = 0
        total_ins = 0
        for cutoff in list(buckets.keys()):
            batch_sizes[cutoff] = buckets[cutoff]["batch_size"]
            num_ins = buckets[cutoff]["instances"]
            cpus_per_instance = buckets[cutoff]["cpus_per_instance"]
            num_cpus = num_ins * cpus_per_instance
            total_ins += num_ins

            for j in range(num_ins):
                consumer = Consumer(in_queue[cutoff], out_queue, lock,
                                    init_counter, calibrate_counter, proc_idx,
                                    num_ins, args, cutoff)
                consumer.start_core_idx = proc_idx
                consumer.end_core_idx = proc_idx + cpus_per_instance - 1
                consumers.append(consumer)
                proc_idx = consumer.end_core_idx + 1

        num_ins = total_ins

    else:
        total_ins = num_ins
        in_queue = MultiprocessShapeBasedQueue()
        consumers = [
            Consumer(in_queue, out_queue, lock, init_counter,
                     calibrate_counter, i, num_ins, args)
            for i in range(num_ins)
        ]

    for c in consumers:
        c.start()

    # Dataset object used by constructQSL
    data_set = BERTDataSet(args.vocab, args.perf_count)
    if scenario == "Server":
        issue_queue = InQueueServer(in_queue, batch_sizes, data_set,
                                    settings.min_query_count)
    else:
        issue_queue = InQueue(in_queue, batch_size, data_set)

    # Wait until all sub-processors are ready
    block_until(init_counter, total_ins, 2)

    # Start response thread
    response_worker = threading.Thread(target=response_loadgen,
                                       args=(out_queue, ))
    response_worker.daemon = True
    response_worker.start()

    def issue_queries(query_samples):
        # It's called by loadgen to send query to SUT
        issue_queue.put(query_samples)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(data_set.count, data_set.perf_count,
                          load_query_samples, unload_query_samples)

    log_path = "build/logs"
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    # Wait until outQueue done
    while out_queue_cnt < in_queue_cnt:
        time.sleep(0.2)

    if scenario == "Server":
        for i in in_queue:
            in_queue[i].join()
            for j in range(buckets[i]["cpus_per_instance"]):
                in_queue[i].put(None)
    else:
        for i in range(num_ins):
            in_queue.put(None)

    for c in consumers:
        c.join()
    out_queue.put(None)

    if accuracy_mode:
        cmd = "python accuracy-squad.py --log_file={}/mlperf_log_accuracy.json".format(
            log_path)
        subprocess.check_call(cmd, shell=True)

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
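block_until is used above to wait for worker start-up; a minimal sketch consistent with the call site block_until(init_counter, total_ins, 2):

def block_until(counter, expected, interval_s):
    # Poll a shared multiprocessing.Value until it reaches the expected count.
    while counter.value < expected:
        time.sleep(interval_s)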
Example #25
def main():

    global so
    global last_timeing
    global last_loaded
    global result_timeing

    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # --count applies to accuracy mode only and can be used to limit the number of images
    # for testing. For perf model we always limit count to 200.
    count_override = False
    count = args.count
    if count:
        count_override = True
    """
    Python signature
    go_initialize(backend, model_path, dataset_path, count, use_gpu, gpu_id, trace_level, max_batchsize)
    """

    count, err = go_initialize(backend, args.model_path, args.dataset_path,
                               count, args.use_gpu, args.gpu_id,
                               args.trace_level, args.max_batchsize)

    if (err != 'nil'):
        print(err)
        raise RuntimeError('initialization in go failed')

    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    log_dir = None

    if args.log_dir:
        log_dir = os.path.abspath(args.log_dir)
        os.makedirs(log_dir, exist_ok=True)

    scenario = SCENARIO_MAP[args.scenario]

    def issue_queries(query_samples):
        global so
        global last_timeing
        global result_timeing
        idx = np.array([q.index for q in query_samples]).astype(np.int32)
        query_id = [q.id for q in query_samples]
        if args.dataset == 'brats2019':
            start = time.time()
            response_array_refs = []
            response = []
            for i, qid in enumerate(query_id):
                processed_results = so.IssueQuery(1, idx[i][np.newaxis])
                processed_results = json.loads(
                    processed_results.decode('utf-8'))
                response_array = array.array(
                    "B",
                    np.array(processed_results[0], np.float16).tobytes())
                response_array_refs.append(response_array)
                bi = response_array.buffer_info()
                response.append(lg.QuerySampleResponse(qid, bi[0], bi[1]))
            result_timeing.append(time.time() - start)
            lg.QuerySamplesComplete(response)
        else:
            start = time.time()
            processed_results = so.IssueQuery(len(idx), idx)
            result_timeing.append(time.time() - start)
            processed_results = json.loads(processed_results.decode('utf-8'))
            response_array_refs = []
            response = []
            for idx, qid in enumerate(query_id):
                response_array = array.array(
                    "B",
                    np.array(processed_results[idx], np.float32).tobytes())
                response_array_refs.append(response_array)
                bi = response_array.buffer_info()
                response.append(lg.QuerySampleResponse(qid, bi[0], bi[1]))
            lg.QuerySamplesComplete(response)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    def load_query_samples(sample_list):
        global so
        global last_loaded
        err = go_load_query_samples(sample_list, so)
        last_loaded = time.time()
        if (err != ''):
            print(err)
            raise RuntimeError('load query samples failed')

    def unload_query_samples(sample_list):
        global so
        err = go_unload_query_samples(sample_list, so)
        if (err != ''):
            print(err)
            raise RuntimeError('unload query samples failed')

    settings = lg.TestSettings()
    if args.model_name != "":
        settings.FromConfig(mlperf_conf, args.model_name, args.scenario)
        settings.FromConfig(user_conf, args.model_name, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        settings.max_duration_ms = args.time * MILLI_SEC

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if count_override:
        settings.min_query_count = count
        settings.max_query_count = count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query
    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency *
                                                      NANO_SEC)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 500), load_query_samples,
                          unload_query_samples)

    log.info("starting {}".format(scenario))

    log_path = os.path.realpath(args.log_dir)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings
    # log_settings.enable_trace = True
    # lg.StartTest(sut, qsl, settings)
    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    if not last_timeing:
        last_timeing = result_timeing

    if args.accuracy:
        accuracy_script_paths = {
            'coco':
            os.path.realpath(
                '../inference/vision/classification_and_detection/tools/accuracy-coco.py'
            ),
            'imagenet':
            os.path.realpath(
                '../inference/vision/classification_and_detection/tools/accuracy-imagenet.py'
            ),
            'squad':
            os.path.realpath('../inference/language/bert/accuracy-squad.py'),
            'brats2019':
            os.path.realpath(
                '../inference/vision/medical_imaging/3d-unet/accuracy-brats.py'
            ),
        }
        accuracy_script_path = accuracy_script_paths[args.dataset]
        accuracy_file_path = os.path.join(log_dir, 'mlperf_log_accuracy.json')
        data_dir = os.environ['DATA_DIR']
        if args.dataset == 'coco':
            if args.use_inv_map:
                subprocess.check_call(
                    'python3 {} --mlperf-accuracy-file {} --coco-dir {} --use-inv-map'
                    .format(accuracy_script_path, accuracy_file_path,
                            data_dir),
                    shell=True)
            else:
                subprocess.check_call(
                    'python3 {} --mlperf-accuracy-file {} --coco-dir {}'.
                    format(accuracy_script_path, accuracy_file_path, data_dir),
                    shell=True)
        elif args.dataset == 'imagenet':  # imagenet
            subprocess.check_call(
                'python3 {} --mlperf-accuracy-file {} --imagenet-val-file {}'.
                format(accuracy_script_path, accuracy_file_path,
                       os.path.join(data_dir, 'val_map.txt')),
                shell=True)
        elif args.dataset == 'squad':  # squad
            vocab_path = os.path.join(data_dir, 'vocab.txt')
            val_path = os.path.join(data_dir, 'dev-v1.1.json')
            out_path = os.path.join(log_dir, 'predictions.json')
            cache_path = os.path.join(data_dir, 'eval_features.pickle')
            subprocess.check_call(
                'python3 {} --vocab_file {} --val_data {} --log_file {} --out_file {} --features_cache_file {} --max_examples {}'
                .format(accuracy_script_path, vocab_path, val_path,
                        accuracy_file_path, out_path, cache_path, count),
                shell=True)
        elif args.dataset == 'brats2019':  # brats2019
            base_dir = os.path.realpath(
                '../inference/vision/medical_imaging/3d-unet/build')
            post_dir = os.path.join(base_dir, 'postprocessed_data')
            label_dir = os.path.join(
                base_dir,
                'raw_data/nnUNet_raw_data/Task043_BraTS2019/labelsTr')
            os.makedirs(post_dir, exist_ok=True)
            subprocess.check_call(
                'python3 {} --log_file {} --preprocessed_data_dir {} --postprocessed_data_dir {} --label_data_dir {}'
                .format(accuracy_script_path, accuracy_file_path, data_dir,
                        post_dir, label_dir),
                shell=True)
        else:
            raise RuntimeError('Dataset not Implemented.')

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
    """
    Python signature
    go_finalize(so)
    """
    err = go_finalize(so)
    if (err != ''):
        print(err)
        raise RuntimeError('finalize in go failed')
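Example #26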
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # --count applies to accuracy mode only and can be used to limit the number of images
    # for testing. For perf model we always limit count to 200.
    count_override = False
    count = args.count
    if count:
        count_override = True

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count, **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    config = os.path.abspath(args.config)
    if not os.path.exists(config):
        log.error("{} not found".format(config))
        sys.exit(1)

    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        os.chdir(output_dir)

    #
    # make one pass over the dataset to validate accuracy
    #
    count = ds.get_item_count()

    # warmup
    warmup_queries = range(args.max_batchsize)
    ds.load_query_samples(warmup_queries)
    for _ in range(2):
        img, _ = ds.get_samples(warmup_queries)
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }
    runner = runner_map[scenario](model, ds, args.threads, post_proc=post_proc, max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    settings = lg.TestSettings()
    settings.FromConfig(config, args.model_name, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        settings.max_duration_ms = args.time * MILLI_SEC

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if count_override:
        settings.min_query_count = count
        settings.max_query_count = count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query
    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency * NANO_SEC)

    # override target latency when it needs to be less than 1ms
    if args.model_name == "mobilenet":
        settings.single_stream_expected_latency_ns = 200000
    elif args.model_name == "resnet50":
        settings.single_stream_expected_latency_ns = 900000
    elif args.model_name == "ssd-mobilenet":
        settings.single_stream_expected_latency_ns = 1000000

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
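    # ConstructQSL args: total sample count, the number of samples LoadGen
    # may assume resident in memory at once, and the load/unload callbacks.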
    qsl = lg.ConstructQSL(count, min(count, 1024), ds.load_query_samples, ds.unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
    runner.start_run(result_dict, args.accuracy)
    if args.enable_trace:
        lg.StartTest(sut, qsl, settings)
    else:
        logsettings = lg.LogSettings()
        logsettings.enable_trace = False
        lg.StartTestWithLogSettings(sut, qsl, settings, logsettings)

    if not last_timeing:
        last_timeing = runner.result_timing
    if args.accuracy:
        post_proc.finalize(result_dict, ds, output_dir=args.output)
    add_results(final_results, "{}".format(scenario),
                result_dict, last_timeing, time.time() - ds.last_loaded, args.accuracy)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
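
# The example above follows the canonical LoadGen lifecycle: construct a SUT
# from three callbacks, construct a QSL from the sample counts and the
# load/unload callbacks, start the test, then destroy both handles. A minimal
# self-contained sketch of that lifecycle (a sketch only: it answers every
# query with a dummy one-byte payload and assumes the `mlperf_loadgen`
# package is installed):
import array
import mlperf_loadgen as lg

def minimal_loadgen_harness(total_samples=1024):

    def load_samples(indices):
        pass  # move the listed samples into memory

    def unload_samples(indices):
        pass  # release them

    def issue(query_samples):
        # Answer each query immediately; the response buffer must stay
        # alive until QuerySamplesComplete returns.
        for qs in query_samples:
            buf = array.array('B', [0])
            addr, _ = buf.buffer_info()
            lg.QuerySamplesComplete([lg.QuerySampleResponse(qs.id, addr, 1)])

    def flush():
        pass

    def latencies(latencies_ns):
        pass

    settings = lg.TestSettings()
    settings.scenario = lg.TestScenario.Offline
    settings.mode = lg.TestMode.PerformanceOnly

    sut = lg.ConstructSUT(issue, flush, latencies)
    qsl = lg.ConstructQSL(total_samples, total_samples,
                          load_samples, unload_samples)
    lg.StartTest(sut, qsl, settings)
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
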
Example #27
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend, args.dataset, args.max_ind_range, args.data_sub_sample_rate, args.use_gpu)

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]

    # --count-samples can be used to limit the number of samples used for testing
    ds = wanted_dataset(data_path=args.dataset_path,
                        name=args.dataset,
                        pre_process=pre_proc,  # currently an identity function
                        use_cache=args.cache,  # currently not used
                        count=args.count_samples,
                        samples_to_aggregate_fix=args.samples_to_aggregate_fix,
                        samples_to_aggregate_min=args.samples_to_aggregate_min,
                        samples_to_aggregate_max=args.samples_to_aggregate_max,
                        samples_to_aggregate_quantile_file=args.samples_to_aggregate_quantile_file,
                        samples_to_aggregate_trace_file=args.samples_to_aggregate_trace_file,
                        test_num_workers=args.test_num_workers,
                        max_ind_range=args.max_ind_range,
                        sub_sample_rate=args.data_sub_sample_rate,
                        mlperf_bin_loader=args.mlperf_bin_loader,
                        **kwargs)
    # load model to backend
    model = backend.load(args.model_path, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        os.chdir(output_dir)

    #
    # make one pass over the dataset to validate accuracy
    #
    count = ds.get_item_count()
    # warmup
    ds.load_query_samples([0])
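    # DLRM warm-up: each sample yields dense features (batch_dense_X) plus
    # sparse embedding offsets (batch_lS_o) and indices (batch_lS_i).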

    for _ in range(5):
        batch_dense_X, batch_lS_o, batch_lS_i, _, _ = ds.get_samples([0])
        _ = backend.predict(batch_dense_X, batch_lS_o, batch_lS_i)

    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }

    runner = runner_map[scenario](model, ds, args.threads, post_proc=post_proc, max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    settings = lg.TestSettings()
    settings.FromConfig(mlperf_conf, args.model_path, args.scenario)
    settings.FromConfig(user_conf, args.model_path, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly

    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.duration:
        settings.min_duration_ms = args.duration
        settings.max_duration_ms = args.duration

    if args.target_qps:
        settings.server_target_qps = float(args.target_qps)
        settings.offline_expected_qps = float(args.target_qps)

    if args.count_queries:
        settings.min_query_count = args.count_queries
        settings.max_query_count = args.count_queries

    if args.samples_per_query_multistream:
        settings.multi_stream_samples_per_query = args.samples_per_query_multistream

    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency * NANO_SEC)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, args.samples_per_query_offline), ds.load_query_samples, ds.unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {"good": 0, "total": 0, "roc_auc": 0, "scenario": str(scenario)}
    runner.start_run(result_dict, args.accuracy)
    lg.StartTest(sut, qsl, settings)

    if not last_timeing:
        last_timeing = runner.result_timing
    if args.accuracy:
        post_proc.finalize(result_dict, ds, output_dir=args.output)
    add_results(final_results, "{}".format(scenario),
                result_dict, last_timeing, time.time() - ds.last_loaded, args.accuracy)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
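
# The example above layers two config files through settings.FromConfig():
# mlperf.conf carries the official defaults, then user.conf applies the
# submitter's overrides on top. The format is `model.scenario.key = value`,
# with `*` as a wildcard. A hedged sketch of writing a minimal user.conf
# (key names assumed from typical MLPerf configs):
def write_minimal_user_conf(path="user.conf"):
    with open(path, "w") as f:
        f.write("*.Offline.target_qps = 100\n")
        f.write("*.Offline.min_duration = 60000\n")  # milliseconds
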
Example #28
def benchmark_using_loadgen():
    "Perform the benchmark using python API for the LoadGen library"

    global funnel_should_be_running, warmup_mode, openme_data

    scenario = {
        'SingleStream':     lg.TestScenario.SingleStream,
        'MultiStream':      lg.TestScenario.MultiStream,
        'Server':           lg.TestScenario.Server,
        'Offline':          lg.TestScenario.Offline,
    }[LOADGEN_SCENARIO]

    mode = {
        'AccuracyOnly':     lg.TestMode.AccuracyOnly,
        'PerformanceOnly':  lg.TestMode.PerformanceOnly,
        'SubmissionRun':    lg.TestMode.SubmissionRun,
    }[LOADGEN_MODE]

    ts = lg.TestSettings()
    if LOADGEN_CONFIG_FILE:
        ts.FromConfig(LOADGEN_CONFIG_FILE, 'random_model_name', LOADGEN_SCENARIO)
    ts.scenario = scenario
    ts.mode     = mode

    if LOADGEN_MULTISTREAMNESS:
        ts.multi_stream_samples_per_query = int(LOADGEN_MULTISTREAMNESS)

    if LOADGEN_MAX_DURATION_S:
        ts.max_duration_ms = int(LOADGEN_MAX_DURATION_S) * 1000

    if LOADGEN_COUNT_OVERRIDE:
        ts.min_query_count = int(LOADGEN_COUNT_OVERRIDE)
        ts.max_query_count = int(LOADGEN_COUNT_OVERRIDE)

    if LOADGEN_TARGET_QPS:
        target_qps                  = float(LOADGEN_TARGET_QPS)
        ts.multi_stream_target_qps  = target_qps
        ts.server_target_qps        = target_qps
        ts.offline_expected_qps     = target_qps

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(LOADGEN_DATASET_SIZE, LOADGEN_BUFFER_SIZE, load_query_samples, unload_query_samples)

    log_settings = lg.LogSettings()
    log_settings.enable_trace = False

    funnel_thread = threading.Thread(target=send_responses, args=())
    funnel_should_be_running = True
    funnel_thread.start()

    if LOADGEN_WARMUP_SAMPLES:
        warmup_id_range = list(range(LOADGEN_WARMUP_SAMPLES))
        load_query_samples(warmup_id_range)

        warmup_mode = True
        print("Sending out the warm-up samples, waiting for responses...")
        issue_queries([lg.QuerySample(i, i) for i in warmup_id_range])

        while len(in_progress) > 0:     # wait for the in-progress queue to drain
            time.sleep(1)
        print(" Done!")

        warmup_mode = False

    lg.StartTestWithLogSettings(sut, qsl, ts, log_settings)

    funnel_should_be_running = False    # politely ask the funnel_thread to end
    funnel_thread.join()                # wait for it to actually end

    from_workers.close()
    to_workers.close()

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    if SIDELOAD_JSON:
        with open(SIDELOAD_JSON, 'w') as sideload_fd:
            json.dump(openme_data, sideload_fd, indent=4, sort_keys=True)
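
# The example above decouples inference from completion: worker processes push
# results into a queue, and a dedicated "funnel" thread forwards them to
# LoadGen. A minimal sketch of such a responder loop (the queue contents, the
# numpy result layout and the stop-flag callable are assumptions for
# illustration):
import queue
import mlperf_loadgen as lg

def response_funnel(result_queue, keep_running):
    # Drain (query_id, numpy_array) pairs from the workers and hand them to
    # LoadGen. The array must stay referenced until the call returns.
    while keep_running() or not result_queue.empty():
        try:
            query_id, arr = result_queue.get(timeout=0.1)
        except queue.Empty:
            continue
        addr = arr.__array_interface__['data'][0]
        lg.QuerySamplesComplete(
            [lg.QuerySampleResponse(query_id, addr, arr.nbytes)])
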
Example #29
def run():
  """Runs the offline mode."""
  global last_timing

  # Initialization
  final_results, count, runner = setup()

  #
  # run the benchmark with timing
  #
  runner.start_pool()

  def issue_query_offline(query_samples):
    """Adds queries to the queue in fixed-size batches."""
    idx = np.array([q.index for q in query_samples])
    query_id = np.array([q.id for q in query_samples])
    batch_size = FLAGS.batch_size[0]
    for i in range(0, len(query_samples), batch_size):
      runner.enqueue(query_id[i:i + batch_size], idx[i:i + batch_size])

  def flush_queries():
    pass

  def process_latencies(latencies_ns):
    global last_timing
    last_timing = [t / 1e9 for t in latencies_ns]

  sut = lg.ConstructSUT(issue_query_offline, flush_queries, process_latencies)

  masters = []

  outdir = FLAGS.outdir if FLAGS.outdir else tempfile.mkdtemp()
  export_outdir = FLAGS.export_outdir if FLAGS.export_outdir else outdir
  export_outdir = os.path.join(export_outdir, "export_model")

  def load_query_samples(sample_list):
    """Load query samples."""
    runner.ds.load_query_samples(sample_list)
    # Find tpu master.
    if FLAGS.num_tpus == 1:
      runner.model.update_qsl(runner.ds.get_image_list_inmemory())
    else:
      for i in range(FLAGS.num_tpus):
        runner.models[i].update_qsl(runner.ds.get_image_list_inmemory())

  def warmup():
    """Warmup the TPUs."""
    load_query_samples([0])
    if FLAGS.num_tpus == 1:
      log.info("warmup ...")
      runner.warmup(0)
      log.info("warmup done")
    else:
      for cloud_tpu_id in range(FLAGS.num_tpus):
        log.info("warmup %d...", cloud_tpu_id)
        runner.warmup(0, cloud_tpu_id)
        log.info("warmup %d done", cloud_tpu_id)

    # After warmup, give the system a moment to quiesce before putting it under
    # load.
    time.sleep(1)

  if FLAGS.num_tpus == 1:
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
    master = tpu_cluster_resolver.get_master()

    runner.model.build_and_export(
        FLAGS.model,
        export_model_path=export_outdir,
        batch_size=FLAGS.batch_size,
        master=master,
        scenario=FLAGS.scenario)
    runner.model.load(export_model_path=export_outdir, master=master)
  else:
    # Use the first TPU instance to build and export the graph.
    tpu_names = FLAGS.tpu_name
    tpu_names = tpu_names.split(",")
    for tpu_name in tpu_names:
      tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
          tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
      masters.append(tpu_cluster_resolver.get_master())

    runner.models[0].build_and_export(
        FLAGS.model,
        export_model_path=export_outdir,
        batch_size=FLAGS.batch_size,
        master=masters[0],
        scenario=FLAGS.scenario)

    def init_fn(cloud_tpu_id):
      """Init and warmup each cloud tpu."""
      runner.models[cloud_tpu_id].load(
          export_model_path=export_outdir, master=masters[cloud_tpu_id])

    threads = []
    for i in range(FLAGS.num_tpus):
      thread = threading.Thread(target=init_fn, args=(i,))
      threads.append(thread)
      thread.start()

    for thread in threads:
      thread.join()

  warmup()

  qsl = lg.ConstructQSL(count, min(count, 1024), load_query_samples,
                        runner.ds.unload_query_samples)

  test_scenarios = FLAGS.scenario
  if test_scenarios is None:
    test_scenarios_list = []
  else:
    test_scenarios_list = test_scenarios.split(",")

  max_latency = FLAGS.max_latency
  max_latency_list = max_latency.split(",")
  for scenario in test_scenarios_list:
    for target_latency in max_latency_list:
      log.info("starting %s, latency=%s", scenario, target_latency)
      settings = lg.TestSettings()
      log.info(scenario)
      if FLAGS.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly

      settings.scenario = utils.SCENARIO_MAP[scenario]

      if FLAGS.qps:
        qps = float(FLAGS.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

      if FLAGS.time:
        settings.min_duration_ms = 60 * MILLI_SEC
        settings.max_duration_ms = 0
        qps = FLAGS.qps or 100
        settings.min_query_count = int(qps * FLAGS.time)
        settings.max_query_count = int(1.1 * qps * FLAGS.time)
      else:
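        # (1 << 21) ~ 2.1M queries: a deliberately large count so the run is
        # bounded by duration rather than by query count.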
        settings.min_query_count = (1 << 21)

      if FLAGS.time or (FLAGS.qps and FLAGS.accuracy):
        settings.mode = lg.TestMode.PerformanceOnly
      # FIXME: add SubmissionRun once available

      target_latency_ns = int(float(target_latency) * (NANO_SEC / MILLI_SEC))
      settings.single_stream_expected_latency_ns = target_latency_ns
      settings.multi_stream_target_latency_ns = target_latency_ns
      settings.server_target_latency_ns = target_latency_ns

      log_settings = lg.LogSettings()
      # TODO(brianderson): figure out how to use internal file path.
      log_settings.log_output.outdir = tempfile.mkdtemp()
      log_settings.log_output.copy_detail_to_stdout = True
      log_settings.log_output.copy_summary_to_stdout = True
      log_settings.enable_trace = False

      result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
      runner.start_run(result_dict, FLAGS.accuracy)

      lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

      if FLAGS.accuracy:
        runner.get_post_process().finalize(result_dict, runner.ds)

      utils.add_results(
          final_results, "{}-{}".format(scenario, target_latency),
          result_dict, last_timing,
          time.time() - runner.ds.last_loaded)

  #
  # write final results
  #
  if FLAGS.outdir:
    outfile = os.path.join(FLAGS.outdir, "results.txt")
    with tf.gfile.Open(outfile, "w") as f:
      json.dump(final_results, f, sort_keys=True, indent=4)
  else:
    json.dump(final_results, sys.stdout, sort_keys=True, indent=4)

  runner.finish()
  lg.DestroyQSL(qsl)
  lg.DestroySUT(sut)

def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)
    if getattr(backend, "max_batchsize", -1) != -1:
        backend.max_batchsize = args.max_batchsize

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # --count applies to accuracy mode only and can be used to limit the number
    # of images used for testing. In performance mode the count is always
    # limited to 200.
    count_override = False
    count = args.count
    if count:
        count_override = True

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[
        args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count,
                        **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    audit_config_cp_loc = None
    output_dir = os.getcwd()  # fallback so later log paths resolve when --output is absent
    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)

        # Check if audit.config file is used, copy to output directory before
        # we chdir to that location so loadgen can find it
        audit_files = glob.glob(
            "ncoresw/mlperf/vision/classification_and_detection/*audit.config")
        if len(audit_files):
            log.info("Found audit.config (" + audit_files[0] + ")")
            audit_config_cp_loc = os.path.join(output_dir, "audit.config")
            # If user already put audit.config at `output` directory, then use
            # that one. Otherwise, copy the one we found in the current
            # directory (before chdir to new output directory).
            if os.path.exists(audit_config_cp_loc):
                log.warning(
                    "audit.config already exists at %s; not copying over the new audit file",
                    audit_config_cp_loc)
                audit_config_cp_loc = None
            else:
                shutil.copy(audit_files[0], audit_config_cp_loc)

        os.chdir(output_dir)

    #
    # make one pass over the dataset to validate accuracy
    #
    count = ds.get_item_count()

    # warmup
    warmup_queries = range(args.max_batchsize)
    ds.load_query_samples(warmup_queries)
    for _ in range(2):
        img, _ = ds.get_samples(warmup_queries)
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }
    runner = runner_map[scenario](model,
                                  ds,
                                  args.threads,
                                  post_proc=post_proc,
                                  max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    settings = lg.TestSettings()
    settings.FromConfig(mlperf_conf, args.model_name, args.scenario)
    settings.FromConfig(user_conf, args.model_name, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        settings.max_duration_ms = args.time * MILLI_SEC

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if count_override:
        settings.min_query_count = count
        settings.max_query_count = count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query
    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency *
                                                      NANO_SEC)

    # override target latency when it needs to be less than 1ms
    if args.model_name == "mobilenet":
        settings.single_stream_expected_latency_ns = 200000
    elif args.model_name == "resnet50":
        settings.single_stream_expected_latency_ns = 900000
    elif args.model_name == "ssd-mobilenet":
        settings.single_stream_expected_latency_ns = 900000

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 1024), ds.load_query_samples,
                          ds.unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
    runner.start_run(result_dict, args.accuracy)
    lg.StartTest(sut, qsl, settings)

    if not last_timeing:
        last_timeing = runner.result_timing
    if args.accuracy:
        post_proc.finalize(result_dict, ds, output_dir=args.output)
    add_results(final_results, "{}".format(scenario), result_dict,
                last_timeing,
                time.time() - ds.last_loaded, args.accuracy)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    # Dump the summary logs to stdout for convenience
    log.info("Output dir: " + os.path.abspath(output_dir))
    with open(os.path.join(output_dir, "mlperf_log_summary.txt"), 'r') as f:
        log.info(f.read())

    # Output accuracy txt file
    if args.accuracy:
        with open(os.path.join(output_dir, "accuracy.txt"), "w") as f_acc:
            # SSD accuracy calculation
            #----------------------------------------
            # The mAP is already stored in result_dict["mAP"], but we call
            # accuracy.CocoAcc just to keep the submission process consistent.
            if args.model_name == "ssd-mobilenet":
                accuracy_str = accuracy.CocoAcc(
                    mlperf_accuracy_file=os.path.join(
                        output_dir, "mlperf_log_accuracy.json"),
                    coco_dir=args.dataset_path).get_accuracy() + "\n"
                f_acc.write(accuracy_str)
                log.info(accuracy_str)

            if args.model_name == "ssd-resnet34":
                accuracy_str = accuracy.CocoAcc(
                    mlperf_accuracy_file=os.path.join(
                        output_dir, "mlperf_log_accuracy.json"),
                    coco_dir=args.dataset_path,
                    use_inv_map=True,
                    remove_48_empty_images=False).get_accuracy() + "\n"
                f_acc.write(accuracy_str)
                log.info(accuracy_str)

            # ImageNet accuracy calculation
            #----------------------------------------
            # The good / total values are already stored in result_dict["good"]
            # and result_dict["total"], but we'll call `accuracy_imagenet()`
            # just to keep the submission process consistent.
            else:
                accuracy_str = accuracy.ImagenetAcc(
                    mlperf_accuracy_file=os.path.join(
                        output_dir, "mlperf_log_accuracy.json"),
                    imagenet_val_file=os.path.join(
                        args.dataset_path,
                        "val_map.txt")).get_accuracy() + "\n"
                f_acc.write(accuracy_str)
                log.info(accuracy_str)

    #
    # write final results
    #
    if args.output:
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
        if audit_config_cp_loc is not None:
            os.remove(audit_config_cp_loc)

    backend_destroy = getattr(backend, "destroy", None)
    if callable(backend_destroy):
        backend.destroy()