Example no. 1
    def run_harness(self):
        flag_dict = self.build_default_flags()
        flag_dict.update(self.build_scenario_specific_flags())

        # Handle engines
        if self.has_gpu:
            flag_dict["gpu_engines"] = self.gpu_engine

        # Generates the entries in the `measurements/` directory, and updates flag_dict accordingly
        generate_measurements_entry(self.get_system_name(), self.name,
                                    self._get_submission_benchmark_name(),
                                    self.scenario, self.args["input_dtype"],
                                    self.args["precision"], flag_dict)

        # Stop here if we are only generating .conf files in measurements
        if self.generate_conf_files_only:
            return "Generated conf files"

        argstr = self._build_custom_flags(flag_dict)
        # Some _build_custom_flags implementations return a (modified) flag dict
        # instead of a string; rebuild the argument string in that case.
        if isinstance(argstr, dict):
            argstr = args_to_string(flag_dict)

        # Handle environment variables
        if self.use_jemalloc:
            self.prepend_ld_preload(
                "/usr/lib/x86_64-linux-gnu/libjemalloc.so.2")

        cmd = "{:} {:}".format(self.executable, argstr)
        output = run_command(cmd, get_output=True, custom_env=self.env_vars)

        # Return harness result.
        return self._handle_harness_result(
            self.harness_get_result(output,
                                    scenario_result_regex[self.scenario]))

    def _build_custom_flags(self, flag_dict):
        # Triton does not use gpu_engines flag
        flag_dict["gpu_engines"] = None

        # Force performance sample count
        flag_dict["performance_sample_count"] = benchmark_qsl_size_map[self.name]

        # Server harness binary assumes GPU and uses --batch_size instead of --gpu_batch_size
        flag_dict["batch_size"] = flag_dict["gpu_batch_size"]
        flag_dict["gpu_batch_size"] = None

        engine_info = self.get_engine_info()
        flag_dict["model_store_path"] = self.model_store_path
        flag_dict["model_name"] = self.model_name
        flag_dict["model_version"] = self.model_version
        flag_dict["buffer_manager_thread_count"] = self.args.get("buffer_manager_thread_count", 0)
        flag_dict["pinned_input"] = True if flag_dict["buffer_manager_thread_count"] == 0 else False
        flag_dict["batch_triton_requests"] = self.args.get("batch_triton_requests", False)
        flag_dict["check_contiguity"] = (flag_dict["batch_triton_requests"] == True) and (self.scenario == "Offline")

        # Inform the server to use different QSL
        flag_dict["use_dlrm_qsl"] = (self.name == BENCHMARKS.DLRM)

        # Set up Triton model repo
        self.setup_triton_model_repo(engine_info)

        argstr = args_to_string(flag_dict) + " --scenario " + self.scenario + " --model " + self.name

        if self.name in [BENCHMARKS.SSDMobileNet, BENCHMARKS.SSDResNet34]:
            argstr += " --response_postprocess coco"

        return argstr
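
None of these examples show args_to_string itself. A minimal sketch of what such a helper might look like, assuming (from the way the snippets null out entries) that flags set to None are dropped and the remaining entries become --key=value pairs:

def args_to_string(flag_dict):
    # Hypothetical sketch: turn the flag dictionary into a CLI argument string.
    # Entries whose value is None are skipped, which is why the examples on this
    # page "remove" a flag by assigning None to it.
    parts = []
    for key, value in flag_dict.items():
        if value is None:
            continue
        if isinstance(value, bool):
            # Booleans become --key=true / --key=false
            parts.append("--{:}={:}".format(key, "true" if value else "false"))
        else:
            parts.append("--{:}={:}".format(key, value))
    return " ".join(parts)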
Example no. 3
    def run_harness(self):
        flag_dict = self.build_default_flags()
        flag_dict.update(self.build_scenario_specific_flags())

        # Handle engines
        if self.has_gpu:
            flag_dict["gpu_engines"] = self.gpu_engine

        # MLPINF-853: Special handling of --fast. Use min_duration=60000, and if Offline or MultiStream, use min_query_count=1.
        if flag_dict.get("fast", False):
            if "min_duration" not in flag_dict:
                flag_dict["min_duration"] = 60000
            if self.scenario in [SCENARIOS.Offline, SCENARIOS.MultiStream]:
                if "min_query_count" not in flag_dict:
                    flag_dict["min_query_count"] = 1
            flag_dict["fast"] = None

        # Generates the entries in the `measurements/` directory, and updates flag_dict accordingly
        generate_measurements_entry(
            self.get_system_name(),
            self.name,
            self._get_submission_benchmark_name(),
            self.scenario,
            self.args["input_dtype"],
            self.args["precision"],
            flag_dict)

        # Stop here if we are only generating .conf files in measurements
        if self.generate_conf_files_only:
            return "Generated conf files"

        argstr = self._build_custom_flags(flag_dict)
        # Some _build_custom_flags implementations return a (modified) flag dict
        # instead of a string; rebuild the argument string in that case.
        if isinstance(argstr, dict):
            argstr = args_to_string(flag_dict)

        # Handle environment variables
        if self.use_jemalloc:
            self.prepend_ld_preload("/usr/lib/x86_64-linux-gnu/libjemalloc.so.2")

        cmd = "{:} {:}".format(self.executable, argstr)
        output = run_command(cmd, get_output=True, custom_env=self.env_vars)

        # Return harness result.
        scenario_key = scenario_loadgen_log_keys[self.scenario]
        results = from_loadgen_by_keys(
            os.path.join(
                self.args["log_dir"],
                self.get_system_name(),
                self._get_submission_benchmark_name(),
                self.scenario),
            ["result_validity", scenario_key])

        if scenario_key not in results:
            result_string = "Cannot find performance result. Maybe you are running in AccuracyOnly mode."
        elif "result_validity" not in results:
            result_string = "{}: {}, Result validity unknown".format(scenario_key, results[scenario_key])
        else:
            result_string = "{}: {}, Result is {}".format(scenario_key, results[scenario_key], results["result_validity"])
        return self._handle_harness_result(result_string)
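
This variant reads the result back from the LoadGen logs instead of regex-matching the harness output. from_loadgen_by_keys and scenario_loadgen_log_keys are not shown on this page; a rough, hypothetical sketch of the reader, assuming the standard mlperf_log_detail.txt format in which each record is a :::MLLOG-prefixed JSON object with "key" and "value" fields:

import json
import os

def from_loadgen_by_keys(log_dir, keys):
    # Hypothetical sketch: collect the requested keys from a LoadGen detail log.
    results = {}
    with open(os.path.join(log_dir, "mlperf_log_detail.txt")) as f:
        for line in f:
            if ":::MLLOG" not in line:
                continue
            record = json.loads(line.split(":::MLLOG", 1)[1])
            if record["key"] in keys:
                results[record["key"]] = record["value"]
    return results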
Example no. 4
    def _build_custom_flags(self, flag_dict):
        # Rename gpu_batch_size to batch_size
        batch_size = dict_get(self.args, "gpu_batch_size", default=None)
        flag_dict["batch_size"] = batch_size
        flag_dict["gpu_batch_size"] = None

        # Rename use_graphs to cuda_graph
        use_graphs = dict_get(self.args, "use_graphs", default=False)
        flag_dict["cuda_graph"] = use_graphs
        flag_dict["use_graphs"] = None

        # Rename max_seq_length to hp_max_seq_length
        max_seq_length = dict_get(self.args, "max_seq_length", default=None)
        flag_dict["hp_max_seq_length"] = max_seq_length
        flag_dict["max_seq_length"] = None

        # Handle more harness_rnnt knobs
        no_pipelined = dict_get(self.args, "nopipelined_execution", default=False)
        flag_dict["pipelined_execution"] = not no_pipelined
        flag_dict["nopipelined_execution"] = None

        # Handle more harness_rnnt knobs: disable batch sorting by sequence length
        no_sorting = dict_get(self.args, "nobatch_sorting", default=False)
        flag_dict["batch_sorting"] = not no_sorting
        flag_dict["nobatch_sorting"] = None

        # Handle yet another harness_rnnt knob: turning off DALI preprocessing for debug
        no_dali = dict_get(self.args, "noenable_audio_processing", default=False)
        flag_dict["enable_audio_processing"] = not no_dali
        flag_dict["noenable_audio_processing"] = None

        # Handle yet another harness_rnnt knob: disable DALI's scatter gather kernel
        no_copy_kernel = dict_get(self.args, "nouse_copy_kernel", default=False)
        flag_dict["use_copy_kernel"] = not no_copy_kernel
        flag_dict["nouse_copy_kernel"] = None

        # Rename gpu_inference_streams to streams_per_gpu
        num_inference = dict_get(self.args, "gpu_inference_streams", default=None)
        flag_dict["streams_per_gpu"] = num_inference
        flag_dict["gpu_inference_streams"] = None

        audio_fp16_input = dict_get(self.args, "audio_fp16_input", default=True)
        flag_dict["audio_fp16_input"] = audio_fp16_input

        start_from_device = dict_get(self.args, "start_from_device", default=False)
        flag_dict["start_from_device"] = start_from_device

        audio_input_suffix = "fp16" if audio_fp16_input else "fp32"
        flag_dict["audio_serialized_pipeline_file"] = "build/bin/dali" + "/dali_pipeline_gpu_" + audio_input_suffix + ".pth"

        argstr = args_to_string(flag_dict) + " --scenario {:} --model {:}".format(self.scenario, self.name)

        # Handle engine dir
        argstr += " --engine_dir={:}".format(self.engine_dir)

        return argstr
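
The renames above all go through a dict_get helper that is not shown on this page. A minimal sketch, assuming it behaves like dict.get but also falls back to the default when the stored value is explicitly None:

def dict_get(d, key, default=None):
    # Hypothetical sketch: dict.get with an extra fallback for None values, so a
    # flag that was set to None is treated the same as a missing flag.
    value = d.get(key, default)
    return default if value is None else value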
Example no. 5
    def _build_custom_flags(self, flag_dict):
        # Triton does not use gpu_engines flag
        flag_dict["gpu_engines"] = None

        # Force performance sample count
        flag_dict["performance_sample_count"] = benchmark_qsl_size_map[
            self.name]

        flag_dict["model_store_path"] = self.model_store_path
        flag_dict["model_name"] = self.model_name
        flag_dict["model_version"] = self.model_version
        flag_dict["buffer_manager_thread_count"] = self.args.get(
            "buffer_manager_thread_count", 0)
        flag_dict["pinned_input"] = True

        # Inform the server to use different QSL
        flag_dict["use_dlrm_qsl"] = (self.name == BENCHMARKS.DLRM)

        # Specify harness-specific flags here
        flag_dict["tensor_path"] = self.tensor_path
        if self.test_mode:
            flag_dict["test_mode"] = self.test_mode
        if self.map_path:
            flag_dict["map_path"] = self.map_path
        if self.coalesced:
            flag_dict["coalesced_tensor"] = self.coalesced

        self.setup_triton_model_repo()

        argstr = args_to_string(
            flag_dict
        ) + " --scenario " + self.scenario + " --model " + self.name

        # Assign proper callback function here
        if self.name == BENCHMARKS.ResNet50:
            argstr += " --response_postprocess ovrn50"
        elif self.name in [BENCHMARKS.SSDMobileNet, BENCHMARKS.SSDResNet34]:
            argstr += " --response_postprocess ovcoco"

        return argstr

    def _build_custom_flags(self, flag_dict):
        if self.has_dla:
            flag_dict["dla_engines"] = self.dla_engine

        if self.has_gpu and self.has_dla:
            pass
        elif self.has_gpu:
            flag_dict["max_dlas"] = 0
        elif self.has_dla:
            flag_dict["max_dlas"] = 1
        else:
            raise ValueError(
                "Cannot specify --no_gpu and --gpu_only at the same time")

        argstr = args_to_string(
            flag_dict
        ) + " --scenario " + self.scenario + " --model " + self.name

        if self.name in response_postprocess_map:
            argstr += " --response_postprocess " + response_postprocess_map[
                self.name]

        return argstr
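
The second function in this example looks the postprocessor up in a response_postprocess_map instead of hard-coding benchmark names. That map is not shown on this page; presumably it is a plain dict from benchmark to postprocessor label, along the lines of this hypothetical sketch (only the "coco" entries are implied by Example no. 1):

response_postprocess_map = {
    # Hypothetical sketch; other benchmarks would carry their own labels.
    BENCHMARKS.SSDMobileNet: "coco",
    BENCHMARKS.SSDResNet34: "coco",
}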
Example no. 7
    def _build_custom_flags(self, flag_dict):
        return args_to_string(flag_dict) + " --scenario " + self.scenario + " --model " + self.name
Example no. 8
    def _build_custom_flags(self, flag_dict):
        # Pull use_jemalloc out of the flag dict; it is consumed by run_harness
        # (to set LD_PRELOAD), not by the harness binary itself.
        self.use_jemalloc = dict_get(flag_dict, "use_jemalloc", default=False)
        flag_dict["use_jemalloc"] = None
        argstr = args_to_string(flag_dict) + " --scenario " + self.scenario + " --model " + self.name
        return argstr
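
Example no. 8 only records the use_jemalloc flag and strips it out of flag_dict; the preloading itself happens back in run_harness (Examples no. 1 and no. 3) through prepend_ld_preload. A hypothetical sketch of that helper, assuming self.env_vars is the environment dict later passed to run_command:

    def prepend_ld_preload(self, so_path):
        # Hypothetical sketch: put the shared object at the front of LD_PRELOAD
        # in the environment used to launch the harness binary.
        existing = self.env_vars.get("LD_PRELOAD", "")
        self.env_vars["LD_PRELOAD"] = "{}:{}".format(so_path, existing) if existing else so_path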