Example No. 1
    def build_default_flags(self):
        flag_dict = {}
        flag_dict["verbose"] = self.verbose

        # Handle plugins
        if self.name in plugin_map:
            plugins = plugin_map[self.name]
            for plugin in plugins:
                self.check_file_exists(plugin)
            flag_dict["plugins"] = ",".join(plugins)

        # Generate flags for logfile names.
        log_dir = self.get_full_log_dir()
        os.makedirs(log_dir, exist_ok=True)
        flag_dict["logfile_outdir"] = log_dir
        flag_dict["logfile_prefix"] = "mlperf_log_"

        # Handle performance sample count
        perf_sample_count = dict_get(self.args, "performance_sample_count", None)
        if perf_sample_count is not None:
            flag_dict["performance_sample_count"] = perf_sample_count
        elif benchmark_qsl_size_map[self.name] > 0:
            flag_dict["performance_sample_count"] = benchmark_qsl_size_map[self.name]
        else:
            flag_dict["performance_sample_count"] = self.args["gpu_batch_size"]

        # Handle custom arguments
        for arg in self.flag_builder_custom_args:
            val = dict_get(self.args, arg, None)
            if val is not None:
                flag_dict[arg] = val

        return flag_dict
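
These examples lean heavily on a dict_get helper that is never shown. A minimal sketch of what it presumably does, assuming it is a plain dict lookup that falls back to the default when the key is absent or maps to None:

    def dict_get(d, key, default=None):
        # Assumed behavior: treat a missing key and an explicit None the same way.
        val = d.get(key)
        return default if val is None else val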
Example No. 2
    def build_scenario_specific_flags(self):
        flag_dict = {}

        prefix = self.qps_prefix

        if self.scenario == SCENARIOS.SingleStream:
            scenario_keys = common_args.SINGLE_STREAM_PARAMS
        elif self.scenario == SCENARIOS.Offline:
            scenario_keys = common_args.OFFLINE_PARAMS
        elif self.scenario == SCENARIOS.MultiStream:
            scenario_keys = common_args.MULTI_STREAM_PARAMS
        elif self.scenario == SCENARIOS.Server:
            scenario_keys = common_args.SERVER_PARAMS
        else:
            raise RuntimeError("Unknown Scenario \"{}\"".format(self.scenario))

        for arg in scenario_keys:
            val = dict_get(self.args, prefix + arg, None)
            if val is None:
                raise ValueError("Missing required key {:}".format(prefix + arg))
            flag_dict[arg] = val

        # Handle RUN_ARGS: unprefixed keys override the prefixed values
        for arg in scenario_keys:
            val = dict_get(self.args, arg, None)
            if val is not None:
                flag_dict[arg] = val

        return flag_dict
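
Note the lookup order: the first pass reads the device-prefixed keys and fails loudly if one is missing, then the second pass lets an unprefixed key (e.g. one supplied on the command line via RUN_ARGS) override it. A hypothetical illustration, with offline_expected_qps as an assumed key name:

    args = {"gpu_offline_expected_qps": 5000, "offline_expected_qps": 6000}
    # Pass 1: flag_dict["offline_expected_qps"] = args["gpu_offline_expected_qps"]  -> 5000
    # Pass 2: the unprefixed key is present, so it overrides the value              -> 6000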
Example No. 3
    def __init__(self, args):
        workspace_size = dict_get(args, "workspace_size", default=(5 << 30))
        logging.info("Using workspace size: {:,}".format(workspace_size))
        super().__init__(args,
                         name=BENCHMARKS.BERT,
                         workspace_size=workspace_size)
        self.bert_config_path = "code/bert/tensorrt/bert_config.json"

        self.seq_len = 384  # default sequence length

        self.batch_size = dict_get(args, "batch_size", default=1)

        self.num_profiles = 1
        if 'gpu_inference_streams' in args:
            # use gpu_inference_streams to determine the number of duplicated profiles
            # in the engine when not using lwis mode
            self.num_profiles = args['gpu_inference_streams']

        self.is_int8 = args['precision'] == 'int8'

        if self.is_int8:
            self.model_path = dict_get(
                args,
                "model_path",
                default="build/models/bert/bert_large_v1_1_fake_quant.onnx")
        else:
            self.model_path = dict_get(
                args,
                "model_path",
                default="build/models/bert/bert_large_v1_1.onnx")

        self.bert_config = BertConfig(self.bert_config_path)

        self.enable_interleaved = False
        if self.is_int8 and 'enable_interleaved' in args:
            self.enable_interleaved = args['enable_interleaved']

        # Small-Tile GEMM Plugin
        # The plugin does not support the interleaved format, so the two options are mutually exclusive.
        self.use_small_tile_gemm_plugin = self.args.get(
            "use_small_tile_gemm_plugin", False)
        self.gemm_plugin_fairshare_cache_size = self.args.get(
            "gemm_plugin_fairshare_cache_size", -1)
        assert not (self.enable_interleaved and self.use_small_tile_gemm_plugin), \
            "Small-Tile GEMM Plugin doesn't support interleaved format."

        # Query system id for architecture
        self.system = get_system()
        self.gpu_arch = self.system.arch

        if self.batch_size > 512:
            # tactics selection is limited at very large batch sizes
            self.builder_config.max_workspace_size = 7 << 30
        if 'nx' in self.system.gpu.lower():
            # use 1GB only for XavierNX
            self.builder_config.max_workspace_size = 1 << 30
Example No. 4
    def _build_custom_flags(self, flag_dict):
        # Rename gpu_batch_size to batch_size
        batch_size = dict_get(self.args, "gpu_batch_size", default=None)
        flag_dict["batch_size"] = batch_size
        flag_dict["gpu_batch_size"] = None

        # Rename use_graphs to cuda_graph
        use_graphs = dict_get(self.args, "use_graphs", default=False)
        flag_dict["cuda_graph"] = use_graphs
        flag_dict["use_graphs"] = None

        # Rename max_seq_length to hp_max_seq_length
        max_seq_length = dict_get(self.args, "max_seq_length", default=None)
        flag_dict["hp_max_seq_length"] = max_seq_length
        flag_dict["max_seq_length"] = None

        # Handle more harness_rnnt knobs
        no_pipelined = dict_get(self.args, "nopipelined_execution", default=False)
        flag_dict["pipelined_execution"] = not no_pipelined
        flag_dict["nopipelined_execution"] = None

        # Handle more harness_rnnt knobs: disable batch sorting by sequence length
        no_sorting = dict_get(self.args, "nobatch_sorting", default=False)
        flag_dict["batch_sorting"] = not no_sorting
        flag_dict["nobatch_sorting"] = None

        # Handle yet another harness_rnnt knob: turning off DALI preprocessing for debug
        no_dali = dict_get(self.args, "noenable_audio_processing", default=False)
        flag_dict["enable_audio_processing"] = not no_dali
        flag_dict["noenable_audio_processing"] = None

        # Handle yet another harness_rnnt knob: disable DALI's scatter gather kernel
        no_copy_kernel = dict_get(self.args, "nouse_copy_kernel", default=False)
        flag_dict["use_copy_kernel"] = not no_copy_kernel
        flag_dict["nouse_copy_kernel"] = None

        # Rename gpu_inference_streams to streams_per_gpu
        num_inference = dict_get(self.args, "gpu_inference_streams", default=None)
        flag_dict["streams_per_gpu"] = num_inference
        flag_dict["gpu_inference_streams"] = None

        audio_fp16_input = dict_get(self.args, "audio_fp16_input", default=True)
        flag_dict["audio_fp16_input"] = audio_fp16_input

        start_from_device = dict_get(self.args, "start_from_device", default=False)
        flag_dict["start_from_device"] = start_from_device

        audio_input_suffix = "fp16" if audio_fp16_input else "fp32"
        flag_dict["audio_serialized_pipeline_file"] = "build/bin/dali" + "/dali_pipeline_gpu_" + audio_input_suffix + ".pth"

        argstr = args_to_string(flag_dict) + " --scenario {:} --model {:}".format(self.scenario, self.name)

        # Handle engine dir
        argstr += " --engine_dir={:}".format(self.engine_dir)

        return argstr
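
Throughout these examples, setting flag_dict[key] = None is how a renamed or already-consumed flag is suppressed; args_to_string is expected to skip None entries when it serializes the dictionary into a command line. A plausible sketch under those assumptions, not the actual helper:

    def args_to_string(flag_dict):
        # Assumed conventions: None and False are dropped, True emits a bare
        # flag, and everything else becomes --key=value.
        parts = []
        for key, value in flag_dict.items():
            if value is None or value is False:
                continue
            if value is True:
                parts.append("--{:}".format(key))
            else:
                parts.append("--{:}={:}".format(key, value))
        return " ".join(parts)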
Example No. 5
    def __init__(self, args, name="", skip_file_checks=False):
        self.args = args
        self.name = name
        self.verbose = dict_get(args, "verbose", default=None)
        if self.verbose:
            logging.info("===== Harness arguments for {:} =====".format(name))
            for key in args:
                logging.info("{:}={:}".format(key, args[key]))

        self.system_id = args["system_id"]
        self.scenario = args["scenario"]
        self.config_ver = args["config_ver"]
        self.engine_dir = "./build/engines/{:}/{:}/{:}".format(
            self.system_id, self.name, self.scenario)
        self.precision = args["precision"]

        # Detect devices used to set field prefixes
        self.has_gpu = dict_get(args, "gpu_batch_size",
                                default=None) is not None
        self.has_dla = dict_get(args, "dla_batch_size",
                                default=None) is not None
        self.qps_prefix = ""
        if self.has_gpu and self.has_dla:
            self.qps_prefix = "concurrent_"
        elif self.has_gpu:
            self.qps_prefix = "gpu_"
        elif self.has_dla:
            self.qps_prefix = "dla_"

        # Check if we actually need to execute the harness
        self.generate_conf_files_only = False
        if dict_get(self.args, "generate_conf_files_only", False):
            logging.info("Only generating measurements/ configuration entries")
            self.generate_conf_files_only = True
            self.args["generate_conf_files_only"] = None

        # Enumerate engine files
        # Engine not needed if we are only generating measurements/ entries
        self.skip_file_checks = skip_file_checks or self.generate_conf_files_only
        self.gpu_engine = None
        self.dla_engine = None
        self.enumerate_engines()

        # Enumerate harness executable
        self.executable = self._get_harness_executable()
        self.check_file_exists(self.executable)

        self.use_jemalloc = False

        self.env_vars = os.environ.copy()
        self.flag_builder_custom_args = []
Example No. 6
    def _get_submission_benchmark_name(self):
        full_benchmark_name = self.name
        if dict_get(self.args, "accuracy_level", "99%") == "99.9%":
            full_benchmark_name += "-99.9"
        elif self.name in BENCHMARKS.HIGH_ACC_ENABLED:
            full_benchmark_name += "-99"
        return full_benchmark_name
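
A quick illustration of the naming rule (benchmark name hypothetical):

    # For a benchmark named "bert" that is in BENCHMARKS.HIGH_ACC_ENABLED:
    #   accuracy_level == "99.9%"  ->  "bert-99.9"
    #   accuracy_level == "99%"    ->  "bert-99"
    # A benchmark outside HIGH_ACC_ENABLED keeps its bare name.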
Example No. 7
    def __init__(self, args):
        workspace_size = dict_get(args, "workspace_size", default=(1 << 30))
        logging.info("Use workspace_size: {:}".format(workspace_size))

        super().__init__(args,
                         name=BENCHMARKS.ResNet50,
                         workspace_size=workspace_size)

        # Model path
        self.model_path = dict_get(
            args,
            "model_path",
            default="code/resnet50/tensorrt/ofa_autosinian_is176.onnx")
        logging.info("Using AutoSinian optimized once-for-all network")

        self.cache_file = None
        self.need_calibration = False

        if self.precision == "int8":
            # Get calibrator variables
            calib_batch_size = dict_get(self.args,
                                        "calib_batch_size",
                                        default=1)
            calib_max_batches = dict_get(self.args,
                                         "calib_max_batches",
                                         default=500)
            force_calibration = dict_get(self.args,
                                         "force_calibration",
                                         default=False)
            cache_file = dict_get(
                self.args,
                "cache_file",
                default="code/resnet50/tensorrt/calibrator.cache")
            preprocessed_data_dir = dict_get(self.args,
                                             "preprocessed_data_dir",
                                             default="build/preprocessed_data")
            calib_data_map = dict_get(self.args,
                                      "calib_data_map",
                                      default="data_maps/imagenet/cal_map.txt")
            calib_image_dir = os.path.join(preprocessed_data_dir,
                                           "imagenet/ResNet50/fp32")

            # Set up calibrator
            self.calibrator = RN50Calibrator(
                calib_batch_size=calib_batch_size,
                calib_max_batches=calib_max_batches,
                force_calibration=force_calibration,
                cache_file=cache_file,
                image_dir=calib_image_dir,
                calib_data_map=calib_data_map)
            self.builder_config.int8_calibrator = self.calibrator
            self.cache_file = cache_file
            self.need_calibration = force_calibration or not os.path.exists(
                cache_file)
Example No. 8
    def __init__(self, args):
        workspace_size = dict_get(args, "workspace_size", default=(5 << 30))
        logging.info("Use workspace_size: {:}".format(workspace_size))
        super().__init__(args,
                         name=BENCHMARKS.BERT,
                         workspace_size=workspace_size)
        self.bert_config_path = "code/bert/tensorrt/bert_config.json"

        self.seq_len = 384  # default sequence length

        assert 'batch_size' in args, 'batch_size is not specified'
        self.batch_size = args['batch_size']

        self.num_profiles = 1
        if 'gpu_inference_streams' in args:
            # use gpu_inference_streams to determine the number of duplicated profiles
            # in the engine when not using lwis mode
            self.num_profiles = args['gpu_inference_streams']

        self.is_int8 = args['precision'] == 'int8'

        if self.is_int8:
            self.model_path = dict_get(
                args,
                "model_path",
                default="build/models/bert/bert_large_v1_1_fake_quant.onnx")
        else:
            self.model_path = dict_get(
                args,
                "model_path",
                default="build/models/bert/bert_large_v1_1.onnx")

        self.bert_config = BertConfig(self.bert_config_path)

        self.enable_il = False
        if self.is_int8 and 'enable_interleaved' in args:
            self.enable_il = args['enable_interleaved']

        if self.batch_size > 512:
            # tactics selection is limited at very large batch sizes
            self.builder_config.max_workspace_size = 7 << 30
        if 'nx' in self.system_id.lower():
            # use 1GB only for XavierNX
            self.builder_config.max_workspace_size = 1 << 30
Example No. 9
    def get_system_name(self):
        override_system_name = dict_get(self.args, "system_name", default=None)
        if override_system_name not in {None, ""}:
            return override_system_name

        system_name = self.system_id
        for kw in system_name_map:
            if kw in self.system_id:
                system_name = "_".join([system_name_map[kw], system_name])
                break
        return "{:}_TRT{:}".format(system_name, TENSORRT_VERSION)
Example No. 10
    def get_system_name(self):
        override_system_name = dict_get(self.args, "system_name", default=None)
        if override_system_name not in {None, ""}:
            return override_system_name

        system_name = self.system_id
        for kw, prepend_name in system_name_map:
            if kw in self.system_id:
                system_name = "_".join([prepend_name, system_name])
                break
        full_system_name = "{:}_TRT{:}".format(system_name, TENSORRT_VERSION)
        return self._append_config_ver_name(full_system_name)
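
Examples No. 9 and No. 10 consume system_name_map in two different shapes: a mapping keyed by keyword in the former, and an ordered sequence of (keyword, prepend_name) pairs in the latter, which makes match priority explicit. A hypothetical sketch of the sequence form (entries invented for illustration):

    system_name_map = [
        ("A100", "NVIDIA"),   # checked first; the loop stops at the first match
        ("T4", "NVIDIA"),
    ]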
Example No. 11
    def build_scenario_specific_flags(self):
        """Return flags specific to current scenario."""

        flag_dict = {}

        prefix = self.qps_prefix
        scenario_keys = common_args.getScenarioMetricArgs(self.scenario)

        for arg in scenario_keys:
            val = dict_get(self.args, prefix + arg, None)
            if val is None:
                raise ValueError("Missing required key {:}".format(prefix + arg))
            flag_dict[arg] = val

        # Handle RUN_ARGS: unprefixed keys override the prefixed values
        for arg in scenario_keys:
            val = dict_get(self.args, arg, None)
            if val is not None:
                flag_dict[arg] = val

        return flag_dict
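
Example No. 11 is the refactored form of Example No. 2: the inline if/elif chain has moved behind common_args.getScenarioMetricArgs. A sketch of what that function plausibly looks like, reusing the SCENARIOS and *_PARAMS names from Example No. 2 (assumed importable from the surrounding codebase):

    _SCENARIO_PARAMS = {
        SCENARIOS.SingleStream: SINGLE_STREAM_PARAMS,
        SCENARIOS.Offline: OFFLINE_PARAMS,
        SCENARIOS.MultiStream: MULTI_STREAM_PARAMS,
        SCENARIOS.Server: SERVER_PARAMS,
    }

    def getScenarioMetricArgs(scenario):
        if scenario not in _SCENARIO_PARAMS:
            raise RuntimeError("Unknown Scenario \"{}\"".format(scenario))
        return _SCENARIO_PARAMS[scenario]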
Example No. 12
    def __init__(self, args):
        """Set up the config and calibrator for DLRM. Does not initialize."""

        workspace_size = dict_get(args, "workspace_size", default=(4 << 30))
        logging.info("Using workspace size: {:,}".format(workspace_size))

        super().__init__(args, name=BENCHMARKS.DLRM, workspace_size=workspace_size)

        with open("code/dlrm/tensorrt/mlperf_40m.limit.json") as f:
            self.dlrm_config = json.load(f)
        logging.info("DLRM config: {:}".format(self.dlrm_config))
        self.num_numerical_inputs = self.dlrm_config["num_numerical_features"]
        self.num_features = len(self.dlrm_config["categorical_feature_sizes"])
        self.num_interactions = (self.num_features + 1) * self.num_features // 2
        self.embedding_size = self.dlrm_config["embedding_dim"]
        self.embedding_rows = self.dlrm_config["categorical_feature_sizes"]
        self.embedding_rows_bound = 40000000
        self.embedding_rows = [min(i, self.embedding_rows_bound) for i in self.embedding_rows]
        self.embedding_rows_total = np.sum(np.array(self.embedding_rows))
        self.bottom_mlp_channels = self.dlrm_config["bottom_mlp_sizes"]
        self.bottom_mlp_names = ["bot_l.0", "bot_l.2", "bot_l.4"]
        self.output_padding = self.args.get("output_padding_granularity", 32)
        self.top_mlp_input_size = (self.num_interactions + self.embedding_size + self.output_padding - 1) // self.output_padding * self.output_padding
        self.top_mlp_channels = self.dlrm_config["top_mlp_sizes"]
        self.top_mlp_names = ["top_l.0", "top_l.2", "top_l.4", "top_l.6", "top_l.8"]
        self.model_filepath = "build/models/dlrm/tb00_40M.pt"
        self.embedding_weights_binary_filepath = "build/models/dlrm/40m_limit/dlrm_embedding_weights_int8_v3.bin"
        self.model_without_embedding_weights_filepath = "build/models/dlrm/40m_limit/model_test_without_embedding_weights_v3.pt"
        self.row_frequencies_binary_filepath = "build/models/dlrm/40m_limit/row_frequencies.bin"
        self.row_frequencies_src_dir = "build/models/dlrm/40m_limit/row_freq"
        self.embedding_weights_on_gpu_part = self.args.get("embedding_weights_on_gpu_part", 1.0)
        self.use_row_frequencies = self.embedding_weights_on_gpu_part < 1.0
        self.num_profiles = self.args.get("gpu_inference_streams", 1)
        self.use_small_tile_gemm_plugin = self.args.get("use_small_tile_gemm_plugin", False)
        self.gemm_plugin_fairshare_cache_size = self.args.get("gemm_plugin_fairshare_cache_size", -1)
        self.enable_interleaved_top_mlp = self.args.get("enable_interleaved_top_mlp", False)

        if self.precision == "fp16":
            self.apply_flag(trt.BuilderFlag.FP16)
        elif self.precision == "int8":
            self.apply_flag(trt.BuilderFlag.INT8)

        if self.precision == "int8":
            # Get calibrator variables
            calib_batch_size = dict_get(self.args, "calib_batch_size", default=512)
            calib_max_batches = dict_get(self.args, "calib_max_batches", default=500)
            force_calibration = dict_get(self.args, "force_calibration", default=False)
            cache_file = dict_get(self.args, "cache_file", default="code/dlrm/tensorrt/calibrator.cache")
            preprocessed_data_dir = dict_get(self.args, "preprocessed_data_dir", default="build/preprocessed_data")
            calib_data_dir = os.path.join(preprocessed_data_dir, "criteo/full_recalib/val_data_128000")

            # Set up calibrator
            self.calibrator = DLRMCalibrator(calib_batch_size=calib_batch_size, calib_max_batches=calib_max_batches,
                                             force_calibration=force_calibration, cache_file=cache_file, data_dir=calib_data_dir)
            self.builder_config.int8_calibrator = self.calibrator
            self.cache_file = cache_file
            self.need_calibration = force_calibration or not os.path.exists(cache_file)
        else:
            self.need_calibration = False
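
The top_mlp_input_size expression is the usual round-up-to-multiple idiom: (x + p - 1) // p * p rounds x up to the nearest multiple of p. A worked example, assuming the standard DLRM configuration of 26 categorical features and an embedding dimension of 128:

    num_features = 26                                            # assumed value
    embedding_size = 128                                         # assumed value
    output_padding = 32
    num_interactions = (num_features + 1) * num_features // 2   # 351
    raw_size = num_interactions + embedding_size                 # 479
    top_mlp_input_size = (raw_size + output_padding - 1) // output_padding * output_padding
    assert top_mlp_input_size == 480                             # rounded up from 479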
Example No. 13
    def initialize(self):
        """
        Parse the processed model to create the network.
        """
        # Create network.
        self.network = self.builder.create_network(
            1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))

        channel_idx = 1

        # Input shape
        input_tensor_dim = [-1] + self.input_volume_dim
        input_tensor_dim.insert(channel_idx, self.num_input_channel)

        # Parse from onnx file.
        parser = trt.OnnxParser(self.network, self.logger)
        model = self.preprocess_onnx(onnx.load(self.model_path))
        success = parser.parse(model.SerializeToString())
        if not success:
            raise RuntimeError(
                "3D-Unet onnx model parsing failed! Error: {:}".format(
                    parser.get_error(0).desc()))

        # Set input/output tensor dtype and formats
        input_tensor = self.network.get_input(0)
        output_tensor = self.network.get_output(0)
        input_tensor.shape = input_tensor_dim

        if self.input_dtype == "int8":
            input_tensor.dtype = trt.int8
        elif self.input_dtype == "fp16":
            input_tensor.dtype = trt.float16
        elif self.input_dtype == "fp32":
            input_tensor.dtype = trt.float32

        if self.input_format == "linear":
            input_tensor.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)
        elif self.input_format == "dhwc8":
            input_tensor.allowed_formats = 1 << int(trt.TensorFormat.DHWC8)
        elif self.input_format == "cdhw32":
            input_tensor.allowed_formats = 1 << int(trt.TensorFormat.CDHW32)

        # Use FP16 output, except during forced calibration: a workaround for
        # calibration not working properly with the identity layer.
        force_calibration = dict_get(self.args,
                                     "force_calibration",
                                     default=False)
        output_tensor.dtype = trt.float32 if force_calibration else trt.float16
        output_tensor.allowed_formats = 1 << int(trt.TensorFormat.LINEAR)

        self.initialized = True
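
Both create_network and allowed_formats use TensorRT's bitmask convention: each enum member denotes a bit position, so a set of options is formed by OR-ing shifted bits. For instance, to allow either the LINEAR or the CHW4 tensor format (an illustrative combination):

    import tensorrt as trt

    # Each trt.TensorFormat member is a bit index into the allowed_formats mask.
    formats = (1 << int(trt.TensorFormat.LINEAR)) | (1 << int(trt.TensorFormat.CHW4))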
Example No. 14
    def __init__(self, args):
        workspace_size = dict_get(args, "workspace_size", default=(2 << 30))
        logging.info("Using workspace size: {:,}".format(workspace_size))

        super().__init__(args,
                         name=BENCHMARKS.SSDResNet34,
                         workspace_size=workspace_size)

        # Model path
        self.model_path = dict_get(
            args,
            "model_path",
            default="build/models/SSDResNet34/resnet34-ssd1200.pytorch")

        if self.precision == "int8":
            force_calibration = dict_get(self.args,
                                         "force_calibration",
                                         default=False)
            calib_batch_size = dict_get(self.args,
                                        "calib_batch_size",
                                        default=10)
            calib_max_batches = dict_get(self.args,
                                         "calib_max_batches",
                                         default=50)
            cache_file = dict_get(
                self.args,
                "cache_file",
                default="code/ssd-resnet34/tensorrt/calibrator.cache")
            preprocessed_data_dir = dict_get(self.args,
                                             "preprocessed_data_dir",
                                             default="build/preprocessed_data")
            calib_data_map = dict_get(self.args,
                                      "calib_data_map",
                                      default="data_maps/coco/cal_map.txt")
            calib_image_dir = os.path.join(preprocessed_data_dir,
                                           "coco/train2017/SSDResNet34/fp32")

            self.calibrator = SSDResNet34EntropyCalibrator(
                calib_image_dir, cache_file, calib_batch_size,
                calib_max_batches, force_calibration, calib_data_map)
            self.builder_config.int8_calibrator = self.calibrator
            self.cache_file = cache_file
Example No. 15
    def __init__(self, args):
        workspace_size = dict_get(args, "workspace_size", default=(2 << 31))
        logging.info("Use workspace_size: {:}".format(workspace_size))

        super().__init__(args,
                         name="ssd-mobilenet",
                         workspace_size=workspace_size)

        # Model path
        self.model_path = dict_get(
            args,
            "model_path",
            default="build/models/SSDMobileNet/frozen_inference_graph.pb")

        if self.precision == "int8":
            calib_batch_size = dict_get(self.args,
                                        "calib_batch_size",
                                        default=1)
            calib_max_batches = dict_get(self.args,
                                         "calib_max_batches",
                                         default=500)
            force_calibration = dict_get(self.args,
                                         "force_calibration",
                                         default=False)
            cache_file = dict_get(
                self.args,
                "cache_file",
                default="code/ssd-mobilenet/tensorrt/calibrator.cache")
            preprocessed_data_dir = dict_get(self.args,
                                             "preprocessed_data_dir",
                                             default="build/preprocessed_data")
            calib_data_map = dict_get(self.args,
                                      "calib_data_map",
                                      default="data_maps/coco/cal_map.txt")
            calib_image_dir = os.path.join(preprocessed_data_dir,
                                           "coco/train2017/SSDMobileNet/fp32")

            self.calibrator = SSDMobileNetEntropyCalibrator(
                calib_batch_size, calib_max_batches, force_calibration,
                cache_file, calib_image_dir, calib_data_map)
            self.builder_config.int8_calibrator = self.calibrator
            self.cache_file = cache_file
Example No. 16
    def __init__(self, args):
        workspace_size = dict_get(args, "workspace_size", default=(1 << 30))
        logging.info("Use workspace_size: {:}".format(workspace_size))

        super().__init__(args,
                         name=BENCHMARKS.ResNet50,
                         workspace_size=workspace_size)

        # Model path
        self.model_path = dict_get(
            args,
            "model_path",
            default="code/resnet50/tensorrt/resnet50_inspur_open.onnx")

        if self.precision == "int8":
            # Get calibrator variables
            calib_batch_size = dict_get(self.args,
                                        "calib_batch_size",
                                        default=1)
            calib_max_batches = dict_get(self.args,
                                         "calib_max_batches",
                                         default=500)
            force_calibration = dict_get(self.args,
                                         "force_calibration",
                                         default=False)
            cache_file = dict_get(
                self.args,
                "cache_file",
                default="code/resnet50/tensorrt/calibrator.cache")
            calib_data_map = dict_get(self.args,
                                      "calib_data_map",
                                      default="data_maps/imagenet/cal_map.txt")
            calib_image_dir = "build/data/imagenet"

            # Set up calibrator
            self.calibrator = RN50Calibrator(
                calib_batch_size=calib_batch_size,
                calib_max_batches=calib_max_batches,
                force_calibration=force_calibration,
                cache_file=cache_file,
                image_dir=calib_image_dir,
                calib_data_map=calib_data_map)
            self.builder_config.int8_calibrator = self.calibrator
            self.cache_file = cache_file
            self.need_calibration = force_calibration or not os.path.exists(
                cache_file)
Example No. 17
    def _build_custom_flags(self, flag_dict):
        # Handle use_jemalloc
        self.use_jemalloc = dict_get(flag_dict, "use_jemalloc", False)
        flag_dict['use_jemalloc'] = None
        argstr = args_to_string(flag_dict) + " --scenario " + self.scenario + " --model " + self.name
        return argstr
Example No. 18
    def __init__(self, args, name="", workspace_size=(1 << 30)):
        """
        Constructor
        :param args: arguments represented by a dictionary
        :param name: name of the benchmark
        """

        self.name = name
        self.args = args

        # Configuration variables
        self.verbose = dict_get(args, "verbose", default=False)
        if self.verbose:
            logging.info("========= BenchmarkBuilder Arguments =========")
            for arg in args:
                logging.info("{:}={:}".format(arg, args[arg]))

        self.system_id = args["system_id"]
        self.scenario = args["scenario"]
        self.config_ver = args["config_ver"]
        self.engine_dir = "./build/engines/{:}/{:}/{:}".format(
            self.system_id, self.name, self.scenario)

        # Set up logger, builder, and network.
        self.logger = trt.Logger(
            trt.Logger.VERBOSE if self.verbose else trt.Logger.INFO)
        trt.init_libnvinfer_plugins(self.logger, "")
        self.builder = trt.Builder(self.logger)
        self.builder_config = self.builder.create_builder_config()
        self.builder_config.max_workspace_size = workspace_size
        if dict_get(args, "verbose_nvtx", default=False):
            self.builder_config.profiling_verbosity = trt.ProfilingVerbosity.VERBOSE

        # Precision variables
        self.input_dtype = dict_get(args, "input_dtype", default="fp32")
        self.input_format = dict_get(args, "input_format", default="linear")
        self.precision = dict_get(args, "precision", default="int8")
        self.clear_flag(trt.BuilderFlag.TF32)
        if self.precision == "fp16":
            self.apply_flag(trt.BuilderFlag.FP16)
        elif self.precision == "int8":
            self.apply_flag(trt.BuilderFlag.INT8)

        # Device variables
        self.device_type = "gpu"
        self.dla_core = args.get("dla_core", None)
        if self.dla_core is not None:
            logging.info("Using DLA: Core {:}".format(self.dla_core))
            self.device_type = "dla"
            self.apply_flag(trt.BuilderFlag.GPU_FALLBACK)
            self.builder_config.default_device_type = trt.DeviceType.DLA
            self.builder_config.DLA_core = int(self.dla_core)

        if self.scenario == SCENARIOS.SingleStream:
            self.batch_size = 1
        elif self.scenario in [
                SCENARIOS.Server, SCENARIOS.Offline, SCENARIOS.MultiStream
        ]:
            self.batch_size = self.args.get("batch_size", 1)
        else:
            raise ValueError("Invalid scenario: {:}".format(self.scenario))

        # Currently, TRT has a limitation that only one execution context can
        # be created per optimization profile. Therefore, create more profiles
        # so that LWIS can create multiple contexts.
        self.num_profiles = self.args.get("gpu_copy_streams", 4)

        self.initialized = False
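
The apply_flag and clear_flag methods called here and in the other constructors are not shown anywhere in these examples. A minimal sketch, assuming they simply set or clear single bits in the builder config's flags bitmask:

    def apply_flag(self, flag):
        # Set the bit for this trt.BuilderFlag in the builder config.
        self.builder_config.flags = self.builder_config.flags | (1 << int(flag))

    def clear_flag(self, flag):
        # Clear the bit for this trt.BuilderFlag.
        self.builder_config.flags = self.builder_config.flags & ~(1 << int(flag))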
Example No. 19
    def __init__(self, args):
        workspace_size = dict_get(args, "workspace_size", default=(8 << 30))

        super().__init__(args,
                         name=BENCHMARKS.UNET,
                         workspace_size=workspace_size)

        # input channel
        self.num_input_channel = 4

        # input volume dimension
        self.input_volume_dim = [224, 224, 160]

        # use InstNorm3D plugin
        self.use_instnorm3d_plugin = True
        # use pixelShuffle plugin
        self.enable_pixelshuffle3d_plugin = True
        self.enable_pixelshuffle3d_plugin_concat_fuse = True
        # Deconv->Conv conversion
        self.use_conv_for_deconv = True
        self.pixel_shuffle_cdwh = True  # If false, do dhwc
        # use last layer plugin
        self.use_conv3d1x1x1k4_plugin = True

        # Model is imported from ONNX
        self.model_path = dict_get(
            args,
            "model_path",
            default="build/models/3d-unet/3dUNetBraTS.onnx")

        force_calibration = dict_get(self.args,
                                     "force_calibration",
                                     default=False)
        # Calibrator
        if self.precision == "int8" or force_calibration:
            self.apply_flag(trt.BuilderFlag.INT8)
            preprocessed_data_dir = dict_get(
                self.args,
                "preprocessed_data_dir",
                default="build/preprocessed_data/brats/calibration")

            calib_batch_size = dict_get(self.args,
                                        "calib_batch_size",
                                        default=2)
            calib_max_batches = dict_get(self.args,
                                         "calib_max_batches",
                                         default=20)

            calib_data_map = dict_get(self.args,
                                      "calib_data_map",
                                      default="data_maps/brats/cal_map.txt")
            calib_volume_dir = os.path.join(preprocessed_data_dir,
                                            "brats_npy/fp32")

            input_shape = [self.num_input_channel] + self.input_volume_dim

            cache_file = dict_get(
                self.args,
                "cache_file",
                default="code/3d-unet/tensorrt/calibrator.cache")
            self.calibrator = UNet3DLegacyCalibrator(
                calib_volume_dir, cache_file, calib_batch_size,
                calib_max_batches, force_calibration, calib_data_map,
                input_shape)
            assert self.calibrator, "Calibrator is not init'ed"
            assert self.calibrator.get_algorithm() == trt.CalibrationAlgoType.LEGACY_CALIBRATION, \
                "Calibrator type is not Legacy"
            self.builder_config.int8_calibrator = self.calibrator
            self.cache_file = cache_file

        # TRT builder flag
        if self.precision == "fp16":
            self.apply_flag(trt.BuilderFlag.FP16)
        elif self.precision == "int8":
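            # Assumption: FP16 is enabled alongside INT8 so layers without
            # INT8 support can fall back to FP16 (common TensorRT practice).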
            self.apply_flag(trt.BuilderFlag.FP16)
            self.apply_flag(trt.BuilderFlag.INT8)