Example #1
    def run_specify_abi(self, flags, configs, target_abi):
        """Run every model in *configs* on this device for *target_abi*.

        For each model: verify it was converted, prepare a clean output
        directory, optionally run OpenCL tuning for GPU models, generate
        random input tensors, execute the model once per applicable
        runtime, and (when requested) validate outputs and report run
        statistics. After all models ran, merge the tuned OpenCL
        binaries/parameters of all models into single per-library files.

        :param flags: parsed command-line options.
        :param configs: deployment configuration dict loaded from YAML.
        :param target_abi: ABI to run; must be one of ``self.target_abis``.
        """
        if target_abi not in self.target_abis:
            six.print_('There is no device with soc: %s abi: %s' %
                       (self.target_socs, target_abi))
            return
        library_name = configs[YAMLKeyword.library_name]
        mace_lib_type = flags.mace_lib_type
        embed_model_data = \
            configs[YAMLKeyword.model_data_format] == ModelFormat.code
        build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)

        # Pick the binary to execute: example app vs mace_run, each in a
        # static and a dynamic flavor.
        if flags.example:
            if mace_lib_type == MACELibType.static:
                target_name = EXAMPLE_STATIC_NAME
            else:
                target_name = EXAMPLE_DYNAMIC_NAME
        else:
            if mace_lib_type == MACELibType.static:
                target_name = MACE_RUN_STATIC_NAME
            else:
                target_name = MACE_RUN_DYNAMIC_NAME
        link_dynamic = mace_lib_type == MACELibType.dynamic
        model_output_dirs = []

        for model_name in configs[YAMLKeyword.models]:
            check_model_converted(library_name, model_name,
                                  configs[YAMLKeyword.model_graph_format],
                                  configs[YAMLKeyword.model_data_format],
                                  target_abi)
            if target_abi != ABIType.host:
                self.clear_data_dir()
            MaceLogger.header(
                StringFormatter.block('Run model {} on {}'.format(
                    model_name, self.device_name)))

            model_config = configs[YAMLKeyword.models][model_name]
            model_runtime = model_config[YAMLKeyword.runtime]
            subgraphs = model_config[YAMLKeyword.subgraphs]

            # NOTE: this used to branch on
            # "not target_socs or target_abi == host", but both branches
            # made the identical call, so the condition was dead code.
            model_output_base_dir, model_output_dir, mace_model_dir = \
                get_build_model_dirs(
                    library_name, model_name, target_abi, self,
                    model_config[YAMLKeyword.model_file_path])

            # clear temp model output dir
            if os.path.exists(model_output_dir):
                sh.rm('-rf', model_output_dir)
            os.makedirs(model_output_dir)

            is_tuned = False
            model_opencl_output_bin_path = ''
            model_opencl_parameter_path = ''
            # Tune only real on-device GPU runs with a known SoC, and
            # never under ASAN, for the example app, or when tuning is
            # explicitly disabled.
            if not flags.address_sanitizer \
                    and not flags.example \
                    and target_abi != ABIType.host \
                    and configs[YAMLKeyword.target_socs] \
                    and self.target_socs \
                    and model_runtime in [RuntimeType.gpu,
                                          RuntimeType.cpu_gpu] \
                    and not flags.disable_tuning:
                self.tuning(library_name, model_name, model_config,
                            configs[YAMLKeyword.model_graph_format],
                            configs[YAMLKeyword.model_data_format], target_abi,
                            mace_lib_type)
                model_output_dirs.append(model_output_dir)
                model_opencl_output_bin_path = \
                    '{}/{}/{}'.format(model_output_dir,
                                      BUILD_TMP_OPENCL_BIN_DIR,
                                      CL_COMPILED_BINARY_FILE_NAME)
                model_opencl_parameter_path = \
                    '{}/{}/{}'.format(model_output_dir,
                                      BUILD_TMP_OPENCL_BIN_DIR,
                                      CL_TUNED_PARAMETER_FILE_NAME)
                self.clear_data_dir()
                is_tuned = True
            elif target_abi != ABIType.host and self.target_socs:
                # Not tuning this run: reuse previously merged OpenCL
                # artifacts for this library/ABI/device.
                model_opencl_output_bin_path = get_opencl_binary_output_path(
                    library_name, target_abi, self)
                model_opencl_parameter_path = get_opencl_parameter_output_path(
                    library_name, target_abi, self)
            sh_commands.gen_random_input(
                model_output_dir,
                subgraphs[0][YAMLKeyword.input_tensors],
                subgraphs[0][YAMLKeyword.input_shapes],
                subgraphs[0][YAMLKeyword.validation_inputs_data],
                input_ranges=subgraphs[0][YAMLKeyword.input_ranges],
                input_data_types=subgraphs[0][YAMLKeyword.input_data_types])
            runtime_list = []
            if target_abi == ABIType.host:
                runtime_list.append(RuntimeType.cpu)
            elif model_runtime == RuntimeType.cpu_gpu:
                runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu])
            else:
                runtime_list.append(model_runtime)
            for runtime in runtime_list:
                device_type = parse_device_type(runtime)
                # run for specified soc
                # (return value was previously bound to an unused local)
                self.tuning_run(
                    abi=target_abi,
                    target_dir=build_tmp_binary_dir,
                    target_name=target_name,
                    vlog_level=flags.vlog_level,
                    embed_model_data=embed_model_data,
                    model_output_dir=model_output_dir,
                    input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
                    output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
                    input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
                    output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
                    mace_model_dir=mace_model_dir,
                    model_tag=model_name,
                    device_type=device_type,
                    running_round=flags.round,
                    restart_round=flags.restart_round,
                    limit_opencl_kernel_time=model_config[
                        YAMLKeyword.limit_opencl_kernel_time],
                    tuning=False,
                    out_of_range_check=flags.gpu_out_of_range_check,
                    model_graph_format=configs[YAMLKeyword.model_graph_format],
                    omp_num_threads=flags.omp_num_threads,
                    cpu_affinity_policy=flags.cpu_affinity_policy,
                    gpu_perf_hint=flags.gpu_perf_hint,
                    gpu_priority_hint=flags.gpu_priority_hint,
                    runtime_failure_ratio=flags.runtime_failure_ratio,
                    address_sanitizer=flags.address_sanitizer,
                    opencl_binary_file=model_opencl_output_bin_path,
                    opencl_parameter_file=model_opencl_parameter_path,
                    libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
                    link_dynamic=link_dynamic,
                    quantize_stat=flags.quantize_stat,
                )
                if flags.validate:
                    model_file_path, weight_file_path = get_model_files(
                        model_config[YAMLKeyword.model_file_path],
                        model_config[YAMLKeyword.model_sha256_checksum],
                        BUILD_DOWNLOADS_DIR,
                        model_config[YAMLKeyword.weight_file_path],
                        model_config[YAMLKeyword.weight_sha256_checksum])

                    # Quantized models are validated against a looser
                    # per-type threshold keyed by '<DEVICE>_QUANTIZE'.
                    validate_type = device_type
                    if model_config[YAMLKeyword.quantize] == 1:
                        validate_type = device_type + '_QUANTIZE'
                    sh_commands.validate_model(
                        abi=target_abi,
                        device=self,
                        model_file_path=model_file_path,
                        weight_file_path=weight_file_path,
                        platform=model_config[YAMLKeyword.platform],
                        device_type=device_type,
                        input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
                        output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
                        input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
                        output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
                        model_output_dir=model_output_dir,
                        input_data_types=subgraphs[0][
                            YAMLKeyword.input_data_types],
                        caffe_env=flags.caffe_env,
                        validation_threshold=subgraphs[0][
                            YAMLKeyword.validation_threshold][validate_type],
                        backend=subgraphs[0][YAMLKeyword.backend])
                if flags.report and flags.round > 0:
                    tuned = is_tuned and device_type == DeviceType.GPU
                    self.report_run_statistics(target_abi=target_abi,
                                               model_name=model_name,
                                               device_type=device_type,
                                               output_dir=flags.report_dir,
                                               tuned=tuned)
        if model_output_dirs:
            opencl_output_bin_path = get_opencl_binary_output_path(
                library_name, target_abi, self)
            opencl_parameter_bin_path = get_opencl_parameter_output_path(
                library_name, target_abi, self)

            # clear opencl output dir
            if os.path.exists(opencl_output_bin_path):
                sh.rm('-rf', opencl_output_bin_path)
            if os.path.exists(opencl_parameter_bin_path):
                sh.rm('-rf', opencl_parameter_bin_path)

            # merge all model's opencl binaries together
            sh_commands.merge_opencl_binaries(model_output_dirs,
                                              CL_COMPILED_BINARY_FILE_NAME,
                                              opencl_output_bin_path)
            # merge all model's opencl parameter together
            sh_commands.merge_opencl_parameters(model_output_dirs,
                                                CL_TUNED_PARAMETER_FILE_NAME,
                                                opencl_parameter_bin_path)
Example #2
    def bm_specific_target(self, flags, configs, target_abi):
        """Benchmark every model in *configs* on this device for *target_abi*.

        Verifies each model was converted, prepares a fresh per-model
        output directory, generates random input tensors, then runs the
        benchmark binary once per applicable runtime.

        :param flags: parsed command-line options.
        :param configs: deployment configuration dict loaded from YAML.
        :param target_abi: ABI string to benchmark for.
        """
        library_name = configs[YAMLKeyword.library_name]
        data_embedded = \
            configs[YAMLKeyword.model_data_format] == ModelFormat.code
        use_dynamic = flags.mace_lib_type == MACELibType.dynamic
        bm_binary = (BM_MODEL_DYNAMIC_NAME if use_dynamic
                     else BM_MODEL_STATIC_NAME)
        tmp_binary_dir = get_build_binary_dir(library_name, target_abi)

        cl_bin_path = ''
        cl_param_path = ''
        # OpenCL artifacts only exist for on-device runs with a known SoC.
        if configs[YAMLKeyword.target_socs] and target_abi != ABIType.host:
            cl_bin_path = get_opencl_binary_output_path(
                library_name, target_abi, self)
            cl_param_path = get_opencl_parameter_output_path(
                library_name, target_abi, self)

        for model_name in configs[YAMLKeyword.models]:
            check_model_converted(library_name, model_name,
                                  configs[YAMLKeyword.model_graph_format],
                                  configs[YAMLKeyword.model_data_format],
                                  target_abi)
            MaceLogger.header(
                StringFormatter.block('Benchmark model %s on %s' %
                                      (model_name, self.device_name)))
            model_config = configs[YAMLKeyword.models][model_name]
            model_runtime = model_config[YAMLKeyword.runtime]
            graph = model_config[YAMLKeyword.subgraphs]

            _, out_dir, model_dir = get_build_model_dirs(
                library_name, model_name, target_abi, self,
                model_config[YAMLKeyword.model_file_path])
            # Start each model from an empty output directory.
            if os.path.exists(out_dir):
                sh.rm('-rf', out_dir)
            os.makedirs(out_dir)

            if target_abi != ABIType.host:
                self.clear_data_dir()
            sh_commands.gen_random_input(
                out_dir,
                graph[0][YAMLKeyword.input_tensors],
                graph[0][YAMLKeyword.input_shapes],
                graph[0][YAMLKeyword.validation_inputs_data],
                input_ranges=graph[0][YAMLKeyword.input_ranges],
                input_data_types=graph[0][YAMLKeyword.input_data_types])

            if target_abi == ABIType.host:
                runtimes = [RuntimeType.cpu]
            elif model_runtime == RuntimeType.cpu_gpu:
                # NOTE(review): expands cpu_gpu to [cpu, cpu_gpu] here,
                # while the run path expands it to [cpu, gpu] — confirm
                # this asymmetry is intended.
                runtimes = [RuntimeType.cpu, RuntimeType.cpu_gpu]
            else:
                runtimes = [model_runtime]
            for runtime in runtimes:
                device_type = parse_device_type(runtime)
                self.benchmark_model(
                    abi=target_abi,
                    benchmark_binary_dir=tmp_binary_dir,
                    benchmark_binary_name=bm_binary,
                    vlog_level=0,
                    embed_model_data=data_embedded,
                    model_output_dir=out_dir,
                    input_nodes=graph[0][YAMLKeyword.input_tensors],
                    output_nodes=graph[0][YAMLKeyword.output_tensors],
                    input_shapes=graph[0][YAMLKeyword.input_shapes],
                    output_shapes=graph[0][YAMLKeyword.output_shapes],
                    mace_model_dir=model_dir,
                    model_tag=model_name,
                    device_type=device_type,
                    model_graph_format=configs[YAMLKeyword.model_graph_format],
                    omp_num_threads=flags.omp_num_threads,
                    cpu_affinity_policy=flags.cpu_affinity_policy,
                    gpu_perf_hint=flags.gpu_perf_hint,
                    gpu_priority_hint=flags.gpu_priority_hint,
                    opencl_binary_file=cl_bin_path,
                    opencl_parameter_file=cl_param_path,
                    libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
                    link_dynamic=use_dynamic)
Example #3
    def tuning(self, library_name, model_name, model_config,
               model_graph_format, model_data_format, target_abi,
               mace_lib_type):
        """Tune OpenCL kernels for one model on this device.

        Generates random inputs, runs the model once in tuning mode
        (zero benchmark rounds, one restart), then pulls the compiled
        OpenCL binary and the tuned parameter file back from the device
        into the model's output directory.
        """
        six.print_('* Tuning, it may take some time')

        tmp_binary_dir = get_build_binary_dir(library_name, target_abi)
        use_dynamic = mace_lib_type == MACELibType.dynamic
        runner_name = (MACE_RUN_DYNAMIC_NAME if use_dynamic
                       else MACE_RUN_STATIC_NAME)
        data_embedded = model_data_format == ModelFormat.code

        _, out_dir, model_dir = get_build_model_dirs(
            library_name, model_name, target_abi, self,
            model_config[YAMLKeyword.model_file_path])

        self.clear_data_dir()

        graph = model_config[YAMLKeyword.subgraphs]
        # Feed the tuning run with randomly generated input tensors.
        sh_commands.gen_random_input(
            out_dir,
            graph[0][YAMLKeyword.input_tensors],
            graph[0][YAMLKeyword.input_shapes],
            graph[0][YAMLKeyword.validation_inputs_data],
            input_ranges=graph[0][YAMLKeyword.input_ranges],
            input_data_types=graph[0][YAMLKeyword.input_data_types])

        self.tuning_run(
            abi=target_abi,
            target_dir=tmp_binary_dir,
            target_name=runner_name,
            vlog_level=0,
            embed_model_data=data_embedded,
            model_output_dir=out_dir,
            input_nodes=graph[0][YAMLKeyword.input_tensors],
            output_nodes=graph[0][YAMLKeyword.output_tensors],
            input_shapes=graph[0][YAMLKeyword.input_shapes],
            output_shapes=graph[0][YAMLKeyword.output_shapes],
            mace_model_dir=model_dir,
            model_tag=model_name,
            device_type=DeviceType.GPU,
            running_round=0,
            restart_round=1,
            limit_opencl_kernel_time=model_config[
                YAMLKeyword.limit_opencl_kernel_time],
            tuning=True,
            out_of_range_check=False,
            model_graph_format=model_graph_format,
            opencl_binary_file='',
            opencl_parameter_file='',
            libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
            link_dynamic=use_dynamic,
        )

        opencl_bin_dir = '{}/{}'.format(out_dir, BUILD_TMP_OPENCL_BIN_DIR)
        # Pull the compiled OpenCL program binary off the device.
        self.pull(self.interior_dir, CL_COMPILED_BINARY_FILE_NAME,
                  opencl_bin_dir)
        # Pull the tuned kernel parameter file as well.
        self.pull_from_data_dir(CL_TUNED_PARAMETER_FILE_NAME, opencl_bin_dir)

        six.print_('Tuning done! \n')