def run(self,
        abi,
        host_bin_path,
        bin_name,
        args='',
        opencl_profiling=True,
        vlog_level=0,
        out_of_range_check=True,
        address_sanitizer=False,
        simpleperf=False):
    """Push a binary to the device, execute it and return its output.

    The device data directory is wiped and recreated, the binary is pushed
    into it, and it is then executed with the MACE environment variables
    built from the arguments below. Everything happens while holding
    ``self.lock()``.

    Args:
        abi: ABI identifier, used to locate the ASan runtime and the
            simpleperf binary to push.
        host_bin_path: Host directory that contains the binary.
        bin_name: File name of the binary inside ``host_bin_path``.
        args: Extra command-line text appended after the binary path.
        opencl_profiling: Truthy exports ``MACE_OPENCL_PROFILING=1``,
            otherwise ``0``.
        vlog_level: Integer exported as ``MACE_CPP_MIN_VLOG_LEVEL``.
        out_of_range_check: Truthy exports ``MACE_OUT_OF_RANGE_CHECK=1``,
            otherwise ``0``.
        address_sanitizer: When truthy, push the ASan runtime library and
            preload it through ``LD_PRELOAD``.
        simpleperf: When truthy and the device system is Android, run the
            binary under ``simpleperf stat`` with cache/TLB counter groups.

    Returns:
        The concatenation of everything collected in the output buffer
        handed to ``sh_commands.make_output_processor``.
    """
    host_bin_full_path = '%s/%s' % (host_bin_path, bin_name)
    device_bin_full_path = '%s/%s' % (self.data_dir, bin_name)
    print(
        '================================================================')
    print('Trying to lock device %s' % self.address)
    with self.lock():
        print('Run on device: %s, %s, %s' %
              (self.address, self.target_socs, self.device_name))
        # Start from a clean data directory on the device.
        self.rm(self.data_dir)
        self.exec_command('mkdir -p %s' % self.data_dir)
        self.push(host_bin_full_path, device_bin_full_path)

        ld_preload = ''
        if address_sanitizer:
            self.push(sh_commands.find_asan_rt_library(abi), self.data_dir)
            ld_preload = 'LD_PRELOAD=%s/%s' % (
                self.data_dir, sh_commands.asan_rt_library_names(abi))

        print('Run %s' % device_bin_full_path)
        stdout_buf = []
        process_output = sh_commands.make_output_processor(stdout_buf)

        # Environment prefix shared by both execution modes. Building it
        # once keeps the variable names identical in both branches (the
        # plain branch previously misspelled MACE_OPENCL_PROFILING, so the
        # flag was silently ignored).
        exec_cmd = [
            ld_preload,
            'MACE_OUT_OF_RANGE_CHECK=%d' % (1 if out_of_range_check else 0),
            'MACE_OPENCL_PROFILING=%d' % (1 if opencl_profiling else 0),
            'MACE_CPP_MIN_VLOG_LEVEL=%d' % vlog_level,
        ]
        if simpleperf and self.system == SystemType.android:
            self.push(sh_commands.find_simpleperf_library(abi),
                      self.data_dir)
            exec_cmd.extend([
                '%s/simpleperf' % self.data_dir,
                'stat',
                '--group', 'raw-l1-dcache,raw-l1-dcache-refill',
                '--group', 'raw-l2-dcache,raw-l2-dcache-refill',
                '--group', 'raw-l1-dtlb,raw-l1-dtlb-refill',
                '--group', 'raw-l2-dtlb,raw-l2-dtlb-refill',
            ])
        exec_cmd.extend([device_bin_full_path, args])

        self.exec_command(' '.join(exec_cmd),
                          _tty_in=True,
                          _out=process_output,
                          _err_to_out=True)
        return ''.join(stdout_buf)
def tuning_run(self,
               abi,
               target_dir,
               target_name,
               vlog_level,
               embed_model_data,
               model_output_dir,
               input_nodes,
               output_nodes,
               input_shapes,
               output_shapes,
               mace_model_dir,
               model_tag,
               device_type,
               running_round,
               restart_round,
               limit_opencl_kernel_time,
               tuning,
               out_of_range_check,
               model_graph_format,
               opencl_binary_file,
               opencl_parameter_file,
               libmace_dynamic_library_path,
               omp_num_threads=-1,
               cpu_affinity_policy=1,
               gpu_perf_hint=3,
               gpu_priority_hint=3,
               input_file_name='model_input',
               output_file_name='model_out',
               runtime_failure_ratio=0.0,
               address_sanitizer=False,
               link_dynamic=False,
               quantize_stat=False,
               ):
    """Run the model runner binary on the host or on a device.

    On ``SystemType.host`` the binary ``target_dir/target_name`` is started
    directly through ``subprocess``. On ``SystemType.android`` and
    ``SystemType.arm_linux`` the inputs, model files, libraries and the
    binary are pushed into ``self.data_dir``; the full command line is then
    written to a script file, pushed, and executed with ``sh``.

    Args:
        abi: ABI identifier used to locate the ASan runtime and dependent
            shared libraries (device branch only).
        target_dir: Host directory containing the runner binary.
        target_name: File name of the runner binary.
        vlog_level: Exported as ``MACE_CPP_MIN_VLOG_LEVEL``.
        embed_model_data: When falsy, ``<mace_model_dir>/<model_tag>.data``
            must exist on the host and is pushed to the device.
        model_output_dir: Host directory holding the input files (and, on
            the host branch, receiving the output file).
        input_nodes: Input node names; joined with ``,``.
        output_nodes: Output node names; joined with ``,``.
        input_shapes: Input shape strings; joined with ``:``.
        output_shapes: Output shape strings; joined with ``:``.
        mace_model_dir: Host directory containing the model files.
        model_tag: Model name; also the stem of the ``.pb``/``.data`` files.
        device_type: Passed as ``--device``; when equal to
            ``common.DeviceType.GPU`` the OpenCL files are pushed if they
            exist on the host.
        running_round: Passed as ``--round``.
        restart_round: Passed as ``--restart_round``.
        limit_opencl_kernel_time: Exported as
            ``MACE_LIMIT_OPENCL_KERNEL_TIME`` (device branch only).
        tuning: Converted with ``int()`` and exported as ``MACE_TUNING``
            (device branch only).
        out_of_range_check: Converted with ``int()`` and exported as
            ``MACE_OUT_OF_RANGE_CHECK`` (device branch only).
        model_graph_format: When equal to ``ModelFormat.file`` the
            ``.pb`` graph file is passed via ``--model_file``; otherwise
            ``--model_file`` is empty.
        opencl_binary_file: Host path of the OpenCL binary file.
        opencl_parameter_file: Host path of the OpenCL parameter file.
        libmace_dynamic_library_path: Host path of the MACE shared library.
        omp_num_threads: Passed as ``--omp_num_threads``.
        cpu_affinity_policy: Passed as ``--cpu_affinity_policy``.
        gpu_perf_hint: Passed as ``--gpu_perf_hint``.
        gpu_priority_hint: Passed as ``--gpu_priority_hint``.
        input_file_name: Base name of the input files.
        output_file_name: Base name of the output file.
        runtime_failure_ratio: Exported as ``MACE_RUNTIME_FAILURE_RATIO``.
        address_sanitizer: On Android, push and ``LD_PRELOAD`` the ASan
            runtime.
        link_dynamic: When truthy, push the MACE shared library (and, on
            Android, its dependent libraries) to the device.
        quantize_stat: Truthy exports ``MACE_LOG_TENSOR_RANGE=1``,
            otherwise ``0``.

    Returns:
        The captured output, which is also stored in ``self.stdout``.

    Raises:
        Exception: If ``self.system`` is not one of the supported systems.
    """
    six.print_("* Run '%s' with round=%s, restart_round=%s, tuning=%s, "
               "out_of_range_check=%s, omp_num_threads=%s, "
               "cpu_affinity_policy=%s, gpu_perf_hint=%s, "
               "gpu_priority_hint=%s" %
               (model_tag, running_round, restart_round, str(tuning),
                str(out_of_range_check), omp_num_threads,
                cpu_affinity_policy, gpu_perf_hint, gpu_priority_hint))
    mace_model_path = ""
    if model_graph_format == ModelFormat.file:
        mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag)

    if self.system == SystemType.host:
        libmace_dynamic_lib_path = \
            os.path.dirname(libmace_dynamic_library_path)
        p = subprocess.Popen(
            [
                "env",
                "LD_LIBRARY_PATH=%s" % libmace_dynamic_lib_path,
                "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
                "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio,
                "MACE_LOG_TENSOR_RANGE=%d" % (1 if quantize_stat else 0),
                "%s/%s" % (target_dir, target_name),
                "--model_name=%s" % model_tag,
                "--input_node=%s" % ",".join(input_nodes),
                "--output_node=%s" % ",".join(output_nodes),
                "--input_shape=%s" % ":".join(input_shapes),
                "--output_shape=%s" % ":".join(output_shapes),
                "--input_file=%s/%s" % (model_output_dir, input_file_name),
                "--output_file=%s/%s" % (model_output_dir,
                                         output_file_name),
                "--model_data_file=%s/%s.data" % (mace_model_dir,
                                                  model_tag),
                "--device=%s" % device_type,
                "--round=%s" % running_round,
                "--restart_round=%s" % restart_round,
                "--omp_num_threads=%s" % omp_num_threads,
                "--cpu_affinity_policy=%s" % cpu_affinity_policy,
                "--gpu_perf_hint=%s" % gpu_perf_hint,
                "--gpu_priority_hint=%s" % gpu_priority_hint,
                "--model_file=%s" % mace_model_path,
            ],
            stderr=subprocess.PIPE,
            stdout=subprocess.PIPE)
        out, err = p.communicate()
        # NOTE(review): no text mode is requested, so on Python 3 this is
        # bytes, whereas the device branch below stores a joined string --
        # confirm callers cope with both.
        self.stdout = err + out
        six.print_(self.stdout)
        six.print_("Running finished!\n")
    elif self.system in [SystemType.android, SystemType.arm_linux]:
        # Start from a clean data directory on the device.
        self.rm(self.data_dir)
        self.exec_command('mkdir -p {}'.format(self.data_dir))
        internal_storage_dir = self.create_internal_storage_dir()

        for input_name in input_nodes:
            formatted_name = common.formatted_file_name(
                input_file_name, input_name)
            self.push("%s/%s" % (model_output_dir, formatted_name),
                      self.data_dir)
        if self.system == SystemType.android and address_sanitizer:
            self.push(sh_commands.find_asan_rt_library(abi),
                      self.data_dir)

        if not embed_model_data:
            model_data_path = "%s/%s.data" % (mace_model_dir, model_tag)
            mace_check(
                os.path.exists(model_data_path), "Device",
                'model data file not found,'
                ' please convert model first')
            self.push(model_data_path, self.data_dir)

        if device_type == common.DeviceType.GPU:
            if os.path.exists(opencl_binary_file):
                self.push(opencl_binary_file, self.data_dir)
            if os.path.exists(opencl_parameter_file):
                self.push(opencl_parameter_file, self.data_dir)

        self.push("third_party/nnlib/libhexagon_controller.so",
                  self.data_dir)

        mace_model_phone_path = ""
        if model_graph_format == ModelFormat.file:
            mace_model_phone_path = "%s/%s.pb" % (self.data_dir, model_tag)
            self.push(mace_model_path, mace_model_phone_path)
        if link_dynamic:
            self.push(libmace_dynamic_library_path, self.data_dir)
            if self.system == SystemType.android:
                sh_commands.push_depended_so_libs(
                    libmace_dynamic_library_path, abi, self.data_dir,
                    self.address)
        self.push("%s/%s" % (target_dir, target_name), self.data_dir)

        stdout_buff = []
        process_output = sh_commands.make_output_processor(stdout_buff)
        cmd = [
            "LD_LIBRARY_PATH=%s" % self.data_dir,
            "MACE_TUNING=%s" % int(tuning),
            "MACE_OUT_OF_RANGE_CHECK=%s" % int(out_of_range_check),
            "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
            "MACE_RUN_PARAMETER_PATH=%s/mace_run.config" % self.data_dir,
            "MACE_INTERNAL_STORAGE_PATH=%s" % internal_storage_dir,
            "MACE_LIMIT_OPENCL_KERNEL_TIME=%s" % limit_opencl_kernel_time,
            "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio,
            "MACE_LOG_TENSOR_RANGE=%d" % (1 if quantize_stat else 0),
        ]
        if self.system == SystemType.android and address_sanitizer:
            cmd.extend([
                "LD_PRELOAD=%s/%s" %
                (self.data_dir, sh_commands.asan_rt_library_names(abi))
            ])
        cmd.extend([
            "%s/%s" % (self.data_dir, target_name),
            "--model_name=%s" % model_tag,
            "--input_node=%s" % ",".join(input_nodes),
            "--output_node=%s" % ",".join(output_nodes),
            "--input_shape=%s" % ":".join(input_shapes),
            "--output_shape=%s" % ":".join(output_shapes),
            "--input_file=%s/%s" % (self.data_dir, input_file_name),
            "--output_file=%s/%s" % (self.data_dir, output_file_name),
            "--model_data_file=%s/%s.data" % (self.data_dir, model_tag),
            "--device=%s" % device_type,
            "--round=%s" % running_round,
            "--restart_round=%s" % restart_round,
            "--omp_num_threads=%s" % omp_num_threads,
            "--cpu_affinity_policy=%s" % cpu_affinity_policy,
            "--gpu_perf_hint=%s" % gpu_perf_hint,
            "--gpu_priority_hint=%s" % gpu_priority_hint,
            "--model_file=%s" % mace_model_phone_path,
            "--opencl_binary_file=%s/%s" %
            (self.data_dir, os.path.basename(opencl_binary_file)),
            "--opencl_parameter_file=%s/%s" %
            (self.data_dir, os.path.basename(opencl_parameter_file)),
        ])
        cmd = ' '.join(cmd)

        # The command line is shipped as a script file and run with `sh`
        # rather than being passed inline to exec_command.
        cmd_file_name = "%s-%s-%s" % ('cmd_file', model_tag,
                                      str(time.time()))
        cmd_file = "%s/%s" % (self.data_dir, cmd_file_name)
        tmp_cmd_file = "%s/%s" % ('/tmp', cmd_file_name)
        with open(tmp_cmd_file, 'w') as cmd_fd:
            cmd_fd.write(cmd)
        try:
            self.push(tmp_cmd_file, cmd_file)
        finally:
            # Remove the host-side copy even when the push fails so that
            # failed runs do not leave stale files behind in /tmp.
            os.remove(tmp_cmd_file)

        self.exec_command('sh {}'.format(cmd_file),
                          _tty_in=True,
                          _out=process_output,
                          _err_to_out=True)
        self.stdout = "".join(stdout_buff)
        if not sh_commands.stdout_success(self.stdout):
            common.MaceLogger.error("Mace Run", "Mace run failed.")

        six.print_("Running finished!\n")
    else:
        six.print_('Unsupported system %s' % self.system, file=sys.stderr)
        raise Exception('Wrong device')

    return self.stdout