def _get_inputs_data(self, data_dir, inputs_tensor_info):
    """Build the map of model input name -> ndarray.

    When no ``-i`` input path was given, random tensors are generated and
    also persisted to ``data_dir`` as ``input_<i>.bin`` files; otherwise the
    comma-separated paths are loaded and reshaped to the tensor shapes.

    :param data_dir: directory where generated .bin inputs are written
    :param inputs_tensor_info: list of dicts with "name", "shape", "type"
    :return: dict mapping tensor name to numpy array
    :raises AccuracyCompareException: path count does not match input count
    """
    inputs_map = {}
    if "" == self.args.input_path:
        # No user data supplied: fabricate random inputs and save them.
        for i, tensor_info in enumerate(inputs_tensor_info):
            np_type = self._convert_to_numpy_type(tensor_info["type"])
            input_data = np.random.random(tensor_info["shape"]).astype(np_type)
            inputs_map[tensor_info["name"]] = input_data
            file_name = "input_" + str(i) + ".bin"
            input_data.tofile(os.path.join(data_dir, file_name))
            utils.print_info_log(
                "save input file name: {}, shape: {}, dtype: {}".format(
                    file_name, input_data.shape, input_data.dtype))
    else:
        input_path = self.args.input_path.split(",")
        if len(inputs_tensor_info) != len(input_path):
            utils.print_error_log(
                "the number of model inputs tensor_info is not equal the number of "
                "inputs data, inputs tensor_info is: {}, inputs data is: {}"
                .format(len(inputs_tensor_info), len(input_path)))
            raise AccuracyCompareException(
                utils.ACCURACY_COMPARISON_INVALID_DATA_ERROR)
        for i, tensor_info in enumerate(inputs_tensor_info):
            np_type = self._convert_to_numpy_type(tensor_info["type"])
            input_data = np.fromfile(input_path[i], np_type).reshape(
                tensor_info["shape"])
            inputs_map[tensor_info["name"]] = input_data
            utils.print_info_log(
                "load input file name: {}, shape: {}, dtype: {}".format(
                    os.path.basename(input_path[i]), input_data.shape,
                    input_data.dtype))
    return inputs_map
def verify_and_adapt_dynamic_shape(input_shapes, op_name, tensor):
    """Verify a user-fixed shape against the model shape and apply it.

    :param input_shapes: dict of tensor name -> user-specified fixed shape
    :param op_name: name of the input operator
    :param tensor: TF tensor whose shape may contain unknown dims
    :return: the tensor, with its shape fixed where applicable
    :raises utils.AccuracyCompareException: rank or known dims mismatch
    """
    try:
        model_shape = list(tensor.shape)
    except ValueError:
        # Rank itself is unknown: trust the user-supplied shape outright.
        tensor.set_shape(input_shapes.get(op_name))
        return tensor
    if op_name not in input_shapes:
        return tensor
    fixed_tensor_shape = input_shapes.get(op_name)
    message = "The fixed input tensor dim not equal to model input dim." \
              "tensor_name:%s, %s vs %s" % (op_name, str(fixed_tensor_shape),
                                            str(model_shape))
    if len(fixed_tensor_shape) != len(model_shape):
        utils.print_error_log(message)
        raise utils.AccuracyCompareException(
            utils.ACCURACY_COMPARISON_INVALID_DATA_ERROR)
    for index, dim in enumerate(model_shape):
        fixed_tensor_dim = int(fixed_tensor_shape[index])
        # A known model dim must be matched exactly; None dims are free.
        if dim is not None and fixed_tensor_dim != dim:
            utils.print_error_log(message)
            raise utils.AccuracyCompareException(
                utils.ACCURACY_COMPARISON_INVALID_DATA_ERROR)
        model_shape[index] = fixed_tensor_dim
    utils.print_info_log("Fix dynamic input shape of %s to %s" %
                         (op_name, model_shape))
    tensor.set_shape(model_shape)
    return tensor
def _make_inputs_data(self, inputs_tensor):
    """Prepare input data for the model.

    With no ``-i`` path: generate random .bin files per input tensor and
    record their joined paths in ``self.input_path``. With a user path:
    only validate that the number of files matches the number of inputs.

    :param inputs_tensor: list of TF input tensors
    :raises AccuracyCompareException: unknown shape, write failure,
        or path/tensor count mismatch
    """
    if self.args.input_path != "":
        # User supplied data: just check the file count matches.
        supplied_paths = self.args.input_path.split(",")
        if len(inputs_tensor) != len(supplied_paths):
            utils.print_error_log(
                "the number of model inputs tensor is not equal the number of "
                "inputs data, inputs tensor is: {}, inputs data is: {}".
                format(len(inputs_tensor), len(supplied_paths)))
            raise AccuracyCompareException(
                utils.ACCURACY_COMPARISON_INVALID_DATA_ERROR)
        return
    input_path_list = []
    for index, tensor in enumerate(inputs_tensor):
        if not tensor.shape:
            utils.print_error_log(
                "The shape of %s is unknown. Please usr -i to assign the input path."
                % tensor.name)
            raise AccuracyCompareException(
                utils.ACCURACY_COMPARISON_BIN_FILE_ERROR)
        input_data = np.random.random(
            tf_common.convert_tensor_shape(tensor.shape)).astype(
            tf_common.convert_to_numpy_type(tensor.dtype))
        input_path = os.path.join(self.data_dir,
                                  "input_" + str(index) + ".bin")
        input_path_list.append(input_path)
        try:
            input_data.tofile(input_path)
        except Exception as err:
            utils.print_error_log("Failed to generate data %s. %s"
                                  % (input_path, err))
            raise AccuracyCompareException(
                utils.ACCURACY_COMPARISON_BIN_FILE_ERROR)
        utils.print_info_log(
            "file name: {}, shape: {}, dtype: {}".format(
                input_path, input_data.shape, input_data.dtype))
    self.input_path = ','.join(input_path_list)
def get_inputs_tensor(global_graph, input_shape_str):
    """Get the model's input tensors (all 'Placeholder' operators).

    Any user-provided fixed shapes in ``input_shape_str`` are validated
    against the model and applied via ``verify_and_adapt_dynamic_shape``.

    :param global_graph: the loaded TF graph
    :param input_shape_str: user input-shape string, parsed by utils
    :return: list of input tensors with shapes fixed where requested
    """
    input_shapes = utils.parse_input_shape(input_shape_str)
    inputs_tensor = []
    tensor_index = {}
    operations = global_graph.get_operations()
    op_names = [op.name for op in operations if "Placeholder" == op.type]
    # Bugfix: removed stray debug `print(op_names)` — the tensors are
    # already reported through utils.print_info_log below.
    for _, tensor_name in enumerate(input_shapes):
        utils.check_input_name_in_model(op_names, tensor_name)
    for op in operations:
        # the operator with the 'Placeholder' type is the input operator of the model
        if "Placeholder" == op.type:
            op_name = op.name
            # Track how many tensors we've taken from this op name so the
            # output index (":0", ":1", ...) stays correct on repeats.
            if op_name in tensor_index:
                tensor_index[op_name] += 1
            else:
                tensor_index[op_name] = 0
            tensor = global_graph.get_tensor_by_name(
                op.name + ":" + str(tensor_index[op_name]))
            tensor = verify_and_adapt_dynamic_shape(input_shapes, op.name,
                                                    tensor)
            inputs_tensor.append(tensor)
    utils.print_info_log("model inputs tensor:\n{}\n".format(inputs_tensor))
    return inputs_tensor
def _correct_the_wrong_order(left_index, right_index, golden_net_output_info): if left_index != right_index: tmp = golden_net_output_info[left_index] golden_net_output_info[left_index] = golden_net_output_info[ right_index] golden_net_output_info[right_index] = tmp utils.print_info_log( "swap the {} and {} item in golden_net_output_info!".format( left_index, right_index))
def execute_command(cmd: str):
    """
    Execute shell command
    :param cmd: command
    :return: status code
    """
    if cmd is None:
        utils.print_error_log("Command is None.")
        return -1
    utils.print_info_log("[Run CMD]: %s" % cmd)
    # NOTE(review): shell=True passes the string to the shell — make sure
    # cmd never carries untrusted input.
    completed = subprocess.run(cmd, shell=True)
    return completed.returncode
def msame_run(self, msame_dir):
    """
    Function Description:
        run msame project
    Parameter:
        msame_dir: msame project directory
    Return Value:
        npu dump data path
    Exception Description:
        when invalid npu dump data path throw exception
    """
    self._compare_shape_vs_bin_file()
    npu_data_output_dir = os.path.join(self.arguments.out_path,
                                       NPU_DUMP_DATA_BASE_PATH)
    utils.create_directory(npu_data_output_dir)
    model_name, extension = utils.get_model_name_and_extension(
        self.arguments.offline_model_path)
    acl_json_path = os.path.join(msame_dir, ACL_JSON_PATH)
    if not os.path.exists(acl_json_path):
        # owner read/write only
        os.mknod(acl_json_path, mode=0o600)
    self._write_content_to_acl_json(acl_json_path, model_name,
                                    npu_data_output_dir)
    msame_cmd = [
        "./" + MSAME_COMMAND_PATH,
        "--model", self.arguments.offline_model_path,
        "--input", self.arguments.input_path,
        "--device", self.arguments.device,
        "--output", npu_data_output_dir,
    ]
    self._make_msame_cmd_for_shape_range(msame_cmd)
    execute_dir = os.path.join(msame_dir, OUT_PATH)
    os.chdir(execute_dir)
    # do msame command
    utils.print_info_log("Run command line: cd %s && %s"
                         % (execute_dir, " ".join(msame_cmd)))
    utils.execute_command(msame_cmd)
    npu_dump_data_path, file_is_exist = utils.get_dump_data_path(
        npu_data_output_dir)
    if not file_is_exist:
        utils.print_error_log("The path {} dump data is not exist.".format(
            npu_dump_data_path))
        raise AccuracyCompareException(
            utils.ACCURACY_COMPARISON_INVALID_PATH_ERROR)
    # net output data path
    npu_net_output_data_path, file_is_exist = utils.get_dump_data_path(
        npu_data_output_dir, True)
    if not file_is_exist:
        utils.print_error_log(
            "The path {} net output data is not exist.".format(
                npu_net_output_data_path))
        raise AccuracyCompareException(
            utils.ACCURACY_COMPARISON_INVALID_PATH_ERROR)
    self._convert_net_output_to_numpy(npu_net_output_data_path)
    return npu_dump_data_path, npu_net_output_data_path
def main():
    """
    Function Description:
        main process function
    Exception Description:
        exit the program when an AccuracyCompare Exception occurs
    """
    parser = argparse.ArgumentParser()
    _accuracy_compare_parser(parser)
    args = parser.parse_args(sys.argv[1:])
    args.model_path = os.path.realpath(args.model_path)
    args.offline_model_path = os.path.realpath(args.offline_model_path)
    args.cann_path = os.path.realpath(args.cann_path)
    try:
        utils.check_file_or_directory_path(os.path.realpath(args.out_path),
                                           True)
        # Each run gets its own timestamped output directory.
        time_dir = time.strftime("%Y%m%d%H%M%S", time.localtime())
        args.out_path = os.path.realpath(os.path.join(args.out_path, time_dir))
        utils.check_file_or_directory_path(args.model_path)
        utils.check_file_or_directory_path(args.offline_model_path)
        utils.check_device_param_valid(args.device)
        # generate dump data by the original model
        golden_dump = _generate_golden_data_model(args)
        golden_dump_data_path = golden_dump.generate_dump_data()
        golden_net_output_info = golden_dump.get_net_output_info()
        # convert the om model to json
        output_json_path = AtcUtils(args).convert_model_to_json()
        # compiling and running source codes
        npu_dump = NpuDumpData(args, output_json_path)
        npu_dump_data_path, npu_net_output_data_path = \
            npu_dump.generate_dump_data()
        expect_net_output_node = npu_dump.get_expect_output_name()
        # compare the entire network
        net_compare = NetCompare(npu_dump_data_path, golden_dump_data_path,
                                 output_json_path, args)
        net_compare.accuracy_network_compare()
        # Check and correct the mapping of net output node name.
        _check_output_node_name_mapping(expect_net_output_node,
                                        golden_net_output_info)
        net_compare.net_output_compare(npu_net_output_data_path,
                                       golden_net_output_info)
        # print the name of the first operator whose cosine similarity is less than 0.9
        csv_object_item = net_compare.get_csv_object_by_cosine()
        if csv_object_item is not None:
            utils.print_info_log(
                "{} of the first operator whose cosine similarity is less than 0.9"
                .format(csv_object_item.get("LeftOp")))
        else:
            utils.print_info_log(
                "No operator whose cosine value is less then 0.9 exists.")
    except utils.AccuracyCompareException as error:
        sys.exit(error.error_info)
def _load_graph(self):
    """Load the frozen TF GraphDef from self.args.model_path into a new graph.

    :raises AccuracyCompareException: when the model file cannot be read
        or imported
    """
    try:
        with tf.io.gfile.GFile(self.args.model_path, 'rb') as f:
            global_graph_def = tf.compat.v1.GraphDef.FromString(f.read())
        self.global_graph = tf.Graph()
        with self.global_graph.as_default():
            tf.import_graph_def(global_graph_def, name='')
    except Exception as err:
        utils.print_error_log("Failed to load the model %s. %s"
                              % (self.args.model_path, err))
        raise AccuracyCompareException(
            utils.ACCURACY_COMPARISON_OPEN_FILE_ERROR)
    utils.print_info_log("Load the model %s successfully."
                         % self.args.model_path)
def _get_outputs_tensor(self):
    """Collect output tensor names.

    Uses the user-specified ``output_nodes`` when given (after validation),
    otherwise treats every non-input node that passes _check_node_output
    as an output (":0").

    :return: list of output tensor name strings
    """
    input_nodes, node_list = self._get_all_node_and_input_node()
    if self.args.output_nodes:
        outputs_tensor = self.args.output_nodes.strip().split(';')
        self._check_output_nodes_valid(outputs_tensor, node_list)
    else:
        candidates = list(set(node_list).difference(set(input_nodes)))
        outputs_tensor = [
            name + ":0" for name in candidates
            if self._check_node_output(name)
        ]
    utils.print_info_log(
        "The outputs tensor:\n{}\n".format(outputs_tensor))
    return outputs_tensor
def _get_inputs_tensor_info(self, session):
    """Collect name/shape/type info for every model input.

    Dynamic shapes must be fixed through ``-s``/``--input-shape``; the
    user-provided dims are validated against the model's declared shape.

    :param session: onnxruntime.InferenceSession
    :return: list of dicts with "name", "shape", "type"
    :raises utils.AccuracyCompareException: dynamic shape without a fix,
        or an unparsable fixed shape
    """
    inputs_tensor_info = []
    # 'session' is a class of 'onnxruntime.InferenceSession'
    # 'input' is a class of 'onnxruntime.NodeArg'
    input_tensor_names = [item.name for item in session.get_inputs()]
    for _, tensor_name in enumerate(self.input_shapes):
        utils.check_input_name_in_model(input_tensor_names, tensor_name)
    for input_item in session.get_inputs():
        tensor_name = input_item.name
        tensor_type = input_item.type
        tensor_shape = tuple(input_item.shape)
        if utils.check_dynamic_shape(tensor_shape) and not self.input_shapes:
            utils.print_error_log(
                "The dynamic shape {} are not supported. Please "
                "set '-s' or '--input-shape' to fix the dynamic shape."
                .format(tensor_shape))
            raise utils.AccuracyCompareException(
                utils.ACCURACY_COMPARISON_INVALID_PARAM_ERROR)
        if self.input_shapes and tensor_name in self.input_shapes:
            input_shape = self.input_shapes.get(tensor_name)
            try:
                number_shape = [int(dim) for dim in input_shape]
            except (ValueError, TypeError):
                utils.print_error_log(
                    utils.get_shape_not_match_message(
                        InputShapeError.FORMAT_NOT_MATCH,
                        self.args.input_shape))
                raise utils.AccuracyCompareException(
                    utils.ACCURACY_COMPARISON_INVALID_PARAM_ERROR)
            self._check_input_shape_fix_value(tensor_name, tensor_shape,
                                              number_shape)
            tensor_info = {
                "name": tensor_name,
                "shape": tuple(number_shape),
                "type": tensor_type
            }
            utils.print_info_log("Fix dynamic input shape of %s to %s" %
                                 (tensor_name, number_shape))
        else:
            tensor_info = {
                "name": tensor_name,
                "shape": tensor_shape,
                "type": tensor_type
            }
        inputs_tensor_info.append(tensor_info)
    utils.print_info_log(
        "model inputs tensor info:\n{}\n".format(inputs_tensor_info))
    return inputs_tensor_info
def _save_dump_data(self, dump_bins, onnx_dump_data_dir, old_onnx_model,
                    net_output_node):
    """Save every node output in dump_bins to a timestamped .npy file.

    Files whose output tensor is a net output are also recorded in
    ``self.net_output`` keyed by their position in ``net_output_node``.
    """
    res_idx = 0
    for node in old_onnx_model.graph.node:
        # '.' and '/' are not filesystem-safe in node names.
        sanitized_name = node.name.replace('.', '_').replace('/', '_')
        for j, output in enumerate(node.output):
            file_name = sanitized_name + "." + str(j) + "." \
                        + str(round(time.time() * 1000000)) + ".npy"
            file_path = os.path.join(onnx_dump_data_dir, file_name)
            if output in net_output_node:
                self.net_output[net_output_node.index(output)] = file_path
            np.save(file_path, dump_bins[res_idx])
            res_idx += 1
    for key, value in self.net_output.items():
        utils.print_info_log(
            "net_output node is:{}, file path is {}".format(key, value))
    utils.print_info_log("dump data success")
def _modify_model_add_outputs_nodes(self, model_dir):
    """Create a copy of the ONNX model that exposes every node output.

    Nameless nodes are given a synthetic "<op_type>_<index>" name first so
    dump files can be mapped back to nodes.

    :param model_dir: directory where the modified model is written
    :return: (original onnx model object, path of the modified model)
    """
    old_onnx_model = onnx.load(self.args.model_path)
    utils.print_info_log("load model success")
    for index, node in enumerate(old_onnx_model.graph.node):
        if not node.name:
            node.name = "%s_%d" % (node.op_type, index)
    outputs_name = list(enumerate_model_node_outputs(old_onnx_model))
    new_onnx_model = select_model_inputs_outputs(old_onnx_model, outputs_name)
    new_onnx_model_path = os.path.join(
        model_dir, "new_" + os.path.basename(self.args.model_path))
    save_onnx_model(new_onnx_model, new_onnx_model_path)
    utils.print_info_log("modify model outputs success")
    return old_onnx_model, new_onnx_model_path
def get_inputs_data(inputs_tensor, input_paths):
    """Load the .bin files listed in input_paths into a tensor -> array map.

    :param inputs_tensor: list of TF input tensors (parallel to the paths)
    :param input_paths: comma-separated list of .bin file paths
    :return: dict mapping each tensor to its loaded numpy array
    :raises AccuracyCompareException: a file cannot be read or reshaped
    """
    inputs_map = {}
    path_list = input_paths.split(",")
    for index, tensor in enumerate(inputs_tensor):
        bin_path = path_list[index]
        try:
            input_data = np.fromfile(bin_path,
                                     convert_to_numpy_type(tensor.dtype))
            # Only reshape when the tensor declares a shape.
            if tensor.shape:
                input_data = input_data.reshape(tensor.shape)
            inputs_map[tensor] = input_data
            utils.print_info_log(
                "load file name: {}, shape: {}, dtype: {}".format(
                    os.path.basename(bin_path), input_data.shape,
                    input_data.dtype))
        except Exception as err:
            utils.print_error_log("Failed to load data %s. %s"
                                  % (bin_path, err))
            raise AccuracyCompareException(
                utils.ACCURACY_COMPARISON_BIN_FILE_ERROR)
    return inputs_map
def execute_msaccucmp_command(self, cmd, catch=False):
    """
    Function Description:
        run the following command
    Parameter:
        cmd: command
        catch: whether to capture the compare result from the output
    Return Value:
        status code, captured compare result (last non-empty capture)
    """
    utils.print_info_log('Execute command:%s' % cmd)
    result = []
    process = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)
    # Bugfix: the previous `while process.poll() is None` loop could drop
    # output emitted between the last readline and process exit. Reading the
    # pipe to EOF guarantees every line is seen.
    for raw_line in process.stdout:
        line = raw_line.strip()
        if line:
            print(line)
            compare_result = self._catch_compare_result(line, catch)
            result = compare_result if compare_result else result
    # Ensure returncode is populated after the stream is drained.
    process.wait()
    return process.returncode, result
def net_output_compare(self, npu_net_output_data_path, golden_net_output_info):
    """
    net_output_compare
    """
    if not golden_net_output_info:
        return
    npu_dump_file = {}
    file_index = 0
    cmd = ["python3", "-V"]
    python_version = self._check_python_command_valid(cmd)
    msaccucmp_command_dir_path = os.path.join(self.arguments.cann_path,
                                              MSACCUCMP_DIR_PATH)
    msaccucmp_command_file_path = self._check_msaccucmp_file(
        msaccucmp_command_dir_path)
    utils.print_info_log("=================================compare Node_output=================================")
    utils.print_info_log("start to compare the Node_output at now, compare result is:")
    utils.print_warn_log("The comparison of Node_output may be incorrect in certain scenarios. If the precision"
                         " is abnormal, please check whether the mapping between the comparison"
                         " data is correct.")
    for dir_path, subs_paths, files in os.walk(npu_net_output_data_path):
        for each_file in sorted(files):
            if not each_file.endswith(".npy"):
                continue
            npu_dump_file[file_index] = os.path.join(dir_path, each_file)
            msaccucmp_cmd = ["python" + python_version,
                             msaccucmp_command_file_path, "compare",
                             "-m", npu_dump_file.get(file_index),
                             "-g", golden_net_output_info.get(file_index)]
            status, compare_result = self.execute_msaccucmp_command(
                msaccucmp_cmd, True)
            # status 0 and 2 both count as a successful comparison run.
            if status == 2 or status == 0:
                self.save_net_output_result_to_csv(
                    npu_dump_file.get(file_index),
                    golden_net_output_info.get(file_index), compare_result)
                utils.print_info_log(
                    "Compare Node_output:{} completely.".format(file_index))
            else:
                utils.print_error_log("Failed to execute command: %s"
                                      % " ".join(msaccucmp_cmd))
                raise AccuracyCompareException(
                    utils.ACCURACY_COMPARISON_INVALID_DATA_ERROR)
            file_index += 1
    return
def accuracy_network_compare(self):
    """
    Function Description:
        invoke the interface for network-wide comparsion
    Exception Description:
        when invalid msaccucmp command throw exception
    """
    cmd = ["python3", "-V"]
    python_version = self._check_python_command_valid(cmd)
    msaccucmp_command_dir_path = os.path.join(self.arguments.cann_path,
                                              MSACCUCMP_DIR_PATH)
    msaccucmp_command_file_path = self._check_msaccucmp_file(
        msaccucmp_command_dir_path)
    self._check_pyc_to_python_version(msaccucmp_command_file_path,
                                      python_version)
    msaccucmp_cmd = ["python" + python_version,
                     msaccucmp_command_file_path, "compare",
                     "-m", self.npu_dump_data_path,
                     "-g", self.cpu_dump_data_path,
                     "-f", self.output_json_path,
                     "-out", self.arguments.out_path]
    utils.print_info_log("msaccucmp command line: %s "
                         % " ".join(msaccucmp_cmd))
    status_code, _ = self.execute_msaccucmp_command(msaccucmp_cmd)
    # status 0 and 2 both count as a successful comparison run.
    if status_code == 2 or status_code == 0:
        utils.print_info_log(
            "Finish compare the files in directory %s with those in directory %s." % (
                self.npu_dump_data_path, self.cpu_dump_data_path))
    else:
        utils.print_error_log("Failed to execute command: %s"
                              % " ".join(msaccucmp_cmd))
        raise AccuracyCompareException(
            utils.ACCURACY_COMPARISON_INVALID_DATA_ERROR)
def msame_compile(self, msame_dir):
    """
    Function Description:
        compile msame project
    Parameter:
        msame_dir: msame project directory
    """
    execute_path = os.path.join(msame_dir, OUT_PATH, MSAME_COMMAND_PATH)
    # A previously built binary makes recompilation unnecessary.
    if os.path.exists(execute_path):
        utils.print_info_log(
            "The execute file %s exist. Skip the compile step."
            % execute_path)
        return
    utils.print_info_log("Start to compile %s" % msame_dir)
    out_path = os.path.join(msame_dir, OUT_PATH)
    build_sh_cmd = ["sh", BUILD_SH, "g++", out_path]
    os.chdir(msame_dir)
    # do build.sh command
    utils.print_info_log("Run command line: cd %s && %s"
                         % (msame_dir, " ".join(build_sh_cmd)))
    utils.execute_command(build_sh_cmd)
    utils.print_info_log("Finish to compile %s." % msame_dir)
def _run_tf_dbg_dump(self, cmd_line):
    """Run tf debug with pexpect, should set tf debug ui_type='readline'"""
    tf_dbg = pexpect.spawn(cmd_line)
    tf_dbg.logfile = sys.stdout.buffer
    try:
        tf_dbg.expect('tfdbg>', timeout=tf_common.TF_DEBUG_TIMEOUT)
        utils.print_info_log("Start to run. Please wait....")
        tf_dbg.sendline('run')
        index = tf_dbg.expect(
            ['An error occurred during the run', 'tfdbg>'],
            timeout=tf_common.TF_DEBUG_TIMEOUT)
        if index == 0:
            raise AccuracyCompareException(
                utils.ACCURACY_COMPARISON_PYTHON_COMMAND_ERROR)
    except Exception as ex:
        tf_dbg.sendline('exit')
        utils.print_error_log("Failed to run command: %s. %s"
                              % (cmd_line, ex))
        raise AccuracyCompareException(
            utils.ACCURACY_COMPARISON_PYTHON_COMMAND_ERROR)
    # Dump the list of tensor names with 'lt' so we can build pt commands.
    tensor_name_path = os.path.join(self.tmp_dir, 'tf_tensor_names.txt')
    tf_dbg.sendline('lt > %s' % tensor_name_path)
    tf_dbg.expect('tfdbg>', timeout=tf_common.TF_DEBUG_TIMEOUT)
    if not os.path.exists(tensor_name_path):
        tf_dbg.sendline('exit')
        utils.print_error_log("Failed to save tensor name to file.")
        raise AccuracyCompareException(
            utils.ACCURACY_COMPARISON_PYTHON_COMMAND_ERROR)
    utils.print_info_log("Save tensor name to %s successfully."
                         % tensor_name_path)
    pt_command_list = self._make_pt_command(tensor_name_path)
    utils.print_info_log("Start to run %d pt commands. Please wait..."
                         % len(pt_command_list))
    # Issue one 'pt' command per tensor, waiting for the prompt each time.
    for cmd in pt_command_list:
        tf_dbg.sendline(cmd.strip())
        tf_dbg.expect('tfdbg>', timeout=tf_common.TF_DEBUG_TIMEOUT)
    tf_dbg.sendline('exit')
    utils.print_info_log('Finish dump tf data.')
def convert_model_to_json(self):
    """
    Function Description:
        convert om model to json
    Return Value:
        output json path
    Exception Description:
        when the model type is wrong throw exception
    """
    model_name, extension = utils.get_model_name_and_extension(
        self.arguments.offline_model_path)
    if ".om" != extension:
        # Bugfix: the old message ("The offline model file ends with an .om
        # file.") stated the opposite of the requirement.
        utils.print_error_log(
            'The offline model file must end with an .om file.Please check {} file.'
            .format(self.arguments.offline_model_path))
        raise AccuracyCompareException(
            utils.ACCURACY_COMPARISON_MODEL_TYPE_ERROR)
    utils.check_file_or_directory_path(
        (os.path.realpath(self.arguments.cann_path)), True)
    atc_command_file_path = os.path.join(self.arguments.cann_path,
                                         ATC_FILE_PATH)
    utils.check_file_or_directory_path(atc_command_file_path)
    output_json_path = os.path.join(self.arguments.out_path, "model",
                                    model_name + ".json")
    # do the atc command to convert om to json
    utils.print_info_log('Start to converting the model to json')
    atc_cmd = [
        atc_command_file_path, "--mode=1",
        "--om=" + self.arguments.offline_model_path,
        "--json=" + output_json_path
    ]
    utils.print_info_log("ATC command line %s" % " ".join(atc_cmd))
    utils.execute_command(atc_cmd)
    utils.print_info_log("Complete model conversion to json %s."
                         % output_json_path)
    return output_json_path