def _print_result(self, infer_output):
    """Copy each output buffer of a dataset to the host and print its top-5 values.

    Every buffer is decoded as a packed sequence of 4-byte floats; the number
    of floats is derived from the buffer size instead of being fixed at 1000.

    Args:
        infer_output: ACL dataset handle whose data buffers hold the
            inference outputs.

    ACL return codes are reported through ``check_ret``.
    """
    num = acl.mdl.get_dataset_num_buffers(infer_output)
    for i in range(num):
        temp_output_buf = acl.mdl.get_dataset_buffer(infer_output, i)
        infer_output_ptr = acl.get_data_buffer_addr(temp_output_buf)
        infer_output_size = acl.get_data_buffer_size_v2(temp_output_buf)

        output_host, ret = acl.rt.malloc_host(infer_output_size)
        check_ret("acl.rt.malloc_host", ret)
        try:
            ret = acl.rt.memcpy(output_host, infer_output_size,
                                infer_output_ptr, infer_output_size,
                                ACL_MEMCPY_DEVICE_TO_HOST)
            check_ret("acl.rt.memcpy", ret)

            # Prefer ptr_to_bytes when this pyACL build provides it,
            # otherwise fall back to a byte-typed numpy view.
            if hasattr(acl.util, "ptr_to_bytes"):
                raw = acl.util.ptr_to_bytes(output_host, infer_output_size)
            else:
                raw = bytearray(acl.util.ptr_to_numpy(
                    output_host, (infer_output_size,), NPY_BYTE))

            # One float per 4 bytes; struct.unpack copies the values out of
            # the host buffer, so they stay valid after it is freed below.
            float_count = infer_output_size // 4
            tuple_st = struct.unpack("%df" % float_count, raw)
            vals = np.array(tuple_st).flatten()
            top_k = vals.argsort()[-1:-6:-1]

            print("\n======== top5 inference results: ========")
            for j in top_k:
                print("label:%d prob: %f" % (j, vals[j]))
        finally:
            # Release the host staging buffer even if decoding failed.
            ret = acl.rt.free_host(output_host)
            check_ret("acl.rt.free_host", ret)
def callback_func(self, delete_list):
    """Print the top-5 values of every output buffer in the given entries.

    Args:
        delete_list: iterable of 2-item entries; the second item of each
            entry is the ACL output dataset handle to read.
    """
    print('callback func stage:')
    for _, output_dataset in delete_list:
        # device to host
        buffer_count = acl.mdl.get_dataset_num_buffers(output_dataset)
        for idx in range(buffer_count):
            data_buf = acl.mdl.get_dataset_buffer(output_dataset, idx)
            device_ptr = acl.get_data_buffer_addr(data_buf)
            nbytes = acl.get_data_buffer_size_v2(data_buf)

            host_ptr, ret = acl.rt.malloc_host(nbytes)
            check_ret("acl.rt.malloc_host", ret)
            ret = acl.rt.memcpy(host_ptr, nbytes, device_ptr, nbytes,
                                ACL_MEMCPY_DEVICE_TO_HOST)
            check_ret("acl.rt.memcpy", ret)

            # NOTE(review): the host buffer is not freed in this method;
            # confirm whether self.get_result (or a later stage) releases it.
            decoded = self.get_result([{"buffer": host_ptr, "size": nbytes}])
            vals = np.array(decoded).flatten()
            best_first = vals.argsort()[::-1][:5]

            print("\n======== top5 inference results: =============")
            for label in best_first:
                print("[%d]: %f" % (label, vals[label]))
    print('callback func stage success')
def _output_dataset_to_numpy(self):
    """Copy every buffer of ``self.output_dataset`` into its preallocated tensor.

    The destination pointer and tensor for buffer ``i`` are read from
    ``self._output_info[i]["ptr"]`` and ``self._output_info[i]["tensor"]``.

    Returns:
        A list with one tensor per output buffer, or ``None`` as soon as a
        memcpy reports an error.
    """
    # Copy direction follows the run mode: device-to-host when running in
    # ACL_HOST mode, device-to-device otherwise.
    policy = (ACL_MEMCPY_DEVICE_TO_HOST if self._run_mode == ACL_HOST
              else ACL_MEMCPY_DEVICE_TO_DEVICE)

    dataset = []
    num = acl.mdl.get_dataset_num_buffers(self.output_dataset)
    for i in range(num):
        buffer = acl.mdl.get_dataset_buffer(self.output_dataset, i)
        data = acl.get_data_buffer_addr(buffer)
        size = int(acl.get_data_buffer_size(buffer))

        output_ptr = self._output_info[i]["ptr"]
        output_tensor = self._output_info[i]["tensor"]
        # Destination capacity is the tensor's own byte size.
        ret = acl.rt.memcpy(output_ptr,
                            output_tensor.size * output_tensor.itemsize,
                            data, size, policy)
        if ret != ACL_ERROR_NONE:
            print("Memcpy inference output to local failed")
            return None

        dataset.append(output_tensor)
    return dataset
def _output_dataset_to_numpy(self):
    """Copy each output buffer into a freshly generated tensor and collect them.

    Destination entries come from ``self._gen_output_tensor()``; each entry
    supplies "ptr" and "tensor", plus "dtype" and "shape" when the tensor is
    a ``bytes`` object that must be decoded into a numpy array.

    Returns:
        A list of output tensors, or ``None`` if any memcpy fails.
    """
    targets = self._gen_output_tensor()
    results = []
    buffer_count = acl.mdl.get_dataset_num_buffers(self._output_dataset)
    for idx in range(buffer_count):
        data_buf = acl.mdl.get_dataset_buffer(self._output_dataset, idx)
        src_addr = acl.get_data_buffer_addr(data_buf)
        src_size = int(acl.get_data_buffer_size(data_buf))

        target = targets[idx]
        dst_ptr = target["ptr"]
        tensor = target["tensor"]
        is_raw_bytes = isinstance(tensor, bytes)
        if is_raw_bytes:
            dst_size = len(tensor)
        else:
            dst_size = tensor.size * tensor.itemsize

        ret = acl.rt.memcpy(dst_ptr, dst_size, src_addr, src_size,
                            self._copy_policy)
        if ret != const.ACL_SUCCESS:
            log_error("Memcpy inference output to local failed")
            return None

        if is_raw_bytes:
            # Decode the raw bytes with the recorded dtype/shape and take an
            # independent copy of the result.
            decoded = np.frombuffer(tensor, dtype=target["dtype"])
            tensor = decoded.reshape(target["shape"]).copy()
        results.append(tensor)
    return results
def _print_result(self, infer_output):
    """Copy each output buffer to the host and print its top-5 values.

    The buffer is interpreted as float32 data shaped by the dims that
    ``acl.mdl.get_cur_output_dims`` reports for ``self.model_desc``.

    Args:
        infer_output: ACL dataset handle whose data buffers hold the
            inference outputs.

    ACL return codes are reported through ``check_ret``.
    """
    num = acl.mdl.get_dataset_num_buffers(infer_output)
    for i in range(num):
        dims, ret = acl.mdl.get_cur_output_dims(self.model_desc, i)
        check_ret("acl.mdl.get_cur_output_dims", ret)
        out_shape = tuple(dims['dims'])

        temp_output_buffer = acl.mdl.get_dataset_buffer(infer_output, i)
        infer_output_ptr = acl.get_data_buffer_addr(temp_output_buffer)
        infer_output_size = acl.get_data_buffer_size_v2(temp_output_buffer)

        output_host, ret = acl.rt.malloc_host(infer_output_size)
        check_ret("acl.rt.malloc_host", ret)
        try:
            ret = acl.rt.memcpy(output_host, infer_output_size,
                                infer_output_ptr, infer_output_size,
                                ACL_MEMCPY_DEVICE_TO_HOST)
            check_ret("acl.rt.memcpy", ret)

            # Prefer ptr_to_bytes when this pyACL build provides it.
            if hasattr(acl.util, "ptr_to_bytes"):
                bytes_data = acl.util.ptr_to_bytes(output_host,
                                                   infer_output_size)
                result = np.frombuffer(
                    bytes_data, dtype=np.float32).reshape(out_shape)
            else:
                result = acl.util.ptr_to_numpy(output_host, out_shape,
                                               NPY_FLOAT32)

            # np.array copies the values, so nothing below depends on the
            # host buffer that is freed in the finally clause.
            vals = np.array(result).flatten()
            top_k = vals.argsort()[-1:-6:-1]
            possible = 0
            print("\n========= top5 inference results: =========")
            for j in top_k:
                print("label:%d prob: %f" % (j, vals[j]))
                possible += vals[j]
            # The reported top-5 sum is capped at 1.
            print("result: class_label[{}],top1[{:f}],top5[{:f}] ".format(
                top_k[0], vals[top_k[0]], possible if possible < 1 else 1))
        finally:
            # Release the host staging buffer even if decoding failed.
            ret = acl.rt.free_host(output_host)
            check_ret("acl.rt.free_host", ret)
def _release_databuffer(self, data_buffer, free_memory=False):
    """Destroy an ACL data buffer, optionally freeing the memory it refers to.

    Args:
        data_buffer: data buffer handle passed to ``acl.destroy_data_buffer``.
        free_memory: when True, the address stored in the buffer is released
            with ``acl.rt.free`` first (skipped if the address is falsy).

    Failures are logged with ``log_error``; nothing is raised or returned.
    """
    if free_memory:
        data_addr = acl.get_data_buffer_addr(data_buffer)
        if data_addr:
            ret = acl.rt.free(data_addr)
            # Report a failed free instead of dropping the return code;
            # the buffer descriptor is still destroyed below.
            if ret != const.ACL_ERROR_NONE:
                log_error("Free data buffer memory error ", ret)

    ret = acl.destroy_data_buffer(data_buffer)
    if ret != const.ACL_ERROR_NONE:
        log_error("Destroy data buffer error ", ret)
def _destroy_dataset_and_databuf(self, ):
    """Drain ``self.dataset_list``, releasing every dataset it contains.

    Each popped entry is iterated as a group of ACL dataset handles. For
    each handle, every non-empty data buffer has its memory freed and its
    descriptor destroyed, then the dataset itself is destroyed. Return
    codes are reported through ``check_ret``.
    """
    while self.dataset_list:
        group = self.dataset_list.pop()
        for handle in group:
            buffer_count = acl.mdl.get_dataset_num_buffers(handle)
            for idx in range(buffer_count):
                data_buf = acl.mdl.get_dataset_buffer(handle, idx)
                if not data_buf:
                    continue
                addr = acl.get_data_buffer_addr(data_buf)
                ret = acl.rt.free(addr)
                check_ret("acl.rt.free", ret)
                ret = acl.destroy_data_buffer(data_buf)
                check_ret("acl.destroy_data_buffer", ret)
            ret = acl.mdl.destroy_dataset(handle)
            check_ret("acl.mdl.destroy_dataset", ret)
def _output_dataset_to_numpy(self):
    """Decode every buffer of ``self.output_dataset`` into an output item.

    When the run mode is ACL_HOST the buffer address is first passed through
    ``copy_data_device_to_host``. The bytes are then handed to
    ``self._unpack_output_item`` together with the "shape" and "type"
    recorded in ``self._output_info``.

    Returns:
        A list with one decoded item per output buffer.
    """
    outputs = []
    buffer_count = acl.mdl.get_dataset_num_buffers(self.output_dataset)
    for idx in range(buffer_count):
        data_buf = acl.mdl.get_dataset_buffer(self.output_dataset, idx)
        addr = acl.get_data_buffer_addr(data_buf)
        nbytes = acl.get_data_buffer_size(data_buf)

        if self._run_mode == ACL_HOST:
            addr = copy_data_device_to_host(addr, nbytes)

        # View the memory as a flat byte array before decoding it.
        raw = acl.util.ptr_to_numpy(addr, (nbytes, ), NPY_BYTE)
        info = self._output_info[idx]
        outputs.append(
            self._unpack_output_item(raw, info["shape"], info["type"]))
    return outputs
def _release_dataset(self):
    """Free the buffers of ``self.input_dataset`` and ``self.output_data``.

    Falsy handles are skipped. For each remaining dataset every non-empty
    data buffer has its memory freed and its descriptor destroyed, then the
    dataset is destroyed. Return codes are reported through ``check_ret``.
    """
    for handle in (self.input_dataset, self.output_data):
        if handle:
            # Get the number of data buffers and release them one by one.
            buffer_count = acl.mdl.get_dataset_num_buffers(handle)
            for idx in range(buffer_count):
                data_buf = acl.mdl.get_dataset_buffer(handle, idx)
                if not data_buf:
                    continue
                addr = acl.get_data_buffer_addr(data_buf)
                ret = acl.rt.free(addr)
                check_ret("acl.rt.free", ret)
                ret = acl.destroy_data_buffer(data_buf)
                check_ret("acl.destroy_data_buffer", ret)
            ret = acl.mdl.destroy_dataset(handle)
            check_ret("acl.mdl.destroy_dataset", ret)
def _release_dataset(self):
    """
    Release the data buffers and dataset handles held in
    ``self.input_dataset`` and ``self.output_data``; falsy handles are
    skipped and every ACL return code goes through ``check_ret``.
    """
    candidates = [self.input_dataset, self.output_data]
    for live_dataset in filter(None, candidates):
        total = acl.mdl.get_dataset_num_buffers(live_dataset)
        index = 0
        while index < total:
            buf = acl.mdl.get_dataset_buffer(live_dataset, index)
            index += 1
            if not buf:
                continue
            # Free the memory first, then the descriptor that refers to it.
            mem = acl.get_data_buffer_addr(buf)
            ret = acl.rt.free(mem)
            check_ret("acl.rt.free", ret)
            ret = acl.destroy_data_buffer(buf)
            check_ret("acl.destroy_data_buffer", ret)
        ret = acl.mdl.destroy_dataset(live_dataset)
        check_ret("acl.mdl.destroy_dataset", ret)
def _output_dataset_to_numpy(self):
    """Copy each output buffer into a byte array and decode it.

    Every buffer of ``self.output_dataset`` is copied into a zero-filled
    ``np.byte`` array of the same size, then decoded by
    ``self._unpack_bytes_array`` using the "shape" and "type" recorded in
    ``self._output_info``.

    Returns:
        A list of decoded arrays, or ``None`` if any memcpy fails.
    """
    decoded_outputs = []
    buffer_count = acl.mdl.get_dataset_num_buffers(self.output_dataset)
    for idx in range(buffer_count):
        data_buf = acl.mdl.get_dataset_buffer(self.output_dataset, idx)
        src_addr = acl.get_data_buffer_addr(data_buf)
        src_size = acl.get_data_buffer_size(data_buf)

        # Staging array that receives the raw output bytes.
        staging = np.zeros(src_size, dtype=np.byte)
        ret = acl.rt.memcpy(acl.util.numpy_to_ptr(staging),
                            staging.size * staging.itemsize,
                            src_addr, src_size,
                            ACL_MEMCPY_DEVICE_TO_DEVICE)
        if ret != ACL_ERROR_NONE:
            print("Memcpy inference output to local failed")
            return None

        info = self._output_info[idx]
        decoded_outputs.append(
            self._unpack_bytes_array(staging, info["shape"], info["type"]))
    return decoded_outputs
def _output_dataset_to_numpy(self):
    """Fill the preallocated output tensors from ``self.output_dataset``.

    Buffer ``i`` is copied, using the device-to-device memcpy kind, to the
    pointer stored in ``self._output_info[i]["ptr"]``; the matching
    ``"tensor"`` object is what gets returned.

    Returns:
        A list of output tensors, or ``None`` if any memcpy fails.
    """
    tensors = []
    buffer_count = acl.mdl.get_dataset_num_buffers(self.output_dataset)
    # Walk over every output of the model.
    for idx in range(buffer_count):
        # Source address and size come from the output data buffer.
        data_buf = acl.mdl.get_dataset_buffer(self.output_dataset, idx)
        src_addr = acl.get_data_buffer_addr(data_buf)
        src_size = int(acl.get_data_buffer_size(data_buf))

        slot = self._output_info[idx]
        dst_ptr = slot["ptr"]
        tensor = slot["tensor"]
        dst_size = tensor.size * tensor.itemsize

        ret = acl.rt.memcpy(dst_ptr, dst_size, src_addr, src_size,
                            ACL_MEMCPY_DEVICE_TO_DEVICE)
        if ret != ACL_ERROR_NONE:
            print("Memcpy inference output to local failed")
            return None
        tensors.append(tensor)
    return tensors
def _output_dataset_to_numpy(self):
    """Copy each output buffer into a tensor produced by ``_gen_output_tensor``.

    The memcpy kind is taken from ``self._copy_policy``; the destination
    pointer and tensor come from the "ptr" and "tensor" keys of each
    generated entry.

    Returns:
        A list of output tensors, or ``None`` if any memcpy fails.
    """
    targets = self._gen_output_tensor()
    collected = []
    buffer_count = acl.mdl.get_dataset_num_buffers(self._output_dataset)
    for idx in range(buffer_count):
        data_buf = acl.mdl.get_dataset_buffer(self._output_dataset, idx)
        src_addr = acl.get_data_buffer_addr(data_buf)
        src_size = int(acl.get_data_buffer_size(data_buf))

        target = targets[idx]
        dst_ptr = target["ptr"]
        tensor = target["tensor"]

        ret = acl.rt.memcpy(dst_ptr, tensor.size * tensor.itemsize,
                            src_addr, src_size, self._copy_policy)
        if ret != const.ACL_ERROR_NONE:
            log_error("Memcpy inference output to local failed")
            return None
        collected.append(tensor)
    return collected
def _get_result(self, infer_output):
    """Transfer the inference outputs to the host and decode them as floats.

    Each data buffer of the dataset is copied into a temporary host buffer
    and unpacked as a sequence of 4-byte floats.

    Args:
        infer_output: The output dataset buffer for the inference model.

    Returns:
        ``np.array`` built from one tuple of floats per output buffer.

    ACL return codes are reported through ``check_ret``.
    """
    output = []
    num = acl.mdl.get_dataset_num_buffers(infer_output)
    for i in range(num):
        temp_output_buf = acl.mdl.get_dataset_buffer(infer_output, i)
        infer_output_ptr = acl.get_data_buffer_addr(temp_output_buf)
        infer_output_size = acl.get_data_buffer_size_v2(temp_output_buf)

        output_host, ret = acl.rt.malloc_host(infer_output_size)
        check_ret("acl.rt.malloc_host", ret)
        try:
            ret = acl.rt.memcpy(output_host, infer_output_size,
                                infer_output_ptr, infer_output_size,
                                ACL_MEMCPY_DEVICE_TO_HOST)
            check_ret("acl.rt.memcpy", ret)

            # Prefer ptr_to_bytes when this pyACL build provides it,
            # otherwise fall back to a byte-typed numpy view.
            if hasattr(acl.util, "ptr_to_bytes"):
                raw = acl.util.ptr_to_bytes(output_host, infer_output_size)
            else:
                raw = bytearray(acl.util.ptr_to_numpy(
                    output_host, (infer_output_size, ), NPY_BYTE))

            # TODO: The unpack size depends on the cutlayer parameter.
            # Integer division keeps the element count exact for any size.
            float_count = infer_output_size // 4
            # struct.unpack copies the values out of the host buffer.
            output.append(struct.unpack("{:d}f".format(float_count), raw))
        finally:
            # Release the host staging buffer even if decoding failed.
            ret = acl.rt.free_host(output_host)
            check_ret("acl.rt.free_host", ret)
    return np.array(output)
def _output_dataset_to_numpy(self):
    """Copy the model outputs into numpy byte arrays and decode them.

    Returns:
        A list with one decoded numpy array per output buffer of
        ``self.output_dataset``, or ``None`` if any memcpy fails.
    """
    results = []
    total = acl.mdl.get_dataset_num_buffers(self.output_dataset)
    idx = 0
    # Iterate over every output.
    while idx < total:
        # Fetch the memory address and size of the output data.
        out_buf = acl.mdl.get_dataset_buffer(self.output_dataset, idx)
        src_addr = acl.get_data_buffer_addr(out_buf)
        nbytes = acl.get_data_buffer_size(out_buf)

        # Create a numpy array to receive a copy of the output memory.
        byte_array = np.zeros(nbytes, dtype=np.byte)
        dst_ptr = acl.util.numpy_to_ptr(byte_array)
        dst_capacity = byte_array.size * byte_array.itemsize
        ret = acl.rt.memcpy(dst_ptr, dst_capacity, src_addr, nbytes,
                            ACL_MEMCPY_DEVICE_TO_DEVICE)
        if ret != ACL_ERROR_NONE:
            print("Memcpy inference output to local failed")
            return None

        # Decode the raw bytes into a numpy array using the output's
        # recorded shape and data type.
        shape = self._output_info[idx]["shape"]
        dtype = self._output_info[idx]["type"]
        results.append(self._unpack_bytes_array(byte_array, shape, dtype))
        idx += 1
    return results