def __init__(self, device_id, model_path, model_input_width, model_input_height):
    """Record the configuration, run resource setup and build the pipeline stages.

    Args:
        device_id: Device index (int) stored on the instance.
        model_path: Model file path (str) handed to ``Model``.
        model_input_width: Width handed to ``Dvpp``.
        model_input_height: Height handed to ``Dvpp``.
    """
    self.device_id = device_id
    self.model_path = model_path

    # Handles and buffers that have no value yet at construction time.
    self.model_id = self.context = None
    self.input_data = self.output_data = None
    self.model_desc = None
    self.load_input_dataset = self.load_output_dataset = None

    # NOTE(review): presumably populates self.context / self.stream, which
    # the constructors below read -- confirm against init_resource().
    self.init_resource()

    self._model_input_width, self._model_input_height = (
        model_input_width,
        model_input_height,
    )

    # Processing stages, created in the same order as before.
    self.model_process = Model(self.context, self.stream, self.model_path)
    self.dvpp_process = Dvpp(self.stream, model_input_width, model_input_height)
    self.sing_op = SingleOp(self.stream)
class Classify(object):
    """Classification pipeline: DVPP pre-processing, model inference, top-5 report."""

    def __init__(self, acl_resource, model_path, model_width, model_height):
        """Keep the model geometry and create the Dvpp and Model helpers.

        Args:
            acl_resource: Object handed to ``Dvpp``.
            model_path: Path handed to ``Model``.
            model_width: Target width used by ``pre_process``.
            model_height: Target height used by ``pre_process``.
        """
        self._model_path, self._model_width, self._model_height = (
            model_path,
            model_width,
            model_height,
        )
        self._dvpp = Dvpp(acl_resource)
        self._model = Model(model_path)

    def __del__(self):
        """Drop the Dvpp helper reference and report the release."""
        if self._dvpp:
            del self._dvpp
        print("[Sample] class Samle release source success")

    def pre_process(self, image):
        """Decode ``image`` with ``jpegd`` and resize the result to the model size."""
        decoded = self._dvpp.jpegd(image)
        scaled = self._dvpp.resize(decoded, self._model_width, self._model_height)
        print("resize yuv end")
        return scaled

    def inference(self, resized_image):
        """Run the model on a single pre-processed image and return its output."""
        model_inputs = [resized_image]
        return self._model.execute(model_inputs)

    def post_process(self, infer_output, image_file):
        """Print the top-5 classes and save the image annotated with the best one.

        Args:
            infer_output: Sequence whose first element provides ``flatten()``.
            image_file: Path of the source image; its basename names the output.
        """
        print("post process")
        scores = infer_output[0].flatten()
        # Indices of the five largest scores, best first.
        best_five = scores.argsort()[-1:-6:-1]

        print("images:{}".format(image_file))
        print("======== top5 inference results: =============")
        for class_idx in best_five:
            class_name = image_net_classes.get_image_net_class(class_idx)
            print("label:%d confidence: %f, class: %s" %
                  (class_idx, scores[class_idx], class_name))

        if len(best_five) == 0:
            return

        # Write the most confident class name onto the image with Pillow and
        # save the result locally.
        best_name = image_net_classes.get_image_net_class(best_five[0])
        output_dir = os.path.join(SRC_PATH, "../outputs")
        output_path = os.path.join(output_dir, os.path.basename(image_file))
        annotated = Image.open(image_file)
        canvas = ImageDraw.Draw(annotated)
        canvas.text((10, 50), best_name, font=ImageFont.load_default(), fill=255)
        annotated.save(output_path)
def __init__(self, acl_resource, model_path, model_width, model_height):
    """Store the model geometry and create the Model and Dvpp helpers.

    Args:
        acl_resource: Object handed to ``Dvpp``.
        model_path: Path handed to ``Model``.
        model_width: Model input width kept on the instance.
        model_height: Model input height kept on the instance.
    """
    # No buffer is held until one is assigned elsewhere.
    self.total_buffer = None
    self._model_path, self._model_width, self._model_height = (
        model_path,
        model_width,
        model_height,
    )
    # Same construction order as before: model first, then dvpp.
    self._model = Model(model_path)
    self._dvpp = Dvpp(acl_resource)
    print("The App arg is __init__")
def __init__(self, acl_resource, model_width, model_height):
    """Keep the ACL resource and model size, then build the Dvpp helper and image info.

    Args:
        acl_resource: Object stored on the instance and handed to ``Dvpp``.
        model_width: Model input width.
        model_height: Model input height.
    """
    self._acl_resource = acl_resource
    self._model_width, self._model_height = model_width, model_height
    # Images are processed with dvpp; a dvpp instance is not needed when
    # opencv or PIL is used instead.
    self._dvpp = Dvpp(acl_resource)
    # Image-info input for the yolov3 network: (width, height) repeated
    # twice, as float32.
    size_pair = [model_width, model_height]
    self._image_info = np.array(size_pair + size_pair, dtype=np.float32)
def init(self):
    """Set up resources, then the Dvpp helper, then the model.

    Returns:
        ``SUCCESS`` when both helpers initialise, ``FAILED`` as soon as one
        of them reports anything other than ``SUCCESS``.
    """
    self._init_resource()

    self._dvpp = Dvpp(self.stream, self.run_mode)
    dvpp_status = self._dvpp.init_resource()
    if dvpp_status != SUCCESS:
        print("Init dvpp failed")
        return FAILED

    self._model = Model(self.run_mode, self._model_path)
    model_status = self._model.init_resource()
    if model_status != SUCCESS:
        print("Init model failed")
        return FAILED

    return SUCCESS
def __init__(self, device_id, model_path, vdec_out_path, model_input_width, model_input_height):
    """Record the configuration, run resource setup and build the pipeline stages.

    Args:
        device_id: Device index (int) stored on the instance.
        model_path: Model file path (str) handed to ``Model``.
        vdec_out_path: Path handed to ``Vdec`` and stored on the instance.
        model_input_width: Width handed to ``Dvpp``.
        model_input_height: Height handed to ``Dvpp``.
    """
    self.device_id = device_id
    self.model_path = model_path
    self.context = None
    self.stream = None
    # Assigned once. The previous code first stored a 1-tuple here because
    # of a stray trailing comma and only corrected it after the helpers
    # had been built.
    self.model_input_width = model_input_width
    self.model_input_height = model_input_height

    # NOTE(review): presumably populates self.context / self.stream, which
    # the constructors below read -- confirm against init_resource().
    self.init_resource()

    self.model_process = Model(self.context, self.stream, model_path)
    self.vdec_process = Vdec(self.context, self.stream, vdec_out_path)
    self.dvpp_process = Dvpp(self.stream, model_input_width, model_input_height)
    self.vdec_out_path = vdec_out_path
class Classify(object):
    """Classification pipeline that owns its ACL device, context and stream."""

    def __init__(self, model_path, model_width, model_height):
        """Store the configuration; no ACL call is made until ``init()``.

        Args:
            model_path: Path handed to ``Model`` by ``init()``.
            model_width: Target width used by ``pre_process``.
            model_height: Target height used by ``pre_process``.
        """
        self.device_id = 0
        self.context = None
        self.stream = None
        self._model = None
        self.run_mode = None
        self._model_path = model_path
        self._model_width = model_width
        self._model_height = model_height
        self._dvpp = None

    def __del__(self):
        """Release the helpers, stream and context, then reset the device and finalize ACL."""
        if self._model:
            del self._model
        if self._dvpp:
            del self._dvpp
        if self.stream:
            acl.rt.destroy_stream(self.stream)
        if self.context:
            acl.rt.destroy_context(self.context)
        acl.rt.reset_device(self.device_id)
        acl.finalize()
        print("[Sample] Sample release source success")

    def _init_resource(self):
        """Initialise ACL, select the device and create context, stream and run mode."""
        print("[Sample] init resource stage:")
        ret = acl.init()
        # Report a failure under the call that actually produced it; this
        # used to be labelled "acl.rt.set_device".
        check_ret("acl.init", ret)
        ret = acl.rt.set_device(self.device_id)
        check_ret("acl.rt.set_device", ret)
        self.context, ret = acl.rt.create_context(self.device_id)
        check_ret("acl.rt.create_context", ret)
        self.stream, ret = acl.rt.create_stream()
        check_ret("acl.rt.create_stream", ret)
        self.run_mode, ret = acl.rt.get_run_mode()
        check_ret("acl.rt.get_run_mode", ret)
        print("Init resource stage success")

    def init(self):
        """Set up ACL resources, the Dvpp helper and the model.

        Returns:
            ``SUCCESS`` when everything initialises, otherwise ``FAILED``.
        """
        self._init_resource()
        self._dvpp = Dvpp(self.stream, self.run_mode)
        ret = self._dvpp.init_resource()
        if ret != SUCCESS:
            print("Init dvpp failed")
            return FAILED
        self._model = Model(self.run_mode, self._model_path)
        ret = self._model.init_resource()
        if ret != SUCCESS:
            print("Init model failed")
            return FAILED
        return SUCCESS

    def pre_process(self, image):
        """Decode ``image`` with ``jpegd`` and resize the result to the model size."""
        yuv_image = self._dvpp.jpegd(image)
        print("decode jpeg end")
        resized_image = self._dvpp.resize(yuv_image, self._model_width,
                                          self._model_height)
        print("resize yuv end")
        return resized_image

    def inference(self, resized_image):
        """Run the model on the resized image's data pointer and size."""
        return self._model.execute(resized_image.data(), resized_image.size)

    def post_process(self, infer_output, image_file):
        """Annotate the image with the most confident class and return that class.

        Args:
            infer_output: Sequence whose first element provides ``flatten()``.
            image_file: Path of the source image; the result is saved as
                ``out_<basename>`` under ``SRC_PATH/../outputs/``.

        Returns:
            The value of ``get_image_net_class`` for the best-scoring index.
        """
        print("post process")
        data = infer_output[0]
        vals = data.flatten()
        # Indices of the five largest scores, best first.
        top_k = vals.argsort()[-1:-6:-1]
        # Looked up once; the previous code repeated this call before returning.
        object_class = get_image_net_class(top_k[0])
        output_path = os.path.join(os.path.join(SRC_PATH, "../outputs/"),
                                   'out_' + os.path.basename(image_file))
        origin_img = Image.open(image_file)
        draw = ImageDraw.Draw(origin_img)
        font = ImageFont.load_default()
        # NOTE(review): kept from the original; whether assigning ``size``
        # changes the rendered glyph size depends on the Pillow font object
        # -- confirm.
        font.size = 50
        draw.text((10, 50), object_class, font=font, fill=255)
        origin_img.save(output_path)
        return object_class
def __init__(self, acl_resource, model_path, model_width, model_height):
    """Store the model geometry and create the Dvpp and Model helpers.

    Args:
        acl_resource: Object handed to ``Dvpp``.
        model_path: Path handed to ``Model``.
        model_width: Model input width kept on the instance.
        model_height: Model input height kept on the instance.
    """
    self._model_path, self._model_width, self._model_height = (
        model_path,
        model_width,
        model_height,
    )
    # Same construction order as before: dvpp first, then the model.
    self._dvpp = Dvpp(acl_resource)
    self._model = Model(model_path)
class Classify(object):
    """Classification pipeline that owns its ACL device, context and stream.

    ``acl.init()`` is not called here; it stays disabled as in the original
    code. NOTE(review): presumably the caller initialises ACL -- confirm.
    """

    def __init__(self, model_path, model_width, model_height):
        """Store the configuration; no ACL call is made until ``init()``.

        Args:
            model_path: Path handed to ``Model`` by ``init()``.
            model_width: Target width used by ``pre_process``.
            model_height: Target height used by ``pre_process``.
        """
        self.device_id = 0
        self.context = None
        self.stream = None
        # Defined up front so that releasing an instance whose init() never
        # ran (or failed early) does not raise AttributeError.
        self.run_mode = None
        self._model = None
        self._dvpp = None
        self._released = False
        self._model_path = model_path
        self._model_width = model_width
        self._model_height = model_height

    def __del__(self):
        """Release resources at garbage collection if ``destroy()`` was not called."""
        self._release()

    def destroy(self):
        """Release resources now.

        The previous implementation evaluated ``self.__del__`` without
        calling it, so this method did nothing.
        """
        self._release()

    def _release(self):
        """Release helpers, stream and context once; later calls are no-ops."""
        if getattr(self, "_released", False):
            return
        # Set first so a failure part-way through cannot trigger a second
        # attempt from __del__.
        self._released = True
        if self._model:
            self._model = None
        if self._dvpp:
            self._dvpp = None
        if self.stream:
            acl.rt.destroy_stream(self.stream)
            self.stream = None
        if self.context:
            acl.rt.destroy_context(self.context)
            self.context = None
        acl.rt.reset_device(self.device_id)
        acl.finalize()
        print("[Sample] class Samle release source success")

    def _init_resource(self):
        """Select the device and create the context, stream and run mode."""
        print("[Sample] init resource stage:")
        ret = acl.rt.set_device(self.device_id)
        check_ret("acl.rt.set_device", ret)
        self.context, ret = acl.rt.create_context(self.device_id)
        check_ret("acl.rt.create_context", ret)
        self.stream, ret = acl.rt.create_stream()
        check_ret("acl.rt.create_stream", ret)
        self.run_mode, ret = acl.rt.get_run_mode()
        check_ret("acl.rt.get_run_mode", ret)
        print("Init resource stage success")

    def init(self):
        """Set up ACL resources, the Dvpp helper and the model.

        Returns:
            ``SUCCESS`` when everything initialises, otherwise ``FAILED``.
        """
        self._init_resource()
        self._dvpp = Dvpp(self.stream, self.run_mode)
        ret = self._dvpp.init_resource()
        if ret != SUCCESS:
            print("Init dvpp failed")
            return FAILED
        self._model = Model(self.run_mode, self._model_path)
        ret = self._model.init_resource()
        if ret != SUCCESS:
            print("Init model failed")
            return FAILED
        return SUCCESS

    def pre_process(self, image):
        """Decode ``image`` with ``jpegd`` and resize the result to the model size."""
        yuv_image = self._dvpp.jpegd(image)
        print("decode jpeg end")
        resized_image = self._dvpp.resize(yuv_image, self._model_width,
                                          self._model_height)
        print("resize yuv end")
        return resized_image

    def inference(self, resized_image):
        """Run the model on the resized image's data pointer and size."""
        return self._model.execute(resized_image.data(), resized_image.size)

    def post_process(self, infer_output, image_file):
        """Print the top-5 classes and return the most confident one.

        Args:
            infer_output: Sequence whose first element provides ``flatten()``.
            image_file: Image path, used only in the printed report.

        Returns:
            The value of ``get_image_net_class`` for the best-scoring index.
        """
        print("post process")
        data = infer_output[0]
        vals = data.flatten()
        # Indices of the five largest scores, best first.
        top_k = vals.argsort()[-1:-6:-1]
        print("images:{}".format(image_file))
        print("======== top5 inference results: =============")
        for n in top_k:
            object_class = get_image_net_class(n)
            print("label:%d confidence: %f, class: %s" %
                  (n, vals[n], object_class))
        return get_image_net_class(top_k[0])
class Sample(object):
    """Sample entry point: dvpp processing, model execution and a single-op stage."""

    def __init__(self, device_id, model_path, model_input_width, model_input_height):
        """Record the configuration, set up ACL resources and build the stages.

        Args:
            device_id: Device index (int).
            model_path: Model file path (str) handed to ``Model``.
            model_input_width: Width handed to ``Dvpp``.
            model_input_height: Height handed to ``Dvpp``.
        """
        self.device_id = device_id
        self.model_path = model_path
        self.model_id = None
        self.context = None
        self.input_data = None
        self.output_data = None
        self.model_desc = None
        self.load_input_dataset = None
        self.load_output_dataset = None
        # Creates self.context and self.stream, which the stages below need.
        self.init_resource()
        self._model_input_width = model_input_width
        self._model_input_height = model_input_height
        self.model_process = Model(self.context, self.stream, self.model_path)
        self.dvpp_process = Dvpp(self.stream, model_input_width,
                                 model_input_height)
        self.sing_op = SingleOp(self.stream)

    def release_resource(self):
        """Drop the stages, destroy stream and context, reset the device and finalize ACL."""
        if self.model_process:
            del self.model_process
        if self.dvpp_process:
            del self.dvpp_process
        if self.sing_op:
            del self.sing_op
        if self.stream:
            acl.rt.destroy_stream(self.stream)
        if self.context:
            acl.rt.destroy_context(self.context)
        acl.rt.reset_device(self.device_id)
        acl.finalize()
        print("[Sample] class Samle release source success")

    def init_resource(self):
        """Initialise ACL, select the device and create the context and stream."""
        print("[Sample] init resource stage:")
        acl.init()
        ret = acl.rt.set_device(self.device_id)
        check_ret("acl.rt.set_device", ret)
        self.context, ret = acl.rt.create_context(self.device_id)
        check_ret("acl.rt.create_context", ret)
        self.stream, ret = acl.rt.create_stream()
        check_ret("acl.rt.create_stream", ret)
        print("[Sample] init resource stage success")

    def _transfer_to_device(self, img_path, dtype=np.uint8):
        """Read a file into a numpy array and copy its bytes into a dvpp buffer.

        Args:
            img_path: File read with ``np.fromfile``.
            dtype: Element type used when reading the file.

        Returns:
            ``(img_device, img_buffer_size)``: the buffer returned by
            ``acl.media.dvpp_malloc`` and its size in bytes.
        """
        img = np.fromfile(img_path, dtype=dtype)
        # Use bytes_to_ptr when this pyACL build offers it, otherwise fall
        # back to numpy_to_ptr.
        if "bytes_to_ptr" in dir(acl.util):
            bytes_data = img.tobytes()
            img_ptr = acl.util.bytes_to_ptr(bytes_data)
        else:
            img_ptr = acl.util.numpy_to_ptr(img)
        img_buffer_size = img.itemsize * img.size
        img_device, ret = acl.media.dvpp_malloc(img_buffer_size)
        check_ret("acl.media.dvpp_malloc", ret)
        ret = acl.rt.memcpy(img_device, img_buffer_size, img_ptr,
                            img_buffer_size, ACL_MEMCPY_HOST_TO_DEVICE)
        check_ret("acl.rt.memcpy", ret)
        return img_device, img_buffer_size

    def forward(self, img_dict):
        """Run one image through dvpp, the model and the single-op stage.

        Args:
            img_dict: Mapping with ``"path"`` (image file) and ``"dtype"``
                (element type used when reading the file).
        """
        img_path = img_dict["path"]
        # Only the dimensions are needed from Pillow; the raw file bytes are
        # what gets copied to the device.
        with Image.open(img_path) as image_file:
            width, height = image_file.size
            print("[Sample] width:{} height:{}".format(width, height))
        print("[Sample] image:{}".format(img_path))
        img_device, img_buffer_size = \
            self._transfer_to_device(img_path, img_dict["dtype"])
        try:
            # decode and resize
            dvpp_output_buffer, dvpp_output_size = \
                self.dvpp_process.run(img_device, img_buffer_size,
                                      width, height)
            self.model_process.run(dvpp_output_buffer, dvpp_output_size)
            self.sing_op.run(self.model_process.get_result())
        finally:
            # Free the input buffer even when a stage raises; it used to
            # leak on any failure.
            if img_device:
                acl.media.dvpp_free(img_device)
class Cartoonization(object):
    """Image-to-image pipeline: dvpp crop-and-paste, model inference, cv2 output."""

    def __init__(self, model_path, model_width, model_height):
        """Store the configuration; no ACL call is made until ``init()``.

        Args:
            model_path: Path handed to ``Model`` by ``init()``.
            model_width: Target width used by ``pre_process``.
            model_height: Target height used by ``pre_process``.
        """
        self.device_id = 0
        self.context = None
        self.stream = None
        # Defined up front so __del__ does not raise AttributeError when
        # init() never ran or failed before the model was created.
        self.run_mode = None
        self._model = None
        self._model_path = model_path
        self._model_width = model_width
        self._model_height = model_height
        self._dvpp = None

    def __del__(self):
        """Release the helpers, stream and context, then reset the device and finalize ACL."""
        if self._model:
            del self._model
        if self._dvpp:
            del self._dvpp
        if self.stream:
            acl.rt.destroy_stream(self.stream)
        if self.context:
            acl.rt.destroy_context(self.context)
        acl.rt.reset_device(self.device_id)
        acl.finalize()
        print("[Sample] class Samle release source success")

    def _init_resource(self):
        """Initialise ACL, select the device and create context, stream and run mode."""
        print("[Sample] init resource stage:")
        ret = acl.init()
        # Report a failure under the call that actually produced it; this
        # used to be labelled "acl.rt.set_device".
        check_ret("acl.init", ret)
        ret = acl.rt.set_device(self.device_id)
        check_ret("acl.rt.set_device", ret)
        self.context, ret = acl.rt.create_context(self.device_id)
        check_ret("acl.rt.create_context", ret)
        self.stream, ret = acl.rt.create_stream()
        check_ret("acl.rt.create_stream", ret)
        self.run_mode, ret = acl.rt.get_run_mode()
        check_ret("acl.rt.get_run_mode", ret)
        print("[Sample] Init resource stage success")

    def init(self):
        """Set up ACL resources, the Dvpp helper and the model.

        Returns:
            ``SUCCESS`` when everything initialises, otherwise ``FAILED``.
        """
        # init acl resource
        self._init_resource()
        self._dvpp = Dvpp(self.stream, self.run_mode)
        # init dvpp
        ret = self._dvpp.init_resource()
        if ret != SUCCESS:
            print("Init dvpp failed")
            return FAILED
        # load model
        self._model = Model(self.run_mode, self._model_path)
        ret = self._model.init_resource()
        if ret != SUCCESS:
            print("Init model failed")
            return FAILED
        return SUCCESS

    def pre_process(self, image):
        """Decode ``image`` and crop-and-paste it to the model input size."""
        yuv_image = self._dvpp.jpegd(image)
        crop_and_paste_image = \
            self._dvpp.crop_and_paste(yuv_image, image.width, image.height,
                                      self._model_width, self._model_height)
        print("[Sample] crop_and_paste yuv end")
        return crop_and_paste_image

    def inference(self, resized_image):
        """Run the model on the image's data pointer and size."""
        return self._model.execute(resized_image.data(), resized_image.size)

    def post_process(self, infer_output, image_file, origin_image):
        """Rescale the model output, resize it to the original size and save it.

        Args:
            infer_output: Sequence whose first element is the output array.
            image_file: Source path; its basename names the file written
                under ``../outputs``.
            origin_image: Object providing the original ``width``/``height``.
        """
        print("[Sample] post process")
        # (x + 1) * 127.5 maps a value of -1 to 0 and +1 to 255.
        data = ((np.squeeze(infer_output[0]) + 1) * 127.5)
        img = cv2.cvtColor(data, cv2.COLOR_RGB2BGR)
        img = cv2.resize(img, (origin_image.width, origin_image.height))
        output_path = os.path.join("../outputs", os.path.basename(image_file))
        cv2.imwrite(output_path, img)
class Sample(object):
    """Video pipeline: vdec decoding, dvpp processing and model execution."""

    def __init__(self, device_id, model_path, vdec_out_path, model_input_width, model_input_height):
        """Record the configuration, set up ACL resources and build the stages.

        Args:
            device_id: Device index (int).
            model_path: Model file path (str) handed to ``Model``.
            vdec_out_path: Path handed to ``Vdec`` and stored on the instance.
            model_input_width: Width handed to ``Dvpp``.
            model_input_height: Height handed to ``Dvpp``.
        """
        self.device_id = device_id
        self.model_path = model_path
        self.context = None
        self.stream = None
        # Assigned once. The previous code first stored a 1-tuple here
        # because of a stray trailing comma and corrected it only later.
        self.model_input_width = model_input_width
        self.model_input_height = model_input_height
        # Creates self.context and self.stream, which the stages below need.
        self.init_resource()
        self.model_process = Model(self.context, self.stream, model_path)
        self.vdec_process = Vdec(self.context, self.stream, vdec_out_path)
        self.dvpp_process = Dvpp(self.stream, model_input_width,
                                 model_input_height)
        self.vdec_out_path = vdec_out_path

    def init_resource(self):
        """Initialise ACL, select the device and create the context and stream."""
        print("init resource stage:")
        acl.init()
        ret = acl.rt.set_device(self.device_id)
        check_ret("acl.rt.set_device", ret)
        self.context, ret = acl.rt.create_context(self.device_id)
        check_ret("acl.rt.create_context", ret)
        self.stream, ret = acl.rt.create_stream()
        check_ret("acl.rt.create_stream", ret)
        print("init resource stage success")

    def release_resource(self):
        """Drop the stages, destroy stream and context, reset the device and finalize ACL."""
        print('[Sample] release source stage:')
        if self.dvpp_process:
            del self.dvpp_process
        if self.model_process:
            del self.model_process
        if self.vdec_process:
            del self.vdec_process
        if self.stream:
            ret = acl.rt.destroy_stream(self.stream)
            check_ret("acl.rt.destroy_stream", ret)
        if self.context:
            ret = acl.rt.destroy_context(self.context)
            check_ret("acl.rt.destroy_context", ret)
        ret = acl.rt.reset_device(self.device_id)
        check_ret("acl.rt.reset_device", ret)
        ret = acl.finalize()
        check_ret("acl.finalize", ret)
        print('[Sample] release source stage success')

    def _transfer_to_device(self, img):
        """Return the buffer and size stored in ``img`` without copying.

        A buffer that is not on the device would need to be copied there,
        but here the data comes from vdec, so no copy is needed.

        Args:
            img: Mapping with ``"buffer"`` and ``"size"`` entries.

        Returns:
            ``(img["buffer"], img["size"])``.
        """
        img_device = img["buffer"]
        img_buffer_size = img["size"]
        return img_device, img_buffer_size

    def forward(self, temp):
        """Decode the input with vdec and run every decoded frame through dvpp and the model.

        Args:
            temp: 4-item sequence handed to ``vdec_process.run``; items 1
                and 2 are used as the input width and height.
        """
        _, input_width, input_height, _ = temp
        # vdec process. Note: the input is an h264 file; the vdec output
        # data size needs to be computed from the width and height strided
        # by 16*2.
        self.vdec_process.run(temp)
        images_buffer = self.vdec_process.get_image_buffer()
        if images_buffer:
            for img_buffer in images_buffer:
                img_device, img_buffer_size = \
                    self._transfer_to_device(img_buffer)
                print("vdec output, img_buffer_size = ", img_buffer_size)
                # vpc process, parameters: vdec output buffer and size,
                # original picture width and height.
                dvpp_output_buffer, dvpp_output_size = \
                    self.dvpp_process.run(img_device, img_buffer_size,
                                          input_width, input_height)
                ret = acl.media.dvpp_free(img_device)
                check_ret("acl.media.dvpp_free", ret)
                self.model_process.run(dvpp_output_buffer, dvpp_output_size)
class Yolov3(object):
    """Yolov3 pre/post-processing around a Dvpp helper."""

    def __init__(self, acl_resource, model_width, model_height):
        """Keep the ACL resource and model size, then build the Dvpp helper and image info.

        Args:
            acl_resource: Object stored on the instance and handed to ``Dvpp``.
            model_width: Model input width.
            model_height: Model input height.
        """
        self._acl_resource = acl_resource
        self._model_width = model_width
        self._model_height = model_height
        # Images are processed with dvpp; a dvpp instance is not needed when
        # opencv or PIL is used instead.
        self._dvpp = Dvpp(acl_resource)
        # Image-info input for the yolov3 network.
        self._image_info = np.array([model_width, model_height,
                                     model_width, model_height],
                                    dtype=np.float32)

    def __del__(self):
        """Drop the Dvpp helper reference and report the release."""
        if self._dvpp:
            del self._dvpp
        print("Release yolov3 resource finished")

    def pre_process(self, image):
        """Resize ``image`` to the model size and pair it with the image info.

        Returns:
            ``[resized_image, image_info]``, the inference input list.
        """
        # Scale the image to the size the model requires using dvpp.
        resized_image = self._dvpp.resize(image, self._model_width,
                                          self._model_height)
        # The resized image and the image info form the inference input.
        return [resized_image, self._image_info]

    def post_process(self, infer_output, origin_img):
        """Parse the detections and encode the original image as JPEG.

        Returns:
            ``(jpeg_image, detection_result_list)``.
        """
        # Parse the inference output.
        detection_result_list = self._analyze_inference_output(infer_output,
                                                               origin_img)
        # Convert the yuv image to a jpeg image.
        jpeg_image = self._dvpp.jpege(origin_img)
        return jpeg_image, detection_result_list

    def _analyze_inference_output(self, infer_output, origin_img):
        """Turn the raw network output into ``ObjectDetectionResult`` items.

        Args:
            infer_output: Two outputs; index 1 holds the box count at
                ``[0, 0]`` and index 0 holds the box data.
            origin_img: Object providing the original ``width``/``height``.

        Returns:
            List of detection items with coordinates in original-image space.
        """
        # The yolov3 network has two outputs; the second (index 1) is the
        # number of boxes.
        box_num = int(infer_output[1][0, 0])
        # The first (index 0) is the box information.
        box_info = infer_output[0]
        # Box coordinates refer to a model_width * model_height image and
        # must be converted to coordinates on the original image.
        scalex = origin_img.width / self._model_width
        scaley = origin_img.height / self._model_height
        detection_result_list = []
        for i in range(box_num):
            # Class index of the detected object.
            class_id = int(box_info[0, LABEL * box_num + i])
            # Range-check only when a label table exists. With an empty
            # table the old check rejected every box, which made the
            # numeric-label fallback below unreachable.
            if labels and class_id >= len(labels):
                print("class id %d out of range" % (class_id))
                continue
            detection_item = presenter_datatype.ObjectDetectionResult()
            detection_item.object_class = class_id
            # Confidence of the detected object.
            detection_item.confidence = box_info[0, SCORE * box_num + i]
            # Bounding-box corners, scaled to the original image.
            detection_item.box.lt.x = int(box_info[0, TOP_LEFT_X * box_num + i] * scalex)
            detection_item.box.lt.y = int(box_info[0, TOP_LEFT_Y * box_num + i] * scaley)
            detection_item.box.rb.x = int(box_info[0, BOTTOM_RIGHT_X * box_num + i] * scalex)
            detection_item.box.rb.y = int(box_info[0, BOTTOM_RIGHT_Y * box_num + i] * scaley)
            # Build the "<class> <confidence>%" caption; fall back to the
            # numeric class index when no label names are available.
            if not labels:
                caption = str(detection_item.object_class)
            else:
                caption = str(labels[detection_item.object_class])
            detection_item.result_text = caption + " " + str(
                round(detection_item.confidence * 100, 2)) + "%"
            detection_result_list.append(detection_item)
        return detection_result_list
class Classify(object):
    """Batched classification: DVPP pre-processing, batched inference, top-5 report."""

    def __init__(self, acl_resource, model_path, model_width, model_height):
        """Store the model geometry and create the Model and Dvpp helpers.

        Args:
            acl_resource: Object handed to ``Dvpp``.
            model_path: Path handed to ``Model``.
            model_width: Target width used by ``pre_process``.
            model_height: Target height used by ``pre_process``.
        """
        # Batch buffer, allocated by batch_process().
        self.total_buffer = None
        self._model_path = model_path
        self._model_width = model_width
        self._model_height = model_height
        self._model = Model(model_path)
        self._dvpp = Dvpp(acl_resource)
        print("The App arg is __init__")

    def __del__(self):
        """Free the batch buffer, drop the Dvpp helper and report the release."""
        if self.total_buffer:
            acl.rt.free(self.total_buffer)
        if self._dvpp:
            del self._dvpp
        print("[Sample] class Samle release source success")

    def pre_process(self, image):
        """Decode ``image`` with ``jpegd`` and resize the result to the model size."""
        yuv_image = self._dvpp.jpegd(image)
        print("decode jpeg end")
        resized_image = self._dvpp.resize(yuv_image, self._model_width,
                                          self._model_height)
        print("resize yuv end")
        return resized_image

    def batch_process(self, resized_image_list, batch):
        """Pack the resized images back to back into one buffer in ``self.total_buffer``.

        Args:
            resized_image_list: Non-empty list of images providing ``size``
                and ``data()``; every image is copied using the size of the
                first one.
            batch: Number of image slots to allocate.

        Returns:
            The allocated size in bytes (``batch * size of one image``).
        """
        resized_img_size = resized_image_list[0].size
        total_size = batch * resized_img_size

        # Release the buffer left over from the previous batch; it used to
        # be overwritten and leaked on every call.
        if self.total_buffer:
            ret = acl.rt.free(self.total_buffer)
            self.total_buffer = None
            check_ret("acl.rt.free", ret)

        self.total_buffer, ret = acl.rt.malloc(total_size,
                                               ACL_MEM_MALLOC_HUGE_FIRST)
        check_ret("acl.rt.malloc", ret)

        # Copy each image into its slot; the offset advances by one image.
        stride = 0
        for resized_image in resized_image_list:
            ret = acl.rt.memcpy(self.total_buffer + stride, resized_img_size,
                                resized_image.data(), resized_img_size,
                                ACL_MEMCPY_DEVICE_TO_DEVICE)
            check_ret("acl.rt.memcpy", ret)
            stride += resized_img_size
        return total_size

    def inference(self, resized_image_list, batch):
        """Batch the images and execute the model on the packed buffer."""
        total_size = self.batch_process(resized_image_list, batch)
        batch_buffer = {'data': self.total_buffer, 'size': total_size}
        return self._model.execute([batch_buffer, ])

    def post_process(self, infer_output, batch_image_files, number_of_images):
        """Print the top-5 classes per image and save each image annotated with the best one.

        Args:
            infer_output: Sequence whose first element is indexable per image.
            batch_image_files: Image paths, in the same order as the batch.
            number_of_images: How many leading batch entries to report.
        """
        print("post process")
        datas = infer_output[0]
        for number in range(number_of_images):
            image_file = batch_image_files[number]
            vals = datas[number].flatten()
            # Indices of the five largest scores, best first.
            top_k = vals.argsort()[-1:-6:-1]
            print("images:{}".format(image_file))
            print("======== top5 inference results: =============")
            for n in top_k:
                object_class = get_image_net_class(n)
                print("label:%d confidence: %f, class: %s" %
                      (n, vals[n], object_class))

            # Use Pillow to write the most confident category on the image
            # and save it locally.
            if len(top_k):
                object_class = get_image_net_class(top_k[0])
                output_path = os.path.join("../outputs",
                                           os.path.basename(image_file))
                origin_img = Image.open(image_file)
                draw = ImageDraw.Draw(origin_img)
                font = ImageFont.truetype("SourceHanSansCN-Normal.ttf", size=30)
                draw.text((10, 50), object_class, font=font, fill=255)
                origin_img.save(output_path)
def __init__(self, acl_resource, model_width, model_height):
    """Keep the ACL resource and model size, then create the Dvpp helper.

    Args:
        acl_resource: Object stored on the instance and handed to ``Dvpp``.
        model_width: Model input width.
        model_height: Model input height.
    """
    self._acl_resource = acl_resource
    self._model_width, self._model_height = model_width, model_height
    # Images are processed with dvpp; a dvpp instance is not needed when
    # opencv or PIL is used instead.
    self._dvpp = Dvpp(acl_resource)
class VggSsd(object):
    """Detection pre/post-processing: dvpp resize, box decoding, NMS, result packing.

    NOTE(review): ``apply_nms`` and ``decode_bbox`` read ``class_num`` from
    module scope, while ``_analyze_inference_output`` uses its own local
    ``class_num = len(labels)`` -- confirm the two always agree.
    """

    def __init__(self, acl_resource, model_width, model_height):
        """Keep the ACL resource and model size, then create the Dvpp helper.

        Args:
            acl_resource: Object stored on the instance and handed to ``Dvpp``.
            model_width: Target width used by ``pre_process``.
            model_height: Target height used by ``pre_process``.
        """
        self._acl_resource = acl_resource
        self._model_width = model_width
        self._model_height = model_height
        # Images are processed with dvpp; a dvpp instance is not needed when
        # opencv or PIL is used instead.
        self._dvpp = Dvpp(acl_resource)

    def __del__(self):
        """Report the release."""
        print("Release yolov3 resource finished")

    def pre_process(self, image):
        """Resize ``image`` to the model size.

        Returns:
            ``[resized_image]`` as the inference input, or ``None`` when
            the resize returned ``None``.
        """
        # Scale the image to the size the model requires using dvpp.
        resized_image = self._dvpp.resize(image, self._model_width,
                                          self._model_height)
        # Identity test: ``== None`` would dispatch to the image type's
        # ``__eq__``.
        if resized_image is None:
            print("Resize image failed")
            return None
        # The resized image is the only inference input.
        return [
            resized_image,
        ]

    def post_process(self, infer_output, origin_img):
        """Parse the detections and encode the original image as JPEG.

        Returns:
            ``(jpeg_image, detection_result_list)``.
        """
        # Parse the inference output.
        detection_result_list = self._analyze_inference_output(
            infer_output, origin_img)
        # Convert the yuv image to a jpeg image.
        jpeg_image = self._dvpp.jpege(origin_img)
        return jpeg_image, detection_result_list

    def overlap(self, x1, x2, x3, x4):
        """Return the signed overlap length of intervals ``[x1, x2]`` and ``[x3, x4]``.

        The result is zero or negative when the intervals do not overlap.
        """
        left = max(x1, x3)
        right = min(x2, x4)
        return right - left

    def cal_iou(self, box, truth):
        """Return the intersection-over-union of two boxes.

        Args:
            box: Sequence whose items 0..3 are ``x_min, y_min, x_max, y_max``.
            truth: Box in the same layout.

        Returns:
            ``0`` when the boxes do not overlap, otherwise the IoU as a float.
        """
        w = self.overlap(box[0], box[2], truth[0], truth[2])
        h = self.overlap(box[1], box[3], truth[1], truth[3])
        if w <= 0 or h <= 0:
            return 0
        inter_area = w * h
        union_area = (box[2] - box[0]) * (box[3] - box[1]) + (
            truth[2] - truth[0]) * (truth[3] - truth[1]) - inter_area
        return inter_area * 1.0 / union_area

    def apply_nms(self, all_boxes, thres):
        """Run per-class non-maximum suppression.

        Args:
            all_boxes: One list of boxes per class; item 5 of each box is
                the score used for ordering.
            thres: A box is suppressed when its IoU with a kept box is at
                least this value.

        Returns:
            Flat list of the boxes that survive, class by class.
        """
        res = []
        for cls in range(class_num):
            cls_bboxes = all_boxes[cls]
            # Highest score first.
            sorted_boxes = sorted(cls_bboxes, key=lambda d: d[5])[::-1]
            # Indices into sorted_boxes that have been suppressed.
            suppressed = set()
            for i in range(len(sorted_boxes)):
                if i in suppressed:
                    continue
                truth = sorted_boxes[i]
                for j in range(i + 1, len(sorted_boxes)):
                    if j in suppressed:
                        continue
                    box = sorted_boxes[j]
                    iou = self.cal_iou(box, truth)
                    if iou >= thres:
                        suppressed.add(j)
            for i in range(len(sorted_boxes)):
                if i not in suppressed:
                    res.append(sorted_boxes[i])
        return res

    def decode_bbox(self, conv_output, anchors, img_w, img_h, x_scale,
                    y_scale, shift_x_ratio, shift_y_ratio):
        """Decode one output feature map into per-class box lists.

        Args:
            conv_output: Array of shape ``(h, w, 3 * (5 + class_num))``;
                it is reshaped to ``(h * w, 3, 5 + class_num)``.
            anchors: Array whose columns 0 and 1 scale box width and height.
            img_w: Original image width, used to scale and clip x coordinates.
            img_h: Original image height, used to scale and clip y coordinates.
            x_scale: Horizontal scale factor applied after removing the shift.
            y_scale: Vertical scale factor applied after removing the shift.
            shift_x_ratio: Horizontal offset subtracted before scaling.
            shift_y_ratio: Vertical offset subtracted before scaling.

        Returns:
            ``class_num`` lists of boxes, each box being
            ``[x_min, y_min, x_max, y_max, class_index, score]``.
        """
        def _sigmoid(x):
            s = 1 / (1 + np.exp(-x))
            return s

        h, w, _ = conv_output.shape
        pred = conv_output.reshape((h * w, 3, 5 + class_num))
        # Objectness and class scores.
        pred[..., 4:] = _sigmoid(pred[..., 4:])
        # Box centre: cell offset plus grid position, normalised by grid size.
        pred[..., 0] = (_sigmoid(pred[..., 0]) +
                        np.tile(range(w), (3, h)).transpose(
                            (1, 0))) / w
        pred[..., 1] = (_sigmoid(pred[..., 1]) +
                        np.tile(np.repeat(range(h), w), (3, 1)).transpose(
                            (1, 0))) / h
        # Box size from the anchors.
        pred[..., 2] = np.exp(pred[..., 2]) * anchors[:, 0:1].transpose(
            (1, 0)) / w
        pred[..., 3] = np.exp(pred[..., 3]) * anchors[:, 1:2].transpose(
            (1, 0)) / h

        # Corner coordinates in original-image pixels, clipped to the image.
        bbox = np.zeros((h * w, 3, 4))
        bbox[..., 0] = np.maximum(
            (pred[..., 0] - pred[..., 2] / 2.0 - shift_x_ratio) * x_scale *
            img_w, 0)  # x_min
        bbox[..., 1] = np.maximum(
            (pred[..., 1] - pred[..., 3] / 2.0 - shift_y_ratio) * y_scale *
            img_h, 0)  # y_min
        bbox[..., 2] = np.minimum(
            (pred[..., 0] + pred[..., 2] / 2.0 - shift_x_ratio) * x_scale *
            img_w, img_w)  # x_max
        bbox[..., 3] = np.minimum(
            (pred[..., 1] + pred[..., 3] / 2.0 - shift_y_ratio) * y_scale *
            img_h, img_h)  # y_max
        pred[..., :4] = bbox

        pred = pred.reshape((-1, 5 + class_num))
        # Final score = objectness * best class score; drop weak candidates.
        pred[:, 4] = pred[:, 4] * pred[:, 5:].max(1)
        pred = pred[pred[:, 4] >= conf_threshold]
        # Column 5 now holds the index of the best class.
        pred[:, 5] = np.argmax(pred[:, 5:], axis=-1)

        all_boxes = [[] for ix in range(class_num)]
        for ix in range(pred.shape[0]):
            box = [int(pred[ix, iy]) for iy in range(4)]
            box.append(int(pred[ix, 5]))
            box.append(pred[ix, 4])
            # NOTE(review): the bucket is ``class_index - 1``, so class 0
            # lands in the last bucket. Grouping by class is unaffected;
            # kept as in the original.
            all_boxes[box[4] - 1].append(box)
        return all_boxes

    def convert_labels(self, label_list):
        """Map class indices (list or ndarray) to names from ``labels``."""
        if isinstance(label_list, np.ndarray):
            label_list = label_list.tolist()
        label_names = [labels[int(index)] for index in label_list]
        return label_names

    def _analyze_inference_output(self, infer_output, origin_img):
        """Decode the three model outputs, apply NMS and build detection items.

        Args:
            infer_output: Model outputs; indices 2, 1 and 0 are decoded in
                that order using ``stride_list`` and ``anchor_list``.
            origin_img: Object providing the original ``width``/``height``.

        Returns:
            List of ``ObjectDetectionResult`` items.
        """
        result_return = dict()
        img_h = origin_img.height
        img_w = origin_img.width
        # Geometry of an aspect-preserving fit of the image into the
        # MODEL_WIDTH x MODEL_HEIGHT model input.
        scale = min(
            float(MODEL_WIDTH) / float(img_w),
            float(MODEL_HEIGHT) / float(img_h))
        new_w = int(img_w * scale)
        new_h = int(img_h * scale)
        shift_x_ratio = (MODEL_WIDTH - new_w) / 2.0 / MODEL_WIDTH
        shift_y_ratio = (MODEL_HEIGHT - new_h) / 2.0 / MODEL_HEIGHT
        class_num = len(labels)
        num_channel = 3 * (class_num + 5)
        x_scale = MODEL_WIDTH / float(new_w)
        y_scale = MODEL_HEIGHT / float(new_h)

        all_boxes = [[] for ix in range(class_num)]
        for ix in range(3):
            pred = infer_output[2 - ix].reshape(
                (MODEL_HEIGHT // stride_list[ix],
                 MODEL_WIDTH // stride_list[ix], num_channel))
            anchors = anchor_list[ix]
            boxes = self.decode_bbox(pred, anchors, img_w, img_h, x_scale,
                                     y_scale, shift_x_ratio, shift_y_ratio)
            all_boxes = [all_boxes[iy] + boxes[iy] for iy in range(class_num)]

        res = self.apply_nms(all_boxes, iou_threshold)
        if not res:
            result_return['detection_classes'] = []
            result_return['detection_boxes'] = []
            result_return['detection_scores'] = []
        else:
            new_res = np.array(res)
            picked_boxes = new_res[:, 0:4]
            # Reorder to (y_min, x_min, y_max, x_max).
            picked_boxes = picked_boxes[:, [1, 0, 3, 2]]
            picked_classes = self.convert_labels(new_res[:, 4])
            picked_score = new_res[:, 5]
            result_return['detection_classes'] = picked_classes
            result_return['detection_boxes'] = picked_boxes.tolist()
            result_return['detection_scores'] = picked_score.tolist()

        detection_result_list = []
        for i in range(len(result_return['detection_classes'])):
            box = result_return['detection_boxes'][i]
            class_name = result_return['detection_classes'][i]
            confidence = result_return['detection_scores'][i]
            detection_item = presenter_datatype.ObjectDetectionResult()
            detection_item.confidence = confidence
            # Boxes are stored as (y_min, x_min, y_max, x_max) at this point.
            detection_item.box.lt.x = int(box[1])
            detection_item.box.lt.y = int(box[0])
            detection_item.box.rb.x = int(box[3])
            detection_item.box.rb.y = int(box[2])
            detection_item.result_text = str(class_name)
            detection_result_list.append(detection_item)
        return detection_result_list