def detect_image(self, image_id, image):
    """Detect objects in ``image`` and write them to a per-image text file.

    Each detection becomes one line of
    ``./input/detection-results/<image_id>.txt`` in the form
    ``<class_name> <score> <left> <top> <right> <bottom>``, with the box
    rescaled from the network input size back to the original image size.
    The file is always created, so an image without detections yields an
    empty file.

    Side effects: sets ``self.confidence`` to 0.01 and ``self.iou`` to 0.45.

    Args:
        image_id: Base name of the results file (``.txt`` is appended).
        image: Image to analyse; it is resized via ``image.resize`` and
            converted via ``np.array``, i.e. used like a PIL image.

    Returns:
        None.
    """
    self.confidence = 0.01
    self.iou = 0.45

    # The context manager closes the file on every exit path, including the
    # early return below and any exception raised while running the model.
    with open("./input/detection-results/" + image_id + ".txt", "w") as f:
        with torch.no_grad():
            image_shape = np.array(np.shape(image)[0:2])
            old_width = image_shape[1]
            old_height = image_shape[0]

            width, height = get_new_img_size(old_width, old_height)
            image = image.resize([width, height], Image.BICUBIC)

            # Divide pixel values by 255 and move the last axis to the front
            # before wrapping the single image in a batch of one.
            photo = np.array(image, dtype=np.float32) / 255
            photo = np.transpose(photo, (2, 0, 1))
            images = torch.from_numpy(np.asarray([photo]))
            if self.cuda:
                images = images.cuda()

            # The fourth model output (ROI indices) is not needed here.
            roi_cls_locs, roi_scores, rois, _ = self.model(images)
            decodebox = DecodeBox(self.std, self.mean, self.num_classes)
            outputs = decodebox.forward(roi_cls_locs, roi_scores, rois,
                                        height=height, width=width,
                                        nms_iou=self.iou,
                                        score_thresh=self.confidence)
            if len(outputs) == 0:
                return

            # Columns: 0-3 box, 4 score, 5 class index.
            bbox = outputs[:, :4]
            conf = outputs[:, 4]
            label = outputs[:, 5]

            # Even columns are scaled by the width ratio and odd columns by
            # the height ratio to map boxes back onto the original image.
            bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
            bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height
            bbox = np.array(bbox, np.int32)

            for i, c in enumerate(label):
                predicted_class = self.class_names[int(c)]
                # Only the first six characters of the score are written.
                score = str(conf[i])
                left, top, right, bottom = bbox[i]
                f.write("%s %s %s %s %s %s\n" % (
                    predicted_class, score[:6], str(int(left)),
                    str(int(top)), str(int(right)), str(int(bottom))))
    return
def detect_image(self, image_id, image):
    """Detect objects in ``image`` and write them to a per-image text file.

    Runs the RPN model, decodes its proposals, classifies them and writes
    one line per final box to ``./input/detection-results/<image_id>.txt``
    in the form ``<class_name> <score> <left> <top> <right> <bottom>``.
    The file is always created, so an image without detections yields an
    empty file.

    Side effect: sets ``self.confidence`` to 0.01.

    Args:
        image_id: Base name of the results file (``.txt`` is appended).
        image: Image to analyse; used like a PIL image (``resize``).

    Returns:
        A deep copy of the input image when nothing is detected, otherwise
        ``None``.
    """
    self.confidence = 0.01

    # The context manager closes the file on every exit path, including the
    # early return below and any exception raised by the models.
    with open("./input/detection-results/" + image_id + ".txt", "w") as f:
        image_shape = np.array(np.shape(image)[0:2])
        old_width, old_height = image_shape[1], image_shape[0]
        old_image = copy.deepcopy(image)

        # Resize the image to the size computed by get_new_img_size.
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height], Image.BICUBIC)
        photo = np.array(image, dtype=np.float64)

        # Preprocess a batch of one image and run the proposal network.
        photo = preprocess_input(np.expand_dims(photo, 0))
        rpn_pred = self.model_rpn.predict(photo)

        # Decode the proposal network output against the anchors.
        base_feature_width, base_feature_height = self.get_img_output_length(
            width, height)
        anchors = get_anchors([base_feature_width, base_feature_height],
                              width, height)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

        # Feed the shared feature layer (third RPN output) and the proposals
        # to the classifier. The classifier receives the proposal columns
        # pairwise swapped (0<->1, 2<->3) - presumably an (x, y) vs (y, x)
        # convention difference; confirm against the ROI pooling layer.
        base_layer = rpn_pred[2]
        proposal_box = np.array(rpn_results)[:, :, 1:]
        temp_ROIs = np.zeros_like(proposal_box)
        temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
        classifier_pred = self.model_classifier.predict(
            [base_layer, temp_ROIs])

        # Decode the classifier output into final boxes.
        results = self.bbox_util.detection_out_classifier(
            classifier_pred, proposal_box, self.config, self.confidence)
        if len(results[0]) == 0:
            return old_image

        # Columns: 0-3 box, 4 score, 5 class index.
        results = np.array(results[0])
        boxes = results[:, :4]
        top_conf = results[:, 4]
        top_label_indices = results[:, 5]

        # Scale boxes by the original image size (presumably they are
        # normalised to [0, 1] at this point - confirm in bbox_util).
        boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
        boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            # Only the first six characters of the score are written.
            score = str(top_conf[i])
            left, top, right, bottom = boxes[i]
            f.write("%s %s %s %s %s %s\n" % (
                predicted_class, score[:6], str(int(left)),
                str(int(top)), str(int(right)), str(int(bottom))))
    return
def detect_image(self, image):
    """Run the two-stage detector on ``image`` and draw the detections.

    The RPN proposals are converted to feature-map coordinates, classified
    in batches of ``self.config.num_rois``, decoded, filtered with
    ``self.bbox_util.nms_for_out`` and finally drawn (box plus
    ``"<class> <score>"`` caption) on a deep copy of the input image.

    Args:
        image: Image to analyse; used like a PIL image (``resize``).

    Returns:
        The annotated copy of the input image, or the unmodified copy when
        no ROI reaches ``self.confidence``.
    """
    src_shape = np.array(np.shape(image)[0:2])
    old_height = src_shape[0]
    old_width = src_shape[1]
    old_image = copy.deepcopy(image)

    width, height = get_new_img_size(old_width, old_height)
    image = image.resize([width, height])

    # Preprocess a batch of one image and run the proposal network.
    batch = np.expand_dims(np.array(image, dtype=np.float64), 0)
    preds = self.model_rpn.predict(preprocess_input(batch))

    # Decode the proposal network output against the anchors.
    anchors = get_anchors(self.get_img_output_length(width, height),
                          width, height)
    rpn_results = self.bbox_util.detection_out(preds, anchors, 1,
                                               confidence_threshold=0)

    # Scale the proposal corners by image size / rpn_stride, round them,
    # then turn the second corner into a width/height pair so each row is
    # (x, y, w, h).
    stride = self.config.rpn_stride
    R = rpn_results[0][:, 2:]
    for col, extent in enumerate((width, height, width, height)):
        R[:, col] = np.array(np.round(R[:, col] * extent / stride),
                             dtype=np.int32)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]
    base_layer = preds[2]

    # Drop proposals whose width or height is below one cell.
    degenerate = [row for row, r in enumerate(R) if r[2] < 1 or r[3] < 1]
    R = np.delete(R, degenerate, axis=0)

    bboxes, probs, labels = [], [], []
    num_rois = self.config.num_rois
    reg_std = self.config.classifier_regr_std
    final_batch = R.shape[0] // num_rois
    for jk in range(final_batch + 1):
        ROIs = np.expand_dims(R[num_rois * jk:num_rois * (jk + 1), :], axis=0)
        if ROIs.shape[1] == 0:
            break

        if jk == final_batch:
            # Pad the last, short batch up to num_rois by repeating its
            # first ROI.
            filled = ROIs.shape[1]
            padded = np.zeros(
                (ROIs.shape[0], num_rois, ROIs.shape[2])).astype(ROIs.dtype)
            padded[:, :filled, :] = ROIs
            padded[0, filled:, :] = ROIs[0, 0, :]
            ROIs = padded

        P_cls, P_regr = self.model_classifier.predict([base_layer, ROIs])

        for ii in range(P_cls.shape[1]):
            # The last class column is excluded from the score lookup.
            class_scores = P_cls[0, ii, :-1]
            best_score = np.max(class_scores)
            if best_score < self.confidence:
                continue
            label = np.argmax(class_scores)

            x, y, w, h = ROIs[0, ii, :]
            tx, ty, tw, th = P_regr[0, ii, 4 * label:4 * (label + 1)]
            tx = tx / reg_std[0]
            ty = ty / reg_std[1]
            tw = tw / reg_std[2]
            th = th / reg_std[3]

            # Apply the regression offsets to the ROI centre and size.
            cx = x + w / 2.
            cy = y + h / 2.
            cx1 = tx * w + cx
            cy1 = ty * h + cy
            w1 = math.exp(tw) * w
            h1 = math.exp(th) * h

            corners = (cx1 - w1 / 2., cy1 - h1 / 2.,
                       cx1 + w1 / 2, cy1 + h1 / 2)
            bboxes.append([int(round(v)) for v in corners])
            probs.append(best_score)
            labels.append(label)

    if len(bboxes) == 0:
        return old_image

    # Convert feature-map coordinates to fractions of the resized image,
    # then run per-class NMS.
    boxes = np.array(bboxes, dtype=np.float32)
    for col, extent in enumerate((width, height, width, height)):
        boxes[:, col] = boxes[:, col] * stride / extent
    results = np.array(
        self.bbox_util.nms_for_out(np.array(labels), np.array(probs),
                                   np.array(boxes), self.num_classes - 1,
                                   0.4))

    # Result columns: 0 class index, 1 score, 2-5 box.
    top_label_indices = results[:, 0]
    top_conf = results[:, 1]
    boxes = results[:, 2:]
    for col, extent in enumerate((old_width, old_height,
                                  old_width, old_height)):
        boxes[:, col] = boxes[:, col] * extent

    # The font size is derived from the width of the resized image.
    font = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
    thickness = (np.shape(old_image)[0]
                 + np.shape(old_image)[1]) // old_width * 2

    image = old_image
    canvas_h = np.shape(image)[0]
    canvas_w = np.shape(image)[1]

    for idx, c in enumerate(top_label_indices):
        predicted_class = self.class_names[int(c)]
        score = top_conf[idx]
        colour = self.colors[int(c)]

        # Grow the box by 5 px per side, round, and clip to the image.
        left, top, right, bottom = boxes[idx]
        top = max(0, np.floor((top - 5) + 0.5).astype('int32'))
        left = max(0, np.floor((left - 5) + 0.5).astype('int32'))
        bottom = min(canvas_h, np.floor((bottom + 5) + 0.5).astype('int32'))
        right = min(canvas_w, np.floor((right + 5) + 0.5).astype('int32'))

        # Draw the box and its caption.
        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)
        label = label.encode('utf-8')
        print(label)

        # Put the caption above the box when it fits, otherwise just inside.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # A thick outline is drawn as nested one-pixel rectangles.
        for inset in range(thickness):
            draw.rectangle([left + inset, top + inset,
                            right - inset, bottom - inset],
                           outline=colour)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=colour)
        draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
        del draw
    return image
def detect_image(self, image_id, image, savepath):
    """Detect objects and export the top boxes with their feature vectors.

    Keeps at most the four highest-scoring detections. When fewer than four
    are found, the feature rows are padded up to four by repeating the
    feature of the first detection whose class index is 1, 3 or 5.

    Args:
        image_id: Identifier copied into the returned record.
        image: Image to analyse; used like a PIL image (``resize``).
        savepath: Unused; kept for interface compatibility (the drawing and
            saving code that used it was disabled).

    Returns:
        A deep copy of the input image when nothing is detected. Otherwise
        a dict with keys ``image_id``, ``image_h``, ``image_w``,
        ``num_boxes``, ``boxes`` (base64 of the int32 boxes) and
        ``features`` (base64 of the float32 ``(4, 2048)`` features).

    Raises:
        ValueError: From the final reshape when fewer than four detections
            exist and none of them has class index 1, 3 or 5, so the
            features cannot be padded to four rows.
    """
    image_shape = np.array(np.shape(image)[0:2])
    old_width = image_shape[1]
    old_height = image_shape[0]
    old_image = copy.deepcopy(image)

    width, height = get_new_img_size(old_width, old_height)
    image = image.resize([width, height])

    # Divide pixel values by 255 and move the last axis to the front.
    photo = np.array(image, dtype=np.float32) / 255
    photo = np.transpose(photo, (2, 0, 1))

    with torch.no_grad():
        # NOTE(review): the batch is moved to the GPU unconditionally here.
        images = torch.from_numpy(np.asarray([photo])).cuda()

        # The fourth model output (ROI indices) is not needed here.
        roi_cls_locs, roi_scores, rois, _, feature = self.model(images)
        decodebox = DecodeBox(self.std, self.mean, self.num_classes)
        outputs = decodebox.forward(roi_cls_locs, roi_scores, rois, feature,
                                    height=height, width=width,
                                    score_thresh=self.confidence)
        if len(outputs) == 0:
            return old_image

        # Keep the four highest scores (column 4), in ascending score order.
        if np.size(outputs, 0) > 4:
            outputs = outputs[np.argsort(outputs[:, 4])]
            outputs = outputs[-4:, :]

        # Columns: 0-3 box, 5 class index, 6.. feature vector.
        bbox = outputs[:, :4]
        label = outputs[:, 5]
        f = outputs[:, 6:]

        f_size = np.size(f, 0)
        if f_size < 4:
            missing = 4 - f_size
            for row, lab in enumerate(label):
                if lab in (1, 3, 5):
                    filler = f[row, :].reshape(1, 2048)
                    f = np.concatenate(
                        [f, np.repeat(filler, missing, axis=0)], axis=0)
                    break

        # Map boxes from the resized image back onto the original image.
        bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
        bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height
        bbox = np.array(bbox, np.int32)

        # The reshape enforces exactly four feature rows; it raises
        # ValueError otherwise, so no separate size check is needed after it.
        f = np.array(f, np.float32).reshape((4, 2048))
        print(np.size(f))

        # NOTE(review): image_h / image_w are the resized dimensions while
        # the boxes were rescaled to the original image - confirm that the
        # consumer of this record expects that combination.
        return {
            'image_id': image_id,
            'image_h': height,
            'image_w': width,
            'num_boxes': np.size(bbox, 0),
            'boxes': base64.b64encode(bbox),
            'features': base64.b64encode(f)
        }
def detect_image(self, image):
    """Detect objects in ``image``, draw them and report the elapsed time.

    Detections are drawn (box plus class-name caption) on a deep copy of
    the input image. The elapsed time is printed only when at least one
    detection was drawn.

    Args:
        image: Image to analyse; used like a PIL image (``resize``).

    Returns:
        The annotated copy of the input image, or the unmodified copy when
        nothing is detected.
    """
    with torch.no_grad():
        start_time = time.time()

        src_shape = np.array(np.shape(image)[0:2])
        old_height = src_shape[0]
        old_width = src_shape[1]
        old_image = copy.deepcopy(image)

        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height], Image.BICUBIC)

        # Divide pixel values by 255, move the last axis to the front and
        # wrap the single image in a batch of one.
        photo = np.transpose(np.array(image, dtype=np.float32) / 255,
                             (2, 0, 1))
        images = torch.from_numpy(np.asarray([photo]))
        if self.cuda:
            images = images.cuda()

        # Run the network and decode its raw outputs into boxes.
        roi_cls_locs, roi_scores, rois, roi_indices = self.model(images)
        decodebox = DecodeBox(self.std, self.mean, self.num_classes)
        outputs = decodebox.forward(roi_cls_locs, roi_scores, rois,
                                    height=height, width=width,
                                    nms_iou=self.iou,
                                    score_thresh=self.confidence)
        if len(outputs) == 0:
            return old_image

        # Columns: 0-3 box, 5 class index.
        bbox = outputs[:, :4]
        label = outputs[:, 5]

        # Map boxes from the resized image back onto the original image.
        bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
        bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height
        bbox = np.array(bbox, np.int32)

        # From here on "image" is the original-size copy, so the font size
        # is derived from the original image width.
        image = old_image
        canvas_h = np.shape(image)[0]
        canvas_w = np.shape(image)[1]
        thickness = (np.shape(old_image)[0]
                     + np.shape(old_image)[1]) // old_width * 2
        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))

        for idx, c in enumerate(label):
            predicted_class = self.class_names[int(c)]
            colour = self.colors[int(c)]

            # Grow the box by 5 px per side, round, and clip to the image.
            left, top, right, bottom = bbox[idx]
            top = max(0, np.floor((top - 5) + 0.5).astype('int32'))
            left = max(0, np.floor((left - 5) + 0.5).astype('int32'))
            bottom = min(canvas_h,
                         np.floor((bottom + 5) + 0.5).astype('int32'))
            right = min(canvas_w,
                        np.floor((right + 5) + 0.5).astype('int32'))

            # The caption is the class name only.
            caption = '{}'.format(predicted_class)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(caption, font)
            caption = caption.encode('utf-8')
            print(caption)

            # Put the caption above the box when it fits, otherwise inside.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # A thick outline is drawn as nested one-pixel rectangles.
            for inset in range(thickness):
                draw.rectangle([left + inset, top + inset,
                                right - inset, bottom - inset],
                               outline=colour)
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=colour)
            draw.text(text_origin, str(caption, 'UTF-8'), fill=(0, 0, 0),
                      font=font)
            del draw

        print("time:", time.time() - start_time)
        return image
def detect_image(self, image):
    """Run the RPN + classifier pipeline on ``image`` and draw the results.

    Detections are drawn (box plus ``"<class> <score>"`` caption) on a deep
    copy of the input image.

    Args:
        image: Image to analyse; used like a PIL image (``resize``).

    Returns:
        The annotated copy of the input image, or the unmodified copy when
        nothing is detected.
    """
    src_shape = np.array(np.shape(image)[0:2])
    old_height = src_shape[0]
    old_width = src_shape[1]
    old_image = copy.deepcopy(image)

    # Resize the image to the size computed by get_new_img_size.
    width, height = get_new_img_size(old_width, old_height)
    image = image.resize([width, height], Image.BICUBIC)

    # Preprocess a batch of one image and run the proposal network.
    batch = np.expand_dims(np.array(image, dtype=np.float64), 0)
    photo = preprocess_input(batch)
    rpn_pred = self.model_rpn.predict(photo)

    # Decode the proposal network output against the anchors.
    feat_w, feat_h = self.get_img_output_length(width, height)
    anchors = get_anchors([feat_w, feat_h], width, height)
    rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

    # The classifier takes the shared feature layer (third RPN output) and
    # the proposals with their columns pairwise swapped (0<->1, 2<->3).
    base_layer = rpn_pred[2]
    proposal_box = np.array(rpn_results)[:, :, 1:]
    swapped_rois = np.zeros_like(proposal_box)
    swapped_rois[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
    classifier_pred = self.model_classifier.predict(
        [base_layer, swapped_rois])

    # Decode the classifier output into final boxes.
    results = self.bbox_util.detection_out_classifier(
        classifier_pred, proposal_box, self.config, self.confidence)
    if len(results[0]) == 0:
        return old_image

    # Columns: 0-3 box, 4 score, 5 class index.
    results = np.array(results[0])
    boxes = results[:, :4]
    top_conf = results[:, 4]
    top_label_indices = results[:, 5]
    boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
    boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

    # The font size is derived from the width of the resized image.
    font = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
    thickness = max(
        (np.shape(old_image)[0] + np.shape(old_image)[1]) // old_width * 2,
        1)

    image = old_image
    canvas_h = np.shape(image)[0]
    canvas_w = np.shape(image)[1]

    for idx, c in enumerate(top_label_indices):
        predicted_class = self.class_names[int(c)]
        score = top_conf[idx]
        colour = self.colors[int(c)]

        # Grow the box by 5 px per side, round, and clip to the image.
        left, top, right, bottom = boxes[idx]
        top = max(0, np.floor((top - 5) + 0.5).astype('int32'))
        left = max(0, np.floor((left - 5) + 0.5).astype('int32'))
        bottom = min(canvas_h, np.floor((bottom + 5) + 0.5).astype('int32'))
        right = min(canvas_w, np.floor((right + 5) + 0.5).astype('int32'))

        # Draw the box and its caption.
        caption = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(caption, font)
        caption = caption.encode('utf-8')
        print(caption, top, left, bottom, right)

        # Put the caption above the box when it fits, otherwise just inside.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # A thick outline is drawn as nested one-pixel rectangles.
        for inset in range(thickness):
            draw.rectangle([left + inset, top + inset,
                            right - inset, bottom - inset],
                           outline=colour)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=colour)
        draw.text(text_origin, str(caption, 'UTF-8'), fill=(0, 0, 0),
                  font=font)
        del draw
    return image
def get_FPS(self, image, test_interval):
    """Measure the average duration of one detection pass on ``image``.

    The image is converted, resized and preprocessed once. The detection
    pipeline (RPN prediction, anchor generation, proposal decoding,
    classifier prediction, box decoding and rescaling) then runs once
    untimed, followed by ``test_interval`` timed runs.

    Args:
        image: Image to benchmark on; used like a PIL image (``convert``,
            ``resize``).
        test_interval: Number of timed passes.

    Returns:
        Mean seconds per pass, i.e. elapsed time divided by
        ``test_interval``.

    Raises:
        ZeroDivisionError: If ``test_interval`` is 0.
    """
    # Convert to RGB so greyscale images can be used as well.
    image = image.convert("RGB")

    image_shape = np.array(np.shape(image)[0:2])
    old_width, old_height = image_shape[1], image_shape[0]

    # Resize the image to the size computed by get_new_img_size.
    width, height = get_new_img_size(old_width, old_height)
    image = image.resize([width, height], Image.BICUBIC)
    photo = np.array(image, dtype=np.float64)

    # Preprocess a batch of one image.
    photo = preprocess_input(np.expand_dims(photo, 0))

    def run_pipeline():
        """Run one full detection pass; the results are discarded."""
        rpn_pred = self.model_rpn.predict(photo)

        # Decode the proposal network output. Anchor generation stays in
        # here on purpose: it is part of the measured workload.
        base_feature_width, base_feature_height = self.get_img_output_length(
            width, height)
        anchors = get_anchors([base_feature_width, base_feature_height],
                              width, height)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

        # Classify the proposals using the shared feature layer; the
        # classifier receives the proposal columns pairwise swapped.
        base_layer = rpn_pred[2]
        proposal_box = np.array(rpn_results)[:, :, 1:]
        temp_ROIs = np.zeros_like(proposal_box)
        temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
        classifier_pred = self.model_classifier.predict(
            [base_layer, temp_ROIs])

        # Decode the classifier output and rescale the boxes.
        results = self.bbox_util.detection_out_classifier(
            classifier_pred, proposal_box, self.config, self.confidence)
        if len(results[0]) > 0:
            results = np.array(results[0])
            boxes = results[:, :4]
            boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
            boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

    # One pass before the timer starts, so it is excluded from the average.
    run_pipeline()

    # perf_counter is monotonic, so the measurement is not disturbed by
    # system clock adjustments.
    t1 = time.perf_counter()
    for _ in range(test_interval):
        run_pipeline()
    t2 = time.perf_counter()

    tact_time = (t2 - t1) / test_interval
    return tact_time
def detect_image(self, image_id, image):
    """Detect objects in ``image`` and write them to a per-image text file.

    The RPN proposals are converted to feature-map coordinates, classified
    in batches of ``self.config.num_rois``, decoded and filtered with
    ``self.bbox_util.nms_for_out``. Each remaining box becomes one line of
    ``./input/detection-results/<image_id>.txt`` in the form
    ``<class_name> <score> <left> <top> <right> <bottom>``. The file is
    always created, so an image without detections yields an empty file.

    Side effect: sets ``self.confidence`` to 0.05.

    Args:
        image_id: Base name of the results file (``.txt`` is appended).
        image: Image to analyse; used like a PIL image (``resize``).

    Returns:
        A deep copy of the input image when no ROI is accepted, otherwise
        ``None``.
    """
    self.confidence = 0.05

    # The context manager closes the file on every exit path, including the
    # early return below and any exception raised by the models.
    with open("./input/detection-results/" + image_id + ".txt", "w") as f:
        image_shape = np.array(np.shape(image)[0:2])
        old_width = image_shape[1]
        old_height = image_shape[0]
        old_image = copy.deepcopy(image)

        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height])
        photo = np.array(image, dtype=np.float64)

        # Preprocess a batch of one image and run the proposal network.
        photo = preprocess_input(np.expand_dims(photo, 0))
        preds = self.model_rpn.predict(photo)

        # Decode the proposal network output against the anchors.
        anchors = get_anchors(self.get_img_output_length(width, height),
                              width, height)
        rpn_results = self.bbox_util.detection_out(preds, anchors, 1,
                                                   confidence_threshold=0)

        # Scale the proposal corners by image size / rpn_stride, round
        # them, then turn the second corner into a width/height pair so
        # each row is (x, y, w, h).
        stride = self.config.rpn_stride
        R = rpn_results[0][:, 2:]
        R[:, 0] = np.array(np.round(R[:, 0] * width / stride), dtype=np.int32)
        R[:, 1] = np.array(np.round(R[:, 1] * height / stride), dtype=np.int32)
        R[:, 2] = np.array(np.round(R[:, 2] * width / stride), dtype=np.int32)
        R[:, 3] = np.array(np.round(R[:, 3] * height / stride), dtype=np.int32)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]
        base_layer = preds[2]

        # Drop proposals whose width or height is below one cell.
        delete_line = [i for i, r in enumerate(R) if r[2] < 1 or r[3] < 1]
        R = np.delete(R, delete_line, axis=0)

        bboxes = []
        probs = []
        labels = []
        num_rois = self.config.num_rois
        regr_std = self.config.classifier_regr_std
        last_batch = R.shape[0] // num_rois
        for jk in range(last_batch + 1):
            ROIs = np.expand_dims(R[num_rois * jk:num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == last_batch:
                # Pad the last, short batch up to num_rois by repeating
                # its first ROI.
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = self.model_classifier.predict(
                [base_layer, ROIs])

            for ii in range(P_cls.shape[1]):
                # Skip ROIs that score below the threshold or whose best
                # class is the last column.
                label = np.argmax(P_cls[0, ii, :])
                prob = np.max(P_cls[0, ii, :])
                if prob < self.confidence or label == (P_cls.shape[2] - 1):
                    continue

                (x, y, w, h) = ROIs[0, ii, :]
                (tx, ty, tw, th) = P_regr[0, ii, 4 * label:4 * (label + 1)]
                tx /= regr_std[0]
                ty /= regr_std[1]
                tw /= regr_std[2]
                th /= regr_std[3]

                # Apply the regression offsets to the ROI centre and size.
                cx = x + w / 2.
                cy = y + h / 2.
                cx1 = tx * w + cx
                cy1 = ty * h + cy
                w1 = math.exp(tw) * w
                h1 = math.exp(th) * h

                x1 = int(round(cx1 - w1 / 2.))
                y1 = int(round(cy1 - h1 / 2.))
                x2 = int(round(cx1 + w1 / 2))
                y2 = int(round(cy1 + h1 / 2))

                bboxes.append([x1, y1, x2, y2])
                probs.append(prob)
                labels.append(label)

        if len(bboxes) == 0:
            return old_image

        # Convert feature-map coordinates to fractions of the resized
        # image, then run per-class NMS.
        labels = np.array(labels)
        probs = np.array(probs)
        boxes = np.array(bboxes, dtype=np.float32)
        boxes[:, 0] = boxes[:, 0] * stride / width
        boxes[:, 1] = boxes[:, 1] * stride / height
        boxes[:, 2] = boxes[:, 2] * stride / width
        boxes[:, 3] = boxes[:, 3] * stride / height
        results = np.array(
            self.bbox_util.nms_for_out(np.array(labels), np.array(probs),
                                       np.array(boxes),
                                       self.num_classes - 1, 0.4))

        # Result columns: 0 class index, 1 score, 2-5 box.
        top_label_indices = results[:, 0]
        top_conf = results[:, 1]
        boxes = results[:, 2:]
        boxes[:, 0] = boxes[:, 0] * old_width
        boxes[:, 1] = boxes[:, 1] * old_height
        boxes[:, 2] = boxes[:, 2] * old_width
        boxes[:, 3] = boxes[:, 3] * old_height

        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c)]
            # Only the first six characters of the score are written.
            score = str(top_conf[i])
            left, top, right, bottom = boxes[i]
            f.write("%s %s %s %s %s %s\n" % (
                predicted_class, score[:6], str(int(left)),
                str(int(top)), str(int(right)), str(int(bottom))))
    return
def detect_image(self, image):
    """Run the RPN + classifier pipeline on ``image`` and draw the results.

    Both models are called through their ``*_get_pred`` callables and the
    outputs converted with ``.numpy()``. Detections are drawn (box plus
    ``"<class> <score>"`` caption) on a deep copy of the RGB-converted
    input image.

    Args:
        image: Image to analyse; used like a PIL image (``convert``,
            ``resize``).

    Returns:
        The annotated RGB copy of the input image, or the unmodified RGB
        copy when nothing is detected.
    """
    # Convert to RGB so greyscale images can be processed as well.
    image = image.convert("RGB")

    src_shape = np.array(np.shape(image)[0:2])
    old_height = src_shape[0]
    old_width = src_shape[1]
    old_image = copy.deepcopy(image)

    # Resize the image to the size computed by get_new_img_size.
    width, height = get_new_img_size(old_width, old_height)
    image = image.resize([width, height], Image.BICUBIC)

    # Preprocess a batch of one image and run the proposal network.
    batch = np.expand_dims(np.array(image, dtype=np.float64), 0)
    photo = preprocess_input(batch)
    rpn_pred = [out.numpy() for out in self.model_rpn_get_pred(photo)]

    # Decode the proposal network output against the anchors.
    feat_w, feat_h = self.get_img_output_length(width, height)
    anchors = get_anchors([feat_w, feat_h], width, height)
    rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)

    # The classifier takes the shared feature layer (third RPN output) and
    # the proposals with their columns pairwise swapped (0<->1, 2<->3).
    base_layer = rpn_pred[2]
    proposal_box = np.array(rpn_results)[:, :, 1:]
    swapped_rois = np.zeros_like(proposal_box)
    swapped_rois[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
    classifier_pred = [
        out.numpy()
        for out in self.model_classifier_get_pred([base_layer, swapped_rois])
    ]

    # Decode the classifier output into final boxes.
    results = self.bbox_util.detection_out_classifier(
        classifier_pred, proposal_box, self.config, self.confidence)
    if len(results[0]) == 0:
        return old_image

    # Columns: 0-3 box, 4 score, 5 class index.
    results = np.array(results[0])
    boxes = results[:, :4]
    top_conf = results[:, 4]
    top_label_indices = results[:, 5]
    boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
    boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

    # The font size is derived from the width of the resized image.
    font = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
    thickness = max(
        (np.shape(old_image)[0] + np.shape(old_image)[1]) // old_width * 2,
        1)

    image = old_image
    canvas_h = np.shape(image)[0]
    canvas_w = np.shape(image)[1]

    for idx, c in enumerate(top_label_indices):
        predicted_class = self.class_names[int(c)]
        score = top_conf[idx]
        colour = self.colors[int(c)]

        # Grow the box by 5 px per side, round, and clip to the image.
        left, top, right, bottom = boxes[idx]
        top = max(0, np.floor((top - 5) + 0.5).astype('int32'))
        left = max(0, np.floor((left - 5) + 0.5).astype('int32'))
        bottom = min(canvas_h, np.floor((bottom + 5) + 0.5).astype('int32'))
        right = min(canvas_w, np.floor((right + 5) + 0.5).astype('int32'))

        # Draw the box and its caption.
        caption = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(caption, font)
        caption = caption.encode('utf-8')
        print(caption, top, left, bottom, right)

        # Put the caption above the box when it fits, otherwise just inside.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # A thick outline is drawn as nested one-pixel rectangles.
        for inset in range(thickness):
            draw.rectangle([left + inset, top + inset,
                            right - inset, bottom - inset],
                           outline=colour)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=colour)
        draw.text(text_origin, str(caption, 'UTF-8'), fill=(0, 0, 0),
                  font=font)
        del draw
    return image
def detect_image(self, image):
    """Run the two-stage detector on ``image`` and draw the detections.

    Steps: resize and preprocess the image, run the RPN, decode its
    proposals (RPN confidence threshold 0.8), classify them in batches of
    ``self.config.num_rois``, decode the accepted ROIs, apply
    ``self.bbox_util.nms_for_out`` and draw each remaining box with a
    ``"<class> <score>"`` caption on a deep copy of the input image.

    Args:
        image: Image to analyse; used like a PIL image (``resize``).

    Returns:
        The annotated copy of the input image, or the unmodified copy when
        no ROI is accepted.
    """
    # 1. Record the original size and keep a copy to draw on.
    image_shape = np.array(np.shape(image)[0:2])
    old_width = image_shape[1]
    old_height = image_shape[0]
    old_image = copy.deepcopy(image)

    # 1.1 Resize the image to the size computed by get_new_img_size.
    width, height = get_new_img_size(old_width, old_height)
    image = image.resize([width, height])
    photo = np.array(image, dtype=np.float64)

    # 2. Preprocess a batch of one image and run the RPN.
    photo = preprocess_input(np.expand_dims(photo, 0))
    preds = self.model_rpn.predict(photo)

    # 3. Generate the anchors.
    anchors = get_anchors(self.get_img_output_length(width, height),
                          width, height)

    # 4. Decode the RPN output into proposals.
    rpn_results = self.bbox_util.detection_out(preds, anchors, 1,
                                               confidence_threshold=0.8)
    R = rpn_results[0][:, 2:]

    # 5. Scale the proposal corners by image size / rpn_stride, round
    #    them, then turn the second corner into a width/height pair so
    #    each row is (x, y, w, h).
    stride = self.config.rpn_stride
    R[:, 0] = np.array(np.round(R[:, 0] * width / stride), dtype=np.int32)
    R[:, 1] = np.array(np.round(R[:, 1] * height / stride), dtype=np.int32)
    R[:, 2] = np.array(np.round(R[:, 2] * width / stride), dtype=np.int32)
    R[:, 3] = np.array(np.round(R[:, 3] * height / stride), dtype=np.int32)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]
    base_layer = preds[2]

    # Drop proposals whose width or height is below one cell.
    delete_line = [i for i, r in enumerate(R) if r[2] < 1 or r[3] < 1]
    R = np.delete(R, delete_line, axis=0)

    bboxes = []
    probs = []
    labels = []
    num_rois = self.config.num_rois
    regr_std = self.config.classifier_regr_std
    last_batch = R.shape[0] // num_rois

    # 6. Pass the proposals to the classifier batch by batch.
    for jk in range(last_batch + 1):
        ROIs = np.expand_dims(R[num_rois * jk:num_rois * (jk + 1), :], axis=0)
        if ROIs.shape[1] == 0:
            break

        if jk == last_batch:
            # Pad the last, short batch up to num_rois by repeating its
            # first ROI.
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        # P_cls: per-ROI class scores; P_regr: per-class box regression.
        [P_cls, P_regr] = self.model_classifier.predict([base_layer, ROIs])

        for ii in range(P_cls.shape[1]):
            # 6.1 Skip ROIs that score below the threshold or whose best
            #     class is the last column.
            label = np.argmax(P_cls[0, ii, :])
            prob = np.max(P_cls[0, ii, :])
            if prob < self.confidence or label == (P_cls.shape[2] - 1):
                continue

            # ROI position, and the regression values of the winning class
            # (the class index is always defined here).
            (x, y, w, h) = ROIs[0, ii, :]
            (tx, ty, tw, th) = P_regr[0, ii, 4 * label:4 * (label + 1)]
            tx /= regr_std[0]
            ty /= regr_std[1]
            tw /= regr_std[2]
            th /= regr_std[3]

            # 6.2 Decode: shift the ROI centre and rescale its size.
            cx = x + w / 2.
            cy = y + h / 2.
            cx1 = tx * w + cx
            cy1 = ty * h + cy
            w1 = math.exp(tw) * w
            h1 = math.exp(th) * h

            # Top-left and bottom-right corners, rounded to integers.
            x1 = int(round(cx1 - w1 / 2.))
            y1 = int(round(cy1 - h1 / 2.))
            x2 = int(round(cx1 + w1 / 2))
            y2 = int(round(cy1 + h1 / 2))

            bboxes.append([x1, y1, x2, y2])
            probs.append(prob)
            labels.append(label)

    if len(bboxes) == 0:
        return old_image

    # 6.3 Convert feature-map coordinates to fractions of the resized image.
    labels = np.array(labels)
    probs = np.array(probs)
    boxes = np.array(bboxes, dtype=np.float32)
    boxes[:, 0] = boxes[:, 0] * stride / width
    boxes[:, 1] = boxes[:, 1] * stride / height
    boxes[:, 2] = boxes[:, 2] * stride / width
    boxes[:, 3] = boxes[:, 3] * stride / height

    # 6.4 Non-maximum suppression.
    results = np.array(
        self.bbox_util.nms_for_out(np.array(labels), np.array(probs),
                                   np.array(boxes), self.num_classes - 1,
                                   0.4))

    # Result columns: 0 class index, 1 score, 2-5 box.
    top_label_indices = results[:, 0]
    top_conf = results[:, 1]
    boxes = results[:, 2:]

    # 7. Map the boxes onto the original image.
    boxes[:, 0] = boxes[:, 0] * old_width
    boxes[:, 1] = boxes[:, 1] * old_height
    boxes[:, 2] = boxes[:, 2] * old_width
    boxes[:, 3] = boxes[:, 3] * old_height

    # The font size is derived from the width of the resized image.
    font = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))

    # The divisor is the resized width, so the quotient is 0 whenever the
    # original height + width is smaller than it; clamp to 1 so an outline
    # is always drawn.
    thickness = max(
        (np.shape(old_image)[0] + np.shape(old_image)[1]) // width, 1)

    image = old_image
    for i, c in enumerate(top_label_indices):
        predicted_class = self.class_names[int(c)]
        score = top_conf[i]

        # Grow the box by 5 px per side, round, and clip to the image.
        left, top, right, bottom = boxes[i]
        top = top - 5
        left = left - 5
        bottom = bottom + 5
        right = right + 5
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(np.shape(image)[0],
                     np.floor(bottom + 0.5).astype('int32'))
        right = min(np.shape(image)[1],
                    np.floor(right + 0.5).astype('int32'))

        # Draw the box and its caption.
        # NOTE(review): ImageDraw.textsize is no longer available in recent
        # Pillow releases - confirm the pinned Pillow version.
        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)
        label = label.encode('utf-8')
        print(label)

        # Put the caption above the box when it fits, otherwise just inside.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # A thick outline is drawn as nested one-pixel rectangles.
        for inset in range(thickness):
            draw.rectangle([left + inset, top + inset,
                            right - inset, bottom - inset],
                           outline=self.colors[int(c)])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[int(c)])
        draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
        del draw
    return image