def detect(self, image, threshold, anchors, num_classes=4):
    """Detect objects in one image and suppress overlapping boxes per class.

    Args:
        image: Input image; it is passed through ``self.trans`` and moved to
            ``self.device`` before being fed to ``self.net``.
        threshold: Confidence threshold forwarded to ``self.filter``.
        anchors: Container indexed by ``13``, ``26`` and ``52`` that yields the
            anchors of the matching output scale.
        num_classes: Only class ids ``0 .. num_classes - 1`` go through NMS and
            are returned; boxes of any other class are dropped. The default of
            4 only suits the model trained on the images under
            "data/garbage_img".

    Returns:
        A tensor stacking every box kept by NMS (column 5 of a box holds its
        class id). When no box survives, an empty tensor is returned instead
        of raising.
    """
    batch = self.trans(image).to(self.device).unsqueeze(dim=0)

    # yolov3: three output heads, named after their grid size
    output_13, output_26, output_52 = self.net(batch)

    per_scale_boxes = []
    for output, grid, stride in (
        (output_13, 13, 32),
        (output_26, 26, 16),
        (output_52, 52, 8),
    ):
        output = output.cpu().detach()
        indexs, outputs = self.filter(output, threshold)
        per_scale_boxes.append(
            self.backToImage(indexs, outputs, anchors[grid], stride))
    boxes_all = torch.cat(per_scale_boxes, dim=0)

    if boxes_all.shape[0] == 0:
        return boxes_all

    # Run NMS class by class to remove overlapping boxes.
    kept = []
    for class_id in range(num_classes):
        class_boxes = boxes_all[boxes_all[:, 5] == class_id]
        if class_boxes.size(0) > 0:
            # NOTE(review): the meaning of NMS's third argument (2) is not
            # visible here -- confirm against the NMS helper.
            kept.extend(NMS(class_boxes, 0.3, 2))

    # torch.stack() rejects an empty list; this happens when every detected
    # box belongs to a class id outside 0 .. num_classes - 1.
    if not kept:
        return boxes_all[:0]
    return torch.stack(kept)
def o_onet(self, img, bboxes):
    """Re-score candidate boxes on 48x48 crops and locate landmarks.

    Each box is cropped from ``img`` (using the offsets computed by ``pad``),
    resized to 48x48, normalised and scored by ``self.load_onet``. Boxes whose
    score exceeds 0.7 are kept, their landmark offsets are mapped to image
    coordinates, the boxes are calibrated with ``calibrate_box`` and finally
    filtered by ``NMS`` with a 0.6 threshold.

    Args:
        img: Image array with three dimensions (height, width, channels).
        bboxes: Candidate boxes; columns 0-3 are corner coordinates and
            column 4 is overwritten with the new score.

    Returns:
        ``(boxes, landmarks)`` for the detections that survive NMS.
    """
    img_h, img_w, _ = img.shape
    bboxes = change_box(bboxes)
    bboxes[:, 0:4] = np.round(bboxes[:, 0:4])
    dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(bboxes, img_w, img_h)

    n_boxes = bboxes.shape[0]
    crop_img = np.zeros((n_boxes, 48, 48, 3), dtype=np.float32)
    for k in range(n_boxes):
        # Paste the visible part of the box into a zero canvas of box size.
        patch = np.zeros((tmph[k], tmpw[k], 3), dtype=np.uint8)
        patch[dy[k]:edy[k], dx[k]:edx[k], :] = img[y[k]:ey[k], x[k]:ex[k], :]
        crop_img[k] = (cv2.resize(patch, (48, 48)) - 127.5) / 128

    scores, box, landmark = self.load_onet(crop_img)
    face_scores = scores[:, 1]
    keep_index = np.where(face_scores > 0.7)[0]

    bboxes = bboxes[keep_index]
    bboxes[:, 4] = face_scores[keep_index]
    box = box[keep_index]
    landmark = landmark[keep_index]

    # Landmarks are offsets relative to the box: even columns scale with the
    # box width and shift by x1, odd columns scale with the height and shift
    # by y1 (presumably five (x, y) points per box).
    box_w = bboxes[:, 2] - bboxes[:, 0]
    box_h = bboxes[:, 3] - bboxes[:, 1]
    landmark[:, 0::2] = box_w[:, np.newaxis] * landmark[:, 0::2] + bboxes[:, 0:1]
    landmark[:, 1::2] = box_h[:, np.newaxis] * landmark[:, 1::2] + bboxes[:, 1:2]

    res_boxes = calibrate_box(bboxes, box)
    keep = NMS(res_boxes, 0.6)
    return res_boxes[keep], landmark[keep]
def detect_face(self, img):
    """Run the three-stage cascade (pnet -> rnet -> onet) on one image.

    The proposals of each stage are fed to the next stage in batches of
    ``self.batch_size``; ``self.thresh[0..2]`` are the per-stage thresholds.

    Args:
        img: Image array; ``img.shape[0]`` / ``img.shape[1]`` are used as
            height / width when padding boxes.

    Returns:
        ``(boxes, landmarks)``, or ``(None, None)`` as soon as one stage
        produces no candidate.
    """
    proposals = self.pnet_detect(img, self.thresh[0])
    if proposals is None:
        return None, None

    # ---- second stage, batched ----
    n_proposals = proposals.shape[0]
    rnet_chunks = []
    for batch_idx in range(int(np.ceil(n_proposals / self.batch_size))):
        lo = batch_idx * self.batch_size
        hi = min(lo + self.batch_size, n_proposals)
        refined = self.rnet_detect(img, proposals[lo:hi], self.thresh[1])
        if len(refined) > 0:
            rnet_chunks.append(refined)
    if not rnet_chunks:
        return None, None
    if len(rnet_chunks) == 1:
        rnet_boxes = rnet_chunks[0]
    else:
        rnet_boxes = np.concatenate(rnet_chunks, axis=0)

    # Sort by the last column (descending, because the keys are negated).
    rnet_sorted = rnet_boxes[np.lexsort(-rnet_boxes.T)]
    rnet_kept, _ = NMS(rnet_sorted, 0.4, 'm')
    rnet_squares = BBoxPadSquare(
        BoxRegression(rnet_kept), img.shape[1], img.shape[0])

    # ---- third stage, batched ----
    n_squares = rnet_squares.shape[0]
    onet_chunks = []
    landmark_chunks = []
    for batch_idx in range(int(np.ceil(n_squares / self.batch_size))):
        lo = batch_idx * self.batch_size
        hi = min(lo + self.batch_size, n_squares)
        batch_boxes, batch_landmarks = self.onet_detect(
            img, rnet_squares[lo:hi], self.thresh[2])
        if len(batch_boxes) > 0:
            onet_chunks.append(batch_boxes)
            landmark_chunks.append(batch_landmarks)
    if not onet_chunks:
        return None, None
    if len(onet_chunks) == 1:
        onet_boxes = onet_chunks[0]
        landmarks = landmark_chunks[0]
    else:
        onet_boxes = np.concatenate(onet_chunks, axis=0)
        landmarks = np.concatenate(landmark_chunks, axis=0)

    onet_kept, valid_index = NMS(BoxRegression(onet_boxes), 0.4, 'm')
    # NOTE(review): presumably NMS returns a per-box flag where values < 1
    # mark the boxes it kept -- confirm against the NMS helper.
    landmarks = landmarks[valid_index < 1]
    return BBoxPad(onet_kept, img.shape[1], img.shape[0]), landmarks
def pnet_detect(self, img, thresh):
    """Generate candidate boxes by running ``self.pnet`` over an image pyramid.

    For every scale returned by ``self.generate_scales`` the image is resized,
    pushed through the network and each selected score-map cell is turned
    into a 12x12 window (stride 2) mapped back to the original image.

    Args:
        img: Image array; its first two dimensions are height and width.
        thresh: NOTE(review): accepted but never read -- cells are selected
            with the hard-coded test ``score < 1 - 0.6999``. That keeps LOW
            values of channel 1 of "prob1", which looks inverted if that
            channel is the face probability; confirm against the model.

    Returns:
        The boxes after NMS, ``BoxRegression`` and ``BBoxPadSquare``, or
        ``None`` when no cell was selected at any scale.
    """
    scales = self.generate_scales(img)
    h, w = img.shape[:2]

    per_scale = []
    for scale in scales:
        ws = int(np.ceil(w * scale))
        hs = int(np.ceil(h * scale))
        # NOTE(review): in the Python binding the trailing positional
        # arguments appear to bind to dst/fx/fy rather than interpolation --
        # verify; the explicit dsize is what determines the output size.
        resized_img = cv2.resize(img, (ws, hs), 0, 0, cv2.INTER_LINEAR)
        blob = cv2.dnn.blobFromImage(
            resized_img, 1.0 / 255.0, None, (0, 0, 0), False)
        self.pnet.setInput(blob)
        detections = self.pnet.forward(["conv4-2", "prob1"])
        reg = np.squeeze(detections[0])
        score = np.squeeze(detections[1][:, 1, :, :])

        score_h, score_w = score.shape
        rows = []
        for i in range(score_h):
            for j in range(score_w):
                if not score[i, j] < 1 - 0.6999:
                    continue
                # Cell (i, j) -> 12x12 window with stride 2, in image space.
                window = [
                    j * 2 / scale,
                    i * 2 / scale,
                    (j * 2 + 12 - 1) / scale,
                    (i * 2 + 12 - 1) / scale,
                ]
                rows.append(window + list(reg[:, i, j]) + [score[i, j]])
        if rows:
            per_scale.append(np.array(rows))

    if not per_scale:
        return None
    if len(per_scale) == 1:
        pnet_boxes = per_scale[0]
    else:
        pnet_boxes = np.concatenate(per_scale, axis=0)

    # Sort by the last column (descending, because the keys are negated).
    ordered = pnet_boxes[np.lexsort(-pnet_boxes.T)]
    kept, _ = NMS(ordered)
    return BBoxPadSquare(BoxRegression(kept), w, h)
def p_pent(self, img):
    """Propose candidate boxes by running the first-stage net over a pyramid.

    Starting at scale ``self.pnet_size / self.min_face`` the image is
    repeatedly rescaled by ``self.factor`` while its shorter side stays above
    ``self.pnet_size``. At every level the boxes produced by ``generate_box``
    are filtered with ``NMS`` (threshold 0.5) and collected.

    Args:
        img: Source image handed to ``process_img`` for every pyramid level.

    Returns:
        Array of shape ``(NUM, 5)``: the four box corners shifted by the
        regression offsets (columns 5-8, scaled by box width/height) followed
        by column 4 of the collected boxes. An empty ``(0, 5)`` array is
        returned when the image is too small for even one pyramid level.
    """
    scale = float(self.pnet_size / self.min_face)
    _img = process_img(img, scale)
    h, w, _ = _img.shape

    all_boxes = []
    while min(h, w) > self.pnet_size:
        p_cls, p_box = self.load_pnet(np.expand_dims(_img, axis=0))
        boxes = generate_box(p_cls[:, :, 1], p_box, scale, 0.6)

        # Prepare the next (smaller or larger, depending on factor) level.
        scale *= self.factor
        _img = process_img(img, scale)
        h, w, _ = _img.shape

        keep = NMS(boxes[:, :5], 0.5)
        all_boxes.append(boxes[keep])

    # np.vstack() raises on an empty list, which happens when the loop above
    # never runs; report "no candidates" instead of crashing.
    if not all_boxes:
        return np.empty((0, 5))

    all_boxes = np.vstack(all_boxes)
    box_w = all_boxes[:, 2] - all_boxes[:, 0]
    box_h = all_boxes[:, 3] - all_boxes[:, 1]
    return np.column_stack([
        all_boxes[:, 0] + all_boxes[:, 5] * box_w,
        all_boxes[:, 1] + all_boxes[:, 6] * box_h,
        all_boxes[:, 2] + all_boxes[:, 7] * box_w,
        all_boxes[:, 3] + all_boxes[:, 8] * box_h,
        all_boxes[:, 4],
    ])
def p_rent(self, img, bboxes):
    """Re-score candidate boxes on 24x24 crops with the second-stage net.

    Boxes whose padded width or height is below 24 pixels are discarded. The
    remaining boxes are cropped from ``img``, resized to 24x24, normalised and
    scored by ``self.load_rnet``; boxes scoring above 0.7 are kept, filtered
    by ``NMS`` (threshold 0.7) and calibrated with ``calibrate_box``.

    Args:
        img: Image array with three dimensions (height, width, channels).
        bboxes: Candidate boxes; columns 0-3 are corner coordinates and
            column 4 is overwritten with the new score.

    Returns:
        The result of ``calibrate_box`` for the surviving boxes.
    """
    h, w, _ = img.shape
    bboxes = change_box(bboxes)
    bboxes[:, 0:4] = np.round(bboxes[:, 0:4])
    dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(bboxes, w, h)

    # Select the boxes that are actually large enough. Counting them and then
    # processing the first `count` boxes (as before) would crop the wrong
    # boxes whenever a small box precedes a large one.
    valid = np.where(np.minimum(tmpw, tmph) >= 24)[0]
    bboxes = bboxes[valid]

    crops = np.zeros((valid.size, 24, 24, 3), dtype=np.float32)
    for slot, i in enumerate(valid):
        # Canvas is (height, width); paste the visible part of the box.
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
        tmp[dy[i]:edy[i], dx[i]:edx[i], :] = img[y[i]:ey[i], x[i]:ex[i], :]
        crops[slot] = (cv2.resize(tmp, (24, 24)) - 127.5) / 127.5

    cls_score, box = self.load_rnet(crops)
    cls_score = cls_score[:, 1]
    keep_index = np.where(cls_score > 0.7)[0]

    boxes = bboxes[keep_index]
    boxes[:, 4] = cls_score[keep_index]
    box = box[keep_index]

    keep = NMS(boxes, 0.7)
    return calibrate_box(boxes[keep], box[keep])
def inference(self, image):
    """Detect boxes in a single 3-channel image.

    The image is resized with its aspect ratio preserved, centred inside a
    zero-filled network input of size ``self.input_shape[2:4]`` and run
    through ``common.do_inference``. Every output scale contributes boxes
    whose score exceeds ``self.score_threshold``; the ``self.top_k`` best are
    mapped back to original-image coordinates and filtered by ``NMS``.

    Args:
        image: Array of shape (height, width, 3).

    Returns:
        A list of ``[x1, y1, x2, y2, score]`` entries, an empty list when no
        box passes the score threshold, or ``None`` when ``image`` does not
        have exactly three channels.
    """
    if image.ndim != 3 or image.shape[2] != 3:
        print('Only RGB images are supported.')
        return None

    input_height = self.input_shape[2]
    input_width = self.input_shape[3]
    input_batch = np.zeros(
        (1, input_height, input_width, self.input_shape[1]), dtype=np.float32)

    left_pad = 0
    top_pad = 0
    if image.shape[0] / image.shape[1] > input_height / input_width:
        # Image is relatively taller than the input: fit the height and
        # centre horizontally.
        resize_scale = input_height / image.shape[0]
        input_image = cv2.resize(image, (0, 0), fx=resize_scale, fy=resize_scale)
        left_pad = int((input_width - input_image.shape[1]) / 2)
        input_batch[0, :, left_pad:left_pad + input_image.shape[1], :] = input_image
    else:
        # Otherwise fit the width and centre vertically.
        resize_scale = input_width / image.shape[1]
        input_image = cv2.resize(image, (0, 0), fx=resize_scale, fy=resize_scale)
        top_pad = int((input_height - input_image.shape[0]) / 2)
        input_batch[0, top_pad:top_pad + input_image.shape[0], :, :] = input_image

    # NHWC -> NCHW, stored contiguously for the inference buffers.
    input_batch = input_batch.transpose([0, 3, 1, 2])
    input_batch = np.array(input_batch, dtype=np.float32, order='C')
    self.inputs[0].host = input_batch

    outputs = common.do_inference(self.context,
                                  bindings=self.bindings,
                                  inputs=self.inputs,
                                  outputs=self.outputs,
                                  stream=self.stream,
                                  batch_size=self.engine.max_batch_size)
    outputs = [np.squeeze(output.reshape(shape))
               for output, shape in zip(outputs, self.output_shapes)]

    candidates = []
    for i in range(self.num_output_scales):
        # Outputs alternate: score map, then box-offset map, per scale.
        score_map = np.squeeze(outputs[i * 2])
        bbox_map = np.squeeze(outputs[i * 2 + 1])

        start = self.receptive_field_center_start[i]
        stride = self.receptive_field_stride[i]
        centers_x = start + stride * np.arange(score_map.shape[1])
        centers_y = (start + stride * np.arange(score_map.shape[0]))[:, np.newaxis]

        # Offsets are subtracted from the receptive-field centres and the
        # result is clipped to the network input.
        scale_const = self.constant[i]
        x_lt = np.maximum(centers_x - bbox_map[0, :, :] * scale_const, 0)
        y_lt = np.maximum(centers_y - bbox_map[1, :, :] * scale_const, 0)
        x_rb = np.minimum(centers_x - bbox_map[2, :, :] * scale_const, input_width)
        y_rb = np.minimum(centers_y - bbox_map[3, :, :] * scale_const, input_height)

        rows, cols = np.where(score_map > self.score_threshold)
        if rows.size:
            candidates.append(np.stack([
                x_lt[rows, cols] - left_pad,
                y_lt[rows, cols] - top_pad,
                x_rb[rows, cols] - left_pad,
                y_rb[rows, cols] - top_pad,
                score_map[rows, cols],
            ], axis=1))

    if not candidates:
        return []

    # Keep the top_k highest-scoring boxes; the stable sort keeps ties in
    # collection order.
    boxes = np.concatenate(candidates, axis=0)
    boxes = boxes[np.argsort(-boxes[:, 4], kind='stable')]
    if boxes.shape[0] > self.top_k:
        boxes = boxes[0:self.top_k]
    bbox_collection_np = boxes.astype(np.float32)

    # Undo the resize for the coordinates only; the score column must not be
    # rescaled.
    bbox_collection_np[:, :4] /= resize_scale

    s = time.time()
    final_bboxes = NMS(bbox_collection_np, self.NMS_threshold)
    print("NMS time: ", time.time() - s)

    return [
        [
            final_bboxes[i, 0], final_bboxes[i, 1], final_bboxes[i, 2],
            final_bboxes[i, 3], final_bboxes[i, 4]
        ]
        for i in range(final_bboxes.shape[0])
    ]
def detect(
        cas_dir,
        subset,
        out_file_name,
        global_score_thrh,
        metric_type,
        thrh_type,
        thrh_value,
        interpolate_type,
        proc_type,
        proc_value,
        sample_offset,
        weight_inner,
        weight_outter,
        weight_global,
        att_filtering_value=None,
):
    """Turn class activation sequences into detections and write them out.

    For every video of ``dataset_dicts[subset]`` and every class whose
    softmaxed global score exceeds ``global_score_thrh``, a per-snippet metric
    is built according to ``metric_type``, interpolated to frame resolution,
    smoothed, thresholded at ten levels (0.1 .. 1.0), converted to detections
    and filtered by ``NMS`` (threshold 0.65). Detection boundaries are
    converted to seconds and clamped to ``[0, duration]``.

    Args:
        cas_dir: Directory passed to ``get_late_fusion_cas``.
        subset: Key into ``dataset_dicts``.
        out_file_name: Destination handed to the output writer.
        global_score_thrh: Minimum global class score for a class to be used.
        metric_type: One of 'score', 'multiply', 'att-filtering'.
        thrh_type: One of 'mean', 'max'.
        thrh_value: Not read -- the name is rebound by the fixed threshold
            sweep below.
        interpolate_type: One of 'quadratic', 'linear', 'nearest'.
        proc_type: One of 'dilation', 'median'.
        proc_value: Forwarded to ``detect_with_thresholding``.
        sample_offset: Added to both boundaries before dividing by the fps.
        weight_inner, weight_outter: Forwarded to ``mask_to_detections``.
        weight_global: Weight of the global class score added to entry[3].
        att_filtering_value: Required when ``metric_type`` is 'att-filtering'.

    Returns:
        The list of detections, each prefixed with its video name.
    """
    assert (metric_type in ['score', 'multiply', 'att-filtering'])
    assert (thrh_type in ['mean', 'max'])
    assert (interpolate_type in ['quadratic', 'linear', 'nearest'])
    assert (proc_type in ['dilation', 'median'])

    # Late-fusion weights of the two streams.
    rgb_weight = 2
    flow_weight = 1
    thres_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    nms_threshold = 0.65

    out_detections = []
    dataset_dict = dataset_dicts[subset]

    for video_name in dataset_dict.keys():
        avg_score, att_weight, branch_scores, global_score = get_late_fusion_cas(
            cas_dir, video_name, rgb_weight, flow_weight)

        video_info = dataset_dict[video_name]
        duration = video_info['duration']
        fps = video_info['frame_rate']
        frame_cnt = video_info['frame_cnt']

        global_score = softmax(global_score, dim=0)

        for class_id in range(action_class_num):
            if global_score[class_id] <= global_score_thrh:
                continue

            # ---- per-snippet metric for this class ----
            if metric_type == 'score':
                metric = normalize(avg_score[:, class_id:class_id + 1])
            elif metric_type == 'multiply':
                _score = softmax(avg_score, dim=1)[:, class_id:class_id + 1]
                metric = normalize(att_weight * _score)
            elif metric_type == 'att-filtering':
                assert (att_filtering_value is not None)
                metric = softmax(avg_score, dim=1)[:, class_id:class_id + 1]
                metric = normalize(metric)
                metric[att_weight < att_filtering_value] = 0
                metric = normalize(metric)

            # ---- snippet resolution -> frame resolution, then smooth ----
            metric = interpolate(metric[:, 0],
                                 feature_type,
                                 frame_cnt,
                                 sample_rate,
                                 snippet_size=base_snippet_size,
                                 kind=interpolate_type)
            metric = np.expand_dims(smooth(metric), axis=1)

            # ---- detections at every threshold, merged by NMS ----
            temp_out = []
            for level in thres_list:
                mask = detect_with_thresholding(metric, thrh_type, level,
                                                proc_type, proc_value)
                temp_out.extend(
                    mask_to_detections(mask, metric, weight_inner,
                                       weight_outter))
            temp_out = NMS(temp_out, nms_threshold)

            for entry in temp_out:
                entry[2] = class_id
                entry[3] += global_score[class_id] * weight_global
                # Frame index -> seconds, clamped to the video duration.
                for boundary in (0, 1):
                    seconds = (entry[boundary] + sample_offset) / fps
                    entry[boundary] = min(duration, max(0, seconds))

            out_detections.extend(
                [video_name] + entry for entry in temp_out)

    # add soft flag
    soft_flag = True

    if dataset_name == 'thumos14':
        output_detections_thumos14(out_detections, out_file_name)
    elif dataset_name in ['ActivityNet12', 'ActivityNet13']:
        writer = (soft_output_detections_anet
                  if soft_flag else output_detections_anet)
        writer(out_detections, out_file_name, dataset_name, feature_type)

    return out_detections
def detect_image(self, image):
    """Detect objects in an image array, draw them and save the picture.

    The array is wrapped in a PIL image, resized with ``resize_image`` and
    fed to the session. Boxes scoring at least ``_SCORE_THRESHOLD`` are
    reduced per class by repeatedly calling ``NMS``; every kept box is drawn
    with a label and the annotated image is written to ``FLAGS.output_img``.

    Args:
        image: Image array accepted by ``Image.fromarray``.

    Returns:
        None. The result is printed and written to disk.
    """
    image = Image.fromarray(image)
    # NOTE(review): variables are re-initialised on every call -- confirm
    # this does not discard weights loaded elsewhere.
    self.sess.run(tf.global_variables_initializer())
    ratio = np.array([image.size[0] / _SIZE, image.size[1] / _SIZE])
    boxed_image, image_shape = resize_image(image, self.input_size)
    inputs = np.expand_dims(np.array(boxed_image, dtype='float32') / 255., 0)

    boxes, scores = self.sess.run(
        [self.boxes, self.scores],
        feed_dict={self.inputs: inputs, self.ratio: ratio})

    mask = scores >= _SCORE_THRESHOLD
    boxes_, scores_, classes_ = [], [], []
    for class_idx in range(len(self.class_names)):
        selected = np.array(mask[:, class_idx])
        cls_boxes = boxes[selected, :]
        cls_scores = scores[selected, class_idx]
        # Each NMS call returns the best box plus the remaining candidates.
        while cls_boxes.shape[0] != 0:
            cls_boxes, cls_scores, max_box, max_score = NMS(
                cls_boxes, cls_scores, _IOU_THRESHOLD)
            boxes_.append(max_box)
            scores_.append(max_score)
            classes_.append(np.ones_like(max_score, dtype=int) * class_idx)

    out_boxes = np.reshape(boxes_, [-1, 4])
    out_scores = np.reshape(scores_, [-1])
    out_classes = np.reshape(classes_, [-1])
    print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

    # ---- Visualisation ----
    # One random colour per run of identical class ids.
    colors = []
    previous = ''
    color = tuple(np.random.randint(0, 256, 3))
    for class_idx in out_classes:
        if previous != class_idx:
            color = tuple(np.random.randint(0, 256, 3))
            previous = class_idx
        colors.append(color)

    def _nearest_int(value):
        # Round half up and convert to int32.
        return np.floor(value + 0.5).astype(np.int32)

    font = ImageFont.truetype(font='./font/FiraMono-Medium.otf',
                              size=_nearest_int(3e-2 * image.size[1]))
    # Bounding-box line thickness.
    thickness = (image.size[0] + image.size[1]) // 500

    for i, c in enumerate(out_classes):
        label = '{} {:.2f}'.format(self.class_names[c], out_scores[i])
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)

        # Boxes are stored as (top, left, bottom, right); clamp to the image.
        top, left, bottom, right = out_boxes[i]
        top = max(0, _nearest_int(top))
        left = max(0, _nearest_int(left))
        bottom = min(image.size[1], _nearest_int(bottom))
        right = min(image.size[0], _nearest_int(right))
        print(label, (left, top), (right, bottom))

        # Put the label above the box when it fits, otherwise just inside.
        label_top = top - label_size[1] if top - label_size[1] >= 0 else top + 1
        text_origin = np.array([left, label_top])

        for j in range(thickness):
            draw.rectangle([left + j, top + j, right - j, bottom - j],
                           outline=colors[i])
        draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)],
                       fill=colors[i])
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
        del draw

    cv2.imwrite(FLAGS.output_img, np.array(image))
def predict(self,
            image,
            resize_scale=1,
            score_threshold=0.8,
            top_k=100,
            NMS_threshold=0.3,
            NMS_flag=True,
            skip_scale_branch_list=None):
    """Detect boxes in a single 3-channel image.

    Args:
        image: Array of shape (height, width, 3).
        resize_scale: Factor applied to the image before inference; it is
            raised so that the shorter side is at least 128 pixels.
        score_threshold: Minimum score for a location to yield a box.
        top_k: Maximum number of boxes (highest scores first) that are kept.
        NMS_threshold: Overlap threshold passed to ``NMS``.
        NMS_flag: When true (default) the top-k boxes are filtered by ``NMS``;
            when false they are returned unfiltered.
        skip_scale_branch_list: Indices of output scales to ignore; ``None``
            (default) skips nothing.

    Returns:
        A list of ``[x1, y1, x2, y2, score, -1]`` entries in original-image
        coordinates, an empty list when no box passes the score threshold, or
        ``None`` when ``image`` does not have exactly three channels.
    """
    if image.ndim != 3 or image.shape[2] != 3:
        print('Only RGB images are supported.')
        return None

    skipped = () if skip_scale_branch_list is None else skip_scale_branch_list

    shorter_side = min(image.shape[:2])
    if shorter_side * resize_scale < 128:
        resize_scale = float(128) / shorter_side

    input_image = cv2.resize(image, (0, 0), fx=resize_scale, fy=resize_scale)
    input_image = input_image.astype(dtype=np.float32)
    # HWC -> NCHW with a batch of one.
    input_image = input_image[:, :, :, np.newaxis].transpose([3, 2, 0, 1])

    data_batch = DataBatch()
    data_batch.data = [mx.ndarray.array(input_image, self.ctx)]
    self.module.forward(data_batch=data_batch, is_train=False)
    outputs = [output.asnumpy() for output in self.module.get_outputs()]

    candidates = []
    for i in range(self.num_output_scales):
        if i in skipped:
            continue

        # Outputs alternate: score map, then box-offset map, per scale.
        score_map = np.squeeze(outputs[i * 2], (0, 1))
        bbox_map = np.squeeze(outputs[i * 2 + 1], 0)

        start = self.receptive_field_center_start[i]
        stride = self.receptive_field_stride[i]
        centers_x = start + stride * np.arange(score_map.shape[1])
        centers_y = (start + stride * np.arange(score_map.shape[0]))[:, np.newaxis]

        # Offsets are subtracted from the receptive-field centres, mapped
        # back to the original image and clipped to its borders.
        scale_const = self.constant[i]
        x_lt = np.maximum(
            (centers_x - bbox_map[0, :, :] * scale_const) / resize_scale, 0)
        y_lt = np.maximum(
            (centers_y - bbox_map[1, :, :] * scale_const) / resize_scale, 0)
        x_rb = np.minimum(
            (centers_x - bbox_map[2, :, :] * scale_const) / resize_scale,
            image.shape[1])
        y_rb = np.minimum(
            (centers_y - bbox_map[3, :, :] * scale_const) / resize_scale,
            image.shape[0])

        rows, cols = np.where(score_map > score_threshold)
        if rows.size:
            candidates.append(np.stack([
                x_lt[rows, cols],
                y_lt[rows, cols],
                x_rb[rows, cols],
                y_rb[rows, cols],
                score_map[rows, cols],
            ], axis=1))

    if not candidates:
        return []

    # Keep the top_k highest-scoring boxes; the stable sort keeps ties in
    # collection order.
    boxes = np.concatenate(candidates, axis=0)
    boxes = boxes[np.argsort(-boxes[:, 4], kind='stable')]
    if boxes.shape[0] > top_k:
        boxes = boxes[0:top_k]
    final_bboxes = boxes.astype(np.float32)

    if NMS_flag:
        final_bboxes = NMS(final_bboxes, NMS_threshold)

    # bbox: (x1, y1, x2, y2, score, -1)
    return [
        [
            final_bboxes[i, 0], final_bboxes[i, 1], final_bboxes[i, 2],
            final_bboxes[i, 3], final_bboxes[i, 4], -1
        ]
        for i in range(final_bboxes.shape[0])
    ]