def post_process(dets_out, img, h, w, top_k=1, score_threshold=0.6, undo_transform=True):
    """Return the image as RGBA with the best detection's mask as alpha channel.

    The raw network output is run through ``postprocess`` (with
    ``cfg.rescore_bbox`` temporarily forced to True), the ``top_k``
    highest-scoring detections are kept, and the mask of the first one that
    reaches ``score_threshold`` is scaled to 0-255 and written into channel 3
    of the RGBA result.

    Note: If undo_transform=False then h and w are allowed to be None; they
    are taken from ``img.shape`` instead.

    Args:
        dets_out: Raw detections, handed to ``postprocess`` unchanged.
        img: Input image. With ``undo_transform=False`` it is indexed as
            (height, width, channels) and divided by 255.
        h: Image height (ignored when ``undo_transform`` is False).
        w: Image width (ignored when ``undo_transform`` is False).
        top_k: Maximum number of detections to keep after sorting by score.
        score_threshold: Minimum score for a detection to be used.
        undo_transform: Whether ``img`` must first be passed through
            ``undo_image_transformation``.

    Returns:
        A uint8 numpy array with four channels. If no detection passes the
        threshold, or the mask branch is disabled, the alpha channel is
        whatever ``cv2.cvtColor(..., COLOR_RGB2RGBA)`` produced.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        try:
            t = postprocess(dets_out, w, h, visualize_lincomb=False,
                            crop_masks=False,
                            score_threshold=score_threshold)
        finally:
            # Restore the global config even when postprocess raises.
            cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:top_k]
        # Masks stay on the GPU; only the selected one is copied later.
        masks = t[3][idx] if cfg.eval_mask_branch else None
        classes, scores, _ = [x[idx].cpu().numpy() for x in t[:3]]

    # Detections are sorted by descending score, so stop at the first one
    # that falls below the threshold.
    num_dets_to_consider = min(top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < score_threshold:
            num_dets_to_consider = j
            break

    final_res = (img_gpu * 255).byte().cpu().numpy()
    final_res = cv2.cvtColor(final_res, cv2.COLOR_RGB2RGBA)

    if num_dets_to_consider == 0 or masks is None:
        return final_res

    # Only the best mask is used, so copy just that one to the CPU and
    # assign it to the last (alpha) channel of the image.
    best_mask = (masks[0] * 255).byte().cpu().numpy()
    final_res[:, :, 3] = best_mask
    return final_res
def plot_tfboard_figure(cfg, vis_imgs, vis_show=False, show_grad=False, max_vis=3):
    """Build a matplotlib figure with one row per randomly sampled batch item.

    Each row shows, left to right: the de-normalised RGB image, the ground
    truth, the weights (with colorbar) and the prediction (with colorbar).
    With ``show_grad`` the figure gets one extra column and the ``'grad'``
    entry is drawn in column index 5.

    Args:
        cfg: Config object; only ``cfg.backbone`` is read.
        vis_imgs: Mapping with the keys 'rgb', 'preds', 'gts', 'wghts'
            (and 'grad' when ``show_grad`` is set).
        vis_show: Call ``plt.show()`` before returning.
        show_grad: Also plot the gradient entry.
        max_vis: Maximum number of batch items to draw.

    Returns:
        The created matplotlib figure.
    """
    num_col = len(vis_imgs.keys())
    if show_grad:
        num_col += 1

    batch_size = vis_imgs['gts'].size(0)
    picked = np.random.choice(batch_size, min(max_vis, batch_size), replace=False)

    # Always request at least two rows (presumably so that ``axes`` stays a
    # 2-D array even when a single item is drawn).
    fig, axes = plt.subplots(max(2, len(picked)), num_col)

    # One sampled image per row.
    for row, sample in enumerate(picked):
        rgb_img = vis_imgs['rgb'][sample]
        pred = vis_imgs['preds'][sample, 0].cpu().detach().numpy()
        target = vis_imgs['gts'][sample, 0].cpu().detach().numpy()
        weight = vis_imgs['wghts'][sample, 0].cpu().detach().numpy()

        # With several GPUs the entries carry an extra leading dimension.
        if torch.cuda.device_count() > 1:
            pred = pred[0]
            target = target[0]
            weight = weight[0]
            rgb_img = rgb_img[0]

        restored = undo_image_transformation(cfg.backbone, rgb_img)
        axes[row, 0].imshow(restored)
        axes[row, 1].imshow(target)

        weight_artist = axes[row, 2].imshow(weight)
        plt.colorbar(weight_artist, ax=axes[row, 2])
        pred_artist = axes[row, 3].imshow(pred)
        plt.colorbar(pred_artist, ax=axes[row, 3])

        if show_grad:
            grad = vis_imgs['grad'][sample].cpu().detach().numpy()
            grad_artist = axes[row, 5].imshow(grad)
            plt.colorbar(grad_artist, ax=axes[row, 5])

        # Hide axes and ticks for every panel of this row.
        for col in range(num_col):
            panel = axes[row, col]
            panel.axis('off')
            panel.tick_params(axis='both', left=False, top=False,
                              right=False, bottom=False,
                              labelright=False, labelbottom=False)

    if vis_show:
        plt.show()
    return fig
def prep_display(self, dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, image_header=None):
    """Draw detections on the image and publish them as a ``Detections`` message.

    Up to 100 detections with a score of at least 0.3 are alpha-blended onto
    the image (masks), outlined and labelled. For every drawn detection a
    ``Detection`` holding the box, class name, score and the boolean mask
    cropped to the box (flattened) is appended to a ``Detections`` message,
    which is published through ``self.detections_pub``.

    Note: If undo_transform=False then h and w are allowed to be None.

    Args:
        dets_out: Raw detections, handed to ``postprocess`` unchanged.
        img: Input image.
        h: Image height (ignored when ``undo_transform`` is False).
        w: Image width (ignored when ``undo_transform`` is False).
        undo_transform: Whether ``img`` must first be passed through
            ``undo_image_transformation``.
        class_color: Colour detections by class instead of by index.
        mask_alpha: Opacity of the mask overlay.
        image_header: Header whose ``stamp`` and ``frame_id`` are copied to
            the published message. A fresh ``Header()`` is created per call
            when omitted (the former ``Header()`` default was built once at
            definition time and shared by all calls).

    Returns:
        The annotated image as a uint8 numpy array. When no detection
        reaches the threshold the unannotated image is returned and nothing
        is published.
    """
    if image_header is None:
        image_header = Header()

    max_dets = 100
    min_score = 0.3

    with torch.no_grad():
        dets = Detections()

        if undo_transform:
            img_numpy = undo_image_transformation(img, w, h)
            img_gpu = torch.Tensor(img_numpy).cuda()
        else:
            img_gpu = img / 255.0
            h, w, _ = img.shape

        with timer.env('Postprocess'):
            t = postprocess(dets_out, w, h, visualize_lincomb=False,
                            crop_masks=True, score_threshold=min_score)
            torch.cuda.synchronize()

        with timer.env('Copy'):
            if cfg.eval_mask_branch:
                # Masks are drawn on the GPU, so don't copy
                masks = t[3][:max_dets]
            classes, scores, boxes = [x[:max_dets].cpu().numpy() for x in t[:3]]

        num_dets_to_consider = min(max_dets, classes.shape[0])
        for j in range(num_dets_to_consider):
            if scores[j] < min_score:
                num_dets_to_consider = j
                break

        if num_dets_to_consider == 0:
            # No detections found so just output the original image
            return (img_gpu * 255).byte().cpu().numpy()

        # Colour lookup for a detection index; GPU tensors are cached per
        # device in the module-level ``color_cache``.
        def get_color(j, on_gpu=None):
            global color_cache
            color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

            if on_gpu is not None and color_idx in color_cache[on_gpu]:
                return color_cache[on_gpu][color_idx]

            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BGR, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

        # First, draw the masks on the GPU where we can do it really fast.
        # After this, masks is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # One RGB colour per mask, broadcastable against the masks
        colors = torch.cat(
            [get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
             for j in range(num_dets_to_consider)], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # Closed form of:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

        # Then draw the stuff that needs to be done on the cpu.
        # Note, make sure this is a uint8 array or opencv will not anti alias text
        img_numpy = (img_gpu * 255).byte().cpu().numpy()

        # Copy all masks to the CPU once instead of once per detection.
        masks_np = masks.cpu().numpy()

        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1
        text_color = [255, 255, 255]

        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            _class = cfg.dataset.class_names[classes[j]]
            text_str = '%s: %.2f' % (_class, score)
            text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
            text_pt = (x1, y1 - 3)

            cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
            cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale,
                        text_color, font_thickness, cv2.LINE_AA)

            det = Detection()
            det.box.x1 = x1
            det.box.y1 = y1
            det.box.x2 = x2
            det.box.y2 = y2
            det.class_name = _class
            det.score = score

            # Mask cropped to the bounding box, flattened row by row.
            mask_bb = masks_np[j, y1:y2, x1:x2, 0]
            det.mask.height = y2 - y1
            det.mask.width = x2 - x1
            det.mask.mask = np.array(mask_bb.reshape(-1), dtype=bool)
            dets.detections.append(det)

        dets.header.stamp = image_header.stamp
        dets.header.frame_id = image_header.frame_id
        self.detections_pub.publish(dets)

        return img_numpy
def prep_display(dets_out, img, gt, gt_masks, h, w, undo_transform=True, class_color=False):
    """Overlay masks, boxes and labels of the detections on the image.

    Which layers are drawn is controlled by the module-level ``args``
    (``display_masks``, ``display_bboxes``, ``display_text``,
    ``display_scores``); only detections scoring at least
    ``args.score_threshold`` among the first ``args.top_k`` are drawn.

    Note: If undo_transform=False then h and w are allowed to be None.
    ``gt`` and ``gt_masks`` are not used by this function and may be None.

    Returns:
        The annotated image as a uint8 numpy array.
    """
    if undo_transform:
        img_gpu = torch.Tensor(undo_image_transformation(img, w, h)).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            # Kept as a tensor; used for blending further down.
            masks = t[3][:args.top_k]
        classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]]

    if classes.shape[0] == 0:
        return (img_gpu * 255).byte().cpu().numpy()

    def get_color(j):
        key = classes[j] * 5 if class_color else j * 5
        rgb = COLORS[key % len(COLORS)]
        if undo_transform:
            return rgb
        return (rgb[2], rgb[1], rgb[0])

    # Detections that pass the score threshold, in reverse index order.
    candidate_count = min(args.top_k, classes.shape[0])
    draw_order = [j for j in reversed(range(candidate_count))
                  if scores[j] >= args.score_threshold]

    # Blend the masks into the image tensor first.
    if args.display_masks and cfg.eval_mask_branch:
        mask_alpha = 0.45
        for j in draw_order:
            color = get_color(j)
            mask = masks[j, :, :, None]
            mask_color = mask @ (torch.Tensor(color).view(1, 3) / 255.0)

            # Alpha only the region of the image that contains the mask
            img_gpu = img_gpu * (1 - mask) \
                + img_gpu * mask * (1 - mask_alpha) + mask_color * mask_alpha

    # Boxes and text are drawn with OpenCV on a uint8 array (text is not
    # anti-aliased otherwise).
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if not (args.display_text or args.display_bboxes):
        return img_numpy

    for j in draw_order:
        score = scores[j]
        x1, y1, x2, y2 = boxes[j, :]
        color = get_color(j)

        if args.display_bboxes:
            cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

        if args.display_text:
            _class = COCO_CLASSES[classes[j]]
            if args.display_scores:
                text_str = '%s: %.2f' % (_class, score)
            else:
                text_str = _class

            font_face = cv2.FONT_HERSHEY_DUPLEX
            font_scale = 0.6
            font_thickness = 1
            text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

            # Filled label background above the box, then the text itself.
            cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
            cv2.putText(img_numpy, text_str, (x1, y1 - 3), font_face,
                        font_scale, [255, 255, 255], font_thickness, cv2.LINE_AA)

    return img_numpy
def prep_display(dets_out, img, h, w, args, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """
    Draw masks, boxes, labels and an optional FPS box on the image, and dump
    several intermediate images into the 'results/' directory.

    Note: If undo_transform=False then h and w are allowed to be None.

    Files written (relative paths, as hard-coded below):
        results/mask_car_image.jpg  - colour overlay of all drawn masks
        results/mask_image<i>.jpg   - image multiplied by mask i
        results/crop_object.png     - image cropped to a detection box

    NOTE(review): the original indentation of this function was lost; the
    nesting of the 'display_best_masks_only' and 'display_best_bboxes_only'
    sections below is a reconstruction and should be confirmed.

    Returns the annotated image as a uint8 numpy array.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        # Temporarily force box rescoring for postprocess.
        # NOTE(review): the old value is not restored if postprocess raises.
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out, w, h, visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        # Keep the top_k detections in descending score order.
        idx = t[1].argsort(0, descending=True)[:args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].detach().cpu().numpy() for x in t[:3]]

    # Stop at the first detection whose score is below the threshold.
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # The image might come in as RGB or BGR, depending
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    # First, draw the masks on the GPU where we can do it really fast
    # Beware: very fast but possibly unintelligible mask-drawing code ahead
    # I wish I had access to OpenGL or Vulkan but alas, I guess Pytorch tensor operations will have to suffice
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, masks has a trailing singleton axis
        masks = masks[:num_dets_to_consider, :, :, None]

        # One colour per mask, shaped to broadcast against the masks
        colors = torch.cat([
            get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # Closed form of applying, for each j in order:
        #    img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

        # Save the colour overlay on its own (without the underlying image).
        img_numpy_mask = (masks_color_summand * 255).byte().cpu().numpy()
        cv2.imwrite('results/mask_car_image.jpg', img_numpy_mask)
        print("Mask for all visible car is generated")

    # NOTE(review): this section is assumed to sit at function level. It
    # reads ``masks`` (only bound when cfg.eval_mask_branch is set) and
    # overwrites num_dets_to_consider WITHOUT the score-threshold cut-off,
    # which affects the early return and the box loop below - confirm intent.
    if args.display_best_masks_only == True and args.top_k == 1:
        masks = masks[:num_dets_to_consider, :, :, None]
        num_dets_to_consider = min(args.top_k, classes.shape[0])
        print('maskshape', (masks.shape))
        for i in range(num_dets_to_consider):
            msk = masks[i, :, :, None]
            # Reshape to (1, H, W, 1) regardless of how many singleton axes
            # were appended above.
            mask = msk.view(1, masks.shape[1], masks.shape[2], 1)
            print('newmaskshape', (mask.shape))
            # Zero out every pixel outside the mask.
            img_gpu_masked = img_gpu * (mask.sum(dim=0) >= 1).float().expand(
                -1, -1, 3)
            img_numpy_masked = (img_gpu_masked * 255).byte().cpu().numpy()
            cv2.imwrite('results/mask_image' + str(i) + '.jpg',
                        img_numpy_masked)
            print("Mask for the most visible car is generated")

    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]

        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            # NOTE(review): compared against the STRING 'True', unlike
            # display_best_masks_only above which is compared against the
            # boolean True - confirm how this option is parsed.
            if args.display_best_bboxes_only == 'True':
                crop = img_numpy[y1:y2, x1:x2]
                cv2.imwrite('results/crop_object.png', crop)
                print("crop for the most visible car is generated")

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (
                    _class, score) if args.display_scores else _class

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                # Filled label background, then the label text on top of it.
                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    return img_numpy
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45):
    """Draw boxes and labels for sufficiently large 'person' detections only.

    From the first ``args.top_k`` detections only those of class 0 (person,
    per the original author's note) whose box area is at least 2500 pixels
    (roughly 25 x 100) are kept. A status line is printed for every call.

    Note: If undo_transform=False then h and w are allowed to be None.

    Args:
        dets_out: Raw detections, handed to ``postprocess`` unchanged.
        img: Input image.
        h: Image height (ignored when ``undo_transform`` is False).
        w: Image width (ignored when ``undo_transform`` is False).
        undo_transform: Whether ``img`` must first be passed through
            ``undo_image_transformation``.
        class_color: Colour detections by class instead of by index.
        mask_alpha: Unused here (no masks are drawn); kept for interface
            compatibility.

    Returns:
        The image as a uint8 numpy array, annotated when at least one person
        passes the filter, otherwise unchanged.
    """
    person_class_id = 0
    min_person_area = 2500  # assumed smallest person: 25 * 100 pixels

    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        # Take the first top_k detections (count set by the command line).
        classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]]

    # Boolean flag per detection: is it a person?
    person_index = (classes == person_class_id)

    if person_index.any():
        # Filter classes together with boxes and scores so that index j
        # refers to the same detection in all three arrays.
        classes = classes[person_index]
        boxes = boxes[person_index]
        scores = scores[person_index]

        # Box area of every person detection.
        area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

        # Drop persons whose box is too small.
        valid_person_index = (area >= min_person_area)
        classes = classes[valid_person_index]
        boxes = boxes[valid_person_index]
        scores = scores[valid_person_index]

        if valid_person_index.any():
            print('----- Person detected -----')
        else:
            # All persons were removed by the area filter.
            print('----- No person -----')
        num_dets_to_consider = int(valid_person_index.sum())
    else:
        # No person class among the detections at all.
        print('----- No person -----')
        num_dets_to_consider = 0

    if num_dets_to_consider == 0:
        # Nobody detected, return the original image.
        return (img_gpu * 255).byte().cpu().numpy()

    # Colour for detection index j (only the CPU tuple form is needed here).
    def get_color(j):
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)
        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BGR, depending
            color = (color[2], color[1], color[0])
        return color

    # Note, make sure this is a uint8 array or opencv will not anti alias text
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1
    text_color = [255, 255, 255]

    # Draw the box and the label text of every remaining detection.
    for j in reversed(range(num_dets_to_consider)):
        x1, y1, x2, y2 = boxes[j][:]
        # get_color expects the detection index, not the class id.
        color = get_color(j)
        score = scores[j]

        cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

        _class = cfg.dataset.class_names[classes[j]]
        text_str = '%s: %.2f' % (_class, score)

        text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
        text_pt = (x1, y1 - 3)

        cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
        cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    return img_numpy
def prep_display_for_video(dets_out, img, h=None, w=None, save_folder=None, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str='', override_args: Config = None):
    """
    Annotate one video frame and periodically save it and record detections.

    ``save_folder`` is indexed as follows in this function:
        [0] path checked with os.path.isdir before any image is written
        [1] file name prefix
        [2] directory receiving the un-annotated frame
        [3] directory receiving the annotated frame
        [4] frame number (compared with the global ``frame_compare`` and
            taken modulo ``args.video_fps`` to decide when to save)

    Returns ``[img_numpy, img_numpy_ori]``, or
    ``[img_numpy, img_numpy_ori, result_list]`` on the path that draws
    boxes/text for a frame number not seen on the previous call.

    NOTE(review): the original indentation of this function was lost; the
    nesting below is a reconstruction and should be confirmed.
    NOTE(review): ``save_folder`` defaults to None but is indexed
    unconditionally; ``fps_str`` is not used in the visible code.
    """
    if undo_transform:
        assert w is not None and h is not None, "with undo_transform=True, w,h params must be specified!"
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    # Un-annotated copy of the frame, taken before any mask is blended in.
    img_numpy_ori = (img_gpu * 255).byte().cpu().numpy()

    # NOTE: rebinding the module-level ``args`` persists after this call.
    global args
    if override_args is not None:
        args = override_args

    with timer.env('Postprocess'):
        # Temporarily force box rescoring for postprocess.
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        t = postprocess(dets_out, w, h, visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        cfg.rescore_bbox = save

    with timer.env('Copy'):
        # Keep the top_k detections in descending score order.
        idx = t[1].argsort(0, descending=True)[:args.top_k]
        if cfg.eval_mask_branch:
            masks = t[3][idx]
        # NOTE(review): unlike sibling functions these are not converted
        # with .cpu().numpy() - they stay whatever type postprocess returns.
        classes, scores, boxes = [x[idx] for x in t[:3]]

    # Stop at the first detection whose score is below the threshold.
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Colour for a detection index, with a per-device cache of GPU tensors.
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] if class_color else j) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]
        else:
            color = COLORS[color_idx]
            if not undo_transform:
                # Swap the first and last channel of the colour
                color = (color[2], color[1], color[0])
            if on_gpu is not None:
                color = torch.Tensor(color).to(on_gpu).float() / 255.
                color_cache[on_gpu][color_idx] = color
            return color

    global frame_compare
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # Only blend masks when this frame number differs from the stored one.
        if frame_compare != save_folder[4]:
            masks = masks[:num_dets_to_consider, :, :, None]
            colors = torch.cat([
                get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                for j in range(num_dets_to_consider)
            ], dim=0)
            masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha
            inv_alph_masks = masks * (-mask_alpha) + 1

            # Closed form of applying, for each j in order:
            #    img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
            masks_color_summand = masks_color[0]
            if num_dets_to_consider > 1:
                inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
                masks_color_cumul = masks_color[1:] * inv_alph_cumul
                masks_color_summand += masks_color_cumul.sum(dim=0)

            img_gpu = img_gpu * inv_alph_masks.prod(
                dim=0) + masks_color_summand

    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if num_dets_to_consider == 0:
        # Nothing to draw: optionally save both frames, then return.
        if os.path.isdir(
                save_folder[0]) and save_folder[4] % args.video_fps == 0:
            file_name = save_folder[1] + "_%05d" % save_folder[4] + '.png'
            cv2.imwrite(os.path.join(save_folder[3], file_name), img_numpy)
            cv2.imwrite(os.path.join(save_folder[2], file_name), img_numpy_ori)
        return [img_numpy, img_numpy_ori]

    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1

    if args.display_text or args.display_bboxes:
        if frame_compare != save_folder[4]:
            # Remember this frame number for the comparison on later calls.
            frame_compare = save_folder[4]
            for j in reversed(range(num_dets_to_consider)):
                x1, y1, x2, y2 = boxes[j, :]
                color = get_color(j)
                score = scores[j]

                if args.display_bboxes:
                    cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

                if args.display_text:
                    _class = cfg.dataset.class_names[classes[j]]
                    # text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class

                    if args.display_scores:
                        # Class name and score are drawn as two stacked labels.
                        text_str_class = f"{_class}"
                        text_str_score = f": {score:.2f}"
                        text_w_class, text_h_class = \
                            cv2.getTextSize(text_str_class, font_face, font_scale, font_thickness)[0]
                        img_numpy = ps.putBText(img_numpy, text_str_class, text_offset_x=x1, text_offset_y=y1, vspace=0, hspace=0,
                                                font=font_face, font_scale=0.6, thickness=font_thickness, alpha=0.7,
                                                background_RGB=color, text_RGB=(255, 255, 255))
                        img_numpy = ps.putBText(img_numpy, text_str_score, text_offset_x=x1,
                                                text_offset_y=y1 + text_h_class + 2, vspace=0, hspace=0,
                                                font=font_face, font_scale=0.6, thickness=font_thickness, alpha=0.7,
                                                background_RGB=color, text_RGB=(255, 255, 255))
                    else:
                        text_str_class = '%s' % (_class)
                        img_numpy = ps.putBText(img_numpy, text_str_class, text_offset_x=x1, text_offset_y=y1, vspace=0, hspace=0,
                                                font=font_face, font_scale=0.6, thickness=font_thickness, alpha=0.7,
                                                background_RGB=color, text_RGB=(255, 255, 255))

                    # NOTE(review): placed inside the display_text branch
                    # because ``_class`` is only bound there - confirm.
                    # ``result_list`` is not defined in this function;
                    # presumably a module-level list - verify.
                    if save_folder[4] % args.video_fps == 0:
                        dist = ocr(img_numpy_ori)
                        result = save_folder[
                            4], f"{dist}", f"{_class}", f"{score:.2f}", f"{x1}", f"{y1}", f"{x2}", f"{y2}"
                        result_list.append(result)

            # Save the annotated and the original frame on the same schedule.
            if os.path.isdir(
                    save_folder[0]) and save_folder[4] % args.video_fps == 0:
                file_name = save_folder[1] + "_%05d" % save_folder[4] + '.png'
                cv2.imwrite(os.path.join(save_folder[3], file_name), img_numpy)
                cv2.imwrite(os.path.join(save_folder[2], file_name), img_numpy_ori)
            return [img_numpy, img_numpy_ori, result_list]

    return [img_numpy, img_numpy_ori]
def prep_display(self, dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45):
    """Overlay masks, boxes and labels of the detections on the image.

    Which layers are drawn is controlled by the module-level ``args``
    (``display_masks``, ``display_bboxes``, ``display_text``,
    ``display_scores``). Only the leading detections, up to ``args.top_k``,
    that score at least ``args.score_threshold`` are drawn.

    Note: If undo_transform=False then h and w are allowed to be None.

    Returns:
        The image as a uint8 numpy array (unannotated when nothing passes
        the threshold).
    """
    if undo_transform:
        img_gpu = torch.Tensor(undo_image_transformation(img, w, h)).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            # Masks stay on the GPU because that is where they are blended.
            masks = t[3][:args.top_k]
        classes, scores, boxes = [
            x[:args.top_k].cpu().numpy() for x in t[:3]
        ]

    # Count the leading detections that reach the score threshold.
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    if num_dets_to_consider == 0:
        # Nothing to draw, hand back the plain image.
        return (img_gpu * 255).byte().cpu().numpy()

    def get_color(j, on_gpu=None):
        """Colour for detection ``j``; GPU tensors are cached per device."""
        base = classes[j] if class_color else j
        color_idx = (base * 5) % len(COLORS)
        want_tensor = on_gpu is not None

        if want_tensor and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BGR, depending
            color = (color[2], color[1], color[0])
        if not want_tensor:
            return color

        color = torch.Tensor(color).to(on_gpu).float() / 255.
        color_cache[on_gpu][color_idx] = color
        return color

    # Mask blending is done with tensor operations on the GPU.
    if args.display_masks and cfg.eval_mask_branch:
        n = num_dets_to_consider
        device_index = img_gpu.device.index

        # Append a trailing axis so the masks broadcast against RGB.
        masks = masks[:n, :, :, None]

        # One colour per mask, shaped (1, 1, 1, 3) each.
        per_mask_colors = [get_color(j, on_gpu=device_index).view(1, 1, 1, 3)
                           for j in range(n)]
        colors = torch.cat(per_mask_colors, dim=0)
        tinted = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # 1 outside a mask, 1 - mask_alpha inside it.
        keep = masks * (-mask_alpha) + 1

        # Closed form of applying, for each j in order:
        #    img_gpu = img_gpu * keep[j] + tinted[j]
        overlay = tinted[0]
        if n > 1:
            keep_cumul = keep[:(n - 1)].cumprod(dim=0)
            overlay += (tinted[1:] * keep_cumul).sum(dim=0)

        img_gpu = img_gpu * keep.prod(dim=0) + overlay

    # OpenCV drawing needs a uint8 array (text is not anti-aliased otherwise).
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if not (args.display_text or args.display_bboxes):
        return img_numpy

    for j in reversed(range(num_dets_to_consider)):
        x1, y1, x2, y2 = boxes[j, :]
        color = get_color(j)
        score = scores[j]

        if args.display_bboxes:
            cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

        if not args.display_text:
            continue

        _class = cfg.dataset.class_names[classes[j]]
        if args.display_scores:
            text_str = '%s: %.2f' % (_class, score)
        else:
            text_str = _class

        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1
        text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale,
                                         font_thickness)[0]

        # Filled label background above the box, then the white text.
        cv2.rectangle(img_numpy, (x1, y1),
                      (x1 + text_w, y1 - text_h - 4), color, -1)
        cv2.putText(img_numpy, text_str, (x1, y1 - 3), font_face, font_scale,
                    [255, 255, 255], font_thickness, cv2.LINE_AA)

    return img_numpy
def prep_display(self, dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, batch_idx=0, create_mask=False, return_imgs=False):
    """Visualise car/truck detections and optionally build a labelled mask image.

    Among the first ``self.args.top_k`` detections that reach
    ``self.args.score_threshold`` only those whose class name is 'car' or
    'truck' are kept. With ``create_mask`` a uint8 image is built in which
    the pixels of the j-th kept mask are incremented by ``10 * (j + 1)``
    (only when mask display is enabled and the mask branch is evaluated;
    otherwise the mask image stays all zero).

    The return value depends on the flags:
        * ``create_mask=False``: the annotated image only.
        * ``create_mask=True, return_imgs=True``: ``(image, ImageResult)``.
        * ``create_mask=True, return_imgs=False``: the ``ImageResult`` only;
          nothing is drawn.

    Args:
        dets_out: Raw detections, handed to ``postprocess`` unchanged.
        img: Input image.
        h: Image height (taken from ``img.shape`` when ``undo_transform`` is
            False).
        w: Image width (taken from ``img.shape`` when ``undo_transform`` is
            False).
        undo_transform: Whether ``img`` must first be passed through
            ``undo_image_transformation``.
        class_color: Colour detections by class instead of by index.
        mask_alpha: Opacity of the mask overlay.
        batch_idx: Forwarded to ``postprocess``.
        create_mask: Build the labelled mask image / ``ImageResult``.
        return_imgs: With ``create_mask``, also return the annotated image.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h, batch_idx,
                        visualize_lincomb=self.args.display_linecomb,
                        crop_masks=self.args.crop,
                        score_threshold=self.args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            masks = t[3][:self.args.top_k]
        classes, scores, boxes = [
            x[:self.args.top_k].cpu().numpy() for x in t[:3]
        ]

    num_dets_to_consider = min(self.args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < self.args.score_threshold:
            num_dets_to_consider = j
            break

    # Keep only vehicles.
    wanted_classes = ('car', 'truck')
    idx_fil = [i for i in range(num_dets_to_consider)
               if cfg.dataset.class_names[classes[i]] in wanted_classes]
    num_dets_to_consider = len(idx_fil)

    if num_dets_to_consider == 0:
        # No detection found so just output the original image / empty result
        if not create_mask:
            return (img_gpu * 255).byte().cpu().numpy()
        empty = ImageResult(None, None, None,
                            np.zeros((h, w, 1), dtype='uint8'), 0)
        if return_imgs:
            return (img_gpu * 255).byte().cpu().numpy(), empty
        return empty

    # Colour for a detection index, with a per-device cache of GPU tensors.
    def get_color(j, on_gpu=None):
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in self.color_cache[on_gpu]:
            return self.color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BGR, depending
            color = (color[2], color[1], color[0])
        if on_gpu is not None:
            color = torch.Tensor(color).to(on_gpu).float() / 255.
            self.color_cache[on_gpu][color_idx] = color
        return color

    # Apply the vehicle filter unconditionally so that the boxes and labels
    # drawn below belong to the kept detections even when masks are disabled.
    classes = classes[idx_fil]
    scores = scores[idx_fil]
    boxes = boxes[idx_fil, :]

    draw_masks = self.args.display_masks and cfg.eval_mask_branch

    # Always bound, so the final return can never hit an undefined name.
    mask_img = np.zeros((h, w, 1), dtype='uint8')

    if draw_masks:
        # After this, masks is of size [num_dets, h, w, 1]
        masks = masks[idx_fil, :, :, None]
        if create_mask:
            for j in range(num_dets_to_consider):
                mask_img += 10 * (j + 1) * masks[j].cpu().numpy().astype(
                    np.uint8)

    if create_mask and not return_imgs:
        return ImageResult(classes, scores, boxes, mask_img,
                           num_dets_to_consider)

    if draw_masks:
        # One colour per mask, shaped to broadcast against the masks
        colors = torch.cat([
            get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # Closed form of applying, for each j in order:
        #    img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    # Then draw the stuff that needs to be done on the cpu.
    # Note, make sure this is a uint8 array or opencv will not anti alias text
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if self.args.display_text or self.args.display_bboxes:
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1
        text_color = [255, 255, 255]

        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if self.args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if self.args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (
                    _class, score) if self.args.display_scores else _class
                text_pt = (x1, y1 - 3)
                cv2.putText(img_numpy, text_str, text_pt, font_face,
                            font_scale, text_color, font_thickness,
                            cv2.LINE_AA)

    if not create_mask:
        # Same contract as the no-detection branch above: image only.
        return img_numpy
    return img_numpy, ImageResult(classes, scores, boxes, mask_img,
                                  num_dets_to_consider)
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45):
    """Draw detections on a frame and overlay an estimated patty count.

    Note: If undo_transform=False then h and w are allowed to be None; they
    are re-read from ``img.shape``.

    Args:
        dets_out: Raw network output, forwarded unchanged to ``postprocess``.
        img: Input image. With ``undo_transform=True`` it is passed to
            ``undo_image_transformation``; otherwise it is used as an
            (h, w, channels) tensor holding values in 0..255.
        h, w: Target height and width.
        undo_transform: Whether ``img`` still carries the network transform.
        class_color: Colour by class id instead of by detection index.
        mask_alpha: Opacity of the mask overlay.

    Returns:
        The annotated image as a uint8 numpy array.

    Class ids as used by the code below: 0-2 count towards "cp", 3-5 towards
    "qp", 6-8 towards "op", and 9 is the tray. Ids 2, 5 and 8 are the
    "stacked" variants whose hidden patties are estimated by a regressor.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][:args.top_k]
        classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]]

    # Stop at the first detection below the score threshold.
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    if num_dets_to_consider == 0:
        # No detections found so just output the original image
        return (img_gpu * 255).byte().cpu().numpy()

    def get_color(j, on_gpu=None):
        """Return the colour for detection ``j``, cached per GPU index."""
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BGR, depending
            color = (color[2], color[1], color[0])
        if on_gpu is not None:
            color = torch.Tensor(color).to(on_gpu).float() / 255.
            color_cache[on_gpu][color_idx] = color
        return color

    # First, blend the masks on the GPU.
    if args.display_masks and cfg.eval_mask_branch:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # One RGB colour per detection, shaped [num_dets, 1, 1, 3]
        colors = torch.cat(
            [get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
             for j in range(num_dets_to_consider)],
            dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # Closed form of:
        #   for j in range(num_dets_to_consider):
        #       img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    # Convert only AFTER blending; converting earlier silently dropped the
    # mask overlay from the returned image.
    # Note, make sure this is a uint8 tensor or opencv will not anti alias
    # text for whatever reason.
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if not (args.display_text or args.display_bboxes):
        return img_numpy

    # Shared by the per-detection labels and the count overlay, so they must
    # exist even when args.display_text is off.
    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1
    text_color = [255, 255, 255]

    # Class ids whose boxes feed the stack regressor (2/5/8) or the tray (9).
    group_of_class = {2: 'cp', 5: 'qp', 8: 'op', 9: 'tray'}
    bboxes = {'cp': [], 'qp': [], 'op': [], 'tray': []}

    for j in reversed(range(num_dets_to_consider)):
        x1, y1, x2, y2 = boxes[j, :]
        color = get_color(j)
        score = scores[j]
        print(classes[j], x1, y1, x2, y2)

        group = group_of_class.get(int(classes[j]))
        if group is not None:
            bboxes[group].append([x1, y1, x2, y2])

        if args.display_bboxes:
            cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

        if args.display_text:
            _class = cfg.dataset.class_names[classes[j]]
            text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class
            text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
            text_pt = (x1, y1 - 3)
            cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
            cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale,
                        text_color, font_thickness, cv2.LINE_AA)

    def count_classes(*class_ids):
        """Number of kept detections whose class id is one of ``class_ids``."""
        return sum(np.sum(classes == class_id) for class_id in class_ids)

    # Everything that is not the tray counts as a visible patty.
    n_patties = np.sum(classes != 9)
    counts = [count_classes(0, 1), count_classes(3, 4), count_classes(6, 7)]
    visible = [count_classes(0, 1, 2), count_classes(3, 4, 5), count_classes(6, 7, 8)]

    # The dominant patty type selects the regressor and its stacked boxes.
    regressors = [
        ("weights/regressor/cp_stack_regressor", 'cp'),
        ("weights/regressor/qp_stack_regressor", 'qp'),
        ("weights/regressor/op_stack_regressor", 'op'),
    ]
    max_index = visible.index(max(visible))
    model_path, stack_key = regressors[max_index]

    # Sort the stacked boxes top to bottom (by y1).
    stacked_boxes = sorted(bboxes[stack_key], key=lambda box: box[1])
    print('found stacked', len(stacked_boxes))

    prediction = 0
    if bboxes['tray'] and stacked_boxes:
        tray_top = bboxes['tray'][0][1]
        tray_height = bboxes['tray'][0][3] - tray_top
        # A degenerate tray box would make the normalised features inf/nan.
        if tray_height > 0:
            # NOTE(security): pickle executes arbitrary code on load; the
            # regressor file must come from a trusted source.
            with open(model_path, 'rb') as model_file:
                loaded_model = pickle.load(model_file)
            for box in stacked_boxes:
                stack_height = box[3] - box[1]
                gap = box[1] - tray_top
                # Features are normalised by the tray height.
                X = np.array((gap / tray_height, stack_height / tray_height))
                this_prediction = round(loaded_model.predict(X.reshape(1, -1))[0])
                print("this prediction: ", this_prediction)
                prediction = prediction + this_prediction

    counts[max_index] = counts[max_index] + prediction
    cp_count, qp_count, op_count = counts

    count_text = "VISIBLE: {} ... CP: {}, QP: {}, OP: {}".format(n_patties, cp_count, qp_count, op_count)
    count_text_w, count_text_h = cv2.getTextSize(count_text, font_face, font_scale, font_thickness)[0]
    padding = 20
    # Anchor the summary in the bottom-right corner of the frame.
    count_text_pt = (w - count_text_w - padding, h - count_text_h)
    cv2.putText(img_numpy, count_text, count_text_pt, font_face, font_scale,
                text_color, font_thickness, cv2.LINE_AA)

    return img_numpy
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45):
    """Render the largest detected person and return a mask-cropped image.

    Note: If undo_transform=False then h and w are allowed to be None; they
    are re-read from ``img.shape``.

    Only detections of class id 0 (person) are considered. When several
    persons are found, the one with the largest box area is kept; no score
    threshold is applied to it.

    Args:
        dets_out: Raw network output, forwarded unchanged to ``postprocess``.
        img: Input image tensor.
        h, w: Target height and width.
        undo_transform: Whether ``img`` still carries the network transform.
        class_color: Colour by class id instead of by detection index.
        mask_alpha: Opacity of the mask overlay.

    Returns:
        ``(img_numpy, mask_img, img_crop)`` when a person is found:
        the annotated uint8 image, the person mask as a 2-D uint8 array
        (0 or 255), and ``img`` with everything outside the mask zeroed.
        When no person is found, only the unannotated uint8 image is
        returned (a single array, not a tuple), so callers must handle both.

    Raises:
        RuntimeError: if a person is found but ``cfg.eval_mask_branch`` is
            off, because the mask and crop cannot be produced.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy.
            # Keep the first k detections, k being set by args.top_k.
            masks = t[3][:args.top_k]
        classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]]

    # Class 0 is "person"; every other class is ignored.
    person_rows = np.flatnonzero(classes == 0)
    if person_rows.size == 0:
        print('----- No person -----')
        # No detections to draw so just output the original image
        return (img_gpu * 255).byte().cpu().numpy()

    if not cfg.eval_mask_branch:
        raise RuntimeError('prep_display needs cfg.eval_mask_branch to build the person mask')

    # Pick the person with the largest box. Areas live in their own array:
    # writing them into ``classes`` corrupted the class ids.
    person_boxes = boxes[person_rows]
    areas = ((person_boxes[:, 2] - person_boxes[:, 0])
             * (person_boxes[:, 3] - person_boxes[:, 1]))
    best = int(person_rows[np.argmax(areas)])

    class_id = int(classes[best])
    score = float(scores[best])
    x1, y1, x2, y2 = (int(v) for v in boxes[best])
    # Mask of the chosen person, shaped [h, w, 1]
    mask = masks[best][:, :, None]

    def get_color(on_gpu=None):
        """Return the colour of the single kept detection, cached per GPU."""
        global color_cache
        # The kept detection always sits at index 0.
        color_idx = (class_id * 5 if class_color else 0) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BGR, depending
            color = (color[2], color[1], color[0])
        if on_gpu is not None:
            color = torch.Tensor(color).to(on_gpu).float() / 255.
            color_cache[on_gpu][color_idx] = color
        return color

    # The binary mask is needed for the crop even when it is not drawn.
    mask_img = (mask * 255).byte().cpu().numpy()[:, :, 0]

    if args.display_masks:
        # Blend the mask colour into the image on the GPU.
        color_gpu = get_color(on_gpu=img_gpu.device.index).view(1, 1, 3)
        masks_color = mask.repeat(1, 1, 3) * color_gpu * mask_alpha
        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_mask = mask * (-mask_alpha) + 1
        img_gpu = img_gpu * inv_alph_mask + masks_color

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias
    # text for whatever reason.
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_text or args.display_bboxes:
        color = get_color()

        if args.display_bboxes:
            cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

        if args.display_text:
            _class = cfg.dataset.class_names[class_id]
            text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class

            font_face = cv2.FONT_HERSHEY_DUPLEX
            font_scale = 0.6
            font_thickness = 1

            text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
            text_pt = (x1, y1 - 3)
            text_color = [255, 255, 255]

            cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
            cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale,
                        text_color, font_thickness, cv2.LINE_AA)

    # Zero every pixel outside the person mask in the first three channels.
    # NOTE(review): this reads ``img`` directly, so it presumably expects the
    # undo_transform=False layout (h, w, channels) - confirm with callers.
    img_crop = img.byte().cpu().numpy()
    keep = mask_img // 255
    for channel in range(3):
        img_crop[:, :, channel] = img_crop[:, :, channel] * keep

    return img_numpy, mask_img, img_crop
def prep_display(self, dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """Draw detections and pairwise distance lines between detected persons.

    Note: If undo_transform=False then h and w are allowed to be None; they
    are re-read from ``img.shape``.

    Args:
        dets_out: Raw network output, forwarded unchanged to ``postprocess``.
        img: Input image.
        h, w: Target height and width.
        undo_transform: Whether ``img`` still carries the network transform.
        class_color: Colour by class id instead of by detection index.
        mask_alpha: Opacity of the mask overlay.
        fps_str: Text drawn in the top-left corner when ``self.display_fps``.

    Returns:
        The annotated image as a uint8 numpy array.

    Side effects:
        Writes ``total_person``, ``total_person_in_red_zone`` and
        ``total_person_in_green_zone`` into the module-level ``log`` mapping
        when at least one "person" is drawn with ``self.display_text`` on.
    """
    line_thickness = 2
    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1
    text_color = [255, 255, 255]

    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        try:
            t = postprocess(dets_out, w, h,
                            visualize_lincomb=self.display_lincomb,
                            crop_masks=self.crop,
                            score_threshold=self.score_threshold)
        finally:
            # Restore the shared config even if postprocess raises.
            cfg.rescore_bbox = save

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][:self.top_k]
        classes, scores, boxes = [
            x[:self.top_k].cpu().detach().numpy() for x in t[:3]
        ]

    # Stop at the first detection below the score threshold.
    num_dets_to_consider = min(self.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < self.score_threshold:
            num_dets_to_consider = j
            break

    def get_color(j, on_gpu=None):
        """Return the colour for detection ``j``, cached per GPU index."""
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BGR, depending
            color = (color[2], color[1], color[0])
        if on_gpu is not None:
            color = torch.Tensor(color).to(on_gpu).float() / 255.
            color_cache[on_gpu][color_idx] = color
        return color

    # First, blend the masks on the GPU.
    if self.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # One RGB colour per detection, shaped [num_dets, 1, 1, 3]
        colors = torch.cat(
            [get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
             for j in range(num_dets_to_consider)],
            dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # Closed form of:
        #   for j in range(num_dets_to_consider):
        #       img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if self.display_fps:
        # Darken the box behind the fps text while still on the GPU
        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]
        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias
    # text for whatever reason.
    img_numpy = (img_gpu * 255).byte().cpu().detach().numpy()

    if self.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if self.display_text or self.display_bboxes:
        distance_boxes = []

        def draw_distance(person_boxes):
            """Connect close pairs of person boxes and log the pair counts.

            The distance is Euclidean, measured between the (x2, y2) corners
            of the two boxes. Pairs closer than 250 px get a (0, 0, 255)
            line, pairs closer than 300 px a (0, 255, 0) line.
            """
            red_pairs = 0
            green_pairs = 0
            # Only unordered pairs are needed; enumerating the whole powerset
            # to find them is exponential in the number of persons.
            for first, second in combinations(person_boxes, 2):
                corner_a = (first[2], first[3])
                corner_b = (second[2], second[3])
                dist = np.linalg.norm(np.array(corner_a) - np.array(corner_b))
                if dist < 250:
                    red_pairs += 1
                    cv2.line(img_numpy, corner_a, corner_b, (0, 0, 255), line_thickness)
                elif dist < 300:
                    green_pairs += 1
                    cv2.line(img_numpy, corner_a, corner_b, (0, 255, 0), line_thickness)
            # NOTE(review): these values are numbers of PAIRS, despite the
            # "total_person" key names - confirm what consumers expect.
            log["total_person_in_red_zone"] = red_pairs
            log["total_person_in_green_zone"] = green_pairs

        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if self.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if self.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                if _class == "person":
                    # NOTE(review): this stores the number of kept detections
                    # of every class, not only persons.
                    log["total_person"] = num_dets_to_consider
                    distance_boxes.append(boxes[j, :].tolist())
                    draw_distance(distance_boxes)

                text_str = '%s: %.2f' % (_class, score) if self.display_scores else _class

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
                text_pt = (x1, y1 - 3)

                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale,
                            text_color, font_thickness, cv2.LINE_AA)

    return img_numpy
def prep_display_mod(dets_out, img, h, w, depth_map, rel_depth, undo_transform=True, mask_alpha=1.0):  # was mask_alpha=0.45
    """Keep only the people nearest the top depth value and paint over the rest.

    Note: If undo_transform=False then h and w are allowed to be None; they
    are re-read from ``img.shape``.

    People (class id 0) are ranked by the ``depth_map`` value at their box
    centre. Those whose value is at least ``(1 - rel_depth)`` times the
    largest one are kept; their masks are merged, and every pixel outside the
    merged mask is blended towards ``COLORS[0]`` according to ``mask_alpha``.

    Args:
        dets_out: Raw network output, forwarded unchanged to ``postprocess``.
        img: Input image.
        h, w: Target height and width.
        depth_map: Indexed as ``depth_map[row, col, 0]``.
            # assumes it has the same height/width as the image - TODO confirm
        rel_depth: Relative tolerance below the largest depth value.
        undo_transform: Whether ``img`` still carries the network transform.
        mask_alpha: Blend strength of the fill colour.

    Returns:
        A uint8 numpy image. It is all zeros when detections exist but none
        is a person.
        NOTE(review): when there are no detections at all, the unmodified
        image is returned instead - confirm that asymmetry is intended.
    """
    score_threshold = 0.15
    top_k = 15

    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        try:
            t = postprocess(dets_out, w, h, score_threshold=score_threshold)
        finally:
            # Restore the shared config even if postprocess raises.
            cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:top_k]
        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

    # Stop at the first detection below the score threshold.
    num_dets_to_consider = min(top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < score_threshold:
            num_dets_to_consider = j
            break
    classes = classes[:num_dets_to_consider]

    def get_color(j, on_gpu=None):
        """Return ``COLORS[j]`` (optionally as a GPU tensor), cached per GPU."""
        global color_cache
        color_idx = j

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BGR, depending
            color = (color[2], color[1], color[0])
        if on_gpu is not None:
            color = torch.Tensor(color).to(on_gpu).float() / 255.
            color_cache[on_gpu][color_idx] = color
        return color

    if num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Collect the people (class id 0) and the centre of each of their boxes.
        people_masks_idxs = []
        center_rows = []
        center_cols = []
        for det_index, class_id in enumerate(classes):
            if class_id == 0:
                people_masks_idxs.append(det_index)
                x1, y1, x2, y2 = boxes[det_index, :]
                center_cols.append(int((x1 + x2) / 2))
                center_rows.append(int((y1 + y2) / 2))

        if not people_masks_idxs:
            # No people: return a black image
            return (img_gpu * 0).byte().cpu().numpy()

        # A box touching the right/bottom edge has a centre equal to the
        # image size; clamp so the depth lookup stays inside the map.
        rows = np.clip(np.array(center_rows), 0, depth_map.shape[0] - 1)
        cols = np.clip(np.array(center_cols), 0, depth_map.shape[1] - 1)
        obj_depths = np.array([depth_map[r, c, 0] for r, c in zip(rows, cols)])
        people_masks_idxs = np.array(people_masks_idxs)

        # Sort people by depth value, largest first.
        order = np.argsort(-obj_depths)
        obj_depths = obj_depths[order]
        people_masks_idxs = people_masks_idxs[order]

        # Keep the people within rel_depth of the largest depth value.
        depth_thres = obj_depths[0] * (1.0 - rel_depth)
        people_masks_idxs = people_masks_idxs[obj_depths >= depth_thres]
        masks = masks[people_masks_idxs]
        num_dets_to_consider = len(people_masks_idxs)

        colors = get_color(0, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)

        # Union of the kept masks, shaped [1, h, w, 1] with values 0 or 1.
        masks = masks.sum(dim=0, keepdim=True)
        masks[masks != 0.0] = 1.0

        # (1 - mask_alpha * mask) weights the fill colour; the mask itself
        # decides where the original pixels survive.
        inv_alph_masks = masks * (-mask_alpha) + 1
        masks_color = inv_alph_masks.repeat(1, 1, 1, 3) * colors * mask_alpha
        keep_mask = masks.repeat(1, 1, 1, 3)

        img_gpu = (img_gpu * torch.squeeze(keep_mask, 0)
                   + torch.squeeze(masks_color, 0))

    img_numpy = (img_gpu * 255.0).byte().cpu().numpy()
    return img_numpy
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """Draw tracked objects, predict their future poses and publish them.

    Note: If undo_transform=False then h and w are allowed to be None; they
    are re-read from ``img.shape``.

    Args:
        dets_out: Raw network output, forwarded unchanged to ``postprocess``.
        img: Input image.
        h, w: Target height and width.
        undo_transform: Whether ``img`` still carries the network transform.
        class_color: Colour by ``obj_info[j][0]`` instead of by index.
        mask_alpha: Opacity of the mask overlay.
        fps_str: Text drawn in the top-left corner when ``args.display_fps``.

    Returns:
        The annotated image as a uint8 numpy array.

    Side effects (module-level state):
        Sets ``first_frame``; updates ``old_obj_info`` (skipped on the early
        return for zero detections); writes ``mask_numpy``, ``mask_color``,
        ``mask_flag``, ``yhat``; increments ``frame_count``; publishes via
        ``array_pub`` and resets ``pub_Flag``.

    Layout of an ``obj_info`` entry as read here (produced by
    ``sort_info.data_save``; meanings beyond these reads are not visible):
        [0] id, [1] name, [2] compared against 0 to skip the entry,
        [3] mask tensor, [4] box data (x1=[2], x2=[3], y1=[4], y2=[5]),
        [5] list of grayscale mask frames, [6] list of predicted
        ``(x, y, dx, dy)`` points.
    """
    # All globals declared up front; the assignments below rely on them.
    global first_frame, old_obj_info, mask_numpy
    global frame_count, state_pre, flag, predict_pos, centerX, centerY, degree
    global mask_color, mask_flag, pub_Flag
    global img_num, temp_x, temp_y, yhat

    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1
    text_color = [255, 255, 255]

    name = []
    mask_slices = []

    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        try:
            t = postprocess(dets_out, w, h,
                            visualize_lincomb=args.display_lincomb,
                            crop_masks=args.crop,
                            score_threshold=args.score_threshold)
        finally:
            # Restore the shared config even if postprocess raises.
            cfg.rescore_bbox = save

    with timer.env('Copy'):
        # NOTE(review): argsort of an argsort yields each score's rank, not a
        # sorting permutation - confirm this ordering is what is wanted.
        idx1 = t[1].argsort()
        idx = idx1.argsort()
        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
            mask_picture = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

        # Per-detection class name and [1, h, w, 1] mask slice for the tracker.
        for i in range(len(classes)):
            name.append(cfg.dataset.class_names[classes[i]])
            mask_slices.append(mask_picture[i:i + 1, :, :, None])

        obj_info, obj_num = sort_info.data_save(mask_slices, classes, name, scores,
                                                boxes, first_frame, old_obj_info)
        first_frame = True

    # Stop at the first detection below the score threshold.
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    def get_color(j, on_gpu=None):
        """Return the colour for entry ``j``, cached per GPU index."""
        global color_cache
        color_idx = (obj_info[j][0] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BGR, depending
            color = (color[2], color[1], color[0])
        if on_gpu is not None:
            color = torch.Tensor(color).to(on_gpu).float() / 255.
            color_cache[on_gpu][color_idx] = color
        return color

    # First, blend the masks on the GPU.
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Grayscale image of everything covered by any mask, kept globally.
        masked_img = img_gpu * (masks.sum(dim=0) > 0.5).float()
        mask_numpy = (masked_img * 255).byte().cpu().numpy()
        mask_numpy = cv2.cvtColor(mask_numpy, cv2.COLOR_BGR2GRAY)

        # One RGB colour per detection, shaped [num_dets, 1, 1, 3]
        colors = torch.cat(
            [get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
             for j in range(num_dets_to_consider)],
            dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # Closed form of:
        #   for j in range(num_dets_to_consider):
        #       img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if args.display_fps:
        # Darken the box behind the fps text while still on the GPU
        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]
        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias
    # text for whatever reason.
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale,
                    text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        frame_count += 1
        pub_array_msg = obj_array()

        for j in range(obj_num):
            if obj_info[j][2] == 0:
                continue

            # Grayscale image of this object's masked pixels.
            mask_image = img_gpu * (obj_info[j][3].sum(dim=0) > 0.5).float()
            mask_numpy1 = (mask_image * 255).byte().cpu().numpy()
            mask_color = cv2.cvtColor(mask_numpy1, cv2.COLOR_BGR2GRAY)

            mask_flag = False
            # Sample one mask frame every 20 frames.
            if frame_count % 20 == 3:
                obj_info[j][5].append(mask_color)
                mask_flag = True

                # Three stored frames form one input sequence for the model.
                if len(obj_info[j][5]) > 2:
                    obj_msg = obj_infomsg()
                    obj_msg.id = obj_info[j][0]
                    obj_msg.object_name = obj_info[j][1]

                    imagedata1 = np.array(obj_info[j][5])
                    imagedata1 = imagedata1.reshape((-1, 3, 480, 640, 1))
                    imagedata1 = imagedata1 / 255.
                    yhat = model.predict(imagedata1, verbose=0)

                    # Convert the five predicted steps to pixel coordinates
                    # (scaled by half of 640x480) plus a direction offset.
                    steps = yhat[1][0]
                    points = []
                    degrees = []
                    for i in range(5):
                        x1 = steps[i][1] * 320 + 320
                        y1 = steps[i][2] * 240 + 240
                        degree1 = arctan_recovery(steps[i][3], steps[i][4])
                        temp_x1, temp_y1 = trans_degree(x1, y1, degree1)
                        points.append((x1, y1, temp_x1, temp_y1))
                        degrees.append(degree1)
                    # Fill the list on first use, overwrite its first five
                    # entries afterwards.
                    obj_info[j][6][:5] = points

                    # The last predicted step is what gets published.
                    obj_msg.x = points[4][0]
                    obj_msg.y = points[4][1]
                    obj_msg.degree = degrees[4]

                    pub_array_msg.Obj_list.append(obj_msg)
                    pub_Flag = True
                    # Slide the window: drop the oldest stored frame.
                    obj_info[j][5].pop(0)

            # Draw the predicted points and their direction segments.
            for px, py, temp_px, temp_py in obj_info[j][6][:5]:
                cv2.circle(img_numpy, (int(px), int(py)), 5, (0, 0, 255), 5)
                cv2.line(img_numpy,
                         (int(px + temp_px), int(py + temp_py)),
                         (int(px - temp_px), int(py - temp_py)),
                         (0, 0, 255), 5)

            # NOTE(review): the object id is passed where get_color expects an
            # index into obj_info - confirm ids and indices coincide.
            color = get_color(obj_info[j][0])
            box_x1 = obj_info[j][4][2]
            box_y1 = obj_info[j][4][4]
            box_x2 = obj_info[j][4][3]
            box_y2 = obj_info[j][4][5]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (box_x1, box_y1), (box_x2, box_y2), color, 1)

            if args.display_text:
                _class = obj_info[j][1]
                text_str = '%s: %s' % (obj_info[j][0], _class) if args.display_scores else _class

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
                text_pt = (box_x1, box_y1 - 3)

                cv2.rectangle(img_numpy, (box_x1, box_y1),
                              (box_x1 + text_w, box_y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale,
                            text_color, font_thickness, cv2.LINE_AA)

        if pub_Flag:
            array_pub.publish(pub_array_msg)
            pub_Flag = False

    old_obj_info = obj_info
    return img_numpy
def prep_display(net, dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """
    Black out the frame except for the pixels covered by the kept instance masks.

    Note: If undo_transform=False then im_h and im_w are allowed to be None.

    Args:
        net: object whose ``detect`` callable turns the raw head outputs into detections.
        dets_out: indexable of raw outputs, read here as loc, conf, mask, priors, proto
            (indices 0 to 4).
        img: input image. With undo_transform=False it is divided by 255 and its shape
            is unpacked as (h, w, channels).
        h, w: output height / width handed to postprocess; replaced by img.shape when
            undo_transform is False.
        undo_transform: run undo_image_transformation on img before drawing.
        class_color, mask_alpha, fps_str: unused here; kept so the signature matches
            the other prep_display variants.

    Returns:
        uint8 numpy image that is zero everywhere except inside the masks of the kept
        detections (all kept detections when args.classes is None, otherwise only
        those whose class name is in args.classes).
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        try:
            preds = net.detect(
                {
                    'loc': dets_out[0],
                    'conf': dets_out[1],
                    'mask': dets_out[2],
                    'priors': dets_out[3],
                    'proto': dets_out[4]
                }, net)
            t = postprocess(preds, w, h,
                            visualize_lincomb=args.display_lincomb,
                            crop_masks=args.crop,
                            score_threshold=args.score_threshold)
        finally:
            # Restore the global config even when detection / postprocessing raises.
            cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:args.top_k]
        # Masks stay on their original device; without a mask branch there is nothing to keep.
        masks = t[3][idx] if cfg.eval_mask_branch else None
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

    # Scores are sorted in descending order, so stop at the first one below the threshold.
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Allocate on the same device / dtype as the image so the masked copy below
    # does not mix CPU and GPU tensors.
    img_tmp = torch.zeros_like(img_gpu)
    if masks is not None:
        for i in range(num_dets_to_consider):
            class_name = cfg.dataset.class_names[classes[i]]
            if args.classes is None or class_name in args.classes:
                selected = masks[i] == 1
                img_tmp[selected] = img_gpu[selected]

    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    return (img_tmp * 255).byte().cpu().numpy()
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """
    Render the top detection's mask as a binary image, plus optional fps, boxes and labels.

    Note: If undo_transform=False then im_h and im_w are allowed to be None.

    Args:
        dets_out: raw network output handed to postprocess.
        img: input image. With undo_transform=False it is divided by 255 and its shape
            is unpacked as (h, w, channels).
        h, w: output height / width; replaced by img.shape when undo_transform is False.
        undo_transform: run undo_image_transformation on img before drawing.
        class_color: pick box colours by class index instead of detection index.
        mask_alpha: unused in this variant; kept for signature compatibility.
        fps_str: text drawn in the top-left corner when args.display_fps is set.

    Returns:
        uint8 numpy image: 255 where the first (highest-scoring) mask is set and 0
        elsewhere, or an all-zero image when masks are not drawn, with the optional
        overlays on top.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        try:
            t = postprocess(dets_out, w, h,
                            visualize_lincomb=args.display_lincomb,
                            crop_masks=args.crop,
                            score_threshold=args.score_threshold)
        finally:
            # Restore the global config even when postprocessing raises.
            cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

    # Console dump of every copied detection: class, box, score, rank.
    for index, (det_class, det_score, det_box) in enumerate(zip(classes, scores, boxes)):
        print(det_class, det_box, det_score, 'index', index)

    # Scores are sorted in descending order, so stop at the first one below the threshold.
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BRG, depending
            color = (color[2], color[1], color[0])
        if on_gpu is not None:
            color = torch.Tensor(color).to(on_gpu).float() / 255.
            color_cache[on_gpu][color_idx] = color
        return color

    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1
    text_color = [255, 255, 255]

    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # Only the first mask is kept; after this, masks is of size [1, h, w, 1]
        masks = masks[:1, :, :, None]
        # Binary image: 1 inside the mask, 0 outside, replicated over 3 channels.
        img_gpu = (masks.sum(dim=0) >= 1).float().expand(-1, -1, 3).contiguous()
    else:
        img_gpu *= 0

    if args.display_fps:
        # Draw the box for the fps on the GPU
        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]
        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()
    print(args.image, args.images)

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        for j in reversed(range(num_dets_to_consider)):
            # Plain Python ints keep the OpenCV drawing calls independent of the numpy scalar type.
            x1, y1, x2, y2 = (int(v) for v in boxes[j, :])
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
                text_pt = (x1, y1 - 3)

                # Filled label background just above the box corner, then the label itself.
                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    return img_numpy
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """
    Blend instance masks into the frame and label the objects reported by data_save.

    Note: If undo_transform=False then im_h and im_w are allowed to be None.

    Side effects on module globals: increments frame_count, overwrites mask_numpy
    (grayscale of the frame restricted to all kept masks) and mask_numpy1 (grayscale
    of the frame restricted to the last object's mask), and resets predict_pos[j]
    for objects whose obj_info[j][2] is not 1.

    NOTE(review): mask_picture is only assigned when cfg.eval_mask_branch is true but
    is used unconditionally, so this function requires the mask branch to be enabled.

    Args:
        dets_out: raw network output handed to postprocess.
        img: input image. With undo_transform=False it is divided by 255 and its shape
            is unpacked as (h, w, channels).
        h, w: output height / width; replaced by img.shape when undo_transform is False.
        undo_transform: run undo_image_transformation on img before drawing.
        class_color: pick colours from obj_info[j][0] instead of the index itself.
        mask_alpha: opacity of the mask overlay.
        fps_str: text drawn in the top-left corner when args.display_fps is set.

    Returns:
        uint8 numpy image with the overlays drawn.
    """
    # A `global` statement covers the whole function wherever it is written, so all of
    # them are declared once here instead of inside the branches and loops below.
    global frame_count, state_pre, flag, predict_pos, centerX, centerY, preX, preY, degree
    global mask_numpy, mask_numpy1, img_num, temp_x, temp_y

    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        try:
            t = postprocess(dets_out, w, h,
                            visualize_lincomb=args.display_lincomb,
                            crop_masks=args.crop,
                            score_threshold=args.score_threshold)
        finally:
            # Restore the global config even when postprocessing raises.
            cfg.rescore_bbox = save

    with timer.env('Copy'):
        # argsort of an argsort yields each score's rank; the detections are permuted by
        # that rank rather than sorted by descending score as in the stock variant.
        idx1 = t[1].argsort()
        idx = idx1.argsort()
        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
            mask_picture = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

    obj_info, obj_num = data_save(mask_picture, classes, scores, boxes)

    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (obj_info[j][0] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BRG, depending
            color = (color[2], color[1], color[0])
        if on_gpu is not None:
            color = torch.Tensor(color).to(on_gpu).float() / 255.
            color_cache[on_gpu][color_idx] = color
        return color

    # First, draw the masks on the GPU where we can do it really fast
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Frame restricted to the union of the kept masks, published as a grayscale global.
        mask_img = img_gpu * (masks.sum(dim=0) > 0.5).float()
        mask_numpy = (mask_img * 255).byte().cpu().numpy()
        mask_numpy = cv2.cvtColor(mask_numpy, cv2.COLOR_BGR2GRAY)

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat([
            get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1
    text_color = [255, 255, 255]

    if args.display_fps:
        # Draw the box for the fps on the GPU
        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]
        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        frame_count += 1
        for j in range(obj_num):
            # Frame restricted to this object's mask, published as a grayscale global.
            mask_image = mask_picture[j:j + 1, :, :, None]
            mask_image = img_gpu * (mask_image.sum(dim=0) > 0.5).float()
            mask_numpy1 = (mask_image * 255).byte().cpu().numpy()
            mask_numpy1 = cv2.cvtColor(mask_numpy1, cv2.COLOR_BGR2GRAY)

            info = obj_info[j]
            if info[2] == 1:
                color = get_color(info[0])
                # info[4] is read as: [2] = left, [3] = right, [4] = top, [5] = bottom.
                left, right, top, bottom = info[4][2], info[4][3], info[4][4], info[4][5]

                if args.display_bboxes:
                    cv2.rectangle(img_numpy, (left, top), (right, bottom), color, 1)

                if args.display_text:
                    _class = info[1]
                    text_str = '%s: %s' % (info[0], _class) if args.display_scores else _class

                    text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
                    text_pt = (left, top - 3)

                    cv2.rectangle(img_numpy, (left, top), (left + text_w, top - text_h - 4), color, -1)
                    cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)
            else:
                # Object not flagged: clear its stored position history and prediction.
                for i in range(2):
                    predict_pos[j][i] = [0]
                predict_pos[j][2] = []

    return img_numpy
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45):
    """
    Draw detections and overlay a patty count estimated by a pickled regressor.

    Note: If undo_transform=False then im_h and im_w are allowed to be None.

    When boxes or text are displayed, the region of the detection with class index 6
    (the tray) is cropped from the undecorated frame, resized to width 256, turned
    into a 32-bin normalised intensity histogram and fed to one of three regressors
    chosen by the most frequent visible patty type. The resulting count line is drawn
    in the bottom-right corner.

    Args:
        dets_out: raw network output handed to postprocess.
        img: input image. With undo_transform=False it is divided by 255 and its shape
            is unpacked as (h, w, channels).
        h, w: output height / width; replaced by img.shape when undo_transform is False.
        undo_transform: run undo_image_transformation on img before drawing.
        class_color: pick colours by class index instead of detection index.
        mask_alpha: opacity of the mask overlay.

    Returns:
        uint8 numpy image with the overlays drawn (the plain frame when nothing is detected).
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][:args.top_k]
        classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]]

    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    if num_dets_to_consider == 0:
        # No detections found so just output the original image
        return (img_gpu * 255).byte().cpu().numpy()

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BRG, depending
            color = (color[2], color[1], color[0])
        if on_gpu is not None:
            color = torch.Tensor(color).to(on_gpu).float() / 255.
            color_cache[on_gpu][color_idx] = color
        return color

    def bin_spatial(img, color_space='RGB', size=(32, 32)):
        """Return img (optionally colour-converted from RGB) flattened to 1-D; size is unused."""
        conversions = {
            'HSV': cv2.COLOR_RGB2HSV,
            'LUV': cv2.COLOR_RGB2LUV,
            'HLS': cv2.COLOR_RGB2HLS,
            'YUV': cv2.COLOR_RGB2YUV,
            'YCrCb': cv2.COLOR_RGB2YCrCb,
        }
        if color_space == 'RGB':
            feature_image = np.copy(img)
        elif color_space in conversions:
            feature_image = cv2.cvtColor(img, conversions[color_space])
        else:
            # Previously this fell through with feature_image unbound.
            raise ValueError('unsupported color_space: %r' % (color_space,))
        return feature_image.ravel()

    # First, draw the masks on the GPU where we can do it really fast
    if args.display_masks and cfg.eval_mask_branch:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat([
            get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    # Undecorated copy: every tray crop is taken from this, never from an earlier crop
    # or from the frame after boxes and labels have been painted on it.
    clean_img = img_numpy.copy()
    crop_tray_img = clean_img  # falls back to the whole frame when no tray is detected

    # Shared by the per-detection labels and the count overlay, so they are defined
    # even when args.display_text is off.
    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1
    text_color = [255, 255, 255]

    if args.display_text or args.display_bboxes:
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if classes[j] == 6:  # _class == "tray"
                tray = clean_img[y1:y2, x1:x2]
                if tray.size > 0:
                    # Resize to a fixed width of 256 while keeping the aspect ratio.
                    tray_h, tray_w = tray.shape[0], tray.shape[1]
                    aspect_ratio = tray_h / tray_w
                    new_h = max(1, int(aspect_ratio * 256))
                    resized = Image.fromarray(np.array(tray, dtype='uint8')).resize(
                        (256, new_h), Image.BICUBIC)
                    crop_tray_img = np.asarray(resized, dtype=float)

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
                text_pt = (x1, y1 - 3)

                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

        # counting not visible patties
        n_patties = np.sum(classes != 6)
        chicken_count = np.sum(classes == 0) + np.sum(classes == 1)
        ham_quarter_count = np.sum(classes == 2) + np.sum(classes == 3)
        ham_1by10_count = np.sum(classes == 4) + np.sum(classes == 5)

        # choose the regressor that matches the most frequent visible patty type
        labels = [chicken_count, ham_quarter_count, ham_1by10_count]
        max_index = labels.index(max(labels))
        if max_index == 0:
            model_path = "weights/regressor/cp_regressor_crop"
        elif max_index == 1:
            model_path = "weights/regressor/qp_regressor_crop"
        else:
            model_path = "weights/regressor/op_regressor_crop"

        # load the pickle model in memory, scale the input and feed it into the model
        import pickle
        from sklearn.preprocessing import StandardScaler

        # NOTE(review): pickle executes arbitrary code on load; these weight files must
        # come from a trusted source.
        with open(model_path, 'rb') as model_file:
            loaded_model = pickle.load(model_file)

        n_bin = 32
        originalFeatures = bin_spatial(crop_tray_img)
        originalFeatures, _ = np.histogram(originalFeatures, n_bin, density=True)
        originalFeatures = np.array([originalFeatures])
        # The scaler is fitted on this single histogram (column-wise over its 32 bins).
        sc_X = StandardScaler()
        originalFeatures = sc_X.fit_transform(
            originalFeatures.reshape(n_bin, 1)).reshape(1, n_bin)
        prediction = round(loaded_model.predict(originalFeatures)[0])

        # With fewer than 4 visible chicken patties the visible count replaces the regressor output.
        if max_index == 0 and chicken_count < 4:
            prediction = chicken_count

        count_text = "Calculated: {} Visible Patties: {} -> Chicken: {}, Ham Quarter: {}, HAM 1by10: {}".format(
            prediction, n_patties, chicken_count, ham_quarter_count, ham_1by10_count)
        count_text_w, count_text_h = cv2.getTextSize(count_text, font_face, font_scale, font_thickness)[0]
        # Anchor the count line in the bottom-right corner of the frame.
        count_text_pt = (w - count_text_w, h - count_text_h)
        cv2.putText(img_numpy, count_text, count_text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    return img_numpy
def prep_display_for_img(dets_out, img, h=None, w=None, undo_transform=True, class_color=False, mask_alpha=0.45):
    """
    Draw masks, boxes and semi-transparent class / score labels for a single image.

    Args:
        dets_out: raw network output handed to postprocess.
        img: input image. With undo_transform=False it is divided by 255 and its shape
            is unpacked as (h, w, channels).
        h, w: output height / width; replaced by img.shape when undo_transform is False.
        undo_transform: run undo_image_transformation on img before drawing.
        class_color: pick colours by class index instead of detection index.
        mask_alpha: opacity of the mask overlay.

    Returns:
        uint8 numpy image with the overlays drawn.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        try:
            t = postprocess(dets_out, w, h,
                            visualize_lincomb=args.display_lincomb,
                            crop_masks=args.crop,
                            score_threshold=args.score_threshold)
        finally:
            # Restore the global config even when postprocessing raises.
            cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:args.top_k]
        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        # Copy to numpy: tensors hash by identity (which defeats the colour cache when
        # used as a key) and are not valid OpenCV point coordinates.
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] if class_color else j) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            color = (color[2], color[1], color[0])
        if on_gpu is not None:
            color = torch.Tensor(color).to(on_gpu).float() / 255.
            color_cache[on_gpu][color_idx] = color
        return color

    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        colors = torch.cat([
            get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # Closed form of blending the masks one after another onto the image.
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if num_dets_to_consider == 0:
        return img_numpy

    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1

    def draw_label(image, text, x, y, background):
        # One semi-transparent text box; putBText returns the updated image.
        return ps.putBText(image, text, text_offset_x=x, text_offset_y=y,
                           vspace=0, hspace=0, font=font_face, font_scale=0.6,
                           thickness=font_thickness, alpha=0.7,
                           background_RGB=background, text_RGB=(255, 255, 255))

    if args.display_text or args.display_bboxes:
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = (int(v) for v in boxes[j, :])
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str_class = f"{_class}"
                img_numpy = draw_label(img_numpy, text_str_class, x1, y1, color)

                if args.display_scores:
                    # The score goes on its own line directly under the class name.
                    text_str_score = f": {score:.2f}"
                    text_w_class, text_h_class = cv2.getTextSize(
                        text_str_class, font_face, font_scale, font_thickness)[0]
                    img_numpy = draw_label(img_numpy, text_str_score, x1, y1 + text_h_class + 2, color)

    return img_numpy
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """
    Draw the detections on the frame and also return them as a list of prediction dicts.

    Note: If undo_transform=False then im_h and im_w are allowed to be None.

    Returns:
        (img_numpy, all_pred): the annotated uint8 numpy image and one dict per
        detection returned by postprocess, with keys 'category_id' (class index + 1),
        'segmentation' (result of binary_mask_to_rle on that detection's mask) and
        'score' (float).
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        t = postprocess(dets_out, w, h,
                        visualize_lincomb=args.display_lincomb,
                        crop_masks=args.crop,
                        score_threshold=args.score_threshold)
        torch.cuda.synchronize()

    # bbox
    print(t[2].cpu().numpy())

    # t holds classes, scores, boxes, masks; every instance becomes one prediction dict.
    categories = t[0].cpu().numpy()
    scores = t[1].cpu().numpy()
    masks = t[3].cpu().numpy()
    all_pred = [
        {
            'category_id': int(categories[i]) + 1,
            'segmentation': binary_mask_to_rle(masks[i, :, :]),
            'score': float(scores[i]),
        }
        for i in range(len(scores))
    ]

    with timer.env('Copy'):
        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][:args.top_k]
        classes, scores, boxes = [x[:args.top_k].cpu().numpy() for x in t[:3]]

    # Keep detections up to (not including) the first score below the threshold.
    limit = min(args.top_k, classes.shape[0])
    num_dets_to_consider = next(
        (k for k in range(limit) if scores[k] < args.score_threshold), limit)

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BRG, depending
            color = (color[2], color[1], color[0])
        if on_gpu is not None:
            color = torch.Tensor(color).to(on_gpu).float() / 255.
            color_cache[on_gpu][color_idx] = color
        return color

    # First, draw the masks on the GPU where we can do it really fast
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        gpu_index = img_gpu.device.index
        colors = torch.cat(
            [get_color(j, on_gpu=gpu_index).view(1, 1, 1, 3) for j in range(num_dets_to_consider)],
            dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]
        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]
        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy, all_pred

    if not (args.display_text or args.display_bboxes):
        return img_numpy, all_pred

    for j in reversed(range(num_dets_to_consider)):
        x1, y1, x2, y2 = boxes[j, :]
        color = get_color(j)
        score = scores[j]

        if args.display_bboxes:
            cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

        if args.display_text:
            _class = cfg.dataset.class_names[classes[j]]
            if args.display_scores:
                text_str = '%s: %.2f' % (_class, score)
            else:
                text_str = _class

            font_face = cv2.FONT_HERSHEY_DUPLEX
            font_scale = 0.6
            font_thickness = 1

            text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

            text_pt = (x1, y1 - 3)
            text_color = [255, 255, 255]

            # Filled label background just above the box corner, then the label itself.
            cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
            cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    return img_numpy, all_pred
def prep_display(self, dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """
    Draw masks (with overlapping box regions removed), boxes and labels on the frame.

    Note: If undo_transform=False then im_h and im_w are allowed to be None.

    Stores on self for later use: classes, scores, boxes, num_dets_to_consider,
    img_gpu, and (when the respective branches run) masks, masks_color,
    masks_color_2 and text_str.

    Args:
        dets_out: raw network output handed to postprocess.
        img: input image. With undo_transform=False it is divided by 255 and its shape
            is unpacked as (h, w, channels).
        h, w: output height / width; replaced by img.shape when undo_transform is False.
        undo_transform: run undo_image_transformation on img before drawing.
        class_color: pick colours by class index instead of detection index.
        mask_alpha: opacity of the mask overlay.
        fps_str: text drawn in the top-left corner when self.args.display_fps is set.

    Returns:
        uint8 numpy image with the overlays drawn.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        try:
            t = postprocess(dets_out, w, h,
                            visualize_lincomb=self.args.display_lincomb,
                            crop_masks=self.args.crop,
                            score_threshold=self.args.score_threshold)
        finally:
            # Restore the global config even when postprocessing raises.
            cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:self.args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]
    self.classes, self.scores, self.boxes = classes, scores, boxes

    # Scores are sorted in descending order, so stop at the first one below the threshold.
    num_dets_to_consider = min(self.args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < self.args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty lambda for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in self.color_cache[on_gpu]:
            return self.color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BRG, depending
            color = (color[2], color[1], color[0])
        if on_gpu is not None:
            color = torch.Tensor(color).to(on_gpu).float() / 255.
            self.color_cache[on_gpu][color_idx] = color
        return color

    # First, draw the masks on the GPU where we can do it really fast
    if self.args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # remove overlapped area of mask results
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            box_size = int((x2 - x1) * (y2 - y1))
            overlapped_list = []

            for k in reversed(range(num_dets_to_consider)):
                if k == j:
                    continue
                a1, b1, a2, b2 = boxes[k, :]
                box_size_sub = int((a2 - a1) * (b2 - b1))

                # Intersection rectangle of box j and box k.
                left, top = max(a1, x1), max(b1, y1)
                right, bottom = min(a2, x2), min(b2, y2)
                if right - left <= 0 or bottom - top <= 0:
                    continue  # boxes do not overlap

                # A positive intersection implies both boxes have positive extent,
                # so the two area divisions below are safe.
                S_jk = (right - left) * (bottom - top)
                if S_jk / box_size > 0.9:
                    # box j lies almost entirely inside box k: leave its mask alone
                    continue
                if S_jk / box_size_sub > 0.3:
                    # The overlap covers a large part of box k: cut it out of mask j.
                    overlapped_list.append([int(left), int(top), int(right), int(bottom)])

            for ov_left, ov_top, ov_right, ov_bottom in overlapped_list:
                masks[j][ov_top:ov_bottom, ov_left:ov_right] = 0
        self.masks = masks

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                            for j in range(num_dets_to_consider)], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha
        self.masks_color = colors
        self.masks_color_2 = masks_color

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # This whole block should be equivalent to:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    # Same tensor object as img_gpu, so the in-place fps box below is visible through it too.
    self.img_gpu = img_gpu

    font_face = cv2.FONT_HERSHEY_DUPLEX
    font_scale = 0.6
    font_thickness = 1
    text_color = [255, 255, 255]

    if self.args.display_fps:
        # Draw the box for the fps on the GPU
        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]
        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if self.args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    self.num_dets_to_consider = num_dets_to_consider
    if num_dets_to_consider == 0:
        return img_numpy

    if self.args.display_text or self.args.display_bboxes:
        # Label text per detection index, kept for callers.
        self.text_str = {}
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if self.args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if self.args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s:%d_%.2f' % (_class, classes[j], score) if self.args.display_scores else _class
                self.text_str[j] = text_str

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]
                # Unlike the stock layout, the label sits inside the box, below its top-left corner.
                text_pt = (x1, y1 + 15)

                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 + text_h + 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    return img_numpy
def prep_display_single(dets_out, img, pad_h, pad_w, img_ids=None, img_meta=None, undo_transform=True,
                        mask_alpha=0.45, fps_str='', display_mode=None):
    """Draw masks, boxes, prior boxes and labels for one image and return it.

    Note: If undo_transform=False then pad_h and pad_w are allowed to be None;
    they are overwritten from ``img.shape``.

    Args:
        dets_out: Network output; passed to ``postprocess_ytbvis`` and replaced
            by its result, from which the keys 'score', 'box', 'class',
            'box_ids', 'priors' and (optionally) 'segm' are read.
        img: Input image. With ``undo_transform`` it goes through
            ``undo_image_transformation``; otherwise it is used as-is, scaled
            by 1/255.
        pad_h, pad_w: Sizes forwarded to the transformation undo and to
            ``postprocess_ytbvis``.
        img_ids: Forwarded to ``postprocess_ytbvis``.
        img_meta: Forwarded to ``postprocess_ytbvis``; ``img_meta['img_shape']``
            is read to scale the priors whenever boxes or text are drawn.
        undo_transform: Whether ``img`` still carries the network's input
            transformation.
        mask_alpha: Blend factor used when compositing the masks.
        fps_str: Text drawn in the top-left corner when ``args.display_fps``.
        display_mode: 'train', 'test' or None (None means ground-truth
            results). Only 'test' adds the score and box id to the label.

    Returns:
        The rendered image as a uint8 numpy array.

    Side effects:
        Sets ``cfg.mask_proto_debug``, ``cfg.preserve_aspect_ratio`` and
        ``args.display_masks``.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, img_meta, pad_h, pad_w)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        pad_h, pad_w, _ = img.shape

    with timer.env('Postprocess'):
        cfg.mask_proto_debug = args.mask_proto_debug
        cfg.preserve_aspect_ratio = False
        dets_out = postprocess_ytbvis(dets_out, pad_h, pad_w, img_meta, display_mask=True,
                                      visualize_lincomb=args.display_lincomb,
                                      crop_masks=args.crop,
                                      score_threshold=cfg.eval_conf_thresh,
                                      img_ids=img_ids,
                                      mask_det_file=args.mask_det_file)
        torch.cuda.synchronize()

    scores = dets_out['score'][:args.top_k].detach().cpu().numpy()
    boxes = dets_out['box'][:args.top_k].detach().cpu().numpy()

    # Masks stay on the device; they are composited there further down.
    if 'segm' in dets_out:
        masks = dets_out['segm'][:args.top_k]
        args.display_masks = True
    else:
        args.display_masks = False

    classes = dets_out['class'][:args.top_k].detach().cpu().numpy()
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    color_type = dets_out['box_ids']

    # Stop at the first detection below the threshold.
    # NOTE(review): this assumes the scores arrive sorted in descending order - confirm.
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    if num_dets_to_consider == 0:
        # No detections found so just output the original image
        return (img_gpu * 255).byte().cpu().numpy()

    # First, draw the masks on the GPU where we can do it really fast
    if args.display_masks and cfg.eval_mask_branch:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat([
            get_color(j, color_type, on_gpu=img_gpu.device.index,
                      undo_transform=undo_transform).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # Closed form of:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]

        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    if args.display_text or args.display_bboxes:
        # Loop-invariant data: fetch the image shape and copy the priors to
        # the host once instead of once per detection.
        h, w, _ = img_meta['img_shape']
        priors = dets_out['priors'].detach().cpu().numpy()
        num_priors = dets_out['priors'].size(0)
        pcolor = [255, 0, 255]

        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j, color_type)
            score = scores[j]

            # Prior box for this detection: (cx, cy, w, h) scaled by the image
            # shape and converted to integer corner coordinates.
            has_prior = j < num_priors
            if has_prior:
                cpx, cpy, pw, ph = priors[j, :] * [w, h, w, h]
                px1, py1 = int(cpx - pw / 2.0), int(cpy - ph / 2.0)
                px2, py2 = int(cpx + pw / 2.0), int(cpy + ph / 2.0)

            # Keep the drawn box a couple of pixels inside the frame.
            # NOTE(review): the bounds look like they assume a 640x360 frame - confirm.
            x = torch.clamp(torch.tensor([x1, x2]), min=2, max=638).tolist()
            y = torch.clamp(torch.tensor([y1, y2]), min=2, max=358).tolist()

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x[0], y[0]), (x[1], y[1]), color, 1)
                if has_prior:
                    cv2.rectangle(img_numpy, (px1, py1), (px2, py2), pcolor, 2, lineType=8)

            if args.display_text:
                # Class ids are shifted by one; an id of 0 or less has no name.
                if classes[j] - 1 < 0:
                    _class = 'None'
                else:
                    _class = cfg.classes[classes[j] - 1]

                if display_mode == 'test':
                    text_str = '%s: %.2f: %s' % (
                        _class, score, str(color_type[j].cpu().numpy())
                    ) if args.display_scores else _class
                else:
                    text_str = '%s' % _class

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.5
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

                # The label is anchored on the unclamped top-left corner.
                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness,
                            cv2.LINE_AA)

    return img_numpy
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=False, mask_alpha=0.45, fps_str=''):
    """Draw masks, boxes and labels for one image (Jittor variant) and return it.

    Note: If undo_transform=False then h and w are allowed to be None; they
    are overwritten from ``img.shape``.

    Args:
        dets_out: Network output, passed to ``postprocess``.
        img: Input image. With ``undo_transform`` it goes through
            ``undo_image_transformation``; otherwise it is used as-is, scaled
            by 1/255.
        h, w: Image size forwarded to ``postprocess`` and the transform undo.
        undo_transform: Whether ``img`` still carries the network's input
            transformation. When false, colors are channel-swapped.
        class_color: Pick the color by class id instead of detection rank.
        mask_alpha: Blend factor used when compositing the masks.
        fps_str: Text drawn in the top-left corner when ``args.display_fps``.

    Returns:
        The rendered image as a uint8 numpy array.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = jt.array(img_numpy)
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        # Force rescoring for display, and always put the global setting back,
        # even if postprocess raises.
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        try:
            t = postprocess(dets_out, w, h, visualize_lincomb=args.display_lincomb,
                            crop_masks=args.crop,
                            score_threshold=args.score_threshold)
        finally:
            cfg.rescore_bbox = save

    with timer.env('Copy'):
        # Jittor's argsort returns an (index, value) pair. Unpack it first and
        # only then keep the top_k indices; slicing the pair itself never
        # truncates the indices and breaks the unpacking when top_k < 2.
        idx, _ = t[1].argsort(0, descending=True)
        idx = idx[:args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].numpy() for x in t[:3]]

    # Stop at the first detection below the threshold (scores are sorted above).
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty helper for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]

        color = COLORS[color_idx]
        if not undo_transform:
            # The image might come in as RGB or BRG, depending
            color = (color[2], color[1], color[0])
        if on_gpu is not None:
            color = jt.array(list(color)).float() / 255.
            color_cache[on_gpu][color_idx] = color
        return color

    # First, draw the masks on the GPU where we can do it really fast
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider].unsqueeze(3)

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = jt.contrib.concat([
            get_color(j, 0).view(1, 1, 1, 3)
            for j in range(num_dets_to_consider)
        ], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors.repeat(
            1, masks.shape[1], masks.shape[2], 1) * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # Closed form of:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]

        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).uint8().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        # Reverse order, so the first-ranked detection is drawn last (on top).
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 1)

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                # Filled label background above the box corner, then the text.
                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness,
                            cv2.LINE_AA)

    return img_numpy
def prep_display(dets_out, img, h, w, undo_transform=True, class_color=True, mask_alpha=0.45, fps_str=''):
    """Draw masks, boxes and labels for one image (single fixed color) and return it.

    Note: If undo_transform=False then h and w are allowed to be None; they
    are overwritten from ``img.shape``.

    Args:
        dets_out: Network output, passed to ``postprocess``.
        img: Input image. With ``undo_transform`` it goes through
            ``undo_image_transformation``; otherwise it is used as-is, scaled
            by 1/255.
        h, w: Image size forwarded to ``postprocess`` and the transform undo.
        undo_transform: Whether ``img`` still carries the network's input
            transformation. When false, the color is channel-swapped.
        class_color: Only selects the color-cache key here (class id vs.
            detection rank); every detection is drawn in the same color.
        mask_alpha: Blend factor used when compositing the masks.
        fps_str: Text drawn in the top-left corner when ``args.display_fps``.

    Returns:
        The rendered image as a uint8 numpy array.
    """
    if undo_transform:
        img_numpy = undo_image_transformation(img, w, h)
        img_gpu = torch.Tensor(img_numpy).cuda()
    else:
        img_gpu = img / 255.0
        h, w, _ = img.shape

    with timer.env('Postprocess'):
        # Force rescoring for display, and always put the global setting back,
        # even if postprocess raises.
        save = cfg.rescore_bbox
        cfg.rescore_bbox = True
        try:
            t = postprocess(dets_out, w, h, visualize_lincomb=args.display_lincomb,
                            crop_masks=args.crop,
                            score_threshold=args.score_threshold)
        finally:
            cfg.rescore_bbox = save

    with timer.env('Copy'):
        idx = t[1].argsort(0, descending=True)[:args.top_k]

        if cfg.eval_mask_branch:
            # Masks are drawn on the GPU, so don't copy
            masks = t[3][idx]
        classes, scores, boxes = [x[idx].cpu().numpy() for x in t[:3]]

    # Stop at the first detection below the threshold (scores are sorted above).
    num_dets_to_consider = min(args.top_k, classes.shape[0])
    for j in range(num_dets_to_consider):
        if scores[j] < args.score_threshold:
            num_dets_to_consider = j
            break

    # Quick and dirty helper for selecting the color for a particular index
    # Also keeps track of a per-gpu color cache for maximum speed
    def get_color(j, on_gpu=None):
        global color_cache
        color_idx = (classes[j] * 5 if class_color else j * 5) % len(COLORS)

        if on_gpu is not None and color_idx in color_cache[on_gpu]:
            return color_cache[on_gpu][color_idx]

        # The COLORS palette is bypassed: one fixed light-blue color is used
        # for every detection, and it is still cached under color_idx.
        color = (100, 149, 237)
        if not undo_transform:
            # The image might come in as RGB or BRG, depending
            color = (color[2], color[1], color[0])
        if on_gpu is not None:
            color = torch.Tensor(color).to(on_gpu).float() / 255.
            color_cache[on_gpu][color_idx] = color
        return color

    # First, draw the masks on the GPU where we can do it really fast
    if args.display_masks and cfg.eval_mask_branch and num_dets_to_consider > 0:
        # After this, mask is of size [num_dets, h, w, 1]
        masks = masks[:num_dets_to_consider, :, :, None]

        # Prepare the RGB images for each mask given their color (size [num_dets, h, w, 1])
        colors = torch.cat([get_color(j, on_gpu=img_gpu.device.index).view(1, 1, 1, 3)
                            for j in range(num_dets_to_consider)], dim=0)
        masks_color = masks.repeat(1, 1, 1, 3) * colors * mask_alpha

        # This is 1 everywhere except for 1-mask_alpha where the mask is
        inv_alph_masks = masks * (-mask_alpha) + 1

        # Closed form of:
        #    for j in range(num_dets_to_consider):
        #        img_gpu = img_gpu * inv_alph_masks[j] + masks_color[j]
        masks_color_summand = masks_color[0]
        if num_dets_to_consider > 1:
            inv_alph_cumul = inv_alph_masks[:(num_dets_to_consider - 1)].cumprod(dim=0)
            masks_color_cumul = masks_color[1:] * inv_alph_cumul
            masks_color_summand += masks_color_cumul.sum(dim=0)

        img_gpu = img_gpu * inv_alph_masks.prod(dim=0) + masks_color_summand

    if args.display_fps:
        # Draw the box for the fps on the GPU
        font_face = cv2.FONT_HERSHEY_DUPLEX
        font_scale = 0.6
        font_thickness = 1

        text_w, text_h = cv2.getTextSize(fps_str, font_face, font_scale, font_thickness)[0]

        img_gpu[0:text_h + 8, 0:text_w + 8] *= 0.6  # 1 - Box alpha

    # Then draw the stuff that needs to be done on the cpu
    # Note, make sure this is a uint8 tensor or opencv will not anti alias text for whatever reason
    img_numpy = (img_gpu * 255).byte().cpu().numpy()

    if args.display_fps:
        # Draw the text on the CPU
        text_pt = (4, text_h + 2)
        text_color = [255, 255, 255]

        cv2.putText(img_numpy, fps_str, text_pt, font_face, font_scale, text_color, font_thickness, cv2.LINE_AA)

    if num_dets_to_consider == 0:
        return img_numpy

    if args.display_text or args.display_bboxes:
        # Reverse order, so the first-ranked detection is drawn last (on top).
        for j in reversed(range(num_dets_to_consider)):
            x1, y1, x2, y2 = boxes[j, :]
            color = get_color(j)
            score = scores[j]

            if args.display_bboxes:
                cv2.rectangle(img_numpy, (x1, y1), (x2, y2), color, 3)  # box thickness changed from 1 to 3

            if args.display_text:
                _class = cfg.dataset.class_names[classes[j]]
                text_str = '%s: %.2f' % (_class, score) if args.display_scores else _class

                font_face = cv2.FONT_HERSHEY_DUPLEX
                font_scale = 1  # changed from 0.6
                font_thickness = 1

                text_w, text_h = cv2.getTextSize(text_str, font_face, font_scale, font_thickness)[0]

                text_pt = (x1, y1 - 3)
                text_color = [255, 255, 255]

                # Filled label background above the box corner, then the text.
                cv2.rectangle(img_numpy, (x1, y1), (x1 + text_w, y1 - text_h - 4), color, -1)
                cv2.putText(img_numpy, text_str, text_pt, font_face, font_scale, text_color, font_thickness,
                            cv2.LINE_AA)

    return img_numpy