def __call__(self, src, bbox):
    # resize with random interpolation
    h, w, _ = src.shape
    interp = np.random.randint(1, 5)
    scale = 1.2
    src = timage.imresize(src, int(self._width * scale), int(self._height * scale), interp=interp)
    bbox = tbbox.resize(
        bbox, (w, h), (int(self._width * scale), int(self._height * scale)))

    # random color jittering
    img = experimental.image.random_color_distort(src)

    # random cropping
    h, w, _ = img.shape
    bbox, crop = random_crop_with_constraints(bbox, (w, h), self._height, self._width,
                                              min_scale=0.95, max_scale=1.05, max_trial=50)
    x0, y0, w, h = crop
    img = mx.image.fixed_crop(img, x0, y0, w, h)
    h, w, _ = img.shape
    img, flips = timage.random_flip(img, px=0.5)
    bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0])

    # resize with random interpolation
    h, w, _ = img.shape
    interp = np.random.randint(1, 5)
    img = timage.imresize(img, self._width, self._height, interp=interp)
    bbox = tbbox.resize(bbox, (w, h), (self._width, self._height))

    # random horizontal flip
    h, w, _ = img.shape
    img, flips = timage.random_flip(img, px=0.5)
    bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0])

    # to tensor
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._anchors is None:
        return img, bbox.astype(img.dtype)

    # generate training target so cpu workers can help reduce the workload on gpu
    gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
    gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
    cls_targets, box_targets, _ = self._target_generator(
        self._anchors, None, gt_bboxes, gt_ids)
    return img, cls_targets[0], box_targets[0]
def __call__(self, src, bbox):
    img = src
    # resize with random interpolation
    h, w, _ = img.shape
    interp = np.random.randint(1, 5)
    img = timage.imresize(img, self._width, self._height, interp=interp)
    bbox = tbbox.resize(bbox, (w, h), (self._width, self._height))

    if not self._val:
        h, w, _ = img.shape
        img, flips = timage.random_flip(img, px=0.5)
        bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0])

    # to tensor
    img = mx.nd.image.to_tensor(img)
    img[0, :, :] = mx.nd.subtract(img[0, :, :] * 256, self._mean[0])
    img[1, :, :] = mx.nd.subtract(img[1, :, :] * 256, self._mean[1])
    img[2, :, :] = mx.nd.subtract(img[2, :, :] * 256, self._mean[2])

    if self._anchors is None:
        return img, bbox.astype(img.dtype)

    # generate training target so cpu workers can help reduce the workload on gpu
    gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
    gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
    cls_targets, box_targets, _ = self._target_generator(
        self._anchors, None, gt_bboxes, gt_ids)
    return img, cls_targets[0], box_targets[0]
def __call__(self, src, label): """Apply transform to training image/label.""" # random color jittering img = random_color_distort(src) # random cropping #! keep aspect ration = 1 h, w, _ = img.shape bbox, crop = random_crop_with_constraints(label, (w, h)) x0, y0, w, h = crop img = mx.image.fixed_crop(img, x0, y0, w, h) # resize with random interpolation h, w, _ = img.shape interp = np.random.randint(0, 5) img = gimage.imresize(img, self._width, self._height, interp=interp) bbox = gbbox.resize(bbox, (w, h), (self._width, self._height)) # random horizontal flip h, w, _ = img.shape img, flips = gimage.random_flip(img, px=0.5) bbox = gbbox.flip(bbox, (w, h), flip_x=flips[0]) # to tensor img = mx.nd.image.to_tensor(img) img = mx.nd.image.normalize(img, mean=self._mean, std=self._std) if self._anchors is None: return img, bbox # generate training target so cpu workers can help reduce the workload on gpu gt_bboxes = mx.nd.array(bbox[:, :4]).expand_dims(0) gt_ids = mx.nd.zeros((1, gt_bboxes.shape[1], 1), dtype=gt_bboxes.dtype) cls_targets, box_targets, _ = self._target_generator( self._anchors, None, gt_bboxes, gt_ids) return img, cls_targets[0], box_targets[0]
def __call__(self, src, label): """Apply transform to training image/label.""" # random color jittering img = experimental.image.random_color_distort(src) # random expansion with prob 0.5 if np.random.uniform(0, 1) > 0.5: img, expand = timage.random_expand( img, fill=[m * 255 for m in self._mean]) bbox = tbbox.translate(label, x_offset=expand[0], y_offset=expand[1]) else: img, bbox = img, label # random cropping h, w, _ = img.shape bbox, crop = experimental.bbox.random_crop_with_constraints( bbox, (w, h)) x0, y0, w, h = crop img = mx.image.fixed_crop(img, x0, y0, w, h) # resize with random interpolation h, w, _ = img.shape interp = np.random.randint(0, 5) img = timage.imresize(img, self._width, self._height, interp=interp) bbox = tbbox.resize(bbox, (w, h), (self._width, self._height)) # random horizontal flip h, w, _ = img.shape img, flips = timage.random_flip(img, px=0.5) bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0]) if (self._bilateral_kernel_size is not None) and ( self._sigma_vals is not None) or self._grayscale: img = img.asnumpy() if (self._bilateral_kernel_size is not None) and (self._sigma_vals is not None): img = cv2.bilateralFilter(img, self._bilateral_kernel_size, self._sigma_vals, self._sigma_vals) if self._grayscale: img = np.dot(img[..., :3], [0.299, 0.587, 0.114]) img = np.repeat(img[:, :, None], 3, axis=2) img = nd.array(img) # to tensor img = mx.nd.image.to_tensor(img) img = mx.nd.image.normalize(img, mean=self._mean, std=self._std) if self._anchors is None: return img, bbox.astype(img.dtype) # generate training target so cpu workers can help reduce the workload on gpu gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4]) gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5]) cls_targets, box_targets, _ = self._target_generator( self._anchors, None, gt_bboxes, gt_ids) return img, cls_targets[0], box_targets[0]
def __call__(self, src, label, segm):
    """Apply transform to training image/label."""
    # resize shorter side but keep in max_size
    h, w, _ = src.shape
    if self._random_resize:
        short = randint(self._short[0], self._short[1])
    else:
        short = self._short
    img = timage.resize_short_within(src, short, self._max_size, interp=1)
    bbox = tbbox.resize(label, (w, h), (img.shape[1], img.shape[0]))
    # segm = [tmask.resize(polys, (w, h), (img.shape[1], img.shape[0])) for polys in segm]

    # random horizontal flip
    h, w, _ = img.shape
    img, flips = timage.random_flip(img, px=0.5)
    bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0])
    # segm = [tmask.flip(polys, (w, h), flip_x=flips[0]) for polys in segm]

    # gt_masks (n, im_height, im_width) of uint8 -> float32 (cannot take uint8)
    # masks = [mx.nd.array(tmask.to_mask(polys, (w, h))) for polys in segm]
    masks = cocomask.decode(segm)  # h x w x n
    mask_list = []
    for i in range(masks.shape[-1]):
        mask = cv2.resize(masks[:, :, i], (img.shape[1], img.shape[0]),
                          interpolation=cv2.INTER_NEAREST)
        mask_list.append(mx.nd.array(mask))
    # n * (im_height, im_width) -> (n, im_height, im_width)
    masks = mx.nd.stack(*mask_list, axis=0)
    if flips[0]:
        masks = mx.nd.flip(masks, axis=2)

    # to tensor
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._anchors is None:
        return img, bbox.astype(img.dtype), masks

    # generate RPN target so cpu workers can help reduce the workload
    # feat_h, feat_w = (img.shape[1] // self._stride, img.shape[2] // self._stride)
    gt_bboxes = mx.nd.array(bbox[:, :4])
    if self._multi_stage:
        oshapes = []
        anchor_targets = []
        for feat_sym in self._feat_sym:
            oshapes.append(feat_sym.infer_shape(data=(1, 3, img.shape[1], img.shape[2]))[1][0])
        for anchor, oshape in zip(self._anchors, oshapes):
            anchor = anchor[:, :, :oshape[2], :oshape[3], :].reshape((-1, 4))
            anchor_targets.append(anchor)
        anchor_targets = mx.nd.concat(*anchor_targets, dim=0)
        cls_target, box_target, box_mask = self._target_generator(
            gt_bboxes, anchor_targets, img.shape[2], img.shape[1])
    else:
        oshape = self._feat_sym.infer_shape(data=(1, 3, img.shape[1], img.shape[2]))[1][0]
        anchor = self._anchors[:, :, :oshape[2], :oshape[3], :].reshape((-1, 4))
        cls_target, box_target, box_mask = self._target_generator(
            gt_bboxes, anchor, img.shape[2], img.shape[1])
    return img, bbox.astype(img.dtype), masks, cls_target, box_target, box_mask
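# Illustrative sketch (assumes pycocotools is installed): cocomask.decode on a list of
# RLE-encoded masks returns a (height, width, num_instances) uint8 array, which is why
# the transform above iterates over the last axis before stacking masks into (n, H, W).
# The bitmap below is made up for demonstration only.
import numpy as np
from pycocotools import mask as cocomask

bitmap = np.zeros((4, 6), dtype=np.uint8)
bitmap[1:3, 2:5] = 1
# encode expects a Fortran-ordered uint8 array of shape (h, w, n)
rles = cocomask.encode(np.asfortranarray(np.stack([bitmap, bitmap], axis=-1)))
decoded = cocomask.decode(rles)
print(decoded.shape)  # (4, 6, 2)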
def __call__(self, src, label): """Apply transform to validation image/label.""" # resize h, w, _ = src.shape img = timage.imresize(src, self._width, self._height, interp=9) bbox = tbbox.resize(label, in_size=(w, h), out_size=(self._width, self._height)) img = mx.nd.image.to_tensor(img) img = mx.nd.image.normalize(img, mean=self._mean, std=self._std) return img, bbox.astype(img.dtype)
def __call__(self, src, label): """Apply transform to training image/label.""" """color distort""" # img = random_color_distort(src) # print("previous label shape = ", label.shape) target = np.zeros(shape=(label.shape[0], )) """Pyramid Anchor sampling""" img, boxes, label = self.random_baiducrop(src, label[:, :4], target) # print("label shape = ", label.shape) # print('boxes shape =', boxes.shape) bbox = boxes # img = mx.nd.array(img) """color distort""" img = mx.nd.array(img) img = random_color_distort(img) # """random crop, keep aspect ration=1""" # h, w, _ = img.shape # bbox, crop_size = random_crop_with_constraints(label, (w, h)) # x_offset, y_offset, new_width, new_height = crop_size # img = mx.image.fixed_crop(img, x_offset, y_offset, new_width, new_height) """resize with random interpolation""" h, w, _ = img.shape interp = np.random.randint(0, 5) img = gimage.imresize(img, self._width, self._height, interp=interp) bbox = gbbox.resize(bbox, (w, h), (self._width, self._height)) """random horizontal flip""" h, w, _ = img.shape img, flips = gimage.random_flip(img, px=0.5) bbox = gbbox.flip(bbox, (w, h), flip_x=flips[0]) """To Tensor & Normalization""" img = mx.nd.image.to_tensor(img) img = mx.nd.image.normalize(img, mean=self._mean, std=self._std) if self._anchors is None: return img, bbox # @TODO: generating training target so cpu workers can help reduce the workload on gpu face_anchors, head_anchors, body_anchors = self._anchors gt_bboxes = mx.nd.array(bbox[:, :4]).expand_dims(0) gt_ids = mx.nd.zeros((1, gt_bboxes.shape[1], 1), dtype=gt_bboxes.dtype) face_cls_targets, face_box_targets, _ = self._target_generator( face_anchors, None, gt_bboxes, gt_ids) head_cls_targets, head_box_targets, _ = self._target_generator( head_anchors, None, gt_bboxes, gt_ids) body_cls_targets, body_box_targets, _ = self._target_generator( body_anchors, None, gt_bboxes, gt_ids) return img, \ face_cls_targets[0], head_cls_targets[0], body_cls_targets[0], \ face_box_targets[0], head_box_targets[0], body_box_targets[0]
def transform_gt_bbox(img_path, model, bbox):
    if model == 'yolo':
        short, max_size = 416, 1024
    elif model == 'rcnn':
        short, max_size = 600, 1000
    img = mx.image.imread(img_path)
    h, w, _ = img.shape
    resized_img = timage.resize_short_within(img, short, max_size)
    bbox = tbbox.resize(bbox, (w, h), (resized_img.shape[1], resized_img.shape[0]))
    return bbox
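# Hypothetical usage of transform_gt_bbox; 'sample.jpg' and the box values are made up.
# Boxes are expected as pixel-coordinate (xmin, ymin, xmax, ymax) rows, and they are
# rescaled to match the short-side resize each detector uses.
sample_bbox = np.array([[48., 240., 195., 371.], [8., 12., 352., 498.]])
yolo_bbox = transform_gt_bbox('sample.jpg', 'yolo', sample_bbox)
rcnn_bbox = transform_gt_bbox('sample.jpg', 'rcnn', sample_bbox)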
def process_frame(image, net, ctx):
    # currently only supports batch size 1 (TODO)
    image = np.squeeze(image)
    image = mx.nd.array(image, dtype='uint8')
    x, _ = transform_test(image, 600, max_size=1000)
    x = x.copyto(ctx[0])

    # get prediction results
    ids, scores, bboxes = net(x)

    oh, ow, _ = image.shape
    _, _, ih, iw = x.shape
    bboxes[0] = tbbox.resize(bboxes[0], in_size=(iw, ih), out_size=(ow, oh))
    return bboxes[0].asnumpy(), scores[0].asnumpy(), ids[0].asnumpy()
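# Hypothetical driver for process_frame (a sketch, not the original entry point): assumes
# a pretrained GluonCV Faster R-CNN model and that process_frame is importable; the camera
# index and model name are illustrative choices.
import cv2
import mxnet as mx
from gluoncv import model_zoo

ctx = [mx.cpu()]
net = model_zoo.get_model('faster_rcnn_resnet50_v1b_voc', pretrained=True, ctx=ctx[0])

cap = cv2.VideoCapture(0)
ret, frame = cap.read()
cap.release()
if ret:
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # network expects RGB input
    bboxes, scores, ids = process_frame(frame, net, ctx)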
def __call__(self, src, label): """Apply transform to training image/label.""" # random color jittering img = experimental.image.random_color_distort(src) # random expansion with prob 0.5 if np.random.uniform(0, 1) > 0.5: img, expand = timage.random_expand( img, fill=[m * 255 for m in self._mean]) bbox = tbbox.translate(label, x_offset=expand[0], y_offset=expand[1]) else: img, bbox = img, label # random cropping h, w, _ = img.shape bbox, crop = experimental.bbox.random_crop_with_constraints( bbox, (w, h)) x0, y0, w, h = crop img = mx.image.fixed_crop(img, x0, y0, w, h) # resize with random interpolation h, w, _ = img.shape interp = np.random.randint(0, 5) img = timage.imresize(img, self._width, self._height, interp=interp) bbox = tbbox.resize(bbox, (w, h), (self._width, self._height)) # random horizontal flip h, w, _ = img.shape img, flips = timage.random_flip(img, px=0.5) bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0]) # to tensor img = mx.nd.image.to_tensor(img) img = mx.nd.image.normalize(img, mean=self._mean, std=self._std) if self._anchors is None: return img, bbox.astype(img.dtype) # generate training target so cpu workers can help reduce the workload on gpu gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4]) gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5]) anchor_cls_targets, anchor_box_targets, _ = self._target_generator( self._anchors, None, gt_bboxes, gt_ids) anchor_cls_targets = mx.nd.where(anchor_cls_targets > 0, mx.nd.ones_like(anchor_cls_targets), anchor_cls_targets) # positive anchor is 1, negative anchor is 0 and ignored is -1. return img, anchor_cls_targets[0], anchor_box_targets[ 0], bbox[:, :5].astype(img.dtype)
def __call__(self, src, bbox): """Apply transform to training image/label.""" if not self._val: # random color jittering src = experimental.image.random_color_distort(src) img = src # random cropping h, w, _ = img.shape bbox, crop = random_crop_with_constraints(bbox, (w, h), self._height, self._width, min_scale=0.9, max_scale=1, max_trial=50) x0, y0, w, h = crop img = mx.image.fixed_crop(img, x0, y0, w, h) # resize with random interpolation h, w, _ = img.shape interp = np.random.randint(1, 5) img = timage.imresize(img, self._width, self._height, interp=interp) bbox = tbbox.resize(bbox, (w, h), (self._width, self._height)) if not self._val: # random horizontal flip h, w, _ = img.shape img, flips = timage.random_flip(img, px=0.5) bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0]) # to tensor img = mx.nd.image.to_tensor(img) img[0, :, :] = mx.nd.subtract(img[0, :, :] * 256, self._mean[0]) img[1, :, :] = mx.nd.subtract(img[1, :, :] * 256, self._mean[1]) img[2, :, :] = mx.nd.subtract(img[2, :, :] * 256, self._mean[2]) if self._anchors is None: return img, bbox.astype(img.dtype) # generate training target so cpu workers can help reduce the workload on gpu gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4]) gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5]) cls_targets, box_targets, _ = self._target_generator( self._anchors, None, gt_bboxes, gt_ids) return img, cls_targets[0], box_targets[0]
def __call__(self, src, label): """Apply transform to training image/label.""" # random color jittering img = experimental.image.random_color_distort(src) # random expansion with prob 0.5 if np.random.uniform(0, 1) > 0.5: img, expand = timage.random_expand(img, fill=[m * 255 for m in self._mean]) bbox = tbbox.translate(label, x_offset=expand[0], y_offset=expand[1]) else: img, bbox = img, label # random cropping h, w, _ = img.shape bbox, crop = experimental.bbox.random_crop_with_constraints(bbox, (w, h)) x0, y0, w, h = crop img = mx.image.fixed_crop(img, x0, y0, w, h) # resize with random interpolation h, w, _ = img.shape interp = np.random.randint(0, 5) img = timage.imresize(img, self._width, self._height, interp=interp) bbox = tbbox.resize(bbox, (w, h), (self._width, self._height)) # random horizontal flip h, w, _ = img.shape img, flips = timage.random_flip(img, px=0.5) bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0]) # to tensor img = mx.nd.image.to_tensor(img) img = mx.nd.image.normalize(img, mean=self._mean, std=self._std) if self._anchors is None: return img, bbox.astype(img.dtype) #如果有anchors的输入,则执行下面的运算。计算以前的格式是: #gt_bboxes里面是一个图像上的box位置,实际像素点位 当前图片里面的框框个数x4 #gt_ids里面是对应图像上的label,当前图片里面的框框个数x1 #下面为batch_size腾出空间 gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4]) gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5]) cls_targets, box_targets, _ = self._target_generator( self._anchors, None, gt_bboxes, gt_ids) return img, cls_targets[0], box_targets[0]
def __call__(self, src, label): """Apply transform to validation image/label.""" # resize with random interpolation h, w, _ = src.shape img = timage.imresize(src, self._width, self._height, interp=9) bbox = tbbox.resize(label, (w, h), (self._width, self._height)) # to tensor img = mx.nd.image.to_tensor(img) img = mx.nd.image.normalize(img, mean=self._mean, std=self._std) if self._anchors is None: return img, bbox.astype(img.dtype) # generate training target so cpu workers can help reduce the workload on gpu gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4]) gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5]) cls_targets, box_targets, _ = self._target_generator( self._anchors, None, gt_bboxes, gt_ids) return img, cls_targets[0], box_targets[0]
def new_trainloader_call(self, src, label):
    '''define a new call for trainloader by changing the data augmentation'''
    # random color jittering
    img = experimental.image.random_color_distort(src)

    # random expansion with prob 0.5
    if np.random.uniform(0, 1) > 0.5:
        img, expand = timage.random_expand(img, fill=[m * 255 for m in self._mean])
        bbox = tbbox.translate(label, x_offset=expand[0], y_offset=expand[1])
    else:
        img, bbox = img, label

    # random cropping
    h, w, _ = img.shape
    bbox, crop = experimental.bbox.random_crop_with_constraints(bbox, (w, h))
    x0, y0, w, h = crop
    img = mx.image.fixed_crop(img, x0, y0, w, h)

    # resize with random interpolation
    h, w, _ = img.shape
    interp = np.random.randint(0, 5)
    img = timage.imresize(img, self._width, self._height, interp=interp)
    bbox = tbbox.resize(bbox, (w, h), (self._width, self._height))

    # to tensor
    img = mx.nd.image.to_tensor(img)
    img = mx.nd.image.normalize(img, mean=self._mean, std=self._std)

    if self._anchors is None:
        return img, bbox.astype(img.dtype)

    # generate training target so cpu workers can help reduce the workload on gpu
    gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4])
    gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5])
    cls_targets, box_targets, _ = self._target_generator(
        self._anchors, None, gt_bboxes, gt_ids)
    return img, cls_targets[0], box_targets[0]
def __call__(self, src, label): """Apply transform to validation image/label.""" # resize h, w, _ = src.shape img = timage.imresize(src, self._width, self._height, interp=9) bbox = tbbox.resize(label, in_size=(w, h), out_size=(self._width, self._height)) if (self._bilateral_kernel_size is not None) and ( self._sigma_vals is not None) or self._grayscale: img = img.asnumpy() if (self._bilateral_kernel_size is not None) and (self._sigma_vals is not None): img = cv2.bilateralFilter(img, self._bilateral_kernel_size, self._sigma_vals, self._sigma_vals) if self._grayscale: img = np.dot(img[..., :3], [0.299, 0.587, 0.114]) img = np.repeat(img[:, :, None], 3, axis=2) img = nd.array(img) img = mx.nd.image.to_tensor(img) img = mx.nd.image.normalize(img, mean=self._mean, std=self._std) return img, bbox.astype(img.dtype)
def __call__(self, src, label): """Apply transform to training image/label.""" # random color jittering img = experimental.image.random_color_distort(src) # img, bbox = img,label # random expansion with prob 0.5 if np.random.uniform(0, 1) > 0.5: img, expand = timage.random_expand( img, fill=[m * 255 for m in self._mean]) bbox = tbbox.translate(label, x_offset=expand[0], y_offset=expand[1]) else: img, bbox = img, label # random cropping h, w, _ = img.shape bbox, crop = experimental.bbox.random_crop_with_constraints( bbox, (w, h)) x0, y0, w, h = crop img = mx.image.fixed_crop(img, x0, y0, w, h) # resize with random interpolation h, w, _ = img.shape interp = np.random.randint(0, 5) img = timage.imresize(img, self._width, self._height, interp=interp) bbox = tbbox.resize(bbox, (w, h), (self._width, self._height)) # random horizontal flip # h, w, _ = img.shape # img, flips = timage.random_flip(img, px=0.5) # bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0]) # rabdom rotation h, w, _ = img.shape clockwise_rotation_num = np.random.randint(0, 4) if clockwise_rotation_num == 0: pass elif clockwise_rotation_num == 1: ###顺时针90度 img = nd.transpose(img, [1, 0, 2]) img = img[:, ::-1, :] bbox = np.array([ h - bbox[:, 3], bbox[:, 0], h - bbox[:, 1], bbox[:, 2], bbox[:, 4], bbox[:, 5] ]).T bbox[:, 5] = (bbox[:, 5] + 1) % 4 elif clockwise_rotation_num == 2: ##顺时针180度 img = img[::-1, ::-1, :] bbox = np.array([ w - bbox[:, 2], h - bbox[:, 3], w - bbox[:, 0], h - bbox[:, 1], bbox[:, 4], bbox[:, 5] ]).T bbox[:, 5] = (bbox[:, 5] + 2) % 4 else: # 顺时针270度 img = nd.transpose(img, [1, 0, 2]) img = img[::-1, :, :] bbox = np.array([ bbox[:, 1], w - bbox[:, 2], bbox[:, 3], w - bbox[:, 0], bbox[:, 4], bbox[:, 5] ]).T bbox[:, 5] = (bbox[:, 5] + 3) % 4 # to tensor img = mx.nd.image.to_tensor(img) img = mx.nd.image.normalize(img, mean=self._mean, std=self._std) if self._anchors is None: return img, bbox.astype(img.dtype) # generate training target so cpu workers can help reduce the workload on gpu gt_bboxes = mx.nd.array(bbox[np.newaxis, :, :4]) gt_ids = mx.nd.array(bbox[np.newaxis, :, 4:5]) gt_ori = mx.nd.array(bbox[np.newaxis, :, 5:6]) cls_targets, box_targets, _ = self._target_generator( self._anchors, None, gt_bboxes, gt_ids) ori_targets, box_targets, _ = self._target_generator( self._anchors, None, gt_bboxes, gt_ori) return img, cls_targets[0], ori_targets[0], box_targets[0]
def network_inference(self):
    # a = cv2.waitKey(0)  # close window when ESC is pressed
    # while a is not 27:
    color_img = self.color_img
    depth_image = self.depth_image
    depth_height_res, depth_width_res = depth_image.shape

    # It is to correct the image size to fit a perfect square
    # color_img = np.zeros((640, 640, 3)).astype('uint8')
    # color_img[0:479] = color_img_raw[0:479]
    # color_img = color_img.astype('uint8')

    # Image pre-processing
    frame = mx.nd.array(cv2.cvtColor(color_img, cv2.COLOR_BGR2RGB)).astype('uint8')
    frame = timage.imresize(frame, self.width, self.height, 1)
    frame_tensor = mx.nd.image.to_tensor(frame)
    frame_tensor = mx.nd.image.normalize(frame_tensor, mean=self.mean, std=self.std)

    # with TimeIt('Obj detection time'):
    # Run frame through network
    class_IDs, scores, bounding_boxes = self.net(frame_tensor.expand_dims(axis=0).as_in_context(self.ctx))

    # Filter bounding boxes by their scores
    fbounding_boxes, fscores, fclass_IDs = self.filter_predictions(bounding_boxes, scores, class_IDs)

    # we need to resize the bounding boxes back to the original resolution (640, 480) (width, height)
    resized_bbox = tbbox.resize(fbounding_boxes, (self.width, self.height),
                                (self.depth_img_width, self.depth_img_height))
    img = timage.imresize(frame, self.depth_img_width, self.depth_img_height, 1)

    # check if the bounding box is inside the 300x300 area of the GG-CNN grasping area
    GGCNN_area = [190, 0, 480, 300]
    GGCNN_area_center = [320, 150]  # width, height
    img_2 = img.asnumpy()
    img = cv2.rectangle(img_2, (GGCNN_area[0], GGCNN_area[1]), (GGCNN_area[2], GGCNN_area[3]), (255, 0, 0), 1)

    bbox_list, fscores_list, fclass_IDs_list = [], [], []  # bounding boxes of the chosen class

    # If any object is found
    if fclass_IDs.size > 0:
        # If the requested object is found
        if self.pipeline_required_class in fclass_IDs:
            print('found obj')
            # we need to find all occurrences of the identified class to handle
            # situations where we have false positives as well
            chosen_class_index = [i for i, x in enumerate(fclass_IDs) if x == self.pipeline_required_class]
            for class_index in chosen_class_index:
                bbox_list.append(resized_bbox[class_index])
                fscores_list.append(fscores[class_index])
                fclass_IDs_list.append(fclass_IDs[class_index])

            max_score = max(fscores_list)
            largest_score_bb_index = [i for i, x in enumerate(fscores_list) if x == max_score]
            bbox_list = [bbox_list[largest_score_bb_index[0]]]
            fscores_list = [fscores_list[largest_score_bb_index[0]]]
            fclass_IDs_list = [fclass_IDs_list[largest_score_bb_index[0]]]

            bbox_list = self.resize_bounding_boxes(bbox_list)
            self.labels = fclass_IDs_list
            self.bboxes = bbox_list

            for index, bbox in enumerate(bbox_list):
                # bbox_list.append(bbox)
                # fscores_list.append(fscores_list[index])
                # fclass_IDs_list.append(fclass_IDs_list[index])
                if bbox[0] > GGCNN_area[0] and bbox[1] > GGCNN_area[1] and bbox[2] < GGCNN_area[2] and \
                        bbox[3] < GGCNN_area[3]:
                    print('obj inside ggcnn_area')
                    self.receive_bb_status = True
                    # Set the flag detection_ready
                    self.detection_ready.publish(True)
                    self.reposition_robot_flag.publish(False)
                else:
                    print('obj outside ggcnn_area')
                    bbox_center_point_x = (bbox[2] - bbox[0]) / 2 + bbox[0]  # width
                    bbox_center_point_y = (bbox[3] - bbox[1]) / 2 + bbox[1]  # height
                    dist_x = bbox_center_point_x - GGCNN_area_center[0]  # width
                    dist_y = GGCNN_area_center[1] - bbox_center_point_y  # height
                    dist_x_dir = dist_x / abs(dist_x)
                    dist_y_dir = dist_y / abs(dist_y)
                    ggcnn_center_area = depth_image[GGCNN_area_center[1], GGCNN_area_center[0]]
                    self.horizontal_FOV = 52
                    self.vertical_FOV = 60
                    largura_2 = 2.0 * ggcnn_center_area * \
                        np.tan(self.horizontal_FOV * abs(dist_x) / depth_width_res / 2.0 / 180.0 * np.pi) \
                        / 1000 * dist_x_dir
                    altura_2 = 2.0 * ggcnn_center_area * \
                        np.tan(self.vertical_FOV * abs(dist_y) / depth_height_res / 2.0 / 180.0 * np.pi) \
                        / 1000 * dist_y_dir
                    reposition_points = Float32MultiArray()
                    reposition_points.data = [largura_2, altura_2]
                    self.reposition_coord.publish(reposition_points)
                    self.detection_ready.publish(True)
                    self.reposition_robot_flag.publish(True)
        else:
            print('The object ({}) was not found'.format(self.classes[self.pipeline_required_class]))
            self.detection_ready.publish(False)
            self.reposition_robot_flag.publish(False)
    else:
        print('No objects (including the requested one ({})) were found'.format(self.classes[self.pipeline_required_class]))
        self.detection_ready.publish(False)
        self.reposition_robot_flag.publish(False)

    bbox_list = np.array(bbox_list)
    fscores_list = np.array(fscores_list)
    fclass_IDs_list = np.array(fclass_IDs_list)
    img = gcv.utils.viz.cv_plot_bbox(img, bbox_list, fscores_list, fclass_IDs_list, class_names=self.net.classes)

    depth_image = cv2.cvtColor(depth_image, cv2.COLOR_GRAY2BGR)
    depth_image = depth_image.astype('uint8')
    img = img.astype('uint8')
    added_image = cv2.addWeighted(depth_image, 0.7, img, 0.8, 0)
    self.img_pub.publish(CvBridge().cv2_to_imgmsg(added_image, 'bgr8'))