Example #1
def _get_image_blob(roidb, scale_inds):
  """Builds an input blob from the images in the roidb at the specified
  scales.
  """
  num_images = len(roidb)

  processed_ims_left = []
  processed_ims_right = []
  im_scales = []
  for i in range(num_images):
    img_left = cv2.imread(roidb[i]['img_left'])
    img_right = cv2.imread(roidb[i]['img_right'])

    if roidb[i]['flipped']:
      img_left_flip = img_right[:, ::-1, :].copy()
      img_right = img_left[:, ::-1, :].copy()
      img_left = img_left_flip

    target_size = cfg.TRAIN.SCALES[scale_inds[i]]
    img_left, img_right, im_scale = prep_im_for_blob(img_left, img_right, cfg.PIXEL_MEANS, target_size,
                    cfg.TRAIN.MAX_SIZE)
    im_scales.append(im_scale)
    processed_ims_left.append(img_left)
    processed_ims_right.append(img_right)

  # Create a blob to hold the input images
  blob_left, blob_right = im_list_to_blob(processed_ims_left, processed_ims_right)

  return blob_left, blob_right, im_scales
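One subtlety in the stereo variant above: under a horizontal flip the two views trade places, so the code flips each image and also swaps the pair. The invariant, isolated as a hypothetical standalone helper (not part of the original project):

def flip_stereo_pair(img_left, img_right):
    """Horizontally flip a stereo pair (H x W x C numpy arrays).

    A horizontal flip mirrors each view and also exchanges the roles of
    the left and right cameras, hence the swap done in Example #1.
    """
    return img_right[:, ::-1, :].copy(), img_left[:, ::-1, :].copy()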
Example #2
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
  scales.
  """
    num_images = len(roidb)

    processed_ims = []
    im_scales = []
    for i in range(num_images):
        #im = cv2.imread(roidb[i]['image'])
        im = imread(roidb[i]['image'])

        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)
        # flip the channels, since the original code used cv2 (RGB -> BGR)
        im = im[:, :, ::-1]

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
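Every snippet on this page wraps prep_im_for_blob. For orientation, the original py-faster-rcnn definition (which Example #2's call signature matches) is roughly the following; the forks below extend it with extra parameters (pixel stds, per-axis scales, fixed sizes, crop offsets), so treat this as a sketch rather than the definitive version:

import cv2
import numpy as np

def prep_im_for_blob(im, pixel_means, target_size, max_size):
    """Mean-subtract and scale an image for use in a blob."""
    im = im.astype(np.float32, copy=False)
    im -= pixel_means
    im_size_min = np.min(im.shape[0:2])
    im_size_max = np.max(im.shape[0:2])
    # Scale the short side to target_size, but cap the long side at max_size.
    im_scale = float(target_size) / float(im_size_min)
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                    interpolation=cv2.INTER_LINEAR)
    return im, im_scale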
Example #3
    def _imagePreprocess(self, blob, fix_size=False):
        assert not fix_size, "When grasp labels are included, the input image can not be fixed-size."
        keep_b = np.arange(blob['gt_boxes'].shape[0])
        keep_g = np.arange(blob['gt_grasps'].shape[0])
        if self.augmentation:
            blob['data'] = self.augImageOnly(blob['data'])
            blob['data'], blob['gt_boxes'], blob['gt_grasps'], keep_b, keep_g = \
                self.augObjdet(image=blob['data'], boxes=blob['gt_boxes'], grasps=blob['gt_grasps'],
                                boxes_keep=keep_b, grasps_keep=keep_g)

        # choose one predefined size, TODO: support multi-instance batch
        random_scale_ind = np.random.randint(0, high=len(cfg.SCALES))
        blob['data'], im_scale = prep_im_for_blob(blob['data'], cfg.SCALES[random_scale_ind], cfg.TRAIN.COMMON.MAX_SIZE, fix_size)
        blob['im_info'][:2] = (blob['data'].shape[0], blob['data'].shape[1])
        blob['im_info'][2:4] = (im_scale['y'], im_scale['x'])
        # modify bounding boxes according to resize parameters
        blob['gt_boxes'][:, :-1][:, 0::2] *= im_scale['x']
        blob['gt_boxes'][:, :-1][:, 1::2] *= im_scale['y']
        blob['gt_grasps'][:, 0::2] *= im_scale['x']
        blob['gt_grasps'][:, 1::2] *= im_scale['y']
        blob['gt_grasp_inds'] = blob['gt_grasp_inds'][keep_g]
        blob['data'] = image_normalize(blob['data'], mean=cfg.PIXEL_MEANS, std=cfg.PIXEL_STDS)
        blob['node_inds'] = blob['node_inds'][keep_b]
        blob['parent_lists'] = [blob['parent_lists'][p_ind] for p_ind in list(keep_b)]
        blob['child_lists'] = [blob['child_lists'][c_ind] for c_ind in list(keep_b)]
        return blob
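Note that this fork's prep_im_for_blob evidently returns a per-axis scale dict rather than a scalar, which is why x- and y-coordinates are rescaled separately above. The box update in isolation, assuming rows of (x1, y1, x2, y2, cls):

import numpy as np

def rescale_boxes(gt_boxes, im_scale):
    """Scale box coordinates by a per-axis factor dict {'x': sx, 'y': sy}.

    The last column holds the class label, so only the coordinate
    columns (x1, y1, x2, y2) are touched.
    """
    gt_boxes = gt_boxes.astype(np.float32, copy=True)
    gt_boxes[:, :-1][:, 0::2] *= im_scale['x']  # x1, x2
    gt_boxes[:, :-1][:, 1::2] *= im_scale['y']  # y1, y2
    return gt_boxes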
Example #4
def _get_image_blob(roidb, target_size):
  """Builds an input blob from the images in the roidb at the specified
  scales.
  """
  num_images = len(roidb)

  processed_ims = []
  im_scales = []
  for i in range(num_images):
    #im = cv2.imread(roidb[i]['image'])
    im = imread(roidb[i]['image'])

    if len(im.shape) == 2:
      im = im[:,:,np.newaxis]
      im = np.concatenate((im,im,im), axis=2)
    # flip the channels, since the original code used cv2 (RGB -> BGR)
    im = im[:,:,::-1]

    if roidb[i]['flipped']:
      im = im[:, ::-1, :]
    im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size[i],
                    cfg.TRAIN.MAX_SIZE)
    im_scales.append(im_scale)
    processed_ims.append(im)

  # Create a blob to hold the input images
  blob = im_list_to_blob(processed_ims)

  return blob, im_scales
Example #5
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    
    processed_ims = []
    im_scales = []
    im_shapes = np.zeros((0, 2), dtype=np.float32)
    for i in range(num_images):
        img_path = roidb[i]['image']

        im = cv2.imread(roidb[i]['image'])

        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        
        im, im_scale, im_shape = prep_im_for_blob(im, cfg.PIXEL_MEANS, 
                                                  target_size, 
                                                  cfg.TRAIN.MAX_SIZE)

        im_scales.append(im_scale)
        processed_ims.append(im)
        im_shapes = np.vstack((im_shapes, im_shape))

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)
    return blob, im_scales, im_shapes
Example #6
def _get_image_blob(roidb, scale_inds):
  """Builds an input blob from the images in the roidb at the specified
  scales.
  """
  num_images = len(roidb)

  processed_ims = []
  im_scales = []
  for i in range(num_images):
    #im = cv2.imread(roidb[i]['image'])
    im = imread(roidb[i]['image'])

    if len(im.shape) == 2:
      im = im[:,:,np.newaxis]
      im = np.concatenate((im,im,im), axis=2)
    # flip the channels, since the original code used cv2 (RGB -> BGR)
    im = im[:,:,::-1]

    if roidb[i]['flipped']:
      im = im[:, ::-1, :]
    target_size = cfg.TRAIN.SCALES[scale_inds[i]]
    # normalize (minus the mean) and scale, return the scaled_img & scale 
    im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                    cfg.TRAIN.MAX_SIZE)
    im_scales.append(im_scale)
    processed_ims.append(im)

  # Create a blob to hold the input images
  # im_list_to_blob has zero padding for different size of imgs
  blob = im_list_to_blob(processed_ims)  # a np.array image of [B, H, W, C]

  return blob, im_scales  # batch of imgs, list of scales
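Example #6's comments describe what im_list_to_blob does; the canonical py-faster-rcnn implementation is approximately this sketch (3-channel images assumed; some forks on this page pass a channel count or extra flags instead):

import numpy as np

def im_list_to_blob(ims):
    """Pack a list of images into one [B, H, W, C] array, zero-padding
    each image up to the largest height and width in the list."""
    max_shape = np.array([im.shape for im in ims]).max(axis=0)
    blob = np.zeros((len(ims), max_shape[0], max_shape[1], 3),
                    dtype=np.float32)
    for i, im in enumerate(ims):
        blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
    return blob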
Example #7
File: utils.py  Project: Tung-I/FRCNN
def support_im_preprocess(im_list, cfg, support_im_size):
    n_of_shot = len(im_list)
    support_data_all = np.zeros(
        (n_of_shot, 3, support_im_size, support_im_size), dtype=np.float32)
    for i, im in enumerate(im_list):
        im = im[:, :, ::-1]  # rgb -> bgr
        target_size = np.min(im.shape[0:2])  # don't change the size
        im, _ = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                 cfg.TRAIN.MAX_SIZE)
        _h, _w = im.shape[0], im.shape[1]
        if _h > _w:
            resize_scale = float(support_im_size) / float(_h)
            unfit_size = int(_w * resize_scale)
            im = cv2.resize(im, (unfit_size, support_im_size),
                            interpolation=cv2.INTER_LINEAR)
        else:
            resize_scale = float(support_im_size) / float(_w)
            unfit_size = int(_h * resize_scale)
            im = cv2.resize(im, (support_im_size, unfit_size),
                            interpolation=cv2.INTER_LINEAR)
        h, w = im.shape[0], im.shape[1]
        support_data_all[i, :, :h, :w] = np.transpose(im, (2, 0, 1))
    support_data = torch.from_numpy(support_data_all).unsqueeze(0)

    return support_data
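The resize-then-pad pattern in support_im_preprocess (scale so the long side equals support_im_size, then zero-pad to a square) recurs in Examples #8 and #19; isolated as a hypothetical helper, assuming an HWC float image:

import cv2
import numpy as np

def resize_and_pad_square(im, size):
    """Aspect-preserving resize so max(h, w) == size, then zero-pad
    bottom/right into a size x size square."""
    h, w = im.shape[:2]
    if h > w:
        new_h, new_w = size, int(w * float(size) / h)
    else:
        new_h, new_w = int(h * float(size) / w), size
    im = cv2.resize(im, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
    out = np.zeros((size, size, im.shape[2]), dtype=np.float32)
    out[:new_h, :new_w, :] = im
    return out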
Example #8
    def __getitem__(self, index):
        # testing
        index_ratio = index
        # though it is called a minibatch, it actually contains only one image here
        minibatch_db = [self._roidb[index_ratio]]

        # load query
        blobs = get_minibatch(minibatch_db)
        data = torch.from_numpy(blobs['data'])
        im_info = torch.from_numpy(blobs['im_info'])  # (H, W, scale)
        data_height, data_width = data.size(1), data.size(2)
        data = data.permute(0, 3, 1, 2).contiguous().view(3, data_height, data_width)
        im_info = im_info.view(3)
        gt_boxes = torch.from_numpy(blobs['gt_boxes'])
        num_boxes = gt_boxes.size(0)
        all_cls_in_im = []
        for i in range(num_boxes):
            _cls = int(gt_boxes[i, 4])
            all_cls_in_im.append(_cls)
        all_cls_in_im = list(set(all_cls_in_im))
        if len(all_cls_in_im) > self.num_way:
            random.seed(self.epi_random_seed)  # fix the seed for reproducibility
            selected_ways = random.sample(all_cls_in_im, k=self.num_way)
        else:
            other_cls = list(range(self._num_classes))
            other_cls.remove(0)
            for _cls_ind in all_cls_in_im:
                other_cls.remove(_cls_ind)
            random.seed(self.epi_random_seed)  # fix the seed for reproducibility
            random_neg_cls = random.sample(other_cls, k=(self.num_way - len(all_cls_in_im)))
            selected_ways = all_cls_in_im
            selected_ways.extend(random_neg_cls)

        # get supports
        support_data_all = np.zeros((self.testing_shot * self.num_way, 3, self.support_im_size, self.support_im_size), dtype=np.float32)

        for n in range(self.num_way):
            selected_supports = self.support_pool[selected_ways[n]]
            
            for i, _path in enumerate(selected_supports):
                support_im = imread(_path)[:,:,::-1]  # rgb -> bgr
                target_size = np.min(support_im.shape[0:2])  # don't change the size
                support_im, _ = prep_im_for_blob(support_im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
                _h, _w = support_im.shape[0], support_im.shape[1]
                if _h > _w:
                    resize_scale = float(self.support_im_size) / float(_h)
                    unfit_size = int(_w * resize_scale)
                    support_im = cv2.resize(support_im, (unfit_size, self.support_im_size), interpolation=cv2.INTER_LINEAR)
                else:
                    resize_scale = float(self.support_im_size) / float(_w)
                    unfit_size = int(_h * resize_scale)
                    support_im = cv2.resize(support_im, (self.support_im_size, unfit_size), interpolation=cv2.INTER_LINEAR)
                h, w = support_im.shape[0], support_im.shape[1]
                support_data_all[self.testing_shot*n+i, :, :h, :w] = np.transpose(support_im, (2, 0, 1)) 
        supports = torch.from_numpy(support_data_all)  # convert once, after all ways are filled


        return data, im_info, gt_boxes, num_boxes, supports, selected_ways
Example #9
def _get_image_blob(roidb, scale_inds, RGB, NIR, DEPTH):
    """Builds an input blob from the images in the roidb at the specified
  scales.
  """
    num_images = len(roidb)

    processed_ims = []
    im_scales = []
    for i in range(num_images):
        if RGB:
            im = imread(roidb[i]['image'])
            if len(im.shape) == 2:
                im = im[:, :, np.newaxis]
                im = np.concatenate((im, im, im), axis=2)
            # flip the channels, since the original code used cv2 (RGB -> BGR)
            im = im[:, :, ::-1]
            if NIR or DEPTH:
                I_D = scipy.io.loadmat(roidb[i]['image'][:87] +
                                       '_intensity_depth.mat')
                if NIR:
                    im = np.concatenate(
                        (im, I_D['NIR_DEPTH_res_crop'][:, :, :1]), axis=2)
                if DEPTH:
                    im = np.concatenate(
                        (im, I_D['NIR_DEPTH_res_crop'][:, :, 1:]), axis=2)
        elif NIR:
            if not DEPTH:
                I_D = scipy.io.loadmat(roidb[i]['image'][:87] +
                                       '_intensity_depth.mat')
                im = I_D['NIR_DEPTH_res_crop'][:, :, :1]
                im = np.concatenate((im, im, im), axis=2)
            else:
                I_D = scipy.io.loadmat(roidb[i]['image'][:87] +
                                       '_intensity_depth.mat')
                im = I_D['NIR_DEPTH_res_crop']
                im = np.concatenate((im, im), axis=2)
        elif DEPTH:
            I_D = scipy.io.loadmat(roidb[i]['image'][:87] +
                                   '_intensity_depth.mat')
            im = I_D['NIR_DEPTH_res_crop'][:, :, 1:]
            im = np.concatenate((im, im, im), axis=2)
        else:
            print('No color space was selected')

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE, RGB, NIR, DEPTH)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims, RGB, NIR, DEPTH)

    return blob, im_scales
Example #10
def _get_clip_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified scales.
    """
    # print(roidb)
    clip_len = 8
    num_center_images = len(roidb)

    processed_clips = []
    im_scales = []

    for i in range(num_center_images):
        numf = roidb[i]['numf']
        key_frame = roidb[i]['image']
        key_frame_root_dir = key_frame[:-16]
        key_frame = key_frame.split('/')[-1]
        center_index = int(key_frame[5:-4])

        clip = []
        for j in range(clip_len):
            if center_index - clip_len // 2 + j > 0 and center_index - clip_len // 2 + j < int(
                    numf):
                im_path = os.path.join(
                    key_frame_root_dir,
                    "frame{:06d}.jpg".format(center_index - clip_len // 2 + j))
            elif center_index - clip_len // 2 + j <= 0:
                im_path = os.path.join(key_frame_root_dir,
                                       "frame{:06d}.jpg".format(1))
            else:
                im_path = os.path.join(key_frame_root_dir,
                                       "frame{:06d}.jpg".format(int(numf)))
            im = imread(im_path)

            if len(im.shape) == 2:
                im = im[:, :, np.newaxis]
                im = np.concatenate((im, im, im), axis=2)

            # flip the channels, since the original code used cv2 (RGB -> BGR)
            im = im[:, :, ::-1]

            if roidb[i]['flipped']:
                im = im[:, ::-1, :]

            target_size = cfg.TRAIN.SCALES[scale_inds[i]]
            im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                            cfg.TRAIN.MAX_SIZE)
            clip.append(im)

        im_scales.append(im_scale)
        processed_clips.append(clip)

    # Create a blob to hold the input clips
    blob = clip_list_to_blob(processed_clips, clip_len)

    return blob, im_scales
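The three-way boundary check inside the frame loop above clamps neighbor indices of the key frame into [1, numf]; with np.clip the same logic compresses to the sketch below (hypothetical helper, assuming frames are numbered frame000001.jpg onward):

import os
import numpy as np

def clip_frame_path(root_dir, center_index, clip_len, j, numf):
    # Neighbors that fall before the first or after the last frame are
    # clamped to the boundary frames, as in the if/elif/else above.
    idx = int(np.clip(center_index - clip_len // 2 + j, 1, int(numf)))
    return os.path.join(root_dir, "frame{:06d}.jpg".format(idx))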
Example #11
def _get_image_blob(roidb, scale_inds, depth=False):
  """Builds an input blob from the images in the roidb at the specified
  scales.
  """
  num_images = len(roidb)
  processed_ims = []
  im_scales = []
  for i in range(num_images):
    #im = cv2.imread(roidb[i]['image'])
    im = imread(roidb[i]['image'])
    depth_name = roidb[i]['image'].replace("JPEGImages","DepthImages")
    depth_val = imread(depth_name)
    depth_val = np.expand_dims(depth_val,-1)

    if len(im.shape) == 2:
      im = im[:,:,np.newaxis]
      im = np.concatenate((im,im,im), axis=2)
    # flip the channels, since the original code used cv2 (RGB -> BGR)
    im = im[:,:,::-1]
    if depth:
      im = np.concatenate([im,depth_val],-1)

    if roidb[i]['flipped']:
      im = im[:, ::-1, :]
    target_size = cfg.TRAIN.SCALES[scale_inds[i]]
    if depth:  
      im, im_scale = prep_im_for_blob(im, cfg.DEPTH_MEANS, target_size,
                      cfg.TRAIN.MAX_SIZE)
    else:
      im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                  cfg.TRAIN.MAX_SIZE)
    im_scales.append(im_scale)
    processed_ims.append(im)

  # Create a blob to hold the input images
  blob = im_list_to_blob(processed_ims)

  return blob, im_scales
Example #12
def query_im_preprocess(im_data, cfg):
    target_size = cfg.TRAIN.SCALES[0]
    im_data, im_scale = prep_im_for_blob(im_data, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
    im_data = torch.from_numpy(im_data)
    im_info = np.array([[im_data.shape[0], im_data.shape[1], im_scale]], dtype=np.float32)
    im_info = torch.from_numpy(im_info)
    gt_boxes = torch.from_numpy(np.array([0]))
    num_boxes = torch.from_numpy(np.array([0]))
    query = im_data.permute(2, 0, 1).contiguous().unsqueeze(0)
    
    return query, im_info, gt_boxes, num_boxes
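query_im_preprocess ends with the usual hand-off from a NumPy HWC image to a Torch 1 x C x H x W tensor; the conversion in isolation (a sketch):

import numpy as np
import torch

def hwc_to_nchw(im):
    """HWC float image -> contiguous 1 x C x H x W float tensor."""
    t = torch.from_numpy(np.ascontiguousarray(im, dtype=np.float32))
    return t.permute(2, 0, 1).contiguous().unsqueeze(0)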
Example #13
def _get_video_blob(roidb, scale_inds):
    """Builds an input blob from the videos in the roidb at the specified
    scales.
    """
    processed_videos = []
    video_scales = []
    for i, item in enumerate(roidb):
        # just one scale implemented
        video_length = cfg.TRAIN.LENGTH[scale_inds[0]]
        video = np.zeros(
            (video_length, cfg.TRAIN.CROP_SIZE, cfg.TRAIN.CROP_SIZE, 3))
        #if cfg.INPUT == 'video':
        j = 0
        #random_idx = [np.random.randint(cfg.TRAIN.FRAME_SIZE[1]-cfg.TRAIN.CROP_SIZE),
        #                np.random.randint(cfg.TRAIN.FRAME_SIZE[0]-cfg.TRAIN.CROP_SIZE)]
        image_w, image_h, crop_w, crop_h = cfg.TRAIN.FRAME_SIZE[
            1], cfg.TRAIN.FRAME_SIZE[
                0], cfg.TRAIN.CROP_SIZE, cfg.TRAIN.CROP_SIZE
        offsets = GroupMultiScaleCrop.fill_fix_offset(False, image_w, image_h,
                                                      crop_w, crop_h)
        random_idx = offsets[npr.choice(len(offsets))]
        if DEBUG:
            print("offsets: {}, random_idx: {}".format(offsets, random_idx))
        for video_info in item['frames']:
            prefix = item['fg_name'] if video_info[0] else item['bg_name']
            step = video_info[3] if cfg.INPUT == 'video' else 1
            for idx in range(video_info[1], video_info[2], step):
                frame = cv2.imread('%s/image_%s.jpg' %
                                   (prefix, str(idx + 1).zfill(5)))
                frame = prep_im_for_blob(frame, cfg.PIXEL_MEANS,
                                         tuple(cfg.TRAIN.FRAME_SIZE[::-1]),
                                         cfg.TRAIN.CROP_SIZE, random_idx)
                if item['flipped']:
                    frame = frame[:, ::-1, :]

                if DEBUG:
                    cv2.imshow('frame', frame / 255.0)
                    cv2.waitKey(0)
                    cv2.destroyAllWindows()

                video[j] = frame
                j = j + 1
        # pad to the full video length by repeating the last frame
        while j < video_length:
            video[j] = frame
            j = j + 1

        processed_videos.append(video)

    # Create a blob to hold the input images
    blob = video_list_to_blob(processed_videos)

    return blob
Example #14
def _get_image_blob(roidb, scale_inds):
  """Builds an input blob from the images in the roidb at the specified
  scales.
  """
  num_images = len(roidb)

  im_scales = []
  processed_ims = []
  processed_dps = []

  for i in range(num_images):
    im = imread(roidb[i]['image'])
    dp = np.load(roidb[i]['depth'])
    # dp = np.zeros((im.shape[0], im.shape[1], 7))

    if len(im.shape) == 2:
      im = im[:,:,np.newaxis]
      im = np.concatenate((im,im,im), axis=2)
    # flip the channels, since the original code used cv2 (RGB -> BGR)
    im = im[:,:,::-1]

    if roidb[i]['flipped']:
      im = im[:, ::-1, :]
      dp = dp[:, ::-1, :]
    target_size = cfg.TRAIN.SCALES[scale_inds[i]]
    im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                    cfg.TRAIN.MAX_SIZE)
    dp, de_scale = prep_im_for_blob(dp, cfg.DEPTH_MEANS, target_size,
                    cfg.TRAIN.MAX_SIZE)

    im_scales.append(im_scale)
    processed_ims.append(im)
    processed_dps.append(dp)

  # Create a blob to hold the input images
  im_blob = im_list_to_blob(processed_ims, 3)
  dp_blob = im_list_to_blob(processed_dps, 7)

  return im_blob, dp_blob, im_scales
Example #15
def _get_image_blob(roidb, scale_inds, augment=False, seed=2020):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    assert len(roidb) == 1, "Single batch only"

    # gt boxes: (x1, y1, x2, y2, cls)
    if cfg.TRAIN.USE_ALL_GT:
        # Include all ground truth boxes
        gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0]
    else:
        # For the COCO ground truth boxes, exclude the ones that are ''iscrowd''
        gt_inds = np.where(
            (roidb[0]['gt_classes'] != 0)
            & np.all(roidb[0]['gt_overlaps'].toarray() > -1.0, axis=1))[0]
    gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
    gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :]
    # gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0]
    gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds]

    num_images = len(roidb)

    processed_ims = []
    im_scales = []
    for i in range(num_images):
        # im = cv2.imread(roidb[i]['image'])
        im = imread(roidb[i]['image'])
        # print(roidb[i]['image'])
        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)
        # flip the channels, since the original code used cv2 (RGB -> BGR)
        im = im[:, :, ::-1]
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        # data augmentation
        if augment:
            im, gt_boxes = augmentor(im, gt_boxes, seed=seed)
        # imsave("target_aug.jpg", im[:, :, ::-1])
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)
        gt_boxes[:, 0:4] = gt_boxes[:, 0:4] * im_scale

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales, gt_boxes
Example #16
def get_video_blob(roidb):
    """Builds an input blob from the videos in the roidb at the specified
    scales.
    """
    processed_videos = []
    item = roidb

    for key in item:
        print(key, ": ", item[key])

    video_length = cfg.TRAIN.LENGTH[0]
    video = np.zeros(
        (video_length, cfg.TRAIN.CROP_SIZE, cfg.TRAIN.CROP_SIZE, 3))

    j = 0
    random_idx = [
        int((cfg.TRAIN.FRAME_SIZE[1] - cfg.TRAIN.CROP_SIZE) / 2),
        int((cfg.TRAIN.FRAME_SIZE[0] - cfg.TRAIN.CROP_SIZE) / 2)
    ]

    for video_info in item['frames']:
        step = video_info[3] if cfg.INPUT == 'video' else 1
        prefix = item['fg_name'] if video_info[0] else item['bg_name']
        for idx in range(video_info[1], video_info[2], step):
            frame = cv2.imread('%s/image_%s.jpg' %
                               (prefix, str(idx + 1).zfill(5)))
            frame = prep_im_for_blob(frame, cfg.PIXEL_MEANS,
                                     tuple(cfg.TRAIN.FRAME_SIZE[::-1]),
                                     cfg.TRAIN.CROP_SIZE, random_idx)

            if item['flipped']:
                frame = frame[:, ::-1, :]

            if DEBUG:
                cv2.imshow('frame', frame / 255.0)
                cv2.waitKey(0)
                cv2.destroyAllWindows()

            video[j] = frame
            j = j + 1

    # pad to the full video length by repeating the last frame
    while j < video_length:
        video[j] = frame
        j = j + 1
    processed_videos.append(video)

    # Create a blob to hold the input images
    blob = video_list_to_blob(processed_videos)

    return torch.from_numpy(blob)
Example #17
    def _imagePreprocess(self, blob, fix_size=True):
        keep = np.arange(blob['gt_grasps'].shape[0])
        if self.augmentation:
            blob['data'] = self.augImageOnly(blob['data'])
            blob['data'], _, blob['gt_grasps'], _, _ = self.augmGraspdet(image=blob['data'], grasps=blob['gt_grasps'], grasps_keep=keep)
        # choose one predefined size, TODO: support multi-instance batch
        random_scale_ind = np.random.randint(0, high=len(cfg.SCALES))
        blob['data'], im_scale = prep_im_for_blob(blob['data'], cfg.SCALES[random_scale_ind], cfg.TRAIN.COMMON.MAX_SIZE, fix_size)
        blob['im_info'][:2] = (blob['data'].shape[0], blob['data'].shape[1])
        blob['im_info'][2:4] = (im_scale['y'], im_scale['x'])
        blob['gt_grasps'][:, 0::2] *= im_scale['x']
        blob['gt_grasps'][:, 1::2] *= im_scale['y']
        blob['data'] = image_normalize(blob['data'], mean=cfg.PIXEL_MEANS, std=cfg.PIXEL_STDS)
        return blob
Example #18
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
  scales.
  """
    num_images = len(roidb)

    processed_ims = []
    processed_dls = []  # processed drive line segmentation GT

    im_scales = []
    # dl_scales = []

    for i in range(num_images):
        #im = cv2.imread(roidb[i]['image'])
        im = imread(roidb[i]['image'])
        # print("_get_image_blob() roidb[i]['image']", roidb[i]['image'])

        # Added by Jie: read the drive-line mask
        dl = imread(roidb[i]['gt_line_mask'])
        dl = dl.astype(np.uint8)
        # print("_get_image_blob() roidb[i]['gt_line_mask']", roidb[i]['gt_line_mask'])

        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)
        # flip the channels, since the original code used cv2 (RGB -> BGR)
        im = im[:, :, ::-1]

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
            dl = dl[:, ::-1]

        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        dl, dl_scale = prep_dl_for_blob(dl, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        # dl_scales.append(dl_scale)
        processed_ims.append(im)
        processed_dls.append(dl)

    # Create a blob to hold the input images
    blob_im = im_list_to_blob(processed_ims)

    blob_dl = dl_list_to_blob(processed_dls)

    return blob_im, im_scales, blob_dl  # , dl_scales
Example #19
    def __getitem__(self, index):
        # testing
        index_ratio = index
        # though it is called a minibatch, it actually contains only one image here
        minibatch_db = [self._roidb[index_ratio]]

        # load query
        blobs = get_minibatch(minibatch_db)
        data = torch.from_numpy(blobs['data'])
        im_info = torch.from_numpy(blobs['im_info'])  # (H, W, scale)
        data_height, data_width = data.size(1), data.size(2)
        data = data.permute(0, 3, 1,
                            2).contiguous().view(3, data_height, data_width)
        im_info = im_info.view(3)
        gt_boxes = torch.from_numpy(blobs['gt_boxes'])
        num_boxes = gt_boxes.size(0)

        # get supports
        support_data_all = np.zeros(
            (self.testing_shot, 3, self.support_im_size, self.support_im_size),
            dtype=np.float32)
        current_gt_class_id = int(gt_boxes[0][4])
        selected_supports = self.support_pool[current_gt_class_id]

        for i, _path in enumerate(selected_supports):
            support_im = imread(_path)[:, :, ::-1]  # rgb -> bgr
            target_size = np.min(
                support_im.shape[0:2])  # don't change the size
            support_im, _ = prep_im_for_blob(support_im, cfg.PIXEL_MEANS,
                                             target_size, cfg.TRAIN.MAX_SIZE)
            _h, _w = support_im.shape[0], support_im.shape[1]
            if _h > _w:
                resize_scale = float(self.support_im_size) / float(_h)
                unfit_size = int(_w * resize_scale)
                support_im = cv2.resize(support_im,
                                        (unfit_size, self.support_im_size),
                                        interpolation=cv2.INTER_LINEAR)
            else:
                resize_scale = float(self.support_im_size) / float(_w)
                unfit_size = int(_h * resize_scale)
                support_im = cv2.resize(support_im,
                                        (self.support_im_size, unfit_size),
                                        interpolation=cv2.INTER_LINEAR)
            h, w = support_im.shape[0], support_im.shape[1]
            support_data_all[i, :, :h, :w] = np.transpose(
                support_im, (2, 0, 1))
        supports = torch.from_numpy(support_data_all)

        return data, im_info, gt_boxes, num_boxes, supports
Example #20
    def load_query(self, choice, id=0):

        if self.training:
            # Randomly choose a query image of this category
            all_data = self._query[choice]
            data = random.choice(all_data)
        else:
            # Take the target category for testing
            catgory = self.cat_list[choice]
            # list all the candidate image
            all_data = self._query[catgory]

            # Use the image id to seed the RNG; the list l is the candidate
            # order, shuffled deterministically by that seed
            random.seed(id)
            l = list(range(len(all_data)))

            random.shuffle(l)
            # print ("l:", l)
            # choose the candidate sequence and take out the data information
            # position=l[self.query_position%len(l)]
            position = l[0]
            data = all_data[position]

        # Get image
        path = data['image_path']
        im = imread(path)

        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)

        im = crop(im, data['boxes'], cfg.TRAIN.query_size)
        # channel flip (RGB -> BGR) from the original cv2-based code, disabled here
        # im = im[:,:,::-1]
        if self.training and random.random() > 0.5:
            im = im[:, ::-1, :]

        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS,
                                        cfg.TRAIN.query_size,
                                        cfg.TRAIN.MAX_SIZE)

        query = im_list_to_blob([im])

        return query
Example #21
def _get_image_blob(roidb, scale_inds):
	"""Builds an input blob from the images in the roidb at the specified
	scales.
	"""
	num_images = len(roidb)

	processed_ims = []
	im_scales = []
	for i in range(num_images):
		im = fits.open(roidb[i]['image'], ignore_missing_end=True)[0].data

		### optional log transform
		# im = np.log(1 + np.abs(im))

		### normalization by liuqiang
		max_value = np.max(im)
		min_value = np.min(im)
		mean_value = np.mean(im)
		im = (im - mean_value)/(max_value - min_value)

		if len(im.shape) == 2:
			im = im[:,:,np.newaxis]
			im = np.concatenate((im,im,im), axis=2)
		# flip the channels, since the original code used cv2 (RGB -> BGR)
		im = im[:,:,::-1]

		if roidb[i]['flipped']:
			im = im[:, ::-1, :]
		target_size = cfg.TRAIN.SCALES[scale_inds[i]]
		im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
		                                cfg.TRAIN.MAX_SIZE)
		im_scales.append(im_scale)
		processed_ims.append(im)

	# Create a blob to hold the input images
	blob = im_list_to_blob(processed_ims)

	return blob, im_scales
Example #22
def _get_image_blob(roidb, scale_inds):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    # how many images there are; given the input -> only one
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    # only one image
    for i in range(num_images):
        # im = cv2.imread(roidb[i]['image'])  # changed because of version issues
        # read the 'image' key from the dict -> the file path, then load the image
        im = imageio.imread(roidb[i]['image'])
        # if the image is 2-D (no color information)
        if len(im.shape) == 2:
            # add a third dimension
            im = im[:, :, np.newaxis]
            # replicate along the third dimension (so 2-D images stay compatible)
            im = np.concatenate((im, im, im), axis=2)
        # flip the channels, since the original code used cv2 (RGB -> BGR);
        # slice trick [i:j:s] with a negative step on the third axis
        im = im[:, :, ::-1]

        # if flipping is requested, reverse the second (width) axis
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        # target size for the short side
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        # cfg.PIXEL_MEANS is the pixel mean, cfg.TRAIN.MAX_SIZE caps the long side;
        # returns the rescaled image and the scale factor
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        # collect the scale factors
        im_scales.append(im_scale)
        # collect the processed images
        processed_ims.append(im)
        # the list holds just one element here; presumably kept for compatibility

    # Create a blob to hold the input images
    # turn the image list into an np array
    blob = im_list_to_blob(processed_ims)
    # return the image blob and the scale factors
    return blob, im_scales
Example #23
def _get_image_blob(roidb, scale_inds):
    num_images = len(roidb)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        im = cv2.imread(roidb[i]['file_path'])

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]

        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    blob = im_list_to_blob(processed_ims)
    return blob, im_scales
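Example #23 is the most stripped-down form of this helper, so it doubles as a usage reference. A hypothetical call (the roidb entry and image path are invented for illustration; cfg comes from the project's config module):

import numpy as np

# Invented single-image roidb entry; real entries come from the dataset code.
roidb = [{'file_path': '/data/images/000001.jpg', 'flipped': False}]
# One random scale index per image, as the training loaders on this page do.
scale_inds = np.random.randint(0, high=len(cfg.TRAIN.SCALES), size=len(roidb))
blob, im_scales = _get_image_blob(roidb, scale_inds)
print(blob.shape)    # (1, H, W, 3), zero-padded to the batch maximum
print(im_scales[0])  # the resize factor applied to this image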
Example #24
    def _imagePreprocess(self, blob, fix_size=True):
        keep = np.arange(blob['gt_boxes'].shape[0])
        if self.augmentation:
            blob['data'] = self.augImageOnly(blob['data'])
            blob['data'], blob['gt_boxes'], _, keep, _ = self.augObjdet(image=blob['data'], boxes=blob['gt_boxes'], boxes_keep=keep)

        # choose one predefined size, TODO: support multi-instance batch
        random_scale_ind = np.random.randint(0, high=len(cfg.SCALES))
        blob['data'], im_scale = prep_im_for_blob(blob['data'], cfg.SCALES[random_scale_ind], cfg.TRAIN.COMMON.MAX_SIZE, fix_size)
        # modify bounding boxes according to resize parameters
        blob['im_info'][:2] = (blob['data'].shape[0], blob['data'].shape[1])
        blob['im_info'][2:4] = (im_scale['y'], im_scale['x'])
        blob['gt_boxes'][:, :-1][:, 0::2] *= im_scale['x']
        blob['gt_boxes'][:, :-1][:, 1::2] *= im_scale['y']
        blob['data'] = image_normalize(blob['data'], mean=cfg.PIXEL_MEANS, std=cfg.PIXEL_STDS)
        blob['node_inds'] = blob['node_inds'][keep]
        blob['parent_lists'] = [blob['parent_lists'][p_ind] for p_ind in list(keep)]
        blob['child_lists'] = [blob['child_lists'][c_ind] for c_ind in list(keep)]
        return blob
Example #25
def _get_image_blob(roidb, scale_inds, training):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)

    processed_ims = []
    im_scales = []
    for i in range(num_images):
        '''
        This part might need to be changed:
        delete the cv2-related code and, preferably,
        switch to a 2-D pipeline if possible.
        '''
        #im = cv2.imread(roidb[i]['image'])
        im = imageio.imread(roidb[i]['image'])

        if len(im.shape) == 2:
            im = im[:,:,np.newaxis]
            im = np.concatenate((im,im,im), axis=2)
        # 2d image to 3d image

        # flip the channels, since the original code used cv2 (RGB -> BGR)
        im = im[:,:,::-1]

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        # horizontal flip (reverses the width axis)
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        # 1 is always expected
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE, training)
        # im is resized with im_scale ratio
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    # change image lists to blob.
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
Example #26
def _get_image_blob(roidb, scale_inds, transfrom):
    """Builds an input blob from the images in the roidb at the specified
  scales.
  """
    num_images = len(roidb)

    processed_ims = []
    im_scales = []
    for i in range(num_images):
        im = cv2.imread(roidb[i]['image'])

        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE, transfrom)
        im_scales.append(im_scale)
        processed_ims.append(im)
    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
Example #27
def prepare_im_func(prefix, random_idx, frame_idx, flipped):        
    frame_path = os.path.join(prefix, 'image_'+str(frame_idx).zfill(5)+'.jpg')
    frame = cv2.imread(frame_path)
    # process the boundary frame
    if frame is None:          
        frames = sorted(os.listdir(prefix))
        frame_path = os.path.join(prefix, frames[-1])
        frame = cv2.imread(frame_path)         
    
    frame = prep_im_for_blob(frame, cfg.PIXEL_MEANS, tuple(cfg.TRAIN.FRAME_SIZE[::-1]), cfg.TRAIN.CROP_SIZE, random_idx)
       
    if flipped:
        frame = frame[:, ::-1, :]

    if DEBUG:
        cv2.imshow('frame', frame/255.0)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        
    return frame        
Example #28
    def _imagePreprocess(self, blob, fix_size=True):
        keep = np.arange(blob['gt_boxes'].shape[0])
        if self.augmentation:
            if self.augImageOnly is not None:
                blob['data'] = self.augImageOnly(blob['data'])
            if self.augObjdet is not None:
                blob['data'], blob['gt_boxes'], _, _, _ = \
                    self.augObjdet(image=blob['data'], boxes=blob['gt_boxes'], boxes_keep=keep)
        # choose one predefined size, TODO: support multi-instance batch
        random_scale_ind = np.random.randint(0, high=len(cfg.SCALES))
        blob['data'], im_scale = prep_im_for_blob(blob['data'],
                                                  cfg.SCALES[random_scale_ind],
                                                  cfg.TRAIN.COMMON.MAX_SIZE,
                                                  fix_size)
        # modify bounding boxes according to resize parameters
        blob['im_info'][:2] = (blob['data'].shape[0], blob['data'].shape[1])
        blob['im_info'][2:4] = (im_scale['y'], im_scale['x'])
        blob['gt_boxes'][:, :-1][:, 0::2] *= im_scale['x']
        blob['gt_boxes'][:, :-1][:, 1::2] *= im_scale['y']
        blob['data'] = image_normalize(blob['data'],
                                       mean=self.pixel_means,
                                       std=self.pixel_stds)
        return blob
Example #29
def get_image_blob(im):
  """Converts an image into a network input.
  Arguments:
    im: data of image
  Returns:
    blob (ndarray): a data blob holding an image pyramid
    im_scale_factors (list): list of image scales (relative to im) used
      in the image pyramid
  """
  im_scales = []
  processed_ims = []
  scale_inds = np.random.randint(0, high=len(cfg.TRAIN.SCALES), size=1)

  target_size = cfg.TRAIN.SCALES[scale_inds[0]]
  im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, cfg.PIXEL_STDS, target_size, cfg.TRAIN.MAX_SIZE)

  im_scales.append(im_scale)
  processed_ims.append(im)

  # Create a blob to hold the input images
  blob = im_list_to_blob(processed_ims)

  return blob, im_scales
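Unlike most snippets here, Example #29 also passes cfg.PIXEL_STDS, i.e. that fork standardizes instead of only mean-subtracting. Presumably its preprocessing reduces to something like this sketch:

import numpy as np

def normalize_image(im, pixel_means, pixel_stds):
    # (im - mean) / std per channel, as the extra PIXEL_STDS argument implies.
    im = im.astype(np.float32, copy=False)
    return (im - pixel_means) / pixel_stds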
Example #30
def _get_image_blob(roidb, scale_inds):
    """
    load the image from local path, subtract pixel mean and resize the image
    :param roidb: annotation list [{}] for one image, the {} contains all labels
    :param scale_inds: [0]
    :return blob: an image 4D array (1, 3, h, w)
            im_scales: a float number
    """
    num_images = len(roidb)  # 1
    processed_ims = []
    im_scales = []

    for i in range(num_images):
        im = cv2.imread(roidb[i]['image'])
        # im = imread(roidb[i]['image'])
        # if len(im.shape) == 2:
        #   im = im[:,:,np.newaxis]
        #   im = np.concatenate((im,im,im), axis=2)
        # flip the channels, since the original code used cv2 (RGB -> BGR)
        # im = im[:,:,::-1]

        if roidb[i]['flipped']:
            im = im[:, ::-1, :]

        # subtract pixel mean and resize the image
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]  # 600
        im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
                                        cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale)
        processed_ims.append(im)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
Example #31
def _get_video_blob(roidb, scale_inds, phase='train', step_frame=1, length_support=768):
    """Builds an input blob from the videos in the roidb at the specified
    scales.
    """
    processed_videos = []
    
    for i, item in enumerate(roidb):
        # just one scale implemented
        video_length = length_support
        video = np.zeros((video_length, cfg.TRAIN.CROP_SIZE,
                        cfg.TRAIN.CROP_SIZE, 3))
        j = 0

        if phase == 'train':
            random_idx = [np.random.randint(cfg.TRAIN.FRAME_SIZE[1]-cfg.TRAIN.CROP_SIZE),
                            np.random.randint(cfg.TRAIN.FRAME_SIZE[0]-cfg.TRAIN.CROP_SIZE)]
            # TODO: data augmentation
            #image_w, image_h, crop_w, crop_h = cfg.TRAIN.FRAME_SIZE[1], cfg.TRAIN.FRAME_SIZE[0], cfg.TRAIN.CROP_SIZE, cfg.TRAIN.CROP_SIZE
            #offsets = GroupMultiScaleCrop.fill_fix_offset(False, image_w, image_h, crop_w, crop_h) 
            #random_idx = offsets[ npr.choice(len(offsets)) ]
        else:
            random_idx = [int((cfg.TRAIN.FRAME_SIZE[1]-cfg.TRAIN.CROP_SIZE) / 2), 
                      int((cfg.TRAIN.FRAME_SIZE[0]-cfg.TRAIN.CROP_SIZE) / 2)]
                                      
        if DEBUG:
            print("random_idx: {}".format(random_idx))
            
        video_info = item['frames'][0] #for video_info in item['frames']:
        step = step_frame
        prefix = item['fg_name'] if video_info[0] else item['bg_name']


        if cfg.TEMP_SPARSE_SAMPLING:       
            if phase == 'train':
                segment_offsets = npr.randint(step, size=len(range(video_info[1], video_info[2], step)))
            else:
                segment_offsets = np.zeros(len(range(video_info[1], video_info[2], step))) + step // 2
        else:            
            segment_offsets = np.zeros(len(range(video_info[1], video_info[2], step)))

        times = math.ceil((video_info[2]-video_info[1])/length_support)
        for i, idx in enumerate(range(video_info[1], video_info[2], times*step)):
            frame_idx = int(segment_offsets[i]+idx+1)            
            frame_path = os.path.join(prefix, 'image_'+str(frame_idx).zfill(5)+'.jpg')
            frame = cv2.imread(frame_path)
            # process the boundary frame
            if frame is None:          
                frames = sorted(os.listdir(prefix))
                frame_path = os.path.join(prefix, frames[-1])
                frame = cv2.imread(frame_path)         
            # crop to 112 with a random offset
            frame = prep_im_for_blob(frame, cfg.PIXEL_MEANS, tuple(cfg.TRAIN.FRAME_SIZE[::-1]), cfg.TRAIN.CROP_SIZE, random_idx)
               
            if item['flipped']:
                frame = frame[:, ::-1, :]

            if DEBUG:
                cv2.imshow('frame', frame/255.0)
                cv2.waitKey(0)
                cv2.destroyAllWindows()

            video[j] = frame
            j = j + 1
            
        video[j:video_length] = video[j-1]
        
        processed_videos.append(video)
    # Create a blob to hold the input images, dimension trans CLHW
    blob = video_list_to_blob(processed_videos)

    return blob