Exemplo n.º 1
0
    def get_minibatch(self, imdb, vis=False):
        """Fetch one background minibatch and build its training targets.

        Args:
            imdb: dataset object; only ``get_background_minibatch()`` is
                called on it, and it must return
                ``(images, layouts, boxes, grids)``.
            vis: when True, also write debug images to
                ``<self.output_dir>/minibatch``: the input image and the
                layout with the ground-truth and grid boxes drawn on them,
                plus the ROI map.

        Returns:
            Tuple ``(images, layouts, rois, cens_onehot, sizes_onehot)``.
        """
        batch_size = cfg.TRAIN.BATCH_SIZE
        resolution = cfg.PREDICT_RESOLUTION
        grid_shape = cfg.GRID_SHAPE

        images, layouts, boxes, grids = imdb.get_background_minibatch()
        # Column 0 of `grids` feeds the center targets, column 1 the size
        # targets.
        rois = ds_utils.centers_to_rois(grids[:, 0], grid_shape[:2],
                                        grid_shape[:2])

        cens_onehot = to_categorical(grids[:, 0], self.cen_dims)
        sizes_onehot = to_categorical(grids[:, 1], self.size_dims)

        if vis:
            output_dir = osp.join(self.output_dir, 'minibatch')
            ds_utils.maybe_create(output_dir)

            # `resolution` is indexed as (height, width, ...).
            out_w, out_h = resolution[1], resolution[0]
            true_color = (0, 255, 0)
            grid_color = (255, 0, 0)

            for i in range(batch_size):
                img = images[i].copy()
                lyo = layouts[i].copy()

                cen_id = np.argmax(cens_onehot[i, :])
                size_id = np.argmax(sizes_onehot[i, :])

                # Ground-truth box, scaled to pixel coordinates.
                true_xywh = ds_utils.denormalize_xywh(
                    boxes[i, :].reshape((1, 4)), out_w, out_h)
                true_xyxy = ds_utils.xywh_to_xyxy(
                    true_xywh, out_w, out_h).squeeze()

                # Box decoded back from the discrete (center, size) indices.
                grid_ids = np.array([cen_id, size_id]).reshape((1, 2))
                grid_xywh = ds_utils.indices_to_boxes(grid_ids, grid_shape)
                grid_xywh = ds_utils.denormalize_xywh(grid_xywh, out_w, out_h)
                grid_xyxy = ds_utils.xywh_to_xyxy(
                    grid_xywh, out_w, out_h).squeeze()

                for canvas in (img, lyo):
                    for xyxy, color in ((true_xyxy, true_color),
                                        (grid_xyxy, grid_color)):
                        # cv2 drawing functions need plain integer pixel
                        # coordinates.
                        x1, y1, x2, y2 = [int(v) for v in xyxy]
                        cv2.rectangle(canvas, (x1, y1), (x2, y2), color, 1)

                roi = cv2.resize((rois[i] * 255).astype(np.uint8),
                                 (out_w, out_h))

                cv2.imwrite(osp.join(output_dir, 'img_%06d.jpg' % i), img)
                cv2.imwrite(osp.join(output_dir, 'lyo_%06d.jpg' % i), lyo)
                cv2.imwrite(osp.join(output_dir, 'roi_%06d.jpg' % i), roi)

        return images, layouts, rois, cens_onehot, sizes_onehot
Exemplo n.º 2
0
    def _load_json_annotation(self, index):
        """Load the ground-truth annotation of one image from its JSON file.

        The file is read from ``<self._annoPath>/<index>.json`` and must
        contain an ``'annotation'`` list whose items carry ``'category_id'``
        and ``'bbox'`` fields.

        Args:
            index: image identifier. An integer is expanded to
                ``COCO_<base image set>_<12-digit zero-padded index>``;
                anything else is used as the file stem directly.

        Returns:
            dict with keys ``'boxes'`` (uint16, N x 4), ``'gt_classes'``
            (int32), ``'gt_overlaps'`` (scipy CSR matrix, N x num_classes),
            ``'flipped'`` (always False), ``'seg_areas'`` (float32) and
            ``'set'``. Objects for which ``self._find_cls`` returns -1 are
            left out.
        """
        if isinstance(index, (int, np.integer)):
            index = ('COCO_' + self._getBaseImageSet() + '_' +
                     str(index).zfill(12))
        filename = os.path.join(self._annoPath, index + '.json')
        with open(filename, "r") as f:
            anno = json.load(f)

        # Resolve every category once and keep only the recognised objects.
        kept = []
        for obj in anno['annotation']:
            cls = self._find_cls(obj['category_id'])
            if cls != -1:
                kept.append((obj, cls))
        num_objs = len(kept)

        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
        # The "seg" area is just the box area
        seg_areas = np.zeros((num_objs), dtype=np.float32)

        for ix, (obj, cls) in enumerate(kept):
            boxes[ix, :] = xywh_to_xyxy(np.array(obj['bbox'])[np.newaxis, :])
            # Widen before multiplying: uint16 arithmetic wraps around for
            # areas above 65535.
            x1, y1, x2, y2 = [float(v) for v in boxes[ix, :]]
            # NOTE(review): gt_classes keeps the raw category_id while
            # overlaps is indexed by the value from _find_cls -- confirm this
            # asymmetry is intended.
            gt_classes[ix] = obj['category_id']
            overlaps[ix, cls] = 1.0
            seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1)

        overlaps = scipy.sparse.csr_matrix(overlaps)

        return {
            'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_overlaps': overlaps,
            'flipped': False,
            'seg_areas': seg_areas,
            'set': self._setID
        }
Exemplo n.º 3
0
    def draw_evaluation_layouts(self, output_dir, isRandom, ctxdb=None):
        """Write copies of the layout images with one box region zeroed out.

        Args:
            output_dir: directory that receives the images (it is not
                created here); each output keeps the base name of its
                source layout file.
            isRandom: when True, the blanked region is a randomly drawn grid
                box instead of ``entry['box']``.
            ctxdb: indexable collection of entries with ``'box'`` and
                ``'layout'`` keys (plus ``'width'`` and ``'height'`` when
                ``isRandom`` is set). Defaults to ``self.objdb``.
        """
        if ctxdb is None:
            ctxdb = self.objdb

        num_samples = len(ctxdb)

        for i in range(num_samples):
            entry = ctxdb[i]
            xyxy = np.array(entry['box']).copy().astype(int)
            lyo_path = entry['layout']

            layout = cv2.imread(lyo_path, cv2.IMREAD_COLOR)

            if isRandom:
                width = entry['width']
                height = entry['height']
                # The box is sampled in a max_dim x max_dim square; (ox, oy)
                # is half the difference between that square and the image.
                max_dim = np.maximum(width, height)
                ox = int(0.5 * (max_dim - width))
                oy = int(0.5 * (max_dim - height))

                cen_id = np.random.randint(0, 15 * 15)
                size_id = np.random.randint(0, 15 * 15)
                box_id = np.array([cen_id, size_id]).reshape((1, 2))

                xywh = ds_utils.indices_to_boxes(box_id, [15, 15, 15, 15])
                xywh = ds_utils.denormalize_xywh(xywh, max_dim, max_dim)
                xyxy = ds_utils.xywh_to_xyxy(xywh, max_dim,
                                             max_dim).flatten().astype(int)

                # Shift from square coordinates back to image coordinates.
                xyxy[0] -= ox
                xyxy[1] -= oy
                xyxy[2] -= ox
                xyxy[3] -= oy

            # Clamp at zero: a negative slice bound would wrap around to the
            # opposite image border and blank the wrong region.
            x_start = max(int(xyxy[0]), 0)
            y_start = max(int(xyxy[1]), 0)
            x_stop = max(int(xyxy[2]) + 1, 0)
            y_stop = max(int(xyxy[3]) + 1, 0)
            layout[y_start:y_stop, x_start:x_stop, :] = 0

            output_path = osp.join(output_dir, osp.basename(lyo_path))
            cv2.imwrite(output_path, layout)
            print(i)
Exemplo n.º 4
0
    def draw_evaluation_layouts(self, output_dir, isRandom, ctxdb=None):
        """Write copies of the layout images with one box region zeroed out.

        Args:
            output_dir: directory that receives the images (it is not
                created here); each output keeps the base name of its
                source layout file.
            isRandom: when True, the blanked region is a randomly drawn grid
                box instead of ``entry['box']``.
            ctxdb: indexable collection of entries with ``'box'`` and
                ``'layout'`` keys (plus ``'width'`` and ``'height'`` when
                ``isRandom`` is set). Defaults to ``self.objdb``.
        """
        if ctxdb is None:
            ctxdb = self.objdb

        num_samples = len(ctxdb)

        for i in range(num_samples):
            entry = ctxdb[i]
            xyxy = np.array(entry['box']).copy().astype(int)
            lyo_path = entry['layout']

            layout = cv2.imread(lyo_path, cv2.IMREAD_COLOR)

            if isRandom:
                width = entry['width']
                height = entry['height']
                # The box is sampled in a max_dim x max_dim square; (ox, oy)
                # is half the difference between that square and the image.
                max_dim = np.maximum(width, height)
                ox = int(0.5 * (max_dim - width))
                oy = int(0.5 * (max_dim - height))

                cen_id = np.random.randint(0, 15 * 15)
                size_id = np.random.randint(0, 15 * 15)
                box_id = np.array([cen_id, size_id]).reshape((1, 2))

                xywh = ds_utils.indices_to_boxes(box_id, [15, 15, 15, 15])
                xywh = ds_utils.denormalize_xywh(xywh, max_dim, max_dim)
                xyxy = ds_utils.xywh_to_xyxy(
                    xywh, max_dim, max_dim).flatten().astype(int)

                # Shift from square coordinates back to image coordinates.
                xyxy[0] -= ox
                xyxy[1] -= oy
                xyxy[2] -= ox
                xyxy[3] -= oy

            # Clamp at zero: a negative slice bound would wrap around to the
            # opposite image border and blank the wrong region.
            x_start = max(int(xyxy[0]), 0)
            y_start = max(int(xyxy[1]), 0)
            x_stop = max(int(xyxy[2]) + 1, 0)
            y_stop = max(int(xyxy[3]) + 1, 0)
            layout[y_start:y_stop, x_start:x_stop, :] = 0

            output_path = osp.join(output_dir, osp.basename(lyo_path))
            cv2.imwrite(output_path, layout)
            print(i)
Exemplo n.º 5
0
    def sampler(self, test_db, epoch=0, K=3, vis=False):
        """Run box prediction on every entry of ``test_db`` and save results.

        For each entry the predicted boxes are converted to pixel xyxy
        coordinates of the original image, clipped to it, and written to
        ``<self.output_dir>/prediction_jsons/<image name>.json``.

        Args:
            test_db: indexable collection of entries; each must provide
                ``'bg_image'`` (image path) and whatever
                ``self.single_sample`` reads from it.
            epoch: number used as a prefix of the visualization file names.
            K: forwarded to ``self.single_sample`` (presumably the number of
                boxes sampled per image -- confirm).
            vis: when True, also write a box/heatmap overlay (``_ol``) and
                the cropped heatmap (``_hm``) to
                ``<self.output_dir>/prediction_vis``.

        Returns:
            List with one dict per predicted box, holding ``'bg_image'``,
            ``'name'``, ``'box'`` (a row of the xyxy array) and ``'rank'``
            (the row index).
        """
        # Copy the current weights of the training model into both
        # inference branches.
        self.center_inference.set_weights(
            self.get_center_branch_weights(self.model))
        self.size_inference.set_weights(
            self.get_size_branch_weights(self.model))

        output_dir = osp.join(self.output_dir, 'prediction_jsons')
        ds_utils.maybe_create(output_dir)
        if vis:
            vis_dir = osp.join(self.output_dir, 'prediction_vis')
            ds_utils.maybe_create(vis_dir)

        res_db = []
        num_samples = len(test_db)

        for i in range(num_samples):
            entry = test_db[i]
            im_path = entry['bg_image']
            im_name, im_ext = osp.splitext(osp.basename(im_path))
            ori_img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            img, ox, oy = ds_utils.create_squared_image(
                ori_img, cfg.PIXEL_MEANS)
            width = img.shape[1]
            height = img.shape[0]

            xywhs, _, heatmap = self.single_sample(entry, K=K)
            xywhs = ds_utils.denormalize_xywh(xywhs, width, height)
            xyxys = ds_utils.xywh_to_xyxy(xywhs, width, height)

            # Move boxes and heatmap from the squared image back to the
            # original image frame.
            xyxys[:, 0] -= ox
            xyxys[:, 1] -= oy
            xyxys[:, 2] -= ox
            xyxys[:, 3] -= oy
            xyxys = ds_utils.clip_boxes(xyxys, ori_img.shape[1],
                                        ori_img.shape[0])
            heatmap = heatmap[oy:(oy + ori_img.shape[0]),
                              ox:(ox + ori_img.shape[1]), :]

            res = {
                'bg_image': im_path,
                'name': im_name,
                'boxes': xyxys.tolist(),
            }
            json_path = osp.join(output_dir, im_name + '.json')
            with open(json_path, 'w') as res_file:
                json.dump(res, res_file, indent=4, separators=(',', ': '))

            if vis:
                # ori_img is not needed past this point, so the boxes are
                # drawn on it in place.
                vis_img = ori_img
                for j in range(xyxys.shape[0]):
                    # cv2 drawing functions need plain integer pixel
                    # coordinates.
                    x1, y1, x2, y2 = [int(v) for v in xyxys[j]]
                    color = self.palette[j % len(self.palette)]
                    cv2.rectangle(vis_img, (x1, y1), (x2, y2), color, 4)

                # Boost channel 1 of the image in proportion to the heatmap.
                gain = np.ones_like(heatmap, dtype=np.float64)
                gain[:, :, 1] += heatmap[:, :, 1] / 255.0
                overlay = np.multiply(vis_img, gain)
                overlay = np.minimum(overlay, 255).astype(np.uint8)

                output_path = osp.join(
                    vis_dir, '%04d_' % epoch + im_name + '_ol' + im_ext)
                cv2.imwrite(output_path, overlay)
                output_path = osp.join(
                    vis_dir, '%04d_' % epoch + im_name + '_hm' + im_ext)
                cv2.imwrite(output_path, heatmap)

            for j in range(len(res['boxes'])):
                res_db.append({
                    'bg_image': im_path,
                    'name': im_name,
                    'box': xyxys[j],
                    'rank': j,
                })

        return res_db
Exemplo n.º 6
0
    def get_minibatch(self, imdb, vis=False):
        """Fetch one background minibatch and build its training targets.

        Args:
            imdb: dataset object; only ``get_background_minibatch()`` is
                called on it, and it must return
                ``(images, layouts, boxes, grids)``.
            vis: when True, also write debug images to
                ``<self.output_dir>/minibatch``: the input image and the
                layout with the ground-truth and grid boxes drawn on them,
                plus the ROI map.

        Returns:
            Tuple ``(images, layouts, rois, cens_onehot, sizes_onehot)``.
        """
        batch_size = cfg.TRAIN.BATCH_SIZE
        resolution = cfg.PREDICT_RESOLUTION
        grid_shape = cfg.GRID_SHAPE

        images, layouts, boxes, grids = imdb.get_background_minibatch()
        # Column 0 of `grids` feeds the center targets, column 1 the size
        # targets.
        rois = ds_utils.centers_to_rois(grids[:, 0], grid_shape[:2],
                                        grid_shape[:2])

        cens_onehot = to_categorical(grids[:, 0], self.cen_dims)
        sizes_onehot = to_categorical(grids[:, 1], self.size_dims)

        if vis:
            output_dir = osp.join(self.output_dir, 'minibatch')
            ds_utils.maybe_create(output_dir)

            # `resolution` is indexed as (height, width, ...).
            out_w, out_h = resolution[1], resolution[0]
            true_color = (0, 255, 0)
            grid_color = (255, 0, 0)

            for i in range(batch_size):
                img = images[i].copy()
                lyo = layouts[i].copy()

                cen_id = np.argmax(cens_onehot[i, :])
                size_id = np.argmax(sizes_onehot[i, :])

                # Ground-truth box, scaled to pixel coordinates.
                true_xywh = ds_utils.denormalize_xywh(
                    boxes[i, :].reshape((1, 4)), out_w, out_h)
                true_xyxy = ds_utils.xywh_to_xyxy(
                    true_xywh, out_w, out_h).squeeze()

                # Box decoded back from the discrete (center, size) indices.
                grid_ids = np.array([cen_id, size_id]).reshape((1, 2))
                grid_xywh = ds_utils.indices_to_boxes(grid_ids, grid_shape)
                grid_xywh = ds_utils.denormalize_xywh(grid_xywh, out_w, out_h)
                grid_xyxy = ds_utils.xywh_to_xyxy(
                    grid_xywh, out_w, out_h).squeeze()

                for canvas in (img, lyo):
                    for xyxy, color in ((true_xyxy, true_color),
                                        (grid_xyxy, grid_color)):
                        # cv2 drawing functions need plain integer pixel
                        # coordinates.
                        x1, y1, x2, y2 = [int(v) for v in xyxy]
                        cv2.rectangle(canvas, (x1, y1), (x2, y2), color, 1)

                roi = cv2.resize((rois[i] * 255).astype(np.uint8),
                                 (out_w, out_h))

                cv2.imwrite(osp.join(output_dir, 'img_%06d.jpg' % i), img)
                cv2.imwrite(osp.join(output_dir, 'lyo_%06d.jpg' % i), lyo)
                cv2.imwrite(osp.join(output_dir, 'roi_%06d.jpg' % i), roi)

        return images, layouts, rois, cens_onehot, sizes_onehot
Exemplo n.º 7
0
    def sampler(self, test_db, epoch=0, K=3, vis=False):
        """Run box prediction on every entry of ``test_db`` and save results.

        For each entry the predicted boxes are converted to pixel xyxy
        coordinates of the original image, clipped to it, and written to
        ``<self.output_dir>/prediction_jsons/<image name>.json``.

        Args:
            test_db: indexable collection of entries; each must provide
                ``'bg_image'`` (image path) and whatever
                ``self.single_sample`` reads from it.
            epoch: number used as a prefix of the visualization file names.
            K: forwarded to ``self.single_sample`` (presumably the number of
                boxes sampled per image -- confirm).
            vis: when True, also write a box/heatmap overlay (``_ol``) and
                the cropped heatmap (``_hm``) to
                ``<self.output_dir>/prediction_vis``.

        Returns:
            List with one dict per predicted box, holding ``'bg_image'``,
            ``'name'``, ``'box'`` (a row of the xyxy array) and ``'rank'``
            (the row index).
        """
        # Copy the current weights of the training model into both
        # inference branches.
        self.center_inference.set_weights(
            self.get_center_branch_weights(self.model))
        self.size_inference.set_weights(
            self.get_size_branch_weights(self.model))

        output_dir = osp.join(self.output_dir, 'prediction_jsons')
        ds_utils.maybe_create(output_dir)
        if vis:
            vis_dir = osp.join(self.output_dir, 'prediction_vis')
            ds_utils.maybe_create(vis_dir)

        res_db = []
        num_samples = len(test_db)

        for i in range(num_samples):
            entry = test_db[i]
            im_path = entry['bg_image']
            im_name, im_ext = osp.splitext(osp.basename(im_path))
            ori_img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            img, ox, oy = ds_utils.create_squared_image(
                ori_img, cfg.PIXEL_MEANS)
            width = img.shape[1]
            height = img.shape[0]

            xywhs, _, heatmap = self.single_sample(entry, K=K)
            xywhs = ds_utils.denormalize_xywh(xywhs, width, height)
            xyxys = ds_utils.xywh_to_xyxy(xywhs, width, height)

            # Move boxes and heatmap from the squared image back to the
            # original image frame.
            xyxys[:, 0] -= ox
            xyxys[:, 1] -= oy
            xyxys[:, 2] -= ox
            xyxys[:, 3] -= oy
            xyxys = ds_utils.clip_boxes(xyxys, ori_img.shape[1],
                                        ori_img.shape[0])
            heatmap = heatmap[oy:(oy + ori_img.shape[0]),
                              ox:(ox + ori_img.shape[1]), :]

            res = {
                'bg_image': im_path,
                'name': im_name,
                'boxes': xyxys.tolist(),
            }
            json_path = osp.join(output_dir, im_name + '.json')
            with open(json_path, 'w') as res_file:
                json.dump(res, res_file, indent=4, separators=(',', ': '))

            if vis:
                # ori_img is not needed past this point, so the boxes are
                # drawn on it in place.
                vis_img = ori_img
                for j in range(xyxys.shape[0]):
                    # cv2 drawing functions need plain integer pixel
                    # coordinates.
                    x1, y1, x2, y2 = [int(v) for v in xyxys[j]]
                    color = self.palette[j % len(self.palette)]
                    cv2.rectangle(vis_img, (x1, y1), (x2, y2), color, 4)

                # Boost channel 1 of the image in proportion to the heatmap.
                gain = np.ones_like(heatmap, dtype=np.float64)
                gain[:, :, 1] += heatmap[:, :, 1] / 255.0
                overlay = np.multiply(vis_img, gain)
                overlay = np.minimum(overlay, 255).astype(np.uint8)

                output_path = osp.join(
                    vis_dir, '%04d_' % epoch + im_name + '_ol' + im_ext)
                cv2.imwrite(output_path, overlay)
                output_path = osp.join(
                    vis_dir, '%04d_' % epoch + im_name + '_hm' + im_ext)
                cv2.imwrite(output_path, heatmap)

            for j in range(len(res['boxes'])):
                res_db.append({
                    'bg_image': im_path,
                    'name': im_name,
                    'box': xyxys[j],
                    'rank': j,
                })

        return res_db
Exemplo n.º 8
0
    def get_rnn_minibatch(self, max_seq_len, square=True, vis=False):
        """Assemble the next minibatch of images and per-object box sequences.

        The objects of every image are ordered by decreasing ``seg_areas``
        and truncated to ``max_seq_len``; shorter sequences are zero-padded
        at the end. Advances ``self.roidb_cur`` by one batch.

        Args:
            max_seq_len: maximum number of objects kept per image.
            square: when True, each image goes through
                ``ds_utils.create_squared_image`` and the boxes are shifted
                by the returned offsets before normalization.
            vis: when True, write one debug image per sample to
                ``<cfg.ROOT_DIR>/output/<cfg.EXP_DIR>/<self.name>/rnn_minibatch``
                showing the ground-truth, grid and regressed boxes.

        Returns:
            Tuple ``(images, objects, bboxes, deltas, centers, ratios,
            masks)``. ``images`` is float32 of shape
            ``(batch, resolution[0], resolution[1], resolution[2])``;
            ``bboxes`` and ``deltas`` are float32 of shape
            ``(batch, max_seq_len, 4)``; the remaining four are the
            ``pad_sequences`` outputs, padded/truncated to ``max_seq_len``.
        """
        batch_size = cfg.TRAIN.BATCH_SIZE
        resolution = cfg.RESOLUTION
        grid_shape = cfg.GRID_SHAPE
        # cv2.resize expects (width, height).
        out_size = (resolution[1], resolution[0])

        # Request a new permutation when the cursor gets within one batch of
        # the end (permute_roidb_indices presumably also resets
        # self.roidb_cur -- confirm).
        if self.roidb_cur + batch_size >= len(self.roidb):
            self.permute_roidb_indices()
        db_inds = self.roidb_perm[self.roidb_cur:self.roidb_cur + batch_size]
        self.roidb_cur += batch_size

        # Outputs.
        objects = []
        centers = []
        ratios = []
        masks = []
        # normalized xywh representation
        bboxes = np.zeros((batch_size, max_seq_len, 4), dtype=np.float32)
        # offsets between grid boxes and ground-truth boxes
        deltas = np.zeros((batch_size, max_seq_len, 4), dtype=np.float32)
        images = np.zeros((batch_size, resolution[0],
                           resolution[1], resolution[2]), dtype=np.float32)

        for i in range(batch_size):
            rois = self.roidb[db_inds[i]]
            im_path = rois['image']
            width = rois['width']
            height = rois['height']
            gt_boxes = rois['boxes'].copy()
            gt_cats = rois['clses'].copy()
            areas = rois['seg_areas']

            # number of instances should not exceed max_seq_len
            num_instances = min(gt_boxes.shape[0], max_seq_len)

            # image data, flip if necessary
            img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            if rois['flipped']:
                img = cv2.flip(img, 1)

            # sort the objects by decreasing area
            order = np.argsort(areas)[::-1]
            gt_boxes = gt_boxes[order, :]
            gt_cats = gt_cats[order]

            # [x1, y1, x2, y2] to [x, y, w, h]
            gt_boxes = ds_utils.xyxy_to_xywh(gt_boxes)

            # if we need square images
            if square:
                img, offset_x, offset_y = \
                    ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
                gt_boxes[:, 0] += offset_x
                gt_boxes[:, 1] += offset_y
                width = height = img.shape[0]

            # normalize
            gt_boxes = ds_utils.normalize_xywh(gt_boxes, width, height)

            # truncate the sequences
            gt_boxes = gt_boxes[:num_instances, :]

            # discretized output positions
            grid_indices = ds_utils.xywh_to_index(
                gt_boxes, grid_shape[1], grid_shape[0])

            # deltas between grid boxes and ground truth boxes
            grid_boxes = ds_utils.index_to_xywh(
                grid_indices, grid_shape[1], grid_shape[0])
            grid_deltas = ds_utils.bbox_transform(grid_boxes, gt_boxes)

            # images of the same shape
            images[i, :, :, :] = cv2.resize(img, out_size)
            # keep the first 'num_instances' (largest) objects
            bboxes[i, :num_instances, :] = np.expand_dims(gt_boxes, axis=0)
            # grid offsets
            deltas[i, :num_instances, :] = np.expand_dims(grid_deltas, axis=0)
            # object indicators
            objects.append(gt_cats[:num_instances].tolist())
            # masks for loss function
            masks.append(np.ones((num_instances, )).tolist())
            # grid centers and sizes
            centers.append(grid_indices[:, 0].tolist())
            ratios.append(grid_indices[:, 1].tolist())

        # padding
        pad_kwargs = dict(maxlen=max_seq_len, padding='post',
                          truncating='post', value=0.)
        objects = pad_sequences(objects, **pad_kwargs)
        centers = pad_sequences(centers, **pad_kwargs)
        ratios = pad_sequences(ratios, **pad_kwargs)
        masks = pad_sequences(masks, **pad_kwargs)

        if vis:
            output_dir = osp.abspath(osp.join(cfg.ROOT_DIR, 'output',
                                              cfg.EXP_DIR, self.name,
                                              'rnn_minibatch'))
            if not osp.exists(output_dir):
                os.makedirs(output_dir)

            for i in range(batch_size):
                rois = self.roidb[db_inds[i]]
                im_name = osp.splitext(osp.basename(rois['image']))[0]
                msk = masks[i, :]

                # ground truth boxes
                ibb = bboxes[i, :, :].copy()
                iim = images[i, :, :, :].copy()

                # grid bboxes
                grid_indices = np.vstack(
                    (centers[i, :], ratios[i, :])).transpose()
                gbb = ds_utils.index_to_xywh(grid_indices, grid_shape[1],
                                             grid_shape[0])

                # regressed bboxes
                rbb = ds_utils.bbox_transform_inv(gbb, deltas[i, :, :])

                # Denormalize and convert to corner form
                ibb = ds_utils.denormalize_xywh(ibb, out_size[0], out_size[1])
                gbb = ds_utils.denormalize_xywh(gbb, out_size[0], out_size[1])
                rbb = ds_utils.denormalize_xywh(rbb, out_size[0], out_size[1])

                ibb = ds_utils.xywh_to_xyxy(ibb, out_size[0], out_size[1])
                gbb = ds_utils.xywh_to_xyxy(gbb, out_size[0], out_size[1])
                rbb = ds_utils.xywh_to_xyxy(rbb, out_size[0], out_size[1])

                # (boxes, color, thickness): ground truth, grid, regressed
                layers = ((ibb, (0, 255, 0), 2),
                          (gbb, (255, 0, 0), 1),
                          (rbb, (0, 0, 255), 1))

                for j in range(ibb.shape[0]):
                    # the mask is zero from the first padded slot onwards
                    if msk[j] == 0:
                        break

                    for layer_boxes, color, thickness in layers:
                        bb = layer_boxes[j, :].astype(np.int16)
                        x1, y1, x2, y2 = [int(v) for v in bb]
                        cv2.rectangle(iim, (x1, y1), (x2, y2),
                                      color, thickness)

                output_path = osp.join(output_dir,
                                       '%06d_' % i + im_name + '.jpg')
                cv2.imwrite(output_path, iim)

        return images, objects, bboxes, deltas, centers, ratios, masks
Exemplo n.º 9
0
    def get_rnn_minibatch(self, max_seq_len, square=True, vis=False):
        """Assemble the next minibatch of images and per-object box sequences.

        The objects of every image are ordered by decreasing ``seg_areas``
        and truncated to ``max_seq_len``; shorter sequences are zero-padded
        at the end. Advances ``self.roidb_cur`` by one batch.

        Args:
            max_seq_len: maximum number of objects kept per image.
            square: when True, each image goes through
                ``ds_utils.create_squared_image`` and the boxes are shifted
                by the returned offsets before normalization.
            vis: when True, write one debug image per sample to
                ``<cfg.ROOT_DIR>/output/<cfg.EXP_DIR>/<self.name>/rnn_minibatch``
                showing the ground-truth, grid and regressed boxes.

        Returns:
            Tuple ``(images, objects, bboxes, deltas, centers, ratios,
            masks)``. ``images`` is float32 of shape
            ``(batch, resolution[0], resolution[1], resolution[2])``;
            ``bboxes`` and ``deltas`` are float32 of shape
            ``(batch, max_seq_len, 4)``; the remaining four are the
            ``pad_sequences`` outputs, padded/truncated to ``max_seq_len``.
        """
        batch_size = cfg.TRAIN.BATCH_SIZE
        resolution = cfg.RESOLUTION
        grid_shape = cfg.GRID_SHAPE
        # cv2.resize expects (width, height).
        out_size = (resolution[1], resolution[0])

        # Request a new permutation when the cursor gets within one batch of
        # the end (permute_roidb_indices presumably also resets
        # self.roidb_cur -- confirm).
        if self.roidb_cur + batch_size >= len(self.roidb):
            self.permute_roidb_indices()
        db_inds = self.roidb_perm[self.roidb_cur:self.roidb_cur + batch_size]
        self.roidb_cur += batch_size

        # Outputs.
        objects = []
        centers = []
        ratios = []
        masks = []
        # normalized xywh representation
        bboxes = np.zeros((batch_size, max_seq_len, 4), dtype=np.float32)
        # offsets between grid boxes and ground-truth boxes
        deltas = np.zeros((batch_size, max_seq_len, 4), dtype=np.float32)
        images = np.zeros((batch_size, resolution[0],
                           resolution[1], resolution[2]), dtype=np.float32)

        for i in range(batch_size):
            rois = self.roidb[db_inds[i]]
            im_path = rois['image']
            width = rois['width']
            height = rois['height']
            gt_boxes = rois['boxes'].copy()
            gt_cats = rois['clses'].copy()
            areas = rois['seg_areas']

            # number of instances should not exceed max_seq_len
            num_instances = min(gt_boxes.shape[0], max_seq_len)

            # image data, flip if necessary
            img = cv2.imread(im_path, cv2.IMREAD_COLOR)
            if rois['flipped']:
                img = cv2.flip(img, 1)

            # sort the objects by decreasing area
            order = np.argsort(areas)[::-1]
            gt_boxes = gt_boxes[order, :]
            gt_cats = gt_cats[order]

            # [x1, y1, x2, y2] to [x, y, w, h]
            gt_boxes = ds_utils.xyxy_to_xywh(gt_boxes)

            # if we need square images
            if square:
                img, offset_x, offset_y = \
                    ds_utils.create_squared_image(img, cfg.PIXEL_MEANS)
                gt_boxes[:, 0] += offset_x
                gt_boxes[:, 1] += offset_y
                width = height = img.shape[0]

            # normalize
            gt_boxes = ds_utils.normalize_xywh(gt_boxes, width, height)

            # truncate the sequences
            gt_boxes = gt_boxes[:num_instances, :]

            # discretized output positions
            grid_indices = ds_utils.xywh_to_index(
                gt_boxes, grid_shape[1], grid_shape[0])

            # deltas between grid boxes and ground truth boxes
            grid_boxes = ds_utils.index_to_xywh(
                grid_indices, grid_shape[1], grid_shape[0])
            grid_deltas = ds_utils.bbox_transform(grid_boxes, gt_boxes)

            # images of the same shape
            images[i, :, :, :] = cv2.resize(img, out_size)
            # keep the first 'num_instances' (largest) objects
            bboxes[i, :num_instances, :] = np.expand_dims(gt_boxes, axis=0)
            # grid offsets
            deltas[i, :num_instances, :] = np.expand_dims(grid_deltas, axis=0)
            # object indicators
            objects.append(gt_cats[:num_instances].tolist())
            # masks for loss function
            masks.append(np.ones((num_instances, )).tolist())
            # grid centers and sizes
            centers.append(grid_indices[:, 0].tolist())
            ratios.append(grid_indices[:, 1].tolist())

        # padding
        pad_kwargs = dict(maxlen=max_seq_len, padding='post',
                          truncating='post', value=0.)
        objects = pad_sequences(objects, **pad_kwargs)
        centers = pad_sequences(centers, **pad_kwargs)
        ratios = pad_sequences(ratios, **pad_kwargs)
        masks = pad_sequences(masks, **pad_kwargs)

        if vis:
            output_dir = osp.abspath(osp.join(cfg.ROOT_DIR, 'output',
                                              cfg.EXP_DIR, self.name,
                                              'rnn_minibatch'))
            if not osp.exists(output_dir):
                os.makedirs(output_dir)

            for i in range(batch_size):
                rois = self.roidb[db_inds[i]]
                im_name = osp.splitext(osp.basename(rois['image']))[0]
                msk = masks[i, :]

                # ground truth boxes
                ibb = bboxes[i, :, :].copy()
                iim = images[i, :, :, :].copy()

                # grid bboxes
                grid_indices = np.vstack(
                    (centers[i, :], ratios[i, :])).transpose()
                gbb = ds_utils.index_to_xywh(grid_indices, grid_shape[1],
                                             grid_shape[0])

                # regressed bboxes
                rbb = ds_utils.bbox_transform_inv(gbb, deltas[i, :, :])

                # Denormalize and convert to corner form
                ibb = ds_utils.denormalize_xywh(ibb, out_size[0], out_size[1])
                gbb = ds_utils.denormalize_xywh(gbb, out_size[0], out_size[1])
                rbb = ds_utils.denormalize_xywh(rbb, out_size[0], out_size[1])

                ibb = ds_utils.xywh_to_xyxy(ibb, out_size[0], out_size[1])
                gbb = ds_utils.xywh_to_xyxy(gbb, out_size[0], out_size[1])
                rbb = ds_utils.xywh_to_xyxy(rbb, out_size[0], out_size[1])

                # (boxes, color, thickness): ground truth, grid, regressed
                layers = ((ibb, (0, 255, 0), 2),
                          (gbb, (255, 0, 0), 1),
                          (rbb, (0, 0, 255), 1))

                for j in range(ibb.shape[0]):
                    # the mask is zero from the first padded slot onwards
                    if msk[j] == 0:
                        break

                    for layer_boxes, color, thickness in layers:
                        bb = layer_boxes[j, :].astype(np.int16)
                        x1, y1, x2, y2 = [int(v) for v in bb]
                        cv2.rectangle(iim, (x1, y1), (x2, y2),
                                      color, thickness)

                output_path = osp.join(output_dir,
                                       '%06d_' % i + im_name + '.jpg')
                cv2.imwrite(output_path, iim)

        return images, objects, bboxes, deltas, centers, ratios, masks