def __init__(self, name):
     assert dataset_catalog.contains(name), \
         'Unknown dataset name: {}'.format(name)
     assert os.path.exists(dataset_catalog.get_im_dir(name)), \
         'Im dir \'{}\' not found'.format(dataset_catalog.get_im_dir(name))
     assert os.path.exists(dataset_catalog.get_ann_fn(name)), \
         'Ann fn \'{}\' not found'.format(dataset_catalog.get_ann_fn(name))
     logger.debug('Creating: {}'.format(name))
     self.name = name
     self.image_directory = dataset_catalog.get_im_dir(name)
     self.image_prefix = dataset_catalog.get_im_prefix(name)
     self.COCO = COCO(dataset_catalog.get_ann_fn(name))
     self.debug_timer = Timer()
     # Set up dataset classes
     category_ids = self.COCO.getCatIds()
     categories = [c['name'] for c in self.COCO.loadCats(category_ids)]
     self.category_to_id_map = dict(zip(categories, category_ids))
     self.classes = ['__background__'] + categories
     self.num_classes = len(self.classes)
     self.json_category_id_to_contiguous_id = {
         v: i + 1
         for i, v in enumerate(self.COCO.getCatIds())
     }
     self.contiguous_category_id_to_json_id = {
         v: k
         for k, v in self.json_category_id_to_contiguous_id.items()
     }
     self._init_keypoints()
Пример #2
0
 def __init__(self, model):
     # Window size for smoothing tracked values (with median filtering)
     self.WIN_SZ = 20
     # Output logging period in SGD iterations
     self.LOG_PERIOD = 20
     self.smoothed_losses_and_metrics = {
         key: SmoothedValue(self.WIN_SZ)
         for key in model.losses + model.metrics
     }
     self.losses_and_metrics = {
         key: 0
         for key in model.losses + model.metrics
     }
     self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
     self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
     self.iter_total_loss = np.nan
     self.iter_timer = Timer()
     self.model = model
def test_net_on_dataset(weights_file,
                        dataset_name,
                        proposal_file,
                        output_dir,
                        multi_gpu=False,
                        gpu_id=0):
    """Run inference on a dataset."""
    dataset = JsonDataset(dataset_name)
    test_timer = Timer()
    test_timer.tic()
    if multi_gpu:
        num_images = len(dataset.get_roidb())
        all_boxes, all_segms, all_keyps = multi_gpu_test_net_on_dataset(
            weights_file, dataset_name, proposal_file, num_images, output_dir)
    else:
        all_boxes, all_segms, all_keyps = test_net(weights_file,
                                                   dataset_name,
                                                   proposal_file,
                                                   output_dir,
                                                   gpu_id=gpu_id)
    test_timer.toc()
    logger.info('Total inference time: {:.3f}s'.format(
        test_timer.average_time))
    results = task_evaluation.evaluate_all(dataset, all_boxes, all_segms,
                                           all_keyps, output_dir)
    return results
def generate_rpn_on_dataset(weights_file,
                            dataset_name,
                            _proposal_file_ignored,
                            output_dir,
                            multi_gpu=False,
                            gpu_id=0):
    """Run inference on a dataset."""
    dataset = JsonDataset(dataset_name)
    test_timer = Timer()
    test_timer.tic()
    if multi_gpu:
        num_images = len(dataset.get_roidb())
        _boxes, _scores, _ids, rpn_file = multi_gpu_generate_rpn_on_dataset(
            weights_file, dataset_name, _proposal_file_ignored, num_images,
            output_dir)
    else:
        # Processes entire dataset range by default
        _boxes, _scores, _ids, rpn_file = generate_rpn_on_range(
            weights_file,
            dataset_name,
            _proposal_file_ignored,
            output_dir,
            gpu_id=gpu_id)
    test_timer.toc()
    logger.info('Total inference time: {:.3f}s'.format(
        test_timer.average_time))
    return evaluate_proposal_file(dataset, rpn_file, output_dir)
def generate_proposals_on_roidb(
    model,
    roidb,
    start_ind=None,
    end_ind=None,
    total_num_images=None,
    gpu_id=0,
):
    """Generate RPN proposals on all images in an imdb."""
    _t = Timer()
    num_images = len(roidb)
    roidb_boxes = [[] for _ in range(num_images)]
    roidb_scores = [[] for _ in range(num_images)]
    roidb_ids = [[] for _ in range(num_images)]
    if start_ind is None:
        start_ind = 0
        end_ind = num_images
        total_num_images = num_images
    for i in range(num_images):
        roidb_ids[i] = roidb[i]['id']
        im = cv2.imread(roidb[i]['image'])
        with c2_utils.NamedCudaScope(gpu_id):
            _t.tic()
            roidb_boxes[i], roidb_scores[i] = im_proposals(model, im)
            _t.toc()
        if i % 10 == 0:
            ave_time = _t.average_time
            eta_seconds = ave_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            logger.info(
                ('rpn_generate: range [{:d}, {:d}] of {:d}: '
                 '{:d}/{:d} {:.3f}s (eta: {})').format(start_ind + 1, end_ind,
                                                       total_num_images,
                                                       start_ind + i + 1,
                                                       start_ind + num_images,
                                                       ave_time, eta))

    return roidb_boxes, roidb_scores, roidb_ids
Пример #6
0
class TrainingStats(object):
    """Track vital training statistics."""
    def __init__(self, model):
        # Window size for smoothing tracked values (with median filtering)
        self.WIN_SZ = 20
        # Output logging period in SGD iterations
        self.LOG_PERIOD = 20
        self.smoothed_losses_and_metrics = {
            key: SmoothedValue(self.WIN_SZ)
            for key in model.losses + model.metrics
        }
        self.losses_and_metrics = {
            key: 0
            for key in model.losses + model.metrics
        }
        self.smoothed_total_loss = SmoothedValue(self.WIN_SZ)
        self.smoothed_mb_qsize = SmoothedValue(self.WIN_SZ)
        self.iter_total_loss = np.nan
        self.iter_timer = Timer()
        self.model = model

    def IterTic(self):
        self.iter_timer.tic()

    def IterToc(self):
        return self.iter_timer.toc(average=False)

    def ResetIterTimer(self):
        self.iter_timer.reset()

    def UpdateIterStats(self):
        """Update tracked iteration statistics."""
        for k in self.losses_and_metrics.keys():
            if k in self.model.losses:
                self.losses_and_metrics[k] = nu.sum_multi_gpu_blob(k)
            else:
                self.losses_and_metrics[k] = nu.average_multi_gpu_blob(k)
        for k, v in self.smoothed_losses_and_metrics.items():
            v.AddValue(self.losses_and_metrics[k])
        self.iter_total_loss = np.sum(
            np.array([self.losses_and_metrics[k] for k in self.model.losses]))
        self.smoothed_total_loss.AddValue(self.iter_total_loss)
        self.smoothed_mb_qsize.AddValue(
            self.model.roi_data_loader._minibatch_queue.qsize())

    def LogIterStats(self, cur_iter, lr):
        """Log the tracked statistics."""
        if (cur_iter % self.LOG_PERIOD == 0
                or cur_iter == cfg.SOLVER.MAX_ITER - 1):
            stats = self.GetStats(cur_iter, lr)
            log_json_stats(stats)

    def GetStats(self, cur_iter, lr):
        eta_seconds = self.iter_timer.average_time * (cfg.SOLVER.MAX_ITER -
                                                      cur_iter)
        eta = str(datetime.timedelta(seconds=int(eta_seconds)))
        mem_stats = c2_py_utils.GetGPUMemoryUsageStats()
        mem_usage = np.max(mem_stats['max_by_gpu'][:cfg.NUM_GPUS])
        stats = dict(iter=cur_iter,
                     lr=float(lr),
                     time=self.iter_timer.average_time,
                     loss=self.smoothed_total_loss.GetMedianValue(),
                     eta=eta,
                     mb_qsize=int(
                         np.round(self.smoothed_mb_qsize.GetMedianValue())),
                     mem=int(np.ceil(mem_usage / 1024 / 1024)))
        for k, v in self.smoothed_losses_and_metrics.items():
            stats[k] = v.GetMedianValue()
        return stats
class JsonDataset(object):
    """A class representing a COCO json dataset."""
    def __init__(self, name):
        assert dataset_catalog.contains(name), \
            'Unknown dataset name: {}'.format(name)
        assert os.path.exists(dataset_catalog.get_im_dir(name)), \
            'Im dir \'{}\' not found'.format(dataset_catalog.get_im_dir(name))
        assert os.path.exists(dataset_catalog.get_ann_fn(name)), \
            'Ann fn \'{}\' not found'.format(dataset_catalog.get_ann_fn(name))
        logger.debug('Creating: {}'.format(name))
        self.name = name
        self.image_directory = dataset_catalog.get_im_dir(name)
        self.image_prefix = dataset_catalog.get_im_prefix(name)
        self.COCO = COCO(dataset_catalog.get_ann_fn(name))
        self.debug_timer = Timer()
        # Set up dataset classes
        category_ids = self.COCO.getCatIds()
        categories = [c['name'] for c in self.COCO.loadCats(category_ids)]
        self.category_to_id_map = dict(zip(categories, category_ids))
        self.classes = ['__background__'] + categories
        self.num_classes = len(self.classes)
        self.json_category_id_to_contiguous_id = {
            v: i + 1
            for i, v in enumerate(self.COCO.getCatIds())
        }
        self.contiguous_category_id_to_json_id = {
            v: k
            for k, v in self.json_category_id_to_contiguous_id.items()
        }
        self._init_keypoints()

    def get_roidb(self,
                  gt=False,
                  proposal_file=None,
                  min_proposal_size=2,
                  proposal_limit=-1,
                  crowd_filter_thresh=0):
        """Return an roidb corresponding to the json dataset. Optionally:
           - include ground truth boxes in the roidb
           - add proposals specified in a proposals file
           - filter proposals based on a minimum side length
           - filter proposals that intersect with crowd regions
        """
        assert gt is True or crowd_filter_thresh == 0, \
            'Crowd filter threshold must be 0 if ground-truth annotations ' \
            'are not included.'
        image_ids = self.COCO.getImgIds()
        image_ids.sort()
        roidb = copy.deepcopy(self.COCO.loadImgs(image_ids))
        for entry in roidb:
            self._prep_roidb_entry(entry)
        if gt:
            # Include ground-truth object annotations
            self.debug_timer.tic()
            for entry in roidb:
                self._add_gt_annotations(entry)
            logger.debug('_add_gt_annotations took {:.3f}s'.format(
                self.debug_timer.toc(average=False)))
        if proposal_file is not None:
            # Include proposals from a file
            self.debug_timer.tic()
            self._add_proposals_from_file(roidb, proposal_file,
                                          min_proposal_size, proposal_limit,
                                          crowd_filter_thresh)
            logger.debug('_add_proposals_from_file took {:.3f}s'.format(
                self.debug_timer.toc(average=False)))
        _add_class_assignments(roidb)
        return roidb

    def _prep_roidb_entry(self, entry):
        """Adds empty metadata fields to an roidb entry."""
        # Reference back to the parent dataset
        entry['dataset'] = self
        # Make file_name an abs path
        im_path = os.path.join(self.image_directory,
                               self.image_prefix + entry['file_name'])
        assert os.path.exists(im_path), 'Image \'{}\' not found'.format(
            im_path)
        entry['image'] = im_path
        entry['flipped'] = False
        entry['has_visible_keypoints'] = False
        # Empty placeholders
        entry['boxes'] = np.empty((0, 4), dtype=np.float32)
        entry['segms'] = []
        entry['gt_classes'] = np.empty((0), dtype=np.int32)
        entry['seg_areas'] = np.empty((0), dtype=np.float32)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(
            np.empty((0, self.num_classes), dtype=np.float32))
        entry['is_crowd'] = np.empty((0), dtype=np.bool)
        # 'box_to_gt_ind_map': Shape is (#rois). Maps from each roi to the index
        # in the list of rois that satisfy np.where(entry['gt_classes'] > 0)
        entry['box_to_gt_ind_map'] = np.empty((0), dtype=np.int32)
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.empty((0, 3, self.num_keypoints),
                                             dtype=np.int32)
        # Remove unwanted fields that come from the json file (if they exist)
        for k in ['date_captured', 'url', 'license', 'file_name']:
            if k in entry:
                del entry[k]

    def _add_gt_annotations(self, entry):
        """Add ground truth annotation metadata to an roidb entry."""
        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        objs = self.COCO.loadAnns(ann_ids)
        # Sanitize bboxes -- some are invalid
        valid_objs = []
        valid_segms = []
        width = entry['width']
        height = entry['height']
        for obj in objs:
            # crowd regions are RLE encoded and stored as dicts
            if isinstance(obj['segmentation'], list):
                # Valid polygons have >= 3 points, so require >= 6 coordinates
                obj['segmentation'] = [
                    p for p in obj['segmentation'] if len(p) >= 6
                ]
            if obj['area'] < cfg.TRAIN.GT_MIN_AREA:
                continue
            if 'ignore' in obj and obj['ignore'] == 1:
                continue
            # Convert form (x1, y1, w, h) to (x1, y1, x2, y2)
            x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox'])
            x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(
                x1, y1, x2, y2, height, width)
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 0 and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)
                valid_segms.append(obj['segmentation'])
        num_valid_objs = len(valid_objs)

        boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype)
        gt_classes = np.zeros((num_valid_objs),
                              dtype=entry['gt_classes'].dtype)
        gt_overlaps = np.zeros((num_valid_objs, self.num_classes),
                               dtype=entry['gt_overlaps'].dtype)
        seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype)
        is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype)
        box_to_gt_ind_map = np.zeros((num_valid_objs),
                                     dtype=entry['box_to_gt_ind_map'].dtype)
        if self.keypoints is not None:
            gt_keypoints = np.zeros((num_valid_objs, 3, self.num_keypoints),
                                    dtype=entry['gt_keypoints'].dtype)

        im_has_visible_keypoints = False
        for ix, obj in enumerate(valid_objs):
            cls = self.json_category_id_to_contiguous_id[obj['category_id']]
            boxes[ix, :] = obj['clean_bbox']
            gt_classes[ix] = cls
            seg_areas[ix] = obj['area']
            is_crowd[ix] = obj['iscrowd']
            box_to_gt_ind_map[ix] = ix
            if self.keypoints is not None:
                gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj)
                if np.sum(gt_keypoints[ix, 2, :]) > 0:
                    im_has_visible_keypoints = True
            if obj['iscrowd']:
                # Set overlap to -1 for all classes for crowd objects
                # so they will be excluded during training
                gt_overlaps[ix, :] = -1.0
            else:
                gt_overlaps[ix, cls] = 1.0
        entry['boxes'] = np.append(entry['boxes'], boxes, axis=0)
        entry['segms'].extend(valid_segms)
        # To match the original implementation:
        # entry['boxes'] = np.append(
        #     entry['boxes'], boxes.astype(np.int).astype(np.float), axis=0)
        entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes)
        entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas)
        entry['gt_overlaps'] = np.append(entry['gt_overlaps'].toarray(),
                                         gt_overlaps,
                                         axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd)
        entry['box_to_gt_ind_map'] = np.append(entry['box_to_gt_ind_map'],
                                               box_to_gt_ind_map)
        if self.keypoints is not None:
            entry['gt_keypoints'] = np.append(entry['gt_keypoints'],
                                              gt_keypoints,
                                              axis=0)
            entry['has_visible_keypoints'] = im_has_visible_keypoints

    def _add_proposals_from_file(self, roidb, proposal_file, min_proposal_size,
                                 top_k, crowd_thresh):
        """Add proposals from a proposals file to an roidb."""
        logger.info('Loading proposals from: {}'.format(proposal_file))
        with open(proposal_file, 'r') as f:
            proposals = pickle.load(f)
        id_field = 'indexes' if 'indexes' in proposals else 'ids'  # compat fix
        _sort_proposals(proposals, id_field)
        box_list = []
        for i, entry in enumerate(roidb):
            if i % 2500 == 0:
                logger.info(' {:d}/{:d}'.format(i + 1, len(roidb)))
            boxes = proposals['boxes'][i]
            # Sanity check that these boxes are for the correct image id
            assert entry['id'] == proposals[id_field][i]
            # Remove duplicate boxes and very small boxes and then take top k
            boxes = box_utils.clip_boxes_to_image(boxes, entry['height'],
                                                  entry['width'])
            keep = box_utils.unique_boxes(boxes)
            boxes = boxes[keep, :]
            keep = box_utils.filter_small_boxes(boxes, min_proposal_size)
            boxes = boxes[keep, :]
            if top_k > 0:
                boxes = boxes[:top_k, :]
            box_list.append(boxes)
        _merge_proposal_boxes_into_roidb(roidb, box_list)
        if crowd_thresh > 0:
            _filter_crowd_proposals(roidb, crowd_thresh)

    def _init_keypoints(self):
        """Initialize COCO keypoint information."""
        self.keypoints = None
        self.keypoint_flip_map = None
        self.keypoints_to_id_map = None
        self.num_keypoints = 0
        # Thus far only the 'person' category has keypoints
        if 'person' in self.category_to_id_map:
            cat_info = self.COCO.loadCats([self.category_to_id_map['person']])
        else:
            return

        # Check if the annotations contain keypoint data or not
        if 'keypoints' in cat_info[0]:
            keypoints = cat_info[0]['keypoints']
            self.keypoints_to_id_map = dict(
                zip(keypoints, range(len(keypoints))))
            self.keypoints = keypoints
            self.num_keypoints = len(keypoints)
            self.keypoint_flip_map = {
                'left_eye': 'right_eye',
                'left_ear': 'right_ear',
                'left_shoulder': 'right_shoulder',
                'left_elbow': 'right_elbow',
                'left_wrist': 'right_wrist',
                'left_hip': 'right_hip',
                'left_knee': 'right_knee',
                'left_ankle': 'right_ankle'
            }

    def _get_gt_keypoints(self, obj):
        """Return ground truth keypoints."""
        if 'keypoints' not in obj:
            return None
        kp = np.array(obj['keypoints'])
        x = kp[0::3]  # 0-indexed x coordinates
        y = kp[1::3]  # 0-indexed y coordinates
        # 0: not labeled; 1: labeled, not inside mask;
        # 2: labeled and inside mask
        v = kp[2::3]
        num_keypoints = len(obj['keypoints']) / 3
        assert num_keypoints == self.num_keypoints
        gt_kps = np.ones((3, self.num_keypoints), dtype=np.int32)
        for i in range(self.num_keypoints):
            gt_kps[0, i] = x[i]
            gt_kps[1, i] = y[i]
            gt_kps[2, i] = v[i]
        return gt_kps