Exemplo n.º 1
class TestMultiboxCoder(unittest.TestCase):
    """Shape and type checks for ``MultiboxCoder`` on CPU and GPU.

    The fixture reads ``grids``, ``aspect_ratios``, ``steps``, ``sizes`` and
    ``n_fg_class`` from ``self``. None of them is defined in this class, so
    they have to be injected from outside (presumably by a parameterizing
    class decorator -- confirm against the full test module).

    NOTE(review): the ``*_gpu`` tests use ``cuda.cupy`` / ``cuda.to_gpu`` and
    therefore presumably need a GPU marker or skip condition -- confirm.
    """

    def setUp(self):
        """Build the coder under test and random inputs of matching size."""
        self.coder = MultiboxCoder(self.grids, self.aspect_ratios, self.steps,
                                   self.sizes, (0.1, 0.2))
        # Expected number of default boxes: every cell of a grid x grid
        # feature map contributes (len(ar) + 1) * 2 boxes.
        self.n_bbox = sum(grid * grid * (len(ar) + 1) * 2
                          for grid, ar in zip(self.grids, self.aspect_ratios))
        self.bbox = _random_array((5, 4))
        self.label = np.random.randint(0, self.n_fg_class, size=5)
        self.mb_loc = _random_array((self.n_bbox, 4))
        self.mb_conf = _random_array((self.n_bbox, self.n_fg_class + 1))

    def test_to_cpu(self):
        """After ``to_cpu()`` the coder's array module is NumPy."""
        # Actually exercise the method this test is named after.
        self.coder.to_cpu()
        self.assertEqual(self.coder.xp, np)

    def test_to_gpu(self):
        """After ``to_gpu()`` the coder's array module is CuPy."""
        # Without this call the freshly built coder is compared against
        # CuPy without ever having been moved to the GPU.
        self.coder.to_gpu()
        self.assertEqual(self.coder.xp, cuda.cupy)

    # The misspelling "dafault" is kept on purpose so the test ID is stable.
    def test_dafault_bbox(self):
        """The default bounding boxes have shape ``(n_bbox, 4)``."""
        self.assertEqual(self.coder._default_bbox.shape, (self.n_bbox, 4))

    def _check_encode(self, bbox, label):
        """Encode ``bbox``/``label`` and check output types and shapes.

        The outputs must be arrays of the coder's own array module
        (``self.coder.xp``), with one row / entry per default box.
        """
        xp = self.coder.xp

        mb_loc, mb_label = self.coder.encode(bbox, label)

        self.assertIsInstance(mb_loc, xp.ndarray)
        self.assertEqual(mb_loc.shape, (self.n_bbox, 4))

        self.assertIsInstance(mb_label, xp.ndarray)
        self.assertEqual(mb_label.shape, (self.n_bbox, ))

    def test_encode_cpu(self):
        """``encode`` works with host arrays."""
        self._check_encode(self.bbox, self.label)

    def test_encode_gpu(self):
        """``encode`` works with device arrays."""
        # The helper checks results against ``self.coder.xp``, so the coder
        # has to live on the same device as its inputs.
        self.coder.to_gpu()
        self._check_encode(cuda.to_gpu(self.bbox), cuda.to_gpu(self.label))

    def _check_decode(self, mb_loc, mb_conf):
        """Decode ``mb_loc``/``mb_conf`` and check output types and shapes.

        ``bbox`` must be 2-D with 4 columns and at most
        ``n_bbox * n_fg_class`` rows; ``label`` and ``score`` must be 1-D
        with one entry per returned box.
        """
        xp = self.coder.xp

        # Use thresholds supplied by the fixture when present; otherwise fall
        # back to 0.45 / 0.6 (assumed to match decode()'s own defaults --
        # confirm against MultiboxCoder).
        nms_thresh = getattr(self, 'nms_thresh', 0.45)
        score_thresh = getattr(self, 'score_thresh', 0.6)

        bbox, label, score = self.coder.decode(
            mb_loc, mb_conf, nms_thresh, score_thresh)

        self.assertIsInstance(bbox, xp.ndarray)
        self.assertEqual(bbox.ndim, 2)
        self.assertLessEqual(bbox.shape[0], self.n_bbox * self.n_fg_class)
        self.assertEqual(bbox.shape[1], 4)

        self.assertIsInstance(label, xp.ndarray)
        self.assertEqual(label.ndim, 1)
        self.assertEqual(label.shape[0], bbox.shape[0])

        self.assertIsInstance(score, xp.ndarray)
        self.assertEqual(score.ndim, 1)
        self.assertEqual(score.shape[0], bbox.shape[0])

    def test_decode_cpu(self):
        """``decode`` works with host arrays."""
        self._check_decode(self.mb_loc, self.mb_conf)

    def test_decode_gpu(self):
        """``decode`` works with device arrays."""
        # Same reasoning as in test_encode_gpu: move the coder first.
        self.coder.to_gpu()
        self._check_decode(cuda.to_gpu(self.mb_loc), cuda.to_gpu(self.mb_conf))
Exemplo n.º 2
class SSD(chainer.Chain):
    """Base class of Single Shot Multibox Detector.

    This is a base class of Single Shot Multibox Detector [#]_.

    .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy,
       Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
       SSD: Single Shot MultiBox Detector. ECCV 2016.

    Args:
        extractor: A link which extracts feature maps.
            This link must have :obj:`insize`, :obj:`grids` and
            :meth:`__call__`.

            * :obj:`insize`: An integer which indicates \
            the size of input images. Images are resized to this size before \
            feature extraction.
            * :obj:`grids`: An iterable of integer. Each integer indicates \
            the size of feature map. This value is used by \
            :class:`MultiboxCoder`.
            * :meth:`__call__`: A method which computes feature maps. \
            It must take a batched images and return batched feature maps.
        multibox: A link which computes :obj:`mb_locs` and :obj:`mb_confs`
            from feature maps.
            This link must have :obj:`n_class`, :obj:`aspect_ratios` and
            :meth:`__call__`.

            * :obj:`n_class`: An integer which indicates the number of \
            classes. \
            This value should include the background class.
            * :obj:`aspect_ratios`: An iterable of tuple of integer. \
            Each tuple indicates the aspect ratios of default bounding boxes \
            at each feature maps. This value is used by \
            :class:`MultiboxCoder`.
            * :meth:`__call__`: A method which computes \
            :obj:`mb_locs` and :obj:`mb_confs`. \
            It must take a batched feature maps and \
            return :obj:`mb_locs` and :obj:`mb_confs`.
        steps (iterable of float): The step size for each feature map.
            This value is used by :class:`MultiboxCoder`.
        sizes (iterable of float): The base size of default bounding boxes
            for each feature map. This value is used by
            :class:`MultiboxCoder`.
        variance (tuple of floats): Two coefficients for decoding
            the locations of bounding boxes.
            This value is used by :class:`MultiboxCoder`.
            The default value is :obj:`(0.1, 0.2)`.
        mean: The value subtracted from every resized image before feature
            extraction. Presumably a scalar or an array broadcastable to a
            CHW image -- confirm against callers.
            The default value is :obj:`0`, which leaves images unchanged.

    Attributes:
        nms_thresh (float): The threshold value
            for :func:`~chainercv.utils.non_maximum_suppression`.
            The default value is :obj:`0.45`.
            This value can be changed directly or by using :meth:`use_preset`.
        score_thresh (float): The threshold value for confidence score.
            If a bounding box whose confidence score is lower than this value,
            the bounding box will be suppressed.
            The default value is :obj:`0.6`.
            This value can be changed directly or by using :meth:`use_preset`.

    """

    def __init__(
            self, extractor, multibox,
            steps, sizes, variance=(0.1, 0.2),
            mean=0):
        self.mean = mean
        # Initialise nms_thresh / score_thresh to the documented defaults
        # (0.45 / 0.6) so that predict() works without an explicit
        # use_preset() call.
        self.use_preset('visualize')

        super(SSD, self).__init__()
        with self.init_scope():
            self.extractor = extractor
            self.multibox = multibox

        # The coder is a plain attribute assigned outside init_scope(), so
        # it is moved between devices explicitly in to_cpu() / to_gpu().
        self.coder = MultiboxCoder(
            extractor.grids, multibox.aspect_ratios, steps, sizes, variance)

    @property
    def insize(self):
        """The input image size expected by the feature extractor."""
        return self.extractor.insize

    @property
    def n_fg_class(self):
        """The number of foreground classes (``n_class`` minus background)."""
        return self.multibox.n_class - 1

    def to_cpu(self):
        """Move the model and its box coder to the CPU."""
        super(SSD, self).to_cpu()
        self.coder.to_cpu()

    def to_gpu(self, device=None):
        """Move the model and its box coder to the given GPU device.

        Args:
            device: Forwarded unchanged to the parent class and to the coder.

        """
        super(SSD, self).to_gpu(device)
        self.coder.to_gpu(device=device)

    def __call__(self, x):
        """Compute localization and classification from a batch of images.

        This method computes two variables, :obj:`mb_locs` and :obj:`mb_confs`.
        :func:`self.coder.decode` converts these variables to bounding box
        coordinates and confidence scores.
        These variables are also used in training SSD.

        Args:
            x (chainer.Variable): A variable holding a batch of images.
                The images are preprocessed by :meth:`_prepare`.

        Returns:
            tuple of chainer.Variable:
            This method returns two variables, :obj:`mb_locs` and
            :obj:`mb_confs`.

            * **mb_locs**: A variable of float arrays of shape \
                :math:`(B, K, 4)`, \
                where :math:`B` is the number of samples in the batch and \
                :math:`K` is the number of default bounding boxes.
            * **mb_confs**: A variable of float arrays of shape \
                :math:`(B, K, L + 1)`, where :math:`L` is the number of \
                foreground classes (:obj:`n_fg_class`).

        """

        return self.multibox(self.extractor(x))

    def _prepare(self, img):
        """Convert an image to float32, resize it and subtract the mean.

        Args:
            img: An image array; it is resized to
                :obj:`(insize, insize)`.

        Returns:
            The preprocessed float32 image.

        """
        img = img.astype(np.float32)
        img = transforms.resize(img, (self.insize, self.insize))
        img -= self.mean
        return img

    def use_preset(self, preset):
        """Use the given preset during prediction.

        This method changes values of :obj:`nms_thresh` and
        :obj:`score_thresh`. These values are a threshold value
        used for non maximum suppression and a threshold value
        to discard low confidence proposals in :meth:`predict`,
        respectively.

        If the attributes need to be changed to something
        other than the values provided in the presets, please modify
        them by directly accessing the public attributes.

        Args:
            preset ({'visualize', 'evaluate'}): A string to determine the
                preset to use.

        Raises:
            ValueError: If :obj:`preset` is neither ``'visualize'`` nor
                ``'evaluate'``.

        """

        if preset == 'visualize':
            self.nms_thresh = 0.45
            self.score_thresh = 0.6
        elif preset == 'evaluate':
            self.nms_thresh = 0.45
            self.score_thresh = 0.01
        else:
            # Only unknown preset names are an error; without this branch
            # the raise would fire for 'evaluate' itself.
            raise ValueError('preset must be visualize or evaluate')

    def predict(self, imgs):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
            tuple of lists:
            This method returns a tuple of three lists,
            :obj:`(bboxes, labels, scores)`.

            * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
                where :math:`R` is the number of bounding boxes in a image. \
                Each bounding box is organized by \
                :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
                in the second axis.
            * **labels** : A list of integer arrays of shape :math:`(R,)`. \
                Each value indicates the class of the bounding box. \
                Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
                number of the foreground classes.
            * **scores** : A list of float arrays of shape :math:`(R,)`. \
                Each value indicates how confident the prediction is.

        """

        x = []
        sizes = []
        for img in imgs:
            # Remember the original size so boxes can be mapped back to it.
            _, H, W = img.shape
            img = self._prepare(img)
            # Place the preprocessed image on the model's device.
            x.append(self.xp.array(img))
            sizes.append((H, W))

        # Inference only: test-mode behaviour and no computational graph.
        with chainer.using_config('train', False), \
                chainer.no_backprop_mode():
            x = chainer.Variable(self.xp.stack(x))
            mb_locs, mb_confs = self(x)
        mb_locs, mb_confs = mb_locs.array, mb_confs.array

        bboxes = []
        labels = []
        scores = []
        for mb_loc, mb_conf, size in zip(mb_locs, mb_confs, sizes):
            bbox, label, score = self.coder.decode(
                mb_loc, mb_conf, self.nms_thresh, self.score_thresh)
            # Boxes are predicted in the resized image's coordinates; scale
            # them back to the original image size.
            bbox = transforms.resize_bbox(
                bbox, (self.insize, self.insize), size)
            # Hand host arrays back to the caller regardless of the device
            # the model runs on.
            bboxes.append(chainer.cuda.to_cpu(bbox))
            labels.append(chainer.cuda.to_cpu(label))
            scores.append(chainer.cuda.to_cpu(score))

        return bboxes, labels, scores
