def __call__(self, pic):
    """Convert ``pic`` to a tensor by delegating to ``F.to_tensor``.

    Args:
        pic (PIL Image or numpy.ndarray): Image to be converted to tensor.

    Returns:
        Tensor: Converted image.
    """
    converted = F.to_tensor(pic)
    return converted
def transform_test(imgs, short=416, max_size=1024, stride=1,
                   mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Transform image array(s) into normalized tensors for network input.

    Accepts a single ``numpy.ndarray`` or an iterable of ``numpy.ndarray``.

    Parameters
    ----------
    imgs : numpy.ndarray or iterable of numpy.ndarray
        Image(s) to be transformed.
    short : int, default=416
        Resize image short side to this `short` and keep aspect ratio.
    max_size : int, optional
        Maximum longer side length to fit image.
        This is to limit the input image shape.
    stride : int, optional, default is 1
        Forwarded to ``timage.resize_short_within`` as ``mult_base``; the
        resized width and height are meant to be multiples of `stride`.
        Use `stride = 1` to relax this constraint.
    mean : iterable of float
        Mean pixel values.
    std : iterable of float
        Standard deviations of pixel values.

    Returns
    -------
    (tensor, numpy.ndarray) or (list, list)
        For exactly one image: the normalized tensor with a leading batch
        axis added by ``unsqueeze(0)`` and the resized image cast to
        ``uint8`` for display. Otherwise two parallel lists (tensors,
        originals); use ``zip()`` to pair them up. Empty input yields two
        empty lists.

    Raises
    ------
    AssertionError
        If any element is not a ``numpy.ndarray``.
    """
    if isinstance(imgs, np.ndarray):
        imgs = [imgs]
    else:
        # Materialize first: a one-shot iterable (e.g. a generator) would
        # otherwise be exhausted by the validation pass below, leaving
        # nothing for the processing loop.
        imgs = list(imgs)

    for im in imgs:
        assert isinstance(im, np.ndarray), "Expect NDArray, got {}".format(
            type(im))

    tensors = []
    origs = []
    for img in imgs:
        img = timage.resize_short_within(img, short, max_size, mult_base=stride)
        # Keep an un-normalized copy of the resized image for visualization.
        orig_img = img.astype('uint8')
        img = vf.to_tensor(img)
        img = vf.normalize(img, mean=mean, std=std)
        tensors.append(img.unsqueeze(0))
        origs.append(orig_img)

    if len(tensors) == 1:
        return tensors[0], origs[0]
    return tensors, origs
def __call__(self, src, label):
    """Resize a validation image and its boxes to the configured size.

    The image is resized to ``(self._width, self._height)``, converted to a
    tensor and normalized with ``self._mean`` / ``self._std``. The boxes in
    ``label`` are rescaled from the source size to the same target size.

    Returns
    -------
    tuple
        ``(image_tensor, boxes)`` where ``boxes`` is cast to the dtype that
        ``type_map`` associates with the tensor's dtype.
    """
    src_h, src_w, _ = src.shape

    # Resize the image and its boxes to the same target size.
    resized = timage.imresize(src, self._width, self._height, interp=9)
    boxes = tbbox.resize(
        label,
        in_size=(src_w, src_h),
        out_size=(self._width, self._height),
    )

    tensor = vf.to_tensor(resized)
    tensor = vf.normalize(tensor, mean=self._mean, std=self._std)

    box_dtype = type_map[tensor.dtype]
    return tensor, boxes.astype(box_dtype)
def __call__(self, src, label):
    """Apply the training augmentation pipeline to an image/label pair.

    Steps, in order: random color distortion, random expansion (probability
    0.5), constrained random crop, resize to ``(self._width, self._height)``
    with a randomly chosen interpolation, random horizontal flip, then
    tensor conversion and normalization.

    Returns
    -------
    tuple
        If ``self._target_generator`` is ``None``: ``(img, bbox)`` with
        ``bbox`` cast to the image tensor's dtype. Otherwise
        ``(img, objectness, center_targets, scale_targets, weights,
        class_targets, gt_bboxes)``, each target being the first (only)
        batch entry produced by the target generator.
    """
    # random color jittering
    img = eximage.random_color_distort(src)

    # random expansion with prob 0.5
    bbox = label
    if np.random.uniform(0, 1) > 0.5:
        img, expand = timage.random_expand(
            img, fill=[m * 255 for m in self._mean])
        bbox = tbbox.translate(label, x_offset=expand[0], y_offset=expand[1])

    # random cropping
    h, w, _ = img.shape
    bbox, crop = exbbox.random_crop_with_constraints(bbox, (w, h))
    x0, y0, w, h = crop
    img = timage.fixed_crop(img, x0, y0, w, h)

    # resize with random interpolation
    h, w, _ = img.shape
    interp = np.random.randint(0, 5)
    img = timage.imresize(img, self._width, self._height, interp=interp)
    bbox = tbbox.resize(bbox, (w, h), (self._width, self._height))

    # random horizontal flip
    h, w, _ = img.shape
    img, flips = timage.random_flip(img, px=0.5)
    bbox = tbbox.flip(bbox, (w, h), flip_x=flips[0])

    # to tensor
    img = vf.to_tensor(img / 255)
    img = vf.normalize(img, mean=self._mean, std=self._std)

    if self._target_generator is None:
        img_dtype = img.dtype
        if isinstance(img_dtype, torch.dtype):
            # numpy's astype() cannot interpret a torch.dtype (it raises
            # TypeError), so cast through torch and hand back a numpy array.
            return img, torch.tensor(bbox, dtype=img_dtype).numpy()
        return img, bbox.astype(img_dtype)

    # generate training target so cpu workers can help reduce the workload on gpu
    gt_bboxes = torch.from_numpy(bbox[np.newaxis, :, :4])
    gt_ids = torch.from_numpy(bbox[np.newaxis, :, 4:5])
    if self._mixup:
        # With mixup enabled the last column of bbox is read as the mix ratio.
        gt_mixratio = torch.from_numpy(bbox[np.newaxis, :, -1:])
    else:
        gt_mixratio = None

    objectness, center_targets, scale_targets, weights, class_targets = self._target_generator(
        self._height, self._width, self._feat_maps, self._anchors,
        self._offsets, gt_bboxes, gt_ids, gt_mixratio)
    return (img, objectness[0], center_targets[0], scale_targets[0],
            weights[0], class_targets[0], gt_bboxes[0])
def transform_test(imgs, short=600, max_size=1000,
                   mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Transform image array(s) into normalized tensors for network input.

    Accepts a single ``numpy.ndarray`` or an iterable of ``numpy.ndarray``.

    Parameters
    ----------
    imgs : numpy.ndarray or iterable of numpy.ndarray
        Image(s) to be transformed.
    short : int, optional, default is 600
        Resize image short side to this `short` and keep aspect ratio.
    max_size : int, optional, default is 1000
        Maximum longer side length to fit image.
        This is to limit the input image shape, avoid processing too large image.
    mean : iterable of float
        Mean pixel values.
    std : iterable of float
        Standard deviations of pixel values.

    Returns
    -------
    (tensor, numpy.ndarray) or (list, list)
        For exactly one image: the normalized tensor with a leading batch
        axis added by ``unsqueeze(0)`` and the resized image cast to
        ``uint8`` for display. Otherwise two parallel lists (tensors,
        originals); use ``zip()`` to pair them up. Empty input yields two
        empty lists.

    Raises
    ------
    AssertionError
        If any element is not a ``numpy.ndarray``.
    """
    if isinstance(imgs, np.ndarray):
        imgs = [imgs]
    else:
        # Materialize first: a one-shot iterable (e.g. a generator) would
        # otherwise be exhausted by the validation pass below, leaving
        # nothing for the processing loop.
        imgs = list(imgs)

    for im in imgs:
        assert isinstance(im, np.ndarray), "Expect NDArray, got {}".format(
            type(im))

    tensors = []
    origs = []
    for img in imgs:
        img = timage.resize_short_within(img, short, max_size)
        # Keep an un-normalized copy of the resized image for visualization.
        orig_img = img.astype('uint8')
        img = vf.to_tensor(img)
        img = vf.normalize(img, mean=mean, std=std)
        tensors.append(img.unsqueeze(0))
        origs.append(orig_img)

    if len(tensors) == 1:
        return tensors[0], origs[0]
    return tensors, origs
def __call__(self, src, label, mask):
    """Resize and normalize a validation image, returning its size info.

    The short side is resized via ``timage.resize_short_within`` using
    ``self._short`` and ``self._max_size``. ``label`` and ``mask`` are
    accepted but not used: ground truth is left unscaled and the scaling
    ratio is returned instead.

    Returns
    -------
    tuple
        ``(image_tensor, info)`` where ``info`` is a float32 tensor holding
        the tensor's last two dimensions followed by the ratio
        ``resized_height / original_height``.
    """
    orig_h, _, _ = src.shape

    # resize shorter side but keep in max_size
    resized = timage.resize_short_within(
        src, self._short, self._max_size, interp=1)

    # ground truth is not rescaled; report the image scaling ratio instead
    scale = float(resized.shape[0]) / orig_h

    tensor = vf.normalize(
        vf.to_tensor(resized), mean=self._mean, std=self._std)

    info = [tensor.shape[-2], tensor.shape[-1], scale]
    return tensor, torch.tensor(info, dtype=torch.float32)
def __call__(self, src, label):
    """Resize and normalize a validation image and rescale its boxes.

    The short side is resized via ``timage.resize_short_within`` using
    ``self._short`` and ``self._max_size``; the boxes in ``label`` are
    rescaled from the original size to the resized image size.

    Returns
    -------
    tuple
        ``(image_tensor, boxes, scale)`` where ``boxes`` is cast to
        ``float32`` and ``scale`` is a one-element float32 tensor holding
        ``original_height / resized_height``.
    """
    orig_h, orig_w, _ = src.shape

    # resize shorter side but keep in max_size
    resized = timage.resize_short_within(
        src, self._short, self._max_size, interp=1)
    new_size = (resized.shape[1], resized.shape[0])

    # boxes follow the image to its new size; the ratio is returned as well
    boxes = tbbox.resize(label, (orig_w, orig_h), new_size)
    scale = orig_h / float(resized.shape[0])

    tensor = vf.to_tensor(resized)
    tensor = vf.normalize(tensor, mean=self._mean, std=self._std)

    scale_tensor = torch.tensor([scale], dtype=torch.float32)
    return tensor, boxes.astype('float32'), scale_tensor