Example #1
    def __init__(self, labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
        '''
        Arguments:
            labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
                of an image contains which bounding box coordinate. The dictionary maps at least the keywords
                'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within the last axis of the labels array.
        '''

        self.labels_format = labels_format

        # This randomly samples one of the lower IoU bounds defined
        # by the `sample_space` every time it is called.
        self.bound_generator = BoundGenerator(sample_space=((None, None),
                                                            (0.1, None),
                                                            (0.2, None),
                                                            (0.3, None)),
                                              weights=None)

        # Produces coordinates for candidate patches such that the height
        # and width of the patches are between 0.5 and 1.0 of the height
        # and width of the respective image and the aspect ratio of the
        # patches is between 0.5 and 2.0.
        self.patch_coord_generator = PatchCoordinateGenerator(must_match='h_w',
                                                              min_scale=0.5,
                                                              max_scale=1.0,
                                                              scale_uniformly=False,
                                                              min_aspect_ratio = 0.5,
                                                              max_aspect_ratio = 2.0)

        # Filters out boxes whose center point does not lie within the
        # chosen patches.
        self.box_filter = BoxFilter(check_overlap=True,
                                    check_min_area=False,
                                    check_degenerate=False,
                                    overlap_criterion='center_point',
                                    labels_format=self.labels_format)

        # Determines whether a given patch is considered a valid patch.
        # Defines a patch to be valid if at least one ground truth bounding box
        # (n_boxes_min == 1) has an IoU overlap with the patch that
        # meets the requirements defined by `bound_generator`.
        self.image_validator = ImageValidator(overlap_criterion='iou',
                                              n_boxes_min=1,
                                              labels_format=self.labels_format,
                                              border_pixels='half')

        # Performs crops according to the parameters set in the objects above.
        # Runs until either a valid patch is found or the original input image
        # is returned unaltered. Runs a maximum of 20 trials to find a valid
        # patch for each newly sampled IoU threshold. Every 20 trials, the original
        # image is returned as is with probability (1 - prob) = 0.75.
        self.random_crop = RandomPatchInf(patch_coord_generator=self.patch_coord_generator,
                                          box_filter=self.box_filter,
                                          image_validator=self.image_validator,
                                          bound_generator=self.bound_generator,
                                          n_trials_max=20,
                                          clip_boxes=True,
                                          prob=0.25,
                                          labels_format=self.labels_format)
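
# Hypothetical usage sketch for the class above; `SSDRandomCrop` stands in for
# its actual name, which the excerpt does not show. The `(image, labels)` call
# convention is assumed from ssd_keras-style transforms.
import numpy as np

random_crop = SSDRandomCrop()

image = np.random.randint(0, 256, size=(300, 480, 3), dtype=np.uint8)  # dummy RGB image
labels = np.array([[8, 20, 30, 120, 180]])  # one box: (class_id, xmin, ymin, xmax, ymax)

# Returns the cropped patch and the boxes whose center points lie inside it,
# or the unaltered input if no valid patch was found within the trial budget.
image, labels = random_crop(image, labels)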
Example #2
    def __init__(self,
                 img_height=300,
                 img_width=300,
                 background=(123, 117, 104),
                 labels_format={
                     'class_id': 0,
                     'xmin': 1,
                     'ymin': 2,
                     'xmax': 3,
                     'ymax': 4
                 }):
        '''
        Arguments:
            img_height (int): The desired height of the output images in pixels.
            img_width (int): The desired width of the output images in pixels.
            background (list/tuple, optional): A 3-tuple specifying the RGB color value of the
                background pixels of the translated images.
            labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
                of an image contains which bounding box coordinate. The dictionary maps at least the keywords
                'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within the last axis of the labels array.
        '''

        self.labels_format = labels_format

        self.photometric_distortions = SSDPhotometricDistortions()
        self.expand = SSDExpand(background=background,
                                labels_format=self.labels_format)
        self.random_crop = SSDRandomCrop(labels_format=self.labels_format)
        self.random_flip = RandomFlip(dim='horizontal',
                                      prob=0.5,
                                      labels_format=self.labels_format)

        # This box filter makes sure that the resized images don't contain any degenerate boxes.
        # Resizing the images could cause the boxes to become smaller. For boxes that are already
        # quite small, that might result in boxes with a height and/or width of zero, which we
        # obviously cannot allow.
        self.box_filter = BoxFilter(check_overlap=False,
                                    check_min_area=False,
                                    check_degenerate=True,
                                    labels_format=self.labels_format)

        self.resize = ResizeRandomInterp(height=img_height,
                                         width=img_width,
                                         interpolation_modes=[
                                             cv2.INTER_NEAREST,
                                             cv2.INTER_LINEAR, cv2.INTER_CUBIC,
                                             cv2.INTER_AREA, cv2.INTER_LANCZOS4
                                         ],
                                         box_filter=self.box_filter,
                                         labels_format=self.labels_format)

        self.sequence = [
            self.photometric_distortions, self.expand, self.random_crop,
            self.random_flip, self.resize
        ]
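
# A sketch of how the `sequence` above might be applied; each member is assumed
# to follow the `(image, labels) -> (image, labels)` convention used by
# ssd_keras-style transforms (the excerpt omits the class's `__call__`).
def apply_sequence(aug, image, labels=None):
    for transform in aug.sequence:
        if labels is None:
            image = transform(image)
        else:
            image, labels = transform(image, labels)
    return image if labels is None else (image, labels)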
Example #4
    def generate(self,
                 batch_size=32,
                 shuffle=True,
                 transformations=[],
                 label_encoder=None,
                 returns={'processed_images', 'encoded_labels'},
                 keep_images_without_gt=False,
                 degenerate_box_handling='remove'):
        '''
        Generates batches of samples and (optionally) corresponding labels indefinitely.

        Can shuffle the samples consistently after each complete pass.

        Optionally takes a list of arbitrary image transformations to apply to the
        samples ad hoc.

        Arguments:
            batch_size (int, optional): The size of the batches to be generated.
            shuffle (bool, optional): Whether or not to shuffle the dataset before each pass.
                This option should always be `True` during training, but it can be useful to turn shuffling off
                for debugging or if you're using the generator for prediction.
            transformations (list, optional): A list of transformations that will be applied to the images and labels
                in the given order. Each transformation is a callable that takes as input an image (as a Numpy array)
                and optionally labels (also as a Numpy array) and returns an image and optionally labels in the same
                format.
            label_encoder (callable, optional): Only relevant if labels are given. A callable that takes as input the
                labels of a batch (as a list of Numpy arrays) and returns some structure that represents those labels.
                The general use case for this is to convert labels from their input format to a format that a given object
                detection model needs as its training targets.
            returns (set, optional): A set of strings that determines what outputs the generator yields. The generator's output
                is always a tuple with the processed images as its first element and, if labels and a label encoder are given,
                the encoded labels as its second element. Apart from that, the output tuple can contain additional outputs
                according to the keywords specified here. The possible keyword strings and their respective outputs are:
                * 'processed_images': An array containing the processed images. Will always be in the outputs, so it doesn't
                    matter whether or not you include this keyword in the set.
                * 'encoded_labels': The encoded labels tensor. Will always be in the outputs if a label encoder is given,
                    so it doesn't matter whether or not you include this keyword in the set if you pass a label encoder.
                * 'processed_labels': The processed, but not yet encoded labels. This is a list that contains for each
                    batch image a Numpy array with all ground truth boxes for that image. Only available if ground truth is available.
                * 'filenames': A list containing the file names (full paths) of the images in the batch.
                * 'image_ids': A list containing the integer IDs of the images in the batch. Only available if there
                    are image IDs available.
                * 'original_images': A list containing the original images in the batch before any processing.
                * 'original_labels': A list containing the original ground truth boxes for the images in this batch before any
                    processing. Only available if ground truth is available.
                The order of the outputs in the tuple is the order of the list above. If `returns` contains a keyword for an
                output that is unavailable, that output will be omitted from the yielded tuples and a warning will be raised.
            keep_images_without_gt (bool, optional): If `False`, images for which there aren't any ground truth boxes before
                any transformations have been applied will be removed from the batch. If `True`, such images will be kept
                in the batch.
            degenerate_box_handling (str, optional): How to handle degenerate boxes, which are boxes that have `w <= 0` and/or
                `h <= 0`. Degenerate boxes can sometimes be in the dataset, or non-degenerate boxes can become degenerate
                after they were processed by transformations. Note that the generator checks for degenerate boxes after all
                transformations have been applied (if any), but before the labels were passed to the `label_encoder` (if one was given).
                Can be one of 'warn' or 'remove'. If 'warn', the generator will merely print a warning to let you know that there
                are degenerate boxes in a batch. If 'remove', the generator will remove degenerate boxes from the batch silently.

        Yields:
            The next batch as a tuple of items as defined by the `returns` argument. By default, this will be
            a 2-tuple containing the processed batch images as its first element and the encoded ground truth boxes
            tensor as its second element if in training mode, or a 1-tuple containing only the processed batch images if
            not in training mode. Any additional outputs must be specified in the `returns` argument.
        '''


        if self.dataset_size == 0:
            raise DatasetError("Cannot generate batches because you did not load a dataset.")

        #############################################################################################
        # Warn if any of the set returns aren't possible.
        #############################################################################################

        if self.labels is None:
            if any([ret in returns for ret in ['original_labels', 'processed_labels', 'encoded_labels']]):
                warnings.warn("Since no labels were given, none of 'original_labels', 'processed_labels' and 'encoded_labels' " +
                              "are possible returns, but you set `returns = {}`. The impossible returns will be missing from the output.".format(returns))
        elif label_encoder is None:
            if 'encoded_labels' in returns:
                warnings.warn("Since no label encoder was given, 'encoded_labels' is not a possible return, " +
                              "but you set `returns = {}`. The impossible returns will be missing from the output.".format(returns))
        if (self.image_ids is None) and ('image_ids' in returns):
            warnings.warn("No image IDs were given, therefore 'image_ids' is not a possible return, " +
                          "but you set `returns = {}`. The impossible returns will be missing from the output.".format(returns))

        #############################################################################################
        # Do a few preparatory things like maybe shuffling the dataset initially.
        #############################################################################################

        if shuffle:
            objects_to_shuffle = [self.dataset_indices]
            if not (self.filenames is None):
                objects_to_shuffle.append(self.filenames)
            if not (self.labels is None):
                objects_to_shuffle.append(self.labels)
            if not (self.image_ids is None):
                objects_to_shuffle.append(self.image_ids)
            if not (self.images is None):
                objects_to_shuffle.append(self.images)
            shuffled_objects = sklearn.utils.shuffle(*objects_to_shuffle)
            for i in range(len(objects_to_shuffle)):
                objects_to_shuffle[i][:] = shuffled_objects[i]


        if degenerate_box_handling == 'remove':
            box_filter = BoxFilter(check_overlap=False,
                                   check_min_area=False,
                                   check_degenerate=True)

        #############################################################################################
        # Generate mini batches.
        #############################################################################################

        current = 0

        while True:

            batch_X, batch_y = [], []

            if current >= self.dataset_size:
                current = 0

                #########################################################################################
                # Maybe shuffle the dataset if a full pass over the dataset has finished.
                #########################################################################################

                if shuffle:
                    objects_to_shuffle = [self.dataset_indices]
                    if not (self.filenames is None):
                        objects_to_shuffle.append(self.filenames)
                    if not (self.labels is None):
                        objects_to_shuffle.append(self.labels)
                    if not (self.image_ids is None):
                        objects_to_shuffle.append(self.image_ids)
                    if not (self.images is None):
                        objects_to_shuffle.append(self.images)
                    shuffled_objects = sklearn.utils.shuffle(*objects_to_shuffle)
                    for i in range(len(objects_to_shuffle)):
                        objects_to_shuffle[i][:] = shuffled_objects[i]


            #########################################################################################
            # Get the images, image filenames, (maybe) image IDs, and (maybe) labels for this batch.
            #########################################################################################

            # We prioritize our options in the following order:
            # 1) If we have the images already loaded in memory, get them from there.
            # 2) Else, we'll have to load the individual image files from disk.
            batch_indices = self.dataset_indices[current:current + batch_size]
            if not (self.images is None):
                for i in batch_indices:
                    batch_X.append(self.images[i])
                if not (self.filenames is None):
                    batch_filenames = self.filenames[current:current + batch_size]
                else:
                    batch_filenames = None
            else:
                batch_filenames = self.filenames[current:current + batch_size]
                for filename in batch_filenames:
                    with Image.open(filename) as image:
                        batch_X.append(np.array(image, dtype=np.uint8))

            # Get the labels for this batch (if there are any).
            if not (self.labels is None):
                batch_y = deepcopy(self.labels[current:current+batch_size])
            else:
                batch_y = None

            # Get the image IDs for this batch (if there are any).
            if not (self.image_ids is None):
                batch_image_ids = self.image_ids[current:current+batch_size]
            else:
                batch_image_ids = None

            if 'original_images' in returns:
                batch_original_images = deepcopy(batch_X) # The original, unaltered images
            if 'original_labels' in returns and not self.labels is None:
                batch_original_labels = deepcopy(batch_y) # The original, unaltered labels

            current += batch_size

            #########################################################################################
            # Maybe perform image transformations.
            #########################################################################################

            batch_items_to_remove = [] # In case we need to remove any images from the batch, store their indices in this list.
            for i in range(len(batch_X)):

                if not (self.labels is None):
                    # Convert the labels for this image to an array (in case they aren't already).
                    batch_y[i] = np.array(batch_y[i])
                    # If this image has no ground truth boxes, maybe we don't want to keep it in the batch.
                    if (batch_y[i].size == 0) and not keep_images_without_gt:
                        batch_items_to_remove.append(i)
                        continue

                # Apply any image transformations we may have received.
                if transformations:

                    transform_failed = False

                    for transform in transformations:

                        if not (self.labels is None):

                            batch_X[i], batch_y[i] = transform(batch_X[i], batch_y[i])

                            if batch_X[i] is None: # In case the transform failed to produce an output image, which is possible for some random transforms.
                                batch_items_to_remove.append(i)
                                transform_failed = True
                                break

                        else:

                            batch_X[i] = transform(batch_X[i])

                    # If a transform failed on this item, skip the rest of the loop body for it;
                    # the item will be removed from the batch below.
                    if transform_failed:
                        continue

                if self.show_images:
                    visualize(batch_X[i], gt_labels=None if batch_y is None else batch_y[i])
                #########################################################################################
                # Check for degenerate boxes in this batch item.
                #########################################################################################

                # Indices of the box width and height in the last axis of the labels array.
                w = 3
                h = 4

                # Check for degenerate ground truth bounding boxes before attempting any computations.
                if not (self.labels is None):
                    if np.any(batch_y[i][:,w] <= 0) or np.any(batch_y[i][:,h] <= 0):
                        if degenerate_box_handling == 'warn':
                            warnings.warn("Detected degenerate ground truth bounding boxes for batch item {} with bounding boxes {}, ".format(i, batch_y[i]) +
                                          "i.e. bounding boxes where h <= 0 and/or w <= 0. " +
                                          "This could mean that your dataset contains degenerate ground truth boxes, or that any image transformations you may apply might " +
                                          "result in degenerate ground truth boxes, or that you are parsing the ground truth in the wrong coordinate format. " +
                                          "Degenerate ground truth bounding boxes may lead to NaN errors during training.")
                        elif degenerate_box_handling == 'remove':
                            batch_y[i] = box_filter(batch_y[i])
                            if (batch_y[i].size == 0) and not keep_images_without_gt:
                                batch_items_to_remove.append(i)

            #########################################################################################
            # Remove any items we might not want to keep from the batch.
            #########################################################################################

            if batch_items_to_remove:
                for j in sorted(batch_items_to_remove, reverse=True):
                    # This isn't efficient, but it hopefully shouldn't need to be done often anyway.
                    batch_X.pop(j)
                    if not (batch_filenames is None): batch_filenames.pop(j)
                    if not (self.labels is None): batch_y.pop(j)
                    if not (self.image_ids is None): batch_image_ids.pop(j)
                    if 'original_images' in returns: batch_original_images.pop(j)
                    if 'original_labels' in returns and not (self.labels is None): batch_original_labels.pop(j)

            #########################################################################################

            # CAUTION: Converting `batch_X` into an array will result in an empty batch if the images have varying sizes
            #          or varying numbers of channels. At this point, all images must have the same size and the same
            #          number of channels.

            batch_X = np.array(batch_X)
            if (batch_X.size == 0):
                raise DegenerateBatchError("You produced an empty batch. This might be because the images in the batch vary " +
                                           "in their size and/or number of channels. Note that after all transformations " +
                                           "(if any were given) have been applied to all images in the batch, all images " +
                                           "must be homogenous in size along all axes.")
            ########################################################################################
            # Visualize the batch items if needed.
            ########################################################################################

            if self.show_images:
                for i in range(len(batch_X)):
                    visualize(batch_X[i], gt_labels=None if batch_y is None else batch_y[i])

            #########################################################################################
            # If we have a label encoder, encode our labels.
            #########################################################################################

            if not (label_encoder is None or self.labels is None):
                batch_y_encoded = label_encoder(batch_y, diagnostics=False)

            else:
                batch_y_encoded = None

            #########################################################################################
            # Compose the output.
            #########################################################################################

            ret = []
            if 'processed_images' in returns: ret.append(batch_X)
            if 'encoded_labels' in returns: ret.append(batch_y_encoded)
            if 'processed_labels' in returns: ret.append(batch_y)
            if 'filenames' in returns: ret.append(batch_filenames)
            if 'image_ids' in returns: ret.append(batch_image_ids)
            if 'original_images' in returns: ret.append(batch_original_images)
            if 'original_labels' in returns: ret.append(batch_original_labels)
            yield ret
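
# A minimal consumption sketch for `generate`; `dataset` (a loaded generator
# object), `ssd_augmentation`, and `ssd_input_encoder` are assumed to exist
# and are not defined in this excerpt.
train_generator = dataset.generate(batch_size=32,
                                   shuffle=True,
                                   transformations=[ssd_augmentation],
                                   label_encoder=ssd_input_encoder,
                                   returns={'processed_images', 'encoded_labels'},
                                   keep_images_without_gt=False,
                                   degenerate_box_handling='remove')

# Each call to `next()` yields one batch in the order defined by `returns`.
batch_images, batch_labels_encoded = next(train_generator)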
Example #5
    def __init__(self,
                 random_brightness=(-48, 48, 0.5),
                 random_contrast=(0.5, 1.8, 0.5),
                 random_saturation=(0.5, 1.8, 0.5),
                 random_hue=(18, 0.5),
                 random_flip=0.5,
                 random_translate=((0.03, 0.5), (0.03, 0.5), 0.5),
                 random_scale=(0.5, 2.0, 0.5),
                 n_trials_max=3,
                 clip_boxes=True,
                 overlap_criterion='area',
                 bounds_box_filter=(0.3, 1.0),
                 bounds_validator=(0.5, 1.0),
                 n_boxes_min=1,
                 background=(0, 0, 0),
                 labels_format={
                     'class_id': 0,
                     'xmin': 1,
                     'ymin': 2,
                     'xmax': 3,
                     'ymax': 4
                 }):

        if (random_scale[0] >= 1) or (random_scale[1] <= 1):
            raise ValueError(
                "This sequence of transformations only makes sense if the minimum scaling factor is <1 and the maximum scaling factor is >1."
            )

        self.n_trials_max = n_trials_max
        self.clip_boxes = clip_boxes
        self.overlap_criterion = overlap_criterion
        self.bounds_box_filter = bounds_box_filter
        self.bounds_validator = bounds_validator
        self.n_boxes_min = n_boxes_min
        self.background = background
        self.labels_format = labels_format

        # Determines which boxes are kept in an image after the transformations have been applied.
        self.box_filter = BoxFilter(check_overlap=True,
                                    check_min_area=True,
                                    check_degenerate=True,
                                    overlap_criterion=self.overlap_criterion,
                                    overlap_bounds=self.bounds_box_filter,
                                    min_area=16,
                                    labels_format=self.labels_format)

        # Determines whether the result of the transformations is a valid training image.
        self.image_validator = ImageValidator(
            overlap_criterion=self.overlap_criterion,
            bounds=self.bounds_validator,
            n_boxes_min=self.n_boxes_min,
            labels_format=self.labels_format)

        # Utility distortions
        self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV')
        self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB')
        self.convert_to_float32 = ConvertDataType(to='float32')
        self.convert_to_uint8 = ConvertDataType(to='uint8')
        self.convert_to_3_channels = ConvertTo3Channels()  # Make sure all images end up having 3 channels.

        # Photometric transformations
        self.random_brightness = RandomBrightness(lower=random_brightness[0],
                                                  upper=random_brightness[1],
                                                  prob=random_brightness[2])
        self.random_contrast = RandomContrast(lower=random_contrast[0],
                                              upper=random_contrast[1],
                                              prob=random_contrast[2])
        self.random_saturation = RandomSaturation(lower=random_saturation[0],
                                                  upper=random_saturation[1],
                                                  prob=random_saturation[2])
        self.random_hue = RandomHue(max_delta=random_hue[0],
                                    prob=random_hue[1])

        # Geometric transformations
        self.random_flip = RandomFlip(dim='horizontal',
                                      prob=random_flip,
                                      labels_format=self.labels_format)
        self.random_translate = RandomTranslate(
            dy_minmax=random_translate[0],
            dx_minmax=random_translate[1],
            prob=random_translate[2],
            clip_boxes=self.clip_boxes,
            box_filter=self.box_filter,
            image_validator=self.image_validator,
            n_trials_max=self.n_trials_max,
            background=self.background,
            labels_format=self.labels_format)
        self.random_zoom_in = RandomScale(min_factor=1.0,
                                          max_factor=random_scale[1],
                                          prob=random_scale[2],
                                          clip_boxes=self.clip_boxes,
                                          box_filter=self.box_filter,
                                          image_validator=self.image_validator,
                                          n_trials_max=self.n_trials_max,
                                          background=self.background,
                                          labels_format=self.labels_format)
        self.random_zoom_out = RandomScale(
            min_factor=random_scale[0],
            max_factor=1.0,
            prob=random_scale[2],
            clip_boxes=self.clip_boxes,
            box_filter=self.box_filter,
            image_validator=self.image_validator,
            n_trials_max=self.n_trials_max,
            background=self.background,
            labels_format=self.labels_format)

        # If we zoom in, do translation before scaling.
        self.sequence1 = [
            self.convert_to_3_channels, self.convert_to_float32,
            self.random_brightness, self.random_contrast,
            self.convert_to_uint8, self.convert_RGB_to_HSV,
            self.convert_to_float32, self.random_saturation, self.random_hue,
            self.convert_to_uint8, self.convert_HSV_to_RGB,
            self.random_translate, self.random_zoom_in, self.random_flip
        ]

        # If we zoom out, do scaling before translation.
        self.sequence2 = [
            self.convert_to_3_channels, self.convert_to_float32,
            self.random_brightness, self.convert_to_uint8,
            self.convert_RGB_to_HSV, self.convert_to_float32,
            self.random_saturation, self.random_hue, self.convert_to_uint8,
            self.convert_HSV_to_RGB, self.convert_to_float32,
            self.random_contrast, self.convert_to_uint8, self.random_zoom_out,
            self.random_translate, self.random_flip
        ]
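
# A sketch of a plausible dispatch between the two chains above: flip a fair
# coin between the zoom-in chain (sequence1) and the zoom-out chain (sequence2).
# The actual `__call__` is not shown in the excerpt, so this is an assumption.
import numpy as np

def apply_augmentation(aug, image, labels):
    sequence = aug.sequence1 if np.random.choice(2) == 0 else aug.sequence2
    for transform in sequence:
        image, labels = transform(image, labels)
    return image, labels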
Example #6
    def get_encoded_boxlabel(self,
                             batch_size=32,
                             label_encoder=None,
                             keep_images_without_gt=False,
                             degenerate_box_handling='remove',
                             mask_groundth_dir=None):
        '''
        Produces a single batch of samples and (optionally) the corresponding encoded labels.

        Arguments:
            batch_size (int, optional): The size of the batch to be generated.
            label_encoder (callable, optional): Only relevant if labels are given. A callable that takes as input the
                labels of a batch (as a list of Numpy arrays) and returns some structure that represents those labels.
                The general use case for this is to convert labels from their input format to a format that a given object
                detection model needs as its training targets.
            degenerate_box_handling (str, optional): How to handle degenerate boxes. Can be one of 'warn' or 'remove'.
        '''

        self.dataset_size = batch_size

        #############################################################################################
        # Do a few preparatory things like maybe shuffling the dataset initially.
        #############################################################################################
        if degenerate_box_handling == 'remove':
            box_filter = BoxFilter(check_overlap=False,
                                   check_min_area=False,
                                   check_degenerate=True,
                                   labels_format=self.labels_format)

        #############################################################################################
        # Generate mini batches.
        #############################################################################################

        current = 0

        batch_X, batch_y = [], []

        if current >= self.dataset_size:
            current = 0

        #########################################################################################
        # Get the images, (maybe) image IDs, (maybe) labels, etc. for this batch.
        #########################################################################################

        # We prioritize our options in the following order:
        # 1) If we have the images already loaded in memory, get them from there.
        # 2) Else, if we have an HDF5 dataset, get the images from there.
        # 3) Else, if we have neither of the above, we'll have to load the individual image
        #    files from disk.
        batch_indices = self.dataset_indices[current:current + batch_size]
        if not (self.images is None):
            for i in batch_indices:
                batch_X.append(self.images[i])
            if not (self.filenames is None):
                batch_filenames = self.filenames[current:current + batch_size]
            else:
                batch_filenames = None
        else:
            batch_filenames = self.filenames[current:current + batch_size]
            for filename in batch_filenames:
                with Image.open(filename) as image:
                    batch_X.append(np.array(image, dtype=np.uint8))

        # Get the labels for this batch (if there are any).
        if not (self.labels is None):
            batch_y = deepcopy(self.labels[current:current + batch_size])
        else:
            batch_y = None

        # Get the image IDs for this batch (if there are any).
        if not (self.image_ids is None):
            batch_image_ids = self.image_ids[current:current + batch_size]
        else:
            batch_image_ids = None

        current += batch_size

        #########################################################################################
        # Maybe perform image transformations.
        #########################################################################################

        #########################################################################################
        # Check for degenerate boxes in this batch item.
        #########################################################################################
        for i in range(len(batch_X)):

            if not (self.labels is None):

                xmin = self.labels_format['xmin']
                ymin = self.labels_format['ymin']
                xmax = self.labels_format['xmax']
                ymax = self.labels_format['ymax']

                if np.any(batch_y[i][:, xmax] -
                          batch_y[i][:, xmin] <= 0) or np.any(
                              batch_y[i][:, ymax] - batch_y[i][:, ymin] <= 0):
                    if degenerate_box_handling == 'warn':
                        warnings.warn("Detected degenerate ground truth bounding boxes for batch item {} with bounding boxes {}, ".format(i, batch_y[i]) +
                                      "i.e. bounding boxes where xmax <= xmin and/or ymax <= ymin. " +
                                      "This could mean that your dataset contains degenerate ground truth boxes, or that any image transformations you may apply might " +
                                      "result in degenerate ground truth boxes, or that you are parsing the ground truth in the wrong coordinate format. " +
                                      "Degenerate ground truth bounding boxes may lead to NaN errors during training.")
                    elif degenerate_box_handling == 'remove':
                        batch_y[i] = box_filter(batch_y[i])

        #########################################################################################

        # CAUTION: Converting `batch_X` into an array will result in an empty batch if the images have varying sizes
        #          or varying numbers of channels. At this point, all images must have the same size and the same
        #          number of channels.

        batch_X = np.array(batch_X)
        #########################################################################################
        # If we have a label encoder, encode our labels.
        #########################################################################################

        if not (label_encoder is None or self.labels is None):
            batch_y_encoded = label_encoder(batch_y, diagnostics=False)
        else:
            batch_y_encoded = None

        return np.asarray(batch_X), np.asarray(batch_y_encoded)
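
# Hypothetical call sketch for `get_encoded_boxlabel`; `dataset` (a generator
# object with images and labels already loaded) and `ssd_input_encoder` are
# assumed to exist and are not defined in this excerpt.
batch_X, batch_y_encoded = dataset.get_encoded_boxlabel(batch_size=32,
                                                        label_encoder=ssd_input_encoder,
                                                        degenerate_box_handling='remove')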
Example #7
    def __init__(self,
                 resize_height,
                 resize_width,
                 random_brightness=(-48, 48, 0.5),
                 random_contrast=(0.5, 1.8, 0.5),
                 random_saturation=(0.5, 1.8, 0.5),
                 random_hue=(18, 0.5),
                 random_flip=0.5,
                 min_scale=0.3,
                 max_scale=2.0,
                 min_aspect_ratio=0.5,
                 max_aspect_ratio=2.0,
                 n_trials_max=3,
                 clip_boxes=True,
                 overlap_criterion='area',
                 bounds_box_filter=(0.3, 1.0),
                 bounds_validator=(0.5, 1.0),
                 n_boxes_min=1,
                 background=(0, 0, 0),
                 labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):

        self.n_trials_max = n_trials_max
        self.clip_boxes = clip_boxes
        self.overlap_criterion = overlap_criterion
        self.bounds_box_filter = bounds_box_filter
        self.bounds_validator = bounds_validator
        self.n_boxes_min = n_boxes_min
        self.background = background
        self.labels_format = labels_format

        # Determines which boxes are kept in an image after the transformations have been applied.
        self.box_filter_patch = BoxFilter(check_overlap=True,
                                          check_min_area=False,
                                          check_degenerate=False,
                                          overlap_criterion=self.overlap_criterion,
                                          overlap_bounds=self.bounds_box_filter,
                                          labels_format=self.labels_format)

        self.box_filter_resize = BoxFilter(check_overlap=False,
                                           check_min_area=True,
                                           check_degenerate=True,
                                           min_area=16,
                                           labels_format=self.labels_format)

        # Determines whether the result of the transformations is a valid training image.
        self.image_validator = ImageValidator(overlap_criterion=self.overlap_criterion,
                                              bounds=self.bounds_validator,
                                              n_boxes_min=self.n_boxes_min,
                                              labels_format=self.labels_format)

        # Utility transformations
        self.convert_to_3_channels = ConvertTo3Channels()  # Make sure all images end up having 3 channels.
        self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV')
        self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB')
        self.convert_to_float32 = ConvertDataType(to='float32')
        self.convert_to_uint8 = ConvertDataType(to='uint8')
        self.resize = Resize(height=resize_height,
                             width=resize_width,
                             box_filter=self.box_filter_resize,
                             labels_format=self.labels_format)

        # Photometric transformations
        self.random_brightness = RandomBrightness(lower=random_brightness[0], upper=random_brightness[1],
                                                  prob=random_brightness[2])
        self.random_contrast = RandomContrast(lower=random_contrast[0], upper=random_contrast[1],
                                              prob=random_contrast[2])
        self.random_saturation = RandomSaturation(lower=random_saturation[0], upper=random_saturation[1],
                                                  prob=random_saturation[2])
        self.random_hue = RandomHue(max_delta=random_hue[0], prob=random_hue[1])

        # Geometric transformations
        self.random_flip = RandomFlip(dim='horizontal', prob=random_flip, labels_format=self.labels_format)
        self.patch_coord_generator = PatchCoordinateGenerator(must_match='w_ar',
                                                              min_scale=min_scale,
                                                              max_scale=max_scale,
                                                              scale_uniformly=False,
                                                              min_aspect_ratio=min_aspect_ratio,
                                                              max_aspect_ratio=max_aspect_ratio)
        self.random_patch = RandomPatch(patch_coord_generator=self.patch_coord_generator,
                                        box_filter=self.box_filter_patch,
                                        image_validator=self.image_validator,
                                        n_trials_max=self.n_trials_max,
                                        clip_boxes=self.clip_boxes,
                                        prob=1.0,
                                        can_fail=False,
                                        labels_format=self.labels_format)

        # Define the processing chain
        self.transformations = [self.convert_to_3_channels,
                                self.convert_to_float32,
                                self.random_brightness,
                                self.random_contrast,
                                self.convert_to_uint8,
                                self.convert_RGB_to_HSV,
                                self.convert_to_float32,
                                self.random_saturation,
                                self.random_hue,
                                self.convert_to_uint8,
                                self.convert_HSV_to_RGB,
                                self.random_patch,
                                self.random_flip,
                                self.resize]
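
# Hypothetical usage sketch; `DataAugmentationVariableInputSize` stands in for
# this class's actual name, which the excerpt does not show.
import numpy as np

augmentation = DataAugmentationVariableInputSize(resize_height=300, resize_width=300)

image = np.random.randint(0, 256, size=(480, 640, 3), dtype=np.uint8)  # dummy RGB image
labels = np.array([[1, 50, 60, 200, 220]])  # one box: (class_id, xmin, ymin, xmax, ymax)

# Each member of the chain maps (image, labels) -> (image, labels).
for transform in augmentation.transformations:
    image, labels = transform(image, labels)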
Example #8
    def __init__(self,
                 resize_height,
                 resize_width,
                 random_brightness=(-20, 20, 0.5),
                 random_contrast=(0.8, 1.0, 0.5),
                 random_saturation=(0.8, 1.8, 0.5),
                 random_hue=(10, 0.5),
                 random_flip=0.5,
                 random_rotate_small=([np.pi / 40, np.pi / 30], 0.5),
                 random_rotate_big=([np.pi / 2, np.pi, 3 * np.pi / 2], 0.5),
                 min_scale=0.8,
                 max_scale=1.05,
                 min_aspect_ratio=0.8,
                 max_aspect_ratio=1.2,
                 n_trials_max=3,
                 overlap_criterion='center_point',
                 bounds_box_filter=(0.3, 1.0),
                 bounds_validator=(0.5, 1.0),
                 n_boxes_min=1,
                 random_translate=((0.03, 0.05), (0.03, 0.05), 0.5),
                 random_scale=(0.9, 1.1, 0.5),
                 proba_no_aug=1 / 3):

        self.n_trials_max = n_trials_max
        self.overlap_criterion = overlap_criterion
        self.bounds_box_filter = bounds_box_filter
        self.bounds_validator = bounds_validator
        self.n_boxes_min = n_boxes_min

        self.proba_no_aug = proba_no_aug  # the probability of not performing any transformations

        # Determines which boxes are kept in an image after the transformations have been applied.
        self.box_filter = BoxFilter(check_overlap=True,
                                    check_min_area=False,
                                    check_degenerate=False,
                                    overlap_criterion=self.overlap_criterion,
                                    overlap_bounds=self.bounds_box_filter)

        self.box_filter_resize = BoxFilter(check_overlap=False,
                                           check_min_area=True,
                                           check_degenerate=True,
                                           min_area=16)

        # Determines whether the result of the transformations is a valid training image.
        self.image_validator = ImageValidator(
            overlap_criterion=self.overlap_criterion,
            bounds=self.bounds_validator,
            n_boxes_min=self.n_boxes_min)

        # Utility transformations
        self.convert_to_3_channels = ConvertTo3Channels()  # Make sure all images end up having 3 channels.
        self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV')
        self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB')
        self.convert_to_float32 = ConvertDataType(to='float32')
        self.convert_to_uint8 = ConvertDataType(to='uint8')
        self.resize = Resize(height=resize_height,
                             width=resize_width,
                             box_filter=self.box_filter_resize)

        # Photometric transformations
        self.random_brightness = RandomBrightness(lower=random_brightness[0],
                                                  upper=random_brightness[1],
                                                  prob=random_brightness[2])
        self.random_contrast = RandomContrast(lower=random_contrast[0],
                                              upper=random_contrast[1],
                                              prob=random_contrast[2])
        self.random_saturation = RandomSaturation(lower=random_saturation[0],
                                                  upper=random_saturation[1],
                                                  prob=random_saturation[2])
        self.random_hue = RandomHue(max_delta=random_hue[0],
                                    prob=random_hue[1])

        # Geometric transformations
        self.random_horizontal_flip = RandomFlip(dim='horizontal',
                                                 prob=random_flip)
        self.random_vertical_flip = RandomFlip(dim='vertical',
                                               prob=random_flip)
        self.random_translate = RandomTranslate(
            dy_minmax=random_translate[0],
            dx_minmax=random_translate[1],
            prob=random_translate[2],
            box_filter=self.box_filter,
            image_validator=self.image_validator,
            n_trials_max=self.n_trials_max)

        self.random_rotate_small = RandomRotate(
            angles=random_rotate_small[0],
            prob=random_rotate_small[1],
            box_filter=self.box_filter,
            image_validator=self.image_validator,
            n_trials_max=self.n_trials_max)

        self.random_rotate_big = RandomRotate(
            angles=random_rotate_big[0],
            prob=random_rotate_big[1],
            box_filter=self.box_filter,
            image_validator=self.image_validator,
            n_trials_max=self.n_trials_max)

        self.random_zoom_in = RandomScale(min_factor=1.0,
                                          max_factor=random_scale[1],
                                          prob=random_scale[2],
                                          box_filter=self.box_filter,
                                          image_validator=self.image_validator,
                                          n_trials_max=self.n_trials_max)

        self.random_zoom_out = RandomScale(
            min_factor=random_scale[0],
            max_factor=1.0,  # Zoom out means scale factors of at most 1.
            prob=random_scale[2],
            box_filter=self.box_filter,
            image_validator=self.image_validator,
            n_trials_max=self.n_trials_max)

        # The random patch generator is not used at the moment, but it could be useful in your project.
        self.patch_coord_generator = PatchCoordinateGenerator(
            must_match='h_w',
            min_scale=min_scale,
            max_scale=max_scale,
            scale_uniformly=False,
            min_aspect_ratio=min_aspect_ratio,
            max_aspect_ratio=max_aspect_ratio)

        self.random_patch = RandomPatch(
            patch_coord_generator=self.patch_coord_generator,
            box_filter=self.box_filter,
            image_validator=self.image_validator,
            n_trials_max=self.n_trials_max,
            prob=0.5,
            can_fail=False)

        # If we zoom in, do translation before scaling.
        self.sequence1 = [
            self.convert_to_3_channels, self.convert_to_float32,
            self.random_brightness, self.random_contrast,
            self.convert_to_uint8, self.convert_RGB_to_HSV,
            self.convert_to_float32, self.random_saturation, self.random_hue,
            self.convert_to_uint8, self.convert_HSV_to_RGB,
            self.random_horizontal_flip, self.random_vertical_flip,
            self.random_translate, self.random_rotate_big,
            self.random_rotate_small, self.random_zoom_in, self.random_patch,
            self.resize
        ]

        # If we zoom out, do translation after scaling.
        self.sequence2 = [
            self.convert_to_3_channels, self.convert_to_float32,
            self.random_brightness, self.random_contrast,
            self.convert_to_uint8, self.convert_RGB_to_HSV,
            self.convert_to_float32, self.random_saturation, self.random_hue,
            self.convert_to_uint8, self.convert_HSV_to_RGB,
            self.random_horizontal_flip, self.random_vertical_flip,
            self.random_zoom_out, self.random_translate,
            self.random_rotate_big, self.random_rotate_small,
            self.random_patch, self.resize
        ]

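        # Sequence without photometric distortions: geometric transformations only.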
        self.sequence3 = [
            self.convert_to_3_channels, self.convert_to_uint8,
            self.random_horizontal_flip, self.random_vertical_flip,
            self.random_translate, self.random_rotate_big,
            self.random_rotate_small, self.resize
        ]
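
# A sketch of a plausible dispatch over the three chains above, using
# `proba_no_aug` to select the distortion-free sequence3; the actual `__call__`
# is not shown in the excerpt, so this is an assumption.
import numpy as np

def apply_augmentation_with_skip(aug, image, labels):
    if np.random.uniform(0., 1.) < aug.proba_no_aug:
        sequence = aug.sequence3  # no photometric augmentation
    elif np.random.choice(2) == 0:
        sequence = aug.sequence1  # zoom-in chain
    else:
        sequence = aug.sequence2  # zoom-out chain
    for transform in sequence:
        image, labels = transform(image, labels)
    return image, labels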
Example #9
    def __init__(
        self,
        random_brightness=(-48, 48, 0.5),
        random_contrast=(0.5, 1.8, 0.5),
        random_saturation=(0.5, 1.8, 0.5),
        random_hue=(18, 0.5),
        random_flip=0.5,
        # The last element is the prob.
        random_translate=((0.03, 0.5), (0.03, 0.5), 0.5),
        # The last element is the prob.
        random_scale=(0.5, 2.0, 0.5),
        # Maximum number of retries if the translated or scaled image is not valid.
        n_trials_max=3,
        clip_boxes=True,
        overlap_criterion_box_filter='area',
        overlap_criterion_validator='area',
        bounds_box_filter=(0.3, 1.0),
        bounds_validator=(0.5, 1.0),
        n_boxes_min=1,
        background=(0, 0, 0),
        labels_format=('class_id', 'xmin', 'ymin', 'xmax', 'ymax')):

        if (random_scale[0] >= 1) or (random_scale[1] <= 1):
            raise ValueError(
                "This sequence of transformations only makes sense"
                "if the minimum scaling factor is <1 and the maximum scaling factor is >1."
            )
        self.n_trials_max = n_trials_max
        self.clip_boxes = clip_boxes
        self.overlap_criterion_box_filter = overlap_criterion_box_filter
        self.overlap_criterion_validator = overlap_criterion_validator
        self.bounds_box_filter = bounds_box_filter
        self.bounds_validator = bounds_validator
        self.n_boxes_min = n_boxes_min
        self.background = background
        self.labels_format = labels_format

        # Determines which boxes are kept in an image after the transformations have been applied.
        self.box_filter = BoxFilter(
            check_overlap=True,
            check_min_area=True,
            check_degenerate=True,
            overlap_criterion=self.overlap_criterion_box_filter,
            overlap_bounds=self.bounds_box_filter,
            min_area=16,
            labels_format=self.labels_format)

        # Determines whether the result of the transformations is a valid training image.
        self.image_validator = ImageValidator(
            overlap_criterion=self.overlap_criterion_validator,
            overlap_bounds=self.bounds_validator,
            n_boxes_min=self.n_boxes_min,
            labels_format=self.labels_format)

        # Utility distortions
        self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV')
        self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB')
        self.convert_to_float32 = ConvertDataType(to='float32')
        self.convert_to_uint8 = ConvertDataType(to='uint8')
        # Make sure all images end up having 3 channels.
        self.convert_to_3_channels = ConvertTo3Channels()

        # Photometric transformations
        self.random_brightness = RandomBrightness(lower=random_brightness[0],
                                                  upper=random_brightness[1],
                                                  prob=random_brightness[2])
        self.random_contrast = RandomContrast(lower=random_contrast[0],
                                              upper=random_contrast[1],
                                              prob=random_contrast[2])
        self.random_saturation = RandomSaturation(lower=random_saturation[0],
                                                  upper=random_saturation[1],
                                                  prob=random_saturation[2])
        self.random_hue = RandomHue(max_delta=random_hue[0],
                                    prob=random_hue[1])

        # Geometric transformations
        self.random_flip = RandomFlip(dim='horizontal',
                                      prob=random_flip,
                                      labels_format=self.labels_format)
        self.random_translate = RandomTranslate(
            dy_minmax=random_translate[0],
            dx_minmax=random_translate[1],
            prob=random_translate[2],
            clip_boxes=self.clip_boxes,
            box_filter=self.box_filter,
            image_validator=self.image_validator,
            n_trials_max=self.n_trials_max,
            background=self.background,
            labels_format=self.labels_format)
        self.random_zoom_in = RandomScale(min_factor=1.0,
                                          max_factor=random_scale[1],
                                          prob=random_scale[2],
                                          clip_boxes=self.clip_boxes,
                                          box_filter=self.box_filter,
                                          image_validator=self.image_validator,
                                          n_trials_max=self.n_trials_max,
                                          background=self.background,
                                          labels_format=self.labels_format)
        self.random_zoom_out = RandomScale(
            min_factor=random_scale[0],
            max_factor=1.0,
            prob=random_scale[2],
            clip_boxes=self.clip_boxes,
            box_filter=self.box_filter,
            image_validator=self.image_validator,
            n_trials_max=self.n_trials_max,
            background=self.background,
            labels_format=self.labels_format)

        # If we zoom in, do translation before scaling.
        self.sequence1 = [
            self.convert_to_3_channels, self.convert_to_float32,
            self.random_brightness, self.random_contrast,
            self.convert_to_uint8, self.convert_RGB_to_HSV,
            self.convert_to_float32, self.random_saturation, self.random_hue,
            self.convert_to_uint8, self.convert_HSV_to_RGB,
            self.random_translate, self.random_zoom_in, self.random_flip
        ]

        # If we zoom out, do scaling before translation.
        self.sequence2 = [
            self.convert_to_3_channels, self.convert_to_float32,
            self.random_brightness, self.random_contrast,
            self.convert_to_uint8, self.convert_RGB_to_HSV,
            self.convert_to_float32, self.random_saturation, self.random_hue,
            self.convert_to_uint8, self.convert_HSV_to_RGB,
            self.convert_to_float32, self.random_zoom_out,
            self.random_translate, self.random_flip
        ]
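
The class presumably exposes a __call__ that picks one of these chains per sample: sequence1 when zooming in, sequence2 when zooming out. Below is a minimal sketch of such a dispatcher, assuming each transform is callable as (image, labels) -> (image, labels); the helper name and the 50/50 coin flip are illustrative assumptions, not taken from the original class.

import numpy as np

def apply_random_sequence(augmenter, image, labels):
    # Hypothetical helper: choose the zoom-in chain (sequence1) or the
    # zoom-out chain (sequence2) with equal probability, then apply the
    # chosen transforms in order.
    sequence = augmenter.sequence1 if np.random.rand() < 0.5 else augmenter.sequence2
    for transform in sequence:
        image, labels = transform(image, labels)
    return image, labels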
Example #10
    def __init__(
        self,
        random_brightness=(-48, 48, 0.5),
        random_contrast=(0.5, 1.8, 0.5),
        random_saturation=(0.5, 1.8, 0.5),
        random_hue=(18, 0.5),
        random_flip=0.5,
        random_translate=((0.03, 0.5), (0.03, 0.5), 0.5),
        random_scale=(0.5, 2.0, 0.5),
        random_gaussian_noise=(0.5, 0., 10),  # Gaussian noise
        random_poisson_noise=(0.5, 60),  # Poisson noise
        random_salt_pepper_noise=(0.5, 0.5, 0.005),  # salt-and-pepper (impulse) noise
        random_row_defect=(0.5, 1),  # row defect
        random_col_defect=(0.5, 1),  # col defect
        n_trials_max=3,
        clip_boxes=True,
        overlap_criterion='area',
        bounds_box_filter=(0.3, 1.0),
        bounds_validator=(0.5, 1.0),
        n_boxes_min=1,
        background=(0, 0, 0),
        labels_format={
            'class_id': 0,
            'xmin': 1,
            'ymin': 2,
            'xmax': 3,
            'ymax': 4
        }):

        if (random_scale[0] >= 1) or (random_scale[1] <= 1):
            raise ValueError(
                "This sequence of transformations only makes sense if the minimum scaling factor is <1 and the maximum scaling factor is >1."
            )

        self.n_trials_max = n_trials_max
        self.clip_boxes = clip_boxes
        self.overlap_criterion = overlap_criterion
        self.bounds_box_filter = bounds_box_filter
        self.bounds_validator = bounds_validator
        self.n_boxes_min = n_boxes_min
        self.background = background
        self.labels_format = labels_format

        # Determines which boxes are kept in an image after the transformations have been applied.
        self.box_filter = BoxFilter(check_overlap=True,
                                    check_min_area=True,
                                    check_degenerate=True,
                                    overlap_criterion=self.overlap_criterion,
                                    overlap_bounds=self.bounds_box_filter,
                                    min_area=16,
                                    labels_format=self.labels_format)

        # Determines whether the result of the transformations is a valid training image.
        self.image_validator = ImageValidator(
            overlap_criterion=self.overlap_criterion,
            bounds=self.bounds_validator,
            n_boxes_min=self.n_boxes_min,
            labels_format=self.labels_format)

        # Utility distortions
        self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV')
        self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB')
        self.convert_to_float32 = ConvertDataType(to='float32')
        self.convert_to_uint8 = ConvertDataType(to='uint8')
        # Make sure all images end up having 3 channels.
        self.convert_to_3_channels = ConvertTo3Channels()
        # Make sure all images end up having 1 channel.
        self.convert_to_1_channel = ConvertTo1Channel()

        # Photometric transformations
        self.random_brightness = RandomBrightness(lower=random_brightness[0],
                                                  upper=random_brightness[1],
                                                  prob=random_brightness[2])
        self.random_contrast = RandomContrast(lower=random_contrast[0],
                                              upper=random_contrast[1],
                                              prob=random_contrast[2])
        self.random_saturation = RandomSaturation(lower=random_saturation[0],
                                                  upper=random_saturation[1],
                                                  prob=random_saturation[2])
        self.random_hue = RandomHue(max_delta=random_hue[0],
                                    prob=random_hue[1])

        # Geometric transformations
        self.random_flip = RandomFlip(dim='horizontal',
                                      prob=random_flip,
                                      labels_format=self.labels_format)
        self.random_translate = RandomTranslate(
            dy_minmax=random_translate[0],
            dx_minmax=random_translate[1],
            prob=random_translate[2],
            clip_boxes=self.clip_boxes,
            box_filter=self.box_filter,
            image_validator=self.image_validator,
            n_trials_max=self.n_trials_max,
            background=self.background,
            labels_format=self.labels_format)
        self.random_zoom_in = RandomScale(min_factor=1.0,
                                          max_factor=random_scale[1],
                                          prob=random_scale[2],
                                          clip_boxes=self.clip_boxes,
                                          box_filter=self.box_filter,
                                          image_validator=self.image_validator,
                                          n_trials_max=self.n_trials_max,
                                          background=self.background,
                                          labels_format=self.labels_format)
        self.random_zoom_out = RandomScale(
            min_factor=random_scale[0],
            max_factor=1.0,
            prob=random_scale[2],
            clip_boxes=self.clip_boxes,
            box_filter=self.box_filter,
            image_validator=self.image_validator,
            n_trials_max=self.n_trials_max,
            background=self.background,
            labels_format=self.labels_format)

        # Noise and sensor-defect transformations
        self.random_row_defect = RandomRowDefect(prob=random_row_defect[0],
                                                 thikness=random_row_defect[1])
        self.random_col_defect = RandomColDefect(prob=random_col_defect[0],
                                                 thikness=random_col_defect[1])
        self.random_salt_pepper = RandomSaltPepperNoise(
            prob=random_salt_pepper_noise[0],
            salt_vs_pepper_ratio=random_salt_pepper_noise[1],
            percentage=random_salt_pepper_noise[2])
        self.random_poisson = RandomPoissonNoise(
            prob=random_poisson_noise[0], Lambda=random_poisson_noise[1])
        self.random_gaussian = RandomGaussianNoise(
            prob=random_gaussian_noise[0],
            mean=random_gaussian_noise[1],
            sigma=random_gaussian_noise[2])

        # If we zoom in, do translation before scaling.
        self.sequence1 = [
            self.convert_to_1_channel,
            self.convert_to_float32,
            self.random_brightness,
            self.random_contrast,
            # self.convert_to_uint8,
            # self.convert_RGB_to_HSV,
            # self.convert_to_float32,
            # self.random_saturation,
            # self.random_hue,
            self.convert_to_uint8,
            # self.convert_HSV_to_RGB,
            self.random_translate,
            self.random_zoom_in,
            self.random_flip,
            self.random_salt_pepper,
            self.random_poisson,
            self.random_gaussian,
            self.random_col_defect,
            self.convert_to_1_channel
        ]

        # If we zoom out, do scaling before translation.
        self.sequence2 = [
            self.convert_to_1_channel,
            self.convert_to_float32,
            self.random_brightness,
            # self.convert_to_uint8,
            # self.convert_RGB_to_HSV,
            # self.convert_to_float32,
            # self.random_saturation,
            # self.random_hue,
            # self.convert_to_uint8,
            # self.convert_HSV_to_RGB,
            self.convert_to_float32,
            self.random_contrast,
            self.convert_to_uint8,
            self.random_zoom_out,
            self.random_translate,
            self.random_flip,
            self.random_salt_pepper,
            self.random_poisson,
            self.random_gaussian,
            self.random_col_defect,
            self.convert_to_1_channel
        ]
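
The noise transforms used above are assumed to follow the same callable interface as the other transforms. As a point of reference, here is a minimal sketch of what a Gaussian-noise transform with the (prob, mean, sigma) parameters seen here might look like; this is an assumption, not the original implementation.

import numpy as np

class RandomGaussianNoiseSketch:
    # Hypothetical minimal version: adds Gaussian noise with the given
    # mean and sigma to a uint8 image with probability `prob`.
    def __init__(self, prob=0.5, mean=0.0, sigma=10):
        self.prob = prob
        self.mean = mean
        self.sigma = sigma

    def __call__(self, image, labels=None):
        if np.random.uniform(0, 1) < self.prob:
            noise = np.random.normal(self.mean, self.sigma, image.shape)
            image = np.clip(image.astype(np.float32) + noise, 0, 255).astype(np.uint8)
        if labels is None:
            return image
        return image, labels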
Example #11
                                    variances=variances,
                                    matching_type='multi',
                                    pos_iou_threshold=0.5,
                                    neg_iou_limit=0.5,
                                    normalize_coords=normalize_coords)

# Create Transformations
convert_to_3_channels = ConvertTo3Channels()
convert_to_uint8 = ConvertDataType(to='uint8')
resize = Resize(height=img_height, width=img_width)
random_flip_hor = RandomFlip(dim='horizontal', prob=0.5)
random_flip_ver = RandomFlip(dim='vertical', prob=0.5)

ssd_expand = SSDExpand()

box_filter = BoxFilter(overlap_criterion='area', overlap_bounds=(0.4, 1.0))
image_validator = ImageValidator(overlap_criterion='area',
                                 bounds=(0.3, 1.0),
                                 n_boxes_min=1)
random_translate = RandomTranslate(dy_minmax=(0.03, 0.3),
                                   dx_minmax=(0.03, 0.3),
                                   prob=0.5,
                                   clip_boxes=False,
                                   box_filter=None,
                                   image_validator=image_validator,
                                   n_trials_max=3)

augmentations = [
    convert_to_3_channels, convert_to_uint8, random_flip_hor, random_flip_ver,
    random_translate
]
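
This augmentations list is presumably what gets passed as the transformations argument of the generator shown in the next example. A hedged usage sketch follows; `dataset` and `label_encoder` are stand-ins for objects the surrounding project is assumed to provide.

# Sketch only: `dataset` and `label_encoder` are assumed to exist elsewhere.
train_generator = dataset.generate(batch_size=32,
                                   transformations=augmentations,
                                   label_encoder=label_encoder,
                                   returns={'processed_images', 'encoded_labels'},
                                   keep_images_without_gt=False)
batch_images, batch_labels_encoded = next(train_generator)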
Example #12
    def generate(self,
                 batch_size=32,
                 transformations=[],
                 label_encoder=None,
                 returns={'processed_images', 'encoded_labels'},
                 keep_images_without_gt=False,
                 degenerate_box_handling='remove'):

        if degenerate_box_handling == 'remove':
            box_filter = BoxFilter(check_overlap=False,
                                   check_min_area=False,
                                   check_degenerate=True,
                                   labels_format=self.labels_format)

        # Override the labels formats of all the transformations to make sure they are set correctly.
        if not (self.labels is None):
            for transform in transformations:
                transform.labels_format = self.labels_format

        #############################################################################################
        # Generate mini batches.
        #############################################################################################

        current = 0

        while True:

            batch_X, batch_y = [], []

            if current >= self.dataset_size:
                current = 0

            #########################################################################################
            # Get the images, (maybe) image IDs, (maybe) labels, etc. for this batch.
            #########################################################################################

            # We prioritize our options in the following order:
            # 1) If we have the images already loaded in memory, get them from there.
            # 2) Else, if we have an HDF5 dataset, get the images from there.
            # 3) Else, if we have neither of the above, we'll have to load the individual image
            #    files from disk.
            batch_indices = self.dataset_indices[current:current + batch_size]
            if not (self.images is None):
                for i in batch_indices:
                    batch_X.append(self.images[i])

                if not (self.filenames is None):
                    batch_filenames = self.filenames[current:current +
                                                     batch_size]
                else:
                    batch_filenames = None
            else:
                batch_filenames = self.filenames[current:current + batch_size]
                for filename in batch_filenames:
                    with Image.open(filename) as image:
                        batch_X.append(np.array(image, dtype=np.uint8))

            # Get the labels for this batch (if there are any).
            if not (self.labels is None):
                batch_y = deepcopy(self.labels[current:current + batch_size])
            else:
                batch_y = None

            if 'original_images' in returns:
                batch_original_images = deepcopy(
                    batch_X)  # The original, unaltered images
            if 'original_labels' in returns:
                batch_original_labels = deepcopy(
                    batch_y)  # The original, unaltered labels

            current += batch_size

            #########################################################################################
            # Maybe perform image transformations.
            #########################################################################################

            # In case we need to remove any images from the batch, store their indices in this list.
            batch_items_to_remove = []
            batch_inverse_transforms = []
            for i in range(len(batch_X)):

                if not (self.labels is None):
                    # Convert the labels for this image to an array (in case they aren't already).
                    batch_y[i] = np.array(batch_y[i])
                    # If this image has no ground truth boxes, maybe we don't want to keep it in the batch.
                    if (batch_y[i].size == 0) and not keep_images_without_gt:
                        batch_items_to_remove.append(i)
                        batch_inverse_transforms.append([])
                        continue

                # Apply any image transformations we may have received.
                if transformations:

                    inverse_transforms = []

                    for transform in transformations:
                        if not (self.labels is None):

                            if ('inverse_transform' in returns) and (
                                    'return_inverter' in inspect.signature(
                                        transform).parameters):
                                batch_X[i], batch_y[
                                    i], inverse_transform = transform(
                                        batch_X[i],
                                        batch_y[i],
                                        return_inverter=True)
                                inverse_transforms.append(inverse_transform)
                            else:
                                batch_X[i], batch_y[i] = transform(
                                    batch_X[i], batch_y[i])

                            # In case the transform failed to produce an output image,
                            # which is possible for some random transforms.
                            if batch_X[i] is None:
                                batch_items_to_remove.append(i)
                                batch_inverse_transforms.append([])
                                continue

                        else:

                            if ('inverse_transform' in returns) and (
                                    'return_inverter' in inspect.signature(
                                        transform).parameters):
                                batch_X[i], inverse_transform = transform(
                                    batch_X[i], return_inverter=True)
                                inverse_transforms.append(inverse_transform)
                            else:
                                batch_X[i] = transform(batch_X[i])

                    batch_inverse_transforms.append(inverse_transforms[::-1])

                #########################################################################################
                # Check for degenerate boxes in this batch item.
                #########################################################################################

                if not (self.labels is None):

                    xmin = self.labels_format['xmin']
                    ymin = self.labels_format['ymin']
                    xmax = self.labels_format['xmax']
                    ymax = self.labels_format['ymax']

                    if np.any(batch_y[i][:, xmax] -
                              batch_y[i][:, xmin] <= 0) or np.any(
                                  batch_y[i][:, ymax] -
                                  batch_y[i][:, ymin] <= 0):
                        if degenerate_box_handling == 'warn':
                            warnings.warn(
                                "Detected degenerate ground truth bounding boxes for batch item {} with bounding boxes {}, "
                                .format(i, batch_y[i]) +
                                "i.e. bounding boxes where xmax <= xmin and/or ymax <= ymin. "
                                +
                                "This could mean that your dataset contains degenerate ground truth boxes, or that any image transformations you may apply might "
                                +
                                "result in degenerate ground truth boxes, or that you are parsing the ground truth in the wrong coordinate format."
                                +
                                "Degenerate ground truth bounding boxes may lead to NaN errors during the training."
                            )
                        elif degenerate_box_handling == 'remove':
                            batch_y[i] = box_filter(batch_y[i])
                            if (batch_y[i].size
                                    == 0) and not keep_images_without_gt:
                                batch_items_to_remove.append(i)

            #########################################################################################
            # Remove any items we might not want to keep from the batch.
            #########################################################################################

            if batch_items_to_remove:
                for j in sorted(batch_items_to_remove, reverse=True):
                    # This isn't efficient, but it hopefully shouldn't need to be done often anyway.
                    batch_X.pop(j)
                    batch_filenames.pop(j)
                    if batch_inverse_transforms:
                        batch_inverse_transforms.pop(j)
                    if not (self.labels is None): batch_y.pop(j)
                    # if not (self.image_ids is None): batch_image_ids.pop(j)
                    # if not (self.eval_neutral is None): batch_eval_neutral.pop(j)
                    if 'original_images' in returns:
                        batch_original_images.pop(j)
                    if 'original_labels' in returns and not (self.labels is
                                                             None):
                        batch_original_labels.pop(j)

            #########################################################################################

            # CAUTION: Converting `batch_X` into an array will result in an empty batch if the images have varying sizes
            #          or varying numbers of channels. At this point, all images must have the same size and the same
            #          number of channels.
            batch_X = np.array(batch_X)
            if (batch_X.size == 0):
                raise DegenerateBatchError(
                    "You produced an empty batch. This might be because the images in the batch vary "
                    +
                    "in their size and/or number of channels. Note that after all transformations "
                    +
                    "(if any were given) have been applied to all images in the batch, all images "
                    + "must be homogenous in size along all axes.")

            #########################################################################################
            # If we have a label encoder, encode our labels.
            #########################################################################################

            if not (label_encoder is None or self.labels is None):

                if ('matched_anchors' in returns) and isinstance(
                        label_encoder, SSDInputEncoder):
                    batch_y_encoded, batch_matched_anchors = label_encoder(
                        batch_y, diagnostics=True)
                else:
                    batch_y_encoded = label_encoder(batch_y, diagnostics=False)
                    batch_matched_anchors = None

            else:
                batch_y_encoded = None
                batch_matched_anchors = None

            #########################################################################################
            # Compose the output.
            #########################################################################################

            ret = []
            if 'processed_images' in returns: ret.append(batch_X)
            if 'encoded_labels' in returns: ret.append(batch_y_encoded)
            if 'matched_anchors' in returns: ret.append(batch_matched_anchors)
            if 'processed_labels' in returns: ret.append(batch_y)
            if 'filenames' in returns: ret.append(batch_filenames)
            # if 'image_ids' in returns: ret.append(batch_image_ids)
            # if 'evaluation-neutral' in returns: ret.append(batch_eval_neutral)
            if 'inverse_transform' in returns:
                ret.append(batch_inverse_transforms)
            if 'original_images' in returns: ret.append(batch_original_images)
            if 'original_labels' in returns: ret.append(batch_original_labels)

            yield ret
    def generate(self,
                 batch_size=32,
                 shuffle=True,
                 transformations=[],
                 label_encoder=None,
                 returns={'processed_images', 'encoded_labels'},
                 keep_images_without_gt=False,
                 degenerate_box_handling='remove'):
        '''
        Generates batches of samples and corresponding labels, with optional shuffling and data augmentation.

        Arguments:
            batch_size (int, optional): The size of the batches to generate.
            shuffle (bool, optional): Whether or not to shuffle the dataset before each pass.
                Use `True` during training; it can be turned off for debugging or prediction.
            transformations (list, optional): A list of transformations that will be applied to the
                images and labels in the given order.
            label_encoder (callable, optional): Converts labels from their input format into the
                format required for training.
            returns (set, optional): The outputs of the generator:
                * 'processed_images': The processed images. This output is always included,
                    whether or not you request it.
                * 'encoded_labels': The encoded labels.
                * 'matched_anchors': Only available if `label_encoder` is an `SSDInputEncoder` object.
                    The same as 'encoded_labels', but contains the coordinates of the matched default
                    boxes instead of the ground truth coordinates. This can be used to visualize which
                    default boxes got matched to each ground truth box. Only available in training mode.
                * 'processed_labels': The processed, but not yet encoded, labels.
                * 'filenames': The file names (full paths).
                * 'image_ids': The image IDs.
                * 'evaluation-neutral': A nested list of lists of booleans. Each list contains `True` or
                    `False` for every ground truth bounding box of the respective image, depending on
                    whether that bounding box is supposed to be evaluation-neutral (`True`) or not
                    (`False`). May return `None` if no such concept exists for a given dataset. An
                    example of evaluation-neutrality are the ground truth boxes annotated as "difficult"
                    in the Pascal VOC datasets, which are usually treated as neutral in a model
                    evaluation.
                * 'inverse_transform': A nested list that contains a list of "inverter" functions for
                    each item in the batch. These inverter functions take the (predicted) labels of an
                    image as input and apply the inverse of the transformations that were applied to
                    the original image. This makes it possible to let the model make predictions on a
                    transformed image and then convert those predictions back to the original image.
                    This is mostly relevant for evaluation: if you want to evaluate a model on a dataset
                    with varying image sizes, the images must be transformed in some way (e.g. by
                    resizing or cropping) so that they all have the same size. The model will predict
                    boxes on those transformed images, but for the evaluation the predictions need to
                    refer to the original images rather than the transformed ones, which means the
                    predicted coordinates have to be mapped back onto the original images. Note that
                    the inverters for each image must be applied in the order in which they are given
                    in that image's list.
                * 'original_images': The original images before any processing.
                * 'original_labels': The original ground truth boxes before any processing.
                The order of the outputs in the tuple is the order of the list above. If `returns`
                contains a keyword for an output that is unavailable, that output is omitted from the
                yielded tuple and a warning is raised.
            keep_images_without_gt (bool, optional): If `False`, images without ground truth objects
                are removed from the batch.
            degenerate_box_handling (str, optional): How to handle degenerate boxes: 'warn' or 'remove'.

        Yields:
            The next batch as a tuple of the items defined by the `returns` argument.
        '''

        if self.dataset_size == 0:
            raise DatasetError(
                "Cannot generate batches because you did not load a dataset.")

        #############################################################################################
        # Warn if any of the set returns aren't possible.
        #############################################################################################

        if self.labels is None:
            if any([
                    ret in returns for ret in
                [
                    'original_labels', 'processed_labels', 'encoded_labels',
                    'matched_anchors', 'evaluation-neutral'
                ]
            ]):
                warnings.warn(
                    "Since no labels were given, none of 'original_labels', 'processed_labels', 'evaluation-neutral', 'encoded_labels', and 'matched_anchors' "
                    +
                    "are possible returns, but you set `returns = {}`. The impossible returns will be `None`."
                    .format(returns))
        elif label_encoder is None:
            if any([
                    ret in returns
                    for ret in ['encoded_labels', 'matched_anchors']
            ]):
                warnings.warn(
                    "Since no label encoder was given, 'encoded_labels' and 'matched_anchors' aren't possible returns, "
                    +
                    "but you set `returns = {}`. The impossible returns will be `None`."
                    .format(returns))
        elif not isinstance(label_encoder, SSDInputEncoder):
            if 'matched_anchors' in returns:
                warnings.warn(
                    "`label_encoder` is not an `SSDInputEncoder` object, therefore 'matched_anchors' is not a possible return, "
                    +
                    "but you set `returns = {}`. The impossible returns will be `None`."
                    .format(returns))

        #############################################################################################
        # Do some preparations, such as initially shuffling the dataset.
        #############################################################################################

        if shuffle:
            objects_to_shuffle = [self.dataset_indices]
            if not (self.filenames is None):
                objects_to_shuffle.append(self.filenames)
            if not (self.labels is None):
                objects_to_shuffle.append(self.labels)
            if not (self.image_ids is None):
                objects_to_shuffle.append(self.image_ids)
            if not (self.eval_neutral is None):
                objects_to_shuffle.append(self.eval_neutral)
            shuffled_objects = sklearn.utils.shuffle(*objects_to_shuffle)
            for i in range(len(objects_to_shuffle)):
                objects_to_shuffle[i][:] = shuffled_objects[i]

        if degenerate_box_handling == 'remove':
            box_filter = BoxFilter(check_overlap=False,
                                   check_min_area=False,
                                   check_degenerate=True,
                                   labels_format=self.labels_format)

        # Override the labels format of all the transformations to make sure they are set correctly.
        if not (self.labels is None):
            for transform in transformations:
                transform.labels_format = self.labels_format

        #############################################################################################
        # Generate mini batches.
        #############################################################################################

        current = 0

        while True:

            batch_X, batch_y = [], []
            if current >= self.dataset_size:
                current = 0
                #########################################################################################
                # Shuffle the dataset after each complete pass.
                #########################################################################################
                if shuffle:
                    objects_to_shuffle = [self.dataset_indices]
                    if not (self.filenames is None):
                        objects_to_shuffle.append(self.filenames)
                    if not (self.labels is None):
                        objects_to_shuffle.append(self.labels)
                    if not (self.image_ids is None):
                        objects_to_shuffle.append(self.image_ids)
                    if not (self.eval_neutral is None):
                        objects_to_shuffle.append(self.eval_neutral)
                    shuffled_objects = sklearn.utils.shuffle(
                        *objects_to_shuffle)
                    for i in range(len(objects_to_shuffle)):
                        objects_to_shuffle[i][:] = shuffled_objects[i]
            #########################################################################################
            # Get the images, (maybe) image IDs, (maybe) labels, etc. for this batch.
            # 1) If the images are already loaded in memory, get them from there.
            # 2) Otherwise, if there is an HDF5 dataset, get the images from there.
            # 3) Otherwise, load the individual image files from disk.
            #########################################################################################
            batch_indices = self.dataset_indices[current:current + batch_size]
            if not (self.images is None):
                for i in batch_indices:
                    batch_X.append(self.images[i])
                if not (self.filenames is None):
                    batch_filenames = self.filenames[current:current +
                                                     batch_size]
                else:
                    batch_filenames = None
            elif not (self.hdf5_dataset is None):
                for i in batch_indices:
                    batch_X.append(self.hdf5_dataset['images'][i].reshape(
                        self.hdf5_dataset['image_shapes'][i]))
                if not (self.filenames is None):
                    batch_filenames = self.filenames[current:current +
                                                     batch_size]
                else:
                    batch_filenames = None
            else:
                batch_filenames = self.filenames[current:current + batch_size]
                for filename in batch_filenames:
                    with Image.open(filename) as image:
                        batch_X.append(np.array(image, dtype=np.uint8))

            # Get the labels for this batch (if there are any).
            if not (self.labels is None):
                batch_y = deepcopy(self.labels[current:current + batch_size])
            else:
                batch_y = None

            if not (self.eval_neutral is None):
                batch_eval_neutral = self.eval_neutral[current:current +
                                                       batch_size]
            else:
                batch_eval_neutral = None

            # Get the image IDs for this batch (if there are any).
            if not (self.image_ids is None):
                batch_image_ids = self.image_ids[current:current + batch_size]
            else:
                batch_image_ids = None

            if 'original_images' in returns:
                batch_original_images = deepcopy(batch_X)  # The original, unaltered images
            if 'original_labels' in returns:
                batch_original_labels = deepcopy(batch_y)  # The original, unaltered labels

            current += batch_size

            #########################################################################################
            # Maybe perform image transformations.
            #########################################################################################

            batch_items_to_remove = []  # In case we need to remove any images from the batch, store their indices in this list.
            batch_inverse_transforms = []

            for i in range(len(batch_X)):

                if not (self.labels is None):
                    # Convert the labels for this image to an array (in case they aren't already).
                    batch_y[i] = np.array(batch_y[i])
                    # If this image has no ground truth boxes, maybe we don't want to keep it in the batch.
                    if (batch_y[i].size == 0) and not keep_images_without_gt:
                        batch_items_to_remove.append(i)
                        batch_inverse_transforms.append([])
                        continue

                # Apply any image transformations we may have received.
                if transformations:
                    inverse_transforms = []
                    for transform in transformations:
                        if not (self.labels is None):
                            if ('inverse_transform' in returns) and (
                                    'return_inverter' in inspect.signature(
                                        transform).parameters):
                                batch_X[i], batch_y[
                                    i], inverse_transform = transform(
                                        batch_X[i],
                                        batch_y[i],
                                        return_inverter=True)
                                inverse_transforms.append(inverse_transform)
                            else:
                                batch_X[i], batch_y[i] = transform(
                                    batch_X[i], batch_y[i])

                            if batch_X[i] is None:  # In case the transform failed to produce an output image.
                                batch_items_to_remove.append(i)
                                batch_inverse_transforms.append([])
                                continue

                        else:
                            if ('inverse_transform' in returns) and (
                                    'return_inverter' in inspect.signature(
                                        transform).parameters):
                                batch_X[i], inverse_transform = transform(
                                    batch_X[i], return_inverter=True)
                                inverse_transforms.append(inverse_transform)
                            else:
                                batch_X[i] = transform(batch_X[i])

                    batch_inverse_transforms.append(inverse_transforms[::-1])

                #########################################################################################
                # Check for degenerate boxes in this batch item.
                #########################################################################################

                if not (self.labels is None):

                    xmin = self.labels_format['xmin']
                    ymin = self.labels_format['ymin']
                    xmax = self.labels_format['xmax']
                    ymax = self.labels_format['ymax']

                    if np.any(batch_y[i][:, xmax] -
                              batch_y[i][:, xmin] <= 0) or np.any(
                                  batch_y[i][:, ymax] -
                                  batch_y[i][:, ymin] <= 0):
                        if degenerate_box_handling == 'warn':
                            warnings.warn(
                                "Detected degenerate ground truth bounding boxes for batch item {} with bounding boxes {}, "
                                .format(i, batch_y[i]) +
                                "i.e. bounding boxes where xmax <= xmin and/or ymax <= ymin. "
                                +
                                "This could mean that your dataset contains degenerate ground truth boxes, or that any image transformations you may apply might "
                                +
                                "result in degenerate ground truth boxes, or that you are parsing the ground truth in the wrong coordinate format."
                                +
                                "Degenerate ground truth bounding boxes may lead to NaN errors during the training."
                            )
                        elif degenerate_box_handling == 'remove':
                            batch_y[i] = box_filter(batch_y[i])
                            if (batch_y[i].size
                                    == 0) and not keep_images_without_gt:
                                batch_items_to_remove.append(i)

            #########################################################################################
            # Remove any items we might not want to keep from the batch.
            #########################################################################################

            if batch_items_to_remove:
                for j in sorted(batch_items_to_remove, reverse=True):
                    # This isn't efficient, but it hopefully shouldn't need to be done often anyway.
                    batch_X.pop(j)
                    batch_filenames.pop(j)
                    if batch_inverse_transforms:
                        batch_inverse_transforms.pop(j)
                    if not (self.labels is None): batch_y.pop(j)
                    if not (self.image_ids is None): batch_image_ids.pop(j)
                    if not (self.eval_neutral is None):
                        batch_eval_neutral.pop(j)
                    if 'original_images' in returns:
                        batch_original_images.pop(j)
                    if 'original_labels' in returns and not (self.labels is
                                                             None):
                        batch_original_labels.pop(j)

            #########################################################################################

            # CAUTION: Converting `batch_X` into an array will result in an empty batch if the images
            #          have varying sizes or varying numbers of channels. At this point, all images
            #          must have the same size and the same number of channels.

            batch_X = np.array(batch_X)
            if (batch_X.size == 0):
                raise DegenerateBatchError(
                    "You produced an empty batch. This might be because the images in the batch vary "
                    +
                    "in their size and/or number of channels. Note that after all transformations "
                    +
                    "(if any were given) have been applied to all images in the batch, all images "
                    + "must be homogenous in size along all axes.")

            #########################################################################################
            # If we have a label encoder, encode our labels.
            #########################################################################################

            if not (label_encoder is None or self.labels is None):

                if ('matched_anchors' in returns) and isinstance(
                        label_encoder, SSDInputEncoder):
                    batch_y_encoded, batch_matched_anchors = label_encoder(
                        batch_y, diagnostics=True)
                else:
                    batch_y_encoded = label_encoder(batch_y, diagnostics=False)
                    batch_matched_anchors = None

            else:
                batch_y_encoded = None
                batch_matched_anchors = None

            ret = []
            if 'processed_images' in returns: ret.append(batch_X)
            if 'encoded_labels' in returns: ret.append(batch_y_encoded)
            if 'matched_anchors' in returns: ret.append(batch_matched_anchors)
            if 'processed_labels' in returns: ret.append(batch_y)
            if 'filenames' in returns: ret.append(batch_filenames)
            if 'image_ids' in returns: ret.append(batch_image_ids)
            if 'evaluation-neutral' in returns: ret.append(batch_eval_neutral)
            if 'inverse_transform' in returns:
                ret.append(batch_inverse_transforms)
            if 'original_images' in returns: ret.append(batch_original_images)
            if 'original_labels' in returns: ret.append(batch_original_labels)

            yield ret
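
To close the loop on the 'inverse_transform' output described in the docstring above, consuming the generator for prediction might look like the following sketch; `dataset`, `transformations`, `model`, and `decode_detections` are stand-ins for objects the surrounding project is assumed to provide.

# Sketch only: all names below except the `returns` keywords are assumptions.
predict_generator = dataset.generate(batch_size=1,
                                     shuffle=False,
                                     transformations=transformations,
                                     label_encoder=None,
                                     returns={'processed_images',
                                              'inverse_transform',
                                              'original_labels'},
                                     keep_images_without_gt=True)

batch_images, batch_inverse_transforms, batch_original_labels = next(predict_generator)

# Predict on the transformed images, then map the predicted boxes back
# onto the original image by applying the inverters in order.
y_pred = model.predict(batch_images)
y_pred_decoded = decode_detections(y_pred)
for inverter in batch_inverse_transforms[0]:
    y_pred_decoded[0] = inverter(y_pred_decoded[0])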