# Example no. 1
# 0
 def brightness_augmentor(input_im):
     """Randomly apply the configured brightness augmentation to ``input_im``.

     The augmentation runs only when ``self.enable_random_brightness`` is
     truthy and a fresh ``random.random()`` draw exceeds 0.5; otherwise the
     input is handed back untouched.

     NOTE(review): ``self`` is not a parameter here, so it must come from an
     enclosing scope that is not visible in this excerpt -- confirm.

     Args:
         input_im: Image forwarded to ``Augmentor.brightness``.

     Returns:
         The result of ``Augmentor.brightness`` when the augmentation fires,
         else ``input_im`` itself.
     """
     # Guard clause: the random draw is only consumed when the feature is on,
     # because ``and`` short-circuits on the enable flag.
     if not (self.enable_random_brightness and random.random() > 0.5):
         return input_im
     return Augmentor.brightness(input_im, **self.brightness_params)
    def __prepare_batch(self):
        """Assemble one batch of images with per-scale label and mask maps.

        The first ``self.num_neg_images_per_batch`` slots hold negative
        images: each is randomly resized, then randomly cropped (or centre
        padded with zeros) to the network input size, and labelled as pure
        background. The remaining slots hold positive samples: one bbox of a
        randomly chosen positive image is rescaled to a randomly chosen
        output scale, the image is cropped around that bbox with a small
        random jitter, and ground-truth maps are built for every output scale.

        Label/mask channel usage as written by this method: channel 0 is set
        to 1 on cells covered by exactly one in-range bbox, channel 1 is set
        to 1 everywhere else, and channels 2-5 hold the normalised offsets
        from each receptive-field centre to the left / top / right / bottom
        bbox edges. Masks enable channels 0-1 everywhere except on cells that
        are covered by a gray-range bbox or by more than one in-range bbox,
        and enable the remaining channels only on single-bbox cells.

        Side effects:
            ``self.blur_params['kernel_size']`` is overwritten whenever the
            blur augmentation fires.

        Returns:
            The ``DataBatch`` built from ``self.mxnet_module`` after appending
            the image tensor and then, for every output scale, the mask
            tensor followed by the label tensor.
        """

        def augment_pixels(image):
            """Apply the enabled photometric augmentations to ``image``."""
            # The whole group is skipped for about half of the samples; each
            # enabled augmentation then fires on its own coin flip.
            if random.random() > 0.5:
                if self.enable_random_brightness and random.random() > 0.5:
                    image = Augmentor.brightness(image, **self.brightness_params)
                if self.enable_random_saturation and random.random() > 0.5:
                    image = Augmentor.saturation(image, **self.saturation_params)
                if self.enable_random_contrast and random.random() > 0.5:
                    image = Augmentor.contrast(image, **self.contrast_params)
                if self.enable_blur and random.random() > 0.5:
                    # The kernel size is drawn per sample and stored in the
                    # shared parameter dict before the call.
                    self.blur_params['kernel_size'] = random.choice(self.blur_kernel_size_list)
                    image = Augmentor.blur(image, **self.blur_params)
            return image

        im_batch = numpy.zeros((self.batch_size,
                                self.num_image_channels,
                                self.net_input_height,
                                self.net_input_width),
                               dtype=numpy.float32)

        # One square (v x v) label map and one mask map per output scale.
        label_batch_list = [numpy.zeros((self.batch_size, self.num_output_channels, v, v),
                                        dtype=numpy.float32)
                            for v in self.feature_map_size_list]
        mask_batch_list = [numpy.zeros((self.batch_size, self.num_output_channels, v, v),
                                       dtype=numpy.float32)
                           for v in self.feature_map_size_list]

        data_batch = DataBatch(self.mxnet_module)

        for loop in range(self.batch_size):

            # ------------------------------------------------------------
            # Negative samples fill the leading slots of the batch.
            # ------------------------------------------------------------
            if loop < self.num_neg_images_per_batch:
                rand_idx = random.choice(self.negative_index)
                im, _, __ = self.data_provider.read_by_index(rand_idx)

                factor_low = self.neg_image_resize_factor_interval[0]
                factor_high = self.neg_image_resize_factor_interval[1]
                random_resize_factor = random.random() * (factor_high - factor_low) + factor_low
                im = cv2.resize(im, (0, 0), fy=random_resize_factor, fx=random_resize_factor)

                # Per axis: a non-negative interval means the image is at
                # least as large as the network input, so a random window is
                # cropped; otherwise the whole axis is copied into a centred
                # band of the zero-initialised canvas.
                h_interval = im.shape[0] - self.net_input_height
                w_interval = im.shape[1] - self.net_input_width
                if h_interval >= 0:
                    y_top = random.randint(0, h_interval)
                    src_rows = slice(y_top, y_top + self.net_input_height)
                    dst_rows = slice(None)
                else:
                    y_pad = int(-h_interval / 2)
                    src_rows = slice(None)
                    dst_rows = slice(y_pad, y_pad + im.shape[0])
                if w_interval >= 0:
                    x_left = random.randint(0, w_interval)
                    src_cols = slice(x_left, x_left + self.net_input_width)
                    dst_cols = slice(None)
                else:
                    x_pad = int(-w_interval / 2)
                    src_cols = slice(None)
                    dst_cols = slice(x_pad, x_pad + im.shape[1])

                im_input = numpy.zeros((self.net_input_height, self.net_input_width, self.num_image_channels),
                                       dtype=numpy.uint8)
                im_input[dst_rows, dst_cols, :] = im[src_rows, src_cols, :]

                # data augmentation
                if self.enable_horizon_flip and random.random() > 0.5:
                    im_input = Augmentor.flip(im_input, 'h')
                if self.enable_vertical_flip and random.random() > 0.5:
                    im_input = Augmentor.flip(im_input, 'v')
                im_input = augment_pixels(im_input)

                # HWC -> CHW, as float32, to match the layout of im_batch.
                im_batch[loop] = im_input.astype(numpy.float32).transpose([2, 0, 1])

                # Every cell is background (channel 1) and only the two
                # score channels take part via the mask.
                for label_batch in label_batch_list:
                    label_batch[loop, 1, :, :] = 1
                for mask_batch in mask_batch_list:
                    mask_batch[loop, 0:2, :, :] = 1
                continue

            # ------------------------------------------------------------
            # Positive samples.
            # ------------------------------------------------------------
            rand_idx = random.choice(self.positive_index)
            im, _, bboxes_org = self.data_provider.read_by_index(rand_idx)

            num_bboxes = bboxes_org.shape[0]
            # Work on a copy so the provider's array is never modified.
            bboxes = bboxes_org.copy()

            # Geometric augmentation; columns are used as (x, y, w, h), so a
            # flip only mirrors the corresponding origin coordinate.
            if self.enable_horizon_flip and random.random() > 0.5:
                im = Augmentor.flip(im, 'h')
                bboxes[:, 0] = im.shape[1] - (bboxes[:, 0] + bboxes[:, 2])
            if self.enable_vertical_flip and random.random() > 0.5:
                im = Augmentor.flip(im, 'v')
                bboxes[:, 1] = im.shape[0] - (bboxes[:, 1] + bboxes[:, 3])

            # randomly select a bbox
            bbox_idx = random.randint(0, num_bboxes - 1)

            # randomly select a reasonable scale for the selected bbox
            # (selection strategy may vary from task to task)
            target_bbox = bboxes[bbox_idx, :]
            longer_side = max(target_bbox[2:])
            if longer_side <= self.bbox_small_list[0]:
                scale_idx = 0
            elif longer_side <= self.bbox_small_list[1]:
                scale_idx = random.randint(0, 1)
            elif random.random() > 0.9:
                # Occasionally allow the extra index == num_output_scales,
                # which means "larger than the largest scale" below.
                scale_idx = random.randint(0, self.num_output_scales)
            else:
                scale_idx = random.randint(0, self.num_output_scales - 1)

            # choose a side length in the selected scale
            if scale_idx == self.num_output_scales:
                scale_idx -= 1
                # randint needs integer bounds: a float bound is deprecated
                # since Python 3.10 and rejected by 3.12, so truncate first.
                side_length = self.bbox_large_list[-1] + random.randint(0, int(self.bbox_large_list[-1] * 0.5))
            else:
                side_length = self.bbox_small_list[scale_idx] + \
                              random.randint(0, self.bbox_large_list[scale_idx] - self.bbox_small_list[scale_idx])

            target_scale = float(side_length) / longer_side

            # resize bboxes
            bboxes = bboxes * target_scale
            target_bbox = target_bbox * target_scale

            # Determine the state of each bbox in each scale: "valid" means
            # the longer side lies in the scale's main or gray range, and
            # "gray" marks the gray-range-only case.
            gray = [[False] * num_bboxes for _ in range(self.num_output_scales)]
            valid = [[False] * num_bboxes for _ in range(self.num_output_scales)]
            for bbox_j in range(num_bboxes):
                large_side = max(bboxes[bbox_j, 2:])
                for scale_i in range(self.num_output_scales):
                    if self.bbox_small_list[scale_i] <= large_side <= self.bbox_large_list[scale_i]:
                        valid[scale_i][bbox_j] = True
                    elif self.bbox_small_gray_list[scale_i] <= large_side <= self.bbox_large_gray_list[scale_i]:
                        gray[scale_i][bbox_j] = True
                        valid[scale_i][bbox_j] = True

            # resize the original image
            im = cv2.resize(im, None, fx=target_scale, fy=target_scale)

            # Crop the image around the centre of the selected bbox, jittered
            # by up to half a receptive-field stride of the chosen scale.
            vibration_length = int(self.receptive_field_stride[scale_idx] / 2)
            offset_x = random.randint(-vibration_length, vibration_length)
            offset_y = random.randint(-vibration_length, vibration_length)
            center_x = target_bbox[0] + target_bbox[2] / 2 + offset_x
            center_y = target_bbox[1] + target_bbox[3] / 2 + offset_y

            # Parts of the window that fall outside the image are clamped;
            # the left/top overshoot becomes zero padding in the canvas.
            crop_left = int(center_x - self.net_input_width / 2.0)
            crop_left_pad = 0
            if crop_left < 0:
                crop_left_pad = -crop_left
                crop_left = 0
            crop_top = int(center_y - self.net_input_height / 2.0)
            crop_top_pad = 0
            if crop_top < 0:
                crop_top_pad = -crop_top
                crop_top = 0
            crop_right = min(int(center_x + self.net_input_width / 2.0), im.shape[1])
            crop_bottom = min(int(center_y + self.net_input_height / 2.0), im.shape[0])

            im = im[crop_top:crop_bottom, crop_left:crop_right, :]
            # Use the configured channel count, consistent with im_batch and
            # with the negative branch, rather than a hard-coded 3.
            im_input = numpy.zeros((self.net_input_height, self.net_input_width, self.num_image_channels),
                                   dtype=numpy.uint8)
            im_input[crop_top_pad:crop_top_pad + im.shape[0], crop_left_pad:crop_left_pad + im.shape[1], :] = im

            # image augmentation
            im_input = augment_pixels(im_input)

            # HWC -> CHW, as float32, to match the layout of im_batch.
            im_input = im_input.astype(dtype=numpy.float32).transpose([2, 0, 1])

            # Translation from resized-image coordinates to network-input
            # coordinates. The crop window is centred on the bbox centre
            # displaced by (offset_x, offset_y), so the same jitter must be
            # subtracted here; without it the ground truth is misaligned with
            # the cropped pixels. Identical for every scale, so computed once.
            shift_x = (self.net_input_width / 2.0 - target_bbox[2] / 2) - target_bbox[0] - offset_x
            shift_y = (self.net_input_height / 2.0 - target_bbox[3] / 2) - target_bbox[1] - offset_y

            # construct GT feature maps for each scale
            for scale_i in range(self.num_output_scales):
                map_size = self.feature_map_size_list[scale_i]
                rf_start = self.receptive_field_center_start[scale_i]
                rf_stride = self.receptive_field_stride[scale_i]
                norm_constant = self.normalization_constant[scale_i]

                # compute the center coordinates of all RFs
                receptive_field_centers = numpy.array([rf_start + w * rf_stride for w in range(map_size)])

                temp_label = numpy.zeros((self.num_output_channels, map_size, map_size), dtype=numpy.float32)
                temp_mask = numpy.zeros((self.num_output_channels, map_size, map_size), dtype=numpy.float32)
                temp_label[1, :, :] = 1
                temp_mask[0:2, :, :] = 1

                # Per-cell count of in-range bboxes and flag for gray bboxes.
                score_map_green = numpy.zeros((map_size, map_size), dtype=numpy.int32)
                score_map_gray = numpy.zeros((map_size, map_size), dtype=numpy.int32)

                for bbox_j in range(num_bboxes):
                    if not valid[scale_i][bbox_j]:
                        continue
                    temp_bbox = bboxes[bbox_j, :]

                    temp_bbox_left_bound = temp_bbox[0] + shift_x
                    temp_bbox_right_bound = temp_bbox[0] + temp_bbox[2] + shift_x
                    temp_bbox_top_bound = temp_bbox[1] + shift_y
                    temp_bbox_bottom_bound = temp_bbox[1] + temp_bbox[3] + shift_y

                    # skip the bbox that does not appear in the cropped area
                    if temp_bbox_right_bound <= 0 or temp_bbox_left_bound >= self.net_input_width \
                            or temp_bbox_bottom_bound <= 0 or temp_bbox_top_bound >= self.net_input_height:
                        continue

                    # Index range of RF centres lying inside the bbox,
                    # clipped to the feature map.
                    left_RF_center_index = max(0, math.ceil((temp_bbox_left_bound - rf_start) / rf_stride))
                    right_RF_center_index = min(map_size - 1, math.floor((temp_bbox_right_bound - rf_start) / rf_stride))
                    top_RF_center_index = max(0, math.ceil((temp_bbox_top_bound - rf_start) / rf_stride))
                    bottom_RF_center_index = min(map_size - 1, math.floor((temp_bbox_bottom_bound - rf_start) / rf_stride))

                    # ignore the face with no RF centers inside
                    if right_RF_center_index < left_RF_center_index or bottom_RF_center_index < top_RF_center_index:
                        continue

                    rows = slice(top_RF_center_index, bottom_RF_center_index + 1)
                    cols = slice(left_RF_center_index, right_RF_center_index + 1)

                    if gray[scale_i][bbox_j]:
                        score_map_gray[rows, cols] = 1
                        continue

                    score_map_green[rows, cols] += 1

                    num_rows = bottom_RF_center_index - top_RF_center_index + 1
                    num_cols = right_RF_center_index - left_RF_center_index + 1
                    x_centers = receptive_field_centers[cols]
                    y_centers = receptive_field_centers[rows]
                    x0_location_regression = (x_centers - temp_bbox_left_bound) / norm_constant
                    y0_location_regression = (y_centers - temp_bbox_top_bound) / norm_constant
                    x1_location_regression = (x_centers - temp_bbox_right_bound) / norm_constant
                    y1_location_regression = (y_centers - temp_bbox_bottom_bound) / norm_constant

                    # x offsets vary along columns, y offsets along rows;
                    # tiling (plus a transpose for y) fills the rectangle.
                    temp_label[2, rows, cols] = numpy.tile(x0_location_regression, [num_rows, 1])
                    temp_label[3, rows, cols] = numpy.tile(y0_location_regression, [num_cols, 1]).T
                    temp_label[4, rows, cols] = numpy.tile(x1_location_regression, [num_rows, 1])
                    temp_label[5, rows, cols] = numpy.tile(y1_location_regression, [num_cols, 1]).T

                # Cells hit by a gray bbox or by several in-range bboxes are
                # ambiguous and masked out of the score channels; cells hit by
                # exactly one in-range bbox are positives.
                score_gray_flag = numpy.logical_or(score_map_green > 1, score_map_gray > 0)
                location_green_flag = score_map_green == 1

                temp_label[0, :, :][location_green_flag] = 1
                temp_label[1, :, :][location_green_flag] = 0
                temp_mask[0:2, score_gray_flag] = 0
                # for bbox regression, only green area is available
                temp_mask[2:, location_green_flag] = 1

                label_batch_list[scale_i][loop] = temp_label
                mask_batch_list[scale_i][loop] = temp_mask

            im_batch[loop] = im_input

        data_batch.append_data(im_batch)

        # Labels are appended as (mask, label) pairs, one pair per scale.
        for n in range(self.num_output_scales):
            data_batch.append_label(mask_batch_list[n])
            data_batch.append_label(label_batch_list[n])

        return data_batch