Exemplo n.º 1
0
    def __data_generation(self, labels_temp):
        """Build one batch of edge-image windows with binary labels.

        For every sample name the image ``<images_dir>/<name>.jpg`` is
        converted to an edge image with ``ed.auto_canny`` and the boxes in
        ``<labels_dir>/<name>.xml`` are parsed. About half of the samples
        become a "none" window (label 0.0): all annotated boxes are zeroed
        and a random crop of an upscaled copy is taken. The remaining
        samples become an "object" window (label 1.0): one random box plus
        a 20 % margin is cut out, optionally augmented, centred on a square
        black canvas and resized to the network input size. Samples whose
        annotation holds no object always become "none" windows.

        Args:
            labels_temp: iterable of sample base names (without extension).

        Returns:
            Tuple ``(x, y)``. ``x`` has shape ``(batch_size, image_height,
            image_width, channels)`` with pixel values divided by 255;
            ``y`` has shape ``(batch_size, label_dim)``. Rows beyond
            ``len(labels_temp)`` are left uninitialised (``np.empty``).

        Raises:
            IOError: if an image file cannot be read by ``cv2.imread``.
        """
        x = np.empty((self.batch_size, self.image_height, self.image_width,
                      self.channels))
        y = np.empty((self.batch_size, self.label_dim))
        for i, f in enumerate(labels_temp):
            image_path = os.path.join(self.images_dir, f + ".jpg")
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread reports failure by returning None, not raising.
                raise IOError("Could not read image: " + image_path)
            orig_width = int(image.shape[1])
            orig_height = int(image.shape[0])

            # Generate edge image
            result = ed.auto_canny(image, self.multi_channel)
            if not self.multi_channel:
                result = np.reshape(result,
                                    (result.shape[0], result.shape[1], 1))

            # Read ground truth as (xmin, ymin, xmax, ymax, class index)
            bboxes = []
            tree = et.parse(os.path.join(self.labels_dir, f + ".xml"))
            for obj in tree.getroot().findall("./object"):
                class_index = self.class_labels.index(obj.find('name').text)
                bndbox = obj.find('bndbox')
                corners = [
                    int(bndbox.find(tag).text)
                    for tag in ('xmin', 'ymin', 'xmax', 'ymax')
                ]
                bboxes.append(tuple(corners) + (class_index, ))

            # Draw first so the random stream is consumed exactly as before;
            # a sample without objects can only yield a "none" window.
            want_background = np.random.randint(0, 100) <= 50
            if want_background or not bboxes:
                # Create none class window
                label = 0.0

                # Blank out every annotated object. A scalar assignment
                # also works when a box reaches past the image border.
                for box in bboxes:
                    result[box[1]:box[3], box[0]:box[2]] = 0

                # Upscale by a random integer factor, enlarging further
                # until a full-size window fits strictly inside the image.
                scale = np.random.randint(1, 6)
                rescaled_width = scale * orig_width
                rescaled_height = scale * orig_height
                while (self.image_height >= rescaled_height
                       or self.image_width >= rescaled_width):
                    scale += 1
                    rescaled_width = scale * orig_width
                    rescaled_height = scale * orig_height
                result = cv2.resize(result, (rescaled_width, rescaled_height))

                # Cut out random window
                xmin = np.random.randint(0, rescaled_width - self.image_width)
                ymin = np.random.randint(0,
                                         rescaled_height - self.image_height)
                window = result[ymin:ymin + self.image_height,
                                xmin:xmin + self.image_width]
            else:
                # Create object class window around a random target object
                label = 1.0
                target_box = random.choice(bboxes)

                # Image region with object: bbox plus 20 % of its longer
                # side as margin, clipped to the image.
                box_width = target_box[2] - target_box[0]
                box_height = target_box[3] - target_box[1]
                margin = int(float(max(box_width, box_height)) * 0.2)
                window_xmin = max(target_box[0] - margin, 0)
                window_xmax = min(target_box[2] + margin, orig_width)
                window_ymin = max(target_box[1] - margin, 0)
                window_ymax = min(target_box[3] + margin, orig_height)
                cutout = result[window_ymin:window_ymax,
                                window_xmin:window_xmax]

                # Augment about half of the object windows
                if self.use_augmentation and np.random.randint(0, 100) <= 50:
                    rand_val = np.random.randint(0, 3)
                    if rand_val == 0:
                        # Shrink to 50-100 % of the size, keeping the
                        # aspect ratio; never request a 0-pixel side.
                        aspect_ratio = float(cutout.shape[1]) / float(
                            cutout.shape[0])
                        new_height = float(
                            cutout.shape[0]) * np.random.uniform(0.5, 1.0)
                        new_width = new_height * aspect_ratio
                        cutout = cv2.resize(
                            cutout,
                            (max(int(new_width), 1), max(int(new_height), 1)))
                    elif rand_val == 1:
                        # Rotate around the centre by at most +/- 90 degrees
                        angle = round(np.random.uniform(-1, 1) * 90.0)
                        image_center = (cutout.shape[1] / 2.0,
                                        cutout.shape[0] / 2.0)
                        rot_mat = cv2.getRotationMatrix2D(
                            image_center, angle, 1.0)
                        cutout = cv2.warpAffine(cutout,
                                                rot_mat,
                                                cutout.shape[1::-1],
                                                flags=cv2.INTER_LINEAR)
                    else:
                        # Flip code 1 mirrors left/right (horizontal flip)
                        cutout = cv2.flip(cutout, 1)

                # OpenCV drops the trailing axis of single-channel images.
                if not self.multi_channel:
                    cutout = np.reshape(cutout,
                                        (cutout.shape[0], cutout.shape[1], 1))

                # Place image region centred on square black background
                cut_height, cut_width = cutout.shape[0], cutout.shape[1]
                side = max(cut_height, cut_width)
                top = int((side - cut_height) / 2)
                left = int((side - cut_width) / 2)
                window = np.zeros((side, side, self.channels))
                window[top:top + cut_height, left:left + cut_width, :] = cutout

                window = cv2.resize(window,
                                    (self.image_width, self.image_height))

            # np.float was removed in NumPy 1.24; float64 is the same dtype.
            window = np.array(window, dtype=np.float64)
            if not self.multi_channel:
                window = np.reshape(window,
                                    (window.shape[0], window.shape[1], 1))
            window /= 255.0

            x[i] = window
            y[i] = label

        return x, y
    def predict(self, image):
        """Score sliding windows over the edge image of ``image``.

        The edge image (HED when ``self.use_hed`` is set, otherwise
        ``ed.auto_canny``) is zero-padded to a square, resized to 1x-5x the
        network input size and scanned with windows of the network input
        size that overlap by 50 %. Every window is scaled to [0, 1] and
        passed to the model.

        Args:
            image: input image array; only ``shape[0]`` (height) and
                ``shape[1]`` (width) are read here before it is handed to
                the edge detector.

        Returns:
            List with one ``[xmin, ymin, xmax, ymax, prediction]`` entry per
            window, where ``prediction`` is the first row returned by
            ``model.predict`` for that window.
        """
        orig_width = image.shape[1]
        orig_height = image.shape[0]

        if self.use_hed:
            edge_image = self.hed.get_edge_image(image,
                                                 orig_width,
                                                 orig_height,
                                                 normalized=False)
            edge_image = np.reshape(
                edge_image, (edge_image.shape[0], edge_image.shape[1], 1))
        else:
            edge_image = ed.auto_canny(image, self.use_multichannel)
            if not self.use_multichannel:
                edge_image = np.reshape(
                    edge_image, (edge_image.shape[0], edge_image.shape[1], 1))

        # Zero padding to make square, original content centred
        edge_height, edge_width = edge_image.shape[0], edge_image.shape[1]
        if edge_height != edge_width:
            side = max(edge_height, edge_width)
            top = int(round((side - edge_height) / 2.0))
            left = int(round((side - edge_width) / 2.0))
            padded = np.zeros((side, side, edge_image.shape[2]))
            padded[top:top + edge_height, left:left + edge_width, :] = \
                edge_image
            edge_image = padded

        # Run exhaustive sliding window
        result = []
        model = self.__get_model()
        model.load_weights(self.weight_file)
        window_width = self.image_width
        window_height = self.image_height
        overlap = 0.5
        for scale in range(1, 6):
            image_width = int(round(scale * window_width))
            image_height = int(round(scale * window_height))

            # The resized image depends only on the scale, so build it once
            # per scale instead of once per window. np.float was removed in
            # NumPy 1.24; float64 is the same dtype.
            resized_image = np.array(cv2.resize(edge_image,
                                                (image_width, image_height)),
                                     dtype=np.float64)
            # cv2.resize returns a 2-D array for single-channel input (HED
            # and single-channel Canny alike); restore the channel axis.
            if resized_image.ndim == 2:
                resized_image = resized_image[:, :, np.newaxis]

            # NOTE(review): these factors map window coordinates straight
            # onto the unpadded image size although the scan runs on the
            # square-padded edge image; for non-square inputs the padded
            # axis looks mis-scaled/unshifted - confirm against consumers.
            resize_x = float(orig_width) / float(image_width)
            resize_y = float(orig_height) / float(image_height)
            steps = int((scale / overlap) - 1)
            for x in range(steps):
                x_offset = x * (window_width * overlap)
                xmin = int(x_offset)
                xmax = int(x_offset + window_width)
                for y in range(steps):
                    y_offset = y * (window_height * overlap)
                    ymin = int(y_offset)
                    ymax = int(y_offset + window_height)

                    # Add the batch axis and scale pixel values to [0, 1]
                    window = resized_image[ymin:ymax, xmin:xmax]
                    window = window[np.newaxis, ...] / 255.0
                    prediction = model.predict(window, 1)[0]

                    true_xmin = int(x_offset * resize_x)
                    true_ymin = int(y_offset * resize_y)
                    true_xmax = true_xmin + int(window_width * resize_x)
                    true_ymax = true_ymin + int(window_height * resize_y)
                    result.append([
                        true_xmin, true_ymin, true_xmax, true_ymax, prediction
                    ])
        return result
Exemplo n.º 3
0
    def predict(self, image, use_refinement=True):
        """Score sliding windows over the edge image, optionally refining.

        The edge image (HED when ``self.use_hed`` is set, otherwise
        ``ed.auto_canny``) is zero-padded to a square, resized to 1x-5x the
        network input size and scanned with windows of the network input
        size that overlap by 50 %. For windows scoring at least 0.75, strips
        of 25 %, 50 % and 75 % are masked from the left, right, top and
        bottom in turn; a strip is kept while the masked window scores
        higher than the unmasked one, and the reported box is shrunk by the
        strips that were kept.

        Args:
            image: input image array; only ``shape[0]`` (height) and
                ``shape[1]`` (width) are read here before it is handed to
                the edge detector.
            use_refinement: when False the masking step is skipped and the
                full window is reported.

        Returns:
            List with one ``[xmin, ymin, xmax, ymax, prediction]`` entry per
            window, where ``prediction`` is the first row returned by
            ``model.predict`` for the unmasked window.
        """
        orig_width = image.shape[1]
        orig_height = image.shape[0]

        if self.use_hed:
            edge_image = self.hed.get_edge_image(image,
                                                 orig_width,
                                                 orig_height,
                                                 normalized=False)
            edge_image = np.reshape(
                edge_image, (edge_image.shape[0], edge_image.shape[1], 1))
        else:
            edge_image = ed.auto_canny(image, self.use_multichannel)
            if not self.use_multichannel:
                edge_image = np.reshape(
                    edge_image, (edge_image.shape[0], edge_image.shape[1], 1))

        # Zero padding to make square, original content centred
        edge_height, edge_width = edge_image.shape[0], edge_image.shape[1]
        if edge_height != edge_width:
            side = max(edge_height, edge_width)
            top = int(round((side - edge_height) / 2.0))
            left = int(round((side - edge_width) / 2.0))
            padded = np.zeros((side, side, edge_image.shape[2]))
            padded[top:top + edge_height, left:left + edge_width, :] = \
                edge_image
            edge_image = padded

        model = self.__get_model()
        model.load_weights(self.weight_file)
        window_width = self.image_width
        window_height = self.image_height

        # Candidate mask strips, smallest first. Steps start at 1: a
        # zero-width strip masks nothing, leaves the score unchanged and
        # would end the search before it began.
        # Arguments assumed to be (xmin, ymin, xmax, ymax) for
        # __mask_out_region - TODO confirm against its definition.
        mask_width = int(float(self.image_width) * 0.25)
        mask_steps_x = 4
        mask_height = int(float(self.image_height) * 0.25)
        mask_steps_y = 4
        left_strips = [(0, 0, i * mask_width, self.image_height)
                       for i in range(1, mask_steps_x)]
        right_strips = [(self.image_width - i * mask_width, 0,
                         self.image_width, self.image_height)
                        for i in range(1, mask_steps_x)]
        top_strips = [(0, 0, self.image_width, i * mask_height)
                      for i in range(1, mask_steps_y)]
        bottom_strips = [(0, self.image_height - i * mask_height,
                          self.image_width, self.image_height)
                         for i in range(1, mask_steps_y)]

        def grow_mask(current, baseline, strips):
            """Apply ever larger strips while they beat the baseline score.

            Returns the last accepted window and the number of accepted
            steps (0 when even the smallest strip did not help).
            """
            accepted = 0
            for step, strip in enumerate(strips, start=1):
                candidate = self.__mask_out_region(current, *strip)
                if baseline < model.predict(candidate, 1)[0]:
                    current = candidate
                    accepted = step
                else:
                    break
            return current, accepted

        # Run exhaustive sliding window
        result = []
        overlap = 0.5
        for scale in range(1, 6):
            image_width = int(round(scale * window_width))
            image_height = int(round(scale * window_height))
            # np.float was removed in NumPy 1.24; float64 is the same dtype.
            resized_image = np.array(cv2.resize(edge_image,
                                                (image_width, image_height)),
                                     dtype=np.float64)
            # cv2.resize returns a 2-D array for single-channel input;
            # restore the channel axis so every window is (h, w, c).
            if resized_image.ndim == 2:
                resized_image = resized_image[:, :, np.newaxis]

            # NOTE(review): these factors map window coordinates straight
            # onto the unpadded image size although the scan runs on the
            # square-padded edge image; for non-square inputs the padded
            # axis looks mis-scaled/unshifted - confirm against consumers.
            resize_x = float(orig_width) / float(image_width)
            resize_y = float(orig_height) / float(image_height)
            steps = int((scale / overlap) - 1)
            for x in range(steps):
                x_offset = x * (window_width * overlap)
                xmin = int(x_offset)
                xmax = int(x_offset + window_width)
                for y in range(steps):
                    y_offset = y * (window_height * overlap)
                    ymin = int(y_offset)
                    ymax = int(y_offset + window_height)

                    # Add the batch axis and scale pixel values to [0, 1]
                    window = resized_image[ymin:ymax, xmin:xmax]
                    window = window[np.newaxis, ...] / 255.0

                    mask_x_min = 0
                    mask_x_max = 0
                    mask_y_min = 0
                    mask_y_max = 0

                    original_prediction = model.predict(window, 1)[0]
                    # Mask out areas and reevaluate for box refinement
                    if use_refinement and original_prediction >= 0.75:
                        window, kept = grow_mask(window, original_prediction,
                                                 left_strips)
                        mask_x_min = kept * mask_width
                        window, kept = grow_mask(window, original_prediction,
                                                 right_strips)
                        mask_x_max = kept * mask_width
                        window, kept = grow_mask(window, original_prediction,
                                                 top_strips)
                        mask_y_min = kept * mask_height
                        window, kept = grow_mask(window, original_prediction,
                                                 bottom_strips)
                        mask_y_max = kept * mask_height

                    # The near edge moves in by the left/top trim, so the
                    # remaining extent must lose both trims; clamp in case
                    # opposite strips overlap.
                    kept_width = max(
                        window_width - mask_x_min - mask_x_max, 0)
                    kept_height = max(
                        window_height - mask_y_min - mask_y_max, 0)
                    true_xmin = int((x_offset + mask_x_min) * resize_x)
                    true_ymin = int((y_offset + mask_y_min) * resize_y)
                    true_xmax = true_xmin + int(kept_width * resize_x)
                    true_ymax = true_ymin + int(kept_height * resize_y)
                    result.append([
                        true_xmin, true_ymin, true_xmax, true_ymax,
                        original_prediction
                    ])
        return result