Example #1
class VideoLoader(DataLoader):

    def __init__(self, video_path, image_size=(416, 416)):
        super().__init__()
        self.video_process = VideoProcess()
        self.dataset_process = ImageDataSetProcess()
        if not self.video_process.isVideoFile(video_path) or \
                not self.video_process.openVideo(video_path):
            raise ValueError("Invalid video path: %s" % video_path)
        self.image_size = image_size
        self.count = int(self.video_process.getFrameCount())
        self.color = (127.5, 127.5, 127.5)

    def __iter__(self):
        self.index = -1
        return self

    def __next__(self):
        self.index += 1
        success, src_image, rgb_image = self.video_process.readRGBFrame()

        if not success:
            raise StopIteration

        # padded resize
        rgb_image, _, _ = self.dataset_process.image_resize_square(rgb_image,
                                                                   self.image_size,
                                                                   self.color)
        rgb_image = self.dataset_process.image_normaliza(rgb_image)
        numpy_image = self.dataset_process.numpy_transpose(rgb_image)
        torch_image = self.all_numpy_to_tensor(numpy_image, 0)
        return src_image, torch_image

    def __len__(self):
        return self.count
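
A minimal usage sketch for VideoLoader, assuming the class and its framework dependencies (DataLoader, VideoProcess, ImageDataSetProcess) are importable in the current scope and that a video exists at the placeholder path; neither assumption is confirmed by the snippet itself:

loader = VideoLoader("input.mp4", image_size=(416, 416))  # placeholder path
print("frame count:", len(loader))
for src_image, torch_image in loader:
    # src_image is the frame as read from the video; torch_image is the
    # padded, resized, normalized and transposed tensor built in __next__
    print(torch_image.shape)
    break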
Example #2

import os

import numpy as np


class ComputeImagesMean:

    def __init__(self, image_size):
        self.image_size = image_size
        self.dir_process = DirProcess()
        self.image_process = ImageProcess()
        self.dataset_process = ImageDataSetProcess()

    def compute(self, train_path):
        numpy_images = []
        path, _ = os.path.split(train_path)
        images_dir = os.path.join(path, "../JPEGImages")
        for line_data in self.dir_process.getFileData(train_path):
            data_list = [x.strip() for x in line_data.split() if x.strip()]
            if len(data_list) >= 1:
                image_path = os.path.join(images_dir, data_list[0])
                src_image, rgb_image = self.image_process.readRgbImage(image_path)
                rgb_image = self.dataset_process.image_resize(rgb_image, self.image_size)
                normaliza_image = self.dataset_process.image_normaliza(rgb_image)
                numpy_images.append(normaliza_image)
            else:
                print("read %s image path error!" % data_list)
        numpy_images = np.stack(numpy_images)
        mean = np.mean(numpy_images, axis=(0, 1, 2))
        std = np.std(numpy_images, axis=(0, 1, 2))
        return mean, std
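
A hedged usage sketch for ComputeImagesMean; the directory layout (a list file whose entries name images in a sibling ../JPEGImages folder) is inferred from compute() above, and the concrete path is only a placeholder:

calculator = ComputeImagesMean(image_size=(416, 416))
mean, std = calculator.compute("dataset/ImageSets/train.txt")  # placeholder list file
print("per-channel mean:", mean)  # computed over all listed images
print("per-channel std:", std)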
Example #3
import numpy as np


class SegmentDatasetProcess(BaseDataSetProcess):
    def __init__(self):
        super().__init__()
        self.dataset_process = ImageDataSetProcess()
        self.image_pad_color = (0, 0, 0)
        self.label_pad_color = 250

    def normaliza_dataset(self, src_image):
        image = self.dataset_process.image_normaliza(src_image)
        image = self.dataset_process.numpy_transpose(image)
        return image

    def resize_dataset(self,
                       src_image,
                       image_size,
                       label,
                       volid_label_seg=None,
                       valid_label_seg=None):
        image, ratio, pad = self.dataset_process.image_resize_square(
            src_image, image_size, color=self.image_pad_color)
        target = self.encode_segmap(np.array(label, dtype=np.uint8),
                                    volid_label_seg, valid_label_seg)
        target, ratio, pad = self.dataset_process.image_resize_square(
            target, image_size, self.label_pad_color)
        return image, target

    def change_label(self, label, valid_label_seg):
        valid_masks = np.zeros(label.shape)
        for l in range(0, len(valid_label_seg)):
            valid_mask = label == l  # True where the pixel carries valid class index l
            valid_masks += valid_mask  # accumulate: positions with no valid class stay 0.0
        valid_masks[valid_masks == 0] = -1
        seg = np.float32(label) * valid_masks
        seg[seg < 0] = self.label_pad_color
        seg = np.uint8(seg)
        return seg

    def encode_segmap(self, mask, volid_label, valid_label):
        # remap raw ids: void ids go to the pad value, valid ids to their group index
        classes = -np.ones([100, 100])
        valid = [x for j in valid_label for x in j]
        for i in range(0, len(valid_label)):
            classes[i, :len(valid_label[i])] = valid_label[i]
        for label in volid_label:
            mask[mask == label] = self.label_pad_color
        for validc in valid:
            mask[mask == validc] = np.uint8(np.where(classes == validc)[0])

        return mask
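
A sketch of how the two steps above might be chained for one sample; the stand-in arrays, the class-id grouping in valid_label_seg, and the void ids are illustrative assumptions, not values taken from the project:

import numpy as np

process = SegmentDatasetProcess()
rgb_image = np.zeros((480, 640, 3), dtype=np.uint8)    # stand-in image
label_image = np.zeros((480, 640), dtype=np.uint8)     # stand-in label mask
volid_label_seg = [[255]]        # ids remapped to the ignore/pad value
valid_label_seg = [[0], [1, 2]]  # ids grouped per training class index
image, target = process.resize_dataset(rgb_image, (512, 512), label_image,
                                        volid_label_seg, valid_label_seg)
net_input = process.normaliza_dataset(image)  # normalized and transposed for the network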
Example #4

class ImagesLoader(DataLoader):

    def __init__(self, input_dir, image_size=(416, 416)):
        super().__init__()
        self.image_size = image_size
        self.imageProcess = ImageProcess()
        self.dirProcess = DirProcess()
        self.dataset_process = ImageDataSetProcess()
        temp_files = self.dirProcess.getDirFiles(input_dir, "*.*")
        self.files = list(temp_files)
        self.count = len(self.files)
        self.color = (127.5, 127.5, 127.5)

    def __iter__(self):
        self.index = -1
        return self

    def __next__(self):
        self.index += 1
        if self.index == self.count:
            raise StopIteration
        image_path = self.files[self.index]

        # Read image
        srcImage, rgb_image = self.imageProcess.readRgbImage(image_path)

        # Padded resize
        rgb_image, _, _ = self.dataset_process.image_resize_square(rgb_image,
                                                                   self.image_size,
                                                                   self.color)
        rgb_image = self.dataset_process.image_normaliza(rgb_image)
        numpy_image = self.dataset_process.numpy_transpose(rgb_image)
        torch_image = self.all_numpy_to_tensor(numpy_image)
        return srcImage, torch_image

    def __len__(self):
        return self.count
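
A minimal usage sketch for ImagesLoader, under the same assumptions as the video loader above: the framework classes are importable and the placeholder directory contains images:

loader = ImagesLoader("test_images", image_size=(416, 416))  # placeholder directory
print("images found:", len(loader))
for srcImage, torch_image in loader:
    # srcImage keeps the original resolution; torch_image is the padded,
    # normalized, transposed tensor returned by __next__
    print(torch_image.shape)
    break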
Example #5
import numpy as np


class ClassifyDatasetProcess(BaseDataSetProcess):
    def __init__(self, mean, std):
        super().__init__()
        self.dataset_process = ImageDataSetProcess()
        self.mean = np.array(mean, dtype=np.float32)
        self.std = np.array(std, dtype=np.float32)
        self.normalize_transform = self.torchvision_process.torch_normalize(
            flag=0, mean=self.mean, std=self.std)

    def normaliza_dataset(self, src_image, normaliza_type=0):
        result = None
        if normaliza_type == 0:  # numpy normalize
            normaliza_image = self.dataset_process.image_normaliza(src_image)
            image = self.dataset_process.numpy_normaliza(
                normaliza_image, self.mean, self.std)
            image = self.dataset_process.numpy_transpose(image, image.dtype)
            result = self.numpy_to_torch(image, flag=0)
        elif normaliza_type == 1:  # torchvision normalize
            result = self.normalize_transform(src_image)
        return result

    def resize_image(self, src_image, image_size):
        image = self.dataset_process.image_resize(src_image, image_size)
        return image
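
A hedged usage sketch for the classification pipeline; the mean/std values are the common ImageNet statistics used only as placeholders, and the image is a blank stand-in array. Only the numpy branch (normaliza_type=0) is shown, since the input type expected by the torchvision branch depends on torch_normalize, which is not part of the snippet:

import numpy as np

process = ClassifyDatasetProcess(mean=(0.485, 0.456, 0.406),
                                 std=(0.229, 0.224, 0.225))
rgb_image = np.zeros((256, 256, 3), dtype=np.uint8)               # stand-in image
resized = process.resize_image(rgb_image, (224, 224))
net_input = process.normaliza_dataset(resized, normaliza_type=0)  # numpy path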
Example #6
import numpy as np


class DetectionDataSetProcess(BaseDataSetProcess):
    def __init__(self):
        super().__init__()
        self.dataset_process = ImageDataSetProcess()
        self.image_pad_color = (0, 0, 0)

    def normaliza_dataset(self, src_image, labels=None, image_size=None):
        image = self.dataset_process.image_normaliza(src_image)
        image = self.dataset_process.numpy_transpose(image)
        result = None
        if labels is not None:
            # YOLO-style rows: [class_id, cx, cy, w, h], normalized by image size
            result = np.zeros((len(labels), 5), dtype=np.float32)
            for index, rect in enumerate(labels):
                class_id = rect.class_id
                x, y = rect.center()
                x /= image_size[0]
                y /= image_size[1]
                width = rect.width() / image_size[0]
                height = rect.height() / image_size[1]
                result[index, :] = np.array([class_id, x, y, width, height])
        return image, result

    def resize_dataset(self,
                       src_image,
                       image_size,
                       boxes=None,
                       class_name=None):
        labels = []
        image, ratio, pad = self.dataset_process.image_resize_square(
            src_image, image_size, color=self.image_pad_color)
        if boxes is not None:
            for box in boxes:
                if box.name in class_name:
                    rect = Rect2D()
                    rect.class_id = class_name.index(box.name)
                    rect.min_corner.x = ratio * box.min_corner.x + pad[0] // 2
                    rect.min_corner.y = ratio * box.min_corner.y + pad[1] // 2
                    rect.max_corner.x = ratio * box.max_corner.x + pad[0] // 2
                    rect.max_corner.y = ratio * box.max_corner.y + pad[1] // 2
                    labels.append(rect)
        return image, labels

    def change_outside_labels(self, labels):
        delete_index = []
        # reject warped points outside of image (0.999 for the image boundary)
        for i, label in enumerate(labels):
            if label[2] + label[4] / 2 >= 1.0:
                yoldH = label[2] - label[4] / 2
                label[2] = (yoldH + 0.999) / 2
                label[4] = 0.999 - yoldH
            if label[1] + label[3] / 2 >= 1.0:
                yoldW = label[1] - label[3] / 2
                label[1] = (yoldW + 0.999) / 2
                label[3] = 0.999 - yoldW
            # filter small objects: width below 0.0053 (about 6.8 px at 1280)
            # or height below 0.0055 (about 4.0 px at 720)
            if label[3] < 0.0053 or label[4] < 0.0055:
                delete_index.append(i)

        labels = np.delete(labels, delete_index, axis=0)
        return labels
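
A sketch of the detection preprocessing chain; the frame, the class list, and the empty box list are placeholders, and real boxes would be Rect2D objects with name, min_corner and max_corner attributes, as implied by resize_dataset above:

import numpy as np

process = DetectionDataSetProcess()
src_image = np.zeros((480, 640, 3), dtype=np.uint8)   # stand-in frame
class_names = ["person", "car"]                       # placeholder class list
boxes = []                                            # Rect2D annotations would go here
image, rects = process.resize_dataset(src_image, (416, 416), boxes, class_names)
net_input, labels = process.normaliza_dataset(image, rects, (416, 416))
labels = process.change_outside_labels(labels)        # clamp boxes, drop tiny ones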