class VideoLoader(DataLoader):

    def __init__(self, video_path, image_size=(416, 416)):
        super().__init__()
        self.video_process = VideoProcess()
        self.dataset_process = ImageDataSetProcess()
        if not self.video_process.isVideoFile(video_path) or \
                not self.video_process.openVideo(video_path):
            raise Exception("Invalid path!", video_path)
        self.image_size = image_size
        self.count = int(self.video_process.getFrameCount())
        self.color = (127.5, 127.5, 127.5)

    def __iter__(self):
        self.index = -1
        return self

    def __next__(self):
        self.index += 1
        success, src_image, rgb_image = self.video_process.readRGBFrame()
        if not success:
            raise StopIteration
        # padded resize
        rgb_image, _, _ = self.dataset_process.image_resize_square(rgb_image,
                                                                   self.image_size,
                                                                   self.color)
        rgb_image = self.dataset_process.image_normaliza(rgb_image)
        numpy_image = self.dataset_process.numpy_transpose(rgb_image)
        torch_image = self.all_numpy_to_tensor(numpy_image, 0)
        return src_image, torch_image

    def __len__(self):
        return self.count
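
# Illustrative sketch (not the repo's API): the per-frame preprocessing that
# VideoLoader delegates to ImageDataSetProcess, written with plain cv2/numpy/torch.
# The pad color 127.5 and the (416, 416) target size mirror the defaults above;
# the /255 scaling is an assumption, image_normaliza may scale differently.
import cv2
import numpy as np
import torch


def letterbox_frame(bgr_frame, dst_size=(416, 416), pad_color=(127.5, 127.5, 127.5)):
    """Resize keeping aspect ratio, pad to dst_size (w, h), normalize, return a 1x3xHxW tensor."""
    h, w = bgr_frame.shape[:2]
    ratio = min(dst_size[0] / w, dst_size[1] / h)
    new_w, new_h = int(round(w * ratio)), int(round(h * ratio))
    resized = cv2.resize(bgr_frame, (new_w, new_h))
    pad_w, pad_h = dst_size[0] - new_w, dst_size[1] - new_h
    top, bottom = pad_h // 2, pad_h - pad_h // 2
    left, right = pad_w // 2, pad_w - pad_w // 2
    padded = cv2.copyMakeBorder(resized, top, bottom, left, right,
                                cv2.BORDER_CONSTANT, value=pad_color)
    rgb = cv2.cvtColor(padded, cv2.COLOR_BGR2RGB)
    normalized = rgb.astype(np.float32) / 255.0              # assumed [0, 1] scaling
    chw = np.ascontiguousarray(np.transpose(normalized, (2, 0, 1)))  # HWC -> CHW
    return torch.from_numpy(chw).unsqueeze(0)                # add batch dimension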
class ComputeImagesMean():

    def __init__(self, image_size):
        self.image_size = image_size
        self.dir_process = DirProcess()
        self.image_process = ImageProcess()
        self.dataset_process = ImageDataSetProcess()

    def compute(self, train_path):
        numpy_images = []
        path, _ = os.path.split(train_path)
        images_dir = os.path.join(path, "../JPEGImages")
        for line_data in self.dir_process.getFileData(train_path):
            data_list = [x.strip() for x in line_data.split() if x.strip()]
            if len(data_list) >= 1:
                image_path = os.path.join(images_dir, data_list[0])
                src_image, rgb_image = self.image_process.readRgbImage(image_path)
                rgb_image = self.dataset_process.image_resize(rgb_image, self.image_size)
                normaliza_image = self.dataset_process.image_normaliza(rgb_image)
                numpy_images.append(normaliza_image)
            else:
                print("read %s image path error!" % data_list)
        numpy_images = np.stack(numpy_images)
        mean = np.mean(numpy_images, axis=(0, 1, 2))
        std = np.std(numpy_images, axis=(0, 1, 2))
        return mean, std
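
# Minimal sketch of the statistic ComputeImagesMean produces, assuming the images
# are already loaded as float32 RGB arrays resized to a common shape; this is only
# the math, not the repo's file reading.
import numpy as np


def channel_mean_std(images):
    """images: list of HxWx3 float arrays; returns per-channel mean and std."""
    stacked = np.stack(images)            # N x H x W x 3
    mean = stacked.mean(axis=(0, 1, 2))   # one value per channel
    std = stacked.std(axis=(0, 1, 2))
    return mean, std


# Example: two 2x2 dummy images.
# channel_mean_std([np.zeros((2, 2, 3)), np.ones((2, 2, 3))])  # -> (array([0.5, 0.5, 0.5]), ...)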
class SegmentDatasetProcess(BaseDataSetProcess):

    def __init__(self):
        super().__init__()
        self.dataset_process = ImageDataSetProcess()
        self.image_pad_color = (0, 0, 0)
        self.label_pad_color = 250

    def normaliza_dataset(self, src_image):
        image = self.dataset_process.image_normaliza(src_image)
        image = self.dataset_process.numpy_transpose(image)
        return image

    def resize_dataset(self, src_image, image_size, label,
                       volid_label_seg=None, valid_label_seg=None):
        image, ratio, pad = self.dataset_process.image_resize_square(
            src_image, image_size, color=self.image_pad_color)
        target = self.encode_segmap(np.array(label, dtype=np.uint8),
                                    volid_label_seg, valid_label_seg)
        target, ratio, pad = self.dataset_process.image_resize_square(
            target, image_size, self.label_pad_color)
        return image, target

    def change_label(self, label, valid_label_seg):
        valid_masks = np.zeros(label.shape)
        for l in range(0, len(valid_label_seg)):
            # mark positions whose value belongs to valid_label_seg
            valid_mask = label == l
            valid_masks += valid_mask
        # positions not covered by any valid label are flagged with -1
        valid_masks[valid_masks == 0] = -1
        seg = np.float32(label) * valid_masks
        seg[seg < 0] = self.label_pad_color
        seg = np.uint8(seg)
        return seg

    def encode_segmap(self, mask, volid_label, valid_label):
        classes = -np.ones([100, 100])
        valid = [x for j in valid_label for x in j]
        for i in range(0, len(valid_label)):
            classes[i, :len(valid_label[i])] = valid_label[i]
        for label in volid_label:
            mask[mask == label] = self.label_pad_color
        for validc in valid:
            mask[mask == validc] = np.uint8(np.where(classes == validc)[0])
        return mask
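
# Standalone sketch of the label remapping that encode_segmap performs: void ids are
# sent to the ignore value (250 above) and each valid id, or group of ids, is mapped
# to its contiguous class index. Function and parameter names here are illustrative.
import numpy as np


def remap_segmentation_mask(mask, void_ids, valid_id_groups, ignore_value=250):
    remapped = mask.copy()
    for void_id in void_ids:
        remapped[mask == void_id] = ignore_value
    for class_index, group in enumerate(valid_id_groups):
        for raw_id in group:
            remapped[mask == raw_id] = class_index
    return remapped


# Example: ids 0 and 255 are void, id 7 -> class 0, ids 8 and 9 -> class 1.
# mask = np.array([[7, 8], [0, 9]], dtype=np.uint8)
# remap_segmentation_mask(mask, [0, 255], [[7], [8, 9]])  # -> [[0, 1], [250, 1]]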
class ImagesLoader(DataLoader):

    def __init__(self, input_dir, image_size=(416, 416)):
        super().__init__()
        self.image_size = image_size
        self.imageProcess = ImageProcess()
        self.dirProcess = DirProcess()
        self.dataset_process = ImageDataSetProcess()
        temp_files = self.dirProcess.getDirFiles(input_dir, "*.*")
        self.files = list(temp_files)
        self.count = len(self.files)
        self.color = (127.5, 127.5, 127.5)

    def __iter__(self):
        self.index = -1
        return self

    def __next__(self):
        self.index += 1
        if self.index == self.count:
            raise StopIteration
        image_path = self.files[self.index]
        # read image
        srcImage, rgb_image = self.imageProcess.readRgbImage(image_path)
        # padded resize
        rgb_image, _, _ = self.dataset_process.image_resize_square(rgb_image,
                                                                   self.image_size,
                                                                   self.color)
        rgb_image = self.dataset_process.image_normaliza(rgb_image)
        numpy_image = self.dataset_process.numpy_transpose(rgb_image)
        torch_image = self.all_numpy_to_tensor(numpy_image)
        return srcImage, torch_image

    def __len__(self):
        return self.count
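
# Sketch of the same iteration pattern with pathlib and cv2, reusing the hypothetical
# letterbox_frame helper sketched above; the "*.*" glob mirrors the pattern passed to
# getDirFiles, and skipping unreadable files is an assumption.
from pathlib import Path

import cv2


def iter_image_tensors(input_dir, image_size=(416, 416)):
    for image_path in sorted(Path(input_dir).glob("*.*")):
        src_image = cv2.imread(str(image_path))
        if src_image is None:      # not an image file, skip it
            continue
        yield src_image, letterbox_frame(src_image, image_size)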
class ClassifyDatasetProcess(BaseDataSetProcess):

    def __init__(self, mean, std):
        super().__init__()
        self.dataset_process = ImageDataSetProcess()
        self.mean = np.array(mean, dtype=np.float32)
        self.std = np.array(std, dtype=np.float32)
        self.normalize_transform = self.torchvision_process.torch_normalize(
            flag=0, mean=self.mean, std=self.std)

    def normaliza_dataset(self, src_image, normaliza_type=0):
        result = None
        if normaliza_type == 0:
            # numpy normalize
            normaliza_image = self.dataset_process.image_normaliza(src_image)
            image = self.dataset_process.numpy_normaliza(normaliza_image,
                                                         self.mean, self.std)
            image = self.dataset_process.numpy_transpose(image, image.dtype)
            result = self.numpy_to_torch(image, flag=0)
        elif normaliza_type == 1:
            # torchvision normalize
            result = self.normalize_transform(src_image)
        return result

    def resize_image(self, src_image, image_size):
        image = self.dataset_process.image_resize(src_image, image_size)
        return image
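
# Sketch of the two normalization paths selected by normaliza_type, written with plain
# numpy and torchvision; the mean/std values below are the common ImageNet placeholders,
# not this project's statistics.
import numpy as np
import torch
from torchvision import transforms

mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
std = np.array([0.229, 0.224, 0.225], dtype=np.float32)


def normalize_numpy(rgb_image):
    """HWC uint8 RGB image -> standardized CHW float tensor."""
    image = rgb_image.astype(np.float32) / 255.0                  # scale to [0, 1]
    image = (image - mean) / std                                  # per-channel standardize
    chw = np.ascontiguousarray(np.transpose(image, (2, 0, 1)))    # HWC -> CHW
    return torch.from_numpy(chw)


# torchvision path: ToTensor already scales to [0, 1] and moves channels first.
normalize_torchvision = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=mean.tolist(), std=std.tolist()),
])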
class DetectionDataSetProcess(BaseDataSetProcess):

    def __init__(self):
        super().__init__()
        self.dataset_process = ImageDataSetProcess()
        self.image_pad_color = (0, 0, 0)

    def normaliza_dataset(self, src_image, labels=None, image_size=None):
        image = self.dataset_process.image_normaliza(src_image)
        image = self.dataset_process.numpy_transpose(image)
        result = None
        if labels is not None:
            result = np.zeros((len(labels), 5), dtype=np.float32)
            for index, rect in enumerate(labels):
                class_id = rect.class_id
                x, y = rect.center()
                x /= image_size[0]
                y /= image_size[1]
                width = rect.width() / image_size[0]
                height = rect.height() / image_size[1]
                result[index, :] = np.array([class_id, x, y, width, height])
        return image, result

    def resize_dataset(self, src_image, image_size, boxes=None, class_name=None):
        labels = []
        image, ratio, pad = self.dataset_process.image_resize_square(
            src_image, image_size, color=self.image_pad_color)
        if boxes is not None:
            for box in boxes:
                if box.name in class_name:
                    rect = Rect2D()
                    rect.class_id = class_name.index(box.name)
                    rect.min_corner.x = ratio * box.min_corner.x + pad[0] // 2
                    rect.min_corner.y = ratio * box.min_corner.y + pad[1] // 2
                    rect.max_corner.x = ratio * box.max_corner.x + pad[0] // 2
                    rect.max_corner.y = ratio * box.max_corner.y + pad[1] // 2
                    labels.append(rect)
        return image, labels

    def change_outside_labels(self, labels):
        delete_index = []
        for i, label in enumerate(labels):
            # clip boxes whose warped points fall outside the image
            # (0.999 keeps them just inside the boundary)
            if label[2] + label[4] / 2 >= 1.0:
                yoldH = label[2] - label[4] / 2
                label[2] = (yoldH + 0.999) / 2.0
                label[4] = 0.999 - yoldH
            if label[1] + label[3] / 2 >= 1.0:
                yoldW = label[1] - label[3] / 2
                label[1] = (yoldW + 0.999) / 2.0
                label[3] = 0.999 - yoldW
            # filter small objects: normalized width (label[3]) below about 6.8 px
            # at 1280 (6.8 / 1280 = 0.0053) and normalized height (label[4]) below
            # about 4.0 px at 720 (4.0 / 720 ~= 0.0055)
            if label[3] < 0.0053 or label[4] < 0.0055:
                delete_index.append(i)
        labels = np.delete(labels, delete_index, axis=0)
        return labels
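
# Standalone sketch of the box conversion normaliza_dataset performs: a pixel
# rectangle (after the padded resize) becomes a normalized [class_id, cx, cy, w, h]
# row. The Rect2D container is replaced by plain tuples; names are illustrative.
import numpy as np


def boxes_to_yolo_labels(boxes, image_size):
    """boxes: list of (class_id, x_min, y_min, x_max, y_max) in pixels; image_size: (w, h)."""
    result = np.zeros((len(boxes), 5), dtype=np.float32)
    for index, (class_id, x_min, y_min, x_max, y_max) in enumerate(boxes):
        cx = (x_min + x_max) / 2.0 / image_size[0]   # normalized center x
        cy = (y_min + y_max) / 2.0 / image_size[1]   # normalized center y
        w = (x_max - x_min) / image_size[0]          # normalized width
        h = (y_max - y_min) / image_size[1]          # normalized height
        result[index] = [class_id, cx, cy, w, h]
    return result


# boxes_to_yolo_labels([(0, 100, 50, 300, 250)], (416, 416))
# -> [[0.0, 0.4808, 0.3606, 0.4808, 0.4808]]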