def __getitem__(self, index):
    """Load one image and its bounding-box targets.

    The image is read as RGB, padded to a square with ``pad_to_square`` and
    converted to a tensor. The matching label file is read as rows of five
    values ``(cls, x, y, w, h)``; the box coordinates are shifted by the
    padding offsets and re-normalized by the padded image size.

    Args:
        index: Sample index; wrapped modulo ``len(self.img_files)``.

    Returns:
        Tuple ``(img_path, img, targets)``. ``targets`` has shape
        ``(num_boxes, 6)``: column 0 is left at zero and columns 1-5 hold
        the adjusted label rows.

    Raises:
        AssertionError: If the label file for this sample does not exist.
    """
    # ---------
    #  Image
    # ---------
    img_path = self.img_files[index % len(self.img_files)].rstrip()
    img = Image.open(img_path).convert('RGB')
    img = np.array(img)

    # Handle images with less than three channels. The array must stay
    # channel-last (H, W, C) because the shape is unpacked as (h, w, _)
    # right below; replicating along axis 0 would yield (3, H, W) instead.
    if img.ndim != 3:
        img = np.repeat(img[:, :, None], 3, axis=2)

    h, w, _ = img.shape  # the numpy image is laid out as H * W * C
    # Normalized labels are scaled back to pixels of the unpadded image;
    # otherwise the label values are used as they are.
    h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)

    # Pad to square resolution
    img, pad = pad_to_square(img, 0)
    padded_h, padded_w, _ = img.shape

    # ---------
    #  Label
    # ---------
    label_path = self.label_files[index % len(self.img_files)].rstrip()
    # Every image is required to have a label file.
    assert os.path.exists(label_path), "label file not found: %s" % label_path
    boxes = np.loadtxt(label_path).reshape(-1, 5)

    # Extract corner coordinates for the unpadded + unscaled image
    x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
    y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
    x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
    y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)

    # Adjust for added padding.
    # NOTE(review): this assumes pad[0] is the offset added on the x axis and
    # pad[2] the offset added on the y axis; the original author questioned
    # this indexing as well -- confirm against pad_to_square.
    x1 += pad[0]
    y1 += pad[2]
    x2 += pad[0]
    y2 += pad[2]

    # Back to (x, y, w, h), now relative to the padded image
    boxes[:, 1] = ((x1 + x2) / 2) / padded_w
    boxes[:, 2] = ((y1 + y2) / 2) / padded_h
    # With normalized labels the stored width is box_w / img_w; multiplying by
    # img_w / padded_w turns it into box_w / padded_w (same idea for height).
    boxes[:, 3] *= w_factor / padded_w
    boxes[:, 4] *= h_factor / padded_h

    # Apply augmentations.
    # At this point img is the square-padded image array and boxes holds
    # (cls, x, y, w, h) expressed relative to the padded image size.
    if self.augment:
        img, boxes = augment(img, boxes)

    # Per the original note, ToTensor also rescales the pixel values.
    img = transforms.ToTensor()(img)

    # Column 0 stays zero here; per the original note it is filled in by
    # collate_fn with the index of the image each target belongs to.
    targets = torch.zeros((len(boxes), 6))
    targets[:, 1:] = torch.from_numpy(boxes)

    return img_path, img, targets
def __getitem__(self, index):
    """Return the image path, image tensor and box targets for one sample.

    Steps: open the image as RGB, pad it to a square via ``pad_to_square``,
    read the label file as an ``(N, 5)`` array, move the box centres by the
    padding offsets, rescale centres and sizes to the padded image size,
    optionally run ``augment``, and pack everything into tensors.

    Args:
        index: Sample index; wrapped modulo ``len(self.img_files)``.

    Returns:
        ``(img_path, img, targets)`` where ``img`` is the output of
        ``transforms.ToTensor()`` and ``targets`` is a ``(N, 6)`` tensor
        whose column 0 is zero and whose columns 1-5 are the label rows.

    Raises:
        AssertionError: If the label file for this sample is missing.
    """
    # Image and label lists are indexed with the same wrapped position.
    slot = index % len(self.img_files)

    img_path = self.img_files[slot].rstrip()
    image = np.array(Image.open(img_path).convert('RGB'))
    if image.ndim != 3:
        # Replicate a single-channel image along a trailing channel axis so
        # that the (height, width, channels) unpacking below stays valid.
        image = np.stack((image,) * 3, axis=-1)

    height, width, _ = image.shape
    # Labels flagged as normalized are multiplied by the unpadded image size;
    # otherwise they are used unchanged.
    if self.normalized_labels:
        h_scale, w_scale = height, width
    else:
        h_scale, w_scale = 1, 1

    image, pad = pad_to_square(image, 0)
    padded_h, padded_w, _ = image.shape

    label_path = self.label_files[slot].rstrip()
    assert os.path.exists(label_path), "label file not found: %s" % label_path
    labels = np.loadtxt(label_path).reshape(-1, 5)

    # Box corners in the unpadded image, shifted by the padding offsets.
    # NOTE(review): pad[0] is taken as the x offset and pad[2] as the y
    # offset -- confirm against the layout returned by pad_to_square.
    half_w = labels[:, 3] / 2
    half_h = labels[:, 4] / 2
    left = w_scale * (labels[:, 1] - half_w) + pad[0]
    top = h_scale * (labels[:, 2] - half_h) + pad[2]
    right = w_scale * (labels[:, 1] + half_w) + pad[0]
    bottom = h_scale * (labels[:, 2] + half_h) + pad[2]

    # Write centres and sizes back, expressed relative to the padded image.
    # Corners are computed above before any column is overwritten.
    labels[:, 1] = ((left + right) / 2) / padded_w
    labels[:, 2] = ((top + bottom) / 2) / padded_h
    labels[:, 3] *= w_scale / padded_w
    labels[:, 4] *= h_scale / padded_h

    if self.augment:
        image, labels = augment(image, labels)

    tensor_img = transforms.ToTensor()(image)

    # One extra leading column is reserved and left at zero.
    targets = torch.zeros((len(labels), 6))
    targets[:, 1:] = torch.from_numpy(labels)

    return img_path, tensor_img, targets