Exemplo n.º 1
0
    def __next__(self):
        """Build and return the next batch of images and labels.

        Increments ``self.count`` and raises ``StopIteration`` as soon as it
        equals ``self.nB``. Otherwise the entries ``ia..ib-1`` of
        ``self.shuffled_vector`` select the image/label files of the batch.
        Each image is read with OpenCV, optionally saturation/value
        augmented, resized via ``resize_square``, optionally passed through
        ``random_affine`` and a random left-right flip.

        Images that cannot be read are reported with ``print`` and skipped,
        so a batch may contain fewer than ``self.batch_size`` samples.

        Returns:
            tuple: ``(images, labels_all)``. ``images`` is a float32 tensor
            made by stacking the batch, reversing the channel axis, moving
            channels first and dividing by 255. ``labels_all`` is a list
            holding one tensor per loaded image; columns 1:5 hold the output
            of ``xyxy2xywh`` divided by ``height`` (empty tensor when no
            label file exists).

        Raises:
            StopIteration: when ``self.count`` reaches ``self.nB``.
        """
        self.count += 1
        if self.count == self.nB:
            raise StopIteration

        # Range of positions in the shuffled index vector used by this batch;
        # the last batch is truncated at self.nF.
        ia = self.count * self.batch_size
        ib = min((self.count + 1) * self.batch_size, self.nF)

        if self.multi_scale:
            # Multi-Scale YOLO Training
            height = random.choice(range(10, 20)) * 32  # 320 - 608 pixels
        else:
            # Fixed-Scale YOLO Training
            height = self.height

        img_all = []
        labels_all = []
        for index, files_index in enumerate(range(ia, ib)):
            img_path = self.img_files[self.shuffled_vector[files_index]]
            label_path = self.label_files[self.shuffled_vector[files_index]]
            # NOTE(review): only the file name of the configured label path is
            # kept; the directory is replaced by this hard-coded absolute path.
            label_path = 'D:/01-ComputerVisionEntries/10-HumanDetection/PyTorch-YOLOv3-master/labels/train/' + label_path.split(
                '/')[-1]
            img = cv2.imread(img_path)  # BGR
            if img is None:
                # Unreadable image: report its path and drop it from the batch.
                print(img_path)
                continue

            augment_hsv = True
            if self.augment and augment_hsv:
                # SV augmentation by 50%
                fraction = 0.50
                img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
                S = img_hsv[:, :, 1].astype(np.float32)
                V = img_hsv[:, :, 2].astype(np.float32)

                # Random gain in [1 - fraction, 1 + fraction); clipping is only
                # needed when the gain can push values above 255.
                a = (random.random() * 2 - 1) * fraction + 1
                S *= a
                if a > 1:
                    np.clip(S, a_min=0, a_max=255, out=S)

                a = (random.random() * 2 - 1) * fraction + 1
                V *= a
                if a > 1:
                    np.clip(V, a_min=0, a_max=255, out=V)

                img_hsv[:, :, 1] = S.astype(np.uint8)
                img_hsv[:, :, 2] = V.astype(np.uint8)
                # Writes the converted result back into img in place.
                cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

            # Shape before resizing, used to scale the normalized labels.
            h, w, _ = img.shape
            img, ratio, padw, padh = resize_square(img,
                                                   height=height,
                                                   color=(127.5, 127.5, 127.5))
            # Load labels
            if os.path.isfile(label_path):
                labels0 = np.loadtxt(label_path,
                                     dtype=np.float32).reshape(-1, 5)

                # Shift columns 1/2 by half of columns 3/4 before the xywh ->
                # xyxy conversion below.
                # NOTE(review): this presumably means the label files store the
                # top-left corner rather than the box center - confirm.
                labels0[:, 1] = (labels0[:, 1] + labels0[:, 3] / 2)  # center x
                labels0[:, 2] = (labels0[:, 2] + labels0[:, 4] / 2)  # center y
                labels = labels0.copy()

                # x1,y1, x2,y2  #ratio = old/new
                labels[:, 1] = ratio * w * (
                    labels0[:, 1] -
                    labels0[:, 3] / 2) + padw  # map normalized coordinates to coordinates of the resized image
                labels[:, 2] = ratio * h * (labels0[:, 2] -
                                            labels0[:, 4] / 2) + padh
                labels[:, 3] = ratio * w * (labels0[:, 1] +
                                            labels0[:, 3] / 2) + padw
                labels[:, 4] = ratio * h * (labels0[:, 2] +
                                            labels0[:, 4] / 2) + padh
            else:
                # No label file: the sample carries an empty label array.
                labels = np.array([])

            # Augment image and labels
            if self.augment:
                img, labels, M = random_affine(img,
                                               labels,
                                               degrees=(-5, 5),
                                               translate=(0.10, 0.10),
                                               scale=(0.90, 1.10))
            # Debug switch: set to True to plot each sample with its boxes.
            plotFlag = False
            if plotFlag:
                import matplotlib.pyplot as plt
                plt.figure(figsize=(10, 10)) if index == 0 else None
                plt.subplot(4, 4, index + 1).imshow(img[:, :, ::-1])
                plt.plot(labels[:, [1, 3, 3, 1, 1]].T,
                         labels[:, [2, 2, 4, 4, 2]].T, '.-')
                plt.axis('off')

            nL = len(labels)
            if nL > 0:
                # convert xyxy to xywh and normalize by the resize target
                labels[:, 1:5] = xyxy2xywh(labels[:, 1:5].copy()) / height

            if self.augment:
                # random left-right flip
                # `&` evaluates both operands, so a random number is drawn even
                # when the corresponding flag is False.
                lr_flip = True
                if lr_flip & (random.random() > 0.5):
                    img = np.fliplr(img)
                    if nL > 0:
                        labels[:, 1] = 1 - labels[:, 1]

                # random up-down flip
                ud_flip = False
                if ud_flip & (random.random() > 0.5):
                    img = np.flipud(img)
                    if nL > 0:
                        labels[:, 2] = 1 - labels[:, 2]

            img_all.append(img)
            labels_all.append(torch.from_numpy(labels))

        # Normalize

        img_all = np.stack(img_all)[:, :, :, ::-1].transpose(
            0, 3, 1, 2)  # Channel first and cv2 to pytorch
        img_all = np.ascontiguousarray(img_all, dtype=np.float32)
        img_all /= 255.0  # 0 - 255 to 0.0 - 1.0

        return torch.from_numpy(img_all), labels_all
Exemplo n.º 2
0
    def __getitem__(self, index):
        """Return one augmented, letterboxed sample.

        Args:
            index: Position of the sample. When ``self.image_weights`` is
                truthy it is first remapped through ``self.indices``.

        Returns:
            tuple: ``(img, labels_out, img_path, (h, w))`` where ``img`` is a
            float32 channel-first tensor scaled to ``[0, 1]``, ``labels_out``
            is a ``(nL, 6)`` tensor whose column 0 is left at zero and whose
            columns 1: hold the label rows (columns 2:6 are xywh normalized
            by the letterboxed image size), and ``(h, w)`` is the image shape
            right before letterboxing.

        Raises:
            AssertionError: if the image file cannot be read.
        """
        if self.image_weights:
            index = self.indices[index]

        img_path = self.img_files[index]
        label_path = self.label_files[index]
        hyp = self.hyp

        # Load image (use the preloaded copy when there is one)
        img = self.imgs[index]
        if img is None:
            img = cv2.imread(img_path)  # BGR
            assert img is not None, 'Image Not Found ' + img_path
            r = self.img_size / max(img.shape)  # size ratio
            if self.augment and r < 1:  # if training (NOT testing), downsize to inference shape
                h, w, _ = img.shape
                img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_LINEAR)  # INTER_LINEAR fastest

        # Augment colorspace
        augment_hsv = True
        if self.augment and augment_hsv:
            # SV augmentation by 50%
            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)  # hue, sat, val
            S = img_hsv[:, :, 1].astype(np.float32)  # saturation
            V = img_hsv[:, :, 2].astype(np.float32)  # value

            a = random.uniform(-1, 1) * hyp['hsv_s'] + 1
            b = random.uniform(-1, 1) * hyp['hsv_v'] + 1
            S *= a
            V *= b

            # Clipping is only required when the gain can exceed 255.
            img_hsv[:, :, 1] = S if a < 1 else S.clip(None, 255)
            img_hsv[:, :, 2] = V if b < 1 else V.clip(None, 255)
            # Convert into a NEW array. Writing with dst=img would modify the
            # array held in self.imgs in place, so a preloaded image would
            # accumulate augmentation every time it is requested.
            img = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR)

        # Letterbox
        h, w, _ = img.shape
        if self.rect:
            shape = self.batch_shapes[self.batch[index]]
            mode = 'rect'
        else:
            shape = self.img_size
            mode = 'square'
        img, ratiow, ratioh, padw, padh = letterbox(img, new_shape=shape, mode=mode)

        # Load labels
        labels = []
        if os.path.isfile(label_path):
            x = self.labels[index]
            if x is None:  # labels not preloaded
                with open(label_path, 'r') as f:
                    x = np.array([line.split() for line in f.read().splitlines()], dtype=np.float32)

            if x.size > 0:
                # Normalized xywh to pixel xyxy format
                labels = x.copy()
                labels[:, 1] = ratiow * w * (x[:, 1] - x[:, 3] / 2) + padw
                labels[:, 2] = ratioh * h * (x[:, 2] - x[:, 4] / 2) + padh
                labels[:, 3] = ratiow * w * (x[:, 1] + x[:, 3] / 2) + padw
                labels[:, 4] = ratioh * h * (x[:, 2] + x[:, 4] / 2) + padh

        # Augment image and labels
        if self.augment:
            img, labels = random_affine(img, labels,
                                        degrees=hyp['degrees'],
                                        translate=hyp['translate'],
                                        scale=hyp['scale'],
                                        shear=hyp['shear'])

        nL = len(labels)  # number of labels
        if nL:
            # convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

            # Normalize coordinates 0 - 1
            labels[:, [2, 4]] /= img.shape[0]  # height
            labels[:, [1, 3]] /= img.shape[1]  # width

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip and random.random() > 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip (disabled)
            ud_flip = False
            if ud_flip and random.random() > 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        # Column 0 stays zero; the label rows fill the remaining columns.
        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Normalize
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0

        return torch.from_numpy(img), labels_out, img_path, (h, w)
Exemplo n.º 3
0
    def __next__(self):
        """Build and return the next batch of images and labels.

        Increments ``self.count`` and stops iteration once it equals
        ``self.nB``. Images are read from the ``image_train`` directory and
        labels from the ``labels_txt`` directory, both relative to the
        current working directory. Unreadable images are skipped silently.

        Returns:
            tuple: ``(images, labels_all)``. ``images`` is a float32
            channel-first tensor scaled to ``[0, 1]``; ``labels_all`` is a
            list with one tensor per loaded image whose columns 1:5 hold
            xywh divided by the letterbox ``height`` (an empty tensor when
            the label file is missing).

        Raises:
            StopIteration: when ``self.count`` reaches ``self.nB``.
        """
        self.count += 1
        if self.count == self.nB:
            raise StopIteration

        ia = self.count * self.batch_size
        ib = min((self.count + 1) * self.batch_size, self.nF)

        if self.multi_scale:
            # Multi-Scale YOLO Training
            height = random.choice(range(10, 20)) * 32  # 320 - 608 pixels
        else:
            # Fixed-Scale YOLO Training
            height = self.height

        img_all = []
        labels_all = []
        for index, files_index in enumerate(range(ia, ib)):
            sample = self.shuffled_vector[files_index]
            img_path = self.img_files[sample]
            # Build the label path once so that the existence check, the load
            # and the diagnostic below all refer to the same file.
            label_path = os.path.join("labels_txt", self.label_files[sample])

            img = cv2.imread(os.path.join("image_train", img_path))  # BGR
            if img is None:
                continue

            augment_hsv = True
            if self.augment and augment_hsv:
                # SV augmentation by 50%
                fraction = 0.50
                img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
                S = img_hsv[:, :, 1].astype(np.float32)
                V = img_hsv[:, :, 2].astype(np.float32)

                a = (random.random() * 2 - 1) * fraction + 1
                S *= a
                if a > 1:
                    np.clip(S, a_min=0, a_max=255, out=S)

                a = (random.random() * 2 - 1) * fraction + 1
                V *= a
                if a > 1:
                    np.clip(V, a_min=0, a_max=255, out=V)

                img_hsv[:, :, 1] = S.astype(np.uint8)
                img_hsv[:, :, 2] = V.astype(np.uint8)
                cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

            h, w, _ = img.shape
            img, ratio, padw, padh = letterbox(img, height=height)

            # Load labels
            if os.path.isfile(label_path):
                # Warnings raised by np.loadtxt are silenced here.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    labels0 = np.loadtxt(label_path,
                                         dtype=np.float32).reshape(-1, 5)

                # Normalized xywh to pixel xyxy format
                labels = labels0.copy()
                labels[:, 1] = ratio * w * (labels0[:, 1] -
                                            labels0[:, 3] / 2) + padw
                labels[:, 2] = ratio * h * (labels0[:, 2] -
                                            labels0[:, 4] / 2) + padh
                labels[:, 3] = ratio * w * (labels0[:, 1] +
                                            labels0[:, 3] / 2) + padw
                labels[:, 4] = ratio * h * (labels0[:, 2] +
                                            labels0[:, 4] / 2) + padh

            else:
                # Report the path that was actually checked (previously a
                # "data_train/..." path was printed, which is not where the
                # labels are looked up).
                print(label_path)
                print("st wrong")
                labels = np.array([])

            # Augment image and labels
            if self.augment:
                img, labels, M = random_affine(img,
                                               labels,
                                               degrees=(-5, 5),
                                               translate=(0.10, 0.10),
                                               scale=(0.90, 1.10))

            # Debug switch: set to True to plot each sample with its boxes.
            plotFlag = False
            if plotFlag:
                import matplotlib.pyplot as plt
                plt.figure(figsize=(10, 10)) if index == 0 else None
                plt.subplot(4, 4, index + 1).imshow(img[:, :, ::-1])
                plt.plot(labels[:, [1, 3, 3, 1, 1]].T,
                         labels[:, [2, 2, 4, 4, 2]].T, '.-')
                plt.axis('off')

            nL = len(labels)
            if nL > 0:
                # convert xyxy to xywh
                labels[:, 1:5] = xyxy2xywh(labels[:, 1:5].copy()) / height

            if self.augment:
                # random left-right flip
                # `&` is kept on purpose: it evaluates both operands, so the
                # number of random draws per sample stays unchanged.
                lr_flip = True
                if lr_flip & (random.random() > 0.5):
                    img = np.fliplr(img)
                    if nL > 0:
                        labels[:, 1] = 1 - labels[:, 1]

                # random up-down flip
                ud_flip = False
                if ud_flip & (random.random() > 0.5):
                    img = np.flipud(img)
                    if nL > 0:
                        labels[:, 2] = 1 - labels[:, 2]

            img_all.append(img)
            labels_all.append(torch.from_numpy(labels))

        # Normalize
        img_all = np.stack(img_all)[:, :, :, ::-1].transpose(
            0, 3, 1, 2)  # BGR to RGB and cv2 to pytorch
        img_all = np.ascontiguousarray(img_all, dtype=np.float32)
        img_all /= 255.0

        return torch.from_numpy(img_all), labels_all
Exemplo n.º 4
0
    def __next__(self):
        """Build and return the next batch of images and labels.

        Increments ``self.count`` and stops iteration once it equals
        ``self.nB``. Every image of the batch is read, optionally
        augmented, and letterboxed to ``self.img_size``.

        Returns:
            tuple: ``(images, labels_all, img_paths, img_shapes)``.
            ``images`` is a float32 channel-first tensor scaled to
            ``[0, 1]``. ``labels_all`` is a single 2-D tensor containing the
            label rows of all images, each row prefixed with the position of
            its image inside the batch; it has shape ``(0, 6)`` when no image
            of the batch has labels. ``img_paths`` and ``img_shapes`` list
            the path and the pre-letterbox ``(h, w)`` of each image.

        Raises:
            StopIteration: when ``self.count`` reaches ``self.nB``.
            AssertionError: if an image file cannot be read.
        """
        self.count += 1
        if self.count == self.nB:
            raise StopIteration

        ia = self.count * self.batch_size
        ib = min((self.count + 1) * self.batch_size, self.nF)

        img_all, labels_all, img_paths, img_shapes = [], [], [], []
        for index, files_index in enumerate(range(ia, ib)):
            img_path = self.img_files[self.shuffled_vector[files_index]]
            label_path = self.label_files[self.shuffled_vector[files_index]]

            img = cv2.imread(img_path)  # BGR
            assert img is not None, 'File Not Found ' + img_path

            augment_hsv = True
            if self.augment and augment_hsv:
                # SV augmentation by 50%
                fraction = 0.50
                img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
                S = img_hsv[:, :, 1].astype(np.float32)
                V = img_hsv[:, :, 2].astype(np.float32)

                a = (random.random() * 2 - 1) * fraction + 1
                S *= a
                if a > 1:
                    np.clip(S, a_min=0, a_max=255, out=S)

                a = (random.random() * 2 - 1) * fraction + 1
                V *= a
                if a > 1:
                    np.clip(V, a_min=0, a_max=255, out=V)

                img_hsv[:, :, 1] = S.astype(np.uint8)
                img_hsv[:, :, 2] = V.astype(np.uint8)
                cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

            h, w, _ = img.shape
            img, ratio, padw, padh = letterbox(img, height=self.img_size)

            # Load labels
            if os.path.isfile(label_path):
                labels0 = np.loadtxt(label_path,
                                     dtype=np.float32).reshape(-1, 5)

                # Normalized xywh to pixel xyxy format
                labels = labels0.copy()
                labels[:, 1] = ratio * w * (labels0[:, 1] -
                                            labels0[:, 3] / 2) + padw
                labels[:, 2] = ratio * h * (labels0[:, 2] -
                                            labels0[:, 4] / 2) + padh
                labels[:, 3] = ratio * w * (labels0[:, 1] +
                                            labels0[:, 3] / 2) + padw
                labels[:, 4] = ratio * h * (labels0[:, 2] +
                                            labels0[:, 4] / 2) + padh
            else:
                labels = np.array([])

            # Augment image and labels
            if self.augment:
                img, labels, M = random_affine(img,
                                               labels,
                                               degrees=(-5, 5),
                                               translate=(0.10, 0.10),
                                               scale=(0.90, 1.10))

            # Debug switch: set to True to plot each sample with its boxes.
            plotFlag = False
            if plotFlag:
                import matplotlib.pyplot as plt
                plt.figure(figsize=(10, 10)) if index == 0 else None
                plt.subplot(4, 4, index + 1).imshow(img[:, :, ::-1])
                plt.plot(labels[:, [1, 3, 3, 1, 1]].T,
                         labels[:, [2, 2, 4, 4, 2]].T, '.-')
                plt.axis('off')

            nL = len(labels)
            if nL > 0:
                # convert xyxy to xywh
                labels[:,
                       1:5] = xyxy2xywh(labels[:, 1:5].copy()) / self.img_size

            if self.augment:
                # random left-right flip
                # `&` is kept on purpose: it evaluates both operands, so the
                # number of random draws per sample stays unchanged.
                lr_flip = True
                if lr_flip & (random.random() > 0.5):
                    img = np.fliplr(img)
                    if nL > 0:
                        labels[:, 1] = 1 - labels[:, 1]

                # random up-down flip
                ud_flip = False
                if ud_flip & (random.random() > 0.5):
                    img = np.flipud(img)
                    if nL > 0:
                        labels[:, 2] = 1 - labels[:, 2]

            if nL > 0:
                # Prefix every row with the position of its image in the batch.
                labels = np.concatenate((np.zeros(
                    (nL, 1), dtype='float32') + index, labels), 1)
                labels_all.append(labels)

            img_all.append(img)
            img_paths.append(img_path)
            img_shapes.append((h, w))

        # Normalize
        img_all = np.stack(img_all)[:, :, :, ::-1].transpose(
            0, 3, 1, 2)  # BGR to RGB and cv2 to pytorch
        img_all = np.ascontiguousarray(img_all, dtype=np.float32)
        img_all /= 255.0

        if labels_all:
            labels_all = torch.from_numpy(np.concatenate(labels_all, 0))
        else:
            # np.concatenate raises ValueError on an empty list; a batch in
            # which no image has labels yields an empty tensor with the usual
            # 6 columns (batch position + the 5 label columns) instead.
            labels_all = torch.zeros((0, 6))
        return torch.from_numpy(img_all), labels_all, img_paths, img_shapes
Exemplo n.º 5
0
    def __getitem__(self, index):
        """Return one augmented, letterboxed sample.

        Args:
            index: Position of the sample in ``self.img_files`` /
                ``self.label_files``.

        Returns:
            tuple: ``(img, labels_out, img_path, (h, w))`` where ``img`` is a
            float32 channel-first tensor scaled to ``[0, 1]``, ``labels_out``
            is a ``(nL, 6)`` tensor whose first column is left at zero, and
            ``(h, w)`` is the image shape before letterboxing.

        Raises:
            AssertionError: if the image file cannot be read.
        """
        img_path, label_path = self.img_files[index], self.label_files[index]

        img = cv2.imread(img_path)  # BGR
        assert img is not None, 'File Not Found ' + img_path

        augment_hsv = True
        if self.augment and augment_hsv:
            # Scale saturation (channel 1) and then value (channel 2) by an
            # independent random gain of up to +/- 50%.
            fraction = 0.50  # must be < 1.0
            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            for channel in (1, 2):
                plane = img_hsv[:, :, channel].astype(np.float32)
                gain = (random.random() * 2 - 1) * fraction + 1
                plane *= gain
                if gain > 1:
                    # Only a gain above one can exceed the 8-bit range.
                    np.clip(plane, None, 255, out=plane)
                img_hsv[:, :, channel] = plane
            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

        h, w, _ = img.shape
        img, ratio, padw, padh = letterbox(img, height=self.img_size)

        # Read the label file, if any, and map its normalized xywh rows to
        # pixel xyxy coordinates of the letterboxed image.
        labels = []
        if os.path.isfile(label_path):
            with open(label_path, 'r') as file:
                rows = [line.split() for line in file.read().splitlines()]
            x = np.array(rows, dtype=np.float32)

            if x.size > 0:
                scale_x = ratio * w
                scale_y = ratio * h
                half_w = x[:, 3] / 2
                half_h = x[:, 4] / 2

                labels = x.copy()
                labels[:, 1] = scale_x * (x[:, 1] - half_w) + padw
                labels[:, 2] = scale_y * (x[:, 2] - half_h) + padh
                labels[:, 3] = scale_x * (x[:, 1] + half_w) + padw
                labels[:, 4] = scale_y * (x[:, 2] + half_h) + padh

        # Geometric augmentation of image and boxes
        if self.augment:
            img, labels = random_affine(img,
                                        labels,
                                        degrees=(-10, 10),
                                        translate=(0.10, 0.10),
                                        scale=(0.80, 1.20))

        nL = len(labels)  # number of labels
        if nL:
            # Back to xywh, normalized by the letterbox size
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size

        if self.augment:
            # Horizontal flip with probability one half
            lr_flip = True
            if lr_flip and random.random() > 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # Vertical flip (switched off)
            ud_flip = False
            if ud_flip and random.random() > 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # BGR to RGB, channel first, uint8 to float32 in 0.0 - 1.0
        img = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1),
                                   dtype=np.float32)
        img /= 255.0

        return torch.from_numpy(img), labels_out, img_path, (h, w)
Exemplo n.º 6
0
    def __getitem__(self, index):
        """Return one letterboxed (and optionally augmented) sample.

        Args:
            index: Position of the sample. When ``self.image_weights`` is
                truthy it is first remapped through ``self.indices``.

        Returns:
            tuple: ``(img, labels_out, img_path, shapes)``. ``img`` is the
            channel-first, channel-reversed image as a tensor; it is not
            converted to float or rescaled here. ``labels_out`` is a
            ``(nL, 6)`` tensor whose column 0 is left at zero and whose
            remaining columns hold the label rows with xywh normalized by
            the final image size. ``shapes`` is
            ``((h0, w0), ((h / h0, w / w0), pad))`` for the non-mosaic path
            and ``None`` for the mosaic path.
        """
        # Remap the index when weighted sampling is in use
        if self.image_weights:
            index = self.indices[index]

        img_path = self.img_files[index]
        label_path = self.label_files[index]

        hyp = self.hyp
        # NOTE: the leading `False` makes this always False, so the mosaic
        # branch below is currently never taken.
        mosaic = False and self.augment
        # With mosaic augmentation enabled, four images are loaded and
        # combined into a single mosaic sample.
        if mosaic:
            # Load the mosaic image and its labels
            img, labels = load_mosaic(self, index)
            shapes = None

        else:
            # Load the image
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox to the batch shape (rect mode) or to img_size
            shape = self.batch_shapes[self.batch[
                index]] if self.rect else self.img_size
            img, ratio, pad = letterbox(img,
                                        shape,
                                        auto=False,
                                        scaleup=self.augment)
            shapes = (h0, w0), (
                (h / h0, w / w0), pad)

            # Load the labels
            labels = []
            if os.path.isfile(label_path):
                x = self.labels[index]
                if x is None:  # labels not preloaded: read them from label_path
                    with open(label_path, 'r') as f:
                        x = np.array(
                            [x.split() for x in f.read().splitlines()],
                            dtype=np.float32)

                if x.size > 0:
                    # Normalized xywh to pixel xyxy (top-left / bottom-right)
                    labels = x.copy()
                    labels[:, 1] = ratio[0] * w * (
                        x[:, 1] - x[:, 3] / 2) + pad[0]  # pad width
                    labels[:, 2] = ratio[1] * h * (
                        x[:, 2] - x[:, 4] / 2) + pad[1]  # pad height
                    labels[:, 3] = ratio[0] * w * (x[:, 1] +
                                                   x[:, 3] / 2) + pad[0]
                    labels[:, 4] = ratio[1] * h * (x[:, 2] +
                                                   x[:, 4] / 2) + pad[1]

        if self.augment:
            # Image-space augmentation
            if not mosaic:
                # Without mosaic, apply a random affine transform to the
                # image and its labels
                img, labels = random_affine(img,
                                            labels,
                                            degrees=hyp['degrees'],
                                            translate=hyp['translate'],
                                            scale=hyp['scale'],
                                            shear=hyp['shear'])

            # HSV color-space augmentation; the return value is not used, so
            # the helper presumably modifies img in place - confirm.
            augment_hsv(img,
                        hgain=hyp['hsv_h'],
                        sgain=hyp['hsv_s'],
                        vgain=hyp['hsv_v'])

        nL = len(labels)  # number of label rows

        if nL:
            # convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])# x1, y1, x2, y2

            # Normalize coordinates to 0 - 1
            labels[:, [2, 4]] /= img.shape[0]  # columns 2 and 4 divided by image height
            labels[:, [1, 3]] /= img.shape[1]  # columns 1 and 3 divided by image width

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip and random.random() < 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip (disabled)
            ud_flip = False
            if ud_flip and random.random() < 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        # Column 0 stays zero; the label rows fill the remaining columns.
        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Reorder image dimensions
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, img_path, shapes
Exemplo n.º 7
0
    def __getitem__(self, index):
        """Return one sample: image tensor, 2D labels, file name and shapes.

        Resize, horizontal flip and affine augmentation are performed here
        because it is complicated to compute the heatmap with respect to the
        transform.

        Args:
            index: Position of the sample passed to ``self.load_image``,
                ``self.load_annotations`` and ``self.files``.

        Returns:
            tuple: ``(img, labels, file, shapes)``. ``img`` is the
            channel-first, channel-reversed image tensor. ``labels`` is the
            first 6 columns of the label tensor: column 0 is left at zero,
            column 1 is the class and columns 2:6 are the box as xywh
            normalized by the final image size. ``shapes`` is
            ``((h0, w0), (h0 / img.shape[2], w0 / img.shape[1], pad))`` with
            ``(h0, w0)`` the originally loaded image size.
        """
        img = self.load_image(index)
        anns, K = self.load_annotations(index)

        # Change h, w -> w, h
        size = np.array([i for i in img.shape[:-1]], np.float32)[::-1]

        flipped = False
        if (self.is_train) and (np.random.rand() < self.flip_prob):
            flipped = True
            img = cv2.flip(img, 1)
            # Mirror the principal point together with the image.
            K[0, 2] = size[0] - K[0, 2] - 1

        if (self.is_train) and (np.random.rand() < self.aug_prob):
            img, _, _ = random_affine(img, degrees=0, translate=.1, scale=.1)
            # TODO: affine the label mat
            # point = affine_transform(point, trans_mat)
            # box2d[:2] = affine_transform(box2d[:2], trans_mat)
            # box2d[2:] = affine_transform(box2d[2:], trans_mat)
            #
            # TODO: There is something wrong when clip after resize
            # box2d[[0, 2]] = box2d[[0, 2]].clip(0, self.input_width - 1)
            # box2d[[1, 3]] = box2d[[1, 3]].clip(0, self.input_height - 1)

        # Identity values so that `ratio` and `pad` are always bound, even
        # when the image already has the requested input size.
        resize = False
        ratio, pad = (1.0, 1.0), (0.0, 0.0)
        # `or` instead of `|`: the bitwise operator binds tighter than `!=`,
        # which turned the test into a chained comparison against
        # `self.input_height | img.shape[1]`.
        if img.shape[0] != self.input_height or img.shape[1] != self.input_width:
            img, ratio, pad = resize_image_with_pad(img, (self.input_height, self.input_width))
            resize = True

        labels = np.zeros((len(anns), 9))
        for i, a in enumerate(anns):
            a = a.copy()
            _cls = a["label"]

            locs = np.array(a["locations"])
            rot_y = np.array(a["rot_y"])
            if flipped:
                locs[0] *= -1
                rot_y *= -1

            # We can get 2D bbox by labels or calculate by camera&3D bbox directly
            point, box2d, box3d = encode_label(
                K, rot_y, a["dimensions"], locs
            )

            # When an object is only partly inside the image the computed 2D
            # box exceeds the image bounds, so the annotated box is used
            # instead for now.
            box2d = np.array(a["bbox"], dtype=np.float64)
            if flipped:
                # The annotated box refers to the unflipped image; mirror its
                # x extents (box is treated as x1, y1, x2, y2 below) with the
                # same convention as the principal point above.
                x1, x2 = box2d[0], box2d[2]
                box2d[0] = size[0] - x2 - 1
                box2d[2] = size[0] - x1 - 1
            labels[i, 0] = _cls
            labels[i, 1:5] = box2d
            labels[i, 5:8] = np.array(a["dimensions"])
            labels[i, 8] = rot_y

        nL = len(labels)
        if nL > 0:
            if resize:
                labels[:, 1] = ratio[0] * labels[:, 1] + pad[0]  # pad width
                labels[:, 2] = ratio[1] * labels[:, 2] + pad[1]  # pad height
                labels[:, 3] = ratio[0] * labels[:, 3] + pad[0]
                labels[:, 4] = ratio[1] * labels[:, 4] + pad[1]

            # convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

            # Normalize coordinates 0 - 1
            labels[:, [2, 4]] /= img.shape[0]  # height
            labels[:, [1, 3]] /= img.shape[1]  # width

            # Clamp negatives to zero. This applies to every column, but only
            # the class and box columns are returned.
            labels[labels < 0] = 0.0
        label_out = torch.zeros((nL, self.out_parms+1))
        if nL > 0:
            label_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        # NOTE(review): img is channel-first here, so img.shape[2] is the
        # width and img.shape[1] the height; the original height is divided
        # by the new width and vice versa. Left as is - confirm what the
        # consumer of `shapes` expects.
        shapes = (size[1], size[0]), (size[1]/img.shape[2], size[0]/img.shape[1], pad)
        return torch.from_numpy(img), label_out[:, :6], self.files[index], shapes
Exemplo n.º 8
0
    def __next__(self):
        """Return the next training batch of random square chips.

        For every readable image of the current batch slice the method applies
        a random saturation/value jitter and a random affine transform, picks
        up to 8 crop centres (sampled with ``self.class_weights`` when the
        image has labels), cuts ``height`` x ``height`` chips, converts their
        labels to xywh divided by ``height`` and randomly flips each chip.

        Returns:
            tuple: ``(imgs, labels)``. ``imgs`` is a float32 tensor built from
            the stacked chips (channel order reversed, channels moved to axis
            1, normalised with ``self.rgb_mean`` / ``self.rgb_std``);
            ``labels`` is a list with one label tensor per chip, shuffled with
            the same permutation as ``imgs``.

        Raises:
            StopIteration: when ``self.count`` reaches ``self.nB``.
        """
        self.count += 1
        if self.count == self.nB:
            raise StopIteration

        ia = self.count * self.batch_size
        ib = min((self.count + 1) * self.batch_size, self.nF)

        height = self.height
        # height = random.choice([15, 17, 19, 21]) * 32
        n_crops = 8  # chips requested from every source image

        img_all = []
        labels_all = []
        for files_index in range(ia, ib):
            img_path = '%s/%g.bmp' % (self.path,
                                      self.shuffled_vector[files_index])

            img0 = cv2.imread(img_path)
            if img0 is None:
                continue  # unreadable image: it contributes no chips

            augment_hsv = True
            if augment_hsv:
                # SV augmentation by 50%
                fraction = 0.50
                img_hsv = cv2.cvtColor(img0, cv2.COLOR_BGR2HSV)
                S = img_hsv[:, :, 1].astype(np.float32)
                V = img_hsv[:, :, 2].astype(np.float32)

                a = (random.random() * 2 - 1) * fraction + 1
                S *= a
                if a > 1:  # only a gain above 1 can push values past 255
                    np.clip(S, a_min=0, a_max=255, out=S)

                a = (random.random() * 2 - 1) * fraction + 1
                V *= a
                if a > 1:
                    np.clip(V, a_min=0, a_max=255, out=V)

                img_hsv[:, :, 1] = S.astype(np.uint8)
                img_hsv[:, :, 2] = V.astype(np.uint8)
                cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img0)

            # Load labels: rows of self.mat['targets'] whose id matches the
            # numeric file name of this chip
            chip = img_path.rsplit('/')[-1]
            i = (self.mat['id'] == float(
                chip.replace('.tif', '').replace('.bmp', ''))).nonzero()[0]
            labels1 = self.mat['targets'][i]

            # Remove buildings and small cars
            # labels1 = labels1[(labels1[:, 0] != 5) & (labels1[:, 0] != 48)]

            img1, labels1, M = random_affine(img0,
                                             targets=labels1,
                                             degrees=(-20, 20),
                                             translate=(0.01, 0.01),
                                             scale=(0.70, 1.30))

            nL1 = len(labels1)
            border = height / 2 + 1

            # Pick 100 random points inside the image, then move them with
            # the same affine matrix that was applied to the image
            r = np.ones((100, 3))
            r[:, :2] = np.random.rand(
                100, 2) * (np.array(img0.shape)[[1, 0]] - border * 2) + border
            r = (r @ M.T)[:, :2]

            # Columns of r are (x, y), so the upper bound has to be
            # (width, height); comparing both against shape[0] is only right
            # for square images.
            upper = np.array(img1.shape)[[1, 0]] - border
            r = r[np.all(r > border, 1) & np.all(r < upper, 1)]
            if len(r) == 0:
                continue  # no valid crop centre survived the transform

            # Never ask for more chips than there are candidate centres
            n_chips = min(n_crops, len(r))

            if nL1 > 0:
                # Box centres are the same for every candidate point
                x = (labels1[:, 1] + labels1[:, 3]) / 2
                y = (labels1[:, 2] + labels1[:, 4]) / 2

                weights = []
                for px, py in r:
                    # classes of all boxes whose centre lies inside the chip
                    c = labels1[(abs(px - x) < height / 2) &
                                (abs(py - y) < height / 2), 0]
                    if len(c) == 0:
                        weights.append(1e-16)  # keep empty chips selectable
                    else:
                        weights.append(self.class_weights[c.astype(
                            np.int8)].sum())

                weights = np.array(weights)
                weights /= weights.sum()
                r = r[np.random.choice(len(r),
                                       size=n_chips,
                                       p=weights,
                                       replace=False)]

                area0 = (labels1[:, 3] - labels1[:, 1]) * (labels1[:, 4] -
                                                           labels1[:, 2])

            for j in range(n_chips):
                labels = np.array([], dtype=np.float32)

                pad_x = int(r[j, 0] - height / 2)
                pad_y = int(r[j, 1] - height / 2)
                if nL1 > 0:
                    # shift boxes into chip coordinates and clip to the chip
                    labels = labels1.copy()
                    labels[:, [1, 3]] -= pad_x
                    labels[:, [2, 4]] -= pad_y
                    np.clip(labels[:, 1:5], 0, height, out=labels[:, 1:5])

                    lw = labels[:, 3] - labels[:, 1]
                    lh = labels[:, 4] - labels[:, 2]
                    area = lw * lh
                    ar = np.maximum(lw / (lh + 1e-16), lh / (lw + 1e-16))

                    # objects must have width and height > 4 pixels
                    labels = labels[(lw > 4) & (lh > 4) & (area > 20) &
                                    (area / area0 > 0.1) & (ar < 10)]

                img = img1[pad_y:pad_y + height, pad_x:pad_x + height]

                nL = len(labels)
                if nL > 0:
                    # convert labels to xywh
                    labels[:, 1:5] = xyxy2xywh(labels[:, 1:5].copy()) / height
                    # remap xview classes 11-94 to 0-61
                    # labels[:, 0] = xview_classes2indices(labels[:, 0])

                # random lr flip
                if random.random() > 0.5:
                    img = np.fliplr(img)
                    if nL > 0:
                        labels[:, 1] = 1 - labels[:, 1]

                # random ud flip
                if random.random() > 0.5:
                    img = np.flipud(img)
                    if nL > 0:
                        labels[:, 2] = 1 - labels[:, 2]

                img_all.append(img)
                labels_all.append(torch.from_numpy(labels))

        # Randomize chip order inside the batch
        # NOTE(review): if every image of the slice was skipped, img_all is
        # empty and np.stack below raises ValueError - confirm whether an
        # empty batch can occur in practice.
        order = np.random.permutation(len(labels_all))
        img_all = [img_all[j] for j in order]
        labels_all = [labels_all[j] for j in order]

        # Normalize
        img_all = np.stack(img_all)[:, :, :, ::-1].transpose(
            0, 3, 1, 2)  # BGR to RGB and cv2 to pytorch
        img_all = np.ascontiguousarray(img_all, dtype=np.float32)
        img_all -= self.rgb_mean
        img_all /= self.rgb_std

        return torch.from_numpy(img_all), labels_all
Exemplo n.º 9
0
def detect(input_image, save_img=True):
    """Run the detector on one frame and publish the onion centroids.

    The frame is letterboxed, passed through the module-level ``model``,
    filtered with non-max suppression, and the resulting boxes are collected
    per class. Centroids of ``blemished`` / ``unblemished`` detections with
    x > 250 are published on the module-level publishers.

    Args:
        input_image: frame as an array supporting ``astype``; the channel
            flip below is commented as BGR to RGB - presumably an OpenCV
            frame, confirm against the caller.
        save_img: together with ``opt.view_img`` decides whether boxes are
            drawn on the frame. The frame itself is always written to disk,
            because the flag is forced to ``True`` before saving.

    Returns:
        list: one dict per processed image mapping a class name to a list of
        ``[x, y, w, h]`` integer boxes.

    Raises:
        StopIteration: if ``cv2.waitKey`` reports the ``q`` key.
    """
    global frame_num, model
    global pubCentBlem, pubCentUnBlem

    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size

    half = device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()  # to FP16

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    print("class names array ", names)
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    bounding_boxes_all_images = []

    img0 = input_image.astype('float32')
    img = letterbox(img0, new_shape=imgsz)[0]
    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, channels first
    img = np.ascontiguousarray(img)
    # Single-element "dataset" so the loop below keeps the usual
    # (path, img, im0s, vid_cap) shape
    dataset = [("frame_num" + str(frame_num) + '.jpg', img, img0, None)]

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = torch_utils.time_synchronized()
        pred = model(img, augment=opt.augment)[0]
        t2 = torch_utils.time_synchronized()

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t3 = torch_utils.time_synchronized()

        # Per-image centroid bookkeeping
        Loc2arrCent = {}  # centroid x -> (x, y)
        arrCentBlem = []  # flat [x0, y0, x1, y1, ...] of blemished onions
        arrCentUnBlem = []  # same layout for unblemished onions
        Loc2Cls = {}  # centroid x -> 0 (blemished) / 1 (unblemished)

        # Process detections
        for det in pred:  # detections per image
            p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            bounding_boxes = {}

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    cls_name = names[int(cls)]
                    tlx, tly, brx, bry = int(xyxy[0]), int(xyxy[1]), int(
                        xyxy[2]), int(xyxy[3])

                    # boxes are reported as [x, y, w, h] in integer pixels
                    bounding_boxes.setdefault(cls_name, []).append(
                        [tlx, tly, abs(brx - tlx), abs(bry - tly)])

                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(save_path[:save_path.rfind('.')] + '.txt',
                                  'a') as file:
                            file.write(('%g ' * 5 + '\n') %
                                       (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (cls_name, conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=1)

                    # append onion centroid to list of centroids
                    if cls_name in ('blemished', 'unblemished'):
                        centx = (tlx + brx) / 2
                        centy = (tly + bry) / 2

                        if centx > 250:
                            # FOR CAMERA LOOKING FROM FRONT
                            # if center is on the belt
                            is_blemished = cls_name == 'blemished'
                            Loc2Cls[centx] = 0 if is_blemished else 1
                            Loc2arrCent[centx] = (centx, centy)

                            # Choose the list by class first: a repeated
                            # blemished x must never spill into the
                            # unblemished list. Duplicates are checked on the
                            # x entries only (even positions of the flat
                            # list).
                            target = (arrCentBlem
                                      if is_blemished else arrCentUnBlem)
                            if centx not in target[0::2]:
                                target.extend((centx, centy))

            # all bounding boxes for this frame: "bounding_boxes"
            bounding_boxes_all_images.append(bounding_boxes)
            print("Frame number = ", frame_num, "Bounding boxes: ",
                  bounding_boxes)
            frame_num += 1
            # Print time (inference + NMS)
            print('%s Inference. (%.3fs)' % (s, t2 - t1))
            print('%s NMS. (%.3fs)' % (s, t3 - t2))

            # Stream results (the window itself is disabled, only the key
            # poll is left)
            view_img = True
            if view_img:
                #cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections); forced on regardless of
            # the save_img argument - remove the assignment to honour it
            save_img = True
            if save_img:
                cv2.imwrite(save_path, im0)

        # publish centroids for current image
        print("publishing arrCentBlem ", arrCentBlem)
        print("publishing arrCentUnBlem ", arrCentUnBlem)
        pubCentBlem.publish(Float32MultiArray(data=arrCentBlem))
        pubCentUnBlem.publish(Float32MultiArray(data=arrCentUnBlem))

        # class ids ordered by centroid x
        LocOrderedPreds = [Loc2Cls[x] for x in sorted(Loc2Cls)]
        print("publishing LocOrderedPreds ", LocOrderedPreds)
        pubLocOrderedPreds.publish(Int32MultiArray(data=LocOrderedPreds))

        # centroids ordered by x, then flattened to [x0, y0, x1, y1, ...]
        arrCentxyOrd = [Loc2arrCent[x] for x in sorted(Loc2arrCent)]
        print("arrCentxyOrd ", arrCentxyOrd)
        arrCentLocOrd = [v for cent in arrCentxyOrd for v in cent]
        print("publishing arrCentLocOrd ", arrCentLocOrd)
        pubCentLocOrd.publish(Float32MultiArray(data=arrCentLocOrd))

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
    return bounding_boxes_all_images
Exemplo n.º 10
0
    def detect(self, outfile=None):
        """Detect and track objects over the video, optionally logging tracks.

        Frames are pulled from ``self.vdo``; every frame whose counter is a
        multiple of 3 is grabbed but skipped. Each processed frame goes
        through ``self.yolo3.predict`` and ``self.deepsort.update``; tracked
        boxes are drawn on the frame, which is shown when
        ``self.args.display`` is set and written to ``self.output`` when
        ``self.args.save_path`` is set.

        Args:
            outfile: optional path of a text file. When given, one line per
                tracked box is written as
                ``frame,id,x,y,w,h,1,-1,-1,-1`` (frame number is 1-based).

        The log file is closed even when processing a frame raises.
        """
        frame_cnt = -1
        f = open(outfile, 'w') if outfile is not None else None

        print("begin....")

        try:
            while self.vdo.grab():
                frame_cnt += 1

                if frame_cnt % 3 == 0:
                    continue

                start = time.time()
                _, ori_im = self.vdo.retrieve()
                im = ori_im

                t1_begin = time.time()
                bbox_xxyy, cls_conf, cls_ids = self.yolo3.predict(im)
                t1_end = time.time()

                t2_begin = time.time()
                if bbox_xxyy is not None:
                    # select class
                    # mask = cls_ids == 0
                    # bbox_xxyy = bbox_xxyy[mask]

                    # bbox_xxyy[:, 3:] *= 1.2
                    # cls_conf = cls_conf[mask]

                    bbox_xcycwh = xyxy2xywh(bbox_xxyy)
                    outputs = self.deepsort.update(bbox_xcycwh, cls_conf, im)

                    if len(outputs) > 0:
                        # first four columns are the box, last one the id
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -1]
                        # draw the boxes
                        ori_im = draw_bboxes(ori_im, bbox_xyxy, identities)

                        # frame, id, tlwh(%.2f),1,-1,-1,-1
                        if f is not None:
                            box_xywh = xyxy2tlwh(bbox_xyxy)
                            for track_id, box in zip(identities, box_xywh):
                                f.write(
                                    "%d,%d,%d,%d,%d,%d,1,-1,-1,-1\n" %
                                    (frame_cnt + 1, track_id, int(box[0]),
                                     int(box[1]), int(box[2]), int(box[3])))

                t2_end = time.time()

                end = time.time()
                print(
                    "frame:%d|det:%.4f|sort:%.4f|total:%.4f|det p:%.2f%%|fps:%.2f"
                    % (frame_cnt, (t1_end - t1_begin), (t2_end - t2_begin),
                       (end - start), ((t1_end - t1_begin) * 100 /
                                       ((end - start))), (1 / (end - start))))
                if self.args.display:
                    cv2.imshow("test", ori_im)
                    cv2.waitKey(1)

                if self.args.save_path:
                    self.output.write(ori_im)
        finally:
            # release the log even if a frame failed half-way
            if f is not None:
                f.close()
Exemplo n.º 11
0
    def __getitem__(self, index):
        """Load sample ``index``: letterboxed image plus normalised targets.

        Returns:
            tuple: ``(img, labels_out, img_path, shapes)`` where ``img`` is
            the channel-reversed, channels-first image tensor, ``labels_out``
            is an ``(nL, 6)`` tensor whose column 0 is left at zero and whose
            remaining columns hold the label row (class followed by xywh
            divided by the letterboxed image size), and ``shapes`` is
            ``(h0, w0), ((h / h0, w / w0), pad)``.
        """
        img_path, label_path = self.img_files[index], self.label_files[index]

        # Image at its loaded size, with original (h0, w0) and loaded (h, w)
        img, (h0, w0), (h, w) = load_image(self, index)

        # Final letterboxed shape: per-batch shape when rectangular batches
        # are in use, otherwise the configured size
        if self.rect:
            target_shape = self.batch_shapes[self.batch[index]]
        else:
            target_shape = self.img_size
        img, ratio, pad = letterbox(img,
                                    target_shape,
                                    auto=False,
                                    scaleup=self.augment)
        shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

        # Labels: use the preloaded array if there is one, else parse the file
        labels = []
        if os.path.isfile(label_path):
            raw = self.labels[index]
            if raw is None:
                with open(label_path, 'r') as fh:
                    rows = [line.split() for line in fh.read().splitlines()]
                raw = np.array(rows, dtype=np.float32)

            if raw.size > 0:
                # Normalized xywh -> pixel xyxy in the letterboxed frame
                labels = raw.copy()
                labels[:, 1] = ratio[0] * w * (raw[:, 1] -
                                               raw[:, 3] / 2) + pad[0]
                labels[:, 2] = ratio[1] * h * (raw[:, 2] -
                                               raw[:, 4] / 2) + pad[1]
                labels[:, 3] = ratio[0] * w * (raw[:, 1] +
                                               raw[:, 3] / 2) + pad[0]
                labels[:, 4] = ratio[1] * h * (raw[:, 2] +
                                               raw[:, 4] / 2) + pad[1]

        nL = len(labels)  # number of labels
        has_labels = bool(nL)

        if has_labels:
            # back to xywh, then scale into the 0 - 1 range
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
            labels[:, [2, 4]] /= img.shape[0]  # height
            labels[:, [1, 3]] /= img.shape[1]  # width

        # Flip switches: left-right enabled, up-down disabled
        lr_flip, ud_flip = True, False
        if self.augment:
            if lr_flip and random.random() < 0.5:
                img = np.fliplr(img)
                if has_labels:
                    labels[:, 1] = 1 - labels[:, 1]

            if ud_flip and random.random() < 0.5:
                img = np.flipud(img)
                if has_labels:
                    labels[:, 2] = 1 - labels[:, 2]

        labels_out = torch.zeros((nL, 6))
        if has_labels:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # BGR to RGB and HWC to CHW, made contiguous for torch.from_numpy
        img = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))
        return torch.from_numpy(img), labels_out, img_path, shapes
Exemplo n.º 12
0
    def __getitem__(self, index):
        """Load one image, tile or pad it, augment it and return its labels.

        With ``self.ts`` the image is scaled, padded and one random patch of
        ``self.width`` x ``self.height`` is taken (patch 0 in debug mode);
        otherwise the image is padded and resized to
        ``(self.height, self.width)``. Labels are converted to corner
        format, moved along with the image through every geometric step and
        finally converted to xywh divided by ``self.width`` /
        ``self.height``.

        Returns:
            tuple: ``(img_uri, img, labels)`` where ``img`` comes from
            ``to_tensor`` and ``labels`` is padded with zero rows up to
            ``self.num_targets_per_image``.

        Raises:
            Exception: if any final label value is negative.
            SystemExit: via ``sys.exit`` once ``self.vis_counter`` exceeds
                ``self.vis_batch - 1`` in visualisation mode.
        """
        tvf = torchvision.transforms.functional

        img_uri = self.img_files[index]
        img_labels = self.labels[index]
        # don't download, since it was already downloaded in the init
        img_path = img_uri
        img_name = ("_".join(map(str, img_path.split("_")[-5:])))
        # open()/convert() raise on failure and never return None, so no
        # None check is needed here
        orig_img = PIL.Image.open(img_path).convert('RGB')

        if self.vis_batch and len(img_labels) > 0:
            vis_orig_img = copy.deepcopy(orig_img)
            labels = add_class_dimension_to_labels(img_labels)
            labels = xyhw2xyxy_corner(labels, skip_class_dimension=True)
            tmp_path = os.path.join(visualization_tmp_path,
                                    img_name[:-4] + ".jpg")
            visualize_and_save_to_local(vis_orig_img,
                                        labels,
                                        tmp_path,
                                        box_color="green")
            print(f'new image uploaded to {tmp_path}')

        # First, handle image re-shaping
        if self.ts:
            scale = self.scales[index]
            scaled_img = scale_image(orig_img, scale)
            scaled_img_width, scaled_img_height = scaled_img.size
            patch_width, patch_height = self.width, self.height

            vert_pad, horiz_pad = pre_tile_padding(scaled_img_width,
                                                   scaled_img_height,
                                                   patch_width, patch_height)
            padded_img = tvf.pad(
                scaled_img,
                padding=(horiz_pad, vert_pad, horiz_pad, vert_pad),
                fill=(127, 127, 127),
                padding_mode="constant")
            padded_img_width, padded_img_height = padded_img.size

            _, _, n_patches, _, _ = get_patch_spacings(padded_img_width,
                                                       padded_img_height,
                                                       patch_width,
                                                       patch_height)

            patch_index = random.randint(0, n_patches - 1)
            if self.debug_mode:
                patch_index = 0
            img, boundary = get_patch(padded_img, patch_width, patch_height,
                                      patch_index)
        else:
            orig_img_width, orig_img_height = orig_img.size
            vert_pad, horiz_pad, ratio = calculate_padding(
                orig_img_height, orig_img_width, self.height, self.width)
            img = tvf.pad(orig_img,
                          padding=(horiz_pad, vert_pad, horiz_pad, vert_pad),
                          fill=(127, 127, 127),
                          padding_mode="constant")
            img = tvf.resize(img, (self.height, self.width))

        # If no labels, no need to do augmentation (this should change in the future)
        #   so immediately return with the padded image and empty labels
        if len(img_labels) == 0:
            labels = torch.zeros((len(img_labels), 5))
            img = tvf.to_tensor(img)
            labels = F.pad(
                labels,
                pad=[0, 0, 0, self.num_targets_per_image - len(labels)],
                mode="constant")
            return img_uri, img, labels

        # Next, handle label re-shaping
        labels = add_class_dimension_to_labels(img_labels)
        labels = xyhw2xyxy_corner(labels)
        if self.ts:
            labels = scale_labels(labels, self.scales[index])
            labels = add_padding_on_each_side(labels, horiz_pad, vert_pad)
            if self.vis_batch:
                tmp_path = os.path.join(visualization_tmp_path,
                                        img_name[:-4] + "_scaled.jpg")
                visualize_and_save_to_local(padded_img,
                                            labels,
                                            tmp_path,
                                            box_color="red")

            # Labels of the patch that is actually returned; computed before
            # the loops below reuse ``labels`` and ``boundary``
            labels_temp = filter_and_offset_labels(labels, boundary)

            if self.vis_batch:
                pre_vis_labels = copy.deepcopy(labels)
                for i in range(n_patches):
                    vis_patch_img, boundary = get_patch(
                        padded_img, patch_width, patch_height, i)

                    labels = filter_and_offset_labels(pre_vis_labels, boundary)

                    tmp_path = os.path.join(visualization_tmp_path, img_name[:-4] + \
                                        "_patch_{}.jpg".format(i))
                    visualize_and_save_to_local(vis_patch_img,
                                                labels,
                                                tmp_path,
                                                box_color="blue")
            # NOTE(review): when vis_batch is also set, ``labels`` here is
            # already the filtered set of the last visualised patch - confirm
            # that is intended for the upload.
            if self.upload_dataset:
                pre_vis_labels = copy.deepcopy(labels)
                for i in range(n_patches):
                    vis_patch_img, boundary = get_patch(
                        padded_img, patch_width, patch_height, i)

                    labels = filter_and_offset_labels(pre_vis_labels, boundary)

                    tmp_path = os.path.join(visualization_tmp_path, img_name[:-4] + \
                                        "_patch_{}.jpg".format(i))
                    upload_label_and_image_to_gcloud(vis_patch_img, labels,
                                                     tmp_path)

            else:
                labels = filter_and_offset_labels(labels, boundary)
        else:
            labels = add_padding_on_each_side(labels, horiz_pad, vert_pad)
            labels = scale_labels(labels, ratio)
            labels_temp = labels

            if self.vis_batch:
                tmp_path = os.path.join(visualization_tmp_path,
                                        img_name[:-4] + "_pad_resized.jpg")
                visualize_and_save_to_local(img,
                                            labels,
                                            tmp_path,
                                            box_color="blue")

        # From here on only the labels of the returned image matter
        labels = labels_temp
        if self.vis_batch and self.data_aug:
            vis_aug_img = copy.deepcopy(img)
            tmp_path = os.path.join(visualization_tmp_path,
                                    img_name[:-4] + "_before_aug.jpg")
            visualize_and_save_to_local(vis_aug_img,
                                        labels,
                                        tmp_path,
                                        box_color="red")
        if self.augment_hsv or self.data_aug:
            if random.random() > 0.5:
                img = self.jitter(img)
                # no transformation on labels

        # Augment image and labels
        img_width, img_height = img.size
        if self.augment_affine or self.data_aug:
            if random.random() > 0:
                angle = random.uniform(-10, 10)
                translate = (random.uniform(-40,
                                            40), random.uniform(-40,
                                                                40))  ## WORKS
                scale = random.uniform(0.9, 1.1)
                shear = random.uniform(-3, 3)
                img = tvf.affine(img,
                                 angle,
                                 translate,
                                 scale,
                                 shear,
                                 2,
                                 fillcolor=(127, 127, 127))
                labels = affine_labels(img_height, img_width, labels, -angle,
                                       translate, scale, (-shear, 0))

        if self.bw:
            img = tvf.to_grayscale(img, num_output_channels=1)

        # random left-right flip
        if self.lr_flip:
            if random.random() > 0.5:
                img = tvf.hflip(img)
                # Mirroring swaps the roles of the two x corners: the old
                # right edge becomes the new left edge. Writing them back
                # un-swapped would leave x1 > x2 and a negative width after
                # the xyxy -> xywh conversion below.
                flipped_x1 = img_width - labels[:, 3]
                flipped_x2 = img_width - labels[:, 1]
                labels[:, 1] = flipped_x1
                labels[:, 3] = flipped_x2

        # GaussianBlur, needs further development
        if self.blur:
            if random.random() > 0.2:
                arr = np.asarray(img)
                # value is unused; the draw is kept so seeded runs consume
                # the same random stream as before
                random.uniform(40, -40)
                sigma = random.uniform(0, 3.00)
                seq = iaa.Sequential([iaa.GaussianBlur(sigma=sigma)])
                images_aug = seq.augment_images(arr)
                img = PIL.Image.fromarray(np.uint8(images_aug), 'RGB')

        #AdditiveGaussianNoise
        if self.noise:
            if random.random() > 0.3:
                arr = np.asarray(img)
                scale = random.uniform(0, 0.03 * 255)
                seq = iaa.Sequential([
                    iaa.AdditiveGaussianNoise(loc=0,
                                              scale=scale,
                                              per_channel=0.5)
                ])
                images_aug = seq.augment_images(arr)
                img = PIL.Image.fromarray(np.uint8(images_aug), 'RGB')

        #SigmoidContrast, need further development
        if self.contrast:
            if random.random() > 0.5:
                arr = np.asarray(img)
                cutoff = random.uniform(0.45, 0.75)
                gain = random.randint(5, 10)
                seq = iaa.Sequential(
                    [iaa.SigmoidContrast(gain=gain, cutoff=cutoff)])
                images_aug = seq.augment_images(arr)
                img = PIL.Image.fromarray(np.uint8(images_aug), 'RGB')

        #Sharpen, need further development
        if self.sharpen:
            if random.random() > 0.3:
                arr = np.asarray(img)
                alpha = random.uniform(0, 0.5)
                seq = iaa.Sharpen(alpha=alpha)
                images_aug = seq.augment_images(arr)
                img = PIL.Image.fromarray(np.uint8(images_aug), 'RGB')

        if self.vis_batch and self.data_aug:
            vis_post_aug_img = copy.deepcopy(img)
            tmp_path = os.path.join(visualization_tmp_path,
                                    img_name[:-4] + "_post_augmentation.jpg")
            visualize_and_save_to_local(vis_post_aug_img,
                                        labels,
                                        tmp_path,
                                        box_color="green")

        if self.vis_batch:
            # visualisation mode stops the whole process after enough images
            self.vis_counter += 1
            if self.vis_counter > (self.vis_batch - 1):
                sys.exit('Finished visualizing enough images. Exiting!')

        # corner format -> xywh, then divide by the network input size
        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
        labels[:, (1, 3)] /= self.width
        labels[:, (2, 4)] /= self.height

        img = tvf.to_tensor(img)
        # pad with zero rows so every sample has the same number of targets
        labels = F.pad(labels,
                       pad=[0, 0, 0, self.num_targets_per_image - len(labels)],
                       mode="constant")
        if (labels < 0).sum() > 0:
            raise Exception(f"labels for image {img_uri} have negative values")
        return img_uri, img, labels
Exemplo n.º 13
0
    def __getitem__(self, index):
        """Load one image and its box labels, letterboxed to ``self.img_size``.

        Args:
            index: Position of the sample in ``self.img_files`` and
                ``self.label_files``.

        Returns:
            A 4-tuple ``(img, labels_out, img_path, (h, w))``:

            * ``img`` - float32 tensor, RGB, channel-first, scaled to 0.0 - 1.0.
            * ``labels_out`` - ``(nL, 6)`` tensor. Column 0 is left at zero
              (the original note suggests it is reserved for a batch index);
              column 1 is the class and columns 2-5 are the xywh box divided
              by ``self.img_size``.
            * ``img_path`` - path the image was read from.
            * ``(h, w)`` - image height/width before letterboxing.

        Raises:
            AssertionError: If the image cannot be read from ``img_path``.
        """
        img_path = self.img_files[index]
        label_path = self.label_files[index]

        img = cv2.imread(img_path)  # BGR
        assert img is not None, 'File Not Found ' + img_path
        h, w, _ = img.shape
        # Resize every image to img_size; the returned ratio/padding are used
        # below to move the boxes along with the pixels.
        img, ratio, padw, padh = letterbox(img, height=self.img_size)

        # Load labels
        labels = []
        if os.path.isfile(label_path):
            with open(label_path, 'r') as file:
                # Each line: class x_center y_center w h, e.g.
                # "4 0.43 0.36 0.06 0.24" (coordinates are normalized).
                # Blank lines are skipped: they would otherwise yield a ragged
                # list that cannot be converted to a float32 array.
                rows = [
                    line.split() for line in file.read().splitlines()
                    if line.strip()
                ]
            x = np.array(rows, dtype=np.float32)  # x: (box_num, 5)
            if x.size > 0:
                # Normalized xywh to pixel xyxy format. The image was resized,
                # so the label coordinates are rescaled and shifted with it.
                labels = x.copy()
                labels[:, 1] = ratio * w * (x[:, 1] - x[:, 3] / 2) + padw
                labels[:, 2] = ratio * h * (x[:, 2] - x[:, 4] / 2) + padh
                labels[:, 3] = ratio * w * (x[:, 1] + x[:, 3] / 2) + padw
                labels[:, 4] = ratio * h * (x[:, 2] + x[:, 4] / 2) + padh
                # (The per-sample debug print of the labels was removed.)

        nL = len(labels)  # number of labels
        if nL:
            # convert xyxy to xywh and normalize by the (square) output size
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size

        # Column 0 stays zero; columns 1-5 receive class + box.
        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Normalize
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0

        return torch.from_numpy(img), labels_out, img_path, (h, w)
Exemplo n.º 14
0
    def __getitem__(self, index):
        """Return ``(image, targets, path, (h, w))`` for the sample at ``index``.

        The image is taken from the preloaded ``self.imgs`` when that attribute
        exists, otherwise read from disk. It is letterboxed, optionally
        augmented (random affine + horizontal flip), and converted to a
        float32 RGB channel-first tensor in 0.0 - 1.0. ``targets`` is an
        ``(nL, 6)`` tensor with column 0 left at zero and columns 1-5 holding
        the class followed by the xywh box normalized by the final image size.
        ``(h, w)`` is the image size before letterboxing.

        Raises:
            AssertionError: If no image could be obtained.
        """
        path = self.img_files[index]
        label_file = self.label_files[index]

        # Preloaded image if available, else read from disk (BGR).
        img = self.imgs[index] if hasattr(self, 'imgs') else cv2.imread(path)
        assert img is not None, 'File Not Found ' + path

        # Letterbox to either the per-batch rectangular shape or a square.
        h, w, *_ = img.shape
        if self.pad_rectangular:
            letterbox_args = dict(
                new_shape=self.batch_shapes[self.batch[index]], mode='rect')
        else:
            letterbox_args = dict(new_shape=self.img_size, mode='square')
        img, ratio, padw, padh = letterbox(img, **letterbox_args)

        # Labels come from the in-memory cache, but only when the label file
        # actually exists on disk.
        labels = []
        if os.path.isfile(label_file):
            raw = self.labels[index]
            if raw.size > 0:
                # Normalized xywh -> pixel xyxy in the letterboxed image.
                half_w = raw[:, 3] / 2
                half_h = raw[:, 4] / 2
                labels = raw.copy()
                labels[:, 1] = ratio * w * (raw[:, 1] - half_w) + padw
                labels[:, 2] = ratio * h * (raw[:, 2] - half_h) + padh
                labels[:, 3] = ratio * w * (raw[:, 1] + half_w) + padw
                labels[:, 4] = ratio * h * (raw[:, 2] + half_h) + padh

        # Geometric augmentation of image and boxes together.
        if self.augment:
            img, labels = random_affine(
                img,
                labels,
                degrees=(-5, 5),
                translate=(0.10, 0.10),
                scale=(0.90, 1.10),
            )

        nL = len(labels)  # number of labels
        if nL:
            # Pixel xyxy -> xywh, then normalize to 0 - 1 by the image size.
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
            labels[:, [2, 4]] /= img.shape[0]  # height
            labels[:, [1, 3]] /= img.shape[1]  # width

        if self.augment:
            flip_lr, flip_ud = True, False  # up-down flip is switched off

            if flip_lr and random.random() > 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            if flip_ud and random.random() > 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        targets = torch.zeros((nL, 6))
        if nL:
            targets[:, 1:] = torch.from_numpy(labels)

        # BGR -> RGB, HWC -> CHW, uint8 -> float32 in 0.0 - 1.0.
        chw = img[:, :, ::-1].transpose(2, 0, 1)
        chw = np.ascontiguousarray(chw, dtype=np.float32)
        chw /= 255.0

        return torch.from_numpy(chw), targets, path, (h, w)
Exemplo n.º 15
0
    def detect(self):
        """Run detection and tracking on every frame of ``self.vidCap``.

        For each grabbed frame the detector (``self.yolov3``) is run, its boxes
        are handed to the tracker (``self.deepsort``) and the tracked boxes are
        drawn on the frame. When ``self.output_file`` is set, one line per
        track is written to that file as ``frame,id,x,y,w,h,1,-1,-1,-1`` with
        1-based frame numbers. Timing statistics are printed every 500 frames.
        The frame is displayed when ``self.display`` is True and written to
        ``self.output`` when ``self.save_path`` is set.

        Returns:
            None.
        """
        frame_no = -1
        # skip_no = 2

        # Fix: the original opened the bare name ``output_file``, which is not
        # defined in this method; the path is held on the instance.
        f = open(self.output_file, "w") if self.output_file else None

        # try/finally so the results file is closed even if a frame fails.
        try:
            while self.vidCap.grab():
                frame_no += 1

                # skip frames every n frames
                # if frame_no % skip_no != 0:
                #     continue

                # start time
                total_begin = time.time()

                _, img = self.vidCap.retrieve()

                # YOLOv3 stage
                yolo_begin = time.time()
                bbox_xyxy, cls_conf, cls_ids = self.yolov3.predict(img)
                # [x1,y1,x2,y2]
                yolo_end = time.time()

                # DeepSORT stage
                ds_begin = time.time()
                if bbox_xyxy is not None:
                    bbox_cxcywh = xyxy2xywh(bbox_xyxy)

                    outputs = self.deepsort.update(bbox_cxcywh, cls_conf, img)

                    if len(outputs) > 0:
                        # [x1,y1,x2,y2] id
                        bbox_xyxy = outputs[:, :4]
                        ids = outputs[:, -1]
                        img = draw_bboxes(img, bbox_xyxy, ids)

                        # frame,id,tlwh,1,-1,-1,-1
                        if f is not None:
                            # NOTE(review): the variable is named tlwh, but the
                            # same helper is used above to produce what is
                            # called cxcywh - confirm which origin the
                            # consumer of this file expects.
                            bbox_tlwh = xyxy2xywh(bbox_xyxy)
                            for i in range(len(bbox_tlwh)):
                                write_line = "%d,%d,%d,%d,%d,%d,1,-1,-1,-1\n" % (
                                    frame_no + 1, outputs[i, -1],
                                    int(bbox_tlwh[i][0]), int(bbox_tlwh[i][1]),
                                    int(bbox_tlwh[i][2]), int(bbox_tlwh[i][3]))
                                f.write(write_line)
                ds_end = time.time()

                total_end = time.time()

                if frame_no % 500 == 0:
                    det_time = yolo_end - yolo_begin
                    ds_time = ds_end - ds_begin
                    total_time = total_end - total_begin
                    # Guard the ratios against a zero-length interval (coarse
                    # clock resolution), which would raise ZeroDivisionError.
                    safe_total = max(total_time, 1e-9)
                    print("frame:%04d|det:%.4f|deep sort:%.4f|total:%.4f|det p:%.2f%%|fps:%.2f" % (
                        frame_no, det_time, ds_time, total_time,
                        det_time * 100 / safe_total, 1 / safe_total))

                if self.display is True:
                    cv2.imshow("Test", img)
                    cv2.waitKey(1)

                if self.save_path:
                    self.output.write(img)
        finally:
            if f is not None:
                f.close()
    def __getitem__(self, index):
        """Load, augment and letterbox one sample.

        Args:
            index: Sample position; remapped through ``self.indices`` when
                ``self.image_weights`` is truthy.

        Returns:
            A 4-tuple ``(img, labels_out, img_path, (h, w))``:

            * ``img`` - float32 tensor, RGB, channel-first, scaled to 0.0 - 1.0.
            * ``labels_out`` - ``(nL, 6)`` tensor; column 0 is left at zero,
              column 1 is the class and columns 2-5 the xywh box normalized
              by the final image height/width.
            * ``img_path`` - path of the image file.
            * ``(h, w)`` - image size before letterboxing.

        Raises:
            AssertionError: If the image is not cached and cannot be read.
        """
        if self.image_weights:
            index = self.indices[index]

        img_path = self.img_files[index]
        label_path = self.label_files[index]

        # Load image
        img = self.imgs[index]
        if img is None:
            img = cv2.imread(img_path)  # BGR
            assert img is not None, 'File Not Found ' + img_path
            if self.n < 1001:
                self.imgs[index] = img  # cache image into memory

        # Augment colorspace
        augment_hsv = True
        if self.augment and augment_hsv:
            # SV augmentation by 50%
            fraction = 0.50  # must be < 1.0
            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)  # hue, sat, val
            S = img_hsv[:, :, 1].astype(np.float32)  # saturation
            V = img_hsv[:, :, 2].astype(np.float32)  # value

            a = (random.random() * 2 - 1) * fraction + 1
            b = (random.random() * 2 - 1) * fraction + 1
            S *= a
            V *= b

            # Clipping is only needed when the gain can push values past 255.
            img_hsv[:, :, 1] = S if a < 1 else S.clip(None, 255)
            img_hsv[:, :, 2] = V if b < 1 else V.clip(None, 255)
            # Fix: convert into a NEW array. The original passed ``dst=img``,
            # which overwrote the array cached in ``self.imgs[index]`` so the
            # colour jitter accumulated on the cached image across epochs.
            img = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR)

        # Letterbox
        h, w, _ = img.shape
        if self.rect:
            shape = self.batch_shapes[self.batch[index]]
            img, ratiow, ratioh, padw, padh = letterbox(img,
                                                        new_shape=shape,
                                                        mode='rect')
        else:
            shape = self.img_size
            img, ratiow, ratioh, padw, padh = letterbox(img,
                                                        new_shape=shape,
                                                        mode='square')

        # Load labels
        labels = []
        if os.path.isfile(label_path):
            x = self.labels[index]
            if x is None:  # labels not preloaded
                with open(label_path, 'r') as f:
                    # Skip blank lines: they would produce a ragged list that
                    # cannot be converted to a float32 array.
                    rows = [
                        line.split() for line in f.read().splitlines()
                        if line.strip()
                    ]
                x = np.array(rows, dtype=np.float32)
                self.labels[index] = x  # save for next time

            if x.size > 0:
                # Normalized xywh to pixel xyxy format
                labels = x.copy()
                labels[:, 1] = ratiow * w * (x[:, 1] - x[:, 3] / 2) + padw
                labels[:, 2] = ratioh * h * (x[:, 2] - x[:, 4] / 2) + padh
                labels[:, 3] = ratiow * w * (x[:, 1] + x[:, 3] / 2) + padw
                labels[:, 4] = ratioh * h * (x[:, 2] + x[:, 4] / 2) + padh

        # Augment image and labels
        if self.augment:
            img, labels = random_affine(img,
                                        labels,
                                        degrees=(-5, 5),
                                        translate=(0.10, 0.10),
                                        scale=(0.90, 1.10))

        nL = len(labels)  # number of labels
        if nL:
            # convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

            # Normalize coordinates 0 - 1
            labels[:, [2, 4]] /= img.shape[0]  # height
            labels[:, [1, 3]] /= img.shape[1]  # width

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip and random.random() > 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip
            ud_flip = False
            if ud_flip and random.random() > 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        # Column 0 stays zero; columns 1-5 receive class + box.
        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Normalize
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0

        return torch.from_numpy(img), labels_out, img_path, (h, w)
Exemplo n.º 17
0
    def __next__(self):
        """Build and return the next batch of images and labels.

        Advances ``self.count``, reads the images selected by
        ``self.shuffled_vector`` for this batch, optionally applies HSV,
        affine and flip augmentation, and resizes every image to a square of
        side ``height``.

        Returns:
            A 2-tuple ``(images, labels_all)``: ``images`` is a float32 tensor
            stacked over the batch, RGB, channel-first, scaled to 0.0 - 1.0;
            ``labels_all`` is a list with one tensor per image that could be
            read (unreadable images are skipped).

        Raises:
            StopIteration: When ``self.count`` reaches ``self.nB``.
            AssertionError: If no image of the batch could be read.
        """
        self.count += 1
        if self.count == self.nB:
            raise StopIteration

        # Half-open range [ia, ib) of positions in the shuffled order.
        ia = self.count * self.batch_size
        ib = min((self.count + 1) * self.batch_size, self.nF)

        # Multi-scale training is hard-disabled by this local flag.
        multi_scale = False
        if multi_scale and self.augment:
            # Multi-Scale YOLO Training
            height = random.choice(range(10, 20)) * 32  # 320 - 608 pixels
        else:
            # Fixed-Scale YOLO Training
            height = self.height

        img_all = []
        labels_all = []
        for index, files_index in enumerate(range(ia, ib)):
            img_path = self.img_files[self.shuffled_vector[files_index]]
            label_path = self.label_files[self.shuffled_vector[files_index]]

            img = cv2.imread(img_path)  # BGR
            if img is None:
                # Unreadable image: report and drop it from the batch.
                print('nooooooooooimages')
                continue

            augment_hsv = True
            if self.augment and augment_hsv:
                # SV augmentation by 50%
                fraction = 0.50
                img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
                S = img_hsv[:, :, 1].astype(np.float32)
                V = img_hsv[:, :, 2].astype(np.float32)

                # Random gain in [0.5, 1.5); clip only when it can overflow.
                a = (random.random() * 2 - 1) * fraction + 1
                S *= a
                if a > 1:
                    np.clip(S, a_min=0, a_max=255, out=S)

                a = (random.random() * 2 - 1) * fraction + 1
                V *= a
                if a > 1:
                    np.clip(V, a_min=0, a_max=255, out=V)

                img_hsv[:, :, 1] = S.astype(np.uint8)
                img_hsv[:, :, 2] = V.astype(np.uint8)
                # Writes the converted pixels back into ``img`` in place.
                cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

            h, w, _ = img.shape
            img, ratio, padw, padh = resize_square(img,
                                                   height=height,
                                                   color=(127.5, 127.5, 127.5))

            # Load labels
            # NOTE(review): both class lists are re-read for every image from
            # hard-coded absolute paths; they look loop-invariant - consider
            # loading them once and making the paths configurable.
            name_classes = load_classes(
                '/Users/jx/Desktop/jjjjjxxxx/omr_yolo3/cfg/new_duration.names')
            pitch_classes = load_classes(
                '/Users/jx/Desktop/jjjjjxxxx/omr_yolo3/cfg/pitch.names')
            if os.path.isfile(label_path):

                # Seven values per label row; columns 5 and 6 are remapped
                # below into ``durations`` and ``pitchs``.
                labels0 = np.loadtxt(label_path,
                                     dtype=np.float32).reshape(-1, 7)
                # Normalized xywh to pixel xyxy format
                # NOTE(review): the active code below only scales and shifts
                # columns 1-4, so the files presumably already hold pixel
                # xyxy coordinates - confirm against the label format.
                labels = labels0.copy()
                # labels[:, 1] = ratio * w * (labels0[:, 1] - labels0[:, 3] / 2) + padw
                #labels[:, 2] = ratio * h * (labels0[:, 2] - labels0[:, 4] / 2) + padh
                #labels[:, 3] = ratio * w * (labels0[:, 1] + labels0[:, 3] / 2) + padw
                #labels[:, 4] = ratio * h * (labels0[:, 2] + labels0[:, 4] / 2) + padh
                labels[:, 1] = ratio * labels0[:, 1] + padw
                labels[:, 2] = ratio * labels0[:, 2] + padh
                labels[:, 3] = ratio * labels0[:, 3] + padw
                # Reads from ``labels`` rather than ``labels0``; column 4 has
                # not been modified yet, so the value is the same.
                labels[:, 4] = ratio * labels[:, 4] + padh
                durations = []
                pitchs = []
                # for i in labels0[:,5]:
                #    if float(i)==float(1):
                #        durations.append(9)
                #    elif str(i) not in name_classes and str(i)!='0.0':
                #        durations.append(6)
                #    else:
                #        for idx,j in enumerate(name_classes):
                #            if float(i)== float(j):
                #              durations.append(idx)
                # Durations not listed in name_classes are mapped to 10.
                for i in labels0[:, 5]:
                    if str(int(i)) in name_classes:
                        durations.append(int(i))
                    else:
                        durations.append(10)
                # Pitch out of range or missing: values above 15 are capped at
                # 15, any other unlisted value becomes -5.
                for i in labels0[:, 6]:
                    if str(int(i)) in pitch_classes:
                        pitchs.append(int(i))
                    elif int(i) > 15:
                        pitchs.append(15)
                    else:
                        pitchs.append(-5)
                labels[:, 5] = durations
                labels[:, 6] = pitchs
            else:
                # No label file: an empty 1-D array stands in for "no labels".
                labels = np.array([])

            # Augment image and labels
            if self.augment:
                img, labels, M = random_affine(img,
                                               labels,
                                               degrees=(-3, 3),
                                               translate=(0.1, 0.1),
                                               scale=(0.9, 1.1))

            # Debug visualisation, disabled by this local flag.
            plotFlag = False
            if plotFlag:
                import matplotlib.pyplot as plt
                plt.figure(figsize=(10, 10)) if index == 0 else None
                plt.subplot(4, 4, index + 1).imshow(img[:, :, ::-1])
                plt.plot(labels[:, [1, 3, 3, 1, 1]].T,
                         labels[:, [2, 2, 4, 4, 2]].T, '.-')
                plt.axis('off')

            nL = len(labels)
            if nL > 0:
                # convert xyxy to xywh
                labels[:, 1:5] = xyxy2xywh(labels[:, 1:5].copy()) / height

            if self.augment:
                # random left-right flip
                # ``&`` does not short-circuit, so random.random() is drawn
                # even when the flag is False (this affects the RNG stream).
                lr_flip = True
                if lr_flip & (random.random() > 0.5):
                    img = np.fliplr(img)
                    if nL > 0:
                        labels[:, 1] = 1 - labels[:, 1]

                # random up-down flip
                ud_flip = False
                if ud_flip & (random.random() > 0.5):
                    img = np.flipud(img)
                    if nL > 0:
                        labels[:, 2] = 1 - labels[:, 2]

            img_all.append(img)
            labels_all.append(torch.from_numpy(labels))

        # Normalize
        assert len(img_all) != 0
        img_all = np.stack(img_all)[:, :, :, ::-1].transpose(
            0, 3, 1, 2)  # BGR to RGB and cv2 to pytorch
        img_all = np.ascontiguousarray(img_all, dtype=np.float32)
        # img_all -= self.rgb_mean
        # img_all /= self.rgb_std
        img_all /= 255.0
        return torch.from_numpy(img_all), labels_all
Exemplo n.º 18
0
    def __getitem__(self, index):
        """Return ``(image, targets, path, shapes)`` for one sample.

        With ``self.mosaic`` set the image and labels come from
        ``load_mosaic`` and ``shapes`` is None. Otherwise the image is loaded,
        letterboxed and its cached labels are converted to pixel xyxy;
        ``shapes`` is then ``((h0, w0), ((h / h0, w / w0), pad))``, kept for
        COCO mAP rescaling. ``targets`` is an ``(nL, 6)`` tensor laid out as
        ``[batch, cls, x, y, w, h]`` with column 0 left at zero and the box
        normalized by the final image size. The image tensor is RGB,
        channel-first, and keeps the array's dtype (no float scaling here).
        """
        # Original note: this flag is False during training.
        if self.image_weights:
            index = self.indices[index]

        hp = self.hyp
        if self.mosaic:
            # Mosaic sample; ``index`` is a plain int.
            img, labels = load_mosaic(self, index)
            shapes = None
        else:
            # (h0, w0) is the original size; (h, w) the size after the
            # loader's resize, one side of which equals img_size per the
            # original note. The image is BGR.
            img, (h0, w0), (h, w) = load_image(self, index)

            # Final letterboxed shape (height, width).
            if self.rect:
                target_shape = self.batch_shapes[self.batch[index]]
            else:
                target_shape = self.img_size
            img, ratio, pad = letterbox(img,
                                        target_shape,
                                        auto=False,
                                        scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            # All ground-truth boxes cached for this image.
            labels = []
            boxes = self.labels[index]
            if boxes.size > 0:
                # Normalized xywh -> pixel xyxy in the letterboxed image.
                half_w = boxes[:, 3] / 2
                half_h = boxes[:, 4] / 2
                labels = boxes.copy()
                labels[:, 1] = ratio[0] * w * (boxes[:, 1] - half_w) + pad[0]  # pad width
                labels[:, 2] = ratio[1] * h * (boxes[:, 2] - half_h) + pad[1]  # pad height
                labels[:, 3] = ratio[0] * w * (boxes[:, 1] + half_w) + pad[0]
                labels[:, 4] = ratio[1] * h * (boxes[:, 2] + half_h) + pad[1]

        if self.augment:
            # Image-space augmentation is skipped for mosaics.
            if not self.mosaic:
                img, labels = random_affine(
                    img,
                    labels,
                    degrees=hp['degrees'],
                    translate=hp['translate'],
                    scale=hp['scale'],
                    shear=hp['shear'],
                )

            # Colour-space augmentation (return value unused, as before).
            augment_hsv(img,
                        hgain=hp['hsv_h'],
                        sgain=hp['hsv_s'],
                        vgain=hp['hsv_v'])

        nL = len(labels)  # number of labels
        if nL:
            # Both the incoming xyxy and the resulting xywh are in pixels;
            # the division below normalizes them to 0 - 1.
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
            labels[:, [2, 4]] /= img.shape[0]  # height
            labels[:, [1, 3]] /= img.shape[1]  # width

        if self.augment:
            flip_lr, flip_ud = True, False  # up-down flip is switched off

            if flip_lr and random.random() < 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            if flip_ud and random.random() < 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        # [batch, cls, x, y, w, h]
        targets = torch.zeros((nL, 6))
        if nL:
            targets[:, 1:] = torch.from_numpy(labels)

        # BGR -> RGB, HWC -> CHW, then make the memory contiguous.
        chw = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))

        return torch.from_numpy(chw), targets, self.img_files[index], shapes
Exemplo n.º 19
0
    def __getitem__(self, index):
        """Load one training/validation sample, optionally as a mosaic.

        Args:
            index: Sample position; remapped through ``self.indices`` when
                ``self.image_weights`` is truthy.

        Returns:
            A 4-tuple ``(img, labels_out, img_path, (h, w))``:

            * ``img`` - float32 tensor, RGB, channel-first, scaled to 0.0 - 1.0.
            * ``labels_out`` - ``(nL, 6)`` tensor; column 0 is left at zero,
              column 1 is the class and columns 2-5 the xywh box normalized
              by the final image height/width.
            * ``img_path`` - path of the image file for ``index``.
            * ``(h, w)`` - size of the mosaic, or of the loaded image before
              letterboxing.
        """
        if self.image_weights:
            index = self.indices[index]

        img_path = self.img_files[index]
        label_path = self.label_files[index]

        mosaic = True and self.augment  # load 4 images at a time into a mosaic (only during training)
        if mosaic:
            # Load mosaic
            img, labels = load_mosaic(self, index)
            h, w, _ = img.shape

        else:
            # Load image
            img = load_image(self, index)

            # Letterbox
            h, w, _ = img.shape
            # (Two blocks of disabled debugging code that were kept as no-op
            # triple-quoted string statements have been removed here.)
            if self.rect:
                img, ratio, padw, padh = letterbox(
                    img, self.batch_shapes[self.batch[index]], mode='rect')
            else:
                img, ratio, padw, padh = letterbox(img,
                                                   self.img_size,
                                                   mode='square')

            # Load labels
            labels = []
            if os.path.isfile(label_path):
                x = self.labels[index]
                if x is None:  # labels not preloaded
                    with open(label_path, 'r') as f:
                        # Skip blank lines: they would produce a ragged list
                        # that cannot be converted to a float32 array.
                        rows = [
                            line.split() for line in f.read().splitlines()
                            if line.strip()
                        ]
                    x = np.array(rows, dtype=np.float32)

                if x.size > 0:
                    # Normalized xywh to pixel xyxy format
                    labels = x.copy()
                    labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + padw
                    labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + padh
                    labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + padw
                    labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + padh

        if self.augment:
            # Augment colorspace (return value unused, as before)
            augment_hsv(img,
                        hgain=self.hyp['hsv_h'],
                        sgain=self.hyp['hsv_s'],
                        vgain=self.hyp['hsv_v'])

            # Augment imagespace
            g = 0.0 if mosaic else 1.0  # do not augment mosaics
            hyp = self.hyp
            img, labels = random_affine(img,
                                        labels,
                                        degrees=hyp['degrees'] * g,
                                        translate=hyp['translate'] * g,
                                        scale=hyp['scale'] * g,
                                        shear=hyp['shear'] * g)

            # Apply cutouts
            # if random.random() < 0.9:
            #     labels = cutout(img, labels)

        nL = len(labels)  # number of labels
        if nL:
            # convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

            # Normalize coordinates 0 - 1
            labels[:, [2, 4]] /= img.shape[0]  # height
            labels[:, [1, 3]] /= img.shape[1]  # width

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip and random.random() < 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip
            ud_flip = False
            if ud_flip and random.random() < 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        # Column 0 stays zero; columns 1-5 receive class + box.
        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Normalize
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0

        return torch.from_numpy(img), labels_out, img_path, (h, w)
Exemplo n.º 20
0
    def get_data(self, img_path, label_path):
        """Read one image and its labels and prepare them for training.

        Args:
            img_path: Path of the image file.
            label_path: Path of the label text file; each row is reshaped to
                six values, of which columns 2-5 are treated as a normalized
                xywh box.

        Returns:
            A 4-tuple ``(img, labels, img_path, (h, w))``: ``img`` is the
            letterboxed RGB image (passed through ``self.transforms`` when
            that is set); ``labels`` is an array whose columns 2-5 hold the
            xywh box normalized by ``self.width`` / ``self.height`` (an empty
            array when the label file is missing); ``(h, w)`` is the image
            size before letterboxing.

        Raises:
            ValueError: If the image cannot be read.
        """
        height = self.height
        width = self.width
        img = cv2.imread(img_path)  # BGR
        if img is None:
            raise ValueError('File corrupt {}'.format(img_path))
        augment_hsv = True
        if self.augment and augment_hsv:
            # SV augmentation by 50%
            fraction = 0.50
            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            S = img_hsv[:, :, 1].astype(np.float32)
            V = img_hsv[:, :, 2].astype(np.float32)

            # Random gain in [0.5, 1.5); clip only when it can overflow.
            a = (random.random() * 2 - 1) * fraction + 1
            S *= a
            if a > 1:
                np.clip(S, a_min=0, a_max=255, out=S)

            a = (random.random() * 2 - 1) * fraction + 1
            V *= a
            if a > 1:
                np.clip(V, a_min=0, a_max=255, out=V)

            img_hsv[:, :, 1] = S.astype(np.uint8)
            img_hsv[:, :, 2] = V.astype(np.uint8)
            # ``img`` was freshly read above, so writing into it is safe.
            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

        h, w, _ = img.shape
        img, ratio, padw, padh = letterbox(img, height=height, width=width)

        # Load labels
        if os.path.isfile(label_path):
            labels0 = np.loadtxt(label_path, dtype=np.float32).reshape(-1, 6)

            # Normalized xywh to pixel xyxy format
            labels = labels0.copy()
            labels[:, 2] = ratio * w * (labels0[:, 2] - labels0[:, 4] / 2) + padw
            labels[:, 3] = ratio * h * (labels0[:, 3] - labels0[:, 5] / 2) + padh
            labels[:, 4] = ratio * w * (labels0[:, 2] + labels0[:, 4] / 2) + padw
            labels[:, 5] = ratio * h * (labels0[:, 3] + labels0[:, 5] / 2) + padh
        else:
            labels = np.array([])

        # Augment image and labels
        if self.augment:
            img, labels, M = random_affine(img,
                                           labels,
                                           degrees=(-5, 5),
                                           translate=(0.10, 0.10),
                                           scale=(0.50, 1.20))

        # Debug visualisation, disabled by this local flag.
        plotFlag = False
        if plotFlag:
            import matplotlib
            matplotlib.use('Agg')
            import matplotlib.pyplot as plt
            plt.figure(figsize=(50, 50))
            plt.imshow(img[:, :, ::-1])
            # Fix: the xyxy box lives in columns 2-5 of the 6-column labels;
            # the original plotted columns 1-4, i.e. shifted by one.
            plt.plot(labels[:, [2, 4, 4, 2, 2]].T,
                     labels[:, [3, 3, 5, 5, 3]].T, '.-')
            plt.axis('off')
            plt.savefig('test.jpg')
            time.sleep(10)

        nL = len(labels)
        if nL > 0:
            # convert xyxy to xywh, then normalize x/w by width, y/h by height
            labels[:, 2:6] = xyxy2xywh(labels[:, 2:6].copy())  # / height
            labels[:, 2] /= width
            labels[:, 3] /= height
            labels[:, 4] /= width
            labels[:, 5] /= height
        if self.augment:
            # random left-right flip
            # ``&`` does not short-circuit, so random.random() is always drawn.
            lr_flip = True
            if lr_flip & (random.random() > 0.5):
                img = np.fliplr(img)
                if nL > 0:
                    labels[:, 2] = 1 - labels[:, 2]

        img = np.ascontiguousarray(img[:, :, ::-1])  # BGR to RGB

        if self.transforms is not None:
            img = self.transforms(img)

        return img, labels, img_path, (h, w)
Exemplo n.º 21
0
    def __getitem__(self, index):
        """Load, augment and tensorize the sample at ``index``.

        While ``self.augment`` is truthy a mosaic is fetched through
        ``load_mosaic``; otherwise a single image is loaded, letterboxed, and
        its labels are converted from normalized xywh to pixel xyxy.

        Args:
            index: Sample position; remapped through ``self.indices`` when
                ``self.image_weights`` is truthy.

        Returns:
            ``(img, targets, img_path, (h, w))``. ``img`` is a float32 tensor
            in channel-first order, channel axis reversed (BGR to RGB per the
            original comment) and divided by 255. ``targets`` has shape
            ``(nL, 6)``: column 0 stays zero and columns 1-5 receive the
            label rows (column 0 of a label row is presumably the class id,
            followed by normalized x, y, w, h). ``(h, w)`` is the image shape
            measured right after loading, before letterboxing/affine.
        """
        if self.image_weights:
            index = self.indices[index]

        img_path = self.img_files[index]
        label_path = self.label_files[index]

        # Mosaics (4 images at a time) are only loaded while augmenting.
        mosaic = self.augment
        if mosaic:
            img, labels = load_mosaic(self, index)
            h, w, _ = img.shape
        else:
            img = load_image(self, index)
            h, w, _ = img.shape  # shape before letterboxing

            # Letterbox to the per-batch shape (rect) or to a square
            if self.rect:
                target, mode = self.batch_shapes[self.batch[index]], 'rect'
            else:
                target, mode = self.img_size, 'square'
            img, ratio, padw, padh = letterbox(img, target, mode=mode)

            labels = []
            if os.path.isfile(label_path):
                raw = self.labels[index]
                if raw is None:  # labels not preloaded: parse the file now
                    with open(label_path, 'r') as f:
                        rows = [line.split() for line in f.read().splitlines()]
                    raw = np.array(rows, dtype=np.float32)

                if raw.size > 0:
                    # Normalized xywh -> pixel xyxy in the letterboxed image
                    scale_x = ratio[0] * w
                    scale_y = ratio[1] * h
                    half_w = raw[:, 3] / 2
                    half_h = raw[:, 4] / 2
                    labels = raw.copy()
                    labels[:, 1] = scale_x * (raw[:, 1] - half_w) + padw
                    labels[:, 2] = scale_y * (raw[:, 2] - half_h) + padh
                    labels[:, 3] = scale_x * (raw[:, 1] + half_w) + padw
                    labels[:, 4] = scale_y * (raw[:, 2] + half_h) + padh

        if self.augment:
            # Image-space augmentation. `mosaic` mirrors `self.augment`, so it
            # is truthy whenever this branch runs and every gain is scaled by
            # 0.0 (mosaics are not augmented again here).
            g = 0.0 if mosaic else 1.0
            hyp = self.hyp
            img, labels = random_affine(
                img,
                labels,
                degrees=hyp['degrees'] * g,
                translate=hyp['translate'] * g,
                scale=hyp['scale'] * g,
                shear=hyp['shear'] * g,
            )
            # Cutout augmentation is currently disabled.

        num_labels = len(labels)
        if num_labels:
            # Pixel xyxy -> xywh, then normalize by the current image size
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
            img_h, img_w = img.shape[:2]
            labels[:, [2, 4]] /= img_h
            labels[:, [1, 3]] /= img_w

        if self.augment:
            lr_flip, ud_flip = True, False  # which random flips are enabled

            # Random left-right flip: mirror the normalized x centre
            if lr_flip and random.random() < 0.5:
                img = np.fliplr(img)
                if num_labels:
                    labels[:, 1] = 1 - labels[:, 1]

            # Random up-down flip: mirror the normalized y centre
            if ud_flip and random.random() < 0.5:
                img = np.flipud(img)
                if num_labels:
                    labels[:, 2] = 1 - labels[:, 2]

        targets = torch.zeros((num_labels, 6))
        if num_labels:
            targets[:, 1:] = torch.from_numpy(labels)

        # Reverse channels (BGR to RGB), HWC -> CHW, uint8 -> float32 in 0-1
        chw = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1),
                                   dtype=np.float32)
        chw /= 255.0

        return torch.from_numpy(chw), targets, img_path, (h, w)
Exemplo n.º 22
0
    def __getitem__(self, index):
        """Return the (optionally augmented) sample at ``index``.

        With ``self.mosaic`` set, the image and labels come from
        ``load_mosaic``. Otherwise a single image is loaded and letterboxed,
        and its preloaded labels are mapped from normalized xywh to pixel
        xyxy in the letterboxed frame.

        Args:
            index: Sample position; remapped through ``self.indices`` when
                ``self.image_weights`` is truthy.

        Returns:
            ``(img, targets, path, shapes)``. ``img`` is a channel-first
            tensor with the channel axis reversed (BGR to RGB per the
            original comment); no dtype change or scaling happens here.
            ``targets`` is ``(nL, 6)`` with column 0 left at zero and columns
            1-5 filled from the label rows. ``shapes`` is ``None`` for
            mosaics, else ``((h0, w0), ((h / h0, w / w0), pad))``, which the
            original comment says is used for COCO mAP rescaling.
        """
        if self.image_weights:
            index = self.indices[index]

        hyp = self.hyp
        if self.mosaic:
            img, labels = load_mosaic(self, index)
            shapes = None
            # MixUp (https://arxiv.org/pdf/1710.09412.pdf) is not applied.
        else:
            img, (h0, w0), (h, w) = load_image(self, index)

            # Final letterboxed shape: per-batch when rect, else square size
            if self.rect:
                shape = self.batch_shapes[self.batch[index]]
            else:
                shape = self.img_size
            img, ratio, pad = letterbox(img,
                                        shape,
                                        auto=False,
                                        scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)

            labels = []
            raw = self.labels[index]
            if raw.size > 0:
                # Normalized xywh -> pixel xyxy (pad[0]: width, pad[1]: height)
                scale_x = ratio[0] * w
                scale_y = ratio[1] * h
                half_w = raw[:, 3] / 2
                half_h = raw[:, 4] / 2
                labels = raw.copy()
                labels[:, 1] = scale_x * (raw[:, 1] - half_w) + pad[0]
                labels[:, 2] = scale_y * (raw[:, 2] - half_h) + pad[1]
                labels[:, 3] = scale_x * (raw[:, 1] + half_w) + pad[0]
                labels[:, 4] = scale_y * (raw[:, 2] + half_h) + pad[1]

        if self.augment:
            # Image-space augmentation is skipped for mosaics
            if not self.mosaic:
                img, labels = random_affine(
                    img,
                    labels,
                    degrees=hyp['degrees'],
                    translate=hyp['translate'],
                    scale=hyp['scale'],
                    shear=hyp['shear'],
                )

            # Colour-space augmentation (return value is not used)
            augment_hsv(img,
                        hgain=hyp['hsv_h'],
                        sgain=hyp['hsv_s'],
                        vgain=hyp['hsv_v'])
            # Cutout augmentation is currently disabled.

        num_labels = len(labels)
        if num_labels:
            # Pixel xyxy -> xywh, then normalize by the current image size
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
            img_h, img_w = img.shape[:2]
            labels[:, [2, 4]] /= img_h
            labels[:, [1, 3]] /= img_w

        if self.augment:
            lr_flip, ud_flip = True, False  # which random flips are enabled

            # Random left-right flip: mirror the normalized x centre
            if lr_flip and random.random() < 0.5:
                img = np.fliplr(img)
                if num_labels:
                    labels[:, 1] = 1 - labels[:, 1]

            # Random up-down flip: mirror the normalized y centre
            if ud_flip and random.random() < 0.5:
                img = np.flipud(img)
                if num_labels:
                    labels[:, 2] = 1 - labels[:, 2]

        targets = torch.zeros((num_labels, 6))
        if num_labels:
            targets[:, 1:] = torch.from_numpy(labels)

        # Reverse channels (BGR to RGB) and go HWC -> CHW
        chw = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))

        return torch.from_numpy(chw), targets, self.img_files[index], shapes
Exemplo n.º 23
0
    def augment_collection(self, index):
        """Load one sample, augment it and return NumPy arrays.

        While ``self.augment`` is truthy, a mosaic is loaded with probability
        0.5; otherwise (or when not augmenting) a single image is loaded and
        letterboxed, and its labels are converted from normalized xywh to
        pixel xyxy. When augmenting, HSV and affine augmentation are applied
        to every sample, mosaics included.

        Args:
            index: Sample position; remapped through ``self.indices`` when
                ``self.image_weights`` is truthy.

        Returns:
            ``(img, labels_out, img_path, (h, w))``. ``img`` is a float32
            channel-first array, channel axis reversed (BGR to RGB per the
            original comment), divided by 255 and optionally mean/std
            normalized. ``labels_out`` is a float32 ``(nL, 7)`` array:
            column 0 is left at zero, column 1 is the label's first column
            (cls), column 2 is a gt_score fixed at 1 and columns 3-6 are the
            normalized x, y, w, h. ``(h, w)`` is the image shape measured
            right after loading.
        """
        if self.image_weights:
            index = self.indices[index]

        img_path = self.img_files[index]
        label_path = self.label_files[index]

        # Mosaic (4 images at a time) is used only while augmenting, and then
        # only for a random half of the samples. The random draw happens only
        # when self.augment is truthy.
        if self.augment and random.random() < 0.5:
            img, labels = load_mosaic(self, index)
            h, w = img.shape[:2]
        else:
            img = load_image(self, index)
            h, w = img.shape[:2]  # shape before letterboxing

            # Letterbox to the per-batch shape (rect) or to a square
            if self.rect:
                img, ratio, padw, padh = letterbox(
                    img, self.batch_shapes[self.batch[index]], mode='rect')
            else:
                img, ratio, padw, padh = letterbox(img,
                                                   self.img_size,
                                                   mode='square')

            labels = []
            if os.path.isfile(label_path):
                x = self.labels[index]
                if x is None:  # labels not preloaded: parse the file now
                    with open(label_path, 'r') as f:
                        x = np.array(
                            [row.split() for row in f.read().splitlines()],
                            dtype=np.float32)

                if x.size > 0:
                    # Normalized xywh -> pixel xyxy in the letterboxed image
                    labels = x.copy()
                    labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + padw
                    labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + padh
                    labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + padw
                    labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + padh

        if self.augment:
            hyp = self.hyp

            # Colour-space augmentation (return value is not used)
            augment_hsv(img,
                        hgain=hyp['hsv_h'],
                        sgain=hyp['hsv_s'],
                        vgain=hyp['hsv_v'])

            # Image-space augmentation. The gain is fixed at 1, so mosaics
            # are augmented as well. The earlier conditional value of `g`
            # was always overwritten and has been removed.
            g = 1
            img, labels = random_affine(img,
                                        labels,
                                        degrees=hyp['degrees'] * g,
                                        translate=hyp['translate'] * g,
                                        scale=hyp['scale'] * g,
                                        shear=hyp['shear'] * g)
            # Cutout augmentation is currently disabled.

        nL = len(labels)  # number of labels
        if nL:
            # Pixel xyxy -> xywh, then normalize by the current image size
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
            labels[:, [2, 4]] /= img.shape[0]  # height
            labels[:, [1, 3]] /= img.shape[1]  # width

        if self.augment:
            # Random left-right flip: mirror the normalized x centre
            lr_flip = True
            if lr_flip and random.random() < 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # Random up-down flip (disabled; original note: enable for top view)
            ud_flip = False
            if ud_flip and random.random() < 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        # Targets carry an extra gt_score column compared with the usual
        # (nL, 6) layout.
        labels_out = np.zeros((nL, 7), dtype='float32')
        if nL:
            labels_out[:, 1] = labels[:, 0]  # cls
            labels_out[:, 2] = 1  # gt_score
            labels_out[:, 3:] = labels[:, 1:]  # [x y w h]

        # Reverse channels (BGR to RGB), HWC -> CHW, uint8 -> float32 in 0-1
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img, dtype=np.float32)
        img /= 255.0

        # NOTE(review): `norm_with_mean_std` is not defined in this method;
        # presumably a module-level flag - confirm it exists at import time.
        if norm_with_mean_std:
            # Per-channel mean/std normalization
            img[0] = (img[0] - 0.485) / 0.229
            img[1] = (img[1] - 0.456) / 0.224
            img[2] = (img[2] - 0.406) / 0.225

        return img, labels_out, img_path, (h, w)
Exemplo n.º 24
0
    def __getitem__(self, index):
        """Fetch the sample at ``index`` as tensors plus shape metadata.

        While ``self.augment`` is truthy a mosaic is loaded through
        ``load_mosaic``; otherwise one image is loaded and letterboxed, and
        its labels are mapped from normalized xywh to pixel xyxy.

        Args:
            index: Sample position; remapped through ``self.indices`` when
                ``self.image_weights`` is truthy.

        Returns:
            ``(img, targets, img_path, ((h, w), (ratio, pad)))``. ``img`` is
            a channel-first tensor with the channel axis reversed (BGR to RGB
            per the original comment); no dtype change or scaling happens
            here. ``targets`` is ``(nL, 6)`` with column 0 left at zero and
            columns 1-5 filled from the label rows. ``(h, w)`` is the image
            shape measured right after loading; ``ratio`` and ``pad`` come
            from ``letterbox`` and are both ``None`` for mosaics.
        """
        if self.image_weights:
            index = self.indices[index]

        img_path = self.img_files[index]
        label_path = self.label_files[index]
        hyp = self.hyp

        # Mosaics (4 images at a time) are only loaded while augmenting.
        mosaic = self.augment
        if mosaic:
            img, labels = load_mosaic(self, index)
            h, w = img.shape[:2]
            ratio = pad = None
        else:
            img = load_image(self, index)
            h, w = img.shape[:2]  # shape before letterboxing

            # Final letterboxed shape: per-batch when rect, else square size
            if self.rect:
                shape = self.batch_shapes[self.batch[index]]
            else:
                shape = self.img_size
            img, ratio, pad = letterbox(img,
                                        shape,
                                        auto=False,
                                        scaleup=self.augment)

            labels = []
            if os.path.isfile(label_path):
                raw = self.labels[index]
                if raw is None:  # labels not preloaded: parse the file now
                    with open(label_path, 'r') as f:
                        rows = [line.split() for line in f.read().splitlines()]
                    raw = np.array(rows, dtype=np.float32)

                if raw.size > 0:
                    # Normalized xywh -> pixel xyxy
                    # (pad[0]: width padding, pad[1]: height padding)
                    scale_x = ratio[0] * w
                    scale_y = ratio[1] * h
                    half_w = raw[:, 3] / 2
                    half_h = raw[:, 4] / 2
                    labels = raw.copy()
                    labels[:, 1] = scale_x * (raw[:, 1] - half_w) + pad[0]
                    labels[:, 2] = scale_y * (raw[:, 2] - half_h) + pad[1]
                    labels[:, 3] = scale_x * (raw[:, 1] + half_w) + pad[0]
                    labels[:, 4] = scale_y * (raw[:, 2] + half_h) + pad[1]

        if self.augment:
            # Image-space augmentation is skipped for mosaics
            if not mosaic:
                img, labels = random_affine(
                    img,
                    labels,
                    degrees=hyp['degrees'],
                    translate=hyp['translate'],
                    scale=hyp['scale'],
                    shear=hyp['shear'],
                )

            # Colour-space augmentation (return value is not used)
            augment_hsv(img,
                        hgain=hyp['hsv_h'],
                        sgain=hyp['hsv_s'],
                        vgain=hyp['hsv_v'])
            # Cutout augmentation is currently disabled.

        num_labels = len(labels)
        if num_labels:
            # Pixel xyxy -> xywh, then normalize by the current image size
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
            img_h, img_w = img.shape[:2]
            labels[:, [2, 4]] /= img_h
            labels[:, [1, 3]] /= img_w

        if self.augment:
            lr_flip, ud_flip = True, False  # which random flips are enabled

            # Random left-right flip: mirror the normalized x centre
            if lr_flip and random.random() < 0.5:
                img = np.fliplr(img)
                if num_labels:
                    labels[:, 1] = 1 - labels[:, 1]

            # Random up-down flip: mirror the normalized y centre
            if ud_flip and random.random() < 0.5:
                img = np.flipud(img)
                if num_labels:
                    labels[:, 2] = 1 - labels[:, 2]

        targets = torch.zeros((num_labels, 6))
        if num_labels:
            targets[:, 1:] = torch.from_numpy(labels)

        # Reverse channels (BGR to RGB) and go HWC -> CHW
        chw = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))

        meta = ((h, w), (ratio, pad))
        return torch.from_numpy(chw), targets, img_path, meta
Exemplo n.º 25
0
    def __getitem__(self, index):
        """Return the sample at ``index`` with box and point-pair labels.

        Label rows are indexed as: column 0, a box in columns 1-4, and eight
        (x, y) pairs in columns 5-20 (x in odd columns 5..19, y in even
        columns 6..20; presumably landmarks/keypoints). With ``self.mosaic``
        set the sample comes from ``load_mosaic``; otherwise one image is
        loaded and letterboxed and its labels are mapped to pixel coordinates.

        Args:
            index: Sample position; remapped through ``self.indices`` when
                ``self.image_weights`` is truthy.

        Returns:
            ``(img, targets, path, shapes)``. ``img`` is a channel-first
            tensor with the channel axis reversed (BGR to RGB per the
            original comment); no dtype change or scaling happens here.
            ``targets`` is ``(nL, 22)`` with column 0 left at zero and the
            remaining columns filled from the label rows, all coordinates
            normalized by the final image size. ``shapes`` is ``None`` for
            mosaics, else ``((h0, w0), ((h / h0, w / w0), pad))``.
        """
        if self.image_weights:
            index = self.indices[index]

        # Column indices of the extra point pairs: x at 5,7,..,19 and y at
        # 6,8,..,20. Integer-array indexing keeps out-of-range columns an
        # IndexError, like per-column access.
        pt_x = list(range(5, 21, 2))
        pt_y = list(range(6, 21, 2))

        hyp = self.hyp
        if self.mosaic:
            img, labels = load_mosaic(self, index)
            shapes = None
        else:
            img, (h0, w0), (h, w) = load_image(self, index)

            # Final letterboxed shape: per-batch when rect, else square size
            if self.rect:
                shape = self.batch_shapes[self.batch[index]]
            else:
                shape = self.img_size
            img, ratio, pad = letterbox(img,
                                        shape,
                                        auto=False,
                                        scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)

            labels = []
            raw = self.labels[index]
            if raw is not None and raw.size > 0:
                scale_x = ratio[0] * w
                scale_y = ratio[1] * h
                half_w = raw[:, 3] / 2
                half_h = raw[:, 4] / 2
                labels = raw.copy()

                # Box: normalized xywh -> pixel xyxy
                # (pad[0]: width padding, pad[1]: height padding)
                labels[:, 1] = scale_x * (raw[:, 1] - half_w) + pad[0]
                labels[:, 2] = scale_y * (raw[:, 2] - half_h) + pad[1]
                labels[:, 3] = scale_x * (raw[:, 1] + half_w) + pad[0]
                labels[:, 4] = scale_y * (raw[:, 2] + half_h) + pad[1]

                # Point pairs: normalized -> pixel coordinates
                labels[:, pt_x] = scale_x * raw[:, pt_x] + pad[0]
                labels[:, pt_y] = scale_y * raw[:, pt_y] + pad[1]

        if self.augment:
            # Image-space augmentation is skipped for mosaics
            if not self.mosaic:
                img, labels = random_affine(
                    img,
                    labels,
                    degrees=hyp['degrees'],
                    translate=hyp['translate'],
                    scale=hyp['scale'],
                    shear=hyp['shear'],
                )

            # Colour-space augmentation (return value is not used)
            augment_hsv(img,
                        hgain=hyp['hsv_h'],
                        sgain=hyp['hsv_s'],
                        vgain=hyp['hsv_v'])
            # Cutout augmentation is currently disabled.

        num_labels = len(labels)
        if num_labels:
            # Box: pixel xyxy -> xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

            # Normalize every coordinate by the current image size
            img_h, img_w = img.shape[:2]
            labels[:, [2, 4]] /= img_h
            labels[:, [1, 3]] /= img_w
            labels[:, pt_y] /= img_h
            labels[:, pt_x] /= img_w

        if self.augment:
            lr_flip, ud_flip = True, False  # which random flips are enabled

            # Random left-right flip: mirror every normalized x coordinate.
            # NOTE(review): point columns are mirrored in place, not swapped
            # between left/right pairs - confirm that is intended.
            if lr_flip and random.random() < 0.5:
                img = np.fliplr(img)
                if num_labels:
                    labels[:, 1] = 1 - labels[:, 1]
                    labels[:, pt_x] = 1 - labels[:, pt_x]

            # Random up-down flip: mirror every normalized y coordinate
            if ud_flip and random.random() < 0.5:
                img = np.flipud(img)
                if num_labels:
                    labels[:, 2] = 1 - labels[:, 2]
                    labels[:, pt_y] = 1 - labels[:, pt_y]

        targets = torch.zeros((num_labels, 6 + 16))
        if num_labels:
            targets[:, 1:] = torch.from_numpy(labels)

        # Reverse channels (BGR to RGB) and go HWC -> CHW
        chw = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))

        return torch.from_numpy(chw), targets, self.img_files[index], shapes
Exemplo n.º 26
0
    def __getitem__(self, index):  # overrides the parent Dataset method
        """Return the sample at ``index`` together with the index itself.

        With ``self.mosaic`` set the sample comes from ``load_mosaic``;
        otherwise one image is loaded and letterboxed and its preloaded
        labels (class, x, y, w, h per the original comment) are mapped from
        normalized xywh to pixel xyxy.

        Args:
            index: Sample position in the dataset.

        Returns:
            ``(img, targets, path, shapes, index)``. ``img`` is a
            channel-first tensor with the channel axis reversed (BGR to RGB
            per the original comment); no dtype change or scaling happens
            here. ``targets`` is ``(nL, 6)`` with column 0 left at zero and
            columns 1-5 filled from the label rows. ``shapes`` is ``None``
            for mosaics, else ``((h0, w0), ((h / h0, w / w0), pad))``.
        """
        hyp = self.hyp
        if self.mosaic:
            img, labels = load_mosaic(self, index)
            shapes = None
        else:
            img, (h0, w0), (h, w) = load_image(self, index)

            # Final letterboxed shape: per-batch when rect, else square size
            if self.rect:
                shape = self.batch_shapes[self.batch[index]]
            else:
                shape = self.img_size
            img, ratio, pad = letterbox(img,
                                        shape,
                                        auto=False,
                                        scale_up=self.augment)
            # Original comment: kept for COCO mAP rescaling
            shapes = (h0, w0), ((h / h0, w / w0), pad)

            labels = []
            raw = self.labels[index]
            if raw.size > 0:
                # Normalized xywh -> pixel xyxy
                # (pad[0]: width padding, pad[1]: height padding)
                scale_x = ratio[0] * w
                scale_y = ratio[1] * h
                half_w = raw[:, 3] / 2
                half_h = raw[:, 4] / 2
                labels = raw.copy()
                labels[:, 1] = scale_x * (raw[:, 1] - half_w) + pad[0]
                labels[:, 2] = scale_y * (raw[:, 2] - half_h) + pad[1]
                labels[:, 3] = scale_x * (raw[:, 1] + half_w) + pad[0]
                labels[:, 4] = scale_y * (raw[:, 2] + half_h) + pad[1]

        if self.augment:
            # Image-space augmentation is skipped for mosaics
            if not self.mosaic:
                img, labels = random_affine(
                    img,
                    labels,
                    degrees=hyp["degrees"],
                    translate=hyp["translate"],
                    scale=hyp["scale"],
                    shear=hyp["shear"],
                )

            # Colour-space augmentation (return value is not used)
            augment_hsv(img,
                        h_gain=hyp["hsv_h"],
                        s_gain=hyp["hsv_s"],
                        v_gain=hyp["hsv_v"])

        num_labels = len(labels)
        if num_labels:
            # Pixel xyxy -> xywh, then normalize by the current image size
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
            img_h, img_w = img.shape[:2]
            labels[:, [2, 4]] /= img_h
            labels[:, [1, 3]] /= img_w

        if self.augment:
            lr_flip, ud_flip = True, False  # which random flips are enabled

            # Random left-right flip: x_center -> 1 - x_center
            if lr_flip and random.random() < 0.5:
                img = np.fliplr(img)
                if num_labels:
                    labels[:, 1] = 1 - labels[:, 1]

            # Random up-down flip: y_center -> 1 - y_center
            if ud_flip and random.random() < 0.5:
                img = np.flipud(img)
                if num_labels:
                    labels[:, 2] = 1 - labels[:, 2]

        targets = torch.zeros((num_labels, 6))
        if num_labels:
            # Column 0 is intentionally left at zero here.
            targets[:, 1:] = torch.from_numpy(labels)

        # Reverse channels (BGR to RGB) and go HWC -> CHW
        chw = np.ascontiguousarray(img[:, :, ::-1].transpose(2, 0, 1))

        path = self.img_files[index]
        return torch.from_numpy(chw), targets, path, shapes, index
    def __getitem__(self, index):
        """Load one sample, apply imgaug colour augmentation and tensorize.

        While ``self.augment`` is truthy a mosaic is loaded through
        ``load_mosaic``; otherwise one image is loaded and letterboxed, and
        its labels are mapped from normalized xywh to pixel xyxy. The image
        is then reordered to RGB and one randomly chosen imgaug colour
        operation is applied.

        Args:
            index: Sample position; remapped through ``self.indices`` when
                ``self.image_weights`` is truthy.

        Returns:
            ``(img, targets, img_path, (h, w))``. ``img`` is a float32
            channel-first tensor divided by 255. ``targets`` is ``(nL, 6)``
            with column 0 left at zero and columns 1-5 filled from the label
            rows. ``(h, w)`` is the image shape measured right after loading.
        """
        if self.image_weights:
            index = self.indices[index]

        img_path = self.img_files[index]
        label_path = self.label_files[index]

        # Mosaics (4 images at a time) are only loaded while augmenting.
        mosaic = True and self.augment
        if mosaic:
            img, labels = load_mosaic(self, index)
            h, w = img.shape[:2]
        else:
            img = load_image(self, index)
            h, w = img.shape[:2]  # shape before letterboxing

            # Letterbox to the per-batch shape (rect) or to a square
            if self.rect:
                img, ratio, padw, padh = letterbox(
                    img, self.batch_shapes[self.batch[index]], mode='rect')
            else:
                img, ratio, padw, padh = letterbox(img,
                                                   self.img_size,
                                                   mode='square')

            labels = []
            if os.path.isfile(label_path):
                x = self.labels[index]
                if x is None:  # labels not preloaded: parse the file now
                    with open(label_path, 'r') as f:
                        x = np.array(
                            [row.split() for row in f.read().splitlines()],
                            dtype=np.float32)

                if x.size > 0:
                    # Normalized xywh -> pixel xyxy in the letterboxed image
                    labels = x.copy()
                    labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + padw
                    labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + padh
                    labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + padw
                    labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + padh

        # This guard is true for both augment=True and augment=False, so the
        # channel reorder below also happens at test time (the final
        # conversion no longer reverses channels).
        # NOTE(review): as a side effect the random colour operation is also
        # applied when augment is False - confirm this is intended.
        if self.augment or self.augment is False:
            img = img[:, :, (2, 1, 0)]  # BGR (cv2) to RGB

            # Colour-space augmentation: apply exactly one of these ops
            colour_ops = [
                iaa.Multiply([1, 2.5, 0.5, 1.5]),
                iaa.SigmoidContrast(gain=10, cutoff=[0.75, 1, 0.5]),
            ]
            sequence_iaa = iaa.Sequential([random.choice(colour_ops)])
            img = sequence_iaa.augment_image(img)

            # Image-space augmentation is called with every gain set to zero
            img, labels = random_affine(img,
                                        labels,
                                        degrees=0,
                                        translate=0,
                                        scale=0,
                                        shear=0)
            # Cutout augmentation is currently disabled.

        nL = len(labels)  # number of labels
        if nL:
            # Pixel xyxy -> xywh, then normalize by the current image size
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
            labels[:, [2, 4]] /= img.shape[0]  # height
            labels[:, [1, 3]] /= img.shape[1]  # width

        if self.augment:
            # Random left-right flip: mirror the normalized x centre
            lr_flip = True
            if lr_flip and random.random() < 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # Random up-down flip: mirror the normalized y centre
            ud_flip = False
            if ud_flip and random.random() < 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # HWC -> CHW without reversing channels (already reordered above),
        # then uint8 -> float32 scaled to 0.0 - 1.0
        img = img[:, :, :].transpose(2, 0, 1)
        img = np.ascontiguousarray(img, dtype=np.float32)
        img /= 255.0

        return torch.from_numpy(img), labels_out, img_path, (h, w)