    def on_loader_end(self, state: RunnerState) -> None:
        logger = _get_tensorboard_logger(state)

        if self.best_score is not None:
            best_samples = self.visualize_batch(self.best_input,
                                                self.best_output)
            for i, image in enumerate(best_samples):
                logger.add_image(f"Best Batch/{i}/epoch",
                                 tensor_from_rgb_image(image), state.step)

        if self.worst_score is not None:
            worst_samples = self.visualize_batch(self.worst_input,
                                                 self.worst_output)
            for i, image in enumerate(worst_samples):
                logger.add_image(f"Worst Batch/{i}/epoch",
                                 tensor_from_rgb_image(image), state.step)
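
For reference, tensor_from_rgb_image, used throughout these examples, converts an HWC numpy image into a CHW torch tensor. A minimal sketch of that behaviour, assuming the common implementation found in pytorch-toolbelt-style utility libraries (the _sketch suffix marks it as illustrative, not library source):

import numpy as np
import torch

def tensor_from_rgb_image_sketch(image: np.ndarray) -> torch.Tensor:
    # HWC -> CHW, made contiguous so torch.from_numpy accepts the layout
    chw = np.ascontiguousarray(np.moveaxis(image, -1, 0))
    return torch.from_numpy(chw)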
Example #2
    def __getitem__(self, idx):
        idx = idx % len(self.samples)

        image_path, mask_path = self.samples[idx]
        image_id = image_path.stem

        image = load_rgb(image_path, lib=self.imread_lib)
        mask = load_grayscale(mask_path)

        # apply augmentations
        sample = self.transform(image=image, mask=mask)
        image, mask = sample["image"], sample["mask"]

        if self.downsample_mask_factor is not None and self.downsample_mask_factor != 1:
            mask_height, mask_width = mask.shape[:2]

            new_mask_height = mask_height // self.downsample_mask_factor
            new_mask_width = mask_width // self.downsample_mask_factor

            mask = cv2.resize(mask, (new_mask_width, new_mask_height),
                              interpolation=cv2.INTER_NEAREST)

        if self.class_id is not None:
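            # binary target: single-channel float mask of pixels matching class_id;
            # otherwise the integer label map is kept for multi-class training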
            mask = torch.unsqueeze(torch.from_numpy(mask == self.class_id),
                                   0).float()
        else:
            mask = torch.from_numpy(mask).long()

        return {
            "image_id": image_id,
            "features": tensor_from_rgb_image(image),
            "targets": mask
        }
Example #3
    def __getitem__(self, idx):
        image_path = self.file_paths[idx]
        raw_image = load_rgb(image_path, lib="cv2")
        image = raw_image.astype(np.float32)

        if self.origin_size:
            resize = 1
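            # native resolution is kept; a factor of 1 means no scaling to undo later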
        else:
            # testing scale
            im_shape = image.shape
            image_size_min = np.min(im_shape[:2])
            image_size_max = np.max(im_shape[:2])
            resize = float(self.target_size) / float(image_size_min)
            # prevent bigger axis from being more than max_size:
            if np.round(resize * image_size_max) > self.max_size:
                resize = float(self.max_size) / float(image_size_max)

            image = cv2.resize(image,
                               None,
                               None,
                               fx=resize,
                               fy=resize,
                               interpolation=cv2.INTER_LINEAR)

        image = self.transform(image=image)["image"]

        return {
            "torched_image": tensor_from_rgb_image(image),
            "resize": resize,
            "raw_image": raw_image,
            "image_path": str(image_path),
        }
Example #4
    def __getitem__(self, idx: int) -> Optional[Dict[str, Any]]:
        image_path = self.file_paths[idx]

        image = cv2.imread(str(image_path))
        if image is None:
            return None

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        image_height, image_width = image.shape[:2]

        image = self.resize(image=image)["image"]

        padded = pad_to_size(target_size=(self.max_size, self.max_size),
                             image=image)

        image = padded["image"]
        pads = padded["pads"]
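        # keep the padding amounts so predictions can be mapped back to the
        # original, unpadded resolution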

        image = self.transform(image=image)["image"]

        return {
            "torched_image": tensor_from_rgb_image(image),
            "image_path": str(image_path),
            "pads": np.array(pads),
            "image_height": image_height,
            "image_width": image_width,
        }
Example #5
def test_tiles_split_merge_cuda():
    if not torch.cuda.is_available():
        return

    class MaxChannelIntensity(nn.Module):
        def __init__(self):
            super().__init__()

        def forward(self, input):
            max_channel, _ = torch.max(input, dim=1, keepdim=True)
            return max_channel

    image = np.random.random((5000, 5000, 3)).astype(np.uint8)
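    # note: np.random.random yields floats in [0, 1), so this uint8 cast makes the
    # image all zeros; the exact-equality assert below then holds despite the
    # weighted blending of overlapping tiles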
    tiler = ImageSlicer(image.shape,
                        tile_size=(512, 512),
                        tile_step=(256, 256),
                        weight='pyramid')
    tiles = [tensor_from_rgb_image(tile) for tile in tiler.split(image)]

    model = MaxChannelIntensity().eval().cuda()

    merger = CudaTileMerger(tiler.target_shape, 1, tiler.weight)
    for tiles_batch, coords_batch in DataLoader(list(zip(tiles, tiler.crops)),
                                                batch_size=8,
                                                pin_memory=True):
        tiles_batch = tiles_batch.float().cuda()
        pred_batch = model(tiles_batch)

        merger.integrate_batch(pred_batch, coords_batch)

    merged = np.moveaxis(to_numpy(merger.merge()), 0, -1).astype(np.uint8)
    merged = tiler.crop_to_orignal_size(merged)

    np.testing.assert_equal(merged, image.max(axis=2, keepdims=True))
Example #6
    def __call__(self, force_apply=True, **kwargs):
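        # final pipeline step: convert the numpy image (and optional mask) to torch tensors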
        kwargs.update({'image': tensor_from_rgb_image(kwargs['image'])})
        if 'mask' in kwargs.keys():
            kwargs.update(
                {'mask': tensor_from_mask_image(kwargs['mask'].float())})

        return kwargs
Example #7
def inference_tiles(inference_model,
                    img_full,
                    device='cuda',
                    shape=(32, 1, 768, 448),
                    weight='mean',
                    mean=88.904434,
                    std=62.048634,
                    plot=False):
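    # shape packs (batch_size, channels) plus the two tile dimensions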
    bs = shape[0]
    input_x = shape[2]
    input_y = shape[3]

    # Cut large image into overlapping tiles
    tiler = ImageSlicer(img_full.shape,
                        tile_size=(input_x, input_y),
                        tile_step=(input_x // 2, input_y // 2),
                        weight=weight)

    # HWC -> CHW. Optionally, do normalization here
    tiles = [
        tensor_from_rgb_image(tile)
        for tile in tiler.split(cv2.cvtColor(img_full, cv2.COLOR_GRAY2RGB))
    ]

    # Allocate a CUDA buffer for holding entire mask
    merger = CudaTileMerger(tiler.target_shape,
                            channels=1,
                            weight=tiler.weight)

    # Run predictions for tiles and accumulate them
    for tiles_batch, coords_batch in DataLoader(list(zip(tiles, tiler.crops)),
                                                batch_size=bs,
                                                pin_memory=True):

        # Move tile to GPU
        tiles_batch = ((tiles_batch.float() - mean) / std).to(device)

        # Predict
        pred_batch = inference_model(tiles_batch)

        # Merge on GPU
        merger.integrate_batch(pred_batch, coords_batch)

        if plot:
            for i in range(pred_batch.shape[0]):
                plt.imshow(tiles_batch.detach().cpu().numpy()[i, 0, :, :])
                plt.show()
                plt.imshow(pred_batch.detach().cpu().numpy()[i, 0, :, :])
                plt.colorbar()
                plt.show()

    # Normalize accumulated mask and convert back to numpy
    merged_mask = np.moveaxis(to_numpy(merger.merge()), 0,
                              -1).astype('float32')
    merged_mask = tiler.crop_to_orignal_size(merged_mask)

    torch.cuda.empty_cache()

    return merged_mask.squeeze()
Example #8
def predict_mask(image,
                 model,
                 dims=3,
                 size=394,
                 step=192,
                 batch_size=8,
                 plot_image=False,
                 dstdir=None,
                 img_name='image1.png'):
    if image.ndim == 2:
        image = np.expand_dims(image, 2)
    if image.shape[-1] != dims:
        if image.shape[-1] == 1:
            image = np.repeat(image, 3, axis=2)
        elif image.shape[-1] == 3:
            image = np.expand_dims(image[:, :, 0], 2)
    print(image.shape)

    # Cut large image into overlapping tiles
    tiler = ImageSlicer(image.shape,
                        tile_size=(size, size),
                        tile_step=(step, step),
                        weight='pyramid')

    # HWC -> CHW. Optionally, do normalization here
    tiles = [tensor_from_rgb_image(tile) for tile in tiler.split(image)]

    # Allocate a CUDA buffer for holding entire mask
    merger = CudaTileMerger(tiler.target_shape, 1, tiler.weight)

    # Run predictions for tiles and accumulate them
    with torch.no_grad():
        for tiles_batch, coords_batch in DataLoader(list(
                zip(tiles, tiler.crops)),
                                                    batch_size=batch_size,
                                                    pin_memory=True):
            tiles_batch = tiles_batch.float().cuda()
            pred_batch = model(tiles_batch)
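            # argmax over the class dimension gives a hard per-pixel label map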
            pred_mask = pred_batch.max(dim=1)[1].float()

            merger.integrate_batch(pred_mask, coords_batch)

    # Normalize accumulated mask and convert back to numpy
    merged_mask = np.moveaxis(to_numpy(merger.merge()), 0, -1).astype(np.uint8)
    merged_mask = tiler.crop_to_orignal_size(merged_mask)

    if plot_image:
        assert dstdir is not None, 'dstdir should be passed'
        fig, ax = plt.subplots(ncols=2, figsize=(20, 10))
        ax[0].imshow(image[:, :, 0], cmap='gray')
        ax[1].imshow(merged_mask[:, :, 0], alpha=0.3)
        fig.savefig(osp.join(dstdir, img_name),
                    bbox_inches='tight',
                    pad_inches=0)
        print(osp.join(dstdir, img_name))
    return merged_mask
Example #9
    def __getitem__(self, index):
        image = self._get_image(index)
        mask = self._get_mask(index)
        data = self.transform(image=image, mask=mask)

        return {
            'features': tensor_from_rgb_image(data['image']),
            'targets': tensor_from_mask_image(data['mask']).float(),
            'image_id': self.image_ids[index]
        }
Example #10
    def __getitem__(self, index):
        image = self._get_image(index)
        mask = self._get_mask(index)
        data = self.transform(image=image, mask=mask)

        return {
            "features": tensor_from_rgb_image(data["image"]),
            "targets": tensor_from_mask_image(data["mask"]).float(),
            "image_id": self.image_ids[index],
        }
Example #11
    def __getitem__(self, index):
        image = cv2.imread(self.images[index], cv2.IMREAD_COLOR)

        data = self.transform(image=image)
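        # Canny edges of the augmented image serve as the target mask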
        data['mask'] = canny_edges(data['image'])
        data = self.normalize(**data)
        data['image'] = tensor_from_rgb_image(data['image'])
        data['mask'] = torch.from_numpy(data['mask']).float().unsqueeze(0)

        return {'features': data['image'], 'targets': data['mask']}
Example #12
    def __getitem__(self, index: int) -> Dict[str, Any]:
        labels = self.labels[index]
        file_name = labels["file_name"]
        image = load_rgb(self.image_path / file_name)

        # annotations will have the format
        # 4: box, 10 landmarks, 1: landmarks / no landmarks, 1: mask / no_mask

        annotations = np.zeros((0, 16))

        for label in labels["annotations"]:
            annotation = np.zeros((1, 16))
            # bbox
            annotation[0, 0] = label["x_min"]
            annotation[0, 1] = label["y_min"]
            annotation[0, 2] = label["x_min"] + label["width"]
            annotation[0, 3] = label["y_min"] + label["height"]

            if label["landmarks"]:
                landmarks = np.array(label["landmarks"])
                # landmarks
                annotation[0, 4:14] = landmarks[self.valid_annotation_indices]
                if annotation[0, 4] < 0:
                    annotation[0, 14] = -1
                else:
                    annotation[0, 14] = 1

            if "dlib_landmarks" in label and self.add_mask_prob is not None and random.random() < self.add_mask_prob:
                points = label["dlib_landmarks"]
                target_points, _, _ = extract_target_points_and_characteristic(np.array(points).astype(np.int32))
                image = cv2.fillPoly(image, [target_points], color=random_color())
                annotation[0, 15] = 1
            else:
                annotation[0, 15] = 0

            annotations = np.append(annotations, annotation, axis=0)

        target = np.array(annotations)

        image, target = self.preproc(image, target)

        image = albu.Compose(
            [
                albu.RandomBrightnessContrast(brightness_limit=0.125, contrast_limit=(0.5, 1.5), p=0.5),
                albu.HueSaturationValue(hue_shift_limit=18, val_shift_limit=0, p=0.5),
                albu.Resize(height=self.image_size, width=self.image_size, p=1),
                albu.Normalize(p=1),
            ]
        )(image=image)["image"]

        return {
            "image": tensor_from_rgb_image(image),
            "annotation": target.astype(np.float32),
            "file_name": file_name,
        }
Example #13
def test_inference():
    model_checkpoint = '../pretrained/seresnext50_gap_512_medium_aptos2019_idrid_fold0_hopeful_easley.pth'
    checkpoint = torch.load(model_checkpoint)
    model_name = checkpoint['checkpoint_data']['cmd_args']['model']

    num_classes = len(get_class_names())
    model = get_model(model_name, pretrained=False, num_classes=num_classes)
    model.load_state_dict(checkpoint['model_state_dict'])

    for image_fname in [
            # '4_left.png',
            # '35_left.png',
            '44_right.png',
            '68_right.png',
            # '92_left.png'
    ]:
        transform = get_test_transform(image_size=(512, 512), crop_black=True)

        image = cv2.imread(image_fname)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        image_transformed = transform(image=image)['image']
        image_transformed = tensor_from_rgb_image(image_transformed).unsqueeze(0)

        with torch.no_grad():
            model = model.eval().cuda()
            predictions = model(image_transformed.cuda())
            print(predictions['logits'].softmax(dim=1))
            print(predictions['regression'])

        add_mild_dr = AddMicroaneurisms(p=1)
        data = add_mild_dr(image=image, diagnosis=0)
        image_transformed = transform(image=data['image'])['image']
        image_transformed = tensor_from_rgb_image(image_transformed).unsqueeze(0)

        with torch.no_grad():
            model = model.eval().cuda()
            predictions = model(image_transformed.cuda())
            print(predictions['logits'].softmax(dim=1))
            print(predictions['regression'])
Example #14
    def _log_image(self,
                   loggers,
                   mode: str,
                   image,
                   name,
                   step: int,
                   suffix=""):
        for logger in loggers:
            if isinstance(logger, TensorboardLogger):
                logger.loggers[mode].add_image(f"{name}{suffix}",
                                               tensor_from_rgb_image(image),
                                               step)
Example #15
    def __getitem__(self, idx):
        image_path = self.image_paths[idx]

        image = load_rgb(image_path, lib=self.imread_library)

        # apply transformations
        normalized_image = self.transform(image=image)["image"]

        if self.factor is not None:
            normalized_image, pads = pad(normalized_image, factor=self.factor)

            return {
                "image_id": image_path.stem,
                "features": tensor_from_rgb_image(normalized_image),
                "pads": np.array(pads),
            }

        return {
            "image_id": image_path.stem,
            "features": tensor_from_rgb_image(normalized_image)
        }
Example #16
    def _log_samples(self, samples, name, logger, step):
        if 'tensorboard' in self.targets:
            for i, image in enumerate(samples):
                logger.add_image(f"{self.target_metric}/{name}/{i}", tensor_from_rgb_image(image), step)

        if 'matplotlib' in self.targets:
            for i, image in enumerate(samples):
                plt.figure()
                plt.imshow(image)
                plt.tight_layout()
                plt.axis('off')
                plt.show()
Example #17
    def __getitem__(self, idx: int) -> Dict[str, Any]:
        image_path = self.samples[idx]

        image = load_rgb(image_path, lib="cv2")

        # apply augmentations
        image = self.transform(image=image)["image"]

        return {
            "image_id": image_path.stem,
            "image": tensor_from_rgb_image(image),
            "image_path": str(image_path)
        }
Example #18
def inference(inference_model, img_full, device='cuda'):
    x, y, ch = img_full.shape

    input_x = config['training']['crop_size'][0]
    input_y = config['training']['crop_size'][1]

    # Cut large image into overlapping tiles
    tiler = ImageSlicer(img_full.shape, tile_size=(input_x, input_y),
                        tile_step=(input_x // 2, input_y // 2), weight=args.weight)

    # HWC -> CHW. Optionally, do normalization here
    tiles = [tensor_from_rgb_image(tile) for tile in tiler.split(img_full)]

    # Allocate a CUDA buffer for holding entire mask
    merger = CudaTileMerger(tiler.target_shape, channels=1, weight=tiler.weight)

    # Run predictions for tiles and accumulate them
    for tiles_batch, coords_batch in DataLoader(list(zip(tiles, tiler.crops)), batch_size=args.bs, pin_memory=True):
        # Scale to [0, 1] and move the tile to the GPU
        tiles_batch = (tiles_batch.float() / 255.).to(device)
        # Predict and move back to CPU
        pred_batch = inference_model(tiles_batch)

        # Merge on GPU
        merger.integrate_batch(pred_batch, coords_batch)

        # Plot
        if args.plot:
            for i in range(args.bs):
                if args.bs != 1:
                    plt.imshow(pred_batch.cpu().detach().numpy().astype('float32').squeeze()[i, :, :])
                else:
                    plt.imshow(pred_batch.cpu().detach().numpy().astype('float32').squeeze())
                plt.show()

    # Normalize accumulated mask and convert back to numpy
    merged_mask = np.moveaxis(to_numpy(merger.merge()), 0, -1).astype('float32')
    merged_mask = tiler.crop_to_orignal_size(merged_mask)
    # Plot
    if args.plot:
        for i in range(args.bs):
            if args.bs != 1:
                plt.imshow(merged_mask)
            else:
                plt.imshow(merged_mask.squeeze())
            plt.show()

    torch.cuda.empty_cache()
    gc.collect()

    return merged_mask.squeeze()
Example #19
    def __getitem__(self, item):
        image = cv2.imread(self.images[item])  # Read with OpenCV instead of PIL; it's faster
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        height, width = image.shape[:2]

        original = self.normalize(image=image)['image']
        transformed = self.transform(image=image)['image']

        data = {'image': tensor_from_rgb_image(transformed),
                'original': tensor_from_rgb_image(original),
                'image_id': id_from_fname(self.images[item])}

        if self.meta_features:
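            # simple global descriptors: log-dimensions, their ratio, and per-channel means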
            log_height = math.log(height)
            log_width = math.log(width)
            aspect_ratio = log_height / log_width
            mean = np.mean(image, axis=(0, 1))

            meta_features = np.array([
                log_height,
                log_width,
                aspect_ratio,
                mean[0],
                mean[1],
                mean[2]
            ])

            data['meta_features'] = meta_features

        if self.targets is not None:
            target = self.dtype(self.targets[item])
            if self.target_as_array:
                data['targets'] = np.array([target])
            else:
                data['targets'] = target

        return data
Example #20
    def __getitem__(self, index: int) -> Dict[str, Any]:
        labels = self.labels[index]

        file_name = labels["file_name"]

        if self.image_path is None:
            image = load_rgb(labels["file_path"])
        else:
            image = load_rgb(self.image_path / file_name)

        # annotations will have the format
        # 4: box, 10 landmarks, 1: landmarks / no landmarks
        num_annotations = 4 + 10 + 1
        annotations = np.zeros((0, num_annotations))

        image_height, image_width = image.shape[:2]

        for label in labels["annotations"]:
            annotation = np.zeros((1, num_annotations))
            x_min, y_min, x_max, y_max = label["bbox"]

            annotation[0, 0] = np.clip(x_min, 0, image_width - 1)
            annotation[0, 1] = np.clip(y_min, 0, image_height - 1)
            annotation[0, 2] = np.clip(x_max, x_min + 1, image_width - 1)
            annotation[0, 3] = np.clip(y_max, y_min + 1, image_height - 1)

            if "landmarks" in label and label["landmarks"]:
                landmarks = np.array(label["landmarks"])
                # landmarks
                annotation[0, 4:14] = landmarks.reshape(-1, 10)
                if annotation[0, 4] < 0:
                    annotation[0, 14] = -1
                else:
                    annotation[0, 14] = 1

            annotations = np.append(annotations, annotation, axis=0)

        if self.rotate90:
            image, annotations = random_rotate_90(image,
                                                  annotations.astype(int))

        image, annotations = self.preproc(image, annotations)

        image = self.transform(image=image)["image"]

        return {
            "image": tensor_from_rgb_image(image),
            "annotation": annotations.astype(np.float32),
            "file_name": file_name,
        }
Example #21
    def __getitem__(self, idx: int) -> Dict[str, Any]:
        idx = idx % len(self.image_paths)

        image_path = self.image_paths[idx]

        image = load_rgb(image_path, lib="cv2")

        # apply augmentations
        image = self.transform(image=image)["image"]

        orientation = random.randint(0, 3)
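        # rotate by that many quarter-turns; the rotation index is the training target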
        image = np.rot90(image, orientation)

        return {"image_id": image_path.stem, "features": tensor_from_rgb_image(image), "targets": orientation}
Example #22
def render_figure_to_tensor(figure):
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    figure.canvas.draw()

    # grab the rendered canvas as an RGBA numpy array (private Agg renderer attribute)
    image = np.array(figure.canvas.renderer._renderer)
    plt.close(figure)
    del figure

    image = tensor_from_rgb_image(image)
    return image
Example #23
    def __getitem__(self, i):
        # read data
        image = fs.read_rgb_image(self.images_fps[i])
        mask = fs.read_image_as_is(self.masks_fps[i])
        assert mask.max() < len(CLASSES)

        # apply augmentations
        sample = self.transform(image=image, mask=mask)
        image, mask = sample['image'], sample['mask']

        return {
            "image_id": id_from_fname(self.images_fps[i]),
            "features": tensor_from_rgb_image(image),
            "targets": torch.from_numpy(mask).long()
        }
Example #24
    def __getitem__(self, index: int) -> Dict[str, Any]:
        labels = self.labels[index]
        file_name = labels["file_name"]
        image = load_rgb(self.image_path / file_name)

        # annotations will have the format
        # 4: box, 10 landmarks, 1: landmarks / no landmarks
        num_annotations = 4 + 10 + 1
        annotations = np.zeros((0, num_annotations))

        image_height, image_width = image.shape[:2]

        for label in labels["annotations"]:
            annotation = np.zeros((1, num_annotations))
            # bbox

            annotation[0, 0] = np.clip(label["x_min"], 0, image_width - 1)
            annotation[0, 1] = np.clip(label["y_min"], 0, image_height - 1)
            annotation[0, 2] = np.clip(label["x_min"] + label["width"], 1,
                                       image_width - 1)
            annotation[0, 3] = np.clip(label["y_min"] + label["height"], 1,
                                       image_height - 1)

            if not 0 <= annotation[0, 0] < annotation[0, 2] < image_width:
                continue
            if not 0 <= annotation[0, 1] < annotation[0, 3] < image_height:
                continue

            if "landmarks" in label and label["landmarks"]:
                landmarks = np.array(label["landmarks"])
                # landmarks
                annotation[0, 4:14] = landmarks[self.valid_annotation_indices]
                if annotation[0, 4] < 0:
                    annotation[0, 14] = -1
                else:
                    annotation[0, 14] = 1

            annotations = np.append(annotations, annotation, axis=0)

        image, target = self.preproc(image, annotations)

        image = self.transform(image=image)["image"]

        return {
            "image": tensor_from_rgb_image(image),
            "annotation": target.astype(np.float32),
            "file_name": file_name,
        }
Example #25
    def __getitem__(self, idx: int) -> Dict[str, Any]:
        image_path, mask_path = self.samples[idx]
        image = cv2.imread(image_path)
        mask = cv2.imread(mask_path)

        sample = self.transform(image=image, mask=mask)
        image, mask = sample["image"], sample["mask"]

        mask = torch.from_numpy(mask)

        return {
            "image_id": Path(image_path).stem,
            "features": tensor_from_rgb_image(image),
            "masks": mask,
        }
Example #26
def run_validation(data_df, model, data_folder, augmentation, tiles=False):
    total_dice_coeffs = []
    mean_dice_per_image = []
    for image_n in tqdm(range(data_df.shape[0])):
        image = cv2.imread(
            os.path.join(data_folder, data_df.index.values[image_n]))
        augmented = augmentation(image=image)
        image_processed = augmented['image']
        if tiles:
            tiler = ImageSlicer(image_processed.shape[:2],
                                tile_size=(224, 224),
                                tile_step=(56, 56),
                                weight='mean')
            merger = CudaTileMerger(tiler.target_shape, 4, tiler.weight)
            tiles = [
                tensor_from_rgb_image(tile)
                for tile in tiler.split(image_processed)
            ]
            for tiles_batch, coords_batch in DataLoader(list(
                    zip(tiles, tiler.crops)),
                                                        batch_size=16,
                                                        pin_memory=True):
                tiles_batch = tiles_batch.float().cuda()
                pred_batch = torch.nn.Sigmoid()(model(tiles_batch))
                merger.integrate_batch(pred_batch, coords_batch)
            predictions = np.moveaxis(to_numpy(merger.merge()), 0, -1)
            predictions = tiler.crop_to_orignal_size(predictions)
        else:
            image_processed = torch.from_numpy(
                np.expand_dims(image_processed.transpose((2, 0, 1)),
                               0)).float()
            predictions = torch.nn.Sigmoid()(model(
                image_processed.cuda())[0]).detach().cpu().numpy()
            predictions = np.moveaxis(predictions, 0, -1)
        predictions_bin = (predictions > 0.5).astype(int)
        fname, masks = make_mask(image_n, data_df)
        dices_image = []
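        # per-class Dice over the four defect channels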
        for defect_type in range(4):
            computed_dice = dice(masks[:, :, defect_type],
                                 predictions_bin[:, :, defect_type])
            total_dice_coeffs.append(computed_dice)
            dices_image.append(computed_dice)
        mean_dice_per_image.append(np.mean(dices_image))
    return np.mean(total_dice_coeffs), mean_dice_per_image
Example #27
    def __getitem__(self, idx: int) -> Dict[str, Any]:
        image_path, mask_path = self.samples[idx]

        image = load_rgb(image_path)
        mask = load_grayscale(mask_path)

        # apply augmentations
        sample = self.transform(image=image, mask=mask)
        image, mask = sample["image"], sample["mask"]

        mask = (mask > 0).astype(np.uint8)

        mask = torch.from_numpy(mask)

        return {
            "image_id": image_path.stem,
            "features": tensor_from_rgb_image(image),
            "masks": torch.unsqueeze(mask, 0).float(),
        }
Example #28
    def __getitem__(self, index):
        image = self._get_image(index)
        mask = self._get_mask(index)
        data = self.transform(image=image, mask=mask)

        image = data["image"]
        mask = data["mask"]

        data = {
            INPUT_IMAGE_KEY: tensor_from_rgb_image(image),
            INPUT_MASK_KEY: tensor_from_mask_image(mask).float(),
            INPUT_IMAGE_ID_KEY: self.image_ids[index],
            "crop_coords": self.crop_coords_str[index],
        }

        if self.need_weight_mask:
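            # optional per-pixel loss weights computed from the ground-truth mask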
            data[INPUT_MASK_WEIGHT_KEY] = tensor_from_mask_image(
                compute_weight_mask(mask)).float()

        return data
Example #29
def prepare_frames(frames: np.ndarray, resize_coeff: Optional[Tuple[int, int]],
                   transform: albu.Compose) -> Tuple[torch.Tensor, float]:
    if resize_coeff is not None:
        target_size = min(resize_coeff)
        max_size = max(resize_coeff)

        image_height = frames.shape[1]
        image_width = frames.shape[2]

        image_size_min = min([image_width, image_height])
        image_size_max = max([image_width, image_height])

        resize_factor = float(target_size) / float(image_size_min)
        if np.round(resize_factor * image_size_max) > max_size:
            resize_factor = float(max_size) / float(image_size_max)
    else:
        resize_factor = 1

    result: List[torch.Tensor] = []

    for frame in frames:
        if resize_coeff is not None and resize_factor != 1:
            frame = cv2.resize(frame,
                               None,
                               None,
                               fx=resize_factor,
                               fy=resize_factor,
                               interpolation=cv2.INTER_LINEAR)

        new_frame = transform(image=frame)["image"]

        result += [tensor_from_rgb_image(new_frame)]

    if len(result) != 1:
        result = torch.stack(result)
    else:
        result = torch.unsqueeze(result[0], 0)

    return result, resize_factor
Example #30
def test_tiles_split_merge_non_dividable_cuda():
    image = np.random.random((5632, 5120, 3)).astype(np.uint8)
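    # note: the uint8 cast of [0, 1) floats makes this image all zeros, and 5632
    # is not a multiple of the 1280 tile size, exercising the internal padding path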
    tiler = ImageSlicer(image.shape,
                        tile_size=(1280, 1280),
                        tile_step=(1280, 1280),
                        weight='mean')
    tiles = tiler.split(image)

    merger = CudaTileMerger(tiler.target_shape,
                            channels=image.shape[2],
                            weight=tiler.weight)
    for tile, coordinates in zip(tiles, tiler.crops):
        # Integrate as batch of size 1
        merger.integrate_batch(
            tensor_from_rgb_image(tile).unsqueeze(0).float().cuda(),
            [coordinates])

    merged = merger.merge()
    merged = rgb_image_from_tensor(merged, mean=0, std=1, max_pixel_value=1)
    merged = tiler.crop_to_orignal_size(merged)

    np.testing.assert_equal(merged, image)