Example #1
    def load_dataset(self,
                     csv_file,
                     root_dir=None,
                     augment=False,
                     shuffle=True,
                     batch_size=1):
        """Create a tree dataset for inference
        The csv file must contain the columns "image_path", "xmin", "ymin", "xmax" and "ymax", giving the image filename and bounding box coordinates, with one bounding box per line.
        image_path is the filename relative to root_dir, not an absolute path.

        Args:
            csv_file: path to csv file
            root_dir: directory of images. If None, uses "image_dir" in config
            augment: whether to create a training dataset; this activates data augmentations
            shuffle: whether to shuffle the order of images
            batch_size: number of images per batch
        Returns:
            data_loader: a pytorch DataLoader that yields batches from the dataset
        """

        ds = dataset.TreeDataset(
            csv_file=csv_file,
            root_dir=root_dir,
            transforms=dataset.get_transform(augment=augment),
            label_dict=self.label_dict)

        data_loader = torch.utils.data.DataLoader(
            ds,
            batch_size=batch_size,
            shuffle=shuffle,
            collate_fn=utilities.collate_fn,
            num_workers=self.config["workers"],
        )

        return data_loader
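The load_dataset method above is presumably defined on the DeepForest model wrapper (deepforest.main.deepforest). A minimal usage sketch under that assumption; the annotation and image paths are illustrative:

from deepforest import main

model = main.deepforest()  # assumed wrapper class exposing load_dataset
data_loader = model.load_dataset(csv_file="annotations.csv",  # illustrative paths
                                 root_dir="images/",
                                 augment=True,
                                 shuffle=True,
                                 batch_size=4)

for paths, images, targets in data_loader:
    # utilities.collate_fn groups each batch as (paths, images, targets); see Example #8
    break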
Example #2
def test_single_class_with_empty(tmpdir):
    """Add fake empty annotations to test parsing """
    csv_file1 = get_data("example.csv")
    csv_file2 = get_data("OSBS_029.csv")

    df1 = pd.read_csv(csv_file1)
    df2 = pd.read_csv(csv_file2)
    df = pd.concat([df1, df2])

    df.loc[df.image_path == "OSBS_029.tif", "xmin"] = 0
    df.loc[df.image_path == "OSBS_029.tif", "ymin"] = 0
    df.loc[df.image_path == "OSBS_029.tif", "xmax"] = 0
    df.loc[df.image_path == "OSBS_029.tif", "ymax"] = 0

    df.to_csv("{}_test_empty.csv".format(tmpdir))

    root_dir = os.path.dirname(get_data("OSBS_029.png"))
    ds = dataset.TreeDataset(csv_file="{}_test_empty.csv".format(tmpdir),
                             root_dir=root_dir,
                             label_dict={"Tree": 0})
    assert len(ds) == 2
    # First image has annotations
    assert torch.sum(ds[0][2]["boxes"]) != 0
    # Second image has no annotations
    assert torch.sum(ds[1][2]["boxes"]) == 0
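For reference, a sketch of the row layout this test produces, following the column format documented in Example #1; an image with no visible trees is kept in the csv as a single row with zeroed coordinates (the filename is taken from the test, the construction is illustrative):

import pandas as pd

# One all-zero box row marks an image that has no annotations.
empty_row = pd.DataFrame([{
    "image_path": "OSBS_029.tif",
    "xmin": 0, "ymin": 0, "xmax": 0, "ymax": 0,
    "label": "Tree",
}])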
Example #3
def test_predict_dataloader():
    csv_file = get_data("example.csv")
    root_dir = os.path.dirname(csv_file)
    ds = dataset.TreeDataset(csv_file=csv_file, root_dir=root_dir, train=False)
    image = next(iter(ds))
    #Assert image is channels first format
    assert image.shape[0] == 3
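Because the inference-mode dataset yields bare image tensors, a DataLoader around it only needs a trivial collate. A hedged follow-up sketch; the batch size and lambda collate are assumptions, not part of the test above:

# Sketch: batch the inference dataset into lists of channels-first image tensors.
predict_loader = torch.utils.data.DataLoader(ds,
                                             batch_size=2,
                                             shuffle=False,
                                             collate_fn=lambda batch: batch)
for images in predict_loader:
    assert all(img.shape[0] == 3 for img in images)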
Example #4
def run():
    csv_file = get_data("OSBS_029.csv")
    root_dir = os.path.dirname(csv_file)

    ds = dataset.TreeDataset(csv_file=csv_file,
                             root_dir=root_dir,
                             transforms=dataset.get_transform(augment=True))

    # Repeatedly fetch and transform the first sample to exercise the augmentation
    # pipeline (e.g. for profiling).
    for x in range(1000):
        next(iter(ds))
Example #5
def test_collate():
    """Due to data augmentations the dataset class may yield empty bounding box annotations"""
    csv_file = get_data("example.csv")
    root_dir = os.path.dirname(csv_file)
    ds = dataset.TreeDataset(csv_file=csv_file,
                             root_dir=root_dir,
                             transforms=dataset.get_transform(augment=False))

    for i in range(len(ds)):
        # Collate an individual retrieved sample
        batch = ds[i]
        collated_batch = utilities.collate_fn(batch)
        assert len(collated_batch) == 2
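The tests above and in Example #9 rely on the collate function tolerating missing samples. A minimal sketch of that idea; this is an illustration of the behaviour, not necessarily the exact body of utilities.collate_fn:

def collate_fn_sketch(batch):
    # Drop samples the dataset returned as None (e.g. every box removed by augmentation),
    # then regroup the surviving (path, image, targets) tuples column-wise.
    batch = [item for item in batch if item is not None]
    return tuple(zip(*batch))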
Example #6
def test_TreeDataset_transform(augment):
    csv_file = get_data("example.csv")
    root_dir = os.path.dirname(csv_file)
    ds = dataset.TreeDataset(csv_file=csv_file,
                             root_dir=root_dir,
                             transforms=dataset.get_transform(augment=augment))

    for i in range(len(ds)):
        #Between 0 and 1
        path, image, targets = ds[i]
        assert image.max() <= 1
        assert image.min() >= 0
        assert targets["boxes"].shape == (79, 4)
        assert targets["labels"].shape == (79, )
Example #7
def test_TreeDataset(csv_file, label_dict):
    root_dir = os.path.dirname(csv_file)
    ds = dataset.TreeDataset(csv_file=csv_file,
                             root_dir=root_dir,
                             transforms=None,
                             label_dict=label_dict)
    raw_data = pd.read_csv(csv_file)
    
    assert len(ds) == len(raw_data.image_path.unique())
    
    for i in range(len(ds)):
        #Between 0 and 1
        path, image, targets = ds[i]
        assert image.max() <= 1
        assert image.min() >= 0
        assert targets["boxes"].shape == (raw_data.shape[0],4)
        assert targets["labels"].shape == (raw_data.shape[0],)
        assert len(np.unique(targets["labels"])) == len(raw_data.label.unique())
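Similarly, test_TreeDataset takes csv_file and label_dict as arguments, which suggests fixtures or parametrization. One possible, purely illustrative setup reusing values seen elsewhere in these examples:

# Hypothetical parametrization pairing each annotation file with its label mapping.
@pytest.mark.parametrize("csv_file,label_dict", [
    (get_data("example.csv"), {"Tree": 0}),
    (get_data("OSBS_029.csv"), {"Tree": 0}),
])
def test_TreeDataset(csv_file, label_dict):
    ...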
Example #8
    def log_images(self, pl_module):

        ds = dataset.TreeDataset(
            csv_file=self.csv_file,
            root_dir=self.root_dir,
            transforms=dataset.get_transform(augment=False),
            label_dict=pl_module.label_dict)

        if self.n > len(ds):
            self.n = len(ds)

        ds = torch.utils.data.Subset(ds, np.arange(0, self.n, 1))

        data_loader = torch.utils.data.DataLoader(
            ds, batch_size=1, shuffle=False, collate_fn=utilities.collate_fn)

        pl_module.model.eval()

        for batch in data_loader:
            paths, images, targets = batch

            if pl_module.device.type != "cpu":
                images = [x.to(pl_module.device) for x in images]

            predictions = pl_module.model(images)

            for path, image, prediction, target in zip(paths, images,
                                                       predictions, targets):
                image = image.permute(1, 2, 0)
                image = image.cpu()
                visualize.plot_prediction_and_targets(image=image,
                                                      predictions=prediction,
                                                      targets=target,
                                                      image_name=path,
                                                      savedir=self.savedir)
                plt.close()
        try:
            saved_plots = glob.glob("{}/*.png".format(self.savedir))
            for x in saved_plots:
                pl_module.logger.experiment.log_image(x)
        except Exception as e:
            print(
                "Could not find logger in lightning module, skipping upload. Images were saved to {}, error raised: {}"
                .format(self.savedir, e))
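log_images takes pl_module, which suggests it is defined on a PyTorch Lightning Callback. A hedged sketch of how such a callback might be declared and attached; the class name, constructor arguments and hook choice are assumptions:

from pytorch_lightning import Callback, Trainer

class ImageLogger(Callback):
    """Hypothetical callback that would own the log_images method shown above."""

    def __init__(self, csv_file, root_dir, savedir, n=2):
        self.csv_file = csv_file
        self.root_dir = root_dir
        self.savedir = savedir
        self.n = n

    # log_images(self, pl_module) from Example #8 would be defined here.

    def on_validation_epoch_end(self, trainer, pl_module):
        self.log_images(pl_module)

# trainer = Trainer(callbacks=[ImageLogger("annotations.csv", "images/", "plots/")])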
Example #9
def test_multi_image_warning():
    tmpdir = tempfile.gettempdir()
    csv_file1 = get_data("example.csv")
    csv_file2 = get_data("OSBS_029.csv")
    df1 = pd.read_csv(csv_file1)
    df2 = pd.read_csv(csv_file2)
    df = pd.concat([df1, df2])
    csv_file = "{}/multiple.csv".format(tmpdir)
    df.to_csv(csv_file)

    root_dir = os.path.dirname(csv_file1)
    ds = dataset.TreeDataset(csv_file=csv_file,
                             root_dir=root_dir,
                             transforms=dataset.get_transform(augment=False))

    for i in range(len(ds)):
        # collate_fn should drop the None entry, leaving the two valid samples
        batch = ds[i]
        collated_batch = utilities.collate_fn([None, batch, batch])
        assert len(collated_batch[0]) == 2
Example #10
def predict_file(model,
                 csv_file,
                 root_dir,
                 savedir,
                 device,
                 iou_threshold=0.1,
                 color=(0, 165, 255),
                 thickness=1):
    """Create a dataset and predict entire annotation file

    The csv file must contain the columns "image_path", "xmin", "ymin", "xmax" and "ymax", giving the image filename and bounding box coordinates, with one bounding box per line.
    image_path is the filename relative to root_dir, not an absolute path.
    If a "label" column is present, those rows are treated as ground-truth annotations and plotted in a different color than the predictions.

    Args:
        model: a trained object detection model used to generate predictions
        csv_file: path to csv file
        root_dir: directory of the images referenced by image_path in the csv file
        savedir: Optional. Directory to save image plots.
        device: pytorch device of 'cuda' or 'cpu' for gpu prediction. Set internally.
        iou_threshold: IoU threshold for the across-class non-max suppression
        color: color of the bounding box as a tuple of BGR color, e.g. orange annotations is (0, 165, 255)
        thickness: thickness of the rectangle border line in px
    Returns:
        results: pandas dataframe with bounding boxes, label and scores for each image in the csv file
    """

    model.eval()
    df = pd.read_csv(csv_file)
    # The dataset, iterated in order, yields one image tensor per unique image_path
    paths = df.image_path.unique()
    ds = dataset.TreeDataset(csv_file=csv_file,
                             root_dir=root_dir,
                             transforms=None,
                             train=False)
    prediction_list = []
    with torch.no_grad():
        for i in ds:
            i = i.to(device)
            prediction = model(torch.unsqueeze(i, 0))
            prediction_list.append(prediction)

    prediction_list = [item for sublist in prediction_list for item in sublist]

    results = []
    for index, prediction in enumerate(prediction_list):
        # Format raw predictions; if there is more than one class, apply across-class NMS
        prediction = visualize.format_boxes(prediction)
        if len(prediction.label.unique()) > 1:
            prediction = across_class_nms(prediction,
                                          iou_threshold=iou_threshold)

        if savedir:
            # Just predict the images, even though we have the annotations
            image = np.array(Image.open("{}/{}".format(
                root_dir, paths[index])))[:, :, ::-1]
            image = visualize.plot_predictions(image, prediction)

            #Plot annotations if they exist
            annotations = df[df.image_path == paths[index]]

            image = visualize.plot_predictions(image,
                                               annotations,
                                               color=color,
                                               thickness=thickness)
            cv2.imwrite(
                "{}/{}.png".format(savedir,
                                   os.path.splitext(paths[index])[0]), image)

        prediction["image_path"] = paths[index]
        results.append(prediction)

    results = pd.concat(results, ignore_index=True)

    return results
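A hedged usage sketch for predict_file; the model object and all file paths are illustrative assumptions:

# Illustrative call: run a trained detection model over every image in an annotation
# file, save annotated plots, and inspect the number of predictions per image.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)  # `model` is an assumed, already-trained detection model

boxes = predict_file(model=model,
                     csv_file="annotations.csv",  # illustrative paths
                     root_dir="images/",
                     savedir="plots/",
                     device=device,
                     iou_threshold=0.1)
print(boxes.groupby("image_path").size())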