def load_dataset(self, csv_file, root_dir=None, augment=False, shuffle=True, batch_size=1):
    """Create a tree dataset for inference

    Csv file format is a .csv file with the columns "image_path", "xmin", "ymin", "xmax", "ymax"
    for the image name and bounding box position. Image_path is the relative filename, not the
    absolute path, and is found in the root_dir directory. One bounding box per line.

    Args:
        csv_file: path to csv file
        root_dir: directory of images. If None, uses "image_dir" in config
        augment: Whether to create a training dataset, this activates data augmentations
        shuffle: whether to shuffle the dataloader
        batch_size: number of images per batch

    Returns:
        data_loader: a pytorch DataLoader wrapping the tree dataset
    """
    ds = dataset.TreeDataset(csv_file=csv_file,
                             root_dir=root_dir,
                             transforms=dataset.get_transform(augment=augment),
                             label_dict=self.label_dict)

    data_loader = torch.utils.data.DataLoader(
        ds,
        batch_size=batch_size,
        shuffle=shuffle,
        collate_fn=utilities.collate_fn,
        num_workers=self.config["workers"],
    )

    return data_loader
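# Usage sketch (illustrative, not part of the original source). Assumes `m` is an instance of the
# class defining load_dataset above, and that "annotations.csv" and "/path/to/images" are
# hypothetical placeholders following the csv layout described in the docstring (one bounding box
# per line, plus a "label" column matching m.label_dict).
def _example_load_dataset_usage(m):
    data_loader = m.load_dataset(csv_file="annotations.csv",
                                 root_dir="/path/to/images",
                                 augment=False,  # inference: skip training augmentations
                                 shuffle=False,
                                 batch_size=2)
    # utilities.collate_fn groups each batch as (paths, images, targets), as seen in log_images below
    for paths, images, targets in data_loader:
        pass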
def test_single_class_with_empty(tmpdir):
    """Add fake empty annotations to test parsing"""
    csv_file1 = get_data("example.csv")
    csv_file2 = get_data("OSBS_029.csv")

    df1 = pd.read_csv(csv_file1)
    df2 = pd.read_csv(csv_file2)
    df = pd.concat([df1, df2])

    df.loc[df.image_path == "OSBS_029.tif", "xmin"] = 0
    df.loc[df.image_path == "OSBS_029.tif", "ymin"] = 0
    df.loc[df.image_path == "OSBS_029.tif", "xmax"] = 0
    df.loc[df.image_path == "OSBS_029.tif", "ymax"] = 0

    df.to_csv("{}_test_empty.csv".format(tmpdir))

    root_dir = os.path.dirname(get_data("OSBS_029.png"))
    ds = dataset.TreeDataset(csv_file="{}_test_empty.csv".format(tmpdir),
                             root_dir=root_dir,
                             label_dict={"Tree": 0})
    assert len(ds) == 2

    # First image has annotations
    assert not torch.sum(ds[0][2]["boxes"]) == 0
    # Second image has no annotations
    assert torch.sum(ds[1][2]["boxes"]) == 0
def test_predict_dataloader():
    csv_file = get_data("example.csv")
    root_dir = os.path.dirname(csv_file)
    ds = dataset.TreeDataset(csv_file=csv_file, root_dir=root_dir, train=False)
    image = next(iter(ds))

    # Assert image is in channels-first format
    assert image.shape[0] == 3
def run():
    # Repeatedly draw the first augmented sample to exercise the transform pipeline
    csv_file = get_data("OSBS_029.csv")
    root_dir = os.path.dirname(csv_file)
    ds = dataset.TreeDataset(csv_file=csv_file,
                             root_dir=root_dir,
                             transforms=dataset.get_transform(augment=True))
    for x in range(1000):
        next(iter(ds))
def test_collate():
    """Due to data augmentations the dataset class may yield empty bounding box annotations"""
    csv_file = get_data("example.csv")
    root_dir = os.path.dirname(csv_file)
    ds = dataset.TreeDataset(csv_file=csv_file,
                             root_dir=root_dir,
                             transforms=dataset.get_transform(augment=False))

    for i in range(len(ds)):
        batch = ds[i]
        collated_batch = utilities.collate_fn(batch)
        assert len(collated_batch) == 2
def test_TreeDataset_transform(augment):
    csv_file = get_data("example.csv")
    root_dir = os.path.dirname(csv_file)
    ds = dataset.TreeDataset(csv_file=csv_file,
                             root_dir=root_dir,
                             transforms=dataset.get_transform(augment=augment))

    for i in range(len(ds)):
        # Image values are normalized between 0 and 1
        path, image, targets = ds[i]
        assert image.max() <= 1
        assert image.min() >= 0
        assert targets["boxes"].shape == (79, 4)
        assert targets["labels"].shape == (79,)
def test_TreeDataset(csv_file, label_dict):
    root_dir = os.path.dirname(csv_file)
    ds = dataset.TreeDataset(csv_file=csv_file,
                             root_dir=root_dir,
                             transforms=None,
                             label_dict=label_dict)
    raw_data = pd.read_csv(csv_file)

    assert len(ds) == len(raw_data.image_path.unique())

    for i in range(len(ds)):
        # Image values are normalized between 0 and 1
        path, image, targets = ds[i]
        assert image.max() <= 1
        assert image.min() >= 0
        assert targets["boxes"].shape == (raw_data.shape[0], 4)
        assert targets["labels"].shape == (raw_data.shape[0],)
        assert len(np.unique(targets["labels"])) == len(raw_data.label.unique())
def log_images(self, pl_module):
    ds = dataset.TreeDataset(csv_file=self.csv_file,
                             root_dir=self.root_dir,
                             transforms=dataset.get_transform(augment=False),
                             label_dict=pl_module.label_dict)

    if self.n > len(ds):
        self.n = len(ds)

    ds = torch.utils.data.Subset(ds, np.arange(0, self.n, 1))

    data_loader = torch.utils.data.DataLoader(ds,
                                              batch_size=1,
                                              shuffle=False,
                                              collate_fn=utilities.collate_fn)

    pl_module.model.eval()

    for batch in data_loader:
        paths, images, targets = batch

        if not pl_module.device.type == "cpu":
            images = [x.to(pl_module.device) for x in images]

        predictions = pl_module.model(images)

        for path, image, prediction, target in zip(paths, images, predictions, targets):
            image = image.permute(1, 2, 0)
            image = image.cpu()
            visualize.plot_prediction_and_targets(image=image,
                                                  predictions=prediction,
                                                  targets=target,
                                                  image_name=path,
                                                  savedir=self.savedir)
            plt.close()

    try:
        saved_plots = glob.glob("{}/*.png".format(self.savedir))
        for x in saved_plots:
            pl_module.logger.experiment.log_image(x)
    except Exception as e:
        print("Could not find logger in lightning module, skipping upload; "
              "images were saved to {}, the error raised was {}".format(self.savedir, e))
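# Usage sketch (illustrative, not part of the original source). log_images reads self.csv_file,
# self.root_dir, self.savedir and self.n, so the enclosing callback is assumed to accept those as
# constructor arguments; `callback_cls` and every path below are hypothetical placeholders.
def _example_log_images_callback(callback_cls):
    from pytorch_lightning import Trainer

    callback = callback_cls(csv_file="validation.csv",
                            root_dir="/path/to/images",
                            savedir="/path/to/plots",
                            n=5)  # plot at most 5 images from the validation file
    return Trainer(callbacks=[callback])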
def test_multi_image_warning():
    tmpdir = tempfile.gettempdir()
    csv_file1 = get_data("example.csv")
    csv_file2 = get_data("OSBS_029.csv")
    df1 = pd.read_csv(csv_file1)
    df2 = pd.read_csv(csv_file2)
    df = pd.concat([df1, df2])
    csv_file = "{}/multiple.csv".format(tmpdir)
    df.to_csv(csv_file)

    root_dir = os.path.dirname(csv_file1)
    ds = dataset.TreeDataset(csv_file=csv_file,
                             root_dir=root_dir,
                             transforms=dataset.get_transform(augment=False))

    for i in range(len(ds)):
        batch = ds[i]
        # collate_fn is expected to drop None entries, leaving the two valid samples
        collated_batch = utilities.collate_fn([None, batch, batch])
        assert len(collated_batch[0]) == 2
def predict_file(model,
                 csv_file,
                 root_dir,
                 savedir,
                 device,
                 iou_threshold=0.1,
                 color=(0, 165, 255),
                 thickness=1):
    """Create a dataset and predict an entire annotation file

    Csv file format is a .csv file with the columns "image_path", "xmin", "ymin", "xmax", "ymax"
    for the image name and bounding box position. Image_path is the relative filename, not the
    absolute path, and is found in the root_dir directory. One bounding box per line. If a
    "label" column is present, these are assumed to be annotations and will be plotted in a
    different color than predictions.

    Args:
        model: a trained pytorch object detection model
        csv_file: path to csv file
        root_dir: directory of images
        savedir: Optional. Directory to save image plots.
        device: pytorch device of 'cuda' or 'cpu' for gpu prediction. Set internally.
        iou_threshold: IoU threshold for across-class non-max suppression
        color: color of the annotation bounding box as a BGR tuple, e.g. orange is (0, 165, 255)
        thickness: thickness of the rectangle border line in px

    Returns:
        results: pandas dataframe with bounding boxes, label and scores for each image in the csv file
    """
    model.eval()
    df = pd.read_csv(csv_file)

    # Dataloader (when not shuffled) returns a tensor for each image in order
    paths = df.image_path.unique()
    ds = dataset.TreeDataset(csv_file=csv_file,
                             root_dir=root_dir,
                             transforms=None,
                             train=False)

    prediction_list = []
    with torch.no_grad():
        for i in ds:
            i = i.to(device)
            prediction = model(torch.unsqueeze(i, 0))
            prediction_list.append(prediction)

    prediction_list = [item for sublist in prediction_list for item in sublist]

    results = []
    for index, prediction in enumerate(prediction_list):
        # Loop through images; if there is more than one class, apply across-class NMS
        prediction = visualize.format_boxes(prediction)
        if len(prediction.label.unique()) > 1:
            prediction = across_class_nms(prediction, iou_threshold=iou_threshold)

        if savedir:
            # Plot the predictions, even though we have the annotations
            image = np.array(Image.open("{}/{}".format(root_dir,
                                                       paths[index])))[:, :, ::-1]
            image = visualize.plot_predictions(image, prediction)

            # Plot annotations if they exist
            annotations = df[df.image_path == paths[index]]
            image = visualize.plot_predictions(image,
                                               annotations,
                                               color=color,
                                               thickness=thickness)
            cv2.imwrite(
                "{}/{}.png".format(savedir, os.path.splitext(paths[index])[0]),
                image)

        prediction["image_path"] = paths[index]
        results.append(prediction)

    results = pd.concat(results, ignore_index=True)

    return results
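# Usage sketch (illustrative, not part of the original source). Assumes `model` is an
# already-trained detection model compatible with the call signature used above; the csv and
# directory paths are hypothetical placeholders following the docstring's layout.
def _example_predict_file_usage(model):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    results = predict_file(model=model,
                           csv_file="annotations.csv",
                           root_dir="/path/to/images",
                           savedir=None,  # skip writing annotated .png plots
                           device=device,
                           iou_threshold=0.1)
    # One row per predicted box: bounding box coordinates, label, score and image_path
    print(results.head())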