def write_images(self, epoch: int, mode: str = "Train"):
        """Log a few samples, annotated with the model's predictions, to TensorBoard.

        One batch is pulled from the dataloader matching ``mode`` and truncated to
        ``self.max_outputs`` samples. The model is run on it, the predictions are
        drawn on the images, and each image is written under the
        ``{mode}/prediction_{index}`` tag.

        Args:
            epoch (int): Current epoch, used as the TensorBoard global step.
            mode (str): "Train" selects the training writer and dataloader,
                        any other value selects the validation ones.
        """
        clean_print("Writing images", end="\r")
        if mode == "Train":
            tb_writer, dataloader = self.train_tb_writer, self.train_dataloader
        else:
            tb_writer, dataloader = self.val_tb_writer, self.val_dataloader

        # Only the first few samples of a single batch are displayed
        batch = dataloader.next_batch()
        nb_samples = self.max_outputs
        imgs, labels = batch[0][:nb_samples], batch[1][:nb_samples]

        # The model is fed the batch as the dataloader produced it;
        # the denormalization below is only applied afterwards, for display.
        logits = self.model(imgs)

        display_imgs = self.denormalize_imgs_fn(imgs)
        labels = labels.cpu().detach().numpy()
        probabilities = nn.functional.softmax(logits, dim=-1)
        probabilities = probabilities.cpu().detach().numpy()
        out_imgs = draw_pred_img(display_imgs, probabilities, labels,
                                 self.data_config.LABEL_MAP, self.tb_img_size)

        # One TensorBoard entry per annotated image
        for image_index, out_img in enumerate(out_imgs):
            tag = f"{mode}/prediction_{image_index}"
            tb_writer.add_image(tag, out_img, global_step=epoch, dataformats="HWC")

        # Reset the epoch to not cause issues for other functions
        dataloader.reset_epoch()
def name_loader(data_path: Path,
                label_map: dict[int, str],
                limit: int = None,
                load_data: bool = False,
                data_preprocessing_fn: Optional[Callable[[Path], np.ndarray]] = None,
                return_img_paths: bool = False,
                shuffle: bool = False,
                ) -> (tuple[npt.NDArray[np.uint8], npt.NDArray[Path], list[Path]]
                      | tuple[npt.NDArray[np.uint8], npt.NDArray[Path]]):
    """Loading function for datasets where the class is in the name of the file.

    Args:
        data_path (Path): Path to the root folder of the dataset.
        label_map (dict): dictionarry mapping an int to a class
        limit (int, optional): If given then the number of elements for each class in the dataset
                               will be capped to this number
        load_data (bool): If true then this function returns the images already loaded instead of their paths.
                          The images are loaded using the preprocessing functions (they must be provided)
        data_preprocessing_fn (callable, optional): Function used to load data (imgs) from their paths.
        return_img_paths: If true, then the image paths will also be returned.
        shuffle: If true then the data is shuffled once before being returned

    Return:
        numpy array containing the images' paths and the associated label or the loaded data
    """
    if return_img_paths:
        all_paths = []

    labels, data = [], []
    for key in range(len(label_map)):
        exts = [".jpg", ".png"]
        image_paths = list([p for p in data_path.rglob(f"{label_map[key]}*") if p.suffix in exts])
        if return_img_paths:
            all_paths.extend(image_paths if not limit else image_paths[:limit])

        for i, image_path in enumerate(image_paths, start=1):
            clean_print(f"Loading data {image_path}    ({i}/{len(image_paths)}) for class label_map[key]", end="\r")
            if load_data:
                data.append(data_preprocessing_fn(image_path))
            else:
                data.append(image_path)
            labels.append(key)
            if limit and i >= limit:
                break

    data, labels, image_paths = np.asarray(data), np.asarray(labels), np.asarray(image_paths, dtype=object)
    if shuffle:
        index_list = np.arange(len(labels))
        np.random.shuffle(index_list)
        data, labels, = data[index_list], labels[index_list]
        if return_img_paths:
            all_paths = all_paths[index_list]

    if return_img_paths:
        return data, labels, all_paths
    else:
        return data, labels
def _read_video_frames(video_path: Path, grayscale: bool) -> np.ndarray:
    """Decode every frame of a video and stack them into a single numpy array."""
    cap = cv2.VideoCapture(str(video_path))  # Pass a string, as done in the sibling loader
    frames = []
    try:
        while cap.isOpened():
            frame_ok, frame = cap.read()
            if not frame_ok:
                break
            if grayscale:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                frame = np.expand_dims(frame, -1)  # To keep a channel dimension (gray scale)
            else:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(frame)
    finally:
        # Always give the capture back, even if decoding raises
        cap.release()
    return np.asarray(frames)


def n_to_1_loader(data_path: Path,
                  label_map: Dict[int, str],
                  limit: Optional[int] = None,
                  load_videos: bool = False,
                  grayscale: bool = True,
                  verbose: bool = True) -> np.ndarray:
    """Loading function for datasets where each video has a single label (the folder it is in).

    Args:
        data_path: Path to the root folder of the dataset.
                   This folder is expected to contain subfolders for each class, with the videos inside.
                   The class subfolders are searched recursively for .avi and .mp4 files.
        label_map: Dictionary mapping an int to a class name.
                   Keys are expected to be 0, 1, ..., len(label_map) - 1.
        limit (int, optional): If given then the number of elements for each class in the dataset
                            will be capped to this number
        load_videos: If true then this function returns the videos instead of their paths
        grayscale: If set to true and using the load_videos option, images will be converted to grayscale
        verbose: Verbose mode, print loading progress.
    Return:
        numpy array (dtype object) whose rows are [path or video, label]
    """
    # Patterns are kept as strings: Path.glob/rglob only accept path-like patterns on recent Python versions
    file_patterns = ("*.avi", "*.mp4")

    data = []
    for key in range(len(label_map)):
        class_dir = data_path / label_map[key]
        video_paths = []
        for pattern in file_patterns:
            video_paths.extend(class_dir.rglob(pattern))

        for i, video_path in enumerate(video_paths, start=1):
            if verbose:
                clean_print(
                    f"Loading data {str(video_path)}    ({i}/{len(video_paths)}) for class {label_map[key]}"
                )
            if load_videos:
                data.append([_read_video_frames(video_path, grayscale), key])
            else:
                data.append([video_path, key])
            # The cap is per class (as documented), not on the whole dataset
            if limit and i >= limit:
                break

    # dtype=object: a [video, label] row is ragged and cannot be turned into a regular array
    return np.asarray(data, dtype=object)
Exemplo n.º 4
0
def default_loader(
        data_path: Path,
        get_mask_path_fn: Callable[[Path], Path],
        limit: Optional[int] = None,
        load_data: bool = False,
        data_preprocessing_fn: Optional[Callable[[Path], np.ndarray]] = None,
        labels_preprocessing_fn: Optional[Callable[[Path], np.ndarray]] = None,
        verbose: bool = True) -> tuple[np.ndarray, np.ndarray]:
    """Loads image and masks for image segmentation.

    This function assumes that the masks' paths (relative to the dataset folder) contain either
    "mask" or "seg", and that the main images' paths do not.
    Only .jpg, .png and .bmp files are considered; the dataset folder is searched recursively.

    Args:
        data_path (Path): Path to the dataset folder
        get_mask_path_fn (callable): Function that returns the mask's path corresponding to a given image path
        limit (int, optional): If given then the number of returned elements will be capped to this number
        load_data (bool): If true then this function returns the images already loaded instead of their paths.
                          The images are loaded using the preprocessing functions (they must be provided)
        data_preprocessing_fn (callable, optional): Function used to load data from their paths.
        labels_preprocessing_fn (callable, optional): Function used to load labels from their paths.
        verbose (bool): Verbose mode, print loading progress.

    Return:
        numpy arrays containing the paths/images and the associated label
    """
    data: list[np.ndarray | Path] = []
    labels: list[np.ndarray | Path] = []

    exts = (".jpg", ".png", ".bmp")
    mask_markers = ("seg", "mask")

    file_list = []
    for path in data_path.rglob('*'):
        if path.suffix not in exts:
            continue
        # Only look at the part of the path inside the dataset: a dataset stored in a folder such as
        # ".../segmentation_dataset" must not have all of its images mistaken for masks.
        relative_path = str(path.relative_to(data_path))
        if any(marker in relative_path for marker in mask_markers):
            continue
        file_list.append(path)

    nb_imgs = len(file_list)
    for i, img_path in enumerate(file_list, start=1):
        limit_reached = bool(limit) and i >= limit
        if verbose:
            # Finish the progress line on the last processed image, including when the limit stops the loop
            clean_print(f"Processing image {img_path.name}    ({i}/{nb_imgs})",
                        end="\n" if (i == nb_imgs or limit_reached) else "\r")

        segmentation_map_path = get_mask_path_fn(img_path)
        if load_data:
            data.append(data_preprocessing_fn(img_path))
            labels.append(labels_preprocessing_fn(segmentation_map_path))
        else:
            data.append(img_path)
            labels.append(segmentation_map_path)

        if limit_reached:
            break

    return np.asarray(data), np.asarray(labels)
Exemplo n.º 5
0
def main():
    """Run a classification checkpoint on a test dataset and print timing and the ratio of correct predictions.

    Command line arguments:
        model_path: Path to the checkpoint to use.
        data_path: Path to the test dataset.
        --show / --s: Display every misclassified image (press "q" to go to the next one).
    """
    parser = ArgumentParser()
    parser.add_argument("model_path", type=Path, help="Path to the checkpoint to use")
    parser.add_argument("data_path", type=Path, help="Path to the test dataset")
    parser.add_argument("--show", "--s", action="store_true", help="Show the images where the network failed.")
    args = parser.parse_args()

    # Note: the timer starts here, so the reported time also covers model and data loading
    inference_start_time = time.perf_counter()

    # Creates and load the model
    model = build_model(ModelConfig.MODEL, DataConfig.NB_CLASSES,
                        model_path=args.model_path, eval=True, **get_config_as_dict(ModelConfig))
    print("Weights loaded", flush=True)

    data, labels, paths = data_loader(args.data_path, DataConfig.LABEL_MAP,
                                      data_preprocessing_fn=default_load_data, return_img_paths=True)
    base_cpu_pipeline = (transforms.resize(ModelConfig.IMAGE_SIZES), )
    base_gpu_pipeline = (transforms.to_tensor(), transforms.normalize(labels_too=True))
    data_transformations = transforms.compose_transformations((*base_cpu_pipeline, *base_gpu_pipeline))
    print("\nData loaded", flush=True)

    results = []  # Variable used to keep track of the classification results (1 if correct, 0 otherwise)
    for img, label, img_path in zip(data, labels, paths):
        clean_print(f"Processing image {img_path}", end="\r")
        # The transformations work on batches, hence the single-element lists
        img, label = data_transformations([img], [label])
        with torch.no_grad():
            output = model(img)
            output = torch.nn.functional.softmax(output, dim=-1)
            prediction = torch.argmax(output)
            pred_correct = label == prediction
            results.append(1 if pred_correct else 0)

            if args.show and not pred_correct:
                out_img = draw_pred_img(img, output, label, DataConfig.LABEL_MAP, size=ModelConfig.IMAGE_SIZES)
                out_img = cv2.cvtColor(out_img[0], cv2.COLOR_RGB2BGR)
                # Keep the window up until the user presses "q"
                while True:
                    cv2.imshow("Image", out_img)
                    key = cv2.waitKey(10)
                    if key == ord("q"):
                        cv2.destroyAllWindows()
                        break

    if not results:
        # Nothing was processed: avoid dividing by zero in the statistics below
        print("\nNo images were found in the test dataset, nothing to evaluate.")
        return

    results = np.asarray(results)
    total_time = time.perf_counter() - inference_start_time
    print("\nFinished running inference on the test dataset.")
    print(f"Total inference time was {total_time:.3f}s, which averages to {total_time/len(results):.5f}s per image")
    # The value printed here is the fraction of correctly classified images
    print(f"Precision: {np.mean(results)}")
Exemplo n.º 6
0
def default_loader(data_path: Path,
                   label_map: dict[int, str],
                   limit: int = None,
                   shuffle: bool = False,
                   verbose: bool = True
                   ) -> tuple[np.ndarray, np.ndarray]:
    """Collect the image paths and labels of an image classification dataset.

    Each class is expected to have its own subfolder (named after the class) in the data folder.
    Those subfolders are searched recursively for .png, .jpg and .bmp files.

    Args:
        data_path (Path): Path to the root folder of the dataset.
        label_map (dict): Dictionary mapping an int to a class name.
                          Keys are expected to be 0, 1, ..., len(label_map) - 1.
        limit (int, optional): If given then the number of elements for each class in the dataset
                               will be capped to this number
        shuffle (bool): If true then the data is shuffled once before being returned
        verbose (bool): Verbose mode, print loading progress.

    Return:
        2 numpy arrays, one containing the images' paths and the other containing the labels.
    """
    valid_suffixes = (".png", ".jpg", ".bmp")
    collected_paths, collected_labels = [], []

    for key in range(len(label_map)):
        img_paths = [candidate for candidate in (data_path / label_map[key]).rglob('*')
                     if candidate.suffix in valid_suffixes]
        for i, img_path in enumerate(img_paths, start=1):
            if verbose:
                # The progress line is only terminated on the last image of the class (or at the limit)
                class_done = i == len(img_paths) or i == limit
                clean_print(f"Processing image {img_path.name}    ({i}/{len(img_paths)}) for class {label_map[key]}",
                            end="\n" if class_done else "\r")
            collected_paths.append(img_path)
            collected_labels.append(key)
            if limit and i >= limit:
                break

    path_array = np.asarray(collected_paths)
    label_array = np.asarray(collected_labels)
    if not shuffle:
        return path_array, label_array

    # Same permutation for both arrays so that paths and labels stay paired
    permutation = np.arange(len(label_array))
    np.random.shuffle(permutation)
    return path_array[permutation], label_array[permutation]
Exemplo n.º 7
0
    def get_metrics(self,
                    mode: str = "Train",
                    **kwargs) -> dict[str, dict[str, Any]]:
        """See base class.

        Recomputes the confusion matrix for the given mode, then returns a dictionary with two entries:
        "scalars" (average accuracy and one accuracy per class) and "imgs" (the confusion matrix).
        """
        clean_print("Computing confusion matrix", end="\r")
        self.compute_confusion_matrix(mode=mode)

        clean_print("Computing average accuracy", end="\r")
        scalars: dict[str, Any] = {"Average Accuracy": self.get_avg_acc()}

        clean_print("Computing per class accuracy", end="\r")
        # One scalar per class, named after the class
        scalars.update({
            f"Per Class Accuracy/{self.label_map[key]}": acc
            for key, acc in enumerate(self.get_class_accuracy())
        })

        clean_print("Creating confusion matrix image", end="\r")
        imgs = {"Confusion Matrix": self.get_confusion_matrix()}

        return {"scalars": scalars, "imgs": imgs}
def n_to_n_loader_from_images(
        data_path: Path,
        label_map: Dict[int, str],
        sequence_length: int,
        limit: Optional[int] = None,
        load_videos: bool = False,
        filters: Optional[list[str]] = None,
        grayscale: bool = False) -> Tuple[list, list]:
    """
    Loading function for when every frame has an associated label.

    Each video is stored as a folder of .jpg frames (taken in sorted filename order), and is cut into
    every possible window of sequence_length consecutive frames.

    Args:
        data_path: Path to the root folder of the dataset.
                   It must contain a labels.json file whose "entries" list has one element per video,
                   each with a "file_path" (folder of frames, relative to data_path).
        label_map: Dictionary mapping an int to a class
        sequence_length: Length of the sequences fed to the network
        limit (int, optional): If given then the total number of sequences will be capped to this number
        load_videos: If true then this function returns the frames instead of their paths
        filters: Only keep entries whose path includes at least one of the given filters as a path component
                 (for example: ["subfolder1", "class2"])
        grayscale: If set to true and using the load_videos option, images will be converted to grayscale
    Return:
        Two lists of the same length: the sequences (lists of frame paths or of frames)
        and the associated per-frame labels.
    """
    # Read label file
    label_file_path = data_path / "labels.json"
    assert label_file_path.is_file(), "Label file is missing"

    with open(label_file_path) as json_file:
        entries = json.load(json_file)["entries"]

    def load_frame(image_path: str) -> np.ndarray:
        """Read one frame from disk, as RGB or as single-channel grayscale."""
        frame = cv2.imread(image_path)
        if grayscale:
            # Keep a channel dimension (gray scale)
            return np.expand_dims(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), -1)
        return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    nb_labels = len(entries)
    dataset_data = []
    dataset_labels = []
    for i, entry in enumerate(entries, start=1):
        if filters and not any(f in entry["file_path"].split(sep)
                               for f in filters):
            continue

        sample_base_path = data_path / entry["file_path"]
        clean_print(
            f"Loading data {str(sample_base_path)}    ({i}/{nb_labels})",
            end="\r")

        image_paths = sorted(str(image_path) for image_path in sample_base_path.glob("*.jpg"))
        assert len(image_paths
                   ), f"Images for video {str(sample_base_path)} are missing"

        frame_labels = read_n_to_n_label(entry, label_map, len(image_paths))

        # +1 so that the last window (the one ending on the last frame) is included,
        # and so that a video of exactly sequence_length frames yields one sequence.
        for start_index in range(len(frame_labels) - sequence_length + 1):
            window = image_paths[start_index:start_index + sequence_length]
            if load_videos:
                dataset_data.append([load_frame(image_path) for image_path in window])
            else:
                dataset_data.append(window)
            dataset_labels.append(frame_labels[start_index:start_index + sequence_length])

            if limit and len(dataset_labels) >= limit:
                return dataset_data, dataset_labels

    return dataset_data, dataset_labels
def n_to_n_loader(data_path: Path,
                  label_map: Dict[int, str],
                  limit: Optional[int] = None,
                  load_videos: bool = False,
                  grayscale: bool = False) -> np.ndarray:
    """
    Loading function for when every frame has an associated label.

    Args:
        data_path: Path to the root folder of the dataset.
                   It must contain a labels.json file whose "entries" list has one element per video,
                   each with a "file_path" (video file, relative to data_path).
        label_map: Dictionary mapping an int to a class
        limit (int, optional): If given then the number of videos will be capped to this number
        load_videos: If true then this function returns the videos instead of their paths
        grayscale: If set to true and using the load_videos option, images will be converted to grayscale
    Return:
        numpy array (dtype object) whose rows are [path or video, labels]
    """
    # Read label file
    label_file_path = data_path / "labels.json"
    assert label_file_path.is_file(), "Label file is missing"

    with open(label_file_path) as json_file:
        entries = json.load(json_file)["entries"]

    nb_labels = len(entries)
    data = []
    for i, entry in enumerate(entries, start=1):
        video_path = data_path / entry["file_path"]
        clean_print(f"Loading data {str(video_path)}    ({i}/{nb_labels})")

        assert video_path.is_file(), f"Video {video_path} is missing"
        # OpenCV is given a string path; the same capture is used for the frame count and the decoding
        cap = cv2.VideoCapture(str(video_path))
        try:
            video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            frame_labels = read_n_to_n_label(entry, label_map, video_length)

            if load_videos:
                video = []
                while cap.isOpened():
                    frame_ok, frame = cap.read()
                    if not frame_ok:
                        break
                    if grayscale:
                        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                        frame = np.expand_dims(frame, -1)  # To keep a channel dimension (gray scale)
                    else:
                        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    video.append(frame)
                data.append([np.asarray(video), frame_labels])
            else:
                data.append([video_path, frame_labels])
        finally:
            # Always give the capture back, even if reading the labels or decoding raises
            cap.release()

        if limit and i == limit:
            break

    # dtype=object: rows mix paths/videos and label sequences, they cannot form a regular array
    return np.asarray(data, dtype=object)
Exemplo n.º 10
0
def main():
    """Display the gradcam heatmap of the top prediction for every image of a test dataset.

    Command line arguments:
        model_path: Path to the checkpoint to use.
        data_path: Path to the test dataset.

    Each image is shown with the heatmap overlaid; press "q" to go to the next one.
    """
    parser = ArgumentParser()
    parser.add_argument("model_path",
                        type=Path,
                        help="Path to the checkpoint to use")
    parser.add_argument("data_path",
                        type=Path,
                        help="Path to the test dataset")
    args = parser.parse_args()

    # Creates and load the model
    model = build_model(ModelConfig.MODEL,
                        DataConfig.NB_CLASSES,
                        model_path=args.model_path,
                        eval=True,
                        **get_config_as_dict(ModelConfig))
    print("Weights loaded", flush=True)

    data, labels, paths = data_loader(args.data_path,
                                      DataConfig.LABEL_MAP,
                                      data_preprocessing_fn=default_load_data,
                                      return_img_paths=True)
    base_cpu_pipeline = (transforms.resize(ModelConfig.IMAGE_SIZES), )
    base_gpu_pipeline = (transforms.to_tensor(),
                         transforms.normalize(labels_too=True))
    data_transformations = transforms.compose_transformations(
        (*base_cpu_pipeline, *base_gpu_pipeline))
    print("\nData loaded", flush=True)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    for img, label, img_path in zip(data, labels, paths):
        clean_print(f"Processing image {img_path}", end="\r")
        # The transformations work on batches, hence the single-element lists
        img, label = data_transformations([img], [label])

        # Feed the image to the model
        output = model(img)
        output = torch.nn.functional.softmax(output, dim=-1)

        # Get top prediction and turn it into a one hot
        prediction = output.argmax(dim=1)
        one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
        one_hot[0][prediction] = 1
        one_hot = torch.from_numpy(one_hot).requires_grad_(True)
        # Scalar score of the predicted class, used as the starting point of the backward pass
        one_hot = torch.sum(one_hot.to(device) * output)

        # Get gradients and activations
        model.zero_grad()
        one_hot.backward(retain_graph=True)
        grads_val = model.get_gradients()[-1].cpu().data.numpy()

        activations = model.get_activations()
        activations = activations.cpu().data.numpy()[0, :]

        # Make gradcam mask: sum of the activation maps, weighted by their averaged gradients
        weights = np.mean(grads_val, axis=(1, 2))
        cam = np.zeros(activations.shape[1:], dtype=np.float32)

        for i, w in enumerate(weights):
            cam += w * activations[i, :, :]

        cam = np.maximum(cam, 0)
        cam = cv2.resize(cam, ModelConfig.IMAGE_SIZES)
        cam = cam - np.min(cam)
        cam_max = np.max(cam)
        if cam_max > 0:
            # An all-zero map (nothing left after the clipping above) is kept as is instead of becoming NaN
            cam = cam / cam_max

        # Draw prediction (logits) on the image
        img = draw_pred_img(img,
                            output,
                            label,
                            DataConfig.LABEL_MAP,
                            size=ModelConfig.IMAGE_SIZES)

        # NOTE(review): draw_pred_img is given a batch and appears to return one image per sample;
        # drop the leading batch axis if there is one so that a single image is displayed.
        pred_img = np.float32(img)
        if pred_img.ndim == 4:
            pred_img = pred_img[0]

        # Fuse input image and gradcam mask
        heatmap = cv2.applyColorMap(np.uint8(255 * cam), cv2.COLORMAP_JET)
        heatmap = np.float32(heatmap)
        cam = heatmap + pred_img
        cam = cam / np.max(cam)

        # Keep the window up until the user presses "q"
        while True:
            cv2.imshow("Image", cam)
            key = cv2.waitKey(10)
            if key == ord("q"):
                cv2.destroyAllWindows()
                break