Пример #1
0
    def __init__(self, root, transform=None, labelfile=None):
        """Index a paired dataset of generated images and their ground truths.

        Under ``root``, every directory ``<name>_gt`` is the ground-truth
        folder paired with the generated-image folder ``<name>``.

        Args:
            root: Dataset root directory containing the paired folders.
            transform: Optional callable applied to loaded images.
            labelfile: Unused here; kept for interface compatibility.

        Raises:
            RuntimeError: If no image files are found under *root*.
        """
        self.root = root
        self.transform = transform

        # Sorting both lists keeps "<name>" and "<name>_gt" aligned pairwise.
        generated_dirs = sorted(d.name for d in os.scandir(self.root)
                                if d.is_dir() and not d.name.endswith('_gt'))
        gt_dirs = sorted(d.name for d in os.scandir(self.root)
                         if d.is_dir() and d.name.endswith('_gt'))

        assert len(gt_dirs) == len(generated_dirs)

        gt_files = []
        generated_files = []
        # Iterate the two sorted lists in lockstep instead of indexing by
        # position (was: for i in range(len(gt_dirs))).
        for gt_dir, gen_dir in zip(gt_dirs, generated_dirs):
            gt_files.extend(os.path.join(gt_dir, x)
                            for x in os.listdir(os.path.join(self.root, gt_dir)))
            generated_files.extend(os.path.join(gen_dir, x)
                                   for x in os.listdir(os.path.join(self.root, gen_dir)))

        # Keep only files with a recognized image extension.
        self.gt_filenames = [x for x in gt_files
                             if has_file_allowed_extension(x, IMG_EXTENSIONS)]
        self.generated_filenames = [x for x in generated_files
                                    if has_file_allowed_extension(x, IMG_EXTENSIONS)]

        assert len(self.gt_filenames) == len(self.generated_filenames)

        if len(self.gt_filenames) == 0:
            # raise is a statement, not a call — drop the call-style parens.
            raise RuntimeError("Found 0 files in folder: " + root + "\n"
                               "Supported extensions are: " + ",".join(IMG_EXTENSIONS))

        self.loader = default_loader
Пример #2
0
 def find_images(dir):
     """Return the allowed-extension image paths directly inside *dir*, sorted by name."""
     return [os.path.join(dir, name)
             for name in sorted(os.listdir(dir))
             if has_file_allowed_extension(name, IMG_EXTENSIONS)]
Пример #3
0
def make_dataset(dir, class_to_idx, extensions, test_fun, num_images):
    '''
    necessary for class CustomImageFolder
    '''
    # Walk each class sub-folder, keep files that pass both the extension
    # filter and the caller-supplied test_fun, and tally per-class counts.
    images = []
    dir = os.path.expanduser(dir)
    counts = [0, 0]  # running totals: [class0, class1]
    for target in sorted(os.listdir(dir)):
        class_dir = os.path.join(dir, target)
        if not os.path.isdir(class_dir):
            continue

        label = class_to_idx[target]
        for root, _, fnames in sorted(os.walk(class_dir)):
            for fname in sorted(fnames):
                if not (has_file_allowed_extension(fname, extensions)
                        and test_fun(fname, label, num_images)):
                    continue
                # The label's truthiness decides which counter bumps,
                # exactly as the original if/else did.
                counts[1 if label else 0] += 1
                images.append((os.path.join(root, fname), label))
    print('- number of images in: class0', counts[0], ' / class1', counts[1])

    return images
Пример #4
0
    def __init__(self,
                 root,
                 loader=default_loader,
                 extensions=IMG_EXTENSIONS,
                 transform=None,
                 target_transform=None,
                 normalize_lab=None):
        """Collect every non-hidden image file under *root* (recursively).

        Args:
            root: Directory scanned for images.
            loader: Callable that loads an image given its path.
            extensions: Allowed file extensions.
            transform: Optional transform applied to each image.
            target_transform: Optional transform applied to each target.
            normalize_lab: Optional (mean, std) pair; converted to float
                tensors when provided.

        Raises:
            RuntimeError: If no matching files are found under *root*.
        """
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader
        self.normalize_lab = normalize_lab
        root = os.path.expanduser(root)
        self.images = []
        for rootdir, _, fnames in sorted(os.walk(root)):
            for fname in sorted(fnames):
                # Skip hidden files, then keep only allowed extensions.
                if fname[0] == '.':
                    continue
                if has_file_allowed_extension(fname, extensions):
                    self.images.append(os.path.join(rootdir, fname))
        if not self.images:
            raise (RuntimeError("Found 0 files in folder of: " + root + "\n"
                                "Supported extensions are: " +
                                ",".join(extensions)))

        if normalize_lab is not None:
            # Store the mean/std pair as float tensors for later use.
            mean_t = torch.Tensor(normalize_lab[0]).float()
            std_t = torch.Tensor(normalize_lab[1]).float()
            self.normalize_lab = (mean_t, std_t)
Пример #5
0
    def make_dataset(self):
        """Return matching, sorted lists of image and mask file names.

        Only names with an allowed extension are kept; both directories
        must yield the same number of files.
        """
        def _allowed(names):
            # Keep only files whose extension is in self.extensions.
            return [name for name in names
                    if has_file_allowed_extension(name, self.extensions)]

        img_names = _allowed(sorted(os.listdir(self.dir_images)))
        mask_names = _allowed(sorted(os.listdir(self.dir_masks)))

        assert len(img_names) == len(mask_names)

        return img_names, mask_names
Пример #6
0
 def make_dataset(folder):
     """List the full paths of allowed image files directly inside *folder*.

     Note: os.listdir order is arbitrary; results are deliberately unsorted,
     matching the original behavior.
     """
     return [os.path.join(folder, name)
             for name in os.listdir(folder)
             if has_file_allowed_extension(name, IMG_EXTENSIONS)]
    def make_dataset(self, image_name_list=None):
        """Build the list of image paths for this dataset.

        With *image_name_list*, each name is joined onto self.root, truncated
        to self.file_count_limit (initialised from the list length the first
        time). Otherwise self.root is scanned for allowed-extension files.
        """
        if image_name_list is not None:
            if self.file_count_limit is None:
                self.file_count_limit = len(image_name_list)
            limited = image_name_list[:self.file_count_limit]
            return [os.path.join(self.root, fname) for fname in limited]

        images = []
        root_dir = os.path.expanduser(self.root)
        for i, fname in enumerate(os.listdir(root_dir)):
            if not has_file_allowed_extension(fname, self.extensions):
                continue
            # NOTE: the limit is compared against the raw listdir index, not
            # the number of images collected so far (original behavior kept).
            if self.file_count_limit is not None and i == self.file_count_limit:
                break
            images.append(os.path.join(root_dir, fname))
        return images
Пример #8
0
def make_dataset(dir, class_to_idx, extensions, domains, start=1934):
    """Collect (path, class) items whose (decade, region) is in *domains*.

    File names are expected to look like ``<year>_<city>_...``; the city is
    mapped to a region via REGIONS_DICT and the year is bucketed into the
    decade anchored at *start*.
    """
    images, meta = [], []

    dir = os.path.expanduser(dir)
    for target in sorted(os.listdir(dir)):
        class_dir = os.path.join(dir, target)
        if not os.path.isdir(class_dir):
            continue

        for root, _, fnames in sorted(os.walk(class_dir)):
            for fname in sorted(fnames):
                if not has_file_allowed_extension(fname, extensions):
                    continue
                path = os.path.join(root, fname)
                # Parse "<year>_<city>_..." out of the file name.
                stem = path.split('/')[-1]
                year = int(stem.split('_')[0])
                region = REGIONS_DICT[stem.split('_')[1]]
                # Bucket the year into its decade, anchored at `start`.
                pivot_year = start + (year - start) // 10 * 10
                if (pivot_year, region) in domains:
                    images.append((path, class_to_idx[target]))
                    meta.append([year, region])

    return images, meta
Пример #9
0
    def load_data(
            self,
            data: Union[str, Tuple[List[str], List[Any]]],
            dataset: Optional[Any] = None) -> Sequence[Mapping[str, Any]]:
        """Load classification samples from a folder or explicit file lists.

        When *data* is a directory, class names are discovered from its
        sub-folders; otherwise loading is delegated to the parent class and
        the results are filtered by allowed file extension.
        """
        if self.isdir(data):
            classes, class_to_idx = self.find_classes(data)
            if not classes:
                # No class sub-folders found: treat the directory as
                # unlabelled prediction data instead.
                return self.predict_load_data(data)
            else:
                # Side effect: record the discovered labels on the state.
                self.set_state(LabelsState(classes))

            if dataset is not None:
                dataset.num_classes = len(classes)

            data = make_dataset(data, class_to_idx, extensions=self.extensions)
            return [{
                DefaultDataKeys.INPUT: input,
                DefaultDataKeys.TARGET: target
            } for input, target in data]
        # Non-directory input: keep only samples whose INPUT path carries an
        # allowed extension.
        return list(
            filter(
                lambda sample: has_file_allowed_extension(
                    sample[DefaultDataKeys.INPUT], self.extensions),
                super().load_data(data, dataset),
            ))
Пример #10
0
    def load_data(self, data: Union[Tuple[str, str], Tuple[List[str],
                                                           List[str]]],
                  dataset: BaseAutoDataset) -> Sequence[Mapping[str, Any]]:
        """Pair input files with target files.

        *data* is either (input_dir, target_dir) or two parallel path lists.
        Directory inputs are matched by identical file names; pairs are then
        filtered by allowed extension.
        """
        input_data, target_data = data

        if self.isdir(input_data) and self.isdir(target_data):
            input_files = os.listdir(input_data)
            target_files = os.listdir(target_data)

            # Only file names present in BOTH directories form valid pairs.
            all_files = set(input_files).intersection(set(target_files))

            if len(all_files) != len(input_files) or len(all_files) != len(
                    target_files):
                rank_zero_warn(
                    f"Found inconsistent files in input_dir: {input_data} and target_dir: {target_data}. Some files"
                    " have been dropped.",
                    UserWarning,
                )

            # Both comprehensions iterate the same set object, so the two
            # lists stay aligned even though set order is arbitrary.
            input_data = [os.path.join(input_data, file) for file in all_files]
            target_data = [
                os.path.join(target_data, file) for file in all_files
            ]

        # Single file paths are wrapped so the length check below works.
        if not isinstance(input_data, list) and not isinstance(
                target_data, list):
            input_data = [input_data]
            target_data = [target_data]

        if len(input_data) != len(target_data):
            raise MisconfigurationException(
                f"The number of input files ({len(input_data)}) and number of target files ({len(target_data)}) must be"
                " the same.", )

        # Keep only pairs where BOTH sides have an allowed extension.
        data = filter(
            lambda sample:
            (has_file_allowed_extension(sample[0], self.extensions) and
             has_file_allowed_extension(sample[1], self.extensions)),
            zip(input_data, target_data),
        )

        data = [{
            DefaultDataKeys.INPUT: input,
            DefaultDataKeys.TARGET: target
        } for input, target in data]

        return data
Пример #11
0
    def from_folder(cls,
                    folder: Union[str, pathlib.Path],
                    transform: Optional[Callable] = _default_valid_transforms,
                    loader: Callable = _pil_loader,
                    batch_size: int = 64,
                    num_workers: Optional[int] = None,
                    **kwargs):
        """
        Creates a ImageClassificationData object from folders of images arranged in this way: ::

            folder/dog_xxx.png
            folder/dog_xxy.png
            folder/dog_xxz.png
            folder/cat_123.png
            folder/cat_nsdf3.png
            folder/cat_asd932_.png

        Args:
            folder: Path to the data folder.
            transform: Image transform to apply to the data.
            loader: A function to load an image given its path.
            batch_size: Batch size for data loading.
            num_workers: The number of workers to use for parallelized loading.
                Defaults to None which equals the number of available CPU threads.

        Returns:
            ImageClassificationData: the constructed data module

        Examples:
            >>> img_data = ImageClassificationData.from_folder("my_folder/") # doctest: +SKIP

        """
        if not os.path.isdir(folder):
            raise MisconfigurationException("folder should be a directory")

        filenames = os.listdir(folder)

        # BUG FIX: the message was a plain string with literal "{...}"
        # placeholders (missing f prefix), so the extensions and folder were
        # never interpolated; also "where" -> "were".
        if any(not has_file_allowed_extension(f, IMG_EXTENSIONS)
               for f in filenames):
            raise MisconfigurationException(
                f"No images with allowed extensions {IMG_EXTENSIONS} were found in {folder}"
            )

        # Prediction-only dataset: no targets, just the image paths.
        test_ds = (FlashDatasetFolder(
            folder,
            transform=transform,
            loader=loader,
            with_targets=False,
            img_paths=[os.path.join(folder, f) for f in filenames]))

        datamodule = cls(
            test_ds=test_ds,
            batch_size=batch_size,
            num_workers=num_workers,
        )

        datamodule.data_pipeline = ImageClassificationDataPipeline(
            valid_transform=transform, loader=loader)
        return datamodule
Пример #12
0
def is_image_file(filename):
    """Return True when *filename* carries a recognized image extension.

    Args:
        filename (string): path to a file
    Returns:
        bool: True if the filename ends with a known image extension
    """
    allowed = IMG_EXTENSIONS
    return has_file_allowed_extension(filename, allowed)
Пример #13
0
def make_dataset(dir, extensions):
    """Recursively collect every allowed-extension file path under *dir*."""
    found = []
    for root, _, fnames in sorted(os.walk(dir)):
        found.extend(os.path.join(root, fname)
                     for fname in sorted(fnames)
                     if has_file_allowed_extension(fname, extensions))
    return found
Пример #14
0
def make_dataset(dir, extensions):
    """Walk *dir* (with ``~`` expanded) and return all allowed image paths."""
    dir = os.path.expanduser(dir)
    matches = []
    for root, _, fnames in sorted(os.walk(dir)):
        for fname in sorted(fnames):
            if not has_file_allowed_extension(fname, extensions):
                continue
            matches.append(os.path.join(root, fname))

    return matches
Пример #15
0
 def _load_data_dir(cls,
                    data: Any,
                    dataset: Optional[AutoDataset] = None) -> List[str]:
     """Resolve (path, label) pairs from a list of pairs or a class folder.

     A list entry whose path is a directory contributes every allowed image
     inside it; a plain image file contributes itself. For a folder, classes
     are discovered and delegated to make_dataset.
     """
     if not isinstance(data, list):
         classes, class_to_idx = cls._find_classes(data)
         dataset.num_classes = len(classes)
         return make_dataset(data, class_to_idx, IMG_EXTENSIONS, None)

     dataset.num_classes = len(data)
     out = []
     for p, label in data:
         if os.path.isdir(p):
             for f in os.listdir(p):
                 if has_file_allowed_extension(f, IMG_EXTENSIONS):
                     out.append([os.path.join(p, f), label])
         elif os.path.isfile(p) and has_file_allowed_extension(
                 p, IMG_EXTENSIONS):
             out.append([p, label])
     return out
Пример #16
0
 def read_images(self, dir, extensions):
     """Map record ids to image paths found under *dir*.

     The id is captured by group 1 of self.ID_PATTERN; files without a
     match are skipped, and a later duplicate id overwrites an earlier one.
     """
     images = {}
     for root, _, fnames in sorted(os.walk(dir)):
         for fname in sorted(fnames):
             if not has_file_allowed_extension(fname, extensions):
                 continue
             match = self.ID_PATTERN.search(fname)
             if match is None:
                 continue
             images[match.group(1)] = os.path.join(root, fname)
     return images
Пример #17
0
    def item_class(dir, extensions):
        """Recursively build an ItemClass tree mirroring the folder layout.

        Sub-directories become nested ItemClass nodes; allowed image files
        become items of the current node.
        """
        from torchvision.datasets.folder import has_file_allowed_extension

        # Renamed local from `item_class` to avoid shadowing this function.
        node = ItemClass(os.path.basename(dir), [])
        for target in sorted(os.listdir(dir)):
            entry = os.path.join(dir, target)
            if os.path.isdir(entry):
                node.add_sub_class(ImageDir.item_class(entry, extensions))
            elif has_file_allowed_extension(entry, extensions):
                node.items.append(entry)
        return node
Пример #18
0
 def __init__(self, root, loader=None, transform=None):
     """Index the test images under root/test/images; classes come from root/train."""
     assert os.path.exists(root)
     self.classes, class_to_idx = find_classes(os.path.join(root, 'train'))
     test_dir = os.path.join(root, 'test', 'images')
     self.image_paths = []
     for folder, _, fnames in sorted(os.walk(test_dir)):
         self.image_paths.extend(
             os.path.join(folder, fname)
             for fname in sorted(fnames)
             if has_file_allowed_extension(fname, IMG_EXTENSIONS))
     self.loader = pil_loader if loader is None else loader
     self.transform = transform
Пример #19
0
 def read_reports(self, dir, extensions):
     """Map report ids (via self.ID_PATTERN) to report file paths under *dir*.

     When self.cache_text is set, each report's section text is extracted
     eagerly as a side effect while indexing.
     """
     reports = {}
     for root, _, fnames in sorted(os.walk(dir)):
         for fname in sorted(fnames):
             if not has_file_allowed_extension(fname, extensions):
                 continue
             match = self.ID_PATTERN.search(fname)
             if match is None:
                 continue
             report_path = os.path.join(root, fname)
             if self.cache_text:
                 # Warm the text cache as we index.
                 self.extract_section(report_path)
             reports[match.group(1)] = report_path
     return reports
Пример #20
0
    def _get_predicting_files(samples: Union[Sequence, str]) -> List[str]:
        """Normalise *samples* (one path, or a list of files/dirs) into a file list.

        Directories are expanded one level deep; a mix of files and
        directories yields an empty list. Only allowed image extensions
        survive the final filter.
        """
        if isinstance(samples, str):
            samples = [samples]

        candidates: List[str] = []
        if isinstance(samples, (list, tuple)):
            if all(os.path.isdir(s) for s in samples):
                candidates = [
                    os.path.join(sp, f) for sp in samples for f in os.listdir(sp)
                ]
            elif all(os.path.isfile(s) for s in samples):
                candidates = list(samples)

        return [p for p in candidates
                if has_file_allowed_extension(p, IMG_EXTENSIONS)]
Пример #21
0
 def _load_data_dir(
     cls,
     data: Any,
     dataset: Optional[AutoDataset] = None,
 ) -> Tuple[Optional[List[str]], List[Tuple[str, int]]]:
     """Resolve samples from a list of (path, label) pairs or a class folder.

     Returns (classes, samples); classes is None in the list case because
     labels are supplied explicitly by the caller.
     """
     if isinstance(data, list):
         # TODO: define num_classes elsewhere. This is a bad assumption since the list of
         # labels might not contain the complete set of ids so that you can infer the total
         # number of classes to train in your dataset.
         dataset.num_classes = len(data)
         out: List[Tuple[str, int]] = []
         for p, label in data:
             if os.path.isdir(p):
                 # TODO: there is an issue here when a path is provided along with labels.
                 # os.listdir cannot assure the same file order as the passed labels list.
                 files_list: List[str] = os.listdir(p)
                 # NOTE(review): a directory entry may hold only ONE file, so
                 # the for-loop below runs at most once by construction.
                 if len(files_list) > 1:
                     raise ValueError(
                         f"The provided directory contains more than one file."
                         f"Directory: {p} -> Contains: {files_list}")
                 for f in files_list:
                     if has_file_allowed_extension(f, IMG_EXTENSIONS):
                         out.append([os.path.join(p, f), label])
             elif os.path.isfile(p) and has_file_allowed_extension(
                     str(p), IMG_EXTENSIONS):
                 out.append([p, label])
             else:
                 # Neither a directory nor a recognized image file.
                 raise TypeError(f"Unexpected file path type: {p}.")
         return None, out
     else:
         classes, class_to_idx = cls._find_classes(data)
         # TODO: define num_classes elsewhere. This is a bad assumption since the list of
         # labels might not contain the complete set of ids so that you can infer the total
         # number of classes to train in your dataset.
         dataset.num_classes = len(classes)
         return classes, make_dataset(data, class_to_idx, IMG_EXTENSIONS,
                                      None)
Пример #22
0
    def predict_load_data(self,
                          data: Union[str, List[str]],
                          dataset: Optional[Any] = None) -> Sequence[Mapping[str, Any]]:
        """Expand a directory into its files and keep allowed-extension samples."""
        if self.isdir(data):
            data = [os.path.join(data, file) for file in os.listdir(data)]
        elif not isinstance(data, list):
            # A single path: wrap so the parent loader sees a list.
            data = [data]

        samples = super().predict_load_data(data)
        return [
            sample for sample in samples
            if has_file_allowed_extension(sample[DefaultDataKeys.INPUT],
                                          self.extensions)
        ]
Пример #23
0
def make_dataset(list_file, class_to_idx, extensions, domains):
    """Read ``<fname> <year>-<viewpoint> <target>`` lines from *list_file*.

    Keeps entries whose file name has an allowed extension and whose
    (year, viewpoint) string pair is listed in *domains*.

    Returns:
        (images, meta): ``images`` is a list of (path, class index) tuples,
        ``meta`` a parallel list of [year, viewpoint] integer pairs.
    """
    images = []
    meta = []
    # NOTE: the original mixed a tab into the `with` body (a TabError under
    # Python 3) and over-indented the append block; indentation is
    # normalised to 4 spaces with identical behavior.
    with open(list_file) as f:
        lines = f.readlines()
    for line in lines:
        fname, domain, target = line.strip().split(' ')
        year, viewpoint = domain.split('-')
        if has_file_allowed_extension(fname, extensions) and (year, viewpoint) in domains:
            images.append((fname, class_to_idx[target]))
            meta.append([int(year), int(viewpoint)])

    return images, meta
Пример #24
0
def make_dataset(fol, class_to_idx, extensions):
    """Collect (path, class index) pairs from the class sub-folders of *fol*."""
    samples = []
    fol = os.path.expanduser(fol)
    for target in sorted(os.listdir(fol)):
        class_dir = os.path.join(fol, target)
        if not os.path.isdir(class_dir):
            continue

        label = class_to_idx[target]
        for root, _, fnames in sorted(os.walk(class_dir)):
            samples.extend((os.path.join(root, fname), label)
                           for fname in sorted(fnames)
                           if has_file_allowed_extension(fname, extensions))

    return samples
def make_dataset(dir, class_to_idx, extensions):
    ''' helper to read SVRT dataset
    '''
    # Class labels are encoded in the file names themselves: names start
    # with 'sample_0_...' or 'sample_1_...', so character 7 is the label.
    samples = []
    dir = os.path.expanduser(dir)

    for root, _, fnames in sorted(os.walk(dir)):
        for fname in sorted(fnames):
            if not has_file_allowed_extension(fname, extensions):
                continue
            label = int(fname[7])
            samples.append((os.path.join(root, fname), label))
    return samples
def make_dataset_withbbox(dir, class_to_idx, extensions, bounding_box):
    """Collect (path, class index, bbox row) triples per class folder.

    The bbox row is looked up by the integer file-name stem, e.g. ``12.png``
    maps to ``bounding_box[12, :]``.
    """
    samples = []
    dir = os.path.expanduser(dir)
    for target in sorted(class_to_idx.keys()):
        class_dir = os.path.join(dir, target)
        if not os.path.isdir(class_dir):
            continue

        for root, _, fnames in sorted(os.walk(class_dir)):
            for fname in sorted(fnames):
                if not has_file_allowed_extension(fname, extensions):
                    continue
                row = int(fname.split('.')[0])
                samples.append((os.path.join(root, fname),
                                class_to_idx[target], bounding_box[row, :]))
    return samples
Пример #27
0
def _make_dataset(dir, class_to_idx, extensions):
    """Pair each allowed image with the track number parsed from its folder.

    The label is the integer formed by the digits of the class-folder name
    (e.g. 'track12' -> 12), not the class_to_idx value.
    """
    samples = []
    dir = os.path.expanduser(dir)
    for target in sorted(class_to_idx.keys()):
        class_dir = os.path.join(dir, target)
        if not os.path.isdir(class_dir):
            continue

        for root, _, fnames in sorted(os.walk(class_dir)):
            for fname in sorted(fnames):
                if not has_file_allowed_extension(fname, extensions):
                    continue
                # Digits of the folder name give the track index.
                track_idx = int(''.join(ch for ch in target if ch.isdigit()))
                samples.append((os.path.join(root, fname), track_idx))

    return samples
Пример #28
0
def make_dataset(dir, class_to_idx, img_to_classes, read_labels):
    """Build (path, multi-hot target, image id) triples from a flat folder.

    Each file name stem is an integer image id; when *read_labels* is true
    the target vector is filled from img_to_classes, otherwise it stays
    all-zero.
    """
    samples = []
    dir = os.path.expanduser(dir)
    for file in sorted(os.listdir(dir)):
        full_path = os.path.join(dir, file)
        if os.path.isdir(full_path):
            continue
        if not has_file_allowed_extension(
                full_path,
                ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif']):
            continue
        img_id = file.split(".")[0]
        target = np.zeros(len(class_to_idx), dtype=np.float32)
        if read_labels:
            # Multi-hot encode every class this image belongs to.
            target[[class_to_idx[c] for c in img_to_classes[img_id]]] = 1
        samples.append((full_path, target, int(img_id)))

    return samples
Пример #29
0
 def is_valid_file(x: str) -> bool:
     """Return whether *x* has one of the allowed extensions."""
     allowed = cast(tuple[str, ...], extensions)
     return has_file_allowed_extension(x, allowed)
Пример #30
0
 def is_valid_file(x):
     """Predicate: True when *x* carries an allowed extension."""
     result = has_file_allowed_extension(x, extensions)
     return result