Example #1
 def _check_integrity(self) -> bool:
     return all(
         check_integrity(self._get_path(filename), md5)
         for filename, md5 in self.checksum.items())
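For reference, check_integrity in these examples is the torchvision.datasets.utils helper: it returns True only when the file exists and, if an md5 string is given, its MD5 hash matches. A rough, self-contained equivalent (the name check_integrity_sketch and the chunk size are ours, for illustration only):

import hashlib
import os

def check_integrity_sketch(fpath, md5=None):
    # The file must exist; without an md5 that is the only requirement.
    if not os.path.isfile(fpath):
        return False
    if md5 is None:
        return True
    # Hash the file in chunks so large archives are not loaded into memory at once.
    hasher = hashlib.md5()
    with open(fpath, 'rb') as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            hasher.update(chunk)
    return hasher.hexdigest() == md5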
Example #2
    def __init__(self,
                 data_root,
                 split_mode,
                 download_code=None,
                 download=False,
                 extract=False):
        self.data_root = path.expanduser(data_root)
        self.split_mode = split_mode

        self._zip_path = path.join(self.data_root, 'DRIVE', 'DRIVE.zip')
        self._zip_md5 = 'a91f25272507b1f53132d03a69030de8'

        if self.split_mode == 'train':
            self._image_dir = path.join(self.data_root, 'DRIVE', 'training',
                                        'images')
            self._target_dir = path.join(self.data_root, 'DRIVE', 'training',
                                         '1st_manual')
            self._mask_dir = path.join(self.data_root, 'DRIVE', 'training',
                                       'mask')
            self.sample_keys = ['image', 'target', 'mask']

        elif self.split_mode in ('valid', 'test'):
            self._image_dir = path.join(self.data_root, 'DRIVE', 'test',
                                        'images')
            self._target_dir = path.join(self.data_root, 'DRIVE', 'test',
                                         '1st_manual')
            self._mask_dir = path.join(self.data_root, 'DRIVE', 'test', 'mask')
            self._target_aux_dir = path.join(self.data_root, 'DRIVE', 'test',
                                             '2nd_manual')
            self.sample_keys = ['image', 'target', 'mask', 'target_aux']

        else:
            LOGGER.error('Invalid split mode: %s', self.split_mode)
            raise NotImplementedError('Invalid split mode: {}'.format(
                self.split_mode))

        if download:
            self._download(download_code)
        elif extract:
            self._extract()

        if not check_integrity(self._zip_path, self._zip_md5):
            LOGGER.error('DRIVE dataset not found or corrupted')
            raise RuntimeError('DRIVE dataset not found or corrupted')

        self._image_paths = sorted(
            list_files(root=self._image_dir,
                       suffix=('.tif', '.TIF'),
                       prefix=True))
        self._target_paths = sorted(
            list_files(root=self._target_dir,
                       suffix=('.gif', '.GIF'),
                       prefix=True))
        self._mask_paths = sorted(
            list_files(root=self._mask_dir,
                       suffix=('.gif', '.GIF'),
                       prefix=True))
        if self.split_mode in ('valid', 'test'):
            self._target_aux_paths = sorted(
                list_files(root=self._target_aux_dir,
                           suffix=('.gif', '.GIF'),
                           prefix=True))
            assert len(self._image_paths) == len(
                self._target_aux_paths), 'DRIVE dataset corrupted'

        assert len(self._image_paths) == len(
            self._target_paths), 'DRIVE dataset corrupted'
        assert len(self._image_paths) == len(
            self._mask_paths), 'DRIVE dataset corrupted'

        LOGGER.debug('Retrieved all %d samples for DRIVE dataset',
                     len(self._image_paths))

        # for compatibility with other datasets
        self.indices = list(range(len(self._image_paths)))
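A minimal usage sketch for the constructor above, assuming it belongs to a dataset class we will call DRIVEDataset here (the class name and the data_root path are illustrative, not taken from the snippet):

# Hypothetical usage of the __init__ shown above.
dataset = DRIVEDataset(data_root='~/datasets',
                       split_mode='train',
                       download=False,
                       extract=True)
print(len(dataset.indices))   # number of paired samples found on disk
print(dataset.sample_keys)    # ['image', 'target', 'mask'] for the train split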
Example #3
 def _check_integrity(self):
     root = self.root
     md5 = self.split_list[self.split][2]
     fpath = os.path.join(root, self.filename)
     return check_integrity(fpath, md5)
Example #4
 def _check_exists(self) -> bool:
     return all(
         check_integrity(os.path.join(
             self.raw_folder, os.path.splitext(os.path.basename(url))[0])
         ) for url, _ in self.resources
     )
Example #5
    def __init__(
            self,
            root: str,
            train: bool = None,
            data_type: str = 'event',
            frames_number: int = None,
            split_by: str = None,
            duration: int = None,
            padding_frame: bool = True,
            transform: Optional[Callable] = None,
            target_transform: Optional[Callable] = None,
    ) -> None:
        '''
        :param root: root path of the dataset
        :type root: str
        :param train: whether to use the train set. Set ``True`` or ``False`` for datasets that provide a train/test
            division, e.g., the DVS128 Gesture dataset. If the dataset does not provide a train/test division, e.g., CIFAR10-DVS,
            please set ``None`` and use the :class:`~split_to_train_test_set` function to get the train/test sets
        :type train: bool
        :param data_type: `event` or `frame`
        :type data_type: str
        :param frames_number: the integrated frame number
        :type frames_number: int
        :param split_by: `time` or `number`
        :type split_by: str
        :param duration: the time duration of each frame
        :type duration: int
        :param padding_frame: whether to pad each sample's frames to the maximum number of frames
        :type padding_frame: bool
        :param transform: a function/transform that takes in
            a sample and returns a transformed version.
            E.g, ``transforms.RandomCrop`` for images.
        :type transform: callable
        :param target_transform: a function/transform that takes
            in the target and transforms it.
        :type target_transform: callable

        The base class for neuromorphic dataset. Users can define a new dataset by inheriting this class and implementing
        all abstract methods. Users can refer to :class:`spikingjelly.datasets.dvs128_gesture.DVS128Gesture`.

        If ``data_type == 'event'``
            the sample in this dataset is a dict whose keys are ['t', 'x', 'y', 'p'] and values are ``numpy.ndarray``.

        If ``data_type == 'frame'`` and ``frames_number`` is not ``None``
            events will be integrated to frames with fixed frames number. ``split_by`` will define how to split events.
            See :class:`cal_fixed_frames_number_segment_index` for
            more details.

        If ``data_type == 'frame'``, ``frames_number`` is ``None``, and ``duration`` is not ``None``
            events will be integrated into frames with a fixed time duration. If ``padding_frame`` is ``True``, each sample
            will be padded to the same number of frames (length), namely the maximum frame count over all samples.

        '''

        events_np_root = os.path.join(root, 'events_np')

        if not os.path.exists(events_np_root):

            download_root = os.path.join(root, 'download')

            if os.path.exists(download_root):
                print(f'The [{download_root}] directory for saving downloaded files already exists, check files...')
                # check files
                resource_list = self.resource_url_md5()
                for i in range(resource_list.__len__()):
                    file_name, url, md5 = resource_list[i]
                    fpath = os.path.join(download_root, file_name)
                    if not utils.check_integrity(fpath=fpath, md5=md5):
                        print(f'The file [{fpath}] does not exist or is corrupted.')

                        if os.path.exists(fpath):
                            # If file is corrupted, we will remove it.
                            os.remove(fpath)
                            print(f'Remove [{fpath}]')

                        if self.downloadable():
                            # If file does not exist, we will download it.
                            print(f'Download [{file_name}] from [{url}] to [{download_root}]')
                            utils.download_url(url=url, root=download_root, filename=file_name, md5=md5)
                        else:
                            raise NotImplementedError(
                                f'This dataset can not be downloaded by SpikingJelly, please download [{file_name}] from [{url}] manually and put files at {download_root}.')

            else:
                os.mkdir(download_root)
                print(f'Mkdir [{download_root}] to save downloaded files.')
                resource_list = self.resource_url_md5()
                if self.downloadable():
                    # download and extract file
                    for i in range(resource_list.__len__()):
                        file_name, url, md5 = resource_list[i]
                        print(f'Download [{file_name}] from [{url}] to [{download_root}]')
                        utils.download_url(url=url, root=download_root, filename=file_name, md5=md5)
                else:
                    raise NotImplementedError(f'This dataset can not be downloaded by SpikingJelly, '
                                              f'please download files manually and put files at [{download_root}]. '
                                              f'The resources file_name, url, and md5 are: \n{resource_list}')

            # We have downloaded files and checked files. Now, let us extract the files
            extract_root = os.path.join(root, 'extract')
            if os.path.exists(extract_root):
                print(f'The directory [{extract_root}] for saving extracted files already exists.\n'
                      f'SpikingJelly will not check the data integrity of extracted files.\n'
                      f'If the extracted files are incomplete or corrupted, please delete [{extract_root}] manually, '
                      f'then SpikingJelly will re-extract files from [{download_root}].')
                # shutil.rmtree(extract_root)
                # print(f'Delete [{extract_root}].')
            else:
                os.mkdir(extract_root)
                print(f'Mkdir [{extract_root}].')
                self.extract_downloaded_files(download_root, extract_root)

            # Now let us convert the origin binary files to npz files
            os.mkdir(events_np_root)
            print(f'Mkdir [{events_np_root}].')
            print(f'Start to convert the origin data from [{extract_root}] to [{events_np_root}] in np.ndarray format.')
            self.create_events_np_files(extract_root, events_np_root)

        H, W = self.get_H_W()

        if data_type == 'event':
            _root = events_np_root
            _loader = np.load
            _transform = transform
            _target_transform = target_transform

        elif data_type == 'frame':
            if frames_number is not None:
                assert frames_number > 0 and isinstance(frames_number, int)
                assert split_by == 'time' or split_by == 'number'
                frames_np_root = os.path.join(root, f'frames_number_{frames_number}_split_by_{split_by}')
                if os.path.exists(frames_np_root):
                    print(f'The directory [{frames_np_root}] already exists.')
                else:
                    os.mkdir(frames_np_root)
                    print(f'Mkdir [{frames_np_root}].')

                    # create the same directory structure
                    create_same_directory_structure(events_np_root, frames_np_root)

                    # use multi-thread to accelerate
                    t_ckp = time.time()
                    with ThreadPoolExecutor(max_workers=min(multiprocessing.cpu_count(), 64)) as tpe:
                        print(f'Start ThreadPoolExecutor with max workers = [{tpe._max_workers}].')
                        for e_root, e_dirs, e_files in os.walk(events_np_root):
                            if e_files.__len__() > 0:
                                output_dir = os.path.join(frames_np_root, os.path.relpath(e_root, events_np_root))
                                for e_file in e_files:
                                    events_np_file = os.path.join(e_root, e_file)
                                    print(f'Start to integrate [{events_np_file}] to frames and save to [{output_dir}].')
                                    tpe.submit(integrate_events_file_to_frames_file_by_fixed_frames_number, events_np_file, output_dir, split_by, frames_number, H, W, True)

                    print(f'Used time = [{round(time.time() - t_ckp, 2)}s].')

                _root = frames_np_root
                _loader = load_npz_frames
                _transform = transform
                _target_transform = target_transform

            elif duration is not None:
                self.max_frames_number = 0
                assert duration > 0 and isinstance(duration, int)
                frames_np_root = os.path.join(root, f'duration_{duration}')
                if os.path.exists(frames_np_root):
                    print(f'The directory [{frames_np_root}] already exists.')
                    fn_name = os.path.join(frames_np_root, 'max_frames_number.npy')
                    self.max_frames_number = np.load(fn_name).item()
                    print(f'max_frames_number = [{self.max_frames_number}].')

                else:
                    os.mkdir(frames_np_root)
                    print(f'Mkdir [{frames_np_root}].')
                    # create the same directory structure
                    create_same_directory_structure(events_np_root, frames_np_root)
                    # use multi-thread to accelerate
                    t_ckp = time.time()
                    future_list = []
                    with ThreadPoolExecutor(max_workers=min(multiprocessing.cpu_count(), 64)) as tpe:
                        print(f'Start ThreadPoolExecutor with max workers = [{tpe._max_workers}].')
                        for e_root, e_dirs, e_files in os.walk(events_np_root):
                            if e_files.__len__() > 0:
                                output_dir = os.path.join(frames_np_root, os.path.relpath(e_root, events_np_root))
                                for e_file in e_files:
                                    events_np_file = os.path.join(e_root, e_file)
                                    print(f'Start to integrate [{events_np_file}] to frames and save to [{output_dir}].')
                                    future_list.append(tpe.submit(integrate_events_file_to_frames_file_by_fixed_duration, events_np_file, output_dir, duration, H, W, True))

                    for future in future_list:
                        self.max_frames_number = max(self.max_frames_number, future.result())

                    # save the max_frames_number in frames_np_root
                    fn_name = os.path.join(frames_np_root, 'max_frames_number.npy')
                    np.save(fn_name, self.max_frames_number)
                    print(f'Save max_frames_number to [{fn_name}].')
                    print(f'Used time = [{round(time.time() - t_ckp, 2)}s].')

                if padding_frame:
                    if transform is None:
                        transform_with_padding = PadFrames(self.max_frames_number)
                    else:
                        transform_with_padding = transforms.Compose([PadFrames(self.max_frames_number), transform])

                    _root = frames_np_root
                    _loader = load_npz_frames
                    _transform = transform_with_padding
                    _target_transform = target_transform

                else:
                    _root = frames_np_root
                    _loader = load_npz_frames
                    _transform = transform
                    _target_transform = target_transform

            else:
                raise ValueError('frames_number and duration can not both be None.')

        if train is not None:
            if train:
                _root = os.path.join(_root, 'train')
            else:
                _root = os.path.join(_root, 'test')

        super().__init__(root=_root, loader=_loader, extensions='.npz', transform=_transform,
                         target_transform=_target_transform)
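A usage sketch for this base class through a concrete subclass such as spikingjelly.datasets.dvs128_gesture.DVS128Gesture (the root path is a placeholder and the frames_number/split_by values are only examples):

from spikingjelly.datasets.dvs128_gesture import DVS128Gesture

root_dir = './DVS128Gesture'  # placeholder path

# Event samples: each item is a dict with keys 't', 'x', 'y', 'p'.
train_set_events = DVS128Gesture(root_dir, train=True, data_type='event')

# Frame samples: events are integrated into a fixed number of frames,
# here 20 frames split by event number.
train_set_frames = DVS128Gesture(root_dir, train=True, data_type='frame',
                                 frames_number=20, split_by='number')
frame, label = train_set_frames[0]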
Example #6
 def _check_integrity(self):
     for filename, md5 in self.filelist:
         fpath = os.path.join(self.root, self.basedir, filename)
         if not check_integrity(fpath, md5):
             return False
     return True
Example #7
 def _load_meta_file(self):
     if check_integrity(self.meta_file):
         return torch.load(self.meta_file)
     else:
         raise RuntimeError("Meta file not found or corrupted.",
                            "You can use download=True to create it.")
Example #8
 def _check_integrity(self):
     root = self.root
     filename = self.filename
     fpath = os.path.join(root, filename)
     return check_integrity(fpath)
Example #9
def _check_integrity(root) -> bool:
    zip_filename = 'all_runs'
    if not check_integrity(join(root, zip_filename + '.zip'),
                           zips_md5[zip_filename]):
        return False
    return True
Example #10
 def download(root):
     if not check_integrity(os.path.join(root, "EuroSAT.zip")):
         download_and_extract_archive(URL, root, md5=MD5)
Example #11
    def download(self):
        import tarfile
        import pickle
        import shutil

        if self._check_integrity_data():
            return

        gz_filename = '{0}.tar.gz'.format(self.gz_folder)
        download_url(self.download_url,
                     self.root,
                     filename=gz_filename,
                     md5=self.gz_md5)
        with tarfile.open(os.path.join(self.root, gz_filename), 'r:gz') as tar:
            tar.extractall(path=self.root)

        train_filename = os.path.join(self.root, self.gz_folder, 'train')
        check_integrity(train_filename, self.files_md5['train'])
        with open(train_filename, 'rb') as f:
            data = pickle.load(f, encoding='bytes')
            images = data[b'data']
            fine_labels = data[b'fine_labels']
            coarse_labels = data[b'coarse_labels']

        test_filename = os.path.join(self.root, self.gz_folder, 'test')
        check_integrity(test_filename, self.files_md5['test'])
        with open(test_filename, 'rb') as f:
            data = pickle.load(f, encoding='bytes')
            images = np.concatenate((images, data[b'data']), axis=0)
            fine_labels = np.concatenate((fine_labels, data[b'fine_labels']),
                                         axis=0)
            coarse_labels = np.concatenate(
                (coarse_labels, data[b'coarse_labels']), axis=0)

        images = images.reshape((-1, 3, 32, 32))
        images = images.transpose((0, 2, 3, 1))

        meta_filename = os.path.join(self.root, self.gz_folder, 'meta')
        check_integrity(meta_filename, self.files_md5['meta'])
        with open(meta_filename, 'rb') as f:
            data = pickle.load(f, encoding='latin1')
            fine_label_names = data['fine_label_names']
            coarse_label_names = data['coarse_label_names']

        filename = os.path.join(self.root, self.filename)
        fine_names = dict()
        with h5py.File(filename, 'w') as f:
            for i, coarse_name in enumerate(coarse_label_names):
                group = f.create_group(coarse_name)
                fine_indices = np.unique(fine_labels[coarse_labels == i])
                for j in fine_indices:
                    dataset = group.create_dataset(
                        fine_label_names[j], data=images[fine_labels == j])
                fine_names[coarse_name] = [
                    fine_label_names[j] for j in fine_indices
                ]

        filename_fine_names = os.path.join(self.root, self.filename_fine_names)
        with open(filename_fine_names, 'w') as f:
            json.dump(fine_names, f)

        gz_folder = os.path.join(self.root, self.gz_folder)
        if os.path.isdir(gz_folder):
            shutil.rmtree(gz_folder)
        if os.path.isfile('{0}.tar.gz'.format(gz_folder)):
            os.remove('{0}.tar.gz'.format(gz_folder))
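The download method above writes one HDF5 group per coarse class, one dataset per fine class inside each group, and a JSON file mapping coarse names to fine names. A hedged sketch of reading that layout back (the two file names are placeholders; in the class they come from self.filename and self.filename_fine_names):

import json
import h5py

# Hypothetical file names for illustration only.
with open('cifar100_fine_names.json', 'r') as f:
    fine_names = json.load(f)               # {coarse_name: [fine_name, ...]}

with h5py.File('cifar100_data.hdf5', 'r') as f:
    for coarse_name, names in fine_names.items():
        group = f[coarse_name]
        for fine_name in names:
            images = group[fine_name][()]    # array of shape (N, 32, 32, 3)
            print(coarse_name, fine_name, images.shape)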
Example #12
 def _check_integrity(self) -> bool:
     return check_integrity(
         os.path.join(self.root, self.dataset_name + '.tar.gz'),
         self.zip_md5)
Example #13
 def _load_meta_file(root):
     meta_file = os.path.join(root, 'meta.bin')
     if check_integrity(meta_file):
         return torch.load(meta_file)
     raise RuntimeError("Meta file not found or corrupted.",
                        "You can use download=True to create it.")
Example #14
    def __init__(
        self,
        root: str,
        train: bool = None,
        data_type: str = 'event',
        frames_number: int = None,
        split_by: str = None,
        duration: int = None,
        custom_integrate_function: Callable = None,
        custom_integrated_frames_dir_name: str = None,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
    ) -> None:
        '''
        :param root: root path of the dataset
        :type root: str
        :param train: whether to use the train set. Set ``True`` or ``False`` for datasets that provide a train/test
            division, e.g., the DVS128 Gesture dataset. If the dataset does not provide a train/test division, e.g., CIFAR10-DVS,
            please set ``None`` and use the :class:`~split_to_train_test_set` function to get the train/test sets
        :type train: bool
        :param data_type: `event` or `frame`
        :type data_type: str
        :param frames_number: the integrated frame number
        :type frames_number: int
        :param split_by: `time` or `number`
        :type split_by: str
        :param duration: the time duration of each frame
        :type duration: int
        :param custom_integrate_function: a user-defined function that inputs are ``events, H, W``.
            ``events`` is a dict whose keys are ``['t', 'x', 'y', 'p']`` and values are ``numpy.ndarray``.
            ``H`` is the height of the data and ``W`` is the width of the data.
            For example, H=128 and W=128 for the DVS128 Gesture dataset.
            The user should define how to integrate events to frames, and return frames.
        :type custom_integrate_function: Callable
        :param custom_integrated_frames_dir_name: The name of directory for saving frames integrating by ``custom_integrate_function``.
            If ``custom_integrated_frames_dir_name`` is ``None``, it will be set to ``custom_integrate_function.__name__``
        :type custom_integrated_frames_dir_name: str or None
        :param transform: a function/transform that takes in
            a sample and returns a transformed version.
            E.g, ``transforms.RandomCrop`` for images.
        :type transform: callable
        :param target_transform: a function/transform that takes
            in the target and transforms it.
        :type target_transform: callable
        The base class for neuromorphic dataset. Users can define a new dataset by inheriting this class and implementing
        all abstract methods. Users can refer to :class:`spikingjelly.datasets.dvs128_gesture.DVS128Gesture`.
        If ``data_type == 'event'``
            the sample in this dataset is a dict whose keys are ``['t', 'x', 'y', 'p']`` and values are ``numpy.ndarray``.
        If ``data_type == 'frame'`` and ``frames_number`` is not ``None``
            events will be integrated to frames with fixed frames number. ``split_by`` will define how to split events.
            See :class:`cal_fixed_frames_number_segment_index` for
            more details.
        If ``data_type == 'frame'``, ``frames_number`` is ``None``, and ``duration`` is not ``None``
            events will be integrated to frames with fixed time duration.
        If ``data_type == 'frame'``, ``frames_number`` is ``None``, ``duration`` is ``None``, and ``custom_integrate_function`` is not ``None``:
            events will be integrated by the user-defined function and saved to the ``custom_integrated_frames_dir_name`` directory in ``root`` directory.
            Here is an example from SpikingJelly's tutorials:
            .. code-block:: python
                from spikingjelly.datasets.dvs128_gesture import DVS128Gesture
                from typing import Dict
                import numpy as np
                import spikingjelly.datasets as sjds
                def integrate_events_to_2_frames_randomly(events: Dict, H: int, W: int):
                    index_split = np.random.randint(low=0, high=events['t'].__len__())
                    frames = np.zeros([2, 2, H, W])
                    t, x, y, p = (events[key] for key in ('t', 'x', 'y', 'p'))
                    frames[0] = sjds.integrate_events_segment_to_frame(x, y, p, H, W, 0, index_split)
                    frames[1] = sjds.integrate_events_segment_to_frame(x, y, p, H, W, index_split, events['t'].__len__())
                    return frames
                root_dir = 'D:/datasets/DVS128Gesture'
                train_set = DVS128Gesture(root_dir, train=True, data_type='frame', custom_integrate_function=integrate_events_to_2_frames_randomly)
                from spikingjelly.datasets import play_frame
                frame, label = train_set[500]
                play_frame(frame)
        '''

        events_np_root = os.path.join(root, 'events_np')

        if not os.path.exists(events_np_root):

            download_root = os.path.join(root, 'download')

            if os.path.exists(download_root):
                print(
                    f'The [{download_root}] directory for saving downloaded files already exists, check files...'
                )
                # check files
                resource_list = self.resource_url_md5()
                for i in range(resource_list.__len__()):
                    file_name, url, md5 = resource_list[i]
                    fpath = os.path.join(download_root, file_name)
                    if not utils.check_integrity(fpath=fpath, md5=md5):
                        print(
                            f'The file [{fpath}] does not exist or is corrupted.'
                        )

                        if os.path.exists(fpath):
                            # If file is corrupted, we will remove it.
                            os.remove(fpath)
                            print(f'Remove [{fpath}]')

                        if self.downloadable():
                            # If file does not exist, we will download it.
                            print(
                                f'Download [{file_name}] from [{url}] to [{download_root}]'
                            )
                            utils.download_url(url=url,
                                               root=download_root,
                                               filename=file_name,
                                               md5=md5)
                        else:
                            raise NotImplementedError(
                                f'This dataset can not be downloaded by SpikingJelly, please download [{file_name}] from [{url}] manually and put files at {download_root}.'
                            )

            else:
                os.mkdir(download_root)
                print(f'Mkdir [{download_root}] to save downloaded files.')
                resource_list = self.resource_url_md5()
                if self.downloadable():
                    # download and extract file
                    for i in range(resource_list.__len__()):
                        file_name, url, md5 = resource_list[i]
                        print(
                            f'Download [{file_name}] from [{url}] to [{download_root}]'
                        )
                        utils.download_url(url=url,
                                           root=download_root,
                                           filename=file_name,
                                           md5=md5)
                else:
                    raise NotImplementedError(
                        f'This dataset can not be downloaded by SpikingJelly, '
                        f'please download files manually and put files at [{download_root}]. '
                        f'The resources file_name, url, and md5 are: \n{resource_list}'
                    )

            # We have downloaded files and checked files. Now, let us extract the files
            extract_root = os.path.join(root, 'extract')
            if os.path.exists(extract_root):
                print(
                    f'The directory [{extract_root}] for saving extracted files already exists.\n'
                    f'SpikingJelly will not check the data integrity of extracted files.\n'
                    f'If the extracted files are incomplete or corrupted, please delete [{extract_root}] manually, '
                    f'then SpikingJelly will re-extract files from [{download_root}].'
                )
                # shutil.rmtree(extract_root)
                # print(f'Delete [{extract_root}].')
            else:
                os.mkdir(extract_root)
                print(f'Mkdir [{extract_root}].')
                self.extract_downloaded_files(download_root, extract_root)

            # Now let us convert the origin binary files to npz files
            os.mkdir(events_np_root)
            print(f'Mkdir [{events_np_root}].')
            print(
                f'Start to convert the origin data from [{extract_root}] to [{events_np_root}] in np.ndarray format.'
            )
            self.create_events_np_files(extract_root, events_np_root)

        H, W = self.get_H_W()

        if data_type == 'event':
            _root = events_np_root
            _loader = np.load
            _transform = transform
            _target_transform = target_transform

        elif data_type == 'frame':
            if frames_number is not None:
                assert frames_number > 0 and isinstance(frames_number, int)
                assert split_by == 'time' or split_by == 'number'
                frames_np_root = os.path.join(
                    root, f'frames_number_{frames_number}_split_by_{split_by}')
                if os.path.exists(frames_np_root):
                    print(f'The directory [{frames_np_root}] already exists.')
                else:
                    os.mkdir(frames_np_root)
                    print(f'Mkdir [{frames_np_root}].')

                    # create the same directory structure
                    create_same_directory_structure(events_np_root,
                                                    frames_np_root)

                    # use multi-thread to accelerate
                    t_ckp = time.time()
                    with ThreadPoolExecutor(
                            max_workers=configure.
                            max_threads_number_for_datasets_preprocess) as tpe:
                        print(
                            f'Start ThreadPoolExecutor with max workers = [{tpe._max_workers}].'
                        )
                        for e_root, e_dirs, e_files in os.walk(events_np_root):
                            if e_files.__len__() > 0:
                                output_dir = os.path.join(
                                    frames_np_root,
                                    os.path.relpath(e_root, events_np_root))
                                for e_file in e_files:
                                    events_np_file = os.path.join(
                                        e_root, e_file)
                                    print(
                                        f'Start to integrate [{events_np_file}] to frames and save to [{output_dir}].'
                                    )
                                    tpe.submit(
                                        integrate_events_file_to_frames_file_by_fixed_frames_number,
                                        self.load_events_np, events_np_file,
                                        output_dir, split_by, frames_number, H,
                                        W, True)

                    print(f'Used time = [{round(time.time() - t_ckp, 2)}s].')

                _root = frames_np_root
                _loader = load_npz_frames
                _transform = transform
                _target_transform = target_transform

            elif duration is not None:
                assert duration > 0 and isinstance(duration, int)
                frames_np_root = os.path.join(root, f'duration_{duration}')
                if os.path.exists(frames_np_root):
                    print(f'The directory [{frames_np_root}] already exists.')

                else:
                    os.mkdir(frames_np_root)
                    print(f'Mkdir [{frames_np_root}].')
                    # create the same directory structure
                    create_same_directory_structure(events_np_root,
                                                    frames_np_root)
                    # use multi-thread to accelerate
                    t_ckp = time.time()
                    with ThreadPoolExecutor(
                            max_workers=configure.
                            max_threads_number_for_datasets_preprocess) as tpe:
                        print(
                            f'Start ThreadPoolExecutor with max workers = [{tpe._max_workers}].'
                        )
                        for e_root, e_dirs, e_files in os.walk(events_np_root):
                            if e_files.__len__() > 0:
                                output_dir = os.path.join(
                                    frames_np_root,
                                    os.path.relpath(e_root, events_np_root))
                                for e_file in e_files:
                                    events_np_file = os.path.join(
                                        e_root, e_file)
                                    print(
                                        f'Start to integrate [{events_np_file}] to frames and save to [{output_dir}].'
                                    )
                                    tpe.submit(
                                        integrate_events_file_to_frames_file_by_fixed_duration,
                                        self.load_events_np, events_np_file,
                                        output_dir, duration, H, W, True)

                    print(f'Used time = [{round(time.time() - t_ckp, 2)}s].')

                _root = frames_np_root
                _loader = load_npz_frames
                _transform = transform
                _target_transform = target_transform

            elif custom_integrate_function is not None:
                if custom_integrated_frames_dir_name is None:
                    custom_integrated_frames_dir_name = custom_integrate_function.__name__

                frames_np_root = os.path.join(
                    root, custom_integrated_frames_dir_name)
                if os.path.exists(frames_np_root):
                    print(f'The directory [{frames_np_root}] already exists.')
                else:
                    os.mkdir(frames_np_root)
                    print(f'Mkdir [{frames_np_root}].')
                    # create the same directory structure
                    create_same_directory_structure(events_np_root,
                                                    frames_np_root)
                    # use multi-thread to accelerate
                    t_ckp = time.time()
                    with ThreadPoolExecutor(
                            max_workers=configure.
                            max_threads_number_for_datasets_preprocess) as tpe:
                        print(
                            f'Start ThreadPoolExecutor with max workers = [{tpe._max_workers}].'
                        )
                        for e_root, e_dirs, e_files in os.walk(events_np_root):
                            if e_files.__len__() > 0:
                                output_dir = os.path.join(
                                    frames_np_root,
                                    os.path.relpath(e_root, events_np_root))
                                for e_file in e_files:
                                    events_np_file = os.path.join(
                                        e_root, e_file)
                                    print(
                                        f'Start to integrate [{events_np_file}] to frames and save to [{output_dir}].'
                                    )
                                    tpe.submit(
                                        save_frames_to_npz_and_print,
                                        os.path.join(
                                            output_dir,
                                            os.path.basename(events_np_file)),
                                        custom_integrate_function(
                                            np.load(events_np_file), H, W))

                    print(f'Used time = [{round(time.time() - t_ckp, 2)}s].')

                _root = frames_np_root
                _loader = load_npz_frames
                _transform = transform
                _target_transform = target_transform

            else:
                raise ValueError(
                    'At least one of "frames_number", "duration" and "custom_integrate_function" should not be None.'
                )

        if train is not None:
            if train:
                _root = os.path.join(_root, 'train')
            else:
                _root = os.path.join(_root, 'test')

        super().__init__(root=_root,
                         loader=_loader,
                         extensions=('.npz', ),
                         transform=_transform,
                         target_transform=_target_transform)
Example #15
def download_url(url,
                 root,
                 filename=None,
                 md5=None,
                 timeout=4,
                 retries=4,
                 verbose=False,
                 silent=False):
    """Download a file accessible via URL with multiple retries

    Args:
        url (str or tuple<str, str>): URL to request
        root (pathlib.Path): folder where the file will be saved in
        filename (str, optional): name of the output file
        md5 (str, optional): md5 for integrity verification
        timeout (float, optional): number of seconds before the request times out
        retries (int, optional): number of additional allowed download attempts
        verbose (bool, optional): whether to display download status in the console
        silent (bool, optional): whether to suppress the exception instead of raising it upon download failure
    """

    if isinstance(url, tuple):
        url, filename = url

    if not isinstance(url, str):
        raise TypeError('expected argument url to be of type <str>')

    # Root folder
    root = Path(root).expanduser()
    root.mkdir(parents=True, exist_ok=True)
    if not filename:
        filename = get_fname(url)

    fpath = root.joinpath(filename)

    # Download file
    if check_integrity(fpath, md5):
        if verbose:
            print(f'Using downloaded and verified file: {fpath}')
    else:
        success = False
        # Allow multiple retries
        for idx in range(retries + 1):
            try:
                url_retrieve(url, fpath, timeout)
                success = True
            except Exception as e:
                # Try switching to http
                if url.startswith('https'):
                    try:
                        url_retrieve(url.replace('https:', 'http:'), fpath,
                                     timeout)
                        success = True
                    except Exception:
                        success = False
                # Handle exception
                if not success and (idx == retries):
                    if not silent:
                        raise e
                    elif verbose:
                        print(e)
            if success:
                break
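A brief usage sketch for the download_url above (the URL, filename and md5 values are placeholders, for illustration only):

from pathlib import Path

# Hypothetical values; substitute a real URL and, optionally, its md5.
download_url('https://example.com/archive.zip',
             root=Path('./data'),
             filename='archive.zip',
             md5=None,        # skip hash verification
             retries=2,
             verbose=True)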
Example #16
 def _check_integrity(self):
     root = os.path.join(self.root, self.dataset_name, self.processed_folder)
     md5 = self.split_list[self.split][2]
     fpath = os.path.join(root, self.filename)
     return check_integrity(fpath, md5)
Example #17
 def _check_integrity(self):
     path_to_zip = os.path.join(self.root, self.filename)
     return check_integrity(path_to_zip, self.checksum)
Example #18
 def _check_integrity(self):
     return check_integrity(fpath=str(self.download_root / self.filename),
                            md5=self.file_md5)
Example #19
 def _check_integrity(self):
     for f, md5 in self.checklist:
         fpath = os.path.join(self.root, self.base_folder, f)
         if not check_integrity(fpath, md5):
             return False
     return True
Example #20
 def parse_archives(self):
     if not check_integrity(os.path.join(self.root, META_FILE)):
         print("creating meta file...")
         parse_devkit_archive(self.root)
     print("parsing val archive...")
     parse_val_archive(self.root, disable=self.disable_parse_val)
Example #21
def _verify_archive(root, file, md5):
    if not check_integrity(os.path.join(root, file), md5):
        msg = (
            "The archive {} is not present in the root directory or is corrupted. "
            "You need to download it externally and place it in {}.")
        raise RuntimeError(msg.format(file, root))
Example #22
    def _check_integrity(self):
        for zip_path, zip_md5 in zip(self._zip_paths, self._zip_md5s):
            if not check_integrity(zip_path, zip_md5):
                return False

        return True
Example #23
 def download(root):
     file_name = os.path.basename(URL)
     # check existence of zipped file
     if not check_integrity(os.path.join(root, file_name)):
         # if not exist, download to under root
         download_and_extract_archive(URL, root, md5=MD5)
Example #24
 def _check_integrity(self):
     fpath = os.path.join(self.root, self.filename)
     if not check_integrity(fpath, self.tgz_md5):
         return False
     return True
Example #25
 def _check_integrity(self):
     data_path = os.path.join(self.root, self.images_filename)
     labels_path = os.path.join(self.root, self.labels_filename)
     return (check_integrity(data_path, self.images_md5)
             and check_integrity(labels_path, self.labels_md5))
Example #26
 def _check_integrity(self):
     return check_integrity(
         os.path.join(self.root, self.test_file),
         self.test_file_md5) and (not self.train or check_integrity(
             os.path.join(self.root, self.train_file), self.train_file_md5))
Example #27
 def _check_integrity(self):
     root = self.root
     fpath = os.path.join(root, self.filename)
     if not check_integrity(fpath, self.md5_checksum):
         return False
     return True
Example #28
 def _check_integrity(self):
     if not check_integrity(join(self.root, self.filename + '.zip'),
                            self.zips_md5[self.filename]):
         return False
     return True
Example #29
    def __init__(self,
                 root,
                 fold="train",
                 transform=None,
                 target_transform=None,
                 download=False):

        fold = fold.lower()

        self.train = False
        self.test = False
        self.val = False

        if fold == "train":
            self.train = True
        elif fold == "test":
            self.test = True
        elif fold == "val":
            self.val = True
        else:
            raise RuntimeError("Not train-val-test")

        self.root = os.path.expanduser(root)
        self.transform = transform
        self.target_transform = target_transform

        fpath = os.path.join(root, self.filename)
        print('fpath =', fpath)
        print('tgz_md5 =', self.tgz_md5)
        if not check_integrity(fpath, self.tgz_md5):
            raise RuntimeError('You have not placed the' +
                               ' cifar-100-cs543-python.tar.gz file in the' +
                               ' root folder you specified. Please check' +
                               ' and retry.')

        if not self._check_integrity():
            raise RuntimeError('Dataset not found or corrupted.' +
                               ' Download it and extract the file again.')

        # now load the picked numpy arrays
        if self.train or self.val:
            self.train_data = []
            self.train_labels = []
            for fentry in self.train_list:
                f = fentry[0]
                file = os.path.join(self.root, self.base_folder, f)
                fo = open(file, 'rb')
                if sys.version_info[0] == 2:
                    entry = pickle.load(fo)
                else:
                    entry = pickle.load(fo, encoding='latin1')
                self.train_data.append(entry['data'])
                if 'labels' in entry:
                    self.train_labels += entry['labels']
                else:
                    self.train_labels += entry['fine_labels']
                fo.close()

            self.train_data = np.concatenate(self.train_data)
            self.train_data = self.train_data.reshape((50000, 3, 32, 32))
            self.train_data = self.train_data.transpose(
                (0, 2, 3, 1))  # convert to HWC

            p = np.arange(0, 50000, 10)
            mask_train = np.ones((50000, ), dtype=bool)
            mask_train[p] = False
            mask_val = np.zeros((50000, ), dtype=bool)
            mask_val[p] = True

            copy_all_data = np.array(self.train_data)
            self.val_data = np.array(copy_all_data[mask_val])
            self.train_data = np.array(copy_all_data[mask_train])

            copy_all_labels = np.array(self.train_labels)
            self.val_labels = np.array(copy_all_labels[mask_val])
            self.train_labels = np.array(copy_all_labels[mask_train])

        elif self.test:
            f = self.test_list[0][0]
            file = os.path.join(self.root, self.base_folder, f)
            fo = open(file, 'rb')
            if sys.version_info[0] == 2:
                entry = pickle.load(fo)
            else:
                entry = pickle.load(fo, encoding='latin1')
            self.test_data = entry['data']

            if 'labels' in entry:
                self.test_labels = entry['labels']
            else:
                self.test_labels = entry['fine_labels']
            fo.close()
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose(
                (0, 2, 3, 1))  # convert to HWC
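The train/val split above simply reserves every 10th sample (indices 0, 10, 20, ...) for validation, which yields 5,000 validation and 45,000 training samples out of the 50,000 CIFAR-100 training images. A quick sanity check of that mask logic:

import numpy as np

p = np.arange(0, 50000, 10)                 # 5000 validation indices
mask_train = np.ones((50000,), dtype=bool)
mask_train[p] = False
mask_val = ~mask_train                      # complement of the train mask

print(int(mask_val.sum()), int(mask_train.sum()))   # 5000 45000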
Example #30
    def _check_integrity(self):
        for tar_path, tar_md5 in zip(self._tar_paths, self._tar_md5s):
            if not check_integrity(tar_path, tar_md5):
                return False

        return True