Пример #1
0
 def __init__(self, data, transform, cache_num=sys.maxsize, cache_rate=1.0, num_workers=0):
     """
     Initialise the dataset and eagerly cache the first `cache_num` items.

     Each cached entry is produced by `self._load_cache_item`, either in the
     calling thread or, when `num_workers > 0`, in a pool of worker threads.

     Args:
         data (Iterable): input data to load and transform to generate dataset for model.
         transform (Callable): transforms to execute operations on input data.
             It is wrapped in a `Compose` when it is not already one.
         cache_num (int): number of items to be cached. Default is `sys.maxsize`.
             will take the minimum of (cache_num, data_length x cache_rate, data_length).
         cache_rate (float): percentage of cached data in total, default is 1.0 (cache all).
             will take the minimum of (cache_num, data_length x cache_rate, data_length).
         num_workers (int): the number of worker threads to use.
             If 0 a single thread will be used. Default is 0.
     """
     if not isinstance(transform, Compose):
         transform = Compose(transform)
     super().__init__(data, transform)
     self.cache_num = min(cache_num, int(len(self) * cache_rate), len(self))
     # Always define the cache attribute, so that it exists (as an empty list)
     # even when nothing is cached; `cache_num` itself is left untouched.
     self._cache = []
     if self.cache_num > 0:
         # Pre-size the cache because worker threads fill it by index, out of order.
         self._cache = [None] * self.cache_num
         print("Load and cache transformed data...")
         if num_workers > 0:
             # Shared progress counter and the lock guarding it; both are used
             # by `_load_cache_item_thread`.
             self._item_processed = 0
             self._thread_lock = threading.Lock()
             with ThreadPool(num_workers) as p:
                 # `map` blocks until every item is cached, so the pool can be
                 # torn down safely when the `with` block exits.
                 p.map(
                     self._load_cache_item_thread,
                     [(i, data[i], transform.transforms) for i in range(self.cache_num)],
                 )
         else:
             for i in range(self.cache_num):
                 self._cache[i] = self._load_cache_item(data[i], transform.transforms)
                 process_bar(i + 1, self.cache_num)
Пример #2
0
 def __init__(self, data, transform, cache_num=sys.maxsize, cache_rate=1.0):
     """
     Initialise the dataset and pre-compute the cached items.

     For each of the first `cache_num` items, the transforms are applied in
     order up to (but not including) the first `Randomizable` one, and the
     intermediate result is stored in `self._cache`.

     Args:
         data (Iterable): input data to load and transform to generate dataset for model.
         transform (Callable): transforms to execute operations on input data.
             It is wrapped in a `Compose` when it is not already one.
         cache_num (int): number of items to be cached. Default is `sys.maxsize`.
             will take the minimum of (cache_num, data_length x cache_rate, data_length).
         cache_rate (float): percentage of cached data in total, default is 1.0 (cache all).
             will take the minimum of (cache_num, data_length x cache_rate, data_length).
     """
     composed = transform if isinstance(transform, Compose) else Compose(transform)
     super().__init__(data, composed)
     self.cache_num = min(cache_num, int(len(self) * cache_rate), len(self))
     self._cache = []

     def _apply_deterministic_prefix(sample):
         # Run every transform that precedes the first random one.
         for step in composed.transforms:
             if isinstance(step, Randomizable):
                 return sample
             sample = apply_transform(step, sample)
         return sample

     print('Load and cache transformed data...')
     for position in range(self.cache_num):
         # Progress is reported before the item itself is processed.
         process_bar(position + 1, self.cache_num)
         self._cache.append(_apply_deterministic_prefix(data[position]))
Пример #3
0
 def _load_cache_item_thread(self, args):
     """
     Cache a single item; used as the callable mapped over the worker pool.

     Args:
         args (tuple): `(index, item, transforms)` -- the cache slot to fill,
             the raw item, and the transforms forwarded to `_load_cache_item`.
     """
     slot, raw_item, transform_seq = args
     cached_value = self._load_cache_item(raw_item, transform_seq)
     self._cache[slot] = cached_value
     # The counter update and the progress report happen while holding the lock.
     with self._thread_lock:
         self._item_processed = self._item_processed + 1
         process_bar(self._item_processed, self.cache_num)
Пример #4
0
 def _process_hook(blocknum, blocksize, totalsize):
     """
     Forward transfer progress to `process_bar`.

     Args:
         blocknum: number of blocks handled so far.
         blocksize: size of one block (presumably in bytes -- confirm against the caller).
         totalsize: expected total size, passed on as the progress total.
     """
     transferred = blocknum * blocksize
     # A count of whole blocks overshoots the total whenever the total is not
     # an exact multiple of the block size, so cap it at the known total.
     # A non-positive total (size unknown) is passed through unchanged.
     if totalsize > 0:
         transferred = min(transferred, totalsize)
     process_bar(transferred, totalsize)