示例#1
0
    def __init__(self,
                 cache_dir,
                 labels,
                 is_cache_complete=None,
                 coder=None):
        """Sets up the cache location, coder and completeness check.

        Args:
          cache_dir: Base directory of the cache; the first component of the
            cache file path.
          labels: Sequence of path components appended to `cache_dir` to form
            the cache file path. The last label must be a string that
            `CacheKey.from_str` can parse; its `pipeline_id` is stored.
          is_cache_complete: Optional callable taking one argument. When
            omitted (or falsy), a callable that always returns True is used.
          coder: Optional coder object to store on the instance. When None, a
            new `SafeFastPrimitivesCoder` is created for this instance.
        """
        # A default written as `coder=SafeFastPrimitivesCoder()` is evaluated
        # once, when the function is defined, and that single object is then
        # shared by every instance. Creating it here gives each instance its
        # own coder and defers construction until it is actually needed.
        if coder is None:
            coder = SafeFastPrimitivesCoder()

        self._cache_dir = cache_dir
        self._coder = coder
        self._labels = labels
        self._path = os.path.join(self._cache_dir, *self._labels)
        self._is_cache_complete = (is_cache_complete
                                   if is_cache_complete else lambda _: True)

        # Imported at call time rather than at module import time.
        # NOTE(review): presumably this avoids a circular import - confirm.
        from apache_beam.runners.interactive.pipeline_instrument import CacheKey
        self._pipeline_id = CacheKey.from_str(labels[-1]).pipeline_id
 def _wait_until_file_exists(self, timeout_secs=30):
   """Waits for the cache file to appear, then opens it for binary reading.

   The path `self._path` is checked, and while it is missing the method
   sleeps for one second and checks again. The timeout is evaluated after
   each sleep, so at least one sleep happens before a timeout is reported.

   Args:
     timeout_secs: Maximum number of seconds to keep waiting for the file.

   Returns:
     The file object from `open(self._path, mode='rb')`. This method does
     not close it.

   Raises:
     RuntimeError: If the file is still missing once more than
       `timeout_secs` seconds have elapsed since the wait began.
   """
   began_at = time.time()
   while True:
     if os.path.exists(self._path):
       return open(self._path, mode='rb')

     # Poll once per second while the file is still missing.
     time.sleep(1)
     waited = time.time() - began_at
     if waited > timeout_secs:
       # Only needed to build the error message, so imported lazily.
       from apache_beam.runners.interactive.pipeline_instrument import CacheKey
       cache_key = CacheKey.from_str(self._labels[-1])
       message = (
           'Timed out waiting for cache file for PCollection `{}` to be '
           'available with path {}.'.format(cache_key.var, self._path))
       raise RuntimeError(message)
示例#3
0
    def __init__(self,
                 cache_dir,
                 labels,
                 is_cache_complete=None,
                 coder=None,
                 limiters=None):
        """Stores the cache configuration and derives the cache file path.

        Args:
          cache_dir: Base directory of the cache; the first component of the
            cache file path.
          labels: Sequence of path components appended to `cache_dir` to form
            the cache file path. The last label must be a string that
            `CacheKey.from_str` can parse; its `pipeline_id` is stored.
          is_cache_complete: Optional callable taking one argument. A falsy
            value is replaced by a callable that always returns True.
          coder: Optional coder object. A falsy value is replaced by a new
            `SafeFastPrimitivesCoder`.
          limiters: Optional collection stored on the instance. A falsy value
            (including an empty list) is replaced by a new empty list.
        """
        # Resolve every optional argument up front; falsy means "use default".
        effective_coder = coder or SafeFastPrimitivesCoder()
        completeness_check = is_cache_complete or (lambda _: True)
        active_limiters = limiters or []

        self._cache_dir = cache_dir
        self._coder = effective_coder
        self._labels = labels
        self._path = os.path.join(cache_dir, *labels)
        self._is_cache_complete = completeness_check
        self._limiters = active_limiters

        # Imported at call time rather than at module import time.
        # NOTE(review): presumably this avoids a circular import - confirm.
        from apache_beam.runners.interactive.pipeline_instrument import CacheKey
        parsed_key = CacheKey.from_str(labels[-1])
        self._pipeline_id = parsed_key.pipeline_id