示例#1
0
  def _SetPath(self, path):
    """Sets the current path to watch for new events.

    This also records the size of the old path, if any. If the size can't be
    found, an error is logged.

    Args:
      path: The full path of the file to watch.
    """
    old_path = self._path
    if old_path and not io_wrapper.IsCloudPath(old_path):
      try:
        # We're done with the path, so store its size.
        size = tf.io.gfile.stat(old_path).length
        logger.debug('Setting latest size of %s to %d', old_path, size)
        self._finalized_sizes[old_path] = size
      except tf.errors.OpError as e:
        logger.error('Unable to get size of %s: %s', old_path, e)

    self._path = path
    self._loader = self._loader_factory(path)
    def _GetNextPath(self):
        """Gets the next path to load from.

        The directory listing is filtered with `self._path_filter` and sorted;
        the result is the smallest path that sorts strictly after the current
        `self._path` (or the first path when no current path is set).

        This function also does the checking for out-of-order writes as it
        goes: up to `_OOO_WRITE_CHECK_COUNT` paths immediately preceding the
        current one are passed to `self._HasOOOWrite`, and
        `self._ooo_writes_detected` is set as soon as one of them reports a
        write.

        Returns:
          The next path to load events from, or None if there are no more paths.
        """
        # Sorted so that the bisect lookups below are valid and so that "next"
        # means the smallest path greater than the current one.
        paths = sorted(
            path for path in io_wrapper.ListDirectoryAbsolute(self._directory)
            if self._path_filter(path))
        if not paths:
            return None

        if self._path is None:
            return paths[0]

        # Skip the check for cloud paths (the original note says these can't
        # be checked) and once an out-of-order write has already been detected.
        if (not io_wrapper.IsCloudPath(paths[0])
                and not self._ooo_writes_detected):
            # Look only at the window of paths just before the current one.
            current_path_index = bisect.bisect_left(paths, self._path)
            ooo_check_start = max(
                0, current_path_index - self._OOO_WRITE_CHECK_COUNT)
            # any() stops at the first hit, like the original loop-and-break.
            if any(self._HasOOOWrite(path)
                   for path in paths[ooo_check_start:current_path_index]):
                self._ooo_writes_detected = True

        # `paths` is sorted, so the first element strictly greater than the
        # current path sits at the bisect_right insertion point; no need to
        # rescan the list or re-test for a None path (handled above).
        next_index = bisect.bisect_right(paths, self._path)
        if next_index < len(paths):
            return paths[next_index]
        return None
示例#3
0
 def testIsCloudPathLocalIsFalse(self):
     """IsCloudPath must be falsy for a plain absolute local path."""
     local_path = "/tmp/foo"
     is_cloud = io_wrapper.IsCloudPath(local_path)
     self.assertFalse(is_cloud)
示例#4
0
 def testIsCloudPathFileIsFalse(self):
     """IsCloudPath must be falsy for a `file://` path."""
     file_url = "file:///tmp/foo"
     is_cloud = io_wrapper.IsCloudPath(file_url)
     self.assertFalse(is_cloud)
示例#5
0
 def testIsCloudPathCnsIsTrue(self):
     """IsCloudPath must be truthy for a path under the `/cns/` prefix."""
     cns_path = "/cns/foo/bar"
     is_cloud = io_wrapper.IsCloudPath(cns_path)
     self.assertTrue(is_cloud)
示例#6
0
 def testIsCloudPathS3IsTrue(self):
     """IsCloudPath must be truthy for an `s3://` path."""
     s3_path = "s3://bucket/foo"
     is_cloud = io_wrapper.IsCloudPath(s3_path)
     self.assertTrue(is_cloud)
示例#7
0
 def testIsCloudPathGcsIsTrue(self):
     """IsCloudPath must be truthy for a `gs://` path."""
     gcs_path = "gs://bucket/foo"
     is_cloud = io_wrapper.IsCloudPath(gcs_path)
     self.assertTrue(is_cloud)