def StartMultiplexerReloadingThread(multiplexer, path_to_run, load_interval):
  """Starts a daemon thread that reloads `multiplexer` forever.

  The thread calls `ReloadMultiplexer` immediately, then again every
  `load_interval` seconds.

  Args:
    multiplexer: The `EventMultiplexer` to add runs to and reload.
    path_to_run: A dict mapping from paths to run names, where `None` as the
      run name is interpreted as a run name equal to the path.
    load_interval: How many seconds to wait after one load before starting
      the next load.

  Returns:
    A started `threading.Thread` that reloads the multiplexer.
  """
  # Deliberately avoid multiplexer.Reload() here: AddRunsFromDirectory would
  # otherwise block until every run had finished loading.
  for path in path_to_run:
    if not gcs.IsGCSPath(path):
      continue
    gcs.CheckIsSupported()
    logging.info(
        'Assuming %s is intended to be a Google Cloud Storage path because '
        'it starts with %s. If it isn\'t, prefix it with \'/.\' (i.e., use '
        '/.%s instead)', path, gcs.PATH_PREFIX, path)

  def _ReloadForever():
    while True:
      ReloadMultiplexer(multiplexer, path_to_run)
      time.sleep(load_interval)

  reload_thread = threading.Thread(target=_ReloadForever)
  reload_thread.daemon = True
  reload_thread.start()
  return reload_thread
def ListDirectoryAbsolute(directory):
  """Yields all files in the given directory. The paths are absolute."""
  if gcs.IsGCSPath(directory):
    return gcs.ListDirectory(directory)
  # Local/gfile listings are relative; join each entry onto the directory.
  return (os.path.join(directory, entry)
          for entry in gfile.ListDirectory(directory))
def _SetPath(self, path):
  """Sets the current path to watch for new events.

  Finalizes the previous path (if any and not GCS) by recording its size in
  `self._finalized_sizes`, then installs a fresh loader for the new path.

  Args:
    path: The full path of the file to watch.
  """
  old_path = self._path
  if old_path and not gcs.IsGCSPath(old_path):
    # We're done with the path, so store its size.
    # NOTE(review): io_wrapper.Size is unguarded here; a sibling variant of
    # this method wraps it in try/except errors.OpError and logs on failure.
    # Consider doing the same if the file can disappear between polls.
    size = io_wrapper.Size(old_path)
    logging.debug('Setting latest size of %s to %d', old_path, size)
    self._finalized_sizes[old_path] = size
  self._path = path
  self._loader = self._loader_factory(path)
def ParseEventFilesSpec(logdir):
  """Parses `logdir` into a map from paths to run group names.

  The events files flag format is a comma-separated list of path
  specifications. A path specification either looks like
  'group_name:/path/to/directory' or '/path/to/directory'; in the latter case
  the group is unnamed. Group names cannot start with a forward slash:
  /foo:bar/baz will be interpreted as a spec with no name and path
  '/foo:bar/baz'. Globs are not supported.

  Args:
    logdir: A comma-separated list of run specifications.

  Returns:
    A dict mapping directory paths to names like {'/path/to/directory':
    'name'}. Groups without an explicit name are named after their path. If
    logdir is None, returns an empty dict, which is helpful for testing things
    that don't require any valid runs.
  """
  if logdir is None:
    return {}
  run_mapping = {}
  for spec in logdir.split(','):
    # Default: the whole spec is an unnamed path. GCS paths contain a colon,
    # so they must never be split on one.
    run_name = None
    path = spec
    if not gcs.IsGCSPath(spec):
      # If the spec looks like /foo:bar/baz, we assume it's a path containing
      # a colon, not a named group. Split at most once so
      # run_name:/path:with/a/colon still works.
      if ':' in spec and spec[0] != '/':
        run_name, _, path = spec.partition(':')
    if not os.path.isabs(path) and not gcs.IsGCSPath(path):
      # Turn a relative path into an absolute one.
      path = os.path.join(os.path.realpath('.'), path)
    run_mapping[path] = run_name
  return run_mapping
def CreateFileLoader(path):
  """Creates a file loader for the given path.

  Args:
    path: A string representing either a normal path or a GCS path.

  Returns:
    An object with a Load() method that yields event_pb2.Event protos.
  """
  if gcs.IsGCSPath(path):
    return gcs_file_loader.GCSFileLoader(path)
  return event_file_loader.EventFileLoader(path)
def AddRunsFromDirectory(self, path, name=None):
  """Load runs from a directory; recursively walks subdirectories.

  If path doesn't exist, no-op. This ensures that it is safe to call
    `AddRunsFromDirectory` multiple times, even before the directory is made.

  If path is a directory, load event files in the directory (if any exist) and
    recursively call AddRunsFromDirectory on any subdirectories. This mean you
    can call AddRunsFromDirectory at the root of a tree of event logs and
    TensorBoard will load them all.

  If the `EventMultiplexer` is already loaded this will cause
  the newly created accumulators to `Reload()`.

  Args:
    path: A string path to a directory to load runs from.
    name: Optionally, what name to apply to the runs. If name is provided
      and the directory contains run subdirectories, the name of each subrun
      is the concatenation of the parent name and the subdirectory name. If
      name is provided and the directory contains event files, then a run
      is added called "name" and with the events from the path.

  Raises:
    ValueError: If the path exists and isn't a directory.

  Returns:
    The `EventMultiplexer`.
  """
  subdirs = []
  if gcs.IsGCSPath(path):
    # GCS listing already yields (subdir, files) pairs; keep only directories
    # that actually contain event files.
    subdirs = [
        subdir
        for (subdir, files) in gcs.ListRecursively(path)
        if list(filter(event_accumulator.IsTensorFlowEventsFile, files))
    ]
  else:
    if not gfile.Exists(path):
      # Maybe it hasn't been created yet; fail silently to retry later.
      # BUG FIX: previously returned None here, contradicting the documented
      # "Returns: The `EventMultiplexer`" contract and breaking call chaining.
      return self
    if not gfile.IsDirectory(path):
      raise ValueError('AddRunsFromDirectory: path exists and is not a '
                       'directory, %s' % path)
    subdirs = [
        subdir
        for (subdir, _, files) in gfile.Walk(path)
        if list(filter(event_accumulator.IsTensorFlowEventsFile, files))
    ]

  for subdir in subdirs:
    logging.info('Adding events from directory %s', subdir)
    rpath = os.path.relpath(subdir, path)
    subname = os.path.join(name, rpath) if name else rpath
    self.AddRun(subdir, name=subname)

  return self
def _GeneratorFromPath(path):
  """Create an event generator for file or directory at given path string."""
  # GCS directories get a GCS provider + GCS loader.
  if gcs.IsGCSPath(path):
    provider = directory_watcher.SequentialGCSProvider(
        path, path_filter=IsTensorFlowEventsFile)
    return directory_watcher.DirectoryWatcher(provider,
                                              gcs_file_loader.GCSFileLoader)
  # Local directories get a gfile provider + standard event file loader.
  if gfile.IsDirectory(path):
    provider = directory_watcher.SequentialGFileProvider(
        path, path_filter=IsTensorFlowEventsFile)
    return directory_watcher.DirectoryWatcher(
        provider, event_file_loader.EventFileLoader)
  # A plain file is loaded directly, with no directory watching.
  return event_file_loader.EventFileLoader(path)
def ListRecursively(top):
  """Walks a directory tree, yielding (dir_path, file_paths) tuples.

  For each of `top` and its subdirectories, yields a tuple containing the path
  to the directory and the path to each of the contained files. Note that
  unlike os.walk()/gfile.Walk(), this does not list subdirectories, and the
  file paths are all absolute.

  If the directory does not exist, this yields nothing.

  Args:
    top: A path to a directory.

  Yields:
    (dir_path, file_paths) tuples, one per directory.
  """
  if gcs.IsGCSPath(top):
    # GCS listing already produces the right tuples; pass them through.
    for tup in gcs.ListRecursively(top):
      yield tup
  else:
    for dir_path, _, filenames in gfile.Walk(top):
      absolute_files = (os.path.join(dir_path, filename)
                        for filename in filenames)
      yield (dir_path, absolute_files)
def _SetPath(self, path):
  """Sets the current path to watch for new events.

  This also records the size of the old path, if any. If the size can't be
  found, an error is logged.

  Args:
    path: The full path of the file to watch.
  """
  previous = self._path
  # Only local files can be sized; GCS paths are skipped.
  if previous and not gcs.IsGCSPath(previous):
    try:
      # Finalize the old path by remembering how large it was.
      size = io_wrapper.Size(previous)
      logging.debug('Setting latest size of %s to %d', previous, size)
      self._finalized_sizes[previous] = size
    except errors.OpError as err:
      logging.error('Unable to get size of %s: %s', previous, err)
  self._path = path
  self._loader = self._loader_factory(path)
def _GetNextPath(self):
  """Gets the next path to load from.

  This function also does the checking for out-of-order writes as it iterates
  through the paths.

  Returns:
    The next path to load events from, or None if there are no more paths.
  """
  # Sorted, filtered listing of the watched directory; sort order defines
  # the "next" path.
  paths = sorted(
      path for path in io_wrapper.ListDirectoryAbsolute(self._directory)
      if self._path_filter(path))
  if not paths:
    return None
  if self._path is None:
    # First call: start from the earliest path.
    return paths[0]
  # Don't bother checking if the paths are GCS (which we can't check) or if
  # we've already detected an OOO write.
  if not gcs.IsGCSPath(paths[0]) and not self._ooo_writes_detected:
    # Check the previous _OOO_WRITE_CHECK_COUNT paths for out of order writes.
    # bisect_left on the sorted listing locates where the current path sits.
    current_path_index = bisect.bisect_left(paths, self._path)
    ooo_check_start = max(
        0, current_path_index - self._OOO_WRITE_CHECK_COUNT)
    for path in paths[ooo_check_start:current_path_index]:
      if self._HasOOOWrite(path):
        # Latch the flag; once set we never re-check on later calls.
        self._ooo_writes_detected = True
        break
  # Candidates strictly after the current path. (The `self._path is None`
  # clause is redundant here -- that case returned above -- but harmless.)
  next_paths = list(path
                    for path in paths
                    if self._path is None or path > self._path)
  if next_paths:
    # `paths` is sorted, so min() is the immediate successor.
    return min(next_paths)
  else:
    return None
def Size(path):
  """Returns the number of bytes in the given file. Doesn't work on GCS."""
  if gcs.IsGCSPath(path):
    raise NotImplementedError("io_wrapper.Size doesn't support GCS paths")
  return gfile.Open(path).Size()
def Exists(path):
  """Returns whether the path exists, dispatching to GCS or gfile."""
  checker = gcs.Exists if gcs.IsGCSPath(path) else gfile.Exists
  return checker(path)
def IsDirectory(path):
  """Returns true if path exists and is a directory."""
  checker = gcs.IsDirectory if gcs.IsGCSPath(path) else gfile.IsDirectory
  return checker(path)
def __init__(self, gcs_path):
  """Initializes a loader bound to one GCS file.

  Args:
    gcs_path: Path of the GCS file to read from.

  Raises:
    ValueError: If `gcs_path` is not a GCS path.
  """
  if not gcs.IsGCSPath(gcs_path):
    raise ValueError('A GCS path is required')
  self._gcs_path = gcs_path
  # Byte offset of the next read; starts at the beginning of the file.
  self._gcs_offset = 0