def test_reuse(self):
    """Starting twice with identical arguments reuses the first instance."""
    launch_args = ("--logdir=./logs", "--port=0")

    first = manager.start(list(launch_args))
    self.assertIsInstance(first, manager.StartLaunched)

    # The second, identical request must not spawn anything new.
    second = manager.start(list(launch_args))
    self.assertIsInstance(second, manager.StartReused)
    self.assertEqual(first.info, second.info)

    # Exactly one instance is registered, and it is still serving.
    self.assertEqual(manager.get_all(), [first.info])
    self._assert_live(first.info, expected_logdir="./logs")
def test_launch_new_because_incompatible(self):
    """A different logdir forces a second, independent launch."""
    logs_result = manager.start(["--logdir=./logs", "--port=0"])
    self.assertIsInstance(logs_result, manager.StartLaunched)

    adders_result = manager.start(["--logdir=./adders", "--port=0"])
    self.assertIsInstance(adders_result, manager.StartLaunched)

    # The two launches are distinct processes on distinct ports.
    logs_info = logs_result.info
    adders_info = adders_result.info
    self.assertNotEqual(logs_info.port, adders_info.port)
    self.assertNotEqual(logs_info.pid, adders_info.pid)

    # Both are registered (order is not significant) and both are live.
    self.assertItemsEqual(manager.get_all(), [logs_info, adders_info])
    self._assert_live(logs_info, expected_logdir="./logs")
    self._assert_live(adders_info, expected_logdir="./adders")
def test_tensorboard_binary_environment_variable(self):
    """`TENSORBOARD_BINARY` decides which executable `start` runs."""
    if os.name == "nt":
        # TODO(@wchargin): This could in principle work on Windows.
        self.skipTest("Requires a POSIX shell for the stub script.")

    # Write an executable stub that prints to both streams and exits 77.
    stub_path = os.path.join(tempfile.mkdtemp(), "tensorbad")
    stub_script = textwrap.dedent(
        r"""
        #!/bin/sh
        printf >&2 'tensorbad: fatal: something bad happened\n'
        printf 'tensorbad: also some stdout\n'
        exit 77
        """.lstrip()
    )
    with open(stub_path, "w") as stub_file:
        stub_file.write(stub_script)
    os.chmod(stub_path, 0o777)

    # Point the environment at the stub for the duration of this test.
    patcher = mock.patch.dict(os.environ, {"TENSORBOARD_BINARY": stub_path})
    patcher.start()
    self.addCleanup(patcher.stop)

    outcome = manager.start(["--logdir=./logs", "--port=0"])
    self.assertIsInstance(outcome, manager.StartFailed)
    expected = manager.StartFailed(
        exit_code=77,
        stderr="tensorbad: fatal: something bad happened\n",
        stdout="tensorbad: also some stdout\n",
    )
    self.assertEqual(outcome, expected)
    self.assertEqual(manager.get_all(), [])
def test_timeout(self):
    """A stub that only sleeps makes `start` report `StartTimedOut`."""
    if os.name == "nt":
        # TODO(@wchargin): This could in principle work on Windows.
        self.skipTest("Requires a POSIX shell for the stub script.")

    pid_file = os.path.join(tempfile.mkdtemp(), "pidfile")
    # The stub writes its own PID to `pid_file`, warns on stderr, then
    # sleeps for far longer than the timeout passed to `start` below.
    stub_script = textwrap.dedent(
        r"""
        #!/bin/sh
        printf >%s '%%s' "$$"
        printf >&2 'warn: I am tired\n'
        sleep 60
        """.lstrip()
        % pipes.quote(os.path.realpath(pid_file)),
    )
    self._stub_tensorboard(name="wait-a-minute", program=stub_script)

    outcome = manager.start(
        ["--logdir=./logs", "--port=0"],
        timeout=datetime.timedelta(seconds=1),
    )
    self.assertIsInstance(outcome, manager.StartTimedOut)

    # The reported PID must be the one the stub recorded for itself.
    with open(pid_file) as pid_stream:
        recorded_pid = int(pid_stream.read())
    self.assertEqual(outcome, manager.StartTimedOut(pid=recorded_pid))
    self.assertEqual(manager.get_all(), [])
def test_exit_success(self):
    """Exit code 0 without an info file is still reported as a failure."""
    # TensorBoard exiting with success but not writing the info file is
    # still a failure to launch.
    if os.name == "nt":
        # TODO(@wchargin): This could in principle work on Windows.
        self.skipTest("Requires a POSIX shell for the stub script.")

    stub_script = textwrap.dedent(
        r"""
        #!/bin/sh
        printf >&2 'info: something good happened\n'
        printf 'also some standard output\n'
        exit 0
        """.lstrip(),
    )
    self._stub_tensorboard(name="fail-with-0", program=stub_script)

    outcome = manager.start(["--logdir=./logs", "--port=0"])
    self.assertIsInstance(outcome, manager.StartFailed)
    expected = manager.StartFailed(
        exit_code=0,
        stderr="info: something good happened\n",
        stdout="also some standard output\n",
    )
    self.assertEqual(outcome, expected)
    self.assertEqual(manager.get_all(), [])
def test_exit_failure(self):
    """A stub exiting with code 77 surfaces its exit code and both streams."""
    if os.name == "nt":
        # TODO(@wchargin): This could in principle work on Windows.
        self.skipTest("Requires a POSIX shell for the stub script.")

    stub_script = textwrap.dedent(
        r"""
        #!/bin/sh
        printf >&2 'fatal: something bad happened\n'
        printf 'also some stdout\n'
        exit 77
        """.lstrip(),
    )
    self._stub_tensorboard(name="fail-with-77", program=stub_script)

    outcome = manager.start(["--logdir=./logs", "--port=0"])
    self.assertIsInstance(outcome, manager.StartFailed)
    expected = manager.StartFailed(
        exit_code=77,
        stderr="fatal: something bad happened\n",
        stdout="also some stdout\n",
    )
    self.assertEqual(outcome, expected)
    self.assertEqual(manager.get_all(), [])
def test_reuse_after_kill(self):
    """After the first instance is killed, the same args launch a new one."""
    if os.name == "nt":
        self.skipTest("Can't send SIGTERM or SIGINT on Windows.")

    original = manager.start(["--logdir=./logs", "--port=0"])
    self.assertIsInstance(original, manager.StartLaunched)

    # Terminate the first instance and wait for it to be reaped.
    killed_pid = original.info.pid
    os.kill(killed_pid, signal.SIGTERM)
    os.waitpid(killed_pid, 0)

    replacement = manager.start(["--logdir=./logs", "--port=0"])
    self.assertIsInstance(replacement, manager.StartLaunched)
    self.assertEqual(original.info.cache_key, replacement.info.cache_key)
    # It's not technically guaranteed by POSIX that the following holds,
    # but it will unless the OS preemptively recycles PIDs or we somehow
    # cycled exactly through the whole PID space. Neither Linux nor
    # macOS recycles PIDs, so we should be fine.
    self.assertNotEqual(original.info.pid, replacement.info.pid)
    self._assert_live(replacement.info, expected_logdir="./logs")
def start(self, model_id):
    """Start (or reuse) a TensorBoard server for this dashboard.

    Calls `self._create_symlink_dir()`, then asks `manager.start` for a
    TensorBoard serving `self.dest_path`.

    Args:
      model_id: Identifier echoed back unchanged in the returned record.

    Returns:
      A dict that always contains 'model_id', 'dashboard_id' and
      'created_at'. When a server is available (freshly launched or an
      already-running compatible one) it also contains 'path', 'port'
      and 'pid'; otherwise it contains 'error' with a description of
      the failure.

    Raises:
      KeyError: If the environment variable named by
        `server.IP_ENV_VAR` is not set.
    """
    self._create_symlink_dir()
    ip = os.environ[server.IP_ENV_VAR]
    parsed_args = [
        "--logdir", self.dest_path,
        "--reload_multifile", "true",
        "--bind_all",
    ]
    start_result = manager.start(parsed_args)

    # `StartReused` carries the same `info` payload as `StartLaunched`;
    # it means a compatible server is already up, which is a success
    # from the caller's point of view.
    if isinstance(
        start_result, (manager.StartLaunched, manager.StartReused)
    ):
        port = start_result.info.port
        path = 'http://{ip}:{port}'.format(ip=ip, port=port)
        return {
            'model_id': model_id,
            'path': path,
            'port': port,
            'dashboard_id': self.dashboard_ID,
            'created_at': time.time(),
            'pid': start_result.info.pid
        }

    if isinstance(start_result, manager.StartFailed):
        message = (
            "ERROR: Failed to launch TensorBoard (exited with %d).%s"
            % (start_result.exit_code, start_result.stderr))
    else:
        # Other failure results (e.g. timeout or exec failure) have no
        # `exit_code`/`stderr`, so report the result object itself.
        # The 1-tuple keeps a tuple-like result from being unpacked as
        # multiple format arguments.
        message = (
            "ERROR: Failed to launch TensorBoard: %r" % (start_result,))
    logger.error('Failed to start Tensorboard: \n {}'.format(message))
    return {
        'model_id': model_id,
        'dashboard_id': self.dashboard_ID,
        'created_at': time.time(),
        'error': message
    }
def test_exec_failure_with_explicit_binary(self):
    """A nonexistent `TENSORBOARD_BINARY` yields `StartExecFailed`."""
    missing_binary = os.path.join(".", "non", "existent")
    self._patch_environ({"TENSORBOARD_BINARY": missing_binary})

    outcome = manager.start(["--logdir=./logs", "--port=0"])

    self.assertIsInstance(outcome, manager.StartExecFailed)
    self.assertEqual(outcome.os_error.errno, errno.ENOENT)
    # The result reports which explicitly configured binary was tried.
    self.assertEqual(outcome.explicit_binary, missing_binary)
def test_launch_new_because_info_file_deleted(self):
    """Wiping the temp root hides the instance, so `start` relaunches."""
    original = manager.start(["--logdir=./logs", "--port=0"])
    self.assertIsInstance(original, manager.StartLaunched)

    # Now suppose that someone comes and wipes /tmp/...
    self.assertEqual(len(manager.get_all()), 1, manager.get_all())
    shutil.rmtree(self.tmproot)
    os.mkdir(self.tmproot)
    self.assertEqual(len(manager.get_all()), 0, manager.get_all())

    # ...so that starting even the same command forces a relaunch:
    relaunched = manager.start(["--logdir=./logs", "--port=0"])
    self.assertIsInstance(relaunched, manager.StartLaunched)  # (picked a new port)
    self.assertEqual(original.info.cache_key, relaunched.info.cache_key)

    # Only the relaunched instance is registered, yet both still serve.
    self.assertItemsEqual(manager.get_all(), [relaunched.info])
    self._assert_live(original.info, expected_logdir="./logs")
    self._assert_live(relaunched.info, expected_logdir="./logs")
def test_exec_failure_with_no_explicit_binary(self):
    """With a bogus `PATH` and no explicit binary, exec fails with ENOENT."""
    if os.name == "nt":
        # Can't use ENOENT without an absolute path (it's not treated as
        # an exec failure).
        self.skipTest("Not clear how to trigger this case on Windows.")

    self._patch_environ({"PATH": "nope"})
    outcome = manager.start(["--logdir=./logs", "--port=0"])

    self.assertIsInstance(outcome, manager.StartExecFailed)
    self.assertEqual(outcome.os_error.errno, errno.ENOENT)
    # No `TENSORBOARD_BINARY` was set, so no explicit binary is reported.
    self.assertIs(outcome.explicit_binary, None)
def test_failure_unreadable_stdio(self):
    """If the stub deletes the temp root, stdout/stderr come back as None."""
    if os.name == "nt":
        # TODO(@wchargin): This could in principle work on Windows.
        self.skipTest("Requires a POSIX shell for the stub script.")

    # The stub removes `self.tmproot` recursively and then exits with 22.
    stub_script = textwrap.dedent(
        r"""
        #!/bin/sh
        rm -r %s
        exit 22
        """
        % pipes.quote(self.tmproot),
    ).lstrip()
    self._stub_tensorboard(name="fail-and-nuke-tmp", program=stub_script)

    outcome = manager.start(["--logdir=./logs", "--port=0"])
    self.assertIsInstance(outcome, manager.StartFailed)
    expected = manager.StartFailed(exit_code=22, stderr=None, stdout=None)
    self.assertEqual(outcome, expected)
    self.assertEqual(manager.get_all(), [])
def test_simple_start(self):
    """A plain `start` call launches an instance that is live afterwards."""
    launch_args = ["--logdir=./logs", "--port=0"]
    outcome = manager.start(launch_args)
    self.assertIsInstance(outcome, manager.StartLaunched)
    self._assert_live(outcome.info, expected_logdir="./logs")
def start(args_string):
    """Launch and display a TensorBoard instance as if at the command line.

    Progress and error messages are printed when `_get_context()` returns
    `_CONTEXT_NONE`; otherwise they are shown through an updatable
    IPython display handle.

    Args:
      args_string: Command-line arguments to TensorBoard, to be
        interpreted by `shlex.split`: e.g., "--logdir ./logs --port 0".
        Shell metacharacters are not supported: e.g., "--logdir 2>&1"
        will point the logdir at the literal directory named "2>&1".

    Raises:
      TypeError: If `manager.start` returns a result of a type that this
        function does not recognize.
    """
    context = _get_context()
    try:
        import IPython
        import IPython.display
    except ImportError:
        IPython = None

    if context == _CONTEXT_NONE:
        handle = None
        print("Launching TensorBoard...")
    else:
        handle = IPython.display.display(
            IPython.display.Pretty("Launching TensorBoard..."),
            display_id=True,
        )

    def print_or_update(message):
        # Replace the "Launching..." placeholder when there is a display
        # handle; otherwise fall back to plain printing.
        if handle is None:
            print(message)
        else:
            handle.update(IPython.display.Pretty(message))

    parsed_args = shlex.split(args_string, comments=True, posix=True)
    start_result = manager.start(parsed_args)

    if isinstance(start_result, manager.StartLaunched):
        _display(
            port=start_result.info.port,
            print_message=False,
            display_handle=handle,
        )

    elif isinstance(start_result, manager.StartReused):
        template = (
            "Reusing TensorBoard on port {port} (pid {pid}), started {delta} ago. "
            "(Use '!kill {pid}' to kill it.)")
        message = template.format(
            port=start_result.info.port,
            pid=start_result.info.pid,
            delta=_time_delta_from_info(start_result.info),
        )
        print_or_update(message)
        # The handle now holds the "Reusing..." message, so the
        # dashboard itself is shown without a handle.
        _display(
            port=start_result.info.port,
            print_message=False,
            display_handle=None,
        )

    elif isinstance(start_result, manager.StartFailed):
        def format_stream(name, value):
            # "" -> nothing to show; None -> the stream could not be read.
            if value == "":
                return ""
            elif value is None:
                return "\n<could not read %s>" % name
            else:
                return "\nContents of %s:\n%s" % (name, value.strip())
        message = (
            "ERROR: Failed to launch TensorBoard (exited with %d).%s%s" % (
                start_result.exit_code,
                format_stream("stderr", start_result.stderr),
                format_stream("stdout", start_result.stdout),
            ))
        print_or_update(message)

    elif isinstance(start_result, manager.StartExecFailed):
        the_tensorboard_binary = (
            "%r (set by the `TENSORBOARD_BINARY` environment variable)"
            % (start_result.explicit_binary, )
            if start_result.explicit_binary is not None else "`tensorboard`")
        if start_result.os_error.errno == errno.ENOENT:
            message = (
                "ERROR: Could not find %s. Please ensure that your PATH contains "
                "an executable `tensorboard` program, or explicitly specify the path "
                "to a TensorBoard binary by setting the `TENSORBOARD_BINARY` "
                "environment variable." % (the_tensorboard_binary, ))
        else:
            message = "ERROR: Failed to start %s: %s" % (
                the_tensorboard_binary,
                start_result.os_error,
            )
        print_or_update(textwrap.fill(message))

    elif isinstance(start_result, manager.StartTimedOut):
        message = ("ERROR: Timed out waiting for TensorBoard to start. "
                   "It may still be running as pid %d." % start_result.pid)
        print_or_update(message)

    else:
        # Wrap in a 1-tuple: if the unexpected result is itself a tuple
        # (or namedtuple), a bare `% start_result` would unpack it as
        # format arguments and produce a wrong or misleading message.
        raise TypeError("Unexpected result from `manager.start`: %r.\n"
                        "This is a TensorBoard bug; please report it." %
                        (start_result, ))
def start(args_string):
    """Launch and display a TensorBoard instance as if at the command line.

    The LAST token of `args_string` is not passed to TensorBoard: it is
    split off and forwarded to `_display` as `service_url`. Everything
    before it is handed to `manager.start`.

    Args:
      args_string: Command-line arguments to TensorBoard followed by the
        service URL, to be interpreted by `shlex.split`: e.g.,
        "--logdir ./logs --port 0 <service-url>". Shell metacharacters
        are not supported: e.g., "--logdir 2>&1" will point the logdir
        at the literal directory named "2>&1".

    Raises:
      TypeError: If `manager.start` returns a result of a type that this
        function does not recognize.
    """
    context = _get_context()
    try:
        import IPython
        import IPython.display
    except ImportError:
        IPython = None

    if context == _CONTEXT_NONE:
        handle = None
        print("Launching TensorBoard...")
    else:
        handle = IPython.display.display(
            IPython.display.Pretty("Launching TensorBoard..."),
            display_id=True,
        )

    def print_or_update(message):
        # Replace the "Launching..." placeholder when there is a display
        # handle; otherwise fall back to plain printing.
        if handle is None:
            print(message)
        else:
            handle.update(IPython.display.Pretty(message))

    parsed_args = shlex.split(args_string, comments=True, posix=True)
    if not parsed_args:
        # Without this guard, taking the last token below would fail
        # with a bare IndexError that says nothing about the cause.
        print_or_update(
            "ERROR: Expected the service URL as the last argument, "
            "but no arguments were given.")
        return
    # The trailing token is the service URL, not a TensorBoard flag.
    service_url = parsed_args.pop()

    start_result = manager.start(parsed_args)

    if isinstance(start_result, manager.StartLaunched):
        _display(
            service_url=service_url,
            port=start_result.info.port,
            print_message=False,
            display_handle=handle,
        )

    elif isinstance(start_result, manager.StartReused):
        template = (
            "Reusing TensorBoard on port {port} (pid {pid}), started {delta} ago. "
            "(Use '!kill {pid}' to kill it.)")
        message = template.format(
            port=start_result.info.port,
            pid=start_result.info.pid,
            delta=_time_delta_from_info(start_result.info),
        )
        print_or_update(message)
        # The handle now holds the "Reusing..." message, so the
        # dashboard itself is shown without a handle.
        _display(
            service_url=service_url,
            port=start_result.info.port,
            print_message=False,
            display_handle=None,
        )

    elif isinstance(start_result, manager.StartFailed):
        def format_stream(name, value):
            # "" -> nothing to show; None -> the stream could not be read.
            if value == "":
                return ""
            elif value is None:
                return "\n<could not read %s>" % name
            else:
                return "\nContents of %s:\n%s" % (name, value.strip())
        message = (
            "ERROR: Failed to launch TensorBoard (exited with %d).%s%s" % (
                start_result.exit_code,
                format_stream("stderr", start_result.stderr),
                format_stream("stdout", start_result.stdout),
            ))
        print_or_update(message)

    elif isinstance(start_result, manager.StartTimedOut):
        message = ("ERROR: Timed out waiting for TensorBoard to start. "
                   "It may still be running as pid %d." % start_result.pid)
        print_or_update(message)

    else:
        # Wrap in a 1-tuple: if the unexpected result is itself a tuple
        # (or namedtuple), a bare `% start_result` would unpack it as
        # format arguments and produce a wrong or misleading message.
        raise TypeError("Unexpected result from `manager.start`: %r.\n"
                        "This is a TensorBoard bug; please report it." %
                        (start_result, ))