def setup_tensorboard(args):
    """Restart TensorBoard on a fresh ``args.output_dir`` and display it.

    Steps, in order:
      1. Kill the first known TensorBoard instance whose data source is
         ``args.output_dir``.
      2. Delete ``args.output_dir`` if it exists, then recreate it empty.
      3. Start TensorBoard on that directory with ``notebook.start``.
      4. Display the first known instance whose data source is that
         directory, at a height of 1000.

    Every known instance visited along the way is printed to stdout.

    Args:
        args: Object exposing an ``output_dir`` attribute (the log directory).
    """
    from tensorboard import notebook, manager
    import signal
    import shutil

    # Data-source string used to recognise instances serving our directory.
    # This has to be an f-string: as a plain literal the placeholder was never
    # substituted, so no instance ever matched and nothing was killed/shown.
    target_source = f"logdir {args.output_dir}"

    # Kill tensorboard
    for info in manager.get_all():
        data_source = manager.data_source_from_info(info)
        print(f"port {info.port}: {data_source} (pid {info.pid})")
        if data_source == target_source:
            pid = info.pid
            logger.info(f"Killing tensorboard at pid: {pid}")
            # NOTE(review): an instance killed with SIGKILL may still be
            # reported by `manager.get_all()` afterwards (its info file is not
            # cleaned up) -- confirm whether SIGTERM would be preferable here.
            os.kill(pid, signal.SIGKILL)
            break

    # Delete output directory
    if os.path.exists(args.output_dir):
        logger.info(f"Deleting {args.output_dir}")
        shutil.rmtree(args.output_dir)
    logger.info(f"Creating {args.output_dir}")
    os.makedirs(args.output_dir)

    # Start notebook
    notebook.start(f"--logdir {args.output_dir}")

    # Display tensorboard
    for info in manager.get_all():
        data_source = manager.data_source_from_info(info)
        print(f"port {info.port}: {data_source} (pid {info.pid})")
        if data_source == target_source:
            port = info.port
            print()
            notebook.display(port=port, height=1000)
            break
def _display(host=None, port=None, height=None, print_message=False, display_handle=None):
    """Internal version of `display`.

    Args:
      host: Host passed through to the display backend. Defaults to
        "localhost" when None.
      port: As with `display`. When None, the most recently started known
        instance is used.
      height: As with `display`. Defaults to 800 when None.
      print_message: True to print which TensorBoard instance was selected
        for display (if applicable), or False otherwise.
      display_handle: If not None, an IPython display handle into which to
        render TensorBoard.

    Returns:
      Whatever the context-specific display function returns.

    Raises:
      ValueError: If `port` is None and no TensorBoard instances are known.
    """
    if height is None:
        height = 800
    if host is None:
        host = "localhost"

    if port is None:
        infos = manager.get_all()
        if not infos:
            raise ValueError(
                "Can't display TensorBoard: no known instances running.")
        # Pick from the snapshot that was just checked for emptiness. Querying
        # the manager again would repeat the scan and could hand `max` an
        # empty sequence if the last instance disappeared in between.
        info = max(infos, key=lambda x: x.start_time)
        port = info.port
    else:
        infos = [i for i in manager.get_all() if i.port == port]
        info = max(infos, key=lambda x: x.start_time) if infos else None

    if print_message:
        if info is not None:
            message = (
                "Selecting TensorBoard with {data_source} "
                "(started {delta} ago; port {port}, pid {pid}).").format(
                    data_source=manager.data_source_from_info(info),
                    delta=_time_delta_from_info(info),
                    port=info.port,
                    pid=info.pid,
                )
            print(message)
        else:
            # The user explicitly provided a port, and we don't have any
            # additional information. There's nothing useful to say.
            pass

    # Dispatch on the detected execution context.
    fn = {
        _CONTEXT_COLAB: _display_colab,
        _CONTEXT_IPYTHON: _display_ipython,
        _CONTEXT_NONE: _display_cli,
    }[_get_context()]
    return fn(host=host, port=port, height=height, display_handle=display_handle)
def test_tensorboard_binary_environment_variable(self):
    """Start with `TENSORBOARD_BINARY` pointing at a failing stub script.

    Writes an executable shell script that prints to stderr and stdout and
    exits with status 77, exposes it through the `TENSORBOARD_BINARY`
    environment variable, and checks that `manager.start` reports a
    `StartFailed` carrying that exit code and both output streams, with no
    instance registered afterwards.
    """
    if os.name == "nt":
        # TODO(@wchargin): This could in principle work on Windows.
        self.skipTest("Requires a POSIX shell for the stub script.")
    tempdir = tempfile.mkdtemp()
    filepath = os.path.join(tempdir, "tensorbad")
    # Dedent first and strip afterwards. Stripping first removes the first
    # line's indentation, which leaves `dedent` with no common prefix to
    # remove and the rest of the script still indented.
    program = textwrap.dedent(
        r"""
        #!/bin/sh
        printf >&2 'tensorbad: fatal: something bad happened\n'
        printf 'tensorbad: also some stdout\n'
        exit 77
        """
    ).lstrip()
    with open(filepath, "w") as outfile:
        outfile.write(program)
    os.chmod(filepath, 0o777)

    # Patch the environment for the duration of this test only.
    environ = {"TENSORBOARD_BINARY": filepath}
    environ_patcher = mock.patch.dict(os.environ, environ)
    environ_patcher.start()
    self.addCleanup(environ_patcher.stop)

    start_result = manager.start(["--logdir=./logs", "--port=0"])
    self.assertIsInstance(start_result, manager.StartFailed)
    self.assertEqual(
        start_result,
        manager.StartFailed(
            exit_code=77,
            stderr="tensorbad: fatal: something bad happened\n",
            stdout="tensorbad: also some stdout\n",
        ),
    )
    self.assertEqual(manager.get_all(), [])
def test_timeout(self):
    """A stub that outlives the timeout yields `StartTimedOut` with its pid.

    The stub script records its own pid in a file, writes a warning to
    stderr and sleeps for 60 seconds; `manager.start` is given a one-second
    timeout. The reported pid must equal the one the script wrote, and no
    instance may be registered afterwards.
    """
    if os.name == "nt":
        # TODO(@wchargin): This could in principle work on Windows.
        self.skipTest("Requires a POSIX shell for the stub script.")
    tempdir = tempfile.mkdtemp()
    pid_file = os.path.join(tempdir, "pidfile")
    # Dedent first and strip afterwards (stripping first makes `dedent` a
    # no-op), and only then substitute the path so that the quoted value
    # cannot influence the dedenting.
    script_template = textwrap.dedent(
        r"""
        #!/bin/sh
        printf >%s '%%s' "$$"
        printf >&2 'warn: I am tired\n'
        sleep 60
        """
    ).lstrip()
    # NOTE(review): `pipes` was removed in Python 3.13; `shlex.quote` is the
    # replacement if this file no longer needs to run on Python 2.
    self._stub_tensorboard(
        name="wait-a-minute",
        program=script_template % pipes.quote(os.path.realpath(pid_file)),
    )
    start_result = manager.start(
        ["--logdir=./logs", "--port=0"],
        timeout=datetime.timedelta(seconds=1),
    )
    self.assertIsInstance(start_result, manager.StartTimedOut)
    with open(pid_file) as infile:
        expected_pid = int(infile.read())
    self.assertEqual(start_result, manager.StartTimedOut(pid=expected_pid))
    self.assertEqual(manager.get_all(), [])
def test_exit_success(self):
    """A stub exiting 0 without registering itself still counts as failed.

    The expected result is a `StartFailed` with exit code 0 and the stub's
    stderr and stdout, and no instance registered afterwards.
    """
    # TensorBoard exiting with success but not writing the info file is
    # still a failure to launch.
    if os.name == "nt":
        # TODO(@wchargin): This could in principle work on Windows.
        self.skipTest("Requires a POSIX shell for the stub script.")
    # Dedent first and strip afterwards. Stripping first removes the first
    # line's indentation, which leaves `dedent` with nothing to remove.
    self._stub_tensorboard(
        name="fail-with-0",
        program=textwrap.dedent(
            r"""
            #!/bin/sh
            printf >&2 'info: something good happened\n'
            printf 'also some standard output\n'
            exit 0
            """
        ).lstrip(),
    )
    start_result = manager.start(["--logdir=./logs", "--port=0"])
    self.assertIsInstance(start_result, manager.StartFailed)
    self.assertEqual(
        start_result,
        manager.StartFailed(
            exit_code=0,
            stderr="info: something good happened\n",
            stdout="also some standard output\n",
        ),
    )
    self.assertEqual(manager.get_all(), [])
def test_exit_failure(self):
    """A stub exiting with status 77 is reported as `StartFailed`.

    The result must carry exit code 77 together with the stub's stderr and
    stdout, and no instance may be registered afterwards.
    """
    if os.name == "nt":
        # TODO(@wchargin): This could in principle work on Windows.
        self.skipTest("Requires a POSIX shell for the stub script.")
    # Dedent first and strip afterwards. Stripping first removes the first
    # line's indentation, which leaves `dedent` with nothing to remove.
    self._stub_tensorboard(
        name="fail-with-77",
        program=textwrap.dedent(
            r"""
            #!/bin/sh
            printf >&2 'fatal: something bad happened\n'
            printf 'also some stdout\n'
            exit 77
            """
        ).lstrip(),
    )
    start_result = manager.start(["--logdir=./logs", "--port=0"])
    self.assertIsInstance(start_result, manager.StartFailed)
    self.assertEqual(
        start_result,
        manager.StartFailed(
            exit_code=77,
            stderr="fatal: something bad happened\n",
            stdout="also some stdout\n",
        ),
    )
    self.assertEqual(manager.get_all(), [])
def list():
    """Print every TensorBoard instance the manager currently knows about.

    The listing is best-effort. An instance that was killed uncleanly
    (e.g., with SIGKILL or SIGQUIT) may still be shown although it is no
    longer running, and a still-running instance may be absent if the
    operating system's temporary directory was cleared after it started.
    """
    known = manager.get_all()
    if known:
        print("Known TensorBoard instances:")
        row = " - port {port}: {data_source} (started {delta} ago; pid {pid})"
        for entry in known:
            line = row.format(
                port=entry.port,
                data_source=manager.data_source_from_info(entry),
                delta=_time_delta_from_info(entry),
                pid=entry.pid,
            )
            print(line)
    else:
        print("No known TensorBoard instances running.")
def test_launch_new_because_info_file_deleted(self):
    """Wiping the temp root makes an identical command launch afresh."""
    start_args = ("--logdir=./logs", "--port=0")

    first = manager.start(list(start_args))
    self.assertIsInstance(first, manager.StartLaunched)

    # Simulate somebody clearing the temporary directory: the single known
    # instance disappears from the manager's view.
    self.assertEqual(len(manager.get_all()), 1, manager.get_all())
    shutil.rmtree(self.tmproot)
    os.mkdir(self.tmproot)
    self.assertEqual(len(manager.get_all()), 0, manager.get_all())

    # The very same command can no longer be reused and is launched again
    # (on a new port), under the same cache key as before.
    second = manager.start(list(start_args))
    self.assertIsInstance(second, manager.StartLaunched)
    self.assertEqual(first.info.cache_key, second.info.cache_key)

    # Only the second launch is known, yet both are checked for liveness.
    self.assertItemsEqual(manager.get_all(), [second.info])
    for launched in (first, second):
        self._assert_live(launched.info, expected_logdir="./logs")
def test_reuse(self):
    """Starting the same command twice reuses the first instance."""
    start_args = ("--logdir=./logs", "--port=0")

    launched = manager.start(list(start_args))
    self.assertIsInstance(launched, manager.StartLaunched)

    reused = manager.start(list(start_args))
    self.assertIsInstance(reused, manager.StartReused)

    # Both results describe one and the same instance, and it is the only
    # one the manager knows about.
    self.assertEqual(launched.info, reused.info)
    known = manager.get_all()
    self.assertEqual(known, [launched.info])
    self._assert_live(launched.info, expected_logdir="./logs")
def test_launch_new_because_incompatible(self):
    """A different logdir launches a second, separate instance."""
    logs_run = manager.start(["--logdir=./logs", "--port=0"])
    self.assertIsInstance(logs_run, manager.StartLaunched)

    adders_run = manager.start(["--logdir=./adders", "--port=0"])
    self.assertIsInstance(adders_run, manager.StartLaunched)

    # The two launches must not share a port or a process.
    logs_info = logs_run.info
    adders_info = adders_run.info
    self.assertNotEqual(logs_info.port, adders_info.port)
    self.assertNotEqual(logs_info.pid, adders_info.pid)

    # Both are known to the manager, each serving its own logdir.
    known = manager.get_all()
    self.assertItemsEqual(known, [logs_info, adders_info])
    self._assert_live(logs_info, expected_logdir="./logs")
    self._assert_live(adders_info, expected_logdir="./adders")
def test_get_all_ignores_bad_files(self):
    """`get_all` skips unparseable, invalid and unreadable info files."""
    unreadable_name = "pid-9012.info"
    bad_files = (
        ("pid-1234.info", "good luck parsing this\n"),
        ("pid-5678.info", '{"valid_json":"yes","valid_tbinfo":"no"}\n'),
        (unreadable_name, 'if a tbinfo has st_mode==0, does it make a sound?\n'),
    )
    for file_name, contents in bad_files:
        with open(os.path.join(self.info_dir, file_name), "w") as outfile:
            outfile.write(contents)
    # Drop every permission bit from the last file.
    os.chmod(os.path.join(self.info_dir, unreadable_name), 0o000)

    tb_logger = tb_logging.get_logger()
    with mock.patch.object(tb_logger, "warning") as warning_mock:
        self.assertEqual(manager.get_all(), [])
    # 2 invalid, 1 unreadable (silent)
    self.assertEqual(warning_mock.call_count, 2)
def test_get_all(self):
    """`get_all` tracks info files as they are written and removed."""

    def fake_pid(i):
        # Patch `os.getpid` so that entry `i` acts as process 76540 + i.
        return mock.patch("os.getpid", lambda: 76540 + i)

    def add_info(i):
        with fake_pid(i):
            manager.write_info_file(_make_info(i))

    def remove_info(i):
        with fake_pid(i):
            manager.remove_info_file()

    # Each step is (action, entry index, indices expected afterwards).
    steps = (
        (add_info, 1, (1,)),
        (add_info, 2, (1, 2)),
        (remove_info, 1, (2,)),
        (add_info, 3, (2, 3)),
        (remove_info, 3, (2,)),
        (remove_info, 2, ()),
    )

    self.assertItemsEqual(manager.get_all(), [])
    for action, index, remaining in steps:
        action(index)
        self.assertItemsEqual(
            manager.get_all(), [_make_info(j) for j in remaining]
        )
def test_failure_unreadable_stdio(self):
    """A failing stub that deletes the temp root yields no captured output.

    The stub removes `self.tmproot` and exits with status 22; the expected
    `StartFailed` has that exit code and `None` for both stderr and stdout.
    """
    if os.name == "nt":
        # TODO(@wchargin): This could in principle work on Windows.
        self.skipTest("Requires a POSIX shell for the stub script.")

    # Substitute the quoted path first, then dedent and strip the result.
    raw_script = r"""
        #!/bin/sh
        rm -r %s
        exit 22
        """ % pipes.quote(self.tmproot)
    self._stub_tensorboard(
        name="fail-and-nuke-tmp",
        program=textwrap.dedent(raw_script).lstrip(),
    )

    outcome = manager.start(["--logdir=./logs", "--port=0"])
    self.assertIsInstance(outcome, manager.StartFailed)
    expected = manager.StartFailed(exit_code=22, stderr=None, stdout=None)
    self.assertEqual(outcome, expected)
    self.assertEqual(manager.get_all(), [])