def visualize(hdfs_root_logdir):
    """ Visualize all TensorBoard events for a given path in HopsFS. This is intended for
    use after running TensorFlow jobs to visualize them all in the same TensorBoard.
    tflauncher.launch returns the path in HopsFS which should be handed as argument for
    this method to visualize all runs.

    Blocks until the spawned TensorBoard process exits.

    Args:
        :hdfs_root_logdir: the path in HopsFS to enter as the logdir for TensorBoard
    """
    sc = util._find_spark().sparkContext
    app_id = str(sc.applicationId)

    pypath = os.getenv("PYSPARK_PYTHON")

    # Start from a clean local events directory on every invocation so stale
    # event files from a previous visualize() call are not picked up.
    logdir = os.getcwd() + '/tensorboard_events/'
    if os.path.exists(logdir):
        shutil.rmtree(logdir)
    os.makedirs(logdir)

    # Find a free port by binding an ephemeral socket and releasing it.
    # NOTE(review): there is an inherent race between close() and TensorBoard
    # re-binding the port; this mirrors the pre-existing behavior.
    tb_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    tb_socket.bind(('', 0))
    tb_addr, tb_port = tb_socket.getsockname()
    tb_path = util._find_tensorboard()
    tb_socket.close()

    tb_env = os.environ.copy()
    # Keep TensorBoard off any GPU and force a predictable locale.
    tb_env['CUDA_VISIBLE_DEVICES'] = ''
    tb_env['LC_ALL'] = 'C'

    tb_proc = subprocess.Popen(
        [pypath, tb_path,
         "--logdir=%s" % logdir,
         "--port=%d" % tb_port,
         "--host=%s" % "0.0.0.0"],
        env=tb_env,
        preexec_fn=util._on_executor_exit('SIGTERM'))

    host = socket.gethostname()
    tb_url = "http://{0}:{1}".format(host, tb_port)
    tb_endpoint = hopshdfs._get_experiments_dir(
    ) + "/" + app_id + "/TensorBoard.visualize"
    # Dump tb host:port to HDFS so clients can discover the endpoint.
    pydoop.hdfs.dump(tb_url, tb_endpoint, user=hopshdfs.project_user())

    # Pull every non-.log entry from the HDFS root logdir into the local
    # events directory so a single TensorBoard can display all runs.
    handle = hopshdfs.get()
    hdfs_logdir_entries = handle.list_directory(hdfs_root_logdir)
    for entry in hdfs_logdir_entries:
        _, extension = splitext(entry['name'])
        if extension != '.log':
            pydoop.hdfs.get(entry['name'], logdir)

    # communicate() waits for the process, so a separate wait() is redundant.
    # NOTE(review): stdout/stderr were not redirected to pipes, so these are
    # (None, None) — kept for backward compatibility of the printed output.
    stdout, stderr = tb_proc.communicate()
    print(stdout)
    print(stderr)
def _register(hdfs_exec_dir, endpoint_dir, exec_num, local_logdir=False):
    """Start a TensorBoard server on this executor and publish its endpoint to HDFS.

    Kills any TensorBoard previously registered by this module (tracked via the
    module-global ``tb_pid``), records the logdir configuration in module globals,
    launches a fresh TensorBoard process on a free port, and dumps its
    ``http://host:port`` URL to a per-task file under ``endpoint_dir`` in HDFS.

    Args:
        hdfs_exec_dir: HDFS directory with the events to serve; stored in the
            module-global ``events_logdir`` and used as ``--logdir`` unless
            ``local_logdir`` is set.
        endpoint_dir: HDFS directory in which the ``TensorBoard.task<exec_num>``
            endpoint file is dumped.
        exec_num: executor/task number; only used to name the endpoint file.
        local_logdir: if True, serve a freshly (re)created local directory
            ``<cwd>/local_logdir/`` instead of the HDFS events directory.

    Returns:
        A tuple ``(endpoint, tb_pid)``: the HDFS path of the dumped endpoint
        file and the PID of the spawned TensorBoard process.
    """
    global tb_pid

    # A previous TensorBoard is still registered: kill it and reset module state.
    # NOTE(review): kill is done via a spawned `kill` process rather than
    # os.kill, and its exit status is not checked — confirm this is intended.
    if tb_pid != 0:
        subprocess.Popen(["kill", str(tb_pid)])
        _reset_global()

    global events_logdir
    events_logdir = hdfs_exec_dir

    global local_logdir_bool
    local_logdir_bool = local_logdir

    # Presumably _reset_global() above sets tb_pid back to 0, so this branch
    # runs on every call — TODO confirm against _reset_global's definition.
    if tb_pid == 0:
        global pypath
        pypath = os.getenv("PYSPARK_PYTHON")

        # Find a free port by binding an ephemeral socket, then release it
        # for TensorBoard to re-bind (small inherent race window).
        tb_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        tb_socket.bind(('',0))
        global tb_port
        tb_addr, tb_port = tb_socket.getsockname()

        global tb_path
        tb_path = util._find_tensorboard()
        tb_socket.close()

        tb_env = os.environ.copy()
        # Keep TensorBoard off any GPU assigned to the actual training job.
        tb_env['CUDA_VISIBLE_DEVICES'] = ''

        tb_proc = None
        global local_logdir_path
        if local_logdir:
            # Serve a clean local directory; recreate it from scratch so no
            # events from a previous run linger.
            local_logdir_path = os.getcwd() + '/local_logdir'
            if os.path.exists(local_logdir_path):
                shutil.rmtree(local_logdir_path)
                os.makedirs(local_logdir_path)
            else:
                os.makedirs(local_logdir_path)

            local_logdir_path = local_logdir_path + '/'
            tb_proc = subprocess.Popen([pypath, tb_path,
                                        "--logdir=%s" % local_logdir_path,
                                        "--port=%d" % tb_port,
                                        "--host=%s" % "0.0.0.0"],
                                       env=tb_env,
                                       preexec_fn=util._on_executor_exit('SIGTERM'))
        else:
            # Serve the HDFS events directory directly.
            tb_proc = subprocess.Popen([pypath, tb_path,
                                        "--logdir=%s" % events_logdir,
                                        "--port=%d" % tb_port,
                                        "--host=%s" % "0.0.0.0"],
                                       env=tb_env,
                                       preexec_fn=util._on_executor_exit('SIGTERM'))

        tb_pid = tb_proc.pid

        host = socket.gethostname()
        global tb_url
        tb_url = "http://{0}:{1}".format(host, tb_port)
        global endpoint
        endpoint = endpoint_dir + "/TensorBoard.task" + str(exec_num)

        #dump tb host:port to hdfs
        pydoop.hdfs.dump(tb_url, endpoint, user=hopshdfs.project_user())

    return endpoint, tb_pid