def add_cmd(title=None, xt_cmd=None, silent=False):
    title = file_utils.fix_slashes(title)
    xt_cmd = file_utils.fix_slashes(xt_cmd)

    cmd = {"title": title, "xt_cmd": xt_cmd, "silent": silent}
    cmds.append(cmd)

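# Hedged usage sketch for add_cmd() above (the command text is hypothetical);
# each call appends one dict to the module-level cmds list:
#
#   add_cmd(title="list workspace blobs", xt_cmd="xt list blobs", silent=True)
#   # cmds[-1] == {"title": "list workspace blobs", "xt_cmd": "xt list blobs", "silent": True}
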
def compare_json_values(self, name, valuea, valuex):
    if isinstance(valuea, dict):
        self.compare_dicts(name, valuea, valuex)
    elif isinstance(valuea, (list, tuple)):
        self.compare_lists(name, valuea, valuex)
    elif isinstance(valuea, str):
        if name in ["target_file", "cmd_parts", "cmds"]:
            valuea = file_utils.fix_slashes(valuea)
            valuex = file_utils.fix_slashes(valuex)
        self.compare_text(name, valuea, valuex)
    else:
        self._assert_match(name, valuea, valuex)

def __init__(self, store, run_dir, mirror_dest, wildcard_path, grok_url, ws_name, run_name):
    # path = '.'
    # wildcard = "*.tfevents.*"
    self.run_dir = run_dir

    wildcard_path = os.path.expanduser(wildcard_path)
    wildcard_path = wildcard_path.replace("\\", "/")

    if not wildcard_path.startswith("/"):
        wildcard_path = os.path.join(run_dir, wildcard_path)

    if "*" in wildcard_path:
        path = os.path.dirname(wildcard_path)
        wildcard = os.path.basename(wildcard_path)
    else:
        path = wildcard_path
        wildcard = None

    path = file_utils.fix_slashes(path)
    console.print("MirrorWorker: path={}, wildcard={}".format(path, wildcard))

    # in case program will create dir, but it hasn't yet been created
    file_utils.ensure_dir_exists(path)

    self.event_handler = MyHandler(store, mirror_dest, grok_url, ws_name, run_name, path, wildcard)
    self.observer = Observer()
    self.observer.schedule(self.event_handler, path, recursive=True)

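# Minimal standalone sketch of the wildcard-splitting logic above (the helper
# name and example path are hypothetical; this mirrors the dirname/basename
# split only, not XT internals):
def split_wildcard_path(wildcard_path):
    import os
    wildcard_path = os.path.expanduser(wildcard_path).replace("\\", "/")
    if "*" in wildcard_path:
        # e.g. "/home/me/runs/*.tfevents.*" -> ("/home/me/runs", "*.tfevents.*")
        return os.path.dirname(wildcard_path), os.path.basename(wildcard_path)
    # a plain directory: watch everything under it
    return wildcard_path, None
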
def add_first_cmds(self, cmds, script_name, change_dir):
    self.append(cmds, 'echo ----- START of XT-level processing -----')
    self.append(cmds, "echo running: " + script_name)
    self.append(cmds, 'echo initial cwd: {}'.format("%cd%" if self.is_windows else "$PWD"))

    cwd = utils.get_controller_cwd(self.is_windows, False)
    cwd = file_utils.fix_slashes(cwd, is_linux=not self.is_windows)

    if self.is_windows:
        self.append(cmds, 'echo 1st ARG, node_id= %1%')
        self.append(cmds, 'echo 2nd ARG, run_name= %2%')
        self.append(cmds, "mkdir {} 2>nul".format(cwd), echo_before=True)
    else:
        # echo commands as they are executed
        self.append(cmds, 'set -x')
        self.append(cmds, 'echo 1st ARG, node_id= $1')
        self.append(cmds, 'echo 2nd ARG, run_name= $2')
        self.append(cmds, "mkdir {} -p".format(cwd), echo_before=True)

    if change_dir:
        self.append(cmds, "cd {}".format(cwd), echo_before=True)
        self.append(cmds, 'echo after cd, cwd: {}'.format("%cd%" if self.is_windows else "$PWD"))

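# For reference, the Linux-side commands emitted by add_first_cmds() look
# roughly like this (script name and cwd value are hypothetical):
#
#   echo ----- START of XT-level processing -----
#   echo running: __xt_run__.sh
#   echo initial cwd: $PWD
#   set -x
#   echo 1st ARG, node_id= $1
#   echo 2nd ARG, run_name= $2
#   mkdir ~/.xt/cwd -p
#   cd ~/.xt/cwd
#   echo after cd, cwd: $PWD
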
def __init__(self, wildcard_path):
    # path = '.'
    # wildcard = "*.tfevents.*"
    wildcard_path = os.path.expanduser(wildcard_path)
    wildcard_path = wildcard_path.replace("\\", "/")

    if "*" in wildcard_path:
        path = os.path.dirname(wildcard_path)
        wildcard = os.path.basename(wildcard_path)
    else:
        path = wildcard_path
        wildcard = None

    path = file_utils.fix_slashes(path)
    #console.print("WatchWorker: path={}, wildcard={}".format(path, wildcard))

    # in case program will create dir, but it hasn't yet been created
    file_utils.ensure_dir_exists(path)

    self.event_handler = MyHandler()
    self.observer = Observer()
    #console.print("WATCHING: " + path)
    self.observer.schedule(self.event_handler, path, recursive=True)

def read_file(self, fn, start_offset, end_offset):
    fn = file_utils.fix_slashes(fn, is_linux=True)

    # # leverage the read_file() function in psm.py
    # ssh_cmd = "cd ~/.xt/cwd; python -c 'import psm; psm.read_file(\"{}\", {}, {})'" \
    #     .format(fn, start_offset, end_offset)
    # error_code, read_bytes = process_utils.sync_run_ssh(None, self.box_addr, ssh_cmd,
    #     capture_as_bytes=True, report_error=False)

    new_bytes = b""
    try:
        with self.ftp_client.file(fn) as infile:
            infile.seek(start_offset)
            if end_offset:
                new_bytes = infile.read(end_offset - start_offset)
            else:
                new_bytes = infile.read()
    except BaseException as ex:
        console.diag("exception: ex={}".format(ex))

    #new_bytes = read_bytes if not error_code else b""
    return new_bytes

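# Local-file analogue of the byte-range read above (hypothetical helper, shown
# to clarify the seek/read logic; the SFTP version behaves the same way):
def read_file_range_local(fn, start_offset, end_offset=None):
    with open(fn, "rb") as infile:
        infile.seek(start_offset)
        if end_offset:
            # read only the requested slice
            return infile.read(end_offset - start_offset)
        # no end_offset: read from start_offset to EOF
        return infile.read()
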
def process_args(self, args):
    run_script = None
    parent_script = None
    run_cmd_from_script = None
    target_file = args["script"]
    target_args = args["script_args"]
    code_upload = args["code_upload"]

    # user may have wrong slashes for this OS
    target_file = file_utils.fix_slashes(target_file)

    if os.path.isabs(target_file):
        errors.syntax_error("path to app file must be specified with a relative path: {}".format(target_file))

    is_rerun = "is_rerun" in args
    if is_rerun:
        # will be running from script dir, so remove any path to script file
        self.script_dir = os.path.dirname(target_file)
        target_file = os.path.basename(target_file)

    if target_file.endswith(".py"):
        # PYTHON target
        cmd_parts = ["python"]
        cmd_parts.append("-u")
        cmd_parts.append(target_file)
    else:
        cmd_parts = [target_file]

    if target_args:
        # split on unquoted spaces
        arg_parts = utils.cmd_split(target_args)
        cmd_parts += arg_parts

    if target_file == "docker":
        self.is_docker = True

    if not self.is_docker and code_upload and not os.path.exists(target_file):
        errors.env_error("script file not found: {}".format(target_file))

    ps_path = args["parent_script"]
    if ps_path:
        parent_script = file_utils.read_text_file(ps_path, as_lines=True)

    if target_file.endswith(".bat") or target_file.endswith(".sh"):
        # a RUN SCRIPT was specified as the target
        run_script = file_utils.read_text_file(target_file, as_lines=True)
        run_cmd_from_script = scriptor.get_run_cmd_from_script(run_script)

    compute = args["target"]
    box_def = self.config.get("boxes", compute, suppress_warning=True)
    setup = utils.safe_value(box_def, "setup")

    compute_def = self.config.get_compute_def(compute)
    if compute_def:
        # target must be defined in [compute-targets]
        if not "service" in compute_def:
            errors.config_error("compute target '{}' must define a 'service' property".format(compute))

        service = compute_def["service"]
        if service in ["local", "pool"]:
            # it's a list of box names
            boxes = compute_def["boxes"]
            if len(boxes) == 1 and boxes[0] == "localhost":
                pool = None
                box = "local"
                service_type = "pool"
            else:
                pool = compute
                box = None
                service_type = "pool"
        else:
            # it's a set of compute service properties
            pool = compute
            box = None
            service_name = compute_def["service"]
            service_type = self.config.get_service_type(service_name)
    elif box_def:
        # translate single box name to a compute_def
        box = compute
        pool = None
        service_type = "pool"
        compute_def = {"service": service_type, "boxes": [box], "setup": setup}
    else:
        errors.config_error("unknown target or box: {}".format(compute))

    args["target"] = compute
    args["compute_def"] = compute_def
    args["service_type"] = service_type

    # for legacy code
    args["box"] = box
    args["pool"] = pool

    return service_type, cmd_parts, ps_path, parent_script, target_file, run_script, run_cmd_from_script, \
        compute, compute_def

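# Illustrative cmd_parts produced by process_args() for a python target
# (script name and args are hypothetical):
#
#   args["script"] = "train.py", args["script_args"] = "--epochs 10 --lr .1"
#   -> cmd_parts == ["python", "-u", "train.py", "--epochs", "10", "--lr", ".1"]
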
def build_docker_cmd(self, docker_name, target_file, cmd_parts, script_dir, snapshot_dir, job_secret, args):
    for_windows = True

    docker_def = self.config.get("dockers", docker_name, default_value=None)
    if not docker_def:
        errors.config_error("docker '{}' not found in config file".format(docker_name))

    registry_name = docker_def["registry"]
    image = docker_def["image"]

    if registry_name:
        # get REGISTRY credentials
        registry_creds = self.config.get("external-services", registry_name, suppress_warning=True)
        if not registry_creds:
            errors.config_error("'{}' must be specified in [external-services] section of XT config file".format(registry_name))

        login_server = registry_creds["login-server"]
    else:
        login_server = None

    #pwd = "%cd%" if for_windows else "$(pwd)"
    script_dir = file_utils.fix_slashes(script_dir, True)
    mappings = "-v {}:/usr/src".format(script_dir)
    options = "--rm"

    # collect env vars
    env_vars = {"XT_IN_DOCKER": 1, "XT_USERNAME": pc_utils.get_username()}
    scriptor.add_controller_env_vars(env_vars, self.config, job_secret, "node0")

    # fixup backslash char for target_file
    if ".py" in target_file:
        app = "python -u"
        #target_file = file_utils.fix_slashes(target_file, True)
        target_file = os.path.basename(target_file)
    else:
        app = target_file
        target_file = ""

    full_image = login_server + "/" + image if login_server else image

    # build a mapping for data?
    data_local = args["data_local"]
    if data_local:
        if "$scriptdir" in data_local:
            data_local = data_local.replace("$scriptdir", script_dir)

        data_local = os.path.realpath(data_local)
        mappings += " -v {}:/usr/data".format(data_local)
        env_vars["XT_DATA_DIR"] = "/usr/data"

    # write env vars to file in snapshot dir
    FN_EV = "__dockev__.txt"
    fn_env_var = os.path.join(snapshot_dir, FN_EV)
    lines = [name + "=" + str(value) for name, value in env_vars.items()]
    text = "\n".join(lines)
    file_utils.write_text_file(fn_env_var, text)

    # specify env var file (in current directory) to docker
    options += " --env-file={}".format(FN_EV)

    # inherit ENV VARS from running environment
    options += " -e XT_RUN_NAME -e XT_WORKSPACE_NAME -e XT_EXPERIMENT_NAME"

    docker_cmd = "docker run {} {} {} {} /usr/src/{}".format(options, mappings, full_image, app, target_file)
    new_parts = utils.cmd_split(docker_cmd)
    return new_parts

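# Illustrative docker command assembled by build_docker_cmd() above
# (registry, image, and paths are hypothetical):
#
#   docker run --rm --env-file=__dockev__.txt \
#       -e XT_RUN_NAME -e XT_WORKSPACE_NAME -e XT_EXPERIMENT_NAME \
#       -v /proj/src:/usr/src myregistry.azurecr.io/xt-image \
#       python -u /usr/src/train.py
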
def upload(self, local_path, store_path, share, workspace, experiment, job, run, feedback, show_output=True):
    use_blobs = True
    use_multi = True
    upload_count = 0

    # expand ~/ in front of local path
    local_path = os.path.expanduser(local_path)

    if os.path.exists(local_path) and os.path.isfile(local_path):
        use_multi = False
    #console.print("local_path=", local_path)

    # if directory, default to copy nested
    if os.path.isdir(local_path):
        local_path += "/**"
        use_multi = True

    if not store_path or store_path == ".":
        if not use_multi:
            # single file defaults to the base name of the local file
            store_path = os.path.basename(local_path)
        else:
            store_path = "."

    fs = self.create_file_accessor(use_blobs, share, workspace, experiment, job, run)
    uri = fs.get_uri(store_path)

    actual_path, _ = file_utils.split_wc_path(local_path)
    actual_path = file_utils.relative_path(actual_path)
    actual_path = file_utils.fix_slashes(actual_path)

    if not os.path.exists(actual_path):
        errors.env_error("Cannot find the local file/folder: {}".format(actual_path))

    feedback_progress = FeedbackProgress(feedback, show_output)
    progress_callback = feedback_progress.progress if feedback else None

    if use_multi:
        # upload MULTIPLE files/blobs
        file_names, local_path = file_utils.get_local_filenames(local_path)
        what = "blobs" if use_blobs else "files"

        if len(file_names) == 0:
            if show_output:
                console.print("no matching {} found in: {}".format(what, actual_path))
            return
        elif len(file_names) == 1:
            what = "blob" if use_blobs else "file"

        if show_output:
            console.print("\nto {}, uploading {} {}:".format(uri, len(file_names), what))

        #file_utils.ensure_dir_exists(local_path)
        max_name_len = max([len(name) for name in file_names])
        name_width = 1 + max_name_len
        #console.print("max_name_len=", max_name_len, ", name_width=", name_width)

        for f, fn in enumerate(file_names):
            blob_path = self.make_dest_fn(local_path, fn, store_path)
            actual_fn = file_utils.fix_slashes(fn)

            if show_output:
                file_msg = "file {}/{}".format(1 + f, len(file_names))
                console.print("  {2:}: {1:<{0:}} ".format(name_width, actual_fn + ":", file_msg),
                    end="", flush=True)

            feedback_progress.start()
            fs.upload_file(blob_path, actual_fn, progress_callback=progress_callback)
            feedback_progress.end()

            upload_count += 1
    else:
        # upload SINGLE file/blob
        what = "blob" if use_blobs else "file"
        if show_output:
            console.print("\nto: {}, uploading {}:".format(uri, what))

        blob_name = os.path.basename(local_path)
        local_path = file_utils.fix_slashes(local_path)

        if show_output:
            #console.print("store_path=", store_path, ", local_path=", local_path)
            console.print("  {}: ".format(local_path), end="", flush=True)

        feedback_progress.start()
        fs.upload_file(store_path, local_path, progress_callback=progress_callback)
        feedback_progress.end()

        upload_count += 1

    return upload_count

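# Hedged usage sketch for upload() (share/workspace values are hypothetical);
# a directory argument is expanded to "<dir>/**" and uploaded as multiple blobs:
#
#   count = impl.upload("./results", ".", share="data", workspace="ws1",
#       experiment=None, job=None, run=None, feedback=True)
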
def download(self, store_path, local_path, share, workspace, experiment, job, run, feedback, snapshot, show_output=True):
    use_blobs = True
    use_multi = True    # default until we test if store_path exists as a file/blob
    download_count = 0

    fs = self.create_file_accessor(use_blobs, share, workspace, experiment, job, run)

    # test for existence of store_path as a blob/file
    if not "*" in store_path and not "?" in store_path:
        if fs.does_file_exist(store_path):
            use_multi = False

    if local_path:
        # expand ~/ in front of local path
        local_path = os.path.expanduser(local_path)
    else:
        # path not specified for local
        if use_multi:
            local_path = "."
        else:
            local_path = "./" + os.path.basename(store_path)

    uri = fs.get_uri(store_path)

    # default store folder to recursive
    if use_multi and not "*" in store_path and not "?" in store_path:
        store_path += "/**"

    use_snapshot = snapshot

    feedback_progress = FeedbackProgress(feedback, show_output)
    progress_callback = feedback_progress.progress if feedback else None

    if use_multi:
        # download MULTIPLE blobs/files
        what = "blobs" if use_blobs else "files"
        single_what = what[0:-1]

        if show_output:
            console.print("collecting {} names from: {}...".format(single_what, uri), end="")

        _, blob_names = fs.get_filenames(store_path, full_paths=False)
        if show_output:
            console.print()

        if len(blob_names) == 0:
            console.print("no matching {} found in: {}".format(what, uri))
            return 0
        elif len(blob_names) == 1:
            what = "blob" if use_blobs else "file"

        if show_output:
            console.print("\ndownloading {} {}...:".format(len(blob_names), what))

        file_utils.ensure_dir_exists(local_path)
        max_name_len = max([len(local_path + "/" + name) for name in blob_names])
        name_width = 1 + max_name_len
        #console.print("max_name_len=", max_name_len, ", name_width=", name_width)

        for f, bn in enumerate(blob_names):
            dest_fn = file_utils.fix_slashes(local_path + "/" + bn)

            if show_output:
                file_msg = "file {}/{}".format(1 + f, len(blob_names))
                console.print("  {2:}: {1:<{0:}} ".format(name_width, dest_fn + ":", file_msg),
                    end="", flush=True)

            feedback_progress.start()
            full_bn = uri + "/" + bn if uri else bn
            fs.download_file(full_bn, dest_fn, progress_callback=progress_callback, use_snapshot=use_snapshot)
            feedback_progress.end()

            download_count += 1
    else:
        # download SINGLE blob/file
        what = "blob" if use_blobs else "file"
        if not fs.does_file_exist(store_path):
            errors.store_error("{} not found: {}".format(what, uri))

        local_path = file_utils.fix_slashes(local_path)

        if show_output:
            console.print("\nfrom {}, downloading {}:".format(uri, what))
            console.print("  {}: ".format(local_path), end="", flush=True)

        feedback_progress.start()
        fs.download_file(store_path, local_path, progress_callback=progress_callback, use_snapshot=use_snapshot)
        feedback_progress.end()

        download_count += 1

    return download_count

def restart_psm_if_needed(self):
    '''
    processing:
        - if PSM is running an old psm.py, kill the process and restart it.
        - if PSM is not running, start it.
    '''
    kill_needed = False
    start_needed = False

    fn_src = os.path.join(file_utils.get_my_file_dir(__file__), constants.PSM)
    fn_dest = file_utils.path_join(self.xt_path, constants.PSM, for_windows=self.box_is_windows)

    running = bool(self._get_psm_process_id())
    #print("PSM running=", running)

    if running:
        # do file contents match?
        text_src = file_utils.read_text_file(fn_src)
        text_dest = ""

        if self.remote_file_exists(fn_dest):
            # read text of fn_dest on remote box
            with self.ftp_client.open(fn_dest, "rb") as infile:
                bytes_dest = infile.read()
                text_dest = bytes_dest.decode()

        # normalize NEWLINE chars before comparison
        # (ftp_client seems to add CR when called from windows)
        text_src = text_src.replace("\r\n", "\n")
        text_dest = text_dest.replace("\r\n", "\n")

        if text_src != text_dest:
            kill_needed = True
    else:
        start_needed = True

    if kill_needed:
        p = self._get_psm_process_id()
        ssh_cmd = "kill -kill {}".format(p)
        self.run_cmd(ssh_cmd)
        start_needed = True

    if start_needed:
        # create required dirs
        self._make_dir(self.psm_queue_path)
        self._make_dir(self.cwd_path)

        # copy psm.py
        # caution: slashes in fn_dest must all match the box's OS style
        fn_dest = file_utils.fix_slashes(fn_dest, is_linux=True)
        status = self.ftp_client.put(fn_src, fn_dest)

        # run psm
        fn_log = os.path.join(self.xt_path, constants.PSMLOG)

        if self.box_is_windows:
            cmd_parts = ["cmd", "/c", "python -u {} > {}".format(fn_dest, fn_log)]
            cmd = " ".join(cmd_parts)
        else:
            fn_log = file_utils.fix_slashes(fn_log, is_linux=True)
            cmd = 'nohup bash --login -c "python -u {}" </dev/null > {} 2>&1 &'.format(fn_dest, fn_log)

        #print("cmd=", cmd)
        #process_utils.sync_run_ssh(self, self.box_addr, cmd)
        self.run_cmd(cmd)

        # don't return until PSM is running
        for i in range(20):
            running = bool(self._get_psm_process_id())
            if running:
                break
            time.sleep(.5)

        if not running:
            errors.general_error("Could not start remote PSM on box={}".format(self.box_addr))

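# The startup poll at the end of restart_psm_if_needed() generalizes to a
# small wait-for helper (hypothetical; not part of XT):
def wait_for(predicate, tries=20, delay=0.5):
    import time
    for _ in range(tries):
        if predicate():
            return True
        time.sleep(delay)
    return False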