示例#1
0
def add_cmd(title=None, xt_cmd=None, silent=False):
    """Record one command entry in the module-level ``cmds`` list.

    Both ``title`` and ``xt_cmd`` are passed through
    ``file_utils.fix_slashes`` before being stored; ``silent`` is stored
    unchanged.
    """
    entry = {
        "title": file_utils.fix_slashes(title),
        "xt_cmd": file_utils.fix_slashes(xt_cmd),
        "silent": silent,
    }
    cmds.append(entry)
示例#2
0
    def compare_json_values(self, name, valuea, valuex):
        """Dispatch the comparison of ``valuea`` against ``valuex`` by the type of ``valuea``.

        dicts go to ``compare_dicts``, lists/tuples to ``compare_lists``,
        strings to ``compare_text`` and everything else to ``_assert_match``.
        For the path-like names listed below, both strings are first passed
        through ``file_utils.fix_slashes``.
        """
        if isinstance(valuea, dict):
            self.compare_dicts(name, valuea, valuex)
            return

        if isinstance(valuea, (list, tuple)):
            self.compare_lists(name, valuea, valuex)
            return

        if not isinstance(valuea, str):
            self._assert_match(name, valuea, valuex)
            return

        # string values: normalize slashes for the names that hold paths/commands
        if name in ("target_file", "cmd_parts", "cmds"):
            valuea = file_utils.fix_slashes(valuea)
            valuex = file_utils.fix_slashes(valuex)

        self.compare_text(name, valuea, valuex)
示例#3
0
    def __init__(self, store, run_dir, mirror_dest, wildcard_path, grok_url,
                 ws_name, run_name):
        """Set up a filesystem observer for the directory named by ``wildcard_path``.

        ``wildcard_path`` has ``~`` expanded and backslashes converted to
        forward slashes; if it does not start with "/", it is joined onto
        ``run_dir``.  When it contains a "*", the directory part is watched
        and the final component is used as the wildcard; otherwise the whole
        path is watched and no wildcard is used.
        """
        self.run_dir = run_dir

        pattern = os.path.expanduser(wildcard_path).replace("\\", "/")
        if not pattern.startswith("/"):
            pattern = os.path.join(run_dir, pattern)

        # e.g. watch_dir = '.', wildcard = "*.tfevents.*"
        has_wildcard = "*" in pattern
        watch_dir = os.path.dirname(pattern) if has_wildcard else pattern
        wildcard = os.path.basename(pattern) if has_wildcard else None

        watch_dir = file_utils.fix_slashes(watch_dir)
        console.print("MirrorWorker: path={}, wildcard={}".format(
            watch_dir, wildcard))

        # the program may create this dir later; make sure it exists now
        file_utils.ensure_dir_exists(watch_dir)

        self.event_handler = MyHandler(store, mirror_dest, grok_url, ws_name,
                                       run_name, watch_dir, wildcard)
        self.observer = Observer()
        self.observer.schedule(self.event_handler, watch_dir, recursive=True)
示例#4
0
    def add_first_cmds(self, cmds, script_name, change_dir):
        """Append the opening echo/mkdir/cd commands of a script to ``cmds``.

        The commands are emitted in Windows batch or shell syntax depending on
        ``self.is_windows``.  The controller working directory is created, and
        when ``change_dir`` is truthy the script also changes into it.
        """
        self.append(cmds, 'echo ----- START of XT-level processing -----')
        self.append(cmds, "echo running: " + script_name)

        # expression that expands to the current directory on the target OS
        cwd_var = "%cd%" if self.is_windows else "$PWD"
        self.append(cmds, 'echo initial cwd: {}'.format(cwd_var))

        cwd = file_utils.fix_slashes(
            utils.get_controller_cwd(self.is_windows, False),
            is_linux=not self.is_windows)

        if self.is_windows:
            first_arg, second_arg = "%1%", "%2%"
            mkdir_cmd = "mkdir {} 2>nul".format(cwd)
        else:
            # echo commands as they are executed
            self.append(cmds, 'set -x')
            first_arg, second_arg = "$1", "$2"
            mkdir_cmd = "mkdir {} -p".format(cwd)

        self.append(cmds, 'echo 1st ARG, node_id= ' + first_arg)
        self.append(cmds, 'echo 2nd ARG, run_name= ' + second_arg)
        self.append(cmds, mkdir_cmd, echo_before=True)

        if not change_dir:
            return

        self.append(cmds, "cd {}".format(cwd), echo_before=True)
        self.append(
            cmds, 'echo after cd, cwd: {}'.format(
                "%cd%" if self.is_windows else "$PWD"))
示例#5
0
    def __init__(self, wildcard_path):
        """Set up a filesystem observer for the directory named by ``wildcard_path``.

        ``wildcard_path`` has ``~`` expanded and backslashes converted to
        forward slashes.  When it contains a "*", its directory part is
        watched; otherwise the whole path is treated as the directory to
        watch.  The directory is created if it does not exist yet.
        """
        pattern = os.path.expanduser(wildcard_path).replace("\\", "/")

        # e.g. "logs/*.tfevents.*" -> watch "logs"
        if "*" in pattern:
            watch_dir = os.path.dirname(pattern)
        else:
            watch_dir = pattern

        watch_dir = file_utils.fix_slashes(watch_dir)

        # the program may create this dir later; make sure it exists now
        file_utils.ensure_dir_exists(watch_dir)

        self.event_handler = MyHandler()
        self.observer = Observer()
        self.observer.schedule(self.event_handler, watch_dir, recursive=True)
    def read_file(self, fn, start_offset, end_offset):
        """Read bytes from the file ``fn`` through ``self.ftp_client``.

        ``fn`` is first normalized with ``file_utils.fix_slashes(..., is_linux=True)``.
        Reading starts at ``start_offset``.  When ``end_offset`` is truthy,
        ``end_offset - start_offset`` bytes are requested; otherwise the rest
        of the file is read.

        Returns the bytes read, or ``b""`` when the read fails (the failure is
        reported through ``console.diag`` rather than raised).
        """
        fn = file_utils.fix_slashes(fn, is_linux=True)
        new_bytes = b""

        try:
            with self.ftp_client.file(fn) as infile:
                infile.seek(start_offset)

                if end_offset:
                    new_bytes = infile.read(end_offset - start_offset)
                else:
                    new_bytes = infile.read()
        except Exception as ex:
            # best-effort read: report and fall through with whatever was read.
            # Exception (not BaseException) so that KeyboardInterrupt and
            # SystemExit still propagate to the caller.
            console.diag("exception: ex={}".format(ex))

        return new_bytes
示例#7
0
    def process_args(self, args):
        """Resolve the script and compute target named in ``args``.

        Reads ``script``, ``script_args``, ``code_upload``, ``parent_script``
        and ``target`` from ``args`` (plus the optional ``is_rerun`` key) and
        writes back ``target``, ``compute_def``, ``service_type``, ``box`` and
        ``pool``.

        Side effects on ``self``: sets ``script_dir`` for reruns and sets
        ``is_docker`` to True when the target file is ``"docker"``.

        Errors are reported through ``errors.syntax_error``,
        ``errors.env_error`` and ``errors.config_error``.

        Returns:
            tuple of (service_type, cmd_parts, ps_path, parent_script,
            target_file, run_script, run_cmd_from_script, compute, compute_def)
        """
        run_script = None
        parent_script = None
        run_cmd_from_script = None
        target_file = args["script"]
        target_args = args["script_args"]
        code_upload = args["code_upload"]

        # user may have wrong slashes for this OS
        target_file = file_utils.fix_slashes(target_file)

        if os.path.isabs(target_file):
            errors.syntax_error("path to app file must be specified with a relative path: {}".format(target_file))

        is_rerun = "is_rerun" in args
        if is_rerun:
            # will be running from script dir, so remove any path to script file
            self.script_dir = os.path.dirname(target_file)
            target_file = os.path.basename(target_file)

        if target_file.endswith(".py"):
            # PYTHON target: run unbuffered
            cmd_parts = ["python", "-u", target_file]
        else:
            cmd_parts = [target_file]

        if target_args:
            # split on unquoted spaces
            cmd_parts += utils.cmd_split(target_args)

        if target_file == "docker":
            self.is_docker = True

        if not self.is_docker and code_upload and not os.path.exists(target_file):
            errors.env_error("script file not found: {}".format(target_file))

        ps_path = args["parent_script"]
        if ps_path:
            parent_script = file_utils.read_text_file(ps_path, as_lines=True)

        if target_file.endswith(".bat") or target_file.endswith(".sh"):
            # a RUN SCRIPT was specified as the target
            run_script = file_utils.read_text_file(target_file, as_lines=True)
            run_cmd_from_script = scriptor.get_run_cmd_from_script(run_script)

        compute = args["target"]
        box_def = self.config.get("boxes", compute, suppress_warning=True)
        setup = utils.safe_value(box_def, "setup")

        compute_def = self.config.get_compute_def(compute)
        if compute_def:
            # target is defined in [compute-targets]
            if "service" not in compute_def:
                errors.config_error("compute target '{}' must define a 'service' property".format(compute))

            service = compute_def["service"]
            if service in ["local", "pool"]:
                # its a list of box names
                boxes = compute_def["boxes"]
                service_type = "pool"

                if len(boxes) == 1 and boxes[0] == "localhost":
                    pool = None
                    box = "local"
                else:
                    pool = compute
                    box = None
            else:
                # it is a set of compute service properties
                pool = compute
                box = None
                service_type = self.config.get_service_type(service)
        elif box_def:
            # translate single box name to a compute_def
            box = compute
            pool = None
            service_type = "pool"
            # NOTE: the key must be the literal "setup"; the original used the
            # *value* of the setup variable as the key.
            compute_def = {"service": service_type, "boxes": [box], "setup": setup}
        else:
            errors.config_error("unknown target or box: {}".format(compute))

        args["target"] = compute
        args["compute_def"] = compute_def
        args["service_type"] = service_type

        # for legacy code
        args["box"] = box
        args["pool"] = pool

        return service_type, cmd_parts, ps_path, parent_script, target_file, run_script, run_cmd_from_script, \
            compute, compute_def
示例#8
0
    def build_docker_cmd(self, docker_name, target_file, cmd_parts, script_dir, snapshot_dir, job_secret, args):
        """Build the ``docker run`` command line for the docker entry ``docker_name``.

        Looks up ``docker_name`` in the ``dockers`` config section (and its
        registry, if any, in ``external-services``), maps ``script_dir`` to
        ``/usr/src`` and, when ``args["data_local"]`` is set, maps that
        directory to ``/usr/data``.  The collected environment variables are
        written to ``__dockev__.txt`` inside ``snapshot_dir`` and passed to
        docker with ``--env-file``.

        ``cmd_parts`` is accepted but not used here.

        Errors are reported through ``errors.config_error``.

        Returns:
            the docker command split into parts by ``utils.cmd_split``.
        """
        docker_def = self.config.get("dockers", docker_name, default_value=None)
        if not docker_def:
            errors.config_error("docker '{}' not found in config file".format(docker_name))

        registry_name = docker_def["registry"]
        image = docker_def["image"]

        if registry_name:
            # get REGISTRY credentials
            registry_creds = self.config.get("external-services", registry_name, suppress_warning=True)
            if not registry_creds:
                # qualified with ``errors.`` like the other error call in this method
                errors.config_error("'{}' must be specified in [external-services] section of XT config file".format(registry_name))

            login_server = registry_creds["login-server"]
        else:
            login_server = None

        script_dir = file_utils.fix_slashes(script_dir, True)
        mappings = "-v {}:/usr/src".format(script_dir)
        options = "--rm"

        # collect env vars
        env_vars = {"XT_IN_DOCKER": 1, "XT_USERNAME": pc_utils.get_username()}
        scriptor.add_controller_env_vars(env_vars, self.config, job_secret, "node0")

        if ".py" in target_file:
            # python script: run it by base name from the mapped /usr/src dir
            app = "python -u"
            target_file = os.path.basename(target_file)
        else:
            # anything else is used as the app itself, with no script argument
            app = target_file
            target_file = ""

        full_image = login_server + "/" + image if login_server else image

        # build a mapping for data?
        data_local = args["data_local"]
        if data_local:
            if "$scriptdir" in data_local:
                data_local = data_local.replace("$scriptdir", script_dir)

            data_local = os.path.realpath(data_local)
            mappings += " -v {}:/usr/data".format(data_local)
            env_vars["XT_DATA_DIR"] = "/usr/data"

        # write env vars to file in snapshot dir
        FN_EV = "__dockev__.txt"
        fn_env_var = os.path.join(snapshot_dir, FN_EV)
        text = "\n".join(name + "=" + str(value) for name, value in env_vars.items())
        file_utils.write_text_file(fn_env_var, text)

        # specify env var file (in current directory) to docker
        options += " --env-file={}".format(FN_EV)

        # inherit ENV VARS from running environment
        options += " -e XT_RUN_NAME -e XT_WORKSPACE_NAME -e XT_EXPERIMENT_NAME"

        docker_cmd = "docker run {} {} {} {} /usr/src/{}".format(options, mappings, full_image, app, target_file)
        return utils.cmd_split(docker_cmd)
示例#9
0
    def upload(self,
               local_path,
               store_path,
               share,
               workspace,
               experiment,
               job,
               run,
               feedback,
               show_output=True):
        """Upload a local file, directory or wildcard path to the store.

        ``local_path`` has ``~`` expanded.  An existing file is uploaded as a
        single blob; a directory gets "/**" appended and is uploaded as
        multiple blobs, as is any other path.  When ``store_path`` is empty or
        ".", a single file is stored under its base name and a multi-file
        upload goes to ".".

        ``feedback`` enables the progress callback and ``show_output``
        controls console messages.  A missing local path is reported through
        ``errors.env_error``.

        Returns:
            the number of files uploaded (0 when nothing matched).
        """
        use_blobs = True
        use_multi = True
        upload_count = 0

        # expand ~/ in front of local path
        local_path = os.path.expanduser(local_path)

        if os.path.isfile(local_path):
            use_multi = False

        # if directory, default to copy nested
        if os.path.isdir(local_path):
            local_path += "/**"
            use_multi = True

        if not store_path or store_path == ".":
            # single file defaults to the base name of the local file
            store_path = "." if use_multi else os.path.basename(local_path)

        fs = self.create_file_accessor(use_blobs, share, workspace, experiment,
                                       job, run)
        uri = fs.get_uri(store_path)
        actual_path, _ = file_utils.split_wc_path(local_path)

        actual_path = file_utils.relative_path(actual_path)
        actual_path = file_utils.fix_slashes(actual_path)

        if not os.path.exists(actual_path):
            errors.env_error(
                "Cannot find the local file/folder: {}".format(actual_path))

        feedback_progress = FeedbackProgress(feedback, show_output)
        progress_callback = feedback_progress.progress if feedback else None

        if use_multi:
            # upload MULTIPLE files/blobs
            file_names, local_path = file_utils.get_local_filenames(local_path)
            what = "blobs" if use_blobs else "files"

            if len(file_names) == 0:
                if show_output:
                    # one placeholder per argument, so the path is actually shown
                    console.print("no matching {} found in: {}".format(
                        what, actual_path))
                # return a count like every other exit path (was a bare return)
                return upload_count
            elif len(file_names) == 1:
                what = "blob" if use_blobs else "file"

            if show_output:
                console.print("\nto {}, uploading {} {}:".format(
                    uri, len(file_names), what))

            # pad names to a common width so the progress columns line up
            name_width = 1 + max(len(name) for name in file_names)

            for index, fn in enumerate(file_names, start=1):
                blob_path = self.make_dest_fn(local_path, fn, store_path)
                actual_fn = file_utils.fix_slashes(fn)

                if show_output:
                    file_msg = "file {}/{}".format(index, len(file_names))
                    console.print("  {2:}: {1:<{0:}} ".format(
                        name_width, actual_fn + ":", file_msg),
                                  end="",
                                  flush=True)

                feedback_progress.start()
                fs.upload_file(blob_path,
                               actual_fn,
                               progress_callback=progress_callback)
                feedback_progress.end()

                upload_count += 1
        else:
            # upload SINGLE file/blob
            what = "blob" if use_blobs else "file"

            if show_output:
                console.print("\nto: {}, uploading {}:".format(uri, what))

            local_path = file_utils.fix_slashes(local_path)

            if show_output:
                console.print("  {}:    ".format(local_path),
                              end="",
                              flush=True)

            feedback_progress.start()
            fs.upload_file(store_path,
                           local_path,
                           progress_callback=progress_callback)
            feedback_progress.end()

            upload_count += 1

        return upload_count
示例#10
0
    def download(self,
                 store_path,
                 local_path,
                 share,
                 workspace,
                 experiment,
                 job,
                 run,
                 feedback,
                 snapshot,
                 show_output=True):
        """Download one blob, or every blob matching ``store_path``, to ``local_path``.

        A ``store_path`` without "*" or "?" that exists in the store is
        downloaded as a single blob; anything else is treated as a multi-blob
        download, with "/**" appended when no wildcard was given.  When
        ``local_path`` is empty it defaults to "." (multi) or to "./" plus the
        base name of ``store_path`` (single).

        ``feedback`` enables the progress callback, ``snapshot`` is forwarded
        as ``use_snapshot`` and ``show_output`` controls most console messages.

        Returns:
            the number of blobs downloaded (0 when nothing matched).
        """
        use_blobs = True
        download_count = 0

        fs = self.create_file_accessor(use_blobs, share, workspace, experiment,
                                       job, run)

        # only probe the store when store_path could name a single blob
        has_wildcard = "*" in store_path or "?" in store_path
        use_multi = has_wildcard or not fs.does_file_exist(store_path)

        if local_path:
            # expand ~/ in front of local path
            local_path = os.path.expanduser(local_path)
        elif use_multi:
            local_path = "."
        else:
            local_path = "./" + os.path.basename(store_path)

        uri = fs.get_uri(store_path)

        # a plain store folder is downloaded recursively
        if use_multi and not has_wildcard:
            store_path += "/**"

        feedback_progress = FeedbackProgress(feedback, show_output)
        progress_callback = feedback_progress.progress if feedback else None

        if not use_multi:
            # download a SINGLE blob/file
            what = "blob" if use_blobs else "file"

            if not fs.does_file_exist(store_path):
                errors.store_error("{} not found: {}".format(what, uri))

            local_path = file_utils.fix_slashes(local_path)

            if show_output:
                console.print("\nfrom {}, downloading {}:".format(uri, what))
                console.print("  {}:    ".format(local_path),
                              end="",
                              flush=True)

            feedback_progress.start()
            fs.download_file(store_path,
                             local_path,
                             progress_callback=progress_callback,
                             use_snapshot=snapshot)
            feedback_progress.end()

            download_count += 1
            return download_count

        # download MULTIPLE blobs/files
        what = "blobs" if use_blobs else "files"
        single_what = what[0:-1]

        if show_output:
            console.print("collecting {} names from: {}...".format(
                single_what, uri),
                          end="")

        _, blob_names = fs.get_filenames(store_path, full_paths=False)

        if show_output:
            console.print()

        total = len(blob_names)
        if total == 0:
            console.print("no matching {} found in: {}".format(what, uri))
            return 0

        if total == 1:
            what = "blob" if use_blobs else "file"

        if show_output:
            console.print("\ndownloading {} {}...:".format(total, what))

        file_utils.ensure_dir_exists(local_path)

        # pad destination names to a common width so progress columns line up
        name_width = 1 + max(
            [len(local_path + "/" + name) for name in blob_names])

        for position, bn in enumerate(blob_names, start=1):
            dest_fn = file_utils.fix_slashes(local_path + "/" + bn)

            if show_output:
                file_msg = "file {}/{}".format(position, len(blob_names))
                console.print("  {2:}: {1:<{0:}} ".format(
                    name_width, dest_fn + ":", file_msg),
                              end="",
                              flush=True)

            feedback_progress.start()
            full_bn = uri + "/" + bn if uri else bn
            fs.download_file(full_bn,
                             dest_fn,
                             progress_callback=progress_callback,
                             use_snapshot=snapshot)
            feedback_progress.end()

            download_count += 1

        return download_count
    def restart_psm_if_needed(self):
        '''
        Make sure an up-to-date PSM is running on the box.

        processing:
            - if PSM is running but the box's copy of psm.py differs from the
              local one (or is missing), kill the process and restart it.
            - if PSM is not running, start it.
            - after starting, poll for the PSM process and report an error
              through errors.general_error if it never shows up.
        '''
        local_psm = os.path.join(file_utils.get_my_file_dir(__file__), constants.PSM)
        remote_psm = file_utils.path_join(self.xt_path, constants.PSM, for_windows=self.box_is_windows)

        was_running = bool(self._get_psm_process_id())
        must_kill = False

        if was_running:
            # do file contents match?
            local_text = file_utils.read_text_file(local_psm)
            remote_text = ""

            if self.remote_file_exists(remote_psm):
                # read text of psm.py on remote box
                with self.ftp_client.open(remote_psm, "rb") as infile:
                    remote_text = infile.read().decode()

                # normalize NEWLINE chars before comparison
                # (ftp_client seems to add CR when called from windows)
                local_text = local_text.replace("\r\n", "\n")
                remote_text = remote_text.replace("\r\n", "\n")

            must_kill = local_text != remote_text

        if must_kill:
            pid = self._get_psm_process_id()
            self.run_cmd("kill -kill {}".format(pid))

        # nothing to do when PSM is already running the current psm.py
        if was_running and not must_kill:
            return

        # create required dirs
        self._make_dir(self.psm_queue_path)
        self._make_dir(self.cwd_path)

        # copy psm.py
        # caution: slashes in remote_psm must all match box's OS style
        remote_psm = file_utils.fix_slashes(remote_psm, is_linux=True)
        self.ftp_client.put(local_psm, remote_psm)

        # run psm, sending its output to the PSM log file
        log_path = os.path.join(self.xt_path, constants.PSMLOG)

        if self.box_is_windows:
            cmd = " ".join(["cmd", "/c", "python -u {} > {}".format(remote_psm, log_path)])
        else:
            log_path = file_utils.fix_slashes(log_path, is_linux=True)
            cmd = 'nohup bash --login -c "python -u {}" </dev/null > {} 2>&1 &'.format(remote_psm, log_path)

        self.run_cmd(cmd)

        # don't return until PSM is running (up to 20 checks, 0.5 sec apart)
        for _ in range(20):
            if self._get_psm_process_id():
                break

            time.sleep(.5)
        else:
            errors.general_error("Could not start remote PSM on box={}".format(self.box_addr))