Example #1
    def test_impl(self, search_type):
        # run test on impl
        test_dir = os.path.realpath(os.path.dirname(__file__))

        # using YAML config file #1
        fn_text = os.path.join(test_dir, "miniSweeps.yaml")
        config_text = file_utils.read_text_file(fn_text)
        self.test_from_config(fn_text, config_text, search_type)

        # using YAML config file #2
        fn_yaml = os.path.join(test_dir, "hp_search.yaml")
        yaml_text = file_utils.read_text_file(fn_yaml)
        self.test_from_config(fn_yaml, yaml_text, search_type)
Example #2
    def restart_psm_if_needed(self):
        '''
        processing:
            - if PSM is running an old psm.py, kill the process and restart it.
            - if PSM is not running, start it.
        '''
        kill_needed = False
        start_needed = False

        fn_src = os.path.join(file_utils.get_my_file_dir(__file__),
                              constants.PSM)
        fn_dest = os.path.join(self.cwd_path, constants.PSM)

        running = self._is_psm_running()
        #print("PSM running=", running)

        if running:
            # do file contents match?
            text_src = file_utils.read_text_file(fn_src)
            text_dest = file_utils.read_text_file(fn_dest) if os.path.exists(
                fn_dest) else None
            if text_src != text_dest:
                kill_needed = True
        else:
            start_needed = True

        if kill_needed:
            p = self._get_psm_process()
            p.kill()
            start_needed = True

        if start_needed:
            # always copy psm.py (for xt dev/debug purposes)
            shutil.copyfile(fn_src, fn_dest)

            # run psm
            fn_log = os.path.join(self.cwd_path, constants.PSMLOG)

            if self.box_is_windows:
                cmd_parts = [
                    "cmd", "/c", "python -u {} > {}".format(fn_dest, fn_log)
                ]
            else:
                cmd_parts = [
                    "bash", "-c", "python -u {} > {}".format(fn_dest, fn_log)
                ]

            fn_psm_log = os.path.expanduser("~/.xt/cwd/runpsm.log")
            process_utils.start_async_run_detached(cmd_parts, self.cwd_path,
                                                   fn_psm_log)
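The `_is_psm_running` helper is not shown in this example. A minimal stand-in might scan the process list for psm.py; the sketch below is an assumption (using `psutil`), not the actual xtlib implementation:

import psutil

# hypothetical stand-in for self._is_psm_running (not shown in this example)
def is_psm_running():
    # look for any process whose command line mentions psm.py
    for proc in psutil.process_iter(attrs=["cmdline"]):
        cmdline = proc.info["cmdline"] or []
        if any("psm.py" in part for part in cmdline):
            return True
    return False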
Example #3
    def read_user_multi_commands(self, using_hp, run_script, cmd_parts, args):
        cmds = None
        
        lines = self.config.get("commands")
        if lines:
            # commands specified in the config file
            args["multi_commands"] = True
            multi_commands = True
        else:
            # did user specify --multi-commands
            multi_commands = args["multi_commands"]

        if multi_commands:
            if using_hp:
                errors.combo_error("Cannot specify both -multi-commands and hyperparameter search")

            # read MULTI CMDS
            if not lines:
                fn_cmds = args["script"]  # run_script if run_script else cmd_parts[0]
                lines = file_utils.read_text_file(fn_cmds, as_lines=True)
                lines = [line.strip() for line in lines if line and not line.strip().startswith("#")]

            cmds = [self.fixup_script_in_cmd(line) for line in lines]

        return cmds
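For reference: when multi-commands are read from a file, the file named by args["script"] holds one runnable command per line, with blank lines and "#" comment lines filtered out (see the list comprehension above). A hypothetical commands file:

# commands file (one runnable command per line)
python train.py --lr=0.01
python train.py --lr=0.03
python train.py --lr=0.1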
Example #4
    def help_topics(self, topic, browse, prefix="topics", title="help topics"):

        # build list of help topics from xtlib/help_topics directory
        topics_dir = os.path.join(file_utils.get_xtlib_dir(), "help_topics",
                                  prefix)
        if not os.path.isdir(topics_dir):
            errors.env_error("Missing help topics dir: {}".format(topics_dir))
        topic_files, _ = file_utils.get_local_filenames(topics_dir)

        # build a map from topic names to the files
        topic_map = {file_utils.root_name(fn): fn for fn in topic_files}

        if not topic:
            console.print("available {}:".format(title))
            keys = list(topic_map.keys())
            keys.sort()

            for topic_name in keys:
                console.print("  {}".format(topic_name))

            console.print()
            console.print(
                "To display a help topic, use 'xt help topic <topic name>'")
        else:
            # print a specific topic
            topic_low = topic.lower()
            if topic_low not in topic_map:
                errors.general_error(
                    "help topic not found: {}".format(topic_low))

            text = file_utils.read_text_file(topic_map[topic_low])
            print(text)
Example #5
    def compare_log_files(self, fn_approved, fnx):
        '''
        we mask and compare at the file level.  if there are errors, we
        drill down to the property level for easier debugging.
        only file-compare counts should be counted as tests.
        '''
        name = os.path.basename(fn_approved)

        approved_text = file_utils.read_text_file(fn_approved)
        texta = self.mask_out_regular_changes(approved_text)

        tested_text = file_utils.read_text_file(fnx)
        textx = self.mask_out_regular_changes(tested_text)

        match = texta == textx
        if not match:
            # files did not match after masking, but property level matching may work
            self.file_compare_errors += 1
            before_asserts = self.assert_count

            # compare detail; we should see an assert on a smaller property
            file_ext = os.path.splitext(fnx)[1]

            if texta.startswith("{") and file_ext in [".json", ".log"]:
                # debug 
                print("loading JSON: fn_approved={}, fnx={}".format(fn_approved, fnx))
                file_utils.write_text_file("texta.json", texta)
                file_utils.write_text_file("textx.json", textx)

                dda = json.loads(texta)
                ddx = json.loads(textx)
                self.compare_json_values(name, dda, ddx)
            else:
                texta = self.mask_out_regular_changes_from_wrapper(approved_text)
                textx = self.mask_out_regular_changes_from_wrapper(tested_text)
                self.compare_text(name, texta, textx)

            if self.assert_count == before_asserts:
                # no new asserts, so undo our file error count
                # this happens because some failed file compares
                # succeed when compared at property level
                self.file_compare_errors -= 1
            elif STOP_ON_FIRST_ERROR:
                self._assert(False)
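`mask_out_regular_changes` is not shown here; per the docstring, it blanks out values that legitimately differ between runs so that stable content can be compared. A minimal sketch of the idea, assuming timestamps and run names are among the volatile fields (the real masking rules are not part of this example):

import re

def mask_out_regular_changes(text):
    # sketch only: replace fields expected to differ between runs
    text = re.sub(r"\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}", "<TIMESTAMP>", text)
    text = re.sub(r"run\d+(\.\d+)?", "<RUN>", text)
    return text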
Example #6
def get_secret(name):
    name = correct_name(name)
    secrets = {}

    if os.path.exists(FN_SECRETS):
        text = file_utils.read_text_file(FN_SECRETS)
        secrets = json.loads(text)

    value = secrets.get(name)
    console.diag("get_secret: name={}, value={}".format(name, value))
    return value
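As the `json.loads` call implies, `FN_SECRETS` is a plain JSON file mapping secret names to values. A hypothetical example of its contents (names and values are illustrative):

{"mongo-conn-str": "mongodb://...", "storage-key": "abc123"}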
Example #7
    def get_running_entry_name(self):
        text = None

        controller_cwd = utils.get_controller_cwd(self.box_is_windows,
                                                  is_local=True)

        fn_current = os.path.join(controller_cwd,
                                  constants.CURRENT_RUNNING_ENTRY)
        if os.path.exists(fn_current):
            text = file_utils.read_text_file(fn_current).strip()

        return text
Example #8
    def _load_grok_creds(self):
        fn_keys = "keys.bin"
        loaded = False

        if not os.path.exists(fn_keys):
            fn_keys = os.path.expanduser("~/.xt/teams/{}/keys.bin".format(
                self.team_name))

        if os.path.exists(fn_keys):
            # GROK server creds
            creds = file_utils.read_text_file(fn_keys)
            self.apply_creds(creds)

            fn_cert = os.path.join(os.path.dirname(fn_keys), "xt_cert.pem")
            if os.path.exists(fn_cert):
                cert = file_utils.read_text_file(fn_cert)
                self.keys["xt_server_cert"] = cert

            console.diag("init_creds: using grok server 'keys.bin' file")
            loaded = True

        return loaded
Example #9
    def import_job_mongo_document(self, fn_mongo_job, new_workspace,
                                  prev_job_id, new_job_id):
        text = file_utils.read_text_file(fn_mongo_job)
        job = json.loads(text)

        # update job_id
        job["_id"] = new_job_id
        job["job_id"] = new_job_id
        job["job_num"] = job_helper.get_job_number(prev_job_id)

        # update workspace
        job["ws_name"] = new_workspace

        # add to mongo
        self.store.mongo.update_job_info(new_job_id, job)
Example #10
    def import_run_mongo_document(self, mongo_run_fn, workspace, new_workspace,
                                  prev_job_id, new_job_id, run_name):
        text = file_utils.read_text_file(mongo_run_fn)
        run = json.loads(text)

        # update job_id
        run["job_id"] = new_job_id

        # update workspace
        run["ws"] = new_workspace

        # add to mongo
        self.store.mongo.update_run_info(new_workspace, run_name, run)

        end_id = utils.safe_value(run, "end_id")
        return end_id
Example #11
def set_secret(name, value):
    name = correct_name(name)
    console.diag("set_secret: name={}, value={}".format(name, value))

    file_utils.ensure_dir_exists(file=FN_SECRETS)

    secrets = {}

    # read existing secrets, if any
    if os.path.exists(FN_SECRETS):
        text = file_utils.read_text_file(FN_SECRETS)
        secrets = json.loads(text)

    secrets[name] = value

    # write updated secrets
    text = json.dumps(secrets)
    file_utils.write_text_file(FN_SECRETS, text)
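Together with `get_secret` from Example #6, this forms a simple read-modify-write store over a single JSON file. A usage sketch (the key name and value are illustrative):

set_secret("storage-key", "abc123")   # persisted to FN_SECRETS as JSON
value = get_secret("storage-key")     # -> "abc123", or None if not present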
Example #12
    def __init__(self,
                 config=None,
                 store=None,
                 xt_logging=True,
                 aml_logging=True,
                 checkpoints_enabled=True,
                 tensorboard_path=None,
                 supress_normal_output=False):
        '''
        this initializes an XT Run object so that ML apps can use XT services from within their app, including:
            - hyperparameter logging
            - metrics logging
            - uploading files to an XT share
            - downloading files from an XT share
            - checkpoint support
            - explicit HP search calls

        note: Azure ML child runs seem to get their env variables inherited from their parent run
        correctly, so we do not need to query the parent run for info. '''

        self.store = None
        self.xt_logging = False
        self.metric_report_count = 0
        self.metric_names = OrderedDict()
        self.supress_normal_output = supress_normal_output

        # tensorboard writers
        self.train_writer = None
        self.test_writer = None

        # 2nd set of writers for Philly
        self.train_writer2 = None
        self.test_writer2 = None

        self.tensorboard_path = tensorboard_path
        if self.tensorboard_path:
            # TENSORBOARD WORKAROUND: this code causes tensorboard files to be closed when they are appended to;
            # this allows us to just write the files to the MOUNTED output dir (and not have to mirror them)
            try:
                from tensorboard.compat import tf
                delattr(tf.io.gfile.LocalFileSystem, 'append')
            except Exception:
                import tensorflow as tf
                import tensorboard as tb
                tf.io.gfile = tb.compat.tensorflow_stub.io.gfile
                delattr(tf.io.gfile.LocalFileSystem, 'append')

            self.init_tensorboard()

        self.ws_name = os.getenv("XT_WORKSPACE_NAME", None)
        self.exper_name = os.getenv("XT_EXPERIMENT_NAME", None)
        self.run_name = os.getenv("XT_RUN_NAME", None)
        self.resume_name = os.getenv("XT_RESUME_NAME")

        # load context, if present
        self.context = None

        if self.run_name:
            fn_context = os.path.abspath(constants.FN_RUN_CONTEXT)

            if os.path.exists(fn_context):
                json_context = file_utils.read_text_file(fn_context)

                context_dict = json.loads(json_context)
                self.context = utils.dict_to_object(context_dict)
                if not supress_normal_output:
                    console.print("run context loaded: {}".format(
                        self.context.run_name))
            else:
                if not supress_normal_output:
                    console.print(
                        "run context file not found: {}".format(fn_context))

        mc = os.getenv("XT_MONGO_CONN_STR")
        self.mongo_conn_str = utils.base64_to_text(mc)

        # convert store_creds from string to dict
        sc = os.getenv("XT_STORE_CREDS")
        self.store_creds = utils.base64_to_text(sc)
        if self.store_creds:
            self.store_creds = json.loads(self.store_creds)

        provider_code_path = os.getenv("XT_STORE_CODE_PATH")

        run_cache_dir = None
        self.config = config

        if config:
            run_cache_dir = config.get("general", "run-cache-dir")

        is_aml_run = bool(os.getenv("AML_WORKSPACE_NAME"))

        if is_aml_run:
            # load azure libraries on demand
            from .backends.backend_aml import AzureML
            from azureml.core import Run as AmlRun

            # assumes app is running under AML
            self.aml_run = AmlRun.get_context()
        else:
            self.aml_run = None

        self.aml_logging = aml_logging
        self.is_aml_child = False
        self.is_aml = False  # TODO: remove need for this since we now treat AML runs normally

        if True:  # self.run_name:
            if not self.store_creds and not config:
                # if store_creds not set, this app is running outside of XT control
                # provide access to XT store for dev/test purposes
                config = get_merged_config(suppress_warning=True)

            self.config = config

            self.store = Store(self.store_creds,
                               provider_code_path=provider_code_path,
                               run_cache_dir=run_cache_dir,
                               mongo_conn_str=self.mongo_conn_str,
                               config=config)

            # if not supress_normal_output:
            #     console.print("XT logging enabled: ", self.run_name)

        # distributed training support
        self.rank = None
        self.world_size = None
        self.master_ip = None
        self.master_port = None

        self.xt_logging = xt_logging and self.run_name is not None
        self.checkpoints_enabled = checkpoints_enabled

        self.direct_run = not os.getenv("XT_CONTROLLER")

        if self.xt_logging and self.direct_run and self.store:
            # log stuff normally done by controller at start of run
            self.store.log_run_event(self.ws_name, self.run_name, "started",
                                     {})
            self.store.mongo.run_start(self.ws_name, self.run_name)
            if self.context:
                self.store.mongo.job_run_start(self.context.job_id)
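Per the docstring, ML apps construct this object directly to use XT services from inside their code. A minimal usage sketch, assuming the class is exposed as `Run` (the import path and keyword values shown are assumptions):

from xtlib.run import Run   # import path is an assumption

run = Run(xt_logging=True, tensorboard_path="logs/")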
Example #13
    def process_args(self, args):

        run_script = None
        parent_script = None
        run_cmd_from_script = None
        target_file = args["script"]
        target_args = args["script_args"]
        code_upload = args["code_upload"]

        # user may have wrong slashes for this OS
        target_file = file_utils.fix_slashes(target_file)

        if os.path.isabs(target_file):
            errors.syntax_error("path to app file must be specified with a relative path: {}".format(target_file))

        is_rerun = "is_rerun" in args
        if is_rerun:
            # will be running from script dir, so remove any path to script file
            self.script_dir = os.path.dirname(target_file)
            target_file = os.path.basename(target_file)

        if target_file.endswith(".py"):
            # PYTHON target
            cmd_parts = ["python"]
            cmd_parts.append("-u")
            cmd_parts.append(target_file)
        else:
            cmd_parts = [target_file] 

        if target_args:
            # split on unquoted spaces
            arg_parts = utils.cmd_split(target_args)
            cmd_parts += arg_parts

        if target_file == "docker":
            self.is_docker = True
            
        if not self.is_docker and code_upload and not os.path.exists(target_file):
            errors.env_error("script file not found: {}".format(target_file))

        ps_path = args["parent_script"]
        if ps_path:
            parent_script = file_utils.read_text_file(ps_path, as_lines=True)

        if target_file.endswith(".bat") or target_file.endswith(".sh"):
            # a RUN SCRIPT was specified as the target
            run_script = file_utils.read_text_file(target_file, as_lines=True)
            run_cmd_from_script = scriptor.get_run_cmd_from_script(run_script)

        compute = args["target"]
        box_def = self.config.get("boxes", compute, suppress_warning=True)
        setup = utils.safe_value(box_def, "setup")

        compute_def = self.config.get_compute_def(compute)
        if compute_def:
            # must be defined in [compute-targets]

            if not "service" in compute_def:
                errors.config_error("compute target '{}' must define a 'service' property".format(compute))

            service = compute_def["service"]
            if service in ["local", "pool"]:
                # it's a list of box names
                boxes = compute_def["boxes"]
                if len(boxes)==1 and boxes[0] == "localhost":
                    pool = None
                    box = "local"
                    service_type = "pool"
                else:
                    pool = compute
                    box = None
                    service_type = "pool"
            else:
                # it's a set of compute service properties
                pool = compute
                box = None
                service_name = compute_def["service"]
                service_type = self.config.get_service_type(service_name)
        elif box_def:
            # translate single box name to a compute_def
            box = compute
            pool = None
            service_type = "pool"
            compute_def = {"service": service_type, "boxes": [box], "setup": setup}
        else:
            errors.config_error("unknown target or box: {}".format(compute))

        args["target"] = compute
        args["compute_def"] = compute_def
        args["service_type"] = service_type

        # for legacy code
        args["box"] = box
        args["pool"] = pool

        return service_type, cmd_parts, ps_path, parent_script, target_file, run_script, run_cmd_from_script, \
            compute, compute_def
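`utils.cmd_split` (used above to split `target_args` on unquoted spaces) behaves like shell-style tokenizing. A stand-in using the standard library, assuming POSIX-style quoting rules:

import shlex

arg_parts = shlex.split('--epochs 5 --name "my run"')
# -> ['--epochs', '5', '--name', 'my run']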
Example #14
    def get_config_template(self, template):
        # load default config file as lines
        fn = get_default_config_template_path()
        default_text = file_utils.read_text_file(fn)
        default_lines = default_text.split("\n")

        # convert lines to sections dict
        sections = yaml.safe_load(default_text)

        if not template or template == "empty":

            # EMPTY
            hdr = \
                "# local xt_config.yaml\n" + \
                "# uncomment the below lines to start populating your config file\n\n"

            text = \
                "#general:\n" + \
                "    #workspace: 'ws1'\n" + \
                "    #experiment: 'exper1'\n"

        elif template == "philly":

            # PHILLY
            hdr = "# local xt_config.yaml for Philly compute service\n\n"
            text = self.copy_and_merge_sections(
                sections, [
                    "external-services.philly",
                    "external-services.philly-registry",
                    "external-services.phoenixkeyvault",
                    "external-services.phoenixmongodb",
                    "external-services.phoenixregistry",
                    "external-services.phoenixstorage", "xt-services",
                    "compute-targets.philly", "setups.philly",
                    "dockers.philly-pytorch", "general"
                ],
                update_keys={"xt-services.target": "philly"})

        elif template == "batch":

            # BATCH
            hdr = "# local xt_config.yaml (for Azure Batch compute services)\n\n"
            text = self.copy_and_merge_sections(
                sections, [
                    "external-services.phoenixbatch",
                    "external-services.phoenixkeyvault",
                    "external-services.phoenixmongodb",
                    "external-services.phoenixregistry",
                    "external-services.phoenixstorage", "xt-services",
                    "compute-targets.batch", "azure-batch-images", "general"
                ],
                update_keys={"xt-services.target": "batch"})

        elif template == "aml":

            # AML
            hdr = "# local xt_config.yaml (for Azure ML compute service)\n\n"
            text = self.copy_and_merge_sections(
                sections, [
                    "external-services.phoenixaml",
                    "external-services.phoenixkeyvault",
                    "external-services.phoenixmongodb",
                    "external-services.phoenixregistry",
                    "external-services.phoenixstorage", "xt-services",
                    "compute-targets.aml", "aml-options", "general"
                ],
                update_keys={"xt-services.target": "aml"})

        elif template == "pool":

            # POOL
            hdr = "# local xt_config.yaml (for local machine and Pool compute service)\n\n"
            text = self.copy_and_merge_sections(sections, [
                "external-services.phoenixkeyvault",
                "external-services.phoenixmongodb",
                "external-services.phoenixregistry",
                "external-services.phoenixstorage", "xt-services",
                "compute-targets.local", "compute-targets.local-docker",
                "boxes", "setups.local", "dockers.pytorch-xtlib",
                "dockers.pytorch-xtlib-local", "general"
            ])

        elif template == "all":

            # ALL
            hdr = "# local xt_config.yaml (for all compute services)\n\n"
            text = "\n".join(default_lines)

        else:
            errors.syntax_error(
                "unrecognized --create value: {}".format(template))

        return hdr + text
Example #15
    def import_workspace_core(self, temp_dir, input_file, new_workspace,
                              job_prefix, overwrite, show_output):

        # unzip files and use contents.json
        file_helper.unzip_files(input_file, temp_dir)

        fn_contents = os.path.join(temp_dir, "contents.json")
        text = file_utils.read_text_file(fn_contents)
        contents = json.loads(text)

        workspaces = contents["workspaces"]
        if len(workspaces) > 1:
            errors.combo_error(
                "import of archive files with multiple workspaces not yet supported"
            )

        workspace = workspaces[0]
        jobs = contents["jobs"]

        if not new_workspace:
            new_workspace = workspace

        if self.store.does_workspace_exist(new_workspace):
            errors.combo_error(
                "cannot import to an existing workspace name: {}".format(
                    new_workspace))

        if show_output:
            console.print(
                "\nimporting workspace {} ({} jobs) as {} from: {}".format(
                    workspace, len(jobs), new_workspace, input_file))

        if not overwrite:
            # before making any changes, verify all job names are available
            job_ids = []

            for jc in jobs:
                prev_job_id = jc["job_id"]
                prev_base = prev_job_id.split("_")[-1]
                new_job_id = "{}_{}".format(job_prefix, prev_base)
                job_ids.append(new_job_id)

            filter_dict = {"job_id": {"$in": job_ids}}
            records = self.store.mongo.get_info_for_jobs(
                filter_dict, {"_id": 1})
            if records:
                id = records[0]["_id"]
                errors.general_error(
                    "at least 1 job ID with prefix already exists: {}".format(
                        id))

        # create the new workspace
        self.store.create_workspace(new_workspace)

        # now, import each JOB
        max_run_seen = 0
        max_end_seen = 0

        for jc in jobs:
            prev_job_id = jc["job_id"]
            prev_base = prev_job_id.split("_")[-1]

            new_job_id = "{}_{}".format(job_prefix, prev_base)
            runs = jc["runs"]

            if show_output:
                console.print("  importing: {} => {} ({} runs)".format(
                    prev_job_id, new_job_id, len(runs)))

            # create MONGO JOB document
            mongo_job_fn = os.path.join(
                temp_dir, "mongo/jobs/{}/mongo_job.json".format(prev_job_id))
            self.import_job_mongo_document(mongo_job_fn, new_workspace,
                                           prev_job_id, new_job_id)

            # create STORAGE JOB blobs
            storage_job_path = os.path.join(
                temp_dir, "storage/jobs/{}".format(prev_job_id))
            self.import_job_storage_blobs(storage_job_path, new_workspace,
                                          prev_job_id, new_job_id)

            # for each run in job
            for run_name in runs:

                run_number = run_helper.get_parent_run_number(run_name)
                max_run_seen = max(max_run_seen, run_number)

                # copy MONGO RUN document
                mongo_run_fn = os.path.join(
                    temp_dir,
                    "mongo/workspaces/{}/runs/{}/mongo_run.json".format(
                        workspace, run_name))
                end_id = self.import_run_mongo_document(
                    mongo_run_fn, workspace, new_workspace, prev_job_id,
                    new_job_id, run_name)
                max_end_seen = max(max_end_seen, end_id)

                # copy STORAGE RUN blobs
                storage_run_path = os.path.join(
                    temp_dir, "storage/workspaces/{}/runs/{}".format(
                        workspace, run_name))
                self.import_run_storage_blobs(storage_run_path, workspace,
                                              new_workspace, prev_job_id,
                                              new_job_id, run_name)

        # update MONGO counters for new workspace
        self.store.mongo.init_workspace_counters(new_workspace,
                                                 1 + max_run_seen,
                                                 1 + max_end_seen)
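From the reads above, `contents.json` describes the archived workspace and its jobs. A hypothetical minimal example consistent with this code (names are illustrative):

{
  "workspaces": ["ws1"],
  "jobs": [
    {"job_id": "job_1001", "runs": ["run1", "run2"]}
  ]
}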
Example #16
    def restart_psm_if_needed(self):
        '''
        processing:
            - if PSM is running an old psm.py, kill the process and restart it.
            - if PSM is not running, start it.
        '''
        kill_needed = False
        start_needed = False

        fn_src = os.path.join(file_utils.get_my_file_dir(__file__), constants.PSM)
        fn_dest = file_utils.path_join(self.xt_path, constants.PSM, for_windows=self.box_is_windows)

        running = bool(self._get_psm_process_id())
        #print("PSM running=", running)

        if running:
            # do file contents match?
            text_src = file_utils.read_text_file(fn_src)
            text_dest = ""

            if self.remote_file_exists(fn_dest):
                # read text of fn_dest on remote box
                with self.ftp_client.open(fn_dest, "rb") as infile:
                    bytes_dest = infile.read()
                    text_dest = bytes_dest.decode()

                # normalize NEWLINE chars before comparison
                # (ftp_client seems to add CR when called from windows)
                text_src = text_src.replace("\r\n", "\n")
                text_dest = text_dest.replace("\r\n", "\n")

            if text_src != text_dest:
                kill_needed = True
        else:
            start_needed = True

        if kill_needed:
            p = self._get_psm_process_id()
            ssh_cmd = "kill -kill {}".format(p)
            self.run_cmd(ssh_cmd)
            start_needed = True

        if start_needed:
            # create required dirs
            self._make_dir(self.psm_queue_path)
            self._make_dir(self.cwd_path)

            # copy psm.py
            # caution: slashes in fn_dest must all match the box's OS style
            fn_dest = file_utils.fix_slashes(fn_dest, is_linux=True)
            status = self.ftp_client.put(fn_src, fn_dest)

            # run psm
            fn_log = os.path.join(self.xt_path, constants.PSMLOG)

            if self.box_is_windows:
                cmd_parts = ["cmd", "/c", "python -u {} > {}".format(fn_dest, fn_log)]
                cmd = " ".join(cmd_parts)
            else:
                fn_log = file_utils.fix_slashes(fn_log, is_linux=True)
                cmd = 'nohup bash --login -c "python -u {}" </dev/null > {} 2>&1 &'.format(fn_dest, fn_log) 
                #print("cmd=", cmd)

            #process_utils.sync_run_ssh(self, self.box_addr, cmd)
            self.run_cmd(cmd)

            for i in range(20):
                # don't return until PSM is running
                running = bool(self._get_psm_process_id())
                if running:
                    break

                time.sleep(.5)

            if not running:
                errors.general_error("Could not start remote PSM on box={}".format(self.box_addr))