def _maybe_apply_strict_cols(args):
    """Translate ``--strict-cols`` into the equivalent column settings.

    When ``args.strict_cols`` is set, its value is copied to ``args.cols``
    and both ``args.skip_core`` and ``args.skip_op_cols`` are switched on.
    Supplying ``args.cols`` at the same time is reported via ``cli.error``.

    Args:
        args: Parsed command arguments; mutated in place.
    """
    strict = args.strict_cols
    if not strict:
        # Nothing requested, leave the arguments untouched.
        return

    if args.cols:
        cli.error("--strict-cols and --cols cannot both be specified")

    args.cols = strict
    args.skip_core = True
    args.skip_op_cols = True
def get_gpu_summary(self):
    """Return the formatted GPU statistics, one entry per raw record.

    Reports "nvidia-smi not available" through ``cli.error`` when
    ``self._stats_cmd`` is falsy.

    Returns:
        A list holding ``self._format_gpu_stats(raw)`` for every record
        produced by ``self._read_raw_gpu_stats(self._stats_cmd)``.
    """
    if not self._stats_cmd:
        cli.error("nvidia-smi not available")

    return [
        self._format_gpu_stats(record)
        for record in self._read_raw_gpu_stats(self._stats_cmd)
    ]
def run(ctx, args):
    """Run an experiment operation for the requested number of trials.

    Args:
        ctx: Command context; not used by this function.
        args: Parsed command arguments. ``args.yes`` skips the confirmation
            prompt and ``args.trials`` sets how many times the operation is
            run; the object is also handed to the helper functions.
    """
    # Strip potential operation from experiment name
    experiment, op_name = _strip_op_name_from_experiment(args)

    # Look up the experiment's configuration file. A project config without
    # an "experiments" section surfaces as KeyError.
    try:
        experiment_config_file = \
            config.get_project_config()["experiments"].get(experiment)
    except KeyError:
        cli.error("No experiments found. "
                  "Are you sure you're in a Tracker project?")

    # NOTE(review): if the "experiments" section is a plain dict, an unknown
    # experiment name yields None here and is passed to config.load
    # unchecked -- confirm config.load copes with that.
    experiment_config = config.load(experiment_config_file)

    # Create operation object
    # - Here we scan through the sourcecode
    #   and extract the (hyper-)parameters
    op = oplib.Operation(
        op_name,
        _op_run_dir(args),
        _get_experiment_dict_by_name(experiment, experiment_config),
        _op_gpus(args),
        args.yes)

    # Prompt user to confirm run parameters (skipped with args.yes)
    if args.yes or _confirm_run(args, experiment, op):
        for n in range(args.trials):
            cli.out("Trial {}/{}".format(n + 1, args.trials))
            # Run the trial
            _run(args, op)
def run(ctx, args):
    """Run the selected operation of an experiment ``args.trials`` times.

    Args:
        ctx: Command context; not used by this function.
        args: Parsed command arguments. ``args.yes`` bypasses the
            confirmation prompt; ``args.trials`` is the number of trials.
    """
    exp_name, op_name = _strip_op_name_from_experiment(args)

    # No operation given by the user: fall back to the default one.
    if op_name is None:
        op_name = DEFAULT_OP
        log.debug("Running experiment: '{}' with default operation: '{}' "
                  "as no op was provided by the user!".format(
                      exp_name, op_name))

    # A project config without an "experiments" section raises KeyError.
    try:
        exp_conf_file = \
            config.get_project_config()["experiments"].get(exp_name)
    except KeyError:
        cli.error("No experiments found. "
                  "Are you sure you're in a Tracker project?")

    exp_conf = config.load(exp_conf_file)

    # Collect the operation's inputs in the order they are passed on.
    run_dir = _op_run_dir(args)
    experiment = _op_experiment(exp_name, exp_conf)
    remote = _op_remote(args)
    gpus = _op_gpus(args)
    op = oplib.Operation(op_name, run_dir, experiment, remote, gpus)

    # Ask for confirmation unless the user already agreed via args.yes.
    if not (args.yes or _confirm_run(args, exp_name, op)):
        return

    for trial_no in range(1, args.trials + 1):
        cli.out("Trial {}/{}".format(trial_no, args.trials))
        _run(args, op)
def _diff(path1, path2, args):
    """Run the configured diff command on two paths.

    The command line is the split result of ``_diff_cmd(args)`` followed by
    ``path1`` and ``path2``. An ``OSError`` raised while starting the command
    is reported through ``cli.error``.

    Args:
        path1: First path handed to the diff command.
        path2: Second path handed to the diff command.
        args: Parsed command arguments used to pick the diff command.
    """
    cmd = command.shlex_split(_diff_cmd(args)) + [path1, path2]
    log.debug("diff cmd: %r", cmd)
    try:
        subprocess.call(cmd)
    except OSError as err:
        printable = " ".join(cmd)
        cli.error("error running '%s': %s" % (printable, err))
def run_dir_for_id(run_id):
    """Return the path resolved by ``_path_for_id`` for ``run_id``.

    When ``_path_for_id`` raises ``NoSuchRun``, a hint is printed and the
    failure is reported through ``cli.error`` with ``errno.ENOENT``.

    Args:
        run_id: Identifier of the trial to look up.
    """
    try:
        run_path = _path_for_id(run_id)
    except NoSuchRun:
        cli.out("The trial with id: '{}' was not found\n"
                "Show trials by running 'tracker experiment NAME "
                "--list_trials'.".format(run_id))
        cli.error("No such directory", errno.ENOENT)
    else:
        return run_path
def remote_op(op, prompt, default_resp, args):
    """Run a remote operation callable after optional user confirmation.

    Args:
        op: Zero-argument callable performing the operation.
        prompt: Text shown before asking for confirmation.
        default_resp: Default answer passed to ``cli.confirm``.
        args: Parsed command arguments; ``args.yes`` skips both the prompt
            and the confirmation.
    """
    if not args.yes:
        cli.out(prompt)

    proceed = args.yes or cli.confirm("Continue?", default_resp)
    if not proceed:
        return

    try:
        op()
    except (OperationNotSupported, OperationError) as err:
        # Both failure kinds are reported the same way.
        cli.error(err)
def _handle_remote_op_error(e, remote):
    """Report a remote operation error through ``cli.error``.

    An error whose first argument is ``"running"`` must carry the run id as
    its second argument; it is turned into a hint on how to stop that run.
    Any other error is reported using its first argument as the message.

    Args:
        e: The raised operation error; only ``e.args`` is read.
        remote: The remote involved; only ``remote.name`` is read.
    """
    if e.args[0] == "running":
        assert len(e.args) == 2, e.args
        # Adjacent literals are joined with no separator, so the space
        # between "stop" and the run id has to be part of the literal.
        msg = (
            "{run_id} is still running\n"
            "Wait for it to stop or try 'tracker stop "
            "{run_id} -r {remote_name}' "
            "to stop it.".format(run_id=e.args[1], remote_name=remote.name))
    else:
        msg = e.args[0]
    cli.error(msg)
def remote_status(args):
    """Query the status of the remote selected by ``args``.

    ``remotelib.Down`` is reported through ``cli.error`` with exit status 2;
    ``remotelib.OperationError`` is reported through ``cli.error`` as is.

    Args:
        args: Parsed command arguments; ``args.verbose`` is forwarded to the
            remote's ``status`` call.
    """
    remote = remotelib.remote_for_args(args)
    try:
        remote.status(args.verbose)
    except remotelib.Down as err:
        unavailable = "Remote %s is not available (%s)" % (remote.name, err)
        cli.error(unavailable, exit_status=2)
    except remotelib.OperationError as err:
        cli.error(err)
def _run_for_pid(pid):
    """Find the run whose pid, or whose parent's pid, equals ``pid``.

    Args:
        pid: Process id in a form ``_try_int`` can convert.

    Returns:
        None when ``_try_int(pid)`` yields None; otherwise the first
        matching ``runlib.Run``. A failed search is reported through
        ``cli.error``.
    """
    wanted = _try_int(pid)
    if wanted is None:
        return None

    # NOTE(review): `pathlib` must provide `path` and `iter_dirs` here, so it
    # is presumably a project module rather than the standard library one.
    for exp_name in os.listdir(pathlib.path("experiments")):
        exp_dir = os.path.join(pathlib.path("experiments"), exp_name)
        for run_id, run_dir in pathlib.iter_dirs(exp_dir):
            candidate = runlib.Run(run_id, run_dir)
            if not candidate.pid:
                continue
            if (candidate.pid == wanted
                    or _parent_pid(candidate.pid) == wanted):
                return candidate

    cli.error("cannot find run for pid %i" % wanted)
def get_project_names_and_dirs():
    """List the name and path of every project in the Tracker file.

    Returns:
        A list of ``{"name": ..., "path": ...}`` dicts, one per entry of each
        mapping under the "projects" key; a missing "path" becomes "". An
        empty or missing "projects" value is reported through ``cli.error``.
    """
    projects = TrackerFile().get("projects", {})
    if not projects:
        cli.error("No projects specified in {}".format(
            config.get_user_config_path()))
        # Explicit so the result stays None should cli.error return.
        return None

    entries = []
    for mapping in projects:
        for name, settings in mapping.items():
            entries.append({
                "name": name,
                "path": settings.get("path", ""),
            })
    return entries
def _read_pid(path):
    """Read a process id from the first line of the pidfile at ``path``.

    Args:
        path: Filesystem path of the pidfile.

    Returns:
        The pid as an int, or None when the file does not exist. A first
        line that is not a valid integer is reported through ``cli.error``.

    Raises:
        IOError: If the file cannot be opened for any reason other than it
            not existing.
    """
    import errno

    try:
        f = open(path, "r")
    except IOError as e:
        # Only a missing file means "no pid"; anything else is a real error.
        if e.errno != errno.ENOENT:
            raise
        return None

    # Release the file handle as soon as the first line has been read.
    with f:
        raw = f.readline().strip()

    try:
        return int(raw)
    except ValueError:
        cli.error("pidfile %s does not contain a valid pid" % path)
def get_remotes():
    """List the remotes defined in the user configuration, sorted by name.

    Returns:
        A list of dicts with the keys "name", "type", "host" and "desc"
        ("desc" comes from the remote's "description" setting); missing
        settings become "". An empty or missing "remotes" value is reported
        through ``cli.error``.
    """
    remotes = get_user_config().get("remotes", {})
    if not remotes:
        cli.error("No remotes specified in {}".format(
            get_user_config_path()))
        # Explicit so the result stays None should cli.error return.
        return None

    rows = []
    for name, settings in sorted(remotes.items()):
        rows.append({
            "name": name,
            "type": settings.get("type", ""),
            "host": settings.get("host", ""),
            "desc": settings.get("description", ""),
        })
    return rows
def _maybe_apply_default_runs(args):
    """Check that exactly two runs were given to the ``diff`` command.

    Args:
        args: Parsed command arguments; only ``args.runs`` is read.

    Raises:
        NotImplementedError: If no runs were given (defaults are not
            implemented).
    """
    count = len(args.runs)

    if count == 2:
        return
    if count == 0:
        raise NotImplementedError

    # One run, or more than two: explain the problem and report an error.
    if count == 1:
        cli.out(
            "The `diff` command requires two runs.\n"
            "Try specifying a second run or 'tracker diff --help' "
            "for more information.")
    else:
        cli.out(
            "The `diff` command cannot compare more than two runs.\n"
            "Try specifying just two runs or 'tracker diff --help' "
            "for more information.")
    cli.error()
def _run_remote(op, args):
    """Start the operation on the remote selected by ``args``.

    Each ``remotelib`` failure type is routed to its dedicated handler. On
    success with ``args.background`` set, a hint on how to watch the run is
    printed.

    Args:
        op: Not used by this function.
        args: Parsed command arguments; ``args.remote`` and
            ``args.background`` are read here.
    """
    remote = remotelib.remote_for_args(args)
    try:
        run_id = remote.run_op(**_run_kw(args))
    except remotelib.RunFailed as err:
        _handle_remote_run_failed(err, remote)
    except remotelib.RemoteProcessError as err:
        _handle_remote_process_error(err)
    except remotelib.RemoteProcessDetached as err:
        _handle_remote_process_detached(err, args.remote)
    except remotelib.OperationError as err:
        _handle_remote_op_error(err, remote)
    except remotelib.OperationNotSupported:
        cli.error("{} does not support this operation".format(remote.name))
    else:
        if not args.background:
            return
        # Only the first eight characters of the run id are shown.
        watch_hint = (
            "{run_id} is running remotely on {remote}\n"
            "To watch use 'tracker watch {run_id} -r {remote}'")
        cli.out(watch_hint.format(run_id=run_id[:8], remote=args.remote))
def cd(ctx, args):
    """Change into a project's directory and start a bash shell there.

    The directory is looked up by ``args.project_name``. A path that is not
    a directory, or an ``OSError`` while switching, is reported through
    ``cli.error``.

    Args:
        ctx: Command context; not used by this function.
        args: Parsed command arguments; only ``args.project_name`` is read.
    """
    log.debug("Searching for projects")

    target_dir = projects.get_project_dir_by_name(args.project_name)

    if os.path.isdir(target_dir):
        try:
            os.chdir(target_dir)
            # The active shell will not change directory without rerunning
            # /bin/bash
            os.system("/bin/bash")
        except OSError as err:
            cli.error(err)
    else:
        cli.error("No such directory: {}".format(target_dir))
def add_project(self, project):
    """Append a project to the "projects" list and write the file.

    A project whose name (the first key of ``project``) is already present
    in one of the stored project mappings is reported through ``cli.error``.

    Arguments:
        project {<dict>} -- project dictionary; its first key is the name
    """
    if self._data is None:
        self._data = {}

    # Covers both a missing "projects" key and an explicit None value.
    if self._data.get("projects") is None:
        self._data["projects"] = []

    name = list(project)[0]
    for existing in self._data["projects"]:
        if name in existing:
            cli.error(
                "A project of that name ('{}') already exists!".format(
                    name))

    self._data["projects"].append(project)
    self._write()
def _run_local(op, args):
    """Run the operation locally and report failures through ``cli.error``.

    Args:
        op: Operation object; its ``run`` method receives the pidfile value
            from ``_op_pidfile(args)`` and returns the exit code.
        args: Parsed command arguments.
    """
    try:
        exit_code = op.run(_op_pidfile(args))
    except resources.ResourceError as err:
        cli.error(
            "Run failed as a resource could not be obtained: {}".format(err))
    except oplib.ProcessError as err:
        cli.error("Run failed: {}".format(err))
    else:
        log.debug("Exited with return code {}".format(exit_code))
        if exit_code == 0:
            return
        # Pass the operation's non-zero exit code on as the exit status.
        cli.error(exit_status=exit_code)
def remote_for_args(args):
    """Return the remote named by ``args.remote``.

    An unknown remote, an unsupported remote type or missing required
    configuration is reported through ``cli.error``.

    Args:
        args: Parsed command arguments; ``args.remote`` must be set.
    """
    assert args.remote, args
    try:
        return for_name(args.remote)
    except NoSuchRemote:
        message = (
            "remote '%s' is not defined\n"
            "Show remotes by running 'tracker remotes' or "
            "'tracker remotes --help' for more information."
            % args.remote)
        cli.error(message)
    except UnsupportedRemoteType as err:
        message = (
            "remote '%s' in ~/.tracker/tracker.yaml has unsupported "
            "type: %s" % (args.remote, err.args[0]))
        cli.error(message)
    except MissingRequiredConfig as err:
        message = (
            "remote '%s' in ~/.tracker/tracker.yaml is missing required "
            "config: %s" % (args.remote, err.args[0]))
        cli.error(message)
def _handle_remote_process_error(e):
    """Report a remote process error using the error's own exit status.

    Args:
        e: The raised error; only ``e.exit_status`` is read.
    """
    status = e.exit_status
    cli.error(exit_status=status)
def _handle_remote_run_failed(e, remote):
    """Print a hint for inspecting a failed remote run, then report an error.

    The run id is the base name of ``e.remote_run_dir``; only its first
    eight characters appear in the hint.

    Args:
        e: The raised error; only ``e.remote_run_dir`` is read.
        remote: The remote involved; only ``remote.name`` is read.
    """
    short_id = os.path.basename(e.remote_run_dir)[:8]
    hint = ("Try 'tracker runs info %s -O -r %s' to view its output."
            % (short_id, remote.name))
    cli.out(hint, err=True)
    cli.error()
def _handle_no_run_for_pid_arg(pid_arg):
    """Report that ``pid_arg`` does not exist.

    Args:
        pid_arg: The value given by the user; treated as a pidfile path.
    """
    # Assume pid_arg is a pidfile path.
    message = "%s does not exist" % pid_arg
    cli.error(message)
def _validate_args(args):
    """Reject the combination of ``--table`` and ``--csv``.

    Args:
        args: Parsed command arguments; ``args.csv`` and ``args.table`` are
            read.
    """
    both_given = args.csv and args.table
    if not both_given:
        return
    cli.error("--table and --csv cannot both be specified")