), Arg("--csv", action="store_true", help="print as CSV"), ], is_default=True, ), Cmd("config", config, "display experiment config", [experiment_id_arg("experiment ID")]), Cmd( "describe", describe, "describe experiment", [ Arg("experiment_ids", help="comma-separated list of experiment IDs to describe"), Arg("--metrics", action="store_true", help="display full metrics"), Group( Arg("--csv", action="store_true", help="print as CSV"), Arg("--json", action="store_true", help="print as JSON"), Arg("--outdir", type=Path, help="directory to save output"), ), ], ), Cmd( "download-model-def", download_model_def, "download model definition", [ experiment_id_arg("experiment ID"), Arg("--output-dir", type=Path, help="output directory", default="."), ], ), Cmd( "list-trials lt",
# fmt: off args_description = [ Cmd("tensorboard", None, "manage TensorBoard instances", [ Cmd("list ls", partial(command.list_tasks), "list TensorBoard instances", [ Arg("-q", "--quiet", action="store_true", help="only display the IDs"), Arg("--all", "-a", action="store_true", help="show all TensorBoards (including other users')"), Group(format_args["json"], format_args["csv"]), ], is_default=True), Cmd("start", start_tensorboard, "start new TensorBoard instance", [ Arg("experiment_ids", type=int, nargs="*", help= "experiment IDs to load into TensorBoard. At most 100 trials from " "the specified experiment will be loaded into TensorBoard. If the " "experiment has more trials, the 100 best-performing trials will " "be used."), Arg("-t", "--trial-ids", nargs=ONE_OR_MORE, type=int,
return api.pprint_task_logs(args.master, resp["id"], follow=True) # fmt: off args_description = [ Cmd("command cmd", None, "manage commands", [ Cmd("list ls", command.list_tasks, "list commands", [ Arg("-q", "--quiet", action="store_true", help="only display the IDs"), Arg("--all", "-a", action="store_true", help="show all commands (including other users')"), Group( Arg("--csv", action="store_true", help="print as CSV"), Arg("--json", action="store_true", help="print as JSON"), ), ], is_default=True), Cmd("config", command.config, "display command config", [ Arg("command_id", type=str, help="command ID"), ]), Cmd("run", run_command, "create command", [ Arg("entrypoint", type=str, nargs=REMAINDER, help="entrypoint command and arguments to execute"), Arg("--config-file", default=None, type=FileType("r"), help="command config file (.yaml)"), Arg("-v", "--volume", action="append", default=[], help=VOLUME_DESC), Arg("-c", "--context", default=None, type=Path, help=CONTEXT_DESC), Arg("--config", action="append", default=[], help=CONFIG_DESC),
None, "local help", [ Cmd( "cluster-up", handle_cluster_up, "Create a Determined cluster", [ Group( Arg( "--master-config-path", type=Path, default=None, help="path to master configuration", ), Arg( "--storage-host-path", type=Path, default=DEFAULT_STORAGE_HOST_PATH, help= "Storage location for cluster data (e.g. checkpoints)", ), ), Arg( "--agents", type=int, default=1, help="number of agents to start (on this machine)", ), Arg("--master-port", type=int,
args_description = [ Cmd( "t|rial", None, "manage trials", [ Cmd( "describe", describe_trial, "describe trial", [ Arg("trial_id", type=int, help="trial ID"), Arg("--metrics", action="store_true", help="display full metrics"), Group( Arg("--csv", action="store_true", help="print as CSV"), Arg("--json", action="store_true", help="print JSON"), ), ], ), Cmd( "download", download, "download checkpoint for trial", [ Arg("trial_id", type=int, help="trial ID"), Group( Arg( "--best", action="store_true", help="download the checkpoint with the best validation metric", ),
# The `tail` parameter only makes sense the first time we # fetch logs. response = api.get( args.master, "logs", params={"greater_than_id": str(latest_log_id)} ) latest_log_id = process_response(response, latest_log_id) except KeyboardInterrupt: break


# fmt: off
# Declarative description of the `master` CLI subcommand tree.  The top-level
# `master` entry has no handler of its own (None); its nested list declares
# the `config`, `info` and `logs` subcommands together with their handlers
# and arguments.
args_description = [
    Cmd(
        "master",
        None,
        "manage master",
        [
            # `config` and `info` take the same pair of `format_args`
            # entries ("json" and "yaml"), each wrapped in its own Group.
            Cmd(
                "config",
                config,
                "fetch master config",
                [Group(format_args["json"], format_args["yaml"])],
            ),
            Cmd(
                "info",
                get_master,
                "fetch master info",
                [Group(format_args["json"], format_args["yaml"])],
            ),
            Cmd(
                "logs",
                logs,
                "fetch master logs",
                [
                    Arg(
                        "-f",
                        "--follow",
                        action="store_true",
                        help="follow the logs of master, similar to tail -f",
                    ),
                    Arg(
                        "--tail",
                        type=int,
                        help=(
                            "number of lines to show, counting from the end of the log "
                            "(default is all)"
                        ),
                    ),
                ],
            ),
        ],
    ),
]  # type: List[Any]
# fmt: on
def agent_id_completer(_1: str, parsed_args: argparse.Namespace, _2: Any) -> List[str]:
    """Return the keys of the master's ``agents`` response as a list.

    Issues a GET for ``"agents"`` against ``parsed_args.master`` and returns
    the top-level keys of the decoded JSON body (presumably the agent IDs --
    confirm against the master API).  The first and third positional
    parameters are accepted but never read.
    """
    agents = api.get(parsed_args.master, "agents").json()
    return [*agents.keys()]


# fmt: off args_description = [ Cmd("a|gent", None, "manage agents", [ Cmd("list", list_agents, "list agents", [ Group( Arg("--csv", action="store_true", help="print as CSV"), Arg("--json", action="store_true", help="print as JSON"), ), ], is_default=True), Cmd("enable", patch_agent(True), "enable agent", [ Group( Arg("agent_id", help="agent ID", nargs="?", completer=agent_id_completer), Arg("--all", action="store_true", help="enable all agents"), ) ]), Cmd("disable", patch_agent(False), "disable agent", [ Group( Arg("agent_id",
common_log_options = [ Arg( "-f", "--follow", action="store_true", help="follow the logs of a running task, similar to tail -f", ), Group( Arg( "--head", type=int, help= "number of lines to show, counting from the beginning of the log", ), Arg( "--tail", type=int, help="number of lines to show, counting from the end of the log", ), ), Arg( "--allocation-id", dest="allocation_ids", action="append", help="allocations to show logs from (repeat for multiple values)", ), Arg( "--agent-id", dest="agent_ids",
[ Cmd( "list ls", ls, "list jobs", [ Arg( "-p", "--resource-pool", type=str, help="The target resource pool, if any.", ), *pagination_args, Group( format_args["json"], format_args["yaml"], format_args["table"], format_args["csv"], ), ], is_default=True, ), Cmd( "u|pdate", update, "update job", [ Arg("job_id", type=str, help="The target job ID"), Group( Arg( "-p", "--priority",