Exemplo n.º 1
0
import argparse
import warnings
from typing import List, Union

from determined import __version__
from determined.common.declarative_argparse import Arg, Cmd, add_args

from .aws.cli import args_description as aws_args_description
from .gcp.cli import args_description as gcp_args_description
from .local.cli import args_description as local_args_description

# Arguments and subcommands handed to the top-level deploy command: three
# global flags followed by the per-provider command descriptions imported
# above (local, AWS, GCP).
args_subs: List[Union[Arg, Cmd]] = [
    # TODO(DET-5171): Remove --version flag when det-deploy is deprecated.
    Arg("--version", action="version", version="%(prog)s {}".format(__version__)),
    Arg("--no-preflight-checks", action="store_true", help="Disable preflight checks"),
    Arg(
        "--no-wait-for-master",
        action="store_true",
        help="Do not wait for master to come up after AWS or GCP clusters are deployed",
    ),
    # Provider-specific subcommand trees, each defined in its own cli module.
    local_args_description,
    aws_args_description,
    gcp_args_description,
]

# Name given to the top-level deploy Cmd.
# NOTE(review): the "|" presumably marks where the short alias ends ("d" vs
# "deploy") — confirm against declarative_argparse.
DEPLOY_CMD_NAME = "d|eploy"
args_description = Cmd(
    DEPLOY_CMD_NAME,
    None,
    "manage deployments",
    args_subs,
Exemplo n.º 2
0

args_description = Cmd(
    "e|xperiment",
    None,
    "manage experiments",
    [
        # Inspection commands.
        Cmd(
            "list",
            list_experiments,
            "list experiments",
            [
                Arg(
                    "--all",
                    "-a",
                    action="store_true",
                    help="show all experiments (including archived and other users')",
                ),
                Arg("--csv", action="store_true", help="print as CSV"),
            ],
            is_default=True,
        ),
        Cmd("config", config, "display experiment config", [experiment_id_arg("experiment ID")]),
        Cmd(
            "describe",
            describe,
            "describe experiment",
            [
                Arg("experiment_ids", help="comma-separated list of experiment IDs to describe"),
                Arg("--metrics", action="store_true", help="display full metrics"),
                Group(
Exemplo n.º 3
0
@authentication.required
def remove_templates(args: Namespace) -> None:
    """Delete the config template named by ``args.template_name``.

    Sends a delete request for ``templates/<name>`` to ``args.master`` and then
    prints a confirmation message passed through ``colored`` with ``"green"``.
    """
    name = args.template_name
    template_path = "templates/" + name
    api.delete(args.master, path=template_path)

    confirmation = "Removed template {}".format(name)
    print(colored(confirmation, "green"))


# fmt: off

args_description = [
    Cmd("template tpl", None, "manage config templates", [
        Cmd("list ls",
            list_template,
            "list config templates", [
                Arg("-d",
                    "--details",
                    action="store_true",
                    help="show the configs of the templates"),
            ],
            is_default=True),
        Cmd("describe", describe_template, "describe config template", [
            Arg("template_name", type=str, help="template name"),
        ]),
        Cmd("set", set_template, "set config template", [
            Arg("template_name", help="template name"),
            Arg("template_file",
                type=FileType("r"),
                help="config template file (.yaml)")
        ]),
        Cmd("remove rm", remove_templates, "remove config template",
            [Arg("template_name", help="template name")]),
    ])
Exemplo n.º 4
0
 "gcp",
 None,
 "GCP help",
 [
     Cmd(
         "down",
         handle_down,
         "delete gcp cluster",
         [
             ArgGroup(
                 "optional named arguments",
                 None,
                 [
                     Arg(
                         "--local-state-path",
                         type=str,
                         default=os.getcwd(),
                         help="local directory for storing cluster state",
                     ),
                     Arg(
                         "--yes",
                         action="store_true",
                         help="no prompt when deleting resources",
                     ),
                     Arg(
                         "--no-prompt",
                         dest="yes",
                         action="store_true",
                         help=argparse.SUPPRESS,
                     ),
                 ],
             ),
Exemplo n.º 5
0
            str(port),
            "{}@{}".format(username, shell["id"]),
            *additional_opts,
        ]

        subprocess.run(cmd)

        print(colored("To reconnect, run: det shell open {}".format(shell["id"]), "green"))


# fmt: off

args_description = [
    Cmd("shell", None, "manage shells", [
        Cmd("list", command.list, "list shells", [
            Arg("-q", "--quiet", action="store_true",
                help="only display the IDs"),
            Arg("--all", "-a", action="store_true",
                help="show all shells (including other users')")
        ], is_default=True),
        Cmd("config", command.config,
            "display shell config", [
                Arg("id", type=str, help="shell ID"),
            ]),
        Cmd("start", start_shell, "start a new shell", [
            Arg("ssh_opts", nargs="*", help="additional SSH options when connecting to the shell"),
            Arg("--config-file", default=None, type=FileType("r"),
                help="command config file (.yaml)"),
            Arg("-v", "--volume", action="append", default=[],
                help=VOLUME_DESC),
            Arg("-c", "--context", default=None, type=Path, help=CONTEXT_DESC),
            Arg("--config", action="append", default=[], help=CONFIG_DESC),
Exemplo n.º 6
0
    print(colored("Using search configuration:", "green"))
    yml = yaml.YAML()
    yml.indent(mapping=2, sequence=4, offset=2)
    yml.dump(experiment_config["searcher"], sys.stdout)
    print()
    print("This search will create a total of {} trial(s).".format(
        sum(j["results"].values())))
    print(tabulate.tabulate(values, headers, tablefmt="presto"), flush=False)


# fmt: off

args_description = [
    Arg("-u",
        "--user",
        help="run as the given user",
        metavar="username",
        default=None),
    Arg("-m",
        "--master",
        help="master address",
        metavar="address",
        default=get_default_master_address()),
    Arg("-v",
        "--version",
        action="version",
        help="print CLI version and exit",
        version="%(prog)s {}".format(determined.__version__)),
    experiment.args_description,
    checkpoint.args_description,
    Cmd(
Exemplo n.º 7
0
args_description = [
    Cmd(
        "m|odel",
        None,
        "manage models",
        [
            Cmd(
                "list",
                list_models,
                "list all models in the registry",
                [
                    Arg(
                        "--sort-by",
                        type=str,
                        choices=["name", "description", "creation_time", "last_updated_time"],
                        default="last_updated_time",
                        help="sort models by the given field",
                    ),
                    Arg(
                        "--order-by",
                        type=str,
                        choices=["asc", "desc"],
                        default="asc",
                        help="order models in either ascending or descending order",
                    ),
                    Arg("--json", action="store_true", help="print as JSON"),
                ],
                is_default=True,
            ),
            Cmd(
Exemplo n.º 8
0
 None,
 "GKE help",
 [
     Cmd(
         "up",
         handle_up,
         "create gke cluster",
         [
             ArgGroup(
                 "required named arguments",
                 None,
                 [
                     Arg(
                         "--cluster-id",
                         type=str,
                         default=None,
                         required=True,
                         help="a unique name for the gke cluster",
                     ),
                 ],
             ),
             ArgGroup(
                 "optional named arguments",
                 None,
                 [
                     Arg(
                         "--agent-node-pool-name",
                         "--gpu-node-pool-name",
                         type=str,
                         default=None,
                         help="a unique name for the GPU node pool",
Exemplo n.º 9
0
            render_event_stream(msg)


@authentication_required
def open_notebook(args: Namespace) -> None:
    """Open the notebook identified by ``args.notebook_id``.

    Fetches the notebook record from the master, verifies through ``check_eq``
    that its state is ``STATE_RUNNING``, and then passes its service address to
    ``api.open``.
    """
    endpoint = "api/v1/notebooks/{}".format(args.notebook_id)
    response = api.get(args.master, endpoint)
    notebook = response.json()["notebook"]

    check_eq(notebook["state"], "STATE_RUNNING", "Notebook must be in a running state")
    api.open(args.master, notebook["serviceAddress"])


# fmt: off

args_description = [
    Cmd("notebook", None, "manage notebooks", [
        Cmd("list ls", command.list, "list notebooks", [
            Arg("-q", "--quiet", action="store_true",
                help="only display the IDs"),
            Arg("--all", "-a", action="store_true",
                help="show all notebooks (including other users')")
        ], is_default=True),
        Cmd("config", command.config,
            "display notebook config", [
                Arg("id", type=str, help="notebook ID"),
            ]),
        Cmd("start", start_notebook, "start a new notebook", [
            Arg("--config-file", default=None, type=FileType("r"),
                help="command config file (.yaml)"),
            Arg("-v", "--volume", action="append", default=[],
                help=VOLUME_DESC),
            Arg("-c", "--context", default=None, type=Path, help=CONTEXT_DESC),
            Arg("--config", action="append", default=[], help=CONFIG_DESC),
            Arg("--template", type=str,
Exemplo n.º 10
0
    return deploy_aws("down", args)


args_description = Cmd(
    "aws",
    None,
    "AWS help",
    [
        Cmd(
            "list",
            handle_list,
            "list CloudFormation stacks",
            [
                Arg(
                    "--region",
                    type=str,
                    default=None,
                    help="AWS region",
                ),
                Arg("--profile", type=str, default=None, help="AWS profile"),
            ],
        ),
        Cmd(
            "down",
            handle_down,
            "delete CloudFormation stack",
            [
                ArgGroup(
                    "required named arguments",
                    None,
                    [
                        Arg(
Exemplo n.º 11
0
        args.master,
        "api/v1/notebooks/{}".format(notebook_id)).json()["notebook"]
    check_eq(resp["state"], "STATE_RUNNING",
             "Notebook must be in a running state")
    api.browser_open(args.master, resp["serviceAddress"])


# fmt: off

args_description = [
    Cmd("notebook", None, "manage notebooks", [
        Cmd("list ls",
            command.list_tasks,
            "list notebooks", [
                Arg("-q",
                    "--quiet",
                    action="store_true",
                    help="only display the IDs"),
                Arg("--all",
                    "-a",
                    action="store_true",
                    help="show all notebooks (including other users')")
            ],
            is_default=True),
        Cmd("config", command.config, "display notebook config", [
            Arg("notebook_id", type=str, help="notebook ID"),
        ]),
        Cmd("start", start_notebook, "start a new notebook", [
            Arg("--config-file",
                default=None,
                type=FileType("r"),
                help="command config file (.yaml)"),
Exemplo n.º 12
0
def agent_id_completer(_1: str, parsed_args: argparse.Namespace, _2: Any) -> List[str]:
    """Return completion candidates for an agent ID argument.

    Queries the ``agents`` endpoint on ``parsed_args.master`` and returns the
    top-level keys of the JSON response (presumably the agent IDs). The first
    and third parameters are accepted but not used.
    """
    agents = api.get(parsed_args.master, "agents").json()
    return [agent_key for agent_key in agents.keys()]


# fmt: off

args_description = [
    Cmd("a|gent", None, "manage agents", [
        Cmd("list",
            list_agents,
            "list agents", [
                Group(
                    Arg("--csv", action="store_true", help="print as CSV"),
                    Arg("--json", action="store_true", help="print as JSON"),
                ),
            ],
            is_default=True),
        Cmd("enable", patch_agent(True), "enable agent", [
            Group(
                Arg("agent_id",
                    help="agent ID",
                    nargs="?",
                    completer=agent_id_completer),
                Arg("--all", action="store_true", help="enable all agents"),
            )
        ]),
        Cmd("disable", patch_agent(False), "disable agent", [
            Group(
Exemplo n.º 13
0
        print("Deletion of checkpoints {} is in progress".format(args.checkpoints_uuids))
    else:
        print("Aborting deletion of checkpoints.")


main_cmd = Cmd(
    "c|heckpoint",
    None,
    "manage checkpoints",
    [
        Cmd(
            "download",
            download,
            "download checkpoint from persistent storage",
            [
                Arg("uuid", type=str, help="Download a checkpoint by specifying its UUID."),
                Arg(
                    "-o",
                    "--output-dir",
                    type=str,
                    help="Desired output directory for the checkpoint.",
                ),
                Arg(
                    "-q",
                    "--quiet",
                    action="store_true",
                    help="Only print the path to the checkpoint.",
                ),
            ],
        ),
        Cmd(
Exemplo n.º 14
0
        follow=args.follow,
        agent_ids=args.agent_ids,
        container_ids=args.container_ids,
        rank_ids=args.rank_ids,
        sources=args.sources,
        stdtypes=args.stdtypes,
        level_above=args.level,
        timestamp_before=args.timestamp_before,
        timestamp_after=args.timestamp_after,
    )


common_log_options = [
    Arg(
        "-f",
        "--follow",
        action="store_true",
        help="follow the logs of a running task, similar to tail -f",
    ),
    Group(
        Arg(
            "--head",
            type=int,
            help=
            "number of lines to show, counting from the beginning of the log",
        ),
        Arg(
            "--tail",
            type=int,
            help="number of lines to show, counting from the end of the log",
        ),
    ),
Exemplo n.º 15
0
                # fetch logs.
                response = api.get(
                    args.master, "logs", params={"greater_than_id": str(latest_log_id)}
                )
                latest_log_id = process_response(response, latest_log_id)
            except KeyboardInterrupt:
                break


# fmt: off

# Declarative description of the "master" command and its subcommands.
args_description = [
    # The parent command has no handler of its own (None); it only groups the
    # subcommands listed below.
    Cmd("master", None, "manage master", [
        # "config" and "info" both accept the shared json/yaml format flags.
        # NOTE(review): Group presumably makes the two flags mutually
        # exclusive — confirm against declarative_argparse.
        Cmd("config", config, "fetch master config", [
            Group(format_args["json"], format_args["yaml"])
        ]),
        Cmd("info", get_master, "fetch master info", [
            Group(format_args["json"], format_args["yaml"])
        ]),
        # "logs" can follow the log stream and/or limit output to the last
        # N lines.
        Cmd("logs", logs, "fetch master logs", [
            Arg("-f", "--follow", action="store_true",
                help="follow the logs of master, similar to tail -f"),
            Arg("--tail", type=int,
                help="number of lines to show, counting from the end "
                "of the log (default is all)")
        ]),
    ])
]  # type: List[Any]

# fmt: on
Exemplo n.º 16
0
def experiment_id_arg(help: str) -> Arg:  # noqa: A002
    """Build the integer-typed ``experiment_id`` argument with the given help text."""
    id_arg = Arg("experiment_id", type=int, help=help)
    return id_arg
Exemplo n.º 17
0
        timestamp_after=args.timestamp_after,
    )


args_description = [
    Cmd(
        "t|rial",
        None,
        "manage trials",
        [
            Cmd(
                "describe",
                describe_trial,
                "describe trial",
                [
                    Arg("trial_id", type=int, help="trial ID"),
                    Arg("--metrics", action="store_true", help="display full metrics"),
                    Group(
                        Arg("--csv", action="store_true", help="print as CSV"),
                        Arg("--json", action="store_true", help="print JSON"),
                    ),
                ],
            ),
            Cmd(
                "download",
                download,
                "download checkpoint for trial",
                [
                    Arg("trial_id", type=int, help="trial ID"),
                    Group(
                        Arg(
Exemplo n.º 18
0
    print(f"Successfully un-archived project {args.project_name}.")


args_description = [
    Cmd(
        "p|roject",
        None,
        "manage projects",
        [
            Cmd(
                "list",
                list_workspace_projects,
                "list the projects associated with a workspace",
                [
                    Arg("workspace_name",
                        type=str,
                        help="name of the workspace"),
                    Arg(
                        "--sort-by",
                        type=str,
                        choices=["id", "name"],
                        default="id",
                        help="sort workspaces by the given field",
                    ),
                    Arg(
                        "--order-by",
                        type=str,
                        choices=["asc", "desc"],
                        default="asc",
                        help=
                        "order workspaces in either ascending or descending order",
Exemplo n.º 19
0
        context_path=args.context,
    )["command"]

    if args.detach:
        print(resp["id"])
        return

    api.pprint_task_logs(args.master, resp["id"], follow=True)


# fmt: off

args_description = [
    Cmd("command cmd", None, "manage commands", [
        Cmd("list ls", command.list_tasks, "list commands", [
            Arg("-q", "--quiet", action="store_true",
                help="only display the IDs"),
            Arg("--all", "-a", action="store_true",
                help="show all commands (including other users')"),
            Group(
                Arg("--csv", action="store_true", help="print as CSV"),
                Arg("--json", action="store_true", help="print as JSON"),
            ),
        ], is_default=True),
        Cmd("config", command.config,
            "display command config", [
                Arg("command_id", type=str, help="command ID"),
            ]),
        Cmd("run", run_command, "create command", [
            Arg("entrypoint", type=str, nargs=REMAINDER,
                help="entrypoint command and arguments to execute"),
            Arg("--config-file", default=None, type=FileType("r"),
Exemplo n.º 20
0
import argparse
import warnings
from typing import List, Union

from determined import __version__
from determined.common.declarative_argparse import Arg, Cmd, add_args

from .aws.cli import args_description as aws_args_description
from .gcp.cli import args_description as gcp_args_description
from .local.cli import args_description as local_args_description

args_subs: List[Union[Arg, Cmd]] = [
    # TODO(DET-5171): Remove --version flag when det-deploy is deprecated.
    Arg("--version",
        action="version",
        version="%(prog)s {}".format(__version__)),
    Arg("--no-preflight-checks",
        action="store_true",
        help="Disable preflight checks"),
    Arg(
        "--no-wait-for-master",
        action="store_true",
        help=
        "Do not wait for master to come up after AWS or GCP clusters are deployed",
    ),
    Arg(
        "--image-repo-prefix",
        type=str,
        default="determinedai",
        help=
        "Docker image repository to use for determined-master and determined-agent images",
Exemplo n.º 21
0
@authentication_required
def remove_client(parsed_args: Namespace) -> None:
    """Delete the OAuth client application given by ``parsed_args.client_id``.

    Sends a delete request for ``oauth2/clients/<client_id>`` to
    ``parsed_args.master``.

    Raises:
        EnterpriseOnlyError: if ``api.delete`` raises ``NotFoundException``.
    """
    try:
        api.delete(parsed_args.master,
                   "oauth2/clients/{}".format(parsed_args.client_id))
    except NotFoundException as err:
        # Chain explicitly so the traceback shows the not-found error as the
        # direct cause of the translated error, not as an unrelated failure
        # that happened during handling.
        raise EnterpriseOnlyError("API not found: oauth2/clients") from err


# fmt: off

# Declarative description of the "oauth" command tree: oauth -> client ->
# {list, add, remove}.
args_description = [
    # Both parent commands have no handler of their own (None); they only
    # group the subcommands nested beneath them.
    Cmd("oauth", None, "manage OAuth", [
        Cmd("client", None, "manage clients", [
            # "list" takes no arguments and is flagged is_default.
            # NOTE(review): is_default presumably means it runs when no
            # subcommand is given — confirm against declarative_argparse.
            Cmd("list",
                list_clients,
                "list OAuth client applications", [],
                is_default=True),
            Cmd("add", add_client, "add OAuth client application", [
                Arg("name", type=str, help="descriptive name"),
                Arg("domain", type=str, help="redirect domain"),
            ]),
            Cmd("remove", remove_client, "remove OAuth client application", [
                Arg("client_id", help="OAuth client ID to remove"),
            ]),
        ])
    ])
]  # type: List[Any]

# fmt: on
Exemplo n.º 22
0
    print(default_template)


args_description = Cmd(
    "aws",
    None,
    "AWS help",
    [
        Cmd(
            "list",
            handle_list,
            "list CloudFormation stacks",
            [
                Arg(
                    "--region",
                    type=str,
                    default=None,
                    help="AWS region",
                ),
                Arg("--profile", type=str, default=None, help="AWS profile"),
            ],
        ),
        Cmd(
            "down",
            handle_down,
            "delete CloudFormation stack",
            [
                ArgGroup(
                    "required named arguments",
                    None,
                    [
                        Arg(
Exemplo n.º 23
0
    checkpoint = Determined(args.master, None).get_checkpoint(args.uuid)
    render_checkpoint(checkpoint)


args_description = Cmd(
    "c|heckpoint",
    None,
    "manage checkpoints",
    [
        Cmd(
            "download",
            download,
            "download checkpoint from persistent storage",
            [
                Arg("uuid",
                    type=str,
                    help="Download a checkpoint by specifying its UUID."),
                Arg(
                    "-o",
                    "--output-dir",
                    type=str,
                    help="Desired output directory for the checkpoint.",
                ),
                Arg(
                    "-q",
                    "--quiet",
                    action="store_true",
                    help="Only print the path to the checkpoint.",
                ),
            ],
        ),
Exemplo n.º 24
0
            if args.json else "resources/allocation/aggregated")
    print_response(api.get(args.master, path, params=params))


args_description = [
    Cmd(
        "res|ources",
        None,
        "query historical resource allocation",
        [
            Cmd(
                "raw",
                raw,
                "get raw allocation information",
                [
                    Arg("timestamp_after"),
                    Arg("timestamp_before"),
                    Arg("--json",
                        action="store_true",
                        help="output JSON rather than CSV"),
                ],
            ),
            Cmd(
                "agg|regated",
                aggregated,
                "get aggregated allocation information",
                [
                    Arg("start_date", help="first date to include"),
                    Arg("end_date", help="last date to include"),
                    Arg("--json",
                        action="store_true",
Exemplo n.º 25
0

args_description = Cmd(
    "local",
    None,
    "local help",
    [
        Cmd(
            "cluster-up",
            handle_cluster_up,
            "Create a Determined cluster",
            [
                Group(
                    Arg(
                        "--master-config-path",
                        type=Path,
                        default=None,
                        help="path to master configuration",
                    ),
                    Arg(
                        "--storage-host-path",
                        type=Path,
                        default=DEFAULT_STORAGE_HOST_PATH,
                        help=
                        "Storage location for cluster data (e.g. checkpoints)",
                    ),
                ),
                Arg(
                    "--agents",
                    type=int,
                    default=1,
                    help="number of agents to start (on this machine)",
Exemplo n.º 26
0
    url = "commands/{}/events".format(resp["id"])

    with api.ws(args.master, url) as ws:
        for msg in ws:
            render_event_stream(msg)


# fmt: off

args_description = [
    Cmd("command cmd", None, "manage commands", [
        Cmd("list ls",
            command.list,
            "list commands", [
                Arg("-q",
                    "--quiet",
                    action="store_true",
                    help="only display the IDs"),
                Arg("--all",
                    "-a",
                    action="store_true",
                    help="show all commands (including other users')"),
            ],
            is_default=True),
        Cmd("config", command.config, "display command config", [
            Arg("id", type=str, help="command ID"),
        ]),
        Cmd("run", run_command, "create command", [
            Arg("entrypoint",
                type=str,
                nargs=REMAINDER,
                help="entrypoint command and arguments to execute"),
Exemplo n.º 27
0
def experiment_id_arg(help: str) -> Arg:
    """Build the integer-typed ``experiment_id`` argument with the given help text.

    The argument is wired to ``experiment_id_completer`` for completion.
    """
    id_arg = Arg(
        "experiment_id",
        type=int,
        help=help,
        completer=experiment_id_completer,
    )
    return id_arg
Exemplo n.º 28
0
        subprocess.run(cmd)

        print(
            colored(f"To reconnect, run: det shell open {shell['id']}",
                    "green"))


# fmt: off

args_description = [
    Cmd("shell", None, "manage shells", [
        Cmd("list",
            partial(command.list_tasks),
            "list shells", [
                Arg("-q",
                    "--quiet",
                    action="store_true",
                    help="only display the IDs"),
                Arg("--all",
                    "-a",
                    action="store_true",
                    help="show all shells (including other users')"),
                Group(format_args["json"], format_args["csv"]),
            ],
            is_default=True),
        Cmd("config", partial(command.config), "display shell config", [
            Arg("shell_id", type=str, help="shell ID"),
        ]),
        Cmd("start", start_shell, "start a new shell", [
            Arg("ssh_opts",
                nargs="*",
                help="additional SSH options when connecting to the shell"),
Exemplo n.º 29
0
        args.master,
        "api/v1/tensorboards/{}".format(tensorboard_id)).json()["tensorboard"]
    check_eq(resp["state"], "STATE_RUNNING",
             "TensorBoard must be in a running state")
    api.browser_open(args.master, resp["serviceAddress"])


# fmt: off

args_description = [
    Cmd("tensorboard", None, "manage TensorBoard instances", [
        Cmd("list ls",
            partial(command.list_tasks),
            "list TensorBoard instances", [
                Arg("-q",
                    "--quiet",
                    action="store_true",
                    help="only display the IDs"),
                Arg("--all",
                    "-a",
                    action="store_true",
                    help="show all TensorBoards (including other users')"),
                Group(format_args["json"], format_args["csv"]),
            ],
            is_default=True),
        Cmd("start", start_tensorboard, "start new TensorBoard instance", [
            Arg("experiment_ids",
                type=int,
                nargs="*",
                help=
                "experiment IDs to load into TensorBoard. At most 100 trials from "
                "the specified experiment will be loaded into TensorBoard. If the "
Exemplo n.º 30
0
    OPERATION_TO_FN[args.command](args)


args_description = Cmd(
    "local",
    None,
    "local help",
    [
        Cmd(
            "cluster-up",
            handle_cluster_up,
            "Create a Determined cluster",
            [
                Arg(
                    "--master-config-path",
                    type=str,
                    default=None,
                    help="path to master configuration",
                ),
                Arg(
                    "--agents",
                    type=int,
                    default=1,
                    help="number of agents to start (on this machine)",
                ),
                Arg("--master-port",
                    type=int,
                    default=8080,
                    help="port to expose master on"),
                Arg(
                    "--cluster-name",
                    type=str,