示例#1
0
def main(args: List[str] = sys.argv[1:]) -> None:
    """Parse ``args``, check the master connection, and run the chosen subcommand.

    Steps, in order:

    1. When ``DET_ADMIN`` is present in the environment, register the admin-only
       ``delete`` experiment subcommand.
    2. Build the parser, hook up argcomplete, and parse ``args``.
    3. If a ``master.crt`` file exists in the config directory, point
       ``REQUESTS_CA_BUNDLE`` at it.
    4. Call ``check_version``; on an SSL error, fetch the master's certificate,
       ask the user whether to trust it, store it, and retry.
    5. Invoke the function attached to the parsed arguments.

    Failures are reported through ``parser.exit``: code 1 for command errors,
    2 when no subcommand was given, and 3 on keyboard interrupt.

    Args:
        args: Command-line arguments without the program name.
    """
    # TODO(#1690): Refactor admin command(s) to a separate CLI tool.
    if os.environ.get("DET_ADMIN") is not None:
        experiment_subcommands = experiment_args_description.subs
        delete_handler = experiment.delete_experiment
        experiment_id = Arg("experiment_id", help="delete experiment")
        assume_yes = Arg(
            "--yes",
            action="store_true",
            default=False,
            help="automatically answer yes to prompts",
        )
        experiment_subcommands.append(
            Cmd("delete", delete_handler, "delete experiment", [experiment_id, assume_yes])
        )

    try:
        parser = make_parser()
        argcomplete.autocomplete(parser)

        parsed_args = parser.parse_args(args)

        def die(message: str, always_print_traceback: bool = False) -> None:
            """Optionally print the active traceback, then exit with code 1."""
            debug_enabled = os.getenv("DET_DEBUG", "").lower() in ("true", "1", "yes")
            if always_print_traceback or debug_enabled:
                import traceback

                traceback.print_exc()

            parser.exit(1, colored(message + "\n", "red"))

        # No handler attached to the namespace means no subcommand was selected.
        if not vars(parsed_args).get("func"):
            parser.print_usage()
            parser.exit(2, "{}: no subcommand specified\n".format(parser.prog))

        cert_path = str(auth.get_config_path().joinpath("master.crt"))
        if os.path.exists(cert_path):
            os.environ["REQUESTS_CA_BUNDLE"] = cert_path

        try:
            try:
                check_version(parsed_args)
            except requests.exceptions.SSLError:
                # An SSLError usually means that we queried a master over HTTPS and got an
                # untrusted cert, so offer to store and trust the current cert. (It could also
                # mean that we spoke HTTPS to the HTTP port; telling those apart from the
                # exception alone is awkward, and the certificate fetch below reveals it anyway.)
                master_addr = api.parse_master_address(parsed_args.master)
                check_not_none(master_addr.hostname)
                check_not_none(master_addr.port)
                endpoint = (cast(str, master_addr.hostname), cast(int, master_addr.port))
                try:
                    pem = ssl.get_server_certificate(endpoint)
                except ssl.SSLError:
                    die(
                        "Tried to connect over HTTPS but couldn't get a certificate "
                        "from the master; consider using HTTP"
                    )

                # Show the SHA256 of the DER-encoded cert as colon-separated hex pairs.
                der = ssl.PEM_cert_to_DER_cert(pem)
                digest = hashlib.sha256(der).hexdigest()
                fingerprint = ":".join(chunks(digest, 2))

                prompt = (
                    "The master sent an untrusted certificate with this SHA256 fingerprint:\n"
                    "{}\nDo you want to trust this certificate from now on?"
                ).format(fingerprint)
                if not render.yes_or_no(prompt):
                    die("Unable to verify master certificate")

                # Persist the accepted cert and use it for the retry and later requests.
                with open(cert_path, "w") as cert_file:
                    cert_file.write(pem)
                os.environ["REQUESTS_CA_BUNDLE"] = cert_path

                check_version(parsed_args)

            parsed_args.func(parsed_args)
        except KeyboardInterrupt:
            # Let the outer handler print the interrupt message.
            raise
        except (api.errors.BadRequestException, api.errors.BadResponseException) as err:
            die("Failed to {}: {}".format(parsed_args.func.__name__, err))
        except api.errors.CorruptTokenCacheException:
            die(
                "Failed to login: Attempted to read a corrupted token cache. "
                "The store has been deleted; please try again."
            )
        except Exception:
            die(
                "Failed to {}".format(parsed_args.func.__name__),
                always_print_traceback=True,
            )
    except KeyboardInterrupt:
        parser.exit(3, colored("Interrupting...\n", "red"))
示例#2
0
# Declarative description of the `experiment` command tree. Each Cmd is built
# from a name, a handler (None for pure command groups), a help summary, and a
# list of nested Cmd/Arg/Group entries.
# NOTE(review): the "e|xperiment" and "list-trials lt" name forms presumably
# declare abbreviations/aliases -- confirm against the Cmd implementation.
args_description = Cmd(
    "e|xperiment",
    None,
    "manage experiments",
    [
        # Inspection commands.
        Cmd(
            "list",
            list_experiments,
            "list experiments",
            [
                Arg(
                    "--all",
                    "-a",
                    action="store_true",
                    help=
                    "show all experiments (including archived and other users')",
                ),
                Arg("--csv", action="store_true", help="print as CSV"),
            ],
            is_default=True,
        ),
        Cmd("config", config, "display experiment config",
            [experiment_id_arg("experiment ID")]),
        Cmd(
            "describe",
            describe,
            "describe experiment",
            [
                Arg("experiment_ids",
                    help="comma-separated list of experiment IDs to describe"),
                Arg("--metrics",
                    action="store_true",
                    help="display full metrics"),
                Group(
                    Arg("--csv", action="store_true", help="print as CSV"),
                    Arg("--json", action="store_true", help="print as JSON"),
                    Arg("--outdir", type=Path,
                        help="directory to save output"),
                ),
            ],
        ),
        Cmd(
            "download-model-def",
            download_model_def,
            "download model definition",
            [
                experiment_id_arg("experiment ID"),
                Arg("--output-dir",
                    type=Path,
                    help="output directory",
                    default="."),
            ],
        ),
        Cmd(
            "list-trials lt",
            list_trials,
            "list trials of experiment",
            [
                experiment_id_arg("experiment ID"),
                Arg("--csv", action="store_true", help="print as CSV"),
            ],
        ),
        Cmd(
            "list-checkpoints lc",
            checkpoint.list,
            "list checkpoints of experiment",
            [
                experiment_id_arg("experiment ID"),
                Arg(
                    "--best",
                    type=int,
                    help="Return the best N checkpoints for this experiment. "
                    "If this flag is used, only checkpoints with an associated "
                    "validation metric will be considered.",
                ),
                Arg("--csv", action="store_true", help="print as CSV"),
            ],
        ),
        # Create command.
        Cmd(
            "create",
            create,
            "create experiment",
            [
                Arg("config_file",
                    type=FileType("r"),
                    help="experiment config file (.yaml)"),
                Arg("model_def",
                    type=Path,
                    help="file or directory containing model definition"),
                Arg(
                    "-g",
                    "--git",
                    action="store_true",
                    help="Associate git metadata with this experiment. This "
                    "flag assumes that git is installed, a .git repository "
                    "exists in the model definition directory, and that the "
                    "git working tree of that repository is empty.",
                ),
                Arg(
                    "--local",
                    action="store_true",
                    help=
                    "Create the experiment in local mode instead of submitting it to the "
                    "cluster. For more information, see documentation on det.experimental.create()",
                ),
                Arg(
                    "--template",
                    type=str,
                    help=
                    "name of template to apply to the experiment configuration",
                ),
                Group(
                    Arg(
                        "-f",
                        "--follow-first-trial",
                        action="store_true",
                        help=
                        "follow the logs of the first trial that is created",
                    ),
                    Arg("--paused",
                        action="store_true",
                        help="do not activate the experiment"),
                    Arg(
                        "-t",
                        "--test-mode",
                        action="store_true",
                        help="Test the experiment configuration and model "
                        "definition by creating and scheduling a very small "
                        "experiment. This command will verify that a training "
                        "step and validation step run successfully and that "
                        "checkpoints can be saved. The test experiment will "
                        "be archived on creation.",
                    ),
                ),
            ],
        ),
        # Lifecycle management commands.
        Cmd(
            "activate",
            activate,
            "activate experiment",
            [experiment_id_arg("experiment ID to activate")],
        ),
        Cmd("cancel", cancel, "cancel experiment",
            [experiment_id_arg("experiment ID to cancel")]),
        Cmd("pause", pause, "pause experiment",
            [experiment_id_arg("experiment ID to pause")]),
        Cmd(
            "archive",
            archive,
            "archive experiment",
            [experiment_id_arg("experiment ID to archive")],
        ),
        Cmd(
            "unarchive",
            unarchive,
            "unarchive experiment",
            [experiment_id_arg("experiment ID to unarchive")],
        ),
        Cmd(
            "download",
            download,
            "download checkpoints for an experiment",
            [
                # Help text previously read "experiment ID to cancel", a
                # copy-paste from the `cancel` command.
                experiment_id_arg("experiment ID to download checkpoints for"),
                Arg(
                    "-o",
                    "--output-dir",
                    type=str,
                    default="checkpoints",
                    help="Desired top level directory for the checkpoints. "
                    "Checkpoints will be downloaded to "
                    "<output_dir>/<checkpoint_uuid>/<checkpoint_files>.",
                ),
                Arg(
                    "--top-n",
                    type=int,
                    default=1,
                    # The trailing space after each sentence matters: adjacent
                    # literals are concatenated verbatim.
                    help="The number of checkpoints to download for the "
                    "experiment. The checkpoints are sorted by validation "
                    "metric as defined by --sort-by and --smaller-is-better. "
                    "This command will select the best N checkpoints from the "
                    "top performing N trials of the experiment.",
                ),
                Arg(
                    "--sort-by",
                    type=str,
                    default=None,
                    help=
                    "The name of the validation metric to sort on. Without --sort-by, the "
                    "experiment's searcher metric is assumed. If this argument is specified, "
                    "--smaller-is-better must also be specified.",
                ),
                Arg(
                    "--smaller-is-better",
                    # Converts the flag's string value (e.g. "false") to a real bool.
                    # NOTE(review): distutils is deprecated (PEP 632) and removed in
                    # Python 3.12; this converter needs a replacement before upgrading.
                    type=lambda s: bool(distutils.util.strtobool(s)),
                    default=None,
                    help="The sort order for metrics when using --sort-by. For "
                    "example, 'accuracy' would require passing '--smaller-is-better false'. If "
                    "--sort-by is specified, this argument must be specified.",
                ),
                Arg(
                    "-q",
                    "--quiet",
                    action="store_true",
                    help="Only print the paths to the checkpoints.",
                ),
            ],
        ),
        Cmd("kill", kill_experiment, "kill experiment",
            [Arg("experiment_id", help="experiment ID")]),
        Cmd(
            "wait",
            wait,
            "wait for experiment to reach terminal state",
            [
                experiment_id_arg("experiment ID"),
                Arg(
                    "--polling-interval",
                    type=int,
                    default=5,
                    help="the interval (in seconds) to poll for updated state",
                ),
            ],
        ),
        # Attribute setting commands.
        Cmd(
            "label",
            None,
            "manage experiment labels",
            [
                Cmd(
                    "add",
                    add_label,
                    "add label",
                    [
                        experiment_id_arg("experiment ID"),
                        Arg("label", help="label")
                    ],
                ),
                Cmd(
                    "remove",
                    remove_label,
                    "remove label",
                    [
                        experiment_id_arg("experiment ID"),
                        Arg("label", help="label")
                    ],
                ),
            ],
        ),
        Cmd(
            "set",
            None,
            "set experiment attributes",
            [
                Cmd(
                    "description",
                    set_description,
                    "set experiment description",
                    [
                        experiment_id_arg("experiment ID to modify"),
                        Arg("description", help="experiment description"),
                    ],
                ),
                Cmd(
                    "gc-policy",
                    set_gc_policy,
                    "set experiment GC policy and run GC",
                    [
                        experiment_id_arg("experiment ID to modify"),
                        Arg(
                            "--save-experiment-best",
                            type=int,
                            required=True,
                            help="number of best checkpoints per experiment "
                            "to save",
                        ),
                        Arg(
                            "--save-trial-best",
                            type=int,
                            required=True,
                            help="number of best checkpoints per trial to save",
                        ),
                        Arg(
                            "--save-trial-latest",
                            type=int,
                            required=True,
                            help=
                            "number of latest checkpoints per trial to save",
                        ),
                        Arg(
                            "--yes",
                            action="store_true",
                            default=False,
                            help="automatically answer yes to prompts",
                        ),
                    ],
                ),
                Cmd(
                    "max-slots",
                    set_max_slots,
                    "set `max_slots` of experiment",
                    [
                        experiment_id_arg("experiment ID to modify"),
                        Arg("max_slots", type=none_or_int, help="max slots"),
                    ],
                ),
                Cmd(
                    "weight",
                    set_weight,
                    "set `weight` of experiment",
                    [
                        experiment_id_arg("experiment ID to modify"),
                        Arg("weight", type=float, help="weight"),
                    ],
                ),
            ],
        ),
    ],
)
示例#3
0
        help="master address",
        metavar="address",
        default=get_default_master_address()),
    Arg("-v",
        "--version",
        action="version",
        help="print CLI version and exit",
        version="%(prog)s {}".format(determined_cli.__version__)),
    experiment.args_description,
    checkpoint.args_description,
    Cmd(
        "task", None,
        "manage tasks (commands, experiments, notebooks, shells, tensorboards)",
        [
            Cmd("list",
                list_tasks,
                "list tasks in cluster", [
                    Arg("--csv", action="store_true", help="print as CSV"),
                ],
                is_default=True),
        ]),
    Cmd("preview-search", preview_search, "preview search", [
        Arg("config_file",
            type=FileType("r"),
            help="experiment config file (.yaml)")
    ]),
]  # type: List[object]

# fmt: on

all_args_description = (args_description + master_args_description +
示例#4
0
# Command tree for `experiment`: every Cmd takes a name, a handler (None for a
# pure grouping command), a short help summary, and its nested Cmd/Arg/Group
# entries.
args_description = Cmd(
    "e|xperiment",
    None,
    "manage experiments",
    [
        # ---- Inspection ----
        Cmd(
            "list",
            list_experiments,
            "list experiments",
            [
                Arg(
                    "--all",
                    "-a",
                    action="store_true",
                    help="show all experiments (including archived and other users')",
                ),
                Arg("--csv", action="store_true", help="print as CSV"),
            ],
            is_default=True,
        ),
        Cmd(
            "config",
            config,
            "display experiment config",
            [experiment_id_arg("experiment ID")],
        ),
        Cmd(
            "describe",
            describe,
            "describe experiment",
            [
                Arg(
                    "experiment_ids",
                    help="comma-separated list of experiment IDs to describe",
                ),
                Arg("--metrics", action="store_true", help="display full metrics"),
                # Output-format options are wrapped together in one Group.
                Group(
                    Arg("--csv", action="store_true", help="print as CSV"),
                    Arg("--json", action="store_true", help="print as JSON"),
                    Arg("--outdir", type=Path, help="directory to save output"),
                ),
            ],
        ),
        Cmd(
            "download-model-def",
            download_model_def,
            "download model definition",
            [
                experiment_id_arg("experiment ID"),
                Arg("--output-dir", type=Path, help="output directory", default="."),
            ],
        ),
        Cmd(
            "list-trials lt",
            list_trials,
            "list trials of experiment",
            [
                experiment_id_arg("experiment ID"),
                Arg("--csv", action="store_true", help="print as CSV"),
            ],
        ),
        Cmd(
            "list-checkpoints lc",
            checkpoint.list,
            "list checkpoints of experiment",
            [
                experiment_id_arg("experiment ID"),
                Arg(
                    "--best",
                    type=int,
                    help=(
                        "Return the best N checkpoints for this experiment. If this flag is "
                        "used, only checkpoints with an associated validation metric will be "
                        "considered."
                    ),
                ),
                Arg(
                    "-d",
                    "--download-dir",
                    type=Path,
                    help=(
                        "download the listed checkpoints to this directory. The resources of "
                        "each checkpoint will be saved in a subdirectory labeled with the "
                        "experiment ID, trial ID, and step ID. This flag is only supported for "
                        "experiments configured to use S3 or GCS checkpoint storage."
                    ),
                ),
                Arg("--csv", action="store_true", help="print as CSV"),
            ],
        ),
        # ---- Creation ----
        Cmd(
            "create",
            create,
            "create experiment",
            [
                Arg(
                    "config_file",
                    type=FileType("r"),
                    help="experiment config file (.yaml)",
                ),
                Arg(
                    "model_def",
                    type=Path,
                    help="file or directory containing model definition",
                ),
                Arg(
                    "-g",
                    "--git",
                    action="store_true",
                    help=(
                        "Associate git metadata with this experiment. This flag assumes that "
                        "git is installed, a .git repository exists in the model definition "
                        "directory, and that the git working tree of that repository is empty."
                    ),
                ),
                Arg(
                    "--local",
                    action="store_true",
                    help=(
                        "Create the experiment in local mode instead of submitting it to "
                        "the cluster. For more information, see documentation on "
                        "det.experimental.create() and det.experimental.Mode.LOCAL"
                    ),
                ),
                Arg(
                    "--template",
                    type=str,
                    help="name of template to apply to the experiment configuration",
                ),
                Group(
                    Arg(
                        "-f",
                        "--follow-first-trial",
                        action="store_true",
                        help="follow the logs of the first trial that is created",
                    ),
                    Arg(
                        "--paused",
                        action="store_true",
                        help="do not activate the experiment",
                    ),
                    Arg(
                        "-t",
                        "--test-mode",
                        action="store_true",
                        help=(
                            "Test the experiment configuration and model definition by creating "
                            "and scheduling a very small experiment. This command will verify "
                            "that a training step and validation step run successfully and that "
                            "checkpoints can be saved. The test experiment will be archived on "
                            "creation."
                        ),
                    ),
                ),
            ],
        ),
        # ---- Lifecycle management ----
        Cmd(
            "activate",
            activate,
            "activate experiment",
            [experiment_id_arg("experiment ID to activate")],
        ),
        Cmd(
            "cancel",
            cancel,
            "cancel experiment",
            [experiment_id_arg("experiment ID to cancel")],
        ),
        Cmd(
            "pause",
            pause,
            "pause experiment",
            [experiment_id_arg("experiment ID to pause")],
        ),
        Cmd(
            "archive",
            archive,
            "archive experiment",
            [experiment_id_arg("experiment ID to archive")],
        ),
        Cmd(
            "unarchive",
            unarchive,
            "unarchive experiment",
            [experiment_id_arg("experiment ID to unarchive")],
        ),
        Cmd(
            "kill",
            kill_experiment,
            "kill experiment",
            # Uses a plain Arg rather than experiment_id_arg, unlike its siblings.
            [Arg("experiment_id", help="experiment ID")],
        ),
        Cmd(
            "wait",
            wait,
            "wait for experiment to reach terminal state",
            [
                experiment_id_arg("experiment ID"),
                Arg(
                    "--polling-interval",
                    type=int,
                    default=5,
                    help="the interval (in seconds) to poll for updated state",
                ),
            ],
        ),
        # ---- Attribute setters ----
        Cmd(
            "label",
            None,
            "manage experiment labels",
            [
                Cmd(
                    "add",
                    add_label,
                    "add label",
                    [experiment_id_arg("experiment ID"), Arg("label", help="label")],
                ),
                Cmd(
                    "remove",
                    remove_label,
                    "remove label",
                    [experiment_id_arg("experiment ID"), Arg("label", help="label")],
                ),
            ],
        ),
        Cmd(
            "set",
            None,
            "set experiment attributes",
            [
                Cmd(
                    "description",
                    set_description,
                    "set experiment description",
                    [
                        experiment_id_arg("experiment ID to modify"),
                        Arg("description", help="experiment description"),
                    ],
                ),
                Cmd(
                    "gc-policy",
                    set_gc_policy,
                    "set experiment GC policy and run GC",
                    [
                        experiment_id_arg("experiment ID to modify"),
                        # All three retention counts are mandatory options.
                        Arg(
                            "--save-experiment-best",
                            type=int,
                            required=True,
                            help="number of best checkpoints per experiment to save",
                        ),
                        Arg(
                            "--save-trial-best",
                            type=int,
                            required=True,
                            help="number of best checkpoints per trial to save",
                        ),
                        Arg(
                            "--save-trial-latest",
                            type=int,
                            required=True,
                            help="number of latest checkpoints per trial to save",
                        ),
                        Arg(
                            "--yes",
                            action="store_true",
                            default=False,
                            help="automatically answer yes to prompts",
                        ),
                    ],
                ),
                Cmd(
                    "max-slots",
                    set_max_slots,
                    "set `max_slots` of experiment",
                    [
                        experiment_id_arg("experiment ID to modify"),
                        Arg("max_slots", type=none_or_int, help="max slots"),
                    ],
                ),
                Cmd(
                    "weight",
                    set_weight,
                    "set `weight` of experiment",
                    [
                        experiment_id_arg("experiment ID to modify"),
                        Arg("weight", type=float, help="weight"),
                    ],
                ),
            ],
        ),
    ],
)