Пример #1
0
def set_gc_policy(args: Namespace) -> None:
    """Preview and apply a checkpoint garbage collection policy to an experiment.

    Unless ``args.yes`` is set, the master's ``preview_gc`` endpoint is queried
    with the proposed policy, the affected checkpoints that carry a validation
    are printed as a table, and the user is asked to confirm. On confirmation
    (or when ``args.yes`` is set) the policy is submitted via
    ``patch_experiment`` under the ``checkpoint_storage`` key.

    Args:
        args: Parsed CLI arguments. Reads ``save_experiment_best``,
            ``save_trial_best``, ``save_trial_latest``, ``yes``, ``master`` and
            ``experiment_id``.
    """
    policy = {
        name: getattr(args, name)
        for name in ("save_experiment_best", "save_trial_best", "save_trial_latest")
    }

    if not args.yes:
        preview_path = "experiments/{}/preview_gc".format(args.experiment_id)
        preview = api.get(args.master, preview_path, params=policy).json()
        checkpoints = preview["checkpoints"]
        metric_name = preview["metric_name"]

        headers = [
            "Trial ID",
            "# of Batches",
            "State",
            "Validation Metric\n({})".format(metric_name),
            "UUID",
            "Resources",
        ]

        def by_trial_then_step(ckpt):  # type: ignore
            return (ckpt["trial_id"], ckpt["step_id"])

        rows = []
        for ckpt in sorted(checkpoints, key=by_trial_then_step):
            # Only checkpoints that have a validation are listed in the table;
            # the rest are merely counted in the summary line below.
            if "step" not in ckpt or ckpt["step"].get("validation") is None:
                continue
            rows.append(
                [
                    ckpt["trial_id"],
                    ckpt["step"]["total_batches"],
                    ckpt["state"],
                    api.metric.get_validation_metric(metric_name, ckpt["step"]["validation"]),
                    ckpt["uuid"],
                    render.format_resources(ckpt["resources"]),
                ]
            )

        if rows:
            print(
                "The following checkpoints with validation will be deleted by "
                "applying this GC Policy:"
            )
            print(tabulate.tabulate(rows, headers, tablefmt="presto"), flush=FLUSH)

        summary = (
            "This policy will delete {} checkpoints with validations "
            "and {} checkpoints without validations."
        )
        print(summary.format(len(rows), len(checkpoints) - len(rows)))

    prompt = (
        "Changing the checkpoint garbage collection policy of an experiment may result\n"
        "in the unrecoverable deletion of checkpoints.  Do you wish to proceed?"
    )
    if not (args.yes or render.yes_or_no(prompt)):
        print("Aborting operations.")
        return

    patch_experiment(args, "change gc policy of", {"checkpoint_storage": policy})
    print("Set GC policy of experiment {} to\n{}".format(args.experiment_id, pformat(policy)))
Пример #2
0
def delete_experiment(args: Namespace) -> None:
    """Delete an experiment through the master's REST API after confirmation.

    The interactive confirmation is skipped when ``args.yes`` is set. If the
    user declines, nothing is sent to the master.

    Args:
        args: Parsed CLI arguments. Reads ``yes``, ``master`` and
            ``experiment_id``.
    """
    warning = (
        "Deleting an experiment will result in the unrecoverable \n"
        "deletion of all associated logs, checkpoints, and other \n"
        "metadata associated with the experiment. For a recoverable \n"
        "alternative, see the 'det archive' command. Do you still \n"
        "wish to proceed?"
    )

    # Guard clause: bail out unless the deletion was pre-approved or confirmed.
    if not (args.yes or render.yes_or_no(warning)):
        print("Aborting experiment deletion.")
        return

    endpoint = "/api/v1/experiments/{}".format(args.experiment_id)
    api.delete(args.master, endpoint)
    print("Successfully deleted experiment {}".format(args.experiment_id))
Пример #3
0
def delete_experiment(args: Namespace) -> None:
    """Request deletion of an experiment via the generated API bindings.

    The interactive confirmation is skipped when ``args.yes`` is set. If the
    user declines, no request is made. The success message reports the delete
    as "in progress" rather than completed.

    Args:
        args: Parsed CLI arguments. Reads ``yes`` and ``experiment_id``; the
            whole namespace is also passed to ``setup_session``.
    """
    warning = (
        "Deleting an experiment will result in the unrecoverable \n"
        "deletion of all associated logs, checkpoints, and other \n"
        "metadata associated with the experiment. For a recoverable \n"
        "alternative, see the 'det archive' command. Do you still \n"
        "wish to proceed?"
    )

    # Guard clause: bail out unless the deletion was pre-approved or confirmed.
    if not (args.yes or render.yes_or_no(warning)):
        print("Aborting experiment deletion.")
        return

    session = setup_session(args)
    bindings.delete_DeleteExperiment(session, experimentId=args.experiment_id)
    print("Delete of experiment {} is in progress".format(args.experiment_id))
Пример #4
0
def main(args: List[str] = sys.argv[1:]) -> None:
    """Entry point of the CLI: parse ``args`` and run the selected subcommand.

    Steps, in order:

    1. Build the parser, enable argcomplete, and parse ``args``. If no
       subcommand handler (``func``) was selected, print usage and exit with
       status 2.
    2. If a ``master.crt`` file exists in the auth config directory, register
       it as the certificate bundle for master requests.
    3. For the deploy command, run the handler directly without contacting the
       master.
    4. Otherwise call ``check_version``. On ``requests.exceptions.SSLError``,
       fetch the master's certificate chain with a raw TLS handshake, show its
       SHA256 fingerprint, and ask the user whether to trust it. If accepted,
       the chain is written to ``master.crt``, registered, and
       ``check_version`` is retried.
    5. Run the subcommand handler. Known API errors exit with status 1 and a
       red message; any other exception additionally prints a traceback.

    A ``KeyboardInterrupt`` at any point exits with status 3.

    Args:
        args: Command-line arguments without the program name. The default is
            evaluated once, when this function is defined.
    """
    try:
        parser = make_parser()
        argcomplete.autocomplete(parser)

        parsed_args = parser.parse_args(args)

        def die(message: str, always_print_traceback: bool = False) -> None:
            """Print ``message`` in red and exit with status 1.

            The active exception's traceback is printed first when requested
            or when debug mode is on.
            """
            if always_print_traceback or debug_mode():
                import traceback

                traceback.print_exc()

            parser.exit(1, colored(message + "\n", "red"))

        v = vars(parsed_args)
        if not v.get("func"):
            parser.print_usage()
            parser.exit(2, "{}: no subcommand specified\n".format(parser.prog))

        cert_fn = str(auth.get_config_path().joinpath("master.crt"))
        if os.path.exists(cert_fn):
            api.request.set_master_cert_bundle(cert_fn)

        try:
            # For `det deploy`, skip interaction with master.
            if v.get("_command") == DEPLOY_CMD_NAME:
                parsed_args.func(parsed_args)
                return

            try:
                check_version(parsed_args)
            except requests.exceptions.SSLError:
                # An SSLError usually means that we queried a master over HTTPS and got an untrusted
                # cert, so allow the user to store and trust the current cert. (It could also mean
                # that we tried to talk HTTPS on the HTTP port, but distinguishing that based on the
                # exception is annoying, and we'll figure that out in the next step anyway.)
                addr = api.parse_master_address(parsed_args.master)
                check_not_none(addr.hostname)
                check_not_none(addr.port)
                try:
                    ctx = OpenSSL.SSL.Context(OpenSSL.SSL.TLSv1_2_METHOD)
                    conn = OpenSSL.SSL.Connection(ctx, socket.socket())
                    conn.set_tlsext_host_name(cast(str, addr.hostname).encode())
                    conn.connect((addr.hostname, addr.port))
                    conn.do_handshake()
                    # Concatenate every certificate of the presented chain as PEM text.
                    cert_pem_data = "".join(
                        OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, cert).decode()
                        for cert in conn.get_peer_cert_chain()
                    )
                except OpenSSL.SSL.Error:
                    die(
                        "Tried to connect over HTTPS but couldn't get a certificate from the "
                        "master; consider using HTTP"
                    )

                # Render the SHA256 digest as colon-separated byte pairs for display.
                cert_hash = hashlib.sha256(ssl.PEM_cert_to_DER_cert(cert_pem_data)).hexdigest()
                cert_fingerprint = ":".join(chunks(cert_hash, 2))

                if not render.yes_or_no(
                    "The master sent an untrusted certificate chain with this SHA256 fingerprint:\n"
                    "{}\nDo you want to trust this certificate from now on?".format(
                        cert_fingerprint
                    )
                ):
                    die("Unable to verify master certificate")

                # Persist the accepted chain so later invocations trust it automatically.
                with open(cert_fn, "w") as out:
                    out.write(cert_pem_data)
                api.request.set_master_cert_bundle(cert_fn)

                check_version(parsed_args)

            parsed_args.func(parsed_args)
        except KeyboardInterrupt:
            # Let the outer handler deal with interrupts; a bare ``raise`` keeps
            # the original traceback intact.
            raise
        except (api.errors.BadRequestException, api.errors.BadResponseException) as e:
            die("Failed to {}: {}".format(parsed_args.func.__name__, e))
        except api.errors.CorruptTokenCacheException:
            die(
                "Failed to login: Attempted to read a corrupted token cache. "
                "The store has been deleted; please try again."
            )
        except EnterpriseOnlyError as e:
            die(f"Determined Enterprise Edition is required for this functionality: {e}")
        except Exception:
            die("Failed to {}".format(parsed_args.func.__name__), always_print_traceback=True)
    except KeyboardInterrupt:
        # Do not go through ``parser.exit`` here: the interrupt may arrive before
        # ``parser`` has been assigned (e.g. inside ``make_parser``), in which
        # case referencing it would raise UnboundLocalError instead of exiting.
        sys.stderr.write(colored("Interrupting...\n", "red"))
        sys.exit(3)
Пример #5
0
def main(args: List[str] = sys.argv[1:]) -> None:
    """Entry point of the CLI: parse ``args`` and run the selected subcommand.

    Steps, in order:

    1. Build the parser. When the first argument is the deploy command (or one
       of its aliases), only the lazily imported deploy argument description is
       registered; otherwise all argument descriptions are registered.
    2. Enable argcomplete and parse ``args``. If no subcommand handler
       (``func``) was selected, print usage and exit with status 2.
    3. For the deploy command, run the handler directly without contacting the
       master.
    4. Otherwise load the CLI certificate for the master and call
       ``check_version``. On ``requests.exceptions.SSLError``, fetch the
       master's certificate chain with a raw TLS handshake, show its SHA256
       fingerprint, and ask the user whether to trust it. If accepted, the
       chain is stored in the default cert store, the CLI certificate is
       replaced (keeping its previous name), and ``check_version`` is retried.
    5. Run the subcommand handler. Known API errors exit with status 1 and a
       red message; any other exception additionally prints a traceback.

    A ``KeyboardInterrupt`` inside the parsing/dispatch section exits with
    status 3.

    Args:
        args: Command-line arguments without the program name. The default is
            evaluated once, when this function is defined.
    """
    # TODO: we lazily import "det deploy" but in the future we'd want to lazily import everything.
    parser = make_parser()

    full_cmd, aliases = generate_aliases(deploy_cmd.name)
    is_deploy_cmd = len(args) > 0 and args[0] in [*aliases, full_cmd]
    if is_deploy_cmd:
        from determined.deploy.cli import args_description as deploy_args_description

        add_args(parser, [deploy_args_description])
    else:
        add_args(parser, all_args_description)

    try:
        argcomplete.autocomplete(parser)

        parsed_args = parser.parse_args(args)

        def die(message: str, always_print_traceback: bool = False) -> None:
            """Print ``message`` in red and exit with status 1.

            The active exception's traceback is printed to stderr first when
            requested or when debug mode is on.
            """
            if always_print_traceback or debug_mode():
                import traceback

                traceback.print_exc(file=sys.stderr)

            parser.exit(1, colored(message + "\n", "red"))

        v = vars(parsed_args)
        if not v.get("func"):
            parser.print_usage()
            parser.exit(2, "{}: no subcommand specified\n".format(parser.prog))

        try:
            # For `det deploy`, skip interaction with master.
            if is_deploy_cmd:
                parsed_args.func(parsed_args)
                return

            # Configure the CLI's Cert singleton.
            certs.cli_cert = certs.default_load(parsed_args.master)

            try:
                check_version(parsed_args)
            except requests.exceptions.SSLError:
                # An SSLError usually means that we queried a master over HTTPS and got an untrusted
                # cert, so allow the user to store and trust the current cert. (It could also mean
                # that we tried to talk HTTPS on the HTTP port, but distinguishing that based on the
                # exception is annoying, and we'll figure that out in the next step anyway.)
                addr = api.parse_master_address(parsed_args.master)
                check_not_none(addr.hostname)
                check_not_none(addr.port)
                try:
                    ctx = SSL.Context(SSL.TLSv1_2_METHOD)
                    conn = SSL.Connection(ctx, socket.socket())
                    conn.set_tlsext_host_name(cast(str, addr.hostname).encode())
                    conn.connect(cast(Sequence[Union[str, int]], (addr.hostname, addr.port)))
                    conn.do_handshake()
                    # Concatenate every certificate of the presented chain as PEM text.
                    cert_pem_data = "".join(
                        crypto.dump_certificate(crypto.FILETYPE_PEM, cert).decode()
                        for cert in conn.get_peer_cert_chain()
                    )
                except (SSL.Error, crypto.Error):
                    # Handshake failures surface as SSL.Error, which is unrelated to
                    # crypto.Error; catch both so the HTTP hint is actually shown.
                    die(
                        "Tried to connect over HTTPS but couldn't get a certificate from the "
                        "master; consider using HTTP"
                    )

                # Render the SHA256 digest as colon-separated byte pairs for display.
                cert_hash = hashlib.sha256(ssl.PEM_cert_to_DER_cert(cert_pem_data)).hexdigest()
                cert_fingerprint = ":".join(chunks(cert_hash, 2))

                if not render.yes_or_no(
                    "The master sent an untrusted certificate chain with this SHA256 fingerprint:\n"
                    "{}\nDo you want to trust this certificate from now on?".format(
                        cert_fingerprint
                    )
                ):
                    die("Unable to verify master certificate")

                certs.CertStore(certs.default_store()).set_cert(parsed_args.master, cert_pem_data)
                # Reconfigure the CLI's Cert singleton, but preserve the certificate name.
                old_cert_name = certs.cli_cert.name
                certs.cli_cert = certs.Cert(cert_pem=cert_pem_data, name=old_cert_name)

                check_version(parsed_args)

            parsed_args.func(parsed_args)
        except KeyboardInterrupt:
            # Let the outer handler deal with interrupts; a bare ``raise`` keeps
            # the original traceback intact.
            raise
        except (api.errors.BadRequestException, api.errors.BadResponseException) as e:
            die("Failed to {}: {}".format(parsed_args.func.__name__, e))
        except api.errors.CorruptTokenCacheException:
            die(
                "Failed to login: Attempted to read a corrupted token cache. "
                "The store has been deleted; please try again."
            )
        except EnterpriseOnlyError as e:
            die(f"Determined Enterprise Edition is required for this functionality: {e}")
        except Exception:
            die("Failed to {}".format(parsed_args.func.__name__), always_print_traceback=True)
    except KeyboardInterrupt:
        # die() may not be defined yet.
        if debug_mode():
            import traceback

            traceback.print_exc(file=sys.stderr)

        print(colored("Interrupting...\n", "red"), file=sys.stderr)
        # Use sys.exit rather than the ``exit`` builtin, which is injected by the
        # ``site`` module and is not guaranteed to exist in every runtime.
        sys.exit(3)