def set_gc_policy(args: Namespace) -> None:
    """Change the checkpoint garbage collection policy of an experiment.

    Unless ``args.yes`` is set, the master's ``preview_gc`` endpoint is
    queried first and the checkpoints that the new policy would delete are
    summarized, after which the user is asked to confirm.

    Args:
        args: Parsed CLI arguments. Reads ``master``, ``experiment_id``,
            ``yes``, ``save_experiment_best``, ``save_trial_best`` and
            ``save_trial_latest``.
    """
    policy = {
        "save_experiment_best": args.save_experiment_best,
        "save_trial_best": args.save_trial_best,
        "save_trial_latest": args.save_trial_latest,
    }

    if not args.yes:
        preview_path = "experiments/{}/preview_gc".format(args.experiment_id)
        response = api.get(args.master, preview_path, params=policy).json()
        checkpoints = response["checkpoints"]
        metric_name = response["metric_name"]

        headers = [
            "Trial ID",
            "Step ID",
            "State",
            "Validation Metric\n({})".format(metric_name),
            "UUID",
            "Resources",
        ]

        # Only checkpoints whose step carries a validation get a table row;
        # the rest are merely counted in the summary line below.
        ordered = sorted(checkpoints, key=lambda ckpt: (ckpt["trial_id"], ckpt["step_id"]))
        rows = []
        for ckpt in ordered:
            if "step" not in ckpt or ckpt["step"].get("validation") is None:
                continue
            rows.append(
                [
                    ckpt["trial_id"],
                    ckpt["step_id"],
                    ckpt["state"],
                    api.metric.get_validation_metric(metric_name, ckpt["step"]["validation"]),
                    ckpt["uuid"],
                    render.format_resources(ckpt["resources"]),
                ]
            )

        if rows:
            print(
                "The following checkpoints with validation will be deleted "
                "by applying this GC Policy:"
            )
            print(tabulate.tabulate(rows, headers, tablefmt="presto"), flush=FLUSH)

        summary = (
            "This policy will delete {} checkpoints with "
            "validations and {} checkpoints without validations."
        )
        print(summary.format(len(rows), len(checkpoints) - len(rows)))

    confirmation = (
        "Changing the checkpoint garbage collection policy of an "
        "experiment may result\n"
        "in the unrecoverable deletion of checkpoints. Do you wish to "
        "proceed?"
    )
    # With --yes the prompt is skipped entirely (short-circuit).
    if not args.yes and not render.yes_or_no(confirmation):
        print("Aborting operations.")
        return

    patch_experiment(args, "change gc policy of", {"checkpoint_storage": policy})
    print("Set GC policy of experiment {} to\n{}".format(args.experiment_id, pformat(policy)))
def delete_experiment(args: Namespace) -> None:
    """Delete an experiment on the master after confirmation.

    The confirmation prompt is skipped when ``args.yes`` is set.

    Args:
        args: Parsed CLI arguments. Reads ``master``, ``experiment_id`` and
            ``yes``.
    """
    warning = (
        "Deleting an experiment will result in the unrecoverable \n"
        "deletion of all associated logs, checkpoints, and other \n"
        "metadata associated with the experiment. For a recoverable \n"
        "alternative, see the 'det archive' command. Do you still \n"
        "wish to proceed?"
    )

    # With --yes the prompt is never shown (short-circuit).
    if not args.yes and not render.yes_or_no(warning):
        print("Aborting experiment deletion.")
        return

    api.delete(args.master, "experiments/{}".format(args.experiment_id))
    print("Successfully deleted experiment {}".format(args.experiment_id))
def main(args: List[str] = sys.argv[1:]) -> None:
    """Parse ``args`` and dispatch to the selected CLI subcommand.

    Before the subcommand runs, ``check_version`` is called against the
    master. If that raises an SSL error, the master's certificate is fetched,
    its SHA256 fingerprint is shown, and the user may choose to store and
    trust it. Failures are reported through the parser's ``exit`` with a
    colored message.

    Args:
        args: Command-line arguments, excluding the program name. Note that
            the default is captured from ``sys.argv`` when this function is
            defined, not when it is called.
    """
    # TODO(#1690): Refactor admin command(s) to a separate CLI tool.
    if "DET_ADMIN" in os.environ:
        delete_cmd = Cmd(
            "delete",
            experiment.delete_experiment,
            "delete experiment",
            [
                Arg("experiment_id", help="delete experiment"),
                Arg(
                    "--yes",
                    action="store_true",
                    default=False,
                    help="automatically answer yes to prompts",
                ),
            ],
        )
        experiment_args_description.subs.append(delete_cmd)

    try:
        parser = make_parser()
        argcomplete.autocomplete(parser)
        parsed_args = parser.parse_args(args)

        def die(message: str, always_print_traceback: bool = False) -> None:
            """Exit with status 1 and ``message`` in red.

            A traceback is printed first when requested or when the
            ``DET_DEBUG`` environment variable is set to a truthy value.
            """
            if always_print_traceback or os.getenv("DET_DEBUG", "").lower() in (
                "true",
                "1",
                "yes",
            ):
                import traceback

                traceback.print_exc()

            parser.exit(1, colored(message + "\n", "red"))

        if not vars(parsed_args).get("func"):
            parser.print_usage()
            parser.exit(2, "{}: no subcommand specified\n".format(parser.prog))

        # Use a previously stored master certificate, if there is one.
        cert_path = str(auth.get_config_path().joinpath("master.crt"))
        if os.path.exists(cert_path):
            os.environ["REQUESTS_CA_BUNDLE"] = cert_path

        try:
            try:
                check_version(parsed_args)
            except requests.exceptions.SSLError:
                # An SSLError usually means that we queried a master over HTTPS and got an
                # untrusted cert, so allow the user to store and trust the current cert. (It
                # could also mean that we tried to talk HTTPS on the HTTP port, but
                # distinguishing that based on the exception is annoying, and we'll figure
                # that out in the next step anyway.)
                master_addr = api.parse_master_address(parsed_args.master)
                check_not_none(master_addr.hostname)
                check_not_none(master_addr.port)
                endpoint = (cast(str, master_addr.hostname), cast(int, master_addr.port))
                try:
                    cert_pem_data = ssl.get_server_certificate(endpoint)
                except ssl.SSLError:
                    die(
                        "Tried to connect over HTTPS but couldn't get a certificate from the "
                        "master; consider using HTTP"
                    )

                cert_der = ssl.PEM_cert_to_DER_cert(cert_pem_data)
                digest = hashlib.sha256(cert_der).hexdigest()
                fingerprint = ":".join(chunks(digest, 2))

                trust_prompt = (
                    "The master sent an untrusted certificate with this SHA256 fingerprint:\n"
                    "{}\nDo you want to trust this certificate from now on?"
                ).format(fingerprint)
                if not render.yes_or_no(trust_prompt):
                    die("Unable to verify master certificate")

                # Persist the certificate and retry the version check with it trusted.
                with open(cert_path, "w") as cert_file:
                    cert_file.write(cert_pem_data)
                os.environ["REQUESTS_CA_BUNDLE"] = cert_path

                check_version(parsed_args)

            parsed_args.func(parsed_args)
        except KeyboardInterrupt:
            # Let the outer handler deal with interrupts instead of the
            # catch-all ``except Exception`` chain below.
            raise
        except (api.errors.BadRequestException, api.errors.BadResponseException) as err:
            die("Failed to {}: {}".format(parsed_args.func.__name__, err))
        except api.errors.CorruptTokenCacheException:
            die(
                "Failed to login: Attempted to read a corrupted token cache. "
                "The store has been deleted; please try again."
            )
        except Exception:
            die("Failed to {}".format(parsed_args.func.__name__), always_print_traceback=True)
    except KeyboardInterrupt:
        parser.exit(3, colored("Interrupting...\n", "red"))