def deploy_gcp(command: str, args: argparse.Namespace) -> None:
    """Run a `det deploy gcp` subcommand (`up` or `down`) through Terraform.

    Args:
        command: The subcommand being executed; `"down"` tears the cluster
            down, anything else is treated as an `up` deployment.
        args: Parsed CLI arguments. Mutated in place: `local_state_path` is
            normalized to an absolute path.

    Raises:
        ValueError: If `--master-config-template-path` points at a file that
            does not exist.
        SystemExit: On mismatched CPU/GPU image flags, or when the master
            health check times out after deployment.
    """
    # Preprocess the local path to store the states.
    # Set local state path as our current working directory. This is a no-op
    # when the --local-state-path arg isn't used. We do this because Terraform
    # module directories are populated with relative paths, and we want to
    # support users running gcp up and down commands from different directories.
    # Also, because we change the working directory, we ensure that
    # local_state_path is an absolute path.
    args.local_state_path = os.path.abspath(args.local_state_path)
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(args.local_state_path, exist_ok=True)
    os.chdir(args.local_state_path)

    # tf_state_gcs_bucket_name argument is not necessary for `down` operation,
    # because TF reads it from local tf files.
    if hasattr(args, "tf_state_gcs_bucket_name") and args.tf_state_gcs_bucket_name:
        print("Using GCS bucket for state:", args.tf_state_gcs_bucket_name)
    else:
        print("Using local state path:", args.local_state_path)

    # Set the TF_DATA_DIR where Terraform will store its supporting files.
    env = os.environ.copy()
    env["TF_DATA_DIR"] = os.path.join(args.local_state_path, "terraform_data")

    # Initialize determined configurations from all non-None CLI arguments.
    det_configs = {name: value for name, value in vars(args).items() if value is not None}

    # Handle down subcommand.
    if command == "down":
        gcp.delete(det_configs, env, args.yes)
        print("Delete Successful")
        return

    # Handle Up subcommand.
    if (args.cpu_env_image and not args.gpu_env_image) or (
        args.gpu_env_image and not args.cpu_env_image
    ):
        print("If a CPU or GPU image is specified, both should be.")
        sys.exit(1)

    if args.master_config_template_path:
        if not args.master_config_template_path.exists():
            raise ValueError(
                f"Input master config template doesn't exist: {args.master_config_template_path}"
            )
        with args.master_config_template_path.open("r") as fin:
            det_configs["master_config_template"] = fin.read()

    # Not all args will be passed to Terraform, list the ones that won't be.
    # TODO(ilia): Switch to filtering variables_to_include instead, i.e.
    # only pass the ones recognized by terraform.
    variables_to_exclude = [
        "command",
        "dry_run",
        "environment",
        "local_state_path",
        "master",
        "user",
        "no_preflight_checks",
        "no_wait_for_master",
        "yes",
        "no_prompt",
        "master_config_template_path",
        "tf_state_gcs_bucket_name",
        "func",
        "_command",
        "_subcommand",
        "_subsubcommand",
    ]

    # Dry-run flag: show the Terraform plan without applying it.
    if args.dry_run:
        gcp.dry_run(det_configs, env, variables_to_exclude)
        print("Printed plan. To execute, run `det deploy gcp`")
        return

    print("Starting Determined deployment on GCP...\n")
    gcp.deploy(det_configs, env, variables_to_exclude)

    if not args.no_wait_for_master:
        try:
            gcp.wait_for_master(det_configs, env, timeout=5 * 60)
        except MasterTimeoutExpired:
            print(
                colored(
                    "Determined cluster has been deployed, but master health check has failed.",
                    "red",
                )
            )
            print(
                "For details, SSH to master instance and run "
                "`sudo journalctl -u google-startup-scripts.service`"
                " or check /var/log/cloud-init-output.log."
            )
            sys.exit(1)

    print("Determined Deployment Successful")

    if args.no_wait_for_master:
        print(
            "Please allow 1-5 minutes for the master instance to be accessible via the web-ui\n"
        )
def deploy_gcp(command: str, args: argparse.Namespace) -> None:
    """Execute a `det deploy gcp` subcommand (`up` or `down`) via Terraform.

    Normalizes `args.local_state_path` to an absolute path and makes it the
    working directory, since the Terraform modules use relative paths.
    """
    # Anchor everything at the absolute local state path so up/down can be
    # run from any directory; a no-op when --local-state-path isn't used.
    state_path = os.path.abspath(args.local_state_path)
    args.local_state_path = state_path
    if not os.path.exists(state_path):
        os.makedirs(state_path)
    os.chdir(state_path)

    # Terraform keeps its supporting files under TF_DATA_DIR.
    env = os.environ.copy()
    env["TF_DATA_DIR"] = os.path.join(state_path, "terraform_data")

    # Fold every non-None CLI argument into the deployment configuration.
    det_configs = {name: value for name, value in vars(args).items() if value is not None}

    # CLI-layer-only arguments that must not be forwarded to Terraform.
    # TODO(ilia): Switch to filtering variables_to_include instead, i.e.
    # only pass the ones recognized by terraform.
    variables_to_exclude = [
        "command",
        "dry_run",
        "environment",
        "local_state_path",
        "master",
        "user",
        "no_preflight_checks",
        "no_wait_for_master",
        "func",
        "_command",
        "_subcommand",
        "_subsubcommand",
    ]

    # Teardown path.
    if command == "down":
        gcp.delete(det_configs, env)
        print("Delete Successful")
        return

    # CPU and GPU environment images must be given together or not at all.
    only_cpu_given = args.cpu_env_image and not args.gpu_env_image
    only_gpu_given = args.gpu_env_image and not args.cpu_env_image
    if only_cpu_given or only_gpu_given:
        print("If a CPU or GPU image is specified, both should be.")
        sys.exit(1)

    # Dry run: print the plan and stop before touching any infrastructure.
    if args.dry_run:
        gcp.dry_run(det_configs, env, variables_to_exclude)
        print("Printed plan. To execute, run `det deploy gcp`")
        return

    print("Starting Determined Deployment")
    gcp.deploy(det_configs, env, variables_to_exclude)

    if not args.no_wait_for_master:
        try:
            gcp.wait_for_master(det_configs, env, timeout=5 * 60)
        except MasterTimeoutExpired:
            print(
                colored(
                    "Determined cluster has been deployed, but master health check has failed.",
                    "red",
                )
            )
            print("For details, SSH to master instance and check /var/log/cloud-init-output.log.")
            sys.exit(1)

    print("Determined Deployment Successful")
    if args.no_wait_for_master:
        print("Please allow 1-5 minutes for the master instance to be accessible via the web-ui\n")