def deploy_configuration(config):
    logger.info(f'All qhub endpoints will be under *.{config["domain"]}')

    jupyterhub_endpoint = f'jupyter.{config["domain"]}'

    if ("client_id" not in config["authentication"]["config"]
            or "client_secret" not in config["authentication"]["config"]):
        logger.info(
            "client_id and client_secret were not specified - dynamically creating oauth client"
        )
        with timer(logger, "creating oauth client"):
            config["authentication"]["config"] = auth0.create_client(
                jupyterhub_endpoint)

    with timer(logger, "rendering template"):
        tmp_config = pathlib.Path("./config.yaml")
        with tmp_config.open("w") as f:
            yaml.dump(config, f)
        render_default_template(".", tmp_config)

    infrastructure_dir = pathlib.Path(config["project_name"]) / "infrastructure"

    terraform.init(str(infrastructure_dir))

    # ========= bootstrap infrastructure ========
    terraform.apply(
        str(infrastructure_dir),
        targets=[
            "module.kubernetes",
            "module.kubernetes-initialization",
            "module.kubernetes-ingress",
        ],
    )

    # ============= update dns ================
    output = terraform.output(str(infrastructure_dir))
    for key in output:
        if key.startswith("ingress"):
            endpoint = f'{key.split("_")[1]}.{config["domain"]}'
            address = output[key]["value"]
            if re.fullmatch(r"\d+\.\d+\.\d+\.\d+", address):
                cloudflare.update_record("qhub.dev", endpoint, "A", address)
            else:
                cloudflare.update_record("qhub.dev", endpoint, "CNAME", address)

    # ======= apply entire infrastructure ========
    terraform.apply(str(infrastructure_dir))

def output(directory=None):
    terraform_path = download_terraform_binary()
    logger.info(f"terraform={terraform_path} output directory={directory}")
    with timer(logger, "terraform output"):
        return subprocess.check_output(
            [terraform_path, "output", "-json"], cwd=directory
        ).decode("utf8")[:-1]

def apply(directory=None, targets=None):
    targets = targets or []
    logger.info(f"terraform apply directory={directory} targets={targets}")
    command = ["apply", "-auto-approve"] + ["-target=" + _ for _ in targets]
    with timer(logger, "terraform apply"):
        run_terraform_subprocess(command, cwd=directory, prefix="terraform")

def deploy_configuration(config, dns_provider, dns_auto_provision, disable_prompt):
    logger.info(f'All qhub endpoints will be under *.{config["domain"]}')

    with timer(logger, "deploying QHub"):
        guided_install(config, dns_provider, dns_auto_provision, disable_prompt)

def output(directory=None):
    logger.info(f"terraform output directory={directory}")
    with timer(logger, "terraform output"):
        output = subprocess.check_output(
            "terraform output -json", shell=True, cwd=directory
        ).decode("utf8")
        return json.loads(output)

def refresh(directory=None):
    logger.info(f"terraform refresh directory={directory}")
    command = [
        "refresh",
    ]
    with timer(logger, "terraform refresh"):
        run_terraform_subprocess(command, cwd=directory, prefix="terraform")

def tfimport(addr, id, directory=None):
    logger.info(f"terraform import directory={directory} addr={addr} id={id}")
    command = ["import", addr, id]
    with timer(logger, "terraform import"):
        run_terraform_subprocess(
            command, cwd=directory, prefix="terraform", strip_errors=True, timeout=30
        )

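# A hypothetical usage sketch for tfimport() above: the resource address and ID
# below are illustrative placeholders, not values taken from the qhub codebase.
def example_import_existing_state_bucket():
    tfimport(
        "module.terraform-state.aws_s3_bucket.terraform-state",
        "my-project-terraform-state",
        directory="terraform-state",
    )
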
def apply(directory=None, targets=None):
    targets = targets or []
    logger.info(f"terraform apply directory={directory} targets={targets}")
    with timer(logger, "terraform apply"):
        command = " ".join(
            ["terraform", "apply", "-auto-approve"]
            + ["-target=" + _ for _ in targets]
        )
        subprocess.check_output(command, shell=True, cwd=directory)

def deploy_configuration(
    config,
    dns_provider,
    dns_auto_provision,
    disable_prompt,
    skip_remote_state_provision,
    full_only,
):
    logger.info(f'All qhub endpoints will be under https://{config["domain"]}')

    with timer(logger, "deploying QHub"):
        try:
            guided_install(
                config,
                dns_provider,
                dns_auto_provision,
                disable_prompt,
                skip_remote_state_provision,
                full_only,
            )
        except CalledProcessError as e:
            logger.error(e.output)
            raise e

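# A minimal invocation sketch for deploy_configuration() above, assuming a rendered
# qhub-config.yaml sits in the current directory; the file name and flag values are
# illustrative, not taken from the qhub CLI itself.
def example_deploy():
    import yaml

    with open("qhub-config.yaml") as f:
        config = yaml.safe_load(f)

    deploy_configuration(
        config,
        dns_provider="cloudflare",
        dns_auto_provision=False,
        disable_prompt=True,
        skip_remote_state_provision=False,
        full_only=False,
    )
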
def destroy_configuration(config):
    logger.info(
        """Removing all infrastructure, your local files will still remain,
        you can use 'qhub deploy' to re-install infrastructure using same config file"""
    )

    with timer(logger, "destroying QHub"):
        # 01 Verify configuration file exists
        verify_configuration_file_exists()

        # 02 Check terraform
        check_terraform()

        # 03 Check Environment Variables
        check_cloud_credentials(config)

        # 04 Remove all infrastructure
        with change_directory("infrastructure"):
            run(["terraform", "destroy", "-auto-approve"])

        # 05 Remove terraform backend remote state bucket
        with change_directory("terraform-state"):
            run(["terraform", "destroy", "-auto-approve"])

def destroy_configuration(config, skip_remote_state_provision=False, full_only=False):
    logger.info(
        """Removing all infrastructure, your local files will still remain,
        you can use 'qhub deploy' to re-install infrastructure using same config file\n"""
    )

    with timer(logger, "destroying QHub"):
        # 01 Check Environment Variables
        check_cloud_credentials(config)

        # 02 Remove all infrastructure
        terraform.init(directory="infrastructure")
        terraform.refresh(directory="infrastructure")

        if not full_only:
            stages = (
                {
                    "name": "General cluster software",
                    "targets": [
                        "module.kubernetes-nfs-mount",
                        "module.kubernetes-nfs-server",
                        "module.kubernetes-nfs-mount",
                        "module.kubernetes-conda-store-server",
                        "module.kubernetes-conda-store-mount",
                        "module.kubernetes-autoscaling",
                        "module.qhub",
                        "module.prefect",
                        "module.monitoring",
                        "module.clearml",
                        "module.forwardauth",
                        "random_password.jupyterhub-jhsecret",
                        "random_password.forwardauth-jhsecret",
                        "kubernetes_secret.qhub_yaml_secret",
                    ]
                    + [
                        f"module.{helmext['name']}-extension"
                        for helmext in config.get("helm_extensions", [])
                    ]
                    + [
                        f"module.ext-{ext['name']}"
                        for ext in config.get("extensions", [])
                    ],
                },
                {
                    "name": "Keycloak Config",
                    "targets": [
                        "module.kubernetes-keycloak-config",
                        "random_password.keycloak-qhub-bot-password",
                    ],
                },
                {
                    "name": "Keycloak Helm installation",
                    "targets": ["module.kubernetes-keycloak-helm"],
                },
                {
                    "name": "Kubernetes Ingress",
                    "targets": ["module.kubernetes-ingress"],
                },
                {
                    "name": "Kubernetes Cluster",
                    "targets": [
                        "module.kubernetes",
                        "module.kubernetes-initialization",
                    ],
                },
                {
                    "name": "Cloud Infrastructure",
                    "targets": [
                        "module.registry-jupyterhub",  # GCP
                        "module.efs",  # AWS
                        "module.registry-jupyterlab",  # AWS
                        "module.network",  # AWS
                        "module.accounting",  # AWS
                        "module.registry",  # Azure
                    ],
                },
            )

            for stageinfo in stages:
                logger.info(
                    f"Running Terraform Stage: {stageinfo['name']} {stageinfo['targets']}"
                )
                terraform.destroy(directory="infrastructure", targets=stageinfo["targets"])
        else:
            logger.info("Running Terraform Stage: FULL")
            terraform.destroy(directory="infrastructure")

        # 03 Remove terraform backend remote state bucket
        # backwards compatible with `qhub-config.yaml` which
        # don't have `terraform_state` key
        if (
            (not skip_remote_state_provision)
            and (config.get("terraform_state", {}).get("type", "") == "remote")
            and (config.get("provider") != "local")
        ):
            terraform_state_sync(config)
            terraform.destroy(directory="terraform-state")

def force_destroy_configuration(config):
    logging.info("""FORCE Removing all infrastructure (not using terraform).""")

    with timer(logging, "destroying QHub"):
        # 01 Check we have cloud details we need
        check_cloud_credentials(config)

        if config.get("provider", "") != "aws":
            raise ValueError("force-destroy currently only available for AWS")

        project_name = config.get("project_name", "").strip()
        if project_name == "":
            raise ValueError("project_name cannot be blank")

        if "amazon_web_services" not in config:
            raise ValueError(
                "amazon_web_services section must exist in qhub-config.yaml"
            )

        region = config["amazon_web_services"].get("region", "").strip()
        if region == "":
            raise ValueError(
                "amazon_web_services.region must exist in qhub-config.yaml"
            )

        logging.info(f"Remove AWS project {project_name} in region {region}")

        env = config.get("namespace", "dev").strip()

        # 02 Remove all infrastructure
        try:
            import boto3
        except ImportError:
            raise ValueError(
                "Please ensure boto3 package is installed using: pip install boto3==1.17.98"
            )

        restag = boto3.client("resourcegroupstaggingapi", region_name=region)

        filter_params = dict(
            TagFilters=[
                {
                    "Key": "Owner",
                    "Values": [
                        "terraform",
                        "terraform-state",
                    ],
                },
                {
                    "Key": "Environment",
                    "Values": [
                        env,
                    ],
                },
                {
                    "Key": "Project",
                    "Values": [
                        project_name,
                    ],
                },
            ],
            ResourcesPerPage=50,
        )

        resources = []
        response = restag.get_resources(**filter_params)
        resources.extend(response["ResourceTagMappingList"])

        while "PaginationToken" in response and response["PaginationToken"]:
            token = response["PaginationToken"]
            response = restag.get_resources(**filter_params, PaginationToken=token)
            resources.extend(response["ResourceTagMappingList"])

        # Load Balancer and other K8s-generated resources will need to be queried separately:
        filter_params = dict(
            TagFilters=[
                {
                    "Key": f"kubernetes.io/cluster/{project_name}-{env}",
                    "Values": [
                        "owned",
                    ],
                }
            ],
            ResourcesPerPage=50,
        )

        response = restag.get_resources(**filter_params)
        resources.extend(response["ResourceTagMappingList"])

        # IAM
        iam = boto3.resource("iam")
        for suffix in ("eks-cluster-role", "eks-node-group-role"):
            try:
                role = iam.Role(f"{project_name}-{env}-{suffix}")
                if role.tags is not None:
                    tags_dict = dict(
                        [(t["Key"], t.get("Value", "")) for t in role.tags]
                    )
                    if (
                        tags_dict.get("Owner", "") == "terraform"
                        and tags_dict.get("Environment", "") == env
                        and tags_dict.get("Project", "") == project_name
                    ):
                        resources.append({"ResourceARN": role.arn})
            except iam.meta.client.exceptions.NoSuchEntityException:
                pass

        # Summarize resources
        type_groups = {}
        for r in resources:
            de_arned = parse_arn(r["ResourceARN"])
            t = f"{de_arned['service']}-{de_arned['resource_type']}"
            type_groups.setdefault(t, []).append(de_arned)
            logging.info(r["ResourceARN"])

        logging.info([(k, len(v)) for k, v in type_groups.items()])

        # Order
        priority_types = (
            "eks-nodegroup",
            "eks-cluster",
            "elasticloadbalancing-loadbalancer",
            "ec2-internet-gateway",
            "ec2-route-table",
            "elasticfilesystem-file-system",
            "ec2-subnet",
            "ec2-security-group",
            "ec2-vpc",
            "ecr-repository",
            "dynamodb-table",
            "s3-None",
            "resource-groups-group",
            "iam-role",
        )

        for pt in priority_types:
            logging.info(f"Inspect {pt}")
            for r in type_groups.get(pt, []):
                if pt == "eks-nodegroup":
                    nodegroup_resource = r["resource"].split("/")
                    cluster_name = nodegroup_resource[0]
                    nodegroup_name = nodegroup_resource[1]
                    logging.info(f"Delete {nodegroup_name} on cluster {cluster_name}")
                    client = boto3.client("eks", region_name=region)
                    client.delete_nodegroup(
                        clusterName=cluster_name, nodegroupName=nodegroup_name
                    )

                elif pt == "eks-cluster":
logging.info(f"Delete EKS cluster {r['resource']}") client = boto3.client("eks", region_name=region) response = client.list_nodegroups( clusterName=r["resource"]) while len(response["nodegroups"]) > 0: logging.info("Nodegroups still present, sleep 10") time.sleep(10) response = client.list_nodegroups( clusterName=r["resource"]) client.delete_cluster(name=r["resource"]) elif pt == "elasticloadbalancing-loadbalancer": client = boto3.client("elb", region_name=region) logging.info(f"Inspect Load balancer {r['resource']}") logging.info(f"Delete Load balancer {r['resource']}") response = client.delete_load_balancer( LoadBalancerName=r["resource"]) elif pt == "ec2-route-table": logging.info(f"Inspect route table {r['resource']}") ec2 = boto3.resource("ec2", region_name=region) route_table = ec2.RouteTable(r["resource"]) for assoc in route_table.associations: logging.info(f"Delete route table assoc {assoc.id}") assoc.delete() time.sleep(10) logging.info(f"Delete route table {r['resource']}") route_table.delete() elif pt == "ec2-subnet": logging.info(f"Inspect subnet {r['resource']}") ec2 = boto3.resource("ec2", region_name=region) subnet = ec2.Subnet(r["resource"]) for ni in subnet.network_interfaces.all(): ni.load() # But can only detach if attached... if ni.attachment: ni.detach(DryRun=False, Force=True) ni.delete() logging.info(f"Delete subnet {r['resource']}") subnet.delete(DryRun=False) elif pt == "ec2-security-group": logging.info(f"Inspect security group {r['resource']}") ec2 = boto3.resource("ec2", region_name=region) security_group = ec2.SecurityGroup(r["resource"]) for ipperms in security_group.ip_permissions_egress: security_group.revoke_egress(DryRun=False, IpPermissions=[ipperms]) for ipperms in security_group.ip_permissions: security_group.revoke_ingress(DryRun=False, IpPermissions=[ipperms]) logging.info(f"Delete security group {r['resource']}") security_group.delete(DryRun=False) elif pt == "ec2-internet-gateway": logging.info(f"Inspect internet gateway {r['resource']}") ec2 = boto3.resource("ec2", region_name=region) internet_gateway = ec2.InternetGateway(r["resource"]) for attach in internet_gateway.attachments: logging.info( f"Inspect IG attachment {attach['VpcId']}") if attach.get("State", "") == "available": logging.info(f"Detach from VPC {attach['VpcId']}") internet_gateway.detach_from_vpc( VpcId=attach["VpcId"]) time.sleep(10) logging.info(f"Delete internet gateway {r['resource']}") internet_gateway.delete(DryRun=False) elif pt == "elasticfilesystem-file-system": client = boto3.client("efs", region_name=region) logging.info(f"Delete efs {r['resource']}") mts = client.describe_mount_targets( FileSystemId=r["resource"]) for mt in mts["MountTargets"]: client.delete_mount_target( MountTargetId=mt["MountTargetId"]) response = client.delete_file_system( FileSystemId=r["resource"]) ## Should wait until this returns botocore.errorfactory.FileSystemNotFound: # response = client.describe_file_systems( # FileSystemId=r['resource'] # ) elif pt == "ec2-vpc": logging.info(f"Inspect VPC {r['resource']}") ec2 = boto3.resource("ec2", region_name=region) vpc = ec2.Vpc(r["resource"]) # for cidr_assoc in vpc.cidr_block_association_set: # logging.info(cidr_assoc) # r = vpc.disassociate_subnet_cidr_block( # AssociationId=cidr_assoc['AssociationId'] # ) # logging.info(r) logging.info(f"Delete VPC {r['resource']}") vpc.delete() elif pt == "ecr-repository": logging.info(f"Inspect ECR {r['resource']}") client = boto3.client("ecr", region_name=region) logging.info( f"Delete ecr {r['account']} / 
{r['resource']}") response = response = client.delete_repository( registryId=r["account"], repositoryName=r["resource"], force=True, ) elif pt == "s3-None": logging.info(f"Inspect S3 {r['resource']}") s3 = boto3.resource("s3", region_name=region) logging.info(f"Delete s3 {r['resource']}") bucket = s3.Bucket(r["resource"]) r = bucket.objects.all().delete() r = bucket.object_versions.delete() response = bucket.delete() elif pt == "dynamodb-table": logging.info(f"Inspect DynamoDB {r['resource']}") client = boto3.client("dynamodb", region_name=region) logging.info(f"Delete DynamoDB {r['resource']}") response = client.delete_table(TableName=r["resource"]) elif pt == "resource-groups-group": logging.info(f"Inspect Resource Group {r['resource']}") client = boto3.client("resource-groups", region_name=region) logging.info(f"Delete Resource Group {r['resource']}") response = client.delete_group(Group=r["arn"]) elif pt == "iam-role": logging.info(f"Inspect IAM Role {r['resource']}") iam = boto3.resource("iam") role = iam.Role(r["resource"]) for policy in role.attached_policies.all(): logging.info(f"Detach Role policy {policy.arn}") response = role.detach_policy(PolicyArn=policy.arn) logging.info(f"Delete IAM Role {r['resource']}") role.delete()
def init(directory=None): logger.info(f"terraform init directory={directory}") with timer(logger, "terraform init"): run_terraform_subprocess(["init"], cwd=directory, prefix="terraform")
def init(directory=None): logger.info(f"terraform init directory={directory}") with timer(logger, "terraform init"): subprocess.check_output("terraform init", shell=True, cwd=directory)
def destroy(directory=None):
    logger.info(f"terraform destroy directory={directory}")
    with timer(logger, "terraform destroy"):
        command = "terraform destroy -auto-approve"
        subprocess.check_output(command, shell=True, cwd=directory)

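# A minimal sketch of how the terraform wrappers in this section compose, assuming
# an "infrastructure" directory with rendered *.tf files already exists. The staged
# targets mirror the bootstrap apply in deploy_configuration(); the function name
# and directory argument are illustrative only.
def example_terraform_lifecycle(directory="infrastructure"):
    init(directory=directory)
    # Targeted apply first (bootstrap only the cluster, initialization and ingress
    # modules), then a full apply over the remaining resources.
    apply(
        directory=directory,
        targets=[
            "module.kubernetes",
            "module.kubernetes-initialization",
            "module.kubernetes-ingress",
        ],
    )
    apply(directory=directory)
    state_outputs = output(directory=directory)
    # destroy(directory=directory)  # tear everything back down when finished
    return state_outputs
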