def get_gcr_auth_info(layer: Layer) -> Tuple[str, str]:
    if GCP.using_service_account():
        service_account_key = GCP.get_service_account_raw_credentials()
        return "_json_key", service_account_key

    credentials, _ = GCP.get_credentials()
    return "oauth2accesstoken", credentials.token
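# Hedged usage sketch, not part of the original source: the (username, token) pair
# returned above is suitable for a registry login against gcr.io. The docker CLI and
# an already-loaded `layer` instance are assumptions made for this illustration.
import subprocess

def docker_login_to_gcr(layer: Layer) -> None:
    username, token = get_gcr_auth_info(layer)
    # Feed the token over stdin so it never appears in the process list.
    subprocess.run(
        ["docker", "login", "--username", username, "--password-stdin", "gcr.io"],
        input=token.encode("utf-8"),
        check=True,
    )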
def delete_state_storage(cls, layer: "Layer") -> None:
    """
    Idempotently remove remote storage for tf state
    """
    # After the layer is completely deleted, remove the opta config from the state bucket.
    if layer.cloud == "aws":
        cloud_client: CloudClient = AWS(layer)
    elif layer.cloud == "google":
        cloud_client = GCP(layer)
    elif layer.cloud == "azurerm":
        cloud_client = Azure(layer)
    elif layer.cloud == "local":
        cloud_client = Local(layer)
    elif layer.cloud == "helm":
        # There is no opta managed storage to delete
        return
    else:
        raise Exception(f"Can not handle opta config deletion for cloud {layer.cloud}")
    cloud_client.delete_opta_config()
    cloud_client.delete_remote_state()

    # If this is the env layer, delete the state bucket & dynamo table as well.
    if layer.name == layer.root().name:
        logger.info(f"Deleting the state storage for {layer.name}...")
        if layer.cloud == "aws":
            cls._aws_delete_state_storage(layer)
        elif layer.cloud == "google":
            cls._gcp_delete_state_storage(layer)
        elif layer.cloud == "local":
            cls._local_delete_state_storage(layer)
def process(self, module_idx: int) -> None:
    byo_cert_module = None
    for module in self.layer.modules:
        if (module.aliased_type or module.type) == "external-ssl-cert":
            byo_cert_module = module
            break
    if byo_cert_module is not None:
        self.module.data[
            "private_key"
        ] = f"${{{{module.{byo_cert_module.name}.private_key}}}}"
        self.module.data[
            "certificate_body"
        ] = f"${{{{module.{byo_cert_module.name}.certificate_body}}}}"
        self.module.data[
            "certificate_chain"
        ] = f"${{{{module.{byo_cert_module.name}.certificate_chain}}}}"

    gcp_dns_modules = self.layer.get_module_by_type("gcp-dns", module_idx)
    gcp_dns_module = None
    if len(gcp_dns_modules) > 0:
        gcp_dns_module = gcp_dns_modules[0]
    if gcp_dns_module is not None:
        self.module.data[
            "hosted_zone_name"
        ] = f"${{{{module.{gcp_dns_module.name}.zone_name}}}}"
        self.module.data["domain"] = f"${{{{module.{gcp_dns_module.name}.domain}}}}"
        self.module.data[
            "cert_self_link"
        ] = f"${{{{module.{gcp_dns_module.name}.cert_self_link}}}}"
        self.module.data[
            "delegated"
        ] = f"${{{{module.{gcp_dns_module.name}.delegated}}}}"
    self.module.data["zone_names"] = GCP(self.layer).get_current_zones()
    super(GcpK8sBaseProcessor, self).process(module_idx)
def test_get_all_remote_configs_bucket_not_present(self, mocker: MockFixture) -> None:
    mocker.patch(
        "opta.core.gcp.default",
        return_value=(mocker.Mock(spec=Credentials), "dummy_project_id"),
    )
    mock_storage_instance = mocker.Mock(spec=Client)
    mock_bucket_instance = mocker.Mock(spec=Bucket)
    mock_bucket_instance.name = "test"
    mock_storage_instance.list_buckets.return_value = []
    mock_storage_instance.list_blobs.return_value = []
    mocker_list_bucket_call = mocker.patch(
        "opta.core.gcp.storage.Client.list_buckets", return_value=[]
    )
    mocker_list_blob_call = mocker.patch(
        "opta.core.gcp.storage.Client.list_blobs", return_value=[]
    )
    mock_download_remote_blob = mocker.patch("opta.core.gcp.GCP._download_remote_blob")

    detailed_config_map = GCP().get_all_remote_configs()

    assert detailed_config_map == {}
    mocker_list_bucket_call.assert_called_once()
    mocker_list_blob_call.assert_not_called()
    mock_download_remote_blob.assert_not_called()
def force_delete_terraform_lock(cls, layer: "Layer", exception: Exception) -> None:
    if layer.cloud == "aws":
        AWS(layer).force_delete_terraform_lock_id()
    elif layer.cloud == "google":
        GCP(layer).force_delete_terraform_lock_id()
    else:
        raise exception
def __get_cloud_client(cloud: str, layer: Optional[Layer] = None) -> CloudClient:
    cloud_client: CloudClient
    if cloud.lower() == "aws":
        cloud_client = AWS(layer=layer)
    elif cloud.lower() == "google":
        cloud_client = GCP(layer=layer)
    else:
        raise UserErrors(f"Can't get client for cloud {cloud}")
    return cloud_client
def bucket_exists(self, bucket_name: str) -> bool:
    if self.is_stateless_mode() is True:
        return False
    if self.cloud == "aws":
        region = self.providers["aws"]["region"]
        return AWS(self).bucket_exists(bucket_name, region)
    elif self.cloud == "google":
        return GCP(self).bucket_exists(bucket_name)
    else:
        # Note - this function does not work for Azure
        return False
def test_get_all_remote_configs_configuration_present(
    self, mocker: MockFixture
) -> None:
    mocker.patch(
        "opta.core.gcp.default",
        return_value=(mocker.Mock(spec=Credentials), "dummy_project_id"),
    )
    mock_storage_instance = mocker.Mock(spec=Client)
    mock_bucket_instance = mocker.Mock(spec=Bucket)
    mock_bucket_instance.name = "test"
    mock_blob_list_blob_instance = mocker.Mock(spec=Blob)
    mock_blob_list_blob_instance.name = "opta_config/test-config"
    mock_storage_instance.list_buckets.return_value = [mock_bucket_instance]
    mock_storage_instance.list_blobs.return_value = [mock_blob_list_blob_instance]
    mock_download_remote_blob = mocker.patch(
        "opta.core.gcp.GCP._download_remote_blob",
        return_value={
            "opta_version": "dev",
            "date": "test_datetime",
            "original_spec": "actual_config",
            "defaults": {},
        },
    )
    mocker_list_bucket_call = mocker.patch(
        "opta.core.gcp.storage.Client.list_buckets",
        return_value=[mock_bucket_instance],
    )
    mocker_list_blob_call = mocker.patch(
        "opta.core.gcp.storage.Client.list_blobs",
        return_value=[mock_blob_list_blob_instance],
    )

    detailed_config_map = GCP().get_all_remote_configs()

    mocker_list_bucket_call.assert_called_once()
    mocker_list_blob_call.assert_called_once_with(
        mock_bucket_instance.name, prefix="opta_config/", delimiter="/"
    )
    mock_download_remote_blob.assert_called_once_with(
        mock_bucket_instance, mock_blob_list_blob_instance.name
    )
    assert detailed_config_map == {
        "test": {
            "test-config": {
                "opta_version": "dev",
                "date": "test_datetime",
                "original_spec": "actual_config",
                "defaults": {},
            }
        }
    }
def _gcp_delete_state_storage(cls, layer: "Layer") -> None:
    providers = layer.gen_providers(0)
    if "gcs" not in providers.get("terraform", {}).get("backend", {}):
        return
    bucket_name = providers["terraform"]["backend"]["gcs"]["bucket"]
    credentials, project_id = GCP.get_credentials()
    gcs_client = storage.Client(project=project_id, credentials=credentials)
    try:
        bucket_obj = gcs_client.get_bucket(bucket_name)
        bucket_obj.delete(force=True)
        logger.info("Successfully deleted GCP state storage")
    except NotFound:
        logger.warning("State bucket was already deleted")
def get_cloud_client(self) -> CloudClient:
    if self.cloud == "aws":
        return AWS(self)
    elif self.cloud == "google":
        return GCP(self)
    elif self.cloud == "azurerm":
        return Azure(self)
    elif self.cloud == "local":
        return Local(self)
    elif self.cloud == "helm":
        return HelmCloudClient(self)
    else:
        raise Exception(
            f"Unknown cloud {self.cloud}. Can not handle getting the cloud client"
        )
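# Hedged usage sketch (assumption, not from the original source): callers can stay
# cloud-agnostic by asking the layer for its client and relying on the shared
# CloudClient interface, e.g. to read back the previously uploaded opta config.
def read_remote_config(layer: "Layer") -> Optional["StructuredConfig"]:
    cloud_client = layer.get_cloud_client()
    # get_remote_config() is the same call _apply() makes further down; it returns
    # None when no config has been uploaded for this layer yet.
    return cloud_client.get_remote_config()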
def test_get_remote_state(self, mocker: MockFixture, gcp_layer: Mock) -> None:
    mocker.patch(
        "opta.core.gcp.default",
        return_value=(mocker.Mock(spec=Credentials), "dummy_project_id"),
    )
    mock_bucket_instance = mocker.Mock(spec=Bucket)
    mocker.patch(
        "opta.core.gcp.storage.Client.get_bucket", return_value=mock_bucket_instance
    )
    mock_download_remote_blob = mocker.patch(
        "opta.core.gcp.GCP._download_remote_blob", return_value='{"test": "test"}'
    )

    GCP(layer=gcp_layer).get_remote_state()

    mock_download_remote_blob.assert_called_once_with(
        mock_bucket_instance, f"{gcp_layer.name}/default.tfstate"
    )
def test_get_terraform_lock_id(self, mocker: MockFixture, gcp_layer: Layer) -> None:
    mocker.patch(
        "opta.core.gcp.default",
        return_value=(mocker.Mock(spec=Credentials), "dummy_project_id"),
    )
    mock_gcs_client_instance = mocker.Mock(spec=Client)
    mock_gcs_bucket_instance = mocker.Mock(spec=Bucket)
    mock_gcs_tf_lock_blob = mocker.Mock(spec=Blob)
    mocker.patch(
        "opta.core.gcp.storage.Client", return_value=mock_gcs_client_instance
    )
    mock_gcs_client_instance.get_bucket.return_value = mock_gcs_bucket_instance
    mock_gcs_bucket_instance.get_blob.return_value = mock_gcs_tf_lock_blob
    mock_gcs_tf_lock_blob.generation = 1234567890

    assert GCP(gcp_layer).get_terraform_lock_id() == "1234567890"
def _gcp_get_configs(layer: "Layer") -> List[str]:
    bucket_name = layer.state_storage()
    gcs_config_dir = "opta_config/"
    credentials, project_id = GCP.get_credentials()
    gcs_client = storage.Client(project=project_id, credentials=credentials)
    try:
        bucket_object = gcs_client.get_bucket(bucket_name)
    except NotFound:
        logger.warning(
            "Couldn't find the state bucket, must have already been destroyed in a previous destroy run"
        )
        return []
    blobs: List[storage.Blob] = list(
        gcs_client.list_blobs(bucket_object, prefix=gcs_config_dir)
    )
    configs = [blob.name[len(gcs_config_dir) :] for blob in blobs]
    if layer.name in configs:
        configs.remove(layer.name)
    return configs
def test_force_delete_terraform_lock_id(
    self, mocker: MockFixture, gcp_layer: Layer
) -> None:
    mocker.patch(
        "opta.core.gcp.default",
        return_value=(mocker.Mock(spec=Credentials), "dummy_project_id"),
    )
    mock_gcs_client_instance = mocker.Mock(spec=Client)
    mock_gcs_bucket_instance = mocker.Mock(spec=Bucket)
    mocker.patch(
        "opta.core.gcp.storage.Client", return_value=mock_gcs_client_instance
    )
    mock_gcs_client_instance.get_bucket.return_value = mock_gcs_bucket_instance

    GCP(gcp_layer).force_delete_terraform_lock_id()

    mock_gcs_bucket_instance.delete_blob.assert_called_once_with(
        f"{gcp_layer.name}/default.tflock"
    )
def process(self, module_idx: int) -> None:
    gcp_base_module = None
    for module in self.layer.modules:
        if (module.aliased_type or module.type) == "gcp-base":
            gcp_base_module = module
            break
    if gcp_base_module is None:
        raise UserErrors(
            "The gcp-gke module needs to be run on the same yaml as the gcp-base"
        )
    self.module.data["cluster_name"] = get_cluster_name(self.layer.root())
    self.module.data[
        "vpc_self_link"
    ] = f"${{{{module.{gcp_base_module.name}.vpc_self_link}}}}"
    self.module.data[
        "private_subnet_self_link"
    ] = f"${{{{module.{gcp_base_module.name}.private_subnet_self_link}}}}"
    self.module.data[
        "k8s_master_ipv4_cidr_block"
    ] = f"${{{{module.{gcp_base_module.name}.k8s_master_ipv4_cidr_block}}}}"
    self.module.data["node_zone_names"] = GCP(self.layer).get_current_zones()
    super(GcpGkeProcessor, self).process(module_idx)
def _gcp_verify_storage(cls, layer: "Layer") -> bool:
    bucket = layer.state_storage()
    return GCP(layer).bucket_exists(bucket)
def _apply(
    config: str,
    env: Optional[str],
    refresh: bool,
    local: bool,
    image_tag: Optional[str],
    test: bool,
    auto_approve: bool,
    input_variables: Dict[str, str],
    image_digest: Optional[str] = None,
    stdout_logs: bool = True,
    detailed_plan: bool = False,
) -> None:
    pre_check()
    _clean_tf_folder()
    if local and not test:
        config = local_setup(config, input_variables, image_tag, refresh_local_env=True)

    layer = Layer.load_from_yaml(config, env, input_variables=input_variables)
    layer.verify_cloud_credentials()
    layer.validate_required_path_dependencies()

    if Terraform.download_state(layer):
        tf_lock_exists, _ = Terraform.tf_lock_details(layer)
        if tf_lock_exists:
            raise UserErrors(USER_ERROR_TF_LOCK)
    _verify_parent_layer(layer, auto_approve)

    event_properties: Dict = layer.get_event_properties()
    amplitude_client.send_event(
        amplitude_client.START_GEN_EVENT, event_properties=event_properties,
    )

    # We need a region with at least 3 AZs for leader election during failover.
    # Also EKS historically had problems with regions that have fewer than 3 AZs.
    if layer.cloud == "aws":
        providers = layer.gen_providers(0)["provider"]
        aws_region = providers["aws"]["region"]
        azs = _fetch_availability_zones(aws_region)
        if len(azs) < 3:
            raise UserErrors(
                fmt_msg(
                    f"""
                    Opta requires a region with at least *3* availability zones like us-east-1 or us-west-2.
                    ~You configured {aws_region}, which only has the availability zones: {azs}.
                    ~Please choose a different region.
                    """
                )
            )

    Terraform.create_state_storage(layer)
    gen_opta_resource_tags(layer)
    cloud_client: CloudClient
    if layer.cloud == "aws":
        cloud_client = AWS(layer)
    elif layer.cloud == "google":
        cloud_client = GCP(layer)
    elif layer.cloud == "azurerm":
        cloud_client = Azure(layer)
    elif layer.cloud == "local":
        if local:  # boolean passed via cli
            pass
        cloud_client = Local(layer)
    elif layer.cloud == "helm":
        cloud_client = HelmCloudClient(layer)
    else:
        raise Exception(f"Cannot handle upload config for cloud {layer.cloud}")

    existing_config: Optional[StructuredConfig] = cloud_client.get_remote_config()
    old_semver_string = (
        ""
        if existing_config is None
        else existing_config.get("opta_version", "").strip("v")
    )
    current_semver_string = VERSION.strip("v")
    _verify_semver(old_semver_string, current_semver_string, layer, auto_approve)

    try:
        existing_modules: Set[str] = set()
        first_loop = True
        for module_idx, current_modules, total_block_count in gen(
            layer, existing_config, image_tag, image_digest, test, True, auto_approve
        ):
            if first_loop:
                # This is set during the first iteration, since the tf file must exist.
                existing_modules = Terraform.get_existing_modules(layer)
                first_loop = False
            configured_modules = set([x.name for x in current_modules])
            is_last_module = module_idx == total_block_count - 1
            has_new_modules = not configured_modules.issubset(existing_modules)
            if not is_last_module and not has_new_modules and not refresh:
                continue
            if is_last_module:
                untouched_modules = existing_modules - configured_modules
                configured_modules = configured_modules.union(untouched_modules)

            layer.pre_hook(module_idx)
            if layer.cloud == "local":
                if is_last_module:
                    targets = []
            else:
                targets = list(
                    map(lambda x: f"-target=module.{x}", sorted(configured_modules))
                )

            if test:
                Terraform.plan("-lock=false", *targets, layer=layer)
                print("Plan ran successfully, not applying since this is a test.")
            else:
                current_properties = event_properties.copy()
                current_properties["module_idx"] = module_idx
                amplitude_client.send_event(
                    amplitude_client.APPLY_EVENT, event_properties=current_properties,
                )
                logger.info("Planning your changes (might take a minute)")
                try:
                    Terraform.plan(
                        "-lock=false",
                        "-input=false",
                        f"-out={TF_PLAN_PATH}",
                        layer=layer,
                        *targets,
                        quiet=True,
                    )
                except CalledProcessError as e:
                    logger.error(e.stderr or "")
                    raise e
                PlanDisplayer.display(detailed_plan=detailed_plan)

                if not auto_approve:
                    click.confirm(
                        "The above are the planned changes for your opta run. Do you approve?",
                        abort=True,
                    )
                logger.info("Applying your changes (might take a minute)")
                service_modules = (
                    layer.get_module_by_type("k8s-service", module_idx)
                    if layer.cloud == "aws"
                    else layer.get_module_by_type("gcp-k8s-service", module_idx)
                )
                if (
                    len(service_modules) != 0
                    and cluster_exist(layer.root())
                    and stdout_logs
                ):
                    service_module = service_modules[0]
                    # Tailing logs
                    logger.info(
                        f"Identified deployment for kubernetes service module {service_module.name}, tailing logs now."
                    )
                    new_thread = Thread(
                        target=tail_module_log,
                        args=(
                            layer,
                            service_module.name,
                            10,
                            datetime.datetime.utcnow().replace(tzinfo=pytz.UTC),
                            2,
                        ),
                        daemon=True,
                    )
                    # Tailing events
                    new_thread.start()
                    new_thread = Thread(
                        target=tail_namespace_events,
                        args=(
                            layer,
                            datetime.datetime.utcnow().replace(tzinfo=pytz.UTC),
                            3,
                        ),
                        daemon=True,
                    )
                    new_thread.start()

                tf_flags: List[str] = []
                if auto_approve:
                    tf_flags.append("-auto-approve")
                try:
                    Terraform.apply(
                        layer, *tf_flags, TF_PLAN_PATH, no_init=True, quiet=False
                    )
                except Exception as e:
                    layer.post_hook(module_idx, e)
                    raise e
                else:
                    layer.post_hook(module_idx, None)
                cloud_client.upload_opta_config()
        logger.info("Opta updates complete!")
    except Exception as e:
        event_properties["success"] = False
        event_properties["error_name"] = e.__class__.__name__
        raise e
    else:
        event_properties["success"] = True
    finally:
        amplitude_client.send_event(
            amplitude_client.FINISH_GEN_EVENT, event_properties=event_properties,
        )
def _get_gcp_lock_id(cls, layer: "Layer") -> str:
    gcp = GCP(layer)
    return gcp.get_terraform_lock_id()
def metadata_hydration(self) -> Dict[Any, Any]:
    parent_name = self.parent.name if self.parent is not None else "nil"
    parent = None
    if self.parent is not None:
        parent = SimpleNamespace(
            **{
                k: f"${{data.terraform_remote_state.parent.outputs.{k}}}"
                for k in self.parent.outputs()
            }
        )
    providers = self.providers
    if self.parent is not None:
        providers = deep_merge(providers, self.parent.providers)
    provider_hydration = {}
    for name, values in providers.items():
        provider_hydration[name] = SimpleNamespace(**values)

    region: Optional[str] = None
    k8s_access_token = None
    if self.cloud == "google":
        gcp = GCP(self)
        region = gcp.region
        credentials = gcp.get_credentials()[0]
        if isinstance(credentials, service_account.Credentials):
            service_account_credentials: service_account.Credentials = (
                credentials.with_scopes(
                    [
                        "https://www.googleapis.com/auth/userinfo.email",
                        "https://www.googleapis.com/auth/cloud-platform",
                    ]
                )
            )
            service_account_credentials.refresh(
                google.auth.transport.requests.Request()
            )
            k8s_access_token = service_account_credentials.token
        else:
            k8s_access_token = credentials.token
        if k8s_access_token is None:
            raise Exception("Was unable to get GCP access token")
    elif self.cloud == "aws":
        aws = AWS(self)
        region = aws.region
    elif self.cloud == "azurerm":
        region = self.root().providers["azurerm"]["location"]
    elif self.cloud == "local":
        pass
    return {
        "parent": parent,
        "vars": SimpleNamespace(**self.variables),
        "variables": SimpleNamespace(**self.variables),
        "parent_name": parent_name,
        "layer_name": self.name,
        "state_storage": self.state_storage(),
        "env": self.get_env(),
        "kubeconfig": KUBE_CONFIG_DEFAULT_LOCATION,
        "k8s_access_token": k8s_access_token,
        "region": region,
        **provider_hydration,
    }
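# Hedged illustration (assumption, not from the source; `layer` is a hypothetical,
# already-loaded Layer): because providers and variables are exposed as SimpleNamespace
# objects, the returned dict also supports attribute-style placeholders with str.format,
# e.g. "{vars.some_variable}" or "{aws.region}".
hydration = layer.metadata_hydration()
rendered = "state bucket: {state_storage}, env: {env}".format(**hydration)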
def _create_gcp_state_storage(cls, providers: dict) -> None:
    bucket_name = providers["terraform"]["backend"]["gcs"]["bucket"]
    region = providers["provider"]["google"]["region"]
    project_name = providers["provider"]["google"]["project"]
    credentials, project_id = GCP.get_credentials()
    if project_id != project_name:
        raise UserErrors(
            f"We got {project_name} as the project name in opta, but {project_id} in the google credentials"
        )
    gcs_client = storage.Client(project=project_id, credentials=credentials)
    try:
        bucket = gcs_client.get_bucket(bucket_name)
        bucket_project_number = bucket.project_number
    except GoogleClientError as e:
        if e.code == 403:
            raise UserErrors(
                f"The Bucket Name: {bucket_name} (Opta needs to store state here) already exists.\n"
                "Possible Failures:\n"
                " - Bucket is present in some other project and User does not have access to the Project.\n"
                "Please change the name in the Opta Configuration file or please change the User Permissions.\n"
                "Please fix it and try again."
            )
        elif e.code != 404:
            raise UserErrors(
                "When trying to determine the status of the state bucket, we got an "
                f"{e.code} error with the message "
                f"{e.message}"
            )
        logger.debug("GCS bucket for terraform state not found, creating a new one")
        try:
            bucket = gcs_client.create_bucket(bucket_name, location=region)
            bucket_project_number = bucket.project_number
        except Conflict:
            raise UserErrors(
                f"It looks like a gcs bucket with the name {bucket_name} was created recently, but then deleted "
                "and Google keeps hold of gcs bucket names for 30 days after deletion-- pls wait until the end of "
                "that time or change your environment name slightly."
            )

    # Enable the APIs
    credentials = GoogleCredentials.get_application_default()
    service = discovery.build(
        "serviceusage", "v1", credentials=credentials, static_discovery=False
    )
    new_api_enabled = False
    for service_name in [
        "container.googleapis.com",
        "iam.googleapis.com",
        "containerregistry.googleapis.com",
        "cloudkms.googleapis.com",
        "dns.googleapis.com",
        "servicenetworking.googleapis.com",
        "redis.googleapis.com",
        "compute.googleapis.com",
        "secretmanager.googleapis.com",
        "cloudresourcemanager.googleapis.com",
    ]:
        request = service.services().enable(
            name=f"projects/{project_name}/services/{service_name}"
        )
        try:
            response = request.execute()
            new_api_enabled = new_api_enabled or (
                response.get("name") != "operations/noop.DONE_OPERATION"
            )
        except HttpError as e:
            if e.resp.status == 400:
                raise UserErrors(
                    f"Got a 400 response when trying to enable the google {service_name} service with the following error reason: {e._get_reason()}"
                )
        logger.debug(f"Google service {service_name} activated")
    if new_api_enabled:
        logger.info("New api has been enabled, waiting 120 seconds before progressing")
        time.sleep(120)

    service = discovery.build(
        "cloudresourcemanager", "v1", credentials=credentials, static_discovery=False,
    )
    request = service.projects().get(projectId=project_id)
    response = request.execute()
    if response["projectNumber"] != str(bucket_project_number):
        raise UserErrors(
            f"State storage bucket {bucket_name}, has already been created, but it was created in another project. "
            f"Current project's number {response['projectNumber']}. Bucket's project number: {bucket_project_number}. "
            "You do, however, have access to view that bucket, so it sounds like you already run this opta apply in "
            "your org, but on a different project."
            "Note: project number is NOT project id. It is yet another globally unique identifier for your project "
            "I kid you not, go ahead and look it up."
        )
def download_state(cls, layer: "Layer") -> bool:
    if layer.is_stateless_mode() is True:
        # no remote state for stateless mode
        return False

    if not cls.verify_storage(layer):
        logger.debug(
            fmt_msg(
                """
                We store state in S3/GCP buckets/Azure Storage. Since the state bucket was not found,
                ~this probably means that you either haven't created your opta resources yet,
                ~or you previously successfully destroyed your opta resources.
                """
            )
        )
        return False

    state_file: str = "./tmp.tfstate"
    providers = layer.gen_providers(0)
    terraform_backends = providers.get("terraform", {}).get("backend", {})
    if "s3" in terraform_backends:
        bucket = providers["terraform"]["backend"]["s3"]["bucket"]
        region = providers["terraform"]["backend"]["s3"]["region"]
        key = providers["terraform"]["backend"]["s3"]["key"]
        logger.debug(
            f"Found an s3 backend in bucket {bucket} and key {key}, "
            "gonna try to download the statefile from there"
        )
        s3 = boto3.client("s3", config=Config(region_name=region))
        try:
            s3.download_file(Bucket=bucket, Key=key, Filename=state_file)
        except ClientError as e:
            if e.response["Error"]["Code"] == "404":
                # The object does not exist.
                logger.debug("Did not find terraform state file")
                return False
            raise
    elif "gcs" in terraform_backends:
        bucket = providers["terraform"]["backend"]["gcs"]["bucket"]
        prefix = providers["terraform"]["backend"]["gcs"]["prefix"]
        credentials, project_id = GCP.get_credentials()
        gcs_client = storage.Client(project=project_id, credentials=credentials)
        bucket_object = gcs_client.get_bucket(bucket)
        blob = storage.Blob(f"{prefix}/default.tfstate", bucket_object)
        try:
            with open(state_file, "wb") as file_obj:
                gcs_client.download_blob_to_file(blob, file_obj)
        except GoogleClientError as e:
            if e.code == 404:
                # The object does not exist.
                os.remove(state_file)
                return False
            raise
    elif "azurerm" in terraform_backends:
        storage_account_name = providers["terraform"]["backend"]["azurerm"][
            "storage_account_name"
        ]
        container_name = providers["terraform"]["backend"]["azurerm"]["container_name"]
        key = providers["terraform"]["backend"]["azurerm"]["key"]
        credentials = Azure.get_credentials()
        try:
            blob = (
                BlobServiceClient(
                    f"https://{storage_account_name}.blob.core.windows.net/",
                    credential=credentials,
                )
                .get_container_client(container_name)
                .get_blob_client(key)
            )
            with open(state_file, "wb") as file_obj:
                blob_data = blob.download_blob()
                blob_data.readinto(file_obj)
        except ResourceNotFoundError:
            return False
    elif layer.cloud == "local":
        try:
            tf_file = os.path.join(
                cls.get_local_opta_dir(), "tfstate", f"{layer.name}",
            )
            if os.path.exists(tf_file):
                copyfile(tf_file, state_file)
            else:
                return False
        except Exception:
            raise UserErrors(f"Could not copy local state file to {state_file}")
    elif layer.cloud == "helm":
        set_kube_config(layer)
        load_opta_kube_config()
        v1 = CoreV1Api()
        secret_name = f"tfstate-default-{layer.state_storage()}"
        secrets: V1SecretList = v1.list_namespaced_secret(
            "default", field_selector=f"metadata.name={secret_name}"
        )
        if len(secrets.items) == 0:
            return False
        secret: V1Secret = secrets.items[0]
        decoded_secret = gzip.decompress(base64.b64decode(secret.data["tfstate"]))
        with open(state_file, "wb") as file_obj:
            file_obj.write(decoded_secret)
    else:
        raise UserErrors("Need to get state from S3 or GCS or Azure storage")

    with open(state_file, "r") as file:
        raw_state = file.read().strip()
    os.remove(state_file)
    if raw_state != "":
        cls.downloaded_state[layer.name] = json.loads(raw_state)
        return True
    return False
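# Hedged usage sketch (assumption; `layer` is a hypothetical, already-loaded Layer):
# after a successful download the parsed state is cached on the class keyed by layer
# name, so callers typically pair the two like this.
if Terraform.download_state(layer):
    parsed_tf_state = Terraform.downloaded_state[layer.name]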