@contextmanager
def provide_gcp_credential_file_as_context(self):
    """
    Context manager that provides Google Cloud credentials for applications supporting the
    `Application Default Credentials (ADC) strategy
    <https://cloud.google.com/docs/authentication/production>`__.

    It can be used to provide credentials for external programs (e.g. gcloud) that expect an
    authorization file in the ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable.
    """
    key_path = self._get_field('key_path', None)  # type: Optional[str]
    keyfile_dict = self._get_field('keyfile_dict', None)  # type: Optional[Dict]
    if key_path and keyfile_dict:
        raise AirflowException(
            "The `keyfile_dict` and `key_path` fields are mutually exclusive. "
            "Please provide only one value."
        )
    elif key_path:
        if key_path.endswith('.p12'):
            raise AirflowException('Legacy P12 key files are not supported, use a JSON key file.')
        with patch_environ({CREDENTIALS: key_path}):
            yield key_path
    elif keyfile_dict:
        with tempfile.NamedTemporaryFile(mode='w+t') as conf_file:
            conf_file.write(keyfile_dict)
            conf_file.flush()
            with patch_environ({CREDENTIALS: conf_file.name}):
                yield conf_file.name
    else:
        # We will use the default service account credentials.
        yield None
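# Hedged usage sketch of the hook method above (the connection id and the gsutil call are
# illustrative assumptions, not taken from the source): any external tool that honors
# GOOGLE_APPLICATION_CREDENTIALS can run inside the context.
import subprocess

from airflow.providers.google.common.hooks.base_google import GoogleBaseHook

hook = GoogleBaseHook(gcp_conn_id="google_cloud_default")
with hook.provide_gcp_credential_file_as_context() as key_path:
    # key_path is the JSON key file path, or None when the default credentials apply.
    subprocess.run(["gsutil", "ls"], check=True)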
@contextmanager
def provide_gcp_connection(
    key_file_path: Optional[str] = None,
    scopes: Optional[Sequence] = None,
    project_id: Optional[str] = None,
):
    """
    Context manager that provides a temporary value of the
    :envvar:`AIRFLOW_CONN_GOOGLE_CLOUD_DEFAULT` connection. It builds a new connection that
    includes the path to the provided service account JSON file, the required scopes and the
    project id.

    :param key_file_path: Path to file with GCP credentials .json file.
    :type key_file_path: str
    :param scopes: OAuth scopes for the connection
    :type scopes: Sequence
    :param project_id: The id of GCP project for the connection.
    :type project_id: str
    """
    if key_file_path and key_file_path.endswith(".p12"):
        raise AirflowException("Legacy P12 key files are not supported, use a JSON key file.")
    conn = build_gcp_conn(scopes=scopes, key_file_path=key_file_path, project_id=project_id)
    with patch_environ({AIRFLOW_CONN_GOOGLE_CLOUD_DEFAULT: conn}):
        yield
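# Hedged usage sketch (the key file path and project id are placeholders): while the context
# is active, any hook using the google_cloud_default connection picks up the temporary
# AIRFLOW_CONN_GOOGLE_CLOUD_DEFAULT value.
with provide_gcp_connection(
    key_file_path="/files/airflow-gcp.json",
    scopes=["https://www.googleapis.com/auth/cloud-platform"],
    project_id="example-project",
):
    hook = GoogleBaseHook(gcp_conn_id="google_cloud_default")
    print(hook.project_id)  # resolved from the temporary connection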
@contextmanager
def provide_gcp_conn_and_credentials(
    key_file_path: Optional[str] = None,
    scopes: Optional[Sequence] = None,
    project_id: Optional[str] = None,
) -> Generator[None, None, None]:
    """
    Context manager that provides both:

    - Google Cloud credentials for applications supporting the
      `Application Default Credentials (ADC) strategy`__.
    - temporary value of :envvar:`AIRFLOW_CONN_GOOGLE_CLOUD_DEFAULT` connection

    :param key_file_path: Path to file with Google Cloud Service Account .json file.
    :param scopes: OAuth scopes for the connection
    :param project_id: The id of Google Cloud project for the connection.

    __ https://cloud.google.com/docs/authentication/production
    """
    with ExitStack() as stack:
        if key_file_path:
            stack.enter_context(provide_gcp_credentials(key_file_path))  # type: ignore
        if project_id:
            stack.enter_context(  # type: ignore
                patch_environ({PROJECT: project_id, LEGACY_PROJECT: project_id})
            )
        stack.enter_context(provide_gcp_connection(key_file_path, scopes, project_id))  # type: ignore
        yield
def execute(self, context):
    hook = GoogleBaseHook(gcp_conn_id=self.gcp_conn_id)
    self.project_id = self.project_id or hook.project_id
    if not self.project_id:
        raise AirflowException(
            "The project id must be passed either as "
            "keyword project_id parameter or as project_id extra "
            "in GCP connection definition. Both are not set!"
        )
    # Write config to a temp file and set the environment variable to point to it.
    # This is to avoid race conditions of reading/writing a single file.
    with tempfile.NamedTemporaryFile() as conf_file, \
            patch_environ({KUBE_CONFIG_ENV_VAR: conf_file.name}), \
            hook.provide_authorized_gcloud():
        # Attempt to get/update credentials.
        # We call gcloud directly instead of using the google-cloud-python API
        # because there is no way to write the Kubernetes config to a file, which is
        # required by KubernetesPodOperator.
        # The gcloud command looks at the env variable `KUBECONFIG` for where to save
        # the Kubernetes config file.
        cmd = [
            "gcloud",
            "container",
            "clusters",
            "get-credentials",
            self.cluster_name,
            "--zone",
            self.location,
            "--project",
            self.project_id,
        ]
        if self.use_internal_ip:
            cmd.append('--internal-ip')
        execute_in_subprocess(cmd)

        # Tell `KubernetesPodOperator` where the config file is located.
        self.config_file = os.environ[KUBE_CONFIG_ENV_VAR]
        return super().execute(context)
@contextmanager
def provide_authorized_gcloud(self) -> Generator[None, None, None]:
    """
    Provides a separate gcloud configuration with current credentials.

    The gcloud tool allows you to log in to Google Cloud in two ways: ``gcloud auth login``
    for the tool itself, and ``gcloud auth application-default login`` for Application
    Default Credentials. In our case, we want all commands to use only the credentials from
    ADC, so we need to configure the credentials in gcloud manually.
    """
    credentials_path = _cloud_sdk.get_application_default_credentials_path()
    project_id = self.project_id

    with ExitStack() as exit_stack:
        exit_stack.enter_context(self.provide_gcp_credential_file_as_context())
        gcloud_config_tmp = exit_stack.enter_context(tempfile.TemporaryDirectory())
        exit_stack.enter_context(patch_environ({CLOUD_SDK_CONFIG_DIR: gcloud_config_tmp}))

        if CREDENTIALS in os.environ:
            # This solves most cases when we are logged in using the service key in Airflow.
            # Don't display stdout/stderr for security reasons.
            check_output(
                [
                    "gcloud",
                    "auth",
                    "activate-service-account",
                    f"--key-file={os.environ[CREDENTIALS]}",
                ]
            )
        elif os.path.exists(credentials_path):
            # If we are logged in by `gcloud auth application-default` then we need to log in manually.
            # This will make the `gcloud auth application-default` and `gcloud auth` credentials equal.
            with open(credentials_path) as creds_file:
                creds_content = json.loads(creds_file.read())
                # Don't display stdout/stderr for security reasons.
                check_output(["gcloud", "config", "set", "auth/client_id", creds_content["client_id"]])
                # Don't display stdout/stderr for security reasons.
                check_output(["gcloud", "config", "set", "auth/client_secret", creds_content["client_secret"]])
                # Don't display stdout/stderr for security reasons.
                check_output(
                    [
                        "gcloud",
                        "auth",
                        "activate-refresh-token",
                        creds_content["client_id"],
                        creds_content["refresh_token"],
                    ]
                )
        if project_id:
            # Don't display stdout/stderr for security reasons.
            check_output(["gcloud", "config", "set", "core/project", project_id])
        yield
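# Hedged usage sketch (the command shown is an illustrative assumption): inside the context,
# gcloud operates on a throw-away config directory (via CLOUD_SDK_CONFIG_DIR) that has been
# activated with the hook's credentials, so the call below never touches the user's own
# gcloud configuration.
from subprocess import check_output

from airflow.providers.google.common.hooks.base_google import GoogleBaseHook

hook = GoogleBaseHook(gcp_conn_id="google_cloud_default")
with hook.provide_authorized_gcloud():
    check_output(["gcloud", "config", "list"])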
@contextmanager
def provide_salesforce_connection(key_file_path: str):
    """
    Context manager that provides a temporary value of SALESFORCE_DEFAULT connection.

    :param key_file_path: Path to file with SALESFORCE credentials .json file.
    :type key_file_path: str
    """
    if not key_file_path.endswith(".json"):
        raise AirflowException("Use a JSON key file.")
    with open(key_file_path) as credentials:
        creds = json.load(credentials)
    missing_keys = CONFIG_REQUIRED_FIELDS - creds.keys()
    if missing_keys:
        message = f"{missing_keys} fields are missing"
        raise AirflowException(message)
    conn = Connection(
        conn_id=SALESFORCE_CONNECTION_ID,
        conn_type=CONNECTION_TYPE,
        host=creds["host"],
        login=creds["login"],
        password=creds["password"],
        extra=json.dumps({"security_token": creds["security_token"]}),
    )
    with patch_environ({f"AIRFLOW_CONN_{conn.conn_id.upper()}": conn.get_uri()}):
        yield
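# Hedged usage sketch (the key file path is a placeholder, not from the source): while the
# context is active, hooks resolving the Salesforce default connection read the temporary
# AIRFLOW_CONN_* environment variable set above.
with provide_salesforce_connection("/files/salesforce-key.json"):
    ...  # run code that talks to Salesforce via the default connection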
@contextmanager
def provide_azure_data_lake_default_connection(key_file_path: str):
    """
    Context manager to provide a temporary value for the azure_data_lake_default connection.

    :param key_file_path: Path to file with azure_data_lake_default credentials .json file.
    :type key_file_path: str
    """
    required_fields = {'login', 'password', 'extra'}
    if not key_file_path.endswith(".json"):
        raise AirflowException("Use a JSON key file.")
    with open(key_file_path) as credentials:
        creds = json.load(credentials)
    missing_keys = required_fields - creds.keys()
    if missing_keys:
        message = f"{missing_keys} fields are missing"
        raise AirflowException(message)
    conn = Connection(
        conn_id=DATA_LAKE_CONNECTION_ID,
        conn_type=DATA_LAKE_CONNECTION_TYPE,
        host=creds.get("host", None),
        login=creds.get("login", None),
        password=creds.get("password", None),
        extra=json.dumps(creds.get('extra', None)),
    )
    with patch_environ({f"AIRFLOW_CONN_{conn.conn_id.upper()}": conn.get_uri()}):
        yield
@contextmanager
def provide_gcp_conn_and_credentials(
    key_file_path: Optional[str] = None,
    scopes: Optional[Sequence] = None,
    project_id: Optional[str] = None,
):
    """
    Context manager that provides both:

    - GCP credentials for applications supporting the `Application Default Credentials (ADC)
      strategy <https://cloud.google.com/docs/authentication/production>`__.
    - temporary value of :envvar:`AIRFLOW_CONN_GOOGLE_CLOUD_DEFAULT` connection

    :param key_file_path: Path to file with GCP credentials .json file.
    :type key_file_path: str
    :param scopes: OAuth scopes for the connection
    :type scopes: Sequence
    :param project_id: The id of GCP project for the connection.
    :type project_id: str
    """
    with ExitStack() as stack:
        if key_file_path:
            stack.enter_context(  # type: ignore  # pylint: disable=no-member
                provide_gcp_credentials(key_file_path)
            )
        if project_id:
            stack.enter_context(  # type: ignore  # pylint: disable=no-member
                patch_environ({PROJECT: project_id, LEGACY_PROJECT: project_id})
            )
        stack.enter_context(  # type: ignore  # pylint: disable=no-member
            provide_gcp_connection(key_file_path, scopes, project_id)
        )
        yield
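# Hedged usage sketch (path, scopes and project id are placeholders): combines ADC, the
# project environment variables and the Airflow connection in a single context, e.g. around
# a system test body.
with provide_gcp_conn_and_credentials(
    key_file_path="/files/airflow-gcp.json",
    scopes=["https://www.googleapis.com/auth/cloud-platform"],
    project_id="example-project",
):
    ...  # code that needs both ADC and AIRFLOW_CONN_GOOGLE_CLOUD_DEFAULT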
@contextmanager
def provide_gcp_credentials(key_file_path: Optional[str] = None, key_file_dict: Optional[Dict] = None):
    """
    Context manager that provides GCP credentials for applications supporting the
    `Application Default Credentials (ADC) strategy
    <https://cloud.google.com/docs/authentication/production>`__.

    It can be used to provide credentials for external programs (e.g. gcloud) that expect an
    authorization file in the ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable.

    :param key_file_path: Path to file with GCP credentials .json file.
    :type key_file_path: str
    :param key_file_dict: Dictionary with credentials.
    :type key_file_dict: Dict
    """
    if not key_file_path and not key_file_dict:
        raise ValueError("Please provide `key_file_path` or `key_file_dict`.")
    if key_file_path and key_file_path.endswith(".p12"):
        raise AirflowException("Legacy P12 key files are not supported, use a JSON key file.")
    with tempfile.NamedTemporaryFile(mode="w+t") as conf_file:
        if not key_file_path and key_file_dict:
            conf_file.write(json.dumps(key_file_dict))
            conf_file.flush()
            key_file_path = conf_file.name
        if key_file_path:
            with patch_environ({CREDENTIALS: key_file_path}):
                yield
        else:
            # We will use the default service account credentials.
            yield
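# Hedged usage sketch for the dict variant (the dict content is a placeholder, not a real
# key): the credentials are written to a temporary file that lives only for the duration of
# the context, and GOOGLE_APPLICATION_CREDENTIALS points at it.
with provide_gcp_credentials(key_file_dict={"type": "service_account", "project_id": "example"}):
    print(os.environ["GOOGLE_APPLICATION_CREDENTIALS"])  # path of the temporary key file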
def cheat_sheet(args):
    """Display cheat-sheet."""
    with contextlib.ExitStack() as exit_stack:
        if not is_terminal_support_colors():
            exit_stack.enter_context(patch_environ({ANSI_COLORS_DISABLED: "1"}))
        cprint("List of all commands:".upper(), attrs=["bold", "underline"])
        print()
        display_commands_index()
def execute(self, context: 'Context') -> Optional[str]:
    hook = GoogleBaseHook(gcp_conn_id=self.gcp_conn_id)
    self.project_id = self.project_id or hook.project_id
    if not self.project_id:
        raise AirflowException(
            "The project id must be passed either as "
            "keyword project_id parameter or as project_id extra "
            "in Google Cloud connection definition. Both are not set!"
        )
    # Write config to a temp file and set the environment variable to point to it.
    # This is to avoid race conditions of reading/writing a single file.
    with tempfile.NamedTemporaryFile() as conf_file, patch_environ(
        {KUBE_CONFIG_ENV_VAR: conf_file.name}
    ), hook.provide_authorized_gcloud():
        # Attempt to get/update credentials.
        # We call gcloud directly instead of using the google-cloud-python API
        # because there is no way to write the Kubernetes config to a file, which is
        # required by KubernetesPodOperator.
        # The gcloud command looks at the env variable `KUBECONFIG` for where to save
        # the Kubernetes config file.
        cmd = [
            "gcloud",
            "container",
            "clusters",
            "get-credentials",
            self.cluster_name,
            "--project",
            self.project_id,
        ]
        if self.impersonation_chain:
            if isinstance(self.impersonation_chain, str):
                impersonation_account = self.impersonation_chain
            elif len(self.impersonation_chain) == 1:
                impersonation_account = self.impersonation_chain[0]
            else:
                raise AirflowException(
                    "Chained list of accounts is not supported, please specify only one service account"
                )
            cmd.extend(['--impersonate-service-account', impersonation_account])
        if self.regional:
            cmd.append('--region')
        else:
            cmd.append('--zone')
        cmd.append(self.location)
        if self.use_internal_ip:
            cmd.append('--internal-ip')
        execute_in_subprocess(cmd)

        # Tell `KubernetesPodOperator` where the config file is located.
        self.config_file = os.environ[KUBE_CONFIG_ENV_VAR]
        return super().execute(context)
def test_should_update_variable_and_restore_state_when_exit(self):
    with mock.patch.dict("os.environ", {"TEST_NOT_EXISTS": "BEFORE", "TEST_EXISTS": "BEFORE"}):
        del os.environ["TEST_NOT_EXISTS"]

        self.assertEqual("BEFORE", os.environ["TEST_EXISTS"])
        self.assertNotIn("TEST_NOT_EXISTS", os.environ)

        with process_utils.patch_environ({"TEST_NOT_EXISTS": "AFTER", "TEST_EXISTS": "AFTER"}):
            self.assertEqual("AFTER", os.environ["TEST_NOT_EXISTS"])
            self.assertEqual("AFTER", os.environ["TEST_EXISTS"])

        self.assertEqual("BEFORE", os.environ["TEST_EXISTS"])
        self.assertNotIn("TEST_NOT_EXISTS", os.environ)
def test_should_update_variable_and_restore_state_when_exit(self):
    with mock.patch.dict("os.environ", {"TEST_NOT_EXISTS": "BEFORE", "TEST_EXISTS": "BEFORE"}):
        del os.environ["TEST_NOT_EXISTS"]

        assert "BEFORE" == os.environ["TEST_EXISTS"]
        assert "TEST_NOT_EXISTS" not in os.environ

        with process_utils.patch_environ({"TEST_NOT_EXISTS": "AFTER", "TEST_EXISTS": "AFTER"}):
            assert "AFTER" == os.environ["TEST_NOT_EXISTS"]
            assert "AFTER" == os.environ["TEST_EXISTS"]

        assert "BEFORE" == os.environ["TEST_EXISTS"]
        assert "TEST_NOT_EXISTS" not in os.environ
def test_should_restore_state_when_exception(self):
    with mock.patch.dict("os.environ", {"TEST_NOT_EXISTS": "BEFORE", "TEST_EXISTS": "BEFORE"}):
        del os.environ["TEST_NOT_EXISTS"]

        self.assertEqual("BEFORE", os.environ["TEST_EXISTS"])
        self.assertNotIn("TEST_NOT_EXISTS", os.environ)

        with suppress(AirflowException):
            with process_utils.patch_environ({"TEST_NOT_EXISTS": "AFTER", "TEST_EXISTS": "AFTER"}):
                self.assertEqual("AFTER", os.environ["TEST_NOT_EXISTS"])
                self.assertEqual("AFTER", os.environ["TEST_EXISTS"])
                raise AirflowException("Unknown exception")

        self.assertEqual("BEFORE", os.environ["TEST_EXISTS"])
        self.assertNotIn("TEST_NOT_EXISTS", os.environ)
def test_should_restore_state_when_exception(self):
    with mock.patch.dict("os.environ", {"TEST_NOT_EXISTS": "BEFORE", "TEST_EXISTS": "BEFORE"}):
        del os.environ["TEST_NOT_EXISTS"]

        assert "BEFORE" == os.environ["TEST_EXISTS"]
        assert "TEST_NOT_EXISTS" not in os.environ

        with suppress(AirflowException):
            with process_utils.patch_environ({"TEST_NOT_EXISTS": "AFTER", "TEST_EXISTS": "AFTER"}):
                assert "AFTER" == os.environ["TEST_NOT_EXISTS"]
                assert "AFTER" == os.environ["TEST_EXISTS"]
                raise AirflowException("Unknown exception")

        assert "BEFORE" == os.environ["TEST_EXISTS"]
        assert "TEST_NOT_EXISTS" not in os.environ
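# The tests above pin down the contract of process_utils.patch_environ: set the given
# variables on entry and restore the previous state on exit, even when the block raises.
# A minimal sketch consistent with that behavior (an illustration, not necessarily the
# actual Airflow implementation):
import os
from contextlib import contextmanager
from typing import Dict

@contextmanager
def patch_environ(new_env_variables: Dict[str, str]):
    """Set the given environment variables and restore the previous state on exit."""
    current_env_state = {key: os.environ.get(key) for key in new_env_variables}
    os.environ.update(new_env_variables)
    try:
        yield
    finally:
        for key, old_value in current_env_state.items():
            if old_value is None:
                # The variable did not exist before; remove it again.
                os.environ.pop(key, None)
            else:
                os.environ[key] = old_value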
@contextmanager
def provide_wasb_default_connection(key_file_path: str):
    """
    Context manager to provide a temporary value for the wasb_default connection.

    :param key_file_path: Path to file with wasb_default credentials .json file.
    :type key_file_path: str
    """
    if not key_file_path.endswith(".json"):
        raise AirflowException("Use a JSON key file.")
    with open(key_file_path) as credentials:
        creds = json.load(credentials)
    conn = Connection(
        conn_id=WASB_CONNECTION_ID,
        conn_type="wasb",
        host=creds.get("host", None),
        login=creds.get("login", None),
        password=creds.get("password", None),
        extra=json.dumps(creds.get('extra', None)),
    )
    with patch_environ({f"AIRFLOW_CONN_{conn.conn_id.upper()}": conn.get_uri()}):
        yield
@contextmanager
def provide_facebook_connection(key_file_path: str):
    """
    Context manager that provides a temporary value of the AIRFLOW_CONN_FACEBOOK_DEFAULT
    connection. It builds a new connection from the credentials in the provided JSON key file.

    :param key_file_path: Path to file with FACEBOOK credentials .json file.
    :type key_file_path: str
    """
    if not key_file_path.endswith(".json"):
        raise AirflowException("Use a JSON key file.")
    with open(key_file_path) as credentials:
        creds = json.load(credentials)
    missing_keys = CONFIG_REQUIRED_FIELDS - creds.keys()
    if missing_keys:
        message = f"{missing_keys} fields are missing"
        raise AirflowException(message)
    conn = Connection(conn_id=FACEBOOK_CONNECTION_ID, conn_type=CONNECTION_TYPE, extra=json.dumps(creds))
    with patch_environ({f"AIRFLOW_CONN_{conn.conn_id.upper()}": conn.get_uri()}):
        yield
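# Hedged usage sketch (the test name and key path are hypothetical): objects returned by
# @contextmanager functions also work as decorators, so the helper above can wrap a whole
# test function instead of a with-block.
@provide_facebook_connection("/files/facebook-key.json")
def test_facebook_ads_example_dag():
    ...  # runs with AIRFLOW_CONN_FACEBOOK_DEFAULT set from the key file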
@contextmanager
def provide_leveldb_connection():
    """Context manager that provides a temporary value of AIRFLOW_CONN_LEVELDB_DEFAULT connection."""
    conn = Connection(conn_id=LEVELDB_CONNECTION_ID, conn_type=CONNECTION_TYPE)
    with patch_environ({f"AIRFLOW_CONN_{conn.conn_id.upper()}": conn.get_uri()}):
        yield