def test_execute_with_xcom_exceeded_max_xcom_size(
            self,
            mock_sys_getsizeof,
            mock_json_dumps,
            mock_s3_hook_load_string,
            mock_google_api_hook_query):
        """Verify ``execute`` raises ``RuntimeError`` and never pushes to XCom.

        The operator is configured both to pull its endpoint params from XCom
        and to publish the API response via XCom. The API query, JSON dump and
        S3 upload are still expected to have happened exactly once.

        NOTE(review): judging by the test name, the error is triggered by the
        response exceeding the max XCom size; ``mock_sys_getsizeof`` is
        presumably configured by a patch decorator that is not visible here.
        """
        task_instance = Mock()
        # The XCom pull yields an empty dict.
        task_instance.xcom_pull.return_value = {}
        context = {'task_instance': task_instance}
        xcom_kwargs = dict(
            google_api_response_via_xcom='response',
            google_api_endpoint_params_via_xcom='params',
            google_api_endpoint_params_via_xcom_task_ids='params',
        )
        operator = GoogleApiToS3TransferOperator(**self.kwargs, **xcom_kwargs)

        with self.assertRaises(RuntimeError):
            operator.execute(context)

        expected = self.kwargs
        api_response = mock_google_api_hook_query.return_value

        # The Google API was queried once with the configured settings.
        mock_google_api_hook_query.assert_called_once_with(
            endpoint=expected['google_api_endpoint_path'],
            data=expected['google_api_endpoint_params'],
            paginate=expected['google_api_pagination'],
            num_retries=expected['google_api_num_retries'],
        )
        # The response was serialized once and uploaded to S3 once.
        mock_json_dumps.assert_called_once_with(api_response)
        mock_s3_hook_load_string.assert_called_once_with(
            string_data=mock_json_dumps.return_value,
            key=expected['s3_destination_key'],
            replace=expected['s3_overwrite'],
        )
        # Endpoint params were pulled from XCom, but nothing was pushed back.
        task_instance.xcom_pull.assert_called_once_with(
            task_ids=xcom_kwargs['google_api_endpoint_params_via_xcom_task_ids'],
            key=xcom_kwargs['google_api_endpoint_params_via_xcom'],
        )
        task_instance.xcom_push.assert_not_called()
        # The size check ran on the raw API response.
        mock_sys_getsizeof.assert_called_once_with(api_response)
    def test_execute(
            self,
            mock_json_dumps,
            mock_s3_hook_load_string,
            mock_google_api_hook_query):
        """Verify the plain transfer path: query, serialize, upload, no XCom.

        With only the base ``self.kwargs`` configured, ``execute`` is expected
        to query the Google API once, dump the response to JSON once, upload
        it to S3 once, and neither pull from nor push to XCom.
        """
        task_instance = Mock()
        operator = GoogleApiToS3TransferOperator(**self.kwargs)

        operator.execute({'task_instance': task_instance})

        expected = self.kwargs
        mock_google_api_hook_query.assert_called_once_with(
            endpoint=expected['google_api_endpoint_path'],
            data=expected['google_api_endpoint_params'],
            paginate=expected['google_api_pagination'],
            num_retries=expected['google_api_num_retries'],
        )
        mock_json_dumps.assert_called_once_with(
            mock_google_api_hook_query.return_value,
        )
        mock_s3_hook_load_string.assert_called_once_with(
            string_data=mock_json_dumps.return_value,
            key=expected['s3_destination_key'],
            replace=expected['s3_overwrite'],
        )
        # No XCom traffic in either direction.
        task_instance.xcom_pull.assert_not_called()
        task_instance.xcom_push.assert_not_called()
Пример #3
0
from airflow import DAG
from airflow.providers.amazon.aws.operators.google_api_to_s3_transfer import GoogleApiToS3TransferOperator
from airflow.utils.dates import days_ago

# [START howto_operator_google_api_to_s3_transfer_basic_env_variables]
GOOGLE_SHEET_ID = getenv("GOOGLE_SHEET_ID")
GOOGLE_SHEET_RANGE = getenv("GOOGLE_SHEET_RANGE")
S3_DESTINATION_KEY = getenv("S3_DESTINATION_KEY", "s3://bucket/key.json")
# [END howto_operator_google_api_to_s3_transfer_basic_env_variables]

# Sheet id and range are read from the environment and are None when unset;
# only the S3 destination key has a fallback value.
default_args = dict(start_date=days_ago(1))

# schedule_interval=None: the DAG is not given a schedule.
with DAG(
    dag_id="example_google_api_to_s3_transfer_basic",
    default_args=default_args,
    schedule_interval=None,
    tags=['example'],
) as dag:
    # [START howto_operator_google_api_to_s3_transfer_basic_task_1]
    task_google_sheets_values_to_s3 = GoogleApiToS3TransferOperator(
        task_id='google_sheets_values_to_s3',
        dag=dag,
        google_api_service_name='sheets',
        google_api_service_version='v4',
        google_api_endpoint_path='sheets.spreadsheets.values.get',
        google_api_endpoint_params={
            'spreadsheetId': GOOGLE_SHEET_ID,
            'range': GOOGLE_SHEET_RANGE,
        },
        s3_destination_key=S3_DESTINATION_KEY,
    )
    # [END howto_operator_google_api_to_s3_transfer_basic_task_1]
Пример #4
0
# Drop only the final extension from the destination file name. Indexing the
# split result (instead of unpacking into exactly two targets) keeps this
# working when the file name contains no '.', where a two-target unpack would
# raise ValueError while the DAG file is being imported.
s3_file_name = s3_file.rsplit('.', 1)[0]

with DAG(dag_id="example_google_api_to_s3_transfer_advanced",
         default_args=default_args,
         schedule_interval=None,
         tags=['example']) as dag:
    # [START howto_operator_google_api_to_s3_transfer_advanced_task_1]
    # Calls youtube.search.list for the channel's videos in the configured
    # publish window, requesting only the video ids. The response is stored at
    # the S3 destination key and exposed under the 'video_ids_response' XCom
    # key, which the branching task defined next receives as an argument.
    task_video_ids_to_s3 = GoogleApiToS3TransferOperator(
        task_id='video_ids_to_s3',
        gcp_conn_id=YOUTUBE_CONN_ID,
        google_api_service_name='youtube',
        google_api_service_version='v3',
        google_api_endpoint_path='youtube.search.list',
        google_api_endpoint_params={
            'part': 'snippet',
            'type': 'video',
            'channelId': YOUTUBE_CHANNEL_ID,
            'publishedAfter': YOUTUBE_VIDEO_PUBLISHED_AFTER,
            'publishedBefore': YOUTUBE_VIDEO_PUBLISHED_BEFORE,
            'maxResults': 50,
            'fields': 'items/id/videoId',
        },
        google_api_response_via_xcom='video_ids_response',
        s3_destination_key=f'{s3_directory}/youtube_search_{s3_file_name}.json',
    )
    # [END howto_operator_google_api_to_s3_transfer_advanced_task_1]
    # [START howto_operator_google_api_to_s3_transfer_advanced_task_1_1]
    task_check_and_transform_video_ids = BranchPythonOperator(
        python_callable=_check_and_transform_video_ids,
        op_args=[
            task_video_ids_to_s3.google_api_response_via_xcom,
            task_video_ids_to_s3.task_id