Пример #1
0
    def test_execute_with_xcom(self, mock_json_dumps, mock_s3_hook_load_string,
                               mock_google_api_hook_query):
        """Execute the operator with the XCom options set and check every collaborator call.

        The task instance in the context is a ``Mock`` whose ``xcom_pull`` returns
        an empty dict. The test expects exactly one call each to the query mock,
        the JSON-dump mock, the S3 ``load_string`` mock, ``xcom_pull`` and
        ``xcom_push``, with arguments derived from ``self.kwargs`` and the XCom
        options defined below.
        """
        base_kwargs = self.kwargs
        xcom_options = dict(
            google_api_response_via_xcom='response',
            google_api_endpoint_params_via_xcom='params',
            google_api_endpoint_params_via_xcom_task_ids='params',
        )

        task_instance = Mock()
        # An empty pull result, so the query is still expected to receive the
        # endpoint params from self.kwargs unchanged (asserted below).
        task_instance.xcom_pull.return_value = {}
        ctx = {'task_instance': task_instance}

        operator = GoogleApiToS3Operator(**base_kwargs, **xcom_options)
        operator.execute(ctx)

        # The API must be queried once, configured purely from the base kwargs.
        mock_google_api_hook_query.assert_called_once_with(
            endpoint=base_kwargs['google_api_endpoint_path'],
            data=base_kwargs['google_api_endpoint_params'],
            paginate=base_kwargs['google_api_pagination'],
            num_retries=base_kwargs['google_api_num_retries'],
        )

        # The query result is dumped once and the dump is what gets loaded to S3.
        api_response = mock_google_api_hook_query.return_value
        mock_json_dumps.assert_called_once_with(api_response)
        mock_s3_hook_load_string.assert_called_once_with(
            string_data=mock_json_dumps.return_value,
            key=base_kwargs['s3_destination_key'],
            replace=base_kwargs['s3_overwrite'],
        )

        # Endpoint params are pulled from XCom, and the response is pushed back.
        task_instance.xcom_pull.assert_called_once_with(
            task_ids=xcom_options['google_api_endpoint_params_via_xcom_task_ids'],
            key=xcom_options['google_api_endpoint_params_via_xcom'],
        )
        task_instance.xcom_push.assert_called_once_with(
            key=xcom_options['google_api_response_via_xcom'],
            value=api_response,
        )
Пример #2
0
    def test_execute(self, mock_json_dumps, mock_s3_hook_load_string, mock_google_api_hook_query):
        """Execute the operator with only ``self.kwargs`` and check the collaborator calls.

        The test expects exactly one call each to the query mock, the JSON-dump
        mock and the S3 ``load_string`` mock, and no ``xcom_pull`` or
        ``xcom_push`` on the task instance.
        """
        base_kwargs = self.kwargs
        task_instance = Mock()

        operator = GoogleApiToS3Operator(**base_kwargs)
        operator.execute({'task_instance': task_instance})

        # The API must be queried once, configured purely from the base kwargs.
        mock_google_api_hook_query.assert_called_once_with(
            endpoint=base_kwargs['google_api_endpoint_path'],
            data=base_kwargs['google_api_endpoint_params'],
            paginate=base_kwargs['google_api_pagination'],
            num_retries=base_kwargs['google_api_num_retries'],
        )

        # The query result is dumped once and the dump is what gets loaded to S3.
        api_response = mock_google_api_hook_query.return_value
        mock_json_dumps.assert_called_once_with(api_response)
        mock_s3_hook_load_string.assert_called_once_with(
            string_data=mock_json_dumps.return_value,
            key=base_kwargs['s3_destination_key'],
            replace=base_kwargs['s3_overwrite'],
        )

        # Without the XCom options the task instance must not be touched.
        task_instance.xcom_pull.assert_not_called()
        task_instance.xcom_push.assert_not_called()
s3_file_name, _ = s3_file.rsplit('.', 1)

with DAG(dag_id="example_google_api_to_s3_transfer_advanced",
         default_args=default_args,
         schedule_interval=None,
         tags=['example']) as dag:
    # [START howto_operator_google_api_to_s3_transfer_advanced_task_1]
    task_video_ids_to_s3 = GoogleApiToS3Operator(
        gcp_conn_id=YOUTUBE_CONN_ID,
        google_api_service_name='youtube',
        google_api_service_version='v3',
        google_api_endpoint_path='youtube.search.list',
        google_api_endpoint_params={
            'part': 'snippet',
            'channelId': YOUTUBE_CHANNEL_ID,
            'maxResults': 50,
            'publishedAfter': YOUTUBE_VIDEO_PUBLISHED_AFTER,
            'publishedBefore': YOUTUBE_VIDEO_PUBLISHED_BEFORE,
            'type': 'video',
            'fields': 'items/id/videoId'
        },
        google_api_response_via_xcom='video_ids_response',
        s3_destination_key=f'{s3_directory}/youtube_search_{s3_file_name}.json',
        task_id='video_ids_to_s3')
    # [END howto_operator_google_api_to_s3_transfer_advanced_task_1]
    # [START howto_operator_google_api_to_s3_transfer_advanced_task_1_1]
    task_check_and_transform_video_ids = BranchPythonOperator(
        python_callable=_check_and_transform_video_ids,
        op_args=[
            task_video_ids_to_s3.google_api_response_via_xcom,
            task_video_ids_to_s3.task_id
Пример #4
0
from datetime import datetime
from os import getenv

from airflow import DAG
from airflow.providers.amazon.aws.transfers.google_api_to_s3 import GoogleApiToS3Operator

# Settings are read from the environment; each falls back to a placeholder
# value when the variable is not set.
GOOGLE_SHEET_ID = getenv("GOOGLE_SHEET_ID", "test-google-sheet-id")
GOOGLE_SHEET_RANGE = getenv("GOOGLE_SHEET_RANGE", "test-google-sheet-range")
S3_DESTINATION_KEY = getenv("S3_DESTINATION_KEY", "s3://test-bucket/key.json")

# Example DAG with a single task that calls the Google Sheets
# ``spreadsheets.values.get`` endpoint for the configured sheet and range,
# passing S3_DESTINATION_KEY as the operator's destination key.
with DAG(
        dag_id="example_google_api_sheets_to_s3",
        # Manual trigger only: with no explicit schedule the DAG would fall back
        # to a default recurring schedule and keep calling the external API.
        schedule_interval=None,
        start_date=datetime(2021, 1, 1),
        catchup=False,
        tags=['example'],
) as dag:
    # [START howto_transfer_google_api_sheets_to_s3]
    task_google_sheets_values_to_s3 = GoogleApiToS3Operator(
        task_id='google_sheet_data_to_s3',
        google_api_service_name='sheets',
        google_api_service_version='v4',
        google_api_endpoint_path='sheets.spreadsheets.values.get',
        google_api_endpoint_params={
            'spreadsheetId': GOOGLE_SHEET_ID,
            'range': GOOGLE_SHEET_RANGE,
        },
        s3_destination_key=S3_DESTINATION_KEY,
    )
    # [END howto_transfer_google_api_sheets_to_s3]
Пример #5
0
        dag_id="example_google_api_youtube_to_s3",
        schedule_interval=None,
        start_date=datetime(2021, 1, 1),
        catchup=False,
        tags=['example'],
) as dag:
    # [START howto_transfer_google_api_youtube_search_to_s3]
    task_video_ids_to_s3 = GoogleApiToS3Operator(
        task_id='video_ids_to_s3',
        google_api_service_name='youtube',
        google_api_service_version='v3',
        google_api_endpoint_path='youtube.search.list',
        google_api_endpoint_params={
            'part': 'snippet',
            'channelId': YOUTUBE_CHANNEL_ID,
            'maxResults': 50,
            'publishedAfter': YOUTUBE_VIDEO_PUBLISHED_AFTER,
            'publishedBefore': YOUTUBE_VIDEO_PUBLISHED_BEFORE,
            'type': 'video',
            'fields': 'items/id/videoId',
        },
        google_api_response_via_xcom='video_ids_response',
        s3_destination_key=f'{S3_BUCKET_NAME}/youtube_search.json',
        s3_overwrite=True,
    )
    # [END howto_transfer_google_api_youtube_search_to_s3]

    task_transform_video_ids = transform_video_ids()

    # [START howto_transfer_google_api_youtube_list_to_s3]
    task_video_data_to_s3 = GoogleApiToS3Operator(
        task_id='video_data_to_s3',