Example 1
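These snippets reference module-level constants that the excerpt omits. A plausible set of definitions, inferred from how the values are used below (the exact strings are assumptions, not the original file's values):

TASK_ID = "test-gcs-to-s3-operator"  # arbitrary task id (assumed value)
GCS_BUCKET = "test-gcs-bucket"  # source GCS bucket (assumed value)
PREFIX = "TEST"  # only objects under this prefix are copied (assumed value)
DELIMITER = ".csv"  # forwarded to the GCS list() call (assumed value)
S3_BUCKET = "s3://bucket/"  # destination URL; the tests create a bucket literally named "bucket"
MOCK_FILES = ["TEST1.csv", "TEST2.csv", "TEST3.csv"]  # assumed file names
ACL_POLICY = "private"  # any S3 canned ACL string (assumed value)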
    def test_execute_without_replace(self, mock_hook, mock_hook2):
        mock_hook.return_value.list.return_value = MOCK_FILES
        mock_hook.return_value.download.return_value = b"testing"
        mock_hook2.return_value.list.return_value = MOCK_FILES

        operator = GCSToS3Operator(
            task_id=TASK_ID,
            bucket=GCS_BUCKET,
            prefix=PREFIX,
            delimiter=DELIMITER,
            dest_aws_conn_id="aws_default",
            dest_s3_key=S3_BUCKET,
            replace=False,
        )
        # create dest bucket with all the files
        hook = S3Hook(aws_conn_id='airflow_gcs_test')
        bucket = hook.get_bucket('bucket')
        bucket.create()
        for mock_file in MOCK_FILES:
            bucket.put_object(Key=mock_file, Body=b'testing')

        # we expect nothing to be uploaded
        # and all the MOCK_FILES to be present at the S3 bucket
        uploaded_files = operator.execute(None)
        self.assertEqual([], uploaded_files)
        self.assertEqual(sorted(MOCK_FILES),
                         sorted(hook.list_keys('bucket', delimiter='/')))
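The mock_hook/mock_hook2 parameters imply stacked @mock.patch decorators on each test method, and the live-looking S3 calls (get_bucket, put_object, list_keys) imply an in-memory S3 provided by moto. A minimal sketch of the scaffolding these methods assume; the patch targets are assumptions, and moto's mock_s3 decorator applies to moto < 5 (newer versions renamed it mock_aws):

import unittest
from unittest import mock

from moto import mock_s3


class TestGCSToS3Operator(unittest.TestCase):
    # mock.patch decorators apply bottom-up: the decorator closest to the
    # function supplies the first mock argument after self.
    @mock_s3
    @mock.patch("airflow.providers.google.cloud.hooks.gcs.GCSHook")  # -> mock_hook2 (assumed target)
    @mock.patch("airflow.providers.amazon.aws.transfers.gcs_to_s3.GCSHook")  # -> mock_hook (assumed target)
    def test_execute_without_replace(self, mock_hook, mock_hook2):
        ...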
Example 2
    def test_execute_with_acl_policy(self, mock_load_bytes, mock_gcs_hook,
                                     mock_gcs_hook2):
        mock_gcs_hook.return_value.list.return_value = MOCK_FILES
        mock_gcs_hook.return_value.download.return_value = b"testing"
        mock_gcs_hook2.return_value.list.return_value = MOCK_FILES

        operator = GCSToS3Operator(
            task_id=TASK_ID,
            bucket=GCS_BUCKET,
            prefix=PREFIX,
            delimiter=DELIMITER,
            dest_aws_conn_id="aws_default",
            dest_s3_key=S3_BUCKET,
            replace=False,
            acl_policy=ACL_POLICY,
        )

        # Create dest bucket without files
        hook = S3Hook(aws_conn_id='airflow_gcs_test')
        bucket = hook.get_bucket('bucket')
        bucket.create()

        operator.execute(None)

        # Make sure the acl_policy parameter is passed to the upload method
        self.assertEqual(mock_load_bytes.call_args.kwargs['acl_policy'],
                         ACL_POLICY)
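The assertion above only checks that the policy string is forwarded. Inside the operator, each downloaded object is re-uploaded through S3Hook.load_bytes, roughly like this (a simplified sketch with assumed attribute names, not the actual operator source):

# For each file listed on the GCS side:
file_bytes = gcs_hook.download(bucket_name=self.bucket, object_name=file)
s3_hook.load_bytes(
    file_bytes,
    key=dest_key,  # destination key, e.g. S3_BUCKET + file
    replace=self.replace,
    acl_policy=self.acl_policy,  # canned ACL applied to the uploaded object
)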
Example 3
    def test_execute_should_pass_dest_s3_extra_args_to_s3_hook(
            self, s3_mock_hook, mock_hook, mock_hook2):
        mock_hook.return_value.list.return_value = MOCK_FILES
        mock_hook.return_value.download.return_value = b"testing"
        mock_hook2.return_value.list.return_value = MOCK_FILES
        s3_mock_hook.return_value = mock.Mock()
        s3_mock_hook.parse_s3_url.return_value = mock.Mock()

        operator = GCSToS3Operator(
            task_id=TASK_ID,
            bucket=GCS_BUCKET,
            prefix=PREFIX,
            delimiter=DELIMITER,
            dest_aws_conn_id="aws_default",
            dest_s3_key=S3_BUCKET,
            replace=True,
            dest_s3_extra_args={
                "ContentLanguage": "value",
            },
        )
        operator.execute(None)
        s3_mock_hook.assert_called_once_with(
            aws_conn_id='aws_default',
            extra_args={'ContentLanguage': 'value'},
            verify=None)
Example 4
    def test_execute_incremental(self, mock_hook, mock_hook2):
        mock_hook.return_value.list.return_value = MOCK_FILES
        mock_hook.return_value.download.return_value = b"testing"
        mock_hook2.return_value.list.return_value = MOCK_FILES

        operator = GCSToS3Operator(
            task_id=TASK_ID,
            bucket=GCS_BUCKET,
            prefix=PREFIX,
            delimiter=DELIMITER,
            dest_aws_conn_id="aws_default",
            dest_s3_key=S3_BUCKET,
            replace=False,
        )
        # create dest bucket
        hook = S3Hook(aws_conn_id='airflow_gcs_test')
        bucket = hook.get_bucket('bucket')
        bucket.create()
        bucket.put_object(Key=MOCK_FILES[0], Body=b'testing')

        # we expect all except first file in MOCK_FILES to be uploaded
        # and all the MOCK_FILES to be present at the S3 bucket
        uploaded_files = operator.execute(None)
        assert sorted(MOCK_FILES[1:]) == sorted(uploaded_files)
        assert sorted(MOCK_FILES) == sorted(
            hook.list_keys('bucket', delimiter='/'))
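The incremental behavior this test verifies comes from the operator diffing the GCS listing against the keys already present at the destination when replace=False; conceptually (a simplified sketch, not the actual operator source):

# With replace=False, skip anything the destination already has:
existing_keys = s3_hook.list_keys(bucket_name=dest_bucket, prefix=dest_prefix) or []
files = [f for f in files if f not in existing_keys]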
Example 5

import os
from datetime import datetime

from airflow import DAG
from airflow.providers.amazon.aws.transfers.gcs_to_s3 import GCSToS3Operator

BUCKET = os.getenv("BUCKET", "bucket")
S3_KEY = os.getenv("S3_KEY", "s3://<bucket>/<prefix>")

with DAG(
        dag_id="example_gcs_to_s3",
        start_date=datetime(2021, 1, 1),
        tags=["example"],
        catchup=False,
) as dag:
    # [START howto_transfer_gcs_to_s3]
    gcs_to_s3 = GCSToS3Operator(
        task_id="gcs_to_s3",
        bucket=BUCKET,
        dest_s3_key=S3_KEY,
        replace=True,
    )
    # [END howto_transfer_gcs_to_s3]
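The operator also accepts the filtering and connection parameters exercised in the tests above. A variant of the same task restricted to one prefix (the parameter values here are placeholders, not from the original example):

    gcs_to_s3_filtered = GCSToS3Operator(
        task_id="gcs_to_s3_filtered",
        bucket=BUCKET,
        prefix="data/",  # copy only objects under this prefix (placeholder)
        delimiter=".csv",  # forwarded to the GCS list() call (placeholder)
        dest_aws_conn_id="aws_default",
        dest_s3_key=S3_KEY,
        replace=False,  # skip objects already present in the destination
    )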