def test_execute_without_replace(self, mock_hook, mock_hook2):
    """With replace=False and every file already present, nothing is uploaded."""
    mock_hook.return_value.list.return_value = MOCK_FILES
    mock_hook.return_value.download.return_value = b"testing"
    mock_hook2.return_value.list.return_value = MOCK_FILES
    operator = GCSToS3Operator(
        task_id=TASK_ID,
        bucket=GCS_BUCKET,
        prefix=PREFIX,
        delimiter=DELIMITER,
        dest_aws_conn_id="aws_default",
        dest_s3_key=S3_BUCKET,
        replace=False,
    )
    # create dest bucket with all the files
    hook = S3Hook(aws_conn_id='airflow_gcs_test')
    bucket = hook.get_bucket('bucket')
    bucket.create()
    for mock_file in MOCK_FILES:
        bucket.put_object(Key=mock_file, Body=b'testing')
    # we expect nothing to be uploaded
    # and all the MOCK_FILES to be present at the S3 bucket
    uploaded_files = operator.execute(None)
    self.assertEqual([], uploaded_files)
    self.assertEqual(sorted(MOCK_FILES), sorted(hook.list_keys('bucket', delimiter='/')))
def test_execute_with_acl_policy(self, mock_load_bytes, mock_gcs_hook, mock_gcs_hook2):
    """The acl_policy argument should be forwarded to the S3 upload call."""
    mock_gcs_hook.return_value.list.return_value = MOCK_FILES
    mock_gcs_hook.return_value.download.return_value = b"testing"
    mock_gcs_hook2.return_value.list.return_value = MOCK_FILES
    operator = GCSToS3Operator(
        task_id=TASK_ID,
        bucket=GCS_BUCKET,
        prefix=PREFIX,
        delimiter=DELIMITER,
        dest_aws_conn_id="aws_default",
        dest_s3_key=S3_BUCKET,
        replace=False,
        acl_policy=ACL_POLICY,
    )
    # Create dest bucket without files
    hook = S3Hook(aws_conn_id='airflow_gcs_test')
    bucket = hook.get_bucket('bucket')
    bucket.create()
    operator.execute(None)
    # Make sure the acl_policy parameter is passed to the upload method
    self.assertEqual(mock_load_bytes.call_args.kwargs['acl_policy'], ACL_POLICY)
def test_execute_should_pass_dest_s3_extra_args_to_s3_hook(self, s3_mock_hook, mock_hook, mock_hook2):
    """dest_s3_extra_args should be handed to the S3Hook constructor unchanged."""
    mock_hook.return_value.list.return_value = MOCK_FILES
    mock_hook.return_value.download.return_value = b"testing"
    mock_hook2.return_value.list.return_value = MOCK_FILES
    s3_mock_hook.return_value = mock.Mock()
    s3_mock_hook.parse_s3_url.return_value = mock.Mock()
    operator = GCSToS3Operator(
        task_id=TASK_ID,
        bucket=GCS_BUCKET,
        prefix=PREFIX,
        delimiter=DELIMITER,
        dest_aws_conn_id="aws_default",
        dest_s3_key=S3_BUCKET,
        replace=True,
        dest_s3_extra_args={
            "ContentLanguage": "value",
        },
    )
    operator.execute(None)
    s3_mock_hook.assert_called_once_with(
        aws_conn_id='aws_default',
        extra_args={'ContentLanguage': 'value'},
        verify=None,
    )
def test_execute_incremental(self, mock_hook, mock_hook2):
    """With replace=False, only files missing from the destination are uploaded."""
    mock_hook.return_value.list.return_value = MOCK_FILES
    mock_hook.return_value.download.return_value = b"testing"
    mock_hook2.return_value.list.return_value = MOCK_FILES
    operator = GCSToS3Operator(
        task_id=TASK_ID,
        bucket=GCS_BUCKET,
        prefix=PREFIX,
        delimiter=DELIMITER,
        dest_aws_conn_id="aws_default",
        dest_s3_key=S3_BUCKET,
        replace=False,
    )
    # create dest bucket seeded with the first mock file
    hook = S3Hook(aws_conn_id='airflow_gcs_test')
    bucket = hook.get_bucket('bucket')
    bucket.create()
    bucket.put_object(Key=MOCK_FILES[0], Body=b'testing')
    # we expect all except the first file in MOCK_FILES to be uploaded
    # and all the MOCK_FILES to be present at the S3 bucket
    uploaded_files = operator.execute(None)
    assert sorted(MOCK_FILES[1:]) == sorted(uploaded_files)
    assert sorted(MOCK_FILES) == sorted(hook.list_keys('bucket', delimiter='/'))
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
import os
from datetime import datetime

from airflow import DAG
from airflow.providers.amazon.aws.transfers.gcs_to_s3 import GCSToS3Operator

BUCKET = os.getenv("BUCKET", "bucket")
S3_KEY = os.getenv("S3_KEY", "s3://<bucket>/<prefix>")

with DAG(
    dag_id="example_gcs_to_s3",
    start_date=datetime(2021, 1, 1),
    tags=["example"],
    catchup=False,
) as dag:
    # [START howto_transfer_gcs_to_s3]
    gcs_to_s3 = GCSToS3Operator(
        task_id="gcs_to_s3",
        bucket=BUCKET,
        dest_s3_key=S3_KEY,
        replace=True,
    )
    # [END howto_transfer_gcs_to_s3]