Exemplo n.º 1
0
    def test_execute_without_replace(self, mock_hook, mock_hook2):
        """Check that ``replace=False`` uploads nothing when every key already exists.

        The destination bucket is pre-populated with every entry of
        ``MOCK_FILES``, so the operator is expected to return an empty list
        and the bucket is expected to still list exactly those keys.

        :param mock_hook: mock whose ``list`` and ``download`` results are
            stubbed below (presumably a patched GCS hook class; the patch
            decorators are not visible here -- TODO confirm)
        :param mock_hook2: second mock whose ``list`` result is stubbed
            (presumably another patched GCS hook class -- TODO confirm)
        """
        # Stub the mocked hooks: both list MOCK_FILES, the first also serves content.
        source_hook = mock_hook.return_value
        source_hook.list.return_value = MOCK_FILES
        source_hook.download.return_value = b"testing"
        mock_hook2.return_value.list.return_value = MOCK_FILES

        transfer = GCSToS3Operator(
            task_id=TASK_ID,
            bucket=GCS_BUCKET,
            prefix=PREFIX,
            delimiter=DELIMITER,
            dest_aws_conn_id=None,
            dest_s3_key=S3_BUCKET,
            replace=False,
        )

        # Seed the destination bucket with every key the stubbed source lists.
        # NOTE(review): the bucket name is the literal 'bucket'; presumably
        # S3_BUCKET points at that same bucket -- confirm.
        s3_hook = S3Hook(aws_conn_id=None)
        dest_bucket = s3_hook.get_bucket('bucket')
        dest_bucket.create()
        for key in MOCK_FILES:
            dest_bucket.put_object(Key=key, Body=b'testing')

        uploaded = transfer.execute(None)

        # Nothing should have been uploaded ...
        self.assertEqual([], uploaded)

        # ... and the bucket should hold exactly the MOCK_FILES keys.
        keys_in_bucket = s3_hook.list_keys('bucket', delimiter='/')
        self.assertEqual(sorted(MOCK_FILES), sorted(keys_in_bucket))
Exemplo n.º 2
0
    def test_execute_incremental(self, mock_hook, mock_hook2):
        """Check that ``replace=False`` uploads only the keys missing at the destination.

        Only the first entry of ``MOCK_FILES`` is pre-created in the bucket,
        so the operator is expected to report every other entry as uploaded
        and the bucket is expected to list all of ``MOCK_FILES`` afterwards.

        :param mock_hook: mock whose ``list`` and ``download`` results are
            stubbed below (presumably a patched GCS hook class; the patch
            decorators are not visible here -- TODO confirm)
        :param mock_hook2: second mock whose ``list`` result is stubbed
            (presumably another patched GCS hook class -- TODO confirm)
        """
        # Stub the mocked hooks: both list MOCK_FILES, the first also serves content.
        source_hook = mock_hook.return_value
        source_hook.list.return_value = MOCK_FILES
        source_hook.download.return_value = b"testing"
        mock_hook2.return_value.list.return_value = MOCK_FILES

        transfer = GCSToS3Operator(
            task_id=TASK_ID,
            bucket=GCS_BUCKET,
            prefix=PREFIX,
            delimiter=DELIMITER,
            dest_aws_conn_id="aws_default",
            dest_s3_key=S3_BUCKET,
            replace=False,
        )

        # Create the destination bucket holding just the first mock file.
        # NOTE(review): the operator is given "aws_default" while this hook
        # uses 'airflow_gcs_test'; presumably both reach the same mocked S3
        # backend -- confirm.
        s3_hook = S3Hook(aws_conn_id='airflow_gcs_test')
        dest_bucket = s3_hook.get_bucket('bucket')
        dest_bucket.create()
        dest_bucket.put_object(Key=MOCK_FILES[0], Body=b'testing')

        uploaded = transfer.execute(None)

        # Every file except the pre-existing first one should be uploaded ...
        expected_uploads = sorted(MOCK_FILES[1:])
        self.assertEqual(expected_uploads, sorted(uploaded))

        # ... and the bucket should end up holding all of MOCK_FILES.
        keys_in_bucket = s3_hook.list_keys('bucket', delimiter='/')
        self.assertEqual(sorted(MOCK_FILES), sorted(keys_in_bucket))