示例#1
0
    def test_file_splitting(self, gcs_hook_mock_class, mysql_hook_mock_class):
        """Check that NDJSON output is split by ``approx_max_file_size_bytes``.

        The size limit is set to the byte length of the first expected chunk
        (the first two NDJSON lines). Every upload is checked against the
        expected content registered for its object name.
        """
        # Feed the operator canned rows and column metadata through the mocked cursor.
        cursor_mock = mysql_hook_mock_class.return_value.get_conn().cursor()
        cursor_mock.__iter__.return_value = iter(ROWS)
        cursor_mock.description = CURSOR_DESCRIPTION

        gcs_hook_mock = gcs_hook_mock_class.return_value

        # Expected bytes of every uploaded object, keyed by object name.
        first_chunk_name = JSON_FILENAME.format(0)
        expected_upload = {
            first_chunk_name: b''.join(NDJSON_LINES[:2]),
            JSON_FILENAME.format(1): NDJSON_LINES[2],
        }

        def _assert_upload(bucket, obj, tmp_filename, mime_type=None, gzip=False):
            """Stand-in for ``upload`` that validates each call it receives."""
            self.assertEqual(BUCKET, bucket)
            self.assertEqual('application/json', mime_type)
            self.assertFalse(gzip)
            with open(tmp_filename, 'rb') as uploaded:
                actual_content = uploaded.read()
            # An object name missing from ``expected_upload`` raises KeyError here.
            self.assertEqual(expected_upload[obj], actual_content)

        gcs_hook_mock.upload.side_effect = _assert_upload

        size_limit = len(expected_upload[first_chunk_name])
        operator = MySQLToGCSOperator(
            task_id=TASK_ID,
            sql=SQL,
            bucket=BUCKET,
            filename=JSON_FILENAME,
            approx_max_file_size_bytes=size_limit,
        )
        operator.execute(None)
示例#2
0
    def test_schema_file_with_custom_schema(self, gcs_hook_mock_class,
                                            mysql_hook_mock_class):
        """Check the schema file written when a custom ``schema`` is supplied.

        Only the upload whose object name equals ``SCHEMA_FILENAME`` is
        inspected; its content must match ``CUSTOM_SCHEMA_JSON``.
        """
        # Feed the operator canned rows and column metadata through the mocked cursor.
        cursor_mock = mysql_hook_mock_class.return_value.get_conn().cursor()
        cursor_mock.__iter__.return_value = iter(ROWS)
        cursor_mock.description = CURSOR_DESCRIPTION

        gcs_hook_mock = gcs_hook_mock_class.return_value

        def _assert_upload(bucket, obj, tmp_filename, mime_type, gzip):  # pylint: disable=unused-argument
            """Stand-in for ``upload`` that validates only the schema object."""
            if obj != SCHEMA_FILENAME:
                # Data-file uploads are not checked by this test.
                return
            self.assertFalse(gzip)
            with open(tmp_filename, 'rb') as schema_file:
                written_schema = schema_file.read()
            self.assertEqual(b''.join(CUSTOM_SCHEMA_JSON), written_schema)

        gcs_hook_mock.upload.side_effect = _assert_upload

        operator = MySQLToGCSOperator(
            task_id=TASK_ID,
            sql=SQL,
            bucket=BUCKET,
            filename=JSON_FILENAME,
            schema_filename=SCHEMA_FILENAME,
            schema=SCHEMA,
        )
        operator.execute(None)

        # One upload for the data file plus one for the schema file.
        upload_count = gcs_hook_mock.upload.call_count
        self.assertEqual(2, upload_count)
示例#3
0
    def test_exec_success_json(self, gcs_hook_mock_class,
                               mysql_hook_mock_class):
        """Check a successful ``execute`` run that produces JSON output.

        Verifies the uploaded object (bucket, name, MIME type, gzip flag and
        content) as well as how the MySQL hook and its cursor were called.
        """
        operator = MySQLToGCSOperator(
            task_id=TASK_ID,
            mysql_conn_id=MYSQL_CONN_ID,
            sql=SQL,
            bucket=BUCKET,
            filename=JSON_FILENAME,
        )

        # Feed the operator canned rows and column metadata through the mocked cursor.
        cursor_mock = mysql_hook_mock_class.return_value.get_conn().cursor()
        cursor_mock.__iter__.return_value = iter(ROWS)
        cursor_mock.description = CURSOR_DESCRIPTION

        gcs_hook_mock = gcs_hook_mock_class.return_value

        def _assert_upload(bucket, obj, tmp_filename, mime_type=None, gzip=False):
            """Stand-in for ``upload`` that validates the single expected call."""
            self.assertEqual(BUCKET, bucket)
            self.assertEqual(JSON_FILENAME.format(0), obj)
            self.assertEqual('application/json', mime_type)
            self.assertFalse(gzip)
            with open(tmp_filename, 'rb') as uploaded:
                actual_content = uploaded.read()
            self.assertEqual(b''.join(NDJSON_LINES), actual_content)

        gcs_hook_mock.upload.side_effect = _assert_upload

        operator.execute(None)

        # The hook is built once with the configured connection id, and the
        # cursor runs exactly the configured query.
        mysql_hook_mock_class.assert_called_once_with(mysql_conn_id=MYSQL_CONN_ID)
        cursor_mock.execute.assert_called_once_with(SQL)
示例#4
0
 def test_convert_type(self, value, schema_type, expected):
     """Check that ``convert_type(value, schema_type)`` equals ``expected``.

     NOTE(review): the three arguments are presumably supplied by a
     parametrizing decorator that is outside this excerpt - confirm.
     """
     operator = MySQLToGCSOperator(
         task_id=TASK_ID,
         mysql_conn_id=MYSQL_CONN_ID,
         sql=SQL,
         bucket=BUCKET,
         filename=JSON_FILENAME,
     )
     converted = operator.convert_type(value, schema_type)
     self.assertEqual(converted, expected)
示例#5
0
    def test_exec_success_csv_with_delimiter(self, gcs_hook_mock_class, mysql_hook_mock_class):
        """Check a successful ``execute`` run for CSV output with ``field_delimiter='|'``.

        The uploaded content must match ``CSV_LINES_PIPE_DELIMITED``; the MySQL
        hook and cursor calls are verified afterwards.
        """
        operator = MySQLToGCSOperator(
            task_id=TASK_ID,
            mysql_conn_id=MYSQL_CONN_ID,
            sql=SQL,
            export_format='csv',
            field_delimiter='|',
            bucket=BUCKET,
            filename=CSV_FILENAME,
        )

        # Feed the operator canned rows and column metadata through the mocked cursor.
        cursor_mock = mysql_hook_mock_class.return_value.get_conn().cursor()
        cursor_mock.__iter__.return_value = iter(ROWS)
        cursor_mock.description = CURSOR_DESCRIPTION

        gcs_hook_mock = gcs_hook_mock_class.return_value

        def _assert_upload(bucket, obj, tmp_filename, mime_type=None, gzip=False):
            """Stand-in for ``upload`` that validates the single expected call."""
            assert BUCKET == bucket
            assert CSV_FILENAME.format(0) == obj
            assert 'text/csv' == mime_type
            assert not gzip
            with open(tmp_filename, 'rb') as uploaded:
                actual_content = uploaded.read()
            assert b''.join(CSV_LINES_PIPE_DELIMITED) == actual_content

        gcs_hook_mock.upload.side_effect = _assert_upload

        operator.execute(None)

        # The hook is built once with the configured connection id, and the
        # cursor runs exactly the configured query.
        mysql_hook_mock_class.assert_called_once_with(mysql_conn_id=MYSQL_CONN_ID)
        cursor_mock.execute.assert_called_once_with(SQL)
示例#6
0
 def test_execute_with_query_error(self, mock_gcs_hook, mock_mysql_hook):
     """Check that ``execute`` lets a ``ProgrammingError`` from the cursor propagate."""
     # Make the mocked cursor's ``execute`` raise when it is called.
     cursor_mock = mock_mysql_hook.return_value.get_conn.return_value.cursor.return_value
     cursor_mock.execute.side_effect = ProgrammingError

     operator = MySQLToGCSOperator(
         task_id=TASK_ID,
         sql=SQL,
         bucket=BUCKET,
         filename=JSON_FILENAME,
         schema_filename=SCHEMA_FILENAME,
     )
     with pytest.raises(ProgrammingError):
         operator.execute(None)
示例#7
0
    def test_exec_success_csv_ensure_utc(self, gcs_hook_mock_class,
                                         mysql_hook_mock_class):
        """Check a successful ``execute`` run for CSV output with ``ensure_utc=True``.

        Besides the uploaded CSV content, the cursor must have executed
        ``TZ_QUERY`` followed by the configured SQL.
        """
        operator = MySQLToGCSOperator(
            task_id=TASK_ID,
            mysql_conn_id=MYSQL_CONN_ID,
            sql=SQL,
            export_format='CSV',
            bucket=BUCKET,
            filename=CSV_FILENAME,
            ensure_utc=True,
        )

        # Feed the operator canned rows and column metadata through the mocked cursor.
        cursor_mock = mysql_hook_mock_class.return_value.get_conn().cursor()
        cursor_mock.__iter__.return_value = iter(ROWS)
        cursor_mock.description = CURSOR_DESCRIPTION

        gcs_hook_mock = gcs_hook_mock_class.return_value

        def _assert_upload(bucket, obj, tmp_filename, mime_type=None, gzip=False):
            """Stand-in for ``upload`` that validates the single expected call."""
            self.assertEqual(BUCKET, bucket)
            self.assertEqual(CSV_FILENAME.format(0), obj)
            self.assertEqual('text/csv', mime_type)
            self.assertFalse(gzip)
            with open(tmp_filename, 'rb') as uploaded:
                actual_content = uploaded.read()
            self.assertEqual(b''.join(CSV_LINES), actual_content)

        gcs_hook_mock.upload.side_effect = _assert_upload

        operator.execute(None)

        mysql_hook_mock_class.assert_called_once_with(mysql_conn_id=MYSQL_CONN_ID)
        # The time-zone query has to be issued before the export query.
        expected_queries = [mock.call(TZ_QUERY), mock.call(SQL)]
        cursor_mock.execute.assert_has_calls(expected_queries)
示例#8
0
 def test_init(self):
     """Check that a new MySQLToGCSOperator exposes its constructor arguments.

     ``export_format`` is passed as ``'CSV'`` and expected back as ``'csv'``.
     """
     operator = MySQLToGCSOperator(
         task_id=TASK_ID,
         sql=SQL,
         bucket=BUCKET,
         filename=JSON_FILENAME,
         export_format='CSV',
         field_delimiter='|',
     )
     # (attribute name, expected value) pairs, checked in order.
     expected_attributes = (
         ('task_id', TASK_ID),
         ('sql', SQL),
         ('bucket', BUCKET),
         ('filename', JSON_FILENAME),
         ('export_format', 'csv'),
         ('field_delimiter', '|'),
     )
     for attribute, expected_value in expected_attributes:
         self.assertEqual(getattr(operator, attribute), expected_value)
示例#9
0
 def test_init(self):
     """Check that a new MySQLToGCSOperator exposes its constructor arguments.

     ``export_format`` is passed as ``'CSV'`` and expected back as ``'csv'``.
     """
     init_kwargs = {
         'task_id': TASK_ID,
         'sql': SQL,
         'bucket': BUCKET,
         'filename': JSON_FILENAME,
         'export_format': 'CSV',
         'field_delimiter': '|',
     }
     operator = MySQLToGCSOperator(**init_kwargs)

     # (attribute name, expected value) pairs, checked in order.
     expected_attributes = (
         ('task_id', TASK_ID),
         ('sql', SQL),
         ('bucket', BUCKET),
         ('filename', JSON_FILENAME),
         ('export_format', 'csv'),
         ('field_delimiter', '|'),
     )
     for attribute, expected_value in expected_attributes:
         assert getattr(operator, attribute) == expected_value
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import os
from airflow import models
from airflow.providers.google.cloud.transfers.mysql_to_gcs import MySQLToGCSOperator
from airflow.utils import dates

# Destination bucket; taken from the GCP_GCS_BUCKET environment variable when set.
GCS_BUCKET = os.getenv("GCP_GCS_BUCKET", "example-airflow-mysql-gcs")
# Value passed to the operator as ``filename``.
FILENAME = 'test_file'

# Query whose result is handed to the operator as ``sql``.
SQL_QUERY = "SELECT * from test_table"

with models.DAG(
    dag_id='example_mysql_to_gcs',
    default_args={'start_date': dates.days_ago(1)},
    schedule_interval=None,
    tags=['example'],
) as dag:
    # [START howto_operator_mysql_to_gcs]
    upload = MySQLToGCSOperator(
        task_id='mysql_to_gcs',
        sql=SQL_QUERY,
        bucket=GCS_BUCKET,
        filename=FILENAME,
        export_format='csv',
    )
    # [END howto_operator_mysql_to_gcs]