Exemplo n.º 1
0
    def test_file_splitting(self, gcs_hook_mock_class, mssql_hook_mock_class):
        """Test that ndjson is split by approx_max_file_size_bytes param."""
        mssql_hook_mock = mssql_hook_mock_class.return_value
        mssql_hook_mock.get_conn().cursor().__iter__.return_value = iter(ROWS)
        mssql_hook_mock.get_conn().cursor().description = CURSOR_DESCRIPTION

        gcs_hook_mock = gcs_hook_mock_class.return_value
        expected_upload = {
            JSON_FILENAME.format(0): b''.join(NDJSON_LINES[:2]),
            JSON_FILENAME.format(1): NDJSON_LINES[2],
        }

        def _assert_upload(bucket, obj, tmp_filename, mime_type=None, gzip=False):
            assert BUCKET == bucket
            assert 'application/json' == mime_type
            assert GZIP == gzip
            with open(tmp_filename, 'rb') as file:
                assert expected_upload[obj] == file.read()

        gcs_hook_mock.upload.side_effect = _assert_upload

        op = MSSQLToGCSOperator(
            task_id=TASK_ID,
            sql=SQL,
            bucket=BUCKET,
            filename=JSON_FILENAME,
            approx_max_file_size_bytes=len(expected_upload[JSON_FILENAME.format(0)]),
        )
        op.execute(None)
Exemplo n.º 2
0
    def test_exec_success_json(self, gcs_hook_mock_class, mssql_hook_mock_class):
        """Test successful run of execute function for JSON"""
        op = MSSQLToGCSOperator(
            task_id=TASK_ID, mssql_conn_id=MSSQL_CONN_ID, sql=SQL, bucket=BUCKET, filename=JSON_FILENAME
        )

        mssql_hook_mock = mssql_hook_mock_class.return_value
        mssql_hook_mock.get_conn().cursor().__iter__.return_value = iter(ROWS)
        mssql_hook_mock.get_conn().cursor().description = CURSOR_DESCRIPTION

        gcs_hook_mock = gcs_hook_mock_class.return_value

        def _assert_upload(bucket, obj, tmp_filename, mime_type=None, gzip=False):
            assert BUCKET == bucket
            assert JSON_FILENAME.format(0) == obj
            assert 'application/json' == mime_type
            assert GZIP == gzip
            with open(tmp_filename, 'rb') as file:
                assert b''.join(NDJSON_LINES) == file.read()

        gcs_hook_mock.upload.side_effect = _assert_upload

        op.execute(None)

        mssql_hook_mock_class.assert_called_once_with(mssql_conn_id=MSSQL_CONN_ID)
        mssql_hook_mock.get_conn().cursor().execute.assert_called_once_with(SQL)
Exemplo n.º 3
0
    def test_schema_file(self, gcs_hook_mock_class, mssql_hook_mock_class):
        """Test writing schema files."""
        mssql_hook_mock = mssql_hook_mock_class.return_value
        mssql_hook_mock.get_conn().cursor().__iter__.return_value = iter(ROWS)
        mssql_hook_mock.get_conn().cursor().description = CURSOR_DESCRIPTION

        gcs_hook_mock = gcs_hook_mock_class.return_value

        def _assert_upload(bucket, obj, tmp_filename, mime_type, gzip):  # pylint: disable=unused-argument
            if obj == SCHEMA_FILENAME:
                with open(tmp_filename, 'rb') as file:
                    self.assertEqual(b''.join(SCHEMA_JSON), file.read())

        gcs_hook_mock.upload.side_effect = _assert_upload

        op = MSSQLToGCSOperator(
            task_id=TASK_ID,
            sql=SQL,
            bucket=BUCKET,
            filename=JSON_FILENAME,
            schema_filename=SCHEMA_FILENAME)
        op.execute(None)

        # once for the file and once for the schema
        self.assertEqual(2, gcs_hook_mock.upload.call_count)
Exemplo n.º 4
0
 def test_init(self):
     """Test MySqlToGoogleCloudStorageOperator instance is properly initialized."""
     op = MSSQLToGCSOperator(task_id=TASK_ID, sql=SQL, bucket=BUCKET, filename=JSON_FILENAME)
     assert op.task_id == TASK_ID
     assert op.sql == SQL
     assert op.bucket == BUCKET
     assert op.filename == JSON_FILENAME
Exemplo n.º 5
0
 def test_init(self):
     """Test MySqlToGoogleCloudStorageOperator instance is properly initialized."""
     op = MSSQLToGCSOperator(task_id=TASK_ID, sql=SQL, bucket=BUCKET, filename=JSON_FILENAME)
     self.assertEqual(op.task_id, TASK_ID)
     self.assertEqual(op.sql, SQL)
     self.assertEqual(op.bucket, BUCKET)
     self.assertEqual(op.filename, JSON_FILENAME)
Exemplo n.º 6
0
# specific language governing permissions and limitations
# under the License.
import os
from datetime import datetime

from airflow import models
from airflow.providers.google.cloud.transfers.mssql_to_gcs import MSSQLToGCSOperator

GCS_BUCKET = os.environ.get("GCP_GCS_BUCKET", "example-airflow")
FILENAME = 'test_file'

SQL_QUERY = "USE airflow SELECT * FROM Country;"

with models.DAG(
        'example_mssql_to_gcs',
        schedule_interval='@once',
        start_date=datetime(2021, 12, 1),
        catchup=False,
        tags=['example'],
) as dag:
    # [START howto_operator_mssql_to_gcs]
    upload = MSSQLToGCSOperator(
        task_id='mssql_to_gcs',
        mssql_conn_id='airflow_mssql',
        sql=SQL_QUERY,
        bucket=GCS_BUCKET,
        filename=FILENAME,
        export_format='csv',
    )
    # [END howto_operator_mssql_to_gcs]