Example #1
    def test_transform_load_operator_save_to_json_func_handles_error(
        self, postgresql, tmp_path: Path
    ):
        """Test if save_to_json function can handle Exceptions thrown."""
        postgres_master_config = PostgresConfig('postgres_default')
        task = LoadToJsonOperator(
            task_id='test2',
            postgres_config=postgres_master_config,
            query=SqlQueries.select_all_query_to_json,
            path=tmp_path,
            tables=['ports']
        )
        # The cursor was never executed, so saving its results should raise
        cursor = postgresql.cursor()
        file = tmp_path / 'foo.json'
        with raises(Exception):
            task.save_to_json(cursor=cursor, key='foo', file_name=file)
Example #2
    def test_transform_load_operator_save_to_json_func(
        self, postgresql, tmp_path: Path
    ):
        """Test if save_to_json function can save a json file given cursor."""
        postgres_master_config = PostgresConfig('postgres_default')
        task = LoadToJsonOperator(
            task_id='test2',
            postgres_config=postgres_master_config,
            query=SqlQueries.select_all_query_to_json,
            path=tmp_path,
            tables=['ports']
        )
        expected_data = {
            'id': 1,
            'countryName': 'Philippines',
            'portName': 'Aleran/Ozamis',
            'unlocode': 'PH ALE',
            'coordinates': '4234N 00135E',
            'staging_id': str(uuid.uuid4())
        }
        insert_query = SqlQueries.ports_table_insert.format(
            updated_at=datetime.datetime.utcnow()
        )
        select_query = SqlQueries.select_all_query_to_json.format(
            table='ports'
        )

        # Insert the test data
        cursor = postgresql.cursor(cursor_factory=RealDictCursor)
        cursor.execute(insert_query, expected_data)
        postgresql.commit()

        # Execute the select so the cursor holds the result set
        cursor.execute(select_query)

        # Save to json file and assert the file data
        file = tmp_path / 'ports.json'
        task.save_to_json(cursor=cursor, key='ports', file_name=file)
        with open(file, "r") as f:
            result = json.load(f)
            assert 'ports' in result
            result = result['ports'][0]

        # staging_id is internal and not exported to JSON, so drop it
        expected_data.pop('staging_id')
        assert result == expected_data
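
For reference, a minimal sketch of a save_to_json method that would satisfy both tests above. This is an assumption about the operator's internals, not the actual implementation in custom_operators/save_json_operator.py; it presumes a RealDictCursor (so rows are dict-like) and relies on psycopg2 raising when fetchall() is called on a cursor that was never executed, which is what Example #1 exercises:

    import json

    def save_to_json(self, cursor, key, file_name):
        """Dump every row from an already-executed cursor under `key`."""
        rows = cursor.fetchall()  # raises ProgrammingError on an unused cursor
        with open(file_name, 'w') as f:
            # default=str serializes dates and UUIDs that json can't handle
            json.dump({key: [dict(row) for row in rows]}, f, default=str)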
Example #3
    def test_data_quality_operator_closes_db_conn_after_use(
        self, mocker, postgresql, ports_collection, test_dag,
        tmp_path: Path
    ):
        """Test if operator will close the database connection after use."""
        # Create PSQL mock object
        mocker.patch.object(
            PostgresHook,
            "get_conn",
            return_value=postgresql
        )
        # Seed the ports table so the quality checks have a row to count
        pg_hook = PostgresHook()
        conn = pg_hook.get_conn()
        cursor = conn.cursor(cursor_factory=RealDictCursor)
        query = TransformAndLoadOperator._format_query(
            SqlQueries.ports_table_insert,
            {"updated_at": datetime.datetime.utcnow()}
        )
        data = {
            'id': 1,
            'countryName': 'Philippines',
            'portName': 'Aleran/Ozamis',
            'unlocode': 'PH ALE',
            'coordinates': '4234N 00135E',
            'staging_id': str(uuid.uuid4())
        }
        cursor.execute(query, data)

        # Define the data quality checker
        postgres_master_config = PostgresConfig('postgres_default')
        check_data_quality = DataQualityCheckOperator(
            tables=['ports'],
            postgres_config=postgres_master_config,
            task_id='test',
            queries={
                "ports_row_count": SqlQueries.ports_row_count,
                "ports_updated_count": SqlQueries.ports_updated_count
            },
            dag=test_dag
        )
        check_data_quality.execute(
            context={}, testing=False
        )
        assert conn.closed
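
This test pins down the operator's connection handling: with testing=False the connection must be closed by the time execute() returns, while testing=True keeps it open so fixtures can inspect the database afterwards (as Example #4 does). A plausible shape for that logic, assumed rather than taken from the operator's source:

    class DataQualityCheckOperator(BaseOperator):
        def execute(self, context, testing=False):
            conn = PostgresHook(self._conn_id).get_conn()  # _conn_id is illustrative
            try:
                cursor = conn.cursor()
                # ... run the configured quality-check queries ...
            finally:
                # testing=True leaves the connection open for test fixtures
                if not testing:
                    conn.close()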
Example #4
    def test_transform_load_operator(
        self, mocker, postgresql, ports_collection, test_dag
    ):
        """Test if transform_load_operator upserts data into master db."""
        # Create mocks
        mocker.patch.object(
            PostgresHook,
            "get_conn",
            return_value=postgresql
        )
        mocker.patch.object(
            MongoHook,
            "get_collection",
            return_value=ports_collection
        )

        # Check that the source collection has at least one document
        mongo_hook = MongoHook()
        collection = mongo_hook.get_collection()
        assert collection.count_documents({}) > 0

        # Check if the sink table is initially empty
        cursor = postgresql.cursor()
        cursor.execute("SELECT COUNT(*) FROM ports;")
        initial_result = cursor.fetchone()[0]
        assert initial_result == 0

        # Setup task
        mongo_staging_config = MongoConfig('mongo_default', 'ports')
        postgres_master_config = PostgresConfig('postgres_default')
        task = TransformAndLoadOperator(
            mongo_config=mongo_staging_config,
            postgres_config=postgres_master_config,
            task_id='test',
            processor=PortsItemProcessor(),
            query=SqlQueries.ports_table_insert,
            query_params={"updated_at": datetime.datetime.utcnow()},
            dag=test_dag
        )

        # Execute task and check if it inserted the data successfully
        task.execute(context={}, testing=True)
        cursor.execute("SELECT COUNT(*) FROM ports;")
        after_result = cursor.fetchone()[0]
        assert after_result > 0
Example #5
    def test_transform_load_operator_database_error(
        self, mocker, postgresql, ports_collection, test_dag
    ):
        """Test if transform_load_operator handles DB errors."""
        # Create mocks
        mocker.patch.object(
            PostgresHook,
            "get_conn",
            return_value=postgresql
        )
        mocker.patch.object(
            MongoHook,
            "get_collection",
            return_value=ports_collection
        )

        # Check that the source collection has at least one document
        mongo_hook = MongoHook()
        collection = mongo_hook.get_collection()
        assert collection.count_documents({}) > 0

        # Setup task, intentionally give an unknown table
        mongo_staging_config = MongoConfig('mongo_default', 'ports')
        postgres_master_config = PostgresConfig('postgres_default')
        task = TransformAndLoadOperator(
            mongo_config=mongo_staging_config,
            postgres_config=postgres_master_config,
            task_id='test',
            processor=PortsItemProcessor(),
            query=SqlQueries.ports_table_insert.replace(
                'ports', 'ports_wrong'
            ),
            query_params={"updated_at": datetime.datetime.utcnow()},
            dag=test_dag
        )

        # Execute the task and check that it raises an UndefinedTable error
        with raises((UndefinedTable, Exception, OperationalError)):
            # testing=False also makes the operator close the database connection
            task.execute(context={}, testing=False)
        with raises((UndefinedTable, Exception, OperationalError)):
            task.execute(context={}, testing=True)
Example #6
    def test_transform_load_operator_exception_error(
        self, mocker, postgresql, ports_collection, test_dag
    ):
        """Test if transform_load_operator handles Exception thrown."""
        # Create mocks
        mocker.patch.object(
            PostgresHook,
            "get_conn",
            return_value=postgresql
        )
        mocker.patch.object(
            MongoHook,
            "get_collection",
            return_value=ports_collection
        )

        # Check that the source collection has at least one document
        mongo_hook = MongoHook()
        collection = mongo_hook.get_collection()
        assert collection.count_documents({}) > 0

        # Setup task
        mongo_staging_config = MongoConfig('mongo_default', 'ports')
        postgres_master_config = PostgresConfig('postgres_default')
        task = TransformAndLoadOperator(
            mongo_config=mongo_staging_config,
            postgres_config=postgres_master_config,
            task_id='test',
            processor=PortsItemProcessor(),
            query='Wrong SQL query',
            dag=test_dag
        )

        # Execute task and check if it will raise an Exception error
        with raises(Exception):
            task.execute(context={}, testing=True)
Example #7
    def test_save_to_json_operator(
        self, mocker, postgresql, ports_collection, test_dag,
        tmp_path: Path
    ):
        """Test if save_to_json_operator saves the file on a specified path"""
        # Create mocks
        mocker.patch.object(
            PostgresHook,
            "get_conn",
            return_value=postgresql
        )
        mocker.patch.object(
            MongoHook,
            "get_collection",
            return_value=ports_collection
        )

        # Check that the source collection has at least one document
        mongo_hook = MongoHook()
        collection = mongo_hook.get_collection()
        assert collection.count_documents({}) > 0

        # Setup some data, transfer staging data to master
        mongo_staging_config = MongoConfig('mongo_default', 'ports')
        postgres_master_config = PostgresConfig('postgres_default')
        transform_load = TransformAndLoadOperator(
            mongo_config=mongo_staging_config,
            postgres_config=postgres_master_config,
            task_id='test',
            processor=PortsItemProcessor(),
            query=SqlQueries.ports_table_insert,
            query_params={"updated_at": datetime.datetime.utcnow()},
            dag=test_dag
        )

        # Execute task and check if it inserted the data successfully
        transform_load.execute(context={}, testing=True)
        pg_hook = PostgresHook()
        cursor = pg_hook.get_conn().cursor()
        cursor.execute("SELECT COUNT(*) FROM ports;")
        after_result = cursor.fetchone()[0]
        assert after_result > 0

        # Point tmp_path at a directory that does not exist yet, forcing
        # the operator to create it
        tmp_path = tmp_path / 'unknown-path'

        # Execute save_to_json to save the data into json file on tmp_path
        save_to_json = LoadToJsonOperator(
            task_id='export_to_json',
            postgres_config=postgres_master_config,
            query=SqlQueries.select_all_query_to_json,
            path=tmp_path,
            tables=['ports'],
            dag=test_dag
        )
        save_to_json.execute(
            {'execution_date': datetime.datetime(2021, 1, 1)}
        )

        output_path = tmp_path / 'ports_20210101T000000.json'

        expected_data = {
            'ports': [{
                'id': 1,
                'countryName': 'Philippines',
                'portName': 'Aleran/Ozamis',
                'unlocode': 'PH ALE',
                'coordinates': '4234N 00135E'
            }]
        }

        # Read result
        with open(output_path, "r") as f:
            result = json.load(f)

        # Assert
        assert 'ports' in result
        assert result == expected_data
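
The expected file name shows that the operator derives its output path from the table name plus the DAG's execution_date. A hypothetical helper illustrating that formatting (the real operator may build the name differently):

    import datetime
    from pathlib import Path

    def output_path(path: Path, table: str,
                    execution_date: datetime.datetime) -> Path:
        # datetime(2021, 1, 1) -> 'ports_20210101T000000.json'
        return path / f"{table}_{execution_date.strftime('%Y%m%dT%H%M%S')}.json"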
Example #8
    def test_save_to_json_operator_database_error(
        self, mocker, postgresql, ports_collection, test_dag,
        tmp_path: Path
    ):
        """Test if save_to_json_operator can handle errors related to db."""
        # Create mocks
        mocker.patch.object(
            PostgresHook,
            "get_conn",
            return_value=postgresql
        )
        mocker.patch.object(
            MongoHook,
            "get_collection",
            return_value=ports_collection
        )

        # Check that the source collection has at least one document
        mongo_hook = MongoHook()
        collection = mongo_hook.get_collection()
        assert collection.count_documents({}) > 0

        # Setup some data, transfer staging data to master
        mongo_staging_config = MongoConfig('mongo_default', 'ports')
        postgres_master_config = PostgresConfig('postgres_default')
        transform_load = TransformAndLoadOperator(
            mongo_config=mongo_staging_config,
            postgres_config=postgres_master_config,
            task_id='test',
            processor=PortsItemProcessor(),
            query=SqlQueries.ports_table_insert,
            query_params={"updated_at": datetime.datetime.utcnow()},
            dag=test_dag
        )

        # Execute task and check if it inserted the data successfully
        transform_load.execute(context={}, testing=True)
        pg_hook = PostgresHook()
        cursor = pg_hook.get_conn().cursor()
        cursor.execute("SELECT COUNT(*) FROM ports;")
        after_result = cursor.fetchone()[0]
        assert after_result > 0

        # Execute save_to_json to save the data into json file on tmp_path
        save_to_json = LoadToJsonOperator(
            task_id='test2',
            postgres_config=postgres_master_config,
            query=SqlQueries.select_all_query_to_json,
            path=tmp_path,
            tables=['foo'],
            dag=test_dag
        )
        with raises((UndefinedTable, OperationalError, Exception)):
            # testing=False also makes the operator close the database connection
            save_to_json.execute(
                {'execution_date': datetime.datetime(2021, 1, 1)},
                testing=False
            )
        with raises((UndefinedTable, OperationalError, Exception)):
            save_to_json.execute(
                {'execution_date': datetime.datetime(2021, 1, 1)},
                testing=True
            )
Example #9
import datetime

from airflow import DAG
from airflow.models import Variable
from airflow.operators.dummy import DummyOperator
from airflow.operators.bash import BashOperator
from airflow.operators.email import EmailOperator
from airflow.providers.slack.operators.slack import SlackAPIPostOperator
from custom_operators.stage_operator import StageDatatoMongodbOperator
from custom_operators.data_quality_operator import DataQualityCheckOperator
from custom_operators.save_json_operator import LoadToJsonOperator
from custom_operators.transform_load_operator import TransformAndLoadOperator
from helpers.configs.postgres.postgres_config import PostgresConfig
from helpers.configs.mongo.mongo_config import MongoConfig
from helpers.lib.data_processors import PortsItemProcessor
from helpers.lib.sql_queries import SqlQueries

mongo_staging_config = MongoConfig('mongo_staging', 'ports')
postgres_master_config = PostgresConfig('postgres_master')

default_args = {
    'owner': 'airflow',
    'start_date': datetime.datetime(2020, 1, 1),
    'depends_on_past': False,
    'email_on_failure': True,
    'email': '*****@*****.**',
    'retries': 5,
    'retry_delay': datetime.timedelta(minutes=10)
}

dag = DAG(dag_id='unece_dag',
          schedule_interval='@daily',
          default_args=default_args,
          catchup=False)
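
The snippet stops at the DAG object itself; the imported operators would then be instantiated against it and chained. A minimal sketch of that wiring, reusing only constructor arguments already seen in the examples above (the task ids and ordering are illustrative assumptions, not the project's actual pipeline):

start = DummyOperator(task_id='start', dag=dag)

check_data_quality = DataQualityCheckOperator(
    task_id='check_data_quality',
    postgres_config=postgres_master_config,
    tables=['ports'],
    queries={
        'ports_row_count': SqlQueries.ports_row_count,
        'ports_updated_count': SqlQueries.ports_updated_count,
    },
    dag=dag,
)

start >> check_data_quality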