load_table_fact_airbnb_austin_la = LoadFactOperator(
    task_id='Load_Fact_Airbnb_Austin_LA_Table',
    provide_context=True,
    dag=dag,
    aws_credentials_id='aws_credentials',
    redshift_conn_id='redshift',
    query=SqlQueries.load_fact_airbnb_austin_la_insert,
    operation='insert',
    table='FACT_AIRBNB_AUSTIN_LA')

create_table_fact_airbnb.set_upstream(load_table_dim_hosts)
create_table_fact_airbnb.set_upstream(load_table_dim_properties)
create_table_fact_airbnb.set_upstream(load_table_dim_calendars)
create_table_fact_airbnb.set_upstream(load_table_dim_reviews)
load_table_fact_airbnb_austin_la.set_upstream(create_table_fact_airbnb)

## RUN DATA QUALITY CHECKS to ensure records have been moved correctly through the platforms without any errors
run_quality_checks = DataQualityOperator(
    task_id='Run_DATA_QUALITY_CHECKS',
    dag=dag,
    provide_context=True,
    redshift_conn_id='redshift',
    tables=[
        'DIM_HOSTS', 'DIM_REVIEWS', 'DIM_CALENDARS', 'DIM_PROPERTIES',
        'FACT_AIRBNB_AUSTIN_LA'
    ])
run_quality_checks.set_upstream(load_table_fact_airbnb_austin_la)

end_operator = DummyOperator(task_id='END_TASK', dag=dag)
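## LoadFactOperator and the other custom operators used in these DAGs are defined
## elsewhere in the repository; below is a minimal sketch of what LoadFactOperator's
## execute method could look like, assuming only the kwargs used above
## (redshift_conn_id, query, operation, table). The class body is illustrative, not
## the project's actual implementation; aws_credentials_id is accepted but unused here.

from airflow.hooks.postgres_hook import PostgresHook
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults


class LoadFactOperator(BaseOperator):
    """Populates a fact table in Redshift by running the configured SQL."""

    @apply_defaults
    def __init__(self, redshift_conn_id="", aws_credentials_id="", query="",
                 operation="insert", table="", *args, **kwargs):
        super(LoadFactOperator, self).__init__(*args, **kwargs)
        self.redshift_conn_id = redshift_conn_id
        self.aws_credentials_id = aws_credentials_id
        self.query = query
        self.operation = operation
        self.table = table

    def execute(self, context):
        # Run the configured statement against Redshift via the Postgres hook.
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        self.log.info("Running %s for table %s", self.operation, self.table)
        redshift.run(self.query)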
load_time_dimension_table = LoadDimensionOperator(
    task_id='Load_time_dim_table',
    table_target='time',
    dag=dag,
    redshift_connection_id='redshift',
    query=SqlQueries.time_table_insert,
    truncate_before=True)

run_quality_checks = DataQualityOperator(
    task_id='Run_data_quality_checks',
    dag=dag,
    redshift_connection_id='redshift',
    tables=['songplays', 'users', 'songs', 'artists', 'time'])

end_operator = DummyOperator(task_id='Stop_execution', dag=dag)

start_operator.set_downstream(
    [stage_events_to_redshift, stage_songs_to_redshift])
load_songplays_table.set_upstream(
    [stage_events_to_redshift, stage_songs_to_redshift])
load_songplays_table.set_downstream([
    load_song_dimension_table, load_user_dimension_table,
    load_artist_dimension_table, load_time_dimension_table
])
run_quality_checks.set_upstream([
    load_song_dimension_table, load_user_dimension_table,
    load_artist_dimension_table, load_time_dimension_table
])
end_operator.set_upstream(run_quality_checks)
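## The set_upstream/set_downstream calls above can equivalently be written with
## Airflow's bitshift dependency syntax; shown commented out here only as an
## illustration, since it would duplicate the wiring already done above:
##
## start_operator >> [stage_events_to_redshift, stage_songs_to_redshift]
## [stage_events_to_redshift, stage_songs_to_redshift] >> load_songplays_table
## load_songplays_table >> [load_song_dimension_table, load_user_dimension_table,
##                          load_artist_dimension_table, load_time_dimension_table]
## [load_song_dimension_table, load_user_dimension_table,
##  load_artist_dimension_table, load_time_dimension_table] >> run_quality_checks
## run_quality_checks >> end_operator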
    task_id='LOAD_DIM_CALENDARS_TABLE',
    dag=dag,
    query=SqlQueries.calendars_table_insert,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    operation="insert",
    table="DIM_CALENDARS")
load_dim_calendars_table.set_upstream(create_dim_calendars_table)

create_load_fact_airbnb_amst_table = LoadFactOperator(
    task_id='Create_Load_FACT_AIRBNB_AMST_Table',
    dag=dag,
    query=SqlQueries.CREATE_LOAD_FACT_AIRBNB_AMST,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials")

create_load_fact_airbnb_amst_table.set_upstream(load_dim_hosts_table)
create_load_fact_airbnb_amst_table.set_upstream(load_dim_reviews_table)
create_load_fact_airbnb_amst_table.set_upstream(load_dim_properties_table)
create_load_fact_airbnb_amst_table.set_upstream(load_dim_calendars_table)

## RUN DATA QUALITY CHECKS to ensure that records have been moved correctly through the platforms without any errors
run_quality_checks = DataQualityOperator(
    task_id='Run_DATA_QUALITY_Checks',
    dag=dag,
    redshift_conn_id="redshift")
run_quality_checks.set_upstream(create_load_fact_airbnb_amst_table)

## DUMMY OPERATOR to indicate that the DAG has run successfully - end of the DAG
end_operator = DummyOperator(task_id='END_OPERATOR', dag=dag)
end_operator.set_upstream(run_quality_checks)
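## DataQualityOperator is also a project-specific operator whose implementation is
## not part of this excerpt; below is a minimal sketch of a row-count check
## consistent with the kwargs used in these DAGs (redshift_conn_id and an optional
## list of tables). This is an assumption about its behaviour, not the repository's
## actual code.

from airflow.hooks.postgres_hook import PostgresHook
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults


class DataQualityOperator(BaseOperator):
    """Fails the task if any of the given tables is empty after loading."""

    @apply_defaults
    def __init__(self, redshift_conn_id="", tables=None, *args, **kwargs):
        super(DataQualityOperator, self).__init__(*args, **kwargs)
        self.redshift_conn_id = redshift_conn_id
        self.tables = tables or []

    def execute(self, context):
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        for table in self.tables:
            # A table that loaded successfully should return at least one row.
            records = redshift.get_records("SELECT COUNT(*) FROM {}".format(table))
            if not records or not records[0] or records[0][0] < 1:
                raise ValueError("Data quality check failed: {} returned no rows".format(table))
            self.log.info("Data quality check on table %s passed with %s records",
                          table, records[0][0])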