Пример #1
0
    table="DIM_PROPERTIES",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials")
create_dim_properties_table.set_upstream(MID_operator)

## Load the DIM_* dimension tables - one task per table

# Dimension-load task for DIM_HOSTS (insert mode); it is wired to run
# downstream of create_dim_hosts_table.
load_dim_hosts_table = LoadDimensionOperator(
    dag=dag,
    task_id="LOAD_DIM_HOSTS_TABLE",
    table="DIM_HOSTS",
    operation="insert",
    query=SqlQueries.hosts_table_insert,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
)
create_dim_hosts_table.set_downstream(load_dim_hosts_table)

# Dimension-load task for DIM_REVIEWS (insert mode); it is wired to run
# downstream of create_dim_reviews_table.
load_dim_reviews_table = LoadDimensionOperator(
    dag=dag,
    task_id="LOAD_DIM_REVIEWS_TABLE",
    table="DIM_REVIEWS",
    operation="insert",
    query=SqlQueries.reviews_table_insert,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
)
create_dim_reviews_table.set_downstream(load_dim_reviews_table)

# NOTE(review): this statement looks like two different operator definitions
# spliced together -- confirm against the original DAG file before relying on it.
load_dim_properties_table = LoadDimensionOperator(
    task_id='LOAD_DIM_PROPERTIES_TABLE',
    dag=dag,
    query=SqlQueries.properties_table_insert,
    # NOTE(review): `dag` is passed a second time here; Python rejects a
    # repeated keyword argument with a SyntaxError.
    dag=dag,
    # NOTE(review): the sibling LoadDimensionOperator calls in this file pass
    # redshift_conn_id/operation/table; postgres_conn_id and sql appear only here.
    postgres_conn_id="redshift",
    sql=SqlQueries.create_dim_properties)
# NOTE(review): the dependency is set on create_table_dim_properties, not on the
# load_dim_properties_table task assigned above -- verify which task was intended.
create_table_dim_properties.set_upstream(MID_operator)

## Load the DIM_* dimension tables - one task per table

# Dimension-load task for DIM_HOSTS (insert mode); it is wired to run
# downstream of create_table_dim_hosts.
load_table_dim_hosts = LoadDimensionOperator(
    dag=dag,
    task_id="LOAD_TABLE_DIM_HOSTS",
    provided_context=True,
    table="DIM_HOSTS",
    operation="insert",
    query=SqlQueries.hosts_table_insert,
    redshift_conn_id="redshift",
)
create_table_dim_hosts.set_downstream(load_table_dim_hosts)

# Dimension-load task for DIM_CALENDARS (insert mode); it is wired to run
# downstream of create_table_dim_calendars.
load_table_dim_calendars = LoadDimensionOperator(
    dag=dag,
    task_id="LOAD_TABLE_DIM_CALENDARS",
    provided_context=True,
    table="DIM_CALENDARS",
    operation="insert",
    query=SqlQueries.calendars_table_insert,
    redshift_conn_id="redshift",
)
create_table_dim_calendars.set_downstream(load_table_dim_calendars)

load_table_dim_reviews = LoadDimensionOperator(
    task_id='LOAD_TABLE_DIM_REVIEWS',
    provided_context=True,
    dag=dag,
# Dimension-load task for the "time" table, driven by
# SqlQueries.time_table_insert with append_flag switched off.
load_time_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id="Load_time_dim_table",
    table="time",
    query=SqlQueries.time_table_insert,
    append_flag=False,
    redshift_conn_id="redshift",
)

# Data-quality task; the checks to run come from the module-level dq_checks.
run_quality_checks = DataQualityOperator(
    dag=dag,
    task_id="Run_data_quality_checks",
    dq_checks=dq_checks,
    redshift_conn_id="redshift",
)

# Terminal placeholder task marking the end of the DAG run.
end_operator = DummyOperator(
    dag=dag,
    task_id="Stop_execution",
)

#Dependencies
# Task graph, declared from upstream to downstream:
#   start -> both staging tasks -> songplays -> four dimension loads
#         -> quality checks -> end
start_operator.set_downstream(
    [stage_events_to_redshift, stage_songs_to_redshift])

# Fan-in: songplays waits for both staging tasks.
stage_events_to_redshift.set_downstream(load_songplays_table)
stage_songs_to_redshift.set_downstream(load_songplays_table)

# Fan-out: every dimension load follows songplays.
load_songplays_table.set_downstream([
    load_user_dimension_table,
    load_song_dimension_table,
    load_artist_dimension_table,
    load_time_dimension_table,
])

# Fan-in: quality checks wait for all four dimension loads.
load_user_dimension_table.set_downstream(run_quality_checks)
load_song_dimension_table.set_downstream(run_quality_checks)
load_artist_dimension_table.set_downstream(run_quality_checks)
load_time_dimension_table.set_downstream(run_quality_checks)

run_quality_checks.set_downstream(end_operator)