# Task 'Stage_songs': configures StageToRedshiftOperator (defined elsewhere)
# with the 'udacity-dend' bucket and the 'staging_songs' target table.
# NOTE(review): the s3_key template is partitioned by year/month/date and ends
# in '-events.json', which looks like an event-log key rather than a song key;
# confirm against the bucket layout and how the operator fills the template.
stage_songs_to_redshift = StageToRedshiftOperator(
    dag=dag,
    task_id="Stage_songs",
    provide_context=True,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="song_data/{year}/{month}/{full_date}-events.json",
    target_table="staging_songs",
    format_option="json_path",
)

# Task 'Load_songplays_fact_table': hands the songplay insert query and the
# 'songplays' target table to LoadFactOperator (defined elsewhere).
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id="Load_songplays_fact_table",
    sql_query=SqlQueries.songplay_table_insert,
    target_table="songplays",
    redshift_conn_id="redshift",
)

# Task 'Load_user_dim_table': hands the user insert query and the 'users'
# target table to LoadDimensionOperator (defined elsewhere).
# delete_records_before_load=False is passed explicitly; presumably existing
# rows are kept before the load -- confirm in the operator.
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id="Load_user_dim_table",
    sql_query=SqlQueries.user_table_insert,
    target_table="users",
    delete_records_before_load=False,
    redshift_conn_id="redshift",
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
# Example #2
# 0
    # Task 'load_songs_from_s3_to_redshift': configures StageToRedshiftOperator
    # for the 'staging_songs' table in region 'us-west-2'.
    # The bucket and key are Jinja expressions that read the JSON Variable
    # named 's3'; they are only rendered if the operator declares these
    # arguments as template fields -- TODO confirm in the operator.
    stage_songs_to_redshift = StageToRedshiftOperator(
        dag=dag,
        task_id="load_songs_from_s3_to_redshift",
        table_name="staging_songs",
        s3_bucket="{{ var.json.s3.bucket }}",
        s3_key="{{ var.json.s3.song_key }}",
        region_name='us-west-2',
        json_format='auto',
        aws_credentials_id="aws_credentials",
        redshift_conn_id="redshift",
    )

    # Task 'Load_songplays_fact_table': LoadFactOperator receives a clear
    # statement formatted for "songplays", the songplay insert query, and
    # LoadMode.TRUNCATE as the load mode.
    load_songplays_table = LoadFactOperator(
        dag=dag,
        task_id="Load_songplays_fact_table",
        redshift_conn_id="redshift",
        load_data_sql=SqlQueries.songplay_table_insert,
        clear_table_sql=SqlQueries.clear_table.format('songplays'),
        load_mode=LoadMode.TRUNCATE,
    )

    # Task 'Load_user_dim_table': LoadDimensionOperator receives a clear
    # statement formatted for "users", the user insert query, and
    # LoadMode.TRUNCATE as the load mode.
    load_user_dimension_table = LoadDimensionOperator(
        dag=dag,
        task_id="Load_user_dim_table",
        redshift_conn_id="redshift",
        load_data_sql=SqlQueries.user_table_insert,
        clear_table_sql=SqlQueries.clear_table.format('users'),
        load_mode=LoadMode.TRUNCATE,
    )

    load_song_dimension_table = LoadDimensionOperator(
        task_id='Load_song_dim_table',
        redshift_conn_id='redshift',
# Example #3
# 0
    task_id='Stage_dependencies',
    provide_context=True,
    dag=dag)

# Task 'Stage_projects': configures StageToRedshiftOperator (defined
# elsewhere) with the projects CSV address and the 'staging_projects' table.
stage_projects_to_redshift = StageToRedshiftOperator(
    task_id="Stage_projects",
    dag=dag,
    provide_context=True,
    table="staging_projects",
    s3_addr="s3://dend-capstone-lkj/projects_with_repository.csv",
    redshift_conn_id="redshift",
    aws_conn_id="aws_credentials",
)

# Task 'Load_repositories_fact_table': LoadFactOperator (defined elsewhere)
# pointed at the 'repository_fact' table; no SQL is passed from this call.
load_repositories_fact_table = LoadFactOperator(
    task_id="Load_repositories_fact_table",
    dag=dag,
    provide_context=True,
    table="repository_fact",
    redshift_conn_id="redshift",
)

# Task 'Load_projects_dim_table': LoadDimensionOperator (defined elsewhere)
# pointed at the 'project_dim' table; no SQL is passed from this call.
load_projects_dimension_table = LoadDimensionOperator(
    task_id="Load_projects_dim_table",
    dag=dag,
    provide_context=True,
    table="project_dim",
    redshift_conn_id="redshift",
)

load_versions_dimension_table = LoadDimensionOperator(
    redshift_conn_id='redshift',
    table='version_dim',
    task_id='Load_versions_dim_table',
    provide_context=True,
# Example #4
# 0
# Task 'stage_songs': StageToRedshiftOperator (defined elsewhere) receives the
# staging-table DDL, the copy template, and the S3 location
# (bucket 'udacity-dend', key 'song_data', region 'us-west-2') for the
# 'songs_stage' table.
stage_songs_to_redshift = StageToRedshiftOperator(
    dag=dag,
    task_id="stage_songs",
    redshift_conn_id='redshift',
    table='songs_stage',
    s3_bucket='udacity-dend',
    s3_key='song_data',
    s3_region='us-west-2',
    create_table_sql=SqlQueries.create_songs_stage,
    stage_table_sql=SqlQueries.staging_table_copy_template,
)

# Task 'load_songplays_fact_table': LoadFactOperator (defined elsewhere)
# receives the songplays DDL and insert query for the 'songplays' table.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id="load_songplays_fact_table",
    redshift_conn_id='redshift',
    table='songplays',
    insert_table_sql=SqlQueries.songplay_table_insert,
    create_table_sql=SqlQueries.create_songplays,
)

# Task 'load_user_dim_table': LoadDimensionOperator (defined elsewhere)
# receives the users DDL and the user insert query.
# The target table is "users" so that it agrees with create_users and
# user_table_insert; the 'songplays' fact table has its own load task.
# NOTE(review): assumes SqlQueries.create_users creates a table named
# "users" -- confirm against the SQL definitions.
load_user_dimension_table = LoadDimensionOperator(
    task_id='load_user_dim_table',
    dag=dag,
    redshift_conn_id="redshift",
    table="users",
    create_table_sql=SqlQueries.create_users,
    insert_table_sql=SqlQueries.user_table_insert,
    delete_existing_records=False)

# Task 'load_song_dim_table': LoadDimensionOperator given only a task id and
# the 'redshift' connection id.
# NOTE(review): unlike the neighbouring tasks, no dag, table or SQL arguments
# are passed here -- confirm this is intended; the call may be incomplete.
load_song_dimension_table = LoadDimensionOperator(
    task_id='load_song_dim_table',
    redshift_conn_id='redshift',
)

# Task 'Stage_songs': StageToRedshiftOperator (defined elsewhere) configured
# with bucket 'udacity-dend', key prefix 'song_data/' and the 'staging_songs'
# table.
stage_songs_to_redshift = StageToRedshiftOperator(
    dag=dag,
    task_id="Stage_songs",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="song_data/",
    table="staging_songs",
)

# Task 'Load_songplays_fact_table': LoadFactOperator (defined elsewhere)
# receives the songplay insert statement; no table name is passed here.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id="Load_songplays_fact_table",
    sql_stmt=SqlQueries.songplay_table_insert,
    redshift_conn_id="redshift",
)

# Task 'Load_user_dim_table': LoadDimensionOperator (defined elsewhere)
# receives the user insert statement for the 'users' table, with
# load_option 'delete-load' (presumably clears the table before inserting;
# confirm in the operator).
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id="Load_user_dim_table",
    sql_stmt=SqlQueries.user_table_insert,
    table="users",
    load_option="delete-load",
    redshift_conn_id="redshift",
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    redshift_conn_id='redshift',
    task_id='Stage_songs',
    dag=dag,
    table='staging_songs',
    time_format='epochmillisecs',
    region='us-west-2',
    format_type='auto',
    s3_bucket='udacity-dend',
    s3_key='song_data/A/A/A/',
    use_partitioning=False,
    execution_date='{{ execution_date }}',
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials')

# Task 'Load_songplays_fact_table': LoadFactOperator (defined elsewhere)
# receives the songplay insert query for the 'songplays' table.
# NOTE(review): append_data is the string 'False', not the boolean False; a
# non-empty string is truthy in Python -- confirm how the operator reads it.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id="Load_songplays_fact_table",
    redshift_conn_id="redshift",
    table="songplays",
    sql=SqlQueries.songplay_table_insert,
    append_data="False",
)

# Task 'Load_user_dim_table': wraps the sub-DAG returned by
# load_dimension_tables_dag (defined elsewhere) in a SubDagOperator. The same
# id string is used for the sub-DAG argument and for the wrapping task.
# NOTE(review): the positional arguments look like (parent DAG name, task id,
# connection id, table, append flag, insert SQL) -- confirm against the
# factory's signature; the flag is the string "False", not a boolean.
user_table_task_id = 'Load_user_dim_table'
load_user_dim_task = SubDagOperator(
    task_id=user_table_task_id,
    dag=dag,
    subdag=load_dimension_tables_dag(
        'sparkify_etl_dag',
        user_table_task_id,
        'redshift',
        'users',
        'False',
        SqlQueries.user_table_insert,
        start_date=start_date,
    ),
)
# Example #7
# 0
    s3_key="log_data",
    path="s3://udacity-dend/log_json_path.json",
    dag=dag)

# Task 'Stage_songs': StageToRedshiftOperator (defined elsewhere) configured
# with bucket 'udacity-dend', key 'song_data' and the 'staging_songs' table.
stage_songs_to_redshift = StageToRedshiftOperator(
    dag=dag,
    task_id="Stage_songs",
    table='staging_songs',
    s3_bucket='udacity-dend',
    s3_key='song_data',
    aws_credentials_id='aws_credentials',
    redshift_conn_id='redshift',
)

# Task 'Load_songplays_fact_table': LoadFactOperator (defined elsewhere)
# receives the songplay insert statement for the 'songplays' table.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id="Load_songplays_fact_table",
    table='songplays',
    sql_stmt=SqlQueries.songplay_table_insert,
    redshift_conn_id='redshift',
)

# Task 'Load_user_dim_table': LoadDimensionOperator (defined elsewhere).
# NOTE(review): the variable and task id say "user", but the 'songs' table and
# song_table_insert are passed here, while the song task that follows passes
# 'users' and user_table_insert -- the two look swapped. Left unchanged because
# both calls must be corrected together; confirm and fix them as a pair.
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id="Load_user_dim_table",
    table='songs',
    sql_stmt=SqlQueries.song_table_insert,
    redshift_conn_id='redshift',
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    redshift_conn_id="redshift",
    table="users",
    sql_stmt=SqlQueries.user_table_insert,
# Example #8
# 0
    table='staging_events',
    copy_options="JSON 's3://udacity-dend/log_json_path.json'")

# Task 'Stage_songs': StageToRedshiftOperator (defined elsewhere) configured
# with bucket 'udacity-dend', prefix 'song_data', the 'staging_songs' table,
# and a copy-options string requesting JSON 'auto' format.
stage_songs_to_redshift = StageToRedshiftOperator(
    dag=dag,
    task_id="Stage_songs",
    table="staging_songs",
    s3_bucket="udacity-dend",
    s3_prefix="song_data",
    copy_options="FORMAT AS JSON 'auto'",
)

# Populate the fact and dimension tables

# Task 'Load_songplays_fact_table': LoadFactOperator (defined elsewhere)
# receives the songplays select/insert query for the 'songplays' table.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id="Load_songplays_fact_table",
    select_sql=SqlQueries.songplays_table_insert,
    table="songplays",
)

# Task 'Load_users_dim_table': LoadDimensionOperator (defined elsewhere)
# receives the users query for the 'users' table with mode 'truncate'
# (presumably empties the table before loading; confirm in the operator).
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id="Load_users_dim_table",
    select_sql=SqlQueries.users_table_insert,
    table="users",
    mode="truncate",
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_songs_dim_table',
    dag=dag,
    table='songs',
    select_sql=SqlQueries.songs_table_insert,