    rename = S3KeyRenameOperator(
        task_id='{0}_rename'.format(table['name']),
        s3_conn_id='s3',
        s3_bucket='jessecarah',  # refactor to use metadata from connection
        table=table['name'],
        since=since,
        dag=dag)

    load = S3ToRedshiftOperator(
        task_id='{0}_load'.format(table['name']),
        s3_conn_id='s3',
        s3_bucket='jessecarah',
        s3_key='{0}/{1}/{0}.csv'.format(table['name'], since),
        load_type=table['replication'],
        redshift_conn_id='redshift',
        redshift_schema='airflow',
        table=table['name'],
        primary_key='id' if table['name'] != 'result_maker' else None,
        copy_params=[
            "COMPUPDATE OFF", "STATUPDATE OFF", "FORMAT as CSV",
            "TIMEFORMAT 'auto'", "BLANKSASNULL", "TRUNCATECOLUMNS",
            "region as 'us-east-1'", "IGNOREHEADER 1"
        ],
        origin_schema='../templates/{0}_schema.json'.format(table['name']),
        schema_location='local',
        incremental_key='id' if table['replication'] == 'upsert' else None,
        dag=dag)

    skip_check >> s3_cleanup >> build_schedule >> sense_s3_key >> rename >> load
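
The snippet above references table['name'], table['replication'], since and several upstream tasks (skip_check, s3_cleanup, build_schedule, sense_s3_key) that are defined outside the excerpt, so it presumably sits inside a loop over per-table metadata. A minimal sketch of that enclosing context, purely as an assumption; the table list, replication modes and the since value are illustrative, not taken from the original:

import datetime as dt

# Hypothetical metadata driving the per-table tasks above; the real DAG
# defines its own table list, since value and upstream tasks.
since = (dt.date.today() - dt.timedelta(days=1)).isoformat()
tables = [
    {'name': 'result_maker', 'replication': 'rebuild'},
    {'name': 'orders', 'replication': 'upsert'},
]

for table in tables:
    # skip_check, s3_cleanup, build_schedule and sense_s3_key would be
    # created here, followed by the rename and load tasks shown above.
    ...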
Example #2
    upload_S3_product_category_name_translation_3 = PythonOperator(
        task_id='upload_S3_product_category_name_translation_3',
        python_callable=upload_file_to_S3_with_hook,
        op_kwargs={
            'filename':
            '/usr/local/airflow/dags/airflow/data_files_split/product_category_name_translation_part_3.csv',
            'key': 'product_category_name_translation_3.csv',
            'bucket_name': 'brazillian-e-commerce-data-update'
        })

    load_customers = S3ToRedshiftOperator(
        task_id="load_customers",
        redshift_conn_id="redshift_conn",
        table="customers_staging",
        s3_bucket="brazillian-e-commerce-data-update",
        s3_path="customers",
        s3_access_key_id=aws_access_key_id,
        s3_secret_access_key=aws_secret_access_key,
        delimiter=",",
        region="ca-central-1")

    load_geolocation = S3ToRedshiftOperator(
        task_id="load_geolocation",
        redshift_conn_id="redshift_conn",
        table="geolocation_staging",
        s3_bucket="brazillian-e-commerce-data-update",
        s3_path="geolocation",
        s3_access_key_id=aws_access_key_id,
        s3_secret_access_key=aws_secret_access_key,
        delimiter=",",
        region="ca-central-1")
Example #3
dag = DAG('redshift-demo',
  default_args=default_args,
  schedule_interval='@once'
)

upsert = RedshiftUpsertOperator(
  task_id='upsert',
  src_redshift_conn_id="my_redshift",
  dest_redshift_conn_id="my_redshift",
  src_table="stage_customer",
  dest_table="customer",
  src_keys=["id"],
  dest_keys=["id"],
  dag=dag
)
 
load = S3ToRedshiftOperator(
  task_id="load",
  redshift_conn_id="my_redshift",
  table="stage_customer",
  s3_bucket="bucket_name",
  s3_path="new_data.csv",
  s3_access_key_id="key",
  s3_secret_access_key="key",
  delimiter=",",
  region="us-east-1",
  dag=dag
)
 
load >> upsert
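
RedshiftUpsertOperator merges the staging table into the target on the id key. Its internals are not shown here, but a staging-table upsert in Redshift is conventionally a delete-then-insert run in a single transaction; the sketch below reuses the table and key names from the example and may differ from the exact SQL the plugin emits:

# Conventional Redshift upsert from a staging table (illustrative only).
upsert_sql = """
BEGIN;
DELETE FROM customer USING stage_customer WHERE customer.id = stage_customer.id;
INSERT INTO customer SELECT * FROM stage_customer;
COMMIT;
"""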
Example #4

import datetime as dt

from airflow import DAG
from airflow.operators.redshift_load_plugin import S3ToRedshiftOperator

default_args = {
    'owner': 'me',
    'start_date': dt.datetime(2019, 10, 8),
    'retries': 1,
    'retry_delay': dt.timedelta(minutes=5),
}

dag = DAG('S3-RS-COPY-demo',
          default_args=default_args,
          schedule_interval='@once')

s3load = S3ToRedshiftOperator(task_id="s3load",
                              redshift_conn_id="test_rs_conn",
                              iam_role="arn:aws:iam::1234:role/testRole",
                              region="us-west-1",
                              s3_path="s3://account/20191004/stg_account.csv",
                              delimiter=",",
                              staging_table="stg_account",
                              format_as_json="auto",
                              dag=dag)

s3load
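
Unlike the earlier examples, this variant of S3ToRedshiftOperator authenticates the load with an IAM role (iam_role) rather than embedding s3_access_key_id/s3_secret_access_key in the DAG file; the role ARN is presumably passed through to the COPY command's IAM_ROLE clause, which keeps static credentials out of source control.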
Example #5
default_args = {
    'owner': 'me',
    'start_date': dt.datetime(2019, 10, 8),
    'retries': 1,
    'retry_delay': dt.timedelta(minutes=5),
}

dag = DAG('redshift-load-demo',
          default_args=default_args,
          schedule_interval='@once')

s3load_user = S3ToRedshiftOperator(
    task_id="s3load_user",
    redshift_conn_id="test_rs_conn",
    iam_role="arn:aws:iam::1234:role/testRole",
    region="us-west-1",
    s3_path="s3://kwiff_user/20191008/users.csv",
    delimiter=",",
    staging_table="stg_user",
    format_as_json="auto",
    dag=dag)

s3load_user_tag = S3ToRedshiftOperator(
    task_id="s3load_user_tag",
    redshift_conn_id="test_rs_conn",
    iam_role="arn:aws:iam::1234:role/testRole",
    region="us-west-1",
    s3_path="s3://kwiff_user/20191008/user_tags.csv",
    delimiter=",",
    staging_table="stg_user_tags",
    format_as_json="auto",
    dag=dag)
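
No dependency is declared between s3load_user and s3load_user_tag, so the two staging loads are independent tasks and can run in parallel within the same DAG run, subject to the executor's concurrency settings.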
Example #6
dag = DAG('redshift-demo',
  default_args=default_args,
  schedule_interval='@once'
)

upsert = RedshiftUpsertOperator(
  task_id='upsert',
  src_redshift_conn_id="pc_redshift",
  dest_redshift_conn_id="pc_redshift",
  src_table="stage_customer",
  dest_table="customer",
  src_keys=["id"],
  dest_keys=["id"],
  dag=dag
)
 
load = S3ToRedshiftOperator(
  task_id="load",
  redshift_conn_id="pc_redshift",
  table="stage_customer",
  s3_bucket="vid-airflow-source-data",
  s3_path="customer.csv",
  s3_access_key_id="key",
  s3_secret_access_key="key",
  delimiter=",",
  region="us-east-1",
  dag=dag
)
 
load >> upsert