Example #1
from airflow import DAG
from airflow.contrib.operators.bigquery_operator import BigQueryOperator  # Airflow 1.x contrib import path

# Global variables
# (ENVIRONMENTS, DEFAULT_ARGS and schedule_interval are assumed to be defined elsewhere in this module)
bqproject = 'usage-data-reporting'
datasetenv = 'DEV'

# Create DAG
dag = DAG(ENVIRONMENTS['dev']['dag-name'],
          default_args=DEFAULT_ARGS,
          schedule_interval=schedule_interval,
          description='CK DAG GBQ Test')

MOVE_LDZ_DATA_TO_DWH = BigQueryOperator(
    dag=dag,
    task_id='CK_GBQ_TEST_TASK_01',
    sql='Tests/ckgbqtest.sql',
    params={
        "project": bqproject,
        "environment": datasetenv
    },
    # destination_dataset_table=bqproject + '.' + datasetenv + '_DWH_GBQ_Test.CK_GBQ_Test',  # target table
    write_disposition='WRITE_APPEND',  # write disposition: WRITE_TRUNCATE or WRITE_APPEND
    use_legacy_sql=False,
    bigquery_conn_id=ENVIRONMENTS['dev']['connection-id'])

# References the task defined above (see dag name) and supplies a description, shown in the Airflow UI.
MOVE_LDZ_DATA_TO_DWH.doc_md = """Write data from LDZ to DWH"""

# Define how the different steps in the workflow are executed
# (with a single task there are no upstream/downstream dependencies to declare)

MOVE_LDZ_DATA_TO_DWH
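
Note that the sql argument is a Jinja-templated field, so Tests/ckgbqtest.sql can reference the values passed via params as {{ params.project }} and {{ params.environment }}. With only one task there is nothing to order, but the sketch below (not part of the original example) shows how a dependency would be declared with Airflow's bit-shift syntax if a second operator were added; CLEANUP_TASK, its task id and its SQL file are hypothetical names.

# Hypothetical second task, reusing the connection and project settings from above.
CLEANUP_TASK = BigQueryOperator(
    dag=dag,
    task_id='CK_GBQ_TEST_TASK_02',  # hypothetical task id
    sql='Tests/ckgbqcleanup.sql',  # hypothetical templated SQL file
    use_legacy_sql=False,
    bigquery_conn_id=ENVIRONMENTS['dev']['connection-id'])

# Run the cleanup task only after the load task has finished.
MOVE_LDZ_DATA_TO_DWH >> CLEANUP_TASK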
Example #2
        task_id='my_bq_task_1_' + lob,  # task IDs must be unique within the DAG
        bql='my_qry_1.sql',  # the actual SQL command we want to run on BigQuery is in this file in the same folder; it is also templated
        params={"lob": lob},  # the SQL file above has a template placeholder for a 'lob' parameter - this is how we pass it in
        destination_dataset_table='airflow.' + lob + '_test_task1',  # in this example we also want our target table to be lob and task specific
        write_disposition='WRITE_TRUNCATE',  # drop and recreate this table each time; you could use other options here
        bigquery_conn_id='my_gcp_connection'  # this is the Airflow connection to GCP we defined in the front end. More info here: https://github.com/alexvanboxel/airflow-gcp-examples
    )
    # add documentation for what this task does - this will be displayed in the Airflow UI
    bq_task_1.doc_md = """\
    Append a "Hello World!" message string to the table [airflow.<lob>_test_task1]
    """

    # define the second task, in our case another BigQuery operator
    bq_task_2 = BigQueryOperator(
        dag=dag,  # need to tell Airflow that this task belongs to the DAG we defined above
        task_id='my_bq_task_2_' + lob,  # task IDs must be unique within the DAG
        bql='my_qry_2.sql',  # the actual SQL command we want to run on BigQuery is in this file in the same folder; it is also templated
        params={"lob": lob},  # the SQL file above has a template placeholder for a 'lob' parameter - this is how we pass it in
        destination_dataset_table='airflow.' + lob +