Example #1
    # [START load_function]
    def load(**kwargs):
        ti = kwargs['ti']
        total_value_string = ti.xcom_pull(task_ids='transform', key='total_order_value')
        total_order_value = json.loads(total_value_string)

        print(total_order_value)
    # [END load_function]

    # [START main_flow]
    extract_task = PythonOperator(
        task_id='extract',
        python_callable=extract,
    )
    extract_task.doc_md = """\
#### Extract task
A simple Extract task to get data ready for the rest of the data pipeline.
In this case, getting data is simulated by reading from a hardcoded JSON string.
This data is then put into xcom, so that it can be processed by the next task.
"""

    transform_task = PythonOperator(
        task_id='transform',
        python_callable=transform,
    )
    transform_task.doc_md = """\
#### Transform task
A simple Transform task which takes in the collection of order data from xcom
and computes the total order value.
This computed value is then put into xcom, so that it can be processed by the next task.
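
The excerpt above is cut off mid-docstring, and the `extract` and `transform` callables it wires up are not shown. A minimal sketch of what they might look like, assuming the XCom keys `order_data` and `total_order_value` implied by `load`:

import json

def extract(**kwargs):
    # Simulate extraction by hardcoding a JSON string and pushing it to XCom.
    ti = kwargs['ti']
    data_string = '{"1001": 301.27, "1002": 433.21, "1003": 502.22}'
    ti.xcom_push('order_data', data_string)

def transform(**kwargs):
    # Pull the raw order data, sum the values, and push the total back to XCom
    # as a JSON string, which is what load() expects to json.loads().
    ti = kwargs['ti']
    extract_data_string = ti.xcom_pull(task_ids='extract', key='order_data')
    order_data = json.loads(extract_data_string)
    total_order_value = sum(order_data.values())
    ti.xcom_push('total_order_value', json.dumps(total_order_value))
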
Example #2
        dag_id,
        default_args=default_args(),
        schedule_interval="10 * * * *",
        start_date=datetime(2021, 1, 1,
                            tzinfo=pendulum.timezone("Asia/Tokyo")),
) as dag:
    dag.doc_md = __doc__

    start = DummyOperator(task_id="start")

    a = PythonOperator(
        task_id="a",
        params={},
        python_callable=task_sample,
    )
    a.doc_md = task_sample.__doc__

    b = BranchPythonOperator(
        task_id="b",
        params={},
        python_callable=task_branch,
    )

    c = DummyOperator(task_id="c")

    d = DummyOperator(task_id="d")

    e = ShortCircuitOperator(
        task_id="e",
        params={},
        trigger_rule="none_failed",
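
The callables `task_sample` and `task_branch` are defined elsewhere in the source. A `BranchPythonOperator` callable must return the task_id (or list of task_ids) to follow; a minimal sketch, assuming `b` chooses between the `c` and `d` tasks above with made-up selection logic:

import random

def task_branch():
    # Return the task_id of the downstream task to run; the other branch is skipped.
    return "c" if random.random() < 0.5 else "d"
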
Example #3
         echo There are $NUM_TO_PROCESS files to process.
         test $NUM_TO_PROCESS -gt 0
         '''),
     params={'product': product},
 )
 # Thanks https://stackoverflow.com/questions/48580341/how-to-add-manual-tasks-in-an-apache-airflow-dag
 manual_sign_off = PythonOperator(
     task_id=f"manual_sign_off_{product}",
     python_callable=task_to_fail,
     retries=1,
     retry_delay=TIMEOUT,
 )
 manual_sign_off.doc_md = dedent("""
         ## Instructions
         Perform some manual checks that the number of COGs to be generated seems to be about right.
         
         You can also do spot checks that files don't already exist in S3.
         
         Once you're happy, mark this job as **Success** for the DAG to continue running.
     """)
 submit_task_id = f'submit_cog_convert_job_{product}'
 submit_bulk_cog_convert = SSHOperator(
     task_id=submit_task_id,
     command=dedent(COMMON + """
         cd {{work_dir}}
         mkdir out
         
         qsub <<EOF
         #!/bin/bash
         #PBS -l wd,walltime=5:00:00,mem=190GB,ncpus=48,jobfs=1GB
         #PBS -P {{params.project}}
         #PBS -q {{params.queue}}
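
`task_to_fail`, referenced by `manual_sign_off`, is not shown; per the linked Stack Overflow pattern it simply fails, so that a person can follow the doc_md instructions and mark the task instance as Success by hand. A minimal sketch under that assumption:

from airflow.exceptions import AirflowException

def task_to_fail():
    # Always fail: the DAG only continues once an operator has done the manual
    # checks described in doc_md and marked this task instance as Success.
    raise AirflowException("Awaiting manual sign-off; mark as Success to continue.")
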
Example #4
    df.to_sql(f'clean_{tablename}', con, if_exists='replace', index=False)


for table in tables:
    clean_data = PythonOperator(
        task_id=f'clean_data_{table}',
        python_callable=clean_data_df,
        op_kwargs={'tablename': table},
        dag=dag,
    )
    load_data >> clean_data

# [START documentation]
dag.doc_md = __doc__

load_data.doc_md = """\
#### Load Data
This task loads data from the csv files in the data directory (set via the
DATA_DIR environment variable) into the database Airflow creates.
"""

read_data.doc_md = """\
#### Read Data 
This task does nothing. It demonstrates how to use the SQLite operator.
"""

clean_data.doc_md = """\
#### Clean Data 
This task removes a column with pandas. It demonstrates how to alter data 
and write it back into the same table.
"""
Example #5
re_parse_authors_data = PythonOperator(
    task_id='re_parse_authors',
    dag=dag,
    provide_context=True,
    python_callable=helpers.load_authors,
    op_kwargs={
        'aws_credentials_id': 'aws_credentials',
        'redshift_connection_id': 'redshift',
        's3_credentials_id': 's3_credentials',
        'region': 'us-east-1',
        'bucket': 'arxiv-etl',
        'file_name': 'staging/authors/authors-parsed.json'
    },
)
re_parse_authors_data.doc_md = """
# Parses the data from S3 locally, re-formats it so that Redshift COPY can load it easily, then saves it back to S3
"""

stage_authors_to_redshift = StageFromS3ToRedshiftOperator(
    task_id='stage_authors',
    dag=dag,
    provide_context=True,
    table="staging.authors",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="arxiv-etl",
    s3_key="staging/authors/authors_parsed.csv",
    region="us-east-1",
    file_type="csv")
stage_authors_to_redshift.doc_md = """
Example #6
    dag=math_dag
)

t2 = PythonOperator(
    task_id="subtraction_task",
    python_callable=sub_nos,
    depends_on_past=False,
    retries=3,
    dag=math_dag
)

square_task = PythonOperator(
    task_id="square_task",
    python_callable=square_no,
    depends_on_past=True,
    retries=3,
    dag=math_dag
)

math_dag.doc_md = __doc__

t1.doc_md = """\
#### Addition Task Documentation
A simple task to add two numbers
![miztiik-success-green](https://img.shields.io/badge/Miztiik:Automation:Airflow:Level-300-blue)
"""

# Configure Task Dependencies
t1 >> t2
t1 >> square_task
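
The callables behind these tasks are not included in the excerpt. A minimal sketch of what they might look like, with the addition callable's name and all operands assumed:

def add_nos(a=3, b=4):
    # Body of the addition task documented above (name is an assumption).
    print(f"{a} + {b} = {a + b}")

def sub_nos(a=9, b=4):
    # Body of subtraction_task.
    print(f"{a} - {b} = {a - b}")

def square_no(a=5):
    # Body of square_task; with depends_on_past=True it only runs if its
    # previous scheduled run succeeded.
    print(f"{a} squared = {a * a}")
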
Example #7
    task_id='get_listings',
    python_callable=get_listings,
    dag=dag,
)

t2 = PythonOperator(
    task_id='send_email',
    provide_context=True,
    python_callable=send_email,
    dag=dag,
)

# noinspection PyStatementEffect
t1 >> t2

# Documentation
dag.doc_md = f"""
#### DAG Documentation
{dag.description}
"""

t1.doc_md = """
#### Task Documentation
Retrieves and stores Zoopla data
"""

t2.doc_md = """
#### Task Documentation
Sends email notification when new data is available
"""
Example #8
    dag=dag,
)

t2 = PythonOperator(
    task_id='Fetch_Data_and_Create_CSV',
    python_callable=task2,
    retries=3,
    dag=dag,
)

t3 = PythonOperator(
    task_id='Upload_Big_Query',
    python_callable=task3,
    retries=3,
    dag=dag,
)

dag.doc_md = __doc__

t1.doc_md = """\
            #### Task 1 : Install Requirements.
            Installs the requirements listed in requirements.txt.
            """

t2.doc_md = """\
            #### Task 2 : Fetch data from API & create a local csv.
            The API provides the state-wise change in Covid-19 cases every day.
            """

t1 >> t2 >> t3
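
Only `t1` and `t2` carry doc_md in this snippet; a matching docstring for the BigQuery upload task could look like this (wording assumed):

t3.doc_md = """\
            #### Task 3 : Upload the csv to BigQuery.
            Loads the state-wise csv created by Task 2 into a BigQuery table
            """
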
Example #9
        file.columns = file.columns.map(lambda x: x.replace('(', '').replace(
            ')', ''))  # strip the parenthesis characters from the column names
        engine = PostgresHook(
            postgres_conn_id='postgres_local').get_sqlalchemy_engine()
        file.to_sql('airflow_stg_mining_po',
                    con=engine,
                    index=True,
                    if_exists='replace',
                    schema='beeline')

    # read the file and write it into a staging table in the target database
    process_file = PythonOperator(task_id='process_file',
                                  provide_context=True,
                                  python_callable=process_xls_file)

    process_file.doc_md = """\
        #### Task Documentation
        You can document your task using the attributes `doc_md` (markdown),
        `doc` (plain text), `doc_rst`, `doc_json`, `doc_yaml`, which get
        rendered in the UI's Task Instance Details page.
        ![img](http://montcs.bloomu.edu/~bobmon/Semesters/2012-01/491/import%20soul.png)
        """

    # update the target table
    update_target_table = PostgresOperator(task_id='update_target_table',
                                           sql='''
            insert into beeline.airflow_mining_po 
                select * from beeline.airflow_stg_mining_po
            on conflict do nothing;
        ''',
                                           postgres_conn_id='postgres_local',
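
Note that `on conflict do nothing` in `update_target_table` only deduplicates if `beeline.airflow_mining_po` carries a primary key or unique constraint to conflict on. If the target table is not created elsewhere, a task along these lines could precede the upsert (the column list is a placeholder):

    create_target_table = PostgresOperator(
        task_id='create_target_table',
        postgres_conn_id='postgres_local',
        sql='''
            create table if not exists beeline.airflow_mining_po (
                report_date date,
                site_id     text,
                volume      numeric,
                primary key (report_date, site_id)  -- conflict target for the upsert
            );
        ''')
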
Example #10
                           python_callable=createlog,
                           dag=dag)

ExtracttoDF = PythonOperator(task_id='sqlite_to_df',
                             python_callable=getdf,
                             dag=dag)

LoadTask = PythonOperator(task_id='Destinationdb',
                          python_callable=createdb,
                          dag=dag)

UpsertTask = PythonOperator(task_id='Destinationdb_Upsert',
                            python_callable=updatedb,
                            dag=dag)

dag.doc_md = __doc__

ExtracttoDF.doc_md = """\
Extract data from source DB
"""

templated_command = """
{% for i in range(5) %}
    echo "{{ ds }}"
    echo "{{ macros.ds_add(ds, 7)}}"
    echo "{{ params.my_param }}"
{% endfor %}
"""

[ExtracttoDF, CreateLog] >> LoadTask >> UpsertTask
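
`templated_command` is built here but never attached to a task in this excerpt. Jinja only renders it when it is assigned to a templated field; a typical hookup, with the task_id and `my_param` value assumed:

from airflow.operators.bash_operator import BashOperator

templated_task = BashOperator(
    task_id='templated',
    bash_command=templated_command,
    params={'my_param': 'Parameter I passed in'},  # value is an assumption
    dag=dag,
)
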
Example #11
    """
    message_task(SQL_CONN_STRING, KEY_WORDS, FREQ)


with DAG(
    'create_postgres_db',
    description="Creates Postgres DB for tweets if it doesn't already exist",
    schedule_interval="@once",
    default_args=default_args
) as create_pgdb_dag:

    create_db = PythonOperator(
        task_id='create_db', python_callable=create_postgres_db,
        dag=create_pgdb_dag
    )
    create_db.doc_md = """\
    #### CREATE PGDB
    Creates a database in Postgres for the transformed tweet data, \
    if one does not already exist
    """

    create_db


with DAG(
    'tweetl_dag',
    description='Performs ETL round and triggers slackbot',
    schedule_interval=timedelta(seconds=FREQ),
    catchup=False,
    default_args=default_args
) as tweetl_dag:
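
`create_postgres_db`, `message_task`, and the ETL callables referenced by these two DAGs live elsewhere in the project. A minimal sketch of `create_postgres_db`, assuming a psycopg2 connection and a database named `tweets` (both assumptions); `CREATE DATABASE` cannot run inside a transaction, so autocommit is switched on and existence is checked first:

import psycopg2

def create_postgres_db():
    # Connect to the maintenance database and create the tweets DB only if
    # it is not already there.
    conn = psycopg2.connect(host='localhost', dbname='postgres',
                            user='airflow', password='airflow')  # connection details are assumptions
    conn.autocommit = True
    with conn.cursor() as cur:
        cur.execute("SELECT 1 FROM pg_database WHERE datname = 'tweets'")
        if cur.fetchone() is None:
            cur.execute("CREATE DATABASE tweets")
    conn.close()
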