def csv_to_json():
    """Read ``data_engineering_test_file.csv``, print each record's name,
    and dump the whole frame to ``from_airflow.json`` (records orientation).

    Invoked by the ``convert_csv_to_json`` PythonOperator task below.
    """
    df = pd.read_csv('data_engineering_test_file.csv')
    for _, row in df.iterrows():
        print(row['name'])
    # Write ONCE, after the loop. The original called to_json inside the
    # loop, rewriting the entire output file for every row (O(n^2) I/O).
    df.to_json('from_airflow.json', orient='records')


# Defaults shared by every task in the DAG below: one retry with a
# five-minute backoff, and no dependence on the previous run's outcome.
default_args = dict(
    owner='Roman',
    depends_on_past=False,
    retries=1,
    retry_delay=timedelta(minutes=5),
)

# Daily pipeline: announce the run with a bash echo, then convert the CSV
# via the csv_to_json callable defined above.
with DAG(
        'TestingAirflow',
        default_args=default_args,
        description='A simple Airflow pipeline for testing',
        schedule_interval=timedelta(days=1),
        start_date=days_ago(2),
        tags=['example']
) as dag:
    print_starting = BashOperator(
        task_id='starting',
        bash_command='echo "I am reading the CSV file now..."',
    )
    csv_conversion = PythonOperator(
        task_id='convert_csv_to_json',
        python_callable=csv_to_json,
    )

# The echo task runs first, then the conversion (bit-shift form of
# print_starting.set_downstream(csv_conversion)).
print_starting >> csv_conversion
# Minimal per-task defaults for the bash-operator example DAG.
# NOTE(review): seven_days_ago is not defined in this chunk — presumably a
# helper/constant from the surrounding file.
args = dict(
    owner='airflow',
    start_date=seven_days_ago,
)

# schedule_interval=None means the DAG only runs when triggered manually.
dag = DAG(
    dag_id='example_bash_operator',
    default_args=args,
    schedule_interval=None,
)

# Shell command kept from the original example (not referenced by the
# tasks defined in this chunk).
cmd = 'ls -l'

# Terminal no-op task; the loop tasks and run_this funnel into it.
run_this_last = DummyOperator(task_id='run_this_last', dag=dag)

run_this = BashOperator(
    task_id='run_after_loop',
    bash_command='echo 1',
    dag=dag,
)
# Bit-shift form of run_this.set_downstream(run_this_last).
run_this >> run_this_last

# Three parallel tasks (runme_0..runme_2), each echoing its templated task
# key, all feeding into run_this.
for idx in range(3):
    runme = BashOperator(
        task_id=f'runme_{idx}',
        bash_command='echo "{{ task_instance_key_str }}" && sleep 1',
        dag=dag,
    )
    runme >> run_this

# Independent templated task demonstrating the run_id / dag_run macros;
# it only gates the terminal run_this_last task.
task = BashOperator(
    task_id='also_run_this',
    bash_command='echo "run_id={{ run_id }} | dag_run={{ dag_run }}"',
    dag=dag,
)
task >> run_this_last
# --- Example #3 (scraped-source marker; original numbering artifact) ---
        # NOTE(review): this fragment is incomplete — the opening of the
        # operator call that these kwargs belong to (and the definitions of
        # t1/t2/t3) was lost in extraction. Recover it from the original
        # source before relying on this code.
        params={'my_param': 'Parameter I passed in'},
    )

    t1 >> [t2, t3]
    
    
""" 
Setting up Dependencies 

Let's say we have tasks t1, t2 and t3 that do not depend on each other.
Note that when executing your script, Airflow will raise exceptions 
when it finds cycles in your DAG or when a dependency is referenced more 
than once.
"""

# The four statements below all express the SAME single dependency
# (t1 -> t2); they are alternative spellings shown for demonstration,
# not four distinct edges.
t1.set_downstream(t2)

# This means that t2 will depend on t1
# running successfully to run.
# It is equivalent to:
t2.set_upstream(t1)

# The bit shift operator can also be
# used to chain operations:
t1 >> t2

# And the upstream dependency with the
# bit shift operator:
t2 << t1

# Chaining multiple dependencies becomes
# concise with the bit shift operator, e.g. t1 >> t2 >> t3.
# --- Example #4 (scraped-source marker; original numbering artifact) ---
    # NOTE(review): this fragment is incomplete — the opening line of the
    # operator call (presumably `t_move = DockerOperator(`, given the
    # `t_move` reference at the bottom of the file) was lost in extraction.
    # The command waits 30s, moves the file named by the 'view_file' XCom
    # from source_location to target_location, then echoes the new path
    # (pushed to XCom via do_xcom_push for the downstream 'print' task).
    command=[
        "/bin/bash",
        "-c",
        "/bin/sleep 30; "
        "/bin/mv {{params.source_location}}/{{ ti.xcom_pull('view_file') }} {{params.target_location}};"
        "/bin/echo '{{params.target_location}}/{{ ti.xcom_pull('view_file') }}';",
    ],
    task_id="move_data",
    do_xcom_push=True,
    params={"source_location": "/your/input_dir/path", "target_location": "/your/output_dir/path"},
    dag=dag,
)

print_templated_cmd = """
    cat {{ ti.xcom_pull('move_data') }}
"""

# Run the templated `cat` inside a CentOS container; the host output dir
# is mounted so the file moved by 'move_data' is visible in-container.
t_print = DockerOperator(
    task_id="print",
    image="centos:latest",
    command=print_templated_cmd,
    api_version="1.19",
    docker_url="tcp://localhost:2375",
    volumes=["/your/host/output_dir/path:/your/output_dir/path"],
    dag=dag,
)

# Linear pipeline: view -> availability check -> move -> print
# (bit-shift form of the pairwise set_downstream calls).
t_view >> t_is_data_available >> t_move >> t_print