Example No. 1
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago

args = {
    'owner': 'airflow',
    'start_date': days_ago(2),
}


def dummy(*args, **kwargs):
    """Dummy function"""
    return "pass"


with DAG(dag_id='example_xcom_args',
         default_args=args,
         schedule_interval=None,
         tags=['example']) as dag:
    task1 = PythonOperator(
        task_id='task1',
        python_callable=dummy,
    )

    task2 = PythonOperator(
        task_id='task2',
        python_callable=dummy,
        op_kwargs={"dummy": task1.output},
    )
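In the example above, `task1.output` is an XComArg: at run time, task2's `dummy` kwarg receives whatever task1's callable returned ("pass"). A minimal sketch of the equivalent explicit pull (the `read_from_xcom` callable is illustrative, not part of the original example):

def read_from_xcom(ti, **kwargs):
    # Equivalent to the value injected via op_kwargs={"dummy": task1.output}.
    value = ti.xcom_pull(task_ids='task1')
    print(value)  # prints "pass", the return value of dummy()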
Example No. 2
    # get value_2
    pulled_value_2 = ti.xcom_pull(task_ids='push_by_returning')
    if pulled_value_2 != value_2:
        raise ValueError(f'The two values differ {pulled_value_2} and {value_2}')

    # get both value_1 and value_2
    pulled_value_1, pulled_value_2 = ti.xcom_pull(key=None, task_ids=['push', 'push_by_returning'])
    if pulled_value_1 != value_1:
        raise ValueError(f'The two values differ {pulled_value_1} and {value_1}')
    if pulled_value_2 != value_2:
        raise ValueError(f'The two values differ {pulled_value_2} and {value_2}')


push1 = PythonOperator(
    task_id='push',
    dag=dag,
    python_callable=push,
)

push2 = PythonOperator(
    task_id='push_by_returning',
    dag=dag,
    python_callable=push_by_returning,
)

pull = PythonOperator(
    task_id='puller',
    dag=dag,
    python_callable=puller,
)
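The `push` and `push_by_returning` callables referenced above sit outside this excerpt. A sketch of how they typically look (the values and the explicit XCom key are illustrative; `push_by_returning` relies on the default 'return_value' key):

value_1 = [1, 2, 3]
value_2 = {'a': 'b'}


def push(ti=None, **kwargs):
    """Push value_1 into XCom under an explicit key."""
    ti.xcom_push(key='value from pusher 1', value=value_1)


def push_by_returning(**kwargs):
    """Push value_2 implicitly by returning it (stored under 'return_value')."""
    return value_2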
Example No. 3
# DAG tests backfill with pooled tasks
# Previously backfill would queue the task but never run it
dag1 = DAG(dag_id='test_backfill_pooled_task_dag', default_args=default_args)
dag1_task1 = DummyOperator(
    task_id='test_backfill_pooled_task',
    dag=dag1,
    pool='test_backfill_pooled_task_pool',
)

# dag2 has been moved to test_prev_dagrun_dep.py

# DAG tests that a Dag run that doesn't complete is marked failed
dag3 = DAG(dag_id='test_dagrun_states_fail', default_args=default_args)
dag3_task1 = PythonOperator(task_id='test_dagrun_fail',
                            dag=dag3,
                            python_callable=fail)
dag3_task2 = DummyOperator(
    task_id='test_dagrun_succeed',
    dag=dag3,
)
dag3_task2.set_upstream(dag3_task1)

# DAG tests that a Dag run that completes but has a failure is marked success
dag4 = DAG(dag_id='test_dagrun_states_success', default_args=default_args)
dag4_task1 = PythonOperator(
    task_id='test_dagrun_fail',
    dag=dag4,
    python_callable=fail,
)
dag4_task2 = DummyOperator(task_id='test_dagrun_succeed',
Example No. 4
          default_args=args,
          schedule_interval=None,
          tags=['example'])


# [START howto_operator_python]
def print_context(ds, **kwargs):
    """Print the Airflow context and ds variable from the context."""
    pprint(kwargs)
    print(ds)
    return 'Whatever you return gets printed in the logs'


run_this = PythonOperator(
    task_id='print_the_context',
    python_callable=print_context,
    dag=dag,
)

# [END howto_operator_python]


# [START howto_operator_python_kwargs]
def my_sleeping_function(random_base):
    """This is a function that will run within the DAG execution"""
    time.sleep(random_base)


# Generate 5 sleeping tasks, sleeping from 0.0 to 0.4 seconds respectively
for i in range(5):
    task = PythonOperator(
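The excerpt is cut off inside the loop; a sketch of how the loop body is usually completed, following the comment above (the task ids and the run_this dependency are assumed):

for i in range(5):
    task = PythonOperator(
        task_id=f'sleep_for_{i}',
        python_callable=my_sleeping_function,
        op_kwargs={'random_base': i / 10.0},  # 0.0, 0.1, ..., 0.4 seconds
        dag=dag,
    )
    run_this >> task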
Example No. 5
        timeout=60,
        poke_interval=10,
        retries=100,
        mode='poke',
    )

    transformer_sensor = PythonSensor(
        task_id='transformer_sensor',
        python_callable=_wait_for_file,
        op_kwargs={'path': '{{ var.value.transformer_path }}'},
        timeout=60,
        poke_interval=10,
        retries=100,
        mode='poke',
    )

    predict = PythonOperator(task_id='predict',
                             python_callable=_predict,
                             op_kwargs={
                                 'test_data_path':
                                 '/opt/airflow/data/raw/{{ ds }}/test.csv',
                                 'model_path':
                                 '{{ var.value.model_path }}',
                                 'transformer_path':
                                 '{{ var.value.transformer_path }}',
                                 'output_dir':
                                 '/opt/airflow/data/predictions/{{ ds }}/',
                             })

    [data_sensor, model_sensor, transformer_sensor] >> predict
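`_wait_for_file` is not shown in this excerpt; a PythonSensor callable only needs to return a boolean that is re-evaluated every poke_interval. A minimal sketch under that assumption:

from pathlib import Path


def _wait_for_file(path):
    # PythonSensor treats a truthy return value as "condition met".
    return Path(path).exists()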
Example No. 6
with DAG(
        dag_id='example_twitter_dag',
        default_args=default_args,
        schedule_interval="@daily",
        tags=['example'],
) as dag:

    # --------------------------------------------------------------------------------
    # This task should call the Twitter API and retrieve yesterday's tweets, both from
    # and to each of the four twitter users (Twitter_A, ..., Twitter_D). There should be
    # eight CSV output files generated by this task; the naming convention
    # is direction(from or to)_twitterHandle_date.csv
    # --------------------------------------------------------------------------------

    fetch_tweets = PythonOperator(task_id='fetch_tweets',
                                  python_callable=fetchtweets)

    # --------------------------------------------------------------------------------
    # Clean the eight files. In this step you can drop or cherry-pick columns
    # and different parts of the text
    # --------------------------------------------------------------------------------

    clean_tweets = PythonOperator(task_id='clean_tweets',
                                  python_callable=cleantweets)

    clean_tweets << fetch_tweets

    # --------------------------------------------------------------------------------
    # In this section you can use a script to analyze the twitter data. This could simply
    # be a sentiment analysis using algorithms like bag-of-words, or something more
    # complicated. You can also look at web services to do such tasks
Example No. 7
            hero_img = x.get('img')
            table += f'''<tr><td class="hero_img_name">{hero_img}</td><td class="hero_name">{hero_name}</td><td class="winrate">{hero_media_winrate}</td></tr>'''
        table += '''</table>
                    </html>'''

        return table

    def salva_html(ds, **kwargs):
        ti = kwargs['ti']
        table = ti.xcom_pull(task_ids='gera_html')
        with open('table_winrate.html', 'w') as f:
            f.write(table)

    run_coleta_winrate_heroes = PythonOperator(
        task_id='coleta_winrate_heroes',
        python_callable=coleta_winrate_heroes,
    )

    run_salva_mongo_coleta = PythonOperator(
        task_id='salva_mongo_coleta',
        python_callable=salva_mongo,
        op_kwargs={
            'task_id': 'coleta_winrate_heroes',
            'database': 'dota_col',
            'collection': 'winrate_meta'
        },
    )

    run_gera_media_winrate = PythonOperator(
        task_id='gera_media_winrate',
        python_callable=gera_media_winrate,
Example No. 8
    'retries': 1,
    'retry_delay': timedelta(hours=1),
}

dag = DAG('basic_pipeline', default_args=project_cfg,
          schedule_interval=timedelta(days=1))


def example_task(_id, **kwargs):
    print("Task {}".format(_id))
    return "completed task {}".format(_id)


task_1 = PythonOperator(
    task_id='task_1',
    provide_context=True,
    python_callable=example_task,
    op_kwargs={'_id': 1},
    dag=dag
)

task_2 = PythonOperator(
    task_id='task_2',
    provide_context=True,
    python_callable=example_task,
    op_kwargs={'_id': 2},
    dag=dag
)

task_1 >> task_2
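`provide_context=True` is an Airflow 1.10 idiom; on Airflow 2.x the context is passed to the callable automatically, so the same task can be declared without it:

task_1 = PythonOperator(
    task_id='task_1',
    python_callable=example_task,
    op_kwargs={'_id': 1},
    dag=dag,
)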
Example No. 9
def _fetch_dataset_new():
    print("Fetching data (NEW)...")


with DAG(
        dag_id="03_branching",
        start_date=airflow.utils.dates.days_ago(3),
        schedule_interval="@daily",
) as dag:
    start = DummyOperator(task_id="start")

    pick_branch = BranchPythonOperator(task_id="pick_branch",
                                       python_callable=_pick_branch)

    fetch_dataset_old = PythonOperator(task_id="fetch_dataset_old",
                                       python_callable=_fetch_dataset_old)

    fetch_dataset_new = PythonOperator(task_id="fetch_dataset_new",
                                       python_callable=_fetch_dataset_new)

    fetch_another_dataset = DummyOperator(task_id="fetch_another_dataset")

    join_datasets = DummyOperator(task_id="join_datasets",
                                  trigger_rule="none_failed")

    train_model = DummyOperator(task_id="train_model")
    deploy_model = DummyOperator(task_id="deploy_model")

    start >> pick_branch
    pick_branch >> [fetch_dataset_old, fetch_dataset_new]
    [fetch_dataset_old, fetch_dataset_new, fetch_another_dataset
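The excerpt cuts off in the final dependency list. `_pick_branch` is also not shown; a BranchPythonOperator callable must return the task_id (or list of task_ids) to follow. A sketch assuming the branch is chosen by execution date (the cutoff is illustrative):

from airflow.utils.timezone import datetime as tz_datetime


def _pick_branch(**context):
    # Illustrative cutoff: runs before it take the old fetch path.
    cutoff = tz_datetime(2021, 1, 1)
    if context["execution_date"] < cutoff:
        return "fetch_dataset_old"
    return "fetch_dataset_new"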
Example No. 10
    df.to_csv('dags/postgresqldata.csv')
    print("-------Data Saved------")


def insertElasticsearch():
    es = Elasticsearch()
    df = pd.read_csv('dags/postgresqldata.csv')
    for _, r in df.iterrows():
        doc = r.to_json()
        res = es.index(index="frompostgresql", doc_type="doc", body=doc)
        print(res)


default_args = {
    'owner': 'sbahaddi',
    'start_date': dt.datetime(2021, 3, 25),
    'retries': 1,
    'retry_delay': dt.timedelta(minutes=5),
}

with DAG('MyDBdag',
         default_args=default_args,
         schedule_interval='@daily',
         ) as dag:
    getData = PythonOperator(task_id='QueryPostgreSQL',
                             python_callable=queryPostgresql)
    insertData = PythonOperator(
        task_id='InsertDataElasticsearch', python_callable=insertElasticsearch)

getData >> insertData
Example No. 11
        Tests whether the volume has been mounted.
        """
        with open('/foo/volume_mount_test.txt', 'w') as foo:
            foo.write('Hello')

        return_code = os.system("cat /foo/volume_mount_test.txt")
        if return_code != 0:
            raise ValueError(
                f"Error when checking volume mount. Return code {return_code}")

    # You can use annotations on your kubernetes pods!
    start_task = PythonOperator(task_id="start_task",
                                python_callable=print_stuff,
                                executor_config={
                                    "KubernetesExecutor": {
                                        "annotations": {
                                            "test": "annotation"
                                        }
                                    }
                                })

    # You can mount volume or secret to the worker pod
    second_task = PythonOperator(
        task_id="four_task",
        python_callable=test_volume_mount,
        executor_config={
            "KubernetesExecutor": {
                "volumes": [
                    {
                        "name": "example-kubernetes-test-volume",
                        "hostPath": {
Example No. 12
    end_date = Variable.get('narrativedx_end_date',
                            default_var=first_of_month - timedelta(days=1))
    start_date = Variable.get('narrativedx_start_date',
                              default_var=first_of_month -
                              timedelta(days=end_date.day))

    sql = sql.format(start_date=start_date, end_date=end_date, surv=service)
    df = pd.read_sql(sql, ppw_engine)

    df.to_csv(basepath.joinpath(f'NarrativeDX - {service} - {exec_date}.csv'))


queries = []
for service in services:
    delete = PythonOperator(task_id=f'delete_older_{service}_file',
                            python_callable=delete_older_file,
                            op_kwargs={'service': service},
                            dag=dag)

    query = PythonOperator(task_id=f'query_narrativedx_{service}',
                           python_callable=query_narrativedx,
                           op_kwargs={'service': service},
                           dag=dag)

    sftp = SFTPOperator(
        task_id=f'upload_{service}_to_sftp',
        ssh_conn_id='coh_sftp',
        local_filepath=str(
            basepath.joinpath(f'NarrativeDX - {service} - {exec_date}.csv')),
        remote_filepath=f'/sftp/NarrativeDX - {service} - {exec_date}.csv',
        operation='put',
        create_intermediate_dirs=True,
Example No. 13
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

# Using a DAG context manager, you don't have to specify the dag property of each task
with DAG(
        'rock_content_item_backfill_example_dag',
        start_date=datetime(2021, 2, 22),
        max_active_runs=1,
        schedule_interval='@once',
        default_args=default_args,
        # catchup=False # enable if you don't want historical dag runs to run
) as dag:

    t0 = PythonOperator(
        task_id='fetch_and_save_content_items',
        python_callable=fetch_and_save_content_items,  # make sure you don't include the () of the function
        op_kwargs={'client': None})

    t1 = PythonOperator(
        task_id='fetch_and_save_content_items_connections',
        python_callable=fetch_and_save_content_items_connections,  # make sure you don't include the () of the function
        op_kwargs={
            'client': None,
            'do_backfill': True
        })

    t0 >> t1
Example No. 14
    'owner': 'airflow',
}

def print_cwd(ds, **kwargs):
    """Print and return the current working directory."""
    print(Path.cwd())
    return str(Path.cwd())


dag = DAG(
    dag_id='tika_bash_operator',
    default_args=args,
    start_date=days_ago(2),
    dagrun_timeout=timedelta(minutes=60),
    tags=['curl_tika'],
    params={"example_key": "example_value"},
)

run_this = BashOperator(
    task_id='run_curl',
    bash_command='curl -T /opt/airflow/dags/LICENSE http://0.0.0.0:9998/meta',
    dag=dag,
)

run_this0 = PythonOperator(
    task_id='print_the_context',
    python_callable=print_cwd,
    dag=dag,
)

run_this0 >> run_this
Example No. 15
            return ['accurate', 'in_accurate']

    return 'in_accurate'


with DAG('xcom_dag',
         schedule_interval='@daily',
         default_args=default_args,
         catchup=False) as dag:

    downloading_data = BashOperator(task_id='downloading_data',
                                    bash_command='sleep 3',
                                    do_xcom_push=False)

    with TaskGroup('processing_tasks') as processing_tasks:
        training_model_a = PythonOperator(task_id='training_model_a',
                                          python_callable=_training_model)

        training_model_b = PythonOperator(task_id='training_model_b',
                                          python_callable=_training_model)

        training_model_c = PythonOperator(task_id='training_model_c',
                                          python_callable=_training_model)

    choose_model = BranchPythonOperator(task_id='task_4',
                                        python_callable=_choose_best_model)

    accurate = DummyOperator(task_id='accurate')

    in_accurate = DummyOperator(task_id='in_accurate')
    downloading_data >> processing_tasks >> choose_model
    choose_model >> [accurate, in_accurate]
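`_training_model` and `_choose_best_model` are defined outside this excerpt. A sketch of a plausible `_training_model` (the accuracy values are illustrative); note that `_choose_best_model` would have to pull these XComs with the TaskGroup-prefixed ids such as 'processing_tasks.training_model_a':

from random import uniform


def _training_model(ti):
    # Each training task pushes a (random) accuracy for the branch callable to compare.
    accuracy = uniform(0.1, 10.0)
    print(f'model accuracy: {accuracy}')
    ti.xcom_push(key='model_accuracy', value=accuracy)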
Example No. 16

def print_hello(**context):
    received_value = context['ti'].xcom_pull(key='random_value')
    print(f'hello, I received the following {str(received_value)}')


def branch_func(**context):
    if random.random() < 0.5:
        return 'say_hi'
    return 'say_hello'


run_this_task = PythonOperator(task_id='run_this',
                               python_callable=push_to_xcom,
                               provide_context=True,
                               retries=10,
                               retry_delay=timedelta(seconds=1),
                               dag=dag)

run_this_task_2 = PythonOperator(task_id='say_hi',
                                 python_callable=print_hi,
                                 provide_context=True,
                                 dag=dag)

run_this_task_3 = PythonOperator(task_id='say_hello',
                                 python_callable=print_hello,
                                 provide_context=True,
                                 dag=dag)

branch_op = BranchPythonOperator(task_id='branch_task',
                                 python_callable=branch_func,
Example No. 17
         catchup=False,
         tags=['example666'],
         render_template_as_native_obj=True) as dag:

    def extract():
        data_string = '{"1001": 301.27, "1002": 433.21, "1003": 502.22}'
        return json.loads(data_string)

    def transform(order_data):
        print(type(order_data))
        print(order_data)
        total_order_value = 0
        for value in order_data.values():
            total_order_value += value
        return {"total_order_value": total_order_value}

    extract_task = PythonOperator(
        task_id="extract",
        python_callable=extract,
    )

    transform_task = PythonOperator(
        task_id="transform",
        op_kwargs={"order_data": "{{ti.xcom_pull('extract')}}"},
        python_callable=transform,
    )

    extract_task >> transform_task

if __name__ == "__main__":
    dag.cli()
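A note on `render_template_as_native_obj=True` (set in the DAG above): it makes the templated op_kwargs value render to the native object pulled from XCom, so `transform` receives the dict returned by `extract` rather than its string representation:

# With render_template_as_native_obj=True (as above):
#     print(type(order_data))  ->  <class 'dict'>
# Without the flag, the Jinja template renders to a plain string:
#     print(type(order_data))  ->  <class 'str'>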
Example No. 18
    logging.info(f'Total count is: {error_count}')
    logging.info(f'Error list is : {error_list}')

    return error_count, error_list


# Create Airflow dag
default_args = {"retries": 2, 'retry_delay': timedelta(minutes=5)}

with DAG(dag_id='loganalyzer',
         default_args=default_args,
         description='A simple DAG',
         schedule_interval='0 18 * * 1-5',
         start_date=datetime(2021, 11, 20, hour=18),
         catchup=False) as dag:

    t1 = PythonOperator(task_id='aapl_log_errors',
                        python_callable=analyze_file,
                        op_kwargs={
                            'stock': 'aapl',
                            'log_dir': base_log_folder
                        })

    t2 = PythonOperator(task_id='tsla_log_errors',
                        python_callable=analyze_file,
                        op_kwargs={
                            'stock': 'tsla',
                            'log_dir': base_log_folder
                        })

t1 >> t2
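Only the tail of `analyze_file` appears above. A sketch of a plausible body, assuming it scans a stock's log files under log_dir for ERROR lines (the file layout is illustrative):

import glob
import logging


def analyze_file(stock, log_dir):
    error_count = 0
    error_list = []
    # Illustrative layout: one or more log files per stock under log_dir.
    for path in glob.glob(f'{log_dir}/{stock}*.log'):
        with open(path) as f:
            for line in f:
                if 'ERROR' in line:
                    error_count += 1
                    error_list.append(line.strip())

    logging.info(f'Total count is: {error_count}')
    logging.info(f'Error list is : {error_list}')

    return error_count, error_list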
Example No. 19
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
}

with DAG(
        dag_id="covid_data_dag",
        default_args=default_args,
        description="DAG to update Covid 19 data daily to push to a Postgres database.",
        schedule_interval='30 9 * * *',
        start_date=datetime(2021, 8, 24),
) as dag:

    # Initiate tasks
    task_1 = DummyOperator(task_id="Initiate_DAG")

    task_2 = PythonOperator(
        task_id="dashboard_update",
        python_callable=covid_19_dashboard_update,
        op_kwargs={
            "username": config.username,
            "password": passwords_dict.get('postgres_password'),
            "database": config.database,
            "table_name": config.table_name,
            "columns": config.columns,
            "geo_ids_url": config.geo_ids_url,
        },
    )

    task_1 >> task_2
Example No. 20
from datetime import timedelta

from airflow.models import DAG
from airflow.operators.python import PythonOperator
from airflow.utils.timezone import datetime

DEFAULT_DATE = datetime(2016, 1, 1)
default_args = dict(start_date=DEFAULT_DATE, owner='airflow')


def fail():
    raise ValueError('Expected failure.')


def success(ti=None, *args, **kwargs):
    if ti.execution_date != DEFAULT_DATE + timedelta(days=1):
        fail()


# DAG tests that tasks ignore all dependencies

dag1 = DAG(dag_id='test_run_ignores_all_dependencies',
           default_args=dict(depends_on_past=True, **default_args))
dag1_task1 = PythonOperator(task_id='test_run_dependency_task',
                            python_callable=fail,
                            dag=dag1)
dag1_task2 = PythonOperator(task_id='test_run_dependent_task',
                            python_callable=success,
                            dag=dag1)
dag1_task1.set_downstream(dag1_task2)
        """
        Tests whether the volume has been mounted.
        """
        with open('/foo/volume_mount_test.txt', 'w') as foo:
            foo.write('Hello')

        return_code = os.system("cat /foo/volume_mount_test.txt")
        if return_code != 0:
            raise ValueError(f"Error when checking volume mount. Return code {return_code}")

    # You can use annotations on your kubernetes pods!
    start_task = PythonOperator(
        task_id="start_task",
        python_callable=print_stuff,
        executor_config={
            "KubernetesExecutor": {
                "annotations": {"test": "annotation"}
            }
        }
    )

    # You can mount volume or secret to the worker pod
    second_task = PythonOperator(
        task_id="four_task",
        python_callable=test_volume_mount,
        executor_config={
            "KubernetesExecutor": {
                "volumes": [
                    {
                        "name": "example-kubernetes-test-volume",
                        "hostPath": {"path": "/tmp/"},
Example No. 22
            # Loop through metadata results
            for field, ts_obj in r.json().items():
                device_metadata[field] = ts_obj[0]  # assign the first (latest) value
                print(f'Adding {field}->{ts_obj[0]} to metadata result payload')

            results_metadata[d_id] = device_metadata

        print(json.dumps(results_metadata))
        return json.dumps(results_metadata)

    ##############################################

    flashflood_authenticate_task = PythonOperator(
        task_id='flashflood_authenticate',
        python_callable=aware.flashfloodinfo_authenticate)

    flashflood_get_customer = PythonOperator(
        task_id='flashflood_get_customer',
        python_callable=aware.flashflood_get_customer,
        op_kwargs={
            'token':
            "{{task_instance.xcom_pull(task_ids='flashflood_authenticate')}}"
        })

    get_aware_devices_task = PythonOperator(
        task_id='get_aware_devices',
        python_callable=aware.get_aware_devices,
        op_kwargs={
            'token':
Example No. 23
            total_order_value += value

        total_value = {"total_order_value": total_order_value}
        total_value_json_string = json.dumps(total_value)
        ti.xcom_push('total_order_value', total_value_json_string)

    def load(**kwargs):
        ti = kwargs['ti']
        total_value_string = ti.xcom_pull(task_ids='transform',
                                          key='total_order_value')
        total_order_value = json.loads(total_value_string)

        print(total_order_value)

    extract_task = PythonOperator(
        task_id='extract',
        python_callable=extract,
    )
    extract_task.doc_md = """\
#### Extract task
A simple Extract task to get data ready for the rest of the data pipeline.
In this case, getting data is simulated by reading from a hardcoded JSON string.
This data is then put into xcom, so that it can be processed by the next task.
"""

    transform_task = PythonOperator(
        task_id='transform',
        python_callable=transform,
    )
    transform_task.doc_md = """\
#### Transform task
A simple Transform task which takes in the collection of order data from xcom
Example No. 24
    tempfile = read_s3(kwargs['file'])
    conn = settings.engine.raw_connection()
    try:
        with open(tempfile, 'r') as f:
            cursor = conn.cursor()
            cursor.copy_expert(query, f)
            conn.commit()
    finally:
        conn.close()
        os.remove(tempfile)


with DAG(dag_id=dag_id, schedule_interval=None, catchup=False, start_date=days_ago(1)) as dag:

    pause_dags_t = PythonOperator(
        task_id="pause_dags",
        python_callable=pause_dags
    )
    with TaskGroup(group_id='import') as import_t:
        for x in OBJECTS_TO_IMPORT:
            load_task = PythonOperator(
                task_id=x[1],
                python_callable=load_data,
                op_kwargs={'query': x[0], 'file': x[1]},
                provide_context=True
            )
        load_variable_t = PythonOperator(
            task_id="variable",
            python_callable=importVariable
        )

    load_task_instance_t = PythonOperator(
Example No. 25
                                                    'last_played': retorna_localtime(hero['last_played']),
                                                    'played': hero.get('games'),
                                                    'won': hero.get('win'),
                                                    'lost': hero.get('games') - hero.get('win'),
                                                    'winrate': retorna_winrate_player_hero(hero.get('games'), hero.get('win'))
                                                    }
        return player_heroes_data

    def salva_mongo(player_id, ds, **kwargs):
        ti = kwargs['ti']
        dict_player_heroes_data = ti.xcom_pull(task_ids=f'Coleta_Dados_Player_{player_id}')
        db_col, db_client = collection_mongo_local('mongodb:27017', 'dota_col', 'winrate_player_heroes')
        db_col.insert_one(dict_player_heroes_data)
        db_client.close()

    player_ids = [23724176, 79380838, 79409528, 95777879, 146329338]
    for player_id in player_ids:
        run_coleta_winrate_player_heroes = PythonOperator(
            task_id=f'Coleta_Dados_Player_{player_id}',
            python_callable=coleta_winrate_player_heroes,
            op_kwargs={'player_id': player_id},
        )
        run_salva_mongo = PythonOperator(
            task_id=f'salva_mongo_{player_id}',
            python_callable=salva_mongo,
            op_kwargs={'player_id': player_id},
        )

        run_coleta_winrate_player_heroes >> run_salva_mongo
    """
        able to get context
    """
    ctx = get_current_context()
    log.info("The knights of Ni say: %s (at %s)", value, ctx['ts'])


with DAG(
        dag_id='example_xcom_args',
        default_args={'owner': 'airflow'},
        start_date=days_ago(2),
        schedule_interval=None,
        tags=['example'],
) as dag:
    task1 = PythonOperator(
        task_id='generate_value',
        python_callable=generate_value,
    )

    print_value(task1.output)

with DAG(
        "example_xcom_args_with_operators",
        default_args={'owner': 'airflow'},
        start_date=days_ago(2),
        schedule_interval=None,
        tags=['example'],
) as dag2:
    bash_op1 = BashOperator(task_id="c", bash_command="echo c")
    bash_op2 = BashOperator(task_id="d", bash_command="echo c")
    xcom_args_a = print_value("first!")
    xcom_args_b = print_value("second!")
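In this excerpt `generate_value` and `print_value` are TaskFlow-decorated functions (the decorators sit above the cut, and the tail of `print_value` opens the snippet). A sketch of how they are typically declared (the return string is assumed):

import logging

from airflow.decorators import task
from airflow.operators.python import get_current_context

log = logging.getLogger(__name__)


@task
def generate_value():
    """Return a value for downstream tasks to print."""
    return "Bring me a shrubbery!"  # illustrative return value


@task
def print_value(value):
    """Log the received value together with the run timestamp."""
    ctx = get_current_context()
    log.info("The knights of Ni say: %s (at %s)", value, ctx['ts'])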
Example No. 27
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}


# Using a DAG context manager, you don't have to specify the dag property of each task
with DAG('rivervalley_rock_people_dag',
         start_date=datetime(2021, 4, 29),
         max_active_runs=1,
         schedule_interval=timedelta(minutes=30),  # https://airflow.apache.org/docs/stable/scheduler.html#dag-runs
         default_args=default_args,
         # catchup=False # enable if you don't want historical dag runs to run
         ) as dag:

    t0 = PythonOperator(
        task_id='fetch_and_save_campuses',
        python_callable=fetch_and_save_campuses,  # make sure you don't include the () of the function
        op_kwargs={'client': 'rivervalley'}
    )

    # generate tasks with a loop. task_id must be unique
    t1 = PythonOperator(
        task_id='fetch_and_save_people',
        python_callable=fetch_and_save_people,  # make sure you don't include the () of the function
        op_kwargs={'do_backfill': False, 'client': 'rivervalley'}
    )

    t0 >> t1
Example No. 28
                {
                    'topologyKey': 'kubernetes.io/hostname',
                    'labelSelector': {
                        'matchExpressions': [{'key': 'app', 'operator': 'In', 'values': ['airflow']}]
                    },
                }
            ]
        }
    }

    tolerations = [{'key': 'dedicated', 'operator': 'Equal', 'value': 'airflow'}]

    # You don't have to specify any special KubernetesExecutor configuration if
    # you don't want/need to
    start_task = PythonOperator(
       task_id="start_task",
       python_callable=print_stuff
    )

    # Check available libraries in airflow/ci:latest image
    one_task = PythonOperator(
        task_id="one_task",
        python_callable=check_installed_libraries,
        executor_config={"KubernetesExecutor": {"image": "apache/airflow:2.0.2-python3.8"}},
    )

    # List pods in current namespace
    two_task = PythonOperator(
        task_id="two_task",
        python_callable=list_pods,
        executor_config={"KubernetesExecutor": {"image": "apache/airflow:2.0.2-python3.8"}},
    )
Example No. 29
    pathlib.Path("/tmp/images").mkdir(parents=True, exist_ok=True)

    # Download all pictures in launches.json
    with open("/tmp/launches.json") as f:
        launches = json.load(f)
        image_urls = [launch["image"] for launch in launches["results"]]
        for image_url in image_urls:
            try:
                response = requests.get(image_url)
                image_filename = image_url.split('/')[-1]
                target_file = f"/tmp/images/{image_filename}"
                with open(target_file, "wb") as f:
                    f.write(response.content)
                print(f"Downloaded {image_url} to {target_file}")
            except request_exceptions.MissingSchema:
                print(f"{image_url} appears to be an invalid URL.")
            except requests.exceptions.ConnectionError:
                print(f"Could not connect to {image_url}.")


get_pictures = PythonOperator(task_id="get_pictures",
                              python_callable=_get_pictures,
                              dag=dag)

notify = BashOperator(
    task_id="notify",
    bash_command='echo "there are now $(ls /tmp/images/ | wc -l) images."',
    dag=dag)

download_launches >> get_pictures >> notify
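`download_launches` is used in the dependency line above but defined outside the excerpt; a sketch assuming it fetches /tmp/launches.json with curl (the URL is illustrative):

download_launches = BashOperator(
    task_id="download_launches",
    bash_command="curl -o /tmp/launches.json -L 'https://ll.thespacedevs.com/2.0.0/launch/upcoming'",  # illustrative URL
    dag=dag,
)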
Example No. 30
            })
        thread.daemon = True
        thread.start()
        time.sleep(consumer.RECEIVE_DURATION)
        consumer_client.close()
        thread.join()
    except KeyboardInterrupt:
        print('Stop receiving.')

    print('Consumer2 has stopped receiving, end time is {}.'.format(
        time.time()))


t1 = PythonOperator(
    task_id='produce_raw_message',
    python_callable=produce_raw_message,
    dag=dag,
)
t2 = PythonOperator(
    task_id='preprocess_raw_message',
    python_callable=preprocess_raw_message,
    dag=dag,
)

t3 = PythonOperator(
    task_id='consume_and_offload_preprocessed_message',
    python_callable=consume_preprocessed_message,
    dag=dag,
)

t1 >> t2 >> t3