import datetime
import logging

from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.operators.dummy_operator import DummyOperator


def test_dag_function():
    logging.info("test_dag_function is running...")


test_dag = DAG(
    dag_id='test_dag',
    start_date=datetime.datetime.now())

start_operator = DummyOperator(task_id='Begin_execution', dag=test_dag)

# create_tables_in_redshift = DummyOperator(task_id='create_tables_in_redshift', dag=test_dag)

stage_events_to_redshift = DummyOperator(task_id='stage_events', dag=test_dag)

stage_songs_to_redshift = DummyOperator(task_id='stage_songs', dag=test_dag)

load_songplays_table = DummyOperator(
    task_id='Load_songplays_fact_table',
    dag=test_dag
)

load_user_dimension_table = DummyOperator(
    task_id='Load_user_dim_table',
    dag=test_dag
)

load_song_dimension_table = DummyOperator(
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
import os
from datetime import datetime

from airflow import DAG
from airflow.operators.dummy import DummyOperator

DEFAULT_DATE = datetime(2016, 1, 1)

args = {
    'owner': 'airflow',
    'start_date': DEFAULT_DATE,
}

dag = DAG(dag_id='test_om_failure_callback_dag', default_args=args)


def write_data_to_callback(*arg, **kwargs):  # pylint: disable=unused-argument
    with open(os.environ.get('AIRFLOW_CALLBACK_FILE'), "w+") as f:
        f.write("Callback fired")


task = DummyOperator(
    task_id='test_om_failure_callback_task',
    dag=dag,
    on_failure_callback=write_data_to_callback
)
    'example.json')

SNOWFLAKE_SELECT_SQL = f"SELECT * FROM {SNOWFLAKE_SAMPLE_TABLE} LIMIT 100;"
SNOWFLAKE_SLACK_SQL = f"SELECT O_ORDERKEY, O_CUSTKEY, O_ORDERSTATUS FROM {SNOWFLAKE_SAMPLE_TABLE} LIMIT 10;"
SNOWFLAKE_SLACK_MESSAGE = "Results in an ASCII table:\n" \
    "```{{ results_df | tabulate(tablefmt='pretty', headers='keys') }}```"
SNOWFLAKE_CREATE_TABLE_SQL = f"CREATE TRANSIENT TABLE IF NOT EXISTS {SNOWFLAKE_LOAD_TABLE}(data VARIANT);"

default_args = {
    'owner': 'airflow',
    'start_date': days_ago(2),
}

dag = DAG(
    'example_snowflake',
    default_args=default_args,
    tags=['example'],
)

select = SnowflakeOperator(
    task_id='select',
    snowflake_conn_id=SNOWFLAKE_CONN_ID,
    sql=SNOWFLAKE_SELECT_SQL,
    dag=dag,
)

slack_report = SnowflakeToSlackOperator(
    task_id="slack_report",
    sql=SNOWFLAKE_SLACK_SQL,
    slack_message=SNOWFLAKE_SLACK_MESSAGE,
    snowflake_conn_id=SNOWFLAKE_CONN_ID,
    slack_conn_id=SLACK_CONN_ID,
import datetime
import logging

from airflow import DAG
from airflow.operators.python_operator import PythonOperator

#
# TODO: Define a function for the PythonOperator to call and have it log something
#
# def my_function():
#     logging.info(<REPLACE>)


def routine():
    logging.info('HELLO WORLD')


dag = DAG(
    'lesson1.exercise1',
    start_date=datetime.datetime.now())

#
# TODO: Uncomment the operator below and replace the arguments labeled <REPLACE> below
#
dag = DAG('lesson1.exercise1', start_date=datetime.datetime.now())

greet_task = PythonOperator(
    task_id='routine_task',
    python_callable=routine,
    dag=dag)

#greet_task = PythonOperator(
#    task_id="<REPLACE>",
#    python_callable=<REPLACE>,
# AWS_KEY = os.environ.get('AWS_KEY')
# AWS_SECRET = os.environ.get('AWS_SECRET')

default_args = {
    'catchup': False,
    'depends_on_past': False,
    'email_on_failure': False,
    'owner': 'udacity',
    'retries': 3,
    'retry_delay': timedelta(minutes=5),
    'start_date': datetime.now()
}

dag = DAG('udac_example_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='@daily')

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='stage_events',
    aws_conn_id='aws_conn_id',
    redshift_conn_id='redshift_conn_id',
    iam_role="arn:aws:iam::850743350707:role/udacity-redshift-role",
    s3_bucket='udacity-dend',
    s3_key='log-data',
    table='staging_events',
    dag=dag)

stage_songs_to_redshift = StageToRedshiftOperator(
""" Executing tasks at a particular time of the day using sensor operator. """ from airflow import DAG from airflow.operators import BashOperator, TimeSensor from datetime import datetime, timedelta, time default_args = { 'owner': 'Samarth', 'start_date': datetime(2016, 03, 15, 12), } # "schedule_interval" is your cron expression you can write any cron expression like unix cron. dag = DAG('airflow_task_with_time_sensor', default_args=default_args, schedule_interval="1 * * * *") bash_task = BashOperator( task_id='dependency_for_sensor', bash_command='echo "Sensor would only be enabled after I am done!"', dag=dag) # Sensor operator takes "target_time" which is a specific time in a day irrespective of date/day. # Sensor is executed once the target time has passed. In this case after 10:55 at morning. sensor_task = TimeSensor(task_id='sensor_task', target_time=time(10, 55, 1, 1), dag=dag) post_sensor_task = BashOperator( task_id='post_sensor_task', bash_command='echo "I am done, it means sensor has done its job."',
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Example DAG demonstrating the usage of BranchDayOfWeekOperator.
"""
import pendulum

from airflow import DAG
from airflow.operators.dummy import DummyOperator
from airflow.operators.weekday import BranchDayOfWeekOperator

with DAG(
    dag_id="example_weekday_branch_operator",
    start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
    catchup=False,
    tags=["example"],
    schedule_interval="@daily",
) as dag:
    # [START howto_operator_day_of_week_branch]
    dummy_task_1 = DummyOperator(task_id='branch_true', dag=dag)
    dummy_task_2 = DummyOperator(task_id='branch_false', dag=dag)

    branch = BranchDayOfWeekOperator(
        task_id="make_choice",
        follow_task_ids_if_true="branch_true",
        follow_task_ids_if_false="branch_false",
        week_day="Monday",
    )

    # Run dummy_task_1 if branch executes on Monday
# ================ DAG definition ================
default_args = {
    'owner': 'airflow',
    'description': 'Hourly monitor of DAGs',
    'start_date': datetime(year=2020, month=4, day=14),
    'depends_on_past': False,
    'email_on_failure': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=5)
}

dag = DAG(
    dag_id=SELF_DAG_ID,
    default_args=default_args,
    schedule_interval='@hourly',
    catchup=False
)

# ================ task definitions ================
start_task = DummyOperator(task_id='start', dag=dag)
end_task = DummyOperator(task_id='end', dag=dag)

check_statuses_task = DummyOperator(task_id='check_statuses', dag=dag)

trigger_condition_table_dag_task = TriggerDagRunOperator(
    task_id='trigger_condition_table_dag',
    trigger_dag_id='condition_table_dag',
    dag=dag
)

condition_table_sensor_task = PythonOperator(
db_conn = DatabaseEngine()

total_checks = []
count_checks = []
geo_checks = []
check_name = {}


# needed to put quotes on elements in geotypes for SQL_CHECK_GEO
def quote(instr):
    return f"'{instr}'"


with DAG(
    dag_id,
    description="locaties veilige-afstandobjecten zoals Vuurwerkopslag, Wachtplaats,"
    "Bunkerschip, Sluis, Munitieopslag, Gasdrukregel -en meetstation",
    default_args=default_args,
    user_defined_filters=dict(quote=quote),
    template_searchpath=["/"],
) as dag:

    # 1. Post info message on slack
    slack_at_start = MessageOperator(
        task_id="slack_at_start",
        http_conn_id="slack",
        webhook_token=slack_webhook_token,
        message=f"Starting {dag_id} ({DATAPUNT_ENVIRONMENT})",
        username="******",
    )

    # 2. Create temp directory to store files
    mkdir = BashOperator(task_id="mkdir", bash_command=f"mkdir -p {tmp_dir}")
            '-s', "{{ task_instance.xcom_pull('parse_request', key='s3location') }}",
            '-d', 's3://demo-wcd/',
            '-c', 'job',
            '-m', 'append',
            '--input-options', 'header=true'
        ]
    }
}]

dag = DAG('emr_job_flow_manual_steps_dag',
          default_args=DEFAULT_ARGS,
          dagrun_timeout=timedelta(hours=2),
          schedule_interval='0 3 * * *')

parse_request = PythonOperator(task_id='parse_request',
                               provide_context=True,
                               python_callable=retrieve_s3_file,
                               dag=dag)

step_adder = EmrAddStepsOperator(task_id='add_steps',
                                 job_flow_id=CLUSTER_ID,
                                 aws_conn_id='aws_default',
                                 steps=SPARK_TEST_STEPS,
                                 dag=dag)

step_checker = EmrStepSensor(
    task_id='watch_step',
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from datetime import datetime


def print_hello():
    return 'Hello world!'


dag = DAG('hello_world',
          description='Simple tutorial DAG',
          start_date=datetime(2020, 1, 1),
          schedule_interval=None)

hello_operator = PythonOperator(task_id='hello_task',
                                python_callable=print_hello,
                                dag=dag)
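# A minimal sketch, not part of the original hello_world snippet: chaining a second,
# hypothetical task onto hello_operator with the ">>" bitshift operator, which is how
# Airflow declares task dependencies. It reuses the dag object and PythonOperator import
# from the snippet above; print_goodbye and goodbye_task are assumed names for illustration.

def print_goodbye():
    return 'Goodbye world!'


goodbye_operator = PythonOperator(task_id='goodbye_task',
                                  python_callable=print_goodbye,
                                  dag=dag)

# goodbye_task runs only after hello_task has succeeded
hello_operator >> goodbye_operator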
def setUp(self):
    args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
    dag = DAG(TEST_DAG_ID, default_args=args)
    self.dag = dag
def setUp(self):
    args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
    dag = DAG(TEST_DAG_ID, default_args=args)
    self.dag = dag

    rows = [
        (1880, "John", 0.081541, "boy"), (1880, "William", 0.080511, "boy"), (1880, "James", 0.050057, "boy"),
        (1880, "Charles", 0.045167, "boy"), (1880, "George", 0.043292, "boy"), (1880, "Frank", 0.02738, "boy"),
        (1880, "Joseph", 0.022229, "boy"), (1880, "Thomas", 0.021401, "boy"), (1880, "Henry", 0.020641, "boy"),
        (1880, "Robert", 0.020404, "boy"), (1880, "Edward", 0.019965, "boy"), (1880, "Harry", 0.018175, "boy"),
        (1880, "Walter", 0.014822, "boy"), (1880, "Arthur", 0.013504, "boy"), (1880, "Fred", 0.013251, "boy"),
        (1880, "Albert", 0.012609, "boy"), (1880, "Samuel", 0.008648, "boy"), (1880, "David", 0.007339, "boy"),
        (1880, "Louis", 0.006993, "boy"), (1880, "Joe", 0.006174, "boy"), (1880, "Charlie", 0.006165, "boy"),
        (1880, "Clarence", 0.006165, "boy"), (1880, "Richard", 0.006148, "boy"), (1880, "Andrew", 0.005439, "boy"),
        (1880, "Daniel", 0.00543, "boy"), (1880, "Ernest", 0.005194, "boy"), (1880, "Will", 0.004966, "boy"),
        (1880, "Jesse", 0.004805, "boy"), (1880, "Oscar", 0.004594, "boy"), (1880, "Lewis", 0.004366, "boy"),
        (1880, "Peter", 0.004189, "boy"), (1880, "Benjamin", 0.004138, "boy"), (1880, "Frederick", 0.004079, "boy"),
        (1880, "Willie", 0.00402, "boy"), (1880, "Alfred", 0.003961, "boy"), (1880, "Sam", 0.00386, "boy"),
        (1880, "Roy", 0.003716, "boy"), (1880, "Herbert", 0.003581, "boy"), (1880, "Jacob", 0.003412, "boy"),
        (1880, "Tom", 0.00337, "boy"), (1880, "Elmer", 0.00315, "boy"), (1880, "Carl", 0.003142, "boy"),
        (1880, "Lee", 0.003049, "boy"), (1880, "Howard", 0.003015, "boy"), (1880, "Martin", 0.003015, "boy"),
        (1880, "Michael", 0.00299, "boy"), (1880, "Bert", 0.002939, "boy"), (1880, "Herman", 0.002931, "boy"),
        (1880, "Jim", 0.002914, "boy"), (1880, "Francis", 0.002905, "boy"), (1880, "Harvey", 0.002905, "boy"),
        (1880, "Earl", 0.002829, "boy"), (1880, "Eugene", 0.00277, "boy"),
    ]

    self.env_vars = {
        'AIRFLOW_CTX_DAG_ID': 'test_dag_id',
        'AIRFLOW_CTX_TASK_ID': 'test_task_id',
        'AIRFLOW_CTX_EXECUTION_DATE': '2015-01-01T00:00:00+00:00',
        'AIRFLOW_CTX_DAG_RUN_ID': '55',
        'AIRFLOW_CTX_DAG_OWNER': 'airflow',
        'AIRFLOW_CTX_DAG_EMAIL': '*****@*****.**',
    }

    from airflow.hooks.mysql_hook import MySqlHook
    with MySqlHook().get_conn() as cur:
        cur.execute('''
            CREATE TABLE IF NOT EXISTS baby_names (
                org_year integer(4),
                baby_name VARCHAR(25),
                rate FLOAT(7,6),
                sex VARCHAR(4)
            )
        ''')
        for row in rows:
            cur.execute("INSERT INTO baby_names VALUES(%s, %s, %s, %s);", row)
    ret[r['name']] = m[1]
    print(ret)
    return ret


def print_context(ds, **kwargs):
    logging.info("kwargs: %s" % kwargs)
    logging.info("ds: %s" % ds)
    ti = kwargs['ti']
    return 'Whatever you return gets printed in the logs ' + str(
        ti.xcom_pull(key=None, task_ids=['get_data']))


dag = DAG('mymean',
          description='Simple Mean of the temperature from last year',
          default_args={'owner': 'iiot-book'},
          schedule_interval='* * * * 0',
          start_date=datetime.datetime(2018, 6, 21),
          catchup=False)

kairos_operator = KairosDBOperator(
    task_id='get_data',
    query={
        "metrics": [{
            "tags": {},
            "name": "device0.my.measure.temperature",
            "aggregators": [{
                "name": "scale",
                "factor": "1.0"
            }]
        }],
        "plugins": [],
from airflow import DAG
import datetime as dt
from airflow.contrib.operators.spark_submit_operator import SparkSubmitOperator
from datetime import timedelta

default_args = {
    'owner': 'dl-predictor',
    'depends_on_past': False,
    'start_date': dt.datetime(2020, 9, 21),
    'retries': 0
    # ,
    # 'retry_delay': timedelta(minutes=5),
}

dag = DAG(
    'dl_pre_processing',
    default_args=default_args,
    schedule_interval=None,
)


def sparkOperator(
        file,
        task_id,
        **kwargs
):
    return SparkSubmitOperator(
        application='/home/airflow/airflow/predictor_dl_model/pipeline/{}'.format(file),
        application_args=['/home/airflow/airflow/predictor_dl_model/config.yml'],
        conn_id='spark_default',
        conf={'spark.driver.maxResultSize': '8g'},
        driver_memory='32G',
    'email': ['*****@*****.**',
              '*****@*****.**'],  # Email addresses that will receive notifications about the DAG
    'email_on_failure': False,  # Whether to send a notification for every failure in the DAG
    'email_on_retry': False,  # Whether to send another notification when a failed task is retried
    'retries': 1,  # How many retry attempts are made after a failure
    'retry_delay': timedelta(
        minutes=1
    )  # How long to wait between retries after a failure
}

# Defining the DAG (flow)
dag = DAG(
    "treino-05-docker",  # DAG name
    description="Paralelismos",  # Information about the DAG
    default_args=default_args,  # Arguments defined in the dict above
    schedule_interval=timedelta(minutes=5)  # Interval between runs
)

# BashOperator - marks the start of the run
start_processing = BashOperator(task_id='start_processing',
                                bash_command='echo "Start processing!" ',
                                dag=dag)

# Task to download the data
get_data = BashOperator(
    task_id='get-data',
    bash_command='curl http://download.inep.gov.br/microdados/Enade_Microdados/microdados_enade_2019.zip -o /usr/local/airflow/data/microdados_enade_2019.zip',
    dag=dag)
                 my_templated_field,
                 *args,
                 **kwargs):
        """
        :param my_templated_field: The value of the templated field.
        :type my_templated_field: string
        """
        super(MyCustomOperator, self).__init__(*args, **kwargs)
        self.my_templated_field = my_templated_field
        # This will be replaced at execution time, as this field is declared in the
        # class property 'template_fields'.

    def execute(self, context):
        # List objects.
        self.log.info('MyCustomOperator executed with value for the templated field : %s',
                      self.my_templated_field)
        # Do stuff...


with DAG(dag_id='09_custom_operators',
         schedule_interval='*/10 * * * *',
         catchup=False,
         default_args=default_args) as dag:

    using_static_value = MyCustomOperator(
        task_id='using_static_value',
        my_templated_field='Plouf'
    )

    using_templated_value = MyCustomOperator(
        task_id='using_templated_value',
        my_templated_field='Dag-run-id=[{{ run_id }}]'
    )
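# A minimal, self-contained sketch of the mechanism the comment above refers to: a field
# is only rendered by Jinja at execution time if the operator lists it in the class
# attribute "template_fields". The class below is illustrative only; it is not the
# (truncated) MyCustomOperator definition from the snippet above.
from airflow.models import BaseOperator


class TemplatedFieldExampleOperator(BaseOperator):
    # Declaring the field name here is what tells Airflow to run it through Jinja.
    template_fields = ('my_templated_field',)

    def __init__(self, my_templated_field, *args, **kwargs):
        super(TemplatedFieldExampleOperator, self).__init__(*args, **kwargs)
        self.my_templated_field = my_templated_field

    def execute(self, context):
        # By the time execute() runs, any Jinja in my_templated_field
        # (e.g. '{{ run_id }}') has already been rendered by Airflow.
        self.log.info('Rendered value: %s', self.my_templated_field)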
default_args = {
    'owner': 'airflow',
    'wait_for_downstream': False,
    'depends_on_past': False,
    'start_date': datetime(2019, 1, 1),
    'end_date': datetime(2019, 12, 31),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'sla': timedelta(minutes=30),
    'retry_delay': timedelta(minutes=5)
}

with DAG('Postgres2Postgres',
         default_args=default_args,
         schedule_interval='@daily') as dag:

    def transfer_daily_data(ds, **kwargs):
        src_engine = PostgresHook(
            postgres_conn_id='postgres_local').get_sqlalchemy_engine()
        tgt_engine = PostgresHook(
            postgres_conn_id='postgres_local').get_sqlalchemy_engine()

        execution_date = kwargs['execution_date'].strftime('%Y-%m-%d')
        next_date = (kwargs['execution_date'] + timedelta(days=1)).strftime('%Y-%m-%d')
        execution_date_u = kwargs['execution_date'].strftime('%Y%m%d')

        logging.info(kwargs)
default_args = {
    'owner': 'chenxianxin',
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
    'execution_timeout': timedelta(hours=12),
    'depends_on_past': True,
    'start_date': utils.dates.days_ago(1)
}

dag = DAG(
    'temp12',
    default_args=default_args,
    description=u'信息流业务统计',
    schedule_interval="@daily"
)
dag.doc_md = __doc__

env = {'HIVE_CONF_DIR': 'daohang', 'HADOOP_CONF_DIR': 'daohang'}

lens = [140, 5, 5, 2, 2]
temps = []
for n in range(lens[0]):
    temp = 'temp_%s' % n
    temp = BashOperator(
        dag=dag,
        env=env,
        task_id=temp,
import os

from ETL.SFTPToS3Operator import SFTPToS3Operator
from airflow import DAG
from airflow.utils.dates import days_ago
from airflow.operators.bash_operator import BashOperator
from airflow.operators.dummy_operator import DummyOperator

ACCESS_KEY = os.environ.get("AWS_ACCESS_KEY_ID")
SECRET_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
REGION_NAME = os.environ.get("AWS_DEFAULT_REGION")
BUCKET_NAME = os.environ.get("AWS_BUCKET_NAME")

dag = DAG(dag_id="wineDataDag",
          start_date=days_ago(1),
          schedule_interval="@daily")

start_dag = DummyOperator(task_id="StartDag", dag=dag)

download_file_1 = SFTPToS3Operator(task_id="download_file_1",
                                   aws_access_key_id=ACCESS_KEY,
                                   aws_secret_access_key=SECRET_KEY,
                                   region_name=REGION_NAME,
                                   bucket_name=BUCKET_NAME,
                                   pkey="*private key file path*",
                                   ip="3.9.23.108",
                                   username="******",
                                   file_to_download="winemag-data-130k-v2.csv",
                                   save_as="wine_csv_1.csv",
                                   dag=dag)

download_file_2 = SFTPToS3Operator(
    return None


def source_to_use(**kwargs):
    ti = kwargs['ti']
    source = ti.xcom_pull(task_ids='hook_task')
    print("source fetch from XCOM: {}".format(source))
    return source


def check_for_activated_source(**kwargs):
    ti = kwargs['ti']
    return ti.xcom_pull(task_ids='xcom_task').lower()


with DAG('branch_dag',
         default_args=default_args,
         schedule_interval='@once') as dag:

    start_task = DummyOperator(task_id='start_task')

    hook_task = PythonOperator(task_id='hook_task',
                               python_callable=get_activated_sources)

    xcom_task = PythonOperator(task_id='xcom_task',
                               python_callable=source_to_use,
                               provide_context=True)

    branch_task = BranchPythonOperator(
        task_id='branch_task',
        python_callable=check_for_activated_source,
        provide_context=True)

    mysql_task = BashOperator(task_id='mysql',
                              bash_command='echo "MYSQL is activated"')

    postgresql_task = BashOperator(
        task_id='postgresql',
        bash_command='echo "PostgreSQL is activated"')

    s3_task = BashOperator(task_id='s3',
                           bash_command='echo "S3 is activated"')
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
from datetime import timedelta

from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.utils.dates import days_ago

with DAG(
    dag_id="test_with_non_default_owner",
    schedule_interval="0 0 * * *",
    start_date=days_ago(2),
    dagrun_timeout=timedelta(minutes=60),
    tags=["example"],
) as dag:
    run_this_last = DummyOperator(
        task_id="test_task",
        owner="John",
    )
# The adi_dimensional_by_date bq table is loaded in mango_log_processing.py
DEFAULT_ARGS = {
    'owner': '*****@*****.**',
    'depends_on_past': False,
    'start_date': datetime(2018, 1, 1),
    'email': ['*****@*****.**', '*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': True,
    'retries': 2,
    'retry_delay': timedelta(minutes=15),
}

blp_dag = DAG(
    'adi_by_region',
    default_args=DEFAULT_ARGS,
    dagrun_timeout=timedelta(hours=1),
    # Run on the first of every month, after 1AM PDT. Cron is in UTC
    schedule_interval='0 9 1 * *')

gcp_conn_id = "google_cloud_derived_datasets"
connection = GoogleCloudBaseHook(gcp_conn_id=gcp_conn_id)

aws_conn_id = 'aws_data_iam_blpadi'
location = 'us-central1-a'
cluster_name = 'bq-load-gke-1'

# Calculate run month and year. Execution date is the previous period (month)
run_month = '{{ (execution_date.replace(day=1)).strftime("%m") }}'
run_year = '{{ (execution_date.replace(day=1)).strftime("%Y") }}'
def setUp(self):
    configuration.load_test_config()
    args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
    dag = DAG('test_dag_id', default_args=args)
    self.dag = dag
    self.hql = """
import os
from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator
from operators import (RenderToS3Operator)

default_args = {
    'owner': 'avivysya',
    'start_date': datetime(2019, 1, 12),
    'depends_on_past': False,
    'retries': 8,
    'retry_delay': timedelta(minutes=5),
    'email_on_retry': False
}

dag = DAG('file_upload_dag',
          default_args=default_args,
          description='Upload data files to s3',
          schedule_interval='0 * * * *',
          catchup=False)

content_list = os.listdir('./data/processed-data/')
dir_list = filter(
    lambda x: os.path.isdir(os.path.join('./data/processed-data/', x)),
    content_list)

start_upload = DummyOperator(task_id='Upload_To_S3_Start', dag=dag)
end_upload = DummyOperator(task_id='Upload_To_S3_Finalized', dag=dag)

render_to_s3 = RenderToS3Operator(
    task_id='Render_To_S3',
    dag=dag,
    local_output=False,
from datetime import datetime
from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator
from datetime import date
import json
from main.factory import pipeline_factory, app_task_factory
import os

DIR_PATH = os.path.dirname(os.path.realpath(__file__))
print(DIR_PATH)
PARENT_PATH = os.path.abspath('..')

today = date.today().strftime("%Y%m%d")

dag = DAG('humidity_uber_rides',
          description='Humidity Range & Uber Rides - Pipeline',
          schedule_interval='0 10 * * *',
          start_date=datetime.now(),
          catchup=False)

with open('pipeline/resources/config.json', 'r') as f:
    config = json.load(f)

start_operator = DummyOperator(task_id='start_task', dag=dag)
join_operator = DummyOperator(task_id='join_pipeline_task', dag=dag)
end_task = DummyOperator(task_id='end_task', dag=dag)

uber_data_pipeline = pipeline_factory(dag, config, today, start_operator, 'uber_data')
uber_data_pipeline.set_downstream(join_operator)

weather_data_pipeline = pipeline_factory(dag, config, today, start_operator, 'weather_data')
weather_data_pipeline.set_downstream(join_operator)

app_task = app_task_factory(config, today, dag, join_operator)
app_task.set_downstream(end_task)
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    # 'start_date': airflow.utils.dates.days_ago(2),
    # You can also set the start date as follows
    'start_date': dt.datetime(year=2018, month=10, day=22),
    'retries': 1,
    'retry_delay': dt.timedelta(minutes=5),
}

dag = DAG(
    dag_id='airflow_tutorial_v01',
    default_args=default_args,
    schedule_interval='0 * * * *',
    # You can also specify schedule interval as follows
    # schedule_interval=dt.timedelta(hours=1)
)

print_hello = BashOperator(task_id='print_hello',
                           bash_command='echo "hello"',
                           dag=dag)

sleep = BashOperator(task_id='sleep',
                     bash_command='sleep 5',
                     dag=dag)

print_world = PythonOperator(task_id='print_world',
                             python_callable=print_world,
                             dag=dag)

# one way of setting dependencies
from airflow import DAG
from airflow.utils.dates import days_ago
from airflow.operators import PythonOperator

args = {
    'owner': 'airflow',
    'start_date': days_ago(2),
}

dag = DAG('example_xcom',
          schedule_interval="@once",
          default_args=args,
          tags=['example'])

value_1 = [1, 2, 3]
value_2 = {'a': 'b'}


def push(**kwargs):
    """Pushes an XCom without a specific target"""
    kwargs['ti'].xcom_push(key='value from pusher 1', value=value_1)


def push_by_returning(**kwargs):
    """Pushes an XCom without a specific target, just by returning it"""
    return value_2


def puller(**kwargs):
    """Pull all previously pushed XComs and check if the pushed values match the pulled values."""
    ti = kwargs['ti']
    pulled_value_1 = ti.xcom_pull(key='value from pusher 1', task_ids='push')
    pulled_value_2 = ti.xcom_pull(task_ids='push_by_returning')
    print(pulled_value_1, pulled_value_2)
    return DingdingOperator(
        task_id='dingding_success_callback',
        dingding_conn_id='dingding_default',
        message_type='text',
        message=message,
        at_all=True,
    ).execute(context)


args['on_failure_callback'] = failure_callback
# [END howto_operator_dingding_failure_callback]

with DAG(
    dag_id='example_dingding_operator',
    default_args=args,
    schedule_interval='@once',
    dagrun_timeout=timedelta(minutes=60),
    tags=['example'],
) as dag:

    # [START howto_operator_dingding]
    text_msg_remind_none = DingdingOperator(
        task_id='text_msg_remind_none',
        dingding_conn_id='dingding_default',
        message_type='text',
        message='Airflow dingding text message remind none',
        at_mobiles=None,
        at_all=False)
    # [END howto_operator_dingding]

    text_msg_remind_specific = DingdingOperator(
"jdoe_67890", 'depends_on_past': False, 'start_date': datetime.combine(datetime.today() - timedelta(1), datetime.min.time()), 'email': "None", 'email_on_failure': False, 'email_on_retry': False, } dag = DAG(dag_id="jb-12348_prep", description="No description provided", catchup=True, max_active_runs=1, schedule_interval=None, default_args=default_args) load_collection_0 = PythonOperator( task_id='load_collection_0', dag=dag, python_callable=EODataProcessor, op_kwargs={ 'filepaths': [ './wekeo_data_storage/S5P_RPRO_L2__NO2____20180503T093059_20180503T111427_02866_01_010202_20190202T034117.nc', './wekeo_data_storage/S5P_RPRO_L2__NO2____20180502T094957_20180502T113325_02852_01_010202_20190201T215849.nc', './wekeo_data_storage/S5P_RPRO_L2__NO2____20180501T082724_20180501T101003_02837_01_010202_20190201T175639.nc' ], 'dc_filepaths': None,