_steps = {
    'wofs': {
        'algorithm': "wofs-wf",
        'version': '1.0',
        'queue': queue_utils.assign_queue(),
    },
}

args = {
    'owner': 'mp.mancipe10',
    'start_date': airflow.utils.dates.days_ago(2),
    'execID': "mp.mancipe10_wofs_paso_3_clasificacion_varios_anhos",
    'product': _params['products'][0],
}

dag = DAG(
    dag_id=args['execID'],
    default_args=args,
    schedule_interval=None,
    dagrun_timeout=timedelta(minutes=120),
)

wofs = dag_utils.queryMapByTileByYear(
    lat=_params['lat'],
    lon=_params['lon'],
    time_ranges=_params['time_ranges'],
    product=_params['products'][0],
    algorithm=_steps['wofs']['algorithm'],
    version=_steps['wofs']['version'],
    queue=_steps['wofs']['queue'],
    dag=dag,
    task_id="wofs",
)

wofs
from airflow.models import DAG
from airflow.operators.bash_operator import BashOperator
from datetime import datetime

default_args = {
    "start_date": datetime(2020, 4, 15),
}

cleandata_dag = DAG("cleandata",
                    default_args=default_args,
                    schedule_interval="@daily")

# Modify the templated command to handle a
# second argument called filename.
templated_command = """
bash cleandata.sh {{ ds_nodash }} {{ params.filename }}
"""

# Modify clean_task to pass the new argument
clean_task = BashOperator(task_id="cleandata_task",
                          bash_command=templated_command,
                          params={
                              "filename": "salesdata.txt",
                          },
                          dag=cleandata_dag)

# Create a new BashOperator clean_task2
clean_task2 = BashOperator(task_id="cleandata_task2",
                           bash_command=templated_command,
                           params={
                               "filename": "supportdata.txt",
                           },
                           dag=cleandata_dag)
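# A minimal sketch (not part of the exercise above) of how the templated
# bash_command is rendered at runtime: {{ ds_nodash }} comes from the execution
# context and {{ params.filename }} from the operator's params dict. The values
# passed to render() below are illustrative.
from jinja2 import Template

rendered = Template(
    "bash cleandata.sh {{ ds_nodash }} {{ params.filename }}"
).render(ds_nodash="20200415", params={"filename": "salesdata.txt"})
print(rendered)  # -> bash cleandata.sh 20200415 salesdata.txt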
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
This is an example dag for using the Kubernetes Executor.
"""
import os

import airflow
from airflow.models import DAG
from airflow.operators.python_operator import PythonOperator

args = {
    'owner': 'Airflow',
    'start_date': airflow.utils.dates.days_ago(2),
}

dag = DAG(dag_id='example_kubernetes_executor',
          default_args=args,
          schedule_interval=None)

affinity = {
    'podAntiAffinity': {
        'requiredDuringSchedulingIgnoredDuringExecution': [{
            'topologyKey': 'kubernetes.io/hostname',
            'labelSelector': {
                'matchExpressions': [{
                    'key': 'app',
                    'operator': 'In',
                    'values': ['airflow'],
                }]
            }
        }]
    }
}
import airflow
from airflow.operators.bash_operator import BashOperator
from airflow.models import DAG
from datetime import timedelta

args = {"owner": "Aldo", "start_date": airflow.utils.dates.days_ago(7)}

dag = DAG(
    dag_id="example_bash_lunch_time",
    default_args=args,
    schedule_interval="30 12 * * *",
    dagrun_timeout=timedelta(minutes=60),
)

echo = 'echo "It\'s lunch time!"'
echo_task = BashOperator(task_id="lunch_time", bash_command=echo, dag=dag)

if __name__ == "__main__":
    dag.cli()
import pendulum
from datetime import datetime

from airflow import DAG
from airflow.models import Variable
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.email_operator import EmailOperator
from airflow.operators.mysql_operator import MySqlOperator

email = Variable.get('email', deserialize_json=True)
local_tz = pendulum.timezone("America/Mexico_City")
cfg = '/usr/local/airflow/dags/templates/sql_scritps'

default_args = {
    'owner': 'sergio',
    'start_date': datetime(2021, 3, 1, tzinfo=local_tz),
}

with DAG('11-load-dimensions.py',
         default_args=default_args,
         schedule_interval=None,
         template_searchpath=cfg,
         catchup=False,
         is_paused_upon_creation=False) as dag:

    start = DummyOperator(task_id='start')

    truncate_tables = MySqlOperator(task_id='truncate_tables',
                                    mysql_conn_id='cool_car',
                                    sql='00-truncate_tables.sql',
                                    autocommit=True,
                                    dag=dag)

    load_branch = MySqlOperator(task_id='load_branch_office',
                                mysql_conn_id='cool_car',
                                sql='01-dim_branch_office.sql',
                                autocommit=True,
                                dag=dag)
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from datetime import datetime

from airflow.models import DAG
from airflow.operators.bash import BashOperator

DEFAULT_DATE = datetime(2019, 12, 1)

dag = DAG(dag_id='test_dag_under_subdir2',
          start_date=DEFAULT_DATE,
          schedule_interval=None)

task = BashOperator(task_id='task1',
                    bash_command='echo "test dag under sub directory subdir2"',
                    dag=dag)
import time
import logging

from airflow.models import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.utils.dates import days_ago

logger = logging.getLogger(__name__)

args = {
    'start_date': days_ago(1),
    'owner': 'airflow',
}

dag = DAG(dag_id='common_target', default_args=args, schedule_interval=None)


def run_this_func(dag_run, **kwargs):
    timeout = dag_run.conf['timeout']
    logger.info("Chunk received: {}".format(timeout))
    time.sleep(timeout)


chunk_handler = PythonOperator(task_id='chunk_handler',
                               provide_context=True,
                               python_callable=run_this_func,
                               dag=dag)
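# Sketch of how a run of "common_target" could be supplied with the "timeout"
# value that run_this_func() reads from dag_run.conf. The caller DAG id and
# payload below are illustrative, and the operator usage follows the Airflow
# 2.x API (in 1.10 the operator lives in airflow.operators.dagrun_operator).
# From the CLI (Airflow 2.x syntax):
#
#   airflow dags trigger common_target --conf '{"timeout": 5}'
#
from airflow.operators.trigger_dagrun import TriggerDagRunOperator

caller_dag = DAG(dag_id='common_target_caller', default_args=args, schedule_interval=None)

trigger = TriggerDagRunOperator(
    task_id='trigger_common_target',
    trigger_dag_id='common_target',
    conf={'timeout': 5},
    dag=caller_dag,
)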
session = settings.Session()

default_args = {
    'owner': DAG_OWNER_NAME,
    'depends_on_past': False,
    'email': ALERT_EMAIL_ADDRESSES,
    'email_on_failure': True,
    'email_on_retry': False,
    'start_date': START_DATE,
    'retries': 1,
    'retry_delay': timedelta(minutes=1),
}

dag = DAG(DAG_ID,
          default_args=default_args,
          schedule_interval=SCHEDULE_INTERVAL,
          start_date=START_DATE)

if hasattr(dag, 'doc_md'):
    dag.doc_md = __doc__
if hasattr(dag, 'catchup'):
    dag.catchup = False


def print_configuration_function(**context):
    logging.info("Loading Configurations...")
    dag_run_conf = context.get("dag_run").conf
    logging.info("dag_run.conf: " + str(dag_run_conf))
    max_db_entry_age_in_days = None
    if dag_run_conf:
        max_db_entry_age_in_days = dag_run_conf.get("maxDBEntryAgeInDays", None)
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 3,
    'retry_delay': timedelta(hours=1),
    # 'queue': 'bash_queue',
    # 'pool': 'backfill',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}

# The execution date as YYYY-MM-DD
date = "{{ ds }}"

dag = DAG('etl_daily',
          start_date=datetime(2016, 8, 4),
          schedule_interval="0 9 * * MON-FRI",
          default_args=default_args)

t1 = PythonOperator(task_id='daily_futures_price_ingest',
                    python_callable=DailyFuturesPriceIngest.launch,
                    dag=dag,
                    provide_context=True)

t2 = PythonOperator(task_id='daily_generic_futures_price_ingest',
                    python_callable=DailyGenericFuturesPriceIngest.launch,
                    dag=dag,
                    provide_context=True)

t3 = PythonOperator(task_id='daily_equity_index_price_ingest',
                    python_callable=DailyEquityIndexPriceIngest.launch,
                    dag=dag,
                    provide_context=True)
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import os

from airflow.models import DAG
from airflow.providers.microsoft.azure.operators.wasb_delete_blob import WasbDeleteBlobOperator
from airflow.providers.microsoft.azure.transfers.local_to_wasb import LocalFilesystemToWasbOperator
from airflow.utils.dates import days_ago

PATH_TO_UPLOAD_FILE = os.environ.get('AZURE_PATH_TO_UPLOAD_FILE', 'example-text.txt')

with DAG("example_local_to_wasb", schedule_interval="@once", start_date=days_ago(2)) as dag:
    upload = LocalFilesystemToWasbOperator(task_id="upload_file",
                                           file_path=PATH_TO_UPLOAD_FILE,
                                           container_name="mycontainer",
                                           blob_name='myblob')
    delete = WasbDeleteBlobOperator(task_id="delete_file",
                                    container_name="mycontainer",
                                    blob_name="myblob")
    upload >> delete
# SEARCH_PATH = f'{AIRFLOW_HOME}/scripts/sql/'    # development
# RESOURCE_PATH = f'{AIRFLOW_HOME}/resources/'    # development
SEARCH_PATH = f'{AIRFLOW_HOME}/dags/efs/uw211dashboard/scripts/sql/'    # production
RESOURCE_PATH = f'{AIRFLOW_HOME}/dags/efs/uw211dashboard/resources/'    # production

args = {
    'owner': '211dashboard',
    'start_date': datetime(2020, 6, 1),
    'concurrency': 1,
    'retries': 0,
    'depends_on_past': False,
    'catchup': False,
}

dag = DAG(dag_id='211dash_manual_update',
          schedule_interval='@once',
          template_searchpath=SEARCH_PATH,
          default_args=args)

''' Define manual update operators. '''

''' 1. Census data operators '''

truncate_core_census_tables = PostgresOperator(
    task_id='truncate_core_census_tables',
    sql='trnctTbls_census.sql',
    dag=dag)

transform_census_county_files = PythonOperator(
    task_id='transform_census_county_files',
    python_callable=transform_static_s3,
    op_kwargs={
        'data': 'census_county',
        'filename': 'census_data_by_county.csv',
        'resource_path': RESOURCE_PATH,
        'transformer': transform_census_data,
    },
    dag=dag)
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Example of the LatestOnlyOperator"""
import datetime as dt

from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.latest_only_operator import LatestOnlyOperator
from airflow.utils.dates import days_ago

dag = DAG(dag_id='latest_only',
          schedule_interval=dt.timedelta(hours=4),
          start_date=days_ago(2),
          tags=['example'])

latest_only = LatestOnlyOperator(task_id='latest_only', dag=dag)
task1 = DummyOperator(task_id='task1', dag=dag)

latest_only >> task1
# init logger
LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.INFO)

default_args = {
    "owner": "Airflow",
    "start_date": airflow.utils.dates.days_ago(1),
    "depends_on_past": False,
    "email_on_failure": False,
    "email_on_retry": False,
    "email": "*****@*****.**",
    "retries": 1,
    "retry_delay": timedelta(minutes=1),
}

with DAG(dag_id="ddt-spark-k8s-operator",
         schedule_interval="@hourly",
         default_args=default_args,
         catchup=False) as dag:

    # Submit the Spark application described by the YAML manifest.
    t1 = SparkKubernetesOperator(
        task_id='stage_1_submit',
        namespace="ddt-compute",
        application_file="SparkApplication_stage_1.yaml",
        kubernetes_conn_id="kubernetes_default",
        do_xcom_push=True,
    )

    # Monitor the submitted application; its name is pulled from t1's XCom.
    t2 = SparkKubernetesSensor(
        task_id='stage_1_monitor',
        namespace="ddt-compute",
        application_name="{{ task_instance.xcom_pull(task_ids='stage_1_submit')['metadata']['name'] }}",
        kubernetes_conn_id="kubernetes_default",
    )

    t1 >> t2
                              python_callable=print_context)
    sd1_op_7 = PythonOperator(task_id='sd1_op_7',
                              provide_context=True,
                              python_callable=print_context)

    sd1_op_1 >> [sd1_op_2, sd1_op_3]
    sd1_op_6 >> sd1_op_2
    sd1_op_2 >> [sd1_op_4, sd1_op_5]
    sd1_op_5 >> sd1_op_7
    sd1_op_3 >> sd1_op_7

    return dag


with DAG('ShortCircuitPlayground',
         'A playground DAG',
         default_args=default_args) as dag:
    op_1 = PythonOperator(task_id='op_1',
                          provide_context=True,
                          python_callable=print_context)
    op_2 = PythonOperator(task_id='op_2',
                          provide_context=True,
                          python_callable=print_context)
    op_3 = PythonOperator(task_id='op_3',
                          provide_context=True,
                          python_callable=print_context)
    op_4 = PythonOperator(task_id='op_4',
                          provide_context=True,
                          python_callable=print_context)
    op_5 = PythonOperator(task_id='op_5',
                          provide_context=True,
                          python_callable=print_context)
from datetime import datetime

from airflow.models import DAG

try:
    from airflow.operators.empty import EmptyOperator
except ModuleNotFoundError:
    from airflow.operators.dummy import DummyOperator as EmptyOperator  # type: ignore

from airflow.providers.dbt.cloud.operators.dbt import (
    DbtCloudGetJobRunArtifactOperator,
    DbtCloudRunJobOperator,
)
from airflow.providers.dbt.cloud.sensors.dbt import DbtCloudJobRunSensor
from airflow.utils.edgemodifier import Label

with DAG(
    dag_id="example_dbt_cloud",
    default_args={"dbt_cloud_conn_id": "dbt", "account_id": 39151},
    start_date=datetime(2021, 1, 1),
    schedule_interval=None,
    catchup=False,
) as dag:
    begin = EmptyOperator(task_id="begin")
    end = EmptyOperator(task_id="end")

    # [START howto_operator_dbt_cloud_run_job]
    trigger_job_run1 = DbtCloudRunJobOperator(
        task_id="trigger_job_run1",
        job_id=48617,
        check_interval=10,
        timeout=300,
    )
    # [END howto_operator_dbt_cloud_run_job]
import datetime

from airflow.models import DAG
from airflow.operators.bash_operator import BashOperator

dag = DAG(
    dag_id="chapter12_task_sla",
    default_args={"email": "*****@*****.**"},
    schedule_interval=datetime.timedelta(hours=12),
    start_date=datetime.datetime(2020, 4, 1),
)

sleeptask = BashOperator(
    task_id="sleeptask",
    bash_command="sleep 5",
    sla=datetime.timedelta(seconds=1),
    dag=dag,
)
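# A sketch of how missed SLAs for a DAG like the one above could also be
# surfaced in the scheduler logs: sla_miss_callback is a standard DAG argument,
# but the callback body and the second dag_id here are illustrative assumptions.
import logging


def _log_sla_miss(dag, task_list, blocking_task_list, slas, blocking_tis):
    logging.warning("SLA missed in DAG %s for tasks: %s", dag.dag_id, task_list)


dag_with_sla_callback = DAG(
    dag_id="chapter12_task_sla_logged",
    default_args={"email": "*****@*****.**"},
    schedule_interval=datetime.timedelta(hours=12),
    start_date=datetime.datetime(2020, 4, 1),
    sla_miss_callback=_log_sla_miss,
)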
from airflow import utils
from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator

dag = DAG(
    dag_id='pattern_parallel_split',
    default_args={
        'start_date': utils.dates.days_ago(1),
    },
    schedule_interval=None,
)

with dag:
    read_input = DummyOperator(task_id='read_input')
    aggregate_data = DummyOperator(task_id='generate_data')
    convert_to_parquet = DummyOperator(task_id='convert_to_parquet')
    convert_to_avro = DummyOperator(task_id='convert_to_avro')

    read_input >> aggregate_data >> [convert_to_parquet, convert_to_avro]
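# An illustrative continuation (not in the original file): the matching
# "synchronization" pattern, where both conversion branches fan back in to a
# single downstream task that only runs once both have finished.
with dag:
    store_output = DummyOperator(task_id='store_output')

    [convert_to_parquet, convert_to_avro] >> store_output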
import logging
from datetime import datetime, timedelta

from airflow.models import DAG
from airflow.operators.python_operator import PythonOperator

dag = DAG(
    dag_id='simple_xcom',
    start_date=datetime(2017, 10, 26),
    schedule_interval=None,
)


def push_function(**context):
    msg = 'the_message'
    logging.info("message to push: '%s'" % msg)
    print("message to push: '%s'" % msg)
    task_instance = context['task_instance']
    task_instance.xcom_push(key='the_message', value=msg)


push_task = PythonOperator(
    task_id='push_task',
    python_callable=push_function,
    provide_context=True,
    dag=dag,
)


def pull_function(**kwargs):
    ti = kwargs['ti']
    msg = ti.xcom_pull(task_ids='push_task', key='the_message')
    logging.info("message received: '%s'" % msg)
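# Illustrative wiring for the pull side (the original snippet is cut off above):
# a PythonOperator mirroring push_task, plus the push -> pull dependency.
pull_task = PythonOperator(
    task_id='pull_task',
    python_callable=pull_function,
    provide_context=True,
    dag=dag,
)

push_task >> pull_task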
import datetime

from airflow.models import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import PythonOperator
from airflow.contrib.operators.postgres_to_gcs_operator import PostgresToGoogleCloudStorageOperator
from airflow.hooks.base_hook import BaseHook

args = {
    'owner': 'Airflow',
    # 'start_date': airflow.utils.dates.days_ago(2),
    'start_date': datetime.datetime(2019, 11, 1),
}

dag = DAG(
    dag_id='exercise_hooks',
    default_args=args,
    schedule_interval="0 0 * * *",
    dagrun_timeout=datetime.timedelta(minutes=60),
)

get_data = PostgresToGoogleCloudStorageOperator(
    postgres_conn_id="test_connection",
    bucket='test_bucket312312',
    filename="land_registry_price_paid_uk/{{ ds_nodash }}/test_{}.csv",
    sql="SELECT * FROM land_registry_price_paid_uk WHERE transfer_date = '{{ ds }}'",
    task_id='get_data',
    dag=dag,
)

get_data
def delayed_fail():
    """
    Delayed failure to make sure that processes are running before the error
    is raised.

    TODO handle more directly (without sleeping)
    """
    time.sleep(5)
    raise ValueError('Expected failure.')


# DAG tests backfill with pooled tasks
# Previously backfill would queue the task but never run it
dag1 = DAG(dag_id='test_backfill_pooled_task_dag', default_args=default_args)
dag1_task1 = DummyOperator(
    task_id='test_backfill_pooled_task',
    dag=dag1,
    pool='test_backfill_pooled_task_pool',
)

# DAG tests depends_on_past dependencies
dag2 = DAG(dag_id='test_depends_on_past', default_args=default_args)
dag2_task1 = DummyOperator(
    task_id='test_dop_task',
    dag=dag2,
    depends_on_past=True,
)

# DAG tests that a Dag run that doesn't complete is marked failed
def _print_exec_date(execution_date, **context):
    print(execution_date)


def _get_weekday(execution_date, **context):
    return execution_date.strftime("%a")


args = {
    'owner': 'Airflow',
    'start_date': airflow.utils.dates.days_ago(2),
}

with DAG(dag_id='dag4_postgres_hook',
         default_args=args,
         schedule_interval=None,
         dagrun_timeout=timedelta(minutes=60)) as dag:

    print_data = PythonOperator(
        task_id='print_data',
        python_callable=_get_data,
    )

    # filename = 'gdd_data{}_{rundate}.csv'.format(rundate=rundate)
    # print(filename)

    copy_data_to_gcs = PostgresToGoogleCloudStorageOperator(
        task_id='copy_data_to_gcs',
        sql=sql,
        bucket='gdd_bucket',
        filename='gdd_data{}_/{{ ds }}.json',
        postgres_conn_id='postgres_cursus_db',
    )
def _get_task(self, **kwargs):
    return BaseOperator(task_id='test_task', dag=DAG('test_dag'), **kwargs)
import os

from libs import print_stuff

from airflow.models import DAG
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago

default_args = {
    'owner': 'airflow',
    'start_date': days_ago(2),
}

with DAG(
    dag_id='example_kubernetes_executor_config',
    default_args=default_args,
    schedule_interval=None,
    tags=['example'],
) as dag:

    def test_volume_mount():
        """
        Tests whether the volume has been mounted.
        """
        with open('/foo/volume_mount_test.txt', 'w') as foo:
            foo.write('Hello')

        return_code = os.system("cat /foo/volume_mount_test.txt")
        if return_code != 0:
            raise ValueError(f"Error when checking volume mount. Return code {return_code}")

    # You can use annotations on your kubernetes pods!
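    # The original file continues past this point. Below is a sketch (not taken
    # from the original example) of how test_volume_mount could be scheduled with
    # a pod_override so the Kubernetes Executor actually mounts something at
    # /foo/; the volume name and host path are assumptions.
    from kubernetes.client import models as k8s

    volume_task = PythonOperator(
        task_id="test_volume_mount",
        python_callable=test_volume_mount,
        executor_config={
            "pod_override": k8s.V1Pod(
                spec=k8s.V1PodSpec(
                    containers=[
                        k8s.V1Container(
                            name="base",
                            volume_mounts=[
                                k8s.V1VolumeMount(mount_path="/foo/", name="example-data"),
                            ],
                        ),
                    ],
                    volumes=[
                        k8s.V1Volume(
                            name="example-data",
                            host_path=k8s.V1HostPathVolumeSource(path="/tmp/"),
                        ),
                    ],
                ),
            ),
        },
    )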
        except:
            print('No report for {}'.format(s['name']))
            pass

    publish_mattermost = MattermostOperator(
        task_id="publish_result",
        mattermost_endpoint=MATTERMOST_ENDPOINT,
        text=message)
    publish_mattermost.execute(dict())


with DAG(
    dag_id=DAG_NAME,
    schedule_interval='0 5 * * *',
    start_date=days_ago(1),
    dagrun_timeout=timedelta(minutes=120),
    tags=['schemas', 'irve', 'consolidation', 'datagouv'],
    default_args=default_args,
) as dag:

    clean_previous_outputs = CleanFolderOperator(
        task_id="clean_previous_outputs",
        folder_path=TMP_FOLDER + DAG_FOLDER)

    tmp_folder = TMP_FOLDER + DAG_FOLDER + '{{ ds }}' + "/"

    shared_notebooks_params = {
        "msgs": "Ran from Airflow " + '{{ ds }}' + "!",
        "WORKING_DIR": AIRFLOW_DAG_HOME + DAG_FOLDER + 'notebooks/',
        "TMP_FOLDER": tmp_folder,
        "API_KEY": API_KEY,
        "API_URL": API_URL,
    }
# See the License for the specific language governing permissions and
# limitations under the License.
import airflow
import random

from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import BranchPythonOperator

args = {
    'owner': 'airflow',
    'start_date': airflow.utils.dates.days_ago(2),
}

dag = DAG(
    dag_id='example_branch_operator',
    default_args=args,
    schedule_interval="@daily")

cmd = 'ls -l'

run_this_first = DummyOperator(task_id='run_this_first', dag=dag)

options = ['branch_a', 'branch_b', 'branch_c', 'branch_d']

branching = BranchPythonOperator(
    task_id='branching',
    python_callable=lambda: random.choice(options),
    dag=dag)
branching.set_upstream(run_this_first)

join = DummyOperator(
    task_id='join',
    dag=dag)
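# Sketch of how the example typically continues (modeled on the stock Airflow
# branching example, so the details below are illustrative): each option gets
# its own dummy task between `branching` and `join`. In the stock example,
# `join` is also given a permissive trigger rule (e.g. 'none_failed') so it is
# not skipped along with the branches that were not chosen.
for option in options:
    branch_task = DummyOperator(task_id=option, dag=dag)
    branching >> branch_task >> join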
import datetime as dt

import airflow
from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.latest_only_operator import LatestOnlyOperator

dag = DAG(
    dag_id='latest_only',
    schedule_interval=dt.timedelta(hours=4),
    start_date=airflow.utils.dates.days_ago(2),
)

latest_only = LatestOnlyOperator(task_id='latest_only', dag=dag)
task1 = DummyOperator(task_id='task1', dag=dag)

latest_only >> task1
###############################################################################
# Your own app's LINE channel access token
token = 'yPz5v7f3XGdcsTCyme2hKXbu58fKDgEriFNPSo/NcMNoZPWVZEwYIOlQ2jqNQeXF080NRsgf/jzbYI/VjlJTl2H1Xc9ZXN7wBHLJH82E6uJsab+TuUAaT2G4TZtH5T+uWycR5QSotn6TQiy/ykra4wdB04t89/1O/w1cDnyilFU='
# Your own LINE user ID
ID = 'U6c8f2685a2918d7afbd819b12c15a848'

########### All of the DAG's default arguments go in here ###########
args = {
    'owner': 'cheating',                            # owner of this DAG
    'start_date': airflow.utils.dates.days_ago(0)   # when enabled, how many days back to start running
}

########### DAG definition ###########
dag = DAG(
    dag_id='Stock',                      # name of the DAG
    default_args=args,                   # pass in the arguments defined above
    schedule_interval='10 * * * * *')    # how often it runs

########### Check the current price ###########
def look_price(stock='3624', bs='>', price=31):
    # First scrape the stock's data from Yahoo
    url = 'https://tw.stock.yahoo.com/q/q?s=' + stock
    list_req = requests.get(url)
    soup = BeautifulSoup(list_req.content, "html.parser")
    getstock = soup.find('b').text  # start the price check

    if bs == '<':  # greater-than / less-than comparison
        if float(getstock) < price:
            get = stock + ' price: ' + getstock
            line_bot_api = LineBotApi(token)
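            # Likely continuation (a sketch; the original snippet stops above):
            # push the alert text to the LINE user via the line-bot-sdk client.
            from linebot.models import TextSendMessage
            line_bot_api.push_message(ID, TextSendMessage(text=get))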
def setUp(self):
    self.dagbag = models.DagBag(dag_folder='/dev/null', include_examples=True)
    self.args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
    self.dag = DAG(TEST_DAG_ID, default_args=self.args)
def init_dims_sub_dag(parent_dag_name, child_dag_name, start_date, redshift_conn_id):
    dag = DAG('%s.%s' % (parent_dag_name, child_dag_name), start_date=start_date)

    drop_dim_vehicles_task = PostgresOperator(
        task_id='drop_dim_vehicles', dag=dag, postgres_conn_id=redshift_conn_id,
        sql=init_statements.DROP_TABLE_DIM_VEHICLES)
    drop_dim_vehicle_models_task = PostgresOperator(
        task_id='drop_dim_vehicle_models', dag=dag, postgres_conn_id=redshift_conn_id,
        sql=init_statements.DROP_TABLE_DIM_VEHICLE_MODELS)
    drop_dim_rental_zones_task = PostgresOperator(
        task_id='drop_dim_rental_zones', dag=dag, postgres_conn_id=redshift_conn_id,
        sql=init_statements.DROP_TABLE_DIM_RENTAL_ZONES)
    drop_dim_companies_task = PostgresOperator(
        task_id='drop_dim_companies', dag=dag, postgres_conn_id=redshift_conn_id,
        sql=init_statements.DROP_TABLE_DIM_COMPANIES)
    drop_dim_categories_task = PostgresOperator(
        task_id='drop_dim_categories', dag=dag, postgres_conn_id=redshift_conn_id,
        sql=init_statements.DROP_TABLE_DIM_CATEGORIES)
    drop_dim_date_task = PostgresOperator(
        task_id='drop_dim_date', dag=dag, postgres_conn_id=redshift_conn_id,
        sql=init_statements.DROP_TABLE_DIM_DATE)
    drop_dim_weather_task = PostgresOperator(
        task_id='drop_dim_weather', dag=dag, postgres_conn_id=redshift_conn_id,
        sql=init_statements.DROP_TABLE_DIM_WEATHER)

    create_dim_vehicles_task = PostgresOperator(
        task_id='create_dim_vehicles', dag=dag, postgres_conn_id=redshift_conn_id,
        sql=init_statements.CREATE_TABLE_DIM_VEHICLES)
    create_dim_vehicle_models_task = PostgresOperator(
        task_id='create_dim_vehicle_models', dag=dag, postgres_conn_id=redshift_conn_id,
        sql=init_statements.CREATE_TABLE_DIM_VEHICLE_MODELS)
    create_dim_rental_zones_task = PostgresOperator(
        task_id='create_dim_rental_zones', dag=dag, postgres_conn_id=redshift_conn_id,
        sql=init_statements.CREATE_TABLE_DIM_RENTAL_ZONES)
    create_dim_companies_task = PostgresOperator(
        task_id='create_dim_companies', dag=dag, postgres_conn_id=redshift_conn_id,
        sql=init_statements.CREATE_TABLE_DIM_COMPANIES)
    create_dim_categories_task = PostgresOperator(
        task_id='create_dim_categories', dag=dag, postgres_conn_id=redshift_conn_id,
        sql=init_statements.CREATE_TABLE_DIM_CATEGORIES)
    create_dim_date_task = PostgresOperator(
        task_id='create_dim_date', dag=dag, postgres_conn_id=redshift_conn_id,
        sql=init_statements.CREATE_TABLE_DIM_DATE)
    create_dim_weather_task = PostgresOperator(
        task_id='create_dim_weather', dag=dag, postgres_conn_id=redshift_conn_id,
        sql=init_statements.CREATE_TABLE_DIM_WEATHER)

    # Each dimension table is dropped and then re-created.
    drop_dim_vehicles_task >> create_dim_vehicles_task
    drop_dim_vehicle_models_task >> create_dim_vehicle_models_task
    drop_dim_rental_zones_task >> create_dim_rental_zones_task
    drop_dim_companies_task >> create_dim_companies_task
    drop_dim_categories_task >> create_dim_categories_task
    drop_dim_date_task >> create_dim_date_task
    drop_dim_weather_task >> create_dim_weather_task

    return dag
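# Illustrative usage of the factory above (a sketch; the parent DAG id, start
# date and connection id are assumptions, not taken from the original project).
# With SubDagOperator, the child_dag_name passed to the factory must match the
# operator's task_id so the sub-DAG id resolves to "<parent>.<task_id>".
from datetime import datetime

from airflow.models import DAG
from airflow.operators.subdag_operator import SubDagOperator

parent_dag = DAG('init_dwh', start_date=datetime(2020, 1, 1), schedule_interval=None)

init_dims = SubDagOperator(
    task_id='init_dims',
    subdag=init_dims_sub_dag(
        parent_dag_name='init_dwh',
        child_dag_name='init_dims',
        start_date=parent_dag.start_date,
        redshift_conn_id='redshift',
    ),
    dag=parent_dag,
)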
    pm.execute_notebook(kwargs['notebook'],
                        '/data/notebook-runs/GetTweets-output.ipynb',
                        parameters={'dt': ds},
                        kernel_name='spylon-kernel',
                        progress_bar=False,
                        report_mode=True,
                        start_timeout=60)


dag = DAG('get_tweets',
          default_args={
              'owner': 'data-engineering',
              'depends_on_past': False,
              'email_on_failure': False,
              'email_on_retry': False,
              'retries': 1,
              'retry_delay': timedelta(minutes=5),
          },
          catchup=False,
          start_date=datetime(2020, 10, 25, 22, 0, 0),
          schedule_interval=None,
          max_active_runs=1)

with dag:
    load_tweets = PythonOperator(
        task_id='load_tweets',
        provide_context=True,
        python_callable=execute_notebook,
        op_kwargs={'notebook': '/notebooks/GetTweets.ipynb'},
        dag=dag,
    )