}

with DAG('fix_s3_recording_url_pipeline',
         default_args=default_args,
         schedule_interval='*/10 * * * *',
         catchup=False) as dag:
    t1 = BashOperator(
        task_id='login_aws',
        bash_command='$(aws ecr get-login --region eu-west-1 --no-include-email)')

    t2 = DockerOperator(
        task_id='fix_s3_recording_url_pipeline',
        auto_remove=True,
        image=IMAGE_NAME,
        api_version='auto',
        command=COMMAND,
        docker_url='unix://var/run/docker.sock',
        network_mode='host',
        environment={
            'DATABASE_HOST': DATABASE_HOST,
            'ELASTICSEARCH_URL': ELASTICSEARCH_URL,
            'DYNAMODB_HOST': DYNAMODB_HOST,
        },
        volumes=[LOG_DIRECTORY, BOTO_CREDENTIAL],
        force_pull=True,
    )

    t2.set_upstream(t1)
data = json.load(g)


def mapping(spec, dag1):
    """Recursively build DockerOperators from a nested task spec.

    'ttl' holds the upstream sub-specs; an empty list marks a leaf.
    """
    if not spec['ttl']:
        t1 = DockerOperator(
            task_id=spec['task_id'],
            image=spec['image'],
            command=eval(spec['command']),  # leaf commands are stored as Python literals
            xcom_push=bool(spec['xcom_push']),
            dag=dag1)
        return [t1]

    upstream_tasks = []
    for sub_spec in spec['ttl']:
        upstream_tasks.extend(mapping(sub_spec, dag1))  # recurse with the same DAG
    t = DockerOperator(
        task_id=spec['task_id'],
        image=spec['image'],
        command=spec['command'],
        xcom_push=bool(spec['xcom_push']),
        dag=dag1)
    for upstream in upstream_tasks:
        t.set_upstream(upstream)
    return [t]


root_tasks = mapping(data, dag)
print(root_tasks)
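# A hypothetical shape for the spec that mapping() consumes, inferred only
# from the keys the function reads; the values here are made-up examples.
# Note the asymmetry above: leaf commands go through eval(), so they are
# stored as Python literals (e.g. "'5 6'" evaluates to the string '5 6'),
# while non-leaf commands are passed to the operator as-is.
example_spec = {
    'task_id': 'aggregate',
    'image': 'useful1',
    'command': 'totals',
    'xcom_push': 'True',
    'ttl': [
        {'task_id': 'leaf_a', 'image': 'useful1',
         'command': "'5 6'", 'xcom_push': 'True', 'ttl': []},
    ],
}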
fda_linker_task = SubDagOperator(
    dag=dag,
    subdag=fda_dap(parent_dag_name='fda',
                   child_dag_name='linker',
                   start_date=dag.start_date,
                   schedule_interval=dag.schedule_interval),
    task_id='linker',
)

remove_unknown_documentcloud_docs_task = DockerOperator(
    task_id='remove_unknown_documentcloud_docs',
    dag=dag,
    image='opentrials/processors:latest',
    force_pull=True,
    api_version='1.23',
    environment={
        'WAREHOUSE_URL': helpers.get_postgres_uri('warehouse_db'),
        'DATABASE_URL': helpers.get_postgres_uri('api_db'),
        'EXPLORERDB_URL': helpers.get_postgres_uri('explorer_db'),
        'LOGGING_URL': Variable.get('LOGGING_URL'),
        'DOCUMENTCLOUD_USERNAME': Variable.get('DOCUMENTCLOUD_USERNAME'),
        'DOCUMENTCLOUD_PASSWORD': Variable.get('DOCUMENTCLOUD_PASSWORD'),
        'DOCUMENTCLOUD_PROJECT': Variable.get('DOCUMENTCLOUD_PROJECT'),
        'FERNET_KEY': os.environ['FERNET_KEY'],
    },
    command='make start remove_unknown_documentcloud_docs')

remove_unknown_documentcloud_docs_task.set_upstream(fda_linker_task)
fda_linker_task.set_upstream(fda_dap_task)
    **kwargs,
)

load_black_scholes = DockerOperator(
    task_id='load_black_scholes',
    command='python finance/data/td_ameritrade/black_scholes/load.py',
    **kwargs,
)

end_time = BashOperator(
    task_id='end_pipeline',
    bash_command='date',
    dag=dag,
)

scrape_options.set_upstream(start_time)
load_options.set_upstream(scrape_options)
table_creator_options.set_upstream(load_options)
scrape_quotes.set_upstream(scrape_options)
load_quotes.set_upstream(scrape_quotes)
table_creator_quotes.set_upstream(load_quotes)
table_creator_stocks.set_upstream(table_creator_quotes)
report_black_scholes.set_upstream(table_creator_options)
report_black_scholes.set_upstream(table_creator_stocks)
load_black_scholes.set_upstream(report_black_scholes)
report_options.set_upstream(table_creator_options)
report_options.set_upstream(table_creator_stocks)
        'WAREHOUSE_URL': helpers.get_postgres_uri('warehouse_db'),
        'LOGGING_URL': Variable.get('LOGGING_URL'),
        'PYTHON_ENV': Variable.get('ENV'),
    },
    command='make start fda_dap'
)

processor_task = DockerOperator(
    task_id='fda_dap_processor',
    dag=dag,
    image='okibot/processors:latest',
    force_pull=True,
    environment={
        'WAREHOUSE_URL': helpers.get_postgres_uri('warehouse_db'),
        'DATABASE_URL': helpers.get_postgres_uri('api_db'),
        'EXPLORERDB_URL': helpers.get_postgres_uri('explorer_db'),
        'LOGGING_URL': Variable.get('LOGGING_URL'),
        'AWS_ACCESS_KEY_ID': Variable.get('AWS_ACCESS_KEY_ID'),
        'AWS_SECRET_ACCESS_KEY': Variable.get('AWS_SECRET_ACCESS_KEY'),
        'AWS_S3_BUCKET': Variable.get('AWS_S3_BUCKET'),
        'AWS_S3_REGION': Variable.get('AWS_S3_REGION'),
        'AWS_S3_CUSTOM_DOMAIN': Variable.get('AWS_S3_CUSTOM_DOMAIN'),
        'DOCUMENTCLOUD_USERNAME': Variable.get('DOCUMENTCLOUD_USERNAME'),
        'DOCUMENTCLOUD_PASSWORD': Variable.get('DOCUMENTCLOUD_PASSWORD'),
        'DOCUMENTCLOUD_PROJECT': Variable.get('DOCUMENTCLOUD_PROJECT'),
    },
    command='make start fda_dap'
)

processor_task.set_upstream(collector_task)
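# helpers.get_postgres_uri is referenced throughout these snippets but never
# shown. A minimal sketch of what such a helper could look like, assuming it
# resolves an Airflow connection id ('warehouse_db', 'api_db', 'explorer_db')
# to a postgres:// URI; the actual project helper may differ.
from airflow.hooks.base_hook import BaseHook

def get_postgres_uri(conn_id):
    conn = BaseHook.get_connection(conn_id)
    return 'postgres://{}:{}@{}:{}/{}'.format(
        conn.login, conn.password, conn.host, conn.port or 5432, conn.schema)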
    task_id='end_pipeline',
    bash_command='date',
    dag=dag)

# one table-creation task per data source, chained sequentially
tasks = {}
command_prefix = 'python finance/data/'
command_suffix = '/sql.py'
jobs = ['fred', 'internals', 'td_ameritrade', 'yahoo']
for job in jobs:
    tasks[job] = command_prefix + job + command_suffix

prior_task = None
for task in tasks:
    dock_task = DockerOperator(
        task_id='create_tables_' + task,
        image='py-dw-stocks',
        auto_remove=True,
        command=tasks.get(task),
        volumes=['/media/nautilus/fun-times-in-python:/usr/src/app'],
        network_mode='bridge',
        dag=dag
    )
    if prior_task:
        dock_task.set_upstream(prior_task)
    else:
        dock_task.set_upstream(start_time)
    prior_task = dock_task

# end_time hangs off the last operator created in the loop
end_time.set_upstream(dock_task)
)

scrape_equities = DockerOperator(
    task_id='scrape_td_equities',
    command='python finance/data/td_ameritrade/equities/scrape.py',
    **kwargs,
)

load_equities = DockerOperator(
    task_id='load_td_equities',
    command='python finance/data/td_ameritrade/equities/load.py',
    **kwargs,
)

table_creator_equities = DockerOperator(
    task_id='update_td_equities_table',
    command='python finance/data/td_ameritrade/equities/sql.py',
    **kwargs,
)

end_time = BashOperator(
    task_id='end_pipeline',
    bash_command='date',
    dag=dag,
)

scrape_equities.set_upstream(start_time)
load_equities.set_upstream(scrape_equities)
table_creator_equities.set_upstream(load_equities)
end_time.set_upstream(table_creator_equities)
    image='useful1',
    command='5 6',
    xcom_push=True,
    dag=dag)

t2 = PythonOperator(
    task_id='print_task',
    python_callable=print11,
    provide_context=True,
    # PythonOperator pushes its return value to XCom automatically
    templates_dict={'a3': "{{ ti.xcom_pull(task_ids='divide1_conv_on') }}"},
    dag=dag)

t3 = DockerOperator(
    task_id='docker2',
    image='useful1',
    command="{{ ti.xcom_pull(task_ids='print_task') }}" + ' 10',
    xcom_push=True,
    dag=dag)

t4 = PythonOperator(
    task_id='print_task2',
    python_callable=print11,
    provide_context=True,
    templates_dict={'a3': "{{ ti.xcom_pull(task_ids='docker2') }}"},
    dag=dag)

t2.set_upstream(t1)
t3.set_upstream(t2)
t4.set_upstream(t3)
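# print11 is referenced above but not defined in this excerpt. A minimal
# sketch under the assumption that it simply echoes and forwards the rendered
# templates_dict value; the real callable may do more.
def print11(templates_dict=None, **kwargs):
    # with provide_context=True (Airflow 1.x), templates_dict arrives rendered
    value = templates_dict['a3']
    print(value)
    return value  # the return value is what downstream xcom_pull calls see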
    task_id='task_1',
    bash_command='echo "Starting executor Task 1 | Passed Conf : {{ dag_run.conf["json_executor_task"] }}"',
    dag=dag)

executor = DockerOperator(
    task_id='executor',
    image='openjdk:8-jre-alpine',
    api_version='auto',
    auto_remove=True,
    volumes=[
        '/usr/local/airflow/artifacts:/usr/local/airflow/artifacts',
        '/var/run/docker.sock:/var/run/docker.sock'
    ],
    docker_url='unix://var/run/docker.sock',
    network_mode='bridge',
    environment={
        'VPC_EXECUTOR_TASK': '{{ dag_run.conf["json_executor_task"] }}'
    },
    command='java -cp /usr/local/airflow/artifacts/jar-with-dependencies.jar <class>',
    dag=dag)

t2 = BashOperator(
    task_id='task_2',
    bash_command='echo "Finishing executor Task 2 | Execution Time : {{ ts }}"',
    dag=dag)

executor.set_upstream(t1)
executor.set_downstream(t2)
    # 'pool': 'backfill',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}

dag = DAG(
    dag_id='yahoo_stocks',
    default_args=args,
    schedule_interval=None,
)

start_time = BashOperator(
    task_id='start_pipeline',
    bash_command='date',
    dag=dag)

task = DockerOperator(
    task_id='scrape_yahoo_stocks',
    image='py-dw-stocks',
    auto_remove=True,
    command='python finance/data/yahoo/sql.py',
    volumes=[
        '/media/nautilus/fun-times-in-python:/usr/src/app',
        '/media/nautilus/raw_files:/mnt'
    ],
    network_mode='bridge',
    dag=dag)

end_time = BashOperator(
    task_id='end_pipeline',
    bash_command='date',
    dag=dag)

task.set_upstream(start_time)
end_time.set_upstream(task)
    image='useful2',
    command='divide ' + str(ON_SALES) + ' ' + str(ON_CALLS),
    xcom_push=True,
    dag=dag)

t2 = DockerOperator(
    task_id='divide1_conv_off',
    image='useful2',
    command='divide ' + str(OFF_SALES) + ' ' + str(OFF_CALLS),
    xcom_push=True,
    dag=dag)

t3 = DockerOperator(
    task_id='numerator',
    image='useful2',
    command='subtract '
            + "{{ ti.xcom_pull(task_ids='divide1_conv_on') }}" + ' '
            + "{{ ti.xcom_pull(task_ids='divide1_conv_off') }}",
    xcom_push=True,
    dag=dag)

t4 = DockerOperator(
    task_id='divide_results',
    image='useful2',
    command='divide '
            + "{{ ti.xcom_pull(task_ids='numerator') }}" + ' '
            + "{{ ti.xcom_pull(task_ids='divide1_conv_off') }}",
    xcom_push=True,
    dag=dag)

t3.set_upstream(t1)
t3.set_upstream(t2)
t4.set_upstream(t3)
stask = {'json_executor_task': task}
print(stask)
json_task = json.dumps(stask)
print(json_task)
trigger_dag(dag_id='Executor',
            run_id=run_id,
            conf=json_task,
            execution_date=execution_date,
            replace_microseconds=False)

planner = DockerOperator(
    task_id='planner',
    image='openjdk:8-jre-alpine',
    api_version='auto',
    auto_remove=False,
    volumes=['/usr/local/airflow/artifacts:/usr/local/airflow/artifacts',
             '/var/run/docker.sock:/var/run/docker.sock'],
    docker_url='unix://var/run/docker.sock',
    network_mode='bridge',
    command='java -cp /usr/local/airflow/artifacts/jar-with-dependencies.jar <class> {{ ts }}',
    xcom_push=True,
    xcom_all=True,
    dag=dag)

end_task = PythonOperator(
    task_id='queue_executor_tasks',
    python_callable=schedule_executor,
    dag=dag)

planner.set_upstream(start_task)
end_task.set_upstream(planner)
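# The stask/trigger_dag fragment above presumably lives inside
# schedule_executor, whose definition this excerpt omits. A hedged sketch of
# such a wrapper, assuming the planner's XCom output (xcom_all=True collects
# the container's log lines) is iterated and that the operator runs with
# provide_context=True; the run_id format and variable names are assumptions.
import json
from airflow.api.common.experimental.trigger_dag import trigger_dag

def schedule_executor(**context):
    planned = context['ti'].xcom_pull(task_ids='planner') or []
    for i, task in enumerate(planned):
        run_id = 'executor_{}_{}'.format(context['ts'], i)
        execution_date = None  # let Airflow stamp the triggered run
        stask = {'json_executor_task': task}
        json_task = json.dumps(stask)
        trigger_dag(dag_id='Executor',
                    run_id=run_id,
                    conf=json_task,
                    execution_date=execution_date,
                    replace_microseconds=False)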