dag = DAG(
    'preprocessing-train-pipeline',
    default_args=default_args,
    description='A simple tutorial DAG',
    schedule_interval=timedelta(minutes=10),
)

# [START howto_operator_bash]
preprocessing = BashOperator(
    task_id='preprocessing',
    bash_command='cd /Users/nickkon/projects/2021.AI/grace-enterprise-demo && '
                 'echo "\n Running preprocessing \n" && '
                 'python airflow_pipelines/src/preprocessing.py',
    dag=dag,
)
# [END howto_operator_bash]

training = BashOperator(
    task_id='train',
    bash_command='cd /Users/nickkon/projects/2021.AI/grace-enterprise-demo && '
                 'echo "\n Running training \n" && '
                 'python airflow_pipelines/src/train.py',
    dag=dag,
)

# Preprocessing must complete before training starts.
preprocessing >> training

if __name__ == "__main__":
    dag.cli()
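A minimal sketch of the default_args dictionary the DAG above references; its actual definition is not shown in this excerpt, so the owner, start date, and retry values below are placeholder assumptions rather than the project's real settings:

from datetime import datetime, timedelta

# Placeholder defaults applied to every task in the DAG (assumed values).
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2021, 1, 1),
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}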
]

SENTINEL_INIT_ETL_END = DummyOperator(task_id='sentinel_init_etl_end', dag=PROJECT_DAG)

# Both the MIMIC and CCS file sets are staged under /mimic with 'mimic' task ids.
for etl_set in [MIMIC_ETL_FILES, CCS_ETL_FILES]:
    for elem in etl_set:
        # Create the HDFS target directory for this ETL file.
        mkdir_op = BashOperator(
            task_id=f'mkdir_mimic_{elem}',
            bash_command=f"sudo su hdfs -c 'hdfs dfs -mkdir -p /mimic/{elem}'",
            dag=PROJECT_DAG)
        mkdir_op.set_upstream(SENTINEL_START)

        # NOTE: this runs the same mkdir command again; the task_id suggests it
        # is meant to `hdfs dfs -put` the corresponding local file into
        # /mimic/{elem} once the directory exists.
        put_op = BashOperator(
            task_id=f'put_mimic_{elem}',
            bash_command=f"sudo su hdfs -c 'hdfs dfs -mkdir -p /mimic/{elem}'",
            dag=PROJECT_DAG)
        put_op.set_upstream(mkdir_op)
        put_op.set_downstream(SENTINEL_INIT_ETL_END)

# One-off HDFS setup not yet wired into the DAG:
# sudo su hdfs -c 'hdfs dfs -mkdir -p /model/noteevents_with_topics'
# sudo su hdfs -c 'hdfs dfs -mkdir -p /model/admissions_ccs_ohe'
# sudo su hdfs -c 'hdfs dfs -mkdir -p /model/admissions_topic_scores'
# sudo su hdfs -c 'hdfs dfs -chown -R root /mimic'
# sudo su hdfs -c 'hdfs dfs -chown -R root /ccs'
# sudo su hdfs -c 'hdfs dfs -chown -R root /model'

if __name__ == "__main__":
    PROJECT_DAG.cli()
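The commented-out HDFS setup above could be wired into the same DAG instead of being run by hand. A sketch under that assumption, creating the /model output directories and applying the ownership changes after the ETL staging tasks; the task ids and ordering here are illustrative assumptions, not part of the original pipeline:

# Hypothetical continuation of the DAG above; assumes BashOperator,
# PROJECT_DAG and SENTINEL_INIT_ETL_END from the listing are in scope.
MODEL_DIRS = ['noteevents_with_topics', 'admissions_ccs_ohe', 'admissions_topic_scores']

for model_dir in MODEL_DIRS:
    # Create each /model output directory once the ETL staging tasks finish.
    mkdir_model_op = BashOperator(
        task_id=f'mkdir_model_{model_dir}',
        bash_command=f"sudo su hdfs -c 'hdfs dfs -mkdir -p /model/{model_dir}'",
        dag=PROJECT_DAG)
    mkdir_model_op.set_upstream(SENTINEL_INIT_ETL_END)

# Apply the ownership changes from the commented commands as a single task.
chown_op = BashOperator(
    task_id='chown_hdfs_dirs',
    bash_command="sudo su hdfs -c 'hdfs dfs -chown -R root /mimic && "
                 "hdfs dfs -chown -R root /ccs && "
                 "hdfs dfs -chown -R root /model'",
    dag=PROJECT_DAG)
chown_op.set_upstream(SENTINEL_INIT_ETL_END)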