def begin_pipeline(**kwargs): print("begin_pipeline:") s3_bucket = os.environ.get("S3_BUCKET", "") folder_path = kwargs['dag_run'].conf.get('folder_path') s3_file = kwargs['dag_run'].conf.get('s3_file') uuid_mapping_file = kwargs['dag_run'].conf.get('uuid_mapping_file') biobank_file = kwargs['dag_run'].conf.get('biobank_file') mapping_file = kwargs['dag_run'].conf.get('mapping_file') dp = DagPebbles() download_key = dp.get_download_key(s3_bucket, folder_path, s3_file) pipeline_state_args = { "s3_bucket": s3_bucket, "folder_path": folder_path, "s3_file": s3_file, "uuid_mapping_file": uuid_mapping_file, "biobank_file": biobank_file, "mapping_file": mapping_file, "download_key": download_key } dp.save_pipeline_state(**pipeline_state_args) kwargs["ti"].xcom_push(key="folder_path", value=folder_path) kwargs["ti"].xcom_push(key="s3_file", value=s3_file) kwargs["ti"].xcom_push(key="download_key", value=download_key) kwargs["ti"].xcom_push(key="uuid_mapping_file", value=uuid_mapping_file) kwargs["ti"].xcom_push(key="biobank_file", value=biobank_file) kwargs["ti"].xcom_push(key="mapping_file", value=mapping_file)
def validate_log_file(**kwargs):
    print("validate_log_file:")
    dp = DagPebbles()
    if dp.validate_pipeline_log(kwargs["ti"].xcom_pull(key='download_key')):
        return "pipeline_log_validation_passed"
    else:
        return "pipeline_log_validation_failed"
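# Wiring sketch (an assumption, not taken from this file): since validate_log_file
# returns task ids, it is presumably the python_callable of a BranchPythonOperator,
# roughly:
#
#   t_validate_log_file = BranchPythonOperator(
#       task_id="validate_log_file",
#       python_callable=validate_log_file,
#       provide_context=True,
#       dag=dag,
#   )
#
# with downstream tasks whose task_ids match the returned strings
# ("pipeline_log_validation_passed" / "pipeline_log_validation_failed").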
def pipeline_enable_check(**kwargs):
    dp = DagPebbles()
    if dp.pipeline_enable_check('DECRYPT_FILES'):
        kwargs["ti"].xcom_push(key="S3_BUCKET", value=os.environ.get("S3_BUCKET", ""))
        kwargs["ti"].xcom_push(key="SKIP_DECRYPT_FILES", value="N")
        return "pipeline_check_passed"
    else:
        return "pipeline_check_skipped"
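# Note: downstream BashOperator commands read this flag back through a Jinja
# template such as "{{ ti.xcom_pull(key='SKIP_DECRYPT_FILES') }}", which Airflow
# renders at run time, so the "N" pushed here ends up as the final argument of
# the decrypt script call (see the decrypt_log_file task further down).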
def pipeline_enable_check(**kwargs):
    dp = DagPebbles()
    if dp.pipeline_enable_check('DATA_PIPELINE_INITIATOR'):
        # TODO: get these flags from the database
        kwargs["ti"].xcom_push(key="SKIP_DOWNLOAD_LOG_FILE", value='N')
        kwargs["ti"].xcom_push(key="SKIP_DECRYPT_LOG_FILE", value='N')
        return "pipeline_check_passed"
    else:
        return "pipeline_check_skipped"
def begin_pipeline(**kwargs):
    dp = DagPebbles()
    pipeline = dp.get_current_pipeline()
    print(pipeline)
    packed_dir = os.environ.get("BCH_HPDS_INTERNAL")
    kwargs["ti"].xcom_push(key='packed_dir', value=packed_dir)
    hpds_encrypted_file_name = dp.get_hpds_packed_file_name() + ".encrypted"
    kwargs["ti"].xcom_push(key='hpds_encrypted_file_name', value=hpds_encrypted_file_name)
    hpds_encrypted_file = packed_dir + '/' + hpds_encrypted_file_name
    kwargs["ti"].xcom_push(key='hpds_encrypted_file', value=hpds_encrypted_file)
def begin_pipeline(**kwargs): print("begin_pipeline:") dp = DagPebbles() pipeline = dp.get_current_pipeline() s3_bucket = os.environ.get("S3_BUCKET","") folder_path = pipeline['log_file_path'] s3_file = pipeline['log_file_name'] download_key = dp.get_download_key(s3_bucket, folder_path, s3_file) kwargs["ti"].xcom_push(key="folder_path", value=folder_path) kwargs["ti"].xcom_push(key="s3_file", value=s3_file) kwargs["ti"].xcom_push(key="download_key", value=download_key)
def begin_pipeline(**kwargs):
    dp = DagPebbles()
    pipeline = dp.get_current_pipeline()
    print(pipeline)
    current_time = datetime.now()
    packed_file_name = (
        "hpds_phenotype_"
        + os.environ.get("BCH_PIC_SURE_HPDS_ALS_TAG")
        + "_"
        + current_time.strftime('%m_%d_%Y_%H_%M_%S')
        + "_"
        + os.environ.get("BCH_PIC_SURE_HPDS_ETL_TAG")
        + ".tar.gz"
    )
    packed_dir = os.environ.get("BCH_HPDS_INTERNAL")
    kwargs["ti"].xcom_push(key='packed_file_name', value=packed_file_name)
    kwargs["ti"].xcom_push(key='packed_dir', value=packed_dir)
    dp.save_hpds_package_file_name(packed_file_name)
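# Example of the resulting package name (hypothetical tag values: assuming
# BCH_PIC_SURE_HPDS_ALS_TAG=1.2.3, BCH_PIC_SURE_HPDS_ETL_TAG=0.9.0 and a run at
# 2021-06-01 14:30:00):
#
#   hpds_phenotype_1.2.3_06_01_2021_14_30_00_0.9.0.tar.gz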
def stage_custom_dmp_files(**kwargs):
    print("stage_custom_dmp_files:")
    dp = DagPebbles()
    custom_log_file = kwargs['dag_run'].conf.get('custom_log_file')
    custom_dmp_file = kwargs['dag_run'].conf.get('custom_dmp_file')
    pipeline_args = {
        "custom_log_file": custom_log_file,
        "custom_dmp_file": custom_dmp_file,
    }
    dp.save_pipeline_state(**pipeline_args)
    dp.stage_custom_dmp_files(**pipeline_args)
def stage_uuid_mapping_file(**kwargs):
    print("stage_uuid_mapping_file:")
    dp = DagPebbles()
    dp.stage_uuid_mapping_file(log_file_id=None)
def stage_biobank_file(**kwargs):
    print("stage_biobank_file:")
    dp = DagPebbles()
    dp.stage_biobank_file(log_file_id=None)
t_end = PythonOperator(
    task_id="end",
    python_callable=end,
    provide_context=True,
    trigger_rule="none_failed",
    dag=dag,
)

t_pipeline_begin >> t_check_pipeline
t_check_pipeline >> t_pipeline_check_skipped >> t_end_pipeline
t_check_pipeline >> t_pipeline_check_passed

try:
    dp = DagPebbles()
    pipeline = dp.get_current_pipeline()
    s3_file = pipeline['log_file_name']
    target_log_file = pipeline['log_file_name'].replace(".encrypted", "")
    decrypt_log_file_cmd = (
        "/opt/bitnami/airflow/airflow-data/scripts/decrypt_s3_file.sh "
        + s3_file
        + " {{ ti.xcom_pull(key='SKIP_DECRYPT_FILES')}} "
    )
    t_decrypt_log_file = BashOperator(
        task_id='decrypt_log_file',
        bash_command=decrypt_log_file_cmd,
        dag=dag)
    t_pipeline_check_passed >> t_decrypt_log_file

    files = dp.get_files(log_file_id=None, type='decrypt')
    if files is None or len(files) == 0:
        t_decrypt_log_file >> t_end_pipeline
    else:
def load_data(**kwargs):
    print("load_data:")
    dp = DagPebbles()
    dp.load_data(log_file_id=None)
def pipeline_enable_check(**kwargs):
    dp = DagPebbles()
    if dp.pipeline_enable_check('CONCEPT_DIM_MAPPING'):
        return "pipeline_check_passed"
    else:
        return "pipeline_check_skipped"
def notify(**kwargs):
    dp = DagPebbles()
    print("notify")
t_end = PythonOperator(
    task_id="end",
    python_callable=end,
    provide_context=True,
    trigger_rule="none_failed",
    dag=dag,
)

t_pipeline_begin >> t_check_pipeline
t_check_pipeline >> t_pipeline_check_skipped >> t_end_pipeline
t_check_pipeline >> t_pipeline_check_passed

try:
    dp = DagPebbles()
    pipeline = dp.get_current_pipeline()
    s3_file = pipeline['log_file_name']
    s3_file = DATA_LOCATION + "/" + s3_file
    transfer_log_file_cmd = (
        "perl /opt/bitnami/airflow/airflow-data/scripts/transfer_file_rds.pl "
        + s3_file
        + " {{ ti.xcom_pull(key='SKIP_TRANSFER_FILES')}}"
    )
    print("transfer_log_file_cmd: ")
    print(transfer_log_file_cmd)
    t_transfer_log_file = BashOperator(
        task_id='transfer_log_file',
        bash_command=transfer_log_file_cmd,
        dag=dag)
    t_pipeline_check_passed >> t_transfer_log_file

    files = dp.get_files(log_file_id=None, type='transfer')
def stage_dmp_files1(**kwargs):
    print("stage_dmp_files1:")
    dp = DagPebbles()
    dp.stage_dmp_files1(log_file_id=None)
def clean_hpds_source_data(**kwargs):
    dp = DagPebbles()
    dp.clean_hpds_source_data()
def recreate_hpds_source_data(**kwargs):
    dp = DagPebbles()
    dp.recreate_bch_hpds_data()
def begin_pipeline(**kwargs):
    dp = DagPebbles()
    pipeline = dp.get_current_pipeline()
    print(pipeline)
def pipeline_enable_check(**kwargs):
    dp = DagPebbles()
    if dp.pipeline_enable_check('STAGE_CUSTOM_DMP_FILES'):
        return "pipeline_check_passed"
    else:
        return "pipeline_check_skipped"
def save_pipeline_log(**kwargs):
    print("save_pipeline_log:")
    dp = DagPebbles()
    dp.save_pipeline_log(kwargs["ti"].xcom_pull(key='folder_path'),
                         kwargs["ti"].xcom_pull(key='s3_file'))
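# Note: "folder_path" and "s3_file" are presumably the XCom values pushed by a
# begin_pipeline task earlier in the same DAG, so this task runs downstream of it.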
def cleanup(**kwargs):
    dp = DagPebbles()
    print("cleanup")
def pipeline_enable_check(**kwargs):
    dp = DagPebbles()
    if dp.pipeline_enable_check('DATA_LOAD'):
        return "pipeline_check_passed"
    else:
        return "pipeline_check_skipped"
def end(**kwargs):
    dp = DagPebbles()
    print("end")
t_end = PythonOperator(
    task_id="end",
    python_callable=end,
    provide_context=True,
    trigger_rule="none_failed",
    dag=dag,
)

t_pipeline_begin >> t_check_pipeline
t_check_pipeline >> t_pipeline_check_skipped >> t_end_pipeline
t_check_pipeline >> t_pipeline_check_passed

try:
    dp = DagPebbles()
    pipeline = dp.get_current_pipeline()
    s3_bucket = os.environ.get("S3_BUCKET", "")
    folder_path = pipeline['log_file_path']
    s3_file = pipeline['log_file_name']
    download_key = dp.get_download_key(s3_bucket, folder_path, s3_file)
    download_log_file_cmd = (
        "/opt/bitnami/airflow/airflow-data/scripts/download_s3_file.sh "
        + download_key + " " + s3_file + " " + "N"
    )
    t_download_log_file = BashOperator(
        task_id='download_log_file',
        bash_command=download_log_file_cmd,
        dag=dag)
    t_pipeline_check_passed >> t_download_log_file

    files = dp.get_files(log_file_id=None, type='download')
    if files is None or len(files) == 0:
        t_download_log_file >> t_end_pipeline