# [START load_function]
def load(**kwargs):
    ti = kwargs['ti']
    total_value_string = ti.xcom_pull(task_ids='transform', key='total_order_value')
    total_order_value = json.loads(total_value_string)

    print(total_order_value)
# [END load_function]

# [START main_flow]
extract_task = PythonOperator(
    task_id='extract',
    python_callable=extract,
)
extract_task.doc_md = dedent("""\
#### Extract task
A simple Extract task to get data ready for the rest of the data pipeline.
In this case, getting data is simulated by reading from a hardcoded JSON string.
This data is then put into xcom, so that it can be processed by the next task.
""")

transform_task = PythonOperator(
    task_id='transform',
    python_callable=transform,
)
transform_task.doc_md = dedent("""\
#### Transform task
A simple Transform task which takes in the collection of order data from xcom
and computes the total order value.
This computed value is then put into xcom, so that it can be processed by the next task.
""")

load_task = PythonOperator(
    task_id='load',
    python_callable=load,
)
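# The operators above reference `extract` and `transform` callables that are not part of
# this excerpt (in the original file they are defined before the tasks). A minimal sketch
# of what they might look like, assuming the hardcoded-JSON / XCom flow described in the
# doc_md strings; the sample order data is illustrative, not taken from the original:
def extract(**kwargs):
    ti = kwargs['ti']
    # Simulate extraction by "reading" a hardcoded JSON string and pushing it to XCom.
    data_string = '{"1001": 301.27, "1002": 433.21, "1003": 502.22}'
    ti.xcom_push(key='order_data', value=data_string)


def transform(**kwargs):
    ti = kwargs['ti']
    # Pull the extracted orders, total them, and push the result for the load task.
    order_data = json.loads(ti.xcom_pull(task_ids='extract', key='order_data'))
    total_order_value = sum(order_data.values())
    ti.xcom_push(key='total_order_value', value=json.dumps(total_order_value))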
# For each region
export_by_gene_task = PythonOperator(
    task_id=f'export_premsa_sequences_{gene}',
    python_callable=export_premsa_sequences,
    op_kwargs={
        "config": default_args['params'],
        'nuc_output_fn': nuc_sequence_output,
        'prot_output_fn': prot_sequence_output,
        'gene': gene,
    },
    pool='mongo',
    dag=dag,
)
export_by_gene_task.set_upstream(gene_mk_dir_task)

export_bealign_task = PythonOperator(
    task_id=f'export_bealign_{gene}',
    python_callable=export_bealign_sequences,
    op_kwargs={
        "config": default_args['params'],
        'nuc_output_fn': bealign_nuc_sequence_output,
        'gene': gene,
    },
    provide_context=True,
    pool='mongo',
    dag=dag,
)
export_bealign_task.set_upstream(gene_mk_dir_task)

# Collect the task objects (not the callables) so they can be wired up downstream.
export_by_gene.extend([export_duplicates_task, export_by_gene_task, export_bealign_task])

dag.doc_md = __doc__

export_by_gene_task.doc_md = """\
# Task Documentation
Exports by specific gene
"""
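# Note: pool='mongo' throttles these exports against a shared Airflow pool so only a
# limited number of MongoDB-backed tasks run at once. The pool is not created by the DAG
# itself; it has to exist already (Admin -> Pools in the UI, or via the CLI). The slot
# count below is illustrative, and the exact CLI form depends on the Airflow version
# (Airflow 2.x shown):
#
#     airflow pools set mongo 4 "MongoDB export slots"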
export_premsa_sequence_task = PythonOperator(
    task_id=f'export_premsa_sequences_{gene}',
    python_callable=export_premsa_sequences,
    op_kwargs={
        "config": default_args['params'],
        'nuc_output_fn': nuc_sequence_output,
        'prot_output_fn': prot_sequence_output,
        'gene': gene,
    },
    dag=dag,
)

export_duplicates_task = PythonOperator(
    task_id=f'export_duplicates_{gene}',
    python_callable=export_duplicates,
    op_kwargs={
        'output_fn': initial_duplicate_output,
        'gene': gene,
    },
    dag=dag,
)

export_by_gene.append(export_premsa_sequence_task)
export_by_gene.append(export_duplicates_task)

dag.doc_md = __doc__

export_sequences.doc_md = """\
#### Task Documentation
Creates a directory and exports selected sequences
"""

# Add export meta and export sequence tasks to be executed in parallel
export_by_gene.extend([export_meta, export_sequences])

mk_dir >> export_by_gene
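# The f-string task_ids above imply that these operators are built inside a per-gene loop
# that collects every task in `export_by_gene`, so `mk_dir >> export_by_gene` fans out from
# the directory-creation task to all of the per-gene exports at once. A minimal sketch of
# that surrounding pattern; the gene list is hypothetical, only one operator is shown per
# iteration, and per-gene paths such as initial_duplicate_output would normally also be
# computed inside the loop:
export_by_gene = []
for gene in ['S', 'M', 'N', 'ORF1a']:  # hypothetical gene list
    export_by_gene.append(
        PythonOperator(
            task_id=f'export_duplicates_{gene}',
            python_callable=export_duplicates,
            op_kwargs={'output_fn': initial_duplicate_output, 'gene': gene},
            dag=dag,
        )
    )
# The >> operator accepts a list, so each collected task becomes a direct downstream of
# mk_dir and the per-gene exports can run in parallel.
mk_dir >> export_by_gene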
file = str(entry.name)
filename = '/Users/leandroarruda/Codes/UdacityCapstone/data/processed/{}'.format(file)
destination = 'covid19/staging/{}'.format(file)
bucket_name = 'udacity-data-lake'
upload_file(file_name=filename, bucket=bucket_name, object_name=destination)

# [START main_flow]
extract_task = PythonOperator(
    task_id='extract',
    python_callable=extract,
)
extract_task.doc_md = dedent("""\
#### Extract task
A simple Extract task to get data ready for the rest of the data pipeline.
""")

transform_task = PythonOperator(
    task_id='transform',
    python_callable=transform,
)
transform_task.doc_md = dedent("""\
#### Transform task
A simple Transform task which takes in the collection of order data from csv files,
normalizes, standardizes, and creates date fields.
""")

load_task = PythonOperator(
    task_id='load',
    python_callable=load,
)
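# The upload_file helper called above is not defined in this excerpt. A minimal sketch,
# assuming it wraps boto3's S3 client in the usual way; the error handling and return
# value are illustrative, not taken from the original project:
import logging
import os

import boto3
from botocore.exceptions import ClientError


def upload_file(file_name, bucket, object_name=None):
    """Upload a local file to S3, defaulting the object key to the file's basename."""
    if object_name is None:
        object_name = os.path.basename(file_name)
    s3_client = boto3.client('s3')
    try:
        s3_client.upload_file(file_name, bucket, object_name)
    except ClientError as error:
        logging.error(error)
        return False
    return True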
t4 = BashOperator(
    task_id='move_tsla_file',
    bash_command='mv /c/GoogleDrive/Springboard/airflow-mini-project/data_tsla.csv $AIRFLOW_HOME/data/"$(date +"%Y-%m-%d")"',
    dag=dag,
)

t5 = PythonOperator(
    task_id='query_data',
    python_callable=query_data,
)

dag.doc_md = __doc__

t1.doc_md = """\
#### Task Documentation
You can document your task using the attributes `doc_md` (markdown), `doc` (plain text),
`doc_rst`, `doc_json`, `doc_yaml`, which get rendered in the UI's Task Instance Details page.
![img](http://montcs.bloomu.edu/~bobmon/Semesters/2012-01/491/import%20soul.png)
"""

templated_command = """
{% for i in range(5) %}
    echo "{{ ds }}"
    echo "{{ macros.ds_add(ds, 7) }}"
    echo "{{ params.my_param }}"
{% endfor %}
"""

t0 >> t1 >> t3 >> t5
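# templated_command is defined above but never attached to a task in this excerpt. In the
# stock Airflow tutorial it is passed as the bash_command of a BashOperator, with a params
# dict supplying my_param; a minimal sketch of that usage (the task_id and params value
# are illustrative):
t6 = BashOperator(
    task_id='templated',
    bash_command=templated_command,
    params={'my_param': 'Parameter I passed in'},
    dag=dag,
)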