for connection in sakila_connections:
    for table in sakila_tables:
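        # Export each MySQL table to GCS as newline-delimited JSON; the operator
        # substitutes a file-chunk index for the {} placeholder in filename.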
        extract = MySqlToGoogleCloudStorageOperator(
            task_id="extract_mysql_%s_%s" % (connection, table),
            mysql_conn_id=connection,
            google_cloud_storage_conn_id='gcp_test',
            sql="SELECT *, '%s' as source FROM sakila.%s" %
            (connection, table),
            bucket='ghen-airflow',
            filename="%s/%s/%s{}.json" % (connection, table, table),
            schema_filename="%s/schemas/%s.json" % (connection, table),
            dag=dag)

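        # Load the exported JSON from GCS into BigQuery, reusing the schema file
        # that the extract task wrote alongside the data.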
        load = GoogleCloudStorageToBigQueryOperator(
            task_id="load_bq_%s_%s" % (connection, table),
            bigquery_conn_id='gcp_test',
            google_cloud_storage_conn_id='gcp_test',
            bucket='ghen-airflow',
            destination_project_dataset_table="spark-test-173322.%s.%s" %
            (connection, table),
            source_objects=["%s/%s/%s*.json" % (connection, table, table)],
            schema_object="%s/schemas/%s.json" % (connection, table),
            source_format='NEWLINE_DELIMITED_JSON',
            create_disposition='CREATE_IF_NEEDED',
            write_disposition='WRITE_TRUNCATE',
            project_id='spark-test-173322',
            dag=dag)

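        # Chain per (connection, table) pair: extract -> load -> Slack notification.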
        load.set_upstream(extract)
        slack_notify.set_upstream(load)

nothing_to_update_op = DummyOperator(
    task_id='nothing_to_update',
    dag=dag
)

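# check_to_remove_op and check_to_update_op wait on both "to be updated" checks.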
check_job_posting_to_be_updated_op.set_downstream(check_to_remove_op)
check_job_posting_to_be_updated_op.set_downstream(check_to_update_op)

check_work_experience_to_be_updated_op.set_downstream(check_to_remove_op)
check_work_experience_to_be_updated_op.set_downstream(check_to_update_op)

update_scores_branch_op.set_upstream(check_to_update_op)
remove_scores_op.set_upstream(check_to_remove_op)
nothing_to_remove_op.set_upstream(check_to_remove_op)
nothing_to_update_op.set_upstream(check_to_update_op)

notify_processing_completion_op.set_upstream(nothing_to_remove_op)
notify_processing_completion_op.set_upstream(nothing_to_update_op)

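# The update branch fans out to three feature computations that rejoin at the similarity step.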
update_scores_branch_op.set_downstream(compute_title_feature_op)
update_scores_branch_op.set_downstream(compute_skill_feature_op)
update_scores_branch_op.set_downstream(compute_description_feature_op)

compute_similarity_op.set_upstream(compute_title_feature_op)
compute_similarity_op.set_upstream(compute_skill_feature_op)
compute_similarity_op.set_upstream(compute_description_feature_op)
compute_similarity_op.set_downstream(update_scores_op)
notify_processing_completion_op.set_upstream(update_scores_op)
notify_processing_completion_op.set_upstream(remove_scores_op)

send_slack_cluster_restarted_ok = SlackAPIPostOperator(
    task_id='send_slack_cluster_restarted_ok',
    token=slack_token,  # assumed; the token variable is not shown in this snippet
    channel=slack_channel,
    username='******',
    text='Cluster has been *restarted!*\n'
         'It\'s all fine, move forward with your ETLs and Crawlers!\n'
         'Message datetime: {{params.curr_date}}',
    params={'curr_date': str(datetime.now(pytz.timezone('America/Sao_Paulo')))},
    dag=dag
)

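# Each success path ("already up" vs. "restarted") runs the crawler ETL as its own sub-DAG.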
run_etl_crawler_cluster_up = SubDagOperator(
    subdag=sub_dag('check_cluster_slack', 'crawler_dag_cluster_up', dag.schedule_interval),
    task_id='crawler_dag_cluster_up',
    dag=dag,
)

run_etl_crawler_cluster_restarted = SubDagOperator(
    subdag=sub_dag('check_cluster_slack', 'crawler_dag_cluster_restarted', dag.schedule_interval),
    task_id='crawler_dag_cluster_restarted',
    dag=dag,
)
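
# The sub_dag factory used above is not shown in this snippet. A minimal sketch of
# the common pattern, assuming a start_date that mirrors the parent DAG; note that
# SubDagOperator requires the child dag_id to be '<parent_dag_id>.<task_id>':
from datetime import datetime
from airflow import DAG

def sub_dag(parent_dag_id, child_task_id, schedule_interval):
    return DAG(
        dag_id='%s.%s' % (parent_dag_id, child_task_id),
        schedule_interval=schedule_interval,
        start_date=datetime(2017, 1, 1),  # assumed; keep in sync with the parent DAG
    )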
    
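# Flow: check the cluster and branch; either report it is already up, or start it,
# branch again on the restart result, then kick off the matching crawler sub-DAG.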
branch1.set_upstream(check_cluster)
send_slack_cluster_ok.set_upstream(branch1)
send_slack_cluster_start.set_upstream(branch1)
start_cluster.set_upstream(send_slack_cluster_start)
branch2.set_upstream(start_cluster)
send_slack_cluster_down.set_upstream(branch2)
send_slack_cluster_restarted_ok.set_upstream(branch2)
run_etl_crawler_cluster_up.set_upstream(send_slack_cluster_ok)
run_etl_crawler_cluster_restarted.set_upstream(send_slack_cluster_restarted_ok)
Example #4

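# Create a Databricks cluster, announce it on Slack, then launch model training on ECS.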
create_cluster = PythonOperator(task_id='create_databricks_cluster',
                                dag=dag,
                                python_callable=create_databricks_cluster)

create_cluster_notify = SlackAPIPostOperator(
    task_id='create_cluster_notify',
    username='******',
    token='XXX',
    channel='#databricks_jobs',
    text=":databricks: Databricks Cluster Created with ID: "
         "{{ task_instance.xcom_pull(task_ids='create_databricks_cluster') }}",
    dag=dag)

create_cluster_notify.set_upstream(create_cluster)
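
# The create_databricks_cluster callable is not shown here. A hedged sketch against
# the Databricks REST API 2.0 clusters/create endpoint; the workspace URL, token,
# and cluster spec below are placeholders, and the returned cluster_id becomes the
# task's XCom return_value, which the neighbouring tasks pull:
import requests

def create_databricks_cluster():
    resp = requests.post(
        'https://<workspace>.cloud.databricks.com/api/2.0/clusters/create',
        headers={'Authorization': 'Bearer XXX'},  # placeholder token, as above
        json={
            'cluster_name': 'airflow-risk-assessment',  # hypothetical spec
            'spark_version': '7.3.x-scala2.12',
            'node_type_id': 'i3.xlarge',
            'num_workers': 2,
        },
    )
    resp.raise_for_status()
    return resp.json()['cluster_id']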

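# Train the model on ECS, injecting the Databricks cluster ID pulled from XCom
# into the container's command-line context parameters.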
train_model = ECSOperator(
    task_id="train_model",
    task_definition='trainmodelriskassessment',
    cluster='TalendECS',
    aws_conn_id='aws_default',
    overrides={
        'containerOverrides': [
            {
                'name': "trainmodelriskassessment",
                'command': [
                    "--context_param DATABRICKS_ENDPOINT=XXX",
                    "--context_param DATABRICKS_TOKEN=XXX",
                    "--context_param DATABRICKS_CLUSTER_ID={{ task_instance.xcom_pull(task_ids='create_databricks_cluster') }}"