Example #1
}

dag = DAG('property_classification', default_args=default_args)

fetch_images_task = PythonOperator(task_id='fetch_images',
                                   python_callable=fetch_images,
                                   dag=dag,
                                   op_kwargs={
                                       'input_file': input_file_path,
                                       'is_training': training
                                   })

alex_net_task = PythonOperator(task_id='alex_net',
                               python_callable=alex,
                               dag=dag)
vgg_task = PythonOperator(task_id='vgg', python_callable=vgg, dag=dag)
res_net_task = PythonOperator(task_id='res_net',
                              python_callable=resnet,
                              dag=dag)

ensemble_task = PythonOperator(task_id='ensemble',
                               python_callable=final_predictions,
                               dag=dag)

# define the relationship between the tasks
fetch_images_task.set_downstream(alex_net_task)
alex_net_task.set_downstream(vgg_task)
vgg_task.set_downstream(res_net_task)
res_net_task.set_downstream(ensemble_task)

# TODO: check run time for parallel execution of alex_net, vgg and res_net
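# A sketch of the parallel wiring the TODO above hints at: instead of chaining the three
# model tasks, fan fetch_images out to all of them and join at the ensemble step. This
# would replace the sequential chain above, not be added on top of it.
fetch_images_task.set_downstream([alex_net_task, vgg_task, res_net_task])
alex_net_task.set_downstream(ensemble_task)
vgg_task.set_downstream(ensemble_task)
res_net_task.set_downstream(ensemble_task)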
    op_kwargs={
        'wekeo_url':
        BaseHook.get_connection("wekeo_hda").host,
        'username':
        BaseHook.get_connection("wekeo_hda").login,
        'password':
        BaseHook.get_connection("wekeo_hda").password,
        'wekeo_job_id':
        'EGUIBC37kepM90lTGVNTHpIdfuA',
        'item_url':
        '6a143583-a6a4-53e6-9e6d-8d4edc60a702/S5P_RPRO_L2__NO2____20180503T093059_20180503T111427_02866_01_010202_20190202T034117',
        'output_filepath':
        './wekeo_data_storage/S5P_RPRO_L2__NO2____20180503T093059_20180503T111427_02866_01_010202_20190202T034117'
    },
    queue='process')
wekeo_0.set_downstream([load_collection_0])

wekeo_1 = PythonOperator(
    task_id='wekeo_download_1',
    dag=dag,
    python_callable=download_wekeo_data,
    op_kwargs={
        'wekeo_url':
        BaseHook.get_connection("wekeo_hda").host,
        'username':
        BaseHook.get_connection("wekeo_hda").login,
        'password':
        BaseHook.get_connection("wekeo_hda").password,
        'wekeo_job_id':
        'EGUIBC37kepM90lTGVNTHpIdfuA',
        'item_url':
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': True,
    'retries': 0,
}

with DAG(
        'PriceAlerter',
        default_args=default_args,
        schedule_interval='*/5 9-17 * * *',
        catchup=False,
) as dag:
    price_listener = PythonOperator(
        task_id='listener',
        python_callable=listener.listen,
    )
    email_trigger = ShortCircuitOperator(
        task_id='email_trigger',
        python_callable=lambda: bool(listener.trigger),
        trigger_rule=TriggerRule.NONE_FAILED,
    )
    email = EmailOperator(
        task_id='email',
        to=email_service.email_list,
        subject=email_service.get_email_subject(listener.summary),
        html_content=email_service.get_html_content(listener.summary),
    )

price_listener.set_downstream(email_trigger)
email_trigger.set_downstream(email)
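
# Note: module-level state such as listener.trigger is generally not visible across tasks,
# because each task can run in its own process. A hedged sketch of passing the flag and
# summary through XCom instead; listen_and_push, should_email, the XCom keys and the
# assumption that listener.listen returns the summary are illustrative, not part of the
# original example.
def listen_and_push(**context):
    summary = listener.listen()
    context['ti'].xcom_push(key='summary', value=summary)
    return bool(listener.trigger)  # PythonOperator pushes the return value to XCom

def should_email(**context):
    # ShortCircuitOperator skips the email task when this returns a falsy value
    return context['ti'].xcom_pull(task_ids='listener', key='return_value')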
Example #4


t9 = \
    PythonOperator(
        task_id='Files_has_extracted',
        python_callable=extract,
        dag=dag)

t10 = \
    PythonOperator(
        task_id='Email_notify3',
        python_callable=email,
        dag=dag)

t1.set_downstream(t0)
t1.set_downstream(t2)
t2.set_downstream(t3)
t3.set_downstream(t0)
t3.set_downstream(t4)
t4.set_downstream(t5)
t5.set_downstream(t0)
t5.set_downstream(t6)
t6.set_downstream(t8)
t8.set_downstream(t9)
t8.set_downstream(t0)
t5.set_downstream(t7)
t7.set_downstream(t8)
t5.set_downstream(t8)
t9.set_downstream(t10)
t10.set_downstream(t0)
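
# The same edges can also be declared with Airflow's bitshift operators; for example, the
# first few relationships above are equivalent to:
t1 >> [t0, t2]
t2 >> t3 >> [t0, t4]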
Example #5
# replace user with your username
base_dir = '/home/user/idsp_pipeline'
default_args = {
    'owner': 'user',
    'depends_on_past': False,
    'start_date': dt.datetime.strptime('2017-04-17T00:00:00',
                                       '%Y-%m-%dT%H:%M:%S'),
    'provide_context': True
}
dag = DAG('idsp_v1',
          default_args=default_args,
          schedule_interval='0 0 * * 2',
          max_active_runs=1)

web_scrape_task = PythonOperator(task_id='scrape_web',
                                 python_callable=scrape_web,
                                 op_kwargs={'base_dir': base_dir},
                                 dag=dag)
pdf_scrape_task = PythonOperator(task_id='scrape_pdf',
                                 python_callable=scrape_pdf,
                                 op_kwargs={'base_dir': base_dir},
                                 dag=dag)
add_to_dataset_task = PythonOperator(task_id='add_to_dataset',
                                     python_callable=add_to_dataset,
                                     op_kwargs={'base_dir': base_dir},
                                     dag=dag)

# define the relationship between the tasks using set_downstream
web_scrape_task.set_downstream(pdf_scrape_task)
pdf_scrape_task.set_downstream(add_to_dataset_task)
Example #6
        echo "{{ ds }}"
        echo "{{ macros.ds_add(ds, 7)}}"
        echo "{{ params.my_param }}"
    {% endfor %}
"""

join = DummyOperator(
    task_id='join',
    trigger_rule='all_done',
    dag=dag
)

sum_up = PythonOperator(
    task_id='sum_up',
    provide_context=True,
    python_callable=sum_up_task,
    dag=dag,
    execution_timeout=timedelta(seconds=60),
    on_failure_callback=notify_failure,
    )

p1.set_upstream(p0)
p2.set_upstream(p0)
p3.set_upstream(p0)
c1.set_upstream(p1)
c2.set_upstream(p2)
c3.set_upstream(p3)
c3.set_downstream(join)
c2.set_downstream(join)
sum_up.set_upstream(join)
Example #7
    attachment.add_header("Content-Disposition", "attachment", filename=filename)
    msg = MIMEMultipart()
    msg.attach(attachment)
    msg["Subject"] = str("Resultado Analise Notebook")
    msg["From"] = "*****@*****.**"
    msg["Reply-to"] = "*****@*****.**"

    server = smtplib.SMTP("smtp.gmail.com:587")
    server.ehlo()
    server.starttls()
    server.login("*****@*****.**", "a.24423242")
    server.sendmail(msg["From"], "*****@*****.**", msg.as_string())
    server.quit()


def acessNotebook():
    print("Acessando Notebook no Swift")


runZero = PythonOperator(task_id="Acess_Notebook", provide_context=False, python_callable=acessNotebook, dag=dag)

runFirst = PythonOperator(task_id="ExecNotebook", provide_context=False, python_callable=execNotebook, dag=dag)

runSecond = PythonOperator(task_id="Get_Output", provide_context=False, python_callable=getOutput, dag=dag)

runThird = PythonOperator(task_id="send_email", provide_context=False, python_callable=send_email, dag=dag)

runZero.set_downstream(runFirst)
runFirst.set_downstream(runSecond)
runSecond.set_downstream(runThird)
Example #8
        wrtr.writerow(['url', 'count'])
        wrtr.writerows(cntr.most_common(5))


simple_search = PythonOperator(task_id='search_twitter',
                               provide_context=True,
                               python_callable=search_twitter,
                               dag=dag,
                               params={'query': '#python'})

move_tweets_to_sqlite = PythonOperator(task_id='csv_to_sqlite',
                                       provide_context=True,
                                       python_callable=csv_to_sqlite,
                                       dag=dag)

id_popular = PythonOperator(task_id='identify_popular_links',
                            provide_context=True,
                            python_callable=identify_popular_links,
                            dag=dag)

email_links = EmailOperator(task_id='email_best_links',
                            to='*****@*****.**',
                            subject='Latest popular links',
                            html_content='Check out the latest!!',
                            files=['{}/latest_links.txt'.format(RAW_TWEET_DIR)],
                            dag=dag)

simple_search.set_downstream(move_tweets_to_sqlite)
id_popular.set_upstream(move_tweets_to_sqlite)
email_links.set_upstream(id_popular)
                            html_content='Check out the latest!!',
                            files=['{}/latest_links.txt'.format(RAW_TWEET_DIR)],
                            dag=dag)


sub = SubDagOperator(subdag=subdag,
                     task_id='insert_and_id_pop',
                     trigger_rule='one_success',
                     dag=dag)


clear_latest = BashOperator(bash_command='rm -rf {}/latest_links.txt'.format(
    RAW_TWEET_DIR), task_id='clear_latest', dag=dag)


gen_search_terms.set_upstream(fill_search_terms)

for term in SEARCH_TERMS:
    term_without_punctuation = re.sub(r'\W+', '', term)
    simple_search = PythonOperator(
        task_id='search_{}_twitter'.format(term_without_punctuation),
        provide_context=True,
        python_callable=search_twitter,
        dag=dag,
        params={'query': term})
    simple_search.set_upstream(gen_search_terms)
    simple_search.set_downstream(sub)

sub.set_downstream(email_links)
email_links.set_downstream(clear_latest)
def create_dag(dag_id, schedule, start_date, delta_sensor, airpots_codes,
               default_args):

    dag = DAG(dag_id,
              schedule_interval=schedule,
              start_date=start_date,
              default_args=default_args)

    dag.doc_md = """
    # DAG fetching data from smiles.com.ar
    ### procesing and dumping on postgresql
    """
    """start = TimeDeltaSensor(
        task_id='wait_to_start',
        delta=timedelta(minutes=delta_sensor),
        dag=dag)"""

    start = DummyOperator(task_id="start", dag=dag)

    branches = []

    def return_dates_branches(**kwargs):
        return branches

    gen_url_branch = BranchPythonOperator(
        task_id='generate_url_dates',
        provide_context=True,
        python_callable=return_dates_branches,
        dag=dag)

    def transform_data(**kwargs):
        ti = kwargs['ti']
        raw_data = ti.xcom_pull(task_ids=return_dates_branches())
        data = []
        logging.info(raw_data)
        if raw_data is not None:
            flat_list = [item for sublist in raw_data for item in sublist]
            for row in flat_list:
                row = list(row)
                # add À-ÿ for spanish accents
                date = '/'.join(
                    list(
                        re.compile(r"([A-ZÀ-ÿ]+)(\d+)([A-ZÀ-ÿ]+)").split(
                            row[1]))[2:4])
                date = dateparser.parse(date,
                                        languages=['pt', 'es'],
                                        date_formats=['%d/%b'
                                                      ]).strftime('%Y-%m-%d')
                row[1] = date
                td = row[4].split(':')
                row[4] = str(timedelta(hours=int(td[0]), minutes=int(td[1])))
                row[5] = int(row[5].replace('.', ''))
                row[6] = int(row[6].replace('.', ''))
                row[8] = row[8].split(' ')[-1]
                row.insert(0, datetime.now().strftime('%Y-%m-%d'))
                data.append(tuple(row))
            return data
        else:
            print('No data received')

    t2 = PythonOperator(
        task_id='transform_data',
        python_callable=transform_data,
        depends_on_past=True,
        trigger_rule=TriggerRule.ALL_SUCCESS,
        provide_context=True,
        dag=dag,
    )

    t2.doc_md = """
    #### Task Documentation
    Transform fetched data
    @return a list of tuples
    """

    # def gen_url_dates(**kwargs):
    date_start = read_scraped_date(airpots_codes)
    date_end = date_start + timedelta(days=AMOUNT_DAYS)
    date_generated = [
        date_start + timedelta(days=x)
        for x in range(0, (date_end - date_start).days)
    ]

    for i, date in enumerate(date_generated):
        date_ml = str(date.timestamp())[:8] + '00000'
        url_dated = """https://www.smiles.com.ar/emission?originAirportCode={}&destinationAirportCode={}&departureDate={}&adults=1&children=0&infants=0&isFlexibleDateChecked=false&tripType=3&currencyCode=BRL&segments=2&departureDate2={}&originAirportCode2={}&destinationAirportCode2={}""".format(
            airpots_codes[0][0], airpots_codes[1], date_ml, date_ml,
            airpots_codes[0][1], airpots_codes[1])

        get_data_op = PythonOperator(
            task_id='get_data_{}and{}to{}_{}'.format(airpots_codes[0][0],
                                                     airpots_codes[0][1],
                                                     airpots_codes[1], i),
            python_callable=get_data_URL,
            op_kwargs={'URL': url_dated},
            trigger_rule=TriggerRule.ONE_SUCCESS,
            provide_context=True,
            dag=dag,
        )
        branches.append(get_data_op.task_id)
        get_data_op.set_upstream(gen_url_branch)
        get_data_op.set_downstream(t2)
        get_data_op.doc_md = """
        #### Task Documentation
        Fetch data from passed url
        return list of semi-parsed data
        """

    insert_data = PythonOperator(
        task_id='insert_data',
        python_callable=insert_into_table,
        provide_context=True,
        dag=dag,
    )

    insert_data.doc_md = """
    #### Task Documentation
    Insert parsed and transformed data into table
    """
    t2.set_downstream(insert_data)
    gen_url_branch.set_upstream(start)

    return dag
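
# A common way to use a factory like create_dag is to register each returned DAG in
# globals() so the scheduler can discover it. The airport pairs, schedule and dates below
# are illustrative values only, and default_args is assumed to be defined elsewhere.
for origin_pair, destination in [(('EZE', 'COR'), 'GRU')]:
    dag_id = 'smiles_{}_{}_{}'.format(origin_pair[0], origin_pair[1], destination)
    globals()[dag_id] = create_dag(dag_id,
                                   schedule='0 6 * * *',
                                   start_date=datetime(2019, 1, 1),
                                   delta_sensor=30,
                                   airpots_codes=(origin_pair, destination),
                                   default_args=default_args)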
Example #11
    subject='Latest popular links',
    html_content='Check out the latest!!',
    files=['{}/latest_links.txt'.format(RAW_TWEET_DIR)],
    dag=dag)

sub = SubDagOperator(subdag=subdag,
                     task_id='insert_and_id_pop',
                     trigger_rule='one_success',
                     dag=dag)

clear_latest = BashOperator(
    bash_command='rm -rf {}/latest_links.txt'.format(RAW_TWEET_DIR),
    task_id='clear_latest',
    dag=dag)

gen_search_terms.set_upstream(fill_search_terms)

for term in SEARCH_TERMS:
    term_without_punctuation = re.sub(r'\W+', '', term)
    simple_search = PythonOperator(
        task_id='search_{}_twitter'.format(term_without_punctuation),
        provide_context=True,
        python_callable=search_twitter,
        dag=dag,
        params={'query': term})
    simple_search.set_upstream(gen_search_terms)
    simple_search.set_downstream(sub)

sub.set_downstream(email_links)
email_links.set_downstream(clear_latest)
    entries_to_delete = session.query(airflow_db_model).filter(
        age_check_column <= max_date,
        ).all()
    logging.info("Process will be Deleting the following " + str(airflow_db_model.__name__) + "(s):")
    for entry in entries_to_delete:
        logging.info("\tEntry: " + str(entry) + ", Date: " + str(entry.__dict__[str(age_check_column).split(".")[1]]))
    logging.info("Process will be Deleting " + str(len(entries_to_delete)) + " " + str(airflow_db_model.__name__) + "(s)")

    if ENABLE_DELETE:
        logging.info("Performing Delete...")
        for entry in entries_to_delete:
            session.delete(entry)
        logging.info("Finished Performing Delete")
    else:
        logging.warning("You've opted to skip deleting the db entries!!!")

    logging.info("Finished Running Cleanup Process")

for db_object in DATABASE_OBJECTS:

    cleanup = PythonOperator(
        task_id='cleanup_' + str(db_object["airflow_db_model"].__name__),
        python_callable=cleanup_function,
        params=db_object,
        provide_context=True,
        dag=dag
    )

    print_configuration.set_downstream(cleanup)
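
# DATABASE_OBJECTS is referenced above but not shown; a minimal sketch of the shape the
# loop expects (it would be defined before the loop, and the models/columns shown here are
# illustrative and depend on your Airflow version).
from airflow.models import DagRun, TaskInstance, Log

DATABASE_OBJECTS = [
    {"airflow_db_model": DagRun, "age_check_column": DagRun.execution_date},
    {"airflow_db_model": TaskInstance, "age_check_column": TaskInstance.execution_date},
    {"airflow_db_model": Log, "age_check_column": Log.dttm},
]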
Example #13
    return 'Whatever you return gets printed in the logs'


setup_jobs = PythonOperator(task_id='setup_jobs',
                            provide_context=True,
                            python_callable=setup_jobs_fn,
                            dag=dag)


def collect_results_fn(ds, **kwargs):
    pprint(kwargs)
    print(ds)


collect_results = PythonOperator(task_id='collect_results',
                                 provide_context=True,
                                 python_callable=collect_results_fn,
                                 dag=dag)

for i in range(10):
    '''
    Generating 10 sleeping tasks, sleeping from 0 to 9 seconds
    respectively
    '''
    task = PythonOperator(task_id='sleep_for_' + str(i),
                          python_callable=my_sleeping_function,
                          op_kwargs={'random_base': float(i) / 10},
                          dag=dag)
    task.set_upstream(setup_jobs)
    task.set_downstream(collect_results)
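
# my_sleeping_function is referenced above but not shown; a minimal sketch (it would live
# near the top of the file), assuming random_base is simply a sleep time in seconds — with
# float(i) / 10 this gives sleeps of 0.0 to 0.9 seconds:
import time

def my_sleeping_function(random_base):
    """Sleep for random_base seconds and return the value for the logs."""
    time.sleep(random_base)
    return random_base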
Example #14
t7 = PythonOperator(
        task_id='vcf',
        python_callable=create_container,
        op_kwargs={'container_image': VCF_IMAGE, 'cmd': VCF_CMD},
        provide_context=True,
        dag=dag)

t8 = PythonOperator(
        task_id='vcf_wait',
        python_callable=noop,
        op_kwargs={},
        provide_context=True,
        dag=dag)

t9 = PythonOperator(
        task_id='vcf_cleanup',
        python_callable=cleanup,
        op_kwargs={},
        provide_context=True,
        dag=dag)

t1.set_downstream(t2)
t1.set_downstream(t3)
t2.set_downstream(t4)
t4.set_downstream(t5)
t4.set_downstream(t6)
t5.set_downstream(t7)
t7.set_downstream(t8)
t7.set_downstream(t9)
simple_search = PythonOperator(task_id='search_twitter',
                               provide_context=True,
                               python_callable=search_twitter,
                               dag=dag,
                               params={'query': '#python'})


move_tweets_to_sqlite = PythonOperator(task_id='csv_to_sqlite',
                                       provide_context=True,
                                       python_callable=csv_to_sqlite,
                                       dag=dag)


id_popular = PythonOperator(task_id='identify_popular_links',
                            provide_context=True,
                            python_callable=identify_popular_links,
                            dag=dag)


email_links = EmailOperator(task_id='email_best_links',
                            to='*****@*****.**',
                            subject='Latest popular links',
                            html_content='Check out the latest!!',
                            files=['{}/latest_links.txt'.format(RAW_TWEET_DIR)],
                            dag=dag)


simple_search.set_downstream(move_tweets_to_sqlite)
id_popular.set_upstream(move_tweets_to_sqlite)
email_links.set_upstream(id_popular)
Example #16
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=1),
}

# DAG is scheduled to run every 8 hours
dag = DAG('PostTweet',
          schedule_interval=timedelta(hours=8),
          default_args=default_args)

# This dag will stage all the tweets to the csv file
t1 = PythonOperator(task_id='stage_tweets',
                    python_callable=stage_tweets,
                    dag=dag)

#This Dag will commit all the tweets to the csv file
t2 = PythonOperator(task_id='commit_tweets',
                    python_callable=commit_tweets,
                    dag=dag)

# This dag is used to send the tweets to twitter
t3 = PythonOperator(task_id='post_status', python_callable=post_tweet, dag=dag)

# Backup all the files and tweets to google drive
t4 = PythonOperator(task_id='backup', python_callable=upload.main, dag=dag)

t1.set_downstream(t2)
t2.set_downstream(t3)
t3.set_downstream(t4)
Example #17
    schedule_interval="@once")
# used to fatorize the code and avoid repetition
tabDays = ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"]

def get_day(**kwargs):
    kwargs['ti'].xcom_push(key='day', value=datetime.now().weekday())


get_weekday = PythonOperator(
    task_id='weekday',
    python_callable=get_day,
    provide_context=True,
    dag=dag
)

def branch(**kwargs):
    return 'task_for_' + 'wednesday'



fork = BranchPythonOperator(
    task_id='branching',
    python_callable=branch,
    provide_context=True,
    dag=dag)

get_weekday.set_downstream(fork)


for day in range(7):
    fork.set_downstream(DummyOperator(task_id='task_for_' + tabDays[day], dag=dag))
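
# The branch callable above always picks Wednesday; a sketch that instead branches on the
# weekday pushed to XCom by the 'weekday' task (branch_on_weekday is an illustrative name
# and would be passed as python_callable to the BranchPythonOperator):
def branch_on_weekday(**kwargs):
    day_index = kwargs['ti'].xcom_pull(task_ids='weekday', key='day')
    return 'task_for_' + tabDays[day_index]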
Example #18
# Integrating read_data operator in airflow dag
read_table = PythonOperator(task_id='read_table',
                            python_callable=read_data,
                            op_kwargs={'fig_path': fig_path},
                            dag=dag)
# Integrating data_report operator in airflow dag
# (named data_report_task so the operator does not shadow the data_report callable)
data_report_task = PythonOperator(task_id='data_report',
                                  python_callable=data_report,
                                  op_kwargs={'fig_path': fig_path},
                                  dag=dag)
# Integrating plots operator in airflow dag
plots = PythonOperator(task_id='var_dist_plots',
                       python_callable=plot_var_distributions,
                       op_kwargs={'fig_path': fig_path},
                       dag=dag)
# Integrating train_test operator in airflow dag
train_test = PythonOperator(task_id='train_test',
                            python_callable=make_train_test,
                            op_kwargs={'fig_path': fig_path},
                            dag=dag)
# Integrating model_run operator in airflow dag
model_run = PythonOperator(task_id='model_run',
                           python_callable=run_model,
                           op_kwargs={'fig_path': fig_path},
                           dag=dag)

# Set the task sequence
read_table.set_downstream(data_report_task)
data_report_task.set_downstream([plots, train_test])
train_test.set_downstream(model_run)
Example #19
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': airflow.utils.dates.days_ago(2),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False
}

dag = DAG('athena_query_wk',
          default_args=default_args,
          dagrun_timeout=timedelta(hours=2),
          schedule_interval='0 3 * * *')

submit_query = PythonOperator(task_id='submit_athena_query',
                              python_callable=run_athena_query,
                              op_kwargs={
                                  'query': count_query,
                                  'db': db_name,
                                  's3_output': s3_ouput
                              },
                              dag=dag)

check_query_result = PythonOperator(task_id='check_query_result',
                                    python_callable=check_query_status,
                                    provide_context=True,
                                    dag=dag)

submit_query.set_downstream(check_query_result)
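
# check_query_status is not shown above; a hedged sketch of how it might poll Athena with
# boto3 (it would be defined before the operators above), assuming run_athena_query
# returns the query execution id so that it lands in XCom:
import time
import boto3

def check_query_status(**context):
    execution_id = context['ti'].xcom_pull(task_ids='submit_athena_query')
    client = boto3.client('athena')
    while True:
        state = client.get_query_execution(
            QueryExecutionId=execution_id)['QueryExecution']['Status']['State']
        if state in ('SUCCEEDED', 'FAILED', 'CANCELLED'):
            break
        time.sleep(10)
    if state != 'SUCCEEDED':
        raise ValueError('Athena query ended in state {}'.format(state))
    return state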
Example #20
    task_id='setup_jobs',
    provide_context=True,
    python_callable=setup_jobs_fn,
    dag=dag)


def collect_results_fn(ds, **kwargs):
    pprint(kwargs)
    print(ds)


collect_results = PythonOperator(
    task_id='collect_results',
    provide_context=True,
    python_callable=collect_results_fn,
    dag=dag)


for i in range(10):
    '''
    Generating 10 sleeping tasks, sleeping from 0 to 9 seconds
    respectively
    '''
    task = PythonOperator(
        task_id='sleep_for_'+str(i),
        python_callable=my_sleeping_function,
        op_kwargs={'random_base': float(i)/10},
        dag=dag)
    task.set_upstream(setup_jobs)
    task.set_downstream(collect_results)
            <table>
                <tr><td><b> Task ID: </b></td><td>{{ task_instance.task_id }}</td></tr>
                <tr><td><b> Execution Date: </b></td><td>{{ task_instance.execution_date }}</td></tr>
                <tr><td><b> Start Date: </b></td><td>{{ task_instance.start_date }}</td></tr>
                <tr><td><b> End Date: </b></td><td>{{ task_instance.end_date }}</td></tr>
                <tr><td><b> Host Name: </b></td><td>{{ task_instance.hostname }}</td></tr>
                <tr><td><b> Unix Name: </b></td><td>{{ task_instance.unixname }}</td></tr>
                <tr><td><b> Job ID: </b></td><td>{{ task_instance.job_id }}</td></tr>
                <tr><td><b> Queued Date Time: </b></td><td>{{ task_instance.queued_dttm }}</td></tr>
                <tr><td><b> Log URL: </b></td><td><a href="{{ task_instance.log_url }}">{{ task_instance.log_url }}</a></td></tr>
            </table>

            <h2>Processes Killed</h2>
            <ul>
            {% for process_killed in task_instance.xcom_pull(task_ids='kill_halted_tasks', key='kill_halted_tasks.processes_to_kill') %}
                <li>Process {{loop.index}}</li>
                <ul>
                {% for key, value in process_killed.items() %}
                    <li>{{ key }}: {{ value }}</li>
                {% endfor %}
                </ul>
            {% endfor %}
            </ul>
        </body>
    </html>
    """,
    dag=dag)

kill_halted_tasks.set_downstream(email_or_not_branch)
email_or_not_branch.set_downstream(send_processes_killed_email)
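
# The e-mail template above iterates over an XCom pushed by the kill_halted_tasks task; a
# sketch of the push it expects. The function name and dictionary contents are illustrative
# only — the template simply renders every key/value pair of each entry.
def kill_halted_tasks_fn(**context):
    processes_to_kill = [
        {'pid': '12345', 'airflow_command': 'airflow run some_dag some_task ...'},
    ]
    context['ti'].xcom_push(key='kill_halted_tasks.processes_to_kill',
                            value=processes_to_kill)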
Example #22
    'owner': 'ryan',
    'depends_on_past': False,
    'start_date': datetime.utcnow(),
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

# Run at the top of the hour Monday to Friday.
# Note: This doesn't line up with the market hours of
# 10PM Sunday till 10PM Friday GMT.
dag = DAG(dag_id='stocks',
          default_args=args,
          schedule_interval='0 * * * 1,2,3,4,5',
          dagrun_timeout=timedelta(seconds=30))
# loop through the lob's we want to use to build up our dag
for stock in stocks:
    get_stocks_task = \
        PythonOperator(task_id='get_stocks',
                       provide_context=True,
                       op_kwargs={"stock": stock},
                       python_callable=get_stocks,
                       dag=dag)

    cache_latest_stocks_task = \
        PythonOperator(task_id='cache_latest_stocks',
                       provide_context=True,
                       python_callable=cache_latest_stocks,
                       dag=dag)

    get_stocks_task.set_downstream(cache_latest_stocks_task)
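
# As written, every iteration of the loop above reuses the task_ids 'get_stocks' and
# 'cache_latest_stocks', so the ids collide; depending on the Airflow version this either
# overwrites the earlier tasks or raises a duplicate-task-id error. A sketch of the same
# loop with per-stock task ids, assuming each stock is a symbol string:
for stock in stocks:
    get_stocks_task = PythonOperator(task_id='get_stocks_{}'.format(stock),
                                     provide_context=True,
                                     op_kwargs={'stock': stock},
                                     python_callable=get_stocks,
                                     dag=dag)

    cache_latest_stocks_task = PythonOperator(task_id='cache_latest_stocks_{}'.format(stock),
                                              provide_context=True,
                                              python_callable=cache_latest_stocks,
                                              dag=dag)

    get_stocks_task.set_downstream(cache_latest_stocks_task)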
Example #23
args = {
    'owner': 'mark',
    'depends_on_past': False,
    'start_date': datetime.utcnow(),
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

# Run at the top of the hour Monday to Friday.
# Note: This doesn't line up with the market hours of
# 10PM Sunday till 10PM Friday GMT.
dag = DAG(dag_id='rates',
          default_args=args,
          schedule_interval='0 * * * 1,2,3,4,5',
          dagrun_timeout=timedelta(seconds=30))

get_rates_task = \
    PythonOperator(task_id='get_rates',
                   provide_context=True,
                   python_callable=get_rates,
                   dag=dag)

cache_latest_rates_task = \
    PythonOperator(task_id='cache_latest_rates',
                   provide_context=True,
                   python_callable=cache_latest_rates,
                   dag=dag)

get_rates_task.set_downstream(cache_latest_rates_task)
dag7 = DAG(dag_id='test_subdag_deadlock', default_args=default_args)
subdag7 = DAG(dag_id='test_subdag_deadlock.subdag', default_args=default_args)
subdag7_task1 = PythonOperator(
    task_id='test_subdag_fail',
    dag=subdag7,
    python_callable=fail)
subdag7_task2 = DummyOperator(
    task_id='test_subdag_dummy_1',
    dag=subdag7,)
subdag7_task3 = DummyOperator(
    task_id='test_subdag_dummy_2',
    dag=subdag7)
dag7_subdag1 = SubDagOperator(
    task_id='subdag',
    dag=dag7,
    subdag=subdag7)
subdag7_task1.set_downstream(subdag7_task2)
subdag7_task2.set_downstream(subdag7_task3)

# DAG tests that queued tasks are run
dag8 = DAG(
    dag_id='test_scheduled_queued_tasks',
    start_date=DEFAULT_DATE,
    end_date=DEFAULT_DATE,
    default_args=default_args)
dag8_task1 = PythonOperator(
    python_callable=fail,
    task_id='test_queued_task',
    dag=dag8,
    pool='test_queued_pool')
Example #25
    task_id='test_depends_on_past_2',
    depends_on_past=True,
    dag=dag6,
)
dag6_task2.set_upstream(dag6_task1)

# DAG tests that a deadlocked subdag is properly caught
dag7 = DAG(dag_id='test_subdag_deadlock', default_args=default_args)
subdag7 = DAG(dag_id='test_subdag_deadlock.subdag', default_args=default_args)
subdag7_task1 = PythonOperator(task_id='test_subdag_fail',
                               dag=subdag7,
                               python_callable=fail)
subdag7_task2 = DummyOperator(
    task_id='test_subdag_dummy_1',
    dag=subdag7,
)
subdag7_task3 = DummyOperator(task_id='test_subdag_dummy_2', dag=subdag7)
dag7_subdag1 = SubDagOperator(task_id='subdag', dag=dag7, subdag=subdag7)
subdag7_task1.set_downstream(subdag7_task2)
subdag7_task2.set_downstream(subdag7_task3)

# DAG tests that queued tasks are run
dag8 = DAG(dag_id='test_scheduled_queued_tasks',
           start_date=DEFAULT_DATE,
           end_date=DEFAULT_DATE,
           default_args=default_args)
dag8_task1 = PythonOperator(python_callable=fail,
                            task_id='test_queued_task',
                            dag=dag8,
                            pool='test_queued_pool')
Example #26
                    dag=dag)

templated_command = """
    {% for i in range(5) %}
        echo "{{ ds }}"
        echo "{{ macros.ds_add(ds, 7)}}"
        echo "{{ params.my_param }}"
    {% endfor %}
"""

join = DummyOperator(task_id='join', trigger_rule='all_done', dag=dag)

sum_up = PythonOperator(
    task_id='sum_up',
    provide_context=True,
    python_callable=sum_up_task,
    dag=dag,
    execution_timeout=timedelta(seconds=60),
    on_failure_callback=notify_failure,
)

p1.set_upstream(p0)
p2.set_upstream(p0)
p3.set_upstream(p0)
c1.set_upstream(p1)
c2.set_upstream(p2)
c3.set_upstream(p3)
c3.set_downstream(join)
c2.set_downstream(join)
sum_up.set_upstream(join)
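
# sum_up_task is referenced above but not shown; a minimal sketch, assuming the c1-c3 tasks
# push numeric results to XCom and that their task_ids match the variable names:
def sum_up_task(**context):
    values = context['ti'].xcom_pull(task_ids=['c1', 'c2', 'c3'])
    total = sum(v for v in values if v is not None)  # 'all_done' join: some may be missing
    print('sum:', total)
    return total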