def create_biowardrobe_workflow(workflow):
    """Build a CWL-based Airflow DAG for *workflow* wired to the BioWardrobe
    job dispatcher/gatherer operators.

    NOTE(review): a second ``create_biowardrobe_workflow`` defined later in
    this file replaces this one at import time — confirm which variant is
    the intended public one.
    """
    workflow_file = available(workflow=workflow)

    # Retry policy: exponential backoff starting at 30 min, capped at 4 h.
    defaults = {
        'owner': 'airflow',
        'email': ['*****@*****.**'],
        'email_on_failure': False,
        'email_on_retry': False,
        'retries': 20,
        'retry_exponential_backoff': True,
        'retry_delay': timedelta(minutes=30),
        'max_retry_delay': timedelta(minutes=60 * 4),
    }

    dag = CWLDAG(default_args=defaults, cwl_workflow=workflow_file)
    dag.create()
    dag.add(BioWardrobeJobDispatcher(dag=dag), to='top')
    dag.add(BioWardrobeJobGatherer(dag=dag), to='bottom')
    return dag
def create_biowardrobe_workflow(workflow):
    """Build a CWL-based Airflow DAG for *workflow* using the generic CWL
    dispatcher/gatherer operators, constrained to the ``basic_analysis`` pool.

    NOTE(review): this definition shadows an earlier function of the same
    name in this file — confirm the duplication is intentional.
    """
    cwl_file = available(workflow=workflow)

    # Retry policy: exponential backoff starting at 1 h, capped at 24 h;
    # tasks run in the single-slot 'basic_analysis' pool.
    default_args = {
        'owner': 'airflow',
        'email': ['*****@*****.**'],
        'email_on_failure': False,
        'email_on_retry': False,
        'pool': 'basic_analysis',
        'retries': 10,
        'retry_exponential_backoff': True,
        'retry_delay': timedelta(minutes=60),
        'max_retry_delay': timedelta(minutes=60 * 24),
    }

    dag = CWLDAG(default_args=default_args, cwl_workflow=cwl_file)
    dag.create()
    for operator, position in ((CWLJobDispatcher(dag=dag), 'top'),
                               (CWLJobGatherer(dag=dag), 'bottom')):
        dag.add(operator, to=position)
    return dag
def generate_biowardrobe_workflow():
    """Generate Airflow DAG stub files for every available CWL workflow and
    prepare the Airflow environment to run them.

    Side effects:
      * writes a small ``<workflow>.py`` stub into ``DAGS_FOLDER`` for each
        workflow returned by ``available()``;
      * creates the single-slot ``basic_analysis`` pool if it is missing;
      * on first run, backs up ``airflow.cfg`` and rewrites it with a
        ``[cwl]`` section plus webserver/scheduler tuning.
    """
    _generate_dag_stubs()
    _ensure_basic_analysis_pool()
    _ensure_cwl_config()


def _generate_dag_stubs():
    """Write one DAG stub module per available workflow into DAGS_FOLDER."""
    template = u"""#!/usr/bin/env python3
from airflow import DAG
from biowardrobe_cwl_workflows import workflow
dag = workflow("{}")
"""
    for workflow in available():
        if not workflow:
            continue
        filename = os.path.abspath(
            os.path.join(
                DAGS_FOLDER,
                os.path.basename(os.path.splitext(workflow)[0]) + '.py'))
        print(filename)
        with open(filename, 'w') as generated_workflow_stream:
            generated_workflow_stream.write(template.format(workflow))


def _ensure_basic_analysis_pool():
    """Create the single-slot 'basic_analysis' pool if it does not exist."""
    try:
        api_client.get_pool(name='basic_analysis')
    except Exception:
        # Lookup failed — assume the pool is missing and create it.
        # NOTE(review): ideally catch the specific "pool not found" error;
        # the api_client's exception type is not visible from this file.
        api_client.create_pool(name='basic_analysis',
                               slots=1,
                               description="pool to run basic analysis")


def _ensure_cwl_config():
    """Add the [cwl] section and tuning defaults to airflow.cfg once.

    A pristine copy of the original config is kept at ``airflow.cfg.orig``
    before the first rewrite; the presence of ``[cwl] tmp_folder`` marks the
    config as already updated.
    """
    if conf.has_option('cwl', 'tmp_folder'):
        return
    backup = conf.AIRFLOW_CONFIG + '.orig'
    if not os.path.exists(backup):
        copyfile(conf.AIRFLOW_CONFIG, backup)
    conf.conf.add_section('cwl')
    conf.set('cwl', 'tmp_folder', os.path.join(AIRFLOW_HOME, 'tmp'))
    conf.set('core', 'logging_level', 'WARNING')
    conf.set('core', 'load_examples', 'False')
    conf.set('webserver', 'dag_default_view', 'graph')
    conf.set('webserver', 'dag_orientation', 'TB')
    conf.set('webserver', 'web_server_worker_timeout', '120')
    conf.set('scheduler', 'job_heartbeat_sec', '20')
    conf.set('scheduler', 'scheduler_heartbeat_sec', '20')
    conf.set('scheduler', 'min_file_process_interval', '30')
    with open(conf.AIRFLOW_CONFIG, 'w') as fp:
        conf.conf.write(fp)


# TODO: install macOS/Linux startup scripts for scheduler/webserver
# (previously sketched here as commented-out platform-specific code).
# TODO: tmp, dags do not exist ???
# generate_biowardrobe_workflow()