def execute(self, context):
    """Create one DagRun of ``self.trigger_dag_id`` per order from the callable.

    ``self.python_callable`` is called with ``self.op_args`` and
    ``self.op_kwargs`` and iterated.  Iteration stops at the first falsy
    item.  Items that are not ``DagRunOrder`` instances are wrapped as the
    payload of a new ``DagRunOrder``.  Orders without a ``run_id`` get
    ``'trig__' + <current UTC timestamp>``.

    When at least one DagRun was created, the session is committed and the
    list of created DagRun ids is pushed to XCom under
    ``self.CREATED_DAGRUN_KEY``.

    :param context: task context; merged with ``self.op_kwargs`` and used as
        the callable's keyword arguments when ``self.provide_context`` is set.
        Must contain ``'ti'`` when any DagRun is created.
    """
    if self.provide_context:
        context.update(self.op_kwargs)
        self.op_kwargs = context

    session = settings.Session()
    try:
        created_dr_ids = []
        # Loaded on first use and then reused: building a DagBag for every
        # order is loop-invariant work, and skipping it entirely keeps the
        # "nothing to trigger" path as cheap as before.
        trigger_dag = None
        for dro in self.python_callable(*self.op_args, **self.op_kwargs):
            if not dro:
                break
            if not isinstance(dro, DagRunOrder):
                dro = DagRunOrder(payload=dro)

            now = dt.utcnow()
            if dro.run_id is None:
                dro.run_id = 'trig__' + now.isoformat()

            if trigger_dag is None:
                dbag = DagBag(settings.DAGS_FOLDER)
                trigger_dag = dbag.get_dag(self.trigger_dag_id)

            dr = trigger_dag.create_dagrun(
                run_id=dro.run_id,
                execution_date=now,
                state=State.RUNNING,
                conf=dro.payload,
                external_trigger=True,
            )
            created_dr_ids.append(dr.id)
            self.log.info("Created DagRun %s, %s", dr, now)

        if created_dr_ids:
            session.commit()
            context['ti'].xcom_push(self.CREATED_DAGRUN_KEY, created_dr_ids)
        else:
            self.log.info("No DagRun created")
    finally:
        # Release the session even when the callable or create_dagrun raises.
        session.close()
def trigger_preprocessing(context):
    """Yield one DagRunOrder per distinct base filename reported by 'rsync'.

    Calls the preprocessing dag: passes the filenames of the stuff to process.
    We have a jpg, xml, small mrc and large mrc, and gainref dm4 file; the
    common base filename is used as the key and the triggered dag is left to
    deal with the other files.

    The file list is pulled from XCom (task 'rsync', key 'return_value').
    Experiment settings are pulled from task 'config' (keys 'sample',
    'instrument', 'experiment', 'experiment_directory').  An order is only
    yielded when ``preprocess/enable`` is truthy and ``imaging_method`` is
    'single-particle'.

    :param context: task context containing ``'ti'``; ``None`` yields nothing.
    """
    if context is None:
        return

    ti = context['ti']
    found = set()
    # `or ()`: a missing XCom value means nothing was transferred.
    for f in ti.xcom_pull(task_ids='rsync', key='return_value') or ():
        this = Path(f).resolve().stem
        # strip trailing '-<digits>' and '-gain-ref' suffixes from the stem
        for pattern in (r'\-\d+$', r'\-gain\-ref$'):
            this = re.sub(pattern, '', this)

        # EPU: only care about the xml file for now (let the dag deal with
        # the other files)
        if (f.endswith('.xml') and not f.startswith(('Atlas', 'Tile_'))
                and '_Data_' in f):
            found.add(this)
        # serialEM: just look for tifs
        elif f.endswith('.tif'):
            m = re.match(r'^(?P<base>.*\_\d\d\d\d\d)(\_.*)?\.tif$', f)
            if m:
                found.add(m.group('base'))
        # tomography file
        elif '[' in this and ']' in this:
            found.add(this.split(']')[0] + ']')

    if not found:
        return

    # These values do not depend on the file being processed; pull them once.
    sample = ti.xcom_pull(task_ids='config', key='sample')
    inst = ti.xcom_pull(task_ids='config', key='instrument')
    name = ti.xcom_pull(task_ids='config', key='experiment')
    experiment_directory = ti.xcom_pull(
        task_ids='config', key='experiment_directory')

    for base_filename in sorted(found):
        run_id = '%s__%s' % (name, base_filename)

        # Copy the sample parameters: every order needs its own payload dict,
        # otherwise all yielded orders alias one object that is overwritten
        # on each iteration.
        d = dict(sample['params'])
        d['directory'] = experiment_directory + '/' + sample['guid'] + '/'
        d['base'] = base_filename
        d['experiment'] = name
        d['microscope'] = inst['_id']
        d['cs'] = inst['params']['cs']
        d['keV'] = inst['params']['keV']

        # only do single-particle
        if (bool(strtobool(str(d['preprocess/enable'])))
                and d['imaging_method'] in ('single-particle',)):
            LOG.info('triggering dag %s with %s', run_id, d)
            dro = DagRunOrder(run_id=run_id)
            dro.payload = d
            yield dro
def execute(self, context):
    """Create a DagRun of ``self.trigger_dag_id``, or clear the existing one.

    The run id is ``'trig__'`` followed by ``self.execution_date`` when one
    is set, otherwise by the current UTC timestamp.  ``self.python_callable``
    (when set) receives the context and a fresh ``DagRunOrder`` and decides
    whether to trigger by returning a truthy order.

    When the target dag has no run for ``self.execution_date`` a new RUNNING
    DagRun is created with the JSON-encoded payload as ``conf``; otherwise the
    target dag is cleared for exactly that execution date.

    :param context: task context handed to ``self.python_callable``.
    """
    if self.execution_date is not None:
        run_id = 'trig__{}'.format(self.execution_date)
        # The parsed value is stored back on the operator, so on a second
        # call it is no longer a string; only parse when it still is one.
        if isinstance(self.execution_date, str):
            self.execution_date = timezone.parse(self.execution_date)
    else:
        run_id = 'trig__' + timezone.utcnow().isoformat()

    dro = DagRunOrder(run_id=run_id)
    if self.python_callable is not None:
        dro = self.python_callable(context, dro)

    if not dro:
        self.log.info("Criteria not met, moving on")
        return

    dbag = DagBag(settings.DAGS_FOLDER)
    trigger_dag = dbag.get_dag(self.trigger_dag_id)

    if not trigger_dag.get_dagrun(execution_date=self.execution_date):
        dr = trigger_dag.create_dagrun(
            run_id=dro.run_id,
            state=State.RUNNING,
            conf=json.dumps(dro.payload),
            execution_date=self.execution_date,
            external_trigger=True)
        self.log.info("Creating DagRun %s", dr)
    else:
        # A run already exists for this date: reset it instead of failing.
        trigger_dag.clear(
            start_date=self.execution_date,
            end_date=self.execution_date,
            only_failed=False,
            only_running=False,
            confirm_prompt=False,
            reset_dag_runs=True,
            include_subdags=False,
            dry_run=False)
        self.log.info("Cleared DagRun %s", trigger_dag)
def generate_dag_run(**context):
    """Callable can depend on the context.

    Yields ten DagRunOrders.  Each payload carries the loop index as
    ``'timeout'`` and the context's ``'ds'`` value as ``'ds'``.

    :param context: keyword context; must contain ``'ds'``.
    """
    for i in range(10):
        yield DagRunOrder(payload={
            # the loop index itself, not an unapplied "%i" format literal
            'timeout': i,
            'ds': context["ds"],
        })
def flex_maybe_spawn(**kwargs):
    """
    This is a generator which returns appropriate DagRunOrders.

    It prints the keyword arguments and the dag_run conf, pulls
    'collectiontype', 'assay_type', 'lz_paths' and 'uuids' from the XCom of
    task "check_uuids", and yields a ``(next_dag, DagRunOrder)`` pair for each
    item produced by ``utils.downstream_workflow_iter``.  All yielded orders
    share one payload dict.
    """
    print('kwargs:')
    pprint(kwargs)
    print('dag_run conf:')
    ctx = kwargs['dag_run'].conf
    pprint(ctx)

    task_instance = kwargs['ti']
    # The tuple fixes the order in which the values are pulled.
    pulled = {
        key: task_instance.xcom_pull(key=key, task_ids="check_uuids")
        for key in ('collectiontype', 'assay_type', 'lz_paths', 'uuids')
    }
    collectiontype = pulled['collectiontype']
    assay_type = pulled['assay_type']
    lz_paths = pulled['lz_paths']
    uuids = pulled['uuids']

    print('collectiontype: <{}>, assay_type: <{}>'.format(
        collectiontype, assay_type))
    print(f'uuids: {uuids}')
    print('lz_paths:')
    pprint(lz_paths)

    payload = {
        'ingest_id': kwargs['run_id'],
        'crypt_auth_tok': kwargs['crypt_auth_tok'],
        'parent_lz_path': lz_paths,
        'parent_submission_id': uuids,
        'metadata': {},
        'dag_provenance_list': utils.get_git_provenance_list(__file__),
    }

    downstream = utils.downstream_workflow_iter(collectiontype, assay_type)
    for next_dag in downstream:
        yield next_dag, DagRunOrder(payload=payload)
def trigger_preprocessing(context):
    """Yield one DagRunOrder per distinct '.xml' base name from 'rsync_data'.

    Calls the preprocessing dag: passes the filenames of the stuff to process.
    We have a jpg, xml, small mrc and large mrc, and gainref dm4 file; only
    the xml file is looked at here, and the triggered dag is left to deal with
    the other files.

    The file list is pulled from XCom (task 'rsync_data', key
    'return_value').  The experiment description and directory are pulled
    from task 'parse_config' (keys 'experiment' and 'experiment_directory').

    :param context: task context containing ``'ti'``; ``None`` yields nothing.
    """
    if context is None:
        return

    ti = context['ti']
    # dict rather than set: orders are yielded in first-seen order.
    found = {}
    # `or ()`: a missing XCom value means nothing was transferred.
    for f in ti.xcom_pull(task_ids='rsync_data', key='return_value') or ():
        this = Path(f).resolve().stem
        # strip trailing '-<digits>' and '-gain-ref' suffixes from the stem
        for pattern in (r'\-\d+$', r'\-gain\-ref$'):
            this = re.sub(pattern, '', this)
        # only care about the xml file for now (let the dag deal with the
        # other files)
        if f.endswith('.xml'):
            # Logger.warn is a deprecated alias; use warning().
            LOG.warning("found EPU metadata %s", this)
            found[this] = True

    if not found:
        return

    # Neither value depends on the file being processed; pull them once.
    exp = ti.xcom_pull(task_ids='parse_config', key='experiment')
    directory = ti.xcom_pull(
        task_ids='parse_config', key='experiment_directory')

    for base_filename in found:
        run_id = '%s_%s__%s' % (exp['name'], exp['microscope'], base_filename)
        d = {
            'directory': directory,
            'base': base_filename,
            'experiment': exp['name'],
        }
        LOG.info('triggering dag %s with %s', run_id, d)
        dro = DagRunOrder(run_id=run_id)
        dro.payload = d
        # TODO: implement dry_run somehow
        yield dro
def dispatch_jobs(**context):
    """Trigger every pending dag returned by ``get_trigger_dags``.

    Each pending entry supplies a ``"payload"`` and a ``"target_dag_id"``.
    The target dag is triggered with a ``'trig__' + <UTC timestamp>`` run id
    and the JSON-encoded payload as its conf.

    :param context: keyword context; only written to the debug log.
    """
    logger.debug("context: %s", context)

    for job in get_trigger_dags():
        order = DagRunOrder(run_id="trig__" + timezone.utcnow().isoformat())
        order.payload = job["payload"]
        target_dag_id = job["target_dag_id"]

        # trigger airflow dag
        trigger_dag(
            dag_id=target_dag_id,
            run_id=order.run_id,
            conf=json.dumps(order.payload),
            execution_date=None,
            replace_microseconds=False,
        )
        logger.info("Triggered: %s", target_dag_id)
def execute(self, context, **kwargs):
    """
    Start another run of this operator's dag while the loop count is non-zero.

    The loop id is derived from ``context['execution_date']``.  When the
    stored loop count is 0, or ``self.python_callable`` returns a falsy
    order, the loop count is deleted and downstream tasks continue.
    Otherwise a new RUNNING DagRun of ``self.dag`` is created one microsecond
    after the current execution date, the loop count is decremented, and,
    when ``self.skip_downstream`` is set, the downstream tasks are skipped.

    :param context: task context; reads 'execution_date', 'task', 'dag_run'
        and 'ti'
    :param kwargs: accepted but not used
    :return: None
    """
    stamp = context['execution_date'].strftime('%Y_%m_%d_%H_%M_%S')
    self.loop_id = 'loop_' + stamp
    self.log.info('Loop id ' + self.loop_id)

    loop_count = self.get_loop_count()
    if loop_count == 0:
        self.log.info('Loop count is 0. Continuing the downstream tasks ')
        self.delete_loop_count()
        return

    loop_dag_run_id = '__'.join(
        ('loop', self.dag_id, self.loop_id, str(loop_count)))
    self.log.info('DagRun Loop id ' + loop_dag_run_id)

    dro = self.python_callable(context, DagRunOrder(run_id=loop_dag_run_id))
    if not dro:
        self.log.info("Loop criteria not met. Continuing the downstream tasks")
        self.delete_loop_count()
        return

    self.log.info('Loop criteria met. Loop count %d' % loop_count)

    # Read again after the callable ran, since it was handed the context.
    current_date = context['execution_date']
    if current_date:
        next_date = current_date + dt.timedelta(microseconds=1)
    else:
        next_date = None
    self.dag.create_dagrun(
        run_id=dro.run_id,
        state=State.RUNNING,
        conf=dro.payload,
        execution_date=next_date,
        external_trigger=True)

    self.log.info('Decrementing loop from %d' % loop_count)
    self.set_loop_count(loop_count - 1)

    if not self.skip_downstream:
        return

    self.log.info('Skipping the downstream tasks')
    downstream_tasks = context['task'].get_flat_relatives(upstream=False)
    self.log.debug("Downstream task_ids %s", downstream_tasks)
    if downstream_tasks:
        self.skip(context['dag_run'], context['ti'].execution_date,
                  downstream_tasks)
def maybe_spawn_dags(**kwargs):
    """
    This is a generator which returns appropriate DagRunOrders.

    It yields a single order whose payload is a shallow copy of the dag_run
    conf with 'apply' set to 'salmon_rnaseq_10x' and this file's git
    provenance added.  The provenance is merged into an existing
    'dag_provenance' mapping, or otherwise prepended to
    'dag_provenance_list'.
    """
    print('kwargs:')
    pprint(kwargs)
    print('dag_run conf:')
    pprint(kwargs['dag_run'].conf)

    conf = kwargs['dag_run'].conf
    # The values are not used below, but the lookups raise KeyError when
    # either entry is absent from the conf.
    _metadata = conf['metadata']
    _auth_tok = conf['auth_tok']

    payload = dict(conf)
    payload['apply'] = 'salmon_rnaseq_10x'

    if 'dag_provenance' not in payload:
        provenance = utils.get_git_provenance_list(__file__)
        if 'dag_provenance_list' in payload:
            provenance.extend(payload['dag_provenance_list'])
        payload['dag_provenance_list'] = provenance
    else:
        payload['dag_provenance'].update(
            utils.get_git_provenance_dict(__file__))

    yield DagRunOrder(payload=payload)
def flex_maybe_spawn(**kwargs):
    """
    This is a generator which returns appropriate DagRunOrders.

    It prints the keyword arguments and the dag_run conf, then reads the
    return codes pushed by tasks "run_md_extract" and "md_consistency_tests".
    Nothing is yielded unless both are 0.  Otherwise the metadata is loaded
    from 'rslt.yml' in the run's temporary directory and a
    ``(next_dag, DagRunOrder)`` pair is yielded for each item produced by
    ``utils.downstream_workflow_iter``.  All yielded orders share one payload
    dict.
    """
    print('kwargs:')
    pprint(kwargs)
    print('dag_run conf:')
    ctx = kwargs['dag_run'].conf
    pprint(ctx)

    ti = kwargs['ti']
    md_extract_retcode = int(ti.xcom_pull(task_ids="run_md_extract"))
    md_consistency_retcode = int(
        ti.xcom_pull(task_ids="md_consistency_tests"))
    if md_extract_retcode != 0 or md_consistency_retcode != 0:
        # An upstream step failed: spawn nothing.
        return

    collectiontype = ti.xcom_pull(key='collectiontype',
                                  task_ids="send_status_msg")
    assay_type = ti.xcom_pull(key='assay_type',
                              task_ids="send_status_msg")
    print('collectiontype: <{}>, assay_type: <{}>'.format(
        collectiontype, assay_type))

    md_fname = os.path.join(utils.get_tmp_dir_path(kwargs['run_id']),
                            'rslt.yml')
    with open(md_fname, 'r') as f:
        md = yaml.safe_load(f)

    # The payload is built from explicit conf entries only; a copy of the
    # whole conf used to be built here and immediately discarded.
    payload = {
        'ingest_id': ctx['run_id'],
        'crypt_auth_tok': ctx['crypt_auth_tok'],
        'parent_lz_path': ctx['lz_path'],
        'parent_submission_id': ctx['submission_id'],
        'metadata': md,
        'dag_provenance_list': utils.get_git_provenance_list(__file__),
    }
    for next_dag in utils.downstream_workflow_iter(collectiontype,
                                                   assay_type):
        yield next_dag, DagRunOrder(payload=payload)
def generate_dag_run():
    """Yield ten explicit DagRunOrders with 'timeout' payloads 0 through 9."""
    for timeout in range(10):
        order = DagRunOrder(payload={'timeout': timeout})
        yield order
def generate_dag_run():
    """Yield one hundred DagRunOrders with 'index' payloads 0 through 99."""
    for index in range(100):
        order = DagRunOrder(payload={'index': index})
        yield order
def fill_target_dag_payload(context: Dict[str, object], order: DagRunOrder) -> DagRunOrder:
    """Attach a fixed greeting payload to ``order`` and return it.

    The context and the resulting payload are printed.

    :param context: task context; only printed.
    :param order: the order to fill; its ``payload`` is replaced in place.
    :return: the same ``order`` object.
    """
    print(f"Context: '{context}'")
    greeting = {"message": 'hello from trigger dag'}
    order.payload = greeting
    print(f"DagRunOrder payload: '{order.payload}'")
    return order
def generate_dag_run():
    """Return a list of ten DagRunOrders with 'timeout' payloads 0 through 9."""
    orders = [
        DagRunOrder(payload={'timeout': timeout})
        for timeout in range(10)
    ]
    return orders