def execute(self, context: Context) -> Any:
    started_at: Union[datetime.datetime, float]

    if self.reschedule:
        # If reschedule, use the start date of the first try (first try can be either the very
        # first execution of the task, or the first execution after the task was cleared.)
        first_try_number = context['ti'].max_tries - self.retries + 1
        task_reschedules = TaskReschedule.find_for_task_instance(
            context['ti'], try_number=first_try_number
        )
        if not task_reschedules:
            start_date = timezone.utcnow()
        else:
            start_date = task_reschedules[0].start_date
        started_at = start_date

        def run_duration() -> float:
            # If we are in reschedule mode, then we have to compute diff
            # based on the time in a DB, so can't use time.monotonic
            return (timezone.utcnow() - start_date).total_seconds()

    else:
        started_at = start_monotonic = time.monotonic()

        def run_duration() -> float:
            return time.monotonic() - start_monotonic

    try_number = 1
    log_dag_id = self.dag.dag_id if self.has_dag() else ""

    xcom_value = None
    while True:
        poke_return = self.poke(context)
        if poke_return:
            if isinstance(poke_return, PokeReturnValue):
                xcom_value = poke_return.xcom_value
            break

        if run_duration() > self.timeout:
            # If sensor is in soft fail mode but times out raise AirflowSkipException.
            if self.soft_fail:
                raise AirflowSkipException(f"Snap. Time is OUT. DAG id: {log_dag_id}")
            else:
                raise AirflowSensorTimeout(f"Snap. Time is OUT. DAG id: {log_dag_id}")

        if self.reschedule:
            next_poke_interval = self._get_next_poke_interval(started_at, run_duration, try_number)
            reschedule_date = timezone.utcnow() + timedelta(seconds=next_poke_interval)
            if _is_metadatabase_mysql() and reschedule_date > _MYSQL_TIMESTAMP_MAX:
                raise AirflowSensorTimeout(
                    f"Cannot reschedule DAG {log_dag_id} to {reschedule_date.isoformat()} "
                    f"since it is over MySQL's TIMESTAMP storage limit."
                )
            raise AirflowRescheduleException(reschedule_date)
        else:
            time.sleep(self._get_next_poke_interval(started_at, run_duration, try_number))
        try_number += 1

    self.log.info("Success criteria met. Exiting.")
    return xcom_value
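The `xcom_value` plumbing in the variant above only comes into play when `poke()` returns a `PokeReturnValue` instead of a plain boolean. Below is a minimal sketch of a custom sensor written against that contract, assuming `PokeReturnValue(is_done, xcom_value)` from `airflow.sensors.base` and that its truthiness reflects `is_done`; the `FileSizeSensor` class and its `path` argument are illustrative, not part of the original code.

import os

from airflow.sensors.base import BaseSensorOperator, PokeReturnValue


class FileSizeSensor(BaseSensorOperator):
    """Illustrative sensor: succeeds once a local file exists, pushing its size to XCom."""

    def __init__(self, path, **kwargs):
        super().__init__(**kwargs)
        self.path = path

    def poke(self, context):
        if not os.path.exists(self.path):
            # A falsy PokeReturnValue keeps the loop in execute() polling.
            return PokeReturnValue(is_done=False)
        # is_done=True breaks the loop; xcom_value is returned from execute()
        # and therefore pushed to XCom as the task's return value.
        return PokeReturnValue(is_done=True, xcom_value=os.path.getsize(self.path))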
def execute(self, context: Dict) -> Any:
    started_at = timezone.utcnow()
    try_number = 1
    log_dag_id = self.dag.dag_id if self.has_dag() else ""
    if self.reschedule:
        # If reschedule, use first start date of current try
        task_reschedules = TaskReschedule.find_for_task_instance(context['ti'])
        if task_reschedules:
            started_at = task_reschedules[0].start_date
            try_number = len(task_reschedules) + 1
    while not self.poke(context):
        if (timezone.utcnow() - started_at).total_seconds() > self.timeout:
            # If sensor is in soft fail mode but will be retried then
            # give it a chance and fail with timeout.
            # This gives the ability to set up non-blocking AND soft-fail sensors.
            if self.soft_fail and not context['ti'].is_eligible_to_retry():
                self._do_skip_downstream_tasks(context)
                raise AirflowSkipException(f"Snap. Time is OUT. DAG id: {log_dag_id}")
            else:
                raise AirflowSensorTimeout(f"Snap. Time is OUT. DAG id: {log_dag_id}")
        if self.reschedule:
            reschedule_date = timezone.utcnow() + timedelta(
                seconds=self._get_next_poke_interval(started_at, try_number))
            raise AirflowRescheduleException(reschedule_date)
        else:
            sleep(self._get_next_poke_interval(started_at, try_number))
        try_number += 1
    self.log.info("Success criteria met. Exiting.")
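The `is_eligible_to_retry()` guard in this variant is what the comment means by "non-blocking AND soft-fail": each timed-out try fails with `AirflowSensorTimeout` and is retried while retries remain, and only the final try is converted into a skip. A usage sketch under that reading, assuming Airflow 2.x import paths; the task id, file path, and concrete numbers are illustrative.

from datetime import timedelta

from airflow.sensors.filesystem import FileSensor  # any concrete sensor works here

wait_for_file = FileSensor(
    task_id='wait_for_file',
    filepath='/data/incoming/report.csv',  # illustrative path
    poke_interval=60,                      # seconds between pokes
    timeout=60 * 60,                       # per-try budget before AirflowSensorTimeout
    retries=2,                             # timed-out tries are retried first...
    retry_delay=timedelta(minutes=5),
    soft_fail=True,                        # ...and only the last try is skipped, not failed
    mode='reschedule',                     # free the worker slot between pokes
)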
def execute(self, context):
    started_at = timezone.utcnow()
    if self.reschedule:
        # If reschedule, use first start date of current try
        task_reschedules = TaskReschedule.find_for_task_instance(context['ti'])
        if task_reschedules:
            started_at = task_reschedules[0].start_date
    while not self.poke(context):
        # Inside this loop the last poke is known to have returned a falsy value;
        # log that directly instead of calling poke() a second time, and use a
        # format placeholder rather than a stray positional argument.
        self.log.info("Poke status: %s", False)
        if (timezone.utcnow() - started_at).total_seconds() > self.timeout:
            # If sensor is in soft fail mode but will be retried then
            # give it a chance and fail with timeout.
            # This gives the ability to set up non-blocking AND soft-fail sensors.
            if self.soft_fail and not context['ti'].is_eligible_to_retry():
                self._do_skip_downstream_tasks(context)
                raise AirflowSkipException('Snap. Time is OUT.')
            else:
                raise AirflowSensorTimeout('Snap. Time is OUT.')
        if self.reschedule:
            reschedule_date = timezone.utcnow() + timedelta(seconds=self.poke_interval)
            raise AirflowRescheduleException(reschedule_date)
        else:
            sleep(self.poke_interval)
    self.log.info("Success criteria met. Exiting.")
def execute(self, context):
    started_at = datetime.now()
    while not self.poke(context):
        if (datetime.now() - started_at).total_seconds() > self.timeout:
            if self.soft_fail:
                raise AirflowSkipException('Snap. Time is OUT.')
            else:
                raise AirflowSensorTimeout('Snap. Time is OUT.')
        sleep(self.poke_interval)
    logging.info("Success criteria met. Exiting.")
def execute(self, context: Dict) -> Any:
    started_at = None
    # Initialize try_number before the branch below so the reschedule branch can
    # overwrite it with the recorded reschedule count without being clobbered afterwards.
    try_number = 1

    if self.reschedule:
        # If reschedule, use first start date of current try
        task_reschedules = TaskReschedule.find_for_task_instance(context['ti'])
        if task_reschedules:
            started_at = task_reschedules[0].start_date
            try_number = len(task_reschedules) + 1
        else:
            started_at = timezone.utcnow()

        def run_duration() -> float:
            # If we are in reschedule mode, then we have to compute diff
            # based on the time in a DB, so can't use time.monotonic
            nonlocal started_at
            return (timezone.utcnow() - started_at).total_seconds()

    else:
        started_at = time.monotonic()

        def run_duration() -> float:
            nonlocal started_at
            return time.monotonic() - started_at

    log_dag_id = self.dag.dag_id if self.has_dag() else ""
    while not self.poke(context):
        if run_duration() > self.timeout:
            # If sensor is in soft fail mode but will be retried then
            # give it a chance and fail with timeout.
            # This gives the ability to set up non-blocking AND soft-fail sensors.
            if self.soft_fail and not context['ti'].is_eligible_to_retry():
                raise AirflowSkipException(f"Snap. Time is OUT. DAG id: {log_dag_id}")
            else:
                raise AirflowSensorTimeout(f"Snap. Time is OUT. DAG id: {log_dag_id}")
        if self.reschedule:
            reschedule_date = timezone.utcnow() + timedelta(
                seconds=self._get_next_poke_interval(started_at, run_duration, try_number))
            raise AirflowRescheduleException(reschedule_date)
        else:
            time.sleep(self._get_next_poke_interval(started_at, run_duration, try_number))
        try_number += 1
    self.log.info("Success criteria met. Exiting.")
def execute(self, context):
    started_at = timezone.utcnow()
    time_jump = self.params.get('time_jump')
    while not self.poke(context):
        if time_jump:
            # 'time_jump' artificially ages started_at on each iteration,
            # so the timeout below fires sooner than wall-clock time alone would allow.
            started_at -= time_jump
        if (timezone.utcnow() - started_at).total_seconds() > self.timeout:
            if self.soft_fail:
                raise AirflowSkipException('Snap. Time is OUT.')
            else:
                raise AirflowSensorTimeout('Snap. Time is OUT.')
        time.sleep(self.poke_interval)
    self.log.info("Success criteria met. Exiting.")
def execute(self, context: Dict) -> Any:
    started_at = None

    if self.reschedule:
        # If reschedule, use the start date of the first try (first try can be either the very
        # first execution of the task, or the first execution after the task was cleared.)
        first_try_number = context['ti'].max_tries - self.retries + 1
        task_reschedules = TaskReschedule.find_for_task_instance(
            context['ti'], try_number=first_try_number)
        if task_reschedules:
            started_at = task_reschedules[0].start_date
        else:
            started_at = timezone.utcnow()

        def run_duration() -> float:
            # If we are in reschedule mode, then we have to compute diff
            # based on the time in a DB, so can't use time.monotonic
            nonlocal started_at
            return (timezone.utcnow() - started_at).total_seconds()

    else:
        started_at = time.monotonic()

        def run_duration() -> float:
            nonlocal started_at
            return time.monotonic() - started_at

    try_number = 1
    log_dag_id = self.dag.dag_id if self.has_dag() else ""
    while not self.poke(context):
        if run_duration() > self.timeout:
            # If sensor is in soft fail mode but times out raise AirflowSkipException.
            if self.soft_fail:
                raise AirflowSkipException(f"Snap. Time is OUT. DAG id: {log_dag_id}")
            else:
                raise AirflowSensorTimeout(f"Snap. Time is OUT. DAG id: {log_dag_id}")
        if self.reschedule:
            reschedule_date = timezone.utcnow() + timedelta(
                seconds=self._get_next_poke_interval(started_at, run_duration, try_number))
            raise AirflowRescheduleException(reschedule_date)
        else:
            time.sleep(self._get_next_poke_interval(started_at, run_duration, try_number))
        try_number += 1
    self.log.info("Success criteria met. Exiting.")
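The variants above that pass `(started_at, run_duration, try_number)` delegate the wait length to `_get_next_poke_interval`. A simplified sketch of such a helper, assuming an `exponential_backoff` flag on the sensor and hash-based jitter; this mirrors the idea behind Airflow's implementation rather than reproducing it verbatim.

import hashlib


def _get_next_poke_interval(self, started_at, run_duration, try_number):
    """Sketch: fixed interval by default, hash-jittered exponential backoff otherwise."""
    if not self.exponential_backoff:
        return self.poke_interval

    # Roughly double the base delay on each try; the max() guards the modulo below.
    min_backoff = max(1, int(self.poke_interval * (2 ** (try_number - 2))))
    # Deterministic jitter: reschedules of the same try compute the same delay.
    run_hash = int(
        hashlib.sha1(f"{self.dag_id}#{self.task_id}#{started_at}#{try_number}".encode()).hexdigest(),
        16,
    )
    interval = min_backoff + run_hash % min_backoff
    # Never sleep past the remaining timeout budget.
    return min(interval, self.timeout - run_duration())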
def execute(self, context):
    started_at = timezone.utcnow()
    while not self.poke(context):
        if (timezone.utcnow() - started_at).total_seconds() > self.timeout:
            # If sensor is in soft fail mode but will be retried then
            # give it a chance and fail with timeout.
            # This gives the ability to set up non-blocking AND soft-fail sensors.
            if self.soft_fail and not context['ti'].is_eligible_to_retry():
                self._do_skip_downstream_tasks(context)
                raise AirflowSkipException('Snap. Time is OUT.')
            else:
                raise AirflowSensorTimeout('Snap. Time is OUT.')
        sleep(self.poke_interval)
    self.log.info("Success criteria met. Exiting.")
def execute(self, context, bash_command_function='get_bash_command'):
    func = getattr(self, bash_command_function)
    bash_command = func(context)
    host = self.hook._host_ref()
    started_at = datetime.now()
    with SSHTempFileContent(self.hook, bash_command, self.task_id) as remote_file_path:
        logging.info("Temporary script location: %s:%s", host, remote_file_path)
        while not self.poke_output(self.hook, context, remote_file_path):
            if (datetime.now() - started_at).total_seconds() > self.timeout:
                if self.soft_fail:
                    raise AirflowSkipException('Snap. Time is OUT.')
                else:
                    raise AirflowSensorTimeout('Snap. Time is OUT.')
            sleep(self.poke_interval)
        logging.info("Success criteria met. Exiting.")
def execute(self, context):
    started_at = datetime.now()
    while True:
        poke_result = self.poke(context)
        if poke_result:
            break
        if (datetime.now() - started_at).total_seconds() > self.timeout:
            timeout_msg = 'Snap. Time is OUT.'
            if self.soft_fail:
                raise AirflowSkipException(timeout_msg)
            else:
                raise AirflowSensorTimeout(timeout_msg)
        else:
            self._send_notification(context, success=False)
        time.sleep(self.poke_interval)
    if self.last_notification is not None:
        # notify about success in case of previous warnings
        self._send_notification(context, success=True)
    logging.info('Success criteria met. Exiting.')
    return poke_result
def execute(self, context):
    self.session = Session().get()
    started_at = datetime.now()
    success = False
    counter = 0
    while not success:
        logging.info('Iteration sequence: %s', counter)
        try:
            if (datetime.now() - started_at).total_seconds() > self.timeout:
                raise AirflowSensorTimeout
            success = self.poke(context)
            logging.info('Bool result of SQLSensorRun is %s from %s', success, self.query_result)
            if success:
                if self.run and self.run.status != State.SUCCESS:
                    self.set_state(State.SUCCESS, set_result=True)
                logging.info("Success criteria met. Exiting. Result was: %s", self.query_result)
            elif self.run.status == State.EXPIRED:
                logging.info('Run is expired. Skip sleeping.')
            else:
                logging.info('Current status is %s, nap time %s.', self.run.status, self.poke_interval)
                sleep(self.poke_interval)
        except AirflowSensorTimeout:
            self.set_state(State.FAILED)
            raise AirflowSensorTimeout('Snap. Time is OUT.')
        except Exception as e:
            self.set_state(State.FATAL)
            raise Exception('Fatal exception: %s' % e)
        finally:
            counter += 1
            self.session.commit()
    logging.info('Execution finished. Close session.')
    self.session.close()
def execute(self, context):
    # setting batch_info in table and persist profile and epoch in xcom
    super(DMStartOperator, self).execute(context)
    # default to the profile DM endpoint if function is not passed
    self.get_function()
    logging.info("DM function: %s", self.function)
    # invoke DM function to get latest job instance
    self.get_jobs()
    logging.info('Context on Airflow is %s', context['ds'])
    logging.info("run Frequency %s", self.run_frequency)
    logging.info("schedule interval on Airflow is %s", context['dag'].schedule_interval)
    # check if this is a possible catchup (delayed run) situation; the design is to force this
    # instance to complete and let the most recent instance run.
    # if the current time and the execution time are more than 2 intervals apart, it is a catch-up run
    try:
        if self.get_time_diff(context) > 2 * context['dag'].schedule_interval:
            logging.info(
                "This is a catch up run, this whole dag will be marked success, only the most recent "
                "schedule will be executed.")
            self.is_catchup = True
        else:
            logging.info("This is a regular run")
            self.is_catchup = False
    except Exception:
        self.is_catchup = False
    if self.is_catchup and self.self_catch_up:
        # don't run the current run - force complete this run, and let the real run proceed
        for t in context['dag'].tasks:
            if t != context['task']:
                t.run(start_date=context['execution_date'],
                      end_date=context['execution_date'],
                      mark_success=True,
                      ignore_dependencies=True)
    else:
        # get current interval id to be passed to the lambda DM call of startJobInstances;
        # since the airflow schedule kicks off at the last minute of the schedule day,
        # add one day to the airflow execution date
        run_date = context['execution_date'] + timedelta(days=1)
        self.current_interval = run_date.strftime('%Y-%m-%dT%H:%M')
        logging.info('Current Interval - real process date is %s', self.current_interval)
        if self.last_run_status == 'NONE':
            logging.info("This is the Initial run of job")
        started_at = datetime.now()
        # check if job is ready to run by calling the poke method
        while not self.poke(context):
            sleep(self.poke_interval)
            # total_seconds() handles waits longer than a day; .seconds alone wraps at 24h
            if (datetime.now() - started_at).total_seconds() > self.timeout:
                raise AirflowSensorTimeout('Snap. Time is OUT.')
        logging.info("Success criteria met. Set job to RUNNING status in DM")
        try:
            response = self.call_DM("updateJobInstance", "RUNNING")
        except DMException:
            raise AirflowException("error updating the job to running in DM, callDM exception")
        # check if the success keyword is in the response
        try:
            success = response['success']
        except KeyError:
            logging.info("updating the job to running in DM not successful")
            raise AirflowException("update the job to running in DM not successful")
def execute(self, context):
    started_at = datetime.now()
    # wait until the time condition is met, the dag run succeeds, or the sensor step is running/successful
    sensor_count = self.get_sensor_count(context)
    # wait until we have reached or passed the sensor step
    dag_success = self.check_if_success(context)
    if not self.kill_all_tasks:
        while not self.sensor_step(context, sensor_count) and not dag_success:
            logging.info("waiting to get to the sensor step")
            dag_success = self.check_if_success(context)
    target_dttm = (context['execution_date'] + context['dag'].schedule_interval + self.delta)
    logging.info('Checking if the time ({0}) has come'.format(target_dttm))
    while not self.poke(context) and not dag_success:
        sleep(self.poke_interval)
        # total_seconds() handles waits longer than a day; .seconds alone wraps at 24h
        if (datetime.now() - started_at).total_seconds() > self.timeout:
            raise AirflowSensorTimeout('Snap. Time is OUT.')
        dag_success = self.check_if_success(context)
    logging.info("Time delta met. Checking downstream tasks...")
    if not dag_success:
        sensor_waiting = self.check_if_sensor_waiting(context, sensor_count)
        # kill tasks only if only sensors are waiting, or when kill_all_tasks is set to true
        if (not self.kill_all_tasks and sensor_waiting) or self.kill_all_tasks:
            logging.info(
                "(kill_all_tasks is false and only sensors are waiting) or (kill_all_tasks is set to true)")
            # pause dag before marking tasks success
            logging.info('pause dag')
            self.set_is_paused(True, context['dag'].dag_id)
            logging.info('clearing out instances')
            self.clear_out_instance(context)
            # get the list of already-successful task ids, since there is no need to
            # mark them success again
            if self.kill_all_tasks:
                logging.info('killing all tasks')
            else:
                logging.info('sensors are waiting and SLA is met')
            success_list = self.get_success_list(context)
            for t in context['dag'].tasks:
                # do not mark this task or any tasks that have already completed successfully
                if t.task_id != context['task'].task_id and t.task_id not in success_list:
                    t.run(start_date=context['execution_date'],
                          end_date=context['execution_date'],
                          mark_success=True,
                          ignore_dependencies=True)
            logging.info("Done marking all tasks success.")
            # unpause dag after the instances are cleared out
            logging.info('unpause dag')
            self.set_is_paused(False, context['dag'].dag_id)
            if self.send_email:
                logging.info('send email is True')
                exec_date = context['execution_date']
                append_html = ('<p>Automated email sent by ' + context['dag'].dag_id + ' for date ' +
                               exec_date.strftime('%Y-%m-%dT%H:%M:%S') + '. Please do not reply to email.</p>')
                if self.email_html is None or self.email_html == '':
                    self.email_html = append_html
                else:
                    self.email_html += append_html
                retries = 3
                cluster = conf.get('core', 'cluster')
                email_subject = "[airflow-{cluster}] {subject}".format(
                    cluster=cluster, subject=self.subject)
                for i in range(retries):
                    try:
                        send_email(self.to, email_subject, self.email_html)
                        break  # stop retrying once the email is sent
                    except Exception:
                        logging.info("Failed to send email")
            else:
                logging.info('send email is false')
        else:
            logging.info('tasks are started, not killing any tasks')
    else:
        logging.info("Dag ran successfully, exiting SLAWatcher")