def emit(self, record: logging.LogRecord):
    """Emit a log record to the database.

    Arguments:
        record {logging.LogRecord} -- The logging record.
    """
    if not self.has_context:
        self.set_context()

    db_record_message = self.format(record)
    try:
        db_record = DagFileProcessingLogRecord(self._log_filepath, db_record_message)
        self.db_session.add(db_record)
        self.db_session.commit()
    except Exception:
        try:
            self.db_session.rollback()
        except Exception:
            pass
        airflow_db_logger_log.error(
            f"Error while attempting to log ({self._log_filepath}): {db_record_message}"
        )
        airflow_db_logger_log.error(traceback.format_exc())

    super().emit(record)
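# A minimal wiring sketch (an illustrative assumption, not this package's
# actual configuration): attaching the handler that owns the emit() above to
# Airflow's dag-processing logger, so dag-file parsing logs flow to the
# database. The class name and constructor signature are hypothetical.
import logging

processor_logger = logging.getLogger("airflow.processor")
processor_logger.addHandler(DagFileProcessingLogHandler())  # hypothetical name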
def emit(self, record: logging.LogRecord):
    """Emit a log record to the database.

    Arguments:
        record {logging.LogRecord} -- The logging record.
    """
    # A fix that allows tasks run by the debug executor to also log to the
    # database: recover the running TaskInstance from the call stack.
    if IS_RUNNING_DEBUG_EXECUTOR and not self.has_task_context:
        ti: TaskInstance = get_calling_frame_objects_by_type(TaskInstance, first_only=True)
        if ti is not None:
            self.set_context(task_instance=ti)

    if self.has_task_context and self.has_context:
        try:
            db_record = TaskExecutionLogRecord(
                self.task_context_info.dag_id,
                self.task_context_info.task_id,
                self.task_context_info.execution_date,
                self.task_context_info.try_number,
                self.format(record),
            )
            self.db_session.add(db_record)
            self.db_session.commit()
        except Exception:
            try:
                self.db_session.rollback()
            except Exception:
                pass
            airflow_db_logger_log.error(traceback.format_exc())

    super().emit(record)
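# Illustrative sketch only (an assumption about behavior, not the package's
# implementation of get_calling_frame_objects_by_type): walking up the call
# stack and returning the first local variable of a given type, which is how
# the debug-executor fix above can recover the running TaskInstance.
import inspect


def find_first_frame_object_by_type(object_type):
    """Return the first local of the given type found while walking up the
    call stack, or None when no frame holds one."""
    frame = inspect.currentframe().f_back
    while frame is not None:
        for value in frame.f_locals.values():
            if isinstance(value, object_type):
                return value
        frame = frame.f_back
    return None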
def set_context(self, task_instance):
    """Initialize the task log context.

    Arguments:
        task_instance {TaskInstance} -- The task instance to write logs for.
    """
    try:
        self._task_instance = task_instance
        self._task_context_info = ExecutionLogTaskContextInfo(task_instance)
        self._logfile_subpath = os.path.join(
            self.subfolder_path, self._render_logfile_subpath())
        super().set_context()
    except Exception as err:
        airflow_db_logger_log.error(err)
def set_context(self, filepath=None):
    """Initialize the dag-file processing log context.

    Keyword Arguments:
        filepath {str} -- The path to the dag file; if None, log to the
            global log file (default: {None})
    """
    try:
        self._log_filepath = (
            os.path.join(self.subfolder_path, self.global_log_file)
            if filepath is None
            else os.path.join(
                self.subfolder_path,
                self.dags_subfolder_path,
                self._render_relative_dag_filepath(filepath),
            ))
        self._db_session = DBLoggerSession()
    except Exception:
        airflow_db_logger_log.error("Failed to initialize process logger context")
        airflow_db_logger_log.error(traceback.format_exc())
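# A minimal usage sketch (the handler construction and dag file path are
# illustrative assumptions, not taken from this package):
handler = DagFileProcessingLogHandler()  # hypothetical constructor
handler.set_context("/opt/airflow/dags/example_dag.py")  # per-dag-file context
handler.set_context()  # falls back to the global log file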
def read(
    self,
    task_instance: TaskInstance,
    try_number: int = None,
    metadata: dict = None,
):
    """Read the logs of the given task instance from the database.

    Arguments:
        task_instance {TaskInstance} -- The task instance object

    Keyword Arguments:
        try_number {int} -- The run try number; if None, read all tries (default: {None})
        metadata {dict} -- Added metadata (default: {None})

    Returns:
        Tuple[list, List[dict]] -- The logs and their matching metadata,
            one entry per try number.
    """
    # A task instance increments its try number when it starts to run,
    # so the log for a particular try only shows up once that try number
    # has been incremented in the database; logs produced after the cli
    # run and before try_number + 1 is written will not be displayed.
    db_session: DBLoggerSession = None
    try:
        db_session = DBLoggerSession()

        if try_number is None:
            next_try = task_instance.next_try_number
            try_numbers = list(range(1, next_try))
        elif try_number < 1:
            logs = [
                "Error fetching the logs. Try number {} is invalid.".format(try_number),
            ]
            return logs, [{"end_of_log": True}]
        else:
            try_numbers = [try_number]

        airflow_db_logger_log.info(
            f"Reading logs: {task_instance.dag_id}/{task_instance.task_id} "
            f"{try_numbers} {{{task_instance.execution_date}}}"
        )

        log_records_query = (
            db_session.query(TaskExecutionLogRecord)
            .filter(TaskExecutionLogRecord.dag_id == task_instance.dag_id)
            .filter(TaskExecutionLogRecord.task_id == task_instance.task_id)
            .filter(TaskExecutionLogRecord.execution_date == task_instance.execution_date)
            .filter(TaskExecutionLogRecord.try_number.in_(try_numbers))
        )

        if DB_LOGGER_SHOW_REVERSE_ORDER is True:
            log_records_query = log_records_query.order_by(
                desc(TaskExecutionLogRecord.timestamp))
        else:
            log_records_query = log_records_query.order_by(
                asc(TaskExecutionLogRecord.timestamp))

        log_records: List[TaskExecutionLogRecord] = log_records_query.all()
        db_session.close()
        db_session = None

        # Group the record texts by try number.
        logs_by_try_number: Dict[int, List[str]] = dict()
        for log_record in log_records:
            record_try_number = int(log_record.try_number)
            if record_try_number not in logs_by_try_number:
                logs_by_try_number[record_try_number] = []
            logs_by_try_number[record_try_number].append(str(log_record.text))

        # Build one log blob and one metadata entry per try number.
        try_numbers.sort()
        logs = []
        metadata_array = []
        for current_try_number in try_numbers:
            log = "\n".join(
                logs_by_try_number.get(current_try_number, ["[No logs found]"]))
            if AIRFLOW_MAJOR_VERSION > 1:
                # Airflow >= 2 expects (hostname, log) tuples per try.
                log = [(task_instance.hostname, log)]
            logs.append(log)
            metadata_array.append({"end_of_log": True})

        airflow_db_logger_log.info(f"Read {len(logs)} logs")
        return logs, metadata_array
    except Exception:
        if db_session:
            try:
                db_session.rollback()
            except Exception:
                pass
        airflow_db_logger_log.error(traceback.format_exc())
        return [
            "An error occurred while reading the logs from the database:\n"
            + traceback.format_exc()
        ], [{"end_of_log": True}]
    finally:
        if db_session:
            db_session.close()
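# A minimal usage sketch (assumes `handler` and `ti` were created elsewhere;
# it only exercises the read() contract implemented above):
logs, metadata_array = handler.read(ti)  # one entry per recorded try number
for log, meta in zip(logs, metadata_array):
    print(meta["end_of_log"], log)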