def tearDown(self) -> None:
    """
    We save the logs to a separate directory so that we can see them later.
    """
    date_str = datetime.now().strftime("%Y-%m-%d_%H_%M_%S")
    logs_folder = resolve_logs_folder()
    print()
    print(f"Saving all log files to {logs_folder}/previous_runs/{date_str}")
    print()
    target_dir = os.path.join(logs_folder, "previous_runs", date_str)
    mkdirs(target_dir, 0o755)
    files = os.listdir(logs_folder)
    for file in files:
        if file != "previous_runs":
            file_path = os.path.join(logs_folder, file)
            shutil.move(file_path, target_dir)
    super().tearDown()
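# `resolve_logs_folder()` and `mkdirs()` are helpers not shown in this snippet.
# A minimal sketch of what `resolve_logs_folder()` might do, assuming the logs
# folder comes from Airflow's base_log_folder option (the section name varies by
# version: [logging] in 2.x, [core] in 1.10) with an $AIRFLOW_HOME/logs
# fallback; the real helper may resolve it differently:
import os

def resolve_logs_folder() -> str:
    try:
        from airflow.configuration import conf
        return conf.get('logging', 'base_log_folder')
    except Exception:
        # Fall back to the conventional layout when no config is available.
        airflow_home = os.environ.get('AIRFLOW_HOME', os.path.expanduser('~/airflow'))
        return os.path.join(airflow_home, 'logs')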
def _init_file(self, ti):
    """
    Create log directory and give it correct permissions.
    :param ti: task instance object
    :return: full path of the log file for the given task instance
    """
    # To handle log writing when tasks are impersonated, the log files need to
    # be writable by the user that runs the Airflow command and the user
    # that is impersonated. This is mainly to handle corner cases with the
    # SubDagOperator. When the SubDagOperator is run, all of the operators
    # run under the impersonated user and create appropriate log files
    # as the impersonated user. However, if the user manually runs tasks
    # of the SubDagOperator through the UI, then the log files are created
    # by the user that runs the Airflow command. For example, the Airflow
    # run command may be run by the `airflow_sudoable` user, but the Airflow
    # tasks may be run by the `airflow` user. If the log files are not
    # writable by both users, then it's possible that re-running a task
    # via the UI (or vice versa) results in a permission error as the task
    # tries to write to a log file created by the other user.
    relative_path = self.filename_template.format(
        dag_id=ti.dag_id,
        task_id=ti.task_id,
        execution_date=ti.execution_date.isoformat(),
        try_number=ti.try_number + 1)
    full_path = os.path.join(self.local_base, relative_path)
    directory = os.path.dirname(full_path)
    # Create the log file and give it group writable permissions
    # TODO(aoen): Make log dirs and logs globally readable for now since the SubDag
    # operator is not compatible with impersonation (e.g. if a Celery executor is used
    # for a SubDag operator and the SubDag operator has a different owner than the
    # parent DAG)
    if not os.path.exists(directory):
        # Create the directory as group writable using custom mkdirs
        # as os.makedirs doesn't set mode properly.
        mkdirs(directory, 0o775)
    if not os.path.exists(full_path):
        open(full_path, "a").close()
        # TODO: Investigate using 444 instead of 666.
        os.chmod(full_path, 0o666)
    return full_path
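# For illustration, here is how the `filename_template` expansion above plays
# out. The template below is the stock Airflow default (an assumption; a
# deployment can override it via the log_filename_template setting), and the
# values are made up:
FILENAME_TEMPLATE = '{dag_id}/{task_id}/{execution_date}/{try_number}.log'

relative_path = FILENAME_TEMPLATE.format(
    dag_id='example_dag',
    task_id='example_task',
    execution_date='2021-01-01T00:00:00+00:00',
    try_number=1,
)
print(relative_path)
# -> example_dag/example_task/2021-01-01T00:00:00+00:00/1.log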
def _init_file(self, ti):
    """
    Create log directory and give it correct permissions.
    :param ti: task instance object
    :return: full path of the log file for the given task instance
    """
    # To handle log writing when tasks are impersonated, the log files need to
    # be writable by the user that runs the Airflow command and the user
    # that is impersonated. This is mainly to handle corner cases with the
    # SubDagOperator. When the SubDagOperator is run, all of the operators
    # run under the impersonated user and create appropriate log files
    # as the impersonated user. However, if the user manually runs tasks
    # of the SubDagOperator through the UI, then the log files are created
    # by the user that runs the Airflow command. For example, the Airflow
    # run command may be run by the `airflow_sudoable` user, but the Airflow
    # tasks may be run by the `airflow` user. If the log files are not
    # writable by both users, then it's possible that re-running a task
    # via the UI (or vice versa) results in a permission error as the task
    # tries to write to a log file created by the other user.
    relative_path = self._render_filename(ti, ti.try_number)
    full_path = os.path.join(self.local_base, relative_path)
    directory = os.path.dirname(full_path)
    # Create the log file and give it group writable permissions
    # TODO(aoen): Make log dirs and logs globally readable for now since the SubDag
    # operator is not compatible with impersonation (e.g. if a Celery executor is used
    # for a SubDag operator and the SubDag operator has a different owner than the
    # parent DAG)
    if not os.path.exists(directory):
        # Create the directory as globally writable using custom mkdirs
        # as os.makedirs doesn't set mode properly.
        mkdirs(directory, 0o777)
    if not os.path.exists(full_path):
        open(full_path, "a").close()
        # TODO: Investigate using 444 instead of 666.
        os.chmod(full_path, 0o666)
    return full_path
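# Both versions above rely on a custom `mkdirs` because os.makedirs applies the
# process umask to the requested mode, so a 0o777 request can silently become
# 0o755. A minimal umask-safe sketch of such a helper (an assumption, not
# necessarily the exact Airflow implementation):
import os

def mkdirs(path: str, mode: int) -> None:
    """Create `path` and any missing parents with exactly `mode`."""
    old_umask = os.umask(0)  # clear the umask so `mode` is applied verbatim
    try:
        # exist_ok makes this a no-op for directories that already exist
        # (their existing mode is left untouched).
        os.makedirs(path, mode, exist_ok=True)
    finally:
        os.umask(old_umask)  # always restore the caller's umask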
# Only update the handlers and loggers when CONFIG_PROCESSOR_MANAGER_LOGGER is set.
# This is to avoid exceptions when initializing RotatingFileHandler multiple times
# in multiple processes.
if os.environ.get('CONFIG_PROCESSOR_MANAGER_LOGGER') == 'True':
    DEFAULT_LOGGING_CONFIG['handlers'] \
        .update(DEFAULT_DAG_PARSING_LOGGING_CONFIG['handlers'])
    DEFAULT_LOGGING_CONFIG['loggers'] \
        .update(DEFAULT_DAG_PARSING_LOGGING_CONFIG['loggers'])

    # Manually create the log directory for the processor_manager handler, as
    # RotatingFileHandler will only create the file, not the directory.
    processor_manager_handler_config: Dict[str, Any] = \
        DEFAULT_DAG_PARSING_LOGGING_CONFIG['handlers']['processor_manager']
    directory: str = os.path.dirname(processor_manager_handler_config['filename'])
    mkdirs(directory, 0o755)

##################
# Remote logging #
##################

REMOTE_LOGGING: bool = conf.getboolean('logging', 'remote_logging')

if REMOTE_LOGGING:

    ELASTICSEARCH_HOST: str = conf.get('elasticsearch', 'HOST')

    # Storage bucket URL for remote logging
    # S3 buckets should start with "s3://"
    # Cloudwatch log groups should start with "cloudwatch://"
    # GCS buckets should start with "gs://"
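# For context, the dict merged in above has roughly this shape. This is a
# sketch of the corresponding Airflow defaults; the real config derives the
# filename and level from airflow.cfg, and exact fields vary across versions:
from typing import Any, Dict

DAG_PROCESSOR_MANAGER_LOG_LOCATION = (
    '/usr/local/airflow/logs/dag_processor_manager/dag_processor_manager.log'
)  # placeholder; normally read from the [logging] section of airflow.cfg
LOG_LEVEL = 'INFO'  # placeholder; normally conf.get('logging', 'logging_level')

DEFAULT_DAG_PARSING_LOGGING_CONFIG: Dict[str, Any] = {
    'handlers': {
        'processor_manager': {
            'class': 'logging.handlers.RotatingFileHandler',
            'formatter': 'airflow',
            'filename': DAG_PROCESSOR_MANAGER_LOG_LOCATION,
            'mode': 'a',
            'maxBytes': 104857600,  # rotate after ~100 MB
            'backupCount': 5,
        },
    },
    'loggers': {
        'airflow.processor_manager': {
            'handlers': ['processor_manager'],
            'level': LOG_LEVEL,
            'propagate': False,
        },
    },
}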
        },
    },
}

REMOTE_LOGGING = conf.getboolean('core', 'remote_logging')

# Only update the handlers and loggers when CONFIG_PROCESSOR_MANAGER_LOGGER is set.
# This is to avoid exceptions when initializing RotatingFileHandler multiple times
# in multiple processes.
if os.environ.get('CONFIG_PROCESSOR_MANAGER_LOGGER') == 'True':
    DEFAULT_LOGGING_CONFIG['handlers'] \
        .update(DEFAULT_DAG_PARSING_LOGGING_CONFIG['handlers'])
    DEFAULT_LOGGING_CONFIG['loggers'] \
        .update(DEFAULT_DAG_PARSING_LOGGING_CONFIG['loggers'])

    # Manually create the log directory for the processor_manager handler, as
    # RotatingFileHandler will only create the file, not the directory.
    processor_manager_handler_config = DEFAULT_DAG_PARSING_LOGGING_CONFIG['handlers'][
        'processor_manager']
    directory = os.path.dirname(processor_manager_handler_config['filename'])
    mkdirs(directory, 0o755)

if REMOTE_LOGGING and REMOTE_BASE_LOG_FOLDER.startswith('s3://'):
    DEFAULT_LOGGING_CONFIG['handlers'].update(REMOTE_HANDLERS['s3'])
elif REMOTE_LOGGING and REMOTE_BASE_LOG_FOLDER.startswith('gs://'):
    DEFAULT_LOGGING_CONFIG['handlers'].update(REMOTE_HANDLERS['gcs'])
elif REMOTE_LOGGING and REMOTE_BASE_LOG_FOLDER.startswith('wasb'):
    DEFAULT_LOGGING_CONFIG['handlers'].update(REMOTE_HANDLERS['wasb'])
elif REMOTE_LOGGING and ELASTICSEARCH_HOST:
    DEFAULT_LOGGING_CONFIG['handlers'].update(REMOTE_HANDLERS['elasticsearch'])
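# Once assembled, DEFAULT_LOGGING_CONFIG only takes effect when handed to the
# standard logging machinery. A minimal usage sketch, mirroring what Airflow's
# logging_config.configure_logging does when it loads this settings module:
import logging.config

logging.config.dictConfig(DEFAULT_LOGGING_CONFIG)
logging.getLogger('airflow.task').info('task logging configured')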