def test_read_raises_return_error(self):
    self.hook_inst_mock.get_key.side_effect = Exception('error')
    result = logging.S3Log().read(
        self.remote_log_location, return_error=True)
    msg = 'Could not read logs from %s' % self.remote_log_location
    self.assertEqual(result, msg)
    self.logging_mock.error.assert_called_once_with(msg)
def _read(self, ti, try_number):
    """
    Read logs of the given task instance and try_number from S3 remote
    storage. If that fails, read the log from the task instance host machine.

    :param ti: task instance object
    :param try_number: task instance try_number to read logs from
    """
    # Explicitly building the log relative path is necessary because the
    # given task instance might differ from the task instance passed to
    # the set_context method.
    log_relative_path = self.filename_template.format(
        dag_id=ti.dag_id,
        task_id=ti.task_id,
        execution_date=ti.execution_date.isoformat(),
        try_number=try_number + 1)
    remote_loc = os.path.join(self.remote_base, log_relative_path)

    s3_log = logging_utils.S3Log()
    if s3_log.log_exists(remote_loc):
        # If the S3 remote file exists, we do not fetch logs from the task
        # instance's local machine even if there are errors reading the
        # remote logs, as the returned remote_log will contain the error
        # messages.
        remote_log = s3_log.read(remote_loc, return_error=True)
        log = '*** Reading remote log from {}.\n{}\n'.format(
            remote_loc, remote_log)
    else:
        log = super(S3TaskHandler, self)._read(ti, try_number)

    return log
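# Illustration only: a minimal, self-contained sketch of how the relative
# path and remote location above get composed. The template string, the
# dag_id / task_id / execution_date / try_number values, and the bucket in
# the remote base are hypothetical; they are not taken from this code.
import os

example_template = '{dag_id}/{task_id}/{execution_date}/{try_number}.log'
example_remote_base = 's3://my-bucket/airflow/logs'  # assumed REMOTE_BASE_LOG_FOLDER

example_relative_path = example_template.format(
    dag_id='example_dag',
    task_id='example_task',
    execution_date='2017-01-01T00:00:00',
    try_number=1)
example_remote_loc = os.path.join(example_remote_base, example_relative_path)
# -> s3://my-bucket/airflow/logs/example_dag/example_task/2017-01-01T00:00:00/1.log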
def test_init_raises(self):
    self.hook_mock.side_effect = Exception('Failed to connect')
    logging.S3Log()
    self.logging_mock.error.assert_called_once_with(
        'Could not create an S3Hook with connection id "". Please make '
        'sure that airflow[s3] is installed and the S3 connection exists.'
    )
def test_write(self):
    logging.S3Log().write('text', self.remote_log_location)
    self.hook_inst_mock.load_string.assert_called_once_with(
        'content\ntext',
        key=self.remote_log_location,
        replace=True,
        encrypt=False,
    )
def close(self):
    """
    Close the handler and upload the local log file to S3 remote storage.
    """
    # When the application exits, the system shuts down all handlers by
    # calling their close method. Here we check whether the logger is
    # already closed to prevent uploading the log to remote storage
    # multiple times when `logging.shutdown` is called.
    if self.closed:
        return

    super(S3TaskHandler, self).close()

    local_loc = os.path.join(self.local_base, self.log_relative_path)
    remote_loc = os.path.join(self.remote_base, self.log_relative_path)
    if os.path.exists(local_loc):
        # read log and remove old logs to get just the latest additions
        with open(local_loc, 'r') as logfile:
            log = logfile.read()
        logging_utils.S3Log().write(log, remote_loc)

    self.closed = True
def run(args, dag=None):
    db_utils.pessimistic_connection_handling()
    if dag:
        args.dag_id = dag.dag_id

    # Setting up logging
    log_base = os.path.expanduser(conf.get('core', 'BASE_LOG_FOLDER'))
    directory = log_base + "/{args.dag_id}/{args.task_id}".format(args=args)
    if not os.path.exists(directory):
        os.makedirs(directory)
    iso = args.execution_date.isoformat()
    filename = "{directory}/{iso}".format(**locals())

    logging.root.handlers = []
    logging.basicConfig(
        filename=filename,
        level=settings.LOGGING_LEVEL,
        format=settings.LOG_FORMAT)

    if not args.pickle and not dag:
        dag = get_dag(args)
    elif not dag:
        session = settings.Session()
        logging.info('Loading pickle id {args.pickle}'.format(**locals()))
        dag_pickle = session.query(
            DagPickle).filter(DagPickle.id == args.pickle).first()
        if not dag_pickle:
            raise AirflowException("Who hid the pickle!? [missing pickle]")
        dag = dag_pickle.pickle
    task = dag.get_task(task_id=args.task_id)

    ti = TaskInstance(task, args.execution_date)

    if args.local:
        print("Logging into: " + filename)
        run_job = jobs.LocalTaskJob(
            task_instance=ti,
            mark_success=args.mark_success,
            force=args.force,
            pickle_id=args.pickle,
            ignore_dependencies=args.ignore_dependencies,
            ignore_depends_on_past=args.ignore_depends_on_past,
            pool=args.pool)
        run_job.run()
    elif args.raw:
        ti.run(
            mark_success=args.mark_success,
            force=args.force,
            ignore_dependencies=args.ignore_dependencies,
            ignore_depends_on_past=args.ignore_depends_on_past,
            job_id=args.job_id,
            pool=args.pool,
        )
    else:
        pickle_id = None
        if args.ship_dag:
            try:
                # Running remotely, so pickling the DAG
                session = settings.Session()
                pickle = DagPickle(dag)
                session.add(pickle)
                session.commit()
                pickle_id = pickle.id
                print((
                    'Pickled dag {dag} '
                    'as pickle_id:{pickle_id}').format(**locals()))
            except Exception as e:
                print('Could not pickle the DAG')
                print(e)
                raise e

        executor = DEFAULT_EXECUTOR
        executor.start()
        print("Sending to executor.")
        executor.queue_task_instance(
            ti,
            mark_success=args.mark_success,
            pickle_id=pickle_id,
            ignore_dependencies=args.ignore_dependencies,
            ignore_depends_on_past=args.ignore_depends_on_past,
            force=args.force,
            pool=args.pool)
        executor.heartbeat()
        executor.end()

    # Force the log to flush, and set the handler to go back to normal so we
    # don't continue logging to the task's log file. The flush is important
    # because we subsequently read from the log to insert into S3 or Google
    # Cloud Storage.
    logging.root.handlers[0].flush()
    logging.root.handlers = []

    # store logs remotely
    remote_base = conf.get('core', 'REMOTE_BASE_LOG_FOLDER')

    # deprecated as of March 2016
    if not remote_base and conf.get('core', 'S3_LOG_FOLDER'):
        warnings.warn(
            'The S3_LOG_FOLDER conf key has been replaced by '
            'REMOTE_BASE_LOG_FOLDER. Your conf still works but please '
            'update airflow.cfg to ensure future compatibility.',
            DeprecationWarning)
        remote_base = conf.get('core', 'S3_LOG_FOLDER')

    if os.path.exists(filename):
        # read log and remove old logs to get just the latest additions
        with open(filename, 'r') as logfile:
            log = logfile.read()

        remote_log_location = filename.replace(log_base, remote_base)
        # S3
        if remote_base.startswith('s3:/'):
            logging_utils.S3Log().write(log, remote_log_location)
        # GCS
        elif remote_base.startswith('gs:/'):
            logging_utils.GCSLog().write(
                log,
                remote_log_location,
                append=True)
        # Other
        elif remote_base and remote_base != 'None':
            logging.error(
                'Unsupported remote log location: {}'.format(remote_base))
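# Illustration only: a small sketch of how the remote log location above is
# derived from the local filename, and how the scheme prefix on remote_base
# picks the backend. All paths and bucket names here are hypothetical.
import os

example_log_base = os.path.expanduser('~/airflow/logs')
example_remote_base = 's3://my-bucket/airflow/logs'
example_filename = os.path.join(
    example_log_base, 'example_dag/example_task/2017-01-01T00:00:00')

# Swapping the local prefix for the remote prefix keeps the per-task path.
example_remote_log_location = example_filename.replace(
    example_log_base, example_remote_base)
# -> s3://my-bucket/airflow/logs/example_dag/example_task/2017-01-01T00:00:00

# As in the branches above, a remote base starting with 's3:/' routes to
# S3Log, 'gs:/' routes to GCSLog, and any other non-empty value is reported
# as an unsupported remote log location.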
def test_write_raises(self):
    self.hook_inst_mock.load_string.side_effect = Exception('error')
    logging.S3Log().write('text', self.remote_log_location)
    msg = 'Could not write logs to %s' % self.remote_log_location
    self.logging_mock.error.assert_called_once_with(msg)
def test_read_raises(self):
    self.hook_inst_mock.get_key.side_effect = Exception('error')
    self.assertEqual(logging.S3Log().read(self.remote_log_location), '')
def test_read_key_empty(self):
    self.hook_inst_mock.get_key.return_value = None
    self.assertEqual(logging.S3Log().read(self.remote_log_location), '')
def test_read(self):
    self.assertEqual(
        logging.S3Log().read(self.remote_log_location), 'content')
def test_log_exists_no_hook(self):
    self.hook_mock.side_effect = Exception('Failed to connect')
    self.assertFalse(logging.S3Log().log_exists(self.remote_log_location))
def test_log_exists_raises(self):
    self.hook_inst_mock.get_key.side_effect = Exception('error')
    self.assertFalse(logging.S3Log().log_exists(self.remote_log_location))
def test_log_exists_none(self):
    self.hook_inst_mock.get_key.return_value = None
    self.assertFalse(logging.S3Log().log_exists(self.remote_log_location))
def test_log_exists(self):
    self.assertTrue(logging.S3Log().log_exists(self.remote_log_location))
def test_init(self):
    logging.S3Log()
    self.hook_mock.assert_called_once_with('')
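# The tests above rely on fixtures (self.remote_log_location, self.hook_mock,
# self.hook_inst_mock, self.logging_mock) built in a setUp that is not shown
# here. A plausible sketch follows, assuming `logging` refers to
# airflow.utils.logging and that S3Hook and the module-level logging are
# patched; the patch targets, the 'content' key payload, and the mock chain
# used to return it are assumptions, not the project's actual test fixture.
import unittest

import mock


class S3LogTest(unittest.TestCase):
    def setUp(self):
        self.remote_log_location = 'remote/log/location'
        # Patch the S3Hook used inside airflow.utils.logging (assumed target).
        self.hook_patcher = mock.patch('airflow.utils.logging.S3Hook')
        self.hook_mock = self.hook_patcher.start()
        self.hook_inst_mock = self.hook_mock.return_value
        # Make S3Log().read() resolve to 'content' (assumed internal chain).
        self.hook_inst_mock.get_key.return_value \
            .get_contents_as_string.return_value \
            .decode.return_value = 'content'
        # Patch the module-level logging used for error reporting (assumed).
        self.logging_patcher = mock.patch('airflow.utils.logging.logging')
        self.logging_mock = self.logging_patcher.start()

    def tearDown(self):
        self.logging_patcher.stop()
        self.hook_patcher.stop()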