Example #1
    def test_read_raises_return_error(self):
        self.hook_inst_mock.get_key.side_effect = Exception('error')
        result = logging.S3Log().read(self.remote_log_location,
                                      return_error=True)
        msg = 'Could not read logs from %s' % self.remote_log_location
        self.assertEqual(result, msg)
        self.logging_mock.error.assert_called_once_with(msg)
Example #2
    def _read(self, ti, try_number):
        """
        Read the logs of the given task instance and try_number from S3
        remote storage. If that fails, read the log from the task
        instance's host machine.

        :param ti: task instance object
        :param try_number: task instance try_number to read logs from
        """
        # Explicitly building the log relative path is necessary, as the
        # given task instance might be different from the one passed in
        # to the set_context method.
        log_relative_path = self.filename_template.format(
            dag_id=ti.dag_id,
            task_id=ti.task_id,
            execution_date=ti.execution_date.isoformat(),
            try_number=try_number + 1)
        remote_loc = os.path.join(self.remote_base, log_relative_path)

        s3_log = logging_utils.S3Log()
        if s3_log.log_exists(remote_loc):
            # If S3 remote file exists, we do not fetch logs from task instance
            # local machine even if there are errors reading remote logs, as
            # returned remote_log will contain error messages.
            remote_log = s3_log.read(remote_loc, return_error=True)
            log = '*** Reading remote log from {}.\n{}\n'.format(
                remote_loc, remote_log)
        else:
            log = super(S3TaskHandler, self)._read(ti, try_number)

        return log
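
For illustration, this is how the template rendering above plays out. The template string below is a hypothetical value (the real filename_template is configurable), and the +1 mirrors the zero-based try_number adjustment in _read:

filename_template = '{dag_id}/{task_id}/{execution_date}/{try_number}.log'

log_relative_path = filename_template.format(
    dag_id='example_dag',
    task_id='example_task',
    execution_date='2017-01-01T00:00:00',
    try_number=0 + 1)
# -> 'example_dag/example_task/2017-01-01T00:00:00/1.log'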
Example #3
    def test_init_raises(self):
        self.hook_mock.side_effect = Exception('Failed to connect')
        logging.S3Log()
        self.logging_mock.error.assert_called_once_with(
            'Could not create an S3Hook with connection id "". Please make '
            'sure that airflow[s3] is installed and the S3 connection exists.'
        )
Example #4
    def test_write(self):
        logging.S3Log().write('text', self.remote_log_location)
        self.hook_inst_mock.load_string.assert_called_once_with(
            'content\ntext',
            key=self.remote_log_location,
            replace=True,
            encrypt=False,
        )
Example #5
    def close(self):
        """
        Close the handler and upload the local log file to S3 remote storage.
        """
        # When the application exits, the system shuts down all handlers by
        # calling their close method. Here we check whether the logger is
        # already closed, to prevent uploading the log to remote storage
        # multiple times when `logging.shutdown` is called.
        if self.closed:
            return

        super(S3TaskHandler, self).close()

        local_loc = os.path.join(self.local_base, self.log_relative_path)
        remote_loc = os.path.join(self.remote_base, self.log_relative_path)
        if os.path.exists(local_loc):
            # read log and remove old logs to get just the latest additions
            with open(local_loc, 'r') as logfile:
                log = logfile.read()
            logging_utils.S3Log().write(log, remote_loc)

        self.closed = True
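
Both _read and close depend on self.log_relative_path, which would presumably be recorded when the handler is bound to a task instance. A minimal sketch of such a set_context method, reusing the same template fields as _read (illustrative only, not the verbatim implementation):

    def set_context(self, ti):
        super(S3TaskHandler, self).set_context(ti)
        # Remember the relative path so that close() can later locate both
        # the local file and its remote counterpart.
        self.log_relative_path = self.filename_template.format(
            dag_id=ti.dag_id,
            task_id=ti.task_id,
            execution_date=ti.execution_date.isoformat(),
            try_number=ti.try_number + 1)  # assumes zero-based try_number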
Example #6
def run(args, dag=None):
    db_utils.pessimistic_connection_handling()
    if dag:
        args.dag_id = dag.dag_id

    # Setting up logging
    log_base = os.path.expanduser(conf.get('core', 'BASE_LOG_FOLDER'))
    directory = log_base + "/{args.dag_id}/{args.task_id}".format(args=args)
    if not os.path.exists(directory):
        os.makedirs(directory)
    iso = args.execution_date.isoformat()
    filename = "{directory}/{iso}".format(**locals())

    logging.root.handlers = []
    logging.basicConfig(
        filename=filename,
        level=settings.LOGGING_LEVEL,
        format=settings.LOG_FORMAT)

    if not args.pickle and not dag:
        dag = get_dag(args)
    elif not dag:
        session = settings.Session()
        logging.info('Loading pickle id {args.pickle}'.format(**locals()))
        dag_pickle = session.query(
            DagPickle).filter(DagPickle.id == args.pickle).first()
        if not dag_pickle:
            raise AirflowException("Who hid the pickle!? [missing pickle]")
        dag = dag_pickle.pickle
    task = dag.get_task(task_id=args.task_id)

    ti = TaskInstance(task, args.execution_date)

    if args.local:
        print("Logging into: " + filename)
        run_job = jobs.LocalTaskJob(
            task_instance=ti,
            mark_success=args.mark_success,
            force=args.force,
            pickle_id=args.pickle,
            ignore_dependencies=args.ignore_dependencies,
            ignore_depends_on_past=args.ignore_depends_on_past,
            pool=args.pool)
        run_job.run()
    elif args.raw:
        ti.run(
            mark_success=args.mark_success,
            force=args.force,
            ignore_dependencies=args.ignore_dependencies,
            ignore_depends_on_past=args.ignore_depends_on_past,
            job_id=args.job_id,
            pool=args.pool,
        )
    else:
        pickle_id = None
        if args.ship_dag:
            try:
                # Running remotely, so pickling the DAG
                session = settings.Session()
                pickle = DagPickle(dag)
                session.add(pickle)
                session.commit()
                pickle_id = pickle.id
                print((
                    'Pickled dag {dag} '
                    'as pickle_id:{pickle_id}').format(**locals()))
            except Exception as e:
                print('Could not pickle the DAG')
                print(e)
                raise e

        executor = DEFAULT_EXECUTOR
        executor.start()
        print("Sending to executor.")
        executor.queue_task_instance(
            ti,
            mark_success=args.mark_success,
            pickle_id=pickle_id,
            ignore_dependencies=args.ignore_dependencies,
            ignore_depends_on_past=args.ignore_depends_on_past,
            force=args.force,
            pool=args.pool)
        executor.heartbeat()
        executor.end()

    # Force the log to flush, and set the handler to go back to normal so we
    # don't continue logging to the task's log file. The flush is important
    # because we subsequently read from the log to insert into S3 or Google
    # cloud storage.
    logging.root.handlers[0].flush()
    logging.root.handlers = []

    # store logs remotely
    remote_base = conf.get('core', 'REMOTE_BASE_LOG_FOLDER')

    # deprecated as of March 2016
    if not remote_base and conf.get('core', 'S3_LOG_FOLDER'):
        warnings.warn(
            'The S3_LOG_FOLDER conf key has been replaced by '
            'REMOTE_BASE_LOG_FOLDER. Your conf still works but please '
            'update airflow.cfg to ensure future compatibility.',
            DeprecationWarning)
        remote_base = conf.get('core', 'S3_LOG_FOLDER')

    if os.path.exists(filename):
        # read log and remove old logs to get just the latest additions

        with open(filename, 'r') as logfile:
            log = logfile.read()

        remote_log_location = filename.replace(log_base, remote_base)
        # S3
        if remote_base.startswith('s3:/'):
            logging_utils.S3Log().write(log, remote_log_location)
        # GCS
        elif remote_base.startswith('gs:/'):
            logging_utils.GCSLog().write(
                log,
                remote_log_location,
                append=True)
        # Other
        elif remote_base and remote_base != 'None':
            logging.error(
                'Unsupported remote log location: {}'.format(remote_base))
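
The local-to-remote mapping above is a plain prefix substitution. With hypothetical folder values it works out as follows:

log_base = '/home/airflow/logs'              # core.BASE_LOG_FOLDER (hypothetical)
remote_base = 's3://my-bucket/airflow/logs'  # core.REMOTE_BASE_LOG_FOLDER (hypothetical)
filename = log_base + '/example_dag/example_task/2017-01-01T00:00:00'

remote_log_location = filename.replace(log_base, remote_base)
# -> 's3://my-bucket/airflow/logs/example_dag/example_task/2017-01-01T00:00:00'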
Example #7
    def test_write_raises(self):
        self.hook_inst_mock.load_string.side_effect = Exception('error')
        logging.S3Log().write('text', self.remote_log_location)
        msg = 'Could not write logs to %s' % self.remote_log_location
        self.logging_mock.error.assert_called_once_with(msg)
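
Examples #4 and #7 together pin down S3Log.write: it appends the new text to whatever is already stored at the key, uploads via load_string, and logs an error instead of raising on failure. A sketch consistent with those tests (the encrypt flag would presumably come from configuration; the literal False matches the test expectation):

    def write(self, log, remote_log_location, append=True):
        if self.hook:
            if append:
                old_log = self.read(remote_log_location)
                log = old_log + '\n' + log if old_log else log
            try:
                self.hook.load_string(
                    log,
                    key=remote_log_location,
                    replace=True,
                    encrypt=False,  # e.g. an ENCRYPT_S3_LOGS conf value
                )
            except Exception:
                logging.error(
                    'Could not write logs to %s' % remote_log_location)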
Example #8
    def test_read_raises(self):
        self.hook_inst_mock.get_key.side_effect = Exception('error')
        self.assertEqual(logging.S3Log().read(self.remote_log_location), '')
Example #9
    def test_read_key_empty(self):
        self.hook_inst_mock.get_key.return_value = None
        self.assertEqual(logging.S3Log().read(self.remote_log_location), '')
Example #10
    def test_read(self):
        self.assertEqual(logging.S3Log().read(self.remote_log_location),
                         'content')
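
Examples #1, #8, #9, and #10 jointly specify S3Log.read: the key's contents on success, an empty string for a missing key or an error, and the error message itself when return_error=True. A sketch satisfying those tests (get_contents_as_string is an assumed boto-era key method; the tests only mock get_key):

    def read(self, remote_log_location, return_error=False):
        if self.hook:
            try:
                s3_key = self.hook.get_key(remote_log_location)
                if s3_key:
                    # Assumed key API; not pinned down by the tests above.
                    return s3_key.get_contents_as_string().decode()
            except Exception:
                pass
        # Reached on a missing hook, a missing key, or any error.
        err = 'Could not read logs from %s' % remote_log_location
        logging.error(err)
        return err if return_error else ''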
Example #11
    def test_log_exists_no_hook(self):
        self.hook_mock.side_effect = Exception('Failed to connect')
        self.assertFalse(logging.S3Log().log_exists(self.remote_log_location))
Example #12
    def test_log_exists_raises(self):
        self.hook_inst_mock.get_key.side_effect = Exception('error')
        self.assertFalse(logging.S3Log().log_exists(self.remote_log_location))
Example #13
    def test_log_exists_none(self):
        self.hook_inst_mock.get_key.return_value = None
        self.assertFalse(logging.S3Log().log_exists(self.remote_log_location))
Example #14
    def test_log_exists(self):
        self.assertTrue(logging.S3Log().log_exists(self.remote_log_location))
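
Examples #11 through #14 constrain log_exists: False when the hook could not be created, when get_key raises, or when the key is absent; True only when the key is found. A minimal sketch matching that behavior:

    def log_exists(self, remote_log_location):
        if self.hook:
            try:
                return self.hook.get_key(remote_log_location) is not None
            except Exception:
                pass
        return False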
Example #15
    def test_init(self):
        logging.S3Log()
        self.hook_mock.assert_called_once_with('')
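
Finally, Examples #3 and #15 describe construction: S3Log creates an S3Hook for a connection id (an empty string in the test fixture, presumably read from configuration) and falls back to a hook-less instance with an error message when that fails. A sketch under those assumptions:

    def __init__(self):
        # Hypothetically read from configuration; the tests patch it to ''.
        remote_conn_id = conf.get('core', 'REMOTE_LOG_CONN_ID')
        try:
            self.hook = S3Hook(remote_conn_id)
        except Exception:
            self.hook = None
            logging.error(
                'Could not create an S3Hook with connection id "%s". Please '
                'make sure that airflow[s3] is installed and the S3 '
                'connection exists.' % remote_conn_id)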