Пример #1
0
def main():
    dbnd_bootstrap()

    from airflow.models import TaskInstance
    from airflow.utils.log.logging_mixin import redirect_stderr, redirect_stdout
    from airflow.utils.timezone import utcnow

    from dbnd import config
    from dbnd._core.constants import TaskExecutorType
    from dbnd._core.settings import RunConfig
    from dbnd._core.task_run.task_run import TaskRun
    from dbnd.tasks.basics import dbnd_sanity_check
    from dbnd_examples.dbnd_airflow import bash_dag

    airflow_task_log = logging.getLogger("airflow.task")

    task = bash_dag.t3
    execution_date = utcnow()
    ti = TaskInstance(task, execution_date)

    ti.init_run_context(raw=False)
    logger.warning("Running with task_log %s",
                   airflow_task_log.handlers[0].handler.baseFilename)
    with redirect_stdout(airflow_task_log, logging.INFO), redirect_stderr(
            airflow_task_log, logging.WARN):
        logger.warning("from redirect")

        logger.warning("after patch")
        with config({RunConfig.task_executor_type: TaskExecutorType.local}):
            run = dbnd_sanity_check.dbnd_run()
            tr = run.root_task_run  # type: TaskRun

    logger.warning("TR: %s %s %s", tr, tr.task_tracker_url,
                   tr.log.local_log_file)
Пример #2
0
def task_run(args, dag=None):
    """Runs a single task instance"""
    if dag:
        args.dag_id = dag.dag_id

    log = LoggingMixin().log

    # Load custom airflow config
    if args.cfg_path:
        with open(args.cfg_path, 'r') as conf_file:
            conf_dict = json.load(conf_file)

        if os.path.exists(args.cfg_path):
            os.remove(args.cfg_path)

        conf.read_dict(conf_dict, source=args.cfg_path)
        settings.configure_vars()

    # IMPORTANT, have to use the NullPool, otherwise, each "run" command may leave
    # behind multiple open sleeping connections while heartbeating, which could
    # easily exceed the database connection limit when
    # processing hundreds of simultaneous tasks.
    settings.configure_orm(disable_connection_pool=True)

    if not args.pickle and not dag:
        dag = get_dag(args)
    elif not dag:
        with db.create_session() as session:
            log.info('Loading pickle id %s', args.pickle)
            dag_pickle = session.query(DagPickle).filter(
                DagPickle.id == args.pickle).first()
            if not dag_pickle:
                raise AirflowException("Who hid the pickle!? [missing pickle]")
            dag = dag_pickle.pickle

    task = dag.get_task(task_id=args.task_id)
    ti = TaskInstance(task, args.execution_date)
    ti.refresh_from_db()

    ti.init_run_context(raw=args.raw)

    hostname = get_hostname()
    log.info("Running %s on host %s", ti, hostname)

    if args.interactive:
        _run(args, dag, ti)
    else:
        with redirect_stdout(ti.log, logging.INFO), redirect_stderr(
                ti.log, logging.WARN):
            _run(args, dag, ti)
    logging.shutdown()
Пример #3
0
def task_run(args, dag=None):
    """Runs a single task instance"""

    # Load custom airflow config
    if args.cfg_path:
        with open(args.cfg_path, 'r') as conf_file:
            conf_dict = json.load(conf_file)

        if os.path.exists(args.cfg_path):
            os.remove(args.cfg_path)

        conf.read_dict(conf_dict, source=args.cfg_path)
        settings.configure_vars()

    # IMPORTANT, have to use the NullPool, otherwise, each "run" command may leave
    # behind multiple open sleeping connections while heartbeating, which could
    # easily exceed the database connection limit when
    # processing hundreds of simultaneous tasks.
    settings.configure_orm(disable_connection_pool=True)

    if dag and args.pickle:
        raise AirflowException(
            "You cannot use the --pickle option when using DAG.cli() method.")
    elif args.pickle:
        print(f'Loading pickle id: {args.pickle}')
        dag = get_dag_by_pickle(args.pickle)
    elif not dag:
        dag = get_dag(args.subdir, args.dag_id)
    else:
        # Use DAG from parameter
        pass

    task = dag.get_task(task_id=args.task_id)
    ti = TaskInstance(task, args.execution_date)
    ti.refresh_from_db()

    ti.init_run_context(raw=args.raw)

    hostname = get_hostname()
    print(f"Running {ti} on host {hostname}")

    if args.interactive:
        _run_task_by_selected_method(args, dag, ti)
    else:
        with redirect_stdout(StreamLogWriter(ti.log, logging.INFO)), \
                redirect_stderr(StreamLogWriter(ti.log, logging.WARN)):
            _run_task_by_selected_method(args, dag, ti)
    logging.shutdown()
Пример #4
0
def task_run(args, dag=None):
    """Runs a single task instance"""

    # Load custom airflow config
    if args.cfg_path:
        with open(args.cfg_path, 'r') as conf_file:
            conf_dict = json.load(conf_file)

        if os.path.exists(args.cfg_path):
            os.remove(args.cfg_path)

        conf.read_dict(conf_dict, source=args.cfg_path)
        settings.configure_vars()

    # IMPORTANT, have to use the NullPool, otherwise, each "run" command may leave
    # behind multiple open sleeping connections while heartbeating, which could
    # easily exceed the database connection limit when
    # processing hundreds of simultaneous tasks.
    settings.configure_orm(disable_connection_pool=True)

    if dag and args.pickle:
        raise AirflowException("You cannot use the --pickle option when using DAG.cli() method.")
    elif args.pickle:
        print(f'Loading pickle id: {args.pickle}')
        dag = get_dag_by_pickle(args.pickle)
    elif not dag:
        dag = get_dag(args.subdir, args.dag_id)
    else:
        # Use DAG from parameter
        pass

    task = dag.get_task(task_id=args.task_id)
    ti = TaskInstance(task, args.execution_date)
    ti.init_run_context(raw=args.raw)

    hostname = get_hostname()

    print(f"Running {ti} on host {hostname}")

    if args.interactive:
        _run_task_by_selected_method(args, dag, ti)
    else:
        if settings.DONOT_MODIFY_HANDLERS:
            with redirect_stdout(StreamLogWriter(ti.log, logging.INFO)), \
                    redirect_stderr(StreamLogWriter(ti.log, logging.WARN)):
                _run_task_by_selected_method(args, dag, ti)
        else:
            # Get all the Handlers from 'airflow.task' logger
            # Add these handlers to the root logger so that we can get logs from
            # any custom loggers defined in the DAG
            airflow_logger_handlers = logging.getLogger('airflow.task').handlers
            root_logger = logging.getLogger()
            root_logger_handlers = root_logger.handlers

            # Remove all handlers from Root Logger to avoid duplicate logs
            for handler in root_logger_handlers:
                root_logger.removeHandler(handler)

            for handler in airflow_logger_handlers:
                root_logger.addHandler(handler)
            root_logger.setLevel(logging.getLogger('airflow.task').level)

            with redirect_stdout(StreamLogWriter(ti.log, logging.INFO)), \
                    redirect_stderr(StreamLogWriter(ti.log, logging.WARN)):
                _run_task_by_selected_method(args, dag, ti)

            # We need to restore the handlers to the loggers as celery worker process
            # can call this command multiple times,
            # so if we don't reset this then logs from next task would go to the wrong place
            for handler in airflow_logger_handlers:
                root_logger.removeHandler(handler)
            for handler in root_logger_handlers:
                root_logger.addHandler(handler)

    logging.shutdown()