Example #1
        def monitor_gunicorn(gunicorn_master_proc):
            # These run forever until a SIG{INT, TERM, KILL, ...} signal is sent
            if conf.getint('webserver', 'worker_refresh_interval') > 0:
                master_timeout = conf.getint('webserver', 'web_server_master_timeout')
                restart_workers(gunicorn_master_proc, num_workers, master_timeout)
            else:
                while gunicorn_master_proc.poll() is None:
                    time.sleep(1)

                sys.exit(gunicorn_master_proc.returncode)
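
All of these examples center on conf.getint, which reads a configuration option and coerces it to int. Airflow's conf object subclasses the standard-library ConfigParser, so its getint calls behave like this self-contained sketch (the section and option names here are illustrative):

from configparser import ConfigParser

parser = ConfigParser()
parser.read_string("""
[webserver]
worker_refresh_interval = 30
""")

# Returns an int, not a string.
interval = parser.getint('webserver', 'worker_refresh_interval')
assert interval == 30

# fallback= is honored when the option is missing, as the
# DAGDependenciesView example below relies on.
batch = parser.getint('webserver', 'missing_option', fallback=4)
assert batch == 4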
Example #2
def default_pool_open_slots(session):
    from airflow.models import TaskInstance as TI  # To avoid circular imports
    total_slots = conf.getint('core', 'non_pooled_task_slot_count')
    used_slots = session.query(func.count()).filter(
        TI.pool == Pool.default_pool_name).filter(
        TI.state.in_([State.RUNNING, State.QUEUED])).scalar()
    return total_slots - used_slots
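
A hedged usage sketch: default_pool_open_slots expects a live SQLAlchemy session, which Airflow code usually obtains from airflow.settings (this assumes an initialized Airflow database):

from airflow import settings

session = settings.Session()
try:
    # Open slots = configured non_pooled_task_slot_count minus the number
    # of RUNNING/QUEUED task instances in the default pool.
    print(default_pool_open_slots(session))
finally:
    session.close()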
Example #3
    def start_refresh(gunicorn_master_proc):
        batch_size = conf.getint('webserver', 'worker_refresh_batch_size')
        log.debug('%s doing a refresh of %s workers', state, batch_size)
        sys.stdout.flush()
        sys.stderr.flush()

        excess = 0
        for _ in range(batch_size):
            gunicorn_master_proc.send_signal(signal.SIGTTIN)
            excess += 1
            wait_until_true(lambda: num_workers_expected + excess ==
                            get_num_workers_running(gunicorn_master_proc),
                            master_timeout)
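
For context on the signal: gunicorn's master process forks one extra worker per SIGTTIN and gracefully retires one per SIGTTOU, which is what start_refresh (and restart_workers in Example #7) drives. A minimal sketch, assuming gunicorn is installed and app:app is a placeholder WSGI module:

import signal
import subprocess
import time

# Hypothetical invocation; 'app:app' stands in for a real WSGI app.
master = subprocess.Popen(['gunicorn', '--workers', '2', 'app:app'])
time.sleep(5)                       # give the initial workers time to boot
master.send_signal(signal.SIGTTIN)  # master forks one additional worker
master.send_signal(signal.SIGTTOU)  # master retires the oldest worker
master.terminate()
master.wait()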
Example #4
def test_send_mime_ssl(self, mock_smtp, mock_smtp_ssl):
    mock_smtp.return_value = mock.Mock()
    mock_smtp_ssl.return_value = mock.Mock()
    with conf_vars({('smtp', 'smtp_ssl'): 'True'}):
        utils.email.send_MIME_email('from',
                                    'to',
                                    MIMEMultipart(),
                                    dryrun=False)
    self.assertFalse(mock_smtp.called)
    mock_smtp_ssl.assert_called_once_with(
        conf.get('smtp', 'SMTP_HOST'),
        conf.getint('smtp', 'SMTP_PORT'),
    )
Example #5
def test_send_mime_noauth(self, mock_smtp, mock_smtp_ssl):
    mock_smtp.return_value = mock.Mock()
    mock_smtp_ssl.return_value = mock.Mock()
    with conf_vars({
        ('smtp', 'smtp_user'): None,
        ('smtp', 'smtp_password'): None,
    }):
        utils.email.send_MIME_email('from',
                                    'to',
                                    MIMEMultipart(),
                                    dryrun=False)
    self.assertFalse(mock_smtp_ssl.called)
    mock_smtp.assert_called_once_with(
        conf.get('smtp', 'SMTP_HOST'),
        conf.getint('smtp', 'SMTP_PORT'),
    )
    self.assertFalse(mock_smtp.login.called)
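
Examples #4 and #5 lean on the test helper conf_vars, which temporarily overrides configuration options and restores them on exit. This is not Airflow's actual implementation, but a rough standalone equivalent over a ConfigParser-style object looks like:

import contextlib

@contextlib.contextmanager
def conf_vars_sketch(conf, overrides):
    """Temporarily apply {(section, key): value} overrides; None removes a key."""
    originals = {}
    for (section, key), value in overrides.items():
        originals[(section, key)] = conf.get(section, key, fallback=None)
        if value is None:
            conf.remove_option(section, key)  # assumes the section exists
        else:
            conf.set(section, key, value)
    try:
        yield
    finally:
        for (section, key), value in originals.items():
            if value is None:
                conf.remove_option(section, key)
            else:
                conf.set(section, key, value)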
Example #6
def test_send_mime(self, mock_smtp, mock_smtp_ssl):
    mock_smtp.return_value = mock.Mock()
    mock_smtp_ssl.return_value = mock.Mock()
    msg = MIMEMultipart()
    utils.email.send_MIME_email('from', 'to', msg, dryrun=False)
    mock_smtp.assert_called_once_with(
        conf.get('smtp', 'SMTP_HOST'),
        conf.getint('smtp', 'SMTP_PORT'),
    )
    self.assertTrue(mock_smtp.return_value.starttls.called)
    mock_smtp.return_value.login.assert_called_once_with(
        conf.get('smtp', 'SMTP_USER'),
        conf.get('smtp', 'SMTP_PASSWORD'),
    )
    mock_smtp.return_value.sendmail.assert_called_once_with(
        'from', 'to', msg.as_string())
    self.assertTrue(mock_smtp.return_value.quit.called)
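
Taken together, Examples #4-#6 pin down the branching inside send_MIME_email: the smtp_ssl flag selects smtplib.SMTP_SSL over smtplib.SMTP, STARTTLS is negotiated on the plain connection, and login is skipped when no credentials are configured. A simplified paraphrase of that logic (not Airflow's exact source):

import smtplib

def send_mime_sketch(conf, e_from, e_to, msg, dryrun=False):
    if dryrun:
        return
    host = conf.get('smtp', 'SMTP_HOST')
    port = conf.getint('smtp', 'SMTP_PORT')
    if conf.getboolean('smtp', 'SMTP_SSL'):
        conn = smtplib.SMTP_SSL(host, port)   # Example #4 asserts this branch
    else:
        conn = smtplib.SMTP(host, port)
        conn.starttls()                       # Example #6 asserts starttls()
    user = conf.get('smtp', 'SMTP_USER', fallback=None)
    password = conf.get('smtp', 'SMTP_PASSWORD', fallback=None)
    if user and password:   # no credentials -> no login, as Example #5 expects
        conn.login(user, password)
    conn.sendmail(e_from, e_to, msg.as_string())
    conn.quit()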
Example #7
def restart_workers(gunicorn_master_proc, num_workers_expected, master_timeout):
    """
    Runs forever, monitoring the child processes of @gunicorn_master_proc and
    restarting workers occasionally.
    Each iteration of the loop traverses one edge of this state transition
    diagram, where each state (node) represents
    [ num_ready_workers_running / num_workers_running ]. We expect most time to
    be spent in [n / n]. `bs` is the setting webserver.worker_refresh_batch_size.
    The horizontal transition at ? happens after the new worker parses all the
    dags (so it could take a while!)
       V ────────────────────────────────────────────────────────────────────────┐
    [n / n] ──TTIN──> [ [n, n+bs) / n + bs ]  ────?───> [n + bs / n + bs] ──TTOU─┘
       ^                          ^───────────────┘
       │
       │      ┌────────────────v
       └──────┴────── [ [0, n) / n ] <─── start
    We change the number of workers by sending TTIN and TTOU to the gunicorn
    master process, which increases and decreases the number of child workers
    respectively. Gunicorn guarantees that on TTOU the oldest worker is
    terminated gracefully.
    """

    def wait_until_true(fn, timeout=0):
        """
        Sleeps until fn() returns True, raising AirflowWebServerTimeout
        if the timeout elapses first
        """
        start_time = time.time()
        while not fn():
            if 0 < timeout <= time.time() - start_time:
                raise AirflowWebServerTimeout(
                    "No response from gunicorn master within {0} seconds"
                    .format(timeout))
            time.sleep(0.1)

    def start_refresh(gunicorn_master_proc):
        batch_size = conf.getint('webserver', 'worker_refresh_batch_size')
        log.debug('%s doing a refresh of %s workers', state, batch_size)
        sys.stdout.flush()
        sys.stderr.flush()

        excess = 0
        for _ in range(batch_size):
            gunicorn_master_proc.send_signal(signal.SIGTTIN)
            excess += 1
            wait_until_true(lambda: num_workers_expected + excess ==
                            get_num_workers_running(gunicorn_master_proc),
                            master_timeout)

    try:  # pylint: disable=too-many-nested-blocks
        wait_until_true(lambda: num_workers_expected ==
                        get_num_workers_running(gunicorn_master_proc),
                        master_timeout)
        while True:
            num_workers_running = get_num_workers_running(gunicorn_master_proc)
            num_ready_workers_running = \
                get_num_ready_workers_running(gunicorn_master_proc)

            state = '[{0} / {1}]'.format(num_ready_workers_running, num_workers_running)

            # Whenever some workers are not ready, wait until all workers are ready
            if num_ready_workers_running < num_workers_running:
                log.debug('%s some workers are starting up, waiting...', state)
                sys.stdout.flush()
                time.sleep(1)

            # Kill a worker gracefully by asking gunicorn to reduce number of workers
            elif num_workers_running > num_workers_expected:
                excess = num_workers_running - num_workers_expected
                log.debug('%s killing %s workers', state, excess)

                for _ in range(excess):
                    gunicorn_master_proc.send_signal(signal.SIGTTOU)
                    excess -= 1
                    wait_until_true(lambda: num_workers_expected + excess ==
                                    get_num_workers_running(gunicorn_master_proc),
                                    master_timeout)

            # Start a new worker by asking gunicorn to increase number of workers
            elif num_workers_running == num_workers_expected:
                refresh_interval = conf.getint('webserver', 'worker_refresh_interval')
                log.debug(
                    '%s sleeping for %ss before doing a refresh...',
                    state, refresh_interval
                )
                time.sleep(refresh_interval)
                start_refresh(gunicorn_master_proc)

            else:
                # num_ready_workers_running == num_workers_running < num_workers_expected
                log.error((
                    "%s some workers seem to have died and gunicorn "
                    "did not restart them as expected"
                ), state)
                time.sleep(10)
                if len(
                    psutil.Process(gunicorn_master_proc.pid).children()
                ) < num_workers_expected:
                    start_refresh(gunicorn_master_proc)
    except (AirflowWebServerTimeout, OSError) as err:
        log.error(err)
        log.error("Shutting down webserver")
        try:
            gunicorn_master_proc.terminate()
            gunicorn_master_proc.wait()
        finally:
            sys.exit(1)
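
restart_workers also assumes a couple of helpers from the surrounding module. The plain worker count, for instance, can be derived from the gunicorn master's direct children via psutil; a sketch consistent with how the code above uses it:

import psutil

def get_num_workers_running(gunicorn_master_proc):
    # Every direct child of the gunicorn master process is a worker.
    return len(psutil.Process(gunicorn_master_proc.pid).children())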
Example #8
"""
Base executor - this is the base class for all the implemented executors.
"""
from collections import OrderedDict
from typing import Any, Dict, List, Optional, Set, Tuple, Union

from airflow import LoggingMixin, conf
from airflow.models import TaskInstance
from airflow.models.taskinstance import SimpleTaskInstance, TaskInstanceKeyType
from airflow.stats import Stats
from airflow.utils.state import State

PARALLELISM: int = conf.getint('core', 'PARALLELISM')

NOT_STARTED_MESSAGE = "The executor should be started first!"

# Command to execute - list of strings;
# the first element is always "airflow".
# It should be the result of the TaskInstance.generate_command method.
CommandType = List[str]


# Task that is queued. It contains all the information that is
# needed to run the task.
#
# Tuple of: command, priority, queue name, SimpleTaskInstance
QueuedTaskInstanceType = Tuple[CommandType, int, Optional[str], Union[SimpleTaskInstance, TaskInstance]]
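
A hedged illustration of what one QueuedTaskInstanceType entry holds; the command list and simple_ti below are placeholders, since real values come from TaskInstance.generate_command and the scheduler:

# Hypothetical queued entry matching QueuedTaskInstanceType above.
queued_task = (
    ['airflow', 'run', 'example_dag', 'example_task', '2021-01-01'],  # CommandType
    1,          # priority weight
    None,       # queue name (None -> executor's default queue)
    simple_ti,  # SimpleTaskInstance, assumed to exist in scope
)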
Example #9

class DAGDependenciesView(BaseView):
    dagbag = None
    plugins_folder = conf.get("core", "plugins_folder")
    template_folder = os.path.join(plugins_folder, "dag-dependencies-plugin")
    route_base = "/"
    refresh_interval = conf.getint(
        "dag_dependencies_plugin",
        "refresh_interval",
        fallback=conf.getint("scheduler", "dag_dir_list_interval"),
    )
    last_refresh = datetime.utcnow() - timedelta(seconds=refresh_interval)
    nodes = []
    edges = []

    def render(self, template, **context):
        return render_template(
            template,
            base_template=self.appbuilder.base_template,
            appbuilder=self.appbuilder,
            **context,
        )

    @expose("/dag-dependencies")
    @has_access
    def list(self):
        title = "DAG Dependencies"

        if self.dagbag is None:
            from airflow.www_rbac.views import dagbag

            self.dagbag = dagbag

        if datetime.utcnow() > self.last_refresh + timedelta(
                seconds=self.refresh_interval):
            self.nodes, self.edges = self._generate_graph()
            self.last_refresh = datetime.utcnow()

        return self.render_template(
            "dag_dependencies.html",
            title=title,
            nodes=self.nodes,
            edges=self.edges,
            last_refresh=self.last_refresh.strftime("%Y-%m-%d %H:%M:%S"),
            arrange=conf.get("webserver", "dag_orientation"),
            width=request.args.get("width", "100%"),
            height=request.args.get("height", "800"),
        )

    def _generate_graph(self):
        nodes = {}
        edges = []

        for dag_id, dag in self.dagbag.dags.items():
            dag_node_id = f"d--{dag_id}"
            nodes[dag_node_id] = DAGDependenciesView._node_dict(
                dag_node_id, dag_id, "fill: rgb(232, 247, 228)")

            for task in dag.tasks:
                task_node_id = f"t--{dag_id}--{task.task_id}"
                if isinstance(task, TriggerDagRunOperator):
                    nodes[task_node_id] = DAGDependenciesView._node_dict(
                        task_node_id, task.task_id, "fill: rgb(255, 239, 235)")

                    edges.extend([
                        {
                            "u": dag_node_id,
                            "v": task_node_id
                        },
                        {
                            "u": task_node_id,
                            "v": f"d--{task.trigger_dag_id}"
                        },
                    ])
                elif isinstance(task, ExternalTaskSensor):
                    nodes[task_node_id] = DAGDependenciesView._node_dict(
                        task_node_id, task.task_id, "fill: rgb(230, 241, 242)")

                    edges.extend([
                        {
                            "u": task_node_id,
                            "v": dag_node_id
                        },
                        {
                            "u": f"d--{task.external_dag_id}",
                            "v": task_node_id
                        },
                    ])

            implicit = getattr(dag, "implicit_dependencies", None)
            if isinstance(implicit, list):
                for dep in implicit:
                    dep_node_id = f"i--{dag_id}--{dep}"
                    nodes[dep_node_id] = DAGDependenciesView._node_dict(
                        dep_node_id, "implicit", "fill: gold")

                    edges.extend([
                        {
                            "u": dep_node_id,
                            "v": dag_node_id
                        },
                        {
                            "u": f"d--{dep}",
                            "v": dep_node_id
                        },
                    ])

        return list(nodes.values()), edges

    @staticmethod
    def _node_dict(node_id, label, style):
        return {
            "id": node_id,
            "value": {
                "label": label,
                "style": style,
                "rx": 5,
                "ry": 5
            },
        }
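
To surface a view like DAGDependenciesView in the webserver UI, it would typically be registered through an Airflow plugin. A minimal sketch with illustrative names (the plugin must also ship the dag_dependencies.html template that list() renders):

from airflow.plugins_manager import AirflowPlugin

class DagDependenciesPlugin(AirflowPlugin):
    name = "dag_dependencies_plugin"  # matches the config section read above
    appbuilder_views = [{
        "name": "DAG Dependencies",
        "category": "Browse",
        "view": DAGDependenciesView(),
    }]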