Пример #1
0
 def test_raise_error_if_process_is_running(self):
     """Expect AirflowException when the pidfile holds this test process's own PID."""
     current_pid = str(os.getpid())
     with NamedTemporaryFile('+w') as pidfile:
         # Flush so the PID is on disk before the helper reads the file by name.
         pidfile.write(current_pid)
         pidfile.flush()
         expected_message = "is already running under PID"
         with self.assertRaisesRegex(AirflowException, expected_message):
             check_if_pidfile_process_is_running(pidfile.name, process_name="test")
Пример #2
0
 def test_remove_if_no_process(self):
     """Expect the pidfile to be gone once the check has run on a stale PID.

     The PID written here is presumably not that of any live process. The
     FileNotFoundError is expected to surface when the temporary file is
     cleaned up after the helper has already removed it.
     """
     # Assert file is deleted
     with self.assertRaises(FileNotFoundError), NamedTemporaryFile('+w') as pidfile:
         pidfile.write('19191919191919191991')
         pidfile.flush()
         check_if_pidfile_process_is_running(pidfile.name, process_name="test")
Пример #3
0
def webserver(args):
    """Starts Airflow Webserver

    Settings are taken from ``args`` first and fall back to the ``webserver``
    section of the configuration.

    With ``args.debug`` set, the app is served directly through ``app.run``.
    Otherwise a gunicorn master process is spawned and supervised by
    ``GunicornMonitor``; with ``args.daemon`` set this happens inside a
    ``daemon.DaemonContext``.

    :param args: parsed CLI arguments; the attributes read are
        ``access_logfile``, ``error_logfile``, ``workers``, ``worker_timeout``,
        ``ssl_cert``, ``ssl_key``, ``debug``, ``port``, ``hostname``, ``pid``,
        ``stdout``, ``stderr``, ``log_file``, ``workerclass`` and ``daemon``.
    :raises AirflowException: if only one of SSL certificate / SSL key is set.
    """
    print(settings.HEADER)

    access_logfile = args.access_logfile or conf.get('webserver',
                                                     'access_logfile')
    error_logfile = args.error_logfile or conf.get('webserver',
                                                   'error_logfile')
    num_workers = args.workers or conf.get('webserver', 'workers')
    worker_timeout = args.worker_timeout or conf.get(
        'webserver', 'web_server_worker_timeout')
    ssl_cert = args.ssl_cert or conf.get('webserver', 'web_server_ssl_cert')
    ssl_key = args.ssl_key or conf.get('webserver', 'web_server_ssl_key')
    # The certificate and the key are only usable as a pair.
    if not ssl_cert and ssl_key:
        raise AirflowException(
            'An SSL certificate must also be provided for use with ' + ssl_key)
    if ssl_cert and not ssl_key:
        raise AirflowException(
            'An SSL key must also be provided for use with ' + ssl_cert)

    if args.debug:
        print(
            f"Starting the web server on port {args.port} and host {args.hostname}."
        )
        app = create_app(testing=conf.getboolean('core', 'unit_test_mode'))
        app.run(
            debug=True,
            use_reloader=not app.config['TESTING'],
            port=args.port,
            host=args.hostname,
            ssl_context=(ssl_cert, ssl_key) if ssl_cert and ssl_key else None,
        )
    else:
        # This pre-warms the cache, and makes possible errors
        # get reported earlier (i.e. before daemonization)
        os.environ['SKIP_DAGS_PARSING'] = 'True'
        try:
            cached_app(None)
        finally:
            # Do not leak the flag into the environment if app creation fails.
            os.environ.pop('SKIP_DAGS_PARSING')

        pid_file, stdout, stderr, log_file = setup_locations(
            "webserver", args.pid, args.stdout, args.stderr, args.log_file)

        # Check if webserver is already running if not, remove old pidfile
        check_if_pidfile_process_is_running(pid_file=pid_file,
                                            process_name="webserver")

        print(
            textwrap.dedent('''\
                Running the Gunicorn Server with:
                Workers: {num_workers} {workerclass}
                Host: {hostname}:{port}
                Timeout: {worker_timeout}
                Logfiles: {access_logfile} {error_logfile}
                =================================================================\
            '''.format(
                num_workers=num_workers,
                workerclass=args.workerclass,
                hostname=args.hostname,
                port=args.port,
                worker_timeout=worker_timeout,
                access_logfile=access_logfile,
                error_logfile=error_logfile,
            )))

        run_args = [
            'gunicorn',
            '--workers',
            str(num_workers),
            '--worker-class',
            str(args.workerclass),
            '--timeout',
            str(worker_timeout),
            '--bind',
            args.hostname + ':' + str(args.port),
            '--name',
            'airflow-webserver',
            '--pid',
            pid_file,
            '--config',
            'python:airflow.www.gunicorn_config',
        ]

        if args.access_logfile:
            run_args += ['--access-logfile', str(args.access_logfile)]

        if args.error_logfile:
            run_args += ['--error-logfile', str(args.error_logfile)]

        if args.daemon:
            run_args += ['--daemon']

        if ssl_cert:
            run_args += ['--certfile', ssl_cert, '--keyfile', ssl_key]

        run_args += ["airflow.www.app:cached_app()"]

        # Bound later to either a subprocess.Popen (foreground) or a
        # psutil.Process (daemon); kill_proc reads it through the closure.
        gunicorn_master_proc = None

        def kill_proc(signum, _):  # pylint: disable=unused-argument
            """Signal handler: stop the gunicorn master, then exit with 0."""
            log.info("Received signal: %s. Closing gunicorn.", signum)
            gunicorn_master_proc.terminate()
            # Popen.wait raises subprocess.TimeoutExpired and
            # psutil.Process.wait raises psutil.TimeoutExpired; neither is
            # the builtin TimeoutError.
            with suppress(TimeoutError, subprocess.TimeoutExpired,
                          psutil.TimeoutExpired):
                gunicorn_master_proc.wait(timeout=30)
            # Popen.poll() returns None while the child is alive;
            # psutil.Process has no poll(), so use is_running() there.
            if isinstance(gunicorn_master_proc, subprocess.Popen):
                still_running = gunicorn_master_proc.poll() is None
            else:
                still_running = gunicorn_master_proc.is_running()
            if still_running:
                gunicorn_master_proc.kill()
            sys.exit(0)

        def monitor_gunicorn(gunicorn_master_pid: int):
            """Install the signal handlers and start GunicornMonitor for the given PID."""
            # Register signal handlers
            signal.signal(signal.SIGINT, kill_proc)
            signal.signal(signal.SIGTERM, kill_proc)

            # These run forever until SIG{INT, TERM, KILL, ...} signal is sent
            GunicornMonitor(
                gunicorn_master_pid=gunicorn_master_pid,
                num_workers_expected=num_workers,
                master_timeout=conf.getint('webserver',
                                           'web_server_master_timeout'),
                worker_refresh_interval=conf.getint('webserver',
                                                    'worker_refresh_interval',
                                                    fallback=30),
                worker_refresh_batch_size=conf.getint(
                    'webserver', 'worker_refresh_batch_size', fallback=1),
                reload_on_plugin_change=conf.getboolean(
                    'webserver', 'reload_on_plugin_change', fallback=False),
            ).start()

        if args.daemon:
            handle = setup_logging(log_file)

            # The monitor gets its own pidfile, "<base>-monitor<ext>", next to
            # the one gunicorn writes.
            base, ext = os.path.splitext(pid_file)
            with open(stdout, 'w+') as stdout, open(stderr, 'w+') as stderr:
                ctx = daemon.DaemonContext(
                    pidfile=TimeoutPIDLockFile(f"{base}-monitor{ext}", -1),
                    files_preserve=[handle],
                    stdout=stdout,
                    stderr=stderr,
                )
                with ctx:
                    subprocess.Popen(run_args, close_fds=True)

                    # Reading pid of gunicorn master as it will be different than
                    # the one of process spawned above.
                    while True:
                        sleep(0.1)
                        gunicorn_master_proc_pid = read_pid_from_pidfile(
                            pid_file)
                        if gunicorn_master_proc_pid:
                            break

                    # Run Gunicorn monitor
                    gunicorn_master_proc = psutil.Process(
                        gunicorn_master_proc_pid)
                    monitor_gunicorn(gunicorn_master_proc.pid)

        else:
            gunicorn_master_proc = subprocess.Popen(run_args, close_fds=True)
            monitor_gunicorn(gunicorn_master_proc.pid)
Пример #4
0
 def test_ok_if_no_file(self):
     """The check should return without raising for a pidfile path that is presumably absent."""
     missing_pidfile = 'some/pid/file'
     check_if_pidfile_process_is_running(missing_pidfile, process_name="test")
def webserver(args):
    """Starts Airflow Webserver

    Refuses to start (exit status 1) when the ``secret_key`` webserver setting
    still has the value ``temporary_key``. Other settings are taken from
    ``args`` first and fall back to the ``webserver`` section of the
    configuration.

    With ``args.debug`` set, the app is served directly through ``app.run``.
    Otherwise a gunicorn master process is spawned and supervised by
    ``GunicornMonitor``; with ``args.daemon`` set this happens inside a
    ``daemon.DaemonContext``.

    :param args: parsed CLI arguments; the attributes read are
        ``access_logfile``, ``error_logfile``, ``access_logformat``,
        ``workers``, ``worker_timeout``, ``ssl_cert``, ``ssl_key``, ``debug``,
        ``port``, ``hostname``, ``pid``, ``stdout``, ``stderr``, ``log_file``,
        ``workerclass`` and ``daemon``.
    :raises AirflowException: if only one of SSL certificate / SSL key is set.
    """
    print(settings.HEADER)

    # Check for old/insecure config, and fail safe (i.e. don't launch) if the config is wildly insecure.
    if conf.get('webserver', 'secret_key') == 'temporary_key':
        from rich import print as rich_print

        rich_print(
            "[red][bold]ERROR:[/bold] The `secret_key` setting under the webserver config has an insecure "
            "value - Airflow has failed safe and refuses to start. Please change this value to a new, "
            "per-environment, randomly generated string, for example using this command `[cyan]openssl rand "
            "-hex 30[/cyan]`",
            file=sys.stderr,
        )
        sys.exit(1)

    access_logfile = args.access_logfile or conf.get('webserver',
                                                     'access_logfile')
    error_logfile = args.error_logfile or conf.get('webserver',
                                                   'error_logfile')
    access_logformat = args.access_logformat or conf.get(
        'webserver', 'access_logformat')
    num_workers = args.workers or conf.get('webserver', 'workers')
    worker_timeout = args.worker_timeout or conf.get(
        'webserver', 'web_server_worker_timeout')
    ssl_cert = args.ssl_cert or conf.get('webserver', 'web_server_ssl_cert')
    ssl_key = args.ssl_key or conf.get('webserver', 'web_server_ssl_key')
    # The certificate and the key are only usable as a pair.
    if not ssl_cert and ssl_key:
        raise AirflowException(
            'An SSL certificate must also be provided for use with ' + ssl_key)
    if ssl_cert and not ssl_key:
        raise AirflowException(
            'An SSL key must also be provided for use with ' + ssl_cert)

    if args.debug:
        print(
            f"Starting the web server on port {args.port} and host {args.hostname}."
        )
        app = create_app(testing=conf.getboolean('core', 'unit_test_mode'))
        app.run(
            debug=True,
            use_reloader=not app.config['TESTING'],
            port=args.port,
            host=args.hostname,
            ssl_context=(ssl_cert, ssl_key) if ssl_cert and ssl_key else None,
        )
    else:

        pid_file, stdout, stderr, log_file = setup_locations(
            "webserver", args.pid, args.stdout, args.stderr, args.log_file)

        # Check if webserver is already running if not, remove old pidfile
        check_if_pidfile_process_is_running(pid_file=pid_file,
                                            process_name="webserver")

        print(
            textwrap.dedent(f'''\
                Running the Gunicorn Server with:
                Workers: {num_workers} {args.workerclass}
                Host: {args.hostname}:{args.port}
                Timeout: {worker_timeout}
                Logfiles: {access_logfile} {error_logfile}
                Access Logformat: {access_logformat}
                ================================================================='''
                            ))

        # Run gunicorn through the current interpreter (python -m gunicorn).
        run_args = [
            sys.executable,
            '-m',
            'gunicorn',
            '--workers',
            str(num_workers),
            '--worker-class',
            str(args.workerclass),
            '--timeout',
            str(worker_timeout),
            '--bind',
            args.hostname + ':' + str(args.port),
            '--name',
            'airflow-webserver',
            '--pid',
            pid_file,
            '--config',
            'python:airflow.www.gunicorn_config',
        ]

        if args.access_logfile:
            run_args += ['--access-logfile', str(args.access_logfile)]

        if args.error_logfile:
            run_args += ['--error-logfile', str(args.error_logfile)]

        if args.access_logformat and args.access_logformat.strip():
            run_args += ['--access-logformat', str(args.access_logformat)]

        if args.daemon:
            run_args += ['--daemon']

        if ssl_cert:
            run_args += ['--certfile', ssl_cert, '--keyfile', ssl_key]

        run_args += ["airflow.www.app:cached_app()"]

        # Bound later to either a subprocess.Popen (foreground) or a
        # psutil.Process (daemon); kill_proc reads it through the closure.
        gunicorn_master_proc = None

        def kill_proc(signum, _):
            """Signal handler: stop the gunicorn master, then exit with 0."""
            log.info("Received signal: %s. Closing gunicorn.", signum)
            gunicorn_master_proc.terminate()
            # Popen.wait raises subprocess.TimeoutExpired and
            # psutil.Process.wait raises psutil.TimeoutExpired; neither is
            # the builtin TimeoutError.
            with suppress(TimeoutError, subprocess.TimeoutExpired,
                          psutil.TimeoutExpired):
                gunicorn_master_proc.wait(timeout=30)
            # Popen.poll() returns None while the child is alive;
            # psutil.Process has no poll(), so use is_running() there.
            if isinstance(gunicorn_master_proc, subprocess.Popen):
                still_running = gunicorn_master_proc.poll() is None
            else:
                still_running = gunicorn_master_proc.is_running()
            if still_running:
                gunicorn_master_proc.kill()
            sys.exit(0)

        def monitor_gunicorn(gunicorn_master_pid: int):
            """Install the signal handlers and start GunicornMonitor for the given PID."""
            # Register signal handlers
            signal.signal(signal.SIGINT, kill_proc)
            signal.signal(signal.SIGTERM, kill_proc)

            # These run forever until SIG{INT, TERM, KILL, ...} signal is sent
            GunicornMonitor(
                gunicorn_master_pid=gunicorn_master_pid,
                num_workers_expected=num_workers,
                master_timeout=conf.getint('webserver',
                                           'web_server_master_timeout'),
                worker_refresh_interval=conf.getint('webserver',
                                                    'worker_refresh_interval',
                                                    fallback=30),
                worker_refresh_batch_size=conf.getint(
                    'webserver', 'worker_refresh_batch_size', fallback=1),
                reload_on_plugin_change=conf.getboolean(
                    'webserver', 'reload_on_plugin_change', fallback=False),
            ).start()

        if args.daemon:
            # This makes possible errors get reported before daemonization
            os.environ['SKIP_DAGS_PARSING'] = 'True'
            try:
                create_app(None)
            finally:
                # Do not leak the flag into the environment if app creation fails.
                os.environ.pop('SKIP_DAGS_PARSING')

            handle = setup_logging(log_file)

            # The monitor gets its own pidfile, "<base>-monitor<ext>", next to
            # the one gunicorn writes.
            base, ext = os.path.splitext(pid_file)
            with open(stdout, 'w+') as stdout, open(stderr, 'w+') as stderr:
                ctx = daemon.DaemonContext(
                    pidfile=TimeoutPIDLockFile(f"{base}-monitor{ext}", -1),
                    files_preserve=[handle],
                    stdout=stdout,
                    stderr=stderr,
                )
                with ctx:
                    subprocess.Popen(run_args, close_fds=True)

                    # Reading pid of gunicorn master as it will be different than
                    # the one of process spawned above.
                    while True:
                        sleep(0.1)
                        gunicorn_master_proc_pid = read_pid_from_pidfile(
                            pid_file)
                        if gunicorn_master_proc_pid:
                            break

                    # Run Gunicorn monitor
                    gunicorn_master_proc = psutil.Process(
                        gunicorn_master_proc_pid)
                    monitor_gunicorn(gunicorn_master_proc.pid)

        else:
            with subprocess.Popen(run_args,
                                  close_fds=True) as gunicorn_master_proc:
                monitor_gunicorn(gunicorn_master_proc.pid)