def worker(args):
    """Starts Airflow Celery worker"""
    env = os.environ.copy()
    env['AIRFLOW_HOME'] = settings.AIRFLOW_HOME

    if not settings.validate_session():
        print("Worker exiting... database connection precheck failed! ")
        sys.exit(1)

    autoscale = args.autoscale
    skip_serve_logs = args.skip_serve_logs
    if autoscale is None and conf.has_option("celery", "worker_autoscale"):
        autoscale = conf.get("celery", "worker_autoscale")
    worker_instance = worker_bin.worker(app=celery_app)
    options = {
        'optimization': 'fair',
        'O': 'fair',
        'queues': args.queues,
        'concurrency': args.concurrency,
        'autoscale': autoscale,
        'hostname': args.celery_hostname,
        'loglevel': conf.get('logging', 'LOGGING_LEVEL'),
    }

    if conf.has_option("celery", "pool"):
        options["pool"] = conf.get("celery", "pool")

    if args.daemon:
        pid, stdout, stderr, log_file = setup_locations("worker",
                                                        args.pid,
                                                        args.stdout,
                                                        args.stderr,
                                                        args.log_file)
        handle = setup_logging(log_file)
        stdout = open(stdout, 'w+')
        stderr = open(stderr, 'w+')

        ctx = daemon.DaemonContext(
            pidfile=TimeoutPIDLockFile(pid, -1),
            files_preserve=[handle],
            stdout=stdout,
            stderr=stderr,
        )
        with ctx:
            sub_proc = _serve_logs(skip_serve_logs)
            worker_instance.run(**options)

        stdout.close()
        stderr.close()
    else:
        signal.signal(signal.SIGINT, sigint_handler)
        signal.signal(signal.SIGTERM, sigint_handler)

        sub_proc = _serve_logs(skip_serve_logs)
        worker_instance.run(**options)

    if sub_proc:
        sub_proc.terminate()
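
# The `_serve_logs` helper used above (and in the variants below) is not part
# of this listing. A minimal sketch of what it does, assuming the
# `serve_logs` entrypoint from `airflow.utils.serve_logs` and a
# multiprocessing-based child; names follow Airflow's layout but this is an
# assumption, not a verbatim copy of the helper:

from multiprocessing import Process
from typing import Optional


def _serve_logs(skip_serve_logs: bool = False) -> Optional[Process]:
    """Start the log-serving subprocess unless the caller opted out."""
    if not skip_serve_logs:
        from airflow.utils.serve_logs import serve_logs  # assumed location

        sub_proc = Process(target=serve_logs)
        sub_proc.start()
        return sub_proc
    # Returning None lets the `if sub_proc:` check at the end of worker()
    # skip termination when log serving was disabled.
    return None
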
def worker(args):
    """Starts Airflow Celery worker"""
    if not settings.validate_session():
        raise SystemExit("Worker exiting, database connection precheck failed.")

    autoscale = args.autoscale
    skip_serve_logs = args.skip_serve_logs
    if autoscale is None and conf.has_option("celery", "worker_autoscale"):
        autoscale = conf.get("celery", "worker_autoscale")

    # Setup locations
    pid_file_path, stdout, stderr, log_file = setup_locations(
        process=WORKER_PROCESS_NAME,
        pid=args.pid,
        stdout=args.stdout,
        stderr=args.stderr,
        log=args.log_file,
    )

    if hasattr(celery_app.backend, 'ResultSession'):
        # Pre-create the database tables now, otherwise SQLA via Celery has a
        # race condition where one of the subprocesses can die with "Table
        # already exists" error, because SQLA checks for which tables exist,
        # then issues a CREATE TABLE, rather than doing CREATE TABLE IF NOT
        # EXISTS
        try:
            session = celery_app.backend.ResultSession()
            session.close()
        except sqlalchemy.exc.IntegrityError:
            # At least on Postgres, trying to create a table that already
            # exists gives a unique constraint violation on the
            # "pg_type_typname_nsp_index" index. If this happens we can
            # ignore it: we raced to create the tables and lost.
            pass

    # Setup Celery worker
    options = [
        'worker',
        '-O', 'fair',
        '--queues', args.queues,
        '--concurrency', args.concurrency,
        '--hostname', args.celery_hostname,
        '--loglevel', conf.get('logging', 'LOGGING_LEVEL'),
        '--pidfile', pid_file_path,
    ]
    if autoscale:
        options.extend(['--autoscale', autoscale])
    if args.without_mingle:
        options.append('--without-mingle')
    if args.without_gossip:
        options.append('--without-gossip')

    if conf.has_option("celery", "pool"):
        pool = conf.get("celery", "pool")
        options.extend(["--pool", pool])
        # Celery pools of type eventlet and gevent use greenlets, which
        # requires monkey patching the app:
        # https://eventlet.net/doc/patching.html#monkey-patch
        # Otherwise task instances hang on the workers and are never
        # executed.
        maybe_patch_concurrency(['-P', pool])

    if args.daemon:
        # Run Celery worker as daemon
        handle = setup_logging(log_file)

        with open(stdout, 'w+') as stdout_handle, open(stderr, 'w+') as stderr_handle:
            # args.umask is expected to carry a default from the CLI parser;
            # without one, `umask` would be unbound in the int() call below.
            if args.umask:
                umask = args.umask

            ctx = daemon.DaemonContext(
                files_preserve=[handle],
                umask=int(umask, 8),
                stdout=stdout_handle,
                stderr=stderr_handle,
            )
            with ctx:
                sub_proc = _serve_logs(skip_serve_logs)
                celery_app.worker_main(options)
    else:
        # Run Celery worker in the same process
        sub_proc = _serve_logs(skip_serve_logs)
        celery_app.worker_main(options)

    if sub_proc:
        sub_proc.terminate()
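
# For orientation: the `options` argv assembled above is handed to
# `celery_app.worker_main(options)`, which runs Celery's own CLI against the
# Airflow app. With illustrative values (queues, concurrency, and hostname
# below are placeholders, not defaults taken from this source), it is roughly
# equivalent to:
#
#   celery -A airflow.executors.celery_executor.app worker \
#       -O fair --queues default --concurrency 16 \
#       --hostname worker-1@%h --loglevel INFO \
#       --pidfile $AIRFLOW_HOME/airflow-worker.pid
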
def worker(args):
    """Starts Airflow Celery worker"""
    if not settings.validate_session():
        print("Worker exiting... database connection precheck failed! ")
        sys.exit(1)

    autoscale = args.autoscale
    skip_serve_logs = args.skip_serve_logs
    if autoscale is None and conf.has_option("celery", "worker_autoscale"):
        autoscale = conf.get("celery", "worker_autoscale")

    # Setup locations
    pid_file_path, stdout, stderr, log_file = setup_locations(
        process=WORKER_PROCESS_NAME,
        pid=args.pid,
        stdout=args.stdout,
        stderr=args.stderr,
        log=args.log_file,
    )

    # Setup Celery worker
    worker_instance = worker_bin.worker(app=celery_app)
    options = {
        'optimization': 'fair',
        'O': 'fair',
        'queues': args.queues,
        'concurrency': args.concurrency,
        'autoscale': autoscale,
        'hostname': args.celery_hostname,
        'loglevel': conf.get('logging', 'LOGGING_LEVEL'),
        'pidfile': pid_file_path,
    }

    if conf.has_option("celery", "pool"):
        options["pool"] = conf.get("celery", "pool")

    if args.daemon:
        # Run Celery worker as daemon
        handle = setup_logging(log_file)
        stdout = open(stdout, 'w+')
        stderr = open(stderr, 'w+')

        ctx = daemon.DaemonContext(
            files_preserve=[handle],
            stdout=stdout,
            stderr=stderr,
        )
        with ctx:
            sub_proc = _serve_logs(skip_serve_logs)
            worker_instance.run(**options)

        stdout.close()
        stderr.close()
    else:
        # Run Celery worker in the same process
        sub_proc = _serve_logs(skip_serve_logs)
        worker_instance.run(**options)

    if sub_proc:
        sub_proc.terminate()
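
# `setup_locations` comes from Airflow's CLI utilities and resolves default
# file paths under AIRFLOW_HOME for anything the caller did not pass
# explicitly. A rough sketch under that assumption (not the verbatim
# implementation):

import os


def setup_locations(process, pid=None, stdout=None, stderr=None, log=None):
    """Fill in default pid/stdout/stderr/log paths for a daemonized process."""
    home = settings.AIRFLOW_HOME
    pid = pid or os.path.join(home, f'airflow-{process}.pid')
    stdout = stdout or os.path.join(home, f'airflow-{process}.out')
    stderr = stderr or os.path.join(home, f'airflow-{process}.err')
    log = log or os.path.join(home, f'airflow-{process}.log')
    return pid, stdout, stderr, log
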
def worker(args):
    """Starts Airflow Celery worker"""
    if not settings.validate_session():
        print("Worker exiting... database connection precheck failed! ")
        sys.exit(1)

    autoscale = args.autoscale
    skip_serve_logs = args.skip_serve_logs
    if autoscale is None and conf.has_option("celery", "worker_autoscale"):
        autoscale = conf.get("celery", "worker_autoscale")

    # Setup locations
    pid_file_path, stdout, stderr, log_file = setup_locations(
        process=WORKER_PROCESS_NAME,
        pid=args.pid,
        stdout=args.stdout,
        stderr=args.stderr,
        log=args.log_file,
    )

    # Setup Celery worker
    worker_instance = worker_bin.worker(app=celery_app)
    options = {
        'optimization': 'fair',
        'O': 'fair',
        'queues': args.queues,
        'concurrency': args.concurrency,
        'autoscale': autoscale,
        'hostname': args.celery_hostname,
        'loglevel': conf.get('logging', 'LOGGING_LEVEL'),
        'pidfile': pid_file_path,
    }

    if conf.has_option("celery", "pool"):
        pool = conf.get("celery", "pool")
        options["pool"] = pool
        # Celery pools of type eventlet and gevent use greenlets, which
        # requires monkey patching the app:
        # https://eventlet.net/doc/patching.html#monkey-patch
        # Otherwise task instances hang on the workers and are never
        # executed.
        maybe_patch_concurrency(['-P', pool])

    if args.daemon:
        # Run Celery worker as daemon
        handle = setup_logging(log_file)
        stdout = open(stdout, 'w+')
        stderr = open(stderr, 'w+')

        # args.umask is expected to carry a default from the CLI parser;
        # without one, `umask` would be unbound in the int() call below.
        if args.umask:
            umask = args.umask

        ctx = daemon.DaemonContext(
            files_preserve=[handle],
            umask=int(umask, 8),
            stdout=stdout,
            stderr=stderr,
        )
        with ctx:
            sub_proc = _serve_logs(skip_serve_logs)
            worker_instance.run(**options)

        stdout.close()
        stderr.close()
    else:
        # Run Celery worker in the same process
        sub_proc = _serve_logs(skip_serve_logs)
        worker_instance.run(**options)

    if sub_proc:
        sub_proc.terminate()
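
# `maybe_patch_concurrency` is Celery's own helper (importable as
# `from celery import maybe_patch_concurrency`). It scans the given argv for
# the pool option and, for eventlet/gevent pools, applies the matching monkey
# patch early, before other modules capture references to the unpatched
# stdlib. Illustrative calls (values are examples, not from this source):
#
#   maybe_patch_concurrency(['-P', 'eventlet'])  # monkey patches via eventlet
#   maybe_patch_concurrency(['-P', 'prefork'])   # no-op for process pools
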
def worker(args):
    """Starts Airflow Celery worker"""
    env = os.environ.copy()
    env['AIRFLOW_HOME'] = settings.AIRFLOW_HOME

    if not settings.validate_session():
        print("Worker exiting... database connection precheck failed! ")
        sys.exit(1)

    # Celery worker
    from airflow.executors.celery_executor import app as celery_app
    from celery.bin import worker  # pylint: disable=redefined-outer-name

    autoscale = args.autoscale
    if autoscale is None and conf.has_option("celery", "worker_autoscale"):
        autoscale = conf.get("celery", "worker_autoscale")
    worker = worker.worker(app=celery_app)  # pylint: disable=redefined-outer-name
    options = {
        'optimization': 'fair',
        'O': 'fair',
        'queues': args.queues,
        'concurrency': args.concurrency,
        'autoscale': autoscale,
        'hostname': args.celery_hostname,
        'loglevel': conf.get('core', 'LOGGING_LEVEL'),
    }

    if conf.has_option("celery", "pool"):
        options["pool"] = conf.get("celery", "pool")

    if args.daemon:
        pid, stdout, stderr, log_file = setup_locations("worker",
                                                        args.pid,
                                                        args.stdout,
                                                        args.stderr,
                                                        args.log_file)
        handle = setup_logging(log_file)
        stdout = open(stdout, 'w+')
        stderr = open(stderr, 'w+')

        ctx = daemon.DaemonContext(
            pidfile=TimeoutPIDLockFile(pid, -1),
            files_preserve=[handle],
            stdout=stdout,
            stderr=stderr,
        )
        with ctx:
            sub_proc = subprocess.Popen(['airflow', 'serve_logs'], env=env, close_fds=True)
            worker.run(**options)
            sub_proc.kill()

        stdout.close()
        stderr.close()
    else:
        signal.signal(signal.SIGINT, sigint_handler)
        signal.signal(signal.SIGTERM, sigint_handler)

        sub_proc = subprocess.Popen(['airflow', 'serve_logs'], env=env, close_fds=True)

        worker.run(**options)
        sub_proc.kill()
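
# The non-daemon branches above install `sigint_handler` so that Ctrl-C or a
# TERM signal exits the foreground worker cleanly instead of dumping a
# traceback. A minimal sketch of such a handler (an assumption about the
# helper, which lives elsewhere in Airflow's CLI utilities):

import sys


def sigint_handler(sig, frame):
    """Exit without error on SIGINT/SIGTERM in interactive command mode."""
    sys.exit(0)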