def supervisor():
    """Start ``pyfarm-agent``, keep it running and apply pending updates.

    ``sys.argv`` is split at the first ``--``: arguments before it configure
    the supervisor, arguments after it are passed through to every
    ``pyfarm-agent`` invocation made from here.

    Unless ``--no-daemon`` is given (or ``fork`` is unavailable) the process
    is daemonized through ``start_daemon_posix``.  Afterwards the PID file is
    written, signal handlers are installed and the function polls
    ``pyfarm-agent status`` every ``config["supervisor_interval"]`` seconds.
    When the agent is down and the run control file exists, a pending
    ``pyfarm-agent.zip`` update is unpacked into ``--agent-package-dir`` and
    the agent is started again.

    :returns:
        The integer returned by ``start_daemon_posix`` when it returns one,
        or ``1`` if the PID file could not be written.  Otherwise the
        function loops until the process exits.
    """
    logger.debug("Supervisor called with: %r", sys.argv)

    # Everything before the first "--" belongs to the supervisor, everything
    # after it (including any further "--") belongs to the agent.
    argv = sys.argv[1:]
    if "--" in argv:
        separator = argv.index("--")
        supervisor_args = argv[:separator]
        agent_args = argv[separator + 1:]
    else:
        supervisor_args = argv
        agent_args = []

    logger.debug("supervisor_args: %s", supervisor_args)

    parser = AgentArgumentParser(
        description="Start and monitor the agent process")
    parser.add_argument("--updates-drop-dir", config="agent_updates_dir",
                        type=isdir, type_kwargs=dict(create=True),
                        help="Where to look for agent updates")
    parser.add_argument("--agent-package-dir",
                        type=isdir, type_kwargs=dict(create=True),
                        help="Path to the actual agent code")
    parser.add_argument("--pidfile", config="supervisor_lock_file",
                        help="The file to store the process id in. "
                             "[default: %(default)s]")
    parser.add_argument("-n", "--no-daemon", default=False,
                        action="store_true", config=False,
                        help="If provided then do not run the process in the "
                             "background.")
    parser.add_argument("--chdir", config="agent_chdir", type=isdir,
                        help="The directory to chdir to upon launch.")
    parser.add_argument("--uid", type=int,
                        help="The user id to run the supervisor as. "
                             "*This setting is ignored on Windows.*")
    parser.add_argument("--gid", type=int,
                        help="The group id to run the supervisor as. "
                             "*This setting is ignored on Windows.*")
    args = parser.parse_args(supervisor_args)

    if not args.no_daemon and fork is not NotImplemented:
        # No ``--log`` option is registered above, so ``args.log`` cannot be
        # relied upon; fall back to the configured supervisor log, which is
        # the destination the message below announces.
        log_path = getattr(args, "log", config["supervisor_log"])
        logger.info("sending supervisor log output to %s", log_path)
        daemon_start_return_code = start_daemon_posix(
            log_path, args.chdir, args.uid, args.gid)

        # An integer result is handed straight back to the caller.
        if isinstance(daemon_start_return_code, INTEGER_TYPES):
            return daemon_start_return_code

    elif not args.no_daemon and fork is NotImplemented:
        logger.warning(
            "`fork` is not implemented on %s, starting in "
            "foreground", OS.title())
    else:
        logger.debug("Not forking to background")

    pid = os.getpid()

    # Write the PID file.  IOError is caught too because that is what
    # ``open`` raises on Python 2.
    try:
        with open(config["supervisor_lock_file"], "w") as pidfile:
            pidfile.write(str(pid))
    except (OSError, IOError) as e:
        logger.error(
            "Failed to write PID file %s: %s",
            config["supervisor_lock_file"], e)
        return 1
    else:
        logger.debug("Wrote PID to %s", config["supervisor_lock_file"])

    logger.info("supervisor pid: %s", pid)

    if getuid is not NotImplemented:
        logger.info("uid: %s", getuid())

    if getgid is not NotImplemented:
        logger.info("gid: %s", getgid())

    def terminate_handler(*_):
        # Stop the agent, then exit the supervisor itself.
        subprocess.call(["pyfarm-agent"] + agent_args + ["stop"])
        sys.exit(0)

    def restart_handler(*_):
        # Only stop the agent; the polling loop below starts it again.
        subprocess.call(["pyfarm-agent"] + agent_args + ["stop"])

    logger.debug("Setting signal handlers")
    signal.signal(signal.SIGTERM, terminate_handler)
    signal.signal(signal.SIGINT, terminate_handler)
    # SIGHUP is not defined on Windows; referencing it there would raise
    # AttributeError before the supervisor ever starts polling.
    if hasattr(signal, "SIGHUP"):
        signal.signal(signal.SIGHUP, restart_handler)

    update_file_path = join(config["agent_updates_dir"], "pyfarm-agent.zip")
    run_control_file = \
        config["run_control_file_by_platform"][operating_system()]
    loop_interval = config["supervisor_interval"]

    while True:
        if subprocess.call(["pyfarm-agent", "status"]) != 0:
            if not isfile(run_control_file):
                # Honor the message: without the run control file the agent
                # is left stopped instead of being restarted.
                logger.info("pyfarm_agent is not running, but run control file "
                            "%s does not exist. Not restarting the agent",
                            run_control_file)
            else:
                logger.info("pyfarm-agent is not running")

                if (os.path.isfile(update_file_path) and
                        zipfile.is_zipfile(update_file_path)):
                    logger.info("Found an upgrade to pyfarm-agent")
                    # Best effort: a failed update is logged and the agent
                    # is still started afterwards.
                    try:
                        remove_directory(args.agent_package_dir, raise_=True)
                        os.makedirs(args.agent_package_dir)
                        with zipfile.ZipFile(update_file_path, "r") as archive:
                            archive.extractall(args.agent_package_dir)
                        remove_file(
                            update_file_path,
                            retry_on_exit=True, raise_=False)
                    except Exception as e:
                        logger.error(
                            "Caught exception trying to update agent: %r", e)

                logger.info("starting pyfarm-agent now")
                if subprocess.call(
                        ["pyfarm-agent"] + agent_args + ["start"]) != 0:
                    logger.error("Could not start pyfarm-agent")
                    sys.exit(1)

        time.sleep(loop_interval)
def start(self):
    """Start the agent unless another instance is already running.

    The agent's ``/status`` endpoint is queried first.  If it answers, an
    agent is already running and ``1`` is returned.  If the connection
    fails, the lock file is inspected: a PID belonging to a live
    ``pyfarm-agent`` process also yields ``1``, while a stale or unreadable
    lock file is removed.

    Afterwards the required directories are created, the process is
    daemonized unless ``--no-daemon`` was given (or ``fork`` is
    unavailable), the PID and run control files are written and the agent
    service is run inside the reactor.

    :returns:
        ``1`` on any start-up failure, or the integer returned by
        ``start_daemon_posix`` when it returns one.
    :raises OSError, IOError:
        If the lock file cannot be read for a reason other than it being
        missing, or if the run control file's parent directory cannot be
        created.
    """
    url = self.agent_api + "/status"
    try:
        response = requests.get(
            url, headers={"Content-Type": "application/json"})

    except ConnectionError:
        pid = None
        remove_lock_file = False

        # Try to open an existing lock file
        try:
            with open(config["agent_lock_file"], "r") as pidfile:
                try:
                    pid = int(pidfile.read().strip())
                except ValueError:
                    logger.warning(
                        "Could not convert pid in %s to an integer.",
                        config["agent_lock_file"])
                    remove_lock_file = True

        # If the file is missing we ignore the error and read
        # pid/remove_lock_file later on.
        except (OSError, IOError) as e:
            if e.errno == ENOENT:
                logger.debug(
                    "Process ID file %s does not exist",
                    config["agent_lock_file"])
            else:
                raise

        else:
            # No assertion on ``pid`` here: a lock file with unparsable
            # content legitimately leaves it as None and is already flagged
            # for removal above.
            logger.debug(
                "Process ID file %s exists", config["agent_lock_file"])

        if pid is not None:
            try:
                process = psutil.Process(pid)

                # Process does exist, does it appear to be our agent?
                if process.name() == "pyfarm-agent":
                    logger.error(
                        "Agent is already running, pid %s", pid)
                    return 1

                # Not our agent so the lock file is probably wrong.
                else:
                    logger.debug(
                        "Process %s does not appear to be the "
                        "agent.", pid)
                    remove_lock_file = True

            # Process in the pid file does not exist or we don't have the
            # rights to access it
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                logger.debug(
                    "Process ID in %s is stale.",
                    config["agent_lock_file"])
                remove_lock_file = True

        if remove_lock_file:
            logger.debug(
                "Attempting to remove PID file %s",
                config["agent_lock_file"])

            try:
                remove_file(
                    config["agent_lock_file"],
                    retry_on_exit=True, raise_=True)
            except (WindowsError, OSError, IOError):
                return 1
    else:
        # The status endpoint answered, so an agent is already up.
        code = "%s %s" % (
            response.status_code, responses[response.status_code])
        pid = response.json()["pids"]["parent"]
        logger.error(
            "Agent at pid %s is already running, got %s from %s.",
            pid, code, url)
        return 1

    logger.info("Starting agent")

    if not isdir(config["jobtype_task_logs"]):
        logger.debug("Creating %s", config["jobtype_task_logs"])
        try:
            os.makedirs(config["jobtype_task_logs"])
        except (OSError, WindowsError):
            logger.error("Failed to create %s", config["jobtype_task_logs"])
            return 1

    # create the directory for log
    if not self.args.no_daemon and not isfile(config["agent_log"]):
        log_directory = dirname(config["agent_log"])
        # Skip the attempt when the directory is already present so no
        # spurious "failed to create" warning is emitted.
        if not isdir(log_directory):
            try:
                os.makedirs(log_directory)
            except (OSError, WindowsError) as e:
                # Not an error because it could be created later on
                logger.warning(
                    "failed to create %s: %r", log_directory, e)
                # If we don't explicitly clear this exception here, twisted
                # will keep displaying it with its stacktrace every time a
                # callback raises an exception.  ``sys.exc_clear`` only
                # exists on Python 2, hence the guard.
                if hasattr(sys, "exc_clear"):
                    sys.exc_clear()

    # so long as fork could be imported and --no-daemon was not set
    # then setup the log files
    if not self.args.no_daemon and fork is not NotImplemented:
        logger.info("sending log output to %s", config["agent_log"])
        daemon_start_return_code = start_daemon_posix(
            config["agent_log"], config["agent_chdir"],
            self.args.uid, self.args.gid)

        # An integer result is handed straight back to the caller.
        if isinstance(daemon_start_return_code, INTEGER_TYPES):
            return daemon_start_return_code

    elif not self.args.no_daemon and fork is NotImplemented:
        logger.warning(
            "`fork` is not implemented on %s, starting in "
            "foreground", OS.title())

    # PID file should not exist now.  Either the last agent instance
    # should have removed it or we should have above.
    if isfile(config["agent_lock_file"]):
        logger.error("PID file should not exist on disk at this point.")
        return 1

    # Create the directory for the pid file if necessary
    pid_dirname = dirname(config["agent_lock_file"])
    if not isdir(pid_dirname):
        try:
            os.makedirs(pid_dirname)
        except OSError:  # pragma: no cover
            logger.error(
                "Failed to create parent directory for %s",
                config["agent_lock_file"])
            return 1
        else:
            logger.debug("Created directory %s", pid_dirname)

    # Write the PID file.  IOError is caught too because that is what
    # ``open`` raises on Python 2.
    pid = os.getpid()
    try:
        with open(config["agent_lock_file"], "w") as pidfile:
            pidfile.write(str(pid))
    except (OSError, IOError) as e:
        logger.error(
            "Failed to write PID file %s: %s",
            config["agent_lock_file"], e)
        return 1
    else:
        logger.debug("Wrote PID to %s", config["agent_lock_file"])

    logger.info("pid: %s", pid)

    if not isfile(config["run_control_file"]):
        directory = dirname(config["run_control_file"])
        try:
            os.makedirs(directory)
        except (OSError, IOError) as e:  # pragma: no cover
            # An already existing directory is fine, anything else is fatal.
            if e.errno != EEXIST:
                logger.error(
                    "Failed to create parent directory for %s: %s: %s",
                    config["run_control_file"], type(e).__name__, e)
                raise
        else:
            logger.debug("Created directory %s", directory)

        # Opening in append mode creates the file without truncating it.
        # A failure here is logged but does not abort the start.
        try:
            with open(config["run_control_file"], "a"):
                pass
        except (OSError, IOError) as e:
            logger.error("Failed to create run control file %s: %s: %s",
                         config["run_control_file"], type(e).__name__, e)
        else:
            logger.info("Created run control file %s",
                        config["run_control_file"])

    if getuid is not NotImplemented:
        logger.info("uid: %s", getuid())

    if getgid is not NotImplemented:
        logger.info("gid: %s", getgid())

    # Imported here rather than at module level, as in the original code.
    from pyfarm.agent.service import Agent

    # Setup the agent, register stop(), then run the agent
    service = Agent()
    signal.signal(signal.SIGINT, service.sigint_handler)
    reactor.callWhenRunning(service.start)
    reactor.run()