示例#1
0
    def _cleanup_system_temp_dir(self, minimum_age=604800):
        """
        Cleans up old files from the system's temp directory to reclaim space.
        Files that cannot be inspected or deleted, for example because of
        missing permissions or because they disappeared while the directory
        tree was being walked, are silently ignored.

        .. warning::

            This will delete files from the system temp directory.

        :param minimum_age:
            Minimum age in seconds before a file is deleted.  The age is
            measured from the more recent of the file's access and
            modification times.  Defaults to 604800 (seven days).
        """
        tempdir = tempfile.gettempdir()

        # Judge every file against the same instant instead of re-reading
        # the clock for each one.
        cutoff = time.time() - minimum_age

        # followlinks=False is the default for os.walk.  I am specifying it
        # explicitly here to make it more obvious.  Setting this to True
        # instead might make us delete files outside of tempdir, if there is
        # a symlink in there somewhere.
        for root, dirs, files in os.walk(tempdir, topdown=True, followlinks=False):
            # Don't delete our own temp files
            if root == dirname(config.tempdir):
                dirs[:] = []  # Do not iterate over sub directories in this root
                continue
            for filename in files:
                fullpath = join(root, filename)
                try:
                    stat_result = os.stat(fullpath)
                except OSError:
                    # The file vanished, is a dangling symlink or we are not
                    # allowed to look at it.  Skip it rather than aborting
                    # the whole cleanup.
                    continue
                timestamp = max(stat_result.st_atime, stat_result.st_mtime)
                if timestamp < cutoff:
                    logger.debug("Deleting tempfile %s", fullpath)
                    remove_file(fullpath, retry_on_exit=False, raise_=False)
示例#2
0
    def post(self, **kwargs):
        """
        Handle a request to stop the agent.

        Removes the run control file, asks the agent stored in
        ``config["agent"]`` to stop and stops the reactor once the stop
        call has fired, whether it succeeded or failed.  When the posted
        data has a truthy ``wait`` entry the response (``OK``) is only
        written after the stop has completed, otherwise ``ACCEPTED`` is
        written immediately.

        :returns: ``NOT_DONE_YET``
        """
        http_request = kwargs["request"]
        payload = kwargs["data"]
        agent = config["agent"]

        remove_file(
            config["run_control_file"], raise_=False, retry_on_exit=True)

        stop_result = agent.stop()

        def respond_ok(_, pending_request):
            pending_request.setResponseCode(OK)
            pending_request.finish()

        def halt_reactor(_):
            return reactor.stop()

        # TODO: need to wire this up to the real deferred object in stop()
        if not payload.get("wait"):
            # The caller does not want to wait, acknowledge right away.
            http_request.setResponseCode(ACCEPTED)
            http_request.finish()
        elif isinstance(stop_result, Deferred):
            stop_result.addCallback(respond_ok, http_request)
        else:  # pragma: no cover
            respond_ok(None, http_request)

        # Success or failure, the reactor goes down after stop() has fired.
        stop_result.addCallbacks(halt_reactor, halt_reactor)
        return NOT_DONE_YET
示例#3
0
    def test_retry_on_shutdown_raise(self):
        """
        Removing an already deleted file with ``raise_=True`` raises, and
        the registered exit handlers match ``self.atexit_signature``.
        """
        os.remove(self.path)
        expected_errors = (WindowsError, OSError, IOError)
        removal_options = dict(
            ignored_errnos=(), retry_on_exit=True, raise_=True)

        with self.assertRaises(expected_errors):
            remove_file(self.path, **removal_options)

        registered_handlers = atexit._exithandlers
        self.assertEqual(registered_handlers, self.atexit_signature)
示例#4
0
    def test_retry_only_once(self):
        """
        Calling ``remove_file`` twice with identical arguments leaves the
        exit handlers equal to ``self.atexit_signature``.
        """
        os.remove(self.path)
        removal_options = dict(
            ignored_errnos=(), retry_on_exit=True, raise_=False)

        # If remove_file is wrapped in a while loop and is being
        # passed the same arguments, it should only be one call for atexit.
        for _ in range(2):
            remove_file(self.path, **removal_options)

        registered_handlers = atexit._exithandlers
        self.assertEqual(registered_handlers, self.atexit_signature)
示例#5
0
    def _ensure_free_space_in_temp_dir(self, tempdir, space, minimum_age=None):
        """
        Ensures that at least space bytes of data can be stored on the volume
        on which tempdir is located, deleting files from tempdir if necessary.

        Files are considered for deletion in order of their access time,
        least recently accessed first.  Files that vanish or cannot be
        inspected while this runs are skipped.

        .. warning::

            This will delete files in tempdir to reclaim storage space.

        :param tempdir:
            Path of the temp directory.  It is created if it does not exist.

        :param space:
            Number of bytes that must be free; anything ``int()`` accepts.

        :param minimum_age:
            If set to a non-zero value, only files whose modification time
            is more than this many seconds in the past are deleted.

        :raises TypeError:
            Raised if ``space`` cannot be converted to an integer.

        :raises InsufficientSpaceError:
            Raised if enough space cannot be claimed.
        """
        assert isinstance(tempdir, STRING_TYPES), "Expected string for tempdir"
        try:
            space = int(space)
        except (ValueError, TypeError):
            # Wrap in a tuple so a tuple value for `space` is formatted
            # as a single %r argument.
            raise TypeError(
                "Could not convert value %r for `space` in "
                "_ensure_free_space_in_temp_dir() to an integer." % (space, )
            )

        try:
            os.makedirs(tempdir)
        except OSError as e:  # pragma: no cover
            if e.errno != EEXIST:
                raise

        if disk_usage(tempdir).free >= space:
            return

        tempfiles = []
        # followlinks=False is the default for os.walk.  I am specifying it
        # explicitly here to make it more obvious.  Setting this to True
        # instead might make us delete files outside of tempdir, if there is
        # a symlink in there somewhere.
        for root, dirs, files in os.walk(tempdir, followlinks=False):
            for filename in files:
                fullpath = join(root, filename)
                try:
                    atime = os.stat(fullpath).st_atime
                except OSError:
                    # Vanished file, dangling symlink or no permission;
                    # it cannot be a candidate for deletion.
                    continue
                tempfiles.append((atime, fullpath))

        # Sort on atime only so files with equal atime keep walk order.
        tempfiles.sort(key=lambda entry: entry[0])

        for _, filepath in tempfiles:
            # Re-check before every deletion so we stop as soon as enough
            # space has been reclaimed.
            if disk_usage(tempdir).free >= space:
                return

            if minimum_age:
                try:
                    mtime = os.stat(filepath).st_mtime
                except OSError:
                    # The file went away since we listed it.
                    continue
                if mtime + minimum_age >= time.time():  # pragma: no cover
                    logger.debug(
                        "Not deleting tempfile %s, it is newer than %s seconds",
                        filepath, minimum_age
                    )
                    continue

            logger.debug("Deleting tempfile %s", filepath)
            remove_file(filepath, retry_on_exit=False, raise_=False)

        # Every candidate has been processed, this is the final verdict.
        if disk_usage(tempdir).free < space:
            raise InsufficientSpaceError(
                "Cannot free enough space in temp directory %s" % tempdir)
示例#6
0
    def sigint_handler(self, *_):
        """
        Signal handler which removes the run control file, stops the agent
        and then stops the reactor.  Arguments passed in by the signal
        machinery are ignored.
        """
        utility.remove_file(
            config["run_control_file"], raise_=False, retry_on_exit=True)

        def on_stopped(_):
            return reactor.stop()

        def on_stop_failed(failure):
            svclog.error(
                "Error while attempting to shutdown the agent: %s", failure)

            # Stop the reactor but handle the exit code ourselves otherwise
            # Twisted will just exit with 0.
            reactor.stop()
            sys.exit(1)

        # Call stop() and wait for it to finish before we stop
        # the reactor.
        # NOTE: We're not using inlineCallbacks here because reactor.stop()
        # would be called in the middle of the generator unwinding
        self.stop().addCallbacks(on_stopped, on_stop_failed)
示例#7
0
    def stop(self):
        """
        Internal code which stops the agent.  This will post the shutdown
        to the master (when ``agent_api()`` is available), remove the agent
        lock file, call ``stop()`` on every job type in
        ``config["jobtypes"]`` and then wait, polling once per second, until
        all job types are gone or ``config["agent_shutdown_timeout"]``
        seconds have passed.

        The body yields deferreds and finishes with ``returnValue(None)``,
        so it is presumably driven by ``inlineCallbacks`` (the decorator is
        not visible here -- confirm).  ``self.stop_lock`` is held for the
        duration of the shutdown so concurrent callers wait for each other.
        """
        yield self.stop_lock.acquire()
        if self.stopped:
            yield self.stop_lock.release()
            svclog.warning("Agent is already stopped")
            returnValue(None)

        svclog.info("Stopping the agent")

        self.shutting_down = True
        self.shutdown_timeout = (
            datetime.utcnow() + timedelta(
                seconds=config["agent_shutdown_timeout"]))

        if self.agent_api() is not None:
            try:
                yield self.post_shutdown_to_master()
            except Exception as error:  # pragma: no cover
                # The message needs a placeholder for `error`, otherwise
                # the logging module fails to format the record.
                svclog.warning(
                    "Error while calling post_shutdown_to_master(): %s",
                    error)
        else:
            svclog.warning("Cannot post shutdown, agent_api() returned None")

        utility.remove_file(
            config["agent_lock_file"], retry_on_exit=True, raise_=False)

        svclog.debug("Stopping execution of jobtypes")
        # Iterate over a copy, entries may be popped inside the loop.
        for jobtype_id, jobtype in config["jobtypes"].copy().items():
            try:
                jobtype.stop()
            except Exception as error:  # pragma: no cover
                svclog.warning(
                    "Error while calling stop() on %s (id: %s): %s",
                    jobtype, jobtype_id, error
                )
                # A job type that fails to stop is dropped right away so
                # we do not wait on it below.
                config["jobtypes"].pop(jobtype_id)

        svclog.info(
            "Waiting on %s job types to terminate", len(config["jobtypes"]))

        while config["jobtypes"] and datetime.utcnow() < self.shutdown_timeout:
            for jobtype_id, jobtype in config["jobtypes"].copy().items():
                if not jobtype._has_running_processes():
                    svclog.warning(
                        "%r has not removed itself, forcing removal",
                        jobtype)
                    config["jobtypes"].pop(jobtype_id)

            # Brief delay so we don't tie up the cpu
            delay = Deferred()
            reactor.callLater(1, delay.callback, None)
            yield delay

        self.stopped = True
        yield self.stop_lock.release()
        returnValue(None)
示例#8
0
# Logger used by this module.
logger = getLogger("agent.sysinfo")

# Determine if file system is case sensitive.  The probe only runs when
# the name is not defined yet, so an existing value is left alone.
try:
    _filesystem_is_case_sensitive
except NameError:  # pragma: no cover
    # Create a real temporary file and check whether the same path is also
    # found under its lower- and upper-cased spellings.  If all three
    # spellings resolve to a file, the file system is treated as case
    # insensitive.
    # NOTE(review): this relies on the generated name containing cased
    # characters -- confirm.
    fd, path = tempfile.mkstemp()
    _filesystem_is_case_sensitive = \
        not all(map(isfile, [path, path.lower(), path.upper()]))

    # Close the descriptor returned by mkstemp; a failure to close is
    # ignored.
    try:
        os.close(fd)
    except OSError:  # pragma: no cover
        pass

    # Remove the probe file without raising; retry_on_exit=True is passed
    # to remove_file.
    remove_file(path, retry_on_exit=True, raise_=False)
    # Do not leave the temporary names behind in the module namespace.
    del fd, path

# Determine if environment is case sensitive
try:
    _environment_is_case_sensitive
except NameError:  # pragma: no cover
    envvar_lower = "PYFARM_CHECK_ENV_CASE_" + uuid.uuid4().hex
    envvar_upper = envvar_lower.upper()

    # populate environment then compare the difference
    os.environ.update({envvar_lower: "0", envvar_upper: "1"})
    _environment_is_case_sensitive = \
        os.environ[envvar_lower] != os.environ[envvar_upper]

    # remove the envvars we just made
示例#9
0
 def test_retry_on_shutdown_no_raise(self):
     """
     Removing an already deleted file with ``raise_=False`` leaves the
     exit handlers equal to ``self.atexit_signature``.
     """
     os.remove(self.path)
     removal_options = dict(
         ignored_errnos=(), retry_on_exit=True, raise_=False)
     remove_file(self.path, **removal_options)

     registered_handlers = atexit._exithandlers
     self.assertEqual(registered_handlers, self.atexit_signature)
示例#10
0
 def test_ignored_error(self):
     """
     ``remove_file`` on a missing file does not fail when ``ENOENT`` is
     listed in ``ignored_errnos``, even with ``raise_=True``.
     """
     os.remove(self.path)
     ignored = (ENOENT, )
     remove_file(self.path, raise_=True, ignored_errnos=ignored)

     still_present = isfile(self.path)
     self.assertFalse(still_present)
示例#11
0
 def test_removes_file(self):
     """``remove_file`` with default arguments deletes ``self.path``."""
     target = self.path
     remove_file(target)

     still_present = isfile(self.path)
     self.assertFalse(still_present)
示例#12
0
def supervisor():
    """
    Entry point of the supervisor process which starts the agent and
    restarts it when ``pyfarm-agent status`` reports that it is not running.

    Command line arguments before the first ``--`` are parsed by the
    supervisor itself, everything after it is passed through to the
    ``pyfarm-agent`` command.  Unless ``--no-daemon`` is given (or ``fork``
    is not implemented) the process is daemonized first.  The supervisor
    then writes its pid file, installs its signal handlers and enters an
    endless loop which, every ``config["supervisor_interval"]`` seconds,
    checks on the agent, unpacks a pending update archive if there is one
    and starts the agent again.

    :returns:
        The integer returned by ``start_daemon_posix`` if it returned one,
        or ``1`` if the pid file could not be written.  Otherwise this
        function only leaves through ``sys.exit``.
    """
    logger.debug("Supervisor called with: %r", sys.argv)

    # Only the first "--" acts as the separator, later ones belong to the
    # agent's arguments.
    argv = sys.argv[1:]
    if "--" in argv:
        separator = argv.index("--")
        supervisor_args = argv[:separator]
        agent_args = argv[separator + 1:]
    else:
        supervisor_args = argv
        agent_args = []

    logger.debug("supervisor_args: %s", supervisor_args)

    parser = AgentArgumentParser(
        description="Start and monitor the agent process")
    parser.add_argument("--updates-drop-dir",
                        config="agent_updates_dir",
                        type=isdir, type_kwargs=dict(create=True),
                        help="Where to look for agent updates")
    parser.add_argument("--agent-package-dir",
                        type=isdir, type_kwargs=dict(create=True),
                        help="Path to the actual agent code")
    parser.add_argument("--pidfile", config="supervisor_lock_file",
                        help="The file to store the process id in. "
                             "[default: %(default)s]")
    parser.add_argument("-n", "--no-daemon", default=False, action="store_true",
                        config=False,
                        help="If provided then do not run the process in the "
                             "background.")
    parser.add_argument("--chdir", config="agent_chdir", type=isdir,
                        help="The directory to chdir to upon launch.")
    parser.add_argument("--uid", type=int,
                        help="The user id to run the supervisor as.  "
                             "*This setting is ignored on Windows.*")
    parser.add_argument("--gid", type=int,
                        help="The group id to run the supervisor as.  "
                             "*This setting is ignored on Windows.*")
    args = parser.parse_args(supervisor_args)

    if not args.no_daemon and fork is not NotImplemented:
        logger.info("sending supervisor log output to %s",
                    config["supervisor_log"])
        # No --log option is declared above.  Use args.log if the parser
        # provides one anyway, otherwise fall back to the path announced
        # in the message above instead of failing with AttributeError.
        log_path = getattr(args, "log", config["supervisor_log"])
        daemon_start_return_code = start_daemon_posix(
            log_path, args.chdir, args.uid, args.gid)

        if isinstance(daemon_start_return_code, INTEGER_TYPES):
            return daemon_start_return_code

    elif not args.no_daemon and fork is NotImplemented:
        logger.warning(
            "`fork` is not implemented on %s, starting in foreground",
            OS.title())
    else:
        logger.debug("Not forking to background")

    pid = os.getpid()
    # Write the PID file
    try:
        with open(config["supervisor_lock_file"], "w") as pidfile:
            pidfile.write(str(pid))
    # open() raises IOError on Python 2 and OSError on Python 3
    except (OSError, IOError) as e:
        logger.error(
            "Failed to write PID file %s: %s",
            config["supervisor_lock_file"], e)
        return 1
    else:
        logger.debug("Wrote PID to %s", config["supervisor_lock_file"])

    logger.info("supervisor pid: %s", pid)

    if getuid is not NotImplemented:
        logger.info("uid: %s", getuid())

    if getgid is not NotImplemented:
        logger.info("gid: %s", getgid())

    def terminate_handler(*_):
        # Stop the agent, then leave the supervisor loop for good.
        subprocess.call(["pyfarm-agent"] + agent_args + ["stop"])
        sys.exit(0)

    def restart_handler(*_):
        # Only stop the agent; the loop below starts it again.
        subprocess.call(["pyfarm-agent"] + agent_args + ["stop"])

    logger.debug("Setting signal handlers")

    signal.signal(signal.SIGTERM, terminate_handler)
    signal.signal(signal.SIGINT, terminate_handler)
    # SIGHUP is not defined on every platform (for example Windows).
    if hasattr(signal, "SIGHUP"):
        signal.signal(signal.SIGHUP, restart_handler)

    update_file_path = join(config["agent_updates_dir"], "pyfarm-agent.zip")
    run_control_file = config["run_control_file_by_platform"]\
        [operating_system()]
    loop_interval = config["supervisor_interval"]

    while True:
        if subprocess.call(["pyfarm-agent", "status"]) != 0:
            # NOTE(review): the message below says the agent will not be
            # restarted, but execution continues and the agent is started
            # regardless.  Left unchanged because skipping the start may
            # also prevent the very first launch -- confirm the intent.
            if not isfile(run_control_file):
                logger.info("pyfarm_agent is not running, but run control file "
                            "%s does not exist. Not restarting the agent",
                            run_control_file)
            logger.info("pyfarm-agent is not running")
            if (os.path.isfile(update_file_path) and
                    zipfile.is_zipfile(update_file_path)):
                logger.info("Found an upgrade to pyfarm-agent")
                try:
                    # Replace the installed package with the archive content
                    remove_directory(args.agent_package_dir, raise_=True)
                    os.makedirs(args.agent_package_dir)
                    with zipfile.ZipFile(update_file_path, "r") as archive:
                        archive.extractall(args.agent_package_dir)

                    remove_file(
                        update_file_path, retry_on_exit=True, raise_=False)
                except Exception as e:
                    # A failed update must not keep the agent from starting
                    logger.error(
                        "Caught exception trying to update agent: %r", e)

            logger.info("starting pyfarm-agent now")
            if subprocess.call(["pyfarm-agent"] + agent_args + ["start"]) != 0:
                logger.error("Could not start pyfarm-agent")
                sys.exit(1)

        time.sleep(loop_interval)
示例#13
0
    def start(self):
        """
        Start the agent in this process.

        The agent's ``/status`` endpoint is queried first.  If it answers,
        an agent is already running and ``1`` is returned.  If the
        connection fails, the pid stored in ``config["agent_lock_file"]``
        is inspected: a live process named ``pyfarm-agent`` aborts the
        start, while an unreadable or stale pid file is removed.  After
        that the required directories are created, the process is
        daemonized unless ``--no-daemon`` was given or ``fork`` is not
        implemented, the pid file and run control file are written and the
        reactor is run with an :class:`Agent` service.

        :returns:
            ``1`` if the agent is already running or a required file or
            directory could not be set up, or the integer returned by
            ``start_daemon_posix`` if it returned one.  Otherwise the call
            blocks in ``reactor.run()``.

        :raises OSError, IOError:
            Re-raised if the pid file cannot be read for a reason other
            than it being missing, or if the parent directory of the run
            control file cannot be created.
        """
        url = self.agent_api + "/status"
        try:
            response = requests.get(
                url, headers={"Content-Type": "application/json"})
        except ConnectionError:
            pid = None
            remove_lock_file = False

            # Try to open an existing lock file
            try:
                with open(config["agent_lock_file"], "r") as pidfile:
                    try:
                        pid = int(pidfile.read().strip())
                    except ValueError:
                        logger.warning(
                            "Could not convert pid in %s to an integer.",
                            config["agent_lock_file"])
                        remove_lock_file = True

            # If the file is missing we ignore the error and read
            # pid/remove_lock_file later on.
            except (OSError, IOError) as e:
                if e.errno == ENOENT:
                    logger.debug(
                        "Process ID file %s does not exist",
                        config["agent_lock_file"])
                else:
                    raise

            else:
                # pid may still be None here if the file content could not
                # be parsed; remove_lock_file is set in that case so the
                # broken file is cleaned up below.
                logger.debug(
                    "Process ID file %s exists", config["agent_lock_file"])

            if pid is not None:
                try:
                    process = psutil.Process(pid)

                    # Process does exist, does it appear to be our agent?
                    if process.name() == "pyfarm-agent":
                        logger.error(
                            "Agent is already running, pid %s", pid)
                        return 1

                    # Not our agent so the lock file is probably wrong.
                    else:
                        logger.debug(
                            "Process %s does not appear to be the "
                            "agent.", pid)
                        remove_lock_file = True

                # Process in the pid file does not exist or we don't have the
                # rights to access it
                except (psutil.NoSuchProcess, psutil.AccessDenied):
                    logger.debug(
                        "Process ID in %s is stale.",
                        config["agent_lock_file"])
                    remove_lock_file = True

            if remove_lock_file:
                logger.debug(
                    "Attempting to remove PID file %s",
                    config["agent_lock_file"])

                try:
                    remove_file(
                        config["agent_lock_file"],
                        retry_on_exit=True, raise_=True)
                except (WindowsError, OSError, IOError):
                    return 1
        else:
            # Something answered on the status url, so an agent is running.
            code = "%s %s" % (
                response.status_code, responses[response.status_code])
            pid = response.json()["pids"]["parent"]
            logger.error(
                "Agent at pid %s is already running, got %s from %s.",
                pid, code, url)
            return 1

        logger.info("Starting agent")

        if not isdir(config["jobtype_task_logs"]):
            logger.debug("Creating %s", config["jobtype_task_logs"])
            try:
                os.makedirs(config["jobtype_task_logs"])
            except (OSError, WindowsError):
                logger.error("Failed to create %s", config["jobtype_task_logs"])
                return 1

        # create the directory for log
        if not self.args.no_daemon and not isfile(config["agent_log"]):
            try:
                os.makedirs(dirname(config["agent_log"]))
            except (OSError, WindowsError) as e:
                # An already existing directory is the normal case on any
                # start but the very first one, don't warn about it.
                if e.errno != EEXIST:
                    # Not an error because it could be created later on
                    logger.warning(
                        "failed to create %s: %r",
                        dirname(config["agent_log"]), e)
                # If we don't explicitly clear this exception here, twisted will
                # keep displaying it with its stacktrace every time a  callback
                # raises an exception.  sys.exc_clear() only exists on
                # Python 2.
                if hasattr(sys, "exc_clear"):
                    sys.exc_clear()

        # so long as fork could be imported and --no-daemon was not set
        # then setup the log files
        if not self.args.no_daemon and fork is not NotImplemented:
            logger.info("sending log output to %s", config["agent_log"])
            daemon_start_return_code = start_daemon_posix(
                config["agent_log"], config["agent_chdir"],
                self.args.uid, self.args.gid)

            if isinstance(daemon_start_return_code, INTEGER_TYPES):
                return daemon_start_return_code

        elif not self.args.no_daemon and fork is NotImplemented:
            logger.warning(
                "`fork` is not implemented on %s, starting in foreground",
                OS.title())

        # PID file should not exist now.  Either the last agent instance
        # should have removed it or we should have above.
        if isfile(config["agent_lock_file"]):
            logger.error("PID file should not exist on disk at this point.")
            return 1

        # Create the directory for the pid file if necessary
        pid_dirname = dirname(config["agent_lock_file"])
        if not isdir(pid_dirname):
            try:
                os.makedirs(pid_dirname)
            except OSError:  # pragma: no cover
                logger.error(
                    "Failed to create parent directory for %s",
                    config["agent_lock_file"])
                return 1
            else:
                logger.debug("Created directory %s", pid_dirname)

        # Write the PID file
        pid = os.getpid()
        try:
            with open(config["agent_lock_file"], "w") as pidfile:
                pidfile.write(str(pid))
        # open() raises IOError on Python 2 and OSError on Python 3
        except (OSError, IOError) as e:
            logger.error(
                "Failed to write PID file %s: %s", config["agent_lock_file"], e)
            return 1
        else:
            logger.debug("Wrote PID to %s", config["agent_lock_file"])

        logger.info("pid: %s", pid)

        if not isfile(config["run_control_file"]):
            directory = dirname(config["run_control_file"])
            try:
                os.makedirs(directory)
            except (OSError, IOError) as e:  # pragma: no cover
                if e.errno != EEXIST:
                    logger.error(
                        "Failed to create parent directory for %s: %s: %s",
                        config["run_control_file"], type(e).__name__, e)
                    raise
            else:
                logger.debug("Created directory %s", directory)
            try:
                # Opening in append mode creates the file without
                # touching existing content.
                with open(config["run_control_file"], "a"):
                    pass
            except (OSError, IOError) as e:
                logger.error("Failed to create run control file %s: %s: %s",
                             config["run_control_file"], type(e).__name__, e)
            else:
                logger.info("Created run control file %s",
                            config["run_control_file"])

        if getuid is not NotImplemented:
            logger.info("uid: %s", getuid())

        if getgid is not NotImplemented:
            logger.info("gid: %s", getgid())

        from pyfarm.agent.service import Agent

        # Setup the agent, register stop(), then run the agent
        service = Agent()
        signal.signal(signal.SIGINT, service.sigint_handler)
        reactor.callWhenRunning(service.start)
        reactor.run()