class AbstractBuildSlave(service.ReconfigurableServiceMixin,
                         service.AsyncMultiService, object):

    """This is the master-side representative for a remote buildbot slave.
    There is exactly one for each slave described in the config file (the
    c['slaves'] list).  When buildbots connect in (.attach), they get a
    reference to this instance.  The BotMaster object is stashed as the
    .botmaster attribute.  The BotMaster is also our '.parent' Service.

    I represent a build slave -- a remote machine capable of running
    builds.  I am instantiated by the configuration file, and can be
    subclassed to add extra functionality."""

    implements(IBuildSlave)

    # reconfig slaves after builders
    reconfig_priority = 64

    def __init__(self, name, password, max_builds=None,
                 notify_on_missing=[], missing_timeout=3600,
                 properties={}, locks=None, keepalive_interval=3600):
        """
        @param name: botname this machine will supply when it connects
        @param password: password this machine will supply when it connects
        @param max_builds: maximum number of simultaneous builds that will
                           be run concurrently on this buildslave (the
                           default is None for no limit)
        @param properties: properties that will be applied to builds run on
                           this slave
        @type properties: dictionary
        @param locks: A list of locks that must be acquired before this
                      slave can be used
        @type locks: list
        """
        name = ascii2unicode(name)
        service.AsyncMultiService.__init__(self)
        self.slavename = ascii2unicode(name)
        self.password = password

        # protocol registration
        self.registration = None

        # these are set when the service is started
        self.botmaster = None
        self.manager = None
        self.master = None
        self.buildslaveid = None

        self.slave_status = SlaveStatus(name)
        self.slave_commands = None
        self.slavebuilders = {}
        self.max_builds = max_builds
        self.access = []
        if locks:
            self.access = locks
        self.lock_subscriptions = []

        self.properties = Properties()
        self.properties.update(properties, "BuildSlave")
        self.properties.setProperty("slavename", name, "BuildSlave")

        self.lastMessageReceived = 0
        if isinstance(notify_on_missing, str):
            notify_on_missing = [notify_on_missing]
        self.notify_on_missing = notify_on_missing
        for i in notify_on_missing:
            if not isinstance(i, str):
                config.error(
                    'notify_on_missing arg %r is not a string' % (i,))
        self.missing_timeout = missing_timeout
        self.missing_timer = None

        # a protocol connection, if we're currently connected
        self.conn = None

        self._old_builder_list = None

    def __repr__(self):
        return "<%s %r>" % (self.__class__.__name__, self.slavename)

    def updateLocks(self):
        """Convert the L{LockAccess} objects in C{self.locks} into real lock
        objects, while also maintaining the subscriptions to lock
        releases."""
        # unsubscribe from any old locks
        for s in self.lock_subscriptions:
            s.unsubscribe()

        # convert locks into their real form
        locks = [(self.botmaster.getLockFromLockAccess(a), a)
                 for a in self.access]
        self.locks = [(l.getLock(self), la) for l, la in locks]
        self.lock_subscriptions = [l.subscribeToReleases(self._lockReleased)
                                   for l, la in self.locks]

    def locksAvailable(self):
        """
        I am called to see if all the locks I depend on are available, in
        which case I return True; otherwise I return False.
        """
        if not self.locks:
            return True
        for lock, access in self.locks:
            if not lock.isAvailable(self, access):
                return False
        return True

    def acquireLocks(self):
        """
        I am called when a build is preparing to run.  I try to claim all
        the locks that are needed for a build to happen.  If I can't, then
        my caller should give up the build and try to get another slave to
        look at it.
        """
        log.msg("acquireLocks(slave %s, locks %s)" % (self, self.locks))
        if not self.locksAvailable():
            log.msg("slave %s can't lock, giving up" % (self, ))
            return False
        # all locks are available, claim them all
        for lock, access in self.locks:
            lock.claim(self, access)
        return True

    def releaseLocks(self):
        """
        I am called to release any locks after a build has finished.
        """
        log.msg("releaseLocks(%s): %s" % (self, self.locks))
        for lock, access in self.locks:
            lock.release(self, access)

    def _lockReleased(self):
        """One of the locks for this slave was released; try scheduling
        builds."""
        if not self.botmaster:
            return  # oh well..
        self.botmaster.maybeStartBuildsForSlave(self.slavename)

    def _applySlaveInfo(self, info):
        if not info:
            return

        self.slave_status.setAdmin(info.get("admin"))
        self.slave_status.setHost(info.get("host"))
        self.slave_status.setAccessURI(info.get("access_uri", None))
        self.slave_status.setVersion(info.get("version", "(unknown)"))

    @defer.inlineCallbacks
    def _getSlaveInfo(self):
        buildslave = yield self.master.data.get(
            ('buildslaves', self.buildslaveid))
        self._applySlaveInfo(buildslave['slaveinfo'])

    def setServiceParent(self, parent):
        # the botmaster needs to be set before setServiceParent, which
        # calls startService
        self.manager = parent
        self.master = parent.master
        self.botmaster = parent.master.botmaster
        return service.AsyncMultiService.setServiceParent(self, parent)

    @defer.inlineCallbacks
    def startService(self):
        self.updateLocks()
        self.startMissingTimer()
        self.buildslaveid = yield self.master.data.updates.findBuildslaveId(
            self.slavename)
        yield self._getSlaveInfo()
        yield service.AsyncMultiService.startService(self)

    @defer.inlineCallbacks
    def reconfigServiceWithBuildbotConfig(self, new_config):
        # Given a new BuildSlave, configure this one identically.  Because
        # BuildSlave objects are remotely referenced, we can't replace them
        # without disconnecting the slave, yet there's no reason to do that.
        new = self.findNewSlaveInstance(new_config)

        assert self.slavename == new.slavename

        self.password = new.password

        # update our records with the buildslave manager
        if not self.registration:
            self.registration = yield self.master.buildslaves.register(self)
        yield self.registration.update(new, new_config)

        # adopt new instance's configuration parameters
        self.max_builds = new.max_builds
        self.access = new.access
        self.notify_on_missing = new.notify_on_missing

        if self.missing_timeout != new.missing_timeout:
            running_missing_timer = self.missing_timer
            self.stopMissingTimer()
            self.missing_timeout = new.missing_timeout
            if running_missing_timer:
                self.startMissingTimer()

        properties = Properties()
        properties.updateFromProperties(new.properties)
        self.properties = properties

        self.updateLocks()

        bids = [b._builderid for b in
                self.botmaster.getBuildersForSlave(self.slavename)]
        yield self.master.data.updates.buildslaveConfigured(
            self.buildslaveid, bids)

        # update the attached slave's notion of which builders are attached.
        # This assumes that the relevant builders have already been
        # configured, which is why the reconfig_priority is set low in this
        # class.
        yield self.updateSlave()

        yield service.ReconfigurableServiceMixin.reconfigServiceWithBuildbotConfig(
            self, new_config)

    @defer.inlineCallbacks
    def stopService(self):
        if self.registration:
            yield self.registration.unregister()
            self.registration = None
        self.stopMissingTimer()
        yield service.AsyncMultiService.stopService(self)

    def findNewSlaveInstance(self, new_config):
        # TODO: called multiple times per reconfig; use 1-element cache?
        for sl in new_config.slaves:
            if sl.slavename == self.slavename:
                return sl
        assert 0, "no new slave named '%s'" % self.slavename

    def startMissingTimer(self):
        if self.notify_on_missing and self.missing_timeout and self.parent:
            self.stopMissingTimer()  # in case it's already running
            self.missing_timer = reactor.callLater(self.missing_timeout,
                                                   self._missing_timer_fired)

    def stopMissingTimer(self):
        if self.missing_timer:
            self.missing_timer.cancel()
            self.missing_timer = None

    def isConnected(self):
        return self.conn

    def _missing_timer_fired(self):
        self.missing_timer = None
        # notify people, but only if we're still in the config
        if not self.parent:
            return

        buildmaster = self.botmaster.master
        status = buildmaster.getStatus()
        text = "The Buildbot working for '%s'\n" % status.getTitle()
        text += ("has noticed that the buildslave named %s went away\n" %
                 self.slavename)
        text += "\n"
        text += ("It last disconnected at %s (buildmaster-local time)\n" %
                 time.ctime(time.time() - self.missing_timeout))  # approx
        text += "\n"
        text += "The admin on record (as reported by BUILDSLAVE:info/admin)\n"
        text += "was '%s'.\n" % self.slave_status.getAdmin()
        text += "\n"
        text += "Sincerely,\n"
        text += " The Buildbot\n"
        text += " %s\n" % status.getTitleURL()
        text += "\n"
        text += "%s\n" % status.getURLForThing(self.slave_status)
        subject = "Buildbot: buildslave %s was lost" % self.slavename
        return self._mail_missing_message(subject, text)

    def updateSlave(self):
        """Called to add or remove builders after the slave has connected.

        @return: a Deferred that indicates when an attached slave has
        accepted the new builders and/or released the old ones."""
        if self.conn:
            return self.sendBuilderList()
        else:
            return defer.succeed(None)

    def updateSlaveStatus(self, buildStarted=None, buildFinished=None):
        # TODO
        pass

    @defer.inlineCallbacks
    def attached(self, conn):
        """This is called when the slave connects."""

        metrics.MetricCountEvent.log("AbstractBuildSlave.attached_slaves", 1)

        # now we go through a sequence of calls, gathering information, then
        # tell the Botmaster that it can finally give this slave to all the
        # Builders that care about it.

        # Reset graceful shutdown status
        self.slave_status.setGraceful(False)
        # We want to know when the graceful shutdown flag changes
        self.slave_status.addGracefulWatcher(self._gracefulChanged)
        self.conn = conn
        self._old_builder_list = None  # clear builder list before proceeding
        self.slave_status.addPauseWatcher(self._pauseChanged)

        self.slave_status.setConnected(True)

        self._applySlaveInfo(conn.info)

        self.slave_commands = conn.info.get("slave_commands", {})
        self.slave_environ = conn.info.get("environ", {})
        self.slave_basedir = conn.info.get("basedir", None)
        self.slave_system = conn.info.get("system", None)

        self.conn.notifyOnDisconnect(self.detached)

        slaveinfo = {
            'admin': conn.info.get('admin'),
            'host': conn.info.get('host'),
            'access_uri': conn.info.get('access_uri'),
            'version': conn.info.get('version'),
        }

        yield self.master.data.updates.buildslaveConnected(
            buildslaveid=self.buildslaveid,
            masterid=self.master.masterid,
            slaveinfo=slaveinfo,
        )

        if self.slave_system == "nt":
            self.path_module = namedModule("ntpath")
        else:
            # most everything accepts / as a separator, so posix should be
            # a reasonable fallback
            self.path_module = namedModule("posixpath")
        log.msg("bot attached")
        self.messageReceivedFromSlave()
        self.stopMissingTimer()
        self.master.status.slaveConnected(self.slavename)
        yield self.updateSlave()
        yield self.botmaster.maybeStartBuildsForSlave(self.slavename)

    def messageReceivedFromSlave(self):
        now = time.time()
        self.lastMessageReceived = now
        self.slave_status.setLastMessageReceived(now)

    @defer.inlineCallbacks
    def detached(self):
        metrics.MetricCountEvent.log("AbstractBuildSlave.attached_slaves", -1)
        self.conn = None
        self._old_builder_list = []
        self.slave_status.removeGracefulWatcher(self._gracefulChanged)
        self.slave_status.removePauseWatcher(self._pauseChanged)
        self.slave_status.setConnected(False)
        log.msg("BuildSlave.detached(%s)" % self.slavename)
        self.master.status.slaveDisconnected(self.slavename)
        self.releaseLocks()
        yield self.master.data.updates.buildslaveDisconnected(
            buildslaveid=self.buildslaveid,
            masterid=self.master.masterid,
        )

    def disconnect(self):
        """Forcibly disconnect the slave.

        This severs the TCP connection and returns a Deferred that will
        fire (with None) when the connection is probably gone.

        If the slave is still alive, it will probably try to reconnect
        again in a moment.

        This is called in two circumstances.  The first is when a slave is
        removed from the config file.  In this case, when it tries to
        reconnect, it will be rejected as an unknown slave.  The second is
        when we wind up with two connections for the same slave, in which
        case we disconnect the older connection.
        """
        if self.conn is None:
            return defer.succeed(None)
        log.msg("disconnecting old slave %s now" % self.slavename)
        # When this Deferred fires, we'll be ready to accept the new slave
        return self._disconnect(self.conn)

    def _disconnect(self, conn):
        # all kinds of teardown will happen as a result of
        # loseConnection(), but it happens after a reactor iteration or
        # two.  Hook the actual disconnect so we can know when it is safe
        # to connect the new slave.  We have to wait one additional
        # iteration (with callLater(0)) to make sure the *other*
        # notifyOnDisconnect handlers have had a chance to run.
        d = defer.Deferred()

        # notifyOnDisconnect runs the callback
        def _disconnected():
            eventually(d.callback, None)
        conn.notifyOnDisconnect(_disconnected)
        conn.loseConnection()
        log.msg("waiting for slave to finish disconnecting")

        return d

    def sendBuilderList(self):
        our_builders = self.botmaster.getBuildersForSlave(self.slavename)
        blist = [(b.name, b.config.slavebuilddir) for b in our_builders]
        if blist == self._old_builder_list:
            return defer.succeed(None)

        d = self.conn.remoteSetBuilderList(builders=blist)

        def sentBuilderList(ign):
            self._old_builder_list = blist
            return ign
        d.addCallback(sentBuilderList)
        return d

    def shutdownRequested(self):
        log.msg("slave %s wants to shut down" % self.slavename)
        self.slave_status.setGraceful(True)

    def addSlaveBuilder(self, sb):
        self.slavebuilders[sb.builder_name] = sb

    def removeSlaveBuilder(self, sb):
        try:
            del self.slavebuilders[sb.builder_name]
        except KeyError:
            pass

    def buildFinished(self, sb):
        """This is called when a build on this slave is finished."""
        self.botmaster.maybeStartBuildsForSlave(self.slavename)

    def canStartBuild(self):
        """
        I am called when a build is requested to see if this buildslave
        can start a build.  This function can be used to limit overall
        concurrency on the buildslave.

        Note for subclassers: if a slave can become willing to start a
        build without any action on that slave (for example, by a resource
        in use on another slave becoming available), then you must arrange
        for L{maybeStartBuildsForSlave} to be called at that time, or
        builds on this slave will not start.
        """
        if self.slave_status.isPaused():
            return False

        # If we're waiting to shut down gracefully, then we shouldn't
        # accept any new jobs.
        if self.slave_status.getGraceful():
            return False

        if self.max_builds:
            active_builders = [sb for sb in self.slavebuilders.values()
                               if sb.isBusy()]
            if len(active_builders) >= self.max_builds:
                return False

        if not self.locksAvailable():
            return False

        return True

    def _mail_missing_message(self, subject, text):
        # first, see if we have a MailNotifier we can use.  This gives us a
        # fromaddr and a relayhost.
        buildmaster = self.botmaster.master
        for st in buildmaster.status:
            if isinstance(st, MailNotifier):
                break
        else:
            # if not, they get a default MailNotifier, which always uses
            # SMTP to localhost and uses a dummy fromaddr of "buildbot".
            log.msg("buildslave-missing msg using default MailNotifier")
            st = MailNotifier("buildbot")

        # now construct the mail
        m = Message()
        m.set_payload(text)
        m['Date'] = formatdate(localtime=True)
        m['Subject'] = subject
        m['From'] = st.fromaddr
        recipients = self.notify_on_missing
        m['To'] = ", ".join(recipients)
        d = st.sendMessage(m, recipients)
        # return the Deferred for testing purposes
        return d

    def _gracefulChanged(self, graceful):
        """This is called when our graceful shutdown setting changes."""
        self.maybeShutdown()

    @defer.inlineCallbacks
    def shutdown(self):
        """Shut down the slave."""
        if not self.conn:
            log.msg("no remote; slave is already shut down")
            return

        yield self.conn.remoteShutdown()

    def maybeShutdown(self):
        """Shut down this slave if it has been asked to shut down
        gracefully, and has no active builders."""
        if not self.slave_status.getGraceful():
            return
        active_builders = [sb for sb in self.slavebuilders.values()
                           if sb.isBusy()]
        if active_builders:
            return
        d = self.shutdown()
        d.addErrback(log.err, 'error while shutting down slave')

    def _pauseChanged(self, paused):
        if paused is True:
            self.botmaster.master.status.slavePaused(self.slavename)
        else:
            self.botmaster.master.status.slaveUnpaused(self.slavename)

    def pause(self):
        """Stop running new builds on the slave."""
        self.slave_status.setPaused(True)

    def unpause(self):
        """Restart running new builds on the slave."""
        self.slave_status.setPaused(False)
        self.botmaster.maybeStartBuildsForSlave(self.slavename)

    def isPaused(self):
        return self.slave_status.isPaused()
class AbstractBuildSlave(pb.Avatar, service.MultiService):

    """This is the master-side representative for a remote buildbot slave.
    There is exactly one for each slave described in the config file (the
    c['slaves'] list).  When buildbots connect in (.attach), they get a
    reference to this instance.  The BotMaster object is stashed as the
    .botmaster attribute.  The BotMaster is also our '.parent' Service.

    I represent a build slave -- a remote machine capable of running
    builds.  I am instantiated by the configuration file, and can be
    subclassed to add extra functionality."""

    implements(IBuildSlave)
    keepalive_timer = None
    keepalive_interval = None

    def __init__(self, name, password, max_builds=None,
                 notify_on_missing=[], missing_timeout=3600,
                 properties={}, locks=None, keepalive_interval=3600):
        """
        @param name: botname this machine will supply when it connects
        @param password: password this machine will supply when it connects
        @param max_builds: maximum number of simultaneous builds that will
                           be run concurrently on this buildslave (the
                           default is None for no limit)
        @param properties: properties that will be applied to builds run on
                           this slave
        @type properties: dictionary
        @param locks: A list of locks that must be acquired before this
                      slave can be used
        @type locks: list
        """
        service.MultiService.__init__(self)
        self.slavename = name
        self.password = password
        self.botmaster = None  # no buildmaster yet
        self.slave_status = SlaveStatus(name)
        self.slave = None  # a RemoteReference to the Bot, when connected
        self.slave_commands = None
        self.slavebuilders = {}
        self.max_builds = max_builds
        self.access = []
        if locks:
            self.access = locks
        self.properties = Properties()
        self.properties.update(properties, "BuildSlave")
        self.properties.setProperty("slavename", name, "BuildSlave")

        self.lastMessageReceived = 0
        if isinstance(notify_on_missing, str):
            notify_on_missing = [notify_on_missing]
        self.notify_on_missing = notify_on_missing
        for i in notify_on_missing:
            assert isinstance(i, str)
        self.missing_timeout = missing_timeout
        self.missing_timer = None
        self.keepalive_interval = keepalive_interval

        self.detached_subs = None

        self._old_builder_list = None

    def identity(self):
        """
        Return a tuple describing this slave.  After reconfiguration a new
        slave with the same identity will update this one, rather than
        replacing it, thereby avoiding an interruption of current activity.
        """
        return (self.slavename, self.password,
                '%s.%s' % (self.__class__.__module__,
                           self.__class__.__name__))

    def update(self, new):
        """
        Given a new BuildSlave, configure this one identically.  Because
        BuildSlave objects are remotely referenced, we can't replace them
        without disconnecting the slave, yet there's no reason to do that.
        """
        # the reconfiguration logic should guarantee this:
        assert self.slavename == new.slavename
        assert self.password == new.password
        assert self.identity() == new.identity()
        self.max_builds = new.max_builds
        self.access = new.access
        self.notify_on_missing = new.notify_on_missing
        self.missing_timeout = new.missing_timeout
        self.keepalive_interval = new.keepalive_interval

        self.properties = Properties()
        self.properties.updateFromProperties(new.properties)

        if self.botmaster:
            self.updateLocks()

    def __repr__(self):
        if self.botmaster:
            builders = self.botmaster.getBuildersForSlave(self.slavename)
            return "<%s '%s', current builders: %s>" % \
                (self.__class__.__name__, self.slavename,
                 ','.join(map(lambda b: b.name, builders)))
        else:
            return "<%s '%s', (no builders yet)>" % \
                (self.__class__.__name__, self.slavename)

    def updateLocks(self):
        # convert locks into their real form
        locks = []
        for access in self.access:
            if not isinstance(access, LockAccess):
                access = access.defaultAccess()
            lock = self.botmaster.getLockByID(access.lockid)
            locks.append((lock, access))
        self.locks = [(l.getLock(self), la) for l, la in locks]

    def locksAvailable(self):
        """
        I am called to see if all the locks I depend on are available, in
        which case I return True; otherwise I return False.
        """
        if not self.locks:
            return True
        for lock, access in self.locks:
            if not lock.isAvailable(access):
                return False
        return True

    def acquireLocks(self):
        """
        I am called when a build is preparing to run.  I try to claim all
        the locks that are needed for a build to happen.  If I can't, then
        my caller should give up the build and try to get another slave to
        look at it.
        """
        log.msg("acquireLocks(slave %s, locks %s)" % (self, self.locks))
        if not self.locksAvailable():
            log.msg("slave %s can't lock, giving up" % (self, ))
            return False
        # all locks are available, claim them all
        for lock, access in self.locks:
            lock.claim(self, access)
        return True

    def releaseLocks(self):
        """
        I am called to release any locks after a build has finished.
        """
        log.msg("releaseLocks(%s): %s" % (self, self.locks))
        for lock, access in self.locks:
            lock.release(self, access)

    def setBotmaster(self, botmaster):
        assert not self.botmaster, "BuildSlave already has a botmaster"
        self.botmaster = botmaster
        self.updateLocks()
        self.startMissingTimer()

    def stopMissingTimer(self):
        if self.missing_timer:
            self.missing_timer.cancel()
            self.missing_timer = None

    def startMissingTimer(self):
        if self.notify_on_missing and self.missing_timeout and self.parent:
            self.stopMissingTimer()  # in case it's already running
            self.missing_timer = reactor.callLater(self.missing_timeout,
                                                   self._missing_timer_fired)

    def doKeepalive(self):
        self.keepalive_timer = reactor.callLater(self.keepalive_interval,
                                                 self.doKeepalive)
        if not self.slave:
            return
        d = self.slave.callRemote("print", "Received keepalive from master")
        d.addErrback(log.msg,
                     "Keepalive failed for '%s'" % (self.slavename, ))

    def stopKeepaliveTimer(self):
        if self.keepalive_timer:
            self.keepalive_timer.cancel()

    def startKeepaliveTimer(self):
        assert self.keepalive_interval
        log.msg("Starting buildslave keepalive timer for '%s'" %
                (self.slavename, ))
        self.doKeepalive()

    def recordConnectTime(self):
        if self.slave_status:
            self.slave_status.recordConnectTime()

    def isConnected(self):
        return self.slave

    def _missing_timer_fired(self):
        self.missing_timer = None
        # notify people, but only if we're still in the config
        if not self.parent:
            return

        buildmaster = self.botmaster.master
        status = buildmaster.getStatus()
        text = "The Buildbot working for '%s'\n" % status.getTitle()
        text += ("has noticed that the buildslave named %s went away\n" %
                 self.slavename)
        text += "\n"
        text += ("It last disconnected at %s (buildmaster-local time)\n" %
                 time.ctime(time.time() - self.missing_timeout))  # approx
        text += "\n"
        text += "The admin on record (as reported by BUILDSLAVE:info/admin)\n"
        text += "was '%s'.\n" % self.slave_status.getAdmin()
        text += "\n"
        text += "Sincerely,\n"
        text += " The Buildbot\n"
        text += " %s\n" % status.getTitleURL()
        subject = "Buildbot: buildslave %s was lost" % self.slavename
        return self._mail_missing_message(subject, text)

    def updateSlave(self):
        """Called to add or remove builders after the slave has connected.

        @return: a Deferred that indicates when an attached slave has
        accepted the new builders and/or released the old ones."""
        if self.slave:
            return self.sendBuilderList()
        else:
            return defer.succeed(None)

    def updateSlaveStatus(self, buildStarted=None, buildFinished=None):
        if buildStarted:
            self.slave_status.buildStarted(buildStarted)
        if buildFinished:
            self.slave_status.buildFinished(buildFinished)

    @metrics.countMethod('AbstractBuildSlave.attached()')
    def attached(self, bot):
        """This is called when the slave connects.

        @return: a Deferred that fires when the attachment is complete
        """

        # the botmaster should ensure this.
        assert not self.isConnected()

        metrics.MetricCountEvent.log("AbstractBuildSlave.attached_slaves", 1)

        # set up the subscription point for eventual detachment
        self.detached_subs = subscription.SubscriptionPoint("detached")

        # now we go through a sequence of calls, gathering information, then
        # tell the Botmaster that it can finally give this slave to all the
        # Builders that care about it.

        # we accumulate slave information in this 'state' dictionary, then
        # set it atomically if we make it far enough through the process
        state = {}

        # Reset graceful shutdown status
        self.slave_status.setGraceful(False)
        # We want to know when the graceful shutdown flag changes
        self.slave_status.addGracefulWatcher(self._gracefulChanged)

        d = defer.succeed(None)

        def _log_attachment_on_slave(res):
            d1 = bot.callRemote("print", "attached")
            d1.addErrback(lambda why: None)
            return d1
        d.addCallback(_log_attachment_on_slave)

        def _get_info(res):
            d1 = bot.callRemote("getSlaveInfo")

            def _got_info(info):
                log.msg("Got slaveinfo from '%s'" % self.slavename)
                # TODO: info{} might have other keys
                state["admin"] = info.get("admin")
                state["host"] = info.get("host")
                state["access_uri"] = info.get("access_uri", None)
                state["slave_environ"] = info.get("environ", {})
                state["slave_basedir"] = info.get("basedir", None)
                state["slave_system"] = info.get("system", None)

            def _info_unavailable(why):
                why.trap(pb.NoSuchMethod)
                # maybe an old slave, doesn't implement remote_getSlaveInfo
                log.msg("BuildSlave.info_unavailable")
                log.err(why)
            d1.addCallbacks(_got_info, _info_unavailable)
            return d1
        d.addCallback(_get_info)
        self.startKeepaliveTimer()

        def _get_version(res):
            d = bot.callRemote("getVersion")

            def _got_version(version):
                state["version"] = version

            def _version_unavailable(why):
                why.trap(pb.NoSuchMethod)
                # probably an old slave
                state["version"] = '(unknown)'
            d.addCallbacks(_got_version, _version_unavailable)
            return d
        d.addCallback(_get_version)

        def _get_commands(res):
            d1 = bot.callRemote("getCommands")

            def _got_commands(commands):
                state["slave_commands"] = commands

            def _commands_unavailable(why):
                # probably an old slave
                log.msg("BuildSlave._commands_unavailable")
                if why.check(AttributeError):
                    return
                log.err(why)
            d1.addCallbacks(_got_commands, _commands_unavailable)
            return d1
        d.addCallback(_get_commands)

        def _accept_slave(res):
            self.slave_status.setAdmin(state.get("admin"))
            self.slave_status.setHost(state.get("host"))
            self.slave_status.setAccessURI(state.get("access_uri"))
            self.slave_status.setVersion(state.get("version"))
            self.slave_status.setConnected(True)
            self.slave_commands = state.get("slave_commands")
            self.slave_environ = state.get("slave_environ")
            self.slave_basedir = state.get("slave_basedir")
            self.slave_system = state.get("slave_system")
            self.slave = bot
            if self.slave_system == "nt":
                self.path_module = namedModule("ntpath")
            else:
                # most everything accepts / as a separator, so posix should
                # be a reasonable fallback
                self.path_module = namedModule("posixpath")
            log.msg("bot attached")
            self.messageReceivedFromSlave()
            self.stopMissingTimer()
            self.botmaster.master.status.slaveConnected(self.slavename)

            return self.updateSlave()
        d.addCallback(_accept_slave)
        d.addCallback(lambda _:
                      self.botmaster.maybeStartBuildsForSlave(self.slavename))

        # Finally, the slave gets a reference to this BuildSlave.  They
        # receive this later, after we've started using them.
        d.addCallback(lambda _: self)
        return d

    def messageReceivedFromSlave(self):
        now = time.time()
        self.lastMessageReceived = now
        self.slave_status.setLastMessageReceived(now)

    def detached(self, mind):
        metrics.MetricCountEvent.log("AbstractBuildSlave.attached_slaves", -1)
        self.slave = None
        self._old_builder_list = []
        self.slave_status.removeGracefulWatcher(self._gracefulChanged)
        self.slave_status.setConnected(False)
        log.msg("BuildSlave.detached(%s)" % self.slavename)
        self.botmaster.master.status.slaveDisconnected(self.slavename)
        self.stopKeepaliveTimer()

        # notify watchers, but do so in the next reactor iteration so that
        # any further detached() action by subclasses happens first
        def notif():
            subs = self.detached_subs
            self.detached_subs = None
            subs.deliver()
        reactor.callLater(0, notif)

    def subscribeToDetach(self, callback):
        """
        Request that C{callback} be invoked with no arguments when the
        L{detached} method is invoked.

        @returns: L{Subscription}
        """
        assert self.detached_subs, "detached_subs is only set if attached"
        return self.detached_subs.subscribe(callback)

    def disconnect(self):
        """Forcibly disconnect the slave.

        This severs the TCP connection and returns a Deferred that will
        fire (with None) when the connection is probably gone.

        If the slave is still alive, it will probably try to reconnect
        again in a moment.

        This is called in two circumstances.  The first is when a slave is
        removed from the config file.  In this case, when it tries to
        reconnect, it will be rejected as an unknown slave.  The second is
        when we wind up with two connections for the same slave, in which
        case we disconnect the older connection.
        """
        if not self.slave:
            return defer.succeed(None)
        log.msg("disconnecting old slave %s now" % self.slavename)
        # When this Deferred fires, we'll be ready to accept the new slave
        return self._disconnect(self.slave)

    def _disconnect(self, slave):
        # all kinds of teardown will happen as a result of
        # loseConnection(), but it happens after a reactor iteration or
        # two.  Hook the actual disconnect so we can know when it is safe
        # to connect the new slave.  We have to wait one additional
        # iteration (with callLater(0)) to make sure the *other*
        # notifyOnDisconnect handlers have had a chance to run.
        d = defer.Deferred()

        # notifyOnDisconnect runs the callback with one argument, the
        # RemoteReference being disconnected.
        def _disconnected(rref):
            reactor.callLater(0, d.callback, None)
        slave.notifyOnDisconnect(_disconnected)
        tport = slave.broker.transport
        # this is the polite way to request that a socket be closed
        tport.loseConnection()
        try:
            # but really we don't want to wait for the transmit queue to
            # drain.  The remote end is unlikely to ACK the data, so we'd
            # probably have to wait for a (20-minute) TCP timeout.
            # tport._closeSocket()
            # however, doing _closeSocket (whether before or after
            # loseConnection) somehow prevents the notifyOnDisconnect
            # handlers from being run.  Bummer.
            tport.offset = 0
            tport.dataBuffer = ""
        except:
            # however, these hacks are pretty internal, so don't blow up if
            # they fail or are unavailable
            log.msg("failed to accelerate the shutdown process")
        log.msg("waiting for slave to finish disconnecting")
        return d

    def sendBuilderList(self):
        our_builders = self.botmaster.getBuildersForSlave(self.slavename)
        blist = [(b.name, b.slavebuilddir) for b in our_builders]
        if blist == self._old_builder_list:
            log.msg("Builder list is unchanged; not calling setBuilderList")
            return defer.succeed(None)

        d = self.slave.callRemote("setBuilderList", blist)

        def sentBuilderList(ign):
            self._old_builder_list = blist
            return ign
        d.addCallback(sentBuilderList)
        return d

    def perspective_keepalive(self):
        self.messageReceivedFromSlave()

    def perspective_shutdown(self):
        log.msg("slave %s wants to shut down" % self.slavename)
        self.slave_status.setGraceful(True)

    def addSlaveBuilder(self, sb):
        self.slavebuilders[sb.builder_name] = sb

    def removeSlaveBuilder(self, sb):
        try:
            del self.slavebuilders[sb.builder_name]
        except KeyError:
            pass

    def buildFinished(self, sb):
        """This is called when a build on this slave is finished."""
        self.botmaster.maybeStartBuildsForSlave(self.slavename)

    def canStartBuild(self):
        """
        I am called when a build is requested to see if this buildslave
        can start a build.  This function can be used to limit overall
        concurrency on the buildslave.

        Note for subclassers: if a slave can become willing to start a
        build without any action on that slave (for example, by a resource
        in use on another slave becoming available), then you must arrange
        for L{maybeStartBuildsForSlave} to be called at that time, or
        builds on this slave will not start.
        """
        # If we're waiting to shut down gracefully, then we shouldn't
        # accept any new jobs.
        if self.slave_status.getGraceful():
            return False

        if self.max_builds:
            active_builders = [sb for sb in self.slavebuilders.values()
                               if sb.isBusy()]
            if len(active_builders) >= self.max_builds:
                return False

        if not self.locksAvailable():
            return False

        return True

    def _mail_missing_message(self, subject, text):
        # first, see if we have a MailNotifier we can use.  This gives us a
        # fromaddr and a relayhost.
        buildmaster = self.botmaster.master
        for st in buildmaster.statusTargets:
            if isinstance(st, MailNotifier):
                break
        else:
            # if not, they get a default MailNotifier, which always uses
            # SMTP to localhost and uses a dummy fromaddr of "buildbot".
            log.msg("buildslave-missing msg using default MailNotifier")
            st = MailNotifier("buildbot")

        # now construct the mail
        m = Message()
        m.set_payload(text)
        m['Date'] = formatdate(localtime=True)
        m['Subject'] = subject
        m['From'] = st.fromaddr
        recipients = self.notify_on_missing
        m['To'] = ", ".join(recipients)
        d = st.sendMessage(m, recipients)
        # return the Deferred for testing purposes
        return d

    def _gracefulChanged(self, graceful):
        """This is called when our graceful shutdown setting changes."""
        self.maybeShutdown()

    @defer.deferredGenerator
    def shutdown(self):
        """Shut down the slave."""
        if not self.slave:
            log.msg("no remote; slave is already shut down")
            return

        # First, try the "new" way - calling our own remote's shutdown
        # method.  The method was only added in 0.8.3, so ignore
        # NoSuchMethod failures.
        def new_way():
            d = self.slave.callRemote('shutdown')
            d.addCallback(lambda _: True)  # successful shutdown request

            def check_nsm(f):
                f.trap(pb.NoSuchMethod)
                return False  # fall through to the old way
            d.addErrback(check_nsm)

            def check_connlost(f):
                f.trap(pb.PBConnectionLost)
                return True  # the slave is gone, so call it finished
            d.addErrback(check_connlost)
            return d

        wfd = defer.waitForDeferred(new_way())
        yield wfd
        if wfd.getResult():
            return  # done!

        # Now, the old way.  Look for a builder with a remote reference to
        # the client side slave.  If we can find one, then call "shutdown"
        # on the remote builder, which will cause the slave buildbot
        # process to exit.
        def old_way():
            d = None
            for b in self.slavebuilders.values():
                if b.remote:
                    d = b.remote.callRemote("shutdown")
                    break

            if d:
                log.msg("Shutting down (old) slave: %s" % self.slavename)
                # The remote shutdown call will not complete successfully
                # since the buildbot process exits almost immediately after
                # getting the shutdown request.
                # Here we look at the reason why the remote call failed,
                # and if it's because the connection was lost, that means
                # the slave shut down as expected.
                def _errback(why):
                    if why.check(pb.PBConnectionLost):
                        log.msg("Lost connection to %s" % self.slavename)
                    else:
                        log.err("Unexpected error when trying to shutdown "
                                "%s" % self.slavename)
                d.addErrback(_errback)
                return d
            log.err("Couldn't find remote builder to shut down slave")
            return defer.succeed(None)

        wfd = defer.waitForDeferred(old_way())
        yield wfd
        wfd.getResult()

    def maybeShutdown(self):
        """Shut down this slave if it has been asked to shut down
        gracefully, and has no active builders."""
        if not self.slave_status.getGraceful():
            return
        active_builders = [sb for sb in self.slavebuilders.values()
                           if sb.isBusy()]
        if active_builders:
            return
        d = self.shutdown()
        d.addErrback(log.err, 'error while shutting down slave')
class AbstractBuildSlave(config.ReconfigurableServiceMixin, pb.Avatar, service.MultiService): """This is the master-side representative for a remote buildbot slave. There is exactly one for each slave described in the config file (the c['slaves'] list). When buildbots connect in (.attach), they get a reference to this instance. The BotMaster object is stashed as the .botmaster attribute. The BotMaster is also our '.parent' Service. I represent a build slave -- a remote machine capable of running builds. I am instantiated by the configuration file, and can be subclassed to add extra functionality.""" implements(IBuildSlave) keepalive_timer = None keepalive_interval = None # reconfig slaves after builders reconfig_priority = 64 def __init__(self, name, password, max_builds=None, notify_on_missing=[], missing_timeout=3600, properties={}, locks=None, keepalive_interval=3600, friendlyName=None, os=None, eid=-1, fqdn=None): """ @param name: botname this machine will supply when it connects @param password: password this machine will supply when it connects @param max_builds: maximum number of simultaneous builds that will be run concurrently on this buildslave (the default is None for no limit) @param properties: properties that will be applied to builds run on this slave @type properties: dictionary @param locks: A list of locks that must be acquired before this slave can be used @type locks: dictionary @param fqdn: The fully qualified domain name (eg: slave1.unity.com) of the agent @type fqdn: string """ service.MultiService.__init__(self) self.slavename = name self.password = password self.friendly_name = friendlyName self.eid = eid # External ID self.fqdn = fqdn # Slave's full domain name if self.friendly_name is None: self.friendly_name = name # PB registration self.registration = None self.registered_port = None # these are set when the service is started, and unset when it is # stopped self.botmaster = None self.master = None self.slave_status = SlaveStatus(name) self.slave_status.setFriendlyName(self.friendly_name) self.slave_status.eid = eid self.slave_status.fqdn = fqdn self.slave = None # a RemoteReference to the Bot, when connected self.slave_commands = None self.slavebuilders = {} self.max_builds = max_builds self.access = [] if locks: self.access = locks self.lock_subscriptions = [] self.properties = Properties() self.properties.update(properties, "BuildSlave") self.properties.setProperty("slavename", name, "BuildSlave") self.lastMessageReceived = 0 if isinstance(notify_on_missing, str): notify_on_missing = [notify_on_missing] self.notify_on_missing = notify_on_missing for i in notify_on_missing: if not isinstance(i, str): config.error('notify_on_missing arg %r is not a string' % (i, )) self.missing_timeout = missing_timeout self.missing_timer = None self.keepalive_interval = keepalive_interval self.detached_subs = None self._old_builder_list = None self.os = os def __repr__(self): return "<%s %r>" % (self.__class__.__name__, self.slavename) def updateLocks(self): """Convert the L{LockAccess} objects in C{self.locks} into real lock objects, while also maintaining the subscriptions to lock releases.""" # unsubscribe from any old locks for s in self.lock_subscriptions: s.unsubscribe() # convert locks into their real form locks = [(self.botmaster.getLockFromLockAccess(a), a) for a in self.access] self.locks = [(l.getLock(self), la) for l, la in locks] self.lock_subscriptions = [ l.subscribeToReleases(self._lockReleased) for l, la in self.locks ] def locksAvailable(self): """ I am 
called to see if all the locks I depend on are available, in which I return True, otherwise I return False """ if not self.locks: return True for lock, access in self.locks: if not lock.isAvailable(self, access): return False return True def acquireLocks(self): """ I am called when a build is preparing to run. I try to claim all the locks that are needed for a build to happen. If I can't, then my caller should give up the build and try to get another slave to look at it. """ log.msg("acquireLocks(slave %s, locks %s)" % (self, self.locks)) if not self.locksAvailable(): log.msg("slave %s can't lock, giving up" % (self, )) return False # all locks are available, claim them all for lock, access in self.locks: lock.claim(self, access) return True def releaseLocks(self): """ I am called to release any locks after a build has finished """ log.msg("releaseLocks(%s): %s" % (self, self.locks)) for lock, access in self.locks: lock.release(self, access) def _lockReleased(self): """One of the locks for this slave was released; try scheduling builds.""" if not self.botmaster: return # oh well.. self.botmaster.maybeStartBuildsForSlave(self.slavename) def setServiceParent(self, parent): # botmaster needs to set before setServiceParent which calls startService self.botmaster = parent self.master = parent.master self.slave_status.setMaster(parent.master) service.MultiService.setServiceParent(self, parent) def startService(self): self.updateLocks() self.startMissingTimer() return service.MultiService.startService(self) @defer.inlineCallbacks def reconfigService(self, new_config): # Given a new BuildSlave, configure this one identically. Because # BuildSlave objects are remotely referenced, we can't replace them # without disconnecting the slave, yet there's no reason to do that. new = self.findNewSlaveInstance(new_config) assert self.slavename == new.slavename # do we need to re-register? if (not self.registration or self.password != new.password or new_config.slavePortnum != self.registered_port): if self.registration: yield self.registration.unregister() self.registration = None self.password = new.password self.registered_port = new_config.slavePortnum self.registration = self.master.pbmanager.register( self.registered_port, self.slavename, self.password, self.getPerspective) # adopt new instance's configuration parameters self.max_builds = new.max_builds self.access = new.access self.notify_on_missing = new.notify_on_missing self.keepalive_interval = new.keepalive_interval if self.missing_timeout != new.missing_timeout: running_missing_timer = self.missing_timer self.stopMissingTimer() self.missing_timeout = new.missing_timeout if running_missing_timer: self.startMissingTimer() properties = Properties() properties.updateFromProperties(new.properties) self.properties = properties self.updateLocks() # update the attached slave's notion of which builders are attached. # This assumes that the relevant builders have already been configured, # which is why the reconfig_priority is set low in this class. yield self.updateSlave() yield config.ReconfigurableServiceMixin.reconfigService( self, new_config) @defer.inlineCallbacks def stopService(self): if self.registration: self.registration.unregister() self.registration = None self.stopMissingTimer() yield service.MultiService.stopService(self) def findNewSlaveInstance(self, new_config): # TODO: called multiple times per reconfig; use 1-element cache? 
for sl in new_config.slaves: if sl.slavename == self.slavename: return sl assert 0, "no new slave named '%s'" % self.slavename def startMissingTimer(self): if self.notify_on_missing and self.missing_timeout and self.parent: self.stopMissingTimer() # in case it's already running self.missing_timer = reactor.callLater(self.missing_timeout, self._missing_timer_fired) def stopMissingTimer(self): if self.missing_timer: self.missing_timer.cancel() self.missing_timer = None def getPerspective(self, mind, slavename): assert slavename == self.slavename metrics.MetricCountEvent.log("attached_slaves", 1) # record when this connection attempt occurred if self.slave_status: self.slave_status.recordConnectTime() # try to use TCP keepalives try: mind.broker.transport.setTcpKeepAlive(1) except: pass if self.isConnected(): # duplicate slave - send it to arbitration arb = botmaster.DuplicateSlaveArbitrator(self) return arb.getPerspective(mind, slavename) else: log.msg("slave '%s' attaching from %s" % (slavename, mind.broker.transport.getPeer())) return self def doKeepalive(self): self.keepalive_timer = reactor.callLater(self.keepalive_interval, self.doKeepalive) if not self.slave: return d = self.slave.callRemote("print", "Received keepalive from master") d.addErrback(log.msg, "Keepalive failed for '%s'" % (self.slavename, )) def stopKeepaliveTimer(self): if self.keepalive_timer: self.keepalive_timer.cancel() def startKeepaliveTimer(self): assert self.keepalive_interval log.msg("Starting buildslave keepalive timer for '%s'" % \ (self.slavename, )) self.doKeepalive() def isConnected(self): return self.slave def _missing_timer_fired(self): self.missing_timer = None # notify people, but only if we're still in the config if not self.parent: return buildmaster = self.master status = buildmaster.getStatus() text = "Katana working for '%s'\n" % status.getTitle() text += ("has noticed that the buildslave named %s went away\n" % self.slavename) text += "\n" text += ("It last disconnected at %s (buildmaster-local time)\n" % time.ctime(time.time() - self.missing_timeout)) # approx text += "\n" text += "The admin on record (as reported by BUILDSLAVE:info/admin)\n" text += "was '%s'.\n" % self.slave_status.getAdmin() text += "\n" text += "Sincerely,\n" text += " Katana\n" text += " %s\n" % status.getTitleURL() text += "\n" text += "%s\n" % status.getURLForThing(self.slave_status) subject = "Katana: buildslave %s was lost" % self.slavename return self._mail_missing_message(subject, text) def updateSlave(self): """Called to add or remove builders after the slave has connected. @return: a Deferred that indicates when an attached slave has accepted the new builders and/or released the old ones.""" if self.slave: return self.sendBuilderList() else: return defer.succeed(None) def updateStatusBuildStarted(self, build): self.slave_status.buildStarted(build) @defer.inlineCallbacks def updateStatusBuildFinished(self, result, build=None): if build: yield self.slave_status.buildFinished(build) defer.returnValue(result) @metrics.countMethod('AbstractBuildSlave.attached()') def attached(self, bot): """This is called when the slave connects. @return: a Deferred that fires when the attachment is complete """ # the botmaster should ensure this. 
assert not self.isConnected() metrics.MetricCountEvent.log("AbstractBuildSlave.attached_slaves", 1) # set up the subscription point for eventual detachment self.detached_subs = subscription.SubscriptionPoint("detached") # now we go through a sequence of calls, gathering information, then # tell the Botmaster that it can finally give this slave to all the # Builders that care about it. # we accumulate slave information in this 'state' dictionary, then # set it atomically if we make it far enough through the process state = {} # Reset graceful shutdown status self.slave_status.setGraceful(False) # We want to know when the graceful shutdown flag changes self.slave_status.addGracefulWatcher(self._gracefulChanged) d = defer.succeed(None) def _log_attachment_on_slave(res): d1 = bot.callRemote("print", "attached") d1.addErrback(lambda why: None) return d1 d.addCallback(_log_attachment_on_slave) def _get_info(res): d1 = bot.callRemote("getSlaveInfo") def _got_info(info): log.msg("Got slaveinfo from '%s'" % self.slavename) # TODO: info{} might have other keys state["admin"] = info.get("admin") state["host"] = info.get("host") state["access_uri"] = info.get("access_uri", None) state["slave_environ"] = info.get("environ", {}) state["slave_basedir"] = info.get("basedir", None) state["slave_system"] = info.get("system", None) def _info_unavailable(why): why.trap(pb.NoSuchMethod) # maybe an old slave, doesn't implement remote_getSlaveInfo log.msg("BuildSlave.info_unavailable") klog.err_json(why) d1.addCallbacks(_got_info, _info_unavailable) return d1 d.addCallback(_get_info) self.startKeepaliveTimer() def _get_version(res): d = bot.callRemote("getVersion") def _got_version(version): state["version"] = version def _version_unavailable(why): why.trap(pb.NoSuchMethod) # probably an old slave state["version"] = '(unknown)' d.addCallbacks(_got_version, _version_unavailable) return d d.addCallback(_get_version) def _get_commands(res): d1 = bot.callRemote("getCommands") def _got_commands(commands): state["slave_commands"] = commands def _commands_unavailable(why): # probably an old slave if why.check(AttributeError): return log.msg("BuildSlave.getCommands is unavailable - ignoring") klog.err_json(why) d1.addCallbacks(_got_commands, _commands_unavailable) return d1 d.addCallback(_get_commands) def _accept_slave(res): self.slave_status.setAdmin(state.get("admin")) self.slave_status.setHost(state.get("host")) self.slave_status.setAccessURI(state.get("access_uri")) self.slave_status.setVersion(state.get("version")) self.slave_status.setConnected(True) self.slave_commands = state.get("slave_commands") self.slave_environ = state.get("slave_environ") self.slave_basedir = state.get("slave_basedir") self.slave_system = state.get("slave_system") self.slave = bot if self.slave_system == "nt": self.path_module = namedModule("ntpath") else: # most eveything accepts / as separator, so posix should be a # reasonable fallback self.path_module = namedModule("posixpath") log.msg("bot attached") self.messageReceivedFromSlave() self.stopMissingTimer() self.master.status.slaveConnected(self.slavename) return self.updateSlave() d.addCallback(_accept_slave) d.addCallback( lambda _: self.botmaster.maybeStartBuildsForSlave(self.slavename)) # Finally, the slave gets a reference to this BuildSlave. They # receive this later, after we've started using them. 
d.addCallback(lambda _: self) return d def messageReceivedFromSlave(self): now = time.time() self.lastMessageReceived = now self.slave_status.setLastMessageReceived(now) def detached(self, mind): metrics.MetricCountEvent.log("AbstractBuildSlave.attached_slaves", -1) self.slave = None self._old_builder_list = [] self.slave_status.removeGracefulWatcher(self._gracefulChanged) self.slave_status.setConnected(False) log.msg("BuildSlave.detached(%s)" % self.slavename) self.master.status.slaveDisconnected(self.slavename) self.stopKeepaliveTimer() self.releaseLocks() # notify watchers, but do so in the next reactor iteration so that # any further detached() action by subclasses happens first def notif(): subs = self.detached_subs self.detached_subs = None subs.deliver() eventually(notif) def subscribeToDetach(self, callback): """ Request that C{callable} be invoked with no arguments when the L{detached} method is invoked. @returns: L{Subscription} """ assert self.detached_subs, "detached_subs is only set if attached" return self.detached_subs.subscribe(callback) def disconnect(self): """Forcibly disconnect the slave. This severs the TCP connection and returns a Deferred that will fire (with None) when the connection is probably gone. If the slave is still alive, they will probably try to reconnect again in a moment. This is called in two circumstances. The first is when a slave is removed from the config file. In this case, when they try to reconnect, they will be rejected as an unknown slave. The second is when we wind up with two connections for the same slave, in which case we disconnect the older connection. """ if not self.slave: return defer.succeed(None) log.msg("disconnecting old slave %s now" % self.slavename) # When this Deferred fires, we'll be ready to accept the new slave return self._disconnect(self.slave) def _disconnect(self, slave): # all kinds of teardown will happen as a result of # loseConnection(), but it happens after a reactor iteration or # two. Hook the actual disconnect so we can know when it is safe # to connect the new slave. We have to wait one additional # iteration (with callLater(0)) to make sure the *other* # notifyOnDisconnect handlers have had a chance to run. d = defer.Deferred() # notifyOnDisconnect runs the callback with one argument, the # RemoteReference being disconnected. def _disconnected(rref): eventually(d.callback, None) slave.notifyOnDisconnect(_disconnected) tport = slave.broker.transport # this is the polite way to request that a socket be closed tport.loseConnection() try: # but really we don't want to wait for the transmit queue to # drain. The remote end is unlikely to ACK the data, so we'd # probably have to wait for a (20-minute) TCP timeout. #tport._closeSocket() # however, doing _closeSocket (whether before or after # loseConnection) somehow prevents the notifyOnDisconnect # handlers from being run. Bummer. 
tport.offset = 0 tport.dataBuffer = "" except: # however, these hacks are pretty internal, so don't blow up if # they fail or are unavailable log.msg("failed to accelerate the shutdown process") log.msg("waiting for slave to finish disconnecting") return d def sendBuilderList(self): our_builders = self.botmaster.getBuildersForSlave(self.slavename) blist = [(b.name, b.config.slavebuilddir) for b in our_builders] if blist == self._old_builder_list: return defer.succeed(None) d = self.slave.callRemote("setBuilderList", blist) def sentBuilderList(ign): self._old_builder_list = blist return ign d.addCallback(sentBuilderList) return d def perspective_keepalive(self): self.messageReceivedFromSlave() def perspective_shutdown(self): log.msg("slave %s wants to shut down" % self.slavename) self.slave_status.setGraceful(True) def addSlaveBuilder(self, sb): self.slavebuilders[sb.builder_name] = sb def removeSlaveBuilder(self, sb): try: del self.slavebuilders[sb.builder_name] except KeyError: pass def buildFinished(self, sb): """This is called when a build on this slave is finished.""" if self.botmaster: self.botmaster.maybeStartBuildsForSlave(self.slavename) def canStartBuild(self): """ I am called when a build is requested to see if this buildslave can start a build. This function can be used to limit overall concurrency on the buildslave. Note for subclassers: if a slave can become willing to start a build without any action on that slave (for example, by a resource in use on another slave becoming available), then you must arrange for L{maybeStartBuildsForSlave} to be called at that time, or builds on this slave will not start. """ if self.slave_status.isPaused(): return False # If we're waiting to shutdown gracefully, then we shouldn't # accept any new jobs. if self.slave_status.getGraceful(): return False if self.max_builds: active_builders = [ sb for sb in self.slavebuilders.values() if sb.isBusy() ] if len(active_builders) >= self.max_builds: return False if not self.locksAvailable(): return False return True def _mail_missing_message(self, subject, text): # first, see if we have a MailNotifier we can use. This gives us a # fromaddr and a relayhost. buildmaster = self.master for st in buildmaster.status: if isinstance(st, MailNotifier): break else: # if not, they get a default MailNotifier, which always uses SMTP # to localhost and uses a dummy fromaddr of "buildbot". log.msg("buildslave-missing msg using default MailNotifier") st = MailNotifier("buildbot") # now construct the mail m = Message() m.set_payload(text) m['Date'] = formatdate(localtime=True) m['Subject'] = subject m['From'] = st.fromaddr recipients = self.notify_on_missing m['To'] = ", ".join(recipients) d = st.sendMessage(m, recipients) # return the Deferred for testing purposes return d def _gracefulChanged(self, graceful): """This is called when our graceful shutdown setting changes""" self.maybeShutdown() @defer.inlineCallbacks def shutdown(self): """Shutdown the slave""" if not self.slave: log.msg("no remote; slave is already shut down") return # First, try the "new" way - calling our own remote's shutdown # method. The method was only added in 0.8.3, so ignore NoSuchMethod # failures. 
        def new_way():
            d = self.slave.callRemote('shutdown')
            d.addCallback(lambda _: True)  # successful shutdown request

            def check_nsm(f):
                f.trap(pb.NoSuchMethod)
                return False  # fall through to the old way
            d.addErrback(check_nsm)

            def check_connlost(f):
                f.trap(pb.PBConnectionLost)
                return True  # the slave is gone, so call it finished
            d.addErrback(check_connlost)
            return d

        if (yield new_way()):
            return  # done!

        # Now, the old way. Look for a builder with a remote reference to the
        # client side slave. If we can find one, then call "shutdown" on the
        # remote builder, which will cause the slave buildbot process to
        # exit.
        def old_way():
            d = None
            for b in self.slavebuilders.values():
                if b.remote:
                    d = b.remote.callRemote("shutdown")
                    break

            if d:
                log.msg("Shutting down (old) slave: %s" % self.slavename)

                # The remote shutdown call will not complete successfully,
                # since the buildbot process exits almost immediately after
                # getting the shutdown request.
                # Here we look at the reason why the remote call failed, and
                # if it's because the connection was lost, that means the
                # slave shut down as expected.
                def _errback(why):
                    if why.check(pb.PBConnectionLost):
                        log.msg("Lost connection to %s" % self.slavename)
                    else:
                        klog.err_json("Unexpected error when trying to "
                                      "shutdown %s" % self.slavename)
                d.addErrback(_errback)
                return d
            klog.err_json("Couldn't find remote builder to shut down slave")
            return defer.succeed(None)
        yield old_way()

    def maybeShutdown(self):
        """Shut down this slave if it has been asked to shut down
        gracefully, and has no active builders."""
        if not self.slave_status.getGraceful():
            return
        active_builders = [sb for sb in self.slavebuilders.values()
                           if sb.isBusy()]
        if active_builders:
            return
        d = self.shutdown()
        d.addErrback(klog.err_json, 'error while shutting down slave')

    def isPaused(self):
        return self.slave_status.paused
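
# Editor's sketch (not part of the original module): how the graceful
# shutdown machinery above is typically driven. `slave` stands in for any
# instance of the class above; setGraceful(True) fires the _gracefulChanged
# watcher, which calls maybeShutdown(), and the slave actually shuts down
# only once none of its slavebuilders are busy.
def _example_request_graceful_shutdown(slave):
    slave.slave_status.setGraceful(True)
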
class AbstractBuildSlave(service.ReconfigurableServiceMixin,
                         service.AsyncMultiService, object):

    """This is the master-side representative for a remote buildbot slave.
    There is exactly one for each slave described in the config file (the
    c['slaves'] list). When buildbots connect in (.attach), they get a
    reference to this instance. The BotMaster object is stashed as the
    .botmaster attribute. The BotMaster is also our '.parent' Service.

    I represent a build slave -- a remote machine capable of running builds.
    I am instantiated by the configuration file, and can be subclassed to
    add extra functionality."""

    implements(IBuildSlave)

    # reconfig slaves after builders
    reconfig_priority = 64

    def __init__(self, name, password, max_builds=None,
                 notify_on_missing=[], missing_timeout=3600,
                 properties={}, locks=None, keepalive_interval=3600):
        """
        @param name: botname this machine will supply when it connects
        @param password: password this machine will supply when it connects
        @param max_builds: maximum number of simultaneous builds that will
                           be run concurrently on this buildslave (the
                           default is None for no limit)
        @param properties: properties that will be applied to builds run on
                           this slave
        @type properties: dictionary
        @param locks: A list of locks that must be acquired before this
                      slave can be used
        @type locks: dictionary
        """
        name = ascii2unicode(name)
        service.AsyncMultiService.__init__(self)
        self.slavename = name
        self.password = password

        # protocol registration
        self.registration = None

        # these are set when the service is started
        self.botmaster = None
        self.manager = None
        self.master = None
        self.buildslaveid = None

        self.slave_status = SlaveStatus(name)
        self.slave_commands = None
        self.slavebuilders = {}
        self.max_builds = max_builds
        self.access = []
        if locks:
            self.access = locks
        self.lock_subscriptions = []

        self.properties = Properties()
        self.properties.update(properties, "BuildSlave")
        self.properties.setProperty("slavename", name, "BuildSlave")

        self.lastMessageReceived = 0
        if isinstance(notify_on_missing, str):
            notify_on_missing = [notify_on_missing]
        self.notify_on_missing = notify_on_missing
        for i in notify_on_missing:
            if not isinstance(i, str):
                config.error(
                    'notify_on_missing arg %r is not a string' % (i,))

        self.missing_timeout = missing_timeout
        self.missing_timer = None

        # a protocol connection, if we're currently connected
        self.conn = None

        self._old_builder_list = None

    def __repr__(self):
        return "<%s %r>" % (self.__class__.__name__, self.slavename)

    def updateLocks(self):
        """Convert the L{LockAccess} objects in C{self.locks} into real lock
        objects, while also maintaining the subscriptions to lock
        releases."""
        # unsubscribe from any old locks
        for s in self.lock_subscriptions:
            s.unsubscribe()

        # convert locks into their real form
        locks = [(self.botmaster.getLockFromLockAccess(a), a)
                 for a in self.access]
        self.locks = [(l.getLock(self), la) for l, la in locks]
        self.lock_subscriptions = [l.subscribeToReleases(self._lockReleased)
                                   for l, la in self.locks]

    def locksAvailable(self):
        """
        I am called to see if all the locks I depend on are available, in
        which case I return True; otherwise I return False.
        """
        if not self.locks:
            return True
        for lock, access in self.locks:
            if not lock.isAvailable(self, access):
                return False
        return True

    def acquireLocks(self):
        """
        I am called when a build is preparing to run. I try to claim all
        the locks that are needed for a build to happen. If I can't, then
        my caller should give up the build and try to get another slave
        to look at it.
""" log.msg("acquireLocks(slave %s, locks %s)" % (self, self.locks)) if not self.locksAvailable(): log.msg("slave %s can't lock, giving up" % (self, )) return False # all locks are available, claim them all for lock, access in self.locks: lock.claim(self, access) return True def releaseLocks(self): """ I am called to release any locks after a build has finished """ log.msg("releaseLocks(%s): %s" % (self, self.locks)) for lock, access in self.locks: lock.release(self, access) def _lockReleased(self): """One of the locks for this slave was released; try scheduling builds.""" if not self.botmaster: return # oh well.. self.botmaster.maybeStartBuildsForSlave(self.slavename) def _applySlaveInfo(self, info): if not info: return self.slave_status.setAdmin(info.get("admin")) self.slave_status.setHost(info.get("host")) self.slave_status.setAccessURI(info.get("access_uri", None)) self.slave_status.setVersion(info.get("version", "(unknown)")) @defer.inlineCallbacks def _getSlaveInfo(self): buildslave = yield self.master.data.get( ('buildslaves', self.buildslaveid)) self._applySlaveInfo(buildslave['slaveinfo']) def setServiceParent(self, parent): # botmaster needs to set before setServiceParent which calls startService self.manager = parent self.master = parent.master self.botmaster = parent.master.botmaster return service.AsyncMultiService.setServiceParent(self, parent) @defer.inlineCallbacks def startService(self): self.updateLocks() self.startMissingTimer() self.buildslaveid = yield self.master.data.updates.findBuildslaveId( self.slavename) yield self._getSlaveInfo() yield service.AsyncMultiService.startService(self) @defer.inlineCallbacks def reconfigServiceWithBuildbotConfig(self, new_config): # Given a new BuildSlave, configure this one identically. Because # BuildSlave objects are remotely referenced, we can't replace them # without disconnecting the slave, yet there's no reason to do that. new = self.findNewSlaveInstance(new_config) assert self.slavename == new.slavename self.password = new.password # update our records with the buildslave manager if not self.registration: self.registration = yield self.master.buildslaves.register(self) yield self.registration.update(new, new_config) # adopt new instance's configuration parameters self.max_builds = new.max_builds self.access = new.access self.notify_on_missing = new.notify_on_missing if self.missing_timeout != new.missing_timeout: running_missing_timer = self.missing_timer self.stopMissingTimer() self.missing_timeout = new.missing_timeout if running_missing_timer: self.startMissingTimer() properties = Properties() properties.updateFromProperties(new.properties) self.properties = properties self.updateLocks() bids = [ b._builderid for b in self.botmaster.getBuildersForSlave(self.slavename) ] yield self.master.data.updates.buildslaveConfigured( self.buildslaveid, bids) # update the attached slave's notion of which builders are attached. # This assumes that the relevant builders have already been configured, # which is why the reconfig_priority is set low in this class. yield self.updateSlave() yield service.ReconfigurableServiceMixin.reconfigServiceWithBuildbotConfig( self, new_config) @defer.inlineCallbacks def stopService(self): if self.registration: yield self.registration.unregister() self.registration = None self.stopMissingTimer() yield service.AsyncMultiService.stopService(self) def findNewSlaveInstance(self, new_config): # TODO: called multiple times per reconfig; use 1-element cache? 
        for sl in new_config.slaves:
            if sl.slavename == self.slavename:
                return sl
        assert 0, "no new slave named '%s'" % self.slavename

    def startMissingTimer(self):
        if self.notify_on_missing and self.missing_timeout and self.parent:
            self.stopMissingTimer()  # in case it's already running
            self.missing_timer = reactor.callLater(self.missing_timeout,
                                                   self._missing_timer_fired)

    def stopMissingTimer(self):
        if self.missing_timer:
            self.missing_timer.cancel()
            self.missing_timer = None

    def isConnected(self):
        return self.conn

    def _missing_timer_fired(self):
        self.missing_timer = None
        # notify people, but only if we're still in the config
        if not self.parent:
            return

        buildmaster = self.botmaster.master
        status = buildmaster.getStatus()
        text = "The Buildbot working for '%s'\n" % status.getTitle()
        text += ("has noticed that the buildslave named %s went away\n" %
                 self.slavename)
        text += "\n"
        text += ("It last disconnected at %s (buildmaster-local time)\n" %
                 time.ctime(time.time() - self.missing_timeout))  # approx
        text += "\n"
        text += "The admin on record (as reported by BUILDSLAVE:info/admin)\n"
        text += "was '%s'.\n" % self.slave_status.getAdmin()
        text += "\n"
        text += "Sincerely,\n"
        text += " The Buildbot\n"
        text += " %s\n" % status.getTitleURL()
        text += "\n"
        text += "%s\n" % status.getURLForThing(self.slave_status)
        subject = "Buildbot: buildslave %s was lost" % self.slavename
        return self._mail_missing_message(subject, text)

    def updateSlave(self):
        """Called to add or remove builders after the slave has connected.

        @return: a Deferred that indicates when an attached slave has
        accepted the new builders and/or released the old ones."""
        if self.conn:
            return self.sendBuilderList()
        else:
            return defer.succeed(None)

    def updateSlaveStatus(self, buildStarted=None, buildFinished=None):
        # TODO
        pass

    @defer.inlineCallbacks
    def attached(self, conn):
        """This is called when the slave connects."""

        metrics.MetricCountEvent.log("AbstractBuildSlave.attached_slaves", 1)

        # now we go through a sequence of calls, gathering information, then
        # tell the Botmaster that it can finally give this slave to all the
        # Builders that care about it.
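        # Editor's note: the sequence below (1) resets and watches the
        # graceful/pause flags, (2) records the connection and the info the
        # slave sent, (3) persists the connection via the data API, then
        # (4) pushes the builder list and kicks off any pending builds.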
        # Reset graceful shutdown status
        self.slave_status.setGraceful(False)
        # We want to know when the graceful shutdown flag changes
        self.slave_status.addGracefulWatcher(self._gracefulChanged)
        self.conn = conn
        self._old_builder_list = None  # clear builder list before proceeding
        self.slave_status.addPauseWatcher(self._pauseChanged)

        self.slave_status.setConnected(True)

        self._applySlaveInfo(conn.info)

        self.slave_commands = conn.info.get("slave_commands", {})
        self.slave_environ = conn.info.get("environ", {})
        self.slave_basedir = conn.info.get("basedir", None)
        self.slave_system = conn.info.get("system", None)

        self.conn.notifyOnDisconnect(self.detached)

        slaveinfo = {
            'admin': conn.info.get('admin'),
            'host': conn.info.get('host'),
            'access_uri': conn.info.get('access_uri'),
            'version': conn.info.get('version'),
        }

        yield self.master.data.updates.buildslaveConnected(
            buildslaveid=self.buildslaveid,
            masterid=self.master.masterid,
            slaveinfo=slaveinfo)

        if self.slave_system == "nt":
            self.path_module = namedModule("ntpath")
        else:
            # most everything accepts / as a separator, so posix should be a
            # reasonable fallback
            self.path_module = namedModule("posixpath")
        log.msg("bot attached")
        self.messageReceivedFromSlave()
        self.stopMissingTimer()
        self.master.status.slaveConnected(self.slavename)
        yield self.updateSlave()
        yield self.botmaster.maybeStartBuildsForSlave(self.slavename)

    def messageReceivedFromSlave(self):
        now = time.time()
        self.lastMessageReceived = now
        self.slave_status.setLastMessageReceived(now)

    @defer.inlineCallbacks
    def detached(self):
        metrics.MetricCountEvent.log("AbstractBuildSlave.attached_slaves", -1)
        self.conn = None
        self._old_builder_list = []
        self.slave_status.removeGracefulWatcher(self._gracefulChanged)
        self.slave_status.removePauseWatcher(self._pauseChanged)
        self.slave_status.setConnected(False)
        log.msg("BuildSlave.detached(%s)" % self.slavename)
        self.master.status.slaveDisconnected(self.slavename)
        self.releaseLocks()
        yield self.master.data.updates.buildslaveDisconnected(
            buildslaveid=self.buildslaveid,
            masterid=self.master.masterid,
        )

    def disconnect(self):
        """Forcibly disconnect the slave.

        This severs the TCP connection and returns a Deferred that will fire
        (with None) when the connection is probably gone.

        If the slave is still alive, it will probably try to reconnect again
        in a moment.

        This is called in two circumstances. The first is when a slave is
        removed from the config file. In this case, when it tries to
        reconnect, it will be rejected as an unknown slave. The second is
        when we wind up with two connections for the same slave, in which
        case we disconnect the older connection.
        """
        if self.conn is None:
            return defer.succeed(None)
        log.msg("disconnecting old slave %s now" % self.slavename)
        # When this Deferred fires, we'll be ready to accept the new slave
        return self._disconnect(self.conn)

    def _disconnect(self, conn):
        # all kinds of teardown will happen as a result of
        # loseConnection(), but it happens after a reactor iteration or
        # two. Hook the actual disconnect so we can know when it is safe
        # to connect the new slave. We have to wait one additional
        # iteration (with callLater(0)) to make sure the *other*
        # notifyOnDisconnect handlers have had a chance to run.
        d = defer.Deferred()

        # notifyOnDisconnect runs the callback
        def _disconnected():
            eventually(d.callback, None)
        conn.notifyOnDisconnect(_disconnected)
        conn.loseConnection()
        log.msg("waiting for slave to finish disconnecting")
        return d

    def sendBuilderList(self):
        our_builders = self.botmaster.getBuildersForSlave(self.slavename)
        blist = [(b.name, b.config.slavebuilddir) for b in our_builders]
        if blist == self._old_builder_list:
            return defer.succeed(None)

        d = self.conn.remoteSetBuilderList(builders=blist)

        def sentBuilderList(ign):
            self._old_builder_list = blist
            return ign
        d.addCallback(sentBuilderList)
        return d

    def shutdownRequested(self):
        log.msg("slave %s wants to shut down" % self.slavename)
        self.slave_status.setGraceful(True)

    def addSlaveBuilder(self, sb):
        self.slavebuilders[sb.builder_name] = sb

    def removeSlaveBuilder(self, sb):
        try:
            del self.slavebuilders[sb.builder_name]
        except KeyError:
            pass

    def buildFinished(self, sb):
        """This is called when a build on this slave is finished."""
        self.botmaster.maybeStartBuildsForSlave(self.slavename)

    def canStartBuild(self):
        """
        I am called when a build is requested to see if this buildslave
        can start a build. This function can be used to limit overall
        concurrency on the buildslave.

        Note for subclassers: if a slave can become willing to start a build
        without any action on that slave (for example, by a resource in use
        on another slave becoming available), then you must arrange for
        L{maybeStartBuildsForSlave} to be called at that time, or builds on
        this slave will not start.
        """
        if self.slave_status.isPaused():
            return False

        # If we're waiting to shut down gracefully, then we shouldn't
        # accept any new jobs.
        if self.slave_status.getGraceful():
            return False

        if self.max_builds:
            active_builders = [sb for sb in self.slavebuilders.values()
                               if sb.isBusy()]
            if len(active_builders) >= self.max_builds:
                return False

        if not self.locksAvailable():
            return False

        return True

    def _mail_missing_message(self, subject, text):
        # first, see if we have a MailNotifier we can use. This gives us a
        # fromaddr and a relayhost.
        buildmaster = self.botmaster.master
        for st in buildmaster.status:
            if isinstance(st, MailNotifier):
                break
        else:
            # if not, they get a default MailNotifier, which always uses SMTP
            # to localhost and uses a dummy fromaddr of "buildbot".
log.msg("buildslave-missing msg using default MailNotifier") st = MailNotifier("buildbot") # now construct the mail m = Message() m.set_payload(text) m['Date'] = formatdate(localtime=True) m['Subject'] = subject m['From'] = st.fromaddr recipients = self.notify_on_missing m['To'] = ", ".join(recipients) d = st.sendMessage(m, recipients) # return the Deferred for testing purposes return d def _gracefulChanged(self, graceful): """This is called when our graceful shutdown setting changes""" self.maybeShutdown() @defer.inlineCallbacks def shutdown(self): """Shutdown the slave""" if not self.conn: log.msg("no remote; slave is already shut down") return yield self.conn.remoteShutdown() def maybeShutdown(self): """Shut down this slave if it has been asked to shut down gracefully, and has no active builders.""" if not self.slave_status.getGraceful(): return active_builders = [ sb for sb in self.slavebuilders.values() if sb.isBusy() ] if active_builders: return d = self.shutdown() d.addErrback(log.err, 'error while shutting down slave') def _pauseChanged(self, paused): if paused is True: self.botmaster.master.status.slavePaused(self.slavename) else: self.botmaster.master.status.slaveUnpaused(self.slavename) def pause(self): """Stop running new builds on the slave.""" self.slave_status.setPaused(True) def unpause(self): """Restart running new builds on the slave.""" self.slave_status.setPaused(False) self.botmaster.maybeStartBuildsForSlave(self.slavename) def isPaused(self): return self.slave_status.isPaused()
def getSlaveStatus(slave):
    """Return a fresh SlaveStatus bound to the given buildslave's master."""
    slave_status = SlaveStatus(slave.slavename)
    slave_status.master = slave.master
    return slave_status
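
# Usage sketch (editor's addition): `buildslave` stands in for any
# AbstractBuildSlave instance with its master attribute set:
#   status = getSlaveStatus(buildslave)
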
    def __init__(self, name, password, max_builds=None,
                 notify_on_missing=[], missing_timeout=3600,
                 properties={}, locks=None, keepalive_interval=3600,
                 friendlyName=None, os=None, eid=-1, fqdn=None):
        """
        @param name: botname this machine will supply when it connects
        @param password: password this machine will supply when it connects
        @param max_builds: maximum number of simultaneous builds that will
                           be run concurrently on this buildslave (the
                           default is None for no limit)
        @param properties: properties that will be applied to builds run on
                           this slave
        @type properties: dictionary
        @param locks: A list of locks that must be acquired before this
                      slave can be used
        @type locks: dictionary
        @param fqdn: the fully qualified domain name of the agent
                     (e.g. slave1.unity.com)
        @type fqdn: string
        """
        service.MultiService.__init__(self)
        self.slavename = name
        self.password = password
        self.friendly_name = friendlyName
        self.eid = eid  # external ID
        self.fqdn = fqdn  # the slave's fully qualified domain name
        if self.friendly_name is None:
            self.friendly_name = name

        # PB registration
        self.registration = None
        self.registered_port = None

        # these are set when the service is started, and unset when it is
        # stopped
        self.botmaster = None
        self.master = None

        self.slave_status = SlaveStatus(name)
        self.slave_status.setFriendlyName(self.friendly_name)
        self.slave_status.eid = eid
        self.slave_status.fqdn = fqdn
        self.slave = None  # a RemoteReference to the Bot, when connected
        self.slave_commands = None
        self.slavebuilders = {}
        self.max_builds = max_builds
        self.access = []
        if locks:
            self.access = locks
        self.lock_subscriptions = []

        self.properties = Properties()
        self.properties.update(properties, "BuildSlave")
        self.properties.setProperty("slavename", name, "BuildSlave")

        self.lastMessageReceived = 0
        if isinstance(notify_on_missing, str):
            notify_on_missing = [notify_on_missing]
        self.notify_on_missing = notify_on_missing
        for i in notify_on_missing:
            if not isinstance(i, str):
                config.error(
                    'notify_on_missing arg %r is not a string' % (i,))

        self.missing_timeout = missing_timeout
        self.missing_timer = None
        self.keepalive_interval = keepalive_interval

        self.detached_subs = None

        self._old_builder_list = None
        self.os = os
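
    # Editor's sketch (illustrative values, not original code): this
    # extended __init__ is assumed to belong to a BuildSlave subclass, so a
    # master.cfg entry might construct it along these lines:
    #   BuildSlave("slave1", "sekrit", max_builds=2,
    #              friendlyName="Linux builder 1", os="linux",
    #              eid=42, fqdn="slave1.example.com")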