def _try_read(fd, n):
    """Read up to n bytes from fd without blocking indefinitely.

    Returns '' if nothing is readable right now (try again later),
    the bytes read on success, or None at EOF.
    """
    # using djb's suggested way of doing non-blocking reads from a blocking
    # socket: http://cr.yp.to/unix/nonblock.html
    # We can't just make the socket non-blocking, because we want to be
    # compatible with GNU Make, and they can't handle it.
    r, w, x = select.select([fd], [], [], 0)
    if not r:
        return ''  # try again
    # ok, the socket is readable - but some other process might get there
    # first.  We have to set an alarm() in case our read() gets stuck.
    assert(state.is_flushed())
    oldh = signal.signal(signal.SIGALRM, _timeout)
    try:
        signal.alarm(1)  # emergency fallback
        try:
            # BUGFIX: read from the fd/size we were given; the original
            # hardcoded os.read(_fds[0], 1), silently ignoring both
            # parameters (wait_all() passes n=8192 and expects it honored).
            b = os.read(fd, n)
        except OSError as e:
            if e.errno in (errno.EAGAIN, errno.EINTR):
                # interrupted or it was nonblocking
                return ''  # try again
            else:
                raise
    finally:
        signal.alarm(0)
        signal.signal(signal.SIGALRM, oldh)
    return b and b or None  # None means EOF
def get_token(reason):
    """Block until this process holds exactly one job token.

    reason is used only for debug logging.
    """
    global _mytokens
    assert(state.is_flushed())
    assert(_mytokens <= 1)
    setup(1)
    while True:
        if _mytokens >= 1:
            # We already own a token; nothing more to acquire.
            _debug("_mytokens is %d\n" % _mytokens)
            assert(_mytokens == 1)
            _debug('(%r) used my own token...\n' % reason)
            break
        assert(_mytokens < 1)
        _debug('(%r) waiting for tokens...\n' % reason)
        wait(want_token=1)
        if _mytokens >= 1:
            break
        assert(_mytokens < 1)
        if not _fds:
            continue
        b = _try_read(_fds[0], 1)
        if b is None:
            raise Exception('unexpected EOF on token read')
        if b:
            _mytokens += 1
            _debug('(%r) got a token (%r).\n' % (reason, b))
            break
    assert(_mytokens <= 1)
def start_job(reason, jobfunc, donefunc):
    """Acquire a token, fork, and run jobfunc in the child.

    The child's exit status is later delivered to donefunc via wait().
    """
    global _mytokens
    assert(state.is_flushed())
    assert(_mytokens <= 1)
    get_token(reason)
    assert(_mytokens >= 1)
    assert(_mytokens == 1)
    # The child effectively carries our token away with it.
    _mytokens -= 1
    (readfd, writefd) = _make_pipe(50)
    pid = os.fork()
    if not pid:
        # child: run the job, report its status via the exit code.
        os.close(readfd)
        rv = 201
        try:
            try:
                rv = jobfunc() or 0
                _debug('jobfunc completed (%r, %r)\n' % (jobfunc, rv))
            except Exception:
                import traceback
                traceback.print_exc()
        finally:
            _debug('exit: %d\n' % rv)
            os._exit(rv)
    # parent: register the child so wait() can reap it later.
    close_on_exec(readfd, True)
    os.close(writefd)
    _waitfds[readfd] = Job(reason, pid, donefunc)
def start_job(reason, jobfunc, donefunc):
    """Fork a child to run jobfunc; the caller must already hold a token.

    The child's exit status is later delivered to donefunc via _wait().
    """
    global _mytokens
    assert state.is_flushed()
    assert _mytokens <= 1
    assert _mytokens == 1
    # Subprocesses always start with 1 token, so we have to destroy ours
    # in order for the universe to stay in balance.
    _destroy_tokens(1)
    (readfd, writefd) = _make_pipe(50)
    pid = os.fork()
    if not pid:
        # child: run the job, report its status via the exit code.
        os.close(readfd)
        rv = 201
        try:
            try:
                rv = jobfunc() or 0
                _debug('jobfunc completed (%r, %r)\n' % (jobfunc, rv))
            except Exception:
                import traceback
                traceback.print_exc()
        finally:
            _debug('exit: %d\n' % rv)
            os._exit(rv)
    # parent: register the child so _wait() can reap it later.
    close_on_exec(readfd, True)
    os.close(writefd)
    _waitfds[readfd] = Job(reason, pid, donefunc)
def wait(want_token):
    """Block until a child finishes or (if want_token) a token fd is readable.

    Reaps any finished children, releases their tokens, and invokes their
    donefunc callbacks with the child's exit status (negative signal number
    if the child was killed).
    """
    # BUGFIX: copy the keys into a fresh list before appending.  The
    # original appended directly to _waitfds.keys(), mutating the dict's
    # own key list on Python 2 and failing outright on Python 3, where
    # keys() returns an unappendable view.
    rfds = list(_waitfds.keys())
    if _fds and want_token:
        rfds.append(_fds[0])
    assert(rfds)
    assert(state.is_flushed())
    r, w, x = select.select(rfds, [], [])
    _debug('_fds=%r; wfds=%r; readable: %r\n' % (_fds, _waitfds, r))
    for fd in r:
        if _fds and fd == _fds[0]:
            # token fd is readable; the caller does the actual read.
            pass
        else:
            pd = _waitfds[fd]
            _debug("done: %r\n" % pd.name)
            _release(1)
            os.close(fd)
            del _waitfds[fd]
            rv = os.waitpid(pd.pid, 0)
            assert(rv[0] == pd.pid)
            _debug("done1: rv=%r\n" % (rv,))
            rv = rv[1]
            if os.WIFEXITED(rv):
                pd.rv = os.WEXITSTATUS(rv)
            else:
                # killed by a signal: report as a negative signal number
                pd.rv = -os.WTERMSIG(rv)
            _debug("done2: rv=%d\n" % pd.rv)
            pd.donefunc(pd.name, pd.rv)
def ensure_token(reason, max_delay=None):
    """Try to make sure we hold a job token.

    With max_delay=None, loops until a token is obtained.  Otherwise it
    makes a single bounded attempt and may return without one.
    """
    global _mytokens
    assert state.is_flushed()
    assert _mytokens <= 1
    while True:
        if _mytokens >= 1:
            # Already holding our token; nothing more to do.
            _debug("_mytokens is %d\n" % _mytokens)
            assert _mytokens == 1
            _debug('(%r) used my own token...\n' % reason)
            break
        assert _mytokens < 1
        _debug('(%r) waiting for tokens...\n' % reason)
        _wait(want_token=1, max_delay=max_delay)
        if _mytokens >= 1:
            break
        assert _mytokens < 1
        b = _try_read(_tokenfds[0], 1)
        _debug('GOT tokenfd\n')
        if b == '':
            raise Exception('unexpected EOF on token read')
        if b:
            _mytokens += 1
            _debug('(%r) got a token (%r).\n' % (reason, b))
            break
        if max_delay is not None:
            # bounded attempt: give up after a single pass.
            break
    assert _mytokens <= 1
def force_return_tokens():
    """Give back the tokens held on behalf of any still-registered jobs."""
    count = len(_waitfds)
    if count:
        _debug('%d tokens left in force_return_tokens\n' % count)
    _debug('returning %d tokens\n' % count)
    # forget every outstanding job record
    for fd in list(_waitfds.keys()):
        del _waitfds[fd]
    if _fds:
        _release(count)
    assert(state.is_flushed())
def force_return_tokens():
    """Return all tokens to the shared pool before exiting.

    Re-mints tokens for jobs that died without releasing theirs, releases
    everything except our own token, and repays any outstanding 'cheat'
    token via the cheat pipe.
    """
    n = len(_waitfds)
    _debug('%d,%d -> %d jobs left in force_return_tokens\n'
           % (_mytokens, _cheats, n))
    # BUGFIX: iterate over a copy of the keys; deleting from the dict while
    # iterating .keys() directly raises RuntimeError on Python 3.
    for k in list(_waitfds.keys()):
        del _waitfds[k]
    # Each abandoned job took a token down with it; recreate them.
    _create_tokens(n)
    if has_token():
        _release_except_mine()
        assert _mytokens == 1, 'mytokens=%d' % _mytokens
    assert _cheats <= _mytokens, 'mytokens=%d cheats=%d' % (_mytokens, _cheats)
    assert _cheats in (0, 1), 'cheats=%d' % _cheats
    if _cheats:
        _debug('%d,%d -> force_return_tokens: recovering final token\n'
               % (_mytokens, _cheats))
        # A cheated token is an extra we minted earlier; destroy it and
        # record the repayment on the cheat pipe.
        _destroy_tokens(_cheats)
        os.write(_cheatfds[1], 't' * _cheats)
    assert state.is_flushed()
def wait_all():
    """Wait for every outstanding job to finish, then reclaim our token."""
    _debug("wait_all\n")
    assert(state.is_flushed())
    while running():
        # Hand back any tokens we hold so blocked siblings can proceed.
        while _mytokens >= 1:
            release_mine()
        _debug("wait_all: wait()\n")
        wait(want_token=0)
    _debug("wait_all: empty list\n")
    get_token('self')  # get my token back
    if not _toplevel:
        return
    # Toplevel sanity check: drain the token pipe, verify exactly the
    # expected number of tokens came home, then put them all back.
    bb = ''
    while True:
        b = _try_read(_fds[0], 8192)
        bb += b
        if not b:
            break
    if len(bb) != _toplevel - 1:
        raise Exception('on exit: expected %d tokens; found only %r'
                        % (_toplevel-1, len(bb)))
    os.write(_fds[1], bb)
def _wait(want_token, max_delay):
    """select() on job fds (and optionally the token fd), reaping children.

    want_token: if true, also watch the token pipe for readability.
    max_delay: select() timeout in seconds, or None to block forever.
    Finished children are reaped; their donefunc is called with the exit
    status (negative signal number if killed by a signal).
    """
    rfds = _waitfds.keys()
    if want_token:
        rfds.append(_tokenfds[0])
    assert rfds
    assert state.is_flushed()
    r, w, x = select.select(rfds, [], [], max_delay)
    _debug('_tokenfds=%r; wfds=%r; readable: %r\n' % (_tokenfds, _waitfds, r))
    for fd in r:
        if fd == _tokenfds[0]:
            # Token pipe readable: the caller (ensure_token) does the
            # actual read, so nothing to do here.
            pass
        else:
            pd = _waitfds[fd]
            _debug("done: %r\n" % pd.name)
            # redo subprocesses are expected to die without releasing their
            # tokens, so things are less likely to get confused if they
            # die abnormally.  That means a token has 'disappeared' and we
            # now need to recreate it.
            b = _try_read(_cheatfds[0], 1)
            _debug('GOT cheatfd\n')
            if b == None:
                # Nothing came back on the cheat pipe, so the dead child
                # presumably took a real token with it; mint a replacement.
                # NOTE(review): this relies on _try_read returning None
                # (not '') when no cheat byte is pending — confirm this
                # version's _try_read convention.
                _create_tokens(1)
                if has_token():
                    _release_except_mine()
            else:
                # someone exited with _cheats > 0, so we need to compensate
                # by *not* re-creating a token now.
                pass
            os.close(fd)
            del _waitfds[fd]
            rv = os.waitpid(pd.pid, 0)
            assert rv[0] == pd.pid
            _debug("done1: rv=%r\n" % (rv, ))
            rv = rv[1]
            if os.WIFEXITED(rv):
                pd.rv = os.WEXITSTATUS(rv)
            else:
                # killed by a signal: report as a negative signal number
                pd.rv = -os.WTERMSIG(rv)
            _debug("done2: rv=%d\n" % pd.rv)
            pd.donefunc(pd.name, pd.rv)
def _do_subproc(self):
    """Exec the .do script for this target in the already-forked child.

    Sets up the REDO_* environment variables, chdirs into the target's
    directory, redirects stdout into self.f (the temporary output file),
    and exec()s self.argv.  Never returns on success.
    """
    # careful: REDO_PWD was the PWD relative to the STARTPATH at the time
    # we *started* building the current target; but that target ran
    # redo-ifchange, and it might have done it from a different directory
    # than we started it in.  So os.getcwd() might be != REDO_PWD right
    # now.
    assert (state.is_flushed())
    dn = self.dodir
    newp = os.path.realpath(dn)
    os.environ['REDO_PWD'] = state.relpath(newp, vars.STARTDIR)
    os.environ['REDO_TARGET'] = self.basename + self.ext
    os.environ['REDO_DEPTH'] = vars.DEPTH + ' '
    vars.add_lock(str(self.lock.fid))
    if dn:
        os.chdir(dn)
    # The script's stdout becomes the temporary output file; the original
    # descriptor is closed so only fd 1 refers to it, and fd 1 must
    # survive the exec.
    os.dup2(self.f.fileno(), 1)
    os.close(self.f.fileno())
    close_on_exec(1, False)
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)  # python ignores SIGPIPE
    if vars.VERBOSE or vars.XTRACE:
        log_('* %s\n' % ' '.join(self.argv))
    os.execvp(self.argv[0], self.argv)
    # unreachable: execvp either replaces the process or raises OSError
    assert (0)
def wait_all():
    """Wait for all jobs to finish, releasing our tokens while we wait."""
    _debug("%d,%d -> wait_all\n" % (_mytokens, _cheats))
    assert state.is_flushed()
    while True:
        # Give away every token we hold so other processes can run.
        while _mytokens >= 1:
            release_mine()
        if not running():
            break
        _debug("wait_all: wait()\n")
        _wait(want_token=0, max_delay=None)
    _debug("wait_all: empty list\n")
    if not _toplevel:
        return
    # If we're the toplevel and we're sure no child processes remain,
    # then we know we're totally idle.  Self-test to ensure no tokens
    # mysteriously got created/destroyed.
    tokens = _try_read_all(_tokenfds[0], 8192)
    cheats = _try_read_all(_cheatfds[0], 8192)
    _debug('toplevel: GOT %d tokens and %d cheats\n'
           % (len(tokens), len(cheats)))
    if len(tokens) - len(cheats) != _toplevel:
        raise Exception('on exit: expected %d tokens; found %r-%r'
                        % (_toplevel, len(tokens), len(cheats)))
    os.write(_tokenfds[1], tokens)
pretty=opt.pretty, color=opt.color, debug_locks=opt.debug_locks, debug_pids=opt.debug_pids) for t in targets: if os.path.exists(t): f = state.File(name=t) if not f.is_generated: warn('%s: exists and not marked as generated; not redoing.\n' % f.nicename()) state.rollback() j = atoi(opt.jobs or 1) if j < 1 or j > 1000: err('invalid --jobs value: %r\n' % opt.jobs) jwack.setup(j) try: assert(state.is_flushed()) retcode = builder.main(targets, lambda t: (True, True)) assert(state.is_flushed()) finally: try: state.rollback() finally: try: jwack.force_return_tokens() except Exception, e: traceback.print_exc(100, sys.stderr) err('unexpected error: %r\n' % e) retcode = 1 if vars_init.is_toplevel: builder.await_log_reader() sys.exit(retcode)
def main(targets, shouldbuildfunc):
    """Build each target in *targets*, parallelizing via the jobserver.

    shouldbuildfunc is passed through to every BuildJob to decide whether
    a given target actually needs rebuilding.  Returns 0 on success or a
    nonzero error code.
    """
    retcode = [0]  # a list so that it can be reassigned from done()
    if vars.SHUFFLE:
        import random
        random.shuffle(targets)
    locked = []

    def done(t, rv):
        # BuildJob completion callback: any child failure fails the run.
        if rv:
            retcode[0] = 1

    # In the first cycle, we just build as much as we can without worrying
    # about any lock contention.  If someone else has it locked, we move on.
    seen = {}
    lock = None
    for t in targets:
        if not t:
            err('cannot build the empty target ("").\n')
            retcode[0] = 204
            break
        assert (state.is_flushed())
        if t in seen:
            continue
        seen[t] = 1
        if not jwack.has_token():
            # get_token() may block; commit first so other processes
            # aren't stalled behind our open transaction.
            state.commit()
        jwack.get_token(t)
        if retcode[0] and not vars.KEEP_GOING:
            break
        if not state.check_sane():
            err('.redo directory disappeared; cannot continue.\n')
            retcode[0] = 205
            break
        f = state.File(name=t)
        lock = state.Lock(f.id)
        if vars.UNLOCKED:
            lock.owned = True
        else:
            lock.trylock()
        if not lock.owned:
            if vars.DEBUG_LOCKS:
                log('%s (locked...)\n' % _nice(t))
            # someone else is building it; revisit it in the second pass
            locked.append((f.id, t))
        else:
            # We had to create f before we had a lock, because we need f.id
            # to make the lock.  But someone may have updated the state
            # between then and now.
            # FIXME: separate obtaining the fid from creating the File.
            # FIXME: maybe integrate locking into the File object?
            f.refresh()
            BuildJob(t, f, lock, shouldbuildfunc, done).start()
        state.commit()
        assert (state.is_flushed())
        # drop our reference to the lock object before the next iteration
        lock = None
        del lock
    # Now we've built all the "easy" ones.  Go back and just wait on the
    # remaining ones one by one.  There's no reason to do it any more
    # efficiently, because if these targets were previously locked, that
    # means someone else was building them; thus, we probably won't need to
    # do anything.  The only exception is if we're invoked as redo instead
    # of redo-ifchange; then we have to redo it even if someone else already
    # did.  But that should be rare.
    while locked or jwack.running():
        state.commit()
        jwack.wait_all()
        # at this point, we don't have any children holding any tokens, so
        # it's okay to block below.
        if retcode[0] and not vars.KEEP_GOING:
            break
        if locked:
            if not state.check_sane():
                err('.redo directory disappeared; cannot continue.\n')
                retcode[0] = 205
                break
            fid, t = locked.pop(0)
            lock = state.Lock(fid)
            backoff = 0.01
            lock.trylock()
            while not lock.owned:
                # Don't spin with 100% CPU while we fight for the lock.
                import random
                time.sleep(random.random() * min(backoff, 1.0))
                backoff *= 2
                if vars.DEBUG_LOCKS:
                    warn('%s (WAITING)\n' % _nice(t))
                # this sequence looks a little silly, but the idea is to
                # give up our personal token while we wait for the lock to
                # be released; but we should never run get_token() while
                # holding a lock, or we could cause deadlocks.
                jwack.release_mine()
                try:
                    lock.waitlock()
                except state.CyclicDependencyError:
                    err('cyclic dependency while building %s\n' % _nice(t))
                    jwack.get_token(t)
                    retcode[0] = 208
                    return retcode[0]
                lock.unlock()
                jwack.get_token(t)
                lock.trylock()
            assert (lock.owned)
            if vars.DEBUG_LOCKS:
                log('%s (...unlocked!)\n' % _nice(t))
            if state.File(name=t).is_failed():
                err('%s: failed in another thread\n' % _nice(t))
                retcode[0] = 2
                lock.unlock()
            else:
                BuildJob(t, state.File(id=fid), lock, shouldbuildfunc,
                         done).start()
            lock = None
    state.commit()
    return retcode[0]