def __init__(self, **kwargs): super().__init__() self._tslam = kwargs['tslam'] # Argument parsing ---------------------------------------------------- {{{ argp = argparse.ArgumentParser() self._init_add_args(argp) self._init_handle_args(argp.parse_args(kwargs['cliargs'])) # --------------------------------------------------------------------- }}} # Power of two, greater than page log self._bucklog = 16 self._pagelog = 12 self._maxbix = 1 # Next never-touched bucket index (AHWM) self._szbix2ap = {} # BUMP allocation pointer by size and bix self._bix2szbm = {} # BUMP and WAIT buckets' size and bitmaps self._njunkb = 0 # Number of buckets in JUNK state self._nbwb = 0 # Number of buckets in BUMP|WAIT states self._bix2state = IntervalMap(self._maxbix, 2**(64 - self._bucklog) - self._maxbix, BuckSt.AHWM) self._brscache = None # Biggest revokable span cache self._junklru = dllist() # List of all revokable spans, LRU self._junkbdn = {} # JUNK bix to node in above list
class Allocator (RenamingAllocatorBase): __slots__ = ('_maxeva', '_eva2sz', '_state') def __init__(self, **kwargs): super().__init__() self._eva2sz = {} self._maxeva = 0 self._state = IntervalMap(4096, 2**64, False) def _alloc(self, event, sz): # Impose a minimum size on all allocations, so that, in particular, # zero-size allocations are still distinct entities, as required by # POSIX. if sz < 4 : sz = 4 res = self._maxeva self._maxeva += sz self._eva2sz[res] = sz self._state.mark(res, sz, True) return res def _free(self, event, eva): self._state.mark(eva, self._eva2sz[eva], False) del self._eva2sz[eva] def _try_realloc(self, event, oeva, nsz): return False
def __init__(self, **kwargs): super().__init__() self._tslam = kwargs['tslam'] self._paranoia = 0 # Argument parsing ---------------------------------------------------- {{{ argp = argparse.ArgumentParser() self._init_add_args(argp) self._init_handle_args(argp.parse_args(kwargs['cliargs'])) # --------------------------------------------------------------------- }}} self._pagelog = 12 self._basepg = 1 baseva = self._basepg * 2**self._pagelog self._brscache = None self._eva2sst = IntervalMap(baseva, 2**64 - baseva, SegSt.AHWM) self._eva2sz = {} self._evp2pst = IntervalMap(self._basepg, 2**(64 - self._pagelog) - self._basepg, PageSt.UMAP) self._junklru = dllist() self._junkadn = {} self._njunk = 0 self._nmapped = 0 self._npend = 0 self._nwait = 0 self._tidylst = SegFreeList(extcoal=self._sfl_coalesce) self._wildern = baseva
def __init__(self, *args, **kwargs): super(__class__, self).__init__(*args, **kwargs) self._mtags = IntervalMap(self._evp2eva(self._basepg), 2**64 - self._evp2eva(self._basepg), 0) self._prefs = IntervalMap(self._evp2eva(self._basepg), 2**64 - self._evp2eva(self._basepg), None) self._tlpf = {} self._tlpf[None] = SegFreeList()
def __init__(self, **kwargs) : super().__init__(**kwargs) # XXX self._revoke_k = 8 self._free = self._free_unsafe self._prefs = IntervalMap ( self._evp2eva(self._basepg), 2**64 - self._evp2eva(self._basepg), None ) self._tlpf = {} self._tlpf[None] = SegFreeList()
def __init__(self, colour_count, sweeping_revoker): self._sweeping_revoker = sweeping_revoker self._ccount = colour_count self._addr_ivals_c = IntervalMap.from_valued_interval_domain(AddrIval(0, 2**64, 0))
def __init__(self): super().__init__(calc_total_for_state=AddrIvalState.ALLOCD) bkg_ival = AddrIval(0, 2**64, None) self.__addr_ivals = IntervalMap.from_valued_interval_domain(bkg_ival, coalescing=False) self._realloc_stubs = IntervalMap.from_valued_interval_domain(bkg_ival)
def __init__(self, *, calc_total_for_state): super().__init__() self.__addr_ivals = IntervalMap.from_valued_interval_domain(AddrIval(0, 2**64, None)) self._total = 0 self._calc_total_for_state = calc_total_for_state
def __init__(self): bkg_ival = AddrIval(0, 2**64, None) self._addr_ivals = \ IntervalMap.from_valued_interval_domain(bkg_ival, coalescing=False)
class ClingyAllocatorBase(RenamingAllocatorBase): # Initialization ------------------------------------------------------ {{{ __slots__ = ('_bix2state', '_bix2szbm', '_brscache', '_bucklog', '_junklru', '_junkbdn', '_maxbix', '_njunkb', '_nbwb', '_pagelog', '_paranoia', '_revoke_k', '_szbix2ap', '_tslam') __metaclass__ = ABCMeta # Argument definition and response ------------------------------------ {{{ @staticmethod def _init_add_args(argp): argp.add_argument( '--realloc', action='store', type=str, default="always", choices=['always', 'yes', 'onlyshrink', 'never', 'no']) argp.add_argument('--paranoia', action='store', type=int, default=0) argp.add_argument('--revoke-k', action='store', type=int, default=1) argp.add_argument('--render-style', action='store', type=str, default="compact", choices=['compact', 'expand16']) def _init_handle_args(self, args): self._paranoia = args.paranoia if self._paranoia == 0 and __debug__: logging.warn("Assertions still enabled, even with paranoia 0; " "try python -O") if self._paranoia != 0 and not __debug__: raise ValueError("Paranoia without assertions will just be slow") assert args.revoke_k > 0 self._revoke_k = args.revoke_k if args.realloc == "never" or args.realloc == "no": self._try_realloc = self._try_realloc_never elif args.realloc == "onlyshrink": self._try_realloc = self._try_realloc_onlyshrink else: self._try_realloc = self._try_realloc_yes if args.render_style == "expand16": self.render = self._render_expanded # --------------------------------------------------------------------- }}} def __init__(self, **kwargs): super().__init__() self._tslam = kwargs['tslam'] # Argument parsing ---------------------------------------------------- {{{ argp = argparse.ArgumentParser() self._init_add_args(argp) self._init_handle_args(argp.parse_args(kwargs['cliargs'])) # --------------------------------------------------------------------- }}} # Power of two, greater than page log self._bucklog = 16 self._pagelog = 12 self._maxbix = 1 # Next never-touched bucket index (AHWM) self._szbix2ap = {} # BUMP allocation pointer by size and bix self._bix2szbm = {} # BUMP and WAIT buckets' size and bitmaps self._njunkb = 0 # Number of buckets in JUNK state self._nbwb = 0 # Number of buckets in BUMP|WAIT states self._bix2state = IntervalMap(self._maxbix, 2**(64 - self._bucklog) - self._maxbix, BuckSt.AHWM) self._brscache = None # Biggest revokable span cache self._junklru = dllist() # List of all revokable spans, LRU self._junkbdn = {} # JUNK bix to node in above list # --------------------------------------------------------------------- }}} # Size-related utility functions -------------------------------------- {{{ def _issmall(self, sz): return sz <= 2**(self._bucklog - 1) # Find the right size bucket for a small request. Starting from 16, we # divide the gap between successive powers of two into four regions and map # objects into the smallest one larger than their size. The size sequence, # specifically, begins 16 20 24 28 32 40 48 56 64 80 96 112 128 . We # consider only objects smaller than half a bucket (i.e. 2**bucklog bytes) # to be "small"; this is captured by _issmall(), above. def _szfix(self, sz): assert self._issmall(sz) if sz <= 16: return 16 # XXX # At 1/4 linear separation between successive powers of two, we # are only guaranteed 16/4 = 4 byte alignment of objects. If we # really want to get down to it, we could try doing something more # clever here or we could enforce that we always allocate objects # with size max(requested_size, alignment*4). 
bl = sz.bit_length() - 1 fl = 1 << bl d = sz - fl if d == 0: return sz cl = fl << 1 assert fl <= sz < cl if d <= (fl >> 1): if d <= (fl >> 2): return fl + (fl >> 2) else: return fl + (fl >> 1) elif d <= 3 * (fl >> 2): return fl + 3 * (fl >> 2) return cl def _maxoix(self, sz): return int((2**self._bucklog) / self._szfix(sz)) def _bix2va(self, bix): return bix << self._bucklog def _va2bix(self, va): return va >> self._bucklog def _sz2nbucks(self, sz): return int((sz + 2**self._bucklog - 1) >> self._bucklog) def _nbucks2sz(self, bs): return bs << self._bucklog # --------------------------------------------------------------------- }}} # Additional state assertions and diagnostics ------------------------- {{{ def _state_diag(self): return (self._bix2szbm, self._szbix2ap, [x for x in self._bix2state]) def _state_asserts(self): # if __debug__ : logging.debug("%r %r %r", self._bix2szbm, self._szbix2ap, [x for x in self._bix2state]) # Ensure that our _maxbix looks like the HWM (mbase, msz, mv) = self._bix2state[self._maxbix] assert mbase + msz == 2**(64 - self._bucklog), ("maxbix not max", self._maxbix, mbase, msz, mv) assert mv == BuckSt.AHWM, ("maxbix not AHWM", self._maxbix, mbase, msz, mv) (njunk, ntidy) = (0, 0) for (qb, qsz, qv) in self._bix2state: if qv == BuckSt.JUNK: # Ensure presence on linked list assert self._junkbdn.get(qb, None) is not None, "JUNK not on LRU" # Account njunk += qsz elif qv == BuckSt.TIDY: # Account ntidy += qsz elif qv == BuckSt.BUMP: # Ensure that BUMP states are backed in dictionaries for bc in range(qb, qb + qsz): (bsz, _) = self._bix2szbm[bc] assert self._szbix2ap.get(bsz) is not None, \ ("BUMP miss sz", bc, bsz, self._state_diag()) assert self._szbix2ap[bsz][bc] is not None, \ ("BUMP miss ix", bc, bsz, self._state_diag()) elif qv == BuckSt.WAIT: # Same for WAIT states. Not all WAIT-state buckets are # necessarily indexed, tho', so we have to be somewhat careful bc = qb bce = qb + qsz while bc < bce: assert self._bix2szbm.get(bc) is not None, \ ("B/W miss", bc, self._state_diag()) (bsz, _) = self._bix2szbm[bc] bc += self._sz2nbucks(bsz) # Check that our running sum of JUNK pages is correct assert self._njunkb == njunk, "JUNK accounting botch" nbw = 0 for b in self._bix2szbm: # All busy buckets are marked as such? (_, _, v) = self._bix2state[b] assert v in [BuckSt.BUMP, BuckSt.WAIT], ("B/W botch", bc, v, \ self._state_diag()) # Account nbw += self._sz2nbucks(self._bix2szbm[b][0]) assert self._nbwb == nbw, \ ("BUMP|WAIT accounting botch", nbw, self._nbwb, self._bix2szbm.keys(), [x for x in self._bix2state]) # Everything adds up, right? # non-AHWM JUNK BUMP|WAIT TIDY assert self._maxbix == self._njunkb + self._nbwb + ntidy, \ ("General accounting botch", self._maxbix, self._njunkb, self._bix2szbm.keys(), [x for x in self._bix2state]) # Every currently-active BUMP bucket is tagged as such, yes? for sz in self._szbix2ap: for bc in self._szbix2ap[sz]: (_, _, v) = self._bix2state[bc] assert v == BuckSt.BUMP, ("BUMP botch", bc, v, self._state_diag()) # Ensure that JUNK list entries are so stated for (jb, jsz) in self._junklru: (qb, qsz, qv) = self._bix2state[jb] assert qv == BuckSt.JUNK, "LRU not JUNK" assert qb == jb and qsz == jsz, "LRU JUNK segment botch" # --------------------------------------------------------------------- }}} # Revocation logic ---------------------------------------------------- {{{ # An actual implementation would maintain a prioqueue or something; # we can get away with a linear scan. 
We interrogate the bucket state # interval map for ease of coalescing, even though we also maintain a # parallel JUNK LRU queue. def _find_largest_revokable_spans(self, n=1): if n == 0: return if n == 1 and self._brscache is not None: return [self._brscache] bests = [(0, -1, -1)] # [(njunk, bix, sz)] in ascending order for (qbase, qsz, qv) in self._bix2state.iter_vfilter(None, self._maxbix, st_tj): # Smaller or busy spans don't interest us if qsz <= bests[0][0]: continue # Reject spans that are entirely TIDY already. js = [ sz for (_, sz, v) in self._bix2state[qbase:qbase + qsz] if v == BuckSt.JUNK ] if js == []: continue # Sort spans by number of JUNK buckets, not JUNK|TIDY buckets nj = sum(js) if nj <= bests[0][0]: continue insort(bests, (nj, qbase, qsz)) bests = bests[(-n):] return bests def _mark_tidy(self, bix, sz, nj): # Because we coalesce with TIDY spans while revoking, there may be # several JUNK spans in here. Go remove all of them from the LRU. for (qbix, qsz, qv) in self._bix2state[bix:bix + sz]: assert qv in st_atj, "Revoking non-revokable span" if qv == BuckSt.JUNK: self._junklru.remove(self._junkbdn.pop(qbix)) self._njunkb -= nj self._bix2state.mark(bix, sz, BuckSt.TIDY) self._brscache = None def _do_revoke(self, ss): if self._paranoia > PARANOIA_STATE_ON_REVOKE: self._state_asserts() nrev = sum([nj for (nj, _, _) in ss if nj > 0]) ntidy = self._maxbix - self._njunkb - self._nbwb print("Revoking: ts=%.2f hwm=%d busy=%d junk=%d tidy=%d rev=%d rev/hwm=%2.2f%% rev/junk=%2.2f%% ss=%r" \ % (self._tslam() / 1e9, self._maxbix, self._nbwb, self._njunkb, ntidy, nrev, nrev/self._maxbix * 100, nrev/self._njunkb * 100, ss), file=sys.stderr) for (nj, bix, sz) in ss: self._mark_tidy(bix, sz, nj) self._publish( 'revoked', "---", "", *((self._bix2va(bix), self._bix2va(bix + sz)) for (_, bix, sz) in ss)) # Conditionally revokes the top n segments if the predicate, which is # given the number of junk buckets in the largest span, says to. # # If given a "revoke" paramter, it must be an iterable of # bases of junk spans which will be guaranteed to be revoked, even if they # are not the largest spans known. This may be used to force some degree # of reuse of small spans, as suggested by Hongyan. def _predicated_revoke_best(self, fn, n=None, revoke=[]): revoke = list(revoke) assert len(revoke) <= self._revoke_k if n is None: n = self._revoke_k nrev = None brss = None if self._brscache is not None: # If the best revocable span is cached, just exract the answer (nrev, _, _) = self._brscache else: # Otherwise, answer is not cached, so go compute it now. # Compute one more so we can update the cache immediately. brss = self._find_largest_revokable_spans(n=n + 1) self._brscache = brss[-1] nrev = self._brscache[0] if fn(nrev): # Revoking the top k spans means that the (k+1)th span is # certainly the most productive, in terms of the number of JUNK # buckets it contains. Immediately update the cache to avoid # needing another sweep later. if brss is None: brss = self._find_largest_revokable_spans(n=n + 1) assert brss[-1][0] == nrev, \ ("Incorrect accounting in cache?", brss, nrev, self._brscache) # For each mandatory span, fish through the best spans to see if one # contains it. If so, let's revoke the container rather than the # containee. rset = set() for mustbix in revoke: for (brnj, brix, brsz) in brss: if brix <= mustbix < brix + brsz: rset.add((brnj, brix, brsz)) break else: # No container found; add the mandatory span, counting the number # of junk buckets in it. 
(qix, qsz, _) = self._bix2state.get(mustbix, coalesce_with_values=st_tj) rset.add( (sum(sz for (_, sz, v) in self._bix2state[qix:qix + qsz] if v == BuckSt.JUNK), qix, qsz)) # Now, go through the best spans until we have at most the number of # spans we can revoke in one go. Since we may have picked some of the # best spans while considering mandatory spans above, it's not as # simple as just concatenating a list, but it's still not terrible. while len(rset) < self._revoke_k and brss != []: rset.add(brss[-1]) brss = brss[:-1] self._do_revoke(rset) # Find the largest best span not used and update the cache while brss != []: if brss[-1] not in rset: break brss = brss[:-1] if brss != []: self._brscache = brss[-1] else: self._brscache = (0, -1, -1) @abstractmethod def _maybe_revoke(self): # By default, don't! # # A reasonable implementation of this function will look like a series # of checks about the state of the allocator (probing at the accounted # _njunkb and _nbwb and _maxbix) and then a call to # _predicated_revoke_best with an additional predicate, given the # number of buckets that can be reclaimed in the largest revokable # (JUNK|TIDY coalesced) span. I apologize for the interface, but # it seemed like a good balance between detailed accounting of the best # revokable span at all times or always needing to walk the intervalmap. # pass # --------------------------------------------------------------------- }}} # Allocation ---------------------------------------------------------- {{{ # Return the bucket index to use for a small placement of size `sz` and # made by call stack `stk`. Available options include the existing bump # buckets `bbks` or the TIDY/AHWM segments indicated in `tidys`. These # last two parameters are Python iterators, not lists, to speed up the # most common cases. `tidys` is an iterator of (index, length) tuples, # each indicating possibly multiple locations. @abstractmethod def _alloc_place_small(self, stk, sz, bbks, tidys): raise NotImplemented() # Some classes may be associating metadata with bump buckets. This # callback fires whenever a bump bucket fills, to indicate that no future # allocations will take place from that bucket and so the metadata can be # released. def _alloc_place_small_full(self, bbix): pass # Return the initial bucket index to use for a large allocation of `sz` # *buckets* (not bytes). `tidys` is, as with `_alloc_place_small`, an # iterator of (index, length) pairs. @abstractmethod def _alloc_place_large(self, stk, sz, tidys): raise NotImplemented() def _mark_allocated(self, reqbase, reqbsz, nst): if self._paranoia > PARANOIA_STATE_PER_OPER: assert nst in {BuckSt.BUMP, BuckSt.WAIT} (qbase, qsz, qv) = self._bix2state.get(reqbase, coalesce_with_values=st_at) assert qv in st_at, ("New allocated mark in bad state", qv) assert qbase + qsz >= reqbase + reqbsz, "New allocated undersized?" if reqbase > self._maxbix: # Allocation request leaving a gap; mark the skipped spans as TIDY # rather than leaving them as AHWM. # # While this might, technically, change the largest revokable span, # it will not change the number of JUNK buckets in any span, and so # we need not necessarily invalidate brscache. self._bix2state.mark(self._maxbix, reqbase - self._maxbix, BuckSt.TIDY) # If the allocation takes place within the current best revokable span, # invalidate the cache and let the revocation heuristic reconstruct it. 
if self._brscache is not None: (_, brsix, brssz) = self._brscache if brsix <= reqbase < brsix + brssz: self._brscache = None self._nbwb += reqbsz self._maxbix = max(self._maxbix, reqbase + reqbsz) self._bix2state.mark(reqbase, reqbsz, nst) def _alloc(self, stk, tid, sz): if __debug__: logging.debug(">_alloc sz=%d", sz) if self._paranoia > PARANOIA_STATE_PER_OPER: self._state_asserts() # XXX should coalesce tidys = ((loc, tsz) for (loc, tsz, v) in self._bix2state \ if v in { BuckSt.TIDY , BuckSt.AHWM }) if self._issmall(sz): # Is small allocation # Is bump bucket available? nsz = self._szfix(sz) bbs = self._szbix2ap.get(nsz, {}) bbix = self._alloc_place_small(stk, sz, iter(bbs.keys()), tidys) if bbix not in bbs: self._publish('mapd', stk, tid, self._bix2va(bbix), self._bix2va(bbix + 1), 0b11) self._mark_allocated(bbix, 1, BuckSt.BUMP) self._bix2szbm[bbix] = (nsz, 0) if nsz not in self._szbix2ap: self._szbix2ap[nsz] = {} bbap = 0 else: bbap = bbs[bbix] if __debug__: # Some sanity-checking doesn't hurt, either. (bbsz, bbbm) = self._bix2szbm[bbix] assert bbsz == nsz, "Incorrect indexing of BUMP buckets" assert bbbm & ( 1 << bbap) == 0, "Attempting to BUMP into free object" bbap += 1 if bbap == self._maxoix(nsz): # out of room; can't bump this any more del self._szbix2ap[nsz][bbix] self._bix2state.mark(bbix, 1, BuckSt.WAIT) # Inform the placement policy that this one is no-go and won't be # coming back, so it can stop tracking metadata about it. self._alloc_place_small_full(bbix) else: assert bbap < self._maxoix( nsz), "Allocation pointer beyond maximum" # just revise allocation pointer self._szbix2ap[nsz][bbix] = bbap res = self._bix2va(bbix) + (bbap - 1) * nsz else: # Large allocation. # Placement bsz = self._sz2nbucks(sz) bbix = self._alloc_place_large(stk, bsz, tidys) if __debug__: (pbase, psz, pv) = self._bix2state.get(bbix) assert pbase + psz >= bbix + bsz, "Large placement botch" # Enroll in WAIT state and map pages self._mark_allocated(bbix, bsz, BuckSt.WAIT) self._bix2szbm[bbix] = (sz, 0) res = self._bix2va(bbix) self._publish('mapd', stk, tid, res, res + self._nbucks2sz(bsz), 0b11) nsz = self._nbucks2sz(bsz) if __debug__: logging.debug("<_alloc eva=%x", res) return (res, nsz) # --------------------------------------------------------------------- }}} # Free ---------------------------------------------------------------- {{{ # Allow for parametrizable behavior when a bucket becomes free. Should # return one of # None : leave the bucket considered allocated # True : mark the bucket as JUNK # False : mark the bucket as TIDY immediately def _on_bucket_free(self, bix, bsz): return True # Mark a (span of) bucket(s) JUNK. # # This may change the largest revocable span, so carry out a single probe # of the state intervalmap to see. Do not attempt to revise the cache # here, as that would require counting up the number of JUNK pages in the # span returned; just invalidate it and let the revocation heuristic # recompute it when needed. # # Junk spans are also tracked in a LRU cache; do the appropriate juggling # here. # # It may make sense to hook this method in subclasses, too, for further # metadata management, especially if we end up designing a "related # object" API extension: one may need to refer to metadata of objects # whose buckets have already gone from BUMP to BUSY, i.e., for which # _alloc_place_small_full() has already been called. 
def _mark_junk(self, bix, bsz): assert self._bix2state[bix][2] != BuckSt.JUNK, "re-marking JUNK" del self._bix2szbm[bix] self._bix2state.mark(bix, bsz, BuckSt.JUNK) self._njunkb += bsz self._nbwb -= bsz if self._brscache is not None: (brsnj, _, _) = self._brscache (_, qsz, _) = self._bix2state.get(bix, coalesce_with_values=st_tj) if qsz >= brsnj: self._brscache = None dll_im_coalesced_insert(bix, bsz, self._bix2state, self._junklru, self._junkbdn) def _free_bix(self, bix, bsz): r = self._on_bucket_free(bix, bsz) if r == True: self._mark_junk(bix, bsz) elif r == False: self._mark_tidy(bix, bsz, 0) elif r is None: pass else: assert False, "Invalid return from _on_free_bix: %r" % r def _free(self, stk, tid, eva): if __debug__: logging.debug(">_free eva=%x", eva) if self._paranoia > PARANOIA_STATE_PER_OPER: self._state_asserts() # Look up existing allocation bix = self._va2bix(eva) b = self._bix2szbm[bix] # Sanity check state (spanbase, spansize, spanst) = (None, None, None) if __debug__: (spanbase, spansize, spanst) = self._bix2state.get(bix) assert (spanst == BuckSt.BUMP) or (spanst == BuckSt.WAIT), \ ("Attempting to free in non-BUMP/WAIT bucket:", bix, spanst) (sz, bbm) = b if self._issmall(sz): # Small allocation. Set bit in bitmask. boff = eva - self._bix2va(bix) assert boff % sz == 0, "Nonzero phase in small bucket" bitix = int(boff / sz) bitm = 1 << bitix assert bbm & bitm == 0, "Free of bitmask-free object" bbm |= bitm if spanst == BuckSt.BUMP: bbs = self._szbix2ap[sz] assert bix in bbs, ("Free in BUMP state but not any BUMP bucket", \ sz, bix, bbix) bbap = bbs[bix] assert bitix < bbap, ("Free in BUMP bucket beyond alloc ptr", \ sz, bix, bitix, bbap) if bbm == (1 << self._maxoix(sz)) - 1: # All objects now free; move bucket state assert bix not in self._szbix2ap.get(sz, {}), \ ("Freeing bucket still registered as bump block", \ bix, sz, self._bix2szbm[bix], self._szbix2ap.get(sz)) assert spanst == BuckSt.WAIT, "Freeing bucket in non-WAIT state" self._free_bix(bix, 1) # XXX At the moment, we only unmap when the entire bucket is free. # This is just nwf being lazy and not wanting to do the bit math for # page-at-a-time release. self._publish('unmapd', stk, tid, self._bix2va(bix), self._bix2va(bix + 1)) else: # Just update self._bix2szbm[bix] = (sz, bbm) else: # Large allocation, retire all blocks to JUNK, UNMAP, and maybe revoke bsz = self._sz2nbucks(sz) assert spanst == BuckSt.WAIT, \ ("Freeing large span in incorrect state", sz, spanst, bix, b, self._state_diag()) assert spanbase <= bix and bix + bsz <= spanbase + spansize, \ "Mismatched bucket states of large allocation" self._free_bix(bix, bsz) self._publish('unmapd', stk, tid, self._bix2va(bix), self._bix2va(bix + bsz)) if __debug__: logging.debug("<_free eva=%x", eva) # --------------------------------------------------------------------- }}} # Reallocation -------------------------------------------------------- {{{ # Since we sometimes allocate a bit more than we need, our realloc is # potentially nontrivial. We're being a little sloppy if we do this, # tho', as we're reusing memory without revoking it. We consider this # acceptable, tho', because we presume that realloc does not change the # type of the object nor its effective lifetime, and so even if the object # temporarily shrinks and then expands, it's still the same object. # # If you're not convinced by the above, you're exactly the kind of person # that --realloc=onlyshrink or --realloc=none are for! 
# # NB: When making an object smaller, this does not and MUST NOT transition # the tail to JUNK state for pending reuse, because the capabilities we # have will have given out originally still span the whole region and so # could come to overlap later allocations (if this allocation is not # freed, so that they will not be subsets in the revocation test). A real # revoker may wish to scream loudly if it finds capabilities partially # overlapping the revocation region (subsuming capabilities are presumably # OK, as the allocator holds these). # def _try_realloc_yes(self, stk, tid, oeva, nsz): if self._paranoia > PARANOIA_STATE_PER_OPER: self._state_asserts() # Find the size of the existing allocation bix = self._va2bix(oeva) b = self._bix2szbm[bix] # Sanity check state if __debug__: (spanbase, spansize, spanst) = self._bix2state.get(bix) assert (spanst == BuckSt.BUMP) or (spanst == BuckSt.WAIT), \ "Attempting to realloc in non-BUMP/WAIT bucket" (osz, _) = b if nsz <= osz: if __debug__: logging.debug("<_try_realloc shrink eva=%x", oeva) # Shrinking is always fine, I suppose # Don't update the block size, even if it's not a bitmap bucket (which # can't be so updated anyway) and don't move anything to JUNK return True if self._issmall(osz): if __debug__: logging.debug("<_try_realloc small eva=%x", oeva) # Small allocation not growing by much. return self._issmall(nsz) and self._szfix(nsz) == osz # Unfortunately, even if the next small piece is free, it's not easy # to use it. While we could grow into it and immediately mark it free # (relying on the non-freeness of the current allocation to prevent # freeing of the bucket, though this becomes more complicated with # page-at-a-time unmapping), subsequent reallocations would not only # not be able to do this trick but also fail to copy the additional # data, which would be really bad, since the size is derived from the # bucket metadata. # Large allocation getting larger. If not much larger... if nsz <= self._nbucks2sz(self._sz2nbucks(osz)): if __debug__: logging.debug("<_try_realloc sm enlarging eva=%x (bix=%d) osz=%d nsz=%d %s", \ self._bix2va(bix), bix, osz, nsz, self._state_diag()) self._bix2szbm[bix] = (nsz, 0) return True # It might happen that we have enough free spans ahead of us that we can # just gobble them. eix = bix + self._sz2nbucks(osz) (nextbase, nextsize, nextst) = self._bix2state.get(eix) if nextst not in {BuckSt.TIDY, BuckSt.AHWM}: if __debug__: logging.debug("<_try_realloc up against %s at eva=%x", nextst, oeva) return False if nsz <= osz + self._nbucks2sz(nextsize): if __debug__: logging.debug("<_try_realloc enlarging eva=%x osz=%d nsz=%d", \ self._bix2va(bix), osz, nsz) self._bix2szbm[bix] = (nsz, 0) self._mark_allocated(eix, self._sz2nbucks(nsz - osz), BuckSt.WAIT) self._publish('mapd', stk, tid, self._nbucks2sz(bix + self._sz2nbucks(osz)), \ self._nbucks2sz(bix + self._sz2nbucks(nsz)), 0b11) return True return False # In-place allocations only if the resulting object is smaller (or no # bigger than size rounded up, in terms of bitmap buckets; we don't have # the original size to enforce a strict shrinkage policy); see caveat # above for why this does not transition any bytes to JUNK. 
def _try_realloc_onlyshrink(self, stk, tid, oeva, nsz): if self._paranoia > PARANOIA_STATE_PER_OPER: self._state_asserts() # Find the size of the existing allocation bix = self._va2bix(oeva) b = self._bix2szbm[bix] # Sanity check state if __debug__: (spanbase, spansize, spanst) = self._bix2state.get(bix) assert (spanst == BuckSt.BUMP) or (spanst == BuckSt.WAIT), \ "Attempting to realloc in non-BUMP/WAIT bucket" (osz, _) = b return nsz <= osz def _try_realloc_never(self, stk, tid, oeva, nsz): # Don't bother with PARANOIA_STATE_PER_OPER since we're just going to # call down anyway return False # --------------------------------------------------------------------- }}} # Rendering ----------------------------------------------------------- {{{ def occshade(self, szbm): (sz, bm) = szbm maxoix = self._maxoix(sz) pc = bin(bm).count("1") # XXX YIKES i = ceil(256.0 * pc / maxoix) - 1 return (i << 8) + ((255 - i) << 16) def render(self, img): from common.render import renderSpansZ from PIL import ImageDraw zo = img.width.bit_length() << 1 basebix = next(loc for (loc, _, _) in self._bix2state) # Paint most of the buckets; exclude AHWM since that's big renderSpansZ( img, zo, ((loc - basebix, sz, bst2color[st]) for (loc, sz, st) in self._bix2state if st != BuckSt.AHWM)) # Paint over small WAIT buckets with some occupancy information renderSpansZ(img, zo, ((bix - basebix, 1, self.occshade(self._bix2szbm[bix])) for (loc, sz, st) in self._bix2state if st == BuckSt.WAIT for bix in range(loc, loc + sz) if self._bix2szbm.get(bix, None) is not None if self._issmall(self._bix2szbm[bix][0]))) # Paint over the largest revokable span (which may hide some TIDY # blocks, but that's fine) brss = self._find_largest_revokable_spans(n=1) if brss != [] and brss[0][1] is not None: renderSpansZ(img, zo, [(brss[0][1] - basebix, brss[0][2], cBRS)]) # Paint over the oldest JUNK span oldestj = self._junklru.first if oldestj is not None: (qbix, qsz, _) = self._bix2state.get(oldestj.value[0], coalesce_with_values=st_tj) renderSpansZ(img, zo, [(qbix - basebix, qsz, cOJS)]) def _render_expanded(self, img): from common.render import renderSpansZ from PIL import ImageDraw zo = img.width.bit_length() << 1 basebix = next(loc for (loc, _, _) in self._bix2state) # bix and offset to pixel index def expand(bix, off): return (bix * 2**self._bucklog + off + 15) >> 4 # render a bitmap bucket at block index offset (relative to basebix) def rendszbm(bio, ap, sz, bm): ep = self._maxoix(sz) if ap is None: ap = ep for oix in range(0, ap): renderSpansZ( img, zo, [(expand(bio, oix * sz), expand(0, sz * (oix + 1) - 1) - expand(0, sz * oix), bst2color[BuckSt.JUNK] if bm & 1 == 1 else bst2color[BuckSt.WAIT])]) bm >>= 1 renderSpansZ(img, zo, [(expand(bio, ap * sz), expand(0, sz * ep) - expand(0, sz * ap), bst2color[BuckSt.TIDY])]) # Paint most of the buckets; exclude AHWM since that's big for (loc, sz, st) in self._bix2state: # skip AHWM if st == BuckSt.AHWM: continue # JUNK, and TIDY are entirely uniform elif st == BuckSt.JUNK: renderSpansZ( img, zo, [(expand(loc - basebix, 0), expand(sz, 0), bst2color[st])]) elif st == BuckSt.TIDY: renderSpansZ( img, zo, [(expand(loc - basebix, 0), expand(sz, 0), bst2color[st])]) # BUMP states are backed at every bix with a bitmap elif st == BuckSt.BUMP: for bix in range(loc, loc + sz): (asz, bm) = self._bix2szbm[bix] rendszbm(bix - basebix, self._szbix2ap[asz].get(bix, None), asz, bm) # WAIT states are complicated: they are either backed with a bitmap # or by a large value, indicating the uniform occupancy of one 
or # more buckets. We don't have better resolution than that, so just # render those uniformly. elif st == BuckSt.WAIT: bix = loc while bix < loc + sz: (asz, bm) = self._bix2szbm[bix] if self._issmall(asz): # bitmap, one bucket rendszbm(bix - basebix, self._szbix2ap[asz].get(bix, None), asz, bm) bix += 1 else: # large object nsz = self._sz2nbucks(asz) renderSpansZ(img, zo, [(expand(bix - basebix, 0), expand( nsz, 0), bst2color[st])]) bix += nsz
class TraditionalAllocatorBase(RenamingAllocatorBase): # Initialization ------------------------------------------------------ {{{ __slots__ = ( '_alignlog', # Power of two default alignment '_alignmsk', # Derived alignment mask '_minsize', # Minimum allocation size '_paranoia', # Self-tests '_tslam', # fetch the current trace timestamp '_basepg', # Bottom-most page index to use '_brscache', # cached biggest revokable span '_eva2sst', # Emulated Virtual Address to Segment STate '_eva2sz', # EVA to size for outstanding allocations (WAIT states) '_evp2pst', # Emulated Virtual Page to Page STate '_junklru', # LRU queue of JUNK segments, by base address '_junkadn', # JUNK segment base EVA to node in junklru '_njunk', # Number of bytes JUNK '_nmapped', # Number of bytes MAPD '_npend', # Number of bytes in PEND state '_nwait', # Number of bytes WAIT (allocated) '_pagelog', # Base-2 log of page size '_tidylst', # SegFreeList of TIDY spans '_wildern' # Wilderness location ) __metaclass__ = ABCMeta # Argument definition and response ------------------------------------ {{{ @staticmethod def _init_add_args(argp): argp.add_argument('--paranoia', action='store', type=int, default=0) argp.add_argument('--min-size', action='store', type=int, default=16) argp.add_argument('--align-log', action='store', type=int, default=2) def _init_handle_args(self, args): self._alignlog = args.align_log self._alignmsk = (1 << args.align_log) - 1 self._minsize = args.min_size self._paranoia = args.paranoia if self._paranoia == 0 and __debug__: logging.warn("Assertions still enabled, even with paranoia 0; " "try python -O") if self._paranoia != 0 and not __debug__: raise ValueError("Paranoia without assertions will just be slow") # --------------------------------------------------------------------- }}} def __init__(self, **kwargs): super().__init__() self._tslam = kwargs['tslam'] self._paranoia = 0 # Argument parsing ---------------------------------------------------- {{{ argp = argparse.ArgumentParser() self._init_add_args(argp) self._init_handle_args(argp.parse_args(kwargs['cliargs'])) # --------------------------------------------------------------------- }}} self._pagelog = 12 self._basepg = 1 baseva = self._basepg * 2**self._pagelog self._brscache = None self._eva2sst = IntervalMap(baseva, 2**64 - baseva, SegSt.AHWM) self._eva2sz = {} self._evp2pst = IntervalMap(self._basepg, 2**(64 - self._pagelog) - self._basepg, PageSt.UMAP) self._junklru = dllist() self._junkadn = {} self._njunk = 0 self._nmapped = 0 self._npend = 0 self._nwait = 0 self._tidylst = SegFreeList(extcoal=self._sfl_coalesce) self._wildern = baseva # --------------------------------------------------------------------- }}} # Size-related utility functions -------------------------------------- {{{ def _eva2evp(self, eva): return eva >> self._pagelog def _evp2eva(self, evp): return evp << self._pagelog def _eva2evp_roundup(self, eva): return (eva + (1 << self._pagelog) - 1) >> self._pagelog def _npg2nby(self, npg): return npg << self._pagelog def _eva_align_roundup(self, eva): return ((eva + self._alignmsk) >> self._alignlog) << self._alignlog # --------------------------------------------------------------------- }}} # Additional state assertions and diagnostics ------------------------- {{{ def _state_asserts(self): # Ensure that our wilderness looks like the HWM (qbase, qsz, qv) = self._eva2sst[self._wildern] # "I'm sure it's around here somewhere" assert qbase + qsz == 2**64, ("wilderness lost", self._wildern, qbase, qsz, qv) # "no longer above 
high water mark" assert qv == SegSt.AHWM, ("wilderness flooded", self._wildern, qbase, qsz, qv) # All outstanding allocations are backed by WAIT and MAPD segments, yes? for a in self._eva2sz.keys(): (qbase, qsz, qv) = self._eva2sst[a] assert qv == SegSt.WAIT, ("rude allocation", a, qv) # not WAITing # segment too short for allocation assert qbase + qsz >= a + self._eva2sz[a], ("alloc overflow", a) (qbase, qsz, qv) = self._evp2pst[self._eva2evp(a)] assert qv == PageSt.MAPD, ("lost allocation", a, qv) # "un-mapped" assert self._evp2eva(qbase) + self._npg2nby(qsz) >= a + self._eva2sz[a],\ ("partially lost allocation", a, self._eva2sz[a], qbase, qsz) # All JUNK queue entries are backed by JUNK segments for (jb, jsz) in self._junklru: (qb, qsz, qv) = self._eva2sst[jb] assert jb == qb and jsz == qsz and qv == SegSt.JUNK, \ ("JUNK list state mismatch", (jb, jsz), (qb, qsz, qv)) assert jb in self._junkadn, "JUNK node not in index" assert (jb, jsz) == self._junkadn[jb].value, "JUNK index botch" for jb in self._junkadn: assert self._junkadn[jb].value[0] == jb jsz = self._junkadn[jb].value[1] (qb, qsz, qv) = self._eva2sst[jb] assert jb == qb and jsz == qsz and qv == SegSt.JUNK, \ ("JUNK list state mismatch", (jb, jsz), (qb, qsz, qv)) # All TIDY list entries are backed by TIDY segments, and the SegFL is OK for (tb, tsz) in self._tidylst.iterlru(): (qb, qsz, qv) = self._eva2sst[tb] assert tb == qb and tsz == qsz and qv == SegSt.TIDY, \ ("TIDY list state mismatch", (tb, tsz), (qb, qsz, qv)) self._tidylst.crossreference_asserts() # All WAIT spans are covered by allocations, all JUNK and TIDY spans # correspond with entries in their queues nwait = 0 njunk = 0 npend = 0 for (qb, qsz, qv) in self._eva2sst: if qv == SegSt.WAIT: nwait += qsz ab = qb while ab < qb + qsz: asz = self._eva2sz.get(ab, None) assert asz is not None, ("WAIT w/o alloc sz", qb, ab) ab += asz assert ab == qb + qsz, "Allocations overrun WAIT segment?" elif qv == SegSt.TIDY: assert qsz == self._tidylst.peek(qb) elif qv == SegSt.JUNK: njunk += qsz dln = self._junkadn.get(qb, None) assert dln is not None assert dln.value == (qb, qsz) elif qv == SegSt.AHWM: assert qb == self._wildern, "There must be only one final frontier" elif qv == SegSt.PEND: npend += qsz assert nwait == self._nwait, ("Improper account of WAIT bytes", nwait, self._nwait) assert njunk == self._njunk, ("Improper account of JUNK bytes", njunk, self._njunk) assert npend == self._npend, ("Improper account of PEND bytes", npend, self._npend) # All MAPD segments have some reason to be mapped? Well, maybe not # exactly, since we are lazy about unmapping, or might be. # ## for (mb, msz, mv) in self._eva2pst : ## if mv != PageSt.MAPD : continue ## for (qb, qsz, qv) in self._eva2sst[mb:mb+msz] : ## if qv == SegSt.WAIT : break ## else : assert False, ("MAPD w/o WAIT", mb, msz) # --------------------------------------------------------------------- }}} # Revocation logic ---------------------------------------------------- {{{ def _sfl_coalesce(self, va): (qva, qsz, _) = self._eva2sst.get(va) return (qva, qsz) # Mark a span TIDY. This must not be used to re-mark any existing TIDY # span. # # Inserts the coalesced span at the end of tidylst. def _mark_tidy(self, loc, sz): self._eva2sst.mark(loc, sz, SegSt.TIDY) self._tidylst.insert(loc, sz) def _mark_revoked(self, loc, sz): self._mark_tidy(loc, sz) # An actual implementation would maintain a prioqueue or something; # we can get away with a linear scan. 
We interrogate the segment state # interval map for ease of coalescing, even though we also maintain a # parallel JUNK LRU queue. Returns spans as (njunk, base, size) triples, # coalescing JUNK and TIDY segments together. def _find_largest_revokable_spans(self, n=1): if n == 0: return if n == 1 and self._brscache is not None: return [self._brscache] bests = [(0, -1, -1)] # [(njunk, loc, sz)] in ascending order for (qbase, qsz, qv) in self._eva2sst.iter_vfilter(None, self._wildern, sst_tj): # smaller spans don't interest us if qsz <= bests[0][0]: continue # Reject spans that are entirely TIDY already. js = [ sz for (_, sz, v) in self._eva2sst[qbase:qbase + qsz] if v == SegSt.JUNK ] if js == []: continue # Sort spans by number of JUNK bytes, not JUNK|TIDY bytes nj = sum(js) if nj <= bests[0][0]: continue insort(bests, (nj, qbase, qsz)) bests = bests[(-n):] # Go ahead and set this now, even though it's likely we're about to # use this span in revocation and, so, invalidate this cache. Still, # if we don't, so much the better, yeah? self._brscache = bests[-1] return [best for best in bests if best[1] >= 0] def _do_revoke(self, ss): if self._paranoia > PARANOIA_STATE_ON_REVOKE: self._state_asserts() self._brscache = None for (nj, loc, sz) in ss: self._njunk -= nj # Because we coalesce with TIDY spans while revoking, there may be # several JUNK spans in here. Go remove all of them from the LRU. for (qb, qsz, qv) in self._eva2sst[loc:loc + sz]: assert qv in sst_tj, "Revoking non-revokable span" if qv == SegSt.JUNK: self._junklru.remove(self._junkadn.pop(qb)) self._mark_revoked(qb, qsz) self._publish('revoked', "---", "", *((loc, loc + sz) for (_, loc, sz) in ss)) def _do_revoke_best_and(self, n=None, revoke=[]): revs = list(revoke) assert len(revs) <= self._revoke_k, (revoke) if n is None: n = self._revoke_k nrev = None brss = self._find_largest_revokable_spans(n=n + 1) rset = set() for rloc in revs: for (brnj, brloc, brsz) in brss: if brloc <= rloc < brloc + brsz: rset.add((brnj, brloc, brsz)) break else: (qloc, qsz, qv) = self._eva2sst.get(rloc, coalesce_with_values=sst_tj) rset.add((sum([ sz for (_, sz, v) in self._eva2sst[qloc:qloc + qsz] if v == SegSt.JUNK ]), qloc, qsz)) while len(rset) <= n and brss != []: rset.add(brss[-1]) brss = brss[:-1] self._do_revoke(rset) while brss != []: if brss[-1] not in rset: break brss = brss[:-1] if brss != []: self._brscache = brss[-1] else: self._brscache = (0, -1, -1) @abstractmethod def _maybe_revoke(self): pass # --------------------------------------------------------------------- }}} # Allocation ---------------------------------------------------------- {{{ def _alloc_place(self, stk, sz): # XXX Approximate best-fit / oldest-fit strategy, since coalesced # entries are moved to the back of the tidy list. # # Note the requirement to either fit exactly or leave some threshold # of bytes available. (XXX but it's not quite the right test, is it?) 
# for (pos, psz) in self._tidylst.iterfor(sz, 1 << self._alignlog): apos = self._eva_align_roundup(pos) if apos == pos: return pos elif pos + psz >= apos + sz: return apos return self._eva_align_roundup(self._wildern) def _ensure_mapped(self, stk, tid, reqbase, reqsz): pbase = self._eva2evp(reqbase) plim = self._eva2evp(reqbase + reqsz - 1) + 1 for (qb, qsz, qv) in self._evp2pst[pbase:plim]: if qv == PageSt.MAPD: continue b = max(qb, pbase) l = min(qb + qsz, plim) self._nmapped += self._npg2nby(l - b) self._publish('mapd', stk, tid, self._evp2eva(b), self._evp2eva(l), 0b11) self._evp2pst.mark(pbase, plim - pbase, PageSt.MAPD) # When marking a span allocated, we may have residual TIDY segments left # over. Because overriding implementatins may be tracking their own # metadata about TIDY spans, we provide this hook for intercepting without # having to duplicate all the work doen in _mark_allocated. Unlike # _mark_tidy, these spans are already marked TIDY, they are just not in # the TIDY metadata structures. def _mark_allocated_residual(self, stk, loc, sz, isLeft): self._tidylst.insert(loc, sz) def _mark_allocated(self, stk, reqbase, reqsz): if self._paranoia > PARANOIA_STATE_PER_OPER: (qbase, qsz, qv) = self._eva2sst.get(reqbase, coalesce_with_values=sst_at) assert qv in sst_at, ("New allocated mark in bad state", \ (reqbase, reqsz), (qbase, qsz, qv), list(self._eva2sst)) assert qbase + qsz >= reqbase + reqsz, "New allocated undersized?" # Remove span from tidy list; may create two more entries. # No need to use the coalescing insert functionality here because we # know, inductively, that we certainly won't coalesce in either direction. # # XXX We act as though any residual spans have been just created; is # that the right policy? # # XXX Don't create segments less than the minimum allocation size, as # there's no possible utility to them and we'll catch them # post-coalescing in mark_tidy. This change will require modification # to our asserts and sanity checking, too. if reqbase < self._wildern: (qb, qsz, qv) = self._eva2sst[reqbase] assert qv == SegSt.TIDY assert qsz >= reqsz tsz = self._tidylst.remove(qb) assert tsz == qsz # Do the marking now, so that our work on our tidy list sees the # correct (lack of) coalescing hereafter, but above we wanted to find # the whole TIDY span. The duplication with the else branch below is # a little sad. :/ self._eva2sst.mark(reqbase, reqsz, SegSt.WAIT) if qb + qsz != reqbase + reqsz: # Insert residual right span self._mark_allocated_residual(stk, reqbase + reqsz, qb + qsz - reqbase - reqsz, False) if reqbase != qb: # Insert residual left span self._mark_allocated_residual(stk, qb, reqbase - qb, True) else: # Homesteading beyond the wildnerness frontier leaves a TIDY gap if reqbase > self._wildern: self._mark_tidy(self._wildern, reqbase - self._wildern) self._eva2sst.mark(reqbase, reqsz, SegSt.WAIT) # If the allocation takes place within the current best revokable span, # invalidate the cache and let the revocation heuristic reconstruct it. 
if self._brscache is not None: (_, brsix, brssz) = self._brscache if brsix <= reqbase < brsix + brssz: self._brscache = None self._nwait += reqsz self._wildern = max(self._wildern, reqbase + reqsz) def _alloc(self, stk, tid, sz): if self._paranoia > PARANOIA_STATE_PER_OPER: self._state_asserts() if sz < self._minsize: sz = self._minsize # minimum size sz = (sz + self._alignmsk) & ~self._alignmsk # and alignment loc = self._alloc_place(stk, sz) assert loc & self._alignmsk == 0 self._ensure_mapped("malloc " + stk, tid, loc, sz) self._mark_allocated(stk, loc, sz) self._eva2sz[loc] = sz return (loc, sz) # --------------------------------------------------------------------- }}} # Free ---------------------------------------------------------------- {{{ def _ensure_unmapped(self, stk, tid, loc, sz): pbase = self._eva2evp_roundup(loc) plim = self._eva2evp(loc + sz - 1) if pbase == plim: return # might not be an entire page for (qb, qsz, qv) in self._evp2pst[pbase:plim]: if qv == PageSt.UMAP: continue b = max(qb, pbase) l = min(qb + qsz, plim) self._nmapped -= self._npg2nby(l - b) self._publish('unmapd', stk, tid, self._evp2eva(b), self._evp2eva(l)) self._evp2pst.mark(pbase, plim - pbase, PageSt.UMAP) # When exiting the WAIT sate, there are multiple ways things can go: # # PEND: for some reason, this span of memory is neither reusable nor # revokable. One assumes that eventually this will no longer # be true and so we will see PEND -> {TIDY, JUNK} transitions # here, too. # # TIDY: This memory does not need to be run through a revocation pass. # Either we are running in an unsafe mode or there is some other # mechanism available, such as fast pointer invalidation # (MTE/SSM). # # JUNK: This memory needs to be revoked before it can be reused. # # This function handles all of the associated logic. The lists given # should not contain coalescable regions for efficiency's sake, but I do # not think anything will go wrong if they do. def _mark_free(self, stk, tid, pends, tidys, junks): for (loc, sz) in pends: self._nwait -= sz self._npend += sz self._eva2sst.mark(loc, sz, SegSt.PEND) for (loc, sz) in tidys: self._mark_free_accounting_helper(loc, sz) self._mark_tidy(loc, sz) (qb, qsz, qv) = self._eva2sst.get(loc) assert qv == SegSt.TIDY, (loc, sz, qb, qsz, qv, list(self._eva2sst)) self._mark_free_unmap_helper(stk, tid, qb, qsz) for (loc, sz) in junks: self._mark_free_accounting_helper(loc, sz) self._eva2sst.mark(loc, sz, SegSt.JUNK) self._njunk += sz # If it happens that this span may be larger than the cached largest # revokable span, invalidate the cache if self._brscache is not None: (brsnj, _, _) = self._brscache (_, qsz, _) = self._eva2sst.get(loc, coalesce_with_values=sst_tj) if qsz >= brsnj: self._brscache = None # Update the JUNK LRU (qb, qsz) = dll_im_coalesced_insert(loc, sz, self._eva2sst, self._junklru, self._junkadn) self._mark_free_unmap_helper(stk, tid, qb, qsz) def _mark_free_accounting_helper(self, loc, sz): for (qb, qsz, qv) in self._eva2sst[loc:loc + sz]: lim = min(qb + qsz, loc + sz) qb = max(loc, qb) qsz = lim - qb if qv == SegSt.PEND: self._npend -= qsz elif qv == SegSt.WAIT: self._nwait -= qsz else: assert False # If the span is large enough, go ensure that it is unmapped, save # possibly for some material on either side. 
# XXX configurable policy def _mark_free_unmap_helper(self, stk, tid, qb, qsz): if qsz > (16 * 2**self._pagelog): self._ensure_unmapped("free " + stk, tid, qb, qsz) def _free(self, stk, tid, loc): if self._paranoia > PARANOIA_STATE_PER_OPER: self._state_asserts() assert self._eva2sst[loc][2] == SegSt.WAIT, "free non-WAIT?" # Mark this span as junk sz = self._eva2sz.pop(loc) self._mark_free(stk, tid, [], [], [(loc, sz)]) def _free_unsafe(self, stk, tid, loc): if self._paranoia > PARANOIA_STATE_PER_OPER: self._state_asserts() assert self._eva2sst[loc][2] == SegSt.WAIT, "free non-WAIT?" # Immediately mark this span as TIDY, rather than JUNK sz = self._eva2sz.pop(loc) self._mark_free(stk, tid, [], [(loc, sz)], []) # --------------------------------------------------------------------- }}} # Realloc ------------------------------------------------------------- {{{ def _try_realloc(self, stk, tid, oeva, nsz): # XXX return False # --------------------------------------------------------------------- }}} # Rendering ----------------------------------------------------------- {{{ def render(self, img): from common.render import renderSpansZ from PIL import ImageDraw sst2color = { SegSt.TIDY: 0xFFFFFF, SegSt.WAIT: 0x00FF00, SegSt.JUNK: 0xFF0000, } baseva = self._basepg * 2**self._pagelog zo = img.width.bit_length() << 1 renderSpansZ( img, zo, (((loc - baseva) >> self._alignlog, sz >> self._alignlog, sst2color[st]) for (loc, sz, st) in self._eva2sst.irange(baseva, self._wildern))) # Paint over the oldest JUNK span oldestj = self._junklru.first if oldestj is not None: (qb, qsz) = oldestj.value qb -= baseva renderSpansZ( img, zo, [(qb >> self._alignlog, qsz >> self._alignlog, 0xFF00FF)]) # Paint over the oldest TIDY span oldestt = self._tidylst.eldest() if oldestt is not None: (qb, qsz) = oldestt qb -= baseva renderSpansZ( img, zo, [(qb >> self._alignlog, qsz >> self._alignlog, 0x00FFFF)])
def __init__(self, **kwargs): super().__init__() self._eva2sz = {} self._maxeva = 0 self._state = IntervalMap(4096, 2**64, False)
class Allocator(TraditionalAllocatorBase):

    __slots__ = (
        '_mtags',       # Memory tag version intervalmap
        '_nvers',       # Number of versions
        '_revoke_all',  # Concurrency is infinitely fast; revoke all free spans
        '_revoke_jwr',  # JUNK/WAIT ratio triggering revocation
        '_revoke_k',    # Limited revocation facilities
        '_revoke_lru',  # With revoke_k, sample from junklru, too.
        '_revoke_min',  # Suppress revocations reclaiming fewer JUNK bytes
    )

    @staticmethod
    def _init_add_args(argp):
        super(__class__, __class__)._init_add_args(argp)
        argp.add_argument('--versions', action='store', type=int, default=16)
        argp.add_argument('--revoke-min', action='store', type=int, default=0,
                          help="Suppress revocations reclaiming fewer JUNK bytes")
        argp.add_argument('--revoke-factor', action='store', type=float, default=None,
                          help="Ratio of JUNK to WAIT triggering revocation")
        argp.add_argument('--revoke-k', action='store', type=int, default=None,
                          help="Assume limited revocation facilities")
        argp.add_argument('--revoke-lru', action='store', type=int, default=None,
                          help="Ensure old maximal-versioned spans eventually recycled")
        argp.add_argument('--revoke-sort', action='store', type=str, default="clock",
                          choices=["clock", "size"],
                          help="Selection function for limited revocation")
        argp.add_argument('--revoke-all-colors', action='store_true', default=False,
                          help="Revoke all free spans, not just maximal-versioned")

    def _init_handle_args(self, args):
        super(__class__, self)._init_handle_args(args)

        self._nvers = args.versions
        self._revoke_all = args.revoke_all_colors
        self._revoke_jwr = args.revoke_factor
        self._revoke_min = args.revoke_min
        self._revoke_k = args.revoke_k
        self._revoke_lru = args.revoke_lru

        if self._revoke_lru is not None:
            if self._revoke_k is None:
                self._revoke_k = self._revoke_lru
            else:
                assert self._revoke_k >= self._revoke_lru
        else:
            self._revoke_lru = 0

        if args.revoke_sort == "clock":
            self._find_largest_revokable_spans = self._find_clockiest_revokable_spans
        elif args.revoke_sort == "size":
            pass

        if args.revoke_k is None and args.revoke_lru is not None:
            raise ValueError("--revoke-lru only sensible with --revoke-k")
        if args.revoke_factor is None and args.revoke_min == 0:
            raise ValueError("Please restrain revocation with --revoke-factor or --revoke-min")

    def __init__(self, *args, **kwargs):
        super(__class__, self).__init__(*args, **kwargs)
        self._mtags = IntervalMap(self._evp2eva(self._basepg),
                                  2**64 - self._evp2eva(self._basepg),
                                  0)

    def _state_asserts(self):
        super(__class__, self)._state_asserts()

        # All JUNK spans are at the maximal version, and all non-JUNK spans are
        # at other versions.
        ist = (x for x in self._eva2sst)
        itg = (x for x in self._mtags)
        for (qb, qsz, qvs) in im_stream_inter(lambda _: ist, lambda _: itg):
            (qst, qtv) = qvs
            if qst == SegSt.JUNK:
                assert qtv == self._nvers, (qb, qsz, qtv)
            elif qst == SegSt.AHWM:
                assert qtv == 0
            else:
                assert qtv != self._nvers

    # XXX
    # We'd like to ask a slightly different question, namely: where can we
    # place this to minimize the advancement of the version clocks.  You might
    # think we'd never advance in allocate, having advanced in free, but if we
    # choose a place formed from several freed spans, we have to advance to the
    # max of all spans we end up using, which might advance some of the clocks
    # quite a bit.
    # ...
    # At the moment, tidylst coalesces all versions together.  We should
    # instead hunt for a minimum of the (byte*clock_delta) sum over the places
    # considered.  We can stop early if we find a zero, of course.
    #
    def _alloc_place(self, stk, sz):
        pos = super()._alloc_place(stk, sz)
        nv = max(v for (_, __, v) in self._mtags[pos:pos + sz])
        assert nv != self._nvers
        self._mtags.mark(pos, sz, nv)
        return pos

    def _free(self, stk, tid, loc):
        sz = self._eva2sz[loc]
        (_, __, v) = self._mtags.get(loc)
        if v == self._nvers - 1:
            super(__class__, self)._free(stk, tid, loc)
            self._mtags.mark(loc, sz, self._nvers)
        else:
            super(__class__, self)._free_unsafe(stk, tid, loc)
            self._mtags.mark(loc, sz, v + 1)

    def _mark_revoked(self, loc, sz):
        super()._mark_revoked(loc, sz)
        self._mtags.mark(loc, sz, 0)

    # Sort by the sum of (sz*version), as that is the value by which we wind
    # back the clock to defer later revocations.
    def _find_clockiest_revokable_spans(self, n=1):
        if n == 0:
            return

        bests = [(0, 0, -1, -1)]  # [(clocksum, njunk, loc, sz)] in ascending order
        for (qbase, qsz, qv) in self._eva2sst.iter_vfilter(None, self._wildern, sst_tj):

            clocksum = 0
            for (vbase, vsz, vv) in self._mtags[qbase:qbase + qsz]:
                # Don't walk off the end of the last segment
                vsz = min(vsz, qbase + qsz - vbase)
                clocksum += vsz * vv

            if clocksum <= bests[0][0]:
                continue

            # For internal accounting, also accumulate the number of JUNK bytes
            nj = sum(sz for (_, sz, v) in self._eva2sst[qbase:qbase + qsz]
                     if v == SegSt.JUNK)

            insort(bests, (clocksum, nj, qbase, qsz))
            bests = bests[(-n):]

        return [best[1:] for best in bests if best[2] >= 0]

    # Revocation here does not have a fixed number of windows on which it can
    # operate; everything in the junk (and tidy!) lists can be revoked in a
    # single go.  In implementation, this looks like validating that every
    # capability's contained version field matches the version painted in RAM.
    #
    # The limits of the allocator behavior range from being able to actually
    # get all free spans, regardless of their version (JUNK or TIDY), to being
    # able to reclaim just the ones that were already at the maximal version
    # (i.e. JUNK).  In practice, one could imagine the allocator maintaining
    # a "free epoch" bit and reclaiming (i.e., restoring to version 0) all
    # segments whose free epoch predates the current, now-ending sweep.
    #
    def _maybe_revoke(self):
        # Not above ratio threshold
        if self._revoke_jwr is not None \
           and self._njunk < self._revoke_jwr * self._nwait:
            return

        if self._njunk < self._revoke_min:
            return

        if self._revoke_k is None:
            it = ((jsz, jb, jsz) for (jb, jsz) in self._junklru)
            if self._revoke_all:
                it = itertools.chain(
                    it, ((0, tb, tsz) for (tb, tsz) in self._tidylst.iterlru()))
        else:
            # Allocate room for up to _revoke_lru things from the JUNK LRU.
            nlru = min(self._revoke_lru, len(self._junklru))

            # XXX I'd love to, but boy is this a slow way to do it.
            #
            ## # Estimate reclaim quantity
            ## unjunk = self._find_largest_revokable_spans(n=self._revoke_k - nlru)
            ## unjunk = sum(nj for (nj, _, __) in unjunk)
            ## if unjunk < self._revoke_min:
            ##     return
            #
            # This is going to be much faster due to the brscache.
            #
            unjunk = self._find_largest_revokable_spans(n=1)
            if len(unjunk) * self._revoke_k < self._revoke_min:
                return

            # Do that again, but as part of the set of things to push to the revoker
            it = (x for x in
                  self._find_largest_revokable_spans(n=self._revoke_k - nlru))

            # Add from the LRU JUNK queue
            it = itertools.chain(it,
                                 ((jsz, jb, jsz) for (jb, jsz) in self._junklru))

            # XXX Could also pull from the TIDY queue, but should filter by those
            # that contain nonzero versions or sort by the clockiest span or
            # something.
            #
            # XXX Should also coalesce with TIDY spans for the things we pull from
            # the LRU queue.

            # Limit to the number we actually can run
            # XXX This should deduplicate before slicing.  Sigh.
            #
            it = itertools.islice(it, self._revoke_k)

        self._do_revoke(list(it))
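# ------------------------------------------------------------------------- #
# Illustrative sketch (not part of the simulator above): the "clockiest"
# selection ranks candidate spans by the sum of (bytes * version), i.e. by
# how far revoking the span would wind the version clocks back toward zero.
# The helper below shows that scoring over plain (base, size, version)
# tuples; the function name and tuple layout are assumptions made for this
# example only.

def clock_weight(span, tags):
    """Return sum(bytes * version) for the parts of `tags` inside `span`.

    `span` is a (base, size) pair; `tags` is an iterable of
    (base, size, version) triples covering at least `span`.
    """
    (qbase, qsz) = span
    total = 0
    for (vbase, vsz, vv) in tags:
        lo = max(vbase, qbase)
        hi = min(vbase + vsz, qbase + qsz)
        if hi > lo:
            total += (hi - lo) * vv
    return total

# For example, a 16-byte span whose halves sit at versions 3 and 1 scores
# 8*3 + 8*1 = 32, and so would be preferred over a same-sized span whose
# bytes all sit at version 1 (score 16).
assert clock_weight((0, 16), [(0, 8, 3), (8, 8, 1)]) == 32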
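# ------------------------------------------------------------------------- #
# Illustrative sketch (not part of the simulator): the free path above bumps
# a span's version on each free and only quarantines it (JUNK, awaiting
# revocation) once the version would saturate at the maximum.  A minimal
# stand-alone model of that counter, with made-up names, might look like
# this:

def bump_version(version, nvers):
    """Return (new_version, needs_revocation) for one free of a span."""
    assert 0 <= version < nvers
    if version == nvers - 1:
        return nvers, True     # saturated: hold as JUNK until revoked back to 0
    return version + 1, False  # versions left to burn: the span can be reused

assert bump_version(0, 16) == (1, False)
assert bump_version(15, 16) == (16, True)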
class Allocator(TraditionalAllocatorBase):

    __slots__ = (
        '_mtags',         # Memory tag version intervalmap
        '_nvers',         # Number of versions
        '_revoke_all',    # Concurrency is infinitely fast; revoke all free spans
        '_revoke_jwr',    # JUNK/WAIT ratio triggering revocation
        '_revoke_k',      # Limited revocation facilities
        '_revoke_lru',    # With revoke_k, sample from junklru, too.
        '_revoke_front',  # Put revoked regions at the head of the TIDY list
        '_revoke_jmin',   # Suppress revocation unless this much JUNK accumulated
        '_revoke_tmax',   # Suppress revocation if more than this much TIDY already
        '_prefs',         # va -> preferred stack flavor
        '_tlpf',          # TIDY List Per Flavor (flavor -> SegFreeList)
        '_stkfaf',        # Stack flavor allocation factor
    )

    @staticmethod
    def _init_add_args(argp):
        super(__class__, __class__)._init_add_args(argp)
        argp.add_argument('--versions', action='store', type=int, default=16)
        argp.add_argument('--revoke-min', action='store', type=int, default=0,
                          help="Suppress revocations reclaiming fewer JUNK bytes")
        argp.add_argument('--revoke-max-tidy', action='store', type=int, default=None,
                          help="Suppress revocations if sufficient TIDY bytes")
        argp.add_argument('--revoke-factor', action='store', type=float, default=None,
                          help="Ratio of JUNK to WAIT triggering revocation")
        argp.add_argument('--revoke-k', action='store', type=int, default=None,
                          help="Assume limited revocation facilities")
        argp.add_argument('--revoke-lru', action='store', type=int, default=None,
                          help="Ensure old maximal-versioned spans eventually recycled")
        argp.add_argument('--revoke-sort', action='store', type=str, default="clock",
                          choices=["clock", "size"],
                          help="Selection function for limited revocation")
        argp.add_argument('--revoke-all-colors', action='store_true', default=False,
                          help="Revoke all free spans, not just maximal-versioned")
        argp.add_argument('--flavor-open-factor', action='store', type=int, default=1024,
                          help="Scale factor when opening a flavored heap region")
        argp.add_argument('--revoke-front', action='store', type=bool, default=True,
                          help="Front revoked spans on the TIDY queue")

    def _init_handle_args(self, args):
        super(__class__, self)._init_handle_args(args)

        self._nvers = args.versions
        self._revoke_all = args.revoke_all_colors
        self._revoke_jwr = args.revoke_factor
        self._revoke_jmin = args.revoke_min
        self._revoke_tmax = args.revoke_max_tidy
        self._revoke_k = args.revoke_k
        self._revoke_lru = args.revoke_lru
        self._revoke_front = args.revoke_front
        self._stkfaf = args.flavor_open_factor

        if self._revoke_lru is not None:
            if self._revoke_k is None:
                self._revoke_k = self._revoke_lru
            else:
                assert self._revoke_k >= self._revoke_lru
        else:
            self._revoke_lru = 0

        if args.revoke_sort == "clock":
            self._find_largest_revokable_spans = self._find_clockiest_revokable_spans
        elif args.revoke_sort == "size":
            pass

        if args.revoke_k is None and args.revoke_lru is not None:
            raise ValueError("--revoke-lru only sensible with --revoke-k")
        if args.revoke_factor is None and args.revoke_min == 0:
            raise ValueError("Please restrain revocation with --revoke-factor or --revoke-min")

    def __init__(self, *args, **kwargs):
        super(__class__, self).__init__(*args, **kwargs)
        self._mtags = IntervalMap(self._evp2eva(self._basepg),
                                  2**64 - self._evp2eva(self._basepg),
                                  0)
        self._prefs = IntervalMap(self._evp2eva(self._basepg),
                                  2**64 - self._evp2eva(self._basepg),
                                  None)
        self._tlpf = {}
        self._tlpf[None] = SegFreeList()

    def _state_asserts(self):
        super(__class__, self)._state_asserts()

        #### Stack flavors:

        # For each flavored TIDY list, check that...
        for stk in self._tlpf.keys():
            # ... the list itself is OK
            self._tlpf[stk].crossreference_asserts()
            # ... and every element ...
            for (pos, sz) in self._tlpf[stk].iterlru():
                # ... is of the correct flavor
                (pbase, psz, pv) = self._prefs[pos]
                assert pv == stk, \
                    ("Mixed-flavor TIDY", (pos, sz, stk), (pbase, psz, pv))
                assert pbase + psz >= pos + sz, \
                    ("More flavored TIDY than preferred", (pos, sz, stk), (pbase, psz))
                # ... is actually tidy
                (qbase, qsz, qv) = self._eva2sst[pos]
                assert qv == SegSt.TIDY, \
                    ("Flavored TIDY is not TIDY", (pos, sz, stk), (qbase, qsz, qv))
                assert qbase + qsz >= pos + sz, \
                    ("More flavor TIDY than TIDY", (pos, sz, stk), (qbase, qsz),
                     self._eva2sst[qbase + qsz], list(self._tlpf[stk].iterlru()))
                # ... and really does end where it's supposed to: at the next
                # preference or TIDY boundary
                assert pos + sz == min(pbase + psz, qbase + qsz), \
                    ("TIDY segment length mismatch", (pos, sz, stk), (psz, pv), (qsz, qv))

        # Check that all TIDY segments with a flavor are on the appropriate
        # flavored TIDY list
        fif = lambda start: ((loc, sz, v) for (loc, sz, v) in self._prefs[start:])
        tif = lambda start: ((loc, sz, v) for (loc, sz, v) in self._eva2sst[start:]
                             if v == SegSt.TIDY)
        for (loc, sz, (stk, _)) in im_stream_inter(fif, tif):
            fdns = self._tlpf[stk].adns.get(loc, None)
            assert fdns is not None, \
                ("Unindexed flavored TIDY span", (loc, sz, stk),
                 self._prefs[loc], self._eva2sst[loc], self._tlpf[stk].adns)
            assert (loc, sz) == fdns[0].value, \
                ("Flavored TIDY index mismatch", (loc, sz, stk), fdns[0].value)

        #### MTE:

        # All JUNK spans are at the maximal version, and all non-JUNK spans are
        # at other versions.  Please note that this test is exceptionally slow,
        # due to the large number of segments and color spans that build up, so
        # it is additionally gated on the paranoia level.
        if self._paranoia > 2:
            ist = (x for x in self._eva2sst)
            itg = (x for x in self._mtags)
            for (qb, qsz, (qst, qtv)) in im_stream_inter(lambda _: ist, lambda _: itg):
                if qst == SegSt.JUNK:
                    assert qtv == self._nvers, (qb, qsz, qtv)
                elif qst == SegSt.AHWM:
                    assert qtv == 0
                else:
                    assert qtv != self._nvers

    # XXX
    # We'd like to ask a slightly different question, namely: where can we
    # place this to minimize the advancement of the version clocks.  You might
    # think we'd never advance in allocate, having advanced in free, but if we
    # choose a place formed from several freed spans, we have to advance to the
    # max of all spans we end up using, which might advance some of the clocks
    # quite a bit.
    # ...
    # At the moment, tidylst coalesces all versions together.  We should
    # instead hunt for a minimum of the (byte*clock_delta) sum over the places
    # considered.  We can stop early if we find a zero, of course.
    #
    def _alloc_place_helper(self, stk, sz):
        #### Stack flavor:

        fit = self._tlpf.setdefault(stk, SegFreeList()).iterfor(sz, 1 << self._alignlog)

        # Walk the free list to see if we have something of the "stack" flavor
        # lying around.
        for (tpos, tsz) in fit:
            apos = self._eva_align_roundup(tpos)
            if (apos == tpos) or (tpos + tsz >= apos + sz):
                # Remove from the free list; any residual spans will come back
                # to us in a moment
                self._tlpf[stk].remove(tpos)
                return apos

        # OK, that didn't work out.  Start over: go claim something without a
        # preference that is bigger than the allocation we're tasked with,
        # repaint it to have the current preference, and return the base
        # thereof.
        #
        # Align to a multiple of the page size
        sz = self._evp2eva(self._eva2evp_roundup(sz * self._stkfaf))
        for (tpos, tsz) in self._tlpf[None].iterfor(sz, 1 << self._alignlog):
            apos = self._eva_align_roundup(tpos)
            if tpos + tsz >= apos + self._stkfaf * sz:
                self._tlpf[None].remove(tpos)
                self._prefs.mark(tpos, tsz, stk)
                return apos

        # OK, OK, we really haven't found anything, even if we're willing to
        # repaint.  Go grab at the wilderness; bump the wilderness pointer now
        # to trigger the common case in _mark_allocated; things will be
        # enqueued on our per-flavor TIDY list using _mark_allocated_residual
        #
        # XXX? Round the base up to a page boundary and allocate the whole
        # thing for this flavor.
        #
        # pos = self._eva_align_roundup(self._wildern)
        pos = self._evp2eva(self._eva2evp_roundup(self._wildern))
        self._wildern = pos + sz
        self._prefs.mark(pos, self._wildern - pos, stk)

        # This is kind of gross; this segment is not in any of our flavored free
        # lists, and we don't want to put it there, as our _mark_tidy would do.
        # So instead, we reach up to the superclass to update the segment state
        # and rely on duplicating any other work that our _mark_tidy does
        # (which, thankfully, is currently none)
        super()._mark_tidy(pos, sz)

        return pos

    def _alloc_place(self, stk, sz):
        pos = self._alloc_place_helper(stk, sz)

        #### MTE:
        nv = max(v for (_, __, v) in self._mtags[pos:pos + sz])
        assert nv != self._nvers
        self._mtags.mark(pos, sz, nv)

        return pos

    def _mark_allocated_residual(self, stk, loc, sz, isLeft):
        super()._mark_allocated_residual(stk, loc, sz, isLeft)

        #### Stack flavor:

        # This is a bit of a mess.  The "residual" spans that come back to us
        # are from the generic TIDY list, which coalesces across our
        # preferences.  So, only queue the residual bits to the
        # preference-respecting TIDY list if the current allocation stack
        # matches the span's preference, and, only then, up to our preference's
        # boundary.  Discontiguous stk-flavored TIDY spans will already be in
        # the right free list (proof by induction).
        if isLeft:
            (qbase, qsz, qv) = self._prefs[loc + sz - 1]
            if stk == qv:
                base = max(qbase, loc)
                self._tlpf[stk].expunge(base, loc + sz - base)
                self._tlpf[stk].insert(base, loc + sz - base)
        else:
            (qbase, qsz, qv) = self._prefs[loc]
            if stk == qv:
                lim = min(qbase + qsz, loc + sz)
                self._tlpf[stk].expunge(loc, lim - loc)
                self._tlpf[stk].insert(loc, lim - loc)

    def _free(self, stk, tid, loc):
        sz = self._eva2sz[loc]
        (_, __, v) = self._mtags.get(loc)
        if v == self._nvers - 1:
            super(__class__, self)._free(stk, tid, loc)
            self._mtags.mark(loc, sz, self._nvers)
        else:
            super(__class__, self)._free_unsafe(stk, tid, loc)
            self._mtags.mark(loc, sz, v + 1)

    def _mark_tidy(self, loc, sz):
        super()._mark_tidy(loc, sz)

        #### Stack flavor:
        for (tloc, tsz, tv) in self._prefs[loc:loc + sz]:
            nloc = max(tloc, loc)
            nsz = min(loc + sz, tloc + tsz) - nloc
            self._tlpf[tv].insert(nloc, nsz)

    def _mark_revoked(self, loc, sz):
        #### Stack flavor:

        # XXX Should we be preserving preferences?  Always?  Sometimes?

        # Remove each overlapping span from each preferential TIDY list; the
        # parent allocator will take care of managing the global TIDY list.
        for (_, __, pv) in self._prefs[loc:loc + sz]:
            self._tlpf[pv].expunge(loc, sz)

        if sz >= 0:  # XXX
            # Just paint the whole thing as None.
            self._tlpf[None].insert(loc, sz, front=self._revoke_front)
            self._prefs.mark(loc, sz, None)
        else:
            # Preserve preferences and re-queue
            for (qb, qsz, pv) in self._prefs[loc:loc + sz]:
                b = max(qb, loc)
                l = min(qb + qsz, loc + sz)
                self._tlpf[pv].insert(b, l - b, front=self._revoke_front)

        #### MTE:
        self._mtags.mark(loc, sz, 0)

        super()._mark_tidy(loc, sz)

    # Sort by the sum of (sz*version), as that is the value by which we wind
    # back the clock to defer later revocations.
    def _find_clockiest_revokable_spans(self, n=1):
        if n == 0:
            return
        if n == 1 and self._brscache is not None:
            return [self._brscache]

        bests = [(0, 0, -1, -1)]  # [(clocksum, njunk, loc, sz)] in ascending order
        for (qbase, qsz, qv) in self._eva2sst.iter_vfilter(None, self._wildern, sst_tj):

            clocksum = 0
            for (vbase, vsz, vv) in self._mtags[qbase:qbase + qsz]:
                # Don't walk off the end of the last segment
                vsz = min(vsz, qbase + qsz - vbase)
                clocksum += vsz * vv

            if clocksum <= bests[0][0]:
                continue

            # For internal accounting, also accumulate the number of JUNK bytes
            nj = sum(sz for (_, sz, v) in self._eva2sst[qbase:qbase + qsz]
                     if v == SegSt.JUNK)

            insort(bests, (clocksum, nj, qbase, qsz))
            bests = bests[(-n):]

        return [best[1:] for best in bests if best[2] >= 0]

    def _revoke_iterator(self):
        for (b, s, v) in self._eva2sst.iter_vfilter(
                None, self._wildern,
                sst_tj if self._revoke_all else [SegSt.JUNK]):
            jsum = sum(sz for (_, sz, v) in self._eva2sst[b:b + s] if v == SegSt.JUNK)
            if jsum == 0:
                continue
            yield (jsum, b, s)

    # Revocation here does not have a fixed number of windows on which it can
    # operate; everything in the junk (and tidy!) lists can be revoked in a
    # single go.  In implementation, this looks like validating that every
    # capability's contained version field matches the version painted in RAM.
    #
    # The limits of the allocator behavior range from being able to actually
    # get all free spans, regardless of their version (JUNK or TIDY), to being
    # able to reclaim just the ones that were already at the maximal version
    # (i.e. JUNK).  In practice, one could imagine the allocator maintaining
    # a "free epoch" bit and reclaiming (i.e., restoring to version 0) all
    # segments whose free epoch predates the current, now-ending sweep.
    #
    def _maybe_revoke(self):
        # Not above ratio threshold
        if self._revoke_jwr is not None \
           and self._njunk < self._revoke_jwr * self._nwait:
            return

        # Not enough JUNK
        if self._njunk < self._revoke_jmin:
            return

        # Still enough TIDY?
        if self._revoke_tmax is not None and \
           self._wildern - self._nwait - self._njunk \
               - self._evp2eva(self._basepg) > self._revoke_tmax:
            return

        if self._revoke_k is None:
            it = self._revoke_iterator()
        else:
            # Allocate room for up to _revoke_lru things from the JUNK LRU.
            nlru = min(self._revoke_lru, len(self._junklru))

            # XXX I'd love to, but boy is this a slow way to do it.
            #
            ## # Estimate reclaim quantity
            ## unjunk = self._find_largest_revokable_spans(n=self._revoke_k - nlru)
            ## unjunk = sum(nj for (nj, _, __) in unjunk)
            ## if unjunk < self._revoke_jmin:
            ##     return
            #
            # This is going to be much faster to simulate due to the brscache,
            # though it overestimates the amount we will reclaim from non-LRU
            # spans and gives no credit to LRU spans.
            #
            unjunk = self._find_largest_revokable_spans(n=1)
            if len(unjunk) * (self._revoke_k - nlru) < self._revoke_jmin:
                return

            # Do that again, but as part of the set of things to push to the revoker
            it = (x for x in
                  self._find_largest_revokable_spans(n=self._revoke_k - nlru))

            # Add from the LRU JUNK queue
            it = itertools.chain(it,
                                 ((jsz, jb, jsz) for (jb, jsz) in self._junklru))

            # XXX Could also pull from the TIDY queue, but should filter by those
            # that contain nonzero versions or sort by the clockiest span or
            # something.
            #
            # XXX Should also coalesce with TIDY spans for the things we pull from
            # the LRU queue.

            # Limit to the number we actually can run
            # XXX This should deduplicate before slicing.  Sigh.
            #
            it = itertools.islice(it, self._revoke_k)

        rl = list(it)
        rl.reverse()  # XXX reverse is a hack (lower VA last, fronted on LRU queues?)
        self._do_revoke(rl)

        if __debug__:
            self._state_asserts()
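# ------------------------------------------------------------------------- #
# Illustrative sketch (not part of the simulator): _maybe_revoke above gates
# revocation on a JUNK/WAIT ratio (--revoke-factor), a JUNK floor
# (--revoke-min), and optionally a TIDY ceiling (--revoke-max-tidy).  The
# predicate below restates that policy over plain integers; the names are
# invented for this example.

def should_revoke(njunk, nwait, ntidy, factor=None, jmin=0, tmax=None):
    """Return True iff a sweep is warranted under the thresholds above."""
    if factor is not None and njunk < factor * nwait:
        return False          # not enough JUNK relative to live WAIT bytes
    if njunk < jmin:
        return False          # a sweep would reclaim too little to be worthwhile
    if tmax is not None and ntidy > tmax:
        return False          # plenty of reusable TIDY memory still on hand
    return True

assert should_revoke(njunk=4096, nwait=1024, ntidy=0, factor=1.0, jmin=1024)
assert not should_revoke(njunk=512, nwait=1024, ntidy=0, factor=1.0, jmin=1024)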
class Allocator(TraditionalAllocatorBase):

    __slots__ = (
        '_prefs',   # Preferred stack flavor for regions of memory
        '_tlpf',    # TIDY List Per Flavor
        '_stkfaf',  # Stack flavor allocation factor
    )

    # The parent class's _tidylst will continue to be all TIDY spans of any
    # flavor, but we additionally track TIDY-per-flavor in _tlpf, including
    # the unflavored spans at _tlpf[None]

    @staticmethod
    def _init_add_args(argp):
        super(__class__, __class__)._init_add_args(argp)
        argp.add_argument('--flavor-open-factor', action='store', type=int, default=1024,
                          help="Scale factor when opening a flavored heap region")

    def _init_handle_args(self, args):
        super(__class__, self)._init_handle_args(args)
        self._stkfaf = args.flavor_open_factor

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        # XXX
        self._revoke_k = 8
        self._free = self._free_unsafe

        self._prefs = IntervalMap(self._evp2eva(self._basepg),
                                  2**64 - self._evp2eva(self._basepg),
                                  None)
        self._tlpf = {}
        self._tlpf[None] = SegFreeList()

    def _state_asserts(self):
        super()._state_asserts()

        # For each flavored TIDY list, check that...
        for stk in self._tlpf.keys():
            # ... the list itself is OK
            self._tlpf[stk].crossreference_asserts()
            # ... and every element ...
            for (pos, sz) in self._tlpf[stk].iterlru():
                # ... is of the correct flavor
                (qbase, qsz, qv) = self._prefs[pos]
                assert qv == stk, \
                    ("Mixed-flavor TIDY", (pos, sz, stk), (qbase, qsz, qv))
                assert qbase + qsz >= pos + sz, \
                    ("More preferred TIDY than flavored", (pos, sz, stk), (qbase, qsz))
                # ... is actually tidy
                (qbase, qsz, qv) = self._eva2sst[pos]
                assert qv == SegSt.TIDY, \
                    ("Flavored TIDY is not TIDY", (pos, sz, stk), (qbase, qsz, qv))
                assert qbase + qsz >= pos + sz, \
                    ("More flavor TIDY than TIDY", (pos, sz, stk), (qbase, qsz),
                     self._eva2sst[qbase + qsz], list(self._tlpf[stk].iterlru()))

        # Check that all TIDY segments with a flavor are on the appropriate
        # flavored TIDY list
        fif = lambda start: ((loc, sz, v) for (loc, sz, v) in self._prefs[start:])
        tif = lambda start: ((loc, sz, v) for (loc, sz, v) in self._eva2sst[start:]
                             if v == SegSt.TIDY)
        for (loc, sz, (stk, _)) in im_stream_inter(fif, tif):
            assert self._tlpf[stk].adns.get(loc, None) is not None, \
                ("Unindexed flavored TIDY span", (loc, sz, stk),
                 self._prefs[loc], self._eva2sst[loc], self._tlpf[stk].adns)

    def _mark_allocated_residual(self, stk, loc, sz, isLeft):
        super()._mark_allocated_residual(stk, loc, sz, isLeft)

        # This is a bit of a mess.  The "residual" spans that come back to us
        # are from the generic TIDY list, which coalesces across our
        # preferences.  So, only queue the residual bits to the
        # preference-respecting TIDY list if the current allocation stack
        # matches the span's preference, and, only then, up to our preference's
        # boundary.  Discontiguous stk-flavored TIDY spans will already be in
        # the right free list (proof by induction).
        if isLeft:
            (qbase, qsz, qv) = self._prefs[loc + sz - 1]
            if stk == qv:
                base = max(qbase, loc)
                self._tlpf[stk].expunge(base, loc + sz - base)
                self._tlpf[stk].insert(base, loc + sz - base)
        else:
            (qbase, qsz, qv) = self._prefs[loc]
            if stk == qv:
                lim = min(qbase + qsz, loc + sz)
                self._tlpf[stk].expunge(loc, lim - loc)
                self._tlpf[stk].insert(loc, lim - loc)

    def _alloc_place(self, stk, sz):
        fit = self._tlpf.setdefault(stk, SegFreeList()).iterfor(sz, 1 << self._alignlog)

        # Walk the free list to see if we have something of the "stack" flavor
        # lying around.
        for (tpos, tsz) in fit:
            apos = self._eva_align_roundup(tpos)
            if (apos == tpos) or (tpos + tsz >= apos + sz):
                # Remove from the free list; any residual spans will come back
                # to us in a moment
                self._tlpf[stk].remove(tpos)
                return apos

        # OK, that didn't work out.  Take two: go claim something without a
        # preference that is bigger than the allocation we're tasked with,
        # repaint it to have the current preference, and return the base
        # thereof.

        # Yes, but boy is this a slow way to get it, apparently. :(
        # Now we use per-flavor free lists instead.
        #
        ## for (tpos, tsz) in self._tidylst.iterfor(self._stkfaf * sz, 1 << self._alignlog):
        ##     for (ppos, psz, pv) in self._prefs[tpos:tpos + tsz]:
        ##         if pv is not None:
        ##             continue
        ##         psz = min(tpos + tsz, ppos + psz) - ppos
        ##         ppos = max(tpos, ppos)
        ##         apos = self._eva_align_roundup(ppos)
        ##         if ppos + psz >= apos + self._stkfaf * sz:
        ##             self._prefs.mark(ppos, psz, stk)
        ##             return apos

        for (tpos, tsz) in self._tlpf[None].iterfor(self._stkfaf * sz, 1 << self._alignlog):
            apos = self._eva_align_roundup(tpos)
            if tpos + tsz >= apos + self._stkfaf * sz:
                self._prefs.mark(tpos, tsz, stk)
                self._tlpf[None].remove(tpos)
                return apos

        # OK, OK, we really haven't found anything, even if we're willing to
        # repaint.  Go grab at the wilderness; bump the wilderness pointer now
        # to trigger the common case in _mark_allocated; things will be
        # enqueued on our per-flavor TIDY list using _mark_allocated_residual
        pos = self._eva_align_roundup(self._wildern)
        self._wildern = pos + self._stkfaf * sz
        self._prefs.mark(pos, self._stkfaf * sz, stk)

        # This is kind of gross; this segment is not in any of our flavored free
        # lists, and we don't want to put it there, as our _mark_tidy would do.
        # So instead, we reach up to the superclass to update the segment state
        # and rely on duplicating any other work that our _mark_tidy does
        # (which, thankfully, is currently none)
        super()._mark_tidy(pos, self._stkfaf * sz)

        return pos

    def _mark_tidy(self, loc, sz):
        super()._mark_tidy(loc, sz)

        for (tloc, tsz, tv) in self._prefs[loc:loc + sz]:
            nloc = max(tloc, loc)
            nsz = min(loc + sz, tloc + tsz) - nloc
            self._tlpf[tv].insert(nloc, nsz)

    def _mark_revoked(self, loc, sz):
        # Just paint the whole thing as None, though that's potentially rude
        # to any painted spans on either end.  (XXX?)

        # Remove each overlapping span from each preferential TIDY list; the
        # parent allocator will take care of removing it from the global TIDY
        # list.
        for (_, __, pv) in self._prefs[loc:loc + sz]:
            self._tlpf[pv].expunge(loc, sz)

        self._tlpf[None].insert(loc, sz)
        self._prefs.mark(loc, sz, None)

        super()._mark_tidy(loc, sz)

    def _maybe_revoke(self):
        # XXX configurable policy.
        #
        # Should we be looking at both the general state of the heap and the
        # occupancies of our preferred regions?
        if self._njunk >= self._nwait and len(self._junklru) >= 16:
            self._do_revoke_best_and(
                revoke=[loc for (loc, _) in itertools.islice(self._junklru, 8)])
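# ------------------------------------------------------------------------- #
# Illustrative sketch (not part of the simulator): _alloc_place above first
# tries a span already painted with the requesting stack's flavor, then falls
# back to repainting an unflavored span scaled up by --flavor-open-factor.
# The helper below mimics that search over plain lists of (base, size) pairs;
# the names and the alignment helper are assumptions for this example only.

def align_up(x, align):
    return (x + align - 1) & ~(align - 1)

def place_flavored(flavored, unflavored, sz, align, open_factor=1024):
    """Return (base, taken_from_flavored) or None if the caller must grow."""
    for (tpos, tsz) in flavored:
        apos = align_up(tpos, align)
        if apos == tpos or tpos + tsz >= apos + sz:
            return apos, True           # reuse an already-flavored span
    want = open_factor * sz             # open a new, larger flavored region
    for (tpos, tsz) in unflavored:
        apos = align_up(tpos, align)
        if tpos + tsz >= apos + want:
            return apos, False          # caller repaints [tpos, tpos + tsz)
    return None                         # nothing fits; grow the wilderness

assert place_flavored([(4096, 64)], [], 32, 16) == (4096, True)
assert place_flavored([], [(8192, 65536)], 32, 16) == (8192, False)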