class RegAllocator(object):
    """Build an interference graph for the variables of a flow graph and
    ask it for a coloring.

    The methods rely on attributes set by one another and are therefore
    meant to be called in this order: make_dependencies() (sets
    self._depgraph), coalesce_variables() (sets self._unionfind),
    find_node_coloring() (sets self._coloring).  After that, getcolor()
    and swapcolors() can be used.
    """
    DEBUG_REGALLOC = False

    def __init__(self, graph, consider_var, ListOfKind):
        """
        graph        -- object whose iterblocks() yields the blocks to process
        consider_var -- predicate; only the variables it accepts are put
                        in the dependency graph
        ListOfKind   -- class; operation arguments that are instances of it
                        are iterated over to find the Variables they contain
        """
        self.graph = graph
        self.consider_var = consider_var
        self.ListOfKind = ListOfKind

    def make_dependencies(self):
        """Build self._depgraph: one node per considered variable, and an
        edge between two variables whenever one is created while the
        other is still live in the same block."""
        dg = DependencyGraph()
        for block in self.graph.iterblocks():
            # Compute die_at = {Variable: index_of_operation_with_last_usage}
            die_at = dict.fromkeys(block.inputargs, 0)
            for i, op in enumerate(block.operations):
                for v in op.args:
                    if isinstance(v, Variable):
                        die_at[v] = i
                    elif isinstance(v, self.ListOfKind):
                        for v1 in v:
                            if isinstance(v1, Variable):
                                die_at[v1] = i
                if op.result is not None:
                    die_at[op.result] = i + 1
            # Whatever the exitswitch or an exit link uses is dropped from
            # die_at, i.e. it stays live through the whole block.
            if isinstance(block.exitswitch, tuple):
                for x in block.exitswitch:
                    die_at.pop(x, None)
            else:
                die_at.pop(block.exitswitch, None)
            for link in block.exits:
                for v in link.args:
                    die_at.pop(v, None)
            die_at = [(value, key) for (key, value) in die_at.items()]
            die_at.sort()
            # sentinel: never equal to an operation index, so the 'while'
            # below cannot run past the end of the list
            die_at.append((sys.maxint,))
            # Done.  XXX the code above this line runs 3 times
            # (for kind in KINDS) to produce the same result...
            livevars = [v for v in block.inputargs
                        if self.consider_var(v)]
            # Add the variables of this block to the dependency graph
            for i, v in enumerate(livevars):
                dg.add_node(v)
                for j in range(i):
                    dg.add_edge(livevars[j], v)
            livevars = set(livevars)
            die_index = 0
            for i, op in enumerate(block.operations):
                while die_at[die_index][0] == i:
                    # die_at also lists variables that consider_var()
                    # rejected; those were never in livevars
                    livevars.discard(die_at[die_index][1])
                    die_index += 1
                if (op.result is not None and
                        self.consider_var(op.result)):
                    dg.add_node(op.result)
                    for v in livevars:
                        if self.consider_var(v):
                            dg.add_edge(v, op.result)
                    livevars.add(op.result)
        self._depgraph = dg

    def coalesce_variables(self):
        """Try to merge each variable passed along a link with the
        inputarg of the target block that receives it.  Requires
        make_dependencies() to have been called; sets self._unionfind."""
        self._unionfind = UnionFind()
        pendingblocks = list(self.graph.iterblocks())
        while pendingblocks:
            block = pendingblocks.pop()
            # Aggressively try to coalesce each source variable with its
            # target.  We start from the end of the graph instead of
            # from the beginning.  This is a bit arbitrary, but the idea
            # is that the end of the graph runs typically more often
            # than the start, given that we resume execution from the
            # middle during blackholing.
            for link in block.exits:
                if link.last_exception is not None:
                    self._depgraph.add_node(link.last_exception)
                if link.last_exc_value is not None:
                    self._depgraph.add_node(link.last_exc_value)
                for i, v in enumerate(link.args):
                    self._try_coalesce(v, link.target.inputargs[i])

    def _try_coalesce(self, v, w):
        """Merge 'v' and 'w' into one node if both are considered by this
        allocator and their representatives are distinct and not
        neighbours in the dependency graph.  Otherwise do nothing."""
        # 'w' is checked as a condition rather than with an assert: a
        # target that consider_var() rejects is skipped, also under -O.
        if (isinstance(v, Variable) and self.consider_var(v)
                and self.consider_var(w)):
            dg = self._depgraph
            uf = self._unionfind
            v0 = uf.find_rep(v)
            w0 = uf.find_rep(w)
            if v0 is not w0 and v0 not in dg.neighbours[w0]:
                _, rep, _ = uf.union(v0, w0)
                assert uf.find_rep(v0) is uf.find_rep(w0) is rep
                # keep the node of the representative picked by union()
                if rep is v0:
                    dg.coalesce(w0, v0)
                else:
                    assert rep is w0
                    dg.coalesce(v0, w0)

    def find_node_coloring(self):
        """Store in self._coloring the coloring computed by the
        dependency graph; dump it if DEBUG_REGALLOC is set."""
        self._coloring = self._depgraph.find_node_coloring()
        if self.DEBUG_REGALLOC:
            for block in self.graph.iterblocks():
                # single-argument print: same output whether 'print' is
                # the statement or the function
                print(block)
                for v in block.getvariables():
                    print('\t%s \t%s' % (v, self.getcolor(v)))

    def getcolor(self, v):
        """Return the color of 'v', looked up through the representative
        of its coalesced group.  Raises KeyError if that representative
        has no color."""
        return self._coloring[self._unionfind.find_rep(v)]

    def swapcolors(self, col1, col2):
        """Exchange the colors 'col1' and 'col2' in the whole coloring."""
        for key, value in self._coloring.items():
            if value == col1:
                self._coloring[key] = col2
            elif value == col2:
                self._coloring[key] = col1
class RegAllocator(object):
    """Build an interference graph for the variables of a flow graph and
    ask it for a coloring.

    The methods rely on attributes set by one another and are therefore
    meant to be called in this order: make_dependencies() (sets
    self._depgraph), coalesce_variables() (sets self._unionfind),
    find_node_coloring() (sets self._coloring), find_num_colors() (sets
    self.numcolors).  After that, getcolor(), checkcolor() and
    swapcolors() can be used.
    """
    DEBUG_REGALLOC = False

    def __init__(self, graph, consider_var, ListOfKind):
        """
        graph        -- object whose iterblocks() yields the blocks to process
        consider_var -- predicate; only the variables it accepts are put
                        in the dependency graph
        ListOfKind   -- class; operation arguments that are instances of it
                        are iterated over to find the Variables they contain
        """
        self.graph = graph
        self.consider_var = consider_var
        self.ListOfKind = ListOfKind

    def _vars_dying_at(self, block):
        """Return {operation_index: [variables whose last use in 'block'
        is the operation at that index]}.

        Variables used by the exitswitch or passed along an exit link are
        left out, so they are never reported as dying.  The result does
        not depend on consider_var().
        """
        # last_use = {Variable: index_of_operation_with_last_usage}
        last_use = dict.fromkeys(block.inputargs, 0)
        for i, op in enumerate(block.operations):
            for v in op.args:
                if isinstance(v, Variable):
                    last_use[v] = i
                elif isinstance(v, self.ListOfKind):
                    for v1 in v:
                        if isinstance(v1, Variable):
                            last_use[v1] = i
            if op.result is not None:
                # a result nobody reads dies at the following operation
                last_use[op.result] = i + 1
        if isinstance(block.exitswitch, tuple):
            for x in block.exitswitch:
                last_use.pop(x, None)
        else:
            last_use.pop(block.exitswitch, None)
        for link in block.exits:
            for v in link.args:
                last_use.pop(v, None)
        dying = {}
        for v, i in last_use.items():
            dying.setdefault(i, []).append(v)
        return dying

    def make_dependencies(self):
        """Build self._depgraph: one node per considered variable, and an
        edge between two variables whenever one is created while the
        other is still live in the same block."""
        dg = DependencyGraph()
        for block in self.graph.iterblocks():
            # XXX this part runs 3 times (for kind in KINDS) to produce
            # the same result...
            dying = self._vars_dying_at(block)
            livevars = [v for v in block.inputargs
                        if self.consider_var(v)]
            # Add the variables of this block to the dependency graph
            for i, v in enumerate(livevars):
                dg.add_node(v)
                for j in range(i):
                    dg.add_edge(livevars[j], v)
            livevars = set(livevars)
            for i, op in enumerate(block.operations):
                for v in dying.get(i, ()):
                    # 'dying' also lists variables that consider_var()
                    # rejected; those were never in livevars
                    livevars.discard(v)
                if (op.result is not None and
                        self.consider_var(op.result)):
                    dg.add_node(op.result)
                    for v in livevars:
                        if self.consider_var(v):
                            dg.add_edge(v, op.result)
                    livevars.add(op.result)
        self._depgraph = dg

    def coalesce_variables(self):
        """Try to merge each variable passed along a link with the
        inputarg of the target block that receives it.  Requires
        make_dependencies() to have been called; sets self._unionfind."""
        self._unionfind = UnionFind()
        pendingblocks = list(self.graph.iterblocks())
        while pendingblocks:
            block = pendingblocks.pop()
            # Aggressively try to coalesce each source variable with its
            # target.  We start from the end of the graph instead of
            # from the beginning.  This is a bit arbitrary, but the idea
            # is that the end of the graph runs typically more often
            # than the start, given that we resume execution from the
            # middle during blackholing.
            for link in block.exits:
                if link.last_exception is not None:
                    self._depgraph.add_node(link.last_exception)
                if link.last_exc_value is not None:
                    self._depgraph.add_node(link.last_exc_value)
                for i, v in enumerate(link.args):
                    self._try_coalesce(v, link.target.inputargs[i])

    def _try_coalesce(self, v, w):
        """Merge 'v' and 'w' into one node if both are considered by this
        allocator and their representatives are distinct and not
        neighbours in the dependency graph.  Otherwise do nothing."""
        if (isinstance(v, Variable) and self.consider_var(v)
                and self.consider_var(w)):
            dg = self._depgraph
            uf = self._unionfind
            v0 = uf.find_rep(v)
            w0 = uf.find_rep(w)
            if v0 is not w0 and v0 not in dg.neighbours[w0]:
                _, rep, _ = uf.union(v0, w0)
                assert uf.find_rep(v0) is uf.find_rep(w0) is rep
                # keep the node of the representative picked by union()
                if rep is v0:
                    dg.coalesce(w0, v0)
                else:
                    assert rep is w0
                    dg.coalesce(v0, w0)

    def find_node_coloring(self):
        """Store in self._coloring the coloring computed by the
        dependency graph; dump it if DEBUG_REGALLOC is set."""
        self._coloring = self._depgraph.find_node_coloring()
        if self.DEBUG_REGALLOC:
            for block in self.graph.iterblocks():
                # single-argument print: same output whether 'print' is
                # the statement or the function
                print(block)
                for v in block.getvariables():
                    print('\t%s \t%s' % (v, self.getcolor(v)))

    def find_num_colors(self):
        """Set self.numcolors to the highest color in use plus one, or to
        0 if the coloring is empty."""
        if self._coloring:
            numcolors = max(self._coloring.values()) + 1
        else:
            numcolors = 0
        self.numcolors = numcolors

    def getcolor(self, v):
        """Return the color of 'v', looked up through the representative
        of its coalesced group.  Raises KeyError if that representative
        has no color."""
        return self._coloring[self._unionfind.find_rep(v)]

    def checkcolor(self, v, color):
        """Return True if 'v' has the color 'color'; False if it has
        another color or no color at all."""
        try:
            return self.getcolor(v) == color
        except KeyError:
            return False

    def swapcolors(self, col1, col2):
        """Exchange the colors 'col1' and 'col2' in the whole coloring."""
        for key, value in self._coloring.items():
            if value == col1:
                self._coloring[key] = col2
            elif value == col2:
                self._coloring[key] = col1
def move_pushes_earlier(graph, regalloc):
    """gc_push_roots and gc_pop_roots are pushes/pops to the shadowstack,
    immediately enclosing the operation that needs them (typically a call).
    Here, we try to move individual pushes earlier.

    Should run after expand_push_roots(), but before expand_pop_roots(),
    so that it sees individual 'gc_save_root' operations but bulk
    'gc_pop_roots' operations.

    'graph' is modified in place: some 'gc_save_root' operations are
    removed from the operations of their block, and operations built by
    _gc_save_root() are put on some links with insert_empty_block().
    'regalloc' must provide 'numcolors', checkcolor() and getcolor(); if
    it is false (e.g. None), nothing is done.  No return value.
    """
    # Concrete example (assembler tested on x86-64 gcc 5.3 and clang 3.7):
    #
    # ----original----           ----move_pushes_earlier----
    #
    # while (a > 10) {           *foo = b;
    #     *foo = b;              while (a > 10) {
    #     a = g(a);                  a = g(a);
    #     b = *foo;                  b = *foo;
    #                                // *foo = b;
    # }                          }
    # return b;                  return b;
    #
    # => the store and the       => the store is before, and gcc/clang
    # load are in the loop,      moves the load after the loop
    # even in the assembler      (the commented-out '*foo=b' is removed
    #                            here, but gcc/clang would also remove it)

    # Draft of the algorithm: see shadowcolor.txt

    if not regalloc:
        return

    entrymap = mkentrymap(graph)
    assert len(entrymap[graph.startblock]) == 1

    inputvars = {}    # {inputvar: (its block, its index in inputargs)}
    for block in graph.iterblocks():
        for i, v in enumerate(block.inputargs):
            inputvars[v] = (block, i)

    # Phase 1: for every color 'index', collect candidate "parts" in
    # Plist.  Each part is a tuple (index, P, gcsaveroots); see below.
    Plist = []

    for index in range(regalloc.numcolors):
        U = UnionFind()

        # S: the inputargs of color 'index' that can be reached going
        # forward from a variable of that color given to a gc_pop_roots.
        S = set()
        for block in graph.iterblocks():
            # 'op' ends up being the last gc_pop_roots of the block
            for op in reversed(block.operations):
                if op.opname == 'gc_pop_roots':
                    break
            else:
                continue   # no gc_pop_roots in this block
            # 'v' ends up being its first argument of color 'index'
            for v in op.args:
                if isinstance(v, Variable) and regalloc.checkcolor(v, index):
                    break
            else:
                continue   # no variable goes into index i

            succ = set()
            pending_succ = [(block, v)]
            while pending_succ:
                block1, v1 = pending_succ.pop()
                assert regalloc.checkcolor(v1, index)
                # follow trivial rewrites of v1 inside block1, as long
                # as the result keeps the same color
                for op1 in block1.operations:
                    if is_trivial_rewrite(op1) and op1.args[0] is v1:
                        if regalloc.checkcolor(op1.result, index):
                            pending_succ.append((block1, op1.result))
                # follow v1 along the exit links into the inputargs of
                # the target blocks
                for link1 in block1.exits:
                    for i2, v2 in enumerate(link1.args):
                        if v2 is not v1:
                            continue
                        block2 = link1.target
                        w2 = block2.inputargs[i2]
                        if w2 in succ or not regalloc.checkcolor(w2, index):
                            continue
                        succ.add(w2)
                        # go on past block2 only if it contains neither
                        # a gc_save_root nor a gc_pop_roots
                        for op2 in block2.operations:
                            if op2.opname in ('gc_save_root', 'gc_pop_roots'):
                                break
                        else:
                            pending_succ.append((block2, w2))
            U.union_list(list(succ))
            S.update(succ)

        # G: {inputarg: set of (block, gc_save_root op)} for the
        # inputargs reached going backward from a gc_save_root that
        # stores a variable at 'index'.
        G = defaultdict(set)
        for block in graph.iterblocks():
            found = False
            for opindex, op in enumerate(block.operations):
                if op.opname == 'gc_save_root':
                    # a gc_save_root whose second argument is a Signed
                    # Constant ends the search in this block
                    # NOTE(review): presumably a marker rather than a
                    # real root -- confirm against expand_push_roots()
                    if (isinstance(op.args[1], Constant) and
                            op.args[1].concretetype == lltype.Signed):
                        break
                    elif op.args[0].value == index:
                        found = True
                        break
            if not found or not isinstance(op.args[1], Variable):
                continue   # no matching gc_save_root in this block

            key = (block, op)
            pred = set()
            pending_pred = [(block, op.args[1], opindex)]
            while pending_pred:
                block1, v1, opindex1 = pending_pred.pop()
                assert regalloc.getcolor(v1) == index
                # walk backward over the operations before 'opindex1'
                for i in range(opindex1-1, -1, -1):
                    op1 = block1.operations[i]
                    if op1.opname == 'gc_pop_roots':
                        break    # stop
                    if op1.result is v1:
                        if not is_trivial_rewrite(op1):
                            break   # stop
                        if not regalloc.checkcolor(op1.args[0], index):
                            break   # stop
                        v1 = op1.args[0]
                else:
                    # reached the start of block1 without stopping: v1
                    # is looked up among the inputargs (index() raises
                    # ValueError if it is not there)
                    varindex = block1.inputargs.index(v1)
                    if v1 in pred:
                        continue    # already done
                    pred.add(v1)
                    for link1 in entrymap[block1]:
                        prevblock1 = link1.prevblock
                        if prevblock1 is not None:
                            w1 = link1.args[varindex]
                            if isinstance(w1, Variable) and w1 not in pred:
                                if regalloc.checkcolor(w1, index):
                                    # restart from the end of prevblock1
                                    pending_pred.append(
                                        (prevblock1, w1,
                                         len(prevblock1.operations)))
            U.union_list(list(pred))
            for v1 in pred:
                G[v1].add(key)

        # inputargs found by both the forward and the backward search
        M = S.intersection(G)

        # split M according to the groups built in U
        parts_target = {}
        for v in M:
            vp = U.find_rep(v)
            if vp not in parts_target:
                new_part = (index, set(), set())
                # (index,
                #  subset P of variables,
                #  set of (block, gc_save_root))
                Plist.append(new_part)
                parts_target[vp] = new_part
            part = parts_target[vp]
            part[1].add(v)
            part[2].update(G[v])

    # Sort P so that it prefers places that would avoid multiple
    # gcsaveroots (smaller 'heuristic' result, so first in sorted
    # order); but also prefers smaller overall pieces, because it
    # might be possible to remove several small-scale pieces instead
    # of one big-scale one.
    def heuristic((index, P, gcsaveroots)):
        return float(len(P)) / len(gcsaveroots)
    Plist.sort(key=heuristic)

    # Phase 2: apply the parts in sorted order.
    variables_along_changes = {}
    live_at_start_of_block = set()   # set of (block, index)
    insert_gc_push_root = defaultdict(list)   # {link: [(index, var)]}

    for index, P, gcsaveroots in Plist:
        # if this Plist entry is not valid any more because of changes
        # done by the previous entries, drop it
        if any((inputvars[v][0], index) in live_at_start_of_block
               for v in P):
            continue
        if any(op not in block.operations for block, op in gcsaveroots):
            continue
        for v in P:
            assert regalloc.getcolor(v) == index
            assert v not in variables_along_changes

        success_count = 0
        mark = []    # (index, link, varindex) of links needing a new push

        for v in P:
            block, varindex = inputvars[v]
            for link in entrymap[block]:
                # walk backward in the previous block, starting from the
                # variable 'w' that the link passes into 'v'
                w = link.args[varindex]
                if link.prevblock is not None:
                    prevoperations = link.prevblock.operations
                else:
                    prevoperations = []
                for op in reversed(prevoperations):
                    if op.opname == 'gc_pop_roots':
                        # it is possible to have gc_pop_roots() without
                        # w in the args, if w is the result of the call
                        # that comes just before.
                        if (isinstance(w, Variable) and
                                w in op.args and
                                regalloc.checkcolor(w, index)):
                            success_count += 1
                        else:
                            mark.append((index, link, varindex))
                        break
                    if op.result is w:
                        if is_trivial_rewrite(op) and (
                                regalloc.checkcolor(op.args[0], index)):
                            w = op.args[0]
                        else:
                            mark.append((index, link, varindex))
                            break
                else:
                    # reached the start of the previous block (or there
                    # is none): nothing to add only if 'w' is itself a
                    # variable of P
                    if not isinstance(w, Variable) or w not in P:
                        mark.append((index, link, varindex))

        # the part is applied only if at least one entering link ends
        # right after a gc_pop_roots that has the variable at 'index'
        if success_count > 0:
            for block, op in gcsaveroots:
                # a new list is assigned instead of mutating
                # block.operations in place
                newops = list(block.operations)
                newops.remove(op)
                block.operations = newops
            for index, link, varindex in mark:
                insert_gc_push_root[link].append((index, link.args[varindex]))
            for v in P:
                block, varindex = inputvars[v]
                variables_along_changes[v] = block, index
                live_at_start_of_block.add((block, index))

    # finally, put the recorded pushes on their links
    for link in insert_gc_push_root:
        newops = [_gc_save_root(index, v)
                  for index, v in sorted(insert_gc_push_root[link])]
        insert_empty_block(link, newops=newops)