def inline_malloc_removal_phase(config, translator, graphs, inline_threshold,
                                inline_heuristic,
                                call_count_pred=None):
    """Run one inlining pass followed by one malloc-removal pass.

    Inlining runs only when ``inline_threshold`` is truthy; malloc removal
    runs only when ``config.mallocs`` is truthy.  After each pass that ran,
    statistics for ``translator.graphs[0]`` are printed if
    ``config.print_statistics`` is set.
    """
    # Looked up unconditionally, before either pass, exactly once.
    type_system = translator.rtyper.type_system.name

    # Pass 1: inline functions into each other.
    if inline_threshold:
        heuristic_name = (inline_heuristic.__module__,
                          inline_heuristic.__name__)
        log.inlining("phase with threshold factor: %s" % inline_threshold)
        log.inlining("heuristic: %s.%s" % heuristic_name)
        inline.auto_inline_graphs(translator, graphs, inline_threshold,
                                  heuristic=inline_heuristic,
                                  call_count_pred=call_count_pred)
        if config.print_statistics:
            print("after inlining:")
            print_statistics(translator.graphs[0], translator)

    # Pass 2: get rid of mallocs.
    if config.mallocs:
        log.malloc("starting malloc removal")
        remove_mallocs(translator, graphs, type_system)
        if config.print_statistics:
            print("after malloc removal:")
            print_statistics(translator.graphs[0], translator)
def instrument_inline_candidates(graphs, threshold):
    """Insert an 'instrument_count' operation before each candidate call.

    Every "direct_call" operation in ``graphs`` is examined.  A call is a
    candidate when its target function object has a graph whose
    ``static_instruction_count`` is at most ``threshold`` and whose
    ``_callable`` is not flagged ``_dont_inline_``.  Each instrumented call
    site gets a consecutive integer label, starting from 0.
    """
    # Memo of "is this graph small enough?".  None is pre-seeded as False so
    # that a call target without a graph is never treated as a candidate.
    small_enough = {None: False}

    def candidate(graph):
        if graph not in small_enough:
            small_enough[graph] = static_instruction_count(graph) <= threshold
        return small_enough[graph]

    instrumented = 0
    for parentgraph in graphs:
        for block in parentgraph.iterblocks():
            ops = block.operations
            # Walk from the last operation to the first: inserting at the
            # current position then never shifts an operation that is still
            # to be visited.
            for pos in range(len(ops) - 1, -1, -1):
                op = ops[pos]
                if op.opname != "direct_call":
                    continue
                funcobj = get_funcobj(op.args[0].value)
                graph = getattr(funcobj, 'graph', None)
                if graph is not None:
                    target = getattr(funcobj, '_callable', None)
                    if getattr(target, '_dont_inline_', False):
                        continue
                if not candidate(graph):
                    continue
                tag = Constant('inline', Void)
                label = Constant(instrumented, Signed)
                dummy = Variable()
                dummy.concretetype = Void
                counter_op = SpaceOperation('instrument_count',
                                            [tag, label], dummy)
                # Goes immediately in front of the call it counts.
                ops.insert(pos, counter_op)
                instrumented += 1
    log.inlining("%d call sites instrumented" % instrumented)
def auto_inline_graphs(translator, graphs, threshold, call_count_pred=None,
                       heuristic=inlining_heuristic):
    """Auto-inline among ``graphs``, then remove duplicate casts.

    The call graph is restricted to the inlinable static callers found in
    ``graphs``.  The number of inlined call sites is logged.
    """
    inlined = auto_inlining(translator, threshold,
                            callgraph=inlinable_static_callers(graphs),
                            heuristic=heuristic,
                            call_count_pred=call_count_pred)
    log.inlining('inlined %d callsites.'% (inlined,))
    # Post-pass over every graph that was handed in.
    for g in graphs:
        removenoops.remove_duplicate_casts(g, translator)
def auto_inline_graphs(translator, graphs, threshold, call_count_pred=None,
                       heuristic=inlining_heuristic):
    """Auto-inline among ``graphs``, then tidy up every graph.

    The call graph is restricted to the inlinable static callers found in
    ``graphs``.  The number of inlined call sites is logged; afterwards
    superfluous keep-alives and duplicate casts are removed from each graph,
    in that order.
    """
    inlined = auto_inlining(translator, threshold,
                            callgraph=inlinable_static_callers(graphs),
                            heuristic=heuristic,
                            call_count_pred=call_count_pred)
    log.inlining('inlined %d callsites.'% (inlined,))
    # Post-pass over every graph that was handed in.
    for g in graphs:
        removenoops.remove_superfluous_keep_alive(g)
        removenoops.remove_duplicate_casts(g, translator)
def auto_inlining(translator, threshold=None, callgraph=None,
                  call_count_pred=None, heuristic=inlining_heuristic):
    """Inline graphs into their callers, cheapest first, up to ``threshold``.

    ``callgraph`` is an iterable of (caller graph, callee graph) pairs; when
    omitted it is computed from ``translator.graphs``.  ``heuristic(graph)``
    must return a ``(weight, fixed)`` pair.  Graphs are processed in order of
    increasing weight until the smallest up-to-date weight reaches
    ``threshold``.

    Returns the sum of the values returned by ``inline_function`` for the
    inlinings that succeeded.
    """
    assert threshold is not None and threshold != 1
    from heapq import heappush, heappop, heapreplace, heapify

    if callgraph is None:
        callgraph = inlinable_static_callers(translator.graphs)

    callers = {}     # {graph: {graphs-that-call-it}}
    callees = {}     # {graph: {graphs-that-it-calls}}
    for caller, callee in callgraph:
        callers.setdefault(callee, {})[caller] = True
        callees.setdefault(caller, {})[callee] = True

    # Heap entries are (weight, -number_of_callers, graph).  Every graph
    # starts with a placeholder weight of 0.0; the real weight is computed
    # the first time the entry reaches the top of the heap.
    heap = [(0.0, -len(parents), g) for g, parents in callers.items()]
    valid_weight = {}    # graphs whose heap entry carries a computed weight
    try_again = {}       # graphs that may be worth re-queueing later
    lltype_to_classdef = translator.rtyper.lltype_to_classdef_mapping()
    raise_analyzer = RaiseAnalyzer(translator)
    count = 0

    while heap:
        weight, _, graph = heap[0]

        if not valid_weight.get(graph):
            # Placeholder entry: swap it for one with the real weight and
            # look at the heap again.
            weight, fixed = heuristic(graph)
            heapreplace(heap, (weight, -len(callers[graph]), graph))
            valid_weight[graph] = True
            if not fixed:
                try_again[graph] = True
            continue

        if weight >= threshold:
            # The cheapest entry is already too expensive, so we are done,
            # unless some entries were invalidated in the meantime.  Reset
            # those to the placeholder weight so they get re-evaluated.
            requeued = False
            for pos, (_w, neg_ncallers, stale) in enumerate(heap):
                if not valid_weight.get(stale):
                    heap[pos] = (0.0, neg_ncallers, stale)
                    requeued = True
            if not requeued:
                break
            heapify(heap)
            continue

        heappop(heap)
        if callers[graph]:
            if translator.config.translation.verbose:
                log.inlining('%7.2f %50s' % (weight, graph.name))
            else:
                log.dot()

        for parentgraph in callers[graph]:
            if parentgraph == graph:
                continue    # never inline a graph into itself
            try:
                subcount = inline_function(translator, graph, parentgraph,
                                           lltype_to_classdef, raise_analyzer,
                                           call_count_pred)
            except CannotInline:
                try_again[graph] = True
                continue
            if not subcount:
                continue
            count += subcount
            # The parent has absorbed the body of 'graph', so it now
            # performs every call that 'graph' used to perform.
            for inherited in callees.get(graph, {}):
                callees[parentgraph][inherited] = True
                callers[inherited][parentgraph] = True
            if parentgraph in try_again:
                # The parent was changed by this inlining; queue it again
                # with a placeholder weight so that it is re-evaluated.
                del try_again[parentgraph]
                heappush(heap,
                         (0.0, -len(callers[parentgraph]), parentgraph))
                valid_weight[parentgraph] = False
    return count
def auto_inlining(translator, threshold=None, callgraph=None,
                  call_count_pred=None, heuristic=inlining_heuristic):
    """Inline graphs into their callers, cheapest first, up to ``threshold``.

    ``callgraph`` is an iterable of (caller graph, callee graph) pairs; when
    omitted it is computed from ``translator.graphs``.  ``heuristic(graph)``
    must return a ``(weight, fixed)`` pair; it is bypassed, with weight 0.0,
    for graphs whose ``func`` has a true ``_always_inline_`` attribute.
    Graphs are processed in order of increasing weight until the smallest
    up-to-date weight reaches ``threshold``.

    ``inline_function`` is called with ``cleanup=False``; each parent graph
    on which it returned without raising is passed to ``cleanup_graph`` once
    the main loop has finished.

    Returns the sum of the values returned by ``inline_function`` for the
    inlinings that succeeded.
    """
    assert threshold is not None and threshold != 1
    to_cleanup = {}
    from heapq import heappush, heappop, heapreplace, heapify

    if callgraph is None:
        callgraph = inlinable_static_callers(translator.graphs)

    callers = {}     # {graph: {graphs-that-call-it}}
    callees = {}     # {graph: {graphs-that-it-calls}}
    for caller, callee in callgraph:
        callers.setdefault(callee, {})[caller] = True
        callees.setdefault(caller, {})[callee] = True

    # Heap entries are (weight, -number_of_callers, graph).  Every graph
    # starts with a placeholder weight of 0.0; the real weight is computed
    # the first time the entry reaches the top of the heap.
    heap = [(0.0, -len(parents), g) for g, parents in callers.items()]
    valid_weight = {}    # graphs whose heap entry carries a computed weight
    try_again = {}       # graphs that may be worth re-queueing later
    lltype_to_classdef = translator.rtyper.lltype_to_classdef_mapping()
    raise_analyzer = RaiseAnalyzer(translator)
    count = 0

    while heap:
        weight, _, graph = heap[0]

        if not valid_weight.get(graph):
            # Placeholder entry: swap it for one with the real weight and
            # look at the heap again.
            if hasattr(graph, 'func') and \
                   getattr(graph.func, '_always_inline_', None):
                weight, fixed = 0.0, True
            else:
                weight, fixed = heuristic(graph)
            heapreplace(heap, (weight, -len(callers[graph]), graph))
            valid_weight[graph] = True
            if not fixed:
                try_again[graph] = True
            continue

        if weight >= threshold:
            # The cheapest entry is already too expensive, so we are done,
            # unless some entries were invalidated in the meantime.  Reset
            # those to the placeholder weight so they get re-evaluated.
            requeued = False
            for pos, (_w, neg_ncallers, stale) in enumerate(heap):
                if not valid_weight.get(stale):
                    heap[pos] = (0.0, neg_ncallers, stale)
                    requeued = True
            if not requeued:
                break
            heapify(heap)
            continue

        heappop(heap)
        if callers[graph]:
            if translator.config.translation.verbose:
                log.inlining('%7.2f %50s' % (weight, graph.name))
            else:
                log.dot()

        for parentgraph in callers[graph]:
            if parentgraph == graph:
                continue    # never inline a graph into itself
            try:
                subcount = inline_function(translator, graph, parentgraph,
                                           lltype_to_classdef, raise_analyzer,
                                           call_count_pred, cleanup=False)
            except CannotInline:
                try_again[graph] = True
                continue
            # Recorded whenever inline_function returned normally, even if
            # it reported that nothing was inlined.
            to_cleanup[parentgraph] = True
            if not subcount:
                continue
            count += subcount
            # The parent has absorbed the body of 'graph', so it now
            # performs every call that 'graph' used to perform.
            for inherited in callees.get(graph, {}):
                callees[parentgraph][inherited] = True
                callers[inherited][parentgraph] = True
            if parentgraph in try_again:
                # The parent was changed by this inlining; queue it again
                # with a placeholder weight so that it is re-evaluated.
                del try_again[parentgraph]
                heappush(heap,
                         (0.0, -len(callers[parentgraph]), parentgraph))
                valid_weight[parentgraph] = False

    # Cleanup was deferred (cleanup=False above); do it once per parent.
    for touched in to_cleanup:
        cleanup_graph(touched)
    return count