def run_optimization(self, info, loop):
    """Attempt to vectorize ``loop`` and return ``graph.index_vars``.

    The pipeline is: remember the label arguments, find the smallest
    type, hoist index guards, unroll the loop, build and combine the
    pack set, then schedule the vectorized trace with an x86 cost model.

    ``info`` is accepted but not used by this method.

    Raises NotAVectorizeableLoop if the vector register size is zero,
    the smallest type size is zero, or the loop label is not a LABEL
    operation. Raises NotAProfitableLoop if the vector schedule state
    reports the result as not profitable.
    """
    self.orig_label_args = loop.label.getarglist_copy()
    self.linear_find_smallest_type(loop)
    smallest_bytes = self.smallest_type_bytes
    register_size = self.cpu.vector_register_size

    # Bail out early: there is no chance to vectorize this trace.
    if register_size == 0:
        raise NotAVectorizeableLoop()
    if smallest_bytes == 0:
        raise NotAVectorizeableLoop()
    if loop.label.getopnum() != rop.LABEL:
        # normal traces (those without a label) cannot be optimized
        raise NotAVectorizeableLoop()

    # Locate the index guards and move them to the earliest position.
    guard_graph = self.analyse_index_calculations(loop)
    if guard_graph is not None:
        # scheduling this state reorders the trace
        self.schedule(SchedulerState(guard_graph))

    # Unroll.
    self.unroll_count = self.get_unroll_count(register_size)
    self.unroll_loop_iterations(loop, self.unroll_count)

    # Vectorize.
    dep_graph = DependencyGraph(loop)
    self.find_adjacent_memory_refs(dep_graph)
    self.extend_packset()
    self.combine_packset()
    # TODO move cost model to CPU
    cost = X86_CostModel(self.cpu, self.cost_threshold)
    vec_state = VecScheduleState(dep_graph, self.packset, self.cpu, cost)
    self.schedule(vec_state)
    if vec_state.profitable():
        return dep_graph.index_vars
    raise NotAProfitableLoop()
def vectoroptimizer_unrolled(self, loop, unroll_factor=-1): opt = self.vectoroptimizer(loop) opt.linear_find_smallest_type(loop) loop.setup_vectorization() if unroll_factor == -1 and opt.smallest_type_bytes == 0: raise NotAVectorizeableLoop() if unroll_factor == -1: unroll_factor = opt.get_unroll_count(ARCH_VEC_REG_SIZE) print "" print "unroll factor: ", unroll_factor, opt.smallest_type_bytes self.show_dot_graph(DependencyGraph(loop), "original_" + self.test_name) graph = opt.analyse_index_calculations(loop) if graph is not None: cycle = graph.cycles() if cycle is not None: print "CYCLE found %s" % cycle self.show_dot_graph(graph, "early_exit_" + self.test_name) assert cycle is None state = SchedulerState(graph) opt.schedule(state) opt.unroll_loop_iterations(loop, unroll_factor) self.debug_print_operations(loop) graph = DependencyGraph(loop) self.last_graph = graph # legacy for test_dependency self.show_dot_graph(graph, self.test_name) def gmr(i): return graph.memory_refs[graph.nodes[i]] graph.getmemref = gmr return opt, graph
def vectorize(self, loop, unroll_factor=-1):
    """Run the vectorization pipeline on ``loop`` and return the optimizer.

    After unrolling (via ``vectoroptimizer_unrolled``) the pack set is
    built, the trace is scheduled with a GenericCostModel whose second
    argument is 0, guards are strengthened, the trace is re-scheduled,
    and finally ``loop.operations`` is rebuilt as
    prefix + optional prefix label + scheduled operations.

    Raises NotAProfitableLoop when the cost model says the result is not
    profitable.
    """
    loop_info = FakeLoopInfo(loop)
    loop_info.snapshot(loop)

    opt, dep_graph = self.vectoroptimizer_unrolled(loop, unroll_factor)
    opt.find_adjacent_memory_refs(dep_graph)
    opt.extend_packset()
    opt.combine_packset()

    cost = GenericCostModel(self.cpu, 0)
    opt.schedule(VecScheduleState(dep_graph, opt.packset, self.cpu, cost))
    if not cost.profitable():
        raise NotAProfitableLoop()

    strengthen = GuardStrengthenOpt(dep_graph.index_vars)
    strengthen.propagate_all_forward(loop_info, loop)

    # Re-schedule on a fresh dependency graph of the modified loop.
    resched = SchedulerState(self.cpu, DependencyGraph(loop))
    resched.prepare()
    Scheduler().walk_and_emit(resched)
    resched.post_schedule()

    # Put the prefix (and its label, if any) in front of the operations.
    scheduled_ops = loop.operations
    rebuilt = loop.prefix[:]
    if loop.prefix_label:
        rebuilt.append(loop.prefix_label)
    rebuilt.extend(scheduled_ops)
    loop.operations = rebuilt
    return opt
def run_optimization(self, metainterp_sd, info, loop, jitcell_token, user_code):
    """Vectorize ``loop`` and return its final operation list.

    Steps: record the label arguments, find the smallest type, hoist
    index guards, unroll (optionally aligning once), build the pack set,
    schedule under a GenericCostModel, strengthen guards and re-schedule.
    ``info.extra_before_label`` is set to ``loop.align_operations`` and
    the forwarding of each of those operations is reset.

    Raises NotAVectorizeableLoop (after a debug_print naming the reason)
    when the vector size is zero, no smallest type was found, or the
    loop label is not a LABEL operation. Raises NotAProfitableLoop when
    the vector schedule state is not profitable.
    """
    self.orig_label_args = loop.label.getarglist_copy()
    self.linear_find_smallest_type(loop)
    smallest_bytes = self.smallest_type_bytes
    vec_size = self.vector_ext.vec_size()

    # Stop if there is no chance to vectorize this trace; normal traces
    # (those without a label) cannot be optimized either.
    reason = None
    if vec_size == 0:
        reason = "vector size is zero"
    elif smallest_bytes == 0:
        reason = "could not find smallest type"
    elif loop.label.getopnum() != rop.LABEL:
        reason = "not a loop, can only vectorize loops"
    if reason is not None:
        debug_print(reason)
        raise NotAVectorizeableLoop

    # Locate the index guards and move them to the earliest position.
    guard_graph = self.analyse_index_calculations(loop)
    if guard_graph is not None:
        # scheduling this state reorders the trace
        self.schedule(SchedulerState(metainterp_sd.cpu, guard_graph))

    # Unroll.
    self.unroll_count = self.get_unroll_count(vec_size)
    align_once = (self.unroll_count == 1 and
                  self.vector_ext.should_align_unroll)
    self.unroll_loop_iterations(loop, self.unroll_count,
                                align_unroll_once=align_once)

    # Vectorize.
    dep_graph = DependencyGraph(loop)
    self.find_adjacent_memory_refs(dep_graph)
    self.extend_packset()
    self.combine_packset()
    cost = GenericCostModel(self.cpu, self.cost_threshold)
    vec_state = VecScheduleState(dep_graph, self.packset, self.cpu, cost)
    self.schedule(vec_state)
    if not vec_state.profitable():
        raise NotAProfitableLoop

    strengthen = GuardStrengthenOpt(dep_graph.index_vars)
    strengthen.propagate_all_forward(info, loop, user_code)

    # Re-schedule the trace -> removes many pure operations.
    SchedulerState(self.cpu, DependencyGraph(loop)).schedule()

    align_ops = loop.align_operations
    info.extra_before_label = align_ops
    for align_op in align_ops:
        align_op.set_forwarded(None)

    return loop.finaloplist(jitcell_token=jitcell_token,
                            reset_label_token=False)
def schedule(self, loop, unroll_factor=-1, with_guard_opt=False):
    """Unroll, pack and schedule ``loop``; return the optimizer used.

    A FakeCostModel is used for the vector schedule, and no
    profitability check is made here. When ``with_guard_opt`` is true,
    guard strengthening is run on the loop before the trace is
    re-scheduled.
    """
    loop_info = FakeLoopInfo(loop)
    loop_info.snapshot(loop)

    opt, dep_graph = self.vectoroptimizer_unrolled(loop, unroll_factor)
    opt.find_adjacent_memory_refs(dep_graph)
    opt.extend_packset()
    opt.combine_packset()

    cost = FakeCostModel(self.cpu)
    opt.schedule(VecScheduleState(dep_graph, opt.packset, self.cpu, cost))

    if with_guard_opt:
        strengthen = GuardStrengthenOpt(dep_graph.index_vars)
        strengthen.propagate_all_forward(loop_info, loop)

    # Re-schedule on a fresh dependency graph of the loop.
    # NOTE(review): the source lost its indentation; this step is assumed
    # to run regardless of with_guard_opt -- confirm.
    resched = SchedulerState(self.cpu, DependencyGraph(loop))
    resched.prepare()
    Scheduler().walk_and_emit(resched)
    resched.post_schedule()
    return opt
def earlyexit(self, loop):
    """Schedule the index-calculation graph of ``loop`` and return the
    ``loop`` attribute of that graph.

    The graph returned by ``analyse_index_calculations`` is used as is;
    no check for ``None`` is made here.
    """
    optimizer = self.vectoroptimizer(loop)
    index_graph = optimizer.analyse_index_calculations(loop)
    optimizer.schedule(SchedulerState(index_graph))
    return index_graph.loop