示例#1
0
文件: vector.py 项目: sota/pypy-old
    def run_optimization(self, info, loop):
        """Try to vectorize ``loop`` and return the graph's index variables.

        Steps, in order: record the label arguments, find the smallest
        type, reorder the trace when ``analyse_index_calculations`` yields
        a graph, unroll, build and combine the pack set, then schedule it
        with an ``X86_CostModel``.

        ``info`` is accepted but not used by this method.

        Raises ``NotAVectorizeableLoop`` if the vector register size is 0,
        the smallest type size is 0, or the label operation is not
        ``rop.LABEL``; raises ``NotAProfitableLoop`` if the final schedule
        state reports itself as not profitable.
        """
        self.orig_label_args = loop.label.getarglist_copy()
        self.linear_find_smallest_type(loop)
        smallest_bytes = self.smallest_type_bytes
        register_size = self.cpu.vector_register_size

        # Any one of these conditions means there is no chance to
        # vectorize this trace.  They are checked in the same order a
        # single short-circuiting ``or`` would check them.
        if register_size == 0:
            raise NotAVectorizeableLoop()
        if smallest_bytes == 0:
            raise NotAVectorizeableLoop()
        if loop.label.getopnum() != rop.LABEL:
            # normal traces (those without a label) cannot be optimized
            raise NotAVectorizeableLoop()

        # find index guards and move them to the earliest position
        index_graph = self.analyse_index_calculations(loop)
        if index_graph is not None:
            # scheduling this graph reorders the trace
            reorder_state = SchedulerState(index_graph)
            self.schedule(reorder_state)

        # unroll
        self.unroll_count = self.get_unroll_count(register_size)
        self.unroll_loop_iterations(loop, self.unroll_count)

        # vectorize
        dep_graph = DependencyGraph(loop)
        self.find_adjacent_memory_refs(dep_graph)
        self.extend_packset()
        self.combine_packset()
        # TODO move cost model to CPU
        cost_model = X86_CostModel(self.cpu, self.cost_threshold)
        vec_state = VecScheduleState(dep_graph, self.packset, self.cpu,
                                     cost_model)
        self.schedule(vec_state)
        if not vec_state.profitable():
            raise NotAProfitableLoop()
        return dep_graph.index_vars
示例#2
0
    def vectoroptimizer_unrolled(self, loop, unroll_factor=-1):
        """Create the optimizer for ``loop``, unroll it, and return ``(opt, graph)``.

        ``unroll_factor == -1`` means "compute it": the factor is then taken
        from ``opt.get_unroll_count(ARCH_VEC_REG_SIZE)`` and printed, and
        ``NotAVectorizeableLoop`` is raised when no smallest type was found
        (``opt.smallest_type_bytes == 0``).

        The returned graph is the ``DependencyGraph`` built after unrolling.
        It is also stored in ``self.last_graph`` and gets a ``getmemref(i)``
        attribute that looks up the memory ref of node number ``i``.
        Asserts that the index-calculation graph, when there is one, has no
        cycle.
        """
        opt = self.vectoroptimizer(loop)
        opt.linear_find_smallest_type(loop)
        loop.setup_vectorization()

        if unroll_factor == -1:
            if opt.smallest_type_bytes == 0:
                raise NotAVectorizeableLoop()
            unroll_factor = opt.get_unroll_count(ARCH_VEC_REG_SIZE)
            # Single-argument print forms: the emitted text is the same as
            # with a multi-item print statement (items joined by a space).
            print("")
            print(" ".join(["unroll factor: ", str(unroll_factor),
                            str(opt.smallest_type_bytes)]))

        original_graph = DependencyGraph(loop)
        self.show_dot_graph(original_graph, "original_" + self.test_name)

        index_graph = opt.analyse_index_calculations(loop)
        if index_graph is not None:
            found_cycle = index_graph.cycles()
            if found_cycle is not None:
                print("CYCLE found %s" % found_cycle)
            # dump the graph before asserting, so it is available even
            # when the assertion below fails
            self.show_dot_graph(index_graph, "early_exit_" + self.test_name)
            assert found_cycle is None
            opt.schedule(SchedulerState(index_graph))

        opt.unroll_loop_iterations(loop, unroll_factor)
        self.debug_print_operations(loop)

        unrolled_graph = DependencyGraph(loop)
        self.last_graph = unrolled_graph  # legacy for test_dependency
        self.show_dot_graph(unrolled_graph, self.test_name)

        def memref_by_index(node_index):
            node = unrolled_graph.nodes[node_index]
            return unrolled_graph.memory_refs[node]

        unrolled_graph.getmemref = memref_by_index
        return opt, unrolled_graph
示例#3
0
    def vectorize(self, loop, unroll_factor=-1):
        """Vectorize ``loop`` in place and return the optimizer that was used.

        Unrolls via ``vectoroptimizer_unrolled``, builds and combines the
        pack set, schedules it under a ``GenericCostModel`` with threshold 0,
        runs ``GuardStrengthenOpt`` and then re-schedules a fresh
        ``DependencyGraph``.  Finally ``loop.operations`` is rebuilt as
        prefix + optional prefix label + the re-scheduled operations.

        Raises ``NotAProfitableLoop`` when the cost model reports the
        schedule as not profitable.
        """
        loop_info = FakeLoopInfo(loop)
        loop_info.snapshot(loop)

        opt, unrolled_graph = self.vectoroptimizer_unrolled(loop,
                                                            unroll_factor)
        opt.find_adjacent_memory_refs(unrolled_graph)
        opt.extend_packset()
        opt.combine_packset()

        cost_model = GenericCostModel(self.cpu, 0)
        vec_state = VecScheduleState(unrolled_graph, opt.packset, self.cpu,
                                     cost_model)
        opt.schedule(vec_state)
        if not cost_model.profitable():
            raise NotAProfitableLoop()

        guard_opt = GuardStrengthenOpt(unrolled_graph.index_vars)
        guard_opt.propagate_all_forward(loop_info, loop)

        # re-schedule on a dependency graph rebuilt from the current loop
        final_state = SchedulerState(self.cpu, DependencyGraph(loop))
        final_state.prepare()
        scheduler = Scheduler()
        scheduler.walk_and_emit(final_state)
        final_state.post_schedule()

        # stitch the final list together: prefix, prefix label (if set),
        # then the operations the loop holds after re-scheduling
        rescheduled_ops = loop.operations
        loop.operations = loop.prefix[:]
        if loop.prefix_label:
            loop.operations.append(loop.prefix_label)
        loop.operations.extend(rescheduled_ops)
        return opt
示例#4
0
文件: vector.py 项目: Mu-L/pypy
    def run_optimization(self, metainterp_sd, info, loop, jitcell_token,
                         user_code):
        """Vectorize ``loop`` and return its final operation list.

        Steps, in order: bail out when the trace cannot be vectorized,
        reorder the trace when ``analyse_index_calculations`` yields a
        graph, unroll, build and combine the pack set, schedule it under a
        ``GenericCostModel``, strengthen guards, and re-schedule a fresh
        dependency graph.

        The result is ``loop.finaloplist(jitcell_token=jitcell_token,
        reset_label_token=False)``.  ``loop.align_operations`` is stored in
        ``info.extra_before_label`` and each of those operations has its
        forwarding reset to ``None``.

        Raises ``NotAVectorizeableLoop`` when the vector size is 0, no
        smallest type was found, or the label operation is not
        ``rop.LABEL``; raises ``NotAProfitableLoop`` when the schedule state
        reports itself as not profitable.
        """
        self.orig_label_args = loop.label.getarglist_copy()
        self.linear_find_smallest_type(loop)
        smallest_bytes = self.smallest_type_bytes
        vec_size = self.vector_ext.vec_size()

        # Stop when there is no chance to vectorize this trace; normal
        # traces (those without a label) cannot be optimized.  The first
        # matching condition picks the message that gets logged.
        bailout_reason = None
        if vec_size == 0:
            bailout_reason = "vector size is zero"
        elif smallest_bytes == 0:
            bailout_reason = "could not find smallest type"
        elif loop.label.getopnum() != rop.LABEL:
            bailout_reason = "not a loop, can only vectorize loops"
        if bailout_reason is not None:
            debug_print(bailout_reason)
            raise NotAVectorizeableLoop

        # find index guards and move them to the earliest position
        index_graph = self.analyse_index_calculations(loop)
        if index_graph is not None:
            # scheduling this graph reorders the trace
            self.schedule(SchedulerState(metainterp_sd.cpu, index_graph))

        # unroll
        self.unroll_count = self.get_unroll_count(vec_size)
        align_once = (self.unroll_count == 1 and
                      self.vector_ext.should_align_unroll)
        self.unroll_loop_iterations(loop, self.unroll_count,
                                    align_unroll_once=align_once)

        # vectorize
        dep_graph = DependencyGraph(loop)
        self.find_adjacent_memory_refs(dep_graph)
        self.extend_packset()
        self.combine_packset()
        cost_model = GenericCostModel(self.cpu, self.cost_threshold)
        vec_state = VecScheduleState(dep_graph, self.packset, self.cpu,
                                     cost_model)
        self.schedule(vec_state)
        if not vec_state.profitable():
            raise NotAProfitableLoop
        guard_opt = GuardStrengthenOpt(dep_graph.index_vars)
        guard_opt.propagate_all_forward(info, loop, user_code)

        # re-schedule the trace -> removes many pure operations
        final_state = SchedulerState(self.cpu, DependencyGraph(loop))
        final_state.schedule()

        info.extra_before_label = loop.align_operations
        for align_op in loop.align_operations:
            align_op.set_forwarded(None)

        return loop.finaloplist(jitcell_token=jitcell_token,
                                reset_label_token=False)
示例#5
0
文件: vector.py 项目: mozillazg/pypy
    def run_optimization(self, metainterp_sd, info, loop, jitcell_token, user_code):
        """Run the vectorizer on ``loop`` and return the resulting operations.

        Returns ``loop.finaloplist(jitcell_token=jitcell_token,
        reset_label_token=False)`` after unrolling, packing, scheduling,
        guard strengthening and a final re-schedule.  As side effects,
        ``self.orig_label_args`` and ``self.unroll_count`` are set and
        ``info.extra_before_label`` receives ``loop.align_operations``.

        Raises ``NotAVectorizeableLoop`` for a zero vector size, a zero
        smallest type size, or a label operation that is not ``rop.LABEL``.
        Raises ``NotAProfitableLoop`` when the vector schedule state is not
        profitable.
        """
        self.orig_label_args = loop.label.getarglist_copy()
        self.linear_find_smallest_type(loop)
        type_bytes = self.smallest_type_bytes
        vector_size = self.vector_ext.vec_size()

        # Stop early: there is no chance to vectorize this trace.
        # Normal traces (if there is no label) cannot be optimized.
        if vector_size == 0:
            debug_print("vector size is zero")
            raise NotAVectorizeableLoop
        if type_bytes == 0:
            debug_print("could not find smallest type")
            raise NotAVectorizeableLoop
        label_opnum = loop.label.getopnum()
        if label_opnum != rop.LABEL:
            debug_print("not a loop, can only vectorize loops")
            raise NotAVectorizeableLoop

        # find index guards and move to the earliest position
        guard_graph = self.analyse_index_calculations(loop)
        if guard_graph is not None:
            early_state = SchedulerState(metainterp_sd.cpu, guard_graph)
            self.schedule(early_state)  # reorder the trace

        # unroll
        self.unroll_count = self.get_unroll_count(vector_size)
        should_align = (self.unroll_count == 1
                        and self.vector_ext.should_align_unroll)
        self.unroll_loop_iterations(
            loop, self.unroll_count, align_unroll_once=should_align)

        # vectorize
        vector_graph = DependencyGraph(loop)
        self.find_adjacent_memory_refs(vector_graph)
        self.extend_packset()
        self.combine_packset()
        model = GenericCostModel(self.cpu, self.cost_threshold)
        pack_state = VecScheduleState(vector_graph, self.packset, self.cpu, model)
        self.schedule(pack_state)
        if not pack_state.profitable():
            raise NotAProfitableLoop
        strengthen = GuardStrengthenOpt(vector_graph.index_vars)
        strengthen.propagate_all_forward(info, loop, user_code)

        # re-schedule the trace -> removes many pure operations
        rebuilt_graph = DependencyGraph(loop)
        SchedulerState(self.cpu, rebuilt_graph).schedule()

        info.extra_before_label = loop.align_operations
        for aligned in loop.align_operations:
            aligned.set_forwarded(None)

        return loop.finaloplist(jitcell_token=jitcell_token, reset_label_token=False)
示例#6
0
 def schedule(self, loop, unroll_factor=-1, with_guard_opt=False):
     """Unroll, pack and schedule ``loop``; return the optimizer used.

     The pack set is scheduled under a ``FakeCostModel`` and no
     profitability check is made here.  When ``with_guard_opt`` is true,
     ``GuardStrengthenOpt`` is run over the loop before the final
     re-schedule on a freshly built ``DependencyGraph``.
     """
     loop_info = FakeLoopInfo(loop)
     loop_info.snapshot(loop)

     opt, unrolled_graph = self.vectoroptimizer_unrolled(loop, unroll_factor)
     opt.find_adjacent_memory_refs(unrolled_graph)
     opt.extend_packset()
     opt.combine_packset()

     cost_model = FakeCostModel(self.cpu)
     vec_state = VecScheduleState(unrolled_graph, opt.packset, self.cpu,
                                  cost_model)
     opt.schedule(vec_state)

     if with_guard_opt:
         GuardStrengthenOpt(unrolled_graph.index_vars).propagate_all_forward(
             loop_info, loop)

     # re-schedule
     final_state = SchedulerState(self.cpu, DependencyGraph(loop))
     final_state.prepare()
     scheduler = Scheduler()
     scheduler.walk_and_emit(final_state)
     final_state.post_schedule()
     return opt
示例#7
0
 def earlyexit(self, loop):
     """Schedule the index-calculation graph of ``loop``; return ``graph.loop``.

     The graph comes from ``opt.analyse_index_calculations(loop)`` and is
     used as returned, without a ``None`` check.
     """
     optimizer = self.vectoroptimizer(loop)
     index_graph = optimizer.analyse_index_calculations(loop)
     optimizer.schedule(SchedulerState(index_graph))
     return index_graph.loop