def generate_loop_schedules(kernel, debug_args=None):
    """Generate scheduled copies of a preprocessed *kernel*.

    This is a generator: nothing is checked or computed until the first
    item is requested.  Two internal schedule generators are tried in
    order, the first with ``allow_boost=None`` and the second with the
    default ``allow_boost``.  The second one is only consulted if the first
    one did not produce any schedule.

    :arg kernel: the kernel to schedule.  Its ``state`` must be
        ``kernel_state.PREPROCESSED``.
    :arg debug_args: optional mapping of keyword arguments that is
        forwarded to :class:`ScheduleDebugger`.  *None* (the default) is
        treated like an empty mapping.
    :returns: yields ``kernel.copy(schedule=..., state=SCHEDULED)`` for
        every schedule found, after local barriers have been inserted.
    :raises LoopyError: if *kernel* has not been preprocessed.
    :raises RuntimeError: if no schedule at all could be found.

    If scheduling fails or is interrupted with :exc:`KeyboardInterrupt`
    while ``debug.interactive`` is true, the user is prompted and the
    internal scheduler is re-run in debug mode (see
    ``print_longest_dead_end`` below).
    """
    # A None sentinel avoids sharing one mutable dict between all calls.
    if debug_args is None:
        debug_args = {}

    from loopy.kernel import kernel_state
    if kernel.state != kernel_state.PREPROCESSED:
        raise LoopyError("cannot schedule a kernel that has not been "
                "preprocessed")

    from loopy.check import pre_schedule_checks
    pre_schedule_checks(kernel)

    schedule_count = 0

    debug = ScheduleDebugger(**debug_args)

    from loopy.kernel.data import IlpBaseTag, ParallelTag, VectorizeTag

    def inames_tagged_with(tag_class):
        # All inames of the kernel whose tag is an instance of *tag_class*.
        return {
                iname for iname in kernel.all_inames()
                if isinstance(kernel.iname_to_tag.get(iname), tag_class)}

    ilp_inames = inames_tagged_with(IlpBaseTag)
    vec_inames = inames_tagged_with(VectorizeTag)
    parallel_inames = inames_tagged_with(ParallelTag)

    loop_nest_with_map = find_loop_nest_with_map(kernel)
    loop_nest_around_map = find_loop_nest_around_map(kernel)
    sched_state = SchedulerState(
            kernel=kernel,
            loop_nest_around_map=loop_nest_around_map,
            loop_insn_dep_map=find_loop_insn_dep_map(
                kernel,
                loop_nest_with_map=loop_nest_with_map,
                loop_nest_around_map=loop_nest_around_map),
            breakable_inames=ilp_inames,
            ilp_inames=ilp_inames,
            vec_inames=vec_inames,

            # time-varying part
            active_inames=(),
            entered_inames=frozenset(),

            schedule=(),

            unscheduled_insn_ids={insn.id for insn in kernel.instructions},
            scheduled_insn_ids=frozenset(),

            # ilp and vec are not parallel for the purposes of the scheduler
            parallel_inames=parallel_inames - ilp_inames - vec_inames,

            group_insn_counts=group_insn_counts(kernel),
            active_group_counts={},
            )

    generators = [
            generate_loop_schedules_internal(
                sched_state, debug=debug, allow_boost=None),
            generate_loop_schedules_internal(sched_state, debug=debug),
            ]

    def print_longest_dead_end():
        # Interactive post-mortem: re-run the internal scheduler with the
        # debugger armed at the length of the longest rejected schedule.
        if not debug.interactive:
            return

        print("Loo.py will now show you the scheduler state at the point")
        print("where the longest (dead-end) schedule was generated, in the")
        print("hope that some of this makes sense and helps you find")
        print("the issue.")
        print()
        print("To disable this interactive behavior, pass")
        print(" debug_args=dict(interactive=False)")
        print("to generate_loop_schedules().")
        print(75 * "-")
        six.moves.input("Enter:")
        print()
        print()

        debug.debug_length = len(debug.longest_rejected_schedule)
        while True:
            try:
                for _ in generate_loop_schedules_internal(
                        sched_state, debug=debug):
                    pass
            except ScheduleDebugInput as e:
                # The exception text carries the newly requested debug
                # length; restart the scheduler with it.
                debug.debug_length = int(str(e))
                continue

            break

    try:
        for gen in generators:
            for gen_sched in gen:
                # NOTE: the global-barrier check below is disabled.
                # gen_sched = insert_barriers(kernel, gen_sched,
                #         reverse=False, kind="global")

                # for sched_item in gen_sched:
                #     if (
                #             isinstance(sched_item, Barrier)
                #             and sched_item.kind == "global"):
                #         raise LoopyError("kernel requires a global barrier %s"
                #                 % sched_item.comment)

                # The debugger is stopped while barriers are inserted and
                # while the consumer holds the yielded kernel.
                debug.stop()

                logger.info("%s: barrier insertion: start", kernel.name)

                gen_sched = insert_barriers(kernel, gen_sched,
                        reverse=False, kind="local")

                logger.info("%s: barrier insertion: done", kernel.name)

                yield kernel.copy(
                        schedule=gen_sched,
                        state=kernel_state.SCHEDULED)
                debug.start()

                schedule_count += 1

            # if no-boost mode yielded a viable schedule, stop now
            if schedule_count:
                break

    except KeyboardInterrupt:
        print()
        print(75 * "-")
        print("Interrupted during scheduling")
        print(75 * "-")
        print_longest_dead_end()

        raise

    debug.done_scheduling()
    if not schedule_count:
        print(75 * "-")
        print("ERROR: Sorry--loo.py did not find a schedule for your kernel.")
        print(75 * "-")
        print_longest_dead_end()
        raise RuntimeError("no valid schedules found")

    logger.info("%s: schedule done", kernel.name)
def generate_loop_schedules(kernel, debug_args=None):
    """Generate scheduled copies of a preprocessed *kernel*.

    This is a generator: nothing is checked or computed until the first
    item is requested.  Two internal schedule generators are tried in
    order, the first with ``allow_boost=None`` and the second with the
    default ``allow_boost``.  The second one is only consulted if the first
    one did not produce any schedule.

    :arg kernel: the kernel to schedule.  Its ``state`` must be
        ``kernel_state.PREPROCESSED``; its ``loop_priority`` is passed on
        to the internal scheduler.
    :arg debug_args: optional mapping of keyword arguments that is
        forwarded to :class:`ScheduleDebugger`.  *None* (the default) is
        treated like an empty mapping.
    :returns: yields ``kernel.copy(schedule=..., state=SCHEDULED)`` for
        every schedule found, after local barriers have been inserted.
    :raises LoopyError: if *kernel* has not been preprocessed.
    :raises RuntimeError: if no schedule at all could be found.  If
        ``debug.interactive`` is true, the user is prompted and the internal
        scheduler is re-run in debug mode before the error is raised.
    """
    # A None sentinel avoids sharing one mutable dict between all calls.
    if debug_args is None:
        debug_args = {}

    from loopy.kernel import kernel_state
    if kernel.state != kernel_state.PREPROCESSED:
        raise LoopyError("cannot schedule a kernel that has not been "
                "preprocessed")

    loop_priority = kernel.loop_priority

    from loopy.check import pre_schedule_checks
    pre_schedule_checks(kernel)

    schedule_count = 0

    debug = ScheduleDebugger(**debug_args)

    from loopy.kernel.data import IlpBaseTag, ParallelTag, VectorizeTag

    def inames_tagged_with(tag_class):
        # All inames of the kernel whose tag is an instance of *tag_class*.
        return {
                iname for iname in kernel.all_inames()
                if isinstance(kernel.iname_to_tag.get(iname), tag_class)}

    ilp_inames = inames_tagged_with(IlpBaseTag)
    vec_inames = inames_tagged_with(VectorizeTag)
    parallel_inames = inames_tagged_with(ParallelTag)

    sched_state = SchedulerState(
            kernel=kernel,
            loop_nest_map=loop_nest_map(kernel),
            breakable_inames=ilp_inames,
            ilp_inames=ilp_inames,
            vec_inames=vec_inames,
            # ilp and vec are not parallel for the purposes of the scheduler
            parallel_inames=parallel_inames - ilp_inames - vec_inames)

    generators = [
            generate_loop_schedules_internal(sched_state, loop_priority,
                debug=debug, allow_boost=None),
            generate_loop_schedules_internal(sched_state, loop_priority,
                debug=debug)]

    for gen in generators:
        for gen_sched in gen:
            # NOTE: the global-barrier check below is disabled.
            # gen_sched = insert_barriers(kernel, gen_sched,
            #         reverse=False, kind="global")

            # for sched_item in gen_sched:
            #     if isinstance(sched_item, Barrier) and sched_item.kind == "global":
            #         raise LoopyError("kernel requires a global barrier %s"
            #                 % sched_item.comment)

            gen_sched = insert_barriers(kernel, gen_sched,
                    reverse=False, kind="local")

            # The debugger is stopped while the consumer holds the yielded
            # kernel and restarted once the generator is resumed.
            debug.stop()
            yield kernel.copy(
                    schedule=gen_sched,
                    state=kernel_state.SCHEDULED)
            debug.start()

            schedule_count += 1

        # if no-boost mode yielded a viable schedule, stop now
        if schedule_count:
            break

    debug.done_scheduling()

    if not schedule_count:
        if debug.interactive:
            print(75*"-")
            print("ERROR: Sorry--loo.py did not find a schedule for your kernel.")
            print(75*"-")
            print("Loo.py will now show you the scheduler state at the point")
            print("where the longest (dead-end) schedule was generated, in the")
            print("hope that some of this makes sense and helps you find")
            print("the issue.")
            print()
            print("To disable this interactive behavior, pass")
            print(" debug_args=dict(interactive=False)")
            print("to generate_loop_schedules().")
            print(75*"-")
            six.moves.input("Enter:")
            print()
            print()

            # Re-run the internal scheduler with the debugger armed at the
            # length of the longest rejected schedule.
            debug.debug_length = len(debug.longest_rejected_schedule)
            for _ in generate_loop_schedules_internal(sched_state, loop_priority,
                    debug=debug):
                pass

        raise RuntimeError("no valid schedules found")

    logger.info("%s: schedule done", kernel.name)