Exemplo n.º 1
0
def generate_loop_schedules(kernel, debug_args=None):
    """Yield scheduled copies of the preprocessed *kernel*.

    This is a generator function, so none of the work below (including the
    state check and the pre-schedule checks) happens until the caller asks
    for the first schedule.

    Two searches are attempted in order: first one with ``allow_boost=None``
    (the "no-boost mode"), then one with the internal generator's default
    setting. The second search is only consulted if the first produced no
    schedule at all.

    :arg kernel: the kernel to schedule; its ``state`` must be
        ``kernel_state.PREPROCESSED``.
    :arg debug_args: optional mapping of keyword arguments forwarded to
        :class:`ScheduleDebugger`, e.g. ``dict(interactive=False)``.
        *None* (the default) is equivalent to an empty mapping.

    :returns: a generator of ``kernel.copy(...)`` results, each carrying a
        schedule with local barriers inserted and
        ``state=kernel_state.SCHEDULED``.

    :raises LoopyError: if *kernel* has not been preprocessed.
    :raises RuntimeError: once the search is exhausted without having
        produced a single schedule.
    :raises KeyboardInterrupt: re-raised after printing a notice and (in
        interactive debug mode) replaying the longest dead-end schedule.
    """
    from loopy.kernel import kernel_state

    if kernel.state != kernel_state.PREPROCESSED:
        raise LoopyError("cannot schedule a kernel that has not been preprocessed")

    from loopy.check import pre_schedule_checks

    pre_schedule_checks(kernel)

    schedule_count = 0

    # A None default (instead of a shared ``{}``) keeps the default argument
    # from being one mutable object shared by every call.
    debug = ScheduleDebugger(**({} if debug_args is None else debug_args))

    from loopy.kernel.data import IlpBaseTag, ParallelTag, VectorizeTag

    def inames_tagged_with(tag_class):
        # Collect all inames whose tag is an instance of *tag_class*.
        return {
            iname
            for iname in kernel.all_inames()
            if isinstance(kernel.iname_to_tag.get(iname), tag_class)
        }

    ilp_inames = inames_tagged_with(IlpBaseTag)
    vec_inames = inames_tagged_with(VectorizeTag)
    parallel_inames = inames_tagged_with(ParallelTag)

    loop_nest_with_map = find_loop_nest_with_map(kernel)
    loop_nest_around_map = find_loop_nest_around_map(kernel)
    sched_state = SchedulerState(
        kernel=kernel,
        loop_nest_around_map=loop_nest_around_map,
        loop_insn_dep_map=find_loop_insn_dep_map(
            kernel, loop_nest_with_map=loop_nest_with_map, loop_nest_around_map=loop_nest_around_map
        ),
        breakable_inames=ilp_inames,
        ilp_inames=ilp_inames,
        vec_inames=vec_inames,
        # time-varying part
        active_inames=(),
        entered_inames=frozenset(),
        schedule=(),
        unscheduled_insn_ids={insn.id for insn in kernel.instructions},
        scheduled_insn_ids=frozenset(),
        # ilp and vec are not parallel for the purposes of the scheduler
        parallel_inames=parallel_inames - ilp_inames - vec_inames,
        group_insn_counts=group_insn_counts(kernel),
        active_group_counts={},
    )

    # Generator objects are lazy: the second search does not start unless
    # the loop below actually iterates over it.
    generators = [
        generate_loop_schedules_internal(sched_state, debug=debug, allow_boost=None),
        generate_loop_schedules_internal(sched_state, debug=debug),
    ]

    def print_longest_dead_end():
        # Interactive post-mortem: re-run the search with the debugger set
        # to the length of the longest rejected schedule.
        if not debug.interactive:
            return

        print("Loo.py will now show you the scheduler state at the point")
        print("where the longest (dead-end) schedule was generated, in the")
        print("the hope that some of this makes sense and helps you find")
        print("the issue.")
        print()
        print("To disable this interactive behavior, pass")
        print("  debug_args=dict(interactive=False)")
        print("to generate_loop_schedules().")
        print(75 * "-")
        six.moves.input("Enter:")
        print()
        print()

        debug.debug_length = len(debug.longest_rejected_schedule)
        while True:
            try:
                for _ in generate_loop_schedules_internal(sched_state, debug=debug):
                    pass

            except ScheduleDebugInput as e:
                # The exception text carries a new debug length; adopt it
                # and restart the replay.
                debug.debug_length = int(str(e))
                continue

            break

    try:
        for gen in generators:
            for gen_sched in gen:
                # gen_sched = insert_barriers(kernel, gen_sched,
                #         reverse=False, kind="global")

                # for sched_item in gen_sched:
                #     if (
                #             isinstance(sched_item, Barrier)
                #             and sched_item.kind == "global"):
                #         raise LoopyError("kernel requires a global barrier %s"
                #                 % sched_item.comment)

                # NOTE(review): stop()/start() presumably pause the
                # debugger's bookkeeping while barrier insertion and the
                # consumer run -- confirm against ScheduleDebugger.
                debug.stop()

                # Pass kernel.name as a logging argument so formatting is
                # deferred until a handler actually emits the record.
                logger.info("%s: barrier insertion: start", kernel.name)

                gen_sched = insert_barriers(kernel, gen_sched, reverse=False, kind="local")

                logger.info("%s: barrier insertion: done", kernel.name)

                yield kernel.copy(schedule=gen_sched, state=kernel_state.SCHEDULED)
                debug.start()

                schedule_count += 1

            # if no-boost mode yielded a viable schedule, stop now
            if schedule_count:
                break

    except KeyboardInterrupt:
        print()
        print(75 * "-")
        print("Interrupted during scheduling")
        print(75 * "-")
        print_longest_dead_end()
        raise

    debug.done_scheduling()
    if not schedule_count:
        print(75 * "-")
        print("ERROR: Sorry--loo.py did not find a schedule for your kernel.")
        print(75 * "-")
        print_longest_dead_end()
        raise RuntimeError("no valid schedules found")

    logger.info("%s: schedule done", kernel.name)
Exemplo n.º 2
0
def generate_loop_schedules(kernel, debug_args=None):
    """Yield scheduled copies of the preprocessed *kernel*.

    This is a generator function, so none of the work below (including the
    state check and the pre-schedule checks) happens until the caller asks
    for the first schedule.

    Two searches are attempted in order, both using ``kernel.loop_priority``:
    first one with ``allow_boost=None`` (the "no-boost mode"), then one with
    the internal generator's default setting. The second search is only
    consulted if the first produced no schedule at all.

    :arg kernel: the kernel to schedule; its ``state`` must be
        ``kernel_state.PREPROCESSED``.
    :arg debug_args: optional mapping of keyword arguments forwarded to
        :class:`ScheduleDebugger`, e.g. ``dict(interactive=False)``.
        *None* (the default) is equivalent to an empty mapping.

    :returns: a generator of ``kernel.copy(...)`` results, each carrying a
        schedule with local barriers inserted and
        ``state=kernel_state.SCHEDULED``.

    :raises LoopyError: if *kernel* has not been preprocessed.
    :raises RuntimeError: once the search is exhausted without having
        produced a single schedule. In interactive debug mode, an
        explanation is printed and the search is replayed first.
    """
    from loopy.kernel import kernel_state
    if kernel.state != kernel_state.PREPROCESSED:
        raise LoopyError("cannot schedule a kernel that has not been "
                "preprocessed")

    loop_priority = kernel.loop_priority

    from loopy.check import pre_schedule_checks
    pre_schedule_checks(kernel)

    schedule_count = 0

    # A None default (instead of a shared ``{}``) keeps the default argument
    # from being one mutable object shared by every call.
    debug = ScheduleDebugger(**({} if debug_args is None else debug_args))

    from loopy.kernel.data import IlpBaseTag, ParallelTag, VectorizeTag

    def inames_tagged_with(tag_class):
        # Collect all inames whose tag is an instance of *tag_class*.
        return {
                iname
                for iname in kernel.all_inames()
                if isinstance(kernel.iname_to_tag.get(iname), tag_class)}

    ilp_inames = inames_tagged_with(IlpBaseTag)
    vec_inames = inames_tagged_with(VectorizeTag)
    parallel_inames = inames_tagged_with(ParallelTag)

    sched_state = SchedulerState(
            kernel=kernel,
            loop_nest_map=loop_nest_map(kernel),
            breakable_inames=ilp_inames,
            ilp_inames=ilp_inames,
            vec_inames=vec_inames,
            # ilp and vec are not parallel for the purposes of the scheduler
            parallel_inames=parallel_inames - ilp_inames - vec_inames)

    # Generator objects are lazy: the second search does not start unless
    # the loop below actually iterates over it.
    generators = [
            generate_loop_schedules_internal(sched_state, loop_priority,
                debug=debug, allow_boost=None),
            generate_loop_schedules_internal(sched_state, loop_priority,
                debug=debug)]
    for gen in generators:
        for gen_sched in gen:
            # gen_sched = insert_barriers(kernel, gen_sched,
            #         reverse=False, kind="global")

            # for sched_item in gen_sched:
            #     if isinstance(sched_item, Barrier) and sched_item.kind == "global":
            #         raise LoopyError("kernel requires a global barrier %s"
            #                 % sched_item.comment)

            gen_sched = insert_barriers(kernel, gen_sched,
                    reverse=False, kind="local")

            # NOTE(review): stop()/start() presumably pause the debugger's
            # bookkeeping while the consumer holds the yielded kernel --
            # confirm against ScheduleDebugger.
            debug.stop()
            yield kernel.copy(
                    schedule=gen_sched,
                    state=kernel_state.SCHEDULED)
            debug.start()

            schedule_count += 1

        # if no-boost mode yielded a viable schedule, stop now
        if schedule_count:
            break

    debug.done_scheduling()

    if not schedule_count:
        if debug.interactive:
            print(75*"-")
            print("ERROR: Sorry--loo.py did not find a schedule for your kernel.")
            print(75*"-")
            print("Loo.py will now show you the scheduler state at the point")
            print("where the longest (dead-end) schedule was generated, in the")
            print("the hope that some of this makes sense and helps you find")
            print("the issue.")
            print()
            print("To disable this interactive behavior, pass")
            print("  debug_args=dict(interactive=False)")
            print("to generate_loop_schedules().")
            print(75*"-")
            six.moves.input("Enter:")
            print()
            print()

            # Replay the search with the debugger set to the length of the
            # longest rejected schedule.
            debug.debug_length = len(debug.longest_rejected_schedule)
            for _ in generate_loop_schedules_internal(sched_state, loop_priority,
                    debug=debug):
                pass

        raise RuntimeError("no valid schedules found")

    # Pass kernel.name as a logging argument so formatting is deferred
    # until a handler actually emits the record.
    logger.info("%s: schedule done", kernel.name)