示例#1
0
def find_associativity():
    """Time a strided memory walk over a grid of array sizes and strides.

    A small C routine is compiled through codepy and invoked once for every
    combination of ``array_megs`` in 1..24 and ``stride`` in 1..639.  For each
    combination the wall-clock time of the call is converted into a bandwidth
    estimate and stored under the key ``(array_megs, stride)``.

    Side effects:

    * ``assoc_result.dat`` receives the pickled result dictionary.
    * ``assoc.c`` receives the C source of the benchmarked routine.

    Relies on ``toolchain``, ``MODULE_CODE`` and ``time`` being defined at
    module level elsewhere in this file.
    """
    # The routine walks a char buffer with the given stride for `steps`
    # iterations, wrapping to index 0 once the position reaches array_size.
    # The buffer comes straight from malloc() and is never initialized, so
    # the returned sum is not meaningful; it is computed and returned but
    # ignored by the caller below.
    FUNC_CODE = """
    int go(unsigned array_size, unsigned stride, unsigned steps)
    {
      char *ary = (char *) malloc(sizeof(int) * array_size);

      unsigned p = 0;
      for (unsigned i = 0; i < steps; ++i)
      {
        ary[p] ++;
        p += stride;
        if (p >= array_size)
          p = 0;
      }

      int result = 0;
      for (unsigned i = 0; i < array_size; ++i)
          result += ary[i];

      free(ary);
      return result;
    }
    """
    from codepy.jit import extension_from_string
    cmod = extension_from_string(toolchain, "module", MODULE_CODE % FUNC_CODE)

    result = {}

    steps = 2**20
    from pytools import ProgressBar
    meg_range = range(1, 25)
    stride_range = range(1, 640)
    pb = ProgressBar("bench", len(meg_range)*len(stride_range))
    try:
        for array_megs in meg_range:
            for stride in stride_range:
                start = time()
                cmod.go(array_megs << 20, stride, steps)
                stop = time()

                elapsed = stop-start
                # Factor 2: every step reads and writes one element.
                # NOTE(review): the original comment also mentioned a factor
                # of 4 for sizeof(int), but no such factor is applied and the
                # C loop indexes a char array.  The formula is left as it was.
                gb_transferred = 2*steps/1e9
                bandwidth = gb_transferred/elapsed

                result[array_megs, stride] = bandwidth
                pb.progress()
    finally:
        # Terminate the progress display even if the benchmark is interrupted.
        pb.finished()

    try:
        from cPickle import dump  # Python 2
    except ImportError:
        from pickle import dump  # Python 3

    # Binary mode is required by pickle on Python 3 and is safe on Python 2;
    # the context manager guarantees the data is flushed and the handle closed.
    with open("assoc_result.dat", "wb") as result_file:
        dump(result, result_file)

    with open("assoc.c", "w") as source_file:
        source_file.write(FUNC_CODE)
示例#2
0
def build_matrix(op, dtype=None, shape=None):
    """Assemble the dense matrix of an operator one column at a time.

    Every unit vector of length ``cols`` is passed through the operator and
    the result is stored as the corresponding column of the returned array.

    :arg op: object providing ``matvec`` or, failing that, being callable.
        Its ``dtype`` and ``shape`` attributes are read only when the
        matching argument is *None*.
    :arg dtype: element type of the unit vectors and of the result, or
        *None* to use ``op.dtype``.
    :arg shape: ``(rows, cols)`` of the result, or *None* to use
        ``op.shape``.
    :returns: a numpy array of the given shape and dtype.
    """
    from pytools import ProgressBar

    # Test against None rather than truthiness: an unstructured numpy dtype
    # instance reports len() == 0 and is therefore falsy, so "dtype or
    # op.dtype" would silently discard an explicitly requested dtype.
    if dtype is None:
        dtype = op.dtype
    if shape is None:
        shape = op.shape

    rows, cols = shape
    pb = ProgressBar("matrix", cols)
    mat = np.zeros(shape, dtype)

    # Prefer an explicit matvec method; otherwise treat the operator as a
    # plain callable.
    try:
        matvec_method = op.matvec
    except AttributeError:
        matvec_method = op.__call__

    for i in range(cols):
        # A fresh vector per column, in case the operator keeps a reference
        # to its argument.
        unit_vec = np.zeros(cols, dtype=dtype)
        unit_vec[i] = 1
        mat[:, i] = matvec_method(unit_vec)
        pb.progress()

    pb.finished()

    return mat
示例#3
0
def dump_couch_to_sqlite(couch_db, outfile, scan_max=None):
    """Export the "job" documents of a CouchDB database to an SQLite table.

    The database is traversed twice through ``generate_all_docs`` (defined
    elsewhere in this file):

    1. The scan pass looks at every document whose ``"type"`` field equals
       ``"job"`` and records one Python type per field name.  A field seen
       as both ``int`` and ``float`` is widened to ``float``; any other
       type conflict is an error.
    2. The fill pass creates a table named ``data`` in *outfile* with one
       column per recorded field (except ``"type"``) and inserts one row per
       document.  List values are stored via ``str()``.

    :arg couch_db: database object; must support ``len()`` and be accepted
        by ``generate_all_docs``.
    :arg outfile: database path handed to ``sqlite3.connect``.
    :arg scan_max: if not *None*, the scan pass stops after this many job
        documents.
    :raises RuntimeError: if a field has conflicting types that are not an
        int/float mix.
    :raises TypeError: if a recorded field type has no SQL mapping.
    :raises KeyError: if the scan pass saw no job document at all (the
        ``"type"`` column is then missing from the type table).
    """
    import sqlite3 as sqlite

    from pytools import ProgressBar

    try:
        text_types = (str, unicode)
    except NameError:
        # Python 3: there is no separate unicode type.
        text_types = (str,)

    def is_job(doc):
        return "type" in doc and doc["type"] == "job"

    # {{{ scan for types

    column_type_dict = {}

    pb = ProgressBar("scan (pass 1/2)", len(couch_db))
    scan_count = 0
    for doc in generate_all_docs(couch_db):
        if is_job(doc):
            for k, v in doc.items():
                if v is None:
                    # None carries no type information.  Recording NoneType
                    # (or letting it overwrite a real type) would later fail
                    # with a spurious "ambiguous types" or "No SQL type"
                    # error, so such values are skipped.
                    continue

                new_type = type(v)
                old_type = column_type_dict.get(k, new_type)
                if old_type != new_type:
                    if set([old_type, new_type]) == set([float, int]):
                        new_type = float
                    else:
                        raise RuntimeError("ambiguous types for '%s': %s, %s" % (k, new_type, old_type))
                column_type_dict[k] = new_type

            scan_count += 1
            if scan_max is not None and scan_count >= scan_max:
                break
        pb.progress()

    pb.finished()

    # }}}

    # Every job document has type == "job", so the column carries no
    # information.  Raises KeyError when no job document was scanned.
    del column_type_dict["type"]
    column_types = list(column_type_dict.items())

    def get_sql_type(tp):
        if tp in text_types:
            return "text"
        elif issubclass(tp, list):
            # Lists are stored as their str() representation.
            return "text"
        elif issubclass(tp, int):
            return "integer"
        elif issubclass(tp, (float, numpy.floating)):
            return "real"
        else:
            raise TypeError("No SQL type for %s" % tp)

    # Column names come straight from document keys and are interpolated
    # unquoted, so they must be valid SQL identifiers.
    create_stmt = "create table data (%s)" % ",".join("%s %s" % (name, get_sql_type(tp)) for name, tp in column_types)
    insert_stmt = "insert into data values (%s)" % (",".join(["?"] * len(column_types)))

    db_conn = sqlite.connect(outfile, timeout=30)
    try:
        db_conn.execute(create_stmt)
        db_conn.commit()

        pb = ProgressBar("fill (pass 2/2)", len(couch_db))
        for doc in generate_all_docs(couch_db):
            data = [None] * len(column_types)
            if is_job(doc):
                for i, (col_name, _) in enumerate(column_types):
                    try:
                        value = doc[col_name]
                    except KeyError:
                        print("doc %s had no field %s" % (doc["_id"], col_name))
                    else:
                        data[i] = str(value) if isinstance(value, list) else value

            # NOTE(review): a row is inserted for every document, so non-job
            # documents produce all-NULL rows.  Kept as in the original;
            # confirm whether those rows are wanted.
            db_conn.execute(insert_stmt, data)
            pb.progress()

        pb.finished()

        db_conn.commit()
    finally:
        # Release the database handle even when a pass fails midway.
        db_conn.close()
示例#4
0
def test_fmm_completeness(
    ctx_getter, dims, nsources_req, ntargets_req, who_has_extent, source_gen, target_gen, filter_kind
):
    """Tests whether the built FMM traversal structures and driver completely
    capture all interactions.

    The test builds a tree and a traversal for generated particles, drives
    the FMM with random source weights and a wrangler class defined
    elsewhere in this file, and asserts that every (retained) potential
    entry is close to the sum of all weights.

    :arg ctx_getter: zero-argument callable whose result is passed to
        ``cl.CommandQueue`` and to the tree/traversal builders.
    :arg dims: forwarded to *source_gen* and *target_gen*.
    :arg nsources_req: requested source count, forwarded to *source_gen*.
    :arg ntargets_req: requested target count, forwarded to *target_gen*;
        *None* means no separate targets are generated.
    :arg who_has_extent: string; containing ``"s"`` enables source radii,
        containing ``"t"`` enables target radii.
    :arg source_gen: callable invoked as
        ``source_gen(queue, nsources_req, dims, dtype, seed=15)``.
    :arg target_gen: callable invoked as
        ``target_gen(queue, ntargets_req, dims, dtype, seed=16)``.
    :arg filter_kind: falsy for no target filtering, otherwise ``"user"``
        or ``"tree"``; any other truthy value raises :exc:`ValueError`.
    """

    sources_have_extent = "s" in who_has_extent
    targets_have_extent = "t" in who_has_extent

    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    dtype = np.float64

    # {{{ generate particles

    # The actual particle counts are taken from the generated arrays rather
    # than from the requested counts.
    try:
        sources = source_gen(queue, nsources_req, dims, dtype, seed=15)
        nsources = len(sources[0])

        if ntargets_req is None:
            # This says "same as sources" to the tree builder.
            targets = None
            ntargets = ntargets_req
        else:
            targets = target_gen(queue, ntargets_req, dims, dtype, seed=16)
            ntargets = len(targets[0])
    except ImportError:
        # A missing optional dependency of the generators skips the test
        # instead of failing it.
        pytest.skip("loo.py not available, but needed for particle array " "generation")

    # }}}

    from pyopencl.clrandom import RanluxGenerator

    # One seeded generator serves the radii and, further down, the filter
    # flags; the order of the draws therefore matters for reproducibility.
    rng = RanluxGenerator(queue, seed=13)
    if sources_have_extent:
        # Radii are 2**x with x drawn between a=-10 and b=0.
        source_radii = 2 ** rng.uniform(queue, nsources, dtype=dtype, a=-10, b=0)
    else:
        source_radii = None

    if targets_have_extent:
        # NOTE(review): ntargets is None when ntargets_req is None; this
        # presumably relies on the test parametrization never combining
        # target extent with "targets same as sources" -- confirm.
        target_radii = 2 ** rng.uniform(queue, ntargets, dtype=dtype, a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder

    tb = TreeBuilder(ctx)

    tree, _ = tb(
        queue,
        sources,
        targets=targets,
        max_particles_in_box=30,
        source_radii=source_radii,
        target_radii=target_radii,
        debug=True,
    )
    # Disabled debugging aid: plot the tree.
    if 0:
        tree.get().plot()
        import matplotlib.pyplot as pt

        pt.show()

    from boxtree.traversal import FMMTraversalBuilder

    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)
    # Merge the close lists only when the traversal carries separate ones.
    if trav.sep_close_smaller_starts is not None:
        trav = trav.merge_close_lists(queue)

    weights = np.random.randn(nsources)
    # weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    # Host-side copies of the traversal and its tree are what the wrangler
    # and the FMM driver operate on below.
    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    if filter_kind:
        # One flag per target (or per source when targets == sources), drawn
        # as int32 between a=0 and b=2 and narrowed to int8; entries > 0 are
        # the targets retained in the final comparison.
        flags = rng.uniform(queue, ntargets or nsources, np.int32, a=0, b=2).astype(np.int8)
        if filter_kind == "user":
            from boxtree.tree import filter_target_lists_in_user_order

            filtered_targets = filter_target_lists_in_user_order(queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
                host_tree, filtered_targets.get(queue=queue)
            )
        elif filter_kind == "tree":
            from boxtree.tree import filter_target_lists_in_tree_order

            filtered_targets = filter_target_lists_in_tree_order(queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
                host_tree, filtered_targets.get(queue=queue)
            )
        else:
            raise ValueError("unsupported value of 'filter_kind'")
    else:
        wrangler = ConstantOneExpansionWrangler(host_tree)

    if ntargets is None and not filter_kind:
        # This check only works for targets == sources.
        # Reordering into source order and back must reproduce the input.
        assert (wrangler.reorder_potentials(wrangler.reorder_sources(weights)) == weights).all()

    from boxtree.fmm import drive_fmm

    pot = drive_fmm(host_trav, wrangler, weights)

    # {{{ build, evaluate matrix (and identify missing interactions)

    # Disabled debugging aid: builds the full interaction matrix one unit
    # source vector at a time and plots the (target, source) pairs whose
    # entry is zero.
    if 0:
        mat = np.zeros((ntargets, nsources), dtype)
        from pytools import ProgressBar

        # Quieten logging while the FMM is driven once per source.
        logging.getLogger().setLevel(logging.WARNING)

        pb = ProgressBar("matrix", nsources)
        for i in range(nsources):
            unit_vec = np.zeros(nsources, dtype=dtype)
            unit_vec[i] = 1
            mat[:, i] = drive_fmm(host_trav, wrangler, unit_vec)
            pb.progress()
        pb.finished()

        logging.getLogger().setLevel(logging.INFO)

        import matplotlib.pyplot as pt

        if 1:
            pt.spy(mat)
            pt.show()

        # Zero entries mark interactions the traversal did not account for.
        missing_tgts, missing_srcs = np.where(mat == 0)

        if 1 and len(missing_tgts):
            from boxtree.visualization import TreePlotter

            plotter = TreePlotter(host_tree)
            plotter.draw_tree(fill=False, edgecolor="black")
            plotter.draw_box_numbers()
            plotter.set_bounding_box()

            tree_order_missing_tgts = host_tree.indices_to_tree_target_order(missing_tgts)
            tree_order_missing_srcs = host_tree.indices_to_tree_source_order(missing_srcs)

            src_boxes = [host_tree.find_box_nr_for_source(i) for i in tree_order_missing_srcs]
            tgt_boxes = [host_tree.find_box_nr_for_target(i) for i in tree_order_missing_tgts]
            print(src_boxes)
            print(tgt_boxes)

            pt.plot(host_tree.targets[0][tree_order_missing_tgts], host_tree.targets[1][tree_order_missing_tgts], "rv")
            pt.plot(host_tree.sources[0][tree_order_missing_srcs], host_tree.sources[1][tree_order_missing_srcs], "go")
            pt.gca().set_aspect("equal")

            pt.show()

    # }}}

    if filter_kind:
        # Compare only the potentials of the targets selected by the flags.
        pot = pot[flags.get() > 0]

    # The check passes only if every remaining potential entry is close to
    # the sum of all source weights.
    rel_err = la.norm((pot - weights_sum) / nsources)
    good = rel_err < 1e-8
    # Disabled debugging aid: plot the per-target deviation.
    if 0 and not good:
        import matplotlib.pyplot as pt

        pt.plot(pot - weights_sum)
        pt.show()

    # Disabled debugging aid: plot the filtered targets and mark the bad
    # ones.  It reads `flags`, which is only bound when filter_kind is set.
    if 0 and not good:
        import matplotlib.pyplot as pt

        filt_targets = [host_tree.targets[0][flags.get() > 0], host_tree.targets[1][flags.get() > 0]]
        host_tree.plot()
        bad = np.abs(pot - weights_sum) >= 1e-3
        bad_targets = [filt_targets[0][bad], filt_targets[1][bad]]
        print(bad_targets[0].shape)
        pt.plot(filt_targets[0], filt_targets[1], "x")
        pt.plot(bad_targets[0], bad_targets[1], "v")
        pt.show()

    assert good
示例#5
0
def optimize_plan(opt_name, plan_generator, target_func, maximize, debug_flags=set(), occupancy_slack=0.5,
        log_filename=None):
    """Pick the best execution plan according to *target_func*.

    Plans whose ``invalid_reason()`` is not *None* are discarded.  Of the
    rest, only those whose occupancy is at least ``occupancy_slack`` times
    the best occupancy are evaluated with *target_func*.

    :arg opt_name: name used in the debug flag names, the progress bar
        title and the debug output.
    :arg plan_generator: zero-argument callable yielding candidate plans.
    :arg target_func: callable taking a plan and returning either a value,
        a tuple ``(value, extra...)`` or *None*.  A *None* value (bare or as
        first tuple item) excludes the plan; the extra items are passed to
        ``plan.features()`` when logging is enabled.
    :arg maximize: if true the plan with the largest value wins, otherwise
        the one with the smallest.
    :arg debug_flags: set of flag strings (never modified here).
        Recognized: ``"cuda_<opt_name>_plan"`` (verbose output),
        ``"cuda_plan_no_progress"``, ``"cuda_plan_log"``,
        ``"cuda_no_plan"`` and ``"cuda_no_plan_<opt_name>"``.
    :arg occupancy_slack: fraction of the maximum occupancy a plan must
        reach to be evaluated.
    :arg log_filename: if not *None* and ``"cuda_plan_log"`` is in
        *debug_flags*, evaluated plans are recorded in the SQLite file
        ``plan-<log_filename>.dat``.
    :returns: ``(plan, value)``.  With a ``cuda_no_plan*`` flag set, the
        plan with the highest occupancy and the value ``0``.
    :raises RuntimeError: if no valid plan exists.
    """
    plans = [p for p in plan_generator() if p.invalid_reason() is None]

    debug = ("cuda_%s_plan" % opt_name) in debug_flags
    show_progress = ("cuda_plan_no_progress" not in debug_flags) and not debug

    # Logging must be requested through the flag as well as the file name.
    if "cuda_plan_log" not in debug_flags:
        log_filename = None

    if not plans:
        raise RuntimeError("no valid CUDA execution plans found")

    if set(["cuda_no_plan", "cuda_no_plan_"+opt_name]) & debug_flags:
        # Planning disabled: skip target_func and go by occupancy alone.
        from pytools import argmax2
        return argmax2((plan, plan.occupancy_record().occupancy)
                for plan in plans), 0

    max_occup = max(plan.occupancy_record().occupancy for plan in plans)
    desired_occup = occupancy_slack*max_occup

    db_conn = None
    insert_stmt = None
    if log_filename is not None:
        from pytools import single_valued
        feature_columns = single_valued(p.feature_columns() for p in plans)
        # The first word of each column declaration is the column name.
        feature_names = [fc.split()[0] for fc in feature_columns]

        try:
            import sqlite3 as sqlite
        except ImportError:
            from pysqlite2 import dbapi2 as sqlite

        db_conn = sqlite.connect("plan-%s.dat" % log_filename)

        try:
            db_conn.execute("""
                  create table data (
                    id integer primary key autoincrement,
                    %s,
                    value real)"""
                    % ", ".join(feature_columns))
        except sqlite.OperationalError:
            # Deliberately ignored; presumably the table already exists
            # from an earlier run that wrote to the same log file.
            pass

        # Loop-invariant, so built once instead of once per plan.
        insert_stmt = (
                "insert into data (%s,value) values (%s)"
                % (", ".join(feature_names),
                    ",".join(["?"]*(1+len(feature_names)))))

    if show_progress:
        from pytools import ProgressBar
        pbar = ProgressBar("plan "+opt_name, len(plans))

    plan_values = []
    try:
        try:
            for p in plans:
                if show_progress:
                    pbar.progress()

                # The small tolerance keeps plans that sit exactly on the
                # occupancy threshold despite floating-point rounding.
                if p.occupancy_record().occupancy >= desired_occup - 1e-10:
                    if debug:
                        print("<---- trying %s:" % p)

                    value = target_func(p)
                    if isinstance(value, tuple):
                        extra_info = value[1:]
                        value = value[0]
                    else:
                        # An empty tuple (not None) so that
                        # p.features(*extra_info) below also works for
                        # target functions returning a bare value.
                        extra_info = ()

                    if value is not None:
                        if debug:
                            print("----> yielded %g" % value)
                        # The running index lets the winning value be
                        # looked up again after the arg-max/arg-min search.
                        plan_values.append(((len(plan_values), p), value))

                        if db_conn is not None:
                            db_conn.execute(
                                    insert_stmt,
                                    p.features(*extra_info)+(value,))
        finally:
            if show_progress:
                pbar.finished()

        if db_conn is not None:
            db_conn.commit()
    finally:
        # Always release the log database, including on failure (in which
        # case the uncommitted rows are dropped, as before).
        if db_conn is not None:
            db_conn.close()

    # NOTE(review): plan_values is empty when target_func rejected every
    # candidate; the search below then fails inside pytools.
    from pytools import argmax2, argmin2
    if maximize:
        num_plan, plan = argmax2(plan_values)
    else:
        num_plan, plan = argmin2(plan_values)

    plan_value = plan_values[num_plan][1]

    if debug:
        print("----------------------------------------------")
        print("chosen: %s" % plan)
        print("value: %g" % plan_value)
        print("----------------------------------------------")

    return plan, plan_value