def view(self, dtype=None):
    """Return a ``GPUArray`` that reinterprets this array's data as *dtype*.

    The result reuses this array's ``allocator`` and ``gpudata`` address
    and records ``self`` as its ``base``. When *dtype* is ``None`` the
    array's own dtype is used.

    Only one axis is resized: the one whose stride has the smallest
    absolute value. Its length is scaled by ``old_itemsize / new_itemsize``
    and its stride by the inverse ratio; all other axes are left untouched.

    :raises ValueError: if the byte extent of the chosen axis (length times
        the old item size) is not a multiple of the new item size.
    """
    if dtype is None:
        dtype = self.dtype

    src_size = self.dtype.itemsize
    dst_size = np.dtype(dtype).itemsize

    from pytools import argmin2
    axis = argmin2(
        (idx, abs(step)) for idx, step in enumerate(self.strides))

    # NOTE(review): nothing here checks that the chosen axis is densely
    # packed (stride == item size) -- confirm callers guarantee that.
    axis_bytes = self.shape[axis] * src_size
    if axis_bytes % dst_size != 0:
        raise ValueError("new type not compatible with array")

    def replace_entry(seq, entry):
        # Same tuple as *seq*, with position *axis* swapped for *entry*.
        return seq[:axis] + (entry,) + seq[axis + 1:]

    resized_shape = replace_entry(self.shape, axis_bytes // dst_size)
    resized_strides = replace_entry(
        self.strides, self.strides[axis] * dst_size // src_size)

    return GPUArray(
        shape=resized_shape,
        dtype=dtype,
        allocator=self.allocator,
        strides=resized_strides,
        base=self,
        gpudata=int(self.gpudata))
def pick_faster_func(benchmark, choices, attempts=3):
    """Return the entry of *choices* with the lowest benchmark result.

    Every candidate is benchmarked *attempts* times, in the order given, and
    is scored by the smallest value ``benchmark(candidate)`` returned for it.
    The candidate with the smallest score wins; on a tie the earliest
    candidate in *choices* is returned.

    :param benchmark: callable taking one candidate and returning a
        comparable cost (lower is better).
    :param choices: iterable of candidates; it is traversed exactly once.
    :param attempts: number of benchmark runs per candidate, at least 1.
    :raises ValueError: if *attempts* is smaller than 1 or *choices* is
        empty.
    """
    if attempts < 1:
        # Without this guard the inner min() fails with the unhelpful
        # "min() arg is an empty sequence".
        raise ValueError(
            "attempts must be at least 1, got %r" % (attempts,))

    def best_of_attempts(candidate):
        return min(benchmark(candidate) for _ in range(attempts))

    # The builtin min() keeps the first minimal item, so tie-breaking is
    # the same as with a hand-written first-wins argmin.
    return min(choices, key=best_of_attempts)
def pick_faster_func(benchmark, choices, attempts=3):
    """Benchmark each candidate and return the one with the lowest cost.

    ``benchmark(candidate)`` is called *attempts* times per candidate (the
    candidates are visited in order, all attempts for one before moving to
    the next). A candidate's score is the minimum over its attempts, and the
    candidate with the smallest score is returned. Ties go to the candidate
    that appears first in *choices*.

    :param benchmark: one-argument callable returning a comparable cost,
        where smaller means better.
    :param choices: iterable of candidates, consumed once.
    :param attempts: how many times each candidate is benchmarked; must be
        at least 1.
    :raises ValueError: if *attempts* is below 1, or *choices* yields
        nothing.
    """
    if attempts < 1:
        # Fail early with a clear message instead of the generic
        # "min() arg is an empty sequence" from the per-candidate min().
        raise ValueError(
            "attempts must be at least 1, got %r" % (attempts,))

    def score(candidate):
        return min(benchmark(candidate) for _ in range(attempts))

    # min() returns the first minimal element, i.e. first-wins on ties.
    return min(choices, key=score)
def _open_plan_log(log_filename, plans):
    """Open the sqlite plan log and return ``(connection, feature_names)``.

    The database file is ``plan-<log_filename>.dat``. Its ``data`` table
    gets one column per entry of the plans' (single-valued)
    ``feature_columns()`` plus a ``value`` column.
    """
    from pytools import single_valued
    feature_columns = single_valued(p.feature_columns() for p in plans)
    # A feature column reads "<name> <sql type ...>"; keep only the name.
    feature_names = [fc.split()[0] for fc in feature_columns]

    try:
        import sqlite3 as sqlite
    except ImportError:
        from pysqlite2 import dbapi2 as sqlite

    db_conn = sqlite.connect("plan-%s.dat" % log_filename)
    try:
        db_conn.execute("""
            create table data (
              id integer primary key autoincrement,
              %s,
              value real)"""
            % ", ".join(feature_columns))
    except sqlite.OperationalError:
        # Deliberate best effort: the table is expected to exist already
        # when the log file is reused across runs.
        pass

    return db_conn, feature_names


def optimize_plan(opt_name, plan_generator, target_func, maximize,
                  debug_flags=frozenset(), occupancy_slack=0.5,
                  log_filename=None):
    """Evaluate candidate execution plans and return the best one.

    :param opt_name: short name of the optimization; used in the debug-flag
        names and the progress bar title.
    :param plan_generator: zero-argument callable returning an iterable of
        plan objects. Plans whose ``invalid_reason()`` is not ``None`` are
        discarded. Each plan must provide ``occupancy_record().occupancy``
        and, when logging is enabled, ``feature_columns()`` and
        ``features(*extra_info)``.
    :param target_func: callable evaluated on each sufficiently occupied
        plan. It returns the plan's value, a tuple ``(value, *extra_info)``,
        or ``None`` (also as the tuple's first item) to skip the plan.
    :param maximize: if true the plan with the largest value wins,
        otherwise the one with the smallest. Ties go to the earlier plan.
    :param debug_flags: iterable of flag strings. Recognized flags:

        * ``cuda_<opt_name>_plan`` -- print debug output (and suppress the
          progress bar).
        * ``cuda_plan_no_progress`` -- suppress the progress bar.
        * ``cuda_plan_log`` -- required for *log_filename* to take effect.
        * ``cuda_no_plan``, ``cuda_no_plan_<opt_name>`` -- skip evaluation
          and return the highest-occupancy plan with value ``0``.
    :param occupancy_slack: only plans whose occupancy reaches this
        fraction of the best occupancy among the valid plans are evaluated.
    :param log_filename: when given (and ``cuda_plan_log`` is set), every
        evaluated plan's features and value are appended to the sqlite
        database ``plan-<log_filename>.dat``.
    :returns: tuple ``(plan, plan_value)``.
    :raises RuntimeError: if the generator yields no valid plan.
    :raises ValueError: if no evaluated plan produced a value.
    """
    plans = [p for p in plan_generator() if p.invalid_reason() is None]

    debug = ("cuda_%s_plan" % opt_name) in debug_flags
    show_progress = ("cuda_plan_no_progress" not in debug_flags) and not debug

    if "cuda_plan_log" not in debug_flags:
        log_filename = None

    if not plans:
        raise RuntimeError("no valid CUDA execution plans found")

    # .intersection() accepts any iterable, so debug_flags need not be a set.
    no_plan_flags = set(("cuda_no_plan", "cuda_no_plan_" + opt_name))
    if no_plan_flags.intersection(debug_flags):
        return max(
            plans, key=lambda plan: plan.occupancy_record().occupancy), 0

    max_occup = max(plan.occupancy_record().occupancy for plan in plans)
    desired_occup = occupancy_slack * max_occup

    db_conn = None
    insert_stmt = None
    if log_filename is not None:
        db_conn, feature_names = _open_plan_log(log_filename, plans)
        # Loop-invariant, so build it once.
        insert_stmt = "insert into data (%s,value) values (%s)" % (
            ", ".join(feature_names),
            ",".join(["?"] * (1 + len(feature_names))))

    plan_values = []
    try:
        if show_progress:
            from pytools import ProgressBar
            pbar = ProgressBar("plan " + opt_name, len(plans))
        try:
            for p in plans:
                if show_progress:
                    pbar.progress()

                # The small epsilon absorbs floating-point noise in
                # occupancy_slack * max_occup.
                if not (p.occupancy_record().occupancy
                        >= desired_occup - 1e-10):
                    continue

                if debug:
                    print("<---- trying %s:" % (p,))

                value = target_func(p)
                if isinstance(value, tuple):
                    extra_info = value[1:]
                    value = value[0]
                else:
                    # Must be splattable: it is passed on as
                    # p.features(*extra_info) when logging.
                    extra_info = ()

                if value is None:
                    continue

                if debug:
                    print("----> yielded %g" % (value,))
                plan_values.append((p, value))

                if db_conn is not None:
                    db_conn.execute(
                        insert_stmt,
                        p.features(*extra_info) + (value,))
        finally:
            if show_progress:
                pbar.finished()

        # Only reached when the whole evaluation loop succeeded.
        if db_conn is not None:
            db_conn.commit()
    finally:
        if db_conn is not None:
            db_conn.close()

    if not plan_values:
        raise ValueError(
            "no CUDA execution plan for '%s' yielded a value" % opt_name)

    # max()/min() keep the first best entry, so ties go to the earlier plan.
    select = max if maximize else min
    plan, plan_value = select(plan_values, key=lambda entry: entry[1])

    if debug:
        print("----------------------------------------------")
        print("chosen: %s" % (plan,))
        print("value: %g" % (plan_value,))
        print("----------------------------------------------")

    return plan, plan_value