def benchmark(template, setting, tree):
    """Return the median measured duration of enqueueing ``tree``.

    A profile built from ``template(*setting)`` is installed on the first
    queue of ``tree.context`` under the key ``(template, sc.float32)``.
    ``tree`` is then enqueued once untimed (warm-up) and afterwards enqueued
    repeatedly, timing each enqueue + queue synchronisation, until the
    accumulated measured time reaches ``1e-1``.

    Args:
        template: Callable invoked as ``template(*setting)``; the callable
            itself is also used as part of the profile key.
        setting: Positional arguments unpacked into ``template``.
        tree: Expression handed to ``sc.driver.enqueue``; its
            ``context.queues[0]`` is the queue that gets profiled.

    Returns:
        ``median`` of the per-run durations reported by ``time()``
        (presumably seconds, if ``time`` is ``time.time`` -- confirm against
        the file's imports).
    """
    queue = tree.context.queues[0]
    queue.profiles[template, sc.float32] = sc.profile(
        template(*setting), sc.float32, queue)

    # Warm-up: run once without timing. The result stays bound to ``z`` so
    # it remains referenced while the queue is synchronised.
    z, events = sc.driver.enqueue(tree)
    queue.synchronize()

    # Time: sample until 0.1 of accumulated measured time has been collected.
    # ``total`` starts at 0, so at least one sample is always taken.
    times = []
    total = 0
    while total < 1e-1:
        start = time()
        z, events = sc.driver.enqueue(tree)
        queue.synchronize()
        end = time()
        times.append(end - start)
        total += times[-1]
    return median(times)
def benchmark(template, setting, tree):
    """Time repeated executions of ``tree`` and return the median duration.

    Steps:
      1. Register ``sc.profile(template(*setting), sc.float32, queue)`` in
         ``queue.profiles`` under ``(template, sc.float32)``, where ``queue``
         is ``tree.context.queues[0]``.
      2. Enqueue ``tree`` once and synchronise, without timing (warm-up).
      3. Enqueue + synchronise in a loop, recording each duration, until the
         sum of recorded durations is at least ``1e-1``.

    Args:
        template: Callable producing the profiled object from ``*setting``;
            also part of the key in ``queue.profiles``.
        setting: Argument sequence unpacked into ``template``.
        tree: Expression passed to ``sc.driver.enqueue``.

    Returns:
        The value of ``median(times)`` over all recorded durations. The unit
        is whatever ``time()`` returns -- presumably seconds; verify against
        the module's imports.
    """
    queue = tree.context.queues[0]
    profile = sc.profile(template(*setting), sc.float32, queue)
    queue.profiles[template, sc.float32] = profile

    # Warm-up run, excluded from the measurements. The returned pair is kept
    # bound so the result object outlives the synchronisation below.
    z, events = sc.driver.enqueue(tree)
    queue.synchronize()

    # Measurement loop; the first iteration always runs because total == 0.
    times = []
    total = 0
    while total < 1e-1:
        start = time()
        z, events = sc.driver.enqueue(tree)
        queue.synchronize()
        elapsed = time() - start
        times.append(elapsed)
        total += elapsed
    return median(times)
def benchmark(template, tree, operation=sc.templates.gemm_nn):
    """Return the median measured duration of ``tree`` under ``template``.

    ``sc.profile(template, dtype, queue)`` is stored in ``queue.profiles``
    under ``(template, dtype)``, where ``queue`` is
    ``tree.context.queues[0]``. ``tree`` is enqueued once as a warm-up; if
    that fails, the profile is reported as infinitely slow instead of
    raising. Otherwise enqueue + synchronise is timed repeatedly until the
    accumulated measured time reaches ``1e-2``.

    Args:
        template: Object passed to ``sc.profile`` and used in the profile key.
        tree: Expression handed to ``sc.driver.enqueue``.
        operation: Accepted for interface compatibility.
            NOTE(review): this argument is never read in the body -- confirm
            whether the profile key was meant to use it.

    Returns:
        ``float("inf")`` when the warm-up run raises an ``Exception``;
        otherwise ``median`` of the per-run ``time.time()`` differences.
    """
    queue = tree.context.queues[0]
    # NOTE(review): ``dtype`` is neither a parameter nor a local of this
    # function, so it must be supplied by an enclosing scope -- confirm.
    queue.profiles[template, dtype] = sc.profile(template, dtype, queue)

    # Warm-up. Only ``Exception`` is caught: a bare ``except`` would also
    # swallow KeyboardInterrupt/SystemExit and report a user interrupt as an
    # infinitely slow profile.
    try:
        z, events = sc.driver.enqueue(tree)
        queue.synchronize()
    except Exception:
        return float("inf")

    # Time: ``total`` starts at 0, so at least one sample is always taken.
    times = []
    total = 0
    while total < 1e-2:
        start = time.time()
        z, events = sc.driver.enqueue(tree)
        queue.synchronize()
        end = time.time()
        times.append(end - start)
        total += times[-1]
    return median(times)
import isaac as sc
import isaac.templates as templates

# Script: run the same expression under two gemm_nn profiles and print the
# summed event time reported for each.

sc.driver.default.queue_properties = sc.driver.PROFILING_ENABLE
sc.driver.default.device = 0

# Allocate two 2-D float32 arrays (M x K and K x N) on the default device.
M, N, K = 972, 1541, 793
A = sc.empty((M, K), sc.float32)
B = sc.empty((K, N), sc.float32)

# Get command queue
queue = A.context.queues[0]

# Argument tuples for templates.gemm_nn, one per profile, in report order.
PROFILE_SETTINGS = (
    (1, 8, 16, 8, 1, 8, 1, 8,
     templates.FETCH_FROM_LOCAL, templates.FETCH_FROM_LOCAL, 8, 8),
    (1, 8, 16, 16, 1, 8, 1, 8,
     templates.FETCH_FROM_LOCAL, templates.FETCH_FROM_LOCAL, 8, 16),
)

for index, setting in enumerate(PROFILE_SETTINGS, 1):
    # Benchmark profile <index>
    queue.profiles[sc.templates.gemm_nn, sc.float32] = sc.profile(
        templates.gemm_nn(*setting), sc.float32, queue)
    # NOTE(review): the profile is registered under gemm_nn while the
    # enqueued expression is reduce_1d(A, B) -- confirm this expression is
    # the one the profile is meant to affect.
    C, events = sc.driver.enqueue(sc.reduce_1d(A, B))
    C.context.synchronize()
    # Summed event times scaled by 1e-9 and labelled 's' in the report
    # (elapsed_time is presumably in nanoseconds -- confirm).
    elapsed = sum(e.elapsed_time for e in events) * 1e-9
    # Single-string form prints the same text under Python 2 and Python 3.
    print('Profile %d finished in %s s' % (index, elapsed))