def run(self):
    """Time ``self.R`` launches of the ``"gather"`` kernel.

    Every launch dispatches ``self.benchmark`` over ``self.N`` work items
    and is followed by a fence.  The elapsed time reported by the timer
    after the last launch is stored in ``self.seconds``.
    """
    stopwatch = pk.Timer()

    for _ in range(self.R):
        pk.parallel_for("gather", self.N, self.benchmark)
        # Block until this launch has finished before issuing the next one.
        pk.fence()

    elapsed = stopwatch.seconds()
    self.seconds = elapsed
def run(self):
    """Initialise the matrices, time the transpose kernel, and validate it.

    Steps:
      1. Launch ``self.init`` over the ``order x order`` index space, tiled
         by ``tile_size x tile_size``, and fence.
      2. Launch the transpose workunit ``self.iterations`` times.  The
         iteration order handed to the policy depends on ``self.permute``
         (Left/Right when set, Right/Left otherwise).  The elapsed time is
         stored in ``self.transpose_time``.
      3. Reduce with ``self.abserr_reduce`` into ``self.abserr``, print the
         value, and report whether it exceeds the ``1.0e-8`` threshold.
    """

    def full_domain(**extra):
        # Fresh policy over the whole order x order index space.
        return pk.MDRangePolicy(
            [0, 0],
            [self.order, self.order],
            [self.tile_size, self.tile_size],
            **extra)

    pk.parallel_for(full_domain(), self.init)
    pk.fence()

    if self.permute:
        rank = pk.Rank(2, pk.Iterate.Left, pk.Iterate.Right)
    else:
        rank = pk.Rank(2, pk.Iterate.Right, pk.Iterate.Left)
    # The policy does not change between iterations, so build it once and
    # keep its construction out of the timed region.
    transpose_policy = full_domain(rank=rank)

    timer = pk.Timer()
    for _ in range(self.iterations):
        # NOTE(review): the attribute really is spelled `tranpose` here;
        # presumably it matches the workunit's definition -- confirm before
        # renaming.
        pk.parallel_for("transpose", transpose_policy, self.tranpose)

    # Wait for all launches to finish before reading the timer; otherwise
    # kernels still in flight would not be included in the measurement.
    pk.fence()
    self.transpose_time = timer.seconds()

    self.abserr = pk.parallel_reduce(full_domain(), self.abserr_reduce)
    pk.printf("%f\n", self.abserr)

    epsilon: float = 1.0e-8
    if self.abserr > epsilon:
        pk.printf(
            "ERROR: aggregated squared error exceeds threshold %.2f\n",
            self.abserr)
    else:
        # Format fixed from "%2.f" (width 2, zero decimals) to "%.2f" so it
        # matches the error branch.
        pk.printf("Solution validates %.2f\n", self.abserr)
def run(self):
    """Initialise the grid, time the star stencil, and check the L1 norm.

    ``tile_size``, ``radius``, ``n`` and ``iterations`` are read from the
    enclosing scope rather than from ``self``.

    Each iteration launches one of ``self.star1`` / ``self.star2`` /
    ``self.star3`` (chosen by the radius) over the interior
    ``[radius, n - radius)`` square, then ``self.increment`` over the full
    ``n x n`` grid.  The elapsed time is stored in ``self.stencil_time``.
    Afterwards ``self.norm_reduce`` is reduced over the interior, divided by
    the number of interior points, and compared with ``2 * iterations``.
    """
    tile: int = tile_size
    reach: int = radius

    pk.parallel_for(pk.MDRangePolicy([0, 0], [n, n], [tile, tile]), self.init)
    pk.fence()

    stopwatch = pk.Timer()
    for step in range(iterations):
        if step == 1:
            pk.fence()

        # Radius 1 -> star1, radius 2 -> star2, anything else -> star3.
        stencil = (
            self.star1 if reach == 1
            else self.star2 if reach == 2
            else self.star3
        )
        pk.parallel_for(
            "stencil",
            pk.MDRangePolicy(
                [reach, reach], [n - reach, n - reach], [tile, tile]),
            stencil)

        pk.parallel_for(
            pk.MDRangePolicy([0, 0], [n, n], [tile, tile]), self.increment)

    pk.fence()
    self.stencil_time = stopwatch.seconds()

    # Number of points the stencil was applied to (interior square).
    interior = n - 2 * reach
    active_points: int = interior * interior

    # Verify correctness.
    self.norm = pk.parallel_reduce(
        pk.MDRangePolicy([reach, reach], [n - reach, n - reach], [tile, tile]),
        self.norm_reduce)
    pk.fence()
    self.norm /= active_points

    tolerance: float = 1.0e-8
    reference_norm: float = 2 * (iterations)
    deviation = abs(self.norm - reference_norm)
    if deviation > tolerance:
        pk.printf("ERROR: L1 norm != Reference norm err=%.2f\n", deviation)
    else:
        pk.printf("Solution validates\n")
def run(self):
    """Initialise the views and time ``self.repeats`` rounds of the GUPS kernel.

    ``self.init_data`` is launched over ``self.dataCount`` items and
    ``self.init_indices`` over ``self.indicesCount`` items.  Each timed round
    launches ``self.run_gups_atomic`` when ``self.use_atomics`` is truthy and
    ``self.run_gups`` otherwise, over ``self.indicesCount`` items, followed
    by a fence.  The elapsed time is stored in ``self.gupsTime``.
    """
    printf("Initializing Views...\n")
    pk.parallel_for(self.dataCount, self.init_data)
    pk.parallel_for(self.indicesCount, self.init_indices)

    printf("Starting benchmarking...\n")
    # Initialisation must be complete before the timer starts.
    pk.fence()

    stopwatch = pk.Timer()
    for _ in range(self.repeats):
        # FIXME: randomize indices before every round, e.g. assign
        # random.randrange(self.dataCount) to each of the
        # self.indicesCount entries of self.indices.
        update = self.run_gups_atomic if self.use_atomics else self.run_gups
        pk.parallel_for("gups", self.indicesCount, update)
        pk.fence()

    self.gupsTime = stopwatch.seconds()
def run(self):
    """Run the ``"babel_stream"`` kernel sequence and time the whole run.

    After ``self.init_arrays`` has been launched over ``self.array_size``
    items, each of the ``self.num_times`` rounds launches ``self.copy``,
    ``self.mul``, ``self.add`` and ``self.triad`` in that order (fencing
    after each) and then reduces ``self.dot`` into ``self.sum``.  Only the
    aggregate elapsed time is recorded, in ``self.runtime``; per-kernel
    timings are not collected.
    """
    pk.parallel_for(self.array_size, self.init_arrays)
    # Make sure initialisation has finished before the timer is created, so
    # that it cannot leak into the timed region.
    pk.fence()

    timer = pk.Timer()
    for _ in range(self.num_times):
        for kernel in (self.copy, self.mul, self.add, self.triad):
            pk.parallel_for("babel_stream", self.array_size, kernel)
            pk.fence()

        self.sum = pk.parallel_reduce(
            "babel_stream", self.array_size, self.dot)

    self.runtime = timer.seconds()
def run(self):
    """Initialise, time the ``"nstream"`` kernel, and validate the result.

    ``self.init`` is launched over ``self.length`` items, then
    ``self.nstream`` is launched ``self.iterations`` times; the elapsed time
    is stored in ``self.nstream_time``.  The sum of ``abs(self.A[i])`` is
    reduced into ``self.asum`` and compared, as a relative error against
    ``1.0e-8``, with a host-side reference value.  On success the rate and
    the average time per iteration are printed.
    """
    pk.parallel_for(self.length, self.init)
    pk.fence()

    timer = pk.Timer()
    for _ in range(self.iterations):
        pk.parallel_for("nstream", self.length, self.nstream)
    pk.fence()
    self.nstream_time = timer.seconds()

    # Verify correctness: replay the update on scalars, then scale by the
    # number of elements.
    # NOTE(review): the starting values 0 / 2 / 2 presumably mirror what
    # self.init writes into A / B / C -- confirm against the workunit.
    ar: float = 0
    br: float = 2
    cr: float = 2
    for _ in range(self.iterations):
        ar += br + self.scalar * cr
    ar *= self.length

    self.asum = pk.parallel_reduce(
        self.length, lambda i, acc: acc + abs(self.A[i]))
    pk.fence()

    epsilon: float = 1.0e-8
    # Relative-error test written as a product instead of a quotient, so a
    # zero sum (e.g. scalar == -1, or a kernel that wrote nothing) gives a
    # validation verdict rather than a division by zero.
    if abs(ar - self.asum) > epsilon * abs(self.asum):
        pk.printf("ERROR: Failed Validation on output array\n")
    else:
        avgtime: float = self.nstream_time / self.iterations
        # NOTE(review): presumably 4 array accesses per element times a
        # 4-byte element size -- confirm against the views' dtype.
        nbytes: float = 4.0 * self.length * 4
        pk.printf("Solution validates\n")
        pk.printf("Rate (MB/s): %.2f\n", 1.e-6 * nbytes / avgtime)
        pk.printf("Avg time (ms): %f\n", avgtime / 1.e-3)
# Driver: select the execution space, run the benchmark kernel R times, and
# print the timing together with the derived throughput figures.
space = pk.ExecutionSpace(args.execution_space)
pk.set_default_space(space)

N, K, D, R, U, F = args.N, args.K, args.D, args.R, args.U, args.F
scalar_size = 8

policy = pk.RangePolicy(pk.get_default_space(), 0, N)
w = Benchmark_double_8(N, K, D, R, F)

timer = pk.Timer()
for r in range(R):
    pk.parallel_for(policy, w.benchmark)
    pk.fence()
seconds = timer.seconds()

# Totals over all R repetitions.
num_bytes = 1.0 * N * K * R * (2 * scalar_size + 4) + N * R * scalar_size
flops = 1.0 * N * K * R * (F * 2 * U + 2 * (U - 1))
gather_ops = 1.0 * N * K * R * 2

bandwidth_gib = 1.0 * num_bytes / seconds / (1024**3)
gflops = 1e-9 * flops / seconds
ggathers = 1e-9 * gather_ops / seconds

print(
    f"SNKDRUF: {scalar_size/4} {N} {K} {D} {R} {U} {F} Time: {seconds} "
    f"Bandwidth: {bandwidth_gib} GiB/s GFlop/s: {gflops} "
    f"GGather/s: {ggathers}"
)
def run(self):
    """Time a single launch of the ``"bytes_and_flops"`` kernel.

    ``self.benchmark`` is dispatched once with a ``pk.TeamPolicy`` built
    from ``self.N`` and ``self.T``.  After a fence, the elapsed time is
    stored in ``self.seconds``.
    """
    stopwatch = pk.Timer()

    # The policy is built after the timer starts, so its construction is
    # part of the measured interval.
    team_policy = pk.TeamPolicy(self.N, self.T)
    pk.parallel_for("bytes_and_flops", team_policy, self.benchmark)
    pk.fence()

    self.seconds = stopwatch.seconds()