示例#1
0
文件: gather.py 项目: kokkos/pykokkos
    def run(self):
        # Launch the "gather" kernel self.R times over self.N work items and
        # store the timer reading for the whole loop in self.seconds.
        stopwatch = pk.Timer()

        for rep in range(self.R):
            pk.parallel_for("gather", self.N, self.benchmark)
            # A fence follows every launch, so the reading below is taken
            # only after the final fence has returned.
            pk.fence()

        self.seconds = stopwatch.seconds()
示例#2
0
    def run(self):
        # Initialise the matrices, time self.iterations launches of the
        # "transpose" kernel, then reduce the absolute error and report
        # whether it stays within tolerance.
        #
        # Writes: self.transpose_time (timer reading for the transpose loop)
        #         self.abserr (result of the abserr_reduce reduction)
        pk.parallel_for(
            pk.MDRangePolicy([0, 0], [self.order, self.order],
                             [self.tile_size, self.tile_size]), self.init)
        pk.fence()

        timer = pk.Timer()

        for i in range(self.iterations):
            # The two branches differ only in the iteration order handed to
            # pk.Rank (Left/Right versus Right/Left).
            # NOTE: "tranpose" is the spelling of the attribute referenced
            # here; renaming it requires renaming its definition, which lives
            # outside this method.
            if self.permute:
                pk.parallel_for(
                    "transpose",
                    pk.MDRangePolicy([0, 0], [self.order, self.order],
                                     [self.tile_size, self.tile_size],
                                     rank=pk.Rank(2, pk.Iterate.Left,
                                                  pk.Iterate.Right)),
                    self.tranpose)
            else:
                pk.parallel_for(
                    "transpose",
                    pk.MDRangePolicy([0, 0], [self.order, self.order],
                                     [self.tile_size, self.tile_size],
                                     rank=pk.Rank(2, pk.Iterate.Right,
                                                  pk.Iterate.Left)),
                    self.tranpose)

        # Fence before reading the timer so the measurement covers kernel
        # completion rather than just the launches (parallel dispatch may
        # return before the kernel has finished).
        pk.fence()
        self.transpose_time = timer.seconds()

        self.abserr = pk.parallel_reduce(
            pk.MDRangePolicy([0, 0], [self.order, self.order],
                             [self.tile_size, self.tile_size]),
            self.abserr_reduce)

        pk.printf("%f\n", self.abserr)
        epsilon: float = 1.0e-8
        if self.abserr > epsilon:
            pk.printf(
                "ERROR: aggregated squared error exceeds threshold %.2f\n",
                self.abserr)
        else:
            # "%.2f" (two decimals) to match the error branch; the previous
            # "%2.f" meant width 2 / precision 0 and dropped the fraction.
            pk.printf("Solution validates %.2f\n", self.abserr)
示例#3
0
    def run(self):
        # Initialise the grid, run `iterations` stencil + increment sweeps
        # under a timer, then compare the averaged norm with 2 * iterations.
        #
        # Writes: self.stencil_time (timer reading for the sweep loop)
        #         self.norm (norm_reduce result divided by the point count)
        #
        # NOTE(review): tile_size, radius, n and iterations are read as bare
        # names rather than through self; presumably they are resolved
        # outside this method -- confirm.
        tile: int = tile_size
        rad: int = radius

        pk.parallel_for(pk.MDRangePolicy([0, 0], [n, n], [tile, tile]),
                        self.init)
        pk.fence()

        stopwatch = pk.Timer()

        for step in range(iterations):
            if step == 1:
                pk.fence()

            # One star kernel per radius; any radius other than 1 or 2 falls
            # through to star3.
            if rad == 1:
                pk.parallel_for(
                    "stencil",
                    pk.MDRangePolicy([rad, rad], [n - rad, n - rad],
                                     [tile, tile]),
                    self.star1)
            elif rad == 2:
                pk.parallel_for(
                    "stencil",
                    pk.MDRangePolicy([rad, rad], [n - rad, n - rad],
                                     [tile, tile]),
                    self.star2)
            else:
                pk.parallel_for(
                    "stencil",
                    pk.MDRangePolicy([rad, rad], [n - rad, n - rad],
                                     [tile, tile]),
                    self.star3)

            pk.parallel_for(pk.MDRangePolicy([0, 0], [n, n], [tile, tile]),
                            self.increment)

        pk.fence()
        self.stencil_time = stopwatch.seconds()

        # Number of points inside the border of width `rad`.
        interior: int = n - 2 * rad
        active_points: int = interior * interior

        # verify correctness
        self.norm = pk.parallel_reduce(
            pk.MDRangePolicy([rad, rad], [n - rad, n - rad], [tile, tile]),
            self.norm_reduce)
        pk.fence()
        self.norm /= active_points

        epsilon: float = 1.0e-8
        reference_norm: float = 2 * (iterations)
        err: float = abs(self.norm - reference_norm)
        if err > epsilon:
            pk.printf("ERROR: L1 norm != Reference norm err=%.2f\n", err)
        else:
            pk.printf("Solution validates\n")
示例#4
0
    def run(self):
        # Initialise the data and index views, then time self.repeats
        # launches of the "gups" kernel. The timer reading for the loop is
        # stored in self.gupsTime.
        #
        # Messages go through pk.printf, like every other call in this
        # method goes through the pk namespace; a bare printf is not a
        # Python builtin.
        pk.printf("Initializing Views...\n")
        pk.parallel_for(self.dataCount, self.init_data)
        pk.parallel_for(self.indicesCount, self.init_indices)

        pk.printf("Starting benchmarking...\n")
        # Fence before the timer is created so the two initialisation
        # launches above are not part of the measurement.
        pk.fence()

        timer = pk.Timer()
        for i in range(self.repeats):
            # FIXME: randomize indices
            # for i in range(self.indicesCount):
            #     self.indices[i] = random.randrange(self.dataCount)

            # self.use_atomics selects between the atomic and the plain
            # update kernel.
            if self.use_atomics:
                pk.parallel_for("gups", self.indicesCount,
                                self.run_gups_atomic)
            else:
                pk.parallel_for("gups", self.indicesCount, self.run_gups)

            pk.fence()

        self.gupsTime = timer.seconds()
示例#5
0
    def run(self):
        # Initialise the arrays, then time self.num_times passes of the
        # copy, mul, add, triad and dot kernels.
        #
        # Writes: self.sum (result of the last dot reduction)
        #         self.runtime (timer reading for the whole loop)
        pk.parallel_for(self.array_size, self.init_arrays)
        # Fence before starting the timer so the initialisation launch has
        # finished and is not counted in the measurement (parallel dispatch
        # may return before the kernel completes).
        pk.fence()

        timer = pk.Timer()
        for i in range(self.num_times):
            # NOTE: per-kernel timings (self.runtimes[k][i] followed by
            # timer.reset()) are not recorded; only the aggregate reading
            # taken after the loop is kept.
            pk.parallel_for("babel_stream", self.array_size, self.copy)
            pk.fence()

            pk.parallel_for("babel_stream", self.array_size, self.mul)
            pk.fence()

            pk.parallel_for("babel_stream", self.array_size, self.add)
            pk.fence()

            pk.parallel_for("babel_stream", self.array_size, self.triad)
            pk.fence()

            self.sum = pk.parallel_reduce("babel_stream", self.array_size,
                                          self.dot)

        self.runtime = timer.seconds()
示例#6
0
    def run(self):
        # Initialise the arrays, time self.iterations launches of the
        # "nstream" kernel, then validate the result and print the rate.
        #
        # Writes: self.nstream_time (timer reading for the kernel loop)
        #         self.asum (sum of abs(self.A[i]) over self.length entries)
        pk.parallel_for(self.length, self.init)
        # pk.parallel_for(self.length, lambda i: 0, self.A)
        # pk.parallel_for(self.length, lambda i: 2, self.B)
        # pk.parallel_for(self.length, lambda i: 2, self.C)
        pk.fence()

        timer = pk.Timer()

        for i in range(self.iterations):
            pk.parallel_for("nstream", self.length, self.nstream)

        pk.fence()
        self.nstream_time = timer.seconds()

        # verify correctness: replay the per-element update on scalars that
        # start at 0, 2 and 2, then scale by the array length to get the
        # expected sum.
        ar: float = 0
        br: float = 2
        cr: float = 2
        for i in range(self.iterations):
            ar += br + self.scalar * cr

        ar *= self.length

        self.asum = pk.parallel_reduce(self.length,
                                       lambda i, acc: acc + abs(self.A[i]))
        pk.fence()

        epsilon: float = 1.0e-8
        # Relative-error test written as a multiplication so that a zero sum
        # cannot cause a division by zero. self.asum is a sum of absolute
        # values and therefore non-negative, which makes this equivalent to
        # abs(ar - asum) / asum > epsilon whenever asum is positive.
        if abs(ar - self.asum) > epsilon * self.asum:
            pk.printf("ERROR: Failed Validation on output array\n")
        else:
            avgtime: float = self.nstream_time / self.iterations
            # NOTE(review): counts 4 * length elements at 4 bytes each --
            # confirm the element size against the arrays' actual dtype.
            nbytes: float = 4.0 * self.length * 4
            pk.printf("Solution validates\n")
            pk.printf("Rate (MB/s): %.2f\n", 1.e-6 * nbytes / avgtime)
            pk.printf("Avg time (ms): %f\n", avgtime / 1.e-3)
示例#7
0
        space = pk.ExecutionSpace(args.execution_space)

    pk.set_default_space(space)

    N = args.N
    K = args.K
    D = args.D
    R = args.R
    U = args.U
    F = args.F
    scalar_size = 8

    policy = pk.RangePolicy(pk.get_default_space(), 0, N)
    w = Benchmark_double_8(N, K, D, R, F)

    timer = pk.Timer()
    for r in range(R):
        pk.parallel_for(policy, w.benchmark)
        pk.fence()

    seconds = timer.seconds()

    num_bytes = 1.0 * N * K * R * (2 * scalar_size + 4) + N * R * scalar_size
    flops = 1.0 * N * K * R * (F * 2 * U + 2 * (U - 1))
    gather_ops = 1.0 * N * K * R * 2
    seconds = seconds
    print(
        f"SNKDRUF: {scalar_size/4} {N} {K} {D} {R} {U} {F} Time: {seconds} " +
        f"Bandwidth: {1.0 * num_bytes / seconds / (1024**3)} GiB/s GFlop/s: {1e-9 * flops / seconds} GGather/s: {1e-9 * gather_ops / seconds}"
    )
示例#8
0
 def run(self):
     # Time one launch of the "bytes_and_flops" kernel under
     # pk.TeamPolicy(self.N, self.T) and store the timer reading, taken
     # after the fence, in self.seconds.
     stopwatch = pk.Timer()
     pk.parallel_for(
         "bytes_and_flops",
         pk.TeamPolicy(self.N, self.T),
         self.benchmark,
     )
     pk.fence()
     self.seconds = stopwatch.seconds()