Exemplo n.º 1
0
def run() -> None:
    """Run the ``Workload.yAx`` reduction repeatedly and print a summary.

    Sizes come from ``parse_args()``: fields 0, 1 and 3 are used as ``N``,
    ``M`` and ``E``, and the last field is forwarded to ``Workload`` as its
    ``fill`` flag.  The reduction is launched a fixed 1000 times under a
    single ``pk.TeamPolicy``; the value of the final launch is compared with
    ``N * M * E`` and a mismatch is reported without aborting.
    """
    parsed: Tuple[int, int, int, int, int, bool] = parse_args()
    N, M, E = parsed[0], parsed[1], parsed[3]
    fill: bool = parsed[-1]
    nrepeat = 1000
    print(f"Total size S = {N * M} N = {N} M = {M} E = {E}")

    workload = Workload(N, M, E, fill)
    # One policy object is shared by every launch in the timed loop.
    policy = pk.TeamPolicy(E, "auto", 32, pk.get_default_space())

    stopwatch = pk.Timer()
    for _ in range(nrepeat):
        result = pk.parallel_reduce(policy, workload.yAx)
    timer_result = stopwatch.seconds()

    print(f"Computed result for {N} x {M} x {E} is {result}")

    # Reference value the reduction is checked against.
    solution: float = N * M * E
    if result != solution:
        pk.printf("Error: result (%lf) != solution (%lf)\n", result, solution)

    summary = f"N({N}) M({M}) E({E}) nrepeat({nrepeat}) problem(MB) time({timer_result}) bandwidth(GB/s)"
    print(summary)
Exemplo n.º 2
0
    def test_outer_for(self):
        # Reference value: self.value accumulated self.M times.  Kept as a
        # running sum so the arithmetic is performed step by step.
        expected: float = 0
        for _ in range(self.M):
            expected += self.value

        # Launch the functor's outer_for workunit with self.N as the first
        # policy argument on the configured execution space.
        policy = pk.TeamPolicy(self.N, pk.AUTO, space=self.execution_space)
        pk.parallel_for(policy, self.functor.outer_for)

        # Entries 0 .. N-1 of for_view must each equal the reference value.
        for idx in range(self.N):
            observed: int = self.functor.for_view[idx]
            self.assertEqual(expected, observed)
Exemplo n.º 3
0
    def run(self):
        # Time self.nrepeat launches of the yAx reduction; self.result keeps
        # the value returned by the most recent launch.
        stopwatch = pk.Timer()

        for _ in range(self.nrepeat):
            # A fresh policy is built for every launch.
            policy = pk.TeamPolicy(self.E, "auto", 32)
            self.result = pk.parallel_reduce("team_vector_loop", policy, self.yAx)

        # Elapsed time reported by the timer after the last launch.
        self.timer_result = stopwatch.seconds()
Exemplo n.º 4
0
    def run(self):
        # Time self.nrepeat launches of the yAx reduction; self.result keeps
        # the value returned by the most recent launch.
        stopwatch = pk.Timer()

        for _ in range(self.nrepeat):
            # A fresh policy is built for every launch.
            policy = pk.TeamPolicy(self.N, "auto")
            self.result = pk.parallel_reduce("team_policy", policy, self.yAx)

        # Elapsed time reported by the timer after the last launch.
        self.timer_result = stopwatch.seconds()
Exemplo n.º 5
0
    def test_yAx_plus1(self):
        # Host-side reference: for every row j, accumulate the products
        # A[j][i] * x[i] over i, scale by (y[j] + 1), and add to the total.
        reference: float = 0
        for row in range(self.N):
            row_dot: float = 0
            for col in range(self.M):
                row_dot += self.A[row][col] * self.x[col]
            reference += (self.y[row] + 1) * row_dot

        # The same quantity as produced by the functor's yAx_plus1 workunit.
        policy = pk.TeamPolicy(self.N, pk.AUTO, space=self.execution_space)
        result: int = pk.parallel_reduce(policy, self.functor.yAx_plus1)

        self.assertEqual(reference, result)
Exemplo n.º 6
0
def run() -> None:
    """Run the stand-alone ``yAx`` reduction repeatedly and print a summary.

    Sizes come from ``parse_args()``: fields 0, 1 and 3 are used as ``N``,
    ``M`` and ``E``, and the last field selects how the views are
    initialised.  Three double-precision, right-layout views are allocated
    (``y``: E x N, ``x``: E x M, ``A``: E x N x M) and set to one everywhere,
    either through ``View.fill`` or element by element.  The reduction is
    launched a fixed 1000 times; the value of the final launch is compared
    with ``N * M * E`` and a mismatch is reported without aborting.
    """
    parsed: Tuple[int, int, int, int, int, bool] = parse_args()
    N, M, E = parsed[0], parsed[1], parsed[3]
    fill: bool = parsed[-1]
    nrepeat = 1000
    print(f"Total size S = {N * M} N = {N} M = {M} E = {E}")

    right = pk.Layout.LayoutRight
    y: pk.View2D = pk.View([E, N], pk.double, layout=right)
    x: pk.View2D = pk.View([E, M], pk.double, layout=right)
    A: pk.View3D = pk.View([E, N, M], pk.double, layout=right)

    if not fill:
        # Element-wise initialisation: every entry of y, x and A becomes 1.
        for e in range(E):
            for row in range(N):
                y[e][row] = 1

            for col in range(M):
                x[e][col] = 1

            for row in range(N):
                for col in range(M):
                    A[e][row][col] = 1
    else:
        for view in (y, x, A):
            view.fill(1)

    # One policy object is shared by every launch in the timed loop.
    policy = pk.TeamPolicy(E, "auto", 32, pk.get_default_space())

    stopwatch = pk.Timer()
    for _ in range(nrepeat):
        result = pk.parallel_reduce(policy, yAx, N=N, M=M, y=y, x=x, A=A)
    timer_result = stopwatch.seconds()

    print(f"Computed result for {N} x {M} x {E} is {result}")

    # Reference value the reduction is checked against.
    solution: float = N * M * E
    if result != solution:
        pk.printf("Error: result (%lf) != solution (%lf)\n", result, solution)

    summary = f"N({N}) M({M}) E({E}) nrepeat({nrepeat}) problem(MB) time({timer_result}) bandwidth(GB/s)"
    print(summary)
Exemplo n.º 7
0
    def test_yAx_vector(self):
        # Host-side reference: for every e, accumulate over rows j the value
        # y_vector[e][j] * (sum over i of A_vector[e][j][i] * x_vector[e][i]),
        # then add the per-e subtotal to the grand total.
        reference: float = 0
        for e in range(self.E):
            per_e: float = 0

            for row in range(self.N):
                row_dot: float = 0

                for col in range(self.M):
                    row_dot += self.A_vector[e][row][col] * self.x_vector[e][col]

                per_e += self.y_vector[e][row] * row_dot

            reference += per_e

        # The same quantity as produced by the functor's yAx_vector workunit.
        policy = pk.TeamPolicy(self.E, pk.AUTO, 32, self.execution_space)
        result: float = pk.parallel_reduce(policy, self.functor.yAx_vector)

        self.assertEqual(reference, result)
Exemplo n.º 8
0
        print("S must be 0 (shared scratch memory not supported)")
        exit(1) 

    space = pk.ExecutionSpace.OpenMP
    if args.execution_space:
        space = pk.ExecutionSpace(args.execution_space)

    N = args.N
    K = args.K
    R = args.R
    U = args.U
    F = args.F
    T = args.T
    S = args.S
    scalar_size = 8
    
    pk.set_default_space(space)

    r = pk.TeamPolicy(N, T, space=pk.get_default_space())
    w = Benchmark_double_8(N, K, R, args.D, F, T, S)

    timer = pk.Timer()
    pk.parallel_for(r, w.benchmark)
    seconds = timer.seconds()

    num_bytes = 1.0 * N * K * R * 3 * scalar_size
    flops = 1.0 * N * K * R * (F * 2 * U + 2 * (U - 1))
    print(f"NKRUFTS: {N} {K} {R} {U} {F} {T} {S} Time: {seconds} " +
            f"Bandwidth: {1.0 * num_bytes / seconds / (1024**3)} GiB/s GFlop/s: {1e-9 * flops / seconds}")
    print(w.C)
Exemplo n.º 9
0
 def run(self):
     # Time a single "bytes_and_flops" launch of self.benchmark.  The fence
     # is issued before the timer is read, so it is part of the measurement.
     stopwatch = pk.Timer()
     policy = pk.TeamPolicy(self.N, self.T)
     pk.parallel_for("bytes_and_flops", policy, self.benchmark)
     pk.fence()
     self.seconds = stopwatch.seconds()
Exemplo n.º 10
0
 def run(self) -> None:
     # Dispatch either the parallel_for variant (self.pfor) or the reduction
     # variant (self.preduce); only the reduction stores a value, in self.PE.
     use_for = self.parallel_for
     policy = pk.TeamPolicy(self.nbins, 1, 8)

     if not use_for:
         self.PE = pk.parallel_reduce(policy, self.preduce)
         return

     pk.parallel_for(policy, self.pfor)