Пример #1
0
    def test_subviews_sum(self):
        # The reduction over functor.subviews must equal threads * (i_1 * 2).
        expected: int = self.threads * (self.i_1 * 2)
        policy = self.range_policy

        # Reduce functor.views first to initialize the views; its return
        # value is not needed.
        pk.parallel_reduce(policy, self.functor.views)
        actual: int = pk.parallel_reduce(policy, self.functor.subviews)

        self.assertEqual(expected, actual)
Пример #2
0
    # Runs the self.yAx reduction self.nrepeat times over extent self.N under
    # the label "subview"; stores the last reduction value in self.result and
    # the timer reading in self.timer_result.
    def run(self):
        timer = pk.Timer()

        # Every repetition overwrites self.result, so only the last one is kept.
        for i in range(self.nrepeat):
            self.result = pk.parallel_reduce("subview", self.N, self.yAx)

        self.timer_result = timer.seconds()
Пример #3
0
        # Reduction body for index j. `e` and `team_member` are taken from the
        # enclosing scope, which is not visible in this fragment.
        def team_reduce(j: int, team_acc: pk.Acc[pk.double]):
            # Innermost reduction body: adds A_vector[e][j][i] * x_vector[e][i] for index i.
            def vector_reduce(i: int, vector_acc: pk.Acc[pk.double]):
                vector_acc += self.A_vector[e][j][i] * self.x_vector[e][i]

            # Reduce vector_reduce over a ThreadVectorRange of extent self.M.
            tempM: float = pk.parallel_reduce(pk.ThreadVectorRange(team_member, self.M), vector_reduce)

            # Scale the inner result by y_vector[e][j] and add it to the team accumulator.
            team_acc += self.y_vector[e][j] * tempM
Пример #4
0
    def test_dynamic2D(self):
        # Reducing functor.dynamic2D over [0, i_2) on the default execution
        # space must equal i_4 * i_1 * i_2.
        expected: int = self.i_4 * self.i_1 * self.i_2

        policy = pk.RangePolicy(pk.ExecutionSpace.Default, 0, self.i_2)
        actual: int = pk.parallel_reduce(policy, self.functor.dynamic2D)

        self.assertEqual(expected, actual)
Пример #5
0
def run() -> None:
    """Set up the views, run the ``yAx`` reduction once, and print the outcome.

    ``N`` and ``M`` are the first two values returned by ``parse_args()``.
    The reduction result is compared with ``N * M`` and a mismatch is
    reported through ``pk.printf``. Nothing is returned.
    """
    args: Tuple[int, int, int, int, int, bool] = parse_args()
    N, M = args[0], args[1]
    nrepeat: int = 1
    print(f"Total size S = {N * M} N = {N} M = {M}")

    y = pk.View([N], pk.double)
    x = pk.View([M], pk.double)
    A = pk.View([N * M], pk.double)

    # y_init is dispatched twice: once for y over [0, N) and once for x over [0, M).
    policy_n = pk.RangePolicy(pk.get_default_space(), 0, N)
    pk.parallel_for(policy_n, y_init, y=y)
    policy_m = pk.RangePolicy(pk.get_default_space(), 0, M)
    pk.parallel_for(policy_m, y_init, y=x)
    pk.parallel_for(policy_n, matrix_init, M=M, A=A)

    stopwatch = pk.Timer()
    for _ in range(nrepeat):
        result = pk.parallel_reduce(policy_n, yAx, M=M, y=y, x=x, A=A)
    timer_result = stopwatch.seconds()

    print(f"Computed result for {N} x {M} is {result}")

    expected = N * M
    if result != expected:
        pk.printf("Error: result (%lf) != solution (%lf)\n", result, expected)

    print(f"N({N}) M({M}) nrepeat({nrepeat}) problem(MB) time({timer_result}) bandwidth(GB/s)")
Пример #6
0
def run() -> None:
    """Build the workload, time 1000 ``yAx`` team reductions, and print the outcome.

    ``N``, ``M`` and ``E`` are read from positions 0, 1 and 3 of the tuple
    returned by ``parse_args()``; ``fill`` is its last element. The final
    reduction result is compared with ``N * M * E`` and a mismatch is
    reported through ``pk.printf``. Nothing is returned.
    """
    args: Tuple[int, int, int, int, int, bool] = parse_args()
    N, M, E = args[0], args[1], args[3]
    fill: bool = args[-1]
    nrepeat: int = 1000
    print(f"Total size S = {N * M} N = {N} M = {M} E = {E}")

    workload = Workload(N, M, E, fill)
    policy = pk.TeamPolicy(E, "auto", 32, pk.get_default_space())

    stopwatch = pk.Timer()
    for _ in range(nrepeat):
        result = pk.parallel_reduce(policy, workload.yAx)
    timer_result = stopwatch.seconds()

    print(f"Computed result for {N} x {M} x {E} is {result}")

    expected: float = N * M * E
    if result != expected:
        pk.printf("Error: result (%lf) != solution (%lf)\n", result, expected)

    print(
        f"N({N}) M({M}) E({E}) nrepeat({nrepeat}) problem(MB) time({timer_result}) bandwidth(GB/s)"
    )
Пример #7
0
    def test_dep_two(self):
        # Host-side reference: a DepTwo wrapping a DepOne built from the
        # fixture values, whose sum() is scaled by the thread count.
        inner = DepOne(self.i_1, self.f_1, self.b_1)
        reference = DepTwo(inner)
        expected: float = self.threads * reference.sum()

        actual: float = pk.parallel_reduce(self.range_policy,
                                           self.functor.dep_two_work)

        self.assertEqual(expected, actual)
Пример #8
0
def run() -> None:
    """Run the nstream benchmark end to end and print its report.

    Parses ``iterations``, ``length`` and an optional ``offset`` (default 0)
    from the command line, initializes the workload views, dispatches
    ``w.nstream`` ``iterations`` times under a timer, and compares the
    ``w.res_reduce`` checksum with a reference value computed on the host.

    Raises:
        SystemExit: if ``iterations < 1``, ``length <= 0`` or ``offset < 0``
            (and, via argparse, when the command line cannot be parsed).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('iterations', type=int)
    parser.add_argument('length', type=int)
    parser.add_argument('offset', nargs='?', type=int, default=0)
    args = parser.parse_args()
    iterations = args.iterations
    length = args.length
    offset = args.offset
    scalar = 3

    if iterations < 1:
        sys.exit("ERROR: iterations must be >= 1")

    if length <= 0:
        sys.exit("ERROR: vector length must be positive")

    # Check the offset itself: testing `length` a second time here could never
    # fire, so a negative offset would slip through unreported.
    if offset < 0:
        sys.exit("ERROR: offset must be nonnegative")

    print("Number of iterations = ", iterations)
    print("Vector length        = ", length)
    print("Offset               = ", offset)

    p = pk.RangePolicy(pk.ExecutionSpace.OpenMP, 0, length)
    w = Workload(iterations, length, offset, scalar)

    pk.parallel_for(p, w.init_views)

    # NOTE(review): no explicit fence is issued around the timed region;
    # confirm that the dispatches below complete before the timer is read.
    timer = pk.Timer()

    for _ in range(iterations):
        pk.parallel_for(p, w.nstream)

    nstream_time = timer.seconds()

    # Host-side reference: replay the per-element update `iterations` times
    # on scalars, then scale by the vector length.
    ar: float = 0
    br: float = 2
    cr: float = 2
    for _ in range(iterations):
        ar += br + scalar * cr

    ar *= length

    asum = pk.parallel_reduce(p, w.res_reduce)

    epsilon: float = 1.0e-8
    # Compare against epsilon * |asum| instead of dividing by asum, so a zero
    # checksum cannot raise ZeroDivisionError and a negative checksum cannot
    # make the relative error negative and pass validation by accident.
    if abs(ar - asum) > epsilon * abs(asum):
        print("ERROR: Failed Validation on output array")
    else:
        avgtime: float = nstream_time / iterations
        # NOTE(review): the trailing 4 looks like an assumed element size in
        # bytes - confirm against the dtype of the views created by Workload.
        nbytes: float = 4.0 * length * 4
        print("Solution validates")
        print("Rate (MB/s): %.2f" % (1.e-6 * nbytes / avgtime))
        print("Avg time (ms): %f" % (avgtime / 1.e-3))
Пример #9
0
def run() -> None:
    """Initialize the workload, time 100 ``yAx`` reductions, and print the outcome.

    ``N`` and ``M`` are the first two values returned by ``parse_args()``.
    The final reduction result is compared with ``N * M`` and a mismatch is
    reported through ``pk.printf``. Nothing is returned.
    """
    args: Tuple[int, int, int, int, int, bool] = parse_args()
    N, M = args[0], args[1]
    nrepeat: int = 100
    print(f"Total size S = {N * M} N = {N} M = {M}")

    policy_n = pk.RangePolicy(pk.get_default_space(), 0, N)
    workload = Workload(N, M)

    # y and the matrix are initialized over [0, N); x over [0, M).
    pk.parallel_for(policy_n, workload.y_init)
    policy_m = pk.RangePolicy(pk.get_default_space(), 0, M)
    pk.parallel_for(policy_m, workload.x_init)
    pk.parallel_for(policy_n, workload.matrix_init)

    stopwatch = pk.Timer()
    for _ in range(nrepeat):
        result = pk.parallel_reduce(policy_n, workload.yAx)
    timer_result = stopwatch.seconds()

    print(f"Computed result for {N} x {M} is {result}")

    expected = N * M
    if result != expected:
        pk.printf("Error: result (%lf) != solution (%lf)\n", result, expected)

    print(f"N({N}) M({M}) nrepeat({nrepeat}) problem(MB) time({timer_result}) bandwidth(GB/s)")
Пример #10
0
    def test_dep_one_return(self):
        # Host-side reference: sum() of a DepOne built from the fixture
        # values, scaled by the thread count.
        reference = DepOne(self.i_1, self.f_1, self.b_1)
        expected: float = self.threads * reference.sum()

        actual: float = pk.parallel_reduce(self.range_policy,
                                           self.functor.dep_one_return)

        self.assertEqual(expected, actual)
Пример #11
0
    # Reduction body for index j. `A`, `x`, `y`, `M`, `e` and `team_member`
    # are taken from the enclosing scope, which is not visible in this fragment.
    def team_reduce(j: int, team_acc: pk.Acc[float]):
        # Innermost reduction body: adds A[e][j][i] * x[e][i] for index i.
        def vector_reduce(i: int, vector_acc: pk.Acc[float]):
            vector_acc += A[e][j][i] * x[e][i]

        # Reduce vector_reduce over a ThreadVectorRange of extent M.
        tempM: float = pk.parallel_reduce(
            pk.ThreadVectorRange(team_member, M), vector_reduce)

        # Scale the inner result by y[e][j] and add it to the team accumulator.
        team_acc += y[e][j] * tempM
Пример #12
0
    def test_for_step_stmt(self):
        # Each step of range(i_2, i_1, i_2) contributes threads * i_2 to the
        # expected total, so count the steps and multiply.
        step_count: int = len(range(self.i_2, self.i_1, self.i_2))
        expected: int = step_count * (self.threads * self.i_2)

        actual: int = pk.parallel_reduce(self.range_policy,
                                         self.functor.for_step_stmt)

        self.assertEqual(expected, actual)
Пример #13
0
 # Dispatches one of four kernels over extent self.N_local, chosen by two flags:
 #   self.parallel_for selects parallel_for (no result stored) versus
 #   parallel_reduce (result stored in self.energy);
 #   self.half_neigh selects the halfneigh_* versus the fullneigh_* kernel.
 def run(self) -> None:
     if self.parallel_for:
         if self.half_neigh:
             pk.parallel_for("ForceLJNeigh::compute", self.N_local,
                             self.halfneigh_for)
         else:
             pk.parallel_for("ForceLJNeigh::compute", self.N_local,
                             self.fullneigh_for)
     else:
         # Reduction variants: the reduced value is kept in self.energy.
         if self.half_neigh:
             self.energy = pk.parallel_reduce(
                 "ForceLJNeigh::compute_energy", self.N_local,
                 self.halfneigh_reduce)
         else:
             self.energy = pk.parallel_reduce(
                 "ForceLJNeigh::compute_energy", self.N_local,
                 self.fullneigh_reduce)
Пример #14
0
    def test_gt_op(self):
        # Expected value: threads times value_1 when value_1 > value_2,
        # otherwise threads times value_2.
        chosen = self.value_1 if self.value_1 > self.value_2 else self.value_2
        expected: int = self.threads * chosen

        actual: int = pk.parallel_reduce(self.range_policy, self.functor.gt_op)

        self.assertEqual(expected, actual)
Пример #15
0
    # Runs the self.yAx reduction self.nrepeat times under the label
    # "team_policy"; stores the last reduction value in self.result and the
    # timer reading in self.timer_result.
    def run(self):
        timer = pk.Timer()

        # A new TeamPolicy(self.N, "auto") is constructed for every repetition.
        for i in range(self.nrepeat):
            self.result = pk.parallel_reduce("team_policy",
                                             pk.TeamPolicy(self.N, "auto"),
                                             self.yAx)

        self.timer_result = timer.seconds()
Пример #16
0
    def test_bool_op(self):
        # Expected value: threads * i_1 when b_1 is falsy, threads * i_2 otherwise.
        operand = self.i_2 if self.b_1 else self.i_1
        expected: int = self.threads * operand

        actual: int = pk.parallel_reduce(self.range_policy,
                                         self.functor.bool_op)

        self.assertEqual(expected, actual)
Пример #17
0
    def test_continue(self):
        # Every one of the i_1 iterations contributes threads * i_2 to the
        # expected total.
        expected: int = sum(self.threads * self.i_2 for _ in range(self.i_1))

        actual: int = pk.parallel_reduce(self.range_policy,
                                         self.functor.continue_stmt)

        self.assertEqual(expected, actual)
Пример #18
0
    def test_if_else_stmt(self):
        # Expected value: threads * i_1 when b_1 is truthy, threads * i_2 otherwise.
        operand = self.i_1 if self.b_1 else self.i_2
        expected: int = self.threads * operand

        actual: int = pk.parallel_reduce(self.range_policy,
                                         self.functor.if_else_stmt)

        self.assertEqual(expected, actual)
Пример #19
0
    # Dispatches self.matrix_init over extent self.N, then runs the self.yAx
    # reduction self.nrepeat times under the label "04"; stores the last
    # reduction value in self.result and the timer reading in self.timer_result.
    def run(self):
        # The timer is created before matrix_init, so the reading taken at the
        # end covers the initialization as well as the reductions.
        timer = pk.Timer()

        pk.parallel_for(self.N, self.matrix_init)

        for i in range(self.nrepeat):
            self.result = pk.parallel_reduce("04", self.N, self.yAx)

        self.timer_result = timer.seconds()
Пример #20
0
    def test_dep_two_mutate(self):
        # Build a DepTwo from a DepOne, then change the DepOne's `i` field
        # afterwards; the reference value is dep_two.sum() after that mutation.
        inner = DepOne(self.i_1, self.f_1, self.b_1)
        outer = DepTwo(inner)
        inner.i = self.i_2
        expected: float = self.threads * outer.sum()

        actual: float = pk.parallel_reduce(self.range_policy,
                                           self.functor.dep_two_mutate)

        self.assertEqual(expected, actual)
Пример #21
0
    def test_bool_sum(self):
        # Expected value: threads * i_1 when b_1 is truthy, threads * i_2 otherwise.
        operand = self.i_1 if self.b_1 else self.i_2
        expected: int = self.threads * operand

        actual: int = pk.parallel_reduce(self.range_policy,
                                         self.functor.add_bool)

        self.assertEqual(expected, actual)
Пример #22
0
    # Team-level work unit: j is the team's league_rank(); the member with
    # team_rank() == 0 runs a nested reduction and writes its result to
    # self.for_view[j].
    def outer_for(self, team_member: pk.TeamMember) -> None:
        j: int = team_member.league_rank()

        # Nested reduction body: adds self.value once per index i.
        def inner_reduce(i: int, acc: pk.Acc[pk.double]):
            acc += self.value

        # NOTE(review): the nested TeamThreadRange reduction is only reached by
        # the member with team_rank() == 0 - confirm this is intended.
        if team_member.team_rank() == 0:
            temp: float = pk.parallel_reduce(pk.TeamThreadRange(team_member, self.M), inner_reduce)
            self.for_view[j] = temp
Пример #23
0
    def test_compare(self):
        # Expected value: threads times i_1 when i_1 > i_2, otherwise threads
        # times i_2.
        chosen = self.i_1 if self.i_1 > self.i_2 else self.i_2
        expected: int = self.threads * chosen

        actual: int = pk.parallel_reduce(self.range_policy,
                                         self.functor.compare)

        self.assertEqual(expected, actual)
Пример #24
0
    # Runs the self.yAx reduction self.nrepeat times under the label
    # "team_vector_loop"; stores the last reduction value in self.result and
    # the timer reading in self.timer_result.
    def run(self):
        timer = pk.Timer()

        # A new TeamPolicy(self.E, "auto", 32) is constructed for every repetition.
        for i in range(self.nrepeat):
            self.result = pk.parallel_reduce("team_vector_loop",
                                             pk.TeamPolicy(self.E, "auto", 32),
                                             self.yAx)

        self.timer_result = timer.seconds()
Пример #25
0
    # Team-level reduction work unit: j is the team's league_rank(); the team
    # reduces A[j][i] * x[i] over i, and the member with team_rank() == 0 adds
    # y[j] times that value to the outer accumulator.
    def yAx(self, team_member: pk.TeamMember, acc: pk.Acc[pk.double]) -> None:
        j: int = team_member.league_rank()

        # Nested reduction body: adds A[j][i] * x[i] for index i.
        def inner_reduce(i: int, inner_acc: pk.Acc[pk.double]):
            inner_acc += self.A[j][i] * self.x[i]

        # Reduce inner_reduce over a TeamThreadRange of extent self.M.
        temp2: float = pk.parallel_reduce(pk.TeamThreadRange(team_member, self.M), inner_reduce)

        # Only one member per team contributes to the outer accumulator.
        if team_member.team_rank() == 0:
            acc += self.y[j] * temp2
Пример #26
0
    def test_while_stmt(self):
        # Host-side reference: count up to i_1 with a while loop, adding
        # threads * i_2 to the expected total on every pass.
        increment: int = self.threads * self.i_2
        expected: int = 0
        passes: int = 0
        while passes < self.i_1:
            passes += 1
            expected += increment

        actual: int = pk.parallel_reduce(self.range_policy,
                                         self.functor.while_stmt)

        self.assertEqual(expected, actual)
Пример #27
0
    def test_yAx_plus1(self):
        # Host-side reference: for each row, accumulate A[row][col] * x[col]
        # over the columns, then weight that by (y[row] + 1). Explicit loops
        # keep the accumulation order fixed.
        expected: float = 0
        for row in range(self.N):
            row_dot: float = 0
            for col in range(self.M):
                row_dot += self.A[row][col] * self.x[col]
            expected += (self.y[row] + 1) * row_dot

        policy = pk.TeamPolicy(self.N, pk.AUTO, space=self.execution_space)
        actual: float = pk.parallel_reduce(policy, self.functor.yAx_plus1)

        self.assertEqual(expected, actual)
Пример #28
0
    # Initializes y, x and A, then runs the self.yAx reduction self.nrepeat
    # times under the label "mdrange"; stores the last reduction value in
    # self.result and the timer reading in self.timer_result.
    def run(self):
        # Use the instance extents for every dispatch, matching the MDRange
        # and reduction calls below, instead of relying on bare N / M being
        # resolvable from an outer scope.
        pk.parallel_for(self.N, self.init_y)
        pk.parallel_for(self.M, self.init_x)
        # A is initialized over the 2D index space [0, N) x [0, M).
        pk.parallel_for(pk.MDRangePolicy([0, 0], [self.N, self.M]),
                        self.init_A)

        # The timer is created after initialization, so only the reductions
        # are covered by the reading taken at the end.
        timer = pk.Timer()

        for i in range(self.nrepeat):
            self.result = pk.parallel_reduce("mdrange", self.N, self.yAx)

        self.timer_result = timer.seconds()
Пример #29
0
    # Initializes y, x and the matrix, then runs the self.yAx reduction
    # self.nrepeat times under the label "01"; stores the last reduction value
    # in self.result and the timer reading in self.timer_result.
    def run(self):
        # y and the matrix are initialized over extent self.N, x over self.M.
        pk.parallel_for(self.N, self.y_init)
        # pk.parallel_for(self.N, lambda i : self.y[i] = 1)
        pk.parallel_for(self.M, self.x_init)
        # pk.parallel_for(self.N, lambda i : self.x[i] = 1)
        pk.parallel_for(self.N, self.matrix_init)

        # The timer is created after initialization, so only the reductions
        # are covered by the reading taken at the end.
        timer = pk.Timer()

        for i in range(self.nrepeat):
            self.result = pk.parallel_reduce("01", self.N, self.yAx)

        self.timer_result = timer.seconds()
Пример #30
0
# Team-level reduction work unit taking its data as arguments: j is the team's
# league_rank(); the team reduces A[j][i] * x[i] over i in a range of extent M,
# and the member with team_rank() == 0 adds y[j] times that value to `acc`.
def yAx(team_member: pk.TeamMember, acc: pk.Acc[float], M: int,
        y: pk.View1D[pk.double], x: pk.View1D[pk.double],
        A: pk.View2D[pk.double]):
    j: int = team_member.league_rank()

    # Nested reduction body: adds A[j][i] * x[i] for index i.
    def inner_reduce(i: int, inner_acc: pk.Acc[float]):
        inner_acc += A[j][i] * x[i]

    # Reduce inner_reduce over a TeamThreadRange of extent M.
    temp2: float = pk.parallel_reduce(pk.TeamThreadRange(team_member, M),
                                      inner_reduce)

    # Only one member per team contributes to the outer accumulator.
    if team_member.team_rank() == 0:
        acc += y[j] * temp2