Пример #1
0
def p_norm(ap, p, TPBX, TPBY):
    """Return the ``1 / p`` power of the summed output of ``p_norm_helper``.

    The array produced by ``p_norm_helper(ap, p, TPBX, TPBY)`` is flattened,
    its elements are added together with a ``cuda.reduce`` addition reducer,
    and the resulting total is raised to the power ``1 / p``.

    NOTE(review): presumably the helper yields the element-wise ``|x| ** p``
    terms and ``TPBX`` / ``TPBY`` are threads-per-block settings -- confirm
    against ``p_norm_helper``.
    """
    flat_terms = p_norm_helper(ap, p, TPBX, TPBY).flatten()
    add_all = cuda.reduce(lambda lhs, rhs: lhs + rhs)
    return add_all(flat_terms) ** (1 / p)
Пример #2
0
 def setup(self):
     """Create the kernel, stream, arrays and reducer stored on ``self``.

     Sets up, in order: a jitted ``no_op`` kernel, a CUDA stream, zero-filled
     host arrays of length ``self.n`` (float32 and float64) together with
     device copies transferred on that stream, an addition reducer, and two
     one-element device arrays. The stream is synchronized before returning.
     """
     self.no_op = cuda.jit(argtypes=())(no_op)

     stream = cuda.stream()
     self.stream = stream

     # Host arrays and their device copies; the transfers use the stream.
     host_f32 = np.zeros(self.n, dtype=np.float32)
     self.f32 = host_f32
     self.d_f32 = cuda.to_device(host_f32, stream)

     host_f64 = np.zeros(self.n, dtype=np.float64)
     self.f64 = host_f64
     self.d_f64 = cuda.to_device(host_f64, stream)

     self.sum_reduce = cuda.reduce(lambda lhs, rhs: lhs + rhs)

     # One-element device arrays; these copies do not pass a stream.
     self.res_f32 = cuda.to_device(np.zeros(1, dtype=np.float32))
     self.res_f64 = cuda.to_device(np.zeros(1, dtype=np.float64))

     stream.synchronize()
Пример #3
0
 def test_prod_reduce(self):
     """Check a multiplying ``cuda.reduce`` against ``ndarray.prod``.

     The input is the float64 values 1 through 64 and the reduction is
     started from ``init=1``.
     """
     multiply_all = cuda.reduce(lambda lhs, rhs: lhs * rhs)
     values = np.arange(64, dtype=np.float64) + 1
     reduced = multiply_all(values, init=1)
     # Reference value first, reduced value second, as np.allclose scales
     # its relative tolerance by the second argument.
     self.assertTrue(np.allclose(values.prod(), reduced))
Пример #4
0
 def test_prod_reduce(self):
     """A ``cuda.reduce`` product of 1..64 must be close to NumPy's product."""
     reducer = cuda.reduce(lambda x, y: x * y)

     # float64 operands 1.0 .. 64.0
     operand = 1 + np.arange(64, dtype=np.float64)
     reference = operand.prod()

     outcome = reducer(operand, init=1)

     is_close = np.allclose(reference, outcome)
     self.assertTrue(is_close)