Пример #1
0
def calculate_z_numpy_gpu(q, maxiter, z):
    """Calculate z using numpy on the GPU via gpuarray.

    Repeats ``z = z*z + q`` on the GPU for ``maxiter`` steps. Whenever an
    element's magnitude exceeds 2.0 the current iteration index is stored
    for it and its ``z`` and ``q`` are reset to ``0+0j`` so that it stays
    below the threshold afterwards.

    Args:
        q: host array of complex constants (cast to complex64).
        maxiter: number of iterations to run.
        z: host array of complex start values (cast to complex64).

    Returns:
        Host int32 array with the shape of ``q`` holding, per element, the
        iteration index at which the magnitude exceeded 2.0, or 0 if it
        never did.

    Note:
        The helper arrays are 1-D with ``z.size`` elements, so this assumes
        ``q`` and ``z`` are 1-D vectors of equal length -- TODO confirm
        against callers.
    """
    # int32 on both sides: if_positive() mixes outputg with iterg below.
    outputg = gpuarray.to_gpu(nm.zeros(q.shape, dtype=nm.int32))
    zg = gpuarray.to_gpu(z.astype(nm.complex64))
    qg = gpuarray.to_gpu(q.astype(nm.complex64))
    # 2.0 as an array (built directly instead of via a Python list)
    twosg = gpuarray.to_gpu(nm.full(zg.size, 2.0, dtype=nm.float32))
    # 0+0j as an array
    cmplx0sg = gpuarray.to_gpu(nm.zeros(zg.size, dtype=nm.complex64))
    # we'll add 1 to iterg after each iteration
    iterg = gpuarray.to_gpu(nm.zeros(zg.size, dtype=nm.int32))

    # NOTE: no pre-allocated comparison buffer -- the comparison below
    # creates a fresh array every iteration, and the old pre-allocation
    # depended on the removed ``nm.bool`` alias.
    for _ in range(maxiter):
        zg = zg * zg + qg

        # abs returns a complex (rather than a float) from the complex
        # input where the real component is the absolute value (which
        # looks like a bug) so I take the .real after abs()
        abs_zg = abs(zg).real

        comparison_result = abs_zg > twosg
        # escaped elements are zeroed so they never trigger again
        qg = gpuarray.if_positive(comparison_result, cmplx0sg, qg)
        zg = gpuarray.if_positive(comparison_result, cmplx0sg, zg)
        outputg = gpuarray.if_positive(comparison_result, iterg, outputg)
        iterg = iterg + 1
    return outputg.get()
def calculate_z_numpy_gpu(q, maxiter, z):
    """Calculate z using numpy on the GPU.

    Repeats ``z = z*z + q`` on the GPU for ``maxiter`` steps. Whenever an
    element's magnitude exceeds 2.0 the current iteration index is stored
    for it and its ``z`` and ``q`` are reset to ``0+0j`` so that it stays
    below the threshold afterwards.

    Args:
        q: host array of complex constants (cast to complex64).
        maxiter: number of iterations to run.
        z: host array of complex start values (cast to complex64).

    Returns:
        Host int32 array with the shape of ``q`` holding, per element, the
        iteration index at which the magnitude exceeded 2.0, or 0 if it
        never did.

    Note:
        The helper arrays are 1-D with ``z.size`` elements, so this assumes
        ``q`` and ``z`` are 1-D vectors of equal length -- TODO confirm
        against callers.
    """
    # Explicit int32 so outputg has the same dtype as iterg; if_positive()
    # selects between the two and the platform default int may be int64.
    outputg = gpuarray.to_gpu(nm.zeros(q.shape, dtype=nm.int32))
    zg = gpuarray.to_gpu(z.astype(nm.complex64))
    qg = gpuarray.to_gpu(q.astype(nm.complex64))
    # 2.0 as an array (uses the ``nm`` alias like the rest of the function)
    twosg = gpuarray.to_gpu(nm.full(zg.size, 2.0, dtype=nm.float32))
    # 0+0j as an array
    cmplx0sg = gpuarray.to_gpu(nm.zeros(zg.size, dtype=nm.complex64))
    # we'll add 1 to iterg after each iteration
    iterg = gpuarray.to_gpu(nm.zeros(zg.size, dtype=nm.int32))

    # NOTE: no pre-allocated comparison buffer -- the comparison below
    # creates a fresh array every iteration, and the old pre-allocation
    # depended on the removed ``nm.bool`` alias.
    for _ in range(maxiter):
        zg = zg * zg + qg

        # abs returns a complex (rather than a float) from the complex
        # input where the real component is the absolute value (which
        # looks like a bug) so I take the .real after abs()
        abs_zg = abs(zg).real

        comparison_result = abs_zg > twosg
        # escaped elements are zeroed so they never trigger again
        qg = gpuarray.if_positive(comparison_result, cmplx0sg, qg)
        zg = gpuarray.if_positive(comparison_result, cmplx0sg, zg)
        outputg = gpuarray.if_positive(comparison_result, iterg, outputg)
        iterg = iterg + 1
    return outputg.get()
Пример #3
0
def calculate_z_asnumpy_gpu(q, maxiter, z):
    """Calculate z using numpy on the GPU.

    Repeats ``z = z*z + q`` on the GPU for ``maxiter`` steps. Whenever an
    element's magnitude exceeds 2.0 the current iteration index is stored
    for it and its ``z`` and ``q`` are reset to ``0+0j`` so that it stays
    below the threshold afterwards.

    Args:
        q: host array of complex constants.
        maxiter: number of iterations to run.
        z: host array of complex start values.

    Returns:
        Host int32 array with the shape of ``q`` holding, per element, the
        iteration index at which the magnitude exceeded 2.0, or 0 if it
        never did.

    Note:
        The helper arrays are 1-D with ``z.size`` elements, so this assumes
        ``q`` and ``z`` are 1-D vectors of equal length -- TODO confirm
        against callers.
    """
    # convert complex128s (2*float64) to complex64 (2*float32) so they run
    # on older CUDA cards. To use float64 doubles just edit these two lines
    complex_type = np.complex64  # or np.complex128 on newer CUDA devices
    float_type = np.float32  # or np.float64 on newer CUDA devices

    # create an int32 output array on the gpu; the explicit dtype keeps it
    # compatible with iterg, which if_positive() mixes it with below
    outputg = gpuarray.to_gpu(np.zeros(q.shape, dtype=np.int32))
    # create zg and qg on the gpu, converted to the chosen complex type
    zg = gpuarray.to_gpu(z.astype(complex_type))
    qg = gpuarray.to_gpu(q.astype(complex_type))
    # create 2.0 as an array
    twosg = gpuarray.to_gpu(np.full(zg.size, 2.0, dtype=float_type))
    # create 0+0j as an array
    cmplx0sg = gpuarray.to_gpu(np.zeros(zg.size, dtype=complex_type))
    # we'll add 1 to iterg after each iteration
    iterg = gpuarray.to_gpu(np.zeros(zg.size, dtype=np.int32))

    # NOTE: no pre-allocated comparison buffer -- the comparison below
    # creates a fresh array every iteration, and the old pre-allocation
    # depended on the removed ``np.bool`` alias.
    for _ in range(maxiter):
        # multiply z on the gpu by itself, add q (on the gpu)
        zg = zg * zg + qg
        # abs returns a complex (rather than a float) from the complex
        # input where the real component is the absolute value (which
        # looks like a bug) so I take the .real after abs()
        # the above bug relates to pyCUDA from mid2010, it might be fixed now...
        abs_zg = abs(zg).real

        # figure out if zg is > 2
        comparison_result = abs_zg > twosg
        # based on the result either take 0+0j for qg and zg or leave unchanged
        qg = gpuarray.if_positive(comparison_result, cmplx0sg, qg)
        zg = gpuarray.if_positive(comparison_result, cmplx0sg, zg)
        # if the comparison is true then update the iterations count to outputg
        # which we'll extract later
        outputg = gpuarray.if_positive(comparison_result, iterg, outputg)
        # increment the iteration counter
        iterg = iterg + 1
    # extract the result from the gpu back to the cpu
    return outputg.get()
Пример #4
0
def sqrt_normalize_gpu(img):
    """Apply a signed square root to an RGBA image on the GPU and normalize.

    The first three channels and the fourth channel of ``img`` are uploaded
    separately, each value ``v`` is replaced by ``sign(v) * sqrt(|v|)``
    (values that are not positive get the factor -1), and the two results
    are passed to ``normalize_gpu``.

    Args:
        img: host array indexed as ``img[:, :, channel]`` with at least four
            channels.

    Returns:
        Whatever ``normalize_gpu(rgb, a)`` returns.
    """
    global posr, negr, posa, nega, stream
    rgb = gpuarray.to_gpu(img[:, :, :3].copy())
    a = gpuarray.to_gpu(img[:, :, 3].copy())

    # (Re)build the cached +1/-1 arrays when they do not exist yet or no
    # longer match the input. Testing shape/dtype instead of ``not posr``
    # avoids taking the truth value of a GPU array and also handles images
    # whose size or dtype differs from the previous call.
    cache_valid = (getattr(posr, "shape", None) == rgb.shape
                   and posr.dtype == rgb.dtype)
    if not cache_valid:
        posr = gpuarray.zeros_like(rgb) + 1
        negr = gpuarray.zeros_like(rgb) - 1
        posa = gpuarray.zeros_like(a) + 1
        nega = gpuarray.zeros_like(a) - 1
    rgb = cumath.sqrt(abs(rgb), stream=stream) * gpuarray.if_positive(
        rgb, posr, negr, stream=stream)
    a = cumath.sqrt(abs(a), stream=stream) * gpuarray.if_positive(
        a, posa, nega, stream=stream)
    return normalize_gpu(rgb, a)
Пример #5
0
 def linear_corr_cuda(self, image, l):
     """Return ``max(1 - d / l, 0)`` element-wise for the pairwise differences.

     ``d`` is the result of ``self.pairwise_difference(image, N)`` with
     ``N = image.shape[0]``. Entries of ``1 - d / l`` that are not positive
     are replaced by zero, and a copy of the result is returned.
     """
     n_rows = image.shape[0]
     differences = self.pairwise_difference(image, n_rows)
     correlation = 1 - differences / l
     zero_fill = misc.zeros(correlation.shape, correlation.dtype)
     # keep positive entries, everything else becomes zero
     clipped = gpuarray.if_positive(correlation, correlation, zero_fill)
     return clipped.copy()
Пример #6
0
def step_1(matrix_color, matrix_suma):
    """Return ``3 * matrix_color / matrix_suma`` where ``matrix_suma`` is positive.

    Elements whose ``matrix_suma`` entry is not positive keep the
    ``matrix_suma`` value itself.
    """
    # gpuarray.if_positive inspects every position of the matrix and picks
    # the first or the second operand depending on its value, i.e. it acts
    # as an element-wise if/else.
    scaled = (3 * matrix_color) / matrix_suma
    return gpuarray.if_positive(matrix_suma, scaled, matrix_suma)
def calculate_z_asnumpy_gpu(q, maxiter, z):
    """Calculate z using numpy on the GPU.

    Repeats ``z = z*z + q`` on the GPU for ``maxiter`` steps. Whenever an
    element's magnitude exceeds 2.0 the current iteration index is stored
    for it and its ``z`` and ``q`` are reset to ``0+0j`` so that it stays
    below the threshold afterwards.

    Args:
        q: host array of complex constants.
        maxiter: number of iterations to run.
        z: host array of complex start values.

    Returns:
        Host int32 array with the shape of ``q`` holding, per element, the
        iteration index at which the magnitude exceeded 2.0, or 0 if it
        never did.

    Note:
        The helper arrays are 1-D with ``z.size`` elements, so this assumes
        ``q`` and ``z`` are 1-D vectors of equal length -- TODO confirm
        against callers.
    """
    # convert complex128s (2*float64) to complex64 (2*float32) so they run
    # on older CUDA cards. To use float64 doubles just edit these two lines
    complex_type = np.complex64  # or np.complex128 on newer CUDA devices
    float_type = np.float32  # or np.float64 on newer CUDA devices

    # create an int32 output array on the gpu; the explicit dtype keeps it
    # compatible with iterg, which if_positive() mixes it with below
    outputg = gpuarray.to_gpu(np.zeros(q.shape, dtype=np.int32))
    # create zg and qg on the gpu, converted to the chosen complex type
    zg = gpuarray.to_gpu(z.astype(complex_type))
    qg = gpuarray.to_gpu(q.astype(complex_type))
    # create 2.0 as an array
    twosg = gpuarray.to_gpu(np.full(zg.size, 2.0, dtype=float_type))
    # create 0+0j as an array
    cmplx0sg = gpuarray.to_gpu(np.zeros(zg.size, dtype=complex_type))
    # we'll add 1 to iterg after each iteration
    iterg = gpuarray.to_gpu(np.zeros(zg.size, dtype=np.int32))

    # NOTE: no pre-allocated comparison buffer -- the comparison below
    # creates a fresh array every iteration, and the old pre-allocation
    # depended on the removed ``np.bool`` alias.
    for _ in range(maxiter):
        # multiply z on the gpu by itself, add q (on the gpu)
        zg = zg * zg + qg
        # abs returns a complex (rather than a float) from the complex
        # input where the real component is the absolute value (which
        # looks like a bug) so I take the .real after abs()
        # the above bug relates to pyCUDA from mid2010, it might be fixed now...
        abs_zg = abs(zg).real

        # figure out if zg is > 2
        comparison_result = abs_zg > twosg
        # based on the result either take 0+0j for qg and zg or leave unchanged
        qg = gpuarray.if_positive(comparison_result, cmplx0sg, qg)
        zg = gpuarray.if_positive(comparison_result, cmplx0sg, zg)
        # if the comparison is true then update the iterations count to outputg
        # which we'll extract later
        outputg = gpuarray.if_positive(comparison_result, iterg, outputg)
        # increment the iteration counter
        iterg = iterg + 1
    # extract the result from the gpu back to the cpu
    return outputg.get()
Пример #8
0
def make_sample_data(set_: int):
    """Create one synthetic test data set, hash it on the GPU and save it.

    The data set is drawn according to ``set_`` (1: uniform, 2: three
    Gaussian clusters, 3: ten Gaussian clusters), shuffled, annotated with
    its exact nearest neighbours for several neighbourhood sizes, hashed
    with a random-hyperplane LSH evaluated on the GPU, and written to a
    parquet file under ``config.CUDA_neighbour_search_df_dir``.

    Args:
        set_: identifier of the data set to generate; must be 1, 2 or 3.

    Raises:
        ValueError: if ``set_`` is not one of the supported identifiers
            (previously this surfaced later as an unbound-variable error).
    """
    np.random.seed(set_ * 4347)
    if set_ == 1:  # Uniform distribution
        data = np.random.uniform(0, 1, size=(samples, num_features))
    elif set_ == 2:  # 3 Gaussian distribution
        data = multi_gauss_clusters(n_clusters=3)
    elif set_ == 3:  # 10 Gaussian distribution
        data = multi_gauss_clusters(n_clusters=10)
    else:
        raise ValueError(
            f"unknown sample set {set_!r}; expected 1, 2 or 3")
    df = pd.DataFrame()
    np.random.shuffle(data)
    df['vec'] = data.tolist()

    # find nearest neighbours
    from sklearn.neighbors import NearestNeighbors
    nbrs = NearestNeighbors(n_neighbors=51,
                            algorithm='ball_tree',
                            leaf_size=30).fit(data)
    _, nbrs_indices = nbrs.kneighbors(data)
    for n_nbr in range(10, 51, 5):
        # column 0 is skipped: presumably each point is its own nearest
        # neighbour there -- verify if duplicate points are possible
        df[f"known_neighbours_{n_nbr}"] = [
            x[1:(n_nbr + 1)] for x in nbrs_indices
        ]

    # hash using random hyperplane LSH
    import pycuda.gpuarray as gpuarray
    import skcuda.linalg as linalg
    import pycuda.autoinit
    linalg.init()
    os.environ['CUDA_HOME'] = "/opt/cuda/"
    vec_np = np.array(df['vec'].values.tolist(), dtype=np.float32)
    LSH = LSHBias(feature_dim=num_features, bits=LSH_NUM_BITS)
    W = np.array(LSH.W, dtype=np.float32)
    b_gpu = gpuarray.to_gpu(W)
    # append a constant 1 column to every vector before multiplying by W
    ones = np.ones(shape=(vec_np.shape[0], 1), dtype=np.float32)
    X = np.concatenate((vec_np, ones), axis=1)

    # do the matrix multiplication
    a_gpu = gpuarray.to_gpu(X)
    mul = linalg.mdot(a_gpu, b_gpu)
    # get binary: 1 if value >= 0, else 0
    res = gpuarray.if_positive(
        mul >= gpuarray.zeros(mul.shape, dtype=np.float32),
        then_=gpuarray.ones_like(mul),
        else_=gpuarray.zeros_like(mul))
    res = np.array(res.get(), dtype=np.uint32)

    # convert grouped bits to integers
    res = np_array_binary_to_grouped_integers(res)
    df[f"hash_{LSH_NUM_BITS}_bits"] = list(res)
    df.to_parquet(f"{config.CUDA_neighbour_search_df_dir}df-{set_}.parquet",
                  index=False)

    print("created test-data")
Пример #9
0
def wsparsify(w_gpu, percentage):
  """
  Keeps only as many entries nonzero as specified by percentage.

  The threshold is the entry at position ``floor(size * percentage / 100)``
  of all entries sorted in descending order; entries strictly greater than
  it are kept and all others are set to zero.

  w_gpu      -- gpu array to sparsify
  percentage -- share of entries to keep, in percent

  Returns a gpu array with the same shape as w_gpu. When the requested
  share covers every entry (or the array is empty) w_gpu itself is
  returned unchanged.
  """

  w = w_gpu.get()
  # shape is an attribute, not a method; size is the total entry count.
  # Dividing by 100.0 avoids integer division, int() makes a valid index.
  idx = max(int(floor(w.size * percentage / 100.0)), 0)
  if idx >= w.size:
    # nothing to drop, and no threshold entry exists at this position
    return w_gpu

  # axis=None sorts over all entries instead of along the last axis only
  vals = sort(w, axis=None)[::-1]
  zw_gpu = cua.zeros_like(w_gpu)   # gpu array filled with zeros
  tw_gpu = cua.empty_like(w_gpu)   # gpu array containing threshold
  tw_gpu.fill(vals[idx])

  return cua.if_positive(w_gpu > tw_gpu, w_gpu, zw_gpu)
Пример #10
0
def wsparsify(w_gpu, percentage):
  """
  Keeps only as many entries nonzero as specified by percentage.

  The threshold is the entry at position ``floor(size * percentage / 100)``
  of all entries sorted in descending order; entries strictly greater than
  it are kept and all others are set to zero.

  w_gpu      -- gpu array to sparsify
  percentage -- share of entries to keep, in percent

  Returns a gpu array with the same shape as w_gpu. When the requested
  share covers every entry (or the array is empty) w_gpu itself is
  returned unchanged.
  """

  w = w_gpu.get()
  # shape is an attribute, not a method; size is the total entry count.
  # Dividing by 100.0 avoids integer division, int() makes a valid index.
  idx = max(int(floor(w.size * percentage / 100.0)), 0)
  if idx >= w.size:
    # nothing to drop, and no threshold entry exists at this position
    return w_gpu

  # axis=None sorts over all entries instead of along the last axis only
  vals = sort(w, axis=None)[::-1]
  zw_gpu = cua.zeros_like(w_gpu)   # gpu array filled with zeros
  tw_gpu = cua.empty_like(w_gpu)   # gpu array containing threshold
  tw_gpu.fill(vals[idx])

  return cua.if_positive(w_gpu > tw_gpu, w_gpu, zw_gpu)
Пример #11
0
 def run_gpu(self, Niters):
     """
     Run G-S on GPU. The result is overwritten on the attribute "self.wdata"
     containing a pycuda array.

     Each of the ``Niters`` iterations propagates the current wave
     ``self.wdata`` to the ``Nz`` planes described by ``self.ctfd``,
     combines the simulated waves with the experimental data ``self.Iexp``
     through the ``self.cuwave`` kernel, back-propagates, averages over the
     planes and stores the new wave in ``self.wdata``.

     NOTE(review): "G-S" presumably stands for Gerchberg-Saxton -- confirm.
     """
     Nz, Ny, Nx = self.shape
     # Allocate output data
     wdata = gpuarray.empty((Ny, Nx), np.complex64)
     sim = gpuarray.empty((Nz, Ny, Nx), np.complex64)
     Isim = gpuarray.empty((Nz, Ny, Nx), np.complex64)
     for io in trange(Niters):
         # Propagate the initial wave to simulate defocused waves
         # Psi(x,y,z) = convolve[Psi(x,y,0), CTF(x,y,z)]
         cu_fft.fft(self.wdata, wdata, self.pft2dcc)
         # multiply the transformed wave by the transfer function of each plane
         for kk in range(Nz):
             sim[kk, :, :] = self.ctfd[kk, :, :] * wdata
         # in-place inverse transform; the trailing True presumably enables
         # scaling of the result -- confirm against the cu_fft API
         cu_fft.ifft(sim, sim, self.pft3dcc, True)
         if hasattr(self, 'Esdata'):
             # Use the intensities, Isim = |Psi|**2
             # Convolve with spatial-coherence envelope
             # Isim = convolve[Isim, Es]
             Isim = sim * sim.conj()
             cu_fft.fft(Isim, Isim, self.pft3dcc)
             cu_fft.ifft(Isim * self.Esdata, Isim, self.pft3dcc, True)
             # Combine experimental and simulated amplitudes with simulated phase
             # Psi' = [abs(Psi)+sqrt(Iexp)-sqrt(Isim)]*exp[i*arg(Psi)]
             self.cuwave(self.Iexp, sim, Isim.real, Isim)
         else:
             # Combine experimental amplitudes with simulated phase
             # Psi' = [sqrt(Iexp)]*exp[i*arg(Psi)]
             self.cuwave(self.Iexp, sim, Isim)
         # take the combined wave (presumably written to Isim by cuwave --
         # verify) where self.mask is positive, keep the simulation elsewhere
         sim = gpuarray.if_positive(self.mask, Isim, sim)
         # then back-propagate to the exit plane and take average
         # Psi(x,y,0) = < convolve[Psi, CTF*] >_z
         cu_fft.fft(sim, sim, self.pft3dcc)
         sim = sim * self.ctfd.conj()
         cu_fft.ifft(sim, sim, self.pft3dcc, True)
         # average over the first (plane) axis and restore the 2-D shape
         wdata = misc.mean(sim.reshape(Nz, Nx * Ny), 0).reshape(Ny, Nx)
         # update phase and wave
         self.cuphase(wdata, self.wdata, self.phase_data)
         # store a copy so later changes to wdata do not alias self.wdata
         self.wdata = wdata.copy()
Пример #12
0
def isgreater_gpu(x_gpu, y_gpu):
  """
  Computes if x_gpu > y_gpu and gives back a mask with 0s and 1s.
  Note, that y_gpu can be a scalar value as well.

  x_gpu -- gpu array
  y_gpu -- gpu array, or a float scalar (Python float or any numpy
           floating type) that is expanded to an array shaped like x_gpu

  Returns a gpu array shaped like x_gpu that is 1 where x_gpu > y_gpu and
  0 elsewhere.
  """

  # isinstance() replaces the comparison against np.float, an alias of the
  # builtin float that no longer exists in current NumPy releases.
  if isinstance(y_gpu, (float, np.floating)):
    val   = np.float32(y_gpu)
    y_gpu = cua.empty_like(x_gpu)   # gpu array containing threshold
    y_gpu.fill(val)

  zeros_gpu = cua.zeros_like(x_gpu)   # gpu array filled with zeros
  ones_gpu  = cua.empty_like(x_gpu)   # gpu array filled with ones
  ones_gpu.fill(np.float32(1.))

  return cua.if_positive(x_gpu > y_gpu, ones_gpu, zeros_gpu)
Пример #13
0
def isgreater_gpu(x_gpu, y_gpu):
  """
  Computes if x_gpu > y_gpu and gives back a mask with 0s and 1s.
  Note, that y_gpu can be a scalar value as well.

  x_gpu -- gpu array
  y_gpu -- gpu array, or a float scalar (Python float or any numpy
           floating type) that is expanded to an array shaped like x_gpu

  Returns a gpu array shaped like x_gpu that is 1 where x_gpu > y_gpu and
  0 elsewhere.
  """

  # isinstance() replaces the comparison against np.float, an alias of the
  # builtin float that no longer exists in current NumPy releases.
  if isinstance(y_gpu, (float, np.floating)):
    val   = np.float32(y_gpu)
    y_gpu = cua.empty_like(x_gpu)   # gpu array containing threshold
    y_gpu.fill(val)

  zeros_gpu = cua.zeros_like(x_gpu)   # gpu array filled with zeros
  ones_gpu  = cua.empty_like(x_gpu)   # gpu array filled with ones
  ones_gpu.fill(np.float32(1.))

  return cua.if_positive(x_gpu > y_gpu, ones_gpu, zeros_gpu)
Пример #14
0
  # Read image. BW images have R=G=B so extract the R-value
  image = img.imread(in_file_name)[:,:,0]
  height, width = np.int32(image.shape)
  area = height*width
  print "Processing %d x %d image" % (width, height)

  blocksize = (32,32,1)
  gridsize = (int(width/32),int(height/32))

  ones = np.empty([height,width])
  ones[:,:] = 1

  im_d = gpu.to_gpu(np.float32(np.array(image)))
  ones_d = gpu.to_gpu(np.int32(np.array(ones)))
  zero_d = gpu.to_gpu(np.int32(np.zeros([height,width])))
  threshold_d = gpu.if_positive(im_d - seed_threshold, zero_d, ones_d)
  new_d = gpu.to_gpu(np.int32(np.zeros([height,width])))

  old_flags = 0
  new_flags = gpu.sum(threshold_d).get()
  
  while (new_flags - old_flags) != 0:
    old_flags = new_flags

    # run filter kernel
    filter_kernel(threshold_d, im_d, new_d, width, height, threshold, block=blocksize, grid=gridsize)

    new_flags = gpu.sum(new_d).get()

    # transfer output to input
    threshold_d = new_d
Пример #15
0
    # hashing different .orc DataFrames
    for filename in tqdm(glob(basepath + "part-*.orc")):
        df = pd.read_orc(filename)
        df = df.rename(columns={"FeatureVector_all_features": "vec"})
        count += 1
        vec_np = np.array(df['vec'].values.tolist(), dtype=np.float32)
        # add bias term
        ones = np.ones(shape=(vec_np.shape[0], 1), dtype=np.float32)
        X = np.concatenate((vec_np, ones), axis=1)

        # do the matrix multiplication
        a_gpu = gpuarray.to_gpu(X)
        mul = linalg.mdot(a_gpu, b_gpu)
        # get binary: 1 if value >= 0, else 0
        res = gpuarray.if_positive(
            mul >= gpuarray.zeros(mul.shape, dtype=np.float32),
            then_=gpuarray.ones_like(mul),
            else_=gpuarray.zeros_like(mul))
        res = np.array(res.get(), dtype=np.uint32)

        # convert grouped bits to integers
        res = np_array_binary_to_grouped_integers(res)

        df[f"hash_{LSH_NUM_BITS}_bits"] = [x for x in res]
        df = df[["rec_MBID", f"hash_{LSH_NUM_BITS}_bits"]]
        df.to_parquet(f"{config.ABz_GPU_hashed_output_dir}{count}.parquet",
                      index=False)

    # save as a single parquet file
    spark = SparkSession \
        .builder \
        .appName("hashed file coalesce") \
Пример #16
0
H = 240

cap.set(cv.CAP_PROP_FRAME_HEIGHT, H)
cap.set(cv.CAP_PROP_FRAME_WIDTH, W)

# Grab the reference frame; without it there is nothing to compare against.
ret, frame = cap.read()
if not ret:
    cap.release()
    raise RuntimeError("could not read an initial frame from the capture device")
# Same BGR->gray conversion as inside the loop, so the first difference
# image is computed between identically converted frames.
gray_a = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)

img_ori_gpu = gpuarray.to_gpu(gray_a.astype(np.float32))
img_buf_gpu = gpuarray.empty_like(img_ori_gpu)
# per-pixel change threshold (25 gray levels)
img_sub = gpuarray.ones_like(img_ori_gpu)
img_sub = 25 * img_sub
# value written where the change exceeds the threshold
img_bgm = gpuarray.zeros_like(img_sub)
while True:
    ret, frame = cap.read()
    if not ret:
        # stream ended or the device failed: stop instead of crashing in
        # cvtColor on a missing frame
        break
    gray_buff = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    img_res_gpu = gpuarray.to_gpu(gray_buff.astype(np.float32))
    # absolute difference to the previous frame, shifted by the threshold
    img_buf_gpu = cmath.fabs(img_ori_gpu - img_res_gpu)
    img_buf_gpu = img_buf_gpu - img_sub
    # the current frame becomes the reference for the next iteration
    img_ori_gpu = img_res_gpu.copy()
    # pixels that changed by more than the threshold are set to 0, all
    # others keep the current gray value
    img_res_gpu = gpuarray.if_positive(img_buf_gpu, img_bgm, img_res_gpu)
    gray_buff = img_res_gpu.get()
    gray_buff = gray_buff.astype(np.uint8)
    frame = cv.cvtColor(gray_buff, cv.COLOR_GRAY2RGB)
    cv.imshow("Moving Detecting!", frame)
    if cv.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv.destroyAllWindows()
Пример #17
0
    def map_if_positive(self, expr):
        """Evaluate an if-positive expression node on the GPU.

        The criterion, the ``then`` branch and the ``else_`` branch of
        ``expr`` are each evaluated with ``self.rec`` (in that order) and
        combined element-wise by ``gpuarray.if_positive``.
        """
        criterion = self.rec(expr.criterion)
        when_positive = self.rec(expr.then)
        otherwise = self.rec(expr.else_)
        return gpuarray.if_positive(criterion, when_positive, otherwise)
Пример #18
0
# `python Homework5_script.py [number of iteration for MCMC]`
###############

import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import numpy as np
import pycuda.gpuarray as gpuarray
from pycuda import curandom

import sys

# Number of Monte Carlo samples; the command-line argument is optional and
# defaults to 1000.
N = int(sys.argv[1]) if len(sys.argv) > 1 else int(1e3)
if N <= 0:
    sys.exit("The number of iterations must be a positive integer.")
print("Number of iterations: " + str(N))

# Draw N points (x, y) uniformly on the GPU. Row 0 holds the x values and
# row 1 the y values.
gen = curandom.XORWOWRandomNumberGenerator()
xy = gen.gen_uniform((2, N), np.float32)
# distance of every point from the origin
xy = (xy[0]**2 + xy[1]**2)**0.5

# if_positive yields 0 where the distance exceeds 1 and 1 otherwise, so the
# sum counts the points inside the unit circle. The count is copied to the
# host so that a plain number is printed below.
M = float(gpuarray.sum(gpuarray.if_positive(xy - 1, xy * 0, xy * 0 + 1)).get())

pi = 4 * M / N
print("The estimated value of pi is: " + str(pi))
Пример #19
0
    def map_if_positive(self, expr):
        """Evaluate an if-positive expression node on the GPU.

        The criterion, the ``then`` branch and the ``else_`` branch of
        ``expr`` are each evaluated with ``self.rec`` (in that order) and
        combined element-wise by ``gpuarray.if_positive``.
        """
        criterion = self.rec(expr.criterion)
        when_positive = self.rec(expr.then)
        otherwise = self.rec(expr.else_)
        return gpuarray.if_positive(criterion, when_positive, otherwise)
# Demo of basic gpuarray constructors and element-wise helpers. ``a``,
# ``h_array``, ``dtype`` and ``drv`` are defined earlier in the script.
print('a:\n{0}\nshape={1}\n'.format(a.get(), a.shape))

# upload h_array through a stream
stream = drv.Stream()
b = gpuarray.to_gpu_async(h_array, stream=stream)
print('b:\n{0}\nshape={1}\n'.format(b.get(), b.shape))

# uninitialised 100x100 array
c = gpuarray.empty((100, 100), dtype=dtype)
print('c:\n{0}\nshape={1}\n'.format(c, c.shape))

# zero-filled 100x100 array
d = gpuarray.zeros((100, 100), dtype=dtype)
print('d:\n{0}\nshape={1}\n'.format(d, d.shape))

# values 0.0, 1.0, ..., 99.0
e = gpuarray.arange(0.0, 100.0, 1.0, dtype=dtype)
print('e:\n{0}\nshape={1}\n'.format(e, e.shape))

# element-wise select: e - 100 where e < 50, otherwise e + 100
f = gpuarray.if_positive(e < 50, e - 100, e + 100)
print('f:\n{0}\nshape={1}\n'.format(f, f.shape))

# 0/1 mask of the same condition
g = gpuarray.if_positive(e < 50, gpuarray.ones_like(e), gpuarray.zeros_like(e))
print('g:\n{0}\nshape={1}\n'.format(g, g.shape))

# element-wise maximum and minimum of e and f
h = gpuarray.maximum(e, f)
print('h:\n{0}\nshape={1}\n'.format(h, h.shape))

i = gpuarray.minimum(e, f)
print('i:\n{0}\nshape={1}\n'.format(i, i.shape))

# reductions over a; NOTE: this rebinds g, replacing the mask from above
g = gpuarray.sum(a)
print(g, type(g))

k = gpuarray.max(a)