Exemplo n.º 1
0
def local_search(points, weights, distances, upper_bound, lower_bound,
                 random_states):
    """Randomised coordinate-wise local search, one point per thread.

    The thread copies its row of ``points`` into a private 20-element buffer
    and then visits the coordinates in order. Each coordinate is pushed
    towards ``upper_bound`` or ``lower_bound`` (chosen at random) by a random
    fraction of the remaining distance. As soon as the perturbed buffer
    scores lower than the stored point according to ``qap_device``, the
    buffer is written back to ``points`` and the search stops.

    Rejected perturbations are not undone: they stay in the buffer while the
    following coordinates are tried.

    Args:
        points: 2-D array of candidate points; row ``thread_id`` is updated
            in place when an improvement is found.
        weights: passed through to ``qap_device``.
        distances: passed through to ``qap_device``.
        upper_bound: upper limit a coordinate is moved towards.
        lower_bound: lower limit a coordinate is moved towards.
        random_states: xoroshiro128p generator states, indexed by the thread
            index within the block.
    """
    thread_id = cuda.threadIdx.x

    # Threads without a matching row have nothing to do.
    if thread_id >= len(points):
        return

    # The size of a local array has to be a compile-time constant.
    tmp_point = cuda.local.array(20, float64)
    for i in range(20):
        tmp_point[i] = points[thread_id][i]

    # points[thread_id] is only written right before the loop exits, so the
    # cost of the stored point is the same on every iteration.
    # NOTE(review): assumes qap_device has no side effects - confirm.
    current_cost = qap_device(points[thread_id], weights, distances)

    for index in range(20):
        direction = xoroshiro128p_uniform_float32(random_states,
                                                  thread_id) > 0.5
        step = xoroshiro128p_uniform_float32(random_states, thread_id)

        if direction:
            length = upper_bound - tmp_point[index]
            tmp_point[index] = tmp_point[index] + step * length
        else:
            length = tmp_point[index] - lower_bound
            tmp_point[index] = tmp_point[index] - step * length

        candidate_cost = qap_device(tmp_point, weights, distances)
        if candidate_cost < current_cost:
            for i in range(20):
                points[thread_id][i] = tmp_point[i]
            break
Exemplo n.º 2
0
 def loop(_, r, V, rngs, w, d, tavg, bold_state, bold_out, I, Delta, eta,
          tau, J, cr, cv, r_sigma, V_sigma):
     """Step the delayed, noise-driven (r, V) network through the history buffer.

     Each thread works on one index ``it`` of the last axis of ``r``, ``V``,
     ``tavg`` and ``bold_out``. For every time step and every node the
     delayed coupling terms are summed over all nodes, two Gaussian samples
     are built with the Box-Muller transform, and ``rk4_rV`` is called. The
     values found in the shared scratch array ``nrV`` afterwards are stored
     at the next time index of ``r``/``V``, accumulated into ``tavg`` and
     handed to ``fmri_gpu``.

     The first parameter ``_`` is not used in the body. The body also reads
     the following names from the enclosing scope: ``nh``, ``nn``, ``nto``,
     ``blockDim_x``, ``o_nh``, ``dt``, ``pi``, ``cfpre``, ``cfpost``,
     ``rk4_rV`` and ``fmri_gpu``.
     """
     # Global thread index, total number of threads, and index within block.
     it = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
     nt = cuda.blockDim.x * cuda.gridDim.x
     itx = cuda.threadIdx.x
     o_tau = nb.float32(1 / tau)
     assert r.shape[0] == V.shape[
         0] == nh  # shape asserts help numba optimizer
     assert r.shape[1] == V.shape[1] == nn
     # Shared scratch: row 0 / row 1 are read back below as the new r / V,
     # one column per thread of the block.
     nrV = cuda.shared.array((2, blockDim_x), nb.float32)
     # Clear this thread's slice of the time-average accumulator.
     for j in range(nto):
         for i in range(nn):
             tavg[j, 0, i, it] = nb.float32(0.0)
             tavg[j, 1, i, it] = nb.float32(0.0)
     for t0 in range(-1, nh - 1):
         # The first pass (t0 == -1) reads from the last slot of the buffer
         # and writes slot 0; afterwards slot t0 is read and t0 + 1 written.
         t = nh - 1 if t0 < 0 else t0
         t1 = t0 + 1
         # Index of the tavg bin for this step. For t0 == -1 floor division
         # gives -1 whenever nh // nto > 1.
         # NOTE(review): confirm that accumulating into index -1 is intended.
         t0_nto = t0 // (nh // nto)
         for i in range(nn):
             rc = nb.float32(0)  # using array here costs 50%+
             Vc = nb.float32(0)
             for j in range(nn):
                 # Delayed time index, wrapped with a bit mask.
                 # NOTE(review): equals a modulo only if nh is a power of
                 # two - confirm.
                 dij = (t - d[i, j] + nh) & (nh - 1)
                 rc += w[i, j] * cfpre(r[dij, j, it], r[t, i, it])
                 Vc += w[i, j] * cfpre(V[dij, j, it], V[t, i, it])
             rc = cfpost(rc)
             Vc = cfpost(Vc)
             # RNG + Box Muller
             # Every (t1, i, thread) combination addresses its own pair of
             # generator states; the second one is offset by nt.
             pi_2 = nb.float32(np.pi * 2)
             u1 = xoroshiro128p_uniform_float32(
                 rngs, t1 * nt * nn * 2 + i * nt * 2 + it)
             u2 = xoroshiro128p_uniform_float32(
                 rngs, t1 * nt * nn * 2 + i * nt * 2 + it + nt)
             z0 = math.sqrt(-nb.float32(2.0) * math.log(u1)) * math.cos(
                 pi_2 * u2)
             z1 = math.sqrt(-nb.float32(2.0) * math.log(u1)) * math.sin(
                 pi_2 * u2)
             # RK4
             # presumably rk4_rV writes the new state into nrV, since nrV is
             # read immediately afterwards - confirm against rk4_rV.
             rk4_rV(nrV, r[t, i, it], V[t, i, it], o_tau, pi, tau, Delta,
                    eta, J, I, cr, rc, cv, Vc, r_sigma, V_sigma, z0, z1)
             r[t1, i, it] = nrV[0, itx]
             V[t1, i, it] = nrV[1, itx]
             # Accumulate the new state, scaled by o_nh, into the current bin.
             tavg[t0_nto, 0, i, it] += nrV[0, itx] * o_nh
             tavg[t0_nto, 1, i, it] += nrV[1, itx] * o_nh
             bold_out[i, it] = fmri_gpu(it, bold_state[i], nrV[0, itx], dt)
Exemplo n.º 3
0
def mc_pi(states, iterations, out):
    """Estimate pi in each thread by Monte Carlo sampling.

    Draws ``iterations`` pairs of uniform samples from the thread's
    generator state, counts the pairs whose squared norm is at most 1, and
    stores ``4 * count / iterations`` in ``out`` at the thread's grid index.
    """
    tid = cuda.grid(1)

    hits = 0
    for _ in range(iterations):
        px = xoroshiro128p_uniform_float32(states, tid)
        py = xoroshiro128p_uniform_float32(states, tid)
        dist_sq = px**2 + py**2
        if dist_sq <= 1.0:
            hits += 1

    out[tid] = 4.0 * hits / iterations
Exemplo n.º 4
0
def simulate_pi(rng_states, iterations, out):
    """Per-thread Monte Carlo estimate of pi.

    Each thread draws ``iterations`` (x, y) pairs from its own generator
    state. The fraction of pairs with ``x**2 + y**2 <= 1`` multiplied by 4
    is written to ``out`` at the thread's grid index.
    """
    thread_id = cuda.grid(1)

    n_inside = 0
    for _ in range(iterations):
        sample_x = xoroshiro128p_uniform_float32(rng_states, thread_id)
        sample_y = xoroshiro128p_uniform_float32(rng_states, thread_id)
        # Samples outside the unit circle do not count.
        if not (sample_x**2 + sample_y**2 <= 1.0):
            continue
        n_inside += 1

    out[thread_id] = 4.0 * n_inside / iterations
Exemplo n.º 5
0
def compute_pi(rng_states, iterations, out):
    """Estimate pi by Monte Carlo sampling and store it in ``out[thread_id]``.

    Random (x, y) points are drawn from the thread's generator state; the
    share of points that fall inside the unit circle, times 4, is the
    estimate written for this thread.
    """
    thread_id = cuda.grid(1)

    count = 0
    for _ in range(iterations):
        u = xoroshiro128p_uniform_float32(rng_states, thread_id)
        v = xoroshiro128p_uniform_float32(rng_states, thread_id)
        radius_sq = u**2 + v**2
        if radius_sq <= 1.0:
            count += 1

    out[thread_id] = 4.0 * count / iterations
def compute_pi(rng_states, iterations, out):
    """Write a Monte Carlo estimate of pi for the calling thread.

    The thread samples ``iterations`` points (x, y) from its generator
    state and counts those with ``x**2 + y**2 <= 1``. The result stored in
    ``out`` at the thread's grid index is ``4 * count / iterations``.
    """
    thread_id = cuda.grid(1)

    in_circle = 0
    for _ in range(iterations):
        x_coord = xoroshiro128p_uniform_float32(rng_states, thread_id)
        y_coord = xoroshiro128p_uniform_float32(rng_states, thread_id)
        # Point lies inside (or on) the unit circle.
        if x_coord**2 + y_coord**2 <= 1.0:
            in_circle += 1

    estimate = 4.0 * in_circle / iterations
    out[thread_id] = estimate
 def mutate_kernel(d_next_gen, d_is_elite, rng_states, mutate_prob):
     r"""Mutate non-elite individuals by swapping two randomly chosen entries.

     One thread handles one row of ``d_next_gen``. Rows flagged in
     ``d_is_elite`` are left untouched. For the others a uniform sample is
     compared against ``mutate_prob``; on success two column indices are
     drawn and the corresponding entries are exchanged in place (the two
     indices may coincide, in which case nothing changes).

     Args:
         d_next_gen: 2-D array, one individual per row; modified in place.
         d_is_elite: per-row flag; truthy rows are skipped.
         rng_states: xoroshiro128p generator states, indexed by row.
         mutate_prob: probability of mutating a non-elite row.
     """
     i = cuda.grid(1)
     if i >= d_next_gen.shape[0] or d_is_elite[i]:
         return

     rnd = xoroshiro128p_uniform_float32(rng_states, i)
     if rnd < mutate_prob:
         n_cols = d_next_gen.shape[1]
         # The float32 sample may round up to exactly 1.0, which would give
         # an index one past the end; clamp to the last valid column.
         # NOTE(review): confirm against the numba version in use.
         last = n_cols - 1
         rnd = xoroshiro128p_uniform_float32(rng_states, i)
         idx1 = min(int(math.floor(rnd * n_cols)), last)
         rnd = xoroshiro128p_uniform_float32(rng_states, i)
         idx2 = min(int(math.floor(rnd * n_cols)), last)
         tmp = d_next_gen[i, idx1]
         d_next_gen[i, idx1] = d_next_gen[i, idx2]
         d_next_gen[i, idx2] = tmp
Exemplo n.º 8
0
def Busqueda_MetropolisCUDA(M,individuo,probabilidades,AristMono,numColores,numNodos,rng_states,id):
    """Neighbourhood search that moves nodes between bags with a Metropolis test.

    For up to ``busqueda_vecindario`` rounds: pick a bag with
    ``bolsaProbabilidad_gpu`` (retrying while ``esVacia_gpu`` returns 0),
    pick a random node index whose flag in that bag is non-zero, evaluate
    ``NAMBolsa_gpu`` for the node in its current bag and in a second bag
    chosen by ``bolsaAleatoria_gpu``, and move the node when
    ``probAcepta_gpu`` accepts the difference. The search stops early once
    the running count ``AristMono`` reaches 0.

    ``individuo`` is modified in place. ``AristMono`` is only updated
    locally; nothing is returned.

    Args:
        M: passed through to ``NAMBolsa_gpu``.
        individuo: 2-D membership array indexed ``[bag][node]``.
        probabilidades: passed through to ``bolsaProbabilidad_gpu``.
        AristMono: current count of monochromatic edges.
        numColores: number of bags.
        numNodos: number of nodes.
        rng_states: xoroshiro128p generator states.
        id: index of the generator state used by this caller.
    """
    if AristMono != 0:
        # Rounds depend on each other (AristMono, individuo) and exit early,
        # so this has to be an ordinary sequential loop, not prange.
        for _ in range(busqueda_vecindario):
            nodo = 0
            vacia = 0
            while vacia == 0:
                # Choose a bag; retry while esVacia_gpu reports 0.
                # NOTE(review): presumably 0 means "bag is empty" - confirm.
                bolsaProbabilistica = int(bolsaProbabilidad_gpu(probabilidades,numColores,rng_states,id))
                vacia = esVacia_gpu(individuo[bolsaProbabilistica],numNodos)
            while nodo == 0:
                # Draw node indices until one that belongs to the bag is hit.
                # The float32 sample may round up to exactly 1.0; clamp so
                # the index stays inside the bag.
                # NOTE(review): confirm against the numba version in use.
                r = min(int(xoroshiro128p_uniform_float32(rng_states, id)*numNodos), numNodos - 1)
                nodo = individuo[bolsaProbabilistica][r]
            # Monochromatic edges of node r in its current bag ...
            monoAct = NAMBolsa_gpu(individuo[bolsaProbabilistica], r, M,numNodos)
            BolsaNueva = bolsaAleatoria_gpu(bolsaProbabilistica, numColores,rng_states,id)
            # ... and in the candidate bag.
            monopost = NAMBolsa_gpu(individuo[BolsaNueva], r, M,numNodos)
            delta = monopost - monoAct
            if probAcepta_gpu(delta,rng_states,id):
                AristMono = AristMono + delta
                # Move node r. `nodo` holds the membership flag value, not
                # the node index, so it must not be used as the index here.
                individuo[bolsaProbabilistica][r] = 0  # remove from old bag
                individuo[BolsaNueva][r] = 1  # insert into new bag
            if AristMono == 0:
                break
def Busqueda_EscalandoCUDA(M, individuo, probabilidades, AristMono, numColores,
                           numNodos, rng_states, id):
    """Hill-climbing neighbourhood search that moves nodes between bags.

    For up to ``busqueda_vecindario`` rounds: pick a bag with
    ``bolsaProbabilidad_gpu`` (retrying while ``esVacia_gpu`` returns 0),
    pick a random node index whose flag in that bag is non-zero, evaluate
    ``NAMBolsa_gpu`` for the node in its current bag and in a second bag
    chosen by ``bolsaAleatoria_gpu``, and move the node only when the move
    lowers the count. The search stops early once the running count
    ``AristMono`` reaches 0.

    ``individuo`` is modified in place. ``AristMono`` is only updated
    locally; nothing is returned.

    Args:
        M: passed through to ``NAMBolsa_gpu``.
        individuo: 2-D membership array indexed ``[bag][node]``.
        probabilidades: passed through to ``bolsaProbabilidad_gpu``.
        AristMono: current count of monochromatic edges.
        numColores: number of bags.
        numNodos: number of nodes.
        rng_states: xoroshiro128p generator states.
        id: index of the generator state used by this caller.
    """
    if AristMono != 0:
        # Rounds depend on each other (AristMono, individuo) and exit early,
        # so this has to be an ordinary sequential loop, not prange.
        for _ in range(busqueda_vecindario):
            r = 0
            vacia = 0
            while vacia == 0:
                # Choose a bag; retry while esVacia_gpu reports 0.
                # NOTE(review): presumably 0 means "bag is empty" - confirm.
                bolsaProbabilistica = int(
                    bolsaProbabilidad_gpu(probabilidades, numColores,
                                          rng_states, id))
                vacia = esVacia_gpu(individuo[bolsaProbabilistica], numNodos)
            while r == 0:
                # Draw node indices until one that belongs to the bag is hit.
                # The float32 sample may round up to exactly 1.0; clamp so
                # the index stays inside the bag.
                # NOTE(review): confirm against the numba version in use.
                nodo = min(
                    int(xoroshiro128p_uniform_float32(rng_states, id) *
                        numNodos), numNodos - 1)
                r = individuo[bolsaProbabilistica][nodo]
            # Monochromatic edges of the node in its current bag ...
            monoAct = NAMBolsa_gpu(individuo[bolsaProbabilistica], nodo, M,
                                   numNodos)
            BolsaNueva = bolsaAleatoria_gpu(bolsaProbabilistica, numColores,
                                            rng_states, id)
            # ... and in the candidate bag.
            monopost = NAMBolsa_gpu(individuo[BolsaNueva], nodo, M, numNodos)
            delta = monopost - monoAct
            # The target is AristMono == 0, so only moves that reduce the
            # count are improvements (the same direction the Metropolis
            # acceptance rule accepts unconditionally).
            if delta < 0:
                AristMono = AristMono + delta
                individuo[bolsaProbabilistica][nodo] = 0  # remove from old bag
                individuo[BolsaNueva][nodo] = 1  # insert into new bag
            if AristMono == 0:
                break
Exemplo n.º 10
0
def packet(rng_states, thread_id, q):
    """Return how many uniform draws it takes until one exceeds ``q``.

    Samples are taken from the generator state ``thread_id``; the count
    includes the successful draw, so the result is at least 1.
    """
    draws = 0
    while True:
        draws += 1
        if xoroshiro128p_uniform_float32(rng_states, thread_id) > q:
            return draws
 def cross_over_1p(parent1, parent2, rng_states, d_pop, d_next_gen, i):
     r"""Build child ``i`` by one-point crossover of two rows of ``d_pop``.

     A random cut position is drawn. The child's head is the first ``cut``
     entries of ``parent1``; the remaining slots are filled with the entries
     of ``parent2`` in their original order, skipping any value that already
     appears in the head. The result is written to row ``i`` of
     ``d_next_gen``.
     """
     n_genes = d_pop.shape[1]
     cut = int(
         math.floor(xoroshiro128p_uniform_float32(rng_states, i) * n_genes))

     # Head of the child: leading entries of parent1.
     for pos in range(cut):
         d_next_gen[i, pos] = d_pop[parent1, pos]

     # Tail of the child: parent2's entries that are not in the head yet.
     write_pos = cut
     for src in range(n_genes):
         gene = d_pop[parent2, src]
         already_used = False
         for head_pos in range(cut):
             if d_next_gen[i, head_pos] == gene:
                 already_used = True
                 break
         if already_used:
             continue
         d_next_gen[i, write_pos] = gene
         write_pos += 1
         if write_pos == n_genes:
             break
Exemplo n.º 12
0
def scatter2(threadindex, rng_states, neutron):
    """Redirect a neutron using uniformly sampled polar and azimuthal angles.

    The speed (norm of ``neutron[3:6]``) is kept and the velocity is
    replaced by a vector of that length pointing along the sampled
    direction. The last element of ``neutron`` is multiplied by
    ``sin(theta) * pi / 2``.
    NOTE(review): presumably that factor re-weights for the non-uniform
    solid-angle density of this sampling - confirm.
    """
    # Draw order: polar angle first, then azimuth.
    polar = xoroshiro128p_uniform_float32(rng_states, threadindex) * pi
    azimuth = xoroshiro128p_uniform_float32(rng_states, threadindex) * (2 * pi)
    cos_t = cos(polar)
    sin_t = sin(polar)
    sin_p = sin(azimuth)
    cos_p = cos(azimuth)

    # Current speed.
    vx, vy, vz = neutron[3], neutron[4], neutron[5]
    speed = sqrt(vx * vx + vy * vy + vz * vz)

    # New velocity with the same magnitude.
    vx = speed * sin_t * cos_p
    vy = speed * sin_t * sin_p
    vz = speed * cos_t
    neutron[3] = vx
    neutron[4] = vy
    neutron[5] = vz

    neutron[-1] *= sin_t * (pi / 2)
Exemplo n.º 13
0
def tournament(rng_states, i, pop_size, d_nonelite, num_elites, d_fitness_all,
               tournament_size):
    r"""Pick one parent from the non-elite individuals by tournament.

    ``tournament_size`` candidates are drawn at random (with replacement)
    from ``d_nonelite``; the candidate with the largest entry in
    ``d_fitness_all`` is returned. On ties the earlier candidate is kept.

    Args:
        rng_states: xoroshiro128p generator states.
        i: index of the generator state to draw from.
        pop_size: not used in the body.
        d_nonelite: 1-D array with the indices of non-elite individuals.
        num_elites: not used in the body.
        d_fitness_all: fitness value per individual, indexed by the values
            stored in ``d_nonelite``.
        tournament_size: number of candidates to compare.

    Returns:
        The entry of ``d_nonelite`` that won the tournament.
    """
    num_nonelite = d_nonelite.size
    # The float32 sample may round up to exactly 1.0, which would give an
    # index one past the end; clamp to the last valid position.
    # NOTE(review): confirm against the numba version in use.
    last = num_nonelite - 1

    rnd = xoroshiro128p_uniform_float32(rng_states, i)
    parent = d_nonelite[min(int(math.floor(rnd * num_nonelite)), last)]
    best_fitness = d_fitness_all[parent]
    for j in range(tournament_size - 1):
        rnd = xoroshiro128p_uniform_float32(rng_states, i)
        # Only the challenger is assigned here; the current winner must
        # survive unless the comparison below replaces it.
        new_parent = d_nonelite[min(int(math.floor(rnd * num_nonelite)), last)]
        if best_fitness < d_fitness_all[new_parent]:
            parent = new_parent
            best_fitness = d_fitness_all[new_parent]
    return parent
Exemplo n.º 14
0
def scatter(threadindex, rng_states, neutron):
    """Redirect a neutron isotropically, sampling the full 4*pi solid angle.

    The cosine of the polar angle is drawn uniformly from [-1, 1] and the
    azimuth uniformly from [0, 2*pi). The speed (norm of ``neutron[3:6]``)
    is kept; only the direction of the velocity changes.
    """
    # Draw order: cos(theta) first, then the azimuth.
    cos_t = xoroshiro128p_uniform_float32(rng_states, threadindex) * 2 - 1
    azimuth = xoroshiro128p_uniform_float32(rng_states, threadindex) * (2 * pi)

    # Keep the argument of the square root below from going negative.
    if cos_t > 1:
        cos_t = 1
    sin_t = sqrt(1 - cos_t * cos_t)
    sin_p = sin(azimuth)
    cos_p = cos(azimuth)

    # Current speed.
    vx, vy, vz = neutron[3], neutron[4], neutron[5]
    speed = sqrt(vx * vx + vy * vy + vz * vz)

    # New velocity with the same magnitude.
    vx = speed * sin_t * cos_p
    vy = speed * sin_t * sin_p
    vz = speed * cos_t
    neutron[3] = vx
    neutron[4] = vy
    neutron[5] = vz
Exemplo n.º 15
0
def find_spread_gpu(graph, active, new_active, new_ones, mc, p, rng_states):
    """Run one spreading simulation per thread until nothing new activates.

    Row ``thread_id`` of ``active``, ``new_active`` and ``new_ones`` is the
    private state of one simulation. In each round every node flagged in
    ``new_active`` tries each outgoing edge of ``graph`` once and succeeds
    with probability ``p``; targets reached are collected in ``new_ones``.
    Targets that were not active before become active and form the next
    round's ``new_active``. The loop ends after a round that activates no
    node.

    Args:
        graph: square adjacency matrix; ``graph[j][k]`` truthy means an
            edge from j to k.
        active: per-simulation flags of nodes activated so far (in/out).
        new_active: per-simulation flags of nodes activated in the last
            round (in/out).
        new_ones: per-simulation scratch flags; cleared every round.
        mc: number of simulations; threads with a larger index exit.
        p: activation probability per edge.
        rng_states: xoroshiro128p generator states, one per thread.
    """
    # Absolute position of the current thread.
    thread_id = cuda.grid(1)

    # Fixed block sizes leave some surplus threads without work.
    if thread_id >= mc:
        return

    frontier = new_active[thread_id]
    reached = active[thread_id]
    pending = new_ones[thread_id]
    n_nodes = frontier.shape[0]
    n_targets = graph.shape[0]

    done = False
    while not done:
        done = True

        # Spread from every node of the current frontier. A random number
        # is consumed only for edges that actually exist.
        for src in range(n_nodes):
            if not frontier[src]:
                continue
            for dst in range(n_targets):
                if graph[src][dst] and p > xoroshiro128p_uniform_float32(
                        rng_states, thread_id):
                    pending[dst] = True

        # Promote first-time hits to the next frontier.
        for node in range(n_nodes):
            if pending[node] and (not reached[node]):
                reached[node] = True
                frontier[node] = True
                done = False
            else:
                frontier[node] = False
            pending[node] = False
Exemplo n.º 16
0
def move(rng_states, start_x, start_y, out_x, out_y, doms, rs, domhits, domhitstimes):
    """Random-walk one particle per thread until it is absorbed.

    The particle starts at (``start_x``, ``start_y``) with a random unit
    direction. In every step it changes direction with probability 0.02,
    is absorbed with probability 0.02, then advances by one unit step.
    Every detector ``i`` whose centre ``doms[i]`` lies within ``rs[i]`` of
    the new position records a hit and the step number, and also absorbs
    the particle. The final position is stored in ``out_x``/``out_y``.
    """
    thread_id = cuda.grid(1)

    px = start_x
    py = start_y
    heading = xoroshiro128p_uniform_float32(rng_states, thread_id)*math.pi*2
    vx = math.cos(heading)
    vy = math.sin(heading)
    n_doms = len(doms)
    step = 0
    absorbed = False
    while not absorbed:
        # Scattering: pick a fresh direction.
        if xoroshiro128p_uniform_float32(rng_states, thread_id) < 0.02:
            heading = xoroshiro128p_uniform_float32(rng_states, thread_id)*math.pi*2
            vx = math.cos(heading)
            vy = math.sin(heading)
        # Absorption in the medium; the current step is still completed.
        if xoroshiro128p_uniform_float32(rng_states, thread_id) < 0.02:
            absorbed = True

        px += vx
        py += vy

        # Check every detector; several may register the same step.
        for i in range(n_doms):
            domx = doms[i,0]
            domy = doms[i,1]
            reach = rs[i]
            if reach >= (math.sqrt((domx-px)**2 + (domy-py)**2)):
                domhits[thread_id, i] += 1
                domhitstimes[thread_id, i] = step
                absorbed = True
        step += 1

    out_x[thread_id] = px
    out_y[thread_id] = py
Exemplo n.º 17
0
def rng_kernel_float32(states, out, count, distribution):
    """Fill this thread's slice of ``out`` with ``count`` float32 samples.

    The slice starts at ``thread_id * count``. ``distribution`` selects
    between uniform (``UNIFORM``) and normal (``NORMAL``) samples; for any
    other value nothing is written.
    """
    thread_id = cuda.grid(1)
    base = thread_id * count

    # The distribution does not change inside the loop, so branch once.
    if distribution == UNIFORM:
        for offset in range(count):
            out[base + offset] = xoroshiro128p_uniform_float32(states, thread_id)
    elif distribution == NORMAL:
        for offset in range(count):
            out[base + offset] = xoroshiro128p_normal_float32(states, thread_id)
Exemplo n.º 18
0
def _detect_gpu(matrix, vec, rng_states):
    """Propose a random pair of indices and record it if its score is positive.

    Each thread draws two row indices ``x`` and ``y`` of ``matrix``. For
    both orderings (m, m_inv) of the pair and every column ``n`` other than
    the pair itself where at least one of the two rows is positive, the
    score accumulates
    ``(|m - n| - |m_inv - n|) * (matrix[m, n] - matrix[m_inv, n])``.
    When the total is positive the pair is written to ``vec[thread_id]``;
    otherwise ``vec`` is left unchanged.

    Args:
        matrix: square 2-D array.
        vec: 2-D output array with one (x, y) row per thread.
        rng_states: xoroshiro128p generator states, one per thread.
    """
    thread_id = cuda.grid(1)
    if thread_id >= vec.shape[0]:
        return

    size = matrix.shape[0]
    # The float32 sample may round up to exactly 1.0, which would give an
    # index one past the end; clamp to the last valid row.
    # NOTE(review): confirm against the numba version in use.
    last = size - 1
    x = min(int(xoroshiro128p_uniform_float32(rng_states, thread_id) * size),
            last)
    y = min(int(xoroshiro128p_uniform_float32(rng_states, thread_id) * size),
            last)

    ret = 0
    # A tuple avoids building a list inside the kernel.
    for m in (x, y):
        m_inv = x + y - m  # the other member of the pair
        for n in range(size):
            if matrix[m, n] > 0 or matrix[m_inv, n] > 0:
                if m != n and m_inv != n:
                    ret += (abs(m - n) - abs(m_inv - n)) * (
                        matrix[m, n] - matrix[m_inv, n])
    if ret > 0:
        vec[thread_id, 0] = x
        vec[thread_id, 1] = y
Exemplo n.º 19
0
def recombination(
    inp_weights, out_weights, n_inp_ia, tot_ia, n_weights, rng_states
):
    """Create one offspring per thread by single-point crossover.

    Two parents are drawn at random from the first ``n_inp_ia`` rows of
    ``inp_weights`` together with a cut position. The offspring takes the
    weights before the cut from the first parent and the rest from the
    second, and is written to row ``pos`` of ``out_weights``.

    Args:
        inp_weights: 2-D array of parent weights, one parent per row.
        out_weights: 2-D array receiving the offspring, one per row.
        n_inp_ia: number of parents available in ``inp_weights``.
        tot_ia: total number of offspring to generate.
        n_weights: number of weights per individual.
        rng_states: xoroshiro128p generator states, one per thread.
    """
    pos = cuda.grid(1)
    if pos >= tot_ia:
        return

    # The float32 sample may round up to exactly 1.0, which would select a
    # row one past the last parent; clamp to the last valid row.
    # NOTE(review): confirm against the numba version in use.
    last_parent = n_inp_ia - 1
    ia_1 = min(
        int(xoroshiro128p_uniform_float32(rng_states, pos) * n_inp_ia),
        last_parent)
    ia_2 = min(
        int(xoroshiro128p_uniform_float32(rng_states, pos) * n_inp_ia),
        last_parent)
    # A cut equal to n_weights is harmless: everything comes from ia_1.
    cut = int(xoroshiro128p_uniform_float32(rng_states, pos) * n_weights)

    for i in range(cut):
        out_weights[pos][i] = inp_weights[ia_1][i]
    for i in range(cut, n_weights):
        out_weights[pos][i] = inp_weights[ia_2][i]
Exemplo n.º 20
0
def sample_kernel(rng_states,weight,old_particle_pos,particle_pos):
    """Resample one particle position, indexed by the block index.

    With probability 0.01 the particle is placed uniformly at random in
    ``[0, max_x) x [0, max_y)``. Otherwise a uniform sample is walked
    through ``weight`` cumulatively and the position of the first old
    particle whose weight exceeds the remainder is copied. If no weight
    exceeds the remainder the output entry is left unchanged.

    ``max_x`` and ``max_y`` come from the enclosing scope. The generator
    state is selected by the global thread index.
    """
    particle = int(cuda.blockIdx.x)
    thread_id = cuda.grid(1)

    if xoroshiro128p_uniform_float32(rng_states,thread_id) < 0.01:
        # Random re-injection.
        particle_pos[particle][0] = xoroshiro128p_uniform_float32(rng_states,thread_id)*max_x
        particle_pos[particle][1] = xoroshiro128p_uniform_float32(rng_states,thread_id)*max_y
        return

    remainder = xoroshiro128p_uniform_float32(rng_states,thread_id)
    for src in range(len(weight)):
        if remainder - weight[src] < 0:
            particle_pos[particle][0] = old_particle_pos[src][0]
            particle_pos[particle][1] = old_particle_pos[src][1]
            break
        remainder -= weight[src]
Exemplo n.º 21
0
def bolsaAleatoriaProbabilidadCUDA(probabilidades, numColores, rng_states, id):
    """Pick a bag index with probability proportional to ``probabilidades``.

    A uniform sample is compared against the running sum of the
    probabilities; the first bag whose interval contains the sample is
    returned. If no interval matches, the last bag index is returned.
    """
    sample = xoroshiro128p_uniform_float32(rng_states, id)
    lower = 0
    for bag in range(numColores):
        upper = lower + probabilidades[bag]
        # The sample falls into this bag's half-open interval [lower, upper).
        if lower <= sample < upper:
            return bag
        lower = upper
    return bag
Exemplo n.º 22
0
def mutation(inp_weights, n_ia, n_weights, rng_states, prob=0.1):
    """Add Gaussian noise to single weights with probability ``prob``.

    The block index selects the individual and the thread index within the
    block selects the weight. When a uniform sample falls below ``prob``,
    a normal sample divided by 5 is added to that weight in place. The
    generator state is selected by the global thread index.
    """
    weight_idx = cuda.threadIdx.x  # thread id in a 1D block
    ia_idx = cuda.blockIdx.x  # block id in a 1D grid
    if ia_idx >= n_ia or weight_idx >= n_weights:
        return

    rng_idx = cuda.grid(1)
    if xoroshiro128p_uniform_float32(rng_states, rng_idx) < prob:
        inp_weights[ia_idx][weight_idx] += (xoroshiro128p_normal_float32(
            rng_states, rng_idx)) / 5.
Exemplo n.º 23
0
def recombination_2(inp_weights, out_weights, n_inp_ia, tot_ia, n_weights,
                    rng_states):
    """Build offspring by copying every weight from a randomly chosen parent.

    The block index selects the offspring and the thread index within the
    block selects the weight. Each thread independently draws a parent
    among the first ``n_inp_ia`` rows of ``inp_weights`` and copies that
    parent's weight at the same position into ``out_weights``.

    Args:
        inp_weights: 2-D array of parent weights, one parent per row.
        out_weights: 2-D array receiving the offspring, one per row.
        n_inp_ia: number of parents available in ``inp_weights``.
        tot_ia: total number of offspring to generate.
        n_weights: number of weights per individual.
        rng_states: xoroshiro128p generator states, indexed by the global
            thread index.
    """
    tx = cuda.threadIdx.x  # thread id in a 1D block
    ty = cuda.blockIdx.x  # block id in a 1D grid
    if ty < tot_ia and tx < n_weights:
        # The float32 sample may round up to exactly 1.0, which would select
        # a row one past the last parent; clamp to the last valid row.
        # NOTE(review): confirm against the numba version in use.
        ia_rng = min(
            int(
                xoroshiro128p_uniform_float32(rng_states, cuda.grid(1)) *
                n_inp_ia), n_inp_ia - 1)
        out_weights[ty][tx] = inp_weights[ia_rng][tx]
Exemplo n.º 24
0
def probabilidadAceptarCUDA(delta, rng_states, id):
    """Metropolis acceptance test for a change ``delta`` of the objective.

    Improving moves (``delta < 0``) are always accepted. Any other move is
    accepted with probability ``exp(-delta / (k * T))``, where ``k`` and
    ``T`` are read from the enclosing scope.

    Args:
        delta: change of the objective caused by the move.
        rng_states: xoroshiro128p generator states.
        id: index of the generator state to draw from.

    Returns:
        True when the move is accepted, False otherwise.
    """
    if delta < 0:
        return True

    # k * T must be parenthesised: "-delta / k * T" evaluates as
    # (-delta / k) * T, which is not the Boltzmann factor.
    P = math.exp(-delta / (k * T))
    r = xoroshiro128p_uniform_float32(rng_states, id)  # uniform sample
    return r < P
Exemplo n.º 25
0
def crossoverUniform1(popvec_in, mother, father, ii, popvec_out, config_i,
                      rng_states, tid, tmp):
    """Produce one child by uniform crossover of two flat chromosomes.

    Chromosomes are stored back to back in ``popvec_in``/``popvec_out``
    with length ``config_i[cfg.CHROMO_SIZE]``. Every gene of child ``ii``
    is copied from ``mother`` when a uniform sample is below 0.5 and from
    ``father`` otherwise. ``tmp`` is not used in the body.
    """
    chr_sz = config_i[cfg.CHROMO_SIZE]
    child_base = ii * chr_sz

    for gene in range(0, chr_sz):
        from_mother = xoroshiro128p_uniform_float32(rng_states, tid) < 0.50
        donor = mother if from_mother else father
        popvec_out[child_base + gene] = popvec_in[donor * chr_sz + gene]
 def initialize_kernel(d_pop_init, rng_states):
     r"""Fill row ``i`` of ``d_pop_init`` with uniform random samples.

     One thread handles one row and draws every entry from its own
     generator state. Threads beyond the number of rows do nothing.
     """
     row = cuda.grid(1)
     n_rows = d_pop_init.shape[0]
     n_cols = d_pop_init.shape[1]
     if row >= n_rows:
         return

     for col in range(n_cols):
         d_pop_init[row, col] = xoroshiro128p_uniform_float32(rng_states, row)
Exemplo n.º 27
0
def sample(q_array, input, sigma, rng):
    """Perturb column ``pos`` of ``q_array`` by a jittered Euler triple.

    Each of the three entries of ``input`` is offset by a uniform sample
    shifted to [-0.5, 0.5); the result is passed, together with the
    thread's column of ``q_array``, to ``quaternion_add_euler``.

    NOTE(review): ``sigma`` is not used in the body - confirm whether the
    jitter was meant to be scaled by it.
    """
    pos = cuda.grid(1)
    if pos >= q_array.shape[1]:
        return

    euler = cuda.local.array(shape=(3), dtype=float32)
    for axis in range(euler.shape[0]):
        jitter = xoroshiro128p_uniform_float32(rng, pos) - 0.5
        euler[axis] = input[axis] + jitter
    quaternion_add_euler(q_array[:, pos], euler)
Exemplo n.º 28
0
 def propagate(
         threadindex, rng_states,
         in_neutron,
         square, width, height, radius,
         wl_distr, Lambda0, dLambda, E0, dE,
         xw, yh, dist, pmul
 ):
     """Generate one neutron on a source surface aimed at a rectangular target.

     The start position is sampled uniformly on a ``width`` x ``height``
     rectangle (``square`` truthy) or on a disc of the given ``radius``,
     in the plane z = 0. A point on the target at distance ``dist`` is
     chosen through ``randvec_target_rect``, and the velocity points from
     the start position to that point. The speed is derived either from a
     wavelength in ``Lambda0 +/- dLambda`` (``wl_distr`` truthy) or from an
     energy in ``E0 +/- dE``. The result is written into ``in_neutron``:
     elements 0-2 position, 3-5 velocity, second to last 0, last
     ``pmul * solidangle``.

     ``NB_FLOAT``, ``K2V``, ``SE2V`` and ``randvec_target_rect`` come from
     the enclosing scope.
     """
     # All five samples are drawn up front, in this order.
     r1 = xoroshiro128p_uniform_float32(rng_states, threadindex)
     r2 = xoroshiro128p_uniform_float32(rng_states, threadindex)
     r3 = xoroshiro128p_uniform_float32(rng_states, threadindex)
     r4 = xoroshiro128p_uniform_float32(rng_states, threadindex)
     r5 = xoroshiro128p_uniform_float32(rng_states, threadindex)

     # Start position on the source surface.
     if square:
         pos_x = width * (r1 - 0.5)
         pos_y = height * (r2 - 0.5)
     else:
         chi = 2*math.pi*r1
         rho = math.sqrt(r2)*radius
         pos_x = rho*math.cos(chi)
         pos_y = rho*math.sin(chi)
     in_neutron[0] = pos_x
     in_neutron[1] = pos_y
     in_neutron[2] = 0.

     # Pick the point on the target the neutron is aimed at.
     target = cuda.local.array(shape=3, dtype=NB_FLOAT)
     target[0] = target[1] = 0.0
     target[2] = dist
     vec_f = cuda.local.array(shape=3, dtype=NB_FLOAT)
     solidangle = randvec_target_rect(target, xw, yh, r3, r4, vec_f)

     # Vector from the start position to the aimed point:
     # (vec_f[0] - pos_x, vec_f[1] - pos_y, dist)
     dx = vec_f[0]-pos_x
     dy = vec_f[1]-pos_y
     dist1 = math.sqrt(dx*dx+dy*dy+dist*dist)

     # Speed, from either a wavelength or an energy sample.
     if wl_distr:
         L = Lambda0+dLambda*(r5*2-1)
         v = K2V*(2*math.pi/L)
     else:
         E = E0+dE*(r5*2-1)
         v = SE2V*math.sqrt(E)

     in_neutron[3] = v*dx/dist1
     in_neutron[4] = v*dy/dist1
     in_neutron[5] = v*dist/dist1
     in_neutron[-2] = 0
     in_neutron[-1] = pmul*solidangle
Exemplo n.º 29
0
def move(points, best_point_index, forces, random_states):
    """Shift every point except the best one along its force vector.

    Each thread handles the point with its own index within the block. A
    single uniform step length is drawn per point and applied to all 20
    coordinates. The point at ``best_point_index`` is left where it is.
    """
    thread_id = cuda.threadIdx.x

    if thread_id != best_point_index:
        step = xoroshiro128p_uniform_float32(random_states, thread_id)
        position = points[thread_id]
        force = forces[thread_id]
        for k in range(20):
            position[k] = position[k] + step * force[k]
Exemplo n.º 30
0
def initial_state(state_array: cuda.devicearray,
                  initial_state: cuda.devicearray,
                  state_prob: cuda.devicearray,
                  rng_states):
    """Initialise column ``pos`` of ``state_array`` with a jittered state.

    Each of the first six rows receives the matching entry of
    ``initial_state`` plus a uniform sample in [0, 1).

    Args:
        state_array: 2-D output array indexed ``[component, pos]``.
        initial_state: 1-D array with at least six base values.
        state_prob: not used in the body.
        rng_states: xoroshiro128p generator states.

    NOTE(review): the block index is read from the y dimension while the
    block width comes from x, and the generator state is selected by the
    thread index within the block, so threads with the same index in
    different blocks use the same state - confirm both against the launch
    configuration.
    """
    tx = cuda.threadIdx.x
    ty = cuda.blockIdx.y
    bw = cuda.blockDim.x

    pos = tx + ty * bw
    # pos addresses the second axis of state_array, so that is the axis the
    # bounds check has to use.
    if pos < state_array.shape[1]:
        for i in range(6):
            state_array[i, pos] = initial_state[i] + xoroshiro128p_uniform_float32(rng_states, tx)
Exemplo n.º 31
0
        def random_3d(arr, rng_states):
            """Assign a uniform random value to every entry of a 3-D array.

            Each thread starts at its own 3-D grid position and walks the
            array with strided loops, so the whole array is covered for any
            grid size. All samples of a thread come from the generator
            state at the thread's linearized index.
            """
            # Thread position and grid extent per dimension.
            gx, gy, gz = cuda.grid(3)
            nx, ny, nz = cuda.gridsize(3)

            # Linearized thread index selects the generator state.
            tid = (gz * ny * nx) + (gy * nx) + gx

            for i in range(gz, arr.shape[0], nz):
                for j in range(gy, arr.shape[1], ny):
                    for k in range(gx, arr.shape[2], nx):
                        arr[i, j, k] = xoroshiro128p_uniform_float32(
                            rng_states, tid)