Пример #1
def _randint_arg_check(low, high, endpoint, lower_bound, upper_bound):
    """
    Check that low and high are within the bounds
    for the given datatype.

    :param low: lower end of the requested interval
    :param high: upper end of the requested interval
    :param endpoint: when false and ``high`` is positive, ``high`` is
        decremented by one so the checks operate on a closed interval
    :param lower_bound: smallest value accepted for ``low``
    :param upper_bound: largest value accepted for ``high``
    :raises ValueError: if ``low`` is below ``lower_bound``, ``high`` is above
        ``upper_bound``, or ``low`` is greater than ``high``
    """

    if low < lower_bound:
        raise ValueError("low is out of bounds")

    # This is being done to avoid high being accidentally
    # cast to int64/32 while subtracting 1 before
    # checking bounds, avoids overflow.
    if high > 0:
        high = uint64(high)
        if not endpoint:
            high -= uint64(1)
        upper_bound = uint64(upper_bound)
        if low > 0:
            low = uint64(low)
        if high > upper_bound:
            raise ValueError("high is out of bounds")
        if low > high:  # -1 already subtracted, closed interval
            raise ValueError("low is greater than high in given interval")
    else:
        # Non-positive ``high`` cannot be converted to uint64, so the same
        # checks are applied to the values exactly as they were passed in.
        if high > upper_bound:
            raise ValueError("high is out of bounds")
        if low > high:
            raise ValueError("low is greater than high in given interval")
Пример #2
def pack_n_nb(spaced, n, chunk_bits):
    """Gather every ``n``-th bit of ``spaced`` into adjacent low bits.

    For each ``i`` in ``range(chunk_bits)`` the bit at position ``i * n`` of
    ``spaced`` is moved down to position ``i`` of the result.

    :param spaced: value whose bits are spread ``n`` positions apart
    :param n: distance between consecutive source bits
    :param chunk_bits: number of bits to collect
    :return: the packed value
    """
    a = 0
    for k in range(chunk_bits):
        src_pos = nb.uint64(k * n)
        src_mask = nb.uint64(1) << src_pos
        # Isolate the source bit, then slide it from position k*n down to k.
        a |= (spaced & src_mask) >> (src_pos - k)

    return a
Пример #3
def bounded_lemire_uint64(bitgen, rng):
    """
    Generates a random unsigned 64 bit integer bounded
    within a given interval using Lemire's rejection.

    :param bitgen: bit generator handed to ``next_uint64``
    :param rng: inclusive upper bound of the result; must not be
        0xFFFFFFFFFFFFFFFF because ``rng + 1`` is used as the exclusive bound
    :return: the upper 64 bits of ``x * (rng + 1)`` for the accepted draw ``x``
    """
    rng_excl = uint64(rng) + uint64(1)

    assert (rng != 0xFFFFFFFFFFFFFFFF)

    x = next_uint64(bitgen)

    # Product kept in a uint64, i.e. presumably the low 64 bits of the full
    # 128-bit product; it decides whether the draw has to be rejected.
    leftover = uint64(x) * uint64(rng_excl)

    if (leftover < rng_excl):
        # The threshold is only computed when the cheap test above fails.
        threshold = (UINT64_MAX - rng) % rng_excl

        while (leftover < threshold):
            x = next_uint64(bitgen)
            leftover = uint64(x) * uint64(rng_excl)

    # Upper 64 bits of x * rng_excl, assembled from 32-bit halves.
    x0 = x & uint64(0xFFFFFFFF)
    x1 = x >> 32
    rng_excl0 = rng_excl & uint64(0xFFFFFFFF)
    rng_excl1 = rng_excl >> 32
    w0 = x0 * rng_excl0
    t = x1 * rng_excl0 + (w0 >> 32)
    w1 = t & uint64(0xFFFFFFFF)
    w2 = t >> 32
    w1 += x0 * rng_excl1
    m1 = x1 * rng_excl1 + w2 + (w1 >> 32)

    return m1
def check_primes_cuda(p):
    """Return ``p`` when it is prime, otherwise ``numba.uint64(0)``.

    Uses trial division by every integer ``i`` with ``i * i <= p``. Values
    below 2 are reported as non-prime.

    :param p: non-negative integer to test
    :return: ``p`` itself if prime, else ``numba.uint64(0)``
    """
    if p < 2:
        return numba.uint64(0)

    # Integer loop bound: the square root itself must be tried as a divisor
    # (e.g. 25 = 5 * 5), and range() cannot take a float limit.
    i = 2
    while i * i <= p:
        if (p % i) == 0:
            return numba.uint64(0)
        i += 1
    return p
Пример #5
def init_xoroshiro128p_states_cpu(states, seed, subsequence_start):
    """Fill ``states`` with a chain of generator states derived from ``seed``.

    Entry 0 is seeded and jumped ``subsequence_start`` times; every later
    entry is a copy of its predecessor jumped once more.

    :param states: array of RNG states, modified in place
    :param seed: seed value, cast to uint64
    :param subsequence_start: number of initial jumps, cast to uint64
    """
    count = states.shape[0]
    seed = uint64(seed)
    subsequence_start = uint64(subsequence_start)

    # Nothing to initialise in an empty array.
    if count < 1:
        return

    init_xoroshiro128p_state(states, 0, seed)

    # Move the first generator to the requested starting subsequence.
    for _ in range(subsequence_start):
        xoroshiro128p_jump(states, 0)

    # Each remaining generator starts one jump after the previous one.
    for pos in range(1, count):
        states[pos] = states[pos - 1]
        xoroshiro128p_jump(states, pos)
Пример #6
def encode_single_coord(coord, chunk_bits):
    """
    Encodes a coordinate in ℝⁿ in ℝ¹ using Morton ordering, assuming that
    the size of each dimension is 0..2^{chunk_bits}

    >>> morton_offsets = set()
    >>> for i in range(16):
    ...     for j in range(16):
    ...         morton_offsets.add(encode_single_coord(
    ...                             np.array([i, j], dtype=np.uint8),
    ...                             4))
    >>> morton_offsets == {i for i in range(256)}
    True

    Here we demonstrate that there is a mapping from coordinates in a 16x16
    square to the numbers 0..255

    :param coord: coordinate to encode, numba array of type uint8, size <= 8
    :param chunk_bits: number of bits used by each coordinate component
    :return: Morton-coded offset of type uint64
    """
    assert coord.shape[0] <= 8
    x = nb.uint64(0)
    for i in range(coord.shape[0]):
        # Spread the bits of component i, then offset them by i so that the
        # components interleave.
        x += separate_n_nb(coord[i], coord.shape[0], chunk_bits) << i

    return x
Пример #7
def init_xoroshiro128p_states_cpu(states, seed, subsequence_start):
    """Populate every entry of ``states`` starting from a single ``seed``.

    The first entry is seeded and advanced by ``subsequence_start`` jumps.
    Each following entry copies the entry before it and jumps once.

    :param states: array of RNG states, modified in place
    :param seed: seed value, cast to uint64
    :param subsequence_start: number of initial jumps, cast to uint64
    """
    total = states.shape[0]
    seed = uint64(seed)
    subsequence_start = uint64(subsequence_start)

    # An empty state array needs no work.
    if total < 1:
        return

    init_xoroshiro128p_state(states, 0, seed)

    # Skip ahead to the first requested subsequence.
    for _ in range(subsequence_start):
        xoroshiro128p_jump(states, 0)

    # Derive generator k from generator k - 1 plus one jump.
    for k in range(1, total):
        states[k] = states[k - 1]
        xoroshiro128p_jump(states, k)
Пример #8
def atomic_sub_double_3(ary):
    """Atomically subtract 1 from each thread's element via a shared array.

    Every thread copies ``ary[tx, ty]`` into a (4, 8) float64 shared array,
    applies ``cuda.atomic.sub`` using an index tuple whose second entry is
    cast to uint64, and writes the value back to ``ary``.
    """
    ix = cuda.threadIdx.x
    iy = cuda.threadIdx.y
    scratch = cuda.shared.array((4, 8), float64)
    scratch[ix, iy] = ary[ix, iy]
    target = (ix, uint64(iy))  # mixed index types in one tuple
    cuda.atomic.sub(scratch, target, 1)
    ary[ix, iy] = scratch[ix, iy]
Пример #9
def H(psi_1,psi_2,H_ising,pg,J,h):
    """Apply the operator to ``psi_1`` and store the result in ``psi_2``.

    Row ``i`` receives the diagonal term ``(1 + pg + J*H_ising[i])*psi_1[i]``
    plus ``-h*psi_1[i ^ b]`` for each of the ``N`` single-bit masks ``b``
    (``N`` is read from the enclosing module).
    """
    for row in prange(psi_1.size):
        flip = uint64(1)  # single-bit mask; row ^ flip gives the column index
        acc = (1 + pg + J*H_ising[row])*psi_1[row]
        for _ in range(N):
            acc += -h*psi_1[row^flip]  # x-field action
            flip <<= 1  # move the mask to the next bit

        psi_2[row] = acc
Пример #10
def atomic_add3(ary):
    """Atomically add 1 to each work-item's element via a shared array.

    Every work-item copies ``ary[tx, ty]`` into a (4, 8) uint32 shared array,
    applies ``roc.atomic.add`` using an index tuple whose second entry is
    cast to uint64, and writes the value back to ``ary``.
    """
    ix = roc.get_local_id(0)
    iy = roc.get_local_id(1)
    scratch = roc.shared.array((4, 8), numba.uint32)
    scratch[ix, iy] = ary[ix, iy]
    target = (ix, numba.uint64(iy))  # mixed index types in one tuple
    roc.atomic.add(scratch, target, 1)
    ary[ix, iy] = scratch[ix, iy]
Пример #11
def atomic_add_float_3(ary):
    """Atomically add 1 to each thread's element via a shared array.

    Every thread copies ``ary[tx, ty]`` into a (4, 8) float32 shared array,
    applies ``cuda.atomic.add`` using an index tuple whose second entry is
    cast to uint64, and writes the value back to ``ary``.
    """
    ix = cuda.threadIdx.x
    iy = cuda.threadIdx.y
    scratch = cuda.shared.array((4, 8), float32)
    scratch[ix, iy] = ary[ix, iy]
    target = (ix, uint64(iy))  # mixed index types in one tuple
    cuda.atomic.add(scratch, target, 1)
    ary[ix, iy] = scratch[ix, iy]
Пример #12
def atomic_add3(ary):
    """Increment each work-item's element of ``ary`` through shared memory.

    The element ``ary[tx, ty]`` is staged in a (4, 8) uint32 shared array,
    incremented with ``roc.atomic.add`` (second index cast to uint64), and
    copied back.
    """
    lx = roc.get_local_id(0)
    ly = roc.get_local_id(1)
    staged = roc.shared.array((4, 8), numba.uint32)
    staged[lx, ly] = ary[lx, ly]
    where = (lx, numba.uint64(ly))  # mixed index types in one tuple
    roc.atomic.add(staged, where, 1)
    ary[lx, ly] = staged[lx, ly]
Пример #13
def atomic_add3(ary):
    """Atomically add 1 to each thread's element via a shared array.

    Every thread copies ``ary[tx, ty]`` into a (4, 8) uint32 shared array,
    applies ``cuda.atomic.add`` using an index tuple whose second entry is
    cast to uint64, and writes the value back to ``ary``.
    """
    ix = cuda.threadIdx.x
    iy = cuda.threadIdx.y
    scratch = cuda.shared.array((4, 8), uint32)
    scratch[ix, iy] = ary[ix, iy]
    target = (ix, uint64(iy))  # mixed index types in one tuple
    cuda.atomic.add(scratch, target, 1)
    ary[ix, iy] = scratch[ix, iy]
Пример #14
def atomic_add3(ary):
    """Atomically add 1 to each work-item's element via a shared array.

    Every work-item copies ``ary[tx, ty]`` into a (4, 8) uint32 shared array,
    applies ``hsa.atomic.add`` using an index tuple whose second entry is
    cast to uint64, and writes the value back to ``ary``.
    """
    ix = hsa.get_local_id(0)
    iy = hsa.get_local_id(1)
    scratch = hsa.shared.array((4, 8), numba.uint32)
    scratch[ix, iy] = ary[ix, iy]
    target = (ix, numba.uint64(iy))  # mixed index types in one tuple
    hsa.atomic.add(scratch, target, 1)
    ary[ix, iy] = scratch[ix, iy]
Пример #15
def atomic_add3(ary):
    """Increment each work-item's element of ``ary`` through shared memory.

    The element ``ary[tx, ty]`` is staged in a (4, 8) uint32 shared array,
    incremented with ``hsa.atomic.add`` (second index cast to uint64), and
    copied back.
    """
    lx = hsa.get_local_id(0)
    ly = hsa.get_local_id(1)
    staged = hsa.shared.array((4, 8), numba.uint32)
    staged[lx, ly] = ary[lx, ly]
    where = (lx, numba.uint64(ly))  # mixed index types in one tuple
    hsa.atomic.add(staged, where, 1)
    ary[lx, ly] = staged[lx, ly]
Пример #16
def xoroshiro128p_next(states, index):
    '''Return the next random uint64 and advance the RNG in states[index].

    :type states: 1D array, dtype=xoroshiro128p_dtype
    :param states: array of RNG states
    :type index: int64
    :param index: offset in states to update
    :rtype: uint64
    '''
    index = int64(index)
    s0 = states[index]['s0']
    s1 = states[index]['s1']
    # The output is taken from the state before it is advanced.
    result = s0 + s1

    s1 ^= s0
    states[index]['s0'] = uint64(rotl(s0, uint32(55))) ^ s1 ^ (s1 << uint32(14))
    states[index]['s1'] = uint64(rotl(s1, uint32(36)))

    return result
Пример #17
def xoroshiro128p_next(states, index):
    '''Return the next random uint64 and advance the RNG in states[index].

    :type states: 1D array, dtype=xoroshiro128p_dtype
    :param states: array of RNG states
    :type index: int64
    :param index: offset in states to update
    :rtype: uint64
    '''
    index = int64(index)
    s0 = states[index]['s0']
    s1 = states[index]['s1']
    # Sum of the two state words as they were on entry.
    result = s0 + s1

    # Advance the state: mix s0 into s1, then rotate and shift.
    s1 ^= s0
    states[index]['s0'] = uint64(rotl(s0, uint32(55))) ^ s1 ^ (s1 << uint32(14))
    states[index]['s1'] = uint64(rotl(s1, uint32(36)))

    return result
Пример #18
def H_cuda(yin,yout,H_ising,pg,J,h):
    """CUDA kernel: one thread computes one entry of ``yout`` from ``yin``.

    Thread ``s`` writes ``(1 + pg + J * H_ising[s])*yin[s]`` plus
    ``-h*yin[s ^ b]`` for each of the ``N`` single-bit masks ``b`` (``N`` is
    read from the enclosing module). Threads past ``yin.size`` do nothing.
    """
    row = cuda.grid(1)
    if row >= yin.size:
        return

    flip = uint64(1)  # single-bit mask; row ^ flip gives the column index
    acc = (1 + pg + J * H_ising[row])*yin[row]
    for _ in range(N):
        acc += -h*yin[row^flip]  # x-field action
        flip <<= 1  # move the mask to the next bit

    yout[row] = acc
Пример #19
def init_xoroshiro128p_states_kernel(states, seed, subsequence_start):
    """Kernel that initialises every entry of ``states`` from one ``seed``.

    Intended to be launched with a single thread and block (per the original
    author's note). Entry 0 is seeded and jumped ``subsequence_start`` times;
    each later entry copies its predecessor and jumps once more.

    :param states: array of RNG states, modified in place
    :param seed: seed value, cast to uint64
    :param subsequence_start: number of initial jumps, cast to uint64
    """
    seed = uint64(seed)
    subsequence_start = uint64(subsequence_start)

    # Only run this with a single thread and block
    count = states.shape[0]

    if count >= 1:
        init_xoroshiro128p_state(states, 0, seed)

        # Move the first generator to the requested starting subsequence.
        for _ in range(subsequence_start):
            xoroshiro128p_jump(states, 0)

        # Each remaining generator starts one jump after the previous one.
        for pos in range(1, count):
            states[pos] = states[pos - 1]
            xoroshiro128p_jump(states, pos)
Пример #20
def _2d_x_field(yin, yout, h):  # adds to yout.
    """Accumulate the x-field term into ``yout``.

    For every index ``i`` below ``2 ** len(h)`` this adds
    ``sum(h[j] * yin[i ^ (1 << j)])`` over all ``j`` to ``yout[i]``.
    """
    n_sites = len(h)
    n_states = (1 << n_sites)

    for row in range(n_states):
        flip = uint64(1)  # single-bit mask; row ^ flip gives the column index
        acc = 0
        for site in range(n_sites):
            acc += h[site] * yin[row ^ flip]  # x-field action
            flip <<= 1  # move the mask to the next bit

        yout[row] += acc
Пример #21
def block_analysis_jit(fd, xp, yp, vmin: np.float64, vmax: np.float64,
                       vdims: np.ndarray, bdims: np.ndarray,
                       bcount: np.ndarray, blocks: np.ndarray):
    """Accumulate an interpolated relevance value per block of voxels.

    Each linear voxel index ``i`` is mapped to block coordinates
    ``(bI, bJ, bK)``. The voxel value ``fd[i]`` is normalised with
    ``vmin``/``vmax``, passed through the piecewise-linear table
    ``xp`` -> ``yp``, and the result is added to the block's entry in
    ``blocks``.

    :param fd: voxel values, indexed by linear voxel index
    :param xp: sample positions of the lookup table
    :param yp: sample values of the lookup table, same length as ``xp``
    :param vmin: value mapped to 0 by the normalisation
    :param vmax: value mapped to 1 by the normalisation
    :param vdims: volume dimensions in voxels (3 entries are read)
    :param bdims: block dimensions in voxels (3 entries are read)
    :param bcount: number of blocks along each axis (3 entries are read)
    :param blocks: per-block accumulator, modified in place
    """
    diff = vmax - vmin
    num_vox = np.prod(vdims)

    # NOTE(review): several voxels map to the same bIdx, so the in-place add
    # at the end of this prange body looks like a data race when the loop
    # actually runs in parallel -- confirm.
    for i in numba.prange(num_vox):
        # Decompose the linear index (first axis fastest) and divide by the
        # block size to get the block coordinates.
        bI = numba.uint64((i % vdims[0]) / bdims[0])
        bJ = numba.uint64(((i / vdims[0]) % vdims[1]) / bdims[1])
        bK = numba.uint64(((i / vdims[0]) / vdims[1]) / bdims[2])

        if bI < bcount[0] and bJ < bcount[1] and bK < bcount[2]:
            x = numba.float64((fd[i] - vmin) / diff)

            max_idx = len(xp) - 1

            # Nearest table index, assuming xp is evenly spread over [0, 1].
            idx = int((x * max_idx) + 0.5)

            # Pick the pair of table entries to interpolate between.
            if idx > max_idx:
                k0 = int(max_idx - 1)
                k1 = int(max_idx)
            elif idx == 0:
                k0 = int(0)
                k1 = int(1)
            else:
                k0 = int(idx - 1)
                k1 = int(idx)

            d = (x - xp[k0]) / (xp[k1] - xp[k0])
            rel = numba.float64(yp[k0] * (1.0 - d) + yp[k1] * d)

            bIdx = bI + bcount[0] * (bJ + bK * bcount[1])
            blocks[bIdx] += rel
Пример #22
def xoroshiro128p_jump(states, index):
    '''Advance the RNG in ``states[index]`` by 2**64 steps.

    :type states: 1D array, dtype=xoroshiro128p_dtype
    :param states: array of RNG states
    :type index: int64
    :param index: offset in states to update
    '''
    index = int64(index)

    s0 = uint64(0)
    s1 = uint64(0)

    # Step the generator 128 times. Whenever the matching bit of the jump
    # constant is set, fold the current state into the accumulators.
    for i in range(2):
        for b in range(64):
            if XOROSHIRO128P_JUMP[i] & (uint64(1) << uint32(b)):
                s0 ^= states[index]['s0']
                s1 ^= states[index]['s1']
            xoroshiro128p_next(states, index)

    # The accumulated value becomes the new state.
    states[index]['s0'] = s0
    states[index]['s1'] = s1
Пример #23
def xoroshiro128p_jump(states, index):
    '''Advance the RNG in ``states[index]`` by 2**64 steps.

    :type states: 1D array, dtype=xoroshiro128p_dtype
    :param states: array of RNG states
    :type index: int64
    :param index: offset in states to update
    '''
    index = int64(index)

    s0 = uint64(0)
    s1 = uint64(0)

    # One generator step per bit of the two jump constants. States seen at
    # set bits are XOR-ed together.
    for i in range(2):
        for b in range(64):
            if XOROSHIRO128P_JUMP[i] & (uint64(1) << uint32(b)):
                s0 ^= states[index]['s0']
                s1 ^= states[index]['s1']
            xoroshiro128p_next(states, index)

    # Replace the state with the XOR of the selected states.
    states[index]['s0'] = s0
    states[index]['s1'] = s1
Пример #24
def init_xoroshiro128p_state(states, index, seed):
    '''Use SplitMix64 to generate an xoroshiro128p state from 64-bit seed.

    This ensures that manually set small seeds don't result in a predictable
    initial sequence from the random number generator.

    :type states: 1D array, dtype=xoroshiro128p_dtype
    :param states: array of RNG states
    :type index: int64
    :param index: offset in states to update (cast to int64)
    :type seed: uint64
    :param seed: seed value to use when initializing state (cast to uint64)
    '''
    index = int64(index)
    seed = uint64(seed)

    # One SplitMix64 round over the seed.
    z = seed + uint64(0x9E3779B97F4A7C15)
    z = (z ^ (z >> uint32(30))) * uint64(0xBF58476D1CE4E5B9)
    z = (z ^ (z >> uint32(27))) * uint64(0x94D049BB133111EB)
    z = z ^ (z >> uint32(31))

    # Both state words receive the same mixed value.
    states[index]['s0'] = z
    states[index]['s1'] = z
Пример #25
def init_xoroshiro128p_state(states, index, seed):
    '''Use SplitMix64 to generate an xoroshiro128p state from 64-bit seed.

    This ensures that manually set small seeds don't result in a predictable
    initial sequence from the random number generator.

    :type states: 1D array, dtype=xoroshiro128p_dtype
    :param states: array of RNG states
    :type index: int64
    :param index: offset in states to update (cast to int64)
    :type seed: uint64
    :param seed: seed value to use when initializing state (cast to uint64)
    '''
    index = int64(index)
    seed = uint64(seed)

    # Scramble the seed with a single SplitMix64 round.
    z = seed + uint64(0x9E3779B97F4A7C15)
    z = (z ^ (z >> uint32(30))) * uint64(0xBF58476D1CE4E5B9)
    z = (z ^ (z >> uint32(27))) * uint64(0x94D049BB133111EB)
    z = z ^ (z >> uint32(31))

    # The scrambled value is stored in both halves of the state.
    states[index]['s0'] = z
    states[index]['s1'] = z
Пример #26
def separate_n_nb(packed, n, chunk_bits):
    """
    A relatively inefficient generalization of the "separate bits"
    step of Morton encoding. Assuming that each of the `n` coordinates
    has `chunk_bits` bits, we can "space out" each bit of each coordinate
    `n` spaces at a time.

    >>> for i in range(8):
    ...     print(i,
    ...           format(separate_n_nb(i, 3, 3), '#012b'),
    ...           format(separate_n_nb(i, 3, 3) << 1, '#012b'),
    ...           format(separate_n_nb(i, 3, 3) << 2, '#012b'))
    0 0b0000000000 0b0000000000 0b0000000000
    1 0b0000000001 0b0000000010 0b0000000100
    2 0b0000001000 0b0000010000 0b0000100000
    3 0b0000001001 0b0000010010 0b0000100100
    4 0b0001000000 0b0010000000 0b0100000000
    5 0b0001000001 0b0010000010 0b0100000100
    6 0b0001001000 0b0010010000 0b0100100000
    7 0b0001001001 0b0010010010 0b0100100100

    :param packed: packed value; only its low 8 bits are used
    :param n: number of components that we will eventually want to Morton code
    :param chunk_bits: the number of bits that represent each coordinate
    :return: spaced-out bit representation, ready to be interleaved
    """
    a = nb.uint64(packed)

    # Keep only the lowest byte of the input.
    a = a & nb.uint64(0x00000000000000FF)

    x = 0
    for i in range(chunk_bits):
        # Bit i of the input lands at position i * n of the output.
        bit_to_set = nb.uint64(1) << nb.uint64(i * n)
        x |= (a << nb.uint64((n - 1) * i)) & bit_to_set

    return x
Пример #27
def xoroshiro128p_jump(states, index):
    '''Advance the RNG in ``states[index]`` by 2**64 steps.

    :type states: 1D array, dtype=xoroshiro128p_dtype
    :param states: array of RNG states
    :type index: int64
    :param index: offset in states to update
    '''
    index = int64(index)

    jump = (uint64(0xbeac0467eba5facb), uint64(0xd86b048b86aa9922))

    s0 = uint64(0)
    s1 = uint64(0)

    # Step the generator 128 times. Whenever the matching bit of ``jump`` is
    # set, fold the current state into the accumulators.
    for i in range(2):
        for b in range(64):
            if jump[i] & (uint64(1) << uint32(b)):
                s0 ^= states[index]['s0']
                s1 ^= states[index]['s1']
            xoroshiro128p_next(states, index)

    # The accumulated value becomes the new state.
    states[index]['s0'] = s0
    states[index]['s1'] = s1
Пример #28
def H(t, psi_in, psi_out, H_ising, T, pg):
    """Write ``-1j`` times the time-dependent operator applied to ``psi_in``.

    With ``J = (t / T)**2`` and ``h = r = (1 - t / T)**2``, entry ``s`` of the
    operator action is ``(-1j * pg * r + J * H_ising[s]) * psi_in[s]`` plus
    ``-h * psi_in[s ^ b]`` for each of the ``N`` single-bit masks ``b``
    (``N`` is read from the enclosing module).

    :return: ``psi_out``, which is also filled in place
    """
    frac = t / T
    remaining = 1 - frac
    J = frac**2
    h = remaining**2
    r = remaining**2
    for row in prange(psi_in.size):
        flip = uint64(1)  # single-bit mask; row ^ flip gives the column index
        acc = (-1j * pg * r + J * H_ising[row]) * psi_in[row]
        for _ in range(N):
            acc += -h * psi_in[row ^ flip]  # x-field action
            flip <<= 1  # move the mask to the next bit

        psi_out[row] = -1j * acc

    return psi_out
Пример #29
def buffered_bounded_lemire_uint32(bitgen, rng):
    """
    Generates a random unsigned 32 bit integer bounded
    within a given interval using Lemire's rejection.

    :param bitgen: bit generator handed to ``next_uint32``
    :param rng: inclusive upper bound of the result; must not be 0xFFFFFFFF
        because ``rng + 1`` is used as the exclusive bound
    :return: the upper 32 bits of the accepted 64-bit product
    """
    rng_excl = uint32(rng) + uint32(1)

    assert (rng != 0xFFFFFFFF)

    # Generate a scaled random number.
    m = uint64(next_uint32(bitgen)) * uint64(rng_excl)

    # Rejection sampling to remove any bias
    leftover = m & 0xFFFFFFFF

    if (leftover < rng_excl):
        # `rng_excl` is a simple upper bound for `threshold`.
        threshold = (UINT32_MAX - rng) % rng_excl

        while (leftover < threshold):
            m = uint64(next_uint32(bitgen)) * uint64(rng_excl)
            leftover = m & 0xFFFFFFFF

    return (m >> 32)
Пример #30
def xoroshiro128p_jump(states, index):
    """Advance the RNG in ``states[index]`` by 2**64 steps.

    :type states: 1D array, dtype=xoroshiro128p_dtype
    :param states: array of RNG states
    :type index: int64
    :param index: offset in states to update
    """
    index = int64(index)

    jump = (uint64(0xBEAC0467EBA5FACB), uint64(0xD86B048B86AA9922))

    s0 = uint64(0)
    s1 = uint64(0)

    # One generator step per bit of the two jump constants. States seen at
    # set bits are XOR-ed together.
    for i in range(2):
        for b in range(64):
            if jump[i] & (uint64(1) << uint32(b)):
                s0 ^= states[index]["s0"]
                s1 ^= states[index]["s1"]
            xoroshiro128p_next(states, index)

    # Replace the state with the XOR of the selected states.
    states[index]["s0"] = s0
    states[index]["s1"] = s1
Пример #31
def _2d_H_op(yin, yout, diag_signs, J, h):
    """Apply the operator to ``yin`` and store the result in ``yout``.

    Row ``i`` gets a diagonal term ``sum(J[j] * diag_signs[i, j]) * yin[i]``
    plus an x-field term ``sum(h[j] * yin[i ^ (1 << j)])``.
    """
    n_sites = h.shape[0]
    n_couplings = J.shape[0]
    n_states = diag_signs.shape[0]
    for row in prange(n_states):
        diag = 0
        for term in range(n_couplings):
            diag += J[term] * diag_signs[row, term]

        flip = uint64(1)  # single-bit mask; row ^ flip gives the column index
        acc = 0
        for site in range(n_sites):
            acc += h[site] * yin[row ^ flip]  # x-field action
            flip <<= 1  # move the mask to the next bit

        yout[row] = diag * yin[row] + acc
Пример #32
def numba_decompress_blocks(input, block_size, last_block_size, block_ends,
                            output):
    """Decompress independently compressed blocks from ``input`` to ``output``.

    Block ``p`` occupies ``input[block_ends[p - 1]:block_ends[p]]`` (starting
    at 0 for the first block) and expands into ``output`` starting at
    ``block_size * p``. Each sequence is a token byte (high nibble: literal
    count; low nibble + 4: match length), optional 255-valued extension bytes
    for either length, the literal bytes, and a 2-byte little-endian
    back-reference offset. This looks like the LZ4 block format -- confirm
    against the producer.

    NOTE(review): the end of the signature was missing in the source;
    ``output`` is reconstructed as the final parameter because the body
    writes to it -- confirm against callers.

    :param input: compressed bytes of all blocks, back to back
    :param block_size: decompressed size of every block except the last
    :param last_block_size: decompressed size of the last block
    :param block_ends: end offset in ``input`` of each compressed block
    :param output: destination buffer, written in place
    """
    num_blocks = len(block_ends)

    for p in numba.prange(num_blocks):
        # Compressed data of block p starts where block p - 1 ended.
        if p == 0:
            i = numba.uint64(0)
        else:
            i = numba.uint64(block_ends[p - numba.uint(1)])

        block_end = numba.uint64(block_ends[p])
        j = numba.uint64(block_size * p)

        # Only the final block may have a different decompressed size.
        if (p == (num_blocks - numba.uint8(1))):
            end = j + numba.uint64(last_block_size)
        else:
            end = j + numba.uint64(block_size)

        while ((j < end) and (i < block_end)):
            # Token: literal count in the high nibble, match length in the low.
            t1 = numba.uint16((input[i] & 0xF0) >> 4)
            t2 = numba.uint16((input[i] & 0x0F) + 4)
            i += numba.uint8(1)

            # A literal count of 15 is extended by following bytes.
            if (t1 == 15):
                while input[i] == 255:
                    t1 += numba.uint8(input[i])
                    i += numba.uint8(1)

                t1 += numba.uint8(input[i])
                i += numba.uint8(1)

            # Copy the literals verbatim.
            for n in range(t1):
                output[j] = input[i]
                i += numba.uint8(1)
                j += numba.uint8(1)

            # The last sequence of a block carries literals only.
            if (j >= end): break

            # Little-endian 16-bit distance back into the output.
            off = numba.uint16(input[i]) + (numba.uint16(input[i + 1]) << 8)
            i += numba.uint8(2)

            # A match length of 15 + 4 is extended by following bytes.
            if (t2 == 19):
                while input[i] == 255:
                    t2 += numba.uint8(input[i])
                    i += numba.uint8(1)

                t2 += numba.uint8(input[i])
                i += numba.uint8(1)

            # Byte-by-byte copy so that overlapping matches repeat correctly.
            for n in range(t2):
                output[j] = output[j - off]
                j += numba.uint8(1)
Пример #33
def atomic_cast_to_uint64(num):
    """Return ``num`` converted with ``uint64``."""
    converted = uint64(num)
    return converted
Пример #34

def power_set(iterable):
    """Lazily yield every subset of ``iterable`` as a tuple.

    Subsets are produced in order of increasing size, starting with the
    empty tuple. Within one size they follow ``itertools.combinations`` order.
    """
    items = list(iterable)
    subsets_by_size = (
        itertools.combinations(items, size) for size in range(len(items) + 1)
    )
    return itertools.chain.from_iterable(subsets_by_size)

# Masks selecting the low byte of every 16-bit group and the low 16 bits of
# every 32-bit group, respectively.
flip_vert_const_1 = np.uint64(0x00FF00FF00FF00FF)
flip_vert_const_2 = np.uint64(0x0000FFFF0000FFFF)

def vectorized_flip_vertically(bb):
    """Reverse the byte order of each 64-bit value in ``bb``.

    The swap is done in three stages: adjacent bytes, adjacent 16-bit groups,
    and finally the two 32-bit halves.
    """
    # Stage 1: exchange neighbouring bytes.
    high_bytes = (bb >> 8) & flip_vert_const_1
    low_bytes = (bb & flip_vert_const_1) << 8
    bb = high_bytes | low_bytes

    # Stage 2: exchange neighbouring 16-bit groups.
    high_words = (bb >> 16) & flip_vert_const_2
    low_words = (bb & flip_vert_const_2) << 16
    bb = high_words | low_words

    # Stage 3: exchange the 32-bit halves.
    return (bb >> 32) | (bb << 32)

def get_castling_lookup_tables():
    possible_castling_rights = np.zeros(2**4, dtype=np.uint64)
    for j, set in enumerate(power_set([BB_A1, BB_H1, BB_A8, BB_H8])):
        possible_castling_rights[j] = np.uint64(
            functools.reduce(lambda x, y: x | y, set, np.uint64(0)))

    white_turn_castling_tables = create_index_table(possible_castling_rights)
    black_turn_castling_tables = create_index_table(
Пример #35
    # Let's find out the correct dtype depending on the max_value
    if max_value <= _UINT8_MAX:
        X = np.empty((n_samples, n_features), dtype=np.uint8, order="F")
    elif _UINT8_MAX < max_value <= _UINT16_MAX:
        X = np.empty((n_samples, n_features), dtype=np.uint16, order="F")
    elif _UINT16_MAX < max_value <= _UINT32_MAX:
        X = np.empty((n_samples, n_features), dtype=np.uint32, order="F")
    elif _UINT32_MAX < max_value <= _UINT64_MAX:
        X = np.empty((n_samples, n_features), dtype=np.uint64, order="F")
        raise ValueError("X cannot be created")
    return X

    uint64(uint64, uint64[::1], uint64, uint64, uint64),
def get_value_from_column(i, bitarray, bitmask, n_values_in_word, n_bits):
    """Get the bin value of a column based on the bitarray

    i : uint64
        Sample index

    bitarray :
Пример #36
        self.board_w = np.random.randint(2**64, size=(n, n), dtype=np.uint64)
        self.D = {}

    def __getitem__(self, state):
        """Return the value stored for ``state``, or ``False`` if none exists.

        The lookup key is the hash computed by ``get_hash`` from ``state`` and
        the two tables ``self.board_b`` and ``self.board_w``.
        """
        _hash = get_hash(state, self.board_b, self.board_w)
        # A miss yields False instead of falling through to None.
        return self.D.get(_hash, False)

    def __setitem__(self, state, value):
        """Store ``value`` under the hash that ``get_hash`` gives ``state``."""
        key = get_hash(state, self.board_b, self.board_w)
        self.D[key] = value

@nb.njit(nb.uint64(nb.int8[:, :], nb.uint64[:, :], nb.uint64[:, :]))
def get_hash(state, board_b, board_w):
    """XOR together the table entries selected by the cells of ``state``.

    A cell equal to 1 contributes the matching entry of ``board_b``, a cell
    equal to -1 the matching entry of ``board_w``. Other cells contribute
    nothing. Only the leading ``n x n`` square is visited, where ``n`` is
    ``state.shape[0]``.
    """
    size = state.shape[0]
    digest = 0
    for row in range(size):
        for col in range(size):
            cell = state[row, col]
            if cell == 1:
                digest ^= board_b[row, col]
            elif cell == -1:
                digest ^= board_w[row, col]
    return digest

if __name__ == '__main__':
    N, M = 3, 3
    Zob = Zobrist(N)
Пример #37
        elif move.to_square == C1 and not board_state.rooks & BB_C1:
            return create_move(E1, A1)
    elif move.from_square == E8 and board_state.kings & BB_E8:
        if move.to_square == G8 and not board_state.rooks & BB_G8:
            return create_move(E8, H8)
        elif move.to_square == C8 and not board_state.rooks & BB_C8:
            return create_move(E8, A8)
    return move

@njit(uint64(BoardState.class_type.instance_type, Move.class_type.instance_type))
def push_with_hash_update(board_state, move):
    move = _to_chess960(board_state, move)

    # Reset ep square.
    ep_square = board_state.ep_square
    board_state.ep_square = None

    # reset the ep square in the hash
    if not ep_square is None:

        temp_ep_square = np.uint8(ep_square)

        if board_state.turn == True:
            ep_mask = shift_down(BB_SQUARES[temp_ep_square])
Пример #38
    :param index: offset in states to update
    :rtype: uint64
    index = int64(index)
    s0 = states[index]['s0']
    s1 = states[index]['s1']
    result = s0 + s1

    s1 ^= s0
    states[index]['s0'] = uint64(rotl(s0, uint32(55))) ^ s1 ^ (s1 << uint32(14))
    states[index]['s1'] = uint64(rotl(s1, uint32(36)))

    return result

# Pair of 64-bit constants read by xoroshiro128p_jump, which tests one bit of
# them per generator step to decide which states to XOR together.
XOROSHIRO128P_JUMP = (uint64(0xbeac0467eba5facb), uint64(0xd86b048b86aa9922))

def xoroshiro128p_jump(states, index):
    '''Advance the RNG in ``states[index]`` by 2**64 steps.

    :type states: 1D array, dtype=xoroshiro128p_dtype
    :param states: array of RNG states
    :type index: int64
    :param index: offset in states to update
    index = int64(index)

    s0 = uint64(0)
    s1 = uint64(0)
Пример #39
def uint64_to_unit_float64(x):
    '''Map a uint64 to a float64 in the range [0.0, 1.0).

    The top 53 bits of ``x`` are kept and scaled by 2**-53.
    '''
    bits = uint64(x)
    scale = float64(1) / (uint64(1) << uint32(53))
    top53 = bits >> uint32(11)
    return top53 * scale
Пример #40
def atomic_max_double_normalizedindex(res, ary):
    """Atomically fold ``ary[tx, bx]`` into ``res[0]`` with a maximum.

    ``tx`` is the thread index and ``bx`` the block index. The second index
    is cast to uint64 before ``ary`` is read.
    """
    tid = cuda.threadIdx.x
    bid = cuda.blockIdx.x
    candidate = ary[tid, uint64(bid)]
    cuda.atomic.max(res, 0, candidate)
Пример #41
def atomic_add_double_global_3(ary):
    """Atomically add 1 to this thread's element of ``ary``.

    The element is addressed by ``(threadIdx.x, threadIdx.y)``, with the
    second index cast to uint64.
    """
    ix = cuda.threadIdx.x
    iy = cuda.threadIdx.y
    target = (ix, uint64(iy))  # mixed index types in one tuple
    cuda.atomic.add(ary, target, 1)
Пример #42
def uint64_to_unit_float32(x):
    '''Convert uint64 to a float32 value nominally in the range [0.0, 1.0).

    The value is first mapped to float64 by ``uint64_to_unit_float64`` and
    then narrowed to float32.

    NOTE(review): narrowing can round a float64 just below 1.0 up to exactly
    1.0 -- confirm whether callers rely on the open upper bound.
    '''
    as_double = uint64_to_unit_float64(uint64(x))
    return float32(as_double)
Пример #43
def rotl(x, k):
    '''Rotate the 64-bit value ``x`` left by ``k`` bits.

    ``x`` is cast to uint64 and ``k`` to uint32 before shifting.
    '''
    value = uint64(x)
    shift = uint32(k)
    upper = value << shift
    wrapped = value >> uint32(64 - shift)
    return upper | wrapped