Example #1
    def compute(self, floatimage, histogram, k):
        width, height, nbins = np.shape(histogram)
        numpixels = width * height

        image_linear = np.reshape(floatimage, (numpixels, )).astype(np.float32)
        histogram_linear = np.reshape(
            histogram, (np.size(histogram), )).astype(np.float32)
        transform = np.zeros_like(image_linear).astype(np.float32)

        mf = cl.mem_flags
        self.buf_image = cl.Buffer(self.context,
                                   mf.READ_ONLY | mf.COPY_HOST_PTR,
                                   hostbuf=image_linear)
        self.buf_histogram = cl.Buffer(self.context,
                                       mf.READ_ONLY | mf.COPY_HOST_PTR,
                                       hostbuf=histogram_linear)
        self.output_buf = cl.Buffer(self.context, mf.READ_WRITE,
                                    transform.nbytes)

        kernel = self.program.IIF
        kernel.set_scalar_arg_dtypes([np.uintc, np.uintc, np.float32] +
                                     [None] * 3)
        kernel.set_arg(0, np.uintc(width))
        kernel.set_arg(1, np.uintc(height))
        kernel.set_arg(2, np.float32(k))
        kernel.set_arg(3, self.buf_image)
        kernel.set_arg(4, self.buf_histogram)
        kernel.set_arg(5, self.output_buf)

        cl.enqueue_nd_range_kernel(self.queue, kernel, image_linear.shape,
                                   None).wait()

        cl.enqueue_read_buffer(self.queue, self.output_buf, transform).wait()
        return np.reshape(transform, (width, height)).astype(np.float64)
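The PyOpenCL pattern above (also used in Example #3 below) hinges on set_scalar_arg_dtypes: np.uintc and np.float32 declare the width of each scalar kernel argument so it matches the kernel's unsigned int and float parameters. Below is a minimal, self-contained sketch of the same pattern, assuming any OpenCL platform is reachable through create_some_context; current PyOpenCL spells the read-back as cl.enqueue_copy, the successor of the older enqueue_read_buffer call used above. The kernel and variable names are illustrative only.

import numpy as np
import pyopencl as cl

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
prg = cl.Program(ctx, """
__kernel void scale(unsigned int n, float k, __global const float *src, __global float *dst)
{
    unsigned int gid = get_global_id(0);
    if (gid < n)
        dst[gid] = k * src[gid];
}
""").build()

src = np.arange(8, dtype=np.float32)
dst = np.empty_like(src)
mf = cl.mem_flags
buf_src = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=src)
buf_dst = cl.Buffer(ctx, mf.WRITE_ONLY, dst.nbytes)

kernel = prg.scale
# Declare the scalar argument dtypes once; None marks the buffer arguments.
kernel.set_scalar_arg_dtypes([np.uintc, np.float32, None, None])
kernel(queue, src.shape, None, np.uintc(src.size), np.float32(2.0), buf_src, buf_dst)
cl.enqueue_copy(queue, dst, buf_dst).wait()
print(dst)  # [ 0.  2.  4.  6.  8. 10. 12. 14.]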
Example #2
    def nodeInSimplex(self, nodeInd, simplexInd):
        # Function for checking whether a node is in a simplex

        # If the mesh has not been stored internally yet, do that now
        if self._internalID is None:
            self._storeMeshInternally()

        # Enforce formatting
        nodeInd = np.uintc(nodeInd)
        simplexInd = np.uintc(simplexInd)

        out = ctypes.c_bool(False)
        status = self._libInstance.implicitMesh_nodeInSimplex( self._internalID, \
                ctypes.c_uint( nodeInd ), ctypes.c_uint( simplexInd ), ctypes.byref(out) )

        if status != 0:
            # Try to save internally again
            self._storeMeshInternally()
            # Retry call
            status = self._libInstance.implicitMesh_nodeInSimplex( self._internalID, \
                ctypes.c_uint( nodeInd ), ctypes.c_uint( simplexInd ), ctypes.byref(out) )
        if status != 0:
            raise Exception("Unknown error occurred! Error code " + str(status) +
                            " from implicitMesh_nodeInSimplex()")

        return out.value
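Example #2 and the other ImplicitMesh methods further down all funnel np.uintc values into a C library through ctypes. A small standalone sketch of the two conversions involved, runnable without any external library; the array and variable names are only illustrative:

import ctypes
import numpy as np

c_uint_p = ctypes.POINTER(ctypes.c_uint)

# np.uintc is NumPy's counterpart of C unsigned int, so it passes cleanly through ctypes.c_uint.
nodeInd = np.uintc(7)
print(ctypes.c_uint(nodeInd).value)      # 7

# A C-contiguous uintc array can be handed to a C library as an unsigned int* via data_as.
simplices = np.arange(12, dtype=np.uintc).reshape(4, 3)
simplices_p = simplices.ctypes.data_as(c_uint_p)
print(simplices_p[0], simplices_p[11])   # 0 11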
Example #3
    def compute(self, image, num_bins):
        width, height = np.shape(image)
        numpixels = width * height

        image = np.reshape(image, (numpixels, )).astype(np.float32)
        result = np.zeros((numpixels * num_bins, ), dtype=np.float32)

        mf = cl.mem_flags
        self.buf_image = cl.Buffer(self.context,
                                   mf.READ_ONLY | mf.COPY_HOST_PTR,
                                   hostbuf=image)
        self.output_buf = cl.Buffer(self.context, mf.READ_WRITE, result.nbytes)

        kernel = self.program.iif_binid
        kernel.set_scalar_arg_dtypes([np.uintc, np.uintc, np.ubyte] +
                                     [None] * 2)
        kernel.set_arg(0, np.uintc(width))
        kernel.set_arg(1, np.uintc(height))
        kernel.set_arg(2, np.ubyte(num_bins))
        kernel.set_arg(3, self.buf_image)
        kernel.set_arg(4, self.output_buf)

        cl.enqueue_nd_range_kernel(self.queue, kernel, image.shape,
                                   None).wait()

        cl.enqueue_read_buffer(self.queue, self.output_buf, result).wait()
        return np.reshape(result, (width, height, num_bins)).astype(np.float32)
Example #4
def component_step1_shortcutting_p2(d_v, d_prevD, d_D, d_Q, length, s):
    """

    Step 1 (part 2) of the connected-component labelling: for every vertex whose
    label changed in this iteration, mark Q at the new label with the step number s.

    :param d_v: host array of Vertex records (vid, n1, n2)
    :param d_prevD: labels from the previous iteration
    :param d_D: current labels (curD on the device)
    :param d_Q: per-label flag array updated on the GPU
    :param length: number of entries to process
    :param s: current iteration number
    :return: the updated d_Q
    """

    import eulercuda.pyencode as enc
    logger = logging.getLogger('eulercuda.pycomponent.component_step1_shortcutting_p2')
    logger.info("started.")
    mod = SourceModule("""
       typedef struct Vertex
       {
           unsigned int vid;
           unsigned int n1;
           unsigned int n2;
       } Vertex;
    __global__ void componentStepOne_ShortCuttingP2(Vertex * v, unsigned  int * prevD, unsigned  int * curD, unsigned int * Q, unsigned int length, int s)
    {
        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        if( tid <length)
        {
            if(curD[tid]!=prevD[tid])
            {
                Q[curD[tid]]=s;
            }
        }
    }
       """)
    block_dim, grid_dim = enc.getOptimalLaunchConfiguration(length, 512)
    logger.info('block_dim = %s, grid_dim = %s' % (block_dim, grid_dim))
    np_d_v = gpuarray.to_gpu(d_v)
    np_d_D = gpuarray.to_gpu(d_D)
    np_d_prevD = gpuarray.to_gpu(d_prevD)
    np_d_Q = gpuarray.to_gpu(d_Q)
    shortcutting_p1_device = mod.get_function('componentStepOne_ShortCuttingP2')
    shortcutting_p1_device(
        np_d_v,
        np_d_prevD,
        np_d_D,
        np_d_Q,
        np.uintc(length),
        np.uintc(s),
        block=block_dim, grid=grid_dim
    )
    np_d_v.get(d_v)
    np_d_prevD.get(d_prevD)
    np_d_D.get(d_D)
    np_d_Q.get(d_Q)
    devdata = pycuda.tools.DeviceData()
    orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[0])
    logger.info("Occupancy = %s" % (orec.occupancy * 100))

    logger.info("Finished. Leaving.")
    return d_Q
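Most of the CUDA examples in this listing pass kernel scalars as fixed-width NumPy scalars (np.uintc for unsigned int, np.ulonglong for unsigned long long) so that PyCUDA marshals exactly the bytes the kernel signature expects. A minimal sketch of that pattern, assuming a CUDA-capable device and a working PyCUDA installation; the kernel here is illustrative, not part of eulercuda:

import numpy as np
import pycuda.autoinit  # noqa: F401 -- creates a context on the default device
import pycuda.driver as drv
from pycuda.compiler import SourceModule

mod = SourceModule("""
__global__ void scale(unsigned int n, float k, float *data)
{
    unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid < n)
        data[tid] *= k;
}
""")
scale = mod.get_function("scale")

data = np.arange(8, dtype=np.float32)
# np.uintc matches the kernel's unsigned int and np.float32 matches float; a bare
# Python int would not tell PyCUDA how many bytes to marshal for the scalar argument.
scale(np.uintc(data.size), np.float32(2.0), drv.InOut(data), block=(8, 1, 1), grid=(1, 1))
print(data)  # [ 0.  2.  4.  6.  8. 10. 12. 14.]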
Example #5
def phase1_device(d_keys, d_offset, d_length, count, bucketCount):
    logger = logging.getLogger('eulercuda.pygpuhash.phase1_device')
    logger.info("started.")
    mod = SourceModule("""
    //#include <stdio.h>
    typedef unsigned long long  KEY_T ;
    typedef KEY_T               *KEY_PTR;
    typedef unsigned int        VALUE_T;
    typedef VALUE_T             *VALUE_PTR;
    #define C0  0x01010101
    #define C1	0x12345678
    #define LARGE_PRIME 1900813
    #define MAX_INT  0xffffffff

    __forceinline__ __device__ unsigned int hash_h(KEY_T  key, unsigned int bucketCount)
    {
        return ((C0 + C1 * key) % LARGE_PRIME ) % bucketCount;
    }
    __global__ void phase1(	KEY_PTR  keys,
                unsigned int * offset,
                unsigned int length,
                unsigned int* count,
                unsigned int bucketCount){

        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        if(tid<length)
        {
            KEY_T key=keys[tid];
            unsigned int bucket=hash_h(key,bucketCount);
            offset[tid]=atomicInc (count+bucket,MAX_INT);

        }
        __syncthreads();
    }
    """, options=['--compiler-options', '-Wall'])
    np_d_keys = np.array(d_keys).astype('Q')
    keys_gpu = gpuarray.to_gpu(np_d_keys)
    offset_gpu = gpuarray.zeros(len(d_keys), dtype='I')
    count_gpu = gpuarray.to_gpu(count)
    block_dim = (1024, 1, 1)
    # Note: floor division assumes d_length is a multiple of 1024; otherwise the
    # trailing keys would not be covered by the launched grid.
    if (d_length // 1024) == 0:
        grid_dim = (1, 1, 1)
    else:
        grid_dim = (d_length // 1024, 1, 1)
    phase1 = mod.get_function("phase1")
    logger.info('block_dim = %s, grid_dim = %s' % (block_dim, grid_dim))
    phase1(keys_gpu, offset_gpu, np.uintc(d_length), count_gpu,
            np.uintc(bucketCount), grid=grid_dim, block=block_dim)
    d_offset = offset_gpu.get()
    count = count_gpu.get()
    devdata = pycuda.tools.DeviceData()
    # orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[0])
    # logger.info("Occupancy = %s" % (orec.occupancy * 100))
    logger.info('Finished. Leaving.')
 #   return [d_offset, d_bucketSize]
    return d_offset, count
Example #6
    def _storeMeshInternally(self):
        # Store mesh internally

        # Get pointers to actual mesh
        nodes_p = self._mesh._mesh.nodes.ctypes.data_as(self.c_double_p)
        simplices_p = self._mesh._mesh.triangles.ctypes.data_as(self.c_uint_p)
        neighs_p = None
        if (self._mesh._neighs is not None):
            neighs_p = self._mesh._neighs.ctypes.data_as(self.c_uint_p)
        offset_p = None
        if (self._mesh._offset is not None):
            offset_p = self._mesh._offset.ctypes.data_as(self.c_double_p)
        numPerDimension_p = None
        if (self._mesh._numPerDimension is not None):
            numPerDimension_p = self._mesh._numPerDimension.ctypes.data_as(
                self.c_uint_p)

        offsetHyper_p = None
        if self._offset is not None:
            offsetHyper_p = self._offset.ctypes.data_as(self.c_double_p)
        stepLengths_p = None
        if self._stepLengths is not None:
            stepLengths_p = self._stepLengths.ctypes.data_as(self.c_double_p)
        numSteps_p = None
        if self._numSteps is not None:
            numSteps_p = self._numSteps.ctypes.data_as(self.c_uint_p)
        hyperDim = np.uintc(0)
        if (offsetHyper_p is not None and stepLengths_p is not None
                and numSteps_p is not None):
            hyperDim = np.uintc(self._offset.size)

        # Preallocate output
        meshId = ctypes.c_uint(0)
        newNumNodes = ctypes.c_uint(0)
        newNumSimplices = ctypes.c_uint(0)

        # Create implicit mesh
        status = self._mesh._libInstance.hyperRectExtension_createMesh( \
           nodes_p, ctypes.c_uint( self._mesh._mesh.nodes.shape[0]) , ctypes.c_uint( self._mesh._mesh.embD ), \
           simplices_p, ctypes.c_uint( self._mesh._mesh.triangles.shape[0]) , ctypes.c_uint( self._mesh._mesh.topD ), \
           offset_p, numPerDimension_p, \
           offsetHyper_p, stepLengths_p, numSteps_p, ctypes.c_uint(hyperDim), \
           ctypes.byref( meshId ), ctypes.byref( newNumNodes ), ctypes.byref( newNumSimplices ), \
           neighs_p )
        if status != 0:
            raise Exception("Unknown error occurred! Error code " + str(status) +
                            " from hyperRectExtension_createMesh()")

        # Store mesh internally
        self._internalID = meshId.value
        self.N = newNumNodes.value
        self.NT = newNumSimplices.value
Example #7
def calculate_circuit_graph_vertex_data_device(d_D, d_C, length):
    logger = logging.getLogger('eulercuda.pyeulertour.calculate_circuit_graph_vertex_data_device')
    logger.info("started.")
    mod = SourceModule("""
    __global__ void calculateCircuitGraphVertexData( unsigned int * D,unsigned int * C,unsigned int ecount){

        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        if( tid <ecount)
        {
            unsigned int c=D[tid];
            atomicExch(C+c,1);
        }
    }
    """)
    calculate_circuit_graph_vertex_data = mod.get_function('calculateCircuitGraphVertexData')
    block_dim, grid_dim = getOptimalLaunchConfiguration(length, 512)
    logger.info('block_dim = %s, grid_dim = %s' % (block_dim, grid_dim))
    np_d_D = gpuarray.to_gpu(d_D)
    np_d_C = gpuarray.to_gpu(d_C)
    calculate_circuit_graph_vertex_data(
        np_d_D,
        np_d_C,
        np.uintc(length),
        block=block_dim, grid=grid_dim
    )
    np_d_D.get(d_D)
    np_d_C.get(d_C)
    # devdata = pycuda.tools.DeviceData()
    # orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[1])
    # logger.info("Occupancy = %s" % (orec.occupancy * 100))
    logger.info("Finished. Leaving.")
    return d_D, d_C
Example #8
 def test_numpy(self):
     """NumPy objects get serialized to readable JSON."""
     l = [
         np.float32(12.5),
         np.float64(2.0),
         np.float16(0.5),
         np.bool(True),
         np.bool(False),
         np.bool_(True),
         np.unicode_("hello"),
         np.byte(12),
         np.short(12),
         np.intc(-13),
         np.int_(0),
         np.longlong(100),
         np.intp(7),
         np.ubyte(12),
         np.ushort(12),
         np.uintc(13),
         np.ulonglong(100),
         np.uintp(7),
         np.int8(1),
         np.int16(3),
         np.int32(4),
         np.int64(5),
         np.uint8(1),
         np.uint16(3),
         np.uint32(4),
         np.uint64(5),
     ]
     l2 = [l, np.array([1, 2, 3])]
     roundtripped = loads(dumps(l2, cls=EliotJSONEncoder))
     self.assertEqual([l, [1, 2, 3]], roundtripped)
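The EliotJSONEncoder used above is what maps the NumPy scalars (np.uintc included) onto plain JSON numbers. The sketch below is not that encoder, just a minimal illustration of the usual pattern such encoders follow, using only the standard library and NumPy:

import json
import numpy as np

class MinimalNumpyEncoder(json.JSONEncoder):
    # Map NumPy scalars and arrays onto built-in Python types before serialization.
    def default(self, o):
        if isinstance(o, np.integer):    # covers np.uintc, np.int64, ...
            return int(o)
        if isinstance(o, np.floating):
            return float(o)
        if isinstance(o, np.bool_):
            return bool(o)
        if isinstance(o, np.ndarray):
            return o.tolist()
        return super().default(o)

print(json.dumps([np.uintc(13), np.float32(12.5), np.array([1, 2, 3])],
                 cls=MinimalNumpyEncoder))  # [13, 12.5, [1, 2, 3]]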
Example #10
def what_is_uint():
    '''
    - "np.uint" and "np.uintc" are aliases for real underlying NumPy scalar types
        - The values of those aliases depend on the operating system
            - On my system, "np.uint" creates an object whose class is "numpy.uint64"
                - "np.uint" has the same precision as a C "unsigned long" (64 bits here)
            - On my system, "np.uintc" creates an object whose class is "numpy.uint32"
                - "np.uintc" has the same precision as a C "unsigned int" (32 bits here)
        - If I want some size other than those specified by the aliases, I'll have to use a class with an explicit size, e.g. np.uint8
    '''
    print(np.uint is np.uint64)  # True
    print(np.uintc is np.uint32)  # True
    # No error because 1 certainly fits within the size of a C long
    ary = np.array(1, dtype=np.uint)
    print(ary.dtype)  # uint64
    #print(int(10**50)) # 100000000000000000000000000000000000000000000000000
    #np.array(10**50, dtype=np.uint) # OverflowError: Python int too large to convert to C long
    print(type(np.uint))  # <class 'type'>
    scalar = np.uint(10)
    print(type(scalar))  # <class 'numpy.uint64'>
    scalar = np.uint32(10)
    print(type(scalar))  # <class 'numpy.uint32'>
    scalar = np.uintc(10)
    print(type(scalar))  # <class 'numpy.uint32'>
    scalar = np.uint8(4)
    print(type(scalar))  # <class 'numpy.uint8'>
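A short follow-up to the notes above that makes the platform mapping explicit without printing class names; the commented values assume a typical 64-bit Linux/macOS build of NumPy, where np.uint is 64-bit and np.uintc is 32-bit:

import numpy as np

# What the platform-dependent aliases resolve to on this machine
print(np.dtype(np.uint), np.dtype(np.uint).itemsize)    # e.g. uint64 8
print(np.dtype(np.uintc), np.dtype(np.uintc).itemsize)  # typically uint32 4

# The value ranges follow directly from the underlying C types
print(np.iinfo(np.uintc).max)  # 4294967295 when uintc is 32-bit
print(np.iinfo(np.uint).max)   # 18446744073709551615 when uint is 64-bit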
Example #11
def compute_kmer_device (lmers, pkmers, skmers, kmerBitMask, readLength, readCount):
    # module_logger = logging.getLogger('eulercuda.pyencode.compute_kmer_device')
    module_logger.info("started compute_kmer_device.")
    mod = SourceModule("""
    typedef unsigned  long long KEY_T ;
    typedef KEY_T * KEY_PTR ;
    #define LMER_PREFIX(lmer,bitMask) ((lmer & (bitMask<<2))>>2)
    #define LMER_SUFFIX(lmer,bitMask) ((lmer & bitMask))

    __global__ void computeKmerDevice(
            KEY_PTR lmers,
            KEY_PTR pkmers,
            KEY_PTR skmers,
            KEY_T validBitMask,
            unsigned int readCount
        )
    {
       const unsigned int tid = (blockDim.x * blockDim.y * gridDim.x * blockIdx.y) + (blockDim.x * blockDim.y * blockIdx.x) + (blockDim.x * threadIdx.y) + threadIdx.x;
        
        if (tid < readCount)
        {
     

            KEY_T lmer;
            //fetch lmer
            lmer = lmers[tid];
            //find prefix
            pkmers[tid] = LMER_PREFIX(lmer,validBitMask);
            //find suffix
            skmers[tid] = LMER_SUFFIX(lmer,validBitMask);
           // __syncthreads();
        }
    }
    """, options=['--compiler-options', '-Wall'])
    compute_kmer = mod.get_function("computeKmerDevice")

    block_dim, grid_dim = getOptimalLaunchConfiguration(readCount, readLength)
    np_pkmers = gpuarray.to_gpu(pkmers)
    np_skmers = gpuarray.to_gpu(skmers)
    if isinstance(lmers, np.ndarray) and isinstance(pkmers, np.ndarray) and isinstance(skmers, np.ndarray):
        module_logger.info("Going to GPU.")
        compute_kmer(
            drv.In(lmers),
            np_pkmers,
            np_skmers,
            np.ulonglong(kmerBitMask),
            np.uintc(readCount),
            block=block_dim, grid=grid_dim
        )
        np_pkmers.get(pkmers)
        np_skmers.get(skmers)
    else:
        module_logger.warn("PROBLEM WITH GPU.")
    devdata = pycuda.tools.DeviceData()
    orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[0])
    module_logger.debug("Occupancy = %s" % (orec.occupancy * 100))

    module_logger.info("leaving compute_kmer_device.")
    return pkmers, skmers
Example #12
    def getFullNeighs(self):
        # Get whole neighborhood structure

        if self._neighs is None:
            return None

        newNeighs = np.zeros((self.NT, self._mesh.topD + 1), dtype=np.uintc)
        newNeighs_p = newNeighs.ctypes.data_as(self._mesh.c_uint_p)

        status = self._libInstance.implicitMesh_retrieveFullNeighsFromImplicitMesh( self._internalID, \
               newNeighs_p, ctypes.c_uint(np.uintc(self.NT) ), ctypes.c_uint( np.uintc(self._mesh.topD) ) )
        if status != 0:
            raise Exception(
                "Unknown error occurred! Error code " + str(status) +
                " from implicitMesh_retrieveFullNeighsFromImplicitMesh()")

        return newNeighs
Example #13
def find_component_device(d_v, d_D,  length):
    """

    :param d_v:
    :param d_D:
    :param length:
    :return:
    """
    import eulercuda.pyencode as enc
    logger = logging.getLogger('eulercuda.pycomponent.find_component_device')
    logger.info("started.")
    mem_size = length
    d_prevD = np.zeros(mem_size, dtype=np.uintc)
    d_Q = np.zeros_like(d_prevD)
    d_t1 = np.zeros_like(d_prevD)
    d_t2 = np.zeros_like(d_prevD)
    d_val1 = np.zeros_like(d_prevD)
    d_val2 = np.zeros_like(d_prevD)
    sp = np.uintc(0)

    s = np.uintc

    d_D, d_Q = component_step_init(d_v, d_D, d_Q, length)
    s, sp = 1, 1

    sptemp = drv.pagelocked_zeros(4, dtype=np.intc, mem_flags=drv.host_alloc_flags.DEVICEMAP)
    d_sptemp = np.intp(sptemp.base.get_device_pointer())

    while s == sp:
        d_D, d_prevD = d_prevD, d_D

        d_D = component_step1_shortcutting_p1(d_v, d_prevD, d_D, d_Q, length, s)

        d_Q = component_step1_shortcutting_p2(d_v, d_prevD, d_D, d_Q, length, s)

        d_t1, d_t2, d_val1, d_val2 = component_Step2_P1(d_v, d_prevD, d_D, d_Q, d_t1, d_val1, d_t2, d_val2, length, s)

        d_D, d_Q = component_Step2_P2(d_v, d_prevD, d_D, d_Q, d_t1, d_val1, d_t2, d_val2, length, s)

        d_t1, d_t2, d_val1, d_val2 = component_Step3_P1(d_v, d_prevD, d_D, d_Q, d_t1, d_val1, d_t2, d_val2, length, s)

        d_D = component_Step3_P2(d_v, d_prevD, d_D, d_Q, d_t1, d_val1, d_t2, d_val2, length, s)

        d_val1 = component_step4_P1(d_v, d_D, d_val1, length)

        d_D = component_step4_P2(d_v, d_D, d_val1, length)

        sptemp[0] = 0

        d_sptemp = component_step5(d_Q, length, d_sptemp, s)

        sp += sptemp[0]

        s += 1

    logger.info("Finished. Leaving.")
    return d_D
Example #14
 def __init__(self, spatial_extent=15, timesteps=4, batchnorm=True, channel_sym=True,
              return_sequences=False, rand_seed=None, **kwargs):
     self.spatial_extent = spatial_extent
     self.timesteps = timesteps
     self.batchnorm = batchnorm
     self.channel_sym = channel_sym
     self.return_sequences = return_sequences
     self.rand_seed = rand_seed if rand_seed else np.uintc(hash(random.random()))
     super(hGRU, self).__init__(**kwargs)
Example #15
    def pointInSimplex(self,
                       point,
                       simplexInd,
                       embTol=0,
                       centerOfCurvature=None):
        # Function for checking whether a point is in a simplex

        if (point.size != self._mesh.embD):
            raise Exception("Wrong dimensionality of point in input")

        # If the mesh has not been stored internally yet, do that now
        if self._internalID is None:
            self._storeMeshInternally()

        # Enforce formatting
        if point.dtype is not np.dtype("float64"):
            point = point.astype(np.float64)
        point_p = point.ctypes.data_as(self.c_double_p)
        simplexInd = np.uintc(simplexInd)
        embTol = np.float64(embTol)
        centerOfCurvature_p = None
        if centerOfCurvature is not None:
            if isinstance(centerOfCurvature, np.ndarray):
                if centerOfCurvature.dtype is not np.dtype("float64"):
                    centerOfCurvature = centerOfCurvature.astype(np.float64)
                centerOfCurvature_p = centerOfCurvature.ctypes.data_as(
                    self.c_double_p)

        out = ctypes.c_bool(False)
        status = self._libInstance.implicitMesh_pointInSimplex( self._internalID, \
                 point_p, np.uintc(point.size), simplexInd, ctypes.byref(out), ctypes.c_double(embTol), centerOfCurvature_p )

        if status != 0:
            # Try to save internally again
            self._storeMeshInternally()
            # Retry call
            status = self._libInstance.implicitMesh_pointInSimplex( self._internalID, \
                 point_p, np.uintc(point.size), simplexInd, ctypes.byref(out), ctypes.c_double(embTol), centerOfCurvature_p )
        if status != 0:
            raise Exception("Unknown error occurred! Error code " + str(status) +
                            " from implicitMesh_pointInSimplex()")

        return out.value
Example #16
 def __init__(self, spatial_extent, timesteps, batchnorm, channel_sym, 
              rand_seed=None, **kwargs):
     
     self.spatial_extent = spatial_extent
     self.timesteps = timesteps
     self.batchnorm = batchnorm
     self.channel_sym = channel_sym
     self.rand_seed = rand_seed if rand_seed else np.uintc(hash(random.random()))
     
     super(hGRUCell, self).__init__(**kwargs)
Example #17
def _test_collision_robustness_3d(aspect, y, z, step):
    nx = nz = 10
    ny = int(aspect * nx)
    mesh = UnitCubeMesh(nx, ny, nz)
    bb = mesh.bounding_box_tree()

    x = 0.0
    while x <= 1.0:
        c = bb.compute_first_entity_collision(Point(x, y, z))
        assert c < np.uintc(-1)
        x += step
Example #19
def component_step5(d_Q,length,d_sptemp,s):
    """

    :param d_Q:
    :param length:
    :param d_sptemp:
    :param s:
    :return:
    """
    import eulercuda.pyencode as enc
    logger = logging.getLogger('eulercuda.pycomponent.component_Step5')
    logger.info("started.")
    mod = SourceModule("""
    __global__ void componentStepFive(unsigned int * Q,unsigned int length,unsigned  int * sprimtemp,unsigned int s){
        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        if(tid <length) {
            if(Q[tid]==s){
                atomicExch(sprimtemp,1);
                //*sprime=*sprimtemp+1;
            }
        }
    }
    """)
    block_dim, grid_dim = enc.getOptimalLaunchConfiguration(length, 512)
    logger.info('block_dim = %s, grid_dim = %s' % (block_dim, grid_dim))
    np_d_sptemp = gpuarray.to_gpu(d_sptemp)
    step5 = mod.get_function('componentStepFive')
    step5(
        drv.In(d_Q),
        np.uintc(length),
        np_d_sptemp,
        np.uintc(s),
        block=block_dim, grid=grid_dim
    )
    np_d_sptemp.get(d_sptemp)
    devdata = pycuda.tools.DeviceData()
    orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[0])
    logger.info("Occupancy = %s" % (orec.occupancy * 100))

    logger.info("Finished. Leaving.")
    return d_sptemp
Example #20
def _test_collision_robustness_2d(aspect, y, step):
    nx = 10
    ny = int(aspect * nx)
    mesh = UnitSquareMesh(nx, ny, 'crossed')
    bb = mesh.bounding_box_tree()

    x = 0.0
    p = Point(x, y)
    while x <= 1.0:
        c = bb.compute_first_entity_collision(Point(x, y))
        assert c < np.uintc(-1)
        x += step
Example #22
def construct_successor_graphP2_device(d_ee, d_v, ecount):
    logger = logging.getLogger('eulercuda.pyeulertour.construct_successor_graphP1_device')
    logger.info("started.")
    mod = SourceModule("""
    #include <stdio.h>
    typedef unsigned long long  KEY_T ;
    typedef KEY_T               *KEY_PTR;
    typedef unsigned int        VALUE_T;
    typedef VALUE_T             *VALUE_PTR;

    typedef struct EulerEdge{
        KEY_T eid;
        unsigned int v1;
        unsigned int v2;
        unsigned int s;
        unsigned int pad;
    }EulerEdge;
    typedef struct Vertex{
        unsigned int vid;
        unsigned int n1;
        unsigned int n2;
    } Vertex;

    __global__ void constructSuccessorGraphP2(EulerEdge* e, Vertex * v, unsigned int ecount)
    {
        unsigned int tid = (blockDim.x * blockDim.y * gridDim.x * blockIdx.y) + (blockDim.x*blockDim.y * blockIdx.x) +
        (blockDim.x * threadIdx.y) + threadIdx.x;

        if(tid<ecount)
        {
            if(v[tid].n1 <ecount )
            {
                v[v[tid].n1].n2=v[tid].vid;
            }
        }
    }
    """)
    construct_successor_graphP2 = mod.get_function("constructSuccessorGraphP2")
    block_dim, grid_dim = getOptimalLaunchConfiguration(ecount, 512)
    logger.info('block_dim = %s, grid_dim = %s' % (block_dim, grid_dim))
    np_d_v = gpuarray.to_gpu(d_v)
    construct_successor_graphP2(
        drv.In(d_ee),
        np_d_v,
        np.uintc(ecount),
        block=block_dim, grid=grid_dim
    )
    # devdata = pycuda.tools.DeviceData()
    # orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[1])
    # logger.info("Occupancy = %s" % (orec.occupancy * 100))
    np_d_v.get(d_v)
    logger.info("Finished. Leaving.")
    return d_v
Example #23
 def griddedTemplateMatching( self, img1, img2, templateRadius, searchRadius, \
     estimateInds = None, templateSkip = 0, searchSkip = 0, templateStart = 0, searchStart = 0 ):
     """
     Function for performing template matching between two sets of bitmapped images
     """
     
     # Convert input
     img1 = np.ascontiguousarray(img1, dtype=np.double)
     img2 = np.ascontiguousarray(img2, dtype=np.double)    
     if estimateInds is not None:
         estimateInds = np.ascontiguousarray(estimateInds, dtype=np.bool_)
         
     # Preallocate output
     mapIndex = np.zeros( img1.shape, dtype = np.uintc, order = 'C' )
     maxCrossCorr = np.nan * np.ones( img1.shape, dtype = np.double, order = 'C' )
         
     
     # Acquire pointers
     img1_p = img1.ctypes.data_as( self.c_double_p )
     img2_p = img2.ctypes.data_as( self.c_double_p )
     mapIndex_p = mapIndex.ctypes.data_as( self.c_uint_p )
     if estimateInds is not None:
         estimateInds_p = estimateInds.ctypes.data_as( self.c_bool_p )
     else:
         estimateInds_p = None
     maxCrossCorr_p = maxCrossCorr.ctypes.data_as( self.c_double_p )
     
     # Perform template matching
     status = self._libInstance.misc_localMaxCrossCorr2D( \
         img1_p, img2_p, \
         ctypes.c_uint( img1.shape[1] ), ctypes.c_uint( img1.shape[0] ), \
         ctypes.c_uint( templateRadius ), ctypes.c_uint( searchRadius ), \
         ctypes.c_uint( np.uintc(templateSkip) ), ctypes.c_uint( np.uintc(searchSkip) ), \
         ctypes.c_uint( np.uintc(templateStart) ), ctypes.c_uint( np.uintc(searchStart) ), \
         mapIndex_p, estimateInds_p, maxCrossCorr_p )          
     if status != 0:            
         raise Exception( "Unknown error occurred! Error status: " + str(status) )
     
     # Return index
     return (mapIndex, maxCrossCorr)
Example #24
    def toFullMesh(self):
        # Function for returning a full mesh corresponding to the ImplicitMesh

        # If the mesh has not been stored internally yet, do that now
        if self._internalID is None:
            self._storeMeshInternally()

        # Preallocate for full mesh
        newNodes = np.zeros((self.N, self._mesh.embD), dtype=np.float64)
        newSimplices = np.zeros((self.NT, self._mesh.topD + 1), dtype=np.uintc)

        newNodes_p = newNodes.ctypes.data_as(self._mesh.c_double_p)
        newSimplices_p = newSimplices.ctypes.data_as(self._mesh.c_uint_p)

        # Retrieve implicit mesh
        status = self._libInstance.implicitMesh_retrieveFullMeshFromImplicitMesh( self._internalID, \
           newNodes_p, ctypes.c_uint(np.uintc(self.N)) , ctypes.c_uint( np.uint(self._mesh.embD) ), \
           newSimplices_p, ctypes.c_uint(np.uintc(self.NT)) , ctypes.c_uint( np.uintc(self._mesh.topD) ) )

        if status != 0:
            # Try to save internally again
            self._storeMeshInternally()
            # Try again to retrieve implicit mesh
            status = self._libInstance.implicitMesh_retrieveFullMeshFromImplicitMesh( self._internalID, \
               newNodes_p, ctypes.c_uint(np.uintc(self.N)) , ctypes.c_uint( np.uintc(self._mesh.embD) ), \
               newSimplices_p, ctypes.c_uint(np.uintc(self.NT)) , ctypes.c_uint( self._mesh.topD ) )
        if status != 0:
            raise Exception(
                "Unknown error occurred! Error code " + str(status) +
                " from implicitMesh_retrieveFullMeshFromImplicitMesh()")

        # Return
        return Mesh(newSimplices, newNodes, libPath=self._mesh._libPath)
Example #25
    def _storeMeshInternally(self):
        # Store mesh internally

        # If is stored internally already
        if (self.checkInternal()):
            return

        self._mesh._storeMeshInternally()
        if (self._mesh._internalID is None):
            raise Exception("No internal ID was found!")

        numElementsPerTensor = np.uintc(np.prod(self._metricTensors.shape[1:]))
        numTensors = np.uintc(self._metricTensors.shape[0])
        tensorMode = int(self._tensorMode)
        metricTensors_p = self._metricTensors.ctypes.data_as(self.c_double_p)
        sectors_p = None
        numSectorDimensions_p = None

        # Preallocate output
        ID = ctypes.c_uint(0)
        numNodes = ctypes.c_uint(0)
        numSimplices = ctypes.c_uint(0)

        # Create implicit mesh
        status = self._mesh._libInstance.meshAndMetric_create( ctypes.c_uint(self._mesh._internalID), ctypes.byref(ID), \
                      metricTensors_p, ctypes.c_uint( numElementsPerTensor ), ctypes.c_uint( numTensors ), \
                      ctypes.c_int( tensorMode ), \
                      ctypes.byref(numNodes), ctypes.byref(numSimplices), \
                      sectors_p, numSectorDimensions_p )

        if status != 0:
            raise Exception("Unknown error occurred! Error code " + str(status) +
                            " from meshAndMetric_create()")

        # Store mesh internally
        self._internalID = ID.value
        self._mesh.N = numNodes.value
        self._mesh.NT = numSimplices.value
Example #26
def component_step_init(d_v, d_D, d_Q, length):
    """

    :param d_v:
    :param d_D:
    :param d_Q:
    :param length:
    :return:
    """
    import eulercuda.pyencode as enc	    
    logger = logging.getLogger('eulercuda.pycomponent.component_step_init')
    logger.info("started.")
    mod = SourceModule("""
    typedef struct Vertex
    {
        unsigned int vid;
        unsigned int n1;
        unsigned int n2;
    } Vertex;

    __global__ void componentStepInit(Vertex * v, unsigned int * D,  unsigned int* Q, unsigned int length)
    {
        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        if( tid <length)
        {
            //v[tid].vid;
            D[tid]=tid;
            Q[tid]=0;
        }
    }
    """)
    component_step_init_device = mod.get_function('componentStepInit')
    block_dim, grid_dim = enc.getOptimalLaunchConfiguration(length, 512)
    np_d_D = gpuarray.to_gpu(d_D)
    np_d_Q = gpuarray.to_gpu(d_Q)
    logger.info('block_dim = %s, grid_dim = %s' % (block_dim, grid_dim))
    component_step_init_device(
        drv.In(d_v),
        np_d_D,
        np_d_Q,
        np.uintc(length),
        block=block_dim, grid=grid_dim
    )
    np_d_D.get(d_D)
    np_d_Q.get(d_Q)
    devdata = pycuda.tools.DeviceData()
    orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[0])
    logger.info("Occupancy = %s" % (orec.occupancy * 100))

    logger.info("Finished. Leaving.")
    return d_D, d_Q
Example #27
    def fromSectorAndExplicit2Node(self, sector, explicitInd):
        # Function for returning implicit node index from sector and explicit node index

        # Enforce formatting
        sector = np.uintc(sector)
        explicitInd = np.uintc(explicitInd)
        # If the mesh has not been stored internally yet, do that now
        if self._internalID is None:
            self._storeMeshInternally()
        nodeInd = ctypes.c_uint(0)
        status = self._libInstance.implicitMesh_nodeSectorAndExplicit2Ind( self._internalID, ctypes.c_uint( sector ), \
                                                                        ctypes.c_uint( explicitInd ), ctypes.byref(nodeInd) )
        if status != 0:
            # Try to save internally again
            self._storeMeshInternally()
            # Retry call
            status = self._libInstance.implicitMesh_nodeSectorAndExplicit2Ind( self._internalID, ctypes.c_uint( sector ), \
                                                                        ctypes.c_uint( explicitInd ), ctypes.byref(nodeInd) )
        if status != 0:
            raise Exception("Unknown error occurred! Error code " + str(status) +
                            " from implicitMesh_nodeSectorAndExplicit2Ind()")

        return nodeInd.value
Example #28
def component_step4_P2(d_v, d_D, d_val1, length):
    """

    :param d_v:
    :param d_D:
    :param d_val1:
    :param length:
    :return:
    """
    import eulercuda.pyencode as enc
    logger = logging.getLogger('eulercuda.pycomponent.component_Step4_P2')
    logger.info("started.")
    mod = SourceModule("""
         typedef struct Vertex
         {
             unsigned int vid;
             unsigned int n1;
             unsigned int n2;
         } Vertex;

    __global__ void componentStepFourP2(Vertex * v, unsigned  int * curD,unsigned int * val1,unsigned int length){
        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        if( tid < length){
            curD[tid]= val1[tid];
        }

    }
    """)
    block_dim, grid_dim = enc.getOptimalLaunchConfiguration(length, 512)
    logger.info('block_dim = %s, grid_dim = %s' % (block_dim, grid_dim))
    np_d_val1 = gpuarray.to_gpu(d_val1)
    np_d_D = gpuarray.to_gpu(d_D)
    step4_P2 = mod.get_function('componentStepFourP2')
    step4_P2(
        drv.In(d_v),
        np_d_D,
        np_d_val1,
        np.uintc(length),
        block=block_dim, grid=grid_dim
    )
    np_d_D.get(d_D)
    np_d_val1.get(d_val1)
    devdata = pycuda.tools.DeviceData()
    orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[0])
    logger.info("Occupancy = %s" % (orec.occupancy * 100))

    logger.info("Finished. Leaving.")
    return d_D
Example #29
def identify_contig_start(d_ee, d_contigStart, ecount):
    logger = logging.getLogger('pyeulertour.identify_contig_start')
    logger.info("started.")
    mod = SourceModule("""
    typedef unsigned long long  KEY_T ;
    typedef struct EulerEdge{
        KEY_T eid;
        unsigned int v1;
        unsigned int v2;
        unsigned int s;
        unsigned int pad;
    }EulerEdge;

    __global__  void identifyContigStart( EulerEdge * e ,unsigned char * contigStart,unsigned int ecount){
        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        if(tid<ecount){
            if(e[tid].s < ecount){
                contigStart[e[tid].s]=0;
                //atomicExch(contigStart+e[tid].s,0);
            }
        }
    }

    """)
    block_dim, grid_dim = getOptimalLaunchConfiguration(ecount.item(), 512)
    np_d_contigStart = gpuarray.to_gpu(d_contigStart)
    c_start = mod.get_function('identifyContigStart')
    c_start(
        drv.In(d_ee),
        np_d_contigStart,
        np.uintc(ecount),
        block=block_dim,
        grid=grid_dim
    )
    np_d_contigStart.get(d_contigStart)
    # devdata = pycuda.tools.DeviceData()
    # orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[1])
    # logger.debug("Occupancy = %s" % (orec.occupancy * 100))
    logger.info('Finished.')
    return d_contigStart
Example #30
    def __init__(self):
        NT = namedtuple('NT', tuple('abc'))

        self.values = [
                np.longlong(-1), np.int_(-1), np.intc(-1), np.short(-1), np.byte(-1),
                np.ubyte(1), np.ushort(1), np.uintc(1), np.uint(1), np.ulonglong(1),
                np.half(1.0), np.single(1.0), np.float_(1.0), np.longfloat(1.0),
                np.csingle(1.0j), np.complex_(1.0j), np.clongfloat(1.0j),
                np.bool_(0), np.str_('1'), np.unicode_('1'), np.void(1),
                np.object(), np.datetime64('NaT'), np.timedelta64('NaT'), np.nan,
                12, 12.0, True, None, float('NaN'), object(), (1, 2, 3),
                NT(1, 2, 3), datetime.date(2020, 12, 31), datetime.timedelta(14),
        ]

        # Datetime & Timedelta
        for precision in ['ns', 'us', 'ms', 's', 'm', 'h', 'D', 'M', 'Y']:
            for kind, ctor in (('m', np.timedelta64), ('M', np.datetime64)):
                self.values.append(ctor(12, precision))

        for size in (1, 8, 16, 32, 64, 128, 256, 512):
            self.values.append(bytes(size))
            self.values.append('x' * size)
Example #31
def construct_circuit_Graph_vertex(d_C, d_cg_offset, ecount, d_cv):
    """

    :param d_C:
    :param d_cg_offset:
    :param ecount:
    :param d_cv:
    :return:
    """
    logger = logging.getLogger('eulercuda.pyeulertour.construct_circuit_Graph_vertex')
    logger.info("started.")
    mod = SourceModule("""
        __global__ void constructCircuitGraphVertex(unsigned int * C,unsigned int * offset,unsigned int ecount, unsigned int * cv)
        {
            unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
            if(tid < ecount){
                if(C[tid] != 0){
                    cv[offset[tid]] = tid;
                }
            }
        }
    """)
    np_d_cv = gpuarray.to_gpu(d_cv)
    circuit_graph_vertex = mod.get_function('constructCircuitGraphVertex')
    block_dim, grid_dim = getOptimalLaunchConfiguration(ecount, 512)
    circuit_graph_vertex(
        drv.In(d_C),
        drv.In(d_cg_offset),
        np.uintc(ecount),
        np_d_cv,
        block=block_dim, grid=grid_dim
    )
    np_d_cv.get(d_cv)
    # devdata = pycuda.tools.DeviceData()
    # orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[1])
    # logger.info("Occupancy = %s" % (orec.occupancy * 100))
    return d_cv
Example #32
class TestNumpyJSONEncoder(unittest.TestCase):
    @parameterized.expand(
        [(numpy.bool_(1), True), (numpy.bool8(1), True), (numpy.byte(1), 1),
         (numpy.int8(1), 1), (numpy.ubyte(1), 1), (numpy.uint8(1), 1),
         (numpy.short(1), 1), (numpy.int16(1), 1), (numpy.ushort(1), 1),
         (numpy.uint16(1), 1), (numpy.intc(1), 1), (numpy.int32(1), 1),
         (numpy.uintc(1), 1), (numpy.uint32(1), 1), (numpy.int_(1), 1),
         (numpy.int32(1), 1), (numpy.uint(1), 1), (numpy.uint32(1), 1),
         (numpy.longlong(1), 1), (numpy.int64(1), 1), (numpy.ulonglong(1), 1),
         (numpy.uint64(1), 1), (numpy.half(1.0), 1.0),
         (numpy.float16(1.0), 1.0), (numpy.single(1.0), 1.0),
         (numpy.float32(1.0), 1.0), (numpy.double(1.0), 1.0),
         (numpy.float64(1.0), 1.0), (numpy.longdouble(1.0), 1.0)] + ([
             (numpy.float128(1.0), 1.0)  # unavailable on windows
         ] if hasattr(numpy, 'float128') else []))
    def test_numpy_primary_type_encode(self, np_val, py_val):
        self.assertEqual(json.dumps(py_val),
                         json.dumps(np_val, cls=NumpyEncoder))

    @parameterized.expand([
        (numpy.array([1, 2, 3], dtype=numpy.int), [1, 2, 3]),
        (numpy.array([[1], [2], [3]], dtype=numpy.double), [[1.0], [2.0],
                                                            [3.0]]),
        (numpy.zeros((2, 2), dtype=numpy.bool_), [[False, False],
                                                  [False, False]]),
        (numpy.array([('Rex', 9, 81.0), ('Fido', 3, 27.0)],
                     dtype=[('name', 'U10'), ('age', 'i4'),
                            ('weight', 'f4')]), [['Rex', 9, 81.0],
                                                 ['Fido', 3, 27.0]]),
        (numpy.rec.array([(1, 2., 'Hello'), (2, 3., "World")],
                         dtype=[('foo', 'i4'), ('bar', 'f4'),
                                ('baz', 'U10')]), [[1, 2.0, "Hello"],
                                                   [2, 3.0, "World"]])
    ])
    def test_numpy_array_encode(self, np_val, py_val):
        self.assertEqual(json.dumps(py_val),
                         json.dumps(np_val, cls=NumpyEncoder))
Example #33
def assign_successor_device(d_ev, d_l, d_e, vcount, d_ee, ecount):
    """

    :param d_ev:
    :param d_l:
    :param d_e:
    :param vcount:
    :param d_ee:
    :param ecount:
    :return:
    """
    # logger = logging.getLogger('eulercuda.pyeulertour.assign_successor_device')
    module_logger.info("started assign_successor_device.")
    mod = SourceModule("""
    #include <stdio.h>
    typedef unsigned long long  KEY_T ;
    typedef KEY_T               *KEY_PTR;
    typedef unsigned int        VALUE_T;
    typedef VALUE_T             *VALUE_PTR;

    typedef struct EulerEdge{
        KEY_T eid;
        unsigned int v1;
        unsigned int v2;
        unsigned int s;
        unsigned int pad;
    }EulerEdge;

    typedef struct EulerVertex{
        KEY_T	vid;
        unsigned int  ep;
        unsigned int  ecount;
        unsigned int  lp;
        unsigned int  lcount;
    }EulerVertex;

    __global__  void assignSuccessor(
                        EulerVertex * ev,
                        unsigned int * l,
                        unsigned int * e,
                        unsigned vcount,
                        EulerEdge * ee ,
                        unsigned int ecount)
    {
        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        unsigned int eidx = 0;
        if(tid < vcount)
        {
            while(eidx < ev[tid].ecount && eidx < ev[tid].lcount)
            {
                unsigned int eindex, lindex, eeindex;
                eindex = ev[tid].ep + eidx;
                lindex = ev[tid].lp + eidx;
                if (eindex < ecount)
                {

                    eeindex = e[ev[tid].ep + eidx];
                    if (eindex < ecount && lindex < ecount && eeindex < ecount)
                    {
                      //  printf(" e = %u, l = %u, ee = %u ", eindex, lindex, eeindex);
                        ee[e[ev[tid].ep + eidx]].s = l[ev[tid].lp + eidx] ;
                    }
                }
                eidx++;
            }
        }
    }
    """)
    free, total = drv.mem_get_info()
    # module_logger.debug(" %s free out of %s total memory" % (free, total) )
    block_dim, grid_dim = getOptimalLaunchConfiguration(vcount, 256)
    module_logger.info('block_dim = %s, grid_dim = %s' % (block_dim, grid_dim))
    np_d_ev = gpuarray.to_gpu(d_ev)
    np_d_ee = gpuarray.to_gpu(d_ee)
    assign_successor = mod.get_function("assignSuccessor")
    assign_successor(             # ecount is list - should be uint
        np_d_ev,
        drv.In(d_l),
        drv.In(d_e),
        np.uintc(vcount),
        np_d_ee,
        np.uintc(ecount),
        block=block_dim, grid=grid_dim
    )
    # devdata = pycuda.tools.DeviceData()
    # orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[1])
    # module_logger.info("Occupancy = %s" % (orec.occupancy * 100))
    np_d_ev.get(d_ev)
    np_d_ee.get(d_ee)
    module_logger.info("Finished. Leaving.")
    return d_ev, d_ee
Example #34
def component_Step3_P2(d_v, d_prevD, d_D, d_Q, d_t1, d_val1, d_t2, d_val2, length, s):
    """

    :param d_v:
    :param d_prevD:
    :param d_D:
    :param d_Q:
    :param d_t1:
    :param d_val1:
    :param d_t2:
    :param d_val2:
    :param length:
    :param s:
    :return:
    """
    import eulercuda.pyencode as enc
    logger = logging.getLogger('eulercuda.pycomponent.component_Step3_P2')
    logger.info("started.")
    mod = SourceModule("""
        typedef struct Vertex
        {
            unsigned int vid;
            unsigned int n1;
            unsigned int n2;
        } Vertex;
    __global__ void componentStepThreeP2(Vertex * v, unsigned int * prevD,unsigned  int * curD,unsigned int * Q,unsigned int * t1,unsigned int *val1 ,unsigned int * t2,unsigned int * val2,unsigned int length,unsigned int s){
        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        int a;
        int val;
        if( tid< length) {
            //it will be done for each edge 1
            if(t1[tid]<length){
                a=t1[tid];
                val= val1[tid];
                atomicMin(curD+a,val);

            }
            //it will be done for each edge 2
            if(t2[tid]<length){
                a=t2[tid];
                val= val2[tid];
                atomicMin(curD+a,val);

            }
        }
    }
    """)
    block_dim, grid_dim = enc.getOptimalLaunchConfiguration(length, 512)
    logger.info('block_dim = %s, grid_dim = %s' % (block_dim, grid_dim))
    np_d_D = gpuarray.to_gpu(d_D)
    np_d_t1 = gpuarray.to_gpu(d_t1)
    np_d_t2 = gpuarray.to_gpu(d_t2)
    np_d_val1 = gpuarray.to_gpu(d_val1)
    np_d_val2 = gpuarray.to_gpu(d_val2)
    step3_P2 = mod.get_function('componentStepThreeP2')
    step3_P2(
        drv.In(d_v),
        drv.In(d_prevD),
        np_d_D,
        drv.In(d_Q),
        np_d_t1,
        np_d_val1,
        np_d_t2,
        np_d_val2,
        np.uintc(length),
        np.uintc(s),
        block=block_dim, grid=grid_dim
    )
    np_d_D.get(d_D)
    np_d_t1.get(d_t1)
    np_d_t2.get(d_t2)
    np_d_val1.get(d_val1)
    np_d_val2.get(d_val2)
    devdata = pycuda.tools.DeviceData()
    orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[0])
    logger.info("Occupancy = %s" % (orec.occupancy * 100))

    logger.info("Finished. Leaving.")
    return d_D
Example #35
reveal_type(np.bytes0())  # E: numpy.bytes_
reveal_type(np.string_())  # E: numpy.bytes_
reveal_type(np.object0())  # E: numpy.object_
reveal_type(np.void0(0))  # E: numpy.void

reveal_type(np.byte())  # E: {byte}
reveal_type(np.short())  # E: {short}
reveal_type(np.intc())  # E: {intc}
reveal_type(np.intp())  # E: {intp}
reveal_type(np.int0())  # E: {intp}
reveal_type(np.int_())  # E: {int_}
reveal_type(np.longlong())  # E: {longlong}

reveal_type(np.ubyte())  # E: {ubyte}
reveal_type(np.ushort())  # E: {ushort}
reveal_type(np.uintc())  # E: {uintc}
reveal_type(np.uintp())  # E: {uintp}
reveal_type(np.uint0())  # E: {uintp}
reveal_type(np.uint())  # E: {uint}
reveal_type(np.ulonglong())  # E: {ulonglong}

reveal_type(np.half())  # E: {half}
reveal_type(np.single())  # E: {single}
reveal_type(np.double())  # E: {double}
reveal_type(np.float_())  # E: {double}
reveal_type(np.longdouble())  # E: {longdouble}
reveal_type(np.longfloat())  # E: {longdouble}

reveal_type(np.csingle())  # E: {csingle}
reveal_type(np.singlecomplex())  # E: {csingle}
reveal_type(np.cdouble())  # E: {cdouble}
Example #36
    def fit(self, X, y):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training vector, where n_samples in the number of samples and
            n_features is the number of features.

        y : array-like, shape = [n_samples]
            Target vector relative to X

        class_weight : {dict, 'auto'}, optional
            Weights associated with classes. If not given, all classes
            are supposed to have weight one.

        Returns
        -------
        self : object
            Returns self.
        """
        self._enc = LabelEncoder()
        y = self._enc.fit_transform(y)
        if len(self.classes_) < 2:
            raise ValueError("The number of classes has to be greater than"
                    " one.")

        X = atleast2d_or_csr(X, dtype=np.float64, order="C")
        y = np.asarray(y, dtype=np.float64).ravel()

        self.class_weight_, self.class_weight_label_ = \
                     _get_class_weight(self.class_weight, y)

        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y have incompatible shapes.\n" +
                             "X has %s samples, but y has %s." % \
                             (X.shape[0], y.shape[0]))

        liblinear.set_verbosity_wrap(self.verbose)

        if sp.isspmatrix(X):
            train = liblinear.csr_train_wrap
        else:
            train = liblinear.train_wrap

        rnd = check_random_state(self.random_state)
        if self.verbose:
            print('[LibLinear]', end=' ')
        self.raw_coef_ = train(X, y, self._get_solver_type(), self.tol,
                               self._get_bias(), self.C,
                               self.class_weight_label_, self.class_weight_,
                               # seed for srand in range [0..UINT_MAX]
                               rnd.randint(np.uintc(-1) + 1))

        if self.fit_intercept:
            self.coef_ = self.raw_coef_[:, :-1]
            self.intercept_ = self.intercept_scaling * self.raw_coef_[:, -1]
        else:
            self.coef_ = self.raw_coef_
            self.intercept_ = 0.

        return self
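The seeding call above uses np.uintc(-1) + 1 as an exclusive upper bound, i.e. UINT_MAX + 1, relying on the negative value wrapping around when converted to an unsigned scalar. A small sketch of the same idea spelled with np.iinfo, which avoids the wrap-around and also works on newer NumPy releases that reject converting a negative Python int to an unsigned scalar:

import numpy as np
from numpy.random import RandomState

uint_max = np.iinfo(np.uintc).max  # 4294967295 when unsigned int is 32-bit
rnd = RandomState(0)
seed = rnd.randint(0, uint_max + 1, dtype=np.uint32)  # uniform draw from [0, UINT_MAX]
print(int(seed))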
Example #37
def test_table_typing_numpy():
    # Pulled from https://numpy.org/devdocs/user/basics.types.html

    # Numerics
    table = wandb.Table(columns=["A"], dtype=[NumberType])
    table.add_data(None)
    table.add_data(42)
    table.add_data(np.byte(1))
    table.add_data(np.short(42))
    table.add_data(np.ushort(42))
    table.add_data(np.intc(42))
    table.add_data(np.uintc(42))
    table.add_data(np.int_(42))
    table.add_data(np.uint(42))
    table.add_data(np.longlong(42))
    table.add_data(np.ulonglong(42))
    table.add_data(np.half(42))
    table.add_data(np.float16(42))
    table.add_data(np.single(42))
    table.add_data(np.double(42))
    table.add_data(np.longdouble(42))
    table.add_data(np.csingle(42))
    table.add_data(np.cdouble(42))
    table.add_data(np.clongdouble(42))
    table.add_data(np.int8(42))
    table.add_data(np.int16(42))
    table.add_data(np.int32(42))
    table.add_data(np.int64(42))
    table.add_data(np.uint8(42))
    table.add_data(np.uint16(42))
    table.add_data(np.uint32(42))
    table.add_data(np.uint64(42))
    table.add_data(np.intp(42))
    table.add_data(np.uintp(42))
    table.add_data(np.float32(42))
    table.add_data(np.float64(42))
    table.add_data(np.float_(42))
    table.add_data(np.complex64(42))
    table.add_data(np.complex128(42))
    table.add_data(np.complex_(42))

    # Booleans
    table = wandb.Table(columns=["A"], dtype=[BooleanType])
    table.add_data(None)
    table.add_data(True)
    table.add_data(False)
    table.add_data(np.bool_(True))

    # Array of Numerics
    table = wandb.Table(columns=["A"], dtype=[[NumberType]])
    table.add_data(None)
    table.add_data([42])
    table.add_data(np.array([1, 0], dtype=np.byte))
    table.add_data(np.array([42, 42], dtype=np.short))
    table.add_data(np.array([42, 42], dtype=np.ushort))
    table.add_data(np.array([42, 42], dtype=np.intc))
    table.add_data(np.array([42, 42], dtype=np.uintc))
    table.add_data(np.array([42, 42], dtype=np.int_))
    table.add_data(np.array([42, 42], dtype=np.uint))
    table.add_data(np.array([42, 42], dtype=np.longlong))
    table.add_data(np.array([42, 42], dtype=np.ulonglong))
    table.add_data(np.array([42, 42], dtype=np.half))
    table.add_data(np.array([42, 42], dtype=np.float16))
    table.add_data(np.array([42, 42], dtype=np.single))
    table.add_data(np.array([42, 42], dtype=np.double))
    table.add_data(np.array([42, 42], dtype=np.longdouble))
    table.add_data(np.array([42, 42], dtype=np.csingle))
    table.add_data(np.array([42, 42], dtype=np.cdouble))
    table.add_data(np.array([42, 42], dtype=np.clongdouble))
    table.add_data(np.array([42, 42], dtype=np.int8))
    table.add_data(np.array([42, 42], dtype=np.int16))
    table.add_data(np.array([42, 42], dtype=np.int32))
    table.add_data(np.array([42, 42], dtype=np.int64))
    table.add_data(np.array([42, 42], dtype=np.uint8))
    table.add_data(np.array([42, 42], dtype=np.uint16))
    table.add_data(np.array([42, 42], dtype=np.uint32))
    table.add_data(np.array([42, 42], dtype=np.uint64))
    table.add_data(np.array([42, 42], dtype=np.intp))
    table.add_data(np.array([42, 42], dtype=np.uintp))
    table.add_data(np.array([42, 42], dtype=np.float32))
    table.add_data(np.array([42, 42], dtype=np.float64))
    table.add_data(np.array([42, 42], dtype=np.float_))
    table.add_data(np.array([42, 42], dtype=np.complex64))
    table.add_data(np.array([42, 42], dtype=np.complex128))
    table.add_data(np.array([42, 42], dtype=np.complex_))

    # Array of Booleans
    table = wandb.Table(columns=["A"], dtype=[[BooleanType]])
    table.add_data(None)
    table.add_data([True])
    table.add_data([False])
    table.add_data(np.array([True, False], dtype=np.bool_))

    # Nested arrays
    table = wandb.Table(columns=["A"])
    table.add_data([[[[1, 2, 3]]]])
    table.add_data(np.array([[[[1, 2, 3]]]]))
Example #38
	def run_simulation(self, weights, lengths, params_matrix, speeds, logger, args, n_nodes, n_work_items, n_params, nstep, n_inner_steps,
		buf_len, states, dt, min_speed):

		# setup data#{{{
		data = { 'weights': weights, 'lengths': lengths, 'params': params_matrix.T }
		base_shape = n_work_items,
		for name, shape in dict(
			tavg=(n_nodes,),
			state=(buf_len, states * n_nodes),
			).items():
			data[name] = np.zeros(shape + base_shape, 'f')

		gpu_data = self.make_gpu_data(data)#{{{
		# logger.info('history shape %r', data['state'].shape)
		logger.info('on device mem: %.3f MiB' % (self.nbytes(data) / 1024 / 1024, ))#}}}

		# setup CUDA stuff#{{{
		step_fn = self.make_kernel(
			source_file=args.filename,
			warp_size=32,
			block_dim_x=args.n_coupling,
			# ext_options=preproccesor_defines,
			# caching=args.caching,
			args=args,
			lineinfo=args.lineinfo,
			nh=buf_len,
			# model=args.model,
			)#}}}

		# setup simulation#{{{
		tic = time.time()
		# logger.info('nstep %i', nstep)
		streams = [drv.Stream() for i in range(32)]
		events = [drv.Event() for i in range(32)]
		tavg_unpinned = []
		tavg = drv.pagelocked_zeros(data['tavg'].shape, dtype=np.float32)
		# logger.info('data[tavg].shape %s', data['tavg'].shape)
		#}}}

		gridx = args.n_coupling // args.blockszx
		gridy = args.n_speed // args.blockszy
		final_block_dim = args.blockszx, args.blockszy, 1
		final_grid_dim = gridx, gridy

		# logger.info('final block dim %r', final_block_dim)
		logger.info('final grid dim %r', final_grid_dim)
		# assert n_coupling_per_block * n_coupling_blocks == args.n_coupling #}}}

		# logger.info('gpu_data[lengts] %s', gpu_data['lengths'].shape)
		# logger.info('nnodes %r', n_nodes)
		# logger.info('gpu_data[lengths] %r', gpu_data['lengths'])

		# run simulation#{{{
		# logger.info('submitting work')
		import tqdm
		for i in tqdm.trange(nstep):

			# event = events[i % 32]
			# stream = streams[i % 32]

			# stream.wait_for_event(events[(i - 1) % 32])

			step_fn(np.uintc(i * n_inner_steps), np.uintc(n_nodes), np.uintc(buf_len), np.uintc(n_inner_steps),
					np.uintc(n_params), np.float32(dt), np.float32(min_speed),
					gpu_data['weights'], gpu_data['lengths'], gpu_data['params'], gpu_data['state'],
					gpu_data['tavg'],
					block=final_block_dim,
					grid=final_grid_dim)

			# event.record(streams[i % 32])
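			# note: tavg is appended before the device-to-host copy, so each recorded
			# entry lags the corresponding kernel launch by one iteration (the first
			# entry is the initial zero buffer and the final step's output is never appended)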
			tavg_unpinned.append(tavg.copy())
			drv.memcpy_dtoh(
				tavg,
				gpu_data['tavg'].ptr)

		# logger.info('kernel finish..')
		# release pinned memory
		tavg = np.array(tavg_unpinned)
		return tavg
Example #39
0
def execute_swipe(d_ev, d_e, vcount, d_ee, d_mark, ecount):
    logger = logging.getLogger('eulercuda.pyeulertour.execute_swipe')
    logger.info("started.")
    mod = SourceModule("""
    typedef unsigned long long  KEY_T ;
    typedef struct EulerVertex
    {
        KEY_T	vid;
        unsigned int  ep;
        unsigned int  ecount;
        unsigned int  lp;
        unsigned int  lcount;
    } EulerVertex;

    typedef struct EulerEdge
    {
        KEY_T eid;
        unsigned int v1;
        unsigned int v2;
        unsigned int s;
        unsigned int pad;
    }EulerEdge;

    __global__ void executeSwipe(
                                EulerVertex * ev,
                                unsigned int * e,
                                unsigned int vcount ,
                                EulerEdge * ee,
                                unsigned int * mark,
                                unsigned int ecount)
        {

        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        unsigned int t;
        unsigned int index=0;
        unsigned int maxIndex;
        unsigned int s;
        if (tid < vcount)
        {
            index = ev[tid].ep;
            maxIndex = index + ev[tid].ecount - 1;
            while (index < maxIndex && ee[e[index]].eid < ecount)
            {
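                // the successor-swap logic below is commented out, so this loop
                // currently only advances index over the vertex's adjacency list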

              /*  if (mark[ee[e[index]].eid] == 1)
                {
                    t = index;
                    s = ee[e[index]].s;
                    while (mark[ee[e[index]].eid] == 1 && index < maxIndex)
                    {
                        ee[e[index]].s = ee[e[index+1]].s;
                        index = index + 1;
                    }
                    if(t != index)
                    {
                        ee[e[index]].s = s;
                    }
                }  */
                index++;
            }

        }
    }



    """)
    block_dim, grid_dim = getOptimalLaunchConfiguration(vcount.item(), 512)
    np_d_mark = gpuarray.to_gpu(d_mark)
    np_d_ee = gpuarray.to_gpu(d_ee)
    swipe = mod.get_function('executeSwipe')
    swipe(
        drv.In(d_ev),
        drv.In(d_e),
        np.uintc(vcount),
        np_d_ee,      # may have to do this one the "long way"
        np_d_mark,
        np.uintc(ecount),
        block = block_dim,
        grid = grid_dim
    )
    np_d_ee.get(d_ee)
    np_d_mark.get(d_mark)
    # devdata = pycuda.tools.DeviceData()
    # orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[1])
    # logger.debug("Occupancy = %s" % (orec.occupancy * 100))
    logger.info('Finished.')
    return d_ee, d_mark
Example #40
0
 def test_ndarray_uintc(self):
     self.run_test(
         'def ndarray_uintc(a): import numpy as np; return np.uintc(a), np.array([a, a], dtype=np.uintc)',
         numpy.uintc(5),
         ndarray_uintc=[numpy.uintc])
Example #41
0
File: scalars.py Project: nanbo99/numpy
np.bytes0()
np.string_()
np.object0()
np.void0(0)

np.byte()
np.short()
np.intc()
np.intp()
np.int0()
np.int_()
np.longlong()

np.ubyte()
np.ushort()
np.uintc()
np.uintp()
np.uint0()
np.uint()
np.ulonglong()

np.half()
np.single()
np.double()
np.float_()
np.longdouble()
np.longfloat()

np.csingle()
np.singlecomplex()
np.cdouble()
Example #42
0
def calculate_circuit_graph_edge_data(d_ev, d_e, vcount, d_D, d_cg_offset, ecount, d_cedgeCount ):
    """

    :param d_ev:
    :param d_e:
    :param vcount:
    :param d_D:
    :param d_cg_offset:
    :param ecount:
    :param d_cedgeCount:
    :return:
    """
    logger = logging.getLogger('eulercuda.pyeulertour.calculate_circuit_graph_edge_data')
    logger.info("started.")
    mod = SourceModule("""
    #include <stdio.h>
        typedef unsigned long long  KEY_T;
        typedef struct EulerVertex{
            KEY_T	vid;
            unsigned int  ep;
            unsigned int  ecount;
            unsigned int  lp;
            unsigned int  lcount;
        }EulerVertex;
        __global__ void calculateCircuitGraphEdgeData(
                                    EulerVertex* v,
                                    unsigned int * e,
                                    unsigned vCount,
                                    unsigned int * D,
                                    unsigned int * map,
                                    unsigned int ecount,
                                    unsigned int * cedgeCount)
        {

            unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
            unsigned int index = 0;
            unsigned int maxIndex = 0;
            unsigned int c1;
            unsigned int c2;
            index = 0;
            maxIndex = 0;
            if(tid < vCount && v[tid].ecount > 0 )
            {
                index = v[tid].ep;
                maxIndex = index + v[tid].ecount - 1;
               // printf(" index = %u, max = %u ", index, maxIndex);
                while (index < maxIndex && index < ecount )
                {

                    if (e[index] < ecount && e[index + 1] < ecount)
                    {
                       // printf(" map = %u, D = %u ",  map[D[e[index]]], D[e[index]]);
                        c1 = map[D[e[index]]];
                        c2 = map[D[e[index + 1]]];
                        if( c1 != c2)
                        {
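                            // consecutive Euler edges belonging to different circuits
                            // contribute one circuit-graph edge, counted on the smaller circuit id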
                            unsigned int c = min(c1, c2);
                            atomicInc(cedgeCount + c, ecount);
                        }
                    }
                    index++;
                }
            }

        }
    """)
    circuit_graph_edge = mod.get_function('calculateCircuitGraphEdgeData')
    np_d_cedgeCount = gpuarray.to_gpu(d_cedgeCount)
    block_dim, grid_dim = getOptimalLaunchConfiguration(vcount, 512)
    circuit_graph_edge(
        drv.In(d_ev),
        drv.In(d_e),
        np.uintc(vcount),
        drv.In(d_D),
        drv.In(d_cg_offset),
        np.uintc(ecount),
        np_d_cedgeCount,
        block=block_dim, grid=grid_dim
    )
    np_d_cedgeCount.get(d_cedgeCount)
    # devdata = pycuda.tools.DeviceData()
    # orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[1])
    # logger.info("Occupancy = %s" % (orec.occupancy * 100))
    return d_cedgeCount
Example #43
0
    def __init__(self,
                 triangles,
                 nodes,
                 minGraphDiam,
                 maxNumGraphNodes=10,
                 minNumTrianglesInGraph=1,
                 libPath=None):

        if libPath is not None:
            self._libPath = libPath

        # Instantiate C library
        self._libInstance = ctypes.CDLL(self._libPath)

        # triangles = triangles.copy().astype(ctypes.c_uint)
        # nodes = nodes.copy().astype(ctypes.c_double)

        # Maximum number of graph nodes
        maxNumGraphNodes = ctypes.c_uint(maxNumGraphNodes)
        # Minimum graph node diameter
        minGraphDiam = ctypes.c_double(minGraphDiam)
        # Minimum number of triangles in each graph node
        minNumTrianglesInGraph = ctypes.c_uint(minNumTrianglesInGraph)

        # Represent the triangles
        triangles_p = triangles.ctypes.data_as(self.c_uint_p)
        # Represent the nodes
        points_p = nodes.ctypes.data_as(self.c_double_p)
        # Init number of graph nodes
        numGraphNodes = ctypes.c_uint(np.uintc(0))
        # Init graph index
        idx = ctypes.c_uint(np.uintc(0))

        # Create graph
        self._libInstance.MeshGraph_createGraph.restype = ctypes.c_int
        self._libInstance.MeshGraph_createGraph.argtypes = \
            [ self.c_double_p, ctypes.c_uint, ctypes.c_uint, \
             self.c_uint_p, ctypes.c_uint, ctypes.c_uint, \
             ctypes.c_uint, ctypes.c_double, ctypes.c_uint, \
             self.c_uint_p, self.c_uint_p ]
        status = self._libInstance.MeshGraph_createGraph( \
            points_p, ctypes.c_uint( nodes.shape[0] ), ctypes.c_uint( nodes.shape[1] ), \
            triangles_p, ctypes.c_uint( triangles.shape[0] ), ctypes.c_uint( triangles.shape[1] - 1 ), \
            maxNumGraphNodes, minGraphDiam, minNumTrianglesInGraph, \
            ctypes.byref( numGraphNodes ), ctypes.byref( idx ) )
        if status != 0:
            raise Exception("Uknown error occured!")

        # Preallocate boundaries
        self.boundaries = np.NaN * np.ones(
            (np.uintc(numGraphNodes), nodes.shape[1], 2), dtype=np.float64)
        boundaries_p = self.boundaries.ctypes.data_as(self.c_double_p)

        # Get node boundaries
        self._libInstance.MeshGraph_getNodeBoundaries.restype = ctypes.c_int
        self._libInstance.MeshGraph_getNodeBoundaries.argtypes = \
            [ ctypes.c_uint, self.c_double_p, ctypes.c_uint, ctypes.c_uint ]
        status = self._libInstance.MeshGraph_getNodeBoundaries(
            idx, boundaries_p, numGraphNodes, ctypes.c_uint(nodes.shape[1]))
        if status != 0:
            raise Exception("Uknown error occured!")

        # Create a list of list of triangles (one for each graph node)
        self.triangleList = [None] * np.uintc(numGraphNodes)
        numTriangles = ctypes.c_uint(0)

        # Define functions for acquiring triangle lists
        self._libInstance.MeshGraph_getNodeNumTriangles.restype = ctypes.c_int
        self._libInstance.MeshGraph_getNodeNumTriangles.argtypes = [
            ctypes.c_uint, ctypes.c_uint, self.c_uint_p
        ]

        self._libInstance.MeshGraph_getNodeTriangles.restype = ctypes.c_int
        self._libInstance.MeshGraph_getNodeTriangles.argtypes = [ ctypes.c_uint, ctypes.c_uint, ctypes.c_uint, \
                                             ctypes.c_uint, self.c_uint_p ]

        # Loop through all nodes and populate
        for iterNodes in range(np.uintc(numGraphNodes)):
            # Get number of triangles
            status = self._libInstance.MeshGraph_getNodeNumTriangles(
                idx, ctypes.c_uint(iterNodes), ctypes.byref(numTriangles))
            if status != 0:
                raise Exception("Uknown error occured!")
            # Preallocate space for the triangles
            self.triangleList[iterNodes] = np.zeros(np.uintc(numTriangles),
                                                    dtype=np.uintc)
            triangles_p = self.triangleList[iterNodes].ctypes.data_as(
                self.c_uint_p)
            # Acquire the triangles
            status = self._libInstance.MeshGraph_getNodeTriangles( \
                  idx, ctypes.c_uint( iterNodes ), ctypes.c_uint( nodes.shape[1] ), numTriangles, triangles_p )
            if status != 0:
                raise Exception("Uknown error occured!")

        # Free graph
        self._libInstance.MeshGraph_freeGraph.restype = ctypes.c_int
        self._libInstance.MeshGraph_freeGraph.argtypes = [ctypes.c_uint]
        status = self._libInstance.MeshGraph_freeGraph(idx)
        if status != 0:
            raise Exception("Uknown error occured!")
Example #44
0
def bucket_sort_device(d_bufferK, d_bufferV, d_start, d_bucketSize, bucketCount, d_TK, d_TV):
    logger = logging.getLogger('eulercuda.pygpuhash.bucket_sort_device')
    logger.info("started.")
    mod = SourceModule("""
    typedef unsigned long long  KEY_T ;
    typedef KEY_T               *KEY_PTR;
    typedef unsigned int        VALUE_T;
    typedef VALUE_T             *VALUE_PTR;
    #define MAX_BUCKET_ITEM (520)

    #define GET_KEY_INDEX(blockIdx,itemIdx) ((blockIdx) * MAX_BUCKET_ITEM + (itemIdx))
    #define GET_VALUE_INDEX(blockIdx,itemIdx) ((blockIdx) * MAX_BUCKET_ITEM + (itemIdx))

    __global__ void bucketSort(
                                KEY_PTR         bufferK,
                                VALUE_PTR       bufferV,
                                unsigned int    *start,
                                unsigned int    *bucketSize,
                                unsigned int    bucketCount,
                                KEY_PTR         TK,
                                VALUE_PTR       TV)
    {

        __shared__ KEY_T keys[MAX_BUCKET_ITEM];
        unsigned int keyCount[MAX_BUCKET_ITEM / 32];

        unsigned int blockOffset = start[blockIdx.x];
        unsigned int size = bucketSize[blockIdx.x];

        unsigned int chunks = size >> 5;
        chunks = (chunks << 5 == size) ? chunks : chunks + 1;
        for(unsigned int j = 0; j < chunks; j++)
        {
            if ((j << 5) + threadIdx.x < size)
                keys[(j << 5) + threadIdx.x] = bufferK[blockOffset + (j << 5) + threadIdx.x];
        }

        __syncthreads();
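        // rank sort: each thread counts how many keys in the bucket are strictly smaller
        // than its own key; that count is the key's output position in TK/TV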
        for (unsigned int j = 0; j < chunks; j++)
        {
            if ((j << 5) + threadIdx.x < size)
            {
                keyCount[j] = 0;
                for(int i=0; i < size; i++)
                {
                    keyCount[j] = ( keys[(j << 5) + threadIdx.x] > keys[i] ) ? keyCount[j] + 1 : keyCount[j];
                }
            }
        }
            __syncthreads();
        for (unsigned int j = 0; j < chunks; j++)
        {
            if ((j << 5) + threadIdx.x < size)
            {
                TK[GET_KEY_INDEX(blockIdx.x, keyCount[j])] = keys[(j << 5) + threadIdx.x];
                TV[GET_VALUE_INDEX(blockIdx.x, keyCount[j])] = bufferV[blockOffset + (j << 5) + threadIdx.x];
            }
        }
    }
    """)
    bucket_sort = mod.get_function('bucketSort')
    block_dim = (32, 1, 1)
    grid_dim = (bucketCount, 1, 1)#(32, 1, 1)
    logger.info('block_dim = %s, grid_dim = %s' % (block_dim, grid_dim))
    np_d_TK = gpuarray.to_gpu(d_TK)
    np_d_TV = gpuarray.to_gpu(d_TV)
    bucket_sort(
        drv.In(d_bufferK),
        drv.In(d_bufferV),
        drv.In(d_start),
        drv.In(d_bucketSize),
        np.uintc(bucketCount),
        np_d_TK,
        np_d_TV,
        grid=grid_dim,
        block=block_dim # What about shared? Original source doesn't have it.

    )
    np_d_TK.get(d_TK)
    np_d_TV.get(d_TV)
    devdata = pycuda.tools.DeviceData()
    # orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[0])
    # logger.info("Occupancy = %s" % (orec.occupancy * 100))

    logger.info("Finished. Leaving.")
    return d_TK, d_TV
Example #45
0
def copy_to_bucket_device(d_keys, d_values, d_offset, d_length, d_start, bucketCount, d_bufferK, d_bufferV):
    logger = logging.getLogger('eulercuda.pygpuhash.copy_to_bucket_device')
    logger.info("started.")
    mod = SourceModule("""
   // #include <stdio.h>
    //typedef unsigned long long  KEY_T ;
    //typedef KEY_T               *KEY_PTR;
    //typedef unsigned int        VALUE_T;
    //typedef VALUE_T             *VALUE_PTR;
    #define C0  0x01010101
    #define C1	0x12345678
    #define LARGE_PRIME 1900813
    #define MAX_INT  0xffffffff
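    // affine hash modulo a large prime, folded into one of bucketCount buckets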

    __forceinline__ __device__ unsigned int hash_h(unsigned long long key, unsigned int bucketCount)
    {
        return ((C0 + C1 * key) % LARGE_PRIME ) % bucketCount;
    }
     __global__ void copyToBucket(	unsigned long long *keys,
                    unsigned int *values,
                    unsigned int * offset,
                    unsigned int length,
                    unsigned int* start,
                    unsigned int bucketCount,
                    unsigned long long * bufferK,
                    unsigned int *bufferV)
    {

        unsigned tid = (blockDim.x * blockDim.y * gridDim.x * blockIdx.y) +
                    (blockDim.x * blockDim.y * blockIdx.x) + (blockDim.x * threadIdx.y) + threadIdx.x;

        if (tid < length)
      {
            unsigned long long key = keys[tid];
            unsigned int bucket = hash_h(key,bucketCount);

           // printf(" bucket = %u ", bucket);

            unsigned int value = values[tid];
            unsigned int index = start[bucket] + offset[tid];

           // printf(" index = %u ", index);
           // printf(" tid = %u, offset = %u bucket = %u start = %u index = %u ", tid, offset[tid], bucket, start[bucket], (start[bucket] + offset[tid]));

            bufferK[index] = key;
            bufferV[index] = value;

            //printf(" bufferV = %u ", bufferV[index]);

        }
    }
    """)
    copy_to_bucket = mod.get_function("copyToBucket")

    np_d_keys = np.array(d_keys).astype(np.ulonglong)
    np_d_values = np.array(d_values).astype(np.uintc)
    # np_d_start = np.array(len(d_keys), dtype = np.uint32)
    # np_d_bufferK = np.empty(np_d_keys.size, dtype = np.uint64)
    #
    # np_d_bufferV = np.empty(np_d_values.size, dtype = np.uint32)

    keys_gpu = gpuarray.to_gpu(np_d_keys)
    values_gpu = gpuarray.to_gpu(np_d_values)
    offset_gpu = gpuarray.to_gpu(d_offset)
    # start_gpu = gpuarray.to_gpu(np_d_start)
    np_d_bufferK = gpuarray.to_gpu(d_bufferK)
    np_d_bufferV = gpuarray.to_gpu(d_bufferV)
    block_dim = (1024, 1, 1)
    # round up so every element gets a thread, even when d_length is not a multiple of 1024
    grid_dim = (max(1, (d_length + 1023) // 1024), 1, 1)
    logger.info('block_dim = %s, grid_dim = %s' % (block_dim, grid_dim))
    copy_to_bucket(
        keys_gpu,
        values_gpu,
        offset_gpu,
        np.uintc(d_length),
        drv.In(d_start), #start_gpu,
        np.uintc(bucketCount),
        np_d_bufferK, #bufferK_gpu,
        np_d_bufferV, #bufferV_gpu,
        grid = grid_dim,
        block = block_dim
    )
    # devdata = pycuda.tools.DeviceData()
    # orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[0])
    # logger.info("Occupancy = %s" % (orec.occupancy * 100))
    np_d_bufferK.get(d_bufferK)
    np_d_bufferV.get(d_bufferV)
    logger.info('Finished. Leaving.')
    # d_start  = start_gpu.get()
    # d_bufferK = bufferK_gpu.get()
    # d_bufferV = bufferV_gpu.get()
    return d_bufferK, d_bufferV
Example #46
0
File: scalars.py Project: wuye0109/numpy
# Aliases
reveal_type(np.unicode_())  # E: numpy.str_
reveal_type(np.str0())  # E: numpy.str_

reveal_type(np.byte())  # E: numpy.signedinteger[numpy.typing._
reveal_type(np.short())  # E: numpy.signedinteger[numpy.typing._
reveal_type(np.intc())  # E: numpy.signedinteger[numpy.typing._
reveal_type(np.intp())  # E: numpy.signedinteger[numpy.typing._
reveal_type(np.int0())  # E: numpy.signedinteger[numpy.typing._
reveal_type(np.int_())  # E: numpy.signedinteger[numpy.typing._
reveal_type(np.longlong())  # E: numpy.signedinteger[numpy.typing._

reveal_type(np.ubyte())  # E: numpy.unsignedinteger[numpy.typing._
reveal_type(np.ushort())  # E: numpy.unsignedinteger[numpy.typing._
reveal_type(np.uintc())  # E: numpy.unsignedinteger[numpy.typing._
reveal_type(np.uintp())  # E: numpy.unsignedinteger[numpy.typing._
reveal_type(np.uint0())  # E: numpy.unsignedinteger[numpy.typing._
reveal_type(np.uint())  # E: numpy.unsignedinteger[numpy.typing._
reveal_type(np.ulonglong())  # E: numpy.unsignedinteger[numpy.typing._

reveal_type(np.half())  # E: numpy.floating[numpy.typing._
reveal_type(np.single())  # E: numpy.floating[numpy.typing._
reveal_type(np.double())  # E: numpy.floating[numpy.typing._
reveal_type(np.float_())  # E: numpy.floating[numpy.typing._
reveal_type(np.longdouble())  # E: numpy.floating[numpy.typing._
reveal_type(np.longfloat())  # E: numpy.floating[numpy.typing._

reveal_type(np.csingle())  # E: numpy.complexfloating[numpy.typing._
reveal_type(np.singlecomplex())  # E: numpy.complexfloating[numpy.typing._
reveal_type(np.cdouble())  # E: numpy.complexfloating[numpy.typing._
Example #47
0
from time import time

limit = 10000  # limit = int(input('Limit: '))
start_time = time()
with open('primes1.txt') as f:
    primes = np.fromiter(map(int,
                             f.read().strip().split(',')),
                         dtype=np.uint32)

antiprimes = []
most = 0
for x in np.arange(2, limit + 1):
    fac = {}
    x_temp = x
    for prime in primes:
        more = np.mod(x_temp, prime) == np.uintc(0)
        if more:
            fac[prime] = 0
        while more:
            fac[prime] += 1
            x_temp = np.floor_divide(x_temp, prime)
            more = np.mod(x_temp, prime) == np.uintc(0)
        if x_temp <= np.uintc(1):
            break
    if not fac:
        continue
    num_primes = len(fac)
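    # skip x whose prime factors are not exactly the first num_primes primes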
    if not np.array_equal(np.fromiter(fac, np.uint32), primes[:num_primes]):
        continue
    '''
    iter_fac = iter(fac)
Example #48
0
limit = 100  # limit = int(input('Limit: '))
start_time = time()
with open('primes1.txt') as f:
    primes = np.fromiter(map(int,
                             f.read().strip().split(',')),
                         dtype=np.uint32)
p_gpu = gpuarray.to_gpu(primes)

antiprimes = []
most = 0
for x in gpuarray.arange(2, limit + 1, dtype=np.uint32):
    print(x)
    fac = [[], []]
    x_temp = x.copy()
    for prime in p_gpu:
        more = cumath.fmod(x_temp, prime).get() == np.uintc(0)
        power = 0
        while more:
            power += 1
            x_temp = x_temp / prime
            more = cumath.fmod(x_temp, prime).get() == np.uintc(0)
        if power > 0:
            fac[0].append(prime)
            fac[1].append(power)
        if x_temp.get() <= np.uintc(1):
            break
    if not fac[0]:
        continue
    num_primes = len(fac[0])
    eq_gpu = gpuarray.to_gpu(np.empty(num_primes, dtype=np.int32))
Example #49
0
    def Seed2Key(self, recievedSeedBytes):
        SECURITY_MASK = np.uint32(0xEF6FD7)   # Mask for position C. Bytes C21, C16, C13, C6 and C4. (111011110110111111010111)
        SECURITY_POSITION_A_CONSTANT = np.uint32(0xC541A9)   # Position A:3 bytes fixed constants in the specification
        SECURITY_FIXEDBYTES = np.uint64(0x7A03DB3571)   # Fixed bytes of the challenge number in the specification

        retVal = np.uint32(0x0)        # return value 00,R1,R2,R3
        
        seed = np.uint32(0x0)

        # convert the recievedSeedBytes [0x00,0x00,0x00] into np uint32 type
        
        for i in range(3):
            seed = np.left_shift(seed,8)
            seed = np.bitwise_or(seed,np.uint8(recievedSeedBytes[i]))
            #print("recievedSeedBytes[i]: ", str(hex(recievedSeedBytes[i])))            
        #print("seed Value: ", str(hex(seed)))   
        # np.int_() , c type, long.
        # np.uintc() , c type, unsigned int.
        R1, R2, R3, R_LHS, R_RHS = np.int_(),np.int_(),np.int_(),np.int_(),np.int_()  # final response bytes
        CB_H, CB_L = np.int_(),np.int_() # challenge bytes 
        A = np.int_()  # A initial
        B24, B21, B16, B13, B6, B4 = np.int_(),np.int_(),np.int_(),np.int_(),np.int_(),np.int_() # single bits in lower block 
        C21, C16, C13, C6, C4 = np.int_(),np.int_(),np.int_(),np.int_(),np.int_()  # single bits in upper block

        i = np.uintc()
        CB_32 = np.uint32(0x0)
        S1, S2, S3 =np.uint8(0x0),np.uint8(0x0),np.uint8(0x0)

        # np.right_shift(x1, x2) , Shift the bits of an integer to the right.
        S1 = np.bitwise_and(np.right_shift(seed,16),0xFF)
        S2 = np.bitwise_and(np.right_shift(seed,8),0xFF)
        S3 = np.bitwise_and(seed,0xFF)

        print("current seed byte1 : ", str(hex(S1)),"current seed byte2 : ", str(hex(S2)),"current seed byte3 : ", str(hex(S3)))

        # Calculate last 4 bytes of the challenge number (F5 F4 F3 F2)
        # CB_H = np.right_shift(SECURITY_FIXEDBYTES,8)
        CB_H = np.uint32(0x7A03DB35)

        # Calculate first 4 bytes of the challenge number (F1 S3 S2 S1)
        CB_L = (np.bitwise_and(0x71, 0xFF) * 256) + S3
        CB_L = (CB_L * 256) + S2
        CB_L = (CB_L * 256) + S1  # First 4 bytes of the challenge number (F1 S3 S2 S1)

        A = SECURITY_POSITION_A_CONSTANT  # 3 bytes initial value; these are fixed constants in the Ford specification

        i = 0
        CB_32 = CB_L


        ##########################################
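        # 64-round feedback loop: the low bits of A and the current challenge word are mixed,
        # and selected tap bits (21, 16, 13, 6, 4) are XORed back into A each round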
        while (i < 64):
            i=i+1
            if (i == 33):
                CB_32 = CB_H

            B24 = np.bitwise_xor(np.bitwise_and(A ,0x01) ,np.bitwise_and(CB_32 ,0x01))

            A = np.right_shift(A,1)
            # printf("\nA first time %X\n", A); */

            A = np.left_shift(B24 ,23) + A
            # printf("\nA second time %X B24 second time %X\n", A, B24); */

            # Position A */
            B21 = np.right_shift(A , 20)
            B21 = np.bitwise_and(B21 , 0x01)
            B16 = np.right_shift(A ,15)
            B16 = np.bitwise_and(B16 , 0x01)
            B13 = np.right_shift(A , 12)
            B13 = np.bitwise_and(B13 , 0x01)
            B6 = np.right_shift(A , 5)
            B6 = np.bitwise_and(B6 , 0x01)
            B4 = np.right_shift(A , 3)
            B4 = np.bitwise_and(B4 , 0x01)

            # Position B
            C21 = np.bitwise_xor(B24 , B21)
            C21 = np.bitwise_and(C21 , 0x01)
            C16 = np.bitwise_xor(B24 , B16)
            C16 = np.bitwise_and(C16 , 0x01)
            C13 = np.bitwise_xor(B24 , B13)
            C13 = np.bitwise_and(C13 , 0x01)
            C6 = np.bitwise_xor(B24 , B6)
            C6 = np.bitwise_and(C6 , 0x01)
            C4 = np.bitwise_xor(B24 , B4)
            C4 = np.bitwise_and(C4 , 0x01)

            A = np.bitwise_and(A , SECURITY_MASK)

            # Position C
            A = np.left_shift(C21 , 20) + A
            A = np.left_shift(C16 , 15) + A
            A = np.left_shift(C13 , 12) + A
            A = np.left_shift(C6 , 5) + A
            A = np.left_shift(C4 , 3) + A

            CB_32 = np.right_shift(CB_32 , 1)

        # Calculate R1
        R1 = np.bitwise_and(A, 0xFFF)
        R1 = np.right_shift(R1, 4)

        # Calculate R2
        R_RHS = np.right_shift(A, 20)
        R_RHS = np.bitwise_and(R_RHS, 0xF)
        R_LHS = np.right_shift(A, 12)
        R_LHS = np.bitwise_and(R_LHS, 0xF)
        R_LHS = np.left_shift(R_LHS, 4)
        R2 = R_LHS + R_RHS

        # Calculate R3
        R_LHS = np.bitwise_and(A, 0xF)
        R_LHS = np.left_shift(R_LHS, 4)
        R_RHS = np.right_shift(A, 16)
        R_RHS = np.bitwise_and(R_RHS, 0xF)
        R3 = R_LHS + R_RHS

        #print("A Value: ", str(hex(A)))

        #print("current R1 : ", str(hex(R1)),"current R2 : ", str(hex(R2)),"current R3 : ", str(hex(R3)))

        retVal = np.uintc( (np.left_shift(np.bitwise_and(R1 , 0x000000FF) , 16) | np.left_shift(np.bitwise_and(R2 , 0x000000FF) , 8) | np.bitwise_and(R3 , 0x000000FF)) )
        print("current retVal : ", str(hex(retVal)))
    ##########################################

        # retVal into the decodedKeyBytes[] and return it back
        # highest bytes in the retVal set into 1st position in the list
        for i in range(2,-1,-1):
            self.decodedKeyBytes[i] = np.bitwise_and(retVal,0xFF)
            retVal = np.right_shift(retVal,8)
            #print("current decodedKeyBytes[ ", i,"]hex value: ", str(hex(self.decodedKeyBytes[i])))
        return self.decodedKeyBytes
Example #50
0
def assign_circuit_graph_edge_data(d_ev, d_e, vcount, d_D, d_cg_offset, ecount, d_cg_edge_start, d_cedgeCount,
                               circuitVertexSize, d_cg_edge, circuitGraphEdgeCount):
    """
    :param d_ev:
    :param d_e:
    :param vcount:
    :param d_D:
    :param d_cg_offset:
    :param ecount:
    :param d_cg_edge_start:
    :param d_cedgeCount:
    :param circuitVertexSize:
    :param d_cg_edge:
    :param circuitGraphEdgeCount:
    :return:
    """
    logger = logging.getLogger('eulercuda.pyeulertour.assign_circuit_graph_edge_data')
    logger.info("started.")
    mod = SourceModule("""
    typedef unsigned long long  KEY_T ;
    typedef struct EulerVertex{
        KEY_T	vid;
        unsigned int  ep;
        unsigned int  ecount;
        unsigned int  lp;
        unsigned int  lcount;
    }EulerVertex;
    typedef struct CircuitEdge{
        unsigned int ceid;
        unsigned e1;
        unsigned e2;
        unsigned c1;
        unsigned c2;
    }CircuitEdge;

    __global__ void assignCircuitGraphEdgeData(EulerVertex* v,
                           unsigned int * e,
                           unsigned vCount,
                           unsigned int * D,
                           unsigned int * map,
                           unsigned int ecount,
                           unsigned int * cedgeOffset,
                           unsigned int * cedgeCount,
                           unsigned int cvCount,
                           CircuitEdge * cedge,
                           unsigned int cecount)
    {

        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+
                        (blockDim.x*threadIdx.y)+threadIdx.x;
        unsigned int index=0;
        unsigned int maxIndex=0;
        if(tid<vCount && v[tid].ecount>0){
            index=v[tid].ep;
            maxIndex=index+v[tid].ecount-1;
            while (index<maxIndex  && index < ecount )
            {
                if (e[index] < ecount && e[index + 1] < ecount)
                {
                    unsigned int c1=map[D[e[index]]];
                    unsigned int c2=map[D[e[index+1]]];
                    if( c1 != c2)
                    {
                        unsigned int c=min(c1,c2);
                        unsigned int t=max(c1,c2);
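                        // atomically claim a slot in circuit c's edge segment; atomicDec returns
                        // the old counter value, so i-1 is the next free index after cedgeOffset[c]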
                        unsigned int i=atomicDec(cedgeCount+c,ecount);
                        i=i-1;
                        cedge[cedgeOffset[c]+i].c1=c;
                        cedge[cedgeOffset[c]+i].c2=t;
                        cedge[cedgeOffset[c]+i].e1=e[index];
                        cedge[cedgeOffset[c]+i].e2=e[index+1];
                    }
                }
                index++;
            }
        }
    }
    """)
    block_dim, grid_dim = getOptimalLaunchConfiguration(vcount, 512)
    np_d_cg_edge = gpuarray.to_gpu(d_cg_edge)
    cged = mod.get_function('assignCircuitGraphEdgeData')
    cged(
        drv.In(d_ev),
        drv.In(d_e),
        np.uintc(vcount),
        drv.In(d_D),
        drv.In(d_cg_offset),
        np.uintc(ecount),
        drv.In(d_cg_edge_start),
        drv.In(d_cedgeCount),
        np.uintc(circuitVertexSize),
        np_d_cg_edge,
        np.uintc(circuitGraphEdgeCount),
        block=block_dim, grid=grid_dim
    )
    np_d_cg_edge.get(d_cg_edge)
    # devdata = pycuda.tools.DeviceData()
    # orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[1])
    # logger.debug("Occupancy = %s" % (orec.occupancy * 100))
    logger.info('Finished.')
    return d_cg_edge
Example #51
0
def encode_lmer_device (buffer, readCount, d_lmers, readLength, lmerLength):
    # module_logger = logging.getLogger('eulercuda.pyencode.encode_lmer_device')
    module_logger.info("started encode_lmer_device.")
    # readLength is total number of bases read.
    mod = SourceModule("""
    #include <stdio.h>
    typedef unsigned  long long KEY_T ;
    typedef KEY_T * KEY_PTR ;
    __device__ __constant__ KEY_T lmerMask[] ={
    0x0000000000000003, 0x000000000000000F, 0x000000000000003F, 0x00000000000000FF, // 0   1   2   3
    0x00000000000003FF, 0x0000000000000FFF, 0x0000000000003FFF, 0x000000000000FFFF, // 4   5   6   7
    0x000000000003FFFF, 0x00000000000FFFFF, 0x00000000003FFFFF, 0x0000000000FFFFFF, // 8   9   10  11
    0x0000000003FFFFFF, 0x000000000FFFFFFF, 0x000000003FFFFFFF, 0x00000000FFFFFFFF, // 12  13  14  15
    0x00000003FFFFFFFF, 0x0000000FFFFFFFFF, 0x0000003FFFFFFFFF, 0x000000FFFFFFFFFF, // 16  17  18  19
    0x000003FFFFFFFFFF, 0x00000FFFFFFFFFFF, 0x00003FFFFFFFFFFF, 0x0000FFFFFFFFFFFF, // 20  21  22  23
    0x0003FFFFFFFFFFFF, 0x000FFFFFFFFFFFFF, 0x003FFFFFFFFFFFFF, 0x00FFFFFFFFFFFFFF, // 24  25  26  27
    0x03FFFFFFFFFFFFFF, 0x0FFFFFFFFFFFFFFF, 0x3FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF // 28  29  30  31
    };

    __device__ __constant__ unsigned char shifter[4] [4]=
    {
            {0,0,0,0},
            {1,4,16,64},
            {2,8,32,128},
            {3,12,48,192},
    };
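    // shifter[c][p] precomputes c << (2*p), so four 2-bit base codes pack into one byte;
    // codeF maps (ASCII & 0x07) of A/C/G/T to codes 0-3, codeR to the complement base's code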
    __device__ __constant__ char  codeF[]={0,0,0,1,3,0,0,2};
    __device__ __constant__ char  codeR[]={0,3,0,2,0,0,0,1};

    __global__ void encodeLmerDevice(	char  * read,
                //    const unsigned int buffSize,
                //    const unsigned int readLength,
                    KEY_PTR lmers,
                    const unsigned int lmerLength
                    )
    {

       // extern __shared__ char read[];
      //  const unsigned int tid=threadIdx.x;
        const unsigned int rOffset=(blockDim.x*blockDim.y*gridDim.x*blockIdx.y) +(blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y);
        const unsigned int tid = rOffset + threadIdx.x;

        KEY_T lmer=0;

      //  read[tid] = buffer[rOffset + tid];

        __syncthreads();

        for (unsigned int i = 0; i < 8; i++)    //calculate lmer
        {
            lmer = (lmer<< 8) |	((KEY_T)(shifter[codeF[read[tid + i * 4]& 0x07]][3] |
                                    shifter[codeF[read[tid + i * 4 + 1]& 0x07]][2] |
                                    shifter[codeF[read[tid + i * 4 + 2]& 0x07]][1] |
                                    codeF[read[tid + i * 4 + 3] & 0x07]) ) ;
        }
        lmer = (lmer >> ((32 - lmerLength) << 1)) & lmerMask[lmerLength-1];
        // printf(" offset = %u, lmer = %llu ", (tid + rOffset),lmer);
        //lmers[rOffset + tid] = lmer;
        lmers[tid] = lmer;

    }
    """)

    encode_lmer = mod.get_function("encodeLmerDevice")

    block_dim, grid_dim = getOptimalLaunchConfiguration(readCount, lmerLength)
    module_logger.debug("block_dim = %s, grid_dim = %s" % (block_dim, grid_dim))
    if isinstance(buffer, np.ndarray) and isinstance(d_lmers, np.ndarray):
        module_logger.info("Going to GPU.")
        np_d_lmers = gpuarray.to_gpu(d_lmers)
        encode_lmer(drv.In(buffer),
                    np_d_lmers,
                    np.uintc(lmerLength),
                    block=block_dim,
                    grid=grid_dim) #,
                  #  shared=48000)
        np_d_lmers.get(d_lmers)
    else:
        print(isinstance(buffer, np.ndarray), isinstance(d_lmers, np.ndarray))
    module_logger.debug("Generated %s lmers." % (len(d_lmers)))
    devdata = pycuda.tools.DeviceData()
    orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[0])
    module_logger.debug("Occupancy = %s" % (orec.occupancy * 100))
    module_logger.info("finished encode_lmer_device.")
    return d_lmers
Example #52
0
def mark_spanning_euler_edges(d_ee, d_mark , ecount,d_cg_edge,cg_edgeCount,d_tree, treeCount):
    logger = logging.getLogger(__name__)
    logger.info("started.")
    mod = SourceModule("""
    typedef unsigned long long  KEY_T ;
    typedef struct EulerVertex{
        KEY_T	vid;
        unsigned int  ep;
        unsigned int  ecount;
        unsigned int  lp;
        unsigned int  lcount;
    }EulerVertex;
    typedef struct CircuitEdge{
        unsigned int ceid;
        unsigned e1;
        unsigned e2;
        unsigned c1;
        unsigned c2;
    }CircuitEdge;
    typedef struct EulerEdge{
        KEY_T eid;
        unsigned int v1;
        unsigned int v2;
        unsigned int s;
        unsigned int pad;
    }EulerEdge;

    __global__ void  markSpanningEulerEdges(
                                            EulerEdge * ee,
                                            unsigned int * mark ,
                                            unsigned int ecount,
                                            CircuitEdge * cg_edge,
                                            unsigned int cg_edgeCount,
                                            unsigned int * tree,
                                            unsigned int treeCount)
    {

        unsigned int tid=(blockDim.x*blockDim.y * gridDim.x*blockIdx.y) + (blockDim.x*blockDim.y*blockIdx.x)+(blockDim.x*threadIdx.y)+threadIdx.x;
        if(tid < treeCount)
        {
            /*if(tree[tid]==1)*/{
                atomicExch(mark+min(cg_edge[tree[tid]].e1,cg_edge[tree[tid]].e2),1); // important: assumption if(mark[i]=1) means mark[i]and mark[i+1] are swipe
                //atomicExch(mark+cg_edge[tree[tid]].e2,1);

            }
        }
    }
    """)
    block_dim, grid_dim = getOptimalLaunchConfiguration(treeCount, 512)
    mark = mod.get_function('markSpanningEulerEdges')
    np_d_mark = gpuarray.to_gpu(d_mark)
    mark(
        drv.In(d_ee),
        np_d_mark,
        np.uintc(ecount),
        drv.In(d_cg_edge),
        np.uintc(cg_edgeCount),
        drv.In(d_tree),
        np.uintc(treeCount),
        block = block_dim,
        grid = grid_dim
    )
    np_d_mark.get(d_mark)
    # devdata = pycuda.tools.DeviceData()
    # orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[1])
    # logger.debug("Occupancy = %s" % (orec.occupancy * 100))
    logger.info('Finished.')
    return d_mark
Example #53
0
def compute_lmer_complement_device(buffer, readCount, d_lmers, readLength, lmerLength):
    # logger = logging.getLogger('eulercuda.pyencode.compute_lmer_complement_device')
    module_logger.info("started compute_lmer_complement_device.")
    mod = SourceModule("""
    __device__ __constant__ char  codeF[]={0,0,0,1,3,0,0,2};
    __device__ __constant__ char  codeR[]={0,3,0,2,0,0,0,1};
    typedef unsigned  long long KEY_T ;
    typedef KEY_T * KEY_PTR ;

    __global__ void encodeLmerComplementDevice(
            char  * dnaRead,
            KEY_PTR lmers,
            const unsigned int lmerLength,
            const unsigned int readCount
            )
    {
        const unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
        const unsigned int row = blockIdx.x * blockDim.x + threadIdx.x;
     //   const unsigned int col = blockIdx.y + blockDim.y + threadIdx.y;

        
        if (tid < readCount)
        {
     
          //  extern __shared__ char dnaRead[];
            //unsigned int lmerLength = 0;
            KEY_T lmer = 0;
            KEY_T temp = 0;
    
           // lmerLength = d_lmerLength[tid];
           // dnaRead[tid] = buffer[row + tid];
    
            __syncthreads();
    
            dnaRead[tid] = codeR[dnaRead[tid] & 0x07];
            __syncthreads();
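            // pack lmerLength complemented 2-bit base codes into one 64-bit key,
            // with base i landing at bit position 2*i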
    
            for (unsigned int i = 0; i < lmerLength; i++)
            {
                temp = ((KEY_T)dnaRead[(tid + i) % blockDim.x]);
                lmer = (temp << (i << 1)) | lmer;
            }
            lmers[row + tid] = lmer;
            __syncthreads();
        }
    }
    """)

    encode_lmer_complement = mod.get_function("encodeLmerComplementDevice")
    block_dim, grid_dim = getOptimalLaunchConfiguration(readCount, readLength)

    module_logger.debug('block_dim = %s, grid_dim = %s' % (block_dim, grid_dim))
    if isinstance(buffer, np.ndarray) and isinstance(d_lmers, np.ndarray):
        np_lmerLength = np.uintc(lmerLength)
        np_d_lmers = gpuarray.to_gpu(d_lmers)
        module_logger.info("Going to GPU.")
        encode_lmer_complement(
            drv.In(buffer),  np_d_lmers, np_lmerLength, np.uintc(readCount),
            block=block_dim, grid=grid_dim
        )
        np_d_lmers.get(d_lmers)
    else:
        print("Problem with data to GPU")
        module_logger.warn("problem with data to GPU.")

    devdata = pycuda.tools.DeviceData()
    orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[0])
    module_logger.info("Occupancy = %s" % (orec.occupancy * 100))

    module_logger.info("Finished compute_lmer_complement_device.")
    return d_lmers
Example #54
0
	def run_simulation(self):

		# setup data#{{{
		data = { 'weights': self.weights, 'lengths': self.lengths, 'params': self.params.T }
		base_shape = self.n_work_items,
		for name, shape in dict(
			tavg0=(self.exposures, self.args.n_regions,),
			tavg1=(self.exposures, self.args.n_regions,),
			state=(self.buf_len, self.states * self.args.n_regions),
			).items():
			# memory error exception for compute device
			try:
				data[name] = np.zeros(shape + base_shape, 'f')
			except MemoryError as e:
				self.logger.error('%s.\n\t Please check the parameter dimensions %d x %d, they are too large '
							 'for this compute device',
							 e, self.args.n_sweep_arg0, self.args.n_sweep_arg1)
				exit(1)

		gpu_data = self.make_gpu_data(data)#{{{

		# setup CUDA stuff#{{{
		step_fn = self.make_kernel(
			source_file=self.args.filename,
			warp_size=32,
			# block_dim_x=self.args.n_sweep_arg0,
			# ext_options=preproccesor_defines,
			# caching=args.caching,
			args=self.args,
			lineinfo=self.args.lineinfo,
			nh=self.buf_len,
			)#}}}

		# setup simulation#{{{
		tic = time.time()

		n_streams = 32
		streams = [drv.Stream() for i in range(n_streams)]
		events = [drv.Event() for i in range(n_streams)]
		tavg_unpinned = []

		try:
			tavg = drv.pagelocked_zeros((n_streams,) + data['tavg0'].shape, dtype=np.float32)
		except drv.MemoryError as e:
			self.logger.error(
				'%s.\n\t Please check the parameter dimensions, %d parameters are too large for this GPU',
				e, self.params.size)
			exit(1)

		# determine optimal grid recursively
		def dog(fgd):
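			# shrink the larger grid dimension in place while the remaining grid
			# still covers all n_work_items threads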
			maxgd, mingd = max(fgd), min(fgd)
			maxpos = fgd.index(max(fgd))
			if (maxgd - 1) * mingd * bx * by >= nwi:
				fgd[maxpos] = fgd[maxpos] - 1
				dog(fgd)
			else:
				return fgd

		# n_sweep_arg0 scales griddim.x, n_sweep_arg1 scales griddim.y
		# form an optimal grid recursively
		bx, by = self.args.blockszx, self.args.blockszy
		nwi = self.n_work_items
		rootnwi = int(np.ceil(np.sqrt(nwi)))
		gridx = int(np.ceil(rootnwi / bx))
		gridy = int(np.ceil(rootnwi / by))

		final_block_dim = bx, by, 1

		fgd = [gridx, gridy]
		dog(fgd)
		final_grid_dim = fgd[0], fgd[1]

		assert gridx * gridy * bx * by >= nwi

		self.logger.info('history shape %r', gpu_data['state'].shape)
		self.logger.info('gpu_data %s', gpu_data['tavg0'].shape)
		self.logger.info('on device mem: %.3f MiB' % (self.nbytes(data) / 1024 / 1024, ))
		self.logger.info('final block dim %r', final_block_dim)
		self.logger.info('final grid dim %r', final_grid_dim)

		# run simulation#{{{
		nstep = self.args.n_time

		self.gpu_mem_info() if self.args.verbose else None

		try:
			for i in tqdm.trange(nstep, file=sys.stdout):

				try:
					event = events[i % n_streams]
					stream = streams[i % n_streams]

					if i > 0:
						stream.wait_for_event(events[(i - 1) % n_streams])

					step_fn(np.uintc(i * self.n_inner_steps), np.uintc(self.args.n_regions), np.uintc(self.buf_len),
							np.uintc(self.n_inner_steps), np.uintc(self.n_work_items), np.float32(self.dt),
							gpu_data['weights'], gpu_data['lengths'], gpu_data['params'], gpu_data['state'],
							gpu_data['tavg%d' % (i%2,)],
							block=final_block_dim, grid=final_grid_dim)

					event.record(streams[i % n_streams])
				except drv.LaunchError as e:
					self.logger.error('%s', e)
					exit(1)

				tavgk = 'tavg%d' % ((i + 1) % 2,)

				# async wrt. other streams & host, but not this stream.
				if i >= n_streams:
					stream.synchronize()
					tavg_unpinned.append(tavg[i % n_streams].copy())

				drv.memcpy_dtoh_async(tavg[i % n_streams], gpu_data[tavgk].ptr, stream=stream)

			# recover uncopied data from pinned buffer
			if nstep > n_streams:
				for i in range(nstep % n_streams, n_streams):
					stream.synchronize()
					tavg_unpinned.append(tavg[i].copy())

			for i in range(nstep % n_streams):
				stream.synchronize()
				tavg_unpinned.append(tavg[i].copy())

		except drv.LogicError as e:
			self.logger.error('%s. Check the number of states of the model or '
						 'GPU block shape settings blockdim.x/y %r, griddim %r.',
						 e, final_block_dim, final_grid_dim)
			exit(1)
		except drv.RuntimeError as e:
			self.logger.error('%s', e)
			exit(1)


		# self.logger.info('kernel finish..')
		# release pinned memory
		tavg = np.array(tavg_unpinned)

		# also release gpu_data
		self.release_gpumem(gpu_data)

		self.logger.info('kernel finished')
		return tavg
class TestNumpy:
    @staticmethod
    def test_get_numpy() -> None:
        """
        Test get_numpy when module is present
        """
        # Arrange

        # Act
        result = Numpy.get_numpy()

        # Assert
        assert result is np

    @staticmethod
    def test_get_numpy_missing(mocker: MockFixture) -> None:
        """
        Test get_numpy when module is missing
        """
        # Arrange
        mocker.patch.dict("sys.modules", {"numpy": None})

        # Act
        result = Numpy.get_numpy()

        # Assert
        assert result is None

    @staticmethod
    def test_get_numpy_missing_error(mocker: MockFixture) -> None:
        """
        Test get_numpy when module is missing raises error
        """
        # Arrange
        mocker.patch.dict("sys.modules", {"numpy": None})

        # Act / assert
        with pytest.raises(ImportError, match="foo"):
            Numpy.get_numpy(raise_error=True, custom_error_message="foo")

    @staticmethod
    @pytest.mark.parametrize("value, expected", [(np.array([1, 2, 3]), True),
                                                 ([1, 2, 3], False)])
    def test_is_numpy_object(value, expected) -> None:
        """
        Test is_numpy_object
        """
        # Arrange

        # Act
        result = Numpy.is_numpy_object(value)

        # Assert
        assert result == expected

    @staticmethod
    def test_get_numpy_primatives() -> None:
        """
        Test _get_numpy_primatives
        """
        # Arrange

        # Act
        result = Numpy._get_numpy_primatives(np)

        # Assert
        assert len(result) == 33  # Expected number of types
        for thing in result:
            assert "numpy" in getattr(thing, "__module__", "").split(
                ".")  # Check that type is from numpy
            assert type(thing) is type  # Check that each type is a type

    @staticmethod
    def test_encode_numpy_error():
        """ Test that the encode_numpy raises an error if no encoding is defined. """
        # Arrange
        value = "not a numpy"

        # Act & Assert
        with pytest.raises(NotImplementedError):
            Numpy.encode_numpy(value)

    @staticmethod
    @pytest.mark.parametrize(
        "value, expected",
        [
            # fmt: off
            (np.array([['balloons'], ['are'], ['awesome']
                       ]), [['balloons'], ['are'], ['awesome']]),
            (np.bool_(1), True),
            (np.byte(4), 4),
            (np.ubyte(4), 4),
            (np.short(4), 4),
            (np.ushort(4), 4),
            (np.intc(4), 4),
            (np.uintc(4), 4),
            (np.int_(4), 4),
            (np.uint(4), 4),
            (np.longlong(4), 4),
            (np.ulonglong(4), 4),
            (np.float16(4), 4),
            (np.single(4), 4),
            (np.double(4), 4),
            (np.longdouble(4), 4),
            (np.csingle(4), 4),
            (np.cdouble(4), 4),
            (np.clongdouble(4), 4),
            (np.int8(4), 4),
            (np.int16(4), 4),
            (np.int32(4), 4),
            (np.int64(4), 4),
            (np.uint8(4), 4),
            (np.uint16(4), 4),
            (np.uint32(4), 4),
            (np.uint64(4), 4),
            (np.intp(4), 4),
            (np.uintp(4), 4),
            (np.float32(4), 4),
            (np.float64(4), 4),
            (np.complex64(4), 4 + 0j),
            (np.complex128(4), 4 + 0j),
            (np.complex_(4), 4 + 0j),
            # fmt: on
        ],
    )
    def test_encode_numpy(value, expected) -> None:
        """
        Test encode_numpy
        """
        # Arrange

        # Act
        result = Numpy.encode_numpy(value)

        # Assert
        assert result == expected