def test_init(self):
    import numpy as np
    import math
    import sys
    assert np.intp() == np.intp(0)
    assert np.intp("123") == np.intp(123)
    raises(TypeError, np.intp, None)
    assert np.float64() == np.float64(0)
    assert math.isnan(np.float64(None))
    assert np.bool_() == np.bool_(False)
    assert np.bool_("abc") == np.bool_(True)
    assert np.bool_(None) == np.bool_(False)
    assert np.complex_() == np.complex_(0)
    # raises(TypeError, np.complex_, '1+2j')
    assert math.isnan(np.complex_(None))
    for c in ["i", "I", "l", "L", "q", "Q"]:
        assert np.dtype(c).type().dtype.char == c
    for c in ["l", "q"]:
        assert np.dtype(c).type(sys.maxint) == sys.maxint
    for c in ["L", "Q"]:
        assert np.dtype(c).type(sys.maxint + 42) == sys.maxint + 42
    assert np.float32(np.array([True, False])).dtype == np.float32
    assert type(np.float32(np.array([True]))) is np.ndarray
    assert type(np.float32(1.0)) is np.float32
    a = np.array([True, False])
    assert np.bool_(a) is a
def getrows(self, privateKey, senderID, mType, params, extra):
    """
    Receive a single row or a list of rows and store them in a class
    attribute. Use **client.crow** or **client.rowlist** to access the
    indices of **previously broadcast** rows.
    """
    filen = params['url']
    if mType == 'table.highlight.row':
        idx = np.intp(params['row'])
        print '[SAMP] Selected row %s from %s' % (idx, filen)
        print '[SAMP] Row index stored in property -> crow'
        self.crow = idx
    elif mType == 'table.select.rowList':
        idx = np.intp(params['row-list'])
        print '[SAMP] Selected %s rows from %s' % (len(idx), filen)
        print '[SAMP] List stored in property -> rowlist'
        self.rowlist = idx
    self.lastMessage = {'label': 'Selected Rows',
                        'privateKey': privateKey,
                        'senderID': senderID,
                        'mType': mType,
                        'params': params,
                        'extra': extra}
def __init__(self, ldis, Nv, VX, VY, K, EToV):
    l = self.ldis = ldis
    self.dimensions = ldis.dimensions

    self.Nv = Nv
    self.VX = VX
    self.K = K

    va = np.intp(EToV[:, 0].T)
    vb = np.intp(EToV[:, 1].T)
    vc = np.intp(EToV[:, 2].T)

    x = self.x = 0.5*(
            -np.outer(VX[va], l.r+l.s)
            +np.outer(VX[vb], 1+l.r)
            +np.outer(VX[vc], 1+l.s))
    y = self.y = 0.5*(
            -np.outer(VY[va], l.r+l.s)
            +np.outer(VY[vb], 1+l.r)
            +np.outer(VY[vc], 1+l.s))

    self.rx, self.sx, self.ry, self.sy, self.J = \
            GeometricFactors2D(x, y, l.Dr, l.Ds)
    self.nx, self.ny, self.sJ = Normals2D(l, x, y, K)
    self.Fscale = self.sJ/self.J[:, l.FmaskF]

    # element-to-element, element-to-face connectivity
    self.EToE, self.EToF = Connect2D(EToV)

    self.mapM, self.mapP, self.vmapM, self.vmapP, self.vmapB, self.mapB = \
            BuildMaps2D(l, l.Fmask, VX, VY, EToV, self.EToE, self.EToF,
                        K, l.N, x, y)
def execute(self, solver, stream=None):
    slvr = solver

    # The gaussian shape array can be empty if
    # no gaussian sources were specified.
    gauss = np.intp(0) if np.product(slvr.gauss_shape.shape) == 0 \
        else slvr.gauss_shape

    sersic = np.intp(0) if np.product(slvr.sersic_shape.shape) == 0 \
        else slvr.sersic_shape

    self.kernel(slvr.uvw, slvr.brightness, gauss, sersic,
                slvr.wavelength, slvr.antenna1, slvr.antenna2,
                slvr.jones_scalar,
                slvr.flag, slvr.weight_vector,
                slvr.model_vis, slvr.observed_vis, slvr.chi_sqrd_result,
                **self.get_kernel_params(slvr))

    # Call the pycuda reduction kernel.
    # Divide by the single sigma squared value if a weight vector
    # is not required. Otherwise the kernel will incorporate the
    # individual sigma squared values into the sum.
    gpu_sum = gpuarray.sum(slvr.chi_sqrd_result).get()

    if not self.weight_vector:
        slvr.set_X2(gpu_sum/slvr.sigma_sqrd)
    else:
        slvr.set_X2(gpu_sum)
def from_range(cat_comp, lo, hi):
    """
    Utility function to help construct the ROI from a range.

    :param cat_comp: anything understood by ._categorical_helper ... array, list or component
    :param lo: lower bound of the range
    :param hi: upper bound of the range
    :return: CategoricalROI object
    """

    # Convert lo and hi to integers. Note that if lo or hi are negative,
    # which can happen if the user zoomed out, we need to reset them to
    # zero, otherwise they will have strange effects when slicing the
    # categories. Note that we use ceil for lo, because if lo is 0.9 then
    # we should only select 1 and above.
    lo = np.intp(np.ceil(lo) if lo > 0 else 0)
    hi = np.intp(np.ceil(hi) if hi > 0 else 0)

    roi = CategoricalROI()
    cat_data = cat_comp.categories
    roi.update_categories(cat_data[lo:hi])

    return roi
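# A quick worked check of the clamping above (a sketch): a zoomed-out view
# with lo = -0.7 and hi = 2.3 clamps to the integer slice [0:3], while a
# lo of 0.9 rounds up to 1 so category 0 is excluded.
import numpy as np
lo, hi = -0.7, 2.3
lo = np.intp(np.ceil(lo) if lo > 0 else 0)   # -> 0
hi = np.intp(np.ceil(hi) if hi > 0 else 0)   # -> 3
print(lo, hi)   # 0 3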
def Connect3D(EToV):
    """Build global connectivity arrays for grid based on standard
    EToV input array from grid generator.
    """
    EToV = EToV.astype(np.intp)

    Nfaces = 4

    # Find number of elements and vertices
    K = EToV.shape[0]
    #Nv = EToV.max()+1

    # Create face to node connectivity matrix
    #TotalFaces = Nfaces*K

    # List of local face to local vertex connections
    vn = np.int32([[0,1,2], [0,1,3], [1,2,3], [0,2,3]])

    # Build global face to node connectivity
    g_face_no = 0
    vert_indices_to_face_numbers = {}
    face_numbers = xrange(Nfaces)
    for k in xrange(K):
        for face in face_numbers:
            vert_indices_to_face_numbers.setdefault(
                    frozenset(EToV[k, vn[face]]), []).append(g_face_no)
            g_face_no += 1

    faces1 = []
    faces2 = []

    # check this
    for i in vert_indices_to_face_numbers.itervalues():
        if len(i) == 2:
            faces1.append(i[0])
            faces2.append(i[1])
            faces2.append(i[0])
            faces1.append(i[1])

    faces1 = np.intp(faces1)
    faces2 = np.intp(faces2)

    # Convert global face number to element and face numbers
    element1, face1 = divmod(faces1, Nfaces)
    element2, face2 = divmod(faces2, Nfaces)

    # Rearrange into Nelements x Nfaces sized arrays
    ind = element1*Nfaces + face1

    EToE = np.outer(np.arange(K), np.ones((1, Nfaces)))
    EToF = np.outer(np.ones((K, 1)), np.arange(Nfaces))
    EToE = EToE.reshape(K*Nfaces)
    EToF = EToF.reshape(K*Nfaces)

    EToE[np.int32(ind)] = element2
    EToF[np.int32(ind)] = face2

    EToE = EToE.reshape(K, Nfaces)
    EToF = EToF.reshape(K, Nfaces)

    return EToE, EToF
def test_intp(self):
    # Ticket #99
    i_width = np.int_(0).nbytes*2 - 1
    np.intp('0x' + 'f'*i_width, 16)
    assert_raises(OverflowError, np.intp, '0x' + 'f'*(i_width+1), 16)
    assert_raises(ValueError, np.intp, '0x1', 32)
    assert_equal(255, np.intp('0xFF', 16))
def _accumDiffStims(self, d_resp_tmp, diffV1GausBuf, sizes, orderX,
                    orderY, orderT):
    """Gets the responses of the filters specified in d_v1popDirs by
    interpolation. This is basically what shSwts.m did in the original
    S&H code."""
    # a useful list of factorials for computing the scaling factors for
    # the derivatives
    factorials = (1, 1, 2, 6)

    # the scaling factor for this directional derivative,
    # similar to the binomial coefficients
    scale = 6/factorials[orderX]/factorials[orderY]/factorials[orderT]

    gdim = (int(iDivUp(sizes[0] * sizes[1], 256)), 1)
    bdim = (256, 1, 1)

    self.dev_accumDiffStims(
        np.intp(d_resp_tmp),
        np.intp(diffV1GausBuf),
        np.int32(sizes[0] * sizes[1]),
        np.int32(scale),
        np.int32(orderX),
        np.int32(orderY),
        np.int32(orderT),
        block=bdim, grid=gdim)
def __init__(self, code, point, struct_ptr):
    self.code = cuda.to_device(code)
    self.point = cuda.to_device(point)
    self.code_shape, self.code_dtype = code.shape, code.dtype
    self.point_shape, self.point_dtype = point.shape, point.dtype
    cuda.memcpy_htod(int(struct_ptr), np.int32(code.size))
    cuda.memcpy_htod(int(struct_ptr) + 8, np.intp(int(self.code)))
    cuda.memcpy_htod(int(struct_ptr) + 8 + np.intp(0).nbytes,
                     np.intp(int(self.point)))
def check_intp(self, level=rlevel):
    """Ticket #99"""
    i_width = N.int_(0).nbytes*2 - 1
    N.intp('0x' + 'f'*i_width, 16)
    self.failUnlessRaises(OverflowError, N.intp, '0x' + 'f'*(i_width+1), 16)
    self.failUnlessRaises(ValueError, N.intp, '0x1', 32)
    assert_equal(255, N.intp('0xFF', 16))
    assert_equal(1024, N.intp(1024))
def test_intp(self, level=rlevel):
    """Ticket #99"""
    i_width = np.int_(0).nbytes*2 - 1
    np.intp('0x' + 'f'*i_width, 16)
    self.assertRaises(OverflowError, np.intp, '0x' + 'f'*(i_width+1), 16)
    self.assertRaises(ValueError, np.intp, '0x1', 32)
    assert_equal(255, np.intp('0xFF', 16))
    assert_equal(1024, np.intp(1024))
def getSRT(self, gmap, store_srt=False):
    """
    Computes the sample rank templates for the expression matrix (given
    on instantiation) and gmap.

    gmap is a 1d numpy array where gmap[2*i] and gmap[2*i + 1] are gene
        indices for comparison i
    b_size is the block size to use in gpu computation
    store_srt - determines what is returned:
        False (default) - returns the srt numpy array (npairs, nsamp)
        True - returns the srt_gpu object and the object's padded shape
               (npairs, nsamp)
    """
    # the x coords in the gpu map to sample_ids,
    # the y coords to gmap

    # sample blocks
    b_size = self.b_size
    exp = self.exp
    g_y_sz = self.getGrid(exp.shape[1])
    # pair blocks
    g_x_sz = self.getGrid(gmap.shape[0]/2)

    # put gene map on gpu
    gmap_buffer = self.gmap_buffer = self.getBuff(gmap, 2*(g_x_sz*b_size),
                                                  1, np.int32)
    gmap_gpu = np.intp(gmap_buffer.base.get_device_pointer())
    #cuda.mem_alloc(gmap_buffer.nbytes)
    #cuda.memcpy_htod(gmap_gpu, gmap_buffer)

    # make room for srt
    srt_shape = (g_x_sz*b_size, g_y_sz*b_size)
    srt_buffer = self.srt_buffer = self.getBuff(
        np.zeros(srt_shape, dtype=np.int32),
        srt_shape[0], srt_shape[1], np.int32)
    srt_gpu = np.intp(srt_buffer.base.get_device_pointer())
    #cuda.mem_alloc(srt_shape[0]*srt_shape[1]*np.int32(1).nbytes)

    srtKern = self.getsrtKern()

    exp_gpu = self.exp_gpu
    nsamp = np.uint32(g_y_sz * b_size)
    ngenes = np.uint32(self.exp.shape[0])
    npairs = np.uint32(g_x_sz * b_size)

    block = (b_size, b_size, 1)
    grid = (g_x_sz, g_y_sz)

    srtKern(exp_gpu, nsamp, ngenes, gmap_gpu, npairs, srt_gpu,
            block=block, grid=grid)

    #gmap_gpu.free()

    if store_srt:
        # this is in case we want to run further stuff without
        # transferring back and forth
        return (srt_gpu, npairs, nsamp)
    else:
        #srt_buffer = np.zeros(srt_shape, dtype=np.int32)
        #cuda.memcpy_dtoh(srt_buffer, srt_gpu)
        #srt_gpu.free()
        return srt_buffer[:gmap.shape[0]/2, :self.exp.shape[1]]
def _default_norm(self, layer):
    vals = np.sort(layer.ravel())
    vals = vals[np.isfinite(vals)]
    result = DS9Normalize()
    result.stretch = 'arcsinh'
    result.clip = True
    if vals.size > 0:
        result.vmin = vals[np.intp(.01 * vals.size)]
        result.vmax = vals[np.intp(.99 * vals.size)]
    return result
def _loadInput(self, stim):
    logging.debug('loadInput')

    # shortcuts
    nrXY = self.nrX * self.nrY
    nrXYD = self.nrX * self.nrY * self.nrDirs

    # parse input
    assert type(stim).__module__ == "numpy", "stim must be numpy array"
    assert type(stim).__name__ == "ndarray", "stim must be numpy.ndarray"
    assert stim.size > 0, "stim cannot be []"
    stim = stim.astype(np.ubyte)

    rows, cols = stim.shape
    logging.debug("- stim shape={0}x{1}".format(rows, cols))

    # shift d_stimBuf in time by 1 frame, from frame i to frame i-1
    # write our own memcpy kernel... :-(
    gdim = (int(iDivUp(nrXY, 128)), 1)
    bdim = (128, 1, 1)
    for i in xrange(1, self.nrT):
        stimBufPt_dst = np.intp(self.d_stimBuf) + self.szXY * (i - 1)
        stimBufPt_src = np.intp(self.d_stimBuf) + self.szXY * i
        self.dev_memcpy_dtod(
            stimBufPt_dst,
            stimBufPt_src,
            np.int32(nrXY),
            block=bdim, grid=gdim)

    # index into d_stimBuf array to place the new stim at the end
    # (newest frame at pos: nrT-1)
    d_stimBufPt = np.intp(self.d_stimBuf) + self.szXY * (self.nrT-1)

    # \TODO implement RGB support
    self.dev_split_gray(
        d_stimBufPt,
        cuda.In(stim),
        np.int32(stim.size),
        block=bdim, grid=gdim)

    # create working copy of d_stimBuf
    cuda.memcpy_dtod(self.d_scalingStimBuf, self.d_stimBuf,
                     self.szXY*self.nrT)

    # reset V1complex responses to 0
    # \FIXME not sure how to use memset...doesn't seem to give expected
    # result
    tmp = np.zeros(nrXYD).astype(np.float32)
    cuda.memcpy_htod(self.d_respV1c, tmp)

    # allocate d_resp, which will contain the response to all 28
    # (nrFilters) space-time orientations at 3 (nrScales) scales for
    # every pixel location (nrX*nrY)
    tmp = np.zeros(nrXY*self.nrFilters*self.nrScales).astype(np.float32)
    cuda.memcpy_htod(self.d_resp, tmp)
def readout(mesh, pos, mode="raise", period=None, transform=None, out=None):
    """CIC approximation, reading out mesh values at pos,
    see document of paint.
    """
    pos = numpy.array(pos)
    if out is None:
        out = numpy.zeros(len(pos), dtype='f8')
    else:
        out[:] = 0
    chunksize = 1024 * 16 * 4
    Ndim = pos.shape[-1]
    Np = pos.shape[0]
    if transform is None:
        transform = lambda x: x
    neighbours = ((numpy.arange(2 ** Ndim)[:, None] >>
                   numpy.arange(Ndim)[None, :]) & 1)
    for start in range(0, Np, chunksize):
        chunk = slice(start, start+chunksize)
        if mode == 'raise':
            gridpos = transform(pos[chunk])
            rmi_mode = 'raise'
            intpos = numpy.intp(numpy.floor(gridpos))
        elif mode == 'ignore':
            gridpos = transform(pos[chunk])
            rmi_mode = 'raise'
            intpos = numpy.intp(numpy.floor(gridpos))

        for i, neighbour in enumerate(neighbours):
            neighbour = neighbour[None, :]
            targetpos = intpos + neighbour

            kernel = (1.0 - numpy.abs(gridpos - targetpos)).prod(axis=-1)

            if period is not None:
                period = numpy.int32(period)
                numpy.remainder(targetpos, period, targetpos)

            if mode == 'ignore':
                # filter out those outside of the mesh
                mask = (targetpos >= 0).all(axis=-1)
                for d in range(Ndim):
                    mask &= (targetpos[..., d] < mesh.shape[d])
                targetpos = targetpos[mask]
                kernel = kernel[mask]
            else:
                mask = Ellipsis

            if len(targetpos) > 0:
                targetindex = numpy.ravel_multi_index(
                    targetpos.T, mesh.shape, mode=rmi_mode)
                out[chunk][mask] += kernel * mesh.flat[targetindex]

    return out
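# A minimal usage sketch for readout, assuming 1-D positions already in
# grid units: CIC interpolates linearly between the two neighbouring
# cells, so a position of 1.5 reads back the average of mesh[1] and mesh[2].
import numpy
mesh = numpy.arange(4, dtype='f8')   # cell values 0, 1, 2, 3
pos = numpy.array([[1.5]])           # one particle, one dimension
print(readout(mesh, pos))            # expected: [ 1.5 ]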
def getRT(self, s_map, srt_gpu, srt_nsamp, srt_npairs, npairs,
          store_rt=False):
    """
    Computes the rank template.

    s_map (sample map) - a list of 1s and 0s of length nsamples where 1
        means use this sample to compute the rank template
    srt_gpu - cuda memory object containing srt (sample rank template)
        array on gpu
    srt_nsamp, srt_npairs - shape (buffered) of srt_gpu object
    npairs - true number of gene pairs being compared
    b_size - size of the blocks for computation
    store_rt - determines the RETURN value
        False (default) - returns a numpy array shape (npairs,) of the
            rank template
        True - returns the rt_gpu object and the padded size of the
            rt_gpu object: (rt_obj, npairs_padded)
    """
    b_size = self.b_size

    s_map_buff = self.s_map_buff = cuda.pagelocked_zeros(
        (int(srt_nsamp),), np.int32,
        mem_flags=cuda.host_alloc_flags.DEVICEMAP)
    s_map_buff[:len(s_map)] = np.array(s_map, dtype=np.int32)
    s_map_gpu = np.intp(s_map_buff.base.get_device_pointer())
    #cuda.memcpy_htod(s_map_gpu, s_map_buff)

    # sample blocks
    g_y_sz = self.getGrid(srt_nsamp)
    # pair blocks
    g_x_sz = self.getGrid(srt_npairs)

    block_rt_gpu = cuda.mem_alloc(
        int(g_y_sz*srt_npairs*(np.uint32(1).nbytes)))

    grid = (g_x_sz, g_y_sz)

    func1, func2 = self.getrtKern(g_y_sz)

    shared_size = b_size*b_size*np.uint32(1).nbytes

    func1(srt_gpu, np.uint32(srt_nsamp), np.uint32(srt_npairs),
          s_map_gpu, block_rt_gpu, np.uint32(g_y_sz),
          block=(b_size, b_size, 1), grid=grid, shared=shared_size)

    rt_buffer = self.rt_buffer = cuda.pagelocked_zeros(
        (int(srt_npairs),), np.int32,
        mem_flags=cuda.host_alloc_flags.DEVICEMAP)
    rt_gpu = np.intp(rt_buffer.base.get_device_pointer())

    func2(block_rt_gpu, rt_gpu, np.int32(s_map_buff.sum()),
          block=(b_size, 1, 1), grid=(g_x_sz,))

    if store_rt:
        # this is in case we want to run further stuff without
        # transferring back and forth
        return (rt_gpu, srt_npairs)
    else:
        #rt_buffer = np.zeros((srt_npairs,), dtype=np.int32)
        #cuda.memcpy_dtoh(rt_buffer, rt_gpu)
        #rt_gpu.free()
        return rt_buffer[:npairs]
def __init__(self, ldis, Nv, VX, VY, VZ, K, EToV):
    l = self.ldis = ldis
    self.dimensions = ldis.dimensions

    self.Nv = Nv
    self.VX = VX
    self.VY = VY  # check ?
    self.VZ = VZ
    self.K = K

    va = np.intp(EToV[:, 0].T)
    vb = np.intp(EToV[:, 1].T)
    vc = np.intp(EToV[:, 2].T)
    vd = np.intp(EToV[:, 3].T)

    x = self.x = 0.5*(
            -np.outer(VX[va], 1+l.r+l.s+l.t)
            +np.outer(VX[vb], 1+l.r)
            +np.outer(VX[vc], 1+l.s)
            +np.outer(VX[vd], 1+l.t))
    y = self.y = 0.5*(
            -np.outer(VY[va], 1+l.r+l.s+l.t)
            +np.outer(VY[vb], 1+l.r)
            +np.outer(VY[vc], 1+l.s)
            +np.outer(VY[vd], 1+l.t))
    z = self.z = 0.5*(
            -np.outer(VZ[va], 1+l.r+l.s+l.t)
            +np.outer(VZ[vb], 1+l.r)
            +np.outer(VZ[vc], 1+l.s)
            +np.outer(VZ[vd], 1+l.t))

    drst_dxyz = np.empty((3, 3), dtype=object)
    geo_fac = GeometricFactors3D(x, y, z, l.Dr, l.Ds, l.Dt)
    self.J = geo_fac[-1]
    drst_dxyz.reshape(-1)[:] = geo_fac[:-1]
    self.drst_dxyz = drst_dxyz.T

    self.nx, self.ny, self.nz, self.sJ = Normals3D(l, x, y, z, K)
    self.Fscale = self.sJ/self.J[:, l.FmaskF]

    # element-to-element, element-to-face connectivity
    self.EToE, self.EToF = Connect3D(EToV)

    self.mapM, self.mapP, self.vmapM, self.vmapP, self.vmapB, self.mapB = \
            BuildMaps3D(l, l.Fmask, VX, VY, VZ, EToV, self.EToE, self.EToF,
                        K, l.N, x, y, z)

    self.bc = np.ones((K, l.Nfp*l.Nfaces))
    self.bc.reshape(-1)[self.mapB] = -1
def coarsegrain(self, flowy, deltafy, Ny):
    if not hasattr(self, 'Moments'):
        self.getMultipleMoments(msign='pn')
    Nx = self.Length
    flowx = self.FreqOffset
    deltafx = self.Cadence

    if ((deltafx <= 0) | (deltafy <= 0) | (Ny <= 0)):
        raise ValueError, 'bad input argument'
    if (deltafy < deltafx):
        raise ValueError, 'deltaf coarse-grain < deltaf fine-grain'
    if ((flowy - 0.5*deltafy) < (flowx - 0.5*deltafx)):
        raise ValueError, 'desired coarse-grained start frequency is too low'

    fhighx = flowx + (Nx-1)*deltafx
    fhighy = flowy + (Ny-1)*deltafy
    if ((fhighy + 0.5*deltafy) > (fhighx + 0.5*deltafx)):
        raise ValueError, 'desired coarse-grained stop frequency is too high'

    i = numpy.arange(Ny)
    jlow = numpy.intp(
        1 + numpy.floor((flowy + (i-0.5)*deltafy - flowx - 0.5*deltafx)/deltafx))
    jhigh = numpy.intp(
        1 + numpy.floor((flowy + (i+0.5)*deltafy - flowx - 0.5*deltafx)/deltafx))
    index1 = jlow[0]
    index2 = jhigh[-1]
    fraclow = (flowx + (jlow+0.5)*deltafx - flowy - (i-0.5)*deltafy)/deltafx
    frachigh = (flowy + (i+0.5)*deltafy - flowx - (jhigh-0.5)*deltafx)/deltafx
    frac1 = fraclow[0]
    frac2 = frachigh[-1]

    jtemp = jlow + 1

    coarseMoments = numpy.zeros((numpy.shape(self.Moments)[0], Ny), complex)
    for lm in range(numpy.shape(self.Moments)[0]):
        midsum = sumTerms(self.Moments[lm, :], jtemp, jhigh)
        ya = (deltafx/deltafy)*(self.Moments[lm, :][jlow[:-1]]*fraclow[:-1]
                                + self.Moments[lm, :][jhigh[:-1]]*frachigh[:-1]
                                + midsum[:-1])
        if (jhigh[-1] > Nx-1):
            yb = (deltafx/deltafy)*(self.Moments[lm, :][jlow[-1]]*fraclow[-1]
                                    + midsum[-1])
        else:
            yb = (deltafx/deltafy)*(self.Moments[lm, :][jlow[-1]]*fraclow[-1]
                                    + self.Moments[lm, :][jhigh[-1]]*frachigh[-1]
                                    + midsum[-1])
        coarseMoments[lm, :] = numpy.array(list(ya) + [yb])

    self.coarseMoments = coarseMoments
    self.coarseFreqOffset = flowy
    self.coarseCadence = deltafy
    self.coarseLength = numpy.shape(self.coarseMoments)[1]
    self.index1 = index1
    self.index2 = index2
    self.frac1 = frac1
    self.frac2 = frac2
    return coarseMoments
def rfGbmCombined(X, Y_casual, Y_registered, testSet_final):
    # creating models
    rf1 = randomForestModel()       # train for casual
    rf2 = randomForestModel()       # train for registered
    gbm1 = gradientDescentModel()   # train for casual
    gbm2 = gradientDescentModel()   # train for registered

    # fitting models
    # rf1.fit(train_X, train_Y[:, 0])  # train_Y[:, 0] - use 0th column of train_Y
    rf1.fit(X, Y_casual)
    plotFeatureImportance(rf1)
    rf2.fit(X, Y_registered)
    gbm1.fit(X, Y_casual)
    gbm2.fit(X, Y_registered)

    # prediction
    rf1_Y = np.exp(rf1.predict(testSet_final)) - 1
    rf2_Y = np.exp(rf2.predict(testSet_final)) - 1
    gbm1_Y = np.exp(gbm1.predict(testSet_final)) - 1
    gbm2_Y = np.exp(gbm2.predict(testSet_final)) - 1

    # average the casual+registered totals of the two model families
    final_prediction = (rf1_Y + rf2_Y + gbm1_Y + gbm2_Y)/2
    final_prediction = np.intp(
        np.around(final_prediction))  # round and convert to integer
    return final_prediction
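# Note on the np.exp(.) - 1 step above: the targets were presumably fit on
# log1p-transformed counts, so predictions are mapped back with the inverse
# transform. A minimal sketch of the assumed round trip (hypothetical
# variable names, not part of the original code):
import numpy as np
counts = np.array([0, 3, 12])
y_train = np.log(counts + 1)        # forward transform at training time
recovered = np.exp(y_train) - 1     # inverse transform used at prediction
assert np.allclose(recovered, counts)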
def __new__(cls, x=0):
    if isinstance(x, afnumpy.ndarray):
        return x.astype(cls)
    elif isinstance(x, numbers.Number):
        return numpy.intp(x)
    else:
        return afnumpy.array(x).astype(cls)
def _build_arg_buf(args):
    handlers = []

    arg_data = []
    format = ""
    for i, arg in enumerate(args):
        if isinstance(arg, np.number):
            arg_data.append(arg)
            format += arg.dtype.char
        elif isinstance(arg, (DeviceAllocation, PooledDeviceAllocation)):
            arg_data.append(int(arg))
            format += "P"
        elif isinstance(arg, ArgumentHandler):
            handlers.append(arg)
            arg_data.append(int(arg.get_device_alloc()))
            format += "P"
        elif isinstance(arg, np.ndarray):
            arg_data.append(arg)
            format += "%ds" % arg.nbytes
        else:
            try:
                gpudata = np.intp(arg.gpudata)
            except AttributeError:
                raise TypeError("invalid type on parameter #%d (0-based)" % i)
            else:
                # for gpuarrays
                arg_data.append(int(gpudata))
                format += "P"

    from pycuda._pvt_struct import pack
    return handlers, pack(format, *arg_data)
def small_view_array(data):
    """Same as small_view, except using a numpy array as input."""
    shp = data.shape
    view = tuple([slice(None, None, np.intp(max(s / 50, 1))) for s in shp])
    return np.asarray(data)[view]
def test_simple_kernel_2(self):
    mod = SourceModule("""
    __global__ void multiply_them(float *dest, float *a, float *b)
    {
      const int i = threadIdx.x;
      dest[i] = a[i] * b[i];
    }
    """)

    multiply_them = mod.get_function("multiply_them")

    a = np.random.randn(400).astype(np.float32)
    b = np.random.randn(400).astype(np.float32)
    a_gpu = drv.to_device(a)
    b_gpu = drv.to_device(b)

    dest = np.zeros_like(a)
    multiply_them(
        drv.Out(dest), a_gpu, b_gpu,
        block=(400, 1, 1))
    assert la.norm(dest-a*b) == 0

    drv.Context.synchronize()

    # now try with offsets
    dest = np.zeros_like(a)
    multiply_them(
        drv.Out(dest), np.intp(a_gpu)+a.itemsize, b_gpu,
        block=(399, 1, 1))

    assert la.norm(dest[:-1]-a[1:]*b[:-1]) == 0
def test_numpy(self):
    """NumPy objects get serialized to readable JSON."""
    l = [
        np.float32(12.5),
        np.float64(2.0),
        np.float16(0.5),
        np.bool(True),
        np.bool(False),
        np.bool_(True),
        np.unicode_("hello"),
        np.byte(12),
        np.short(12),
        np.intc(-13),
        np.int_(0),
        np.longlong(100),
        np.intp(7),
        np.ubyte(12),
        np.ushort(12),
        np.uintc(13),
        np.ulonglong(100),
        np.uintp(7),
        np.int8(1),
        np.int16(3),
        np.int32(4),
        np.int64(5),
        np.uint8(1),
        np.uint16(3),
        np.uint32(4),
        np.uint64(5),
    ]
    l2 = [l, np.array([1, 2, 3])]
    roundtripped = loads(dumps(l2, cls=EliotJSONEncoder))
    self.assertEqual([l, [1, 2, 3]], roundtripped)
def fast_limits(data, plo, phi):
    """
    Quickly estimate percentiles in an array, using a downsampled version.

    Parameters
    ----------
    data : `numpy.ndarray`
        The array to estimate the percentiles for
    plo, phi : float
        The percentile values

    Returns
    -------
    lo, hi : float
        The percentile values
    """
    shp = data.shape
    view = tuple([slice(None, None, np.intp(max(s / 50, 1))) for s in shp])
    values = np.asarray(data)[view]
    if ~np.isfinite(values).any():
        return (0.0, 1.0)

    limits = (-np.inf, np.inf)
    lo = _scoreatpercentile(values.flat, plo, limit=limits)
    hi = _scoreatpercentile(values.flat, phi, limit=limits)
    return lo, hi
def small_view(data, attribute):
    """
    Extract a downsampled view from a dataset, for quick statistical
    summaries.
    """
    shp = data.shape
    view = tuple([slice(None, None, np.intp(max(s / 50, 1))) for s in shp])
    return data[attribute, view]
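# A minimal sketch of the stride-downsampling idiom shared by small_view,
# small_view_array and fast_limits above: each axis is sampled with a step
# chosen so that at most ~50 elements per axis survive.
import numpy as np
data = np.arange(1000*1000).reshape(1000, 1000)
view = tuple(slice(None, None, np.intp(max(s / 50, 1))) for s in data.shape)
print(data[view].shape)   # (50, 50) -- step of 20 on each axis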
def coarsegrain(self, flowy, deltafy, Ny):
    if not hasattr(self, 'CAE'):
        self.CrossSpectraDataAE()
    Nx = self.FreqLength
    flowx = self.FreqOffset
    deltafx = self.FreqCadence

    if ((deltafx <= 0) | (deltafy <= 0) | (Ny <= 0)):
        raise ValueError, 'bad input argument'
    if (deltafy < deltafx):
        raise ValueError, 'deltaf coarse-grain < deltaf fine-grain'
    if ((flowy - 0.5*deltafy) < (flowx - 0.5*deltafx)):
        raise ValueError, 'desired coarse-grained start frequency is too low'

    fhighx = flowx + (Nx-1)*deltafx
    fhighy = flowy + (Ny-1)*deltafy
    if ((fhighy + 0.5*deltafy) > (fhighx + 0.5*deltafx)):
        raise ValueError, 'desired coarse-grained stop frequency is too high'

    i = numpy.arange(Ny)
    jlow = numpy.intp(
        1 + numpy.floor((flowy + (i-0.5)*deltafy - flowx - 0.5*deltafx)/deltafx))
    jhigh = numpy.intp(
        1 + numpy.floor((flowy + (i+0.5)*deltafy - flowx - 0.5*deltafx)/deltafx))
    index1 = jlow[0]
    index2 = jhigh[-1]
    fraclow = (flowx + (jlow+0.5)*deltafx - flowy - (i-0.5)*deltafy)/deltafx
    frachigh = (flowy + (i+0.5)*deltafy - flowx - (jhigh-0.5)*deltafx)/deltafx
    frac1 = fraclow[0]
    frac2 = frachigh[-1]

    jtemp = jlow + 1
    midsum = sumTerms(self.CAE, jtemp, jhigh)
    ya = (deltafx/deltafy)*(self.CAE[jlow[:-1]]*fraclow[:-1]
                            + self.CAE[jhigh[:-1]]*frachigh[:-1]
                            + midsum[:-1])
    if (jhigh[-1] > Nx-1):
        yb = (deltafx/deltafy)*(self.CAE[jlow[-1]]*fraclow[-1] + midsum[-1])
    else:
        yb = (deltafx/deltafy)*(self.CAE[jlow[-1]]*fraclow[-1]
                                + self.CAE[jhigh[-1]]*frachigh[-1]
                                + midsum[-1])
    self.coarseCAE = numpy.array(list(ya) + [yb])
    self.coarseFreqOffset = flowy
    self.coarseFreqCadence = deltafy
    self.coarseFreqLength = len(self.coarseCAE)
    self.index1 = index1
    self.index2 = index2
    self.frac1 = frac1
    self.frac2 = frac2
    return self.coarseCAE
def test_more_barycentric_transforms(self):
    # Triangulate some "nasty" grids

    eps = np.finfo(float).eps

    npoints = {2: 70, 3: 11, 4: 5, 5: 3}

    _is_32bit_platform = np.intp(0).itemsize < 8
    for ndim in xrange(2, 6):
        # Generate a uniform grid in the n-d unit cube
        x = np.linspace(0, 1, npoints[ndim])
        grid = np.c_[
            list(map(np.ravel,
                     np.broadcast_arrays(*np.ix_(*([x]*ndim)))))].T

        err_msg = "ndim=%d" % ndim

        # Check using regular grid
        tri = qhull.Delaunay(grid)
        self._check_barycentric_transforms(tri, err_msg=err_msg,
                                           unit_cube=True)

        # Check with eps-perturbations
        np.random.seed(1234)
        m = (np.random.rand(grid.shape[0]) < 0.2)
        grid[m, :] += 2*eps*(np.random.rand(*grid[m, :].shape) - 0.5)

        tri = qhull.Delaunay(grid)
        self._check_barycentric_transforms(tri, err_msg=err_msg,
                                           unit_cube=True,
                                           unit_cube_tol=2*eps)

        # Check with duplicated data
        tri = qhull.Delaunay(np.r_[grid, grid])
        self._check_barycentric_transforms(tri, err_msg=err_msg,
                                           unit_cube=True,
                                           unit_cube_tol=2*eps)

        if not _is_32bit_platform:
            # test numerically unstable, and reported to fail on 32-bit
            # installs

            # Check with larger perturbations
            np.random.seed(4321)
            m = (np.random.rand(grid.shape[0]) < 0.2)
            grid[m, :] += 1000*eps*(np.random.rand(*grid[m, :].shape) - 0.5)

            tri = qhull.Delaunay(grid)
            self._check_barycentric_transforms(tri, err_msg=err_msg,
                                               unit_cube=True,
                                               unit_cube_tol=1500*eps)

            # Check with yet larger perturbations
            np.random.seed(4321)
            m = (np.random.rand(grid.shape[0]) < 0.2)
            grid[m, :] += 1e6*eps*(np.random.rand(*grid[m, :].shape) - 0.5)

            tri = qhull.Delaunay(grid)
            self._check_barycentric_transforms(tri, err_msg=err_msg,
                                               unit_cube=True,
                                               unit_cube_tol=1e7*eps)
def test_64bit_integer(self):
    a = scipy.sparse.csr_matrix(array([[2**32+1, 2**32+1],
                                       [-2**63+2, 2**63-2]],
                                      dtype=np.int64))
    if (np.intp(0).itemsize < 8):
        assert_raises(OverflowError, mmwrite, self.fn, a)
    else:
        self.check_exact(a, (2, 2, 4, 'coordinate', 'integer', 'general'))
def max_pool_forward_naive(x, pool_param):
    """
    A naive implementation of the forward pass for a max pooling layer.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions

    Returns a tuple of:
    - out: Output data
    - cache: (x, pool_param)
    """
    out = None
    ###########################################################################
    # TODO: Implement the max pooling forward pass                            #
    ###########################################################################
    N, C, H, W = x.shape
    HH, WW, stride = pool_param['pool_height'], \
        pool_param['pool_width'], pool_param['stride']
    HP = np.intp(1 + (H - HH) / stride)
    WP = np.intp(1 + (W - WW) / stride)
    hs = stride * np.arange(HP)
    ws = stride * np.arange(WP)
    out = np.zeros((N, C, HP, WP))
    for i in range(HP):
        for j in range(WP):
            out[:, :, i, j] = np.amax(
                x[:, :, hs[i]:hs[i] + HH, ws[j]:ws[j] + WW], axis=(2, 3))
    ###########################################################################
    #                             END OF YOUR CODE                            #
    ###########################################################################
    cache = (x, pool_param)
    return out, cache
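# A quick shape check for max_pool_forward_naive (a sketch, assuming the
# common 2x2 / stride-2 configuration): an 8x8 feature map pools down to
# 4x4, since HP = 1 + (8 - 2) / 2 = 4.
import numpy as np
x = np.random.randn(2, 3, 8, 8)
out, _ = max_pool_forward_naive(x, {'pool_height': 2, 'pool_width': 2,
                                    'stride': 2})
print(out.shape)   # (2, 3, 4, 4)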
def control_signed(a, b):
    tp = self.get_numpy_signed_upcast(a, b)
    if b >= 0:
        return tp(a)**tp(b)
    else:
        inv = tp(a)**tp(-b)
        if inv == 0:
            # Overflow
            return 0
        return np.intp(1.0 / inv)
def _unique_internal(data):
    if len(data.shape) != 1:
        raise ValueError("_unique_internal currently "
                         "only supports 1D arrays")

    # Handle the empty array case
    if data.shape[0] == 0:
        return (data,
                np.empty((0,), dtype=np.intp),
                np.empty((0,), dtype=np.intp),
                np.empty((0,), dtype=np.intp))

    # See numpy's unique1d
    perm = np.argsort(data, kind='mergesort')

    # Combine these arrays to save on allocations?
    aux = np.empty_like(data)
    mask = np.empty(aux.shape, dtype=np.bool_)
    inv_idx = np.empty(mask.shape, dtype=np.intp)

    # Hard code first iteration
    p = perm[0]
    aux[0] = data[p]
    mask[0] = True
    cumsum = 1
    inv_idx[p] = cumsum - 1
    counts = [np.intp(0)]

    for i in range(1, aux.shape[0]):
        p = perm[i]
        aux[i] = data[p]
        d = aux[i] != aux[i - 1]
        mask[i] = d
        cumsum += d
        inv_idx[p] = cumsum - 1
        if d:
            counts.append(np.intp(i))
    counts.append(aux.shape[0])

    # (uniques, indices, inverse index, counts)
    return aux[mask], perm[mask], inv_idx, np.diff(np.array(counts))
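# A minimal sanity check (a sketch): for a 1D array, _unique_internal
# should agree with np.unique's return_inverse / return_counts outputs.
import numpy as np
data = np.array([3, 1, 3, 2, 1])
u, idx, inv, cnt = _unique_internal(data)
u2, idx2, inv2, cnt2 = np.unique(data, return_index=True,
                                 return_inverse=True, return_counts=True)
assert (u == u2).all() and (inv == inv2).all() and (cnt == cnt2).all()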
# assumes: from numpy import empty, intp
def _imhistmatch(I_inverse, I_counts, ref_unique, ref_counts):
    I_unique = empty(len(I_counts), dtype=ref_unique.dtype)
    ir = intp(0)
    for iI in range(len(I_counts)):
        while I_counts[iI] > ref_counts[ir]:
            ir += 1
        I_unique[iI] = ref_unique[ir]
    return I_unique[I_inverse]
def __init__(self, array, struct_arr_ptr):
    print "copying data to device"

    self.data = cuda.to_device(array)
    self.shape, self.dtype = array.shape, array.dtype

    cuda.memcpy_htod(int(struct_arr_ptr),
                     numpy.getbuffer(numpy.int32(len(array[0]))))
    cuda.memcpy_htod(int(struct_arr_ptr) + 8,
                     numpy.getbuffer(numpy.intp(int(self.data))))
def __init__(self, ar):
    self.inputs = np.float32(ar[:, 0:4])
    self.outputs = np.intp(ar[:, 4:7])
    self.classes = np.array(
        list(map(lambda x: output_to_class(x), self.outputs)))
    self.count = self.outputs.size
    self.bias = np.array([])
    self.inputs_m = np.matrix(self.inputs)
    self.outputs_m = np.matrix(self.outputs)
def get_rgba(ptr):
    #global plotData_d, plot_rgba_d, colorMap_rgba_d
    get_rgbaKernel(nCol, minVar, maxVar, plotData_d, np.intp(ptr),
                   colorMap_rgba_d, background_d,
                   grid=grid2D_GL, block=block2D_GL)
class DoubleOpStruct:
    mem_size = 8 + numpy.intp(0).nbytes

    def __init__(self, array, struct_arr_ptr):
        self.data = cuda.to_device(array)
        self.shape, self.dtype = array.shape, array.dtype
        cuda.memcpy_htod(int(struct_arr_ptr), numpy.int32(array.size))
        cuda.memcpy_htod(int(struct_arr_ptr) + 8,
                         numpy.intp(int(self.data)))

    def __str__(self):
        return str(cuda.from_device(self.data, self.shape, self.dtype))
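# A minimal usage sketch for DoubleOpStruct, loosely following the PyCUDA
# "struct of arrays" tutorial this class appears to come from. The two-slot
# allocation below is an assumption, not part of the original snippet.
import numpy
import pycuda.autoinit  # creates a CUDA context
import pycuda.driver as cuda
struct_arr = cuda.mem_alloc(2 * DoubleOpStruct.mem_size)
do2_ptr = int(struct_arr) + DoubleOpStruct.mem_size
array1 = DoubleOpStruct(numpy.array([1, 2, 3], dtype=numpy.float32), struct_arr)
array2 = DoubleOpStruct(numpy.array([0, 4], dtype=numpy.float32), do2_ptr)
print(array1)   # round trip through device memory: [ 1.  2.  3.]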
def Mapped(array):
    '''Analog to pycuda.driver.InOut(), but indicates that this array
    is memory mapped to the device space and should not be copied.

    To simplify coding, Mapped() will pass anything with a gpudata
    member, like a gpuarray, through unchanged.
    '''
    if hasattr(array, 'gpudata'):
        return array
    else:
        return np.intp(array.base.get_device_pointer())
def stars_mapping(P, stars):
    '''Applying affine mapping to stars'''
    stars = np.flip(stars, axis=1)
    stars = np.hstack((stars, np.ones((3, 1))))
    cords = np.matmul(P, stars.T).T
    cords = np.flip(np.delete(cords, -1, axis=1), axis=1)
    cords = np.intp(np.round(cords.ravel()))
    return cords
def test_2d(self):
    def pyfunc(arg):
        return np.array(arg)

    cfunc = nrtjit(pyfunc)
    # A list of tuples
    got = cfunc([(1, 2), (3, 4)])
    self.assertPreciseEqual(got, np.intp([[1, 2], [3, 4]]))
    got = cfunc([(1, 2.5), (3, 4.5)])
    self.assertPreciseEqual(got, np.float64([[1, 2.5], [3, 4.5]]))
    # A tuple of lists
    got = cfunc(([1, 2], [3, 4]))
    self.assertPreciseEqual(got, np.intp([[1, 2], [3, 4]]))
    got = cfunc(([1, 2], [3.5, 4.5]))
    self.assertPreciseEqual(got, np.float64([[1, 2], [3.5, 4.5]]))
    # A tuple of tuples
    got = cfunc(((1.5, 2), (3.5, 4.5)))
    self.assertPreciseEqual(got, np.float64([[1.5, 2], [3.5, 4.5]]))
    got = cfunc(((), ()))
    self.assertPreciseEqual(got, np.float64(((), ())))
def rf(X, Y_casual, Y_registered, testSet_final):
    rf1 = randomForestModel()   # train for casual
    rf2 = randomForestModel()   # train for registered

    rf1.fit(X, Y_casual)
    rf2.fit(X, Y_registered)

    rf1_Y = np.exp(rf1.predict(testSet_final)) - 1
    rf2_Y = np.exp(rf2.predict(testSet_final)) - 1

    final_prediction = (rf1_Y + rf2_Y)
    final_prediction = np.intp(
        np.around(final_prediction))  # round and convert to integer
    return final_prediction
def gidx_frm_xycrd(xy_crd, xy_dim):
    """
    Obtain the grid index from the grid xy coordinate.
    Returns 'g_idx', starting from 0.
    Example for a 3*2 grid:
        1 | 3 | 5
        ---------
        0 | 2 | 4
    """
    g_idx = np.intp((xy_crd[0] - 1) * xy_dim[1] + xy_crd[1] - 1)
    return g_idx
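# A worked check against the docstring's 3*2 layout (a sketch; assumes
# xy_crd is a 1-based (column, row) pair and xy_dim is (n_columns, n_rows)):
# the middle cell of the top row sits at column 2, row 2, i.e. index 3.
assert gidx_frm_xycrd((2, 2), (3, 2)) == 3
assert gidx_frm_xycrd((1, 1), (3, 2)) == 0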
def _predict(self, xs):
    ys0 = np.zeros(xs.shape[0])
    ys1 = np.zeros(xs.shape[0])
    for r in self.regs:
        r0 = r.r0
        r1 = r.r1
        ys0 += np.exp(r0.predict(xs[:, r.select])) - 1
        ys1 += np.exp(r1.predict(xs[:, r.select])) - 1
    ys = np.intp(np.around((ys0 + ys1) * 1.0 / len(self.regs)))
    ys[ys < 0] = 0
    return ys
def check_read(self, example, a, info, dense, over32, over64):
    with open(self.fn, 'w') as f:
        f.write(example)
    assert_equal(mminfo(self.fn), info)
    if (over32 and (np.intp(0).itemsize < 8)) or over64:
        assert_raises(OverflowError, mmread, self.fn)
    else:
        b = mmread(self.fn)
        if not dense:
            b = b.toarray()
        assert_equal(a, b)
def __init__(self, offsets):
    # Fix an issue where intp isn't always stored as intp
    from numpy import intp
    if offsets.dtype != intp:
        if offsets.dtype.kind == 'i' and \
                offsets.dtype.itemsize == intp(0).itemsize:
            offsets = offsets.view(intp)
        else:
            offsets = offsets.astype(intp)
    super(Intensity, self).__init__(abs(offsets.ravel()).max(),
                                    offsets.shape[1])
    self.__offsets = offsets
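# A minimal sketch of the view-vs-astype distinction used above (assumes a
# 64-bit platform, where np.intp is 8 bytes): when the integer itemsize
# already matches intp, .view reinterprets the buffer without copying;
# otherwise .astype must copy.
import numpy as np
offsets = np.array([[1, -2]], dtype=np.int64)
assert offsets.view(np.intp).dtype == np.intp     # reinterpret, no copy
assert offsets.astype(np.intp).dtype == np.intp   # converts with a copy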
def _assemble(self):
    if self._assembled:
        return
    self._assembled = True
    mod, sfun, vfun = Mat._lma2csr_cache.get(self.dtype,
                                             (None, None, None))
    if mod is None:
        d = {'type': self.ctype}
        src = _matrix_support_template.render(d).encode('ascii')
        compiler_opts = ['-m64', '-Xptxas', '-dlcm=ca',
                         '-Xptxas=-v', '-O3', '-use_fast_math', '-DNVCC']
        mod = SourceModule(src, options=compiler_opts)
        sfun = mod.get_function('__lma_to_csr')
        vfun = mod.get_function('__lma_to_csr_vector')
        sfun.prepare('PPPPPiPii')
        vfun.prepare('PPPPPiiPiii')
        Mat._lma2csr_cache[self.dtype] = mod, sfun, vfun

    for rowmap, colmap in self.sparsity.maps:
        assert rowmap.iterset is colmap.iterset
        nelems = rowmap.iterset.size
        nthread = 128
        nblock = (nelems * rowmap.arity * colmap.arity) / nthread + 1

        rowmap._to_device()
        colmap._to_device()
        offset = self._lmaoffset(rowmap.iterset) * self.dtype.itemsize

        arglist = [np.intp(self._lmadata.gpudata) + offset,
                   self._csrdata.gpudata,
                   self._rowptr.gpudata,
                   self._colidx.gpudata,
                   rowmap._device_values.gpudata,
                   np.int32(rowmap.arity)]

        if self._is_scalar_field:
            arglist.extend([colmap._device_values.gpudata,
                            np.int32(colmap.arity),
                            np.int32(nelems)])
            fun = sfun
        else:
            arglist.extend([np.int32(self.dims[0]),
                            colmap._device_values.gpudata,
                            np.int32(colmap.arity),
                            np.int32(self.dims[1]),
                            np.int32(nelems)])
            fun = vfun
        _stream.synchronize()
        fun.prepared_async_call((int(nblock), 1, 1), (nthread, 1, 1),
                                _stream, *arglist)
def computer_skin_probability(self):
    '''
    Every pixel gets a measure of the probability of being a skin
    pixel according to its RGB values.
    '''
    # assumes: from numpy import floor (or an equivalent alias)
    r = 1.0 * self.image[:, :, 0]
    g = 1.0 * self.image[:, :, 1]
    b = 1.0 * self.image[:, :, 2]
    #print r, g, b
    im = numpy.intp(1 + floor(r / 8.0) + floor(g / 8.0) * 32
                    + floor(b / 8.0) * 32 * 32)
    self.skinProb = skinData[im - 1]
def get_indexer(
    self,
    target: AnyArrayLike,
    method: Optional[str] = None,
    limit: Optional[int] = None,
    tolerance: Optional[Any] = None,
) -> np.ndarray:

    self._check_method(method)

    if self.is_overlapping:
        msg = (
            "cannot handle overlapping indices; use "
            "IntervalIndex.get_indexer_non_unique"
        )
        raise InvalidIndexError(msg)

    target = ensure_index(target)

    if isinstance(target, IntervalIndex):
        # equal indexes -> 1:1 positional match
        if self.equals(target):
            return np.arange(len(self), dtype="intp")

        # different closed or incompatible subtype -> no matches
        common_subtype = find_common_type(
            [self.dtype.subtype, target.dtype.subtype]
        )
        if self.closed != target.closed or is_object_dtype(common_subtype):
            return np.repeat(np.intp(-1), len(target))

        # non-overlapping -> at most one match per interval in target
        # want exact matches -> need both left/right to match, so defer to
        # left/right get_indexer, compare elementwise, equality -> match
        left_indexer = self.left.get_indexer(target.left)
        right_indexer = self.right.get_indexer(target.right)
        indexer = np.where(left_indexer == right_indexer, left_indexer, -1)
    elif not is_object_dtype(target):
        # homogeneous scalar index: use IntervalTree
        target = self._maybe_convert_i8(target)
        indexer = self._engine.get_indexer(target.values)
    else:
        # heterogeneous scalar index: defer elementwise to get_loc
        # (non-overlapping so get_loc guarantees scalar of KeyError)
        indexer = []
        for key in target:
            try:
                loc = self.get_loc(key)
            except KeyError:
                loc = -1
            indexer.append(loc)

    return ensure_platform_int(indexer)
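# A minimal usage sketch of IntervalIndex.get_indexer via the public pandas
# API: positions of scalar targets within non-overlapping intervals, with
# -1 marking "no containing interval".
import pandas as pd
idx = pd.IntervalIndex.from_breaks([0, 1, 2])   # (0, 1], (1, 2]
print(idx.get_indexer([0.5, 1.5, 2.5]))         # [ 0  1 -1]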
def pytest_runtest_setup(item):
    mark = _get_mark(item, "xslow")
    if mark is not None:
        try:
            v = int(os.environ.get('SCIPY_XSLOW', '0'))
        except ValueError:
            v = False
        if not v:
            pytest.skip("very slow test; set environment variable "
                        "SCIPY_XSLOW=1 to run it")
    mark = _get_mark(item, 'xfail_on_32bit')
    if mark is not None and np.intp(0).itemsize < 8:
        pytest.xfail('Fails on our 32-bit test platform(s): %s' %
                     (mark.args[0],))
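# The np.intp(0).itemsize check above (also used in the scipy.io and qhull
# tests earlier) is the usual pointer-width probe: intp matches the
# platform pointer size, so the itemsize is 4 on 32-bit builds and 8 on
# 64-bit builds.
import numpy as np
is_32bit = np.intp(0).itemsize < 8
print(np.intp(0).itemsize)   # 8 on a 64-bit platform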
def gbm(X, Y_casual, Y_registered, testSet_final):
    gbm1 = gradientDescentModel()   # train for casual
    gbm2 = gradientDescentModel()   # train for registered

    gbm1.fit(X, Y_casual)
    gbm2.fit(X, Y_registered)

    gbm1_Y = np.exp(gbm1.predict(testSet_final)) - 1
    gbm2_Y = np.exp(gbm2.predict(testSet_final)) - 1

    final_prediction = (gbm1_Y + gbm2_Y)
    final_prediction = np.intp(
        np.around(final_prediction))  # round and convert to integer
    return final_prediction
class YoloInfo:
    mem_size = 8 * 4 + np.intp(0).nbytes

    def __init__(self, n_classes, n_anchors, l_obj, l_noobj, anchors, ptr):
        array = np.asarray(anchors, dtype=np.float32)
        self.anchors = cuda.to_device(array)
        cuda.memcpy_htod(int(ptr), np.getbuffer(np.int32(n_classes)))
        cuda.memcpy_htod(int(ptr) + 8, np.getbuffer(np.int32(n_anchors)))
        cuda.memcpy_htod(int(ptr) + 16, np.getbuffer(np.float32(l_obj)))
        cuda.memcpy_htod(int(ptr) + 24, np.getbuffer(np.float32(l_noobj)))
        cuda.memcpy_htod(int(ptr) + 32,
                         np.getbuffer(np.intp(int(self.anchors))))
def lasso(X, Y_casual, Y_registered, testSet_final):
    alpha = 0.5
    lasso1 = linear_model.Lasso(alpha=alpha)
    lasso2 = linear_model.Lasso(alpha=alpha)

    lasso1.fit(X, Y_casual)
    lasso2.fit(X, Y_registered)

    lasso1_Y = np.exp(lasso1.predict(testSet_final)) - 1
    lasso2_Y = np.exp(lasso2.predict(testSet_final)) - 1

    final_prediction = np.intp(np.around(lasso1_Y + lasso2_Y))
    return final_prediction
def pytest_runtest_setup(item):
    mark = _get_mark(item, "xslow")
    if mark is not None:
        try:
            v = int(os.environ.get('SCIPY_XSLOW', '0'))
        except ValueError:
            v = False
        if not v:
            pytest.skip("very slow test; set environment variable "
                        "SCIPY_XSLOW=1 to run it")
    mark = _get_mark(item, 'xfail_on_32bit')
    if mark is not None and np.intp(0).itemsize < 8:
        pytest.xfail('Fails on our 32-bit test platform(s): %s' %
                     (mark.args[0],))

    # Older versions of threadpoolctl have an issue that may lead to this
    # warning being emitted, see gh-14441
    with npt.suppress_warnings() as sup:
        sup.filter(pytest.PytestUnraisableExceptionWarning)

        try:
            from threadpoolctl import threadpool_limits

            HAS_THREADPOOLCTL = True
        except Exception:  # observed in gh-14441: (ImportError, AttributeError)
            # Optional dependency only. All exceptions are caught, for
            # robustness
            HAS_THREADPOOLCTL = False

        if HAS_THREADPOOLCTL:
            # Set the number of openmp threads based on the number of workers
            # xdist is using to prevent oversubscription. Simplified version
            # of what sklearn does (it can rely on threadpoolctl and its
            # builtin OpenMP helper functions)
            try:
                xdist_worker_count = int(
                    os.environ['PYTEST_XDIST_WORKER_COUNT'])
            except KeyError:
                # raises when pytest-xdist is not installed
                return

            if not os.getenv('OMP_NUM_THREADS'):
                # use nr of physical cores
                max_openmp_threads = os.cpu_count() // 2
                threads_per_worker = max(
                    max_openmp_threads // xdist_worker_count, 1)
                try:
                    threadpool_limits(threads_per_worker, user_api='blas')
                except Exception:
                    # May raise AttributeError for older versions of OpenBLAS.
                    # Catch any error for robustness.
                    return
def _empty(cls, shape: Shape, dtype: ExtensionDtype):
    """
    Create an ExtensionArray with the given shape and dtype.
    """
    obj = cls._from_sequence([], dtype=dtype)

    taker = np.broadcast_to(np.intp(-1), shape)
    result = obj.take(taker, allow_fill=True)
    if not isinstance(result, cls) or dtype != result.dtype:
        raise NotImplementedError(
            f"Default 'empty' implementation is invalid for dtype='{dtype}'"
        )
    return result
class WordRep(object):
    """
    This is used to store the representation of a word as a path from
    root to leaf.
    length: length of the path from root to leaf.
    code: sequence of {+1, -1}; +1 if left child, -1 if right child.
    point: sequence of indices into the syn1 matrix.
    """
    memsize = 8 + np.intp(0).nbytes + np.intp(0).nbytes

    def __init__(self, code, point, struct_ptr):
        self.code = cuda.to_device(code)
        self.point = cuda.to_device(point)
        self.code_shape, self.code_dtype = code.shape, code.dtype
        self.point_shape, self.point_dtype = point.shape, point.dtype
        cuda.memcpy_htod(int(struct_ptr), np.int32(code.size))
        cuda.memcpy_htod(int(struct_ptr) + 8, np.intp(int(self.code)))
        cuda.memcpy_htod(int(struct_ptr) + 8 + np.intp(0).nbytes,
                         np.intp(int(self.point)))

    def __str__(self):
        return ("len: " + str(self.code_shape)
                + " code: " + str(cuda.from_device(
                    self.code, self.code_shape, self.code_dtype))
                + " point: " + str(cuda.from_device(
                    self.point, self.point_shape, self.point_dtype)))
def __init__(self, ptrList, struct_ptr, structtype='ARG'):
    if structtype == 'ARG':
        cuda.memcpy_htod(int(struct_ptr), np.getbuffer(np.int32(ptrList[0])))
        struct_ptr = int(struct_ptr) + 4
        cuda.memcpy_htod(int(struct_ptr), np.getbuffer(np.int32(ptrList[1])))
        struct_ptr = int(struct_ptr) + 4
        cuda.memcpy_htod(int(struct_ptr), np.getbuffer(np.int32(ptrList[2])))
        struct_ptr = int(struct_ptr) + DOUBLE_SIZE
        cuda.memcpy_htod(int(struct_ptr), np.getbuffer(np.int32(ptrList[3])))
        struct_ptr = int(struct_ptr) + DOUBLE_SIZE
        cuda.memcpy_htod(int(struct_ptr), np.getbuffer(np.int32(ptrList[4])))
        struct_ptr = int(struct_ptr) + DOUBLE_SIZE
        cuda.memcpy_htod(int(struct_ptr), np.getbuffer(np.int32(ptrList[5])))
        struct_ptr = int(struct_ptr) + DOUBLE_SIZE
        #print np.int32(ptrList[1])
        for value in ptrList[6:]:
            cuda.memcpy_htod(int(struct_ptr),
                             np.getbuffer(np.intp(int(value))))
            struct_ptr = int(struct_ptr) + np.intp(0).nbytes
    else:
        for value in ptrList:
            cuda.memcpy_htod(int(struct_ptr),
                             np.getbuffer(np.intp(int(value))))
            struct_ptr = int(struct_ptr) + np.intp(0).nbytes
def __init__(self, array, ptr):
    assert len(array.shape) == 2
    if isinstance(array, gpuarray.GPUArray):
        self.data = array.gpudata
    else:
        if array.dtype != np.float32:
            array = array.astype(np.float32)
        self.data = cuda.to_device(array)
    self.shape = array.shape
    self.dtype = array.dtype
    cuda.memcpy_htod(int(ptr), np.getbuffer(np.int32(array.shape[0])))
    cuda.memcpy_htod(int(ptr) + 8, np.getbuffer(np.int32(array.shape[1])))
    cuda.memcpy_htod(int(ptr) + 16, np.getbuffer(np.intp(int(self.data))))
def elasticnet(X, Y_casual, Y_registered, testSet_final):
    # note: alpha and l1_ratio are defined but never passed to
    # ElasticNetCV, which selects its own parameters by cross-validation
    alpha = 0.001
    l1_ratio = 0.1
    glmnet1 = linear_model.ElasticNetCV()
    glmnet2 = linear_model.ElasticNetCV()

    glmnet1.fit(X, Y_casual)
    glmnet2.fit(X, Y_registered)

    glmnet1_Y = np.exp(glmnet1.predict(testSet_final)) - 1
    glmnet2_Y = np.exp(glmnet2.predict(testSet_final)) - 1

    final_prediction = np.intp(np.around(glmnet1_Y + glmnet2_Y))
    return final_prediction
def _conv2D(self, d_idata, d_odata, sizes, d_filt, filtlen):
    logging.debug("conv2D")

    # convolve the first dimension
    gdim = (int(iDivUp(sizes[0], self.CONV1_THREAD_SIZE - (filtlen - 1))),
            sizes[1] * sizes[2])
    bdim = (self.CONV1_THREAD_SIZE, 1, 1)
    self.dev_conv1(d_idata, d_odata,
                   np.int32(sizes[0]),
                   np.intp(d_filt), np.int32(filtlen),
                   block=bdim, grid=gdim)

    szBytes = self.sizeofFloat * reduce(lambda x, y: x * y, sizes)
    d_tmp = cuda.mem_alloc(szBytes)
    cuda.memcpy_dtod(d_tmp, d_idata, szBytes)
    cuda.memcpy_dtod(d_idata, d_odata, szBytes)
    cuda.memcpy_dtod(d_odata, d_tmp, szBytes)

    # convolve the second dimension
    gdim = (int(iDivUp(sizes[0], self.CONVN_THREAD_SIZE1)),
            int(iDivUp(sizes[1], self.CONVN_THREAD_SIZE2 - (filtlen - 1))
                * sizes[2]))
    bdim = (self.CONVN_THREAD_SIZE1, self.CONVN_THREAD_SIZE2, 1)
    self.dev_convn(d_idata, d_odata,
                   np.int32(sizes[0]), np.int32(sizes[1]),
                   np.int32(sizes[0]), np.int32(sizes[0] * sizes[1]),
                   np.int32(sizes[2]),
                   np.intp(d_filt), np.int32(filtlen),
                   block=bdim, grid=gdim)