def components(prof, p):
    '''
    Interpolates the given data to calculate the U and V components
    at a given pressure.

    Parameters
    ----------
    prof : profile object
        Profile object
    p : number, numpy array
        Pressure (hPa) of a level

    Returns
    -------
    U and V components at the given pressure (kts) : number, numpy array
    '''
    # Note: numpy's interpolation routine expects the x-coordinate array
    # to be in ascending order. Because pressure decreases in the
    # vertical, we must reverse the order of the two arrays to satisfy
    # this requirement.
    if prof.wdir.count() == 0:
        # JTS - Fixed a bug where clicking "Interpolate Focused Profile"
        # throws an error for NUCAPS.
        return (ma.masked_where(ma.ones(np.shape(p)), p),
                ma.masked_where(ma.ones(np.shape(p)), p))
    U = generic_interp_pres(ma.log10(p), prof.logp[::-1], prof.u[::-1])
    V = generic_interp_pres(ma.log10(p), prof.logp[::-1], prof.v[::-1])
    return U, V
def trainNB0(trainMatrix, trainCategory):
    """
    Naive Bayes classifier training function.

    :param trainMatrix: matrix of training input vectors
    :param trainCategory: vector of class labels, one per document
    :return: p0Vect: log probability of each vocabulary word given the
             normal (class 0) documents;
             p1Vect: log probability of each vocabulary word given the
             abusive (class 1) documents;
             pAbusive: prior probability that a document is abusive
    """
    numTrainDocs = len(trainMatrix)
    numWords = len(trainMatrix[0])
    pAbusive = sum(trainCategory) / float(numTrainDocs)
    # p0Num, p1Num = zeros(numWords), zeros(numWords)
    # p0Denom, p1Denom = 0.0, 0.0
    # Laplace smoothing: initialize every word count to 1 and each
    # denominator to 2 so that no conditional probability is ever 0.
    p0Num, p1Num = ones(numWords), ones(numWords)
    p0Denom, p1Denom = 2.0, 2.0
    for i in range(numTrainDocs):
        if trainCategory[i] == 1:
            p1Num += trainMatrix[i]
            p1Denom += sum(trainMatrix[i])
        else:
            p0Num += trainMatrix[i]
            p0Denom += sum(trainMatrix[i])
    # p1Vect = p1Num/p1Denom
    # p0Vect = p0Num/p0Denom
    # Work in log space to avoid underflow: the product of many small
    # probabilities would otherwise round to 0.
    # Divide each word count by the total word count for the class.
    p1Vect = log(p1Num / p1Denom)
    p0Vect = log(p0Num / p0Denom)
    return p0Vect, p1Vect, pAbusive
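# A minimal usage sketch (not part of the original source): exercise
# trainNB0 on a toy bag-of-words matrix. The bare ones()/log() calls
# above imply a star import from numpy in the original module.
from numpy import array, log, ones

# Three documents over a four-word vocabulary; document 1 is abusive.
trainMatrix = array([[1, 1, 0, 0],
                     [0, 1, 1, 0],
                     [1, 0, 0, 1]])
trainCategory = [0, 1, 0]
p0Vect, p1Vect, pAbusive = trainNB0(trainMatrix, trainCategory)
print(pAbusive)  # 1/3: one abusive document out of three
print(p0Vect)    # log P(word | class 0), Laplace-smoothed
print(p1Vect)    # log P(word | class 1)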
def aF_oneway(*args, **kwargs):
    dim = kwargs.get("dim", None)
    arrays = args
    means = [ma.mean(a, dim) for a in arrays]
    vars = [ma.var(a, dim) for a in arrays]
    lens = [ma.sum(ma.array(ma.ones(a.shape), mask=ma.asarray(a).mask), dim)
            for a in arrays]
    alldata = ma.concatenate(arrays, dim if dim is not None else 0)
    bign = ma.sum(ma.array(ma.ones(alldata.shape), mask=alldata.mask), dim)
    sstot = ma.sum(alldata ** 2, dim) - (ma.sum(alldata, dim) ** 2) / bign
    ssbn = ma.sum([(ma.sum(a, dim) ** 2) / L for a, L in zip(arrays, lens)], dim)
    # print ma.sum(alldata, dim) ** 2 / bign, ssbn
    ssbn -= ma.sum(alldata, dim) ** 2 / bign
    sswn = sstot - ssbn
    dfbn = dfnum = float(len(args) - 1.0)
    dfwn = bign - len(args)  # + 1.0
    F = (ssbn / dfbn) / (sswn / dfwn)
    if F.ndim == 0 and dfwn.ndim == 0:
        return (F,
                scipy.stats.betai(0.5 * dfwn, 0.5 * dfnum,
                                  dfwn / float(dfwn + dfnum * F))
                if F is not ma.masked
                and 0.0 <= dfwn / float(dfwn + dfnum * F) <= 1.0
                else ma.masked)
    else:
        prob = [scipy.stats.betai(0.5 * dfden, 0.5 * dfnum,
                                  dfden / float(dfden + dfnum * f))
                if f is not ma.masked
                and 0.0 <= dfden / float(dfden + dfnum * f) <= 1.0
                else ma.masked
                for dfden, f in zip(dfwn, F)]
    return F, prob
def test_upsample_snow_masked_binary_logic_ones(self):
    n = 4
    size = 2
    ones_data = ma.ones((n, n), dtype=np.int16)
    # Integer division: a float shape would raise under Python 3.
    ones_correct = ma.ones((n // 2, n // 2), dtype=np.int16)
    ones_result = lib.upsample_snow(ones_data, lib.masked_binary_logic,
                                    size=size)
    npt.assert_array_equal(ones_correct, ones_result)
def test_testAverage2(self):
    # More tests of average.
    w1 = [0, 1, 1, 1, 1, 0]
    w2 = [[0, 1, 1, 1, 1, 0], [1, 0, 0, 0, 0, 1]]
    x = arange(6)
    assert_(allclose(average(x, axis=0), 2.5))
    assert_(allclose(average(x, axis=0, weights=w1), 2.5))
    y = array([arange(6), 2.0 * arange(6)])
    assert_(allclose(average(y, None),
                     np.add.reduce(np.arange(6)) * 3. / 12.))
    assert_(allclose(average(y, axis=0), np.arange(6) * 3. / 2.))
    assert_(allclose(average(y, axis=1),
                     [average(x, axis=0), average(x, axis=0) * 2.0]))
    assert_(allclose(average(y, None, weights=w2), 20. / 6.))
    assert_(allclose(average(y, axis=0, weights=w2),
                     [0., 1., 2., 3., 4., 10.]))
    assert_(allclose(average(y, axis=1),
                     [average(x, axis=0), average(x, axis=0) * 2.0]))
    m1 = zeros(6)
    m2 = [0, 0, 1, 1, 0, 0]
    m3 = [[0, 0, 1, 1, 0, 0], [0, 1, 1, 1, 1, 0]]
    m4 = ones(6)
    m5 = [0, 1, 1, 1, 1, 1]
    assert_(allclose(average(masked_array(x, m1), axis=0), 2.5))
    assert_(allclose(average(masked_array(x, m2), axis=0), 2.5))
    assert_(average(masked_array(x, m4), axis=0) is masked)
    assert_equal(average(masked_array(x, m5), axis=0), 0.0)
    assert_equal(count(average(masked_array(x, m4), axis=0)), 0)
    z = masked_array(y, m3)
    assert_(allclose(average(z, None), 20. / 6.))
    assert_(allclose(average(z, axis=0), [0., 1., 99., 99., 4.0, 7.5]))
    assert_(allclose(average(z, axis=1), [2.5, 5.0]))
    assert_(allclose(average(z, axis=0, weights=w2),
                     [0., 1., 99., 99., 4.0, 10.0]))
    a = arange(6)
    b = arange(6) * 3
    r1, w1 = average([[a, b], [b, a]], axis=1, returned=True)
    assert_equal(shape(r1), shape(w1))
    assert_equal(r1.shape, w1.shape)
    r2, w2 = average(ones((2, 2, 3)), axis=0, weights=[3, 1], returned=True)
    assert_equal(shape(w2), shape(r2))
    r2, w2 = average(ones((2, 2, 3)), returned=True)
    assert_equal(shape(w2), shape(r2))
    r2, w2 = average(ones((2, 2, 3)), weights=ones((2, 2, 3)), returned=True)
    assert_(shape(w2) == shape(r2))
    a2d = array([[1, 2], [0, 4]], float)
    a2dm = masked_array(a2d, [[0, 0], [1, 0]])
    a2da = average(a2d, axis=0)
    assert_(eq(a2da, [0.5, 3.0]))
    a2dma = average(a2dm, axis=0)
    assert_(eq(a2dma, [1.0, 3.0]))
    a2dma = average(a2dm, axis=None)
    assert_(eq(a2dma, 7. / 3.))
    a2dma = average(a2dm, axis=1)
    assert_(eq(a2dma, [1.5, 4.0]))
def default_input_values(shape, param=None):
    """
    Purpose: build a field of default values for a parameter you don't
    have, for flux computation.

    Args:
        shape (tuple): dimensions of the input field, lat x lon (e.g. 720x1440)
        param (str): name of the variable you want

    Returns:
        x (list or dict): default values to be used in coare3/4
    """
    vals = {}
    pwd = os.path.dirname(__file__)
    with open(os.path.join(pwd, 'list_default_values.txt'), 'r') as fid:
        lines = fid.readlines()
    for ll in lines:
        ll = ll.strip('\n')
        key = ll.split('=')[0]
        value = float(ll.split('=')[1])
        if key in ['zu', 'zt', 'zq', 'jcool', 'jwave', 'zi']:
            vals[key] = value
        elif key == 'lat':
            vals[key] = value * ma.ones((shape[0],))
        else:
            vals[key] = value * ma.ones(shape)
    # vals['zu'] = 10.0
    # vals['zt'] = 10.0
    # vals['zq'] = 10.0
    # vals['u'] = vals['u']*ma.ones(shape)
    # vals['us'] = vals['us']*ma.ones(shape)
    # vals['ts'] = 20.0*ma.ones(shape)
    # vals['t'] = 25.0*ma.ones(shape)
    # vals['Qs'] = 18.0*ma.ones(shape)  # g/kg
    # vals['Q'] = 15.0*ma.ones(shape)  # g/kg
    # vals['Rs'] = 150.0*ma.ones(shape)  # ben
    # vals['Rl'] = 370.0*ma.ones(shape)  # ben
    # vals['zi'] = 600.0  # 600.0 since Fairall
    # vals['rain'] = 0.*ma.ones(shape)
    # # vals['jcool'] = 0
    # # vals['jwave'] = 0
    # vals['twave'] = 6.0*ma.ones(shape)
    # vals['hwave'] = 0.0*ma.ones(shape)
    # vals['lat'] = 0.*ma.ones((shape[0],))
    # vals['P'] = 1008.*ma.ones(shape)
    # vals['rh'] = 80.0*ma.ones(shape)
    if param is None:
        x = vals
        # x = u,us,ts,t,Qs,Q,Rs,Rl,rain,zi,P,zu,zt,zq,lat,jcool,jwave,twave,hwave
    else:
        if param in vals:
            x = vals[param]
        else:
            raise Exception('%s is not defined.' % param)
    return x
def __call__(self, target):
    array = equi_n_discretization(self.array.copy(), intervals=5, dim=0)
    ind1, ind2 = self.test_indices(target)
    a1, a2 = array[ind1, :], array[ind2, :]
    dist1, dist2 = [], []
    dist = ma.zeros((array.shape[1], 2, 5))
    for i in range(5):
        dist1.append(ma.sum(ma.ones(a1.shape) * (a1 == i), 0))
        dist2.append(ma.sum(ma.ones(a2.shape) * (a2 == i), 0))
        dist[:, 0, i] = dist1[-1]
        dist[:, 1, i] = dist2[-1]
    return list(zip(self.keys, achisquare_indtest(np.array(dist), dim=1)))
def test_invalid_but_masked():
    data1 = ma.ones((10,), dtype=[('f', float), ('w', float), ('i', int)])
    data2 = ma.ones((10,), dtype=[('f', float), ('w', float), ('i', int)])
    data1.mask = False
    data1['f'].mask[2] = True
    data1['f'].data[2] = np.nan
    data2.mask = False
    data2['f'].mask[4] = True
    data2['f'].data[4] = np.inf
    result = accumulate(data1_in=data1, data2_in=data2,
                        add='f', weight='w', join='i')
    valid = result['w'] != 0
    assert np.all(result['f'][valid] == 1), 'Incorrect addition result.'
def _get_warped_array(
    input_file=None, band_idx=None, dst_bounds=None, dst_shape=None,
    dst_affine=None, dst_crs=None, resampling="nearest"
):
    """Extract a numpy array from a raster file."""
    LOGGER.debug("read array using rasterio")
    with rasterio.open(input_file, "r") as src:
        if dst_crs == src.crs:
            src_left, src_bottom, src_right, src_top = dst_bounds
        else:
            # Return empty array if destination bounds don't intersect
            # with file bounds.
            file_bbox = box(*src.bounds)
            tile_bbox = reproject_geometry(
                box(*dst_bounds), src_crs=dst_crs, dst_crs=src.crs)
            if not file_bbox.intersects(tile_bbox):
                LOGGER.debug("file bounding box does not intersect with tile")
                return ma.MaskedArray(
                    data=ma.zeros(dst_shape, dtype=src.profile["dtype"]),
                    mask=ma.ones(dst_shape),
                    fill_value=src.nodata)
            # Reproject tile bounds to source file SRS.
            src_left, src_bottom, src_right, src_top = transform_bounds(
                dst_crs, src.crs, *dst_bounds, densify_pts=21)

        if float('Inf') in (src_left, src_bottom, src_right, src_top):
            # Maybe not the best way to deal with it, but if the bounding
            # box cannot be translated, it is assumed that data is empty.
            LOGGER.debug("tile seems to be outside of input CRS bounds")
            return ma.MaskedArray(
                data=ma.zeros(dst_shape, dtype=src.profile["dtype"]),
                mask=ma.ones(dst_shape),
                fill_value=src.nodata)
        # Read data window.
        window = src.window(
            src_left, src_bottom, src_right, src_top, boundless=True)
        start = time.time()
        src_band = src.read(band_idx, window=window, boundless=True)
        LOGGER.debug("window read in %ss" % round(time.time() - start, 3))
        # Quick fix because None nodata is not allowed.
        nodataval = 0 if not src.nodata else src.nodata
        # Prepare reprojected array.
        dst_band = np.empty(dst_shape, src.dtypes[band_idx - 1])
        # Run rasterio's reproject().
        start = time.time()
        reproject(
            src_band, dst_band,
            src_transform=src.window_transform(window),
            src_crs=src.crs, src_nodata=nodataval,
            dst_transform=dst_affine, dst_crs=dst_crs,
            dst_nodata=nodataval,
            resampling=RESAMPLING_METHODS[resampling])
        LOGGER.debug(
            "window reprojected in %ss" % round(time.time() - start, 3))
        return ma.MaskedArray(dst_band, mask=dst_band == nodataval)
def test_both_masked():
    data1 = ma.ones((10,), dtype=[('f', float), ('w', float), ('i', int)])
    data2 = ma.ones((10,), dtype=[('f', float), ('w', float), ('i', int)])
    data1.mask = False
    data1['f'].mask[2:4] = True
    data2.mask = False
    data2['f'].mask[3:5] = True
    result = accumulate(data1_in=data1, data2_in=data2,
                        add='f', weight='w', join='i')
    assert not ma.isMA(result), 'Result should not be masked.'
    valid = result['w'] != 0
    assert np.all(result['f'][valid] == 1), 'Incorrect addition result.'
    assert np.array_equal(result['w'][1:6], (2, 1, 0, 1, 2)), \
        'Mask not used correctly.'
def __call__(self, target):
    array = equi_n_discretization(self.array.copy(), intervals=5, dim=1)
    ind1, ind2 = self.test_indices(target)
    a1, a2 = array[ind1, :], array[ind2, :]
    dist1, dist2 = [], []
    dist = ma.zeros((array.shape[1], 2, 5))
    for i in range(5):
        dist1.append(ma.sum(ma.ones(a1.shape) * (a1 == i), 0))
        dist2.append(ma.sum(ma.ones(a2.shape) * (a2 == i), 0))
        dist[:, 0, i] = dist1[-1]
        dist[:, 1, i] = dist2[-1]
    classinfo = entropy(np.array([len(ind1), len(ind2)]))
    E = ma.sum(entropy(dist, dim=1) * ma.sum(dist, 1), 1) / \
        ma.sum(ma.sum(dist, 1), 1)
    return list(zip(self.keys, classinfo - E))
def _create_cube(long_name="", var_name="", units="1",
                 dim_coords_and_dims=[], fill_value=-1):
    """
    Create an iris.cube.Cube given by its dimensions

    Kwargs:

    * long_name (string):
        Long description of the variable
    * var_name (string):
        Variable name
    * units (iris.unit.Unit or string):
        the unit of the variable
    * dim_coords_and_dims (list of iris.coords.DimCoord):
        the dimension coordinates of the variable

    Returns:
        An 'empty' iris.cube.Cube
    """
    shape = [x[0].shape[0] for x in dim_coords_and_dims]
    array = ma.ones(shape) * fill_value
    array.mask = True
    array.fill_value = fill_value
    if isinstance(units, str):
        units = Unit(units)
    return iris.cube.Cube(
        array,
        long_name=long_name,
        var_name=var_name,
        units=units,
        dim_coords_and_dims=dim_coords_and_dims
    )
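# A hypothetical call (a sketch, not from the original module): build a
# 1-D, fully masked cube along a latitude coordinate. Assumes numpy and
# iris.coords.DimCoord are importable alongside the helper above.
import numpy as np
from iris.coords import DimCoord

lat = DimCoord(np.arange(4, dtype=float), standard_name='latitude',
               units='degrees')
cube = _create_cube(long_name='air temperature', var_name='tas',
                    units='K', dim_coords_and_dims=[(lat, 0)])
print(cube.shape)  # (4,): every point masked, fill value -1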
def _evals_to_util(self, evals):
    '''
    Converts a 1d array of hand evals (strength of hands) into 2d
    utilities (win/loss) for hand i vs hand j in terms of the chips
    bet: 1, 0, -1 represent win, tie, loss respectively.

    Parameters
    ----------
    evals : 1d array of hand evals

    Returns
    -------
    util : 2d np.ma_array of int8
    '''
    util = ma.ones((len(evals), len(evals)), dtype=np.int8)
    removed = np.where(evals == -1)[0]
    util[removed, :] = ma.masked
    util[:, removed] = ma.masked
    hands = np.argsort(evals)[len(removed):]
    better = []
    while len(hands) >= 1:
        # All hands tying the current eval draw against each other.
        ties = np.where(evals == evals[hands[0]])[0]
        util[np.ix_(ties, ties)] = 0
        try:
            # The current hands take a loss against every hand
            # processed earlier in sort order.
            util[np.ix_(ties, better)] = -1
        except:
            pass
        better += list(ties)
        hands = hands[len(ties):]
    return util
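# A sketch of how this might behave (assuming, as the ascending argsort
# suggests, that lower eval values denote stronger hands and -1 marks a
# removed hand). Since the method never reads self, it can be probed
# directly:
import numpy as np
import numpy.ma as ma

evals = np.array([2, 5, 2, -1])  # hands 0 and 2 tie and beat hand 1; hand 3 removed
util = _evals_to_util(None, evals)
print(util)
# Row and column for hand 3 are masked; util[0, 1] == util[2, 1] == 1,
# util[1, 0] == util[1, 2] == -1, and the diagonal/tie entries are 0.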
def mask_rois(self):
    """
    Mask all pixels that are NOT in the user-supplied ROIs. These masked
    pixels will not be used in the pattern determination or be pattern
    subtracted. ROI strings are expected to have the format y1:y2.
    If roimirror is True, also apply each ROI to the opposite
    (top/bottom) half of the detector.
    """
    logger = logging.getLogger('mask_rois')
    if self.roi:
        logger.info('Masking around ROIs: %s', self.roi)
        self.roimask = ma.ones((self.naxis2, self.naxis1))
        self.roimask.mask = True
        for roi in self.roi:
            r = roi.split(':')
            if len(r) != 2:
                logger.error('ROI must have 2 values: y1:y2: %s', roi)
                raise SystemExit
            y1 = int(r[0]) - 1  # convert to zero-index
            y2 = int(r[1])      # zero-index + 1, since slicing excludes the upper limit
            logger.debug('...%d-%d', y1, y2)
            # Unmask the ROI: mask[y1:y2, x1:x2] = False
            self.roimask.mask[y1:y2, ] = False
            if self.roimirror:
                y3 = 1024 - y2
                y4 = 1024 - y1
                logger.debug('...%d-%d', y3, y4)
                self.roimask.mask[y3:y4, ] = False
        # Apply the ROI mask to the science data:
        self.mdata *= self.roimask
    return
def _safe_get_mesh(self):
    x, y, z = self._series.get_meshes()
    nx = ny = "nb_of_points_"
    if self._is_parametric:
        nx += 'u'
        ny += 'v'
    else:
        nx += 'x'
        ny += 'y'
    if not x.shape:
        x = x.max() * ma.ones((getattr(self._series, ny),
                               getattr(self._series, nx)))
    if not y.shape:
        y = y.max() * ma.ones((getattr(self._series, ny),
                               getattr(self._series, nx)))
    if not z.shape:
        z = z.max() * ma.ones((getattr(self._series, ny),
                               getattr(self._series, nx)))
    return x, y, z
def returnTableAsMatrix(self, tableName, originColName, destinationColName,
                        skimColName, fillValue=9999):
    tableRef = self.returnTableReference(tableName)
    # print('OLDER IMPLEMENTATION')
    origin = tableRef.col(originColName)
    destination = tableRef.col(destinationColName)
    skims = tableRef.col(skimColName)
    skimsValues = tableRef[0:]
    # Initialize matrix
    skimsMatrix = ones((max(origin) + 1, max(destination) + 1)) * fillValue
    # Populate matrix
    skimsMatrix[origin, destination] = skims
    print('Skims Values for O,D pair (1226, 896) and (1538, 1562) - ',
          skimsMatrix[1226, 896], skimsMatrix[1538, 1562])
    # raw_input()
    # Mask the fill value (was hardcoded as 9999, which breaks for other
    # fillValue arguments).
    return masked_equal(skimsMatrix, fillValue), unique(origin)
def test_known(self):
    dims = self.Blocks[0].dims
    data = ma.ones(dims) * sp.arange(1, dims[-1] + 1)
    data[0:-1:2, 0, 0, :] = 2 * sp.arange(1, dims[-1] + 1)
    for Data in self.Blocks:
        Data.data = data
    var = tools.calc_time_var_file(self.Blocks, 0, 0)
    self.assertTrue(sp.allclose(var.filled(),
                                sp.arange(1, dims[-1] + 1) ** 2 / 4.0))
def log_linear_vinterp(T, P, levs):
    '''
    # Author: Charles Doutriaux
    # Version 1.1
    # Expects a 2D field here, so there's no reorder (which I suspect
    # causes a memory leak)
    # email: [email protected]
    # Converts a field from sigma levels to pressure levels
    # Log-linear interpolation
    # Input
    # T    : temperature on sigma levels
    # P    : pressure field from TOP (level 0) to BOTTOM (last level)
    # levs : pressure levels to interpolate to (same units as P)
    # Output
    # t    : temperature on pressure levels (levs)
    # External: numpy.ma (the original used the legacy Numeric module)
    '''
    import numpy.ma as MA
    sh = P.shape
    nsigma = sh[0]              # number of sigma levels
    try:
        nlev = len(levs)        # number of pressure levels
    except TypeError:
        nlev = 1                # len(levs) breaks if only one level is passed
    t = []
    for ilv in range(nlev):     # loop through pressure levels
        try:
            lev = levs[ilv]     # get value for the level
        except TypeError:
            lev = levs          # only one level passed
        # The original used Numeric.Float; plain float is the numpy equivalent.
        Pabv = MA.ones(P[0].shape, dtype=float)
        Tabv = -Pabv            # temperature on sigma level above
        Tbel = -Pabv            # temperature on sigma level below
        Pbel = -Pabv            # pressure on sigma level below
        Pabv = -Pabv            # pressure on sigma level above
        for isg in range(1, nsigma):  # loop from the second sigma level to the last
            a = MA.greater(P[isg], lev)   # where the pressure is greater than lev
            b = MA.less(P[isg - 1], lev)  # where the pressure is less than lev
            # If lev lies between the two sigma levels, set Pabv, Pbel
            # and Tabv, Tbel.
            Pabv = MA.where(MA.logical_and(a, b), P[isg], Pabv)
            Tabv = MA.where(MA.logical_and(a, b), T[isg], Tabv)
            Pbel = MA.where(MA.logical_and(a, b), P[isg - 1], Pbel)
            Tbel = MA.where(MA.logical_and(a, b), T[isg - 1], Tbel)
        # Mask where there is no data below lev, then interpolate in log(P).
        tl = MA.masked_where(
            MA.equal(Pbel, -1.),
            MA.log(lev / MA.absolute(Pbel)) / MA.log(Pabv / Pbel)
            * (Tabv - Tbel) + Tbel)
        t.append(tl)            # add a level to the output
    # The original called asMA(t).astype(Numeric.Float32); MA.array is
    # the numpy.ma equivalent.
    return MA.array(t).astype('float32')
def test_masked_weighted_invalid():
    data_in = ma.ones((10,), dtype=[('x', float), ('y', float)])
    data_in['x'][2] = np.inf
    data_in['x'].mask[2] = True
    data_in['y'][7] = np.nan
    data_in['y'].mask[7] = True
    data_out = downsample(data_in, 2, weight='y')
    assert np.all(data_out['x'] == 1.)
    assert np.all(data_out['y'] == (2., 1., 2., 1., 2.))
def applyCityfilter(sar, L8_maskCity):
    r, c = sar.shape
    result = sar
    mask = np.ones((r, c))
    # Zero out every pixel flagged as city in the Landsat-8 mask.
    for i in range(0, r):
        for j in range(0, c):
            if L8_maskCity[i, j] != 0:
                mask[i, j] = 0
    result = result * mask
    return result, mask
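# A vectorized equivalent (a sketch, not from the original source): the
# pixel loop above amounts to a single boolean comparison, which numpy
# evaluates far faster on large scenes.
import numpy as np

def applyCityfilter_vectorized(sar, L8_maskCity):
    # mask is 1 where the city mask is zero, 0 elsewhere
    mask = (L8_maskCity == 0).astype(sar.dtype)
    return sar * mask, mask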
def generic_interp_hght(h, hght, field, log=False):
    '''
    Generic interpolation routine

    Parameters
    ----------
    h : number, numpy array
        Height (m) of the level for which the field variable is desired
    hght : numpy array
        The array of heights
    field : numpy array
        The variable which is being interpolated
    log : bool
        Flag to determine whether the 'field' variable is in log10 space

    Returns
    -------
    Value of the 'field' variable at the given height : number, numpy array
    '''
    if field.count() == 0 or hght.count() == 0:
        return ma.masked_where(ma.ones(np.shape(h)), h)  # JTS
    if ma.isMaskedArray(hght):
        # Multiplying by ones ensures that the result is an array, not a
        # single value ... which happens sometimes ... >.<
        not_masked1 = ~hght.mask * np.ones(hght.shape, dtype=bool)
    else:
        # dtype=bool so the array can be used as a boolean index below.
        not_masked1 = np.ones(hght.shape, dtype=bool)
    if ma.isMaskedArray(field):
        not_masked2 = ~field.mask * np.ones(field.shape, dtype=bool)
    else:
        not_masked2 = np.ones(field.shape, dtype=bool)
    not_masked = not_masked1 * not_masked2
    field_intrp = np.interp(h, hght[not_masked], field[not_masked],
                            left=ma.masked, right=ma.masked)
    if hasattr(h, 'shape') and h.shape == tuple():
        h = h[()]
    # Another bug fix: np.interp() returns masked values as nan.
    # We want ma.masked, dangit!
    field_intrp = ma.where(np.isnan(field_intrp), ma.masked, field_intrp)
    # ma.where() returns a 0-d array when the arguments are floats,
    # which confuses subsequent code.
    if hasattr(field_intrp, 'shape') and field_intrp.shape == tuple():
        field_intrp = field_intrp[()]
    if log:
        return 10 ** field_intrp
    else:
        return field_intrp
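# A small usage sketch (assumed, not from the original source):
# interpolate a masked temperature profile to 1500 m. The masked top
# level is skipped by the not_masked index above.
import numpy as np
import numpy.ma as ma

hght = ma.array([0., 1000., 2000., 3000.], mask=[False, False, False, True])
temp = ma.array([20., 14., 8., 2.], mask=[False, False, False, True])
t_1500 = generic_interp_hght(1500., hght, temp)
print(t_1500)  # 11.0: halfway between the 1000 m and 2000 m values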
def trainNB1(trainMatrix, trainCategory):
    numTrainDocs = len(trainMatrix)
    numWord = len(trainMatrix[0])
    pAbusive = sum(trainCategory) / float(numTrainDocs)
    # Laplace smoothing: counts start at 1, denominators at 2.
    p0Num = ones(numWord)
    p1Num = ones(numWord)
    p0Denom = 2.0
    p1Denom = 2.0
    for i in range(numTrainDocs):
        if trainCategory[i] == 1:
            p1Num += trainMatrix[i]
            p1Denom += sum(trainMatrix[i])
        else:
            p0Num += trainMatrix[i]
            p0Denom += sum(trainMatrix[i])
    # Log space avoids underflow when many probabilities are multiplied.
    p0Vect = log(p0Num / p0Denom)
    p1Vect = log(p1Num / p1Denom)
    return p0Vect, p1Vect, pAbusive
def test_masked_unweighted():
    data_in = ma.ones((10,), dtype=[('x', float), ('y', float)])
    data_out = downsample(data_in, 2)
    assert ma.isMA(data_out)
    assert np.array_equal(data_out, data_in[:5])
    data_in['x'].mask[2] = True
    data_in.mask[7] = (True, True)
    data_out = downsample(data_in, 2)
    assert np.array_equal(data_out, data_in[:5])
def test_missing_required_weight():
    data1 = ma.ones((10,), dtype=[('f', float)])
    data2 = np.ones((10,), dtype=[('f', float)])
    data1.mask = False
    data1['f'].mask[2:4] = True
    with pytest.raises(ValueError):
        accumulate(data1_in=data1, data2_in=data2, add='f')
    with pytest.raises(ValueError):
        accumulate(data1_in=data2, data2_in=data1, add='f')
def stocGradAscent0(dataMatrix, classLabels):
    m, n = shape(dataMatrix)
    alpha = 0.01
    weights = ones(n)
    # One stochastic pass: update the weights after every sample.
    for i in range(m):
        h = sigmoid(sum(dataMatrix[i] * weights))
        error = classLabels[i] - h
        weights = weights + alpha * error * dataMatrix[i]
    return weights
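# A runnable sketch (assumptions: a plain sigmoid helper and a tiny toy
# set; the bare ones()/shape() calls above imply a numpy star import in
# the original module).
from numpy import array, exp, ones, shape

def sigmoid(inX):
    return 1.0 / (1.0 + exp(-inX))

dataMatrix = array([[1.0, 0.1], [1.0, 0.9], [1.0, 1.8]])  # column 0 is the bias term
classLabels = [0, 1, 1]
w = stocGradAscent0(dataMatrix, classLabels)
print(w)  # one pass of stochastic updates; repeat passes to converge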
def test_known2(self):
    dims = self.Blocks[0].dims
    data = ma.ones(dims) * sp.arange(1, dims[-1] + 1)
    ii = 0.0
    for Data in self.Blocks:
        ii += 1.0
        Data.data = data * ii
    var = tools.calc_time_var_file(self.Blocks, 0, 0)
    self.assertTrue(sp.allclose(var.filled(),
                                sp.arange(1, dims[-1] + 1) ** 2 / 4.0))
def trainNB0(trainMatrix, trainCategory):
    numTrainDocs = len(trainMatrix)  # total number of training documents
    numWords = len(trainMatrix[0])   # length of the vocabulary
    # Bernoulli prior: class-1 documents / total training documents
    pAbusive = sum(trainCategory) / float(numTrainDocs)
    p0Num = ones(numWords)
    p1Num = ones(numWords)
    p0Denom = 2.0
    p1Denom = 2.0
    for i in range(numTrainDocs):  # Bernoulli model
        if trainCategory[i] == 1:
            # count, per word, the documents of this class containing it
            p1Num += trainMatrix[i]
        else:
            p0Num += trainMatrix[i]
    p0Denom += (numTrainDocs - sum(trainCategory))
    p1Denom += sum(trainCategory)
    # Bernoulli conditional probability:
    # (documents containing the word + 1) / (documents in the class + 2)
    p1Vect = log(p1Num / p1Denom)
    p0Vect = log(p0Num / p0Denom)
    return p0Vect, p1Vect, pAbusive
def test_one_masked():
    data1 = ma.ones((10,), dtype=[('f', float), ('w', float)])
    data2 = np.ones((10,), dtype=[('f', float), ('w', float)])
    data1.mask = False
    data1['f'].mask[2] = True
    result = accumulate(data1_in=data1, data2_in=data2, add='f', weight='w')
    assert not ma.isMA(result), 'Result should not be masked.'
    assert np.all(result['f'] == 1), 'Incorrect addition result.'
    assert np.array_equal(result['w'][1:4], (2, 1, 2)), \
        'Mask not used correctly.'
def trainNB0(trainMatrix, trainLabels):
    numTrainDocs = len(trainMatrix)
    numWords = len(trainMatrix[0])
    p0Num = ones(numWords)
    p1Num = ones(numWords)
    p0Denom = 2.0
    p1Denom = 2.0
    for i in range(numTrainDocs):
        if trainLabels[i] == 1:
            # p(w|c) is computed here as the total count of word w in the
            # class divided by the total word count of class c, i.e. all
            # words of a class are pooled into one big bag of words.
            # It could also be computed per document, in which case
            # p(w=i|c) = (class-c documents containing word i) /
            # (total number of class-c documents).
            p1Denom += sum(trainMatrix[i])
            p1Num += trainMatrix[i]
        else:
            p0Num += trainMatrix[i]
            p0Denom += sum(trainMatrix[i])
    p0Vec = log(p0Num / p0Denom)
    p1Vec = log(p1Num / p1Denom)
    pAbusive = sum(trainLabels) / len(trainLabels)
    return p0Vec, p1Vec, pAbusive
def graAscent02(dataMatIn, labelMatIn):
    dataMatrix = array(dataMatIn)
    m, n = shape(dataMatIn)
    alpha = 0.01
    weights = ones(n)
    for i in range(m):
        h = sigmod(sum(dataMatrix[i] * weights))
        err = labelMatIn[i] - h
        offset = alpha * err * dataMatrix[i]
        weights = weights + offset
    return weights
def generic_interp_pres(p, pres, field):
    '''
    Generic interpolation routine

    Parameters
    ----------
    p : number, numpy array
        Pressure (hPa) of the level for which the field variable is desired
    pres : numpy array
        The array of pressure
    field : numpy array
        The variable which is being interpolated

    Returns
    -------
    Value of the 'field' variable at the given pressure : number, numpy array
    '''
    if field.count() == 0 or pres.count() == 0:
        return ma.masked_where(ma.ones(np.shape(p)), p)  # JTS
    if ma.isMaskedArray(pres):
        not_masked1 = ~pres.mask * np.ones(pres.shape, dtype=bool)
    else:
        not_masked1 = np.ones(pres.shape, dtype=bool)
        not_masked1[:] = True
    if ma.isMaskedArray(field):
        not_masked2 = ~field.mask * np.ones(field.shape, dtype=bool)
    else:
        not_masked2 = np.ones(field.shape, dtype=bool)
        not_masked2[:] = True
    not_masked = not_masked1 * not_masked2
    field_intrp = np.interp(p, pres[not_masked], field[not_masked],
                            left=ma.masked, right=ma.masked)
    if hasattr(p, 'shape') and p.shape == tuple():
        p = p[()]
    # Another bug fix: np.interp() returns masked values as nan.
    # We want ma.masked, dangit!
    field_intrp = ma.where(np.isnan(field_intrp), ma.masked, field_intrp)
    # ma.where() returns a 0-d array when the arguments are floats,
    # which confuses subsequent code.
    if hasattr(field_intrp, 'shape') and field_intrp.shape == tuple():
        field_intrp = field_intrp[()]
    return field_intrp
def test_masked_weighted():
    data_in = ma.ones((10,), dtype=[('x', float), ('y', float)])
    data_out = downsample(data_in, 2, weight='y')
    assert ma.isMA(data_out)
    assert np.all(data_out['x'] == 1.)
    assert np.all(data_out['y'] == 2.)
    data_in['x'].mask[2] = True
    data_in.mask[7] = (True, True)
    data_out = downsample(data_in, 2, weight='y')
    assert np.all(data_out['x'] == 1.)
    assert np.all(data_out['y'] == (2., 1., 2., 1., 2.))
def graAscent01(dataMatIn, labelMatIn):
    dataMatrix = mat(dataMatIn)
    labelMatrix = mat(labelMatIn).transpose()
    m, n = shape(dataMatrix)
    alpha = 0.0001
    maxCycle = 500
    weights = ones((n, 1))
    # Full-batch gradient ascent: every sample contributes to each update.
    for k in range(maxCycle):
        h = sigmod(dataMatrix * weights)
        err = labelMatrix - h
        weights = weights + alpha * dataMatrix.transpose() * err
    return weights
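# Hypothetical usage (a sketch, not from the original source): batch
# gradient ascent for logistic regression on a toy set; the sigmod
# helper mirrors the name used above.
from numpy import exp, mat, ones, shape

def sigmod(inX):
    return 1.0 / (1.0 + exp(-inX))

dataMatIn = [[1.0, 0.1, 0.2], [1.0, 0.8, 0.9], [1.0, 1.5, 1.1]]  # column 0 is the bias term
labelMatIn = [0, 1, 1]
w = graAscent01(dataMatIn, labelMatIn)
print(w)  # a 3x1 weight matrix after 500 full-batch updates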
def stocGradAscent1(dataMatrix, classLabels, numIter=150):
    m, n = shape(dataMatrix)
    weights = ones(n)
    for j in range(numIter):
        dataIndex = list(range(m))  # samples not yet visited this pass
        for i in range(m):
            # The step size decays with iteration but never reaches 0.
            alpha = 0.01 + 4 / (1.0 + j + i)
            # Draw a random remaining sample, then drop it so each pass
            # visits every sample exactly once.
            randIndex = int(random.uniform(0, len(dataIndex)))
            sample = dataIndex[randIndex]
            h = sigmoid(sum(dataMatrix[sample] * weights))
            error = classLabels[sample] - h
            weights = weights + alpha * error * dataMatrix[sample]
            del dataIndex[randIndex]
    return weights
def test_propagated_array_mask():
    wlen = np.arange(10)
    flux = ma.ones((10,))
    flux.mask = False
    flux[2] = ma.masked
    result = redshift(z_in=0, z_out=1, rules=[
        {'name': 'wlen', 'exponent': +1, 'array_in': wlen},
        {'name': 'flux', 'exponent': -1, 'array_in': flux}])
    assert ma.isMA(result)
    assert not result['wlen'].mask[2], 'Input mask not propagated.'
    assert result['flux'].mask[2], 'Input mask not propagated.'
    assert not result['flux'].mask[3], 'Input mask not propagated.'
def test_propagated_data_mask():
    data_in = ma.ones((10,), dtype=[
        ('wlen', float), ('flux', float), ('extra', int)])
    data_in['wlen'][1] = ma.masked
    data_in['extra'][2] = ma.masked
    result = redshift(z_in=0, z_out=1, data_in=data_in, rules=[
        {'name': 'wlen', 'exponent': +1},
        {'name': 'flux', 'exponent': -1}])
    assert ma.isMA(result)
    assert not result['wlen'].mask[0], 'Input mask not propagated.'
    assert not result['flux'].mask[0], 'Input mask not propagated.'
    assert result['wlen'].mask[1], 'Input mask not propagated.'
    assert result['extra'].mask[2], 'Input mask not propagated.'
def setUp(self):
    self.shape = (5, 5, 20)
    self.ntime = 15
    self.map = ma.zeros(self.shape, dtype=float)
    self.noise_inv = sp.zeros(self.shape, dtype=float)
    self.data = ma.ones((self.ntime, self.shape[-1]))
    self.centre = (90.0, 0.0, 100.0)
    self.spacing = 1.0
    self.ra = 0.2 * (sp.arange(self.ntime) - self.ntime / 2.0) + self.centre[0]
    self.dec = 0.2 * (sp.arange(self.ntime) - self.ntime / 2.0) + self.centre[1]
    self.ra_inds = tools.calc_inds(self.ra, self.centre[0], self.shape[0],
                                   self.spacing)
    self.dec_inds = tools.calc_inds(self.dec, self.centre[1], self.shape[1],
                                    self.spacing)
def test_scales_rebined(self) : self.Data.set_data(ma.ones((10,4,2,600))) self.Data.verify() rebin_freq.rebin(self.Data, 1.0) DataCopy = copy.deepcopy(self.Data) DataCopy.calc_freq() calibrate.multiply_by_cal(self.Data, self.CalData) gains = sp.array([1,sp.sqrt(5),sp.sqrt(5),5]) gains.shape = (1,4,1,1) expected = (DataCopy.data*gains*DataCopy.freq) # don't expect this test to be perfect, 4 figures good enough. Edge # bins adversly affected by missing frequencies in CalData. self.assertTrue(ma.allclose(expected[:,:,:1:-1], self.Data.data[:,:,:1:-1], 4))
def graAscent03(dataMatIn, labelMatIn):
    dataMatrix = array(dataMatIn)
    m, n = shape(dataMatIn)
    weights = ones(n)
    numIter = 15
    for j in range(numIter):
        dataIndex = list(range(m))  # samples not yet visited this pass
        for i in range(m):
            # The step size decays with iteration but never reaches 0.
            alpha = 4.0 / (i + j + 1.0) + 0.001
            # Draw a random remaining sample, then drop it so each pass
            # visits every sample exactly once.
            randIndex = int(random.uniform(0, len(dataIndex)))
            sample = dataIndex[randIndex]
            h = sigmod(sum(dataMatrix[sample] * weights))
            err = labelMatIn[sample] - h
            weights = weights + alpha * err * dataMatrix[sample]
            del dataIndex[randIndex]
    return weights
def test_testInplace(self):
    # Test of inplace operations and rich comparisons
    y = arange(10)

    x = arange(10)
    xm = arange(10)
    xm[2] = masked
    x += 1
    assert_(eq(x, y + 1))
    xm += 1
    assert_(eq(x, y + 1))

    x = arange(10)
    xm = arange(10)
    xm[2] = masked
    x -= 1
    assert_(eq(x, y - 1))
    xm -= 1
    assert_(eq(xm, y - 1))

    x = arange(10) * 1.0
    xm = arange(10) * 1.0
    xm[2] = masked
    x *= 2.0
    assert_(eq(x, y * 2))
    xm *= 2.0
    assert_(eq(xm, y * 2))

    x = arange(10) * 2
    xm = arange(10)
    xm[2] = masked
    x //= 2
    assert_(eq(x, y))
    xm //= 2
    assert_(eq(x, y))

    x = arange(10) * 1.0
    xm = arange(10) * 1.0
    xm[2] = masked
    x /= 2.0
    assert_(eq(x, y / 2.0))
    xm /= arange(10)
    assert_(eq(xm, ones((10,))))

    x = arange(10).astype(np.float32)
    xm = arange(10)
    xm[2] = masked
    x += 1.
    assert_(eq(x, y + 1.))
def running_mean(a, window):
    """
    Perform a running mean on `a` with window length `window`.
    The window/2 values at the beginning and end are masked.
    """
    shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
    strides = a.strides + (a.strides[-1],)
    xx = np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
    # Start from an all-masked array with the same shape as the input.
    xa = ma.masked_values(ma.ones(a.shape), 1.)
    xx = np.mean(xx, -1)
    # Integer division: a float slice index would raise under Python 3.
    half = window // 2
    xa[half:len(xx) + half, ...] = xx
    return xa
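# A quick check (assumed usage, 1-D input): a 3-point running mean of
# 0..9 leaves one masked value at each end.
import numpy as np
import numpy.ma as ma

a = np.arange(10, dtype=float)
print(running_mean(a, 3))
# [-- 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 --]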
def achisquare_indtest(observed, dim=None):
    if observed.ndim == 2:
        observed = ma.array([observed])
        if dim is not None:
            dim += 1
    if dim is None:
        dim = observed.ndim - 2
    rowtotal = ma.sum(observed, dim + 1)
    coltotal = ma.sum(observed, dim)
    total = ma.sum(rowtotal, dim)
    ones = ma.array(ma.ones(observed.shape))
    expected = ones * rowtotal.reshape(rowtotal.shape[:dim] + (-1, 1))
    a = ones * coltotal[..., np.zeros(observed.shape[dim], dtype=int), :]
    expected = expected * a / total.reshape((-1, 1, 1))
    chisq = ma.sum(ma.sum((observed - expected) ** 2 / expected, dim + 1), dim)
    return chisq
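# A hypothetical sanity check (not from the original source): for a
# perfectly independent contingency table the statistic should be 0.
import numpy as np
import numpy.ma as ma

observed = ma.array([[10., 20.],
                     [30., 60.]])  # rows proportional, hence independent
print(achisquare_indtest(observed))  # [0.0]: a length-1 result for 2-D input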
def returnTableAsMatrix(self, tableName, originColName, destinationColName,
                        skimColName, fileLoc, fillValue=9999):
    data = load("%s/%s.npy" % (fileLoc, tableName))
    origin = data[:, 0].astype(int)
    destination = data[:, 1].astype(int)
    skims = data[:, 2]
    # print(origin[:5])
    # Initialize matrix
    skimsMatrix = ones((max(origin) + 1, max(destination) + 1)) * fillValue
    # Populate matrix
    skimsMatrix[origin, destination] = skims
    # Mask the fill value (was hardcoded as 9999, which breaks for other
    # fillValue arguments).
    return masked_equal(skimsMatrix, fillValue), unique(origin)
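# An illustrative sketch (hypothetical data, not the original .npy file)
# of how the origin/destination/skim triples populate the masked matrix:
from numpy import array, ones, unique
from numpy.ma import masked_equal

data = array([[1, 2, 7.5],
              [2, 1, 9.0]])
origin = data[:, 0].astype(int)
destination = data[:, 1].astype(int)
skims = data[:, 2]
skimsMatrix = ones((max(origin) + 1, max(destination) + 1)) * 9999
skimsMatrix[origin, destination] = skims
skimsMatrix = masked_equal(skimsMatrix, 9999)
print(skimsMatrix[1, 2], skimsMatrix[2, 1])  # 7.5 9.0; all other cells masked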
def test_known_with_masked(self):
    dims = self.Blocks[0].dims
    data = ma.ones(dims) * sp.arange(1, dims[-1] + 1)
    data[0:-1:2, 0, 0, :] = 2 * sp.arange(1, dims[-1] + 1)
    for Data in self.Blocks:
        Data.data = data
    self.Blocks[0].data[2, 0, 0, 43] = ma.masked
    self.Blocks[0].data[7, 0, 0, 43] = ma.masked
    self.Blocks[1].data[4, 0, 0, 43] = ma.masked
    self.Blocks[1].data[3, 0, 0, 43] = ma.masked
    self.Blocks[0].data[:, 0, 0, 103] = ma.masked
    self.Blocks[1].data[:, 0, 0, 103] = ma.masked
    self.Blocks[0].data[:, 0, 0, 554] = ma.masked
    self.Blocks[1].data[1:, 0, 0, 554] = ma.masked
    var = tools.calc_time_var_file(self.Blocks, 0, 0)
    expected = ma.arange(1, dims[-1] + 1) ** 2 / 4.0
    expected[103] = ma.masked
    expected[554] = ma.masked
    self.assertTrue(sp.allclose(var.filled(-1), expected.filled(-1)))