def get_T63_landseamask(shift_lon, mask_antarctica=True, area='land'):
    """
    get JSBACH T63 land sea mask
    the LS mask is read from the JSBACH init file

    Parameters
    ----------
    area : str
        ['land','ocean']: when 'land', the returned mask is True on land
        pixels; for 'ocean' it is vice versa. In any other case you get a
        valid field everywhere (globally).
    mask_antarctica : bool
        if True, then the mask is FALSE over Antarctica (< 60S)
    """
    ls_file = get_data_pool_directory() \
        + 'variables/land/land_sea_mask/jsbach_T63_GR15_4tiles_1992.nc'
    ls_mask = Data(ls_file, 'slm', read=True, label='T63 land-sea mask',
                   lat_name='lat', lon_name='lon', shift_lon=shift_lon)
    if area == 'land':
        msk = ls_mask.data > 0.
    elif area == 'ocean':
        msk = ls_mask.data == 0.
    else:
        msk = np.ones(ls_mask.data.shape).astype('bool')

    ls_mask.data[~msk] = 0.
    ls_mask.data[msk] = 1.
    ls_mask.data = ls_mask.data.astype('bool')
    if mask_antarctica:
        ls_mask.data[ls_mask.lat < -60.] = False

    return ls_mask
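# A hedged usage sketch (not part of the library): it assumes the JSBACH
# init file is actually available in the data pool directory.
def _example_T63_landseamask_usage():
    land = get_T63_landseamask(shift_lon=True, area='land')
    ocean = get_T63_landseamask(shift_lon=True, area='ocean')
    # boolean masks: True on the requested surface type,
    # Antarctica set to False by default
    print 'Number of land pixels: ', land.data.sum()
    print 'Number of ocean pixels: ', ocean.data.sum()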
def setUp(self):
    D = Data(None, None)
    D.data = np.random.random((10, 20))
    lon = np.arange(-10., 10.)      # -10 ... 9
    lat = np.arange(-60., 50., 2.)  # -60 ... 48
    D.lon, D.lat = np.meshgrid(lon, lat)
    self.x = D
def test_read_binary_subset_int(self):
    # INT16 = H
    fname = tempfile.mktemp()
    f = open(fname, 'w')
    ref = (self.x * 10).astype('int16')
    f.write(ref)
    f.close()

    D = Data(None, None)
    f = open(fname, 'r')
    ny, nx = self.x.shape
    nt = 1

    # test 1: read entire file
    file_content = D._read_binary_subset2D(f, 2, ny=ny, nx=nx,
                                           xbeg=0, xend=nx, ybeg=0, yend=ny)
    d = np.reshape(np.asarray(struct.unpack('H' * ny * nx * nt, file_content)),
                   (ny, nx))
    self.assertTrue(np.all(d - ref == 0.))

    # test 2: read subset with 1-values only
    ny1 = self.ymax - self.ymin
    nx1 = self.xmax - self.xmin
    nt1 = 1
    file_content = D._read_binary_subset2D(f, 2, ny=ny, nx=nx,
                                           xbeg=self.xmin, xend=self.xmax,
                                           ybeg=self.ymin, yend=self.ymax)
    d1 = np.reshape(np.asarray(struct.unpack('H' * ny1 * nx1 * nt1, file_content)),
                    (ny1, nx1))
    self.assertTrue(np.all(d1 - ref[self.ymin:self.ymax, self.xmin:self.xmax] == 0.))
def test_read_binary_subset_Data_int(self):
    # binary data from subset in Data object
    # write binary test data
    fname = tempfile.mktemp()
    f = open(fname, 'w')
    tmp = (np.random.random(self.x.shape) * 100.).astype('int16')
    f.write(tmp)
    f.close()

    D = Data(None, None)
    D.filename = fname
    ny, nx = self.x.shape
    latmin = self.lat[self.ymin]
    latmax = self.lat[self.ymax]
    lonmin = self.lon[self.xmin]
    lonmax = self.lon[self.xmax]
    D._read_binary_file(nt=1, dtype='int16',
                        latmin=latmin, latmax=latmax,
                        lonmin=lonmin, lonmax=lonmax,
                        lat=self.lat, lon=self.lon)
    self.assertTrue(np.all(D.data - tmp[self.ymin:self.ymax + 1, self.xmin:self.xmax + 1] == 0.))
def test_mean_model():
    # The following routine validates the MeanModel() class
    print('Here we go')

    # generate some sample data ---
    x = Data(None, None)
    x.data = np.random.random((10, 20, 30))
    x.label = 'nothing'

    y = x.mulc(0.3)
    z = x.mulc(0.5)
    m = x.add(y).add(z).divc(3.)
    r = m.div(x)  # gives 0.6 as reference solution

    # generate Model instances and store Data objects as 'variables' ---
    dic_variables = ['var1', 'var2']

    X = Model(None, dic_variables, name='x', intervals='season')
    X.variables = {'var1': x, 'var2': x}
    Y = Model(None, dic_variables, name='y', intervals='season')
    Y.variables = {'var1': y, 'var2': y}
    Z = Model(None, dic_variables, name='z', intervals='season')
    Z.variables = {'var1': z, 'var2': z}

    # ... now try the multimodel ensemble
    M = MeanModel(dic_variables, intervals='season')
    M.add_member(X)
    M.add_member(Y)
    M.add_member(Z)
    M.ensmean()  # calculate ensemble mean

    # M.variables['var2'].div(x).data should give 0.6
    npt.assert_equal(np.all(np.abs(1. - M.variables['var2'].div(x).data / 0.6) < 0.00000001), True)
def setUp(self):
    D = Data(None, None)
    tmp = np.random.random((55, 20))
    D.data = np.ma.array(tmp, mask=tmp != tmp)
    lon = np.arange(-10., 10.)      # -10 ... 9
    lat = np.arange(-60., 50., 2.)  # -60 ... 48
    LON, LAT = np.meshgrid(lon, lat)
    D.lon = np.ma.array(LON, mask=LON != LON)
    D.lat = np.ma.array(LAT, mask=LAT != LAT)
    self.x = D
def test_read_full_binary_file_double(self):
    # write binary test data
    fname = tempfile.mktemp()
    f = open(fname, 'w')
    f.write(self.x)
    f.close()

    D = Data(None, None)
    D.filename = fname
    ny, nx = self.x.shape
    D._read_binary_file(ny=ny, nx=nx, nt=1, dtype='double')
    self.assertTrue(np.all(D.data - self.x == 0.))
def get_rainfall_data(self, interval='season'):
    """
    get rainfall data for JSBACH

    returns Data object
    """
    if interval != 'season':
        raise ValueError('Invalid value for interval: %s' % interval)

    # /// PREPROCESSING: seasonal means ///
    s_start_time = str(self.start_time)[0:10]
    s_stop_time = str(self.stop_time)[0:10]

    filename1 = self.data_dir + self.experiment + '_echam6_BOT_mm_1980_sel.nc'
    tmp = pyCDO(filename1, s_start_time, s_stop_time).seldate()
    tmp1 = pyCDO(tmp, s_start_time, s_stop_time).seasmean()
    filename = pyCDO(tmp1, s_start_time, s_stop_time).yseasmean()

    # /// READ DATA ///
    # 1) land / sea mask
    ls_mask = get_T63_landseamask(self.shift_lon)

    # 2) precipitation data; the variable name differs between files,
    #    therefore try 'var4' first and fall back to 'var142'
    try:
        v = 'var4'
        rain = Data(filename, v, read=True, scale_factor=86400.,
                    label='MPI-ESM ' + self.experiment, unit='mm/day',
                    lat_name='lat', lon_name='lon',
                    shift_lon=self.shift_lon, mask=ls_mask.data.data)
    except Exception:
        v = 'var142'
        rain = Data(filename, v, read=True, scale_factor=86400.,
                    label='MPI-ESM ' + self.experiment, unit='mm/day',
                    lat_name='lat', lon_name='lon',
                    shift_lon=self.shift_lon, mask=ls_mask.data.data)

    return rain
def test_rasterize_init(self):
    x = Data(None, None)
    x._init_sample_object(ny=1, nx=272)
    x.lon = np.random.random(272) * 10. + 5.  # 5 ... 15
    x.lat = np.random.random(272) * 20. + 0.  # 0 ... 20

    lon = np.random.random((10, 20))
    lat = np.random.random((30, 20))
    with self.assertRaises(ValueError):
        x._rasterize(lon, lat, radius=0.1)

    lon = np.random.random((10, 20))
    lat = np.random.random((10, 20))
    with self.assertRaises(ValueError):
        x._rasterize(lon, lat, radius=None)
def get_tree_fraction(self, interval='season'):
    """
    todo implement this for data from a real run !!!
    """
    if interval != 'season':
        raise ValueError('Other temporal sampling than SEASON not supported yet for JSBACH BOT files, sorry')

    ls_mask = get_T63_landseamask(self.shift_lon)

    filename = '/home/m300028/shared/dev/svn/trstools-0.0.1/lib/python/pyCMBS/framework/external/vegetation_benchmarking/VEGETATION_COVER_BENCHMARKING/example/historical_r1i1p1-LR_1850-2005_forest_shrub.nc'
    v = 'var12'
    tree = Data(filename, v, read=True,
                label='MPI-ESM tree fraction ' + self.experiment, unit='-',
                lat_name='lat', lon_name='lon', shift_lon=self.shift_lon,
                mask=ls_mask.data.data,
                start_time=pl.num2date(pl.datestr2num('2001-01-01')),
                stop_time=pl.num2date(pl.datestr2num('2001-12-31')))

    return tree
def get_albedo_data(self, interval='season'):
    """
    get albedo data for JSBACH

    returns Data object
    """
    if interval != 'season':
        raise ValueError('Other temporal sampling than SEASON not supported yet for JSBACH BOT files, sorry')

    v = 'var176'

    filename = self.data_dir + 'data/model1/' + self.experiment + '_echam6_BOT_mm_1979-2006_albedo_yseasmean.nc'
    ls_mask = get_T63_landseamask(self.shift_lon)

    albedo = Data(filename, v, read=True,
                  label='MPI-ESM albedo ' + self.experiment, unit='-',
                  lat_name='lat', lon_name='lon', shift_lon=self.shift_lon,
                  mask=ls_mask.data.data)

    return albedo
def setUp(self):
    n = 1000  # slows down significantly! constraint is percentile test
    x = sc.randn(n) * 100.  # generate dummy data
    self.D = Data(None, None)
    d = np.ones((n, 1, 1))
    self.D.data = d
    self.D.data[:, 0, 0] = x
    self.D.data = np.ma.array(self.D.data, mask=self.D.data != self.D.data)
    self.D.verbose = True
    self.D.unit = 'myunit'
    self.D.label = 'testlabel'
    self.D.filename = 'testinputfilename.nc'
    self.D.varname = 'testvarname'
    self.D.long_name = 'This is the longname'
    self.D.time = np.arange(n) + pl.datestr2num('2001-01-01')
    self.D.time_str = "days since 0001-01-01 00:00:00"
    self.D.calendar = 'gregorian'
    self.D.oldtime = False

    # generate dummy Model object
    data_dir = './test/'
    varmethods = {'albedo': 'get_albedo()', 'sis': 'get_sis()'}
    self.model = models.Model(data_dir, varmethods, name='testmodel',
                              intervals='monthly')
    sis = self.D.copy()
    sis.mulc(5., copy=False)
    sis.label = 'sisdummy'
    alb = self.D.copy()
    alb.label = 'albedodummy'
    # add some dummy data variables
    self.model.variables = {'albedo': alb, 'sis': sis}
def get_temperature_2m(self, interval=None):
    """
    return data object of
    a) seasonal means for air temperature
    b) global mean timeseries for TAS at original temporal resolution
    """
    print 'Needs revision to support CMIP RAWDATA!!'
    assert False

    if interval != 'season':
        raise ValueError('Other data than seasonal not supported at the moment for CMIP5 data and temperature!')

    # original data
    filename1 = self.data_dir + 'tas/' + self.model + '/' + 'tas_Amon_' + self.model + '_' + self.experiment + '_ensmean.nc'

    force_calc = False

    if self.start_time is None:
        raise ValueError('Start time needs to be specified')
    if self.stop_time is None:
        raise ValueError('Stop time needs to be specified')

    s_start_time = str(self.start_time)[0:10]
    s_stop_time = str(self.stop_time)[0:10]

    tmp = pyCDO(filename1, s_start_time, s_stop_time, force=force_calc).seldate()
    tmp1 = pyCDO(tmp, s_start_time, s_stop_time).seasmean()
    filename = pyCDO(tmp1, s_start_time, s_stop_time).yseasmean()

    if not os.path.exists(filename):
        print 'WARNING: Temperature file not found: ', filename
        return None

    tas = Data(filename, 'tas', read=True, label=self._unique_name, unit='K',
               lat_name='lat', lon_name='lon', shift_lon=False)

    tasall = Data(filename1, 'tas', read=True, label=self._unique_name, unit='K',
                  lat_name='lat', lon_name='lon', shift_lon=False)
    if tasall.time_cycle != 12:
        raise ValueError('Timecycle of 12 expected here!')

    tasmean = tasall.fldmean()
    retval = (tasall.time, tasmean, tasall)
    del tasall

    tas.data = np.ma.array(tas.data, mask=tas.data < 0.)

    return tas, retval
def test_SingleMap_add_cyclic(self):
    fname = '/home/m300028/shared/data/SEP/variables/land/Ta_2m/cru_ts_3_00.1901.2006.tmp_miss_t63.nc'
    ofile = 'world.png'
    if os.path.exists(ofile):
        os.remove(ofile)
    d = Data(fname, 'tmp', read=True)
    map_plot(d, use_basemap=True, savegraphicfile=ofile)
    if os.path.exists(ofile):
        os.remove(ofile)
def setUp(self):
    self.D = Data(None, None)
    self.D._init_sample_object(nt=1000, ny=1, nx=1)

    # generate dummy Model object
    data_dir = '.' + os.sep + 'test' + os.sep
    varmethods = {'albedo': 'get_albedo()', 'sis': 'get_sis()'}
    self.model = models.Model(data_dir, varmethods, name='testmodel',
                              intervals='monthly')
    sis = self.D.copy()
    sis.mulc(5., copy=False)
    sis.label = 'sisdummy'
    alb = self.D.copy()
    alb.label = 'albedodummy'
    # add some dummy data variables
    self.model.variables = {'albedo': alb, 'sis': sis}
def __init__(self, filename, gridfile, varname, read=False, **kwargs):
    """
    Parameters
    ----------
    filename : str
        filename of data file
    gridfile : str
        filename of grid definition file
    varname : str
        name of variable to handle
    read : bool
        specify if data should be read immediately
    """
    Data.__init__(self, filename, varname, **kwargs)

    self.gridfile = gridfile
    self.gridtype = 'unstructured'
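# A hedged usage sketch: the snippet above only shows the constructor, so the
# owning class name is not known here; 'UnstructuredData' and the file names
# below are hypothetical and used purely for illustration.
#
#     u = UnstructuredData('tas_icon.nc', 'icon_grid.nc', 'tas', read=False)
#     print u.gridtype   # -> 'unstructured'
#     print u.gridfile   # -> 'icon_grid.nc'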
def setUp(self):
    self.nx = 20
    self.ny = 10
    self.tempfile = tempfile.mktemp(suffix='.nc')
    self.gfile1 = tempfile.mktemp(suffix='.nc')
    self.gfile2 = tempfile.mktemp(suffix='.nc')
    self.gfile3 = tempfile.mktemp(suffix='.nc')
    self.x = Data(None, None)
    self.x._init_sample_object(nt=10, ny=self.ny, nx=self.nx)
    self.x.save(self.tempfile, varname='myvar')

    # generate some arbitrary geometry files
    F = NetCDFHandler()
    F.open_file(self.gfile1, 'w')
    F.create_dimension('ny', size=self.ny)
    F.create_dimension('nx', size=self.nx)
    F.create_variable('lat', 'd', ('ny', 'nx'))
    F.create_variable('lon', 'd', ('ny', 'nx'))
    F.assign_value('lat', np.ones((self.ny, self.nx)) * 5.)
    F.assign_value('lon', np.ones((self.ny, self.nx)) * 3.)
    F.close()

    F = NetCDFHandler()
    F.open_file(self.gfile2, 'w')
    F.create_dimension('ny', size=self.ny)
    F.create_dimension('nx', size=self.nx)
    F.create_variable('latitude', 'd', ('ny', 'nx'))
    F.create_variable('longitude', 'd', ('ny', 'nx'))
    F.assign_value('latitude', np.ones((self.ny, self.nx)) * 7.)
    F.assign_value('longitude', np.ones((self.ny, self.nx)) * 8.)
    F.close()

    F = NetCDFHandler()
    F.open_file(self.gfile3, 'w')
    F.create_dimension('ny', size=self.ny * 2)
    F.create_dimension('nx', size=self.nx * 3)
    F.create_variable('latitude', 'd', ('ny', 'nx'))
    F.create_variable('longitude', 'd', ('ny', 'nx'))
    F.assign_value('latitude', np.ones((self.ny * 2, self.nx * 3)) * 7.)
    F.assign_value('longitude', np.ones((self.ny * 2, self.nx * 3)) * 8.)
    F.close()
def main():
    plt.close('all')

    # specify name of shapefile; note that it should be done WITHOUT the file extension
    shp_file = '/Users/mpim/Desktop/ben/TP/TibeatanPlateau'

    # hint: set an array as masked array like x.data = np.ma.array(arr, mask=arr != arr)

    # set the region for masking
    r = RegionBboxLatLon(777, 70., 105., 25., 40., label='testregion')
    r.mask = None

    # read files
    filename_Landevl = '/Users/mpim/Desktop/ben/chen_sebs_wgs84_n_0.13x0.13.nc'
    # '/data/share/mpiles/TRS/m300157/land_eval/LandFluxEVAL.merged.89-05.monthly.diagnostic.nc'
    Landevl = Data(filename_Landevl, 'ETmon', read=True)  # 'lat', 'lon', ET_mean

    # get area of interest and cut the bounding box
    Landevl.get_aoi_lat_lon(r)
    Landevl.cut_bounding_box()

    # read regions from shapefile;
    # this gives an object which contains all regions stored in the shapefile
    RS = RegionShape(shp_file)

    # just print the region keys for illustration
    for k in RS.regions.keys():
        print k

    # if you now want to generate a particular mask, you can do that;
    # in the following example we mask the air temperature for the
    # Tibetan plateau and then mask it
    r_tibet = RS.regions[1]  # gives a Region object

    # mask with region
    Landevl.mask_region(r_tibet)
    Landevl.save('/Users/mpim/Desktop/ben/chen_sebs_recut2.nc')

    plt.show()
def test_read_coordinates(self):
    # read data normally
    x1 = Data(self.tempfile, 'myvar', read=True)
    self.assertEqual(x1.nx, self.nx)
    self.assertEqual(x1.ny, self.ny)

    # read data with separate geometry file using 'lat'/'lon' names
    x2 = Data(self.tempfile, 'myvar', read=True, geometry_file=self.gfile1)
    self.assertTrue(np.all(x2.lat == 5.))
    self.assertTrue(np.all(x2.lon == 3.))

    # read data with separate geometry file using 'latitude'/'longitude' names
    x3 = Data(self.tempfile, 'myvar', read=True, geometry_file=self.gfile2)
    self.assertTrue(np.all(x3.lat == 7.))
    self.assertTrue(np.all(x3.lon == 8.))

    # read data with separate geometry file, invalid geometry
    with self.assertRaises(ValueError):
        x4 = Data(self.tempfile, 'myvar', read=True, geometry_file=self.gfile3)
def test_rasterize_data(self):
    """
    test dataset

    +---+---+---+
    |1.2|2.3|   |
    +---+---+---+
    |   |   |0.7|
    +---+---+---+
    |   |5.2|   |
    +---+---+---+
    """
    x = Data(None, None)
    x._init_sample_object(ny=1, nx=272)
    x.lon = np.asarray([2.25, 2.45, 1.8, 3.6])
    x.lat = np.asarray([11.9, 10.1, 10.2, 11.3])
    x.data = np.asarray([5.2, 2.3, 1.2, 0.7])

    # target grid
    lon = np.asarray([1.5, 2.5, 3.5])
    lat = np.asarray([10., 11., 12.])
    LON, LAT = np.meshgrid(lon, lat)

    # rasterize data
    # no valid data for a tiny search radius
    res = x._rasterize(LON, LAT, radius=0.000001, return_object=True)
    self.assertEqual(res.data.mask.sum(), np.prod(LON.shape))
    with self.assertRaises(ValueError):
        res = x._rasterize(LON, LAT, radius=0.000001, return_object=False)

    # check valid results
    res = x._rasterize(LON, LAT, radius=0.5, return_object=True)

    self.assertEqual(res.data[0, 0], 1.2)
    self.assertEqual(res.data[0, 1], 2.3)
    self.assertEqual(res.data[1, 2], 0.7)
    self.assertEqual(res.ny * res.nx - res.data.mask.sum(), 4)
def get_sample_file(name='air', return_object=True):
    """
    returns a Data object for an example file, or the filename with the
    full path. If the file does not exist yet, it will be downloaded.

    Parameters
    ----------
    name : str
        specifies which type of sample file should be returned
        ['air','rain']
    return_object : bool
        return a Data object if True, otherwise the filename is returned
    """
    files = {
        'air': {
            'name': 'air.mon.mean.nc',
            'url': 'ftp://ftp.cdc.noaa.gov/Datasets/ncep.reanalysis.derived/surface/air.mon.mean.nc',
            'variable': 'air'
        },
        'rain': {
            'name': 'pr_wtr.eatm.mon.mean.nc',
            'url': 'ftp://ftp.cdc.noaa.gov/Datasets/ncep.reanalysis.derived/surface/pr_wtr.eatm.mon.mean.nc',
            'variable': 'pr_wtr'
        }
    }

    if name not in files.keys():
        raise ValueError('Invalid sample file')

    fname = get_example_data_directory() + files[name]['name']

    # download data if not existing yet
    if not os.path.exists(fname):
        tdir = get_example_data_directory()
        url = files[name]['url']
        _download_file(url, tdir)
        if not os.path.exists(fname):
            print fname
            raise ValueError('Download failed!')

    # ... here everything should be fine
    if return_object:
        return Data(fname, files[name]['variable'], read=True)
    else:
        return fname
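# A hedged usage sketch for get_sample_file(): note that the first call
# triggers a download from the NOAA FTP server, so network access and a
# writable example data directory are assumed.
def _example_get_sample_file_usage():
    air = get_sample_file(name='air')  # returns a Data object
    print 'air sample dimensions: ', air.shape
    fname = get_sample_file(name='rain', return_object=False)  # path only
    print 'rain sample file: ', fname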
def get_surface_shortwave_radiation_down(self, interval='season'):
    """
    get surface shortwave incoming radiation data for JSBACH

    returns Data object
    """
    if interval != 'season':
        raise ValueError('Other temporal sampling than SEASON not supported yet for JSBACH BOT files, sorry')

    v = 'var176'

    y1 = '1979-01-01'
    y2 = '2006-12-31'
    rawfilename = self.data_dir + 'data/model/' + self.experiment + '_echam6_BOT_mm_1979-2006_srads.nc'

    if not os.path.exists(rawfilename):
        return None

    # --- read data
    cdo = pyCDO(rawfilename, y1, y2)
    if interval == 'season':
        seasfile = cdo.seasmean()
        del cdo
        print 'seasfile: ', seasfile
        cdo = pyCDO(seasfile, y1, y2)
        filename = cdo.yseasmean()
    else:
        raise ValueError('Invalid interval option %s ' % interval)

    # --- read land-sea mask
    ls_mask = get_T63_landseamask(self.shift_lon)

    # --- read SIS data
    sis = Data(filename, v, read=True,
               label='MPI-ESM SIS ' + self.experiment, unit='-',
               lat_name='lat', lon_name='lon',
               #shift_lon=shift_lon,
               mask=ls_mask.data.data)

    return sis
def get_T63_landseamask(shift_lon, mask_antarctica=True, area='land'):
    """
    get JSBACH T63 land sea mask
    the LS mask is read from the JSBACH init file

    Parameters
    ----------
    area : str
        ['land','ocean']: when 'land', the returned mask is True on land
        pixels; for 'ocean' it is vice versa. In any other case you get a
        valid field everywhere (globally).
    mask_antarctica : bool
        if True, then the mask is FALSE over Antarctica (< 60S)
    """
    ls_file = get_data_pool_directory() \
        + 'data_sources/LSMASK/jsbach_T63_GR15_4tiles_1992.nc'
    ls_mask = Data(ls_file, 'slm', read=True, label='T63 land-sea mask',
                   lat_name='lat', lon_name='lon', shift_lon=shift_lon)
    if area == 'land':
        msk = ls_mask.data > 0.
    elif area == 'ocean':
        msk = ls_mask.data == 0.
    else:
        msk = np.ones(ls_mask.data.shape).astype('bool')

    ls_mask.data[~msk] = 0.
    ls_mask.data[msk] = 1.
    ls_mask.data = ls_mask.data.astype('bool')
    if mask_antarctica:
        ls_mask.data[ls_mask.lat < -60.] = False

    # ensure that also the mask attribute is set properly
    ls_mask._apply_mask(~msk)

    return ls_mask
def xxxxtest_median_model():
    x = Data(None, None)
    x.label = 'nothing'
    d = np.random.random((100, 1, 1))
    x.data = np.ma.array(d, mask=d != d)

    # odd number of members and no masked values
    a = x.copy()
    a.data[:, 0, 0] = 1.
    b = x.copy()
    b.data[:, 0, 0] = 3.
    c = x.copy()
    c.data[:, 0, 0] = 2.
    d = x.copy()
    d.data[:, 0, 0] = 5.
    e = x.copy()
    e.data[:, 0, 0] = 4.

    m = MedianModel()
    m.add_member(a)
    m.add_member(b)
    m.add_member(c)
    m.add_member(d)
    m.add_member(e)
    m.ensmedian()
    # should give the value of 3. for all timesteps
    del m

    # even number of members and no masked values
    a = x.copy()
    a.data[:, 0, 0] = 1.
    b = x.copy()
    b.data[:, 0, 0] = 3.
    c = x.copy()
    c.data[:, 0, 0] = 2.
    d = x.copy()
    d.data[:, 0, 0] = 4.

    m = MedianModel()
    m.add_member(a)
    m.add_member(b)
    m.add_member(c)
    m.add_member(d)
    m.ensmedian()
    # should give the value of 2.5 for all timesteps
    del m
""" from pycmbs.region import RegionBboxLatLon from pycmbs.examples import download from pycmbs.data import Data from pycmbs.mapping import map_plot import matplotlib.pyplot as plt # specify some region using bounding box # here: 20deg W ... 30 DEG E, 40 DEG S ... 5 DEG N r = RegionBboxLatLon(777, -20.0, 30.0, -40.0, 5.0, label="testregion") # 777 is just the ID value # read some data as Data object filename = download.get_sample_file(name="air", return_object=False) air = Data(filename, "air", read=True) # generate some plot BEFORE the masking map_plot(air, title="before", use_basemap=True) # now mask the data ... air.get_aoi_lat_lon(r) # generate some plot AFTER the masking map_plot(air, title="after", use_basemap=True) # ... o.k., so far so good, but the dataset "air" still contains data for the entire domain. # even if it is masked it will eat some of your memory. You can see this by plotting the size of the matrix print(air.shape) # wouldn't it be nice to just remove everything which is not needed?
from pycmbs.data import Data
from pycmbs.utils import download
import matplotlib.pyplot as plt

plt.close('all')

# load some sample data
# filename = '<THEINPUTFILE>'
filename = download.get_sample_file(name='<VARNAME>', return_object=False)
thevar = '<VARNAME>'
if thevar == 'rain':
    thevar = 'pr_wtr'
x = Data(filename, thevar, read=True)
print 'Data dimensions: ', x.shape

# calculate global mean temperature timeseries
t = x.fldmean()

# plot results as a figure
f = plt.figure()
ax = f.add_subplot(111)
ax.plot(x.date, t, label='global mean')
ax.set_xlabel('Years')
ax.set_ylabel('Temperature [degC]')

# perhaps you also want to calculate some statistics like the temperature trend
from scipy import stats
import numpy as np
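# A hedged sketch of such a trend estimate using scipy.stats.linregress;
# it assumes that 'x.time' holds day-based time values (as elsewhere in
# this codebase), so the slope is converted from per-day to per-decade.
slope, intercept, r_value, p_value, std_err = stats.linregress(x.time, np.squeeze(np.asarray(t)))
print 'Trend: %f degC/decade (r=%f)' % (slope * 365.25 * 10., r_value)

plt.show()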
""" This is an example that should illustrate how you can scale a dataset by the length of the month """ from pycmbs.examples import download from pycmbs.data import Data from pycmbs.mapping import map_plot import matplotlib.pyplot as plt plt.close('all') # read some data as Data object filename = download.get_sample_file(name='air', return_object=False) air = Data(filename, 'air', read=True) # this dataset has the following times print air.date # obviously the different months have different numbers of days. # Let's say you want now to perform a proper averaging of the data # taking into account the different lengths of the months # # the way how you would do it is like # y = sum(w[i] * x[i]) # whereas w is a weighting factor for each timestep and 'x' is the input data # how can you easily do that with the Data object? # 1) calculate the weights ... # these are dependent on the number of days which you get as ...
""" This file is part of pyCMBS. (c) 2012-2014 For COPYING and LICENSE details, please refer to the file COPYRIGHT.md """ from pycmbs.data import Data import numpy as np fname = '../pycmbs/examples/example_data/air.mon.mean.nc' d = Data(fname, 'air', read=True) c = d.get_climatology(return_object=True) print 'c raw: ', c.fldmean() print c.date print '' # create some invalid data d1 = d.copy() t = d1.time * 1. d1.time[20:] = t[0:-20] d1.time[0:20] = t[-20:] tmp = d1.data * 1. d1.data[20:, :, :] = tmp[0:-20, :, :] d1.data[0:20, :, :] = tmp[-20:, :, :] c1 = d1.get_climatology(return_object=True, ensure_start_first=True) print ''
def test_lomb_basic(self):

    def _sample_data(t, w, A, B):
        e = np.random.random(len(t)) * 0.
        y = A * np.cos(w * t + B)
        return y, e

    def _test_ratio(x, y, thres=0.05):
        r = np.abs(1. - x / y)
        print r, x / y
        self.assertTrue(r <= thres)  # accuracy of ratio within 5%

    # test with single frequency
    p_ref = 10.
    w = 2. * np.pi / p_ref
    y, e = _sample_data(self.t, w, 5., 0.1)
    P = np.arange(2., 20., 2.)  # target period [days]
    Ar, Br = lomb_scargle_periodogram(self.t, P, y + e, corr=False)
    _test_ratio(Ar[4], 5.)
    _test_ratio(Br[4], 0.1)

    Ar, Br, Rr, Pr = lomb_scargle_periodogram(self.t, P, y)
    _test_ratio(Ar[4], 5.)
    _test_ratio(Br[4], 0.1)
    #~ self.assertEqual(Rr[4], 1.)
    #~ self.assertEqual(Pr[4], 0.)

    # test for functions with overlapping frequencies
    p_ref1 = 365.
    p_ref2 = 365.
    w1 = 2. * np.pi / p_ref1
    w2 = 2. * np.pi / p_ref2
    y1, e1 = _sample_data(self.t, w1, 4., 0.1)
    y2, e2 = _sample_data(self.t, w2, 3.6, 0.1)
    P = np.arange(1., 366., 1.)  # target period [days]
    Ar, Br = lomb_scargle_periodogram(self.t, P, y1 + e1 + y2 + e2, corr=False)
    _test_ratio(Ar[-1], 7.6)
    _test_ratio(Br[-1], 0.1)

    # overlapping frequencies 2
    p_ref1 = 100.
    p_ref2 = 200.
    w1 = 2. * np.pi / p_ref1
    w2 = 2. * np.pi / p_ref2
    # don't choose pi for the phase, as this will result in an optimization
    # with negative amplitude and zero phase (= sin)
    y1, e1 = _sample_data(self.t, w1, 2., np.pi * 0.3)
    y2, e2 = _sample_data(self.t, w2, 3., np.pi * 0.5)
    P = np.arange(1., 366., 1.)  # target period [days]
    hlp = y1 + e1 + y2 + e2
    Ar, Br = lomb_scargle_periodogram(self.t, P, hlp, corr=False)

    # sample data object
    D = Data(None, None)
    D._init_sample_object(nt=len(hlp), ny=1, nx=1)
    D.data[:, 0, 0] = np.ma.array(hlp, mask=hlp != hlp)
    D.time = self.t

    D_dummy = Data(None, None)
    D_dummy._init_sample_object(nt=len(hlp), ny=1, nx=1)
    with self.assertRaises(ValueError):
        D_dummy.time_str = 'hours since 2001-01-01'
        # only days currently supported!
        xx, yy = D_dummy.lomb_scargle_periodogram(P, return_object=False)

    AD, BD = D.lomb_scargle_periodogram(P, return_object=False, corr=False)
    AD1, BD1 = D.lomb_scargle_periodogram(P, return_object=True, corr=False)

    self.assertEqual(AD.shape, BD.shape)
    self.assertEqual(D.ny, AD.shape[1])
    self.assertEqual(D.nx, AD.shape[2])

    _test_ratio(Ar[99], 2.)
    _test_ratio(AD[99, 0, 0], 2.)
    _test_ratio(AD1.data[99, 0, 0], 2.)
    _test_ratio(Ar[199], 3.)
    _test_ratio(AD[199, 0, 0], 3.)
    _test_ratio(AD1.data[199, 0, 0], 3.)

    _test_ratio(Br[99], np.pi * 0.3)
    _test_ratio(BD[99, 0, 0], np.pi * 0.3)
    _test_ratio(BD1.data[99, 0, 0], np.pi * 0.3)
    _test_ratio(Br[199], np.pi * 0.5)
    _test_ratio(BD[199, 0, 0], np.pi * 0.5)
    _test_ratio(BD1.data[199, 0, 0], np.pi * 0.5)

    # test for data with gaps
    # tests are not very robust yet, as results depend on the applied noise!
    p_ref1 = 100.
    p_ref2 = 200.
    w1 = 2. * np.pi / p_ref1
    w2 = 2. * np.pi / p_ref2
    y1, e1 = _sample_data(self.t, w1, 2., np.pi * 0.3)
    y2, e2 = _sample_data(self.t, w2, 3., np.pi * 0.5)
    P = np.arange(1., 366., 1.)  # target period [days]
    ran = np.random.random(len(self.t))
    msk = ran > 0.1
    tmsk = self.t[msk]
    yref = y1 + e1 + y2 + e2
    ymsk = yref[msk]
    Ar, Br = lomb_scargle_periodogram(tmsk, P, ymsk, corr=False)
# -*- coding: utf-8 -*-
"""
This file is part of pyCMBS.
(c) 2012- Alexander Loew
For COPYING and LICENSE details, please refer to the LICENSE file
"""

from pycmbs.data import Data
from pycmbs.plots import map_difference
import matplotlib.pyplot as plt

file_name = '../../../pycmbs/examples/example_data/air.mon.mean.nc'
A = Data(file_name, 'air', lat_name='lat', lon_name='lon', read=True, label='air temperature')
B = A.copy()
B.mulc(2.3, copy=False)

a = A.get_climatology(return_object=True)
b = B.get_climatology(return_object=True)

# a quick plot as well as a projection plot
f1 = map_difference(a, b, show_stat=False, vmin=-30., vmax=30., dmin=-60., dmax=60.)  # unprojected
plt.show()
def get_model_data_generic(self, interval='season', **kwargs):
    """
    unique parameters are:
        filename - file basename
        variable - name of the variable as the short_name in the netcdf file

        kwargs is a dictionary with keys for each model; a dictionary
        with properties follows
    """
    if self.type not in kwargs.keys():
        print ''
        print 'WARNING: it is not possible to get data using generic function, as method missing: ', self.type, kwargs.keys()
        assert False

    locdict = kwargs[self.type]

    # read settings and details from the keyword arguments;
    # no defaults; everything should be explicitly specified in either
    # the config file or the dictionaries
    varname = locdict.pop('variable', None)
    #~ print self.type
    #~ print locdict.keys()
    assert varname is not None, 'ERROR: provide varname!'

    units = locdict.pop('unit', None)
    assert units is not None, 'ERROR: provide unit!'

    lat_name = locdict.pop('lat_name', 'lat')
    lon_name = locdict.pop('lon_name', 'lon')
    model_suffix = locdict.pop('model_suffix', None)
    model_prefix = locdict.pop('model_prefix', None)
    file_format = locdict.pop('file_format')
    scf = locdict.pop('scale_factor')
    valid_mask = locdict.pop('valid_mask')
    custom_path = locdict.pop('custom_path', None)
    thelevel = locdict.pop('level', None)

    target_grid = self._actplot_options['targetgrid']
    interpolation = self._actplot_options['interpolation']

    if custom_path is None:
        # this routine needs to be implemented by each subclass
        filename1 = self.get_raw_filename(varname, **kwargs)
    else:
        filename1 = custom_path + self.get_raw_filename(varname, **kwargs)

    if filename1 is None:
        print_log(WARNING, 'No valid model input data')
        return None

    force_calc = False

    if self.start_time is None:
        raise ValueError('Start time needs to be specified')
    if self.stop_time is None:
        raise ValueError('Stop time needs to be specified')

    # /// PREPROCESSING ///
    cdo = Cdo()
    s_start_time = str(self.start_time)[0:10]
    s_stop_time = str(self.stop_time)[0:10]

    # 1) select timeperiod and generate monthly mean file
    if target_grid == 't63grid':
        gridtok = 'T63'
    else:
        gridtok = 'SPECIAL_GRID'

    # target filename
    file_monthly = filename1[:-3] + '_' + s_start_time + '_' + s_stop_time + '_' + gridtok + '_monmean.nc'
    file_monthly = get_temporary_directory() + os.path.basename(file_monthly)

    sys.stdout.write('\n *** Model file monthly: %s\n' % file_monthly)

    if not os.path.exists(filename1):
        print 'WARNING: File not existing: ' + filename1
        return None

    cdo.monmean(options='-f nc', output=file_monthly,
                input='-' + interpolation + ',' + target_grid
                      + ' -seldate,' + s_start_time + ',' + s_stop_time
                      + ' ' + filename1,
                force=force_calc)

    sys.stdout.write('\n *** Reading model data... \n')
    sys.stdout.write('     Interval: ' + interval + '\n')

    # 2) calculate monthly or seasonal climatology
    if interval == 'monthly':
        mdata_clim_file = file_monthly[:-3] + '_ymonmean.nc'
        mdata_sum_file = file_monthly[:-3] + '_ymonsum.nc'
        mdata_N_file = file_monthly[:-3] + '_ymonN.nc'
        mdata_clim_std_file = file_monthly[:-3] + '_ymonstd.nc'
        cdo.ymonmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
        cdo.ymonsum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
        cdo.ymonstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
        # number of samples
        cdo.div(options='-f nc', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)
    elif interval == 'season':
        mdata_clim_file = file_monthly[:-3] + '_yseasmean.nc'
        mdata_sum_file = file_monthly[:-3] + '_yseassum.nc'
        mdata_N_file = file_monthly[:-3] + '_yseasN.nc'
        mdata_clim_std_file = file_monthly[:-3] + '_yseasstd.nc'
        cdo.yseasmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
        cdo.yseassum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
        cdo.yseasstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
        # number of samples
        cdo.div(options='-f nc -b 32', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)
    else:
        raise ValueError('Unknown temporal interval. Can not perform preprocessing!')

    if not os.path.exists(mdata_clim_file):
        return None

    # 3) read data
    if interval == 'monthly':
        thetime_cycle = 12
    elif interval == 'season':
        thetime_cycle = 4
    else:
        print interval
        raise ValueError('Unsupported interval!')

    mdata = Data(mdata_clim_file, varname, read=True, label=self._unique_name,
                 unit=units, lat_name=lat_name, lon_name=lon_name,
                 shift_lon=False, scale_factor=scf, level=thelevel,
                 time_cycle=thetime_cycle)
    mdata_std = Data(mdata_clim_std_file, varname, read=True,
                     label=self._unique_name + ' std', unit='-',
                     lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                     level=thelevel, time_cycle=thetime_cycle)
    mdata.std = mdata_std.data.copy()
    del mdata_std
    mdata_N = Data(mdata_N_file, varname, read=True,
                   label=self._unique_name + ' N', unit='-',
                   lat_name=lat_name, lon_name=lon_name, shift_lon=False,
                   scale_factor=scf, level=thelevel)
    mdata.n = mdata_N.data.copy()
    del mdata_N

    # ensure that climatology always starts with January, therefore set date and then sort
    mdata.adjust_time(year=1700, day=15)  # set arbitrary time for climatology
    mdata.timsort()

    # 4) read monthly data
    mdata_all = Data(file_monthly, varname, read=True, label=self._unique_name,
                     unit=units, lat_name=lat_name, lon_name=lon_name,
                     shift_lon=False, time_cycle=12, scale_factor=scf,
                     level=thelevel)
    mdata_all.adjust_time(day=15)

    # mask_antarctica masks everything below 60 degrees S;
    # here we only mask Antarctica if only LAND points shall be used
    if valid_mask == 'land':
        mask_antarctica = True
    elif valid_mask == 'ocean':
        mask_antarctica = False
    else:
        mask_antarctica = False

    if target_grid == 't63grid':
        mdata._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
        mdata_all._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
    else:
        tmpmsk = get_generic_landseamask(False, area=valid_mask, target_grid=target_grid, mask_antarctica=mask_antarctica)
        mdata._apply_mask(tmpmsk)
        mdata_all._apply_mask(tmpmsk)
        del tmpmsk

    mdata_mean = mdata_all.fldmean()

    mdata._raw_filename = filename1
    mdata._monthly_filename = file_monthly
    mdata._clim_filename = mdata_clim_file
    mdata._varname = varname

    # return data as a tuple list
    retval = (mdata_all.time, mdata_mean, mdata_all)

    del mdata_all
    return mdata, retval
class TestData(TestCase):

    def setUp(self):
        # init Data object for testing
        n = 100  # slows down significantly! constraint is percentile test
        x = sc.randn(n) * 100.  # generate dummy data
        self.D = Data(None, None)
        d = np.ones((n, 1, 1))
        self.D.data = d
        self.D.data[:, 0, 0] = x
        self.D.data = np.ma.array(self.D.data, mask=self.D.data != self.D.data)
        self.D.verbose = True
        self.D.unit = 'myunit'
        self.D.label = 'testlabel'
        self.D.filename = 'testinputfilename.nc'
        self.D.varname = 'testvarname'
        self.D.long_name = 'This is the longname'
        self.D.time = np.arange(n) + pl.datestr2num('2001-01-01') - 1
        self.D.time_str = "days since 0001-01-01 00:00:00"
        self.D.calendar = 'gregorian'
        self.D.cell_area = np.ones_like(self.D.data[0, :, :])

    @unittest.skip('wait for bug free scipy')
    def test_pattern_correlation(self):
        """ test pattern correlation function """
        x = self.D.copy()

        # correlation with random values
        y = self.D.copy()
        tmp = np.random.random(y.shape)
        y.data = np.ma.array(tmp, mask=tmp != tmp)
        P2 = PatternCorrelation(x, y)
        P2._correlate()
        self.assertEqual(x.nt, len(P2.r_value))
        self.assertEqual(x.nt, len(P2.t))
        for i in xrange(x.nt):
            slope, intercept, r_value, p_value, std_err = stats.mstats.linregress(x.data[i, :, :].flatten(), y.data[i, :, :].flatten())
            self.assertEqual(P2.r_value[i], r_value)
            self.assertEqual(P2.p_value[i], p_value)
            self.assertEqual(P2.slope[i], slope)
            self.assertEqual(P2.intercept[i], intercept)
            self.assertEqual(P2.std_err[i], std_err)

    def test_gleckler_index(self):
        """
        test Reichler index/Gleckler plot
        """
        # generate sample data
        tmp = np.zeros((5, 3, 1))
        tmp[:, 0, 0] = np.ones(5) * 1.
        tmp[:, 1, 0] = np.ones(5) * 2.
        tmp[:, 2, 0] = np.ones(5) * 5.

        # The data is like ...
        # | 1 | 2 | 5 |
        # | 1 | 2 | 5 |
        # | 1 | 2 | 5 |
        # | 1 | 2 | 5 |
        # | 1 | 2 | 5 |

        x = self.D.copy()
        x._temporal_subsetting(0, 4)
        x.data = np.ma.array(tmp, mask=tmp != tmp)
        x.std = np.ones(x.data.shape)
        x.time[0] = pl.datestr2num('2000-02-15')
        x.time[1] = pl.datestr2num('2000-03-15')
        x.time[2] = pl.datestr2num('2000-04-15')
        x.time[3] = pl.datestr2num('2000-05-15')
        x.time[4] = pl.datestr2num('2000-06-15')

        y = self.D.copy()
        y._temporal_subsetting(0, 4)
        tmp = np.ones(x.data.shape)  # sample data 2
        y.data = np.ma.array(tmp, mask=tmp != tmp)
        y.time[0] = pl.datestr2num('2000-02-15')
        y.time[1] = pl.datestr2num('2000-03-15')
        y.time[2] = pl.datestr2num('2000-04-15')
        y.time[3] = pl.datestr2num('2000-05-15')
        y.time[4] = pl.datestr2num('2000-06-15')

        # Case 1: same area weights
        # cell area
        tmp = np.ones((3, 1))
        x.cell_area = tmp * 1.

        # | 1-1 | 2-1 | 5-1 |
        # | 1-1 | 2-1 | 5-1 |
        # | 1-1 | 2-1 | 5-1 |
        # | 1-1 | 2-1 | 5-1 |
        # | 1-1 | 2-1 | 5-1 |
        # ===================
        # | 0 | 5 | 5*4**2 = 5*16. = 80 |
        # ==> E2 = sqrt(85./15.)
        D = GlecklerPlot()
        r = D.calc_index(x, y, 'a', 'b', time_weighting=False)

        wt = np.ones(5) / 5.
        ref = np.sqrt(((85. / 15.) * wt).sum())
        t = np.abs(1. - r / ref)
        self.assertLess(t, 0.000001)  # relative error

        D = GlecklerPlot()
        r = D.calc_index(x, y, 'a', 'b')

        wt = np.asarray([29., 31., 30., 31., 30.])
        wt = wt / wt.sum()
        ref = np.sqrt(((85. / 15.) * wt).sum())
        t = np.abs(1. - r / ref)
        self.assertLess(t, 0.000001)  # relative error

        # Case 2: different area weights
        # cell area
        tmp = np.ones((3, 1))
        tmp[1, 0] = 2.
        x.cell_area = tmp * 1.

        # | 1-1=0 | 2-1=1 | 5-1=16 |
        # | 1-1=0 | 2-1=1 | 5-1=16 |
        # | 1-1=0 | 2-1=1 | 5-1=16 |
        # | 1-1=0 | 2-1=1 | 5-1=16 |
        # | 1-1=0 | 2-1=1 | 5-1=16 |
        # --------------------------
        # w = 0.25   w = 0.5   w = 0.25
        # --------------------------
        # 0.25*0 + 0.5*1 + 0.25*16 = 0 + 0.5 + 4 = 4.5
        # the mean of that is 4.5 for each timestep
        # mean, because the overall weights are calculated such that
        # they give a total weight of 1

        D = GlecklerPlot()
        r = D.calc_index(x, y, 'a', 'b', time_weighting=False)

        wt = np.ones(5) / 5.
        ref = np.sqrt((4.5 * wt).sum())
        t = np.abs(1. - r / ref)
        self.assertLess(t, 0.000001)  # relative error

        wt = np.asarray([29., 31., 30., 31., 30.])
        wt = wt / wt.sum()
        ref = np.sqrt((4.5 * wt).sum())
        t = np.abs(1. - r / ref)
        self.assertLess(t, 0.000001)  # relative error

        # Case 3: use different std
        x.std = np.ones(x.data.shape)
        x.std[:, 2, 0] = 0.5

        # | 1-1=0 | 2-1=1 | 5-1=16 / 0.5 |
        # | 1-1=0 | 2-1=1 | 5-1=16 / 0.5 |
        # | 1-1=0 | 2-1=1 | 5-1=16 / 0.5 |
        # | 1-1=0 | 2-1=1 | 5-1=16 / 0.5 |
        # | 1-1=0 | 2-1=1 | 5-1=16 / 0.5 |
        # --------------------------------
        # w = 0.25   w = 0.5   w = 0.25
        # 0 + 0.5 + 0.25*32 = 0.5 + 8 = 8.5

        D = GlecklerPlot()
        r = D.calc_index(x, y, 'a', 'b', time_weighting=False)

        wt = np.ones(5) / 5.
        ref = np.sqrt((8.5 * wt).sum())
        t = np.abs(1. - r / ref)
        self.assertLess(t, 0.000001)  # relative error

        wt = np.asarray([29., 31., 30., 31., 30.])
        wt = wt / wt.sum()
        ref = np.sqrt((8.5 * wt).sum())
        t = np.abs(1. - r / ref)
        self.assertLess(t, 0.000001)  # relative error

    def test_RegionalAnalysis_xNone(self):
        region = RegionIndex(55, 1, 1, 1, 1, label='test')
        R = RegionalAnalysis(None, self.D, region)
        self.assertEqual(R.x, None)

    def test_RegionalAnalysis_InvalidX(self):
        region = RegionIndex(77, 1, 1, 1, 1, label='test')
        with self.assertRaises(ValueError):
            R = RegionalAnalysis([123.], self.D, region)

    def test_RegionalAnalysis_InvalidY(self):
        region = RegionIndex(88, 1, 1, 1, 1, label='test')
        with self.assertRaises(ValueError):
            R = RegionalAnalysis(self.D, [123.], region)

    def test_RegionalAnalysis_yNone(self):
        region = RegionIndex(55, 1, 1, 1, 1, label='test')
        R = RegionalAnalysis(self.D, None, region)
        self.assertEqual(R.y, None)

    def test_RegionalAnalysis_InvalidRegion(self):
        region = 1.
        with self.assertRaises(ValueError):
            R = RegionalAnalysis(self.D, self.D, region)

    def test_RegionalAnalysis_InvalidGeometry(self):
        region = RegionIndex(99, 1, 1, 1, 1, label='test')
        x = self.D.copy()
        y = self.D.copy()
        y.data = np.random.random((2, 3, 4, 5))
        with self.assertRaises(ValueError):
            R = RegionalAnalysis(x, y, region)

    @unittest.skip('wait for solving logplot problem in map_plot')
    def test_EOF(self):
        x = np.random.random((self.D.nt, 20, 30))
        self.D.data = np.ma.array(x, mask=x != x)
        self.D.cell_area = np.ones_like(self.D.data[0, :, :])
        E = EOF(self.D)
        r = E.reconstruct_data()
        c = E.get_correlation_matrix()
        E.get_eof_data_correlation()
        #~ E.plot_channnel_correlations(100000)  # slow!!
        E.plot_eof_coefficients(None, all=True)
        E._calc_anomalies()
        E.plot_EOF(None, all=True)

    #~ def test_koeppen(self):
    #~     T = self.D.copy()
    #~     T.data = np.random.random((10, 20, 30))
    #~     T.unit = 'K'
    #~     P = self.D.copy()
    #~     P.data = np.random.random((10, 20, 30))
    #~     P.unit = 'kg/m^2s'
    #~     lsm = self.D.copy()
    #~     lsm.unit = 'fractional'
    #~     lsm.data = np.ones((20, 30))
    #~
    #~     k = Koeppen(temp=T, precip=P, lsm=lsm)

    def test_koeppen_InvalidInput(self):
        T = self.D.copy()
        P = self.D.copy()
        lsm = self.D.copy()
        with self.assertRaises(ValueError):
            k = Koeppen(temp=None, precip=P, lsm=lsm)
        with self.assertRaises(ValueError):
            k = Koeppen(temp=T, precip=None, lsm=lsm)
        with self.assertRaises(ValueError):
            k = Koeppen(temp=T, precip=P, lsm=None)
class TestEOF(TestCase):

    def setUp(self):
        # init Data object for testing
        n = 4  # slows down significantly! constraint is percentile test
        x = sc.randn(n) * 100.  # generate dummy data
        self.D = Data(None, None)
        d = np.ones((n, 1, 2))
        self.D.data = d
        self.D.data[:, 0, 0] = x
        self.D.data = np.ma.array(self.D.data, mask=self.D.data != self.D.data)
        self.D.verbose = True
        self.D.unit = 'myunit'
        self.D.time = np.arange(n) + pl.datestr2num('2001-01-01') - 1

    def test_eof_analysis(self):
        # test of EOF
        # example taken from:
        # http://www.atmos.washington.edu/~dennis/552_Notes_4.pdf , page 80

        # assign sampled data
        D = self.D.copy()

        # d1 = np.array([2, 4, -6, 8])
        # d2 = np.array([1, 2, -3, 4])

        # d1 = np.array([2, 1])
        # d2 = np.array([4, 2])
        # d3 = np.array([-6, -3])
        # d4 = np.array([8, 4])

        # D.data[:, 0, 0] = d1; D.data[:, 0, 1] = d2
        # D.data[:, 0, 2] = d3; D.data[:, 0, 3] = d4

        # D.data[:, 0, 0] = d1
        # D.data[:, 0, 1] = d2

        # do not normalize the covariance matrix, as this is also not done in the example
        # E = EOF(D, cov_norm=False)
        # print E.C  # covariance matrix

        # shape of EOF is wrong !!!!!!!
        # something is not really working here !!!!
        # somehow there is a problem here:
        # why once 4x4 and once 2x2 ???

        # print 'eigval'
        # print E.eigval
        # print 'eigvec'
        # print E.eigvec
        pass
# -*- coding: utf-8 -*-
"""
This file is part of pyCMBS.
(c) 2012- Alexander Loew
For COPYING and LICENSE details, please refer to the LICENSE file
"""

from pycmbs.data import Data
from pycmbs.plots import map_season
import matplotlib.pyplot as plt

file_name = '../../../pycmbs/examples/example_data/air.mon.mean.nc'
air = Data(file_name, 'air', lat_name='lat', lon_name='lon', read=True, label='air temperature')
c = air.get_climatology(return_object=True)

# a quick plot as well as a projection plot
f1 = map_season(c, show_stat=False, vmin=-30., vmax=30., cticks=[-30., 0., 30.])  # unprojected
plt.show()
def xxxxxxxxxxxxxxxxxxxget_surface_shortwave_radiation_down(self, interval='season', force_calc=False, **kwargs):
    """
    return data object of
    a) seasonal means for SIS
    b) global mean timeseries for SIS at original temporal resolution
    """
    the_variable = 'rsds'

    locdict = kwargs[self.type]
    valid_mask = locdict.pop('valid_mask')

    if self.start_time is None:
        raise ValueError('Start time needs to be specified')
    if self.stop_time is None:
        raise ValueError('Stop time needs to be specified')

    s_start_time = str(self.start_time)[0:10]
    s_stop_time = str(self.stop_time)[0:10]

    if self.type == 'CMIP5':
        filename1 = self.data_dir + 'rsds' + os.sep + self.experiment + '/ready/' + self.model + '/rsds_Amon_' + self.model + '_' + self.experiment + '_ensmean.nc'
    elif self.type == 'CMIP5RAW':  # raw CMIP5 data based on ensembles
        filename1 = self._get_ensemble_filename(the_variable)
    elif self.type == 'CMIP5RAWSINGLE':
        filename1 = self.get_single_ensemble_file(the_variable, mip='Amon', realm='atmos', temporal_resolution='mon')
    else:
        raise ValueError('Unknown model type! not supported here!')

    if not os.path.exists(filename1):
        print ('WARNING file not existing: %s' % filename1)
        return None

    # /// PREPROCESSING ///
    cdo = Cdo()

    # 1) select timeperiod and generate the monthly mean file
    file_monthly = filename1[:-3] + '_' + s_start_time + '_' + s_stop_time + '_T63_monmean.nc'
    file_monthly = get_temporary_directory() + os.path.basename(file_monthly)

    print file_monthly

    sys.stdout.write('\n *** Model file monthly: %s\n' % file_monthly)
    cdo.monmean(options='-f nc', output=file_monthly, input='-remapcon,t63grid -seldate,' + s_start_time + ',' + s_stop_time + ' ' + filename1, force=force_calc)

    sys.stdout.write('\n *** Reading model data... \n')
    sys.stdout.write('     Interval: ' + interval + '\n')

    # 2) calculate monthly or seasonal climatology
    if interval == 'monthly':
        sis_clim_file = file_monthly[:-3] + '_ymonmean.nc'
        sis_sum_file = file_monthly[:-3] + '_ymonsum.nc'
        sis_N_file = file_monthly[:-3] + '_ymonN.nc'
        sis_clim_std_file = file_monthly[:-3] + '_ymonstd.nc'
        cdo.ymonmean(options='-f nc -b 32', output=sis_clim_file, input=file_monthly, force=force_calc)
        cdo.ymonsum(options='-f nc -b 32', output=sis_sum_file, input=file_monthly, force=force_calc)
        cdo.ymonstd(options='-f nc -b 32', output=sis_clim_std_file, input=file_monthly, force=force_calc)
        # number of samples
        cdo.div(options='-f nc', output=sis_N_file, input=sis_sum_file + ' ' + sis_clim_file, force=force_calc)
    elif interval == 'season':
        sis_clim_file = file_monthly[:-3] + '_yseasmean.nc'
        sis_sum_file = file_monthly[:-3] + '_yseassum.nc'
        sis_N_file = file_monthly[:-3] + '_yseasN.nc'
        sis_clim_std_file = file_monthly[:-3] + '_yseasstd.nc'
        cdo.yseasmean(options='-f nc -b 32', output=sis_clim_file, input=file_monthly, force=force_calc)
        cdo.yseassum(options='-f nc -b 32', output=sis_sum_file, input=file_monthly, force=force_calc)
        cdo.yseasstd(options='-f nc -b 32', output=sis_clim_std_file, input=file_monthly, force=force_calc)
        # number of samples
        cdo.div(options='-f nc -b 32', output=sis_N_file, input=sis_sum_file + ' ' + sis_clim_file, force=force_calc)
    else:
        print interval
        raise ValueError('Unknown temporal interval. Can not perform preprocessing!')

    if not os.path.exists(sis_clim_file):
        return None

    # 3) read data
    sis = Data(sis_clim_file, 'rsds', read=True, label=self._unique_name, unit='$W m^{-2}$', lat_name='lat', lon_name='lon', shift_lon=False)
    sis_std = Data(sis_clim_std_file, 'rsds', read=True, label=self._unique_name + ' std', unit='-', lat_name='lat', lon_name='lon', shift_lon=False)
    sis.std = sis_std.data.copy()
    del sis_std
    sis_N = Data(sis_N_file, 'rsds', read=True, label=self._unique_name + ' N', unit='-', lat_name='lat', lon_name='lon', shift_lon=False)
    sis.n = sis_N.data.copy()
    del sis_N

    # ensure that climatology always starts with January, therefore set date and then sort
    sis.adjust_time(year=1700, day=15)  # set arbitrary time for climatology
    sis.timsort()

    # 4) read monthly data
    sisall = Data(file_monthly, 'rsds', read=True, label=self._unique_name, unit='W m^{-2}', lat_name='lat', lon_name='lon', shift_lon=False)
    if not sisall._is_monthly():
        raise ValueError('Timecycle of 12 expected here!')
    sisall.adjust_time(day=15)

    # land/sea masking ...
    if valid_mask == 'land':
        mask_antarctica = True
    elif valid_mask == 'ocean':
        mask_antarctica = False
    else:
        mask_antarctica = False

    sis._apply_mask(get_T63_landseamask(False, mask_antarctica=mask_antarctica, area=valid_mask))
    sisall._apply_mask(get_T63_landseamask(False, mask_antarctica=mask_antarctica, area=valid_mask))
    sismean = sisall.fldmean()

    # return data as a tuple list
    retval = (sisall.time, sismean, sisall)
    del sisall

    # mask areas without radiation (set to invalid): all data < 1 W/m**2
    sis.data = np.ma.array(sis.data, mask=sis.data < 1.)

    return sis, retval
""" """ This is an example that should illustrate how you can scale a dataset by the length of the month """ from pycmbs.examples import download from pycmbs.data import Data from pycmbs.mapping import map_plot import matplotlib.pyplot as plt plt.close('all') # read some data as Data object filename = download.get_sample_file(name='air', return_object=False) air = Data(filename, 'air', read=True) # this dataset has the following times print air.date # obviously the different months have different numbers of days. # Let's say you want now to perform a proper averaging of the data # taking into account the different lengths of the months # # the way how you would do it is like # y = sum(w[i] * x[i]) # whereas w is a weighting factor for each timestep and 'x' is the input data # how can you easily do that with the Data object? # 1) calculate the weights ...
def get_surface_shortwave_radiation_up(self, interval='season', force_calc=False, **kwargs):
    """
    return data object of
    a) seasonal/monthly climatology for rsus
    b) global mean timeseries for rsus at original temporal resolution
    """
    the_variable = 'rsus'

    if self.type == 'CMIP5':
        filename1 = self.data_dir + the_variable + os.sep + self.experiment + os.sep + 'ready' + os.sep + self.model + os.sep + 'rsus_Amon_' + self.model + '_' + self.experiment + '_ensmean.nc'
    elif self.type == 'CMIP5RAW':  # raw CMIP5 data based on ensembles
        filename1 = self._get_ensemble_filename(the_variable)
    elif self.type == 'CMIP5RAWSINGLE':
        filename1 = self.get_single_ensemble_file(the_variable, mip='Amon', realm='atmos', temporal_resolution='mon')
    else:
        raise ValueError('Unknown type! not supported here!')

    if self.start_time is None:
        raise ValueError('Start time needs to be specified')
    if self.stop_time is None:
        raise ValueError('Stop time needs to be specified')

    if not os.path.exists(filename1):
        print('WARNING: file not existing: %s' % filename1)
        return None

    # PREPROCESSING
    cdo = Cdo()
    s_start_time = str(self.start_time)[0:10]
    s_stop_time = str(self.stop_time)[0:10]

    # 1) select time period and generate monthly mean file
    file_monthly = filename1[:-3] + '_' + s_start_time + '_' + s_stop_time + '_T63_monmean.nc'
    file_monthly = get_temporary_directory() + os.path.basename(file_monthly)
    cdo.monmean(options='-f nc', output=file_monthly, input='-remapcon,t63grid -seldate,' + s_start_time + ',' + s_stop_time + ' ' + filename1, force=force_calc)

    # 2) calculate monthly or seasonal climatology
    if interval == 'monthly':
        sup_clim_file = file_monthly[:-3] + '_ymonmean.nc'
        sup_sum_file = file_monthly[:-3] + '_ymonsum.nc'
        sup_N_file = file_monthly[:-3] + '_ymonN.nc'
        sup_clim_std_file = file_monthly[:-3] + '_ymonstd.nc'
        cdo.ymonmean(options='-f nc -b 32', output=sup_clim_file, input=file_monthly, force=force_calc)
        cdo.ymonsum(options='-f nc -b 32', output=sup_sum_file, input=file_monthly, force=force_calc)
        cdo.ymonstd(options='-f nc -b 32', output=sup_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc', output=sup_N_file, input=sup_sum_file + ' ' + sup_clim_file, force=force_calc)  # number of samples
    elif interval == 'season':
        sup_clim_file = file_monthly[:-3] + '_yseasmean.nc'
        sup_sum_file = file_monthly[:-3] + '_yseassum.nc'
        sup_N_file = file_monthly[:-3] + '_yseasN.nc'
        sup_clim_std_file = file_monthly[:-3] + '_yseasstd.nc'
        cdo.yseasmean(options='-f nc -b 32', output=sup_clim_file, input=file_monthly, force=force_calc)
        cdo.yseassum(options='-f nc -b 32', output=sup_sum_file, input=file_monthly, force=force_calc)
        cdo.yseasstd(options='-f nc -b 32', output=sup_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc -b 32', output=sup_N_file, input=sup_sum_file + ' ' + sup_clim_file, force=force_calc)  # number of samples
    else:
        print(interval)
        raise ValueError('Unknown temporal interval. Can not perform preprocessing!')

    if not os.path.exists(sup_clim_file):
        print('File not existing (sup_clim_file): ' + sup_clim_file)
        return None

    # 3) read data
    sup = Data(sup_clim_file, 'rsus', read=True, label=self._unique_name, unit='$W m^{-2}$', lat_name='lat', lon_name='lon', shift_lon=False)
    sup_std = Data(sup_clim_std_file, 'rsus', read=True, label=self._unique_name + ' std', unit='-', lat_name='lat', lon_name='lon', shift_lon=False)
    sup.std = sup_std.data.copy()
    del sup_std
    sup_N = Data(sup_N_file, 'rsus', read=True, label=self._unique_name + ' N', unit='-', lat_name='lat', lon_name='lon', shift_lon=False)
    sup.n = sup_N.data.copy()
    del sup_N

    # ensure that climatology always starts with January; therefore set date and then sort
    sup.adjust_time(year=1700, day=15)  # set arbitrary time for climatology
    sup.timsort()

    # 4) read monthly data
    supall = Data(file_monthly, 'rsus', read=True, label=self._unique_name, unit='$W m^{-2}$', lat_name='lat', lon_name='lon', shift_lon=False)
    supall.adjust_time(day=15)
    if not supall._is_monthly():
        raise ValueError('Monthly timecycle expected here!')
    supmean = supall.fldmean()

    # /// return data as a tuple list
    retval = (supall.time, supmean, supall)
    del supall

    # /// mask areas without radiation (set to invalid): all data < 1 W/m**2
    # sup.data = np.ma.array(sup.data, mask=sup.data < 1.)

    return sup, retval
class TestPycmbsBenchmarkingModels(unittest.TestCase):

    def setUp(self):
        self.D = Data(None, None)
        self.D._init_sample_object(nt=1000, ny=1, nx=1)

        # generate dummy Model object
        data_dir = './test/'
        varmethods = {'albedo': 'get_albedo()', 'sis': 'get_sis()'}
        self.model = models.Model(data_dir, varmethods, name='testmodel', intervals='monthly')

        sis = self.D.copy()
        sis.mulc(5., copy=False)
        sis.label = 'sisdummy'

        alb = self.D.copy()
        alb.label = 'albedodummy'

        # add some dummy data variables
        self.model.variables = {'albedo': alb, 'sis': sis}

    def test_save_prefix_missing(self):
        m = self.model
        odir = tempfile.mkdtemp() + os.sep
        with self.assertRaises(ValueError):
            m.save(odir)

    def test_save_create_odir(self):
        m = self.model
        odir = tempfile.mkdtemp() + os.sep
        if os.path.exists(odir):
            os.system('rm -rf ' + odir)
        m.save(odir, prefix='test')
        self.assertTrue(os.path.exists(odir))
        os.system('rm -rf ' + odir)

    def test_save(self):
        m = self.model
        odir = tempfile.mkdtemp() + os.sep
        sisfile = odir + 'testoutput_SIS.nc'
        albfile = odir + 'testoutput_ALBEDO.nc'
        if os.path.exists(sisfile):
            os.remove(sisfile)
        if os.path.exists(albfile):
            os.remove(albfile)

        m.save(odir, prefix='testoutput')
        self.assertTrue(os.path.exists(sisfile))
        self.assertTrue(os.path.exists(albfile))

        if os.path.exists(sisfile):
            os.remove(sisfile)
        if os.path.exists(albfile):
            os.remove(albfile)
        os.system('rm -rf ' + odir)

    def test_cmip5_init_singlemember(self):
        data_dir = tempfile.mkdtemp()

        # invalid model identifier
        with self.assertRaises(ValueError):
            M = models.CMIP5RAW_SINGLE(data_dir, 'MPI-M:MPI-ESM-LR1', 'amip', {}, intervals='monthly')
        with self.assertRaises(ValueError):
            M = models.CMIP5RAW_SINGLE(data_dir, 'MPI-M:MPI-ESM-LR#1#2', 'amip', {}, intervals='monthly')

        M1 = models.CMIP5RAW_SINGLE(data_dir, 'MPI-M:MPI-ESM-LR#1', 'amip', {}, intervals='monthly')
        M2 = models.CMIP5RAW_SINGLE(data_dir, 'MPI-M:MPI-ESM-LR#728', 'amip', {}, intervals='monthly')
        self.assertEqual(M1.ens_member, 1)
        self.assertEqual(M2.ens_member, 728)

    def test_cmip5_singlemember_filename(self):
        data_dir = tempfile.mkdtemp()

        # generate testfile
        testfile = data_dir + os.sep + 'MPI-M' + os.sep + 'MPI-ESM-LR' + os.sep + 'amip' + os.sep + 'mon' + os.sep + 'atmos' + os.sep + 'Amon' + os.sep + 'r1i1p1' + os.sep + 'ta' + os.sep + 'ta_Amon_MPI-ESM-LR_amip_r1i1p1_197901-200812.nc'
        os.makedirs(os.path.dirname(testfile))
        os.system('touch ' + testfile)
        self.assertTrue(os.path.exists(testfile))

        M = models.CMIP5RAW_SINGLE(data_dir, 'MPI-M:MPI-ESM-LR#1', 'amip', {}, intervals='monthly')
        f = M.get_single_ensemble_file('ta', mip='Amon', realm='atmos')
        self.assertTrue(os.path.exists(f))
        self.assertEqual(f, testfile)
def get_model_data_generic(self, interval='season', **kwargs):
    """
    unique parameters are:
        filename - file basename
        variable - name of the variable as the short_name in the netcdf file

    kwargs is a dictionary with keys for each model; each key maps to a dictionary
    with the properties for that model.
    """
    if self.type not in kwargs.keys():
        print('WARNING: it is not possible to get data using the generic function, as the method is missing: %s %s' % (self.type, str(kwargs.keys())))
        return None

    locdict = kwargs[self.type]

    # read settings and details from the keyword arguments;
    # no defaults: everything should be explicitly specified in either the config file or the dictionaries
    varname = locdict.pop('variable')
    units = locdict.pop('unit', 'Crazy Unit')
    # interval = kwargs.pop('interval')  # it does not make sense to specify a default value, as this option is set by the configuration file!
    lat_name = locdict.pop('lat_name', 'lat')
    lon_name = locdict.pop('lon_name', 'lon')
    model_suffix = locdict.pop('model_suffix')
    model_prefix = locdict.pop('model_prefix')
    file_format = locdict.pop('file_format')
    scf = locdict.pop('scale_factor')
    valid_mask = locdict.pop('valid_mask')
    custom_path = locdict.pop('custom_path', None)
    thelevel = locdict.pop('level', None)

    target_grid = self._actplot_options['targetgrid']
    interpolation = self._actplot_options['interpolation']

    if custom_path is None:
        filename1 = ("%s%s/merged/%s_%s_%s_%s_%s.%s" % (self.data_dir, varname, varname, model_prefix, self.model, self.experiment, model_suffix, file_format))
    else:
        if self.type == 'CMIP5':
            filename1 = ("%s/%s_%s_%s_%s_%s.%s" % (custom_path, varname, model_prefix, self.model, self.experiment, model_suffix, file_format))
        elif self.type == 'CMIP5RAW':
            filename1 = ("%s/%s_%s_%s_%s_%s.%s" % (custom_path, varname, model_prefix, self.model, self.experiment, model_suffix, file_format))
        elif self.type == 'CMIP5RAWSINGLE':
            raise NotImplementedError('Custom path not yet implemented for CMIP5RAWSINGLE!')
        elif self.type == 'CMIP3':
            filename1 = ("%s/%s_%s_%s_%s.%s" % (custom_path, self.experiment, self.model, varname, model_suffix, file_format))
        else:
            print(self.type)
            raise ValueError('Can not generate filename: invalid model type! %s' % self.type)

    force_calc = False

    if self.start_time is None:
        raise ValueError('Start time needs to be specified')
    if self.stop_time is None:
        raise ValueError('Stop time needs to be specified')

    # /// PREPROCESSING ///
    cdo = Cdo()
    s_start_time = str(self.start_time)[0:10]
    s_stop_time = str(self.stop_time)[0:10]

    # 1) select time period and generate monthly mean file
    if target_grid == 't63grid':
        gridtok = 'T63'
    else:
        gridtok = 'SPECIAL_GRID'

    file_monthly = filename1[:-3] + '_' + s_start_time + '_' + s_stop_time + '_' + gridtok + '_monmean.nc'  # target filename
    file_monthly = get_temporary_directory() + os.path.basename(file_monthly)

    sys.stdout.write('\n *** Model file monthly: %s\n' % file_monthly)

    if not os.path.exists(filename1):
        print('WARNING: file not existing: ' + filename1)
        return None

    cdo.monmean(options='-f nc', output=file_monthly, input='-' + interpolation + ',' + target_grid + ' -seldate,' + s_start_time + ',' + s_stop_time + ' ' + filename1, force=force_calc)

    sys.stdout.write('\n *** Reading model data... \n')
    sys.stdout.write('    Interval: ' + interval + '\n')

    # 2) calculate monthly or seasonal climatology
    if interval == 'monthly':
        mdata_clim_file = file_monthly[:-3] + '_ymonmean.nc'
        mdata_sum_file = file_monthly[:-3] + '_ymonsum.nc'
        mdata_N_file = file_monthly[:-3] + '_ymonN.nc'
        mdata_clim_std_file = file_monthly[:-3] + '_ymonstd.nc'
        cdo.ymonmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
        cdo.ymonsum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
        cdo.ymonstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)  # number of samples
    elif interval == 'season':
        mdata_clim_file = file_monthly[:-3] + '_yseasmean.nc'
        mdata_sum_file = file_monthly[:-3] + '_yseassum.nc'
        mdata_N_file = file_monthly[:-3] + '_yseasN.nc'
        mdata_clim_std_file = file_monthly[:-3] + '_yseasstd.nc'
        cdo.yseasmean(options='-f nc -b 32', output=mdata_clim_file, input=file_monthly, force=force_calc)
        cdo.yseassum(options='-f nc -b 32', output=mdata_sum_file, input=file_monthly, force=force_calc)
        cdo.yseasstd(options='-f nc -b 32', output=mdata_clim_std_file, input=file_monthly, force=force_calc)
        cdo.div(options='-f nc -b 32', output=mdata_N_file, input=mdata_sum_file + ' ' + mdata_clim_file, force=force_calc)  # number of samples
    else:
        raise ValueError('Unknown temporal interval. Can not perform preprocessing!')

    if not os.path.exists(mdata_clim_file):
        return None

    # 3) read data
    if interval == 'monthly':
        thetime_cycle = 12
    elif interval == 'season':
        thetime_cycle = 4
    else:
        print(interval)
        raise ValueError('Unsupported interval!')

    mdata = Data(mdata_clim_file, varname, read=True, label=self._unique_name, unit=units, lat_name=lat_name, lon_name=lon_name, shift_lon=False, scale_factor=scf, level=thelevel, time_cycle=thetime_cycle)
    mdata_std = Data(mdata_clim_std_file, varname, read=True, label=self._unique_name + ' std', unit='-', lat_name=lat_name, lon_name=lon_name, shift_lon=False, level=thelevel, time_cycle=thetime_cycle)
    mdata.std = mdata_std.data.copy()
    del mdata_std
    mdata_N = Data(mdata_N_file, varname, read=True, label=self._unique_name + ' N', unit='-', lat_name=lat_name, lon_name=lon_name, shift_lon=False, scale_factor=scf, level=thelevel)
    mdata.n = mdata_N.data.copy()
    del mdata_N

    # ensure that climatology always starts with January; therefore set date and then sort
    mdata.adjust_time(year=1700, day=15)  # set arbitrary time for climatology
    mdata.timsort()

    # 4) read monthly data
    mdata_all = Data(file_monthly, varname, read=True, label=self._unique_name, unit=units, lat_name=lat_name, lon_name=lon_name, shift_lon=False, time_cycle=12, scale_factor=scf, level=thelevel)
    mdata_all.adjust_time(day=15)

    # mask_antarctica masks everything below 60 degrees S;
    # here we only mask Antarctica if only LAND points shall be used
    if valid_mask == 'land':
        mask_antarctica = True
    elif valid_mask == 'ocean':
        mask_antarctica = False
    else:
        mask_antarctica = False

    if target_grid == 't63grid':
        mdata._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
        mdata_all._apply_mask(get_T63_landseamask(False, area=valid_mask, mask_antarctica=mask_antarctica))
    else:
        tmpmsk = get_generic_landseamask(False, area=valid_mask, target_grid=target_grid, mask_antarctica=mask_antarctica)
        mdata._apply_mask(tmpmsk)
        mdata_all._apply_mask(tmpmsk)
        del tmpmsk

    mdata_mean = mdata_all.fldmean()

    # return data as a tuple list
    retval = (mdata_all.time, mdata_mean, mdata_all)
    del mdata_all

    return mdata, retval
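# Hypothetical usage sketch (placeholder names, not from the original source): the
# generic getter returns None on missing files, otherwise the climatology Data object
# plus a (time, field mean timeseries, full resolution Data) tuple:
#
#     result = model.get_model_data_generic(
#         interval='season',
#         CMIP5={'variable': 'tas', 'unit': 'K',
#                'model_suffix': 'ensmean', 'model_prefix': 'Amon',
#                'file_format': 'nc', 'scale_factor': 1., 'valid_mask': 'global'})
#     if result is not None:
#         mdata, (t, mean_ts, mdata_all) = result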
# -*- coding: utf-8 -*-
"""
This file is part of pyCMBS.
(c) 2012- Alexander Loew
For COPYING and LICENSE details, please refer to the LICENSE file
"""

from pycmbs.data import Data
from pycmbs.diagnostic import PatternCorrelation
import matplotlib.pyplot as plt
import numpy as np

file_name = '../../../pycmbs/examples/example_data/air.mon.mean.nc'
A = Data(file_name, 'air', lat_name='lat', lon_name='lon', read=True, label='air temperature')
B = A.copy()
B.mulc(2.3, copy=False)
B.data = B.data + np.random.random(B.shape) * 100.

# calculate spatial correlation for all timesteps ...
P = PatternCorrelation(A, B)
# ... and visualize it
P.plot()
plt.show()
""" This file is part of pyCMBS. (c) 2012- Alexander Loew For COPYING and LICENSE details, please refer to the LICENSE file """ """ development script for pattern correlation analysis """ from pycmbs.diagnostic import PatternCorrelation from pycmbs.data import Data import numpy as np import matplotlib.pyplot as plt plt.close('all') fname = '../pycmbs/examples/example_data/air.mon.mean.nc' # generate two datasets x = Data(fname, 'air', read=True) xc = x.get_climatology(return_object=True) yc = xc.copy() yc.data = yc.data * np.random.random(yc.shape)*10. PC = PatternCorrelation(xc, yc) PC.plot() plt.show()
class TestPycmbsBenchmarkingModels(unittest.TestCase):

    def setUp(self):
        n = 1000  # slows down significantly! constraint is percentile test
        x = sc.randn(n) * 100.  # generate dummy data

        self.D = Data(None, None)
        d = np.ones((n, 1, 1))
        self.D.data = d
        self.D.data[:, 0, 0] = x
        self.D.data = np.ma.array(self.D.data, mask=self.D.data != self.D.data)
        self.D.verbose = True
        self.D.unit = 'myunit'
        self.D.label = 'testlabel'
        self.D.filename = 'testinputfilename.nc'
        self.D.varname = 'testvarname'
        self.D.long_name = 'This is the longname'
        self.D.time = np.arange(n) + pl.datestr2num('2001-01-01')
        self.D.time_str = "days since 0001-01-01 00:00:00"
        self.D.calendar = 'gregorian'
        self.D.oldtime = False

        # generate dummy Model object
        data_dir = './test/'
        varmethods = {'albedo': 'get_albedo()', 'sis': 'get_sis()'}
        self.model = models.Model(data_dir, varmethods, name='testmodel', intervals='monthly')

        sis = self.D.copy()
        sis.mulc(5., copy=False)
        sis.label = 'sisdummy'

        alb = self.D.copy()
        alb.label = 'albedodummy'

        # add some dummy data variables
        self.model.variables = {'albedo': alb, 'sis': sis}

    def test_save_prefix_missing(self):
        m = self.model
        odir = './odir/'
        with self.assertRaises(ValueError):
            m.save(odir)

    def test_save_create_odir(self):
        m = self.model
        odir = './odir/'
        if os.path.exists(odir):
            os.system('rm -rf ' + odir)
        m.save(odir, prefix='test')
        self.assertTrue(os.path.exists(odir))
        os.system('rm -rf ' + odir)

    def test_save(self):
        m = self.model
        odir = './odir/'
        sisfile = odir + 'testoutput_SIS.nc'
        albfile = odir + 'testoutput_ALBEDO.nc'
        if os.path.exists(sisfile):
            os.remove(sisfile)
        if os.path.exists(albfile):
            os.remove(albfile)

        m.save(odir, prefix='testoutput')
        self.assertTrue(os.path.exists(sisfile))
        self.assertTrue(os.path.exists(albfile))

        if os.path.exists(sisfile):
            os.remove(sisfile)
        if os.path.exists(albfile):
            os.remove(albfile)
        os.system('rm -rf ' + odir)