def test_img2ts_daily_no_resampling_missing_day():
    """
    Test conversion to time series across the missing day 2016-01-01
    (see reader above) without spatial resampling.
    """
    input_grid = BasicGrid(np.array([0.5, 0.5, -0.5, -0.5]),
                           np.array([1, -1, 1, -1]))

    outputpath = tempfile.mkdtemp()
    start = datetime(2015, 12, 5)
    end = datetime(2016, 1, 10)

    ds_in = TestMultiTemporalImageDatasetDaily()
    img2ts = Img2Ts(ds_in, outputpath, start, end, imgbuffer=15,
                    input_grid=input_grid)

    # expected values: Dec 5-31 2015 and Jan 2-10 2016
    # (2016-01-01 is missing in the test reader)
    ts_should = np.concatenate([np.arange(5, 32, dtype=float),
                                np.arange(2, 11, dtype=float)])
    dates_should = ds_in.tstamps_for_daterange(start, end)
    dates_should.remove(datetime(2016, 1, 1))

    img2ts.calc()

    ts_file = os.path.join(outputpath, '0000.nc')
    with OrthoMultiTs(ts_file) as ds:
        ts = ds.read_ts('var1', 0)
        nptest.assert_allclose(ts['var1'], ts_should)
        assert dates_should == list(ts['time'])
        nptest.assert_allclose(ds.dataset.variables['location_id'][:],
                               np.array([0, 1, 2, 3]))
def reshuffle(input_root, outputpath, startdate, enddate, parameters=None,
              land_points=True, ignore_meta=False, imgbuffer=200):
    """
    Reshuffle method applied to ESA CCI SM images.

    Parameters
    ----------
    input_root: string
        Input path where the ESA CCI SM images were downloaded to.
    outputpath : string
        Output path.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list, optional (default: None)
        Parameters to read and convert. If none are passed, we read an image
        in the root path and use the variables from that image.
    land_points : bool, optional (default: True)
        Use the land grid to calculate time series on. Leads to faster
        processing and smaller files.
    ignore_meta : bool, optional (default: False)
        Do not read metadata attributes, reshuffle only the values.
    imgbuffer: int, optional
        How many images to read at once before writing time series.
    """
    if land_points:
        grid = CCILandGrid()
    else:
        grid = CCICellGrid()

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    file_args, file_vars = parse_filename(input_root)

    if parameters is None:
        parameters = [p for p in file_vars if p not in ['lat', 'lon', 'time']]

    input_dataset = CCI_SM_025Ds(data_path=input_root, parameter=parameters,
                                 subgrid=grid, array_1D=True)

    if not ignore_meta:
        global_attr, ts_attributes = read_metadata(
            sensortype=file_args['sensor_type'],
            version=int(file_args['version']),
            varnames=parameters)
        global_attr['time_coverage_start'] = str(startdate)
        global_attr['time_coverage_end'] = str(enddate)
    else:
        global_attr = {'product': 'ESA CCI SM'}
        ts_attributes = None

    reshuffler = Img2Ts(input_dataset=input_dataset, outputpath=outputpath,
                        startdate=startdate, enddate=enddate,
                        input_grid=grid, imgbuffer=imgbuffer,
                        cellsize_lat=5.0, cellsize_lon=5.0,
                        global_attr=global_attr, zlib=True,
                        unlim_chunksize=1000, ts_attributes=ts_attributes)

    reshuffler.calc()
def reshuffle(input_root, outputpath, startdate, enddate, parameters,
              land_points=True, imgbuffer=50):
    """
    Reshuffle method applied to GLDAS data.

    Parameters
    ----------
    input_root: string
        Input path where the GLDAS data was downloaded to.
    outputpath : string
        Output path.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list
        Parameters to read and convert.
    land_points : bool, optional (default: True)
        Reshuffle land points only (the land grid is fit to the GLDAS 2.x
        netCDF data).
    imgbuffer: int, optional
        How many images to read at once before writing time series.
    """
    if land_points:
        landgrid = GLDAS025LandGrid()
    else:
        landgrid = None

    if get_filetype(input_root) == 'grib':
        input_dataset = GLDAS_Noah_v1_025Ds(input_root, parameters,
                                            array_1D=True)
        if land_points:
            warnings.warn('Land Grid is fit to GLDAS 2.x netCDF data')
    else:
        input_dataset = GLDAS_Noah_v21_025Ds(input_root, parameters, landgrid,
                                             array_1D=True)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    global_attr = {'product': 'GLDAS'}

    # get time series attributes from first day of data.
    data = input_dataset.read(startdate)
    ts_attributes = data.metadata

    if landgrid:
        grid = landgrid
    else:
        grid = BasicGrid(data.lon, data.lat)

    reshuffler = Img2Ts(input_dataset=input_dataset, outputpath=outputpath,
                        startdate=startdate, enddate=enddate,
                        input_grid=grid, imgbuffer=imgbuffer,
                        cellsize_lat=5.0, cellsize_lon=5.0,
                        global_attr=global_attr, zlib=True,
                        unlim_chunksize=1000, ts_attributes=ts_attributes)

    reshuffler.calc()
def reshuffle(input_root, outputpath, startdate, enddate, parameters=None,
              img_kwargs=None, imgbuffer=50):
    """
    Reshuffle method applied to SMOS image data.

    Parameters
    ----------
    input_root: string
        Input path where the SMOS data was downloaded to.
    outputpath : string
        Output path.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list, optional (default: None)
        Parameters to read and convert. If None, all variables found in the
        first file (except lat, lon, time) are used.
    img_kwargs: dict, optional (default: None)
        Kwargs that are passed to the image class.
    imgbuffer: int, optional
        How many images to read at once before writing time series.
    """
    if img_kwargs is None:  # avoid a mutable default argument
        img_kwargs = {}

    ff, file_vars = firstfile(input_root)
    fp, ff = os.path.split(ff)

    grid = EASE25CellGrid()

    if parameters is None:
        parameters = [p for p in file_vars if p not in ['lat', 'lon', 'time']]

    # this is only for reading the ts_attrs
    input_dataset = SMOSImg(filename=os.path.join(fp, ff),
                            parameters=parameters, grid=grid, flatten=True,
                            **img_kwargs)

    data = input_dataset.read()
    ts_attributes = data.metadata
    ts_attributes = None  # todo: fails for Quality_Flags

    input_dataset = SMOSDs(input_root, parameters, grid=grid, flatten=True,
                           **img_kwargs)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    global_attr = {'product': 'SMOS_IC'}

    reshuffler = Img2Ts(input_dataset=input_dataset, outputpath=outputpath,
                        startdate=startdate, enddate=enddate,
                        input_grid=grid, imgbuffer=imgbuffer,
                        cellsize_lat=5.0, cellsize_lon=5.0,
                        global_attr=global_attr, zlib=True,
                        unlim_chunksize=1000, ts_attributes=ts_attributes)

    reshuffler.calc()
def _create_reshuffler(
    dataset_root,
    timeseries_root,
    startdate,
    enddate,
    imgbuffer=365,
    only_land=False,
    bbox=None,
):
    """
    Create a reshuffler for converting images to timeseries.

    Parameters
    ----------
    dataset_root : str or Path
        Path of the directory containing the data files.
    timeseries_root : str or Path
        Path of where to store the timeseries files.
    startdate : np.datetime64
        Start date of processing.
    enddate : np.datetime64
        End date of processing.
    imgbuffer : int, optional (default: 365)
        Number of images to read at once.
    only_land : bool, optional (default: False)
        Use the land mask to reduce the grid to land grid points only.
    bbox : list/tuple
        Bounding box parameters in the form
        [min_lon, min_lat, max_lon, max_lat].

    Returns
    -------
    reshuffler : Img2Ts object
    """
    input_dataset = GSWPDataset(
        Path(dataset_root) / "*.nc",
        only_land=only_land,
        bbox=bbox,
    )

    Path(timeseries_root).mkdir(parents=True, exist_ok=True)

    reshuffler = Img2Ts(
        input_dataset=input_dataset,
        outputpath=timeseries_root,
        startdate=startdate,
        enddate=enddate,
        ts_attributes=input_dataset.metadata,
        zlib=True,
        imgbuffer=imgbuffer,
        # this is necessary currently due to bug in repurpose
        cellsize_lat=input_dataset.cellsize,
        cellsize_lon=input_dataset.cellsize,
    )
    return reshuffler
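# Minimal usage sketch (hypothetical paths and dates; assumes GSWP netCDF
# files exist under ``dataset_root``): the helper only builds the Img2Ts
# object, so the caller still triggers the actual conversion via ``calc()``.
if __name__ == "__main__":
    import numpy as np

    reshuffler = _create_reshuffler(
        dataset_root="/data/gswp",            # hypothetical input directory
        timeseries_root="/data/gswp_ts",      # hypothetical output directory
        startdate=np.datetime64("2000-01-01"),
        enddate=np.datetime64("2000-12-31"),
        only_land=True,
    )
    reshuffler.calc()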
def reshuffle(input_root, outputpath, startdate, enddate, imgbuffer=200,
              **ds_kwargs):
    """
    Reshuffle method applied to SMOS image data.

    Parameters
    ----------
    input_root: string
        Input path where the SMOS IC data was downloaded to (yearly folders).
    outputpath : string
        Output path.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    imgbuffer: int, optional
        How many images to read at once before writing time series.
    ds_kwargs: dict
        Kwargs that are passed to the image datastack class.
    """
    ff, file_vars = firstfile(input_root)
    fp, ff = os.path.split(ff)

    if 'grid' not in ds_kwargs.keys():
        ds_kwargs['grid'] = EASE25CellGrid(None)
    if 'parameters' not in ds_kwargs.keys():
        ds_kwargs['parameters'] = None

    # this is only for reading the ts_attrs
    input_dataset = SMOSImg(filename=os.path.join(fp, ff),
                            parameters=ds_kwargs['parameters'],
                            flatten=True, read_flags=None,
                            grid=ds_kwargs['grid'])

    _, ts_attributes = input_dataset._read_img()
    global_attr = input_dataset.get_global_attrs()

    if ds_kwargs['parameters'] is None:
        ds_kwargs['parameters'] = input_dataset.parameters

    input_dataset = SMOSDs(input_root, flatten=True, **ds_kwargs)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    reshuffler = Img2Ts(input_dataset=input_dataset, outputpath=outputpath,
                        startdate=startdate, enddate=enddate,
                        # drop points that are not in the subset
                        input_grid=ds_kwargs['grid'].cut(),
                        imgbuffer=imgbuffer,
                        cellsize_lat=5.0, cellsize_lon=5.0,
                        global_attr=global_attr, zlib=True,
                        unlim_chunksize=1000, ts_attributes=ts_attributes)

    reshuffler.calc()
def reshuffle(input_root, outputpath, startdate, enddate, parameters,
              overpass=None, crid=None, imgbuffer=50):
    """
    Reshuffle method applied to SMAP SPL3SMP data.

    Parameters
    ----------
    input_root: string
        Input path where the SMAP data was downloaded to.
    outputpath : string
        Output path.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list
        Parameters to read and convert.
    overpass : str
        Select 'AM' for the descending overpass or 'PM' for the ascending
        one. If the product version does not contain multiple overpasses,
        this must be None.
    crid : int, optional (default: None)
        Search for files with this Composite Release ID for reshuffling only.
        See also https://nsidc.org/data/smap/data_versions#CRID
    imgbuffer: int, optional
        How many images to read at once before writing time series.
    """
    input_dataset = SPL3SMP_Ds(input_root, parameter=parameters,
                               overpass=overpass, crid=crid, flatten=True)

    global_attr = {'product': 'SPL3SMP'}
    if overpass:
        global_attr['overpass'] = overpass

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    # get time series attributes from first day of data.
    data = input_dataset.read(startdate)
    ts_attributes = data.metadata

    ease36 = EASE2_grid(36000)
    lons, lats = np.meshgrid(ease36.londim, ease36.latdim)
    grid = BasicGrid(lons.flatten(), lats.flatten())

    reshuffler = Img2Ts(input_dataset=input_dataset, outputpath=outputpath,
                        startdate=startdate, enddate=enddate,
                        input_grid=grid, imgbuffer=imgbuffer,
                        cellsize_lat=5.0, cellsize_lon=5.0,
                        global_attr=global_attr,
                        ts_attributes=ts_attributes)

    reshuffler.calc()
def reshuffle(input_root, outputpath, startdate, enddate, parameters,
              imgbuffer=50):
    """
    Reshuffle method applied to ESA CCI SM v0.42 data.

    Parameters
    ----------
    input_root: string
        Input path where the ESA CCI SM data was downloaded to.
    outputpath : string
        Output path.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list
        Parameters to read and convert.
    imgbuffer: int, optional
        How many images to read at once before writing time series.
    """
    input_dataset = CCI_SM_v042_025Ds(input_root, parameters, array_1D=True)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    global_attr = {'product': 'ESACCI'}

    # get time series attributes from first day of data.
    data = input_dataset.read(startdate)
    ts_attributes = data.metadata

    grid = BasicGrid(data.lon, data.lat)

    reshuffler = Img2Ts(input_dataset=input_dataset, outputpath=outputpath,
                        startdate=startdate, enddate=enddate,
                        input_grid=grid, imgbuffer=imgbuffer,
                        cellsize_lat=5.0, cellsize_lon=5.0,
                        global_attr=global_attr, zlib=True,
                        unlim_chunksize=1000, ts_attributes=ts_attributes)

    reshuffler.calc()
def reshuffle(input_root, outputpath, startdate, enddate, parameters,
              imgbuffer=50):
    """
    Reshuffle method applied to ERA-Interim data.

    Parameters
    ----------
    input_root: string
        Input path where the ERA-Interim data was downloaded to.
    outputpath : string
        Output path.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list
        Parameters to read and convert.
    imgbuffer: int, optional
        How many images to read at once before writing time series.
    """
    input_dataset = ERAInterimDs(parameters, input_root, expand_grid=False)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    global_attr = {'product': 'ERA Interim'}

    # get time series attributes from first day of data.
    data = input_dataset.read(startdate)
    ts_attributes = data.metadata

    grid = BasicGrid(data.lon, data.lat)

    reshuffler = Img2Ts(input_dataset=input_dataset, outputpath=outputpath,
                        startdate=startdate, enddate=enddate,
                        input_grid=grid, imgbuffer=imgbuffer,
                        cellsize_lat=5.0, cellsize_lon=5.0,
                        ts_dtypes=np.dtype('float32'),
                        global_attr=global_attr,
                        ts_attributes=ts_attributes)

    reshuffler.calc()
def reshuffle(input_root, outputpath, startdate, enddate, parameters,
              imgbuffer=50):
    """
    Reshuffle method applied to SMAP SPL3SMP data.

    Parameters
    ----------
    input_root: string
        Input path where the SMAP data was downloaded to.
    outputpath : string
        Output path.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list
        Parameters to read and convert.
    imgbuffer: int, optional
        How many images to read at once before writing time series.
    """
    input_dataset = SPL3SMP_Ds(input_root, parameter=parameters, flatten=True)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    global_attr = {'product': 'SPL3SMP'}

    # get time series attributes from first day of data.
    data = input_dataset.read(startdate)
    ts_attributes = data.metadata

    ease36 = EASE2_grid(36000)
    lons, lats = np.meshgrid(ease36.londim, ease36.latdim)
    grid = BasicGrid(lons.flatten(), lats.flatten())

    reshuffler = Img2Ts(input_dataset=input_dataset, outputpath=outputpath,
                        startdate=startdate, enddate=enddate,
                        input_grid=grid, imgbuffer=imgbuffer,
                        cellsize_lat=5.0, cellsize_lon=5.0,
                        global_attr=global_attr,
                        ts_attributes=ts_attributes)

    reshuffler.calc()
def reshuffle(input_root, outputpath, startdate, enddate, parameters=None,
              land_points=True, bbox=None, ignore_meta=False, imgbuffer=500):
    """
    Reshuffle method applied to C3S data.

    Parameters
    ----------
    input_root: string
        Input path where the C3S images were downloaded.
    outputpath : string
        Output path.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list, optional (default: None)
        Parameters to read and convert.
    land_points : bool, optional (default: True)
        Use the land grid to calculate time series on. Leads to faster
        processing and smaller files.
    bbox : tuple
        Min lon, min lat, max lon, max lat.
        BBox to read data for.
    ignore_meta : bool, optional (default: False)
        Ignore metadata and reshuffle only the values. Can be used e.g. if a
        version is not yet supported.
    imgbuffer: int, optional (default: 500)
        How many images to read at once before writing time series.
    """
    if land_points:
        grid = SMECV_Grid_v052('land')
    else:
        grid = SMECV_Grid_v052(None)

    if bbox:
        grid = grid.subgrid_from_bbox(*bbox)

    if parameters is None:
        file_args, file_vars = parse_filename(input_root)
        parameters = [p for p in file_vars if p not in ['lat', 'lon', 'time']]

    subpath_templ = ('%Y', ) if os.path.isdir(
        os.path.join(input_root, str(startdate.year))) else None

    input_dataset = C3S_Nc_Img_Stack(data_path=input_root,
                                     parameters=parameters,
                                     subgrid=grid,
                                     flatten=True,
                                     fillval=None,
                                     subpath_templ=subpath_templ)

    if not ignore_meta:
        prod_args = input_dataset.fname_args

        kwargs = {
            'sensor_type': prod_args['prod'].lower(),
            'cdr_type': prod_args['cdr'],
            'product_temp_res': prod_args['temp'],
            'cls': getattr(metadata,
                           f"C3S_SM_TS_Attrs_{prod_args['vers']}")
        }

        if prod_args['temp'].upper() == 'DAILY':
            kwargs.pop('product_temp_res')
            attrs = C3S_daily_tsatt_nc(**kwargs)
        else:
            attrs = C3S_dekmon_tsatt_nc(**kwargs)

        ts_attributes = {}
        global_attributes = attrs.global_attr

        for var in parameters:
            ts_attributes.update(attrs.ts_attributes[var])
    else:
        global_attributes = None
        ts_attributes = None

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    reshuffler = Img2Ts(input_dataset=input_dataset, outputpath=outputpath,
                        startdate=startdate, enddate=enddate,
                        input_grid=grid, imgbuffer=imgbuffer,
                        cellsize_lat=5.0, cellsize_lon=5.0,
                        global_attr=global_attributes, zlib=True,
                        unlim_chunksize=1000, ts_attributes=ts_attributes)

    reshuffler.calc()
def reshuffle(input_root, outputpath, startdate, enddate, parameters,
              imgbuffer=50):
    """
    Reshuffle method applied to ERA5 data.

    Parameters
    ----------
    input_root: string
        Input path where the ERA5 data was downloaded to.
    outputpath : string
        Output path.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list
        Parameters to read and convert.
    imgbuffer: int, optional
        How many images to read at once before writing time series.
    """
    input_dataset = ECMWF_ERA5_025Ds(input_root, parameters, array_1D=True)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    global_attr = {'product': 'ERA5'}

    # get time series attributes from first day of data.
    data = input_dataset.read(startdate)
    ts_attributes = data.metadata

    grid = BasicGrid(data.lon, data.lat)

    reshuffler = Img2Ts(input_dataset=input_dataset, outputpath=outputpath,
                        startdate=startdate, enddate=enddate,
                        input_grid=grid, imgbuffer=imgbuffer,
                        cellsize_lat=5.0, cellsize_lon=5.0,
                        global_attr=global_attr, zlib=True,
                        unlim_chunksize=1000, ts_attributes=ts_attributes)

    reshuffler.calc()
def reshuffle(input_root, outputpath, startdate, enddate, parameters=None,
              land_points=True, ignore_meta=False, imgbuffer=200):
    """
    Reshuffle method applied to ESA CCI SM images.

    Parameters
    ----------
    input_root: string
        Input path where the ESA CCI SM images were downloaded to.
    outputpath : string
        Output path.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list, optional (default: None)
        Parameters to read and convert. If none are passed, we read an image
        in the root path and use the variables from that image.
    land_points : bool, optional (default: True)
        Use the land grid to calculate time series on. Leads to faster
        processing and smaller files.
    ignore_meta : bool, optional (default: False)
        Do not read metadata attributes, reshuffle only the values.
    imgbuffer: int, optional
        How many images to read at once before writing time series.
    """
    if land_points:
        grid = CCILandGrid()
    else:
        grid = CCICellGrid()

    gpis, lons, lats, cells = grid.get_grid_points()
    grid_vars = {'gpis': gpis, 'lons': lons, 'lats': lats}
    # repurpose cannot handle masked arrays
    for k, v in grid_vars.items():  # type v: np.ma.MaskedArray
        if isinstance(v, np.ma.MaskedArray):
            grid_vars[k] = v.filled()

    grid = BasicGrid(lon=grid_vars['lons'],
                     lat=grid_vars['lats'],
                     gpis=grid_vars['gpis']).to_cell_grid(5.)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    file_args, file_vars = parse_filename(input_root)

    if parameters is None:
        parameters = [p for p in file_vars if p not in ['lat', 'lon', 'time']]

    input_dataset = CCI_SM_025Ds(data_path=input_root, parameter=parameters,
                                 subgrid=grid, array_1D=True)

    if not ignore_meta:
        global_attr, ts_attributes = read_metadata(
            sensortype=file_args['sensor_type'],
            version=int(file_args['version']),
            varnames=parameters,
            subversion=file_args['sub_version'])
    else:
        global_attr = {'product': 'ESA CCI SM'}
        ts_attributes = None

    reshuffler = Img2Ts(input_dataset=input_dataset, outputpath=outputpath,
                        startdate=startdate, enddate=enddate,
                        input_grid=grid, imgbuffer=imgbuffer,
                        cellsize_lat=5.0, cellsize_lon=5.0,
                        global_attr=global_attr, zlib=True,
                        unlim_chunksize=1000, ts_attributes=ts_attributes)

    reshuffler.calc()
def reshuffle(
    input_root,
    outputpath,
    startdate,
    enddate,
    variables,
    product=None,
    bbox=None,
    h_steps=(0, 6, 12, 18),
    land_points=False,
    imgbuffer=50,
):
    """
    Reshuffle method applied to ERA images for conversion into netcdf time
    series format.

    Parameters
    ----------
    input_root: str
        Input path where ERA image data was downloaded to.
    outputpath : str
        Output path, where the reshuffled netcdf time series are stored.
    startdate : datetime
        Start date, from which images are read and time series are generated.
    enddate : datetime
        End date, until which images are read and time series are generated.
    variables: tuple or list or str
        Variables to read from the passed images and convert into time
        series format.
    product : str, optional (default: None)
        Either era5 or era5-land; if None is passed, we guess the product
        from the downloaded image files.
    bbox: tuple, optional (default: None)
        (min_lon, min_lat, max_lon, max_lat) - wgs84.
        To load only a subset of the global grid / file.
    h_steps : list or tuple, optional (default: (0, 6, 12, 18))
        Hours at which images are read for each day and used for
        reshuffling; this defines the sub-daily temporal resolution of the
        time series that are generated.
    land_points: bool, optional (default: False)
        Reshuffle only land points. Uses the ERA5 land mask to create a land
        grid. The land grid is fixed to 0.25*0.25 or 0.1*0.1 deg for now.
    imgbuffer: int, optional (default: 50)
        How many images to read at once before writing time series. This
        number affects how many images are stored in memory and should be
        chosen according to the available amount of memory and the size of a
        single image.
    """
    if h_steps is None:
        h_steps = (0, 6, 12, 18)

    filetype = parse_filetype(input_root)
    product = parse_product(input_root) if not product else product

    if land_points:
        if product == "era5":
            grid = ERA5_RegularImgLandGrid(res_lat=0.25, res_lon=0.25,
                                           bbox=bbox)
        elif product == "era5-land":
            grid = ERA5_RegularImgLandGrid(res_lat=0.1, res_lon=0.1,
                                           bbox=bbox)
        else:
            raise NotImplementedError(
                product, "Land grid not implemented for product.")
    else:
        if product == "era5":
            grid = ERA_RegularImgGrid(res_lat=0.25, res_lon=0.25, bbox=bbox)
        elif product == "era5-land":
            grid = ERA_RegularImgGrid(res_lat=0.1, res_lon=0.1, bbox=bbox)
        else:
            raise NotImplementedError(product,
                                      "Grid not implemented for product.")

    if filetype == "grib":
        if land_points:
            raise NotImplementedError(
                "Reshuffling land points only implemented for netcdf files")
        input_dataset = ERA5GrbDs(
            root_path=input_root,
            parameter=variables,
            subgrid=grid,
            array_1D=True,
            h_steps=h_steps,
            product=product,
            mask_seapoints=False,
        )
    elif filetype == "netcdf":
        input_dataset = ERA5NcDs(
            root_path=input_root,
            parameter=variables,
            subgrid=grid,
            array_1D=True,
            h_steps=h_steps,
            product=product,
            mask_seapoints=False,
        )
    else:
        raise Exception("Unknown file format")

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    global_attr = {"product": f"{product.upper()} (from {filetype})"}

    # get time series attributes from the first image of the first day
    first_date_time = datetime.combine(startdate.date(), time(h_steps[0], 0))
    data = input_dataset.read(first_date_time)
    ts_attributes = data.metadata

    reshuffler = Img2Ts(
        input_dataset=input_dataset,
        outputpath=outputpath,
        startdate=startdate,
        enddate=enddate,
        input_grid=grid,
        imgbuffer=imgbuffer,
        cellsize_lat=5.0,
        cellsize_lon=5.0,
        ts_dtypes=np.dtype("float32"),
        global_attr=global_attr,
        zlib=True,
        unlim_chunksize=1000,
        ts_attributes=ts_attributes,
    )

    reshuffler.calc()
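# Usage sketch (hypothetical paths; assumes ERA5 images were already
# downloaded to ``input_root``): convert two soil moisture variables into
# land-only time series, reading the 00 and 12 UTC images of each day.
if __name__ == "__main__":
    from datetime import datetime

    reshuffle(
        input_root="/data/era5/img",    # hypothetical image directory
        outputpath="/data/era5/ts",     # hypothetical time series directory
        startdate=datetime(2020, 1, 1),
        enddate=datetime(2020, 12, 31),
        variables=["swvl1", "swvl2"],   # example ERA5 variable short names
        land_points=True,
        h_steps=(0, 12),
    )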
def reshuffle(input_root, outputpath, startdate, enddate, parameters,
              land_points=False, imgbuffer=50):
    """
    Reshuffle method applied to ERA-Interim data.

    Parameters
    ----------
    input_root: string
        Input path where the ERA-Interim data was downloaded to.
    outputpath : string
        Output path.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list
        Parameters to read and convert.
    land_points: bool
        Reshuffle land points only (not implemented yet).
    imgbuffer: int, optional
        How many images to read at once before writing time series.
    """
    filetype = get_filetype(input_root)

    if filetype == 'grib':
        input_dataset = ERAGrbDs(input_root, parameters, expand_grid=False)
    elif filetype == 'netcdf':
        input_dataset = ERANcDs(input_root, parameters, subgrid=False,
                                array_1D=True)
    else:
        raise Exception('Unknown file format')

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    global_attr = {'product': 'ECMWF Reanalysis from {}'.format(filetype)}

    # get time series attributes from first day of data.
    data = input_dataset.read(startdate)
    ts_attributes = data.metadata

    grid = BasicGrid(data.lon, data.lat)

    reshuffler = Img2Ts(input_dataset=input_dataset, outputpath=outputpath,
                        startdate=startdate, enddate=enddate,
                        input_grid=grid, imgbuffer=imgbuffer,
                        cellsize_lat=5.0, cellsize_lon=5.0,
                        ts_dtypes=np.dtype('float32'),
                        global_attr=global_attr, zlib=True,
                        unlim_chunksize=1000, ts_attributes=ts_attributes)

    reshuffler.calc()
def reshuffle(in_path, out_path, start_date, end_date, parameters,
              temporal_sampling=6, img_buffer=50):
    """
    Reshuffle method applied to MERRA2 data.

    Parameters
    ----------
    in_path: string
        Input path where the MERRA2 data was downloaded to.
    out_path : string
        Output path.
    start_date : datetime
        Start date.
    end_date : datetime
        End date.
    parameters: list
        Parameters to read and convert.
    temporal_sampling: int in range [1, 24]
        Get an image every n hours where n=temporal_sampling. For example:
        if 1: return hourly sampled data -> hourly sampling
        if 6: return an image every 6 hours -> 6 hourly sampling
        if 24: return the 00:30 image of each day -> daily sampling
    img_buffer: int, optional
        How many images to read at once before writing the time series.
    """
    # define input dataset
    # the img_bulk class in img2ts iterates through every nth
    # timestamp as specified by temporal_sampling
    input_dataset = MerraImageStack(data_path=in_path,
                                    parameter=parameters,
                                    temporal_sampling=temporal_sampling,
                                    array_1d=True)
    product = 'MERRA2_hourly'

    # create out_path directory if it does not exist yet
    if not os.path.exists(out_path):
        os.makedirs(out_path)

    # set global attribute
    global_attributes = {'product': product}

    # get ts attributes from first day of data
    data = input_dataset.read(start_date)
    ts_attributes = data.metadata

    # define grid
    grid = BasicGrid(data.lon, data.lat)

    # define reshuffler
    reshuffler = Img2Ts(input_dataset=input_dataset,
                        outputpath=out_path,
                        startdate=start_date,
                        enddate=end_date,
                        input_grid=grid,
                        imgbuffer=img_buffer,
                        cellsize_lat=5.0,
                        cellsize_lon=6.25,
                        global_attr=global_attributes,
                        zlib=True,
                        unlim_chunksize=1000,
                        ts_attributes=ts_attributes)

    reshuffler.calc()
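# Usage sketch (hypothetical paths and variable name): with
# ``temporal_sampling=6`` only every sixth hourly image is read, so the
# resulting time series have a 6-hourly sampling.
if __name__ == "__main__":
    from datetime import datetime

    reshuffle(
        in_path="/data/merra2/img",      # hypothetical image directory
        out_path="/data/merra2/ts",      # hypothetical time series directory
        start_date=datetime(2019, 1, 1),
        end_date=datetime(2019, 1, 31),
        parameters=["SFMC"],             # hypothetical MERRA2 variable name
        temporal_sampling=6,
    )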
def reshuffle(input_root, outputpath, startdate, enddate, parameters,
              land_points=True, imgbuffer=50):
    """
    Reshuffle method applied to C3S data.

    Parameters
    ----------
    input_root: string
        Input path where the C3S images were downloaded.
    outputpath : string
        Output path.
    startdate : datetime
        Start date.
    enddate : datetime
        End date.
    parameters: list
        Parameters to read and convert.
    land_points : bool, optional (default: True)
        Use the land grid to calculate time series on. Leads to faster
        processing and smaller files.
    imgbuffer: int, optional (default: 50)
        How many images to read at once before writing time series.
    """
    if land_points:
        grid = C3SLandGrid()
    else:
        grid = C3SCellGrid()

    gpis, lons, lats, cells = grid.get_grid_points()
    grid_vars = {'gpis': gpis, 'lons': lons, 'lats': lats}
    # repurpose cannot handle masked arrays
    for k, v in grid_vars.items():  # type v: np.ma.MaskedArray
        if isinstance(v, np.ma.MaskedArray):
            grid_vars[k] = v.filled()

    grid = BasicGrid(lon=grid_vars['lons'],
                     lat=grid_vars['lats'],
                     gpis=grid_vars['gpis']).to_cell_grid(5.)

    if parameters is None:
        file_args, file_vars = parse_filename(input_root)
        parameters = [p for p in file_vars if p not in ['lat', 'lon', 'time']]

    input_dataset = C3S_Nc_Img_Stack(data_path=input_root,
                                     parameters=parameters,
                                     subgrid=grid,
                                     array_1D=True)

    prod_args = input_dataset.fname_args

    kwargs = {
        'product_sensor_type': prod_args['sensor_type'].lower(),
        'sub_version': '.' + prod_args['sub_version'],
        'product_sub_type': prod_args['sub_prod']
    }

    class_str = "C3S_SM_TS_Attrs_%s" % (prod_args['version'])
    subattr = getattr(metadata, class_str)

    if prod_args['temp_res'] == 'DAILY':
        attrs = C3S_daily_tsatt_nc(subattr, **kwargs)
    else:
        attrs = C3S_dekmon_tsatt_nc(subattr, **kwargs)

    ts_attributes = {}
    global_attributes = attrs.global_attr

    # todo: attrs for all vars or only for the ones that TS were created for.
    for var in parameters:
        ts_attributes.update(attrs.ts_attributes[var])

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    reshuffler = Img2Ts(input_dataset=input_dataset, outputpath=outputpath,
                        startdate=startdate, enddate=enddate,
                        input_grid=grid, imgbuffer=imgbuffer,
                        cellsize_lat=5.0, cellsize_lon=5.0,
                        global_attr=global_attributes, zlib=True,
                        unlim_chunksize=1000, ts_attributes=ts_attributes)

    reshuffler.calc()
def reshuffle(
    input_root,
    outputpath,
    startdate,
    enddate,
    variables,
    mask_seapoints=False,
    h_steps=(0, 6, 12, 18),
    imgbuffer=50,
):
    """
    Reshuffle method applied to ERA images for conversion into netcdf time
    series format.

    Parameters
    ----------
    input_root: str
        Input path where ERA image data was downloaded to.
    outputpath : str
        Output path, where the reshuffled netcdf time series are stored.
    startdate : datetime
        Start date, from which images are read and time series are generated.
    enddate : datetime
        End date, until which images are read and time series are generated.
    variables: list or str or tuple
        Variables to read from the passed images and convert into time
        series format.
    mask_seapoints: bool, optional (default: False)
        Mask points over sea, replace them with nan.
    h_steps: tuple, optional (default: (0, 6, 12, 18))
        Full hours for which images are available.
    imgbuffer: int, optional (default: 50)
        How many images to read at once before writing time series. This
        number affects how many images are stored in memory and should be
        chosen according to the available amount of memory and the size of a
        single image.
    """
    filetype = parse_filetype(input_root)

    if filetype == "grib":
        input_dataset = ERAIntGrbDs(
            root_path=input_root,
            parameter=variables,
            subgrid=None,
            array_1D=True,
            mask_seapoints=mask_seapoints,
            h_steps=h_steps,
        )
    elif filetype == "netcdf":
        input_dataset = ERAIntNcDs(
            root_path=input_root,
            parameter=variables,
            subgrid=None,
            array_1D=True,
            mask_seapoints=mask_seapoints,
            h_steps=h_steps,
        )
    else:
        raise Exception("Unknown file format")

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    global_attr = {"product": "ERA Interim (from {})".format(filetype)}

    # get time series attributes from the first image of the first day
    first_date_time = datetime.combine(startdate.date(), time(h_steps[0], 0))
    data = input_dataset.read(first_date_time)
    ts_attributes = data.metadata

    grid = BasicGrid(data.lon, data.lat)

    reshuffler = Img2Ts(
        input_dataset=input_dataset,
        outputpath=outputpath,
        startdate=startdate,
        enddate=enddate,
        input_grid=grid,
        imgbuffer=imgbuffer,
        cellsize_lat=5.0,
        cellsize_lon=5.0,
        ts_dtypes=np.dtype("float32"),
        global_attr=global_attr,
        zlib=True,
        unlim_chunksize=1000,
        ts_attributes=ts_attributes,
    )

    reshuffler.calc()