def average_timeseries(self, source, region, variable=None):
    """
    Calculates mean of all time series in a region.

    Parameters
    ----------
    source : str
        Data source from which time series should be read.
    region : str
        Region of interest; either a region identifier or a sub-region
        identifier, which is mapped to its parent region.
    variable : str, optional
        Variable to display, set to first variable of source if None.

    Returns
    -------
    df_mean : pd.DataFrame
        Mean timeseries over all gridpoints, one '<variable>_mean'
        column per variable.
    gpi_count : int
        Number of gridpoints the mean was computed over.

    Notes
    -----
    Returns an error string (not an exception) if the region contains
    no gridpoints; kept this way for backward compatibility with
    existing callers.
    """
    grid = ShapeGrid(region, self.spatial_resolution, self.shapefile)
    points = grid.get_gridpoints()

    if region in self.regions:
        # Region known directly: read by gridpoint index.
        locations = list(points.index)
    else:
        # Sub-region: map to its parent region and read by (lon, lat).
        for i, sr in enumerate(self.sub_regions):
            if region in sr:
                idx = i
                break
        region = self.regions[idx]
        locations = list(zip(points['lon'].tolist(),
                             points['lat'].tolist()))

    if len(points) < 1:
        return 'ERROR: No points available in the selected region.'

    df, gpis = self.bulkread_timeseries(source, locations, region,
                                        variable=variable)

    df_mean = pd.DataFrame()
    for var in df.keys():
        df_mean[var + '_mean'] = df[var].mean(axis=1)

    return df_mean, len(gpis)
def test_ShapeGrid(self):
    """Check gridpoint counts and bounding boxes produced by ShapeGrid."""
    # General case: grid over the configured test region.
    grid = ShapeGrid(self.region, sp_res=self.sp_res)
    extent = (grid.arrlat.min(), grid.arrlat.max(),
              grid.arrlon.min(), grid.arrlon.max())
    assert grid.get_grid_points()[0].size == 290
    assert extent == (46.625, 48.875, 9.875, 16.875)
    assert grid.get_gridpoints().shape == (158, 2)

    # Special case: NZ grid.
    grid = ShapeGrid('NZ', sp_res=self.sp_res1)
    extent = (grid.arrlat[0], grid.arrlat[-1],
              grid.arrlon[0], grid.arrlon[-1])
    assert grid.shape == (34, 24)
    assert grid.get_gridpoints().shape == (116, 2)
    assert extent == (-46.75, -35.25, 167.37, -176.5)
def get_gridpoints(self):
    """Returns gridpoints from NetCDF file.

    Builds one DataFrame of gridpoints per configured region; the
    'global' region is read straight from the NetCDF grid variables,
    any other region is delegated to ShapeGrid.

    Returns
    -------
    gridpoints : dict of pandas.DataFrame
        Dict containing Dataframes with gridpoint index as index,
        longitudes and latitudes as columns for each region.
    """
    gridpoints = {}

    if self.regions == ['global']:
        filename = (self.regions[0] + '_' + str(self.spatial_resolution)
                    + '_' + str(self.temporal_resolution) + '.nc')
        ncfile = os.path.join(self.data_path, filename)
        # Open read-only: this method never writes to the file, so the
        # original 'r+' mode needlessly required write permission.
        with Dataset(ncfile, 'r', format='NETCDF4') as nc:
            gpis = nc.variables['gpi'][:]
            lons = nc.variables['lon'][:]
            lats = nc.variables['lat'][:]
            gpis = gpis.flatten()
            # Expand the 1d lon/lat axes to the full 2d grid, then
            # flatten so rows line up with the flattened gpi array.
            lons, lats = np.meshgrid(lons, lats)
            lons = lons.flatten()
            lats = lats.flatten()
            points = pd.DataFrame(index=gpis)
            points['lon'] = lons
            points['lat'] = lats
            gridpoints['global'] = points
    else:
        for region in self.regions:
            grid = ShapeGrid(region, self.spatial_resolution,
                             self.shapefile)
            points = grid.get_gridpoints()
            gridpoints[region] = points

    return gridpoints
def image_bounds(country, sp_res, shapefile=None):
    """
    Calculates bounding box, center coordinates and zoom level of an
    image for web overlay purposes.

    Parameters
    ----------
    country : str
        FIPS country code (https://en.wikipedia.org/wiki/FIPS_country_code)
    sp_res : numeric
        Spatial resolution of the image.
    shapefile : str, optional
        Path to a custom shapefile.

    Returns
    -------
    lon_min : numeric
        Minimum longitude.
    lon_max : numeric
        Maximum longitude.
    lat_min : numeric
        Minimum latitude.
    lat_max : numeric
        Maximum latitude.
    c_lat : numeric
        Center latitude of image.
    c_lon : numeric
        Center longitude of image.
    zoom : int
        Zoom level for openlayers.
    """
    grid = ShapeGrid(country, sp_res, shapefile=shapefile)

    # Pad the grid extent by half a pixel on every side.
    half = sp_res / 2
    lon_min = grid.arrlon.min() - half
    lon_max = grid.arrlon.max() + half
    lat_min = grid.arrlat.min() - half
    lat_max = grid.arrlat.max() + half

    extent_lon = lon_max - lon_min
    extent_lat = lat_max - lat_min
    c_lon = lon_min + extent_lon / 2
    c_lat = lat_min + extent_lat / 2

    # Each halving of the container width that still exceeds the
    # longitudinal extent corresponds to one zoom step.
    zoom = 0
    width = 1024  # To be replaced with the width of the map container!
    while width / 2 > extent_lon:
        zoom += 1
        width = width / 2

    return lon_min, lon_max, lat_min, lat_max, c_lat, c_lon, zoom
def bulkread_ts(self, locations, region=None, variable=None, shapefile=None,
                scaled=True, grid=None):
    """Gets timeseries from netCDF file for a number of gridpoints.

    Parameters
    ----------
    locations : list of int or list of tuples
        Either a list of Grid point indices given as integer value [0,1,2]
        or a list of Longitude/Latitude tuples [(0.0, 0.0),(45.0, 45.0)].
    region : str, optional
        Region of interest, set to first defined region if not set.
    variable : str, optional
        Variable to display, selects all available variables if None.
    shapefile : str, optional
        Path to custom shapefile.
    scaled : bool, optional
        If true, data will be scaled to a predefined range; if false, data
        will be shown as given in rawdata file; defaults to True
    grid : poets.grid.grids RegularGrid or ShapeGrid, optional
        Grid with point and lon/lat information; defaults to None.

    Returns
    -------
    df_list : dict of pd.DataFrame
        Timeseries per selected variable, one 'gpi_<gpi>' column for
        each requested gridpoint.
    gpis : list of int
        List of gpi values.
    """
    if region is None:
        region = self.valid_regions[0]

    # Resolve lon/lat tuples to gridpoint indices if necessary.
    if type(locations[0]) is tuple:
        if grid is None:
            if region == 'global':
                grid = RegularGrid(self.dest_sp_res)
            else:
                grid = ShapeGrid(region, self.dest_sp_res, shapefile)
        gpis = []
        for loc in locations:
            gp, _ = grid.find_nearest_gpi(loc[0], loc[1])
            gpis.append(gp)
    else:
        gpis = locations

    variable = self._set_variable(variable)
    source_file = self.src_file[region]
    var_dates = self._check_current_date()

    df_list = {}
    # Row/column positions of each gpi in the 2d lat/lon grid. Kept as
    # plain int lists so they can be reused for every variable; the
    # original used map(), whose Python 3 result is a single-use
    # iterator and not a valid numpy index.
    lat_pos = []
    lon_pos = []

    with Dataset(source_file, 'r', format='NETCDF4') as nc:
        time = nc.variables['time']
        dates = num2date(time[:], units=time.units, calendar=time.calendar)

        for gp in gpis:
            position = np.where(nc.variables['gpi'][:] == gp)
            lat_pos.append(int(position[0][0]))
            lon_pos.append(int(position[1][0]))

        for ncv in variable:
            # Fresh frame per variable so df_list entries never alias
            # one shared object.
            df = pd.DataFrame(index=pd.DatetimeIndex(dates))
            begin = np.where(dates == var_dates[region][ncv][0])[0][0]
            end = np.where(dates == var_dates[region][ncv][1])[0][0]

            ts = nc.variables[ncv][begin:end + 1, :, :]
            ts = ts[:, lat_pos, lon_pos]
            ts = np.swapaxes(ts, 1, 0)

            for idx, gp in enumerate(gpis):
                col = 'gpi_' + str(gp)
                df[col] = np.NAN
                # Assign via .iloc instead of chained indexing
                # (df[col][begin:end + 1] = ...), which pandas may
                # silently apply to a copy.
                df.iloc[begin:end + 1, df.columns.get_loc(col)] = ts[idx, :]

            if nc.variables[ncv]._FillValue is not None:
                df = df.replace(nc.variables[ncv]._FillValue, np.NAN)

            if 'scaling_factor' in nc.variables[ncv].ncattrs():
                nv = nc.variables[ncv]
                factor = float(nv.getncattr('scaling_factor'))
                # Apply the factor to all gridpoint columns; the
                # original indexed the frame with the variable name,
                # which is not a column here and raised KeyError.
                if nv.getncattr('scaling_factor') < 0:
                    df = df * factor
                else:
                    df = df / factor

            if scaled:
                if self.valid_range is not None:
                    if self.data_range is not None:
                        # Scale every gridpoint column (original used
                        # the non-existent df[ncv] column here too).
                        for col in df.columns:
                            df[col] = self._scale_values(df[col])

            df_list[ncv] = df

    return df_list, gpis
def read_ts(self, location, region=None, variable=None, shapefile=None,
            scaled=True):
    """Gets timeseries from netCDF file for one gridpoint.

    Parameters
    ----------
    location : int or tuple of floats
        Either Grid point index as integer value or Longitude/Latitude
        given as tuple.
    region : str, optional
        Region of interest, set to first defined region if not set.
    variable : str, optional
        Variable to display, selects all available variables if None.
    shapefile : str, optional
        Path to custom shapefile.
    scaled : bool, optional
        If true, data will be scaled to a predefined range; if false, data
        will be shown as given in rawdata file; defaults to True

    Returns
    -------
    df : pd.DataFrame
        Timeseries for selected variables.
    """
    if region is None:
        region = self.valid_regions[0]

    # Resolve a lon/lat tuple to the nearest gridpoint index.
    if type(location) is tuple:
        if region == 'global':
            grid = RegularGrid(self.dest_sp_res)
        else:
            grid = ShapeGrid(region, self.dest_sp_res, shapefile)
        gp, _ = grid.find_nearest_gpi(location[0], location[1])
    else:
        gp = location

    variable = self._set_variable(variable)
    source_file = self.src_file[region]
    var_dates = self._check_current_date()

    with Dataset(source_file, 'r', format='NETCDF4') as nc:
        time = nc.variables['time']
        dates = num2date(time[:], units=time.units, calendar=time.calendar)

        # Row/column position of the gpi in the 2d lat/lon grid.
        position = np.where(nc.variables['gpi'][:] == gp)
        lat_pos = position[0][0]
        lon_pos = position[1][0]

        df = pd.DataFrame(index=pd.DatetimeIndex(dates))

        for ncvar in variable:
            begin = np.where(dates == var_dates[region][ncvar][0])[0][0]
            end = np.where(dates == var_dates[region][ncvar][1])[0][0]

            df[ncvar] = np.NAN
            ts = nc.variables[ncvar][begin:end + 1, lat_pos, lon_pos]
            # Assign via .iloc instead of chained indexing
            # (df[ncvar][begin:end + 1] = ts), which pandas may
            # silently apply to a copy.
            df.iloc[begin:end + 1, df.columns.get_loc(ncvar)] = ts

            if nc.variables[ncvar]._FillValue is not None:
                df = df.replace(nc.variables[ncvar]._FillValue, np.NAN)

            if 'scaling_factor' in nc.variables[ncvar].ncattrs():
                vvar = nc.variables[ncvar]
                factor = float(vvar.getncattr('scaling_factor'))
                if vvar.getncattr('scaling_factor') < 0:
                    df[ncvar] = df[ncvar] * factor
                else:
                    df[ncvar] = df[ncvar] / factor

            if scaled:
                if self.valid_range is not None:
                    if self.data_range is not None:
                        df[ncvar] = self._scale_values(df[ncvar])

    return df