def centre_mask(shapes_gp, lons, lats, output="2D"):
    """Create an array indicating grid cells whose centre falls within each shape.

    Parameters
    ----------
    shapes_gp : geopandas GeoDataFrame
        Shapes/regions
    lons : numpy ndarray
        Grid longitude values
    lats : numpy ndarray
        Grid latitude values
    output : {'2D', '3D'}
        Dimensions for output array

    Returns
    -------
    mask : xarray DataArray
        For 2D (i.e. lat/lon) output values are a region number or NaN
        For 3D (i.e. region/lat/lon) output values are bool

    Raises
    ------
    ValueError
        If `output` is neither '2D' nor '3D'

    """
    if output == "2D":
        mask = regionmask.mask_geopandas(shapes_gp, lons, lats)
    elif output == "3D":
        mask = regionmask.mask_3D_geopandas(shapes_gp, lons, lats)
    else:
        # Report the offending value so the caller can see what was passed.
        raise ValueError(
            f"Output argument must be '2D' or '3D', got {output!r}"
        )

    # Name the array "region" regardless of which masking routine produced it.
    mask = mask.rename("region")

    return mask
def test_mask_geopandas(geodataframe_clean, method):
    """Mask the clean GeoDataFrame and compare it with the reference mask."""
    reference = expected_mask()
    mask = mask_geopandas(geodataframe_clean, lon, lat, method=method)

    # The result is a DataArray whose values match the reference (NaNs equal).
    assert isinstance(mask, xr.DataArray)
    assert np.allclose(mask, reference, equal_nan=True)

    # The coordinates of the result equal the grid that was passed in.
    lat_matches = np.equal(mask.lat.values, lat)
    assert np.all(lat_matches)
    lon_matches = np.equal(mask.lon.values, lon)
    assert np.all(lon_matches)
def test_mask_geopandas_warns_empty(geodataframe_clean, method):
    """Expect a UserWarning and an all-null mask for the [10, 11] grid."""
    grid_lon = [10, 11]
    grid_lat = [10, 11]
    expected_coords = [10, 11]

    with pytest.warns(UserWarning, match="No gridpoint belongs to any region."):
        mask = mask_geopandas(
            geodataframe_clean,
            grid_lon,
            grid_lat,
            method=method,
            numbers="numbers",
        )

    assert isinstance(mask, xr.DataArray)

    # Every cell of the returned mask is null.
    assert mask.isnull().all()

    # The coordinates of the result equal the grid that was passed in.
    lat_matches = np.equal(mask.lat.values, expected_coords)
    assert np.all(lat_matches)
    lon_matches = np.equal(mask.lon.values, expected_coords)
    assert np.all(lon_matches)
def test_mask_geopandas_numbers(geodataframe_clean, method):
    """Mask using the "numbers" column and compare with expected_mask_2D(1, 2)."""
    mask = mask_geopandas(
        geodataframe_clean,
        dummy_lon,
        dummy_lat,
        method=method,
        numbers="numbers",
    )
    reference = expected_mask_2D(1, 2)

    # The result is a DataArray whose values match the reference (NaNs equal).
    assert isinstance(mask, xr.DataArray)
    assert np.allclose(mask, reference, equal_nan=True)

    # The coordinates of the result equal the grid that was passed in.
    lat_matches = np.equal(mask.lat.values, dummy_lat)
    assert np.all(lat_matches)
    lon_matches = np.equal(mask.lon.values, dummy_lon)
    assert np.all(lon_matches)
def maskgen(shpfile, dat4mask, regionname):
    """
    Generate a mask using information from a shapefile.

    The mask has 1's within the desired region(s) and nan's everywhere else.

    Input:
        shpfile = the shapefile
        dat4mask = the data that you're planning to mask
            (its 'lat' and 'lon' entries define the grid of the mask)
        regionname (list) = a list of the regions you want to mask.
            (assuming this is specified using NAME_1 i.e., full name of the
            state or country ["Alabama", "Alaska"...]); when the shapefile
            has no NAME_1 attribute, NAME_0 is used instead.
    Output:
        mask = the mask, an xarray DataArray with dimensions (lat, lon)
    """

    # setup of the grid for the mask from dat4mask
    maskcoords = xr.Dataset({'lat': (['lat'], dat4mask['lat'])}, {'lon': (['lon'], dat4mask['lon'])})

    # running count of how many requested regions cover each grid cell
    mask = np.zeros([maskcoords.lat.size, maskcoords.lon.size])

    # read in shapefile
    shpcontents = gp.read_file(shpfile)

    # loop over states to mask
    for name in regionname:
        print("masking " + name)
        try:
            region = shpcontents[shpcontents.NAME_1 == name]
        except AttributeError:
            # The shapefile has no NAME_1 column; fall back to NAME_0.
            # Only the missing-attribute case is caught so that unrelated
            # errors (and KeyboardInterrupt) are no longer swallowed.
            region = shpcontents[shpcontents.NAME_0 == name]

        maskt = regionmask.mask_geopandas(region, maskcoords["lon"], maskcoords["lat"])

        # cells inside the region are non-NaN in maskt; count them as 1
        mask += np.where(np.isnan(maskt), 0, 1)

    # ensure the selected region is set to 1, rest set to nan's
    mask = np.where(mask == 0, np.nan, 1)

    # Explicit dims so the axis order does not depend on how the coordinate
    # mapping happens to be ordered; matches the [lat, lon] shape built above.
    mask = xr.DataArray(mask, coords=maskcoords.coords, dims=("lat", "lon"))

    return mask
def main() -> None:
    """Run the analysis.

    Opens the dataset given by ``--hdd``, masks it to the boundary-file rows
    whose ``NERC`` column equals ``"TRE"``, and writes two spatial means of
    ``heating_demand`` to ``--outfile``: one weighted by ``pop_density`` and
    one weighted by the cosine of latitude.
    """
    # parse command line arguments; all three are required because the
    # analysis cannot run without both inputs, and without an output path
    # the result would be computed and then silently discarded
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--hdd", type=str, required=True, help="path to the HDD dataset"
    )
    parser.add_argument(
        "--boundary", type=str, required=True, help="path to the boundary file"
    )
    parser.add_argument(
        "-o", "--outfile", type=str, required=True, help="output NetCDF path"
    )
    args = parser.parse_args()

    # get the HDD data; the context manager closes the file when done
    with xr.open_dataset(args.hdd) as raw:
        hdd = raw.rename({"longitude": "lon", "latitude": "lat"})

        # read the mask
        ercot = gp.read_file(args.boundary).loc[lambda df: df["NERC"] == "TRE"]
        mask = regionmask.mask_geopandas(ercot, hdd)

        # mask and clip: keep only cells where the mask is not null
        masked = hdd.where(~mask.isnull(), drop=True)

        # create weights
        pop_weights = hdd["pop_density"].fillna(0)
        spatial_weights = np.cos(np.deg2rad(hdd["lat"]))

        # take spatial averages
        heating = masked["heating_demand"]
        hdd_ercot = xr.Dataset({
            "pop_weighted": heating.weighted(pop_weights).mean(
                dim=["lon", "lat"]
            ),
            "area_weighted": heating.weighted(spatial_weights).mean(
                dim=["lon", "lat"]
            ),
        })

        # save while the source dataset is still open
        hdd_ercot.to_netcdf(args.outfile, format="NETCDF4")
def test_mask_geopandas_duplicates_error(geodataframe_duplicates):
    """Expect a ValueError about duplicate values for the duplicates fixture."""
    expected_message = "cannot contain duplicate values"

    with pytest.raises(ValueError, match=expected_message):
        mask_geopandas(geodataframe_duplicates, lon, lat, numbers="numbers")
def test_mask_geopandas_missing_error(geodataframe_missing):
    """Expect a ValueError about missing values for the missing-values fixture."""
    expected_message = "cannot contain missing values"

    with pytest.raises(ValueError, match=expected_message):
        mask_geopandas(geodataframe_missing, lon, lat, numbers="numbers")
def test_mask_geopandas_wrong_numbers(geodataframe_clean):
    """Expect a KeyError when ``numbers`` is given as "not_a_column"."""
    bad_column = "not_a_column"

    with pytest.raises(KeyError):
        mask_geopandas(geodataframe_clean, lon, lat, numbers=bad_column)
def test_mask_geopandas_raises_legacy(geodataframe_clean):
    """Expect a ValueError when ``method="legacy"`` is requested."""
    expected_message = "method 'legacy' not supported"

    with pytest.raises(ValueError, match=expected_message):
        mask_geopandas(geodataframe_clean, lon, lat, method="legacy")
def test_mask_geopandas_wrong_input():
    """Expect a TypeError naming the accepted types when passed ``None``."""
    expected_message = "'GeoDataFrame' or 'GeoSeries'"

    with pytest.raises(TypeError, match=expected_message):
        mask_geopandas(None, lon, lat)
ax.boxplot(data2, labels=titles, showmeans=True, meanline=True) ax.grid(False) ax.set_ylabel('Optimal lag (days)') ax.set_xlabel('Pollutant') plt.savefig('figs/boxplot.png', dpi=600, bbox_inches='tight') plt.close() # ---- Plotting ---- # import regionmask import geopandas as gp lon = np.arange(-130, -60, .1) lat = np.arange(25, 55, .1) shppath = 'QGis/cb_2018_us_county_5m/cb_2018_us_county_5m.shp' gp_shp = gp.read_file(shppath) mask = regionmask.mask_geopandas(gp_shp, lon, lat) da_p_values = mask.copy(data=np.zeros(mask.shape)) da_p_values_pm25 = da_p_values.where(da_p_values != 0) # create nans da_p_values_pm10 = da_p_values_pm25.copy() da_p_values_co = da_p_values_pm25.copy() da_p_values_no2 = da_p_values_pm25.copy() coords = zip(ds_total.longitude.values[0], ds_total.latitude.values[0], ds_total.location_name.values) for lon, lat, city in coords: county_id = mask.interp(lat=lat, lon=lon, method='nearest').values try: da_p_values_pm25 = da_p_values_pm25.where( mask != county_id, dsss['p-value'].sel(City=city, Variable='PM25').values) da_p_values_pm10 = da_p_values_pm10.where(