def __init__(self, *args, local_crs: int = None, x_col: str = None, y_col: str = None, z_col: str = None, time_col: str = None, time_sort: bool = True, **kwargs) -> '_GpsBase':
    """Initialise the object from raw data or from an already built instance.

    All positional and keyword arguments are forwarded to
    ``gpd.GeoDataFrame``. When the input is not already a ``_GpsBase`` (or a
    pandas ``BlockManager``), the data is passed through
    ``self._format_data`` and, if ``self._has_time`` is truthy, through
    ``self._normalize_data``.

    Parameters
    ----------
    local_crs:
        Optional CRS (anything accepted by ``CRS``) to which the data is
        reprojected when it differs from the current CRS.
    x_col, y_col, z_col, time_col:
        Column names handed to ``self._format_data``; each falls back to the
        matching ``self._default_*_col`` attribute when ``None``.
    time_sort:
        Forwarded unchanged to ``self._format_data``.
    """
    # The source data is the ``data`` keyword or, failing that, the first
    # positional argument
    source = kwargs.get("data", None)
    if source is None and len(args) > 0:
        source = args[0]

    # Inputs of these types skip formatting and normalisation below
    is_base = isinstance(
        source, (_GpsBase, pd.core.internals.managers.BlockManager))

    frame = gpd.GeoDataFrame(*args, **kwargs)

    # Inherit the CRS of the source when available, else use the default
    if frame.crs is None:
        try:
            frame.crs = source.crs
        except AttributeError:
            frame.crs = self._default_input_crs

    if not is_base:
        if x_col is None:
            x_col = self._default_x_col
        if y_col is None:
            y_col = self._default_y_col
        if z_col is None:
            z_col = self._default_z_col
        if time_col is None:
            time_col = self._default_time_col

        self._format_data(frame, x_col, y_col, z_col, time_col, time_sort)

    # Reproject only when a different target CRS was requested
    if local_crs is not None:
        current_crs = CRS(frame.crs)
        local_crs = CRS(local_crs)
        if local_crs != current_crs:
            frame.to_crs(local_crs, inplace=True)

    if not is_base and self._has_time:
        self._normalize_data(frame)

    super(_GpsBase, self).__init__(frame, crs=frame.crs)
def _transform_to_coordinates(self, x, y):
    """Convert ``(x, y)`` from the file's CRS into an EPSG:4326 ``Point``.

    When ``self.file.crs`` already equals EPSG:4326 no transformation is
    performed and the inputs are returned swapped, as ``Point(y, x)``.
    Otherwise the pair is transformed to EPSG:4326 and the second returned
    value is folded by the arithmetic below before building the point.
    """
    if self.file.crs == CRS("EPSG:4326"):
        return Point(y, x)
    transformer = Transformer.from_proj(self.file.crs, CRS("EPSG:4326"))
    lat, lon = transformer.transform(x, y)
    # NOTE(review): the intent of the folding below is not evident from this
    # block; it compares distances of the shifted value to 90/180/270 --
    # confirm against the data this was written for.
    lon = abs(lon + 90)
    if abs(lon - 90) > abs(lon - 180):
        if abs(lon - 180) > abs(lon - 270):
            lon = abs(270 - lon) + 90
        # NOTE(review): the nesting of this ``else`` was reconstructed from
        # flattened source -- verify it pairs with the inner ``if``.
        else:
            lon = lon - 90
    return Point(lat, lon)
def test_bound_crs_crs__from_methods():
    """Run each ``BoundCRS.from_*`` maker through the inheritance check.

    ``from_epsg(4326)`` is expected to raise ``CRSError``.
    """
    proj_definition = "+proj=latlon +towgs84=0,0,0"

    with pytest.raises(CRSError, match="Invalid type"):
        BoundCRS.from_epsg(4326)

    from_string = BoundCRS.from_string(proj_definition)
    assert_maker_inheritance_valid(from_string, BoundCRS)

    from_proj4 = BoundCRS.from_proj4(proj_definition)
    assert_maker_inheritance_valid(from_proj4, BoundCRS)

    from_instance = BoundCRS.from_user_input(
        BoundCRS.from_string(proj_definition))
    assert_maker_inheritance_valid(from_instance, BoundCRS)

    from_json = BoundCRS.from_json(CRS(proj_definition).to_json())
    assert_maker_inheritance_valid(from_json, BoundCRS)

    from_json_dict = BoundCRS.from_json_dict(
        CRS(proj_definition).to_json_dict())
    assert_maker_inheritance_valid(from_json_dict, BoundCRS)
def test_vertical_crs__from_methods():
    """Run each ``VerticalCRS.from_*`` maker through the inheritance check.

    ``from_proj4("+proj=latlon")`` is expected to raise ``CRSError``.
    """
    from_epsg = VerticalCRS.from_epsg(5703)
    assert_maker_inheritance_valid(from_epsg, VerticalCRS)

    from_string = VerticalCRS.from_string("EPSG:5703")
    assert_maker_inheritance_valid(from_string, VerticalCRS)

    with pytest.raises(CRSError, match="Invalid type"):
        VerticalCRS.from_proj4("+proj=latlon")

    from_instance = VerticalCRS.from_user_input(
        VerticalCRS.from_string("EPSG:5703"))
    assert_maker_inheritance_valid(from_instance, VerticalCRS)

    from_json = VerticalCRS.from_json(CRS(5703).to_json())
    assert_maker_inheritance_valid(from_json, VerticalCRS)

    from_json_dict = VerticalCRS.from_json_dict(CRS(5703).to_json_dict())
    assert_maker_inheritance_valid(from_json_dict, VerticalCRS)
def test_projected_crs__from_methods():
    """Run each ``ProjectedCRS.from_*`` maker through the inheritance check.

    ``from_epsg(4326)`` is expected to raise ``CRSError``.
    """
    from_epsg = ProjectedCRS.from_epsg(6933)
    assert_maker_inheritance_valid(from_epsg, ProjectedCRS)

    from_string = ProjectedCRS.from_string("EPSG:6933")
    assert_maker_inheritance_valid(from_string, ProjectedCRS)

    from_proj4 = ProjectedCRS.from_proj4("+proj=aea +lat_1=1")
    assert_maker_inheritance_valid(from_proj4, ProjectedCRS)

    from_instance = ProjectedCRS.from_user_input(
        ProjectedCRS.from_string("EPSG:6933"))
    assert_maker_inheritance_valid(from_instance, ProjectedCRS)

    from_json = ProjectedCRS.from_json(CRS(6933).to_json())
    assert_maker_inheritance_valid(from_json, ProjectedCRS)

    from_json_dict = ProjectedCRS.from_json_dict(CRS(6933).to_json_dict())
    assert_maker_inheritance_valid(from_json_dict, ProjectedCRS)

    with pytest.raises(CRSError, match="Invalid type"):
        ProjectedCRS.from_epsg(4326)
def _proj4_str_to_dict(self, proj4_string):
    """
    Converts PROJ4 compatible string to dictionary.

    Parameters
    ----------
    proj4_string : str
        PROJ4 parameters as a string (e.g., '+proj=longlat +ellps=WGS84
        +datum=WGS84 +no_defs').

    Returns
    -------
    dict
        Dictionary containing PROJ4 parameters.

    Notes
    -----
    Key only parameters will be assigned a value of `True`.
    EPSG codes should be provided as "EPSG:XXXX" where "XXXX" is the EPSG
    code number. It can also be provided as "+init=EPSG:XXXX" as long as
    the underlying PROJ library supports it (deprecated in PROJ 6.0+).
    Parameters may be separated by any amount of whitespace.

    """
    # convert EPSG codes to equivalent PROJ4 string definition
    if proj4_string.lower().startswith('epsg:'):
        crs = CRS(proj4_string)
        return crs.to_dict()

    # Split on any run of whitespace: splitting on a single space would turn
    # doubled, leading or trailing blanks into empty tokens, i.e. bogus
    # parameters with an empty key.
    tokens = proj4_string.replace('+', '').split()
    proj4_pairs = (token.split('=', 1) for token in tokens)
    return self.__convert_proj4_pairs_to_dict(proj4_pairs)
def test_geographic_crs__from_methods():
    """Run each ``GeographicCRS.from_*`` maker through the inheritance check.

    ``from_epsg(6933)`` is expected to raise ``CRSError``.
    """
    from_epsg = GeographicCRS.from_epsg(4326)
    assert_maker_inheritance_valid(from_epsg, GeographicCRS)

    from_string = GeographicCRS.from_string("EPSG:4326")
    assert_maker_inheritance_valid(from_string, GeographicCRS)

    from_proj4 = GeographicCRS.from_proj4("+proj=latlon")
    assert_maker_inheritance_valid(from_proj4, GeographicCRS)

    from_instance = GeographicCRS.from_user_input(
        GeographicCRS.from_string("EPSG:4326"))
    assert_maker_inheritance_valid(from_instance, GeographicCRS)

    from_json = GeographicCRS.from_json(CRS(4326).to_json())
    assert_maker_inheritance_valid(from_json, GeographicCRS)

    from_json_dict = GeographicCRS.from_json_dict(CRS(4326).to_json_dict())
    assert_maker_inheritance_valid(from_json_dict, GeographicCRS)

    with pytest.raises(CRSError, match="Invalid type"):
        GeographicCRS.from_epsg(6933)
def split_linestring_df(df: Union[pd.DataFrame, gpd.GeoDataFrame], max_length: float, pool: Pool = None, **kwargs) -> gpd.GeoDataFrame:
    """
    Splits the LineStrings of a data frame so that none exceeds `max_length`.
    All the other columns are repeated untouched for every resulting part.

    :param df: The input `DataFrame` or `GeoDataFrame`
    :param max_length: The maximum length that each LineString is allowed to
        have. Has no effect on other geometry types.
    :param pool: A pool of parallel workers to speed up processing large data
        frames.
    :param kwargs: Extra keywords controlling the behavior of this function.
        Currently available keywords are:

        - `length_epsg`: the EPSG in which `max_length` is expressed. The
          default is EPSG:3857, hence `max_length` is assumed to be in meters.
        - `geom_field`: The name of the column containing the geometry.
          Default is 'geometry'.
        - `part_id_field`: The name of the column added to the output which
          identifies the different parts of the same geometry. Default is
          `part_id`.
    :return: a new `GeoDataFrame` where LineStrings or MultiLineStrings do not
        exceed `max_length`
    """
    measure_crs = CRS(kwargs.get('length_epsg', 3857))

    # Work in the measuring CRS; remember the source CRS to convert back later
    already_in_measure_crs = measure_crs.name == df.crs.name
    source_crs = None if already_in_measure_crs else df.crs
    if not already_in_measure_crs:
        df = df.to_crs(measure_crs)

    geom_field = kwargs.get('geom_field', 'geometry')
    pieces = split_linestring(geometry=df[geom_field], max_length=max_length, pool=pool)
    part_id_field = kwargs.get('part_id_field', 'part_id')

    frames = []
    for label, record in df.iterrows():
        parts = pieces[label]
        n_parts = len(parts)
        columns = {}
        for key in list(record.keys()) + [part_id_field]:
            if key == geom_field:
                continue
            if key == part_id_field:
                # running number of the part within its parent geometry
                columns[key] = list(range(n_parts))
            else:
                # attribute value repeated once per part
                columns[key] = [record[key]] * n_parts
        frames.append(gpd.GeoDataFrame(data=columns, geometry=parts))

    output = pd.concat(frames)
    output.reset_index(drop=True, inplace=True)
    output.crs = measure_crs
    if source_crs is not None:
        output = output.to_crs(source_crs)
    return output
def test_derived_geographic_crs__from_methods():
    """Run each ``DerivedGeographicCRS.from_*`` maker through the inheritance check.

    ``from_epsg(4326)`` is expected to raise ``CRSError``.
    """
    proj_definition = "+proj=ob_tran +o_proj=longlat +o_lat_p=0 +o_lon_p=0 +lon_0=0"

    with pytest.raises(CRSError, match="Invalid type Geographic 2D CRS"):
        DerivedGeographicCRS.from_epsg(4326)

    from_string = DerivedGeographicCRS.from_string(proj_definition)
    assert_maker_inheritance_valid(from_string, DerivedGeographicCRS)

    from_proj4 = DerivedGeographicCRS.from_proj4(proj_definition)
    assert_maker_inheritance_valid(from_proj4, DerivedGeographicCRS)

    from_instance = DerivedGeographicCRS.from_user_input(
        DerivedGeographicCRS.from_string(proj_definition))
    assert_maker_inheritance_valid(from_instance, DerivedGeographicCRS)

    from_json = DerivedGeographicCRS.from_json(CRS(proj_definition).to_json())
    assert_maker_inheritance_valid(from_json, DerivedGeographicCRS)

    from_json_dict = DerivedGeographicCRS.from_json_dict(
        CRS(proj_definition).to_json_dict())
    assert_maker_inheritance_valid(from_json_dict, DerivedGeographicCRS)
def _is_longlat(crs):
    '''Tell whether ``crs`` describes lat/long coordinates.

    A mapping-like ``crs`` is answered from its ``'proj'`` entry. Anything
    else is wrapped in ``CRS`` and answered from ``is_geographic`` when that
    attribute exists, or from the ``'proj'`` entry of ``crs.to_dict()``
    otherwise.
    '''
    try:
        return crs['proj'] == 'longlat'
    except (KeyError, TypeError, AttributeError):
        # not subscriptable or no 'proj' key: fall through to CRS handling
        pass

    projection = CRS(crs)
    missing = object()
    geographic = getattr(projection, 'is_geographic', missing)
    if geographic is not missing:
        return geographic
    return crs.to_dict().get('proj') == 'longlat'
def proj4_radius_parameters(proj4_dict):
    """Calculate 'a' and 'b' radius parameters.

    When ``CRS`` is available the radii are read from the CRS ellipsoid.
    Otherwise, or when the ellipsoid's minor radius is NaN, they are derived
    from the PROJ.4 parameters themselves (``ellps``, ``a``/``b``, ``f``/``rf``
    or ``R``), falling back to the WGS84 ellipsoid when nothing usable is
    present.

    Arguments:
        proj4_dict (str or dict): PROJ.4 parameters

    Returns:
        a (float), b (float): equatorial and polar radius
    """
    if CRS is not None:
        import math
        crs = CRS(proj4_dict)
        a = crs.ellipsoid.semi_major_metre
        b = crs.ellipsoid.semi_minor_metre
        if not math.isnan(b):
            return a, b
        # older versions of pyproj didn't always have a valid minor radius
        proj4_dict = crs.to_dict()

    # Work on a private dict so the caller's parameters are not modified
    if isinstance(proj4_dict, str):
        new_info = proj4_str_to_dict(proj4_dict)
    else:
        new_info = proj4_dict.copy()

    # load information from PROJ.4 about the ellipsoid if possible
    from pyproj import Geod

    if 'ellps' in new_info:
        geod = Geod(**new_info)
        new_info['a'] = geod.a
        new_info['b'] = geod.b
    elif 'a' not in new_info or 'b' not in new_info:
        # derive the flattening from its inverse when only 'rf' is given
        if 'rf' in new_info and 'f' not in new_info:
            new_info['f'] = 1. / float(new_info['rf'])

        if 'a' in new_info and 'f' in new_info:
            # b = a * (1 - f)
            new_info['b'] = float(new_info['a']) * (1 - float(new_info['f']))
        elif 'b' in new_info and 'f' in new_info:
            # a = b / (1 - f)
            new_info['a'] = float(new_info['b']) / (1 - float(new_info['f']))
        elif 'R' in new_info:
            # a single radius 'R' is used for both axes
            new_info['a'] = new_info['R']
            new_info['b'] = new_info['R']
        else:
            # nothing usable: default to the WGS84 ellipsoid
            geod = Geod(**{'ellps': 'WGS84'})
            new_info['a'] = geod.a
            new_info['b'] = geod.b

    return float(new_info['a']), float(new_info['b'])
def find_inlet_nodes(nodes, inlets_shp, gdobj):
    """
    Load inlets from a shapefile.

    Reads the user-defined inlet point shapefile and, for every inlet point,
    picks the network node closest to it (Euclidean distance in the mask's
    coordinates).

    Parameters
    ----------
    nodes : dict
        stores the network's nodes and their properties; its 'idx' and 'id'
        entries are read here
    inlets_shp : str
        path to the shapefile of inlet locations (point shapefile)
    gdobj : osgeo.gdal.Dataset
        gdal object corresponding to the georeferenced input binary channel
        mask

    Returns
    -------
    nodes : dict
        nodes dictionary with 'inlets' key containing list of inlet node ids

    """
    inlet_points = gpd.read_file(inlets_shp)

    # The inlet points must share the mask's CRS; reproject them if not
    raster_crs = CRS(gdobj.GetProjection())
    if inlet_points.crs != raster_crs:
        inlet_points = inlet_points.to_crs(raster_crs)
        logger.info(
            'Provided inlet points file does not have the same CRS as provided mask. Reprojecting.'
        )

    # Coordinates of all network nodes, used for the nearest-node search
    node_coords = gu.idx_to_coords(nodes['idx'], gdobj)

    def nearest_node_id(point):
        """Return the id of the network node closest to ``point``."""
        point_xy = point.xy
        dx = point_xy[0][0] - node_coords[0]
        dy = point_xy[1][0] - node_coords[1]
        return nodes['id'][np.argmin(np.sqrt(dx**2 + dy**2))]

    nodes['inlets'] = [
        nearest_node_id(point) for point in inlet_points.geometry.values
    ]

    return nodes
def _proj4_str_to_dict(self, proj4_str):
    """Convert PROJ.4 compatible string definition to dict

    EPSG codes should be provided as "EPSG:XXXX" where "XXXX"
    is the EPSG number code (the prefix is matched case-insensitively).
    It can also be provided as ``"+init=EPSG:XXXX"`` as long as the
    underlying PROJ library supports it (deprecated in PROJ 6.0+).
    Parameters may be separated by any amount of whitespace.

    Note: Key only parameters will be assigned a value of `True`.
    """
    # TODO: @shahn I would go for one solution, i.e. PyProj > 2.2
    # TODO: test if this can be simplified
    #
    # convert EPSG codes to equivalent PROJ4 string definition
    # (case-insensitive, so "epsg:4326" is not mistaken for a key-only
    # parameter)
    if proj4_str.lower().startswith('epsg:'):
        crs = CRS(proj4_str)
        return crs.to_dict()

    # Split on any run of whitespace: splitting on a single space would turn
    # doubled, leading or trailing blanks into empty tokens, i.e. bogus
    # parameters with an empty key.
    tokens = proj4_str.replace('+', '').split()
    proj4_pairs = (token.split('=', 1) for token in tokens)
    return self.__convert_proj4_pairs_to_dict(proj4_pairs)
def _check_crs_compatibility(shape_crs: CRS, raster_crs: CRS):
    """Warn when the shape and raster CRS definitions differ.

    Nothing happens when both CRS are equal. Otherwise a ``UserWarning`` is
    issued either because only the raster uses ``lon_wrap``, or because
    neither CRS equals WGS84 (EPSG:4326).
    """
    wgs84 = CRS(4326)
    if shape_crs.equals(raster_crs):
        return

    message = None
    if ("lon_wrap" in raster_crs.to_string()
            and "lon_wrap" not in shape_crs.to_string()):
        message = "CRS definitions are similar but raster lon values must be wrapped."
    elif not (shape_crs.equals(wgs84) or raster_crs.equals(wgs84)):
        message = "CRS definitions are not similar or both not using WGS84 datum. Tread with caution."

    if message is not None:
        warnings.warn(message, UserWarning, stacklevel=3)
def proj4_str_to_dict(proj4_str):
    """Convert PROJ.4 compatible string definition to dict

    EPSG codes should be provided as "EPSG:XXXX" where "XXXX"
    is the EPSG number code (the prefix is matched case-insensitively).
    It can also be provided as ``"+init=EPSG:XXXX"`` as long as the
    underlying PROJ library supports it (deprecated in PROJ 6.0+).
    Parameters may be separated by any amount of whitespace.

    Note: Key only parameters will be assigned a value of `True`.
    """
    # convert EPSG codes to equivalent PROJ4 string definition
    is_epsg = proj4_str.lower().startswith('epsg:')
    if is_epsg and CRS is not None:
        crs = CRS(proj4_str)
        if hasattr(crs, 'to_dict'):
            # pyproj 2.2+
            return crs.to_dict()
        proj4_str = crs.to_proj4()
    elif is_epsg:
        # legacy +init= PROJ4 string and no pyproj 2.0+ to help convert
        proj4_str = "+init={}".format(proj4_str)

    # Split on any run of whitespace: splitting on a single space would turn
    # doubled, leading or trailing blanks into empty tokens, i.e. bogus
    # parameters with an empty key.
    tokens = proj4_str.replace('+', '').split()
    pairs = (token.split('=', 1) for token in tokens)
    return convert_proj_floats(pairs)
def __init__(
    self,
    projparams: Any = None,
    preserve_units: bool = True,
    network=None,
    **kwargs,
) -> None:
    """
    Create a Proj instance from proj map projection control parameters.

    The key/value pairs can either be passed in a dictionary,
    or as keyword arguments, or as a PROJ string (compatible with the
    proj command). See https://proj.org/operations/projections/index.html
    for examples of key/value pairs defining different map projections.

    .. versionadded:: 3.0.0 network

    Parameters
    ----------
    projparams: int, str, dict, pyproj.CRS
        A PROJ or WKT string, PROJ dict, EPSG integer, or a pyproj.CRS instance.
    preserve_units: bool
        If false, will ensure +units=m.
    network: bool, optional
        Default is None, which uses the system defaults for networking.
        If True, it will force the use of network for grids regardless of
        any other network setting. If False, it will force disable use of
        network for grids regardless of any other network setting.
    **kwargs:
        PROJ projection parameters.


    Example usage:

    >>> from pyproj import Proj
    >>> p = Proj(proj='utm',zone=10,ellps='WGS84', preserve_units=False)
    >>> x,y = p(-120.108, 34.36116666)
    >>> 'x=%9.3f y=%11.3f' % (x,y)
    'x=765975.641 y=3805993.134'
    >>> 'lon=%8.3f lat=%5.3f' % p(x,y,inverse=True)
    'lon=-120.108 lat=34.361'
    >>> # do 3 cities at a time in a tuple (Fresno, LA, SF)
    >>> lons = (-119.72,-118.40,-122.38)
    >>> lats = (36.77, 33.93, 37.62 )
    >>> x,y = p(lons, lats)
    >>> 'x: %9.3f %9.3f %9.3f' % x
    'x: 792763.863 925321.537 554714.301'
    >>> 'y: %9.3f %9.3f %9.3f' % y
    'y: 4074377.617 3763936.941 4163835.303'
    >>> lons, lats = p(x, y, inverse=True) # inverse transform
    >>> 'lons: %8.3f %8.3f %8.3f' % lons
    'lons: -119.720 -118.400 -122.380'
    >>> 'lats: %8.3f %8.3f %8.3f' % lats
    'lats:   36.770   33.930   37.620'
    >>> p2 = Proj('+proj=utm +zone=10 +ellps=WGS84', preserve_units=False)
    >>> x,y = p2(-120.108, 34.36116666)
    >>> 'x=%9.3f y=%11.3f' % (x,y)
    'x=765975.641 y=3805993.134'
    >>> p = Proj("epsg:32667", preserve_units=False)
    >>> 'x=%12.3f y=%12.3f (meters)' % p(-114.057222, 51.045)
    'x=-1783506.250 y= 6193827.033 (meters)'
    >>> p = Proj("epsg:32667")
    >>> 'x=%12.3f y=%12.3f (feet)' % p(-114.057222, 51.045)
    'x=-5851386.754 y=20320914.191 (feet)'
    >>> # test data with radian inputs
    >>> p1 = Proj("epsg:4214")
    >>> x1, y1 = p1(116.366, 39.867)
    >>> f'{x1:.3f} {y1:.3f}'
    '116.366 39.867'
    >>> x2, y2 = p1(x1, y1, inverse=True)
    >>> f'{x2:.3f} {y2:.3f}'
    '116.366 39.867'
    """

    def without_export_warning(export):
        """Run ``export`` with the PROJ string export warning silenced."""
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                "You will likely lose important projection information",
                UserWarning,
            )
            return export()

    self.crs = CRS.from_user_input(projparams, **kwargs)

    # make sure units are meters if preserve_units is False.
    if not preserve_units and "foot" in self.crs.axis_info[0].unit_name:
        in_feet = without_export_warning(lambda: self.crs.to_proj4(4))
        in_metres = re.sub(r"\s\+units=[\w-]+", "", in_feet) + " +units=m"
        self.crs = CRS(in_metres)

    exported = without_export_warning(
        lambda: self.crs.to_proj4() or self.crs.srs)
    self.srs = re.sub(r"\s\+?type=crs", "", exported).strip()

    super().__init__(
        _Transformer.from_pipeline(cstrencode(self.srs), network=network)
    )
def subset_shape(
    ds: Union[xarray.DataArray, xarray.Dataset],
    shape: Union[str, Path, gpd.GeoDataFrame],
    vectorize: bool = True,
    raster_crs: Optional[Union[str, int]] = None,
    shape_crs: Optional[Union[str, int]] = None,
    buffer: Optional[Union[int, float]] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
) -> Union[xarray.DataArray, xarray.Dataset]:
    """Subset a DataArray or Dataset spatially (and temporally) using a vector shape and date selection.

    Return a subset of a DataArray or Dataset for grid points falling within the area of a Polygon and/or
    MultiPolygon shape, or grid points along the path of a LineString and/or MultiLineString.

    Parameters
    ----------
    ds : Union[xarray.DataArray, xarray.Dataset]
      Input values.
    shape : Union[str, Path, gpd.GeoDataFrame]
      Path to shape file, or directly a geodataframe. Supports formats compatible with geopandas.
    vectorize: bool
      Whether to use the spatialjoin or vectorize backend.
    raster_crs : Optional[Union[str, int]]
      EPSG number or PROJ4 string.
    shape_crs : Optional[Union[str, int]]
      EPSG number or PROJ4 string.
    buffer : Optional[Union[int, float]]
      Buffer the shape in order to select a larger region stemming from it. Units are based on the shape degrees/metres.
    start_date : Optional[str]
      Start date of the subset.
      Date string format -- can be year ("%Y"), year-month ("%Y-%m") or year-month-day("%Y-%m-%d").
      Defaults to first day of input data-array.
    end_date : Optional[str]
      End date of the subset.
      Date string format -- can be year ("%Y"), year-month ("%Y-%m") or year-month-day("%Y-%m-%d").
      Defaults to last day of input data-array.

    Returns
    -------
    Union[xarray.DataArray, xarray.Dataset]
      A subset of `ds`

    Examples
    --------
    >>> import xarray as xr  # doctest: +SKIP
    >>> from xclim.subset import subset_shape  # doctest: +SKIP
    >>> pr = xr.open_dataset(path_to_pr_file).pr  # doctest: +SKIP
    ...
    # Subset data array by shape
    >>> prSub = subset_shape(pr, shape=path_to_shape_file)  # doctest: +SKIP
    ...
    # Subset data array by shape and single year
    >>> prSub = subset_shape(pr, shape=path_to_shape_file, start_date='1990-01-01', end_date='1990-12-31')  # doctest: +SKIP
    ...
    # Subset multiple variables in a single dataset
    >>> ds = xr.open_mfdataset([path_to_tasmin_file, path_to_tasmax_file])  # doctest: +SKIP
    >>> dsSub = subset_shape(ds, shape=path_to_shape_file)  # doctest: +SKIP
    """
    wgs84 = CRS(4326)
    # PROJ4 definition for WGS84 with longitudes ranged between -180/+180.
    wgs84_wrapped = CRS.from_string(
        "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs lon_wrap=180")

    # Always work on a Dataset copy; a DataArray is converted back on return
    input_is_array = isinstance(ds, xarray.DataArray)
    ds_copy = ds._to_temp_dataset() if input_is_array else ds.copy()

    if isinstance(shape, gpd.GeoDataFrame):
        poly = shape.copy()
    else:
        poly = gpd.GeoDataFrame.from_file(shape)

    if buffer is not None:
        poly.geometry = poly.buffer(buffer)

    # Get the shape's bounding box.
    minx, miny, maxx, maxy = poly.total_bounds
    lon_bnds = (minx, maxx)
    lat_bnds = (miny, maxy)

    # If polygon doesn't cross prime meridian, subset bbox first to reduce processing time
    # Only case not implemented is when lon_bnds cross the 0 deg meridian but dataset grid has all positive lons
    try:
        ds_copy = subset_bbox(ds_copy, lon_bnds=lon_bnds, lat_bnds=lat_bnds)
    except NotImplementedError:
        pass

    if ds_copy.lon.size == 0 or ds_copy.lat.size == 0:
        raise ValueError(
            "No grid cell centroids found within provided polygon bounding box. "
            'Try using the "buffer" option to create an expanded area.')

    if start_date or end_date:
        ds_copy = subset_time(ds_copy, start_date=start_date, end_date=end_date)

    # Determine whether CRS types are the same between shape and raster
    if shape_crs is None:
        shape_crs = CRS(poly.crs)
    else:
        shape_crs = CRS.from_user_input(shape_crs)

    wrap_lons = False
    if raster_crs is not None:
        raster_crs = CRS.from_user_input(raster_crs)
    else:
        if np.min(lat_bnds) < -90 or np.max(lat_bnds) > 90:
            raise ValueError(
                "Latitudes exceed domain of WGS84 coordinate system.")
        if np.min(lon_bnds) < -180 or np.max(lon_bnds) > 180:
            raise ValueError(
                "Longitudes exceed domain of WGS84 coordinate system.")

        try:
            # Extract CF-compliant CRS_WKT from crs variable.
            raster_crs = CRS.from_cf(ds_copy.crs.attrs)
        except AttributeError:
            # No crs variable: choose a WGS84 flavour from the longitude range
            if np.min(ds_copy.lon) >= 0 and np.max(ds_copy.lon) <= 360:
                wrap_lons = True
                raster_crs = wgs84_wrapped
            else:
                raster_crs = wgs84

    _check_crs_compatibility(shape_crs=shape_crs, raster_crs=raster_crs)

    # Create mask using the vectorize or spatial join methods.
    build_mask = create_mask_vectorize if vectorize else create_mask
    mask_2d = build_mask(x_dim=ds_copy.lon, y_dim=ds_copy.lat, poly=poly,
                         wrap_lons=wrap_lons)

    if np.all(mask_2d.isnull()):
        raise ValueError(
            f"No grid cell centroids found within provided polygon bounds ({poly.bounds}). "
            'Try using the "buffer" option to create an expanded areas or verify polygon.'
        )

    # Mask every data variable that carries all of the mask's dimensions
    mask_dims = set(mask_2d.dims)
    for name in ds_copy.data_vars:
        if mask_dims.issubset(set(ds_copy[name].dims)):
            ds_copy[name] = ds_copy[name].where(mask_2d.notnull())

    # Remove coordinates where all values are outside of region mask
    for dim in mask_2d.dims:
        mask_2d = mask_2d.dropna(dim, how="all")
    ds_copy = ds_copy.sel({dim: mask_2d[dim] for dim in mask_2d.dims})

    # Add a CRS definition using CF conventions and as a global attribute in CRS_WKT for reference purposes
    ds_copy.attrs["crs"] = raster_crs.to_string()
    ds_copy["crs"] = 1
    ds_copy["crs"].attrs.update(raster_crs.to_cf())

    for name in ds_copy.variables:
        if {"lat", "lon"}.issubset(set(ds_copy[name].dims)):
            ds_copy[name].attrs["grid_mapping"] = "crs"

    if input_is_array:
        return ds._from_temp_dataset(ds_copy)
    return ds_copy
def create_mask(
    *,
    x_dim: xarray.DataArray = None,
    y_dim: xarray.DataArray = None,
    poly: gpd.GeoDataFrame = None,
    wrap_lons: bool = False,
    check_overlap: bool = False,
):
    """Create a mask with values corresponding to the features in a GeoDataFrame using spatial join methods.

    The returned mask's points have the value of the first geometry of `poly` they fall in.

    Parameters
    ----------
    x_dim : xarray.DataArray
      X or longitudinal dimension of xarray object.
    y_dim : xarray.DataArray
      Y or latitudinal dimension of xarray object.
    poly : gpd.GeoDataFrame
      GeoDataFrame used to create the xarray.DataArray mask.
    wrap_lons : bool
      Shift vector longitudes by -180,180 degrees to 0,360 degrees; Default = False
    check_overlap: bool
      Perform a check to verify if shapes contain overlapping geometries.

    Returns
    -------
    xarray.DataArray

    Examples
    --------
    >>> import xarray as xr  # doctest: +SKIP
    >>> import geopandas as gpd  # doctest: +SKIP
    >>> from xclim.subset import create_mask  # doctest: +SKIP
    >>> ds = xr.open_dataset(path_to_tasmin_file)  # doctest: +SKIP
    >>> polys = gpd.read_file(path_to_multi_shape_file)  # doctest: +SKIP
    ...
    # Get a mask from all polygons in the shape file
    >>> mask = create_mask(x_dim=ds.lon, y_dim=ds.lat, poly=polys)  # doctest: +SKIP
    >>> ds = ds.assign_coords(regions=mask)  # doctest: +SKIP
    ...
    # Operations can be applied to each regions with `groupby`. Ex:
    >>> ds = ds.groupby('regions').mean()  # doctest: +SKIP
    ...
    # Extra step to retrieve the names of those polygons stored in the "id" column
    >>> region_names = xr.DataArray(polys.id, dims=('regions',))  # doctest: +SKIP
    >>> ds = ds.assign_coords(regions_names=region_names)  # doctest: +SKIP
    """
    wgs84 = CRS(4326)

    if check_overlap:
        _check_has_overlaps(polygons=poly)
    if wrap_lons:
        warnings.warn("Wrapping longitudes at 180 degrees.")

    # Logical ``and`` is required here: with the bitwise ``&`` the expression
    # parsed as ``len(x) == (1 & len(y)) == 1`` and wrongly accepted a 1-D x
    # paired with e.g. a 3-D y.
    if len(x_dim.shape) == 1 and len(y_dim.shape) == 1:
        # create a 2d grid of lon, lat values
        lon1, lat1 = np.meshgrid(np.asarray(x_dim.values),
                                 np.asarray(y_dim.values),
                                 indexing="ij")
        dims_out = x_dim.dims + y_dim.dims
        coords_out = dict()
        coords_out[dims_out[0]] = x_dim.values
        coords_out[dims_out[1]] = y_dim.values
    else:
        # coordinates are used as given, without building a grid
        lon1 = x_dim.values
        lat1 = y_dim.values
        dims_out = x_dim.dims
        coords_out = x_dim.coords

    # create pandas Dataframe from NetCDF lat and lon points
    df = pd.DataFrame({
        "id": np.arange(0, lon1.size),
        "lon": lon1.flatten(),
        "lat": lat1.flatten()
    })
    df["Coordinates"] = list(zip(df.lon, df.lat))
    df["Coordinates"] = df["Coordinates"].apply(Point)

    # create GeoDataFrame (spatially referenced with shifted longitude values if needed).
    if wrap_lons:
        wgs84 = CRS.from_string(
            "+proj=longlat +datum=WGS84 +no_defs +type=crs +lon_wrap=180")
    gdf_points = gpd.GeoDataFrame(df, geometry="Coordinates", crs=wgs84)

    # spatial join geodata points with region polygons and remove duplicates
    point_in_poly = gpd.tools.sjoin(gdf_points, poly, how="left", op="intersects")
    point_in_poly = point_in_poly.loc[~point_in_poly.index.duplicated(
        keep="first")]

    # extract polygon ids for points
    mask = point_in_poly["index_right"]
    mask_2d = np.array(mask).reshape(lat1.shape[0], lat1.shape[1])
    mask_2d = xarray.DataArray(mask_2d, dims=dims_out, coords=coords_out)
    return mask_2d
def func_checker(*args, **kwargs):
    """
    Split and reproject polygon vectors in a GeoDataFrame whose values cross the Greenwich Meridian.

    Begins by examining whether the geometry bounds straddle longitude = 0 and if so, proceeds to split
    the polygons at the meridian into new polygons and erase a small buffer to prevent invalid geometries when
    transforming the lons from WGS84 to WGS84 +lon_wrap=180 (longitudes from 0 to 360).

    The wrapped function is called unchanged when any of the ``poly``, ``x_dim`` or ``wrap_lons`` keyword
    arguments is missing, or when ``wrap_lons`` is falsy. Otherwise ``kwargs["poly"]`` is replaced by a
    GeoDataFrame with the new features in a wrap_lon WGS84 projection before the call.
    """
    try:
        poly = kwargs["poly"]
        x_dim = kwargs["x_dim"]
        wrap_lons = kwargs["wrap_lons"]
    except KeyError:
        return func(*args, **kwargs)

    if wrap_lons:
        # ``np.max`` must be called on ``x_dim``; comparing the bare function
        # object with a number raises TypeError.
        if (np.min(x_dim) < 0 and np.max(x_dim) >= 360) or (
                np.min(x_dim) < -180 and np.max(x_dim) >= 180):
            # TODO: This should raise an exception, right?
            warnings.warn(
                "DataArray doesn't seem to be using lons between 0 and 360 degrees or between -180 and 180 degrees."
                " Tread with caution.",
                UserWarning,
                stacklevel=4,
            )

        split_flag = False
        for index, feature in poly.iterrows():
            if (feature.geometry.bounds[0] < 0) and (feature.geometry.bounds[2] > 0):
                split_flag = True
                warnings.warn(
                    "Geometry crosses the Greenwich Meridian. Proceeding to split polygon at Greenwich."
                    " This feature is experimental. Output might not be accurate.",
                    UserWarning,
                    stacklevel=4,
                )

                # Create a meridian line at Greenwich, split polygons at this line and erase a buffer line
                if isinstance(feature.geometry, MultiPolygon):
                    union = MultiPolygon(cascaded_union(feature.geometry))
                else:
                    union = Polygon(cascaded_union(feature.geometry))
                meridian = LineString([Point(0, 90), Point(0, -90)])
                buffered = meridian.buffer(0.000000001)
                split_polygons = split(union, meridian)
                buffered_split_polygons = [
                    feat.difference(buffered) for feat in split_polygons
                ]

                # Cannot assign iterable with `at` (pydata/pandas#26333) so a small hack:
                # Load split features into a new GeoDataFrame with WGS84 CRS
                split_gdf = gpd.GeoDataFrame(
                    geometry=[cascaded_union(buffered_split_polygons)],
                    crs=CRS(4326),
                )
                poly.at[[index], "geometry"] = split_gdf.geometry.values

        # Reproject features in WGS84 CRS to use 0 to 360 as longitudinal values
        wrapped_lons = CRS.from_string(
            "+proj=longlat +ellps=WGS84 +lon_wrap=180 +datum=WGS84 +no_defs"
        )

        poly = poly.to_crs(crs=wrapped_lons)
        if split_flag:
            warnings.warn(
                "Rebuffering split polygons to ensure edge inclusion in selection.",
                UserWarning,
                stacklevel=4,
            )
            poly = gpd.GeoDataFrame(poly.buffer(0.000000001), columns=["geometry"])
            poly.crs = wrapped_lons

        kwargs["poly"] = poly

    return func(*args, **kwargs)
def __init__(self, projparams=None, preserve_units=True, **kwargs):
    """
    Initialize a Proj class instance.

    See the PROJ documentation (https://proj.org)
    for more information about projection parameters.

    Parameters
    ----------
    projparams: int, str, dict, pyproj.CRS
        A PROJ or WKT string, PROJ dict, EPSG integer, or a pyproj.CRS instance.
        When omitted, the keyword arguments are used as the definition.
    preserve_units: bool
        If false, will ensure +units=m.
    **kwargs:
        PROJ projection parameters.


    Example usage:

    >>> from pyproj import Proj
    >>> p = Proj(proj='utm',zone=10,ellps='WGS84', preserve_units=False)
    >>> x,y = p(-120.108, 34.36116666)
    >>> 'x=%9.3f y=%11.3f' % (x,y)
    'x=765975.641 y=3805993.134'
    >>> 'lon=%8.3f lat=%5.3f' % p(x,y,inverse=True)
    'lon=-120.108 lat=34.361'
    >>> # do 3 cities at a time in a tuple (Fresno, LA, SF)
    >>> lons = (-119.72,-118.40,-122.38)
    >>> lats = (36.77, 33.93, 37.62 )
    >>> x,y = p(lons, lats)
    >>> 'x: %9.3f %9.3f %9.3f' % x
    'x: 792763.863 925321.537 554714.301'
    >>> 'y: %9.3f %9.3f %9.3f' % y
    'y: 4074377.617 3763936.941 4163835.303'
    >>> lons, lats = p(x, y, inverse=True) # inverse transform
    >>> 'lons: %8.3f %8.3f %8.3f' % lons
    'lons: -119.720 -118.400 -122.380'
    >>> 'lats: %8.3f %8.3f %8.3f' % lats
    'lats:   36.770   33.930   37.620'
    >>> p2 = Proj('+proj=utm +zone=10 +ellps=WGS84', preserve_units=False)
    >>> x,y = p2(-120.108, 34.36116666)
    >>> 'x=%9.3f y=%11.3f' % (x,y)
    'x=765975.641 y=3805993.134'
    >>> p = Proj(init="epsg:32667", preserve_units=False)
    >>> 'x=%12.3f y=%12.3f (meters)' % p(-114.057222, 51.045)
    'x=-1783506.250 y= 6193827.033 (meters)'
    >>> p = Proj("+init=epsg:32667")
    >>> 'x=%12.3f y=%12.3f (feet)' % p(-114.057222, 51.045)
    'x=-5851386.754 y=20320914.191 (feet)'
    >>> # test data with radian inputs
    >>> p1 = Proj(init="epsg:4214")
    >>> x1, y1 = p1(116.366, 39.867)
    >>> '{:.3f} {:.3f}'.format(x1, y1)
    '2.031 0.696'
    >>> x2, y2 = p1(x1, y1, inverse=True)
    >>> '{:.3f} {:.3f}'.format(x2, y2)
    '116.366 39.867'
    """
    crs_definition = kwargs if projparams is None else projparams
    self.crs = CRS.from_user_input(crs_definition)

    # make sure units are meters if preserve_units is False.
    if not preserve_units and "foot" in self.crs.axis_info[0].unit_name:
        without_units = re.sub(r"\s\+units=[\w-]+", "", self.crs.to_proj4(4))
        self.crs = CRS(without_units + " +units=m")

    exported = self.crs.to_proj4() or self.crs.srs
    definition = re.sub(r"\s\+?type=crs", "", exported).strip()
    super(Proj, self).__init__(cstrencode(definition))
def __init__(self, name, path_to_mask, results_folder=None, exit_sides=None, verbose=False):
    """
    Initializes a channelnetwork class.

    Parameters
    ----------
    name : str
        The name of the channel network; also defines the folder name for
        storing results.
    path_to_mask : str
        Points to the channel network mask file path
    results_folder : str, optional
        Specifies a directory where results should be stored. When omitted,
        the directory containing the mask is used.
    exit_sides : str, optional
        Only required for river channel networks. A two-character string
        (from N, E, S, or W) that denotes which sides of the image the
        river intersects (upstream first) -- e.g. 'NS', 'EW', 'NW', etc.
    verbose : bool, optional
        If True, print run information and warnings to the console, default
        is False.

    Attributes
    ----------
    name : str
        the name of the channel network, usually the river or delta's name
    verbose : bool, optional (False by default)
        True or False to specify if processing updates should be printed.
    d : osgeo.gdal.Dataset
        object created by gdal.Open() that provides access to geotiff
        metadata
    mask_path : str
        filepath to the input binary channel network mask
    imshape : tuple
        dimensions of the image (rows, cols)
    gt : tuple
        gdal-type Geotransform of the input mask geotiff
    wkt : str
        well known text representation of coordinate reference system of
        input mask geotiff
    epsg: int
        epsg code of the coordinate reference system of input mask geotiff
    unit: str
        units of the coordinate reference system; typically 'degree' or
        'meter'
    pixarea: int or float
        area of each pixel, in units of 'unit'
    pixlen: int or float
        length of each pixel, assumes sides are equal-length
    paths: dict
        dictionary of strings for managing where files should be
        read/written
    exit_sides: str
        two-character string denoting which sides of the image the channel
        network intersects (N,E,S, and/or W). Upstream side should be
        given first.
    Imask: numpy.ndarray
        binary mask found at mask_path loaded into a numpy array via
        `gdal.Open().ReadAsArray()`, dtype=np.bool
    links: dict
        Stores the links of the network and associated properties
    nodes: dict
        Stores the nodes of the network and associated properties
    Idist: numpy.ndarray
        image of the distance transform of the binary mask, dtype=np.float

    """
    self.name = name
    self.verbose = verbose

    # Results go next to the mask unless a folder was given explicitly
    if results_folder is None:
        results_folder = os.path.dirname(os.path.abspath(path_to_mask))
    self.paths = io.prepare_paths(results_folder, name, path_to_mask)
    self.paths['input_mask'] = os.path.normpath(path_to_mask)

    # init logger - prints out to stdout if verbose is True
    # ALWAYS writes output to log file (doesn't print if verbose is False)
    self.init_logger()

    # Handle georeferencing
    # GA_Update required for setting dummy projection/geotransform
    self.gdobj = gdal.Open(self.paths['input_mask'], gdal.GA_Update)
    self.imshape = (self.gdobj.RasterYSize, self.gdobj.RasterXSize)

    if self.gdobj.GetProjection() != '':
        self.wkt = self.gdobj.GetProjection()
    else:
        # Create dummy georeferencing if none is supplied
        logger.info('Input mask is unprojected; assigning a dummy projection.')
        # EPSG:4326 projection with a geotransform whose origin is
        # x=0, y=self.imshape[1] and whose pixel size is 1.
        self.wkt = 'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.01745329251994328,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]]'  # 4326
        self.gdobj.SetProjection(self.wkt)
        self.gdobj.SetGeoTransform((0, 1, 0, self.imshape[1], 0, -1))

    self.gt = self.gdobj.GetGeoTransform()

    # Store crs as pyproj CRS object for interacting with geopandas
    self.crs = CRS(self.gdobj.GetProjection())
    self.unit = gu.get_unit(self.crs)

    pixel_width = self.gt[1]
    pixel_height = self.gt[5]
    self.pixarea = abs(pixel_width * pixel_height)
    self.pixlen = abs(pixel_width)

    # Save exit sides
    if exit_sides is not None:
        self.exit_sides = exit_sides.lower()

    # Load mask into memory
    self.Imask = self.gdobj.ReadAsArray()
def test_crs_to_json_dict():
    """The JSON dict of a CRS built from ``proj="aeqd"`` is typed as a ProjectedCRS."""
    projected = CRS(proj="aeqd", lon_0=-80, lat_0=40.5)
    as_dict = projected.to_json_dict()
    assert as_dict["type"] == "ProjectedCRS"
def test_coordinate_system_from_json():
    """Round-trip a coordinate system through ``to_json`` / ``from_json``."""
    # Kept apart from the other property tests because of
    # https://github.com/OSGeo/PROJ/issues/1818
    original_cs = CRS(proj="aeqd", lon_0=-80, lat_0=40.5).coordinate_system
    serialized = original_cs.to_json()
    restored_cs = CoordinateSystem.from_json(serialized)
    assert restored_cs == original_cs
def test_crs_to_json():
    """Default ``CRS.to_json`` output names the CRS type and contains no newlines."""
    serialized = CRS(proj="aeqd", lon_0=-80, lat_0=40.5).to_json()
    # The type tag must be present in the serialized text.
    assert "ProjectedCRS" in serialized
    # Without ``pretty=True`` the output is expected on a single line.
    assert "\n" not in serialized
def test_crs_to_json__pretty__indenation():
    """Pretty-printed CRS JSON starts with a key indented by the requested width."""
    indentation = 4
    aeqd_crs = CRS(proj="aeqd", lon_0=-80, lat_0=40.5)
    json_data = aeqd_crs.to_json(pretty=True, indentation=indentation)
    assert "ProjectedCRS" in json_data
    # Build the expected prefix from the same width that was requested, so the
    # assertion cannot drift from the ``indentation`` argument.
    assert json_data.startswith("{\n" + " " * indentation + '"')
def test_crs_from_json_dict():
    """A CRS rebuilt from its own JSON dict compares equal to the original."""
    source_crs = CRS(proj="aeqd", lon_0=-80, lat_0=40.5)
    payload = source_crs.to_json_dict()
    rebuilt_crs = CRS.from_json_dict(payload)
    assert rebuilt_crs == source_crs
def test_properties_to_json(property_name, expected_type):
    """The named CRS property serializes to single-line JSON mentioning ``expected_type``.

    ``property_name`` is the attribute looked up on the CRS; ``expected_type``
    is the text that must appear in that attribute's ``to_json()`` output.
    """
    crs = CRS(proj="aeqd", lon_0=-80, lat_0=40.5)
    crs_property = getattr(crs, property_name)
    serialized = crs_property.to_json()
    assert expected_type in serialized
    # Without ``pretty=True`` the output is expected on a single line.
    assert "\n" not in serialized
def _transform_to_meters(self, coordinate):
    """Transform ``coordinate`` from EPSG:4326 into the CRS of ``self.file``.

    ``coordinate`` must expose ``latitude`` and ``longitude`` attributes; they
    are passed to the transformer in that order.

    Returns the transformed pair as a tuple. When the file CRS differs from
    EPSG:4326 the two transformed values are returned in swapped order.
    """
    wgs84 = CRS("EPSG:4326")
    file_crs = self.file.crs
    to_file_crs = Transformer.from_crs(wgs84, file_crs)
    first, second = to_file_crs.transform(
        coordinate.latitude, coordinate.longitude
    )
    # NOTE(review): the swap presumably compensates for axis order in
    # non-geographic CRSs — confirm against callers before changing it.
    if file_crs != wgs84:
        return second, first
    return first, second
def test_properties_to_json__pretty__indentation(property_name, expected_type):
    """Pretty-printed JSON of the named CRS property honours the requested indentation.

    ``property_name`` is the attribute looked up on the CRS; ``expected_type``
    is the text that must appear in that attribute's ``to_json()`` output.
    """
    indentation = 4
    aeqd_crs = CRS(proj="aeqd", lon_0=-80, lat_0=40.5)
    json_data = getattr(aeqd_crs, property_name).to_json(
        pretty=True, indentation=indentation
    )
    assert expected_type in json_data
    # Build the expected prefix from the same width that was requested, so the
    # assertion cannot drift from the ``indentation`` argument.
    assert json_data.startswith("{\n" + " " * indentation + '"')
def zonalstats_raster_file(
    stats: dict,
    working_dir: str = None,
    raster_compression: str = RASTERIO_TIFF_COMPRESSION,
    data_type: str = None,
    crs: str = None,
    zip_archive: bool = False,
) -> Union[str, List[Path]]:
    """
    Extract the zonalstats grid(s) to GeoTIFF file(s), optionally zipped, written with the specified CRS.

    One ``subset_<n>.tiff`` file (``n`` starting at 1) is written per entry of
    `stats` into ``<working_dir>/output``.

    Parameters
    ----------
    stats : dict
      The dictionary produced by the rasterstats `zonalstats` function. It is
      accessed by integer position (``stats[0]`` .. ``stats[len(stats) - 1]``)
      and each entry must provide the ``mini_raster_array``,
      ``mini_raster_affine`` and ``mini_raster_nodata`` keys.
    working_dir : str
      The working directory. Required in practice: the outputs are written below it.
    raster_compression : str
      The type of compression used on the raster file (default: 'lzw').
    data_type : str
      The data encoding of the raster used to write the grid (e.g. 'int16').
    crs : str
      The coordinate reference system; passed to `CRS(...)`.
    zip_archive: bool
      Return the files as a zipped archive (default: False).

    Returns
    -------
    Union[str, List[Path]]
      The path of the zip archive (as a string) when `zip_archive` is True,
      otherwise the list of every path found in the output directory.

    Raises
    ------
    TypeError
      If `working_dir` is None.
    Exception
      If extracting or writing any subset fails; the original error is chained
      as the cause.
    """
    if working_dir is None:
        # `Path(None)` would fail with an opaque TypeError; keep the exception
        # type but say what is actually wrong.
        raise TypeError("`working_dir` must be a path-like value, not None.")

    out_dir = Path(working_dir).joinpath("output")
    out_dir.mkdir(exist_ok=True)
    crs = CRS(crs)

    # Positional indexing is kept on purpose: `stats` is addressed as
    # stats[0] .. stats[len(stats) - 1], whatever its concrete container type.
    for i in range(len(stats)):
        fn = f"subset_{i + 1}.tiff"
        raster_subset = out_dir.joinpath(fn)

        try:
            raster_location = stats[i]
            raster = raster_location["mini_raster_array"]
            # Only the first six affine coefficients are needed to build the transform.
            grid_properties = raster_location["mini_raster_affine"][0:6]
            nodata = raster_location["mini_raster_nodata"]

            aff = Affine(*grid_properties)

            LOGGER.info(f"Writing raster data to {raster_subset}")

            masked_array = np.ma.masked_values(raster, nodata)
            if masked_array.mask.all():
                # A fully masked subset is only reported; it is still written below.
                msg = f"Subset {i} is empty, continuing..."
                LOGGER.warning(msg)

            normal_array = np.asarray(masked_array, dtype=data_type)

            # Write to GeoTIFF
            with rasterio.open(
                raster_subset,
                "w",
                driver="GTiff",
                count=1,
                compress=raster_compression,
                height=raster.shape[0],
                width=raster.shape[1],
                dtype=data_type,
                transform=aff,
                crs=crs,
                nodata=nodata,
            ) as f:
                f.write(normal_array, 1)

        except Exception as e:
            msg = f"Failed to write raster outputs: {e}"
            LOGGER.error(msg)
            # Chain the original error so its traceback is not lost.
            raise Exception(msg) from e

    # `shutil.make_archive` could potentially cause problems with multi-thread? Worth investigating later.
    if zip_archive:
        # Random suffix so archives from separate calls get distinct names.
        foldername = f"subset_{''.join(choice(ascii_letters) for _ in range(10))}"
        out_fn = Path(working_dir).joinpath(foldername)
        shutil.make_archive(
            base_name=out_fn, format="zip", root_dir=out_dir, logger=LOGGER
        )
        return f"{out_fn}.zip"

    return list(out_dir.glob("*"))