def test_index_scalar(self):
    # regression test for GH1374
    x = indexing.CopyOnWriteArray(np.array(['foo', 'bar']))
    assert np.array(x[B[0]][B[()]]) == 'foo'
def test_setitem(self) -> None:
    original = np.arange(10)
    wrapped = indexing.CopyOnWriteArray(original)
    wrapped[B[:]] = 0
    assert_array_equal(original, np.arange(10))
    assert_array_equal(wrapped, np.zeros(10))
def test_setitem(self):
    original = np.arange(10)
    wrapped = indexing.CopyOnWriteArray(original)
    wrapped[:] = 0
    self.assertArrayEqual(original, np.arange(10))
    self.assertArrayEqual(wrapped, np.zeros(10))
def open_rasterio(
    filename,
    parse_coordinates=None,
    chunks=None,
    cache=None,
    lock=None,
    masked=False,
    mask_and_scale=False,
    variable=None,
    group=None,
    default_name=None,
    decode_times=True,
    decode_timedelta=None,
    **open_kwargs,
):  # pylint: disable=too-many-statements,too-many-locals,too-many-branches
    """Open a file with rasterio (experimental).

    This should work with any file that rasterio can open (most often:
    geoTIFF). The x and y coordinates are generated automatically from the
    file's geoinformation, shifted to the center of each pixel (see
    `"PixelIsArea" Raster Space
    <http://web.archive.org/web/20160326194152/http://remotesensing.org/geotiff/spec/geotiff2.5.html#2.5.2>`_
    for more information).

    Parameters
    ----------
    filename: str, rasterio.io.DatasetReader, or rasterio.vrt.WarpedVRT
        Path to the file to open. Or already open rasterio dataset.
    parse_coordinates: bool, optional
        Whether to parse the x and y coordinates out of the file's
        ``transform`` attribute or not. The default is to automatically
        parse the coordinates only if they are rectilinear (1D).
        It can be useful to set ``parse_coordinates=False``
        if your files are very large or if you don't need the coordinates.
    chunks: int, tuple or dict, optional
        Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
        ``{'x': 5, 'y': 5}``. If chunks is provided, it is used to load the
        new DataArray into a dask array. Chunks can also be set to
        ``True`` or ``"auto"`` to choose sensible chunk sizes according to
        ``dask.config.get("array.chunk-size")``.
    cache: bool, optional
        If True, cache data loaded from the underlying datastore in memory as
        NumPy arrays when accessed to avoid reading from the underlying
        datastore multiple times. Defaults to True unless you specify the
        `chunks` argument to use dask, in which case it defaults to False.
    lock: bool or dask.utils.SerializableLock, optional
        If chunks is provided, this argument is used to ensure that only one
        thread per process is reading from a rasterio file object at a time.

        By default and when a lock instance is provided,
        a :class:`xarray.backends.CachingFileManager` is used to cache File objects.
        Since rasterio also caches some data, this will make repeated reads from the
        same object fast.

        When ``lock=False``, no lock is used, allowing for completely parallel reads
        from multiple threads or processes. However, a new file handle is opened on
        each request.
    masked: bool, optional
        If True, read the mask and set values to NaN. Defaults to False.
    mask_and_scale: bool, optional
        Lazily scale (using the `scales` and `offsets` from rasterio) and mask.
        If the _Unsigned attribute is present treat integer arrays as unsigned.
    variable: str or list or tuple, optional
        Variable name or names to use to filter loading.
    group: str or list or tuple, optional
        Group name or names to use to filter loading.
    default_name: str, optional
        The name of the data array if none exists. Default is None.
    decode_times: bool, optional
        If True, decode times encoded in the standard NetCDF datetime format
        into datetime objects. Otherwise, leave them encoded as numbers.
    decode_timedelta: bool, optional
        If True, decode variables and coordinates with time units in
        {"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
        into timedelta objects. If False, leave them encoded as numbers.
        If None (default), assume the same value as decode_times.
    **open_kwargs: kwargs, optional
        Optional keyword arguments to pass into rasterio.open().
    Returns
    -------
    :obj:`xarray.Dataset` | :obj:`xarray.DataArray` | List[:obj:`xarray.Dataset`]:
        The newly created dataset(s).
    """
    parse_coordinates = True if parse_coordinates is None else parse_coordinates
    masked = masked or mask_and_scale
    vrt_params = None
    if isinstance(filename, rasterio.io.DatasetReader):
        filename = filename.name
    elif isinstance(filename, rasterio.vrt.WarpedVRT):
        vrt = filename
        filename = vrt.src_dataset.name
        vrt_params = dict(
            src_crs=vrt.src_crs.to_string(),
            crs=vrt.crs.to_string(),
            resampling=vrt.resampling,
            tolerance=vrt.tolerance,
            src_nodata=vrt.src_nodata,
            nodata=vrt.nodata,
            width=vrt.width,
            height=vrt.height,
            src_transform=vrt.src_transform,
            transform=vrt.transform,
            dtype=vrt.working_dtype,
            warp_extras=vrt.warp_extras,
        )

    if lock in (True, None):
        lock = RASTERIO_LOCK
    elif lock is False:
        lock = NO_LOCK

    # ensure default for sharing is False
    # ref https://github.com/mapbox/rasterio/issues/1504
    open_kwargs["sharing"] = open_kwargs.get("sharing", False)

    with warnings.catch_warnings(record=True) as rio_warnings:
        if lock is not NO_LOCK:
            manager = CachingFileManager(
                rasterio.open, filename, lock=lock, mode="r", kwargs=open_kwargs
            )
        else:
            manager = URIManager(rasterio.open, filename, mode="r", kwargs=open_kwargs)
        riods = manager.acquire()
        captured_warnings = rio_warnings.copy()

    # raise the NotGeoreferencedWarning if applicable
    for rio_warning in captured_warnings:
        if not riods.subdatasets or not isinstance(
            rio_warning.message, NotGeoreferencedWarning
        ):
            warnings.warn(str(rio_warning.message), type(rio_warning.message))

    # open the subdatasets if they exist
    if riods.subdatasets:
        return _load_subdatasets(
            riods=riods,
            group=group,
            variable=variable,
            parse_coordinates=parse_coordinates,
            chunks=chunks,
            cache=cache,
            lock=lock,
            masked=masked,
            mask_and_scale=mask_and_scale,
            decode_times=decode_times,
            decode_timedelta=decode_timedelta,
            **open_kwargs,
        )

    if vrt_params is not None:
        riods = WarpedVRT(riods, **vrt_params)

    if cache is None:
        cache = chunks is None

    # Get bands
    if riods.count < 1:
        raise ValueError("Unknown dims")

    # parse tags & load alternate coords
    attrs = _get_rasterio_attrs(riods=riods)
    coords = _load_netcdf_1d_coords(riods.tags())
    _parse_driver_tags(riods=riods, attrs=attrs, coords=coords)
    for coord in coords:
        if f"NETCDF_DIM_{coord}" in attrs:
            coord_name = coord
            attrs.pop(f"NETCDF_DIM_{coord}")
            break
    else:
        coord_name = "band"
        coords[coord_name] = np.asarray(riods.indexes)

    # Get geospatial coordinates
    if parse_coordinates:
        coords.update(
            _generate_spatial_coords(_rio_transform(riods), riods.width, riods.height)
        )

    unsigned = False
    encoding = {}
    if mask_and_scale and "_Unsigned" in attrs:
        unsigned = variables.pop_to(attrs, encoding, "_Unsigned") == "true"

    if masked:
        encoding["dtype"] = str(riods.dtypes[0])

    da_name = attrs.pop("NETCDF_VARNAME", default_name)
    data = indexing.LazilyOuterIndexedArray(
        RasterioArrayWrapper(
            manager,
            lock,
            name=da_name,
            vrt_params=vrt_params,
            masked=masked,
            mask_and_scale=mask_and_scale,
            unsigned=unsigned,
        )
    )

    # this lets you write arrays loaded with rasterio
    data = indexing.CopyOnWriteArray(data)
    if cache and chunks is None:
        data = indexing.MemoryCachedArray(data)

    result = DataArray(
        data=data, dims=(coord_name, "y", "x"), coords=coords, attrs=attrs, name=da_name
    )
    result.encoding = encoding

    # update attributes from NetCDF attributes
    _load_netcdf_attrs(riods.tags(), result)
    result = _decode_datetime_cf(
        result, decode_times=decode_times, decode_timedelta=decode_timedelta
    )

    # make sure the _FillValue is correct dtype
    if "_FillValue" in attrs:
        attrs["_FillValue"] = result.dtype.type(attrs["_FillValue"])

    # handle encoding
    _handle_encoding(result, mask_and_scale, masked, da_name)

    # Affine transformation matrix (always available)
    # This describes coefficients mapping pixel coordinates to CRS
    # For serialization store as tuple of 6 floats, the last row being
    # always (0, 0, 1) per definition (see
    # https://github.com/sgillies/affine)
    result.rio.write_transform(_rio_transform(riods), inplace=True)
    if hasattr(riods, "crs") and riods.crs:
        result.rio.write_crs(riods.crs, inplace=True)

    if chunks is not None:
        result = _prepare_dask(result, riods, filename, chunks)

    # Make the file closeable
    result.set_close(manager.close)
    result.rio._manager = manager
    # add file path to encoding
    result.encoding["source"] = riods.name
    return result
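def _example_open_rasterio_usage():
    """A minimal usage sketch of the entry point above (illustrative only).

    Assumptions not taken from the source: "example.tif" is a hypothetical
    local GeoTIFF path, and the chunk sizes are arbitrary.
    """
    da = open_rasterio(
        "example.tif",                # hypothetical path
        chunks={"x": 256, "y": 256},  # lazy, dask-backed load
        masked=True,                  # nodata values become NaN on read
    )
    # .compute() triggers the deferred read through the dask graph
    return da.isel(band=0).mean().compute()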
def test_implicit_indexing_adapter_copy_on_write():
    array = np.arange(10, dtype=np.int64)
    implicit = indexing.ImplicitToExplicitIndexingAdapter(
        indexing.CopyOnWriteArray(array)
    )
    assert isinstance(implicit[:], indexing.ImplicitToExplicitIndexingAdapter)
def open_rasterio(
    filename,
    parse_coordinates=None,
    chunks=None,
    cache=None,
    lock=None,
    masked=False,
    variable=None,
    group=None,
    default_name=None,
    **open_kwargs,
):
    """Open a file with rasterio (experimental).

    This should work with any file that rasterio can open (most often:
    geoTIFF). The x and y coordinates are generated automatically from the
    file's geoinformation, shifted to the center of each pixel (see
    `"PixelIsArea" Raster Space
    <http://web.archive.org/web/20160326194152/http://remotesensing.org/geotiff/spec/geotiff2.5.html#2.5.2>`_
    for more information).

    You can generate 2D coordinates from the file's attributes with::

        from affine import Affine
        da = xr.open_rasterio('path_to_file.tif')
        transform = Affine.from_gdal(*da.attrs['transform'])
        nx, ny = da.sizes['x'], da.sizes['y']
        x, y = np.meshgrid(np.arange(nx)+0.5, np.arange(ny)+0.5) * transform

    Parameters
    ----------
    filename: str, rasterio.DatasetReader, or rasterio.WarpedVRT
        Path to the file to open. Or already open rasterio dataset.
    parse_coordinates: bool, optional
        Whether to parse the x and y coordinates out of the file's
        ``transform`` attribute or not. The default is to automatically
        parse the coordinates only if they are rectilinear (1D).
        It can be useful to set ``parse_coordinates=False``
        if your files are very large or if you don't need the coordinates.
    chunks: int, tuple or dict, optional
        Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
        ``{'x': 5, 'y': 5}``. If chunks is provided, it is used to load the
        new DataArray into a dask array. Chunks can also be set to
        ``True`` or ``"auto"`` to choose sensible chunk sizes according to
        ``dask.config.get("array.chunk-size")``.
    cache: bool, optional
        If True, cache data loaded from the underlying datastore in memory as
        NumPy arrays when accessed to avoid reading from the underlying
        datastore multiple times. Defaults to True unless you specify the
        `chunks` argument to use dask, in which case it defaults to False.
    lock: False, True or threading.Lock, optional
        If chunks is provided, this argument is passed on to
        :py:func:`dask.array.from_array`. By default, a global lock is
        used to avoid issues with concurrent access to the same file when using
        dask's multithreaded backend.
    masked: bool, optional
        If True, read the mask and set values to NaN. Defaults to False.
    variable: str or list or tuple, optional
        Variable name or names to use to filter loading.
    group: str or list or tuple, optional
        Group name or names to use to filter loading.
    default_name: str, optional
        The name of the data array if none exists. Default is None.
    **open_kwargs: kwargs, optional
        Optional keyword arguments to pass into rasterio.open().

    Returns
    -------
    data : DataArray
        The newly created DataArray.
""" parse_coordinates = True if parse_coordinates is None else parse_coordinates vrt_params = None if isinstance(filename, rasterio.io.DatasetReader): filename = filename.name elif isinstance(filename, rasterio.vrt.WarpedVRT): vrt = filename filename = vrt.src_dataset.name vrt_params = dict( crs=vrt.crs.to_string(), resampling=vrt.resampling, src_nodata=vrt.src_nodata, dst_nodata=vrt.dst_nodata, tolerance=vrt.tolerance, transform=vrt.transform, width=vrt.width, height=vrt.height, warp_extras=vrt.warp_extras, ) if lock is None: lock = RASTERIO_LOCK # ensure default for sharing is False # ref https://github.com/mapbox/rasterio/issues/1504 open_kwargs["sharing"] = open_kwargs.get("sharing", False) manager = CachingFileManager( rasterio.open, filename, lock=lock, mode="r", kwargs=open_kwargs ) riods = manager.acquire() # open the subdatasets if they exist if riods.subdatasets: return _load_subdatasets( riods=riods, group=group, variable=variable, parse_coordinates=parse_coordinates, chunks=chunks, cache=cache, lock=lock, masked=masked, ) if vrt_params is not None: riods = WarpedVRT(riods, **vrt_params) if cache is None: cache = chunks is None # Get bands if riods.count < 1: raise ValueError("Unknown dims") coords = OrderedDict() coords["band"] = np.asarray(riods.indexes) # parse tags attrs, encoding = _get_rasterio_attrs(riods=riods, masked=masked) _parse_driver_tags(riods=riods, attrs=attrs, coords=coords) # Get geospatial coordinates transform = _rio_transform(riods) if parse_coordinates and transform.is_rectilinear: # 1d coordinates coords.update(affine_to_coords(riods.transform, riods.width, riods.height)) elif parse_coordinates: # 2d coordinates warnings.warn( "The file coordinates' transformation isn't " "rectilinear: xarray won't parse the coordinates " "in this case. Set `parse_coordinates=False` to " "suppress this warning.", RuntimeWarning, stacklevel=3, ) data = indexing.LazilyOuterIndexedArray( RasterioArrayWrapper(manager, lock, vrt_params, masked=masked) ) # this lets you write arrays loaded with rasterio data = indexing.CopyOnWriteArray(data) if cache and chunks is None: data = indexing.MemoryCachedArray(data) # create the output data array da_name = attrs.pop("NETCDF_VARNAME", default_name) result = DataArray( data=data, dims=("band", "y", "x"), coords=coords, attrs=attrs, name=da_name ) result.encoding = encoding if hasattr(riods, "crs") and riods.crs: result.rio.write_crs(riods.crs, inplace=True) if chunks is not None: result = _prepare_dask(result, riods, filename, chunks) # Make the file closeable result._file_obj = manager return result
def test_index_scalar(self) -> None:
    # regression test for GH1374
    x = indexing.CopyOnWriteArray(np.array(["foo", "bar"]))
    assert np.array(x[B[0]][B[()]]) == "foo"
def open_rasterio(
    filename, parse_coordinates=None, chunks=None, cache=None, lock=None, masked=False
):
    """Open a file with rasterio (experimental).

    This should work with any file that rasterio can open (most often:
    geoTIFF). The x and y coordinates are generated automatically from the
    file's geoinformation, shifted to the center of each pixel (see
    `"PixelIsArea" Raster Space
    <http://web.archive.org/web/20160326194152/http://remotesensing.org/geotiff/spec/geotiff2.5.html#2.5.2>`_
    for more information).

    You can generate 2D coordinates from the file's attributes with::

        from affine import Affine
        da = xr.open_rasterio('path_to_file.tif')
        transform = Affine.from_gdal(*da.attrs['transform'])
        nx, ny = da.sizes['x'], da.sizes['y']
        x, y = np.meshgrid(np.arange(nx)+0.5, np.arange(ny)+0.5) * transform

    Parameters
    ----------
    filename : str, rasterio.DatasetReader, or rasterio.WarpedVRT
        Path to the file to open. Or already open rasterio dataset.
    parse_coordinates : bool, optional
        Whether to parse the x and y coordinates out of the file's
        ``transform`` attribute or not. The default is to automatically
        parse the coordinates only if they are rectilinear (1D).
        It can be useful to set ``parse_coordinates=False``
        if your files are very large or if you don't need the coordinates.
    chunks : int, tuple or dict, optional
        Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
        ``{'x': 5, 'y': 5}``. If chunks is provided, it is used to load the
        new DataArray into a dask array. Chunks can also be set to
        ``True`` or ``"auto"`` to choose sensible chunk sizes according to
        ``dask.config.get("array.chunk-size")``.
    cache : bool, optional
        If True, cache data loaded from the underlying datastore in memory as
        NumPy arrays when accessed to avoid reading from the underlying
        datastore multiple times. Defaults to True unless you specify the
        `chunks` argument to use dask, in which case it defaults to False.
    lock : False, True or threading.Lock, optional
        If chunks is provided, this argument is passed on to
        :py:func:`dask.array.from_array`. By default, a global lock is
        used to avoid issues with concurrent access to the same file when using
        dask's multithreaded backend.
    masked : bool, optional
        If True, read the mask and set values to NaN. Defaults to False.

    Returns
    -------
    data : DataArray
        The newly created DataArray.
""" parse_coordinates = True if parse_coordinates is None else parse_coordinates import rasterio from rasterio.vrt import WarpedVRT vrt_params = None if isinstance(filename, rasterio.io.DatasetReader): filename = filename.name elif isinstance(filename, rasterio.vrt.WarpedVRT): vrt = filename filename = vrt.src_dataset.name vrt_params = dict( crs=vrt.crs.to_string(), resampling=vrt.resampling, src_nodata=vrt.src_nodata, dst_nodata=vrt.dst_nodata, tolerance=vrt.tolerance, transform=vrt.transform, width=vrt.width, height=vrt.height, warp_extras=vrt.warp_extras, ) if lock is None: lock = RASTERIO_LOCK manager = CachingFileManager(rasterio.open, filename, lock=lock, mode="r") riods = manager.acquire() # open the subdatasets if they exist if riods.subdatasets: data_arrays = {} for iii, subdataset in enumerate(riods.subdatasets): rioda = open_rasterio( subdataset, parse_coordinates=iii == 0 and parse_coordinates, chunks=chunks, cache=cache, lock=lock, masked=masked, ) data_arrays[rioda.name] = rioda return Dataset(data_arrays) if vrt_params is not None: riods = WarpedVRT(riods, **vrt_params) if cache is None: cache = chunks is None coords = OrderedDict() # Get bands if riods.count < 1: raise ValueError("Unknown dims") coords["band"] = np.asarray(riods.indexes) # Get coordinates if LooseVersion(rasterio.__version__) < LooseVersion("1.0"): transform = riods.affine else: transform = riods.transform if transform.is_rectilinear and parse_coordinates: # 1d coordinates coords.update( affine_to_coords(riods.transform, riods.width, riods.height)) elif parse_coordinates: # 2d coordinates warnings.warn( "The file coordinates' transformation isn't " "rectilinear: xarray won't parse the coordinates " "in this case. Set `parse_coordinates=False` to " "suppress this warning.", RuntimeWarning, stacklevel=3, ) # Attributes attrs = _parse_tags(riods.tags(1)) encoding = dict() # Affine transformation matrix (always available) # This describes coefficients mapping pixel coordinates to CRS # For serialization store as tuple of 6 floats, the last row being # always (0, 0, 1) per definition (see # https://github.com/sgillies/affine) attrs["transform"] = tuple(transform)[:6] if hasattr(riods, "nodata") and riods.nodata is not None: # The nodata values for the raster bands if masked: encoding["_FillValue"] = riods.nodata else: attrs["_FillValue"] = riods.nodata if hasattr(riods, "scales"): # The scale values for the raster bands attrs["scales"] = riods.scales if hasattr(riods, "offsets"): # The offset values for the raster bands attrs["offsets"] = riods.offsets if hasattr(riods, "descriptions") and any(riods.descriptions): # Descriptions for each dataset band attrs["descriptions"] = riods.descriptions if hasattr(riods, "units") and any(riods.units): # A list of units string for each dataset band attrs["units"] = riods.units # Parse extra metadata from tags, if supported parsers = {"ENVI": _parse_envi} driver = riods.driver if driver in parsers: meta = parsers[driver](riods.tags(ns=driver)) for k, v in meta.items(): # Add values as coordinates if they match the band count, # as attributes otherwise if isinstance(v, (list, np.ndarray)) and len(v) == riods.count: coords[k] = ("band", np.asarray(v)) else: attrs[k] = v data = indexing.LazilyOuterIndexedArray( RasterioArrayWrapper(manager, lock, vrt_params, masked=masked)) # this lets you write arrays loaded with rasterio data = indexing.CopyOnWriteArray(data) if cache and chunks is None: data = indexing.MemoryCachedArray(data) da_name = attrs.pop("NETCDF_VARNAME", None) 
    result = DataArray(
        data=data, dims=("band", "y", "x"), coords=coords, attrs=attrs, name=da_name
    )
    result.encoding = encoding

    if hasattr(riods, "crs") and riods.crs:
        result.rio.write_crs(riods.crs, inplace=True)

    if chunks is not None:
        from dask.base import tokenize

        # augment the token with the file modification time
        try:
            mtime = os.path.getmtime(filename)
        except OSError:
            # the filename is probably an s3 bucket rather than a regular file
            mtime = None

        if chunks in (True, "auto"):
            import dask
            from dask.array.core import normalize_chunks

            if LooseVersion(dask.__version__) < LooseVersion("0.18.0"):
                msg = (
                    "Automatic chunking requires dask.__version__ >= 0.18.0 . "
                    "You currently have version %s" % dask.__version__
                )
                raise NotImplementedError(msg)
            block_shape = (1,) + riods.block_shapes[0]
            chunks = normalize_chunks(
                chunks=(1, "auto", "auto"),
                shape=(riods.count, riods.height, riods.width),
                dtype=riods.dtypes[0],
                previous_chunks=tuple((c,) for c in block_shape),
            )
        token = tokenize(filename, mtime, chunks)
        name_prefix = "open_rasterio-%s" % token
        result = result.chunk(chunks, name_prefix=name_prefix, token=token)

    # Make the file closeable
    result._file_obj = manager
    return result
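def _example_auto_chunking_sketch():
    """Standalone sketch of the "auto" chunking step above.

    The raster shape, dtype, and block shape here are hypothetical; the call
    mirrors how ``normalize_chunks`` aligns chunks to the file's tiling.
    """
    from dask.array.core import normalize_chunks

    band_count, height, width = 3, 10980, 10980  # hypothetical raster
    block_shape = (1, 512, 512)                  # hypothetical internal tiling

    # Returns a tuple of per-dimension chunk tuples, bounded by
    # dask.config.get("array.chunk-size") and aligned to block_shape.
    return normalize_chunks(
        chunks=(1, "auto", "auto"),
        shape=(band_count, height, width),
        dtype=np.uint16,
        previous_chunks=tuple((c,) for c in block_shape),
    )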
def open_rasterio(
    filename,
    parse_coordinates=None,
    chunks=None,
    cache=None,
    lock=None,
    masked=False,
    mask_and_scale=False,
    variable=None,
    group=None,
    default_name=None,
    **open_kwargs,
):
    """Open a file with rasterio (experimental).

    This should work with any file that rasterio can open (most often:
    geoTIFF). The x and y coordinates are generated automatically from the
    file's geoinformation, shifted to the center of each pixel (see
    `"PixelIsArea" Raster Space
    <http://web.archive.org/web/20160326194152/http://remotesensing.org/geotiff/spec/geotiff2.5.html#2.5.2>`_
    for more information).

    You can generate 2D coordinates from the file's attributes with::

        from affine import Affine
        da = xr.open_rasterio('path_to_file.tif')
        transform = Affine.from_gdal(*da.attrs['transform'])
        nx, ny = da.sizes['x'], da.sizes['y']
        x, y = np.meshgrid(np.arange(nx)+0.5, np.arange(ny)+0.5) * transform

    Parameters
    ----------
    filename: str, rasterio.DatasetReader, or rasterio.WarpedVRT
        Path to the file to open. Or already open rasterio dataset.
    parse_coordinates: bool, optional
        Whether to parse the x and y coordinates out of the file's
        ``transform`` attribute or not. The default is to automatically
        parse the coordinates only if they are rectilinear (1D).
        It can be useful to set ``parse_coordinates=False``
        if your files are very large or if you don't need the coordinates.
    chunks: int, tuple or dict, optional
        Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
        ``{'x': 5, 'y': 5}``. If chunks is provided, it is used to load the
        new DataArray into a dask array. Chunks can also be set to
        ``True`` or ``"auto"`` to choose sensible chunk sizes according to
        ``dask.config.get("array.chunk-size")``.
    cache: bool, optional
        If True, cache data loaded from the underlying datastore in memory as
        NumPy arrays when accessed to avoid reading from the underlying
        datastore multiple times. Defaults to True unless you specify the
        `chunks` argument to use dask, in which case it defaults to False.
    lock: False, True or threading.Lock, optional
        If chunks is provided, this argument is passed on to
        :py:func:`dask.array.from_array`. By default, a global lock is
        used to avoid issues with concurrent access to the same file when using
        dask's multithreaded backend.
    masked: bool, optional
        If True, read the mask and set values to NaN. Defaults to False.
    mask_and_scale: bool, optional
        Lazily scale (using the `scales` and `offsets` from rasterio) and mask.
        If the _Unsigned attribute is present treat integer arrays as unsigned.
    variable: str or list or tuple, optional
        Variable name or names to use to filter loading.
    group: str or list or tuple, optional
        Group name or names to use to filter loading.
    default_name: str, optional
        The name of the data array if none exists. Default is None.
    **open_kwargs: kwargs, optional
        Optional keyword arguments to pass into rasterio.open().

    Returns
    -------
    :obj:`xarray.Dataset` | :obj:`xarray.DataArray` | List[:obj:`xarray.Dataset`]:
        The newly created dataset(s).
""" parse_coordinates = True if parse_coordinates is None else parse_coordinates masked = masked or mask_and_scale vrt_params = None if isinstance(filename, rasterio.io.DatasetReader): filename = filename.name elif isinstance(filename, rasterio.vrt.WarpedVRT): vrt = filename filename = vrt.src_dataset.name vrt_params = dict( src_crs=vrt.src_crs.to_string(), crs=vrt.crs.to_string(), resampling=vrt.resampling, tolerance=vrt.tolerance, src_nodata=vrt.src_nodata, nodata=vrt.nodata, width=vrt.width, height=vrt.height, src_transform=vrt.src_transform, transform=vrt.transform, dtype=vrt.working_dtype, warp_extras=vrt.warp_extras, ) if lock is None: lock = RASTERIO_LOCK # ensure default for sharing is False # ref https://github.com/mapbox/rasterio/issues/1504 open_kwargs["sharing"] = open_kwargs.get("sharing", False) with warnings.catch_warnings(record=True) as rio_warnings: manager = CachingFileManager(rasterio.open, filename, lock=lock, mode="r", kwargs=open_kwargs) riods = manager.acquire() captured_warnings = rio_warnings.copy() # raise the NotGeoreferencedWarning if applicable for rio_warning in captured_warnings: if not riods.subdatasets or not isinstance(rio_warning.message, NotGeoreferencedWarning): warnings.warn(str(rio_warning.message), type(rio_warning.message)) # open the subdatasets if they exist if riods.subdatasets: return _load_subdatasets( riods=riods, group=group, variable=variable, parse_coordinates=parse_coordinates, chunks=chunks, cache=cache, lock=lock, masked=masked, mask_and_scale=mask_and_scale, ) if vrt_params is not None: riods = WarpedVRT(riods, **vrt_params) if cache is None: cache = chunks is None # Get bands if riods.count < 1: raise ValueError("Unknown dims") # parse tags & load alternate coords attrs = _get_rasterio_attrs(riods=riods) coords = _load_netcdf_1d_coords(riods.tags()) _parse_driver_tags(riods=riods, attrs=attrs, coords=coords) for coord in coords: if f"NETCDF_DIM_{coord}" in attrs: coord_name = coord attrs.pop(f"NETCDF_DIM_{coord}") break else: coord_name = "band" coords[coord_name] = np.asarray(riods.indexes) # Get geospatial coordinates transform = _rio_transform(riods) if parse_coordinates and transform.is_rectilinear: # 1d coordinates coords.update( affine_to_coords(riods.transform, riods.width, riods.height)) elif parse_coordinates: # 2d coordinates warnings.warn( "The file coordinates' transformation isn't " "rectilinear: xarray won't parse the coordinates " "in this case. 
Set `parse_coordinates=False` to " "suppress this warning.", RuntimeWarning, stacklevel=3, ) unsigned = False encoding = {} if mask_and_scale and "_Unsigned" in attrs: unsigned = variables.pop_to(attrs, encoding, "_Unsigned") == "true" da_name = attrs.pop("NETCDF_VARNAME", default_name) data = indexing.LazilyOuterIndexedArray( RasterioArrayWrapper( manager, lock, name=da_name, vrt_params=vrt_params, masked=masked, mask_and_scale=mask_and_scale, unsigned=unsigned, )) # this lets you write arrays loaded with rasterio data = indexing.CopyOnWriteArray(data) if cache and chunks is None: data = indexing.MemoryCachedArray(data) result = DataArray(data=data, dims=(coord_name, "y", "x"), coords=coords, attrs=attrs, name=da_name) result.encoding = encoding # update attributes from NetCDF attributess _load_netcdf_attrs(riods.tags(), result) result = _decode_datetime_cf(result) # make sure the _FillValue is correct dtype if "_FillValue" in attrs: attrs["_FillValue"] = result.dtype.type(attrs["_FillValue"]) # handle encoding if mask_and_scale: if "scale_factor" in result.attrs: variables.pop_to(result.attrs, result.encoding, "scale_factor", name=da_name) if "add_offset" in result.attrs: variables.pop_to(result.attrs, result.encoding, "add_offset", name=da_name) if masked: if "_FillValue" in result.attrs: variables.pop_to(result.attrs, result.encoding, "_FillValue", name=da_name) if "missing_value" in result.attrs: variables.pop_to(result.attrs, result.encoding, "missing_value", name=da_name) # Affine transformation matrix (always available) # This describes coefficients mapping pixel coordinates to CRS # For serialization store as tuple of 6 floats, the last row being # always (0, 0, 1) per definition (see # https://github.com/sgillies/affine) result.rio.write_transform(riods.transform, inplace=True) if hasattr(riods, "crs") and riods.crs: result.rio.write_crs(riods.crs, inplace=True) if chunks is not None: result = _prepare_dask(result, riods, filename, chunks) # Make the file closeable result._file_obj = manager return result
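def _example_mask_and_scale_sketch():
    """Sketch of the CF-style decoding that ``mask_and_scale`` defers to.

    The raw values and tags here are hypothetical; decoding follows the usual
    CF convention, decoded = raw * scale_factor + add_offset, with the fill
    value mapped to NaN.
    """
    raw = np.array([0, 100, 255], dtype=np.uint8)
    scale_factor, add_offset, fill_value = 0.01, -1.0, 255

    decoded = np.where(
        raw == fill_value,
        np.nan,                           # masked: nodata becomes NaN
        raw * scale_factor + add_offset,  # scaled values
    )
    return decoded  # array([-1., 0., nan])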