def test_surface_wrong_kind_of_input():
    """
    Check that surface raises GMTInvalidInput for "grid" kind input, i.e.
    input that is not a file/matrix/vectors.
    """
    bathymetry_table = load_sample_bathymetry()
    # pandas.Series -> xarray.DataArray
    grid_input = bathymetry_table.bathymetry.to_xarray()
    assert data_kind(grid_input) == "grid"
    with pytest.raises(GMTInvalidInput):
        surface(data=grid_input, spacing="5m", region=[245, 255, 20, 30])
def test_surface_input_xy_no_z():
    """
    Check that surface raises GMTInvalidInput when only x and y are passed
    in, with z left out.
    """
    track = load_sample_bathymetry()
    lon, lat = track.longitude, track.latitude
    with pytest.raises(GMTInvalidInput):
        surface(x=lon, y=lat, spacing="5m", region=[245, 255, 20, 30])
def test_surface_input_data_array():
    """
    Grid the sample bathymetry by handing surface a numpy array via ``data``
    and check that an xarray.DataArray comes back.
    """
    # pandas.DataFrame -> numpy.ndarray
    xyz_matrix = load_sample_bathymetry().values
    gridded = surface(data=xyz_matrix, spacing="5m", region=[245, 255, 20, 30])
    assert isinstance(gridded, xr.DataArray)
    return gridded
def test_surface_input_file():
    """
    Grid the "@tut_ship.xyz" file by handing its filename to surface via
    ``data``, then check the type and GMT metadata of the result.
    """
    xyz_path = which("@tut_ship.xyz", download="c")
    gridded = surface(data=xyz_path, spacing="5m", region=[245, 255, 20, 30])
    assert isinstance(gridded, xr.DataArray)
    gmt_accessor = gridded.gmt
    assert gmt_accessor.registration == 0  # Gridline registration
    assert gmt_accessor.gtype == 0  # Cartesian type
    return gridded
def test_surface_input_xyz():
    """
    Grid the sample bathymetry by handing surface the x, y and z columns as
    three separate arguments and check that an xarray.DataArray comes back.
    """
    track = load_sample_bathymetry()
    columns = {
        "x": track.longitude,
        "y": track.latitude,
        "z": track.bathymetry,
    }
    gridded = surface(**columns, spacing="5m", region=[245, 255, 20, 30])
    assert isinstance(gridded, xr.DataArray)
    return gridded
def test_surface_short_aliases():
    """
    Run surface using short aliases -I for spacing, -R for region, -G for
    outfile.

    The grid is written to TEMP_GRID, so surface is expected to return None.
    The file is removed afterwards whether or not the assertions pass.
    """
    ship_data = load_sample_bathymetry()
    data = ship_data.values  # convert pandas.DataFrame to numpy.ndarray
    try:
        output = surface(data=data, I="5m", R=[245, 255, 20, 30], G=TEMP_GRID)
        assert output is None  # check that output is None since outfile is set
        assert os.path.exists(TEMP_GRID)  # check that outfile exists at path
        with xr.open_dataarray(TEMP_GRID) as grid:
            assert isinstance(grid, xr.DataArray)  # ensure netcdf grid loads ok
    finally:
        # Only remove the grid if it was actually written. An unconditional
        # os.remove would raise FileNotFoundError when surface itself failed,
        # and that secondary error would hide the real cause of the failure.
        if os.path.exists(TEMP_GRID):
            os.remove(TEMP_GRID)
    return output
def test_surface_with_outfile_param():
    """
    Run surface with the -Goutputfile.nc parameter.

    The grid is written to TEMP_GRID, so surface is expected to return None.
    The file is removed afterwards whether or not the assertions pass.
    """
    ship_data = load_sample_bathymetry()
    data = ship_data.values  # convert pandas.DataFrame to numpy.ndarray
    try:
        output = surface(
            data=data, spacing="5m", region=[245, 255, 20, 30], outfile=TEMP_GRID
        )
        assert output is None  # check that output is None since outfile is set
        assert os.path.exists(TEMP_GRID)  # check that outfile exists at path
        with xr.open_dataarray(TEMP_GRID) as grid:
            assert isinstance(grid, xr.DataArray)  # ensure netcdf grid loads ok
    finally:
        # Only remove the grid if it was actually written. An unconditional
        # os.remove would raise FileNotFoundError when surface itself failed,
        # and that secondary error would hide the real cause of the failure.
        if os.path.exists(TEMP_GRID):
            os.remove(TEMP_GRID)
    return output
def xyz_to_grid(
    xyz_data: pd.DataFrame,
    region: str,
    spacing: int = 250,
    tension: float = 0.35,
    outfile: str = None,
    mask_cell_radius: int = 3,
):
    """
    Interpolate x, y, z point data onto a raster grid.

    Three GMT steps are chained together: the points are pre-processed with
    the blockmedian module, the result is gridded with `gmt.surface`, and the
    grid is finally run through the grdsample module (with its T option) to
    resample it from gridline to pixel registration.

    Parameters
    ----------
    xyz_data : pandas.DataFrame
        The point data. Its ``.values`` matrix is handed to blockmedian; the
        example below uses three columns named "x", "y" and "z".
    region : str
        The bounding region, passed as blockmedian's R option and as the
        ``region`` argument of `gmt.surface`.
    spacing : int
        The grid spacing. It is passed on as ``f"{spacing}+e"`` to both
        blockmedian and `gmt.surface`. Default is 250.
    tension : float
        Passed to `gmt.surface` as its T argument. Default is 0.35.
    outfile : str
        Optional. When given, the surface grid is first saved to this NetCDF
        path and grdsample then reads from and writes to that same path.
        Default is None, in which case grdsample writes to a temporary file.
    mask_cell_radius : int
        Passed to `gmt.surface` as ``M=f"{mask_cell_radius}c"``. Default is 3.

    Returns
    -------
    grid
        The grdsample output, opened with `xarray.open_dataset` and loaded
        into memory.

    >>> xyz_data = pd.DataFrame(
    ...     600 * np.random.RandomState(seed=42).rand(60).reshape(20, 3),
    ...     columns=["x", "y", "z"],
    ... )
    >>> region = get_region(xyz_data=xyz_data)
    >>> grid = xyz_to_grid(xyz_data=xyz_data, region=region, spacing=250)
    >>> grid.to_array().shape
    (1, 3, 3)
    >>> grid.to_array().values
    array([[[208.90086, 324.8038 , 515.93726],
            [180.06642, 234.68915, 452.8586 ],
            [170.60728, 298.23764, 537.49774]]], dtype=float32)
    """
    ## Preprocessing with blockmedian
    with gmt.helpers.GMTTempFile(suffix=".txt") as tmpfile:
        with gmt.clib.Session() as lib:
            with lib.virtualfile_from_matrix(matrix=xyz_data.values) as infile:
                blockmedian_opts = {"V": "", "R": region, "I": f"{spacing}+e"}
                blockmedian_args = " ".join(
                    [
                        infile,
                        gmt.helpers.build_arg_string(blockmedian_opts),
                        "->" + tmpfile.name,  # send the module output to tmpfile
                    ]
                )
                lib.call_module(module="blockmedian", args=blockmedian_args)
        # Read the block medians back while the temporary file still exists
        x, y, z = np.loadtxt(fname=tmpfile.name, unpack=True)

    ## XYZ point data to NetCDF grid via GMT surface
    grid = gmt.surface(
        x=x,
        y=y,
        z=z,
        region=region,
        spacing=f"{spacing}+e",
        T=tension,
        V="n",  # normal verbosity: produce only fatal error messages
        M=f"{mask_cell_radius}c",
    )

    ## Save grid to NetCDF with projection information
    if outfile is not None:
        # TODO add CRS!! See https://github.com/pydata/xarray/issues/2288
        grid.to_netcdf(path=outfile)

    ## Resample grid from gridline to pixel registration
    with gmt.helpers.GMTTempFile(suffix=".nc") as tmpfile:
        with gmt.clib.Session() as lib:
            if outfile is None:  # kind == "grid"
                grid_source = lib.virtualfile_from_grid(grid.z)
                outfile = tmpfile.name
            else:  # kind == "file"
                grid_source = gmt.helpers.dummy_context(outfile)
            with grid_source as infile:
                grdsample_opts = {"T": "", "G": f"{outfile}"}
                grdsample_args = " ".join(
                    [infile, gmt.helpers.build_arg_string(grdsample_opts)]
                )
                lib.call_module(module="grdsample", args=grdsample_args)
        # Load fully into memory here, since outfile may be the temporary file
        with xr.open_dataset(outfile) as dataset:
            grid = dataset.load()

    return grid
def spatiotemporal_cube(
    table: pd.DataFrame,
    placename: str = "",
    x_var: str = "x",
    y_var: str = "y",
    z_var: str = "h_corr",
    spacing: int = 250,
    clip_limits: bool = True,
    cycles: list = None,
    projection: str = "+proj=stere +lat_0=-90 +lat_ts=-71 +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs",
    folder: str = "",
) -> xr.Dataset:
    """
    Interpolates a time-series point cloud into an xarray.Dataset data cube.
    Uses `pygmt`'s blockmedian and surface algorithms to produce individual
    NetCDF grids, and `xarray` to stack each NetCDF grid into one dataset.

    Steps are as follows:

    1. Create several xarray.DataArray grid surfaces from a table of points,
       one for each time cycle.
    2. Stack the grids along a time cycle axis into a xarray.Dataset which is
       a spatiotemporal data cube with 'x', 'y' and 'cycle_number' dimensions.

                             _1__2__3_
            *   *           /  /  /  /|
         *   *             /  /  /  / |
       *   *    *         /__/__/__/  |  y
    *    *   *      -->   |  |  |  |  |
      *    *   *          |  |  |  | /
        *    *            |__|__|__|/  x
                             cycle

    Parameters
    ----------
    table : pandas.DataFrame
        A table containing the ICESat-2 track data from multiple cycles. It
        should ideally have geographical columns called 'x', 'y', and
        attribute columns like 'h_corr_1', 'h_corr_2', etc for each cycle
        time.
    placename : str
        Optional. A descriptive placename for the data (e.g. some_ice_stream),
        to be used in the temporary NetCDF filename.
    x_var : str
        The x coordinate column name to use from the table data. Default is
        'x'.
    y_var : str
        The y coordinate column name to use from the table data. Default is
        'y'.
    z_var : str
        The z column name to use from the table data. This will be the
        attribute that the surface algorithm will run on. Default is 'h_corr'.
    spacing : float or str
        The spatial resolution of the resulting grid, provided as a number or
        as 'dx/dy' increments. This is passed on to `pygmt.blockmedian` and
        `pygmt.surface`. Default is 250 (metres).
    clip_limits : bool
        Whether or not to clip the output grid surface to ± 3 times the median
        absolute deviation of the data table's z-values. Useful for handling
        outlier values in the data table. Default is True (will clip).
    cycles : list
        The cycle numbers to run the gridding algorithm on, e.g. [3, 4] will
        use columns 'h_corr_3' and 'h_corr_4'. Default is None which will
        automatically determine the cycles for a given z_var, using every
        column named '<z_var>_<integer>'. Other columns that merely share the
        prefix (e.g. 'h_corr' or 'h_corr_sigma_1') are ignored.
    projection : str
        The proj4 string to store in the NetCDF output, will be passed (wrapped
        in double quotes) to `pygmt.surface`'s J (projection) argument.
        Default is '+proj=stere +lat_0=-90 +lat_ts=-71 +lon_0=0 +k=1 +x_0=0
        +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs', i.e. Antarctic
        Polar Stereographic EPSG:3031.
    folder : str
        The folder to keep the intermediate NetCDF file in. Default is to place
        the files in the current working directory.

    Returns
    -------
    cube : xarray.Dataset
        A 3-dimensional data cube made of digital surfaces stacked along a time
        cycle axis.

    Raises
    ------
    ValueError
        If there are no cycles to grid, i.e. ``cycles`` is empty or no
        '<z_var>_<integer>' column could be found in the table.
    """
    import pygmt
    import tqdm

    # Determine grid's bounding box region (xmin, xmax, ymin, ymax)
    grid_region: np.ndarray = pygmt.info(
        table=table[[x_var, y_var]], spacing=f"s{spacing}"
    )

    # Automatically determine list of cycles if None is given. Only columns
    # whose name is exactly '<z_var>_<integer>' count as a cycle, so sibling
    # columns sharing the prefix do not break the int() conversion.
    if cycles is None:
        prefix = f"{z_var}_"
        cycles = [
            int(col[len(prefix):])
            for col in table.columns
            if isinstance(col, str)
            and col.startswith(prefix)
            and col[len(prefix):].isdecimal()
        ]
    # len() rather than truthiness, so array-like cycles are handled too
    if len(cycles) == 0:
        raise ValueError(
            f"No cycles to grid: found no '{z_var}_<cycle>' columns in the table"
        )

    # Limit surface output to within 3 median absolute deviations of median value
    if clip_limits:
        z_values = table[[f"{z_var}_{cycle}" for cycle in cycles]]
        median: float = np.nanmedian(z_values)
        meddev: float = scipy.stats.median_abs_deviation(
            x=z_values, axis=None, nan_policy="omit"
        )
        limits: list = [f"l{median - 3 * meddev}", f"u{median + 3 * meddev}"]
    else:
        limits = None

    # Create one grid surface for each time cycle, remembering each output path
    _placename = f"_{placename}" if placename else ""
    paths: list = []
    for cycle in tqdm.tqdm(iterable=cycles):
        df_trimmed = pygmt.blockmedian(
            table=table[[x_var, y_var, f"{z_var}_{cycle}"]].dropna(),
            region=grid_region,
            spacing=f"{spacing}+e",
        )
        outfile = f"{z_var}{_placename}_cycle_{cycle}.nc"
        pygmt.surface(
            data=df_trimmed.values,
            region=grid_region,
            spacing=spacing,
            J=f'"{projection}"',  # projection
            L=limits,  # lower and upper limits
            M="3c",  # mask values 3 pixel cells outside/away from valid data
            T=0.35,  # tension factor
            V="e",  # error messages only
            outfile=outfile,
        )
        paths.append(outfile)
        # print(pygmt.grdinfo(outfile))

    # Move files into new folder if requested
    if folder:
        paths = [
            shutil.move(src=path, dst=os.path.join(folder, path)) for path in paths
        ]

    # Stack several NetCDF grids into one NetCDF along the time cycle axis
    dataset: xr.Dataset = xr.open_mfdataset(
        paths=paths,
        combine="nested",
        concat_dim=[pd.Index(data=cycles, name="cycle_number")],
        attrs_file=paths[-1],
    )

    return dataset