Example #1
def test_surface_wrong_kind_of_input():
    """
    Run surface using grid input that is not file/matrix/vectors.
    """
    ship_data = load_sample_bathymetry()
    # convert pandas.Series to xarray.DataArray
    data = ship_data.bathymetry.to_xarray()
    assert data_kind(data) == "grid"
    with pytest.raises(GMTInvalidInput):
        surface(data=data, spacing="5m", region=[245, 255, 20, 30])
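These test excerpts (Examples #1 to #7) omit their module-level setup. A plausible preamble, assuming a recent PyGMT layout (the TEMP_GRID path is hypothetical):

import os

import pytest
import xarray as xr

from pygmt import surface, which
from pygmt.datasets import load_sample_bathymetry
from pygmt.exceptions import GMTInvalidInput
from pygmt.helpers import data_kind

# Hypothetical path for the temporary output grid used in Examples #6 and #7.
TEMP_GRID = os.path.join(os.getcwd(), "tmp_grid.nc")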
Example #2
def test_surface_input_xy_no_z():
    """
    Run surface by passing in x and y, but no z.
    """
    ship_data = load_sample_bathymetry()
    with pytest.raises(GMTInvalidInput):
        surface(
            x=ship_data.longitude,
            y=ship_data.latitude,
            spacing="5m",
            region=[245, 255, 20, 30],
        )
Example #3
def test_surface_input_data_array():
    """
    Run surface by passing in a numpy array as data.
    """
    ship_data = load_sample_bathymetry()
    data = ship_data.values  # convert pandas.DataFrame to numpy.ndarray
    output = surface(data=data, spacing="5m", region=[245, 255, 20, 30])
    assert isinstance(output, xr.DataArray)
    return output
Example #4
def test_surface_input_file():
    """
    Run surface by passing in a filename.
    """
    fname = which("@tut_ship.xyz", download="c")
    output = surface(data=fname, spacing="5m", region=[245, 255, 20, 30])
    assert isinstance(output, xr.DataArray)
    assert output.gmt.registration == 0  # Gridline registration
    assert output.gmt.gtype == 0  # Cartesian type
    return output
Example #5
def test_surface_input_xyz():
    """
    Run surface by passing in x, y, z numpy.ndarrays individually.
    """
    ship_data = load_sample_bathymetry()
    output = surface(
        x=ship_data.longitude,
        y=ship_data.latitude,
        z=ship_data.bathymetry,
        spacing="5m",
        region=[245, 255, 20, 30],
    )
    assert isinstance(output, xr.DataArray)
    return output
Example #6
def test_surface_short_aliases():
    """
    Run surface using the short aliases -I for spacing, -R for region, and -G
    for outfile.
    """
    ship_data = load_sample_bathymetry()
    data = ship_data.values  # convert pandas.DataFrame to numpy.ndarray
    try:
        output = surface(data=data, I="5m", R=[245, 255, 20, 30], G=TEMP_GRID)
        assert output is None  # check that output is None since outfile is set
        assert os.path.exists(path=TEMP_GRID)  # check that outfile exists at path
        with xr.open_dataarray(TEMP_GRID) as grid:
            assert isinstance(grid, xr.DataArray)  # ensure netcdf grid loads ok
    finally:
        os.remove(path=TEMP_GRID)
    return output
Example #7
def test_surface_with_outfile_param():
    """
    Run surface with the -Goutputfile.nc parameter.
    """
    ship_data = load_sample_bathymetry()
    data = ship_data.values  # convert pandas.DataFrame to numpy.ndarray
    try:
        output = surface(data=data,
                         spacing="5m",
                         region=[245, 255, 20, 30],
                         outfile=TEMP_GRID)
        assert output is None  # check that output is None since outfile is set
        assert os.path.exists(path=TEMP_GRID)  # check that outfile exists at path
        with xr.open_dataarray(TEMP_GRID) as grid:
            assert isinstance(grid, xr.DataArray)  # ensure netcdf grid loads ok
    finally:
        os.remove(path=TEMP_GRID)
    return output
Example #8
def xyz_to_grid(
    xyz_data: pd.DataFrame,
    region: str,
    spacing: int = 250,
    tension: float = 0.35,
    outfile: str = None,
    mask_cell_radius: int = 3,
):
    """
    Performs interpolation of x, y, z point data to a raster grid.

    >>> xyz_data = pd.DataFrame(
    ...     600 * np.random.RandomState(seed=42).rand(60).reshape(20, 3),
    ...     columns=["x", "y", "z"],
    ... )
    >>> region = get_region(xyz_data=xyz_data)
    >>> grid = xyz_to_grid(xyz_data=xyz_data, region=region, spacing=250)
    >>> grid.to_array().shape
    (1, 3, 3)
    >>> grid.to_array().values
    array([[[208.90086, 324.8038 , 515.93726],
            [180.06642, 234.68915, 452.8586 ],
            [170.60728, 298.23764, 537.49774]]], dtype=float32)
    """
    ## Preprocessing with blockmedian
    with gmt.helpers.GMTTempFile(suffix=".txt") as tmpfile:
        with gmt.clib.Session() as lib:
            file_context = lib.virtualfile_from_matrix(matrix=xyz_data.values)
            with file_context as infile:
                kwargs = {"V": "", "R": region, "I": f"{spacing}+e"}
                arg_str = " ".join([
                    infile,
                    gmt.helpers.build_arg_string(kwargs), "->" + tmpfile.name
                ])
                lib.call_module(module="blockmedian", args=arg_str)
            x, y, z = np.loadtxt(fname=tmpfile.name, unpack=True)

    ## XYZ point data to NetCDF grid via GMT surface
    grid = gmt.surface(
        x=x,
        y=y,
        z=z,
        region=region,
        spacing=f"{spacing}+e",
        T=tension,
        V="n",  # normal verbosity: produce only fatal error messages
        M=f"{mask_cell_radius}c",
    )

    ## Save grid to NetCDF with projection information
    if outfile is not None:
        # TODO add CRS!! See https://github.com/pydata/xarray/issues/2288
        grid.to_netcdf(path=outfile)

    ## Resample grid from gridline to pixel registration
    with gmt.helpers.GMTTempFile(suffix=".nc") as tmpfile:
        with gmt.clib.Session() as lib:
            if outfile is not None:  # kind == "file"
                file_context = gmt.helpers.dummy_context(outfile)
            else:  # kind == "grid"
                file_context = lib.virtualfile_from_grid(grid.z)
                outfile = tmpfile.name
            with file_context as infile:
                kwargs = {"T": "", "G": f"{outfile}"}
                arg_str = " ".join(
                    [infile, gmt.helpers.build_arg_string(kwargs)])
                lib.call_module(module="grdsample", args=arg_str)
            with xr.open_dataset(outfile) as dataset:
                grid = dataset.load()

    return grid
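Note that the doctest above calls a get_region helper that is not part of this excerpt. A minimal sketch consistent with that call, assuming it only derives a GMT-style 'xmin/xmax/ymin/ymax' region string from the point cloud's bounding box:

import pandas as pd


def get_region(xyz_data: pd.DataFrame) -> str:
    """
    Hypothetical helper: return a GMT region string 'xmin/xmax/ymin/ymax'
    covering the x and y columns of the input table.
    """
    xmin, xmax = xyz_data.x.min(), xyz_data.x.max()
    ymin, ymax = xyz_data.y.min(), xyz_data.y.max()
    return f"{xmin}/{xmax}/{ymin}/{ymax}"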
Example #9
def spatiotemporal_cube(
    table: pd.DataFrame,
    placename: str = "",
    x_var: str = "x",
    y_var: str = "y",
    z_var: str = "h_corr",
    spacing: int = 250,
    clip_limits: bool = True,
    cycles: list = None,
    projection: str = (
        "+proj=stere +lat_0=-90 +lat_ts=-71 +lon_0=0 +k=1 "
        "+x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
    ),
    folder: str = "",
) -> xr.Dataset:
    """
    Interpolates a time-series point cloud into an xarray.Dataset data cube.
    Uses `pygmt`'s blockmedian and surface algorithms to produce individual
    NetCDF grids, and `xarray` to stack each NetCDF grid into one dataset.

    Steps are as follows:

    1. Create several xarray.DataArray grid surfaces from a table of points,
       one for each time cycle.
    2. Stack the grids along a time cycle axis into an xarray.Dataset, which
       is a spatiotemporal data cube with 'x', 'y' and 'cycle_number'
       dimensions.

                             _1__2__3_
            *   *           /  /  /  /|
         *   *             /  /  /  / |
       *   *    *         /__/__/__/  |  y
    *    *   *      -->   |  |  |  |  |
      *    *   *          |  |  |  | /
        *    *            |__|__|__|/  x
                             cycle

    Parameters
    ----------
    table : pandas.DataFrame
        A table containing the ICESat-2 track data from multiple cycles. It
        should ideally have geographical columns called 'x', 'y', and attribute
        columns like 'h_corr_1', 'h_corr_2', etc. for each cycle time.
    placename : str
        Optional. A descriptive placename for the data (e.g. some_ice_stream),
        to be used in the temporary NetCDF filename.
    x_var : str
        The x coordinate column name to use from the table data. Default is
        'x'.
    y_var : str
        The y coordinate column name to use from the table data. Default is
        'y'.
    z_var : str
        The z column name to use from the table data. This will be the
        attribute that the surface algorithm will run on. Default is 'h_corr'.
    spacing : float or str
        The spatial resolution of the resulting grid, provided as a number or
        as 'dx/dy' increments. This is passed on to `pygmt.blockmedian` and
        `pygmt.surface`. Default is 250 (metres).
    clip_limits : bool
        Whether or not to clip the output grid surface to ± 3 times the median
        absolute deviation of the data table's z-values. Useful for handling
        outlier values in the data table. Default is True (will clip).
    cycles : list
        The cycle numbers to run the gridding algorithm on, e.g. [3, 4] will
        use columns 'h_corr_3' and 'h_corr_4'. Default is None which will
        automatically determine the cycles for a given z_var.
    projection : str
        The proj4 string to store in the NetCDF output, will be passed directly
        to `pygmt.surface`'s J (projection) argument. Default is '+proj=stere
        +lat_0=-90 +lat_ts=-71 +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84
        +datum=WGS84 +units=m +no_defs', i.e. Antarctic Polar Stereographic
        EPSG:3031.
    folder : str
        The folder to keep the intermediate NetCDF file in. Default is to place
        the files in the current working directory.

    Returns
    -------
    cube : xarray.Dataset
        A 3-dimensional data cube made of digital surfaces stacked along a time
        cycle axis.

    """
    import pygmt
    import tqdm

    # Determine grid's bounding box region (xmin, xmax, ymin, ymax)
    grid_region: np.ndarray = pygmt.info(table=table[[x_var, y_var]],
                                         spacing=f"s{spacing}")

    # Automatically determine list of cycles if None is given
    if cycles is None:
        cycles: list = [
            int(col[len(z_var) + 1:]) for col in table.columns
            if col.startswith(z_var)
        ]

    # Limit surface output to within 3 median absolute deviations of median value
    if clip_limits:
        z_values = table[[f"{z_var}_{cycle}" for cycle in cycles]]
        median: float = np.nanmedian(z_values)
        meddev: float = scipy.stats.median_abs_deviation(x=z_values,
                                                         axis=None,
                                                         nan_policy="omit")
        limits: list = [f"l{median - 3 * meddev}", f"u{median + 3 * meddev}"]
    else:
        limits = None

    # Create one grid surface for each time cycle
    _placename = f"_{placename}" if placename else ""
    for cycle in tqdm.tqdm(iterable=cycles):
        df_trimmed = pygmt.blockmedian(
            table=table[[x_var, y_var, f"{z_var}_{cycle}"]].dropna(),
            region=grid_region,
            spacing=f"{spacing}+e",
        )
        outfile = f"{z_var}{_placename}_cycle_{cycle}.nc"
        pygmt.surface(
            data=df_trimmed.values,
            region=grid_region,
            spacing=spacing,
            J=f'"{projection}"',  # projection
            L=limits,  # lower and upper limits
            M="3c",  # mask values 3 pixel cells outside/away from valid data
            T=0.35,  # tension factor
            V="e",  # error messages only
            outfile=outfile,
        )
        # print(pygmt.grdinfo(outfile))

    # Move files into new folder if requested
    paths: list = [f"{z_var}{_placename}_cycle_{cycle}.nc" for cycle in cycles]
    if folder:
        paths: list = [
            shutil.move(src=path, dst=os.path.join(folder, path))
            for path in paths
        ]

    # Stack several NetCDF grids into one NetCDF along the time cycle axis
    dataset: xr.Dataset = xr.open_mfdataset(
        paths=paths,
        combine="nested",
        concat_dim=[pd.Index(data=cycles, name="cycle_number")],
        attrs_file=paths[-1],
    )

    return dataset
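A minimal usage sketch for spatiotemporal_cube, assuming a working GMT installation and a toy table with hypothetical column names following the 'h_corr_N' convention described in the docstring:

import numpy as np
import pandas as pd

# Toy point cloud with two cycles of height measurements.
rng = np.random.RandomState(seed=42)
table = pd.DataFrame({
    "x": rng.uniform(low=0, high=1000, size=100),
    "y": rng.uniform(low=0, high=1000, size=100),
    "h_corr_3": rng.normal(loc=100, scale=5, size=100),
    "h_corr_4": rng.normal(loc=101, scale=5, size=100),
})
cube = spatiotemporal_cube(table=table, placename="toy_area", spacing=250)
print(cube.dims)  # expect 'x', 'y' and 'cycle_number' dimensions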