Example #1
def generic_gridder(day, df, idx):
    """
    Generic gridding algorithm for easy variables
    """
    data = df[idx].values
    coordinates = (df["lon"].values, df["lat"].values)
    region = [XAXIS[0], XAXIS[-1], YAXIS[0], YAXIS[-1]]
    projection = pyproj.Proj(proj="merc", lat_ts=df["lat"].mean())
    spacing = 0.5
    chain = vd.Chain([
        ("mean", vd.BlockReduce(np.mean, spacing=spacing * 111e3)),
        ("spline", vd.Spline(damping=1e-10, mindist=100e3)),
    ])
    train, test = vd.train_test_split(projection(*coordinates),
                                      data,
                                      random_state=0)
    chain.fit(*train)
    score = chain.score(*test)
    shape = (len(YAXIS), len(XAXIS))
    grid = chain.grid(
        region=region,
        shape=shape,
        projection=projection,
        dims=["latitude", "longitude"],
        data_names=["precip"],
    )
    res = grid.to_array()
    res = np.ma.where(res < 0, 0, res)
    print(("%s %s rows for %s column min:%.3f max:%.3f score: %.3f") %
          (day, len(df.index), idx, np.nanmin(res), np.nanmax(res), score))
    return masked_array(res, mpunits("inch"))
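# Note (not part of the original snippet): the function above assumes several
# module-level names (XAXIS, YAXIS, masked_array, mpunits) that are not shown.
# A minimal, hypothetical sketch of that context with synthetic data; it also
# assumes an older Verde release that still accepts Spline(mindist=...) and a
# list for data_names:
import numpy as np
import pandas as pd
import pyproj
import verde as vd
from metpy.units import masked_array, units as mpunits  # assumed source of these helpers

# Illustrative regular axes that define the output grid.
XAXIS = np.arange(-104.0, -79.75, 0.25)
YAXIS = np.arange(36.0, 50.25, 0.25)

# Synthetic observations to exercise the function.
rng = np.random.default_rng(42)
df = pd.DataFrame({
    "lon": rng.uniform(XAXIS[0], XAXIS[-1], 300),
    "lat": rng.uniform(YAXIS[0], YAXIS[-1], 300),
    "precip": rng.gamma(2.0, 0.2, 300),
})
res = generic_gridder("2020-09-01", df, "precip")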
Example #2
def chain_config(
    spacing=2500,
    degree=7
):  # degree > 20 is useless; even a degree-2 polynomial trend can shoot up or down very fast
    begin = process_time()
    print("chain_config begin")
    chain = vd.Chain([
        ('trend', vd.Trend(degree=degree)),
        ('reduce', vd.BlockReduce(np.median, spacing=spacing)),
        ('spline', vd.Spline()),
    ])
    timelapse(begin, "chain_config")
    return chain
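# Note: chain_config relies on two timing helpers that the snippet does not
# show. A plausible sketch (hypothetical implementations; only the names come
# from the original):
from time import process_time

def timelapse(begin, label):
    """Print the CPU time elapsed since `begin` for the step named `label`."""
    print(f"{label} took {process_time() - begin:.3f} s")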
Example #3
train, test = vd.train_test_split(
    projection(*coordinates),
    (data.wind_speed_east_knots, data.wind_speed_north_knots),
    random_state=2,
)

# We'll make a 20 arc-minute grid
spacing = 20 / 60

# Chain together a blocked mean to avoid aliasing, a polynomial trend (Spline usually
# requires de-trended data), and finally a Spline for each component. Notice that
# BlockReduce can work on multicomponent data without the use of Vector.
chain = vd.Chain(
    [
        ("mean", vd.BlockReduce(np.mean, spacing * 111e3)),
        ("trend", vd.Vector([vd.Trend(degree=1) for i in range(2)])),
        (
            "spline",
            vd.Vector([vd.Spline(damping=1e-10, mindist=500e3) for i in range(2)]),
        ),
    ]
)
print(chain)

# Fit on the training data
chain.fit(*train)
# And score on the testing data. The best possible score is 1, meaning a perfect
# prediction of the test data.
score = chain.score(*test)
print("Cross-validation R^2 score: {:.2f}".format(score))

# Interpolate the wind speed onto a regular geographic grid and mask the data that are
# far from the observation points
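# The listing cuts the snippet off before the gridding step described above. A
# hedged continuation, modeled on the other examples here (`coordinates` comes
# from earlier in the original script; the data names are assumptions):
grid = chain.grid(
    region=vd.get_region(coordinates),
    spacing=spacing,
    projection=projection,
    dims=["latitude", "longitude"],
    data_names=["east_knots", "north_knots"],
)
grid = vd.distance_mask(
    coordinates, maxdist=3 * spacing * 111e3, grid=grid, projection=projection
)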
Example #4
# We'll test this on the California vertical GPS velocity data because it comes with the
# uncertainties
data = vd.datasets.fetch_california_gps()
coordinates = (data.longitude.values, data.latitude.values)

# Use a Mercator projection for our Cartesian gridder
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())

# Now we can chain a block weighted mean and weighted spline together. We'll use
# uncertainty propagation to calculate the new weights from block mean because our data
# vary smoothly but have different uncertainties.
spacing = 5 / 60  # 5 arc-minutes
chain = vd.Chain(
    [
        ("mean", vd.BlockMean(spacing=spacing * 111e3, uncertainty=True)),
        ("spline", vd.Spline(damping=1e-10)),
    ]
)
print(chain)

# Split the data into a training and testing set. We'll use the training set to grid the
# data and the testing set to validate our spline model. Weights need to be
# 1/uncertainty**2 for the error propagation in BlockMean to work.
train, test = vd.train_test_split(
    projection(*coordinates),
    data.velocity_up,
    weights=1 / data.std_up ** 2,
    random_state=0,
)
# Fit the model on the training set
chain.fit(*train)
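# The snippet ends after fitting. Scoring on the testing set would follow the
# same pattern as the other examples in this listing (a sketch, not part of the
# original):
score = chain.score(*test)
print("Cross-validation R^2 score: {:.2f}".format(score))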
Example #5
def interp(df, mask, var='biomass', spacing=4000):
    """
    Grid a set of lat/lon points to a grid defined by mask

    Parameters
    ----------
    df : pd.DataFrame
        Data points to be gridded in the form of a Pandas DataFrame with
        columns ``lat``, ``lon``, and ``var``.
    mask : xr.DataArray
        Target grid definition. Must include a pyproj-parsable crs attribute
        (e.g. ``mask.attrs['crs']``). Data should be between 0 and 1.
    var : str
        Name of column in df to grid.
    spacing : float
        Grid spacing in units defined by the mask's crs.

    Returns
    -------
    grid : xr.DataArray
        Gridded data from df.
    """
    import verde as vd

    # extract the projection and grid info
    region = [mask.x.data[0], mask.x.data[-1], mask.y.data[-1], mask.y.data[0]]
    projection = pyproj.Proj(mask.attrs['crs'])

    coordinates = (df.lon.values, df.lat.values)

    proj_coords = projection(*coordinates)

    # split for validation... this may belong outside of this function
    train, test = vd.train_test_split(
        proj_coords,
        df[var],
        random_state=RANDOM_SEED,
    )

    # fit the gridder
    chain = vd.Chain(
        [
            ('mean', vd.BlockReduce(np.mean, spacing=spacing * 0.25, region=region)),
            ('linear', vd.ScipyGridder(method='linear')),
        ]
    )

    chain.fit(*train)
    # Optionally score on the held-out set (left disabled, as in the original):
    # fit_score = chain.score(*test)

    # make the grid
    grid = chain.grid(spacing=spacing, region=region, data_names=[var], dims=('y', 'x'))
    grid = vd.distance_mask(
        proj_coords,
        maxdist=4 * spacing,
        grid=grid,
    )
    grid = np.flipud(grid[var]) * mask
    grid.name = var

    return grid
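# Note: interp assumes that numpy, pyproj, and a module-level RANDOM_SEED are
# already in scope, for example (hypothetical values):
import numpy as np
import pyproj

RANDOM_SEED = 0  # any fixed seed keeps the train/test split reproducible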
Example #6
plt.scatter(
    data.longitude,
    data.latitude,
    c=data.bathymetry_m,
    s=0.1,
    transform=ccrs.PlateCarree(),
)
plt.colorbar().set_label("meters")
vd.datasets.setup_baja_bathymetry_map(ax)
plt.show()

########################################################################################
# We'll create a chain that applies a blocked median to the data, fits a polynomial
# trend, and then fits a standard gridder to the trend residuals.

chain = vd.Chain([
    ("reduce", vd.BlockReduce(np.median, spacing * 111e3)),
    ("trend", vd.Trend(degree=1)),
    ("spline", vd.Spline()),
])
print(chain)

########################################################################################
# Calling :meth:`verde.Chain.fit` will automatically run the data through the chain:
#
# #. Apply the blocked median to the input data
# #. Fit a trend to the blocked data and output the residuals
# #. Fit the spline to the trend residuals

chain.fit(proj_coords, data.bathymetry_m)

########################################################################################
# Now that the data has been through the chain, calling :meth:`verde.Chain.predict` will
# sum the results of every step in the chain that has a ``predict`` method.
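# A one-line illustration of that predict step (hedged; not from the original
# snippet): the residuals of the full chained model at the data points.
residuals = data.bathymetry_m - chain.predict(proj_coords)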
Example #7
# Split the data into a training and testing set. We'll fit the gridder on the
# training set and use the testing set to evaluate how well the gridder is
# performing.
train, test = vd.train_test_split(projection(*coordinates),
                                  (data.velocity_east, data.velocity_north),
                                  random_state=0)

# We'll make a 10 arc-minute grid in the end.
spacing = 10 / 60

# Chain together a blocked mean to avoid aliasing, a polynomial trend to take
# care of the increase toward the coast, and finally the vector gridder using
# Poisson's ratio 0.5 to couple the two horizontal components.
chain = vd.Chain([
    ("mean", vd.BlockReduce(np.mean, spacing * 111e3)),
    ("trend", vd.Vector([vd.Trend(degree=1) for i in range(2)])),
    ("spline", ez.Elastic2D(poisson=0.5, mindist=10e3)),
])
# Fit on the training data
chain.fit(*train)
# And score on the testing data. The best possible score is 1, meaning a
# perfect prediction of the test data.
score = chain.score(*test)
print("Cross-validation R^2 score: {:.2f}".format(score))

# Interpolate our horizontal GPS velocities onto a regular geographic grid and
# mask the data that are far from the observation points
grid_full = chain.grid(region,
                       spacing=spacing,
                       projection=projection,
                       dims=["latitude", "longitude"])
Example #8
# We'll test this on the air temperature data from Texas
data = vd.datasets.fetch_texas_wind()
coordinates = (data.longitude.values, data.latitude.values)
region = vd.get_region(coordinates)

# Use a Mercator projection for our Cartesian gridder
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())

# The output grid spacing will be 15 arc-minutes
spacing = 15 / 60

# Now we can chain a blocked mean and spline together. The Spline can be regularized
# by setting the damping coefficient (should be positive). It's also a good idea to set
# the minimum distance to the average data spacing to avoid singularities in the spline.
chain = vd.Chain([
    ("mean", vd.BlockReduce(np.mean, spacing=spacing * 111e3)),
    ("spline", vd.Spline(damping=1e-10, mindist=100e3)),
])
print(chain)

# We can evaluate model performance by splitting the data into a training and testing
# set. We'll use the training set to grid the data and the testing set to validate our
# spline model.
train, test = vd.train_test_split(projection(*coordinates),
                                  data.air_temperature_c,
                                  random_state=0)

# Fit the model on the training set
chain.fit(*train)

# And calculate an R^2 score coefficient on the testing set. The best possible score
# (perfect prediction) is 1. This can tell us how good our spline is at predicting data
# that was not in the training set.
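# The snippet is cut off here; the scoring call would presumably mirror the
# other examples in this listing:
score = chain.score(*test)
print("Cross-validation R^2 score: {:.2f}".format(score))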
Example #9
# Split the data into a training and testing set. We'll fit the gridder on the training
# set and use the testing set to evaluate how well the gridder is performing.
train, test = vd.train_test_split(projection(*coordinates),
                                  (data.velocity_east, data.velocity_north),
                                  random_state=0)

# We'll make a 20 arc-minute grid in the end.
spacing = 20 / 60

# Chain together a blocked mean to avoid aliasing, a polynomial trend to take care of
# the increase toward the coast, and finally the vector gridder using Poisson's ratio
# 0.5 to couple the two horizontal components.
chain = vd.Chain([
    ("mean", vd.BlockReduce(np.mean, spacing * 111e3)),
    ("trend", vd.VectorTrend(degree=5)),
    ("spline", vd.Vector2D(poisson=0.5)),
])
# Fit on the training data
chain.fit(*train)
# And score on the testing data. The best possible score is 1, meaning a perfect
# prediction of the test data.
score = chain.score(*test)
print("Cross-validation R^2 score: {:.2f}".format(score))

# Interpolate our horizontal GPS velocities onto a regular geographic grid and mask the
# data that are far from the observation points
grid = chain.grid(region,
                  spacing=spacing,
                  projection=projection,
                  dims=["latitude", "longitude"])
Example #10
train, test = vd.train_test_split(
    projection(*coordinates),
    (data.velocity_east, data.velocity_north),
    weights=(1 / data.std_east**2, 1 / data.std_north**2),
    random_state=1,
)

########################################################################################
# Now we can make a 2-component spline. Since :class:`verde.Vector` implements
# ``fit``, ``predict``, and ``filter``, we can use it in a :class:`verde.Chain` to build
# a pipeline.
#
# We need to use a bit of damping so that the weights can be taken into account. Splines
# without damping provide a perfect fit to the data and ignore the weights as a
# consequence.

chain = vd.Chain([
    ("mean", vd.BlockMean(spacing=spacing * 111e3, uncertainty=True)),
    ("trend", vd.Vector([vd.Trend(1), vd.Trend(1)])),
    ("spline", vd.Vector([vd.Spline(damping=1e-10),
                          vd.Spline(damping=1e-10)])),
])
print(chain)

########################################################################################
#
# .. warning::
#
#     Never generate the component gridders with ``[vd.Spline()]*2``. This will result
#     in each component being represented by **the same Spline object**, causing
#     problems when trying to fit it to different components.
#
# Fitting the spline and gridding is exactly the same as what we've done before.

chain.fit(*train)
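# Why the warning matters (a quick check, not from the original script): a list
# built with * replication holds the same object twice.
splines = [vd.Spline()] * 2
print(splines[0] is splines[1])  # True: fitting one component would overwrite the other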
Example #11
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
proj_coords = projection(data.longitude.values, data.latitude.values)

region = vd.get_region(coordinates)
spacing = 5 / 60

########################################################################################
# Now we can grid our data using a weighted spline. We'll use the block mean results
# with uncertainty-based weights.
#
# Note that the weighted spline solution only works for non-exact interpolation, so
# we need either some damping regularization or point forces that are not at the
# data locations. Here, we'll apply a bit of damping.
spline = vd.Chain([
    # Convert the spacing to meters because Spline is a Cartesian gridder
    ("mean", vd.BlockMean(spacing=spacing * 111e3, uncertainty=True)),
    ("spline", vd.Spline(damping=1e-10)),
]).fit(proj_coords, data.velocity_up, data.weights)
grid = spline.grid(
    region=region,
    spacing=spacing,
    projection=projection,
    dims=["latitude", "longitude"],
    data_names=["velocity"],
)
# Avoid showing interpolation outside of the convex hull of the data points.
grid = vd.convexhull_mask(coordinates, grid=grid, projection=projection)

########################################################################################
# Calculate an unweighted spline as well for comparison.
spline_unweighted = vd.Chain([
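# The listing truncates the snippet mid-statement here. A plausible completion
# of the unweighted comparison chain, mirroring the weighted one above (a guess,
# not the original code):
spline_unweighted = vd.Chain([
    ("mean", vd.BlockReduce(np.mean, spacing=spacing * 111e3)),
    ("spline", vd.Spline()),
]).fit(proj_coords, data.velocity_up)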
Example #12
import pyproj
import verde as vd
import numpy as np
import matplotlib.pyplot as plt

print("Verde version:", vd.version.full_version)

data = vd.datasets.fetch_baja_bathymetry()
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
proj_coords = projection(data.longitude.values, data.latitude.values)

spacing = 10 / 60
interp = vd.Chain([
    ("median", vd.BlockReduce(np.median, spacing=spacing * 111e3)),
    ("spline", vd.Spline(mindist=10e3, damping=1e-5)),
])
interp.fit(proj_coords, data.bathymetry_m)

grid = interp.grid(spacing=spacing * 111e3, data_names=["bathymetry"])
grid = vd.distance_mask(proj_coords, maxdist=30e3, grid=grid)

fig, ax = plt.subplots(1, 1, figsize=(7, 6))
pc = grid.bathymetry.plot.pcolormesh(ax=ax,
                                     cmap="viridis",
                                     vmax=0,
                                     add_colorbar=False)
plt.colorbar(pc, pad=0, ax=ax, aspect=40).set_label("bathymetry (m)")
ax.set_xlabel("Easting (m)")
ax.set_ylabel("Northing (m)")
ax.set_title("Gridded bathymetry")
ax.set_aspect("equal")
Example #13
# Load the Rio de Janeiro total field magnetic anomaly data
data = vd.datasets.fetch_rio_magnetic()
region = vd.get_region((data.longitude, data.latitude))

# Create a projection for the data using pyproj so that we can use it as input for the
# gridder. We'll set the latitude of true scale to the mean latitude of the data.
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())

# Create a chain that fits a 2nd degree trend, decimates the residuals using a blocked
# mean to avoid aliasing, and then fits a standard gridder to the residuals. The spacing
# for the blocked mean will be 0.5 arc-minutes (approximately converted to meters).
spacing = 0.5 / 60
chain = vd.Chain([
    ("trend", vd.Trend(degree=2)),
    ("reduce", vd.BlockReduce(np.mean, spacing * 111e3)),
    ("spline", vd.Spline(damping=1e-8)),
])
print("Chained estimator:", chain)
# Calling 'fit' will automatically run the data through the chain
chain.fit(projection(data.longitude.values, data.latitude.values),
          data.total_field_anomaly_nt)

# Each component of the chain can be accessed separately using the 'named_steps'
# attribute
grid_trend = chain.named_steps["trend"].grid()
print("\nTrend grid:")
print(grid_trend)

grid_residual = chain.named_steps["spline"].grid()
print("\nResidual grid:")