# Hold out part of the wind data so we can check how well the gridder predicts
# values it was not trained on.
train, test = vd.train_test_split(
    projection(*coordinates),
    (data.wind_speed_east_knots, data.wind_speed_north_knots),
    random_state=2,
)

# We'll make a 20 arc-minute grid
spacing = 20 / 60

# Build the processing pipeline step by step: a blocked mean to avoid aliasing,
# a polynomial trend per component (Spline usually requires de-trended data),
# and finally a Spline per component. Note that BlockReduce handles
# multicomponent data on its own, without needing Vector.
blocked_mean = ("mean", vd.BlockReduce(np.mean, spacing * 111e3))
vector_trend = ("trend", vd.Vector([vd.Trend(degree=1) for _ in range(2)]))
vector_spline = (
    "spline",
    vd.Vector([vd.Spline(damping=1e-10, mindist=500e3) for _ in range(2)]),
)
chain = vd.Chain([blocked_mean, vector_trend, vector_spline])
print(chain)

# Fit on the training data
chain.fit(*train)

# And score on the testing data. The best possible score is 1, meaning a perfect
# prediction of the test data.
score = chain.score(*test)
print("Cross-validation R^2 score: {:.2f}".format(score))
# Trends
# ------
#
# Trends can't handle vector data automatically, so you can't pass
# ``data=(data.velocity_east, data.velocity_north)`` to :meth:`verde.Trend.fit`. To get
# around that, you can use the :class:`verde.Vector` class to create multi-component
# estimators and gridders from single component ones.
#
# :class:`~verde.Vector` takes an estimator/gridder for each data component and
# implements the :ref:`gridder interface <gridder_interface>` for vector data, fitting
# each estimator/gridder given to a different component of the data.
#
# For example, to fit a trend to our GPS velocities, we need to make a 2-component
# vector trend:
trend = vd.Vector([vd.Trend(degree=4), vd.Trend(degree=1)])
print(trend)

########################################################################################
# We can use the ``trend`` as if it were a regular :class:`verde.Trend` but passing in
# 2-component data to fit. This will fit each data component to a different
# :class:`verde.Trend`.
# Weight each observation by the inverse variance of its component.
velocity_weights = (1 / data.std_east**2, 1 / data.std_north**2)
trend.fit(
    coordinates=proj_coords,
    data=(data.velocity_east, data.velocity_north),
    weights=velocity_weights,
)

########################################################################################
# Each estimator can be accessed through the ``components`` attribute:
# Split the data into a training and testing set. We'll fit the gridder on the
# training set and use the testing set to evaluate how well the gridder is
# performing.
train, test = vd.train_test_split(
    projection(*coordinates),
    (data.velocity_east, data.velocity_north),
    random_state=0,
)

# We'll make a 10 arc-minute grid in the end.
spacing = 10 / 60

# Assemble the pipeline: a blocked mean to avoid aliasing, a polynomial trend
# to take care of the increase toward the coast, and finally the vector gridder
# using Poisson's ratio 0.5 to couple the two horizontal components.
steps = [
    ("mean", vd.BlockReduce(np.mean, spacing * 111e3)),
    ("trend", vd.Vector([vd.Trend(degree=1) for _ in range(2)])),
    ("spline", ez.Elastic2D(poisson=0.5, mindist=10e3)),
]
chain = vd.Chain(steps)

# Fit on the training data
chain.fit(*train)

# And score on the testing data. The best possible score is 1, meaning a
# perfect prediction of the test data.
score = chain.score(*test)
print("Cross-validation R^2 score: {:.2f}".format(score))

# Interpolate our horizontal GPS velocities onto a regular geographic grid and
# mask the data that are far from the observation points
grid_full = chain.grid(
    region,
    spacing=spacing,
    projection=projection,
    dims=["latitude", "longitude"],
)
chaining it with a vector interpolator using :class:`verde.Chain`. """ import matplotlib.pyplot as plt import cartopy.crs as ccrs import numpy as np import verde as vd # Fetch the GPS data from the U.S. West coast. The data has a strong trend toward the # North-West because of the relative movement along the San Andreas Fault System. data = vd.datasets.fetch_california_gps() # We'll fit a degree 2 trend on both the East and North components and weight the data # using the inverse of the variance of each component. # Note: Never use [Trend(...)]*2 as an argument to Vector. This creates references # to the same Trend instance and will mess up the fitting. trend = vd.Vector([vd.Trend(degree=2) for i in range(2)]) weights = vd.variance_to_weights((data.std_east**2, data.std_north**2)) trend.fit( coordinates=(data.longitude, data.latitude), data=(data.velocity_east, data.velocity_north), weights=weights, ) print("Vector trend estimator:", trend) # The separate Trend objects for each component can be accessed through the 'components' # attribute. You could grid them individually if you wanted. print("East component trend:", trend.components[0]) print("East trend coefficients:", trend.components[0].coef_) print("North component trend:", trend.components[1]) print("North trend coefficients:", trend.components[1].coef_)