def _block_average_coordinates(self, coordinates):
    """
    Run a block-averaging process on observation points

    Apply a median as the reduction function. The blocks will have the size
    specified through the ``block_size`` argument on the constructor.

    Parameters
    ----------
    coordinates : tuple of arrays
        Arrays with the coordinates of each data point. Should be in the
        following order: (``easting``, ``northing``, ``upward``, ...).

    Returns
    -------
    blocked_coords : tuple of arrays
        Tuple containing the coordinates of the block-averaged observation
        points.
    """
    reducer = vd.BlockReduce(
        spacing=self.block_size, reduction=np.median, drop_coords=False
    )
    # Must pass a dummy data array to BlockReduce.filter(), we choose an
    # array full of zeros. We will ignore the returned reduced dummy array.
    blocked_coords, _ = reducer.filter(coordinates, np.zeros_like(coordinates[0]))
    return blocked_coords
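# A minimal usage sketch (not from the original source). The method only needs an
# object that exposes ``block_size``, so a SimpleNamespace stands in for the real
# class here; every name below other than _block_average_coordinates is assumed.
from types import SimpleNamespace

import numpy as np
import verde as vd

easting, northing, upward = np.random.rand(3, 1000) * 100e3  # fake Cartesian points
owner = SimpleNamespace(block_size=5e3)  # hypothetical owner with a 5 km block size
blocked_easting, blocked_northing, blocked_upward = _block_average_coordinates(
    owner, (easting, northing, upward)
)
print("Reduced", easting.size, "points to", blocked_easting.size, "block medians")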
def generic_gridder(day, df, idx):
    """
    Generic gridding algorithm for easy variables
    """
    data = df[idx].values
    coordinates = (df["lon"].values, df["lat"].values)
    region = [XAXIS[0], XAXIS[-1], YAXIS[0], YAXIS[-1]]
    projection = pyproj.Proj(proj="merc", lat_ts=df["lat"].mean())
    spacing = 0.5
    chain = vd.Chain(
        [
            ("mean", vd.BlockReduce(np.mean, spacing=spacing * 111e3)),
            ("spline", vd.Spline(damping=1e-10, mindist=100e3)),
        ]
    )
    train, test = vd.train_test_split(
        projection(*coordinates), data, random_state=0
    )
    chain.fit(*train)
    score = chain.score(*test)
    shape = (len(YAXIS), len(XAXIS))
    grid = chain.grid(
        region=region,
        shape=shape,
        projection=projection,
        dims=["latitude", "longitude"],
        data_names=["precip"],
    )
    res = grid.to_array()
    res = np.ma.where(res < 0, 0, res)
    print(
        "%s %s rows for %s column min:%.3f max:%.3f score: %.3f"
        % (day, len(df.index), idx, np.nanmin(res), np.nanmax(res), score)
    )
    return masked_array(res, mpunits("inch"))
def chain_config(spacing=2500, degree=7):
    # degree > 20 is useless; operations with a degree-2 polynomial can go
    # downwards or upwards very fast
    begin = process_time()
    print("chain_config begin")
    chain = vd.Chain(
        [
            ("trend", vd.Trend(degree=degree)),
            ("reduce", vd.BlockReduce(np.median, spacing=spacing)),
            ("spline", vd.Spline()),
        ]
    )
    timelapse(begin, "chain_config")
    return chain
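# A hedged usage sketch, not part of the original code: the stand-ins below supply
# the process_time/timelapse helpers and some fake Cartesian data that chain_config
# expects from its own module.
from time import process_time

import numpy as np
import verde as vd


def timelapse(begin, label):
    # Hypothetical stand-in for the module's timing helper
    print(label, "took", process_time() - begin, "seconds")


coords = tuple(np.random.rand(2, 500) * 50e3)  # fake easting/northing in meters
values = np.sin(coords[0] / 5e3) + 0.1 * np.random.randn(500)
chain = chain_config(spacing=2500, degree=7)
chain.fit(coords, values)
grid = chain.grid(spacing=2500, data_names="values")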
########################################################################################
# Class :class:`verde.BlockReduce` can be used to apply a reduction/aggregation
# operation (mean, median, standard deviation, etc.) to the data in regular blocks. All
# data inside each block will be replaced by their aggregated value.
# :class:`~verde.BlockReduce` takes an aggregation function as input. It can be any
# function that receives a numpy array as input and returns a single scalar value. The
# :func:`numpy.mean` or :func:`numpy.median` functions are usually what we want.
import numpy as np

########################################################################################
# Blocked means and medians are good ways to decimate data for interpolation. Let's use
# a blocked median on our data to decimate it to our desired grid interval of 5
# arc-minutes. The reason for using a median over a mean is that bathymetry data can
# vary abruptly and a mean would smooth the data too much. For data that vary more
# smoothly (like gravity and magnetic data), a mean would be a better option.
reducer = vd.BlockReduce(reduction=np.median, spacing=5 / 60)
print(reducer)

########################################################################################
# Use the :meth:`~verde.BlockReduce.filter` method to apply the reduction:
coordinates, bathymetry = reducer.filter(
    coordinates=(data.longitude, data.latitude), data=data.bathymetry_m
)

plt.figure(figsize=(7, 7))
ax = plt.axes(projection=ccrs.Mercator())
ax.set_title("Locations of decimated data")
# Plot the bathymetry data locations as black dots
plt.plot(*coordinates, ".k", markersize=1, transform=crs)
vd.datasets.setup_baja_bathymetry_map(ax)
plt.tight_layout()
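########################################################################################
# Since any array-to-scalar function works as the reduction, a custom aggregation is
# just as easy. The short sketch below is not part of the original tutorial; it assumes
# we want a 90th-percentile reduction of the same bathymetry data.
reducer_p90 = vd.BlockReduce(
    reduction=lambda values: np.percentile(values, 90), spacing=5 / 60
)
coords_p90, bathymetry_p90 = reducer_p90.filter(
    coordinates=(data.longitude, data.latitude), data=data.bathymetry_m
)
print(bathymetry_p90.size, "blocked values using the 90th percentile")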
# set and use the testing set to evaluate how well the gridder is performing.
train, test = vd.train_test_split(
    projection(*coordinates),
    (data.wind_speed_east_knots, data.wind_speed_north_knots),
    random_state=2,
)

# We'll make a 20 arc-minute grid
spacing = 20 / 60

# Chain together a blocked mean to avoid aliasing, a polynomial trend (Spline usually
# requires de-trended data), and finally a Spline for each component. Notice that
# BlockReduce can work on multicomponent data without the use of Vector.
chain = vd.Chain(
    [
        ("mean", vd.BlockReduce(np.mean, spacing * 111e3)),
        ("trend", vd.Vector([vd.Trend(degree=1) for i in range(2)])),
        (
            "spline",
            vd.Vector([vd.Spline(damping=1e-10, mindist=500e3) for i in range(2)]),
        ),
    ]
)
print(chain)

# Fit on the training data
chain.fit(*train)

# And score on the testing data. The best possible score is 1, meaning a perfect
# prediction of the test data.
score = chain.score(*test)
print("Cross-validation R^2 score: {:.2f}".format(score))
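# To illustrate the multicomponent point above, here is a short sketch that is not part
# of the original example: passing both wind components as a tuple makes
# BlockReduce.filter reduce them over the same blocks.
reducer = vd.BlockReduce(np.mean, spacing * 111e3)
block_coords, (east_blocked, north_blocked) = reducer.filter(
    projection(*coordinates),
    (data.wind_speed_east_knots, data.wind_speed_north_knots),
)
print(east_blocked.size, "blocked values per component")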
interpolation.
"""
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import pyproj
import numpy as np
import verde as vd

# We'll test this on the Baja California shipborne bathymetry data
data = vd.datasets.fetch_baja_bathymetry()

# Before gridding, we need to decimate the data to avoid aliasing because of the
# oversampling along the ship tracks. We'll use a blocked median with 5 arc-minute
# blocks.
spacing = 5 / 60
reducer = vd.BlockReduce(reduction=np.median, spacing=spacing)
coordinates, bathymetry = reducer.filter(
    (data.longitude, data.latitude), data.bathymetry_m
)

# Project the data using pyproj so that we can use it as input for the gridder.
# We'll set the latitude of true scale to the mean latitude of the data.
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
proj_coordinates = projection(*coordinates)

# Now we can set up a gridder for the decimated data
grd = vd.ScipyGridder(method="cubic").fit(proj_coordinates, bathymetry)
print("Gridder used:", grd)

# Get the grid region in geographic coordinates
region = vd.get_region((data.longitude, data.latitude))
print("Data region:", region)
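# A plausible next step (a sketch, not part of the original snippet): predict on a
# regular geographic grid by passing the projection so that verde projects the grid
# coordinates before prediction.
grid = grd.grid(
    region=region,
    spacing=spacing,
    projection=projection,
    dims=["latitude", "longitude"],
    data_names="bathymetry",
)
print("Generated geographic grid:")
print(grid)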
def interp(df, mask, var='biomass', spacing=4000):
    """
    Grid a set of lat/lon points to a grid defined by mask

    Parameters
    ----------
    df : pd.DataFrame
        Data points to be gridded in the form of a Pandas DataFrame with
        columns ``lat``, ``lon``, and ``var``.
    mask : xr.DataArray
        Target grid definition. Must include a pyproj-parsable crs attribute
        (e.g. ``mask.attrs['crs']``). Data should be between 0 and 1.
    var : str
        Name of column in df to grid.
    spacing : float
        Grid spacing in units defined by the mask's crs.

    Returns
    -------
    grid : xr.DataArray
        Gridded data from df.
    """
    import verde as vd

    # extract the projection and grid info
    region = [mask.x.data[0], mask.x.data[-1], mask.y.data[-1], mask.y.data[0]]
    projection = pyproj.Proj(mask.attrs['crs'])
    coordinates = (df.lon.values, df.lat.values)
    proj_coords = projection(*coordinates)

    # split for validation... this may belong outside of this function
    train, test = vd.train_test_split(
        projection(*coordinates), df[var], random_state=RANDOM_SEED,
    )

    # fit the gridder
    chain = vd.Chain(
        [
            ('mean', vd.BlockReduce(np.mean, spacing=spacing * 0.25, region=region)),
            ('nearest', vd.ScipyGridder(method='linear')),
        ]
    )
    chain.fit(*train)
    # y_pred = chain.predict(test[0])
    # fit_score = score(test[1][0], y_pred)

    # make the grid
    grid = chain.grid(spacing=spacing, region=region, data_names=[var], dims=('y', 'x'))
    grid = vd.distance_mask(
        proj_coords,
        maxdist=4 * spacing,
        grid=grid,
    )
    grid = np.flipud(grid[var]) * mask
    grid.name = var

    return grid
    data.longitude,
    data.latitude,
    c=data.bathymetry_m,
    s=0.1,
    transform=ccrs.PlateCarree(),
)
plt.colorbar().set_label("meters")
vd.datasets.setup_baja_bathymetry_map(ax)
plt.show()

########################################################################################
# We'll create a chain that applies a blocked median to the data, fits a polynomial
# trend, and then fits a standard gridder to the trend residuals.
chain = vd.Chain(
    [
        ("reduce", vd.BlockReduce(np.median, spacing * 111e3)),
        ("trend", vd.Trend(degree=1)),
        ("spline", vd.Spline()),
    ]
)
print(chain)

########################################################################################
# Calling :meth:`verde.Chain.fit` will automatically run the data through the chain:
#
# #. Apply the blocked median to the input data
# #. Fit a trend to the blocked data and output the residuals
# #. Fit the spline to the trend residuals

chain.fit(proj_coords, data.bathymetry_m)

########################################################################################
# We'll test this on the California vertical GPS velocity data
data = vd.datasets.fetch_california_gps()

# We'll add some random extreme outliers to the data
outliers = np.random.RandomState(2).randint(0, data.shape[0], size=20)
data.velocity_up[outliers] += 0.08
print("Index of outliers:", outliers)

# Create an array of weights and set the weights for the outliers to a very low value
weights = np.ones_like(data.velocity_up)
weights[outliers] = 1e-5

# Now we can block average the points with and without weights to compare the outputs.
reducer = vd.BlockReduce(
    reduction=np.average, spacing=30 / 60, center_coordinates=True
)
coordinates, no_weights = reducer.filter(
    (data.longitude, data.latitude), data.velocity_up
)
__, with_weights = reducer.filter(
    (data.longitude, data.latitude), data.velocity_up, weights
)

# Now we can plot the data sets side by side on Mercator maps
fig, axes = plt.subplots(
    1, 2, figsize=(9, 7), subplot_kw=dict(projection=ccrs.Mercator())
)
titles = ["No Weights", "Weights"]
crs = ccrs.PlateCarree()
maxabs = vd.maxabs(data.velocity_up)
for ax, title, velocity in zip(axes, titles, (no_weights, with_weights)):
account the curvature of the Earth.
"""
import boule as bl
import matplotlib.pyplot as plt
import numpy as np
import verde as vd

import harmonica as hm

# Fetch the sample gravity data from South Africa
data = hm.datasets.fetch_south_africa_gravity()

# Downsample the data using a blocked mean to speed-up the computations
# for this example. This is preferred over simply discarding points to avoid
# aliasing effects.
blocked_mean = vd.BlockReduce(np.mean, spacing=0.2, drop_coords=False)
(longitude, latitude, elevation), gravity_data = blocked_mean.filter(
    (data.longitude, data.latitude, data.elevation),
    data.gravity,
)

# Compute gravity disturbance by removing the gravity of normal Earth
ellipsoid = bl.WGS84
gamma = ellipsoid.normal_gravity(latitude, height=elevation)
gravity_disturbance = gravity_data - gamma

# Convert data coordinates from geodetic (longitude, latitude, height) to
# spherical (longitude, spherical_latitude, radius).
coordinates = ellipsoid.geodetic_to_spherical(longitude, latitude, elevation)

# Create the equivalent sources
plt.plot(proj_coords[0], proj_coords[1], ".k", markersize=0.5)
plt.xlabel("Easting (m)")
plt.ylabel("Northing (m)")
plt.gca().set_aspect("equal")
plt.tight_layout()
plt.show()

########################################################################################
# Cartesian grids
# ---------------
#
# Now we can use :class:`verde.BlockReduce` and :class:`verde.Spline` on our projected
# coordinates. We'll specify the desired grid spacing as degrees and convert it to
# Cartesian using the 1 degree approx. 111 km rule-of-thumb.
spacing = 10 / 60
reducer = vd.BlockReduce(np.median, spacing=spacing * 111e3)
filter_coords, filter_bathy = reducer.filter(proj_coords, data.bathymetry_m)
spline = vd.Spline().fit(filter_coords, filter_bathy)

########################################################################################
# If we now call :meth:`verde.Spline.grid` we'll get back a grid evenly spaced in
# projected Cartesian coordinates.
grid = spline.grid(spacing=spacing * 111e3, data_names="bathymetry")
print("Cartesian grid:")
print(grid)

########################################################################################
# We'll mask our grid using :func:`verde.distance_mask` to get rid of all the spurious
# solutions far away from the data points.
grid = vd.distance_mask(proj_coords, maxdist=30e3, grid=grid)
    Grafita info: receives data and replies (number of iterations) at
    different filtering parameters.

    Parameters
    ----------
    training_csv : pdlist
    test_csv : pdlist
    n_iterations : int
    *different_spacing : boolean

    Returns
    -------
    description : type
        what does..
    """
    # Create the THC/KC and KC/CTC ratio columns
    training_csv['THC/KC'] = training_csv['THC'] / training_csv['KC']
    test_csv['KC/CTC'] = test_csv['KC'] / test_csv['CTC']
    # Getting rid of unnamed columns; do it before loading into this function,
    # and save to csv.
    training_csv.drop('Unnamed: 0', axis='columns', inplace=True)
    # Create a copy of the same data to generate the filtered versions
    # (do the filtering only for the needed features).
    list_training = copy.deepcopy(training_csv)
    # Attribute variable behaviour at spacing=500
    reducer = vd.BlockReduce(np.median, spacing=spacing_range)

    # Constructive thoughts for this function:
    # - try to compare each training process before it is complete
    # - if this is possible, compute the remaining process and return a range
    #   of the best results
    # - it is probably best to create a new scope, or build its own class, to
    #   manage the many functionalities
    pass
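# A possible direction for the sweep sketched in the comments above (an assumption,
# not the author's implementation): build one BlockReduce per candidate spacing and
# record how strongly each one decimates the training data.
import numpy as np
import verde as vd


def compare_spacings(coordinates, data, spacings=(250, 500, 1000, 2000)):
    # Hypothetical helper: number of blocked points left for each spacing
    results = {}
    for spacing in spacings:
        reducer = vd.BlockReduce(np.median, spacing=spacing)
        _, blocked = reducer.filter(coordinates, data)
        results[spacing] = blocked.size
    return results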