Example #1
    def _block_average_coordinates(self, coordinates):
        """
        Run a block-averaging process on observation points

        Apply a median as the reduction function. The blocks will have the size
        specified through the ``block_size`` argument on the constructor.

        Parameters
        ----------
        coordinates : tuple of arrays
            Arrays with the coordinates of each data point. Should be in the
            following order: (``easting``, ``northing``, ``upward``, ...).

        Returns
        -------
        blocked_coords : tuple of arrays
            Tuple containing the coordinates of the block-averaged observation
            points.
        """
        reducer = vd.BlockReduce(
            spacing=self.block_size, reduction=np.median, drop_coords=False
        )
        # Must pass a dummy data array to BlockReduce.filter(); we use an array
        # of zeros and ignore the reduced dummy array that it returns.
        blocked_coords, _ = reducer.filter(coordinates, np.zeros_like(coordinates[0]))
        return blocked_coords
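
# Standalone sketch of the same block-averaging step outside the class (assumes
# verde imported as ``vd``, numpy as ``np``, and a 5 km block size chosen only
# for illustration):
#
#     reducer = vd.BlockReduce(spacing=5e3, reduction=np.median, drop_coords=False)
#     (easting, northing, upward), _ = reducer.filter(
#         (easting, northing, upward), np.zeros_like(easting)
#     )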
Example #2
def generic_gridder(day, df, idx):
    """
    Generic gridding algorithm for easy variables
    """
    data = df[idx].values
    coordinates = (df["lon"].values, df["lat"].values)
    region = [XAXIS[0], XAXIS[-1], YAXIS[0], YAXIS[-1]]
    projection = pyproj.Proj(proj="merc", lat_ts=df["lat"].mean())
    spacing = 0.5
    chain = vd.Chain([
        ("mean", vd.BlockReduce(np.mean, spacing=spacing * 111e3)),
        ("spline", vd.Spline(damping=1e-10, mindist=100e3)),
    ])
    train, test = vd.train_test_split(projection(*coordinates),
                                      data,
                                      random_state=0)
    chain.fit(*train)
    score = chain.score(*test)
    shape = (len(YAXIS), len(XAXIS))
    grid = chain.grid(
        region=region,
        shape=shape,
        projection=projection,
        dims=["latitude", "longitude"],
        data_names=["precip"],
    )
    res = grid.to_array()
    res = np.ma.where(res < 0, 0, res)
    print(("%s %s rows for %s column min:%.3f max:%.3f score: %.3f") %
          (day, len(df.index), idx, np.nanmin(res), np.nanmax(res), score))
    return masked_array(res, mpunits("inch"))
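
# Hypothetical usage sketch (assumes a DataFrame with "lon", "lat", and the target
# column, plus the module-level XAXIS/YAXIS grid axes; names are illustrative):
#
#     precip_grid = generic_gridder("2020-01-01", obs_df, "precip")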
Example #3
def chain_config(
    spacing=2500,
    degree=7
):  # degree > 20 is useless; even a degree-2 polynomial term can shoot up or down very fast
    begin = process_time()
    print("chain_config begin")
    chain = vd.Chain([
        ('trend', vd.Trend(degree=degree)),
        ('reduce', vd.BlockReduce(np.median, spacing=spacing)),
        ('spline', vd.Spline()),
    ])
    timelapse(begin, "chain_config")
    return chain
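
# Usage sketch with illustrative names (coordinates are assumed to be projected,
# in meters, to match the spacing):
#
#     chain = chain_config(spacing=2500, degree=7)
#     chain.fit((easting, northing), values)
#     grid = chain.grid(spacing=2500, data_names=["values"])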
Example #4
########################################################################################
# Class :class:`verde.BlockReduce` can be used to apply a reduction/aggregation
# operation (mean, median, standard deviation, etc) to the data in regular blocks. All
# data inside each block will be replaced by their aggregated value.
# :class:`~verde.BlockReduce` takes an aggregation function as input. It can be any
# function that receives a numpy array as input and returns a single scalar value. The
# :func:`numpy.mean` or :func:`numpy.median` functions are usually what we want.
import numpy as np

########################################################################################
# Blocked means and medians are good ways to decimate data for interpolation. Let's use
# a blocked median on our data to decimate it to our desired grid interval of 5
# arc-minutes. The reason for using a median instead of a mean is that bathymetry can
# vary abruptly and a mean would smooth the data too much. For data that varies more
# smoothly (like gravity and magnetic data), a mean would be a better option.
reducer = vd.BlockReduce(reduction=np.median, spacing=5 / 60)
print(reducer)
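
########################################################################################
# Any callable that reduces an array of block values to a single number also works.
# As a sketch (not part of the original example), a trimmed mean that ignores the
# extreme values in each block could be used instead of :func:`numpy.median`:


def trimmed_mean(values):
    "Mean of the values between the 10th and 90th percentiles of a block."
    low, high = np.percentile(values, [10, 90])
    return np.mean(values[(values >= low) & (values <= high)])


trimmed_reducer = vd.BlockReduce(reduction=trimmed_mean, spacing=5 / 60)
print(trimmed_reducer)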

########################################################################################
# Use the :meth:`~verde.BlockReduce.filter` method to apply the reduction:
coordinates, bathymetry = reducer.filter(coordinates=(data.longitude,
                                                      data.latitude),
                                         data=data.bathymetry_m)

plt.figure(figsize=(7, 7))
ax = plt.axes(projection=ccrs.Mercator())
ax.set_title("Locations of decimated data")
# Plot the bathymetry data locations as black dots
plt.plot(*coordinates, ".k", markersize=1, transform=crs)
vd.datasets.setup_baja_bathymetry_map(ax)
plt.tight_layout()
Example #5
# set and use the testing set to evaluate how well the gridder is performing.
train, test = vd.train_test_split(
    projection(*coordinates),
    (data.wind_speed_east_knots, data.wind_speed_north_knots),
    random_state=2,
)

# We'll make a 20 arc-minute grid
spacing = 20 / 60

# Chain together a blocked mean to avoid aliasing, a polynomial trend (Spline usually
# requires de-trended data), and finally a Spline for each component. Notice that
# BlockReduce can work on multicomponent data without the use of Vector.
chain = vd.Chain(
    [
        ("mean", vd.BlockReduce(np.mean, spacing * 111e3)),
        ("trend", vd.Vector([vd.Trend(degree=1) for i in range(2)])),
        (
            "spline",
            vd.Vector([vd.Spline(damping=1e-10, mindist=500e3) for i in range(2)]),
        ),
    ]
)
print(chain)

# Fit on the training data
chain.fit(*train)
# And score on the testing data. The best possible score is 1, meaning a perfect
# prediction of the test data.
score = chain.score(*test)
print("Cross-validation R^2 score: {:.2f}".format(score))
Example #6
interpolation.
"""
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import pyproj
import numpy as np
import verde as vd

# We'll test this on the Baja California shipborne bathymetry data
data = vd.datasets.fetch_baja_bathymetry()

# Before gridding, we need to decimate the data to avoid aliasing because of the
# oversampling along the ship tracks. We'll use a blocked median with 5 arc-minute
# blocks.
spacing = 5 / 60
reducer = vd.BlockReduce(reduction=np.median, spacing=spacing)
coordinates, bathymetry = reducer.filter((data.longitude, data.latitude),
                                         data.bathymetry_m)

# Project the data using pyproj so that we can use it as input for the gridder.
# We'll set the latitude of true scale to the mean latitude of the data.
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())
proj_coordinates = projection(*coordinates)

# Now we can set up a gridder for the decimated data
grd = vd.ScipyGridder(method="cubic").fit(proj_coordinates, bathymetry)
print("Gridder used:", grd)

# Get the grid region in geographic coordinates
region = vd.get_region((data.longitude, data.latitude))
print("Data region:", region)
Example #7
def interp(df, mask, var='biomass', spacing=4000):
    """
    Grid a set of lat/lon points to a grid defined by mask

    Parameters
    ----------
    df : pd.DataFrame
        Data points to be gridded in the form of a Pandas DataFrame with
        columns ``lat``, ``lon``, and ``var``.
    mask : xr.DataArray
        Target grid definition. Must include a pyproj-parsable crs attribute
        (e.g. ``mask.attrs['crs']``). Data should be between 0 and 1.
    var : str
        Name of column in df to grid.
    spacing : float
        Grid spacing in units defined by the mask's crs.

    Returns
    -------
    grid : xr.DataArray
        Gridded data from df.
    """
    import verde as vd

    # extract the projection and grid info
    region = [mask.x.data[0], mask.x.data[-1], mask.y.data[-1], mask.y.data[0]]
    projection = pyproj.Proj(mask.attrs['crs'])

    coordinates = (df.lon.values, df.lat.values)

    proj_coords = projection(*coordinates)

    # split for validation... this may belong outside of this function
    train, test = vd.train_test_split(
        projection(*coordinates),
        df[var],
        random_state=RANDOM_SEED,
    )

    # fit the gridder
    chain = vd.Chain(
        [
            ('mean', vd.BlockReduce(np.mean, spacing=spacing * 0.25, region=region)),
            ('nearest', vd.ScipyGridder(method='linear')),
        ]
    )

    chain.fit(*train)
    # y_pred = chain.predict(test[0])
    # fit_score = score(test[1][0], y_pred)

    # make the grid
    grid = chain.grid(spacing=spacing, region=region, data_names=[var], dims=('y', 'x'))
    grid = vd.distance_mask(
        proj_coords,
        maxdist=4 * spacing,
        grid=grid,
    )
    grid = np.flipud(grid[var]) * mask
    grid.name = var

    return grid
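
# Hypothetical usage sketch (the DataFrame, the mask DataArray, and the module-level
# RANDOM_SEED are assumed to exist as described in the docstring):
#
#     biomass_grid = interp(points_df, target_mask, var="biomass", spacing=4000)
#     biomass_grid.plot()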
Example #8
    data.longitude,
    data.latitude,
    c=data.bathymetry_m,
    s=0.1,
    transform=ccrs.PlateCarree(),
)
plt.colorbar().set_label("meters")
vd.datasets.setup_baja_bathymetry_map(ax)
plt.show()

########################################################################################
# We'll create a chain that applies a blocked median to the data, fits a polynomial
# trend, and then fits a standard gridder to the trend residuals.

chain = vd.Chain([
    ("reduce", vd.BlockReduce(np.median, spacing * 111e3)),
    ("trend", vd.Trend(degree=1)),
    ("spline", vd.Spline()),
])
print(chain)

########################################################################################
# Calling :meth:`verde.Chain.fit` will automatically run the data through the chain:
#
# #. Apply the blocked median to the input data
# #. Fit a trend to the blocked data and output the residuals
# #. Fit the spline to the trend residuals

chain.fit(proj_coords, data.bathymetry_m)

########################################################################################
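# With the chain fitted, a grid could be generated back in geographic coordinates by
# passing the projection (a sketch; ``spacing`` and ``projection`` are assumed to be
# defined earlier in the example, and the names below are illustrative):
grid = chain.grid(
    region=vd.get_region((data.longitude, data.latitude)),
    spacing=spacing,
    projection=projection,
    dims=["latitude", "longitude"],
    data_names="bathymetry",
)
print(grid)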
Example #9
# We'll test this on the California vertical GPS velocity data
data = vd.datasets.fetch_california_gps()

# We'll add some random extreme outliers to the data
outliers = np.random.RandomState(2).randint(0, data.shape[0], size=20)
data.velocity_up[outliers] += 0.08
print("Index of outliers:", outliers)

# Create an array of weights and set the weights for the outliers to a very low value
weights = np.ones_like(data.velocity_up)
weights[outliers] = 1e-5

# Now we can block average the points with and without weights to compare the outputs.
reducer = vd.BlockReduce(reduction=np.average,
                         spacing=30 / 60,
                         center_coordinates=True)
coordinates, no_weights = reducer.filter((data.longitude, data.latitude),
                                         data.velocity_up)
__, with_weights = reducer.filter((data.longitude, data.latitude),
                                  data.velocity_up, weights)

# Now we can plot the data sets side by side on Mercator maps
fig, axes = plt.subplots(1,
                         2,
                         figsize=(9, 7),
                         subplot_kw=dict(projection=ccrs.Mercator()))
titles = ["No Weights", "Weights"]
crs = ccrs.PlateCarree()
maxabs = vd.maxabs(data.velocity_up)
for ax, title, velocity in zip(axes, titles, (no_weights, with_weights)):
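    # Minimal per-panel plotting sketch (the original example likely styles this
    # differently): color the block-averaged points by vertical velocity.
    ax.set_title(title)
    pc = ax.scatter(
        *coordinates,
        c=velocity,
        s=30,
        cmap="seismic",
        vmin=-maxabs,
        vmax=maxabs,
        transform=crs,
    )
    plt.colorbar(pc, ax=ax, orientation="horizontal", pad=0.05)
    vd.datasets.setup_california_gps_map(ax)
plt.show()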
Example #10
account the curvature of the Earth.
"""
import boule as bl
import matplotlib.pyplot as plt
import numpy as np
import verde as vd

import harmonica as hm

# Fetch the sample gravity data from South Africa
data = hm.datasets.fetch_south_africa_gravity()

# Downsample the data using a blocked mean to speed-up the computations
# for this example. This is preferred over simply discarding points to avoid
# aliasing effects.
blocked_mean = vd.BlockReduce(np.mean, spacing=0.2, drop_coords=False)
(longitude, latitude, elevation), gravity_data = blocked_mean.filter(
    (data.longitude, data.latitude, data.elevation),
    data.gravity,
)

# Compute gravity disturbance by removing the gravity of normal Earth
ellipsoid = bl.WGS84
gamma = ellipsoid.normal_gravity(latitude, height=elevation)
gravity_disturbance = gravity_data - gamma

# Convert data coordinates from geodetic (longitude, latitude, height) to
# spherical (longitude, spherical_latitude, radius).
coordinates = ellipsoid.geodetic_to_spherical(longitude, latitude, elevation)

# Create the equivalent sources
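# A possible continuation (sketch, not part of this snippet): fit spherical
# equivalent sources to the disturbance. The damping value is only illustrative;
# check the harmonica documentation for the full set of parameters.
equivalent_sources = hm.EquivalentSourcesSph(damping=1e-3)
equivalent_sources.fit(coordinates, gravity_disturbance)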
Example #11
plt.plot(proj_coords[0], proj_coords[1], ".k", markersize=0.5)
plt.xlabel("Easting (m)")
plt.ylabel("Northing (m)")
plt.gca().set_aspect("equal")
plt.tight_layout()
plt.show()

########################################################################################
# Cartesian grids
# ---------------
#
# Now we can use :class:`verde.BlockReduce` and :class:`verde.Spline` on our projected
# coordinates. We'll specify the desired grid spacing as degrees and convert it to
# Cartesian using the 1 degree approx. 111 km rule-of-thumb.
spacing = 10 / 60
reducer = vd.BlockReduce(np.median, spacing=spacing * 111e3)
filter_coords, filter_bathy = reducer.filter(proj_coords, data.bathymetry_m)
spline = vd.Spline().fit(filter_coords, filter_bathy)

########################################################################################
# If we now call :meth:`verde.Spline.grid` we'll get back a grid evenly spaced in
# projected Cartesian coordinates.
grid = spline.grid(spacing=spacing * 111e3, data_names="bathymetry")
print("Cartesian grid:")
print(grid)

########################################################################################
# We'll mask our grid using :func:`verde.distance_mask` to get rid of all the spurious
# solutions far away from the data points.
grid = vd.distance_mask(proj_coords, maxdist=30e3, grid=grid)
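
########################################################################################
# A quick way to inspect the masked Cartesian grid (a sketch, not part of the original
# snippet) is xarray's plotting interface:
plt.figure(figsize=(7, 6))
grid.bathymetry.plot.pcolormesh(cmap="viridis")
plt.gca().set_aspect("equal")
plt.tight_layout()
plt.show()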
Example #12
    Grafita info: receives data and returns results (number of iterations) for
    different filter parameters.

    Parameters
    ----------
    training_csv : pandas.DataFrame
    test_csv : pandas.DataFrame
    n_iterations : int
    different_spacing : bool

    Returns
    -------
    description : type
        What it does...
    """
    # Create the THC/KC and KC/CTC ratio columns
    training_csv['THC/KC'] = training_csv['THC'] / training_csv['KC']
    test_csv['KC/CTC'] = test_csv['KC'] / test_csv['CTC']

    # Get rid of unnamed columns. Ideally this should be done before the data
    # reaches this function, saving the cleaned result back to CSV.
    training_csv.drop('Unnamed: 0', axis='columns', inplace=True)

    # Make an independent copy of the data so we can filter only the needed
    # features without modifying the original DataFrame
    list_training = copy.deepcopy(training_csv)

    # Block reduction whose behaviour depends on the spacing (e.g. spacing=500)
    reducer = vd.BlockReduce(np.median, spacing=spacing_range)

    # Constructive thoughts for this function:
    # - try to compare each training process before it is complete
    # - if that is possible, compute the remaining processes and return a range
    #   of the best results
    # - it is probably best to create a new scope, or a dedicated class, to
    #   manage these functionalities
    pass