def create_classifier(x, z, toe, window=40, min_buffer=40, max_buffer=200):
    """
    Create dune toe classifier.

    ...

    Parameters
    ----------
    x : ndarray
        Array of cross-shore locations of size (m,).
    z : ndarray
        Array of elevations matching x. May be of size (m,) or (m,n).
    toe : ndarray
        Array of dune toe locations of size (n,).
    window : int, default 40
        Size of the window for training data.
    min_buffer : int, default 40
        Minimum buffer around the real dune toe.
    max_buffer : int, default 200
        Maximum buffer range.

    Returns
    -------
    clf : scikit-learn classifier
        Created random forest classifier.
    """
    # Pre-processing
    z = ds.interp_nan(x, z)  # interp nan
    xx = np.arange(np.min(x), np.max(x) + 0.5, 0.5)
    z = ds.interp_to_grid(x, xx, z)  # interp to grid
    toe = ds.interp_toe_to_grid(x, xx, toe)
    z = ds.moving_average(z, 5)  # apply moving average to smooth
    z = ds.diff_data(z, 1)  # differentiate

    # Create data
    features, labels = create_training_data(xx, z, toe, window, min_buffer, max_buffer)

    # Build classifier
    clf = RandomForestClassifier(n_estimators=100,
                                 criterion="gini",
                                 random_state=123).fit(features, labels.ravel())

    return clf
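
# Example (illustrative sketch only): training and saving a custom classifier.
# The arrays below are hypothetical placeholders, not shipped with pybeach; real
# x, z and toe would come from surveyed profiles with manually picked dune toes,
# and the expected format of `toe` (indices vs. cross-shore positions) should be
# checked against the pybeach documentation. Saving with joblib is an assumption,
# not necessarily how pybeach stores its in-built classifiers.
#
#     import numpy as np
#     import joblib
#
#     x = np.arange(0, 100, 0.5)                               # cross-shore grid (m,)
#     z = np.array([np.linspace(6, 0, len(x)) + 0.1 * np.sin(x)
#                   for _ in range(3)]).T                      # 3 synthetic profiles, (m, n)
#     toe = np.array([80, 85, 90])                             # one dune toe pick per profile
#
#     clf = create_classifier(x, z, toe, window=40, min_buffer=40, max_buffer=200)
#     joblib.dump(clf, "my_site_clf.joblib")                   # persist for later use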
def predict_dunetoe_ml(self, clf_name, no_of_output=1, dune_crest='max', **kwargs):
    """
    Predict dune toe location using a pre-trained machine learning (ml) classifier.
    See pybeach/classifiers/create_classifier.py to create a classifier.

    ...

    Parameters
    ----------
    clf_name : str
        Classifier to use. Classifier should be contained within the
        'classifiers' directory. In-built options include "barrier",
        "embayed", "mixed".
    no_of_output : int, default 1
        Number of dune toes to return, ranked from most probable to least
        probable.
    dune_crest : {'max', 'rr', int, None}, default 'max'
        Method used to identify the dune crest location. The search for the
        dune toe is constrained to the region of the beach profile seaward
        of the dune crest.
        max: the maximum elevation of the cross-shore profile.
        rr: dune crest calculated based on relative relief.
        int: integer specifying the location of the dune crest. Of size 1
        or self.z.shape[0].
        None: do not calculate a dune crest location; search the whole
        profile for the dune toe.
    **kwargs : arguments
        Additional arguments to pass to `self.predict_dunecrest()`. Keywords
        include window_size, threshold, water_level.

    Returns
    -------
    dt_index : array of ints
        Array containing the indices of no_of_output dune toe locations, in
        descending order of probability.
    dt_probabilities : array
        Array of dune toe probabilities for each profile in self.z.
    """
    # Input checks
    assert isinstance(clf_name, str), 'clf_name should be a string.'
    assert isinstance(no_of_output, int) & \
           (no_of_output > 0) & \
           (no_of_output < len(self.x_interp)), \
        f'no_of_output must be an int between 0 and {len(self.x_interp)}.'

    # Define dune crest
    if dune_crest in ['max', 'rr']:
        for k in kwargs.keys():
            if k not in ["window_size", "threshold", "water_level"]:
                raise Warning(f'{k} not a valid argument for predict_dunecrest()')
        kwargs = {k: v for k, v in kwargs.items()
                  if k in ["window_size", "threshold", "water_level"]}
        dune_crest_loc = self.predict_dunecrest(method=dune_crest, **kwargs)
    elif isinstance(dune_crest, int):
        dune_crest_loc = np.full((self.z_interp.shape[0],), dune_crest)
    elif dune_crest is None:
        dune_crest_loc = np.full((self.z_interp.shape[0],), 0)
    elif isinstance(dune_crest, np.ndarray) and \
            len(dune_crest) == self.z_interp.shape[0] and \
            all(isinstance(_, np.int64) for _ in dune_crest):
        # Explicit array of crest indices, one per profile
        dune_crest_loc = dune_crest
    else:
        raise ValueError(f'dune_crest should be "max", "rr", int (of size 1 or '
                         f'{self.z_interp.shape[0]}), or None')

    # Load the random forest classifier
    try:
        clf = cs.load_classifier(clf_name)
    except FileNotFoundError:
        raise FileNotFoundError(f'no classifier named {clf_name} found in classifier folder.')

    # Differentiate data
    z_diff = ds.diff_data(self.z_interp, 1)

    # Predict probability of dune toe for all points along profile
    dt_probabilities = np.array([
        clf.predict_proba(np.squeeze(ds.rolling_samples(row, clf.n_features_)))[:, 1]
        for row in z_diff
    ])

    # Interpolate the probabilities back to the original grid
    dt_probabilities = ds.interp_to_grid(self.x_interp, self.x, dt_probabilities)

    # Retrieve the top no_of_output predictions, most probable first
    dt_index = np.array([
        np.flip(np.argsort(row[crest:])[-no_of_output:], 0)
        for row, crest in zip(dt_probabilities, dune_crest_loc)
    ])
    dt_index = np.squeeze(dt_index) + dune_crest_loc

    return dt_index, dt_probabilities
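
# Example (hedged sketch): ranking the three most probable dune toe locations for
# each profile with one of the in-built classifiers named in the docstring above
# ("barrier", "embayed", "mixed"). `p` is assumed to be an existing Profile
# instance; window_size is forwarded to predict_dunecrest() as documented.
#
#     dt_idx, dt_prob = p.predict_dunetoe_ml("mixed", no_of_output=3,
#                                            dune_crest="rr", window_size=21)
#     # dt_idx[i] holds the indices of the 3 most probable toes for profile i,
#     # most probable first; dt_prob[i] holds the per-point toe probabilities.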
def __init__(self, x, z, window_size=5):
    """
    A class used to represent a 2D beach profile transect.

    ...

    Parameters
    ----------
    x : ndarray
        Array of cross-shore locations of size (m,).
    z : ndarray
        Array of elevations matching x. May be of size (m,) or (m,n).
    window_size : int, default 5
        Size of window used to smooth z with a moving average.

    Attributes
    ----------
    x : ndarray
        Original input array of cross-shore locations.
    z : ndarray
        Original array of profile elevations matching x.
    x_interp : ndarray
        x interpolated to a 0.5 m grid.
    z_interp : ndarray
        z interpolated to a 0.5 m grid and smoothed by a moving average of
        size window_size.

    Methods
    -------
    predict_dunetoe_ml(self, clf_name, no_of_output=1, dune_crest='max', **kwargs)
    predict_dunetoe_mc(self, dune_crest='rr', shoreline=True, window_size=None, **kwargs)
    predict_dunetoe_pd(self, dune_crest=None, shoreline=None, **kwargs)
    predict_dunetoe_rr(self, window_size=11, threshold=0.2, water_level=0)
    predict_dunecrest(self, method="max", window_size=50, threshold=0.8, water_level=0)
    predict_shoreline(self, water_level=0, dune_crest='rr', **kwargs)
    """
    # Input checks
    assert isinstance(x, np.ndarray) & (np.ndim(x) == 1), \
        'x should be of type ndarray and shape (m,).'
    assert (len(x) > 1), 'x should have length > 1.'
    assert isinstance(z, np.ndarray), 'z should be of type ndarray.'
    assert isinstance(window_size, int) & \
           (window_size > 0) & \
           (window_size < len(x)), f'window_size must be an int between 0 and {len(x)}.'

    # Ensure inputs are row vectors
    x = np.atleast_1d(x)
    z = np.atleast_2d(z)
    if len(x) not in z.shape:
        raise ValueError(f'Input x of shape ({x.shape[0]},) must share a dimension '
                         f'with input z, which has shape {z.shape}.')
    if x.shape[0] != z.shape[1]:
        z = z.T

    # Store original inputs
    self.x = x
    self.z = z

    # Interp nan values
    z = ds.interp_nan(x, z)

    # Warn if any profile appears to be oriented sea-to-land
    flag = np.polyfit(x, z.T, 1)[0]
    if np.any(flag > 0):
        print('Input profiles should be oriented from landward (left) to seaward (right); '
              'some input profiles appear to have the sea on the left. This may cause errors.')

    # Interp to 0.5 m grid
    self.x_interp = np.arange(np.min(x), np.max(x) + 0.5, 0.5)
    z = ds.interp_to_grid(x, self.x_interp, z)

    # Apply moving average to smooth data
    z = ds.moving_average(z, window_size)

    # Store transformed inputs
    self.z_interp = z
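
# Example (illustrative only): constructing a Profile from a single synthetic
# transect and predicting its dune toe. The profile shape below is made up; real
# data would come from a survey. The import path follows the standard pybeach
# layout, and "mixed" is one of the in-built classifier names listed in the
# predict_dunetoe_ml docstring.
#
#     import numpy as np
#     from pybeach.beach import Profile
#
#     x = np.arange(0, 80, 0.5)
#     z = np.concatenate((np.linspace(6, 2, 40),             # dune face
#                         np.linspace(2, 0, len(x) - 40)))   # beach face
#     p = Profile(x, z)                                      # regrids and smooths internally
#     toe_idx, toe_prob = p.predict_dunetoe_ml("mixed")      # most probable toe per profile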