import skfda
import numpy as np
import matplotlib.pyplot as plt

from skfda.datasets import fetch_growth
from skfda.representation.interpolation import SplineInterpolation
from skfda.preprocessing.registration import ElasticRegistration

# Class-based elastic (Fisher-Rao) registration, reused later for the
# alignments.
elastic_registration = ElasticRegistration()

# Load the Berkeley growth dataset and keep only the male samples.
data = fetch_growth()
fd = data['data']
fd = fd[data['target'] == 0]

# Work with the velocity curves (first derivative of height), using cubic
# spline interpolation between the sample points.
fd = fd.derivative()
fd.interpolation = SplineInterpolation(3)
# fd = fd.to_grid(np.linspace(*fd.domain_range[0], 150))

fd.dataset_label = None
fd.axes_labels = ["age (year)",
                  r"$\partial \, height \, / \, \partial \, age$ (cm/year)"]

plt.figure("berkeley-males")
fd.plot()
plt.xlim(fd.domain_range[0])
a, b = plt.ylim()
plt.ylim(a, 20)
plt.tight_layout()

plt.figure("berkeley-warping")
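##############################################################################
# A minimal sketch of what could fill the "berkeley-warping" figure: align the
# velocity curves and plot the estimated warping functions. It assumes that
# the ElasticRegistration transformer stores the warpings of the last
# transformation in its ``warping_`` attribute.
fd_align = elastic_registration.fit_transform(fd)
elastic_registration.warping_.plot()
plt.tight_layout()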
##############################################################################
# In the first case FPCA is applied directly to the discretized data, which
# consists of finite values dispersed over points in a domain range.
# We initialize and set up the FPCA object and run the fit method to obtain
# the first two components. By default, if we do not specify the number of
# components, it is 3. Other parameters are weights and centering. For more
# information please visit the documentation.

# In newer skfda releases FPCA is exposed directly under
# skfda.preprocessing.dim_reduction.
from skfda.preprocessing.dim_reduction.projection import FPCA

fpca_discretized = FPCA(n_components=2)
fpca_discretized.fit(fd)
fpca_discretized.components_.plot()

##############################################################################
# In the second case, the data is first converted to a basis representation
# and FPCA is applied to the basis representation of the original data.
# We fetch the dataset again, since the FPCA module modifies the original
# data, and transform it to a basis representation. We also plot the data for
# a better visual overview.
dataset = fetch_growth()
fd = dataset['data']

basis = skfda.representation.basis.BSpline(n_basis=7)
basis_fd = fd.to_basis(basis)
basis_fd.plot()

##############################################################################
# We initialize the FPCA object and run the fit method to obtain the first
# two principal components. By default the principal components are expressed
# in the same basis as the data. We can see that the obtained result is
# similar to the discretized case.
fpca = FPCA(n_components=2)
fpca.fit(basis_fd)
fpca.components_.plot()

##############################################################################
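# To compare both approaches quantitatively we can inspect the fraction of
# variance captured by each component. This is a minimal sketch assuming that
# the fitted FPCA objects expose an ``explained_variance_ratio_`` attribute,
# mirroring scikit-learn's PCA.
print("Discretized FPCA:", fpca_discretized.explained_variance_ratio_)
print("Basis FPCA:", fpca.explained_variance_ratio_)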
##############################################################################
# Supervised classification on the same dataset, using the raw discretized
# growth curves as a data matrix.

if __name__ == "__main__":

    from skfda import datasets
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import (RandomForestClassifier,
                                  GradientBoostingClassifier,
                                  ExtraTreesClassifier,
                                  AdaBoostClassifier,
                                  BaggingClassifier)
    from sklearn.svm import SVC
    from sklearn.neural_network import MLPClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import accuracy_score, f1_score

    # Load the growth curves as a plain (n_samples, n_points) matrix.
    data = datasets.fetch_growth()
    X = data['data'].data_matrix.squeeze()
    target = data['target']
    sample_points = np.arange(31)
    Ncomps = 3

    # Any of the imported classifiers can be plugged in; here an SVC is used.
    model = SVC()

    Xtrain, Xtest, ytrain, ytest = train_test_split(X, target,
                                                    test_size=0.3,
                                                    random_state=0)

    # SFMClassifier is assumed to be defined elsewhere in the project; it
    # wraps the base model, the sample points and the number of components.
    SFMClassifier_SVC = SFMClassifier(model, sample_points, Ncomps)
    SFMClassifier_SVC.fit(Xtrain, ytrain)
    ypred = SFMClassifier_SVC.predict(Xtest, Xtrain)

    f1 = f1_score(ytest, ypred, average='macro')
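    # For reference, a sketch of a plain multivariate baseline: fit the same
    # kind of SVC directly on the raw discretized curves, without the
    # functional feature-extraction step, and compare the scores.
    baseline = SVC()
    baseline.fit(Xtrain, ytrain)
    ypred_baseline = baseline.predict(Xtest)
    f1_baseline = f1_score(ytest, ypred_baseline, average='macro')
    acc_baseline = accuracy_score(ytest, ypred_baseline)
    print("SFM pipeline macro-F1:", f1)
    print("Raw-curve SVC baseline macro-F1:", f1_baseline,
          "accuracy:", acc_baseline)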
fd_align = elastic_registration.fit_transform(fd)
fd_align.plot()

##############################################################################
# In general these types of alignments are not possible; the following figure
# shows how the procedure works with a real dataset.
# The :func:`Berkeley growth dataset<skfda.datasets.fetch_growth>` contains
# the growth curves of a set of children; in this case only the males are
# used. The growth curves are resampled using cubic interpolation and
# differentiated to obtain the velocity curves.
#
# First we show the original curves:

growth = fetch_growth()

# Select only one sex
fd = growth['data'][growth['target'] == 0]

# Obtain velocity curves
fd.interpolation = skfda.representation.interpolation.SplineInterpolation(3)
fd = fd.to_grid(np.linspace(*fd.domain_range[0], 200)).derivative()
fd = fd.to_grid(np.linspace(*fd.domain_range[0], 50))

fd.plot()

##############################################################################
# We now show the aligned curves:

fd_align = elastic_registration.fit_transform(fd)
fd_align.dataset_name += " - aligned"
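##############################################################################
# A quick way to see the effect of the registration is to compare the
# cross-sectional means before and after the alignment: the unaligned mean
# tends to smooth out the pubertal spurt, while the aligned mean preserves
# its shape. This is a minimal sketch assuming that ``plot`` accepts a
# ``fig`` argument to draw on an existing figure.
fd_align.plot()

fig = fd.mean().plot()
fd_align.mean().plot(fig=fig)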