Exemplo n.º 1
0
def plot_mean_clusters(tabclu_o):
    """Plot the mean curve of every cluster together with the global mean.

    The input table is copied, so the caller's frame is left untouched. The
    last column of the copy is relabelled ``'cluster'`` (by position) and is
    used to group the rows; the ``'cluster'`` and ``'Cliente'`` columns are
    removed before the remaining columns are turned into functional data.

    Args:
        tabclu_o: Table with a ``'Cliente'`` column, the curve values and
            the cluster label as its last column.
    """
    non_curve_columns = ['cluster', 'Cliente']

    def _mean_curve(frame):
        # Mean function of the rows of ``frame``, as a 1-D array of values.
        functional = skfda.FDataGrid(frame)
        return skfda.exploratory.stats.mean(functional).data_matrix[0, :, 0]

    table = tabclu_o.copy()
    names = list(table.columns)
    names[-1] = 'cluster'  # the last column always holds the cluster label
    table.columns = names

    cluster_ids = sorted(table['cluster'].value_counts().index.array)

    sns.set_style("darkgrid")
    _, ax = plt.subplots(figsize=(9, 7))

    # One solid line per cluster.
    for cluster_id in cluster_ids:
        members = table[table['cluster'] == cluster_id]
        curves = members.drop(columns=non_curve_columns)
        ax.plot(_mean_curve(curves), '-o', label=f'Cluster {cluster_id!s}')

    # Dashed line for the mean over every row, regardless of cluster.
    all_curves = table.drop(columns=non_curve_columns)
    ax.plot(_mean_curve(all_curves), '--o', label='All data')

    # The remaining column labels are used as x tick labels.
    tick_labels = [str(name) for name in all_curves.columns]
    ax.set_xticks(np.arange(len(tick_labels)))
    ax.set_xticklabels(tick_labels, rotation=90)
    plt.title('Comportamiento Volumenes')
    plt.legend()
    plt.show()
Exemplo n.º 2
0
    def test_several_variables(self):
        """Inner product of a three-variable function with itself.

        The function ``x * y * z`` is sampled on a grid over
        ``[0, 1] x [0, 2] x [0, 3]`` and also projected onto a tensor
        product of monomial bases; both representations must yield the
        same inner product.
        """
        def triple_product(x, y, z):
            return x * y * z

        t = np.linspace(0, 1, 100)
        axes = [t, 2 * t, 3 * t]

        values = triple_product(*ndm(*axes))

        # A leading axis is added so the grid holds a single sample.
        fd = skfda.FDataGrid(values[np.newaxis, ...], sample_points=axes)

        basis = Tensor([
            Monomial(n_basis=5, domain_range=(0, upper))
            for upper in (1, 2, 3)
        ])
        fd_basis = fd.to_basis(basis)

        # Integral of (x*y*z)**2 over the box: (1/3) * (8/3) * 9 == 8.
        expected = 8

        for functional in (fd, fd_basis):
            np.testing.assert_allclose(
                skfda.misc.inner_product(functional, functional),
                expected,
                rtol=1e-5)
Exemplo n.º 3
0
    def test_vector_valued(self):
        """Inner product of a vector-valued function with itself.

        The two components are ``t**2`` and ``3 * t`` sampled on 100 points
        of ``[0, 1]``; the grid representation and its projection onto a
        vector-valued monomial basis must yield the same inner product.
        """
        t = np.linspace(0, 1, 100)

        # Shape (1, 100, 2): one sample, 100 points, two components.
        components = np.stack([t**2, 3 * t], axis=-1)
        fd = skfda.FDataGrid(components[np.newaxis], sample_points=[t])

        basis = VectorValued([Monomial(n_basis=5) for _ in range(2)])
        fd_basis = fd.to_basis(basis)

        # Integral of t**4 is 1/5 and integral of (3*t)**2 is 3 on [0, 1].
        expected = 1 / 5 + 3

        for functional in (fd, fd_basis):
            np.testing.assert_allclose(
                skfda.misc.inner_product(functional, functional),
                expected,
                rtol=1e-5)
Exemplo n.º 4
0
def data_for_twos():
    """Return a length-100 FDataGrid in which every value is two.

    The data matrix has shape ``(100, 10, 10, 3)`` and is evaluated on a
    two-dimensional grid of 10 x 10 points.
    """
    shape = (100, 10, 10, 3)
    twos = np.full(shape, fill_value=2)

    first_axis = np.arange(10)
    second_axis = np.arange(10) / 10

    return skfda.FDataGrid(twos, grid_points=[first_axis, second_axis])
Exemplo n.º 5
0
def data_missing():
    """Return a length-2 FDataGrid laid out as [NA, valid].

    The data matrix has shape ``(2, 10, 10, 3)``; every value of the first
    sample is NaN, while the second sample keeps the consecutive floating
    point values produced by ``np.arange``.
    """
    # ``np.float_`` and ``np.NaN`` were removed in NumPy 2.0; ``np.float64``
    # and ``np.nan`` are the equivalent spellings on every NumPy version.
    data_matrix = np.arange(2 * 10 * 10 * 3,
                            dtype=np.float64).reshape(2, 10, 10, 3)
    data_matrix[0, ...] = np.nan
    grid_points = [np.arange(10), np.arange(10) / 10]

    return skfda.FDataGrid(data_matrix, grid_points=grid_points)
Exemplo n.º 6
0
def plot_inertias(tabla):
    """Plot the functional K-means inertia for 2 to 14 clusters.

    For every candidate number of clusters a fresh ``FDataGrid`` is built
    from a copy of ``tabla``, a ``KMeans`` model is fitted to it and its
    ``inertia_`` is recorded. The inertias are then drawn against the number
    of clusters on the current matplotlib figure.

    Args:
        tabla: Table of curves accepted by ``skfda.FDataGrid``.
    """
    def _inertia_for(n_clusters):
        functional = skfda.FDataGrid(tabla.copy())
        model = skfda.ml.clustering.KMeans(n_clusters=n_clusters)
        model.fit(functional)
        return model.inertia_

    candidate_counts = np.arange(2, 15)
    inertias = [_inertia_for(n_clusters) for n_clusters in candidate_counts]
    plt.plot(candidate_counts, inertias, '*-')
Exemplo n.º 7
0
def cluster_fda(table, num_clusters, marca):
    """Cluster the rows of ``table`` with functional K-means.

    Args:
        table: Table of curves accepted by ``skfda.FDataGrid``; each row is
            treated as one observation.
        num_clusters: Number of clusters passed to ``KMeans``.
        marca: Suffix used to name the label column, ``cluster_<marca>``.

    Returns:
        A copy of ``table`` with an extra ``cluster_<marca>`` column holding
        the label assigned to each row.
    """
    data_fda = skfda.FDataGrid(table)
    kmeans = skfda.ml.clustering.KMeans(n_clusters=num_clusters)
    kmeans.fit(data_fda)

    # The labels are positionally aligned with the rows of ``table``, so they
    # are attached directly. Joining on the index instead would multiply rows
    # whenever the index contains repeated labels, and routing the labels
    # through a temporary column named 0 breaks if ``table`` already has a
    # column with that label.
    table_cluster = table.copy()
    table_cluster[f'cluster_{marca}'] = kmeans.labels_
    return table_cluster
Exemplo n.º 8
0
def data():
    """Return a length-100 FDataGrid with distinct, non-missing values.

    * data[0] and data[1] should both be non missing
    * data[0] and data[1] should not be equal

    The values are the consecutive integers starting at one, arranged in a
    ``(100, 10, 10, 3)`` data matrix.
    """
    n_values = 100 * 10 * 10 * 3
    consecutive = np.arange(1, n_values + 1)
    data_matrix = consecutive.reshape(100, 10, 10, 3)

    first_axis = np.arange(10)
    second_axis = np.arange(10) / 10

    return skfda.FDataGrid(data_matrix, grid_points=[first_axis, second_axis])
def build_dataset(d):
    """Merge the functional datasets of a mapping into one labelled dataset.

    Every value of ``d`` contributes its ``data_matrix``; the target of each
    row is the position of its value in the mapping's iteration order. The
    sample points of the first value are used for the merged grid.

    Args:
        d: Mapping whose values expose ``shape``, ``data_matrix`` and
            ``sample_points`` (presumably ``FDataGrid`` objects sharing the
            same grid -- TODO confirm against the callers).

    Returns:
        A tuple ``(X, y)`` with the stacked ``FDataGrid`` and the array of
        integer labels.
    """
    groups = list(d.values())

    # One constant block of labels per group, as long as the group itself.
    label_blocks = [
        np.full(shape=group.shape[0], fill_value=position)
        for position, group in enumerate(groups)
    ]

    stacked = np.vstack([group.data_matrix for group in groups])
    X = skfda.FDataGrid(data_matrix=stacked,
                        sample_points=groups[0].sample_points)

    return X, np.concatenate(label_blocks)
Exemplo n.º 10
0
    def test_basis_conversion(self):
        """Smoothing with a boundary-difference penalty equalises the ends.

        Four linear samples are smoothed onto a B-spline basis with a
        Tikhonov regularization whose operator is the difference between
        the values at 1 and at 0. With a large smoothing parameter the
        smoothed functions must take almost the same value at both points.
        """
        def boundary_difference(x):
            return x(1)[:, 0] - x(0)[:, 0]

        # 100 points per row after transposing, one row per sample.
        values = np.linspace([0, 1, 2, 3], [1, 2, 3, 4], 100)
        fd = skfda.FDataGrid(values.T)

        bspline = skfda.representation.basis.BSpline(
            n_basis=10, domain_range=fd.domain_range)
        penalty = TikhonovRegularization(boundary_difference)
        smoother = skfda.preprocessing.smoothing.BasisSmoother(
            basis=bspline,
            regularization=penalty,
            smoothing_parameter=10000)

        fd_basis = smoother.fit_transform(fd)

        np.testing.assert_allclose(fd_basis(0), fd_basis(1), atol=0.001)
Exemplo n.º 11
0
def split_data(data, max_pow):
    """Cut every series of ``data`` into equally sized functional samples.

    Each segment holds ``2**max_pow + 1`` points. The number of segments is
    derived from the length of the first series of the mapping, and any
    trailing points that do not fill a whole segment are discarded.

    Args:
        data: Mapping from a key to an array-like series.
        max_pow: Exponent that fixes the segment length.

    Returns:
        A dict with the same keys whose values are ``FDataGrid`` objects
        holding one segment per sample, sampled at ``0, 1, 2, ...``.
    """
    first_key, *_ = data

    segment_len = 2**max_pow + 1
    n_segments = data[first_key].shape[0] // segment_len
    usable_len = segment_len * n_segments

    def _to_grid(series):
        # Drop the incomplete tail, then stack the equal-length pieces.
        pieces = np.array(np.split(series[:usable_len], n_segments))
        return skfda.FDataGrid(data_matrix=pieces,
                               sample_points=range(pieces.shape[1]))

    return {key: _to_grid(series) for key, series in data.items()}
Exemplo n.º 12
0
    def _test_generic(self, estimator_class):
        """Compare the leave-one-out scorer with its alternative version.

        A parameter search over the smoothing parameters ``[2, 3]`` is run
        twice on the same data and the same estimator instance, once per
        scorer; the mean test scores of both searches must match.

        Args:
            estimator_class: Smoother class, instantiated without arguments.
        """
        scorer = validation.LinearSmootherLeaveOneOutScorer()
        scorer_alt = _LinearSmootherLeaveOneOutScorerAlternative()

        points = np.linspace(-2, 2, 5)
        fd = skfda.FDataGrid(points**2, points)

        estimator = estimator_class()

        def mean_scores(scoring):
            search = validation.SmoothingParameterSearch(estimator, [2, 3],
                                                         scoring=scoring)
            search.fit(fd)
            return np.array(search.cv_results_['mean_test_score'])

        score = mean_scores(scorer)
        score_alt = mean_scores(scorer_alt)

        np.testing.assert_array_almost_equal(score, score_alt)
Exemplo n.º 13
0
# In :ref:`sphx_glr_auto_examples_plot_landmark_registration.py` it is shown
# the simplest case, where it is used to apply a transformation of the time
# scale of unidimensional data to register its features.
#
# The following example shows the basic usage applied to a surface and a
# curve, although the method will work for data with arbitrary dimensions too.
#
# Firstly we will create a data object containing a surface
# :math:`g: \mathbb{R}^2 \rightarrow \mathbb{R}`.
#
# Constructs example surface
X, Y, Z = axes3d.get_test_data(1.2)
# A single sample: the transposed height matrix wrapped in a list.
data_matrix = [Z.T]
# One vector of grid points per axis, taken from the mesh coordinates.
grid_points = [X[0, :], Y[:, 0]]

g = skfda.FDataGrid(data_matrix, grid_points)

# Sets cubic interpolation
g.interpolation = skfda.representation.interpolation.SplineInterpolation(
    interpolation_order=3)

# Plots the surface
g.plot()

##############################################################################
# We will create a parametric curve
# :math:`f(t)=(10 \, \cos(t), 10 \, \sin(t))`. The result of the composition,
# :math:`g \circ f:\mathbb{R} \rightarrow \mathbb{R}` will be another
# functional object with the values of :math:`g` along the path given by
# :math:`f`.
#
Exemplo n.º 14
0
# In :ref:`sphx_glr_auto_examples_plot_landmark_registration.py` it is shown
# the simplest case, where it is used to apply a transformation of the time
# scale of unidimensional data to register its features.
#
# The following example shows the basic usage applied to a surface and a
# curve, although the method will work for data with arbitrary dimensions too.
#
# Firstly we will create a data object containing a surface
# :math:`g: \mathbb{R}^2 \rightarrow \mathbb{R}`.
#
# Constructs example surface
X, Y, Z = axes3d.get_test_data(1.2)
# A single sample: the transposed height matrix wrapped in a list.
data_matrix = [Z.T]
# One vector of sample points per axis, taken from the mesh coordinates.
sample_points = [X[0, :], Y[:, 0]]

g = skfda.FDataGrid(data_matrix, sample_points)

# Sets cubic interpolation
g.interpolator = skfda.representation.interpolation.SplineInterpolator(
    interpolation_order=3)

# Plots the surface
g.plot()

##############################################################################
# We will create a parametric curve
# :math:`f(t)=(10 \, \cos(t), 10 \, \sin(t))`. The result of the composition,
# :math:`g \circ f:\mathbb{R} \rightarrow \mathbb{R}` will be another
# functional object with the values of :math:`g` along the path given by
# :math:`f`.
#
Exemplo n.º 15
0
##############################################################################
#
# All the extrapolators shown will work with multidimensional objects.
# In the following example a 2d-surface is constructed and extended
# using periodic extrapolation.

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Make data.
t = np.arange(-2.5, 2.75, 0.25)
X, Y = np.meshgrid(t, t)
Z = np.exp(-0.5 * (X**2 + Y**2))

# Creation of FDataGrid
fd_surface = skfda.FDataGrid([Z], (t, t))

# Wider evaluation grid: it reaches beyond the points used to build the
# surface, so the outer values come from the extrapolation.
t = np.arange(-7, 7.5, 0.5)

# Evaluation with periodic extrapolation
values = fd_surface((t, t), grid=True, extrapolation="periodic")
T, S = np.meshgrid(t, t)

# Wireframe: extended surface. Solid surface: the original data.
ax.plot_wireframe(T, S, values[0, ..., 0], alpha=.3, color="C0")
ax.plot_surface(X, Y, Z, color="C0")

###############################################################################
#
# The previous extension can be compared with the extrapolation using the values
# of the bounds.
Exemplo n.º 16
0
# Sample 10: raw observations as points, smoothed curve in green.
fd[10].scatter(s=0.5)
nw['fdatagrid'][10].plot(c='g')

###############################################################################
# Now, we can see the effects of a proper smoothing. We can plot the same 5
# samples from the beginning using the Nadaraya-Watson kernel smoother with
# the best choice of parameter.

plt.figure(4)
nw['fdatagrid'][0:5].plot()

###############################################################################
# We can also appreciate the effects of undersmoothing and oversmoothing in
# the following plots.

# Both objects apply the matrix returned by ``ks.nw`` to the values of
# sample 10; only the bandwidth ``h`` differs (2 versus 15).
fd_us = skfda.FDataGrid(
    ks.nw(fd.sample_points, h=2).dot(fd.data_matrix[10, ..., 0]),
    fd.sample_points, fd.sample_range, fd.dataset_label, fd.axes_labels)
fd_os = skfda.FDataGrid(
    ks.nw(fd.sample_points, h=15).dot(fd.data_matrix[10, ..., 0]),
    fd.sample_points, fd.sample_range, fd.dataset_label, fd.axes_labels)

# Under-smoothed
fd[10].scatter(s=0.5)
fd_us.plot(c='sandybrown')

# Over-smoothed
plt.figure()
fd[10].scatter(s=0.5)
fd_os.plot(c='r')
Exemplo n.º 17
0
plt.tight_layout()

# Build a piecewise-linear warping on [0, 1]. ``time`` keeps the untouched
# grid, while ``t`` is overwritten in place with the warped values.
t = np.linspace(0, 1, 200)
time = np.copy(t)
eps = .04

a = .4
m = 11.9
# Three zones: before ``a``, the window [a, a + eps], and after it.
idx1 = t < a
idx3 = t > a + eps
idx2 = np.logical_not(np.logical_or(idx1, idx3))

# Inside the window the values are stretched around ``a`` with slope ``m``.
t[idx2] = m * (t[idx2] - a) + a
# After the window the values go linearly from the window's last value to 1.
t[idx3] = np.linspace(t[idx2][-1], 1, idx3.sum())

warp = skfda.FDataGrid([t], time, domain_range=(0, 1))

plt.figure("pinching-warping")
warp.plot()
# NOTE(review): ``fd2`` and ``fd`` are defined outside this fragment.
fd_reg = fd2.compose(warp)
plt.tight_layout()

plt.figure("pinching-effect")
fd_reg = fd_reg.to_grid(fd.sample_points[0])

x1 = fd_reg.data_matrix.squeeze()
x2 = fd.data_matrix.squeeze()
t = fd.sample_points[0]

# Past t = .4358 the warped values are replaced by the values of ``fd``.
x1[t > .4358] = x2[t > .4358]
Exemplo n.º 18
0
# Original curves, plus the aligned curve drawn as a dashed line.
fd.plot()
fd_align.plot(color='C0', linestyle='--')

# Legend
plt.legend(['$f$', '$g$', '$f \\circ \\gamma $'])
plt.tight_layout()

###############################################################################
# The non-linear transformation :math:`\gamma` applied to :math:`f` in
# the alignment can be obtained using :func:`elastic_registration_warping
# <skfda.preprocessing.registration.elastic_registration_warping>`.
#

# Warping to align f to g
warping = skfda.preprocessing.registration.elastic_registration_warping(f, g)
# Identity function on the warping's own sample points.
# NOTE(review): ``identity`` is not used in the visible part of this example;
# the identity line below is drawn from a separate linspace instead.
identity = skfda.FDataGrid([warping.sample_points[0]],
                           warping.sample_points[0])

plt.figure("pairwise-alignment-warping")

# Warping used
warping.plot()

# Plot identity
t = np.linspace(0, 1)
plt.plot(t, t, linestyle='--')

# Legend
plt.legend(['$\\gamma$', '$\\gamma_{id}$'])
plt.tight_layout()

###
Exemplo n.º 19
0
# Side-by-side table with ``yin`` as the first block of columns and ``xin``
# as the second.
df=pd.concat([yin,xin], axis=1)

# Using statsmodels
# NOTE(review): KernelReg's positional parameters are (endog, exog), so ``x``
# is passed as the response and ``y`` as the regressor, and the fit is then
# evaluated at ``y``. ``x`` and ``y`` are defined outside this fragment --
# confirm that this order is intended.
kde = KernelReg(x, y, var_type='c', reg_type='ll', bw=[3.2])

estimator = kde.fit(y)
# Keep only the first element of the fit result, as one value per row of df.
estimator = np.reshape(estimator[0], df.shape[0])

plt.scatter(x, y)
plt.scatter(x, estimator, c='r')
plt.show()

# Using SKFDA

df_grid=skfda.FDataGrid(df)

# Candidate values handed to the smoothing parameter search.
bandwidth = np.arange(0.1, 5, 0.2)

llr = val.SmoothingParameterSearch(
    ks.LocalLinearRegressionSmoother(),
    bandwidth)
# NOTE(review): ``fit`` is not used in the visible part of this example.
fit = llr.fit(df_grid)
llr_df = llr.transform(df_grid)

plt.scatter(x, y)
plt.xlabel('x')
plt.ylabel('y')
plt.scatter(x, llr_df, c='r')
plt.show()
Exemplo n.º 20
0
    def setUp(self):
        """Create a grid of five identical samples with values 1 to 4."""
        rows = [[1, 2, 3, 4] for _ in range(5)]

        self.fd = skfda.FDataGrid(rows)
Exemplo n.º 21
0
                               blit=True)

# Write the animation built above to a GIF file.
anim.save("warpings.gif", writer='imagemagick')

fig = plt.figure("composition-animation")

ax = plt.axes(xlim=(-0.05, 1.05), ylim=(-1.2, 1.2))

# A single sinusoidal sample with every source of variation set to zero.
fd = skfda.datasets.make_sinusoidal_process(n_samples=1,
                                            phase_std=0,
                                            amplitude_std=0,
                                            error_std=0)
fd.plot(color='maroon', linestyle='--', label=r'$f_i$')
plt.scatter([0, 1], [0, 0], color='maroon')

# Unpacking requires the grid to yield exactly four items.
# NOTE(review): ``data`` and ``t`` are defined outside this fragment.
wa, wb, wc, wd = skfda.FDataGrid(data, sample_points=t)

# Line artist that ``init2`` below resets to an empty state.
line, = ax.plot(t,
                fd.data_matrix.squeeze(),
                color='C0',
                lw=2,
                label=r'$f_i(\gamma_i(t))$')
plt.legend()

plt.tight_layout()


def init2():
    """Empty the module-level ``line`` artist and return it in a 1-tuple."""
    line.set_data([], [])
    return (line,)
Exemplo n.º 22
0
 def setUp(self):
     """Create a two-sample grid and its 5-element B-spline projection."""
     samples = [
         [1, 2, 3, 4, 5, 6, 7],
         [2, 3, 4, 5, 6, 7, 9],
     ]
     self.fd = skfda.FDataGrid(samples)

     bspline = skfda.representation.basis.BSpline(n_basis=5)
     self.fd_basis = self.fd.to_basis(bspline)