Example #1
def test_iris_ensemble_iterative_regression():
    print('\ntest_iris_ensemble_iterative_regression():')
    X_train, X_test, y_train, y_test = train_test_split(X_iris,
                                                        y_iris,
                                                        test_size=5,
                                                        random_state=42)
    cls = ESNClassifier(input_to_node=[('tanh',
                                        InputToNode(hidden_layer_size=10,
                                                    random_state=42,
                                                    activation='identity')),
                                       ('bounded_relu',
                                        InputToNode(hidden_layer_size=10,
                                                    random_state=42,
                                                    activation='bounded_relu'))
                                       ],
                        node_to_node=[('default',
                                       NodeToNode(hidden_layer_size=20,
                                                  spectral_radius=0.0))],
                        regressor=IncrementalRegression(alpha=.01),
                        random_state=42)

    for samples in np.split(np.arange(0, X_train.shape[0]), 5):
        cls.partial_fit(X_train[samples, :],
                        y_train[samples],
                        classes=np.arange(3, dtype=int))
    y_predicted = cls.predict(X_test)

    for record in range(len(y_test)):
        print('predicted: {0} \ttrue: {1}'.format(y_predicted[record],
                                                  y_test[record]))

    print('score: {0}'.format(cls.score(X_test, y_test)))
    print('proba: {0}'.format(cls.predict_proba(X_test)))
    print('log_proba: {0}'.format(cls.predict_log_proba(X_test)))
    assert cls.score(X_test, y_test) >= 4. / 5.
Example #2
 def __init__(self,
              *,
              input_to_node=None,
              regressor=None,
              chunk_size=None,
              **kwargs):
     if input_to_node is None:
         i2n_params = InputToNode()._get_param_names()
         self.input_to_node = InputToNode(**{
             key: kwargs[key]
             for key in kwargs.keys() if key in i2n_params
         })
     else:
         i2n_params = input_to_node._get_param_names()
         self.input_to_node = input_to_node.set_params(**{
             key: kwargs[key]
             for key in kwargs.keys() if key in i2n_params
         })
     if regressor is None:
         reg_params = IncrementalRegression()._get_param_names()
         self.regressor = IncrementalRegression(**{
             key: kwargs[key]
             for key in kwargs.keys() if key in reg_params
         })
     else:
         reg_params = regressor._get_param_names()
         self.regressor = regressor.set_params(**{
             key: kwargs[key]
             for key in kwargs.keys() if key in reg_params
         })
     self._chunk_size = chunk_size
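
# A minimal usage sketch of the kwargs routing above, assuming this
# initializer belongs to an estimator such as ELMRegressor and that
# hidden_layer_size and alpha are parameters of InputToNode and
# IncrementalRegression, respectively:
elm = ELMRegressor(hidden_layer_size=50, alpha=1e-3)
print(elm.input_to_node.hidden_layer_size)  # -> 50
print(elm.regressor.alpha)                  # -> 1e-3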
Example #3
def test_elm_regressor_jobs():
    print('\ntest_elm_regressor_jobs():')
    X = np.linspace(0, 10, 2000)
    y = np.hstack((np.sin(X).reshape(-1, 1), np.cos(X).reshape(-1, 1)))
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=10,
                                                        random_state=42)
    param_grid = {
        'input_to_node': [[('default',
                            InputToNode(bias_scaling=10.,
                                        hidden_layer_size=20,
                                        random_state=42))],
                          [('default',
                            InputToNode(bias_scaling=10.,
                                        hidden_layer_size=50,
                                        random_state=42))]],
        'regressor':
        [IncrementalRegression(alpha=.0001),
         IncrementalRegression(alpha=.01)],
        'random_state': [42]
    }
    elm = GridSearchCV(ELMRegressor(), param_grid, n_jobs=2)
    elm.fit(X_train.reshape(-1, 1), y_train)
    y_elm = elm.predict(X_test.reshape(-1, 1))
    print("tests - elm:\n sin | cos \n {0}".format(y_test - y_elm))
    print("best_params_: ".format(elm.best_params_))
    print("best_score: ".format(elm.best_score_))
    np.testing.assert_allclose(y_test, y_elm, atol=1e-1)
Example #4
def test_input_to_node_sparse():
    print('\ntest_input_to_node_sparse():')
    i2n = InputToNode(hidden_layer_size=5,
                      sparsity=2 / 5,
                      activation='tanh',
                      input_scaling=1.,
                      bias_scaling=1.,
                      random_state=42)
    X = np.zeros(shape=(10, 3))
    i2n.fit(X)
    print(i2n._input_weights.toarray())
    assert i2n._input_weights.shape == (3, 5)
    assert safe_sparse_dot(X, i2n._input_weights).shape == (10, 5)
Example #5
def test_node_to_node_dense():
    print('\ntest_node_to_node_dense():')
    i2n = InputToNode(hidden_layer_size=5,
                      sparsity=1.,
                      activation='tanh',
                      input_scaling=1.,
                      bias_scaling=1.,
                      random_state=42)
    n2n = NodeToNode(hidden_layer_size=5,
                     sparsity=1.,
                     activation='tanh',
                     spectral_radius=1.,
                     bias_scaling=1.,
                     random_state=42)
    X = np.zeros(shape=(10, 3))
    i2n.fit(X)
    n2n.fit(i2n.transform(X))
    i2n_hidden = i2n.transform(X)
    print(n2n.transform(i2n_hidden))
    print(n2n._recurrent_weights)
    assert n2n._recurrent_weights.shape == (5, 5)
    assert safe_sparse_dot(i2n.transform(X),
                           n2n._recurrent_weights).shape == (10, 5)
Example #6
def test_transform_bounded_relu():
    print('\ntest_transform_bounded_relu():')
    rs = np.random.RandomState(42)
    i2n = InputToNode(hidden_layer_size=5,
                      sparsity=1.,
                      activation='bounded_relu',
                      input_scaling=1.,
                      bias_scaling=1.,
                      random_state=rs)
    X = rs.uniform(low=-1., high=1., size=(10, 3))
    i2n.fit(X)
    y = i2n.transform(X)
    print('tests bounded relu')
    print(y)
    assert y.shape == (10, 5)
Example #7
 def __init__(self,
              input_to_node=InputToNode(),
              regressor=IncrementalRegression(alpha=.0001),
              chunk_size=None,
              random_state=None):
     self.input_to_node = input_to_node
     self.random_state = random_state
     self._chunk_size = chunk_size
     self._regressor = regressor
Example #8
 def __init__(self,
              input_to_node=InputToNode(),
              regressor=IncrementalRegression(alpha=.0001),
              chunk_size=None,
              random_state=None):
     super().__init__(input_to_node=input_to_node,
                      regressor=regressor,
                      chunk_size=chunk_size,
                      random_state=random_state)
     self._encoder = None
Example #9
 def __init__(self,
              input_to_node=InputToNode(),
              node_to_node=FeedbackNodeToNode(),
              regressor=IncrementalRegression(alpha=.0001),
              chunk_size=None,
              random_state=None,
              n_jobs=None):
     super().__init__(input_to_node=input_to_node,
                      node_to_node=node_to_node,
                      regressor=regressor,
                      chunk_size=chunk_size,
                      random_state=random_state)
     self.n_jobs = n_jobs
Example #10
 def __init__(self,
              *,
              input_to_node=InputToNode(),
              node_to_node=NodeToNode(),
              regressor=IncrementalRegression(alpha=.0001),
              chunk_size=None,
              random_state=None,
              n_jobs=None,
              output_strategy="last_state",
              **kwargs):
     super().__init__(input_to_node=input_to_node,
                      node_to_node=node_to_node,
                      regressor=regressor,
                      chunk_size=chunk_size,
                      random_state=random_state,
                      **kwargs)
     self.n_jobs = n_jobs
     self.output_strategy = output_strategy
Example #11
# We pre-processed the dataset by removing undefined values, namely, weekends and public holidays. The remaining values were normalized to be in a range of $[0, 1]$ (a sketch of the removal step follows the scaler below).

# In[5]:

train_len = 3000
future_len = 1

scaler = MinMaxScaler(feature_range=(-1, 1)).fit(X=X)
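
# A minimal sketch of the removal of undefined values described above,
# assuming a hypothetical 1-D `raw_series` with NaN entries on weekends and
# public holidays:
#
#     X = raw_series[~np.isnan(raw_series)].reshape(-1, 1)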

# Echo State Network preparation

# In[7]:

base_input_to_nodes = InputToNode(hidden_layer_size=100,
                                  activation='identity',
                                  k_in=1,
                                  input_scaling=0.6,
                                  bias_scaling=0.0)
base_nodes_to_nodes = NodeToNode(hidden_layer_size=100,
                                 spectral_radius=0.9,
                                 leakage=1.0,
                                 bias_scaling=0.0,
                                 k_rec=10)

esn = ESNRegressor(input_to_node=base_input_to_nodes,
                   node_to_node=base_nodes_to_nodes,
                   regressor=IncrementalRegression(alpha=1e-8),
                   random_state=10)

# Training and Prediction.
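
# A minimal sketch of this step, assuming the scaled series is used to
# predict `future_len` samples ahead:
X_scaled = scaler.transform(X)
esn.fit(X_scaled[:train_len], X_scaled[future_len:train_len + future_len])
y_pred = esn.predict(X_scaled[train_len:-future_len])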
Example #12
param_grid = {
    'input_to_node__hidden_layer_size': [50],
    'input_to_node__input_scaling': np.linspace(start=0.1, stop=1, num=10),
    'input_to_node__bias_scaling': [0.0],
    'input_to_node__activation': ['identity'],
    'input_to_node__random_state': [42],
    'node_to_node__hidden_layer_size': [50],
    'node_to_node__leakage': [1.0],
    'node_to_node__spectral_radius': np.linspace(start=0.0, stop=1, num=11),
    'node_to_node__bias_scaling': [0.0],
    'node_to_node__activation': ['tanh'],
    'node_to_node__random_state': [42],
    'regressor__alpha': [1e-3],
    'random_state': [42]
}

base_esn = ESNClassifier(input_to_node=InputToNode(),
                         node_to_node=NodeToNode(),
                         regressor=IncrementalRegression())

# ## Optimize input_scaling and spectral_radius
#
# We use the ParameterGrid from scikit-learn, which converts the grid parameters defined before into a list of dictionaries for each parameter combination.
#
# We loop over each entry of the Parameter Grid, set the parameters in reg and fit our model on the training data. Afterwards, we report the error rates on the training and test set (a sketch of this loop follows below).
#
#     The lowest training error rate: 0.536330735; parameter combination: {'input_scaling': 0.1, 'spectral_radius': 1.0}
#     The lowest test error rate: 0.588987764; parameter combination: {'input_scaling': 0.1, 'spectral_radius': 1.0}
#
# We use the best parameter combination from the training set, because we do not want to overfit on the test set.
#
# As we can see in the python call, we have modified the training procedure: we use "partial_fit" to present all sequences to the ESN independently of each other. The function "partial_fit" is part of the scikit-learn API. We have added one optional argument, "update_output_weights". By default, it is True; thus, after feeding one sequence through the ESN, the output weights are computed.
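
# A minimal sketch of the loop described above, assuming standard
# scikit-learn parameter routing and lists of training/test sequences;
# all names except base_esn and param_grid are placeholders, and the
# classes argument is hypothetical here:
import numpy as np
from sklearn.base import clone
from sklearn.model_selection import ParameterGrid

for params in ParameterGrid(param_grid):
    esn = clone(base_esn).set_params(**params)
    for X_seq, y_seq in zip(X_train_sequences, y_train_sequences):
        esn.partial_fit(X_seq, y_seq, classes=[0, 1, 2])
    train_acc = np.mean([esn.score(X_seq, y_seq) for X_seq, y_seq
                         in zip(X_train_sequences, y_train_sequences)])
    test_acc = np.mean([esn.score(X_seq, y_seq) for X_seq, y_seq
                        in zip(X_test_sequences, y_test_sequences)])
    print(params, 1. - train_acc, 1. - test_acc)  # error rates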
Example #13
def input2node_distribution(directory):
    self_name = 'input2node_distribution'
    X, y = get_mnist(directory)

    X /= 255.

    pca = PCA(n_components=784).fit(X)
    X_pca = np.matmul(X, pca.components_.T)

    list_activation = ['tanh', 'relu', 'bounded_relu']
    list_train = [X, X_pca]

    fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(6., 3.))

    for idx_activation in range(len(list_activation)):
        activation = list_activation[idx_activation]

        for idx_train in range(len(list_train)):
            ax = axs[idx_train, idx_activation]
            train = list_train[idx_train]

            if activation == 'tanh':
                i2n = InputToNode(hidden_layer_size=1,
                                  random_state=82,
                                  input_scaling=50 / 784,
                                  bias_scaling=0.,
                                  activation=activation)
            elif activation in ['relu', 'bounded_relu']:
                i2n = InputToNode(hidden_layer_size=1,
                                  random_state=82,
                                  input_scaling=1.,
                                  bias_scaling=0.,
                                  activation=activation)

            node_out = i2n.fit_transform(train, y)
            hist, bin_edges = np.histogram(node_out, bins=20, density=True)

            # np.delete is not in-place: assign the results (these pruned
            # arrays feed the commented-out bar plot below)
            x = np.delete(bin_edges[:-1], hist <= 1e-3)
            hist = np.delete(hist, hist <= 1e-3)
            width = -np.diff(bin_edges)

            # ax.bar(x=x, height=hist, width=width, label=activation, color=tud_colors['lightblue'], align='edge')
            if activation == 'bounded_relu':
                ax.hist(node_out,
                        label=activation,
                        density=True,
                        bins=[.0, .1, .9, 1.],
                        color=tud_colors['lightblue'])
            else:
                ax.hist(node_out,
                        label=activation,
                        density=True,
                        bins=20,
                        color=tud_colors['lightblue'])

            ax.grid(axis='y')
            ax.set_yscale('log')

            x_ticks = np.min(node_out), np.max(node_out)
            ax.set_xlim(x_ticks)

            # x0, y0, width, height = ax.get_position().bounds
            #  fig.text(x=x0 + width/10, y=y0 + height/2, s='scaling={0:.1e}\nbias={1:.1e}'.format(i2n.input_scaling, i2n.bias_scaling), fontsize='small')
            if activation == 'tanh':
                x_ticks += (0.0, )
            ax.set_xticks(x_ticks)
            ax.set_xticklabels(
                ['{0:.1f}'.format(x_tick) for x_tick in x_ticks])

    axs[0, 0].set_title('tanh, orig.')
    axs[0, 1].set_title('relu, orig.')
    axs[0, 2].set_title('b. relu, orig.')
    axs[1, 0].set_title('tanh, pca')
    axs[1, 1].set_title('relu, pca')
    axs[1, 2].set_title('b. relu, pca')

    # plt.tight_layout()
    fig.tight_layout()
    fig.savefig(os.path.join(directory, 'node-out.pdf'), format='pdf')
    fig.savefig(os.path.join(directory, 'node-out.eps'), format='eps')
    plt.rc('pgf', texsystem='pdflatex')
Example #14
    X_test.append(X)
    y_test.append(y)
    print(X_test[-1].shape)
    print(y_test[-1].shape)
print("Validation files:")
for file in all_wavs_m[13:]:
    print(file)
    X, y = extract_features(file, sr = 4000., frame_length = 21)
    X_val.append(X)
    y_val.append(y)
    print(X_val[-1].shape)
    print(y_val[-1].shape)
"""
base_input_to_node = InputToNode(hidden_layer_size=500,
                                 activation='identity',
                                 k_in=5,
                                 input_scaling=14.6,
                                 bias_scaling=0.0,
                                 random_state=1)
base_node_to_node = NodeToNode(hidden_layer_size=500,
                               spectral_radius=0.8,
                               leakage=0.5,
                               bias_scaling=0.5,
                               k_rec=16,
                               bi_directional=True,
                               random_state=1)
base_reg = FastIncrementalRegression(alpha=1.7e-10)

base_esn = ESNRegressor(input_to_node=[('default', base_input_to_node)],
                        node_to_node=[('default', base_node_to_node)],
                        regressor=base_reg,
                        random_state=0)
Example #15
param_grid = {
    'input_to_node__activation': ['identity'],
    'input_to_node__random_state': [42],
    'node_to_node__hidden_layer_size': [50],
    'node_to_node__leakage': [1.0],
    'node_to_node__spectral_radius': [0.5],
    'node_to_node__bias_scaling': [0.0],
    'node_to_node__teacher_scaling': np.linspace(start=0.1, stop=15, num=15),
    'node_to_node__teacher_shift': np.linspace(start=-0.9, stop=0.9, num=19),
    'node_to_node__activation': ['tanh'],
    'node_to_node__output_activation': ['tanh'],
    'node_to_node__random_state': [42],
    'regressor__alpha': [1e-3],
    'random_state': [42]
}

base_esn = FeedbackESNRegressor(input_to_node=InputToNode(),
                                node_to_node=FeedbackNodeToNode(),
                                regressor=IncrementalRegression())

df = pd.DataFrame(columns=list(param_grid.keys()) + [
    "Fitting Time", "Validation Time Training", "Validation Time Test",
    "Training Loss", "Validation Loss"
])

# ## Optimize input_scaling and spectral_radius
#
# We use the ParameterGrid from scikit-learn, which converts the grid parameters defined before into a list of dictionaries for each parameter combination.
#
# We loop over each entry of the Parameter Grid, set the parameters in esn and fit our model on the training data. Afterwards, we report the MSE on the training and validation set (a sketch of this loop follows below).
#
#     The lowest training MSE: 0.000238243207656839; parameter combination: {'input_scaling': 0.4, 'spectral_radius': 0.5}
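
# A minimal sketch of the loop described above, assuming standard
# scikit-learn parameter routing; X_train/y_train and X_val/y_val are
# placeholder names for the training and validation data:
import time
from sklearn.base import clone
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import ParameterGrid

for params in ParameterGrid(param_grid):
    esn = clone(base_esn).set_params(**params)
    t0 = time.time()
    esn.fit(X_train, y_train)
    fit_time = time.time() - t0
    t0 = time.time()
    train_loss = mean_squared_error(y_train, esn.predict(X_train))
    val_time_train = time.time() - t0
    t0 = time.time()
    val_loss = mean_squared_error(y_val, esn.predict(X_val))
    val_time_test = time.time() - t0
    df.loc[len(df)] = [params[key] for key in param_grid.keys()] + [
        fit_time, val_time_train, val_time_test, train_loss, val_loss]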
Example #16
plt.xlim([0, 100])
plt.ylim([0, 1])
plt.xlabel('n')
plt.ylabel('X[n]')
plt.grid()
plt.show()

# First, we show the impact of different input scaling factors.
#
# To this end, we neutralize the other hyper-parameters, i.e., no recurrent connections ($\rho = 0$), no bias ($\alpha_{\mathrm{b}} = 0$) and no leakage ($\lambda = 1$).

# In[3]:

esn = ESNRegressor(input_to_node=InputToNode(hidden_layer_size=50,
                                             activation='identity',
                                             k_in=1,
                                             input_scaling=0.1,
                                             bias_scaling=0.0),
                   node_to_node=NodeToNode(hidden_layer_size=50,
                                           spectral_radius=0.0,
                                           leakage=1.0,
                                           bias_scaling=0.0,
                                           k_rec=10),
                   regressor=Ridge(alpha=1e-6),
                   random_state=10)

esn.fit(X=X, y=y)
_ = esn.predict(X=X)

# Visualizing this, we can see exactly what we might expect. We have chosen an input scaling factor of 0.1. Thus, the reservoir state is non-zero for exactly one sample. We can see that all reservoir states are zero at all times except for $n=5$, when the impulse is fed into the ESN. A sketch of such a visualization follows below.
#
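# A minimal sketch of the visualization described above, assuming the
# hidden_layer_state property (the reservoir states over time) of the
# fitted ESN:
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 2))
plt.imshow(esn.hidden_layer_state.T, aspect='auto')  # one row per neuron
plt.xlabel('n')
plt.ylabel('reservoir neuron')
plt.colorbar(label='state')
plt.show()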
Example #17
    ax.set_ylim([0,0.5])
    ax.set_ylabel("Frequency")
    ax.set_yticks([])
    ax.set_xlabel("Time")
    ax.set_xticks([])


dataset = np.loadtxt(fname=r"C:\Users\Steiner\Documents\Python\PyRCN\examples\dataset\sine_training.csv", delimiter=",", dtype=float)
X = dataset[:, 0].reshape(-1, 1)
y = dataset[:, 1]

dataset = np.loadtxt(fname=r"C:\Users\Steiner\Documents\Python\PyRCN\examples\dataset\sine_test.csv", delimiter=",", dtype=float)
X_test = dataset[:, 0].reshape(-1, 1)
y_test = dataset[:, 1]

input_to_node = InputToNode(hidden_layer_size=200,
                            activation='identity',
                            input_scaling=3.,
                            bias_scaling=0.01,
                            random_state=1)
node_to_node = FeedbackNodeToNode(hidden_layer_size=200,
                                  sparsity=0.05,
                                  activation='tanh',
                                  spectral_radius=0.25,
                                  leakage=1.0,
                                  bias_scaling=0.0,
                                  teacher_scaling=1.12,
                                  teacher_shift=-0.7,
                                  bi_directional=False,
                                  output_activation="tanh",
                                  random_state=1)
reg = IncrementalRegression(alpha=1e-3)

esn = FeedbackESNRegressor(input_to_node=input_to_node,
                           node_to_node=node_to_node,
                           regressor=reg,
                           random_state=1)

esn.partial_fit(X=X, y=y.reshape(-1, 1), postpone_inverse=False)

y_pred = esn.predict(X=X)

plt.figure(figsize=(10,1.5))
plt.plot(X, label='Input (Frequency)')
plt.plot(y, label='Target (Sine)')
plt.plot(y_pred, label='Predicted (Sine)')
plt.title('Training')
plt.xlim([0, len(y_pred)])
Example #18
class ESNRegressor(BaseEstimator, MultiOutputMixin, RegressorMixin):
    """
    Echo State Network regressor.

    This model optimizes the mean squared error loss function using linear regression.

    Parameters
    ----------
    input_to_node : transformer or iterable of (name, transformer) tuples, default=None
        Transformer(s) implementing fit/transform. A list of tuples is
        combined into a FeatureUnion. If None, a default InputToNode() is
        constructed from the matching keyword arguments.
    node_to_node : transformer or iterable of (name, transformer) tuples, default=None
        Transformer(s) implementing fit/transform. A list of tuples is
        combined into a FeatureUnion. If None, a default NodeToNode() is
        constructed from the matching keyword arguments.
    regressor : object, default=None
        Regressor object such as derived from ``RegressorMixin``. This
        regressor will automatically be cloned each time prior to fitting.
        If None, a default IncrementalRegression() is constructed from the
        matching keyword arguments.
    chunk_size : int, default=None
        If X.shape[0] > chunk_size, the results are computed incrementally
        with partial_fit.
    kwargs : dict, default=None
        Optional keyword arguments passed on to the matching parameters of
        the sub-estimators.
    """
    @_deprecate_positional_args
    def __init__(self,
                 *,
                 input_to_node=None,
                 node_to_node=None,
                 regressor=None,
                 chunk_size=None,
                 **kwargs):
        if input_to_node is None:
            i2n_params = InputToNode()._get_param_names()
            self.input_to_node = InputToNode(**{
                key: kwargs[key]
                for key in kwargs.keys() if key in i2n_params
            })
        else:
            i2n_params = input_to_node._get_param_names()
            self.input_to_node = input_to_node.set_params(**{
                key: kwargs[key]
                for key in kwargs.keys() if key in i2n_params
            })
        if node_to_node is None:
            n2n_params = NodeToNode()._get_param_names()
            self.node_to_node = NodeToNode(**{
                key: kwargs[key]
                for key in kwargs.keys() if key in n2n_params
            })
        else:
            n2n_params = node_to_node._get_param_names()
            self.node_to_node = node_to_node.set_params(**{
                key: kwargs[key]
                for key in kwargs.keys() if key in n2n_params
            })
        if regressor is None:
            reg_params = IncrementalRegression()._get_param_names()
            self.regressor = IncrementalRegression(**{
                key: kwargs[key]
                for key in kwargs.keys() if key in reg_params
            })
        else:
            reg_params = regressor._get_param_names()
            self.regressor = regressor.set_params(**{
                key: kwargs[key]
                for key in kwargs.keys() if key in reg_params
            })
        self._chunk_size = chunk_size

    def get_params(self, deep=True):
        if deep:
            return {
                **self.input_to_node.get_params(),
                **self.node_to_node.get_params(),
                **{
                    "alpha": self.regressor.get_params()["alpha"]
                }
            }
        else:
            return {
                "input_to_node": self.input_to_node,
                "node_to_node": self.node_to_node,
                "regressor": self.regressor,
                "chunk_size": self.chunk_size
            }

    def set_params(self, **parameters):
        i2n_params = self.input_to_node._get_param_names()
        self.input_to_node = self.input_to_node.set_params(**{
            key: parameters[key]
            for key in parameters.keys() if key in i2n_params
        })
        n2n_params = self.node_to_node._get_param_names()
        self.node_to_node = self.node_to_node.set_params(**{
            key: parameters[key]
            for key in parameters.keys() if key in n2n_params
        })
        reg_params = self.regressor._get_param_names()
        self.regressor = self.regressor.set_params(**{
            key: parameters[key]
            for key in parameters.keys() if key in reg_params
        })
        for parameter, value in parameters.items():
            if parameter in self.get_params(deep=False):
                setattr(self, parameter, value)

        return self

    def partial_fit(self,
                    X,
                    y,
                    n_jobs=None,
                    transformer_weights=None,
                    postpone_inverse=False):
        """
        Fits the regressor partially.

        Parameters
        ----------
        X : {ndarray, sparse matrix} of shape (n_samples, n_features)
        y : {ndarray, sparse matrix} of shape (n_samples,) or (n_samples, n_targets)
            The targets to predict.
        n_jobs : int, default=None
            The number of jobs to run in parallel. ``-1`` means using all processors.
            See :term:`Glossary <n_jobs>` for more details.
        transformer_weights : ignored

        Returns
        -------
        self : Returns a trained ESNRegressor model.
        """
        if not hasattr(self._regressor, 'partial_fit'):
            raise BaseException(
                'Regressor has no attribute partial_fit, got {0}'.format(
                    self._regressor))

        self._validate_hyperparameters()
        self._validate_data(X=X, y=y, multi_output=True)

        # input_to_node
        try:
            hidden_layer_state = self._input_to_node.transform(X)
        except NotFittedError as e:
            print('input_to_node has not been fitted yet: {0}'.format(e))
            hidden_layer_state = self._input_to_node.fit_transform(X)

        # node_to_node
        try:
            hidden_layer_state = self._node_to_node.transform(
                hidden_layer_state)
        except NotFittedError as e:
            print('node_to_node has not been fitted yet: {0}'.format(e))
            hidden_layer_state = self._node_to_node.fit_transform(
                hidden_layer_state)

        # regression
        if self._regressor:
            self._regressor.partial_fit(
                hidden_layer_state[self.node_to_node.wash_out:, :],
                y[self.node_to_node.wash_out:, :],
                postpone_inverse=postpone_inverse)
        return self

    def fit(self, X, y, n_jobs=None, transformer_weights=None):
        """
        Fits the regressor.

        Parameters
        ----------
        X : {ndarray, sparse matrix} of shape (n_samples, n_features)
        y : {ndarray, sparse matrix} of shape (n_samples,) or (n_samples, n_targets)
            The targets to predict.
        n_jobs : int, default=None
            The number of jobs to run in parallel. ``-1`` means using all processors.
            See :term:`Glossary <n_jobs>` for more details.
        transformer_weights : ignored

        Returns
        -------
        self : Returns a trained ESNRegressor model.
        """
        self._validate_hyperparameters()
        self._validate_data(X, y, multi_output=True)
        self._input_to_node.fit(X)
        self._node_to_node.fit(self._input_to_node.transform(X))
        self._regressor = clone(self._regressor)  # sklearn.base.clone: fresh copy that keeps the hyperparameters

        if self._chunk_size is None or self._chunk_size >= X.shape[0]:
            # input_to_node
            hidden_layer_state = self._input_to_node.transform(X)
            hidden_layer_state = self._node_to_node.transform(
                hidden_layer_state)

            # regression
            self._regressor.fit(
                hidden_layer_state[self.node_to_node.wash_out:, :],
                y[self.node_to_node.wash_out:, :])

        elif self._chunk_size < X.shape[0]:
            # setup chunk list
            chunks = list(range(0, X.shape[0], self._chunk_size))
            # postpone inverse calculation for chunks n-1
            for idx in chunks[:-1]:
                ESNRegressor.partial_fit(
                    self,
                    X=X[idx:idx + self._chunk_size, ...],
                    y=y[idx:idx + self._chunk_size, ...],
                    n_jobs=n_jobs,
                    transformer_weights=transformer_weights,
                    postpone_inverse=True)
            # last chunk, calculate inverse and bias
            ESNRegressor.partial_fit(self,
                                     X=X[chunks[-1]:, ...],
                                     y=y[chunks[-1]:, ...],
                                     n_jobs=n_jobs,
                                     transformer_weights=transformer_weights,
                                     postpone_inverse=False)
        else:
            raise ValueError('chunk_size invalid {0}'.format(self._chunk_size))
        return self

    def predict(self, X):
        """
        Predicts the targets using the trained ESN regressor.

        Parameters
        ----------
        X : {ndarray, sparse matrix} of shape (n_samples, n_features)

        Returns
        -------
        y : {ndarray, sparse matrix} of shape (n_samples,) or (n_samples, n_targets)
            The predicted targets
        """
        if self._input_to_node is None or self._regressor is None:
            raise NotFittedError(self)

        hidden_layer_state = self._input_to_node.transform(X)
        hidden_layer_state = self._node_to_node.transform(hidden_layer_state)

        return self._regressor.predict(hidden_layer_state)

    def _validate_hyperparameters(self):
        """Validates the hyperparameters.
        Returns
        -------
        """
        if not (hasattr(self.input_to_node, "fit")
                and hasattr(self.input_to_node, "fit_transform")
                and hasattr(self.input_to_node, "transform")):
            raise TypeError("All input_to_node should be transformers "
                            "and implement fit and transform "
                            "'%s' (type %s) doesn't" %
                            (self.input_to_node, type(self.input_to_node)))

        if not (hasattr(self.node_to_node, "fit")
                and hasattr(self.node_to_node, "fit_transform")
                and hasattr(self.node_to_node, "transform")):
            raise TypeError("All node_to_node should be transformers "
                            "and implement fit and transform "
                            "'%s' (type %s) doesn't" %
                            (self.node_to_node, type(self.node_to_node)))

        if self._chunk_size is not None and (
                not isinstance(self._chunk_size, int) or self._chunk_size < 0):
            raise ValueError('Invalid value for chunk_size, got {0}'.format(
                self._chunk_size))

        if not is_regressor(self._regressor):
            raise TypeError("The last step should be a regressor "
                            "and implement fit and predict"
                            "'%s' (type %s) doesn't" %
                            (self._regressor, type(self._regressor)))

    def __sizeof__(self):
        """Returns the size of the object in bytes.
        Returns
        -------
        size : int
        Object memory in bytes.
        """
        return object.__sizeof__(self) + \
            sys.getsizeof(self._input_to_node) + \
            sys.getsizeof(self._node_to_node) + \
            sys.getsizeof(self._regressor)

    @property
    def regressor(self):
        """Returns the chunk_size, in which X will be chopped.
        Returns
        -------
        chunk_size : int or None
        """
        return self._regressor

    @regressor.setter
    def regressor(self, regressor):
        """Sets the regressor.
        Parameters
        ----------
        regressor : regressor or None
        Returns
        -------
        """
        self._regressor = regressor

    @property
    def input_to_node(self):
        """Returns the input_to_node list or the input_to_node Transformer.
        Returns
        -------
        input_to_node : Transformer or [Transformer]
        """
        return self._input_to_node

    @input_to_node.setter
    def input_to_node(self,
                      input_to_node,
                      n_jobs=None,
                      transformer_weights=None):
        """Sets the input_to_node list or the input_to_node Transformer.
        Parameters
        ----------
        input_to_node : Transformer or [Transformer]
        n_jobs : int, default=None
        Number of jobs to run in parallel.
        None means 1 unless in a joblib.parallel_backend context. -1 means using all processors.
        transformer_weights : dict, default=None
        Multiplicative weights for features per transformer.
        Keys are transformer names, values the weights.
        Raises ValueError if key not present in transformer_list.
        Returns
        -------
        """
        if hasattr(input_to_node, '__iter__'):
            # Feature Union of list of input_to_node
            self._input_to_node = FeatureUnion(
                transformer_list=input_to_node,
                n_jobs=n_jobs,
                transformer_weights=transformer_weights)
        else:
            # single input_to_node
            self._input_to_node = input_to_node

    @property
    def node_to_node(self):
        """Returns the node_to_node list or the input_to_node Transformer.
        Returns
        -------
        node_to_node : Transformer or [Transformer]
        """
        return self._node_to_node

    @property
    def hidden_layer_state(self):
        """Returns the hidden_layer_state, e.g. the resevoir state over time.
        Returns
        -------
        hidden_layer_state : np.ndarray
        """
        return self._node_to_node._hidden_layer_state

    @node_to_node.setter
    def node_to_node(self,
                     node_to_node,
                     n_jobs=None,
                     transformer_weights=None):
        """Sets the input_to_node list or the input_to_node Transformer.
        Parameters
        ----------
        node_to_node : Transformer or [Transformer]
        n_jobs : int, default=None
        Number of jobs to run in parallel.
        None means 1 unless in a joblib.parallel_backend context. -1 means using all processors.
        transformer_weights : dict, default=None
        Multiplicative weights for features per transformer.
        Keys are transformer names, values the weights.
        Raises ValueError if key not present in transformer_list.
        Returns
        -------
        """
        if hasattr(node_to_node, '__iter__'):
            # Feature Union of list of node_to_node
            self._node_to_node = FeatureUnion(
                transformer_list=node_to_node,
                n_jobs=n_jobs,
                transformer_weights=transformer_weights)
        else:
            # single node_to_node
            self._node_to_node = node_to_node

    @property
    def chunk_size(self):
        """Returns the chunk_size, in which X will be chopped.
        Returns
        -------
        chunk_size : int or None
        """
        return self._chunk_size

    @chunk_size.setter
    def chunk_size(self, chunk_size):
        """Sets the chunk_size, in which X will be chopped.
        Parameters
        ----------
        chunk_size : int or None
        Returns
        -------
        """
        self._chunk_size = chunk_size
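
# A minimal usage sketch of the chunked fitting above, with assumed toy
# data: when chunk_size is set, fit() calls partial_fit on successive
# chunks and postpones the inverse computation until the last chunk.
import numpy as np

X = np.linspace(0, 10, 5000).reshape(-1, 1)
y = np.sin(X)
esn = ESNRegressor(hidden_layer_size=50, chunk_size=1000, random_state=42)
esn.fit(X, y)
y_pred = esn.predict(X)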