def test_node_to_node_dense():
    print('\ntest_node_to_node_dense():')
    i2n = InputToNode(hidden_layer_size=5, sparsity=1., activation='tanh',
                      input_scaling=1., bias_scaling=1., random_state=42)
    n2n = NodeToNode(hidden_layer_size=5, sparsity=1., activation='tanh',
                     spectral_radius=1., bias_scaling=1., random_state=42)
    X = np.zeros(shape=(10, 3))
    i2n.fit(X)
    n2n.fit(i2n.transform(X))
    i2n_hidden = i2n.transform(X)
    print(n2n.transform(i2n_hidden))
    print(n2n._recurrent_weights)
    assert n2n._recurrent_weights.shape == (5, 5)
    assert safe_sparse_dot(i2n.transform(X),
                           n2n._recurrent_weights).shape == (10, 5)
def test_iris_ensemble_iterative_regression():
    print('\ntest_iris_ensemble_iterative_regression():')
    X_train, X_test, y_train, y_test = train_test_split(
        X_iris, y_iris, test_size=5, random_state=42)
    cls = ESNClassifier(
        input_to_node=[
            ('tanh', InputToNode(hidden_layer_size=10, random_state=42,
                                 activation='identity')),
            ('bounded_relu', InputToNode(hidden_layer_size=10, random_state=42,
                                         activation='bounded_relu'))],
        node_to_node=[
            ('default', NodeToNode(hidden_layer_size=20, spectral_radius=0.0))],
        regressor=IncrementalRegression(alpha=.01),
        random_state=42)
    for samples in np.split(np.arange(0, X_train.shape[0]), 5):
        cls.partial_fit(X_train[samples, :], y_train[samples],
                        classes=np.arange(3, dtype=int))
    y_predicted = cls.predict(X_test)
    for record in range(len(y_test)):
        print('predicted: {0} \ttrue: {1}'.format(y_predicted[record],
                                                  y_test[record]))
    print('score: {0}'.format(cls.score(X_test, y_test)))
    print('proba: {0}'.format(cls.predict_proba(X_test)))
    print('log_proba: {0}'.format(cls.predict_log_proba(X_test)))
    assert cls.score(X_test, y_test) >= 4. / 5.
def __init__(self, *, input_to_node=None, node_to_node=None, regressor=None,
             chunk_size=None, **kwargs):
    if input_to_node is None:
        i2n_params = InputToNode()._get_param_names()
        self.input_to_node = InputToNode(**{
            key: kwargs[key] for key in kwargs.keys() if key in i2n_params})
    else:
        i2n_params = input_to_node._get_param_names()
        self.input_to_node = input_to_node.set_params(**{
            key: kwargs[key] for key in kwargs.keys() if key in i2n_params})
    if node_to_node is None:
        n2n_params = NodeToNode()._get_param_names()
        self.node_to_node = NodeToNode(**{
            key: kwargs[key] for key in kwargs.keys() if key in n2n_params})
    else:
        n2n_params = node_to_node._get_param_names()
        self.node_to_node = node_to_node.set_params(**{
            key: kwargs[key] for key in kwargs.keys() if key in n2n_params})
    if regressor is None:
        reg_params = IncrementalRegression()._get_param_names()
        self.regressor = IncrementalRegression(**{
            key: kwargs[key] for key in kwargs.keys() if key in reg_params})
    else:
        reg_params = regressor._get_param_names()
        self.regressor = regressor.set_params(**{
            key: kwargs[key] for key in kwargs.keys() if key in reg_params})
    self._chunk_size = chunk_size
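# The constructor above routes shared keyword arguments to the matching
# sub-estimator by filtering **kwargs against each component's parameter
# names. A minimal, hedged usage sketch (illustrative only; parameter values
# are arbitrary assumptions, not recommendations):
#
# esn = ESNRegressor(hidden_layer_size=50, spectral_radius=0.9, alpha=1e-5)
# # hidden_layer_size appears in the parameter names of both InputToNode and
# # NodeToNode, so it is applied to both; spectral_radius only matches
# # NodeToNode; alpha only matches IncrementalRegression.
# assert esn.input_to_node.hidden_layer_size == 50
# assert esn.node_to_node.spectral_radius == 0.9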
def __init__(self, input_to_node=InputToNode(), node_to_node=NodeToNode(),
             regressor=IncrementalRegression(alpha=.0001), chunk_size=None,
             random_state=None):
    self.input_to_node = input_to_node
    self.node_to_node = node_to_node
    self._regressor = regressor
    self._chunk_size = chunk_size
    self.random_state = random_state
def test_esn_regressor_jobs():
    print('\ntest_esn_regressor_jobs():')
    X = np.linspace(0, 10, 2000)
    y = np.hstack((np.sin(X).reshape(-1, 1), np.cos(X).reshape(-1, 1)))
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=10,
                                                        random_state=42)
    param_grid = {
        'input_to_node': [
            [('default', InputToNode(bias_scaling=10., hidden_layer_size=10,
                                     activation='identity', random_state=42))],
            [('default', InputToNode(bias_scaling=10., hidden_layer_size=50,
                                     activation='identity', random_state=42))]],
        'node_to_node': [
            [('default', NodeToNode(spectral_radius=0., bias_scaling=0.,
                                    hidden_layer_size=10, random_state=42))],
            [('default', NodeToNode(spectral_radius=0., bias_scaling=0.,
                                    hidden_layer_size=50, random_state=42))]],
        'regressor': [IncrementalRegression(alpha=.0001),
                      IncrementalRegression(alpha=.01)],
        'random_state': [42]}
    esn = GridSearchCV(ESNRegressor(), param_grid)
    esn.fit(X_train.reshape(-1, 1), y_train, n_jobs=2)
    y_esn = esn.predict(X_test.reshape(-1, 1))
    print("tests - esn:\n sin | cos \n {0}".format(y_test - y_esn))
    print("best_params_: {0}".format(esn.best_params_))
    print("best_score: {0}".format(esn.best_score_))
    np.testing.assert_allclose(y_test, y_esn, atol=1e-1)
def __init__(self, input_to_node=InputToNode(), node_to_node=NodeToNode(),
             regressor=IncrementalRegression(alpha=.0001), chunk_size=None,
             random_state=None):
    super().__init__(input_to_node=input_to_node, node_to_node=node_to_node,
                     regressor=regressor, chunk_size=chunk_size,
                     random_state=random_state)
    self._encoder = None
def __init__(self, *, input_to_node=InputToNode(), node_to_node=NodeToNode(),
             regressor=IncrementalRegression(alpha=.0001), chunk_size=None,
             random_state=None, n_jobs=None, output_strategy="last_state",
             **kwargs):
    super().__init__(input_to_node=input_to_node, node_to_node=node_to_node,
                     regressor=regressor, chunk_size=chunk_size,
                     random_state=random_state, **kwargs)
    self.n_jobs = n_jobs
    self.output_strategy = output_strategy
future_len = 1
scaler = MinMaxScaler(feature_range=(-1, 1)).fit(X=X)

# Echo State Network preparation

# In[7]:

base_input_to_nodes = InputToNode(hidden_layer_size=100, activation='identity',
                                  k_in=1, input_scaling=0.6, bias_scaling=0.0)
base_nodes_to_nodes = NodeToNode(hidden_layer_size=100, spectral_radius=0.9,
                                 leakage=1.0, bias_scaling=0.0, k_rec=10)

esn = ESNRegressor(input_to_node=base_input_to_nodes,
                   node_to_node=base_nodes_to_nodes,
                   regressor=IncrementalRegression(alpha=1e-8),
                   random_state=10)

# Training and Prediction.

# In[8]:

X_train = scaler.transform(X[0:train_len])
y_train = scaler.transform(X[1:train_len + 1])
X_test = scaler.transform(X[train_len + 1:-1])
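# Hedged sketch of the step that presumably follows the preparation above
# (the actual training cell is not part of this excerpt): fit the ESN on the
# scaled one-step-ahead pairs and predict on the held-out continuation.
#
# esn.fit(X=X_train, y=y_train)
# y_pred = esn.predict(X=X_test)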
    'input_to_node__input_scaling': np.linspace(start=0.1, stop=1, num=10),
    'input_to_node__bias_scaling': [0.0],
    'input_to_node__activation': ['identity'],
    'input_to_node__random_state': [42],
    'node_to_node__hidden_layer_size': [50],
    'node_to_node__leakage': [1.0],
    'node_to_node__spectral_radius': np.linspace(start=0.0, stop=1, num=11),
    'node_to_node__bias_scaling': [0.0],
    'node_to_node__activation': ['tanh'],
    'node_to_node__random_state': [42],
    'regressor__alpha': [1e-3],
    'random_state': [42]
}

base_esn = ESNClassifier(input_to_node=InputToNode(),
                         node_to_node=NodeToNode(),
                         regressor=IncrementalRegression())

# ## Optimize input_scaling and spectral_radius
#
# We use the ParameterGrid from scikit-learn, which converts the grid
# parameters defined above into a list of dictionaries, one for each
# parameter combination.
#
# We loop over each entry of the parameter grid, set the parameters in reg
# and fit our model on the training data. Afterwards, we report the error
# rates on the training and test set; a sketch of this loop is given below.
#
# The lowest training error rate: 0.536330735;
# parameter combination: {'input_scaling': 0.1, 'spectral_radius': 1.0}
# The lowest test error rate: 0.588987764;
# parameter combination: {'input_scaling': 0.1, 'spectral_radius': 1.0}
#
# We use the best parameter combination from the training set, because we do
# not want to overfit on the test set.
#
# As we can see in the Python call, we have modified the training procedure:
# we use "partial_fit" in order to present each sequence to the ESN
# independently of the others. The function "partial_fit" is part of the
# scikit-learn API. We have added one optional argument,
# "update_output_weights". By default, it is True and thus, after feeding one
# sequence through the ESN, the output weights are computed.
#
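# Hedged sketch of the grid-search loop described above (illustrative only;
# it assumes the parameter dictionary above is named `grid`, that `classes`
# holds the class labels, and that X_train/y_train/X_test/y_test are lists of
# per-sequence arrays; the original notebook cell may differ):
#
# from sklearn.base import clone
# from sklearn.model_selection import ParameterGrid
#
# for params in ParameterGrid(grid):
#     esn = clone(base_esn).set_params(**params)
#     # present each training sequence independently via partial_fit
#     for X_seq, y_seq in zip(X_train, y_train):
#         esn.partial_fit(X_seq, y_seq, classes=classes)
#     train_err = 1.0 - np.mean([esn.score(X_seq, y_seq)
#                                for X_seq, y_seq in zip(X_train, y_train)])
#     test_err = 1.0 - np.mean([esn.score(X_seq, y_seq)
#                               for X_seq, y_seq in zip(X_test, y_test)])
#     print(params, train_err, test_err)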
    X, y = extract_features(file, sr=4000., frame_length=21)
    X_val.append(X)
    y_val.append(y)
    print(X_val[-1].shape)
    print(y_val[-1].shape)
"""

base_input_to_node = InputToNode(hidden_layer_size=500, activation='identity',
                                 k_in=5, input_scaling=14.6, bias_scaling=0.0,
                                 random_state=1)
base_node_to_node = NodeToNode(hidden_layer_size=500, spectral_radius=0.8,
                               leakage=0.5, bias_scaling=0.5, k_rec=16,
                               bi_directional=True, random_state=1)
base_reg = FastIncrementalRegression(alpha=1.7e-10)

base_esn = ESNRegressor(input_to_node=[('default', base_input_to_node)],
                        node_to_node=[('default', base_node_to_node)],
                        regressor=base_reg,
                        random_state=0)

esn = base_esn

t1 = time.time()
Parallel(n_jobs=1, verbose=50)(
    delayed(train_esn)(base_input_to_node, base_node_to_node, base_reg,
                       frame_length, all_wavs_m)
class ESNRegressor(BaseEstimator, MultiOutputMixin, RegressorMixin):
    """
    Echo State Network regressor.

    This model optimizes the mean squared error loss function using
    linear regression.

    Parameters
    ----------
    input_to_node : iterable or Transformer, default=None
        List of (name, transform) tuples (implementing fit/transform) that
        are combined in a FeatureUnion, or a single input-to-node
        transformer. If None, a default InputToNode() is constructed from
        ``kwargs``.
    node_to_node : iterable or Transformer, default=None
        List of (name, transform) tuples (implementing fit/transform) that
        are combined in a FeatureUnion, or a single node-to-node
        transformer. If None, a default NodeToNode() is constructed from
        ``kwargs``.
    regressor : object, default=IncrementalRegression(alpha=.0001)
        Regressor object such as derived from ``RegressorMixin``. This
        regressor will automatically be cloned each time prior to fitting.
        The regressor cannot be None; omit the argument if in doubt.
    chunk_size : int, default=None
        If X.shape[0] > chunk_size, the results are computed incrementally
        with partial_fit.
    kwargs : dict, default=None
        Keyword arguments passed to the sub-estimators, if desired.
    """
    @_deprecate_positional_args
    def __init__(self, *, input_to_node=None, node_to_node=None,
                 regressor=None, chunk_size=None, **kwargs):
        if input_to_node is None:
            i2n_params = InputToNode()._get_param_names()
            self.input_to_node = InputToNode(**{
                key: kwargs[key] for key in kwargs.keys()
                if key in i2n_params})
        else:
            i2n_params = input_to_node._get_param_names()
            self.input_to_node = input_to_node.set_params(**{
                key: kwargs[key] for key in kwargs.keys()
                if key in i2n_params})
        if node_to_node is None:
            n2n_params = NodeToNode()._get_param_names()
            self.node_to_node = NodeToNode(**{
                key: kwargs[key] for key in kwargs.keys()
                if key in n2n_params})
        else:
            n2n_params = node_to_node._get_param_names()
            self.node_to_node = node_to_node.set_params(**{
                key: kwargs[key] for key in kwargs.keys()
                if key in n2n_params})
        if regressor is None:
            reg_params = IncrementalRegression()._get_param_names()
            self.regressor = IncrementalRegression(**{
                key: kwargs[key] for key in kwargs.keys()
                if key in reg_params})
        else:
            reg_params = regressor._get_param_names()
            self.regressor = regressor.set_params(**{
                key: kwargs[key] for key in kwargs.keys()
                if key in reg_params})
        self._chunk_size = chunk_size

    def get_params(self, deep=True):
        if deep:
            return {**self.input_to_node.get_params(),
                    **self.node_to_node.get_params(),
                    **{"alpha": self.regressor.get_params()["alpha"]}}
        else:
            return {"input_to_node": self.input_to_node,
                    "node_to_node": self.node_to_node,
                    "regressor": self.regressor,
                    "chunk_size": self.chunk_size}

    def set_params(self, **parameters):
        i2n_params = self.input_to_node._get_param_names()
        self.input_to_node = self.input_to_node.set_params(**{
            key: parameters[key] for key in parameters.keys()
            if key in i2n_params})
        n2n_params = self.node_to_node._get_param_names()
        self.node_to_node = self.node_to_node.set_params(**{
            key: parameters[key] for key in parameters.keys()
            if key in n2n_params})
        reg_params = self.regressor._get_param_names()
        self.regressor = self.regressor.set_params(**{
            key: parameters[key] for key in parameters.keys()
            if key in reg_params})
        for parameter, value in parameters.items():
            if parameter in self.get_params(deep=False):
                setattr(self, parameter, value)
        return self

    def partial_fit(self, X, y, n_jobs=None, transformer_weights=None,
                    postpone_inverse=False):
        """
        Fits the regressor partially.
        Parameters
        ----------
        X : {ndarray, sparse matrix} of shape (n_samples, n_features)
        y : {ndarray, sparse matrix} of shape (n_samples,) or
            (n_samples, n_targets)
            The targets to predict.
        n_jobs : int, default=None
            The number of jobs to run in parallel. ``-1`` means using all
            processors. See :term:`Glossary <n_jobs>` for more details.
        transformer_weights : ignored

        Returns
        -------
        self : Returns a trained ESNRegressor model.
        """
        if not hasattr(self._regressor, 'partial_fit'):
            raise BaseException(
                'Regressor has no attribute partial_fit, got {0}'.format(
                    self._regressor))
        self._validate_hyperparameters()
        self._validate_data(X=X, y=y, multi_output=True)

        # input_to_node
        try:
            hidden_layer_state = self._input_to_node.transform(X)
        except NotFittedError as e:
            print('input_to_node has not been fitted yet: {0}'.format(e))
            hidden_layer_state = self._input_to_node.fit_transform(X)

        # node_to_node
        try:
            hidden_layer_state = self._node_to_node.transform(
                hidden_layer_state)
        except NotFittedError as e:
            print('node_to_node has not been fitted yet: {0}'.format(e))
            hidden_layer_state = self._node_to_node.fit_transform(
                hidden_layer_state)

        # regression
        if self._regressor:
            self._regressor.partial_fit(
                hidden_layer_state[self.node_to_node.wash_out:, :],
                y[self.node_to_node.wash_out:, :],
                postpone_inverse=postpone_inverse)
        return self

    def fit(self, X, y, n_jobs=None, transformer_weights=None):
        """
        Fits the regressor.

        Parameters
        ----------
        X : {ndarray, sparse matrix} of shape (n_samples, n_features)
        y : {ndarray, sparse matrix} of shape (n_samples,) or
            (n_samples, n_targets)
            The targets to predict.
        n_jobs : int, default=None
            The number of jobs to run in parallel. ``-1`` means using all
            processors. See :term:`Glossary <n_jobs>` for more details.
        transformer_weights : ignored

        Returns
        -------
        self : Returns a trained ESNRegressor model.
        """
        self._validate_hyperparameters()
        self._validate_data(X, y, multi_output=True)
        self._input_to_node.fit(X)
        self._node_to_node.fit(self._input_to_node.transform(X))
        self._regressor = self._regressor.__class__()

        if self._chunk_size is None or self._chunk_size > X.shape[0]:
            # input_to_node
            hidden_layer_state = self._input_to_node.transform(X)
            hidden_layer_state = self._node_to_node.transform(
                hidden_layer_state)
            # regression
            self._regressor.fit(
                hidden_layer_state[self.node_to_node.wash_out:, :],
                y[self.node_to_node.wash_out:, :])
        elif self._chunk_size < X.shape[0]:
            # setup chunk list
            chunks = list(range(0, X.shape[0], self._chunk_size))
            # postpone the inverse calculation for the chunks 1 .. n-1
            for idx in chunks[:-1]:
                ESNRegressor.partial_fit(
                    self, X=X[idx:idx + self._chunk_size, ...],
                    y=y[idx:idx + self._chunk_size, ...], n_jobs=n_jobs,
                    transformer_weights=transformer_weights,
                    postpone_inverse=True)
            # last chunk: calculate the inverse and the bias
            ESNRegressor.partial_fit(
                self, X=X[chunks[-1]:, ...], y=y[chunks[-1]:, ...],
                n_jobs=n_jobs, transformer_weights=transformer_weights,
                postpone_inverse=False)
        else:
            raise ValueError('chunk_size invalid {0}'.format(
                self._chunk_size))
        return self

    def predict(self, X):
        """
        Predicts the targets using the trained ESN regressor.
        Parameters
        ----------
        X : {ndarray, sparse matrix} of shape (n_samples, n_features)

        Returns
        -------
        y : {ndarray, sparse matrix} of shape (n_samples,) or
            (n_samples, n_targets)
            The predicted targets.
        """
        if self._input_to_node is None or self._regressor is None:
            raise NotFittedError(self)

        hidden_layer_state = self._input_to_node.transform(X)
        hidden_layer_state = self._node_to_node.transform(hidden_layer_state)
        return self._regressor.predict(hidden_layer_state)

    def _validate_hyperparameters(self):
        """Validates the hyperparameters.

        Returns
        -------
        """
        if not (hasattr(self.input_to_node, "fit")
                and hasattr(self.input_to_node, "fit_transform")
                and hasattr(self.input_to_node, "transform")):
            raise TypeError("All input_to_node should be transformers and "
                            "implement fit and transform; '%s' (type %s) "
                            "doesn't" % (self.input_to_node,
                                         type(self.input_to_node)))
        if not (hasattr(self.node_to_node, "fit")
                and hasattr(self.node_to_node, "fit_transform")
                and hasattr(self.node_to_node, "transform")):
            raise TypeError("All node_to_node should be transformers and "
                            "implement fit and transform; '%s' (type %s) "
                            "doesn't" % (self.node_to_node,
                                         type(self.node_to_node)))
        if self._chunk_size is not None and (
                not isinstance(self._chunk_size, int)
                or self._chunk_size < 0):
            raise ValueError('Invalid value for chunk_size, got {0}'.format(
                self._chunk_size))
        if not is_regressor(self._regressor):
            raise TypeError("The last step should be a regressor and "
                            "implement fit and predict; '%s' (type %s) "
                            "doesn't" % (self._regressor,
                                         type(self._regressor)))

    def __sizeof__(self):
        """Returns the size of the object in bytes.

        Returns
        -------
        size : int
            Object memory in bytes.
        """
        return object.__sizeof__(self) + \
            sys.getsizeof(self._input_to_node) + \
            sys.getsizeof(self._node_to_node) + \
            sys.getsizeof(self._regressor)

    @property
    def regressor(self):
        """Returns the regressor.

        Returns
        -------
        regressor : regressor or None
        """
        return self._regressor

    @regressor.setter
    def regressor(self, regressor):
        """Sets the regressor.

        Parameters
        ----------
        regressor : regressor or None

        Returns
        -------
        """
        self._regressor = regressor

    @property
    def input_to_node(self):
        """Returns the input_to_node list or the input_to_node Transformer.

        Returns
        -------
        input_to_node : Transformer or [Transformer]
        """
        return self._input_to_node

    @input_to_node.setter
    def input_to_node(self, input_to_node, n_jobs=None,
                      transformer_weights=None):
        """Sets the input_to_node list or the input_to_node Transformer.

        Parameters
        ----------
        input_to_node : Transformer or [Transformer]
        n_jobs : int, default=None
            Number of jobs to run in parallel. None means 1 unless in a
            joblib.parallel_backend context. -1 means using all processors.
        transformer_weights : dict, default=None
            Multiplicative weights for features per transformer. Keys are
            transformer names, values the weights. Raises ValueError if a
            key is not present in the transformer_list.

        Returns
        -------
        """
        if hasattr(input_to_node, '__iter__'):
            # FeatureUnion of a list of input_to_node transformers
            self._input_to_node = FeatureUnion(
                transformer_list=input_to_node, n_jobs=n_jobs,
                transformer_weights=transformer_weights)
        else:
            # single input_to_node transformer
            self._input_to_node = input_to_node

    @property
    def node_to_node(self):
        """Returns the node_to_node list or the node_to_node Transformer.

        Returns
        -------
        node_to_node : Transformer or [Transformer]
        """
        return self._node_to_node

    @property
    def hidden_layer_state(self):
        """Returns the hidden_layer_state, i.e. the reservoir state over time.
        Returns
        -------
        hidden_layer_state : np.ndarray
        """
        return self._node_to_node._hidden_layer_state

    @node_to_node.setter
    def node_to_node(self, node_to_node, n_jobs=None,
                     transformer_weights=None):
        """Sets the node_to_node list or the node_to_node Transformer.

        Parameters
        ----------
        node_to_node : Transformer or [Transformer]
        n_jobs : int, default=None
            Number of jobs to run in parallel. None means 1 unless in a
            joblib.parallel_backend context. -1 means using all processors.
        transformer_weights : dict, default=None
            Multiplicative weights for features per transformer. Keys are
            transformer names, values the weights. Raises ValueError if a
            key is not present in the transformer_list.

        Returns
        -------
        """
        if hasattr(node_to_node, '__iter__'):
            # FeatureUnion of a list of node_to_node transformers
            self._node_to_node = FeatureUnion(
                transformer_list=node_to_node, n_jobs=n_jobs,
                transformer_weights=transformer_weights)
        else:
            # single node_to_node transformer
            self._node_to_node = node_to_node

    @property
    def chunk_size(self):
        """Returns the chunk_size, in which X will be chopped.

        Returns
        -------
        chunk_size : int or None
        """
        return self._chunk_size

    @chunk_size.setter
    def chunk_size(self, chunk_size):
        """Sets the chunk_size, in which X will be chopped.

        Parameters
        ----------
        chunk_size : int or None

        Returns
        -------
        """
        self._chunk_size = chunk_size
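# Hedged usage sketch for the ESNRegressor class above (illustrative only;
# the hyper-parameter values are arbitrary assumptions, not recommendations):
#
# import numpy as np
# X = np.linspace(0, 10, 2000).reshape(-1, 1)
# y = np.sin(X)
# esn = ESNRegressor(hidden_layer_size=50, spectral_radius=0.9,
#                    alpha=1e-5, chunk_size=500)
# # chunk_size=500 < X.shape[0], so fit() internally calls partial_fit() on
# # 500-sample chunks and postpones the inverse until the last chunk.
# esn.fit(X, y)
# y_pred = esn.predict(X)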
plt.show()

# At first, show the impact of different input scaling factors.
#
# Therefore, we neutralize the other hyper-parameters, i.e., no recurrent
# connections ($\rho = 0$), no bias ($\alpha_{\mathrm{b}} = 0$) and no
# leakage ($\lambda = 1$).

# In[3]:

esn = ESNRegressor(input_to_node=InputToNode(hidden_layer_size=50,
                                             activation='identity', k_in=1,
                                             input_scaling=0.1,
                                             bias_scaling=0.0),
                   node_to_node=NodeToNode(hidden_layer_size=50,
                                           spectral_radius=0.0, leakage=1.0,
                                           bias_scaling=0.0, k_rec=10),
                   regressor=Ridge(alpha=1e-6),
                   random_state=10)

esn.fit(X=X, y=y)
_ = esn.predict(X=X)

# Visualizing this, we can see exactly what we might expect. We have chosen an
# input scaling factor of 0.1, so the reservoir states are non-zero for
# exactly one sample: they are zero at all times except for $n=5$, when the
# impulse is fed into the ESN.
#
# The absolute values of the reservoir states lie between 0 and 0.1.

# In[5]:

plt.figure()
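# Hedged sketch of a possible visualization for the cell started above
# (illustrative only; the original plotting code is not part of this
# excerpt). It relies on the `hidden_layer_state` property exposed by
# ESNRegressor, which returns the reservoir states over time.
#
# im = plt.imshow(np.abs(esn.hidden_layer_state.T), vmin=0, vmax=0.1)
# plt.xlabel('n')
# plt.ylabel('reservoir neuron index')
# plt.colorbar(im)
# plt.show()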