import numpy as np
from sklearn.neural_network import MLPRegressor


def constructANN(input_size, hidden_layers=(16, 16)):
    """Builds an MLPRegressor whose weights are usable before fit() runs."""
    hidden_layers = list(hidden_layers)
    # The constructor takes only the hidden layer sizes; input and output
    # dimensions are normally inferred on the first call to fit().
    ann = MLPRegressor(hidden_layer_sizes=hidden_layers,
                       activation='logistic')
    # MLPRegressor defers weight/random-state setup to fit(), so seed the
    # private random state and initialize the weights manually instead.
    ann._random_state = np.random.RandomState(np.random.randint(2**32))
    ann._initialize(np.empty((1, 1)), [input_size] + hidden_layers + [1])
    ann.out_activation_ = 'logistic'
    return ann
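# A minimal usage sketch (an assumption on my part: it targets older
# scikit-learn releases where the private _initialize(y, layer_units)
# signature used above still holds). The network predicts immediately,
# with random weights, even though fit() has never run.
ann = constructANN(input_size=4)
x = np.random.rand(3, 4)   # three instances, four features each
print(ann.predict(x))      # untrained logistic outputs, one per instance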
from typing import List, Tuple, Union

import numpy as np
from numpy.random import RandomState
from sklearn.neural_network import MLPRegressor
from sklearn.utils import check_random_state


class Neural(Approximator):  # Approximator is the project's own base class
    """
    A fully-connected neural network to approximate a function.

    Args:
    * indim: Number of input features.
    * outdim: Number of outputs.
    * hidden_layer_sizes: A tuple of ints representing the number of units
      in each hidden layer.
    * random_state: Integer seed or `np.random.RandomState` instance.
    * norm (Tuple[Tuple[float, float]]): Bounds used to normalize input to
      [0, 1]. Must be the same length as the features. Of the form
      ((low, high), (low, high), ...).
    * default (float): The default value to return if predict is called
      before fit.
    * kwargs: Any keyword arguments to be fed to
      `sklearn.neural_network.MLPRegressor`, which fits the function.
      Hard-coded arguments are `warm_start` and `max_iter`.
    """

    def __init__(self, indim: int, outdim: int, hidden_layer_sizes: Tuple[int],
                 norm: Tuple[Tuple[float, float]] = None, default=0.,
                 random_state: Union[int, RandomState] = None, **kwargs):
        self.default = default
        self.indim = indim
        self.outdim = outdim
        kwargs['random_state'] = random_state
        self.model = MLPRegressor(hidden_layer_sizes, **kwargs)
        # The model does not initialize weights/random state until the first
        # call to fit(). Setting up everything early so the `weights`
        # property is accessible from the get-go. Once initialized, the model
        # won't reset values. See sklearn.utils.check_random_state() and
        # MLPRegressor._fit().
        self.model._random_state = check_random_state(self.model.random_state)
        self.model._initialize(
            y=np.ones((1, outdim)),
            layer_units=[indim, *hidden_layer_sizes, outdim])
        self.bounds = np.asarray(norm).T if norm is not None else None
        self.range = (self.bounds[1] - self.bounds[0]
                      if norm is not None else None)

    def _project(self, x: np.ndarray) -> np.ndarray:
        """
        Normalizes an array of features. Truncates them to `indim` features
        per instance.
        """
        if self.bounds is not None:
            return (x - self.bounds[0]) / self.range
        return x

    def update(self, x: Union[np.ndarray, Tuple], y: Union[np.ndarray, Tuple]):
        """
        Incrementally updates the function approximation using stochastic
        gradient descent.

        Args:
        * x (Tuple/np.ndarray): A *2D* array with a single instance per row.
        * y (Tuple/np.ndarray): A *2D* array of values to be learned,
          corresponding to each row of features in `x`.
        """
        x, y = np.asarray(x), np.asarray(y)
        # If the number of outputs is 1, sklearn wants a 1D array instead of
        # a 2D array with one column.
        if self.outdim == 1:
            self.model.partial_fit(self._project(x), y.ravel())
        else:
            self.model.partial_fit(self._project(x), y)

    def predict(self, x: Union[np.ndarray, Tuple]) -> np.ndarray:
        """
        Predicts the value of the learned function at the input `x`.

        Args:
        * x (Tuple/np.ndarray): A *2D* array with a single instance per row.

        Returns:
        * A *2D* array of predictions for each instance in `x`.
        """
        x = np.asarray(x)
        projection = self._project(x)
        return self.model.predict(projection).reshape(-1, self.outdim)

    @property
    def weights(self) -> Tuple[List[np.ndarray], List[np.ndarray]]:
        """
        Returns a tuple of:

        * A list of bias vectors (one per layer after the input), where each
          element is the bias for a unit in that layer.
        * A list of weight matrices, one per pair of adjacent layers.
        """
        return (self.model.intercepts_, self.model.coefs_)

    @weights.setter
    def weights(self, w: Tuple[List[np.ndarray], List[np.ndarray]]):
        self.model.intercepts_[:] = w[0][:]
        self.model.coefs_[:] = w[1][:]
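# A hedged usage sketch (the values are hypothetical; Approximator comes
# from the surrounding project): approximate f(a, b) = a + b on inputs
# normalized from [0, 10] x [0, 10] down to [0, 1] x [0, 1].
net = Neural(indim=2, outdim=1, hidden_layer_sizes=(8, 8),
             norm=((0., 10.), (0., 10.)), random_state=0)
x = np.random.uniform(0, 10, size=(64, 2))
y = x.sum(axis=1, keepdims=True)
for _ in range(100):            # repeated SGD passes via partial_fit
    net.update(x, y)
print(net.predict([[2., 3.]]))  # should approach [[5.]]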
    # assumes module-level imports: copy, random, numpy as np, MLPRegressor,
    # and the project's getFromDict/setInDict/Structure helpers
    def generate(self, seed=None, thread_num=None):
        """
        Generates a random structure by sampling new values for the genes
        defined in the parser, either from a randomly initialized ANN or
        uniformly at random.

        Parameters
        ----------
        seed : A random seed for setting the weights.
        thread_num : Offset added to the generation counter so that parallel
            workers produce unique structure identifiers.

        Returns
        -------
        A Structure wrapping the new parser configuration.
        """
        # keep regenerating until the generated structure passes the checks
        ann = self.parser.getGAParameters()['ann']
        random.seed(seed)
        np.random.seed(seed)
        if ann:
            clean_generation = False
            while not clean_generation:
                new_parser = copy.deepcopy(self.parser)
                ann_params = self.parser.getAnnParameters()
                # the ANN maps a constant input vector to one output per gene
                ann = MLPRegressor(
                    hidden_layer_sizes=tuple(ann_params['neurons']) +
                    (len(self.parser.getGenes()), ),
                    activation=ann_params['activation'])
                layers = [ann_params['neurons'][0]] + ann_params['neurons'] + [
                    len(self.parser.getGenes())
                ]
                input_vec = np.ones((1, ann_params['neurons'][0]))
                output_vec = np.empty((1, len(self.parser.getGenes())))
                # initialize weights manually so predict() works without fit()
                ann._random_state = np.random.RandomState(seed)
                ann._initialize(output_vec, layers)
                ann.out_activation_ = ann_params['activation']
                new_parser.ann = ann
                outputs = new_parser.ann.predict(input_vec)

                old_config = self.parser.getConfig()
                new_config = new_parser.getConfig()
                for gene, output in zip(self.parser.getGenes(), outputs[0]):
                    val = getFromDict(old_config, gene['path'])
                    # map the ANN output in [0, 1] onto the gene's range
                    new_val = (gene['range'][1] -
                               gene['range'][0]) * output + gene['range'][0]
                    setInDict(new_config, gene['path'], new_val)
                    new_config, clean_generation = self.checkAndUpdate(
                        new_config, gene, val, new_val)
                new_parser.updateConfig(new_config)
            identifier = self.n_generated + thread_num
            history = [self.n_generated]
            s = Structure(new_parser, identifier, history)
            return s
        else:
            clean_generation = False
            while not clean_generation:
                new_parser = copy.deepcopy(self.parser)
                old_config = self.parser.getConfig()
                new_config = new_parser.getConfig()
                for gene in self.parser.getGenes():
                    val = getFromDict(old_config, gene['path'])
                    new_val = (gene['range'][1] - gene['range'][0]
                               ) * np.random.uniform() + gene['range'][0]
                    setInDict(new_config, gene['path'], new_val)
                    new_config, clean_generation = self.checkAndUpdate(
                        new_config, gene, val, new_val)
                new_parser.updateConfig(new_config)
            identifier = self.n_generated + thread_num
            s = Structure(new_parser, identifier, [])
            return s
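# The mapping above is the heart of generate(): an ANN output in [0, 1]
# (e.g. from a logistic output activation) is scaled linearly onto each
# gene's allowed range. A standalone sketch, with the gene dict shape
# assumed from the code above:
def map_to_gene_range(output, gene):
    low, high = gene['range']
    return (high - low) * output + low

# e.g. an output of 0.25 on a gene ranging over (2.0, 6.0) yields 3.0
assert map_to_gene_range(0.25, {'range': (2.0, 6.0)}) == 3.0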
import math
import sys

import numpy as np
import tensorflow as tf
from keras.callbacks import EarlyStopping
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from keras.regularizers import l2
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPRegressor
from sklearn.utils import check_random_state


class Autoencoder:
    def __init__(self,
                 hidden_nodes,
                 weights_init=None,
                 weight_initializer='random_normal',
                 biases_init=None,
                 activation='sigmoid',
                 batch_size=1,
                 batch_norm=False,
                 learning_rate=1e-3,
                 momentum=0.9,
                 regularization=0,
                 optimizer='adam',
                 max_epochs=sys.maxsize,
                 convergence_criterion=(0, 10),
                 backend='keras'):

        if backend not in ['tensorflow', 'keras', 'sklearn']:
            raise ValueError("invalid backend: {}".format(backend))

        self._hidden_nodes = hidden_nodes
        self._weights_init = weights_init
        self._weight_initializer = weight_initializer
        self._biases_init = biases_init
        self._activation = activation
        self._batch_size = batch_size
        self._batch_norm = batch_norm
        self._learning_rate = learning_rate
        self._momentum = momentum
        self._regularization = regularization
        self._optimizer = optimizer
        self._max_epochs = max_epochs
        self._conv = convergence_criterion
        self._backend = backend

    @staticmethod
    def gridsearch(inputs,
                   batch_sizes,
                   learning_rates,
                   regularizations,
                   kwargs_model=None,
                   cv=10,
                   verbose=False):

        kwargs_model = kwargs_model or {}

        backend = kwargs_model.get('backend', 'keras')
        if backend != 'keras':
            err = "gridsearch currently only implemented for keras backend"
            raise ValueError(err)

        # AutoencoderFactoryKeras is a project helper that builds a fresh
        # keras model for each parameter combination
        build_fn = AutoencoderFactoryKeras(inputs, kwargs_model)

        epochs = kwargs_model.get('max_epochs', sys.maxsize)
        estimator = KerasRegressor(build_fn=build_fn, epochs=epochs, verbose=0)

        search = GridSearchCV(estimator=estimator,
                              scoring='neg_mean_absolute_error',
                              cv=cv,
                              param_grid={
                                  'bs': batch_sizes,
                                  'lr': learning_rates,
                                  'reg': regularizations
                              },
                              return_train_score=True,
                              error_score=np.nan,
                              n_jobs=1,
                              verbose=(51 if verbose else 0))

        conv = kwargs_model.get('convergence_criterion', (0, 10))
        callbacks = [
            EarlyStopping(monitor='loss', min_delta=conv[0], patience=conv[1])
        ]

        return search.fit(inputs, inputs, callbacks=callbacks)

    def train(self,
              inputs,
              inputs_val=None,
              epochs=None,
              learning_curve=False,
              verbose=False):

        kwargs = {
            'inputs_val': inputs_val,
            'epochs': epochs,
            'learning_curve': learning_curve,
            'verbose': verbose
        }

        if self._backend == 'tensorflow':
            self._init_model_tensorflow(inputs)
            return self._train_tensorflow(inputs, **kwargs)
        elif self._backend == 'sklearn':
            self._init_model_sklearn(inputs)
            return self._train_sklearn(inputs, **kwargs)
        elif self._backend == 'keras':
            self._init_model_keras(inputs)
            return self._train_keras(inputs, **kwargs)

    def _init_model_tensorflow(self, inputs):
        if self._weights_init is not None or self._biases_init is not None:
            err = "custom weights currently not supported by tf backend"
            raise ValueError(err)

    def _train_tensorflow(self, inputs, inputs_val, epochs, learning_curve,
                          verbose):

        if inputs_val is not None:
            err = "tf backend currently does not support validation"
            raise ValueError(err)

        if learning_curve and learning_curve != 'mse':
            err = "tf backend currently only supports MSE"
            raise ValueError(err)

        # process dataset
        dataset = tf.data.Dataset.from_tensor_slices(inputs)
        dataset = dataset.batch(self._batch_size)
        dataset = dataset.repeat()
        dataset = dataset.shuffle(10, inputs.shape[0])

        dataset_it = dataset.make_one_shot_iterator()
        input_layer = dataset_it.get_next()

        # construct hidden and output layer
        layer_settings = {}

        layer_settings['activation'] = {
            'sigmoid': tf.nn.sigmoid,
            'relu': tf.nn.relu,
            'elu': tf.nn.elu
        }[self._activation]

        layer_settings['kernel_initializer'] = {
            'random_normal': tf.initializers.random_normal,
            'xavier': tf.contrib.layers.xavier_initializer(uniform=False),
            'he': tf.contrib.layers.variance_scaling_initializer()
        }[self._weight_initializer]

        if self._regularization > 0:
            layer_settings['kernel_regularizer'] = \
                tf.contrib.layers.l2_regularizer(self._regularization)

        def layer(prev, nodes):
            res = tf.layers.dense(prev, nodes, **layer_settings)

            if self._batch_norm:
                res = tf.layers.batch_normalization(res,
                                                    training=True,
                                                    momentum=0.9)

            return res

        hidden_layer = layer(input_layer, self._hidden_nodes)
        output_layer = layer(hidden_layer, inputs.shape[1])

        # set up optimization
        optimizer = {
            'momentum':
            tf.train.MomentumOptimizer(self._learning_rate, self._momentum),
            'momentum_nesterov':
            tf.train.MomentumOptimizer(self._learning_rate,
                                       self._momentum,
                                       use_nesterov=True),
            'adam':
            tf.train.AdamOptimizer(self._learning_rate)
        }[self._optimizer]

        loss = tf.reduce_mean(tf.square(output_layer - input_layer))
        training_op = optimizer.minimize(loss)

        # train model
        errors = []
        batches = inputs.shape[0] // self._batch_size

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            if epochs is None:
                err = "early stopping currently not supported by tf backend"
                raise ValueError(err)

            for e in range(epochs):
                for _ in range(batches):
                    _, loss_ = sess.run([training_op, loss])

                # determine current error
                errors.append(loss_)

                if verbose:
                    self._show_progress(e, epochs)

        # return learning curve
        if learning_curve:
            epochs = list(range(1, len(errors) + 1))
            return epochs, errors

    def _init_model_keras(self, inputs):
        # construct network; `initializer` wraps a fixed weight matrix in the
        # callable interface keras expects from custom initializers
        def initializer(weights):
            def res(shape, dtype=None):
                assert shape == res.weights.shape

                if dtype is not None:
                    return res.weights.astype(dtype)

                return res.weights

            res.weights = weights
            return res

        if self._weights_init is not None:
            kernel_init_hidden = initializer(self._weights_init[0])
            kernel_init_output = initializer(self._weights_init[1])
        else:
            kernel_init_hidden = kernel_init_output = {
                'random_normal': 'RandomNormal',
                'xavier': 'glorot_normal',
                'he': 'he_normal'
            }[self._weight_initializer]

        if self._biases_init is not None:
            bias_init_hidden = initializer(self._biases_init[0])
            bias_init_output = initializer(self._biases_init[1])
        else:
            bias_init_hidden = 'Zeros'
            bias_init_output = 'Zeros'

        hidden_layer = Dense(
            self._hidden_nodes,
            input_shape=(inputs.shape[1], ),
            activation=self._activation,
            kernel_initializer=kernel_init_hidden,
            bias_initializer=bias_init_hidden,
            kernel_regularizer=l2(self._regularization),
        )

        output_layer = Dense(inputs.shape[1],
                             activation=self._activation,
                             kernel_initializer=kernel_init_output,
                             bias_initializer=bias_init_output,
                             kernel_regularizer=l2(self._regularization))

        self._model = Sequential([hidden_layer, output_layer])

        # set up optimization
        opt = {
            'momentum':
            SGD(lr=self._learning_rate, momentum=self._momentum),
            'momentum_nesterov':
            SGD(lr=self._learning_rate,
                momentum=self._momentum,
                nesterov=True),
            'adam':
            Adam(lr=self._learning_rate)
        }[self._optimizer]

        self._model.compile(optimizer=opt, loss='mean_squared_error')

    def _train_keras(self, inputs, inputs_val, epochs, learning_curve,
                     verbose):
        # initialize learning-curve storage
        if learning_curve:
            errors = []

            if inputs_val is not None:
                errors_val = []

        # define convergence criterion
        if epochs is None:
            callbacks = [
                EarlyStopping(monitor='loss',
                              min_delta=self._conv[0],
                              patience=self._conv[1],
                              verbose=(1 if verbose else 0))
            ]

            epochs = self._max_epochs
        else:
            callbacks = []

        # set up validation set
        if inputs_val is not None:
            validation_data = (inputs_val, inputs_val)
        else:
            validation_data = None

        # train model one epoch at a time so errors can be recorded
        for e in range(epochs):
            h = self._model.fit(inputs,
                                inputs,
                                validation_data=validation_data,
                                batch_size=self._batch_size,
                                epochs=(e + 1),
                                initial_epoch=e,
                                callbacks=callbacks,
                                verbose=0)

            # determine current error
            if learning_curve:
                if learning_curve == 'mse':
                    errors.append(h.history['loss'][0])

                    if inputs_val is not None:
                        errors_val.append(h.history['val_loss'][0])

                elif learning_curve == 'total':
                    errors.append(self.error(inputs))

                    if inputs_val is not None:
                        errors_val.append(self.error(inputs_val))

            # show progress
            if verbose:
                self._show_progress(e, epochs)

        # return learning curve
        if learning_curve:
            epochs = list(range(1, len(errors) + 1))

            if inputs_val is not None:
                return epochs, errors, errors_val
            else:
                return epochs, errors

    def _init_model_sklearn(self, inputs):
        self._model = MLPRegressor(
            # structure
            hidden_layer_sizes=(self._hidden_nodes, ),
            # activation functions
            activation='logistic',
            # solver
            solver='sgd',
            warm_start=True,
            # batch size
            batch_size=self._batch_size,
            # learning rate
            learning_rate='constant',
            learning_rate_init=self._learning_rate,
            # momentum
            momentum=self._momentum,
            nesterovs_momentum=True,
            # regularization
            alpha=self._regularization,
            # convergence
            max_iter=self._max_epochs,
            tol=self._conv[0],
            n_iter_no_change=self._conv[1])

    def _train_sklearn(self, inputs, inputs_val, epochs, learning_curve,
                       verbose):

        if learning_curve and learning_curve != 'total':
            err = "sklearn backend currently only supports total error"
            raise ValueError(err)

        # initialize weights and biases
        if self._weights_init is None:
            self._weights_init = [
                np.random.randn(inputs.shape[1], self._hidden_nodes),
                np.random.randn(self._hidden_nodes, inputs.shape[1])
            ]

        if self._biases_init is None:
            self._biases_init = [
                np.zeros(self._hidden_nodes),
                np.zeros(inputs.shape[1])
            ]

        # hack ahead, scikit-learn's awful MLPRegressor interface ordinarily
        # does not allow manual weight initialization; initialize the private
        # state once, then inject the custom weights before training starts
        self._model.n_outputs_ = inputs.shape[1]
        self._model._random_state = check_random_state(
            self._model.random_state)
        self._model._initialize(
            inputs, [inputs.shape[1], self._hidden_nodes, inputs.shape[1]])
        self._model.coefs_ = self._weights_init
        self._model.intercepts_ = self._biases_init

        # initialize learning curve
        if learning_curve:
            total_errors = []

            if inputs_val is not None:
                total_errors_val = []

        best_total_error = math.inf
        dead_epochs = 0

        epoch = 0
        while True:
            self._model = self._model.partial_fit(inputs, inputs)

            # determine current error
            total_error = self.error(inputs)

            if learning_curve:
                total_errors.append(total_error)

                if inputs_val is not None:
                    total_errors_val.append(self.error(inputs_val))

            # show progress
            if verbose:
                self._show_progress(epoch, epochs)

            # check for convergence
            epoch += 1

            if epochs is None:
                if total_error >= best_total_error - self._conv[0]:
                    dead_epochs += 1

                    if dead_epochs == self._conv[1]:
                        break

                if total_error < best_total_error:
                    best_total_error = total_error
                    dead_epochs = 0
            else:
                if epoch >= epochs:
                    break

        # return learning curve
        if learning_curve:
            total_epochs = list(range(1, len(total_errors) + 1))

            if inputs_val is not None:
                return total_epochs, total_errors, total_errors_val
            else:
                return total_epochs, total_errors

    def predict(self, i):
        return self._model.predict(i.reshape(1, len(i)))

    def error(self, inputs):
        total_error = 0

        for i in inputs:
            pred = self.predict(i)
            total_error += np.mean(np.abs(pred - i))

        return total_error

    @staticmethod
    def _show_progress(e, epochs):
        if epochs is None:
            print("\repoch {}".format(e), end='')
        else:
            bar = '=' * int(50 * (e + 1) / epochs)
            progress = "[{:<50}] epoch {}/{}".format(bar, e + 1, epochs)
            print("\r" + progress, end='')
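# A hedged usage sketch (assuming the keras backend and the imports listed
# above are available): train a 2-unit bottleneck autoencoder on random
# 8-dimensional data for a fixed number of epochs, then report the error.
data = np.random.rand(100, 8)

ae = Autoencoder(hidden_nodes=2,
                 batch_size=10,
                 learning_rate=1e-2,
                 backend='keras')

ae.train(data, epochs=50)
print(ae.error(data))   # summed mean-absolute reconstruction error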
# Adapted from sklearn's MLPRegressor._fit(); assumes module-level
# `import warnings`, `import numpy as np` and
# `from sklearn.utils import check_random_state`.

# TODO: ensure the array is 2-D, i.e. the input y should be
# np.array([[1, 2, 3]]), which is a 1-row, 3-column array
# if y.ndim == 1:
#     y = y.reshape((-1, 1))
mlp_estimator.n_outputs_ = y.shape[1]

layer_units = ([n_features] + hidden_layer_sizes +
               [mlp_estimator.n_outputs_])

# check random state
mlp_estimator._random_state = check_random_state(mlp_estimator.random_state)

incremental = False
if not hasattr(mlp_estimator, 'coefs_') or (not mlp_estimator.warm_start
                                            and not incremental):
    # First time training the model
    mlp_estimator._initialize(y, layer_units)

# lbfgs does not support mini-batches
if mlp_estimator.solver == 'lbfgs':
    batch_size = n_samples
elif mlp_estimator.batch_size == 'auto':
    batch_size = min(200, n_samples)
else:
    if mlp_estimator.batch_size < 1 or mlp_estimator.batch_size > n_samples:
        warnings.warn("Got `batch_size` less than 1 or larger than "
                      "sample size. It is going to be clipped")
    batch_size = np.clip(mlp_estimator.batch_size, 1, n_samples)

# Initialize lists (completed as in sklearn's _fit: one activation buffer
# per layer after the input)
activations = [X]
activations.extend(np.empty((batch_size, n_fan_out))
                   for n_fan_out in layer_units[1:])
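# Why this setup matters, as a hedged standalone sketch: once coefs_ and
# intercepts_ exist, partial_fit() continues from whatever weights were
# injected instead of re-initializing. The weight values here are arbitrary
# illustration values, and the private calls assume an sklearn version whose
# _initialize takes (y, layer_units).
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.utils import check_random_state

X = np.random.rand(50, 3)
y = X.sum(axis=1, keepdims=True)

mlp = MLPRegressor(hidden_layer_sizes=(5, ), solver='sgd')
mlp.n_outputs_ = 1
mlp._random_state = check_random_state(mlp.random_state)
mlp._initialize(y, [3, 5, 1])

rng = np.random.RandomState(0)
mlp.coefs_ = [0.1 * rng.randn(3, 5), 0.1 * rng.randn(5, 1)]  # custom weights
mlp.intercepts_ = [np.zeros(5), np.zeros(1)]                 # custom biases
mlp.partial_fit(X, y.ravel())                                # resumes from them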