def test_stacked_lstm(self):
    x_train, x_test, y_train, y_test = self.data
    network = algorithms.RMSProp(
        [
            layers.Input(self.n_time_steps),
            layers.Embedding(self.n_categories, 10),
            layers.LSTM(
                n_units=10,
                only_return_final=False,
                input_weights=init.Normal(0.1),
                hidden_weights=init.Normal(0.1),
            ),
            layers.LSTM(
                n_units=2,
                input_weights=init.Normal(0.1),
                hidden_weights=init.Normal(0.1),
            ),
            layers.Sigmoid(1),
        ],
        step=0.05,
        verbose=False,
        batch_size=1,
        loss='binary_crossentropy',
    )
    network.train(x_train, y_train, x_test, y_test, epochs=20)

    y_predicted = network.predict(x_test).round()
    accuracy = (y_predicted.T == y_test).mean()
    self.assertGreaterEqual(accuracy, 0.8)
def create_VIN(input_image_shape=(8, 8, 2), n_hidden_filters=150,
               n_state_filters=10, k=10):

    SamePadConvolution = partial(layers.Convolution, padding='SAME',
                                 bias=None)

    R = layers.join(
        layers.Input(input_image_shape, name='grid-input'),
        layers.Convolution((3, 3, n_hidden_filters), padding='SAME',
                           weight=init.Normal(), bias=init.Normal()),
        SamePadConvolution((1, 1, 1), weight=init.Normal()),
    )

    # Create shared weights
    q_weight = random_weight((3, 3, 1, n_state_filters))
    fb_weight = random_weight((3, 3, 1, n_state_filters))

    Q = R > SamePadConvolution((3, 3, n_state_filters), weight=q_weight)

    for i in range(k):
        V = Q > ChannelGlobalMaxPooling()
        Q = layers.join(
            # Convolve R and V separately and then add outputs together with
            # the Elementwise layer. This part of the code looks different
            # from the one that was used in the original VIN repo, but
            # it does the same operation.
            #
            # conv(x, w) == (conv(x1, w1) + conv(x2, w2))
            # where, x = concat(x1, x2)
            #        w = concat(w1, w2)
            #
            # See code sample from Github Gist: https://bit.ly/2zm3ntN
            [[
                R,
                SamePadConvolution((3, 3, n_state_filters), weight=q_weight)
            ], [
                V,
                SamePadConvolution((3, 3, n_state_filters), weight=fb_weight)
            ]],
            layers.Elementwise(merge_function=tf.add),
        )

    input_state_1 = layers.Input(UNKNOWN, name='state-1-input')
    input_state_2 = layers.Input(UNKNOWN, name='state-2-input')

    # Select the conv-net channels at the state position (S1, S2)
    VIN = [Q, input_state_1, input_state_2] > SelectValueAtStatePosition()

    # Set up softmax layer that predicts actions based on the (S1, S2)
    # position. Each action encodes a specific direction:
    # N, S, E, W, NE, NW, SE, SW (in that order)
    VIN = VIN > layers.Softmax(8, bias=None, weight=init.Normal())

    return VIN
def test_variable_creation(self):
    weight = np.ones((3, 3))
    var1 = tf_utils.create_variable(weight, name='var1', shape=(3, 3))
    self.assertShapesEqual(var1.shape, (3, 3))

    var2 = tf_utils.create_variable(5, name='var2', shape=(4, 3))
    self.assertShapesEqual(var2.shape, (4, 3))
    np.testing.assert_array_almost_equal(
        self.eval(var2), 5 * np.ones((4, 3)))

    initializer = init.Normal()
    var3 = tf_utils.create_variable(initializer, name='var3', shape=(4, 7))
    self.assertShapesEqual(var3.shape, (4, 7))

    weight = tf.Variable(np.ones((3, 3)), dtype=tf.float32)
    var4 = tf_utils.create_variable(weight, name='var4', shape=(3, 3))
    self.assertShapesEqual(var4.shape, (3, 3))
    self.assertIs(var4, weight)

    weight = np.ones((3, 4))
    with self.assertRaisesRegexp(ValueError, "Cannot create variable"):
        tf_utils.create_variable(weight, name='var5', shape=(3, 3))

    weight = tf.Variable(np.ones((4, 3)), dtype=tf.float32)
    with self.assertRaisesRegexp(ValueError, "Cannot create variable"):
        tf_utils.create_variable(weight, name='var6', shape=(3, 3))
def test_reproducibility(self):
    normal = init.Normal(mean=0, std=0.01, seed=0)

    weight1 = normal.sample((10, 20), return_array=True)
    weight2 = normal.sample((10, 20), return_array=True)

    np.testing.assert_array_almost_equal(weight1, weight2)
def test_normal_initializer(self):
    norm = init.Normal(mean=0, std=0.01)

    weight = self.eval(norm.sample((30, 30)))
    self.assertNormalyDistributed(weight)

    weight = norm.sample((30, 30), return_array=True)
    self.assertNormalyDistributed(weight)
def create_VIN(input_image_shape=(2, 8, 8), n_hidden_filters=150,
               n_state_filters=10, k=10):

    HalfPaddingConv = partial(layers.Convolution, padding='half', bias=None)

    R = layers.join(
        layers.Input(input_image_shape, name='grid-input'),
        layers.Convolution((n_hidden_filters, 3, 3), padding='half',
                           weight=init.Normal(), bias=init.Normal()),
        HalfPaddingConv((1, 1, 1), weight=init.Normal()),
    )

    # Create shared weights
    q_weight = random_weight((n_state_filters, 1, 3, 3))
    fb_weight = random_weight((n_state_filters, 1, 3, 3))

    Q = R > HalfPaddingConv((n_state_filters, 3, 3), weight=q_weight)

    for i in range(k):
        V = Q > GlobalMaxPooling()
        Q = layers.join(
            # Convolve R and V separately and then add
            # outputs together with the Elementwise layer
            [[
                R,
                HalfPaddingConv((n_state_filters, 3, 3), weight=q_weight)
            ], [
                V,
                HalfPaddingConv((n_state_filters, 3, 3), weight=fb_weight)
            ]],
            layers.Elementwise(merge_function=T.add),
        )

    input_state_1 = layers.Input(10, name='state-1-input')
    input_state_2 = layers.Input(10, name='state-2-input')

    # Select the conv-net channels at the state position (S1, S2)
    VIN = [Q, input_state_1, input_state_2] > SelectValueAtStatePosition()

    # Set up softmax layer that predicts actions based on the (S1, S2)
    # position. Each action encodes a specific direction:
    # N, S, E, W, NE, NW, SE, SW (in that order)
    VIN = VIN > layers.Softmax(8, bias=None, weight=init.Normal())

    return VIN
def test_layer_copy(self):
    relu = layers.Relu(10, weight=init.Normal(), bias=None)
    copied_relu = copy.copy(relu)

    self.assertEqual(relu.name, 'relu-1')
    self.assertEqual(copied_relu.name, 'relu-2')

    self.assertIsInstance(relu.weight, init.Normal)
    self.assertIsNone(relu.bias)
def test_normal_reproducible_with_outside_seed(self):
    norm = init.Normal(mean=0, std=0.01)

    np.random.seed(0)
    weight1 = norm.sample((10, 4), return_array=True)

    np.random.seed(0)
    weight2 = norm.sample((10, 4), return_array=True)

    np.testing.assert_array_almost_equal(weight1, weight2)
def create_VIN(input_image_shape=(8, 8, 2), n_hidden_filters=150,
               n_state_filters=10, k=10):

    # Default initialization method
    normal = init.Normal()

    # Create shared weights
    q_weight = create_random_weight((3, 3, 1, n_state_filters))
    fb_weight = create_random_weight((3, 3, 1, n_state_filters))

    # Define basic layers
    SamePadConv = partial(Convolution, padding='SAME', bias=None)

    R = join(
        Input(input_image_shape, name='grid-input'),
        SamePadConv((3, 3, n_hidden_filters), weight=normal, bias=normal),
        SamePadConv((1, 1, 1), weight=normal),
    )

    Q = R >> SamePadConv((3, 3, n_state_filters), weight=q_weight)

    for i in range(k):
        V = Q >> ChannelGlobalMaxPooling()
        Q = join(
            # Convolve R and V separately and then add outputs together with
            # the Elementwise layer. This part of the code looks different
            # from the one that was used in the original VIN repo, but
            # it does the same operation.
            #
            # conv(x, w) == (conv(x1, w1) + conv(x2, w2))
            # where, x = concat(x1, x2)
            #        w = concat(w1, w2)
            #
            # See code sample from Github Gist: https://bit.ly/2zm3ntN
            parallel(
                R >> SamePadConv((3, 3, n_state_filters), weight=q_weight),
                V >> SamePadConv((3, 3, n_state_filters), weight=fb_weight),
            ),
            Elementwise('add'),
        )

    input_state_1 = Input(UNKNOWN, name='state-1-input')
    input_state_2 = Input(UNKNOWN, name='state-2-input')

    # Select the conv-net channels at the state position (S1, S2)
    VIN = (Q | input_state_1 | input_state_2) >> SelectValueAtStatePosition()

    # Set up softmax layer that predicts actions based on the (S1, S2)
    # position. Each action encodes a specific direction:
    # N, S, E, W, NE, NW, SE, SW (in that order)
    VIN = VIN >> Softmax(8, bias=None, weight=normal)

    return VIN
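# The concat-convolution identity cited in the comment above can be checked
# numerically. A minimal sketch with plain TensorFlow 2.x eager ops (nothing
# here comes from the NeuPy API; all names are local to this check):
import numpy as np
import tensorflow as tf

rng = np.random.RandomState(0)
x1 = rng.randn(1, 8, 8, 1).astype('float32')   # NHWC, one channel each
x2 = rng.randn(1, 8, 8, 1).astype('float32')
w1 = rng.randn(3, 3, 1, 10).astype('float32')  # HWIO, 10 output filters
w2 = rng.randn(3, 3, 1, 10).astype('float32')

# conv(x, w) where x = concat(x1, x2) and w = concat(w1, w2)
x = np.concatenate([x1, x2], axis=-1)  # shape (1, 8, 8, 2)
w = np.concatenate([w1, w2], axis=2)   # shape (3, 3, 2, 10)
joint = tf.nn.conv2d(x, w, strides=1, padding='SAME')

# conv(x1, w1) + conv(x2, w2), computed separately and added elementwise
separate = (tf.nn.conv2d(x1, w1, strides=1, padding='SAME') +
            tf.nn.conv2d(x2, w2, strides=1, padding='SAME'))

np.testing.assert_allclose(joint.numpy(), separate.numpy(), rtol=1e-4)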
def test_lvq_weight_initialization_state(self):
    lvqnet = algorithms.LVQ(n_inputs=2, n_classes=2)
    self.assertFalse(lvqnet.initialized)

    lvqnet.train(np.random.random((10, 2)),
                 np.random.random(10).round(), epochs=1)
    self.assertTrue(lvqnet.initialized)

    lvqnet = algorithms.LVQ(n_inputs=2, n_classes=3,
                            weight=np.random.random((2, 3)))
    self.assertTrue(lvqnet.initialized)

    lvqnet = algorithms.LVQ(n_inputs=2, n_classes=3, weight=init.Normal())
    self.assertTrue(lvqnet.initialized)
    self.assertEqual(lvqnet.weight.shape, (2, 3))
def test_sofm_angle_distance(self):
    sn = algorithms.SOFM(
        n_inputs=2,
        n_outputs=3,
        transform='cos',
        learning_radius=1,
        features_grid=(3, 1),
        weight=init.Normal(mean=0, std=1),
        verbose=False,
    )
    sn.train(input_data, epochs=6)

    answers = np.array([
        [1., 0., 0.],
        [1., 0., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 0., 1.],
        [0., 0., 1.],
    ])
    np.testing.assert_array_almost_equal(sn.predict(input_data), answers)
class BaseAssociative(BaseNetwork):
    """
    Base class for associative learning.

    Parameters
    ----------
    n_inputs : int
        Number of features (columns) in the input data.

    n_outputs : int
        Number of outputs in the network.

    weight : array-like, Initializer
        Neural network weights. A manually defined value should have
        shape ``(n_inputs, n_outputs)``. Defaults to
        :class:`Normal() <neupy.init.Normal>`.

    {BaseNetwork.step}
    {BaseNetwork.show_epoch}
    {BaseNetwork.shuffle_data}
    {BaseNetwork.epoch_end_signal}
    {BaseNetwork.train_end_signal}
    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}

    train(input_train, summary='table', epochs=100)
        Train neural network.

    {BaseSkeleton.fit}
    """
    n_inputs = IntProperty(minval=1, required=True)
    n_outputs = IntProperty(minval=1, required=True)
    weight = ParameterProperty(default=init.Normal())

    def __init__(self, **options):
        super(BaseAssociative, self).__init__(**options)
        self.init_layers()

    def init_layers(self):
        valid_weight_shape = (self.n_inputs, self.n_outputs)

        if isinstance(self.weight, init.Initializer):
            self.weight = self.weight.sample(
                valid_weight_shape, return_array=True)

        if self.weight.shape != valid_weight_shape:
            raise ValueError(
                "Weight matrix has invalid shape. Got {}, expected {}"
                "".format(self.weight.shape, valid_weight_shape))

        self.weight = self.weight.astype(float)

    def format_input_data(self, input_data):
        is_feature1d = self.n_inputs == 1
        input_data = format_data(input_data, is_feature1d)

        if input_data.ndim != 2:
            raise ValueError("Cannot make prediction, because input "
                             "data has more than 2 dimensions")

        n_samples, n_features = input_data.shape

        if n_features != self.n_inputs:
            raise ValueError("Input data expected to have {} features, "
                             "but got {}".format(self.n_inputs, n_features))

        return input_data

    def train(self, input_train, summary='table', epochs=100):
        input_train = self.format_input_data(input_train)
        return super(BaseAssociative, self).train(
            input_train=input_train, target_train=None,
            input_test=None, target_test=None,
            epochs=epochs, epsilon=None, summary=summary)
def test_normal_initialize_repr(self):
    normal_initializer = init.Normal(mean=0, std=0.01)
    self.assertEqual("Normal(mean=0, std=0.01)", str(normal_initializer))
class SOFM(Kohonen):
    """
    Self-Organizing Feature Map (SOFM or SOM).

    Notes
    -----
    - Training data samples should have normalized features.

    Parameters
    ----------
    {BaseAssociative.n_inputs}

    n_outputs : int or None
        Number of outputs. Parameter is optional if
        ``features_grid`` was specified.

        .. code-block:: python

            if n_outputs is None:
                n_outputs = np.prod(features_grid)

    learning_radius : int
        Parameter defines the radius within which we consider all
        neurons as neighbours of the winning neuron. The bigger
        the value, the more neurons will be updated after each
        iteration.

        The ``0`` value means that we don't update
        neighbour neurons.

        Defaults to ``0``.

    std : int, float
        Parameter controls the learning rate of each neighbour.
        The further a neighbour neuron is from the winning
        neuron, the smaller its learning rate. Learning rates are
        scaled by factors produced by the normal distribution
        centered at the winning neuron, with the standard deviation
        specified by this parameter. The learning rate for the
        winning neuron is always equal to the value specified in
        the ``step`` parameter, and for neighbour neurons it's
        always lower.

        The bigger the value of this parameter, the bigger the
        learning rate for the neighbour neurons.

        Defaults to ``1``.

    features_grid : list, tuple, None
        Feature grid defines the shape of the output neurons.
        The new shape should be compatible with the number
        of outputs. It means that the following condition
        should be true:

        .. code-block:: python

            np.prod(features_grid) == n_outputs

        SOFM implementation supports n-dimensional grids.
        For instance, in order to specify the grid as a cube
        instead of the regular rectangular shape we can set up
        options in the following way:

        .. code-block:: python

            SOFM(
                ...
                features_grid=(5, 5, 5),
                ...
            )

        Defaults to ``(n_outputs, 1)``.

    grid_type : {{``rect``, ``hexagon``}}
        Defines the connection type in the feature grid. Type
        defines which neurons we will consider as closest to the
        winning neuron during the training.

        - ``rect`` - Connections between neurons will be
          organized in a rectangular grid.

        - ``hexagon`` - Connections between neurons will be
          organized in a hexagonal grid. It works only for
          1d or 2d grids.

        Defaults to ``rect``.

    distance : {{``euclid``, ``dot_product``, ``cos``}}
        Defines the function that will be used to find the
        weight closest to the input sample.

        - ``dot_product``: Just a regular dot product between
          data sample and network's weights

        - ``euclid``: Euclidean distance between data sample
          and network's weights

        - ``cos``: Cosine distance between data sample and
          network's weights

        Defaults to ``euclid``.

    reduce_radius_after : int or None
        Every specified number of epochs the ``learning_radius``
        parameter will be reduced by ``1``. The process continues
        until ``learning_radius`` equals ``0``.

        The ``None`` value disables parameter reduction
        during the training.

        Defaults to ``100``.

    reduce_step_after : int or None
        Defines the reduction rate at which parameter ``step``
        will be reduced using the following formula:

        .. code-block:: python

            step = step / (1 + current_epoch / reduce_step_after)

        The ``None`` value disables parameter reduction
        during the training.

        Defaults to ``100``.

    reduce_std_after : int or None
        Defines the reduction rate at which parameter ``std``
        will be reduced using the following formula:

        .. code-block:: python

            std = std / (1 + current_epoch / reduce_std_after)

        The ``None`` value disables parameter reduction
        during the training.

        Defaults to ``100``.

    weight : array-like, Initializer or {{``init_pca``, ``sample_from_data``}}
        Neural network weights. A manually defined value should
        have shape ``(n_inputs, n_outputs)``.

        Also, it's possible to initialize weights based on the
        training data. There are two options:

        - ``sample_from_data`` - Before training starts, a number
          of training samples equal to the number of expected
          outputs will be picked randomly and used as the initial
          weights.

        - ``init_pca`` - Before training starts, SOFM will apply
          PCA to a covariance matrix built from the training
          samples. Weights will be generated based on the two
          eigenvectors associated with the largest eigenvalues.

        Defaults to :class:`Normal() <neupy.init.Normal>`.

    {BaseNetwork.step}
    {BaseNetwork.show_epoch}
    {BaseNetwork.shuffle_data}
    {BaseNetwork.signals}
    {Verbose.verbose}

    Methods
    -------
    init_weights(train_data)
        Initializes weights based on the input data. It works
        only for the `init_pca` and `sample_from_data` options.
        For other cases it will throw an error.

    {BaseSkeleton.predict}
    {BaseAssociative.train}
    {BaseSkeleton.fit}

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms, utils
    >>>
    >>> utils.reproducible()
    >>>
    >>> data = np.array([
    ...     [0.1961, 0.9806],
    ...     [-0.1961, 0.9806],
    ...     [-0.5812, -0.8137],
    ...     [-0.8137, -0.5812],
    ... ])
    >>>
    >>> sofm = algorithms.SOFM(
    ...     n_inputs=2,
    ...     n_outputs=2,
    ...     step=0.1,
    ...     learning_radius=0
    ... )
    >>> sofm.train(data, epochs=100)
    >>> sofm.predict(data)
    array([[0, 1],
           [0, 1],
           [1, 0],
           [1, 0]])
    """
    n_outputs = IntProperty(minval=1, allow_none=True, default=None)

    weight = SOFMWeightParameter(default=init.Normal(), choices={
        'init_pca': linear_initialization,
        'sample_from_data': sample_data,
    })
    features_grid = TypedListProperty(allow_none=True, default=None)

    DistanceParameter = namedtuple('DistanceParameter', 'name func')
    distance = ChoiceProperty(default='euclid', choices={
        'dot_product': DistanceParameter(name='dot_product', func=np.dot),
        'euclid': DistanceParameter(name='euclid', func=neg_euclid_distance),
        'cos': DistanceParameter(name='cosine', func=cosine_similarity),
    })

    GridTypeMethods = namedtuple(
        'GridTypeMethods', 'name find_neighbours find_step_scaler')

    grid_type = ChoiceProperty(default='rect', choices={
        'rect': GridTypeMethods(
            name='rectangle',
            find_neighbours=find_neighbours_on_rect_grid,
            find_step_scaler=find_step_scaler_on_rect_grid),

        'hexagon': GridTypeMethods(
            name='hexagon',
            find_neighbours=find_neighbours_on_hexagon_grid,
            find_step_scaler=find_step_scaler_on_hexagon_grid),
    })

    learning_radius = IntProperty(default=0, minval=0)
    std = NumberProperty(minval=0, default=1)

    reduce_radius_after = IntProperty(default=100, minval=1, allow_none=True)
    reduce_std_after = IntProperty(default=100, minval=1, allow_none=True)
    reduce_step_after = IntProperty(default=100, minval=1, allow_none=True)

    def __init__(self, **options):
        super(BaseAssociative, self).__init__(**options)

        if self.n_outputs is None and self.features_grid is None:
            raise ValueError("One of the following parameters has to be "
                             "specified: n_outputs, features_grid")

        elif self.n_outputs is None:
            self.n_outputs = np.prod(self.features_grid)

        n_grid_elements = np.prod(self.features_grid)
        invalid_feature_grid = (
            self.features_grid is not None and
            n_grid_elements != self.n_outputs)

        if invalid_feature_grid:
            raise ValueError(
                "Feature grid should contain the same number of elements "
                "as in the output layer: {0}, but found: {1} (shape: {2})"
                "".format(self.n_outputs, n_grid_elements,
                          self.features_grid))

        if self.features_grid is None:
            self.features_grid = (self.n_outputs, 1)

        if len(self.features_grid) > 2 and self.grid_type.name == 'hexagon':
            raise ValueError(
                "SOFM with hexagon grid type should have "
                "one or two dimensional feature grid, but got "
                "{}d instead (shape: {!r})".format(
                    len(self.features_grid), self.features_grid))

        is_pca_init = (
            isinstance(options.get('weight'), six.string_types) and
            options.get('weight') == 'init_pca')

        self.initialized = False

        if not callable(self.weight):
            super(Kohonen, self).init_weights()
            self.initialized = True

            if self.distance.name == 'cosine':
                self.weight /= np.linalg.norm(self.weight, axis=0)

        elif is_pca_init and self.grid_type.name != 'rectangle':
            raise WeightInitializationError(
                "Cannot apply PCA weight initialization for "
                "non-rectangular grid. Grid type: {}"
                "".format(self.grid_type.name))

    def predict_raw(self, X):
        X = format_data(X, is_feature1d=(self.n_inputs == 1))

        if X.ndim != 2:
            raise ValueError("Only 2D inputs are allowed")

        n_samples = X.shape[0]
        output = np.zeros((n_samples, self.n_outputs))

        for i, input_row in enumerate(X):
            output[i, :] = self.distance.func(
                input_row.reshape(1, -1), self.weight)

        return output

    def update_indexes(self, layer_output):
        neuron_winner = layer_output.argmax(axis=1).item(0)
        winner_neuron_coords = np.unravel_index(
            neuron_winner, self.features_grid)

        learning_radius = self.learning_radius
        step = self.step
        std = self.std

        if self.reduce_radius_after is not None:
            learning_radius -= self.last_epoch // self.reduce_radius_after
            learning_radius = max(0, learning_radius)

        if self.reduce_step_after is not None:
            step = decay_function(step, self.last_epoch,
                                  self.reduce_step_after)

        if self.reduce_std_after is not None:
            std = decay_function(std, self.last_epoch,
                                 self.reduce_std_after)

        methods = self.grid_type
        output_grid = np.reshape(layer_output, self.features_grid)

        output_with_neighbours = methods.find_neighbours(
            grid=output_grid,
            center=winner_neuron_coords,
            radius=learning_radius)

        step_scaler = methods.find_step_scaler(
            grid=output_grid,
            center=winner_neuron_coords,
            std=std)

        index_y, = np.nonzero(
            output_with_neighbours.reshape(self.n_outputs))

        step_scaler = step_scaler.reshape(self.n_outputs)
        return index_y, step * step_scaler[index_y]

    def init_weights(self, X_train):
        if self.initialized:
            raise WeightInitializationError(
                "Weights have been already initialized")

        weight_initializer = self.weight
        self.weight = weight_initializer(X_train, self.features_grid)
        self.initialized = True

        if self.distance.name == 'cosine':
            self.weight /= np.linalg.norm(self.weight, axis=0)

    def train(self, X_train, epochs=100):
        if not self.initialized:
            self.init_weights(X_train)
        super(SOFM, self).train(X_train, epochs=epochs)

    def one_training_update(self, X_train, y_train=None):
        step = self.step
        predict = self.predict
        update_indexes = self.update_indexes

        error = 0
        for input_row in X_train:
            input_row = np.reshape(input_row, (1, input_row.size))

            layer_output = predict(input_row)
            index_y, step = update_indexes(layer_output)

            distance = input_row.T - self.weight[:, index_y]
            updated_weights = (self.weight[:, index_y] + step * distance)

            if self.distance.name == 'cosine':
                updated_weights /= np.linalg.norm(updated_weights, axis=0)

            self.weight[:, index_y] = updated_weights
            error += np.abs(distance).mean()

        return error / len(X_train)
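# A minimal usage sketch of the data-driven weight initialization described
# in the docstring above. With a string option the network starts
# uninitialized and samples its weights on the first train() call (the data
# here is made up for illustration):
import numpy as np
from neupy import algorithms

data = np.random.uniform(0, 1, (100, 2))

sofm = algorithms.SOFM(
    n_inputs=2,
    features_grid=(4, 4),        # n_outputs inferred as np.prod((4, 4))
    weight='sample_from_data',
)
assert not sofm.initialized

sofm.train(data, epochs=10)      # calls init_weights(data) internally
assert sofm.initialized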
def random_weight(shape):
    initializer = init.Normal()
    weight = initializer.sample(shape)
    return tf.Variable(asfloat(weight), dtype=tf.float32)
class RBM(BaseAlgorithm, BaseNetwork, MinibatchTrainingMixin,
          DumpableObject):
    """
    Boolean/Bernoulli Restricted Boltzmann Machine (RBM).
    Algorithm assumes that inputs are either binary
    values or values between 0 and 1.

    Parameters
    ----------
    n_visible : int
        Number of visible units. Number of features (columns)
        in the input data.

    n_hidden : int
        Number of hidden units. The larger the number, the more
        information the network can capture from the data, but it
        also means that the network is more likely to overfit.

    batch_size : int
        Size of the mini-batch. Defaults to ``10``.

    weight : array-like, Tensorflow variable, Initializer or scalar
        Default initialization methods you can find
        :ref:`here <init-methods>`.
        Defaults to :class:`Normal <neupy.init.Normal>`.

    hidden_bias : array-like, Tensorflow variable, Initializer or scalar
        Default initialization methods you can find
        :ref:`here <init-methods>`.
        Defaults to :class:`Constant(value=0) <neupy.init.Constant>`.

    visible_bias : array-like, Tensorflow variable, Initializer or scalar
        Default initialization methods you can find
        :ref:`here <init-methods>`.
        Defaults to :class:`Constant(value=0) <neupy.init.Constant>`.

    {BaseNetwork.Parameters}

    Methods
    -------
    train(input_train, epochs=100)
        Trains network.

    {BaseSkeleton.fit}

    visible_to_hidden(visible_input)
        Propagates data through the network and returns output
        from the hidden layer.

    hidden_to_visible(hidden_input)
        Propagates output from the hidden layer backward
        to the visible.

    gibbs_sampling(visible_input, n_iter=1)
        Runs Gibbs sampling ``n`` times using visible input.

    Examples
    --------
    >>> import numpy as np
    >>> from neupy import algorithms
    >>>
    >>> data = np.array([
    ...     [1, 0, 1, 0],
    ...     [1, 0, 1, 0],
    ...     [1, 0, 0, 0],  # incomplete sample
    ...     [1, 0, 1, 0],
    ...
    ...     [0, 1, 0, 1],
    ...     [0, 0, 0, 1],  # incomplete sample
    ...     [0, 1, 0, 1],
    ...     [0, 1, 0, 1],
    ...     [0, 1, 0, 1],
    ...     [0, 1, 0, 1],
    ... ])
    >>>
    >>> rbm = algorithms.RBM(n_visible=4, n_hidden=1)
    >>> rbm.train(data, epochs=100)
    >>>
    >>> hidden_states = rbm.visible_to_hidden(data)
    >>> hidden_states.round(2)
    array([[ 0.99],
           [ 0.99],
           [ 0.95],
           [ 0.99],
           [ 0.  ],
           [ 0.01],
           [ 0.  ],
           [ 0.  ],
           [ 0.  ],
           [ 0.  ]])

    References
    ----------
    [1] G. Hinton, A Practical Guide to Training
        Restricted Boltzmann Machines, 2010.
        http://www.cs.toronto.edu/~hinton/absps/guideTR.pdf
    """
    n_visible = IntProperty(minval=1)
    n_hidden = IntProperty(minval=1)
    batch_size = IntProperty(minval=1, default=10)

    weight = ParameterProperty(default=init.Normal())
    hidden_bias = ParameterProperty(default=init.Constant(value=0))
    visible_bias = ParameterProperty(default=init.Constant(value=0))

    def __init__(self, n_visible, n_hidden, **options):
        options.update({'n_visible': n_visible, 'n_hidden': n_hidden})
        super(RBM, self).__init__(**options)

    def init_input_output_variables(self):
        with tf.variable_scope('rbm'):
            self.weight = create_shared_parameter(
                value=self.weight,
                name='weight',
                shape=(self.n_visible, self.n_hidden))

            self.hidden_bias = create_shared_parameter(
                value=self.hidden_bias,
                name='hidden-bias',
                shape=(self.n_hidden,),
            )
            self.visible_bias = create_shared_parameter(
                value=self.visible_bias,
                name='visible-bias',
                shape=(self.n_visible,),
            )

            self.variables.update(
                network_input=tf.placeholder(
                    tf.float32, (None, self.n_visible),
                    name="network-input",
                ),
                network_hidden_input=tf.placeholder(
                    tf.float32, (None, self.n_hidden),
                    name="network-hidden-input",
                ),
            )

    def init_variables(self):
        with tf.variable_scope('rbm'):
            self.variables.update(
                h_samples=tf.Variable(
                    tf.zeros([self.batch_size, self.n_hidden]),
                    name="hidden-samples",
                    dtype=tf.float32,
                ),
            )

    def init_methods(self):
        def free_energy(visible_sample):
            with tf.name_scope('free-energy'):
                wx = tf.matmul(visible_sample, self.weight)
                wx_b = wx + self.hidden_bias

                visible_bias_term = dot(visible_sample, self.visible_bias)

                # We can get infinity when wx_b is a relatively large number
                # (maybe 100). Taking the exponent makes it even larger and
                # with float32 it can turn into infinity. But because the
                # number is so large, we don't care about the +1 term before
                # taking the logarithm and therefore we can just pick the
                # value as it is, since this operation won't change anything.
                hidden_terms = tf.where(
                    # exp(30) is such a big number that +1 won't
                    # make any difference in the outcome.
                    tf.greater(wx_b, 30),
                    wx_b,
                    tf.log1p(tf.exp(wx_b)),
                )
                hidden_term = tf.reduce_sum(hidden_terms, axis=1)

                return -(visible_bias_term + hidden_term)

        def visible_to_hidden(visible_sample):
            with tf.name_scope('visible-to-hidden'):
                wx = tf.matmul(visible_sample, self.weight)
                wx_b = wx + self.hidden_bias
                return tf.nn.sigmoid(wx_b)

        def hidden_to_visible(hidden_sample):
            with tf.name_scope('hidden-to-visible'):
                wx = tf.matmul(hidden_sample, self.weight, transpose_b=True)
                wx_b = wx + self.visible_bias
                return tf.nn.sigmoid(wx_b)

        def sample_hidden_from_visible(visible_sample):
            with tf.name_scope('sample-hidden-to-visible'):
                hidden_prob = visible_to_hidden(visible_sample)
                hidden_sample = random_binomial(hidden_prob)
                return hidden_sample

        def sample_visible_from_hidden(hidden_sample):
            with tf.name_scope('sample-visible-to-hidden'):
                visible_prob = hidden_to_visible(hidden_sample)
                visible_sample = random_binomial(visible_prob)
                return visible_sample

        network_input = self.variables.network_input
        network_hidden_input = self.variables.network_hidden_input
        input_shape = tf.shape(network_input)
        n_samples = input_shape[0]

        weight = self.weight
        h_bias = self.hidden_bias
        v_bias = self.visible_bias
        h_samples = self.variables.h_samples
        step = asfloat(self.step)

        with tf.name_scope('positive-values'):
            # We have to use `cond` instead of `where`, because
            # different if-else cases might have different shapes
            # and it triggers exception in tensorflow.
            v_pos = tf.cond(
                tf.equal(n_samples, self.batch_size),
                lambda: network_input,
                lambda: random_sample(network_input, self.batch_size))
            h_pos = visible_to_hidden(v_pos)

        with tf.name_scope('negative-values'):
            v_neg = sample_visible_from_hidden(h_samples)
            h_neg = visible_to_hidden(v_neg)

        with tf.name_scope('weight-update'):
            weight_update = (
                tf.matmul(v_pos, h_pos, transpose_a=True) -
                tf.matmul(v_neg, h_neg, transpose_a=True)
            ) / asfloat(n_samples)

        with tf.name_scope('hidden-bias-update'):
            h_bias_update = tf.reduce_mean(h_pos - h_neg, axis=0)

        with tf.name_scope('visible-bias-update'):
            v_bias_update = tf.reduce_mean(v_pos - v_neg, axis=0)

        with tf.name_scope('flipped-input-features'):
            # Each row will have a random feature marked with the number 1.
            # Other values will be equal to 0.
            possible_feature_corruptions = tf.eye(self.n_visible)
            corrupted_features = random_sample(
                possible_feature_corruptions, n_samples)

            rounded_input = tf.round(network_input)
            # If we scale input values from the [0, 1] range to [-1, 1],
            # then it will be easier to flip feature values with a simple
            # multiplication.
            scaled_rounded_input = 2 * rounded_input - 1
            scaled_flipped_rounded_input = (
                # for corrupted_features we convert 0 to 1 and 1 to -1;
                # in this way after multiplication we will flip all
                # signs where -1 is in the transformed corrupted_features
                (-2 * corrupted_features + 1) * scaled_rounded_input
            )
            # Scale it back to the [0, 1] range
            flipped_rounded_input = (scaled_flipped_rounded_input + 1) / 2

        with tf.name_scope('pseudo-likelihood-loss'):
            # Stochastic pseudo-likelihood
            error = tf.reduce_mean(
                self.n_visible * tf.log_sigmoid(
                    free_energy(flipped_rounded_input) -
                    free_energy(rounded_input)))

        with tf.name_scope('gibbs-sampling'):
            gibbs_sampling = sample_visible_from_hidden(
                sample_hidden_from_visible(network_input))

        initialize_uninitialized_variables()
        self.methods.update(
            train_epoch=function(
                [network_input],
                error,
                name='rbm/train-epoch',
                updates=[
                    (weight, weight + step * weight_update),
                    (h_bias, h_bias + step * h_bias_update),
                    (v_bias, v_bias + step * v_bias_update),
                    (h_samples, random_binomial(p=h_neg)),
                ]),
            prediction_error=function(
                [network_input],
                error,
                name='rbm/prediction-error',
            ),
            diff1=function(
                [network_input],
                free_energy(flipped_rounded_input),
                name='rbm/diff1-error',
            ),
            diff2=function(
                [network_input],
                free_energy(rounded_input),
                name='rbm/diff2-error',
            ),
            visible_to_hidden=function(
                [network_input],
                visible_to_hidden(network_input),
                name='rbm/visible-to-hidden',
            ),
            hidden_to_visible=function(
                [network_hidden_input],
                hidden_to_visible(network_hidden_input),
                name='rbm/hidden-to-visible',
            ),
            gibbs_sampling=function(
                [network_input],
                gibbs_sampling,
                name='rbm/gibbs-sampling',
            ),
        )

    def train(self, input_train, input_test=None, epochs=100,
              summary='table'):
        """
        Train RBM.

        Parameters
        ----------
        input_train : 1D or 2D array-like
        input_test : 1D or 2D array-like or None
            Defaults to ``None``.
        epochs : int
            Number of training epochs. Defaults to ``100``.
        summary : {'table', 'inline'}
            Training summary type. Defaults to ``'table'``.
        """
        return super(RBM, self).train(
            input_train=input_train, target_train=None,
            input_test=input_test, target_test=None,
            epochs=epochs, epsilon=None, summary=summary)

    def train_epoch(self, input_train, target_train=None):
        """
        Train one epoch.

        Parameters
        ----------
        input_train : array-like (n_samples, n_features)

        Returns
        -------
        float
        """
        errors = self.apply_batches(
            function=self.methods.train_epoch,
            input_data=input_train,
            description='Training batches',
            show_error_output=True,
        )

        n_samples = len(input_train)
        return average_batch_errors(errors, n_samples, self.batch_size)

    def visible_to_hidden(self, visible_input):
        """
        Propagates data through the network and returns
        output from the hidden layer.

        Parameters
        ----------
        visible_input : array-like (n_samples, n_visible_features)

        Returns
        -------
        array-like
        """
        is_input_feature1d = (self.n_visible == 1)
        visible_input = format_data(visible_input, is_input_feature1d)

        outputs = self.apply_batches(
            function=self.methods.visible_to_hidden,
            input_data=visible_input,
            description='Hidden from visible batches',
            show_progressbar=True,
            show_error_output=False,
            scalar_output=False,
        )
        return np.concatenate(outputs, axis=0)

    def hidden_to_visible(self, hidden_input):
        """
        Propagates output from the hidden layer
        backward to the visible.

        Parameters
        ----------
        hidden_input : array-like (n_samples, n_hidden_features)

        Returns
        -------
        array-like
        """
        is_input_feature1d = (self.n_hidden == 1)
        hidden_input = format_data(hidden_input, is_input_feature1d)

        outputs = self.apply_batches(
            function=self.methods.hidden_to_visible,
            input_data=hidden_input,
            description='Visible from hidden batches',
            show_progressbar=True,
            show_error_output=False,
            scalar_output=False,
        )
        return np.concatenate(outputs, axis=0)

    def prediction_error(self, input_data, target_data=None):
        """
        Compute the pseudo-likelihood of input samples.

        Parameters
        ----------
        input_data : array-like
            Values of the visible layer

        Returns
        -------
        float
            Value of the pseudo-likelihood.
        """
        is_input_feature1d = (self.n_visible == 1)
        input_data = format_data(input_data, is_input_feature1d)

        errors = self.apply_batches(
            function=self.methods.prediction_error,
            input_data=input_data,
            description='Validation batches',
            show_error_output=True,
        )
        return average_batch_errors(
            errors,
            n_samples=len(input_data),
            batch_size=self.batch_size,
        )

    def gibbs_sampling(self, visible_input, n_iter=1):
        """
        Runs Gibbs sampling ``n`` times using visible input.

        Parameters
        ----------
        visible_input : 1d or 2d array
        n_iter : int
            Number of Gibbs sampling iterations. Defaults to ``1``.

        Returns
        -------
        array-like
            Output from the visible units after performing n
            Gibbs samples. Array will contain only binary
            units (0 and 1).
        """
        is_input_feature1d = (self.n_visible == 1)
        visible_input = format_data(visible_input, is_input_feature1d)

        gibbs_sampling = self.methods.gibbs_sampling

        input_ = visible_input
        for iteration in range(n_iter):
            input_ = gibbs_sampling(input_)
        return input_
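# The overflow guard inside free_energy() is the standard numerically stable
# softplus. A standalone NumPy sketch of the same idea (the helper name is
# hypothetical, not part of NeuPy):
import numpy as np

def stable_softplus(wx_b, threshold=30):
    wx_b = np.asarray(wx_b, dtype='float32')
    # Past the threshold, log(1 + exp(x)) ~= x, so return x directly and
    # clip the argument of exp() to avoid computing inf in the other branch.
    return np.where(
        wx_b > threshold,
        wx_b,
        np.log1p(np.exp(np.minimum(wx_b, threshold))))

print(stable_softplus([-5., 0., 100.]))  # ~[0.0067, 0.6931, 100.0]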
class BaseAssociative(BaseNetwork):
    """
    Base class for associative learning.

    Parameters
    ----------
    n_inputs : int
        Number of features (columns) in the input data.

    n_outputs : int
        Number of outputs in the network.

    weight : array-like, Initializer
        Neural network weights. A manually defined value should have
        shape ``(n_inputs, n_outputs)``. Defaults to
        :class:`Normal() <neupy.init.Normal>`.

    {BaseNetwork.Parameters}

    Methods
    -------
    {BaseSkeleton.predict}

    train(X_train, epochs=100)
        Train neural network.

    {BaseSkeleton.fit}
    """
    n_inputs = IntProperty(minval=1, required=True)
    n_outputs = IntProperty(minval=1, required=True)
    weight = ParameterProperty(default=init.Normal())

    def __init__(self, **options):
        super(BaseAssociative, self).__init__(**options)
        self.init_weights()

    def init_weights(self):
        valid_weight_shape = (self.n_inputs, self.n_outputs)

        if isinstance(self.weight, init.Initializer):
            self.weight = self.weight.sample(
                valid_weight_shape, return_array=True)

        if self.weight.shape != valid_weight_shape:
            raise ValueError(
                "Weight matrix has invalid shape. Got {}, expected {}"
                "".format(self.weight.shape, valid_weight_shape))

        self.weight = self.weight.astype(float)

    def format_input_data(self, X):
        X = format_data(X, is_feature1d=(self.n_inputs == 1))

        if X.ndim != 2:
            raise ValueError("Cannot make prediction, because input "
                             "data has more than 2 dimensions")

        if X.shape[1] != self.n_inputs:
            raise ValueError("Input data expected to have {} features, "
                             "but got {}".format(self.n_inputs, X.shape[1]))

        return X

    def train(self, X_train, epochs=100):
        X_train = self.format_input_data(X_train)
        return super(BaseAssociative, self).train(
            X_train=X_train, epochs=epochs)
def train_som(train_data, train_targets, grid_size=30, n_epochs=100,
              lrn_radius=2, init_mode=init.Normal(0, 1), pca_model=None,
              n_pca=None, plot_flag=False, extra_str='',
              dir_path='../Data/som_models/'):

    # Preprocess data if needed
    if isinstance(train_data, pd.DataFrame):
        train_data = train_data.to_numpy()
    if isinstance(train_targets, pd.DataFrame):
        train_targets = train_targets.to_numpy(dtype='int').squeeze()

    if pca_model is None and n_pca is not None:
        train_data, pca_model = preprocess_data(train_data, n_pca)

    # Create SOM structure
    GRID_HEIGHT = grid_size
    GRID_WIDTH = grid_size

    som = algorithms.SOFM(
        n_inputs=train_data.shape[1],
        features_grid=(GRID_HEIGHT, GRID_WIDTH),
        learning_radius=lrn_radius,
        weight=init_mode,
        reduce_radius_after=50,
        step=0.5,
        std=1,
        shuffle_data=True,
        verbose=True,
    )

    # Train SOM
    som.train(train_data, epochs=n_epochs)

    # Get model targets for future predictions
    trained_clusters = som.predict(train_data).argmax(axis=1)
    model_targets = np.zeros([GRID_HEIGHT * GRID_WIDTH, 1])

    for row_id in range(GRID_HEIGHT):
        for col_id in range(GRID_WIDTH):
            # Row-major index of the neuron inside the flattened grid
            index = row_id * GRID_WIDTH + col_id
            indices = np.argwhere(trained_clusters == index).ravel()
            clustered_targets = train_targets[indices]

            if len(clustered_targets) > 0:
                # Select the target mode
                target = stats.mode(clustered_targets).mode[0]
            else:
                # If no prediction, assume 0
                target = 0

            model_targets[index] = target

    # Compute training MSE
    som_predictions = model_targets[trained_clusters]
    som.mse = mean_squared_error(train_targets, som_predictions)
    accuracy = accuracy_score(train_targets, som_predictions)
    print('SOM train MSE: ', som.mse)
    print('SOM train Acc: ', accuracy)

    # Save model
    som.model_targets = model_targets.squeeze()
    som.pca_model = pca_model
    save_som_model(som, extra_str, dir_path)

    # Plot SOM map
    if plot_flag:
        plot_SOM(som, train_data, train_targets)

    return som
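# Hypothetical call to the helper above (features_df and labels_df stand in
# for real pandas data; the default dir_path must exist for the save step):
som = train_som(
    train_data=features_df,   # (n_samples, n_features)
    train_targets=labels_df,  # integer class labels
    grid_size=20,
    n_epochs=50,
)

# Map new samples to grid cells and read off the per-cell targets
clusters = som.predict(features_df.to_numpy()).argmax(axis=1)
predictions = som.model_targets[clusters]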
class BaseAssociative(BaseNetwork):
    """
    Base class for associative learning.

    Parameters
    ----------
    n_inputs : int
        Number of input units.

    n_outputs : int
        Number of output units.

    weight : array-like, Initializer
        Neural network weights. A manually defined value should have
        shape ``(n_inputs, n_outputs)``. Defaults to
        :class:`Normal() <neupy.init.Normal>`.

    {BaseNetwork.step}
    {BaseNetwork.show_epoch}
    {BaseNetwork.shuffle_data}
    {BaseNetwork.epoch_end_signal}
    {BaseNetwork.train_end_signal}
    {Verbose.verbose}

    Methods
    -------
    {BaseSkeleton.predict}

    train(input_train, epochs=100)
        Train neural network.

    {BaseSkeleton.fit}
    """
    n_inputs = IntProperty(minval=1, required=True)
    n_outputs = IntProperty(minval=1, required=True)
    weight = ParameterProperty(default=init.Normal())

    def __init__(self, **options):
        super(BaseAssociative, self).__init__(**options)
        self.init_layers()

    def init_layers(self):
        valid_weight_shape = (self.n_inputs, self.n_outputs)

        if isinstance(self.weight, init.Initializer):
            self.weight = self.weight.sample(valid_weight_shape)

        if self.weight.shape != valid_weight_shape:
            raise ValueError("Weight matrix has invalid shape. Got {}, "
                             "expected {}".format(self.weight.shape,
                                                  valid_weight_shape))

        self.weight = self.weight.astype(float)

    def train(self, input_train, epochs=100):
        input_train = format_data(input_train, is_feature1d=True)
        return super(BaseAssociative, self).train(
            input_train=input_train, target_train=None,
            input_test=None, target_test=None,
            epochs=epochs, epsilon=None, summary='table')
def test_normal_initializer(self):
    norm = init.Normal(mean=0, std=0.01)
    weight = norm.sample((30, 30))
    self.assertNormalyDistributed(weight)