def backprop(self, X, y):
		yHat = self.forward(X)
		error = yHat - y

		delta = error
		MSE = np.mean( np.power(error, 2))

		X_bias = add_bias(X)

		if (self.n_hidden_layers == 0):
			delta2 = np.multiply(delta, self.activation_functions[0]( self.Z2 , True))

			# Compute final gradient
			dJdW1 = np.dot(X_bias.T, delta2)
			return [ dJdW1 , MSE]
		else:
			delta3 = np.multiply(delta, self.activation_functions[1]( self.Z3 , True))
			dJdW2 = np.dot(add_bias(self.A2).T, delta3)

			# Pass backward
			delta2 = np.multiply(np.dot(delta3, self.weight_layers[1][0:self.n_hiddens].T), 
							self.activation_functions[0]( self.Z2 , True))
			dJdW1 = np.dot(X_bias.T, delta2)

			return [(dJdW1, dJdW2), MSE]
 def gradient(self, weight_vector, training_data, training_targets, cost_function ):
     assert softmax_function != self.layers[-1][1] or cost_function == softmax_neg_loss,\
         "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
     assert cost_function != softmax_neg_loss or softmax_function == self.layers[-1][1],\
         "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."
     
     # assign the weight_vector as the network topology
     self.set_weights( np.array(weight_vector) )
     
     input_signals, derivatives  = self.update( training_data, trace=True )                  
     out                         = input_signals[-1]
     cost_derivative             = cost_function(out, training_targets, derivative=True).T
     delta                       = cost_derivative * derivatives[-1]
     
     layer_indexes               = range( len(self.layers) )[::-1]    # reversed
     n_samples                   = float(training_data.shape[0])
     deltas_by_layer             = []
     
     for i in layer_indexes:
         # Loop over the weight layers in reversed order to calculate the deltas
         deltas_by_layer.append(list((np.dot( delta, add_bias(input_signals[i]) )/n_samples).T.flat))
         
         if i != 0:
             # i != 0 because we don't want to calculate the delta unnecessarily.
             weight_delta        = np.dot( self.weights[ i ][1:,:], delta ) # Skip the bias weight
 
             # Calculate the delta for the subsequent layer
             delta               = weight_delta * derivatives[i-1]
     #end weight adjustment loop
     
     return np.hstack( reversed(deltas_by_layer) )
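
# The flat vector returned above has the same layout as `weight_vector`, which is
# what SciPy-style optimisers expect for a Jacobian. A hedged sketch of such a call
# follows; `network` is assumed to be an instance of the class above, while
# `training_data`, `training_targets` and `cost_function` are assumed to be in
# scope. `network.error` and `network.get_weights` are hypothetical helpers
# (scalar cost for a flat weight vector, and the current flat weights), not names
# taken from the code above.
from scipy.optimize import minimize

result = minimize(
    fun=lambda w: network.error(w, training_data, training_targets, cost_function),
    jac=lambda w: network.gradient(w, training_data, training_targets, cost_function),
    x0=network.get_weights(),
    method="L-BFGS-B",
)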
Example #3
 def gradient(self, weight_vector, training_data, training_targets, cost_function ):
     # assign the weight_vector as the network topology
     self.set_weights( np.array(weight_vector) )
     
     input_signals, derivatives  = self.update( training_data, trace=True )                  
     out                         = input_signals[-1]
     cost_derivative             = cost_function(out, training_targets, derivative=True).T
     delta                       = cost_derivative * derivatives[-1]
     
     layer_indexes               = range( len(self.layers) )[::-1]    # reversed
     n_samples                   = float(training_data.shape[0])
     deltas_by_layer             = []
     
     for i in layer_indexes:
         # Loop over the weight layers in reversed order to calculate the deltas
         deltas_by_layer.append(list((np.dot( delta, add_bias(input_signals[i]) )/n_samples).T.flat))
         
         if i != 0:
             # i != 0 because we don't want to calculate the delta unnecessarily.
             weight_delta        = np.dot( self.weights[ i ][1:,:], delta ) # Skip the bias weight
 
             # Calculate the delta for the subsequent layer
             delta               = weight_delta * derivatives[i-1]
     #end weight adjustment loop
     
     return np.hstack( reversed(deltas_by_layer) )
Example #4
 def gradient(self, weight_vector, training_data, training_targets ):
     layer_indexes              = range( len(self.layers) )[::-1]    # reversed
     self.weights               = self.unpack( np.array(weight_vector) )
     input_signals, derivatives = self.update( training_data, trace=True )
     
     out                        = input_signals[-1]
     error                      = (out - training_targets).T
     delta                      = error * derivatives[-1]
     
     layers = []
     for i in layer_indexes:
         # Loop over the weight layers in reversed order to calculate the deltas
         
         # calculate the weight change
         layers.append(np.dot( delta, add_bias(input_signals[i]) ).T.flat)
         
         if i != 0:
             """Do not calculate the delta unnecessarily."""
             # Skip the bias weight
             weight_delta = np.dot( self.weights[ i ][1:,:], delta )
 
             # Calculate the delta for the subsequent layer
             delta = weight_delta * derivatives[i-1]
     #end weight adjustment loop
     
     return np.hstack( reversed(layers) )
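
# These snippets rely on an `add_bias` helper that is not shown. A minimal sketch,
# assuming the convention implied by the indexing `weights[i][1:, :]` above (the
# bias weight occupies row 0, so the column of ones is prepended):
import numpy as np

def add_bias(A):
    # Prepend a column of ones so each weight matrix can carry its bias in row 0.
    return np.hstack([np.ones((A.shape[0], 1)), A])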
Example #5
    def gradient(self, weight_vector, training_data, training_targets):
        layer_indexes = range(len(self.layers))[::-1]  # reversed
        self.weights = self.unpack(np.array(weight_vector))
        input_signals, derivatives = self.update(training_data, trace=True)

        out = input_signals[-1]
        cost_derivative = self.cost_function(out,
                                             training_targets,
                                             derivative=True).T
        delta = cost_derivative * derivatives[-1]
        error = self.cost_function(out, training_targets)

        layers = []
        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # calculate the weight change
            dropped = dropout(
                input_signals[i],
                # dropout probability
                self.hidden_layer_dropout
                if i > 0 else self.input_layer_dropout)

            layers.append(np.dot(delta, add_bias(dropped)).T.flat)

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(self.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]
        #end weight adjustment loop

        return np.hstack(reversed(layers))
    def gradient(self, weight_vector, training_data, training_targets):
        layer_indexes = range(len(self.layers))[::-1]  # reversed
        self.weights = self.unpack(np.array(weight_vector))
        input_signals, derivatives = self.update(training_data, trace=True)

        out = input_signals[-1]
        error = (out - training_targets).T
        delta = error * derivatives[-1]

        layers = []
        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # calculate the weight change
            layers.append(np.dot(delta, add_bias(input_signals[i])).T.flat)

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(self.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]
        # end weight adjustment loop

        return np.hstack(reversed(layers))
Example #7
    def gradient(self, weight_vector, training_data, training_targets ):
        layer_indexes              = range( len(self.layers) )[::-1]    # reversed
        self.weights               = self.unpack( np.array(weight_vector) )
        input_signals, derivatives = self.update( training_data, trace=True )

        out                        = input_signals[-1]
        cost_derivative            = self.cost_function(out, training_targets, derivative=True).T
        delta                      = cost_derivative * derivatives[-1]
        error                      = self.cost_function(out, training_targets )

        layers = []
        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # calculate the weight change
            dropped = dropout(
                        input_signals[i],
                        # dropout probability
                        self.hidden_layer_dropout if i > 0 else self.input_layer_dropout
                    )

            layers.append(np.dot( delta, add_bias(dropped) ).T.flat)

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot( self.weights[ i ][1:,:], delta )

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i-1]
        #end weight adjustment loop

        return np.hstack( reversed(layers) )
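
# `dropout` is another helper assumed by these examples. A minimal sketch that
# zeroes each activation with the given probability; the library's own version
# may additionally rescale the surviving activations.
import numpy as np

def dropout(A, probability=0.0):
    if probability <= 0.0:
        return A
    mask = np.random.binomial(1, 1.0 - probability, size=A.shape)
    return A * mask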
Example #8
def parallel_backpropagation_one_process(network, trainingset, block_number, learning_rate = 0.03, momentum_factor = 0.9, max_iterations = ()):
    training_data              = np.array( [instance.features for instance in trainingset ] )
    training_targets           = np.array( [instance.targets  for instance in trainingset ] )

    layer_indexes              = range( len(network.layers) )[::-1]    # reversed
    momentum                   = collections.defaultdict( int )
    epoch                      = 0

    input_signals, derivatives = network.update( training_data, trace=True )

    out                        = input_signals[-1]
    error                      = network.cost_function(out, training_targets )
    cost_derivative            = network.cost_function(out, training_targets, derivative=True).T
    delta                      = cost_derivative * derivatives[-1]

    while epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # perform dropout
            dropped = dropout(
                        input_signals[i],
                        # dropout probability
                        network.hidden_layer_dropout if i > 0 else network.input_layer_dropout
                    )

            # calculate the weight change
            dW = -learning_rate * np.dot( delta, add_bias(dropped) ).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot( network.weights[ i ][1:,:], delta )

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i-1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[ i ] += dW
        #end weight adjustment loop

        input_signals, derivatives = network.update( training_data, trace=True )
        out                        = input_signals[-1]
        error                      = network.cost_function(out, training_targets )
        cost_derivative            = network.cost_function(out, training_targets, derivative=True).T
        delta                      = cost_derivative * derivatives[-1]

    result = []
    result.append(block_number)
    result.append(out)
    result.append(error)
    result.append(cost_derivative)
    result.append(delta)
    return result
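
# A hedged sketch of how the per-block worker above might be dispatched with the
# standard library's multiprocessing module; `blocks` (a list of per-process
# training subsets) and the chosen hyperparameters are hypothetical.
import multiprocessing

pool = multiprocessing.Pool()
jobs = [
    pool.apply_async(parallel_backpropagation_one_process,
                     (network, block, block_number),
                     dict(learning_rate=0.03, momentum_factor=0.9, max_iterations=100))
    for block_number, block in enumerate(blocks)
]
results = [job.get() for job in jobs]
pool.close()
pool.join()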
	def forward(self, X):
		# Forward propagate inputs through the network
		(N, d) = X.shape

		X_bias = add_bias(X)

		# Calculate first activation
		self.Z2 = np.dot(X_bias, self.weight_layers[0])

		if (self.n_hidden_layers == 0):
			# Return activation on Z2
			return self.activation_functions[0]( self.Z2 , False)
		else: 

			self.A2 = self.activation_functions[0]( self.Z2 , False)
			A2_bias = add_bias(self.A2)

			self.Z3 = np.dot(A2_bias, self.weight_layers[1])
			yHat = self.activation_functions[1]( self.Z3, False)

			# Final layer output
			return yHat
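
# The activation functions above are called as activation_functions[k](Z, flag),
# where the second argument selects the derivative. A minimal sketch of one such
# function (the sigmoid); the activations actually configured on the network may
# of course differ.
import numpy as np

def sigmoid(Z, derivative=False):
    s = 1.0 / (1.0 + np.exp(-Z))
    if derivative:
        return s * (1.0 - s)
    return s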
Example #10
 def backpropagation(self, trainingset, ERROR_LIMIT = 1e-3, learning_rate = 0.03, momentum_factor = 0.9, max_iterations = ()  ):
     
     assert trainingset[0].features.shape[0] == self.n_inputs, \
             "ERROR: input size varies from the defined input setting"
     
     assert trainingset[0].targets.shape[0]  == self.layers[-1][0], \
             "ERROR: output size varies from the defined output setting"
     
     
     training_data              = np.array( [instance.features for instance in trainingset ] )
     training_targets           = np.array( [instance.targets  for instance in trainingset ] )
                             
     layer_indexes              = range( len(self.layers) )[::-1]    # reversed
     momentum                   = collections.defaultdict( int )
     MSE                        = ( ) # inf
     epoch                      = 0
     
     input_signals, derivatives = self.update( training_data, trace=True )
     
     out                        = input_signals[-1]
     error                      = (out - training_targets).T
     delta                      = error * derivatives[-1]
     MSE                        = np.mean( np.power(error,2) )
     
     while MSE > ERROR_LIMIT and epoch < max_iterations:
         epoch += 1
         
         for i in layer_indexes:
             # Loop over the weight layers in reversed order to calculate the deltas
             
             # perform dropout
             dropped = dropout( 
                         input_signals[i], 
                         # dropout probability
                         self.hidden_layer_dropout if i else self.input_layer_dropout
                     )
             
             # calculate the weight change
             dW = -learning_rate * np.dot( delta, add_bias(dropped) ).T + momentum_factor * momentum[i]
             
             if i != 0:
                 """Do not calculate the delta unnecessarily."""
                 # Skip the bias weight
                 weight_delta = np.dot( self.weights[ i ][1:,:], delta )
     
                 # Calculate the delta for the subsequent layer
                 delta = weight_delta * derivatives[i-1]
             
             # Store the momentum
             momentum[i] = dW
                                 
             # Update the weights
             self.weights[ i ] += dW
         #end weight adjustment loop
         
         input_signals, derivatives = self.update( training_data, trace=True )
         out                        = input_signals[-1]
         error                      = (out - training_targets).T
         delta                      = error * derivatives[-1]
         MSE                        = np.mean( np.power(error,2) )
         
         
         if epoch%1000==0:
             # Show the current training status
             print "* current network error (MSE):", MSE
     
     print "* Converged to error bound (%.4g) with MSE = %.4g." % ( ERROR_LIMIT, MSE )
     print "* Trained for %d epochs." % epoch
     
     if self.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
         self.save_to_file()
def backpropagation(network, trainingset, testset, cost_function, evaluation_function = None, ERROR_LIMIT = 1e-3, learning_rate = 0.03, momentum_factor = 0.9, max_iterations = (), batch_size = 0, input_layer_dropout = 0.0, hidden_layer_dropout = 0.0, print_rate = 1000, save_trained_network = False  ):
    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."
        
    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"
    
    # Whether to use another function for printing the dataset error than the cost function. 
    # This is useful if you train the network with the MSE cost function, but are going to 
    # classify rather than regress on your data.
    calculate_print_error      = evaluation_function if evaluation_function != None else cost_function
    
    training_data              = np.array( [instance.features for instance in trainingset ] )
    training_targets           = np.array( [instance.targets  for instance in trainingset ] )
    test_data                  = np.array( [instance.features for instance in testset ] )
    test_targets               = np.array( [instance.targets  for instance in testset ] )
    
    batch_size                 = batch_size if batch_size != 0 else training_data.shape[0] 
    batch_training_data        = np.array_split(training_data, math.ceil(1.0 * training_data.shape[0] / batch_size))
    batch_training_targets     = np.array_split(training_targets, math.ceil(1.0 * training_targets.shape[0] / batch_size))
    batch_indices              = range(len(batch_training_data))       # fast reference to batches
    
    error                      = calculate_print_error(network.update( test_data ), test_targets )
    reversed_layer_indexes     = range( len(network.layers) )[::-1]
    momentum                   = collections.defaultdict( int )
    
    epoch                      = 0
    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1
        
        random.shuffle(batch_indices) # Shuffle the order in which the batches are processed between the iterations
        
        for batch_index in batch_indices:
            batch_data                 = batch_training_data[    batch_index ]
            batch_targets              = batch_training_targets[ batch_index ]
            batch_size                 = float( batch_data.shape[0] )
            
            input_signals, derivatives = network.update( batch_data, trace=True )
            out                        = input_signals[-1]
            cost_derivative            = cost_function( out, batch_targets, derivative=True ).T
            delta                      = cost_derivative * derivatives[-1]
            
            for i in reversed_layer_indexes:
                # Loop over the weight layers in reversed order to calculate the deltas
            
                # perform dropout
                dropped = dropout( 
                            input_signals[i], 
                            # dropout probability
                            hidden_layer_dropout if i > 0 else input_layer_dropout
                        )
            
                # calculate the weight change
                dW = -learning_rate * (np.dot( delta, add_bias(dropped) )/batch_size).T + momentum_factor * momentum[i]
            
                if i != 0:
                    """Do not calculate the delta unnecessarily."""
                    # Skip the bias weight
                    weight_delta = np.dot( network.weights[ i ][1:,:], delta )
    
                    # Calculate the delta for the subsequent layer
                    delta = weight_delta * derivatives[i-1]
            
                # Store the momentum
                momentum[i] = dW
                                
                # Update the weights
                network.weights[ i ] += dW
            #end weight adjustment loop
        
        error = calculate_print_error(network.update( test_data ), test_targets )
        
        if epoch%print_rate==0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch
    
    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error )
    print "[training]   Measured quality: %.4g" % network.measure_quality( training_data, training_targets, cost_function )
    print "[training]   Trained for %d epochs." % epoch
    
    if save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        network.save_network_to_file()
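
# How np.array_split produces the mini-batches used above: the data is divided
# into ceil(n / batch_size) roughly equal parts, the last ones simply being one
# row shorter when the split is uneven.
import math
import numpy as np

data = np.arange(10).reshape(10, 1)
batch_size = 4
batches = np.array_split(data, int(math.ceil(1.0 * data.shape[0] / batch_size)))
# -> three batches with 4, 3 and 3 rows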
Example #12
 def resilient_backpropagation(self, trainingset, ERROR_LIMIT=1e-3, max_iterations = (), weight_step_max = 50., weight_step_min = 0., start_step = 0.5, learn_max = 1.2, learn_min = 0.5 ):
     # Implemented according to iRprop+ 
     # http://sci2s.ugr.es/keel/pdf/algorithm/articulo/2003-Neuro-Igel-IRprop+.pdf
     assert self.input_layer_dropout == 0 and self.hidden_layer_dropout == 0, \
             "ERROR: dropout should not be used with resilient backpropagation"
     
     assert trainingset[0].features.shape[0] == self.n_inputs, \
             "ERROR: input size varies from the defined input setting"
     
     assert trainingset[0].targets.shape[0]  == self.layers[-1][0], \
             "ERROR: output size varies from the defined output setting"
     
     training_data              = np.array( [instance.features for instance in trainingset ] )
     training_targets           = np.array( [instance.targets  for instance in trainingset ] )
     
     # Data structure to store the previous derivative
     last_dEdW                  = [ 1 ] * len( self.weights )
     
     # Storing the current / previous weight step size
     weight_step                = [ np.full( weight_layer.shape, start_step ) for weight_layer in self.weights ]
     
     # Storing the current / previous weight update
     dW                         = [  np.ones(shape=weight_layer.shape) for weight_layer in self.weights ]
     
     
     input_signals, derivatives = self.update( training_data, trace=True )
     out                        = input_signals[-1]
     error                      = (out - training_targets).T
     delta                      = error * derivatives[-1]
     MSE                        = np.mean( np.power(error,2) )
     
     layer_indexes              = range( len(self.layers) )[::-1] # reversed
     prev_MSE                   = ( )                             # inf
     epoch                      = 0
     
     while MSE > ERROR_LIMIT and epoch < max_iterations:
         epoch       += 1
         
         for i in layer_indexes:
             # Loop over the weight layers in reversed order to calculate the deltas
                    
             # Calculate the delta with respect to the weights
             dEdW = np.dot( delta, add_bias(input_signals[i]) ).T
             
             if i != 0:
                 """Do not calculate the delta unnecessarily."""
                 # Skip the bias weight
                 weight_delta = np.dot( self.weights[ i ][1:,:], delta )
     
                 # Calculate the delta for the subsequent layer
                 delta = weight_delta * derivatives[i-1]
             
             
             # Calculate sign changes and note where they have changed
             diffs            = np.multiply( dEdW, last_dEdW[i] )
             pos_indexes      = np.where( diffs > 0 )
             neg_indexes      = np.where( diffs < 0 )
             zero_indexes     = np.where( diffs == 0 )
             
             
             # positive
             if np.any(pos_indexes):
                 # Calculate the weight step size
                 weight_step[i][pos_indexes] = np.minimum( weight_step[i][pos_indexes] * learn_max, weight_step_max )
                 
                 # Calculate the weight step direction
                 dW[i][pos_indexes] = np.multiply( -np.sign( dEdW[pos_indexes] ), weight_step[i][pos_indexes] )
                 
                 # Apply the weight deltas
                 self.weights[i][ pos_indexes ] += dW[i][pos_indexes]
             
             # negative
             if np.any(neg_indexes):
                 weight_step[i][neg_indexes] = np.maximum( weight_step[i][neg_indexes] * learn_min, weight_step_min )
                 
                 if MSE > prev_MSE:
                     # iRprop+ version of resilient backpropagation
                     self.weights[i][ neg_indexes ] -= dW[i][neg_indexes] # backtrack
                 
                 dEdW[ neg_indexes ] = 0
             
             # zeros
             if np.any(zero_indexes):
                 dW[i][zero_indexes] = np.multiply( -np.sign( dEdW[zero_indexes] ), weight_step[i][zero_indexes] )
                 self.weights[i][ zero_indexes ] += dW[i][zero_indexes]
             
             # Store the previous weight step
             last_dEdW[i] = dEdW
         #end weight adjustment loop
         
         prev_MSE                   = MSE
         
         input_signals, derivatives = self.update( training_data, trace=True )
         out                        = input_signals[-1]
         error                      = (out - training_targets).T
         delta                      = error * derivatives[-1]
         MSE                        = np.mean( np.power(error,2) )
         
         if epoch%1000==0: print "* current network error (MSE):", MSE
 
     print "* Converged to error bound (%.3g) with MSE = %.3g." % ( ERROR_LIMIT, MSE )
     print "* Trained for %d epochs." % epoch
     
     if self.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
         self.save_to_file()
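
# A toy, single-weight illustration of the resilient step-size rule used above:
# the step grows while the partial derivative keeps its sign, shrinks when it
# flips, and a flipped derivative is zeroed so the next comparison counts as
# "no sign change". The numbers are arbitrary.
learn_max, learn_min = 1.2, 0.5
weight_step_max, weight_step_min = 50.0, 0.0
step, prev_dEdW = 0.5, 0.0
for dEdW in [0.4, 0.3, -0.2, -0.1]:
    if dEdW * prev_dEdW > 0:
        step = min(step * learn_max, weight_step_max)
    elif dEdW * prev_dEdW < 0:
        step = max(step * learn_min, weight_step_min)
        dEdW = 0.0
    prev_dEdW = dEdW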
def resilient_backpropagation(network,
                              trainingset,
                              testset,
                              cost_function,
                              ERROR_LIMIT=1e-3,
                              max_iterations=(),
                              weight_step_max=50.,
                              weight_step_min=0.,
                              start_step=0.5,
                              learn_max=1.2,
                              learn_min=0.5,
                              print_rate=1000,
                              save_trained_network=False):
    # Implemented according to iRprop+
    # http://sci2s.ugr.es/keel/pdf/algorithm/articulo/2003-Neuro-Igel-IRprop+.pdf

    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    # Storing the current / previous weight step size
    weight_step = [
        np.full(weight_layer.shape, start_step)
        for weight_layer in network.weights
    ]

    # Storing the current / previous weight update
    dW = [
        np.ones(shape=weight_layer.shape) for weight_layer in network.weights
    ]

    # Storing the previous derivative
    previous_dEdW = [1] * len(network.weights)

    # Storing the previous error measurement
    prev_error = ()  # inf

    input_signals, derivatives = network.update(training_data, trace=True)
    out = input_signals[-1]
    cost_derivative = cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]
    error = cost_function(network.update(test_data), test_targets)

    n_samples = float(training_data.shape[0])
    layer_indexes = range(len(network.layers))[::-1]  # reversed
    epoch = 0

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # Calculate the delta with respect to the weights
            dEdW = (np.dot(delta, add_bias(input_signals[i])) / n_samples).T

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(network.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Calculate sign changes and note where they have changed
            diffs = np.multiply(dEdW, previous_dEdW[i])
            pos_indexes = np.where(diffs > 0)
            neg_indexes = np.where(diffs < 0)
            zero_indexes = np.where(diffs == 0)

            # positive
            if np.any(pos_indexes):
                # Calculate the weight step size
                weight_step[i][pos_indexes] = np.minimum(
                    weight_step[i][pos_indexes] * learn_max, weight_step_max)

                # Calculate the weight step direction
                dW[i][pos_indexes] = np.multiply(-np.sign(dEdW[pos_indexes]),
                                                 weight_step[i][pos_indexes])

                # Apply the weight deltas
                network.weights[i][pos_indexes] += dW[i][pos_indexes]

            # negative
            if np.any(neg_indexes):
                weight_step[i][neg_indexes] = np.maximum(
                    weight_step[i][neg_indexes] * learn_min, weight_step_min)

                if error > prev_error:
                    # iRprop+ version of resilient backpropagation
                    network.weights[i][neg_indexes] -= dW[i][
                        neg_indexes]  # backtrack

                dEdW[neg_indexes] = 0

            # zeros
            if np.any(zero_indexes):
                dW[i][zero_indexes] = np.multiply(-np.sign(dEdW[zero_indexes]),
                                                  weight_step[i][zero_indexes])
                network.weights[i][zero_indexes] += dW[i][zero_indexes]

            # Store the previous weight step
            previous_dEdW[i] = dEdW
        #end weight adjustment loop

        prev_error = error

        input_signals, derivatives = network.update(training_data, trace=True)
        out = input_signals[-1]
        cost_derivative = cost_function(out, training_targets,
                                        derivative=True).T
        delta = cost_derivative * derivatives[-1]
        error = cost_function(network.update(test_data), test_targets)

        if epoch % print_rate == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % (
        ERROR_LIMIT, error)
    print "[training]   Measured quality: %.4g" % network.measure_quality(
        training_data, training_targets, cost_function)
    print "[training]   Trained for %d epochs." % epoch

    if save_trained_network and confirm(
            promt="Do you wish to store the trained network?"):
        network.save_network_to_file()
def resilient_backpropagation(network, trainingset, ERROR_LIMIT=1e-3, max_iterations = (), weight_step_max = 50., weight_step_min = 0., start_step = 0.5, learn_max = 1.2, learn_min = 0.5 ):
    # Implemented according to iRprop+
    # http://sci2s.ugr.es/keel/pdf/algorithm/articulo/2003-Neuro-Igel-IRprop+.pdf
    assert network.input_layer_dropout == 0 and network.hidden_layer_dropout == 0, \
            "ERROR: dropout should not be used with resilient backpropagation"

    assert trainingset[0].features.shape[0] == network.n_inputs, \
            "ERROR: input size varies from the defined input setting"

    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
            "ERROR: output size varies from the defined output setting"

    training_data              = np.array( [instance.features for instance in trainingset ] )
    training_targets           = np.array( [instance.targets  for instance in trainingset ] )

    # Data structure to store the previous derivative
    previous_dEdW                  = [ 1 ] * len( network.weights )

    # Storing the current / previous weight step size
    weight_step                = [ np.full( weight_layer.shape, start_step ) for weight_layer in network.weights ]

    # Storing the current / previous weight update
    dW                         = [  np.ones(shape=weight_layer.shape) for weight_layer in network.weights ]


    input_signals, derivatives = network.update( training_data, trace=True )
    out                        = input_signals[-1]
    cost_derivative            = network.cost_function(out, training_targets, derivative=True).T
    delta                      = cost_derivative * derivatives[-1]
    error                      = network.cost_function(out, training_targets )

    layer_indexes              = range( len(network.layers) )[::-1] # reversed
    prev_error                   = ( )                             # inf
    epoch                      = 0

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch       += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # Calculate the delta with respect to the weights
            dEdW = np.dot( delta, add_bias(input_signals[i]) ).T

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot( network.weights[ i ][1:,:], delta )

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i-1]


            # Calculate sign changes and note where they have changed
            diffs            = np.multiply( dEdW, previous_dEdW[i] )
            pos_indexes      = np.where( diffs > 0 )
            neg_indexes      = np.where( diffs < 0 )
            zero_indexes     = np.where( diffs == 0 )


            # positive
            if np.any(pos_indexes):
                # Calculate the weight step size
                weight_step[i][pos_indexes] = np.minimum( weight_step[i][pos_indexes] * learn_max, weight_step_max )

                # Calculate the weight step direction
                dW[i][pos_indexes] = np.multiply( -np.sign( dEdW[pos_indexes] ), weight_step[i][pos_indexes] )

                # Apply the weight deltas
                network.weights[i][ pos_indexes ] += dW[i][pos_indexes]

            # negative
            if np.any(neg_indexes):
                weight_step[i][neg_indexes] = np.maximum( weight_step[i][neg_indexes] * learn_min, weight_step_min )

                if error > prev_error:
                    # iRprop+ version of resilient backpropagation
                    network.weights[i][ neg_indexes ] -= dW[i][neg_indexes] # backtrack

                dEdW[ neg_indexes ] = 0

            # zeros
            if np.any(zero_indexes):
                dW[i][zero_indexes] = np.multiply( -np.sign( dEdW[zero_indexes] ), weight_step[i][zero_indexes] )
                network.weights[i][ zero_indexes ] += dW[i][zero_indexes]

            # Store the previous weight step
            previous_dEdW[i] = dEdW
        #end weight adjustment loop

        prev_error                 = error

        input_signals, derivatives = network.update( training_data, trace=True )
        out                        = input_signals[-1]
        cost_derivative            = network.cost_function(out, training_targets, derivative=True).T
        delta                      = cost_derivative * derivatives[-1]
        error                      = network.cost_function(out, training_targets )

        if epoch%1000==0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error )
    print "[training]   Trained for %d epochs." % epoch

    if network.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        network.save_to_file()
def backpropagation(network, trainingset, ERROR_LIMIT = 1e-3, learning_rate = 0.03, momentum_factor = 0.9, max_iterations = ()  ):

    assert trainingset[0].features.shape[0] == network.n_inputs, \
            "ERROR: input size varies from the defined input setting"

    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
            "ERROR: output size varies from the defined output setting"


    training_data              = np.array( [instance.features for instance in trainingset ] )
    training_targets           = np.array( [instance.targets  for instance in trainingset ] )

    layer_indexes              = range( len(network.layers) )[::-1]    # reversed
    momentum                   = collections.defaultdict( int )
    epoch                      = 0

    input_signals, derivatives = network.update( training_data, trace=True )

    out                        = input_signals[-1]
    error                      = network.cost_function(out, training_targets )
    cost_derivative            = network.cost_function(out, training_targets, derivative=True).T
    delta                      = cost_derivative * derivatives[-1]

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # perform dropout
            dropped = dropout(
                        input_signals[i],
                        # dropout probability
                        network.hidden_layer_dropout if i > 0 else network.input_layer_dropout
                    )

            # calculate the weight change
            dW = -learning_rate * np.dot( delta, add_bias(dropped) ).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot( network.weights[ i ][1:,:], delta )

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i-1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[ i ] += dW
        #end weight adjustment loop

        input_signals, derivatives = network.update( training_data, trace=True )
        out                        = input_signals[-1]
        error                      = network.cost_function(out, training_targets )
        cost_derivative            = network.cost_function(out, training_targets, derivative=True).T
        delta                      = cost_derivative * derivatives[-1]


        if epoch%1000==0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error )
    print "[training]   Trained for %d epochs." % epoch

    if network.save_trained_network:
        network.save_to_file()
    def backpropagation(self,
                        trainingset,
                        ERROR_LIMIT=1e-3,
                        learning_rate=0.3,
                        momentum_factor=0.9):

        assert trainingset[0].features.shape[0] == self.n_inputs, \
                "ERROR: input size varies from the defined input setting"

        assert trainingset[0].targets.shape[0]  == self.n_outputs, \
                "ERROR: output size varies from the defined output setting"

        training_data = np.array(
            [instance.features for instance in trainingset])
        training_targets = np.array(
            [instance.targets for instance in trainingset])

        MSE = ()  # inf
        neterror = None
        momentum = collections.defaultdict(int)

        batch_size = self.batch_size if self.batch_size != 0 else training_data.shape[
            0]

        epoch = 0
        while MSE > ERROR_LIMIT:
            epoch += 1

            for start in xrange(0, len(training_data), batch_size):
                # Train on the current mini-batch of features and targets
                batch = training_data[start:start + batch_size]
                batch_targets = training_targets[start:start + batch_size]
                input_layers = self.update(batch, trace=True)
                out = input_layers[-1]

                error = out - batch_targets
                delta = error
                MSE = np.mean(np.power(error, 2))

                loop = itertools.izip(
                    xrange(len(self.weights) - 1, -1, -1),
                    reversed(self.weights),
                    reversed(input_layers[:-1]),
                )

                for i, weight_layer, input_signals in loop:
                    # Loop over the weight layers in reversed order to calculate the deltas

                    if i == 0:
                        dropped = dropout(
                            add_bias(input_signals).T,
                            self.input_layer_dropout)
                    else:
                        dropped = dropout(
                            add_bias(input_signals).T,
                            self.hidden_layer_dropout)

                    # Calculate weight change
                    dW = learning_rate * np.dot(
                        dropped, delta) + momentum_factor * momentum[i]

                    if i != 0:
                        """Do not calculate the delta unnecessarily."""
                        # Skipping the bias weight during calculation.
                        weight_delta = np.dot(delta, weight_layer[1:, :].T)

                        # Calculate the delta for the subsequent layer
                        delta = np.multiply(
                            weight_delta,
                            self.activation_functions[i - 1](input_signals,
                                                             derivative=True))

                    # Store the momentum
                    momentum[i] = dW

                    # Update the weights
                    self.weights[i] -= dW

            if epoch % 1000 == 0:
                # Show the current training status
                print "* current network error (MSE):", MSE

        print "* Converged to error bound (%.4g) with MSE = %.4g." % (
            ERROR_LIMIT, MSE)
        print "* Trained for %d epochs." % epoch
def backpropagation(network,
                    trainingset,
                    testset,
                    cost_function,
                    ERROR_LIMIT=1e-3,
                    learning_rate=0.03,
                    momentum_factor=0.9,
                    max_iterations=(),
                    input_layer_dropout=0.0,
                    hidden_layer_dropout=0.0,
                    save_trained_network=False):
    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    momentum = collections.defaultdict(int)

    input_signals, derivatives = network.update(training_data, trace=True)
    out = input_signals[-1]
    cost_derivative = cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]
    error = cost_function(network.update(test_data), test_targets)

    layer_indexes = range(len(network.layers))[::-1]  # reversed
    epoch = 0
    n_samples = float(training_data.shape[0])

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # perform dropout
            dropped = dropout(
                input_signals[i],
                # dropout probability
                hidden_layer_dropout if i > 0 else input_layer_dropout)

            # calculate the weight change
            dW = -learning_rate * (np.dot(delta, add_bias(input_signals[i])) /
                                   n_samples).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(network.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[i] += dW
        #end weight adjustment loop

        input_signals, derivatives = network.update(training_data, trace=True)
        out = input_signals[-1]
        cost_derivative = cost_function(out, training_targets,
                                        derivative=True).T
        delta = cost_derivative * derivatives[-1]
        error = cost_function(network.update(test_data), test_targets)

        if epoch % 1000 == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % (
        ERROR_LIMIT, error)
    print "[training]   Measured quality: %.4g" % network.measure_quality(
        training_data, training_targets, cost_function)
    print "[training]   Trained for %d epochs." % epoch

    if save_trained_network and confirm(
            promt="Do you wish to store the trained network?"):
        network.save_network_to_file()
def backpropagation(network,
                    trainingset,
                    testset,
                    cost_function,
                    evaluation_function=None,
                    ERROR_LIMIT=1e-3,
                    learning_rate=0.03,
                    momentum_factor=0.9,
                    max_iterations=(),
                    batch_size=0,
                    input_layer_dropout=0.0,
                    hidden_layer_dropout=0.0,
                    print_rate=1000,
                    save_trained_network=False):
    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    # Whether to use another function for printing the dataset error than the cost function.
    # This is useful if you train the network with the MSE cost function, but are going to
    # classify rather than regress on your data.
    calculate_print_error = evaluation_function if evaluation_function != None else cost_function

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    batch_size = batch_size if batch_size != 0 else training_data.shape[0]
    batch_training_data = np.array_split(
        training_data, math.ceil(1.0 * training_data.shape[0] / batch_size))
    batch_training_targets = np.array_split(
        training_targets,
        math.ceil(1.0 * training_targets.shape[0] / batch_size))
    batch_indices = range(
        len(batch_training_data))  # fast reference to batches

    error = calculate_print_error(network.update(test_data), test_targets)
    reversed_layer_indexes = range(len(network.layers))[::-1]
    momentum = collections.defaultdict(int)

    epoch = 0
    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        random.shuffle(
            batch_indices
        )  # Shuffle the order in which the batches are processed between the iterations

        for batch_index in batch_indices:
            batch_data = batch_training_data[batch_index]
            batch_targets = batch_training_targets[batch_index]
            batch_size = float(batch_data.shape[0])

            input_signals, derivatives = network.update(batch_data, trace=True)
            out = input_signals[-1]
            cost_derivative = cost_function(out,
                                            batch_targets,
                                            derivative=True).T
            delta = cost_derivative * derivatives[-1]

            for i in reversed_layer_indexes:
                # Loop over the weight layers in reversed order to calculate the deltas

                # perform dropout
                dropped = dropout(
                    input_signals[i],
                    # dropout probability
                    hidden_layer_dropout if i > 0 else input_layer_dropout)

                # calculate the weight change
                dW = -learning_rate * (np.dot(delta, add_bias(
                    dropped)) / batch_size).T + momentum_factor * momentum[i]

                if i != 0:
                    """Do not calculate the delta unnecessarily."""
                    # Skip the bias weight
                    weight_delta = np.dot(network.weights[i][1:, :], delta)

                    # Calculate the delta for the subsequent layer
                    delta = weight_delta * derivatives[i - 1]

                # Store the momentum
                momentum[i] = dW

                # Update the weights
                network.weights[i] += dW
            #end weight adjustment loop

        error = calculate_print_error(network.update(test_data), test_targets)

        if epoch % print_rate == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % (
        ERROR_LIMIT, error)
    print "[training]   Measured quality: %.4g" % network.measure_quality(
        training_data, training_targets, cost_function)
    print "[training]   Trained for %d epochs." % epoch

    if save_trained_network and confirm(
            promt="Do you wish to store the trained network?"):
        network.save_network_to_file()
def backpropagation(network, trainingset, testset, cost_function, ERROR_LIMIT = 1e-3, learning_rate = 0.03, momentum_factor = 0.9, max_iterations = (), input_layer_dropout = 0.0, hidden_layer_dropout = 0.0, save_trained_network = False  ):
    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."
        
    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"
    
    training_data              = np.array( [instance.features for instance in trainingset ] )
    training_targets           = np.array( [instance.targets  for instance in trainingset ] )
    test_data                  = np.array( [instance.features  for instance in testset ] )
    test_targets               = np.array( [instance.targets  for instance in testset ] )
          
    momentum                   = collections.defaultdict( int )
    
    input_signals, derivatives = network.update( training_data, trace=True )
    out                        = input_signals[-1]
    cost_derivative            = cost_function(out, training_targets, derivative=True).T
    delta                      = cost_derivative * derivatives[-1]
    error                      = cost_function(network.update( test_data ), test_targets )
    
    layer_indexes              = range( len(network.layers) )[::-1]    # reversed
    epoch                      = 0
    n_samples                  = float(training_data.shape[0])
    
    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1
        
        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas
            
            # perform dropout
            dropped = dropout( 
                        input_signals[i], 
                        # dropout probability
                        hidden_layer_dropout if i > 0 else input_layer_dropout
                    )
            
            # calculate the weight change
            dW = -learning_rate * (np.dot( delta, add_bias(dropped) )/n_samples).T + momentum_factor * momentum[i]
            
            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot( network.weights[ i ][1:,:], delta )
    
                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i-1]
            
            # Store the momentum
            momentum[i] = dW
                                
            # Update the weights
            network.weights[ i ] += dW
        #end weight adjustment loop
        
        input_signals, derivatives = network.update( training_data, trace=True )
        out                        = input_signals[-1]
        cost_derivative            = cost_function(out, training_targets, derivative=True).T
        delta                      = cost_derivative * derivatives[-1]
        error                      = cost_function(network.update( test_data ), test_targets )
        
        if epoch%1000==0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch
    
    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error )
    print "[training]   Measured quality: %.4g" % network.measure_quality( training_data, training_targets, cost_function )
    print "[training]   Trained for %d epochs." % epoch
    
    if save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        network.save_network_to_file()
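For context, a minimal usage sketch of the trainer above. The Instance wrapper is defined locally so the .features / .targets asserts are satisfied; `network` and `sum_squared_error` stand in for objects the surrounding library is expected to provide and are assumptions, not part of the excerpt.

import numpy as np

class Instance(object):
    # Hypothetical wrapper matching the .features / .targets attributes
    # that the asserts in backpropagation() rely on.
    def __init__(self, features, targets):
        self.features = np.array(features)
        self.targets  = np.array(targets)

# XOR-style toy data; a genuinely held-out test set would normally be used.
training_set = [Instance([0, 0], [0]), Instance([0, 1], [1]),
                Instance([1, 0], [1]), Instance([1, 1], [0])]
test_set     = training_set

# `network` and `sum_squared_error` are assumed to come from the surrounding
# library; they are not defined in these excerpts.
backpropagation(network, training_set, test_set, sum_squared_error,
                ERROR_LIMIT=1e-2, learning_rate=0.03, momentum_factor=0.9,
                input_layer_dropout=0.0, hidden_layer_dropout=0.1,
                save_trained_network=False)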
Example #21
def backpropagation(network,
                    trainingset,
                    ERROR_LIMIT=1e-3,
                    learning_rate=0.03,
                    momentum_factor=0.9,
                    max_iterations=()):

    assert trainingset[0].features.shape[0] == network.n_inputs, \
            "ERROR: input size varies from the defined input setting"

    assert trainingset[0].targets.shape[0]  == network.layers[-1][0], \
            "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])

    layer_indexes = range(len(network.layers))[::-1]  # reversed
    momentum = collections.defaultdict(int)
    epoch = 0

    input_signals, derivatives = network.update(training_data, trace=True)

    out = input_signals[-1]
    error = network.cost_function(out, training_targets)
    cost_derivative = network.cost_function(out,
                                            training_targets,
                                            derivative=True).T
    delta = cost_derivative * derivatives[-1]

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # perform dropout
            dropped = dropout(
                input_signals[i],
                # dropout probability
                network.hidden_layer_dropout
                if i > 0 else network.input_layer_dropout)

            # calculate the weight change
            dW = -learning_rate * np.dot(
                delta, add_bias(dropped)).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(network.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[i] += dW
        #end weight adjustment loop

        input_signals, derivatives = network.update(training_data, trace=True)
        out = input_signals[-1]
        error = network.cost_function(out, training_targets)
        cost_derivative = network.cost_function(out,
                                                training_targets,
                                                derivative=True).T
        delta = cost_derivative * derivatives[-1]

        if epoch % 1000 == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training]   Converged to error bound (%.4g) with error %.4g." % (
        ERROR_LIMIT, error)
    print "[training]   Trained for %d epochs." % epoch

    if network.save_trained_network and confirm(
            promt="Do you wish to store the trained network?"):
        network.save_to_file()
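Every variant here calls add_bias() and dropout() without showing them. Based only on how they are used (the bias weight sits in the first row of each weight matrix and is skipped via weights[i][1:, :]), a plausible minimal sketch of the two helpers follows; it is an assumption, not the library's actual code.

import numpy as np

def add_bias(A):
    # Prepend a column of ones so the bias is learned as the first row of
    # each weight matrix -- the row that weights[i][1:, :] skips above.
    return np.hstack((np.ones((A.shape[0], 1)), A))

def dropout(X, p=0.0):
    # Inverted dropout: zero each activation with probability p and rescale
    # the survivors so the expected value of the signal is unchanged.
    if p > 0.0:
        keep = 1.0 - p
        return X * np.random.binomial(1, keep, size=X.shape) / keep
    return X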
 def backpropagation(self, trainingset, ERROR_LIMIT = 1e-3, learning_rate = 0.3, momentum_factor = 0.9  ):
     
     assert trainingset[0].features.shape[0] == self.n_inputs, \
             "ERROR: input size varies from the defined input setting"
     
     assert trainingset[0].targets.shape[0]  == self.n_outputs, \
             "ERROR: output size varies from the defined output setting"
     
     
     training_data    = np.array( [instance.features for instance in trainingset ] )
     training_targets = np.array( [instance.targets for instance in trainingset ] )
     
     MSE              = ( ) # empty tuple compares greater than any float in Python 2, so the loop below always starts
     neterror         = None
     momentum         = collections.defaultdict( int )
     
     batch_size       = self.batch_size if self.batch_size != 0 else training_data.shape[0]
     
     epoch = 0
     while MSE > ERROR_LIMIT:
         epoch += 1
         
         for start in xrange( 0, len(training_data), batch_size ):
             # Forward-propagate the current mini-batch and its targets only
             batch             = training_data[start : start+batch_size]
             batch_targets     = training_targets[start : start+batch_size]
             input_layers      = self.update( batch, trace=True )
             out               = input_layers[-1]

             error             = out - batch_targets
             delta             = error
             MSE               = np.mean( np.power(error,2) )
         
         
             loop  = itertools.izip(
                             xrange(len(self.weights)-1, -1, -1),
                             reversed(self.weights),
                             reversed(input_layers[:-1]),
                         )
         
             for i, weight_layer, input_signals in loop:
                 # Loop over the weight layers in reversed order to calculate the deltas
             
                 if i == 0:
                     dropped = dropout( add_bias(input_signals).T, self.input_layer_dropout  )
                 else:
                     dropped = dropout( add_bias(input_signals).T, self.hidden_layer_dropout )
             
                 # Calculate weight change
                 dW = learning_rate * np.dot( dropped, delta ) + momentum_factor * momentum[i]
             
                 if i!= 0:
                     """Do not calculate the delta unnecessarily."""
                     # Skipping the bias weight during calculation.
                     weight_delta = np.dot( delta, weight_layer[1:,:].T )
         
                     # Calculate the delta for the subsequent layer
                     delta = np.multiply(  weight_delta, self.activation_functions[i-1]( input_signals, derivative=True) )
             
                 # Store the momentum
                 momentum[i] = dW
             
                 # Update the weights
                 self.weights[ i ] -= dW
         
         if epoch%1000==0:
             # Show the current training status
             print "* current network error (MSE):", MSE
     
     print "* Converged to error bound (%.4g) with MSE = %.4g." % ( ERROR_LIMIT, MSE )
     print "* Trained for %d epochs." % epoch