def get_weight_delta(self, output_train, target_train):
    """Compute the conjugate-gradient weight updates for one epoch.

    Flattens the raw gradients into a single vector, combines the
    steepest-descent direction with the previous search direction
    (scaled by ``update_function``'s beta), and reshapes the result
    back into one delta matrix per trainable layer.
    """
    raw_gradients = super(ConjugateGradient, self).get_gradient(
        output_train, target_train
    )
    full_gradient = matrix_list_in_one_vector(raw_gradients)
    current_epoch = self.epoch

    if current_epoch > 1 and current_epoch % self.n_weights == 0:
        # Directions lose conjugacy after many iterations, so the
        # stored gradient is dropped every ``n_weights`` epochs to
        # restart from pure steepest descent.
        self.logs.info("TRAIN", "Reset conjugate gradient vector")
        del self.prev_gradient

    direction = -full_gradient
    if hasattr(self, 'prev_gradient'):
        # Mix in the previous direction; skipped on the very first
        # epoch and right after a reset.
        beta = self.update_function(
            self.prev_gradient, full_gradient, self.prev_weight_delta
        )
        direction = direction + beta * self.prev_weight_delta

    self.prev_weight_delta = direction.copy()
    self.prev_gradient = full_gradient.copy()

    return vector_to_list_of_matrix(
        direction,
        (layer.size for layer in self.train_layers)
    )
def get_gradient_by_weights(self, weights):
    """Evaluate the network gradient at a flattened weight vector.

    Reshapes ``weights`` into per-layer matrices, installs them on
    the network, and returns the resulting gradient flattened back
    into one vector.
    """
    layer_shapes = (layer.size for layer in self.train_layers)
    self.set_weights(vector_to_list_of_matrix(weights, layer_shapes))
    full_gradient = self.get_gradient(self.output_train, self.target_train)
    return matrix_list_in_one_vector(full_gradient)
def get_weight_delta(self, output_train, target_train):
    """Return per-layer weight updates along the conjugate direction.

    Builds the new search direction from the negative gradient plus
    a beta-weighted contribution of the previous direction, storing
    both for the next epoch.
    """
    gradient_matrices = super(ConjugateGradient, self).get_gradient(
        output_train, target_train
    )
    flat_gradient = matrix_list_in_one_vector(gradient_matrices)

    must_reset = (self.epoch > 1 and self.epoch % self.n_weights == 0)
    if must_reset:
        # The search directions lose conjugacy over time; forget the
        # stored gradient every ``n_weights`` epochs to restart.
        self.logs.info("TRAIN", "Reset conjugate gradient vector")
        del self.prev_gradient

    step = -flat_gradient
    if hasattr(self, 'prev_gradient'):
        previous_gradient = self.prev_gradient
        previous_step = self.prev_weight_delta
        beta = self.update_function(
            previous_gradient, flat_gradient, previous_step
        )
        step += beta * previous_step

    deltas = vector_to_list_of_matrix(
        step,
        (layer.size for layer in self.train_layers)
    )
    self.prev_weight_delta = step.copy()
    self.prev_gradient = flat_gradient.copy()
    return deltas
def check_updates(self, new_step):
    """Return the training error produced by a candidate weight vector.

    ``new_step`` is a flattened weight vector; it is reshaped into
    per-layer matrices, installed on the network, and the error on
    the training data is computed.
    """
    candidate_weights = vector_to_list_of_matrix(
        new_step,
        (layer.size for layer in self.train_layers)
    )
    self.set_weights(candidate_weights)
    prediction = self.predict(self.input_train)
    return self.error(prediction, self.target_train)
def get_weight_delta(self, output_train, target_train):
    """Compute quasi-Newton weight updates.

    Flattens the current gradient, stops training when its norm is
    below ``gradient_tol``, and otherwise maintains an approximation
    of the inverse Hessian that preconditions the negative gradient
    into per-layer weight deltas.

    Raises
    ------
    StopIteration
        When the gradient norm falls below ``self.gradient_tol``
        (used by the training loop as the convergence signal).
    """
    gradients = self.get_gradient(output_train, target_train)
    gradient = matrix_list_in_one_vector(gradients)

    # Convergence check: a near-zero gradient means there is
    # nothing left to optimize.
    if norm(gradient) < self.gradient_tol:
        raise StopIteration("Gradient norm less than {}"
                            "".format(self.gradient_tol))

    train_layers = self.train_layers
    # Current weights flattened into one vector, matching the
    # flattened gradient's layout.
    weight = matrix_list_in_one_vector(
        (layer.weight for layer in train_layers)
    )

    if hasattr(self, 'prev_gradient'):
        # In first epoch we didn't have previous weights and
        # gradients. For this reason we skip quasi coefficient
        # computation. Here we update the inverse-Hessian estimate
        # from the weight and gradient differences.
        inverse_hessian = self.update_function(
            self.prev_inverse_hessian,
            weight - self.prev_weight,
            gradient - self.prev_gradient
        )
    else:
        # First epoch: start from a scaled identity matrix as the
        # initial inverse-Hessian approximation.
        inverse_hessian = self.h0_scale * eye(weight.size, dtype=int)

    # Save state needed by the next epoch's update.
    self.prev_weight = weight.copy()
    self.prev_gradient = gradient.copy()
    self.prev_inverse_hessian = inverse_hessian

    # Newton-like step: negative gradient preconditioned by the
    # inverse-Hessian estimate, reshaped into per-layer matrices.
    return vector_to_list_of_matrix(
        -inverse_hessian.dot(gradient),
        (layer.size for layer in train_layers)
    )
def check_updates(self, new_step):
    """Apply candidate weights and return the resulting training error.

    The flattened vector ``new_step`` is unpacked into per-layer
    matrices before being installed on the network.
    """
    shapes = (layer.size for layer in self.train_layers)
    self.set_weights(vector_to_list_of_matrix(new_step, shapes))
    return self.error(self.predict(self.input_train), self.target_train)
def get_gradient_by_weights(self, weights):
    """Return the flattened gradient evaluated at ``weights``.

    ``weights`` is a single flattened vector; it is unpacked into
    per-layer matrices and set on the network before the gradient
    is computed.
    """
    as_matrices = vector_to_list_of_matrix(
        weights,
        (layer.size for layer in self.train_layers)
    )
    self.set_weights(as_matrices)
    gradient_matrices = self.get_gradient(self.output_train, self.target_train)
    return matrix_list_in_one_vector(gradient_matrices)