def test_10(self):
    '''Creates a fake data-set with points labeled 'yes' around origin and points labeled 'no' outside'''
    arrs = []
    labels = []
    # Points about the origin (located in a box of length 16 centered at origin)
    for i in range(0, 10):
        arr = [random.randint(0, 8) * np.sign(random.random() - 0.5) for x in range(0, 2)]
        label = 'yes'
        arrs.append(arr)
        labels.append(label)
    # Points outside the box
    for i in range(0, 10):
        arr = [random.randint(10, 20) * np.sign(random.random() - 0.5) for x in range(0, 2)]
        label = 'no'
        arrs.append(arr)
        labels.append(label)
    # Add some noise
    for i in range(0, 2):
        arr = [random.randint(0, 8) * np.sign(random.random() - 0.5) for x in range(0, 2)]
        label = 'no'  # Note: this is artificially misclassified
        arrs.append(arr)
        labels.append(label)
    for i in range(0, 10):
        arr = [random.randint(10, 20) * np.sign(random.random() - 0.5) for x in range(0, 2)]
        label = 'yes'  # Note: this is artificially misclassified
        arrs.append(arr)
        labels.append(label)
    ann = Ann(arrs, labels, n_h=2)
    (models, test_accuracies, test_costs) = ann.train()
    # Pick the model with the best test accuracy
    best_test_accuracy = 0
    best_i = -1
    for i in range(0, len(test_accuracies)):
        if test_accuracies[i] > best_test_accuracy:
            best_test_accuracy = test_accuracies[i]
            best_i = i
    if best_i > -1:
        # Save the best model (indexing with best_i, not the loop variable i)
        model_name = models[best_i].name
        directory = '../Ann-models'
        path_to_file = directory + '/' + model_name
        if not os.path.exists(directory):
            os.makedirs(directory)
        pickle.dump(models[best_i], open(path_to_file, 'wb'))
    else:
        logger.error('Error: no model with a positive test accuracy was found')
def get_particle(self):
    ann = Ann()
    ann.x_train_set = self.x_train
    ann.y_train_set = self.y_train
    ann.x_valid_set = self.x_valid
    ann.y_valid_set = self.y_valid
    ann.x_test_set = self.x_test
    ann.y_test_set = self.y_test
    particle = ParticleAnn(ann)
    return particle
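# Usage sketch (assumption: get_particle belongs to the ParticleAnnFactory constructed in
# pso_optimization below, and the PSO driver calls it once per particle in the swarm):
#   factory = ParticleAnnFactory(x_train, y_train, x_valid, y_valid, x_test, y_test)
#   particle = factory.get_particle()  # a ParticleAnn wrapping an Ann pre-loaded with the data splits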
def test_1(self):
    classes = ('smiley', 'frowny')
    arrs = []
    labels = []
    file_names = []
    for c in classes:
        files = [name for name in os.listdir('../library/' + c)]
        for el in files:
            img = Image.open('../library/' + c + '/' + el).convert('L')
            img = img.resize((50, 50), Image.ANTIALIAS)
            arrs.append(img.getdata())
            labels.append(c)
            file_names.append(el)
    name = '../Ann-models/model_n_i_2500_n_o_34_n_h_2 2015-05-27 22:15:24.990089.annm'
    model = pickle.load(open(name, 'rb'))[0][0]
    ann = Ann(model)
    print(ann.h(arrs[0]))
def non_test_6(self):
    # Test if training works by checking that training lowers the cost for random small and medium size data-sets#
    # Small size random data-set with two labels
    arrs = []
    labels = []
    classes = ('cat', 'dog')
    for i in range(0, 1):
        print('\nTesting data-set ' + str(i))
        for m in range(0, 10):
            arr = [random.random() for x in range(0, 3)]
            label = classes[random.random() > 0.5]
            arrs.append(arr)
            labels.append(label)
        ann = Ann(arrs, labels)  # Create Ann with these train_examples and labels
        cost_before = ann.cost()
        ann.train()
        cost_after = ann.cost()
        self.assertTrue(cost_after <= cost_before)
    # Medium size random data-set with three labels
    arrs = []
    labels = []
    classes = ('cat', 'dog', 'bird')
    for i in range(0, 1):
        print('\nTesting data-set ' + str(i))
        for m in range(0, 10):
            arr = [random.random() for x in range(0, 5)]
            z = random.random()
            if z < 0.33:
                label = classes[0]
            elif z >= 0.33 and z < 0.66:
                label = classes[1]
            else:
                label = classes[2]
            arrs.append(arr)
            labels.append(label)
        ann = Ann(arrs, labels)  # Create Ann with these train_examples and labels
        cost_before = ann.cost()
        ann.train()
        cost_after = ann.cost()
        self.assertTrue(cost_after <= cost_before)
def test_5(self):
    # Comprehensive gradient checking #
    # Medium size data-set with more than two classes
    arrs = []
    labels = []
    classes = ('cat', 'dog', 'bird', 'turtle', 'dinosaur', 'human')
    for m in range(0, 100):
        arr = [random.random() for x in range(0, 200)]
        z = random.random()
        if z < 1 / 6:
            label = classes[0]
        elif z >= 1 / 6 and z < 2 / 6:
            label = classes[1]
        elif z >= 2 / 6 and z < 3 / 6:
            label = classes[2]
        elif z >= 3 / 6 and z < 4 / 6:
            label = classes[3]
        elif z >= 4 / 6 and z < 5 / 6:
            label = classes[4]
        else:
            label = classes[5]
        arrs.append(arr)
        labels.append(label)
    ann = Ann(arrs, labels, n_h=2)  # Create Ann with these train_examples and labels
    # L-1 matrices of partial derivatives for first example
    J = ann.backward_batch()
    T_original = copy.deepcopy(ann.Thetas)
    # Just check the neuron connections between first, second, and third layer
    for l in range(0, 2):
        shape_J = J[l].shape
        eps = 0.0001  # epsilon for a numerical approximation of the gradient
        # Randomly select 100 neuron connections to check
        a = random.sample(range(0, shape_J[0]), 10)
        b = random.sample(range(0, shape_J[1]), 10)
        for i in a:
            for j in b:
                T_e = np.zeros(shape_J)  # Matrix of zeros
                T_e[i][j] = eps
                ann.Thetas[l] = T_original[l] + T_e
                cost_e = ann.cost()  # Cost at Theta + eps
                ann.Thetas[l] = T_original[l] - T_e
                cost_minus_e = ann.cost()  # Cost at Theta - eps
                P = (cost_e - cost_minus_e) / (2 * eps)  # Numerical approximation
                J_ij = J[l].item(i, j)  # Backpropagation derivation
                self.assertAlmostEqual(P, J_ij, delta=0.001)
                ann.Thetas = copy.deepcopy(T_original)
def test_3(self):
    # Test the dimensions of the Jacobian matrices against Theta matrices for first architecture#
    n_i1 = 4  # Number of input neurons
    n_h1 = 2  # Number of hidden layers
    n_o1 = 2  # Number of output neurons
    ann1 = Ann(n_i=n_i1, n_h=n_h1, n_o=n_o1)  # Create this architecture
    x1 = [1, 2, 3, 4]  # Array as first example
    y1 = [1, 0]
    J = ann1.backward(x1, y1)
    for l in range(0, ann1.L - 1):
        self.assertEqual(ann1.Thetas[l].shape, J[l].shape)
    # Test the dimensions of the Jacobian matrices against Theta matrices for second architecture#
    n_i1 = 40  # Number of input neurons
    n_h1 = 3  # Number of hidden layers
    n_o1 = 10  # Number of output neurons
    ann1 = Ann(n_i=n_i1, n_h=n_h1, n_o=n_o1)  # Create this architecture
    x1 = 10 * [1, 2, 3, 4]  # Array as first example
    y1 = [1, 0, 1, 1, 0, 0, 1, 0, 1, 0]
    J = ann1.backward(x1, y1)
    for l in range(0, ann1.L - 1):
        self.assertEqual(ann1.Thetas[l].shape, J[l].shape)
    # Test the dimensions of the Jacobian matrices against Theta matrices for third architecture#
    n_i1 = 40  # Number of input neurons
    n_h1 = 0  # Number of hidden layers
    n_o1 = 10  # Number of output neurons
    ann1 = Ann(n_i=n_i1, n_h=n_h1, n_o=n_o1)  # Create this architecture
    x1 = 10 * [1, 2, 3, 4]  # Array as first example
    y1 = [1, 0, 1, 1, 0, 0, 1, 0, 1, 0]
    J = ann1.backward(x1, y1)
    for l in range(0, ann1.L - 1):
        self.assertEqual(ann1.Thetas[l].shape, J[l].shape)
def test_2(self):
    # Test for forward-propagation#
    # First architecture test#
    # Logistic regression (0 hidden layers) forward propagation test#
    n_i1 = 4  # Number of input neurons
    n_h1 = 0  # Number of hidden layers
    n_o1 = 1  # Number of output neurons
    ann1 = Ann(n_i=n_i1, n_h=n_h1, n_o=n_o1)  # Create this architecture
    x1 = [1, 2, 3, 4]  # Array as first example
    x2 = [-1, -1, -1, -1]  # Array as second example
    # Set all weights to zero#
    for i in range(0, len(ann1.Thetas)):
        shape = ann1.Thetas[i].shape
        self.assertEqual(shape, (1, 5))
        ann1.Thetas[i] = np.zeros(shape)
    self.assertEqual(ann1.h(x1), 0.5)
    self.assertEqual(ann1.h(x2), 0.5)
    # Set all weights to one#
    for i in range(0, len(ann1.Thetas)):
        shape = ann1.Thetas[i].shape
        self.assertEqual(shape, (1, 5))
        ann1.Thetas[i] = np.ones(shape)
    self.assertAlmostEqual(ann1.h(x1), 0.999, delta=0.001)
    self.assertAlmostEqual(ann1.h(x2), 0.0474, delta=0.0001)
    # Set all weights randomly between -1 and 1 (and test the range of output)#
    ann1 = Ann(n_i=n_i1, n_h=n_h1, n_o=n_o1)  # Create this architecture
    self.assertAlmostEqual(ann1.h(x1), 0.5, delta=0.5)  # Sigmoid always gives values between 0 and 1
    self.assertAlmostEqual(ann1.h(x2), 0.5, delta=0.5)
    # Custom Thetas weights#
    M = np.matrix([[1, -1, 0.5, -0.3, 2]])
    ann1.Thetas[0] = M
    self.assertAlmostEqual(ann1.h(x1), 0.786, delta=0.001)
    self.assertAlmostEqual(ann1.h(x2), 0.858, delta=0.001)
    # Second architecture test#
    # 1 hidden layer forward propagation test#
    n_i1 = 4  # Number of input neurons
    n_h1 = 1  # Number of hidden layers
    n_o1 = 1  # Number of output neurons
    ann1 = Ann(n_i=n_i1, n_h=n_h1, n_o=n_o1)  # Create this architecture
    x1 = [1, 2, 3, 4]  # Array as first example
    x2 = [-1, -1, -1, -1]  # Array as second example
    # Set all weights to zero#
    for i in range(0, len(ann1.Thetas)):
        shape = ann1.Thetas[i].shape
        ann1.Thetas[i] = np.zeros(shape)
    self.assertEqual(ann1.h(x1), 0.5)
    self.assertEqual(ann1.h(x2), 0.5)
    # Set all weights to one#
    for i in range(0, len(ann1.Thetas)):
        shape = ann1.Thetas[i].shape
        ann1.Thetas[i] = np.ones(shape)
    self.assertAlmostEqual(ann1.h(x1), 0.993, delta=0.001)
    self.assertAlmostEqual(ann1.h(x2), 0.767, delta=0.001)
    # Set all weights randomly between -1 and 1 (and test the range of output)#
    ann1 = Ann(n_i=n_i1, n_h=n_h1, n_o=n_o1)  # Create this architecture
    self.assertAlmostEqual(ann1.h(x1), 0.5, delta=0.5)  # Sigmoid always gives values between 0 and 1
    self.assertAlmostEqual(ann1.h(x2), 0.5, delta=0.5)
    # Custom Thetas weights#
    M1 = np.matrix([[1, -1, 0.5, -0.3, 2],
                    [1, -1, 0.5, -0.3, 2],
                    [1, -1, 0.5, -0.3, 2],
                    [1, -1, 0.5, -0.3, 2]])
    M2 = np.matrix([[1, 1, -1, 0.5, -1]])
    ann1.Thetas[0] = M1
    ann1.Thetas[1] = M2
    # a^(1) Should be [0.786 0.786 0.786 0.786 1]^T#
    self.assertAlmostEqual(ann1.h(x1), 0.545, delta=0.001)
    # a^(1) Should be [0.858 0.858 0.858 0.858 1]^T#
    self.assertAlmostEqual(ann1.h(x2), 0.571, delta=0.001)
def test_9(self):
    # function 1 (XOR function) on 1 hidden layer
    arrs = [[0, 0], [0, 1], [1, 0], [1, 1]]
    labels = ['false', 'true', 'true', 'false']
    ann = Ann(arrs, labels, n_h=1)
    # Train and save model
    model = ann.train()[0][0]  # Take the first model from the list of models in the tuple
    ann.validate_train()
    # Check to see if train_accuracy is over 90%
    self.assertTrue(ann.train_accuracy() > 0.9)
    # Load the trained model into a new neural network
    ann_from_model = Ann(model)
    # Evaluate some vectors using this neural network initialized only with a model
    self.assertEqual(ann_from_model.h_by_class(arrs[0]), 'false')
    self.assertEqual(ann_from_model.h_by_class(arrs[1]), 'true')
    x = [1.1, 0.9]
    self.assertEqual(ann_from_model.h_by_class(x), 'false')
    # function 2 on 2 hidden layers
    arrs2 = [[1, 1], [2, 2], [1, 3], [2, 10], [1, -1], [-2, -2], [1, -3], [-2, -10]]
    labels2 = ['false', 'false', 'false', 'false', 'true', 'true', 'true', 'true']
    ann = Ann(arrs2, labels2, n_h=2)
    model2 = ann.train()[0][0]
    ann.validate_train()
    # Load the second model
    ann_from_model = Ann(model2)
    # Evaluate some vectors using this neural network initialized only with a model
    self.assertEqual(ann_from_model.h_by_class(arrs2[0]), 'false')
    self.assertEqual(ann_from_model.h_by_class(arrs2[len(arrs2) - 1]), 'true')
    x = [1, -5]
    self.assertEqual(ann_from_model.h_by_class(x), 'true')
    # Load the first model again
    ann_from_model = Ann(model)
    # Evaluate some vectors using this neural network initialized only with a model
    self.assertEqual(ann_from_model.h_by_class(arrs[0]), 'false')
    self.assertEqual(ann_from_model.h_by_class(arrs[1]), 'true')
    x = [1.1, 0.9]
    self.assertEqual(ann_from_model.h_by_class(x), 'false')
    # Try pickling our model into a sister folder
    model_name = model.name
    directory = '../Ann-models'
    path_to_file = directory + '/' + model_name
    if not os.path.exists(directory):
        os.makedirs(directory)
    pickle.dump(model, open(path_to_file, 'wb'))
    # Try unpickling our model
    unpickled_model = pickle.load(open(path_to_file, 'rb'))
    # Load unpickled model and test
    ann_from_pickle = Ann(unpickled_model)
    # Evaluate some vectors using this neural network initialized only with a model
    self.assertEqual(ann_from_pickle.h_by_class(arrs[0]), 'false')
    self.assertEqual(ann_from_pickle.h_by_class(arrs[1]), 'true')
    x = [1.1, 0.9]
    self.assertEqual(ann_from_pickle.h_by_class(x), 'false')
def test_7(self):
    # Learn some basic functions#
    # Linearly-separable data-sets#
    # function 1 (AND function) on 0 hidden layers
    arrs = [[0, 0], [0, 1], [1, 0], [1, 1]]
    labels = ['false', 'true', 'true', 'true']
    ann = Ann(arrs, labels, n_h=0)
    ann.train()
    ann.validate_train()
    # Check to see if train_accuracy is over 90%
    self.assertTrue(ann.train_accuracy() > 0.9)
    # function 2 on 2 hidden layers
    arrs = [[1, 1], [2, 2], [1, 3], [2, 10], [1, -1], [-2, -2], [1, -3], [-2, -10]]
    labels = ['false', 'false', 'false', 'false', 'true', 'true', 'true', 'true']
    ann = Ann(arrs, labels, n_h=2)
    ann.train()
    ann.validate_train()
    # Check to see if train_accuracy is over 90%
    self.assertTrue(ann.train_accuracy() > 0.9)
    # Non-linearly-separable data-sets#
    # function 1 (XOR function) on 1 hidden layer
    arrs = [[0, 0], [0, 1], [1, 0], [1, 1]]
    labels = ['false', 'true', 'true', 'false']
    ann = Ann(arrs, labels, n_h=1)
    ann.train(it=3000)
    ann.validate_train()
    # Check to see if train_accuracy is over 90%
    self.assertTrue(ann.train_accuracy() > 0.9)
    # function 1b (XOR function) on 1 hidden layer (with custom architecture)
    arrs = [[0, 0], [0, 1], [1, 0], [1, 1]]
    labels = ['false', 'true', 'true', 'false']
    s = [4, 5]  # Custom hidden layer architecture
    ann = Ann(arrs, labels, n_h=len(s), s=s)
    ann.train()
    ann.validate_train()
    # Check to see if train_accuracy is over 90%
    self.assertTrue(ann.train_accuracy() > 0.9)
    # function 1 (two nested sets) on 0 hidden layers
    arrs = [[0, 0], [0, 1], [1, 1], [1, 1], [10, 0], [0, 10], [110, 10], [-10, 10]]
    labels = ['false', 'false', 'false', 'false', 'true', 'true', 'true', 'true']
    ann = Ann(arrs, labels, n_h=0)
    ann.train()
    ann.validate_train()
    # Check to see if train_accuracy is over 90%
    self.assertTrue(ann.train_accuracy() > 0.9)
def pso_optimization(generate_validation_set, n_fold, x_train, y_train, x_valid, y_valid, x_test, y_test,
                     initialization_type=pso.InitializationType.QUASI_RANDOM, use_local_search=False):
    print("\nReading dataset...")
    print("\n**** Dataset statistics *****")
    print("Training samples: " + str(len(x_train)))
    if generate_validation_set:
        print("Validation samples: " + str(len(x_valid)))
    print("Test samples: " + str(len(x_test)))
    if generate_validation_set:
        particleFactory = ParticleAnnFactory(x_train, y_train, x_valid, y_valid, x_test, y_test)
    else:
        particleFactory = ParticleAnnKFoldFactory(x_train, y_train, x_test, y_test, n_fold)
    n = 2  # Problem dimension (hyperparameters to be tuned)
    # *** Setting PSO algorithm hyperparameters
    layers_bounds = (1, 10)
    neurons_bounds = (4, 384)
    pso_hyperparameters = pso.PSOHyperparameters(n)
    pso_hyperparameters.w_start = 0.9
    pso_hyperparameters.w_end = 0.4
    pso_hyperparameters.c1 = 0.5
    pso_hyperparameters.c2 = 0.5
    pso_hyperparameters.swarm_size = 10
    pso_hyperparameters.num_generations = 10
    pso_hyperparameters.max_velocity = [3, 32]
    pso_hyperparameters.initialization_type = initialization_type
    pso_hyperparameters.use_local_search = use_local_search
    logging.info("\n\n***** PSO Configuration ******")
    logging.info("w_start : " + str(pso_hyperparameters.w_start))
    logging.info("w_end : " + str(pso_hyperparameters.w_end))
    logging.info("c1 : " + str(pso_hyperparameters.c1))
    logging.info("c2 : " + str(pso_hyperparameters.c2))
    logging.info("swarm_size : " + str(pso_hyperparameters.swarm_size))
    logging.info("num_generations : " + str(pso_hyperparameters.num_generations))
    logging.info("max_velocity : " + str(pso_hyperparameters.max_velocity))
    logging.info("bounds : " + str([layers_bounds, neurons_bounds]))
    logging.info("initialization type : " + str(pso_hyperparameters.initialization_type))
    logging.info("use local search : " + str(pso_hyperparameters.use_local_search))
    start = time.time()
    min_point, min_value = pso.get_minimum(particleFactory, n, [layers_bounds, neurons_bounds],
                                           pso_hyperparameters)
    end = time.time()
    hours, rem = divmod(end - start, 3600)
    minutes, seconds = divmod(rem, 60)
    print("\nMinimum point: " + str(min_point))
    print("Minimum value: " + str(min_value))
    print("Execution time : " + "{:0>2}:{:0>2}:{:05.2f}".format(int(hours), int(minutes), seconds))
    logging.info("\n\n***** Optimal configuration found by PSO ******")
    logging.info("N. hidden layers : " + str(min_point[0]))
    logging.info("N. neurons per layer : " + str(min_point[1]))
    logging.info("Accuracy on validation set : " + str(1 - min_value))
    logging.info("Execution time : " + "{:0>2}:{:0>2}:{:05.2f}".format(int(hours), int(minutes), seconds))
    # With the optimal structure found, retrain the network and calculate accuracy on the test set
    n_layers = int(min_point[0])
    n_neurons = int(min_point[1])
    if generate_validation_set:
        ann = Ann()
        ann.x_train_set = x_train
        ann.y_train_set = y_train
        ann.x_valid_set = x_valid
        ann.y_valid_set = y_valid
        ann.x_test_set = x_test
        ann.y_test_set = y_test
        ann.create_model(n_layers, n_neurons, len(ann.x_test_set[0]), len(ann.y_test_set[0]))
    else:
        ann = Ann()
        ann.x_train_set = x_train
        ann.y_train_set = keras.utils.to_categorical(y_train, 2)
        ann.x_test_set = x_test
        ann.y_test_set = y_test
        ann.create_model(n_layers, n_neurons, len(ann.x_test_set[0]), len(ann.y_test_set[0]))
    validation_split = 0.25
    ann.train_model(validation_split)
    accuracy = ann.evaluate_model()
    print("\nAccuracy with " + str(n_layers) + " layers and " + str(n_neurons) + " neurons: " + str(accuracy))
    logging.info("Accuracy on test set : " + str(accuracy))
def grid_search_optimization(generate_validation_set, n_fold, x_train, y_train, x_valid, y_valid, x_test, y_test):
    grid = [
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        # [4, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256, 272, 288, 304, 320, 336, 352, 368, 384]
        [4, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384]
    ]
    print("\n**** Dataset statistics *****")
    print("Training samples: " + str(len(x_train)))
    if generate_validation_set:
        print("Validation samples: " + str(len(x_valid)))
    print("Test samples: " + str(len(x_test)))
    logging.info("***** Grid Search configuration *****")
    logging.info("Grid: " + str(grid))
    start = time.time()
    if generate_validation_set:
        min_point, min_value = grid_search.grid_search(grid, x_train, y_train, x_valid, y_valid, x_test, y_test)
    else:
        min_point, min_value = grid_search.grid_search_k_fold(grid, x_train, y_train, x_test, y_test, n_fold)
    end = time.time()
    hours, rem = divmod(end - start, 3600)
    minutes, seconds = divmod(rem, 60)
    print("\nMinimum point: " + str(min_point))
    print("Minimum value: " + str(min_value))
    print("Execution time : " + "{:0>2}:{:0>2}:{:05.2f}".format(int(hours), int(minutes), seconds))
    logging.info("\n\n***** Optimal configuration found by Grid Search ******")
    logging.info("N. hidden layers : " + str(min_point[0]))
    logging.info("N. neurons per layer : " + str(min_point[1]))
    logging.info("Accuracy on validation set : " + str(min_value))
    logging.info("Execution time : " + "{:0>2}:{:0>2}:{:05.2f}".format(int(hours), int(minutes), seconds))
    # With the optimal structure found, retrain the network and calculate accuracy on the test set
    n_layers = int(min_point[0])
    n_neurons = int(min_point[1])
    if generate_validation_set:
        ann = Ann()
        ann.x_train_set = x_train
        ann.y_train_set = y_train
        ann.x_valid_set = x_valid
        ann.y_valid_set = y_valid
        ann.x_test_set = x_test
        ann.y_test_set = y_test
        ann.create_model(n_layers, n_neurons, len(ann.x_test_set[0]), len(ann.y_test_set[0]))
    else:
        ann = Ann()
        ann.x_train_set = x_train
        ann.y_train_set = keras.utils.to_categorical(y_train, 2)
        ann.x_test_set = x_test
        ann.y_test_set = y_test
        ann.create_model(n_layers, n_neurons, len(ann.x_test_set[0]), len(ann.y_test_set[0]))
    validation_split = 0.25
    ann.train_model(validation_split)
    accuracy = ann.evaluate_model()
    print("\nAccuracy with " + str(n_layers) + " layers and " + str(n_neurons) + " neurons: " + str(accuracy))
    logging.info("Accuracy on test set : " + str(accuracy))
def quasi_random_optimization(generate_validation_set, n_fold, x_train, y_train, x_valid, y_valid, x_test, y_test):
    print("\n**** Dataset statistics *****")
    print("Training samples: " + str(len(x_train)))
    if generate_validation_set:
        print("Validation samples: " + str(len(x_valid)))
    print("Test samples: " + str(len(x_test)))
    n = 2  # Problem dimension (hyperparameters to be tuned)
    n_combinations = 10
    layers_bounds = (1, 10)
    neurons_bounds = (4, 384)
    logging.info("\n\n***** Quasi-Random Search Configuration ******")
    logging.info("combinations : " + str(n_combinations))
    logging.info("bounds : " + str([layers_bounds, neurons_bounds]))
    start = time.time()
    min_point, min_value = quasi_random_search.quasi_random_search(
        n_combinations, n, [layers_bounds, neurons_bounds], x_train, y_train, x_test, y_test, n_fold)
    end = time.time()
    hours, rem = divmod(end - start, 3600)
    minutes, seconds = divmod(rem, 60)
    print("\nMinimum point: " + str(min_point))
    print("Minimum value: " + str(min_value))
    print("Execution time : " + "{:0>2}:{:0>2}:{:05.2f}".format(int(hours), int(minutes), seconds))
    logging.info("\n\n***** Optimal configuration found by Quasi-Random Search ******")
    logging.info("N. hidden layers : " + str(min_point[0]))
    logging.info("N. neurons per layer : " + str(min_point[1]))
    logging.info("Accuracy on validation set : " + str(1 - min_value))
    logging.info("Execution time : " + "{:0>2}:{:0>2}:{:05.2f}".format(int(hours), int(minutes), seconds))
    # With the optimal structure found, retrain the network and calculate accuracy on the test set
    n_layers = int(min_point[0])
    n_neurons = int(min_point[1])
    ann = Ann()
    ann.x_train_set = x_train
    ann.y_train_set = keras.utils.to_categorical(y_train, 2)
    ann.x_test_set = x_test
    ann.y_test_set = y_test
    ann.create_model(n_layers, n_neurons, len(ann.x_test_set[0]), len(ann.y_test_set[0]))
    validation_split = 0.25
    ann.train_model(validation_split)
    accuracy = ann.evaluate_model()
    print("\nAccuracy with " + str(n_layers) + " layers and " + str(n_neurons) + " neurons: " + str(accuracy))
    logging.info("Accuracy on test set : " + str(accuracy))
def test_8(self):
    # First test#
    # 1 hidden layer cost test with regularization#
    x1 = [1, 2, 3, 4]  # Array as first example
    y1 = 'yes'
    arrs = []
    labels = []
    arrs.append(x1)
    labels.append(y1)
    ann1 = Ann(arrs, labels, n_h=1)  # Create this architecture
    # Custom Thetas weights#
    M1 = np.matrix([[1, -1, 0.5, -0.3, 2],
                    [1, -1, 0.5, -0.3, 2],
                    [1, -1, 0.5, -0.3, 2],
                    [1, -1, 0.5, -0.3, 2]])
    M2 = np.matrix([[1, 1, -1, 0.5, -1]])
    ann1.Thetas[0] = M1
    ann1.Thetas[1] = M2
    cost_0 = ann1.cost()  # lam equals 0
    cost_1 = ann1.cost(lam=1)  # lam equals 1
    # Cost with regularization penalty is always higher than without regularization
    self.assertTrue(cost_1 > cost_0)
    # Gradient checking (now with regularization)#
    # Medium size data-set with several train_examples
    lam_test = 1  # Regularization parameter
    arrs = []
    labels = []
    classes = ('cat', 'dog')
    for m in range(0, 100):
        arr = [random.random() for x in range(0, 40)]
        label = classes[random.random() > 0.5]
        arrs.append(arr)
        labels.append(label)
    ann = Ann(arrs, labels, n_h=2)  # Create Ann with these train_examples and labels
    # L-1 matrices of partial derivatives for first example
    J = ann.backward_batch(lam=lam_test, batch_size=1)  # Use full-batch for gradient descent
    T_original = copy.deepcopy(ann.Thetas)
    for l in range(0, ann.L - 1):
        shape_J = J[l].shape
        eps = 0.0001  # epsilon for a numerical approximation of the gradient
        a = random.sample(range(0, shape_J[0]), 2)
        b = random.sample(range(0, shape_J[1]), 2)
        for i in a:
            for j in b:
                T_e = np.zeros(shape_J)  # Matrix of zeros
                T_e[i][j] = eps
                ann.Thetas[l] = T_original[l] + T_e
                cost_e = ann.cost(lam=lam_test)  # Cost at Theta + eps
                ann.Thetas[l] = T_original[l] - T_e
                cost_minus_e = ann.cost(lam=lam_test)  # Cost at Theta - eps
                P = (cost_e - cost_minus_e) / (2 * eps)  # Numerical approximation
                J_ij = J[l].item(i, j)  # Backpropagation derivation
                self.assertAlmostEqual(P, J_ij, delta=0.001)
                ann.Thetas = copy.deepcopy(T_original)
def test_4(self):
    # Gradient checking (check that a numerical approximation of the gradient is (almost) equal to our backpropagation derivation)#
    # First data-set with one example
    arrs = []
    labels = []
    arrs.append([1, 2, 4, 5, 5, 5])
    labels.append('cat')
    ann = Ann(arrs, labels, n_h=10)  # Create Ann with these train_examples and labels
    J = ann.backward(ann.train_examples[0].arr, ann.train_examples[0].y)
    T_original = copy.deepcopy(ann.Thetas)
    for l in range(0, ann.L - 1):
        shape_J = J[l].shape
        eps = 0.0001  # epsilon for a numerical approximation of the gradient
        for i in range(0, shape_J[0]):
            for j in range(0, shape_J[1]):
                T_e = np.zeros(shape_J)  # Matrix of zeros
                T_e[i][j] = eps
                ann.Thetas[l] = T_original[l] + T_e
                cost_e = ann.cost()  # Cost at Theta + eps
                ann.Thetas[l] = T_original[l] - T_e
                cost_minus_e = ann.cost()  # Cost at Theta - eps
                P = (cost_e - cost_minus_e) / (2 * eps)  # Numerical approximation
                J_ij = J[l].item(i, j)  # Backpropagation derivation
                self.assertAlmostEqual(P, J_ij, delta=0.001)
                ann.Thetas = copy.deepcopy(T_original)
    # Second data-set with several train_examples
    arrs = []
    labels = []
    classes = ('cat', 'dog')
    for m in range(0, 100):
        arr = [random.random() for x in range(0, 20)]
        label = classes[random.random() > 0.5]
        arrs.append(arr)
        labels.append(label)
    ann = Ann(arrs, labels, n_h=2)  # Create Ann with these train_examples and labels
    # L-1 matrices of partial derivatives for first example
    J = ann.backward_batch()
    T_original = copy.deepcopy(ann.Thetas)
    for l in range(0, ann.L - 1):
        shape_J = J[l].shape
        eps = 0.0001  # epsilon for a numerical approximation of the gradient
        a = random.sample(range(0, shape_J[0]), 2)
        b = random.sample(range(0, shape_J[1]), 2)
        for i in a:
            for j in b:
                T_e = np.zeros(shape_J)  # Matrix of zeros
                T_e[i][j] = eps
                ann.Thetas[l] = T_original[l] + T_e
                cost_e = ann.cost()  # Cost at Theta + eps
                ann.Thetas[l] = T_original[l] - T_e
                cost_minus_e = ann.cost()  # Cost at Theta - eps
                P = (cost_e - cost_minus_e) / (2 * eps)  # Numerical approximation
                J_ij = J[l].item(i, j)  # Backpropagation derivation
                self.assertAlmostEqual(P, J_ij, delta=0.001)
                ann.Thetas = copy.deepcopy(T_original)
def demo_helper():
    init_logger('debug')
    print('\t** Learn the AND function using 0 hidden layers (logistic regression) **')
    arrs = [[0, 0], [0, 1], [1, 0], [1, 1]]
    labels = ['false', 'true', 'true', 'true']
    num_hidden_layers = 0
    ann = Ann(arrs, labels, n_h=num_hidden_layers)
    ann.train()
    if ann.validate_train() == 1:
        print('\t** The AND function was learned correctly using 0 hidden layers **\n')
    else:
        print('\t** ERROR (when learning the AND function using 0 hidden layers) **\n')
    print('\t** Learn the AND function using 1 hidden layer **')
    arrs = [[0, 0], [0, 1], [1, 0], [1, 1]]
    labels = ['false', 'true', 'true', 'true']
    num_hidden_layers = 1
    ann = Ann(arrs, labels, n_h=num_hidden_layers)
    ann.train()
    if ann.validate_train() == 1:
        print('\t** The AND function was learned correctly using 1 hidden layer **\n')
    else:
        print('\t** ERROR (when learning the AND function using 1 hidden layer) **\n')
    print('\t** Learn the XOR function using 0 hidden layers (logistic regression) **')
    arrs = [[0, 0], [0, 1], [1, 0], [1, 1]]
    labels = ['false', 'true', 'true', 'false']
    num_hidden_layers = 0
    ann = Ann(arrs, labels, n_h=num_hidden_layers)
    ann.train()
    if ann.validate_train() != 1:
        print('\t** The XOR function was not learned correctly (as expected) because logistic regression (0 hidden layers)\n'
              '\tcannot create a boundary through a non-linearly separable data-set (which the XOR function is) **\n')
    else:
        print('\t** ERROR (when learning the XOR function using 0 hidden layers) **\n')
    print('\t** Learn the XOR function using 1 hidden layer **')
    arrs = [[0, 0], [0, 1], [1, 0], [1, 1]]
    labels = ['false', 'true', 'true', 'false']
    num_hidden_layers = 1
    ann = Ann(arrs, labels, n_h=num_hidden_layers)
    ann.train()
    if ann.validate_train() == 1:
        print('\t** The XOR function was learned correctly using 1 hidden layer **\n')
    else:
        print('\t** ERROR (when learning the XOR function using 1 hidden layer) **\n')
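# Hypothetical entry point, not present in the original source (assumption: demo_helper lives in
# a standalone demo script and init_logger is imported alongside Ann); running the file directly
# would then exercise the AND/XOR demos above.
if __name__ == '__main__':
    demo_helper()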
def main():
    # An array of all text files
    dir = '../library/books/'
    # Using pickle so I don't keep re-reading these books
    print('\n\nReading books..')
    books = []
    if os.path.exists(dir + '../my_books'):
        books = pickle.load(open(dir + '../my_books', 'rb'))
    else:
        # Just use the first 10 books
        file_names = [name for name in os.listdir(dir)][0:10]
        for file_name in file_names:
            m = re.search('(.*?)_(.*?)\.txt', file_name)
            # Get the author from the text file name
            author = re.sub(r'([A-Z])', r' \1', m.group(1)).strip()
            # Get the title from the text file name
            title = m.group(2).strip()
            f = codecs.open('../library/books/' + file_name, 'r', encoding='utf-8', errors='ignore')
            # print(author + ' ' + title)
            lines = f.readlines()
            book = Book(author, title, lines)
            books.append(book)
        pickle.dump(books, open(dir + '../my_books', 'wb'))
    for book in books:
        print(book.title + ' by ' + book.author + '\t\t has ' + str(len(book.sentences)) + ' sentences.')
    n = 2  # The size of our n-grams (we choose to use bi-grams)
    print('\n\nMaking a vocabulary of n-grams...')
    # Using pickle so I don't keep re-making a vocabulary
    n_gram_vocab = []
    if os.path.exists(dir + '../my_n_grams'):
        n_gram_vocab = pickle.load(open(dir + '../my_n_grams', 'rb'))
    else:
        n_gram_vocab = {}  # Treated as a set (faster 'in' operation than list)
        for book in books:
            # print(book.author + ' ' + book.title)
            # print(len(n_gram_vocab))
            n_gram_vocab = add_to_n_gram_vocab(n_gram_vocab, book.sentences, n=n)
        # n_gram_vocab = OrderedDict(n_gram_vocab)  # Convert to an ordered list
        n_gram_vocab = list(n_gram_vocab.keys())  # Convert to an ordered list
        pickle.dump(n_gram_vocab, open(dir + '../my_n_grams', 'wb'))
    print('There are ' + str(len(n_gram_vocab)) + ' n-grams of size ' + str(n))
    print('\n\nBuilding a labeled data-set...')
    # We will do our training and testing on samples where a sample is a 5-sentence continuous text
    # Chunks are further broken down into train and test sets by Ann
    # We look for the book with the smallest number of sentences and then get 50% of all of its 5-sentence chunks
    # For every other book, we randomly sample the same number of chunks (all labels have the same number of data points)
    arrs = []  # Holds vectorial representation of our 5-sentence chunks
    labels = []  # Holds the corresponding labels (author + title) of our chunks
    chunk_length = 5
    percentage = 0.5
    # Get minimum number of sentences across all our books
    min_num_sentences = -1
    for book in books:
        if len(book.sentences) < min_num_sentences or min_num_sentences == -1:
            min_num_sentences = len(book.sentences)
    for book in books:
        # We can't start a chunk at the last 4 sentences
        num_chunks = min_num_sentences - chunk_length + 1
        this_num_sentences = len(book.sentences) - chunk_length + 1
        num_samples = int(math.floor(num_chunks * percentage))
        # Randomly pick 50% of all 5-sentence chunks
        samples = random.sample(range(0, this_num_sentences), num_samples)
        label = book.title + ' by ' + book.author
        print(label)
        # Convert our sampled 5-sentence chunks into vectors
        for sample in samples:
            # print(sample)
            # Take some 5-sentence chunk
            chunk = book.sentences[sample:sample + chunk_length + 1]
            chunk = ''.join(str(elem + ' ') for elem in chunk)
            v = sen_2_vec(chunk, n_gram_vocab, n=n)
            arrs.append(v)
            labels.append(label)
    print('\n\nTraining logistic regression classifier using Ann...')
    ann = Ann(arrs, labels, n_h=0)  # n_h=0 means we are using 0 hidden layers
    ann.train(lam=100)
    print('\n\nFinding the top 5 most distinguishing bi-grams...')
    for k in range(0, len(books)):  # Number of classes
        v = ann.Thetas[0][k, :].tolist()[0]
        s = sorted((e, i) for i, e in enumerate(v))
        s.reverse()
        print(books[k].title + ' by ' + books[k].author)
        for i in range(0, 5):
            print(n_gram_vocab[s[i][1]])
if __name__ == "__main__":
    Ann.init_logger('debug')
    unittest.main()
def test_1(self):
    # Test for Ann Architecture#
    # First architecture test#
    n_i1 = 4  # Number of input neurons
    n_h1 = 2  # Number of hidden layers
    n_o1 = 1  # Number of output neurons
    ann1 = Ann(n_i=4, n_h=2, n_o=1)  # Create this architecture
    self.assertEqual(n_i1, ann1.n_i)
    self.assertEqual(n_h1, ann1.n_h)
    self.assertEqual(n_o1, ann1.n_o)
    self.assertEqual(ann1.s, [5, 5, 5, 2])
    self.assertEqual(len(ann1.Thetas), 3)
    self.assertEqual(ann1.Thetas[0].shape, (4, 5))
    self.assertEqual(ann1.Thetas[1].shape, (4, 5))
    self.assertEqual(ann1.Thetas[2].shape, (1, 5))
    # Second architecture test#
    n_i2 = 10  # Number of input neurons
    n_h2 = 1  # Number of hidden layers
    n_o2 = 2  # Number of output neurons
    ann2 = Ann(n_i=n_i2, n_h=n_h2, n_o=n_o2)  # Create this architecture
    self.assertEqual(n_i2, ann2.n_i)
    self.assertEqual(n_h2, ann2.n_h)
    self.assertEqual(n_o2, ann2.n_o)
    self.assertEqual(ann2.s, [11, 11, 3])
    self.assertEqual(len(ann2.Thetas), 2)
    self.assertEqual(ann2.Thetas[0].shape, (10, 11))
    self.assertEqual(ann2.Thetas[1].shape, (2, 11))
    # Third architecture test#
    n_i3 = 100  # Number of input neurons
    n_h3 = 0  # Number of hidden layers
    n_o3 = 10  # Number of output neurons
    ann3 = Ann(n_i=n_i3, n_h=n_h3, n_o=n_o3)  # Create this architecture
    self.assertEqual(n_i3, ann3.n_i)
    self.assertEqual(n_h3, ann3.n_h)
    self.assertEqual(n_o3, ann3.n_o)
    self.assertEqual(ann3.s, [101, 11])
    self.assertEqual(len(ann3.Thetas), 1)
    self.assertEqual(ann3.Thetas[0].shape, (10, 101))
    # Fourth architecture test#
    n_i4 = 1500  # Number of input neurons
    n_h4 = 3  # Number of hidden layers
    n_o4 = 6  # Number of output neurons
    ann4 = Ann(n_i=n_i4, n_h=n_h4, n_o=n_o4)  # Create this architecture
    self.assertEqual(n_i4, ann4.n_i)
    self.assertEqual(n_h4, ann4.n_h)
    self.assertEqual(n_o4, ann4.n_o)
    self.assertEqual(ann4.s, [1501, 31 + 1, 31 + 1, 31 + 1, 6 + 1])
    self.assertEqual(len(ann4.Thetas), 4)
    self.assertEqual(ann4.Thetas[0].shape, (31, 1501))
    self.assertEqual(ann4.Thetas[1].shape, (31, 32))
    self.assertEqual(ann4.Thetas[2].shape, (31, 32))
    self.assertEqual(ann4.Thetas[3].shape, (6, 32))
    # Fifth (arbitrary) architecture test#
    s = [3, 2]
    n_i = 4
    n_h = len(s)
    n_o = 2
    ann1 = Ann(s=s, n_i=n_i, n_h=n_h, n_o=n_o)  # Create this architecture
    self.assertEqual(n_i, ann1.n_i)
    self.assertEqual(n_h, ann1.n_h)
    self.assertEqual(n_o, ann1.n_o)
    self.assertEqual(ann1.s, [5, 3, 2, 3])
    self.assertEqual(len(ann1.Thetas), 3)
    self.assertEqual(ann1.Thetas[0].shape, (2, 5))
    self.assertEqual(ann1.Thetas[1].shape, (1, 3))
    self.assertEqual(ann1.Thetas[2].shape, (2, 2))