def verify_gradient(self, train, target, k):
    for i in range(k):
        x = train[i]
        y = target[i]
        fprop_r = fprop(self.W1, self.W2, self.b1, self.b2, x, y)
        bprop_r = bprop(fprop_r, self.W1, self.W2, self.b1, self.b2, x, y, self.m)
        L = fprop_r['loss']

        # Finite-difference estimates of each parameter gradient
        grad_w2_diff = check_grad_w2(L, self.W1, self.W2, self.b1, self.b2, x, y, self.epsilon)
        grad_w1_diff = check_grad_w1(L, self.W1, self.W2, self.b1, self.b2, x, y, self.epsilon)
        grad_b2_diff = check_grad_b2(L, self.W1, self.W2, self.b1, self.b2, x, y, self.epsilon)
        grad_b1_diff = check_grad_b1(L, self.W1, self.W2, self.b1, self.b2, x, y, self.epsilon)

        # Ratio between the backprop gradient and the numerical estimate;
        # epsilon is added to both terms to avoid division by zero.
        grad_ratio_b1 = (bprop_r['grad_b1'] + self.epsilon) / (grad_b1_diff + self.epsilon)
        grad_ratio_w1 = (bprop_r['grad_w1'] + self.epsilon) / (grad_w1_diff + self.epsilon)
        grad_ratio_b2 = (bprop_r['grad_b2'] + self.epsilon) / (grad_b2_diff + self.epsilon)
        grad_ratio_w2 = (bprop_r['grad_w2'] + self.epsilon) / (grad_w2_diff + self.epsilon)

        def check_grad_ratio(ratio):
            # Every entry of the ratio must stay within 1% of 1.0
            return (ratio > 0.99).all() and (ratio < 1.01).all()

        if check_grad_ratio(grad_ratio_b2) and check_grad_ratio(grad_ratio_w2) and \
                check_grad_ratio(grad_ratio_b1) and check_grad_ratio(grad_ratio_w1):
            print('Gradient verified for element {0} ✓'.format(i))
        else:
            print('Gradient error for element {0} X'.format(i))
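# --- Illustrative sketch (not part of the original code) ---------------------
# The check_grad_* helpers below all rely on the forward-difference
# approximation dL/dtheta ~= (L(theta + eps) - L(theta)) / eps. A minimal
# standalone version of the same idea; the helper name finite_diff_grad and
# the quadratic toy loss are assumptions made purely for illustration.
import numpy as np

def finite_diff_grad(loss_fn, theta, epsilon=1e-5):
    """Forward-difference estimate of the gradient of a scalar loss_fn."""
    base = loss_fn(theta)
    grad = np.zeros_like(theta)
    for idx in np.ndindex(theta.shape):
        theta[idx] += epsilon                       # perturb one parameter
        grad[idx] = (loss_fn(theta) - base) / epsilon
        theta[idx] -= epsilon                       # restore it
    return grad

theta = np.array([1.0, -2.0, 0.5])
print(finite_diff_grad(lambda t: np.sum(t ** 2), theta))  # ~ 2 * theta
# -----------------------------------------------------------------------------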
def train(self, train, target, lamdas, learning_rate, k=None, iterations=100,
          valid=None, valid_target=None, test=None, test_target=None):
    t = time.process_time()
    self.total_grad = 0
    cursor = 0
    axis = 1
    # k is the minibatch size; default to full-batch gradient descent
    if k is None:
        batch_size = train.shape[0]
    else:
        batch_size = k
    for _ in range(iterations):
        # Cycle through the data by rolling it so the next batch_size rows
        # land at the front of the array
        x = np.roll(train, -1 * cursor * batch_size, axis=0)[:batch_size]
        y = np.roll(target, -1 * cursor * batch_size, axis=0)[:batch_size]
        fprop_r = fprop(self.W1, self.W2, self.b1, self.b2, x, y)
        bprop_r = bprop(fprop_r, self.W1, self.W2, self.b1, self.b2, x, y, self.m)
        self.total_grad += np.sum(bprop_r['grad_oa'])
        # Scalar L1/L2 penalty term added to every weight-gradient entry
        regularization = lamdas[0, 0] * self.W1.sum() + \
                         lamdas[0, 1] * np.square(self.W1).sum() + \
                         lamdas[1, 0] * self.W2.sum() + \
                         lamdas[1, 1] * np.square(self.W2).sum()
        self.W1 -= learning_rate * (bprop_r['grad_w1'] + regularization)
        self.W2 -= learning_rate * (bprop_r['grad_w2'] + regularization)
        # Bias gradients are summed over the batch dimension
        self.b1 -= np.sum(learning_rate * bprop_r['grad_b1'], axis=axis)
        self.b2 -= np.sum(learning_rate * bprop_r['grad_b2'], axis=axis)
        cursor += 1
        if cursor * batch_size >= train.shape[0]:
            # One full pass over the training set
            if self.show_epoch:
                elapsed_time = time.process_time() - t
                print('1 epoch time: ~{0} s'.format(elapsed_time))
            if self.save_datapoints:
                self.calculate_and_show_errors(train, target, valid, valid_target,
                                               test, test_target)
            cursor = 0
    if self.save_datapoints:
        with open('datapoints.json', 'w+') as f:
            f.write(json.dumps(self.data_points))
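# --- Illustrative sketch (not part of the original code) ---------------------
# How the np.roll indexing in train() walks through the data: each step shifts
# the array so that the next batch_size rows land at the front, wrapping
# around at the end of the training set. Toy data chosen purely for
# illustration.
import numpy as np

data = np.arange(10).reshape(5, 2)                  # 5 examples, 2 features
batch_size = 2
for cursor in range(3):
    batch = np.roll(data, -1 * cursor * batch_size, axis=0)[:batch_size]
    print(cursor, batch.tolist())
# cursor 0 -> rows 0,1   cursor 1 -> rows 2,3   cursor 2 -> rows 4 and 0 (wrap)
# -----------------------------------------------------------------------------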
def train(self, train, target, lamdas, learning_rate, k=None, iterations=100,
          valid=None, valid_target=None, test=None, test_target=None):
    cursor = 0
    self.total_grad = 0
    t = time.process_time()
    if k is None:
        batch_size = train.shape[0]
    else:
        batch_size = k
    for _ in range(iterations):
        # Accumulate the gradient of each example in the minibatch
        total_grad_w1 = 0
        total_grad_w2 = 0
        total_grad_b1 = 0
        total_grad_b2 = 0
        total_grad_oa = 0
        for _ in range(batch_size):
            x = train[cursor]
            y = target[cursor]
            fprop_r = fprop(self.W1, self.W2, self.b1, self.b2, x, y)
            bprop_r = bprop(fprop_r, self.W1, self.W2, self.b1, self.b2, x, y, self.m)
            self.total_grad += np.sum(bprop_r['grad_oa'])
            total_grad_w1 += bprop_r['grad_w1']
            total_grad_w2 += bprop_r['grad_w2']
            total_grad_b1 += bprop_r['grad_b1']
            total_grad_b2 += bprop_r['grad_b2']
            total_grad_oa += bprop_r['grad_oa']
            cursor += 1
            if cursor >= train.shape[0]:
                # End of an epoch: report timing and errors, wrap the cursor
                if self.show_epoch:
                    elapsed_time = time.process_time() - t
                    print('1 epoch time: ~{0} s'.format(elapsed_time))
                if self.save_datapoints:
                    self.calculate_and_show_errors(train, target, valid, valid_target,
                                                   test, test_target)
                cursor = cursor % train.shape[0]
        self.total_grad += np.sum(total_grad_oa)
        regularization = lamdas[0, 0] * self.W1.sum() + \
                         lamdas[0, 1] * np.square(self.W1).sum() + \
                         lamdas[1, 0] * self.W2.sum() + \
                         lamdas[1, 1] * np.square(self.W2).sum()
        # Update with the accumulated (summed) minibatch gradients
        self.W1 -= learning_rate * (total_grad_w1 + regularization)
        self.W2 -= learning_rate * (total_grad_w2 + regularization)
        self.b1 -= np.sum(learning_rate * total_grad_b1, axis=1)
        self.b2 -= np.sum(learning_rate * total_grad_b2, axis=1)
    if self.save_datapoints:
        with open('datapoints.json', 'w+') as f:
            f.write(json.dumps(self.data_points))
def calculate_and_show_errors(self, train, train_target, valid, valid_target, test, test_target):
    if self.save_datapoints:
        # Compute the training loss and error rate
        fprop_train = fprop(self.W1, self.W2, self.b1, self.b2, train, train_target)
        self.data_points['train_loss'].append(np.sum(fprop_train['loss']))
        self.data_points['train_error'].append(self.calculate_error(fprop_train['os'], train_target))

        # Compute the validation loss and error rate
        fprop_valid = fprop(self.W1, self.W2, self.b1, self.b2, valid, valid_target)
        self.data_points['valid_loss'].append(np.sum(fprop_valid['loss']))
        self.data_points['valid_error'].append(self.calculate_error(fprop_valid['os'], valid_target))

        # Compute the test loss and error rate
        fprop_test = fprop(self.W1, self.W2, self.b1, self.b2, test, test_target)
        self.data_points['test_loss'].append(np.sum(fprop_test['loss']))
        self.data_points['test_error'].append(self.calculate_error(fprop_test['os'], test_target))
def show_decision_regions(self, train_data, title='decision regions'):
    def combine(*seqin):
        '''Return a list of all combinations of the argument sequences.
        For example: combine((1,2),(3,4)) returns [[1, 3], [1, 4], [2, 3], [2, 4]].'''
        def rloop(seqin, listout, comb):
            '''Recursive looping function.'''
            if seqin:                                   # any more sequences to process?
                for item in seqin[0]:
                    newcomb = comb + [item]             # add next item to the current combination
                    rloop(seqin[1:], listout, newcomb)  # recurse on the remaining sequences
            else:                                       # processing the last sequence
                listout.append(comb)                    # combination finished, add it to the list
        listout = []
        rloop(seqin, listout, [])
        return listout

    # Scatter the training points by class
    d1 = train_data[train_data[:, -1].getA1() > 0]
    d2 = train_data[train_data[:, -1].getA1() == 0]
    plt.figure()
    plt.scatter(d1[:, 0], d1[:, 1], c='b', label='class 1')
    plt.scatter(d2[:, 0], d2[:, 1], c='g', label='class 0')

    xgrid = np.linspace(np.min(train_data[:, 0]) - 0.5, np.max(train_data[:, 0]) + 0.5, 100)
    ygrid = np.linspace(np.min(train_data[:, 1]) - 0.5, np.max(train_data[:, 1]) + 0.5, 100)

    # Cartesian product of the two grid axes, stored as a matrix of points
    thegrid = np.matrix(combine(xgrid, ygrid))

    # Predicted class for every grid point
    classesPred = []
    for x in thegrid:
        os = fprop(self.W1, self.W2, self.b1, self.b2, x)['os']
        classesPred.append(np.argmax(os, axis=0) + 1)

    pylab.pcolormesh(xgrid, ygrid, np.array(classesPred).reshape((100, 100)).T, alpha=.3)
    plt.xlim(np.min(train_data[:, 0]) - 0.5, np.max(train_data[:, 0]) + 0.5)
    plt.ylim(np.min(train_data[:, 1]) - 0.5, np.max(train_data[:, 1]) + 0.5)
    plt.grid()
    plt.legend(loc='lower right')
    plt.title(title)
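# --- Illustrative sketch (not part of the original code) ---------------------
# The recursive combine() helper above builds the Cartesian product of the two
# grid axes; itertools.product yields the pairs in the same order, shown here
# on a small toy grid as a point of comparison.
import itertools
import numpy as np

xgrid = np.linspace(0.0, 1.0, 3)
ygrid = np.linspace(0.0, 1.0, 3)
thegrid = np.array(list(itertools.product(xgrid, ygrid)))
print(thegrid.shape)                                # (9, 2): every (x, y) pair
# -----------------------------------------------------------------------------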
def check_grad_b1(L, W1, W2, b1, b2, x, y, epsilon):
    grad_diff = np.zeros(b1.shape)
    for i in range(b1.shape[0]):
        b1[i, 0] += epsilon
        fprop_r_diff = fprop(W1, W2, b1, b2, x, y)
        L_prime = np.sum(fprop_r_diff['loss'])
        b1[i, 0] -= epsilon
        grad_diff[i, 0] = (L_prime - L) / epsilon
    return grad_diff
def check_grad_w2(L, W1, W2, b1, b2, x, y, epsilon):
    grad_diff = np.zeros(W2.shape)
    for i in range(W2.shape[0]):
        for j in range(W2.shape[1]):
            W2[i, j] += epsilon
            fprop_r_diff = fprop(W1, W2, b1, b2, x, y)
            # Sum the loss so L_prime is a scalar, consistent with check_grad_b1
            L_prime = np.sum(fprop_r_diff['loss'])
            W2[i, j] -= epsilon
            grad_diff[i, j] = (L_prime - L) / epsilon
    return grad_diff