Example #1
    def verify_gradient(self, train, target, k):
        """Compare backprop gradients against finite-difference estimates for the first k examples."""
        for i in range(k):
            x = train[i]
            y = target[i]

            fprop_r = fprop(self.W1, self.W2, self.b1, self.b2, x, y)
            bprop_r = bprop(fprop_r, self.W1, self.W2, self.b1, self.b2, x, y, self.m)
            L = np.sum(fprop_r['loss'])

            # Finite-difference estimates of each gradient
            grad_w2_diff = check_grad_w2(L, self.W1, self.W2, self.b1, self.b2, x, y, self.epsilon)
            grad_w1_diff = check_grad_w1(L, self.W1, self.W2, self.b1, self.b2, x, y, self.epsilon)
            grad_b2_diff = check_grad_b2(L, self.W1, self.W2, self.b1, self.b2, x, y, self.epsilon)
            grad_b1_diff = check_grad_b1(L, self.W1, self.W2, self.b1, self.b2, x, y, self.epsilon)

            # Each ratio should be close to 1 when the analytic and numerical gradients agree
            grad_ratio_b1 = (bprop_r['grad_b1'] + self.epsilon) / (grad_b1_diff + self.epsilon)
            grad_ratio_w1 = (bprop_r['grad_w1'] + self.epsilon) / (grad_w1_diff + self.epsilon)
            grad_ratio_b2 = (bprop_r['grad_b2'] + self.epsilon) / (grad_b2_diff + self.epsilon)
            grad_ratio_w2 = (bprop_r['grad_w2'] + self.epsilon) / (grad_w2_diff + self.epsilon)

            def check_grad_ratio(ratio):
                return (ratio > 0.99).all() and (ratio < 1.01).all()

            if (check_grad_ratio(grad_ratio_b2) and check_grad_ratio(grad_ratio_w2)
                    and check_grad_ratio(grad_ratio_b1) and check_grad_ratio(grad_ratio_w1)):
                print('Gradient verified for element {0} ✓'.format(i))
            else:
                print('Gradient error for element {0} ✗'.format(i))
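The ratio test above treats the gradient as verified when every element-wise ratio between the backprop gradient and its finite-difference estimate falls in (0.99, 1.01). A common alternative check, shown here only as a standalone sketch and not part of the class above, is the symmetric relative error, which stays meaningful even when individual entries are near zero:

import numpy as np

def relative_error(grad_analytic, grad_numeric, eps=1e-12):
    # Largest symmetric relative error between two gradient arrays;
    # values around 1e-6 or smaller usually indicate a correct gradient.
    num = np.abs(grad_analytic - grad_numeric)
    den = np.maximum(np.abs(grad_analytic) + np.abs(grad_numeric), eps)
    return np.max(num / den)

# Toy usage with made-up arrays
a = np.array([[0.31, -0.02], [1.20, 0.45]])
n = a + 1e-9  # stand-in for a finite-difference estimate
print(relative_error(a, n))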
Example #2
    def train(self,
              train,
              target,
              lamdas,
              learning_rate,
              k=None,
              iterations=100,
              valid=None,
              valid_target=None,
              test=None,
              test_target=None):
        t = time.process_time()
        self.total_grad = 0
        cursor = 0
        axis = 1

        # Default to full-batch updates when no minibatch size is given
        if k is None:
            batch_size = train.shape[0]
        else:
            batch_size = k

        for _ in range(iterations):
            # Take the next minibatch by rotating the data so the current window starts at the cursor
            x = np.roll(train, -cursor * batch_size, axis=0)[:batch_size]
            y = np.roll(target, -cursor * batch_size, axis=0)[:batch_size]

            fprop_r = fprop(self.W1, self.W2, self.b1, self.b2, x, y)
            bprop_r = bprop(fprop_r, self.W1, self.W2, self.b1, self.b2, x, y,
                            self.m)

            self.total_grad += np.sum(bprop_r['grad_oa'])

            # Weight-penalty term (linear and squared sums of both weight matrices)
            regularization = lamdas[0, 0] * self.W1.sum() + \
                             lamdas[0, 1] * np.square(self.W1).sum() + \
                             lamdas[1, 0] * self.W2.sum() + \
                             lamdas[1, 1] * np.square(self.W2).sum()

            # Gradient-descent updates on the weights and biases
            self.W1 -= (learning_rate * (bprop_r['grad_w1'] + regularization))
            self.W2 -= (learning_rate * (bprop_r['grad_w2'] + regularization))

            self.b1 -= np.sum((learning_rate * bprop_r['grad_b1']), axis=axis)
            self.b2 -= np.sum((learning_rate * bprop_r['grad_b2']), axis=axis)

            cursor += 1
            if cursor * batch_size >= train.shape[0]:
                if self.show_epoch:
                    elapsed_time = time.process_time() - t
                    print('1 epoch time: ~{0} s'.format(elapsed_time))

                if self.save_datapoints:
                    self.calculate_and_show_errors(train, target, valid,
                                                   valid_target, test,
                                                   test_target)

                cursor = 0

        if self.save_datapoints:
            with open('datapoints.json', 'w') as f:
                json.dump(self.data_points, f)
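The minibatch selection in this version relies on np.roll: the training matrix is rotated so that the next window of batch_size rows always sits at the top, and the window wraps around once the cursor passes the end of the data. A minimal standalone sketch of that indexing pattern, using a made-up 6-row array:

import numpy as np

data = np.arange(6).reshape(6, 1)   # stand-in for a 6-example training set
batch_size = 4

for cursor in range(3):
    batch = np.roll(data, -cursor * batch_size, axis=0)[:batch_size]
    print(cursor, batch.ravel())
# 0 [0 1 2 3]
# 1 [4 5 0 1]   <- wraps around to the start of the data
# 2 [2 3 4 5]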
Example #3
    def train(self, train, target, lamdas, learning_rate, k=None, iterations=100, valid=None, valid_target=None, test=None, test_target=None):
        cursor = 0
        self.total_grad = 0
        t = time.process_time()

        if k is None:
            batch_size = train.shape[0]
        else:
            batch_size = k

        for _ in range(iterations):
            # Accumulators for the gradients over the current minibatch
            total_grad_w1 = 0
            total_grad_w2 = 0
            total_grad_b1 = 0
            total_grad_b2 = 0
            total_grad_oa = 0

            for _ in range(batch_size):
                x = train[cursor]
                y = target[cursor]

                fprop_r = fprop(self.W1, self.W2, self.b1, self.b2, x, y)
                bprop_r = bprop(fprop_r, self.W1, self.W2, self.b1, self.b2, x, y, self.m)

                self.total_grad += np.sum(bprop_r['grad_oa'])
                total_grad_w1 += bprop_r['grad_w1']
                total_grad_w2 += bprop_r['grad_w2']
                total_grad_b1 += bprop_r['grad_b1']
                total_grad_b2 += bprop_r['grad_b2']
                total_grad_oa += bprop_r['grad_oa']

                cursor += 1
                if cursor >= train.shape[0]:
                    if self.show_epoch:
                        elapsed_time = time.process_time() - t
                        print('1 epoch time: ~{0} s'.format(elapsed_time))

                    if self.save_datapoints:
                        self.calculate_and_show_errors(train, target, valid, valid_target, test, test_target)

                # Wrap the cursor so it keeps cycling through the training set
                cursor = cursor % train.shape[0]

            self.total_grad += np.sum(total_grad_oa)

            # Weight-penalty term (linear and squared sums of both weight matrices)
            regularization = lamdas[0, 0] * self.W1.sum() + \
                             lamdas[0, 1] * np.square(self.W1).sum() + \
                             lamdas[1, 0] * self.W2.sum() + \
                             lamdas[1, 1] * np.square(self.W2).sum()

            # One gradient-descent update using the accumulated minibatch gradients
            self.W1 -= (learning_rate * (total_grad_w1 + regularization))
            self.W2 -= (learning_rate * (total_grad_w2 + regularization))

            self.b1 -= np.sum((learning_rate * total_grad_b1), axis=1)
            self.b2 -= np.sum((learning_rate * total_grad_b2), axis=1)

            if self.save_datapoints:
                with open('datapoints.json', 'w') as f:
                    json.dump(self.data_points, f)
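Unlike the vectorized version in Example #2, this variant walks through the minibatch one example at a time and sums the per-example gradients before applying a single update. For a loss that is a sum over examples, the two accumulation styles give the same gradient; a quick check on a toy squared-error loss illustrates this (the sketch below is independent of the network code above):

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(4, 3))   # 4 examples, 3 features
y = rng.normal(size=(4, 1))
w = rng.normal(size=(3, 1))

# Gradient of the summed squared error, computed on the whole batch at once
grad_batch = 2 * X.T @ (X @ w - y)

# Same gradient accumulated one example at a time
grad_accum = np.zeros_like(w)
for i in range(X.shape[0]):
    xi = X[i:i + 1]
    grad_accum += 2 * xi.T @ (xi @ w - y[i:i + 1])

print(np.allclose(grad_batch, grad_accum))  # True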
Example #4
    def calculate_and_show_errors(self, train, train_target, valid, valid_target, test, test_target):
        if self.save_datapoints:
            # Compute the training error rate
            fprop_train = fprop(self.W1, self.W2, self.b1, self.b2, train, train_target)

            self.data_points['train_loss'].append(np.sum(fprop_train['loss']))
            self.data_points['train_error'].append(self.calculate_error(fprop_train['os'], train_target))

            # Compute the validation error rate
            fprop_valid = fprop(self.W1, self.W2, self.b1, self.b2, valid, valid_target)

            self.data_points['valid_loss'].append(np.sum(fprop_valid['loss']))
            self.data_points['valid_error'].append(self.calculate_error(fprop_valid['os'], valid_target))

            # Compute the test error rate
            fprop_test = fprop(self.W1, self.W2, self.b1, self.b2, test, test_target)

            self.data_points['test_loss'].append(np.sum(fprop_test['loss']))
            self.data_points['test_error'].append(self.calculate_error(fprop_test['os'], test_target))
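The helper calculate_error is not shown in these examples. As a purely hypothetical sketch, an error rate computed from the network outputs 'os', assuming one column of class scores per example and one-hot targets laid out the same way (the orientation np.argmax(os, axis=0) assumes in Example #5), could look like:

import numpy as np

def calculate_error(os, target):
    # Hypothetical sketch: fraction of examples whose predicted class
    # (argmax over the class axis) differs from the target class.
    predicted = np.argmax(np.asarray(os), axis=0)
    expected = np.argmax(np.asarray(target), axis=0)
    return np.mean(predicted != expected)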
Example #5
    def show_decision_regions(self, train_data, title='decision regions'):
        def combine(*seqin):
            '''returns a list of all combinations of argument sequences.
            for example: combine((1,2),(3,4)) returns
            [[1, 3], [1, 4], [2, 3], [2, 4]]'''

            def rloop(seqin, listout, comb):
                '''recursive looping function'''
                if seqin:  # any more sequences to process?
                    for item in seqin[0]:
                        newcomb = comb + [item]  # add next item to current comb
                        # call rloop w/ rem seqs, newcomb
                        rloop(seqin[1:], listout, newcomb)
                else:  # processing last sequence
                    listout.append(comb)  # comb finished, add to list

            listout = []  # listout initialization
            rloop(seqin, listout, [])  # start recursive process
            return listout

        d1 = train_data[train_data[:, -1].getA1() > 0]
        d2 = train_data[train_data[:, -1].getA1() == 0]

        plt.figure()

        plt.scatter(d1[:, 0], d1[:, 1], c='b', label='class 1')
        plt.scatter(d2[:, 0], d2[:, 1], c='g', label='class 0')

        xgrid = np.linspace(np.min(train_data[:, 0]) - 0.5,
                            np.max(train_data[:, 0]) + 0.5,
                            100)
        ygrid = np.linspace(np.min(train_data[:, 1]) - 0.5,
                            np.max(train_data[:, 1]) + 0.5,
                            100)

        # Compute the Cartesian product of the two coordinate grids
        # and put the result in a matrix
        thegrid = np.matrix(combine(xgrid, ygrid))

        # Predict a class for every point of the grid
        classesPred = []
        for x in thegrid:
            os = fprop(self.W1, self.W2, self.b1, self.b2, x)['os']
            classesPred.append(np.argmax(os, axis=0) + 1)

        pylab.pcolormesh(xgrid, ygrid, np.array(classesPred).reshape((100, 100)).T, alpha=.3)
        plt.xlim(np.min(train_data[:, 0]) - 0.5, np.max(train_data[:, 0]) + 0.5)
        plt.ylim(np.min(train_data[:, 1]) - 0.5, np.max(train_data[:, 1]) + 0.5)
        plt.grid()
        plt.legend(loc='lower right')
        plt.title(title)
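The hand-rolled combine helper builds the Cartesian product of the two coordinate grids. The same set of evaluation points can be produced with itertools or NumPy; a minimal standalone sketch (independent of the class above):

import itertools
import numpy as np

xgrid = np.linspace(0.0, 1.0, 100)
ygrid = np.linspace(0.0, 1.0, 100)

# Cartesian product via itertools, same ordering as combine(xgrid, ygrid)
points = np.array(list(itertools.product(xgrid, ygrid)))   # shape (10000, 2)

# Equivalent construction with meshgrid
xx, yy = np.meshgrid(xgrid, ygrid, indexing='ij')
points_mg = np.column_stack([xx.ravel(), yy.ravel()])

print(np.allclose(points, points_mg))  # True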
Example #6
def check_grad_b1(L, W1, W2, b1, b2, x, y, epsilon):
    """Finite-difference estimate of the gradient of the loss with respect to b1."""
    grad_diff = np.zeros(b1.shape)

    for i in range(b1.shape[0]):
        # Perturb one component of b1 and recompute the loss
        b1[i, 0] += epsilon

        fprop_r_diff = fprop(W1, W2, b1, b2, x, y)

        L_prime = np.sum(fprop_r_diff['loss'])

        # Restore the perturbed component
        b1[i, 0] -= epsilon

        # Forward-difference approximation of the partial derivative
        grad_diff[i, 0] = (L_prime - L) / epsilon

    return grad_diff
Example #7
def check_grad_w2(L, W1, W2, b1, b2, x, y, epsilon):
    """Finite-difference estimate of the gradient of the loss with respect to W2."""
    grad_diff = np.zeros(W2.shape)

    for i in range(W2.shape[0]):
        for j in range(W2.shape[1]):
            # Perturb one weight and recompute the loss
            W2[i, j] += epsilon

            fprop_r_diff = fprop(W1, W2, b1, b2, x, y)

            L_prime = np.sum(fprop_r_diff['loss'])

            # Restore the perturbed weight
            W2[i, j] -= epsilon

            # Forward-difference approximation of the partial derivative
            grad_diff[i, j] = (L_prime - L) / epsilon

    return grad_diff
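Both check_grad functions implement the same forward-difference recipe: nudge one parameter by epsilon, re-run the forward pass, and divide the change in loss by epsilon. A self-contained sketch of that recipe on a toy quadratic loss with a known gradient (independent of the network code above):

import numpy as np

def loss(w):
    # Toy loss with a known analytic gradient: dL/dw = 2 * w
    return np.sum(w ** 2)

def check_grad(w, epsilon=1e-5):
    L = loss(w)
    grad_diff = np.zeros_like(w)
    for i in range(w.shape[0]):
        w[i] += epsilon                       # perturb one component
        grad_diff[i] = (loss(w) - L) / epsilon
        w[i] -= epsilon                       # restore it
    return grad_diff

w = np.array([0.5, -1.0, 2.0])
print(check_grad(w))   # close to the analytic gradient [1.0, -2.0, 4.0]
print(2 * w)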