def fit(self,
            X,
            Y,
            learning_rate=10e-6,
            regularisation=10e-1,
            epochs=10000,
            show_fig=False):
        X, Y = shuffle(X, Y)

        # print("X.shape"+str(X.shape))
        # print("Y.shape"+str(Y.shape))
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        # Tvalid = y2indicator(Yvalid)  # not needed: cost2 works on the labels directly
        X, Y = X[:-1000], Y[:-1000]
        # print("X.shape"+str(X.shape))
        # print("Y.shape"+str(Y.shape))
        N, D = X.shape
        K = len(set(Y))
        T = y2indicator(Y)  # one-hot targets, needed for gradient descent

        self.W1, self.b1 = init_weight_and_bias(D, self.M)
        self.W2, self.b2 = init_weight_and_bias(self.M, K)

        costs = []
        best_validation_error = 1
        for i in range(epochs):
            # forward propagation
            pY, Z = self.forward(X)

            # gradient descent
            pY_T = pY - T
            self.W2 -= learning_rate * (Z.T.dot(pY_T) +
                                        regularisation * self.W2)
            self.b2 -= learning_rate * (
                (pY_T).sum(axis=0) + regularisation * self.b2)

            # dZ = pY_T.dot(self.W2.T) * (Z>0) #Relu
            dZ = pY_T.dot(self.W2.T) * (1 - Z * Z)  # Tanh
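            # note: the commented relu line applies only when forward() uses a
            # relu hidden layer; with tanh hidden units keep the (1 - Z*Z) factor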
            self.W1 -= learning_rate * (X.T.dot(dZ) + regularisation * self.W1)
            self.b1 -= learning_rate * (dZ.sum(axis=0) +
                                        regularisation * self.b1)

            if i % 10 == 0:
                pYvalid, _ = self.forward(Xvalid)
                c = cost2(Yvalid, pYvalid)
                costs.append(c)
                e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))
                print("i : " + str(i) + "; Cost : " + str(c) + "; Error : " +
                      str(e))
                if e < best_validation_error:
                    best_validation_error = e

        print("Best Validation error : " + str(best_validation_error))

        if show_fig:
            plt.plot(costs)
            plt.show()
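Every example in this listing leans on the same imports and helpers (shuffle, y2indicator / indicator, cost2, error_rate, init_weight_and_bias) without showing them. A minimal sketch of what they are assumed to do; the bodies below are common implementations, not the original utility module:

import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.utils import shuffle  # joint row-wise shuffle of X and Y

def init_weight_and_bias(M1, M2):
    # fan-in-scaled random weights, zero biases
    W = np.random.randn(M1, M2) / np.sqrt(M1)
    b = np.zeros(M2)
    return W, b

def y2indicator(y):
    # integer labels -> one-hot indicator matrix of shape (N, K)
    N, K = len(y), len(set(y))
    ind = np.zeros((N, K))
    ind[np.arange(N), y.astype(np.int32)] = 1
    return ind

indicator = y2indicator  # the alias used in Example No. 2

def cost2(Y, pY):
    # mean cross-entropy computed from integer labels directly,
    # which is why no indicator matrix is needed on the validation side
    N = len(Y)
    return -np.log(pY[np.arange(N), Y]).mean()

def error_rate(targets, predictions):
    return np.mean(targets != predictions)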
Example No. 2
    def fit(self,
            X,
            Y,
            learning_rate=1e-6,
            reg=1e-6,
            epochs=10000,
            show_fig=False):
        X, Y = shuffle(X, Y)
        valid_X = X[-1000:]
        valid_Y = Y[-1000:]
        train_X = X[:-1000]
        train_Y = Y[:-1000]

        T_train = indicator(train_Y)
        # T_valid = indicator(valid_Y)  # not needed: cost2 takes the labels directly

        N, D = train_X.shape
        K = len(set(train_Y))

        self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
        self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)
        self.b1 = np.zeros(self.M)
        self.b2 = np.zeros(K)

        costs = []
        best_error_rate = 1

        for i in range(epochs):
            pY, Z = self.forward(train_X)

            self.W2 -= learning_rate * (Z.T.dot(pY - T_train) + reg * self.W2)
            self.b2 -= learning_rate * (
                (pY - T_train).sum(axis=0) + reg * self.b2)

            dz = (pY - T_train).dot(self.W2.T) * (1 - Z * Z)
            self.W1 -= learning_rate * (train_X.T.dot(dz) + reg * self.W1)
            self.b1 -= learning_rate * (dz.sum(axis=0) + reg * self.b1)

            if i % 10 == 0:
                pY_valid, Z_valid = self.forward(valid_X)
                c = cost2(valid_Y, pY_valid)
                e = error_rate(valid_Y, np.argmax(pY_valid, axis=1))
                costs.append(c)
                print("i: ", i, " cost: ", c, " error: ", e)

                if e < best_error_rate:
                    best_error_rate = e
        print("best_error_rate: ", e)

        if show_fig:
            plt.plot(costs)
            plt.show()
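Each method also calls self.forward (or self.forwardprop), which the listing never defines. The (1 - Z*Z) factor in every backprop step implies a tanh hidden layer; a minimal sketch, assuming a softmax output:

    def forward(self, X):
        # tanh hidden layer; its derivative (1 - Z*Z) is what backprop uses above
        Z = np.tanh(X.dot(self.W1) + self.b1)
        A = Z.dot(self.W2) + self.b2
        expA = np.exp(A - A.max(axis=1, keepdims=True))  # numerically stable softmax
        pY = expA / expA.sum(axis=1, keepdims=True)
        return pY, Z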
Example No. 3
    def fit(self,
            X,
            T,
            learning_rate=10e-7,
            reg=10e-7,
            epochs=10000,
            show_fig=False):
        X, T = shuffle(X, T)
        X_train, T_train = X[:-1000], T[:-1000]
        X_valid, T_valid = X[-1000:], T[-1000:]

        N, D = X_train.shape
        K = len(set(T_train))

        #initialize parameters
        self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
        self.b1 = np.zeros(self.M)
        self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)
        self.b2 = np.zeros(K)

        costs = []
        best_validation_error = 1
        T_train_ind = y2indicator(T_train)  # one-hot targets; computed once, outside the loop
        for n in range(epochs):
            # forward propagation
            Y, Z = self.forwardprop(X_train)

            # gradient descent
            Y_T = Y - T_train_ind
            self.W2 -= learning_rate * (Z.T.dot(Y_T) + reg * self.W2)
            self.b2 -= learning_rate * (Y_T.sum(axis=0) + reg * self.b2)

            dZ = Y_T.dot(self.W2.T) * (1 - Z * Z)
            self.W1 -= learning_rate * (X_train.T.dot(dZ) + reg * self.W1)
            self.b1 -= learning_rate * (dZ.sum(axis=0) + reg * self.b1)

            # track validation cost and error rate
            if n % 10 == 0:
                Y_valid, _ = self.forwardprop(X_valid)
                cost = cost2(T_valid, Y_valid)
                costs.append(cost)
                er = error_rate(T_valid, np.argmax(Y_valid, axis=1))
                print(n, 'cost:', cost, 'error:', er)
                if er < best_validation_error:
                    best_validation_error = er
        print('Best validation error:', best_validation_error)

        if show_fig:
            plt.plot(costs)
            plt.title('cross entropy loss')
            plt.show()
Example No. 4
    def fit(self,
            X,
            Y,
            Xvalid,
            Yvalid,
            learning_rate=1e-6,
            reg=1e-6,
            epochs=10000,
            show_fig=False):
        Tvalid = y2indicator(Yvalid)

        N, D = X.shape
        K = len(set(Y) | set(Yvalid))
        T = y2indicator(Y)

        self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
        self.b1 = np.zeros(self.M)
        self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)
        self.b2 = np.zeros(K)

        costs = []
        best_validation_error = 1
        for i in range(epochs):
            # forward propagation and cost calculation
            pY, Z = self.forward(X)

            # Gradient Descent step
            # This is the first dataset where the L2 concept is introduced. Note
            # that the L2 penalty is written into the loss function itself; after
            # differentiating the whole loss, ||W||^2 becomes a term linear in W,
            # as in the update expressions below.
            pY_T = pY - T  # cache this difference so the updates below reuse it
            self.W2 -= learning_rate * (Z.T.dot(pY_T) + reg * self.W2)
            self.b2 -= learning_rate * (pY_T.sum(axis=0) + reg * self.b2)
            # dZ = pY_T.dot(self.W2.T) * (Z > 0) # relu
            dZ = pY_T.dot(self.W2.T) * (1 - Z * Z)  # tanh
            self.W1 -= learning_rate * (X.T.dot(dZ) + reg * self.W1)
            self.b1 -= learning_rate * (dZ.sum(axis=0) + reg * self.b1)

            if i % 20 == 0:
                pYvalid, _ = self.forward(Xvalid)
                c = cost2(Yvalid, pYvalid)
                # c = cost(Tvalid, pYvalid)
                costs.append(c)
                e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))
                print("i:", i, "cost:", c, "error:", e)
                if e < best_validation_error:
                    best_validation_error = e
        print('best_validation_error:', best_validation_error)

        if show_fig:
            plt.plot(costs)
            plt.show()
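As the comment in this example says, the reg * W terms fall straight out of differentiating the L2 penalty. With the regularized loss J = cross-entropy + (reg/2) * (||W1||^2 + ||W2||^2), the weight gradients are dJ/dW2 = Z.T.dot(pY - T) + reg * W2 and dJ/dW1 = X.T.dot(dZ) + reg * W1, which is exactly what each update applies (the 1/2 factor is folded into reg).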
Example No. 5
    def fit(self,
            X,
            Y,
            learning_rate=10e-7,
            reg=10e-7,
            epochs=10000,
            show_fig=False):
        X, Y = shuffle(X, Y)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        # Tvalid = y2indicator(Yvalid)
        X, Y = X[:-1000], Y[:-1000]

        N, D = X.shape
        K = len(set(Y))
        T = y2indicator(Y)
        self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
        self.b1 = np.zeros(self.M)
        self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)
        self.b2 = np.zeros(K)

        costs = []
        best_validation_error = 1
        for i in range(epochs):
            # forward propagation and cost calculation
            pY, Z = self.forward(X)

            # gradient descent step
            pY_T = pY - T
            self.W2 -= learning_rate * (Z.T.dot(pY_T) + reg * self.W2)
            self.b2 -= learning_rate * (pY_T.sum(axis=0) + reg * self.b2)
            # dZ = pY_T.dot(self.W2.T) * (Z > 0) # relu
            dZ = pY_T.dot(self.W2.T) * (1 - Z * Z)  # tanh
            self.W1 -= learning_rate * (X.T.dot(dZ) + reg * self.W1)
            self.b1 -= learning_rate * (dZ.sum(axis=0) + reg * self.b1)

            if i % 10 == 0:
                pYvalid, _ = self.forward(Xvalid)
                c = cost2(Yvalid, pYvalid)
                costs.append(c)
                e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))
                s = self.score(Xvalid, Yvalid)
                print("i:", i, "cost:", c, "error:", e, "score:", s)
                if e < best_validation_error:
                    best_validation_error = e
        print("best_validation_error:", best_validation_error)

        if show_fig:
            plt.plot(costs)
            plt.show()
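Example No. 5 additionally calls self.score, which is not shown. A minimal sketch, assuming it returns plain classification accuracy:

    def score(self, X, Y):
        # fraction of correct argmax predictions
        pY, _ = self.forward(X)
        return np.mean(np.argmax(pY, axis=1) == Y)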
Example No. 6
    def fit(self,
            X,
            Y,
            learning_rate=1e-6,
            reg=1e-6,
            epochs=10000,
            show_fig=False):
        X, Y = shuffle(X, Y)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        X, Y = X[:-1000], Y[:-1000]

        N, D = X.shape
        K = max(Y) + 1
        T = y2indicator(Y)
        self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
        self.b1 = np.zeros(self.M)
        self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)
        self.b2 = np.zeros(K)

        costs = []
        best_validation_error = 1

        for i in range(epochs):
            # forward propagation and cost calculation
            pY, Z = self.forward(X)

            # gradient descent step
            pY_T = pY - T
            self.W2 -= learning_rate * (Z.T.dot(pY_T) + reg * self.W2)
            self.b2 -= learning_rate * (pY_T.sum(axis=0) + reg * self.b2)
            # dZ = pY_T.dot(self.W2.T) * (Z > 0) #relu
            dZ = pY_T.dot(self.W2.T) * (1 - Z * Z)  # tanh
            self.W1 -= learning_rate * (X.T.dot(dZ) + reg * self.W1)
            self.b1 -= learning_rate * (dZ.sum(axis=0) + reg * self.b1)

            if i % 10 == 0:
                pYvalid, _ = self.forward(Xvalid)
                c = cost2(Yvalid, pYvalid)
                costs.append(c)
                e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))
                print('i: ', i, 'cost:', c, 'error: ', e)
                if e < best_validation_error:
                    best_validation_error = e
        print('best_validation_error: ', best_validation_error)

        if show_fig:
            plt.plot(costs)
            plt.show()
Example No. 7
    def fit(self,
            X,
            Y,
            learning_rate=10e-7,
            reg=10e-1,
            epochs=10000,
            show_fig=False):
        X, Y = shuffle(X, Y)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        X, Y = X[:-1000], Y[:-1000]

        N, D = X.shape
        K = len(set(Y))
        T = y2indicator(Y)

        self.W1 = np.random.randn(D, self.M) / np.sqrt(D + self.M)
        self.b1 = np.zeros(self.M)
        self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M + K)
        self.b2 = np.zeros(K)

        costs = []
        best_validation_error = 1

        for i in range(epochs):
            pY, Z = self.forward(X)
            pY_T = pY - T

            self.W2 -= learning_rate * (Z.T.dot(pY_T) + reg * self.W2)
            self.b2 -= learning_rate * (pY_T.sum(axis=0) + reg * self.b2)
            # dZ = pY_T.dot(self.W2.T) * (Z > 0)  # relu
            dZ = pY_T.dot(self.W2.T) * (1 - Z * Z)  # tanh
            self.W1 -= learning_rate * (X.T.dot(dZ) + reg * self.W1)
            self.b1 -= learning_rate * (dZ.sum(axis=0) + reg * self.b1)

            if i % 10 == 0:
                pYvalid, _ = self.forward(Xvalid)
                c = cost2(Yvalid, pYvalid)
                costs.append(c)
                e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))
                print "i:", i, "cost:", c, "error", e
                if e < best_validation_error:
                    best_validation_error = e

        print "best_validation_error:", best_validation_error

        if show_fig:
            plt.plot(costs)
            plt.show()
Example No. 8
    def fit(self, X, Y, learning_rate=1e-6, reg=1e-6, epochs=10000, show_fig=False):
        X, Y = shuffle(X, Y)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        # Tvalid = y2indicator(Yvalid)
        X, Y = X[:-1000], Y[:-1000]

        N, D = X.shape
        K = len(set(Y))
        T = y2indicator(Y)
        self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
        self.b1 = np.zeros(self.M)
        self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)
        self.b2 = np.zeros(K)

        costs = []
        best_validation_error = 1
        for i in range(epochs):
            # forward propagation and cost calculation
            pY, Z = self.forward(X)

            # gradient descent step
            pY_T = pY - T
            self.W2 -= learning_rate*(Z.T.dot(pY_T) + reg*self.W2)
            self.b2 -= learning_rate*(pY_T.sum(axis=0) + reg*self.b2)
            # dZ = pY_T.dot(self.W2.T) * (Z > 0) # relu
            dZ = pY_T.dot(self.W2.T) * (1 - Z*Z) # tanh
            self.W1 -= learning_rate*(X.T.dot(dZ) + reg*self.W1)
            self.b1 -= learning_rate*(dZ.sum(axis=0) + reg*self.b1)

            if i % 10 == 0:
                pYvalid, _ = self.forward(Xvalid)
                c = cost2(Yvalid, pYvalid)
                costs.append(c)
                e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))
                print("i:", i, "cost:", c, "error:", e)
                if e < best_validation_error:
                    best_validation_error = e
        print("best_validation_error:", best_validation_error)

        if show_fig:
            plt.plot(costs)
            plt.show()
Example No. 9
    def fit(self,
            X,
            Y,
            learning_rate=1e-6,
            reg=1e-6,
            epochs=10000,
            show_fig=False):
        X, Y = shuffle(X, Y)

        Nvalid = X.shape[0] // 5

        Xvalid, Yvalid = X[-Nvalid:], Y[-Nvalid:]
        #Tvalid = y2indicator(Yvalid)
        X, Y = X[:-Nvalid], Y[:-Nvalid]

        N, D = X.shape

        K = len(set(Y))

        T = y2indicator(Y)

        self.W1 = np.random.randn(D, self.M) / np.sqrt(D + self.M)
        self.b1 = np.zeros(self.M)
        self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M + K)
        self.b2 = np.zeros(K)

        costs = []
        best_valid_err = 1

        t0 = datetime.now()

        for i in range(epochs):
            pY, Z = self.forward(X)

            # gradient descent
            pY_T = pY - T

            self.W2 -= learning_rate * (Z.T.dot(pY_T) + reg * self.W2)
            self.b2 -= learning_rate * (pY_T.sum(axis=0) + reg * self.b2)

            # dZ = pY_T.dot(self.W2.T) * (Z > 0)  # relu
            dZ = pY_T.dot(self.W2.T) * (1 - Z * Z)  # tanh

            self.W1 -= learning_rate * (X.T.dot(dZ) + reg * self.W1)
            self.b1 -= learning_rate * (dZ.sum(axis=0) + reg * self.b1)

            if i % 20 == 0:
                pYvalid, _ = self.forward(Xvalid)
                c = cost2(Yvalid, pYvalid)
                costs.append(c)
                e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))

                dt = datetime.now() - t0
                print("i:  ", i, ".  cost:  ", c, ".  error:  ", e, ".  dt:  ",
                      dt)
                t0 = datetime.now()
                if e < best_valid_err:
                    best_valid_err = e

        print("best valid err:  ", best_valid_err)

        if show_fig:
            plt.plot(costs)
            plt.show()
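For context, a typical way to drive any of these fit methods. The class name ANN, the hidden-layer size, and getData() are assumptions for illustration, not part of the listing:

model = ANN(200)                # hypothetical class wrapping fit/forward/score
X, Y = getData()                # hypothetical loader: (N, D) features, (N,) integer labels
model.fit(X, Y, show_fig=True)  # trains and plots the validation cost curve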