Example #1
def cnn_load_mnist(ratio=1):
    num_classes = 10
    img_rows, img_cols = 28, 28
    x_train, y_train = dbload.load_mnist(r"./db/mnist", kind='train', 
                                   count=int(ratio*40000))
    x_test, y_test = dbload.load_mnist(r"./db/mnist", kind='test', 
                                 count=int(ratio*10000))

    # augment with shifted and rotated copies of the training set
    x_train1, y_train1 = dbload.load_expand_mnist(offset_pixels=1)
    x_train2, y_train2 = dbload.load_expand_rotate_mnist(degree=10)
    
    x_train = np.vstack([x_train, x_train1, x_train2])
    y_train = np.hstack([y_train, y_train1, y_train2])
    x_train, y_train = scaler.shuffle(x_train, y_train)

    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255

    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    return x_train, y_train, x_test, y_test
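
The returned tuple plugs directly into Keras training. A minimal usage sketch, assuming a compiled model; the small convnet below is illustrative only and is not the architecture from this repository:

import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

x_train, y_train, x_test, y_test = cnn_load_mnist(ratio=1)

# illustrative model; assumes channels_last data format
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(10, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=128, epochs=5,
          validation_data=(x_test, y_test))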
Example #2
def expand_rotate_mnist(count=0, degree=10):
    import pickle, gzip
    import scaler, cv2
    
    ef = expand_file + 'rotate' + str(degree)
    if os.path.exists(ef):
        print("The expanded training set already exists.")
        return

    x_train, y_train = __load_mnist("./db/mnist", kind='t10k', 
                                    count=count, dtype=np.uint8)

    count = x_train.shape[0]
    # rotation matrices about the image center, by +degree and -degree
    M = cv2.getRotationMatrix2D((14, 14), degree, 1)
    M1 = cv2.getRotationMatrix2D((14, 14), -degree, 1)
    expand_x = np.zeros((count, 28, 28), dtype='uint8')
    expand_x1 = np.zeros((count, 28, 28), dtype='uint8')
    for i in range(count):
        expand_x[i] = cv2.warpAffine(x_train[i], M, (28, 28))
        expand_x1[i] = cv2.warpAffine(x_train[i], M1, (28, 28))
    
    expand_x = np.vstack([expand_x, expand_x1])
    expand_x, expand_y = scaler.shuffle(expand_x, np.tile(y_train, 2))
    print(expand_x.shape, expand_y.shape)
    print("Saving expanded data. This may take a few minutes.")
    with gzip.open(ef, "wb") as f:
        pickle.dump((expand_x, expand_y), f)
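
Because the expanded set is stored as a gzip-compressed pickle, the matching loader is symmetric. A minimal sketch of what `load_expand_rotate_mnist` (referenced in Example #1) might look like; its real body is not shown here, so treat the details as assumptions:

def load_expand_rotate_mnist(degree=10):
    import pickle, gzip

    # mirror of the writer above: same path convention
    ef = expand_file + 'rotate' + str(degree)
    if not os.path.exists(ef):
        expand_rotate_mnist(degree=degree)  # generate on first use

    with gzip.open(ef, "rb") as f:
        x, y = pickle.load(f)
    return x, y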
Example #3
    def fit_sgd(self, X, y):
        ''' Fit training data with the adaptive model.

            Parameters:
            -------------
            X: {array-like}, shape = (n_samples, n_features)
                Training vectors, where n_samples is the number of
                samples and n_features is the number of features per sample.
            y: array-like, shape = (n_samples,)
                Target values of the samples.

            Returns:
            -------------
            self: object
        '''
        samples = X.shape[0]
        x_features = X.shape[1]
        self.w_ = np.zeros(1 + x_features)  # weights, including the bias w_[0]
        self.w_initialized = True
        self.update_labels(y)

        self.appendedX_ = np.hstack((np.ones(samples).reshape(samples, 1), X))
        self.errors_ = []
        self.costs_ = []

        # record every w during whole iterations
        self.wsteps_ = []
        self.steps_ = 1  # record cost and error statistics every steps_ descent steps

        while True:
            deltaws = []
            for xi, target in zip(X, y):
                self.complex += 1
                deltaw = self.update_weights(xi, target)
                deltaws.append(deltaw)
                if self.complex % self.steps_ == 0:
                    errors, diffs = self.errors(X, y)

                    # compute the logistic (cross-entropy) cost
                    output = self.sigmoid(self.net_input(X))
                    diff = 1.0 - output
                    diff[diff <= 0] = 1e-10
                    cost = -y.dot(np.log(output)) - ((1 - y).dot(np.log(diff)))
                    self.costs_.append(cost)

                    if self.complex > self.n_iter:
                        print("Loops beyond n_iter %d" % self.n_iter)
                        return self

            '''
            if np.max(np.abs(np.array(deltaws))) < 0.0001:
                print("deltaw is less than 0.0001")
                self.wsteps_.append(self.w_.copy()) # record last w
                return self
            '''
            # reshuffle the samples between epochs
            import scaler
            X, y = scaler.shuffle(X, y)
            self.appendedX_ = np.hstack((np.ones(samples).reshape(samples, 1), X))

        return self
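
The cost accumulated above is the binary cross-entropy -sum(y*log(yhat) + (1-y)*log(1-yhat)) for labels in {0, 1}. A standalone toy check of the same expression (not repository code):

import numpy as np

y = np.array([1, 0, 1])             # labels
output = np.array([0.9, 0.2, 0.6])  # sigmoid activations
diff = 1.0 - output
cost = -y.dot(np.log(output)) - (1 - y).dot(np.log(diff))
print(cost)  # ~0.8393, i.e. -(ln 0.9 + ln 0.8 + ln 0.6)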
Example #4
def expand_mnist(count=0, offset_pixels=1):
    import pickle, gzip
    import scaler
    
    ef = expand_file + str(offset_pixels)
    if os.path.exists(ef):
        print("The expanded training set already exists.")
        return
    
    x_train, y_train = __load_mnist("./db/mnist", kind='t10k', 
                                    count=count, dtype=np.uint8)
    
    # shift down by offset_pixels; blank the wrapped-in top rows
    x_down = np.roll(x_train, offset_pixels, axis=1)
    x_down[:, :offset_pixels, :] = 0
    
    # shift up by offset_pixels; blank the wrapped-in bottom rows
    x_up = np.roll(x_train, -offset_pixels, axis=1)
    x_up[:, -offset_pixels:, :] = 0
    
    # shift right by offset_pixels; blank the wrapped-in left columns
    x_right = np.roll(x_train, offset_pixels, axis=2)
    x_right[:, :, :offset_pixels] = 0

    # shift left by offset_pixels; blank the wrapped-in right columns
    x_left = np.roll(x_train, -offset_pixels, axis=2)
    x_left[:, :, -offset_pixels:] = 0

    expand_x = np.vstack([x_down, x_up, x_right, x_left])
    expand_x, expand_y = scaler.shuffle(expand_x, np.tile(y_train, 4))
    print("Saving expanded data. This may take a few minutes.")
    with gzip.open(ef, "wb") as f:
        pickle.dump((expand_x, expand_y), f)
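
`np.roll` wraps values around the array edge, which is why every shift above is followed by zeroing the wrapped-in rows or columns. A standalone toy illustration of the down-shift:

import numpy as np

img = np.arange(9).reshape(1, 3, 3)  # a batch of one 3x3 "image"
down = np.roll(img, 1, axis=1)       # rows wrap: the last row reappears on top
down[:, :1, :] = 0                   # blank the wrapped-in top row
print(down[0])
# [[0 0 0]
#  [0 1 2]
#  [3 4 5]]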
Example #5
def normal_distribute_test():
    positive_num = 60
    negative_num = 10
    X, y = normal_distribute_trainset(positive_num, negative_num)

    X = scaler.standard(X)
    X, y = scaler.shuffle(X, y)
    ND = AdalineSGD(0.0001, 4000)
    ND.fit_mbgd(X, y)

    print('Weights: %s' % ND.w_)
    print('Errors: %s' % ND.errors_[-1])
    print("LastCost: %f" % ND.costs_[-1])
    ND.draw_separate_line(X, y, 'Normal Distribution')
Example #6
def load_bmi_dataset(random_state=None, standard=True):
    import pandas as pd
    df = pd.read_csv('db/bmi/BMI.csv', header=0)
    
    # the last column, %Fat, is the regression target
    y = df.iloc[:, -1].values

    # features: Height (m), Weight (kg), BMI
    X = df.iloc[:, [0, 1, 2]].values
    if random_state is not None:
        X, y = scaler.shuffle(X, y)

    if not standard:
        return X, y
    return scaler.standard(X), y
Example #7
def load_iris_mclass(ratio=0.3, random_state=0):
    from sklearn import datasets

    iris = datasets.load_iris()
    X = iris.data[:, [1, 3]]  # sepal width and petal width
    y = iris.target
    X, y = scaler.shuffle(X, y)

    X_train, X_test, y_train, y_test = crossvalid.data_split(X, y, ratio=ratio, 
                                                             random_state=random_state)    
    # fit the scaler on the training split only, then apply it to both splits
    ds = scaler.DataScaler(X_train)
    X_train = ds.sklearn_standard(X_train)
    X_test = ds.sklearn_standard(X_test)
    
    return X_train, X_test, y_train, y_test
Example #8
def normal_distribute_test():
    positive_num = 30
    negative_num = 30
    X, y = dbload.load_nd_dataset(positive_num, negative_num)

    X = scaler.standard(X)
    X, y = scaler.shuffle(X, y)
    ND = LogRegressGD(0.5, 1000)
    ND.fit_sgd(X, y)

    print('Weights: %s' % ND.w_)
    #print('Errors: %s' % ND.errors_[-1])
    print("LastCost: %f" % ND.costs_[-1])
    ND.draw_separate_line(X, y, 'Normal Distribution')
    ND.draw_sse_cost_surface(X, y)
Example #9
def load_iris_dataset(ratio=0.3, random_state=0, negtive=-1):
    import pandas as pd
    df = pd.read_csv('db/iris/iris.data', header=0)
    
    # labels for the first 100 samples: Iris-setosa -> 1, the rest -> negtive
    y = df.iloc[:100, 4].values
    y = np.where(y == 'Iris-setosa', 1, negtive)
     
    # features 2 (sepal width) and 4 (petal width), as 1-indexed columns
    X = df.iloc[:100, [1, 3]].values
    X, y = scaler.shuffle(X, y)
    X_train, X_test, y_train, y_test = crossvalid.data_split(X, y, ratio=ratio, 
                                                             random_state=random_state)
    ds = scaler.DataScaler(X_train)
    X_train = ds.sklearn_standard(X_train)
    X_test = ds.sklearn_standard(X_test)
    
    return X_train, X_test, y_train, y_test
Example #10
def mnist_kernels():
    import drawutils, dbload, scaler
    x_train, y_train = dbload.load_mnist(r"./db/mnist",
                                         kind='train',
                                         count=40000)

    x_train1, y_train1 = dbload.load_expand_mnist(offset_pixels=1)
    x_train3, y_train3 = dbload.load_expand_mnist(offset_pixels=3)

    x_train = np.vstack([x_train, x_train1, x_train3])
    y_train = np.hstack([y_train, y_train1, y_train3])
    x_train, y_train = scaler.shuffle(x_train, y_train)

    for digit in range(10):
        numbers = x_train[y_train == digit]
        # integer mean image over all samples of this digit
        real = np.sum(numbers, axis=0, keepdims=True) // numbers.shape[0]
        imgs = []
        for kernel in kernels:
            newimg = convolution(real.reshape(28, 28).astype(np.uint8), kernel)
            imgs.append(newimg)

        drawutils.plot_showimgs(imgs, klabels, tight=True)
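
`kernels`, `klabels` and `convolution` come from the surrounding module and are not shown. To reproduce just the filtering step, `cv2.filter2D` can stand in for the 2-D convolution; a sketch under that assumption, with a random stand-in image and an illustrative Laplacian kernel (not necessarily one of the module's `kernels`):

import numpy as np
import cv2

mean_img = np.random.randint(0, 256, (28, 28), dtype=np.uint8)  # stand-in for `real`
laplacian = np.array([[0,  1, 0],
                      [1, -4, 1],
                      [0,  1, 0]], dtype=np.float32)
filtered = cv2.filter2D(mean_img, -1, laplacian)  # correlation; flip the kernel for true convolution
print(filtered.shape)  # (28, 28)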
Example #11
File: nn.py Project: llinjupt/mlhowto
    def fit_mbgd(self,
                 X_train,
                 y_train,
                 batchn=8,
                 verbose=False,
                 costtype='llh',
                 x_labels=None,
                 y_test=None,
                 y_labels=None):
        '''mini-batch stochastic gradient descent.'''
        self.trains_ = []
        self.tests_ = []
        self.errors_ = []
        self.costs_ = []
        self.steps_ = 1  # record cost and error statistics every steps_ descent steps

        self.delta_bs_ = []
        self.delta_ws_ = []

        total = X_train.shape[0]
        if batchn > total:  # cannot split into more batches than samples
            batchn = 1

        self.eta_reduces = 1  # when accuracy improves very little, try a smaller eta

        for loop in range(self.epochs):
            X, y = scaler.shuffle(X_train, y_train)
            if verbose:
                print("Epoch: {}/{}".format(loop + 1, self.epochs), flush=True)

            x_subs = np.array_split(X, batchn, axis=0)
            y_subs = np.array_split(y, batchn, axis=0)
            for batchX, batchy in zip(x_subs, y_subs):
                self.mbatch_backprop(batchX,
                                     batchy,
                                     type=costtype,
                                     total=total)

            if self.complex % self.steps_ == 0:
                if costtype == 'llh':
                    cost = self.loglikelihood_cost(X, y)
                else:
                    cost = self.quadratic_cost(X, y)

                self.costs_.append(cost)
                if self.tol is not None and len(self.costs_) > 5:
                    if abs(sum(self.costs_[-5:]) / 5 -
                           self.costs_[-1]) < self.tol * self.steps_:
                        print(
                            "cost reduction is below tol, quitting!")
                        return

                if cost < 1e-2:
                    print("cost is very tiny just quit!")
                    return

                print("costs {}".format(cost))
                if y_test is not None:
                    self.evaluate(X_train, x_labels, y_test, y_labels)
                    if len(self.tests_) > 3:
                        if abs(sum(self.tests_[-3:]) / 3 -
                               self.tests_[-1]) < 1e-3:
                            print(
                                "test accuracy improved very little; reducing eta")
                            self.eta /= np.power(2, self.eta_reduces)
                            self.eta_reduces += 1
                            if self.eta_reduces > 5:
                                print("eta reduced five times, quitting")
                                return

            self.complex += 1
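
A hedged usage sketch of `fit_mbgd`. The constructor arguments (`eta`, `epochs`, `tol`) are inferred from the attributes the method reads (`self.eta`, `self.epochs`, `self.tol`), and the class name `NN` is a placeholder; neither is quoted from nn.py:

# NN(eta, epochs, tol) is assumed from the attributes used above, not quoted from nn.py
nn = NN(eta=0.1, epochs=30, tol=1e-4)
nn.fit_mbgd(x_train, y_train, batchn=8, verbose=True, costtype='llh')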