def cnn_load_mnist(ratio=1):
    num_classes = 10
    img_rows, img_cols = 28, 28

    x_train, y_train = dbload.load_mnist(r"./db/mnist", kind='train',
                                         count=int(ratio*40000))
    x_test, y_test = dbload.load_mnist(r"./db/mnist", kind='test',
                                       count=int(ratio*10000))
    x_train1, y_train1 = dbload.load_expand_mnist(offset_pixels=1)
    x_train2, y_train2 = dbload.load_expand_rotate_mnist(degree=10)

    x_train = np.vstack([x_train, x_train1, x_train2])
    y_train = np.hstack([y_train, y_train1, y_train2])
    x_train, y_train = scaler.shuffle(x_train, y_train)

    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
        x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)

    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255

    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    return x_train, y_train, x_test, y_test
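# A minimal usage sketch (an assumption, not part of the original module): it
# presumes the local dbload/scaler helpers and ./db/mnist exist, so the calls
# stay commented out; the expected shapes follow from the backend layout above.
# x_train, y_train, x_test, y_test = cnn_load_mnist(ratio=0.1)
# print(x_train.shape)  # e.g. (N, 28, 28, 1) for 'channels_last'
# print(y_train.shape)  # (N, 10) one-hot labels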
def expand_rotate_mnist(count=0, degree=10):
    import pickle, gzip
    import scaler, cv2

    ef = expand_file + 'rotate' + str(degree)
    if os.path.exists(ef):
        print("The expanded training set already exists.")
        return

    # expand the training split, not the t10k test split, so no test data
    # leaks into training
    x_train, y_train = __load_mnist("./db/mnist", kind='train', count=count,
                                    dtype=np.uint8)
    count = x_train.shape[0]

    # rotation matrices about the image center (14, 14), by +degree and -degree
    M = cv2.getRotationMatrix2D((14, 14), degree, 1)
    M1 = cv2.getRotationMatrix2D((14, 14), -degree, 1)

    expand_x = np.zeros((count, 28, 28), dtype='uint8')
    expand_x1 = np.zeros((count, 28, 28), dtype='uint8')
    for i in range(count):
        expand_x[i] = cv2.warpAffine(x_train[i], M, (28, 28))
        expand_x1[i] = cv2.warpAffine(x_train[i], M1, (28, 28))

    expand_x = np.vstack([expand_x, expand_x1])
    expand_x, expand_y = scaler.shuffle(expand_x, np.tile(y_train, 2))
    print(expand_x.shape, expand_y.shape)

    print("Saving expanded data. This may take a few minutes.")
    with gzip.open(ef, "wb") as f:
        pickle.dump((expand_x, expand_y), f)
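# Hedged, self-contained sketch of the cv2 calls used above: a synthetic 28x28
# image is rotated about the image center (14, 14). The bar pattern is
# fabricated purely so the rotation is visible.
import numpy as np
import cv2

demo = np.zeros((28, 28), dtype=np.uint8)
demo[10:18, 12:16] = 255                            # small bright bar
M_pos = cv2.getRotationMatrix2D((14, 14), 10, 1)    # (center, angle in degrees, scale)
M_neg = cv2.getRotationMatrix2D((14, 14), -10, 1)
rot_pos = cv2.warpAffine(demo, M_pos, (28, 28))     # output size matches the input
rot_neg = cv2.warpAffine(demo, M_neg, (28, 28))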
def fit_sgd(self, X, y):
    '''Fit training data with the adaptive model.

    Parameters
    -------------
    X: {array-like}, shape = (n_samples, n_features)
        Training vectors, where n_samples is the number of samples and
        n_features is the number of features of one sample.
    y: array-like, shape = (n_samples,)
        Target values of the samples.

    Returns
    -------------
    self: object
    '''
    samples = X.shape[0]
    x_features = X.shape[1]
    self.w_ = np.zeros(1 + x_features)
    self.w_initialized = True
    self.update_labels(y)
    self.appendedX_ = np.hstack((np.ones(samples).reshape(samples, 1), X))
    self.errors_ = []
    self.costs_ = []

    # record every w during the whole iteration
    self.wsteps_ = []
    self.steps_ = 1  # every steps_ descent steps, record one cost and error sample

    while True:
        deltaws = []
        for xi, target in zip(X, y):
            self.complex += 1
            deltaw = self.update_weights(xi, target)
            deltaws.append(deltaw)
            if self.complex % self.steps_ == 0:
                errors, diffs = self.errors(X, y)

                # compute the logistic (log-likelihood) cost
                output = self.sigmoid(self.net_input(X))
                diff = 1.0 - output
                diff[diff <= 0] = 1e-10  # clamp to avoid log(0)
                cost = -y.dot(np.log(output)) - ((1 - y).dot(np.log(diff)))
                self.costs_.append(cost)

        if self.complex > self.n_iter:
            print("Loops beyond n_iter %d" % self.n_iter)
            return self

        '''
        if np.max(np.abs(np.array(deltaws))) < 0.0001:
            print("deltaw is less than 0.0001")
            self.wsteps_.append(self.w_.copy())  # record the last w
            return self
        '''

        # reshuffle between epochs so SGD does not see a fixed sample order
        import scaler
        X, y = scaler.shuffle(X, y)
        self.appendedX_ = np.hstack((np.ones(samples).reshape(samples, 1), X))

    return self
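# Toy check of the log-likelihood cost computed inside fit_sgd, using made-up
# activations; this is only a sketch of J(w) = -sum(y*log(phi) + (1-y)*log(1-phi)),
# not part of the class.
import numpy as np

y_demo = np.array([1, 0, 1])
phi = 1.0 / (1.0 + np.exp(-np.array([2.0, -1.5, 0.3])))  # sigmoid outputs
cost_demo = -y_demo.dot(np.log(phi)) - (1 - y_demo).dot(np.log(1.0 - phi))
print(cost_demo)   # a single scalar cost over the three samples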
def expand_mnist(count=0, offset_pixels=1):
    import pickle, gzip
    import scaler

    ef = expand_file + str(offset_pixels)
    if os.path.exists(ef):
        print("The expanded training set already exists.")
        return

    # expand the training split, not the t10k test split, so no test data
    # leaks into training
    x_train, y_train = __load_mnist("./db/mnist", kind='train', count=count,
                                    dtype=np.uint8)

    # move down offset_pixels pixels, clearing the rows that wrapped around
    x_down = np.roll(x_train, offset_pixels, axis=1)
    x_down[:, :offset_pixels, :] = 0

    # move up offset_pixels pixels
    x_up = np.roll(x_train, -offset_pixels, axis=1)
    x_up[:, -offset_pixels:, :] = 0

    # move right offset_pixels pixels
    x_right = np.roll(x_train, offset_pixels, axis=2)
    x_right[:, :, :offset_pixels] = 0

    # move left offset_pixels pixels
    x_left = np.roll(x_train, -offset_pixels, axis=2)
    x_left[:, :, -offset_pixels:] = 0

    expand_x = np.vstack([x_down, x_up, x_right, x_left])
    expand_x, expand_y = scaler.shuffle(expand_x, np.tile(y_train, 4))

    print("Saving expanded data. This may take a few minutes.")
    with gzip.open(ef, "wb") as f:
        pickle.dump((expand_x, expand_y), f)
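# Self-contained sketch of the np.roll trick above: axis=1 shifts image rows,
# and the wrapped-around rows must be cleared or each digit's bottom pixels
# would reappear at the top. The tiny 2-image batch is fabricated for illustration.
import numpy as np

batch = np.arange(2 * 28 * 28, dtype=np.uint8).reshape(2, 28, 28)
down = np.roll(batch, 1, axis=1)   # every image shifted down by one row
down[:, 0, :] = 0                  # zero the row that wrapped in from the bottom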
def normal_distribute_test():
    positive_num = 60
    negative_num = 10

    X, y = normal_distribute_trainset(positive_num, negative_num)
    X = scaler.standard(X)
    X, y = scaler.shuffle(X, y)

    ND = AdalineSGD(0.0001, 4000)
    ND.fit_mbgd(X, y)

    print('Weights: %s' % ND.w_)
    print('Errors: %s' % ND.errors_[-1])
    print("LastCost: %f" % ND.costs_[-1])
    ND.draw_separate_line(X, y, 'Normal Distribute')
def load_bmi_dataset(random_state=None, standard=True):
    import pandas as pd

    df = pd.read_csv('db/bmi/BMI.csv', header=0)

    # the last column is %Fat
    y = df.iloc[:, -1].values
    # features: Height (m), Weight (kg), BMI
    X = df.iloc[:, [0, 1, 2]].values

    if random_state is not None:
        X, y = scaler.shuffle(X, y)
    if not standard:
        return X, y
    return scaler.standard(X), y
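# What scaler.standard presumably computes: per-column z-scores. The numbers
# below are invented heights/weights, only to show the transform's effect.
import numpy as np

X_demo = np.array([[1.70, 60.0], [1.80, 80.0], [1.60, 50.0]])
X_std = (X_demo - X_demo.mean(axis=0)) / X_demo.std(axis=0)
print(X_std.mean(axis=0))   # ~0 per column
print(X_std.std(axis=0))    # ~1 per column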
def load_iris_mclass(ratio=0.3, random_state=0):
    from sklearn import datasets

    iris = datasets.load_iris()
    X = iris.data[:, [1, 3]]
    y = iris.target
    X, y = scaler.shuffle(X, y)

    X_train, X_test, y_train, y_test = crossvalid.data_split(
        X, y, ratio=ratio, random_state=random_state)

    ds = scaler.DataScaler(X_train)
    X_train = ds.sklearn_standard(X_train)
    X_test = ds.sklearn_standard(X_test)
    return X_train, X_test, y_train, y_test
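# The design point above: the scaler is fitted on X_train only and then reused
# for X_test, so no test statistics leak into training. A hedged equivalent
# using sklearn directly (DataScaler is a local wrapper, not shown here):
from sklearn.preprocessing import StandardScaler
import numpy as np

Xtr = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])   # fabricated data
Xte = np.array([[1.5, 15.0]])
sc = StandardScaler().fit(Xtr)   # statistics come from the training split only
Xtr_std = sc.transform(Xtr)
Xte_std = sc.transform(Xte)      # the same mean/std are reused on the test split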
def normal_distribute_test():
    positive_num = 30
    negative_num = 30

    X, y = dbload.load_nd_dataset(positive_num, negative_num)
    X = scaler.standard(X)
    X, y = scaler.shuffle(X, y)

    ND = LogRegressGD(0.5, 1000)
    ND.fit_sgd(X, y)

    print('Weights: %s' % ND.w_)
    #print('Errors: %s' % ND.errors_[-1])
    print("LastCost: %f" % ND.costs_[-1])
    ND.draw_separate_line(X, y, 'Normal Distribute')
    ND.draw_sse_cost_surface(X, y)
def load_iris_dataset(ratio=0.3, random_state=0, negtive=-1):
    import pandas as pd

    # iris.data has no header row, so use header=None to keep the first sample
    df = pd.read_csv('db/iris/iris.data', header=None)

    # the first 100 rows hold the two linearly separable classes
    y = df.iloc[:100, 4].values
    y = np.where(y == 'Iris-setosa', 1, negtive)

    # use features 2 (sepal width) and 4 (petal width)
    X = df.iloc[:100, [1, 3]].values
    X, y = scaler.shuffle(X, y)

    X_train, X_test, y_train, y_test = crossvalid.data_split(
        X, y, ratio=ratio, random_state=random_state)

    ds = scaler.DataScaler(X_train)
    X_train = ds.sklearn_standard(X_train)
    X_test = ds.sklearn_standard(X_test)
    return X_train, X_test, y_train, y_test
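# Quick sketch of the np.where label encoding used above, with fabricated class
# strings: matching names map to 1, everything else to the negative label.
import numpy as np

names = np.array(['Iris-setosa', 'Iris-versicolor', 'Iris-setosa'])
print(np.where(names == 'Iris-setosa', 1, -1))   # [ 1 -1  1 ]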
def mnist_kernels():
    import drawutils, dbload, scaler

    x_train, y_train = dbload.load_mnist(r"./db/mnist", kind='train',
                                         count=40000)
    x_train1, y_train1 = dbload.load_expand_mnist(offset_pixels=1)
    x_train3, y_train3 = dbload.load_expand_mnist(offset_pixels=3)

    x_train = np.vstack([x_train, x_train1, x_train3])
    y_train = np.hstack([y_train, y_train1, y_train3])
    x_train, y_train = scaler.shuffle(x_train, y_train)

    for digit in range(10):
        # average all samples of one digit into a single prototype image
        numbers = x_train[y_train == digit]
        real = np.sum(numbers, axis=0, keepdims=True) // numbers.shape[0]

        imgs = []
        for kernel in kernels:
            newimg = convolution(real.reshape(28, 28).astype(np.uint8), kernel)
            imgs.append(newimg)
        drawutils.plot_showimgs(imgs, klabels, tight=True)
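# The convolution() helper and the kernels/klabels globals are defined elsewhere
# in the module. As an assumption-labeled stand-in, cv2.filter2D applies a kernel
# to an image (technically correlation, as most image-filtering code does); the
# image and Sobel-x kernel here are fabricated for illustration.
import numpy as np
import cv2

img = np.zeros((28, 28), dtype=np.uint8)
img[:, 14:] = 255                                  # vertical edge down the middle
sobel_x = np.array([[-1, 0, 1],
                    [-2, 0, 2],
                    [-1, 0, 1]], dtype=np.float32)
edges = cv2.filter2D(img, -1, sobel_x)             # responds strongly at the edge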
def fit_mbgd(self, X_train, y_train, batchn=8, verbose=False, costtype='llh',
             x_labels=None, y_test=None, y_labels=None):
    '''Mini-batch stochastic gradient descent.'''
    self.trains_ = []
    self.tests_ = []
    self.errors_ = []
    self.costs_ = []
    self.steps_ = 1  # every steps_ descent steps, record cost and error sample
    self.delta_bs_ = []
    self.delta_ws_ = []

    total = X_train.shape[0]
    if batchn > total:
        batchn = 1
    self.eta_reduces = 1  # when accuracy improves very little, try a smaller eta

    for loop in range(self.epochs):
        X, y = scaler.shuffle(X_train, y_train)
        if verbose:
            print("Epoch: {}/{}".format(loop + 1, self.epochs), flush=True)

        x_subs = np.array_split(X, batchn, axis=0)
        y_subs = np.array_split(y, batchn, axis=0)
        for batchX, batchy in zip(x_subs, y_subs):
            self.mbatch_backprop(batchX, batchy, type=costtype, total=total)

        if self.complex % self.steps_ == 0:
            if costtype == 'llh':
                cost = self.loglikelihood_cost(X, y)
            else:
                cost = self.quadratic_cost(X, y)
            self.costs_.append(cost)

            if self.tol is not None and len(self.costs_) > 5:
                if abs(sum(self.costs_[-5:]) / 5 - self.costs_[-1]) < self.tol * self.steps_:
                    print("Cost reduction is less than tol, quitting!")
                    return
            if cost < 1e-2:
                print("Cost is very tiny, quitting!")
                return
            print("costs {}".format(cost))

        if y_test is not None:
            self.evaluate(X_train, x_labels, y_test, y_labels)
            if len(self.tests_) > 3:
                if abs(sum(self.tests_[-3:]) / 3 - self.tests_[-1]) < 1e-3:
                    print("Test evaluation improves very little, reducing eta.")
                    self.eta /= np.power(2, self.eta_reduces)
                    self.eta_reduces += 1
                    if self.eta_reduces > 5:
                        print("Reduced eta more than five times, quitting.")
                        return

        self.complex += 1
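# Sketch of the mini-batch split used in fit_mbgd: np.array_split tolerates
# sizes that do not divide evenly, unlike np.split. Toy data for illustration.
import numpy as np

X_demo = np.arange(10).reshape(10, 1)
parts = np.array_split(X_demo, 3, axis=0)
print([p.shape[0] for p in parts])   # [4, 3, 3]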