import gzip
from time import time

import numpy as np

# one_hot_encoding, neural_network, fit, sgd and validate_accuracy are
# helpers defined elsewhere in this project.


def main1():
    start_time = time()
    print("---------- main1 --------------")
    # Load the gzip-compressed MNIST files (IDX format).
    f0 = gzip.open('/home/luca/data/mnist/train-images-idx3-ubyte.gz', 'r')
    f1 = gzip.open('/home/luca/data/mnist/t10k-images-idx3-ubyte.gz', 'r')
    l0 = gzip.open('/home/luca/data/mnist/train-labels-idx1-ubyte.gz', 'r')
    l1 = gzip.open('/home/luca/data/mnist/t10k-labels-idx1-ubyte.gz', 'r')
    # Skip the IDX headers (16 bytes for images, 8 for labels) and flatten
    # each 28x28 image into a 784-dimensional row.
    X_train = np.frombuffer(f0.read(), dtype=np.uint8, offset=16).reshape(-1, 28 * 28)
    X_test = np.frombuffer(f1.read(), dtype=np.uint8, offset=16).reshape(-1, 28 * 28)
    y_train = np.frombuffer(l0.read(), dtype=np.uint8, offset=8)
    y_test = np.frombuffer(l1.read(), dtype=np.uint8, offset=8)
    y_train = one_hot_encoding(y_train)
    y_label = one_hot_encoding(y_test)
    # Standardize both splits with the training-set mean and std.
    mean = np.mean(X_train)
    std = np.std(X_train)
    X_train, X_test = X_train - mean, X_test - mean
    X_train, X_test = X_train / std, X_test / std
    # One hidden TanH layer (89 nodes), Sigmoid output, SGD with
    # exponential learning-rate decay (eta -> etaN).
    model = neural_network((89, 'TanH'), (10, 'Sigmoid'), input_nodes=784, seed=20190119)
    model = fit(x_train=X_train, y_train=y_train,
                x_test=X_test, y_test=y_label,
                model=model,
                optimizer=sgd(epochs=50, eta=0.35, etaN=0.15, decay_type='exponential'),
                batch_size=60, eval_every=5, early_stop=True, seed=20190119)
    validate_accuracy(x_test=X_test, y_test=y_test, model=model)
    # print(model[0][0][0].shape)
    # print(np.sum(model[0][0][0]))
    # print(model[0][0][1].shape)
    # print(np.sum(model[0][0][1]))
    #
    # print(model[1][0][0].shape)
    # print(np.sum(model[1][0][0]))
    # print(model[1][0][1].shape)
    # print(np.sum(model[1][0][1]))
    # print()
    print("--- %s seconds ---" % (time() - start_time))
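# For reference, `one_hot_encoding` above takes an integer label vector and
# presumably returns an (n_samples, n_classes) indicator matrix. A minimal
# sketch of that behaviour -- the name `_one_hot_encoding_sketch` and the
# `n_classes` default are illustrative assumptions, not the project's API:
def _one_hot_encoding_sketch(labels, n_classes=10):
    encoded = np.zeros((labels.shape[0], n_classes))
    encoded[np.arange(labels.shape[0]), labels] = 1.0  # one 1 per row, at the label index
    return encoded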
def main5():
    start_time = time()
    print("---------- main5 --------------")
    # Same MNIST loading and standardization as main1.
    f0 = gzip.open('/home/luca/data/mnist/train-images-idx3-ubyte.gz', 'r')
    f1 = gzip.open('/home/luca/data/mnist/t10k-images-idx3-ubyte.gz', 'r')
    l0 = gzip.open('/home/luca/data/mnist/train-labels-idx1-ubyte.gz', 'r')
    l1 = gzip.open('/home/luca/data/mnist/t10k-labels-idx1-ubyte.gz', 'r')
    X_train = np.frombuffer(f0.read(), dtype=np.uint8, offset=16).reshape(-1, 28 * 28)
    X_test = np.frombuffer(f1.read(), dtype=np.uint8, offset=16).reshape(-1, 28 * 28)
    y_train = np.frombuffer(l0.read(), dtype=np.uint8, offset=8)
    y_test = np.frombuffer(l1.read(), dtype=np.uint8, offset=8)
    y_train = one_hot_encoding(y_train)
    y_label = one_hot_encoding(y_test)
    mean = np.mean(X_train)
    std = np.std(X_train)
    X_train, X_test = X_train - mean, X_test - mean
    X_train, X_test = X_train / std, X_test / std
    # Compared to main1: Softmax output with cross-entropy loss, scaled
    # weight initialization, an optimizer beta term and dropout.
    model = neural_network((89, 'TanH'), (10, 'Softmax'),
                           input_nodes=784, seed=20190119, weight_init='scaled')
    model = fit(x_train=X_train, y_train=y_train,
                x_test=X_test, y_test=y_label,
                model=model,
                optimizer=sgd(epochs=50, eta=0.15, etaN=0.05,
                              decay_type='exponential', beta=0.85),
                batch_size=60, eval_every=5, early_stop=True,
                loss_function='cross-entropy', seed=20190119, dropout=0.8)
    validate_accuracy(x_test=X_test, y_test=y_test, model=model)
    print("--- %s seconds ---" % (time() - start_time))
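# The eta/etaN pair with decay_type='exponential' suggests the optimizer
# decays the learning rate exponentially from eta at the first epoch to
# etaN at the last. A sketch of one such schedule -- an assumption about
# sgd's internals, not its confirmed behaviour:
def _exponential_decay_sketch(eta, etaN, epochs):
    ratio = (etaN / eta) ** (1.0 / (epochs - 1))  # per-epoch multiplier
    return [eta * ratio ** t for t in range(epochs)]

# e.g. _exponential_decay_sketch(0.35, 0.15, 50) starts at 0.35 and ends
# at (approximately) 0.15.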
    print(train_set_x.shape)
    print(test_set_x_orig.shape)
    test_set_x = test_set_x_orig
    print(test_set_x.shape)
    num_px = train_set_x_orig.shape[1]
    # Scale the pixel values to [0, 1].
    X = train_set_x / 255
    Y = train_set_y
    X_test = test_set_x / 255
    Y_test = test_set_y
    print(Y)
    print(Y_test)
    print("Y_test.shape : " + str(Y_test.shape))
    # print("X_test.shape : " + str(X_test.shape))
    # One-hot encode both label arrays in one call; `encoded` avoids
    # shadowing the built-in `dict`.
    encoded = one_hot_encoding(dict={"Y": Y, "Y_test": Y_test})
    Y = encoded["Y"]
    Y_test = encoded["Y_test"]
    print(Y)
    print(Y_test)
    print("Y.shape : " + str(Y.shape))
    print("X.shape : " + str(X.shape))
    print("Y_test.shape : " + str(Y_test.shape))
    print("X_test.shape : " + str(X_test.shape))
elif dataset_option == "S":
    train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset_SIGNS()
    train_set_x = train_set_x_orig
    print(train_set_x.shape)
    X_test = test_set_x / 255
    Y_test = test_set_y
    print(Y_test)
    # Normalize inputs: zero the per-feature mean...
    X -= np.mean(X, axis=0)
    X_test -= np.mean(X_test, axis=0)
    # ...then rescale by the per-feature variance. Note: dividing by np.std
    # (not np.var) gives the conventional unit-variance standardization, and
    # the test set is usually rescaled with the *training* statistics rather
    # than its own, as here.
    X /= np.var(X, axis=0)
    X_test /= np.var(X_test, axis=0)
    # One-hot encoding of the label arrays.
    encoded = one_hot_encoding({'Y': Y, 'Y_test': Y_test})
    Y = encoded['Y']
    Y_test = encoded['Y_test']
    del encoded
    print(Y)
    print("Y.shape : " + str(Y.shape))
    print("X.shape : " + str(X.shape))
    print("Y_test.shape : " + str(Y_test.shape))
    print("X_test.shape : " + str(X_test.shape))
elif dataset_option == "N":
    # test = sio.loadmat('datasets/Digit_Classification-BigDataset.mat')
    # X = test['X'][:]
    # Y = test['Y'][:]
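# A sketch of the conventional alternative flagged in the comments above:
# divide by the standard deviation and reuse the training-set statistics
# for the test split. Illustrative only, not a helper from this project.
def _standardize_sketch(X_train, X_test):
    mu = np.mean(X_train, axis=0)
    sigma = np.std(X_train, axis=0)
    return (X_train - mu) / sigma, (X_test - mu) / sigma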
# first problem: total_bedrooms contains NaN values --> impute the median
tot_bedrooms_clear = tot_bedrooms[~np.isnan(tot_bedrooms)]  # removing NaN values
median = np.median(tot_bedrooms_clear)  # computing the median of the distribution
tot_bedrooms.fillna(median, inplace=True)  # replacing NaN values with median value
dataset['total_bedrooms'] = tot_bedrooms  # assigning fixed feature column to dataset

# second problem: ocean_proximity has categorical values --> one-hot encoding
drop = True
if drop:
    dataset_drop = f.one_hot_encoding(dataset, drop=drop)

# put the label column at the end of the dataset
label = dataset_drop['median_house_value']
dataset_drop.pop('median_house_value')
dataset_drop['median_house_value'] = label

# show the correlation between variables (lower triangle only)
plt.figure(figsize=(15, 8))
corr = dataset_drop.corr()
mask = np.zeros_like(corr, dtype=bool)  # np.bool is removed in NumPy >= 1.24
mask[np.triu_indices_from(mask)] = True  # mask the redundant upper triangle
sb.heatmap(abs(corr), linewidths=.5, annot=True, mask=mask)
# (further heatmap keyword arguments were truncated in the source)
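# `f.one_hot_encoding(dataset, drop=drop)` is a project helper. For
# reference, pandas can express the same transformation directly; the
# sketch below assumes `drop` means dropping the first dummy level to
# avoid collinearity -- the helper's actual semantics may differ.
import pandas as pd

def _one_hot_encoding_pandas_sketch(dataset, drop=True):
    # get_dummies replaces the categorical column with 0/1 indicator columns
    return pd.get_dummies(dataset, columns=['ocean_proximity'], drop_first=drop)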