def test_model_with_softmax():
    """Smoke-test a single Linear+Softmax layer against the cross-entropy module.

    Runs one forward pass on a single three-feature sample, evaluates
    ``ce.loss`` on the result and prints the gradient that ``ce.backward``
    returns for every prediction row.
    """
    from models import Sequential
    from layers import Linear, Softmax

    inputs = np.array([[0.25, 0.63, 0.12]])
    targets = np.array([0, 1, 0])

    model = Sequential()
    model.add(Linear(3, 3, activation=Softmax()))

    predictions = model.feed_forward(inputs)
    # The loss value itself is not inspected; the call only has to succeed.
    loss = ce.loss(predictions, targets)

    for row, prediction in enumerate(predictions):
        gradient = ce.backward(prediction, targets[row])
        print("grad", gradient)
def default_model():
    """Train and evaluate the default required model.

    Builds a 2-25-25-25-2 fully connected network with ReLU activations,
    trains it with ``SGDCV`` and an MSE criterion on generated data
    (cross-validating the learning rate first when ``args.cross_val`` is
    set), then prints the train and test accuracy.
    """
    model = Sequential(Linear(2, 25), ReLU(),
                       Linear(25, 25), ReLU(),
                       Linear(25, 25), ReLU(),
                       Linear(25, 2))

    train_input, train_target = generate_disc_set(1000)
    test_input, test_target = generate_disc_set(1000)

    # Learning rates explored when cross-validation is requested.
    values = {"lr": [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]}
    run_cross_validation = args.cross_val
    best_lr = 1e-4

    optimizer = SGDCV(model,
                      nb_epochs=50,
                      mini_batch_size=1,
                      lr=best_lr,
                      criterion=LossMSE())
    if run_cross_validation:
        optimizer.cross_validate(k=5, values=values, verbose=True)
        optimizer.set_params()

    optimizer.train(train_input, train_target, verbose=True)

    evaluator = Evaluator(model)
    reports = (
        ("Train accuracy: {:.1f}%", train_input, train_target),
        ("Test accuracy: {:.1f}%", test_input, test_target),
    )
    for template, samples, labels in reports:
        accuracy = evaluator.compute_accuracy(samples, labels)
        print(template.format((accuracy * 100).item()))
def linear_regression(a=1.0, b=0.0):
    """Fit a single-unit dense model to the line ``y = a * x + b`` and plot it.

    :param a: slope of the target line
    :param b: intercept of the target line
    """
    # 200 evenly spaced points as a column vector, split 80/20 at random.
    X = np.linspace(-100, 100, 200).reshape((-1, 1))
    train_x, test_x = split_data(X, ratio=0.8, random=True)
    train_y = a * train_x + b
    test_y = a * test_x + b

    # One input feature feeding one dense unit.
    inputs = Input(1)
    outputs = Dense(1)(inputs)

    trainer = Trainer(loss='mse',
                      optimizer=Adam(learning_rate=0.2),
                      batch_size=50,
                      epochs=50)

    model = Sequential(inputs, outputs, trainer)
    model.summary()
    model.fit(train_x, train_y)

    # Blue: ground truth, red: model prediction.
    y_hat = model.predict(test_x)
    plt.plot(test_x, test_y, 'b')
    plt.plot(test_x, y_hat, 'r')
    plt.show()
def load_model(name):
    """Rebuild a ``Sequential`` model from ``<name>_network.json`` and ``<name>.npz``.

    The JSON file supplies the ordered layer type names (``layer_types``) and
    their constructor keyword arguments (``layer_configs``). The ``.npz``
    archive holds the parameters under keys of the form
    ``<prefix>_<layer index>__<parameter name>``.

    :param name: path prefix shared by both files
    :returns: the reconstructed ``Sequential`` model
    :raises KeyError: if a layer type is not in the type map, or a trainable
        layer has no stored parameters
    """
    type_map = {
        "<class 'layers.SimpleRecurrent'>": SimpleRecurrent,
        "<class 'layers.VanillaRecurrent'>": VanillaRecurrent,
        "<class 'layers.Dense'>": Dense,
        "<class 'layers.Activation'>": Activation,
        "<class 'layers.Softmax'>": Softmax
    }

    with open('{}_network.json'.format(name), 'r') as infile:
        network = json.load(infile)

    # Regroup the flat archive as {layer index: {parameter name: array}}.
    # np.load returns an NpzFile that keeps the archive open, so it is used as
    # a context manager to release the file descriptor once the arrays are read.
    params_dict = {}
    with np.load('{}.npz'.format(name)) as shallow_params_dict:
        for k, v in shallow_params_dict.items():
            sep_ind = k.find('__')
            layer_ind = int(k[k.find('_') + 1:sep_ind])
            param_key = k[sep_ind + 2:]
            params_dict.setdefault(layer_ind, {})[param_key] = v

    model = Sequential()
    layer_types = network['layer_types']
    layer_configs = network['layer_configs']
    for i, type_name in enumerate(layer_types):
        layer = type_map[type_name](**layer_configs[i])
        if layer.trainable:
            layer.set_params_from_dict(params_dict[i])
        model.add(layer)
    return model
def main():
    """Train a small dense network on the 8-bit parity data and report the results.

    Prints a per-sample table (input, answer, raw prediction, rounded result,
    correctness), then the final loss and the number of mismatches, and
    finally plots the loss per epoch.
    """
    # Load the data first, then build the model.
    # The input dimension is the length of one sample; the dataset is small,
    # so the batch size is simply the total number of samples.
    x, y = ParityBits(8).load_data()
    batch_size, input_dim = x.shape

    layers = [
        Dense(64, activation=ReLU()),
        Dense(32, activation=Tanh()),
        Dense(16, activation=Tanh()),
        Dense(4, activation=None),
        Dense(1, activation=Sigmoid()),
    ]
    # Gradient descent is the optimizer and MSE the loss function.
    model = Sequential(
        layers,
        input_dim=input_dim,
        optimizer=GradientDescent(learning_rate=0.01, momentum=0.0),
        loss=MeanSquaredError())

    # Train for the configured number of epochs; training returns the
    # predictions and the loss value of every epoch.
    y_pred, losses = model.train(
        x, y, batch_size=batch_size, epochs=200, verbose_step=10)

    # The answers are all integers 0 or 1, so the model's result is the
    # prediction rounded to the nearest integer.
    result = np.around(y_pred).astype(int)

    # Subtract the result from the answer.
    diff = y - result
    first_diff_column = diff[:, 0]

    table = {
        # Each input row such as `[0 0 0 0 0 0 0 0]` must be turned into a
        # string, because a pandas DataFrame cell cannot hold an array.
        "Data": list(map(np.array_str, x)),
        "Answer": y[:, 0],
        "Prediction": [f'{v:.8f}' for v in y_pred[:, 0]],
        "Result": result[:, 0],
        # A difference of 0 means the prediction is correct, otherwise wrong.
        "Correct": [bool(v == 0) for v in first_diff_column],
    }
    row_labels = np.arange(1, len(x) + 1)
    print(pd.DataFrame(table, index=row_labels).to_string())

    # Print the final loss and how many results differ from the answers, then
    # plot the loss against the epoch.
    print(f'loss: {losses[-1]:.8f}, difference: {np.count_nonzero(diff)}')

    plt.figure(figsize=(8, 4))
    plt.plot(losses)
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.show()
def linear_classification(a=1.0, b=0.0, graph=False):
    """Train a softmax classifier that separates noisy points around a line.

    Points are sampled on ``y = a * x + b``, perturbed with Gaussian noise and
    labelled by which side of the line they end up on.

    :param a: slope of the separating line
    :param b: intercept of the separating line
    :param graph: when true, also plot the training loss history
    """
    # Noisy samples around the line, labelled 1 when they lie below it.
    line_x = np.linspace(-100, 100, 200)
    line_y = a * line_x + b
    X = np.column_stack((line_x, line_y)) + np.random.randn(200, 2) * 100
    below_line = a * X[:, 0] + b > X[:, 1]
    Y = to_one_hot(np.where(below_line, 1, 0))
    (train_x, train_y), (test_x, test_y) = split_data(X, Y, ratio=0.8, random=True)

    # Two input features feeding a two-way softmax layer.
    inputs = Input(2)
    outputs = Dense(2, activation='softmax')(inputs)

    trainer = Trainer(loss='cross_entropy',
                      optimizer=Adam(learning_rate=0.05),
                      batch_size=50,
                      epochs=50,
                      metrics=['accuracy'])

    model = Sequential(inputs, outputs, trainer)
    model.summary()

    model.fit(train_x, train_y)
    print(model.evaluate(test_x, test_y))

    if graph:
        plt.plot(model.history['loss'])
        plt.show()

    # Turn class scores into class indices before plotting.
    predicted_classes = np.argmax(model.predict(test_x), axis=1)
    simple_plot(test_x, predicted_classes, a, b)
def binary_classification():
    """Train a two-hidden-layer classifier on the ARFF example data and plot it.

    Reads ``training.arff`` and ``test.arff`` from
    ``data/examples/binary_classification``, plots the training loss history
    and then the predictions on the test points.
    """

    def split_features_and_labels(data):
        # First two columns are the features; the third holds the class name,
        # mapped to 0 for b'black' and 1 for anything else.
        features = normalize(data[:, :2].astype('float32'))
        labels = np.where(data[:, 2] == b'black', 0, 1)
        return features, labels

    data_dir = "data/examples/binary_classification"

    # Training data.
    train_data = load_arff(os.path.join(data_dir, 'training.arff'))
    train_x, train_y = split_features_and_labels(train_data)
    train_y = to_one_hot(train_y)

    # 2 -> 30 -> 30 -> 2 feed-forward network.
    inputs = Input(2)
    hidden = Dense(30, activation='relu')(inputs)
    hidden = Dense(30, activation='relu')(hidden)
    outputs = Dense(2, activation='softmax')(hidden)

    trainer = Trainer(loss='cross_entropy',
                      optimizer=Adam(clipvalue=1.0),
                      batch_size=256,
                      epochs=500,
                      metrics=['accuracy'])

    model = Sequential(inputs, outputs, trainer)
    model.summary()

    model.fit(train_x, train_y)
    plt.plot(range(len(model.history['loss'])), model.history['loss'])
    plt.show()

    # Test data: only the features are needed for prediction.
    test_data = load_arff(os.path.join(data_dir, 'test.arff'))
    test_x, _ = split_features_and_labels(test_data)
    y_hat = model.predict(test_x)
    simple_plot(test_x, y_hat)
def universal_approximation(f, x):
    """Approximate ``f`` with a one-hidden-layer network and plot the fit.

    :param f: function applied to the split inputs to produce the targets
    :param x: input samples, split 80/20 into training and test sets
    """
    train_x, test_x = split_data(x, ratio=0.8, random=True)
    train_y = f(train_x)
    # Sort the test inputs so the line plots below run left to right.
    test_x = np.sort(test_x, axis=0)
    test_y = f(test_x)

    # 1 -> 50 (ReLU) -> 1 feed-forward network.
    inputs = Input(1)
    hidden = Dense(50, activation='relu')(inputs)
    outputs = Dense(1)(hidden)

    lr_schedule = ExponentialDecay(initial_learning_rate=0.01, decay_rate=0.75)
    trainer = Trainer(loss='mse',
                      optimizer=Adam(learning_rate=lr_schedule),
                      batch_size=50,
                      epochs=750)

    model = Sequential(inputs, outputs, trainer)
    model.summary()

    # Time the training run and print the elapsed seconds.
    started_at = time.time()
    model.fit(train_x, train_y)
    print(time.time() - started_at)

    plt.plot(range(len(model.history['loss'])), model.history['loss'])
    plt.show()

    # Compare the original function with the prediction.
    y_hat = model.predict(test_x)
    plt.plot(test_x, test_y, 'b-', label='original')
    plt.plot(test_x, y_hat, 'r-', label='predicted')
    plt.legend()
    plt.show()
(x_train, y_train), (x_test, y_test) = mnist.load_data() x_train = (x_train.astype('float32') / 255).reshape(-1, 1, 28, 28) y_train = np_utils.to_categorical(y_train.astype('int32'), 10) x_test = (x_test.astype('float32') / 255).reshape(-1, 1, 28, 28) y_test = np_utils.to_categorical(y_test.astype('int32'), 10) threshold = 1000 x_train = x_train[:threshold] y_train = y_train[:threshold] x_test = x_test[:threshold] y_test = y_test[:threshold] seed = 15 model = Sequential(seed=seed) model.add(Conv2D(32, (5, 5), activation="relu", inputs_shape=x_train.shape[1:])) model.add(Pooling((2, 2))) model.add(Conv2D(16, (3, 3), activation="relu")) model.add(Pooling((2, 2))) model.add(Dense(10, activation="relu")) model.add(Dropout(0.5)) model.add(Dense(10, activation="softmax")) model.compile(loss="categorical_crossentropy", optimizer=Adam(), metric="accuracy") model.fit(x_train=x_train, t_train=y_train, x_test=x_test,
# Hand-picked parameters for a two-layer network; nothing is learned here.
bias_weights_0 = np.array([-5, 5])
kernel_weights_0 = np.array([[5, -5], [5, -5]])
bias_weights_1 = np.array([-5])
kernel_weights_1 = np.array([[5], [5]])

# Wrap the fixed arrays so they can be passed as initializers.
saved_bias_0 = saved_weights(bias_weights_0)
saved_kernel_0 = saved_weights(kernel_weights_0)
saved_bias_1 = saved_weights(bias_weights_1)
saved_kernel_1 = saved_weights(kernel_weights_1)

# Dense -> Sigmoid -> Dense -> Sigmoid.
# NOTE(review): the meaning of the positional Dense arguments and of `alpha`
# is not visible from this snippet — confirm against layers.Dense.
model = Sequential()
model.add(Dense(2, 2, kernel_initializer=saved_kernel_0, bias_initializer=saved_bias_0, alpha=50.0))
model.add(Sigmoid())
model.add(Dense(1, 2, kernel_initializer=saved_kernel_1, bias_initializer=saved_bias_1, alpha=50.0))
model.add(Sigmoid())

# All four binary input pairs; the target is 1 exactly when both inputs are equal.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[1], [0], [0], [1]])

print("Prediction")
#%% import init from models import Sequential, Dense from utils import * xtr, ytr = np.loadtxt('data/xt'), np.loadtxt('data/yt') nn = Sequential(2, Dense(8, dropout=0.01), 1, activation='tanh') #%% nn.fit(xtr, ytr, lr=5e-3, epochs=50, callbacks=[train_anim(xtr, ytr)])
import numpy as np
from layers import Dense
from layers import Sigmoid
from models import Sequential
from initializers import saved_weights
from metrics import SquaredError
import matplotlib.pyplot as plt

# Start from all-zero kernel and bias values.
kernel_weights = np.array([[0, 0], [0, 0]])
bias_weights = np.array([0, 0])

# Wrap the fixed arrays so they can be passed as initializers.
saved_kernel = saved_weights(kernel_weights)
saved_bias = saved_weights(bias_weights)

# A single Dense layer followed by a Sigmoid.
# NOTE(review): the meaning of the positional Dense arguments and of `alpha`
# is not visible from this snippet — confirm against layers.Dense.
model = Sequential()
model.add(Dense(2, 2, kernel_initializer=saved_kernel, bias_initializer=saved_bias, alpha=0.5))
model.add(Sigmoid())

# All four binary input pairs with two target columns: the first column is 1
# when at least one input is 1, the second is 1 only when both inputs are 0.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0, 1], [1, 0], [1, 0], [1, 0]])

# Print the predictions of the model as initialised above.
print("Prediction")
p = model.predict(X)
print(p)
image_size = 28
num_labels = 10
image_pixels = image_size**2

# NOTE(review): pickle.load can execute arbitrary code; only load a
# pickled_mnist.pkl that comes from a trusted source.
with open("pickled_mnist.pkl", "br") as fh:
    data = pickle.load(fh)

# The pickle holds six positional entries, unpacked by index below.
train_imgs = data[0]
test_imgs = data[1]
train_labels = data[2]
test_labels = data[3]
train_labels_one_hot = data[4]
test_labels_one_hot = data[5]

# Restore a previously saved model instead of training one.
model = Sequential()
model.restore("mnist_model.pkl")

loss = SquaredError()

# Predicted class = index of the largest output per sample.
pred = model.predict(test_imgs)
pred_labels = pred.argmax(1)

print("MSE", loss.evaluate(pred, test_labels_one_hot).mean(0))
# NOTE(review): this comparison assumes test_labels has the same shape as
# pred_labels; a column-vector label array would broadcast — confirm.
print("Percentage correct", np.mean(pred_labels==test_labels)*100)

print("Prediction for first 5 images")
print(pred[0:5, :].argmax(1))
print("True labels")
print(test_labels[0:5])

fig, ax = plt.subplots(2, 5)
# If task has not completed then report eta else: sys.stdout.write('ETA: ' + seconds_to_string(remaining)) # Output padding sys.stdout.write(' ' * 20) # Allow progress bar to persist if it's complete if current == total: sys.stdout.write('\n') # Flush to standard out sys.stdout.flush() # Return the time of the progress update return time.time() model = Sequential() model.add(Input(2)) model.add(Dense(25)) model.add(Activation("relu")) model.add(Dense(50)) model.add(Activation("relu")) model.add(Dense(50)) model.add(Activation("relu")) model.add(Dense(25)) model.add(Activation("relu")) model.add(Dense(1)) model.add(Activation("sigmoid")) def initialise_layer_parameters(seed=2): # random seed initiation
def main():
    """Demonstrate the functionality provided by the framework.

    Builds four 2-25-25-25-2 models that differ in their activation, trains
    the LeakyReLU one directly with ``SGDCV`` and prints its accuracy, then
    runs ``cross_validate`` for every model with either SGD or Adam
    (``args.Adam``) and either MSE or cross-entropy (``args.CE``). When
    ``args.cross_val`` is not set, the fixed per-model parameters below are
    passed to ``cross_validate`` instead of the full grids.
    """
    # Different activation functions; Xavier initialisation for all but Sigmoid.
    relu_model = Sequential(Linear(2, 25), ReLU(),
                            Linear(25, 25), ReLU(),
                            Linear(25, 25), ReLU(),
                            Linear(25, 2),
                            xavier_init=True)
    leaky_relu_model = Sequential(Linear(2, 25), LeakyReLU(),
                                  Linear(25, 25), LeakyReLU(),
                                  Linear(25, 25), LeakyReLU(),
                                  Linear(25, 2),
                                  xavier_init=True)
    tanh_model = Sequential(Linear(2, 25), Tanh(),
                            Linear(25, 25), Tanh(),
                            Linear(25, 25), Tanh(),
                            Linear(25, 2),
                            xavier_init=True)
    sigmoid_model = Sequential(Linear(2, 25), Sigmoid(),
                               Linear(25, 25), Sigmoid(),
                               Linear(25, 25), Sigmoid(),
                               Linear(25, 2),
                               xavier_init=False)
    model_names = ["ReLU", "Leaky", "Tanh", "Sigmoid"]

    train_input, train_target = generate_disc_set(1000)
    test_input, test_target = generate_disc_set(1000)

    # Model training without cross-validation of the optimizer parameters.
    optimizer = SGDCV(leaky_relu_model, nb_epochs=25)
    optimizer.train(train_input, train_target)
    evaluator = Evaluator(leaky_relu_model)
    print("Train accuracy using LeakyReLU: {:.1f}%".format(
        (evaluator.compute_accuracy(train_input, train_target) * 100).item()))
    print("Test accuracy using LeakyReLU: {:.1f}%".format(
        (evaluator.compute_accuracy(test_input, test_target) * 100).item()))

    models = (relu_model, leaky_relu_model, tanh_model, sigmoid_model)

    # Full grids, used when cross-validation is requested.
    sgd_cross_val_param_grid = {"lr": [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]}
    adam_cross_val_param_grid = {
        "lr": [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
        "b1": [0.9, 0.8],
        "b2": [0.999, 0.888],
        "epsilon": [1e-8, 1e-7, 1e-6]
    }

    # Fixed single-value "grids" per model, used otherwise. Every model shares
    # the same Adam values; each gets its own dict so none are aliased.
    adam_params = {
        name: {"lr": [0.001], "b1": [0.9], "b2": [0.999], "epsilon": [1e-08]}
        for name in model_names
    }
    sgd_params = {
        "ReLU": {"lr": [0.001]},
        "Leaky": {"lr": [0.001]},
        "Tanh": {"lr": [0.001]},
        "Sigmoid": {"lr": [0.01]}
    }

    mse_loss = not args.CE
    optimizer_sgd = not args.Adam
    cross_validate = args.cross_val

    # Different loss functions.
    criterion = LossMSE() if mse_loss else LossCrossEntropy()

    for name, model in zip(model_names, models):
        if optimizer_sgd:
            # SGD optimizer parameter cross-validation.
            optimizer = SGDCV(model, mini_batch_size=10, criterion=criterion)
            params = sgd_cross_val_param_grid if cross_validate else sgd_params[name]
        else:
            # Adam optimizer parameter cross-validation.
            optimizer = AdamCV(model, mini_batch_size=10, criterion=criterion)
            params = adam_cross_val_param_grid if cross_validate else adam_params[name]

        _, best_params_score = optimizer.cross_validate(values=params)

        # The learning rate is printed in scientific notation: the grids go
        # down to 1e-6, which a fixed three-decimal format would show as 0.000.
        if optimizer_sgd:
            print("Best params for model using {} : (lr={:.1e})".format(
                name, best_params_score["lr"]))
        else:
            print(
                "Best params for model using {} : (lr={:.1e}, b1={:.3f}, b2={:.3f}, epsilon={:.1e})"
                .format(name, best_params_score["lr"],
                        best_params_score["b1"], best_params_score["b2"],
                        best_params_score["epsilon"]))

        print("Best score for model using {} : {:.3f} (+/- {:.3f})".format(
            name, best_params_score["mean"], best_params_score["std"]))
def run():
    """Evaluate the saved MNIST model if one exists, otherwise build, train and save it.

    The model file is ``dlmb_mnist_example.json`` next to this script.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    file_path = script_dir + "/dlmb_mnist_example.json"

    if not os.path.isfile(file_path):
        # No saved architecture yet: build a neural-net model and train it.
        layers = [
            # 128 outputs and 784 inputs (one per pixel of the image).
            Dense(128, 784, activation="ReLU"),
            Batchnorm(128),
            Dense(128, 128, activation="ReLU"),
            Batchnorm(128),
            Dense(32, 128, activation="ReLU"),
            Batchnorm(32),
            # 10 nodes in the last layer, one for each digit from 0 to 9.
            Dense(10, 32, activation="Softmax"),
        ]
        nn_model = Sequential(layers)

        # Cross-entropy suits classification; Adam is a widely used optimizer.
        nn_model.build(loss="crossentropy", optimizer="adam")

        # Ten passes over the data in batches of 1000 samples.
        nn_model.train(x_train, y_train, epochs=10, batch_size=1000)

        # Save the model so it can be reused without re-training.
        # When saving, files must end in .json.
        nn_model.save(file_path)
        return

    # A saved model exists, so load it and predict on x_test.
    nn_model = Sequential()
    nn_model.load(file_path)
    predictions = nn_model.predict(x_test)

    # Compare the predictions to the correct labels.
    print(
        f"This model got a {validate_model(predictions, y_test)/100}% accuracy"
    )
save_path = "models/mnist_model.pkl"

# MNIST images are 28x28 with a single channel, channel-first.
img_rows = 28
img_cols = 28
input_shape = (1, img_rows, img_cols)

(train_x, train_y), (test_x, test_y) = mnist.load_data()

# Reshape to (samples, 1, rows, cols), cast to the configured input dtype,
# and one-hot encode the labels.
train_x = np.reshape(train_x, (len(train_x), 1, img_rows, img_cols)).astype(skml_config.config.i_type)
train_y = convert_to_one_hot(train_y, num_classes)
test_x = np.reshape(test_x, (len(test_x), 1, img_rows, img_cols)).astype(skml_config.config.i_type)
test_y = convert_to_one_hot(test_y, num_classes)

# Hold out part of the training data for validation.
train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y)

# Two conv blocks, global average pooling, then an affine classifier.
filters = 64
model = Sequential()
model.add(Convolution(filters, 3, input_shape=input_shape))
model.add(BatchNormalization())
model.add(ReLU())
model.add(MaxPooling(2))
model.add(Convolution(filters, 3))
model.add(BatchNormalization())
model.add(ReLU())
model.add(GlobalAveragePooling())
model.add(Affine(num_classes))
model.compile(SoftmaxCrossEntropy(), Adam())

train_batch_size = 100
valid_batch_size = 1

# Log the training start time (the message text reads "training start").
print("訓練開始: {}".format(datetime.now().strftime("%Y/%m/%d %H:%M")))

# NOTE(review): the positional 20 is presumably the number of epochs — confirm
# against Sequential.fit.
model.fit(train_x, train_y, train_batch_size, 20, validation_data=(valid_batch_size, valid_x, valid_y), validation_steps=1)
def main():
    """Train a three-layer dense network on MNIST and evaluate it on the test set.

    Images are flattened to ``H * W`` features and scaled to ``[0, 1]``;
    labels are one-hot encoded with ten classes.
    """
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # print is called with one pre-formatted string, which produces the same
    # output under Python 2 and Python 3. The former print statement was a
    # SyntaxError on Python 3.
    print('Imported MNIST data: training input %s and training labels %s.' % (
        x_train.shape, y_train.shape))
    print('Imported MNIST data: test input %s and test labels %s.' % (
        x_test.shape, y_test.shape))

    # Flatten and normalise the training images.
    N, H, W = x_train.shape
    x = x_train.reshape((N, H * W)).astype('float') / 255
    y = to_categorical(y_train, num_classes=10)

    # 784 -> 300 -> 100 -> 10 with ReLU hidden activations and a softmax output.
    model = Sequential()
    model.add(Dense(), ReLU(), layer_dim=(28 * 28, 300), weight_scale=1e-2)
    model.add(Dense(), ReLU(), layer_dim=(300, 100), weight_scale=1e-2)
    model.add(Dense(), Softmax(), layer_dim=(100, 10), weight_scale=1e-2)

    model.compile(optimizer=GradientDescent(learning_rate=1e-2),
                  loss_func=categorical_cross_entropy)

    model.fit(x, y, epochs=10, batch_size=50, verbose=False)

    # Same preprocessing for the test split, then evaluate.
    N, H, W = x_test.shape
    x = x_test.reshape((N, H * W)).astype('float') / 255
    y = to_categorical(y_test, num_classes=10)
    model.evaluate(x, y)
#!/usr/bin/env python3 # -*- coding: utf-8 -*- import numpy as np import theano import theano.tensor as T from numpy import random as rng from theano import shared from layers import FullConnected from models import Sequential from optimizer import GradientChecking sample_size = 100 feature_size = 10 classes_num = 3 X_value = rng.uniform(size=(sample_size, feature_size)) Y_value = np.array(rng.randint(low=0, high=classes_num, size=(sample_size, )), dtype=np.int32) X = shared(value=X_value, name='X', borrow=True) Y = shared(value=Y_value, name='Y', borrow=True) model = Sequential([ FullConnected(feature_size, 64), FullConnected(64, classes_num, activation='softmax') ]) gc = GradientChecking(model) gc.check_gradient(X_value, Y_value)
t = iris_x[:, target].reshape(-1, 1) return x, x.shape[1], t, 1 #irisからデータを生成 #x, inputs_shape, t, outputs_shape = create_data_category() #loss = "categorical_crossentropy" #metric = "accuracy" #last_layer_activation = "softmax" x, inputs_shape, t, outputs_shape = create_data_numeric(3) loss = "mean_squared_error" metric = "rmse" last_layer_activation = "identify" seed = 15 model = Sequential(seed=seed) model.add(Dense(10, activation="relu", inputs_shape=inputs_shape)) model.add(Dense(10, activation="relu")) model.add(Dense(outputs_shape, activation=last_layer_activation)) model.compile(loss=loss, optimizer=Adam(), metric=metric) train_x, test_x, train_t, test_t = train_test_split(x, t, test_size=0.3, random_state=seed) model.fit(train_x, train_t, test_x, test_t, epochs=1000, batch_size=50) #誤差をプロット import matplotlib.pyplot as plt plt.plot(model.history_train[0])
def _build_mlp(hidden_activation, output_activation, seed, batch_norm=False):
    """Build the 2-25-25-25-2 network shared by every ``model_selector`` variant.

    :param hidden_activation: activation class instantiated after each hidden Linear
    :param output_activation: activation class instantiated after the last Linear,
                              or None for no output activation
    :param seed: seed of the first Linear layer; later layers use seed + 1, + 2, + 3
    :param batch_norm: whether to insert BatchNorm(25) after each hidden Linear
    """
    layers = []
    hidden_shapes = ((2, 25), (25, 25), (25, 25))
    for offset, (n_in, n_out) in enumerate(hidden_shapes):
        layers.append(Linear(n_in, n_out, method='random', seed=seed + offset))
        if batch_norm:
            layers.append(BatchNorm(n_out))
        layers.append(hidden_activation())
    layers.append(Linear(25, 2, method='random', seed=seed + 3))
    if output_activation is not None:
        layers.append(output_activation())
    return Sequential(layers)


def model_selector(mid, seed=0):
    """Return the model architecture identified by ``mid``.

    All variants are 2-25-25-25-2 fully connected networks and differ only in
    their activations:

    ==== ================= ================== ==========
    mid  hidden activation output activation  BatchNorm
    ==== ================= ================== ==========
    0    ReLU              ReLU               no
    1    ReLU              Sigmoid            no
    2    ReLU              Tanh               no
    3    ReLU              none               no
    4    LeakyReLU         LeakyReLU          no
    5    LeakyReLU         Sigmoid            no
    6    LeakyReLU         Tanh               no
    7    LeakyReLU         none               no
    8    Tanh              Tanh               no
    9    Tanh              Tanh               yes
    ==== ================= ================== ==========

    Variants 3 and 7 have no output activation; per the original notes the
    softmax is included in the cross-entropy loss.

    :param mid: model id, 0 to 9
    :param seed: base seed for the random initialisation of the Linear layers
    :returns: the ``Sequential`` model, or None when ``mid`` is not a known id
    """
    # Lambdas defer construction, so only the requested variant's layer
    # classes are looked up and instantiated.
    builders = {
        0: lambda: _build_mlp(ReLU, ReLU, seed),
        1: lambda: _build_mlp(ReLU, Sigmoid, seed),
        2: lambda: _build_mlp(ReLU, Tanh, seed),
        3: lambda: _build_mlp(ReLU, None, seed),
        4: lambda: _build_mlp(LeakyReLU, LeakyReLU, seed),
        5: lambda: _build_mlp(LeakyReLU, Sigmoid, seed),
        6: lambda: _build_mlp(LeakyReLU, Tanh, seed),
        7: lambda: _build_mlp(LeakyReLU, None, seed),
        8: lambda: _build_mlp(Tanh, Tanh, seed),
        9: lambda: _build_mlp(Tanh, Tanh, seed, batch_norm=True),
    }
    builder = builders.get(mid)
    # Unknown ids keep returning None so existing callers are unaffected.
    return builder() if builder is not None else None
# Download the dataset archive only when it is not already on disk.
if not PATH.exists():
    content = requests.get(URL + FILENAME).content
    # Write through a context manager so the file handle is closed (and the
    # data flushed) before the archive is read back below.
    with PATH.open("wb") as archive:
        archive.write(content)

# NOTE(review): pickle.load can execute arbitrary code; only use this with an
# archive downloaded from a trusted URL.
with gzip.open(PATH.as_posix(), "rb") as f:
    ((x_train, y_train), (x_test, y_test), _) = pickle.load(f, encoding="latin-1")

im_size = (28, 28)
input_dim = np.prod(im_size)

# %% Auto Encoder
# Symmetric layer sizes around a 10-unit bottleneck; the model is trained to
# reproduce its own input.
autoencoder = Sequential(input_dim, 30, 10, 30, input_dim, activation='logistic')
autoencoder.fit(x_train, x_train, epochs=10)

# Eight test images drawn at random from the first 1000.
x_sample = x_test[np.random.randint(1000, size=8)]
output = autoencoder.forward(x_sample)

for i in range(8):
    # plot original image
    ax = plt.subplot(8, 2, 2 * i + 1)
    ax.axis('off')
    ax.imshow(x_sample[i].reshape(im_size), cmap='gray')
    # plot reconstructed image
    ax = plt.subplot(8, 2, 2 * i + 2)
return (shared(train_x, borrow=True), shared(test_x, borrow=True), shared(valid_x, borrow=True), shared(train_y, borrow=True), shared(test_y, borrow=True), shared(valid_y, borrow=True)) (train_x, test_x, valid_x, train_y, test_y, valid_y) = load_data('../data/mnist') # Shared params epoch = 5 batch_size = 1000 # Without dropout model = Sequential([ FullConnected(784, 625, activation='relu'), FullConnected(625, 625, activation='relu'), FullConnected(625, 10, activation='softmax') ], optimizer=RMSprop()) model.train(train_x, train_y, epoch=epoch, batch_size=batch_size, validation_data=(valid_x, valid_y), valid_freq=20, monitor=True) score = model.score(test_x, test_y) print('test score: {0}'.format(score.eval()))
from models import Sequential, compute_loss
from layers import Layer

# Random search: keep building fresh models until one reaches an accumulated
# loss of at most 6.25 over EPOCHS random samples.
min_loss = 100
best_model = None
EPOCHS = 12

from random import uniform

while min_loss > 6.25:
    # A new model is built on every attempt; no training call is made here.
    model = Sequential()
    first_layer = Layer(4, "sigmoid")
    model.add(first_layer)
    second_layer = Layer(5, "sigmoid")
    model.add(second_layer)
    third_layer = Layer(4, "softmax")
    model.add(third_layer)
    model.compile()

    # Accumulate the loss over EPOCHS random four-value inputs.
    loss = 0
    for i in range(EPOCHS):
        inpt = (uniform(-1, 1), uniform(-1, 1), uniform(-1, 1), uniform(-1, 1))
        # One-hot target marking the position of the largest input value.
        expected_output = [1 if n == max(inpt) else 0 for n in inpt]
        output = model.run(inpt)
        loss += compute_loss(output, expected_output)

    # Remember the best attempt so far.
    if loss < min_loss:
        best_model = model
        min_loss = loss
    print("Loss is: " + str(loss))
""" mndata = MNIST('./samples') images, labels = mndata.load_training() vocab = set() for label in labels: vocab.add(label) vocab = sorted(vocab) Y = [] for label in labels: one_hot = [0] * len(vocab) one_hot[label] = 1 Y.append(one_hot) X = np.array(images).T / 255 Y = np.array(Y).T return (X, Y) X, Y = parse_data() model = Sequential() model.add(Dense(1024, n_inputs=X.shape[0])) model.add(Dense(1024)) model.add(Dense(1024)) model.add(Dense(Y.shape[0], activation='sigmoid')) model.compile() # model = pickle.load(open('model.p', 'rb')) model.fit(X, Y, 1, learning_rate=0.003)
def cross_val_results(verbose=True):
    """
    Function for generating the accuracy results of four models presented in the report with their best parameters,
    averaged over 10 runs and using different combinations of the available optimizers and loss

    Every (Model, Optimizer, Loss) combination is trained on a fresh deep copy of the model for each of the
    10 generated (train, test) dataset pairs, so the template models themselves are never trained.

    :param verbose: whether to print average results for each (Model, Optimizer, Loss) combination,
                    boolean, optional, default is True

    :returns: list of tuples containing (mean, std) of each (Model, Optimizer, Loss) combination, each tuple in [0, 1]^2
    """
    # Ten independent (train, test) pairs shared by every combination.
    datasets = [(generate_disc_set(1000), generate_disc_set(1000)) for _ in range(10)]

    relu_model = Sequential(Linear(2, 25), ReLU(),
                            Linear(25, 25), ReLU(),
                            Linear(25, 25), ReLU(),
                            Linear(25, 2),
                            xavier_init=True)
    leaky_relu_model = Sequential(Linear(2, 25), LeakyReLU(),
                                  Linear(25, 25), LeakyReLU(),
                                  Linear(25, 25), LeakyReLU(),
                                  Linear(25, 2),
                                  xavier_init=True)
    tanh_model = Sequential(Linear(2, 25), Tanh(),
                            Linear(25, 25), Tanh(),
                            Linear(25, 25), Tanh(),
                            Linear(25, 2),
                            xavier_init=True)
    sigmoid_model = Sequential(Linear(2, 25), Sigmoid(),
                               Linear(25, 25), Sigmoid(),
                               Linear(25, 25), Sigmoid(),
                               Linear(25, 2))

    models = [relu_model, leaky_relu_model, tanh_model, sigmoid_model]

    final_scores = []
    optimizers_names = ["SGD", "Adam"]
    models_names = ["ReLU", "Leaky", "Tanh", "Sigmoid"]
    losses_names = ["MSE", "CrossEntropy"]
    losses = [LossMSE(), LossCrossEntropy()]

    # Best parameters per model; all four models share the same Adam values.
    adam_params = {
        name: {"lr": 0.001, "b1": 0.9, "b2": 0.999, "epsilon": 1e-08}
        for name in models_names
    }
    sgd_params = {"ReLU": {"lr": 0.001},
                  "Leaky": {"lr": 0.001},
                  "Tanh": {"lr": 0.001},
                  "Sigmoid": {"lr": 0.01}}

    for optim_name in optimizers_names:
        for loss_name, loss in zip(losses_names, losses):
            for model_name, model in zip(models_names, models):
                if verbose:
                    print("Validating model {} with {} and {} loss...".format(model_name, optim_name, loss_name),
                          end='')
                scores = []

                if optim_name == "Adam":
                    params = adam_params[model_name]
                    optim = Adam(model, criterion=loss, nb_epochs=50, mini_batch_size=10,
                                 lr=params["lr"], b1=params["b1"], b2=params["b2"], epsilon=params["epsilon"])
                else:
                    params = sgd_params[model_name]
                    # Built from the model under evaluation, like the Adam
                    # branch (this previously always passed relu_model).
                    optim = SGD(model, criterion=loss, nb_epochs=50, mini_batch_size=10,
                                lr=params["lr"])

                for ((train_input, train_target), (test_input, test_target)) in datasets:
                    # Train a fresh copy so every run starts from the same untrained model.
                    optim.model = copy.deepcopy(model)
                    optim.train(train_input, train_target, verbose=False)

                    evaluator = Evaluator(optim.model)
                    accuracy = evaluator.compute_accuracy(test_input, test_target)
                    scores.append(accuracy)

                scores = torch.FloatTensor(scores)
                scores_mean = torch.mean(scores).item()
                scores_std = torch.std(scores).item()

                if verbose:
                    print("Score : {0:.3f} (+/- {1:.3f}) ".format(scores_mean, scores_std))

                final_scores.append((scores_mean, scores_std))

    return final_scores
(train_x, valid_x, train_y, valid_y) = train_test_split(all_x, all_y, train_size=1 - valid) return (shared(train_x, borrow=True), shared(test_x, borrow=True), shared(valid_x, borrow=True), shared(train_y, borrow=True), shared(test_y, borrow=True), shared(valid_y, borrow=True)) else: return (shared(train_x, borrow=True), shared(test_x, borrow=True), shared(train_y, borrow=True), shared(test_y, borrow=True)) dataset = datasets.load_digits() (train_x, test_x, valid_x, train_y, test_y, valid_y) = load_data(dataset, valid=0.2) model = Sequential( [FullConnected(64, 128), FullConnected(128, 10, activation='softmax')], optimizer=SGD(lr=0.0001, decay=.001, momentum=0.9)) model.train(train_x, train_y, epoch=100, batch_size=1437, validation_data=(valid_x, valid_y), valid_freq=5, patience=10, monitor=True) score = model.score(test_x, test_y) print('test score: {0}'.format(score.eval()))
(train_x, test_x, train_y, test_y) = mnist(data_dir) (train_x, valid_x, train_y, valid_y) = train_test_split(train_x, train_y, train_size=0.8, random_state=np.random.randint(10e6)) return (shared(train_x, borrow=True), shared(test_x, borrow=True), shared(valid_x, borrow=True), shared(train_y, borrow=True), shared(test_y, borrow=True), shared(valid_y, borrow=True)) (train_x, test_x, valid_x, train_y, test_y, valid_y) = load_data('../data/mnist') # Shared params epoch = 5 batch_size = 1000 # Without dropout model = Sequential( [FullConnected(784, 625, activation='relu'), FullConnected(625, 625, activation='relu'), FullConnected(625, 10, activation='softmax')], optimizer=RMSprop() ) model.train(train_x, train_y, epoch=epoch, batch_size=batch_size, validation_data=(valid_x, valid_y), valid_freq=20, monitor=True) score = model.score(test_x, test_y) print('test score: {0}'.format(score.eval()))
image_size = 28 num_labels = 10 image_pixels = image_size**2 with open("pickled_mnist.pkl", "br") as fh: data = pickle.load(fh) train_imgs = data[0] test_imgs = data[1] train_labels = data[2] test_labels = data[3] train_labels_one_hot = data[4] test_labels_one_hot = data[5] model = Sequential() model.add( Dense(16, 784, kernel_initializer=truncated_normal, bias_initializer=zeros)) model.add(Sigmoid()) model.add( Dense(10, 16, kernel_initializer=truncated_normal, bias_initializer=zeros)) model.add(Sigmoid()) loss = SquaredError() loss_history = model.fit(train_imgs, train_labels_one_hot, batch_size=32, epochs=10, loss=loss,
# Shuffle (X, y) in unison as the later parts of X will almost all be larger digits
indices = np.arange(len(y))
np.random.shuffle(indices)
X = X[indices]
y = y[indices]

# Explicitly set apart 10% for validation data that we never train over.
# Floor division keeps split_at an integer: under Python 3 a plain "/" yields
# a float, which cannot be used as a slice bound. "//" gives the same value as
# the original expression did under Python 2.
split_at = len(X) - len(X) // 10
(X_train, X_val) = (slice_X(X, 0, split_at), slice_X(X, split_at))
(y_train, y_val) = (y[:split_at], y[split_at:])

print(X_train.shape)
print(y_train.shape)

print("Build model...")
model = Sequential()
# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
# note: in a situation where your input sequences have a variable length,
# use input_shape=(None, nb_feature).
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, convertor.get_dim())))
# For the decoder's input, we repeat the encoded input for each time step
model.add(RepeatVector(DIGITS + 1))
# The decoder RNN could be multiple layers stacked or a single layer
for _ in range(LAYERS):
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))

# For each step of the output sequence, decide which character should be chosen
model.add(TimeDistributedDense(convertor.get_dim()))
model.add(Activation("softmax"))

model.compile(loss="categorical_crossentropy", optimizer="adam")
import init
from models import Sequential
from layers import Dense
from utils import onehot
import numpy as np
from tensorflow.keras.datasets import mnist

(x_tr, y_tr), (x_ts, y_ts) = mnist.load_data()

# Shape of one image and its total number of pixels.
im_shape = x_tr[0].shape
im_size = np.prod(im_shape)


def accuracy(model):
    """Return the fraction of test samples whose arg-max output equals the label.

    Reads the module-level ``x_ts`` / ``y_ts`` at call time, so it must be
    called after ``x_ts`` has been flattened below.
    """
    return np.mean(np.argmax((model(x_ts)), axis=(-1)) == y_ts)


# NOTE(review): presumably im_size inputs, a 30-unit tanh layer and a 10-unit
# logistic output — confirm against models.Sequential.
nn = Sequential(im_size, Dense(30, activation='tanh'), Dense(10, activation='logistic'))

# Flatten every image to a vector of im_size pixels.
x_tr = x_tr.reshape(-1, im_size)
x_ts = x_ts.reshape(-1, im_size)

# Train on one-hot encoded labels (10 classes), then report test accuracy.
nn.fit(x_tr, (onehot(y_tr, 10)), epochs=10)
print(accuracy(nn))
if valid > 0: (train_x, valid_x, train_y, valid_y) = train_test_split(all_x, all_y, train_size=1 - valid) return (shared(train_x, borrow=True), shared(test_x, borrow=True), shared(valid_x, borrow=True), shared(train_y, borrow=True), shared(test_y, borrow=True), shared(valid_y, borrow=True)) else: return (shared(train_x, borrow=True), shared(test_x, borrow=True), shared(train_y, borrow=True), shared(test_y, borrow=True)) dataset = datasets.load_digits() (train_x, test_x, valid_x, train_y, test_y, valid_y) = load_data(dataset, valid=0.2) model = Sequential( [FullConnected(64, 128), FullConnected(128, 10, activation='softmax')], optimizer=SGD(lr=0.0001, decay=.001, momentum=0.9) ) model.train(train_x, train_y, epoch=100, batch_size=1437, validation_data=(valid_x, valid_y), valid_freq=5, patience=10, monitor=True) score = model.score(test_x, test_y) print('test score: {0}'.format(score.eval()))
def get_best_model(self):
    """Return a new ``Sequential`` built from the recorded best loss, metric and layers."""
    loss = self.best_model_loss
    metric = self.best_model_metric
    restored = Sequential(loss=loss, metric=metric)
    # The stored layers are attached as-is, not copied.
    restored.layers = self.best_model_layers
    return restored