def double_fc_dropout(p0, p1, p2, repetitions):
    expanded_training_data, _, _ = CNN.load_data_shared(
        "/data/mnist_expanded.pkl.gz")
    nets = []
    for j in range(repetitions):
        print "\n\nTraining using a dropout network with parameters ", p0, p1, p2
        print "Training with expanded data, run num %s" % j
        net = Network([
            ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                          filter_shape=(20, 1, 5, 5),
                          poolsize=(2, 2),
                          activation_fn=ReLU),
            ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
                          filter_shape=(40, 20, 5, 5),
                          poolsize=(2, 2),
                          activation_fn=ReLU),
            FullyConnectedLayer(
                n_in=40 * 4 * 4, n_out=1000, activation_fn=ReLU, p_dropout=p0),
            FullyConnectedLayer(
                n_in=1000, n_out=1000, activation_fn=ReLU, p_dropout=p1),
            SoftmaxLayer(n_in=1000, n_out=10, p_dropout=p2)],
            mini_batch_size)
        net.SGD(expanded_training_data, 40, mini_batch_size, 0.03,
                validation_data, test_data)
        nets.append(net)
    return nets
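# A minimal driver for double_fc_dropout. The dropout rates and ensemble
# size are illustrative assumptions (0.5 dropout in each regularized layer,
# five repetitions), not values fixed by the function itself; it assumes the
# module-level setup shown in the sketch following dbl_conv below.
ensemble = double_fc_dropout(0.5, 0.5, 0.5, 5)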
def expanded_data_double_fc(n=100):
    """n is the number of neurons in both fully-connected layers.

    We'll try n = 100, 300, and 1000.
    """
    expanded_training_data, _, _ = CNN.load_data_shared(
        "/data/mnist_expanded.pkl.gz")
    for j in range(3):
        print "Training with expanded data, %s neurons in two FC layers, run num %s" % (n, j)
        net = Network([
            ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                          filter_shape=(20, 1, 5, 5),
                          poolsize=(2, 2),
                          activation_fn=ReLU),
            ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
                          filter_shape=(40, 20, 5, 5),
                          poolsize=(2, 2),
                          activation_fn=ReLU),
            FullyConnectedLayer(n_in=40 * 4 * 4, n_out=n, activation_fn=ReLU),
            FullyConnectedLayer(n_in=n, n_out=n, activation_fn=ReLU),
            SoftmaxLayer(n_in=n, n_out=10)],
            mini_batch_size)
        net.SGD(expanded_training_data, 60, mini_batch_size, 0.03,
                validation_data, test_data, lmbda=0.1)
def dbl_conv_relu():
    for lmbda in [0.0, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0]:
        for j in range(3):
            print "Conv + Conv + FC num %s, relu, with regularization %s" % (j, lmbda)
            net = Network([
                ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                              filter_shape=(20, 1, 5, 5),
                              poolsize=(2, 2),
                              activation_fn=ReLU),
                ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
                              filter_shape=(40, 20, 5, 5),
                              poolsize=(2, 2),
                              activation_fn=ReLU),
                FullyConnectedLayer(
                    n_in=40 * 4 * 4, n_out=100, activation_fn=ReLU),
                SoftmaxLayer(n_in=100, n_out=10)],
                mini_batch_size)
            net.SGD(training_data, 60, mini_batch_size, 0.03,
                    validation_data, test_data, lmbda=lmbda)
def omit_FC():
    for j in range(3):
        print "Conv only, no FC"
        net = Network([
            ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                          filter_shape=(20, 1, 5, 5),
                          poolsize=(2, 2)),
            SoftmaxLayer(n_in=20 * 12 * 12, n_out=10)], mini_batch_size)
        net.SGD(training_data, 60, mini_batch_size, 0.1,
                validation_data, test_data)
    return net
def shallow(n=3, epochs=60):
    nets = []
    for j in range(n):
        print "A shallow net with 100 hidden neurons"
        net = Network([
            FullyConnectedLayer(n_in=784, n_out=100),
            SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
        net.SGD(training_data, epochs, mini_batch_size, 0.1,
                validation_data, test_data)
        nets.append(net)
    return nets
def basic_conv(n=3, epochs=10):
    for j in range(n):
        print "Conv + FC architecture"
        net = Network([
            ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                          filter_shape=(20, 1, 5, 5),
                          poolsize=(2, 2)),
            FullyConnectedLayer(n_in=20 * 12 * 12, n_out=100),
            SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
        net.SGD(training_data, epochs, mini_batch_size, 0.1,
                validation_data, test_data)
    return net
def dbl_conv(activation_fn=sigmoid):
    for j in range(3):
        print "Conv + Conv + FC architecture"
        net = Network([
            ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                          filter_shape=(20, 1, 5, 5),
                          poolsize=(2, 2),
                          activation_fn=activation_fn),
            ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
                          filter_shape=(40, 20, 5, 5),
                          poolsize=(2, 2),
                          activation_fn=activation_fn),
            FullyConnectedLayer(
                n_in=40 * 4 * 4, n_out=100, activation_fn=activation_fn),
            SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
        net.SGD(training_data, 60, mini_batch_size, 0.1,
                validation_data, test_data)
    return net
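# The experiment functions above depend on module-level names that this
# file never defines: training_data, validation_data, test_data,
# mini_batch_size, the layer classes, the activations, and the CNN module
# providing load_data_shared. A minimal setup sketch, assuming a
# network3-style module (as in Michael Nielsen's "Neural Networks and
# Deep Learning") imported under the CNN alias used above:
import network3 as CNN
from network3 import Network, ConvPoolLayer, FullyConnectedLayer, \
    SoftmaxLayer, ReLU, sigmoid

training_data, validation_data, test_data = CNN.load_data_shared()
mini_batch_size = 10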
from collections import OrderedDict

import torch
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

from RunManager import RunManager
from RunBuilder import RunBuilder

# Network (the CNN being tuned) is assumed to be defined elsewhere in the
# project, alongside RunManager and RunBuilder.

if __name__ == "__main__":
    train_set = torchvision.datasets.FashionMNIST(
        root='./data',
        train=True,
        download=True,
        transform=transforms.Compose([transforms.ToTensor()]))

    # Hyperparameter grid: every combination of batch size and learning
    # rate becomes one run.
    parameters = OrderedDict(batch_size=[100, 1000], lr=[0.001, 0.01])

    m = RunManager()
    for run in RunBuilder.get_runs(parameters):
        network = Network()
        loader = torch.utils.data.DataLoader(train_set,
                                             batch_size=run.batch_size)
        optimizer = optim.Adam(network.parameters(), lr=run.lr)

        m.begin_run(run, network, loader)
        for epoch in range(2):
            m.begin_epoch()
            for batch in loader:
                images, labels = batch
                preds = network(images)
                loss = F.cross_entropy(preds, labels)

                optimizer.zero_grad()
                loss.backward()   # backward pass
                optimizer.step()  # weight update
                # The tracking calls below assume the usual RunManager
                # API (track_loss / track_num_correct).
                m.track_loss(loss)
                m.track_num_correct(preds, labels)
            m.end_epoch()
        m.end_run()
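# RunBuilder is imported above but not shown. A common implementation (a
# sketch, assuming the usual "runs as named tuples" pattern) builds one
# named tuple per point in the cartesian product of the parameter lists:
from collections import namedtuple
from itertools import product


class RunBuilder:
    @staticmethod
    def get_runs(params):
        # params is an OrderedDict mapping parameter name -> list of values
        Run = namedtuple('Run', params.keys())
        return [Run(*values) for values in product(*params.values())]

# With parameters = OrderedDict(batch_size=[100, 1000], lr=[0.001, 0.01]),
# get_runs yields four runs: (100, 0.001), (100, 0.01), (1000, 0.001), and
# (1000, 0.01), accessed as run.batch_size and run.lr.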
import random
from math import exp

from PIL import Image

from CNN import Network  # import network class


def f(x):
    # logistic activation function
    return 1 / (1 + exp(-x))


def fp(x):
    # derivative of the logistic function
    return f(x) * (1 - f(x))


random.seed()

n = Network(1, 2)  # make a network which takes a 1x2 image as input
n.add_fully_connected_layer(2, f, fp)  # add hidden layer with 2 neurons
n.add_fully_connected_layer(1, f, fp)  # add output layer with 1 neuron

# Prepare the training set: the four XOR input patterns, encoded in the
# green channel of 1x2 images.
im = [Image.new('RGB', (1, 2), color=(0, 0, 0)) for i in range(4)]
im[0].putpixel((0, 0), (0, 0, 0))
im[0].putpixel((0, 1), (0, 0, 0))
im[1].putpixel((0, 0), (0, 1, 0))
im[1].putpixel((0, 1), (0, 0, 0))
im[2].putpixel((0, 0), (0, 0, 0))
im[2].putpixel((0, 1), (0, 1, 0))
im[3].putpixel((0, 0), (0, 1, 0))
im[3].putpixel((0, 1), (0, 1, 0))
res = [[0], [1], [1], [0]]  # XOR targets

for i in range(1000000):  # 1,000,000 cycles of learning
    x = random.randint(0, 3)  # pick a random training example
    # (the training step on example x follows here; not shown)
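# The training inputs live in the green channel of the 1x2 images above. A
# hypothetical helper (not part of the CNN module) that turns one of these
# images into a plain input vector for inspection:
def image_to_input(img):
    # read the green channel of each pixel in the single column
    return [img.getpixel((0, y))[1] for y in range(img.size[1])]

# image_to_input(im[3]) == [1, 1], matching the XOR target res[3] == [0]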
import os
from datetime import datetime

import numpy as np
import tensorflow as tf

# Network, Reader, config, and FLAGS are assumed to be defined elsewhere
# in the project.


def main():
    current_time = datetime.now().strftime('%Y%m%d-%H%M')
    checkpoint_dir = 'checkpoints'
    if FLAGS.checkpoint is not None:
        # Strip an optional 'checkpoints/' prefix from the flag value.
        # (str.lstrip strips characters, not a prefix, so it is avoided.)
        checkpoint_name = FLAGS.checkpoint
        if checkpoint_name.startswith('checkpoints/'):
            checkpoint_name = checkpoint_name[len('checkpoints/'):]
        checkpoint_path = os.path.join(checkpoint_dir, checkpoint_name)
    else:
        checkpoint_path = os.path.join(checkpoint_dir, current_time)
    try:
        os.makedirs(checkpoint_path)
    except os.error:
        print('Unable to make checkpoints directory: %s' % checkpoint_path)
    model_save_path = os.path.join(checkpoint_path, 'model.ckpt')

    nn = Network(FLAGS.network)
    dataset = Reader()
    saver = tf.train.Saver()

    print('Build session.')
    tfconfig = tf.ConfigProto()
    tfconfig.gpu_options.allow_growth = True
    sess = tf.Session(config=tfconfig)

    if FLAGS.checkpoint is not None:
        print('Restore from pre-trained model.')
        checkpoint = tf.train.get_checkpoint_state(checkpoint_path)
        meta_graph_path = checkpoint.model_checkpoint_path + '.meta'
        restore = tf.train.import_meta_graph(meta_graph_path)
        restore.restore(sess, tf.train.latest_checkpoint(checkpoint_path))
        # Recover the global step from the checkpoint filename.
        step = int(meta_graph_path.split('-')[2].split('.')[0])
    else:
        print('Initialize.')
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        step = 0

    loss_list = []
    train_accuracy_list = []
    test_accuracy_list = []

    train_writer = tf.summary.FileWriter('logs/train@' + current_time,
                                         sess.graph)
    test_writer = tf.summary.FileWriter('logs/test@' + current_time,
                                        sess.graph)
    summary_op = tf.summary.merge_all()

    print('Start training:')
    for epoch in range(config.num_epochs):
        # Shuffle the training set each epoch.
        permutation = np.random.permutation(dataset.train_len)
        X_train_data = dataset.train_set_X[permutation]
        y_train_data = dataset.train_set_y[permutation]

        data_idx = 0
        while data_idx < dataset.train_len:
            # Cap the batch at the end of the dataset so the final example
            # of each epoch is included.
            batch_end = min(data_idx + config.batch_size, dataset.train_len)
            X_train_batch = X_train_data[data_idx:batch_end]
            y_train_batch = y_train_data[data_idx:batch_end]
            data_idx += config.batch_size

            loss, _, train_accuracy, summary = sess.run(
                [nn.loss, nn.optimizer, nn.accuracy, summary_op], {
                    nn.X_inputs: X_train_batch,
                    nn.y_inputs: y_train_batch,
                    nn.keep_prob: config.keep_prob,
                    nn.training: True
                })
            loss_list.append(loss)
            train_accuracy_list.append(train_accuracy)
            print('>> At step %i: loss = %.2f, train accuracy = %.3f%%'
                  % (step, loss, train_accuracy * 100))
            train_writer.add_summary(summary, step)
            step += 1

        # Evaluate on the test set at the end of each epoch (dropout off).
        accuracy, summary = sess.run(
            [nn.accuracy, summary_op], {
                nn.X_inputs: dataset.test_set_X,
                nn.y_inputs: dataset.test_set_y,
                nn.keep_prob: 1.0,
                nn.training: False
            })
        test_accuracy_list.append(accuracy)
        print('For epoch %i: test accuracy = %.2f%%\n'
              % (epoch, accuracy * 100))
        test_writer.add_summary(summary, epoch)

        save_path = saver.save(sess, model_save_path, global_step=step)
        print('Model saved in file: %s' % save_path)

    sess.close()
    train_writer.close()
    test_writer.close()
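# A sketch of how FLAGS might be declared with the TF1 flags API. The flag
# names follow their usage in main(); the defaults and help strings here
# are assumptions, not taken from the original project:
tf.app.flags.DEFINE_string('network', 'default',
                           'name of the network architecture to build')
tf.app.flags.DEFINE_string('checkpoint', None,
                           'checkpoint directory to restore from')
FLAGS = tf.app.flags.FLAGS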