def main():
    url = "D:/workspace-python/2016-04-24 num/"
    train_x_url = url + "train-images.idx3-ubyte"
    train_y_url = url + "train-labels.idx1-ubyte"
    test_x_url = url + "t10k-images.idx3-ubyte"
    test_y_url = url + "t10k-labels.idx1-ubyte"
    category = 3
    # record the start time
    begin = time.time()
    # build an instance of the training model
    neural_network = NeuralNetwork(hidden_layer_count=2, hidden_layer_neuron_count=30,
                                   alpha=0.01, train_times=99)
    # train the model
    x = utils.load_mnist(train_x_url)
    y = utils.load_mnist(train_y_url, is_image=False)
    x, y = utils.get_classify(x, y, category)
    neural_network.train(x, y)
    # save the model parameters
    # neural_network.save_theta("")
    # test the model
    actual = neural_network.predict(x)
    print("Regression accuracy: %.1f%%" % (np.mean(y.argmax(1) == actual.argmax(1)) * 100))
    # the test-set reload below is commented out, so the "validation" score
    # is recomputed on the training data
    # x = utils.load_mnist(test_x_url)
    # y = utils.load_mnist(test_y_url, is_image=False)
    # x, y = utils.get_classify(x, y, category)
    actual = neural_network.predict(x)
    print("Validation accuracy: %.1f%%" % (np.mean(y.argmax(1) == actual.argmax(1)) * 100))
    # record the end time
    end = time.time()
    print("Execution time: %.3f min" % ((end - begin) / 60))
def demo():
    X, y = utils.load_mnist()
    y = utils.makeMultiClass(y)

    # building the SDA
    sDA = StackedDA([100])

    # pre-training the SDA
    sDA.pre_train(X[:100], noise_rate=0.3, epochs=1)

    # saving a PNG representation of the first layer
    W = sDA.Layers[0].W.T[:, 1:]
    utils.saveTiles(W, img_shape=(28, 28), tile_shape=(10, 10), filename="results/res_dA.png")

    # adding the final layer
    sDA.finalLayer(X[:37500], y[:37500], epochs=2)

    # training the whole network
    sDA.fine_tune(X[:37500], y[:37500], epochs=2)

    # predicting using the SDA
    pred = sDA.predict(X[37500:]).argmax(1)

    # let's see how the network did
    y = y[37500:].argmax(1)
    e = 0.0
    for i in range(len(y)):
        e += y[i] == pred[i]

    # printing the result; this setup should reach roughly 80% accuracy
    print "accuracy: %2.2f%%" % (100 * e / len(y))

    return sDA
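# Note (added): the element-wise accuracy loop in demo() can be collapsed into
# a single vectorized NumPy expression; a minimal sketch, assuming `pred` and
# `y` are the integer label arrays produced above:
import numpy as np

def accuracy(pred, y):
    # fraction of matching labels, scaled to a percentage
    return 100.0 * np.mean(pred == y)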
def train(self):
    # Initializing the variables
    init = tf.global_variables_initializer()
    data, label, test_data, test_label = utils.load_mnist()

    # Launch the graph
    self.sess.run(init)
    step = 0
    # Keep training until we reach the maximum number of epochs
    while step < self.epochs:
        batch_idxs = len(data) // self.batch_size
        for idx in xrange(batch_idxs):
            batch_x = data[idx * self.batch_size:(idx + 1) * self.batch_size]
            batch_y = label[idx * self.batch_size:(idx + 1) * self.batch_size]
            # Run optimization op (backprop); the original referenced a bare
            # `sess`, which is undefined here -- the session on the instance
            # is what is meant
            self.sess.run(self.optimizer,
                          feed_dict={self.x: batch_x, self.y: batch_y,
                                     self.keep_prob: self.dropout})
            if idx % self.display_step == 0:
                # Calculate batch loss and accuracy (dropout disabled)
                loss, acc = self.sess.run([self.cost, self.accuracy],
                                          feed_dict={self.x: batch_x, self.y: batch_y,
                                                     self.keep_prob: 1.})
                print("Epoch " + str(step) + " Iter " + str(idx * self.batch_size) +
                      ", Minibatch Loss= " + "{:.6f}".format(loss) +
                      ", Training Accuracy= " + "{:.5f}".format(acc))
        step += 1
    self.test(test_data[:1000], test_label[:1000])
    print("Optimization Finished!")
def train_simple_perceptron():
    with Timer("Loading dataset"):
        trainset, validset, testset = load_mnist()

    with Timer("Creating model"):
        # TODO: We should infer the number of different targets from the
        # dataset, but I'm not sure how to do that right (keep in mind the
        # regression case?).
        output_size = 10
        model = Perceptron(trainset.input_size, output_size)
        model.initialize()  # By default, uniform initialization.

    with Timer("Building optimizer"):
        optimizer = SGD(loss=NLL(model, trainset))
        optimizer.append_update_rule(ConstantLearningRate(0.0001))

    with Timer("Building trainer"):
        # Train for 10 epochs
        batch_scheduler = MiniBatchScheduler(trainset, 100)
        stopping_criterion = tasks.MaxEpochStopping(10)
        trainer = Trainer(optimizer, batch_scheduler, stopping_criterion=stopping_criterion)

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Print mean/stderror of classification errors.
        classif_error = tasks.ClassificationError(model.use, validset)
        trainer.append_task(tasks.Print("Validset - Classif error: {0:.1%} ± {1:.1%}",
                                        classif_error.mean, classif_error.stderror))

    with Timer("Training"):
        trainer.train()
def __init__(self, args):
    # parameters
    self.epoch = args.epoch
    self.sample_num = 100
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.dataset = args.dataset
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.model_name = args.gan_type

    # networks init
    self.G = generator(self.dataset)
    self.D = discriminator(self.dataset)
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2))
    self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2))

    if self.gpu_mode:
        self.G.cuda()
        self.D.cuda()
        self.BCE_loss = nn.BCELoss().cuda()
        self.CE_loss = nn.CrossEntropyLoss().cuda()
    else:
        self.BCE_loss = nn.BCELoss()
        self.CE_loss = nn.CrossEntropyLoss()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    print('-----------------------------------------------')

    # load mnist
    self.data_X, self.data_Y = utils.load_mnist(args.dataset)
    self.z_dim = 62
    self.y_dim = 10

    # fixed noise & condition: one shared z per row of samples, one label per column
    self.sample_z_ = torch.zeros((self.sample_num, self.z_dim))
    for i in range(10):
        self.sample_z_[i * self.y_dim] = torch.rand(1, self.z_dim)
        for j in range(1, self.y_dim):
            self.sample_z_[i * self.y_dim + j] = self.sample_z_[i * self.y_dim]

    temp = torch.zeros((10, 1))
    for i in range(self.y_dim):
        temp[i, 0] = i

    temp_y = torch.zeros((self.sample_num, 1))
    for i in range(10):
        temp_y[i * self.y_dim: (i + 1) * self.y_dim] = temp

    self.sample_y_ = torch.zeros((self.sample_num, self.y_dim))
    self.sample_y_.scatter_(1, temp_y.type(torch.LongTensor), 1)

    if self.gpu_mode:
        self.sample_z_, self.sample_y_ = Variable(self.sample_z_.cuda(), volatile=True), \
            Variable(self.sample_y_.cuda(), volatile=True)
    else:
        self.sample_z_, self.sample_y_ = Variable(self.sample_z_, volatile=True), \
            Variable(self.sample_y_, volatile=True)
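# Note (added): the `scatter_` call above is the standard PyTorch trick for
# building one-hot rows from a column of integer labels. A minimal,
# self-contained sketch of the same idiom (the tensors here are illustrative,
# not from the source):
import torch

labels = torch.tensor([[0], [3], [9]])            # column of class indices, shape (3, 1)
one_hot = torch.zeros(3, 10).scatter_(1, labels, 1)
# each row now has a single 1 at the position given by its label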
def create_inputs():
    trX, trY = load_mnist(cfg.dataset, cfg.is_training)

    num_pre_threads = cfg.thread_per_gpu * cfg.num_gpu
    data_queue = tf.train.slice_input_producer([trX, trY], capacity=64 * num_pre_threads)
    X, Y = tf.train.shuffle_batch(data_queue,
                                  num_threads=num_pre_threads,
                                  batch_size=cfg.batch_size_per_gpu * cfg.num_gpu,
                                  capacity=cfg.batch_size_per_gpu * cfg.num_gpu * 64,
                                  min_after_dequeue=cfg.batch_size_per_gpu * cfg.num_gpu * 32,
                                  allow_smaller_final_batch=False)

    return (X, Y)
def demo(structure=[25**2, 23**2, 21**2, 19**2, 16**2, 15**2]):
    # Getting the data
    X, y = utils.load_mnist()

    autoencoder = StackedDA([100], alpha=0.01)
    autoencoder.pre_train(X[:1000], 10)

    y = utils.makeMultiClass(y)
    autoencoder.fine_tune(X[:1000], y[:1000], learning_layer=200, n_iters=20, alpha=0.01)

    W = autoencoder.W[0].T[:, 1:]
    W = utils.saveTiles(W, img_shape=(28, 28), tile_shape=(10, 10), filename="Results/res_dA.png")
def plot_pca():
    print('loading data')
    X_train, y_train, X_test, y_test = utils.load_mnist()

    pca = PCA(n_components=2)
    print('transforming training data')
    Z_train = pca.fit_transform(X_train)
    print('transforming test data')
    Z_test = pca.transform(X_test)

    plot(Z_train, y_train, Z_test, y_test,
         filename='pca.png',
         title='projected onto principal components')
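# Note (added): inside plot_pca, right after fit_transform, one could report
# how much variance the 2-component projection retains using scikit-learn's
# explained_variance_ratio_ attribute (this line is illustrative, not from
# the source):
#     print('variance retained: {:.1%}'.format(pca.explained_variance_ratio_.sum()))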
def useLayers():
    X, y = utils.load_mnist()
    y = utils.makeMultiClass(y)

    # Layers
    sDA = StackedDA([100])
    sDA.pre_train(X[:1000], rate=0.5, n_iters=500)
    sDA.finalLayer(y[:1000], learner_size=200, n_iters=1)
    sDA.fine_tune(X[:1000], y[:1000], n_iters=1)
    pred = sDA.predict(X)

    W = sDA.Layers[0].W.T[:, 1:]
    W = utils.saveTiles(W, img_shape=(28, 28), tile_shape=(10, 10), filename="Results/res_dA.png")
    return pred, y
def main(_):
    capsNet = CapsNet(is_training=cfg.is_training)
    tf.logging.info('Graph loaded')
    sv = tf.train.Supervisor(graph=capsNet.graph, logdir=cfg.logdir, save_model_secs=0)

    path = cfg.results + '/accuracy.csv'
    if not os.path.exists(cfg.results):
        os.mkdir(cfg.results)
    elif os.path.exists(path):
        os.remove(path)

    fd_results = open(path, 'w')
    fd_results.write('step,test_acc\n')
    with sv.managed_session() as sess:
        num_batch = int(60000 / cfg.batch_size)
        num_test_batch = 10000 // cfg.batch_size
        teX, teY = load_mnist(cfg.dataset, False)
        for epoch in range(cfg.epoch):
            if sv.should_stop():
                break
            for step in tqdm(range(num_batch), total=num_batch, ncols=70, leave=False, unit='b'):
                global_step = sess.run(capsNet.global_step)
                sess.run(capsNet.train_op)

                if step % cfg.train_sum_freq == 0:
                    _, summary_str = sess.run([capsNet.train_op, capsNet.train_summary])
                    sv.summary_writer.add_summary(summary_str, global_step)

                if (global_step + 1) % cfg.test_sum_freq == 0:
                    test_acc = 0
                    for i in range(num_test_batch):
                        start = i * cfg.batch_size
                        end = start + cfg.batch_size
                        test_acc += sess.run(capsNet.batch_accuracy,
                                             {capsNet.X: teX[start:end],
                                              capsNet.labels: teY[start:end]})
                    # batch_accuracy accumulates per-batch counts, so normalize
                    # by the total number of test examples seen
                    test_acc = test_acc / (cfg.batch_size * num_test_batch)
                    fd_results.write(str(global_step + 1) + ',' + str(test_acc) + '\n')
                    fd_results.flush()
                    summary_str = sess.run(capsNet.test_summary, {capsNet.test_acc: test_acc})
                    sv.summary_writer.add_summary(summary_str, global_step)

            if epoch % cfg.save_freq == 0:
                sv.saver.save(sess, cfg.logdir + '/model_epoch_%04d_step_%02d' % (epoch, global_step))

    fd_results.close()
    tf.logging.info('Training done')
def main(args):
    train, valid, _ = load_mnist()

    e = theanets.Experiment(
        theanets.Autoencoder,
        layers=(784, args.features ** 2, 784))

    e.train(train, valid, min_improvement=0.1)

    plot_layers([e.network.find('hid1', 'w'), e.network.find('out', 'w')])
    plt.tight_layout()
    plt.show()

    v = valid[:100]
    plot_images(v, 121, 'Sample data')
    plot_images(e.network.predict(v), 122, 'Reconstructed data')
    plt.tight_layout()
    plt.show()
def main(args):
    train, valid, _ = load_mnist()

    e = theanets.Experiment(
        theanets.Autoencoder,
        layers=(784, args.features ** 2, 784))

    e.train(train, valid)

    plot_layers(e.network.weights)
    plt.tight_layout()
    plt.show()

    v = valid[:100]
    plot_images(v, 121, 'Sample data')
    plot_images(e.network.predict(v), 122, 'Reconstructed data')
    plt.tight_layout()
    plt.show()
def main(args):
    # load up the MNIST digit dataset.
    train, valid, _ = load_mnist()

    net = theanets.Autoencoder([784, args.features ** 2, 784], rng=42)
    net.train(
        train,
        valid,
        input_noise=0.1,
        weight_l2=0.0001,
        algo="rmsprop",
        momentum=0.9,
        max_updates=1,
        min_improvement=0.1,
    )

    plot_layers([net.find("hid1", "w"), net.find("out", "w")])
    plt.tight_layout()
    plt.show()
def main(args):
    # load up the MNIST digit dataset.
    train, valid, _ = load_mnist()

    net = theanets.Autoencoder([784, args.features ** 2, 784])
    net.train(train, valid,
              input_noise=0.1,
              weight_l2=0.0001,
              algo='rmsprop',
              momentum=0.9,
              min_improvement=0.1)

    plot_layers([net.find('hid1', 'w'), net.find('out', 'w')])
    plt.tight_layout()
    plt.show()

    v = valid[:100]
    plot_images(v, 121, 'Sample data')
    plot_images(net.predict(v), 122, 'Reconstructed data')
    plt.tight_layout()
    plt.show()
def plot_autoencoder(weightsfile):
    print('building model')
    layers = model.build_model()

    batch_size = 128

    print('compiling theano function')
    encoder_func = theano_funcs.create_encoder_func(layers)

    print('loading weights from %s' % (weightsfile))
    model.load_weights([
        layers['l_decoder_out'],
        layers['l_discriminator_out'],
    ], weightsfile)

    print('loading data')
    X_train, y_train, X_test, y_test = utils.load_mnist()

    train_datapoints = []
    print('transforming training data')
    for train_idx in get_batch_idx(X_train.shape[0], batch_size):
        X_train_batch = X_train[train_idx]
        train_batch_codes = encoder_func(X_train_batch)
        train_datapoints.append(train_batch_codes)

    test_datapoints = []
    print('transforming test data')
    for test_idx in get_batch_idx(X_test.shape[0], batch_size):
        X_test_batch = X_test[test_idx]
        test_batch_codes = encoder_func(X_test_batch)
        test_datapoints.append(test_batch_codes)

    Z_train = np.vstack(train_datapoints)
    Z_test = np.vstack(test_datapoints)

    plot(Z_train, y_train, Z_test, y_test,
         filename='adversarial_train_val.png',
         title='projected onto latent space of autoencoder')
def __init__(self, seq_len, batch_size, dataset='mnist', set='train', rng=None,
             infinite=True, digits=None):
    if dataset == 'fashion_mnist':
        (x_train, y_train), (x_test, y_test) = utils.load_fashion_mnist()
        if set == 'train':
            self.x = x_train
            self.y = y_train
        else:
            self.x = x_test
            self.y = y_test
    elif dataset == 'mnist':
        (x_train, y_train), (x_test, y_test) = utils.load_mnist()
        if set == 'train':
            self.x = x_train
            self.y = y_train
        elif set == 'test':
            self.x = x_test
            self.y = y_test
    elif dataset == 'cifar10':
        self.x, self.y = utils.load_cifar('data/cifar', subset=set)
        self.x = np.transpose(self.x, (0, 2, 3, 1))  # (N,3,32,32) -> (N,32,32,3)
        self.x = np.float32(self.x)
        self.img_shape = self.x.shape[1:]
        self.input_dim = np.prod(self.img_shape)
    else:
        raise ValueError('wrong dataset name')

    if dataset == 'mnist' or dataset == 'fashion_mnist':
        self.input_dim = self.x.shape[-1]
        self.img_shape = (int(np.sqrt(self.input_dim)), int(np.sqrt(self.input_dim)), 1)
        self.x = np.reshape(self.x, (self.x.shape[0],) + self.img_shape)
        self.x = np.float32(self.x)

    self.classes = np.unique(self.y)
    self.n_classes = len(self.classes)

    self.y2idxs = {}
    self.nsamples = 0
    for i in list(self.classes):
        self.y2idxs[i] = np.where(self.y == i)[0]
        self.nsamples += len(self.y2idxs[i])

    self.batch_size = batch_size
    self.seq_len = seq_len
    self.rng = np.random.RandomState(42) if not rng else rng
    self.infinite = infinite
    self.digits = digits if digits is not None else np.arange(self.n_classes)

    print(set, 'dataset size:', self.x.shape)
    print(set, 'N classes', self.n_classes)
    print(set, 'min, max', np.min(self.x), np.max(self.x))
    print(set, 'nsamples', self.nsamples)
    print(set, 'digits', self.digits)
    print('--------------')
def save_data():
    print('-' * 10 + 'SAVING DATA TO DISK' + '-' * 10 + '\n')
    MODEL_PATH = './model/'
    DATA_PATH = './data/'
    if not os.path.exists(MODEL_PATH):
        os.makedirs(MODEL_PATH)
    if not os.path.exists(DATA_PATH):
        os.makedirs(DATA_PATH)

    # Choosing dataset
    if "local" in args.dataset:
        print("USING LOCAL DATASET")
        # the original passed args.train_label twice here, which looks like a
        # copy-paste slip; the test labels are what is meant
        x, y, test_x, test_y = load_dataset(args.train_feat, args.train_label,
                                            args.test_feat, args.test_label)
    elif "mnist" in args.dataset:
        print("USING MNIST DATASET")
        x, y, test_x, test_y = load_mnist()
    elif "cifar10" in args.dataset:
        print("USING CIFAR10 DATASET")
        x, y, test_x, test_y = load_cifar10()

    if test_x is None:
        print('Splitting train/test data with ratio {}/{}'.format(
            1 - args.test_ratio, args.test_ratio))
        x, test_x, y, test_y = train_test_split(x, y, test_size=args.test_ratio, stratify=y)

    # need to partition target and shadow model data
    assert len(x) > 2 * args.target_data_size
    target_data_indices, shadow_indices = get_data_indices(
        len(x), target_train_size=args.target_data_size)
    np.savez(MODEL_PATH + 'data_indices.npz', target_data_indices, shadow_indices)

    # target model's data
    print('Saving data for target model')
    train_x, train_y = x[target_data_indices], y[target_data_indices]
    size = len(target_data_indices)
    if size < len(test_x):
        test_x = test_x[:size]
        test_y = test_y[:size]
    # save target data
    np.savez(DATA_PATH + 'target_data.npz', train_x, train_y, test_x, test_y)

    # shadow model's data
    target_size = len(target_data_indices)
    shadow_x, shadow_y = x[shadow_indices], y[shadow_indices]
    shadow_indices = np.arange(len(shadow_indices))

    for i in range(args.n_shadow):
        print('Saving data for shadow model {}'.format(i))
        shadow_i_indices = np.random.choice(shadow_indices, 2 * target_size, replace=False)
        shadow_i_x, shadow_i_y = shadow_x[shadow_i_indices], shadow_y[shadow_i_indices]
        train_x, train_y = shadow_i_x[:target_size], shadow_i_y[:target_size]
        test_x, test_y = shadow_i_x[target_size:], shadow_i_y[target_size:]
        np.savez(DATA_PATH + 'shadow{}_data.npz'.format(i), train_x, train_y, test_x, test_y)
def test_dA(learning_rate=0.1, training_epochs=15,
            dataset='mnist.pkl.gz',
            batch_size=20, output_folder='dA_plots'):
    """
    This demo is tested on MNIST

    :type learning_rate: float
    :param learning_rate: learning rate used for training the Denoising
                          AutoEncoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type dataset: string
    :param dataset: path to the pickled dataset
    """
    datasets = load_mnist(dataset)
    train_set_x, train_set_y = datasets[0]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')      # the data is presented as rasterized images

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    ####################################
    # BUILDING THE MODEL NO CORRUPTION #
    ####################################

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=28 * 28,
        n_hidden=500
    )

    # the original passed corruption_level=0.3 here even though this is the
    # "no corruption" run; 0.0 matches the label and the saved filename
    cost, updates = da.get_cost_updates(
        corruption_level=0.,
        learning_rate=learning_rate
    )

    train_da = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )

    start_time = time.clock()

    ############
    # TRAINING #
    ############

    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through the training set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))

        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)

    end_time = time.clock()
    training_time = (end_time - start_time)

    print >> sys.stderr, ('The no corruption code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((training_time) / 60.))
    image = Image.fromarray(
        tile_raster_images(X=da.W.get_value(borrow=True).T,
                           img_shape=(28, 28), tile_shape=(10, 10),
                           tile_spacing=(1, 1)))
    image.save('filters_corruption_0.png')

    #####################################
    # BUILDING THE MODEL CORRUPTION 30% #
    #####################################

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=28 * 28,
        n_hidden=500
    )

    cost, updates = da.get_cost_updates(
        corruption_level=0.3,
        learning_rate=learning_rate
    )

    train_da = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )

    start_time = time.clock()

    ############
    # TRAINING #
    ############

    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through the training set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))

        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)

    end_time = time.clock()
    training_time = (end_time - start_time)

    print >> sys.stderr, ('The 30% corruption code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % (training_time / 60.))

    image = Image.fromarray(tile_raster_images(
        X=da.W.get_value(borrow=True).T,
        img_shape=(28, 28), tile_shape=(10, 10),
        tile_spacing=(1, 1)))
    image.save('filters_corruption_30.png')

    os.chdir('../')
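# Note (added): `corruption_level` controls masking noise applied to the input
# inside get_cost_updates. A minimal sketch of the standard trick, assuming a
# Theano RandomStreams object like `theano_rng` above (illustrative, not
# necessarily this dA's exact internals):
def corrupt_input(theano_rng, x, corruption_level):
    # zero out each pixel independently with probability `corruption_level`
    mask = theano_rng.binomial(size=x.shape, n=1, p=1 - corruption_level,
                               dtype='float32')
    return mask * x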
import matplotlib.pyplot as plt
import numpy as np
import theanets

from utils import load_mnist, plot_layers, plot_images


class WeightInverse(theanets.Regularizer):
    def loss(self, layers, outputs):
        return sum((1 / (w * w).sum(axis=0)).sum()
                   for l in layers
                   for w in l.params
                   if w.ndim > 1)


(train, ), (valid, ), _ = load_mnist()

# mean-center the digits and compute a pca whitening transform.
m = train.mean(axis=0)
train -= m
valid -= m
theanets.log('computing whitening transform')
vals, vecs = np.linalg.eigh(np.dot(train.T, train) / len(train))
vals = vals[::-1]
vecs = vecs[:, ::-1]

K = 197  # this retains 99% of the variance in the digit data.
vals = np.sqrt(vals[:K])
vecs = vecs[:, :K]
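# Note (added): the excerpt stops right after truncating the eigendecomposition
# to K components; presumably the whitening transform is then applied with
# something like the following (a sketch under that assumption, not code from
# the source):
def whiten(x):
    # project onto the top-K eigenvectors and rescale each direction by
    # 1/sqrt(eigenvalue) so the result has (approximately) identity covariance
    return np.dot(x, np.dot(vecs, np.diag(1. / vals)))

def color(z):
    # invert the whitening map back to pixel space
    return np.dot(z, np.dot(np.diag(vals), vecs.T))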
logging = climate.get_logger('mnist-rica')

climate.enable_default_logging()


class RICA(theanets.Autoencoder):
    def J(self, weight_inverse=0, **kwargs):
        cost, mon, upd = super(RICA, self).J(**kwargs)
        if weight_inverse > 0:
            cost += sum((weight_inverse / (w * w).sum(axis=0)).sum()
                        for l in self.layers for w in l.weights)
        return cost, mon, upd


train, valid, _ = load_mnist()

# mean-center the digits and compute a pca whitening transform.
train -= 0.5
valid -= 0.5
logging.info('computing whitening transform')
vals, vecs = np.linalg.eigh(np.dot(train.T, train) / len(train))
vals = vals[::-1]
vecs = vecs[:, ::-1]

K = 197  # this retains 99% of the variance in the digit data.
vals = np.sqrt(vals[:K])
vecs = vecs[:, :K]
    def eval_obj(self, w):
        """evaluate objective value at w"""
        w = np.array(w, copy=False)
        X, Y = self.features, self.labels
        f, self.df = self.sess.run([self.loss, self.grads],
                                   feed_dict={self.x: X, self.y: Y, self.w: w})
        return f

    def eval_grad(self, w, df):
        """evaluate gradient at w and write it to df"""
        df = np.array(df, copy=False)
        np.copyto(df, self.df)


X, y = load_mnist()
probs = []
probs.append(LogregExecutor(X, y))
try:
    import tensorflow as tf
    probs.append(LogregTensorExecutor(X, y))
except ImportError:
    print "No TensorFlow found: skipping the example."

for prob in probs:
    descend(prob, initial_stepsize=0.0001, verbose=5, max_iter=10,
            l1_reg=0.002, precision='f')

# ------------------------------------------------------------------------------
# Add layers
model.add_layer('FC-1', FCLayer(784, 128))
model.add_layer('Tanh1', Tanh())
model.add_layer('FC-2', FCLayer(128, 32))
model.add_layer('Tanh2', Tanh())
model.add_layer('FC-3', FCLayer(32, 10))
model.add_layer('Softmax Layer', SoftmaxLayer())
# =========================================================================

assert dataset in ['mnist', 'fashion_mnist']

# Dataset
if dataset == 'mnist':
    x_train, y_train, x_test, y_test = load_mnist('./data')
else:
    x_train, y_train, x_test, y_test = load_fashion_mnist('./data')
x_train, x_test = np.squeeze(x_train), np.squeeze(x_test)

# Hold out a random 10% of the training data as validation data
num_train = len(x_train)
perm = np.random.permutation(num_train)
num_valid = int(len(x_train) * 0.1)
valid_idx = perm[:num_valid]
train_idx = perm[num_valid:]
x_valid, y_valid = x_train[valid_idx], y_train[valid_idx]
x_train, y_train = x_train[train_idx], y_train[train_idx]
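# Note (added): the manual permutation split above is equivalent to
# scikit-learn's train_test_split; a minimal sketch under that assumption
# (the `2`-suffixed names are illustrative, not from the source):
from sklearn.model_selection import train_test_split

x_train2, x_valid2, y_train2, y_valid2 = train_test_split(
    x_train, y_train, test_size=0.1, random_state=0)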
import utils
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import numpy as np
from flip_gradient import flip_gradient
from keras.datasets import mnist

# source data: USPS
xs, ys, xs_test, ys_test = utils.load_s_usps()
# target data: MNIST
xt, yt, xt_test, yt_test = utils.load_mnist()

# config
l2_param = 1e-5
lr = 1e-4
batch_size = 64
num_steps = 50000
coral_param = 0.01
dann_param = 0.05
grl_lambda = 1

with tf.name_scope('input'):
    x = tf.placeholder("float", shape=[None, 2025])
    y_ = tf.placeholder("float", shape=[None, 10])
    x_image = tf.reshape(x, [-1, 45, 45, 1])
    train_flag = tf.placeholder(tf.bool)

with tf.name_scope('feature_generator'):
    W_conv1 = utils.weight_variable([5, 5, 1, 32], 'conv1_weight')
    b_conv1 = utils.bias_variable([32], 'conv1_bias')
    h_conv1 = tf.nn.relu(utils.conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = utils.max_pool_2x2(h_conv1)
def i_main(EPOCH, MODEL_PATH):
    train, valid, _ = load_mnist(samplewise_normalize=True)
    model = ResNet50(MODEL_PATH)
    model.fit((train[0], train[1]), (valid[0], valid[1]), EPOCH)
def main():
    EPOCH, MODEL_PATH = arg_parser()
    train, valid, _ = load_mnist(samplewise_normalize=True)
    model = ResNet50(MODEL_PATH)
    model.fit((train[0], train[1]), (valid[0], valid[1]), EPOCH)
    preds = (zip(x, y) >> vec2img >> build_pred_batch >> network.predict() >>
             nf.Map(np.argmax) >> nf.Collect())
    (zip(x, y, preds) >> vec2img >> filter_error >> make_label >>
     view_image >> nf.Consume())


def view_augmented_images(x, y, n=10):
    """Show n augmented images"""
    view_image = nm.ViewImageAnnotation(0, 1, pause=1)
    zip(x, y) >> vec2img >> augment >> nf.Take(n) >> view_image >> nf.Consume()


if __name__ == '__main__':
    print('loading data...')
    filepath = download_mnist()
    x_train, y_train, x_test, y_test = load_mnist(filepath)

    print('creating model...')
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    model = Model(device)
    network = PytorchNetwork(model)
    network.load_weights()
    network.print_layers((1, 28, 28))

    print('training ...')
    train(network, x_train, y_train, epochs=3)
    network.save_weights()

    print('evaluating ...')
    print('train acc:', evaluate(network, x_train, y_train))
    print('test acc:', evaluate(network, x_test, y_test))
# ax_kmeans = plt.subplot(223)
# ax_kmeans.imshow(china_kmeans)
# ax_kmeans.set_title('Quantized image (64 colors, K-Means)')
#
# ax_random = plt.subplot(224)
# ax_random.imshow(china_random)
# ax_random.set_title('Quantized image (64 colors, random)')
#
# plt.show()

#-------------------------------------------------------------------------------
# Part 3 - Autoencoders
#-------------------------------------------------------------------------------

# Load data
x_train, y_train, x_test, y_test = utils.load_mnist()

# Load noisy data
x_train_noisy, _, x_test_noisy, _ = utils.load_mnist(noisy=True)

#-------------------------------------------------------------------------------
# Part 3.1 - Image reconstruction with autoencoders
#-------------------------------------------------------------------------------

# Parameters
batch_size = 256
epochs = 50
original_dim = 784
encoding_dim = 32

# Build autoencoder
try:
    # Main autoencoder config file
    cp = utils.load_config(sys.argv[1])
except:
    print 'Help: ./train.py <path to main autoencoder ini file> <run number>'
    exit()

# Trying to reduce stochastic behaviours
SEED = cp.getint('Experiment', 'SEED')
tf.set_random_seed(SEED)
np.random.seed(SEED)

# Load dataset
inp_path = cp.get('Experiment', 'DATAINPUTPATH')
if inp_path == '':
    dataset = utils.load_mnist(
        val_size=cp.getint('Experiment', 'VALIDATIONSIZE'))
else:
    dataset = utils.load_data(inp_path)

# Create save directory if it doesn't exist (Primary AE)
directory = cp.get('Experiment', 'ModelOutputPath')
if not os.path.exists(directory):
    os.makedirs(directory)

##############################################################################
# Initializing save paths
##############################################################################

out = cp.get('Experiment', 'ModelOutputPath')
out_ = out.split('/')[0] + '/' + out.split('/')[1] + '/' + \
def __init__(self, args):
    print("\n# Hyperparameters", file=sys.stderr)
    pprint.pprint(args.__dict__, stream=sys.stderr)

    print("\n# Data", file=sys.stderr)
    print(" - Standard MNIST", file=sys.stderr)
    print(" - digit_dim=%d*%d" % (args.height, args.width), file=sys.stderr)
    print(" - data_dim=%d*%d" % (args.height, args.width), file=sys.stderr)

    train_loader, valid_loader, test_loader = load_mnist(
        args.batch_size,
        save_to='{}/std/{}x{}'.format(args.data_dir, args.height, args.width),
        height=args.height,
        width=args.width)

    x_size = args.height * args.width
    z_size = args.latent_size
    y_size = 10 if args.conditional else 0

    # Configure prior
    if args.prior == 'gaussian':
        prior_type = Normal
        if len(args.prior_params) != 2:
            raise ValueError("A Gaussian prior takes two parameters (loc, scale)")
        if args.prior_params[1] <= 0:
            raise ValueError("The Gaussian scale must be strictly positive")
        if args.posterior not in ["gaussian"]:
            raise ValueError("Pairing a Gaussian prior with a %s posterior is not a good idea" % args.posterior)
    elif args.prior == 'beta':
        prior_type = Beta
        if len(args.prior_params) != 2:
            raise ValueError("A Beta prior takes two parameters (shape_a, shape_b)")
        if args.prior_params[0] <= 0 or args.prior_params[1] <= 0:
            raise ValueError("The Beta shape parameters must be strictly positive")
        if args.posterior not in ["kumaraswamy"]:
            raise ValueError("Pairing a Beta prior with a %s posterior is not a good idea" % args.posterior)
    else:
        raise ValueError("Unknown prior: %s" % args.prior)
    p_z = PriorLayer(event_shape=z_size, dist_type=prior_type, params=args.prior_params)

    # Configure likelihood
    if args.likelihood == 'bernoulli':
        likelihood_type = Bernoulli
        decoder_outputs = 1 * x_size
    else:
        raise ValueError("Unknown likelihood: %s" % args.likelihood)

    if args.decoder == 'basic':
        likelihood_conditioner = FFConditioner(
            input_size=z_size + y_size,
            output_size=decoder_outputs,
            context_size=y_size,
            hidden_sizes=args.hidden_sizes)
    elif args.decoder == 'cnn':
        likelihood_conditioner = TransposedConv2DConditioner(
            input_size=z_size + y_size,
            output_size=decoder_outputs,
            context_size=y_size,
            input_channels=32,
            output_channels=decoder_outputs // x_size,
            last_kernel_size=7)
    elif args.decoder == 'made':
        likelihood_conditioner = MADEConditioner(
            input_size=x_size + z_size + y_size,
            output_size=decoder_outputs,
            context_size=z_size + y_size,
            hidden_sizes=args.hidden_sizes,
            num_masks=1)
    else:
        raise ValueError("Unknown decoder: %s" % args.decoder)

    if args.decoder == 'made':
        conditional_x = AutoregressiveLikelihood(
            event_size=x_size,
            dist_type=likelihood_type,
            conditioner=likelihood_conditioner)
    else:
        conditional_x = FullyFactorizedLikelihood(
            event_size=x_size,
            dist_type=likelihood_type,
            conditioner=likelihood_conditioner)

    # CPU/CUDA device
    device = torch.device(args.device)

    # Create generative model P(z)P(x|z)
    gen_model = GenerativeModel(
        x_size=x_size, z_size=z_size, y_size=y_size,
        prior_z=p_z, conditional_x=conditional_x).to(device)
    print("\n# Generative Model", file=sys.stderr)
    print(gen_model, file=sys.stderr)

    # Configure posterior Z|x,y
    if args.posterior == 'gaussian':
        encoder_outputs = z_size * 2
        posterior_type = Normal
    elif args.posterior == 'kumaraswamy':
        encoder_outputs = z_size * 2
        posterior_type = Kumaraswamy
    else:
        raise ValueError("Unknown posterior: %s" % args.posterior)

    if args.encoder == 'basic':
        conditioner = FFConditioner(
            input_size=x_size + y_size,
            output_size=encoder_outputs,
            hidden_sizes=args.hidden_sizes)
    elif args.encoder == 'cnn':
        conditioner = Conv2DConditioner(
            input_size=x_size + y_size,
            output_size=encoder_outputs,
            context_size=y_size,
            width=args.width,
            height=args.height,
            output_channels=256,
            last_kernel_size=7)
    else:
        raise ValueError("Unknown encoder architecture: %s" % args.encoder)

    q_z = ConditionalLayer(
        event_size=z_size,
        dist_type=posterior_type,
        conditioner=conditioner)

    inf_model = InferenceModel(
        x_size=x_size, z_size=z_size, y_size=y_size,
        conditional_z=q_z).to(device)
    print("\n# Inference Model", file=sys.stderr)
    print(inf_model, file=sys.stderr)

    print("\n# Optimizers", file=sys.stderr)
    gen_opt = get_optimizer(args.gen_opt, gen_model.parameters(),
                            args.gen_lr, args.gen_l2_weight, args.gen_momentum)
    gen_scheduler = ReduceLROnPlateau(gen_opt, factor=0.5,
                                      patience=args.patience,
                                      early_stopping=args.early_stopping,
                                      mode='max', threshold_mode='abs')
    print(gen_opt, file=sys.stderr)

    inf_z_opt = get_optimizer(args.inf_z_opt, inf_model.parameters(),
                              args.inf_z_lr, args.inf_z_l2_weight, args.inf_z_momentum)
    inf_z_scheduler = ReduceLROnPlateau(inf_z_opt, factor=0.5,
                                        patience=args.patience,
                                        mode='max', threshold_mode='abs')
    print(inf_z_opt, file=sys.stderr)

    self.optimizers = {'gen': gen_opt, 'inf_z': inf_z_opt}
    self.schedulers = {'gen': gen_scheduler, 'inf_z': inf_z_scheduler}
    self.train_loader = train_loader
    self.valid_loader = valid_loader
    self.test_loader = test_loader
    self.models = {'gen': gen_model, 'inf': inf_model}
    self.args = args
def __init__(self, sess, epoch, batch_size, z_dim, dataset_name,
             checkpoint_dir, sample_dir, log_dir, mode):
    self.sess = sess
    self.epoch = epoch
    self.batch_size = batch_size
    self.checkpoint_dir = checkpoint_dir
    self.sample_dir = sample_dir
    self.log_dir = log_dir
    self.dataset_name = dataset_name
    self.z_dim = z_dim
    self.random_seed = 1000

    if dataset_name == 'mnist' or dataset_name == 'fashion-mnist':
        # image dimensions
        self.imgH = 28
        self.imgW = 28
        # the size of the first layer of the generator
        self.s_size = 3
        # arguments for the last layer of the generator
        self.last_dconv = {
            'kernel_size': [5, 5],
            'stride': 1,
            'padding': 'VALID'
        }
        # depths for convolutions in the generator and discriminator
        self.g_depths = [512, 256, 128, 64]
        self.d_depths = [64, 128, 256, 512]
        # channels
        self.c_dim = 1
        # WGAN parameter: the number of critic iterations per epoch
        self.d_iters = 1
        self.g_iters = 1
        # train
        self.learning_rate = 0.0002
        self.beta1 = 0.5
        self.beta2 = 0.9
        # test: number of generated images to be saved
        self.sample_num = 100
        # load numpy arrays of images and labels
        self.images = load_mnist(self.dataset_name)
    elif dataset_name == 'anime':
        # image dimensions
        self.imgH = 64
        self.imgW = 64
        # the size of the first layer of the generator
        self.s_size = 4
        # arguments for the last layer of the generator, same as the default
        self.last_dconv = {}
        # depths for convolutions in the generator and discriminator
        self.g_depths = [512, 256, 128, 64]
        self.d_depths = [64, 128, 256, 512]
        # channel dim
        self.c_dim = 3
        # WGAN parameter: the number of critic iterations per epoch
        self.d_iters = 1
        self.g_iters = 1
        # train
        self.learning_rate = 0.0002
        self.beta1 = 0.5
        self.beta2 = 0.9
        # test: number of generated images to be saved
        self.sample_num = 64
        self.images = load_anime(self.dataset_name)
    else:
        raise NotImplementedError
# inputs
input_shape = (1, 28, 28)
epochs = 10
batch_size = 1
log_dir = './summaries/test_dir/'

# make VAE
vae = CholletVAE(input_shape, log_dir)

# compile VAE
from keras import optimizers
optimizer = optimizers.Adam(lr=1e-3)
vae.compile(optimizer=optimizer)

# get dataset
import utils
(X_train, _), (X_test, _) = utils.load_mnist()
train_generator = utils.make_generator(X_train, batch_size=batch_size)
test_generator = utils.make_generator(X_test, batch_size=batch_size)

# print summaries
vae.print_model_summaries()

# fit VAE
steps_per_epoch = int(len(X_train) / batch_size)
validation_steps = int(len(X_test) / batch_size)
vae.fit_generator(train_generator,
                  epochs=epochs,
                  steps_per_epoch=steps_per_epoch,
                  validation_data=test_generator,
                  validation_steps=validation_steps)
import warnings

import flwr as fl
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss

import utils

if __name__ == "__main__":
    # Load MNIST dataset from https://www.openml.org/d/554
    (X_train, y_train), (X_test, y_test) = utils.load_mnist()

    # Split train set into 10 partitions and randomly use one for training.
    partition_id = np.random.choice(10)
    (X_train, y_train) = utils.partition(X_train, y_train, 10)[partition_id]

    # Create LogisticRegression Model
    model = LogisticRegression(
        penalty="l2",
        max_iter=1,  # local epoch
        warm_start=True,  # prevent refreshing weights when fitting
    )

    # Setting initial parameters, akin to model.compile for keras models
    utils.set_initial_params(model)

    # Define Flower client
    class MnistClient(fl.client.NumPyClient):
        def get_parameters(self):  # type: ignore
            return utils.get_model_parameters(model)
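    # Note (added): the excerpt is cut off here. In Flower's sklearn examples
    # the client also implements fit()/evaluate() and is then started against
    # a server; a sketch under that assumption (the address is illustrative,
    # and the fit/evaluate methods omitted above would be required):
    # fl.client.start_numpy_client(server_address="0.0.0.0:8080", client=MnistClient())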
def experiment(network_conf_json, reshape_mode="conv"):
    reshape_funs = {
        "conv": lambda d: d.reshape(-1, 28, 28, 1),
        "mlp": lambda d: d.reshape(-1, 784)
    }
    xtrain, ytrain, xtest, ytest = utils.load_mnist()
    reshape_fun = reshape_funs[reshape_mode]
    xtrain, xtest = reshape_fun(xtrain), reshape_fun(xtest)
    xtrain, ytrain, xval, yval = utils.create_validation(xtrain, ytrain)

    mnist_c_errors = []
    mnist_pred_bits = []
    mnist_class_bits = []

    xm_c_errors = []
    xm_pred_bits = []
    xm_class_bits = []

    ob_c_errors = []
    ob_pred_bits = []
    ob_class_bits = []

    xm_data = utils.load_processed_data("xiaoming_digits")
    ob_data = utils.load_processed_data("Oleks_digits")

    xm_digits = reshape_fun(utils.normalize_data(list(xm_data.values())[0]))
    xm_labels = utils.create_one_hot(xm_data["labels"])

    ob_digits = reshape_fun(utils.normalize_data(list(ob_data.values())[0]))
    ob_labels = utils.create_one_hot(ob_data["labels"])

    for ensemble_size in ensemble_sizes:
        # Building the ensemble models and training the networks
        print("===== Building the ensemble of size %s =====" % ensemble_size)
        inputs, outputs, train_model, model_list, merge_model = ann.build_ensemble(
            [network_conf_json], pop_per_type=ensemble_size, merge_type="Average")
        train_model.compile(optimizer="adam",
                            loss="categorical_crossentropy",
                            metrics=['accuracy'])
        train_model.fit([xtrain] * ensemble_size, [ytrain] * ensemble_size,
                        batch_size=100,
                        verbose=1,
                        validation_data=([xval] * ensemble_size, [yval] * ensemble_size),
                        epochs=num_epochs)

        # Calculating classification errors
        print("===== Calculating classification errors =====")
        mnist_c_errors.append(
            ann.test_model(merge_model, [xtest] * ensemble_size, ytest, metric="c_error"))
        xm_c_errors.append(
            ann.test_model(merge_model, [xm_digits] * ensemble_size, xm_labels, metric="c_error"))
        ob_c_errors.append(
            ann.test_model(merge_model, [ob_digits] * ensemble_size, ob_labels, metric="c_error"))

        # Calculating ensemble prediction entropy
        print("===== Calculating ensemble prediction entropy =====")
        mnist_pred_bits.append(
            np.mean(calc_shannon_entropy(merge_model.predict([xtest] * ensemble_size))))
        xm_pred_bits.append(
            np.mean(calc_shannon_entropy(merge_model.predict([xm_digits] * ensemble_size))))
        ob_pred_bits.append(
            np.mean(calc_shannon_entropy(merge_model.predict([ob_digits] * ensemble_size))))

        # Calculating ensemble members' classification entropy
        print("===== Calculating ensemble members classification entropy =====")
        mnist_class_bits.append(np.mean(calc_class_entropy(model_list, xtest)))
        xm_class_bits.append(np.mean(calc_class_entropy(model_list, xm_digits)))
        ob_class_bits.append(np.mean(calc_class_entropy(model_list, ob_digits)))

    mnist_results = {
        "c_error": mnist_c_errors,
        "pred_bits": mnist_pred_bits,
        "class_bits": mnist_class_bits
    }

    xm_results = {
        "c_error": xm_c_errors,
        "pred_bits": xm_pred_bits,
        "class_bits": xm_class_bits
    }

    ob_results = {
        "c_error": ob_c_errors,
        "pred_bits": ob_pred_bits,
        "class_bits": ob_class_bits
    }

    return mnist_results, xm_results, ob_results
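# Note (added): calc_shannon_entropy is used above but not shown in this
# excerpt; a plausible minimal implementation of per-prediction Shannon
# entropy in bits (an assumption, not the repo's code):
import numpy as np

def shannon_entropy(probs, eps=1e-12):
    # H = -sum_i p_i * log2(p_i), computed row-wise over class probabilities;
    # eps guards against log2(0)
    return -np.sum(probs * np.log2(probs + eps), axis=1)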
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch-size', default=100)
    parser.add_argument('--steps-per-epoch', default=450)
    parser.add_argument('--combine-method', choices=COMBINE_METHODS, default='max')
    parser.add_argument('--epochs', default=200, type=int)
    parser.add_argument('--digits', default=range(OUTPUT_DIM), nargs='+', type=list)
    parser.add_argument('workspace')
    args = parser.parse_args(argv[1:])

    init_args_path = os.path.join(args.workspace, INIT_ARGS_FILENAME)
    with open(init_args_path, 'r') as f:
        init_args = json.load(f)

    # ******************************************
    # * Save training args
    # ******************************************

    train_args_path = os.path.join(args.workspace, TRAIN_SEPARATE_ARGS_FILENAME)
    with open(train_args_path, 'w') as f:
        json.dump(vars(args), f, indent=2)

    (X_train_mnist, T_train_mnist), _ = load_mnist()

    batch_size = args.batch_size
    steps_per_epoch = args.steps_per_epoch
    epochs = args.epochs

    # ******************************************
    # * Model Specification
    # ******************************************

    input = x = Input(shape=INPUT_SHAPE)
    x = Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same',
               data_format=DIM_ORDERING, input_shape=INPUT_SHAPE)(x)
    x = MaxPooling2D((2, 2), padding='same', data_format=DIM_ORDERING)(x)
    x = Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same',
               data_format=DIM_ORDERING)(x)
    x = MaxPooling2D((2, 2), padding='same', data_format=DIM_ORDERING)(x)
    x = Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same',
               data_format=DIM_ORDERING)(x)
    x = UpSampling2D((2, 2), data_format=DIM_ORDERING)(x)
    x = Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same',
               data_format=DIM_ORDERING)(x)
    x = UpSampling2D((2, 2), data_format=DIM_ORDERING)(x)
    x = Conv2D(1, kernel_size=(3, 3), activation='sigmoid', padding='same',
               data_format=DIM_ORDERING)(x)

    for digit in args.digits:
        print 'Digit: {}'.format(digit)

        train_generator = separate_pair_generator(
            X_train_mnist, T_train_mnist, digit,
            seed=0,
            batch_size=batch_size,
            combine_method=init_args['combine_method'])

        model = Model(inputs=input, outputs=x)

        summary_str_io = StringIO()
        with redirect_stdout(summary_str_io):
            model.summary()
        summary_str = summary_str_io.getvalue()
        print summary_str

        model_summary_path = os.path.join(
            args.workspace, TRAIN_SEPARATE_MODEL_SUMMARY_FILENAME)
        with open(model_summary_path, 'w') as f:
            f.write(summary_str)

        model.compile(optimizer='adam', loss='binary_crossentropy')
        model.fit_generator(train_generator, steps_per_epoch, epochs=epochs)

        model_dir_path = os.path.join(args.workspace, TRAIN_SEPARATE_MODEL_DIRNAME)
        makedirs(model_dir_path)
        model_filename = '{}_{}.h5'.format(
            TRAIN_SEPARATE_KERAS_MODEL_FILENAME_PREFIX, digit)
        model_path = os.path.join(model_dir_path, model_filename)
        model.save(model_path)

    print 'Done!'
    return 0
        return pred_prob

    def save(self, checkpoint_dir):
        model_name = "mnist_cnn_classifier"

        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)

        self.saver.save(self.sess, os.path.join(checkpoint_dir, model_name),
                        global_step=self.epochs)

    def load(self, checkpoint_dir):
        import re
        print(" [*] Reading checkpoints..")

        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            self.saver.restore(self.sess, os.path.join(checkpoint_dir, ckpt_name))
            counter = int(next(re.finditer("(\d+)(?!.*\d)", ckpt_name)).group(0))
            print(' [*] Successfully read {}'.format(ckpt_name))
            return True, counter
        else:
            print(' [*] Failed to find a checkpoint')
            return False, 0


if __name__ == '__main__':
    with tf.Session() as sess:
        model = mnist_cnn(sess)
        model.train()
        model.save("mnist_cnn")
        model.load("mnist_cnn")
        data, label, test_data, test_label = utils.load_mnist()
        model.test(test_data[:5000], test_label[:5000])
import numpy as np
import tensorflow as tf

from config import cfg
from utils import load_mnist
from utils import save_images
from capsNet import CapsNet

if __name__ == '__main__':
    capsNet = CapsNet(is_training=cfg.is_training)
    tf.logging.info('Graph loaded')
    teX, teY = load_mnist(cfg.dataset, cfg.is_training)
    with capsNet.graph.as_default():
        sv = tf.train.Supervisor(logdir=cfg.logdir)
        # with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        with sv.managed_session() as sess:
            sv.saver.restore(sess, tf.train.latest_checkpoint(cfg.logdir))
            tf.logging.info('Restored')

            reconstruction_err = []
            for i in range(10000 // cfg.batch_size):
                start = i * cfg.batch_size
                end = start + cfg.batch_size
                recon_imgs = sess.run(capsNet.decoded, {capsNet.X: teX[start:end]})
                origin_imgs = np.reshape(teX[start:end], (cfg.batch_size, -1))
                squared = np.square(recon_imgs - origin_imgs)
                reconstruction_err.append(np.mean(squared))
                if i % 5 == 0:
def __init__(self, args, SUPERVISED=True):
    # parameters
    self.epoch = args.epoch
    self.sample_num = 100
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.dataset = args.dataset
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.model_name = args.gan_type
    self.SUPERVISED = SUPERVISED  # if True, label info is directly used for the code
    self.len_discrete_code = 10   # categorical distribution (i.e. label)
    self.len_continuous_code = 2  # gaussian distribution (e.g. rotation, thickness)

    # networks init
    self.G = generator(self.dataset)
    self.D = discriminator(self.dataset)
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2))
    self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2))
    self.info_optimizer = optim.Adam(itertools.chain(self.G.parameters(), self.D.parameters()),
                                     lr=args.lrD, betas=(args.beta1, args.beta2))

    if self.gpu_mode:
        self.G.cuda()
        self.D.cuda()
        self.BCE_loss = nn.BCELoss().cuda()
        self.CE_loss = nn.CrossEntropyLoss().cuda()
        self.MSE_loss = nn.MSELoss().cuda()
    else:
        self.BCE_loss = nn.BCELoss()
        self.CE_loss = nn.CrossEntropyLoss()
        self.MSE_loss = nn.MSELoss()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    print('-----------------------------------------------')

    # load mnist
    self.data_X, self.data_Y = utils.load_mnist(args.dataset)
    self.z_dim = 62
    self.y_dim = 10

    # fixed noise & condition
    self.sample_z_ = torch.zeros((self.sample_num, self.z_dim))
    for i in range(10):
        self.sample_z_[i * self.y_dim] = torch.rand(1, self.z_dim)
        for j in range(1, self.y_dim):
            self.sample_z_[i * self.y_dim + j] = self.sample_z_[i * self.y_dim]

    temp = torch.zeros((10, 1))
    for i in range(self.y_dim):
        temp[i, 0] = i

    temp_y = torch.zeros((self.sample_num, 1))
    for i in range(10):
        temp_y[i * self.y_dim: (i + 1) * self.y_dim] = temp

    self.sample_y_ = torch.zeros((self.sample_num, self.y_dim))
    self.sample_y_.scatter_(1, temp_y.type(torch.LongTensor), 1)
    self.sample_c_ = torch.zeros((self.sample_num, self.len_continuous_code))

    # manipulating the two continuous codes
    temp_z_ = torch.rand((1, self.z_dim))
    self.sample_z2_ = temp_z_
    for i in range(self.sample_num - 1):
        self.sample_z2_ = torch.cat([self.sample_z2_, temp_z_])

    y = np.zeros(self.sample_num, dtype=np.int64)
    y_one_hot = np.zeros((self.sample_num, self.len_discrete_code))
    y_one_hot[np.arange(self.sample_num), y] = 1
    self.sample_y2_ = torch.from_numpy(y_one_hot).type(torch.FloatTensor)

    temp_c = torch.linspace(-1, 1, 10)
    self.sample_c2_ = torch.zeros((self.sample_num, 2))
    for i in range(10):
        for j in range(10):
            self.sample_c2_[i * 10 + j, 0] = temp_c[i]
            self.sample_c2_[i * 10 + j, 1] = temp_c[j]

    if self.gpu_mode:
        self.sample_z_, self.sample_y_, self.sample_c_, self.sample_z2_, self.sample_y2_, self.sample_c2_ = \
            Variable(self.sample_z_.cuda(), volatile=True), Variable(self.sample_y_.cuda(), volatile=True), \
            Variable(self.sample_c_.cuda(), volatile=True), Variable(self.sample_z2_.cuda(), volatile=True), \
            Variable(self.sample_y2_.cuda(), volatile=True), Variable(self.sample_c2_.cuda(), volatile=True)
    else:
        self.sample_z_, self.sample_y_, self.sample_c_, self.sample_z2_, self.sample_y2_, self.sample_c2_ = \
            Variable(self.sample_z_, volatile=True), Variable(self.sample_y_, volatile=True), \
            Variable(self.sample_c_, volatile=True), Variable(self.sample_z2_, volatile=True), \
            Variable(self.sample_y2_, volatile=True), Variable(self.sample_c2_, volatile=True)
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Gaussian parzen window, negative log-likelihood estimator.')
    parser.add_argument('-d', '--data_dir', default='/home/clb/dataset/mnist',
                        help='Directory to load mnist.')
    parser.add_argument('-g', '--gen_data_path', default='result/scgan_mnist/scgan_mnist.npy',
                        help='Path to load generated data.')
    parser.add_argument('-l', '--limit_size', default=1000, type=int,
                        help='The number of samples in validation.')
    parser.add_argument('-b', '--batch_size', default=100, type=int)
    parser.add_argument('-c', '--cross_val', default=10, type=int,
                        help='Number of cross validation folds.')
    parser.add_argument('--sigma_start', default=-1, type=float)
    parser.add_argument('--sigma_end', default=0., type=float)
    parser.add_argument('--file', default='cgan_mnist.txt',
                        help='File to save mean and std of log-likelihood.')
    args = parser.parse_args()

    # load mnist
    trainX, trainY, testX, testY = utils.load_mnist(args.data_dir)
    trainX = trainX.reshape([-1, 784]).astype(np.float32) / 255.
    testX = testX.reshape([-1, 784]).astype(np.float32) / 255.
    x = trainX[60000 - args.limit_size:]

    mu = np.load(args.gen_data_path).astype(np.float32) / 255.

    sigmas = np.logspace(args.sigma_start, args.sigma_end, args.cross_val)
    sigma = cross_validate_sigma(x, mu, sigmas, args.batch_size)
    print('Using Sigma: {}'.format(sigma))

    lls = get_lls(testX, mu, sigma, args.batch_size)
    print('Negative Log-Likelihood of Test Set = {}, Std: {}'.format(
        lls.mean(), lls.std() / np.sqrt(testX.shape[0])))
    with open(args.file, 'w') as file:
        file.write('Negative Log-Likelihood of Test Set = {}, Std: {}\n'.format(
            lls.mean(), lls.std() / np.sqrt(testX.shape[0])))
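# Note (added): a Gaussian Parzen window scores each test point against the
# generated samples `mu`. A minimal numpy sketch of the per-point
# log-likelihood that get_lls presumably computes (an assumption, not the
# repo's code):
import numpy as np
from scipy.special import logsumexp

def parzen_ll(x, mu, sigma):
    # x: (D,), mu: (N, D); log of the mean of N isotropic Gaussians centered
    # at the generated samples, evaluated at x
    d = (x[None, :] - mu) / sigma
    exponents = -0.5 * np.sum(d * d, axis=1)
    D = mu.shape[1]
    return (logsumexp(exponents) - np.log(mu.shape[0])
            - 0.5 * D * np.log(2 * np.pi * sigma ** 2))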
# coding: utf-8
import numpy as np
import matplotlib.pyplot as plt
from collections import OrderedDict

from utils import img_show, load_mnist
from two_layer_net import TwoLayerNet
from optimizer import *

(x_train, y_train), (x_test, y_test) = load_mnist(normalize=True, one_hot_label=True)

optimizers = OrderedDict()
optimizers["SGD"] = SGD()
optimizers["Momentum"] = Momentum()
optimizers["AdaGrad"] = AdaGrad()
optimizers["Adam"] = Adam()

iters_num = 2000
train_size = x_train.shape[0]
batch_size = 100

markers = {"SGD": "o", "Momentum": "x", "AdaGrad": "s", "Adam": "D"}
train_loss_list = {}

for key in optimizers:
    net = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
    optimizer = optimizers[key]
    train_loss_list[key] = []

    for i in range(iters_num):
        batch_mask = np.random.choice(train_size, batch_size)
def __init__(self, args, SUPERVISED=True):
    # parameters
    self.epoch = args.epoch
    self.sample_num = 100
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.dataset = args.dataset
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.model_name = args.gan_type
    self.SUPERVISED = SUPERVISED  # if True, label info is directly used for the code
    self.len_discrete_code = 10   # categorical distribution (i.e. label)
    self.len_continuous_code = 2  # gaussian distribution (e.g. rotation, thickness)

    # networks init
    self.G = generator(self.dataset)
    self.D = discriminator(self.dataset)
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2))
    self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2))
    self.info_optimizer = optim.Adam(itertools.chain(self.G.parameters(), self.D.parameters()),
                                     lr=args.lrD, betas=(args.beta1, args.beta2))

    if self.gpu_mode:
        self.G.cuda()
        self.D.cuda()
        self.BCE_loss = nn.BCELoss().cuda()
        self.CE_loss = nn.CrossEntropyLoss().cuda()
        self.MSE_loss = nn.MSELoss().cuda()
    else:
        self.BCE_loss = nn.BCELoss()
        self.CE_loss = nn.CrossEntropyLoss()
        self.MSE_loss = nn.MSELoss()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    print('-----------------------------------------------')

    # load mnist
    self.data_X, self.data_Y = utils.load_mnist(args.dataset, args.dataroot_dir)
    self.z_dim = 62
    self.y_dim = 10

    # fixed noise & condition
    self.sample_z_ = torch.zeros((self.sample_num, self.z_dim))
    for i in range(10):
        self.sample_z_[i * self.y_dim] = torch.rand(1, self.z_dim)
        for j in range(1, self.y_dim):
            self.sample_z_[i * self.y_dim + j] = self.sample_z_[i * self.y_dim]

    temp = torch.zeros((10, 1))
    for i in range(self.y_dim):
        temp[i, 0] = i

    temp_y = torch.zeros((self.sample_num, 1))
    for i in range(10):
        temp_y[i * self.y_dim:(i + 1) * self.y_dim] = temp

    self.sample_y_ = torch.zeros((self.sample_num, self.y_dim))
    self.sample_y_.scatter_(1, temp_y.type(torch.LongTensor), 1)
    self.sample_c_ = torch.zeros((self.sample_num, self.len_continuous_code))

    # manipulating the two continuous codes
    temp_z_ = torch.rand((1, self.z_dim))
    self.sample_z2_ = temp_z_
    for i in range(self.sample_num - 1):
        self.sample_z2_ = torch.cat([self.sample_z2_, temp_z_])

    y = np.zeros(self.sample_num, dtype=np.int64)
    y_one_hot = np.zeros((self.sample_num, self.len_discrete_code))
    y_one_hot[np.arange(self.sample_num), y] = 1
    self.sample_y2_ = torch.from_numpy(y_one_hot).type(torch.FloatTensor)

    temp_c = torch.linspace(-1, 1, 10)
    self.sample_c2_ = torch.zeros((self.sample_num, 2))
    for i in range(10):
        for j in range(10):
            self.sample_c2_[i * 10 + j, 0] = temp_c[i]
            self.sample_c2_[i * 10 + j, 1] = temp_c[j]

    if self.gpu_mode:
        self.sample_z_, self.sample_y_, self.sample_c_, self.sample_z2_, self.sample_y2_, self.sample_c2_ = \
            Variable(self.sample_z_.cuda(), volatile=True), Variable(self.sample_y_.cuda(), volatile=True), \
            Variable(self.sample_c_.cuda(), volatile=True), Variable(self.sample_z2_.cuda(), volatile=True), \
            Variable(self.sample_y2_.cuda(), volatile=True), Variable(self.sample_c2_.cuda(), volatile=True)
    else:
        self.sample_z_, self.sample_y_, self.sample_c_, self.sample_z2_, self.sample_y2_, self.sample_c2_ = \
            Variable(self.sample_z_, volatile=True), Variable(self.sample_y_, volatile=True), \
            Variable(self.sample_c_, volatile=True), Variable(self.sample_z2_, volatile=True), \
            Variable(self.sample_y2_, volatile=True), Variable(self.sample_c2_, volatile=True)
parser = argparse.ArgumentParser(
    description="train",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument("--batch_size", default=128, type=int)
parser.add_argument("--pretrain_epochs", default=20, type=int)
parser.add_argument("--train_epochs", default=200, type=int)
parser.add_argument("--save_dir", default="saves")
args = parser.parse_args()
print(args)

epochs_pre = args.pretrain_epochs
batch_size = args.batch_size

x, y = load_mnist()
autoencoder = AutoEncoder().to(device)
ae_save_path = "saves/sim_autoencoder.pth"

if os.path.isfile(ae_save_path):
    print("Loading {}".format(ae_save_path))
    checkpoint = torch.load(ae_save_path)
    autoencoder.load_state_dict(checkpoint["state_dict"])
else:
    print("=> no checkpoint found at '{}'".format(ae_save_path))
    checkpoint = {"epoch": 0, "best": float("inf")}

pretrain(
    data=x,
    model=autoencoder,
    num_epochs=epochs_pre,
    savepath=ae_save_path,
#!/usr/bin/env python

import matplotlib.pyplot as plt
import theanets

from utils import load_mnist, plot_layers

train, valid, _ = load_mnist(labels=True)

N = 16

e = theanets.Experiment(
    theanets.Classifier,
    layers=(784, N * N, 10),
    train_batches=100,
)
e.run(train, valid)

plot_layers(e.network.weights)
plt.tight_layout()
plt.show()
train_sum_freq = 10
val_sum_freq = 50

'''
Set up model
'''
# To make it distributed
device, target = device_and_target()  # getting node environment
with tf.device(device):
    global_step1 = tf.train.get_or_create_global_step()
    model = CapsNet(batch=FLAGS.batch_size, mnist=FLAGS.use_mnist,
                    data_path=FLAGS.path_to_data, global_step=global_step1)
    step1 = tf.assign_add(global_step1, 1)

'''
Load the data
'''
trX, trY, num_tr_batch, valX, valY, num_val_batch = load_mnist(
    FLAGS.batch_size, is_training=True, path=FLAGS.path_to_data, mnist=FLAGS.use_mnist)

# Format Y
Y = valY[:num_val_batch * FLAGS.batch_size].reshape((-1, 1))

'''
Run the Model
Pass in target to determine the worker
'''
with tf.train.MonitoredTrainingSession(master=target,
                                       is_chief=(FLAGS.task_index == 0)) as sess:
    train_writer = tf.summary.FileWriter('/logs/train', sess.graph)
    counter = 0
    for epoch in range(FLAGS.epochs):
        print("Training for epoch %d/%d:" % (epoch, FLAGS.epochs))
parser.add_argument('--epochs', type=int, default=10, metavar='N',
                    help='number of epochs to train (default: 10)')
parser.add_argument('--dynet-gpu', action='store_true', default=False,
                    help='enables DyNet CUDA training')
parser.add_argument('--dynet-gpus', type=int, default=1, metavar='N',
                    help='number of gpu devices to use')
parser.add_argument('--dynet-seed', type=int, default=None, metavar='N',
                    help='random seed (default: random inside DyNet)')
parser.add_argument('--dynet-mem', type=int, default=None, metavar='N',
                    help='memory to allocate (default: DyNet default of 512MB)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                    help='how many batches to wait before logging training status')
args = parser.parse_args()

train_data = load_mnist('training', './data')
batch_size = args.batch_size
test_data = load_mnist('testing', './data')


def generate_batch_loader(data, batch_size):
    i = 0
    n = len(data)
    while i + batch_size <= n:
        yield np.asarray(data[i:i + batch_size])
        i += batch_size
    # if i < n:
    #     pass  # last short batch ignored
import os
import numpy as np
from collections import Counter

from utils import load_mnist

path = os.path.join(os.getcwd(), 'dataset')
train_x, train_y = load_mnist(path, 'train')
test_x, test_y = load_mnist(path, 't10k')
print('train images: ', train_x.shape[0])
print('test images: ', test_x.shape[0])


def classify(input, k, train_x, train_y):
    input = input / 255.0      # normalize
    train_x = train_x / 255.0  # normalize

    # Euclidean distance from the query to every training image
    dists = []
    for i in range(train_x.shape[0]):
        dist = np.linalg.norm(train_x[i] - input)
        dists.append(dist)
    dists = np.array(dists)
    sorted_idx = np.argsort(dists)

    # collect the labels of the k nearest neighbours
    class_list = []
    for idx in sorted_idx[:k]:
        class_list.append(train_y[idx])

    counter = Counter(class_list)
    most_common = counter.most_common(1)
    for label, num in most_common:
        return label, num  # return the most common label, i.e. the KNN result
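# Note (added): a minimal usage sketch of classify() on the first test image,
# using the arrays loaded above (k=5 is an arbitrary illustrative choice):
label, votes = classify(test_x[0], 5, train_x, train_y)
print('predicted: {} ({} of 5 votes), actual: {}'.format(label, votes, test_y[0]))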
import theanets

from utils import load_mnist, plot_layers, plot_images

logging = climate.get_logger('mnist-rica')

climate.enable_default_logging()


class WeightInverse(theanets.Regularizer):
    def loss(self, layers, outputs):
        return sum((1 / (w * w).sum(axis=0)).sum()
                   for l in layers
                   for w in l.params
                   if w.ndim > 1)


(train, ), (valid, ), _ = load_mnist()

# mean-center the digits and compute a pca whitening transform.
m = train.mean(axis=0)
train -= m
valid -= m
logging.info('computing whitening transform')
vals, vecs = np.linalg.eigh(np.dot(train.T, train) / len(train))
vals = vals[::-1]
vecs = vecs[:, ::-1]

K = 197  # this retains 99% of the variance in the digit data.
vals = np.sqrt(vals[:K])
vecs = vecs[:, :K]
def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
             pretrain_lr=0.001, training_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=1):
    """
    Demonstrates how to train and test a stochastic denoising autoencoder.

    This is demonstrated on MNIST.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage
        (factor for the stochastic gradient)

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type training_epochs: int
    :param training_epochs: maximal number of iterations to run the optimizer

    :type dataset: string
    :param dataset: path to the pickled dataset

    """

    datasets = load_mnist(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size

    # numpy random generator
    # start-snippet-3
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'
    # construct the stacked denoising autoencoder class
    sda = SdA(
        numpy_rng=numpy_rng,
        n_ins=28 * 28,
        hidden_layers_sizes=[1000, 1000, 1000],
        n_outs=10
    )
    # end-snippet-3 start-snippet-4
    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)

    print '... pre-training the model'
    start_time = time.clock()
    corruption_levels = [.1, .2, .3]
    for i in xrange(sda.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                         corruption=corruption_levels[i],
                         lr=pretrain_lr))
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print numpy.mean(c)

    end_time = time.clock()

    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    # end-snippet-4
    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing functions for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = sda.build_finetune_functions(
        datasets=datasets,
        batch_size=batch_size,
        learning_rate=finetune_lr
    )

    print '... fine-tuning the model'
    # early-stopping parameters
    patience = 10 * n_train_batches  # look at this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_fn(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(
        (
            'Optimization complete with best validation score of %f %%, '
            'on iteration %i, '
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
    )
    print >> sys.stderr, ('The training code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
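# Standard entry point for the tutorial-style script above (a usage sketch;
# the original excerpt stops before it):
if __name__ == '__main__':
    test_SdA()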
    transform_time = t2 - t1

    classifier = SVC(C=10)
    classifier.fit(X_train, y_train)
    return pcanet, classifier


def test(pcanet, classifier, test_set):
    images_test, y_test = test_set
    X_test = pcanet.transform(images_test)
    y_pred = classifier.predict(X_test)
    return y_pred, y_test


train_set, test_set = load_mnist()

if args.gpu >= 0:
    set_device(args.gpu)

if args.mode == "train":
    print("Training the model...")
    pcanet, classifier = train(train_set)
    if not isdir(args.out):
        os.makedirs(args.out)
    save_model(pcanet, join(args.out, "pcanet.pkl"))
    save_model(classifier, join(args.out, "classifier.pkl"))
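# The excerpt shows only the "train" mode; a minimal sketch of the matching
# "test" branch, assuming a load_model() helper symmetric to the save_model()
# calls above (hypothetical -- not part of the original excerpt):
elif args.mode == "test":
    pcanet = load_model(join(args.out, "pcanet.pkl"))
    classifier = load_model(join(args.out, "classifier.pkl"))
    y_pred, y_test = test(pcanet, classifier, test_set)
    accuracy = (y_pred == y_test).mean()
    print("Test accuracy: {:.4f}".format(accuracy))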
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='mnist.pkl.gz',
                           batch_size=600):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear
    model

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    """
    datasets = load_mnist(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # generate symbolic variables for input (x and y represent a
    # minibatch)
    x = T.matrix('x')  # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels

    # construct the logistic regression class
    # Each MNIST image has size 28*28
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # start-snippet-3
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, but at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-3

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss

                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(
                        (
                            '     epoch %i, minibatch %i/%i, test error of'
                            ' best model %f %%'
                        ) %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(
        (
            'Optimization complete with best validation score of %f %%, '
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., test_score * 100.)
    )
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
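# Standard entry point (a usage sketch; the excerpt stops before it):
if __name__ == '__main__':
    sgd_optimization_mnist()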
def experiment(network_model, reshape_mode='mlp'):
    reshape_funs = {
        "conv": lambda d: d.reshape(-1, 28, 28, 1),
        "mlp": lambda d: d.reshape(-1, 784)
    }
    xtrain, ytrain, xtest, ytest = utils.load_mnist()
    reshape_fun = reshape_funs[reshape_mode]
    xtrain, xtest = reshape_fun(xtrain), reshape_fun(xtest)

    digits_data = utils.load_processed_data('combined_testing_data')
    digits_data2 = utils.load_processed_data('digits_og_and_optimal')
    taus = [13, 14, 15]
    digits = list(map(reshape_fun, [digits_data[t] for t in taus]))
    digits = list(map(utils.normalize_data, digits))
    digits_og = reshape_fun(digits_data2['lecunn'])
    digits_og = utils.normalize_data(digits_og)
    d_labels = utils.create_one_hot(digits_data['labels'].astype('uint'))
    d2_labels = utils.create_one_hot(digits_data2['labels'].astype('uint'))

    ensemble_size = 20
    epochs = 50
    trials = 10

    mnist_correct = []
    mnist_wrong = []
    digits_wrong = []
    digits_correct = []
    d2_wrong = []
    d2_correct = []

    for t in range(trials):
        inputs = []
        outputs = []
        model_list = []
        for e in range(ensemble_size):
            model = Sequential()
            model.add(layers.Dense(
                200, input_dim=784,
                kernel_initializer=inits.RandomUniform(maxval=0.5, minval=-0.5)))
            model.add(layers.Activation("relu"))
            model.add(layers.BatchNormalization())
            model.add(layers.Dense(
                200,
                kernel_initializer=inits.RandomUniform(maxval=0.5, minval=-0.5)))
            model.add(layers.Activation("relu"))
            model.add(layers.BatchNormalization())
            model.add(layers.Dense(
                200,
                kernel_initializer=inits.RandomUniform(maxval=0.5, minval=-0.5)))
            model.add(layers.Activation("relu"))
            model.add(layers.BatchNormalization())
            model.add(layers.Dense(
                10,
                kernel_initializer=inits.RandomUniform(maxval=0.5, minval=-0.5)))
            model.add(layers.Activation("softmax"))

            es = clb.EarlyStopping(monitor='val_loss', patience=5,
                                   restore_best_weights=True)
            model.compile(optimizer=opt.Adam(),
                          loss="categorical_crossentropy", metrics=['acc'])
            model.fit(xtrain, ytrain, epochs=epochs, batch_size=100,
                      validation_split=(1 / 6), callbacks=[es])

            model_list.append(model)
            inputs.extend(model.inputs)
            outputs.extend(model.outputs)

        merge_model = Model(inputs=inputs, outputs=layers.Average()(outputs))

        mnist_preds = merge_model.predict([xtest] * ensemble_size)
        mnist_mem_preds = np.array(
            list(map(lambda m: m.predict(xtest), model_list))).transpose(1, 2, 0)
        correct, wrong = bin_entropies(mnist_preds, mnist_mem_preds, ytest)
        mnist_correct.extend(correct)
        mnist_wrong.extend(wrong)

        d2_preds = merge_model.predict([digits_og] * ensemble_size)
        d2_mempreds = np.array(
            list(map(lambda m: m.predict(digits_og), model_list))).transpose(1, 2, 0)
        correct, wrong = bin_entropies(d2_preds, d2_mempreds, d2_labels)
        d2_correct.extend(correct)
        d2_wrong.extend(wrong)

        for d in digits:
            digits_preds = merge_model.predict([d] * ensemble_size)
            mempreds = np.array(
                list(map(lambda m: m.predict(d), model_list))).transpose(1, 2, 0)
            correct, wrong = bin_entropies(digits_preds, mempreds, d_labels)
            digits_wrong.extend(wrong)
            digits_correct.extend(correct)

    ensemble = {
        'mnist_correct': mnist_correct,
        'mnist_wrong': mnist_wrong,
        'digits_correct': digits_correct,
        'digits_wrong': digits_wrong,
        'lecunn_correct': d2_correct,
        'lecunn_wrong': d2_wrong
    }

    return ensemble
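# bin_entropies() is used above but not shown in the excerpt; a plausible
# sketch, assuming it splits the ensemble's predictive entropies by whether
# the averaged prediction was correct (hypothetical reconstruction):
import numpy as np

def bin_entropies(preds, member_preds, labels):
    # entropy of the averaged ensemble output, one value per example;
    # member_preds (examples x classes x members) is available for, e.g., a
    # mutual-information variant, but this sketch uses only the averaged output
    entropy = -np.sum(preds * np.log(preds + 1e-12), axis=1)
    hits = preds.argmax(1) == labels.argmax(1)
    return entropy[hits].tolist(), entropy[~hits].tolist()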
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='Embedding extraction module')
    parser.add_argument('--net', default='lenet5',
                        help='DNN name (default=lenet5)')
    parser.add_argument('--root', default='data',
                        help='rootpath (default=data)')
    parser.add_argument('--dataset', default='imagenet',
                        help='dataset (default=imagenet)')
    parser.add_argument('--tensor_folder', default='tensor_pub',
                        help='tensor_folder (default=tensor_pub)')
    parser.add_argument('--layer-info', default='layer_info',
                        help='layer-info (default=layer_info)')
    parser.add_argument('--gpu-id', default='1', type=str,
                        help='id(s) for CUDA_VISIBLE_DEVICES')
    parser.add_argument('-j', '--workers', default=8, type=int, metavar='N',
                        help='number of data loading workers (default: 8)')
    parser.add_argument('-b', '--batch-size', default=1, type=int,
                        metavar='N', help='should be 1')
    args = parser.parse_args()
    use_cuda = True

    # Define what device we are using
    print("CUDA Available: ", torch.cuda.is_available())
    root = args.root
    dataset = args.dataset
    net = args.net
    tensor_folder = args.tensor_folder
    layers, cols = utils.get_layer_info(root, dataset, net, args.layer_info)
    print(dataset)
    print(root, dataset, net)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

    if dataset.startswith('imagenet'):
        if net == 'resnet50':
            model = utils.load_resnet50_model(True)
        elif net == 'vgg16':
            model = utils.load_vgg_model(pretrained=True, net=net)
        else:
            model = utils.load_resnet_model(pretrained=True)
        sub_models = utils.load_imagenet_sub_models(
            utils.get_model_root(root, dataset, net), layers, net, cols)
        # sub_models = utils.load_resnet_sub_models(utils.get_model_root(root,
        #     dataset, net), layers, net)
        test_loader = utils.load_imagenet_test(args.batch_size, args.workers)
        anatomy(model, sub_models, test_loader, root, dataset, tensor_folder,
                net, layers)
    else:  # cifar10, cifar100, mnist
        device = torch.device("cuda" if (
            use_cuda and torch.cuda.is_available()) else "cpu")
        nclass = 10
        if dataset == 'cifar100':
            nclass = 100
        model = utils.load_model(
            net, device, utils.get_pretrained_model(root, dataset, net),
            dataset)
        weight_models = utils.load_weight_models(
            net, device, utils.get_model_root(root, dataset, net), layers,
            cols, nclass)
        if dataset == 'mnist':
            train_loader, test_loader = utils.load_mnist(
                utils.get_root(root, dataset, 'data', net))
        elif dataset == 'cifar10':
            train_loader, test_loader = utils.load_cifar10(
                utils.get_root(root, dataset, 'data', net))
        elif dataset == 'cifar100':
            train_loader, test_loader = utils.load_cifar100(
                utils.get_root(root, dataset, 'data', net))
        else:  # default mnist
            train_loader, test_loader = utils.load_mnist(
                utils.get_root(root, dataset, 'data', net))
        anatomy(model, weight_models, test_loader, root, dataset,
                tensor_folder, net, layers)
#!/usr/bin/env python

import matplotlib.pyplot as plt
import theanets

from utils import load_mnist, plot_layers

train, valid, _ = load_mnist(labels=True)

N = 10

e = theanets.Experiment(
    theanets.Classifier,
    layers=(784, N * N, 10),
    train_batches=100,
)
e.train(train, valid)

plot_layers([e.network.find(1, 0), e.network.find(2, 0)])
plt.tight_layout()
plt.show()
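# A quick accuracy check on the validation split (a sketch; assumes valid is
# an (inputs, labels) pair and that this theanets version exposes
# Classifier.predict(), which returns predicted class labels):
import numpy as np

inputs, labels = valid
print('validation accuracy: %.2f%%' %
      (100 * np.mean(e.network.predict(inputs) == labels)))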
def train_autoencoder():
    print('building model')
    layers = model.build_model()

    max_epochs = 5000
    batch_size = 128
    weightsfile = join('weights', 'weights_train_val.pickle')

    print('compiling theano functions for training')
    print(' encoder/decoder')
    encoder_decoder_update = theano_funcs.create_encoder_decoder_func(
        layers, apply_updates=True)

    print(' discriminator')
    discriminator_update = theano_funcs.create_discriminator_func(
        layers, apply_updates=True)

    print(' generator')
    generator_update = theano_funcs.create_generator_func(
        layers, apply_updates=True)

    print('compiling theano functions for validation')
    print(' encoder/decoder')
    encoder_decoder_func = theano_funcs.create_encoder_decoder_func(layers)

    print(' discriminator')
    discriminator_func = theano_funcs.create_discriminator_func(layers)

    print(' generator')
    generator_func = theano_funcs.create_generator_func(layers)

    print('loading data')
    X_train, y_train, X_test, y_test = utils.load_mnist()

    try:
        for epoch in range(1, max_epochs + 1):
            print('epoch %d' % (epoch))

            # compute loss on training data and apply gradient updates
            train_reconstruction_losses = []
            train_discriminative_losses = []
            train_generative_losses = []
            for train_idx in get_batch_idx(X_train.shape[0], batch_size):
                X_train_batch = X_train[train_idx]

                # 1.) update the encoder/decoder to min. reconstruction loss
                train_batch_reconstruction_loss =\
                    encoder_decoder_update(X_train_batch)

                # sample from p(z)
                pz_train_batch = np.random.uniform(
                    low=-2, high=2,
                    size=(X_train_batch.shape[0], 2)).astype(np.float32)

                # 2.) update discriminator to separate q(z|x) from p(z)
                train_batch_discriminative_loss =\
                    discriminator_update(X_train_batch, pz_train_batch)

                # 3.) update generator to output q(z|x) that mimic p(z)
                train_batch_generative_loss = generator_update(X_train_batch)

                train_reconstruction_losses.append(
                    train_batch_reconstruction_loss)
                train_discriminative_losses.append(
                    train_batch_discriminative_loss)
                train_generative_losses.append(
                    train_batch_generative_loss)

            # average over minibatches
            train_reconstruction_losses_mean = np.mean(
                train_reconstruction_losses)
            train_discriminative_losses_mean = np.mean(
                train_discriminative_losses)
            train_generative_losses_mean = np.mean(
                train_generative_losses)

            print(' train: rec = %.6f, dis = %.6f, gen = %.6f' % (
                train_reconstruction_losses_mean,
                train_discriminative_losses_mean,
                train_generative_losses_mean,
            ))

            # compute loss on test data
            test_reconstruction_losses = []
            test_discriminative_losses = []
            test_generative_losses = []
            for test_idx in get_batch_idx(X_test.shape[0], batch_size):
                X_test_batch = X_test[test_idx]

                test_batch_reconstruction_loss =\
                    encoder_decoder_func(X_test_batch)

                # sample from p(z); the original sized this with
                # X_test.shape[0], which mismatches the minibatch -- fixed to
                # use the batch itself
                pz_test_batch = np.random.uniform(
                    low=-2, high=2,
                    size=(X_test_batch.shape[0], 2)).astype(np.float32)

                test_batch_discriminative_loss =\
                    discriminator_func(X_test_batch, pz_test_batch)

                test_batch_generative_loss = generator_func(X_test_batch)

                test_reconstruction_losses.append(
                    test_batch_reconstruction_loss)
                test_discriminative_losses.append(
                    test_batch_discriminative_loss)
                test_generative_losses.append(
                    test_batch_generative_loss)

            test_reconstruction_losses_mean = np.mean(
                test_reconstruction_losses)
            test_discriminative_losses_mean = np.mean(
                test_discriminative_losses)
            test_generative_losses_mean = np.mean(
                test_generative_losses)

            print(' test: rec = %.6f, dis = %.6f, gen = %.6f' % (
                test_reconstruction_losses_mean,
                test_discriminative_losses_mean,
                test_generative_losses_mean,
            ))

    except KeyboardInterrupt:
        print('caught ctrl-c, stopped training')

    weights = get_all_param_values([
        layers['l_decoder_out'],
        layers['l_discriminator_out'],
    ])
    print('saving weights to %s' % (weightsfile))
    model.save_weights(weights, weightsfile)
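# To resume from a checkpoint, Lasagne's symmetric setter can restore the
# saved parameter list (a sketch; assumes save_weights() pickled the list
# returned by get_all_param_values, and that these helpers come from
# lasagne.layers):
import pickle
from os.path import join

from lasagne.layers import set_all_param_values


def load_weights(layers, weightsfile=join('weights', 'weights_train_val.pickle')):
    with open(weightsfile, 'rb') as f:
        weights = pickle.load(f)
    set_all_param_values(
        [layers['l_decoder_out'], layers['l_discriminator_out']], weights)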
def main():
    """The main function

    Entry point.
    """
    global args

    # Setting the hyperparameters
    parser = argparse.ArgumentParser(description='Example of Capsule Network')
    parser.add_argument('--epochs', type=int, default=10,
                        help='number of training epochs. default=10')
    parser.add_argument('--lr', type=float, default=0.01,
                        help='learning rate. default=0.01')
    parser.add_argument('--batch-size', type=int, default=128,
                        help='training batch size. default=128')
    parser.add_argument('--test-batch-size', type=int, default=128,
                        help='testing batch size. default=128')
    parser.add_argument('--log-interval', type=int, default=10,
                        help='how many batches to wait before logging training status. default=10')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training. default=false')
    parser.add_argument('--threads', type=int, default=4,
                        help='number of threads for data loader to use. default=4')
    parser.add_argument('--seed', type=int, default=42,
                        help='random seed for training. default=42')
    parser.add_argument('--num-conv-out-channel', type=int, default=256,
                        help='number of channels produced by the convolution. default=256')
    parser.add_argument('--num-conv-in-channel', type=int, default=1,
                        help='number of input channels to the convolution. default=1')
    parser.add_argument('--num-primary-unit', type=int, default=8,
                        help='number of primary units. default=8')
    parser.add_argument('--primary-unit-size', type=int, default=1152,
                        help='primary unit size is 32 * 6 * 6. default=1152')
    parser.add_argument('--num-classes', type=int, default=10,
                        help='number of digit classes. 1 unit for one MNIST digit. default=10')
    parser.add_argument('--output-unit-size', type=int, default=16,
                        help='output unit size. default=16')
    parser.add_argument('--num-routing', type=int, default=3,
                        help='number of routing iterations. default=3')
    parser.add_argument('--use-reconstruction-loss', type=utils.str2bool,
                        nargs='?', default=True,
                        help='use an additional reconstruction loss. default=True')
    parser.add_argument('--regularization-scale', type=float, default=0.0005,
                        help='regularization coefficient for reconstruction loss. default=0.0005')

    args = parser.parse_args()
    print(args)

    # Check whether CUDA is available
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # Get reproducible results by manually seeding the random number generator
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    # Load data
    train_loader, test_loader = utils.load_mnist(args)

    # Build Capsule Network
    print('===> Building model')
    model = Net(num_conv_in_channel=args.num_conv_in_channel,
                num_conv_out_channel=args.num_conv_out_channel,
                num_primary_unit=args.num_primary_unit,
                primary_unit_size=args.primary_unit_size,
                num_classes=args.num_classes,
                output_unit_size=args.output_unit_size,
                num_routing=args.num_routing,
                use_reconstruction_loss=args.use_reconstruction_loss,
                regularization_scale=args.regularization_scale,
                cuda_enabled=args.cuda)

    if args.cuda:
        print('Utilize GPUs for computation')
        print('Number of GPU available', torch.cuda.device_count())
        model.cuda()
        cudnn.benchmark = True
        model = torch.nn.DataParallel(model)

    # Print the model architecture and parameters
    print('Model architectures:\n{}\n'.format(model))

    print('Parameters and size:')
    for name, param in model.named_parameters():
        print('{}: {}'.format(name, list(param.size())))

    # CapsNet has 8.2M parameters and 6.8M parameters without the
    # reconstruction subnet.
    num_params = sum([param.nelement() for param in model.parameters()])

    # The coupling coefficients c_ij are not included in the parameter list,
    # so we add them manually: 1152 * 10 = 11520.
    print('\nTotal number of parameters: {}\n'.format(num_params + 11520))

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Make model checkpoint directory
    if not os.path.exists('results/trained_model'):
        os.makedirs('results/trained_model')

    # Set the logger
    writer = SummaryWriter()

    # Train and test
    for epoch in range(1, args.epochs + 1):
        train(model, train_loader, optimizer, epoch, writer)
        test(model, test_loader, len(train_loader), epoch, writer)

        # Save model checkpoint
        utils.checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }, epoch)

    writer.close()
print('GPU: {}'.format(args.gpu))
print('# dim z: {}'.format(args.dimz))
print('# Minibatch-size: {}'.format(args.batchsize))
print('# epoch: {}'.format(args.epoch))

try:
    os.mkdir(args.model_dir)
except OSError:
    pass
try:
    os.mkdir(args.visualization_dir)
except OSError:
    pass

x_train, x_test, y_train, y_test = utils.load_mnist()
N = len(x_train)

# the excerpt used n_z without defining it; it presumably comes from the CLI flag
n_z = args.dimz
model = AAE(784, n_z, hidden_units_enc=(1000, 1000, 500),
            hidden_units_dec=(500, 1000, 1000))
dis = Discriminator(n_z + 10)

use_gpu = args.gpu >= 0
if use_gpu:
    cuda.get_device(args.gpu).use()
    model.to_gpu()
    dis.to_gpu()
xp = np if args.gpu < 0 else cuda.cupy

optimizer_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
optimizer_aae = optimizers.Adam(alpha=0.0002, beta1=0.5)
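# Chainer optimizers must be bound to the links they update before use; the
# excerpt stops before that step (a sketch, assuming standard Chainer usage):
optimizer_aae.setup(model)
optimizer_dis.setup(dis)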
def __init__(self, args):
    # parameters
    self.epoch = args.epoch
    self.sample_num = 100
    self.batch_size = args.batch_size
    self.save_dir = args.save_dir
    self.result_dir = args.result_dir
    self.dataset = args.dataset
    self.log_dir = args.log_dir
    self.gpu_mode = args.gpu_mode
    self.model_name = args.gan_type

    # networks init
    self.G = generator(self.dataset)
    self.D = discriminator(self.dataset)
    self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG,
                                  betas=(args.beta1, args.beta2))
    self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD,
                                  betas=(args.beta1, args.beta2))

    if self.gpu_mode:
        self.G.cuda()
        self.D.cuda()
        self.BCE_loss = nn.BCELoss().cuda()
    else:
        self.BCE_loss = nn.BCELoss()

    print('---------- Networks architecture -------------')
    utils.print_network(self.G)
    utils.print_network(self.D)
    print('-----------------------------------------------')

    # load mnist
    self.data_X, self.data_Y = utils.load_mnist(args.dataset,
                                                args.dataroot_dir)
    self.z_dim = 62
    self.y_dim = 10

    # fixed noise & condition
    self.sample_z_ = torch.zeros((self.sample_num, self.z_dim))
    for i in range(10):
        self.sample_z_[i * self.y_dim] = torch.rand(1, self.z_dim)
        for j in range(1, self.y_dim):
            self.sample_z_[i * self.y_dim + j] = self.sample_z_[i * self.y_dim]

    temp = torch.zeros((10, 1))
    for i in range(self.y_dim):
        temp[i, 0] = i

    temp_y = torch.zeros((self.sample_num, 1))
    for i in range(10):
        temp_y[i * self.y_dim:(i + 1) * self.y_dim] = temp

    self.sample_y_ = torch.zeros((self.sample_num, self.y_dim))
    self.sample_y_.scatter_(1, temp_y.type(torch.LongTensor), 1)
    if self.gpu_mode:
        self.sample_z_ = Variable(self.sample_z_.cuda(), volatile=True)
        self.sample_y_ = Variable(self.sample_y_.cuda(), volatile=True)
    else:
        self.sample_z_ = Variable(self.sample_z_, volatile=True)
        self.sample_y_ = Variable(self.sample_y_, volatile=True)
#!/usr/bin/env python

import matplotlib.pyplot as plt
import theanets

from utils import load_mnist, plot_layers, plot_images

train, valid, _ = load_mnist()

e = theanets.Experiment(
    theanets.Autoencoder,
    layers=(784, 256, 100, 64, ('tied', 100), ('tied', 256), ('tied', 784)),
)
e.train(train, valid,
        algorithm='layerwise',
        patience=1,
        min_improvement=0.05,
        train_batches=100)
e.train(train, valid, min_improvement=0.01, train_batches=100)

plot_layers([e.network.find(i, 'w') for i in (1, 2, 3)], tied_weights=True)
plt.tight_layout()
plt.show()

valid = valid[:16*16]
plot_images(valid, 121, 'Sample data')
plot_images(e.network.predict(valid), 122, 'Reconstructed data')
plt.tight_layout()
plt.show()
        d2_hist.append(d_loss2)
        g_hist.append(g_loss)
        # evaluate
        if (i + 1) % (batch_per_epoch * 1) == 0:
            log_performance(i, g_model, latent_dim)
    # plot
    plot_history(d1_hist, d2_hist, g_hist)


# EXAMPLE
latent_dim = 100
# discriminator model
discriminator = build_discriminator(in_shape=(28, 28, 1))
# generator model
generator = build_generator(latent_dim=latent_dim)
# gan model
gan_model = build_gan(generator, discriminator)
# image dataset
dataset = load_mnist()
print(dataset.shape)
# train
train(generator, discriminator, gan_model, dataset, latent_dim)
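# plot_history() is called above but not shown in the excerpt; a minimal
# sketch that plots the recorded discriminator/generator losses (hypothetical
# helper -- in a runnable script it would be defined before train() is called):
import matplotlib.pyplot as plt

def plot_history(d1_hist, d2_hist, g_hist):
    # loss per training step for the discriminator (real/fake) and generator
    plt.plot(d1_hist, label='d-real')
    plt.plot(d2_hist, label='d-fake')
    plt.plot(g_hist, label='gen')
    plt.legend()
    plt.savefig('plot_line_plot_loss.png')
    plt.close()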
import numpy as np
import tensorflow as tf

from config import cfg
from utils import load_mnist
from utils import save_images
from capsNet import CapsNet


if __name__ == '__main__':
    capsNet = CapsNet(is_training=cfg.is_training)
    tf.logging.info('Graph loaded')
    teX, teY = load_mnist(cfg.dataset, cfg.is_training)
    with capsNet.graph.as_default():
        sv = tf.train.Supervisor(logdir=cfg.logdir)
        with sv.managed_session() as sess:
            sv.saver.restore(sess, tf.train.latest_checkpoint(cfg.logdir))
            tf.logging.info('Restored')

            reconstruction_err = []
            for i in range(10000 // cfg.batch_size):
                start = i * cfg.batch_size
                end = start + cfg.batch_size
                recon_imgs = sess.run(capsNet.decoded,
                                      {capsNet.X: teX[start:end]})
                origin_imgs = np.reshape(teX[start:end],
                                         (cfg.batch_size, -1))
                squared = np.square(recon_imgs - origin_imgs)
                reconstruction_err.append(np.mean(squared))
                if i % 5 == 0:
                    imgs = np.reshape(recon_imgs,
                                      (cfg.batch_size, 28, 28, 1))
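                    # The excerpt cuts off here; given the save_images import
                    # above, the reshaped batch is presumably written to disk,
                    # e.g. (hypothetical call -- the original is not shown):
                    save_images(imgs, 'results/recon_batch_%02d.png' % i)
            # Aggregate over all test batches (uses only names defined above):
            print('mean reconstruction error: %.6f' %
                  np.mean(reconstruction_err))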
import sys, os

import matplotlib.pyplot as plt

from convnet import ConvoNet
from trainer import Trainer
from utils import img_show, load_mnist

sys.path.append(os.getcwd() + '\\books\\dlfs-orig\\')
import common.util as book_util
import layers as book_layers
import dataset.mnist as book_mnist
import ch07.simple_convnet as book_convnet

# (x_train, t_train), (x_test, t_test) = book_mnist.load_mnist(flatten=False)
(x_train, t_train), (x_test, t_test) = load_mnist()
x_train = x_train.reshape(x_train.shape[0], 1, x_train.shape[1], x_train.shape[1])
x_test = x_test.reshape(x_test.shape[0], 1, x_test.shape[1], x_test.shape[1])

# Use a subset of the data when processing takes too long
x_train, t_train = x_train[:5000], t_train[:5000]
x_test, t_test = x_test[:1000], t_test[:1000]

max_epochs = 20

'''
network = book_convnet.SimpleConvNet(input_dim=(1, 28, 28),
                                     conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                                     hidden_size=100, output_size=10, weight_init_std=0.01)
'''
network = ConvoNet(input_shape=(1, 28, 28),
                   conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
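                   # the excerpt ends mid-call; plausible remaining arguments,
                   # mirroring the commented-out SimpleConvNet call above
                   # (assumed -- ConvoNet's actual signature is not shown):
                   hidden_size=100, output_size=10, weight_init_std=0.01)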