def test(depth, p, dataset, num_epochs=200, seed=None):
    if seed is None:
        seed = 0
    np.random.seed(seed)

    data = None
    if dataset == "mnist":
        data = mnist.load().astype(np.float32)
    elif dataset == "cifar10":
        data = cifar10.load().astype(np.float32)

    num_observations, input_dim = data.shape
    data_split_index = int(num_observations * 0.9)
    # batch_size is expected to be defined at module scope
    training_data_iterator = DataIterator(batch_size,
                                          data[:data_split_index],
                                          data[:data_split_index])
    validation_data_iterator = DataIterator(batch_size,
                                            data[data_split_index:],
                                            data[data_split_index:])

    # make net
    net = Network(input_dim, input_dim, hidden_layers=[1000] * depth, p=p)
    losses = net.train(training_data_iterator,
                       validation_data_iterator,
                       num_epochs=num_epochs)
    net.close()
    return losses
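# A minimal usage sketch (not part of the original script) for the `test`
# function above; the depth, p, num_epochs and seed values are illustrative
# assumptions only.
if __name__ == "__main__":
    losses = test(depth=4, p=0.99, dataset="mnist", num_epochs=10, seed=0)
    print("final losses:", losses)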
    train_ = {'data': train[0].reshape(-1, 28, 28, 1), 'labels': one_hot(train[1], 10)}
    test_ = {'data': test[0].reshape(-1, 28, 28, 1), 'labels': one_hot(test[1], 10)}
    valid_ = {'data': valid[0].reshape(-1, 28, 28, 1), 'labels': one_hot(valid[1], 10)}
    f.close()
    return train_, test_, valid_


train = cifar10.load(full=True)
# train, test, valid = mnist()

cursor = 0


def next_batch(nB):
    global cursor
    begin, end = None, None
    if cursor + nB < train.shape[0]:
        begin = cursor
    else:
        begin = cursor = 0
    end = cursor + nB
    cursor += nB
    return {
    data['data0'] /= 255.
    data['data1'] /= 255.
    return data


# Logger setup
logger = Logger('CIFAR SIAMESE',
                train_log_mode='TRAIN_LOSS_ONLY',
                test_log_mode='TEST_LOSS_ONLY')

# Configure GPU Device
if args.gpu >= 0:
    cuda.check_cuda_available()
xp = cuda.cupy if args.gpu >= 0 else np

# loading dataset
dataset = cifar10.load()
dim = dataset['train']['data'][0].size
N_train = len(dataset['train']['target'])
N_test = len(dataset['test']['target'])

train_data_dict = {'data': dataset['train']['data'].astype(np.float32),
                   'target': dataset['train']['target'].astype(np.int32)}
test_data_dict = {'data': dataset['test']['data'].astype(np.float32),
                  'target': dataset['test']['target'].astype(np.int32)}
train_data = datafeeders.SiameseFeeder(train_data_dict, batchsize=args.batch)
test_data = datafeeders.SiameseFeeder(test_data_dict, batchsize=args.valbatch)

train_data.hook_preprocess(cifar_preprocess)
test_data.hook_preprocess(cifar_preprocess)
def test_init(weight_sigma_square):
    p = 0.6
    max_num_layers = 1000
    num_hidden_layers = np.floor(
        np.min([max_num_layers,
                depth("Dropout", weight_sigma_square, p) * 1.1])).astype(int)
    print("Testing sigma_w = {: 2.2f}; Using {: 4d} layers...".format(
        weight_sigma_square, num_hidden_layers))

    batch_size = 128
    input_data = None
    if dataset == "mnist":
        input_data = mnist.load().astype(np.float32)
    elif dataset == "cifar10":
        input_data = cifar10.load().astype(np.float32)

    # batch data for memory purposes
    input_data_iterator = DataIterator(batch_size, input_data, input_data)
    num_batches = input_data_iterator.size()
    input_size = input_data.shape[-1]

    net = Network(input_size,
                  input_size,
                  [1000] * num_hidden_layers,
                  activation="relu",
                  weight_sigma=np.sqrt(weight_sigma_square),
                  dist="bern",
                  p=p)

    variances = np.empty((num_batches, max_num_layers))
    variances.fill(np.nan)
    with tqdm(desc="batches", total=num_batches) as progress_bar:
        for i, batch in enumerate(input_data_iterator):
            variance = net.get_acts(batch.input,
                                    early_stopping=True,
                                    return_variance=True)
            variances[i, :len(variance)] = variance
            progress_bar.update(1)

    bad_indices = np.isnan(variances) + np.isinf(variances)
    variances = np.ma.array(variances, mask=bad_indices)
    means = np.mean(variances, axis=0)
    mask = np.ma.getmask(means)
    means[mask] = np.nan
    means = np.array(means)

    if os.path.exists(data_save_path):
        previous_sims = np.load(data_save_path)
        previous_sims = np.vstack([previous_sims, means])
        np.save(data_save_path, previous_sims)
        previous_sims = np.load(sigma_save_path)
        previous_sims = np.append(previous_sims, [weight_sigma_square], axis=0)
        np.save(sigma_save_path, previous_sims)
    else:
        np.save(sigma_save_path, np.array([weight_sigma_square]))
        np.save(data_save_path, means)
def JL_reconstruction(data='mnist', JL_dim=32 * 32 / 2, batch_size=100, seed=None):
    # -------------------------------------------------------
    # get the dataset as infinite generator
    if seed is not None:
        np.random.seed(seed)
    if data == 'cifar10':
        data_dir = settings.filepath_cifar10
        train_gen, dev_gen = cifar10.load(batch_size, data_dir=data_dir)
        picture_size = 32 * 32 * 3
    elif data == 'celebA32':
        data_dir = settings.filepath_celebA32
        train_gen, dev_gen = celeba.load(batch_size, data_dir=data_dir, black_white=False)
        picture_size = 32 * 32 * 3
    elif data == 'mnist':
        filename = '../data/MNIST/mnist32_zoom_1'
        train_gen, n_samples_train, dev_gen, n_samples_test = preprocessing_mnist.load(
            filename, batch_size, npy=True)
        picture_size = 32 * 32
    elif data == 'celebA32_bw':
        data_dir = settings.filepath_celebA32
        train_gen, dev_gen = celeba.load(batch_size, data_dir=data_dir, black_white=True)
        picture_size = 32 * 32

    # -------------------------------------------------------
    # make directories
    dir1 = 'JL_reconstruction/'
    path = dir1 + data + '/'
    if not os.path.isdir(dir1):
        call(['mkdir', dir1])
    if not os.path.isdir(path):
        call(['mkdir', path])

    # -------------------------------------------------------
    # JL mapping
    A = np.random.randn(JL_dim, picture_size) / np.sqrt(picture_size)
    ATA = np.matmul(np.transpose(A), A)

    # JL error
    JL_error = np.round(np.sqrt(8 * np.log(2 * batch_size) / JL_dim), decimals=4)
    print '\ndata dimension: {}'.format(picture_size)
    print 'JL dimension: {}'.format(JL_dim)
    print 'batch size: {}'.format(batch_size)
    print 'JL error: {}\n'.format(JL_error)

    # -------------------------------------------------------
    # encode and decode data
    im = train_gen().next()[0]
    im1 = im / 255.99
    reconstruction = np.matmul(im1, ATA)  # / float(picture_size)
    reconstruction = (255.99 * np.clip(reconstruction, 0, 1)).astype('uint8')
    # reconstruction = np.matmul(im, ATA)  # / float(picture_size)
    # reconstruction = (np.clip(reconstruction, 0, 255)).astype('uint8')
    save_images.save_images(im, save_path=path + 'true_images.png')
    save_images.save_images(reconstruction,
                            save_path=path + 'JL_reconstructed_image.png')

    im_d = np.zeros((100, picture_size))
    for i in range(batch_size):
        A = np.random.randn(JL_dim, picture_size) / np.sqrt(picture_size)
        ATA = np.matmul(np.transpose(A), A)
        reconstruction = np.matmul(im1[i].reshape((1, picture_size)), ATA)  # / float(picture_size)
        reconstruction = (255.99 * np.clip(reconstruction, 0, 1)).astype('uint8')
        im_d[i] = reconstruction.reshape((picture_size,))
    im_d = im_d.astype('uint8')
    save_images.save_images(im_d,
                            save_path=path + 'different_JL_reconstructed_image.png')
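# A standalone sketch (an assumption, not part of the original script) of the
# Johnson-Lindenstrauss trade-off used above: the error bound
# eps = sqrt(8 * ln(2 * n) / k) can be inverted to choose the target dimension k
# for a desired eps, and the pairwise-distance distortion can be checked
# empirically. All sizes below are illustrative.
import numpy as np

n, d = 100, 32 * 32 * 3                              # number of points, ambient dimension
eps = 0.5                                            # desired pairwise-distance error
k = int(np.ceil(8 * np.log(2 * n) / eps ** 2))       # JL target dimension

X = np.random.rand(n, d)
A = np.random.randn(d, k) / np.sqrt(k)               # random JL projection, scaled for distance preservation
Y = np.matmul(X, A)

# compare one pairwise distance before and after projection
d_orig = np.linalg.norm(X[0] - X[1])
d_proj = np.linalg.norm(Y[0] - Y[1])
print('distance before: {:.3f}, after: {:.3f}'.format(d_orig, d_proj))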
    if use_cuda:
        fixed_noise_128 = fixed_noise_128.cuda(gpu)
    with torch.no_grad():
        noisev = autograd.Variable(fixed_noise_128)
        samples = netG(noisev)
    samples = samples.view(-1, 3, 32, 32)
    samples = samples.mul(0.5).add(0.5)
    samples = samples.cpu().data.numpy()
    save_images(samples,
                'results_2/' + str(name) + '/samples_' + str(frame) + '.png')
    # save_images(samples, './samples_{}.jpg'.format(frame))


# Dataset iterator
# train_gen = load(BATCH_SIZE, data_dir=DATA_DIR)
train_gen, dev_gen = cifar10.load(BATCH_SIZE, data_dir=DATA_DIR)


def inf_train_gen():
    while True:
        for images in train_gen():
            yield images


gen = inf_train_gen()

preprocess = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

criterion = nn.BCEWithLogitsLoss()
import keras
from keras.optimizers import Adam, SGD
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from optimizers import MaSS
from cifar10 import load
from resnet import resnet_v1, resnet_v2
import os

batch_size = 64
epochs = 200
data_augmentation = True
num_classes = 10

# Load Cifar-10 data
(x_train, y_train), (x_test, y_test) = load()
input_shape = x_train.shape[1:]

# Model parameters
n = 5
version = 1
if version == 1:
    depth = n * 6 + 2
elif version == 2:
    depth = n * 9 + 2

# Model name, depth and version
model_type = 'ResNet%dv%d' % (depth, version)

################################################################
# Whenever learning rate reduces, restart the MaSS optimizer at the latest learned weights.
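# A hedged sketch (not the repository's implementation) of the idea stated above:
# when the schedule drops the learning rate, rebuild the optimizer and recompile
# the model, so training restarts from the latest learned weights with a fresh
# optimizer state. SGD is used only as a stand-in; the actual script would
# construct MaSS with whatever arguments it accepts. The schedule values are
# illustrative assumptions.
from keras.callbacks import Callback


def lr_schedule(epoch):
    # illustrative schedule: drop the rate by 10x at epochs 80 and 120
    if epoch >= 120:
        return 1e-3
    if epoch >= 80:
        return 1e-2
    return 1e-1


class RestartOptimizerOnLRDrop(Callback):
    def __init__(self, schedule):
        super(RestartOptimizerOnLRDrop, self).__init__()
        self.schedule = schedule
        self.current_lr = None

    def on_epoch_begin(self, epoch, logs=None):
        new_lr = self.schedule(epoch)
        if self.current_lr is not None and new_lr < self.current_lr:
            # recompiling replaces the optimizer but keeps the learned weights
            self.model.compile(optimizer=SGD(lr=new_lr),
                               loss='categorical_crossentropy',
                               metrics=['accuracy'])
        self.current_lr = new_lr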
def cifar_preprocess(data):
    data['data'] /= 255.
    return data


# Logger setup
logger = Logger('CIFAR10 AllConvNet')

# Configure GPU Device
if args.gpu >= 0:
    cuda.check_cuda_available()
xp = cuda.cupy if args.gpu >= 0 else np

# loading dataset
dataset = cifar10.load()
dim = dataset['train']['data'][0].size
N_train = len(dataset['train']['target'])
N_test = len(dataset['test']['target'])

train_data_dict = {'data': dataset['train']['data'].astype(np.float32),
                   'target': dataset['train']['target'].astype(np.int32)}
test_data_dict = {'data': dataset['test']['data'].astype(np.float32),
                  'target': dataset['test']['target'].astype(np.int32)}
train_data = DataFeeder(train_data_dict, batchsize=args.batch)
test_data = DataFeeder(test_data_dict, batchsize=args.valbatch)

train_data.hook_preprocess(cifar_preprocess)
test_data.hook_preprocess(cifar_preprocess)
def train(input_dim=INPUT_DIM, batch_size=BATCH_SIZE, n_features_first=N_FEATURES_FIRST,
          learning_rate=1e-4, epochs=ITERS, fixed_noise_size=FIXED_NOISE_SIZE,
          n_features_reduction_factor=2, architecture='JLSWGN', init_method='He', BN=True,
          JL_dim=None, JL_error=0.5, n_projections=10000, data='cifar10', load_saved=True):
    """
    - this is the function to use to train a Johnson-Lindenstrauss Generative Network model which uses the
      sliced Wasserstein-2 distance as objective function (JLSWGN) for CIFAR10, with the configuration given
      by the parameters
    - the function computes losses and auto-saves the model every 100 steps and automatically resumes
      training where it stopped (when load_saved=True)
    :param input_dim: the dimension of the latent space -> Z
    :param batch_size: the batch size, should be a divisor of 50k
    :param n_features_first: the number of feature maps in the first step of the generator
    :param epochs: the number of epochs to train for (in fact this number should be 50k/batch_size*true_epochs)
    :param fixed_noise_size: the number of pictures that is generated during training for visual progress
    :param n_features_reduction_factor: integer, e.g.: 1: use same number of feature-maps everywhere,
           2: half the number of feature-maps in every step
    :param architecture: right now only supports 'JLSWGN', 'SWGN', defaults to 'JLSWGN'
    :param init_method: the method with which the variables are initialized, support: 'uniform', 'He',
           defaults to 'He'
    :param BN: shall batch normalization be used
    :param JL_dim: the target dimension of the JL mapping
    :param JL_error: the max pairwise distance deviation error of the JL mapping, only applies when JL_dim=None
    :param n_projections: number of random projections in sliced Wasserstein-2 distance
    :param data: the data set which shall be used for training: celebA32, celebA32_bw, cifar10, mnist
    :param load_saved: whether an already existing training progress shall be loaded to continue there
           (if one exists)
    :return:
    """
    # -------------------------------------------------------
    # setting for sending emails and getting statistics
    send = settings.send_email

    # -------------------------------------------------------
    # architecture default
    use_JL = True
    if architecture not in ['SWGN']:
        architecture = 'JLSWGN'
    if architecture == 'SWGN':
        use_JL = False
        JL_error = None
        JL_dim = None

    # -------------------------------------------------------
    # data set default
    if data not in ['cifar10', 'celebA32', 'celebA32_bw', 'mnist', 'celebA64']:
        data = 'cifar10'
    if data in ['celebA32_bw', 'mnist']:
        picture_size = 32 * 32
        picture_dim = [-1, 32, 32]
        power = 5
        n_features_image = 1
    elif data in ['cifar10', 'celebA32']:
        picture_size = 32 * 32 * 3
        picture_dim = [-1, 32, 32, 3]
        power = 5
        n_features_image = 3
    elif data in ['celebA64']:
        picture_size = 64 * 64 * 3
        picture_dim = [-1, 64, 64, 3]
        power = 6
        n_features_image = 3
    print 'data set: {}'.format(data)
    print

    # -------------------------------------------------------
    # init_method default
    if init_method not in ['uniform']:
        init_method = 'He'

    # -------------------------------------------------------
    # JL_dim:
    if JL_dim is None:
        if JL_error is None and use_JL:
            use_JL = False
            architecture = 'SWGN'
            print
            print 'architecture changed to SWGN, since JL_dim and JL_error were None'
            print
        elif JL_error is not None:
            JL_dim = int(math.ceil(8 * np.log(2 * batch_size) / (JL_error ** 2)))
            # this uses the constant given on the Wikipedia page of "Johnson-Lindenstrauss Lemma"
    else:
        JL_error = np.round(np.sqrt(8 * np.log(2 * batch_size) / JL_dim), decimals=4)

    if use_JL and JL_dim >= picture_size:
        use_JL = False
        architecture = 'SWGN'
        JL_error = None
        JL_dim = None
        print
        print 'JL mapping is not used, since the target dimension was chosen bigger than the input dimension'
        print
    print 'JL_dim = {}'.format(JL_dim)
    print 'JL_error = {}'.format(JL_error)
    print

    # -------------------------------------------------------
    # create unique folder name
    dir1 = 'JLSWGN/'
    directory = dir1+str(data)+'_'+str(input_dim)+'_'+str(batch_size)+'_'+str(n_features_first)+'_'+\
                str(learning_rate)+'_'+str(n_features_reduction_factor)+'_'+\
                str(architecture)+'_'+str(init_method)+'_'+str(BN)+'_'+str(JL_dim)+'_'+str(JL_error)+'_'+\
                str(n_projections)+'/'
    samples_dir = directory + 'samples/'
    model_dir = directory + 'model/'

    # create directories if they don't exist
    if not os.path.isdir(dir1):
        call(['mkdir', dir1])
    if not os.path.isdir(directory):
        load_saved = False
        print 'make new directory:', directory
        print
        call(['mkdir', directory])
        call(['mkdir', samples_dir])
        call(['mkdir', model_dir])

    # if directories already exist, but model wasn't saved so far, set load_saved to False
    if 'training_progress.csv' not in os.listdir(directory):
        load_saved = False

    # -------------------------------------------------------
    # initialize a TF session
    config = tf.ConfigProto()
    if N_CPUS_TF is None:
        number_cpus_tf = settings.number_cpus
    else:
        number_cpus_tf = N_CPUS_TF
    config.intra_op_parallelism_threads = number_cpus_tf
    config.inter_op_parallelism_threads = number_cpus_tf
    session = tf.Session(config=config)

    # -------------------------------------------------------
    # convenience function to build the model
    def build_model():
        """
        - function to build the model
        """
        with tf.name_scope('placeholders'):
            real_data_int = tf.placeholder(tf.int32, shape=[None, picture_size])
            x_true = 2 * ((tf.cast(real_data_int, tf.float32) / 255.) - .5)
            z = tf.placeholder(tf.float32, [None, input_dim])
            if use_JL:
                JL = tf.placeholder(tf.float32, [picture_size, JL_dim])
                P_non_normalized = tf.placeholder(tf.float32, [JL_dim, n_projections])
                P_non_normalized_SWD = tf.placeholder(tf.float32, [picture_size, n_projections])
            else:
                JL = None
                P_non_normalized = tf.placeholder(tf.float32, [picture_size, n_projections])
                P_non_normalized_SWD = None

        x_generated = generator(z,
                                n_features_first=n_features_first,
                                n_features_reduction_factor=n_features_reduction_factor,
                                min_features=64,
                                BN=BN,
                                power=power,
                                init_method=init_method,
                                n_features_image=n_features_image)

        # define loss (big part taken from SWG)
        with tf.name_scope('loss'):
            # apply the Johnson-Lindenstrauss map, if wanted, to the flattened array
            if use_JL:
                JL_true = tf.matmul(x_true, JL)
                JL_gen = tf.matmul(x_generated, JL)
            else:
                JL_true = x_true
                JL_gen = x_generated

            # next project the samples (images). After being transposed, we have tensors
            # of the format: [[projected_image1_proj1, projected_image2_proj1, ...],
            #                 [projected_image1_proj2, projected_image2_proj2, ...], ...]
            # Each row has the projections along one direction. This makes it easier for
            # the sorting that follows.
            # first normalize the random normal vectors to lie in the sphere
            P = tf.nn.l2_normalize(P_non_normalized, axis=0)

            projected_true = tf.transpose(tf.matmul(JL_true, P))
            projected_fake = tf.transpose(tf.matmul(JL_gen, P))

            sorted_true, true_indices = tf.nn.top_k(input=projected_true, k=batch_size)
            sorted_fake, fake_indices = tf.nn.top_k(input=projected_fake, k=batch_size)

            # For faster gradient computation, we do not use sorted_fake to compute
            # loss. Instead we re-order the sorted_true so that the samples from the
            # true distribution go to the correct sample from the fake distribution.
            # It is less expensive (memory-wise) to rearrange arrays in TF.
            # Flatten the sorted_true from dim [n_projections, batch_size].
            flat_true = tf.reshape(sorted_true, [-1])

            # Modify the indices to reflect this transition to an array.
            # new index = row + index
            rows = np.asarray([batch_size * np.floor(i * 1.0 / batch_size)
                               for i in range(n_projections * batch_size)])
            rows = rows.astype(np.int32)
            flat_idx = tf.reshape(fake_indices, [-1, 1]) + np.reshape(rows, [-1, 1])

            # The scatter operation takes care of reshaping to the rearranged matrix
            shape = tf.constant([batch_size * n_projections])
            rearranged_true = tf.reshape(tf.scatter_nd(flat_idx, flat_true, shape),
                                         [n_projections, batch_size])

            generator_loss = tf.reduce_mean(tf.square(projected_fake - rearranged_true))

            # get for JLSWGN the sliced Wasserstein distance (SWD) (since SWD and JLSWD are not comparable)
            if use_JL:
                P_SWD = tf.nn.l2_normalize(P_non_normalized_SWD, axis=0)
                projected_true_SWD = tf.transpose(tf.matmul(x_true, P_SWD))
                projected_fake_SWD = tf.transpose(tf.matmul(x_generated, P_SWD))
                sorted_true_SWD, true_indices_SWD = tf.nn.top_k(input=projected_true_SWD, k=batch_size)
                sorted_fake_SWD, fake_indices_SWD = tf.nn.top_k(input=projected_fake_SWD, k=batch_size)
                flat_true_SWD = tf.reshape(sorted_true_SWD, [-1])
                flat_idx_SWD = tf.reshape(fake_indices_SWD, [-1, 1]) + np.reshape(rows, [-1, 1])
                rearranged_true_SWD = tf.reshape(tf.scatter_nd(flat_idx_SWD, flat_true_SWD, shape),
                                                 [n_projections, batch_size])
                SWD = tf.reduce_mean(tf.square(projected_fake_SWD - rearranged_true_SWD))
            else:
                SWD = generator_loss

        with tf.name_scope('optimizer'):
            generator_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
            g_optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.5)
            g_train = g_optimizer.minimize(generator_loss, var_list=generator_vars)

        # initialize variables using init_method
        session.run(tf.global_variables_initializer())

        return real_data_int, z, x_generated, JL, P_non_normalized, P_non_normalized_SWD, SWD, g_train

    # -------------------------------------------------------
    # build the model
    real_data_int, z, x_generated, JL, P_non_normalized, P_non_normalized_SWD, SWD, g_train = build_model()

    # -------------------------------------------------------
    # For creating and saving samples (taken from IWGAN)
    fixed_noise = np.random.normal(size=(fixed_noise_size, input_dim)).astype('float32')

    def generate_image(frame):
        samples = session.run(x_generated, feed_dict={z: fixed_noise})
        samples = ((samples + 1.) * (255. / 2)).astype('uint8')  # transform linearly from [-1,1] to [0,255]
        samples = samples.reshape(picture_dim)
        save_images.save_images(samples, samples_dir + 'iteration_{}.png'.format(frame))

    # -------------------------------------------------------
    # For calculating inception score
    softmax = None

    def get_inception_score(n=N_IS, softmax=softmax):
        all_samples = []
        for i in xrange(n / 100):
            z_input = np.random.randn(100, input_dim)
            all_samples.append(session.run(x_generated, feed_dict={z: z_input}))
        all_samples = np.concatenate(all_samples, axis=0)
        all_samples = ((all_samples + 1.) * (255. / 2)).astype('int32')
        all_samples = all_samples.reshape((-1, 32, 32, 3))
        return inception_score.get_inception_score(list(all_samples), softmax=softmax)

    # -------------------------------------------------------
    # get the dataset as infinite generator
    if data == 'cifar10':
        data_dir = settings.filepath_cifar10
        train_gen, dev_gen = cifar10.load(batch_size, data_dir=data_dir)
        n_dev_samples = 10000
    elif data == 'celebA32':
        data_dir = settings.filepath_celebA32
        train_gen, dev_gen = celeba.load(batch_size, data_dir=data_dir, black_white=False)
        n_dev_samples = 10000
    elif data == 'mnist':
        filename = '../data/MNIST/mnist32_zoom_1'
        train_gen, n_samples_train, dev_gen, n_samples_test = preprocessing_mnist.load(
            filename, batch_size, npy=True)
        n_dev_samples = 10000
    elif data == 'celebA32_bw':
        data_dir = settings.filepath_celebA32
        train_gen, dev_gen = celeba.load(batch_size, data_dir=data_dir, black_white=True)
        n_dev_samples = 10000
    elif data == 'celebA64':
        if settings.euler:
            data_dir = settings.filepath_celebA64_euler
        else:
            data_dir = settings.filepath_celebA64
        train_gen, dev_gen = celeba.load(batch_size, data_dir=data_dir, black_white=False)
        n_dev_samples = 10000

    def inf_train_gen():
        while True:
            for images, _ in train_gen():
                yield images

    gen = inf_train_gen()

    # -------------------------------------------------------
    # for saving the model create a saver
    saver = tf.train.Saver(max_to_keep=1)

    epochs_trained = 0
    if data == 'cifar10' and COMPUTE_IS:
        tp_columns = ['iteration', 'time_for_iterations', 'SWD_approximation', 'time_for_SWD',
                      'IS', 'time_for_IS']
    else:
        tp_columns = ['iteration', 'time_for_iterations', 'SWD_approximation', 'time_for_SWD']
    training_progress = pd.DataFrame(data=None, index=None, columns=tp_columns)

    # restore the model:
    if load_saved:
        saver.restore(sess=session, save_path=model_dir + 'saved_model')
        epochs_trained = int(np.loadtxt(fname=model_dir + 'epochs.csv'))
        tp_app = pd.read_csv(filepath_or_buffer=directory + 'training_progress.csv',
                             index_col=0, header=0)
        training_progress = pd.concat([training_progress, tp_app], axis=0, ignore_index=True)
        print 'loaded training progress, and the model, which was already trained for {} epochs'.format(
            epochs_trained)
        print training_progress
        print
        # if the network is already trained completely, set send to false
        if epochs_trained == epochs:
            send = False

    # -------------------------------------------------------
    # print and get model summary
    n_params_gen = model_summary(scope='generator')[0]
    print

    # -------------------------------------------------------
    # FK: print model config to file
    model_config = [['input_dim', 'batch_size', 'n_features_first', 'learning_rate', 'fixed_noise_size',
                     'n_features_reduction_factor', 'architecture', 'init_method', 'BN', 'JL_dim',
                     'JL_error', 'n_projections', 'data_set', 'n_trainable_params_gen'],
                    [input_dim, batch_size, n_features_first, learning_rate, fixed_noise_size,
                     n_features_reduction_factor, architecture, init_method, BN, JL_dim,
                     JL_error, n_projections, data, n_params_gen]]
    model_config = np.transpose(model_config)
    model_config = pd.DataFrame(data=model_config)
    model_config.to_csv(path_or_buf=directory + 'model_config.csv')
    print 'saved model configuration'
    print

    # -------------------------------------------------------
    # training loop
    print 'train model with config:'
    print model_config
    print

    t = time.time()  # get start time

    for i in xrange(epochs - epochs_trained):
        # print the current epoch
        print('iteration={}/{}'.format(i + epochs_trained + 1, epochs))

        images = gen.next()
        z_train = np.random.randn(batch_size, input_dim)
        if use_JL:
            JL_train = np.random.randn(picture_size, JL_dim)
            P_train = np.random.randn(JL_dim, n_projections)
            session.run(g_train, feed_dict={real_data_int: images,
                                            z: z_train,
                                            JL: JL_train,
                                            P_non_normalized: P_train})
        else:
            P_train = np.random.randn(picture_size, n_projections)
            session.run(g_train, feed_dict={real_data_int: images,
                                            z: z_train,
                                            P_non_normalized: P_train})

        if not settings.euler:
            mem = memory()
            print 'memory use (GB): {}'.format(mem)

        # all STEP_SIZE_LOSS_COMPUTATION steps compute the losses and elapsed times,
        # and generate images, and save model
        if (i + epochs_trained) % STEP_SIZE_LOSS_COMPUTATION == (STEP_SIZE_LOSS_COMPUTATION - 1):
            # get time for last 100 epochs
            elapsed_time = time.time() - t

            # generate sample images from fixed noise
            generate_image(i + epochs_trained + 1)
            print 'generated images'

            # compute and save losses on dev set, starting after ??? iterations
            if i + epochs_trained + 1 >= START_COMPUTING_LOSS:
                t = time.time()
                dev_d_loss = []
                print 'compute loss'
                j = 0
                for images_dev, _ in dev_gen():
                    if not settings.euler:
                        # progress bar
                        sys.stdout.write('\r>> Compute SWD %.1f%%' %
                                         (float(j) / float(n_dev_samples / batch_size) * 100.0))
                        sys.stdout.flush()
                    j += 1
                    z_train_dev = np.random.randn(batch_size, input_dim)
                    P_train_dev = np.random.randn(picture_size, n_projections)
                    if use_JL:
                        _dev_d_loss = session.run(SWD, feed_dict={real_data_int: images_dev,
                                                                  z: z_train_dev,
                                                                  P_non_normalized_SWD: P_train_dev})
                    else:
                        _dev_d_loss = session.run(SWD, feed_dict={real_data_int: images_dev,
                                                                  z: z_train_dev,
                                                                  P_non_normalized: P_train_dev})
                    dev_d_loss.append(_dev_d_loss)
                dev_loss = np.mean(dev_d_loss)
                t_loss = time.time() - t

                # compute inception score (IS)
                if data == 'cifar10' and COMPUTE_IS:
                    if (i + epochs_trained) % IS_FREQ == (IS_FREQ - 1):
                        print 'compute inception score'
                        t = time.time()
                        IS_mean, IS_std, softmax = get_inception_score(N_IS, softmax=softmax)
                        IS = (IS_mean, IS_std)
                        t_IS = time.time() - t
                    else:
                        IS = None
                        t_IS = None
            else:
                dev_loss = None
                t_loss = None
                IS = None
                t_IS = None

            if data == 'cifar10' and COMPUTE_IS:
                tp_app = pd.DataFrame(data=[[i + epochs_trained + 1, elapsed_time, dev_loss, t_loss,
                                             IS, t_IS]],
                                      index=None, columns=tp_columns)
            else:
                tp_app = pd.DataFrame(data=[[i + epochs_trained + 1, elapsed_time, dev_loss, t_loss]],
                                      index=None, columns=tp_columns)
            training_progress = pd.concat([training_progress, tp_app], axis=0, ignore_index=True)

            # save model
            saver.save(sess=session, save_path=model_dir + 'saved_model')

            # save number of epochs trained
            np.savetxt(fname=model_dir + 'epochs.csv', X=[i + epochs_trained + 1])
            print 'saved model after training epoch {}'.format(i + epochs_trained + 1)

            # save training progress
            training_progress.to_csv(path_or_buf=directory + 'training_progress.csv')
            print 'saved training progress'
            print

            # fix new start time
            t = time.time()

    # -------------------------------------------------------
    # after training close the session
    session.close()
    tf.reset_default_graph()

    # -------------------------------------------------------
    # when training is done send email
    if send:
        subject = 'JL-SWG ({}) training finished'.format(data)
        body = 'to download the results of this model use (in the terminal):\n\n'
        body += 'scp -r [email protected]:/cluster/home/fkrach/MasterThesis/MTCode1/' + directory + ' .'
        files = [directory + 'model_config.csv',
                 directory + 'training_progress.csv',
                 samples_dir + 'iteration_{}.png'.format(epochs)]
        send_email.send_email(subject=subject, body=body, file_names=files)

    return directory
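# A small NumPy sketch (an illustration, not part of the repository) of what the
# 'loss' scope in the function above computes: for every random projection
# direction, both point clouds are projected to 1-D, sorted, and matched
# rank-by-rank; the mean squared difference of the matched pairs is the sliced
# Wasserstein-2 estimate. The scatter_nd construction in the TF graph only
# re-orders the sorted true values into the ordering of the fake samples so that
# gradients flow through the unsorted fake projections. All sizes are illustrative.
import numpy as np

batch_size, dim, n_projections = 8, 5, 100
x_true = np.random.rand(batch_size, dim)
x_fake = np.random.rand(batch_size, dim)

P = np.random.randn(dim, n_projections)
P /= np.linalg.norm(P, axis=0, keepdims=True)      # unit-norm projection directions

proj_true = np.matmul(x_true, P).T                 # [n_projections, batch_size]
proj_fake = np.matmul(x_fake, P).T

sorted_true = np.sort(proj_true, axis=1)           # rank-ordered true projections
order_fake = np.argsort(proj_fake, axis=1)         # rank order of the fake projections

# place the k-th smallest true value at the position of the k-th smallest fake value
row_idx = np.arange(n_projections)[:, None]
rearranged_true = np.empty_like(sorted_true)
rearranged_true[row_idx, order_fake] = sorted_true

swd = np.mean((proj_fake - rearranged_true) ** 2)
print('sliced Wasserstein-2 estimate: {:.4f}'.format(swd))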
# truck, class-id: 9

# Necessary Imports and obtaining the training and testing data.
import cifar10
import numpy as np
from bwsi_grader.cogworks.nearest_neighbors import grade_distances
from bwsi_grader.cogworks.nearest_neighbors import grade_predict
from bwsi_grader.cogworks.nearest_neighbors import grade_make_folds

# "cifar10" must be a subfolder in the current directory for this part to work.
if not cifar10.get_path().is_file():
    cifar10.download()
else:
    print("cifar10 is already downloaded at:\n{}".format(cifar10.get_path()))

# Loading in the training data and converting them into floats.
x_train, y_train, x_test, y_test = (i.astype("float32") for i in cifar10.load())
x_train = x_train.transpose([0, 2, 3, 1])
x_test = x_test.transpose([0, 2, 3, 1])

# Limiting the data to make the program run faster.
x_train, y_train = x_train[:5000], y_train[:5000]
x_test, y_test = x_test[:500], y_test[:500]

print("\n")
print('Training data shape: ', x_train.shape)
print('Training labels shape: ', y_train.shape)
print('Test data shape: ', x_test.shape)
print('Test labels shape: ', y_test.shape)

# Flattening the data values
print("\n")
x_train = np.reshape(x_train, (x_train.shape[0], -1))
x_test = np.reshape(x_test, (x_test.shape[0], -1))
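# A hedged sketch (not from the original notebook) of the kind of function the
# flattened arrays above are prepared for: a fully vectorized pairwise Euclidean
# distance between the rows of x_test and x_train, using the expansion
# ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2. The function name and signature are
# illustrative assumptions, not the grader's required interface.
def pairwise_distances(x, y):
    """Return an (x.shape[0], y.shape[0]) array of Euclidean distances."""
    sq = (x ** 2).sum(axis=1)[:, np.newaxis] - 2 * x.dot(y.T) + (y ** 2).sum(axis=1)
    return np.sqrt(np.maximum(sq, 0.0))  # clip tiny negatives from round-off


dists = pairwise_distances(x_test, x_train)
print('Distance matrix shape: ', dists.shape)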