def build_nn_objective(num_hidden=5, num_data=1000):
    """Builds a neural net, creates weights and data from that net, then
    defines the objective as the training error.

    Args:
        num_hidden: hidden-layer sizes. Either a list of ints (one entry per
            hidden layer) or a single int (treated as one hidden layer of
            that width).
        num_data: number of MNIST training examples to keep.

    Returns:
        (parser.N, objective, obj_grad, obj_hvp, weights_subsets):
        total weight count, the loss function of the flat weight vector, its
        gradient, a sliced Hessian-vector product, and the index slice for
        each named weight group.
    """
    # Bug fix: the default (5) is a bare int, but layer_sizes below is built
    # by list concatenation, so calling with the default raised TypeError.
    # Normalize an int to a single-hidden-layer list; list callers unchanged.
    if isinstance(num_hidden, int):
        num_hidden = [num_hidden]

    # Load and process MNIST data (borrowing from Kayak)
    partial_flatten = lambda x: np.reshape(x, (x.shape[0], np.prod(x.shape[1:])))
    one_hot = lambda x, K: np.array(x[:, None] == np.arange(K)[None, :], dtype=int)
    edge_pixels_removed = 4  # on each edge.
    subsample_pixels = 5  # in both directions.
    images, labels = mnist()
    # Remove edges
    images = images[:, edge_pixels_removed:-edge_pixels_removed,
                    edge_pixels_removed:-edge_pixels_removed]
    # Subsample data
    images = images[:num_data, ::subsample_pixels, ::subsample_pixels]
    images = partial_flatten(images) / 255.0  # After this, train_images is N by (x * y)
    labels = one_hot(labels, 10)[:num_data, :]  # TODO: Randomize order?

    # Build the network.
    layer_sizes = [images.shape[1]] + num_hidden + [10]
    L2_reg = 0
    parser, loss = make_nn_funs(layer_sizes, L2_reg)

    # Build functions to interrogate the objective at a particular set of parameters.
    def objective(x, idxs=slice(0, num_data)):
        return loss(x, X=images, T=labels, idxs=idxs)

    obj_grad = grad(objective)
    obj_hvp = sliced_hvp(obj_grad)
    # NOTE(review): iteritems() is Python-2 only, consistent with the rest of
    # this snippet; switch to items() if the file is ported to Python 3.
    weights_subsets = {k: v[0] for k, v in parser.idxs_and_shapes.iteritems()}
    return parser.N, objective, obj_grad, obj_hvp, weights_subsets
def build_logistic_objective():
    """Load a subsampled MNIST dataset and build a logistic-regression
    training objective.

    Returns:
        (parser.N, objective, obj_grad, obj_hvp, weights_subsets):
        total weight count, the training loss as a function of the flat
        weight vector, its gradient, a sliced Hessian-vector product, and
        the index slice for each named weight group.
    """
    pixel_stride = 5   # subsample pixels in both directions
    data_stride = 10   # keep every 10th example

    raw_images, raw_labels = mnist()
    thinned = raw_images[::data_stride, ::pixel_stride, ::pixel_stride]
    # Flatten each image to a vector and rescale pixel values to [0, 1];
    # after this, images is N by (x * y).
    images = np.reshape(thinned, (thinned.shape[0], np.prod(thinned.shape[1:]))) / 255.0
    # One-hot encode the digit labels over 10 classes, subsampled the same way.
    labels = np.array(raw_labels[:, None] == np.arange(10)[None, :],
                      dtype=int)[::data_stride, :]

    # Build the model (no L2 regularization).
    parser, loss = make_logistic_funs(images.shape[1], 10, 0)

    # Functions to interrogate the objective at a particular set of parameters.
    objective = partial(loss, X=images, T=labels)
    obj_grad = grad(objective)
    obj_hvp = sliced_hvp(obj_grad)
    weights_subsets = {name: spec[0]
                       for name, spec in parser.idxs_and_shapes.iteritems()}
    return parser.N, objective, obj_grad, obj_hvp, weights_subsets
def get_mnist(self):
    """Return only the 0/1 digits of MNIST, relabeled as {-1, +1}."""
    X, Y = mnist()
    # Restrict to the two-class subproblem.
    keep = np.logical_or(Y == 0, Y == 1)
    X, Y = X[keep], Y[keep]
    # Map class 0 to -1 so the targets are -1/+1.
    Y[Y == 0] = -1
    return X, Y
# NOTE(review): these three lines are the tail of a plotting helper whose
# `def` line is above this chunk; `name` is presumably its filename argument.
axes = plt.gca()
plt.colorbar()
plt.savefig(name)


if __name__ == '__main__':
    # Load every component of the trained adversarial autoencoder from disk.
    # `name` here is a module-level directory name defined above this chunk.
    pre_encoder = keras.models.load_model(name + '/pre.h5')
    encoder = keras.models.load_model(name + '/encoder.h5')
    encoder_style = keras.models.load_model(name + '/encoder0.h5')
    discriminator_style = keras.models.load_model(name + '/discriminator0.h5')
    decoder = keras.models.load_model(name + '/decoder.h5')
    autoencoder = keras.models.load_model(name + '/model.h5')

    from util import mnist
    x_train, y_train, x_test, y_test = mnist()

    # Visualize the style-code latent space on test and train data.
    style_test = encoder_style.predict(x_test)
    plot_latent(style_test, y_test, "style.png")
    style_train = encoder_style.predict(x_train)
    plot_latent(style_train, y_train, "style-train.png")

    # Run the digit-code encoder/discriminator pair on the test set.
    encoder_digit = keras.models.load_model(name + '/encoder1.h5')
    discriminator_digit = keras.models.load_model(name + '/discriminator1.h5')
    digit_test = encoder_digit.predict(x_test)
    # Python 2 print statement -- this snippet predates a Python 3 port.
    print digit_test[:10]
    result_test = discriminator_digit.predict(digit_test)
def aae_train(name, epoch=128, computational_effort_factor=8):
    """Train an adversarial autoencoder, alternating reconstruction,
    discriminator, and generator updates per mini-batch.

    Relies on module-level globals defined above this chunk: `noise`,
    `dimensions`, `encoder`, `decoder`, `discriminators`, `encoders`,
    `aae_r`/`aae_d`/`aae_g` (compiled sub-models), `set_trainable`,
    and the held-out `x_test`/`y_test`.  Written for Python 2 (see the
    `print` statement and the side-effecting `map` calls below).
    """
    from keras.callbacks import TensorBoard, CSVLogger, ReduceLROnPlateau, EarlyStopping
    from keras.utils.generic_utils import Progbar
    from util import mnist, plot_examples

    # Batch size scales with the epoch count so total work per epoch is
    # roughly constant as `epoch` varies.
    batch_size = int(epoch * computational_effort_factor)
    print("epoch: {0}, batch: {1}".format(epoch, batch_size))
    x_train, y_train, x_test, y_test = mnist()

    from plot_all import plot_latent
    # Baseline plot: what the noise source alone looks like in latent space.
    plot_latent(noise.predict(x_train), np.zeros_like(y_train), "style-noise.png")

    x_train = x_train[:36000, :]  # for removing residuals
    total = x_train.shape[0]
    # Discriminator targets: ones = "real" prior sample, zeros = "fake" code.
    real_train = np.ones([total, dimensions])
    fake_train = np.zeros([total, dimensions])
    r_loss, d_loss, g_loss = 0., 0., 0.
    try:
        for e in range(epoch):
            # Per-epoch counters of how many D/G updates were performed.
            d = {'discriminator': 0, 'generator': 0}
            for i in range(total // batch_size):
                batch_pb = Progbar(total, width=25)

                def update(force=False):
                    # Refresh the progress bar with the latest losses.
                    batch_pb.update(
                        min((i + 1) * batch_size, total), [
                            ('r', r_loss),
                            ('d', d_loss),
                            ('g', g_loss),
                            # ('d-g',(d_loss-g_loss))
                        ],
                        force=force)

                x_batch = x_train[i * batch_size:(i + 1) * batch_size]
                real_batch = real_train[i * batch_size:(i + 1) * batch_size]
                fake_batch = fake_train[i * batch_size:(i + 1) * batch_size]
                # Targets for the discriminator (fake||real) and the
                # generator (real||real) heads, concatenated on axis 1.
                d_batch = np.concatenate((fake_batch, real_batch), 1)
                g_batch = np.concatenate((real_batch, real_batch), 1)

                def train_autoencoder():
                    # Reconstruction step: only encoder+decoder trainable.
                    set_trainable(encoder, True)
                    set_trainable(decoder, True)
                    # NOTE(review): Python-2 eager `map` used purely for its
                    # side effect; under Python 3 this would be a no-op.
                    map(lambda d: set_trainable(d, False), discriminators)
                    return aae_r.train_on_batch(x_batch, x_batch)

                def test():
                    # Evaluate all three losses without updating weights.
                    return \
                        aae_r.test_on_batch(x_batch, x_batch), \
                        aae_d.test_on_batch(x_batch, d_batch), \
                        aae_g.test_on_batch(x_batch, g_batch)

                def train_discriminator():
                    d['discriminator'] += 1
                    # Freeze the autoencoder; train the discriminators only.
                    set_trainable(encoder, False)
                    set_trainable(decoder, False)
                    map(lambda d: set_trainable(d, True), discriminators)
                    return aae_d.train_on_batch(x_batch, d_batch)

                def train_generator():
                    d['generator'] += 1
                    # Train the encoder to fool the frozen discriminators.
                    set_trainable(encoder, True)
                    set_trainable(decoder, False)
                    map(lambda d: set_trainable(d, False), discriminators)
                    return aae_g.train_on_batch(x_batch, g_batch)

                # r_loss = train_autoencoder()
                # NOTE(review): the training-step losses are immediately
                # overwritten by the evaluation below -- the train calls are
                # kept for their weight updates only.
                d_loss = train_discriminator()
                g_loss = train_generator()
                r_loss, d_loss, g_loss = test()
                update()
            # Python 2 print statement.
            print "Epoch {}/{}: {}".format(e, epoch, [('r', r_loss), ('d', d_loss), ('g', g_loss), ('td', d['discriminator']), ('tg', d['generator'])])
            if (e % 120) == 0:
                # Periodic latent-space snapshot on the held-out test set.
                from plot_all import plot_latent, plot_latent_nolimit
                # NOTE(review): test() closes over the *last* batch of the
                # epoch, so these losses reflect only that batch.
                r_loss, d_loss, g_loss = test()
                z_test = encoders[0].predict(x_test)
                plot_latent(z_test, y_test, "style-test-{}.png".format(e))
                plot_latent_nolimit(z_test, y_test, "style2-test-{}.png".format(e))
    except KeyboardInterrupt:
        # Allow Ctrl-C to stop training gracefully.
        print("learning stopped")
def main():
    """Parse CLI arguments, then build the dataset, model, optimizer, and
    Trainer, and run training for the SZO (stochastic zeroth-order) experiments.

    Relies on names imported above this chunk: mnist, cifar10, get_dataloader,
    the model/optimizer classes, Trainer, and TqdmLoggingHandler.
    """
    ap = argparse.ArgumentParser("SZO")
    ap.add_argument("--data", choices=["mnist", "cifar10"], default="mnist",
                    help="dataset")  #, "skewedmnist"
    ap.add_argument(
        "--opt",
        choices=["first", "flaxman", "dueling", "ghadimi", "agarwal"],
        help="optimizer type")
    ap.add_argument("--model", choices=["fc3", "cnn"], help="Model type")
    ap.add_argument("--depth", default=1, type=int, help="Depth of the cnn")
    ap.add_argument("--seed", default=12345, type=int, help="random seed")
    ap.add_argument("--num_epochs", default=5, type=int, help="number of epochs")
    ap.add_argument("--num_rounds", default=20, type=int, help="number of rounds")
    ap.add_argument("--lr", default=0.1, type=float, help="initial learning rate")
    ap.add_argument("--pr", default=0.2, type=float, help="pruning rate")
    ap.add_argument("--mu", default=0.1, type=float,
                    help="exploration rate, smoothing parameter")
    ap.add_argument("--beta", default=0.0, type=float, help="momentum")
    ap.add_argument("--max_grad_norm", default=0.0, type=float,
                    help="maximum gradient norm")
    ap.add_argument("--var", default=1.0, type=float, help="noise variance")
    ap.add_argument("--eval_interval", default=10000, type=int,
                    help="evaluation interval")
    ap.add_argument("--batch_size", default=64, type=int, help="batch_size")
    ap.add_argument("--eval_batch_size", default=1000, type=int,
                    help="batch size used in evaluation")
    # NOTE(review): default=True combined with action="store_true" makes this
    # flag a no-op -- args.cv is always True and cannot be disabled from the
    # command line. Confirm whether default=False was intended.
    ap.add_argument("--cv", default=True, action="store_true",
                    help="whether to include control variates")  # type=bool,
    ap.add_argument(
        "--init",
        choices=["reset", "random", "last"],  #, 'rewind', 'best'
        help="initialization strategy in pruning: one of {reset, random, last}"
    )  #, rewind, best
    #ap.add_argument("--rewind_step", type=int, help="which epoch to return to after pruning")
    ap.add_argument(
        "--reward",
        choices=["nce", "acc", "expected_reward", "sampled_score"],
        help=
        "reward function: one of {nce, acc, expected_reward, sampled_score}")
    ap.add_argument("--prune_or_freeze", choices=["none", "prune", "freeze"],
                    help="sparsification strategy: one of {prune or freeze}")
    ap.add_argument(
        "--masking_strategy",
        choices=["none", "L1", "heldout", "random"],
        help="masking strategy: one of {none, L1, heldout, random}")
    ap.add_argument(
        "--num_samples",
        type=int,
        help="number of samples to evaluate for gradient estimation")
    ap.add_argument("--device", choices=["cpu", "gpu"], default="cpu")
    ap.add_argument(
        '--affine',
        action="store_true",
        default=False,  # type=bool,
        help="if specified, turn on affine transform in normalization layers")
    ap.add_argument('--norm',
                    choices=["batch", "layer", "none"],
                    default="batch",
                    help="type of normalization to use between NN layeres")
    args = ap.parse_args()

    # One log directory per random seed.
    log_dir = f'runs-{args.seed}'
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    #if not os.path.exists('logs/'+log_dir):
    #    os.mkdir('logs/'+log_dir)

    # logging: file log named after the hyper-parameter combination, plus a
    # tqdm-friendly console handler; all arguments are echoed for provenance.
    label = f'{args.opt}-{args.reward}-{args.prune_or_freeze}-{args.init}-{args.masking_strategy}-{args.batch_size}'
    logging.basicConfig(
        filename=os.path.join(log_dir, f'{label}-train.log'),
        filemode='a',
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO)
    logger = logging.getLogger(__name__)
    logger.addHandler(TqdmLoggingHandler())
    logger.info('Arguments:')
    for arg in vars(args):
        logger.info(f'\t{arg}: {getattr(args, arg)}')

    # data -- argparse `choices` guarantees one of the two branches is taken.
    if args.data == 'mnist':
        trainset, testset, classes = mnist(data_path='data/MNIST_data/')
    elif args.data == 'cifar10':
        trainset, testset, classes = cifar10(data_path='data/CIFAR10_data/')
    trainloader, testloader, devloader = get_dataloader(
        trainset,
        testset,
        batch_size=args.batch_size,
        eval_batch_size=args.eval_batch_size,
        seed=args.seed)

    # model
    model = None
    model_kwargs = {
        'seed': args.seed,
        'class_names': classes,
        'output_dim': len(classes),
        'norm_affine': args.affine,
        'norm': args.norm
    }
    if args.model == 'cnn':
        # The CNN path is wired for 32x32x3 inputs only.
        assert args.data == 'cifar10'
        model_kwargs['modules'] = args.depth
        model_kwargs['input_size'] = 32
        model = ConvolutionalNN(**model_kwargs)
    elif args.model == 'fc3':
        # Flattened input dimension depends on the dataset.
        if args.data == 'mnist':
            model_kwargs['input_dim'] = 28 * 28
        elif args.data == 'cifar10':
            model_kwargs['input_dim'] = 32 * 32 * 3
        model = FullyConnectedNN(**model_kwargs)
    else:
        raise ValueError("Unknown model type")

    # gpu
    device = None
    if args.device == 'gpu' and torch.cuda.is_available():
        device = 'cuda:0'
        # Make newly created tensors default to GPU memory.
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    else:
        device = 'cpu'
    model.to(device)
    logger.info(f"Device: {device}")
    if torch.cuda.is_available():
        logger.info(f"\tn_gpu: {torch.cuda.device_count()}")

    # optimizer -- only forward hyper-parameters the user set to a truthy
    # value. NOTE(review): this treats an explicit 0 / 0.0 as "unset", so
    # e.g. --lr 0 silently falls back to the optimizer's own default.
    kwargs = {'prune_or_freeze': args.prune_or_freeze, 'init': args.init}
    if args.lr:
        kwargs['lr'] = args.lr
    if args.mu:
        kwargs['mu'] = args.mu
    if args.beta:
        kwargs['beta'] = args.beta
    if args.max_grad_norm:
        kwargs['max_grad_norm'] = args.max_grad_norm
    if args.var:
        kwargs['var'] = args.var
    if args.num_samples:
        kwargs['num_samples'] = args.num_samples
    #if args.init == 'rewind':
    #    print(args.rewind_step)
    opt = None
    if args.opt == 'first':
        # First-order baselines: the bandit variant additionally takes the
        # control-variate switch.
        if args.reward in ['sampled_score']:
            kwargs['cv'] = args.cv  # control variates
            opt = FirstOrderBanditOptimizer(model.parameters(), **kwargs)
        elif args.reward in ['nce', 'expected_reward']:
            opt = FirstOrderOptimizer(model.parameters(), **kwargs)
        else:
            raise ValueError
    elif args.opt == 'flaxman':
        opt = VanillaEvolutionOptimizer(model.parameters(), **kwargs)
    elif args.opt == 'dueling':
        opt = DuelingEvolutionOptimizer(model.parameters(), **kwargs)
    elif args.opt == 'ghadimi':
        opt = OneSideEvolutionOptimizer(model.parameters(), **kwargs)
    elif args.opt == 'agarwal':
        opt = TwoSideEvolutionOptimizer(model.parameters(), **kwargs)
    else:
        raise ValueError("Unknown optimizer type")

    #scheduler = lr_scheduler.ReduceLROnPlateau(opt, mode='max', patience=3, threshold=1e-2)
    scheduler = None  # constant learning rate

    # trainer -- pruning is disabled unless both a sparsification strategy
    # and a masking strategy are selected.
    pruning_rate = 0.0 if args.prune_or_freeze == 'none' or args.masking_strategy == 'none' else args.pr
    metrics = ['acc', 'f1-score', 'precision', 'recall']
    trainer = Trainer(model,
                      opt,
                      scheduler,
                      args.num_epochs,
                      args.num_rounds,
                      label,
                      seed=args.seed,
                      init=args.init,
                      pruning_rate=pruning_rate,
                      reward=args.reward,
                      metrics=metrics,
                      log_dir=log_dir,
                      eval_interval=args.eval_interval,
                      masking_strategy=args.masking_strategy,
                      device=device)
    trainer.train(trainloader, testloader, devloader)
    #del model
    #del opt
    #del scheduler
    #del trainer
    logging.shutdown()
        # Tail of fit(): report learned parameters and epochs actually run,
        # then plot the cost history. (The start of fit() is above this chunk.)
        print(
            f'final w: {self.w}, final b: {self.b}, epochs: {epoch +1 } / {epochs}'
        )
        plt.plot(costs)
        plt.show()

    def predict(self, X):
        # Linear decision rule: sign(w.x + b) yields labels in {-1, +1}
        # (np.sign returns 0 exactly on the decision boundary).
        return np.sign(X.dot(self.w) + self.b)

    def score(self, X, Y):
        # Mean accuracy: fraction of examples whose prediction matches Y.
        P = self.predict(X)
        return np.mean(P == Y)


if __name__ == '__main__':
    X, y = mnist()
    # Keep only digits 0 and 1 to form a binary classification task.
    idx = np.logical_or(y == 0, y == 1)
    X = X[idx]
    y = y[idx]
    y[y == 0] = -1  # because perceptron take target -1,1 so we need to change all the 0 to -1
    # 50/50 train/test split, preserving order.
    Ntrain = len(y) // 2
    X_train, y_train = X[:Ntrain], y[:Ntrain]
    X_test, y_test = X[Ntrain:], y[Ntrain:]
    classifier = Perceptron()
    t0 = datetime.now()
    classifier.fit(X_train, y_train)
    print(f"Training time is : {datetime.now()-t0}")