# fixture: small random dataset and a shared RBM configuration used in tests
def __init__(self):
    self.n_visible = 12
    self.n_hidden = 8
    self.X = RNG(seed=1337).rand(16, self.n_visible)
    self.X_val = RNG(seed=42).rand(8, self.n_visible)
    self.rbm_config = dict(n_visible=self.n_visible,
                           n_hidden=self.n_hidden,
                           sample_v_states=True,
                           sample_h_states=True,
                           dropout=0.9,
                           verbose=False,
                           display_filters=False,
                           random_seed=1337)
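# --- usage sketch (illustration; the `BernoulliRBM` name is an assumption) ---
# a config dict like the one above would typically be consumed as keyword
# arguments when constructing the model under test, e.g.:
#     rbm = BernoulliRBM(**self.rbm_config)
#     rbm.fit(self.X)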
class SeedMixin(BaseMixin):
    """Mixin providing a seeded RNG and reproducible derived seeds."""
    def __init__(self, random_seed=None, *args, **kwargs):
        super(SeedMixin, self).__init__(*args, **kwargs)
        self.random_seed = random_seed
        self._rng = RNG(seed=self.random_seed)

    def make_random_seed(self):
        return self._rng.randint(2 ** 31 - 1)
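# --- usage sketch (not from the repository) ----------------------------------
# A hypothetical class mixing in SeedMixin to draw reproducible seeds for its
# sub-components; `BaseMixin` and `RNG` are assumed to be the project's own
# base class and NumPy-backed RNG wrapper.
class ToyModel(SeedMixin):
    def __init__(self, *args, **kwargs):
        super(ToyModel, self).__init__(*args, **kwargs)
        # each call draws a fresh, yet reproducible, seed for a sub-component
        self.w_seed = self.make_random_seed()
        self.b_seed = self.make_random_seed()

# two models built with the same master seed derive identical sub-seeds
assert ToyModel(random_seed=1337).w_seed == ToyModel(random_seed=1337).w_seed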
def make_augmentation(X_train, y_train, n_train, args):
    X_aug = None
    X_aug_path = os.path.join(args.data_path, 'X_aug.npy')
    y_train = y_train.tolist() * 10
    RNG(seed=1337).shuffle(y_train)

    augment = True
    if os.path.isfile(X_aug_path):
        print "\nLoading augmented data ..."
        X_aug = np.load(X_aug_path)
        print "Checking augmented data ..."
        if len(X_aug) == 10 * n_train:
            augment = False

    if augment:
        print "\nAugmenting data ..."
        s = Stopwatch(verbose=True).start()
        X_aug = np.zeros((10 * n_train, 32, 32, 3), dtype=np.float32)
        X_train = im_unflatten(X_train)
        X_aug[:n_train] = X_train
        # 4 copies shifted by one pixel in each direction
        for i in xrange(n_train):
            for k, offset in enumerate(((1, 0), (-1, 0), (0, 1), (0, -1))):
                img = X_train[i].copy()
                X_aug[(k + 1) * n_train + i] = shift(img, offset=offset)
        # horizontal mirrors of the 5 * `n_train` images generated so far
        for i in xrange(5 * n_train):
            X_aug[5 * n_train + i] = horizontal_mirror(X_aug[i].copy())

        # shuffle once again, with the same seed as the labels above,
        # so (image, label) pairs stay aligned
        RNG(seed=1337).shuffle(X_aug)

        # convert to 'uint8' type to save disk space
        X_aug *= 255.
        X_aug = X_aug.astype('uint8')

        # flatten to (10 * `n_train`, 3072) shape
        X_aug = im_flatten(X_aug)

        # save to disk
        np.save(X_aug_path, X_aug)

        s.elapsed()
        print "\n"

    return X_aug, y_train
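# --- sketch (assumed helpers, not the repository's code) ---------------------
# `shift` and `horizontal_mirror` are project helpers not shown here; plausible
# minimal implementations for (H, W, C) images:
import numpy as np

def shift_sketch(img, offset):
    """Translate by `offset` = (dy, dx) pixels, zero-filling the border."""
    dy, dx = offset
    out = np.roll(np.roll(img, dy, axis=0), dx, axis=1)
    # zero out the rows/columns that np.roll wrapped around
    if dy > 0:   out[:dy, :, :] = 0.
    elif dy < 0: out[dy:, :, :] = 0.
    if dx > 0:   out[:, :dx, :] = 0.
    elif dx < 0: out[:, dx:, :] = 0.
    return out

def horizontal_mirror_sketch(img):
    """Flip left-to-right."""
    return img[:, ::-1, :]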
def make_large_weights(small_rbms):
    W = np.zeros((300 * 26, 32, 32, 3), dtype=np.float32)
    W[...] = RNG(seed=1234).rand(*W.shape) * 5e-6
    vb = np.zeros((32, 32, 3))
    hb = np.zeros(300 * 26)

    # 16 RBMs trained on a non-overlapping 4 x 4 grid of 8 x 8 patches
    for i in xrange(4):
        for j in xrange(4):
            rbm_id = 4 * i + j
            weights = small_rbms[rbm_id].get_tf_params(scope='weights')
            W_small = weights['W']
            W_small = W_small.T              # (300, 192)
            W_small = im_unflatten(W_small)  # (300, 8, 8, 3)
            W[300 * rbm_id:300 * (rbm_id + 1),
              8 * i:8 * (i + 1),
              8 * j:8 * (j + 1), :] = W_small
            vb[8 * i:8 * (i + 1),
               8 * j:8 * (j + 1), :] += im_unflatten(weights['vb'])
            hb[300 * rbm_id:300 * (rbm_id + 1)] = weights['hb']

    # 9 RBMs trained on a 3 x 3 grid of patches offset by 4 pixels
    for i in xrange(3):
        for j in xrange(3):
            rbm_id = 16 + 3 * i + j
            weights = small_rbms[rbm_id].get_tf_params(scope='weights')
            W_small = weights['W']
            W_small = W_small.T
            W_small = im_unflatten(W_small)
            W[300 * rbm_id:300 * (rbm_id + 1),
              4 + 8 * i:4 + 8 * (i + 1),
              4 + 8 * j:4 + 8 * (j + 1), :] = W_small
            vb[4 + 8 * i:4 + 8 * (i + 1),
               4 + 8 * j:4 + 8 * (j + 1), :] += im_unflatten(weights['vb'])
            hb[300 * rbm_id:300 * (rbm_id + 1)] = weights['hb']

    # the last (26-th) RBM: smear each filter pixel over a 4 x 4 block
    weights = small_rbms[25].get_tf_params(scope='weights')
    W_small = weights['W']
    W_small = W_small.T
    W_small = im_unflatten(W_small)
    vb_small = im_unflatten(weights['vb'])
    for i in xrange(8):
        for j in xrange(8):
            U = W_small[:, i, j, :]        # (300, 3)
            U = np.expand_dims(U, -1)
            U = np.expand_dims(U, -1)      # (300, 3, 1, 1)
            U = U.transpose(0, 2, 3, 1)    # (300, 1, 1, 3), broadcasts over 4 x 4
            W[-300:, 4 * i:4 * (i + 1), 4 * j:4 * (j + 1), :] = U / 16.
            vb[4 * i:4 * (i + 1),
               4 * j:4 * (j + 1), :] += vb_small[i, j, :].reshape((1, 1, 3)) / 16.
    hb[-300:] = weights['hb']

    W = im_flatten(W)
    W = W.T
    # average the accumulated visible biases: interior pixels are covered by
    # more of the overlapping RBMs than border pixels
    vb /= 2.
    vb[4:-4, 4:-4, :] /= 1.5
    vb = im_flatten(vb)

    return W, vb, hb
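# --- sanity check (illustration only; `DummyRBM` is invented) ----------------
# Verifies the tiling arithmetic above: 26 small RBMs with 300 hidden units on
# 8 x 8 x 3 patches assemble into a (3072, 7800) weight matrix. Assumes the
# repository's `im_flatten` / `im_unflatten` helpers are in scope.
import numpy as np

class DummyRBM(object):
    """Stand-in for a trained small RBM with the expected parameter shapes."""
    def __init__(self, seed):
        rng = np.random.RandomState(seed)
        self._params = {'W': rng.rand(192, 300).astype(np.float32),  # 8*8*3 = 192
                        'vb': rng.rand(192).astype(np.float32),
                        'hb': rng.rand(300).astype(np.float32)}
    def get_tf_params(self, scope='weights'):
        return self._params

W, vb, hb = make_large_weights([DummyRBM(seed=i) for i in range(26)])
assert W.shape == (3072, 7800) and vb.shape == (3072,) and hb.shape == (7800,)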
def main():
    # training settings
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # general
    parser.add_argument('--gpu', type=str, default='0', metavar='ID',
                        help="ID of the GPU to train on (or '' to train on CPU)")

    # data
    parser.add_argument('--n-train', type=int, default=49000, metavar='N',
                        help='number of training examples')
    parser.add_argument('--n-val', type=int, default=1000, metavar='N',
                        help='number of validation examples')
    parser.add_argument('--data-path', type=str, default='../data/', metavar='PATH',
                        help='directory for storing augmented data etc.')

    # common for RBMs and DBM
    parser.add_argument('--n-gibbs-steps', type=int, default=(1, 1, 1), metavar='N', nargs='+',
                        help='(initial) number of Gibbs steps for CD/PCD')
    parser.add_argument('--lr', type=float, default=(5e-4, 1e-4, 8e-5), metavar='LR', nargs='+',
                        help='(initial) learning rates')
    parser.add_argument('--epochs', type=int, default=(120, 180, 1500), metavar='N', nargs='+',
                        help='number of epochs to train')
    parser.add_argument('--batch-size', type=int, default=(100, 100, 100), metavar='B', nargs='+',
                        help='input batch size for training; `--n-train` and `--n-val` '
                             'must be divisible by this number (for DBM)')
    parser.add_argument('--l2', type=float, default=(0.01, 0.05, 1e-8), metavar='L2', nargs='+',
                        help='L2 weight decay coefficients')
    parser.add_argument('--random-seed', type=int, default=(1337, 1111, 2222), metavar='N', nargs='+',
                        help='random seeds for models training')

    # save dirpaths
    parser.add_argument('--grbm-dirpath', type=str, default='../models/grbm_cifar_naive/', metavar='DIRPATH',
                        help='directory path to save Gaussian RBM')
    parser.add_argument('--mrbm-dirpath', type=str, default='../models/mrbm_cifar_naive/', metavar='DIRPATH',
                        help='directory path to save Multinomial RBM')
    parser.add_argument('--dbm-dirpath', type=str, default='../models/dbm_cifar_naive/', metavar='DIRPATH',
                        help='directory path to save DBM')

    # DBM related
    parser.add_argument('--n-particles', type=int, default=100, metavar='M',
                        help='number of persistent Markov chains')
    parser.add_argument('--max-mf-updates', type=int, default=50, metavar='N',
                        help='maximum number of mean-field updates per weight update')
    parser.add_argument('--mf-tol', type=float, default=1e-11, metavar='TOL',
                        help='mean-field tolerance')
    parser.add_argument('--max-norm', type=float, default=4., metavar='C',
                        help='maximum norm constraint')

    # MLP related
    parser.add_argument('--mlp-no-init', action='store_true',
                        help='if enabled, use random initialization')
    parser.add_argument('--mlp-l2', type=float, default=1e-4, metavar='L2',
                        help='L2 weight decay coefficient')
    parser.add_argument('--mlp-lrm', type=float, default=(0.1, 1.), metavar='LRM', nargs='+',
                        help='learning rate multipliers of 1e-3')
    parser.add_argument('--mlp-epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train')
    parser.add_argument('--mlp-val-metric', type=str, default='val_acc', metavar='S',
                        help="metric on validation set to perform early stopping, "
                             "{'val_acc', 'val_loss'}")
    parser.add_argument('--mlp-batch-size', type=int, default=128, metavar='N',
                        help='input batch size for training')
    parser.add_argument('--mlp-dropout', type=float, default=0.64, metavar='P',
                        help='probability of visible units being set to zero')
    parser.add_argument('--mlp-save-prefix', type=str, default='../data/grbm_naive_', metavar='PREFIX',
                        help='prefix to save MLP predictions and targets')

    # parse and check params
    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # broadcast single values to triples (one per model);
    # `x` is a list when given on the command line, so `*=` extends it in place
    for x, m in (
        (args.n_gibbs_steps, 3),
        (args.lr, 3),
        (args.epochs, 3),
        (args.batch_size, 3),
        (args.l2, 3),
        (args.random_seed, 3),
    ):
        if len(x) == 1:
            x *= m

    # prepare data (load + scale + split)
    print "\nPreparing data ..."
    X, y = load_cifar10(mode='train', path=args.data_path)
    X = X.astype(np.float32)
    X /= 255.
    RNG(seed=42).shuffle(X)
    RNG(seed=42).shuffle(y)  # same seed => same permutation as `X`
    n_train = min(len(X), args.n_train)
    n_val = min(len(X), args.n_val)
    X_train = X[:n_train]
    X_val = X[-n_val:]
    y_train = y[:n_train]
    y_val = y[-n_val:]

    # remove 1000 least significant singular values
    X_train = make_smoothing(X_train, n_train, args)
    print X_train.shape

    # center and normalize training data
    X_s_mean = X_train.mean(axis=0)
    X_s_std = X_train.std(axis=0)
    mean_path = os.path.join(args.data_path, 'X_s_mean.npy')
    std_path = os.path.join(args.data_path, 'X_s_std.npy')
    if not os.path.isfile(mean_path):
        np.save(mean_path, X_s_mean)
    if not os.path.isfile(std_path):
        np.save(std_path, X_s_std)
    X_train -= X_s_mean
    X_train /= X_s_std
    X_val -= X_s_mean
    X_val /= X_s_std
    print "Mean: ({0:.3f}, ...); std: ({1:.3f}, ...)".format(
        X_train.mean(axis=0)[0], X_train.std(axis=0)[0])
    print "Range: ({0:.3f}, {1:.3f})\n\n".format(X_train.min(), X_train.max())

    # pre-train Gaussian RBM
    grbm = make_grbm((X_train, X_val), args)

    # extract features Q = p_{G-RBM}(h|v=X)
    print "\nExtracting features from G-RBM ...\n\n"
    Q_train, Q_val = None, None
    if not os.path.isdir(args.mrbm_dirpath) or not os.path.isdir(args.dbm_dirpath):
        Q_train_path = os.path.join(args.data_path, 'Q_train_cifar_naive.npy')
        Q_train = make_rbm_transform(grbm, X_train, Q_train_path)
    if not os.path.isdir(args.mrbm_dirpath):
        Q_val_path = os.path.join(args.data_path, 'Q_val_cifar_naive.npy')
        Q_val = make_rbm_transform(grbm, X_val, Q_val_path)

    # pre-train Multinomial RBM (M-RBM)
    mrbm = make_mrbm((Q_train, Q_val), args)

    # extract features G = p_{M-RBM}(h|v=Q)
    print "\nExtracting features from M-RBM ...\n\n"
    Q, G = None, None
    if not os.path.isdir(args.dbm_dirpath):
        Q = Q_train[:args.n_particles]
        G_path = os.path.join(args.data_path, 'G_train_cifar_naive.npy')
        G = make_rbm_transform(mrbm, Q, G_path)

    # jointly train DBM
    dbm = make_dbm((X_train, X_val), (grbm, mrbm), (Q, G), args)

    # load test data and scale/normalize it the same way as training data
    X_test, y_test = load_cifar10(mode='test', path=args.data_path)
    X_test = X_test.astype(np.float32)
    X_test /= 255.
    X_test -= X_s_mean
    X_test /= X_s_std

    # G-RBM discriminative fine-tuning:
    # initialize MLP with learned weights,
    # add FC layer and train using backprop
    print "\nG-RBM Discriminative fine-tuning ...\n\n"
    W, hb = None, None
    if not args.mlp_no_init:
        weights = grbm.get_tf_params(scope='weights')
        W = weights['W']
        hb = weights['hb']
    make_mlp((X_train, y_train), (X_val, y_val), (X_test, y_test),
             (W, hb), args)
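# --- sketch (assumed behavior, not the repository's code) --------------------
# `make_rbm_transform` is used above but not shown; it is assumed to compute
# Q = p(h|v=X) with a frozen RBM and cache the result on disk so that reruns
# skip the forward pass (`np_dtype` matches the float16 call used elsewhere).
import os
import numpy as np

def make_rbm_transform_sketch(rbm, X, path, np_dtype=None):
    if os.path.isfile(path):
        return np.load(path)
    Q = rbm.transform(X)        # hidden-unit activation probabilities
    if np_dtype is not None:
        Q = Q.astype(np_dtype)  # e.g. np.float16 to halve disk usage
    np.save(path, Q)
    return Q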
def main():
    # training settings
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # general/data
    parser.add_argument('--gpu', type=str, default='0', metavar='ID',
                        help="ID of the GPU to train on (or '' to train on CPU)")
    parser.add_argument('--n-train', type=int, default=55000, metavar='N',
                        help='number of training examples')
    parser.add_argument('--n-val', type=int, default=5000, metavar='N',
                        help='number of validation examples')
    parser.add_argument('--data-path', type=str, default='../data/', metavar='PATH',
                        help='directory for storing augmented data etc.')

    # RBM related
    parser.add_argument('--n-hidden', type=int, default=1024, metavar='N',
                        help='number of hidden units')
    parser.add_argument('--w-init', type=float, default=0.01, metavar='STD',
                        help='initialize weights from zero-centered Gaussian '
                             'with this standard deviation')
    parser.add_argument('--vb-init', action='store_false',
                        help='initialize visible biases as logit of mean values '
                             'of features, otherwise (if enabled) zero init')
    parser.add_argument('--hb-init', type=float, default=0., metavar='HB',
                        help='initial hidden bias')
    parser.add_argument('--n-gibbs-steps', type=int, default=1, metavar='N', nargs='+',
                        help='number of Gibbs updates per weight update '
                             'or sequence of such (per epoch)')
    parser.add_argument('--lr', type=float, default=0.05, metavar='LR', nargs='+',
                        help='learning rate or sequence of such (per epoch)')
    parser.add_argument('--epochs', type=int, default=120, metavar='N',
                        help='number of epochs to train')
    parser.add_argument('--batch-size', type=int, default=10, metavar='B',
                        help='input batch size for training')
    parser.add_argument('--l2', type=float, default=1e-5, metavar='L2',
                        help='L2 weight decay coefficient')
    parser.add_argument('--sample-v-states', action='store_true',
                        help='sample visible states, otherwise use probabilities w/o sampling')
    parser.add_argument('--dropout', type=float, metavar='P',
                        help='probability of visible units being on')
    parser.add_argument('--sparsity-target', type=float, default=0.1, metavar='T',
                        help='desired probability of hidden activation')
    parser.add_argument('--sparsity-cost', type=float, default=1e-5, metavar='C',
                        help='controls the amount of sparsity penalty')
    parser.add_argument('--sparsity-damping', type=float, default=0.9, metavar='D',
                        help='decay rate for hidden activations probs')
    parser.add_argument('--random-seed', type=int, default=1337, metavar='N',
                        help='random seed for model training')
    parser.add_argument('--dtype', type=str, default='float32', metavar='T',
                        help='datatype precision to use')
    parser.add_argument('--model-dirpath', type=str, default='../models/rbm_mnist/', metavar='DIRPATH',
                        help='directory path to save the model')

    # MLP related
    parser.add_argument('--mlp-no-init', action='store_true',
                        help='if enabled, use random initialization')
    parser.add_argument('--mlp-l2', type=float, default=1e-5, metavar='L2',
                        help='L2 weight decay coefficient')
    parser.add_argument('--mlp-lrm', type=float, default=(0.1, 1.), metavar='LRM', nargs='+',
                        help='learning rate multipliers of 1e-3')
    parser.add_argument('--mlp-epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train')
    parser.add_argument('--mlp-val-metric', type=str, default='val_acc', metavar='S',
                        help="metric on validation set to perform early stopping, "
                             "{'val_acc', 'val_loss'}")
    parser.add_argument('--mlp-batch-size', type=int, default=128, metavar='N',
                        help='input batch size for training')
    parser.add_argument('--mlp-save-prefix', type=str,
                        default='../data/rbm_', metavar='PREFIX',
                        help='prefix to save MLP predictions and targets')

    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    # broadcast a single multiplier to both MLP layers
    if len(args.mlp_lrm) == 1:
        args.mlp_lrm *= 2

    # prepare data (load + scale + split)
    print("\nPreparing data ...\n\n")
    X, y = load_mnist(mode='train', path=args.data_path)
    X /= 255.
    RNG(seed=42).shuffle(X)
    RNG(seed=42).shuffle(y)  # same seed => same permutation as `X`
    n_train = min(len(X), args.n_train)
    n_val = min(len(X), args.n_val)
    X_train = X[:n_train]
    y_train = y[:n_train]
    X_val = X[-n_val:]
    y_val = y[-n_val:]

    # train and save the RBM model
    rbm = make_rbm(X_train, X_val, args)

    # load test data
    X_test, y_test = load_mnist(mode='test', path=args.data_path)
    X_test /= 255.

    # discriminative fine-tuning: initialize MLP with
    # learned weights, add FC layer and train using backprop
    print("\nDiscriminative fine-tuning ...\n\n")
    W, hb = None, None
    if not args.mlp_no_init:
        weights = rbm.get_tf_params(scope='weights')
        W = weights['W']
        hb = weights['hb']
    make_mlp((X_train, y_train), (X_val, y_val), (X_test, y_test),
             (W, hb), args)
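# --- sanity check (illustration only) ----------------------------------------
# The scripts rely on this invariant: two RNGs constructed with the same seed
# produce the same shuffle permutation, so `X` and `y` stay aligned after the
# two shuffle calls above. A quick check with NumPy's RandomState, which the
# project's RNG wrapper is assumed to mirror:
import numpy as np

_X = np.arange(10).reshape(5, 2)   # row i starts with 2 * i
_y = np.arange(5)
np.random.RandomState(42).shuffle(_X)
np.random.RandomState(42).shuffle(_y)
assert (_X[:, 0] == 2 * _y).all()  # rows still match their labels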
def main():
    # training settings
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # general
    parser.add_argument('--gpu', type=str, default='0', metavar='ID',
                        help="ID of the GPU to train on (or '' to train on CPU)")

    # data
    parser.add_argument('--n-train', type=int, default=49000, metavar='N',
                        help='number of training examples')
    parser.add_argument('--n-val', type=int, default=1000, metavar='N',
                        help='number of validation examples')
    parser.add_argument('--data-path', type=str, default='../data/', metavar='PATH',
                        help='directory for storing augmented data etc.')
    parser.add_argument('--no-aug', action='store_true',
                        help="if enabled, don't augment data")

    # small RBMs related
    parser.add_argument('--small-lr', type=float, default=1e-3, metavar='LR', nargs='+',
                        help='learning rate or sequence of such (per epoch)')
    parser.add_argument('--small-epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train')
    parser.add_argument('--small-batch-size', type=int, default=48, metavar='B',
                        help='input batch size for training')
    parser.add_argument('--small-l2', type=float, default=1e-3, metavar='L2',
                        help='L2 weight decay coefficient')
    parser.add_argument('--small-sparsity-target', type=float, default=0.1, metavar='T',
                        help='desired probability of hidden activation')
    parser.add_argument('--small-sparsity-cost', type=float, default=1e-3, metavar='C',
                        help='controls the amount of sparsity penalty')
    parser.add_argument('--small-random-seed', type=int, default=9000, metavar='N',
                        help='random seeds for models training')
    parser.add_argument('--small-dirpath-prefix', type=str,
                        default='../models/rbm_cifar_small_', metavar='PREFIX',
                        help='directory path prefix to save RBMs trained on patches')

    # M-RBM related
    parser.add_argument('--increase-n-gibbs-steps-every', type=int, default=16, metavar='I',
                        help='increase number of Gibbs steps every specified number '
                             'of epochs for M-RBM')

    # common for RBMs and DBM
    parser.add_argument('--n-gibbs-steps', type=int, default=(1, 1, 1), metavar='N', nargs='+',
                        help='(initial) number of Gibbs steps for CD/PCD')
    parser.add_argument('--lr', type=float, default=(5e-4, 5e-5, 4e-5), metavar='LR', nargs='+',
                        help='(initial) learning rates')
    parser.add_argument('--epochs', type=int, default=(64, 33, 100), metavar='N', nargs='+',
                        help='number of epochs to train')
    parser.add_argument('--batch-size', type=int, default=(100, 100, 100), metavar='B', nargs='+',
                        help='input batch size for training; `--n-train` and `--n-val` '
                             'must be divisible by this number (for DBM)')
    parser.add_argument('--l2', type=float, default=(1e-3, 0.005, 0.), metavar='L2', nargs='+',
                        help='L2 weight decay coefficients')
    parser.add_argument('--random-seed', type=int, default=(1111, 2222, 3333), metavar='N', nargs='+',
                        help='random seeds for models training')

    # save dirpaths
    parser.add_argument('--grbm-dirpath', type=str, default='../models/grbm_cifar/', metavar='DIRPATH',
                        help='directory path to save Gaussian RBM')
    parser.add_argument('--mrbm-dirpath', type=str, default='../models/mrbm_cifar/', metavar='DIRPATH',
                        help='directory path to save Multinomial RBM')
    parser.add_argument('--dbm-dirpath', type=str, default='../models/dbm_cifar/', metavar='DIRPATH',
                        help='directory path to save DBM')

    # DBM related
    parser.add_argument('--n-particles', type=int, default=100, metavar='M',
                        help='number of persistent Markov chains')
    parser.add_argument('--max-mf-updates', type=int, default=50, metavar='N',
                        help='maximum number of mean-field updates per weight update')
    parser.add_argument('--mf-tol', type=float, default=1e-11, metavar='TOL',
                        help='mean-field tolerance')
    parser.add_argument('--max-norm', type=float, default=4., metavar='C',
                        help='maximum norm constraint')
    parser.add_argument('--sparsity-target', type=float, default=(0.2, 0.2), metavar='T', nargs='+',
                        help='desired probability of hidden activation')
    parser.add_argument('--sparsity-cost', type=float, default=(1e-4, 1e-3), metavar='C', nargs='+',
                        help='controls the amount of sparsity penalty')
    parser.add_argument('--sparsity-damping', type=float, default=0.9, metavar='D',
                        help='decay rate for hidden activations probs')

    # MLP related
    parser.add_argument('--mlp-no-init', action='store_true',
                        help='if enabled, use random initialization')
    parser.add_argument('--mlp-l2', type=float, default=1e-4, metavar='L2',
                        help='L2 weight decay coefficient')
    parser.add_argument('--mlp-lrm', type=float, default=(0.01, 1.), metavar='LRM', nargs='+',
                        help='learning rate multipliers of 1e-3')
    parser.add_argument('--mlp-epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train')
    parser.add_argument('--mlp-val-metric', type=str, default='val_acc', metavar='S',
                        help="metric on validation set to perform early stopping, "
                             "{'val_acc', 'val_loss'}")
    parser.add_argument('--mlp-batch-size', type=int, default=128, metavar='N',
                        help='input batch size for training')
    parser.add_argument('--mlp-dropout', type=float, default=0.7, metavar='P',
                        help='probability of visible units being set to zero')
    parser.add_argument('--mlp-save-prefix', type=str, default='../data/grbm_', metavar='PREFIX',
                        help='prefix to save MLP predictions and targets')

    # parse and check params
    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # broadcast single values to triples (one per model);
    # `x` is a list when given on the command line, so `*=` extends it in place
    for x, m in (
        (args.n_gibbs_steps, 3),
        (args.lr, 3),
        (args.epochs, 3),
        (args.batch_size, 3),
        (args.l2, 3),
        (args.random_seed, 3),
    ):
        if len(x) == 1:
            x *= m

    # prepare data (load + scale + split)
    print("\nPreparing data ...")
    X, y = load_cifar10(mode='train', path=args.data_path)
    X = X.astype(np.float32)
    X /= 255.
    RNG(seed=42).shuffle(X)
    RNG(seed=42).shuffle(y)  # same seed => same permutation as `X`
    n_train = min(len(X), args.n_train)
    n_val = min(len(X), args.n_val)
    X_train = X[:n_train]
    X_val = X[-n_val:]
    y_train = y[:n_train]
    y_val = y[-n_val:]

    if not args.no_aug:
        # augment data
        X_aug, y_train = make_augmentation(X_train, y_train, n_train, args)

        # convert + scale augmented data again
        X_train = X_aug.astype(np.float32)
        X_train /= 255.
print("Augmented shape: {0}".format(X_train.shape)) print("Augmented range: {0}".format((X_train.min(), X_train.max()))) # center and normalize training data X_mean = X_train.mean(axis=0) X_std = X_train.std(axis=0) if not args.no_aug: mean_path = os.path.join(args.data_path, 'X_aug_mean.npy') std_path = os.path.join(args.data_path, 'X_aug_std.npy') if not os.path.isfile(mean_path): np.save(mean_path, X_mean) if not os.path.isfile(std_path): np.save(std_path, X_std) X_train -= X_mean X_train /= X_std X_val -= X_mean X_val /= X_std print("Augmented mean: ({0:.3f}, ...); std: ({1:.3f}, ...)".format( X_train.mean(axis=0)[0], X_train.std(axis=0)[0])) print("Augmented range: ({0:.3f}, {1:.3f})\n\n".format( X_train.min(), X_train.max())) # train 26 small Gaussian RBMs on patches small_rbms = None if not os.path.isdir(args.grbm_dirpath): small_rbms = make_small_rbms((X_train, X_val), args) # assemble large weight matrix and biases # and pre-train large Gaussian RBM (G-RBM) grbm = make_grbm((X_train, X_val), small_rbms, args) # extract features Q = p_{G-RBM}(h|v=X) print("\nExtracting features from G-RBM ...\n\n") Q_train, Q_val = None, None if not os.path.isdir(args.mrbm_dirpath) or not os.path.isdir( args.dbm_dirpath): Q_train_path = os.path.join(args.data_path, 'Q_train_cifar.npy') Q_train = make_rbm_transform(grbm, X_train, Q_train_path, np_dtype=np.float16) if not os.path.isdir(args.mrbm_dirpath): Q_val_path = os.path.join(args.data_path, 'Q_val_cifar.npy') Q_val = make_rbm_transform(grbm, X_val, Q_val_path) # pre-train Multinomial RBM (M-RBM) mrbm = make_mrbm((Q_train, Q_val), args) # extract features G = p_{M-RBM}(h|v=Q) print("\nExtracting features from M-RBM ...\n\n") Q, G = None, None if not os.path.isdir(args.dbm_dirpath): Q = Q_train[:args.n_particles] G_path = os.path.join(args.data_path, 'G_train_cifar.npy') G = make_rbm_transform(mrbm, Q, G_path) # jointly train DBM dbm = make_dbm((X_train, X_val), (grbm, mrbm), (Q, G), args) # load test data X_test, y_test = load_cifar10(mode='test', path=args.data_path) X_test /= 255. X_test -= X_mean X_test /= X_std # G-RBM discriminative fine-tuning: # initialize MLP with learned weights, # add FC layer and train using backprop print("\nG-RBM Discriminative fine-tuning ...\n\n") W, hb = None, None if not args.mlp_no_init: weights = grbm.get_tf_params(scope='weights') W = weights['W'] hb = weights['hb'] make_mlp((X_train, y_train), (X_val, y_val), (X_test, y_test), (W, hb), args)
def main():
    # training settings
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # general/data
    parser.add_argument('--gpu', type=str, default='0', metavar='ID',
                        help="ID of the GPU to train on (or '' to train on CPU)")
    parser.add_argument('--n-train', type=int, default=59000, metavar='N',
                        help='number of training examples')
    parser.add_argument('--n-val', type=int, default=1000, metavar='N',
                        help='number of validation examples')

    # RBM #2 related
    parser.add_argument('--increase-n-gibbs-steps-every', type=int, default=20, metavar='I',
                        help='increase number of Gibbs steps every specified number '
                             'of epochs for RBM #2')

    # common for RBMs and DBM
    parser.add_argument('--n-hiddens', type=int, default=(512, 1024), metavar='N', nargs='+',
                        help='numbers of hidden units')
    parser.add_argument('--n-gibbs-steps', type=int, default=(1, 1, 1), metavar='N', nargs='+',
                        help='(initial) number of Gibbs steps for CD/PCD')
    parser.add_argument('--lr', type=float, default=(0.05, 0.01, 2e-3), metavar='LR', nargs='+',
                        help='(initial) learning rates')
    parser.add_argument('--epochs', type=int, default=(64, 120, 500), metavar='N', nargs='+',
                        help='number of epochs to train')
    parser.add_argument('--batch-size', type=int, default=(48, 48, 100), metavar='B', nargs='+',
                        help='input batch size for training; `--n-train` and `--n-val` '
                             'must be divisible by this number (for DBM)')
    parser.add_argument('--l2', type=float, default=(1e-3, 2e-4, 1e-7), metavar='L2', nargs='+',
                        help='L2 weight decay coefficients')
    parser.add_argument('--random-seed', type=int, default=(1337, 1111, 2222), metavar='N', nargs='+',
                        help='random seeds for models training')

    # save dirpaths
    parser.add_argument('--rbm1-dirpath', type=str, default='../models/dbm_mnist_rbm1/', metavar='DIRPATH',
                        help='directory path to save RBM #1')
    parser.add_argument('--rbm2-dirpath', type=str, default='../models/dbm_mnist_rbm2/', metavar='DIRPATH',
                        help='directory path to save RBM #2')
    parser.add_argument('--dbm-dirpath', type=str, default='../models/dbm_mnist/', metavar='DIRPATH',
                        help='directory path to save DBM')

    # DBM related
    parser.add_argument('--n-particles', type=int, default=100, metavar='M',
                        help='number of persistent Markov chains')
    parser.add_argument('--max-mf-updates', type=int, default=50, metavar='N',
                        help='maximum number of mean-field updates per weight update')
    parser.add_argument('--mf-tol', type=float, default=1e-7, metavar='TOL',
                        help='mean-field tolerance')
    parser.add_argument('--max-norm', type=float, default=6., metavar='C',
                        help='maximum norm constraint')
    parser.add_argument('--sparsity-target', type=float, default=(0.2, 0.1), metavar='T', nargs='+',
                        help='desired probability of hidden activation')
    parser.add_argument('--sparsity-cost', type=float, default=(1e-4, 5e-5), metavar='C', nargs='+',
                        help='controls the amount of sparsity penalty')
    parser.add_argument('--sparsity-damping', type=float, default=0.9, metavar='D',
                        help='decay rate for hidden activations probs')

    # MLP related
    parser.add_argument('--mlp-no-init', action='store_true',
                        help='if enabled, use random initialization')
    parser.add_argument('--mlp-l2', type=float, default=1e-5, metavar='L2',
                        help='L2 weight decay coefficient')
    parser.add_argument('--mlp-lrm', type=float, default=(0.01, 0.1, 1.), metavar='LRM', nargs='+',
                        help='learning rate multipliers of 1e-3')
    parser.add_argument('--mlp-epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train')
    parser.add_argument('--mlp-val-metric', type=str, default='val_acc',
                        metavar='S',
                        help="metric on validation set to perform early stopping, "
                             "{'val_acc', 'val_loss'}")
    parser.add_argument('--mlp-batch-size', type=int, default=128, metavar='N',
                        help='input batch size for training')
    parser.add_argument('--mlp-save-prefix', type=str, default='../data/dbm_', metavar='PREFIX',
                        help='prefix to save MLP predictions and targets')

    # parse and check params
    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # broadcast single values to sequences of the required length;
    # `x` is a list when given on the command line, so `*=` extends it in place
    for x, m in (
        (args.n_gibbs_steps, 3),
        (args.lr, 3),
        (args.epochs, 3),
        (args.batch_size, 3),
        (args.l2, 3),
        (args.random_seed, 3),
        (args.sparsity_target, 2),
        (args.sparsity_cost, 2),
        (args.mlp_lrm, 3),
    ):
        if len(x) == 1:
            x *= m

    # prepare data (load + scale + split)
    print "\nPreparing data ...\n\n"
    X, y = load_mnist(mode='train', path='../data/')
    X /= 255.
    RNG(seed=42).shuffle(X)
    RNG(seed=42).shuffle(y)  # same seed => same permutation as `X`
    n_train = min(len(X), args.n_train)
    n_val = min(len(X), args.n_val)
    X_train = X[:n_train]
    y_train = y[:n_train]
    X_val = X[-n_val:]
    y_val = y[-n_val:]
    X = np.concatenate((X_train, X_val))

    # pre-train RBM #1
    rbm1 = make_rbm1(X, args)

    # freeze RBM #1 and extract features Q = p_{RBM_1}(h|v=X)
    Q = None
    if not os.path.isdir(args.rbm2_dirpath) or not os.path.isdir(args.dbm_dirpath):
        print "\nExtracting features from RBM #1 ..."
        Q = rbm1.transform(X)
        print "\n"

    # pre-train RBM #2
    rbm2 = make_rbm2(Q, args)

    # freeze RBM #2 and extract features G = p_{RBM_2}(h|v=Q)
    G = None
    if not os.path.isdir(args.dbm_dirpath):
        print "\nExtracting features from RBM #2 ..."
        G = rbm2.transform(Q)
        print "\n"

    # jointly train DBM
    dbm = make_dbm((X_train, X_val), (rbm1, rbm2), (Q, G), args)

    # load test data
    X_test, y_test = load_mnist(mode='test', path='../data/')
    X_test /= 255.

    # discriminative fine-tuning: initialize MLP with
    # learned weights, add FC layer and train using backprop
    print "\nDiscriminative fine-tuning ...\n\n"
    W, hb = None, None
    W2, hb2 = None, None
    if not args.mlp_no_init:
        weights = dbm.get_tf_params(scope='weights')
        W = weights['W']
        hb = weights['hb']
        W2 = weights['W_1']
        hb2 = weights['hb_1']
    make_mlp((X_train, y_train), (X_val, y_val), (X_test, y_test),
             (W, hb), (W2, hb2), args)
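# --- illustration (not from the repository) -----------------------------------
# The pipeline above is greedy layer-wise pre-training: each RBM is fit on the
# (frozen) hidden activations of the previous one, and the resulting weights
# initialize the DBM. Schematically, for any number of layers:
def pretrain_stack(X, make_rbm_fns):
    """Hypothetical helper; `make_rbm_fns` are factories like
    `lambda data: make_rbm1(data, args)` that fit and return one RBM."""
    rbms, data = [], X
    for make in make_rbm_fns:
        rbm = make(data)            # fit this layer on current features
        data = rbm.transform(data)  # freeze it; propagate features upward
        rbms.append(rbm)
    return rbms, data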