import os

# set GPU
GPU = 0
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = str(GPU)

import matplotlib.pyplot as plt  # needed for the plots below

import env  # repo-local module that makes the `bm` package importable
from bm import DBM
from bm.rbm import BernoulliRBM
from bm.utils import (progress_bar, Stopwatch,
                      im_plot, im_reshape, im_gif,
                      tick_params, plot_confusion_matrix)
from bm.utils.dataset import load_mnist

# load MNIST and scale pixel values to [0, 1]
X, y = load_mnist(mode='train', path='../data/')
X /= 255.
X_test, y_test = load_mnist(mode='test', path='../data/')
X_test /= 255.
print(X.shape, y.shape, X_test.shape, y_test.shape)

# plot the first 100 training examples
fig = plt.figure(figsize=(10, 10))
im_plot(X[:100], shape=(28, 28), title='Training examples',
        imshow_params={'cmap': plt.cm.gray})
plt.savefig('mnist.png', dpi=196, bbox_inches='tight')

# load pre-trained RBM #1 and extract its weight matrix
rbm1 = BernoulliRBM.load_model('../models/dbm_mnist_rbm1/')
rbm1_W = rbm1.get_tf_params(scope='weights')['W']
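# A minimal follow-up sketch: visualize the first 100 learned filters of
# RBM #1 by plotting the columns of `rbm1_W` (each column is one hidden
# unit's receptive field over the 28x28 input). This reuses `im_plot`
# exactly as above; the title and output filename are illustrative
# additions, not from the original script.
fig = plt.figure(figsize=(10, 10))
im_plot(rbm1_W.T[:100], shape=(28, 28), title='First 100 filters of RBM #1',
        imshow_params={'cmap': plt.cm.gray})
plt.savefig('rbm1_filters.png', dpi=196, bbox_inches='tight')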
import os
import argparse

from bm.utils import RNG
from bm.utils.dataset import load_mnist

# NOTE: the helpers `make_rbm` and `make_mlp` are assumed to be defined
# elsewhere in this script.


def main():
    # training settings
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # general/data
    parser.add_argument('--gpu', type=str, default='0', metavar='ID',
                        help="ID of the GPU to train on (or '' to train on CPU)")
    parser.add_argument('--n-train', type=int, default=55000, metavar='N',
                        help='number of training examples')
    parser.add_argument('--n-val', type=int, default=5000, metavar='N',
                        help='number of validation examples')
    parser.add_argument('--data-path', type=str, default='../data/', metavar='PATH',
                        help='directory for storing augmented data etc.')

    # RBM related
    parser.add_argument('--n-hidden', type=int, default=1024, metavar='N',
                        help='number of hidden units')
    parser.add_argument('--w-init', type=float, default=0.01, metavar='STD',
                        help='initialize weights from zero-centered Gaussian '
                             'with this standard deviation')
    parser.add_argument('--vb-init', action='store_false',
                        help='by default, initialize visible biases as logits of the '
                             'per-feature means; if this flag is set, use zero init instead')
    parser.add_argument('--hb-init', type=float, default=0., metavar='HB',
                        help='initial hidden bias')
    parser.add_argument('--n-gibbs-steps', type=int, default=1, metavar='N', nargs='+',
                        help='number of Gibbs updates per weight update, '
                             'or a sequence of such (per epoch)')
    parser.add_argument('--lr', type=float, default=0.05, metavar='LR', nargs='+',
                        help='learning rate or sequence of such (per epoch)')
    parser.add_argument('--epochs', type=int, default=120, metavar='N',
                        help='number of epochs to train')
    parser.add_argument('--batch-size', type=int, default=10, metavar='B',
                        help='input batch size for training')
    parser.add_argument('--l2', type=float, default=1e-5, metavar='L2',
                        help='L2 weight decay coefficient')
    parser.add_argument('--sample-v-states', action='store_true',
                        help='sample visible states, otherwise use probabilities w/o sampling')
    parser.add_argument('--dropout', type=float, metavar='P',
                        help='probability of visible units being on')
    parser.add_argument('--sparsity-target', type=float, default=0.1, metavar='T',
                        help='desired probability of hidden activation')
    parser.add_argument('--sparsity-cost', type=float, default=1e-5, metavar='C',
                        help='controls the amount of sparsity penalty')
    parser.add_argument('--sparsity-damping', type=float, default=0.9, metavar='D',
                        help='decay rate for hidden activation probs')
    parser.add_argument('--random-seed', type=int, default=1337, metavar='N',
                        help='random seed for model training')
    parser.add_argument('--dtype', type=str, default='float32', metavar='T',
                        help='datatype precision to use')
    parser.add_argument('--model-dirpath', type=str, default='../models/rbm_mnist/',
                        metavar='DIRPATH', help='directory path to save the model')

    # MLP related
    parser.add_argument('--mlp-no-init', action='store_true',
                        help='if enabled, use random initialization')
    parser.add_argument('--mlp-l2', type=float, default=1e-5, metavar='L2',
                        help='L2 weight decay coefficient')
    parser.add_argument('--mlp-lrm', type=float, default=(0.1, 1.), metavar='LRM', nargs='+',
                        help='learning rate multipliers of the base rate 1e-3')
    parser.add_argument('--mlp-epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train')
    parser.add_argument('--mlp-val-metric', type=str, default='val_acc', metavar='S',
                        help="metric on validation set to perform early stopping, "
                             "{'val_acc', 'val_loss'}")
    parser.add_argument('--mlp-batch-size', type=int, default=128, metavar='N',
                        help='input batch size for training')
    parser.add_argument('--mlp-save-prefix', type=str, default='../data/rbm_',
                        metavar='PREFIX', help='prefix to save MLP predictions and targets')

    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    if len(args.mlp_lrm) == 1:
        args.mlp_lrm *= 2  # same multiplier for both layers

    # prepare data (load + scale + split)
    print("\nPreparing data ...\n\n")
    X, y = load_mnist(mode='train', path=args.data_path)
    X /= 255.
    # shuffle data and labels identically (same seed)
    RNG(seed=42).shuffle(X)
    RNG(seed=42).shuffle(y)
    n_train = min(len(X), args.n_train)
    n_val = min(len(X), args.n_val)
    X_train = X[:n_train]
    y_train = y[:n_train]
    X_val = X[-n_val:]
    y_val = y[-n_val:]

    # train and save the RBM model
    rbm = make_rbm(X_train, X_val, args)

    # load test data
    X_test, y_test = load_mnist(mode='test', path=args.data_path)
    X_test /= 255.

    # discriminative fine-tuning: initialize MLP with
    # learned weights, add FC layer and train using backprop
    print("\nDiscriminative fine-tuning ...\n\n")
    W, hb = None, None
    if not args.mlp_no_init:
        weights = rbm.get_tf_params(scope='weights')
        W = weights['W']
        hb = weights['hb']
    make_mlp((X_train, y_train), (X_val, y_val), (X_test, y_test),
             (W, hb), args)


if __name__ == '__main__':
    main()
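# A minimal sketch of the discriminative fine-tuning step above -- NOT the
# repo's actual `make_mlp`. It assumes a Keras-style API: the RBM's weight
# matrix `W` (n_visible x n_hidden) and hidden biases `hb` initialize the
# first dense layer, a fresh softmax layer is stacked on top, and the whole
# net is trained with backprop. `make_mlp_sketch` is a hypothetical helper.
from keras.models import Sequential
from keras.layers import Dense


def make_mlp_sketch(W, hb, n_classes=10):
    n_visible, n_hidden = W.shape
    mlp = Sequential()
    mlp.add(Dense(n_hidden, activation='sigmoid', input_shape=(n_visible,)))
    mlp.add(Dense(n_classes, activation='softmax'))  # new FC layer, random init
    mlp.layers[0].set_weights([W, hb])  # start from the pre-trained RBM features
    mlp.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
    return mlp

# usage (illustrative):
#   mlp = make_mlp_sketch(W, hb)
#   mlp.fit(X_train, y_train, validation_data=(X_val, y_val),
#           epochs=args.mlp_epochs, batch_size=args.mlp_batch_size)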
import os
import argparse

import numpy as np

from bm.utils import RNG
from bm.utils.dataset import load_mnist

# NOTE: the helpers `make_rbm1`, `make_rbm2`, `make_dbm` and `make_mlp` are
# assumed to be defined elsewhere in this script.


def main():
    # training settings
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # general/data
    parser.add_argument('--gpu', type=str, default='0', metavar='ID',
                        help="ID of the GPU to train on (or '' to train on CPU)")
    parser.add_argument('--n-train', type=int, default=59000, metavar='N',
                        help='number of training examples')
    parser.add_argument('--n-val', type=int, default=1000, metavar='N',
                        help='number of validation examples')

    # RBM #2 related
    parser.add_argument('--increase-n-gibbs-steps-every', type=int, default=20, metavar='I',
                        help='increase number of Gibbs steps every specified number '
                             'of epochs for RBM #2')

    # common for RBMs and DBM
    parser.add_argument('--n-hiddens', type=int, default=(512, 1024), metavar='N', nargs='+',
                        help='numbers of hidden units')
    parser.add_argument('--n-gibbs-steps', type=int, default=(1, 1, 1), metavar='N', nargs='+',
                        help='(initial) number of Gibbs steps for CD/PCD')
    parser.add_argument('--lr', type=float, default=(0.05, 0.01, 2e-3), metavar='LR', nargs='+',
                        help='(initial) learning rates')
    parser.add_argument('--epochs', type=int, default=(64, 120, 500), metavar='N', nargs='+',
                        help='number of epochs to train')
    parser.add_argument('--batch-size', type=int, default=(48, 48, 100), metavar='B', nargs='+',
                        help='input batch size for training; `--n-train` and `--n-val` '
                             'must be divisible by this number (for DBM)')
    parser.add_argument('--l2', type=float, default=(1e-3, 2e-4, 1e-7), metavar='L2', nargs='+',
                        help='L2 weight decay coefficients')
    parser.add_argument('--random-seed', type=int, default=(1337, 1111, 2222), metavar='N',
                        nargs='+', help='random seeds for models training')

    # save dirpaths
    parser.add_argument('--rbm1-dirpath', type=str, default='../models/dbm_mnist_rbm1/',
                        metavar='DIRPATH', help='directory path to save RBM #1')
    parser.add_argument('--rbm2-dirpath', type=str, default='../models/dbm_mnist_rbm2/',
                        metavar='DIRPATH', help='directory path to save RBM #2')
    parser.add_argument('--dbm-dirpath', type=str, default='../models/dbm_mnist/',
                        metavar='DIRPATH', help='directory path to save DBM')

    # DBM related
    parser.add_argument('--n-particles', type=int, default=100, metavar='M',
                        help='number of persistent Markov chains')
    parser.add_argument('--max-mf-updates', type=int, default=50, metavar='N',
                        help='maximum number of mean-field updates per weight update')
    parser.add_argument('--mf-tol', type=float, default=1e-7, metavar='TOL',
                        help='mean-field tolerance')
    parser.add_argument('--max-norm', type=float, default=6., metavar='C',
                        help='maximum norm constraint')
    parser.add_argument('--sparsity-target', type=float, default=(0.2, 0.1), metavar='T',
                        nargs='+', help='desired probability of hidden activation')
    parser.add_argument('--sparsity-cost', type=float, default=(1e-4, 5e-5), metavar='C',
                        nargs='+', help='controls the amount of sparsity penalty')
    parser.add_argument('--sparsity-damping', type=float, default=0.9, metavar='D',
                        help='decay rate for hidden activation probs')

    # MLP related
    parser.add_argument('--mlp-no-init', action='store_true',
                        help='if enabled, use random initialization')
    parser.add_argument('--mlp-l2', type=float, default=1e-5, metavar='L2',
                        help='L2 weight decay coefficient')
    parser.add_argument('--mlp-lrm', type=float, default=(0.01, 0.1, 1.), metavar='LRM',
                        nargs='+', help='learning rate multipliers of the base rate 1e-3')
    parser.add_argument('--mlp-epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train')
    parser.add_argument('--mlp-val-metric', type=str, default='val_acc', metavar='S',
                        help="metric on validation set to perform early stopping, "
                             "{'val_acc', 'val_loss'}")
    parser.add_argument('--mlp-batch-size', type=int, default=128, metavar='N',
                        help='input batch size for training')
    parser.add_argument('--mlp-save-prefix', type=str, default='../data/dbm_',
                        metavar='PREFIX', help='prefix to save MLP predictions and targets')

    # parse and check params
    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    # broadcast any single-valued param to the required length
    # (values given on the command line arrive as lists, so `x *= m`
    # extends them in place; the full-length tuple defaults skip this branch)
    for x, m in (
        (args.n_gibbs_steps, 3),
        (args.lr, 3),
        (args.epochs, 3),
        (args.batch_size, 3),
        (args.l2, 3),
        (args.random_seed, 3),
        (args.sparsity_target, 2),
        (args.sparsity_cost, 2),
        (args.mlp_lrm, 3),
    ):
        if len(x) == 1:
            x *= m

    # prepare data (load + scale + split)
    print("\nPreparing data ...\n\n")
    X, y = load_mnist(mode='train', path='../data/')
    X /= 255.
    # shuffle data and labels identically (same seed)
    RNG(seed=42).shuffle(X)
    RNG(seed=42).shuffle(y)
    n_train = min(len(X), args.n_train)
    n_val = min(len(X), args.n_val)
    X_train = X[:n_train]
    y_train = y[:n_train]
    X_val = X[-n_val:]
    y_val = y[-n_val:]
    X = np.concatenate((X_train, X_val))

    # pre-train RBM #1
    rbm1 = make_rbm1(X, args)

    # freeze RBM #1 and extract features Q = p_{RBM_1}(h|v=X)
    Q = None
    if not os.path.isdir(args.rbm2_dirpath) or not os.path.isdir(args.dbm_dirpath):
        print("\nExtracting features from RBM #1 ...")
        Q = rbm1.transform(X)
        print("\n")

    # pre-train RBM #2
    rbm2 = make_rbm2(Q, args)

    # freeze RBM #2 and extract features G = p_{RBM_2}(h|v=Q)
    G = None
    if not os.path.isdir(args.dbm_dirpath):
        print("\nExtracting features from RBM #2 ...")
        G = rbm2.transform(Q)
        print("\n")

    # jointly train DBM
    dbm = make_dbm((X_train, X_val), (rbm1, rbm2), (Q, G), args)

    # load test data
    X_test, y_test = load_mnist(mode='test', path='../data/')
    X_test /= 255.

    # discriminative fine-tuning: initialize MLP with
    # learned weights, add FC layer and train using backprop
    print("\nDiscriminative fine-tuning ...\n\n")
    W, hb = None, None
    W2, hb2 = None, None
    if not args.mlp_no_init:
        weights = dbm.get_tf_params(scope='weights')
        W = weights['W']
        hb = weights['hb']
        W2 = weights['W_1']
        hb2 = weights['hb_1']
    make_mlp((X_train, y_train), (X_val, y_val), (X_test, y_test),
             (W, hb), (W2, hb2), args)


if __name__ == '__main__':
    main()
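# A minimal follow-up sketch (illustrative, not part of the training script):
# reload the jointly trained DBM and project its second-layer filters into
# pixel space by multiplying the two weight matrices, a common way to inspect
# what the deeper layer has learned. `DBM.load_model` is assumed to mirror
# `BernoulliRBM.load_model` used earlier; the weight keys ('W', 'W_1') match
# those used in the fine-tuning step above.
import matplotlib.pyplot as plt

from bm import DBM
from bm.utils import im_plot

dbm = DBM.load_model('../models/dbm_mnist/')
weights = dbm.get_tf_params(scope='weights')
W, W2 = weights['W'], weights['W_1']  # (784, 512) and (512, 1024) by default
receptive_fields = W.dot(W2)          # (784, 1024): layer-2 filters in pixel space

fig = plt.figure(figsize=(10, 10))
im_plot(receptive_fields.T[:100], shape=(28, 28),
        title='First 100 second-layer filters (projected to pixel space)',
        imshow_params={'cmap': plt.cm.gray})
plt.savefig('dbm_layer2_filters.png', dpi=196, bbox_inches='tight')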