Example #1
import os

# set GPU
GPU = 0
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = str(GPU)

import env
import matplotlib.pyplot as plt
from bm import DBM
from bm.rbm import BernoulliRBM
from bm.utils import (progress_bar, Stopwatch, im_plot, im_reshape, im_gif,
                      tick_params, plot_confusion_matrix)
from bm.utils.dataset import load_mnist

X, y = load_mnist(mode='train', path='../data/')
X /= 255.
X_test, y_test = load_mnist(mode='test', path='../data/')
X_test /= 255.
print(X.shape, y.shape, X_test.shape, y_test.shape)

fig = plt.figure(figsize=(10, 10))
im_plot(X[:100],
        shape=(28, 28),
        title='Training examples',
        imshow_params={'cmap': plt.cm.gray})
plt.savefig('mnist.png', dpi=196, bbox_inches='tight')

rbm1 = BernoulliRBM.load_model('../models/dbm_mnist_rbm1/')

rbm1_W = rbm1.get_tf_params(scope='weights')['W']
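
# The learned weights can be displayed with the same im_plot helper used
# above: each column of W is one hidden unit's 784-dimensional receptive
# field. A minimal sketch, assuming W has shape (784, n_hidden) with
# n_hidden >= 100; adjust the slice otherwise.
fig = plt.figure(figsize=(10, 10))
im_plot(rbm1_W.T[:100],
        shape=(28, 28),
        title='Filters of RBM #1',
        imshow_params={'cmap': plt.cm.gray})
plt.savefig('rbm1_filters.png', dpi=196, bbox_inches='tight')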
Example #2
import argparse
import os

# NOTE: the import path for RNG is an assumption, chosen to be consistent
# with the bm.utils imports in Example #1; make_mlp is defined elsewhere in
# the full script (a hedged sketch of make_rbm follows main() below)
from bm.rbm import BernoulliRBM
from bm.utils import RNG
from bm.utils.dataset import load_mnist


def main():
    # training settings
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # general/data
    parser.add_argument(
        '--gpu',
        type=str,
        default='0',
        metavar='ID',
        help="ID of the GPU to train on (or '' to train on CPU)")
    parser.add_argument('--n-train',
                        type=int,
                        default=55000,
                        metavar='N',
                        help='number of training examples')
    parser.add_argument('--n-val',
                        type=int,
                        default=5000,
                        metavar='N',
                        help='number of validation examples')
    parser.add_argument('--data-path',
                        type=str,
                        default='../data/',
                        metavar='PATH',
                        help='directory for storing augmented data etc.')

    # RBM related
    parser.add_argument('--n-hidden',
                        type=int,
                        default=1024,
                        metavar='N',
                        help='number of hidden units')
    parser.add_argument('--w-init',
                        type=float,
                        default=0.01,
                        metavar='STD',
                        help='initialize weights from zero-centered Gaussian '
                             'with this standard deviation')
    parser.add_argument('--vb-init',
                        action='store_false',
                        help='initialize visible biases as logit of the mean '
                             'values of features; if this flag is set, use '
                             'zero init instead')
    parser.add_argument('--hb-init',
                        type=float,
                        default=0.,
                        metavar='HB',
                        help='initial hidden bias')
    parser.add_argument('--n-gibbs-steps',
                        type=int,
                        default=1,
                        metavar='N',
                        nargs='+',
                        help='number of Gibbs updates per weights update '
                             'or a sequence of such (per epoch)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.05,
                        metavar='LR',
                        nargs='+',
                        help='learning rate or sequence of such (per epoch)')
    parser.add_argument('--epochs',
                        type=int,
                        default=120,
                        metavar='N',
                        help='number of epochs to train')
    parser.add_argument('--batch-size',
                        type=int,
                        default=10,
                        metavar='B',
                        help='input batch size for training')
    parser.add_argument('--l2',
                        type=float,
                        default=1e-5,
                        metavar='L2',
                        help='L2 weight decay coefficient')
    parser.add_argument(
        '--sample-v-states',
        action='store_true',
        help='sample visible states, otherwise use probabilities w/o sampling')
    parser.add_argument('--dropout',
                        type=float,
                        metavar='P',
                        help='probability of visible units being on')
    parser.add_argument('--sparsity-target',
                        type=float,
                        default=0.1,
                        metavar='T',
                        help='desired probability of hidden activation')
    parser.add_argument('--sparsity-cost',
                        type=float,
                        default=1e-5,
                        metavar='C',
                        help='controls the amount of sparsity penalty')
    parser.add_argument('--sparsity-damping',
                        type=float,
                        default=0.9,
                        metavar='D',
                        help='decay rate for hidden activations probs')
    parser.add_argument('--random-seed',
                        type=int,
                        default=1337,
                        metavar='N',
                        help="random seed for model training")
    parser.add_argument('--dtype',
                        type=str,
                        default='float32',
                        metavar='T',
                        help="datatype precision to use")
    parser.add_argument('--model-dirpath',
                        type=str,
                        default='../models/rbm_mnist/',
                        metavar='DIRPATH',
                        help='directory path to save the model')

    # MLP related
    parser.add_argument('--mlp-no-init',
                        action='store_true',
                        help='if enabled, use random initialization')
    parser.add_argument('--mlp-l2',
                        type=float,
                        default=1e-5,
                        metavar='L2',
                        help='L2 weight decay coefficient')
    parser.add_argument('--mlp-lrm',
                        type=float,
                        default=(0.1, 1.),
                        metavar='LRM',
                        nargs='+',
                        help='learning rate multipliers of 1e-3')
    parser.add_argument('--mlp-epochs',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of epochs to train')
    parser.add_argument('--mlp-val-metric',
                        type=str,
                        default='val_acc',
                        metavar='S',
                        help="metric on validation set to perform early "
                             "stopping, {'val_acc', 'val_loss'}")
    parser.add_argument('--mlp-batch-size',
                        type=int,
                        default=128,
                        metavar='N',
                        help='input batch size for training')
    parser.add_argument('--mlp-save-prefix',
                        type=str,
                        default='../data/rbm_',
                        metavar='PREFIX',
                        help='prefix to save MLP predictions and targets')

    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
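    # if a single LR multiplier was given, duplicate it so that each of the
    # two MLP layers gets one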
    if len(args.mlp_lrm) == 1:
        args.mlp_lrm *= 2

    # prepare data (load + scale + split)
    print("\nPreparing data ...\n\n")
    X, y = load_mnist(mode='train', path=args.data_path)
    X /= 255.
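    # shuffle inputs and labels with the same seed so they stay aligned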
    RNG(seed=42).shuffle(X)
    RNG(seed=42).shuffle(y)
    n_train = min(len(X), args.n_train)
    n_val = min(len(X), args.n_val)
    X_train = X[:n_train]
    y_train = y[:n_train]
    X_val = X[-n_val:]
    y_val = y[-n_val:]

    # train and save the RBM model
    rbm = make_rbm(X_train, X_val, args)

    # load test data
    X_test, y_test = load_mnist(mode='test', path=args.data_path)
    X_test /= 255.

    # discriminative fine-tuning: initialize MLP with
    # learned weights, add FC layer and train using backprop
    print("\nDiscriminative fine-tuning ...\n\n")

    W, hb = None, None
    if not args.mlp_no_init:
        weights = rbm.get_tf_params(scope='weights')
        W = weights['W']
        hb = weights['hb']

    make_mlp((X_train, y_train), (X_val, y_val), (X_test, y_test), (W, hb),
             args)
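

# A hypothetical sketch of make_rbm, which is defined elsewhere in the full
# script: reuse a saved model if one exists, otherwise build a BernoulliRBM
# from the parsed args, fit it, and return it. load_model follows the usage
# in Example #1; fit and the constructor keyword names are assumptions
# mapped from the CLI flags and should be verified against the bm library.
def make_rbm(X_train, X_val, args):
    if os.path.isdir(args.model_dirpath):
        print("\nLoading RBM ...\n\n")
        return BernoulliRBM.load_model(args.model_dirpath)
    print("\nTraining RBM ...\n\n")
    rbm = BernoulliRBM(n_visible=784,
                       n_hidden=args.n_hidden,
                       n_gibbs_steps=args.n_gibbs_steps,
                       learning_rate=args.lr,
                       max_epoch=args.epochs,
                       batch_size=args.batch_size,
                       l2=args.l2,
                       random_seed=args.random_seed,
                       dtype=args.dtype,
                       model_path=args.model_dirpath)
    rbm.fit(X_train, X_val)
    return rbm


# (make_mlp must also be defined for main() to run end-to-end)
if __name__ == '__main__':
    main()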
Example #3
import argparse
import os

import numpy as np

# NOTE: the import path for RNG is an assumption, chosen to be consistent
# with the bm.utils imports in Example #1; make_rbm1, make_rbm2, make_dbm
# and make_mlp are defined elsewhere in the full script
from bm.utils import RNG
from bm.utils.dataset import load_mnist


def main():
    # training settings
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # general/data
    parser.add_argument(
        '--gpu',
        type=str,
        default='0',
        metavar='ID',
        help="ID of the GPU to train on (or '' to train on CPU)")
    parser.add_argument('--n-train',
                        type=int,
                        default=59000,
                        metavar='N',
                        help='number of training examples')
    parser.add_argument('--n-val',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='number of validation examples')

    # RBM #2 related
    parser.add_argument('--increase-n-gibbs-steps-every',
                        type=int,
                        default=20,
                        metavar='I',
                        help='increase number of Gibbs steps every specified '
                             'number of epochs for RBM #2')

    # common for RBMs and DBM
    parser.add_argument('--n-hiddens',
                        type=int,
                        default=(512, 1024),
                        metavar='N',
                        nargs='+',
                        help='numbers of hidden units')
    parser.add_argument('--n-gibbs-steps',
                        type=int,
                        default=(1, 1, 1),
                        metavar='N',
                        nargs='+',
                        help='(initial) number of Gibbs steps for CD/PCD')
    parser.add_argument('--lr',
                        type=float,
                        default=(0.05, 0.01, 2e-3),
                        metavar='LR',
                        nargs='+',
                        help='(initial) learning rates')
    parser.add_argument('--epochs',
                        type=int,
                        default=(64, 120, 500),
                        metavar='N',
                        nargs='+',
                        help='number of epochs to train')
    parser.add_argument('--batch-size',
                        type=int,
                        default=(48, 48, 100),
                        metavar='B',
                        nargs='+',
                        help='input batch size for training; `--n-train` and '
                             '`--n-val` must be divisible by this number '
                             '(for DBM)')
    parser.add_argument('--l2',
                        type=float,
                        default=(1e-3, 2e-4, 1e-7),
                        metavar='L2',
                        nargs='+',
                        help='L2 weight decay coefficients')
    parser.add_argument('--random-seed',
                        type=int,
                        default=(1337, 1111, 2222),
                        metavar='N',
                        nargs='+',
                        help='random seeds for models training')

    # save dirpaths
    parser.add_argument('--rbm1-dirpath',
                        type=str,
                        default='../models/dbm_mnist_rbm1/',
                        metavar='DIRPATH',
                        help='directory path to save RBM #1')
    parser.add_argument('--rbm2-dirpath',
                        type=str,
                        default='../models/dbm_mnist_rbm2/',
                        metavar='DIRPATH',
                        help='directory path to save RBM #2')
    parser.add_argument('--dbm-dirpath',
                        type=str,
                        default='../models/dbm_mnist/',
                        metavar='DIRPATH',
                        help='directory path to save DBM')

    # DBM related
    parser.add_argument('--n-particles',
                        type=int,
                        default=100,
                        metavar='M',
                        help='number of persistent Markov chains')
    parser.add_argument(
        '--max-mf-updates',
        type=int,
        default=50,
        metavar='N',
        help='maximum number of mean-field updates per weight update')
    parser.add_argument('--mf-tol',
                        type=float,
                        default=1e-7,
                        metavar='TOL',
                        help='mean-field tolerance')
    parser.add_argument('--max-norm',
                        type=float,
                        default=6.,
                        metavar='C',
                        help='maximum norm constraint')
    parser.add_argument('--sparsity-target',
                        type=float,
                        default=(0.2, 0.1),
                        metavar='T',
                        nargs='+',
                        help='desired probability of hidden activation')
    parser.add_argument('--sparsity-cost',
                        type=float,
                        default=(1e-4, 5e-5),
                        metavar='C',
                        nargs='+',
                        help='controls the amount of sparsity penalty')
    parser.add_argument('--sparsity-damping',
                        type=float,
                        default=0.9,
                        metavar='D',
                        help='decay rate for hidden activations probs')

    # MLP related
    parser.add_argument('--mlp-no-init',
                        action='store_true',
                        help='if enabled, use random initialization')
    parser.add_argument('--mlp-l2',
                        type=float,
                        default=1e-5,
                        metavar='L2',
                        help='L2 weight decay coefficient')
    parser.add_argument('--mlp-lrm',
                        type=float,
                        default=(0.01, 0.1, 1.),
                        metavar='LRM',
                        nargs='+',
                        help='learning rate multipliers of 1e-3')
    parser.add_argument('--mlp-epochs',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of epochs to train')
    parser.add_argument('--mlp-val-metric',
                        type=str,
                        default='val_acc',
                        metavar='S',
                        help="metric on validation set to perform early "
                             "stopping, {'val_acc', 'val_loss'}")
    parser.add_argument('--mlp-batch-size',
                        type=int,
                        default=128,
                        metavar='N',
                        help='input batch size for training')
    parser.add_argument('--mlp-save-prefix',
                        type=str,
                        default='../data/dbm_',
                        metavar='PREFIX',
                        help='prefix to save MLP predictions and targets')

    # parse and check params
    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
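    # broadcast any hyperparameter supplied as a single value to its expected
    # length; `x *= m` extends the parsed list in place, so `args` is updated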
    for x, m in (
        (args.n_gibbs_steps, 3),
        (args.lr, 3),
        (args.epochs, 3),
        (args.batch_size, 3),
        (args.l2, 3),
        (args.random_seed, 3),
        (args.sparsity_target, 2),
        (args.sparsity_cost, 2),
        (args.mlp_lrm, 3),
    ):
        if len(x) == 1:
            x *= m

    # prepare data (load + scale + split)
    print "\nPreparing data ...\n\n"
    X, y = load_mnist(mode='train', path='../data/')
    X /= 255.
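    # shuffle inputs and labels with the same seed so they stay aligned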
    RNG(seed=42).shuffle(X)
    RNG(seed=42).shuffle(y)
    n_train = min(len(X), args.n_train)
    n_val = min(len(X), args.n_val)
    X_train = X[:n_train]
    y_train = y[:n_train]
    X_val = X[-n_val:]
    y_val = y[-n_val:]
    X = np.concatenate((X_train, X_val))

    # pre-train RBM #1
    rbm1 = make_rbm1(X, args)

    # freeze RBM #1 and extract features Q = p_{RBM_1}(h|v=X)
    Q = None
    if not os.path.isdir(args.rbm2_dirpath) or not os.path.isdir(
            args.dbm_dirpath):
        print "\nExtracting features from RBM #1 ..."
        Q = rbm1.transform(X)
        print "\n"

    # pre-train RBM #2
    rbm2 = make_rbm2(Q, args)

    # freeze RBM #2 and extract features G = p_{RBM_2}(h|v=Q)
    G = None
    if not os.path.isdir(args.dbm_dirpath):
        print "\nExtracting features from RBM #2 ..."
        G = rbm2.transform(Q)
        print "\n"

    # jointly train DBM
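    # (Q and G are presumably used to initialize the DBM's hidden units
    #  consistently with the pre-trained RBM stack)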
    dbm = make_dbm((X_train, X_val), (rbm1, rbm2), (Q, G), args)

    # load test data
    X_test, y_test = load_mnist(mode='test', path='../data/')
    X_test /= 255.

    # discriminative fine-tuning: initialize MLP with
    # learned weights, add FC layer and train using backprop
    print "\nDiscriminative fine-tuning ...\n\n"

    W, hb = None, None
    W2, hb2 = None, None
    if not args.mlp_no_init:
        weights = dbm.get_tf_params(scope='weights')
        W = weights['W']
        hb = weights['hb']
        W2 = weights['W_1']
        hb2 = weights['hb_1']

    make_mlp((X_train, y_train), (X_val, y_val), (X_test, y_test), (W, hb),
             (W2, hb2), args)


if __name__ == '__main__':
    main()