Example #1
import os
import logging

import numpy as np

# Module-level setup assumed by the snippet; os, np, logging and logger are
# used below but not defined in the original extract.
logger = logging.getLogger(__name__)


def run_experiment(args):
    # Set THEANO_FLAGS before theano is imported; the learning.* imports below
    # are deferred into the function body so this flag takes effect.
    os.environ['THEANO_FLAGS'] = "device=gpu" + str(args.gpu)

    from learning.experiment import Experiment
    from learning.training import DSGNHTTrainer
    from learning.termination import EarlyStopping
    from learning.monitor import MonitorLL, DLogModelParams, SampleFromP, MonitorPosteriorMean
    from learning.dataset import Omniglot
    from learning.preproc import PermuteColumns, Binarize

    from learning.models.dgm_vae import VAEDGMLayerStack
    from learning.models.vae import VAE, StandardVAETop

    np.set_printoptions(precision=2)

    logger.debug("Arguments %s" % args)
    tags = []

    np.random.seed(23)

    # n_samples to evaluate model
    n_samples_epoch = [1, 5, 25, 100]
    n_samples_final = [1, 5, 10, 25, 100, 500, 1000, 5000]

    # no. posterior samples for posterior mean
    postsamples = [int(s) for s in args.postsamples.split(",")]

    n_X = 28 * 28

    p_layers = []
    q_layers = []

    model = None

    # build the variational auto-encoder layer stack
    if args.layers == 1:
        latent_units = [50]
        hidden_units_q = [[200, 200]]
        hidden_units_p = [[200, 200]]

        n_Y = latent_units[0]
        p_layers.append(
            VAE(n_X=n_X,
                n_Y=n_Y,
                det_units=hidden_units_p[0],
                data_type="binary",
                bias=None))
        p_layers.append(StandardVAETop(n_X=n_Y))
        q_layers.append(
            VAE(n_X=n_Y,
                n_Y=n_X,
                det_units=hidden_units_q[0],
                data_type="continuous",
                bias=None))
        model = VAEDGMLayerStack(
            p_layers=p_layers,
            q_layers=q_layers,
        )
        model.setup()
    elif args.layers == 2:
        latent_units = [100, 50]
        hidden_units_p = [[200, 200], [100, 100]]
        hidden_units_q = [[100, 100], [200, 200]]

        n_Y = latent_units[0]
        p_layers.append(
            VAE(n_X=n_X,
                n_Y=latent_units[0],
                det_units=hidden_units_p[0],
                data_type="binary",
                bias=None))
        q_layers.append(
            VAE(n_X=latent_units[0],
                n_Y=n_X,
                det_units=hidden_units_q[1],
                data_type="continuous",
                bias=None))
        p_layers.append(
            VAE(n_X=latent_units[0],
                n_Y=latent_units[1],
                det_units=hidden_units_p[1],
                data_type="continuous",
                bias=None))
        q_layers.append(
            VAE(n_X=latent_units[1],
                n_Y=latent_units[0],
                det_units=hidden_units_q[0],
                data_type="continuous",
                bias=None))

        # Standard prior on the deepest latent layer completes the generative
        # stack: prior -> 50 latents -> 100 latents -> 784 visibles.
        p_layers.append(StandardVAETop(n_X=latent_units[1]))
        model = VAEDGMLayerStack(
            p_layers=p_layers,
            q_layers=q_layers,
        )
        model.setup()

    assert model is not None

    # parameters
    def param_tag(value):
        """ Convert a float into a short tag-usable string representation. E.g.:
            0.1   -> 11
            0.01  -> 12
            0.001 -> 13
            0.005 -> 53
        """
        if value == 0.0:
            return "00"
        exp = np.floor(np.log10(value))
        leading = ("%e" % value)[0]
        return "%s%d" % (leading, -exp)

    # Learning rate
    lr_p = args.lr_p
    tags += ["lp" + param_tag(lr_p)]
    lr_q = args.lr_q
    tags += ["lq" + param_tag(lr_q)]

    # LR decay
    if args.lrdecay != 1.0:
        tags += ["lrdecay" + param_tag(args.lrdecay - 1.)]

    # Samples
    n_samples = args.samples
    tags += ["spl%d" % n_samples]

    # Batch size
    batch_size = args.batchsize
    tags += ["bs%d" % batch_size]

    # n_steps_simu
    n_steps_simu = args.n_simu
    tags += ["ns%d" % n_steps_simu]

    # n_steps_optm
    n_steps_optm = args.n_optm
    tags += ["no%d" % n_steps_optm]

    # momentum_decay
    momentum_decay = args.momentum_decay
    tags += ["md" + param_tag(momentum_decay)]

    # Dataset
    if args.shuffle:
        np.random.seed(23)
        preproc = [PermuteColumns()]
        tags += ["shuffle"]
    else:
        preproc = []

    binarize_preproc = preproc + [Binarize(late=True)]
    dataset = Omniglot(which_set='train', preproc=binarize_preproc)
    valiset = Omniglot(which_set='valid', preproc=binarize_preproc)
    testset = Omniglot(which_set='test', preproc=binarize_preproc)

    # lookahead
    lookahead = args.lookahead
    tags += ["lah%d" % lookahead]

    tags.sort()
    expname = "dsgnht-%s-%slayer" % ("-".join(tags), str(args.layers))

    if args.report:
        expname = "report/" + expname

    logger.info("Running %s" % expname)

    #-----------------------------------------------------------------------------

    dlog_model_params_monitor = DLogModelParams()
    generate_data_monitor = SampleFromP(n_samples=100)

    trainer = DSGNHTTrainer(
        batch_size=batch_size,
        n_samples=n_samples,
        n_steps_simu=n_steps_simu,
        n_steps_optm=n_steps_optm,
        learning_rate_p=lr_p,
        learning_rate_q=lr_q,
        lr_decay=args.lrdecay,
        momentum_decay=momentum_decay,
        dataset=dataset,
        model=model,
        termination=EarlyStopping(lookahead=lookahead,
                                  min_epochs=10,
                                  max_epochs=999999),
        epoch_monitors=[
            dlog_model_params_monitor,
            generate_data_monitor,
            MonitorLL(name="valiset", data=valiset, n_samples=n_samples_epoch),
        ],
        final_monitors=[
            dlog_model_params_monitor,
            generate_data_monitor,
            MonitorLL(name="final-testset",
                      data=testset,
                      n_samples=n_samples_final,
                      level=logging.CRITICAL),
        ],
        posterior_mean_samples=postsamples,
        posterior_mean_monitor=MonitorPosteriorMean(),
    )

    experiment = Experiment()
    experiment.set_trainer(trainer)
    experiment.setup_output_dir(expname)
    experiment.setup_logging()
    experiment.print_summary()

    if args.cont is None:
        logger.info("Starting experiment ...")
        experiment.run_experiment()
    else:
        logger.info("Continuing experiment %s ...." % args.cont)
        experiment.continue_experiment(args.cont + "/results.h5", row=-1)

    logger.info("Finished. Wrinting metadata")

    experiment.print_summary()
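
The function above reads many attributes off args, but the original argument
parser is not included in the extract. A minimal driver sketch, with flag names
inferred from the attributes accessed above and purely illustrative defaults:

if __name__ == "__main__":
    import argparse

    # Hypothetical parser: names mirror the args.<attr> accesses in
    # run_experiment; defaults are assumptions, not values from the source.
    parser = argparse.ArgumentParser()
    parser.add_argument("--gpu", type=int, default=0)
    parser.add_argument("--layers", type=int, default=1, choices=[1, 2])
    parser.add_argument("--lr_p", type=float, default=1e-4)
    parser.add_argument("--lr_q", type=float, default=1e-4)
    parser.add_argument("--lrdecay", type=float, default=1.0)
    parser.add_argument("--samples", type=int, default=5)
    parser.add_argument("--batchsize", type=int, default=25)
    parser.add_argument("--n_simu", type=int, default=1)
    parser.add_argument("--n_optm", type=int, default=1)
    parser.add_argument("--momentum_decay", type=float, default=0.1)
    parser.add_argument("--postsamples", default="10,100")
    parser.add_argument("--shuffle", action="store_true")
    parser.add_argument("--lookahead", type=int, default=10)
    parser.add_argument("--report", action="store_true")
    parser.add_argument("--cont", default=None)

    run_experiment(parser.parse_args())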
Example #2
import os
import logging

import numpy as np

logger = logging.getLogger(__name__)  # module-level setup assumed, as in Example #1


def run_experiment(args):
    # Set THEANO_FLAGS before theano is imported (see Example #1).
    os.environ['THEANO_FLAGS'] = "device=gpu" + str(args.gpu)

    from learning.experiment import Experiment
    from learning.training import DSGNHTTrainer
    from learning.termination import EarlyStopping
    from learning.monitor import MonitorLL, DLogModelParams, SampleFromP, MonitorPosteriorMean
    from learning.dataset import MNIST
    from learning.preproc import PermuteColumns, Binarize

    from learning.models.dgm import DGMLayerStack
    from learning.models.sbn import SBN, SBNTop
    from learning.models.dsbn import DSBN
    from learning.models.darn import DARN, DARNTop
    from learning.models.nade import NADE, NADETop

    np.set_printoptions(precision=2)

    logger.debug("Arguments %s" % args)
    tags = []

    np.random.seed(23)

    # Layer models
    layer_models = {
        "sbn": (SBN, SBNTop),
        "dsbn": (DSBN, SBNTop),
        "darn": (DARN, DARNTop),
        "nade": (NADE, NADETop),
    }

    assert args.p_model in layer_models
    assert args.q_model in layer_models

    p_layer, p_top = layer_models[args.p_model]
    q_layer, q_top = layer_models[args.q_model]

    # n_samples to evaluate model
    n_samples_epoch = [1, 5, 25, 100]
    n_samples_final = [1, 5, 10, 25, 100, 500, 1000, 10000, 100000]
    if (args.p_model in ['darn', 'nade']) or (args.q_model in ['darn', 'nade']):
        n_samples_epoch = [1, 5, 25]
        n_samples_final = [1, 5, 10, 25, 100, 500]

    # no. posterior samples for posterior mean
    postsamples = [int(s) for s in args.postsamples.split(",")]

    # Layer sizes
    layer_sizes = [int(s) for s in args.layer_sizes.split(",")]

    n_X = 28 * 28

    p_layers = []
    q_layers = []

    for ls in layer_sizes:
        n_Y = ls
        p_layers.append(p_layer(n_X=n_X, n_Y=n_Y))
        q_layers.append(q_layer(n_X=n_Y, n_Y=n_X))
        n_X = n_Y
    p_layers.append(p_top(n_X=n_X))
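    # Illustration (hypothetical layer_sizes = [200, 100]): the loop builds
    #   p_layers = [p_layer(n_X=784, n_Y=200), p_layer(n_X=200, n_Y=100)]
    #   q_layers = [q_layer(n_X=200, n_Y=784), q_layer(n_X=100, n_Y=200)]
    # and the line above adds p_top(n_X=100) as the top-level prior.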

    model = DGMLayerStack(p_layers=p_layers, q_layers=q_layers)
    model.setup()

    # parameters
    def param_tag(value):
        """ Convert a float into a short tag-usable string representation. E.g.:
            0.1   -> 11
            0.01  -> 12
            0.001 -> 13
            0.005 -> 53
        """
        if value == 0.0:
            return "00"
        exp = np.floor(np.log10(value))
        leading = ("%e" % value)[0]
        return "%s%d" % (leading, -exp)

    # Learning rate
    lr_p = args.lr_p
    tags += ["lp" + param_tag(lr_p)]
    lr_q = args.lr_q
    tags += ["lq" + param_tag(lr_q)]

    # LR decay
    if args.lrdecay != 1.0:
        tags += ["lrdecay" + param_tag(args.lrdecay - 1.)]

    # Samples
    n_samples = args.samples
    tags += ["spl%d" % n_samples]

    # Batch size
    batch_size = args.batchsize
    tags += ["bs%d" % batch_size]

    # n_steps_simu
    n_steps_simu = args.n_simu
    tags += ["ns%d" % n_steps_simu]

    # n_steps_optm
    n_steps_optm = args.n_optm
    tags += ["no%d" % n_steps_optm]

    # momentum_decay
    momentum_decay = args.momentum_decay
    tags += ["md" + param_tag(momentum_decay)]

    # Dataset
    if args.shuffle:
        np.random.seed(23)
        preproc = [PermuteColumns()]
        tags += ["shuffle"]
    else:
        preproc = []

    if args.rebinarize:
        binarize_preproc = preproc + [Binarize(late=True)]
        dataset = MNIST(which_set='train',
                        preproc=binarize_preproc,
                        n_datapoints=50000)
        valiset = MNIST(which_set='valid',
                        preproc=binarize_preproc,
                        n_datapoints=10000)
        testset = MNIST(which_set='test',
                        preproc=binarize_preproc,
                        n_datapoints=10000)
        # testset = MNIST(fname="mnist_salakhutdinov.pkl.gz", which_set='test', preproc=preproc, n_datapoints=10000)
        tags += ["rb"]
    else:
        dataset = MNIST(fname="mnist_salakhutdinov.pkl.gz",
                        which_set='train',
                        preproc=preproc,
                        n_datapoints=50000)
        valiset = MNIST(fname="mnist_salakhutdinov.pkl.gz",
                        which_set='valid',
                        preproc=preproc,
                        n_datapoints=10000)
        testset = MNIST(fname="mnist_salakhutdinov.pkl.gz",
                        which_set='test',
                        preproc=preproc,
                        n_datapoints=10000)

    # lookahead
    lookahead = args.lookahead
    tags += ["lah%d" % lookahead]

    tags.sort()
    expname = "dsgnht-%s-%s-%s-%s" % ("-".join(tags), args.p_model,
                                      args.q_model, "-".join(
                                          [str(s) for s in layer_sizes]))

    if args.report:
        expname = "report/" + expname

    logger.info("Running %s" % expname)

    #-----------------------------------------------------------------------------

    dlog_model_params_monitor = DLogModelParams()
    generate_data_monitor = SampleFromP(n_samples=100)

    trainer = DSGNHTTrainer(
        batch_size=batch_size,
        n_samples=n_samples,
        n_steps_simu=n_steps_simu,
        n_steps_optm=n_steps_optm,
        learning_rate_p=lr_p,
        learning_rate_q=lr_q,
        lr_decay=args.lrdecay,
        momentum_decay=momentum_decay,
        dataset=dataset,
        model=model,
        termination=EarlyStopping(lookahead=lookahead,
                                  min_epochs=10,
                                  max_epochs=999999),
        epoch_monitors=[
            dlog_model_params_monitor,
            generate_data_monitor,
            MonitorLL(name="valiset", data=valiset, n_samples=n_samples_epoch),
        ],
        final_monitors=[
            dlog_model_params_monitor,
            generate_data_monitor,
            MonitorLL(name="final-testset",
                      data=testset,
                      n_samples=n_samples_final,
                      level=logging.CRITICAL),
        ],
        posterior_mean_samples=postsamples,
        posterior_mean_monitor=MonitorPosteriorMean(),
    )

    experiment = Experiment()
    experiment.set_trainer(trainer)
    experiment.setup_output_dir(expname)
    experiment.setup_logging()
    experiment.print_summary()

    if args.cont is None:
        logger.info("Starting experiment ...")
        experiment.run_experiment()
    else:
        logger.info("Continuing experiment %s ...." % args.cont)
        experiment.continue_experiment(args.cont + "/results.h5", row=-1)

    logger.info("Finished. Wrinting metadata")

    experiment.print_summary()
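
This variant reads four attributes beyond those in Example #1: p_model, q_model,
layer_sizes and rebinarize. Extending the hypothetical parser sketched after
Example #1, the extra flags could look like this (names and defaults assumed):

    parser.add_argument("--p_model", choices=["sbn", "dsbn", "darn", "nade"], default="sbn")
    parser.add_argument("--q_model", choices=["sbn", "dsbn", "darn", "nade"], default="sbn")
    parser.add_argument("--layer_sizes", default="200,200")
    parser.add_argument("--rebinarize", action="store_true")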
Example #3
model = LayerStack(
    p_layers=p_layers,
    q_layers=q_layers,
    q_layers=q_layers,
)

trainer = Trainer(
    n_samples=10,
    learning_rate_p=1e-6,
    #learning_rate_q=1e-6,
    learning_rate_q=0.0,
    learning_rate_s=1e-6,
    weight_decay=0.0,
    batch_size=25,
    dataset=trainset,
    model=model,
    termination=EarlyStopping(lookahead=10),
    epoch_monitors=[
        DLogModelParams(),
        #SampleFromP(n_samples=100),
        MonitorLL(name="valiset", data=valiset, n_samples=[1, 5, 10, 25, 100]),
    ],
    final_monitors=[
        MonitorLL(name="final-valiset",
                  data=valiset,
                  n_samples=[1, 5, 10, 25, 100, 500, 1000]),
        MonitorLL(name="final-testset",
                  data=testset,
                  n_samples=[1, 5, 10, 25, 100, 500, 1000]),
    ],
    #step_monitors=[
    #    DLogModelParams(),
    #],
)
Example #4
model = LayerStack(
    p_layers=p_layers,
    q_layers=q_layers,
)

trainer = Trainer(
    n_samples=5,
    learning_rate_p=3e-2,
    learning_rate_q=3e-2,
    learning_rate_s=3e-2,
    layer_discount=1.00,
    batch_size=25,
    dataset=dataset,
    model=model,
    termination=EarlyStopping(),
    #monitor_nth_step=100,
    #step_monitors=[
    #    MonitorLL(name="valiset", data=valiset, n_samples=[1, 5, 25, 100])
    #],
    epoch_monitors=[
        DLogModelParams(),
        MonitorLL(name="valiset", data=valiset, n_samples=[1, 5, 25, 100]),
        MonitorLL(name="testset", data=testset, n_samples=[1, 5, 25, 100]),
    ],
    final_monitors=[
        MonitorLL(name="final-valiset",
                  data=valiset,
                  n_samples=[1, 5, 25, 100]),
        MonitorLL(name="final-testset",
                  data=testset,
Example #5
    )
]

model = LayerStack(
    p_layers=p_layers,
    q_layers=q_layers,
)

trainer = Trainer(
    n_samples=5,
    learning_rate_p=1e-3,
    learning_rate_q=1e-3,
    learning_rate_s=1e-3,
    layer_discount=1.0,
    batch_size=25,
    dataset=dataset,
    model=model,
    termination=EarlyStopping(min_epochs=250, max_epochs=250),
    #step_monitors=[MonitorLL(data=smallset, n_samples=[1, 5, 25, 100])],
    epoch_monitors=[
        DLogModelParams(),
        SampleFromP(n_samples=100),
        MonitorLL(name="valiset", data=valiset, n_samples=[100]),
    ],
    final_monitors=[
        MonitorLL(name="final-valiset", data=valiset, n_samples=[1, 5, 10, 25, 100, 500, 1000]),
        MonitorLL(name="final-testset", data=testset, n_samples=[1, 5, 10, 25, 100, 500, 1000]),
    ],
    #monitor_nth_step=100,
)
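
Examples #3 through #5 are fragments of the same skeleton: stack p_layers and
q_layers into a model, configure a Trainer with monitors and a termination
criterion, and run it. A condensed sketch of that shared shape, using only
constructors that appear in the fragments above (values illustrative):

model = LayerStack(
    p_layers=p_layers,
    q_layers=q_layers,
)

trainer = Trainer(
    n_samples=5,
    learning_rate_p=1e-3,
    learning_rate_q=1e-3,
    learning_rate_s=1e-3,
    batch_size=25,
    dataset=dataset,
    model=model,
    termination=EarlyStopping(lookahead=10),
    epoch_monitors=[
        DLogModelParams(),
        MonitorLL(name="valiset", data=valiset, n_samples=[1, 5, 25, 100]),
    ],
    final_monitors=[
        MonitorLL(name="final-testset", data=testset,
                  n_samples=[1, 5, 25, 100]),
    ],
)

Whether the trainer then runs directly or through the Experiment wrapper shown
in Examples #1 and #2 depends on the surrounding script, which these fragments
do not include.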