示例#1
0
def train(
    out_path=None, name='', model_to_load=None, save_images=True, test_every=None,
    dim_h=None, rec_args=None, gen_args=None, prior='gaussian',
    preprocessing=None,
    learning_args=None,
    dataset_args=None):
    '''Build a Helmholtz model, compile its Theano functions and train it.

    Args:
        out_path: str, directory the image dumps are written into; also
            forwarded to `main_loop`.
        name: str, experiment name, forwarded to `main_loop`.
        model_to_load: forwarded to `set_model` (presumably a saved model to
            resume from -- confirm against `set_model`).
        save_images: bool, if truthy the image-dumping callback is handed to
            `main_loop`; if falsy no callback is passed.
        test_every: forwarded to `main_loop`.
        dim_h: forwarded to `Helmholtz.factory` and stored by `save`.
        rec_args: forwarded to `Helmholtz.factory` and stored by `save`.
        gen_args: forwarded to `Helmholtz.factory` and stored by `save`.
        prior: forwarded to `Helmholtz.factory`.
        preprocessing: argument for `Preprocessor`; defaults to `[]`.
        learning_args: dict or None, keyword arguments for
            `init_learning_args`. The entries `batch_size`,
            `valid_batch_size`, `n_posterior_samples`, `reweight`, `l2_decay`
            and `excludes` are consumed here; the rest goes to
            `set_optimizer` and then `main_loop`.
        dataset_args: dict, keyword arguments for `load_data`. Required.

    Raises:
        ValueError: if `dataset_args` is None, if none of the supported
            models is available, or if the posterior distribution is not
            continuous.
    '''

    # ========================================================================
    if preprocessing is None: preprocessing = []
    if learning_args is None: learning_args = dict()
    if dataset_args is None: raise ValueError('Dataset args must be provided')
    learning_args = init_learning_args(**learning_args)

    print('Dataset args: %s' % pprint.pformat(dataset_args))
    print('Learning args: %s' % pprint.pformat(learning_args))

    # ========================================================================
    print_section('Setting up data')
    batch_size = learning_args.pop('batch_size')
    valid_batch_size = learning_args.pop('valid_batch_size')
    train, valid, test = load_data(
        train_batch_size=batch_size,
        valid_batch_size=valid_batch_size,
        **dataset_args)

    # ========================================================================
    print_section('Setting model and variables')
    dim_in = train.dims[train.name]

    X = T.matrix('x', dtype=floatX)
    X.tag.test_value = np.zeros((batch_size, dim_in), dtype=X.dtype)
    # The return value is not used here; the call is kept in case it has
    # side effects -- TODO confirm against `get_trng`.
    get_trng()

    preproc = Preprocessor(preprocessing)
    X_i = preproc(X, data_iter=train)
    inps = [X]

    # ========================================================================
    print_section('Loading model and forming graph')

    def create_model():
        model = Helmholtz.factory(
            dim_h, train,
            prior=prior,
            rec_args=rec_args,
            gen_args=gen_args)

        models = OrderedDict()
        models[model.name] = model
        return models

    models = set_model(create_model, model_to_load, unpack)
    model_names = ['sbn', 'gbn', 'lbn']
    model = next((v for k, v in models.items() if k in model_names), None)
    if model is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError('No model named one of %r found (got %r)'
                         % (model_names, list(models.keys())))
    posterior = model.posterior
    if not posterior.distribution.is_continuous:
        raise ValueError('Cannot perform VAE with posterior with distribution '
                         '%r' % type(posterior.distribution))
    tparams = model.set_tparams()
    print_profile(tparams)

    # ==========================================================================
    print_section('Getting cost')
    n_posterior_samples = learning_args.pop('n_posterior_samples')
    reweight = learning_args.pop('reweight')
    results, samples, constants, updates = model(
        X_i, X, qk=None, pass_gradients=True,
        n_posterior_samples=n_posterior_samples, reweight=reweight)

    cost = results['cost']
    extra_outs = []
    extra_outs_keys = ['cost']

    l2_decay = learning_args.pop('l2_decay')
    if l2_decay is not False and l2_decay > 0.:
        print('Adding %.5f L2 weight decay' % l2_decay)
        l2_rval = model.l2_decay(l2_decay)
        # The 'cost' entry is folded into the objective; whatever remains is
        # reported as extra outputs.
        cost += l2_rval.pop('cost')
        extra_outs += l2_rval.values()
        extra_outs_keys += l2_rval.keys()

    # ==========================================================================
    print_section('Test functions')
    f_test_keys = results.keys()
    f_test = theano.function([X], results.values())

    prior_samples, p_updates = model.sample_from_prior()
    f_prior = theano.function([], prior_samples, updates=p_updates)

    latent_vis = model.visualize_latents()
    f_latent = theano.function([], latent_vis)

    py = samples['py']
    f_py_h = theano.function([X], py)

    # ========================================================================
    print_section('Setting final tparams and save function')
    excludes = learning_args.pop('excludes')
    tparams, all_params = set_params(
        tparams, updates, excludes=excludes)

    def save(tparams, outfile):
        # Values are read from `all_params` (closure); the `tparams` argument
        # is part of the callback signature but is not consulted.
        d = dict((k, v.get_value()) for k, v in all_params.items())
        d.update(
            dim_h=dim_h,
            rec_args=rec_args,
            gen_args=gen_args
        )
        np.savez(outfile, **d)

    # Named differently from the `save_images` flag so that the flag is not
    # shadowed and can actually be honoured below.
    def save_images_fn():
        p_samples = f_prior()
        # Regroup the leading axis into rows of 10.
        p_samples = p_samples.reshape(
            (p_samples.shape[0] // 10, 10, p_samples.shape[1]))
        train.save_images(p_samples, path.join(out_path, 'prior_samples.png'))

        l_vis = f_latent()
        l_vis = l_vis.reshape((l_vis.shape[0] // 10, 10, l_vis.shape[1]))
        train.save_images(l_vis, path.join(out_path, 'latent_vis.png'))

        py_h = f_py_h(train.X[:100])
        train.save_images(py_h, path.join(out_path, 'py_h.png'))

    # ========================================================================
    print_section('Getting gradients and building optimizer.')
    f_grad_shared, f_grad_updates, learning_args = set_optimizer(
        inps, cost, tparams, constants, updates, extra_outs, **learning_args)

    # ========================================================================
    print_section('Actually running (main loop)')
    monitor = SimpleMonitor()

    loop_kwargs = dict(learning_args)
    if save_images:
        loop_kwargs['save_images'] = save_images_fn

    main_loop(
        train, valid, tparams,
        f_grad_shared, f_grad_updates, f_test, f_test_keys,
        test_every=test_every,
        save=save,
        monitor=monitor,
        out_path=out_path,
        name=name,
        extra_outs_keys=extra_outs_keys,
        **loop_kwargs)
示例#2
0
def train(
    out_path=None, name='', model_to_load=None, save_images=True, test_every=None,
    dim_h=None, preprocessing=None,
    learning_args=None,
    inference_args=None,
    dataset_args=None):
    '''Build an RBM, compile its Theano functions and train it.

    Args:
        out_path: str, directory the image dumps are written into; also
            forwarded to `main_loop`.
        name: str, experiment name, forwarded to `main_loop`.
        model_to_load: forwarded to `set_model` (presumably a saved model to
            resume from -- confirm against `set_model`).
        save_images: bool, if truthy the image-dumping callback is handed to
            `main_loop`; if falsy no callback is passed.
        test_every: forwarded to `main_loop`.
        dim_h: second positional argument of `RBM`; stored by `save`.
        preprocessing: argument for `Preprocessor`; defaults to `[]`.
        learning_args: dict or None, keyword arguments for
            `init_learning_args`. `batch_size`, `valid_batch_size` and
            `excludes` are consumed here; the rest goes to `set_optimizer`
            and then `main_loop`.
        inference_args: dict or None, keyword arguments for
            `init_inference_args`. `persistent` is consumed here, `n_chains`
            is read when `persistent` is truthy, and what remains is
            forwarded to the model call.
        dataset_args: dict, keyword arguments for `load_data`. Required.

    Raises:
        ValueError: if `dataset_args` is None.
    '''

    # ========================================================================
    if preprocessing is None: preprocessing = []
    if learning_args is None: learning_args = dict()
    if inference_args is None: inference_args = dict()
    if dataset_args is None: raise ValueError('Dataset args must be provided')

    learning_args = init_learning_args(**learning_args)
    inference_args = init_inference_args(**inference_args)

    print('Dataset args: %s' % pprint.pformat(dataset_args))
    print('Learning args: %s' % pprint.pformat(learning_args))
    print('Inference args: %s' % pprint.pformat(inference_args))

    # ========================================================================
    print_section('Setting up data')
    batch_size = learning_args.pop('batch_size')
    valid_batch_size = learning_args.pop('valid_batch_size')
    train, valid, test = load_data(
        train_batch_size=batch_size,
        valid_batch_size=valid_batch_size,
        **dataset_args)

    # ========================================================================
    print_section('Setting model and variables')
    dim_in = train.dims[train.name]

    X = T.matrix('x', dtype=floatX)
    X.tag.test_value = np.zeros((batch_size, dim_in), dtype=X.dtype)
    # The return value is not used here; the call is kept in case it has
    # side effects -- TODO confirm against `get_trng`.
    get_trng()

    preproc = Preprocessor(preprocessing)
    X_i = preproc(X, data_iter=train)
    inps = [X]

    # ========================================================================
    print_section('Loading model and forming graph')

    def create_model():
        model = RBM(dim_in, dim_h, v_dist=train.distributions[train.name],
                    mean_image=train.mean_image)
        models = OrderedDict()
        models[model.name] = model
        return models

    models = set_model(create_model, model_to_load, unpack)
    model = models['rbm']
    tparams = model.set_tparams()
    print_profile(tparams)

    # ==========================================================================
    print_section('Getting cost')

    persistent = inference_args.pop('persistent')
    if persistent:
        # Shared buffer holding the hidden state carried between calls.
        H_p = theano.shared(
            np.zeros((inference_args['n_chains'], model.h_dist.dim)).astype(floatX),
            name='h_p')
    else:
        H_p = None
    # NOTE(review): the updates returned by the model are dropped and replaced
    # by a fresh OrderedUpdates below, exactly as the original code did.
    # Confirm this is intended and that no required updates are lost.
    results, samples, _model_updates, constants = model(
        X_i, h_p=H_p, **inference_args)

    updates = theano.OrderedUpdates()
    if persistent:
        # Feed the last hidden sample back into the persistent buffer.
        updates += theano.OrderedUpdates([(H_p, samples['hs'][-1])])

    cost = results['cost']
    extra_outs = [results['free_energy']]
    extra_outs_keys = ['cost', 'free_energy']

    # ==========================================================================
    print_section('Test functions')
    f_test_keys = results.keys()
    f_test = theano.function([X], results.values())

    try:
        _, z_updates = model.update_partition_function(K=1000)
        f_update_partition = theano.function([], [], updates=z_updates)
    except NotImplementedError:
        # Models without a partition-function update simply skip this hook.
        f_update_partition = None

    H0 = model.trng.binomial(size=(10, model.h_dist.dim), dtype=floatX)
    s_outs, s_updates = model.sample(H0, n_steps=100)
    f_chain = theano.function(
        [], model.v_dist.get_center(s_outs['pvs']), updates=s_updates)

    # ========================================================================
    print_section('Setting final tparams and save function')
    excludes = learning_args.pop('excludes')
    tparams, all_params = set_params(tparams, updates, excludes=excludes)

    def save(tparams, outfile):
        # Values are read from `all_params` (closure); the `tparams` argument
        # is part of the callback signature but is not consulted.
        d = dict((k, v.get_value()) for k, v in all_params.items())
        d.update(
            dim_in=dim_in,
            dim_h=dim_h
        )
        np.savez(outfile, **d)

    # Named differently from the `save_images` flag so that the flag is not
    # shadowed and can actually be honoured below.
    def save_images_fn():
        w = model.W.get_value().T
        w = w.reshape((10, w.shape[0] // 10, w.shape[1]))
        train.save_images(w, path.join(out_path, 'weights.png'))

        chain = f_chain()
        train.save_images(chain, path.join(out_path, 'chain.png'))

    # ========================================================================
    print_section('Getting gradients and building optimizer.')
    f_grad_shared, f_grad_updates, learning_args = set_optimizer(
        inps, cost, tparams, constants, updates, extra_outs, **learning_args)

    # ========================================================================
    print_section('Actually running (main loop)')
    monitor = SimpleMonitor()

    loop_kwargs = dict(learning_args)
    if save_images:
        loop_kwargs['save_images'] = save_images_fn

    main_loop(
        train, valid, tparams,
        f_grad_shared, f_grad_updates, f_test, f_test_keys,
        f_extra=f_update_partition,
        test_every=test_every,
        save=save,
        monitor=monitor,
        out_path=out_path,
        name=name,
        extra_outs_keys=extra_outs_keys,
        **loop_kwargs)
示例#3
0
def train(
    out_path=None, name='', model_to_load=None, test_every=None,
    classifier=None, preprocessing=None,
    learning_args=None,
    dataset_args=None):
    '''Basic training script.

    Args:
        out_path: str, path for output directory.
        name: str, name of experiment.
        model_to_load: forwarded to `set_model` (presumably a saved model to
            resume from -- confirm against `set_model`).
        test_every: int (optional), if not None, test every n epochs instead of
            every 1 epoch.
        classifier: dict or None, kwargs for MLP factory. None is treated as
            an empty dict.
        preprocessing: argument for `Preprocessor`; defaults to `[]`.
        learning_args: dict or None, see `init_learning_args` above for options.
        dataset_args: dict, arguments for Dataset class. Must contain a
            `keys` entry whose first two items name the input and the label
            data. The caller's dict is left unmodified.

    Raises:
        ValueError: if `dataset_args` is None.
    '''

    # ========================================================================
    if preprocessing is None: preprocessing = []
    if learning_args is None: learning_args = dict()
    # Without this, `**classifier` below fails on the None default.
    if classifier is None: classifier = dict()
    if dataset_args is None: raise ValueError('Dataset args must be provided')

    learning_args = init_learning_args(**learning_args)
    print('Dataset args: %s' % pprint.pformat(dataset_args))
    print('Learning args: %s' % pprint.pformat(learning_args))

    # ========================================================================
    print_section('Setting up data')
    # Work on a copy so that popping `keys` does not alter the caller's dict
    # (a second call with the same config would otherwise fail).
    dataset_args = dict(dataset_args)
    input_keys = dataset_args.pop('keys')
    batch_size = learning_args.pop('batch_size')
    valid_batch_size = learning_args.pop('valid_batch_size')
    train, valid, test = load_data(
        train_batch_size=batch_size,
        valid_batch_size=valid_batch_size,
        **dataset_args)

    # ========================================================================
    print_section('Setting model and variables')
    dim_in = train.dims[input_keys[0]]
    dim_out = train.dims[input_keys[1]]

    X = T.matrix('x', dtype=floatX) # Input data
    Y = T.matrix('y', dtype=floatX) # Labels
    X.tag.test_value = np.zeros((batch_size, dim_in), dtype=X.dtype)
    Y.tag.test_value = np.zeros((batch_size, dim_out), dtype=Y.dtype)
    # The return value is not used here; the call is kept in case it has
    # side effects -- TODO confirm against `get_trng`.
    get_trng()

    preproc = Preprocessor(preprocessing)
    X_i = preproc(X, data_iter=train)
    inps = [X, Y]

    # ========================================================================
    print_section('Loading model and forming graph')
    dropout = learning_args.pop('dropout')

    def create_model():
        model = MLP.factory(dim_in=dim_in, dim_out=dim_out,
                            distribution=train.distributions[input_keys[1]],
                            dropout=dropout,
                            **classifier)
        models = OrderedDict()
        models[model.name] = model
        return models

    def unpack(dim_in=None, dim_out=None, mlp=None, **model_args):
        # Rebuilds the model from the fields written by `save` below; any
        # remaining saved entries are handed back as `model_args`.
        model = MLP.factory(dim_in=dim_in, dim_out=dim_out, **mlp)
        models = [model]
        return models, model_args, None

    models = set_model(create_model, model_to_load, unpack)
    model = models['MLP']
    tparams = model.set_tparams()
    print_profile(tparams)

    # ==========================================================================
    print_section('Getting cost')
    outs = model(X_i)
    p = outs['p']
    base_cost = model.neg_log_prob(Y, p).sum(axis=0)
    cost = base_cost

    updates = theano.OrderedUpdates()

    l2_decay = learning_args.pop('l2_decay')
    # Evaluated once and reused for the test outputs below.
    use_l2 = l2_decay is not False and l2_decay > 0.
    if use_l2:
        print('Adding %.5f L2 weight decay' % l2_decay)
        l2_rval = model.l2_decay(l2_decay)
        l2_cost = l2_rval.pop('cost')
        # New expression: `base_cost` must stay free of the L2 term because
        # it is reported separately by `f_test`.
        cost = cost + l2_cost

    constants = []
    extra_outs = []
    extra_outs_keys = ['cost']

    # ==========================================================================
    print_section('Test functions')
    error = (Y * (1 - p)).sum(axis=1).mean()

    f_test_keys = ['error', 'cost']
    f_test_vals = [error, base_cost]

    if use_l2:
        f_test_keys.append('L2 cost')
        f_test_vals.append(l2_cost)
    f_test = theano.function([X, Y], f_test_vals)

    # ========================================================================
    print_section('Setting final tparams and save function')
    tparams, all_params = set_params(tparams, updates)

    def save(tparams, outfile):
        # Values are read from `all_params` (closure); the `tparams` argument
        # is part of the callback signature but is not consulted.
        d = dict((k, v.get_value()) for k, v in all_params.items())
        d.update(
            dim_in=dim_in,
            dim_out=dim_out,
            mlp=classifier
        )
        np.savez(outfile, **d)

    # ========================================================================
    print_section('Getting gradients and building optimizer.')
    f_grad_shared, f_grad_updates, learning_args = set_optimizer(
        inps, cost, tparams, constants, updates, extra_outs, **learning_args)

    # ========================================================================
    print_section('Actually running (main loop)')
    monitor = SimpleMonitor()

    main_loop(
        train, valid, tparams,
        f_grad_shared, f_grad_updates, f_test, f_test_keys,
        input_keys=input_keys,
        test_every=test_every,
        save=save,
        monitor=monitor,
        out_path=out_path,
        name=name,
        extra_outs_keys=extra_outs_keys,
        **learning_args)
示例#4
0
def train(out_path=None,
          name='',
          model_to_load=None,
          save_images=True,
          test_every=None,
          dim_h=None,
          preprocessing=None,
          learning_args=None,
          inference_args=None,
          dataset_args=None):
    '''Build an RBM, compile its Theano functions and train it.

    Args:
        out_path: str, directory the image dumps are written into; also
            forwarded to `main_loop`.
        name: str, experiment name, forwarded to `main_loop`.
        model_to_load: forwarded to `set_model` (presumably a saved model to
            resume from -- confirm against `set_model`).
        save_images: bool, if truthy the image-dumping callback is handed to
            `main_loop`; if falsy no callback is passed.
        test_every: forwarded to `main_loop`.
        dim_h: second positional argument of `RBM`; stored by `save`.
        preprocessing: argument for `Preprocessor`; defaults to `[]`.
        learning_args: dict or None, keyword arguments for
            `init_learning_args`. `batch_size`, `valid_batch_size` and
            `excludes` are consumed here; the rest goes to `set_optimizer`
            and then `main_loop`.
        inference_args: dict or None, keyword arguments for
            `init_inference_args`. `persistent` is consumed here, `n_chains`
            is read when `persistent` is truthy, and what remains is
            forwarded to the model call.
        dataset_args: dict, keyword arguments for `load_data`. Required.

    Raises:
        ValueError: if `dataset_args` is None.
    '''

    # ========================================================================
    if preprocessing is None: preprocessing = []
    if learning_args is None: learning_args = dict()
    if inference_args is None: inference_args = dict()
    if dataset_args is None: raise ValueError('Dataset args must be provided')

    learning_args = init_learning_args(**learning_args)
    inference_args = init_inference_args(**inference_args)

    print('Dataset args: %s' % pprint.pformat(dataset_args))
    print('Learning args: %s' % pprint.pformat(learning_args))
    print('Inference args: %s' % pprint.pformat(inference_args))

    # ========================================================================
    print_section('Setting up data')
    batch_size = learning_args.pop('batch_size')
    valid_batch_size = learning_args.pop('valid_batch_size')
    train, valid, test = load_data(train_batch_size=batch_size,
                                   valid_batch_size=valid_batch_size,
                                   **dataset_args)

    # ========================================================================
    print_section('Setting model and variables')
    dim_in = train.dims[train.name]

    X = T.matrix('x', dtype=floatX)
    X.tag.test_value = np.zeros((batch_size, dim_in), dtype=X.dtype)
    # The return value is not used here; the call is kept in case it has
    # side effects -- TODO confirm against `get_trng`.
    get_trng()

    preproc = Preprocessor(preprocessing)
    X_i = preproc(X, data_iter=train)
    inps = [X]

    # ========================================================================
    print_section('Loading model and forming graph')

    def create_model():
        model = RBM(dim_in,
                    dim_h,
                    v_dist=train.distributions[train.name],
                    mean_image=train.mean_image)
        models = OrderedDict()
        models[model.name] = model
        return models

    models = set_model(create_model, model_to_load, unpack)
    model = models['rbm']
    tparams = model.set_tparams()
    print_profile(tparams)

    # ==========================================================================
    print_section('Getting cost')

    persistent = inference_args.pop('persistent')
    if persistent:
        # Shared buffer holding the hidden state carried between calls.
        H_p = theano.shared(np.zeros(
            (inference_args['n_chains'], model.h_dist.dim)).astype(floatX),
                            name='h_p')
    else:
        H_p = None
    # NOTE(review): the updates returned by the model are dropped and replaced
    # by a fresh OrderedUpdates below, exactly as the original code did.
    # Confirm this is intended and that no required updates are lost.
    results, samples, _model_updates, constants = model(X_i,
                                                        h_p=H_p,
                                                        **inference_args)

    updates = theano.OrderedUpdates()
    if persistent:
        # Feed the last hidden sample back into the persistent buffer.
        updates += theano.OrderedUpdates([(H_p, samples['hs'][-1])])

    cost = results['cost']
    extra_outs = [results['free_energy']]
    extra_outs_keys = ['cost', 'free_energy']

    # ==========================================================================
    print_section('Test functions')
    f_test_keys = results.keys()
    f_test = theano.function([X], results.values())

    try:
        _, z_updates = model.update_partition_function(K=1000)
        f_update_partition = theano.function([], [], updates=z_updates)
    except NotImplementedError:
        # Models without a partition-function update simply skip this hook.
        f_update_partition = None

    H0 = model.trng.binomial(size=(10, model.h_dist.dim), dtype=floatX)
    s_outs, s_updates = model.sample(H0, n_steps=100)
    f_chain = theano.function([],
                              model.v_dist.get_center(s_outs['pvs']),
                              updates=s_updates)

    # ========================================================================
    print_section('Setting final tparams and save function')
    excludes = learning_args.pop('excludes')
    tparams, all_params = set_params(tparams, updates, excludes=excludes)

    def save(tparams, outfile):
        # Values are read from `all_params` (closure); the `tparams` argument
        # is part of the callback signature but is not consulted.
        d = dict((k, v.get_value()) for k, v in all_params.items())
        d.update(dim_in=dim_in, dim_h=dim_h)
        np.savez(outfile, **d)

    # Named differently from the `save_images` flag so that the flag is not
    # shadowed and can actually be honoured below.
    def save_images_fn():
        w = model.W.get_value().T
        w = w.reshape((10, w.shape[0] // 10, w.shape[1]))
        train.save_images(w, path.join(out_path, 'weights.png'))

        chain = f_chain()
        train.save_images(chain, path.join(out_path, 'chain.png'))

    # ========================================================================
    print_section('Getting gradients and building optimizer.')
    f_grad_shared, f_grad_updates, learning_args = set_optimizer(
        inps, cost, tparams, constants, updates, extra_outs, **learning_args)

    # ========================================================================
    print_section('Actually running (main loop)')
    monitor = SimpleMonitor()

    loop_kwargs = dict(learning_args)
    if save_images:
        loop_kwargs['save_images'] = save_images_fn

    main_loop(train,
              valid,
              tparams,
              f_grad_shared,
              f_grad_updates,
              f_test,
              f_test_keys,
              f_extra=f_update_partition,
              test_every=test_every,
              save=save,
              monitor=monitor,
              out_path=out_path,
              name=name,
              extra_outs_keys=extra_outs_keys,
              **loop_kwargs)
示例#5
0
def train(out_path=None,
          name='',
          model_to_load=None,
          save_images=True,
          test_every=None,
          dim_h=None,
          rec_args=None,
          gen_args=None,
          prior='gaussian',
          preprocessing=None,
          learning_args=None,
          dataset_args=None):
    '''Build a Helmholtz model on a split dataset and train it.

    Args:
        out_path: str, directory the image dumps are written into; also
            forwarded to `main_loop`.
        name: str, experiment name, forwarded to `main_loop`.
        model_to_load: forwarded to `set_model` (presumably a saved model to
            resume from -- confirm against `set_model`).
        save_images: bool, if truthy the image-dumping callback is handed to
            `main_loop`; if falsy no callback is passed.
        test_every: forwarded to `main_loop`.
        dim_h: forwarded to `Helmholtz.factory` and stored by `save`.
        rec_args: forwarded to `Helmholtz.factory` and stored by `save`.
        gen_args: forwarded to `Helmholtz.factory` and stored by `save`.
        prior: forwarded to `Helmholtz.factory`.
        preprocessing: argument for `Preprocessor`; defaults to `[]`.
        learning_args: dict or None, keyword arguments for
            `init_learning_args`. `batch_size`, `valid_batch_size`,
            `n_posterior_samples`, `l2_decay` and `excludes` are consumed
            here; the rest goes to `set_optimizer` and then `main_loop`.
        dataset_args: dict, keyword arguments for `load_data_split`. Must
            contain a `dataset` entry, which is resolved with
            `resolve_dataset`. Required. An `idx` entry is written back into
            this dict.

    Raises:
        ValueError: if `dataset_args` is None, if none of the supported
            models is available, or if the posterior distribution is not
            continuous.
    '''

    # ========================================================================
    if preprocessing is None: preprocessing = []
    if learning_args is None: learning_args = dict()
    if dataset_args is None: raise ValueError('Dataset args must be provided')
    learning_args = init_learning_args(**learning_args)

    print('Dataset args: %s' % pprint.pformat(dataset_args))
    print('Learning args: %s' % pprint.pformat(learning_args))

    # ========================================================================
    print_section('Setting up data')
    batch_size = learning_args.pop('batch_size')
    valid_batch_size = learning_args.pop('valid_batch_size')
    dataset = dataset_args['dataset']
    dataset_class = resolve_dataset(dataset)

    train, valid, test, idx = load_data_split(
        dataset_class,
        train_batch_size=batch_size,
        valid_batch_size=valid_batch_size,
        **dataset_args)
    # NOTE(review): this writes into the caller's dict and the value is not
    # read again in this function; presumably the caller keeps the split
    # indices for later -- confirm before changing.
    dataset_args['idx'] = idx

    # ========================================================================
    print_section('Setting model and variables')
    dim_in = train.dims[train.name]

    X = T.matrix('x', dtype=floatX)
    X.tag.test_value = np.zeros((batch_size, dim_in), dtype=X.dtype)
    # The return value is not used here; the call is kept in case it has
    # side effects -- TODO confirm against `get_trng`.
    get_trng()

    preproc = Preprocessor(preprocessing)
    X_i = preproc(X, data_iter=train)
    inps = [X]

    # ========================================================================
    print_section('Loading model and forming graph')

    def create_model():
        model = Helmholtz.factory(dim_h,
                                  train,
                                  prior=prior,
                                  rec_args=rec_args,
                                  gen_args=gen_args)

        models = OrderedDict()
        models[model.name] = model
        return models

    models = set_model(create_model, model_to_load, unpack)
    model_names = ['sbn', 'gbn', 'lbn', 'labn']
    model = next((v for k, v in models.items() if k in model_names), None)
    if model is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError('No model named one of %r found (got %r)'
                         % (model_names, list(models.keys())))
    posterior = model.posterior
    if not posterior.distribution.is_continuous:
        raise ValueError('Cannot perform VAE with posterior with distribution '
                         '%r' % type(posterior.distribution))
    tparams = model.set_tparams()
    print_profile(tparams)

    # ==========================================================================
    print_section('Getting cost')
    n_posterior_samples = learning_args.pop('n_posterior_samples')
    results, samples, updates, constants = model(
        X_i,
        X,
        qk=None,
        pass_gradients=True,
        n_posterior_samples=n_posterior_samples)

    cost = results['cost']
    extra_outs = []
    extra_outs_keys = ['cost']

    l2_decay = learning_args.pop('l2_decay')
    if l2_decay is not False and l2_decay > 0.:
        print('Adding %.5f L2 weight decay' % l2_decay)
        l2_rval = model.l2_decay(l2_decay)
        # The 'cost' entry is folded into the objective; whatever remains is
        # reported as extra outputs.
        cost += l2_rval.pop('cost')
        extra_outs += l2_rval.values()
        extra_outs_keys += l2_rval.keys()

    # ==========================================================================
    print_section('Test functions')
    f_test_keys = results.keys()
    f_test = theano.function([X], results.values())

    prior_samples, p_updates = model.sample_from_prior()
    f_prior = theano.function([],
                              model.get_center(prior_samples),
                              updates=p_updates)

    latent_vis = model.visualize_latents()
    f_latent = theano.function([], latent_vis)

    py = model.get_center(samples['py'])
    f_py_h = theano.function([X], py)

    # ========================================================================
    print_section('Setting final tparams and save function')
    excludes = learning_args.pop('excludes')
    tparams, all_params = set_params(tparams, updates, excludes=excludes)

    def save(tparams, outfile):
        # Values are read from `all_params` (closure); the `tparams` argument
        # is part of the callback signature but is not consulted.
        d = dict((k, v.get_value()) for k, v in all_params.items())
        d.update(dim_h=dim_h, rec_args=rec_args, gen_args=gen_args)
        np.savez(outfile, **d)

    # Named differently from the `save_images` flag so that the flag is not
    # shadowed and can actually be honoured below.
    def save_images_fn():
        p_samples = f_prior()
        train.save_images(p_samples, path.join(out_path, 'prior_samples.png'))

        l_vis = f_latent()
        train.save_images(l_vis, path.join(out_path, 'latent_vis.png'))

        py_h = f_py_h(train.X[:100])
        train.save_images(py_h, path.join(out_path, 'py_h.png'))

    # ========================================================================
    print_section('Getting gradients and building optimizer.')
    f_grad_shared, f_grad_updates, learning_args = set_optimizer(
        inps, cost, tparams, constants, updates, extra_outs, **learning_args)

    # ========================================================================
    print_section('Actually running (main loop)')
    monitor = SimpleMonitor()

    loop_kwargs = dict(learning_args)
    if save_images:
        loop_kwargs['save_images'] = save_images_fn

    main_loop(train,
              valid,
              tparams,
              f_grad_shared,
              f_grad_updates,
              f_test,
              f_test_keys,
              test_every=test_every,
              save=save,
              monitor=monitor,
              out_path=out_path,
              name=name,
              extra_outs_keys=extra_outs_keys,
              **loop_kwargs)