Example #1
                         num_channels=args.C,
                         wd=args.wd,
                         wd1=args.wd1,
                         pixel_k=args.pixel_k,
                         lp_k=args.lp_k,
                         bp_ks=args.bp_ks)

    # ######################################################################
    # Build the optimizer - use separate parameter groups for the gain
    # and convolutional layers
    default_params = model.parameters()
    wave_params = model.wave_parameters()
    optim, sched = get_optim('sgd',
                             default_params,
                             init_lr=args.lr,
                             steps=args.steps,
                             wd=0,
                             gamma=args.gamma,
                             momentum=args.mom,
                             max_epochs=args.epochs)

    if len(wave_params) > 0:
        if args.lr1 is None:
            args.lr1 = args.lr
        if args.mom1 is None:
            args.mom1 = args.mom
        optim2, sched2 = get_optim('sgd',
                                   wave_params,
                                   init_lr=args.lr1,
                                   steps=args.steps,
                                   wd=0,
                                   gamma=args.gamma,
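
Example #1 builds two optimizer/scheduler pairs from the same helper so the wavelet gain parameters (wave_params) can take their own learning rate (args.lr1) and momentum (args.mom1) while sharing the decay schedule. Below is a minimal sketch of a get_optim compatible with these call sites, assuming steps is a list of epoch milestones; only the signature is taken from the snippet, the body is a guess:

import torch.optim
from torch.optim.lr_scheduler import MultiStepLR

def get_optim(name, params, init_lr, steps, wd, gamma, momentum, max_epochs):
    # Hypothetical helper: SGD plus a multi-step learning-rate decay.
    assert name == 'sgd', 'only SGD is sketched here'
    optimizer = torch.optim.SGD(params, lr=init_lr, momentum=momentum,
                                weight_decay=wd)
    # Decay the rate by `gamma` at each milestone in `steps`; `max_epochs`
    # is accepted for compatibility but unused in this sketch.
    scheduler = MultiStepLR(optimizer, milestones=steps, gamma=gamma)
    return optimizer, scheduler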
Example #2
train_loader, eval_loader = get_data(args)
data_id = get_data_id(args)

###################
## Specify model ##
###################

model = get_model(args)
model_id = get_model_id(args)

#######################
## Specify optimizer ##
#######################

optimizer, scheduler_iter, scheduler_epoch = get_optim(args,
                                                       model.parameters())
optim_id = get_optim_id(args)

##############
## Training ##
##############

exp = TeacherExperiment(args=args,
                        data_id=data_id,
                        model_id=model_id,
                        optim_id=optim_id,
                        train_loader=train_loader,
                        eval_loader=eval_loader,
                        model=model,
                        optimizer=optimizer,
                        scheduler_iter=scheduler_iter,
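
Example #2's get_optim returns the optimizer together with a per-iteration and a per-epoch scheduler, which TeacherExperiment then drives. A sketch of how such a pair is typically consumed (an illustration of the convention, not the actual TeacherExperiment internals):

def run_epochs(model, optimizer, scheduler_iter, scheduler_epoch,
               train_loader, epochs, loss_fn):
    # Step the iteration scheduler after every optimizer update and the
    # epoch scheduler once per full pass over the data.
    for _ in range(epochs):
        for x, y in train_loader:
            optimizer.zero_grad()
            loss_fn(model(x), y).backward()
            optimizer.step()
            if scheduler_iter is not None:
                scheduler_iter.step()
        if scheduler_epoch is not None:
            scheduler_epoch.step()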
Example #3
def main(cf):
    print(
        f"\nStarting divisize normalization experiment {cf.logdir}: --seed {cf.seed} --device {utils.DEVICE}"
    )
    pprint.pprint(cf)
    os.makedirs(cf.logdir, exist_ok=True)
    utils.seed(cf.seed)
    utils.save_json({k: str(v)
                     for (k, v) in cf.items()}, cf.logdir + "config.json")

    train_dataset = datasets.MNIST(train=True,
                                   scale=cf.label_scale,
                                   size=cf.train_size,
                                   normalize=cf.normalize)
    test_dataset = datasets.MNIST(train=False,
                                  scale=cf.label_scale,
                                  size=cf.test_size,
                                  normalize=cf.normalize)
    train_loader = datasets.get_dataloader(train_dataset, cf.batch_size)
    test_loader = datasets.get_dataloader(test_dataset, cf.batch_size)
    print(
        f"Loaded data [train batches: {len(train_loader)} test batches: {len(test_loader)}]"
    )

    model = PCModel(nodes=cf.nodes,
                    mu_dt=cf.mu_dt,
                    act_fn=cf.act_fn,
                    use_bias=cf.use_bias,
                    kaiming_init=cf.kaiming_init,
                    pe_fn=cf.pe_fn,
                    pe_fn_inverse=cf.pe_fn_inverse)
    optimizer = optim.get_optim(
        model.params,
        cf.optim,
        cf.lr,
        batch_scale=cf.batch_scale,
        grad_clip=cf.grad_clip,
        weight_decay=cf.weight_decay,
    )

    with torch.no_grad():
        metrics = {"acc": []}
        for epoch in range(1, cf.n_epochs + 1):

            print(f"\nTrain @ epoch {epoch} ({len(train_loader)} batches)")
            for batch_id, (img_batch, label_batch) in enumerate(train_loader):
                model.train_batch_supervised(img_batch,
                                             label_batch,
                                             cf.n_train_iters,
                                             fixed_preds=cf.fixed_preds_train)
                optimizer.step(
                    curr_epoch=epoch,
                    curr_batch=batch_id,
                    n_batches=len(train_loader),
                    batch_size=img_batch.size(0),
                )

            if epoch % cf.test_every == 0:
                acc = 0
                for _, (img_batch, label_batch) in enumerate(test_loader):
                    label_preds = model.test_batch_supervised(img_batch)
                    acc += datasets.accuracy(label_preds, label_batch)
                metrics["acc"].append(acc / len(test_loader))
                print("\nTest @ epoch {} / Accuracy: {:.4f}".format(
                    epoch, acc / len(test_loader)))

            utils.save_json(metrics, cf.logdir + "metrics.json")
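
Note that the optimizer in example #3 is not a stock torch.optim object: the whole loop runs under torch.no_grad() and step() receives the epoch/batch bookkeeping, so learning-rate handling, batch scaling, and gradient clipping must live inside the helper. A rough sketch of such an optimizer under those assumptions (the class is invented for illustration):

class ManualSGD:
    # Hypothetical optimizer over a list of tensors whose .grad fields
    # are filled in manually by the predictive-coding updates.
    def __init__(self, params, lr, batch_scale=False, grad_clip=None,
                 weight_decay=0.0):
        self.params, self.lr = params, lr
        self.batch_scale, self.grad_clip = batch_scale, grad_clip
        self.weight_decay = weight_decay

    def step(self, curr_epoch=None, curr_batch=None, n_batches=None,
             batch_size=None):
        for p in self.params:
            grad = p.grad
            if self.batch_scale and batch_size:
                grad = grad / batch_size              # average over the batch
            if self.grad_clip is not None:
                grad = grad.clamp(-self.grad_clip, self.grad_clip)
            if self.weight_decay:
                grad = grad + self.weight_decay * p
            p -= self.lr * grad                       # plain SGD update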
Example #4
def train(options, data, load_params=False, start_epoc=0):
    print "OPTIONS: ", options
    print 'Setting up model with options:'
    options = set_defaults(options)
    for kk, vv in options.iteritems():
        print kk, vv
    print "model seed: ", options['model_seed']
    print "fold: ", options['fold']
    print 'seed: ', options['seed']
    rng = numpy.random.RandomState(options['model_seed'] +
                                   100 * options.get('fold', 99) +
                                   options.get('seed', 99))
    params, operators = init_params(options, rng)
    print 'done...'

    if load_params:
        loaded = load_par(options)
        start_epoc = resume_epoc(options)
        # Check that we've loaded the correct parameters...
        for kk, vv in loaded.iteritems():
            assert params[kk].shape == vv.shape
            assert type(params[kk]) == type(vv)
        params = loaded

    tparams = init_tparams(params)

    trng, use_noise, inps, out = build_model(tparams, options, rng)
    y = tensor.imatrix('y')
    cost = nll(out, y)

    f_eval = theano.function([inps, y],
                             cost,
                             givens={use_noise: numpy.float32(0.)},
                             on_unused_input='ignore')

    reg = 0.
    for k, v in tparams.iteritems():
        if k[:6] == 'hidden' or k[-3:] == 'W_h':
            reg += options['l1'] * tensor.sum(abs(v))
            reg += options['l2'] * tensor.sum((v)**2)

    cost += reg

    grads = tensor.grad(cost, wrt=itemlist(tparams))
    lr = tensor.scalar(name='lr', dtype=theano.config.floatX)
    opt = get_optim(options['opt'])
    print 'Compiling functions'
    f_grad_shared, f_update, gshared = opt(lr, tparams, grads, [inps, y], cost,
                                           use_noise)
    f_out = theano.function([inps],
                            out,
                            givens={use_noise: numpy.float32(0.)},
                            on_unused_input='ignore',
                            allow_input_downcast=True)

    best = numpy.inf
    print 'Starting training'

    train = list_update(data[0], f_eval, options['batch_size'], rng=rng)
    test = list_update(data[-1], f_eval, options['batch_size'], rng=rng)
    starting = (train, test)
    print 'Pre-training. test: %f, train: %f' % (test, train)
    print 'Training'
    lr = options['lr']
    max_itr = options['max_itr']
    grad_norm = 0.
    train_scores = 50 * [0.]
    try:
        for epoch in xrange(max_itr):
            start_time = time.time()
            for g in gshared:
                # manually set gradients to 0 because we accumulate in list update
                g.set_value(0.0 * g.get_value())
            use_noise.set_value(1.)
            train_cost, n_obs = list_update(data[0],
                                            f_grad_shared,
                                            batchsize=options['batch_size'],
                                            rng=rng,
                                            return_n_obs=True)
            use_noise.set_value(0.)
            for g in gshared:
                g.set_value(floatx(g.get_value() / float(n_obs)))
            f_update(lr)
            apply_proximity(tparams, operators)
            train = list_update(data[0],
                                f_eval,
                                options['batch_size'],
                                rng=rng)
            elapsed_time = time.time() - start_time

            if train < best:
                # early stopping on training set
                test = list_update(data[-1], f_eval)
                best_par = unzip(tparams)
                best_perf = (train, test)
                best = train

            test = list_update(data[-1], f_eval)

            if (epoch % 50) == 0:
                # Save progress....
                save_progress(options, tparams, epoch, best_perf)
                print 'Epoch: %d, cost: %f, train: %f, test: %f, lr:%f, time: %f' % (
                    epoch, train_cost, train, test, lr, elapsed_time)

            # Check if we're diverging...
            train_ave = running_ave(train_scores, train, epoch)

            if epoch > 1000:
                # Only exit if we're diverging after 1000 iterations
                if train_ave > 1.03 * best_perf[0]:
                    print "Diverged..."
                    break
    except KeyboardInterrupt:
        print "Interrupted"
    # check that we're outputting prob distributions
    X = data[0][(3, 3)][0]
    assert abs(
        f_out(X.reshape(X.shape[0], 2, 3, 3)).sum() - float(X.shape[0])) < 1e-4
    print "Best performance:"
    print "train, test"
    print "%f,%f" % best_perf
    return best_perf, best_par
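
Example #4 zeroes the shared gradient buffers (gshared), accumulates gradients over the entire training list via f_grad_shared, averages by the observation count, and only then applies a single f_update(lr): full-batch gradient descent expressed with Theano shared variables. The same accumulate-then-average pattern in PyTorch, for reference (all names below are stand-ins, not from the snippet):

import torch
import torch.nn as nn

model = nn.Linear(18, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
criterion = nn.CrossEntropyLoss(reduction='sum')
loader = [(torch.randn(8, 18), torch.randint(0, 2, (8,))) for _ in range(5)]

optimizer.zero_grad()                    # like zeroing each g in gshared
n_obs = 0
for x, y in loader:
    criterion(model(x), y).backward()    # grads accumulate in p.grad
    n_obs += x.size(0)
for p in model.parameters():
    p.grad /= n_obs                      # like g.get_value() / float(n_obs)
optimizer.step()                         # like f_update(lr)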
Example #5
train_loader, eval_loader, data_shape = get_data(args)
data_id = get_data_id(args)

###################
## Specify model ##
###################

model = get_model(args, data_shape=data_shape)
model_id = get_model_id(args)

#######################
## Specify optimizer ##
#######################

optimizer, _, _ = get_optim(args, model)
optim_id = f"more_{get_optim_id(args)}"

##############
## Training ##
##############

exp = FlowExperiment(args=args,
                     data_id=data_id,
                     model_id=model_id,
                     optim_id=optim_id,
                     train_loader=train_loader,
                     eval_loader=eval_loader,
                     model=model,
                     optimizer=optimizer,
                     scheduler_iter=None,
Example #6
train_loader, eval_loader, data_shape = get_data(args)
data_id = get_data_id(args)

###################
## Specify model ##
###################

model = get_model(args, data_shape=data_shape)
model_id = get_model_id(args)

#######################
## Specify optimizer ##
#######################

optimizer, scheduler_iter, scheduler_epoch = get_optim(args, model)
optim_id = get_optim_id(args)

##############
## Training ##
##############

exp = FlowExperiment(args=args,
                     data_id=data_id,
                     model_id=model_id,
                     optim_id=optim_id,
                     train_loader=train_loader,
                     eval_loader=eval_loader,
                     model=model,
                     optimizer=optimizer,
                     scheduler_iter=scheduler_iter,
Example #7
###############
## Load args ##
###############

with open(path_args, 'rb') as f:
    args = pickle.load(f)

################
## Experiment ##
################

if eval_args.model_type == "flow":
    student, teacher, data_id = get_model(args)
    model_id = get_model_id(args)
    args.dataset = data_id

    optimizer, scheduler_iter, scheduler_epoch = get_optim(
        args, student.parameters())
    optim_id = get_optim_id(args)

    exp = StudentExperiment(args=args,
                            data_id=data_id,
                            model_id=model_id,
                            optim_id=optim_id,
                            model=student,
                            teacher=teacher,
                            optimizer=optimizer,
                            scheduler_iter=scheduler_iter,
                            scheduler_epoch=scheduler_epoch)
else:
    student, teacher, data_id = get_baseline(args)
    model_id = get_model_id(args)
    args.dataset = data_id
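
The evaluation script in example #7 reconstructs the student/teacher setup from a pickled argparse namespace before rebuilding the optimizer with get_optim. The training-side counterpart is presumably a dump like the following (an assumption; only the load appears in the snippet):

import pickle

# Hypothetical save at the end of training, mirroring the load above;
# `args` is the namespace the training script built, `path_args` its path.
with open(path_args, 'wb') as f:
    pickle.dump(args, f)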