Example No. 1
def train(model, dataset, cfg, save_dir):
    train_loss_batch = []  # record_loss
    train_acc_batch = []  # record_accuracy
    train_loss = []
    val_loss = []
    val_acc = []
    save_loss(train_loss, save_dir, 'training_cost.txt', first_use=True)
    save_loss(val_loss, save_dir, 'validation_cost.txt', first_use=True)
    time_since_improvement = 0
    for epoch in range(cfg.max_epochs):
        inds = range(dataset.n_train)
        np.random.shuffle(inds)
        tot_batches = int(np.ceil(1.0 * dataset.n_train / cfg.batch_size))
        for batch_num in range(tot_batches):
            batch_inds = inds[batch_num * cfg.batch_size:min(
                (batch_num + 1) * cfg.batch_size, dataset.n_train)]
            feed_dict = {
                model.input_images:
                dataset.data['train_images'][batch_inds, :],
                model.labels:
                dataset.data['train_labels'][batch_inds],
                model.lr:
                cfg.learning_rate,
                model.use_past_bt:
                False,
                model.input_past_bt:
                np.zeros((len(batch_inds), model.cfg.input_height,
                          model.cfg.input_width, model.cfg.input_nchannels)),
                model.fc4_past_bt:
                np.zeros((len(batch_inds), 1000))
            }
            loss, acc, _ = model.sess.run(
                [model.loss, model.accuracy, model.train_op],
                feed_dict=feed_dict)
            train_loss_batch.append(loss)
            train_acc_batch.append(acc)
            print 'Epoch-Batch: {:3d}-{:3d}  train_loss: {:.3f}  train_acc:{:.3f}'.format(
                epoch + 1, batch_num + 1, train_loss_batch[-1],
                train_acc_batch[-1])
        train_loss.append(np.mean(train_loss_batch[-tot_batches:]))
        save_loss(train_loss[-1:], save_dir, 'training_cost.txt')
        print 'Epoch {} - Average Training Cost: {:.3f}'.format(
            epoch + 1, train_loss[-1])
        if train_loss[-1] == min(train_loss):
            time_since_improvement = 0
        else:
            time_since_improvement += 1
            if time_since_improvement >= cfg.early_stopping:
                print 'early stopping: no improvement for', cfg.early_stopping, 'epochs.'
                break

        vl, va = validate(model, dataset)
        val_loss.append(vl)
        val_acc.append(va)
        save_loss(val_loss[-1:], save_dir, 'validation_cost.txt')
    return train_loss_batch, train_acc_batch, train_loss, val_loss, val_acc
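
The examples on this page rely on a save_loss helper that is not shown here. From the call sites -- save_loss(values, save_dir, filename, first_use=True) at start-up and save_loss(train_loss[-1:], save_dir, 'training_cost.txt') once per epoch -- it appears to write one value per line to a text file, truncating the file when first_use is True and appending otherwise. A minimal sketch under those assumptions (the real helper may differ):

import os

def save_loss(values, save_dir, filename, first_use=False):
    # Minimal sketch of the logging helper assumed by these examples:
    # append one loss value per line to save_dir/filename; with
    # first_use=True the file is created (or truncated) instead.
    path = os.path.join(save_dir, filename)
    mode = 'w' if first_use else 'a'
    with open(path, mode) as f:
        for v in values:
            f.write('{}\n'.format(v))
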
Example No. 2
def train_autoencoder_vanilla(model, dataset, cfg, save_dir):
    train_loss_batch = []  # record_loss
    train_loss = []
    val_loss = []
    save_loss(train_loss, save_dir, 'training_cost.txt', first_use=True)
    save_loss(val_loss, save_dir, 'validation_cost.txt', first_use=True)
    print np.min(dataset.data['train_images'])
    print np.max(dataset.data['train_images'])
    print '=' * 100
    time_since_improvement = 0  # early stopping
    train_time = 0.0
    for epoch in range(cfg.max_epochs):
        st = time.time()
        inds = range(dataset.n_train)
        np.random.shuffle(inds)
        tot_batches = int(np.ceil(1.0 * dataset.n_train / cfg.batch_size))
        for batch_num in range(tot_batches):
            batch_inds = inds[batch_num * cfg.batch_size:min(
                (batch_num + 1) * cfg.batch_size, dataset.n_train)]
            feed_dict = {
                model.input_images:
                dataset.data['train_images'][batch_inds, :],
                model.lr: cfg.learning_rate,
            }
            loss, _ = model.sess.run([model.loss, model.train_op],
                                     feed_dict=feed_dict)
            train_loss_batch.append(loss)
            print 'Epoch-Batch: {:3d}-{:3d}  train_loss: {:.3f}'.format(
                epoch + 1, batch_num + 1, train_loss_batch[-1])
        train_loss.append(np.mean(train_loss_batch[-tot_batches:]))
        save_loss(train_loss[-1:], save_dir, 'training_cost.txt')
        print 'Epoch {} - Average Training Cost: {:.3f}'.format(
            epoch + 1, train_loss[-1])

        ## train time
        train_time += time.time() - st
        print 'Total Train Time:', train_time

        ## validation
        vl, va = validate_autoencoder(model, dataset)
        val_loss.append(vl)
        save_loss(val_loss[-1:], save_dir, 'validation_cost.txt')

        ## early stopping
        time_since_improvement, early_stop = early_stopping(
            val_loss, time_since_improvement, cfg.early_stopping)
        if early_stop:
            break
    return train_loss, val_loss
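
early_stopping is another assumed helper: it takes the validation-loss history, the current no-improvement counter, and the patience limit (cfg.early_stopping), and returns the updated counter together with a stop flag. A minimal sketch consistent with how it is called above:

def early_stopping(val_loss, time_since_improvement, patience):
    # Sketch of the assumed helper: reset the counter when the latest
    # validation loss is a new minimum, otherwise increment it and
    # signal a stop once the counter reaches the patience limit.
    if val_loss and val_loss[-1] == min(val_loss):
        time_since_improvement = 0
    else:
        time_since_improvement += 1
    early_stop = time_since_improvement >= patience
    return time_since_improvement, early_stop
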
Example No. 3
def train(model, dataset, cfg, save_dir):
    train_loss_batch = [] # record_loss
    train_acc_batch = [] # record_accuracy
    train_loss = []
    val_loss = []
    val_acc = []
    save_loss(train_loss, save_dir, 'training_cost.txt', first_use=True)
    save_loss(val_loss, save_dir, 'validation_cost.txt', first_use=True)
    save_loss([], save_dir, 'max_learning_rates.txt', first_use=True)
    save_loss([], save_dir, 'learning_rates.txt', first_use=True)
    alpha = 1e-2
    for epoch in range(cfg.max_epochs):
        inds = range(dataset.n_train)
        np.random.shuffle(inds)
        tot_batches = int(np.ceil(1.0*dataset.n_train/cfg.batch_size))
        max_lr_epoch = []
        lr_epoch = []
        est = time.time()
        times = [[] for i in range(10)]
        for batch_num in range(tot_batches):
            bst = time.time()
            st = time.time()
            batch_inds = inds[batch_num*cfg.batch_size:min((batch_num+1)*cfg.batch_size,dataset.n_train)]
            ## get f(x) and gradients
            fd = {model.input_images: dataset.data['train_images'][batch_inds,:],
                  model.labels: dataset.data['train_labels'][batch_inds],
                  model.use_past_bt: False,
                  model.input_past_bt: np.zeros((len(batch_inds),model.cfg.input_height,model.cfg.input_width,model.cfg.input_nchannels)),
                  model.fc4_past_bt: np.zeros((len(batch_inds),1000))
                }
            loss, acc, grads, input_bt, fc4_bt = model.sess.run([model.loss, model.accuracy, model.grads, 
                                                                 model.input_binary_tensor, model.fc4_binary_tensor], 
                                                                 feed_dict=fd)
            fx = loss
            train_loss_batch.append(loss)
            train_acc_batch.append(acc)
            research_fd = {model.conv1_W_grad: grads[0],    model.conv1_b_grad: grads[1],
                           model.conv2_W_grad: grads[2],    model.conv2_b_grad: grads[3],
                           model.conv3_W_grad: grads[4],    model.conv3_b_grad: grads[5],
                           model.fc4_W_grad: grads[6],      model.fc4_b_grad: grads[7],
                           model.fc5_W_grad: grads[8],      model.fc5_b_grad: grads[9],
                          }
            times[0].append(time.time()-st)
            print 'fx and grads: ', time.time()-st
            st = time.time()
            gT_g = np.sum([np.sum(np.square(g)) for g in grads])
            times[1].append(time.time()-st)
            print 'gT_g: ', time.time()-st

            ## set fd to use old binary tensors
            st = time.time()
            fd = {model.input_images: dataset.data['train_images'][batch_inds,:],
                  model.labels: dataset.data['train_labels'][batch_inds],
                  model.use_past_bt: True,
                  model.input_past_bt: input_bt,
                  model.fc4_past_bt: fc4_bt
                }
            times[2].append(time.time()-st)
            print 'change fd: ', time.time()-st

            ## get f(x+alpha*g)
            st = time.time()
            research_fd[model.lr] = -alpha
            model.sess.run(model.change_weights_op, feed_dict=research_fd)
            times[3].append(time.time()-st)
            print 'change_weights_op: ', time.time()-st
            st = time.time()
            fx_plus_ag = model.sess.run(model.loss, feed_dict=fd)
            times[4].append(time.time()-st)
            print 'fx+: ', time.time()-st

            ## get f(x-alpha*g)
            st = time.time()
            research_fd[model.lr] = 2*alpha
            model.sess.run(model.change_weights_op, feed_dict=research_fd)
            times[5].append(time.time()-st)
            print 'change_weights_op: ', time.time()-st
            st = time.time()
            fx_minus_ag = model.sess.run(model.loss, feed_dict=fd)
            times[6].append(time.time()-st)
            print 'fx-: ', time.time()-st

            ## choose learning rate
            st = time.time()
            gT_H_g = (fx_plus_ag + fx_minus_ag - 2*fx)/(alpha**2)
            if not cfg.magic_2nd_order:
                max_lr = 2*gT_g/np.abs(gT_H_g)
                lr = min(fx/gT_g, max_lr)
            else: ## 2nd order magic
                if gT_g**2-2*gT_H_g*fx > 0:
                    max_lr = lr = - (-gT_g + np.sqrt(gT_g**2-2*gT_H_g*fx)) / gT_H_g
                else:
                    max_lr = lr = - (-gT_g/gT_H_g)

            max_lr_epoch.append(max_lr)
            lr_epoch.append(lr)
            times[7].append(time.time()-st)
            print 'choose lr: ', time.time()-st
            
            ## print
            st = time.time()
            if True:
                print ''
                print 'alpha             : ', alpha
                print 'f(x)              : ', fx
                print 'f(x+alpha*g)      : ', fx_plus_ag
                print 'f(x-alpha*g)      : ', fx_minus_ag
                print 'f(x+)+f(x-)-2f(x) : ', fx_plus_ag + fx_minus_ag - 2*fx
                print 'estimated (g.T)Hg : ', gT_H_g
                print '(g.T)g            : ', gT_g
                print 'max lr            : ', max_lr
                print 'lr                : ', lr
            print 'Epoch-Batch: {:3d}-{:3d}  train_loss: {:.3f}  train_acc:{:.3f}'.format(epoch+1,batch_num+1,
                                                                                          train_loss_batch[-1],train_acc_batch[-1])
            times[8].append(time.time()-st)
            print 'printing: ', time.time()-st

            ## quit?
            st = time.time()
            if gT_H_g==0.0:
                print 'gT_H_g==0.0, exiting'
                exit()

            ## update step
            research_fd[model.lr] = -alpha+lr
            model.sess.run(model.change_weights_op, feed_dict=research_fd)

            ## update alpha
            alpha = min(lr/2, 1e-1)

            times[9].append(time.time()-st)
            print 'quit? final update, alpha: ', time.time()-st
            print 'batch_time: ', time.time()-bst
            print '_'*100
        print 'avg_batch_time: ', (time.time()-est)/tot_batches
        for i in range(len(times)):
            print 'i: ', i, '   ', np.mean(times[i])
        train_loss.append(np.mean(train_loss_batch[-tot_batches:]))
        save_loss(max_lr_epoch, save_dir, 'max_learning_rates.txt')
        save_loss(lr_epoch, save_dir, 'learning_rates.txt')
        save_loss(train_loss[-1:], save_dir, 'training_cost.txt')
        print 'Epoch {} - Average Training Cost: {:.3f}'.format(epoch+1, train_loss[-1])
        #vl, va = validate(model, dataset)
        #val_loss.append(vl)
        #val_acc.append(va)
        #save_loss(val_loss[-1:], save_dir, 'validation_cost.txt')
    return train_loss_batch, train_acc_batch, train_loss, val_loss, val_acc
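
Example No. 3 picks the step size per batch from two extra loss evaluations along the gradient: curvature is estimated as gT_H_g = (f(x + alpha*g) + f(x - alpha*g) - 2*f(x)) / alpha**2, and the step is then either the first-order guess f(x)/gT_g clamped to 2*gT_g/|gT_H_g|, or (the "2nd order magic" branch) the smaller root of the quadratic model 0.5*lr**2*gT_H_g - lr*gT_g + f(x) = 0. A standalone NumPy sketch of just that selection rule (choose_lr is a hypothetical helper name, not part of the original code):

import numpy as np

def choose_lr(fx, gT_g, gT_H_g, second_order=False):
    # fx: current loss; gT_g: squared gradient norm; gT_H_g: curvature
    # along g, estimated by finite differences of the loss as above.
    if not second_order:
        max_lr = 2.0 * gT_g / np.abs(gT_H_g)   # trust bound from the curvature
        lr = min(fx / gT_g, max_lr)            # first-order step, clamped
    else:
        # root of 0.5*lr^2*gT_H_g - lr*gT_g + fx = 0, i.e. the step that
        # drives the quadratic model of the loss to zero
        disc = gT_g**2 - 2.0 * gT_H_g * fx
        if disc > 0:
            max_lr = lr = (gT_g - np.sqrt(disc)) / gT_H_g
        else:
            max_lr = lr = gT_g / gT_H_g        # minimizer of the model
    return max_lr, lr
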
Example No. 4
def train(model, dataset, cfg, save_dir):
    train_loss_batch = []  # record_loss
    train_acc_batch = []  # record_accuracy
    train_loss = []
    val_loss = []
    val_acc = []
    save_loss(train_loss, save_dir, 'training_cost.txt', first_use=True)
    save_loss(val_loss, save_dir, 'validation_cost.txt', first_use=True)
    save_loss([], save_dir, 'max_learning_rates.txt', first_use=True)
    save_loss([], save_dir, 'learning_rates.txt', first_use=True)
    alpha = 1e-2
    db = []  # d_biased
    gg2 = []
    timestep = 0
    count = 0
    for epoch in range(cfg.max_epochs):
        inds = range(dataset.n_train)
        np.random.shuffle(inds)
        tot_batches = int(np.ceil(1.0 * dataset.n_train / cfg.batch_size))
        max_lr_epoch = []
        lr_epoch = []
        est = time.time()
        for batch_num in range(tot_batches):
            timestep += 1
            bst = time.time()
            st = time.time()
            batch_inds = inds[batch_num * cfg.batch_size:min(
                (batch_num + 1) * cfg.batch_size, dataset.n_train)]
            ## get f(x) and gradients
            fd = {
                model.input_images:
                dataset.data['train_images'][batch_inds, :],
                model.labels: dataset.data['train_labels'][batch_inds],
                model.use_past_bt: False,
                model.h1_past_bt: np.zeros(
                    (len(batch_inds), model.cfg.h1_dim)),
                model.h2_past_bt: np.zeros((len(batch_inds), model.cfg.h2_dim))
            }
            loss, acc, grads, h1_bt, h2_bt = model.sess.run([
                model.loss, model.accuracy, model.grads,
                model.h1_binary_tensor, model.h2_binary_tensor
            ],
                                                            feed_dict=fd)
            fx = loss
            train_loss_batch.append(loss)
            train_acc_batch.append(acc)
            if db == []:
                #db  = [(1-cfg.eps)*grads[i] for i in range(len(grads))]
                d = [grads[i] for i in range(len(grads))]
                #gg2 = [(1-cfg.eps2)*grads[i]*grads[i] for i in range(len(grads))]
            else:
                #db  = [cfg.eps*db[i] + (1-cfg.eps)*grads[i] for i in range(len(grads))]
                mult1 = cfg.eps * (1 - cfg.eps**(timestep - 1)) / (1 - cfg.eps**timestep)
                mult2 = (1 - cfg.eps) / (1 - cfg.eps**timestep)
                d = [mult1 * d[i] + mult2 * grads[i] for i in range(len(grads))]
                #d  = [db[i]/(1-cfg.eps**timestep) for i in range(len(grads))]
                #gg2 = [cfg.eps*gg2[i] + (1-cfg.eps2)*grads[i]*grads[i] for i in range(len(grads))]
            #d  = [db[i]/(1-cfg.eps**timestep) for i in range(len(grads))]
            #gg = [gg2[i]/(1-cfg.eps2**timestep) for i in range(len(grads))]
            #d  = [d[i]/(np.sqrt(gg[i])+cfg.epsilon) for i in range(len(grads))]

            research_fd = {
                model.h1_W_grad: d[0],
                model.h1_b_grad: d[1],
                model.h2_W_grad: d[2],
                model.h2_b_grad: d[3],
                model.preds_W_grad: d[4],
                model.preds_b_grad: d[5],
            }
            print 'fx and grads: ', time.time() - st
            st = time.time()
            gT_d = np.sum([np.sum(grads[i] * d[i]) for i in range(len(grads))])
            print 'gT_d: ', time.time() - st

            ## set fd to use old binary tensors
            st = time.time()
            fd = {
                model.input_images:
                dataset.data['train_images'][batch_inds, :],
                model.labels: dataset.data['train_labels'][batch_inds],
                model.use_past_bt: True,
                model.h1_past_bt: h1_bt,
                model.h2_past_bt: h2_bt
            }
            print 'change fd: ', time.time() - st

            ## get f(x+alpha*d)
            st = time.time()
            research_fd[model.lr] = -alpha
            model.sess.run(model.change_weights_op, feed_dict=research_fd)
            fx_plus_ad, grads2 = model.sess.run([model.loss, model.grads],
                                                feed_dict=fd)
            print 'fx+: ', time.time() - st

            ## get f(x-alpha*d)
            st = time.time()
            research_fd[model.lr] = 2 * alpha
            model.sess.run(model.change_weights_op, feed_dict=research_fd)
            #fx_minus_ad = model.sess.run(model.loss, feed_dict=fd)
            fx_minus_ad, grads3 = model.sess.run([model.loss, model.grads],
                                                 feed_dict=fd)
            print 'fx-: ', time.time() - st

            ## estimate Hd and dT_H_d
            Hd = [
                grads2[i] / alpha - grads[i] / alpha for i in range(len(grads))
            ]
            dT_H_d = np.sum([np.sum(d[i] * Hd[i]) for i in range(len(grads))])

            ## choose learning rate
            st = time.time()
            dT_H_d_2 = (fx_plus_ad + fx_minus_ad - 2 * fx) / (alpha**2)
            Hd_23 = [
                grads2[i] / (2.0 * alpha) - grads3[i] / (2.0 * alpha)
                for i in range(len(grads))
            ]
            dT_H_d_23 = np.sum(
                [np.sum(d[i] * Hd_23[i]) for i in range(len(grads))])
            print 'dT_H_d_2', dT_H_d, dT_H_d_2, dT_H_d_23
            if not cfg.magic_2nd_order:
                if dT_H_d == 0.0:
                    max_lr = lr = 0.0
                else:
                    max_lr = 2 * gT_d / np.abs(dT_H_d)  # max_lr is a magnitude; the clamp below handles its sign
                    lr = max(min(fx / gT_d, np.abs(max_lr)), -np.abs(max_lr))
                    max_lr_epoch.append(max_lr)
                    lr_epoch.append(lr)

            else:  ## 2nd order magic
                if dT_H_d == 0.0:
                    max_lr = lr = 0.0
                else:
                    delta_f = fx
                    if gT_d**2 - 2 * dT_H_d * delta_f >= 0:
                        if gT_d > 0:  # choose the smaller of the two
                            max_lr = lr = -(-gT_d + np.sqrt(
                                gT_d**2 - 2 * dT_H_d * delta_f)) / dT_H_d
                        else:
                            max_lr = lr = -(-gT_d - np.sqrt(
                                gT_d**2 - 2 * dT_H_d * delta_f)) / dT_H_d
                    else:
                        max_lr = lr = -(-gT_d / dT_H_d)
            print 'choose lr: ', time.time() - st
            if max_lr == lr:
                count += 1

            ## print
            st = time.time()
            if True:
                print ''
                print 'alpha             : ', alpha
                print 'f(x)              : ', fx
                print 'f(x+alpha*d)      : ', fx_plus_ad
                #print 'f(x-alpha*d)      : ', fx_minus_ad
                #print 'f(x+)+f(x-)-2f(x) : ', fx_plus_ad + fx_minus_ad - 2*fx
                print 'estimated (d.T)Hd : ', dT_H_d
                print '(g.T)d            : ', gT_d
                print 'max lr            : ', max_lr
                print 'lr                : ', lr
            print 'Epoch-Batch: {:3d}-{:3d}  train_loss: {:.3f}  train_acc:{:.3f}'.format(
                epoch + 1, batch_num + 1, train_loss_batch[-1],
                train_acc_batch[-1])
            print 'printing: ', time.time() - st

            ## quit?
            st = time.time()
            if dT_H_d == 0.0:
                print 'dT_H_d==0.0, exiting'
                exit()
            ## update step
            # reset to x
            research_fd[model.lr] = -alpha + lr
            model.sess.run(model.change_weights_op, feed_dict=research_fd)

            ## update alpha
            alpha = min(lr / 2, 1e-1)
            alpha = 0.1

            print 'quit? final update, alpha: ', time.time() - st
            print 'batch_time: ', time.time() - bst
            print '_' * 100
        print 'avg_batch_time: ', (time.time() - est) / tot_batches

        train_loss.append(np.mean(train_loss_batch[-tot_batches:]))
        save_loss(max_lr_epoch, save_dir, 'max_learning_rates.txt')
        save_loss(lr_epoch, save_dir, 'learning_rates.txt')
        save_loss(train_loss[-1:], save_dir, 'training_cost.txt')
        print 'Epoch {} - Average Training Cost: {:.3f}'.format(
            epoch + 1, train_loss[-1])
        #vl, va = validate(model, dataset)
        #val_loss.append(vl)
        #val_acc.append(va)
        #save_loss(val_loss[-1:], save_dir, 'validation_cost.txt')
    print '#max_lr==lr: ', count, '/', timestep
    exit()
    return train_loss_batch, train_acc_batch, train_loss, val_loss, val_acc
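
Example No. 4 swaps the loss-based curvature probe for a gradient-based one: after moving the weights by alpha*d it recomputes the gradient and uses Hd ~= (grad(x + alpha*d) - grad(x)) / alpha, so dT_H_d = sum(d * Hd). The identity is easy to sanity-check on a quadratic, where the Hessian-vector product is known exactly; a small illustrative NumPy check:

import numpy as np

# For f(x) = 0.5 * x^T A x the gradient is A x and the exact product is A d.
rng = np.random.RandomState(0)
A = rng.randn(5, 5)
A = A.T.dot(A)                                    # symmetric PSD Hessian
x = rng.randn(5)
d = rng.randn(5)
alpha = 1e-4

grad = lambda z: A.dot(z)
Hd_fd = (grad(x + alpha * d) - grad(x)) / alpha   # finite-difference estimate
Hd_exact = A.dot(d)
print(np.max(np.abs(Hd_fd - Hd_exact)))           # ~0 for a quadratic
dT_H_d = d.dot(Hd_fd)                             # the scalar used to pick lr
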
Example No. 5
def train_autoencoder_kalpit(model, dataset, cfg, save_dir):
    train_loss_batch = [] # record_loss
    train_loss = []
    val_loss = []
    save_loss(train_loss, save_dir, 'training_cost.txt', first_use=True)
    save_loss(val_loss, save_dir, 'validation_cost.txt', first_use=True)
    save_loss([], save_dir, 'max_learning_rates.txt', first_use=True)
    save_loss([], save_dir, 'learning_rates.txt', first_use=True)
    alpha = 1e-1
    moms = []
    converged = False
    ### accumulators
    for epoch in range(cfg.max_epochs):
        inds = range(dataset.n_train)
        np.random.shuffle(inds)
        tot_batches = int(np.ceil(1.0*dataset.n_train/cfg.batch_size))
        max_lr_epoch = []
        lr_epoch = []
        est = time.time()
        for batch_num in range(tot_batches):
            alpha = 1e0
            bst = time.time()
            batch_inds = inds[batch_num*cfg.batch_size:min((batch_num+1)*cfg.batch_size,dataset.n_train)]
            ## get f(x) and gradients
            fd = {model.input_images: dataset.data['train_images'][batch_inds,:]}
            loss, grads = model.sess.run([model.loss, model.grads],
                                          feed_dict=fd)
            train_loss_batch.append(loss)
            print 'Epoch-Batch: {:3d}-{:3d}  train_loss: {:.3f}'.format(epoch+1,batch_num+1,train_loss_batch[-1])
            ## set research_fd. set fd to use old binary tensors.
            research_fd = {model.enc1_W_grad: grads[0],    model.enc1_b_grad: grads[1],
                           model.enc2_W_grad: grads[2],    model.enc2_b_grad: grads[3],
                           model.dec2_W_grad: grads[4],    model.dec2_b_grad: grads[5],
                           model.dec1_W_grad: grads[6],    model.dec1_b_grad: grads[7]
                          }
            fd = {model.input_images: dataset.data['train_images'][batch_inds,:]}

            ## get kalpit learning_rate
            max_lr, lr = get_kalpit_lr(model, cfg, research_fd, fd, loss, alpha, grads)
            max_lr_epoch.append(max_lr)
            lr_epoch.append(lr)

            ## update step
            # momentum
            if moms==[]:
                moms = grads[:]
            else:
                moms = [model.cfg.momentum*moms[i] + grads[i] for i in range(len(moms))]
            research_fd = {model.enc1_W_grad: moms[0],    model.enc1_b_grad: moms[1],
                           model.enc2_W_grad: moms[2],    model.enc2_b_grad: moms[3],
                           model.dec2_W_grad: moms[4],    model.dec2_b_grad: moms[5],
                           model.dec1_W_grad: moms[6],    model.dec1_b_grad: moms[7]
                          }
            research_fd[model.lr] = lr
            model.sess.run(model.change_weights_op, feed_dict=research_fd)

            ## update alpha
            alpha = min(lr/2, 1e-1)

            print 'batch_time: ', time.time()-bst
            print '_'*100
        print 'avg_batch_time: ', (time.time()-est)/tot_batches

        train_loss.append(np.mean(train_loss_batch[-tot_batches:]))
        save_loss(max_lr_epoch, save_dir, 'max_learning_rates.txt')
        save_loss(lr_epoch, save_dir, 'learning_rates.txt')
        save_loss(train_loss[-1:], save_dir, 'training_cost.txt')
        print 'Epoch {} - Average Training Cost: {:.3f}'.format(epoch+1, train_loss[-1])
        if converged:
            break
        vl, va = validate_autoencoder(model, dataset)
        val_loss.append(vl)
        save_loss(val_loss[-1:], save_dir, 'validation_cost.txt')
    return train_loss, val_loss
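
Example No. 5 (and the other kalpit examples below) steps along an accumulated heavy-ball momentum direction rather than the raw gradient: m <- momentum * m + g, and feeding m together with model.lr = lr into change_weights_op then applies w <- w - lr * m (assuming the op subtracts lr times the fed tensors, as the earlier examples suggest). A compact sketch of the same accumulation outside TensorFlow, with grads given as a list of NumPy arrays:

def momentum_step(params, grads, moms, lr, mu):
    # Heavy-ball update used in Examples No. 5, 6 and 10:
    #   m <- mu * m + g,   w <- w - lr * m
    if not moms:
        moms = [g.copy() for g in grads]                 # first batch: m = g
    else:
        moms = [mu * m + g for m, g in zip(moms, grads)]
    params = [w - lr * m for w, m in zip(params, moms)]
    return params, moms
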
Example No. 6
def train_conv_kalpit(model, dataset, cfg, save_dir):
    train_loss_batch = [] # record_loss
    train_acc_batch = [] # record_accuracy
    train_loss = []
    val_loss = []
    val_acc = []
    save_loss(train_loss, save_dir, 'training_cost.txt', first_use=True)
    save_loss(val_loss, save_dir, 'validation_cost.txt', first_use=True)
    save_loss([], save_dir, 'max_learning_rates.txt', first_use=True)
    save_loss([], save_dir, 'learning_rates.txt', first_use=True)
    alpha = 1e-2
    moms = []
    for epoch in range(cfg.max_epochs):
        inds = range(dataset.n_train)
        np.random.shuffle(inds)
        tot_batches = int(np.ceil(1.0*dataset.n_train/cfg.batch_size))
        max_lr_epoch = []
        lr_epoch = []
        est = time.time()
        for batch_num in range(tot_batches):
            bst = time.time()
            batch_inds = inds[batch_num*cfg.batch_size:min((batch_num+1)*cfg.batch_size,dataset.n_train)]
            ## get f(x) and gradients
            fd = {model.input_images: dataset.data['train_images'][batch_inds,:],
                  model.labels: dataset.data['train_labels'][batch_inds],
                  model.use_past_bt: False,
                  model.input_past_bt: np.zeros((len(batch_inds),cfg.input_height,cfg.input_width,cfg.input_nchannels)),
                  model.fc4_past_bt: np.zeros((len(batch_inds),1000))
                 }
            loss, acc, grads, input_bt, fc4_bt = model.sess.run([model.loss, model.accuracy, model.grads,
                                                                 model.input_binary_tensor, model.fc4_binary_tensor],
                                                                 feed_dict=fd)
            train_loss_batch.append(loss)
            train_acc_batch.append(acc)
            print 'Epoch-Batch: {:3d}-{:3d}  train_loss: {:.3f}  train_acc:{:.3f}'.format(epoch+1,batch_num+1,
                                                                                          train_loss_batch[-1],train_acc_batch[-1])
            ## set research_fd. set fd to use old binary tensors.
            research_fd = {model.conv1_W_grad: grads[0],    model.conv1_b_grad: grads[1],
                           model.conv2_W_grad: grads[2],    model.conv2_b_grad: grads[3],
                           model.conv3_W_grad: grads[4],    model.conv3_b_grad: grads[5],
                           model.fc4_W_grad: grads[6],      model.fc4_b_grad: grads[7],
                           model.fc5_W_grad: grads[8],      model.fc5_b_grad: grads[9],
                           model.input_past_bt: input_bt,   model.fc4_past_bt: fc4_bt
                          }
            fd = {model.input_images: dataset.data['train_images'][batch_inds,:],
                  model.labels: dataset.data['train_labels'][batch_inds],
                  model.use_past_bt: True,
                  model.input_past_bt: input_bt,
                  model.fc4_past_bt: fc4_bt
                 }
            # momentum
            if moms==[]:
                moms = grads[:]
            else:
                moms = [model.cfg.momentum*moms[i] + grads[i] for i in range(len(moms))]
            moms_fd = {model.conv1_W_grad: moms[0],    model.conv1_b_grad: moms[1],
                       model.conv2_W_grad: moms[2],    model.conv2_b_grad: moms[3],
                       model.conv3_W_grad: moms[4],    model.conv3_b_grad: moms[5],
                       model.fc4_W_grad: moms[6],      model.fc4_b_grad: moms[7],
                       model.fc5_W_grad: moms[8],      model.fc5_b_grad: moms[9]
                      }

            ## get kalpit learning_rate
            print 'USING DIXIT LR'
            max_lr, lr = get_dixit_lr(loss, grads, moms, cfg)
            #max_lr, lr = get_kalpit_lr(model, cfg, grads_fd, fd, loss, alpha, grads)
            max_lr_epoch.append(max_lr)
            lr_epoch.append(lr)

            ## update step
            moms_fd[model.lr] = lr
            model.sess.run(model.change_weights_op, feed_dict=moms_fd)

            ## update alpha
            alpha = min(lr/2, 1e-1)

            print 'batch_time: ', time.time()-bst
            print '_'*100
        print 'avg_batch_time: ', (time.time()-est)/tot_batches

        train_loss.append(np.mean(train_loss_batch[-tot_batches:]))
        save_loss(max_lr_epoch, save_dir, 'max_learning_rates.txt')
        save_loss(lr_epoch, save_dir, 'learning_rates.txt')
        save_loss(train_loss[-1:], save_dir, 'training_cost.txt')
        print 'Epoch {} - Average Training Cost: {:.3f}'.format(epoch+1, train_loss[-1])
        vl, va = validate_conv(model, dataset)
        val_loss.append(vl)
        val_acc.append(va)
        save_loss(val_loss[-1:], save_dir, 'validation_cost.txt')
    return train_loss, val_loss, val_acc
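
get_dixit_lr is not shown on this page. Judging from the in-graph version used in Examples No. 7 and 9 (a step capped at cfg.max_lr, with g.T d reported per batch) and the first-order rule of Example No. 3, a plausible reconstruction is sketched below; treat the whole function as an assumption, not the author's actual code:

import numpy as np

def get_dixit_lr(loss, grads, moms, cfg):
    # Hypothetical reconstruction: first-order step loss / (g . m),
    # capped at cfg.max_lr; the real helper may differ.
    gT_m = np.sum([np.sum(g * m) for g, m in zip(grads, moms)])
    max_lr = cfg.max_lr
    lr = min(loss / gT_m, max_lr) if gT_m > 0 else 0.0
    return max_lr, lr
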
Example No. 7
def train_ff_kalpit(model, dataset, cfg, save_dir):
    train_loss_batch = [] # record_loss
    train_acc_batch = [] # record_accuracy
    train_loss = []
    val_loss = []
    val_acc = []
    save_loss(train_loss, save_dir, 'training_cost.txt', first_use=True)
    save_loss(val_loss, save_dir, 'validation_cost.txt', first_use=True)
    save_loss([], save_dir, 'max_learning_rates.txt', first_use=True)
    save_loss([], save_dir, 'learning_rates.txt', first_use=True)
    count1 = 0
    count2 = 0
    for epoch in range(cfg.max_epochs):
        inds = range(dataset.n_train)
        np.random.shuffle(inds)
        tot_batches = int(np.ceil(1.0*dataset.n_train/cfg.batch_size))
        max_lr_epoch = []
        lr_epoch = []
        est = time.time()
        for batch_num in range(tot_batches):
            bst = time.time()
            batch_inds = inds[batch_num*cfg.batch_size:min((batch_num+1)*cfg.batch_size,dataset.n_train)]
            ## get f(x) and gradients
            fd = {model.input_images: dataset.data['train_images'][batch_inds,:],
                  model.labels: dataset.data['train_labels'][batch_inds],
                  model.use_past_bt: False,
                  model.h1_past_bt: np.zeros((len(batch_inds),model.cfg.h1_dim)),
                  model.h2_past_bt: np.zeros((len(batch_inds),model.cfg.h2_dim)),
                  model.max_lr: cfg.max_lr
                 }
            loss, acc, lr, _ = model.sess.run([model.loss, model.accuracy, model.lr, model.dixit_train_op],
                                              feed_dict=fd)
            train_loss_batch.append(loss)
            train_acc_batch.append(acc)
            print 'Epoch-Batch: {:3d}-{:3d}  train_loss: {:.3f}  train_acc:{:.3f}'.format(epoch+1,batch_num+1,
                                                                                          train_loss_batch[-1],train_acc_batch[-1])

            ## get kalpit learning_rate
            print 'USING DIXIT LR'
            max_lr_epoch.append(cfg.max_lr)
            lr_epoch.append(lr)
            if lr > 0.999*cfg.max_lr:
                count1 += 1.0
            count2 += 1.0
            print 100.0*count1/count2  # percentage of batches where lr hit max_lr
            print 'batch_time: ', time.time()-bst
            print '_'*100
        print 'avg_batch_time: ', (time.time()-est)/tot_batches

        train_loss.append(np.mean(train_loss_batch[-tot_batches:]))
        save_loss(max_lr_epoch, save_dir, 'max_learning_rates.txt')
        save_loss(lr_epoch, save_dir, 'learning_rates.txt')
        save_loss(train_loss[-1:], save_dir, 'training_cost.txt')
        print 'Epoch {} - Average Training Cost: {:.3f}'.format(epoch+1, train_loss[-1])
        vl, va = validate_ff(model, dataset)
        val_loss.append(vl)
        val_acc.append(va)
        save_loss(val_loss[-1:], save_dir, 'validation_cost.txt')
    return train_loss, val_loss, val_acc
Example No. 8
def train_ff_vanilla(model, dataset, cfg, save_dir):
    train_loss_batch = []  # record_loss
    train_acc_batch = []  # record_accuracy
    train_loss = []
    val_loss = []
    val_acc = []
    save_loss(train_loss, save_dir, 'training_cost.txt', first_use=True)
    save_loss(val_loss, save_dir, 'validation_cost.txt', first_use=True)
    save_loss(val_acc, save_dir, 'validation_accuracy.txt', first_use=True)
    time_since_improvement = 0  # early stopping
    train_time = 0.0
    for epoch in range(cfg.max_epochs):
        st = time.time()
        inds = range(dataset.n_train)
        np.random.shuffle(inds)
        tot_batches = int(np.ceil(1.0 * dataset.n_train / cfg.batch_size))
        for batch_num in range(tot_batches):
            batch_inds = inds[batch_num * cfg.batch_size:min(
                (batch_num + 1) * cfg.batch_size, dataset.n_train)]
            feed_dict = {
                model.input_images:
                dataset.data['train_images'][batch_inds, :],
                model.labels: dataset.data['train_labels'][batch_inds],
                model.lr: cfg.learning_rate,
                model.keep_prob: cfg.keep_prob,
                model.use_past_bt: False,
                model.h1_past_bt: np.zeros(
                    (len(batch_inds), model.cfg.h1_dim)),
                model.h2_past_bt: np.zeros((len(batch_inds), model.cfg.h2_dim))
            }
            loss, acc, _ = model.sess.run(
                [model.loss, model.accuracy, model.train_op],
                feed_dict=feed_dict)
            train_loss_batch.append(loss)
            train_acc_batch.append(acc)
            print 'Epoch-Batch: {:3d}-{:3d}  train_loss: {:.3f}  train_acc:{:.3f}'.format(
                epoch + 1, batch_num + 1, train_loss_batch[-1],
                train_acc_batch[-1])
        train_loss.append(np.mean(train_loss_batch[-tot_batches:]))
        save_loss(train_loss[-1:], save_dir, 'training_cost.txt')
        print 'Epoch {} - Average Training Cost: {:.3f}'.format(
            epoch + 1, train_loss[-1])

        ## train time
        train_time += time.time() - st
        print 'Total Train Time:', train_time

        ## validation
        vl, va = validate_ff(model, dataset)
        val_loss.append(vl)
        val_acc.append(va)
        save_loss(val_loss[-1:], save_dir, 'validation_cost.txt')
        save_loss(val_acc[-1:], save_dir, 'validation_accuracy.txt')

        ## early stopping
        time_since_improvement, early_stop = early_stopping(
            val_loss, time_since_improvement, cfg.early_stopping)
        if early_stop:
            break

    return train_loss, val_loss, val_acc
Example No. 9
def train_conv_kalpit(model, dataset, cfg, save_dir):
    train_loss_batch = []  # record_loss
    train_acc_batch = []  # record_accuracy
    train_loss = []
    val_loss = []
    val_acc = []
    save_loss(train_loss, save_dir, 'training_cost.txt', first_use=True)
    save_loss(val_loss, save_dir, 'validation_cost.txt', first_use=True)
    save_loss(val_acc, save_dir, 'validation_accuracy.txt', first_use=True)
    save_loss([], save_dir, 'max_learning_rates.txt', first_use=True)
    save_loss([], save_dir, 'learning_rates.txt', first_use=True)
    count1 = 0
    count2 = 0
    time_since_improvement = 0  # early stopping
    train_time = 0.0
    for epoch in range(cfg.max_epochs):
        st = time.time()
        inds = range(dataset.n_train)
        np.random.shuffle(inds)
        tot_batches = int(np.ceil(1.0 * dataset.n_train / cfg.batch_size))
        max_lr_epoch = []
        lr_epoch = []
        count1 = 0.0
        count2 = 0.0
        for batch_num in range(tot_batches):
            batch_inds = inds[batch_num * cfg.batch_size:min(
                (batch_num + 1) * cfg.batch_size, dataset.n_train)]
            ## get f(x) and gradients
            fd = {
                model.input_images:
                dataset.data['train_images'][batch_inds, :],
                model.labels:
                dataset.data['train_labels'][batch_inds],
                model.keep_prob:
                cfg.keep_prob,
                model.use_past_bt:
                False,
                model.input_past_bt:
                np.zeros((len(batch_inds), cfg.input_height, cfg.input_width,
                          cfg.input_nchannels)),
                model.fc4_past_bt:
                np.zeros((len(batch_inds), 1000)),
                model.max_lr:
                cfg.max_lr
            }
            loss, acc, lr, _, gT_d = model.sess.run([
                model.loss, model.accuracy, model.lr, model.dixit_train_op,
                model.gT_d
            ],
                                                    feed_dict=fd)
            train_loss_batch.append(loss)
            train_acc_batch.append(acc)
            print 'Epoch-Batch: {:3d}-{:3d}  train_loss: {:.3f}  train_acc:{:.3f}  learning_rate:{:.3f}  gT_d:{:.3f}'.format(
                epoch + 1, batch_num + 1, train_loss_batch[-1],
                train_acc_batch[-1], lr, gT_d)
            ## get kalpit learning_rate
            print 'USING DIXIT LR'
            max_lr_epoch.append(cfg.max_lr)
            lr_epoch.append(lr)
            if lr > 0.999 * cfg.max_lr:
                count1 += 1.0
            count2 += 1.0
            print '_' * 100

        train_loss.append(np.mean(train_loss_batch[-tot_batches:]))
        save_loss(max_lr_epoch, save_dir, 'max_learning_rates.txt')
        save_loss(lr_epoch, save_dir, 'learning_rates.txt')
        save_loss(train_loss[-1:], save_dir, 'training_cost.txt')
        print 'Epoch {} - Average Training Cost: {:.3f}'.format(
            epoch + 1, train_loss[-1])
        print 'Percentage of lr==max_lr: {:.3f}'.format(100.0 * count1 /
                                                        count2)

        ## train time
        train_time += time.time() - st
        print 'Total Train Time:', train_time

        ## validation
        vl, va = validate_conv(model, dataset)
        val_loss.append(vl)
        val_acc.append(va)
        save_loss(val_loss[-1:], save_dir, 'validation_cost.txt')
        save_loss(val_acc[-1:], save_dir, 'validation_accuracy.txt')

        ## early stopping
        time_since_improvement, early_stop = early_stopping(
            val_loss, time_since_improvement, cfg.early_stopping)
        if early_stop:
            break
    return train_loss, val_loss, val_acc
Example No. 10
def train(model, dataset, cfg, save_dir):
    train_loss_batch = []  # record_loss
    train_acc_batch = []  # record_accuracy
    train_loss = []
    val_loss = []
    val_acc = []
    save_loss(train_loss, save_dir, 'training_cost.txt', first_use=True)
    save_loss(val_loss, save_dir, 'validation_cost.txt', first_use=True)
    save_loss([], save_dir, 'max_learning_rates.txt', first_use=True)
    save_loss([], save_dir, 'learning_rates.txt', first_use=True)
    alpha = 1e-2
    moms = []
    for epoch in range(cfg.max_epochs):
        inds = range(dataset.n_train)
        np.random.shuffle(inds)
        tot_batches = int(np.ceil(1.0 * dataset.n_train / cfg.batch_size))
        max_lr_epoch = []
        lr_epoch = []
        est = time.time()
        for batch_num in range(tot_batches):
            bst = time.time()
            st = time.time()
            batch_inds = inds[batch_num * cfg.batch_size:min(
                (batch_num + 1) * cfg.batch_size, dataset.n_train)]
            ## get f(x) and gradients
            fd = {
                model.input_images:
                dataset.data['train_images'][batch_inds, :],
                model.labels: dataset.data['train_labels'][batch_inds],
                model.use_past_bt: False,
                model.h1_past_bt: np.zeros(
                    (len(batch_inds), model.cfg.h1_dim)),
                model.h2_past_bt: np.zeros((len(batch_inds), model.cfg.h2_dim))
            }
            loss, acc, grads, h1_bt, h2_bt = model.sess.run([
                model.loss, model.accuracy, model.grads,
                model.h1_binary_tensor, model.h2_binary_tensor
            ],
                                                            feed_dict=fd)
            fx = loss
            train_loss_batch.append(loss)
            train_acc_batch.append(acc)
            ## momentum
            if moms == []:
                moms = grads[:]
            else:
                moms = [
                    model.cfg.momentum * moms[i] + grads[i]
                    for i in range(len(moms))
                ]
            research_fd = {
                model.h1_W_grad: moms[0],
                model.h1_b_grad: moms[1],
                model.h2_W_grad: moms[2],
                model.h2_b_grad: moms[3],
                model.preds_W_grad: moms[4],
                model.preds_b_grad: moms[5],
            }
            print 'fx and grads: ', time.time() - st
            st = time.time()
            gT_m = np.sum(
                [np.sum(grads[i] * moms[i]) for i in range(len(moms))])
            print 'gT_m: ', time.time() - st

            ## set fd to use old binary tensors
            st = time.time()
            fd = {
                model.input_images:
                dataset.data['train_images'][batch_inds, :],
                model.labels: dataset.data['train_labels'][batch_inds],
                model.use_past_bt: True,
                model.h1_past_bt: h1_bt,
                model.h2_past_bt: h2_bt
            }
            print 'change fd: ', time.time() - st

            ## get f(x+alpha*m)
            st = time.time()
            research_fd[model.lr] = -alpha
            model.sess.run(model.change_weights_op, feed_dict=research_fd)
            fx_plus_am, grads2 = model.sess.run([model.loss, model.grads],
                                                feed_dict=fd)
            print 'fx+: ', time.time() - st

            ### get f(x-alpha*g)
            #st = time.time()
            #research_fd[model.lr] = 2*alpha
            #model.sess.run(model.change_weights_op, feed_dict=research_fd)
            #fx_minus_am = model.sess.run(model.loss, feed_dict=fd)
            #print 'fx-: ', time.time()-st

            ## choose learning rate
            st = time.time()
            #mT_H_m = (fx_plus_am + fx_minus_am - 2*fx)/(alpha**2)
            H_m = [
                grads2[i] / alpha - grads[i] / alpha for i in range(len(grads))
            ]
            mT_H_m = np.sum(
                [np.sum(moms[i] * H_m[i]) for i in range(len(grads))])
            if not cfg.magic_2nd_order:
                max_lr = 2 * gT_m / np.abs(mT_H_m)
                lr = min(fx / gT_m, max_lr)
                max_lr_epoch.append(max_lr)
                lr_epoch.append(lr)

            else:  ## 2nd order magic
                if mT_H_m == 0.0:
                    max_lr = lr = 0.0
                else:
                    delta_f = fx
                    if gT_m**2 - 2 * mT_H_m * delta_f >= 0:
                        max_lr = lr = -(-gT_m + np.sqrt(gT_m**2 - 2 * mT_H_m *
                                                        delta_f)) / mT_H_m
                    else:
                        max_lr = lr = -(-gT_m / mT_H_m)
            print 'choose lr: ', time.time() - st

            ## print
            st = time.time()
            if True:
                print ''
                print 'alpha             : ', alpha
                print 'f(x)              : ', fx
                print 'f(x+alpha*m)      : ', fx_plus_am
                #print 'f(x-alpha*m)      : ', fx_minus_am
                #print 'f(x+)+f(x-)-2f(x) : ', fx_plus_am + fx_minus_am - 2*fx
                print 'estimated (m.T)Hm : ', mT_H_m
                print '(g.T)m            : ', gT_m
                print 'max lr            : ', max_lr
                print 'lr                : ', lr
            print 'Epoch-Batch: {:3d}-{:3d}  train_loss: {:.3f}  train_acc:{:.3f}'.format(
                epoch + 1, batch_num + 1, train_loss_batch[-1],
                train_acc_batch[-1])
            print 'printing: ', time.time() - st

            ## quit?
            st = time.time()
            if mT_H_m == 0.0:
                print 'mT_H_m==0.0, exiting'
                exit()

            ## update step
            # reset to x
            research_fd[model.lr] = -alpha + lr
            model.sess.run(model.change_weights_op, feed_dict=research_fd)

            ## update alpha
            alpha = min(lr / 2, 1e-1)

            print 'quit? final update, alpha: ', time.time() - st
            print 'batch_time: ', time.time() - bst
            print '_' * 100
        print 'avg_batch_time: ', (time.time() - est) / tot_batches

        train_loss.append(np.mean(train_loss_batch[-tot_batches:]))
        save_loss(max_lr_epoch, save_dir, 'max_learning_rates.txt')
        save_loss(lr_epoch, save_dir, 'learning_rates.txt')
        save_loss(train_loss[-1:], save_dir, 'training_cost.txt')
        print 'Epoch {} - Average Training Cost: {:.3f}'.format(
            epoch + 1, train_loss[-1])
        #vl, va = validate(model, dataset)
        #val_loss.append(vl)
        #val_acc.append(va)
        #save_loss(val_loss[-1:], save_dir, 'validation_cost.txt')
    return train_loss_batch, train_acc_batch, train_loss, val_loss, val_acc