Example #1
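TensorFlow training loop that applies a K-FAC correction to the gradients before handing them to an Adam optimizer; Grappler rewrites and optimizer passes are turned off so the graph runs as built, and in 'test' mode the recorded losses are checked against a saved reference.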
def main():
    np.random.seed(args.seed)
    tf.set_random_seed(args.seed)

    logger = u.TensorboardLogger(args.run)

    with u.timeit("init/session"):

        # Disable Grappler's model pruning and constant folding, and set memory
        # optimization to manual, when the rewriter proto is available.
        rewrite_options = None
        try:
            from tensorflow.core.protobuf import rewriter_config_pb2
            rewrite_options = rewriter_config_pb2.RewriterConfig(
                disable_model_pruning=True,
                constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
                memory_optimization=rewriter_config_pb2.RewriterConfig.MANUAL)
        except ImportError:
            pass

        optimizer_options = tf.OptimizerOptions(
            opt_level=tf.OptimizerOptions.L0)
        graph_options = tf.GraphOptions(optimizer_options=optimizer_options,
                                        rewrite_options=rewrite_options)
        gpu_options = tf.GPUOptions(allow_growth=False)
        config = tf.ConfigProto(graph_options=graph_options,
                                gpu_options=gpu_options,
                                log_device_placement=False)

        sess = tf.InteractiveSession(config=config)
        u.register_default_session(
            sess)  # since default session is Thread-local

    with u.timeit("init/model_init"):
        model = model_creator(args.batch_size, name="main")
        model.initialize_global_vars(verbose=True)
        model.initialize_local_vars()

    kfac_lib.numeric_inverse = args.numeric_inverse
    with u.timeit("init/kfac_init"):
        kfac = Kfac(model_creator, args.kfac_batch_size)
        kfac.model.initialize_global_vars(verbose=False)
        kfac.model.initialize_local_vars()
        kfac.Lambda.set(args.Lambda)
        kfac.reset()  # resets optimization variables (not model variables)

    if args.mode != 'run':
        opt = tf.train.AdamOptimizer(0.001)
    else:
        opt = tf.train.AdamOptimizer(args.lr)
    grads_and_vars = opt.compute_gradients(model.loss,
                                           var_list=model.trainable_vars)

    grad = IndexedGrad.from_grads_and_vars(grads_and_vars)
    grad_new = kfac.correct(grad)
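    # capture the Adam slot variables created by apply_gradients so they can
    # be initialized on their own below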
    with u.capture_vars() as adam_vars:
        train_op = opt.apply_gradients(grad_new.to_grads_and_vars())
    with u.timeit("init/adam"):
        sessrun([v.initializer for v in adam_vars])

    losses = []
    u.record_time()

    start_time = time.time()
    vloss0 = 0

    # todo, unify the two data outputs
    outfn = 'data/%s_%f_%f.csv' % (args.run, args.lr, args.Lambda)

    start_time = time.time()
    if args.extra_kfac_batch_advance:
        kfac.model.advance_batch()  # advance kfac batch

    if args.kfac_async:
        kfac.start_stats_runners()

    for step in range(args.num_steps):

        if args.validate_every_n and step % args.validate_every_n == 0:
            loss0, vloss0 = sessrun([model.loss, model.vloss])
        else:
            loss0, = sessrun([model.loss])
        losses.append(loss0)  # TODO: remove this

        logger('loss/loss', loss0, 'loss/vloss', vloss0)

        elapsed = time.time() - start_time
        start_time = time.time()
        print("%4d ms, step %4d, loss %5.2f, vloss %5.2f" %
              (elapsed * 1e3, step, loss0, vloss0))

        if args.method == 'kfac' and not args.kfac_async:
            kfac.model.advance_batch()
            kfac.update_stats()

        with u.timeit("train"):
            model.advance_batch()
            with u.timeit("grad.update"):
                grad.update()
            with kfac.read_lock():
                grad_new.update()
            u.run(train_op)
            u.record_time()

        logger.next_step()

    # TODO: use u.global_runs_dir
    # TODO: get rid of u.timeit?

    with open('timelines/graphdef.txt', 'w') as f:
        f.write(str(u.get_default_graph().as_graph_def()))

    u.summarize_time()

    if args.mode == 'record':
        u.dump_with_prompt(losses, release_test_fn)

    elif args.mode == 'test':
        targets = np.loadtxt('data/' + release_test_fn, delimiter=",")
        u.check_equal(losses, targets, rtol=1e-2)
        u.summarize_difference(losses, targets)
        assert u.last_time() < 800, "Expected 648 on GTX 1080"
Example #2
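Standard PyTorch MNIST example: a LeNet model trained with SGD plus momentum, an L2 penalty added to the NLL loss, and per-step loss, step time, and test error logged to TensorBoard.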
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs',
                        type=int,
                        default=10,
                        metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.9,
                        metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--run',
                        type=str,
                        default='momentum-lenet',
                        help='name of run')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")
    print("using device ", device)

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '/tmp/data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '/tmp/data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                              batch_size=args.test_batch_size,
                                              shuffle=True,
                                              **kwargs)

    logger = u.TensorboardLogger(args.run)

    model = LeNet().to(device)
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum)

    example_count = 0
    for epoch in range(1, args.epochs + 1):

        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            step_start = time.perf_counter()
            data, target = data.to(device), target.to(device)
            logger.set_step(example_count)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
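            # add a small L2 penalty on all parameters (weight decay)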
            for param in model.parameters():
                loss += 0.0002 * torch.sum(param * param)
            logger('loss/train', loss)
            loss.backward()
            optimizer.step()
            example_count += args.batch_size
            logger('time/step', 1000 * (time.perf_counter() - step_start))

            if batch_idx % args.log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))

        model.eval()
        test_loss = 0
        correct = 0
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                test_loss += F.nll_loss(
                    output, target,
                    reduction='sum').item()  # sum up batch loss
                # get the index of the max log-probability
                pred = output.max(1, keepdim=True)[1]
                correct += pred.eq(target.view_as(pred)).sum().item()

        test_loss /= len(test_loader.dataset)
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.
              format(test_loss, correct, len(test_loader.dataset),
                     100. * correct / len(test_loader.dataset)))
        test_error = 1 - correct / len(test_loader.dataset)  # error rate, not accuracy
        logger('loss/test', test_error)
Example #3
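Trains LeNet5 on MNIST with a PSGD Kronecker-factored preconditioner: each step draws a random probe vector, computes a Hessian-vector product with autograd, updates the preconditioner factors Qs, and applies the preconditioned gradient with norm clipping.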
def main():
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    device = torch.device("cuda" if use_cuda else "cpu")
    print("using device ", device)
    torch.manual_seed(args.seed)

    u.set_runs_directory('runs3')
    logger = u.TensorboardLogger(args.run)
    batch_size = 64
    shuffle = True
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '/tmp/data',
        train=True,
        download=True,
        transform=transforms.Compose([transforms.ToTensor()])),
                                               batch_size=batch_size,
                                               shuffle=shuffle,
                                               **kwargs)
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '/tmp/data',
        train=False,
        transform=transforms.Compose([transforms.ToTensor()])),
                                              batch_size=1000,
                                              shuffle=shuffle,
                                              **kwargs)
    """input image size for the original LeNet5 is 32x32, here is 28x28"""

    #  W1 = 0.1 * torch.randn(1 * 5 * 5 + 1, 6)

    net = LeNet5().to(device)

    def train_loss(data, target):
        y = net(data)
        y = F.log_softmax(y, dim=1)
        loss = F.nll_loss(y, target)
        for w in net.W:
            loss += 0.0002 * torch.sum(w * w)

        return loss

    def test_loss():
        num_errs = 0
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)

                y = net(data)
                _, pred = torch.max(y, dim=1)
                num_errs += torch.sum(pred != target)

        return num_errs.item() / len(test_loader.dataset)

    # one pair of Kronecker preconditioner factors per weight matrix
    # (one factor per dimension), initialized to identity
    Qs = [[torch.eye(w.shape[0]), torch.eye(w.shape[1])] for w in net.W]
    for i in range(len(Qs)):
        for j in range(len(Qs[i])):
            Qs[i][j] = Qs[i][j].to(device)

    step_size = 0.1  # tried 0.15, diverges
    grad_norm_clip_thr = 1e10
    TrainLoss, TestLoss = [], []
    example_count = 0
    step_time_ms = 0

    for epoch in range(10):
        for batch_idx, (data, target) in enumerate(train_loader):
            step_start = time.perf_counter()
            data, target = data.to(device), target.to(device)

            loss = train_loss(data, target)

            with u.timeit('grad'):
                grads = autograd.grad(loss, net.W, create_graph=True)
            TrainLoss.append(loss.item())
            logger.set_step(example_count)
            logger('loss/train', TrainLoss[-1])
            if batch_idx % 10 == 0:
                print(f'Epoch: {epoch}; batch: {batch_idx}; '
                      f'train loss: {TrainLoss[-1]:.2f}, '
                      f'step time: {step_time_ms:.0f}')

            with u.timeit('Hv'):
                #        noise.normal_()
                # torch.manual_seed(args.seed)
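                # Hessian-vector product via double backprop: grads was built
                # with create_graph=True, so differentiating it against a
                # random probe v gives H @ v without forming H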
                v = [torch.randn(w.shape).to(device) for w in net.W]
                # v = grads
                Hv = autograd.grad(grads, net.W, v)

            if args.verbose:
                print("v", v[0].mean())
                print("data", data.mean())
                print("Hv", Hv[0].mean())

            n = len(net.W)
            with torch.no_grad():
                with u.timeit('P_update'):
                    for i in range(num_updates):
                        psteps = []
                        for j in range(n):
                            q = Qs[j]
                            dw = v[j]
                            dg = Hv[j]
                            Qs[j][0], Qs[j][1], pstep = \
                                psgd.update_precond_kron_with_step(
                                    q[0], q[1], dw, dg)
                            psteps.append(pstep)

                            #          print(np.array(psteps).mean())
                    logger('p_residual', np.array(psteps).mean())

                with u.timeit('g_update'):
                    pre_grads = [
                        psgd.precond_grad_kron(q[0], q[1], g)
                        for (q, g) in zip(Qs, grads)
                    ]
                    grad_norm = torch.sqrt(
                        sum([torch.sum(g * g) for g in pre_grads]))

                with u.timeit('gradstep'):
                    step_adjust = min(
                        grad_norm_clip_thr / (grad_norm + 1.2e-38), 1.0)
                    for i in range(len(net.W)):
                        net.W[i] -= step_adjust * step_size * pre_grads[i]

                total_step = step_adjust * step_size
                logger('step/adjust', step_adjust)
                logger('step/size', step_size)
                logger('step/total', total_step)
                logger('grad_norm', grad_norm)

                if args.verbose:
                    print(data.mean())
                    import pdb
                    pdb.set_trace()
                if args.early_stop:
                    sys.exit()

            example_count += batch_size
            step_time_ms = 1000 * (time.perf_counter() - step_start)
            logger('time/step', step_time_ms)

            if args.test and batch_idx >= 100:
                break
        if args.test and batch_idx >= 100:
            break

        test_loss0 = test_loss()
        TestLoss.append(test_loss0)
        logger('loss/test', test_loss0)
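        # shrink step_size by 10**0.1 per epoch, i.e. 10x over the 10 epochs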
        step_size = (0.1**0.1) * step_size
        print('Epoch: {}; best test loss: {}'.format(epoch, min(TestLoss)))

    if args.test:
        step_times = logger.d['time/step']
        assert step_times[-1] < 30, step_times  # should be around 20ms
        losses = logger.d['loss/train']
        assert losses[0] > 2  # around 2.3887393474578857
        assert losses[-1] < 0.5, losses
        print("Test passed")
Example #4
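Appears to be an earlier variant of Example #1: the same K-FAC-corrected Adam training loop, without the Grappler rewrite configuration and with per-step results also written to a CSV file through u.BufferedWriter.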
def main():
  np.random.seed(args.seed)
  tf.set_random_seed(args.seed)

  logger = u.TensorboardLogger(args.run)
  
  with u.timeit("init/session"):
    gpu_options = tf.GPUOptions(allow_growth=False)
    sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))
    u.register_default_session(sess)   # since default session is Thread-local

  with u.timeit("init/model_init"):
    model = model_creator(args.batch_size, name="main")
    model.initialize_global_vars(verbose=True)
    model.initialize_local_vars()
  
  with u.timeit("init/kfac_init"):
    kfac = Kfac(model_creator, args.kfac_batch_size) 
    kfac.model.initialize_global_vars(verbose=False)
    kfac.model.initialize_local_vars()
    kfac.Lambda.set(args.Lambda)
    kfac.reset()    # resets optimization variables (not model variables)

  if args.mode != 'run':
    opt = tf.train.AdamOptimizer(0.001)
  else:
    opt = tf.train.AdamOptimizer(args.lr)
  grads_and_vars = opt.compute_gradients(model.loss,
                                         var_list=model.trainable_vars)
      
  grad = IndexedGrad.from_grads_and_vars(grads_and_vars)
  grad_new = kfac.correct(grad)
  with u.capture_vars() as adam_vars:
    train_op = opt.apply_gradients(grad_new.to_grads_and_vars())
  with u.timeit("init/adam"):
    sessrun([v.initializer for v in adam_vars])
  
  losses = []
  u.record_time()

  start_time = time.time()
  vloss0 = 0

  # todo, unify the two data outputs
  outfn = 'data/%s_%f_%f.csv'%(args.run, args.lr, args.Lambda)
  writer = u.BufferedWriter(outfn, 60)   # get rid?

  start_time = time.time()
  if args.extra_kfac_batch_advance:
    kfac.model.advance_batch()  # advance kfac batch

  if args.kfac_async:
    kfac.start_stats_runners()
    
  for step in range(args.num_steps):
    
    if args.validate_every_n and step%args.validate_every_n == 0:
      loss0, vloss0 = sessrun([model.loss, model.vloss])
    else:
      loss0, = sessrun([model.loss])
    losses.append(loss0)  # TODO: remove this

    logger('loss/loss', loss0, 'loss/vloss', vloss0)
    
    elapsed = time.time()-start_time
    print("%d sec, step %d, loss %.2f, vloss %.2f" %(elapsed, step, loss0,
                                                     vloss0))
    writer.write('%d, %f, %f, %f\n'%(step, elapsed, loss0, vloss0))

    if args.method=='kfac' and not args.kfac_async:
      kfac.model.advance_batch()
      kfac.update_stats()

    with u.timeit("train"):
      model.advance_batch()
      grad.update()
      with kfac.read_lock():
        grad_new.update()
      train_op.run()
      u.record_time()

    logger.next_step()

  # TODO: use u.global_runs_dir
  # TODO: get rid of u.timeit?
  
  with open('timelines/graphdef.txt', 'w') as f:
    f.write(str(u.get_default_graph().as_graph_def()))

  u.summarize_time()
  
  if args.mode == 'record':
    u.dump_with_prompt(losses, release_test_fn)

  elif args.mode == 'test':
    targets = np.loadtxt('data/'+release_test_fn, delimiter=",")
    u.check_equal(losses, targets, rtol=1e-2)
    u.summarize_difference(losses, targets)