# Exemplo n.º 1
# 0
def testOptimizerConfigSGD():
    '''Test initialization of SGD optimizer config.

    Verifies the default name and learning rate, a user-supplied learning
    rate, and that passing per-parameter groups raises an AssertionError
    with the expected message.
    '''
    cfg = optim.SGDConfig()
    assert cfg.name == 'SGDOptimizer'

    rtol = 1e-05
    assert_allclose(0.001, cfg.lr, rtol=rtol, err_msg="lr mismatch")

    cfg = optim.SGDConfig(lr=0.002)
    assert_allclose(0.002, cfg.lr, rtol=rtol, err_msg="lr mismatch")

    # SGD does not support per-parameter option groups
    with pytest.raises(AssertionError) as e:
        params = [{'params': ['layer1.weight'], 'lr': 0.1}]
        # This constructor call raises, so nothing after it inside the
        # `with` block would run (a redundant, unreachable assert that
        # used to follow it has been removed).
        optim.SGDConfig(params=params, lr=0.002)
    assert str(e.value) == "'params' must be an empty list for SGD optimizer"
def testToyBertStateDictWrapModelLossFn():
    """Check state-dict keys before and after one train step on a toy model."""
    # Common setup: fix seeds on both backends for reproducibility
    seed = 1
    torch.manual_seed(seed)
    onnxruntime.set_seed(seed)

    # Modeling: a 2->4 linear layer; `y`, when provided, is added to the output
    class LinearModel(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.linear = torch.nn.Linear(2, 4)

        def forward(self, y=None, x=None):
            out = self.linear(x)
            return out + y if y is not None else out + torch.ones(2, 4)

    pt_model = LinearModel()
    model_desc = {
        'inputs': [('x', [2, 2]), ('label', [
            2,
        ])],
        'outputs': [('loss', [], True), ('output', [2, 4])]
    }
    optim_config = optim.SGDConfig(lr=0.02)

    def loss_fn(x, label):
        return F.nll_loss(F.log_softmax(x, dim=1), label)

    trainer = orttrainer.ORTTrainer(pt_model, model_desc, optim_config,
                                    loss_fn=loss_fn)

    # Before any training, the trainer has no state to export
    state_dict = checkpoint.experimental_state_dict(trainer)
    assert state_dict == {}

    # Run a single training step
    data = torch.randn(2, 2)
    label = torch.tensor([0, 1], dtype=torch.int64)
    trainer.train_step(x=data, label=label)

    # After training, the model parameters show up in the state dict
    state_dict = checkpoint.experimental_state_dict(trainer)
    assert state_dict.keys() == {'linear.bias', 'linear.weight'}
# Exemplo n.º 3
# 0
def testLRSchedulerUpdateImpl(lr_scheduler, expected_values):
    """Verify a LR scheduler emits `expected_values` over its full schedule."""
    rtol = 1e-04  # test tolerance

    # Initial state: 10-step schedule with 50% warmup
    initial_lr = 1
    total_steps = 10
    warmup = 0.5
    optimizer_config = optim.SGDConfig(lr=initial_lr)
    scheduler = lr_scheduler(total_steps, warmup)

    # First half is warmup
    for step in range(total_steps):
        # Emulate ORTTrainer.train_step() call that updates its train_step_info
        info = TrainStepInfo(optimizer_config=optimizer_config,
                             optimization_step=step)
        scheduler._step(info)
        last_lr = scheduler.get_last_lr()
        assert len(last_lr) == 1
        assert_allclose(last_lr[0], expected_values[step],
                        rtol=rtol, err_msg="lr mismatch")
# Exemplo n.º 4
# 0
def train_ort_model(epoch=1):
    """Train the transformer language model with ORTTrainer for one epoch.

    Args:
        epoch: 1-based epoch number, used only in progress logging.
    """
    device = "cuda"
    ntokens = 28785
    bptt = 35
    batch_size = 20
    initial_lr = 0.001

    train_data, val_data, test_data = prepare_data(device, 20, 20)
    pt_model_path = os.path.join('pt_model.py')
    pt_model = _utils.import_module_from_file(pt_model_path)
    # Use `ntokens` rather than repeating the literal 28785, so the vocab
    # size stays consistent with the model description below.
    model = pt_model.TransformerModel(ntokens, 200, 2, 200, 2, 0.2).to(device)

    model_desc = {'inputs':  [('input1', [bptt, batch_size]),
                              ('label', [bptt * batch_size])],
                  'outputs': [('loss', [], True),
                              ('predictions', [bptt, batch_size, ntokens])]}

    opts = orttrainer.ORTTrainerOptions({'device': {'id': device}})
    optim_config = optim.SGDConfig(lr=initial_lr)
    trainer = orttrainer.ORTTrainer(model, model_desc, optim_config, loss_fn=my_loss, options=opts)

    total_loss = 0.
    start_time = time.time()
    # Use `bptt` for the stride bound rather than the hardcoded 35 it equals.
    for batch, i in enumerate(range(0, train_data.size(0) - bptt, bptt)):
        data, targets = get_batch(train_data, i)
        output = trainer.train_step(data, targets)
        total_loss += output[0].item()

        log_interval = 200
        if batch % log_interval == 0 and batch > 0:
            cur_loss = total_loss / log_interval
            elapsed = time.time() - start_time
            print('| {} | epoch {:3d} | {:5d}/{:5d} batches | '
                  'lr {:02.3f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                    device, epoch, batch, len(train_data) // bptt, initial_lr,
                    elapsed * 1000 / log_interval,
                    cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
# Exemplo n.º 5
# 0
def main():
    """Entry point: parse CLI args, build MNIST loaders, train and evaluate."""
    # Training settings
    parser = argparse.ArgumentParser(description='MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval', type=int, default=10, metavar='N',
        help='how many batches to wait before logging training status')

    # Basic setup: choose device and seed both torch and onnxruntime
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = "cuda" if use_cuda else "cpu"
    torch.manual_seed(args.seed)
    onnxruntime.set_seed(args.seed)

    # Data loaders: both splits share the same normalization pipeline
    mnist_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, ))
    ])
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=True, download=True,
                       transform=mnist_transform),
        batch_size=args.batch_size,
        shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=False, transform=mnist_transform),
        batch_size=args.test_batch_size,
        shuffle=True)

    # Modeling
    model = NeuralNet(784, 500, 10)
    model_desc = mnist_model_description()
    optim_config = optim.SGDConfig(lr=args.lr)
    opts = ORTTrainerOptions({'device': {'id': device}})
    trainer = ORTTrainer(model, model_desc, optim_config,
                         loss_fn=my_loss, options=opts)

    # Train loop
    for epoch in range(1, args.epochs + 1):
        train_with_trainer(args.log_interval, trainer, device, train_loader,
                           epoch)
        test_with_trainer(trainer, device, test_loader)