Example #1
0
File: main.py Project: kuss/examples
def valid(model, device, val_dataloader, start_epoch, epoch=0):
    """Run one validation pass, print a summary, and log metrics to Vessl.

    Args:
        model: torch.nn.Module to evaluate (switched to eval mode here).
        device: device the inputs should be moved to.
        val_dataloader: DataLoader over the validation set.
        start_epoch: epoch offset added to the Vessl logging step.
        epoch: current epoch index (default 0). NOTE(review): the original
            body referenced an undefined name ``epoch`` at the log call,
            which raised NameError; it is now an explicit, defaulted
            parameter so existing callers keep working.

    Returns:
        Validation accuracy as a percentage (float).
    """
    model.eval()
    val_loss = 0.0
    total = 0
    correct = 0
    # no_grad covers the whole pass; hoisted out of the loop.
    with torch.no_grad():
        for data, labels in val_dataloader:
            inputs, labels = data.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # ``criterion`` is a module-level global defined elsewhere in
            # this file.
            loss = criterion(outputs, labels)
            # .item() extracts the Python float directly; the original's
            # .cpu().numpy() round-trip was unnecessary for a scalar.
            val_loss += loss.item()

    # NOTE(review): criterion presumably returns a per-batch mean, so
    # dividing the sum of batch means by the dataset size mirrors the
    # original behavior but is not a true per-sample average — confirm.
    val_loss /= len(val_dataloader.dataset)
    val_accuracy = 100. * correct / len(val_dataloader.dataset)

    print('\nValid set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.
          format(val_loss, correct, len(val_dataloader.dataset), val_accuracy))

    # Logging loss metrics to Vessl
    vessl.log(step=epoch + start_epoch + 1,
              row={
                  'val_loss': val_loss,
                  'val_accuracy': val_accuracy
              })

    return val_accuracy
Example #2
0
def test(model, device, test_loader, save_image):
    """Evaluate ``model`` on the test set and optionally log sample images.

    Args:
        model: torch.nn.Module to evaluate (switched to eval mode here).
        device: device the inputs should be moved to.
        test_loader: DataLoader over the test set.
        save_image: when truthy, upload one example image per batch to Vessl.

    Returns:
        Test accuracy as a percentage (float).
    """
    model.eval()
    test_loss = 0
    correct = 0
    test_images = []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Sum (not mean) so the division below yields a per-sample average.
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            # Capture the first sample of each batch for visual inspection.
            # Fix: the original interpolated the raw tensor ``target[0]``
            # into the caption; use .item() so the caption shows the label
            # value, matching how ``pred[0]`` is formatted.
            test_images.append(vessl.Image(
                data[0], caption="Pred: {} Truth: {}".format(
                    pred[0].item(), target[0].item())))

    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * correct / len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset), test_accuracy))

    if save_image:
        vessl.log({
            "Examples": test_images,
        })

    return test_accuracy
Example #3
0
def train(model_type, model, corpus, train_data, batch_size, bptt, clip,
          log_interval, dry_run, epoch):
    """Run one training epoch over ``train_data`` using manual SGD updates.

    Walks the batched corpus in strides of ``bptt``, clips gradients, and
    applies a plain SGD step using the module-level global ``lr``. Interval
    statistics are printed every ``log_interval`` batches, and the epoch-mean
    loss/perplexity are logged to Vessl at the end.
    """
    # Enable dropout for training.
    model.train()
    interval_loss = 0.
    epoch_loss = 0.
    tic = time.time()
    vocab_size = len(corpus.dictionary)
    use_rnn_state = model_type != 'Transformer'
    if use_rnn_state:
        hidden = model.init_hidden(batch_size)

    for batch, i in enumerate(range(0, train_data.size(0) - 1, bptt)):
        data, targets = get_batch(train_data, i)
        model.zero_grad()
        if use_rnn_state:
            # Detach the hidden state from the previous batch; otherwise
            # backprop would reach all the way to the start of the dataset.
            hidden = repackage_hidden(hidden)
            output, hidden = model(data, hidden)
        else:
            output = model(data).view(-1, vocab_size)
        loss = criterion(output, targets)
        loss.backward()

        # Clip gradients to curb the exploding-gradient problem in
        # RNNs/LSTMs, then take a manual SGD step (``lr`` is a global).
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        for p in model.parameters():
            p.data.add_(p.grad, alpha=-lr)

        batch_loss = loss.item()
        interval_loss += batch_loss
        epoch_loss += batch_loss

        if batch > 0 and batch % log_interval == 0:
            cur_loss = interval_loss / log_interval
            elapsed = time.time() - tic
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch,
                    len(train_data) // bptt, lr, elapsed * 1000 / log_interval,
                    cur_loss, math.exp(cur_loss)))
            interval_loss = 0
            tic = time.time()
        if dry_run:
            break

    # Logging metrics to Vessl
    mean_loss = epoch_loss / (len(train_data) // bptt)
    vessl.log(step=epoch, row={'loss': mean_loss, 'ppl': math.exp(mean_loss)})
Example #4
0
File: main.py Project: kuss/examples
def train(model, device, train_dataloader, optimizer, epoch, start_epoch):
    """Train ``model`` for one epoch and log the mean loss to Vessl.

    Args:
        model: torch.nn.Module to train (switched to train mode here).
        device: device the inputs should be moved to.
        train_dataloader: DataLoader over the training set.
        optimizer: optimizer stepped once per batch.
        epoch: zero-based index of this epoch (used for console output and
            the Vessl step).
        start_epoch: epoch offset added to the Vessl logging step.
    """
    model.train()
    running_loss = 0.0
    num_batches = 0
    for batch_idx, (data, labels) in enumerate(train_dataloader):
        inputs, labels = data.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        # ``criterion`` is a module-level global defined elsewhere.
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

        if batch_idx % 128 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch + 1, batch_idx * len(data),
                len(train_dataloader.dataset),
                100. * batch_idx / len(train_dataloader), loss.item()))

    # Log the epoch-mean loss to Vessl. The original logged only the final
    # batch's loss and raised AttributeError (int 0 has no .item()) on an
    # empty dataloader; averaging matches the LM-training example above.
    mean_loss = running_loss / num_batches if num_batches else 0.0
    vessl.log(step=epoch + start_epoch + 1, row={'loss': mean_loss})
Example #5
0
    # NOTE(review): fragment of a larger training-driver function whose
    # signature lies outside this view; ``epochs``, ``model``, ``lr``,
    # ``args`` and the helpers below are defined in the enclosing scope.
    best_val_loss = None
    for epoch in range(1, epochs + 1):
        epoch_start_time = time.time()
        # One full training pass, then evaluate on the validation split.
        train(model_type, model, corpus, train_data, batch_size, args.bptt,
              clip, args.log_interval, args.dry_run, epoch)
        val_loss = evaluate(model_type, model, corpus, val_data, args.bptt)
        val_ppl = math.exp(val_loss)
        print('-' * 89)
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
              'valid ppl {:8.2f}'.format(epoch,
                                         (time.time() - epoch_start_time),
                                         val_loss, val_ppl))
        print('-' * 89)

        # Logging metrics to Vessl
        vessl.log(step=epoch, row={'val_loss': val_loss, 'val_ppl': val_ppl})

        # Save the model if the validation loss is the best we've seen so far.
        if not best_val_loss or val_loss < best_val_loss:
            save(model, args.output_path)
            best_val_loss = val_loss
        else:
            # Anneal the learning rate if no improvement has been seen in the validation dataset.
            # NOTE(review): this rebinds ``lr``; whether the rebinding reaches
            # the global that train() reads depends on scope not visible
            # here — confirm against the full file.
            lr /= 4.0

    # Load the best saved model.
    with open(os.path.join(args.output_path, 'model.pt'), 'rb') as f:
        model = torch.load(f)
        # after load the rnn params are not a continuous chunk of memory
        # this makes them a continuous chunk, and will speed up forward pass
        # Currently, only rnn model supports flatten_parameters function.
Example #6
0
 def after_step(self):
     """Log the latest smoothed total loss to Vessl after each trainer step."""
     latest_loss = self.trainer.storage.history('total_loss').latest()
     vessl.log(step=self.trainer.iter, row={'loss': latest_loss})