示例#1
0
# Train `net` for two passes over `trainloader`, printing the mean loss of
# every 2000 mini-batches, then save the trained weights to MODEL_PATH.
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader):
        # Each batch is an (inputs, labels) pair. Tensors carry autograd state
        # directly since PyTorch 0.4, so no Variable wrapper is needed.
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate as a Python float: `loss.data[0]` fails on the 0-dim loss
        # tensor in current PyTorch, `.item()` is the supported accessor.
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

# Save only the parameters (state_dict), not the whole module object.
torch.save(net.state_dict(), MODEL_PATH)
示例#2
0
class Trainer(object):
    """Train ``Net`` on an MNIST dataset and checkpoint the best validation accuracy.

    Constructing a ``Trainer`` immediately builds the data loaders and the
    model/optimizer/loss. ``args`` must provide ``train_image_file``,
    ``train_label_file``, ``batch_size``, ``lr`` and ``ckpt``; ``run``
    additionally reads ``epochs`` and ``name``.
    """

    def __init__(self, args):
        self.args = args
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.prepare_data()
        self.setup_train()

    def prepare_data(self):
        """Load the dataset and build shuffled-train / ordered-val loaders (80/20 split)."""
        train_val = MnistDataset(
            self.args.train_image_file,
            self.args.train_label_file,
            transform=transforms.Compose([ToTensor()]),
        )
        train_len = int(0.8 * len(train_val))
        train_ds, val_ds = torch.utils.data.random_split(
            train_val, [train_len, len(train_val) - train_len]
        )
        print("Train {}, val {}".format(len(train_ds), len(val_ds)))
        self.train_loader = torch.utils.data.DataLoader(
            train_ds,
            batch_size=self.args.batch_size,
            collate_fn=collate_fn,
            shuffle=True,
        )
        self.val_loader = torch.utils.data.DataLoader(
            val_ds,
            batch_size=self.args.batch_size,
            collate_fn=collate_fn,
            shuffle=False,
        )

    def setup_train(self):
        """Create the model, SGD optimizer and loss, and ensure the checkpoint dir exists."""
        self.model = Net().to(self.device)
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.args.lr)
        self.criterion = nn.CrossEntropyLoss().to(self.device)
        # makedirs also creates missing parent directories and tolerates the
        # directory already existing (os.mkdir does neither).
        os.makedirs(self.args.ckpt, exist_ok=True)

    def train_one_epoch(self):
        """Run one optimisation pass over ``train_loader``.

        Returns:
            The mean per-batch training loss as a float (0.0 for an empty loader).
        """
        train_loss = 0.0
        self.model.train()
        for sample in self.train_loader:
            X, Y_true = sample["X"].to(self.device), sample["Y"].to(self.device)
            self.optimizer.zero_grad()
            output = self.model(X)
            loss = self.criterion(output, Y_true)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
        num_batches = len(self.train_loader)
        return train_loss / num_batches if num_batches else 0.0

    def evaluate(self):
        """Score the model on ``val_loader`` without tracking gradients.

        Returns:
            ``(accuracy, mean_loss)`` as Python floats: the fraction of correct
            argmax predictions and the mean per-batch loss. Both are 0.0 when
            the loader yields no batches.
        """
        val_loss = 0.0
        correct = 0
        total = 0
        self.model.eval()
        with torch.no_grad():
            for sample in self.val_loader:
                X, Y_true = sample["X"].to(self.device), sample["Y"].to(self.device)
                output = self.model(X)
                val_loss += self.criterion(output, Y_true).item()
                predicted = torch.argmax(output, dim=1)
                # Count with Python ints: dividing an integer tensor by an int
                # floor-divides or raises on some PyTorch versions, and would
                # otherwise hand a tensor back to the caller.
                correct += torch.eq(predicted, Y_true).sum().item()
                total += predicted.size(0)
        if total == 0:
            return 0.0, 0.0
        return correct / total, val_loss / len(self.val_loader)

    def run(self):
        """Train for ``args.epochs`` epochs, saving a checkpoint on each new best val accuracy."""
        max_acc = 0.0
        for epoch in range(self.args.epochs):
            train_loss = self.train_one_epoch()
            val_acc, _ = self.evaluate()

            if val_acc > max_acc:
                max_acc = val_acc
                # File name encodes run name, epoch index and the accuracy reached.
                torch.save(
                    self.model.state_dict(),
                    os.path.join(
                        self.args.ckpt,
                        "{}_{}_{:.4f}.pth".format(self.args.name, epoch, max_acc),
                    ),
                )
            print(
                "Epoch {}, loss {:.4f}, val_acc {:.4f}".format(
                    epoch, train_loss, val_acc
                )
            )
示例#3
0
        # Tail of an evaluation loop whose header is not visible here
        # (presumably one iteration per validation batch — TODO confirm).
        # Accumulate the loss and the number of correct predictions.
        eval_loss += loss.item()
        # torch.max(..., 1) returns (values, indices); [1] keeps the argmax
        # index along dim 1.
        eval_pred = torch.max(predictions, 1)[1]
        num_correct = (eval_pred == targets).sum()
        eval_acc += num_correct.item()

    # Track the accumulated correct count and flag this round as "best" when it
    # equals the maximum recorded so far (ties with an earlier best also count).
    eval_acc_list.append(eval_acc)
    if eval_acc == max(eval_acc_list):
        is_best = True
        # NOTE(review): eval_acc is a count of correct predictions, so this is
        # an accuracy only if len(data['valid']) is the number of samples — confirm.
        best_pred = eval_acc / (len(data['valid']))
        print('The best best_predAcc is {:.6f}, epoch {}'.format(best_pred, epoch + 1))
        f.write('The best best_predAcc is {:.6f}, epoch {}'.format(best_pred, epoch + 1) + '\n')
    else:
        is_best = False

    # Save the model (always overwrites the same file).
    torch.save(net.state_dict(), os.path.join(save_model_path, 'model-20200904-2.pth.tar'))
    # If this is the best result so far, copy the saved file as the best model.
    if is_best:
        shutil.copyfile(os.path.join(save_model_path, 'model-20200904-2.pth.tar'),
                        os.path.join(save_model_path, 'best_model-20200904-2.pth.tar'))

    # Output log information: the same summary line goes to stdout and to the log file f.
    print('epoch {} trainLoss {:.6f} trainAcc {:.6f} validLoss {:.6f} validAcc {:.6f}'.format(
        epoch + 1, train_loss / (len(data['train'])), train_acc / (len(data['train'])), eval_loss / (len(
                data['valid'])), eval_acc / (len(data['valid']))))
    f.write('epoch {} trainLoss {:.6f} trainAcc {:.6f} validLoss {:.6f} validAcc {:.6f}'.format(
        epoch + 1, train_loss / (len(data['train'])), train_acc / (len(data['train'])), eval_loss / (len(
                data['valid'])), eval_acc / (len(data['valid']))) + '\n')

# Record the finish time (seconds since the epoch) in the log file.
end = time.time()
# time.time() returns a float; it must be formatted to text before writing,
# because `end + '\n'` raises TypeError (float + str).
f.write('{}\n'.format(end))
        # Tail of an evaluation loop whose header is not visible here
        # (presumably one iteration per validation batch — TODO confirm).
        # Add the number of predictions that match the targets.
        num_correct = (eval_pred == targets).sum()
        eval_acc += num_correct.item()

    # Track the accumulated correct count and flag this round as "best" when it
    # equals the maximum recorded so far (ties with an earlier best also count).
    eval_acc_list.append(eval_acc)
    if eval_acc == max(eval_acc_list):
        is_best = True
        # NOTE(review): eval_acc is a count of correct predictions, so this is
        # an accuracy only if len(data['valid']) is the number of samples — confirm.
        best_pred = eval_acc / (len(data['valid']))
        print('The best best_predAcc is {:.6f}, epoch {}'.format(
            best_pred, epoch + 1))
        f.write('The best best_predAcc is {:.6f}, epoch {}'.format(
            best_pred, epoch + 1) + '\n')
    else:
        is_best = False

    # Save the model (always overwrites the same file).
    torch.save(model.state_dict(), os.path.join(save_model_path,
                                                '8-24.pth.tar'))
    # If this is the best result so far, copy the saved file as the best model.
    if is_best:
        shutil.copyfile(os.path.join(save_model_path, '8-24.pth.tar'),
                        os.path.join(save_model_path, 'best-8-24.pth.tar'))

    # Output log information: print the summary line to stdout.
    print(
        'epoch {} trainLoss {:.6f} trainAcc {:.6f} validLoss {:.6f} validAcc {:.6f}'
        .format(epoch + 1, train_loss / (len(data['train'])),
                train_acc / (len(data['train'])),
                eval_loss / (len(data['valid'])),
                eval_acc / (len(data['valid']))))
    f.write(
        'epoch {} trainLoss {:.6f} trainAcc {:.6f} validLoss {:.6f} validAcc {:.6f}'