Example #1
def train_pipe(args, part='parameters'):
    torch.manual_seed(args.seed)
    deepspeed.runtime.utils.set_random_seed(args.seed)

    #
    # Build the model
    #

    # VGG also works :-)
    #net = vgg19(num_classes=10)
    net = AlexNet(num_classes=10)
    net = PipelineModule(layers=join_layers(net),
                         loss_fn=torch.nn.CrossEntropyLoss(),
                         num_stages=args.pipeline_parallel_size,
                         partition_method=part,
                         activation_checkpoint_interval=0)

    trainset = cifar_trainset(args.local_rank)

    engine, _, _, _ = deepspeed.initialize(
        args=args,
        model=net,
        model_parameters=[p for p in net.parameters() if p.requires_grad],
        training_data=trainset)

    for step in range(args.steps):
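        # train_batch() pulls micro-batches from the dataset passed to deepspeed.initialize
        # and runs forward, backward and the optimizer step across all pipeline stages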
        loss = engine.train_batch()
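
# join_layers is not shown in this snippet; a minimal sketch, assuming the
# torchvision-style AlexNet/VGG layout (features, avgpool, classifier) that
# PipelineModule expects as a flat list of callables:
def join_layers(vision_model):
    return [
        *vision_model.features,
        vision_model.avgpool,
        lambda x: torch.flatten(x, 1),
        *vision_model.classifier,
    ]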
Example #2
def pytorch_cos():
    model = AlexNet(num_classes=2)
    optimizer = optim.SGD(params=model.parameters(), lr=0.0001)

    epoch = 100
    len_loader = 100

    scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer,
                                                         T_0=2,
                                                         T_mult=2,
                                                         eta_min=1e-6,
                                                         last_epoch=-1)
    plt.figure()
    x = []
    y = []
    for e in range(epoch):
        for i in range(len_loader):
            step = e + i / len_loader
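            # CosineAnnealingWarmRestarts accepts a fractional epoch, so the LR can be annealed per iteration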
            scheduler.step(step)
            lr = scheduler.get_last_lr()[0]

            x.append(step)
            y.append(lr)

    plt.plot(x, y)
    plt.xticks(np.arange(0, epoch + 1, 4))
    plt.show()
Example #3
def get_model(device=None):
    # Load the CNN model
    model = AlexNet(num_classes=2)
    model.load_state_dict(
        torch.load('./models/best_linear_svm_alexnet_car.pth'))
    model.eval()

    # Disable gradient tracking
    for param in model.parameters():
        param.requires_grad = False
    if device:
        model = model.to(device)

    return model
Example #4
def train_base(args):
    torch.manual_seed(args.seed)

    # VGG also works :-)
    #net = vgg19(num_classes=10)
    net = AlexNet(num_classes=10)

    trainset = cifar_trainset(args.local_rank)

    engine, _, dataloader, __ = deepspeed.initialize(
        args=args,
        model=net,
        model_parameters=[p for p in net.parameters() if p.requires_grad],
        training_data=trainset)

    dataloader = RepeatingLoader(dataloader)
    data_iter = iter(dataloader)

    rank = dist.get_rank()
    gas = engine.gradient_accumulation_steps()

    criterion = torch.nn.CrossEntropyLoss()

    total_steps = args.steps * gas
    step = 0
    for micro_step in range(total_steps):
        batch = next(data_iter)
        inputs = batch[0].to(engine.device)
        labels = batch[1].to(engine.device)

        outputs = engine(inputs)
        loss = criterion(outputs, labels)
        engine.backward(loss)
        engine.step()
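        # gradients accumulate across micro-batches; the engine only applies the
        # optimizer update at gradient-accumulation boundaries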

        if (micro_step + 1) % gas == 0:
            step += 1
            if rank == 0 and (step % 10 == 0):
                print(f'step: {step:3d} / {args.steps:3d} loss: {loss}')
Example #5
def train():
    torch.multiprocessing.freeze_support()

    traindir = os.path.join('./200508_cat_classification/dogs-vs-cats',
                            'train')  # join the path components
    testdir = os.path.join('./200508_cat_classification/dogs-vs-cats', 'test')

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_loader = datautil.DataLoader(TrainImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])),
                                       batch_size=4,
                                       shuffle=True,
                                       num_workers=4,
                                       pin_memory=True)

    test_loader = datautil.DataLoader(TestImageFolder(
        testdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
                                      batch_size=1,
                                      shuffle=False,
                                      num_workers=1,
                                      pin_memory=False)

    net = AlexNet()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = net.to(device)
    load_model(net, './alexnet.pth')

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")

    net = nn.DataParallel(net)

    if torch.cuda.is_available():
        net.cuda()

    import torch.optim as optim

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.004)

    for epoch in range(3):
        running_loss = 0.0
        acc = 0.
        correct = 0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = net(inputs)
            #print(outputs)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            prediction = torch.max(outputs.data, 1)[1]
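            # torch.max(..., 1)[1] is the index of the largest logit, i.e. the predicted class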

            correct += prediction.eq(
                labels.data.view_as(prediction)).cpu().sum()

            if i % 2000 == 1999:
                total = (i + 1) * 4
                print(
                    f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.6f} acc : {correct} / {total}'
                )
                running_loss = 0.0

    print('Finished Training')

    save_model(net, './')
Example #6
import torch.optim as optim
from torchvision.models import AlexNet
import matplotlib.pyplot as plt
from utils.lr_scheduler import CosineAnnealingWarmupRestarts


def plot(lr_list):
    f = plt.figure()

    plt.plot(lr_list)
    plt.show()


epochs = 200
iterations = 100
model = AlexNet()
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
# scheduler = lr_scheduler.CosineAnnealingLR(optimizer, epochs, eta_min=1e-4, last_epoch=-1)
# scheduler = lr_scheduler.CosineAnnealingLR(optimizer, iterations, eta_min=1e-4, last_epoch=-1)
scheduler = CosineAnnealingWarmupRestarts(optimizer,
                                          first_cycle_steps=iterations,
                                          cycle_mult=0.5,
                                          max_lr=0.1,
                                          min_lr=0.0,
                                          warmup_steps=1,
                                          gamma=0.5)
# this zero gradient update is needed to avoid a warning message, issue #8.
optimizer.zero_grad()

lr_list = list()
for epoch in range(epochs):
    optimizer.step()
    scheduler.step()
    lr_list.append(optimizer.param_groups[0]['lr'])

plot(lr_list)
Example #7
                  mode='triangular'):
        scheduler = CyclicLR(self.optimizer,
                             base_lr=base_lr,
                             max_lr=max_lr,
                             step_size_up=step_size_up,
                             step_size_down=step_size_down,
                             mode=mode)
        return scheduler

    def adjust(self, base_lr, type):
        pass


if __name__ == '__main__':
    net = AlexNet(num_classes=2)
    optimizer = SGD(net.parameters(), lr=0.0003)
    adj = AdjustLr(optimizer)
    sch1 = adj.LambdaLR_(milestone=5, gamma=0.92)
    epoches = 40
    plt.figure()
    x1 = list(range(epoches))
    y1 = list()
    lr = optimizer.param_groups[0]['lr']
    for epoch in range(epoches):
        optimizer.step()
        sch1.step()

        a = sch1.get_last_lr()[0]
        print(epoch, a)
        y1.append(a)
    plt.plot(x1, y1)
Example #8
transform = transforms.Compose([
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224,
                                                          0.225]),
])

trainset = torchvision.datasets.CIFAR10(root='./data',
                                        train=True,
                                        download=True,
                                        transform=transform)

deepspeed.init_distributed()
net = AlexNet(num_classes=10)
net = PipelineModule(layers=join_layers(net),
                     loss_fn=torch.nn.CrossEntropyLoss(),
                     num_stages=2,
                     partition_method="parameters",
                     activation_checkpoint_interval=0)

args = add_argument()
engine, optimizer, trainloader, __ = deepspeed.initialize(
    args=args,
    model=net,
    model_parameters=[p for p in net.parameters() if p.requires_grad],
    training_data=trainset)

for step in range(steps):
    loss = engine.train_batch()
    print(loss)
# deepspeed --hostfile=./hostfile model_parallel/deepspeed/tutorial.py --deepspeed --deepspeed_config model_parallel/deepspeed/ds_config.json
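# A minimal ds_config.json for this script might look like the following (values are
# illustrative assumptions, not taken from the original):
# {
#   "train_batch_size": 16,
#   "train_micro_batch_size_per_gpu": 4,
#   "steps_per_print": 10,
#   "optimizer": {"type": "Adam", "params": {"lr": 0.001}}
# }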
Example #9
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        return self.classifier(x)


model = AlexNet(10).to(device)
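# hiddenlayer (hl) traces the model with a dummy input and renders the architecture graph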
h1 = hl.build_graph(model, torch.zeros(64, 3, 224, 224).to(device))
h1.save('images/alexnet.png', format='png')

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


def update_lr(optimizer, lr):
    """For updating learning rate."""

    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


# Train the model
total_step = len(train_loader)
curr_lr = learning_rate
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
Example #10
import torch
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision.models import AlexNet
import matplotlib.pyplot as plt

model = AlexNet(num_classes=2)
optimizer = optim.SGD(params=model.parameters(), lr=0.01)


def f_step():
    scheduler = lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.98)
    x = list(range(100))
    y = []
    for epoch in range(100):
        scheduler.step()
        lr = scheduler.get_last_lr()[0]
        print(epoch, lr)
        y.append(lr)

    return x, y


def f_multistep():
    scheduler = lr_scheduler.MultiStepLR(optimizer, [30, 80], gamma=0.98)
    x = list(range(100))
    y = []
    for epoch in range(100):
        scheduler.step()
        lr = scheduler.get_last_lr()[0]
        print(epoch, lr)
        y.append(lr)

    return x, y
Example #11
from torch.autograd import Variable
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision as tv
import torchvision.transforms as transforms

# Saving and loading tensors
a = torch.Tensor(3, 4)
a = a.cuda()  # .cuda() returns a copy on the GPU, so reassign to actually move the tensor
torch.save(a, 'a.pth')

b = torch.load('a.pth')

c = torch.load('a.pth', map_location=lambda sto, loc: sto)
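# the lambda returns the storage unchanged, so the tensor is loaded onto the CPU
# even if it was saved from a GPU tensor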
# ----------------------------------------------------------
torch.set_default_tensor_type('torch.FloatTensor')
from torchvision.models import AlexNet

model = AlexNet()
model.state_dict().keys()
# Saving and loading the model weights
torch.save(model.state_dict(), 'alexnet.pth')
model.load_state_dict(torch.load('alexnet.pth'))

opt = torch.optim.Adam(model.parameters(), lr=0.1)
# Saving and loading the optimizer state
torch.save(opt.state_dict(), 'opt.pth')
opt.load_state_dict(torch.load('opt.pth'))
Example #12
                torch.save(
                    {
                        'epoch': self.T_max,
                        'state_dict': self.model.state_dict()
                    }, self.out_dir + "Weight/" +
                    'snapshot_e_{:03d}.pth.tar'.format(self.T_max))

            ## reset epochs since the last reset
            self.current_epoch = 0

            ## reset the next goal
            self.Te = int(self.Te * self.T_mult)
            self.T_max = self.T_max + self.Te


if __name__ == '__main__':
    from torchvision.models import AlexNet
    model = AlexNet(num_classes=2)
    optimizer = torch.optim.SGD(params=model.parameters(), lr=0.1)
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=2)
    scheduler = CosineAnnealingLR_with_Restart(optimizer,
                                               T_max=4,
                                               T_mult=1,
                                               model=model,
                                               out_dir='./',
                                               take_snapshot=False)
    for epoch in range(20):
        lr = scheduler.get_lr()
        print(lr)
        scheduler.step()
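        # with T_max=4 and T_mult=1 the cosine schedule restarts every 4 epochs, so the
        # printed LR values should repeat in 4-epoch cycles over the 20 epochs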
Example #13
    os.makedirs(cpkRoot, exist_ok=True)
    filePath = os.path.join(cpkRoot, fileName)
    bestPath = os.path.join(cpkRoot, 'model_best.pth.tar')
    torch.save(state, filePath)
    if isBest:
        shutil.copyfile(filePath, bestPath)


if __name__ == '__main__':
    from easydict import EasyDict as edict
    args = edict()
    args.lr = 0.2
    args.nEpochs = 40
    args.power = 0.9
    model = AlexNet(num_classes=2)
    optimizer = optim.SGD(params=model.parameters(), lr=args.lr)
    # scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.2)
    plt.figure()
    x = list(range(10))
    y = []

    for epoch in range(10):
        # scheduler.step(epoch)
        # y.append(scheduler.get_lr()[0])
        adjust_learning_rate(optimizer, epoch, args)
        y.append(optimizer.param_groups[0]['lr'])

    plt.plot(x, y)
    plt.show()
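
# The adjust_learning_rate helper is not shown in this snippet; a minimal sketch,
# assuming the usual polynomial ("poly") decay driven by args.lr, args.power and
# args.nEpochs (hypothetical implementation, which would need to sit above the
# __main__ block to be callable there):
def adjust_learning_rate(optimizer, epoch, args):
    lr = args.lr * (1 - epoch / args.nEpochs) ** args.power
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr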
Example #14
    plt.ylabel('Cross entropy Loss')
    plt.legend()
    plt.show()

def plot_data(exp_id):
    epochs, lrs, train_err, val_err, train_loss, val_loss = load_experiment(exp_id)
    plot_lr_data(epochs, lrs)
    plot_err_data(epochs, train_err, val_err)
    plot_loss_data(epochs, train_loss, val_loss)

if __name__ == '__main__':
    plot_data(8)
    exit(1)
    from torchvision.models import AlexNet
    model = AlexNet()
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    scheduler = CosineWithRestartLR(
                optimizer,
                min_lr=1e-4,
                max_lr=0.1,
                restart_interval=10,
                restart_multiplier=2,
                amplitude_decay=1
     )
    # scheduler = AdaptiveLR(
    #     optimizer,
    #     start_lr = 0.01,
    #     mu=0.99,
    #     eps=0.1,
    #     last_epoch=-1
    # )