Example #1
def main(args):

    logs = []

    transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ])

    trainloader, testloader = get_dataset(args, transform)
    net = AlexNet()

    if args.no_distributed:
        optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.0)
    else:
        optimizer = DownpourSGD(net.parameters(), lr=args.lr, n_push=args.num_push, n_pull=args.num_pull, model=net)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=1, verbose=True, min_lr=1e-3)

    # train
    net.train()
    if args.cuda:
        net = net.cuda()

    for epoch in range(args.epochs):  # loop over the dataset multiple times
        print("Training for epoch {}".format(epoch))
        for i, data in enumerate(trainloader, 0):
            # get the inputs
            inputs, labels = data

            if args.cuda:
                inputs, labels = inputs.cuda(), labels.cuda()

            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = net(inputs)
            loss = F.cross_entropy(outputs, labels)
            loss.backward()
            optimizer.step()

            _, predicted = torch.max(outputs, 1)
            accuracy = accuracy_score(labels.cpu().numpy(), predicted.cpu().numpy())  # sklearn expects (y_true, y_pred) as CPU arrays

            log_obj = {
                'timestamp': datetime.now(),
                'iteration': i,
                'training_loss': loss.item(),
                'training_accuracy': accuracy,
            }

            if i % args.log_interval == 0 and i > 0:    # print every n mini-batches
                log_obj['test_loss'], log_obj['test_accuracy'] = evaluate(net, testloader, args)
                print("Timestamp: {timestamp} | "
                      "Iteration: {iteration:6} | "
                      "Loss: {training_loss:6.4f} | "
                      "Accuracy : {training_accuracy:6.4f} | "
                      "Test Loss: {test_loss:6.4f} | "
                      "Test Accuracy: {test_accuracy:6.4f}".format(**log_obj))

            logs.append(log_obj)
                
        val_loss, val_accuracy = evaluate(net, testloader, args, verbose=True)
        scheduler.step(val_loss)

    df = pd.DataFrame(logs)
    print(df)
    if args.no_distributed:
        if args.cuda:
            df.to_csv('log/gpu.csv', index_label='index')
        else:
            df.to_csv('log/single.csv', index_label='index')
    else:
        df.to_csv('log/node{}.csv'.format(dist.get_rank()), index_label='index')

    print('Finished Training')
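
Example #1 calls an evaluate helper that is not shown. Below is a minimal sketch consistent with how it is used above (it must return a (loss, accuracy) pair and accept a verbose flag); the body is an assumption, not the original implementation:

def evaluate(net, testloader, args, verbose=False):
    # Hypothetical helper: mean cross-entropy loss and accuracy over the test set
    net.eval()
    total_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for inputs, labels in testloader:
            if args.cuda:
                inputs, labels = inputs.cuda(), labels.cuda()
            outputs = net(inputs)
            total_loss += F.cross_entropy(outputs, labels, reduction='sum').item()
            correct += (torch.max(outputs, 1)[1] == labels).sum().item()
            total += labels.size(0)
    net.train()
    if verbose:
        print("Test loss: {:.4f} | Test accuracy: {:.4f}".format(total_loss / total, correct / total))
    return total_loss / total, correct / total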
Example #2
params (iterable) --- iterable of parameters to optimize, or dicts defining parameter groups
lr (float, optional) --- learning rate (default: 1e-3)
betas (Tuple[float, float], optional) --- coefficients used for computing running averages of the gradient and its square (default: (0.9, 0.999))
eps (float, optional) --- term added to the denominator to improve numerical stability (default: 1e-8)
weight_decay (float, optional) --- weight decay (L2 penalty) (default: 0)
amsgrad (boolean, optional) --- whether to use the AMSGrad variant of the algorithm from the paper "On the Convergence of Adam and Beyond"


2. The loss function we use here:
class torch.nn.CrossEntropyLoss(weight=None, size_average=True, ignore_index=-100, reduce=True)
The cross-entropy loss function.
For details see: http://pytorch.apachecn.org/cn/docs/0.3.0/nn.html
'''
# 3. Set up the optimizer and loss function
# Here we use the Adam optimizer; the loss function is cross-entropy
optimizer = torch.optim.Adam(cnn.parameters(), lr=0.005, betas=(0.9, 0.99))  # optimize all the cnn parameters
loss_func = nn.CrossEntropyLoss()  # the target labels are not one-hot encoded

# --------------------------- 3. Set up the optimizer and loss function: end ------------------------------------------------------------------
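
As the comment above notes, nn.CrossEntropyLoss expects integer class indices rather than one-hot vectors, and applies log_softmax to the raw logits internally. A small self-contained check (illustrative values only):

import torch
import torch.nn as nn

loss_func = nn.CrossEntropyLoss()
logits = torch.randn(4, 10)           # raw, unnormalized scores for 4 samples, 10 classes
targets = torch.tensor([3, 0, 9, 1])  # class indices, NOT one-hot vectors
print(loss_func(logits, targets))     # scalar loss; softmax is applied internally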

# --------------------------- 4. Train the CNN model: start ------------------------------------------------------------------

# 4. Train the model
# Set the number of training epochs. Here we use 10, i.e. ten full passes over the
# training set; to save time you can train for just 1 epoch.
EPOCH = 10
# Training and testing
for epoch in range(EPOCH):
    num = 0
    # gives batch data; normalizes x while iterating train_loader
    for step, (x, y) in enumerate(loader_train):
        b_x = Variable(x)  # batch x
Example #3
                               batch_size=20,
                               shuffle=True,
                               num_workers=1)

testdir = '/opt/data/kaggle/playground/dogs-vs-cats/sample_test'
test_dataset = DogCat(testdir, train=True)
loader_test = data.DataLoader(test_dataset,
                              batch_size=3,
                              shuffle=True,
                              num_workers=1)

# 2. 创建 CNN 模型
cnn = AlexNet()
print(cnn)
# 3. 设置优化器和损失函数
optimizer = torch.optim.Adam(cnn.parameters(), lr=0.005,
                             betas=(0.9, 0.99))  # optimize all cnn parameters
loss_func = nn.CrossEntropyLoss()  # the target label is not one-hotted

# 4. 训练模型
EPOCH = 10  # number of full passes over the training data; reduce to 1 to save time
# training and testing
for epoch in range(EPOCH):
    num = 0
    # gives batch data, normalize x when iterate train_loader
    for step, (x, y) in enumerate(loader_train):
        b_x = Variable(x)  # batch x
        b_y = Variable(y)  # batch y

        output = cnn(b_x)  # cnn output
        loss = loss_func(output, b_y)  # cross entropy loss
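
The example is cut off right after the loss is computed. The canonical remainder of the step, following the same pattern as Example #1, would be:

        optimizer.zero_grad()  # clear gradients from the previous step
        loss.backward()        # backpropagate the cross-entropy loss
        optimizer.step()       # apply the Adam update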
Example #4
        from models.DenseNet import *
        model = DenseNet121()
    elif modelName == "MobileNet":
        from models.MobileNet import *
        model = MobileNet()
    else:
        model = None

    if model is not None:
        model = model.to(Device)
    else:
        raise Exception("model not found")


    # optimizer and loss function
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, 
                    momentum=momentum, weight_decay=weightDecay)
    loss_func = torch.nn.CrossEntropyLoss()


    # use testset to evaluate
    def evaluateModel():
        total_acc = 0.0
        total_loss = 0.0
        step = 0
        with torch.no_grad():
            for i, (image, label) in enumerate(test_loader):
                image, label = image.to(Device), label.to(Device)
                if modelName != 'GoogLeNet':
                    # Others
                    output = model(image)
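
The snippet stops inside evaluateModel. A plausible completion, assuming per-batch averaging with the total_acc, total_loss, and step accumulators initialized above (the GoogLeNet branch is a guess at the missing else):

                else:
                    # torchvision's GoogLeNet returns auxiliary outputs in train
                    # mode; a single output tensor is assumed here in evaluation
                    output = model(image)
                loss = loss_func(output, label)
                _, predicted = torch.max(output, 1)
                total_acc += (predicted == label).sum().item() / label.size(0)
                total_loss += loss.item()
                step += 1
        return total_loss / step, total_acc / step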
Example #5
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           sampler=train_sampler)
val_loader = torch.utils.data.DataLoader(val_dataset,
                                         batch_size=batch_size,
                                         sampler=valid_sampler)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=batch_size,
                                          sampler=test_sampler)

# GPU setup
device = torch.device('cuda')

net = AlexNet(in_channel=2, classes=10).to(device=device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)  # Adam has no momentum argument; its betas play that role

# Train loop
num_epochs = 25
for epoch in range(num_epochs):

    print("Epoch: {} - Train".format(epoch))
    net.train()
    running_loss = 0.
    # Train:
    for batch_index, (signals, labels) in enumerate(tqdm(train_loader)):

        signals, labels = signals.to(device=device), labels.to(device=device)

        optimizer.zero_grad()
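
The loop is truncated after optimizer.zero_grad(). A sketch of the standard remainder, which also puts the running_loss accumulator defined above to use:

        outputs = net(signals)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print("Epoch {} mean train loss: {:.4f}".format(epoch, running_loss / len(train_loader)))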
Example #6
if torch.cuda.is_available():
    print('CUDA enabled.')
    net.cuda()
print("--- Pretrained network loaded ---")
# test(net, loader_test)

# prune the weights
masks = weight_prune(net, param['pruning_perc'])
net.set_masks(masks)
net = nn.DataParallel(net)
print("--- {}% parameters pruned ---".format(param['pruning_perc']))
test(net, loader_test)


# Retraining
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(net.parameters(), lr=param['learning_rate'], 
                                weight_decay=param['weight_decay'])

train(net, criterion, optimizer, param, loader_train)


# Check accuracy and nonzeros weights in each layer
print("--- After retraining ---")
test(net, loader_test)
prune_rate(net)


# Save the pruned model's weights (state_dict only, not the entire model)
torch.save(net.state_dict(), 'models/alexnet_pruned.pkl')
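
Because net was wrapped in nn.DataParallel before training, the saved keys carry a 'module.' prefix. A sketch of loading the weights back (assuming the same AlexNet class is available):

net = nn.DataParallel(AlexNet())  # re-wrap so the 'module.'-prefixed keys match
net.load_state_dict(torch.load('models/alexnet_pruned.pkl'))
net.eval()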
Example #7
    dtest = torchvision.datasets.CIFAR10(
        root='./data', train=False, download=True, transform=transform_test
    )
    test_loader = torch.utils.data.DataLoader(
        dtest, batch_size=BATCH_SIZE, shuffle=False
    )

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Use cross-entropy as the loss function
    criterion = nn.CrossEntropyLoss()
    # Define the model
    model = AlexNet(num_classes=num_classes)
    # The optimizer is SGD
    optimizer = torch.optim.SGD(model.parameters(), lr=LR, momentum=0.9,
                                weight_decay=5e-4)

    # The actual training loop
    for epoch in range(1, NUM_EPOCHS + 1):
        train_loss, train_acc = epoch_train(train_loader, model, optimizer,
                                            criterion)
        test_loss, test_acc = epoch_eval(test_loader, model, criterion)

        print(f'EPOCH: [{epoch}/{NUM_EPOCHS}]')
        print(f'TRAIN LOSS: {train_loss:.3f}, TRAIN ACC: {train_acc:.3f}')
        print(f'TEST LOSS: {test_loss:.3f}, TEST ACC: {test_acc:.3f}')

        # Save the weights at each epoch like this
        parameters = model.state_dict()
        torch.save(parameters, f'../weights/{epoch}.pth')
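
To restore one of these per-epoch files later, a matching load might look like this (a sketch; map_location lets weights saved on a GPU load on a CPU-only machine):

model = AlexNet(num_classes=num_classes)
state = torch.load(f'../weights/{NUM_EPOCHS}.pth', map_location='cpu')
model.load_state_dict(state)
model.eval()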
Example #8
for i in range(cat_samples.shape[0]):
    # SummaryWriter.add_images expects the image tensor as its second argument
    tensorboard.add_images("Cat/{}".format(i), cat_samples[i:i + 1])
    tensorboard.add_images("Dog/{}".format(i), dog_samples[i:i + 1])

model = AlexNet(input_size, output_size)
cur_epoch = 0

if RESUME_TRAINING:
    cur_epoch, optimizer_states_dict, loss = load_checkpoint(model)
    cur_epoch += 1

model = model.to(device)
print(model)

loss_func = torch.nn.BCELoss(reduction="mean")
optimizer = opt(model.parameters(), lr=lr, momentum=momentum)
if RESUME_TRAINING:
    optimizer.load_state_dict(optimizer_states_dict)

# Train

for epoch in range(cur_epoch, epochs):
    accumulated_train_loss = []
    # Set model in training mode
    model.train()
    iteration = 0
    for batch_x, batch_y in tqdm(train_generator):
        # Forward
        preds = model(batch_x)

        # compute loss
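        # The example breaks off here. Given loss_func = torch.nn.BCELoss(...) above,
        # the model's final layer must emit sigmoid probabilities and the targets
        # must be floats; a sketch of the remaining step:
        loss = loss_func(preds, batch_y.float())  # BCELoss needs float targets in [0, 1]
        accumulated_train_loss.append(loss.item())

        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        iteration += 1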
Example #9

def main():
    global args, best_prec1
    args = parser.parse_args()
    print(args)


    # Set # classes
    if args.data == 'UCF101':
        num_classes = 101
    else:
        num_classes = 0
        print('Specify the dataset to use ')


    # Create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if args.arch.startswith('alexnet'):
        model = AlexNet(num_classes=num_classes)
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()

    else:
        model = torch.nn.DataParallel(model).cuda()


    # Swap in a pre-trained ResNet-18 with its final layer resized to 101 classes
    # (note: this overrides the model selected from args.arch above)
    model_ft = models.resnet18(pretrained=True)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, 101)
    model = model_ft.cuda()
    model = torch.nn.DataParallel(model).cuda()  # uses all visible GPUs unless device_ids is set
    # print(model)


    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    testdir = os.path.join(args.data, 'test')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(traindir, transforms.Compose([
            transforms.RandomCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True
    )

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(testdir, transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True
    )

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # Train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # Evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # Remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
        }, is_best)
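
adjust_learning_rate and save_checkpoint are not shown; in the pytorch/examples ImageNet script this code follows, they are conventionally defined like this (a sketch, assuming the usual 10x decay every 30 epochs):

import shutil

def adjust_learning_rate(optimizer, epoch):
    # Decay the learning rate by 10x every 30 epochs (pytorch/examples convention)
    lr = args.lr * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    # Save the latest checkpoint and copy it whenever it is the current best
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')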
Example #10
def main():
    """
    This code was written to pre-train AlexNet for the SA-Siam object tracker.
    SA-Siam has two subnetworks, S-Net and A-Net,
    and this pre-trained AlexNet is used as the feature extractor of S-Net.

    I slightly changed the code from the pytorch examples
    (https://github.com/pytorch/examples/tree/master/imagenet)
    """

    global args, best_prec1
    args = parser.parse_args()

    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # create model
    model = AlexNet()
    model = torch.nn.parallel.DataParallel(model).cuda()
    # model = torch.nn.parallel.DistributedDataParallel(model)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(255),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(
            valdir,
            transforms.Compose([
                # transforms.Resize((255, 255)),
                transforms.RandomResizedCrop(255),
                # transforms.CenterCrop(255),
                transforms.ToTensor(),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best)
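
The prec1 values returned by validate come from a top-k accuracy helper in the same pytorch/examples script; its conventional definition, for reference:

def accuracy(output, target, topk=(1,)):
    # Compute precision@k for the specified values of k
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)  # indices of the top-k classes
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res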