def demo(depth=58, growth_rate=12, efficient=False):
    """Load a trained DenseNet checkpoint and plot, per dense block, a
    heatmap of the (row-max-normalized) absolute connection weights.

    Args:
        depth (int) - total network depth; (depth - 4) must be divisible
            by 3 (default 58, i.e. 9 dense layers per block)
        growth_rate (int) - features added per DenseNet layer (default 12)
        efficient (bool) - build the memory-efficient DenseNet variant
    """
    # Get densenet configuration: 3 dense blocks with (depth - 4) // 6
    # layers each (bottleneck variant: two convs per dense layer).
    if (depth - 4) % 3:
        raise Exception('Invalid depth')
    layers_per_block = (depth - 4) // 6
    block_config = [layers_per_block for _ in range(3)]
    model = DenseNet(
        growth_rate=growth_rate,
        block_config=block_config,
        num_classes=4,
        small_inputs=True,
        efficient=efficient,
    )
    # os.path.join with a single argument was a no-op; load the path directly.
    model.load_state_dict(torch.load('./ckpt2/model.dat'))

    # Hoist the state dict: the original rebuilt it once per iteration AND
    # once per key lookup. (Also drops the unused `t`/`n` locals.)
    state = model.state_dict()
    n = layers_per_block
    # w[block][row][col] accumulates the summed |weight| of each dense
    # connection; entries never written stay 0 and are hidden by `mask`.
    w = [[[0 for _ in range(n)] for _ in range(n)] for _ in range(3)]
    for name in state:
        # Positional parse of parameter names (block index at char 19,
        # layer indices at 32/34, conv marker at 37). NOTE(review): this
        # relies on a fixed 49-char name and single-digit indices, so it
        # only works for <= 9 layers per block — confirm against the
        # DenseNet parameter naming scheme before changing depth.
        if len(name) == 49 and name[37] == 'c':
            x, i, j = int(name[19]), int(name[32]), int(name[34])
            w[x - 1][j][i - 1] = abs(state[name]).sum()

    # Normalize each row's upper-triangle entries by the row maximum so the
    # strongest connection in every row maps to 1.0.
    for x in range(3):
        for i in range(n):
            mx = max(w[x][i][j] for j in range(i, n))
            if mx:  # guard: the original divided by zero on an all-zero row
                for j in range(i, n):
                    w[x][i][j] = w[x][i][j] / mx

    # Hide the strictly-upper entries (col > row), which hold no data.
    mask = [[j > i for j in range(n)] for i in range(n)]
    axes = []
    for x in range(3):
        sns.set()
        axes.append(sns.heatmap(w[x], vmin=0, vmax=1, cmap='jet',
                                square=True, mask=mask))
        axes[x].set_title('Dense Block %s' % (x + 1))
        axes[x].set_xlabel('Target layer (l)', fontsize=15)
        axes[x].set_ylabel('Source layer (s)', fontsize=15)
        # Fix: plt.show() takes no positional Axes argument; the original
        # plt.show(ax[x]) is rejected by current matplotlib.
        plt.show()
def train(data,
          save,
          valid_size=5000,
          seed=None,
          depth=40,
          growth_rate=12,
          n_epochs=300,
          batch_size=64,
          lr=0.1,
          wd=0.0001,
          momentum=0.9):
    """
    A function to train a DenseNet-BC on CIFAR-100.

    Args:
        data (str) - path to directory where data should be loaded from/downloaded
            (default $DATA_DIR)
        save (str) - path to save the model to (default /tmp)

        valid_size (int) - size of validation set (default 5000); must be
            positive, since the best model is selected on validation error
        seed (int) - manually set the random seed (default None)

        depth (int) - depth of the network (number of convolution layers) (default 40)
        growth_rate (int) - number of features added per DenseNet layer (default 12)
        n_epochs (int) - number of epochs for training (default 300)
        batch_size (int) - size of minibatch (default 64)
        lr (float) - initial learning rate
        wd (float) - weight decay
        momentum (float) - momentum

    Raises:
        Exception - if `save` exists but is not a directory, or if
            (depth - 4) is not divisible by 3.
    """

    if seed is not None:
        torch.manual_seed(seed)

    # Make save directory
    if not os.path.exists(save):
        os.makedirs(save)
    if not os.path.isdir(save):
        raise Exception('%s is not a dir' % save)

    # Get densenet configuration: 3 dense blocks with (depth - 4) // 6
    # layers each (bottleneck variant: two convs per dense layer).
    if (depth - 4) % 3:
        raise Exception('Invalid depth')
    block_config = [(depth - 4) // 6 for _ in range(3)]

    # Data transforms (CIFAR-100 channel statistics)
    mean = [0.5071, 0.4867, 0.4408]
    stdv = [0.2675, 0.2565, 0.2761]
    train_transforms = tv.transforms.Compose([
        tv.transforms.RandomCrop(32, padding=4),
        tv.transforms.RandomHorizontalFlip(),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize(mean=mean, std=stdv),
    ])
    test_transforms = tv.transforms.Compose([
        tv.transforms.ToTensor(),
        tv.transforms.Normalize(mean=mean, std=stdv),
    ])

    # Split training into train and validation - needed for calibration
    #
    # IMPORTANT! We need to use the same validation set for temperature
    # scaling, so we're going to save the indices for later.
    # Two dataset views over the same files: the validation view applies
    # the deterministic test transforms instead of augmentation.
    train_set = tv.datasets.CIFAR100(data,
                                     train=True,
                                     transform=train_transforms,
                                     download=True)
    valid_set = tv.datasets.CIFAR100(data,
                                     train=True,
                                     transform=test_transforms,
                                     download=False)
    indices = torch.randperm(len(train_set))
    train_indices = indices[:len(indices) - valid_size]
    # NOTE(review): with valid_size == 0 this is None and
    # SubsetRandomSampler(None) fails when the loader is iterated, so a
    # positive valid_size is effectively required.
    valid_indices = indices[len(indices) - valid_size:] if valid_size else None

    # Make dataloaders
    train_loader = torch.utils.data.DataLoader(
        train_set,
        pin_memory=True,
        batch_size=batch_size,
        sampler=SubsetRandomSampler(train_indices))
    valid_loader = torch.utils.data.DataLoader(
        valid_set,
        pin_memory=True,
        batch_size=batch_size,
        sampler=SubsetRandomSampler(valid_indices))

    # Make model, criterion, and optimizer
    model = DenseNet(growth_rate=growth_rate,
                     block_config=block_config,
                     num_classes=100)
    # Wrap model if multiple gpus
    if torch.cuda.device_count() > 1:
        model_wrapper = torch.nn.DataParallel(model).cuda()
    else:
        model_wrapper = model.cuda()
    print(model_wrapper)

    criterion = nn.CrossEntropyLoss()
    # Fix: `wd` was accepted and documented but never passed, so weight
    # decay was silently disabled.
    optimizer = optim.SGD(model_wrapper.parameters(),
                          lr=lr,
                          momentum=momentum,
                          weight_decay=wd,
                          nesterov=True)
    # Fix: milestones must be integers — float milestones (e.g.
    # 0.75 * 301 = 225.75) never match an epoch counter, so the LR would
    # never decay for odd n_epochs.
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[int(0.5 * n_epochs), int(0.75 * n_epochs)],
        gamma=0.1)

    # Train model, keeping the checkpoint with the lowest validation error.
    best_error = 1
    for epoch in range(1, n_epochs + 1):
        run_epoch(
            loader=train_loader,
            model=model_wrapper,
            criterion=criterion,
            optimizer=optimizer,
            epoch=epoch,
            n_epochs=n_epochs,
            train=True,
        )
        valid_results = run_epoch(
            loader=valid_loader,
            model=model_wrapper,
            criterion=criterion,
            optimizer=optimizer,
            epoch=epoch,
            n_epochs=n_epochs,
            train=False,
        )
        # Fix: since PyTorch 1.1 the scheduler must be stepped AFTER the
        # optimizer steps of the epoch; the original stepped it first,
        # shifting every LR decay one epoch early.
        scheduler.step()

        # Determine if model is the best
        _, _, valid_error = valid_results
        if valid_error[0] < best_error:
            best_error = valid_error[0]
            print('New best error: %.4f' % best_error)

            # When we save the model, we're also going to
            # include the validation indices (unwrapped `model`, so the
            # checkpoint has no DataParallel 'module.' prefixes).
            torch.save(model.state_dict(), os.path.join(save, 'model.pth'))
            torch.save(valid_indices, os.path.join(save, 'valid_indices.pth'))

    print('Done!')
# ===== 示例#3 (Example #3) — snippet separator from the source collection; =====
# ===== the fragment below begins mid-function (stray "0" artifact removed) =====
        net = CNN(len(sirnas)).to('cuda')
    else:
        print("invalid model type")

    # Dispatch on the requested training regime; each branch logs to its
    # own timestamped TensorBoard directory.
    if (args.train_type == 'erm'):
        print("training with erm")
        writer = SummaryWriter('logs/erm{}'.format(est_time))
        train_erm(net, combined_train_loader, val_loader, writer, args)
    elif (args.train_type == 'irm'):
        print("training with irm")
        writer = SummaryWriter('logs/irm{}'.format(est_time))

        # NOTE(review): IRM (and multitask below) train on the per-environment
        # `loaders` rather than `combined_train_loader` — presumably
        # intentional, since IRM needs environment-separated batches.
        train_irm(net, loaders, val_loader, writer, args)
    elif (args.train_type == 'multitask'):
        print("training with multitask")
        writer = SummaryWriter('logs/multitask_{}'.format(est_time))
        train_multitask(net, loaders, val_loader, writer, args)
    elif (args.train_type == 'baseline'):
        print("training with baseline")
        writer = SummaryWriter('logs/baseline_{}'.format(est_time))
        train_baseline(net, combined_train_loader, val_loader, writer, args)
    else:
        # Unknown train_type: warn but fall through, so the (untrained)
        # checkpoint below is still written.
        print("invalid train type")

    print("save final net")
    # Persist only the network weights; optimizer state is not saved.
    checkpoint = {
        'state_dict': net.state_dict(),
    }

    save_name = "saved_models/{}_finished.pth".format(args.checkpoint_name)
    torch.save(checkpoint, save_name)
# ===== 示例#4 (Example #4) — snippet separator from the source collection; =====
# ===== standalone gradient-checking script follows (stray "0" artifact removed) =====
import torch.nn as nn
import torch.backends.cudnn as cudnn
from models import DenseNet
from collections import OrderedDict

# run it with python -m utils.gradient_checking
# Script setup: build a reference DenseNet and its memory-efficient twin
# with identical configuration, then copy the reference weights into the
# efficient model so the two are numerically comparable.
use_cuda = True
bn_size = None  # None -> model's default bottleneck size — TODO confirm
multigpus = False
is_eval = False
# Only the `efficient` flag differs between the two models.
model = DenseNet(input_size=32, bn_size=bn_size, efficient=False)
model_effi = DenseNet(input_size=32, bn_size=bn_size, efficient=True)
# for stronger test: give one BatchNorm non-default running statistics so
# a later comparison cannot pass by accident with all-zero/one defaults.
model.features.denseblock2.denselayer12._modules['norm1'].running_mean.fill_(1)
model.features.denseblock2.denselayer12._modules['norm1'].running_var.fill_(2)
# The efficient variant names its bottleneck parameters
# '.bottleneck.norm_*' / '.bottleneck.conv_*' instead of '.norm1.' /
# '.conv1.', so rename the reference state-dict keys before loading.
state = model.state_dict()
state = OrderedDict(
    (k.replace('.norm1.', '.bottleneck.norm_'), v) for k, v in state.items())
state = OrderedDict(
    (k.replace('.conv1.', '.bottleneck.conv_'), v) for k, v in state.items())

model_effi.load_state_dict(state)
if use_cuda:
    model.cuda()
    model_effi.cuda()
    # Deterministic cuDNN kernels so both models produce reproducible,
    # comparable outputs/gradients.
    cudnn.deterministic = True
    if multigpus:
        model = nn.DataParallel(model, device_ids=[0, 1])
        model_effi = nn.DataParallel(model_effi, device_ids=[0, 1])
if is_eval:
    model.eval()