import os

import matplotlib.pyplot as plt
import seaborn as sns
import torch

from models import DenseNet


def demo(depth=58, growth_rate=12, efficient=False):
    # Get densenet configuration
    if (depth - 4) % 3:
        raise Exception('Invalid depth')
    block_config = [(depth - 4) // 6 for _ in range(3)]

    model = DenseNet(
        growth_rate=growth_rate,
        block_config=block_config,
        num_classes=4,
        small_inputs=True,
        efficient=efficient,
    )
    model.load_state_dict(torch.load('./ckpt2/model.dat'))

    # Accumulate the absolute filter-weight sum for every
    # (source layer, target layer) pair in each of the 3 dense blocks
    w = [[[0.0 for _ in range(9)] for _ in range(9)] for _ in range(3)]
    for name in model.state_dict():
        # Positional parse of conv-weight parameter names: name[19] is the
        # dense-block index, name[32] and name[34] the layer indices
        if len(name) == 49 and name[37] == 'c':
            x, i, j = int(name[19]), int(name[32]), int(name[34])
            a = abs(model.state_dict()[name])
            w[x - 1][j][i - 1] = a.sum().item()

    # Normalize each row by its maximum so values fall in [0, 1]
    for x in range(3):
        for i in range(9):
            mx = 0
            for j in range(i, 9):
                mx = max(mx, w[x][i][j])
            if mx:  # avoid dividing by zero for rows that received no weights
                for j in range(i, 9):
                    w[x][i][j] = w[x][i][j] / mx

    # Mask the upper triangle: a layer only receives input from earlier layers
    mask = [[j > i for j in range(9)] for i in range(9)]

    # One heatmap per dense block
    sns.set()
    for x in range(3):
        plt.figure()
        ax = sns.heatmap(w[x], vmin=0, vmax=1, cmap='jet',
                         square=True, mask=mask)
        ax.set_title('Dense Block %s' % (x + 1))
        ax.set_xlabel('Target layer (l)', fontsize=15)
        ax.set_ylabel('Source layer (s)', fontsize=15)
        plt.show()
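# Usage sketch (not in the original excerpt): demo() assumes a checkpoint
# trained with the same depth/growth_rate configuration already exists at
# ./ckpt2/model.dat. A bright cell (s, l) means target layer l places large
# weight on the features produced by source layer s.
if __name__ == '__main__':
    demo(depth=58, growth_rate=12, efficient=False)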
import os

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision as tv
from torch.utils.data.sampler import SubsetRandomSampler

from models import DenseNet


def train(data, save, valid_size=5000, seed=None, depth=40, growth_rate=12,
          n_epochs=300, batch_size=64, lr=0.1, wd=0.0001, momentum=0.9):
    """
    A function to train a DenseNet-BC on CIFAR-100.

    Args:
        data (str) - path to directory where data should be loaded
            from/downloaded (default $DATA_DIR)
        save (str) - path to save the model to (default /tmp)

        valid_size (int) - size of validation set
        seed (int) - manually set the random seed (default None)

        depth (int) - depth of the network (number of convolution layers)
            (default 40)
        growth_rate (int) - number of features added per DenseNet layer
            (default 12)

        n_epochs (int) - number of epochs for training (default 300)
        batch_size (int) - size of minibatch (default 64)
        lr (float) - initial learning rate
        wd (float) - weight decay
        momentum (float) - momentum
    """
    if seed is not None:
        torch.manual_seed(seed)

    # Make save directory
    if not os.path.exists(save):
        os.makedirs(save)
    if not os.path.isdir(save):
        raise Exception('%s is not a dir' % save)

    # Get densenet configuration
    if (depth - 4) % 3:
        raise Exception('Invalid depth')
    block_config = [(depth - 4) // 6 for _ in range(3)]

    # Data transforms
    mean = [0.5071, 0.4867, 0.4408]
    stdv = [0.2675, 0.2565, 0.2761]
    train_transforms = tv.transforms.Compose([
        tv.transforms.RandomCrop(32, padding=4),
        tv.transforms.RandomHorizontalFlip(),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize(mean=mean, std=stdv),
    ])
    test_transforms = tv.transforms.Compose([
        tv.transforms.ToTensor(),
        tv.transforms.Normalize(mean=mean, std=stdv),
    ])

    # Split training into train and validation - needed for calibration
    #
    # IMPORTANT! We need to use the same validation set for temperature
    # scaling, so we're going to save the indices for later
    train_set = tv.datasets.CIFAR100(data, train=True,
                                     transform=train_transforms, download=True)
    valid_set = tv.datasets.CIFAR100(data, train=True,
                                     transform=test_transforms, download=False)
    indices = torch.randperm(len(train_set))
    train_indices = indices[:len(indices) - valid_size]
    valid_indices = indices[len(indices) - valid_size:] if valid_size else None

    # Make dataloaders
    train_loader = torch.utils.data.DataLoader(
        train_set, pin_memory=True, batch_size=batch_size,
        sampler=SubsetRandomSampler(train_indices))
    valid_loader = torch.utils.data.DataLoader(
        valid_set, pin_memory=True, batch_size=batch_size,
        sampler=SubsetRandomSampler(valid_indices))

    # Make model, criterion, and optimizer
    model = DenseNet(growth_rate=growth_rate, block_config=block_config,
                     num_classes=100)

    # Wrap model if multiple gpus
    if torch.cuda.device_count() > 1:
        model_wrapper = torch.nn.DataParallel(model).cuda()
    else:
        model_wrapper = model.cuda()
    print(model_wrapper)

    criterion = nn.CrossEntropyLoss()
    # Pass wd through to the optimizer (the argument was previously unused)
    optimizer = optim.SGD(model_wrapper.parameters(), lr=lr,
                          weight_decay=wd, momentum=momentum, nesterov=True)
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[int(0.5 * n_epochs), int(0.75 * n_epochs)],
        gamma=0.1)

    # Train model
    best_error = 1
    for epoch in range(1, n_epochs + 1):
        scheduler.step()
        run_epoch(
            loader=train_loader,
            model=model_wrapper,
            criterion=criterion,
            optimizer=optimizer,
            epoch=epoch,
            n_epochs=n_epochs,
            train=True,
        )
        valid_results = run_epoch(
            loader=valid_loader,
            model=model_wrapper,
            criterion=criterion,
            optimizer=optimizer,
            epoch=epoch,
            n_epochs=n_epochs,
            train=False,
        )

        # Determine if model is the best
        _, _, valid_error = valid_results
        if valid_error[0] < best_error:
            best_error = valid_error[0]
            print('New best error: %.4f' % best_error)

            # When we save the model, we're also going to
            # include the validation indices
            torch.save(model.state_dict(), os.path.join(save, 'model.pth'))
            torch.save(valid_indices, os.path.join(save, 'valid_indices.pth'))

    print('Done!')
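# train() relies on a run_epoch helper that is not part of this excerpt. The
# sketch below is a hypothetical stand-in, assuming it returns
# (time, loss, error) with the error wrapped in a one-element list so that
# valid_error[0] above works; the real helper likely tracks running averages
# with meter objects instead.
import time


def run_epoch(loader, model, criterion, optimizer, epoch, n_epochs, train=True):
    model.train() if train else model.eval()

    start = time.time()
    total_loss, total_err, n_seen = 0.0, 0.0, 0
    for inputs, targets in loader:
        inputs, targets = inputs.cuda(), targets.cuda()
        with torch.set_grad_enabled(train):
            output = model(inputs)
            loss = criterion(output, targets)
        if train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Accumulate top-1 error and loss over the epoch
        _, pred = output.max(1)
        total_err += (pred != targets).float().sum().item()
        total_loss += loss.item() * targets.size(0)
        n_seen += targets.size(0)

    avg_loss = total_loss / n_seen
    avg_error = total_err / n_seen
    print('Epoch %d/%d (%s): loss %.4f, error %.4f' %
          (epoch, n_epochs, 'train' if train else 'valid', avg_loss, avg_error))
    return time.time() - start, avg_loss, [avg_error]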
    net = CNN(len(sirnas)).to('cuda')
else:
    print("invalid model type")

if args.train_type == 'erm':
    print("training with erm")
    writer = SummaryWriter('logs/erm_{}'.format(est_time))
    train_erm(net, combined_train_loader, val_loader, writer, args)
elif args.train_type == 'irm':
    print("training with irm")
    writer = SummaryWriter('logs/irm_{}'.format(est_time))
    train_irm(net, loaders, val_loader, writer, args)
elif args.train_type == 'multitask':
    print("training with multitask")
    writer = SummaryWriter('logs/multitask_{}'.format(est_time))
    train_multitask(net, loaders, val_loader, writer, args)
elif args.train_type == 'baseline':
    print("training with baseline")
    writer = SummaryWriter('logs/baseline_{}'.format(est_time))
    train_baseline(net, combined_train_loader, val_loader, writer, args)
else:
    print("invalid train type")

print("save final net")
checkpoint = {
    'state_dict': net.state_dict(),
}
save_name = "saved_models/{}_finished.pth".format(args.checkpoint_name)
torch.save(checkpoint, save_name)
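# The train_* helpers are defined elsewhere in the repo. For reference, the
# sketch below shows the penalty a train_irm implementation typically adds to
# each environment's ERM loss; this is the standard IRMv1 formulation
# (Arjovsky et al., 2019), not necessarily this repo's exact code.
import torch
import torch.nn.functional as F


def irm_penalty(logits, targets):
    # Gradient of the loss w.r.t. a fixed dummy classifier scale of 1.0;
    # a zero-norm gradient means the dummy classifier is already optimal
    # for this environment, i.e. the representation is invariant.
    scale = torch.ones(1, device=logits.device, requires_grad=True)
    loss = F.cross_entropy(logits * scale, targets)
    grad = torch.autograd.grad(loss, [scale], create_graph=True)[0]
    return (grad ** 2).sum()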
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from collections import OrderedDict

from models import DenseNet

# run it with python -m utils.gradient_checking
use_cuda = True
bn_size = None
multigpus = False
is_eval = False

model = DenseNet(input_size=32, bn_size=bn_size, efficient=False)
model_effi = DenseNet(input_size=32, bn_size=bn_size, efficient=True)

# for stronger test: perturb one batch-norm layer's running statistics so a
# buggy weight copy cannot go unnoticed
model.features.denseblock2.denselayer12._modules['norm1'].running_mean.fill_(1)
model.features.denseblock2.denselayer12._modules['norm1'].running_var.fill_(2)

# Copy the baseline weights into the memory-efficient model, renaming the
# bottleneck parameters to match its module layout
state = model.state_dict()
state = OrderedDict(
    (k.replace('.norm1.', '.bottleneck.norm_'), v) for k, v in state.items())
state = OrderedDict(
    (k.replace('.conv1.', '.bottleneck.conv_'), v) for k, v in state.items())
model_effi.load_state_dict(state)

if use_cuda:
    model.cuda()
    model_effi.cuda()
    cudnn.deterministic = True

if multigpus:
    model = nn.DataParallel(model, device_ids=[0, 1])
    model_effi = nn.DataParallel(model_effi, device_ids=[0, 1])

if is_eval:
    model.eval()
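# The excerpt stops before the check itself. A hypothetical sketch of the
# comparison, assuming both models take 32x32 RGB inputs and that their
# parameters() iterate in matching order (both are assumptions):
torch.manual_seed(0)
inputs = torch.randn(4, 3, 32, 32)
if use_cuda:
    inputs = inputs.cuda()

# Forward both models on the same batch and compare outputs
out = model(inputs)
out_effi = model_effi(inputs)
print('outputs match:', torch.allclose(out, out_effi, atol=1e-5))

# Backprop the same scalar through both and compare parameter gradients
out.sum().backward()
out_effi.sum().backward()
grads_match = all(
    torch.allclose(p.grad, q.grad, atol=1e-5)
    for p, q in zip(model.parameters(), model_effi.parameters()))
print('gradients match:', grads_match)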