import os

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision as tv
from torch.utils.data.sampler import SubsetRandomSampler

from models import DenseNet  # assumption: the DenseNet implementation lives in a local `models` module


def train(data, save, valid_size=5000, seed=None, depth=40, growth_rate=12,
          n_epochs=300, batch_size=64, lr=0.1, wd=0.0001, momentum=0.9):
    """
    A function to train a DenseNet-BC on CIFAR-100.

    Args:
        data (str) - path to directory where data should be loaded from/downloaded (default $DATA_DIR)
        save (str) - path to save the model to (default /tmp)
        valid_size (int) - size of the validation set
        seed (int) - manually set the random seed (default None)
        depth (int) - depth of the network (number of convolution layers) (default 40)
        growth_rate (int) - number of features added per DenseNet layer (default 12)
        n_epochs (int) - number of epochs for training (default 300)
        batch_size (int) - size of minibatch (default 64)
        lr (float) - initial learning rate
        wd (float) - weight decay
        momentum (float) - momentum
    """
    if seed is not None:
        torch.manual_seed(seed)

    # Make save directory
    if not os.path.exists(save):
        os.makedirs(save)
    if not os.path.isdir(save):
        raise Exception('%s is not a dir' % save)

    # Get densenet configuration
    if (depth - 4) % 3:
        raise Exception('Invalid depth')
    block_config = [(depth - 4) // 6 for _ in range(3)]

    # Data transforms
    mean = [0.5071, 0.4867, 0.4408]
    stdv = [0.2675, 0.2565, 0.2761]
    train_transforms = tv.transforms.Compose([
        tv.transforms.RandomCrop(32, padding=4),
        tv.transforms.RandomHorizontalFlip(),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize(mean=mean, std=stdv),
    ])
    test_transforms = tv.transforms.Compose([
        tv.transforms.ToTensor(),
        tv.transforms.Normalize(mean=mean, std=stdv),
    ])

    # Split training into train and validation - needed for calibration
    #
    # IMPORTANT! We need to use the same validation set for temperature
    # scaling, so we're going to save the indices for later
    train_set = tv.datasets.CIFAR100(data, train=True, transform=train_transforms, download=True)
    valid_set = tv.datasets.CIFAR100(data, train=True, transform=test_transforms, download=False)
    indices = torch.randperm(len(train_set))
    train_indices = indices[:len(indices) - valid_size]
    valid_indices = indices[len(indices) - valid_size:] if valid_size else None

    # Make dataloaders
    train_loader = torch.utils.data.DataLoader(train_set, pin_memory=True, batch_size=batch_size,
                                               sampler=SubsetRandomSampler(train_indices))
    valid_loader = torch.utils.data.DataLoader(valid_set, pin_memory=True, batch_size=batch_size,
                                               sampler=SubsetRandomSampler(valid_indices))

    # Make model, criterion, and optimizer
    model = DenseNet(growth_rate=growth_rate, block_config=block_config, num_classes=100)

    # Wrap model if multiple gpus
    if torch.cuda.device_count() > 1:
        model_wrapper = torch.nn.DataParallel(model).cuda()
    else:
        model_wrapper = model.cuda()
    print(model_wrapper)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model_wrapper.parameters(), lr=lr, weight_decay=wd,
                          momentum=momentum, nesterov=True)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=[0.5 * n_epochs, 0.75 * n_epochs],
                                               gamma=0.1)

    # Train model
    best_error = 1
    for epoch in range(1, n_epochs + 1):
        scheduler.step()
        run_epoch(
            loader=train_loader,
            model=model_wrapper,
            criterion=criterion,
            optimizer=optimizer,
            epoch=epoch,
            n_epochs=n_epochs,
            train=True,
        )
        valid_results = run_epoch(
            loader=valid_loader,
            model=model_wrapper,
            criterion=criterion,
            optimizer=optimizer,
            epoch=epoch,
            n_epochs=n_epochs,
            train=False,
        )

        # Determine if model is the best
        _, _, valid_error = valid_results
        if valid_error[0] < best_error:
            best_error = valid_error[0]
            print('New best error: %.4f' % best_error)

            # When we save the model, we're also going to
            # include the validation indices
            torch.save(model.state_dict(), os.path.join(save, 'model.pth'))
            torch.save(valid_indices, os.path.join(save, 'valid_indices.pth'))

    print('Done!')
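# `train()` above relies on a `run_epoch()` helper that is not included in this
# excerpt. The sketch below is a minimal stand-in, assuming it returns a
# (time, loss, error) triple whose error term is indexable, which is what
# `_, _, valid_error = valid_results; valid_error[0]` expects; the repo's
# actual implementation may differ.
import time


def run_epoch(loader, model, criterion, optimizer, epoch, n_epochs, train=True):
    model.train() if train else model.eval()
    total_loss, total_err, n_samples = 0.0, 0.0, 0
    start = time.time()
    with torch.set_grad_enabled(train):
        for input, target in loader:
            input, target = input.cuda(), target.cuda()
            output = model(input)
            loss = criterion(output, target)
            if train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            total_err += (output.argmax(dim=1) != target).float().sum().item()
            total_loss += loss.item() * input.size(0)
            n_samples += input.size(0)
    avg_loss = total_loss / n_samples
    avg_error = total_err / n_samples
    print('%s epoch %d/%d: loss %.4f, error %.4f' % (
        'Train' if train else 'Valid', epoch, n_epochs, avg_loss, avg_error))
    # Wrap the error in a list so `valid_error[0]` in train() works
    return time.time() - start, avg_loss, [avg_error]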
# Excerpt from the training script's setup; `args`, `model`, and the
# `num_classes` mapping are defined earlier in the script, `tnt` is torchnet,
# and `Engine` comes from torchnet.engine.

# Resume from a checkpoint when the file exists (the opening check is an
# assumption; the excerpt originally started inside this branch).
if os.path.isfile(args.resume):
    print("=> loading checkpoint '{}'".format(args.resume))
    checkpoint = torch.load(args.resume)
    if 'epoch' in checkpoint:
        args.start_epoch = checkpoint['epoch'] + 1
        state_dict = checkpoint['state_dict']
    else:
        state_dict = checkpoint
    model.load_state_dict(state_dict=state_dict, strict=False)
    print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, args.start_epoch - 1))
else:
    print("=> no checkpoint found at '{}'".format(args.resume))

print(args)

if len(args.gpus) > 0:
    model.cuda()
    cudnn.benchmark = True
if len(args.gpus) > 1:
    model = nn.DataParallel(model, device_ids=args.gpus).cuda()

engine = Engine()
meter_loss = tnt.meter.AverageValueMeter()
topk = [1, 5]
classerr = tnt.meter.ClassErrorMeter(topk=topk, accuracy=False)  # default is also False
confusion_meter = tnt.meter.ConfusionMeter(num_classes[args.dataset], normalized=True)

if args.visdom:
    if args.log_name == '':
        args.log_name = args.build_type
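# The meters above are normally wired into the torchnet Engine through hooks.
# A minimal sketch following the standard torchnet pattern; the hook body and
# the `reset_meters` helper are assumptions, not code from this script.
def reset_meters():
    meter_loss.reset()
    classerr.reset()
    confusion_meter.reset()


def on_forward(state):
    # state['output'] holds the logits, state['sample'][1] the targets
    classerr.add(state['output'].data, state['sample'][1])
    confusion_meter.add(state['output'].data, state['sample'][1])
    meter_loss.add(state['loss'].item())


engine.hooks['on_forward'] = on_forward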
from collections import OrderedDict

import torch
import torch.nn as nn
from torch.backends import cudnn

# `DenseNet`, `bn_size`, and `use_cuda` are assumed to be defined earlier in this test.
multigpus = False
is_eval = False

model = DenseNet(input_size=32, bn_size=bn_size, efficient=False)
model_effi = DenseNet(input_size=32, bn_size=bn_size, efficient=True)

# for a stronger test, perturb the running statistics of one batch-norm layer
model.features.denseblock2.denselayer12._modules['norm1'].running_mean.fill_(1)
model.features.denseblock2.denselayer12._modules['norm1'].running_var.fill_(2)

# copy the weights into the efficient model, remapping the bottleneck parameter names
state = model.state_dict()
state = OrderedDict((k.replace('.norm1.', '.bottleneck.norm_'), v) for k, v in state.items())
state = OrderedDict((k.replace('.conv1.', '.bottleneck.conv_'), v) for k, v in state.items())
model_effi.load_state_dict(state)

if use_cuda:
    model.cuda()
    model_effi.cuda()
    cudnn.deterministic = True
if multigpus:
    model = nn.DataParallel(model, device_ids=[0, 1])
    model_effi = nn.DataParallel(model_effi, device_ids=[0, 1])
if is_eval:
    model.eval()
    model_effi.eval()

# create the model inputs
input_var = torch.randn(8, 3, 32, 32)
if use_cuda:
    input_var = input_var.cuda()

out = model(input_var)
model.zero_grad()
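# The check above stops right after the forward pass through the standard
# model; a hedged sketch of the natural next step, comparing its output with
# the efficient implementation on the same input (the tolerance is an assumption).
out_effi = model_effi(input_var)
model_effi.zero_grad()
max_diff = (out - out_effi).abs().max().item()
print('max abs difference between standard and efficient outputs: %g' % max_diff)
assert max_diff < 1e-4, 'efficient implementation diverges from the reference model'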
import torch
import torch.nn as nn
from torch.backends import cudnn

# `DenseNet` is assumed to be imported from the local implementation;
# a sketch of the `benchmark()` helper used below follows this block.
use_cuda = True
multigpus = True

# set cudnn backend to benchmark config
cudnn.benchmark = True

# instantiate the models
densenet = DenseNet(efficient=False)
densenet_effi = DenseNet(efficient=True)

# build dummy variables to input and output
x = torch.randn(128, 3, 32, 32)
y = torch.randn(128, 100)
if use_cuda:
    densenet = densenet.cuda()
    densenet_effi = densenet_effi.cuda()
    x = x.cuda()
    y = y.cuda()
if multigpus:
    densenet = nn.DataParallel(densenet, device_ids=[0, 1])
    densenet_effi = nn.DataParallel(densenet_effi, device_ids=[0, 1])

# build the dict to iterate over
architectures = {'densenet': densenet, 'densenet-effi': densenet_effi}

# loop over architectures and measure them
for deep_net in architectures:
    print(deep_net)
    t_fp, t_bp = benchmark(architectures[deep_net], x, y)
    # print results as mean +/- std of the measured times
    print('FORWARD PASS: ', str(t_fp.mean().item()), '+/-', str(t_fp.std().item()))
    print('BACKWARD PASS: ', str(t_bp.mean().item()), '+/-', str(t_bp.std().item()))
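# `benchmark()` is not defined in this snippet. A minimal sketch, assuming it
# times repeated forward and backward passes (using an MSE loss against the
# dummy target as a stand-in objective, and assuming CUDA is available) and
# returns the per-iteration timings as tensors, which is what the
# mean +/- std printout above expects.
import time


def benchmark(model, x, y, n_iters=20):
    criterion = nn.MSELoss()
    t_forward, t_backward = [], []
    for _ in range(n_iters):
        # synchronize so the timings reflect completed GPU work
        torch.cuda.synchronize()
        t0 = time.time()
        out = model(x)
        torch.cuda.synchronize()
        t1 = time.time()
        loss = criterion(out, y)
        model.zero_grad()
        loss.backward()
        torch.cuda.synchronize()
        t2 = time.time()
        t_forward.append(t1 - t0)
        t_backward.append(t2 - t1)
    return torch.tensor(t_forward), torch.tensor(t_backward)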
import torch
import torch.nn as nn
from models import DenseNet
from config import config
from preprocess import train_data_iterator, test_data_helper

net = DenseNet(
    growth_rate=32,
    block_config=[3, 3, 3],
    num_classes=config.charlen * config.captlen,
    small_inputs=False,
    efficient=True,
)
net = net.cuda()

# Optimizer
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9,
                            nesterov=True, weight_decay=0.0001)
best_acc = config.baseline
loss_fn = torch.nn.BCEWithLogitsLoss(reduce=False)

for epoch in range(config.epochs):
    for i, (input, target) in enumerate(train_data_iterator()):
        input = torch.FloatTensor(input)
        target = torch.LongTensor(target)
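# The loop above is cut off after the tensor conversions. A hedged sketch of
# how a single step could continue; the multi-hot float target expected by
# BCEWithLogitsLoss (and the averaging of the unreduced loss) are assumptions,
# not code from the repo.
def train_step(net, loss_fn, optimizer, input, target):
    input = input.cuda()
    # BCEWithLogitsLoss needs a float target with the same shape as the logits
    target = target.float().cuda()
    output = net(input)
    # reduce=False yields a per-element loss; average it before backprop
    loss = loss_fn(output, target).mean()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()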