def load_l2_model(path):
    """Load a model checkpoint and return it without its DataParallel table."""
    if os.path.isfile(path):
        print("=> loading checkpoint '{}'".format(path))
        checkpoint = torch.load(path)

        # size of the top layer
        N = checkpoint['state_dict']['top_layer.L.weight'].size()

        # build skeleton of the model
        sob = 'sobel.0.weight' in checkpoint['state_dict'].keys()
        model = models.__dict__[checkpoint['arch']](sobel=sob, out=int(N[0]))
        model.top_layer = models.distLinear(N[1], N[0])

        # deal with a DataParallel table: strip the '.module' prefix from keys
        def rename_key(key):
            if 'module' not in key:
                return key
            return ''.join(key.split('.module'))

        checkpoint['state_dict'] = {rename_key(key): val
                                    for key, val in checkpoint['state_dict'].items()}

        # load weights
        model.load_state_dict(checkpoint['state_dict'])
        print("l2 model loaded")
    else:
        model = None
        print("=> no checkpoint found at '{}'".format(path))
    return model
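# Hedged usage sketch (assumption, not part of the original script): how
# load_l2_model might be called before fine-tuning or evaluation. The
# checkpoint path and helper name below are placeholders.
def _example_load_l2_model(checkpoint_path='/path/to/checkpoint.pth.tar'):
    model = load_l2_model(checkpoint_path)
    if model is not None:
        model.cuda()   # the rest of these scripts assume a GPU
        model.eval()   # switch to inference mode before extracting features
    return model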
def __init__(self, conv, num_labels, l2):
    super(RegLog, self).__init__()
    self.conv = conv
    if conv == 1:
        self.av_pool = nn.AvgPool2d(6, stride=6, padding=3)
        s = 9600
    elif conv == 2:
        self.av_pool = nn.AvgPool2d(4, stride=4, padding=0)
        s = 9216
    elif conv == 3:
        self.av_pool = nn.AvgPool2d(3, stride=3, padding=1)
        s = 9600
    elif conv == 4:
        self.av_pool = nn.AvgPool2d(3, stride=3, padding=1)
        s = 9600
    elif conv == 5:
        self.av_pool = nn.AvgPool2d(2, stride=2, padding=0)
        s = 9216
    if l2:
        # L2-normalized ("l2 softmax") classifier head
        self.linear = models.distLinear(s, num_labels)
    else:
        # Alternative pooling/classifier settings tried in earlier experiments:
        # self.linear = models.distLinear(s, num_labels)       # l2 softmax head
        # self.av_pool = nn.AvgPool2d(2, stride=2, padding=0)  # as in AlexNet
        # self.av_pool = nn.AdaptiveAvgPool2d((1, 1))          # global avg pooling, as in the original ResNet code
        # self.linear = nn.Linear(50176, num_labels)           # for layer 3
        # self.linear = nn.Linear(1024, num_labels)            # with global avg pooling
        # self.linear = nn.Linear(100352, num_labels)          # for layer 2
        # self.av_pool = nn.AvgPool2d(3, stride=3, padding=0)  # feat dim 16384 for layer 3
        # self.linear = nn.Linear(16384, num_labels)
        self.av_pool = nn.AvgPool2d(4, stride=4, padding=0)
        self.linear = nn.Linear(9216, num_labels)
def __init__(self, conv, num_labels, l2):
    super(RegLog, self).__init__()
    self.conv = conv
    if conv == 1:
        self.av_pool = nn.AvgPool2d(6, stride=6, padding=3)
        s = 9600
    elif conv == 2:
        self.av_pool = nn.AvgPool2d(4, stride=4, padding=0)
        s = 9216
    elif conv == 3:
        self.av_pool = nn.AvgPool2d(3, stride=3, padding=1)
        s = 9600
    elif conv == 4:
        self.av_pool = nn.AvgPool2d(3, stride=3, padding=1)
        s = 9600
    elif conv == 5:
        self.av_pool = nn.AvgPool2d(2, stride=2, padding=0)
        s = 9216
    if l2:
        self.linear = models.distLinear(s, num_labels)
    else:
        self.linear = nn.Linear(s, num_labels)
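# Hedged sketch (assumption, not shown in the original excerpt): RegLog is a
# linear probe on top of frozen conv features, so its forward pass would
# plausibly pool, flatten, then apply the linear (or distLinear) head built in
# __init__ above.
def forward(self, x):
    x = self.av_pool(x)          # spatial average pooling of the conv feature map
    x = x.view(x.size(0), -1)    # flatten to (batch, s)
    return self.linear(x)        # per-class scores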
def main():
    global args
    args = parser.parse_args()

    # fix random seeds
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)

    # CNN
    if args.verbose:
        print('Architecture: {}'.format(args.arch))
    model = models.__dict__[args.arch](sobel=args.sobel)
    fd = int(model.top_layer.weight.size()[1])
    model.top_layer = None
    model.features = torch.nn.DataParallel(model.features)
    model.cuda()
    cudnn.benchmark = True

    # create optimizer
    optimizer = torch.optim.SGD(
        filter(lambda x: x.requires_grad, model.parameters()),
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=10**args.wd,
    )

    # define loss function
    criterion = nn.CrossEntropyLoss().cuda()

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # remove top_layer parameters from checkpoint
            # (iterate over a copy of the keys so the dict can be modified safely)
            for key in list(checkpoint['state_dict']):
                if 'top_layer' in key:
                    del checkpoint['state_dict'][key]
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # creating checkpoint repo
    exp_check = os.path.join(args.exp, 'checkpoints')
    if not os.path.isdir(exp_check):
        os.makedirs(exp_check)

    # creating cluster assignments log
    cluster_log = Logger(os.path.join(args.exp, 'clusters'))

    # preprocessing of data
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    tra = [transforms.Resize(256),
           transforms.CenterCrop(224),
           transforms.ToTensor(),
           normalize]

    # load the data
    end = time.time()
    dataset = datasets.ImageFolder(args.data, transform=transforms.Compose(tra))
    if args.verbose:
        print('Load dataset: {0:.2f} s'.format(time.time() - end))
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # clustering algorithm to use
    deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster)

    # training convnet with DeepCluster
    for epoch in range(args.start_epoch, args.epochs):
        end = time.time()

        # remove head
        model.top_layer = None
        model.classifier = nn.Sequential(*list(model.classifier.children())[:-1])

        # get the features for the whole dataset
        features = compute_features(dataloader, model, len(dataset))

        # cluster the features
        clustering_loss = deepcluster.cluster(features, verbose=args.verbose)

        # assign pseudo-labels
        train_dataset = clustering.cluster_assign(deepcluster.images_lists,
                                                  dataset.imgs)

        # uniformly sample per target
        sampler = UnifLabelSampler(int(args.reassign * len(train_dataset)),
                                   deepcluster.images_lists)

        train_dataloader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch,
            num_workers=args.workers,
            sampler=sampler,
            pin_memory=True,
        )

        # set last fully connected layer
        mlp = list(model.classifier.children())
        mlp.append(nn.ReLU(inplace=True).cuda())
        model.classifier = nn.Sequential(*mlp)
        if args.normalize:
            model.top_layer = models.distLinear(fd, len(deepcluster.images_lists))
            model.top_layer.L.weight.data.normal_(0, 0.01)
        else:
            model.top_layer = nn.Linear(fd, len(deepcluster.images_lists))
            model.top_layer.weight.data.normal_(0, 0.01)
            model.top_layer.bias.data.zero_()
        model.top_layer.cuda()

        # train network with clusters as pseudo-labels
        end = time.time()
        loss = train(train_dataloader, model, criterion, optimizer, epoch)

        # print log
        if args.verbose:
            print('###### Epoch [{0}] ###### \n'
                  'Time: {1:.3f} s\n'
                  'Clustering loss: {2:.3f} \n'
                  'ConvNet loss: {3:.3f}'.format(epoch, time.time() - end,
                                                 clustering_loss, loss))
            try:
                nmi = normalized_mutual_info_score(
                    clustering.arrange_clustering(deepcluster.images_lists),
                    clustering.arrange_clustering(cluster_log.data[-1]))
                print('NMI against previous assignment: {0:.3f}'.format(nmi))
            except IndexError:
                pass
            print('####################### \n')

        # save running checkpoint
        torch.save({'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict()},
                   os.path.join(args.exp, 'checkpoint.pth.tar'))

        # save cluster assignments
        cluster_log.log(deepcluster.images_lists)
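# Hedged sketch (assumption, not the repository's actual implementation): the
# train() call above performs one epoch of standard supervised training on the
# pseudo-labels produced by clustering. The helper name and exact signature
# below are illustrative only.
def _train_epoch_sketch(loader, model, criterion, optimizer):
    model.train()
    total_loss = 0.0
    for input_tensor, pseudo_target in loader:
        input_var = input_tensor.cuda(non_blocking=True)
        target_var = pseudo_target.cuda(non_blocking=True)

        output = model(input_var)             # forward through features + top layer
        loss = criterion(output, target_var)  # cross-entropy against cluster ids

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / max(len(loader), 1)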
def main():
    args = parser.parse_args()
    print(args)

    # fix random seeds
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)

    # create model and move it to gpu
    if args.l2:
        model = load_l2_model(args.model)
        model.top_layer = nn.Linear(model.top_layer.L.weight.size(1), 20)
        # model.top_layer = models.distLinear(model.top_layer.L.weight.size(1), 20)
    else:
        model = load_model(args.model)
        # model.top_layer = nn.Linear(model.top_layer.weight.size(1), 20)
        model.top_layer = models.distLinear(model.top_layer.weight.size(1), 20)
    model.cuda()
    cudnn.benchmark = True

    # what partition of the data to use
    if args.split == 'train':
        args.test = 'val'
    elif args.split == 'trainval':
        args.test = 'test'

    # data loader
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    dataset = VOC2007_dataset(
        os.path.join(args.vocdir, 'trainval'),
        split=args.split,
        transform=transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomResizedCrop(224,
                                         scale=(args.min_scale, args.max_scale),
                                         ratio=(1, 1)),
            transforms.ToTensor(),
            normalize,
        ]))
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=16,
                                         shuffle=False,
                                         num_workers=24,
                                         pin_memory=True)
    print('PASCAL VOC 2007 ' + args.split + ' dataset loaded')

    # re-initialize the classifier
    for y, m in enumerate(model.classifier.modules()):
        if isinstance(m, nn.Linear):
            m.weight.data.normal_(0, 0.01)
            m.bias.data.fill_(0.1)
    # if not args.l2:
    #     model.top_layer.bias.data.fill_(0.1)

    if args.fc6_8:
        # freeze some layers
        for param in model.features.parameters():
            param.requires_grad = False
        # unfreeze batchnorm scaling
        if args.train_batchnorm:
            for layer in model.modules():
                if isinstance(layer, torch.nn.BatchNorm2d):
                    for param in layer.parameters():
                        param.requires_grad = True

    # set optimizer
    optimizer = torch.optim.SGD(
        filter(lambda x: x.requires_grad, model.parameters()),
        lr=args.lr,
        momentum=0.9,
        weight_decay=args.wd,
    )
    criterion = nn.BCEWithLogitsLoss(reduction='none')

    print('Start training')
    it = 0
    losses = AverageMeter()
    while it < args.nit:
        it = train(
            loader,
            model,
            optimizer,
            criterion,
            args.fc6_8,
            losses,
            it=it,
            total_iterations=args.nit,
            stepsize=args.stepsize,
        )

    print('Evaluation')
    if args.eval_random_crops:
        transform_eval = [
            transforms.RandomHorizontalFlip(),
            transforms.RandomResizedCrop(224,
                                         scale=(args.min_scale, args.max_scale),
                                         ratio=(1, 1)),
            transforms.ToTensor(),
            normalize,
        ]
    else:
        transform_eval = [
            transforms.Resize(256),
            transforms.TenCrop(224),
            transforms.Lambda(lambda crops: torch.stack(
                [normalize(transforms.ToTensor()(crop)) for crop in crops]))
        ]

    print('Train set')
    train_dataset = VOC2007_dataset(os.path.join(args.vocdir, 'trainval'),
                                    split=args.split,
                                    transform=transforms.Compose(transform_eval))
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=24,
        pin_memory=True,
    )
    evaluate(train_loader, model, args.eval_random_crops)

    print('Test set')
    test_dataset = VOC2007_dataset(os.path.join(args.vocdir, 'test'),
                                   split=args.test,
                                   transform=transforms.Compose(transform_eval))
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=24,
        pin_memory=True,
    )
    evaluate(test_loader, model, args.eval_random_crops)
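# Hedged sketch (assumption, not the repository's evaluate()): PASCAL VOC
# classification is a multi-label task, so one common way to score it is
# per-class sigmoid probabilities followed by mean average precision. The
# helper below is illustrative only and relies on scikit-learn, which this
# file may not actually import; crop averaging mirrors the TenCrop transform
# used above.
def _evaluate_map_sketch(loader, model):
    import numpy as np
    from sklearn.metrics import average_precision_score

    model.eval()
    all_scores, all_targets = [], []
    with torch.no_grad():
        for images, targets in loader:
            if images.dim() == 5:
                # TenCrop path: (batch, ncrops, C, H, W) -> average crop scores
                b, ncrops, c, h, w = images.size()
                output = model(images.view(-1, c, h, w).cuda()).view(b, ncrops, -1).mean(1)
            else:
                output = model(images.cuda())
            all_scores.append(torch.sigmoid(output).cpu().numpy())
            all_targets.append(targets.numpy())

    scores = np.concatenate(all_scores)
    targets = np.concatenate(all_targets)
    # average precision per class, then mean over the 20 VOC classes
    aps = [average_precision_score(targets[:, k] > 0, scores[:, k])
           for k in range(scores.shape[1])]
    return float(np.mean(aps))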