def construct_model(architecture, method, num_classes):
    if architecture == "vgg16_bn":
        model = models.vgg16_bn(pretrained=True)
        num_ftrs = model.classifier[6].in_features
        if method in ["deep_gamblers"]:
            model.classifier[6] = nn.Linear(num_ftrs, num_classes + 1)
        else:
            model.classifier[6] = nn.Linear(num_ftrs, num_classes)
    elif architecture == "vgg16_bn_conf_aware_paper":
        from model.vgg import vgg16 as vgg16_bn_conf_aware_paper
        model = vgg16_bn_conf_aware_paper(num_classes=10)
    elif architecture == 'vgg16_bn_dropout':
        from vgg import vgg16_bn
        if method in ["deep_gamblers"]:
            model = vgg16_bn(dropout=True, num_classes=num_classes + 1, input_size=32)
        elif method in ["selectivenet"]:
            model = SelectiveVgg16_bn(num_classes)  # was `n_labels`, which is undefined in this function
        else:
            model = vgg16_bn(dropout=True, num_classes=num_classes, input_size=32)
    elif architecture == "densenet121":
        model = models.densenet121(pretrained=True)
        num_ftrs = model.classifier.in_features
        if method in ["deep_gamblers"]:
            model.classifier = nn.Linear(num_ftrs, num_classes + 1)
        else:
            model.classifier = nn.Linear(num_ftrs, num_classes)
    elif architecture == "resnet50":
        model = models.resnet50(pretrained=True)
        num_ftrs = model.fc.in_features
        if method in ["deep_gamblers"]:
            model.fc = nn.Linear(num_ftrs, num_classes + 1)
        elif method in ["selectivenet"]:
            model = SelectiveResNet50(num_classes)
        else:
            model.fc = nn.Linear(num_ftrs, num_classes)
    else:
        print("Unknown architecture. Aborting...")
        return
    model.architecture = architecture
    model.num_classes = num_classes
    return model
def train(args):
    log_dir = os.path.join(args.output_dir, "L{}_W{}_LOSS{}".format(
        '_'.join([item[1] for item in args.layer_list]),
        '_'.join([str(item) for item in args.weight_layer]),
        args.loss_type))
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    log_file = open(os.path.join(log_dir, "log.txt"), "w")

    model = vgg16_bn(pretrained=True)
    model.cuda()
    model.eval()

    img = load_img(args.img_path, args.img_shape)
    pre_noise = np.random.uniform(low=-3, high=3, size=img.shape).astype(np.float32)
    pre_noise = sigmoid(pre_noise)
    img_tensor = torch.from_numpy(img).permute(2, 0, 1).contiguous().unsqueeze(0).cuda()
    noise_tensor = torch.from_numpy(pre_noise).permute(2, 0, 1).contiguous().unsqueeze(0).cuda()
    noise_tensor.requires_grad_(True)

    criterion = GramLoss(args.weight_layer, dist_type=args.loss_type)

    def lr_func(epoch):
        lr_factor = args.lr_factor_dict
        lr_key = list(lr_factor.keys())
        index = 0
        for i in range(len(lr_key)):
            if epoch < lr_key[i]:
                break
            else:
                index = i
        return lr_factor[lr_key[index]]

    optimizer = Adam([noise_tensor], lr=args.lr)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_func)

    for epoch in range(args.epoch):
        scheduler.step()
        img_output = extract_feature(model, img_tensor, args.layer_list)
        noise_output = extract_feature(model, noise_tensor, args.layer_list)
        data = list(zip(noise_output, img_output))
        loss = criterion(data)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch % args.show_interval == 0:
            print("e:{}---loss:{:.6f}".format(epoch, loss.item()))
            print("e:{}---loss:{:.6f}".format(epoch, loss.item()), file=log_file)
        if epoch % args.save_interval == 0:
            noise_np = noise_tensor.data.cpu().squeeze(0).permute(1, 2, 0).contiguous().numpy()
            output_img(noise_np, os.path.join(log_dir, "epoch_{}.png".format(epoch)))

    log_file.close()
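# --- Added illustration (not from the original repo) ---
# extract_feature() and GramLoss are referenced above but not defined in this
# snippet. Below is a minimal, hedged sketch of collecting intermediate VGG
# activations with forward hooks; the helper name and the layer names
# ("features.12", ...) are illustrative assumptions, not the repo's actual API.
import torch
from torchvision.models import vgg16_bn

def grab_features(model, x, layer_names):
    """Collect the outputs of the named submodules during one forward pass."""
    outputs = {}
    handles = []
    for name, module in model.named_modules():
        if name in layer_names:
            handles.append(module.register_forward_hook(
                lambda m, inp, out, key=name: outputs.__setitem__(key, out)))
    model(x)  # no torch.no_grad(): gradients must flow back to x when optimizing a noise image
    for h in handles:
        h.remove()
    return [outputs[n] for n in layer_names]

if __name__ == "__main__":
    vgg = vgg16_bn(pretrained=True).eval()
    noise = torch.randn(1, 3, 224, 224, requires_grad=True)
    feats = grab_features(vgg, noise, ["features.12", "features.22"])
    print([f.shape for f in feats])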
def __init__(self, batchSize, vgg_path=""): transform_train = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)) ]) transform_test = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ]) self.vggnet = vgg.vgg16_bn().cuda() if (vgg_path != ""): self.vggnet.load_state_dict(torch.load(vgg_path)) self.optimizer = optim.SGD(self.vggnet.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4) # self.trainset = torchvision.datasets.CIFAR10(root='data', train=True, download=True, transform=transform_train) # self.trainloader = torch.utils.data.DataLoader(self.trainset, batch_size=batchSize, shuffle=True) # self.testset = torchvision.datasets.CIFAR10(root='data', train=False, download=True, transform=transform_test) # self.testloader = torch.utils.data.DataLoader(self.testset, batch_size=256, shuffle=True) self.criterion = nn.CrossEntropyLoss() self.change_f = True self.change_c = False self.init_conv2d_distance_rate() self.make_model()
def get_net(name): if name == 'densenet121': net = densenet121() elif name == 'densenet161': net = densenet161() elif name == 'densenet169': net = densenet169() elif name == 'googlenet': net = googlenet() elif name == 'inception_v3': net = inception_v3() elif name == 'mobilenet_v2': net = mobilenet_v2() elif name == 'resnet18': net = resnet18() elif name == 'resnet34': net = resnet34() elif name == 'resnet50': net = resnet50() elif name == 'resnet_orig': net = resnet_orig() elif name == 'vgg11_bn': net = vgg11_bn() elif name == 'vgg13_bn': net = vgg13_bn() elif name == 'vgg16_bn': net = vgg16_bn() elif name == 'vgg19_bn': net = vgg19_bn() else: print(f'{name} not a valid model name') sys.exit(0) return net.to(device)
def __init__(self):
    super(ARShadowGAN, self).__init__()
    self.attention = AttentionBlock(in_channels=4)
    self.generator = VirtualShadowGenerator(in_channels=6)
    self.discriminator = PixelDiscriminator(in_channels=7)
    self.vgg = vgg16_bn(pretrained=False)
    self.mse_loss = nn.MSELoss()
def build_model(use_custom=False):
    # VGG-16 model with the output layer resized for 100 classes
    if use_custom:
        model = vgg.vgg16_bn()  # modified VGG-16 network for TRT conversion
    else:
        model = models.vgg16_bn(pretrained=True)
    model.classifier[6] = torch.nn.Linear(4096, 100)
    return model
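# --- Added illustration (not from the original repo) ---
# Hedged sanity check for the resized head above: a dummy forward pass should
# produce 100 logits. Assumes the stock torchvision input size of 224x224; the
# custom TRT-oriented variant may expect a different size.
import torch

model = build_model(use_custom=False).eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))
print(logits.shape)  # expected: torch.Size([1, 100])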
def init_net(self): net_args = { "pretrained": True, "n_input_channels": len(self.kwargs["static"]["imagery_bands"]) } # https://pytorch.org/docs/stable/torchvision/models.html if self.kwargs["net"] == "resnet18": self.model = resnet.resnet18(**net_args) elif self.kwargs["net"] == "resnet34": self.model = resnet.resnet34(**net_args) elif self.kwargs["net"] == "resnet50": self.model = resnet.resnet50(**net_args) elif self.kwargs["net"] == "resnet101": self.model = resnet.resnet101(**net_args) elif self.kwargs["net"] == "resnet152": self.model = resnet.resnet152(**net_args) elif self.kwargs["net"] == "vgg11": self.model = vgg.vgg11(**net_args) elif self.kwargs["net"] == "vgg11_bn": self.model = vgg.vgg11_bn(**net_args) elif self.kwargs["net"] == "vgg13": self.model = vgg.vgg13(**net_args) elif self.kwargs["net"] == "vgg13_bn": self.model = vgg.vgg13_bn(**net_args) elif self.kwargs["net"] == "vgg16": self.model = vgg.vgg16(**net_args) elif self.kwargs["net"] == "vgg16_bn": self.model = vgg.vgg16_bn(**net_args) elif self.kwargs["net"] == "vgg19": self.model = vgg.vgg19(**net_args) elif self.kwargs["net"] == "vgg19_bn": self.model = vgg.vgg19_bn(**net_args) else: raise ValueError("Invalid network specified: {}".format( self.kwargs["net"])) # run type: 1 = fine tune, 2 = fixed feature extractor # - replace run type option with "# of layers to fine tune" if self.kwargs["run_type"] == 2: layer_count = len(list(self.model.parameters())) for layer, param in enumerate(self.model.parameters()): if layer <= layer_count - 5: param.requires_grad = False # Parameters of newly constructed modules have requires_grad=True by default # get existing number for input features # set new number for output features to number of categories being classified # see: https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html if "resnet" in self.kwargs["net"]: num_ftrs = self.model.fc.in_features self.model.fc = nn.Linear(num_ftrs, self.ncats) elif "vgg" in self.kwargs["net"]: num_ftrs = self.model.classifier[6].in_features self.model.classifier[6] = nn.Linear(num_ftrs, self.ncats)
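# --- Added illustration (not from the original repo) ---
# Hedged helper to verify what the freezing loop above (run_type == 2) leaves
# trainable once the new fc / classifier[6] head has been attached. The helper
# name is an assumption; it is not part of the original class.
def count_trainable_params(model):
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())
    print("trainable: {:,} / total: {:,}".format(trainable, total))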
def main():
    # Load the pretrained model from PyTorch
    vgg16 = vgg16_bn(True)
    vgg16.load_state_dict(torch.load("../input/vgg16bn/vgg16_bn.pth"))

    # Freeze training for all feature-extractor layers
    for param in vgg16.features.parameters():
        param.requires_grad = False  # was `param.require_grad`, which only creates a new, unused attribute

    criterion = nn.CrossEntropyLoss()
    eval_model(vgg16, criterion)
def main(): args = args_parse() device = 'cuda' if torch.cuda.is_available() else 'cpu' # Data print('==> Preparing data..') transform_test = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ]) testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test) testloader = DataLoader(testset, batch_size=100, shuffle=False) # Model print('==> Building model..') net = vgg16_bn(p=args.P) net = net.to(device) if device == 'cuda': net = torch.nn.DataParallel(net) checkpoint = torch.load('./model/vgg16_P{}.pth'.format(args.P)) net.load_state_dict(checkpoint['net']) criterion = nn.CrossEntropyLoss() net.eval() test_loss = 0 correct = 0 total = 0 with torch.no_grad(): with tqdm(testloader, desc='Testing ', unit='batch') as loader: for batch_idx, (inputs, targets) in enumerate(loader): inputs, targets = inputs.to(device), targets.to(device) outputs = net(inputs) loss = criterion(outputs, targets) test_loss += loss.item() _, predicted = outputs.max(1) total += targets.size(0) correct += predicted.eq(targets).sum().item() loader.set_postfix( info='Loss: %.3f | Acc: %.3f%% (%d/%d)' % (test_loss / (batch_idx + 1), 100. * correct / total, correct, total)) loader.close()
def create_vgg16bn(load_weights=False):
    vgg16_bn_ft = vgg16_bn(pretrained=True)
    # vgg16_bn_ft.classifier = nn.Linear(25088, 3)
    vgg16_bn_ft.classifier = nn.Sequential(nn.Linear(512 * 7 * 7, 4096),
                                           nn.ReLU(True),
                                           nn.Dropout(),
                                           nn.Linear(4096, 4096),
                                           nn.ReLU(True),
                                           nn.Dropout(),
                                           nn.Linear(4096, 3))
    vgg16_bn_ft = vgg16_bn_ft.cuda()
    vgg16_bn_ft.name = 'vgg16bn'
    return vgg16_bn_ft
def create_discriptor_net(content_weight=1.0, layers=None, pretrained=False): "Weights are not used" if layers is None: layers = ['relu_10'] #eq. relu4_2 #VGG-random if not pretrained: cnn = vgg16_bn() else: cnn = torch.load(IVGG_PATH) cnn = cnn.features.cuda() content_losses = [] #copy VGG into a new model with loss layers model = nn.Sequential().cuda() i = 1 xlist = isinstance(cnn, torch.nn.DataParallel) and cnn.module or cnn for j, layer in enumerate(list(xlist)): if isinstance(layer, nn.Conv2d): name = "conv_" + str(i) model.add_module(name, layer) if name in layers: content_loss = ContentLoss(content_weight).cuda() model.add_module("content_loss_" + str(i), content_loss) content_losses.append(content_loss) if isinstance(layer, nn.ReLU): name = "relu_" + str(i) model.add_module(name, layer) if name in layers: content_loss = ContentLoss(content_weight).cuda() model.add_module("content_loss_" + str(i), content_loss) content_losses.append(content_loss) i += 1 if isinstance(layer, nn.MaxPool2d): name = "pool_" + str(i) model.add_module(name, layer) #cnn = cnn.cpu() del cnn model.eval() return model, content_losses
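# --- Added illustration (not from the original repo) ---
# ContentLoss is used above but not defined in this snippet. A common pattern
# (as in the classic neural-style examples) is a pass-through module that
# records a weighted MSE against stored target features; the repo's version may
# capture its target differently, so treat this as a hedged sketch only.
import torch
import torch.nn as nn
import torch.nn.functional as F

class ContentLoss(nn.Module):
    def __init__(self, weight=1.0):
        super().__init__()
        self.weight = weight
        self.target = None               # reference features, captured on the first pass
        self.loss = torch.tensor(0.0)

    def forward(self, x):
        if self.target is None:
            self.target = x.detach()                              # remember the content image's features
        else:
            self.loss = self.weight * F.mse_loss(x, self.target)  # compare later inputs against them
        return x                                                  # pass-through so the Sequential keeps running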
def __init__(self):
    super(CRAFT, self).__init__()

    """ Base network """
    self.basenet = vgg16_bn()
    inputs = tf.random.normal(shape=[1, 768, 768, 3])
    x_1 = self.basenet(inputs=inputs)

    # Load weights
    weights = np.load("./pretrain/vgg16.npy", encoding='latin1', allow_pickle=True).item()
    for layer_name in weights.keys():
        try:
            layer = self.basenet.get_layer(layer_name)
            layer.set_weights(weights[layer_name])
        except Exception as ex:
            print(ex)
    self.basenet.summary()

    """ U network """
    self.upconv1 = double_conv(1024, 512, 256)
    self.upconv2 = double_conv(512, 256, 128)
    self.upconv3 = double_conv(256, 128, 64)
    self.upconv4 = double_conv(128, 64, 32)

    num_class = 2
    self.conv_cls = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same'),
        tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same'),
        tf.keras.layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu', padding='same'),
        tf.keras.layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu', padding='same'),
        tf.keras.layers.Conv2D(filters=num_class, kernel_size=(1, 1), activation='relu')
    ])
def model_select(args): if args.usenet == "bn_alexnet": model = bn_alexnet(pretrained=False, num_classes=args.numof_classes).to(device) return model elif args.usenet == "vgg16": model = vgg16_bn(pretrained=False, num_classes=args.numof_classes).to(device) return model elif args.usenet == "vgg19": model = vgg19_bn(pretrained=False, num_classes=args.numof_classes).to(device) return model elif args.usenet == "resnet18": model = resnet18(pretrained=False, num_classes=args.numof_classes).to(device) return model elif args.usenet == "resnet34": model = resnet34(pretrained=False, num_classes=args.numof_classes).to(device) return model elif args.usenet == "resnet50": model = resnet50(pretrained=False, num_classes=args.numof_classes).to(device) return model elif args.usenet == "resnet101": model = resnet101(pretrained=False, num_classes=args.numof_classes).to(device) return model elif args.usenet == "resnet152": model = resnet152(pretrained=False, num_classes=args.numof_classes).to(device) return model elif args.usenet == "resnet200": model = resnet200(pretrained=False, num_classes=args.numof_classes).to(device) return model elif args.usenet == "resnext101": model = resnext101(pretrained=False, num_classes=args.numof_classes).to(device) return model elif args.usenet == "densenet161": model = densenet161(pretrained=False, num_classes=args.numof_classes).to(device) return model
def create_vgg16bn(load_weights=False, freeze=False):
    vgg16_bn_ft = vgg16_bn(pretrained=True)
    if freeze:
        for param in vgg16_bn_ft.parameters():
            param.requires_grad = False
    # vgg16_bn_ft.classifier = nn.Linear(25088, 3)
    vgg16_bn_ft.classifier = nn.Sequential(nn.Linear(512 * 7 * 7, 4096),
                                           nn.ReLU(True),
                                           nn.Dropout(),
                                           nn.Linear(4096, 4096),
                                           nn.ReLU(True),
                                           nn.Dropout(),
                                           nn.Linear(4096, 1),
                                           nn.Sigmoid())
    vgg16_bn_ft = vgg16_bn_ft.cuda()
    vgg16_bn_ft.name = 'vgg16bn'
    vgg16_bn_ft.max_num = 1
    # vgg16_bn_ft.batch_size = 32
    return vgg16_bn_ft
def __init__(self, w1=50, w2=1, weight_vgg=None):
    """
    A weighted sum of the pixel-wise L1 loss and the L2 loss between Gram matrices.
    :param w1: weight of the L1 (pixel-wise) loss
    :param w2: weight of the L2 loss on Gram matrices
    :param weight_vgg: weights of the VGG-extracted features (should add up to 1.0)
    """
    super(CoarseLoss, self).__init__()
    if weight_vgg is None:
        weight_vgg = [0.5, 0.5, 0.5, 0.5, 0.5]
    self.w1 = w1
    self.w2 = w2
    self.l1 = nn.L1Loss(reduction='mean')
    self.l2 = nn.MSELoss(reduction='sum')
    # https://github.com/PatWie/tensorflow-recipes/blob/33962bb45e81f3619bfa6a8aeae5556cc7534caf/EnhanceNet/enet_pat.py#L169
    self.weight_vgg = weight_vgg
    self.vgg16_bn = vgg16_bn(pretrained=True).eval()
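# --- Added illustration (not from the original repo) ---
# The Gram-matrix term mentioned in the docstring above is computed elsewhere in
# the original code. A common, hedged formulation (style-transfer style) flattens
# each feature map and takes the normalized batched outer product:
import torch

def gram_matrix(feat):
    b, c, h, w = feat.size()
    f = feat.view(b, c, h * w)
    return torch.bmm(f, f.transpose(1, 2)) / (c * h * w)  # (b, c, c), normalized by feature size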
def main(batch_size, root):
    #####################################################################
    "The implementations of tensorboardX and top-K accuracy are in utils.py"
    #####################################################################
    # get checkpoint information
    checkpoint_newest = get_checkPoint("./checkpoint/")
    # TIMESTAMP = "{0:%Y-%m-%dT%H-%M-%S/}".format(datetime.now())
    # write logs and visualize the per-batch losses for training and testing
    TIMESTAMP = ""
    writer1 = SummaryWriter('./tensorboard_log/batch/' + TIMESTAMP)
    # write logs and visualize the per-epoch accuracy for training and testing
    writer2 = SummaryWriter('./tensorboard_log/epoch/' + TIMESTAMP)

    train_loader, test_loader = get_dataloader(batch_size, root)
    gpus = list(range(torch.cuda.device_count()))

    # initialize the net/optimizer
    vgg16bn = nn.DataParallel(vgg16_bn(num_classes=7), device_ids=gpus)
    optimizer = optim.SGD(params=vgg16bn.parameters(), lr=0.6 / 1024 * batch_size,
                          momentum=0.9, weight_decay=1e-4)

    # no existing checkpoint
    if checkpoint_newest == 0:
        scheduler = optim.lr_scheduler.StepLR(optimizer, 30, gamma=0.1)
        trainer = Trainer(vgg16bn, optimizer, F.cross_entropy, save_dir="./checkpoint/",
                          writer1=writer1, writer2=writer2, save_freq=1)
        trainer.loop(100, train_loader, test_loader, 1, scheduler)
    # load the existing checkpoint
    else:
        print("The path of the pretrained model: %s" % checkpoint_newest)
        print("loading pretrained model......")
        checkpoint = torch.load(checkpoint_newest)
        vgg16bn.load_state_dict(checkpoint['weight'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler = optim.lr_scheduler.StepLR(optimizer, 30, gamma=0.1, last_epoch=checkpoint['epoch'])
        print("The current epoch is %d" % checkpoint['epoch'])
        trainer = Trainer(vgg16bn, optimizer, F.cross_entropy, save_dir="./checkpoint/",
                          writer1=writer1, writer2=writer2, save_freq=1)
        trainer.loop(100, train_loader, test_loader, checkpoint['epoch'] + 1, scheduler)
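# --- Added illustration (not from the original repo) ---
# get_checkPoint() is defined elsewhere. One plausible implementation, assumed
# here, returns the most recently modified checkpoint file and 0 when none
# exists, which matches the `checkpoint_newest == 0` test above.
import glob
import os

def get_checkPoint(ckpt_dir):
    files = glob.glob(os.path.join(ckpt_dir, "*.pth*"))
    if not files:
        return 0
    return max(files, key=os.path.getmtime)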
def train(opt, log_func=None): torch.manual_seed(opt.seed) if opt.cuda: torch.cuda.set_device(opt.device) torch.cuda.manual_seed(opt.seed) torch.backends.cudnn.enabled = True if opt.model == 'logreg': model = LogReg(28 * 28, 10) elif opt.model == 'mlp': model = MLP(28 * 28, 1000, 10) elif opt.model == 'vgg': model = vgg.vgg16_bn() if opt.parallel: model.features = torch.nn.DataParallel(model.features) else: raise Exception('Unknown model: {}'.format(opt.model)) if opt.cuda: model = model.cuda() if opt.model == 'logreg' or opt.model == 'mlp': task = 'MNIST' train_loader = DataLoader(datasets.MNIST('./data', train=True, download=True, transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize( (0.1307, ), (0.3081, )) ])), batch_size=opt.batchSize, shuffle=True) valid_loader = DataLoader(datasets.MNIST('./data', train=False, transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize( (0.1307, ), (0.3081, )) ])), batch_size=opt.batchSize, shuffle=False) elif opt.model == 'vgg': task = 'CIFAR10' normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) train_loader = torch.utils.data.DataLoader(datasets.CIFAR10( root='./data', train=True, transform=transforms.Compose([ transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, 4), transforms.ToTensor(), normalize, ]), download=True), batch_size=opt.batchSize, shuffle=True, num_workers=opt.workers, pin_memory=True) valid_loader = torch.utils.data.DataLoader(datasets.CIFAR10( root='./data', train=False, transform=transforms.Compose([ transforms.ToTensor(), normalize, ])), batch_size=opt.batchSize, shuffle=False, num_workers=opt.workers, pin_memory=True) else: raise Exception('Unknown model: {}'.format(opt.model)) if opt.method == 'sgd': optimizer = SGD(model.parameters(), lr=opt.alpha_0, weight_decay=opt.weightDecay) elif opt.method == 'sgd_hd': optimizer = SGDHD(model.parameters(), lr=opt.alpha_0, weight_decay=opt.weightDecay, hypergrad_lr=opt.beta) elif opt.method == 'sgdn': optimizer = SGD(model.parameters(), lr=opt.alpha_0, weight_decay=opt.weightDecay, momentum=opt.mu, nesterov=True) elif opt.method == 'sgdn_hd': optimizer = SGDHD(model.parameters(), lr=opt.alpha_0, weight_decay=opt.weightDecay, momentum=opt.mu, nesterov=True, hypergrad_lr=opt.beta) elif opt.method == 'adam': optimizer = Adam(model.parameters(), lr=opt.alpha_0, weight_decay=opt.weightDecay) elif opt.method == 'adam_hd': optimizer = AdamHD(model.parameters(), lr=opt.alpha_0, weight_decay=opt.weightDecay, hypergrad_lr=opt.beta) else: raise Exception('Unknown method: {}'.format(opt.method)) if not opt.silent: print('Task: {}, Model: {}, Method: {}'.format(task, opt.model, opt.method)) model.eval() for batch_id, (data, target) in enumerate(train_loader): data, target = Variable(data), Variable(target) if opt.cuda: data, target = data.cuda(), target.cuda() output = model(data) loss = F.cross_entropy(output, target) loss = loss.data[0] break valid_loss = 0 for data, target in valid_loader: data, target = Variable(data, volatile=True), Variable(target) if opt.cuda: data, target = data.cuda(), target.cuda() output = model(data) valid_loss += F.cross_entropy(output, target, size_average=False).data[0] valid_loss /= len(valid_loader.dataset) if log_func is not None: log_func(0, 0, 0, loss, loss, valid_loss, opt.alpha_0, opt.alpha_0, opt.beta) time_start = time.time() iteration = 1 epoch = 1 done = False while not done: model.train() loss_epoch = 0 alpha_epoch = 0 for batch_id, (data, target) in enumerate(train_loader): 
data, target = Variable(data), Variable(target) if opt.cuda: data, target = data.cuda(), target.cuda() optimizer.zero_grad() output = model(data) loss = F.cross_entropy(output, target) loss.backward() optimizer.step() loss = loss.data[0] loss_epoch += loss alpha = optimizer.param_groups[0]['lr'] alpha_epoch += alpha iteration += 1 if opt.iterations != 0: if iteration > opt.iterations: print('Early stopping: iteration > {}'.format( opt.iterations)) done = True break if opt.lossThreshold >= 0: if loss <= opt.lossThreshold: print('Early stopping: loss <= {}'.format( opt.lossThreshold)) done = True break if batch_id + 1 >= len(train_loader): loss_epoch /= len(train_loader) alpha_epoch /= len(train_loader) model.eval() valid_loss = 0 for data, target in valid_loader: data, target = Variable(data, volatile=True), Variable(target) if opt.cuda: data, target = data.cuda(), target.cuda() output = model(data) valid_loss += F.cross_entropy(output, target, size_average=False).data[0] valid_loss /= len(valid_loader.dataset) if log_func is not None: log_func(epoch, iteration, time.time() - time_start, loss, loss_epoch, valid_loss, alpha, alpha_epoch, opt.beta) else: if log_func is not None: log_func(epoch, iteration, time.time() - time_start, loss, float('nan'), float('nan'), alpha, float('nan'), opt.beta) epoch += 1 if opt.epochs != 0: if epoch > opt.epochs: print('Early stopping: epoch > {}'.format(opt.epochs)) done = True return loss, iteration
def rl_main(args): args.rl_batch_steps = 5 args.num_episodes = 150 args.mc_alpha = 0 args.epsilon = 0.25 # try with full policy. and try with using the full vector to compute a reward. But it really is just a multiple. Unless we specifically penalize assigning 0 counts args.oracle_clusters = True # probably starting with 10 or so points randomly would be very good. but would invalidate past work with open(os.path.join(args.out_path, "args.txt"), "w") as file: for key, val in vars(args).items(): file.write("{}:{}\n".format(key, val)) if args.dataset == "ring": print("Using Ring dataset...") test_dataloader = data.DataLoader(Ring( args.data_path, transform=simple_data_transformer(), return_idx=False, testset=True), batch_size=args.batch_size, drop_last=False) train_dataset = Ring(args.data_path, simple_data_transformer()) print(len(train_dataset)) args.num_images = 2500 args.budget = 1 #how many we can label at each round args.initial_budget = 1 args.num_classes = 5 elif args.dataset == "mnist": print("Using MNIST dataset...") test_dataloader = data.DataLoader(MNIST(args.data_path, return_idx=False), batch_size=args.batch_size, drop_last=False) train_dataset = MNIST(args.data_path) print(len(train_dataset)) args.num_images = 60000 args.budget = 1 # how many we can label at each round args.initial_budget = 1 args.num_classes = 10 # args.task_model = MNISTNet() random.seed(args.torch_manual_seed) torch.manual_seed(args.torch_manual_seed) args.cuda = args.cuda and torch.cuda.is_available() solver = Solver(args, test_dataloader) all_indices = set(np.arange(args.num_images)) initial_indices = random.sample(all_indices, args.initial_budget) sampler = data.sampler.SubsetRandomSampler(initial_indices) current_indices = list(initial_indices) unlabeled_indices = np.setdiff1d(list(all_indices), current_indices) unlabeled_sampler = data.sampler.SubsetRandomSampler(unlabeled_indices) unlabeled_dataloader = data.DataLoader(train_dataset, sampler=unlabeled_sampler, batch_size=args.batch_size, drop_last=False) # dataset with labels available train_dataloader = data.DataLoader(train_dataset, sampler=sampler, batch_size=args.batch_size, drop_last=False) # iterate the train_dataloader, and compute some statistics. And also, increment quantities. ''' FORMULATION1: We will feed in the class_specific accuracies. 
''' ROLLING_AVG_LEN = args.rl_batch_steps * 2 prev_reward = torch.ones((ROLLING_AVG_LEN, 1)) prev_reward *= 10 print("prev_reward{}".format(prev_reward)) STATE_SPACE = args.num_classes ACTION_SPACE = args.num_classes CLASS_DIST_SPACE = args.num_classes pol_class_net = PolicyNet( STATE_SPACE + CLASS_DIST_SPACE, ACTION_SPACE ) # gradient, or hessian in the network..; per class accs as well pol_optimizer = optim.Adam(pol_class_net.parameters(), lr=5e-2) curr_state = torch.zeros( (1, STATE_SPACE + CLASS_DIST_SPACE)) #only feed it in the past state directly import copy # task_model = model.FCNet(num_classes=args.num_classes) # inference_model = task_model # inference_model.to(args.device) task_model = vgg.vgg16_bn(num_classes=args.num_classes) task_model = MNISTNet() # task_model = models.resnet18(pretrained=True, num_classes=args.num_classes) accuracies = [] criterion = torch.nn.CrossEntropyLoss() # feel like supporting a desparate cause; might delete later entire_loader = DataLoader(train_dataset, batch_size=len(train_dataset), shuffle=True) # ask on SO: multi item getting using pytorch, dataloader features, labels, idx = next(iter(entire_loader)) features = features.numpy( )[unlabeled_indices] # we should exactly not be using it as this. Actually, it is OK. we are just saing what is labelled and what is not labels = np.expand_dims(labels.numpy()[unlabeled_indices], 1) idx = np.expand_dims(idx.numpy()[unlabeled_indices], 1) # X = np.hstack((features,labels ,idx )) #strange that this doesn't work # X = np.concatenate((features.reshape(len(features),-1), labels,idx), axis=1) args.feature_length = 784 features = features.reshape(-1, args.feature_length) # features = np.repeat(features[:,np.newaxis,:], 3, axis=1) # np.broadcast_to(features, shape=(-1,3,args.feature_length)) X = np.concatenate((features, labels, idx), axis=1) from sklearn.cluster import KMeans kmeans_obj = KMeans( n_clusters=args.num_classes, random_state=0) # we can also fit one kmeans at the very start. cluster_preds = kmeans_obj.fit_predict(X[:, 0:args.feature_length]) if args.oracle_clusters: unlabelled_dataset = np.concatenate((X, labels), axis=1) else: # we can also just predict (should be fast) again on new datapoints, using the trained classifier. But why not just memorize unlabelled_dataset = np.concatenate( (X, np.expand_dims(cluster_preds, axis=1)), axis=1) import matplotlib.pyplot as plt fig, ax = plt.subplots() # try and predict directly in the data space? # try and graph where in the dataset it does it as well. # in this case, we would again need some fix of the policy gradient. # we can no longer just do an easy cross entropy # instead, we would be operating more in the regime of . # this is a nice analysis on problems of this type! # just about rotating and sculpting to your particular area you want # ultra few shot learning with fixed dimension, horizon! # contributions: to the field of meta/few shot learning using an active learning with reinforcement learning approach # keep on layering intersections, until you get the particular area you want. # even the approach of doing few shot learning, using active learning is pretty novel IMO # we would be trying to essentially do q learning on the choice of datapoint. but make sure you pick in the data space (not action, but continuous choice of the datapoint) # the key is really then, trying to do X # we could literally do an entire course of lin alg during the break! 
# really, digging into the problems of policy gradient # now let's graph the unlabelled dataset for cluster in range(args.num_classes): # k_means_data = unlabelled_dataset[unlabelled_dataset[...,-1]==cluster] # fig, ax = plt.subplots() k_means_data = unlabelled_dataset[unlabelled_dataset[:, -1] == cluster] ax.scatter(k_means_data[:, 0], k_means_data[:, 1]) ax.scatter(kmeans_obj.cluster_centers_[cluster][0], kmeans_obj.cluster_centers_[cluster][1], s=100) fig.savefig(os.path.join(args.out_path, "cluster_{}".format(cluster))) # break fig.show() gradient_accum = torch.zeros( (args.rl_batch_steps, 1), requires_grad=False) # accumulate all the losses # try making it an empty thing gradient_accum = torch.zeros( (args.rl_batch_steps), requires_grad=False) # accumulate all the losses # loss.backward(0 => doesn't actually execute an update of the weights. we could probably call loss.backward individually batched_accs = [] # try combining it with the state. and also, just try doing an epsilon greedy policy import torch.nn.functional as F for i in tqdm(range(args.num_episodes)): pol_optimizer.zero_grad() # here we need a fake label, in order to back prop the loss. And don't backprop immediately, instead, get the gradient, # hold it, wait for the reward, and then backprop on that quantity action_vector = pol_class_net(curr_state) # torch.nn.functional.log_softmax(action_vector) # action_dist = torch.zeros((1)) action_dist = torch.distributions.Categorical(probs=F.softmax( action_vector)) #the diff between Softmax and softmax # we probably need logsoftmax here too print( "action dist{}\n, dist probs{}\n, self f.softmax {}\n, self.log softmax{}\n" .format(action_vector, action_dist.probs, F.softmax(action_vector, dim=1), F.log_softmax(action_vector, dim=1)) ) #intelligent to take the softmax over the right dimension # print() #recall logsoftmax and such # if torch.rand() < args.epsilon: # pass # else: # correct_label1, action1 = get_query(action_dist, unlabeled_dataloader, inference_model, args) print(curr_state) correct_label, action, unlabelled_dataset, rand = get_query_via_kmeans( action_dist, unlabelled_dataset, args) if not rand: #still compute the losses to avoid policy collpase # print(rand) pred_vector = action_vector.view(1, -1) correct_label = correct_label # just a k-size list loss = criterion(pred_vector, correct_label) print("loss stats") print(pred_vector, correct_label) # labelled updates current_indices = list(current_indices) + [int( action[2].item())] # really they just want a set here... sampler = data.sampler.SubsetRandomSampler(current_indices) train_dataloader = data.DataLoader(train_dataset, sampler=sampler, batch_size=args.batch_size, drop_last=False) # unlabelled updates unlabeled_indices = np.setdiff1d(list(all_indices), current_indices) unlabeled_sampler = data.sampler.SubsetRandomSampler(unlabeled_indices) unlabeled_dataloader = data.DataLoader(train_dataset, sampler=unlabeled_sampler, batch_size=args.batch_size, drop_last=False) class_counts, total = dataloader_statistics(train_dataloader, args.num_classes) print("class counts {}".format(class_counts)) #data loader not subscriptable => we should deal with the indices. # we could also combine, and get the uncertainties, but WEIGHTED BY CLASS # lets just try the dataloader, but it will be challenging when we have the batch size... 
# print(correct_label) print("this is the action taken by the sampler") print(action) acc, curr_state = environment_step( train_dataloader, solver, task_model, args) #might need to write a bit coupled code. This is OK for now accuracies.append(acc) # curr_state = torch.cat((curr_state_accs, class_counts.t()), axis=1) if not rand: reward, prev_reward = compute_reward( curr_state, i, prev_reward, args) # basline is around 1% improvement print("prev_reward{}".format(prev_reward)) print("curr reward{}".format(reward)) # print("log loss is") # print(loss) # check what the norm of the policy is # if torch.sum(loss) <= 0.005: # loss +=0.005 #to avoid policy collapse loss *= reward # calling loss backwards here works loss *= -1 #want to maximize the reward args.penalty_type = "kl" p_dist = curr_state[:, args.num_classes:].clone() if args.penalty_type == "kl": # add the penalty as well p_dist /= torch.sum(p_dist) #normalize # KL penalty q_dist = torch.ones((1, args.num_classes), requires_grad=True) q_dist = q_dist * 1 / (args.num_classes) #normalize this # add delta smoothing mcp_loss = mode_collapse_penalty_kl(action_dist.probs.clone(), q_dist) else: # Square penalty q_dist = torch.ones((1, args.num_classes), requires_grad=True) q_dist = q_dist * i // args.num_classes + 1 mcp_loss = mode_collapse_penalty(p_dist, q_dist) print(loss, mcp_loss) # loss = mcp_loss #this detracts from the reward loss = loss + args.mc_alpha * mcp_loss print("total loss") print(loss) gradient_accum[i % args.rl_batch_steps] = loss # tess = torch.mean(gradient_accum) # print('tess') # print(tess) # tess.backward() if i % args.rl_batch_steps == 0 and i != 0: # HER buffer dataloader here: we remember what the choice was, and the reward. then we can decouple the updates! # but generally, we should try the baseline (easy) print("the gradient is") print(gradient_accum) # let's prevent the policy collapse gradient_accum = gradient_accum[gradient_accum.nonzero( )] #filter out the points where we took the epsilon policy print(gradient_accum) # gradient_accum = torch.clamp(gradient_accum, -10, 10) # torch.mean(gradient_accum, dim=0).backward() if len(gradient_accum) > 0: batched_loss = torch.mean(gradient_accum, dim=0) print(batched_loss) batched_loss.backward() pol_optimizer.step() # print(list(pol_class_net.parameters())[0].grad ) gradient_accum = torch.zeros( (args.rl_batch_steps), requires_grad=False) # accumulate all the losses batched_accs.append(acc) # now on the next step, you want to run some gradient and see how it goes. and only graph that. Equivalently, # just graph every 10th datapoint # args.epsilon *= 0.6 #perform the gradient update # compute the reward. store the gradients # store all the gradients, then torch.mean them, and then take a step. This means we only have 10/50 steps. 
# loss.backward() # pol_optimizer.step() with open(os.path.join(args.out_path, "accs.txt"), "a") as acc_file: acc_file.write("{};{}\n".format(acc, curr_state)) print(curr_state) print(acc) # with open(os.path.join(args.out_path, "rl_current_accs.txt"), "a") as acc_file: # acc_file.write("{} {}\n".format(acc, class_accs)) # inference_model = task_model # inference_model.to(args.device) # task_model = model.FCNet(num_classes=args.num_classes) # remake a new task model each time task_model = vgg.vgg16_bn(num_classes=args.num_classes) task_model = MNISTNet() # task_model = models.resnet18(pretrained=True, num_classes=args.num_classes) # graph the train dataloader at each iteration # for cluster in range(args.num_classes): # # k_means_data = unlabelled_dataset[unlabelled_dataset[...,-1]==cluster] # # fig, ax = plt.subplots() # # k_means_data = unlabelled_dataset[unlabelled_dataset[:, -1] == cluster] # # ax.scatter(k_means_data[:, 0], k_means_data[:, 1]) # ax.scatter(kmeans_obj.cluster_centers_[cluster][0], kmeans_obj.cluster_centers_[cluster][1], s=100) # fig.savefig(os.path.join(args.out_path, "cluster_{}".format(cluster))) # visual_labelled_dataset = np.zeros((0,3)) #each dimension does not require something new! # # new_datapoints= np.reshape(np.asarray(action[0]), newshape=(-1,2)) # for datapoint_batch, label_batch, _ in train_dataloader: #will be tuple of n by 1 # train_ex_batch = np.concatenate((datapoint_batch, np.expand_dims(label_batch,axis=1)), axis=1) # visual_labelled_dataset = np.concatenate((visual_labelled_dataset, train_ex_batch), axis=0 ) #concat the # visualize_training_dataset(i, args.num_classes, visual_labelled_dataset, new_datapoints) # stack all of them! # and furthermore, we need to do a group by on the label. # now, check the visual labelled dataset # let's graph the vector, as we see it come # graph the new point on the map, then graph the old collection of data as regular # current_indices # save the trained model model_params = pol_class_net.state_dict() torch.save(model_params, os.path.join(args.out_path, "model.pt")) # fig, ax = acc_plot(accuracies, args, label="policy gradient", name="policy gradient only") spaced_x = list(range(len(batched_accs))) spaced_x = [x * 10 for x in spaced_x] ax.plot(spaced_x, batched_accs, marker="x", c="purple", label="batched policy updates") ax.legend() fig.show() fig.savefig( os.path.join( args.out_path, "comparison_batched_acc_plot_{}_queries".format(len(accuracies)))) print(pol_class_net) import copy uncertain_args = copy.deepcopy(args) uncertain_args.sampling_method = "uncertainty" uncertain_accs = random_baseline(uncertain_args, args.num_episodes) random_args = copy.deepcopy(args) random_args.sampling_method = "random" random_accs = random_baseline(random_args, args.num_episodes) fig, ax = acc_plot(accuracies, args, label="policy gradient") ax.plot(range(0, len(random_accs)), random_accs, marker="x", c="orange", label="random") ax.plot(range(0, len(uncertain_accs)), uncertain_accs, marker="^", c="green", label="uncertain") ax.legend() fig.show() fig.savefig( os.path.join(args.out_path, "comparison_acc_plot_{}_queries".format(len(accuracies))))
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--imgdir', default=r'./data/img/Flicker8k_Dataset/', type=str,
                        help='path to the images')
    parser.add_argument('--tokentxt', default=r'./data/label/Flickr8k.lemma.token.txt', type=str,
                        help='path to Flickr8k.lemma.token.txt')
    parser.add_argument('--imgtxt', default=r'./data/label/Flickr_8k.trainImages.txt', type=str,
                        help='path to Flickr_8k.xxxImages.txt')
    parser.add_argument('--bsz', default=64, type=int, help='batch size')
    parser.add_argument('--dict', default=True, type=bool, help='True if it\'s used for training')
    parser.add_argument('--fname', default='out', type=str, help='name of the output')
    args = parser.parse_args()

    bsz = args.bsz
    imgdir = args.imgdir
    tokentxt = args.tokentxt
    imgtxt = args.imgtxt
    is_dict = args.dict
    fname = args.fname

    flicker8k = FlickrDataLoader.Flicker8k(imgdir, tokentxt, imgtxt,
                                           transform=get_transform(), train=True)
    model = vgg.vgg16_bn(True)
    model.eval()
    model_conv = VggConv(model).cuda()
    trainloader = torch.utils.data.DataLoader(flicker8k, batch_size=bsz,
                                              shuffle=False, num_workers=2)
    feature_extract(trainloader, is_dict, fname)
def train(args):
    log_dir = os.path.join(
        args.output_dir, "M{}_L{}_W{}_F{}".format(
            args.model,
            str(args.lr) + "-".join([str(x) for x in args.lr_milestones]),
            args.weight_tv,
            "-".join(args.feature)))
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    log_file = open(os.path.join(log_dir, "log.txt"), "w")

    transform_dict = None
    if args.model == 'vgg16_bn':
        model = vgg16_bn(pretrained=True)
        transform_dict = {'conv3_1': ('features', '10')}
    elif args.model == 'resnet18':
        model = resnet18(pretrained=True)
    else:
        raise ValueError("invalid model: {}".format(args.model))
    assert (transform_dict is not None)
    model.cuda()
    model.eval()

    img = load_img(args.img_path, args.img_shape)
    pre_noise = np.random.uniform(low=-3, high=3, size=img.shape).astype(np.float32)
    pre_noise = sigmoid(pre_noise)
    img_tensor = torch.from_numpy(img).permute(2, 0, 1).contiguous().unsqueeze(0).cuda()
    noise_tensor = torch.from_numpy(pre_noise).permute(2, 0, 1).contiguous().unsqueeze(0).cuda()
    noise_tensor.requires_grad_(True)

    criterion = FeatureMatchLpLoss(p=2)
    optimizer = Adam([noise_tensor], lr=args.lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=args.lr_milestones, gamma=0.5)

    for epoch in range(args.epoch):
        scheduler.step()
        loss = torch.zeros(1).cuda()  # was torch.Tensor().cuda(), an empty tensor that cannot accumulate the loss
        for item in args.feature:
            img_output = extract_feature(model, img_tensor, item, transform_dict)
            noise_output = extract_feature(model, noise_tensor, item, transform_dict)
            loss += criterion(noise_output, img_output)
        # the original then re-assigned `loss = criterion(img_output, noise_output)`,
        # which discarded the accumulated per-feature losses; dropped as redundant
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch % args.show_interval == 0:
            print("e:{}---loss:{:.5f}".format(epoch, loss.item()))
            print("e:{}---loss:{:.5f}".format(epoch, loss.item()), file=log_file)
        if epoch % args.save_interval == 0:
            noise_np = noise_tensor.data.cpu().squeeze(0).permute(1, 2, 0).contiguous().numpy()
            output_img(noise_np, os.path.join(log_dir, "epoch_{}.png".format(epoch)))

    log_file.close()
def main(args): if args.dataset == 'cifar10': test_dataloader = data.DataLoader(datasets.CIFAR10( args.data_path, download=True, transform=cifar_transformer(), train=False), batch_size=args.batch_size, drop_last=False) train_dataset = CIFAR10(args.data_path) args.num_images = 50000 args.budget = 2500 args.initial_budget = 5000 args.num_classes = 10 elif args.dataset == 'cifar100': test_dataloader = data.DataLoader(datasets.CIFAR100( args.data_path, download=True, transform=cifar_transformer(), train=False), batch_size=args.batch_size, drop_last=False) train_dataset = CIFAR100(args.data_path) args.num_images = 50000 args.budget = 2500 args.initial_budget = 5000 args.num_classes = 100 elif args.dataset == 'imagenet': test_dataloader = data.DataLoader(datasets.ImageFolder( args.data_path, transform=imagenet_transformer()), drop_last=False, batch_size=args.batch_size) train_dataset = ImageNet(args.data_path) args.num_images = 1281167 args.budget = 64060 args.initial_budget = 128120 args.num_classes = 1000 else: raise NotImplementedError all_indices = set(np.arange(args.num_images)) initial_indices = random.sample(all_indices, args.initial_budget) sampler = data.sampler.SubsetRandomSampler(initial_indices) # dataset with labels available querry_dataloader = data.DataLoader(train_dataset, sampler=sampler, batch_size=args.batch_size, drop_last=True) args.cuda = args.cuda and torch.cuda.is_available() solver = Solver(args, test_dataloader) splits = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4] #splits = [0.4] current_indices = list(initial_indices) accuracies = [] for split in splits: # need to retrain all the models on the new images # re initialize and retrain the models task_model = vgg.vgg16_bn(num_classes=args.num_classes) vae = model.VAE(args.latent_dim) discriminator = model.Discriminator(args.latent_dim, args.num_classes + 1) unlabeled_indices = np.setdiff1d(list(all_indices), current_indices) unlabeled_sampler = data.sampler.SubsetRandomSampler(unlabeled_indices) unlabeled_dataloader = data.DataLoader(train_dataset, sampler=unlabeled_sampler, batch_size=args.batch_size, drop_last=False) # train the models on the current data acc, vae, discriminator = solver.train(querry_dataloader, task_model, vae, discriminator, unlabeled_dataloader, args) print('Final accuracy with {}% of data is: {:.2f}'.format( int(split * 100), acc)) accuracies.append(acc) sampled_indices = solver.sample_for_labeling(vae, discriminator, unlabeled_dataloader) current_indices = list(current_indices) + list(sampled_indices) sampler = data.sampler.SubsetRandomSampler(current_indices) querry_dataloader = data.DataLoader(train_dataset, sampler=sampler, batch_size=args.batch_size, drop_last=True) torch.save(accuracies, os.path.join(args.out_path, args.log_name))
import torch
from vgg import vgg16, vgg16_bn

device = 'cpu'
vgg_model = vgg16_bn(pretrained=True, progress=True, num_classes=1000)  # was `pretraind=True` (typo)
vgg_model.eval().to(device=device)
x = torch.rand((1, 3, 224, 224)).to(device=device)
out = vgg_model(x)
print('end')
global args, best_prec1 args = parser.parse_args() filename = args.model + "_" + str(args.batch_size) + "_" + str( args.lr) + "_" + str(args.momentum) + "_" + str( args.loss_weight) + "_" + str(args.weight_decay) lengths = 200 root = "/app/MATH-6380p/project-2/" model_file = root + filename + '_model_best.pth.tar' # /media/leo/0287D1936157598A/docker_ws/docker_ws/MATH-6380p/project-2/resnet.resnet18_256_0.1_0.9_0.003_0.001_model_best.pth.tar best_prec1 = 0 if args.model == "vgg.vgg16_bn": model = vgg.vgg16_bn() if args.model == "resnet.resnet18": model = resnet.resnet18(pretrained=False) if args.model == "dcfresnet.resnet18": model = dcfresnet.resnet18(pretrained=False) if args.model == "dcfnet.vgg16_bn": model = dcfnet.vgg16_bn(pretrained=False) #model = vgg.vgg16_bn() # model = resnet.resnet18(pretrained=False) # model = dcfresnet.resnet18(pretrained=False) # model = dcfnet.vgg16_bn(pretrained=False) model = model.cuda() #print(model.features.children()) val_data = datasets.CIFAR10('./CIFAR10',
def main(): args = args_parse() seed_torch() device = 'cuda' if torch.cuda.is_available() else 'cpu' best_acc = 0 # best test accuracy start_epoch = 0 # start from epoch 0 or last checkpoint epoch # Data print('==> Preparing data..') transform_train = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ]) transform_test = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ]) trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train) trainloader = DataLoader(trainset, batch_size=128, shuffle=True) testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test) testloader = DataLoader(testset, batch_size=100, shuffle=False) # classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck') # Model print('==> Building model..') net = vgg16_bn(p=args.P) net = net.to(device) if device == 'cuda': net = torch.nn.DataParallel(net) # cudnn.benchmark = True summary(net, torch.zeros(2, 3, 32, 32).cuda(), print_layer_info=False) # if args.resume: # # Load checkpoint. # print('==> Resuming from checkpoint..') # assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!' # checkpoint = torch.load('./checkpoint/ckpt.t7') # net.load_state_dict(checkpoint['net']) # best_acc = checkpoint['acc'] # start_epoch = checkpoint['epoch'] criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4) lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[50, 100, 150], gamma=0.1) for epoch in range(start_epoch, start_epoch + 200): lr_scheduler.step() train(epoch, net, trainloader, optimizer, criterion, device) acc = test(epoch, net, testloader, criterion, device) # Save checkpoint. if acc > best_acc: # print('Saving..') state = { 'net': net.state_dict(), 'acc': acc, 'epoch': epoch, } if not os.path.isdir('model'): os.mkdir('model') torch.save(state, './model/vgg16_P{}.pth'.format(args.P)) best_acc = acc
    shuffle=True, pin_memory=True)
loader_test = torch.utils.data.DataLoader(datasets.CIFAR10(
    root='../data', train=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=128, shuffle=False, pin_memory=True)

# Load the pretrained model
net = vgg16_bn()
new_net = vgg16_bn()
if torch.cuda.is_available():
    print('CUDA enabled.')
    net.cuda()
    new_net.cuda()
print("--- Pretrained network loaded ---")

criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.RMSprop(net.parameters(), lr=param['learning_rate'],
#                                 weight_decay=param['weight_decay'])
optimizer = torch.optim.SGD(net.parameters(), param['learning_rate'],
                            momentum=param['momentum'],
                            weight_decay=param['weight_decay'])
N_CLASSES = 100 trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.train_bs, shuffle=True, num_workers=3) testloader = torch.utils.data.DataLoader(testset, batch_size=2000, shuffle=False, num_workers=3) truncloader = torch.utils.data.DataLoader(trainset, batch_size=args.prune_bs, num_workers=3) if args.vgg_type == 'vgg16': model = vgg.vgg16_bn(num_classes=N_CLASSES) elif args.vgg_type == 'vgg19': model = vgg.vgg19_bn(num_classes=N_CLASSES) model = model.to(device) model.train() x, y = map(lambda x: x.to(device), next(iter(trainloader))) p = model(x) loss = F.cross_entropy(p, y) loss.backward() agg_tensor = [] for child in model.modules(): if isinstance(child, vgg.MaskedConv2d) or isinstance( child, vgg.MaskedLinear): agg_tensor += child.get_mask_grad()
    shuffle=True, pin_memory=True)
loader_test = torch.utils.data.DataLoader(datasets.CIFAR10(
    root='../data', train=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=128, shuffle=False, pin_memory=True)

# Load the pretrained model
net = vgg16_bn()
# for m in net.modules():
#     if isinstance(m, nn.Conv2d):
#         m.set_mask(torch.rand((2, 3, 4)))
# print('ok')
if torch.cuda.is_available():
    print('CUDA enabled.')
    net.cuda()
print("--- Pretrained network loaded ---")

criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.RMSprop(net.parameters(), lr=param['learning_rate'],
#                                 weight_decay=param['weight_decay'])
optimizer = torch.optim.SGD(net.parameters(),
# determine whether to use GPU or CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

splits = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4]
for split in splits:
    train_dataset, val_dataset = torch.utils.data.random_split(
        trainset, [int(num_images * split), num_images - int(num_images * split)])
    trainloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # train
    print("train")
    task_model = vgg.vgg16_bn(num_classes=num_classes)
    model = task_model.to(device)
    optimizer = torch.optim.SGD(task_model.parameters(), lr=0.01,
                                weight_decay=5e-4, momentum=0.9)
    criterion = nn.CrossEntropyLoss(reduction="mean")
    model = model.train()
    loss_train_list = []
    acc_train_list = []
    total, tp = 0, 0
    epoch = 100
    for i in range(epoch):
        acc_count = 0
        for j, (x, y) in enumerate(trainloader):
def main(): global args, best_prec1 # Check the save_dir exists or not if not os.path.exists(args.save_dir): os.makedirs(args.save_dir) if args.arch == 'vgg': model = vgg16_bn() elif args.arch == 'alexnet': model = alexnet( num_classes=10) if args.dataset == 'cifar10' else alexnet( num_classes=100) elif args.arch == 'wide_resnet': if args.dataset == 'cifar10': model = wide_WResNet(num_classes=10, depth=16, dataset='cifar10') else: model = wide_WResNet(num_classes=100, depth=16, dataset='cifar100') elif args.arch == 'resnet': if args.dataset == 'cifar10': model = resnet(num_classes=10, dataset='cifar10') else: model = resnet(num_classes=100, dataset='cifar100') model.cuda() cudnn.benchmark = True # define loss function (criterion) and pptimizer criterion = nn.CrossEntropyLoss().cuda() if args.half: model.half() criterion.half() optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay) # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) args.start_epoch = checkpoint['epoch'] best_prec1 = checkpoint['best_prec1'] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer_state_dict']) print(("=> loaded checkpoint '{}' (epoch {})".format( args.evaluate, checkpoint['epoch']))) else: print("=> no checkpoint found at '{}'".format(args.resume)) if args.evaluate: validate(val_loader, model, criterion) return scheduler = torch.optim.lr_scheduler.MultiStepLR( optimizer, milestones=[150, 225, 275], gamma=0.1) for epoch in range(args.start_epoch, args.epochs): scheduler.step() # adjust_learning_rate(optimizer, epoch) # train for one epoch cubic_train(model, criterion, optimizer, epoch) # evaluate on validation set prec1 = validate(val_loader, model, criterion) # remember best prec@1 and save checkpoint is_best = prec1 > best_prec1 best_prec1 = max(prec1, best_prec1) save_checkpoint( { 'epoch': epoch + 1, 'state_dict': model.state_dict(), 'optimizer_state_dict': optimizer.state_dict(), 'best_prec1': best_prec1, }, is_best, filename=os.path.join(args.save_dir, 'checkpoint_{}.tar'.format(epoch)))
neptune.create_experiment(model_name) NeptuneLog() if model_name == 'vgg11': model = vgg.vgg11(pretrained=pretrain_check) elif model_name == 'vgg11_bn': model = vgg.vgg11_bn(pretrained=pretrain_check) elif model_name == 'vgg13': model = vgg.vgg13(pretrained=pretrain_check) elif model_name == 'vgg13_bn': model = vgg.vgg13_bn(pretrained=pretrain_check) elif model_name == 'vgg16': model = vgg.vgg16(pretrained=pretrain_check) elif model_name == 'vgg16_bn': model = vgg.vgg16_bn(pretrained=pretrain_check) elif model_name == 'vgg19': model = vgg.vgg19(pretrained=pretrain_check) elif model_name == 'vgg19_bn': model = vgg.vgg19_bn(pretrained=pretrain_check) model.eval() model = torch.nn.DataParallel(model).cuda() optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-5) scheduler = ReduceLROnPlateau(optimizer, factor=0.01, patience=patience,