def __init__(self, model, device, update_opts, offline_dataset, class_map):
    super().__init__(model, device, update_opts, offline_dataset)

    # Infer the feature dimensionality from one offline batch.
    x, _ = next(iter(self.offline_loader))
    x = x.to(device)
    self.feature_dim = model.features(x).shape[-1]

    # The backbone stays frozen in eval mode during online updates.
    model.backbone = model.backbone.eval()
    self.sample_counter = 0

    # Base classes: the 1000 class indices that are not remapped to novel classes.
    self.idcs = [
        x for x in np.arange(0, 1000) if x not in class_map.values()
    ]
    self.initialized_classes = set(self.idcs)

    # Layers selected for adaptation.
    self.params = []
    self.num_layers = update_opts.num_layers
    extract_layers(self.model, self.num_layers, self.params)

    # Running per-class counts and feature sums used to build prototypes.
    self.running_labels = torch.zeros(1000).to(self.device)
    self.running_proto = torch.zeros([1000, self.feature_dim]).to(self.device)
    self.counter = 0
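# The running_proto / running_labels buffers above suggest per-class feature
# averaging. A minimal, self-contained sketch of that pattern (assumed usage,
# not repo code; names and shapes are illustrative):
import torch

num_classes, feat_dim = 1000, 512
running_proto = torch.zeros(num_classes, feat_dim)
running_labels = torch.zeros(num_classes)

def accumulate(features, labels):
    """Add batch features into per-class sums; counts are tracked separately."""
    running_proto.index_add_(0, labels, features)
    running_labels.index_add_(0, labels, torch.ones(labels.shape[0]))

# Prototypes are then the per-class means (guarding against empty classes):
# prototypes = running_proto / running_labels.clamp(min=1).unsqueeze(1)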
def __init__(self, model, split_layers, sequence_num, root, num_classes, device):
    super().__init__()
    self.num_classes = num_classes
    self.model = extract_backbone(model)

    # Probe the backbone with a dummy input to get the feature dimensionality.
    test_device = next(self.model.parameters()).device
    test_val = torch.zeros(1, 3, 224, 224).to(test_device)
    self.device = device
    _, feature_dim = self.model(test_val).shape

    # Load the class map for this sequence and split indices into base / novel classes.
    path = 'S' + str(sequence_num) + '/class_map' + str(sequence_num) + '.npy'
    class_map_base = np.load(os.path.join(root, path), allow_pickle=True).item()
    self.base_idx = torch.tensor([
        x for x in np.arange(0, num_classes) if x not in class_map_base.values()
    ])
    self.novel_idx = torch.tensor(list(class_map_base.values()))

    # Pull the original classifier weight/bias out of the model, keep only the backbone.
    self.params = []
    extract_layers(model, split_layers, self.params)
    self.model = extract_backbone(model)

    # Base classifier is initialized from the pretrained weights and frozen
    # (requires_grad_ recursively disables gradients on its parameters).
    self.base_classifier = torch.nn.Linear(feature_dim, len(self.base_idx)).to(self.device)
    self.base_classifier.weight = torch.nn.Parameter(self.params[0][self.base_idx])
    self.base_classifier.bias = torch.nn.Parameter(self.params[1][self.base_idx])
    self.base_classifier.requires_grad_(False)

    # Separate trainable head for the novel classes.
    self.novel_classifier = torch.nn.Linear(feature_dim, 750).to(self.device)
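# Side note on freezing: `requires_grad_(False)` is the call that actually turns
# off gradients on the head's parameters; assigning a bare `requires_grad`
# attribute to the module does not. A standalone check (not repo code):
import torch

head = torch.nn.Linear(512, 250)
head.requires_grad_(False)                 # recursively freezes weight and bias
assert not any(p.requires_grad for p in head.parameters())

head2 = torch.nn.Linear(512, 250)
head2.requires_grad = False                # only creates a module attribute
assert all(p.requires_grad for p in head2.parameters())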
def __init__(self, model, device, update_opts, offline_dataset):
    super().__init__(model, device, update_opts, offline_dataset)

    # Collect the layers selected for adaptation and optimize them with SGD.
    self.params = []
    extract_layers(model, update_opts.num_layers, self.params)
    self.optimizer = torch.optim.SGD(self.params,
                                     self.update_opts.lr,
                                     momentum=self.update_opts.m,
                                     weight_decay=1e-4)
def __init__(self, ind=[1, 3, 6], device=None):
    super().__init__()
    if device is None:
        device = get_device()
    # Truncate the pretrained VGG16 feature stack at the deepest index of interest.
    self.vgg = utils.extract_layers(
        models.vgg16(pretrained=True).eval().to(device).features)[:max(ind)]
    self.ind = ind
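# For context, a common way such a truncated VGG16 feature stack is used is to
# collect the activations at the stored indices during the forward pass. A minimal
# standalone sketch (the forward logic is assumed, not taken from the repo):
import torch
import torch.nn as nn
from torchvision import models

class VGGFeatures(nn.Module):
    """Returns the VGG16 activations at the given feature-layer indices."""

    def __init__(self, ind=(1, 3, 6)):
        super().__init__()
        self.ind = set(ind)
        self.layers = nn.ModuleList(
            list(models.vgg16(pretrained=True).eval().features[:max(ind) + 1]))
        for p in self.parameters():
            p.requires_grad_(False)

    def forward(self, x):
        feats = []
        for i, layer in enumerate(self.layers):
            x = layer(x)
            if i in self.ind:
                feats.append(x)
        return feats

# feats = VGGFeatures()(torch.randn(1, 3, 224, 224))  # one tensor per index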
def split_encoder(encoder, blocks):
    """Split the encoder into sequential chunks, one per decoder block."""
    enc_blocks = []
    lays = utils.extract_layers(encoder)
    last = 0
    for block in blocks:
        # Each chunk covers the layers from the end of the previous block
        # up to and including the layer at block.ind.
        enc_blocks.append(nn.Sequential(*lays[last:block.ind + 1]))
        last = block.ind + 1
    return enc_blocks
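# A quick standalone illustration of the same chunking logic (the encoder and the
# boundary indices below are made up; `boundaries` plays the role of `block.ind`):
import torch.nn as nn

encoder = nn.Sequential(
    nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),   # layers 0-2
    nn.Conv2d(16, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),  # layers 3-5
)

boundaries = [2, 5]            # inclusive end index of each chunk
chunks, last = [], 0
for end in boundaries:
    chunks.append(nn.Sequential(*list(encoder)[last:end + 1]))
    last = end + 1
# chunks[0] holds layers 0-2, chunks[1] holds layers 3-5.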
def reverse_net(encoder, input=None):
    """Build a reversed (decoder) version of `encoder`.

    `input` is a single example, not a batch; when given, it is pushed
    through the encoder layer by layer to record input/output shapes.
    """
    lays = extract_layers(encoder)
    if input is not None:
        input = input.squeeze(0)
        input_shapes = []
        output_shapes = []
        for lay in lays:
            input_shapes.append(input.shape)
            input = lay(input)
            output_shapes.append(input.shape)
        blocks = group_by_blocks(lays, input_shapes, output_shapes)
    else:
        blocks = group_by_blocks(lays)
    # Revert each block to obtain the mirrored decoder blocks.
    return [el.revert() for el in blocks]
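# `group_by_blocks` and `Block.revert()` are repo-internal. As a rough, assumed
# sketch of what "reverting" a layer can mean when mirroring an encoder into a
# decoder (not the repo's actual implementation):
import torch.nn as nn

def revert_layer(layer):
    """Mirror a single encoder layer for a decoder (illustrative only)."""
    if isinstance(layer, nn.Conv2d):
        # Swap in/out channels; a transposed conv reverses the spatial mapping.
        return nn.ConvTranspose2d(layer.out_channels, layer.in_channels,
                                  kernel_size=layer.kernel_size,
                                  stride=layer.stride,
                                  padding=layer.padding)
    if isinstance(layer, nn.MaxPool2d):
        return nn.Upsample(scale_factor=layer.kernel_size, mode='nearest')
    if isinstance(layer, nn.ReLU):
        return nn.ReLU()
    return nn.Identity()   # fall back for layers with no obvious mirror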
def train_net(input, encoder, blocks, verbose=False):
    """Greedily train a decoder one block at a time against the encoder features."""
    encoder = split_encoder(encoder, blocks)
    device = get_device()
    dataset = utils.IOdataset(input, copy.deepcopy(input))
    optimizer = None
    decoder = nn.Sequential()

    iterator = range(len(blocks))
    if verbose:
        iterator = tqdm.tqdm(iterator, desc="Blocks training")
    criterion = VGG16_MLoss()

    enc = None
    for i in iterator:
        # Grow the frozen encoder prefix by one block per iteration.
        if enc is None:
            enc = nn.Sequential(encoder[0])
        else:
            enc = nn.Sequential(*(utils.extract_layers(enc) + [encoder[i].to(device)]))
        enc.to(device)
        enc.eval()
        if verbose:
            print('ENCODER:')
            print(enc)

        # Targets for this stage are the activations of the current encoder prefix.
        dataset.output = utils.apply_net(enc, dataset.input, device,
                                         batch_size=LAY_BATCHSIZE)

        # Prepend the new decoder block so the decoder mirrors the encoder.
        net = blocks[i]
        net.to(device)
        if verbose:
            print('DEC_HEAD', net)
            print(utils.extract_layers(decoder))
        decoder = nn.Sequential(*([net] + utils.extract_layers(decoder)))

        if i == 0:
            optimizer = torch.optim.SGD(decoder.parameters(),
                                        lr=NET_LR,
                                        momentum=NET_MOMENTUM)
        else:
            # Decay the learning rate of the already-trained blocks, drop groups
            # that fall below the threshold, then register the new block.
            groups = []
            for param_group in optimizer.param_groups:
                param_group['lr'] *= LR_DECAY
                if param_group['lr'] >= LR_THR:
                    groups.append(param_group)
            optimizer.param_groups = groups
            optimizer.add_param_group({
                'params': net.parameters(),
                'lr': NET_LR,
                'momentum': NET_MOMENTUM
            })

        decoder, loss, optimizer = train_lay(decoder, dataset, verbose, optimizer,
                                             device, criterion=criterion)

    decoder = nn.Sequential(*utils.extract_layers(decoder))
    return decoder
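# The stage-wise schedule above works by mutating `optimizer.param_groups`
# directly: decay the already-trained groups, drop those that fall below a
# threshold, then register the new block with a fresh learning rate. A compact
# standalone illustration (the constants are placeholders, not repo values):
import torch

NET_LR, NET_MOMENTUM, LR_DECAY, LR_THR = 1e-2, 0.9, 0.5, 1e-3

old_block = torch.nn.Linear(4, 4)
new_block = torch.nn.Linear(4, 4)
opt = torch.optim.SGD(old_block.parameters(), lr=NET_LR, momentum=NET_MOMENTUM)

kept = []
for g in opt.param_groups:
    g['lr'] *= LR_DECAY
    if g['lr'] >= LR_THR:
        kept.append(g)
opt.param_groups = kept
opt.add_param_group({'params': new_block.parameters(),
                     'lr': NET_LR,
                     'momentum': NET_MOMENTUM})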
def join_blocks(blocks):
    """Concatenate the layers of all decoder blocks into one Sequential."""
    decoder_lays = []
    for el in blocks:
        decoder_lays += extract_layers(el.lays)
    return nn.Sequential(*decoder_lays)