def lr_find(self, lr_start=1e-6, lr_end=10):
    # LR range test: sweep the learning rate exponentially from lr_start to
    # lr_end over one pass of the training set and record (lr, loss) pairs.
    init_model_states = copy.deepcopy(self.model.state_dict())

    # Train only the last layer group while searching.
    children_num = len(list(self.model.children()))
    freeze_until(self.model, children_num - 2)

    # Per-step multiplier that takes the LR from lr_start to lr_end
    # in len(train_dl) - 1 steps.
    t = (lr_end / lr_start) ** (1.0 / (len(self.train_dl) - 1))
    optimizer = get_optimizer(self.model, [lr_start], None)
    scheduler = StepLR(optimizer, step_size=1, gamma=t)

    records = []
    self.model.train()
    for images, labels in tqdm(self.train_dl):
        images = images.to(self.device)
        labels = labels.to(self.device)

        optimizer.zero_grad()
        with torch.set_grad_enabled(True):
            outputs = self.model(images)
            loss = self.criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Record the LR actually used for this step, then increase it.
        cur_lr = optimizer.param_groups[0]['lr']
        records.append((cur_lr, loss.item()))
        scheduler.step()

    # Restore the original weights; the sweep is only for diagnostics.
    self.model.load_state_dict(init_model_states)
    return records
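# Usage sketch (not part of the original code): the records returned by
# lr_find() can be plotted as loss versus learning rate on a log-scaled x-axis
# to pick a rate just below the point where the loss stops decreasing.
# Assumes a trainer instance named `learner` that exposes lr_find().
import matplotlib.pyplot as plt

records = learner.lr_find(lr_start=1e-6, lr_end=10)
lrs, losses = zip(*records)

plt.plot(lrs, losses)
plt.xscale('log')            # the sweep is exponential, so use a log axis
plt.xlabel('learning rate')
plt.ylabel('training loss')
plt.show()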
def train_all_layers(self, lrs, param_places=[1, 2, 3], cycle_num=3,
                     cycle_len=None, cycle_mult=None):
    # Unfreeze the whole network and train it with differential learning
    # rates (one LR per parameter group defined by param_places). If
    # cycle_len is given, use SGDR-style cosine annealing with warm
    # restarts; otherwise run cycle_num plain epochs.
    use_sgdr = True
    if cycle_len is None:
        use_sgdr = False
        cycle_len, cycle_mult = 1, 1

    freeze_until(self.model, -1)  # unfreeze every layer
    optimizer = get_optimizer(self.model, lrs, param_places)

    cur_epoch = 0
    for cycle in range(cycle_num):
        scheduler = None
        if use_sgdr:
            # Note: this call signature does not match
            # torch.optim.lr_scheduler.CosineAnnealingLR, so CosineAnnealingLR
            # here appears to be the project's own scheduler, annealing over
            # `total` iterations.
            scheduler = CosineAnnealingLR(
                optimizer, 0,
                total=self.train_iteration_per_epoch * cycle_len)

        for epoch in range(cycle_len):
            cur_epoch += 1
            running_loss = 0.0
            running_corrects = 0

            for batch in tqdm(self.train_dl):
                cur_lr = get_lr(optimizer)
                self.lr_list.append(cur_lr)
                cur_loss, cur_corrects = self.train(batch, optimizer, scheduler)
                running_loss += cur_loss
                running_corrects += cur_corrects

            epoch_loss_train = running_loss / self.train_dset_size
            epoch_acc_train = -1
            if not self.no_acc:
                epoch_acc_train = running_corrects / self.train_dset_size * 100.0

            epoch_loss_val, epoch_acc_val = self.evaluate(self.val_dl, self.val_dset_size)
            print('Epoch : {}, Train Loss : {:.6f}, Train Acc : {:.6f}, '
                  'Val Loss : {:.6f}, Val Acc : {:.6f}'.format(
                      cur_epoch, epoch_loss_train, epoch_acc_train,
                      epoch_loss_val, epoch_acc_val))

        # Each warm-restart cycle gets longer by a factor of cycle_mult.
        cycle_len *= cycle_mult

    return self.model
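# Usage sketch (assumed names, not from the original code): after the head has
# been trained, unfreeze everything and fine-tune with differential learning
# rates, smaller for early layer groups and larger for the head, optionally
# with SGDR warm restarts. `learner` is a hypothetical trainer instance and
# the LR values are illustrative.
model = learner.train_all_layers(
    lrs=[1e-4, 3e-4, 1e-3],      # one LR per parameter group (early -> late)
    param_places=[1, 2, 3],      # how the children are split into groups
    cycle_num=3,                 # number of warm-restart cycles
    cycle_len=1,                 # epochs in the first cycle (enables SGDR)
    cycle_mult=2)                # each cycle is twice as long as the last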
def lr_find(self, lr_start=1e-6, lr_multiplier=1.1, max_loss=3.0, print_value=True):
    # Alternative LR range test: keep multiplying the learning rate by
    # lr_multiplier every iteration until the loss exceeds max_loss.
    init_model_states = copy.deepcopy(self.model.state_dict())

    # Train only the last layer group while searching.
    children_num = len(list(self.model.children()))
    freeze_until(self.model, children_num - 2)

    optimizer = optim.Adam(self.model.parameters(), lr_start)
    scheduler = StepLR(optimizer, step_size=1, gamma=lr_multiplier)

    records = []
    lr_found = False
    self.model.train()
    while not lr_found:
        for images, labels in self.train_dl:
            # Train a single iteration.
            images = images.to(self.device)
            labels = labels.to(self.device)

            optimizer.zero_grad()
            with torch.set_grad_enabled(True):
                outputs = self.model(images)
                loss = self.criterion(outputs, labels)
                loss.backward()
                optimizer.step()

            # Record the LR used for this step, then increase it.
            cur_lr = optimizer.param_groups[0]['lr']
            cur_loss = loss.item()
            records.append((cur_lr, cur_loss))
            scheduler.step()

            if print_value:
                print('Learning rate : {} / Loss : {}'.format(cur_lr, cur_loss))

            # Stop as soon as the loss diverges past max_loss.
            if cur_loss > max_loss:
                lr_found = True
                break

    self.model.load_state_dict(init_model_states)
    return records
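# Usage sketch (not from the original code): a common heuristic for reading
# the range-test output is to take the learning rate at the minimum recorded
# loss and divide it by 10, since the loss is usually already about to rise
# at the minimum itself. `learner` is a hypothetical trainer instance.
records = learner.lr_find(lr_start=1e-6, lr_multiplier=1.1, print_value=False)
best_lr, best_loss = min(records, key=lambda r: r[1])
suggested_lr = best_lr / 10.0
print('Suggested learning rate: {:.2e}'.format(suggested_lr))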
def train_all_layers_clr(self, lrs_max, lrs_min, epoch_num, cycle_num, div,
                         param_places=[1, 2, 3]):
    # Train the whole network with a cyclical learning rate: each cycle runs
    # epoch_num epochs while the LR oscillates between lrs_min and lrs_max.
    freeze_until(self.model, -1)  # unfreeze every layer
    optimizer = get_optimizer(self.model, lrs_min, param_places)

    cur_epoch = 0
    for cycle in range(cycle_num):
        # Note: this call signature does not match
        # torch.optim.lr_scheduler.CyclicLR, so CyclicLR here appears to be
        # the project's own scheduler, cycling over `total` iterations with
        # peak LRs lrs_max and divisor div.
        scheduler = CyclicLR(optimizer, lrs_max, div,
                             total=self.train_iteration_per_epoch * epoch_num)

        for epoch in range(epoch_num):
            cur_epoch += 1
            running_loss = 0.0
            running_corrects = 0

            for batch in tqdm(self.train_dl):
                cur_lr = get_lr(optimizer)
                self.lr_list.append(cur_lr)
                cur_loss, cur_corrects = self.train(batch, optimizer, scheduler)
                running_loss += cur_loss
                running_corrects += cur_corrects

            epoch_loss_train = running_loss / self.train_dset_size
            epoch_acc_train = -1
            if not self.no_acc:
                epoch_acc_train = running_corrects / self.train_dset_size * 100.0

            epoch_loss_val, epoch_acc_val = self.evaluate(self.val_dl, self.val_dset_size)
            print('Epoch : {}, Train Loss : {:.6f}, Train Acc : {:.6f}, '
                  'Val Loss : {:.6f}, Val Acc : {:.6f}'.format(
                      cur_epoch, epoch_loss_train, epoch_acc_train,
                      epoch_loss_val, epoch_acc_val))

    return self.model
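# Side note (not from the original code): a comparable triangular schedule can
# be built with the scheduler that ships with PyTorch (>= 1.1), stepping it
# once per batch. The names `model`, `criterion` and `train_dl` below are
# placeholders, not definitions from the snippets above.
import torch.optim as optim
from torch.optim.lr_scheduler import CyclicLR as TorchCyclicLR

optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
scheduler = TorchCyclicLR(optimizer,
                          base_lr=1e-4,                   # lower bound of the cycle
                          max_lr=1e-3,                    # upper bound of the cycle
                          step_size_up=4 * len(train_dl),  # half-cycle length in iterations
                          mode='triangular')

for images, labels in train_dl:
    optimizer.zero_grad()
    loss = criterion(model(images), labels)
    loss.backward()
    optimizer.step()
    scheduler.step()                                      # advance the LR once per batch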
def create_vgg_model(style_layers=[0, 7, 14, 27, 40], content_layers=[30],
                     style_weight=[1.0, 1.0, 1.0, 1.0, 1.0], content_weight=[1.0]):
    # Build a truncated VGG-19(BN) feature extractor with StyleLoss and
    # ContentLoss modules inserted after the requested layers. The free
    # variables self, s_img (style image) and c_img (content image) are taken
    # from the enclosing scope, so this is presumably defined inside a method
    # that provides them.
    vgg = freeze_until(
        models.vgg19_bn(pretrained=True).to(self.device).features, 99999)

    style_losses = []
    content_losses = []

    # Work on copies so the default argument lists are not mutated.
    style_layers = list(style_layers)
    content_layers = list(content_layers)

    # ImageNet normalization as the first layer.
    model = nn.Sequential(
        Normalize(
            torch.tensor([0.485, 0.456, 0.406]).to(self.device),
            torch.tensor([0.229, 0.224, 0.225]).to(self.device)))

    for i, layer in enumerate(vgg.children()):
        if isinstance(layer, nn.ReLU):
            # In-place ReLU would overwrite the activations the loss modules
            # need, so replace it with an out-of-place version.
            model.add_module(str(i), nn.ReLU(inplace=False))
        else:
            model.add_module(str(i), layer)

        if i in style_layers:
            # Target: style-image features up to this point in the network.
            style_loss_layer = StyleLoss(model(s_img).detach())
            model.add_module('Style-' + str(i), style_loss_layer)
            style_layers.remove(i)
            style_losses.append(style_loss_layer)

        if i in content_layers:
            # Target: content-image features up to this point in the network.
            content_loss_layer = ContentLoss(model(c_img).detach())
            model.add_module('Content-' + str(i), content_loss_layer)
            content_layers.remove(i)
            content_losses.append(content_loss_layer)

        # Stop once every requested loss layer has been inserted.
        if len(style_layers) == 0 and len(content_layers) == 0:
            break

    return model
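# Usage sketch (not from the original code): the returned model is typically
# used to optimize the pixels of an input image directly. This assumes that
# StyleLoss and ContentLoss behave like the loss modules in the official
# PyTorch neural-style tutorial, i.e. each stores its current value in a
# `.loss` attribute after a forward pass -- that detail is not shown above,
# and the weight below is an arbitrary example.
style_weight = 1e6                               # the style term usually needs a large weight
input_img = c_img.clone().requires_grad_(True)   # start from the content image
optimizer = optim.LBFGS([input_img])

style_modules = [m for m in model.modules() if isinstance(m, StyleLoss)]
content_modules = [m for m in model.modules() if isinstance(m, ContentLoss)]

for step in range(300):
    def closure():
        optimizer.zero_grad()
        model(input_img)                         # forward pass lets each loss module record its value
        loss = style_weight * sum(m.loss for m in style_modules) \
               + sum(m.loss for m in content_modules)
        loss.backward()
        return loss
    optimizer.step(closure)
    with torch.no_grad():
        input_img.clamp_(0, 1)                   # keep pixel values in a valid range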
                     n_frames=n_frames, transform=transform, train=False)
valset_loader = DataLoader(valset,
                           batch_size=batch_size,
                           shuffle=True,
                           num_workers=2)

# Define VGG model: keep the first fully connected block of the pretrained
# VGG-16 classifier and replace the rest with a single-unit output head.
vgg16 = models.vgg16(pretrained=True)
old_classifier = list(vgg16.classifier.children())
old_classifier = old_classifier[:3]
old_classifier.append(nn.Linear(4096, 1))
vgg16.classifier = nn.Sequential(*old_classifier)

# Freeze the early convolutional layers, then wrap the backbone in the
# project's MyEnsemble module.
freeze_until(vgg16, "features.17.weight")
model = MyEnsemble(vgg16, n_frames=n_frames)
model.cuda()

# Train VGG model
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
finetune = FineTune(model,
                    'vgg16',
                    epoch=epoch,
                    batch_size=batch_size,
                    optimizer=optimizer,
                    filename=log_name,
                    trainset_loader=trainset_loader,
                    valset_loader=valset_loader,
                    device=device)
finetune.train()
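# Sketch of what the freeze_until() helper used throughout these snippets
# might look like -- its real definition is not shown above, so this is an
# assumption, not the project's actual implementation. Given an integer it
# freezes the leading child modules before that index; given a parameter name
# it freezes every parameter that comes before the named one; it returns the
# model so calls can be chained.
def freeze_until(model, until):
    if isinstance(until, int):
        for i, child in enumerate(model.children()):
            requires_grad = i >= until          # children before `until` are frozen
            for param in child.parameters():
                param.requires_grad = requires_grad
    else:
        found = False
        for name, param in model.named_parameters():
            if name == until:
                found = True                    # unfreeze from the named parameter onward
            param.requires_grad = found
    return model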