def learn_one_iter(self, high_res: Tensor):
    """
    Run a single training iteration on one batch of high-resolution images.

    The low-resolution model input is synthesized on the fly by downsampling
    ``high_res``, then the callback-wrapped forward/backward/step sequence
    is executed.

    :param high_res: batch of high-resolution target images, values in [0, 1]
    """
    # Synthesize the input by downsampling the targets; clamp because
    # interpolation can overshoot the valid [0, 1] intensity range.
    low_res = torch.clamp(
        F.interpolate(
            high_res,
            scale_factor=self.scale_factor,
            mode=self.downsampling_mode
        ),
        0.0, 1.0
    )

    # Let callbacks (e.g. device transfer) transform the batch first.
    batch = self._cb_handler.on_batch_begin(
        {'high_res': high_res, 'low_res': low_res}, True
    )
    high_res, low_res = batch['high_res'], batch['low_res']

    raw_loss = self.compute_loss(low_res, high_res, True)
    loss = self._cb_handler.after_losses({"loss": raw_loss}, True)["loss"]

    # Each optimization stage is gated by the callback handler so callbacks
    # (mixed precision, gradient clipping, ...) can veto or intervene.
    if self._cb_handler.on_backward_begin():
        loss.backward()
    if self._cb_handler.after_backward():
        self._optimizer.step()
    if self._cb_handler.after_step():
        self._optimizer.zero_grad()

    if get_device().type == 'cuda':
        # Also report GPU memory usage when running on CUDA.
        self._cb_handler.on_batch_end({
            "loss": loss.cpu(),
            "allocated_memory": torch.cuda.memory_allocated(get_device())
        })
    else:
        self._cb_handler.on_batch_end({"loss": loss})
def learn_one_iter(self, high_res: Tensor):
    """
    Run a single training iteration on one batch of high-resolution images,
    supervising against a multi-scale pyramid of downsampled versions.

    :param high_res: batch of high-resolution images, values in [0, 1]
    """
    # Build the pyramid: one clamped, downsampled copy per scale factor
    # (scale 1.0 is skipped), with the original appended as the last level.
    pyramid = [
        torch.clamp(
            F.interpolate(high_res, scale_factor=scale, mode=self.downsampling_mode),
            0.0, 1.0
        )
        for scale in self.scale_factors if scale != 1.0
    ]
    pyramid.append(high_res)

    # Hand the levels to the callbacks keyed as 'pyramid_<i>', then read the
    # (possibly transformed) levels back in the same order.
    keyed = {'pyramid_' + str(i): level for i, level in enumerate(pyramid)}
    data = self._cb_handler.on_batch_begin(keyed, True)
    pyramid = [data["pyramid_" + str(i)] for i in range(len(pyramid))]

    loss = self._cb_handler.after_losses(
        {"loss": self.compute_loss(pyramid, True)}, True
    )["loss"]

    # Callback-gated backward / step / zero_grad sequence.
    if self._cb_handler.on_backward_begin():
        loss.backward()
    if self._cb_handler.after_backward():
        self._optimizer.step()
    if self._cb_handler.after_step():
        self._optimizer.zero_grad()

    if get_device().type == 'cuda':
        # Also report GPU memory usage when running on CUDA.
        self._cb_handler.on_batch_end({
            "loss": loss.cpu(),
            "allocated_memory": torch.cuda.memory_allocated(get_device())
        })
    else:
        self._cb_handler.on_batch_end({"loss": loss})
def __init__(
        self, train_data: DataLoader, val_data: DataLoader, model: Module,
        criterion: Module, optimizer: Optimizer, device=get_device(),
        mixup: bool = False, mixup_alpha: float = 0.4
):
    """
    Supervised learner with optional mixup data augmentation.

    :param train_data: dataloader over the training set
    :param val_data: dataloader over the validation set
    :param model: the network to train
    :param criterion: loss function
    :param optimizer: optimizer updating the model's parameters
    :param device: device to run on (note: the default is evaluated once at
        function-definition time, not per call)
    :param mixup: whether to enable mixup augmentation
    :param mixup_alpha: mixup interpolation parameter — presumably the Beta
        distribution parameter; confirm against MixupTransformer
    """
    super().__init__(train_data, val_data, model, criterion, optimizer)
    self._device = device
    self._mixup = mixup
    # NOTE(review): _mixup_transformer exists only when mixup is enabled;
    # any code reading it must be guarded by self._mixup.
    if mixup:
        self._mixup_transformer = MixupTransformer(alpha=mixup_alpha)
def evaluate_fn(parameterization: Dict[str, Any], model: nn.Module, run: ExperimentRun) -> float:
    """
    Train ``model`` on CIFAR-10 with the supplied hyperparameters and return
    the final validation metric.

    :param parameterization: hyperparameter dict; only "lr" is read
    :param model: network to train (trained in place)
    :param run: experiment-tracking run consumed by the ModelDB callback
    :return: final accuracy reported by the learner
    """
    lr = parameterization["lr"]
    print("Evaluate at learning rate %f" % lr)

    # Set up train and validation data.
    # BUG FIX (review): the previous version split one CIFAR10 instance with
    # random_split and then mutated `train_dataset.dataset.transform`, which
    # also switched the validation subset to the augmented transform (both
    # subsets share the same underlying dataset).  Build two dataset
    # instances with their own transforms and split by shared shuffled
    # indices so augmentation applies to training data only.
    train_transform = Compose([
        RandomHorizontalFlip(),
        RandomResizedCrop(size=32, scale=(0.95, 1.0)),
        ToTensor()
    ])
    train_data = CIFAR10('data/', train=True, download=True, transform=train_transform)
    val_data = CIFAR10('data/', train=True, download=True, transform=ToTensor())

    train_size = int(0.8 * len(train_data))
    indices = torch.randperm(len(train_data)).tolist()
    train_dataset = torch.utils.data.Subset(train_data, indices[:train_size])
    val_dataset = torch.utils.data.Subset(val_data, indices[train_size:])

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=128, shuffle=False)
    print("Number of batches per epoch " + str(len(train_loader)))

    optimizer = SGD(model.parameters(), weight_decay=0.0001, lr=lr, momentum=0.9)
    learner = SupervisedImageLearner(
        train_data=train_loader,
        val_data=val_loader,
        model=model,
        criterion=SmoothedCrossEntropy().to(get_device()),
        optimizer=optimizer,
        mixup=True
    )
    metrics = {"accuracy": Accuracy(), "loss": Loss()}
    callbacks = [
        ToDeviceCallback(),
        LRSchedulerCB(CosineAnnealingLR(optimizer, eta_min=0.024, T_max=405)),
        LossLogger(),
        ModelDBCB(run=run, filepath="weights/model.pt", metrics=metrics,
                  monitor='accuracy', mode='max')
    ]
    return learner.learn(n_epoch=20, callbacks=callbacks, metrics=metrics,
                         final_metric='accuracy')
def __init__(self, train_iterator: Iterator, val_iterator: Iterator,
             model: nn.Module, criterion: nn.Module, optimizer: Optimizer,
             device=get_device()):
    """
    Learner over iterator-style (e.g. torchtext BucketIterator) data sources.

    :param train_iterator: iterator yielding training batches
    :param val_iterator: iterator yielding validation batches
    :param model: the network to train (moved to ``device``)
    :param criterion: loss function (moved to ``device``)
    :param optimizer: optimizer updating the model's parameters
    :param device: device to run on (note: the default is evaluated once at
        function-definition time, not per call)
    """
    # NOTE(review): the train iterator is stored under two names —
    # presumably so base-class code reading _train_data and local code
    # reading _train_iterator see the same object; confirm against the
    # base class.
    self._train_data = self._train_iterator = train_iterator
    self._val_iterator = val_iterator
    self._model = model.to(device)
    self._optimizer = optimizer
    self._criterion = criterion.to(device)
    self._device = device
def model_fn(parameterization: Dict[str, Any]) -> nn.Module:
    """
    Build the small residual CNN used for the hyperparameter search.

    Note that ``parameterization`` is currently not consulted: the
    architecture is fixed and only training hyperparameters are searched.

    :param parameterization: hyperparameter dict (unused here)
    :return: the model, already moved to the default device
    """
    layers = [
        ConvolutionalLayer(in_channels=3, out_channels=16, kernel_size=3, activation=nn.ReLU),
        ResidualBlockPreActivation(in_channels=16, activation=nn.ReLU),
        ConvolutionalLayer(in_channels=16, out_channels=32, kernel_size=3, activation=nn.ReLU),
        ResidualBlockPreActivation(in_channels=32, activation=nn.ReLU),
        FeedforwardBlock(
            in_channels=32,
            out_features=10,
            pool_output_size=2,
            hidden_layer_sizes=(64, 32)
        )
    ]
    return Sequential(*layers).to(get_device())
def run_classifier_test():
    """
    Train a SE-ResNeXt-style classifier with shake-shake regularization on
    Imagenette-160, then report test accuracy (with test-time augmentation)
    for both the best checkpoint and its stochastic-weight-averaged version.
    """
    print("Starting classifier test")
    torch.backends.cudnn.benchmark = True

    # --- Data -------------------------------------------------------------
    # BUG FIX (review): the previous version split a single ImageFolder with
    # random_split and then mutated `train_dataset.dataset.transform`, which
    # also switched the validation subset to the augmented transform (both
    # subsets share one underlying dataset).  Use two dataset instances over
    # the same folder, each with its own transform, split by shared shuffled
    # indices so the partitions stay disjoint.
    train_transform = Compose([
        RandomHorizontalFlip(),
        RandomResizedCrop(size=(128, 128), scale=(0.95, 1.0)),
        ToTensor()
    ])
    eval_transform = Compose([
        Resize((128, 128)),
        ToTensor()
    ])

    train_images = ImageFolder('data/imagenette-160/train', transform=train_transform)
    val_images = ImageFolder('data/imagenette-160/train', transform=eval_transform)
    test_dataset = ImageFolder('data/imagenette-160/val', transform=eval_transform)

    train_size = int(0.8 * len(train_images))
    indices = torch.randperm(len(train_images)).tolist()
    train_dataset = torch.utils.data.Subset(train_images, indices[:train_size])
    val_dataset = torch.utils.data.Subset(val_images, indices[train_size:])

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=128, shuffle=False)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=False)

    # --- Model building blocks --------------------------------------------
    class SEResNeXtShakeShake(ResNeXtBlock):
        """ResNeXt block with squeeze-and-excitation and shake-shake branches."""

        def __init__(self, in_channels, reduction_ratio=16, cardinality=2,
                     activation=nn.ReLU, normalization=nn.BatchNorm2d):
            super(SEResNeXtShakeShake, self).__init__(
                branches=nn.ModuleList(
                    [
                        nn.Sequential(
                            ConvolutionalLayer(
                                in_channels, in_channels // 4, kernel_size=1, padding=0,
                                activation=activation, normalization=normalization
                            ),
                            ConvolutionalLayer(
                                in_channels // 4, in_channels, kernel_size=3, padding=1,
                                activation=activation, normalization=normalization
                            ),
                            SEBlock(in_channels, reduction_ratio)
                        )
                        for _ in range(cardinality)
                    ]
                ),
                use_shake_shake=True
            )

    class StandAloneMultiheadAttentionLayer(nn.Sequential):
        """Stand-alone multihead attention followed by activation and normalization."""

        def __init__(
                self, num_heads, in_channels, out_channels, kernel_size,
                stride=1, padding=3, activation=nn.ReLU, normalization=nn.BatchNorm2d
        ):
            layers = [
                StandAloneMultiheadAttention(
                    num_heads=num_heads,
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=kernel_size,
                    stride=stride,
                    padding=padding,
                    bias=False
                ),
                activation(),
                normalization(num_features=out_channels),
            ]
            super(StandAloneMultiheadAttentionLayer, self).__init__(*layers)

    class SEResNeXtShakeShakeAttention(ResNeXtBlock):
        """Attention-based variant of the SE shake-shake block (not used in the
        model below; kept for experimentation)."""

        def __init__(self, num_heads, in_channels, reduction_ratio=16, cardinality=2,
                     activation=nn.ReLU, normalization=nn.BatchNorm2d):
            super(SEResNeXtShakeShakeAttention, self).__init__(
                branches=nn.ModuleList(
                    [
                        nn.Sequential(
                            ConvolutionalLayer(
                                in_channels=in_channels, out_channels=in_channels // 2,
                                kernel_size=1, activation=activation,
                                normalization=normalization
                            ),
                            StandAloneMultiheadAttentionLayer(
                                num_heads=num_heads, in_channels=in_channels // 2,
                                out_channels=in_channels // 2, kernel_size=3,
                                activation=activation, normalization=normalization
                            ),
                            ConvolutionalLayer(
                                in_channels=in_channels // 2, out_channels=in_channels,
                                kernel_size=1, activation=activation,
                                normalization=normalization
                            ),
                            SEBlock(in_channels, reduction_ratio)
                        )
                        for _ in range(cardinality)
                    ]
                ),
                use_shake_shake=True
            )

    # Downsampling is done by strided 2x2 convolutions between the residual
    # blocks; channel width doubles at each stage (16 -> 512).
    model = Sequential(
        ConvolutionalLayer(in_channels=3, out_channels=16, kernel_size=3, activation=nn.ReLU),
        SEResNeXtShakeShake(in_channels=16, activation=nn.ReLU),
        ConvolutionalLayer(in_channels=16, out_channels=32, activation=nn.ReLU, kernel_size=2, stride=2),
        SEResNeXtShakeShake(in_channels=32),
        ConvolutionalLayer(in_channels=32, out_channels=64, kernel_size=2, stride=2),
        SEResNeXtShakeShake(in_channels=64),
        ConvolutionalLayer(in_channels=64, out_channels=128, kernel_size=2, stride=2),
        SEResNeXtShakeShake(in_channels=128),
        ConvolutionalLayer(in_channels=128, out_channels=256, kernel_size=2, stride=2),
        SEResNeXtShakeShake(in_channels=256),
        ConvolutionalLayer(in_channels=256, out_channels=512, kernel_size=2, stride=2),
        SEResNeXtShakeShake(in_channels=512),
        FeedforwardBlock(
            in_channels=512,
            out_features=10,
            pool_output_size=2,
            hidden_layer_sizes=(256, 128)
        )
    ).to(get_device())

    optimizer = SGD(model.parameters(), weight_decay=0.0001, lr=0.30, momentum=0.9)
    learner = SupervisedImageLearner(
        train_data=train_loader,
        val_data=val_loader,
        model=model,
        criterion=SmoothedCrossEntropy().to(get_device()),
        optimizer=optimizer,
        mixup=True
    )

    swa = StochasticWeightAveraging(learner, average_after=5025, update_every=670)
    callbacks = [
        ToDeviceCallback(),
        InputProgressiveResizing(initial_size=80, max_size=160, upscale_every=10,
                                 upscale_factor=math.sqrt(2)),
        Tensorboard(),
        NaNWarner(),
        LRSchedulerCB(CosineAnnealingLR(optimizer, eta_min=0.10, T_max=335)),
        swa,
        LossLogger(),
        ModelCheckpoint(learner=learner, filepath="weights/model.pt",
                        monitor='accuracy', mode='max'),
        ProgressBarCB()
    ]

    metrics = {
        "accuracy": Accuracy(),
        "loss": Loss()
    }

    final = learner.learn(
        n_epoch=500,
        callbacks=callbacks,
        metrics=metrics,
        final_metric='accuracy'
    )
    print(final)

    # Evaluate the best checkpoint with test-time augmentation.
    load_model(model=model, path="weights/model.pt")
    tta_transform = Compose([
        ToPILImage(),
        RandomHorizontalFlip(),
        RandomResizedCrop(size=(128, 128), scale=(0.95, 1.0)),
        ToTensor()
    ])
    classifier = ImageClassifier(model, tta_transform=tta_transform)
    print(classifier.evaluate(test_loader))

    # Evaluate the stochastic-weight-averaged model the same way.
    print("Test SWA:")
    model = swa.get_averaged_model()
    classifier = ImageClassifier(model, tta_transform=tta_transform)
    print(classifier.evaluate(test_loader))
SEResNeXtShakeShake(in_channels=128), ConvolutionalLayer(in_channels=128, out_channels=256, kernel_size=2, stride=2), SEResNeXtShakeShake(in_channels=256), ConvolutionalLayer(in_channels=256, out_channels=512, kernel_size=2, stride=2), SEResNeXtShakeShake(in_channels=512), # SEResNeXtShakeShakeAttention(num_heads=8, in_channels=512), FeedforwardBlock(in_channels=512, out_features=10, pool_output_size=2, hidden_layer_sizes=(256, 128))).to(get_device()) # lr_finder = LRFinder( # model=model, # train_data=train_loader, # criterion=SmoothedCrossEntropy(), # optimizer=partial(LAMB, lr=0.074, weight_decay=0.01), # device=get_device() # ) # lr_finder.find_lr(warmup=100, callbacks=[ToDeviceCallback()]) # lsuv_init(module=model, input=get_first_batch(train_loader, callbacks = [ToDeviceCallback()])[0]) # print(count_trainable_parameters(model)) # 14437816 3075928 # optimizer = LARS(model.parameters(), weight_decay=0.0001, lr=0.10, momentum=0.9)
def run_test(encoder=None, style_weight=1e5, content_weight=1.0,
             total_variation_weight=1e-4, n_epoch=100, print_every=100,
             eval_every=1, batch_size=4, style_layers=None, content_layers=None,
             train_ratio=0.95, img_dim=(128, 128), style_path="mouse.png",
             save_path="weights/model.pt"):
    """
    Train a single-style transfer model on MiniCOCO content images and save it.

    :param encoder: optional encoder network; a default SE-residual encoder
        is built when None
    :param style_weight: weight of the style loss term
    :param content_weight: weight of the content loss term
    :param total_variation_weight: weight of the total-variation loss term
    :param n_epoch: number of training epochs
    :param print_every: logging interval (iterations)
    :param eval_every: evaluation interval (epochs)
    :param batch_size: batch size for both loaders
    :param style_layers: feature-extractor layer indices for the style loss
        (defaults to {0, 7, 14, 27, 40})
    :param content_layers: feature-extractor layer indices for the content
        loss (defaults to {30})
    :param train_ratio: fraction of the data used for training
    :param img_dim: content-image size (height, width)
    :param style_path: path to the style image
    :param save_path: where the trained model weights are saved
    """
    # BUG FIX (review): style_layers/content_layers previously defaulted to
    # mutable set literals in the signature, which are shared across calls;
    # use None sentinels instead (call-compatible, same effective defaults).
    if style_layers is None:
        style_layers = {0, 7, 14, 27, 40}
    if content_layers is None:
        content_layers = {30}

    # ImageNet normalization constants expected by the VGG feature extractor.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    images = UnlabelledImageDataset("MiniCOCO/128/", img_dim=img_dim)
    train_size = int(train_ratio * len(images))
    val_size = len(images) - train_size
    train_dataset, val_dataset = torch.utils.data.random_split(
        images, [train_size, val_size])
    style = pil_to_tensor(Image.open(style_path).convert("RGB"))

    dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
    dataloader_val = DataLoader(val_dataset, shuffle=True, batch_size=batch_size)
    feature_extractor = FeatureExtractor(model=vgg19_bn, fine_tune=False,
                                         mean=mean, std=std, device=get_device())

    if encoder is None:
        # Default encoder: alternating strided conv / SE-residual stages,
        # downsampling by 2 at each conv (3 -> 256 channels overall).
        encoder = Sequential(
            ConvolutionalLayer(3, 16, padding=1, stride=2, normalization=InstanceNorm2d),
            SEResidualBlockPreActivation(16, normalization=InstanceNorm2d),
            ConvolutionalLayer(16, 32, padding=1, stride=2, normalization=InstanceNorm2d),
            SEResidualBlockPreActivation(32, normalization=InstanceNorm2d),
            ConvolutionalLayer(32, 64, padding=1, stride=2, normalization=InstanceNorm2d),
            SEResidualBlockPreActivation(64, normalization=InstanceNorm2d),
            ConvolutionalLayer(64, 128, padding=1, stride=2, normalization=InstanceNorm2d),
            SEResidualBlockPreActivation(128, normalization=InstanceNorm2d),
            ConvolutionalLayer(128, 256, padding=1, stride=2, normalization=InstanceNorm2d),
        )

    model = CustomDynamicUnet(encoder=encoder, normalization=InstanceNorm2d,
                              n_classes=3, y_range=(0, 1), blur=True)
    print(model)
    learner = StyleTransferLearner(
        dataloader, dataloader_val, style, model, feature_extractor,
        style_layers=style_layers, content_layers=content_layers,
        style_weight=style_weight, content_weight=content_weight,
        total_variation_weight=total_variation_weight,
        device=get_device())
    learner.learn(n_epoch=n_epoch, print_every=print_every,
                  eval_every=eval_every, draw=True, save_path=save_path)
def run_test_multiple(style_weight=10.0, content_weight=1.0, total_variation_weight=0.1,
                      n_epoch=100, batch_size=8, style_path="./data/train_9/"):
    """
    Train an arbitrary-style transfer model on COCO content images with
    multiple style folders.

    :param style_weight: weight of the style loss term
    :param content_weight: weight of the content loss term
    :param total_variation_weight: weight of the total-variation loss term
    :param n_epoch: number of training epochs (converted to iterations below)
    :param batch_size: used only to compute the per-epoch iteration count;
        the dataloaders themselves use a hard-coded batch size of 8
    :param style_path: NOTE(review): currently unused — the style folders are
        hard-coded below; kept for call compatibility
    """
    # Function-local imports keep these heavy dependencies out of module
    # import time.  Names only used by previously commented-out code have
    # been dropped.
    from nntoolbox.vision.learner import MultipleStylesTransferLearner
    from nntoolbox.vision.utils import PairedDataset, UnlabelledImageListDataset
    from nntoolbox.utils import get_device
    from nntoolbox.callbacks import Tensorboard, MultipleMetricLogger, \
        ModelCheckpoint, ToDeviceCallback, LRSchedulerCB
    from torch.optim.lr_scheduler import LambdaLR
    from src.models import MultipleStyleTransferNetwork, PixelShuffleDecoderV2
    from torchvision.models import vgg19
    from torch.utils.data import DataLoader
    from torchvision.transforms import Compose, Resize, RandomCrop
    from torch.optim import Adam

    # ImageNet normalization constants expected by the VGG feature extractor.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    print("Begin creating dataset")
    style_paths_train = ["./data/train_" + str(i) + "/" for i in range(1, 8)]
    style_paths_val = ["./data/train_8/", "./data/train_9/"]
    content_images = UnlabelledImageListDataset(
        "data/train2014/", transform=Compose([Resize(512), RandomCrop((256, 256))]))
    train_style = UnlabelledImageListDataset(
        style_paths_train, transform=Compose([Resize(512), RandomCrop((256, 256))]))
    val_style = UnlabelledImageListDataset(
        style_paths_val, transform=Compose([Resize(512), RandomCrop((256, 256))]))

    print("Begin splitting data")
    train_size = int(0.80 * len(content_images))
    val_size = len(content_images) - train_size
    train_content, val_content = torch.utils.data.random_split(
        content_images, [train_size, val_size])
    train_dataset = PairedDataset(train_content, train_style)
    val_dataset = PairedDataset(val_content, val_style)
    # Sample with replacement so each "batch" draws 8 random content/style pairs.
    train_sampler = RandomSampler(train_dataset, replacement=True, num_samples=8)
    val_sampler = RandomSampler(val_dataset, replacement=True, num_samples=8)

    print("Begin creating data dataloaders")
    dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=8)
    dataloader_val = DataLoader(val_dataset, sampler=val_sampler, batch_size=8)

    print("Creating models")
    feature_extractor = FeatureExtractor(
        model=vgg19(True), fine_tune=False,
        mean=mean, std=std, device=get_device(), last_layer=20)
    print("Finish creating feature extractor")
    decoder = PixelShuffleDecoderV2()
    print("Finish creating decoder")
    model = MultipleStyleTransferNetwork(
        encoder=FeatureExtractor(
            model=vgg19(True), fine_tune=False,
            mean=mean, std=std, device=get_device(), last_layer=20),
        decoder=decoder,
        extracted_feature=20)

    optimizer = Adam(model.parameters(), lr=1e-4)
    # Inverse-decay learning-rate schedule.  FIX (review): the lambda
    # parameter was named `iter`, shadowing the builtin; renamed to `step`.
    lr_scheduler = LRSchedulerCB(
        scheduler=LambdaLR(optimizer, lr_lambda=lambda step: 1 / (1.0 + 5e-5 * step)),
        timescale='iter')
    learner = MultipleStylesTransferLearner(
        dataloader, dataloader_val, model, feature_extractor,
        optimizer=optimizer, style_layers={1, 6, 11, 20},
        total_variation_weight=total_variation_weight,
        style_weight=style_weight, content_weight=content_weight,
        device=get_device())

    # One "epoch" = enough iterations to cover the training style set once.
    every_iter = eval_every = print_every = compute_num_batch(
        len(train_style), batch_size)
    n_iter = every_iter * n_epoch

    callbacks = [
        ToDeviceCallback(),
        Tensorboard(every_iter=every_iter, every_epoch=1),
        MultipleMetricLogger(iter_metrics=[
            "content_loss", "style_loss", "total_variation_loss", "loss"
        ], print_every=print_every),
        lr_scheduler,
        ModelCheckpoint(learner=learner, save_best_only=False,
                        filepath='weights/model.pt'),
    ]
    learner.learn(n_iter=n_iter, callbacks=callbacks, eval_every=eval_every)
# Text-classification data setup using the torchtext (legacy) API.
# NOTE(review): BATCH_SIZE and MAX_VOCAB_SIZE must be defined earlier in the
# file; TEXT/LABEL, the iterators and INPUT_DIM/EMBEDDING_DIM are presumably
# consumed by later code — confirm before renaming anything here.
TEXT = data.Field(tokenize='spacy', include_lengths=True, fix_length=500)
LABEL = data.LabelField(dtype=torch.float)
# train_data, val_data, test_data = SST.splits(
#     text_field=TEXT,
#     label_field=LABEL
# )
train_val_data, test_data = IMDB.splits(TEXT, LABEL)
train_data, val_data = train_val_data.split(split_ratio=0.8)
# sort_within_batch=True groups similarly-sized examples, which models that
# pack padded sequences rely on.
train_iterator, val_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, val_data, test_data),
    batch_size=BATCH_SIZE,
    sort_within_batch=True,
    device=get_device()
)
# Build vocabularies from the training split only (avoids leaking val/test
# tokens); initialize the embedding vectors with pretrained GloVe.
TEXT.build_vocab(train_data, max_size=MAX_VOCAB_SIZE, vectors="glove.6B.100d")
LABEL.build_vocab(train_data)
# max_length = 0
# for batch in train_iterator:
#     texts, text_lengths = batch.text
#     if len(texts) > max_length:
#         max_length = len(texts)
# print(max_length)
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
shuffle=False) test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=False) # print(count_trainable_parameters(model)) # 14437816 3075928 optimizer = SGD(get_trainable_parameters(model), weight_decay=0.0001, lr=0.30, momentum=0.9) learner = SupervisedImageLearner(train_data=train_loader, val_data=val_loader, model=model, criterion=SmoothedCrossEntropy().to( get_device()), optimizer=optimizer, mixup=True) # lr_finder = LRFinder( # model=model, # train_data=train_loader, # criterion=SmoothedCrossEntropy(), # optimizer=partial(SGD, lr=0.074, weight_decay=0.0001, momentum=0.9), # device=get_device() # ) # lr_finder.find_lr(warmup=100, callbacks=[ToDeviceCallback()]) swa = StochasticWeightAveraging(learner, average_after=5025, update_every=670) callbacks = [ # ManifoldMixupCallback(learner=learner, modules=[layer_1, block_1]),