import math
from functools import partial
from typing import Any, Dict

import torch
import torch.nn as nn
from torch.optim import SGD
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import random_split
from torchvision.datasets import CIFAR10, ImageFolder
from torchvision.transforms import (
    Compose, RandomHorizontalFlip, RandomResizedCrop, Resize, ToPILImage, ToTensor
)

# The learner, callbacks, metrics, layers, and helpers used below
# (SupervisedImageLearner, SmoothedCrossEntropy, ToDeviceCallback, Tensorboard,
# LRSchedulerCB, LossLogger, ModelCheckpoint, Accuracy, Loss, SEBlock,
# ResNeXtBlock, ConvolutionalLayer, StandAloneMultiheadAttention,
# FeedforwardBlock, Sequential, StochasticWeightAveraging, ImageClassifier,
# get_device, load_model, etc.) come from the project's own toolbox library,
# and ExperimentRun / ModelDBCB from the experiment-tracking client in use;
# their exact import paths are not shown in this excerpt.


def learn_fn(train_data, val_data, model, save_path):
    """Train `model` on the given datasets, checkpointing the best weights to `save_path`."""
    optimizer = SGD(model.parameters(), weight_decay=0.0001, lr=0.094, momentum=0.9)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, shuffle=True)

    learner = SupervisedImageLearner(
        train_data=train_loader,
        val_data=val_loader,
        model=model,
        criterion=SmoothedCrossEntropy(),
        optimizer=optimizer,
        mixup=True
    )

    callbacks = [
        ToDeviceCallback(),
        Tensorboard(),
        LRSchedulerCB(CosineAnnealingLR(optimizer, eta_min=0.024, T_max=1600)),
        LossLogger(),
        ModelCheckpoint(learner=learner, filepath=save_path, monitor='accuracy', mode='max'),
        # EarlyStoppingCB(monitor='accuracy', mode='max', patience=20)
    ]

    metrics = {
        "accuracy": Accuracy(),
        "loss": Loss()
    }

    final = learner.learn(
        n_epoch=10,
        callbacks=callbacks,
        metrics=metrics,
        final_metric='accuracy'
    )
    print(final)
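
# A minimal usage sketch for learn_fn, assuming a CIFAR-10 split like the one
# built in evaluate_fn below. `_learn_fn_demo` and its `demo_model` are
# hypothetical stand-ins for illustration, not the architecture trained
# elsewhere in this script.
def _learn_fn_demo():
    data = CIFAR10('data/', train=True, download=True, transform=ToTensor())
    train_size = int(0.8 * len(data))
    train_data, val_data = random_split(data, [train_size, len(data) - train_size])
    demo_model = nn.Sequential(      # toy classifier, kept deliberately small
        nn.Conv2d(3, 16, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.AdaptiveAvgPool2d(1),
        nn.Flatten(),
        nn.Linear(16, 10)
    ).to(get_device())
    learn_fn(train_data, val_data, demo_model, save_path="weights/demo.pt")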
def evaluate_fn(parameterization: Dict[str, Any], model: nn.Module, run: ExperimentRun) -> float:
    """Train `model` at the learning rate given in `parameterization` and return
    its best validation accuracy, for use as a hyperparameter-search objective."""
    lr = parameterization["lr"]
    print("Evaluate at learning rate %f" % lr)

    # Set up train and validation data
    data = CIFAR10('data/', train=True, download=True, transform=ToTensor())
    train_size = int(0.8 * len(data))
    val_size = len(data) - train_size
    train_dataset, val_dataset = torch.utils.data.random_split(data, [train_size, val_size])
    # NOTE: both Subsets point at the same underlying dataset object, so this
    # augmentation is applied to the validation split as well.
    train_dataset.dataset.transform = Compose([
        RandomHorizontalFlip(),
        RandomResizedCrop(size=32, scale=(0.95, 1.0)),
        ToTensor()
    ])

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=128, shuffle=False)
    print("Number of batches per epoch " + str(len(train_loader)))

    optimizer = SGD(model.parameters(), weight_decay=0.0001, lr=lr, momentum=0.9)
    learner = SupervisedImageLearner(
        train_data=train_loader,
        val_data=val_loader,
        model=model,
        criterion=SmoothedCrossEntropy().to(get_device()),
        optimizer=optimizer,
        mixup=True
    )

    metrics = {"accuracy": Accuracy(), "loss": Loss()}
    callbacks = [
        ToDeviceCallback(),
        LRSchedulerCB(CosineAnnealingLR(optimizer, eta_min=0.024, T_max=405)),
        LossLogger(),
        ModelDBCB(run=run, filepath="weights/model.pt", metrics=metrics,
                  monitor='accuracy', mode='max')
    ]

    return learner.learn(n_epoch=20, callbacks=callbacks, metrics=metrics, final_metric='accuracy')
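
# A sketch of how evaluate_fn could drive a simple learning-rate search. The
# loop itself is hypothetical (this script does not ship one): `make_model`
# stands in for whatever architecture factory is being tuned, and `run` is an
# ExperimentRun from the experiment-tracking client in use.
def _lr_search_demo(make_model, run: ExperimentRun) -> float:
    import random
    best_lr, best_acc = None, float("-inf")
    for _ in range(5):
        lr = 10 ** random.uniform(-2, -0.5)  # sample lr log-uniformly in [0.01, ~0.32]
        acc = evaluate_fn({"lr": lr}, make_model(), run)
        if acc > best_acc:
            best_lr, best_acc = lr, acc
    print("Best lr %f with accuracy %f" % (best_lr, best_acc))
    return best_lr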
def run_classifier_test():
    print("Starting classifier test")
    # progress_bar_test()
    torch.backends.cudnn.benchmark = True

    # Earlier CIFAR-10 setup, kept for reference:
    # data = CIFAR10('data/', train=True, download=True, transform=ToTensor())
    # train_size = int(0.8 * len(data))
    # val_size = len(data) - train_size
    # train_dataset, val_dataset = torch.utils.data.random_split(data, [train_size, val_size])
    # train_dataset.dataset.transform = Compose(
    #     [
    #         RandomHorizontalFlip(),
    #         RandomResizedCrop(size=32, scale=(0.95, 1.0)),
    #         # Cutout(length=16, n_holes=1),
    #         ToTensor()
    #     ]
    # )
    # test_dataset = torchvision.datasets.CIFAR10('data/', train=False, download=True, transform=ToTensor())
    # kernel = partial(PolynomialKernel, dp=3, cp=2.0)

    train_val_dataset = ImageFolder(
        'data/imagenette-160/train',
        transform=Compose([
            Resize((128, 128)),
            ToTensor()
        ])
    )
    test_dataset = ImageFolder(
        'data/imagenette-160/val',
        transform=Compose([
            Resize((128, 128)),
            ToTensor()
        ])
    )

    train_size = int(0.8 * len(train_val_dataset))
    val_size = len(train_val_dataset) - train_size
    train_dataset, val_dataset = random_split(train_val_dataset, [train_size, val_size])
    # NOTE: both Subsets share the underlying ImageFolder, so this augmentation
    # is applied to the validation split as well.
    train_dataset.dataset.transform = Compose(
        [
            RandomHorizontalFlip(),
            RandomResizedCrop(size=(128, 128), scale=(0.95, 1.0)),
            # Cutout(length=16, n_holes=1),
            ToTensor()
        ]
    )

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=128, shuffle=False)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=False)

    class SEResNeXtShakeShake(ResNeXtBlock):
        """ResNeXt block whose branches end in squeeze-and-excitation, combined
        with shake-shake regularization."""
        def __init__(self, in_channels, reduction_ratio=16, cardinality=2,
                     activation=nn.ReLU, normalization=nn.BatchNorm2d):
            super(SEResNeXtShakeShake, self).__init__(
                branches=nn.ModuleList(
                    [
                        nn.Sequential(
                            ConvolutionalLayer(
                                in_channels, in_channels // 4, kernel_size=1, padding=0,
                                activation=activation, normalization=normalization
                            ),
                            ConvolutionalLayer(
                                in_channels // 4, in_channels, kernel_size=3, padding=1,
                                activation=activation, normalization=normalization
                            ),
                            # ConvolutionalLayer(
                            #     in_channels // 4, in_channels, kernel_size=1, padding=0,
                            #     activation=activation, normalization=normalization
                            # ),
                            SEBlock(in_channels, reduction_ratio)
                        ) for _ in range(cardinality)
                    ]
                ),
                use_shake_shake=True
            )

    class StandAloneMultiheadAttentionLayer(nn.Sequential):
        """Stand-alone multihead self-attention followed by activation and normalization."""
        def __init__(
                self, num_heads, in_channels, out_channels, kernel_size,
                stride=1, padding=3, activation=nn.ReLU, normalization=nn.BatchNorm2d
        ):
            layers = [
                StandAloneMultiheadAttention(
                    num_heads=num_heads,
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=kernel_size,
                    stride=stride,
                    padding=padding,
                    bias=False
                ),
                activation(),
                normalization(num_features=out_channels),
            ]
            super(StandAloneMultiheadAttentionLayer, self).__init__(*layers)

    class SEResNeXtShakeShakeAttention(ResNeXtBlock):
        """Variant of SEResNeXtShakeShake with a stand-alone attention bottleneck."""
        def __init__(self, num_heads, in_channels, reduction_ratio=16, cardinality=2,
                     activation=nn.ReLU, normalization=nn.BatchNorm2d):
            super(SEResNeXtShakeShakeAttention, self).__init__(
                branches=nn.ModuleList(
                    [
                        nn.Sequential(
                            ConvolutionalLayer(
                                in_channels=in_channels, out_channels=in_channels // 2,
                                kernel_size=1, activation=activation, normalization=normalization
                            ),
                            StandAloneMultiheadAttentionLayer(
                                num_heads=num_heads,
                                in_channels=in_channels // 2,
                                out_channels=in_channels // 2,
                                kernel_size=3,
                                activation=activation,
                                normalization=normalization
                            ),
                            ConvolutionalLayer(
                                in_channels=in_channels // 2, out_channels=in_channels,
                                kernel_size=1, activation=activation, normalization=normalization
                            ),
                            SEBlock(in_channels, reduction_ratio)
                        ) for _ in range(cardinality)
                    ]
                ),
                use_shake_shake=True
            )

    # layer_1 = ManifoldMixupModule(ConvolutionalLayer(in_channels=3, out_channels=16, kernel_size=3, activation=nn.ReLU))
    # block_1 = ManifoldMixupModule(SEResNeXtShakeShake(in_channels=16, activation=nn.ReLU))
    model = Sequential(
        ConvolutionalLayer(in_channels=3, out_channels=16, kernel_size=3, activation=nn.ReLU),
        SEResNeXtShakeShake(in_channels=16, activation=nn.ReLU),
        # layer_1,
        # block_1,
        ConvolutionalLayer(in_channels=16, out_channels=32, activation=nn.ReLU, kernel_size=2, stride=2),
        SEResNeXtShakeShake(in_channels=32),
        ConvolutionalLayer(in_channels=32, out_channels=64, kernel_size=2, stride=2),
        SEResNeXtShakeShake(in_channels=64),
        ConvolutionalLayer(in_channels=64, out_channels=128, kernel_size=2, stride=2),
        SEResNeXtShakeShake(in_channels=128),
        ConvolutionalLayer(in_channels=128, out_channels=256, kernel_size=2, stride=2),
        SEResNeXtShakeShake(in_channels=256),
        ConvolutionalLayer(in_channels=256, out_channels=512, kernel_size=2, stride=2),
        SEResNeXtShakeShake(in_channels=512),
        # SEResNeXtShakeShakeAttention(num_heads=8, in_channels=512),
        FeedforwardBlock(
            in_channels=512,
            out_features=10,
            pool_output_size=2,
            hidden_layer_sizes=(256, 128)
        )
    ).to(get_device())

    # lsuv_init(module=model, input=get_first_batch(train_loader, callbacks=[ToDeviceCallback()])[0])
    # print(count_trainable_parameters(model))  # 14437816 3075928

    optimizer = SGD(model.parameters(), weight_decay=0.0001, lr=0.30, momentum=0.9)
    learner = SupervisedImageLearner(
        train_data=train_loader,
        val_data=val_loader,
        model=model,
        criterion=SmoothedCrossEntropy().to(get_device()),
        optimizer=optimizer,
        mixup=True
    )

    # lr_finder = LRFinder(
    #     model=model,
    #     train_data=train_loader,
    #     criterion=SmoothedCrossEntropy(),
    #     optimizer=partial(SGD, lr=0.074, weight_decay=0.0001, momentum=0.9),
    #     device=get_device()
    # )
    # lr_finder.find_lr(warmup=100, callbacks=[ToDeviceCallback()])

    swa = StochasticWeightAveraging(learner, average_after=5025, update_every=670)
    callbacks = [
        # ManifoldMixupCallback(learner=learner, modules=[layer_1, block_1]),
        ToDeviceCallback(),
        InputProgressiveResizing(initial_size=80, max_size=160, upscale_every=10,
                                 upscale_factor=math.sqrt(2)),
        # MixedPrecisionV2(),
        Tensorboard(),
        NaNWarner(),
        # ReduceLROnPlateauCB(optimizer, monitor='accuracy', mode='max', patience=10),
        LRSchedulerCB(CosineAnnealingLR(optimizer, eta_min=0.10, T_max=335)),
        swa,
        LossLogger(),
        ModelCheckpoint(learner=learner, filepath="weights/model.pt", monitor='accuracy', mode='max'),
        ProgressBarCB()
    ]

    metrics = {
        "accuracy": Accuracy(),
        "loss": Loss()
    }

    final = learner.learn(
        n_epoch=500,
        callbacks=callbacks,
        metrics=metrics,
        final_metric='accuracy'
    )
    print(final)

    # Evaluate the best checkpoint with test-time augmentation.
    load_model(model=model, path="weights/model.pt")
    classifier = ImageClassifier(
        model,
        tta_transform=Compose([
            ToPILImage(),
            RandomHorizontalFlip(),
            RandomResizedCrop(size=(128, 128), scale=(0.95, 1.0)),
            ToTensor()
        ])
    )
    print(classifier.evaluate(test_loader))

    # Evaluate the SWA-averaged weights the same way.
    print("Test SWA:")
    model = swa.get_averaged_model()
    classifier = ImageClassifier(
        model,
        tta_transform=Compose([
            ToPILImage(),
            RandomHorizontalFlip(),
            RandomResizedCrop(size=(128, 128), scale=(0.95, 1.0)),
            ToTensor()
        ])
    )
    print(classifier.evaluate(test_loader))
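
# For reference, a minimal sketch of what shake-shake mixing over a block's
# branches computes. The real behavior lives in the toolbox's ResNeXtBlock
# (use_shake_shake=True); this standalone class is illustrative only, and it
# simplifies the original technique by reusing the forward coefficients in the
# backward pass (true shake-shake resamples them independently).
class ShakeShakeSketch(nn.Module):
    def __init__(self, branches: nn.ModuleList):
        super().__init__()
        self.branches = branches

    def forward(self, x):
        outputs = [branch(x) for branch in self.branches]
        if self.training:
            # Random convex combination of branch outputs, resampled per batch.
            alphas = torch.rand(len(outputs), device=x.device)
            alphas = alphas / alphas.sum()
            mixed = sum(a * out for a, out in zip(alphas, outputs))
        else:
            # At eval time, average the branches.
            mixed = sum(outputs) / len(outputs)
        return x + mixed  # residual connection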
# Alternate training tail (uses the learner/optimizer set up as above):
# gradual LR warmup into cosine annealing instead of SWA and progressive resizing.
    callbacks = [
        # ManifoldMixupCallback(learner=learner, modules=[layer_1, block_1]),
        ToDeviceCallback(),
        # MixedPrecisionV2(),
        # InputProgressiveResizing(initial_size=80, max_size=160, upscale_every=10, upscale_factor=math.sqrt(2)),
        Tensorboard(),
        # ReduceLROnPlateauCB(optimizer, monitor='accuracy', mode='max', patience=10),
        LRSchedulerCB(CosineAnnealingLR(optimizer, eta_min=0.024, T_max=405)),
        GradualLRWarmup(min_lr=0.024, max_lr=0.094, duration=810),
        LossLogger(),
        ModelCheckpoint(learner=learner, filepath="weights/model.pt", monitor='accuracy', mode='max'),
    ]

    metrics = {"accuracy": Accuracy(), "loss": Loss()}
    final = learner.learn(n_epoch=500, callbacks=callbacks, metrics=metrics, final_metric='accuracy')
    print(final)

    load_model(model=model, path="weights/model.pt")
    classifier = ImageClassifier(
        model,
        tta_transform=Compose([
            ToPILImage(),
            RandomHorizontalFlip(),
            RandomResizedCrop(size=(128, 128), scale=(0.95, 1.0)),
            ToTensor()
        ])
    )
    print(classifier.evaluate(test_loader))
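
# A sketch of the linear warmup schedule that GradualLRWarmup presumably
# implements (min_lr -> max_lr over `duration` steps, then handing off to the
# cosine annealing schedule). The real callback lives in the toolbox; treat
# this helper as illustrative only.
def warmup_lr(step: int, min_lr: float = 0.024, max_lr: float = 0.094, duration: int = 810) -> float:
    """Linearly interpolate the learning rate during the warmup phase."""
    if step >= duration:
        return max_lr
    return min_lr + (max_lr - min_lr) * step / duration


# Assumed entry point for running this script directly.
if __name__ == "__main__":
    run_classifier_test()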