# Example #1
from torch.optim import Adam
from torch.utils.data import DataLoader
import argparse
import torch

from few_shot.datasets import OmniglotDataset, MiniImageNet
from few_shot.models import get_few_shot_encoder
from few_shot.core import NShotTaskSampler, EvaluateFewShot, prepare_nshot_task
from few_shot.proto import proto_net_episode
from few_shot.train import fit
from few_shot.callbacks import *
from few_shot.utils import setup_dirs
from config import PATH


# --- Script-level setup (runs at import time) ---
setup_dirs()  # create the model/log output directories used below
assert torch.cuda.is_available()  # NOTE(review): hard CUDA requirement — script aborts on CPU-only hosts
device = torch.device('cuda')
torch.backends.cudnn.benchmark = True  # autotune conv kernels; helps when input sizes are fixed


##############
# Parameters #
##############
# Few-shot episode configuration: n-* = support shots per class,
# k-* = classes (ways) per episode.  Train/test episodes are configured
# independently.
parser = argparse.ArgumentParser()
parser.add_argument('--dataset')  # NOTE(review): no default and no `choices` — presumably 'omniglot' or 'miniImageNet'; verify downstream
parser.add_argument('--distance', default='l2')  # distance metric for the prototypical loss
parser.add_argument('--n-train', default=1, type=int)
parser.add_argument('--n-test', default=1, type=int)
parser.add_argument('--k-train', default=60, type=int)
parser.add_argument('--k-test', default=5, type=int)
    def test(self):
        """Interactively visualize activation maps of a trained few-shot
        classifier and their deconvolutional reconstructions.

        Loads one Omniglot character image, runs it through a saved
        ``FewShotClassifier``, then loops: the user picks a layer index,
        the layer's activation grid is shown, the user clicks one map,
        and the map is transpose-convolved back to input space via
        ``FewShotDeconv`` and displayed alongside.
        """

        # Single sample image from the Omniglot background split.
        img_filename = os.path.join( DATA_PATH ,'Omniglot',
                                     'images_background',
                                     'Latin.0',
                                     'character16',
                                     '0698_01.137602.png')
        img = io.imread(img_filename)
        # Add batch and channel axes: (H, W) -> (1, 1, H, W).
        img = img[np.newaxis, np.newaxis, :, :]
        # Min-max normalize to [0, 1].
        img = (img - img.min()) / (img.max() - img.min())
        img = torch.from_numpy(img)
        print(img.size())
        n = 5
        k = 300  # number of output classes the saved classifier was trained with
        setup_dirs()
        # NOTE(review): asserts CUDA is available but then runs on CPU
        # (see `device` below) — the assert looks vestigial; confirm.
        assert torch.cuda.is_available()

        device = torch.device('cpu')
        torch.backends.cudnn.benchmark = True

        # Model weights are stored as double precision checkpoints.
        model = FewShotClassifier(1, k).to(device, dtype=torch.double)
        model.load_state_dict(torch.load(os.path.join("models", "semantic_classifier",
                                                      "test_k=300_few_shot_classifier.pth")))

        # Forward pass populates model.feature_outputs / model.pool_indices
        # used below — presumably recorded by hooks; verify in FewShotClassifier.
        conv_out = model(img)

        deconv_model = FewShotDeconv(model).to(device, dtype=torch.double)

        conv_layer_indices = model.get_conv_layer_indices()

        plt.ion()  # non-blocking interactive plotting
        plt.figure(figsize=(10, 5))

        done = False
        i=0
        while not done:
            layer = input('Layer to view (0-12, -1 to exit): ')
            try:
                layer = int(layer)
            except ValueError:
                continue  # non-numeric input: re-prompt

            if layer < 0:
                sys.exit(0)
            # Activation maps for the chosen layer, moved to
            # (C, H, W, batch) layout for the grid visualizer.
            activ_map = model.feature_outputs[layer].data.numpy()
            activ_map = activ_map.transpose(1, 2, 3, 0)
            activ_map_grid = vis_grid(activ_map)
            vis_layer(activ_map_grid)

            # only transpose convolve from Conv2d or ReLU layers
            conv_layer = layer
            if conv_layer not in conv_layer_indices:
                # A ReLU directly follows its Conv2d, so step back one index.
                conv_layer -= 1
                if conv_layer not in conv_layer_indices:
                    continue

            n_maps = activ_map.shape[0]

            marker = None
            while True:
                choose_map = True# input('Select map?  (y/[n]): ') == 'y'
                # Remove the previous click marker, if any.
                # NOTE(review): `!= None` — prefer `is not None`.
                if marker != None:
                    marker.pop(0).remove()

                if not choose_map:
                    break

                # Translate the clicked pixel position into a map index
                # within the tiled activation grid (1-pixel gutters).
                _, map_x_dim, map_y_dim, _ = activ_map.shape
                map_img_x_dim, map_img_y_dim, _ = activ_map_grid.shape
                x_step = map_img_x_dim // (map_x_dim + 1)

                print('Click on an activation map to continue')
                x_pos, y_pos = plt.ginput(1)[0]
                x_index = x_pos // (map_x_dim + 1)
                y_index = y_pos // (map_y_dim + 1)
                map_idx = int(x_step * y_index + x_index)

                if map_idx >= n_maps:
                    print('Invalid map selected')
                    continue

                # Reconstruct the selected single map back to input space.
                decon = deconv_model(model.feature_outputs[layer][0][map_idx][None, None, :, :], conv_layer, map_idx,
                                model.pool_indices)
                img = decon_img(decon)
                img = img.reshape((28,28))
                print(img.shape)
                # Left: activation grid with click marker; right: reconstruction.
                plt.subplot(121)
                vis_layer(activ_map_grid)
                marker = plt.plot(x_pos, y_pos, marker='+', color='red')
                plt.subplot(122)
                plt.imshow(img)
                # plt.savefig('deconvnet' + str(x_pos) + '_' + str(y_pos) + '_hiragana13_layer=' + str(layer)+ '.png')
                i += 1
# Example #3
def few_shot_training(datadir=DATA_PATH,
                      dataset='fashion',
                      num_input_channels=3,
                      drop_lr_every=20,
                      validation_episodes=200,
                      evaluation_episodes=1000,
                      episodes_per_epoch=100,
                      n_epochs=80,
                      small_dataset=False,
                      n_train=1,
                      n_test=1,
                      k_train=30,
                      k_test=5,
                      q_train=5,
                      q_test=1,
                      distance='l2',
                      pretrained=False,
                      monitor_validation=False,
                      n_val_classes=10,
                      architecture='resnet18',
                      gpu=None):
    """Train a Prototypical Network on the fashion-product dataset.

    Builds a meta-training (background) episode loader, a meta-test
    (evaluation) loader, and optionally a meta-validation loader carved
    out of the background classes, then fits ``proto_net_episode`` with
    checkpointing, LR scheduling and CSV logging.

    Args:
        datadir: Root data directory.
        dataset: Dataset family; only ``'fashion'`` is supported.
        num_input_channels: Input channels for the from-scratch encoder.
        drop_lr_every: Halve the learning rate every this many epochs.
        validation_episodes: Episodes per validation pass (epoch end).
        evaluation_episodes: Episodes for the final test pass.
        episodes_per_epoch: Training episodes per epoch.
        n_epochs: Number of training epochs.
        small_dataset: Use the downscaled image variant (80x60 crops).
        n_train / n_test: Support shots per class (train / eval episodes).
        k_train / k_test: Classes ("ways") per episode (train / eval).
        q_train / q_test: Query samples per class (train / eval).
        distance: Distance metric for the prototypical loss, e.g. 'l2'.
        pretrained: Use an ImageNet-pretrained torchvision backbone
            (requires CUDA) instead of the few-shot encoder.
        monitor_validation: Hold out ``n_val_classes`` background classes
            and evaluate on them after every epoch.
        n_val_classes: Number of held-out validation classes (raised to
            ``k_test`` if smaller, since an episode needs k_test classes).
        architecture: torchvision model name used when ``pretrained``.
        gpu: Specific CUDA device index, or None for the default device.

    Raises:
        ValueError: If ``dataset`` is not ``'fashion'``.
    """
    setup_dirs()

    if dataset == 'fashion':
        dataset_class = FashionProductImagesSmall if small_dataset \
            else FashionProductImages
    else:
        # FIXED: was `raise (ValueError, 'Unsupported dataset')`, which
        # raises TypeError ("exceptions must derive from BaseException")
        # because a tuple is not an exception instance.
        raise ValueError('Unsupported dataset')

    # Identifier used for checkpoint and log filenames.
    param_str = f'{dataset}_nt={n_train}_kt={k_train}_qt={q_train}_' \
                f'nv={n_test}_kv={k_test}_qv={q_test}_small={small_dataset}_' \
                f'pretrained={pretrained}_validate={monitor_validation}'

    print(param_str)

    ###################
    # Create datasets #
    ###################

    # ADAPTED: data transforms including augmentation
    resize = (80, 60) if small_dataset else (400, 300)

    # Training-time augmentation; evaluation uses a plain resize.
    background_transform = transforms.Compose([
        transforms.RandomResizedCrop(resize, scale=(0.8, 1.0)),
        # transforms.RandomGrayscale(),
        transforms.RandomPerspective(),
        transforms.RandomHorizontalFlip(),
        # transforms.Resize(resize),
        transforms.ToTensor(),
        # transforms.Normalize(mean=[0.485, 0.456, 0.406],
        #                     std=[0.229, 0.224, 0.225])
    ])

    evaluation_transform = transforms.Compose([
        transforms.Resize(resize),
        # transforms.CenterCrop(224),
        transforms.ToTensor(),
        # transforms.Normalize(mean=[0.485, 0.456, 0.406],
        #                     std=[0.229, 0.224, 0.225])
    ])

    if monitor_validation:
        # An n-shot/k-way episode needs at least k_test distinct classes.
        if n_val_classes < k_test:
            n_val_classes = k_test
            print("Warning: `n_val_classes` < `k_test`. Take a larger number"
                  " of validation classes next time. Increased to `k_test`"
                  " classes")

        # class structure for background (training), validation (validation),
        # evaluation (test): take a random subset of background classes
        validation_classes = list(
            np.random.choice(dataset_class.background_classes, n_val_classes))
        background_classes = list(
            set(dataset_class.background_classes).difference(
                set(validation_classes)))

        # use keyword for evaluation classes
        evaluation_classes = 'evaluation'

        # Meta-validation set
        validation = dataset_class(datadir,
                                   split='all',
                                   classes=validation_classes,
                                   transform=evaluation_transform)
        # ADAPTED: in the original code, `episodes_per_epoch` was provided to
        # `NShotTaskSampler` instead of `validation_episodes`.
        validation_sampler = NShotTaskSampler(validation, validation_episodes,
                                              n_test, k_test, q_test)
        validation_taskloader = DataLoader(validation,
                                           batch_sampler=validation_sampler,
                                           num_workers=4)
    else:
        # use keyword for both background and evaluation classes
        background_classes = 'background'
        evaluation_classes = 'evaluation'

    # Meta-training set
    background = dataset_class(datadir,
                               split='all',
                               classes=background_classes,
                               transform=background_transform)
    background_sampler = NShotTaskSampler(background, episodes_per_epoch,
                                          n_train, k_train, q_train)
    background_taskloader = DataLoader(background,
                                       batch_sampler=background_sampler,
                                       num_workers=4)

    # Meta-test set
    evaluation = dataset_class(datadir,
                               split='all',
                               classes=evaluation_classes,
                               transform=evaluation_transform)
    # ADAPTED: in the original code, `episodes_per_epoch` was provided to
    # `NShotTaskSampler` instead of `evaluation_episodes`.
    evaluation_sampler = NShotTaskSampler(evaluation, evaluation_episodes,
                                          n_test, k_test, q_test)
    evaluation_taskloader = DataLoader(evaluation,
                                       batch_sampler=evaluation_sampler,
                                       num_workers=4)

    #########
    # Model #
    #########

    if torch.cuda.is_available():
        if gpu is not None:
            device = torch.device('cuda', gpu)
        else:
            device = torch.device('cuda')
        torch.backends.cudnn.benchmark = True
    else:
        device = torch.device('cpu')

    if not pretrained:
        model = get_few_shot_encoder(num_input_channels)
        # ADAPTED: train in float32 (the original repo used double).
        model.to(device)
        # BEFORE
        # model.to(device, dtype=torch.double)
    else:
        # Pretrained backbones are CUDA-only here.
        assert torch.cuda.is_available()
        model = models.__dict__[architecture](pretrained=True)
        # Strip the classification head; the backbone acts as the embedding.
        model.fc = Identity()
        if gpu is not None:
            model = model.cuda(gpu)
        else:
            model = model.cuda()
        # TODO this is too risky: I'm not sure that this can work, since in
        #  the few-shot github repo the batch axis is actually split into
        #  support and query samples
        # model = torch.nn.DataParallel(model).cuda()

    def lr_schedule(epoch, lr):
        # Halve the learning rate every `drop_lr_every` epochs.
        if epoch % drop_lr_every == 0:
            return lr / 2
        else:
            return lr

    ############
    # Training #
    ############
    print(f'Training Prototypical network on {dataset}...')
    optimiser = Adam(model.parameters(), lr=1e-3)
    loss_fn = torch.nn.NLLLoss().to(device)

    callbacks = [
        # ADAPTED: this is the test monitoring now - and is only done at the
        # end of training.
        EvaluateFewShot(
            eval_fn=proto_net_episode,
            num_tasks=evaluation_episodes,  # THIS IS NOT USED
            n_shot=n_test,
            k_way=k_test,
            q_queries=q_test,
            taskloader=evaluation_taskloader,
            prepare_batch=prepare_nshot_task(n_test,
                                             k_test,
                                             q_test,
                                             device=device),
            distance=distance,
            on_epoch_end=False,
            on_train_end=True,
            prefix='test_')
    ]
    if monitor_validation:
        callbacks.append(
            # ADAPTED: this is the validation monitoring now - computed
            # after every epoch.
            EvaluateFewShot(
                eval_fn=proto_net_episode,
                num_tasks=evaluation_episodes,  # THIS IS NOT USED
                n_shot=n_test,
                k_way=k_test,
                q_queries=q_test,
                # BEFORE taskloader=evaluation_taskloader,
                taskloader=validation_taskloader,  # ADAPTED
                prepare_batch=prepare_nshot_task(n_test,
                                                 k_test,
                                                 q_test,
                                                 device=device),
                distance=distance,
                on_epoch_end=True,  # ADAPTED
                on_train_end=False,  # ADAPTED
                prefix='val_'))
    callbacks.extend([
        # NOTE(review): the monitored key has a `val_` prefix, which only
        # exists when `monitor_validation` is True; with
        # `save_best_only=False` the monitor is ignored otherwise.
        ModelCheckpoint(
            filepath=PATH + f'/models/proto_nets/{param_str}.pth',
            monitor=f'val_{n_test}-shot_{k_test}-way_acc',
            verbose=1,  # ADAPTED
            save_best_only=monitor_validation  # ADAPTED
        ),
        LearningRateScheduler(schedule=lr_schedule),
        CSVLogger(PATH + f'/logs/proto_nets/{param_str}.csv'),
    ])

    fit(
        model,
        optimiser,
        loss_fn,
        epochs=n_epochs,
        dataloader=background_taskloader,
        prepare_batch=prepare_nshot_task(n_train,
                                         k_train,
                                         q_train,
                                         device=device),
        callbacks=callbacks,
        metrics=['categorical_accuracy'],
        fit_function=proto_net_episode,
        fit_function_kwargs={
            'n_shot': n_train,
            'k_way': k_train,
            'q_queries': q_train,
            'train': True,
            'distance': distance
        },
    )
    def test(self):
        """Fine-tune a frozen semantic binary encoder's classification head
        on held-out Omniglot evaluation classes.

        Loads a saved ``SemanticBinaryEncoder``, freezes its weights,
        wraps it in ``TestSemanticBinaryClassifier``, and trains only the
        new head with a train/validation split drawn from ``k`` randomly
        chosen evaluation classes.
        """
        k = 200  # number of evaluation classes to sample
        n = 5    # shots per class
        epochs = 20  # NOTE(review): unused — fit() below hard-codes epochs=100
        size_binary_layer = 10
        stochastic = True  # NOTE(review): unused — encoder below hard-codes stochastic=True
        n_conv_layers = 4  # NOTE(review): unused with the encoder variant chosen below
        lr = 0.01

        model_name = 'Omniglot__n=5_k=20_epochs=1000__lr=__size_binary_layer=10__size_continue_layer=10__stochastic__simplified_encoder'
        validation_split = .2

        setup_dirs()
        assert torch.cuda.is_available()

        device = torch.device('cuda')
        torch.backends.cudnn.benchmark = True

        # model = SemanticBinaryClassifier(1, k, size_binary_layer=size_binary_layer, stochastic=stochastic,
        #                                  size_dense_layer_before_binary=None,
        #                                  n_conv_layers=n_conv_layers)
        #model = FewShotClassifier(1, k)
        model = SemanticBinaryEncoder(1, 10, 10, stochastic=True)
        model.load_state_dict(torch.load(os.path.join("models", "semantic_gan",
                                                     model_name+".pth")))

        evaluation = OmniglotDataset('evaluation')

        # Pick k random classes and shuffle each class's rows in place.
        # NOTE(review): assigning `df[mask] = df[mask].sample(frac=1)` aligns
        # on the index, so this may be a no-op shuffle — verify intent.
        classes = np.random.choice(evaluation.df['class_id'].unique(), size=k)
        for i in classes:
            evaluation.df[evaluation.df['class_id'] == i] = evaluation.df[evaluation.df['class_id'] == i].sample(frac=1)

        # BasicSampler splits each class into train/validation portions
        # according to `validation_split` (True = training portion).
        train_dataloader = DataLoader(
            evaluation,
            batch_sampler=BasicSampler(evaluation, validation_split, True, classes, n=n),
            num_workers=8
        )

        eval_dataloader = DataLoader(
            evaluation,
            batch_sampler=BasicSampler(evaluation, validation_split, False, classes, n=n),
            num_workers=8
        )

        # Classification head over the frozen encoder; double precision to
        # match the saved checkpoint.
        test_model = TestSemanticBinaryClassifier(k, model, size_binary_layer=size_binary_layer).to(device, dtype=torch.double)
        loss_fn = nn.CrossEntropyLoss().to(device)
        optimiser = torch.optim.Adam(test_model.parameters(), lr=lr)

        def prepare_batch(n, k):
            # Returns a closure that moves x to GPU and replaces y with the
            # canonical 0..k-1 n-shot task labels.
            def prepare_batch_(batch):
                x, y = batch
                x = x.double().cuda()
                # Create dummy 0-(num_classes - 1) label
                y = create_nshot_task_label(k, n).cuda()
                return x, y
            return prepare_batch_

        evalmetrics = EvaluateMetrics(eval_dataloader)
        evalmetrics.set_params({'metrics': ['categorical_accuracy'],
                            'prepare_batch': prepare_batch(n, k),
                            'loss_fn': loss_fn})

        callbacks = [
            evalmetrics,

            ModelCheckpoint(
                filepath=os.path.join(PATH, 'models', 'semantic_classifier', model_name + 'test_other_class.pth'),
                monitor='val_' + str(n) + '-shot_' + str(k) + '-way_acc'
            ),
            ReduceLROnPlateau(patience=10, factor=0.5, monitor='val_loss'),
            CSVLogger(os.path.join(PATH, 'logs', 'semantic_classifier', model_name + 'test_other_class.csv'))
        ]

        #print(summary(model, (1, 28, 28)))
        # Freeze the encoder: only the classification head trains.
        for param in model.parameters():
            param.requires_grad = False
        fit(
            test_model,
            optimiser,
            loss_fn,
            epochs=100,
            dataloader=train_dataloader,
            prepare_batch=prepare_batch(n, k),
            callbacks=callbacks,
            metrics=['categorical_accuracy'],
            fit_function=gradient_step,
            fit_function_kwargs={'n_shot': n, 'k_way': k, 'device': device},
        )
def train_sweep():
    """Run one wandb sweep trial: train a Prototypical Network with an
    XLNet embedding model on a text intent dataset.

    Reads episode configuration from the CLI (argparse), hyperparameters
    (lr, optimiser) from ``wandb.config``, builds n-shot task loaders for
    the chosen dataset, and fits ``proto_net_episode``.  Requires CUDA.

    Raises:
        ValueError: If ``--dataset`` names an unsupported dataset.
    """

    from torch.optim import Adam
    from torch.utils.data import DataLoader
    import argparse

    from few_shot.datasets import OmniglotDataset, MiniImageNet, ClinicDataset, SNIPSDataset, CustomDataset
    from few_shot.models import XLNetForEmbedding
    from few_shot.core import NShotTaskSampler, EvaluateFewShot, prepare_nshot_task
    from few_shot.proto import proto_net_episode
    from few_shot.train_with_prints import fit
    from few_shot.callbacks import CallbackList, Callback, DefaultCallback, ProgressBarLogger, CSVLogger, EvaluateMetrics, ReduceLROnPlateau, ModelCheckpoint, LearningRateScheduler
    from few_shot.utils import setup_dirs
    from few_shot.utils import get_gpu_info
    from config import PATH
    import wandb
    from transformers import AdamW

    import torch

    # GPU memory snapshot before any allocation, for sweep debugging.
    gpu_dict = get_gpu_info()
    print('Total GPU Mem: {} , Used GPU Mem: {}, Used Percent: {}'.format(
        gpu_dict['mem_total'], gpu_dict['mem_used'],
        gpu_dict['mem_used_percent']))

    setup_dirs()
    assert torch.cuda.is_available()
    device = torch.device('cuda')
    torch.backends.cudnn.benchmark = True

    ##############
    # Parameters #
    ##############
    # n-* = support shots, k-* = ways, q-* = query samples per class.
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', default='Custom')
    parser.add_argument('--distance', default='l2')
    parser.add_argument('--n-train', default=2, type=int)
    parser.add_argument('--n-test', default=2, type=int)
    parser.add_argument('--k-train', default=2, type=int)
    parser.add_argument('--k-test', default=2, type=int)
    parser.add_argument('--q-train', default=2, type=int)
    parser.add_argument('--q-test', default=2, type=int)
    args = parser.parse_args()

    evaluation_episodes = 100
    episodes_per_epoch = 10

    if args.dataset == 'omniglot':
        n_epochs = 40
        dataset_class = OmniglotDataset
        num_input_channels = 1
        drop_lr_every = 20
    elif args.dataset == 'miniImageNet':
        n_epochs = 80
        dataset_class = MiniImageNet
        num_input_channels = 3
        drop_lr_every = 40
    elif args.dataset == 'clinic150':
        n_epochs = 5
        dataset_class = ClinicDataset
        num_input_channels = 150
        drop_lr_every = 2
    elif args.dataset == 'SNIPS':
        n_epochs = 5
        dataset_class = SNIPSDataset
        num_input_channels = 150
        drop_lr_every = 2
    elif args.dataset == 'Custom':
        n_epochs = 20
        dataset_class = CustomDataset
        num_input_channels = 150
        drop_lr_every = 5
    else:
        # FIXED: was `raise (ValueError, 'Unsupported dataset')`, which
        # raises TypeError because a tuple is not an exception instance.
        raise ValueError('Unsupported dataset')

    # Identifier used for checkpoint and log filenames.
    param_str = f'{args.dataset}_nt={args.n_train}_kt={args.k_train}_qt={args.q_train}_' \
                f'nv={args.n_test}_kv={args.k_test}_qv={args.q_test}'

    print(param_str)

    from sklearn.model_selection import train_test_split

    ###################
    # Create datasets #
    ###################

    train_df = dataset_class('train')

    train_taskloader = DataLoader(train_df,
                                  batch_sampler=NShotTaskSampler(
                                      train_df, episodes_per_epoch,
                                      args.n_train, args.k_train,
                                      args.q_train))

    val_df = dataset_class('val')

    evaluation_taskloader = DataLoader(
        val_df,
        batch_sampler=NShotTaskSampler(val_df, episodes_per_epoch, args.n_test,
                                       args.k_test, args.q_test))

    #train_iter = iter(train_taskloader)
    #train_taskloader = next(train_iter)

    #val_iter = iter(evaluation_taskloader)
    #evaluation_taskloader = next(val_iter)

    #########
    # Wandb #
    #########

    # Defaults are overridden by the sweep controller when running a sweep.
    config_defaults = {
        'lr': 0.00001,
        'optimiser': 'adam',
        'batch_size': 16,
    }

    wandb.init(config=config_defaults)

    #########
    # Model #
    #########

    torch.cuda.empty_cache()

    # FIXED: bare `except:` also swallowed KeyboardInterrupt/SystemExit;
    # narrowed to Exception (GPU info printing is best-effort only).
    try:
        print('Before Model Move')
        gpu_dict = get_gpu_info()
        print('Total GPU Mem: {} , Used GPU Mem: {}, Used Percent: {}'.format(
            gpu_dict['mem_total'], gpu_dict['mem_used'],
            gpu_dict['mem_used_percent']))
    except Exception:
        pass

    #from transformers import XLNetForSequenceClassification, AdamW

    #model = XLNetForSequenceClassification.from_pretrained('xlnet-base-cased', num_labels=150)
    #model.cuda()

    # Free a model left over from a previous trial in the same process.
    # FIXED: narrowed the bare `except:` — `del` of an unbound local only
    # raises NameError (UnboundLocalError).
    try:
        del model
    except NameError:
        print("Cannot delete model. No model with name 'model' exists")

    model = XLNetForEmbedding(num_input_channels)
    model.to(device, dtype=torch.double)

    #param_optimizer = list(model.named_parameters())
    #no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    #optimizer_grouped_parameters = [
    #                                {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    #                                {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay':0.0}
    #]

    try:
        print('After Model Move')
        gpu_dict = get_gpu_info()
        print('Total GPU Mem: {} , Used GPU Mem: {}, Used Percent: {}'.format(
            gpu_dict['mem_total'], gpu_dict['mem_used'],
            gpu_dict['mem_used_percent']))
    except Exception:
        pass

    wandb.watch(model)

    ############
    # Training #
    ############

    # (removed duplicate `from transformers import AdamW` — already
    # imported at the top of this function)

    print(f'Training Prototypical network on {args.dataset}...')
    if wandb.config.optimiser == 'adam':
        optimiser = Adam(model.parameters(), lr=wandb.config.lr)
    else:
        optimiser = AdamW(model.parameters(), lr=wandb.config.lr)

    #optimiser = AdamW(optimizer_grouped_parameters, lr=3e-5)
    #loss_fn = torch.nn.NLLLoss().cuda()

    #loss_fn = torch.nn.CrossEntropyLoss()

    #max_grad_norm = 1.0

    loss_fn = torch.nn.NLLLoss()

    def lr_schedule(epoch, lr):
        # Halve the learning rate every `drop_lr_every` epochs.
        if epoch % drop_lr_every == 0:
            return lr / 2
        else:
            return lr

    callbacks = [
        EvaluateFewShot(eval_fn=proto_net_episode,
                        num_tasks=evaluation_episodes,
                        n_shot=args.n_test,
                        k_way=args.k_test,
                        q_queries=args.q_test,
                        taskloader=evaluation_taskloader,
                        prepare_batch=prepare_nshot_task(
                            args.n_test, args.k_test, args.q_test),
                        distance=args.distance),
        ModelCheckpoint(
            filepath=PATH + f'/models/proto_nets/{param_str}.pth',
            monitor=f'val_{args.n_test}-shot_{args.k_test}-way_acc'),
        LearningRateScheduler(schedule=lr_schedule),
        CSVLogger(PATH + f'/logs/proto_nets/{param_str}.csv'),
    ]

    try:
        print('Before Fit')
        print('optimiser :', optimiser)
        print('Learning Rate: ', wandb.config.lr)
        gpu_dict = get_gpu_info()
        print('Total GPU Mem: {} , Used GPU Mem: {}, Used Percent: {}'.format(
            gpu_dict['mem_total'], gpu_dict['mem_used'],
            gpu_dict['mem_used_percent']))
    except Exception:
        pass

    fit(
        model,
        optimiser,
        loss_fn,
        epochs=n_epochs,
        dataloader=train_taskloader,
        prepare_batch=prepare_nshot_task(args.n_train, args.k_train,
                                         args.q_train),
        callbacks=callbacks,
        metrics=['categorical_accuracy'],
        fit_function=proto_net_episode,
        fit_function_kwargs={
            'n_shot': args.n_train,
            'k_way': args.k_train,
            'q_queries': args.q_train,
            'train': True,
            'distance': args.distance
        },
    )