Example #1
import os

import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn

# CHECKPOINT, BATCH_SIZE, NUM_WORKERS, CUDA, LEARNING_RATE, MOMENTUM,
# WEIGHT_DECAY, EPOCHS, EPOCHS_DROP, LR_DROP and ALL_CLASSES are module-level
# constants; load_MNIST, FeedForward, train, save_checkpoint, calc_avg_AUROC
# and AverageMeter come from the surrounding project.


def main():

    os.makedirs(CHECKPOINT, exist_ok=True)

    print('==> Preparing dataset')

    trainloader, validloader, testloader = load_MNIST(batch_size=BATCH_SIZE,
                                                      num_workers=NUM_WORKERS)

    AUROCs = []
    auroc = AverageMeter()

    for t, cls in enumerate(ALL_CLASSES):

        print('\nTask: [%d | %d]\n' % (t + 1, len(ALL_CLASSES)))

        CLASSES = [cls]

        print("==> Creating model")
        model = FeedForward(num_classes=1)

        if CUDA:
            model = model.cuda()
            model = nn.DataParallel(model)
            cudnn.benchmark = True

        print('    Total params: %.2fK' %
              (sum(p.numel() for p in model.parameters()) / 1000))

        criterion = nn.BCELoss()
        optimizer = optim.SGD(model.parameters(),
                              lr=LEARNING_RATE,
                              momentum=MOMENTUM,
                              weight_decay=WEIGHT_DECAY)

        print("==> Learning")

        best_loss = 1e10
        learning_rate = LEARNING_RATE

        for epoch in range(EPOCHS):

            # decay learning rate
            if (epoch + 1) % EPOCHS_DROP == 0:
                learning_rate *= LR_DROP
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate

            print('Epoch: [%d | %d]' % (epoch + 1, EPOCHS))

            train_loss = train(trainloader,
                               model,
                               criterion,
                               CLASSES,
                               CLASSES,
                               optimizer=optimizer,
                               use_cuda=CUDA)
            test_loss = train(validloader,
                              model,
                              criterion,
                              CLASSES,
                              CLASSES,
                              test=True,
                              use_cuda=CUDA)

            # save model
            is_best = test_loss < best_loss
            best_loss = min(test_loss, best_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'loss': test_loss,
                    'optimizer': optimizer.state_dict()
                }, CHECKPOINT, is_best)

        print("==> Calculating AUROC")

        filepath_best = os.path.join(CHECKPOINT, "best.pt")
        checkpoint = torch.load(filepath_best)
        model.load_state_dict(checkpoint['state_dict'])

        new_auroc = calc_avg_AUROC(model, testloader, CLASSES, CLASSES, CUDA)
        auroc.update(new_auroc)

        print('New Task AUROC: {}'.format(new_auroc))
        print('Average AUROC: {}'.format(auroc.avg))

        AUROCs.append(auroc.avg)

    print('\nAverage Per-task Performance over number of tasks')
    for i, p in enumerate(AUROCs):
        print("%d: %f" % (i + 1, p))
Example #2
import copy
import os

import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn

# The constants and helpers follow Example #1, with the additions MAX_EPOCHS,
# L1_COEFF, ZERO_THRESHOLD, L1Penalty, select_neurons and split_neurons.


def main():
    os.makedirs(CHECKPOINT, exist_ok=True)

    print('==> Preparing dataset')

    trainloader, validloader, testloader = load_MNIST(batch_size=BATCH_SIZE,
                                                      num_workers=NUM_WORKERS)

    print("==> Creating model")
    model = FeedForward(num_classes=len(ALL_CLASSES))

    if CUDA:
        model = model.cuda()
        # model = nn.DataParallel(model)
        cudnn.benchmark = True

    # initialize parameters
    # for name, param in model.named_parameters():
    #     if 'bias' in name:
    #         param.data.zero_()
    #     elif 'weight' in name:
    #         param.data.normal_(0, 0.005)

    print('    Total params: %.2fK' %
          (sum(p.numel() for p in model.parameters()) / 1000))

    criterion = nn.BCELoss()

    CLASSES = []
    AUROCs = []

    for t, cls in enumerate(ALL_CLASSES):

        print('\nTask: [%d | %d]\n' % (t + 1, len(ALL_CLASSES)))

        CLASSES.append(cls)

        if t == 0:
            print("==> Learning")

            optimizer = optim.SGD(model.parameters(),
                                  lr=LEARNING_RATE,
                                  momentum=MOMENTUM,
                                  weight_decay=WEIGHT_DECAY)

            penalty = L1Penalty(coeff=L1_COEFF)
            best_loss = 1e10
            learning_rate = LEARNING_RATE
            # epochs = 10
            for epoch in range(MAX_EPOCHS):
                # decay learning rate
                if (epoch + 1) % EPOCHS_DROP == 0:
                    learning_rate *= LR_DROP
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = learning_rate

                print('Epoch: [%d | %d]' % (epoch + 1, MAX_EPOCHS))

                train_loss = train(trainloader,
                                   model,
                                   criterion,
                                   ALL_CLASSES, [cls],
                                   optimizer=optimizer,
                                   penalty=penalty,
                                   use_cuda=CUDA)
                test_loss = train(validloader,
                                  model,
                                  criterion,
                                  ALL_CLASSES, [cls],
                                  test=True,
                                  penalty=penalty,
                                  use_cuda=CUDA)

                # save model
                is_best = test_loss < best_loss
                best_loss = min(test_loss, best_loss)
                save_checkpoint({'state_dict': model.state_dict()}, CHECKPOINT,
                                is_best)

                num_zero = 0
                for p in model.parameters():
                    num_zero += (p.data.abs() < ZERO_THRESHOLD).sum().item()
                print("Number of zero weights: %d" % num_zero)
        else:  # if t != 0
            # snapshot the current network; it is passed to split_neurons()
            # together with the retrained network at the end of this task
            model_copy = copy.deepcopy(model)

            print("==> Selective Retraining")

            # Solve Eq.3

            # freeze all layers except the last one (last 2 parameters)
            params = list(model.parameters())
            for param in params[:-2]:
                param.requires_grad = False

            optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                         model.parameters()),
                                  lr=LEARNING_RATE,
                                  momentum=MOMENTUM,
                                  weight_decay=WEIGHT_DECAY)

            penalty = L1Penalty(coeff=L1_COEFF)
            best_loss = 1e10
            learning_rate = LEARNING_RATE

            for epoch in range(MAX_EPOCHS):

                # decay learning rate
                if (epoch + 1) % EPOCHS_DROP == 0:
                    learning_rate *= LR_DROP
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = learning_rate

                print('Epoch: [%d | %d]' % (epoch + 1, MAX_EPOCHS))

                train(trainloader,
                      model,
                      criterion,
                      ALL_CLASSES, [cls],
                      optimizer=optimizer,
                      penalty=penalty,
                      use_cuda=CUDA)
                train(validloader,
                      model,
                      criterion,
                      ALL_CLASSES, [cls],
                      test=True,
                      penalty=penalty,
                      use_cuda=CUDA)

            for param in model.parameters():
                param.requires_grad = True

            print("==> Selecting Neurons")
            hooks = select_neurons(model, t)

            print("==> Training Selected Neurons")

            optimizer = optim.SGD(model.parameters(),
                                  lr=LEARNING_RATE,
                                  momentum=MOMENTUM,
                                  weight_decay=1e-4)

            best_loss = 1e10
            learning_rate = LEARNING_RATE

            for epoch in range(MAX_EPOCHS):

                # decay learning rate
                if (epoch + 1) % EPOCHS_DROP == 0:
                    learning_rate *= LR_DROP
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = learning_rate

                print('Epoch: [%d | %d]' % (epoch + 1, MAX_EPOCHS))

                train_loss = train(trainloader,
                                   model,
                                   criterion,
                                   ALL_CLASSES, [cls],
                                   optimizer=optimizer,
                                   use_cuda=CUDA)
                test_loss = train(validloader,
                                  model,
                                  criterion,
                                  ALL_CLASSES, [cls],
                                  test=True,
                                  use_cuda=CUDA)

                # save model
                is_best = test_loss < best_loss
                best_loss = min(test_loss, best_loss)
                save_checkpoint({'state_dict': model.state_dict()}, CHECKPOINT,
                                is_best)

            # remove hooks
            for hook in hooks:
                hook.remove()

            print("==> Splitting Neurons")
            split_neurons(model_copy, model)

        print("==> Calculating AUROC")
        filepath_best = os.path.join(CHECKPOINT, "best.pt")
        checkpoint = torch.load(filepath_best)
        model.load_state_dict(checkpoint['state_dict'])

        auroc = calc_avg_AUROC(model, testloader, ALL_CLASSES, CLASSES, CUDA)

        print('AUROC: {}'.format(auroc))

        AUROCs.append(auroc)

    print('\nAverage Per-task Performance over number of tasks')
    for i, p in enumerate(AUROCs):
        print("%d: %f" % (i + 1, p))