示例#1
0
def main(args):
    """Run the experiment ``args.num_runs`` times and report averaged metrics.

    Args:
        args: parsed argparse.Namespace. Mutated in place: ``args.seed`` and
            ``args.output`` are overwritten for each run.

    Side effects: creates output directories, saves a code snapshot, and
    prints per-run and aggregate accuracy/backward-transfer statistics.
    """
    utils.make_directories(args)
    utils.some_sanity_checks(args)
    utils.save_code(args)

    print('=' * 100)
    print('Arguments =')
    for arg in vars(args):
        print('\t' + arg + ':', getattr(args, arg))
    print('=' * 100)

    accuracies, forgetting = [], []
    for n in range(args.num_runs):
        # Each run gets its own seed and its own output file name.
        args.seed = n
        args.output = '{}_{}_tasks_seed_{}.txt'.format(
            args.experiment, args.ntasks, args.seed)
        print("args.output: ", args.output)

        print(" >>>> Run #", n)
        acc, bwt = run(args, n)
        accuracies.append(acc)
        forgetting.append(bwt)

    print('*' * 100)
    print("Average over {} runs: ".format(args.num_runs))
    # Raw strings: '\p' is an invalid escape sequence and triggers a
    # SyntaxWarning on modern Python; the printed text is unchanged.
    print(r'AVG ACC: {:5.4f}% \pm {:5.4f}'.format(
        np.array(accuracies).mean(), np.array(accuracies).std()))
    print(r'AVG BWT: {:5.2f}% \pm {:5.4f}'.format(
        np.array(forgetting).mean(), np.array(forgetting).std()))

    print("All Done! ")
    # NOTE(review): `tstart` must be defined at module level — confirm.
    print('[Elapsed time = {:.1f} min]'.format((time.time() - tstart) / (60)))
    utils.print_time()
示例#2
0
def main(args):
    """Entry point: set up directories and optional W&B logging, echo the
    configuration, then launch every training run in sequence."""
    utils.print_time(start=True)
    args.path.checkpoint, args.wandb.notes = utils.make_directories(args)

    # Optional Weights & Biases experiment tracking.
    if args.wandb.log:
        wandb.init(project=args.wandb.project,
                   name=args.wandb.notes,
                   config=args.config,
                   notes=args.wandb.notes,
                   allow_val_change=True)

    utils.save_code(args)

    separator = '=' * 100
    print(separator)
    print('Arguments =')
    for key in vars(args):
        print(f'\t{key}:', getattr(args, key))
    print(separator)

    for run_idx in range(args.train.num_runs):
        # Seeds are 1-based per run.
        args.seed = run_idx + 1

        args.experiment.memory_budget = int(args.experiment.memory_budget)
        args.path.output = f'Run_{run_idx + 1}_{args.wandb.notes}.txt'

        if args.wandb.log:
            wandb.config.update(args, allow_val_change=True)

        print('>' * 30, 'Run #', run_idx + 1)
        run(args, run_idx)

    print("All Done! ")
    print('[Elapsed time = {:.1f} min - {:0.1f} hours]'.format(
        (time.time() - tstart) / (60), (time.time() - tstart) / (3600)))
    utils.print_time(start=False)
    def __init__(
            self,
            model: Model,
            epoch: int,
            train_loader: DataLoader,
            test_model: TestModel) -> None:
        """Set up training state: schedules, metric buffers, and output dirs.

        Args:
            model (Model): model whose attributes are inherited onto self.
            epoch (int): max epoch.
            train_loader (DataLoader): train dataloader.
            test_model (TestModel): testing model.
        """

        # parameters
        # Copies the model's attributes onto self; presumably this provides
        # `self.tms` used below — confirm in inherit_params.
        self.inherit_params(model)  # inherit by model variable

        self.max_epoch = epoch
        self.train_loader = train_loader
        self.test_model = test_model  # TestModel

        # schedule by cycle — schedules derived from the configured cycles
        # (exact semantics defined by _create_schedule).
        self.test_schedule = self._create_schedule(self.tms.test_cycle)
        self.pth_save_schedule = self._create_schedule(self.tms.pth_save_cycle)

        # for making confusion matrix: accumulators for ground-truth labels
        # and predictions across evaluation.
        self.all_label = torch.tensor([], dtype=torch.long)
        self.all_pred = torch.tensor([], dtype=torch.long)

        # for making false path: one directory per epoch (created up front)
        # to hold per-epoch outputs such as misclassified samples.
        base = Path(self.tms.false_path, self.tms.filename_base)
        self.false_paths = [base.joinpath(f'epoch{ep}') for ep in range(self.max_epoch)]
        ul.make_directories(*self.false_paths)

        # pth_save_cycle == 0 disables checkpoint saving, so the save
        # directory is only created when saving is enabled.
        if self.tms.pth_save_cycle != 0:
            self.pth_save_path = f'{self.tms.pth_save_path}/{self.tms.filename_base}'
            ul.make_directories(self.pth_save_path)
示例#4
0
def main():
    """CLI entry point for continual learning on mini-ImageNet.

    Parses arguments, seeds all RNGs for reproducibility, selects the
    approach and network modules, then trains task-by-task, evaluating on
    every previously seen task and saving the accuracy matrix after each.
    """
    tstart = time.time()

    parser = argparse.ArgumentParser(description='xxx')

    # Data parameters
    parser.add_argument('--seed',
                        default=0,
                        type=int,
                        help='(default=%(default)d)')
    parser.add_argument('--device', default='cuda:0', type=str, help='gpu id')
    parser.add_argument('--approach',
                        default='lwf',
                        type=str,
                        help='approach used')
    parser.add_argument('--experiment', default='MI', type=str)
    parser.add_argument('--data_dir',
                        default='data',
                        type=str,
                        help='data directory')
    parser.add_argument('--ntasks', default=10, type=int)
    parser.add_argument('--pc-valid', default=0.02, type=float)
    parser.add_argument('--workers', default=4, type=int)

    # Training parameters
    parser.add_argument('--output', default='', type=str, help='')
    parser.add_argument('--checkpoint_dir',
                        default='checkpoints/',
                        type=str,
                        help='')
    parser.add_argument('--nepochs', default=200, type=int, help='')
    parser.add_argument('--sbatch', default=64, type=int, help='')
    parser.add_argument('--lr', default=0.05, type=float, help='')
    parser.add_argument('--momentum', default=0.9, type=float)
    parser.add_argument('--weight-decay', default=0.0, type=float)
    parser.add_argument('--resume', default='no', type=str, help='resume?')
    parser.add_argument('--sti', default=0, type=int, help='starting task?')
    parser.add_argument('--mul', default=2, type=int)

    args = parser.parse_args()
    utils.print_arguments(args)

    #####################################################################################

    # Seed every RNG source so a run is reproducible for a given --seed.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
        # Deterministic cuDNN kernels: reproducibility over speed.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    print('Using device:', args.device)
    checkpoint = utils.make_directories(args)
    args.checkpoint = checkpoint
    print()

    # Args -- Experiment
    from dataloaders.miniimagenet import DatasetGen

    # Args -- Approach: deferred imports so only the selected approach
    # module (and its dependencies) is loaded.
    if args.approach == 'ewc':
        from approaches import ewc as approach
    elif args.approach == 'sgd':
        from approaches import sgd as approach
    elif args.approach == 'sgd-frozen':
        from approaches import sgd_frozen as approach
    elif args.approach == 'imm-mode':
        from approaches import imm_mode as approach
    elif args.approach == 'lwf':
        from approaches import lwf as approach
    else:
        raise NotImplementedError("approach currently not implemented")

    # Args -- Network
    if args.approach != 'hat':
        from networks import alexnet as network
    else:
        from networks import alexnet_hat as network

    ########################################################################################
    print()
    print("Starting this run on :")
    print(datetime.now().strftime("%Y-%m-%d %H:%M"))

    # Load
    print('Load data...')
    # prepare data for each task
    datagen = DatasetGen(args)
    for task_id in range(args.ntasks):
        datagen.get(task_id)
    print('\nTask info =', datagen.taskcla)

    args.num_tasks = len(datagen.taskcla)
    args.inputsize, args.taskcla = datagen.inputsize, datagen.taskcla

    # Inits
    print('Inits...')
    model = network.Net(args).to(args.device)

    # print number of parameters (assumes 4 bytes / float32 parameter)
    count = 0
    for p in model.parameters():
        count += np.prod(p.size())
    print('model size in MB: ', count * 4 / (1024 * 1024))

    print('-' * 100)
    appr = approach.Appr(model, args=args)
    print('-' * 100)

    # Resume from the checkpoint of task args.sti, or start fresh at task 0.
    if args.resume == 'yes':
        checkpoint = torch.load(
            os.path.join(args.checkpoint, 'model_{}.pth.tar'.format(args.sti)))
        model.load_state_dict(checkpoint['model_state_dict'])
        model = model.to(device=args.device)
    else:
        args.sti = 0

    # Loop tasks.
    # acc[t, u] / lss[t, u] = test accuracy / loss on task u after training
    # through task t (lower triangle filled as training progresses).
    acc = np.zeros((len(args.taskcla), len(args.taskcla)), dtype=np.float32)
    lss = np.zeros((len(args.taskcla), len(args.taskcla)), dtype=np.float32)
    for task, ncla in args.taskcla[args.sti:]:  # NOTE(review): ncla unused
        data_t = datagen.dataloaders[task]
        print('*' * 100)
        print('Task {:2d} ({:s})'.format(task, data_t['name']))
        print('*' * 100)

        # Train
        appr.train(task, data_t['train'], data_t['valid'])
        print('-' * 100)

        appr.save_model(task)
        # Test on the current task and all earlier ones.
        for u in range(task + 1):
            data_u = datagen.dataloaders[u]
            test_loss, test_acc = appr.eval(u, data_u['test'])
            print(
                '>>> Test on task {:2d} - {:15s}: loss={:.3f}, acc={:5.3f}% <<<'
                .format(u, data_u['name'], test_loss, 100 * test_acc))
            acc[task, u] = test_acc
            lss[task, u] = test_loss

        # Save the (partially filled) accuracy matrix after every task.
        print('Save at ' + args.checkpoint)
        np.savetxt(
            os.path.join(args.checkpoint,
                         '{}_{}.txt'.format(args.approach, args.seed)), acc,
            '%.5f')

    utils.print_log_acc_bwt(args, acc, lss)
    print('[Elapsed time = {:.1f} h]'.format(
        (time.time() - tstart) / (60 * 60)))
示例#5
0
args = parser.parse_args()
utils.print_arguments(args)

########################################################################################################################

# Seed every RNG source so a run is reproducible for a given --seed.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(args.seed)
    # Deterministic cuDNN kernels: reproducibility over speed.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

print('Using device:', args.device)
checkpoint = utils.make_directories(args)
args.checkpoint = checkpoint
print()

# Args -- Experiment: select the dataloader module for the chosen dataset.
if args.experiment == 'mnist2':
    from dataloaders import mnist2 as dataloader
elif args.experiment == 'mnist5':
    from dataloaders import mnist5 as dataloader
elif args.experiment == 'pmnist':
    from dataloaders import pmnist as dataloader
elif args.experiment == 'cifar':
    from dataloaders import cifar as dataloader
elif args.experiment == 'mixture':
    from dataloaders import mixture as dataloader
else:
    # Fail fast on an unknown dataset instead of a later NameError on
    # `dataloader` (consistent with the sibling scripts in this project).
    raise NotImplementedError('dataset currently not implemented')
示例#6
0
def main():
    """CLI entry point for BLIP continual image classification.

    Parses arguments, seeds all RNGs, selects dataset/approach/network
    modules, then trains task-by-task: after each task it runs the BLIP
    post-processing (Fisher estimation, bit allocation, quantization),
    evaluates on all seen tasks, and saves the accuracy matrix.
    """
    tstart = time.time()

    parser = argparse.ArgumentParser(description='BLIP Image Classification')

    # Data parameters
    parser.add_argument('--seed',
                        default=0,
                        type=int,
                        help='(default=%(default)d)')
    parser.add_argument('--device', default='cuda:0', type=str, help='gpu id')
    parser.add_argument('--experiment',
                        default='mnist5',
                        type=str,
                        help='experiment dataset',
                        required=True)
    # Fixed help text: it previously said 'gpu id' (copy-paste error).
    parser.add_argument('--data_path',
                        default='../data/',
                        type=str,
                        help='data directory')

    # Training parameters
    parser.add_argument('--approach',
                        default='blip',
                        type=str,
                        help='continual learning approach')
    parser.add_argument('--output', default='', type=str, help='')
    parser.add_argument('--checkpoint_dir',
                        default='../checkpoints/',
                        type=str,
                        help='')
    parser.add_argument('--nepochs', default=200, type=int, help='')
    parser.add_argument('--sbatch', default=64, type=int, help='')
    parser.add_argument('--lr', default=0.05, type=float, help='')
    parser.add_argument('--momentum', default=0, type=float, help='')
    parser.add_argument('--weight-decay', default=0.0, type=float, help='')
    parser.add_argument('--resume', default='no', type=str, help='resume?')
    parser.add_argument('--sti', default=0, type=int, help='starting task?')

    # Model parameters
    parser.add_argument('--ndim',
                        default=1200,
                        type=int,
                        help='hidden dimension for 2 layer MLP')
    parser.add_argument('--mul',
                        default=1.0,
                        type=float,
                        help='multiplier of model width')

    # BLIP specific parameters
    parser.add_argument('--max-bit',
                        default=20,
                        type=int,
                        help='maximum number of bits for each parameter')
    parser.add_argument('--F-prior',
                        default=1e-15,
                        type=float,
                        help='scaling factor of F_prior')

    args = parser.parse_args()
    utils.print_arguments(args)

    #####################################################################################

    # Seed every RNG source so a run is reproducible for a given --seed.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
        # Deterministic cuDNN kernels: reproducibility over speed.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    print('Using device:', args.device)
    checkpoint = utils.make_directories(args)
    args.checkpoint = checkpoint
    print()

    # Args -- Experiment: deferred import of the selected dataset module.
    if args.experiment == 'mnist2':
        from dataloaders import mnist2 as dataloader
    elif args.experiment == 'mnist5':
        from dataloaders import mnist5 as dataloader
    elif args.experiment == 'pmnist':
        from dataloaders import pmnist as dataloader
    elif args.experiment == 'cifar':
        from dataloaders import cifar as dataloader
    elif args.experiment == 'mixture5':
        from dataloaders import mixture5 as dataloader
    else:
        raise NotImplementedError('dataset currently not implemented')

    # Args -- Approach
    if args.approach == 'blip':
        from approaches import blip as approach
    else:
        raise NotImplementedError('approach currently not implemented')

    # Args -- Network: quantized MLP for the MNIST variants, quantized
    # AlexNet otherwise.
    if args.experiment == 'mnist2' or args.experiment == 'pmnist' or args.experiment == 'mnist5':
        from networks import q_mlp as network
    else:
        from networks import q_alexnet as network

    ########################################################################################
    print()
    print("Starting this run on :")
    print(datetime.now().strftime("%Y-%m-%d %H:%M"))

    # Load all task data up front.
    print('Load data...')
    data, taskcla, inputsize = dataloader.get(data_path=args.data_path,
                                              seed=args.seed)
    print('Input size =', inputsize, '\nTask info =', taskcla)
    args.num_tasks = len(taskcla)
    args.inputsize, args.taskcla = inputsize, taskcla

    # Inits
    print('Inits...')
    model = network.Net(args).to(args.device)

    print('-' * 100)
    appr = approach.Appr(model, args=args)
    print('-' * 100)

    # Resume from the checkpoint of task args.sti, or start fresh at task 0.
    if args.resume == 'yes':
        checkpoint = torch.load(
            os.path.join(args.checkpoint, 'model_{}.pth.tar'.format(args.sti)))
        model.load_state_dict(checkpoint['model_state_dict'])
        model = model.to(device=args.device)
    else:
        args.sti = 0

    # Loop tasks.
    # acc[t, u] / lss[t, u] = test accuracy / loss on task u after training
    # through task t (lower triangle filled as training progresses).
    acc = np.zeros((len(taskcla), len(taskcla)), dtype=np.float32)
    lss = np.zeros((len(taskcla), len(taskcla)), dtype=np.float32)
    for t, _ in taskcla[args.sti:]:

        print('*' * 100)
        print('Task {:2d} ({:s})'.format(t, data[t]['name']))
        print('*' * 100)

        # Get data
        xtrain = data[t]['train']['x'].to(args.device)
        ytrain = data[t]['train']['y'].to(args.device)
        xvalid = data[t]['valid']['x'].to(args.device)
        yvalid = data[t]['valid']['y'].to(args.device)
        task = t

        # Train
        appr.train(task, xtrain, ytrain, xvalid, yvalid)
        print('-' * 100)

        # BLIP specifics post processing
        estimate_fisher(task, args.device, model, xtrain, ytrain)
        for m in model.features.modules():
            if isinstance(m, Linear_Q) or isinstance(m, Conv2d_Q):
                # update bits according to information gain
                m.update_bits(task=task, C=0.5 / math.log(2))
                # do quantization
                m.sync_weight()
                # update Fisher in the buffer
                m.update_fisher(task=task)

        # save the model after the update
        appr.save_model(task)
        # Test on the current task and all earlier ones.
        for u in range(t + 1):
            xtest = data[u]['test']['x'].to(args.device)
            ytest = data[u]['test']['y'].to(args.device)
            test_loss, test_acc = appr.eval(u, xtest, ytest, debug=True)
            print(
                '>>> Test on task {:2d} - {:15s}: loss={:.3f}, acc={:5.3f}% <<<'
                .format(u, data[u]['name'], test_loss, 100 * test_acc))
            acc[t, u] = test_acc
            lss[t, u] = test_loss

        utils.used_capacity(model, args.max_bit)

        # Save the (partially filled) accuracy matrix after every task.
        print('Save at ' + args.checkpoint)
        np.savetxt(
            os.path.join(
                args.checkpoint,
                '{}_{}_{}.txt'.format(args.experiment, args.approach,
                                      args.seed)), acc, '%.5f')

    utils.print_log_acc_bwt(args, acc, lss)
    print('[Elapsed time = {:.1f} h]'.format(
        (time.time() - tstart) / (60 * 60)))
示例#7
0
    def cache(self, resp, resource):
        """Takes a GopherResponse and a GopherResource.  Saves the content of
        the response to disk, and returns the absolute filename saved to.

        Raises CacheException for ASK resources and on any write failure."""

        if resource.isAskType():
            # Don't cache ASK blocks.  This is because if you do, the program
            # will interpret it as data, and put the question structure inside
            # a text box.   Plus, since these may be dynamic, caching could
            # lead to missing out on things.
            raise CacheException("Do not cache AskTypes.  Not a good idea.")

        basedir = self.getCachePrefix()
        basefilename = resource.toCacheFilename()

        # basefilename may contain a trailing filename component that must
        # not be treated as a directory (otherwise make_directories would
        # create the *file* part as a directory).  Strip it so e.g.
        # "/home/x/foobar" becomes "/home/x".
        ind = basefilename.rfind(os.sep)
        if ind >= 0:
            dirpart = basefilename[:ind]
        else:
            # BUGFIX: the old code did basefilename[0:-1] when no separator
            # was present (rfind == -1), silently chopping the last character
            # and creating a bogus directory.  With no separator there is no
            # directory component at all.
            dirpart = ""

        # Create the directory structure where necessary
        utils.make_directories(dirpart, basedir)

        # Join prefix and cache filename without doubling the separator.
        if basedir.endswith(os.sep):
            filename = "%s%s" % (basedir, basefilename)
        else:
            filename = "%s%s%s" % (basedir, os.sep, basefilename)

        try:
            # Context manager guarantees the handle is closed even if a
            # write fails partway through (the old code leaked it on error).
            with open(filename, "w") as fp:
                if resp.getData() is None:    # This is a directory entry.
                    # Each response line is a GopherResource; write them as if
                    # it was a file served by the gopher server so the cache
                    # file can be reparsed exactly like data off a socket.
                    for response_line in resp.getResponses():
                        fp.write(response_line.toProtocolString())

                    # Write the string terminator.  Not strictly needed since
                    # it isn't data, but it helps the other objects treat
                    # cached content like socket data.
                    fp.write("\r\n.\r\n")
                else:
                    fp.write(resp.getData())

                fp.flush()
        except IOError as errstr:
            # Some error writing data to the file.  Bummer.
            raise CacheException("Couldn't write to\n%s:\n%s" % (filename, errstr))

        # Successfully wrote the data - return the filename that was used
        # to save the data into.  (Absolute path)
        return os.path.abspath(filename)
示例#8
0
    # Load the model weights after training
    model_weights = torch.load(model_path + '/tem_' + str(i_start) + '.pt')
    # Set the model weights to the loaded trained model weights
    tem.load_state_dict(model_weights)

    # Make list of all the environments that this model was trained on
    envs = list(glob.iglob(envs_path + '/*'))

    # And increase starting iteration by 1, since the loaded model already carried out the current starting iteration
    i_start = i_start + 1
else:
    # Start training from step 0
    i_start = 0

    # Create directories for storing all information about the current run
    run_path, train_path, model_path, save_path, script_path, envs_path = utils.make_directories(
    )
    # Save all python files in current directory to script directory
    files = glob.iglob(os.path.join('.', '*.py'))
    for file in files:
        if os.path.isfile(file):
            shutil.copy2(file, os.path.join(script_path, file))

    # Initalise hyperparameters for model
    params = parameters.parameters()
    # Save parameters
    np.save(os.path.join(save_path, 'params'), params)

    # And create instance of TEM with those parameters
    tem = model.Model(params)

    # Create list of environments that we will sample from during training to provide TEM with trajectory input
示例#9
0
def main():
    """CLI entry point for BLIP continual learning on mini-ImageNet.

    Parses arguments, seeds all RNGs, builds the per-task data generators
    and the quantized network, then trains task-by-task: after each task it
    estimates Fisher information, reallocates bits, quantizes weights,
    evaluates on all seen tasks, and saves the accuracy matrix.
    """
    tstart = time.time()

    parser = argparse.ArgumentParser(description='BLIP mini-ImageNet')

    # Data parameters
    parser.add_argument('--seed',
                        default=0,
                        type=int,
                        help='(default=%(default)d)')
    parser.add_argument('--device', default='cuda:0', type=str, help='gpu id')
    parser.add_argument('--experiment', default='MI', type=str)
    parser.add_argument('--data_dir',
                        default='data',
                        type=str,
                        help='data directory')
    parser.add_argument('--ntasks', default=10, type=int)
    parser.add_argument('--pc-valid', default=0.02, type=float)
    parser.add_argument('--workers', default=4, type=int)

    # Training parameters
    parser.add_argument('--approach', default='blip', type=str)
    parser.add_argument('--output', default='', type=str, help='')
    parser.add_argument('--checkpoint_dir',
                        default='checkpoints/',
                        type=str,
                        help='')
    parser.add_argument('--nepochs', default=200, type=int, help='')
    parser.add_argument('--sbatch', default=64, type=int, help='')
    parser.add_argument('--lr', default=0.05, type=float, help='')
    parser.add_argument('--momentum', default=0.9, type=float)
    parser.add_argument('--weight-decay', default=0.0, type=float)
    parser.add_argument('--resume', default='no', type=str, help='resume?')
    parser.add_argument('--sti', default=0, type=int, help='starting task?')

    # model parameters
    parser.add_argument('--mul', default=1.0, type=float)
    parser.add_argument('--arch', default='alexnet', type=str)

    # BLIP parameters
    parser.add_argument('--max-bit', default=20, type=int, help='')
    parser.add_argument('--F-prior', default=1e-15, type=float, help='')

    args = parser.parse_args()
    utils.print_arguments(args)

    #####################################################################################

    # Seed every RNG source so a run is reproducible for a given --seed.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
        # Deterministic cuDNN kernels: reproducibility over speed.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    print('Using device:', args.device)
    checkpoint = utils.make_directories(args)
    args.checkpoint = checkpoint
    print()

    # Args -- Experiment
    from dataloaders.miniimagenet import DatasetGen

    # Args -- Approach
    from approaches import blip as approach

    # Args -- Network: deferred import of the selected quantized backbone.
    if args.arch == 'alexnet':
        from networks import q_alexnet as network
    elif args.arch == 'resnet':
        from networks import q_resnet as network
    else:
        raise NotImplementedError("network currently not implemented")

    ########################################################################################
    print()
    print("Starting this run on :")
    print(datetime.now().strftime("%Y-%m-%d %H:%M"))

    # Load
    print('Load data...')
    # prepare data for each task
    datagen = DatasetGen(args)
    for task_id in range(args.ntasks):
        datagen.get(task_id)
    print('\nTask info =', datagen.taskcla)

    args.num_tasks = len(datagen.taskcla)
    args.inputsize, args.taskcla = datagen.inputsize, datagen.taskcla

    # Inits
    print('Inits...')
    model = network.Net(args).to(args.device)

    # print number of parameters (assumes 4 bytes / float32 parameter)
    count = 0
    for p in model.parameters():
        count += np.prod(p.size())
    print('model size in MB: ', count * 4 / (1024 * 1024))

    print('-' * 100)
    appr = approach.Appr(model, args=args)
    print('-' * 100)

    # Resume from the checkpoint of task args.sti, or start fresh at task 0.
    if args.resume == 'yes':
        checkpoint = torch.load(
            os.path.join(args.checkpoint, 'model_{}.pth.tar'.format(args.sti)))
        model.load_state_dict(checkpoint['model_state_dict'])
        model = model.to(device=args.device)
    else:
        args.sti = 0

    # Loop tasks.
    # acc[t, u] / lss[t, u] = test accuracy / loss on task u after training
    # through task t (lower triangle filled as training progresses).
    acc = np.zeros((len(args.taskcla), len(args.taskcla)), dtype=np.float32)
    lss = np.zeros((len(args.taskcla), len(args.taskcla)), dtype=np.float32)
    for task, ncla in args.taskcla[args.sti:]:  # NOTE(review): ncla unused
        data_t = datagen.dataloaders[task]
        print('*' * 100)
        print('Task {:2d} ({:s})'.format(task, data_t['name']))
        print('*' * 100)

        # Train
        appr.train(task, data_t['train'], data_t['valid'])
        print('-' * 100)

        # BLIP post-processing: estimate Fisher information, then per-layer
        # bit reallocation and quantization.
        estimate_fisher(task, args.device, model, data_t['fisher'])
        for m in model.modules():
            if isinstance(m, Linear_Q) or isinstance(m, Conv2d_Q):
                # update bits according to information gain
                m.update_bits(task=task, C=0.5 / math.log(2))
                # do quantization
                m.sync_weight()
                # update Fisher in the buffer
                m.update_fisher(task=task)

        # save the model after the update
        appr.save_model(task)
        # Test on the current task and all earlier ones.
        for u in range(task + 1):
            data_u = datagen.dataloaders[u]
            test_loss, test_acc = appr.eval(u, data_u['test'])
            print(
                '>>> Test on task {:2d} - {:15s}: loss={:.3f}, acc={:5.3f}% <<<'
                .format(u, data_u['name'], test_loss, 100 * test_acc))
            acc[task, u] = test_acc
            lss[task, u] = test_loss

        utils.used_capacity(model, args.max_bit)

        # Save the (partially filled) accuracy matrix after every task.
        print('Save at ' + args.checkpoint)
        np.savetxt(
            os.path.join(
                args.checkpoint,
                '{}_{}_{}_{}.txt'.format(args.approach, args.arch, args.seed,
                                         str(args.F_prior))), acc, '%.5f')

    utils.print_log_acc_bwt(args, acc, lss)
    print('[Elapsed time = {:.1f} h]'.format(
        (time.time() - tstart) / (60 * 60)))