def main(args):
    """Run the experiment `args.num_runs` times (seed = run index) and report
    the mean/std of final accuracy and backward transfer (BWT) over runs.

    Relies on module-level `utils`, `run`, `np`, `time`.
    """
    utils.make_directories(args)
    utils.some_sanity_checks(args)
    utils.save_code(args)

    # Echo every argument so the log file is self-describing.
    print('=' * 100)
    print('Arguments =')
    for arg in vars(args):
        print('\t' + arg + ':', getattr(args, arg))
    print('=' * 100)

    accuracies, forgetting = [], []
    for n in range(args.num_runs):
        # Each repetition uses its run index as the seed for reproducibility.
        args.seed = n
        args.output = '{}_{}_tasks_seed_{}.txt'.format(
            args.experiment, args.ntasks, args.seed)
        print("args.output: ", args.output)
        print(" >>>> Run #", n)
        acc, bwt = run(args, n)
        accuracies.append(acc)
        forgetting.append(bwt)

    print('*' * 100)
    print("Average over {} runs: ".format(args.num_runs))
    # Build each array once instead of four times; '\\pm' replaces the
    # original '\pm', which is an invalid escape sequence (SyntaxWarning
    # since Python 3.12) that happens to produce the same characters.
    acc_arr = np.array(accuracies)
    bwt_arr = np.array(forgetting)
    print('AVG ACC: {:5.4f}% \\pm {:5.4f}'.format(acc_arr.mean(), acc_arr.std()))
    print('AVG BWT: {:5.2f}% \\pm {:5.4f}'.format(bwt_arr.mean(), bwt_arr.std()))

    print("All Done! ")
    # NOTE(review): `tstart` is not defined in this function — presumably a
    # module-level timestamp set at script start; confirm.
    print('[Elapsed time = {:.1f} min]'.format((time.time() - tstart) / (60)))
    utils.print_time()
def main(args):
    """Entry point: prepare output directories (and optionally wandb), echo
    the configuration, then launch `args.train.num_runs` sequential runs
    with seeds 1..num_runs."""
    utils.print_time(start=True)
    args.path.checkpoint, args.wandb.notes = utils.make_directories(args)
    if args.wandb.log:
        # Single wandb session covering the whole sweep of runs.
        wandb.init(project=args.wandb.project,
                   name=args.wandb.notes,
                   config=args.config,
                   notes=args.wandb.notes,
                   allow_val_change=True)
    utils.save_code(args)

    # Echo every argument so the log is self-describing.
    separator = '=' * 100
    print(separator)
    print('Arguments =')
    for key in vars(args):
        print('\t' + key + ':', getattr(args, key))
    print(separator)

    for run_idx in range(args.train.num_runs):
        run_no = run_idx + 1
        args.seed = run_no
        args.experiment.memory_budget = int(args.experiment.memory_budget)
        args.path.output = 'Run_{}_{}.txt'.format(run_no, args.wandb.notes)
        if args.wandb.log:
            wandb.config.update(args, allow_val_change=True)
        print(">" * 30, "Run #", run_no)
        run(args, run_idx)

    print("All Done! ")
    elapsed = time.time() - tstart
    print('[Elapsed time = {:.1f} min - {:0.1f} hours]'.format(
        elapsed / (60), elapsed / (3600)))
    utils.print_time(start=False)
def __init__( self, model: Model, epoch: int, train_loader: DataLoader, test_model: TestModel) -> None: """ Args: model (Model): model parameters. epoch (int): max epoch. train_data (DataLoader): train dataloader. test_model (TestModel): testing model. """ # parameters self.inherit_params(model) # inherit by model variable self.max_epoch = epoch self.train_loader = train_loader self.test_model = test_model # TestModel # schedule by cycle self.test_schedule = self._create_schedule(self.tms.test_cycle) self.pth_save_schedule = self._create_schedule(self.tms.pth_save_cycle) # for making confusion matrix self.all_label = torch.tensor([], dtype=torch.long) self.all_pred = torch.tensor([], dtype=torch.long) # for making false path base = Path(self.tms.false_path, self.tms.filename_base) self.false_paths = [base.joinpath(f'epoch{ep}') for ep in range(self.max_epoch)] ul.make_directories(*self.false_paths) if self.tms.pth_save_cycle != 0: self.pth_save_path = f'{self.tms.pth_save_path}/{self.tms.filename_base}' ul.make_directories(self.pth_save_path)
def main():
    """Continual-learning experiment on mini-ImageNet: parse CLI arguments,
    seed RNGs, select approach/network by name, then train on each task in
    sequence and evaluate on all tasks seen so far."""
    tstart = time.time()
    parser = argparse.ArgumentParser(description='xxx')
    # Data parameters
    parser.add_argument('--seed', default=0, type=int, help='(default=%(default)d)')
    parser.add_argument('--device', default='cuda:0', type=str, help='gpu id')
    parser.add_argument('--approach', default='lwf', type=str, help='approach used')
    parser.add_argument('--experiment', default='MI', type=str)
    parser.add_argument('--data_dir', default='data', type=str, help='data directory')
    parser.add_argument('--ntasks', default=10, type=int)
    parser.add_argument('--pc-valid', default=0.02, type=float)
    parser.add_argument('--workers', default=4, type=int)
    # Training parameters
    parser.add_argument('--output', default='', type=str, help='')
    parser.add_argument('--checkpoint_dir', default='checkpoints/', type=str, help='')
    parser.add_argument('--nepochs', default=200, type=int, help='')
    parser.add_argument('--sbatch', default=64, type=int, help='')
    parser.add_argument('--lr', default=0.05, type=float, help='')
    parser.add_argument('--momentum', default=0.9, type=float)
    parser.add_argument('--weight-decay', default=0.0, type=float)
    parser.add_argument('--resume', default='no', type=str, help='resume?')
    parser.add_argument('--sti', default=0, type=int, help='starting task?')
    parser.add_argument('--mul', default=2, type=int)
    args = parser.parse_args()
    utils.print_arguments(args)
    #####################################################################################
    # Seed: fix every RNG and force deterministic cuDNN for reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    print('Using device:', args.device)
    checkpoint = utils.make_directories(args)
    args.checkpoint = checkpoint
    print()
    # Args -- Experiment
    from dataloaders.miniimagenet import DatasetGen
    # Args -- Approach: imported lazily so only the chosen module is loaded.
    if args.approach == 'ewc':
        from approaches import ewc as approach
    elif args.approach == 'sgd':
        from approaches import sgd as approach
    elif args.approach == 'sgd-frozen':
        from approaches import sgd_frozen as approach
    elif args.approach == 'imm-mode':
        from approaches import imm_mode as approach
    elif args.approach == 'lwf':
        from approaches import lwf as approach
    else:
        raise NotImplementedError("approach currently not implemented")
    # Args -- Network
    # NOTE(review): 'hat' is not one of the accepted approaches above, so in
    # practice this always picks plain alexnet — confirm whether 'hat'
    # support was removed intentionally.
    if args.approach != 'hat':
        from networks import alexnet as network
    else:
        from networks import alexnet_hat as network
    ########################################################################################
    print()
    print("Starting this run on :")
    print(datetime.now().strftime("%Y-%m-%d %H:%M"))
    # Load
    print('Load data...')
    # prepare data for each task
    datagen = DatasetGen(args)
    for task_id in range(args.ntasks):
        datagen.get(task_id)
    print('\nTask info =', datagen.taskcla)
    args.num_tasks = len(datagen.taskcla)
    args.inputsize, args.taskcla = datagen.inputsize, datagen.taskcla
    # Inits
    print('Inits...')
    model = network.Net(args).to(args.device)
    # print number of parameters (assumes 4 bytes per parameter)
    count = 0
    for p in model.parameters():
        count += np.prod(p.size())
    print('model size in MB: ', count * 4 / (1024 * 1024))
    print('-' * 100)
    appr = approach.Appr(model, args=args)
    print('-' * 100)
    if args.resume == 'yes':
        # Resume from the checkpoint of the starting task (--sti).
        checkpoint = torch.load(
            os.path.join(args.checkpoint, 'model_{}.pth.tar'.format(args.sti)))
        model.load_state_dict(checkpoint['model_state_dict'])
        model = model.to(device=args.device)
    else:
        args.sti = 0
    # Loop tasks: acc/lss accumulate the full task x task evaluation matrix.
    acc = np.zeros((len(args.taskcla), len(args.taskcla)), dtype=np.float32)
    lss = np.zeros((len(args.taskcla), len(args.taskcla)), dtype=np.float32)
    for task, ncla in args.taskcla[args.sti:]:
        data_t = datagen.dataloaders[task]
        print('*' * 100)
        print('Task {:2d} ({:s})'.format(task, data_t['name']))
        print('*' * 100)
        # Train
        appr.train(task, data_t['train'], data_t['valid'])
        print('-' * 100)
        appr.save_model(task)
        # Test on every task seen so far (0..task).
        for u in range(task + 1):
            data_u = datagen.dataloaders[u]
            test_loss, test_acc = appr.eval(u, data_u['test'])
            print(
                '>>> Test on task {:2d} - {:15s}: loss={:.3f}, acc={:5.3f}% <<<'
                .format(u, data_u['name'], test_loss, 100 * test_acc))
            acc[task, u] = test_acc
            lss[task, u] = test_loss
    # Save
    print('Save at ' + args.checkpoint)
    np.savetxt(
        os.path.join(args.checkpoint,
                     '{}_{}.txt'.format(args.approach, args.seed)),
        acc, '%.5f')
    utils.print_log_acc_bwt(args, acc, lss)
    print('[Elapsed time = {:.1f} h]'.format(
        (time.time() - tstart) / (60 * 60)))
# NOTE(review): this chunk is the middle of a script `main()` whose argparse
# setup precedes this view and whose body continues after it; the elif chain
# below is therefore intentionally left open at the boundary.
args = parser.parse_args()
utils.print_arguments(args)
########################################################################################################################
# Seed: fix every RNG and force deterministic cuDNN for reproducibility.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(args.seed)
# torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
print('Using device:', args.device)
checkpoint = utils.make_directories(args)
args.checkpoint = checkpoint
print()
# Args -- Experiment: import only the dataloader for the chosen dataset.
if args.experiment == 'mnist2':
    from dataloaders import mnist2 as dataloader
elif args.experiment == 'mnist5':
    from dataloaders import mnist5 as dataloader
elif args.experiment == 'pmnist':
    from dataloaders import pmnist as dataloader
elif args.experiment == 'cifar':
    from dataloaders import cifar as dataloader
elif args.experiment == 'mixture':
    from dataloaders import mixture as dataloader
def main():
    """BLIP continual-learning experiment: parse CLI arguments, seed RNGs,
    pick dataset/approach/network, then for each task train, quantize weights
    by information gain (BLIP post-processing), and evaluate on all tasks
    seen so far."""
    tstart = time.time()
    parser = argparse.ArgumentParser(description='BLIP Image Classification')
    # Data parameters
    parser.add_argument('--seed', default=0, type=int, help='(default=%(default)d)')
    parser.add_argument('--device', default='cuda:0', type=str, help='gpu id')
    parser.add_argument('--experiment', default='mnist5', type=str, help='experiment dataset', required=True)
    parser.add_argument('--data_path', default='../data/', type=str, help='gpu id')
    # Training parameters
    parser.add_argument('--approach', default='blip', type=str, help='continual learning approach')
    parser.add_argument('--output', default='', type=str, help='')
    parser.add_argument('--checkpoint_dir', default='../checkpoints/', type=str, help='')
    parser.add_argument('--nepochs', default=200, type=int, help='')
    parser.add_argument('--sbatch', default=64, type=int, help='')
    parser.add_argument('--lr', default=0.05, type=float, help='')
    parser.add_argument('--momentum', default=0, type=float, help='')
    parser.add_argument('--weight-decay', default=0.0, type=float, help='')
    parser.add_argument('--resume', default='no', type=str, help='resume?')
    parser.add_argument('--sti', default=0, type=int, help='starting task?')
    # Model parameters
    parser.add_argument('--ndim', default=1200, type=int, help='hidden dimension for 2 layer MLP')
    parser.add_argument('--mul', default=1.0, type=float, help='multiplier of model width')
    # BLIP specific parameters
    parser.add_argument('--max-bit', default=20, type=int, help='maximum number of bits for each parameter')
    parser.add_argument('--F-prior', default=1e-15, type=float, help='scaling factor of F_prior')
    args = parser.parse_args()
    utils.print_arguments(args)
    #####################################################################################
    # Seed: fix every RNG and force deterministic cuDNN for reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    print('Using device:', args.device)
    checkpoint = utils.make_directories(args)
    args.checkpoint = checkpoint
    print()
    # Args -- Experiment: import only the dataloader for the chosen dataset.
    if args.experiment == 'mnist2':
        from dataloaders import mnist2 as dataloader
    elif args.experiment == 'mnist5':
        from dataloaders import mnist5 as dataloader
    elif args.experiment == 'pmnist':
        from dataloaders import pmnist as dataloader
    elif args.experiment == 'cifar':
        from dataloaders import cifar as dataloader
    elif args.experiment == 'mixture5':
        from dataloaders import mixture5 as dataloader
    else:
        raise NotImplementedError('dataset currently not implemented')
    # Args -- Approach
    if args.approach == 'blip':
        from approaches import blip as approach
    else:
        raise NotImplementedError('approach currently not implemented')
    # Args -- Network: quantized MLP for MNIST variants, quantized AlexNet otherwise.
    if args.experiment == 'mnist2' or args.experiment == 'pmnist' or args.experiment == 'mnist5':
        from networks import q_mlp as network
    else:
        from networks import q_alexnet as network
    ########################################################################################
    print()
    print("Starting this run on :")
    print(datetime.now().strftime("%Y-%m-%d %H:%M"))
    # Load
    print('Load data...')
    data, taskcla, inputsize = dataloader.get(data_path=args.data_path, seed=args.seed)
    print('Input size =', inputsize, '\nTask info =', taskcla)
    args.num_tasks = len(taskcla)
    args.inputsize, args.taskcla = inputsize, taskcla
    # Inits
    print('Inits...')
    model = network.Net(args).to(args.device)
    print('-' * 100)
    appr = approach.Appr(model, args=args)
    print('-' * 100)
    if args.resume == 'yes':
        # Resume from the checkpoint of the starting task (--sti).
        checkpoint = torch.load(
            os.path.join(args.checkpoint, 'model_{}.pth.tar'.format(args.sti)))
        model.load_state_dict(checkpoint['model_state_dict'])
        model = model.to(device=args.device)
    else:
        args.sti = 0
    # Loop tasks: acc/lss accumulate the full task x task evaluation matrix.
    acc = np.zeros((len(taskcla), len(taskcla)), dtype=np.float32)
    lss = np.zeros((len(taskcla), len(taskcla)), dtype=np.float32)
    num_task = len(taskcla)
    for t, ncla in taskcla[args.sti:]:
        print('*' * 100)
        print('Task {:2d} ({:s})'.format(t, data[t]['name']))
        print('*' * 100)
        # Get data
        xtrain = data[t]['train']['x'].to(args.device)
        ytrain = data[t]['train']['y'].to(args.device)
        xvalid = data[t]['valid']['x'].to(args.device)
        yvalid = data[t]['valid']['y'].to(args.device)
        task = t
        # Train
        appr.train(task, xtrain, ytrain, xvalid, yvalid)
        print('-' * 100)
        # BLIP specifics post processing
        estimate_fisher(task, args.device, model, xtrain, ytrain)
        for m in model.features.modules():
            if isinstance(m, Linear_Q) or isinstance(m, Conv2d_Q):
                # update bits according to information gain
                m.update_bits(task=task, C=0.5 / math.log(2))
                # do quantization
                m.sync_weight()
                # update Fisher in the buffer
                m.update_fisher(task=task)
        # save the model after the update
        appr.save_model(task)
        # Test on every task seen so far (0..t).
        for u in range(t + 1):
            xtest = data[u]['test']['x'].to(args.device)
            ytest = data[u]['test']['y'].to(args.device)
            test_loss, test_acc = appr.eval(u, xtest, ytest, debug=True)
            print(
                '>>> Test on task {:2d} - {:15s}: loss={:.3f}, acc={:5.3f}% <<<'
                .format(u, data[u]['name'], test_loss, 100 * test_acc))
            acc[t, u] = test_acc
            lss[t, u] = test_loss
        utils.used_capacity(model, args.max_bit)
    # Save
    print('Save at ' + args.checkpoint)
    np.savetxt(
        os.path.join(
            args.checkpoint,
            '{}_{}_{}.txt'.format(args.experiment, args.approach, args.seed)),
        acc, '%.5f')
    utils.print_log_acc_bwt(args, acc, lss)
    print('[Elapsed time = {:.1f} h]'.format(
        (time.time() - tstart) / (60 * 60)))
def cache(self, resp, resource):
    """Takes a GopherResponse and a GopherResource.  Saves the content of
    the response to disk, and returns the absolute filename saved to.

    Raises CacheException for ASK blocks and on any write error."""
    if resource.isAskType():
        # Don't cache ASK blocks.  This is because if you do, the program
        # will interpret it as data, and put the question structure inside
        # a text box.  Plus, since these may be dynamic, caching could
        # lead to missing out on things.
        raise CacheException("Do not cache AskTypes. Not a good idea.")

    basedir = self.getCachePrefix()
    basefilename = resource.toCacheFilename()

    # basefilename contains a trailing filename component that must not be
    # created as a directory: strip everything after the last os.sep so
    # e.g. "/home/x/foobar" becomes "/home/x" before making directories.
    # (The original comment claimed the value was unchanged — that was
    # wrong; the slice below removes the final component.)
    # NOTE(review): as in the original, rfind() returning -1 (no separator)
    # slices off the last character rather than failing; preserved as-is.
    dirpart = basefilename[:basefilename.rfind(os.sep)]

    # Create the directory structure where necessary
    utils.make_directories(dirpart, basedir)

    # Join basedir and basefilename without doubling the separator.
    if basedir.endswith(os.sep):
        filename = "%s%s" % (basedir, basefilename)
    else:
        filename = "%s%s%s" % (basedir, os.sep, basefilename)

    try:
        # `with` guarantees the file is closed even when a write fails —
        # the original leaked the handle on any IOError after open().
        with open(filename, "w") as fp:
            if resp.getData() is None:
                # This is a directory entry.  Each response line is a
                # GopherResource; write them as if it was a file served by
                # the gopher server so it can be easily reparsed when
                # loaded from the cache.
                for response_line in resp.getResponses():
                    fp.write(response_line.toProtocolString())
                # Write the string terminator.  This isn't really needed
                # since it isn't data, but it helps fool our other objects
                # into thinking that it's dealing with data off of a socket
                # instead of data from a file.  So do it.
                fp.write("\r\n.\r\n")
            else:
                fp.write(resp.getData())
            fp.flush()
    except IOError as errstr:
        # Some error writing data to the file.  Bummer.
        raise CacheException("Couldn't write to\n%s:\n%s" % (filename, errstr))

    # Successfully wrote the data - return the filename that was used
    # to save the data into.  (Absolute path)
    return os.path.abspath(filename)
# Load the model weights after training model_weights = torch.load(model_path + '/tem_' + str(i_start) + '.pt') # Set the model weights to the loaded trained model weights tem.load_state_dict(model_weights) # Make list of all the environments that this model was trained on envs = list(glob.iglob(envs_path + '/*')) # And increase starting iteration by 1, since the loaded model already carried out the current starting iteration i_start = i_start + 1 else: # Start training from step 0 i_start = 0 # Create directories for storing all information about the current run run_path, train_path, model_path, save_path, script_path, envs_path = utils.make_directories( ) # Save all python files in current directory to script directory files = glob.iglob(os.path.join('.', '*.py')) for file in files: if os.path.isfile(file): shutil.copy2(file, os.path.join(script_path, file)) # Initalise hyperparameters for model params = parameters.parameters() # Save parameters np.save(os.path.join(save_path, 'params'), params) # And create instance of TEM with those parameters tem = model.Model(params) # Create list of environments that we will sample from during training to provide TEM with trajectory input
def main():
    """BLIP continual-learning experiment on mini-ImageNet: parse CLI
    arguments, seed RNGs, select the quantized network architecture, then for
    each task train, quantize weights by information gain (BLIP
    post-processing), and evaluate on all tasks seen so far."""
    tstart = time.time()
    parser = argparse.ArgumentParser(description='BLIP mini-ImageNet')
    # Data parameters
    parser.add_argument('--seed', default=0, type=int, help='(default=%(default)d)')
    parser.add_argument('--device', default='cuda:0', type=str, help='gpu id')
    parser.add_argument('--experiment', default='MI', type=str)
    parser.add_argument('--data_dir', default='data', type=str, help='data directory')
    parser.add_argument('--ntasks', default=10, type=int)
    parser.add_argument('--pc-valid', default=0.02, type=float)
    parser.add_argument('--workers', default=4, type=int)
    # Training parameters
    parser.add_argument('--approach', default='blip', type=str)
    parser.add_argument('--output', default='', type=str, help='')
    parser.add_argument('--checkpoint_dir', default='checkpoints/', type=str, help='')
    parser.add_argument('--nepochs', default=200, type=int, help='')
    parser.add_argument('--sbatch', default=64, type=int, help='')
    parser.add_argument('--lr', default=0.05, type=float, help='')
    parser.add_argument('--momentum', default=0.9, type=float)
    parser.add_argument('--weight-decay', default=0.0, type=float)
    parser.add_argument('--resume', default='no', type=str, help='resume?')
    parser.add_argument('--sti', default=0, type=int, help='starting task?')
    # model parameters
    parser.add_argument('--mul', default=1.0, type=float)
    parser.add_argument('--arch', default='alexnet', type=str)
    # BLIP parameters
    parser.add_argument('--max-bit', default=20, type=int, help='')
    parser.add_argument('--F-prior', default=1e-15, type=float, help='')
    args = parser.parse_args()
    utils.print_arguments(args)
    #####################################################################################
    # Seed: fix every RNG and force deterministic cuDNN for reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    print('Using device:', args.device)
    checkpoint = utils.make_directories(args)
    args.checkpoint = checkpoint
    print()
    # Args -- Experiment
    from dataloaders.miniimagenet import DatasetGen
    # Args -- Approach
    from approaches import blip as approach
    # Args -- Network: choose the quantized architecture.
    if args.arch == 'alexnet':
        from networks import q_alexnet as network
    elif args.arch == 'resnet':
        from networks import q_resnet as network
    else:
        raise NotImplementedError("network currently not implemented")
    ########################################################################################
    print()
    print("Starting this run on :")
    print(datetime.now().strftime("%Y-%m-%d %H:%M"))
    # Load
    print('Load data...')
    # prepare data for each task
    datagen = DatasetGen(args)
    for task_id in range(args.ntasks):
        datagen.get(task_id)
    print('\nTask info =', datagen.taskcla)
    args.num_tasks = len(datagen.taskcla)
    args.inputsize, args.taskcla = datagen.inputsize, datagen.taskcla
    # Inits
    print('Inits...')
    model = network.Net(args).to(args.device)
    # print number of parameters (assumes 4 bytes per parameter)
    count = 0
    for p in model.parameters():
        count += np.prod(p.size())
    print('model size in MB: ', count * 4 / (1024 * 1024))
    print('-' * 100)
    appr = approach.Appr(model, args=args)
    print('-' * 100)
    if args.resume == 'yes':
        # Resume from the checkpoint of the starting task (--sti).
        checkpoint = torch.load(
            os.path.join(args.checkpoint, 'model_{}.pth.tar'.format(args.sti)))
        model.load_state_dict(checkpoint['model_state_dict'])
        model = model.to(device=args.device)
    else:
        args.sti = 0
    # Loop tasks: acc/lss accumulate the full task x task evaluation matrix.
    acc = np.zeros((len(args.taskcla), len(args.taskcla)), dtype=np.float32)
    lss = np.zeros((len(args.taskcla), len(args.taskcla)), dtype=np.float32)
    for task, ncla in args.taskcla[args.sti:]:
        data_t = datagen.dataloaders[task]
        print('*' * 100)
        print('Task {:2d} ({:s})'.format(task, data_t['name']))
        print('*' * 100)
        # Train
        appr.train(task, data_t['train'], data_t['valid'])
        print('-' * 100)
        # BLIP post-processing on quantized layers.
        estimate_fisher(task, args.device, model, data_t['fisher'])
        for m in model.modules():
            if isinstance(m, Linear_Q) or isinstance(m, Conv2d_Q):
                # update bits according to information gain
                m.update_bits(task=task, C=0.5 / math.log(2))
                # do quantization
                m.sync_weight()
                # update Fisher in the buffer
                m.update_fisher(task=task)
        # save the model after the update
        appr.save_model(task)
        # Test on every task seen so far (0..task).
        for u in range(task + 1):
            data_u = datagen.dataloaders[u]
            test_loss, test_acc = appr.eval(u, data_u['test'])
            print(
                '>>> Test on task {:2d} - {:15s}: loss={:.3f}, acc={:5.3f}% <<<'
                .format(u, data_u['name'], test_loss, 100 * test_acc))
            acc[task, u] = test_acc
            lss[task, u] = test_loss
        utils.used_capacity(model, args.max_bit)
    # Save
    print('Save at ' + args.checkpoint)
    np.savetxt(
        os.path.join(
            args.checkpoint,
            '{}_{}_{}_{}.txt'.format(args.approach, args.arch, args.seed,
                                     str(args.F_prior))),
        acc, '%.5f')
    utils.print_log_acc_bwt(args, acc, lss)
    print('[Elapsed time = {:.1f} h]'.format(
        (time.time() - tstart) / (60 * 60)))