def main(args):
    # Seed every RNG source for reproducibility.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

    my_experiment = experiment(args.name, args, "/data5/jlindsey/continual/results", args.commit)
    writer = SummaryWriter(my_experiment.path + "tensorboard")

    logger = logging.getLogger('experiment')
    logger.setLevel(logging.INFO)

    # Layers belonging to the representation network (RLN) stay frozen during fine-tuning.
    total_clases = 10
    frozen_layers = []
    for temp in range(args.rln * 2):
        frozen_layers.append("vars." + str(temp))
    logger.info("Frozen layers = %s", " ".join(frozen_layers))

    final_results_all = []
    total_clases = [10, 50, 75, 100, 150, 200]
    if args.twentyclass:
        total_clases = [20, 50]
    if args.fiveclass:
        total_clases = [5]

    for tot_class in total_clases:
        avg_perf = 0.0
        lr_list = [0.03]
        for aoo in range(0, args.runs):
            # Sample the classes kept for this run and build the class-sorted train loader
            # and the held-out evaluation loader.
            keep = np.random.choice(list(range(200)), tot_class, replace=False)
            print('keep', keep)
            if args.dataset == "omniglot":
                dataset = utils.remove_classes_omni(
                    df.DatasetFactory.get_dataset("omniglot", train=True, background=False), keep)
                print('lenbefore', len(dataset.data))
                iterator_sorted = torch.utils.data.DataLoader(
                    utils.iterator_sorter_omni(dataset, False, classes=total_clases),
                    batch_size=1, shuffle=args.iid, num_workers=2)
                print("LEN", len(iterator_sorted), len(dataset.data))
                dataset = utils.remove_classes_omni(
                    df.DatasetFactory.get_dataset("omniglot", train=not args.test, background=False), keep)
                iterator = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=False, num_workers=1)
            elif args.dataset == "CIFAR100":
                keep = np.random.choice(list(range(50, 100)), tot_class)
                dataset = utils.remove_classes(df.DatasetFactory.get_dataset(args.dataset, train=True), keep)
                iterator_sorted = torch.utils.data.DataLoader(
                    utils.iterator_sorter(dataset, False, classes=tot_class),
                    batch_size=16, shuffle=args.iid, num_workers=2)
                dataset = utils.remove_classes(df.DatasetFactory.get_dataset(args.dataset, train=False), keep)
                iterator = torch.utils.data.DataLoader(dataset, batch_size=128, shuffle=False, num_workers=1)
            # sampler = ts.MNISTSampler(list(range(0, total_clases)), dataset)
            # print(args)

            if torch.cuda.is_available():
                device = torch.device('cuda')
            else:
                device = torch.device('cpu')

            results_mem_size = {}
            for mem_size in [args.memory]:
                max_acc = -10
                max_lr = -10
                for lr in lr_list:
                    print(lr)
                    # for lr in [0.001, 0.0003, 0.0001, 0.00003, 0.00001]:
                    # Load the meta-trained learner (or build one from scratch).
                    maml = torch.load(args.model, map_location='cpu')
                    if args.scratch:
                        config = mf.ModelFactory.get_model("na", args.dataset)
                        maml = learner.Learner(config)
                        # maml = MetaLearingClassification(args, config).to(device).net
                    maml = maml.to(device)

                    # Mark every parameter as learnable, then freeze (and optionally reset) the RLN layers.
                    for name, param in maml.named_parameters():
                        param.learn = True
                    for name, param in maml.named_parameters():
                        # logger.info(name)
                        if name in frozen_layers:
                            # logger.info("Freezing name %s", str(name))
                            param.learn = False
                            # logger.info(str(param.requires_grad))
                        else:
                            if args.reset:
                                w = nn.Parameter(torch.ones_like(param))
                                # logger.info("W shape = %s", str(len(w.shape)))
                                if len(w.shape) > 1:
                                    torch.nn.init.kaiming_normal_(w)
                                else:
                                    w = nn.Parameter(torch.zeros_like(param))
                                param.data = w
                                param.learn = True

                    frozen_layers = []
                    for temp in range(args.rln * 2):
                        frozen_layers.append("vars." + str(temp))

                    # torch.nn.init.kaiming_normal_(maml.parameters()[-2])
                    # w = nn.Parameter(torch.zeros_like(maml.parameters()[-1]))
                    # maml.parameters()[-1].data = w
                    # for n, a in maml.named_parameters():
                    #     n = n.replace(".", "_")
                    #     # logger.info("Name = %s", n)
                    #     if n == "vars_14":
                    #         w = nn.Parameter(torch.ones_like(a))
                    #         # logger.info("W shape = %s", str(w.shape))
                    #         torch.nn.init.kaiming_normal_(w)
                    #         a.data = w
                    #     if n == "vars_15":
                    #         w = nn.Parameter(torch.zeros_like(a))
                    #         a.data = w

                    # Accuracy of the loaded model before any fine-tuning.
                    correct = 0
                    for img, target in iterator:
                        with torch.no_grad():
                            img = img.to(device)
                            target = target.to(device)
                            logits_q = maml(img, vars=None, bn_training=False, feature=False)
                            pred_q = logits_q.argmax(dim=1)
                            correct += torch.eq(pred_q, target).sum().item() / len(img)
                    logger.info("Pre-epoch accuracy %s", str(correct / len(iterator)))

                    filter_list = ["vars.0", "vars.1", "vars.2", "vars.3", "vars.4", "vars.5"]
                    logger.info("Filter list = %s", ",".join(filter_list))
                    list_of_names = list(
                        map(lambda x: x[1],
                            list(filter(lambda x: x[0] not in filter_list, maml.named_parameters()))))
                    list_of_params = list(filter(lambda x: x.learn, maml.parameters()))
                    list_of_names = list(filter(lambda x: x[1].learn, maml.named_parameters()))
                    if args.scratch or args.no_freeze:
                        print("Empty filter list")
                        list_of_params = maml.parameters()
                    # for x in list_of_names:
                    #     logger.info("Unfrozen layer = %s", str(x[0]))

                    # The Adam optimizer is only used for zero_grad below; the actual updates
                    # are applied functionally to fast_weights.
                    opt = torch.optim.Adam(list_of_params, lr=lr)

                    fast_weights = maml.vars
                    if args.randomize_plastic_weights:
                        maml.randomize_plastic_weights()
                    if args.zero_plastic_weights:
                        maml.zero_plastic_weights()

                    # Fine-tune on the class-sorted stream, keeping only args.shots examples per class.
                    for epoch_idx in range(0, args.epoch):
                        iter_count = 0
                        imgs = []
                        ys = []
                        for img, y in iterator_sorted:
                            # print(iter_count, y)
                            if iter_count % 15 >= args.shots:
                                iter_count += 1
                                continue
                            iter_count += 1
                            img = img.to(device)
                            y = y.to(device)
                            imgs.append(img)
                            ys.append(y)
                            if not args.batch_learning:
                                pred = maml(img, vars=fast_weights)
                                opt.zero_grad()
                                loss = F.cross_entropy(pred, y)
                                grad = torch.autograd.grad(loss, fast_weights)
                                # fast_weights = list(map(lambda p: p[1] - self.update_lr * p[0], zip(grad, self.net.parameters())))
                                if args.plastic_update:
                                    fast_weights = list(
                                        map(lambda p: p[1] - p[0] * p[2] if p[1].learn else p[1],
                                            zip(grad, fast_weights, maml.vars_plasticity)))
                                else:
                                    fast_weights = list(
                                        map(lambda p: p[1] - args.update_lr * p[0] if p[1].learn else p[1],
                                            zip(grad, fast_weights)))
                                for params_old, params_new in zip(maml.parameters(), fast_weights):
                                    params_new.learn = params_old.learn
                        if args.batch_learning:
                            # Single update on the whole buffered batch instead of one update per example.
                            y = torch.cat(ys, 0)
                            img = torch.cat(imgs, 0)
                            pred = maml(img, vars=fast_weights)
                            opt.zero_grad()
                            loss = F.cross_entropy(pred, y)
                            grad = torch.autograd.grad(loss, fast_weights)
                            if args.plastic_update:
                                fast_weights = list(
                                    map(lambda p: p[1] - p[0] * p[2] if p[1].learn else p[1],
                                        zip(grad, fast_weights, maml.vars_plasticity)))
                            else:
                                fast_weights = list(
                                    map(lambda p: p[1] - args.update_lr * p[0] if p[1].learn else p[1],
                                        zip(grad, fast_weights)))
                            for params_old, params_new in zip(maml.parameters(), fast_weights):
                                params_new.learn = params_old.learn
                        # loss.backward()
                        # opt.step()

                    # Evaluate the fine-tuned fast weights on the held-out iterator.
                    logger.info("Result after one epoch for LR = %f", lr)
                    correct = 0
                    for img, target in iterator:
                        img = img.to(device)
                        target = target.to(device)
                        logits_q = maml(img, vars=fast_weights, bn_training=False, feature=False)
                        pred_q = logits_q.argmax(dim=1)
                        correct += torch.eq(pred_q, target).sum().item() / len(img)
                    logger.info(str(correct / len(iterator)))
                    if correct / len(iterator) > max_acc:
                        max_acc = correct / len(iterator)
                        max_lr = lr

                lr_list = [max_lr]
                results_mem_size[mem_size] = (max_acc, max_lr)
                avg_perf += max_acc / args.runs
                print('avg perf', avg_perf * args.runs / (1 + aoo))
                logger.info("Final Max Result = %s", str(max_acc))
                writer.add_scalar('/finetune/best_' + str(aoo), max_acc, tot_class)

            final_results_all.append((tot_class, results_mem_size))
            print("A= ", results_mem_size)
            logger.info("Final results = %s", str(results_mem_size))
            my_experiment.results["Final Results"] = final_results_all
            my_experiment.store_json()
            np.save('evals/final_results_' + args.orig_name + '.npy', final_results_all)
            print("FINAL RESULTS = ", final_results_all)
    writer.close()
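# Note on the inner loop above: it never calls opt.step(); instead it builds a new list of
# "fast weights" from the gradients, leaving maml's stored parameters untouched. The helper
# below is only a condensed restatement of the two branches used above (fixed args.update_lr
# vs. per-parameter plasticity from maml.vars_plasticity); the function name and the use of
# getattr are illustrative and not part of the original script.
def functional_update(fast_weights, grads, update_lr, plasticity=None):
    # Return new fast weights; parameters flagged with .learn == False pass through unchanged.
    new_weights = []
    for i, (g, w) in enumerate(zip(grads, fast_weights)):
        if not getattr(w, "learn", True):
            new_weights.append(w)
        elif plasticity is not None:
            new_weights.append(w - g * plasticity[i])  # args.plastic_update branch
        else:
            new_weights.append(w - update_lr * g)      # fixed learning-rate branch
    return new_weights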
def main(args):
    # Seed every RNG source for reproducibility.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

    my_experiment = experiment(args.name, args, "../results/", args.commit)
    writer = SummaryWriter(my_experiment.path + "tensorboard")

    logger = logging.getLogger('experiment')
    logger.setLevel(logging.INFO)

    # Layers belonging to the representation network (RLN) stay frozen during fine-tuning.
    total_clases = 10
    frozen_layers = []
    for temp in range(args.rln * 2):
        frozen_layers.append("vars." + str(temp))
    logger.info("Frozen layers = %s", " ".join(frozen_layers))
    # for v in range(6):
    #     frozen_layers.append("vars_bn.{0}".format(v))

    final_results_all = []
    temp_result = []
    total_clases = args.schedule

    for tot_class in total_clases:
        # Phase 1: grid-search the fine-tuning learning rate on held-out class subsets.
        lr_list = [0.001, 0.0006, 0.0004, 0.00035, 0.0003, 0.00025, 0.0002, 0.00015,
                   0.0001, 0.00009, 0.00008, 0.00006, 0.00003, 0.00001]
        lr_all = []
        for lr_search in range(10):
            keep = np.random.choice(list(range(650)), tot_class, replace=False)

            dataset = utils.remove_classes_omni(
                df.DatasetFactory.get_dataset("omniglot", train=True, background=False,
                                              path=args.dataset_path), keep)
            iterator_sorted = torch.utils.data.DataLoader(
                utils.iterator_sorter_omni(dataset, False, classes=total_clases),
                batch_size=1, shuffle=args.iid, num_workers=2)
            dataset = utils.remove_classes_omni(
                df.DatasetFactory.get_dataset("omniglot", train=not args.test, background=False,
                                              path=args.dataset_path), keep)
            iterator = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=1)
            print(args)

            if torch.cuda.is_available():
                device = torch.device('cuda')
            else:
                device = torch.device('cpu')

            results_mem_size = {}
            for mem_size in [args.memory]:
                max_acc = -10
                max_lr = -10
                for lr in lr_list:
                    print(lr)
                    maml = torch.load(args.model, map_location='cpu')
                    if args.scratch:
                        config = mf.ModelFactory.get_model("OML", args.dataset)
                        maml = learner.Learner(config)
                        # maml = MetaLearingClassification(args, config).to(device).net
                    maml = maml.to(device)

                    # Mark every parameter as learnable, then freeze (and optionally reset) the RLN layers.
                    for name, param in maml.named_parameters():
                        param.learn = True
                    for name, param in maml.named_parameters():
                        # logger.info(name)
                        if name in frozen_layers:
                            param.learn = False
                        else:
                            if args.reset:
                                w = nn.Parameter(torch.ones_like(param))
                                # logger.info("W shape = %s", str(len(w.shape)))
                                if len(w.shape) > 1:
                                    torch.nn.init.kaiming_normal_(w)
                                else:
                                    w = nn.Parameter(torch.zeros_like(param))
                                param.data = w
                                param.learn = True

                    frozen_layers = []
                    for temp in range(args.rln * 2):
                        frozen_layers.append("vars." + str(temp))

                    # Re-initialize the output head before fine-tuning.
                    torch.nn.init.kaiming_normal_(maml.parameters()[-2])
                    w = nn.Parameter(torch.zeros_like(maml.parameters()[-1]))
                    maml.parameters()[-1].data = w

                    if args.neuromodulation:
                        weights2reset = ["vars_26"]
                        biases2reset = ["vars_27"]
                    else:
                        weights2reset = ["vars_14"]
                        biases2reset = ["vars_15"]
                    for n, a in maml.named_parameters():
                        n = n.replace(".", "_")
                        if n in weights2reset:
                            w = nn.Parameter(torch.ones_like(a)).to(device)
                            torch.nn.init.kaiming_normal_(w)
                            a.data = w
                        if n in biases2reset:
                            w = nn.Parameter(torch.zeros_like(a)).to(device)
                            a.data = w

                    filter_list = ["vars.{0}".format(v) for v in range(6)]
                    logger.info("Filter list = %s", ",".join(filter_list))
                    list_of_names = list(
                        map(lambda x: x[1],
                            list(filter(lambda x: x[0] not in filter_list, maml.named_parameters()))))
                    list_of_params = list(filter(lambda x: x.learn, maml.parameters()))
                    list_of_names = list(filter(lambda x: x[1].learn, maml.named_parameters()))
                    if args.scratch or args.no_freeze:
                        print("Empty filter list")
                        list_of_params = maml.parameters()
                    for x in list_of_names:
                        logger.info("Unfrozen layer = %s", str(x[0]))

                    # Standard SGD-style fine-tuning on the class-sorted stream.
                    opt = torch.optim.Adam(list_of_params, lr=lr)
                    for _ in range(0, args.epoch):
                        for img, y in iterator_sorted:
                            img = img.to(device)
                            y = y.to(device)
                            pred = maml(img)
                            opt.zero_grad()
                            loss = F.cross_entropy(pred, y)
                            loss.backward()
                            opt.step()

                    logger.info("Result after one epoch for LR = %f", lr)
                    correct = 0
                    for img, target in iterator:
                        img = img.to(device)
                        target = target.to(device)
                        logits_q = maml(img, vars=None, bn_training=False, feature=False)
                        pred_q = logits_q.argmax(dim=1)
                        correct += torch.eq(pred_q, target).sum().item() / len(img)
                    logger.info(str(correct / len(iterator)))
                    if correct / len(iterator) > max_acc:
                        max_acc = correct / len(iterator)
                        max_lr = lr

                lr_all.append(max_lr)
                results_mem_size[mem_size] = (max_acc, max_lr)
                logger.info("Final Max Result = %s", str(max_acc))
                writer.add_scalar('/finetune/best_' + str(lr_search), max_acc, tot_class)

            temp_result.append((tot_class, results_mem_size))
            print("A= ", results_mem_size)
            logger.info("Temp Results = %s", str(results_mem_size))
            my_experiment.results["Temp Results"] = temp_result
            my_experiment.store_json()
            print("LR RESULTS = ", temp_result)

        # Use the most frequently selected learning rate for the final runs.
        from scipy import stats
        best_lr = float(stats.mode(lr_all)[0][0])
        logger.info("Best LR = %s", str(best_lr))

        # Phase 2: evaluate with the selected learning rate.
        for aoo in range(args.runs):
            keep = np.random.choice(list(range(650)), tot_class, replace=False)
            if args.dataset == "omniglot":
                dataset = utils.remove_classes_omni(
                    df.DatasetFactory.get_dataset("omniglot", train=True, background=False), keep)
                iterator_sorted = torch.utils.data.DataLoader(
                    utils.iterator_sorter_omni(dataset, False, classes=total_clases),
                    batch_size=1, shuffle=args.iid, num_workers=2)
                dataset = utils.remove_classes_omni(
                    df.DatasetFactory.get_dataset("omniglot", train=not args.test, background=False), keep)
                iterator = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=1)
            elif args.dataset == "CIFAR100":
                keep = np.random.choice(list(range(50, 100)), tot_class)
                dataset = utils.remove_classes(df.DatasetFactory.get_dataset(args.dataset, train=True), keep)
                iterator_sorted = torch.utils.data.DataLoader(
                    utils.iterator_sorter(dataset, False, classes=tot_class),
                    batch_size=16, shuffle=args.iid, num_workers=2)
                dataset = utils.remove_classes(df.DatasetFactory.get_dataset(args.dataset, train=False), keep)
                iterator = torch.utils.data.DataLoader(dataset, batch_size=128, shuffle=False, num_workers=1)
            print(args)

            if torch.cuda.is_available():
                device = torch.device('cuda')
            else:
                device = torch.device('cpu')

            results_mem_size = {}
            for mem_size in [args.memory]:
                max_acc = -10
                max_lr = -10
                lr = best_lr

                maml = torch.load(args.model, map_location='cpu')
                if args.scratch:
                    config = mf.ModelFactory.get_model("MRCL", args.dataset)
                    maml = learner.Learner(config)
                maml = maml.to(device)

                for name, param in maml.named_parameters():
                    param.learn = True
                for name, param in maml.named_parameters():
                    # logger.info(name)
                    if name in frozen_layers:
                        param.learn = False
                    else:
                        if args.reset:
                            w = nn.Parameter(torch.ones_like(param))
                            if len(w.shape) > 1:
                                torch.nn.init.kaiming_normal_(w)
                            else:
                                w = nn.Parameter(torch.zeros_like(param))
                            param.data = w
                            param.learn = True

                frozen_layers = []
                for temp in range(args.rln * 2):
                    frozen_layers.append("vars." + str(temp))

                # Re-initialize the output head before fine-tuning.
                torch.nn.init.kaiming_normal_(maml.parameters()[-2])
                w = nn.Parameter(torch.zeros_like(maml.parameters()[-1]))
                maml.parameters()[-1].data = w

                if args.neuromodulation:
                    weights2reset = ["vars_26"]
                    biases2reset = ["vars_27"]
                else:
                    weights2reset = ["vars_14"]
                    biases2reset = ["vars_15"]
                for n, a in maml.named_parameters():
                    n = n.replace(".", "_")
                    if n in weights2reset:
                        w = nn.Parameter(torch.ones_like(a)).to(device)
                        torch.nn.init.kaiming_normal_(w)
                        a.data = w
                    if n in biases2reset:
                        w = nn.Parameter(torch.zeros_like(a)).to(device)
                        a.data = w

                # Accuracy of the loaded model before any fine-tuning.
                correct = 0
                for img, target in iterator:
                    with torch.no_grad():
                        img = img.to(device)
                        target = target.to(device)
                        logits_q = maml(img, vars=None, bn_training=False, feature=False)
                        pred_q = logits_q.argmax(dim=1)
                        correct += torch.eq(pred_q, target).sum().item() / len(img)
                logger.info("Pre-epoch accuracy %s", str(correct / len(iterator)))

                filter_list = ["vars.{0}".format(v) for v in range(6)]
                logger.info("Filter list = %s", ",".join(filter_list))
                list_of_names = list(
                    map(lambda x: x[1],
                        list(filter(lambda x: x[0] not in filter_list, maml.named_parameters()))))
                list_of_params = list(filter(lambda x: x.learn, maml.parameters()))
                list_of_names = list(filter(lambda x: x[1].learn, maml.named_parameters()))
                if args.scratch or args.no_freeze:
                    print("Empty filter list")
                    list_of_params = maml.parameters()
                for x in list_of_names:
                    logger.info("Unfrozen layer = %s", str(x[0]))

                opt = torch.optim.Adam(list_of_params, lr=lr)
                for _ in range(0, args.epoch):
                    for img, y in iterator_sorted:
                        img = img.to(device)
                        y = y.to(device)
                        pred = maml(img)
                        opt.zero_grad()
                        loss = F.cross_entropy(pred, y)
                        loss.backward()
                        opt.step()

                logger.info("Result after one epoch for LR = %f", lr)
                correct = 0
                for img, target in iterator:
                    img = img.to(device)
                    target = target.to(device)
                    logits_q = maml(img, vars=None, bn_training=False, feature=False)
                    pred_q = logits_q.argmax(dim=1)
                    correct += torch.eq(pred_q, target).sum().item() / len(img)
                logger.info(str(correct / len(iterator)))
                if correct / len(iterator) > max_acc:
                    max_acc = correct / len(iterator)
                    max_lr = lr

                lr_list = [max_lr]
                results_mem_size[mem_size] = (max_acc, max_lr)
                logger.info("Final Max Result = %s", str(max_acc))
                writer.add_scalar('/finetune/best_' + str(aoo), max_acc, tot_class)

            final_results_all.append((tot_class, results_mem_size))
            print("A= ", results_mem_size)
            logger.info("Final results = %s", str(results_mem_size))
            my_experiment.results["Final Results"] = final_results_all
            my_experiment.store_json()
            print("FINAL RESULTS = ", final_results_all)
    writer.close()
def main(args):
    # Seed every RNG source for reproducibility.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

    my_experiment = experiment(args.name, args, "../results/", args.commit)
    writer = SummaryWriter(my_experiment.path + "tensorboard")

    logger = logging.getLogger('experiment')
    logger.setLevel(logging.INFO)

    # Layers belonging to the representation network (RLN) stay frozen during fine-tuning.
    total_clases = 10
    frozen_layers = []
    for temp in range(args.rln * 2):
        frozen_layers.append("vars." + str(temp))
    logger.info("Frozen layers = %s", " ".join(frozen_layers))

    final_results_all = []
    total_clases = [10, 50, 75, 100, 150, 200]
    for tot_class in total_clases:
        lr_list = [0.03, 0.01, 0.003, 0.001, 0.0003, 0.0001, 0.00003, 0.00001]
        for aoo in range(0, 20):
            keep = np.random.choice(list(range(200)), tot_class)
            if args.dataset == "omniglot":
                dataset = utils.remove_classes_omni(
                    df.DatasetFactory.get_dataset("omniglot", train=True, background=False), keep)
                iterator_sorted = torch.utils.data.DataLoader(
                    utils.iterator_sorter_omni(dataset, False, classes=total_clases),
                    batch_size=1, shuffle=args.iid, num_workers=2)
                dataset = utils.remove_classes_omni(
                    df.DatasetFactory.get_dataset("omniglot", train=not args.test, background=False), keep)
                iterator = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=False, num_workers=1)
            elif args.dataset == "CIFAR100":
                keep = np.random.choice(list(range(50, 100)), tot_class)
                dataset = utils.remove_classes(df.DatasetFactory.get_dataset(args.dataset, train=True), keep)
                iterator_sorted = torch.utils.data.DataLoader(
                    utils.iterator_sorter(dataset, False, classes=tot_class),
                    batch_size=16, shuffle=args.iid, num_workers=2)
                dataset = utils.remove_classes(df.DatasetFactory.get_dataset(args.dataset, train=False), keep)
                iterator = torch.utils.data.DataLoader(dataset, batch_size=128, shuffle=False, num_workers=1)
            # sampler = ts.MNISTSampler(list(range(0, total_clases)), dataset)
            # print(args)

            if torch.cuda.is_available():
                device = torch.device('cuda')
            else:
                device = torch.device('cpu')

            results_mem_size = {}
            for mem_size in [args.memory]:
                max_acc = -10
                max_lr = -10
                for lr in lr_list:
                    print(lr)
                    # for lr in [0.001, 0.0003, 0.0001, 0.00003, 0.00001]:
                    maml = torch.load(args.model, map_location='cpu')
                    if args.scratch:
                        config = mf.ModelFactory.get_model("na", args.dataset)
                        maml = learner.Learner(config)
                        # maml = MetaLearingClassification(args, config).to(device).net
                    maml = maml.to(device)

                    # Mark every parameter as learnable, then freeze (and optionally reset) the RLN layers.
                    for name, param in maml.named_parameters():
                        param.learn = True
                    for name, param in maml.named_parameters():
                        # logger.info(name)
                        if name in frozen_layers:
                            # logger.info("Freezing name %s", str(name))
                            param.learn = False
                            # logger.info(str(param.requires_grad))
                        else:
                            if args.reset:
                                w = nn.Parameter(torch.ones_like(param))
                                # logger.info("W shape = %s", str(len(w.shape)))
                                if len(w.shape) > 1:
                                    torch.nn.init.kaiming_normal_(w)
                                else:
                                    w = nn.Parameter(torch.zeros_like(param))
                                param.data = w
                                param.learn = True

                    frozen_layers = []
                    for temp in range(args.rln * 2):
                        frozen_layers.append("vars." + str(temp))

                    # Re-initialize the output head before fine-tuning.
                    torch.nn.init.kaiming_normal_(maml.parameters()[-2])
                    w = nn.Parameter(torch.zeros_like(maml.parameters()[-1]))
                    maml.parameters()[-1].data = w
                    for n, a in maml.named_parameters():
                        n = n.replace(".", "_")
                        # logger.info("Name = %s", n)
                        if n == "vars_14":
                            w = nn.Parameter(torch.ones_like(a))
                            # logger.info("W shape = %s", str(w.shape))
                            torch.nn.init.kaiming_normal_(w)
                            a.data = w
                        if n == "vars_15":
                            w = nn.Parameter(torch.zeros_like(a))
                            a.data = w

                    # Accuracy of the loaded model before any fine-tuning.
                    correct = 0
                    for img, target in iterator:
                        with torch.no_grad():
                            img = img.to(device)
                            target = target.to(device)
                            logits_q = maml(img, vars=None, bn_training=False, feature=False)
                            pred_q = logits_q.argmax(dim=1)
                            correct += torch.eq(pred_q, target).sum().item() / len(img)
                    logger.info("Pre-epoch accuracy %s", str(correct / len(iterator)))

                    filter_list = ["vars.0", "vars.1", "vars.2", "vars.3", "vars.4", "vars.5"]
                    logger.info("Filter list = %s", ",".join(filter_list))
                    list_of_names = list(
                        map(lambda x: x[1],
                            list(filter(lambda x: x[0] not in filter_list, maml.named_parameters()))))
                    list_of_params = list(filter(lambda x: x.learn, maml.parameters()))
                    list_of_names = list(filter(lambda x: x[1].learn, maml.named_parameters()))
                    if args.scratch or args.no_freeze:
                        print("Empty filter list")
                        list_of_params = maml.parameters()
                    # for x in list_of_names:
                    #     logger.info("Unfrozen layer = %s", str(x[0]))
                    opt = torch.optim.Adam(list_of_params, lr=lr)

                    # Fine-tune on the class-sorted stream, replaying samples from a reservoir
                    # buffer when a nonzero memory size is given.
                    import module.replay as rep
                    res_sampler = rep.ReservoirSampler(mem_size)
                    for _ in range(0, args.epoch):
                        for img, y in iterator_sorted:
                            if mem_size > 0:
                                res_sampler.update_buffer(zip(img, y))
                                res_sampler.update_observations(len(img))
                                img = img.to(device)
                                y = y.to(device)
                                img2, y2 = res_sampler.sample_buffer(16)
                                img2 = img2.to(device)
                                y2 = y2.to(device)
                                img = torch.cat([img, img2], dim=0)
                                y = torch.cat([y, y2], dim=0)
                            else:
                                img = img.to(device)
                                y = y.to(device)
                            pred = maml(img)
                            opt.zero_grad()
                            loss = F.cross_entropy(pred, y)
                            loss.backward()
                            opt.step()

                    logger.info("Result after one epoch for LR = %f", lr)
                    correct = 0
                    for img, target in iterator:
                        img = img.to(device)
                        target = target.to(device)
                        logits_q = maml(img, vars=None, bn_training=False, feature=False)
                        pred_q = logits_q.argmax(dim=1)
                        correct += torch.eq(pred_q, target).sum().item() / len(img)
                    logger.info(str(correct / len(iterator)))
                    if correct / len(iterator) > max_acc:
                        max_acc = correct / len(iterator)
                        max_lr = lr

                lr_list = [max_lr]
                results_mem_size[mem_size] = (max_acc, max_lr)
                logger.info("Final Max Result = %s", str(max_acc))
                writer.add_scalar('/finetune/best_' + str(aoo), max_acc, tot_class)
                # quit()

            final_results_all.append((tot_class, results_mem_size))
            print("A= ", results_mem_size)
            logger.info("Final results = %s", str(results_mem_size))
            my_experiment.results["Final Results"] = final_results_all
            my_experiment.store_json()
            print("FINAL RESULTS = ", final_results_all)
    writer.close()
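# The fine-tuning loop above mixes each incoming example with a batch replayed from
# module.replay.ReservoirSampler, whose implementation is outside this excerpt. The class
# below is a minimal sketch with the same interface (update_buffer / update_observations /
# sample_buffer), assuming standard reservoir sampling (Algorithm R). The class name and
# internals are assumptions inferred from the call sites above, not the repo's actual code.
import random

import torch


class SketchReservoirSampler:
    def __init__(self, capacity):
        self.capacity = capacity   # maximum number of (img, y) pairs kept in memory
        self.buffer = []           # stored (img, y) pairs
        self.observed = 0          # number of stream examples seen so far

    def update_buffer(self, examples):
        # Reservoir sampling (Algorithm R): every example seen so far ends up in the
        # buffer with equal probability capacity / observed.
        seen = self.observed
        for img, y in examples:
            seen += 1
            if len(self.buffer) < self.capacity:
                self.buffer.append((img.clone(), y.clone()))
            else:
                j = random.randrange(seen)
                if j < self.capacity:
                    self.buffer[j] = (img.clone(), y.clone())

    def update_observations(self, n):
        # The calling script advances the stream counter separately after each batch.
        self.observed += n

    def sample_buffer(self, batch_size):
        # Return a random mini-batch of stored examples as stacked tensors
        # (assumes at least one example has already been stored).
        batch = random.sample(self.buffer, min(batch_size, len(self.buffer)))
        imgs = torch.stack([pair[0] for pair in batch])
        ys = torch.stack([pair[1] for pair in batch])
        return imgs, ys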
def main(args):
    # Seed every RNG source for reproducibility.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

    my_experiment = experiment(args.name, args, "./evals/", args.commit)
    writer = SummaryWriter(my_experiment.path + "tensorboard")

    # Resolve the latest saved version of the meta-trained model.
    ver = 0
    while os.path.exists(args.modelX + "_" + str(ver)):
        ver += 1
    args.modelX = args.modelX + "_" + str(ver - 1) + "/learner.model"

    logger = logging.getLogger('experiment')
    logger.setLevel(logging.INFO)

    # Freeze the representation layers at the front (rln) and, optionally, at the back (rln_end).
    total_clases = 10
    total_ff_vars = 2 * (6 + 2 + args.num_extra_dense_layers)
    frozen_layers = []
    for temp in range(args.rln * 2):
        frozen_layers.append("vars." + str(temp))
    for temp in range(args.rln_end * 2):
        frozen_layers.append("net.vars." + str(total_ff_vars - 1 - temp))
    # logger.info("Frozen layers = %s", " ".join(frozen_layers))

    final_results_all = []
    total_clases = [5]
    if args.twentyclass:
        total_clases = [20]
    if args.twotask:
        total_clases = [2, 10]
    if args.fiftyclass:
        total_clases = [50]
    if args.tenclass:
        total_clases = [10]
    if args.fiveclass:
        total_clases = [5]
    print('total_clases', total_clases)

    for tot_class in total_clases:
        avg_perf = 0.0
        print('TOT_CLASS', tot_class)
        lr_list = [0]  # [0.03, 0.01, 0.003, 0.001, 0.0003, 0.0001, 0.00003, 0.00001]
        for aoo in range(0, args.runs):
            # print('run', aoo)
            keep = np.random.choice(list(range(650)), tot_class, replace=False)

            if args.dataset == "imagenet":
                keep = np.random.choice(list(range(20)), tot_class, replace=False)
                dataset = imgnet.MiniImagenet(args.imagenet_path, mode='test', elem_per_class=30,
                                              classes=keep, seed=aoo)
                dataset_test = imgnet.MiniImagenet(args.imagenet_path, mode='test', elem_per_class=30,
                                                   classes=keep, test=args.test, seed=aoo)
                iterator = torch.utils.data.DataLoader(dataset_test, batch_size=128, shuffle=True, num_workers=1)
                iterator_sorted = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=1)
            if args.dataset == "omniglot":
                dataset = utils.remove_classes_omni(
                    df.DatasetFactory.get_dataset("omniglot", train=True, background=False), keep)
                iterator_sorted = torch.utils.data.DataLoader(
                    utils.iterator_sorter_omni(dataset, False, classes=total_clases),
                    batch_size=1, shuffle=False, num_workers=2)
                dataset = utils.remove_classes_omni(
                    df.DatasetFactory.get_dataset("omniglot", train=not args.test, background=False), keep)
                iterator = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=False, num_workers=1)
            elif args.dataset == "CIFAR100":
                keep = np.random.choice(list(range(50, 100)), tot_class)
                dataset = utils.remove_classes(df.DatasetFactory.get_dataset(args.dataset, train=True), keep)
                iterator_sorted = torch.utils.data.DataLoader(
                    utils.iterator_sorter(dataset, False, classes=tot_class),
                    batch_size=16, shuffle=False, num_workers=2)
                dataset = utils.remove_classes(df.DatasetFactory.get_dataset(args.dataset, train=False), keep)
                iterator = torch.utils.data.DataLoader(dataset, batch_size=128, shuffle=False, num_workers=1)
            # sampler = ts.MNISTSampler(list(range(0, total_clases)), dataset)
            # print(args)

            if torch.cuda.is_available():
                device = torch.device('cuda')
            else:
                device = torch.device('cpu')

            results_mem_size = {}
            # print("LEN", len(iterator_sorted))
            for mem_size in [args.memory]:
                max_acc = -10
                max_lr = -10
                for lr in lr_list:
                    # torch.cuda.empty_cache()
                    # print(lr)
                    # for lr in [0.001, 0.0003, 0.0001, 0.00003, 0.00001]:
                    maml = torch.load(args.modelX, map_location='cpu')
                    if args.scratch:
                        config = mf.ModelFactory.get_model(args.model_type, args.dataset)
                        maml = learner.Learner(config, lr)
                        # maml = MetaLearingClassification(args, config).to(device).net
                    # maml.update_lr = lr
                    maml = maml.to(device)

                    # Mark every parameter as learnable, then freeze (and optionally reset) the RLN layers.
                    for name, param in maml.named_parameters():
                        param.learn = True
                    for name, param in maml.named_parameters():
                        # if name.find("feedback_strength_vars") != -1:
                        #     print(name, param)
                        if name in frozen_layers:
                            # logger.info("Freezing name %s", str(name))
                            param.learn = False
                            # logger.info(str(param.requires_grad))
                        else:
                            if args.reset:
                                w = nn.Parameter(torch.ones_like(param))
                                # logger.info("W shape = %s", str(len(w.shape)))
                                if len(w.shape) > 1:
                                    torch.nn.init.kaiming_normal_(w)
                                else:
                                    w = nn.Parameter(torch.zeros_like(param))
                                param.data = w
                                param.learn = True

                    frozen_layers = []
                    for temp in range(args.rln * 2):
                        frozen_layers.append("vars." + str(temp))

                    # torch.nn.init.kaiming_normal_(maml.parameters()[-2])
                    # w = nn.Parameter(torch.zeros_like(maml.parameters()[-1]))
                    # maml.parameters()[-1].data = w
                    for n, a in maml.named_parameters():
                        n = n.replace(".", "_")
                        # logger.info("Name = %s", n)
                        if n == "vars_" + str(14 + 2 * args.num_extra_dense_layers):
                            pass
                            # w = nn.Parameter(torch.ones_like(a))
                            # logger.info("W shape = %s", str(w.shape))
                            # torch.nn.init.kaiming_normal_(w)
                            # a.data = w
                        if n == "vars_" + str(15 + 2 * args.num_extra_dense_layers):
                            pass
                            # w = nn.Parameter(torch.zeros_like(a))
                            # a.data = w
                    # for fv in maml.feedback_vars:
                    #     w = nn.Parameter(torch.zeros_like(fv))
                    #     fv.data = w
                    # for fv in maml.feedback_strength_vars:
                    #     w = nn.Parameter(torch.ones_like(fv))
                    #     fv.data = w

                    # Accuracy of the loaded model before any fine-tuning; targets are remapped
                    # to indices within the kept class subset.
                    correct = 0
                    for img, target in iterator:
                        # print('size', target.size())
                        target = torch.tensor(np.array([list(keep).index(int(target.cpu().numpy()[i]))
                                                        for i in range(target.size()[0])]))
                        with torch.no_grad():
                            img = img.to(device)
                            target = target.to(device)
                            logits_q = maml(img, vars=None, bn_training=False, feature=False)
                            pred_q = logits_q.argmax(dim=1)
                            correct += torch.eq(pred_q, target).sum().item() / len(img)
                    # logger.info("Pre-epoch accuracy %s", str(correct / len(iterator)))

                    filter_list = ["vars.0", "vars.1", "vars.2", "vars.3", "vars.4", "vars.5"]
                    # logger.info("Filter list = %s", ",".join(filter_list))
                    list_of_names = list(
                        map(lambda x: x[1],
                            list(filter(lambda x: x[0] not in filter_list, maml.named_parameters()))))
                    list_of_params = list(filter(lambda x: x.learn, maml.parameters()))
                    list_of_names = list(filter(lambda x: x[1].learn, maml.named_parameters()))
                    if args.scratch or args.no_freeze:
                        print("Empty filter list")
                        list_of_params = maml.parameters()
                    # for x in list_of_names:
                    #     logger.info("Unfrozen layer = %s", str(x[0]))
                    opt = torch.optim.Adam(list_of_params, lr=lr)

                    fast_weights = None
                    if args.randomize_plastic_weights:
                        maml.randomize_plastic_weights()
                    if args.zero_plastic_weights:
                        maml.zero_plastic_weights()

                    # Build the fine-tuning stream: remap labels, keep args.shots examples per
                    # class, and shuffle only when running i.i.d.
                    res_sampler = rep.ReservoirSampler(mem_size)
                    iterator_sorted_new = []
                    iter_count = 0
                    for img, y in iterator_sorted:
                        y = torch.tensor(np.array([list(keep).index(int(y.cpu().numpy()[i]))
                                                   for i in range(y.size()[0])]))
                        if iter_count % 15 >= args.shots:
                            iter_count += 1
                            continue
                        iterator_sorted_new.append((img, y))
                        iter_count += 1
                    iterator_sorted = []
                    perm = np.random.permutation(len(iterator_sorted_new))
                    for i in range(len(iterator_sorted_new)):
                        if args.iid:
                            iterator_sorted.append(iterator_sorted_new[perm[i]])
                        else:
                            iterator_sorted.append(iterator_sorted_new[i])

                    # Fine-tune with Oja/Hebbian fast-weight updates instead of backpropagation.
                    for epoch_idx in range(0, args.epoch):
                        iter_count = 0
                        imgs = []
                        ys = []
                        for img, y in iterator_sorted:
                            # print('iter count', iter_count)
                            # print('y is', y)
                            # if iter_count % 15 >= args.shots:
                            #     iter_count += 1
                            #     continue
                            iter_count += 1
                            # with torch.no_grad():
                            if args.memory == 0:
                                img = img.to(device)
                                y = y.to(device)
                            else:
                                # Mix the incoming example with a batch replayed from the reservoir buffer.
                                res_sampler.update_buffer(zip(img, y))
                                res_sampler.update_observations(len(img))
                                img = img.to(device)
                                y = y.to(device)
                                img2, y2 = res_sampler.sample_buffer(8)
                                img2 = img2.to(device)
                                y2 = y2.to(device)
                                img = torch.cat([img, img2], dim=0)
                                y = torch.cat([y, y2], dim=0)
                                # print('img size', img.size())
                            imgs.append(img)
                            ys.append(y)
                            if not args.batch_learning:
                                logits = maml(img, vars=fast_weights)
                                fast_weights = maml.getOjaUpdate(y, logits, fast_weights, hebbian=args.hebb)
                        if args.batch_learning:
                            y = torch.cat(ys, 0)
                            img = torch.cat(imgs, 0)
                            logits = maml(img, vars=fast_weights)
                            fast_weights = maml.getOjaUpdate(y, logits, fast_weights, hebbian=args.hebb)

                    # Evaluate the fine-tuned fast weights on the held-out iterator.
                    # logger.info("Result after one epoch for LR = %f", lr)
                    correct = 0
                    for img, target in iterator:
                        target = torch.tensor(np.array([list(keep).index(int(target.cpu().numpy()[i]))
                                                        for i in range(target.size()[0])]))
                        img = img.to(device)
                        target = target.to(device)
                        logits_q = maml(img, vars=fast_weights, bn_training=False, feature=False)
                        pred_q = logits_q.argmax(dim=1)
                        correct += torch.eq(pred_q, target).sum().item() / len(img)
                    # logger.info(str(correct / len(iterator)))
                    if correct / len(iterator) > max_acc:
                        max_acc = correct / len(iterator)
                        max_lr = lr
                    del maml
                    # del fast_weights

                lr_list = [max_lr]
                # print('result', max_acc)
                results_mem_size[mem_size] = (max_acc, max_lr)
                # logger.info("Final Max Result = %s", str(max_acc))
                writer.add_scalar('/finetune/best_' + str(aoo), max_acc, tot_class)
                avg_perf += max_acc / args.runs
                # TODO: change this if/when memory is used -- the max memory size cannot be
                # chosen differently for each run.
                print('avg perf', avg_perf * args.runs / (1 + aoo))

            final_results_all.append((tot_class, results_mem_size))
            # writer.add_scalar('performance', avg_perf, tot_class)
            # print("A= ", results_mem_size)
            # logger.info("Final results = %s", str(results_mem_size))
            my_experiment.results["Final Results"] = final_results_all
            my_experiment.store_json()
            np.save('evals/final_results_' + args.orig_name + '.npy', final_results_all)
            # print("FINAL RESULTS = ", final_results_all)
    writer.close()
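# Note on maml.getOjaUpdate above: it is defined elsewhere in this repository and its exact
# form is not shown in this excerpt. For reference, the classic Oja rule for a linear layer
# with output y = W x updates each row as dW_i = eta * y_i * (x - y_i * w_i), i.e. a Hebbian
# outer-product term stabilised by a decay term that keeps the weights bounded. The helper
# below is a generic sketch of that rule for a single weight matrix; the function name,
# signature, and learning rate are assumptions, not the repo's implementation.
import torch


def oja_step(weight, x, y, lr=0.01, hebbian=False):
    # weight: [out, in], x: [in], y: [out]; returns an updated copy of weight.
    hebb = torch.outer(y, x)                 # Hebbian term y x^T
    if hebbian:
        return weight + lr * hebb            # pure Hebbian update
    decay = (y * y).unsqueeze(1) * weight    # Oja's stabilising decay term
    return weight + lr * (hebb - decay)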