def _evaluate_region(model, criterion, dataset, tag, results_csv):
    """Evaluate ``model`` on one bathymetry region and write per-sample results.

    Shared by the 29TNE / 29SMD evaluations below to avoid duplicating the
    loader / inference / logging / results-writing sequence.
    """
    queue = torch.utils.data.DataLoader(
        dataset,
        batch_size=args.batch_size,
        shuffle=False,
        pin_memory=True,
        num_workers=2)
    test_obj, targets, preds = infer(queue, model, criterion,
                                     args.depth_normalization)
    logging.info('test_obj %s %f', tag, test_obj)
    dataset.write_results(targets, preds, os.path.join(args.save, results_csv))


def main():
    """Load a trained bathymetry network and evaluate it on two test regions."""
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    # Seed every RNG in play so the evaluation is reproducible.
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # Genotype resolution order: genotype.txt saved next to the model, then
    # genoname.txt naming an entry in genotypes, else genotypes.BATH.
    genotype_path = os.path.join(utils.get_dir(),
                                 os.path.split(args.model_path)[0],
                                 'genotype.txt')
    if os.path.isfile(genotype_path):
        with open(genotype_path, "r") as f:
            geno_raw = f.read()
        genotype = eval(geno_raw)  # NOTE: eval of a self-written, trusted file
    else:
        genoname = os.path.join(utils.get_dir(),
                                os.path.split(args.model_path)[0],
                                'genoname.txt')
        if os.path.isfile(genoname):
            with open(genoname, "r") as f:
                args.arch = f.read()
            genotype = eval("genotypes.%s" % args.arch)
        else:
            genotype = eval("genotypes.BATH")

    # Single regression output over 4-channel input tiles.
    model = Network(args.init_channels, 1, args.layers, args.auxiliary,
                    genotype, input_channels=4)
    model = model.cuda()
    print(os.path.join(utils.get_dir(), args.model_path))
    utils.load(model, os.path.join(utils.get_dir(), args.model_path))
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.MSELoss()
    criterion = criterion.cuda()
    model.drop_path_prob = args.drop_path_prob

    test_data_tne = utils.BathymetryDataset(
        args,
        "../29TNE.csv",
        root_dir="dataset/bathymetry/29TNE/dataset_29TNE",
        to_trim="/tmp/pbs.6233542.admin01/tmp_portugal/",
        to_filter=False)
    _evaluate_region(model, criterion, test_data_tne, 'tne', 'tne_results.csv')

    test_data_smd = utils.BathymetryDataset(
        args,
        "../29SMD.csv",
        root_dir="dataset/bathymetry/29SMD/dataset_29SMD",
        to_trim="/tmp/pbs.6233565.admin01/tmp_portugal/",
        to_filter=False)
    _evaluate_region(model, criterion, test_data_smd, 'smd', 'smd_results.csv')
# CLI flags for the bathymetry test entry point.
parser.add_argument('--auxiliary', action='store_true', default=False,
                    help='use auxiliary tower')
parser.add_argument('--drop_path_prob', type=float, default=0,
                    help='drop path probability')
parser.add_argument('--seed', type=int, default=0, help='random seed')
parser.add_argument('--arch', type=str, default='DARTS',
                    help='which architecture to use')
args = parser.parse_args()

# Results live in a "test" directory beside the pretrained model.
args.save = os.path.join(utils.get_dir(),
                         os.path.split(args.model_path)[0], "test")
utils.create_exp_dir(args.save)

# Log to stdout and mirror everything into testlog.txt in the save dir.
log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                    format=log_format, datefmt='%m/%d %I:%M:%S %p')
fh = logging.FileHandler(os.path.join(args.save, 'testlog.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

CLASSES = 1000
def main():
    """ADMM-regularised architecture search on CIFAR-10.

    Trains the supernetwork while updating the ADMM Z/U variables, then saves
    weights, alpha/FI histories, plots, and the final genotype under
    ``args.save``.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion,
                    args.rho, args.ewma)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    # Search splits the CIFAR-10 *training* set into train/val portions.
    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    datapath = os.path.join(utils.get_dir(), args.data)
    train_data = dset.CIFAR10(root=datapath, train=True, download=True,
                              transform=train_transform)
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))
    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, int(args.epochs), eta_min=args.learning_rate_min)
    architect = Architect(model, args)
    model.initialize_Z_and_U()

    loggers = {"train": {"loss": [], "acc": [], "step": []},
               "val": {"loss": [], "acc": [], "step": []},
               "infer": {"loss": [], "acc": [], "step": []},
               "ath": {"threshold": [], "step": []},
               "zuth": {"threshold": [], "step": []},
               "astep": [],
               "zustep": []}

    # A negative constant threshold means "use the dynamic schedule".
    if args.constant_alpha_threshold < 0:
        alpha_threshold = args.init_alpha_threshold
    else:
        alpha_threshold = args.constant_alpha_threshold
    zu_threshold = args.init_zu_threshold
    alpha_counter = 0
    ewma = -1

    for epoch in range(args.epochs):
        valid_iter = iter(valid_queue)
        model.clear_U()
        scheduler.step()
        lr = scheduler.get_last_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        print(torch.clamp(model.alphas_normal, min=0.1, max=1.0))
        print(torch.clamp(model.alphas_reduce, min=0.1, max=1.0))

        # training
        (train_acc, train_obj, alpha_threshold, zu_threshold,
         alpha_counter, ewma) = train(train_queue, valid_iter, model,
                                      architect, criterion, optimizer, lr,
                                      loggers, alpha_threshold, zu_threshold,
                                      alpha_counter, ewma, args)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        utils.log_loss(loggers["infer"], valid_obj, valid_acc, model.clock)
        logging.info('valid_acc %f', valid_acc)

        utils.plot_loss_acc(loggers, args.save)
        # model.update_history()
        utils.save_file(recoder=model.alphas_normal_history,
                        path=os.path.join(args.save, 'normalalpha'),
                        steps=loggers["train"]["step"])
        utils.save_file(recoder=model.alphas_reduce_history,
                        path=os.path.join(args.save, 'reducealpha'),
                        steps=loggers["train"]["step"])
        utils.save_file(recoder=model.FI_normal_history,
                        path=os.path.join(args.save, 'normalFI'),
                        steps=loggers["train"]["step"])
        utils.save_file(recoder=model.FI_reduce_history,
                        path=os.path.join(args.save, 'reduceFI'),
                        steps=loggers["train"]["step"])
        scaled_FI_normal = scale(model.FI_normal_history,
                                 model.alphas_normal_history)
        scaled_FI_reduce = scale(model.FI_reduce_history,
                                 model.alphas_reduce_history)
        utils.save_file(recoder=scaled_FI_normal,
                        path=os.path.join(args.save, 'normalFIscaled'),
                        steps=loggers["train"]["step"])
        utils.save_file(recoder=scaled_FI_reduce,
                        path=os.path.join(args.save, 'reduceFIscaled'),
                        steps=loggers["train"]["step"])
        utils.plot_FI(loggers["train"]["step"], model.FI_history, args.save,
                      "FI", loggers["ath"], loggers['astep'])
        utils.plot_FI(loggers["train"]["step"], model.FI_ewma_history,
                      args.save, "FI_ewma", loggers["ath"], loggers['astep'])
        utils.plot_FI(model.FI_alpha_history_step, model.FI_alpha_history,
                      args.save, "FI_alpha", loggers["zuth"],
                      loggers['zustep'])

        utils.save(model, os.path.join(args.save, 'weights.pt'))

    # Persist the final genotype; "with" guarantees the handle is closed even
    # if the write raises (the original left the file object unmanaged).
    genotype = model.genotype()
    logging.info('genotype = %s', genotype)
    with open(os.path.join(args.save, 'genotype.txt'), "w") as f:
        f.write(str(genotype))
# CLI flags for the bathymetry evaluation entry point.
parser.add_argument('--layers', type=int, default=8,
                    help='total number of layers')
parser.add_argument('--model_path', type=str, default='EXP/model.pt',
                    help='path of pretrained model')
parser.add_argument('--auxiliary', action='store_true', default=False,
                    help='use auxiliary tower')
parser.add_argument('--cutout', action='store_true', default=False,
                    help='use cutout')
parser.add_argument('--cutout_length', type=int, default=16,
                    help='cutout length')
parser.add_argument('--drop_path_prob', type=float, default=0.0,
                    help='drop path probability')
parser.add_argument('--seed', type=int, default=0, help='random seed')
parser.add_argument('--arch', type=str, default='BATH',
                    help='which architecture to use')
parser.add_argument('--min_energy', type=float, default=0.1,
                    help='minimum energy')
parser.add_argument('--max_energy', type=float, default=4.0,
                    help='maximum energy')
parser.add_argument('--max_depth', type=float, default=40.0,
                    help='maximum unnormalized depth')
parser.add_argument('--depth_normalization', type=float, default=0.1,
                    help='depth normalization factor')
args = parser.parse_args()

# Strip the ".pt" suffix so artifacts sit beside the model file.
args.save = os.path.join(utils.get_dir(), args.model_path[:-3])
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('src/*.py'))

log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                    format=log_format, datefmt='%m/%d %I:%M:%S %p')
fh = logging.FileHandler(os.path.join(args.save, 'testlog.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)


def main():
    # NOTE(review): a second, complete definition of main() follows later in
    # this file and replaces this one at import time; this stub looks like a
    # truncated duplicate — confirm before removing.
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
def main():
    """Evaluate a pretrained cell-based network on the selected task's test split."""
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    torch.cuda.empty_cache()
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # Genotype resolution order: genotype.txt next to the model, then
    # genoname.txt naming an entry in genotypes, else genotypes.ADMM.
    genotype_path = os.path.join(utils.get_dir(),
                                 os.path.split(args.model_path)[0],
                                 'genotype.txt')
    print(genotype_path)
    if os.path.isfile(genotype_path):
        with open(genotype_path, "r") as f:
            geno_raw = f.read()
        genotype = eval(geno_raw)  # NOTE: eval of a self-written, trusted file
    else:
        genoname = os.path.join(utils.get_dir(),
                                os.path.split(args.model_path)[0],
                                'genoname.txt')
        if os.path.isfile(genoname):
            with open(genoname, "r") as f:
                args.arch = f.read()
            genotype = eval("genotypes.%s" % args.arch)
        else:
            genotype = eval("genotypes.ADMM")

    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)
    model = model.cuda()
    utils.load(model, os.path.join(utils.get_dir(), args.model_path))
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    datapath = os.path.join(utils.get_dir(), args.data)
    # Build the test loader for the requested task.  (The original code also
    # built a CIFAR-10 test loader unconditionally first; that build was
    # always overwritten below — and forced a needless CIFAR-10 download —
    # so it has been removed.)
    if args.task == "CIFAR100cf":
        _, test_transform = utils._data_transforms_cifar100(args)
        test_data = utils.CIFAR100C2F(root=datapath, train=False,
                                      download=True, transform=test_transform)
        test_indices = test_data.filter_by_fine(args.test_filter)
        test_queue = torch.utils.data.DataLoader(
            torch.utils.data.Subset(test_data, test_indices),
            batch_size=args.batch_size,
            shuffle=False,
            pin_memory=True,
            num_workers=2)
        # TODO: extend each epoch or multiply number of epochs by 20%*args.class_filter
    elif args.task == "CIFAR100":
        _, test_transform = utils._data_transforms_cifar100(args)
        test_data = dset.CIFAR100(root=datapath, train=False, download=True,
                                  transform=test_transform)
        test_queue = torch.utils.data.DataLoader(
            test_data,
            batch_size=args.batch_size,
            shuffle=False,
            pin_memory=True,
            num_workers=2)
    else:
        _, test_transform = utils._data_transforms_cifar10(args)
        test_data = dset.CIFAR10(root=datapath, train=False, download=True,
                                 transform=test_transform)
        test_queue = torch.utils.data.DataLoader(
            test_data,
            batch_size=args.batch_size,
            shuffle=False,
            pin_memory=True,
            num_workers=2)

    model.drop_path_prob = args.drop_path_prob
    test_acc, test_obj = infer(test_queue, model, criterion)
    logging.info('test_acc %f', test_acc)
# Architecture-encoding and ADMM scheduling flags.
parser.add_argument('--arch_learning_rate', type=float, default=3e-4,
                    help='learning rate for arch encoding')
parser.add_argument('--arch_weight_decay', type=float, default=1e-3,
                    help='weight decay for arch encoding')
parser.add_argument('--rho', type=float, default=1e-3,
                    help='admm relative weight')
parser.add_argument('--admm_freq', type=int, default=10,
                    help='admm update frequency (if not dynamically scheduled')
parser.add_argument('--init_alpha_threshold', type=float, default=1.0,
                    help='initial alpha threshold')
parser.add_argument('--init_zu_threshold', type=float, default=1.0,
                    help='initial zu threshold')
parser.add_argument('--threshold_multiplier', type=float, default=1.1,
                    help='threshold multiplier')
parser.add_argument('--threshold_divider', type=float, default=0.2,
                    help='threshold divider')
parser.add_argument('--scheduled_zu', action='store_true', default=False,
                    help='use dynamically scheduled z,u steps')
parser.add_argument('--constant_alpha_threshold', type=float, default=-1.0,
                    help='use constant threshold (-1 to use dynamic threshold)')
parser.add_argument('--ewma', type=float, default=1.0,
                    help='weight for exp weighted moving average (1.0 for no ewma)')
args = parser.parse_args()

# Experiment directory: auto-named from the SLURM job id and a timestamp
# unless an explicit name was supplied.
if len(args.save) == 0:
    args.save = os.path.join(
        utils.get_dir(),
        'exp/admmsched-{}-{}'.format(os.getenv('SLURM_JOB_ID'),
                                     time.strftime("%Y%m%d-%H%M%S")))
else:
    args.save = os.path.join(utils.get_dir(), 'exp', args.save)
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('src/*.py'))

log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                    format=log_format, datefmt='%m/%d %I:%M:%S %p')
fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

CIFAR_CLASSES = 10
def main():
    """DARTS architecture search on CIFAR-10; saves alphas and final genotype."""
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Search splits the CIFAR-10 *training* set into train/val portions.
    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    datapath = os.path.join(utils.get_dir(), args.data)
    train_data = dset.CIFAR10(root=datapath, train=True, download=True,
                              transform=train_transform)
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))
    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, int(args.epochs), eta_min=args.learning_rate_min)
    architect = Architect(model, args)

    loggers = {"train": {"loss": [], "acc": [], "step": []},
               "val": {"loss": [], "acc": [], "step": []},
               "infer": {"loss": [], "acc": [], "step": []},
               "ath": {"threshold": [], "step": []},
               "astep": [],
               "zustep": []}

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_last_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, loggers)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

    utils.save_file(recoder=model.alphas_normal_history,
                    path=os.path.join(args.save, 'Normalalpha'),
                    steps=loggers["train"]["step"])
    utils.save_file(recoder=model.alphas_reduce_history,
                    path=os.path.join(args.save, 'Reducealpha'),
                    steps=loggers["train"]["step"])
    print(F.softmax(model.alphas_normal, dim=-1))
    print(F.softmax(model.alphas_reduce, dim=-1))
    # The original nested os.path.join(os.path.join(...)) around a single
    # argument; one call is equivalent.
    np.save(os.path.join(args.save, 'normal_weight.npy'),
            F.softmax(model.alphas_normal, dim=-1).data.cpu().numpy())
    np.save(os.path.join(args.save, 'reduce_weight.npy'),
            F.softmax(model.alphas_reduce, dim=-1).data.cpu().numpy())

    # Persist the final genotype with a managed file handle.
    genotype = model.genotype()
    logging.info('genotype = %s', genotype)
    with open(os.path.join(args.save, 'genotype.txt'), "w") as f:
        f.write(str(genotype))
help='CIFAR100cf fine classes to filter per coarse class in test') parser.add_argument('--batch_size', type=int, default=96, help='batch size') parser.add_argument('--report_freq', type=float, default=50, help='report frequency') parser.add_argument('--gpu', type=int, default=0, help='gpu device id') parser.add_argument('--init_channels', type=int, default=36, help='num of init channels') parser.add_argument('--layers', type=int, default=20, help='total number of layers') parser.add_argument('--model_path', type=str, default='EXP/model.pt', help='path of pretrained model') parser.add_argument('--auxiliary', action='store_true', default=False, help='use auxiliary tower') parser.add_argument('--cutout', action='store_true', default=False, help='use cutout') parser.add_argument('--cutout_length', type=int, default=16, help='cutout length') parser.add_argument('--drop_path_prob', type=float, default=0.2, help='drop path probability') parser.add_argument('--seed', type=int, default=0, help='random seed') parser.add_argument('--arch', type=str, default='DARTS', help='which architecture to use') args = parser.parse_args() args.save = os.path.join(utils.get_dir(), os.path.split(args.model_path)[0], "test") utils.create_exp_dir(args.save) log_format = '%(asctime)s %(message)s' logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format, datefmt='%m/%d %I:%M:%S %p') fh = logging.FileHandler(os.path.join(args.save, 'testlog.txt')) fh.setFormatter(logging.Formatter(log_format)) logging.getLogger().addHandler(fh) if args.task == "CIFAR100": CIFAR_CLASSES = 100 elif args.task == "CIFAR100cf": CIFAR_CLASSES = 20 else:
default=0.1, help='minimum energy') parser.add_argument('--max_energy', type=float, default=4.0, help='maximum energy') parser.add_argument('--max_depth', type=float, default=40.0, help='maximum unnormalized depth') args = parser.parse_args() if args.genotype_path is not None: args.save = os.path.join( utils.get_dir(), args.genotype_path, 'batheval-{}-{}'.format(os.getenv('SLURM_JOB_ID'), time.strftime("%Y%m%d-%H%M%S"))) else: args.save = os.path.join( utils.get_dir(), 'exp/batheval-{}-{}'.format(os.getenv('SLURM_JOB_ID'), time.strftime("%Y%m%d-%H%M%S"))) utils.create_exp_dir(args.save, scripts_to_save=glob.glob('src/*.py')) log_format = '%(asctime)s %(message)s' logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format, datefmt='%m/%d %I:%M:%S %p') fh = logging.FileHandler(os.path.join(args.save, 'log_eval.txt'))
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) genotype_path = os.path.join(utils.get_dir(), args.genotype_path, 'genotype.txt') if os.path.isfile(genotype_path): with open(genotype_path, "r") as f: geno_raw = f.read() genotype = eval(geno_raw) else: genotype = eval("genotypes.%s" % args.arch) f = open(os.path.join(args.save, 'genotype.txt'), "w") f.write(str(genotype)) f.close() model = Network(args.init_channels, 1, args.layers, args.auxiliary, genotype, input_channels=4) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) criterion = nn.MSELoss() criterion = criterion.cuda() optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) # train_transform, valid_transform = utils._data_transforms_cifar10(args) # datapath = os.path.join(utils.get_dir(), args.data) # train_data = dset.CIFAR10(root=datapath, train=True, download=True, transform=train_transform) # valid_data = dset.CIFAR10(root=datapath, train=False, download=True, transform=valid_transform) train_data = utils.BathymetryDataset(args, "../mixed_train.csv", to_filter=False) valid_data = utils.BathymetryDataset(args, "../mixed_validation.csv", to_filter=False) train_queue = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader(valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, args.epochs) loggers = { "train": { "loss": [], "step": [] }, "val": { "loss": [], "step": [] } } for epoch in range(args.epochs): 
scheduler.step() logging.info('epoch %d lr %e', epoch, scheduler.get_last_lr()[0]) model.drop_path_prob = args.drop_path_prob * epoch / args.epochs _ = train(train_queue, model, criterion, optimizer, loggers["train"]) infer_loss = infer(valid_queue, model, criterion) utils.log_loss(loggers["val"], infer_loss, None, 1) utils.plot_loss_acc(loggers, args.save) utils.save(model, os.path.join(args.save, 'weights.pt')) if (epoch + 1) % 50 == 0: utils.save( model, os.path.join(args.save, 'checkpoint' + str(epoch) + 'weights.pt'))
def main(): np.random.seed(args.seed) torch.manual_seed(args.seed) if args.gpu != -1: if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) torch.cuda.set_device(args.gpu) cudnn.benchmark = True cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) else: logging.info('using cpu') if args.dyno_schedule: args.threshold_divider = np.exp(-np.log(args.threshold_multiplier) * args.schedfreq) print( args.threshold_divider, -np.log(args.threshold_multiplier) / np.log(args.threshold_divider)) if args.dyno_split: args.train_portion = 1 - 1 / (1 + args.schedfreq) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() if args.gpu != -1: criterion = criterion.cuda() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, args.rho, args.crb, args.epochs, args.gpu, ewma=args.ewma, reg=args.reg) if args.gpu != -1: model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) datapath = os.path.join(utils.get_dir(), args.data) if args.task == "CIFAR100cf": train_transform, valid_transform = utils._data_transforms_cifar100( args) train_data = utils.CIFAR100C2F(root=datapath, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * len(indices))) orig_num_train = len(indices[:split]) orig_num_valid = len(indices[split:num_train]) train_indices = train_data.filter_by_fine(args.train_filter, indices[:split]) valid_indices = train_data.filter_by_fine(args.valid_filter, indices[split:num_train]) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=utils.FillingSubsetRandomSampler(train_indices, orig_num_train, reshuffle=True), pin_memory=True, num_workers=2) valid_queue = 
torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=utils.FillingSubsetRandomSampler(valid_indices, orig_num_valid, reshuffle=True), pin_memory=True, num_workers=2) # TODO: extend each epoch or multiply number of epochs by 20%*args.class_filter elif args.task == "CIFAR100split": train_transform, valid_transform = utils._data_transforms_cifar100( args) train_data = utils.CIFAR100C2F(root=datapath, train=True, download=True, transform=train_transform) if not args.evensplit: train_indices, valid_indices = train_data.split(args.train_portion) else: num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_indices = indices[:split] valid_indices = indices[split:num_train] train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( train_indices), pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( valid_indices), pin_memory=True, num_workers=2) else: if args.task == "CIFAR100": train_transform, valid_transform = utils._data_transforms_cifar100( args) train_data = dset.CIFAR100(root=datapath, train=True, download=True, transform=train_transform) else: train_transform, valid_transform = utils._data_transforms_cifar10( args) train_data = dset.CIFAR10(root=datapath, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_indices = indices[:split] valid_indices = indices[split:num_train] train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( train_indices), pin_memory=True, num_workers=4) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, 
sampler=torch.utils.data.sampler.SubsetRandomSampler( valid_indices), pin_memory=True, num_workers=4) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, int(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) loggers = { "train": { "loss": [], "acc": [], "step": [] }, "val": { "loss": [], "acc": [], "step": [] }, "infer": { "loss": [], "acc": [], "step": [] }, "ath": { "threshold": [], "step": [] }, "astep": [], "zustep": [] } alpha_threshold = args.init_alpha_threshold alpha_counter = 0 ewma = -1 for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_last_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.genotype() logging.info('genotype = %s', genotype) if args.ckpt_interval > 0 and epoch > 0 and ( epoch) % args.ckpt_interval == 0: logging.info('checkpointing genotype') os.mkdir(os.path.join(args.save, 'genotypes', str(epoch))) with open( os.path.join(args.save, 'genotypes', str(epoch), 'genotype.txt'), "w") as f: f.write(str(genotype)) print(model.activate(model.alphas_normal)) print(model.activate(model.alphas_reduce)) # training train_acc, train_obj, alpha_threshold, alpha_counter, ewma = train( train_queue, valid_queue, model, architect, criterion, optimizer, loggers, alpha_threshold, alpha_counter, ewma, args) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) utils.log_loss(loggers["infer"], valid_obj, valid_acc, model.clock) logging.info('valid_acc %f', valid_acc) utils.plot_loss_acc(loggers, args.save) utils.save_file(recoder=model.alphas_normal_history, path=os.path.join(args.save, 'Normalalpha'), steps=loggers["train"]["step"]) utils.save_file(recoder=model.alphas_reduce_history, path=os.path.join(args.save, 'Reducealpha'), steps=loggers["train"]["step"]) utils.plot_FI(loggers["train"]["step"], model.FI_history, args.save, "FI", loggers["ath"], loggers['astep']) utils.plot_FI(loggers["train"]["step"], 
model.FI_ewma_history, args.save, "FI_ewma", loggers["ath"], loggers['astep']) utils.save(model, os.path.join(args.save, 'weights.pt')) genotype = model.genotype() logging.info('genotype = %s', genotype) f = open(os.path.join(args.save, 'genotype.txt'), "w") f.write(str(genotype)) f.close()
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) genotype_path = os.path.join(utils.get_dir(), args.genotype_path, 'genotype.txt') if os.path.isfile(genotype_path): with open(genotype_path, "r") as f: geno_raw = f.read() genotype = eval(geno_raw) else: genotype = eval("genotypes.%s" % args.arch) f = open(os.path.join(args.save, 'genotype.txt'), "w") f.write(str(genotype)) f.close() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) datapath = os.path.join(utils.get_dir(), args.data) if args.task == "CIFAR100cf": train_transform, valid_transform = utils._data_transforms_cifar100( args) train_data = utils.CIFAR100C2F(root=datapath, train=True, download=True, transform=train_transform) valid_data = utils.CIFAR100C2F(root=datapath, train=False, download=True, transform=valid_transform) train_indices = train_data.filter_by_fine(args.eval_filter) valid_indices = valid_data.filter_by_fine(args.eval_filter) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( train_indices), pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader(torch.utils.data.Subset( valid_data, valid_indices), batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2) # TODO: extend each epoch or multiply number of epochs by 20%*args.class_filter else: if args.task == "CIFAR100": 
train_transform, valid_transform = utils._data_transforms_cifar100( args) train_data = dset.CIFAR100(root=datapath, train=True, download=True, transform=train_transform) valid_data = dset.CIFAR100(root=datapath, train=False, download=True, transform=valid_transform) else: train_transform, valid_transform = utils._data_transforms_cifar10( args) train_data = dset.CIFAR10(root=datapath, train=True, download=True, transform=train_transform) valid_data = dset.CIFAR10(root=datapath, train=False, download=True, transform=valid_transform) train_queue = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader(valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, args.epochs) for epoch in range(args.epochs): scheduler.step() logging.info('epoch %d lr %e', epoch, scheduler.get_last_lr()[0]) model.drop_path_prob = args.drop_path_prob * epoch / args.epochs train_acc, train_obj = train(train_queue, model, criterion, optimizer) logging.info('train_acc %f', train_acc) valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) utils.save(model, os.path.join(args.save, 'weights.pt')) if args.test: torch.cuda.empty_cache() os.system( 'python src/test.py --batch_size 8 --auxiliary --model_path %s --task %s --test_filter %s' % (os.path.join(args.save, 'weights.pt'), args.task, args.test_filter))
default='DARTS', help='which architecture to use') parser.add_argument('--grad_clip', type=float, default=5, help='gradient clipping') args = parser.parse_args() if len(args.save) == 0: args.save = 'eval-{}-{}'.format(os.getenv('SLURM_JOB_ID'), time.strftime("%Y%m%d-%H%M%S")) if args.genotype_path is not None: if "exp" not in args.genotype_path: args.genotype_path = os.path.join('exp', args.genotype_path) args.save = os.path.join(utils.get_dir(), args.genotype_path, args.save) else: args.genotype_path = os.path.join('exp', args.genotype_path) args.save = os.path.join(utils.get_dir(), args.save) utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py')) log_format = '%(asctime)s %(message)s' logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format, datefmt='%m/%d %I:%M:%S %p') fh = logging.FileHandler(os.path.join(args.save, 'log.txt')) fh.setFormatter(logging.Formatter(log_format)) logging.getLogger().addHandler(fh) if args.task == "CIFAR100":
def main():
    """Train an evaluation network (ImageNet-style pipeline) from a genotype.

    Reads the genotype either from <genotype_path>/genotype.txt (serialized
    repr) or from ``genotypes.<arch>``, builds the Network, and runs the
    training loop under a StepLR schedule, checkpointing and tracking the best
    top-1 accuracy.  Relies on the module-level ``args`` from argparse and on
    a CUDA device being available (exits otherwise).
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    # Seed numpy and torch (CPU + current GPU) for reproducibility; enable
    # cudnn autotuning since input sizes are fixed.
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # Prefer a serialized genotype file; fall back to a named genotype.
    genotype_path = os.path.join(utils.get_dir(), args.genotype_path, 'genotype.txt')
    if os.path.isfile(genotype_path):
        with open(genotype_path, "r") as f:
            geno_raw = f.read()
        # NOTE(review): eval of on-disk text — acceptable only for trusted
        # experiment directories; the file stores a Genotype repr.
        genotype = eval(geno_raw)
    else:
        # Equivalent to the original eval("genotypes.%s" % args.arch) but
        # without evaluating an arbitrary string.
        genotype = getattr(genotypes, args.arch)

    # Record the genotype actually used next to the run's artifacts.
    # BUG FIX: was a bare open/write/close, which leaks the handle if the
    # write raises; `with` guarantees closure.
    with open(os.path.join(args.save, 'genotype.txt'), "w") as f:
        f.write(str(genotype))

    model = Network(args.init_channels, CLASSES, args.layers, args.auxiliary, genotype)
    if args.parallel:
        model = nn.DataParallel(model).cuda()
    else:
        model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss().cuda()
    # Label-smoothed loss for training; plain CE for validation.
    criterion_smooth = CrossEntropyLabelSmooth(CLASSES, args.label_smooth).cuda()

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay
    )

    traindir = os.path.join(utils.get_dir(), args.data, 'train')
    validdir = os.path.join(utils.get_dir(), args.data, 'val')
    # Standard ImageNet channel statistics.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_data = dset.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(
                brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),
            transforms.ToTensor(),
            normalize,
        ]))
    valid_data = dset.ImageFolder(
        validdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True,
        pin_memory=True, num_workers=4)
    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False,
        pin_memory=True, num_workers=4)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.decay_period,
                                                gamma=args.gamma)

    best_acc_top1 = 0
    for epoch in range(args.epochs):
        logging.info('epoch %d lr %e', epoch, scheduler.get_last_lr()[0])
        # Linearly ramp drop-path probability over training.
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion_smooth, optimizer)
        logging.info('train_acc %f', train_acc)

        valid_acc_top1, valid_acc_top5, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc_top1 %f', valid_acc_top1)
        logging.info('valid_acc_top5 %f', valid_acc_top5)

        is_best = valid_acc_top1 > best_acc_top1
        if is_best:
            best_acc_top1 = valid_acc_top1

        utils.save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_acc_top1': best_acc_top1,
            'optimizer': optimizer.state_dict(),
        }, is_best, args.save)

        # BUG FIX: scheduler.step() originally ran at the top of the loop,
        # before any optimizer.step() of the epoch — with PyTorch >= 1.1
        # (get_last_lr() is already a >= 1.4 API) it must follow the epoch's
        # optimization, otherwise the first LR of the schedule is skipped.
        scheduler.step()

    if args.test:
        torch.cuda.empty_cache()
        # NOTE(review): this passes <save>/weights.pt, but the loop above only
        # saves via utils.save_checkpoint — confirm the test script's expected
        # input file exists.
        os.system(
            'python src/test_imagenet.py --batch_size 8 --auxiliary --model_path %s '
            % os.path.join(args.save, 'weights.pt'))
# ADMM-specific hyper-parameters.
parser.add_argument('--arch_weight_decay', type=float, default=1e-3,
                    help='weight decay for arch encoding')
parser.add_argument('--rho', type=float, default=1e-3,
                    help='admm relative weight')
parser.add_argument('--admm_freq', type=int, default=10,
                    help='admm update frequency')
args = parser.parse_args()

# Unique experiment directory: exp/admm-<SLURM job id>-<timestamp>, with a
# snapshot of the source files for reproducibility.
args.save = os.path.join(
    utils.get_dir(),
    'exp/admm-{}-{}'.format(os.getenv('SLURM_JOB_ID'),
                            time.strftime("%Y%m%d-%H%M%S")))
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('src/*.py'))

# Mirror log output to stdout and to <save>/log.txt with one shared format.
log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                    format=log_format, datefmt='%m/%d %I:%M:%S %p')
fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

CIFAR_CLASSES = 10