def main():
    torch.manual_seed(2020)
    # Model.
    model = Network(cfg)
    print('=> Load model')
    model.cuda()
    print('=> Using CUDA')
    # Test dataset.
    test_dataset = MyDataLoader(root=cfg.dataset, split="test")
    test_loader = DataLoader(test_dataset, batch_size=1, num_workers=1,
                             drop_last=True, shuffle=False)
    if args.mode == "test":
        assert isfile(cfg.resume), "No checkpoint found at '{}'".format(cfg.resume)
        model.load_checkpoint()
        test(cfg, model, test_loader,
             save_dir=join(TMP_DIR, "test", "single_scale_test"))
        if cfg.multi_aug:
            multiscale_test(model, test_loader,
                            save_dir=join(TMP_DIR, "test", "multi_scale_test"))
    else:
        train_dataset = MyDataLoader(root=cfg.dataset, split="train", transform=True)
        train_loader = DataLoader(train_dataset, batch_size=cfg.batch_size,
                                  num_workers=1, drop_last=True, shuffle=True)
        model.init_weight()
        if cfg.resume:
            model.load_checkpoint()
        model.train()
        # Optimizer and scheduler.
        optim, scheduler = Optimizer(cfg)(model)
        # Log: redirect stdout into a log file.
        log = Logger(join(TMP_DIR, "%s-%d-log.txt" % ("sgd", cfg.lr)))
        sys.stdout = log
        train_loss = []
        train_loss_detail = []
        for epoch in range(cfg.max_epoch):
            tr_avg_loss, tr_detail_loss = train(
                cfg, train_loader, model, optim, scheduler, epoch,
                save_dir=join(TMP_DIR, "train", "epoch-%d-training-record" % epoch))
            test(cfg, model, test_loader,
                 save_dir=join(TMP_DIR, "train", "epoch-%d-testing-record-view" % epoch))
            log.flush()
            train_loss.append(tr_avg_loss)
            train_loss_detail += tr_detail_loss
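# Note: these scripts redirect sys.stdout to a Logger object whose class is
# not shown in this file. A minimal tee-style sketch, assuming Logger simply
# mirrors everything printed into a log file (the real class may differ):
import sys

class Logger(object):
    """Hypothetical minimal stdout tee: writes to the terminal and a file."""
    def __init__(self, path, mode='w'):
        self.terminal = sys.stdout   # keep a handle on the real stdout
        self.file = open(path, mode)

    def write(self, message):
        self.terminal.write(message)
        self.file.write(message)

    def flush(self):
        self.terminal.flush()
        self.file.flush()

    def close(self):
        self.file.close()

# Usage: sys.stdout = Logger('log.txt'); every print() is now also logged.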
def main():
    model = Net()
    if torch.cuda.is_available():
        model.cuda()
    model.apply(weights_init)
    # Optionally resume from a checkpoint.
    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}'".format(args.resume))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Data processing is handled directly inside train().
    # dataParser = DataParser(batch_size)
    loss_function = nn.L1Loss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=0.9, weight_decay=5e-4)
    # train_scheduler = optim.lr_scheduler.MultiStepLR(
    #     optimizer, milestones=settings.MILESTONES, gamma=0.2)  # learning rate decay
    scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)

    log = Logger(join(TMP_DIR, '%s-%d-log.txt' % ('sgd', args.lr)))
    sys.stdout = log

    train_loss = []
    train_loss_detail = []
    for epoch in range(args.start_epoch, args.maxepoch):
        if epoch == 0:
            print("Performing initial testing...")
            # Initial test left empty for now.
        tr_avg_loss, tr_detail_loss = train(
            model=model, optimizer=optimizer, epoch=epoch,
            save_dir=join(TMP_DIR, 'epoch-%d-training-record' % epoch))
        test()
        log.flush()
        # Save checkpoint.
        save_file = os.path.join(TMP_DIR, 'checkpoint_epoch{}.pth'.format(epoch))
        save_checkpoint({'epoch': epoch,
                         'state_dict': model.state_dict(),
                         'optimizer': optimizer.state_dict()},
                        filename=save_file)
        scheduler.step()  # adjust the learning rate automatically
        train_loss.append(tr_avg_loss)
        train_loss_detail += tr_detail_loss
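# Note: save_checkpoint() is not defined in this file. Given how it is called
# above (a state dict plus a filename keyword), a plausible minimal sketch is
# a thin wrapper around torch.save; this is an assumption, not the original:
import torch

def save_checkpoint(state, filename='checkpoint.pth'):
    """Hypothetical helper: persist a {'epoch', 'state_dict', 'optimizer'} dict."""
    torch.save(state, filename)

# To restore: checkpoint = torch.load(filename), then
# model.load_state_dict(checkpoint['state_dict']).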
def main():
    args.cuda = True
    model = NRCNN(4, 64)
    model.cuda()
    # Load the checkpoint to be evaluated.
    if isfile(args.resume):
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}'".format(args.resume))
    else:
        print("=> no checkpoint found at '{}'".format(args.resume))
    log = Logger('test_result.txt')
    sys.stdout = log
    test(model)
    log.flush()
def main():
    # Model.
    model = Extened_NRCNN(args.res_block, 64)
    model.cuda()
    # model.apply(weights_init)
    # Optionally resume from a checkpoint.
    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}'".format(args.resume))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Optimizer and learning-rate schedule.
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)

    # Log.
    if not isdir(args.save_path):
        os.makedirs(args.save_path)
    log = Logger(join(args.save_path, '%s-%d-log.txt' % ('adam', args.lr)))
    sys.stdout = log

    for epoch in range(args.start_epoch, args.maxepoch):
        if epoch == 0:
            print("Performing initial testing...")
        train(trainloader, model, optimizer, epoch,
              save_dir=join(args.save_path, 'epoch-%d-training-record' % epoch))
        log.flush()  # write log
        scheduler.step()  # adjust learning rate
    writer.close()
def main():
    args.cuda = True
    # Dataset.
    train_dataset = BSDSLoader(root=args.dataset, split="train")
    test_dataset = BSDSLoader(root=args.dataset, split="test")
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              num_workers=8, drop_last=True, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                             num_workers=8, drop_last=True, shuffle=False)
    with open('data/HED-BSDS/test.lst', 'r') as f:
        test_list = f.readlines()
    test_list = [split(i.rstrip())[1] for i in test_list]
    assert len(test_list) == len(test_loader), "%d vs %d" % (len(test_list), len(test_loader))

    # Model.
    model = HED()
    model.cuda()
    model.apply(weights_init)
    load_vgg16pretrain(model)
    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}'".format(args.resume))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Group parameters so each layer type gets its own learning rate and
    # weight decay, following the official HED solver settings.
    net_parameters_id = defaultdict(list)  # requires: from collections import defaultdict
    for pname, p in model.named_parameters():
        if pname in ['conv1_1.weight', 'conv1_2.weight',
                     'conv2_1.weight', 'conv2_2.weight',
                     'conv3_1.weight', 'conv3_2.weight', 'conv3_3.weight',
                     'conv4_1.weight', 'conv4_2.weight', 'conv4_3.weight']:
            net_parameters_id['conv1-4.weight'].append(p)        # lr x1,     decay x1
        elif pname in ['conv1_1.bias', 'conv1_2.bias',
                       'conv2_1.bias', 'conv2_2.bias',
                       'conv3_1.bias', 'conv3_2.bias', 'conv3_3.bias',
                       'conv4_1.bias', 'conv4_2.bias', 'conv4_3.bias']:
            net_parameters_id['conv1-4.bias'].append(p)          # lr x2,     no decay
        elif pname in ['conv5_1.weight', 'conv5_2.weight', 'conv5_3.weight']:
            net_parameters_id['conv5.weight'].append(p)          # lr x100,   decay x1
        elif pname in ['conv5_1.bias', 'conv5_2.bias', 'conv5_3.bias']:
            net_parameters_id['conv5.bias'].append(p)            # lr x200,   no decay
        elif pname in ['score_dsn1.weight', 'score_dsn2.weight', 'score_dsn3.weight',
                       'score_dsn4.weight', 'score_dsn5.weight']:
            net_parameters_id['score_dsn_1-5.weight'].append(p)  # lr x0.01,  decay x1
        elif pname in ['score_dsn1.bias', 'score_dsn2.bias', 'score_dsn3.bias',
                       'score_dsn4.bias', 'score_dsn5.bias']:
            net_parameters_id['score_dsn_1-5.bias'].append(p)    # lr x0.02,  no decay
        elif pname in ['score_final.weight']:
            net_parameters_id['score_final.weight'].append(p)    # lr x0.001, decay x1
        elif pname in ['score_final.bias']:
            net_parameters_id['score_final.bias'].append(p)      # lr x0.002, no decay

    optimizer = torch.optim.SGD([
        {'params': net_parameters_id['conv1-4.weight'],       'lr': args.lr * 1,     'weight_decay': args.weight_decay},
        {'params': net_parameters_id['conv1-4.bias'],         'lr': args.lr * 2,     'weight_decay': 0.},
        {'params': net_parameters_id['conv5.weight'],         'lr': args.lr * 100,   'weight_decay': args.weight_decay},
        {'params': net_parameters_id['conv5.bias'],           'lr': args.lr * 200,   'weight_decay': 0.},
        {'params': net_parameters_id['score_dsn_1-5.weight'], 'lr': args.lr * 0.01,  'weight_decay': args.weight_decay},
        {'params': net_parameters_id['score_dsn_1-5.bias'],   'lr': args.lr * 0.02,  'weight_decay': 0.},
        {'params': net_parameters_id['score_final.weight'],   'lr': args.lr * 0.001, 'weight_decay': args.weight_decay},
        {'params': net_parameters_id['score_final.bias'],     'lr': args.lr * 0.002, 'weight_decay': 0.},
    ], lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)

    # Log.
    log = Logger(join(TMP_DIR, '%s-%d-log.txt' % ('sgd', args.lr)))
    sys.stdout = log

    train_loss = []
    train_loss_detail = []
    for epoch in range(args.start_epoch, args.maxepoch):
        # if epoch == 0:
        #     print("Performing initial testing...")
        #     test(model, test_loader, epoch=epoch, test_list=test_list,
        #          save_dir=join(TMP_DIR, 'initial-testing-record'))
        tr_avg_loss, tr_detail_loss = train(
            train_loader, model, optimizer, epoch,
            save_dir=join(TMP_DIR, 'epoch-%d-training-record' % epoch))
        test(model, test_loader, epoch=epoch, test_list=test_list,
             save_dir=join(TMP_DIR, 'epoch-%d-testing-record' % epoch))
        log.flush()  # write log
        # Save checkpoint.
        save_file = os.path.join(TMP_DIR, 'checkpoint_epoch{}.pth'.format(epoch))
        save_checkpoint({'epoch': epoch,
                         'state_dict': model.state_dict(),
                         'optimizer': optimizer.state_dict()},
                        filename=save_file)
        scheduler.step()  # adjust learning rate
        # Save train loss every epoch in case of an early stop.
        train_loss.append(tr_avg_loss)
        train_loss_detail += tr_detail_loss
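# The SGD call above relies on per-parameter-group learning rates: each dict
# overrides the global lr/weight_decay, and StepLR multiplies every group's
# lr by gamma at each step. A self-contained sanity check of that behavior
# (the toy model and hyperparameters are placeholders, not the HED values):
import torch
import torch.nn as nn
from torch.optim import lr_scheduler

toy = nn.Linear(4, 2)
opt = torch.optim.SGD([
    {'params': [toy.weight], 'lr': 1e-3, 'weight_decay': 2e-4},
    {'params': [toy.bias],   'lr': 2e-3, 'weight_decay': 0.},
], lr=1e-3, momentum=0.9)
schd = lr_scheduler.StepLR(opt, step_size=1, gamma=0.1)
for _ in range(2):
    opt.step()   # (normally preceded by a backward pass)
    schd.step()
    print([g['lr'] for g in opt.param_groups])  # both groups decay by 10x each step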
def main():
    parser = argparse.ArgumentParser()
    # Settings
    parser.add_argument('-d', '--dataset', choices=dataset_attributes.keys(), required=True)
    parser.add_argument('-s', '--shift_type', choices=shift_types, required=True)
    # Confounders
    parser.add_argument('-t', '--target_name')
    parser.add_argument('-c', '--confounder_names', nargs='+')
    # Resume?
    parser.add_argument('--resume', default=False, action='store_true')
    # Label shifts
    parser.add_argument('--minority_fraction', type=float)
    parser.add_argument('--imbalance_ratio', type=float)
    # Data
    parser.add_argument('--fraction', type=float, default=1.0)
    parser.add_argument('--root_dir', default=None)
    parser.add_argument('--subsample_to_minority', action='store_true', default=False)
    parser.add_argument('--reweight_groups', action='store_true', default=False)
    parser.add_argument('--augment_data', action='store_true', default=False)
    parser.add_argument('--val_fraction', type=float, default=0.1)
    # Objective
    parser.add_argument('--robust', default=False, action='store_true')
    parser.add_argument('--alpha', type=float, default=0.2)
    parser.add_argument('--generalization_adjustment', default="0.0")
    parser.add_argument('--automatic_adjustment', default=False, action='store_true')
    parser.add_argument('--robust_step_size', default=0.01, type=float)
    parser.add_argument('--use_normalized_loss', default=False, action='store_true')
    parser.add_argument('--btl', default=False, action='store_true')
    parser.add_argument('--hinge', default=False, action='store_true')
    # Model
    parser.add_argument('--model', choices=model_attributes.keys(), default='resnet50')
    parser.add_argument('--train_from_scratch', action='store_true', default=False)
    parser.add_argument('--resnet_width', type=int, default=None)
    # Optimization
    parser.add_argument('--n_epochs', type=int, default=4)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--scheduler', action='store_true', default=False)
    parser.add_argument('--weight_decay', type=float, default=5e-5)
    parser.add_argument('--gamma', type=float, default=0.1)
    parser.add_argument('--minimum_variational_weight', type=float, default=0)
    # Misc
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--show_progress', default=False, action='store_true')
    parser.add_argument('--log_dir', default='./logs')
    parser.add_argument('--log_every', default=50, type=int)
    parser.add_argument('--save_step', type=int, default=10)
    parser.add_argument('--save_best', action='store_true', default=False)
    parser.add_argument('--save_last', action='store_true', default=False)
    parser.add_argument('--model_test', type=str)
    parser.add_argument('--gpu', type=str)
    args = parser.parse_args()
    check_args(args)

    model_test = args.model_test
    gpu = args.gpu
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    # BERT-specific configs copied over from run_glue.py
    if args.model == 'bert':
        args.max_grad_norm = 1.0
        args.adam_epsilon = 1e-8
        args.warmup_steps = 0

    if os.path.exists(args.log_dir) and args.resume:
        resume = True
        mode = 'a'
    else:
        resume = False
        mode = 'w'

    ## Initialize logs
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)
    logger = Logger(os.path.join(args.log_dir, model_test + '_log.txt'), mode)
    # Record args
    log_args(args, logger)
    set_seed(args.seed)

    # Data
    # Test data for label_shift_step is not implemented yet
    test_data = None
    test_loader = None
    if args.shift_type == 'confounder':
        train_data, val_data, test_data = prepare_data(args, train=True)
    elif args.shift_type == 'label_shift_step':
        train_data, val_data = prepare_data(args, train=True)

    loader_kwargs = {'batch_size': args.batch_size, 'num_workers': 12, 'pin_memory': True}
    train_loader = train_data.get_loader(train=True, reweight_groups=args.reweight_groups, **loader_kwargs)
    val_loader = val_data.get_loader(train=False, reweight_groups=None, **loader_kwargs)
    if test_data is not None:
        test_loader = test_data.get_loader(train=False, reweight_groups=None, **loader_kwargs)

    data = {}
    data['train_loader'] = train_loader
    data['val_loader'] = val_loader
    data['test_loader'] = test_loader
    data['train_data'] = train_data
    data['val_data'] = val_data
    data['test_data'] = test_data
    n_classes = train_data.n_classes
    log_data(data, logger)

    ## Initialize model
    pretrained = not args.train_from_scratch
    if resume:
        model = torch.load(os.path.join(args.log_dir, model_test))
        d = train_data.input_size()[0]
    elif model_attributes[args.model]['feature_type'] in ('precomputed', 'raw_flattened'):
        assert pretrained
        # Load precomputed features
        d = train_data.input_size()[0]
        model = nn.Linear(d, n_classes)
        model.has_aux_logits = False
    elif args.model == 'resnet50':
        model = torchvision.models.resnet50(pretrained=pretrained)
        d = model.fc.in_features
        model.fc = nn.Linear(d, n_classes)
    elif args.model == 'resnet34':
        model = torchvision.models.resnet34(pretrained=pretrained)
        d = model.fc.in_features
        model.fc = nn.Linear(d, n_classes)
    elif args.model == 'wideresnet50':
        model = torchvision.models.wide_resnet50_2(pretrained=pretrained)
        d = model.fc.in_features
        model.fc = nn.Linear(d, n_classes)
    elif args.model == 'resnet50vw':
        assert not pretrained
        assert args.resnet_width is not None
        model = resnet50vw(args.resnet_width, num_classes=n_classes)
    elif args.model == 'resnet18vw':
        assert not pretrained
        assert args.resnet_width is not None
        model = resnet18vw(args.resnet_width, num_classes=n_classes)
    elif args.model == 'resnet10vw':
        assert not pretrained
        assert args.resnet_width is not None
        model = resnet10vw(args.resnet_width, num_classes=n_classes)
    elif args.model == 'bert':
        assert args.dataset == 'MultiNLI'
        from pytorch_transformers import BertConfig, BertForSequenceClassification
        config_class = BertConfig
        model_class = BertForSequenceClassification
        config = config_class.from_pretrained('bert-base-uncased', num_labels=3,
                                              finetuning_task='mnli')
        model = model_class.from_pretrained('bert-base-uncased', from_tf=False,
                                            config=config)
    else:
        raise ValueError('Model not recognized.')

    logger.flush()

    ## Define the objective
    if args.hinge:
        assert args.dataset in ['CelebA', 'CUB']  # Only supports binary

        def hinge_loss(yhat, y):
            # MarginRankingLoss takes three arguments, so yhat is split into its
            # two logit columns. It also expects labels in {-1.0, +1.0} rather
            # than {0, 1}, and for y = 1 it expects the first input to be the
            # larger one, hence yhat[:, 1] is passed first.
            torch_loss = torch.nn.MarginRankingLoss(margin=1.0, reduction='none')
            y = (y.float() * 2.0) - 1.0
            return torch_loss(yhat[:, 1], yhat[:, 0], y)

        criterion = hinge_loss
    else:
        criterion = torch.nn.CrossEntropyLoss(reduction='none')

    if False:  # Note: epoch-offset recovery is disabled here; sibling scripts gate this block on `resume`.
        df = pd.read_csv(os.path.join(args.log_dir, 'test.csv'))
        epoch_offset = df.loc[len(df) - 1, 'epoch'] + 1
        logger.write(f'starting from epoch {epoch_offset}')
    else:
        epoch_offset = 0

    train_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'train.csv'), train_data.n_groups, mode=mode)
    val_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'val.csv'), train_data.n_groups, mode=mode)
    test_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'test.csv'), train_data.n_groups, mode=mode)
    train(model, criterion, data, logger, train_csv_logger, val_csv_logger,
          test_csv_logger, args, epoch_offset=epoch_offset)
    train_csv_logger.close()
    val_csv_logger.close()
    test_csv_logger.close()
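# A quick numeric check of the hinge_loss construction above: with margin 1,
# MarginRankingLoss(x1, x2, y) = max(0, -y * (x1 - x2) + 1), so feeding
# x1 = yhat[:, 1], x2 = yhat[:, 0], y in {-1, +1} recovers the binary hinge
# loss max(0, 1 - signed margin of the true class). Toy tensors only:
import torch

yhat = torch.tensor([[2.0, 0.5],    # logits favor class 0
                     [0.0, 3.0]])   # logits favor class 1
y = torch.tensor([0, 1])
signed = (y.float() * 2.0) - 1.0    # {0, 1} -> {-1, +1}
loss_fn = torch.nn.MarginRankingLoss(margin=1.0, reduction='none')
loss = loss_fn(yhat[:, 1], yhat[:, 0], signed)
manual = torch.clamp(1.0 - signed * (yhat[:, 1] - yhat[:, 0]), min=0.0)
assert torch.allclose(loss, manual)
print(loss)  # tensor([0., 0.]) -- both examples are beyond the margin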
def main():
    ################################################
    # I. Miscellaneous.
    ################################################
    # Create the output directory.
    current_dir = abspath(dirname(__file__))
    output_dir = join(current_dir, args.output)
    if not isdir(output_dir):
        os.makedirs(output_dir)
    # Set logger.
    now_str = datetime.now().strftime('%y%m%d-%H%M%S')
    log = Logger(join(output_dir, 'log-{}.txt'.format(now_str)))
    sys.stdout = log  # Overwrite the standard output.

    ################################################
    # II. Datasets.
    ################################################
    # Datasets and dataloaders.
    train_dataset = BsdsDataset(dataset_dir=args.dataset, split='train')
    test_dataset = BsdsDataset(dataset_dir=args.dataset, split='test')
    train_loader = DataLoader(train_dataset, batch_size=args.train_batch_size,
                              num_workers=4, drop_last=True, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=args.test_batch_size,
                             num_workers=4, drop_last=False, shuffle=False)

    ################################################
    # III. Network and optimizer.
    ################################################
    # Create the network on the GPU.
    net = nn.DataParallel(HED(device))
    net.to(device)

    # Initialize the weights for the HED model.
    def weights_init(m):
        """Weight initialization function."""
        if isinstance(m, nn.Conv2d):
            # Initialize: m.weight.
            if m.weight.data.shape == torch.Size([1, 5, 1, 1]):
                # Constant initialization for the fusion layer in the HED network.
                torch.nn.init.constant_(m.weight, 0.2)
            else:
                # Zero initialization, following the official repository.
                # Reference: hed/docs/tutorial/layers.md
                m.weight.data.zero_()
            # Initialize: m.bias.
            if m.bias is not None:
                # Zero initialization.
                m.bias.data.zero_()

    net.apply(weights_init)

    # Optimizer settings: group parameters by layer type.
    net_parameters_id = defaultdict(list)
    for name, param in net.named_parameters():
        if name in ['module.conv1_1.weight', 'module.conv1_2.weight',
                    'module.conv2_1.weight', 'module.conv2_2.weight',
                    'module.conv3_1.weight', 'module.conv3_2.weight', 'module.conv3_3.weight',
                    'module.conv4_1.weight', 'module.conv4_2.weight', 'module.conv4_3.weight']:
            print('{:26} lr: 1 decay:1'.format(name))
            net_parameters_id['conv1-4.weight'].append(param)
        elif name in ['module.conv1_1.bias', 'module.conv1_2.bias',
                      'module.conv2_1.bias', 'module.conv2_2.bias',
                      'module.conv3_1.bias', 'module.conv3_2.bias', 'module.conv3_3.bias',
                      'module.conv4_1.bias', 'module.conv4_2.bias', 'module.conv4_3.bias']:
            print('{:26} lr: 2 decay:0'.format(name))
            net_parameters_id['conv1-4.bias'].append(param)
        elif name in ['module.conv5_1.weight', 'module.conv5_2.weight', 'module.conv5_3.weight']:
            print('{:26} lr: 100 decay:1'.format(name))
            net_parameters_id['conv5.weight'].append(param)
        elif name in ['module.conv5_1.bias', 'module.conv5_2.bias', 'module.conv5_3.bias']:
            print('{:26} lr: 200 decay:0'.format(name))
            net_parameters_id['conv5.bias'].append(param)
        elif name in ['module.score_dsn1.weight', 'module.score_dsn2.weight',
                      'module.score_dsn3.weight', 'module.score_dsn4.weight',
                      'module.score_dsn5.weight']:
            print('{:26} lr: 0.01 decay:1'.format(name))
            net_parameters_id['score_dsn_1-5.weight'].append(param)
        elif name in ['module.score_dsn1.bias', 'module.score_dsn2.bias',
                      'module.score_dsn3.bias', 'module.score_dsn4.bias',
                      'module.score_dsn5.bias']:
            print('{:26} lr: 0.02 decay:0'.format(name))
            net_parameters_id['score_dsn_1-5.bias'].append(param)
        elif name in ['module.score_final.weight']:
            print('{:26} lr:0.001 decay:1'.format(name))
            net_parameters_id['score_final.weight'].append(param)
        elif name in ['module.score_final.bias']:
            print('{:26} lr:0.002 decay:0'.format(name))
            net_parameters_id['score_final.bias'].append(param)

    # Create optimizer.
    opt = torch.optim.SGD([
        {'params': net_parameters_id['conv1-4.weight'],       'lr': args.lr * 1,     'weight_decay': args.weight_decay},
        {'params': net_parameters_id['conv1-4.bias'],         'lr': args.lr * 2,     'weight_decay': 0.},
        {'params': net_parameters_id['conv5.weight'],         'lr': args.lr * 100,   'weight_decay': args.weight_decay},
        {'params': net_parameters_id['conv5.bias'],           'lr': args.lr * 200,   'weight_decay': 0.},
        {'params': net_parameters_id['score_dsn_1-5.weight'], 'lr': args.lr * 0.01,  'weight_decay': args.weight_decay},
        {'params': net_parameters_id['score_dsn_1-5.bias'],   'lr': args.lr * 0.02,  'weight_decay': 0.},
        {'params': net_parameters_id['score_final.weight'],   'lr': args.lr * 0.001, 'weight_decay': args.weight_decay},
        {'params': net_parameters_id['score_final.bias'],     'lr': args.lr * 0.002, 'weight_decay': 0.},
    ], lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    # Note: In train_val.prototxt and deploy.prototxt, the learning rates of
    # score_final.weight/bias are different.

    # Learning rate scheduler.
    lr_schd = lr_scheduler.StepLR(opt, step_size=args.lr_stepsize, gamma=args.lr_gamma)

    ################################################
    # IV. Pre-trained parameters.
    ################################################
    # Load parameters from the pre-trained VGG-16 Caffe model.
    if args.vgg16_caffe:
        load_vgg16_caffe(net, args.vgg16_caffe)
    # Resume the checkpoint.
    if args.checkpoint:
        load_checkpoint(net, opt, args.checkpoint)  # Omit the returned values.
    # Resume the HED Caffe model.
    if args.caffe_model:
        load_pretrained_caffe(net, args.caffe_model)

    ################################################
    # V. Training / testing.
    ################################################
    if args.test is True:
        # Only test.
        test(test_loader, net, save_dir=join(output_dir, 'test'))
    else:
        # Train.
        train_epoch_losses = []
        for epoch in range(args.max_epoch):
            # Initial test.
            if epoch == 0:
                print('Initial test...')
                test(test_loader, net, save_dir=join(output_dir, 'initial-test'))
            # Epoch training and test.
            train_epoch_loss = train(train_loader, net, opt, lr_schd, epoch,
                                     save_dir=join(output_dir, 'epoch-{}-train'.format(epoch)))
            test(test_loader, net, save_dir=join(output_dir, 'epoch-{}-test'.format(epoch)))
            # Write log.
            log.flush()
            # Save checkpoint.
            save_checkpoint(state={'net': net.state_dict(),
                                   'opt': opt.state_dict(),
                                   'epoch': epoch},
                            path=os.path.join(output_dir, 'epoch-{}-checkpoint.pt'.format(epoch)))
            # Collect losses.
            train_epoch_losses.append(train_epoch_loss)
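# The parameter names above carry a 'module.' prefix because the network is
# wrapped in nn.DataParallel, which nests the original model under .module.
# A minimal demonstration of that renaming (toy module, not the HED network):
import torch.nn as nn

plain = nn.Linear(3, 1)
wrapped = nn.DataParallel(plain)
print([n for n, _ in plain.named_parameters()])    # ['weight', 'bias']
print([n for n, _ in wrapped.named_parameters()])  # ['module.weight', 'module.bias']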
def runTrain(sess, d, rnn, msg):
    sess.run(tf.global_variables_initializer())
    experiment = '{}_{}_{}'.format(rnn.name, datanum, Time.now())
    model_path = "model/{}".format(experiment)
    log_path = "SAVE_Logs/{}.txt".format(experiment)
    stat_path = "SAVE_Logs/{}.stat".format(experiment)
    logger = Logger(log_path)
    stat = {"tests": 0}
    stat_lowAbs = {"dist": 100}

    # Count batches over the train and test ranges.
    total_number_of_batch = 0
    for number in trainRange:
        total_number_of_batch += d[number].numberBatch
    total_number_of_batch_test = 0
    for number in testRange:
        total_number_of_batch_test += d[number].numberBatch

    num_epoch = 100
    totalTime = Time()
    for curr_epoch in range(num_epoch):
        cost_sum = 0
        trainTime = Time()
        # One training pass over all training batches.
        for number in trainRange:
            for index in range(d[number].numberBatch):
                cost, _ = rnn.Train(d[number]._MFCC[index], d[number]._LABEL[index], 0.8)
                cost_sum += cost
        avg_cost = cost_sum / total_number_of_batch

        # Training accuracy, averaged over the two classes.
        acc1 = 0.0
        acc0 = 0.0
        for number in trainRange:
            for index in range(d[number].numberBatch):
                ac1, ac0 = rnn.Accuracy(d[number]._MFCC[index], d[number]._LABEL[index])
                acc1 += ac1
                acc0 += ac0
        avg_train_accuracy = (acc1 / total_number_of_batch + acc0 / total_number_of_batch) / 2

        # Test accuracy, cost, and confusion matrix.
        acc1 = 0.0
        acc0 = 0.0
        test_cost_sum = 0
        resultMatrix = np.zeros([2, 2], int)
        for number in testRange:
            for index in range(d[number].numberBatch):
                ac1, ac0 = rnn.Accuracy(d[number]._MFCC[index], d[number]._LABEL[index])
                test_cost_sum += rnn.Cost(d[number]._MFCC[index], d[number]._LABEL[index])
                resultMatrix += rnn.return_ResultMatrix(d[number]._MFCC[index], d[number]._LABEL[index])
                acc1 += ac1
                acc0 += ac0
        avg_test_accuracy = (acc1 / total_number_of_batch_test + acc0 / total_number_of_batch_test) / 2
        test_distance = np.abs(acc1 / total_number_of_batch_test - acc0 / total_number_of_batch_test)
        avg_test_cost = test_cost_sum / total_number_of_batch_test

        # Track the best model by test accuracy.
        if avg_test_accuracy > stat["tests"]:
            stat['tests'] = avg_test_accuracy
            stat['trains'] = avg_train_accuracy
            stat['epoch'] = curr_epoch
            stat['cost'] = avg_cost
            stat['testcost'] = avg_test_cost
            stat['resultMatrix'] = resultMatrix
            stat['dist'] = test_distance
            rnn.Save(model_path)
        # Track the best model by lowest per-class accuracy gap.
        if test_distance < stat_lowAbs['dist']:
            stat_lowAbs['tests'] = avg_test_accuracy
            stat_lowAbs['trains'] = avg_train_accuracy
            stat_lowAbs['epoch'] = curr_epoch
            stat_lowAbs['cost'] = avg_cost
            stat_lowAbs['testcost'] = avg_test_cost
            stat_lowAbs['resultMatrix'] = resultMatrix
            stat_lowAbs['dist'] = test_distance
            rnn.Save(model_path + 'lowdist')

        log = ("Epoch {}/{}, l_rate:{:.10f}, cost = {:>7.4f}, test cost = {:>7.4f}, "
               "accuracy(train,test/best):({:.4f}, {:.4f}/{:.4f}), "
               "test_distance = {:.4f}, time = {}/{}\n").format(
            curr_epoch, num_epoch, rnn.learning_rate, avg_cost, avg_test_cost,
            avg_train_accuracy, avg_test_accuracy, stat['tests'], test_distance,
            trainTime.duration(), totalTime.duration())
        logger.write(log)

    summary = """
    {}.{}.{}
    learning_rate : {}
    train_data_ratio : {}
    num_epoch : {}
    batch_size : {}
    windowsize : {}
    windowshift : {}
    Best evaluation based on test data:
        Accuracy_train : {}
        Accuracy_test : {} at epoch : {}
    Best evaluation based on test data at lowest distance:
        Accuracy_train : {}
        Accuracy_test : {} at epoch : {}
    Best result matrix:
    {}{}
    Best result matrix at lowest distance:
    {}{}
    """.format(rnn.name, experiment, msg, rnn.learning_rate, train_rate,
               num_epoch, a.batch_size, a.windowsize, a.windowstep,
               stat["trains"], stat["tests"], stat['epoch'],
               stat_lowAbs['trains'], stat_lowAbs['tests'], stat_lowAbs['epoch'],
               stat['resultMatrix'], matrixAccuracy(stat['resultMatrix']),
               stat_lowAbs['resultMatrix'], matrixAccuracy(stat_lowAbs['resultMatrix']))
    print(summary)
    logger.flush()
    logger.close()
    plot_static(log_path)
    with open("SAVE_Logs/log.txt", "a") as f:
        f.write(summary)
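# matrixAccuracy() is not defined in this file. Since resultMatrix is a 2x2
# confusion matrix of counts, a plausible sketch is overall accuracy, i.e.
# the trace divided by the total count (an assumption, not the original
# implementation):
import numpy as np

def matrixAccuracy(result_matrix):
    """Hypothetical: fraction of correct predictions in a confusion matrix."""
    return np.trace(result_matrix) / np.sum(result_matrix)

# Example: matrixAccuracy(np.array([[40, 10], [5, 45]])) -> 0.85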
def main():
    args.cuda = True
    # Dataset.
    train_dataset = BSDS_RCFLoader(root=args.dataset, split="train")
    test_dataset = BSDS_RCFLoader(root=args.dataset + "/HED-BSDS", split="test")
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              num_workers=8, drop_last=True, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                             num_workers=8, drop_last=True, shuffle=False)

    # Model.
    model = RCF()
    model.cuda()
    model.apply(weights_init)
    load_vgg16pretrain(model)
    # A checkpoint to resume from is required here.
    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}'".format(args.resume))
        else:
            raise Exception("=> no checkpoint found at '{}'".format(args.resume))
    else:
        raise Exception("=> a checkpoint to resume from is required")

    # Group parameters so each layer type gets its own learning rate and
    # weight decay, following the RCF solver settings.
    net_parameters_id = defaultdict(list)  # requires: from collections import defaultdict
    for pname, p in model.named_parameters():
        if pname in ['conv1_1.weight', 'conv1_2.weight',
                     'conv2_1.weight', 'conv2_2.weight',
                     'conv3_1.weight', 'conv3_2.weight', 'conv3_3.weight',
                     'conv4_1.weight', 'conv4_2.weight', 'conv4_3.weight']:
            print(pname, 'lr:1 de:1')
            net_parameters_id['conv1-4.weight'].append(p)
        elif pname in ['conv1_1.bias', 'conv1_2.bias',
                       'conv2_1.bias', 'conv2_2.bias',
                       'conv3_1.bias', 'conv3_2.bias', 'conv3_3.bias',
                       'conv4_1.bias', 'conv4_2.bias', 'conv4_3.bias']:
            print(pname, 'lr:2 de:0')
            net_parameters_id['conv1-4.bias'].append(p)
        elif pname in ['conv5_1.weight', 'conv5_2.weight', 'conv5_3.weight']:
            print(pname, 'lr:100 de:1')
            net_parameters_id['conv5.weight'].append(p)
        elif pname in ['conv5_1.bias', 'conv5_2.bias', 'conv5_3.bias']:
            print(pname, 'lr:200 de:0')
            net_parameters_id['conv5.bias'].append(p)
        elif pname in ['conv1_1_down.weight', 'conv1_2_down.weight',
                       'conv2_1_down.weight', 'conv2_2_down.weight',
                       'conv3_1_down.weight', 'conv3_2_down.weight', 'conv3_3_down.weight',
                       'conv4_1_down.weight', 'conv4_2_down.weight', 'conv4_3_down.weight',
                       'conv5_1_down.weight', 'conv5_2_down.weight', 'conv5_3_down.weight']:
            print(pname, 'lr:0.1 de:1')
            net_parameters_id['conv_down_1-5.weight'].append(p)
        elif pname in ['conv1_1_down.bias', 'conv1_2_down.bias',
                       'conv2_1_down.bias', 'conv2_2_down.bias',
                       'conv3_1_down.bias', 'conv3_2_down.bias', 'conv3_3_down.bias',
                       'conv4_1_down.bias', 'conv4_2_down.bias', 'conv4_3_down.bias',
                       'conv5_1_down.bias', 'conv5_2_down.bias', 'conv5_3_down.bias']:
            print(pname, 'lr:0.2 de:0')
            net_parameters_id['conv_down_1-5.bias'].append(p)
        elif pname in ['score_dsn1.weight', 'score_dsn2.weight', 'score_dsn3.weight',
                       'score_dsn4.weight', 'score_dsn5.weight']:
            print(pname, 'lr:0.01 de:1')
            net_parameters_id['score_dsn_1-5.weight'].append(p)
        elif pname in ['score_dsn1.bias', 'score_dsn2.bias', 'score_dsn3.bias',
                       'score_dsn4.bias', 'score_dsn5.bias']:
            print(pname, 'lr:0.02 de:0')
            net_parameters_id['score_dsn_1-5.bias'].append(p)
        elif pname in ['score_final.weight']:
            print(pname, 'lr:0.001 de:1')
            net_parameters_id['score_final.weight'].append(p)
        elif pname in ['score_final.bias']:
            print(pname, 'lr:0.002 de:0')
            net_parameters_id['score_final.bias'].append(p)

    optimizer = torch.optim.SGD([
        {'params': net_parameters_id['conv1-4.weight'],       'lr': args.lr * 1,     'weight_decay': args.weight_decay},
        {'params': net_parameters_id['conv1-4.bias'],         'lr': args.lr * 2,     'weight_decay': 0.},
        {'params': net_parameters_id['conv5.weight'],         'lr': args.lr * 100,   'weight_decay': args.weight_decay},
        {'params': net_parameters_id['conv5.bias'],           'lr': args.lr * 200,   'weight_decay': 0.},
        {'params': net_parameters_id['conv_down_1-5.weight'], 'lr': args.lr * 0.1,   'weight_decay': args.weight_decay},
        {'params': net_parameters_id['conv_down_1-5.bias'],   'lr': args.lr * 0.2,   'weight_decay': 0.},
        {'params': net_parameters_id['score_dsn_1-5.weight'], 'lr': args.lr * 0.01,  'weight_decay': args.weight_decay},
        {'params': net_parameters_id['score_dsn_1-5.bias'],   'lr': args.lr * 0.02,  'weight_decay': 0.},
        {'params': net_parameters_id['score_final.weight'],   'lr': args.lr * 0.001, 'weight_decay': args.weight_decay},
        {'params': net_parameters_id['score_final.bias'],     'lr': args.lr * 0.002, 'weight_decay': 0.},
    ], lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)

    # Log.
    log = Logger(join(TMP_DIR, '%s-%d-log.txt' % ('sgd', args.lr)))
    sys.stdout = log

    for epoch in range(args.start_epoch, args.maxepoch):
        tr_avg_loss, tr_detail_loss = train(
            train_loader, model, optimizer, epoch,
            save_dir=join(TMP_DIR, 'epoch-%d-training-record' % epoch))
        # with torch.no_grad():
        #     test(model, test_loader, epoch=epoch,
        #          save_dir=join(TMP_DIR, 'epoch-%d-testing-record-view' % epoch))
        #     multiscale_test(model, test_loader, epoch=epoch,
        #                     save_dir=join(TMP_DIR, 'epoch-%d-testing-record' % epoch))
        log.flush()  # write log
        # Save checkpoint.
        save_file = os.path.join(TMP_DIR, 'checkpoint.pth')
        save_checkpoint({'epoch': epoch,
                         'state_dict': model.state_dict(),
                         'optimizer': optimizer.state_dict()},
                        filename=save_file)
        scheduler.step()  # adjust learning rate
def main():
    parser = argparse.ArgumentParser()
    # Settings
    parser.add_argument('-d', '--dataset', choices=dataset_attributes.keys(), required=True)
    parser.add_argument('-s', '--shift_type', choices=shift_types, required=True)
    # Confounders
    parser.add_argument('-t', '--target_name')
    parser.add_argument('-c', '--confounder_names', nargs='+')
    # Resume?
    parser.add_argument('--resume', default=False, action='store_true')
    # Label shifts
    parser.add_argument('--minority_fraction', type=float)
    parser.add_argument('--imbalance_ratio', type=float)
    # Data
    parser.add_argument('--fraction', type=float, default=1.0)
    parser.add_argument('--root_dir', default=None)
    parser.add_argument('--subsample_to_minority', action='store_true', default=False)
    parser.add_argument('--reweight_groups', action='store_true', default=False)
    parser.add_argument('--augment_data', action='store_true', default=False)
    parser.add_argument('--val_fraction', type=float, default=0.1)
    # Objective
    parser.add_argument('--robust', default=False, action='store_true')
    parser.add_argument('--alpha', type=float, default=0.2)
    parser.add_argument('--generalization_adjustment', default="0.0")
    parser.add_argument('--automatic_adjustment', default=False, action='store_true')
    parser.add_argument('--robust_step_size', default=0.01, type=float)
    parser.add_argument('--use_normalized_loss', default=False, action='store_true')
    parser.add_argument('--btl', default=False, action='store_true')
    parser.add_argument('--hinge', default=False, action='store_true')
    # Model
    parser.add_argument('--model', choices=model_attributes.keys(), default='resnet50')
    parser.add_argument('--train_from_scratch', action='store_true', default=False)
    parser.add_argument('--resnet_width', type=int, default=None)
    # Optimization
    parser.add_argument('--n_epochs', type=int, default=4)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--scheduler', action='store_true', default=False)
    parser.add_argument('--weight_decay', type=float, default=5e-5)
    parser.add_argument('--gamma', type=float, default=0.1)
    parser.add_argument('--minimum_variational_weight', type=float, default=0)
    # Misc
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--show_progress', default=False, action='store_true')
    parser.add_argument('--log_dir', default='./logs')
    parser.add_argument('--log_every', default=50, type=int)
    parser.add_argument('--save_step', type=int, default=10)
    parser.add_argument('--save_best', action='store_true', default=False)
    parser.add_argument('--save_last', action='store_true', default=True)
    parser.add_argument('--student_width', type=int)
    parser.add_argument('--teacher_dir', type=str)
    parser.add_argument('--teacher_width', type=int)
    parser.add_argument('--gpu', type=str)
    parser.add_argument('--temp', type=str)
    args = parser.parse_args()

    gpu = args.gpu
    temp = args.temp
    check_args(args)
    teacher_dir = args.teacher_dir
    student_width = args.student_width
    teacher_width = args.teacher_width
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    def DistillationLoss(temperature):
        cross_entropy = torch.nn.CrossEntropyLoss()

        def loss(student_logits, teacher_logits, target):
            last_dim = len(student_logits.shape) - 1
            p_t = nn.functional.softmax(teacher_logits / temperature, dim=last_dim)
            log_p_s = nn.functional.log_softmax(student_logits / temperature, dim=last_dim)
            return cross_entropy(student_logits, target) \
                - (p_t * log_p_s).sum(dim=last_dim).mean() * temperature ** 2

        return loss

    # BERT-specific configs copied over from run_glue.py
    if args.model == 'bert':
        args.max_grad_norm = 1.0
        args.adam_epsilon = 1e-8
        args.warmup_steps = 0

    if os.path.exists(args.log_dir) and args.resume:
        resume = True
        mode = 'a'
    else:
        resume = False
        mode = 'w'

    ## Initialize logs
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)
    logger = Logger(os.path.join(args.log_dir, 'log.txt'), mode)
    # Record args
    log_args(args, logger)
    set_seed(args.seed)

    print("starting prep")
    # Data
    # Test data for label_shift_step is not implemented yet
    test_data = None
    test_loader = None
    if args.shift_type == 'confounder':
        train_data, val_data, test_data = prepare_data(args, train=True)
    elif args.shift_type == 'label_shift_step':
        train_data, val_data = prepare_data(args, train=True)
    print("done prep")

    loader_kwargs = {'batch_size': args.batch_size, 'num_workers': 16, 'pin_memory': True}
    train_loader = train_data.get_loader(train=True, reweight_groups=args.reweight_groups, **loader_kwargs)
    val_loader = val_data.get_loader(train=False, reweight_groups=None, **loader_kwargs)
    if test_data is not None:
        test_loader = test_data.get_loader(train=False, reweight_groups=None, **loader_kwargs)

    data = {}
    data['train_loader'] = train_loader
    data['val_loader'] = val_loader
    data['test_loader'] = test_loader
    data['train_data'] = train_data
    data['val_data'] = val_data
    data['test_data'] = test_data
    n_classes = train_data.n_classes
    log_data(data, logger)
    logger.flush()

    ## Define the objective
    if args.hinge:
        assert args.dataset in ['CelebA', 'CUB']  # Only supports binary

        def hinge_loss(yhat, y):
            # MarginRankingLoss takes three arguments, so yhat is split into its
            # two logit columns. It also expects labels in {-1.0, +1.0} rather
            # than {0, 1}, and for y = 1 it expects the first input to be the
            # larger one, hence yhat[:, 1] is passed first.
            torch_loss = torch.nn.MarginRankingLoss(margin=1.0, reduction='none')
            y = (y.float() * 2.0) - 1.0
            return torch_loss(yhat[:, 1], yhat[:, 0], y)

        criterion = hinge_loss
    else:
        criterion = torch.nn.CrossEntropyLoss(reduction='none')

    if resume:
        df = pd.read_csv(os.path.join(args.log_dir, 'test.csv'))
        epoch_offset = df.loc[len(df) - 1, 'epoch'] + 1
        logger.write(f'starting from epoch {epoch_offset}')
    else:
        epoch_offset = 0

    train_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'train.csv'), train_data.n_groups, mode=mode)
    val_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'val.csv'), train_data.n_groups, mode=mode)
    test_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'test.csv'), train_data.n_groups, mode=mode)
    strain_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'strain.csv'), train_data.n_groups, mode=mode)
    sval_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'sval.csv'), train_data.n_groups, mode=mode)
    stest_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'stest.csv'), train_data.n_groups, mode=mode)

    # Teacher: load a trained wide ResNet-10 and copy its weights.
    teacher = resnet10vw(teacher_width, num_classes=n_classes)
    teacher_old = torch.load(teacher_dir + "/10_model.pth")
    for k, m in teacher_old.named_modules():
        m._non_persistent_buffers_set = set()  # PyTorch 1.6.0 compatibility
    teacher.load_state_dict(teacher_old.state_dict())
    teacher = teacher.to('cuda')

    distill_criterion = DistillationLoss(float(temp))

    # Student.
    student = resnet10vw(int(student_width), num_classes=n_classes).to('cuda')

    train(teacher, student, criterion, distill_criterion, data, logger,
          train_csv_logger, val_csv_logger, test_csv_logger,
          strain_csv_logger, sval_csv_logger, stest_csv_logger, args,
          epoch_offset=epoch_offset)

    train_csv_logger.close()
    val_csv_logger.close()
    test_csv_logger.close()
    strain_csv_logger.close()
    sval_csv_logger.close()
    stest_csv_logger.close()
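# A small sanity check for DistillationLoss above: at temperature T the
# soft-label term is (p_t * log_p_s) summed over classes, averaged over the
# batch, and scaled by T^2 so its gradient magnitude stays comparable across
# temperatures (as in Hinton et al., 2015). This self-contained restatement
# uses toy tensors; shapes and values are illustrative only:
import torch
import torch.nn as nn

def distillation_loss(temperature):
    cross_entropy = nn.CrossEntropyLoss()
    def loss(student_logits, teacher_logits, target):
        last_dim = student_logits.dim() - 1
        p_t = nn.functional.softmax(teacher_logits / temperature, dim=last_dim)
        log_p_s = nn.functional.log_softmax(student_logits / temperature, dim=last_dim)
        return cross_entropy(student_logits, target) \
            - (p_t * log_p_s).sum(dim=last_dim).mean() * temperature ** 2
    return loss

student_logits = torch.randn(8, 3, requires_grad=True)
teacher_logits = torch.randn(8, 3)            # teacher is treated as fixed
target = torch.randint(0, 3, (8,))
out = distillation_loss(4.0)(student_logits, teacher_logits, target)
out.backward()                                 # gradients flow to the student only
print(out.item(), student_logits.grad.shape)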
def main():
    args.cuda = True
    # Dataset.
    train_dataset = BSDSLoader(root=args.dataset, dataSplit="train")
    test_dataset = BSDSLoader(root=args.dataset, dataSplit="test")
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              num_workers=8, drop_last=True, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                             num_workers=8, drop_last=True, shuffle=False)
    with open(join(args.dataset, 'test.lst'), 'r') as f:
        test_list = f.readlines()
    test_list = [split(i.rstrip())[1] for i in test_list]
    assert len(test_list) == len(test_loader), "%d vs %d" % (len(test_list), len(test_loader))

    # Model: initialize, then copy in converted VGG weights.
    model = HED()
    model.apply(weights_init)
    pretrained_dict = torch.load(args.model_path)
    pretrained_dict = convert_vgg(pretrained_dict)
    model_dict = model.state_dict()
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    model.cuda()
    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}'".format(args.resume))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Tune per-layer learning rates.
    tuned_lrs = tune_lrs(model, args.lr, args.weight_decay)
    optimizer = torch.optim.SGD(tuned_lrs, lr=args.lr,
                                momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)

    # Log.
    log = Logger(join(TMP_DIR, '%s-%d-log.txt' % ('sgd', args.lr)))
    sys.stdout = log

    train_loss = []
    train_loss_detail = []
    for epoch in range(args.start_epoch, args.maxepoch):
        if epoch == 0:
            print("Performing initial testing...")
            validate(model, test_loader, epoch=epoch, test_list=test_list,
                     save_dir=join(TMP_DIR, 'initial-testing-record'))
        tr_avg_loss, tr_detail_loss = train(
            train_loader, model, optimizer, epoch,
            save_dir=join(TMP_DIR, 'epoch-%d-training-record' % epoch))
        validate(model, test_loader, epoch=epoch, test_list=test_list,
                 save_dir=join(TMP_DIR, 'epoch-%d-testing-record' % epoch))
        log.flush()  # write log
        # Save checkpoint.
        save_file = join(TMP_DIR, 'checkpoint_epoch{}.pth'.format(epoch))
        save_checkpoint({'epoch': epoch,
                         'state_dict': model.state_dict(),
                         'optimizer': optimizer.state_dict()},
                        filename=save_file)
        scheduler.step()  # adjust learning rate
        # Save train loss every epoch in case of an early stop.
        train_loss.append(tr_avg_loss)
        train_loss_detail += tr_detail_loss
def main(args):
    if args.wandb:
        wandb.init(project=f"{args.project_name}_{args.dataset}")
        wandb.config.update(args)

    # BERT-specific configs copied over from run_glue.py
    if args.model.startswith("bert") and args.use_bert_params:
        args.max_grad_norm = 1.0
        args.adam_epsilon = 1e-8
        args.warmup_steps = 0

    if os.path.exists(args.log_dir) and args.resume:
        resume = True
        mode = "a"
    else:
        resume = False
        mode = "w"

    ## Initialize logs
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)
    logger = Logger(os.path.join(args.log_dir, "log.txt"), mode)
    # Record args
    log_args(args, logger)
    set_seed(args.seed)

    # Data
    # Test data for label_shift_step is not implemented yet
    test_data = None
    test_loader = None
    if args.shift_type == "confounder":
        train_data, val_data, test_data = prepare_data(args, train=True)
    elif args.shift_type == "label_shift_step":
        raise NotImplementedError
        train_data, val_data = prepare_data(args, train=True)

    #########################################################################
    ###################### Prepare data for our method ######################
    #########################################################################

    # Should probably not be upweighting if folds are specified.
    assert not args.fold or not args.up_weight

    # Fold passed. Use it as train and valid.
    if args.fold:
        train_data, val_data = folds.get_fold(
            train_data,
            args.fold,
            cross_validation_ratio=(1 / args.num_folds_per_sweep),
            num_valid_per_point=args.num_sweeps,
            seed=args.seed,
        )

    if args.up_weight != 0:
        assert args.aug_col is not None
        # Get points that should be upsampled
        metadata_df = pd.read_csv(args.metadata_path)
        if args.dataset == "jigsaw":
            train_col = metadata_df[metadata_df["split"] == "train"]
        else:
            train_col = metadata_df[metadata_df["split"] == 0]
        aug_indices = np.where(train_col[args.aug_col] == 1)[0]
        print("len", len(train_col), len(aug_indices))
        if args.up_weight == -1:
            up_weight_factor = int((len(train_col) - len(aug_indices)) / len(aug_indices)) - 1
        else:
            up_weight_factor = args.up_weight
        print(f"Up-weight factor: {up_weight_factor}")
        upsampled_points = Subset(train_data, list(aug_indices) * up_weight_factor)
        # Convert to DRODataset
        train_data = dro_dataset.DRODataset(
            ConcatDataset([train_data, upsampled_points]),
            process_item_fn=None,
            n_groups=train_data.n_groups,
            n_classes=train_data.n_classes,
            group_str_fn=train_data.group_str,
        )
    elif args.aug_col is not None:
        print("\n" * 2 + "WARNING: aug_col is not being used." + "\n" * 2)

    #########################################################################
    #########################################################################
    #########################################################################

    loader_kwargs = {"batch_size": args.batch_size, "num_workers": 4, "pin_memory": True}
    train_loader = dro_dataset.get_loader(train_data, train=True,
                                          reweight_groups=args.reweight_groups,
                                          **loader_kwargs)
    val_loader = dro_dataset.get_loader(val_data, train=False,
                                        reweight_groups=None, **loader_kwargs)
    if test_data is not None:
        test_loader = dro_dataset.get_loader(test_data, train=False,
                                             reweight_groups=None, **loader_kwargs)

    data = {}
    data["train_loader"] = train_loader
    data["val_loader"] = val_loader
    data["test_loader"] = test_loader
    data["train_data"] = train_data
    data["val_data"] = val_data
    data["test_data"] = test_data
    n_classes = train_data.n_classes
    log_data(data, logger)

    ## Initialize model
    model = get_model(
        model=args.model,
        pretrained=not args.train_from_scratch,
        resume=resume,
        n_classes=train_data.n_classes,
        dataset=args.dataset,
        log_dir=args.log_dir,
    )
    if args.wandb:
        wandb.watch(model)
    logger.flush()

    ## Define the objective
    if args.hinge:
        assert args.dataset in ["CelebA", "CUB"]  # Only supports binary
        criterion = hinge_loss
    else:
        criterion = torch.nn.CrossEntropyLoss(reduction="none")

    if resume:
        raise NotImplementedError  # Check this implementation.
        df = pd.read_csv(os.path.join(args.log_dir, "test.csv"))
        epoch_offset = df.loc[len(df) - 1, "epoch"] + 1
        logger.write(f"starting from epoch {epoch_offset}")
    else:
        epoch_offset = 0

    train_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, "train.csv"), train_data.n_groups, mode=mode)
    val_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, "val.csv"), val_data.n_groups, mode=mode)
    test_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, "test.csv"), test_data.n_groups, mode=mode)

    train(
        model,
        criterion,
        data,
        logger,
        train_csv_logger,
        val_csv_logger,
        test_csv_logger,
        args,
        epoch_offset=epoch_offset,
        csv_name=args.fold,
        wandb=wandb if args.wandb else None,
    )

    train_csv_logger.close()
    val_csv_logger.close()
    test_csv_logger.close()
def main():
    args.cuda = True
    # Dataset.
    train_dataset = BSDSLoader(root=args.dataset, split="train")
    test_dataset = BSDSLoader(root=args.dataset, split="test")
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              num_workers=4, drop_last=True, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                             num_workers=4, drop_last=True, shuffle=False)
    with open(join(args.dataset, 'test.lst'), 'r') as f:
        test_list = f.readlines()
    test_list = [split(i.rstrip())[1] for i in test_list]
    assert len(test_list) == len(test_loader), "%d vs %d" % (len(test_list), len(test_loader))

    # Default hyperparameters.
    if args.use_cfg:
        if args.pretrained and not args.small:
            args.stepsize = 2
            args.lr = 0.001 if args.harmonic else 0.0002
        elif args.small:
            args.stepsize = 6
            args.lr = 0.005 if args.harmonic else 0.001
        else:
            args.stepsize = 4
            args.lr = 0.0005 if args.harmonic else 0.0002
        args.maxepoch = args.stepsize + 1

    # Model.
    model = HEDSmall(harmonic=args.harmonic) if args.small else HED(harmonic=args.harmonic)
    model.cuda()
    model.apply(weights_init)
    if args.pretrained and not args.small:
        if args.harmonic:
            load_harm_vgg16pretrain(model)
        else:
            load_vgg16pretrain(model)

    # Tune per-layer learning rates.
    net_parameters_id = {}
    if args.pretrained and not args.small:
        for pname, p in model.named_parameters():
            if pname in ['conv1_1.weight', 'conv1_2.weight',
                         'conv2_1.weight', 'conv2_2.weight',
                         'conv3_1.weight', 'conv3_2.weight', 'conv3_3.weight',
                         'conv4_1.weight', 'conv4_2.weight', 'conv4_3.weight',
                         'conv5_1.weight', 'conv5_2.weight', 'conv5_3.weight']:
                print(pname, 'lr:1 de:1')
                if 'conv1-5.weight' not in net_parameters_id:
                    net_parameters_id['conv1-5.weight'] = []
                net_parameters_id['conv1-5.weight'].append(p)
            elif pname in ['conv1_1.bias', 'conv1_2.bias',
                           'conv2_1.bias', 'conv2_2.bias',
                           'conv3_1.bias', 'conv3_2.bias', 'conv3_3.bias',
                           'conv4_1.bias', 'conv4_2.bias', 'conv4_3.bias',
                           'conv5_1.bias', 'conv5_2.bias', 'conv5_3.bias']:
                print(pname, 'lr:2 de:0')
                if 'conv1-5.bias' not in net_parameters_id:
                    net_parameters_id['conv1-5.bias'] = []
                net_parameters_id['conv1-5.bias'].append(p)
            elif pname in ['score_dsn1.weight', 'score_dsn2.weight', 'score_dsn3.weight',
                           'score_dsn4.weight', 'score_dsn5.weight']:
                print(pname, 'lr:0.01 de:1')
                if 'score_dsn_1-5.weight' not in net_parameters_id:
                    net_parameters_id['score_dsn_1-5.weight'] = []
                net_parameters_id['score_dsn_1-5.weight'].append(p)
            elif pname in ['score_dsn1.bias', 'score_dsn2.bias', 'score_dsn3.bias',
                           'score_dsn4.bias', 'score_dsn5.bias']:
                print(pname, 'lr:0.02 de:0')
                if 'score_dsn_1-5.bias' not in net_parameters_id:
                    net_parameters_id['score_dsn_1-5.bias'] = []
                net_parameters_id['score_dsn_1-5.bias'].append(p)
            elif pname in ['score_final.weight']:
                print(pname, 'lr:0.001 de:1')
                if 'score_final.weight' not in net_parameters_id:
                    net_parameters_id['score_final.weight'] = []
                net_parameters_id['score_final.weight'].append(p)
            elif pname in ['score_final.bias']:
                print(pname, 'lr:0.002 de:0')
                if 'score_final.bias' not in net_parameters_id:
                    net_parameters_id['score_final.bias'] = []
                net_parameters_id['score_final.bias'].append(p)
        param_groups = [
            {'params': net_parameters_id['conv1-5.weight'],       'lr': args.lr * 1,     'weight_decay': args.weight_decay},
            {'params': net_parameters_id['conv1-5.bias'],         'lr': args.lr * 2,     'weight_decay': 0.},
            {'params': net_parameters_id['score_dsn_1-5.weight'], 'lr': args.lr * 0.01,  'weight_decay': args.weight_decay},
            {'params': net_parameters_id['score_dsn_1-5.bias'],   'lr': args.lr * 0.02,  'weight_decay': 0.},
            {'params': net_parameters_id['score_final.weight'],   'lr': args.lr * 0.001, 'weight_decay': args.weight_decay},
            {'params': net_parameters_id['score_final.bias'],     'lr': args.lr * 0.002, 'weight_decay': 0.},
        ]
    else:
        net_parameters_id = {'weights': [], 'biases': []}
        for pname, p in model.named_parameters():
            if 'weight' in pname:
                net_parameters_id['weights'].append(p)
            elif 'bias' in pname:
                net_parameters_id['biases'].append(p)
        param_groups = [
            {'params': net_parameters_id['weights'], 'weight_decay': args.weight_decay},
            {'params': net_parameters_id['biases'], 'weight_decay': 0.},
        ]
    optimizer = torch.optim.Adam(param_groups, lr=args.lr, weight_decay=args.weight_decay)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)

    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}'".format(args.resume))
            optimizer.load_state_dict(checkpoint['optimizer'])
            args.start_epoch = checkpoint['epoch']
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Log.
    log = Logger(join(OUT_DIR, 'log.txt'))
    sys.stdout = log

    train_loss = []
    train_loss_detail = []
    for epoch in range(args.start_epoch, args.maxepoch):
        if epoch == 0:
            print("Performing initial testing...")
            test(model, test_loader, epoch=epoch, test_list=test_list,
                 save_dir=join(OUT_DIR, 'initial-testing-record'))
        tr_avg_loss, tr_detail_loss = train(
            train_loader, model, optimizer, epoch,
            save_dir=join(OUT_DIR, 'epoch-%d-training-record' % epoch))
        test(model, test_loader, epoch=epoch, test_list=test_list,
             save_dir=join(OUT_DIR, 'epoch-%d-testing-record' % epoch))
        log.flush()  # write log
        # Save checkpoint.
        save_file = os.path.join(OUT_DIR, 'checkpoint_epoch{}.pth'.format(epoch))
        save_checkpoint({'epoch': epoch,
                         'state_dict': model.state_dict(),
                         'optimizer': optimizer.state_dict()},
                        filename=save_file)
        scheduler.step()  # adjust learning rate
        # Save train loss every epoch in case of an early stop.
        train_loss.append(tr_avg_loss)
        train_loss_detail += tr_detail_loss
def main():
    print("Loading and checking args...")
    args = parse_args()
    check_args(args)

    # BERT-specific configs copied over from run_glue.py
    if args.model.startswith('bert'):
        args.max_grad_norm = 1.0
        args.adam_epsilon = 1e-8
        args.warmup_steps = 0

    # Write mode for logging; assumes no existing logs.
    mode = 'w'

    ## Initialize logs
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)
    logger = Logger(os.path.join(args.log_dir, 'log.txt'), mode)
    # Record args
    log_args(args, logger)
    set_seed(args.seed)

    # Data
    print("Preparing data")
    train_data, val_data, test_data = prepare_data(args, train=True)
    print("Setting up loader")
    loader_kwargs = {'batch_size': args.batch_size, 'num_workers': 4, 'pin_memory': True}
    train_loader = train_data.get_loader(train=True, reweight_groups=args.reweight_groups, **loader_kwargs)
    val_loader = val_data.get_loader(train=False, reweight_groups=None, **loader_kwargs)
    test_loader = test_data.get_loader(train=False, reweight_groups=None, **loader_kwargs)

    data = {}
    data['train_loader'] = train_loader
    data['val_loader'] = val_loader
    data['test_loader'] = test_loader
    data['train_data'] = train_data
    data['val_data'] = val_data
    data['test_data'] = test_data
    n_classes = train_data.n_classes
    log_data(data, logger)

    ## Initialize model
    if args.model == 'resnet50':
        model = torchvision.models.resnet50(pretrained=True)
        d = model.fc.in_features
        model.fc = nn.Linear(d, n_classes)
        if args.mc_dropout:
            model = add_dropout(model, 'fc')
    elif args.model == 'densenet121':
        model = torchvision.models.densenet121(pretrained=True)
        d = model.classifier.in_features
        model.classifier = nn.Linear(d, n_classes)
        if args.mc_dropout:
            model = add_dropout(model, 'classifier')
    elif args.model == 'bert-base-uncased':
        print("Loading bert")
        model = BertForSequenceClassification.from_pretrained(args.model, num_labels=n_classes)
    else:
        raise ValueError('Model not recognized.')

    logger.flush()
    criterion = torch.nn.CrossEntropyLoss(reduction='none')

    print("Getting loggers")
    train_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'train.csv'), train_data.n_groups, mode=mode)
    val_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'val.csv'), train_data.n_groups, mode=mode)
    test_csv_logger = CSVBatchLogger(os.path.join(args.log_dir, 'test.csv'), train_data.n_groups, mode=mode)

    print("Starting to train...")
    train(model, criterion, data, logger, train_csv_logger, val_csv_logger,
          test_csv_logger, args, epoch_offset=0, train=True)

    train_csv_logger.close()
    val_csv_logger.close()
    test_csv_logger.close()

    if args.save_preds:
        save_preds(model, data, args)
    return
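# add_dropout() is not defined in this file. Based on how it is called above
# (model plus the attribute name of the classifier head), a plausible sketch
# inserts a dropout layer in front of that head for Monte Carlo dropout; this
# is an assumption, not the original helper:
import torch.nn as nn

def add_dropout(model, head_name, p=0.5):
    """Hypothetical: wrap the named head as Dropout -> original layer."""
    head = getattr(model, head_name)
    setattr(model, head_name, nn.Sequential(nn.Dropout(p=p), head))
    return model

# At inference time, keeping the Dropout module in training mode yields
# stochastic forward passes for MC-dropout uncertainty estimates.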