def train_net(args):
    """Train a DIMModel end-to-end with SGD/Adam, early stopping and LR decay.

    Either initializes a fresh model (seeding all RNGs for reproducibility) or
    resumes from ``args.checkpoint``, restoring optimizer and RNG states.
    Trains for up to ``args.end_epoch`` epochs, logging train/valid loss and
    the learning rate to TensorBoard, and saves a checkpoint every epoch.

    Args:
        args: parsed CLI namespace; reads checkpoint, logdir, pretrained,
            optimizer, lr, mom, weight_decay, beta1, beta2, start_epoch,
            end_epoch, batch_size, checkpointdir.
    """
    checkpoint = args.checkpoint
    start_epoch = 1
    best_loss = float('inf')
    writer = SummaryWriter(logdir=args.logdir)
    epochs_since_improvement = 0
    decays_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        # Seed every RNG the training path may touch so a fresh run is
        # reproducible. FIX: the original seeded torch (CPU + CUDA) and NumPy
        # but not Python's `random`, unlike the resume branch below, which
        # restores/sets all four states.
        torch.random.manual_seed(7)
        torch.cuda.manual_seed(7)
        np.random.seed(7)
        random.seed(7)

        model = DIMModel(num_classes=1)
        if args.pretrained:
            migrate(model)
        model = nn.DataParallel(model)

        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=args.lr,
                                        momentum=args.mom,
                                        weight_decay=args.weight_decay)
        else:
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=args.lr,
                                         weight_decay=args.weight_decay,
                                         betas=[args.beta1, args.beta2])
        start_epoch = args.start_epoch
    else:
        # Resume: restore model, optimizer, bookkeeping, and RNG states.
        # Older checkpoints may lack the RNG keys, hence the fallbacks.
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        if 'torch_seed' in checkpoint:
            torch.random.set_rng_state(checkpoint['torch_seed'])
        else:
            torch.random.manual_seed(7)
        if 'torch_cuda_seed' in checkpoint:
            torch.cuda.set_rng_state(checkpoint['torch_cuda_seed'])
        else:
            torch.cuda.manual_seed(7)
        if 'np_seed' in checkpoint:
            np.random.set_state(checkpoint['np_seed'])
        else:
            np.random.seed(7)
        if 'python_seed' in checkpoint:
            random.setstate(checkpoint['python_seed'])
        else:
            random.seed(7)

    logger = get_logger()

    # Move to GPU, if available
    model = model.to(device)

    train_dataset = DIMDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=8,
                                               pin_memory=True)
    valid_dataset = DIMDataset('valid')
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=False,
                                               num_workers=8,
                                               pin_memory=True)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        # SGD-only schedule: stop after 10 stagnant epochs; decay the LR by
        # 0.6 for every 2 consecutive epochs without improvement.
        if args.optimizer == 'sgd' and epochs_since_improvement == 10:
            break
        if args.optimizer == 'sgd' and epochs_since_improvement > 0 and epochs_since_improvement % 2 == 0:
            decays_since_improvement += 1
            print("\nDecays since last improvement: %d\n" % (decays_since_improvement,))
            adjust_learning_rate(optimizer, 0.6 ** decays_since_improvement)

        # One epoch's training
        train_loss = train(train_loader=train_loader,
                           model=model,
                           optimizer=optimizer,
                           epoch=epoch,
                           logger=logger)
        effective_lr = get_learning_rate(optimizer)
        print('Current effective learning rate: {}\n'.format(effective_lr))

        writer.add_scalar('Train_Loss', train_loss, epoch)
        writer.add_scalar('Learning_Rate', effective_lr, epoch)

        # One epoch's validation
        valid_loss = valid(valid_loader=valid_loader,
                           model=model,
                           epoch=epoch,
                           logger=logger)
        writer.add_scalar('Valid_Loss', valid_loss, epoch)

        # Check if there was an improvement
        is_best = valid_loss < best_loss
        best_loss = min(valid_loss, best_loss)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))
        else:
            epochs_since_improvement = 0
            decays_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, optimizer,
                        best_loss, is_best, args.checkpointdir)
def train_net(args):
    """Train a DIMModel using randomly-sampled subsets per epoch.

    NOTE(review): this is a second definition of ``train_net`` in the same
    module and shadows the earlier one at import time — confirm which version
    is intended, or rename one of them.

    Fresh runs build a pretrained, unpooling DIMModel; resumed runs restore
    model and optimizer from ``args.checkpoint``. Both paths use
    ``RandomSampler`` with a fixed ``num_samples`` per epoch (sizes derived
    from the module-level ``num_fgs`` / ``valid_ratio`` — defined elsewhere
    in this file). Logs to "runs_1_1" and checkpoints to "checkpoints_1_1".

    Args:
        args: parsed CLI namespace; reads checkpoint, optimizer, lr, mom,
            weight_decay, start_epoch, end_epoch, batch_size.
    """
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_loss = float('inf')
    writer = SummaryWriter(logdir="runs_1_1")
    epochs_since_improvement = 0
    decays_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        model = DIMModel(n_classes=1, in_channels=4, is_unpooling=True, pretrain=True)
        migrate(model)
        model = nn.DataParallel(model)

        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=args.lr,
                                        momentum=args.mom,
                                        weight_decay=args.weight_decay)
        else:
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)
        start_epoch = args.start_epoch
    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']

    logger = get_logger()

    # Move to GPU, if available
    model = model.to(device)

    # Custom dataloaders: each epoch draws a fixed-size random sample
    # rather than iterating the full dataset.
    train_dataset = DIMDataset('train')
    train_sample = RandomSampler(train_dataset,
                                 num_samples=int(num_fgs * args.batch_size * 8))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               sampler=train_sample,
                                               batch_size=args.batch_size,
                                               num_workers=8)
    valid_dataset = DIMDataset('valid')
    # FIX: the sampler must draw indices from the dataset it feeds — the
    # original passed train_dataset here, so validation indices were drawn
    # against the wrong dataset's length.
    valid_sample = RandomSampler(valid_dataset,
                                 num_samples=int(valid_ratio * num_fgs) * args.batch_size * 8)
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               sampler=valid_sample,
                                               batch_size=args.batch_size,
                                               num_workers=8)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        # SGD-only schedule: stop after 10 stagnant epochs; decay the LR by
        # 0.6 for every 2 consecutive epochs without improvement.
        if args.optimizer == 'sgd' and epochs_since_improvement == 10:
            break
        if args.optimizer == 'sgd' and epochs_since_improvement > 0 and epochs_since_improvement % 2 == 0:
            decays_since_improvement += 1
            print("\nDecays since last improvement: %d\n" % (decays_since_improvement, ))
            adjust_learning_rate(optimizer, 0.6 ** decays_since_improvement)

        # One epoch's training
        train_loss = train(train_loader=train_loader,
                           model=model,
                           optimizer=optimizer,
                           epoch=epoch,
                           logger=logger)
        effective_lr = get_learning_rate(optimizer)
        print('Current effective learning rate: {}\n'.format(effective_lr))

        writer.add_scalar('Train_Loss', train_loss, epoch)
        writer.add_scalar('Learning_Rate', effective_lr, epoch)

        # One epoch's validation
        valid_loss = valid(valid_loader=valid_loader,
                           model=model,
                           epoch=epoch,
                           logger=logger)
        writer.add_scalar('Valid_Loss', valid_loss, epoch)

        # Check if there was an improvement
        is_best = valid_loss < best_loss
        best_loss = min(valid_loss, best_loss)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0
            decays_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, optimizer,
                        best_loss, is_best, "checkpoints_1_1")