def train(args):
    """Train a U-Net for image-to-image reconstruction on the Sony dataset.

    Args:
        args: argparse namespace. Fields read here: gpu, input_dir, gt_dir,
            ps (patch size), batch_size, lr, wd, num_epoch, log_interval,
            save_freq, result_dir, model_save_freq, checkpoint_dir.

    Side effects:
        Writes a preview JPEG under ``result_dir/<epoch>/`` every
        ``save_freq`` epochs and a checkpoint every ``model_save_freq`` epochs.
    """
    # Device: a specific GPU index when CUDA is available, CPU otherwise.
    device = torch.device("cuda:%d" % args.gpu if torch.cuda.is_available() else "cpu")

    # Data
    trainset = SonyDataset(args.input_dir, args.gt_dir, args.ps)
    train_loader = DataLoader(trainset, batch_size=args.batch_size,
                              shuffle=True, num_workers=12, pin_memory=True)
    logging.info("data loading okay")

    # Model, loss, optimizer, LR schedule (decay by 10x every 2000 epochs).
    model = Unet().to(device)
    criterion = nn.L1Loss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=2000, gamma=0.1)

    # Training
    running_loss = 0.0
    for epoch in range(args.num_epoch):
        for i, databatch in enumerate(train_loader):
            # get the inputs
            input_patch, gt_patch, train_id, ratio = databatch
            input_patch, gt_patch = input_patch.to(device), gt_patch.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(input_patch)
            loss = criterion(outputs, gt_patch)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % args.log_interval == (args.log_interval - 1):
                print('[%d, %5d] loss: %.3f %s' % (
                    epoch, i, running_loss / args.log_interval, datetime.now()))
                running_loss = 0.0

        # FIX: step the LR scheduler AFTER the epoch's optimizer steps.
        # The original called scheduler.step() before the inner loop, which
        # in PyTorch >= 1.1 skips the initial learning-rate value.
        scheduler.step()

        if epoch % args.save_freq == 0:
            # FIX: build paths with os.path.join; the original concatenated
            # args.result_dir with a '%04d/...' pattern, which breaks when
            # result_dir lacks a trailing separator.
            epoch_dir = os.path.join(args.result_dir, '%04d' % epoch)
            if not os.path.isdir(epoch_dir):
                os.makedirs(epoch_dir)

            # Save a side-by-side (ground truth | prediction) preview of the
            # first sample of the last batch. Concatenation on axis=2 assumes
            # CHW tensors so axis 2 is width -- TODO confirm against Dataset.
            gt_np = gt_patch.cpu().detach().numpy()
            out_np = outputs.cpu().detach().numpy()
            ids = train_id.numpy()
            ratios = ratio.numpy()
            temp = np.concatenate((gt_np[0, :, :, :], out_np[0, :, :, :]), axis=2)
            # NOTE(review): scipy.misc.toimage was removed in SciPy 1.2; this
            # call requires scipy < 1.2 (or porting to PIL / imageio).
            scipy.misc.toimage(temp * 255, high=255, low=0, cmin=0, cmax=255).save(
                os.path.join(epoch_dir,
                             '%05d_00_train_%d.jpg' % (ids[0], ratios[0])))

        # Periodic checkpoint at the end of the epoch.
        if epoch % args.model_save_freq == 0:
            # FIX: os.path.join instead of args.checkpoint_dir + './model_%d.pl',
            # which produced paths like 'checkpoints./model_5.pl' without a
            # trailing separator on checkpoint_dir.
            torch.save(model.state_dict(),
                       os.path.join(args.checkpoint_dir, 'model_%d.pl' % epoch))
# --- Top-level script section: validation pipeline, model, and training run ---

# Wrap the validation dataframe in the project's Dataset and a DataLoader;
# small fixed batches, no shuffling so evaluation order is reproducible.
valid_data = Ultrasound_Dataset(valid_df, transform=transforms_valid)
valid_loader = DataLoader(valid_data, batch_size=4, shuffle=False)

# Checking GPU availability (prints only; the actual device move is below).
use_cuda = True
if use_cuda and torch.cuda.is_available():
    print('yes')
    print(torch.cuda.is_available())

# Model initialization.
# NOTE(review): net_type='semi_inception', version='b', add_residual=True are
# options of the project's Unet class; their semantics are not visible here.
model = Unet(1, net_type='semi_inception', version='b', add_residual=True)
if use_cuda and torch.cuda.is_available():
    model.cuda()

# Loss is project-defined; the meaning of the (0.5, 1) weights is not visible
# from this chunk -- see CustomLoss for details.
criterion = CustomLoss(0.5, 1)
optimizer = optim.Adam(model.parameters(), 5e-6)  # positional arg is lr=5e-6
# Halve/reduce the LR when the monitored ('min') metric plateaus for 3 epochs;
# presumably train_ calls scheduler.step(val_loss) -- verify in train_.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3)

# train_ returns loss histories plus the final and best ("saved") models;
# train_loader is a module-level global defined outside this chunk.
training_loss, valid_loss, model, saved_model = train_(model, optimizer, scheduler, criterion, train_loader, valid_loader, epochs=5)
plot_learning_curve(training_loss, valid_loss)

# save model for further use
torch.save(model.state_dict(), '../Mymodel')
def main(args):
    """Run MAML-style meta-training of a U-Net and save results/plots.

    Reads many fields from ``args`` (verbose, use_cuda, output_folder,
    datafolder, dataset, num_ways, num_shots, num_shots_test, fold,
    batch_size, num_workers, feature_scale, meta_lr, first_order,
    num_adaption_steps, step_size, num_epochs, num_batches, val_step_size).

    NOTE(review): ``augment``, ``download_data``, ``loss_function`` and
    ``bce_dice_focal`` are module-level globals defined outside this chunk.
    NOTE(review): if ``args.output_folder`` is None, ``output_folder`` and
    ``args.model_path`` are never assigned but are referenced later (results
    dump, plots) -> NameError on that path. Flagging, not fixing, here.
    """
    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
    device = torch.device(
        'cuda' if args.use_cuda and torch.cuda.is_available() else 'cpu')

    # Create output folder (a timestamped run directory inside it).
    if (args.output_folder is not None):
        if not os.path.exists(args.output_folder):
            os.makedirs(args.output_folder)
            logging.debug('Creating folder `{0}`'.format(args.output_folder))

        output_folder = os.path.join(args.output_folder,
                                     time.strftime('%Y-%m-%d_%H%M%S'))
        os.makedirs(output_folder)
        logging.debug('Creating folder `{0}`'.format(output_folder))

        args.datafolder = os.path.abspath(args.datafolder)
        args.model_path = os.path.abspath(
            os.path.join(output_folder, 'model.th'))

        # Save the configuration in a config.json file for reproducibility.
        with open(os.path.join(output_folder, 'config.json'), 'w') as f:
            json.dump(vars(args), f, indent=2)
        logging.info('Saving configuration file in `{0}`'.format(
            os.path.abspath(os.path.join(output_folder, 'config.json'))))

    # Get datasets and load into meta-learning (episodic) format.
    meta_train_dataset, meta_val_dataset, _ = get_datasets(
        args.dataset, args.datafolder, args.num_ways, args.num_shots,
        args.num_shots_test, augment=augment, fold=args.fold,
        download=download_data)
    meta_train_dataloader = BatchMetaDataLoader(meta_train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.num_workers,
                                               pin_memory=True)
    meta_val_dataloader = BatchMetaDataLoader(meta_val_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.num_workers,
                                              pin_memory=True)

    # Define model
    model = Unet(device=device, feature_scale=args.feature_scale)
    model = model.to(device)
    print(f'Using device: {device}')

    # Define outer-loop (meta) optimizer.
    meta_optimizer = torch.optim.Adam(model.parameters(), lr=args.meta_lr)

    # Define meta learner (MAML wrapper around the model).
    metalearner = ModelAgnosticMetaLearning(
        model, meta_optimizer, first_order=args.first_order,
        num_adaptation_steps=args.num_adaption_steps,
        step_size=args.step_size, learn_step_size=False,
        loss_function=loss_function, device=device)

    best_value = None

    # Training loop; epoch_desc pads the epoch number to the width of
    # num_epochs for aligned progress descriptions.
    epoch_desc = 'Epoch {{0: <{0}d}}'.format(1 + int(math.log10(args.num_epochs)))
    train_losses = []
    val_losses = []
    train_ious = []
    train_accuracies = []
    val_accuracies = []
    val_ious = []
    start_time = time.time()
    for epoch in range(args.num_epochs):
        print('start epoch ', epoch + 1)
        print('start train---------------------------------------------------')
        train_loss, train_accuracy, train_iou = metalearner.train(
            meta_train_dataloader, max_batches=args.num_batches,
            verbose=args.verbose, desc='Training', leave=False)
        print(f'\n train accuracy: {train_accuracy}, train loss: {train_loss}')
        print('end train---------------------------------------------------')
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)
        train_ious.append(train_iou)

        # Evaluate in given intervals only (every val_step_size epochs).
        if epoch % args.val_step_size == 0:
            print(
                'start evaluate-------------------------------------------------'
            )
            results = metalearner.evaluate(meta_val_dataloader,
                                           max_batches=args.num_batches,
                                           verbose=args.verbose,
                                           desc=epoch_desc.format(epoch + 1),
                                           is_test=False)
            val_acc = results['accuracy']
            val_loss = results['mean_outer_loss']
            val_losses.append(val_loss)
            val_accuracies.append(val_acc)
            val_ious.append(results['iou'])
            print(
                f'\n validation accuracy: {val_acc}, validation loss: {val_loss}'
            )
            print(
                'end evaluate-------------------------------------------------'
            )

            # Save best model: prefer highest 'accuracies_after' when present,
            # otherwise lowest mean outer loss.
            # NOTE(review): in the 'accuracies_after' branch, save_model is
            # left unset when the new accuracy is not an improvement, so a
            # stale value from a previous epoch (or a NameError on the first
            # non-improving epoch) is possible -- worth confirming/fixing.
            if 'accuracies_after' in results:
                if (best_value is None) or (best_value < results['accuracies_after']):
                    best_value = results['accuracies_after']
                    save_model = True
            elif (best_value is None) or (best_value > results['mean_outer_loss']):
                best_value = results['mean_outer_loss']
                save_model = True
            else:
                save_model = False

            if save_model and (args.output_folder is not None):
                with open(args.model_path, 'wb') as f:
                    torch.save(model.state_dict(), f)

        print('end epoch ', epoch + 1)

    elapsed_time = time.time() - start_time
    print('Finished after ', time.strftime('%H:%M:%S', time.gmtime(elapsed_time)))

    # Collect run metrics into a JSON-serializable dict.
    r = {}
    r['train_losses'] = train_losses
    r['train_accuracies'] = train_accuracies
    r['train_ious'] = train_ious
    r['val_losses'] = val_losses
    r['val_accuracies'] = val_accuracies
    r['val_ious'] = val_ious
    r['time'] = time.strftime('%H:%M:%S', time.gmtime(elapsed_time))
    with open(os.path.join(output_folder, 'train_results.json'), 'w') as g:
        json.dump(r, g)
    logging.info('Saving results dict in `{0}`'.format(
        os.path.abspath(os.path.join(output_folder, 'train_results.json'))))

    # Plot results (loss / accuracy / IoU curves; helpers save to disk).
    plot_errors(args.num_epochs, train_losses, val_losses,
                val_step_size=args.val_step_size, output_folder=output_folder,
                save=True, bce_dice_focal=bce_dice_focal)
    plot_accuracy(args.num_epochs, train_accuracies, val_accuracies,
                  val_step_size=args.val_step_size,
                  output_folder=output_folder, save=True)
    plot_iou(args.num_epochs, train_ious, val_ious,
             val_step_size=args.val_step_size, output_folder=output_folder,
             save=True)

    # Release dataset resources when the dataset type supports it.
    if hasattr(meta_train_dataset, 'close'):
        meta_train_dataset.close()
        meta_val_dataset.close()
        # NOTE(review): this chunk begins mid-way through a training loop; the
        # enclosing `for epoch ...` / `with tqdm(...)` / batch-loop headers are
        # not visible here, so the indentation levels below are a best-effort
        # reconstruction -- verify against the full file.

        # Tail of the per-batch training step: backprop, optimizer update,
        # progress-bar bookkeeping, and running-loss accumulation.
        losses.backward()
        optimizer.step()
        t.set_description('Training epoch %i' % epoch)
        t.set_postfix(loss=losses.item())
        raw_loss += losses.item()
    print('epoch : ', epoch, 'loss:', raw_loss / num_batches_per_epoch)

    # Validation pass: no gradients, same loss, averaged over all batches.
    valid_loss = 0
    with tqdm(total=num_validation_batches_per_epoch) as t:
        for batch in dataloader_validation:
            # Batches arrive as numpy arrays; move to GPU and one-hot encode
            # the segmentation target into n_labels channels.
            inputs = torch.from_numpy(batch['data']).cuda()
            target = torch.from_numpy(batch['seg'].astype(int)).cuda()
            target = get_multi_class_labels(target, n_labels=n_labels)
            with torch.no_grad():
                # `dag` presumably selects an architecture path -- project-
                # specific; semantics not visible in this chunk.
                outputs = model(inputs, dag)
                losses = loss(outputs, target)
            t.set_description('Valid epoch %i' % epoch)
            t.set_postfix(loss=losses.item())
            t.update()
            valid_loss += losses.item()
    print('epoch : ', epoch, 'valid loss:',
          valid_loss / num_validation_batches_per_epoch)

    # CSV log line: epoch, mean train loss, mean valid loss; flush so the
    # log survives an interrupted run.
    log.write('%i,%f,%f\n' % (epoch, raw_loss / num_batches_per_epoch,
                              valid_loss / num_validation_batches_per_epoch))
    log.flush()
    # Checkpoint every epoch (overwrites model_path) and adjust the LR per
    # the schedule helper.
    torch.save(model.state_dict(), model_path)
    current_lr = adjust_lr(optimizer, current_lr, lr_schedule)
log.close()