def configure_lr_scheduler(args, optimizer):
    with logging.block("Learning Rate Scheduler", emph=True):
        logging.value(
            "Scheduler: ",
            args.lr_scheduler if args.lr_scheduler is not None else "None")
        lr_scheduler = None
        if args.lr_scheduler is not None:
            kwargs = typeinf.kwargs_from_args(args, "lr_scheduler")
            with logging.block():
                logging.values(kwargs)
            kwargs["optimizer"] = optimizer
            lr_scheduler = typeinf.instance_from_kwargs(
                args.lr_scheduler_class, kwargs=kwargs)
    return lr_scheduler
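# Usage sketch (assumptions, not guarantees of this codebase: the commandline
# layer resolves --lr_scheduler to a class in torch.optim.lr_scheduler, and
# typeinf.kwargs_from_args collects every flag with the "lr_scheduler_" prefix
# into the kwargs dict). A call such as
#
#   python main.py --lr_scheduler=MultiStepLR \
#                  --lr_scheduler_milestones=[108,144,180] \
#                  --lr_scheduler_gamma=0.5
#
# would then roughly amount to constructing
#
#   torch.optim.lr_scheduler.MultiStepLR(
#       optimizer, milestones=[108, 144, 180], gamma=0.5)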
def configure_visualizers(args,
                          model_and_loss,
                          optimizer,
                          param_scheduler,
                          lr_scheduler,
                          train_loader,
                          validation_loader):
    with logging.block("Runtime Visualizers", emph=True):
        logging.value(
            "Visualizer: ",
            args.visualizer if args.visualizer is not None else "None")
        visualizer = None
        if args.visualizer is not None:
            kwargs = typeinf.kwargs_from_args(args, "visualizer")
            logging.values(kwargs)
            kwargs["args"] = args
            kwargs["model_and_loss"] = model_and_loss
            kwargs["optimizer"] = optimizer
            kwargs["param_scheduler"] = param_scheduler
            kwargs["lr_scheduler"] = lr_scheduler
            kwargs["train_loader"] = train_loader
            kwargs["validation_loader"] = validation_loader
            visualizer = typeinf.instance_from_kwargs(
                args.visualizer_class, kwargs=kwargs)
    return visualizer
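# Interface sketch, inferred from the call site above (an assumption, not a
# documented contract): a visualizer class receives args, model_and_loss,
# optimizer, param_scheduler, lr_scheduler, train_loader and validation_loader
# as keyword arguments, and must support .to(device), i.e. behave like a
# torch.nn.Module. A minimal hypothetical skeleton:
#
#   class DummyVisualizer(torch.nn.Module):
#       def __init__(self, args, model_and_loss, optimizer, param_scheduler,
#                    lr_scheduler, train_loader, validation_loader):
#           super().__init__()
#           self.args = args  # keep references needed for plotting/logging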
def configure_runtime_augmentations(args):
    with logging.block("Runtime Augmentations", emph=True):
        training_augmentation = None
        validation_augmentation = None

        # ----------------------------------------------------
        # Training Augmentation
        # ----------------------------------------------------
        if args.training_augmentation is not None:
            kwargs = typeinf.kwargs_from_args(args, "training_augmentation")
            logging.value("training_augmentation: ", args.training_augmentation)
            with logging.block():
                logging.values(kwargs)
            kwargs["args"] = args
            training_augmentation = typeinf.instance_from_kwargs(
                args.training_augmentation_class, kwargs=kwargs)
            training_augmentation = training_augmentation.to(args.device)
        else:
            logging.info("training_augmentation: None")

        # ----------------------------------------------------
        # Validation Augmentation
        # ----------------------------------------------------
        if args.validation_augmentation is not None:
            kwargs = typeinf.kwargs_from_args(args, "validation_augmentation")
            logging.value("validation_augmentation: ", args.validation_augmentation)
            with logging.block():
                logging.values(kwargs)
            kwargs["args"] = args
            validation_augmentation = typeinf.instance_from_kwargs(
                args.validation_augmentation_class, kwargs=kwargs)
            validation_augmentation = validation_augmentation.to(args.device)
        else:
            logging.info("validation_augmentation: None")

    return training_augmentation, validation_augmentation
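# Note: both augmentation objects are moved with .to(args.device), so the
# registered augmentation classes are assumed to be torch.nn.Module subclasses
# that apply their transforms in forward(). A minimal hypothetical example
# (the "input1" key is an assumption about the example dict layout):
#
#   class HypotheticalNoiseAugmentation(torch.nn.Module):
#       def __init__(self, args, stddev=0.01):
#           super().__init__()
#           self.stddev = stddev
#
#       def forward(self, example_dict):
#           x = example_dict["input1"]
#           example_dict["input1"] = x + self.stddev * torch.randn_like(x)
#           return example_dict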
def main():
    # ---------------------------------------------------
    # Set working directory to folder containing main.py
    # ---------------------------------------------------
    os.chdir(os.path.dirname(os.path.realpath(__file__)))

    # ----------------------------------------------------------------
    # Activate syntax highlighting in tracebacks for better debugging
    # ----------------------------------------------------------------
    colored_traceback.add_hook()

    # -----------------------------------------------------------
    # Configure logging
    # -----------------------------------------------------------
    logging_filename = os.path.join(commandline.parse_save_dir(),
                                    constants.LOGGING_LOGBOOK_FILENAME)
    logger.configure_logging(logging_filename)

    # ----------------------------------------------------------------
    # Register type factories before parsing the commandline.
    # NOTE: We decided to explicitly call these init() functions, to
    #       have more precise control over the timeline
    # ----------------------------------------------------------------
    with logging.block("Registering factories", emph=True):
        augmentations.init()
        datasets.init()
        losses.init()
        models.init()
        optim.init()
        visualizers.init()
        logging.info('Done!')

    # -----------------------------------------------------------
    # Parse commandline after factories have been filled
    # -----------------------------------------------------------
    args = commandline.parse_arguments(blocktitle="Commandline Arguments")

    # -----------------------
    # Telegram configuration
    # -----------------------
    with logging.block("Telegram", emph=True):
        logger.configure_telegram(constants.LOGGING_TELEGRAM_MACHINES_FILENAME)

    # ----------------------------------------------------------------------
    # Log git repository hash and make a compressed copy of the source code
    # ----------------------------------------------------------------------
    with logging.block("Source Code", emph=True):
        logging.value("Git Hash: ", system.git_hash())
        # Zip source code and copy to save folder
        filename = os.path.join(args.save,
                                constants.LOGGING_ZIPSOURCE_FILENAME)
        zipsource.create_zip(filename=filename, directory=os.getcwd())
        logging.value("Archived code: ", filename)

    # ----------------------------------------------------
    # Change process title for `top` and `pkill` commands
    # This is more "informative" in `nvidia-smi` ;-)
    # ----------------------------------------------------
    args = config.configure_proctitle(args)

    # -------------------------------------------------
    # Set random seed for python, numpy, torch, cuda..
    # -------------------------------------------------
    config.configure_random_seed(args)

    # -----------------------------------------------------------
    # Machine stats
    # -----------------------------------------------------------
    with logging.block("Machine Statistics", emph=True):
        if args.cuda:
            args.device = torch.device("cuda:0")
            logging.value("Cuda: ", torch.version.cuda)
            logging.value("Cuda device count: ", torch.cuda.device_count())
            logging.value("Cuda device name: ", torch.cuda.get_device_name(0))
            logging.value("CuDNN: ", torch.backends.cudnn.version())
            device_no = 0
            if 'CUDA_VISIBLE_DEVICES' in os.environ.keys():
                device_no = os.environ['CUDA_VISIBLE_DEVICES']
            args.actual_device = "gpu:%s" % device_no
        else:
            args.device = torch.device("cpu")
            args.actual_device = "cpu"
        logging.value("Hostname: ", system.hostname())
        logging.value("PyTorch: ", torch.__version__)
        logging.value("PyTorch device: ", args.actual_device)

    # ------------------------------------------------------
    # Fetch data loaders. Quit if no data loader is present
    # ------------------------------------------------------
    train_loader, validation_loader = config.configure_data_loaders(args)

    # -------------------------------------------------------------------------
    # Check whether any dataset could be found
    # -------------------------------------------------------------------------
    success = any(loader is not None
                  for loader in [train_loader, validation_loader])
    if not success:
        logging.info(
            "No dataset could be loaded successfully. Please check dataset paths!")
        quit()

    # -------------------------------------------------------------------------
    # Configure runtime augmentations
    # -------------------------------------------------------------------------
    training_augmentation, validation_augmentation = \
        config.configure_runtime_augmentations(args)

    # ----------------------------------------------------------
    # Configure model and loss.
    # ----------------------------------------------------------
    model_and_loss = config.configure_model_and_loss(args)

    # --------------------------------------------------------
    # Print model visualization
    # --------------------------------------------------------
    if args.logging_model_graph:
        with logging.block("Model Graph", emph=True):
            logger.log_module_info(model_and_loss.model)
    if args.logging_loss_graph:
        with logging.block("Loss Graph", emph=True):
            logger.log_module_info(model_and_loss.loss)

    # -------------------------------------------------------------------------
    # Possibly resume from checkpoint
    # -------------------------------------------------------------------------
    checkpoint_saver, checkpoint_stats = config.configure_checkpoint_saver(
        args, model_and_loss)
    if checkpoint_stats is not None:
        with logging.block():
            logging.info("Checkpoint Statistics:")
            with logging.block():
                logging.values(checkpoint_stats)
        # ---------------------------------------------------------------------
        # Set checkpoint stats
        # ---------------------------------------------------------------------
        if args.checkpoint_mode in ["resume_from_best", "resume_from_latest"]:
            args.start_epoch = checkpoint_stats["epoch"]

    # ---------------------------------------------------------------------
    # Checkpoint and save directory
    # ---------------------------------------------------------------------
    with logging.block("Save Directory", emph=True):
        if args.save is None:
            logging.info("No 'save' directory specified!")
            quit()
        logging.value("Save directory: ", args.save)
        if not os.path.exists(args.save):
            os.makedirs(args.save)

    # ------------------------------------------------------------
    # If this is just an evaluation: overwrite savers and epochs
    # ------------------------------------------------------------
    if args.training_dataset is None and args.validation_dataset is not None:
        args.start_epoch = 1
        args.total_epochs = 1
        train_loader = None
        checkpoint_saver = None
        args.optimizer = None
        args.lr_scheduler = None

    # ----------------------------------------------------
    # Tensorboard summaries
    # ----------------------------------------------------
    logger.configure_tensorboard_summaries(args.save)

    # -------------------------------------------------------------------
    # From PyTorch API:
    # If you need to move a model to GPU via .cuda(), please do so before
    # constructing optimizers for it. Parameters of a model after .cuda()
    # will be different objects with those before the call.
    # In general, you should make sure that optimized parameters live in
    # consistent locations when optimizers are constructed and used.
    # -------------------------------------------------------------------
    model_and_loss = model_and_loss.to(args.device)

    # ----------------------------------------------------------
    # Configure optimizer
    # ----------------------------------------------------------
    optimizer = config.configure_optimizer(args, model_and_loss)

    # ----------------------------------------------------------
    # Configure learning rate scheduler
    # ----------------------------------------------------------
    lr_scheduler = config.configure_lr_scheduler(args, optimizer)

    # --------------------------------------------------------------------------
    # Configure parameter scheduling
    # --------------------------------------------------------------------------
    param_scheduler = config.configure_parameter_scheduler(
        args, model_and_loss)

    # ----------------------------------------------------------
    # Cuda optimization
    # ----------------------------------------------------------
    if args.cuda:
        torch.backends.cudnn.benchmark = constants.CUDNN_BENCHMARK

    # ----------------------------------------------------------
    # Configure runtime visualization
    # ----------------------------------------------------------
    visualizer = config.configure_visualizers(
        args,
        model_and_loss=model_and_loss,
        optimizer=optimizer,
        param_scheduler=param_scheduler,
        lr_scheduler=lr_scheduler,
        train_loader=train_loader,
        validation_loader=validation_loader)
    if visualizer is not None:
        visualizer = visualizer.to(args.device)

    # ----------------------------------------------------------
    # Kickoff training, validation and/or testing
    # ----------------------------------------------------------
    return runtime.exec_runtime(
        args,
        checkpoint_saver=checkpoint_saver,
        lr_scheduler=lr_scheduler,
        param_scheduler=param_scheduler,
        model_and_loss=model_and_loss,
        optimizer=optimizer,
        train_loader=train_loader,
        training_augmentation=training_augmentation,
        validation_augmentation=validation_augmentation,
        validation_loader=validation_loader,
        visualizer=visualizer)
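# Typical invocation (a sketch with hypothetical flag values; the actual model,
# dataset and optimizer names come from the factories registered in main()):
#
#   python main.py --save=./experiments/run01 \
#                  --model=MyModel \
#                  --optimizer=Adam --optimizer_lr=1e-4 \
#                  --lr_scheduler=MultiStepLR \
#                  --training_dataset=MyTrainingDataset \
#                  --validation_dataset=MyValidationDataset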
def configure_optimizer(args, model_and_loss):
    optimizer = None
    with logging.block("Optimizer", emph=True):
        logging.value("Algorithm: ",
                      args.optimizer if args.optimizer is not None else "None")
        if args.optimizer is not None:
            if model_and_loss.num_parameters() == 0:
                logging.info("No trainable parameters detected.")
                logging.info("Setting optimizer to None.")
            else:
                with logging.block():
                    # -------------------------------------------
                    # Figure out all optimizer arguments
                    # -------------------------------------------
                    all_kwargs = typeinf.kwargs_from_args(args, "optimizer")

                    # -------------------------------------------
                    # Get the split of param groups
                    # -------------------------------------------
                    kwargs_without_groups = {
                        key: value
                        for key, value in all_kwargs.items() if key != "group"
                    }
                    param_groups = all_kwargs["group"]

                    # -------------------------------------------
                    # Print arguments (without groups)
                    # -------------------------------------------
                    logging.values(kwargs_without_groups)

                    # -------------------------------------------
                    # Construct actual optimizer params
                    # -------------------------------------------
                    kwargs = dict(kwargs_without_groups)
                    if param_groups is None:
                        # ----------------------------------------------------------
                        # Add all trainable parameters if there are no param groups
                        # ----------------------------------------------------------
                        all_trainable_parameters = _generate_trainable_params(
                            model_and_loss)
                        kwargs["params"] = all_trainable_parameters
                    else:
                        # -------------------------------------------
                        # Add list of parameter groups instead
                        # -------------------------------------------
                        trainable_parameter_groups = []
                        dnames, dparams = _param_names_and_trainable_generator(
                            model_and_loss)
                        dnames = set(dnames)
                        dparams = set(dparams)
                        with logging.block("parameter_groups:"):
                            for group in param_groups:
                                # log group settings
                                group_match = group["params"]
                                group_args = {
                                    key: value
                                    for key, value in group.items()
                                    if key != "params"
                                }
                                with logging.block("%s: %s" % (group_match, group_args)):
                                    # retrieve parameters by matching name
                                    gnames, gparams = _param_names_and_trainable_generator(
                                        model_and_loss, match=group_match)
                                    # materialize, so that the group and the set
                                    # difference below can both consume gparams
                                    gparams = list(gparams)
                                    # log all names affected
                                    for n in sorted(gnames):
                                        logging.info(n)
                                    # set parameters for group
                                    group_args["params"] = gparams
                                    # append parameter group
                                    trainable_parameter_groups.append(group_args)
                                    # update remaining trainable parameters
                                    dnames -= set(gnames)
                                    dparams -= set(gparams)

                            # append default parameter group
                            trainable_parameter_groups.append(
                                {"params": list(dparams)})
                            # and log its parameter names
                            with logging.block("default:"):
                                for dname in sorted(dnames):
                                    logging.info(dname)

                        # set params in optimizer kwargs
                        kwargs["params"] = trainable_parameter_groups

                    # -------------------------------------------
                    # Create optimizer instance
                    # -------------------------------------------
                    optimizer = typeinf.instance_from_kwargs(
                        args.optimizer_class, kwargs=kwargs)
    return optimizer
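# Sketch of the assumed "group" mechanics (the flag syntax below is
# hypothetical; what the code above guarantees is only that each group dict
# carries a "params" name pattern plus per-group optimizer overrides):
#
#   --optimizer=Adam --optimizer_lr=1e-4 \
#   --optimizer_group="[{'params': '*flow*', 'lr': 1e-5}]"
#
# would place every trainable parameter whose name matches '*flow*' into its
# own group with lr=1e-5, while all remaining trainable parameters fall into
# the default group that uses the top-level optimizer settings.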