Example #1
def configure_lr_scheduler(args, optimizer):
    with logging.block("Learning Rate Scheduler", emph=True):
        logging.value(
            "Scheduler: ",
            args.lr_scheduler if args.lr_scheduler is not None else "None")
        lr_scheduler = None
        if args.lr_scheduler is not None:
            kwargs = typeinf.kwargs_from_args(args, "lr_scheduler")
            with logging.block():
                logging.values(kwargs)
            kwargs["optimizer"] = optimizer
            lr_scheduler = typeinf.instance_from_kwargs(
                args.lr_scheduler_class, kwargs=kwargs)
    return lr_scheduler
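Note: configure_lr_scheduler relies on an argument-prefix convention, where options such as --lr_scheduler_milestones are gathered into a kwargs dict and forwarded to the scheduler class. The helper below is a hypothetical stand-in for typeinf.kwargs_from_args, shown only to illustrate that pattern; it is not the real implementation from this codebase.

# Hypothetical sketch of the prefix-based kwargs pattern used above.
# kwargs_from_prefixed_args is NOT the real typeinf.kwargs_from_args.
import argparse

import torch


def kwargs_from_prefixed_args(args, prefix):
    # Collect "<prefix>_<name>" attributes into {"<name>": value},
    # skipping the "<prefix>_class" entry used for instantiation.
    return {
        key[len(prefix) + 1:]: value
        for key, value in vars(args).items()
        if key.startswith(prefix + "_") and not key.endswith("_class")
    }


args = argparse.Namespace(
    lr_scheduler="MultiStepLR",
    lr_scheduler_class=torch.optim.lr_scheduler.MultiStepLR,
    lr_scheduler_milestones=[50, 80],
    lr_scheduler_gamma=0.5)

optimizer = torch.optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=0.1)
kwargs = kwargs_from_prefixed_args(args, "lr_scheduler")
kwargs["optimizer"] = optimizer
lr_scheduler = args.lr_scheduler_class(**kwargs)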
Example #2
def configure_visualizers(args, model_and_loss, optimizer, param_scheduler,
                          lr_scheduler, train_loader, validation_loader):
    with logging.block("Runtime Visualizers", emph=True):
        logging.value(
            "Visualizer: ",
            args.visualizer if args.visualizer is not None else "None")
        visualizer = None
        if args.visualizer is not None:
            kwargs = typeinf.kwargs_from_args(args, "visualizer")
            logging.values(kwargs)
            kwargs["args"] = args
            kwargs["model_and_loss"] = model_and_loss
            kwargs["optimizer"] = optimizer
            kwargs["param_scheduler"] = param_scheduler
            kwargs["lr_scheduler"] = lr_scheduler
            kwargs["train_loader"] = train_loader
            kwargs["validation_loader"] = validation_loader
            visualizer = typeinf.instance_from_kwargs(args.visualizer_class,
                                                      kwargs=kwargs)
    return visualizer
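Note: main() later calls visualizer.to(args.device), so visualizer classes are evidently torch.nn.Module subclasses whose constructors accept the dependencies injected above as keyword arguments. The class below is a made-up example of that constructor contract, not a class from this codebase.

import torch


class DummyVisualizer(torch.nn.Module):
    # Hypothetical visualizer illustrating the constructor contract
    # implied by configure_visualizers(); not part of the framework.
    def __init__(self, args, model_and_loss, optimizer, param_scheduler,
                 lr_scheduler, train_loader, validation_loader):
        super().__init__()
        self.args = args
        self.model_and_loss = model_and_loss
        self.train_loader = train_loader
        self.validation_loader = validation_loader

    def on_epoch_end(self, epoch):
        # Placeholder hook; a real visualizer would emit images or
        # TensorBoard summaries here.
        pass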
Example #3
def configure_runtime_augmentations(args):
    with logging.block("Runtime Augmentations", emph=True):

        training_augmentation = None
        validation_augmentation = None

        # ----------------------------------------------------
        # Training Augmentation
        # ----------------------------------------------------
        if args.training_augmentation is not None:
            kwargs = typeinf.kwargs_from_args(args, "training_augmentation")
            logging.value("training_augmentation: ",
                          args.training_augmentation)
            with logging.block():
                logging.values(kwargs)
            kwargs["args"] = args
            training_augmentation = typeinf.instance_from_kwargs(
                args.training_augmentation_class, kwargs=kwargs)
            training_augmentation = training_augmentation.to(args.device)
        else:
            logging.info("training_augmentation: None")

        # ----------------------------------------------------
        # Validation Augmentation
        # ----------------------------------------------------
        if args.validation_augmentation is not None:
            kwargs = typeinf.kwargs_from_args(args, "validation_augmentation")
            logging.value("validation_augmentation: ",
                          args.training_augmentation)
            with logging.block():
                logging.values(kwargs)
            kwargs["args"] = args
            validation_augmentation = typeinf.instance_from_kwargs(
                args.validation_augmentation_class, kwargs=kwargs)
            validation_augmentation = validation_augmentation.to(args.device)

        else:
            logging.info("validation_augmentation: None")

    return training_augmentation, validation_augmentation
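Note: both augmentation objects are moved to args.device, which implies they are torch.nn.Module instances applied to batches at runtime. The class below is a minimal made-up augmentation in that style; the class name and the batch layout are assumptions, not part of this codebase.

import torch


class RandomBrightness(torch.nn.Module):
    # Hypothetical runtime augmentation; only illustrates the
    # Module-based pattern implied by the .to(args.device) calls above.
    def __init__(self, args, brightness=0.2):
        super().__init__()
        self.brightness = brightness

    def forward(self, example_dict):
        # Assumed batch layout: a dict holding an "input1" image tensor.
        img = example_dict["input1"]
        scale = 1.0 + (torch.rand(1, device=img.device) - 0.5) * 2.0 * self.brightness
        example_dict["input1"] = (img * scale).clamp(0.0, 1.0)
        return example_dict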
Example #4
def main():
    # ---------------------------------------------------
    # Set working directory to folder containing main.py
    # ---------------------------------------------------
    os.chdir(os.path.dirname(os.path.realpath(__file__)))

    # ----------------------------------------------------------------
    # Activate syntax highlighting in tracebacks for better debugging
    # ----------------------------------------------------------------
    colored_traceback.add_hook()

    # -----------------------------------------------------------
    # Configure logging
    # -----------------------------------------------------------
    logging_filename = os.path.join(commandline.parse_save_dir(),
                                    constants.LOGGING_LOGBOOK_FILENAME)
    logger.configure_logging(logging_filename)

    # ----------------------------------------------------------------
    # Register type factories before parsing the commandline.
    # NOTE: We decided to explicitly call these init() functions, to
    #       have more precise control over the timeline
    # ----------------------------------------------------------------
    with logging.block("Registering factories", emph=True):
        augmentations.init()
        datasets.init()
        losses.init()
        models.init()
        optim.init()
        visualizers.init()
        logging.info('Done!')

    # -----------------------------------------------------------
    # Parse commandline after factories have been filled
    # -----------------------------------------------------------
    args = commandline.parse_arguments(blocktitle="Commandline Arguments")

    # -----------------------
    # Telegram configuration
    # -----------------------
    with logging.block("Telegram", emph=True):
        logger.configure_telegram(constants.LOGGING_TELEGRAM_MACHINES_FILENAME)

    # ----------------------------------------------------------------------
    # Log git repository hash and make a compressed copy of the source code
    # ----------------------------------------------------------------------
    with logging.block("Source Code", emph=True):
        logging.value("Git Hash: ", system.git_hash())
        # Zip source code and copy to save folder
        filename = os.path.join(args.save,
                                constants.LOGGING_ZIPSOURCE_FILENAME)
        zipsource.create_zip(filename=filename, directory=os.getcwd())
        logging.value("Archieved code: ", filename)

    # ----------------------------------------------------
    # Change process title for `top` and `pkill` commands
    # This is more "informative" in `nvidia-smi` ;-)
    # ----------------------------------------------------
    args = config.configure_proctitle(args)

    # -------------------------------------------------
    # Set random seed for python, numpy, torch, cuda..
    # -------------------------------------------------
    config.configure_random_seed(args)

    # -----------------------------------------------------------
    # Machine stats
    # -----------------------------------------------------------
    with logging.block("Machine Statistics", emph=True):
        if args.cuda:
            args.device = torch.device("cuda:0")
            logging.value("Cuda: ", torch.version.cuda)
            logging.value("Cuda device count: ", torch.cuda.device_count())
            logging.value("Cuda device name: ", torch.cuda.get_device_name(0))
            logging.value("CuDNN: ", torch.backends.cudnn.version())
            device_no = 0
            if 'CUDA_VISIBLE_DEVICES' in os.environ:
                device_no = os.environ['CUDA_VISIBLE_DEVICES']
            args.actual_device = "gpu:%s" % device_no
        else:
            args.device = torch.device("cpu")
            args.actual_device = "cpu"
        logging.value("Hostname: ", system.hostname())
        logging.value("PyTorch: ", torch.__version__)
        logging.value("PyTorch device: ", args.actual_device)

    # ------------------------------------------------------
    # Fetch data loaders. Quit if no data loader is present
    # ------------------------------------------------------
    train_loader, validation_loader = config.configure_data_loaders(args)

    # -------------------------------------------------------------------------
    # Check whether any dataset could be found
    # -------------------------------------------------------------------------
    success = any(loader is not None
                  for loader in [train_loader, validation_loader])
    if not success:
        logging.info(
            "No dataset could be loaded successfully. Please check dataset paths!"
        )
        quit()

    # -------------------------------------------------------------------------
    # Configure runtime augmentations
    # -------------------------------------------------------------------------
    training_augmentation, validation_augmentation = config.configure_runtime_augmentations(
        args)

    # ----------------------------------------------------------
    # Configure model and loss.
    # ----------------------------------------------------------
    model_and_loss = config.configure_model_and_loss(args)

    # --------------------------------------------------------
    # Print model visualization
    # --------------------------------------------------------
    if args.logging_model_graph:
        with logging.block("Model Graph", emph=True):
            logger.log_module_info(model_and_loss.model)
    if args.logging_loss_graph:
        with logging.block("Loss Graph", emph=True):
            logger.log_module_info(model_and_loss.loss)

    # -------------------------------------------------------------------------
    # Possibly resume from checkpoint
    # -------------------------------------------------------------------------
    checkpoint_saver, checkpoint_stats = config.configure_checkpoint_saver(
        args, model_and_loss)
    if checkpoint_stats is not None:
        with logging.block():
            logging.info("Checkpoint Statistics:")
            with logging.block():
                logging.values(checkpoint_stats)
        # ---------------------------------------------------------------------
        # Set checkpoint stats
        # ---------------------------------------------------------------------
        if args.checkpoint_mode in ["resume_from_best", "resume_from_latest"]:
            args.start_epoch = checkpoint_stats["epoch"]

    # ---------------------------------------------------------------------
    # Checkpoint and save directory
    # ---------------------------------------------------------------------
    with logging.block("Save Directory", emph=True):
        if args.save is None:
            logging.info("No 'save' directory specified!")
            quit()
        logging.value("Save directory: ", args.save)
        if not os.path.exists(args.save):
            os.makedirs(args.save)

    # ------------------------------------------------------------
    # If this is just an evaluation: overwrite savers and epochs
    # ------------------------------------------------------------
    if args.training_dataset is None and args.validation_dataset is not None:
        args.start_epoch = 1
        args.total_epochs = 1
        train_loader = None
        checkpoint_saver = None
        args.optimizer = None
        args.lr_scheduler = None

    # ----------------------------------------------------
    # Tensorboard summaries
    # ----------------------------------------------------
    logger.configure_tensorboard_summaries(args.save)

    # -------------------------------------------------------------------
    # From PyTorch API:
    # If you need to move a model to GPU via .cuda(), please do so before
    # constructing optimizers for it. Parameters of a model after .cuda()
    # will be different objects from those before the call.
    # In general, you should make sure that optimized parameters live in
    # consistent locations when optimizers are constructed and used.
    # -------------------------------------------------------------------
    model_and_loss = model_and_loss.to(args.device)

    # ----------------------------------------------------------
    # Configure optimizer
    # ----------------------------------------------------------
    optimizer = config.configure_optimizer(args, model_and_loss)

    # ----------------------------------------------------------
    # Configure learning rate
    # ----------------------------------------------------------
    lr_scheduler = config.configure_lr_scheduler(args, optimizer)

    # --------------------------------------------------------------------------
    # Configure parameter scheduling
    # --------------------------------------------------------------------------
    param_scheduler = config.configure_parameter_scheduler(
        args, model_and_loss)

    # quit()

    # ----------------------------------------------------------
    # Cuda optimization
    # ----------------------------------------------------------
    if args.cuda:
        torch.backends.cudnn.benchmark = constants.CUDNN_BENCHMARK

    # ----------------------------------------------------------
    # Configure runtime visualization
    # ----------------------------------------------------------
    visualizer = config.configure_visualizers(
        args,
        model_and_loss=model_and_loss,
        optimizer=optimizer,
        param_scheduler=param_scheduler,
        lr_scheduler=lr_scheduler,
        train_loader=train_loader,
        validation_loader=validation_loader)
    if visualizer is not None:
        visualizer = visualizer.to(args.device)

    # ----------------------------------------------------------
    # Kickoff training, validation and/or testing
    # ----------------------------------------------------------
    return runtime.exec_runtime(
        args,
        checkpoint_saver=checkpoint_saver,
        lr_scheduler=lr_scheduler,
        param_scheduler=param_scheduler,
        model_and_loss=model_and_loss,
        optimizer=optimizer,
        train_loader=train_loader,
        training_augmentation=training_augmentation,
        validation_augmentation=validation_augmentation,
        validation_loader=validation_loader,
        visualizer=visualizer)
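Note: main() returns the result of runtime.exec_runtime(), so the script presumably ends with the usual entry-point guard (not shown in this excerpt):

if __name__ == "__main__":
    main()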
Example #5
def configure_optimizer(args, model_and_loss):
    optimizer = None
    with logging.block("Optimizer", emph=True):
        logging.value("Algorithm: ",
                      args.optimizer if args.optimizer is not None else "None")
        if args.optimizer is not None:
            if model_and_loss.num_parameters() == 0:
                logging.info("No trainable parameters detected.")
                logging.info("Setting optimizer to None.")
            else:
                with logging.block():
                    # -------------------------------------------
                    # Figure out all optimizer arguments
                    # -------------------------------------------
                    all_kwargs = typeinf.kwargs_from_args(args, "optimizer")

                    # -------------------------------------------
                    # Get the split of param groups
                    # -------------------------------------------
                    kwargs_without_groups = {
                        key: value
                        for key, value in all_kwargs.items() if key != "group"
                    }
                    param_groups = all_kwargs["group"]

                    # ----------------------------------------------------------------------
                    # Print arguments (without groups)
                    # ----------------------------------------------------------------------
                    logging.values(kwargs_without_groups)

                    # ----------------------------------------------------------------------
                    # Construct actual optimizer params
                    # ----------------------------------------------------------------------
                    kwargs = dict(kwargs_without_groups)
                    if param_groups is None:
                        # ---------------------------------------------------------
                        # Add all trainable parameters if there are no param groups
                        # ---------------------------------------------------------
                        all_trainable_parameters = _generate_trainable_params(
                            model_and_loss)
                        kwargs["params"] = all_trainable_parameters
                    else:
                        # -------------------------------------------
                        # Add list of parameter groups instead
                        # -------------------------------------------
                        trainable_parameter_groups = []
                        dnames, dparams = _param_names_and_trainable_generator(
                            model_and_loss)
                        dnames = set(dnames)
                        dparams = set(list(dparams))
                        with logging.block("parameter_groups:"):
                            for group in param_groups:
                                #  log group settings
                                group_match = group["params"]
                                group_args = {
                                    key: value
                                    for key, value in group.items()
                                    if key != "params"
                                }

                                with logging.block("%s: %s" %
                                                   (group_match, group_args)):
                                    # retrieve parameters by matching name
                                    gnames, gparams = _param_names_and_trainable_generator(
                                        model_and_loss, match=group_match)
                                    # log all names affected
                                    for n in sorted(gnames):
                                        logging.info(n)
                                    # set generator for group
                                    group_args["params"] = gparams
                                    # append parameter group
                                    trainable_parameter_groups.append(
                                        group_args)
                                    # update remaining trainable parameters
                                    dnames -= set(gnames)
                                    dparams -= set(list(gparams))

                            # append default parameter group
                            trainable_parameter_groups.append(
                                {"params": list(dparams)})
                            # and log its parameter names
                            with logging.block("default:"):
                                for dname in sorted(dnames):
                                    logging.info(dname)

                        # set params in optimizer kwargs
                        kwargs["params"] = trainable_parameter_groups

                    # -------------------------------------------
                    # Create optimizer instance
                    # -------------------------------------------
                    optimizer = typeinf.instance_from_kwargs(
                        args.optimizer_class, kwargs=kwargs)

    return optimizer
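Note: the per-group dictionaries assembled above follow the standard torch.optim parameter-group convention: each dict carries a "params" entry plus optional per-group overrides, and anything not overridden falls back to the optimizer-wide defaults. A minimal self-contained sketch of that convention (the model and the group split are made up for illustration):

import torch

model = torch.nn.Sequential(torch.nn.Linear(4, 8), torch.nn.Linear(8, 2))

# One group with its own learning rate, one group using the defaults.
backbone_params = list(model[0].parameters())
default_params = list(model[1].parameters())

optimizer = torch.optim.SGD(
    [
        {"params": backbone_params, "lr": 1e-4},  # per-group override
        {"params": default_params},               # inherits lr=1e-3 below
    ],
    lr=1e-3,
    momentum=0.9)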