Example #1
    def __init__(self, logger=None, save_dir=None, save_log: bool = False):
        super(LoggerWrapper, self).__init__(save_dir)
        from homura.liblog import get_logger, set_file_handler

        self.logger = get_logger(
            "homura.reporter") if logger is None else logger
        if save_log:
            set_file_handler(self._save_dir / "log.txt")
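
A hedged usage sketch of the two liblog calls seen above: get_logger returns a named logger and set_file_handler mirrors its output to a file. Only the single-path call shown in the snippet is assumed; the save directory here is a placeholder.

from pathlib import Path
from homura.liblog import get_logger, set_file_handler

save_dir = Path("./results")            # placeholder directory
save_dir.mkdir(parents=True, exist_ok=True)
logger = get_logger("homura.reporter")  # same logger name as the snippet
set_file_handler(save_dir / "log.txt")  # mirror log output to a file
logger.info("reporter initialized")
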
Example #2
    def __init__(self, iterator: Iterable, verb: bool = False):

        super(TQDMReporter, self).__init__()
        self.writer = tqdm.tqdm(iterator,
                                ncols=80) if is_master() else iterator
        self._verb = verb
        self._logger = liblog.get_logger(__name__)
        self._length = len(iterator)
        liblog._set_tqdm_handler()
Example #3
    def __init__(self, iterator: Iterable, verb: bool = False):

        super(TQDMReporter, self).__init__()
        self.writer = tqdm.tqdm(
            iterator, dynamic_ncols=True) if is_master() else iterator
        self._verb = verb
        self._logger = liblog.get_logger(__name__)
        self._length = len(iterator)
        self._max_accuracy = -1.0
        liblog._set_tqdm_handler()
        liblog._set_tqdm_print()
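
Both TQDMReporter variants above share one pattern: only the master process wraps the iterator in tqdm, so progress bars are not duplicated across ranks. A minimal standalone sketch of that gating, with homura's is_master replaced by an explicit flag:

import tqdm

def wrap_progress(iterator, master: bool, dynamic_ncols: bool = True):
    # only the master process shows a progress bar; other ranks iterate silently
    return tqdm.tqdm(iterator, dynamic_ncols=dynamic_ncols) if master else iterator

for step in wrap_progress(range(1_000), master=True):
    pass  # training-loop body goes here
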
Example #4
    def __init__(self,
                 metric: Callable[[Mapping], Any],
                 name: str,
                 logger=None,
                 no_reduce: bool = False):
        if metric is not None:
            self.metric_function = metric
        self.metric_name = name
        self._last_iter = {}
        self._last_epoch = {}
        self._metrics_history = {}
        self._logger = get_logger(__name__) if logger is None else logger
        self._warning_flag = True
        self._no_reduce = no_reduce
Example #5
    def __init__(self,
                 metric: Callable[[Mapping], Any],
                 name: str,
                 logger=None,
                 reduction="average",
                 no_reduce: bool = False):

        if metric is not None:
            self.metric_function = metric
        self.metric_name = name
        self._last_iter = {}
        self._last_epoch = {}
        self._metrics_history = {}
        self._logger = get_logger(__name__) if logger is None else logger
        self._no_reduce = no_reduce

        if reduction not in ("average", "sum"):
            raise RuntimeError(
                f"`reduction` should be 'average' or 'sum', but got {reduction} instead"
            )
        self.reduction = reduction
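
A small illustration of what the validated `reduction` option implies when per-iteration values are aggregated into an epoch value; this mirrors the "average"/"sum" check above but is not homura's exact aggregation code.

def reduce_values(values, reduction: str = "average"):
    if reduction not in ("average", "sum"):
        raise RuntimeError(
            f"`reduction` should be 'average' or 'sum', but got {reduction} instead")
    total = sum(values)
    return total / len(values) if reduction == "average" else total

assert reduce_values([1, 2, 3]) == 2.0                  # average over iterations
assert reduce_values([1, 2, 3], reduction="sum") == 6   # plain sum
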
Example #6
    def __init__(self, model: nn.Module or Dict[str, nn.Module],
                 optimizer: Optional[Optimizer or Dict[str, Optimizer] or torch.optim.Optimizer],
                 loss_f: Optional[Callable or Dict[str, Callable]], *,
                 callbacks: Optional[Callback or Iterable[Callable]] = None,
                 scheduler: Optional[LRScheduler or Dict[str, LRScheduler]] = None,
                 update_scheduler_by_epoch: bool = True,
                 device: Optional[torch.device or str] = None,
                 verb=True, use_cudnn_benchmark=True, use_cuda_nonblocking=False, logger=None, **kwargs):

        if logger is None:
            logger = get_logger(__name__)
        super(TrainerBase, self).__init__(model, callbacks, device, use_cudnn_benchmark, use_cuda_nonblocking, logger,
                                          **kwargs)

        # set optimizer(s)
        if optimizer is None:
            self.optimizer = None
        elif isinstance(optimizer, Optimizer):
            self.optimizer = optimizer.set_model(self.model.parameters())
        elif isinstance(optimizer, torch.optim.Optimizer):
            self.optimizer = optimizer
        elif isinstance(optimizer, dict):
            if not isinstance(model, dict):
                raise TypeError(f"model is not dict but optimizer is dict!")
            self.optimizer = StepDict(torch.optim.Optimizer)
            # when self.model is an nn.ModuleDict, self.optimizer becomes a StepDict
            for k, opt in optimizer.items():
                m = self.model._modules.get(k)
                if m is None:
                    raise KeyError(f"No such key {k} in model!")
                if opt is None:
                    self.optimizer[k] = None
                elif isinstance(opt, Optimizer):
                    self.optimizer[k] = opt.set_model(m.parameters())
                else:
                    raise TypeError(f"Unknown type: {type(opt)}")
        else:
            raise TypeError(f"Unknown type: {type(optimizer)}")
        self.logger.debug(f"Use optimizer: {self.optimizer.__class__.__name__}")

        # set scheduler(s)
        self.update_scheduler_by_epoch = update_scheduler_by_epoch
        self.update_scheduler(scheduler, update_scheduler_by_epoch)

        self.logger.debug(f"Use scheduler: {self.scheduler.__class__.__name__}")

        self.loss_f = loss_f
        self._verb = verb

        # called via property
        # _step and _epoch are set to -1 because they are incremented before each iteration and epoch!
        self._step = -1
        self._epoch = -1
        self._is_train = True

        _map_base = {MODEL: self.model,
                     OPTIMIZER: self.optimizer,
                     SCHEDULER: self.scheduler,
                     TRAINER: self}
        self._iteration_map = Map(**_map_base.copy())
        self._epoch_map = Map(**_map_base.copy())
        self._all_map = Map(**_map_base.copy())

        self._callbacks.before_all(self._all_map)
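
The dict branch above enforces a key-matching contract: when `optimizer` is a dict, `model` must also be a dict, and each optimizer is bound to the parameters of the module stored under the same key. A hedged sketch of that contract, using plain torch.optim optimizers instead of homura's Optimizer/StepDict wrappers:

import torch
from torch import nn

modules = nn.ModuleDict({"generator": nn.Linear(8, 8),
                         "discriminator": nn.Linear(8, 1)})
# keys must match the model's keys, otherwise the constructor above raises KeyError
optimizers = {name: torch.optim.SGD(module.parameters(), lr=0.1)
              for name, module in modules.items()}
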
Example #7
from __future__ import annotations

import torch
from torch import Tensor

from homura.liblog import get_logger

logger = get_logger(__name__)

__all__ = [
    "true_positive", "true_negative", "false_positive", "false_negative",
    "classwise_accuracy", "precision", "recall", "specificity", "f1_score",
    "confusion_matrix", "accuracy"
]


def _base(input: Tensor, target: Tensor) -> tuple[Tensor, Tensor, Tensor]:
    classes = torch.arange(input.size(1), device=input.device)
    pred = input.argmax(dim=1).view(-1, 1)
    target = target.view(-1, 1)
    return pred, target, classes


def true_positive(input: Tensor, target: Tensor) -> Tensor:
    """Calculate true positive

    :param input: output of network, expected to be `BxCx(OPTIONAL DIMENSIONS)`
    :param target: target, expected to be `Bx(OPTIONAL DIMENSIONS)`
    :return: true positive in float tensor of `C`
    """
Example #8
    def __init__(self,
                 model: nn.Module or Dict[str, nn.Module],
                 optimizer: Optional[Partial or Optimizer
                                     or Dict[str, Optimizer]],
                 loss_f: Optional[Callable or Dict[str, Callable]],
                 *,
                 callbacks: Optional[Iterable[Callback]] = None,
                 scheduler: Optional[Partial or Scheduler
                                     or Dict[str, Scheduler]] = None,
                 update_scheduler_by_epoch: bool = True,
                 device: Optional[torch.device or str] = None,
                 verb: bool = True,
                 use_cudnn_benchmark: bool = True,
                 use_cuda_nonblocking: bool = False,
                 use_horovod: bool = False,
                 logger=None,
                 use_sync_bn: bool = False,
                 tqdm_ncols: int = 80,
                 **kwargs):

        if logger is None:
            logger = get_logger(__name__)
        self.logger = logger

        if device is None:
            self.device = torch.device(
                GPU) if torch.cuda.is_available() else torch.device(CPU)
        else:
            self.device = device

        if use_horovod and not is_horovod_available():
            raise RuntimeError('horovod is not available!')

        if is_distributed():
            if use_sync_bn:
                model = nn.SyncBatchNorm.convert_sync_batchnorm(model)

            rank = get_local_rank()
            torch.cuda.set_device(rank)
            if get_global_rank() > 0:
                # to avoid overwriting
                verb = False

        if isinstance(model, nn.Module):
            self.model = model
        elif isinstance(model, dict):
            self.model = nn.ModuleDict(model)
        else:
            raise TypeError(
                f"Unknown type for `model`. Expected nn.Module or Dict[str, Module] but got {type(model)}"
            )

        if GPU in str(self.device):
            self.model.to(self.device)
            torch.backends.cudnn.benchmark = use_cudnn_benchmark
            self._cuda_nonblocking = use_cuda_nonblocking
            self.logger.debug(
                f"cuda: True, cudnn.benchmark: {use_cudnn_benchmark}, "
                f"cuda.nonblocking: {use_cuda_nonblocking}")
        else:
            self._cuda_nonblocking = False
            # usually, this is not expected
            self.logger.info(
                f"cuda: False (torch.cuda.is_available()={torch.cuda.is_available()})"
            )

        if not use_horovod and is_distributed():
            self.model = nn.parallel.DistributedDataParallel(self.model,
                                                             device_ids=[rank])

        if isinstance(self.model,
                      nn.parallel.DistributedDataParallel) or isinstance(
                          self.model, nn.DataParallel):
            self.accessible_model = self.model.module
        else:
            self.accessible_model = self.model

        self.optimizer = None
        self.scheduler = None
        self._callbacks = None
        self.update_scheduler_by_epoch = update_scheduler_by_epoch
        self._set_optimizer(optimizer)
        self._set_scheduler(scheduler)
        self._set_callbacks(callbacks)

        if use_horovod:
            import horovod.torch as hvd

            hvd.broadcast_parameters(self.model.state_dict(), root_rank=0)
            hvd.broadcast_optimizer_state(self.optimizer, root_rank=0)
            self.optimizer = hvd.DistributedOptimizer(
                self.optimizer, named_parameters=self.model.named_parameters())

        self.loss_f = loss_f
        self._verb = verb

        # called via property
        # _step and _epoch are set to -1 because they are incremented before each iteration and epoch!
        self._step = -1
        self._epoch = -1
        self._is_train = True
        # to nest, leave=False (https://github.com/tqdm/tqdm/blob/master/examples/simple_examples.py#L19)
        self._tqdm = Partial(tqdm, ncols=tqdm_ncols,
                             leave=False) if verb else lambda x: x

        _map_base = {
            MODEL: self.accessible_model,
            OPTIMIZER: self.optimizer,
            SCHEDULER: self.scheduler,
            TRAINER: self
        }
        self._iteration_map = Map(**_map_base.copy())
        self._epoch_map = Map(**_map_base.copy())
        self._all_map = Map(**_map_base.copy())

        for k, v in kwargs.items():
            if hasattr(self, k):
                raise AttributeError(f"{self} already has {k}")
            if torch.is_tensor(v):
                v = v.to(self.device)
            if isinstance(v, nn.Module):
                v.to(self.device)
            setattr(self, k, v)

        self._callbacks.before_all(self._all_map)
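
The final loop above turns extra keyword arguments into trainer attributes, moving tensors and modules to the trainer's device and refusing names that already exist. A minimal standalone sketch of that pattern:

import torch
from torch import nn

class AttributeHolder:
    def __init__(self, device: torch.device, **kwargs):
        self.device = device
        for k, v in kwargs.items():
            if hasattr(self, k):
                raise AttributeError(f"{self} already has {k}")
            if torch.is_tensor(v):
                v = v.to(device)          # tensors: keep the moved copy
            if isinstance(v, nn.Module):
                v.to(device)              # modules: moved in place
            setattr(self, k, v)

holder = AttributeHolder(torch.device("cpu"),
                         ema_decay=0.999,
                         centroids=torch.zeros(10, 16))
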
Example #9
""" Helper functions to get information about the environment.
"""

import importlib.util
import os as python_os
import subprocess
import sys as python_sys
from typing import Any, Optional

from homura.liblog import get_logger

logger = get_logger("homura.environment")


# Utility functions that useful libraries are available or not
def is_accimage_available() -> bool:
    return importlib.util.find_spec("accimage") is not None


def enable_accimage() -> None:
    if is_accimage_available():
        import torchvision

        torchvision.set_image_backend("accimage")
        logger.info("accimage is activated")
    else:
        logger.warning("accimage is not available")


def is_faiss_available() -> bool:
    _faiss_available = importlib.util.find_spec("faiss") is not None
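
The snippet is cut off inside `is_faiss_available`; a plausible completion, following the `is_accimage_available` pattern above, simply returns the `find_spec` result (the warning is an assumption, mirroring `enable_accimage`):

def _is_faiss_available_sketch() -> bool:
    available = importlib.util.find_spec("faiss") is not None
    if not available:
        logger.warning("faiss is not available")  # assumed, not confirmed by the snippet
    return available
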
Example #10
    def __init__(self,
                 model: nn.Module or Dict[str, nn.Module],
                 optimizer: Optional[Partial or Optimizer
                                     or Dict[str, Optimizer]],
                 loss_f: Optional[Callable or Dict[str, Callable]] = None,
                 *,
                 reporters: Optional[_ReporterBase
                                     or List[_ReporterBase]] = None,
                 scheduler: Optional[Partial or Scheduler
                                     or Dict[str, Scheduler]] = None,
                 update_scheduler_by_epoch: bool = True,
                 device: Optional[torch.device or str] = None,
                 verb: bool = True,
                 use_cudnn_benchmark: bool = True,
                 use_cuda_nonblocking: bool = False,
                 use_horovod: bool = False,
                 logger=None,
                 use_sync_bn: bool = False,
                 tqdm_ncols: int = 80,
                 **kwargs):

        if kwargs.get("callbacks"):
            raise DeprecationWarning(
                "callback is deprecated, if you need, use homura before v2020.8"
            )

        self.logger = logger or get_logger(__name__)

        self.device = device or (torch.device(
            GPU) if torch.cuda.is_available() else torch.device(CPU))

        # setup for distributed
        self._use_sync_bn = use_sync_bn
        if is_distributed():
            if self._use_sync_bn:
                model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
                self.logger.info(
                    "BNs of model are converted to nn.SyncBatchNorm")

            rank = get_local_rank()
            torch.cuda.set_device(rank)
            if get_global_rank() > 0:
                # to avoid overwriting
                verb = False

        # setup model
        if isinstance(model, nn.Module):
            self.model = model
        elif isinstance(model, dict):
            self.model = nn.ModuleDict(model)
        else:
            raise TypeError(
                f"Unknown type for `model`. Expected nn.Module or Dict[str, Module], but got {type(model)}"
            )

        if GPU in str(self.device):
            self.model.to(self.device)
            torch.backends.cudnn.benchmark = use_cudnn_benchmark
            self._cuda_nonblocking = use_cuda_nonblocking
            self.logger.debug(
                f"cuda: True, cudnn.benchmark: {use_cudnn_benchmark}, "
                f"cuda.nonblocking: {use_cuda_nonblocking}")
        else:
            self._cuda_nonblocking = False
            # usually, this is not expected
            self.logger.info(
                f"cuda: False (torch.cuda.is_available()={torch.cuda.is_available()})"
            )

        if not use_horovod and is_distributed():
            self.model = nn.parallel.DistributedDataParallel(self.model,
                                                             device_ids=[rank])

        # self.accessible_model is useful for e.g., checkpointing
        if isinstance(self.model,
                      nn.parallel.DistributedDataParallel) or isinstance(
                          self.model, nn.DataParallel):
            self.accessible_model = self.model.module
        else:
            self.accessible_model = self.model

        self.loss_f = loss_f
        self._verb = verb

        # setup optimizer and scheduler
        self.optimizer = optimizer
        self.scheduler = scheduler
        self._update_scheduler_by_epoch = update_scheduler_by_epoch
        self.set_optimizer()
        self.set_scheduler()

        if use_horovod:
            if not is_horovod_available():
                raise RuntimeError("horovod is not available!")
            import horovod.torch as hvd

            hvd.broadcast_parameters(self.model.state_dict(), root_rank=0)
            hvd.broadcast_optimizer_state(self.optimizer, root_rank=0)
            self.optimizer = hvd.DistributedOptimizer(
                self.optimizer, named_parameters=self.model.named_parameters())

        if reporters is not None and not isinstance(reporters, Iterable):
            reporters = [reporters]
        reporters = reporters or []

        if not any([isinstance(rep, TQDMReporter) for rep in reporters]):
            # if reporters do not contain a TQDMReporter, add one by default
            reporters.append(TQDMReporter(ncols=tqdm_ncols))
        self.reporter = ReporterList(reporters)

        # called via property
        # _step and _epoch are set to -1 because they are incremented before each iteration and epoch
        self._step = -1
        self._epoch = -1
        self._is_train = True

        # to nest, leave=False (https://github.com/tqdm/tqdm/blob/master/examples/simple_examples.py#L19)
        self._tqdm = lambda x: x
        if verb:
            self._tqdm = Partial(tqdm, ncols=tqdm_ncols, leave=False)
            _set_tqdm_print()

        for k, v in kwargs.items():
            if hasattr(self, k):
                raise AttributeError(f"{self} already has {k}")
            if torch.is_tensor(v):
                v = v.to(self.device)
            if isinstance(v, nn.Module):
                v.to(self.device)
            setattr(self, k, v)
            self.logger.debug(f"trainer sets {k} as a new attribute")
Example #11
""" Helper functions to make distributed training easy
"""

import builtins
import os as python_os
import warnings
from functools import wraps
from typing import Callable, Optional

from torch import distributed
from torch.cuda import device_count

from homura.liblog import get_logger
from .environment import get_args, get_environ

logger = get_logger("homura.distributed")
original_print = builtins.print


def is_horovod_available() -> bool:
    warnings.warn("horovod is no longer supported by homura", DeprecationWarning)
    return False


def is_distributed_available() -> bool:
    return distributed.is_available()


def is_distributed() -> bool:
    """ Check if the process is distributed by checking the world size is larger than 1.
    """
Example #12
File: utils.py  Project: wiwi/ssl-suite
from functools import partial
from itertools import cycle
from statistics import median
from typing import Tuple, Mapping, Callable, Optional

import torch
from homura import optim, trainers, reporters, callbacks, Map, get_args, lr_scheduler
from homura.liblog import get_logger
from homura.modules import exponential_moving_average_, to_onehot
from torch import nn
from torch.nn import functional as F

from backends.data import get_dataloader
from backends.wrn import wrn28_2

logger = get_logger(__file__)


class PackedLoader(object):
    def __init__(self, trusted_loader, untrusted_loader):
        self.l_loaders = trusted_loader
        self.u_loaders = untrusted_loader
        self._size = len(untrusted_loader)

    def __len__(self):
        return self._size

    def __iter__(self):
        for l, u in zip(cycle(self.l_loaders), self.u_loaders):
            yield list(l) + list(u)
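
A small usage sketch of PackedLoader: the shorter labeled ("trusted") loader is cycled so every step yields a labeled batch alongside an unlabeled one, and the length follows the unlabeled loader. Lists of tuples stand in for real DataLoaders here.

trusted = [("x_l", "y_l")] * 2        # placeholder for the labeled DataLoader
untrusted = [("x_u",)] * 5            # placeholder for the unlabeled DataLoader

packed = PackedLoader(trusted, untrusted)
assert len(packed) == 5               # length is driven by the unlabeled loader
for x_l, y_l, x_u in packed:          # each item is list(l) + list(u)
    pass
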
Example #13
    def __init__(self,
                 model: nn.Module or Dict[str, nn.Module],
                 optimizer: Optional[Partial or Optimizer
                                     or Dict[str, Optimizer]],
                 loss_f: Optional[Callable or Dict[str, Callable]] = None,
                 *,
                 reporters: Optional[_ReporterBase
                                     or List[_ReporterBase]] = None,
                 scheduler: Optional[Partial or Scheduler
                                     or Dict[str, Scheduler]] = None,
                 device: Optional[torch.device or str] = None,
                 quiet: bool = True,
                 disable_cudnn_benchmark: bool = False,
                 disable_cuda_nonblocking: bool = False,
                 logger=None,
                 use_sync_bn: bool = False,
                 tqdm_ncols: int = 120,
                 debug: bool = False,
                 **kwargs):

        if kwargs.get("update_scheduler_by_epoch"):
            raise DeprecationWarning(
                "update_scheduler_by_epoch is deprecated, users need to step")

        if kwargs.get("callbacks"):
            raise DeprecationWarning(
                "callback is deprecated, if you need, use homura before v2020.8"
            )

        self.logger = logger or get_logger(__name__)
        self.device = device or (torch.device(
            GPU) if torch.cuda.is_available() else torch.device(CPU))
        self._is_debug = debug

        if self._is_debug:
            self.logger.warning(
                "Trainer is set to be debug mode, which may affect the performance"
            )
            set_verb_level("debug")

        # setup for distributed
        self._use_sync_bn = use_sync_bn
        if is_distributed():
            if self._use_sync_bn:
                model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
                self.logger.info(
                    "BNs of model are converted to nn.SyncBatchNorm")

            rank = get_local_rank()
            torch.cuda.set_device(rank)
            if get_global_rank() > 0:
                # to avoid overwriting
                quiet = True

        self.loss_f = loss_f
        self._verbose = not quiet

        # setup model
        if isinstance(model, nn.Module):
            self.model = model
        elif isinstance(model, dict):
            self.model = nn.ModuleDict(model)
            self.logger.debug(f"model is nn.ModuleDict of {self.model.keys()}")
        else:
            raise TypeError(
                f"Unknown type for `model`. Expected nn.Module or Dict[str, Module], but got {type(model)}"
            )

        if GPU in str(self.device):
            self.model.to(self.device)
            torch.backends.cudnn.benchmark = not disable_cudnn_benchmark
            self._cuda_nonblocking = not disable_cuda_nonblocking
            self.logger.debug(
                f"cuda: True, cudnn.benchmark: {not disable_cudnn_benchmark}, "
                f"cuda.nonblocking: {not disable_cuda_nonblocking}")
        else:
            self._cuda_nonblocking = False
            # usually, this is not expected
            self.logger.info(
                f"cuda: False (torch.cuda.is_available()={torch.cuda.is_available()})"
            )

        if is_distributed():
            self.model = nn.parallel.DistributedDataParallel(self.model,
                                                             device_ids=[rank])
            self.logger.debug(
                f"model converted to DistributedDataParallel at rank={rank}")

        # self.accessible_model is useful for e.g., checkpointing
        if isinstance(self.model,
                      nn.parallel.DistributedDataParallel) or isinstance(
                          self.model, nn.DataParallel):
            self.accessible_model = self.model.module
        else:
            self.accessible_model = self.model

        # setup optimizer and scheduler
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.set_optimizer()
        self.set_scheduler()

        if reporters is not None and not isinstance(reporters, Iterable):
            reporters = [reporters]
        reporters = reporters or []

        if not any([isinstance(rep, TQDMReporter) for rep in reporters]):
            # if reporters do not contain a TQDMReporter, add one by default
            reporters.append(TQDMReporter(ncols=tqdm_ncols))
        self.logger.debug(f"reporter is ready: {reporters}")
        self.reporter = ReporterList(reporters)

        # called via property
        # _step and _epoch are set to -1 because they are incremented before each iteration and epoch
        self._step = -1
        self._epoch = -1
        self._is_train = True

        # to nest, leave=False (https://github.com/tqdm/tqdm/blob/master/examples/simple_examples.py#L19)
        self._tqdm = lambda x: x
        if self._verbose:
            self._tqdm = Partial(tqdm, ncols=tqdm_ncols, leave=False)
            set_tqdm_stdout_stderr()
            self.logger.debug("verbose: setup tqdm")
        else:
            self.logger.debug("quiet: no tqdm")

        for k, v in kwargs.items():
            if hasattr(self, k):
                raise AttributeError(f"{self} already has {k}")
            if isinstance(v, torch.Tensor):
                v = v.to(self.device)
            if isinstance(v, nn.Module):
                v.to(self.device)
            setattr(self, k, v)
            self.logger.debug(f"trainer sets {k} as a new attribute")
Example #14
import importlib.util
import os as python_os
import subprocess
import sys as python_sys

from torch.cuda import device_count

from homura.liblog import get_logger

__all__ = [
    "is_accimage_available", "is_apex_available", "is_distributed",
    "enable_accimage", "get_global_rank", "get_local_rank", "get_world_size",
    "get_num_nodes"
]

logger = get_logger("homura.env")
is_accimage_available = importlib.util.find_spec("accimage") is not None
is_apex_available = importlib.util.find_spec("apex") is not None

args = " ".join(python_sys.argv)
is_distributed = "--local_rank" in args


def _decode_bytes(b: bytes) -> str:
    return b.decode("ascii")[:-1]


def get_git_hash() -> str:
    try:
        is_git_repo = subprocess.run(
            ["git", "rev-parse", "--is-inside-work-tree"],
Example #15
    def __init__(self,
                 model: nn.Module or Dict[str, nn.Module],
                 callbacks: Optional[Callback or Iterable[Callable]] = None,
                 device: torch.device or str = None,
                 use_cudnn_benchmark=True,
                 use_cuda_nonblocking=False,
                 logger: Optional[Logger] = None,
                 **kwargs):

        self.logger = get_logger(__name__) if logger is None else logger
        if device is None:
            self.device = GPU if torch.cuda.is_available() else CPU
        else:
            self.device = device

        # set model(s)
        if isinstance(model, nn.Module):
            self.model = model
            self._is_single_model = True
        elif isinstance(model, dict):
            self.model = nn.ModuleDict(model)
            self._is_single_model = False
        else:
            raise TypeError(
                f"Unknown type for arg. model. Expected nn.Module or "
                f"Dict[str, Module] but got {type(model)}")

        if GPU in str(self.device):
            if use_cudnn_benchmark:
                torch.backends.cudnn.benchmark = True
            self.model.to(self.device)
            self._cuda_nonblocking = use_cuda_nonblocking
            self.logger.debug(
                f"cuda: True, cudnn.benchmark: {use_cudnn_benchmark}, nonblocking: {use_cuda_nonblocking}"
            )
        else:
            self._cuda_nonblocking = False
            self.logger.info("Running on CPU!")

        # set callback(s)
        if isinstance(callbacks, CallbackList):
            self._callbacks = callbacks
        elif isinstance(callbacks, Callback):
            self._callbacks = callbacks
            self.logger.debug(
                f"registered callback {callbacks.__class__.__name__}")
        elif isinstance(callbacks, Iterable):
            self._callbacks = CallbackList(*callbacks)
        elif callbacks is None:
            # if callback is not set
            self._callbacks = Callback()
            self.logger.debug(f"No callback registered")
        else:
            raise TypeError(
                f"type(callbacks) should not be {type(callbacks)}!")

        # set kwargs
        for k, v in kwargs.items():
            if hasattr(self, k):
                raise AttributeError(f"{self} already has {k}")
            if torch.is_tensor(v):
                v = v.to(self.device)  # keep the returned tensor; Tensor.to is not in-place
            setattr(self, k, v)
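
The callback handling above accepts a CallbackList, a single Callback, an iterable, or None. A condensed sketch of that normalization, using the Callback and CallbackList classes as they appear in the snippet (a bare Callback acts as a no-op here):

def normalize_callbacks(callbacks):
    if callbacks is None:
        return Callback()                  # no callback registered
    if isinstance(callbacks, (CallbackList, Callback)):
        return callbacks
    if isinstance(callbacks, Iterable):
        return CallbackList(*callbacks)
    raise TypeError(f"type(callbacks) should not be {type(callbacks)}!")
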