# Example 1
 def log_process(self, num, total, msg, name, append=True):
     # type: (int, int, str, str, bool) -> bool
     """Log a timestamped progress message to Visdom, the log file and the console bar.

     Args:
         num: current step index.
         total: total number of steps.
         msg: message text to log.
         name: Visdom text-window id (also used as the window title).
         append: append to the existing Visdom window instead of replacing
             it; forced to False when the window does not exist yet.

     Returns:
         bool: True if the Visdom text call succeeded (it returns the window
         name on success).
     """
     info = "[{time}]{msg}".format(time=timestr('%m-%d %H:%M:%S'), msg=msg)
     # Can only append to a Visdom window that already exists.
     append = append and self.visdom.win_exists(name)
     ret = self.visdom.text(info, win=name, env=self.config.visdom_env, opts=dict(title=name), append=append)
     # The file log always appends so the full history is preserved.
     with open(self.config.log_file, 'a') as f:
         f.write(info + '\n')
     self.processBar(num, total, msg)
     return ret == name
# Example 2
 def log(self, msg, name, append=True, log_file=None):
     # type: (str, str, bool, str) -> bool
     """Log a timestamped message to Visdom and to a log file.

     Args:
         msg: message text to log.
         name: Visdom text-window id (also used as the window title).
         append: append to the Visdom window and to the file; when False
             the file is truncated and the window replaced.
         log_file: target file path; defaults to ``self.config.log_file``.

     Returns:
         bool: True if the Visdom text call succeeded (it returns the window
         name on success).
     """
     if log_file is None:
         log_file = self.config.log_file
     info = "[{time}]{msg}".format(time=timestr('%m-%d %H:%M:%S'), msg=msg)
     # Decide the file mode from the CALLER's intent before the flag is
     # narrowed by the window-existence check below; previously a missing
     # Visdom window silently truncated the log file even with append=True.
     mode = 'a+' if append else 'w+'
     # Can only append to a Visdom window that already exists.
     append = append and self.visdom.win_exists(name)
     ret = self.visdom.text(info, win=name, env=self.config.visdom_env, opts=dict(title=name), append=append)
     with open(log_file, mode) as f:
         f.write(info + '\n')
     return ret == name
# Example 3
 def reinit(self, config):
     """Rebind the config and (re)connect to the Visdom server.

     Stores ``config``, opens a Visdom client for ``config.visdom_env`` and
     records the connection state in ``self.connected``; on failure only a
     warning is issued (best-effort — visualization is optional). Finally an
     initialization banner is appended to ``config.log_file``.

     Args:
         config: a config object providing ``visdom_env`` and ``log_file``.
     """
     self.config = config
     try:
         self.visdom = Visdom(env=config.visdom_env)
         self.connected = self.visdom.check_connection()
         if not self.connected:
             print("Visdom server hasn't started, please run command 'python -m visdom.server' in terminal.")
     except ConnectionError as e:
         # str(e) is always printable; e.strerror can be None for some
         # ConnectionErrors, which would make the '+' concatenation raise.
         warn("Can't open Visdom because " + str(e))
     with open(self.config.log_file, 'a') as f:
         info = "[{time}]Initialize Visdom\n".format(time=timestr('%m-%d %H:%M:%S'))
         info += str(self.config)
         f.write(info + '\n')
 def __init__(self, mode: str, **kwargs):
     """Build a run configuration for training or inference.

     Validates ``mode``, applies keyword overrides onto existing attributes,
     loads the class-name list, resolves GPU availability, lays out the
     timestamped log/weight paths, and builds the checkpoint-record format
     string.

     Args:
         mode: either 'train' or 'inference'; anything else only warns.
         **kwargs: overrides for attributes already defined on the instance
             or class (class-level defaults are assumed to exist outside this
             view — confirm); unknown keys are warned about and ignored.

     Raises:
         AssertionError: if ``classes_path`` is not a file, ``batch_size``
             does not divide evenly across GPUs, ``log_root`` is not a
             directory, or ``loss_type``/``optimizer`` are unrecognized.
             NOTE(review): asserts are stripped under ``python -O``.
     """
     if mode not in ['train', 'inference']:
         warn(
             "Invalid argument mode, expect 'train' or 'inference' but got '%s'"
             % mode)
     self.mode = mode
     # Gradients are only needed when training.
     self.enable_grad = mode == 'train'
     # Timestamp used to make all per-run file names unique.
     self.init_time = timestr('%Y%m%d.%H%M%S')
     # Apply keyword overrides; only pre-existing attributes may be set.
     for key, value in kwargs.items():
         if hasattr(self, key):
             setattr(self, key, value)
         else:
             warn("{} has no attribute {}:{}".format(
                 type(self), key, value))
     # data config
     assert os.path.isfile(
         self.classes_path), "%s is not a valid file" % self.classes_path
     self.classes = []
     # One class name per line; surrounding whitespace is stripped.
     with open(self.classes_path, "r") as f:
         for cls in f.readlines():
             self.classes.append(cls.strip())
     self.num_classes = len(self.classes)
     # efficiency config
     if self.use_gpu:
         from torch.cuda import is_available as cuda_available, device_count
         if cuda_available():
             self.num_gpu = device_count()
             self.gpu_list = list(range(self.num_gpu))
             # Batches are split evenly across GPUs, so num_gpu must divide.
             assert self.batch_size % self.num_gpu == 0, \
                 "Can't split a batch of data with batch_size {} averagely into {} gpu(s)" \
                     .format(self.batch_size, self.num_gpu)
         else:
             # Requested GPU but none available: fall back to CPU.
             warn(
                 "Can't find available cuda devices, use_gpu will be automatically set to False."
             )
             self.use_gpu = False
             self.num_gpu = 0
             self.gpu_list = []
     else:
         from torch.cuda import is_available as cuda_available
         if cuda_available():
             warn(
                 "Available cuda devices were found, please switch use_gpu to True for acceleration."
             )
         self.num_gpu = 0
         self.gpu_list = []
     # map_location: presumably passed to torch.load — the identity lambda
     # keeps tensors on their saved device, "cpu" forces CPU (confirm at
     # the load call sites).
     if self.use_gpu:
         self.map_location = lambda storage, loc: storage
     else:
         self.map_location = "cpu"
     # weight S/L config
     self.vis_env_path = os.path.join(self.log_root, 'visdom')
     os.makedirs(os.path.dirname(self.weight_save_path), exist_ok=True)
     os.makedirs(self.log_root, exist_ok=True)
     os.makedirs(self.vis_env_path, exist_ok=True)
     assert os.path.isdir(self.log_root)
     # All per-run files are suffixed with init_time so runs don't collide.
     self.temp_weight_path = os.path.join(
         self.log_root, 'tmpmodel{}.pth'.format(self.init_time))
     self.temp_optim_path = os.path.join(
         self.log_root, 'tmp{}{}.pth'.format(self.optimizer,
                                             self.init_time))
     self.log_file = os.path.join(
         self.log_root, '{}.{}.log'.format(self.mode, self.init_time))
     self.val_result = os.path.join(
         self.log_root, 'validation_result{}.txt'.format(self.init_time))
     self.train_record_file = os.path.join(self.log_root,
                                           'train.record.jsonlist')
     self.debug_flag_file = os.path.abspath(self.debug_flag_file)
     """
    record training process by core.make_checkpoint() with corresponding arguments of
    [epoch, start time, elapsed time, loss value, train accuracy, validate accuracy]
    DO NOT CHANGE IT unless you know what you're doing!!!
    """
     self.__record_fields__ = [
         'init', 'epoch', 'start', 'elapsed', 'loss', 'train_acc', 'val_acc'
     ]
     if len(self.__record_fields__) == 0:
         warn(
             '{}.__record_fields__ is empty, this may cause unknown issues when save checkpoint into {}' \
                 .format(type(self), self.train_record_file))
         self.__record_dict__ = '{{}}'
     else:
         # Build a JSON-ish template such as
         # '{{"init":"{}","epoch":"{}",...}}' — the outer braces are escaped
         # so a later .format() call fills only the value slots.
         self.__record_dict__ = '{{'
         for field in self.__record_fields__:
             self.__record_dict__ += '"{}":"{{}}",'.format(field)
         # Drop the trailing comma and close with an escaped brace.
         self.__record_dict__ = self.__record_dict__[:-1] + '}}'
     # module config
     if isinstance(self.image_resize, int):
         # A single int means a square resize.
         self.image_resize = [self.image_resize, self.image_resize]
     self.loss_type = self.loss_type.lower()
     assert self.loss_type in [
         "mse", "cross_entropy", "crossentropy", "cross", "ce"
     ]
     self.optimizer = self.optimizer.lower()
     assert self.optimizer in ["sgd", "adam"]
# Example 5
    def __init__(self, **kwargs):
        """Build a run configuration from keyword overrides.

        Applies ``kwargs`` onto existing attributes, then validates ``mode``,
        resolves GPU availability, lays out the timestamped log paths, builds
        the checkpoint-record format string, and picks the visualizer port.

        Args:
            **kwargs: overrides for attributes already defined on the
                instance or class (class-level defaults such as ``mode``,
                ``use_gpu``, ``log_root`` and ``visual_engine`` are assumed
                to exist outside this view — confirm); unknown keys are
                warned about and ignored.

        Raises:
            AssertionError: if ``batch_size`` does not divide across GPUs or
                ``log_root`` is not a directory (stripped under ``-O``).
            RuntimeError: if ``visual_engine`` is not a known engine name.
        """
        # Timestamp used to make all per-run file names unique.
        self.init_time = timestr('%Y%m%d.%H%M%S')
        # Parse kwargs; only pre-existing attributes may be overridden.
        for key, value in kwargs.items():
            if hasattr(self, key):
                setattr(self, key, value)
            else:
                warn("{} has no attribute {}:{}".format(type(self), key, value))

        # mode must already be set (default or kwargs) by this point.
        if self.mode not in ['train', 'inference']:
            warn("Invalid argument mode, expect 'train' or 'inference' but got '%s'" % self.mode)
        # Gradients are only needed when training.
        self.enable_grad = self.mode == 'train'

        # efficiency config
        if self.use_gpu:
            from torch.cuda import is_available as cuda_available, device_count
            if cuda_available():
                self.num_gpu = device_count()
                self.gpu_list = list(range(self.num_gpu))
                # Batches are split evenly across GPUs, so num_gpu must divide.
                assert self.batch_size % self.num_gpu == 0, \
                    "Can't split a batch of data with batch_size {} averagely into {} gpu(s)" \
                        .format(self.batch_size, self.num_gpu)
            else:
                # Requested GPU but none available: fall back to CPU.
                warn("Can't find available cuda devices, use_gpu will be automatically set to False.")
                self.use_gpu = False
                self.num_gpu = 0
                self.gpu_list = []
        else:
            from torch.cuda import is_available as cuda_available
            if cuda_available():
                warn("Available cuda devices were found, please switch use_gpu to True for acceleration.")
            self.num_gpu = 0
            self.gpu_list = []
        # map_location: presumably passed to torch.load — the identity lambda
        # keeps tensors on their saved device, "cpu" forces CPU (confirm at
        # the load call sites).
        if self.use_gpu:
            self.map_location = lambda storage, loc: storage
        else:
            self.map_location = "cpu"

        # weight S/L config
        self.vis_env_path = os.path.join(self.log_root, 'visdom')
        os.makedirs(os.path.dirname(self.weight_save_path), exist_ok=True)
        os.makedirs(self.log_root, exist_ok=True)
        os.makedirs(self.vis_env_path, exist_ok=True)
        assert os.path.isdir(self.log_root)
        # All per-run files are suffixed with init_time so runs don't collide.
        self.temp_ckpt_path = os.path.join(self.log_root, 'ckpt-{time}.pth'.format(time=self.init_time))
        self.log_file = os.path.join(self.log_root, '{}.{}.log'.format(self.mode, self.init_time))
        self.val_result = os.path.join(self.log_root, 'validation_result{}.txt'.format(self.init_time))
        self.train_record_file = os.path.join(self.log_root, 'train.record.jsons')
        """
       record training process by core.make_checkpoint() with corresponding arguments of
       [epoch, start time, elapsed time, loss value, train accuracy, validate accuracy]
       DO NOT CHANGE IT unless you know what you're doing!!!
       """
        self.__record_fields__ = ['epoch', 'start', 'elapsed', 'loss', 'train_score', 'val_score']
        if len(self.__record_fields__) == 0:
            warn(
                '{}.__record_fields__ is empty, this may cause unknown issues when save checkpoint into {}' \
                    .format(type(self), self.train_record_file))
            self.__record_dict__ = '{{}}'
        else:
            # Build a JSON-ish template such as
            # '{{"epoch":"{}","start":"{}",...}}' — the outer braces are
            # escaped so a later .format() call fills only the value slots.
            self.__record_dict__ = '{{'
            for field in self.__record_fields__:
                self.__record_dict__ += '"{}":"{{}}",'.format(field)
            # Drop the trailing comma and close with an escaped brace.
            self.__record_dict__ = self.__record_dict__[:-1] + '}}'

        # visualize config: default port per engine (Visdom 8097,
        # TensorBoard 6006) unless the caller supplied one.
        if self.visual_engine in ["visdom", "vis"]:
            self.port = 8097 if self.port is None else self.port
        elif self.visual_engine in ["tensorboardx", "tensorboard", "tb"]:
            self.port = 6006 if self.port is None else self.port
        else:
            raise RuntimeError("Invalid parameter value of visual_engine :", self.visual_engine)