def log_process(self, num, total, msg, name, append=True):
    # type: (int, int, str, str, bool) -> bool
    info = "[{time}]{msg}".format(time=timestr('%m-%d %H:%M:%S'), msg=msg)
    # Only append if the target window already exists; otherwise create it.
    append = append and self.visdom.win_exists(name)
    ret = self.visdom.text(info, win=name, env=self.config.visdom_env,
                           opts=dict(title=name), append=append)
    with open(self.config.log_file, 'a') as f:
        f.write(info + '\n')
    self.processBar(num, total, msg)
    # visdom.text() returns the window name on success.
    return ret == name
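# Minimal usage sketch (hypothetical instance name `logger`; assumes a running
# Visdom server and a populated `logger.config`). Logs one timestamped message
# to the 'train' window, mirrors it to the log file, and updates the console bar:
#
#     ok = logger.log_process(num=10, total=100, msg='step 10/100 loss=0.42', name='train')
#     assert ok  # False would mean the Visdom call did not return the window name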
def log(self, msg, name, append=True, log_file=None):
    # type: (str, str, bool, str) -> bool
    if log_file is None:
        log_file = self.config.log_file
    info = "[{time}]{msg}".format(time=timestr('%m-%d %H:%M:%S'), msg=msg)
    # Only append if the target window already exists; otherwise create it.
    append = append and self.visdom.win_exists(name)
    ret = self.visdom.text(info, win=name, env=self.config.visdom_env,
                           opts=dict(title=name), append=append)
    # Truncate the log file when starting a fresh (non-append) window.
    mode = 'a+' if append else 'w+'
    with open(log_file, mode) as f:
        f.write(info + '\n')
    return ret == name
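# Usage sketch (hypothetical path): passing `log_file` redirects the file copy
# of the message, and `append=False` truncates that file via the 'w+' mode:
#
#     logger.log('validation started', name='val', append=False, log_file='logs/val.log')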
def reinit(self, config):
    self.config = config
    try:
        self.visdom = Visdom(env=config.visdom_env)
        self.connected = self.visdom.check_connection()
        if not self.connected:
            print("Visdom server hasn't started, please run command "
                  "'python -m visdom.server' in terminal.")
            # try:
            #     print("Visdom server hasn't started, do you want to start it? ")
            #     if 'y' in input("y/n: ").lower():
            #         os.popen('python -m visdom.server')
            # except Exception as e:
            #     warn(e)
    except ConnectionError as e:
        warn("Can't open Visdom because " + str(e))
    with open(self.config.log_file, 'a') as f:
        info = "[{time}]Initialize Visdom\n".format(time=timestr('%m-%d %H:%M:%S'))
        info += str(self.config)
        f.write(info + '\n')
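# Typical call sequence (a sketch, assuming `config` carries `visdom_env` and
# `log_file`, and the server was started beforehand with `python -m visdom.server`):
#
#     logger.reinit(config)      # reconnect Visdom and append the config to the log
#     if not logger.connected:
#         pass                   # e.g. fall back to file-only logging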
def __init__(self, mode: str, **kwargs):
    if mode not in ['train', 'inference']:
        warn("Invalid argument mode, expected 'train' or 'inference' but got '%s'" % mode)
    self.mode = mode
    self.enable_grad = mode == 'train'
    self.init_time = timestr('%Y%m%d.%H%M%S')
    for key, value in kwargs.items():
        if hasattr(self, key):
            setattr(self, key, value)
        else:
            warn("{} has no attribute {}:{}".format(type(self), key, value))

    # data config
    assert os.path.isfile(self.classes_path), "%s is not a valid file" % self.classes_path
    self.classes = []
    with open(self.classes_path, "r") as f:
        for cls in f.readlines():
            self.classes.append(cls.strip())
    self.num_classes = len(self.classes)

    # efficiency config
    if self.use_gpu:
        from torch.cuda import is_available as cuda_available, device_count
        if cuda_available():
            self.num_gpu = device_count()
            self.gpu_list = list(range(self.num_gpu))
            assert self.batch_size % self.num_gpu == 0, \
                "Can't split a batch of data with batch_size {} evenly across {} gpu(s)" \
                .format(self.batch_size, self.num_gpu)
        else:
            warn("Can't find available cuda devices, use_gpu will be automatically set to False.")
            self.use_gpu = False
            self.num_gpu = 0
            self.gpu_list = []
    else:
        from torch.cuda import is_available as cuda_available
        if cuda_available():
            warn("Available cuda devices were found, please switch use_gpu to True for acceleration.")
        self.num_gpu = 0
        self.gpu_list = []
    if self.use_gpu:
        self.map_location = lambda storage, loc: storage
    else:
        self.map_location = "cpu"

    # weight S/L config
    self.vis_env_path = os.path.join(self.log_root, 'visdom')
    os.makedirs(os.path.dirname(self.weight_save_path), exist_ok=True)
    os.makedirs(self.log_root, exist_ok=True)
    os.makedirs(self.vis_env_path, exist_ok=True)
    assert os.path.isdir(self.log_root)
    self.temp_weight_path = os.path.join(self.log_root, 'tmpmodel{}.pth'.format(self.init_time))
    self.temp_optim_path = os.path.join(self.log_root, 'tmp{}{}.pth'.format(self.optimizer, self.init_time))
    self.log_file = os.path.join(self.log_root, '{}.{}.log'.format(self.mode, self.init_time))
    self.val_result = os.path.join(self.log_root, 'validation_result{}.txt'.format(self.init_time))
    self.train_record_file = os.path.join(self.log_root, 'train.record.jsonlist')
    self.debug_flag_file = os.path.abspath(self.debug_flag_file)

    """
    Record of the training process, written by core.make_checkpoint() with the
    corresponding arguments [epoch, start time, elapsed time, loss value,
    train accuracy, validate accuracy].
    DO NOT CHANGE IT unless you know what you're doing!!!
    """
    self.__record_fields__ = ['init', 'epoch', 'start', 'elapsed', 'loss', 'train_acc', 'val_acc']
    if len(self.__record_fields__) == 0:
        warn('{}.__record_fields__ is empty, this may cause unknown issues when saving checkpoints to {}'
             .format(type(self), self.train_record_file))
        self.__record_dict__ = '{{}}'
    else:
        # Build a str.format() template like '{{"epoch":"{}","start":"{}",...}}'.
        self.__record_dict__ = '{{'
        for field in self.__record_fields__:
            self.__record_dict__ += '"{}":"{{}}",'.format(field)
        self.__record_dict__ = self.__record_dict__[:-1] + '}}'

    # module config
    if isinstance(self.image_resize, int):
        self.image_resize = [self.image_resize, self.image_resize]
    self.loss_type = self.loss_type.lower()
    assert self.loss_type in ["mse", "cross_entropy", "crossentropy", "cross", "ce"]
    self.optimizer = self.optimizer.lower()
    assert self.optimizer in ["sgd", "adam"]
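# Construction sketch (hypothetical class name `TrainConfig` and paths). Unknown
# keywords only warn instead of raising, so typos in option names surface in the
# logs rather than crashing startup:
#
#     config = TrainConfig(mode='train', batch_size=32, use_gpu=True,
#                          classes_path='data/classes.txt')
#     print(config.num_classes, config.gpu_list)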
def __init__(self, **kwargs):
    self.init_time = timestr('%Y%m%d.%H%M%S')
    # Parse kwargs
    for key, value in kwargs.items():
        if hasattr(self, key):
            setattr(self, key, value)
        else:
            warn("{} has no attribute {}:{}".format(type(self), key, value))
    if self.mode not in ['train', 'inference']:
        warn("Invalid argument mode, expected 'train' or 'inference' but got '%s'" % self.mode)
    self.enable_grad = self.mode == 'train'

    # efficiency config
    if self.use_gpu:
        from torch.cuda import is_available as cuda_available, device_count
        if cuda_available():
            self.num_gpu = device_count()
            self.gpu_list = list(range(self.num_gpu))
            assert self.batch_size % self.num_gpu == 0, \
                "Can't split a batch of data with batch_size {} evenly across {} gpu(s)" \
                .format(self.batch_size, self.num_gpu)
        else:
            warn("Can't find available cuda devices, use_gpu will be automatically set to False.")
            self.use_gpu = False
            self.num_gpu = 0
            self.gpu_list = []
    else:
        from torch.cuda import is_available as cuda_available
        if cuda_available():
            warn("Available cuda devices were found, please switch use_gpu to True for acceleration.")
        self.num_gpu = 0
        self.gpu_list = []
    if self.use_gpu:
        self.map_location = lambda storage, loc: storage
    else:
        self.map_location = "cpu"

    # weight S/L config
    self.vis_env_path = os.path.join(self.log_root, 'visdom')
    os.makedirs(os.path.dirname(self.weight_save_path), exist_ok=True)
    os.makedirs(self.log_root, exist_ok=True)
    os.makedirs(self.vis_env_path, exist_ok=True)
    assert os.path.isdir(self.log_root)
    self.temp_ckpt_path = os.path.join(self.log_root, 'ckpt-{time}.pth'.format(time=self.init_time))
    self.log_file = os.path.join(self.log_root, '{}.{}.log'.format(self.mode, self.init_time))
    self.val_result = os.path.join(self.log_root, 'validation_result{}.txt'.format(self.init_time))
    self.train_record_file = os.path.join(self.log_root, 'train.record.jsons')

    """
    Record of the training process, written by core.make_checkpoint() with the
    corresponding arguments [epoch, start time, elapsed time, loss value,
    train accuracy, validate accuracy].
    DO NOT CHANGE IT unless you know what you're doing!!!
    """
    self.__record_fields__ = ['epoch', 'start', 'elapsed', 'loss', 'train_score', 'val_score']
    if len(self.__record_fields__) == 0:
        warn('{}.__record_fields__ is empty, this may cause unknown issues when saving checkpoints to {}'
             .format(type(self), self.train_record_file))
        self.__record_dict__ = '{{}}'
    else:
        # Build a str.format() template like '{{"epoch":"{}","start":"{}",...}}'.
        self.__record_dict__ = '{{'
        for field in self.__record_fields__:
            self.__record_dict__ += '"{}":"{{}}",'.format(field)
        self.__record_dict__ = self.__record_dict__[:-1] + '}}'

    # visualize config
    if self.visual_engine in ["visdom", "vis"]:
        self.port = 8097 if self.port is None else self.port
    elif self.visual_engine in ["tensorboardx", "tensorboard", "tb"]:
        self.port = 6006 if self.port is None else self.port
    else:
        raise RuntimeError("Invalid parameter value of visual_engine: {}".format(self.visual_engine))
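# Port-defaulting sketch (hypothetical class name `Config`): with `port=None`
# the engine choice picks the conventional default (8097 for Visdom, 6006 for
# TensorBoard), while an explicit port always wins:
#
#     cfg = Config(mode='train', visual_engine='tb')                 # cfg.port == 6006
#     cfg = Config(mode='train', visual_engine='visdom', port=9000)  # cfg.port == 9000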