def main():
    # ===> parse config file arguments
    cfg = parse_args()
    os.makedirs(cfg.train_options['checkpoint_save_dir'], exist_ok=True)
    logger = get_logger('torchocr', log_file=os.path.join(cfg.train_options['checkpoint_save_dir'], 'train.log'))
    # ===> log training info
    train_options = cfg.train_options
    logger.info(cfg)
    # ===> pick the device and fix the random seed
    to_use_device = torch.device(
        train_options['device'] if torch.cuda.is_available() and ('cuda' in train_options['device']) else 'cpu')
    set_random_seed(cfg['SEED'], 'cuda' in train_options['device'], deterministic=True)

    # ===> build network
    net = build_model(cfg['model'])

    # ===> initialize the model and move it to the target device
    net.apply(weight_init)
    # if torch.cuda.device_count() > 1:
    net = nn.DataParallel(net)
    net = net.to(to_use_device)
    net.train()

    # ===> get fine tune layers
    params_to_train = get_fine_tune_params(net, train_options['fine_tune_stage'])
    # ===> solver and lr scheduler
    optimizer = build_optimizer(net.parameters(), cfg['optimizer'])
    scheduler = build_scheduler(optimizer, cfg['lr_scheduler'])

    # ===> whether to resume from checkpoint
    resume_from = train_options['resume_from']
    if resume_from:
        net, _resumed_optimizer, global_state = load_checkpoint(net, resume_from, to_use_device, optimizer,
                                                                third_name=train_options['third_party_name'])
        if _resumed_optimizer:
            optimizer = _resumed_optimizer
        logger.info(f'net resume from {resume_from}')
    else:
        global_state = {}
        logger.info('net resume from scratch.')

    # ===> loss function
    loss_func = build_loss(cfg['loss'])
    loss_func = loss_func.to(to_use_device)

    # ===> load the recognition alphabet and share it with both datasets
    with open(cfg.dataset.alphabet, 'r', encoding='utf-8') as file:
        cfg.dataset.alphabet = ''.join([s.strip('\n') for s in file.readlines()])

    # ===> data loader
    cfg.dataset.train.dataset.alphabet = cfg.dataset.alphabet
    train_loader = build_dataloader(cfg.dataset.train)
    cfg.dataset.eval.dataset.alphabet = cfg.dataset.alphabet
    eval_loader = build_dataloader(cfg.dataset.eval)

    # ===> train
    train(net, optimizer, scheduler, loss_func, train_loader, eval_loader, to_use_device, cfg, global_state, logger)

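# Note: the variant above wraps the model in nn.DataParallel unconditionally; the
# `torch.cuda.device_count() > 1` guard is left commented out. A minimal sketch of the guarded
# form (the helper name is illustrative, not taken from the source):
import torch
from torch import nn

def wrap_for_device(net: nn.Module, to_use_device: torch.device) -> nn.Module:
    """Wrap in DataParallel only when more than one CUDA device is visible."""
    if to_use_device.type == 'cuda' and torch.cuda.device_count() > 1:
        net = nn.DataParallel(net)
    return net.to(to_use_device)
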
def main():
    # ===> parse config file arguments
    cfg = parse_args()
    os.makedirs(cfg.train_options['checkpoint_save_dir'], exist_ok=True)
    # ===> log training info
    logger = get_logger('torchocr', log_file=os.path.join(cfg.train_options['checkpoint_save_dir'], 'train.log'))
    logger.info(cfg)
    # ===> pick the device and fix the random seed
    train_options = cfg.train_options
    to_use_device = torch.device(
        train_options['device'] if torch.cuda.is_available() and ('cuda' in train_options['device']) else 'cpu')
    set_random_seed(cfg['SEED'], 'cuda' in train_options['device'], deterministic=True)

    # ===> build network
    net = build_model(cfg['model'])
    # ===> move the model to the target device
    net = nn.DataParallel(net)
    net = net.to(to_use_device)

    # ===> build metric
    metric = build_metric(cfg['metric'])

    # ===> get fine tune layers
    # params_to_train = get_fine_tune_params(net, train_options['fine_tune_stage'])
    # ===> solver and lr scheduler
    optimizer = build_optimizer(net.parameters(), cfg['optimizer'])

    net.train()
    net.module.model_dict['Teacher'].eval()

    # ===> whether to resume from checkpoint
    resume_from = train_options['resume_from']
    if resume_from:
        net, _resumed_optimizer, global_state = load_checkpoint(net, resume_from, to_use_device, optimizer)
        if _resumed_optimizer:
            optimizer = _resumed_optimizer
        logger.info(f'net resume from {resume_from}')
    else:
        global_state = {}
        logger.info('net resume from scratch.')

    # ===> loss function
    loss_func = build_loss(cfg['loss'])
    loss_func = loss_func.to(to_use_device)

    # ===> data loader
    train_loader = build_dataloader(cfg.dataset.train)
    eval_loader = build_dataloader(cfg.dataset.eval)

    # ===> post_process
    post_process = build_post_process(cfg['post_process'])

    # ===> train
    train(net, optimizer, loss_func, train_loader, eval_loader, to_use_device, cfg, global_state, logger,
          post_process, metric)

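# Note: the distillation variant above only calls .eval() on the 'Teacher' sub-model, which
# changes BatchNorm/Dropout behaviour but does not stop gradients; the teacher's parameters are
# still handed to the optimizer through net.parameters(). A hedged sketch of fully freezing the
# teacher before building the optimizer (it assumes the wrapper really exposes
# net.module.model_dict['Teacher'], as the code above does; adapt to the actual model structure):
def freeze_teacher(net):
    teacher = net.module.model_dict['Teacher']  # assumption: sub-models live in model_dict
    teacher.eval()
    for p in teacher.parameters():
        p.requires_grad = False  # exclude the teacher from gradient updates
    # hand only trainable parameters to build_optimizer afterwards
    return [p for p in net.parameters() if p.requires_grad]
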
def main():
    # ===> parse config file arguments
    cfg = parse_args()
    os.makedirs(cfg.train_options['checkpoint_save_dir'], exist_ok=True)
    logger = get_logger('torchocr', log_file=os.path.join(cfg.train_options['checkpoint_save_dir'], 'train.log'))
    # ===> log training info
    train_options = cfg.train_options
    logger.info(cfg)
    # ===> pick the device and fix the random seed
    to_use_device = torch.device(
        train_options['device'] if torch.cuda.is_available() and ('cuda' in train_options['device']) else 'cpu')
    set_random_seed(cfg['SEED'], 'cuda' in train_options['device'], deterministic=True)

    # ===> build network
    net = build_model(cfg['model'])

    # ===> initialize the model and move it to the target device
    # net.apply(weight_init)  # comment this line out when using a pretrained model
    # if torch.cuda.device_count() > 1:
    net = nn.DataParallel(net)
    net = net.to(to_use_device)
    net.train()

    # ===> get fine tune layers
    params_to_train = get_fine_tune_params(net, train_options['fine_tune_stage'])
    # ===> solver and lr scheduler
    optimizer = build_optimizer(net.parameters(), cfg['optimizer'])

    # ===> whether to resume from checkpoint
    resume_from = train_options['resume_from']
    if resume_from:
        net, current_epoch, _resumed_optimizer = load_checkpoint(net, resume_from, to_use_device, optimizer,
                                                                 third_name=train_options['third_party_name'])
        if _resumed_optimizer:
            optimizer = _resumed_optimizer
        logger.info(f'net resume from {resume_from}')
    else:
        current_epoch = 0
        logger.info('net resume from scratch.')

    # ===> loss function
    loss_func = build_loss(cfg['loss'])
    loss_func = loss_func.to(to_use_device)

    # ===> data loader
    train_loader = build_dataloader(cfg.dataset.train)
    eval_loader = build_dataloader(cfg.dataset.eval)

    # ===> post_process
    post_process = build_post_process(cfg['post_process'])

    # ===> train
    train(net, optimizer, loss_func, train_loader, eval_loader, to_use_device, cfg, current_epoch, logger,
          post_process)

def main():
    # ===> parse config file arguments
    parser = argparse.ArgumentParser(description='train')
    parser.add_argument('--config', type=str, default='config/det.json', help='train config file path')
    cfg = parser.parse_args()
    with open(cfg.config) as fin:
        cfg = json.load(fin, object_hook=lambda d: SimpleNamespace(**d))
    # cfg = parse_args()
    os.makedirs(cfg.train_options.checkpoint_save_dir, exist_ok=True)
    logger = get_logger('torchocr', log_file=os.path.join(cfg.train_options.checkpoint_save_dir, 'train.log'))
    # ===> log training info
    train_options = cfg.train_options
    logger.info(cfg)
    # ===> pick the device
    to_use_device = torch.device(
        train_options.device if torch.cuda.is_available() and ('cuda' in train_options.device) else 'cpu')
    # set_random_seed(cfg.SEED, 'cuda' in train_options.device, deterministic=True)

    # ===> build network
    net = build_model(cfg.model)

    # ===> initialize the model and move it to the target device
    # net.apply(weight_init)  # comment this line out when using a pretrained model
    # if torch.cuda.device_count() > 1:
    net = nn.DataParallel(net)
    net = net.to(to_use_device)
    net.train()

    # ===> get fine tune layers
    params_to_train = get_fine_tune_params(net, train_options.fine_tune_stage)
    # ===> solver and lr scheduler
    optimizer = build_optimizer(net.parameters(), cfg.optimizer)

    # ===> whether to resume from checkpoint
    resume_from = train_options.resume_from
    if resume_from:
        net, _resumed_optimizer, global_state = load_checkpoint(net, resume_from, to_use_device, optimizer,
                                                                third_name=train_options.third_party_name)
        if _resumed_optimizer:
            optimizer = _resumed_optimizer
        logger.info(f'net resume from {resume_from}')
    else:
        global_state = {}
        logger.info('net resume from scratch.')

    # ===> loss function
    loss_func = build_loss(cfg.loss)
    loss_func = loss_func.to(to_use_device)

    # ===> data loader
    train_loader = build_dataloader(cfg.dataset.train)
    eval_loader = build_dataloader(cfg.dataset.eval)

    # ===> post_process
    post_process = build_post_process(cfg.post_process)

    # ===> train
    train(net, optimizer, loss_func, train_loader, eval_loader, to_use_device, cfg, global_state, logger,
          post_process)

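# Note: the JSON-config variants rely on json.load's object_hook to turn every nested dict into a
# SimpleNamespace, so the config can be read with attribute access (cfg.train_options.device, ...).
# A self-contained illustration of that pattern; the keys mirror the ones read above, but the real
# layout of config/det.json is not shown in the source:
import json
from types import SimpleNamespace

_raw = '{"train_options": {"device": "cuda:0", "checkpoint_save_dir": "./output"}}'
_cfg_demo = json.loads(_raw, object_hook=lambda d: SimpleNamespace(**d))
assert _cfg_demo.train_options.device == 'cuda:0'
assert _cfg_demo.train_options.checkpoint_save_dir == './output'
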
        # Excerpt from the resume_from branch of another main() variant: the surrounding
        # `if resume_from:` and the code before it are omitted here.
        model_dict = net.state_dict()
        ckpt = torch.load(resume_from, map_location='cpu')
        pretrained_dict = ckpt['state_dict']
        # txt_file = os.path.join('test_results/', 'pretrainedmodel_state.txt')
        # txt_f = open(txt_file, 'w')
        # txt_f.write('############## Model Dict ################' + '\n')
        # for j in model_dict:
        #     txt_f.write(str(j) + '\n')
        # # txt_f.write('############## Pretrained Dict ################' + '\n')
        # # for k in pretrained_dict:
        # #     txt_f.write(str(k) + '\n')
        # txt_f.close()
        net, _, global_state = load_checkpoint(net, resume_from, to_use_device, _optimizers=None,
                                               third_name=train_options['third_party_name'])

    # ===> loss function
    loss_func = build_loss(cfg['loss'])
    loss_func = loss_func.to(to_use_device)

    # ===> data loader
    train_loader = build_dataloader(cfg.dataset.train)
    eval_loader = build_dataloader(cfg.dataset.eval)

    # ===> post_process
    post_process = build_post_process(cfg['post_process'])

    # ===> train
    train(net, optimizer, loss_func, train_loader, eval_loader, to_use_device, cfg, global_state, logger,
          post_process)

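# Note: the commented-out block above dumps model and checkpoint keys to a text file for manual
# diffing. A hedged alternative that logs the mismatch directly; it assumes the `model_dict`,
# `pretrained_dict`, and `logger` objects from the surrounding code:
def log_state_dict_mismatch(model_dict, pretrained_dict, logger):
    missing = sorted(set(model_dict) - set(pretrained_dict))      # keys the checkpoint does not provide
    unexpected = sorted(set(pretrained_dict) - set(model_dict))   # checkpoint keys the model does not use
    logger.info(f'missing keys: {missing}')
    logger.info(f'unexpected keys: {unexpected}')
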
def main():
    # ===> parse config file arguments
    # cfg = parse_args()
    parser = argparse.ArgumentParser(description='train')
    parser.add_argument('-c', '--config', type=str, default='config/recrbt3.json', help='train config file path')
    args = parser.parse_args()
    with open(args.config) as fin:
        cfg = json.load(fin, object_hook=lambda d: SimpleNamespace(**d))
    os.makedirs(cfg.train_options.checkpoint_save_dir, exist_ok=True)
    logger = get_logger('torchocr', log_file=os.path.join(cfg.train_options.checkpoint_save_dir, 'train.log'))
    # ===> log training info
    train_options = cfg.train_options
    logger.info(cfg)
    # ===> device selection and seeding are disabled in this variant
    # to_use_device = torch.device(
    #     train_options.device if torch.cuda.is_available() and ('cuda' in train_options.device) else 'cpu')
    # set_random_seed(cfg.SEED, 'cuda' in train_options.device, deterministic=True)

    # ===> build network
    net = build_model(cfg.model)

    # ===> initialize the model and move it to the target device
    # net.apply(weight_init)
    # # if torch.cuda.device_count() > 1:
    # net = nn.DataParallel(net)
    # net = net.to(to_use_device)
    net.train()

    # ===> get fine tune layers
    # params_to_train = get_fine_tune_params(net, train_options.fine_tune_stage)
    # ===> solver and lr scheduler
    optimizer = build_optimizer(net.parameters(), cfg.optimizer)
    scheduler = build_scheduler(optimizer, cfg.lr_scheduler)

    # ===> whether to resume from checkpoint
    resume_from = train_options.resume_from
    if resume_from:
        net, _resumed_optimizer, global_state = load_checkpoint(net, resume_from, 0, optimizer,
                                                                third_name=train_options.third_party_name)
        if _resumed_optimizer:
            optimizer = _resumed_optimizer
        logger.info(f'net resume from {resume_from}')
    else:
        global_state = {}
        logger.info('net resume from scratch.')

    # ===> loss function
    loss_func = build_loss(cfg.loss)
    # loss_func = loss_func.to(to_use_device)

    # ===> load the alphabet: as a list of class names for dogclass.txt, otherwise as one string
    if "dogclass.txt" in cfg.dataset.alphabet:
        with open(cfg.dataset.alphabet, 'r', encoding='utf-8') as file:
            cfg.dataset.alphabet = [s.strip('\n') for s in file.readlines()]
    else:
        with open(cfg.dataset.alphabet, 'r', encoding='utf-8') as file:
            cfg.dataset.alphabet = ''.join([s.strip('\n') for s in file.readlines()])

    # ===> data loader
    cfg.dataset.train.dataset.alphabet = cfg.dataset.alphabet
    train_loader = build_dataloader(cfg.dataset.train)
    cfg.dataset.eval.dataset.alphabet = cfg.dataset.alphabet
    eval_loader = build_dataloader(cfg.dataset.eval)

    # ===> train
    train(net, optimizer, scheduler, loss_func, train_loader, eval_loader, 0, cfg, global_state, logger)

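# Note: the main() variants above are presumably launched the same way; a typical entry point and
# invocation (the script name below is illustrative, not taken from the source; the -c/--config
# flag matches the argparse definition above):
if __name__ == '__main__':
    main()

# e.g.  python train.py -c config/recrbt3.json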