def train_abs_single(args, device_id):
    """Run single-GPU (or CPU) training of the abstractive summarizer.

    Args:
        args: parsed command-line namespace carrying the training config.
        device_id (int): GPU ordinal to use, or -1 for CPU.
    """
    init_logger(args.log_file)
    logger.info(str(args))

    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    logger.info('Device ID %d' % device_id)
    logger.info('Device %s' % device)

    # Seed every RNG so runs are reproducible.
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True
    if device_id >= 0:
        torch.cuda.set_device(device_id)
        torch.cuda.manual_seed(args.seed)

    # Optionally resume from a checkpoint; only whitelisted flags from the
    # saved opts may override the current args.
    checkpoint = None
    if args.train_from != '':
        logger.info('Loading checkpoint from %s' % args.train_from)
        checkpoint = torch.load(args.train_from,
                                map_location=lambda storage, loc: storage)
        for key, value in vars(checkpoint['opt']).items():
            if key in model_flags:
                setattr(args, key, value)

    # Optionally initialise the BERT encoder from a trained extractive model.
    bert_from_extractive = None
    if args.load_from_extractive != '':
        logger.info('Loading bert from extractive model %s' % args.load_from_extractive)
        bert_from_extractive = torch.load(
            args.load_from_extractive,
            map_location=lambda storage, loc: storage)['model']

    # Re-seed (kept to mirror the original initialisation order just before
    # the model is built).
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    def train_iter_fct():
        # A fresh dataloader over the training split each time it is called.
        return data_loader.Dataloader(args, load_dataset(args, 'train', shuffle=True),
                                      args.batch_size, device, shuffle=True,
                                      is_test=False)

    model = AbsSummarizer(args, device, checkpoint, bert_from_extractive)

    # Either separate optimizers for encoder (BERT) and decoder, or a single one.
    if args.sep_optim:
        optim = [model_builder.build_optim_bert(args, model, checkpoint),
                 model_builder.build_optim_dec(args, model, checkpoint)]
    else:
        optim = [model_builder.build_optim(args, model, checkpoint)]
    logger.info(model)

    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True,
                                              cache_dir=args.temp_dir)
    # Special token ids consumed by the decoder / loss.
    symbols = {'BOS': tokenizer.vocab['[unused0]'],
               'EOS': tokenizer.vocab['[unused1]'],
               'PAD': tokenizer.vocab['[PAD]'],
               'EOQ': tokenizer.vocab['[unused2]']}

    train_loss = abs_loss(model.generator, symbols, model.vocab_size, device,
                          train=True, label_smoothing=args.label_smoothing)
    trainer = build_trainer(args, device_id, model, optim, train_loss)
    trainer.train(train_iter_fct, args.train_steps)
def train_abs_single(args, device_id):
    """Run single-device training of the abstractive summarizer.

    This variant loads a local Chinese RoBERTa-wwm-ext vocabulary and maps
    BOS/EOS/EOQ onto the [unused1]/[unused2]/[unused3] slots.

    Args:
        args: parsed command-line namespace carrying the training config.
        device_id (int): GPU ordinal to use, or -1 for CPU.
    """
    init_logger(args.log_file)
    logger.info(str(args))

    device = "cpu" if args.visible_gpus == "-1" else "cuda"
    logger.info("Device ID %d" % device_id)
    logger.info("Device %s" % device)

    # Seed every RNG so runs are reproducible.
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True
    if device_id >= 0:
        torch.cuda.set_device(device_id)
        torch.cuda.manual_seed(args.seed)

    # Optionally resume from a checkpoint; only whitelisted flags from the
    # saved opts may override the current args.
    checkpoint = None
    if args.train_from != "":
        logger.info("Loading checkpoint from %s" % args.train_from)
        checkpoint = torch.load(args.train_from,
                                map_location=lambda storage, loc: storage)
        for key, value in vars(checkpoint["opt"]).items():
            if key in model_flags:
                setattr(args, key, value)

    # Optionally initialise the BERT encoder from a trained extractive model.
    bert_from_extractive = None
    if args.load_from_extractive != "":
        logger.info("Loading bert from extractive model %s" % args.load_from_extractive)
        bert_from_extractive = torch.load(
            args.load_from_extractive,
            map_location=lambda storage, loc: storage)["model"]

    # Re-seed (kept to mirror the original initialisation order just before
    # the model is built).
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    def train_iter_fct():
        # A fresh dataloader over the training split each time it is called.
        return data_loader.Dataloader(args, load_dataset(args, "train", shuffle=True),
                                      args.batch_size, device, shuffle=True,
                                      is_test=False)

    model = AbsSummarizer(args, device, checkpoint, bert_from_extractive)

    # Either separate optimizers for encoder (BERT) and decoder, or a single one.
    if args.sep_optim:
        optim = [model_builder.build_optim_bert(args, model, checkpoint),
                 model_builder.build_optim_dec(args, model, checkpoint)]
    else:
        optim = [model_builder.build_optim(args, model, checkpoint)]
    logger.info(model)

    tokenizer = BertTokenizer.from_pretrained(
        "chinese_roberta_wwm_ext_pytorch/", do_lower_case=True,
        cache_dir=args.temp_dir)
    # Special token ids consumed by the decoder / loss.
    symbols = {
        "BOS": tokenizer.vocab["[unused1]"],
        "EOS": tokenizer.vocab["[unused2]"],
        "PAD": tokenizer.vocab["[PAD]"],
        "EOQ": tokenizer.vocab["[unused3]"],
    }

    train_loss = abs_loss(model.generator, symbols, model.vocab_size, device,
                          train=True, label_smoothing=args.label_smoothing)
    trainer = build_trainer(args, device_id, model, optim, train_loss)
    trainer.train(train_iter_fct, args.train_steps)
def train_single_hybrid(args, device_id):
    """Run single-device training of the hybrid (extractive + abstractive)
    summarizer.

    Args:
        args: parsed command-line namespace carrying the training config.
        device_id (int): GPU ordinal to use, or -1 for CPU.
    """
    init_logger(args.log_file)

    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    logger.info('Device ID %d' % device_id)
    logger.info('Device %s' % device)

    # Seed every RNG so runs are reproducible.
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True
    if device_id >= 0:
        torch.cuda.set_device(device_id)
        torch.cuda.manual_seed(args.seed)

    # Re-seed (kept to mirror the original initialisation order).
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    def _restore(path):
        # Load a checkpoint from `path` and copy its whitelisted saved flags
        # onto `args`.
        logger.info('Loading checkpoint from %s' % path)
        ckpt = torch.load(path, map_location=lambda storage, loc: storage)
        for key, value in vars(ckpt['opt']).items():
            if key in model_flags:
                setattr(args, key, value)
        return ckpt

    # Up to three checkpoints: full hybrid model, extractor-only, abstractor-only.
    checkpoint = _restore(args.train_from) if args.train_from != '' else None
    checkpoint_ext = (_restore(args.train_from_extractor)
                      if args.train_from_extractor != '' else None)
    checkpoint_abs = (_restore(args.train_from_abstractor)
                      if args.train_from_abstractor != '' else None)

    def train_iter_fct():
        # Builds a fresh dataloader per call; in debug mode it iterates the
        # (unshuffled) test split instead of the training split.
        if args.is_debugging:
            print("YES it is debugging")
            return data_loader.Dataloader(args, load_dataset(args, 'test', shuffle=False),
                                          args.batch_size, device, shuffle=False,
                                          is_test=False)
        return data_loader.Dataloader(args, load_dataset(args, 'train', shuffle=True),
                                      args.batch_size, device, shuffle=True,
                                      is_test=False)

    model = HybridSummarizer(args, device, checkpoint,
                             checkpoint_ext=checkpoint_ext,
                             checkpoint_abs=checkpoint_abs)

    # Either separate optimizers for encoder (BERT) and decoder, or a single one.
    if args.sep_optim:
        optim = [model_builder.build_optim_bert(args, model, checkpoint),
                 model_builder.build_optim_dec(args, model, checkpoint)]
    else:
        optim = [model_builder.build_optim(args, model, checkpoint)]

    logger.info(model)

    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True,
                                              cache_dir=args.temp_dir)
    # Special token ids consumed by the abstractor's decoder / loss.
    symbols = {'BOS': tokenizer.vocab['[unused0]'],
               'EOS': tokenizer.vocab['[unused1]'],
               'PAD': tokenizer.vocab['[PAD]'],
               'EOQ': tokenizer.vocab['[unused2]']}

    # The loss is computed over the abstractor sub-model's generator/vocab.
    train_loss = abs_loss(model.abstractor.generator, symbols,
                          model.abstractor.vocab_size, device, train=True,
                          label_smoothing=args.label_smoothing)
    trainer = build_trainer(args, device_id, model, optim, train_loss)
    trainer.train(train_iter_fct, args.train_steps)
def train_abs_single(args, device_id):
    """Implements training process (meta / non-meta)

    Args:
        device_id (int) : the GPU id to be used
    """
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    logger.info('Device ID %d', device_id)
    logger.info('Device %s', device)

    # Fix random seed to control experiement
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    if device_id >= 0:  # if use GPU
        torch.cuda.set_device(device_id)
        torch.cuda.manual_seed(args.seed)

    # Load checkpoint and args (only flags listed in model_flags may
    # override the current args)
    if args.train_from != '':
        logger.info('Loading checkpoint from %s', args.train_from)
        checkpoint = torch.load(args.train_from,
                                map_location=lambda storage, loc: storage)
        opt = vars(checkpoint['opt'])  # which is self.args
        for k in opt.keys():
            if k in model_flags:
                setattr(args, k, opt[k])
    else:
        checkpoint = None

    # Load extractive model as initial parameter (proposed by Presumm)
    if args.load_from_extractive != '':
        logger.info('Loading bert from extractive model %s', args.load_from_extractive)
        bert_from_extractive = torch.load(
            args.load_from_extractive,
            map_location=lambda storage, loc: storage)
        bert_from_extractive = bert_from_extractive['model']
    else:
        bert_from_extractive = None

    # Prepare dataloader: meta mode draws per-task batches via MetaDataloader
    if args.meta_mode:
        def meta_train_iter_fct():
            return data_loader.MetaDataloader(args,
                                              load_meta_dataset(args, 'train', shuffle=True),
                                              args.batch_size, device,
                                              shuffle=True, is_test=False)
    else:
        def train_iter_fct():
            return data_loader.Dataloader(args,
                                          load_dataset(args, 'train', shuffle=True),
                                          args.batch_size, device,
                                          shuffle=True, is_test=False)

    # Prepare model (multi-task wrapper in meta mode)
    if args.meta_mode:
        model = MTLAbsSummarizer(args, device, checkpoint, bert_from_extractive)
    else:
        model = AbsSummarizer(args, device, checkpoint, bert_from_extractive)

    # Prepare optimizer for inner loop (meta mode only).
    # The optimizer for each task is seperated.
    if args.meta_mode:
        optims_inner = []
        for _ in range(args.num_task):
            if args.sep_optim:
                optim_bert_inner = model_builder.build_optim_bert_inner(
                    args, model, checkpoint, 'maml')
                optim_dec_inner = model_builder.build_optim_dec_inner(
                    args, model, checkpoint, 'maml')
                optims_inner.append([optim_bert_inner, optim_dec_inner])
            else:
                optims_inner.append([
                    model_builder.build_optim_inner(args, model, checkpoint, 'maml')
                ])

    # Prepare optimizer for outer loop
    if args.sep_optim:
        optim_bert = model_builder.build_optim_bert(args, model, checkpoint)
        optim_dec = model_builder.build_optim_dec(args, model, checkpoint)
        optims = [optim_bert, optim_dec]
    else:
        optims = [model_builder.build_optim(args, model, checkpoint)]

    # Prepare tokenizer and the special token ids used by the decoder/loss
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                              do_lower_case=True,
                                              cache_dir=args.temp_dir)
    symbols = {
        'BOS': tokenizer.vocab['[unused0]'],  # id = 1
        'EOS': tokenizer.vocab['[unused1]'],  # id = 2
        'EOQ': tokenizer.vocab['[unused2]'],  # id = 3
        'PAD': tokenizer.vocab['[PAD]']       # id = 0
    }

    # Self Check : special word ids
    # NOTE(review): these two locals are never used afterwards — presumably a
    # leftover sanity check; confirm before removing.
    special_words = [w for w in tokenizer.vocab.keys() if "[" in w]
    special_word_ids = [
        tokenizer.convert_tokens_to_ids(w) for w in special_words
    ]

    # Prepare loss computation
    train_loss = abs_loss(model.generator, symbols, model.vocab_size, device,
                          train=True, label_smoothing=args.label_smoothing)

    # Prepare trainer and perform training (the meta trainer manages its own
    # step count, so no train_steps argument there)
    if args.meta_mode:
        trainer = build_MTLtrainer(args, device_id, model, optims,
                                   optims_inner, train_loss)
        trainer.train(meta_train_iter_fct)
    else:
        trainer = build_trainer(args, device_id, model, optims, train_loss)
        trainer.train(train_iter_fct, args.train_steps)
def validate(args, device_id, pt, step):
    '''
    Implements validation process (meta / non-meta)

    Arguments:
        device_id (int) : the GPU id to be used
        pt : checkpoint path; '' falls back to args.test_from
        step (int) : checkpoint step

    Process:
        - load checkpoint
        - prepare dataloader class
        - prepare model class
        - prepare loss func, which return loss class
        - prepare trainer
        - trainer.validate()

    Returns:
        float: validation cross-entropy (stats.xent())
    '''
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    logger.info('Device ID %d' % device_id)
    logger.info('Device %s' % device)

    # Fix random seed to control experiment
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True
    if device_id >= 0:
        torch.cuda.set_device(device_id)
        torch.cuda.manual_seed(args.seed)

    # Load checkpoint and args (only flags listed in model_flags may
    # override the current args)
    test_from = pt if pt != '' else args.test_from
    logger.info('Loading checkpoint from %s' % test_from)
    checkpoint = torch.load(test_from,
                            map_location=lambda storage, loc: storage)
    opt = vars(checkpoint['opt'])  # the args the checkpoint was trained with
    for k in opt.keys():
        if k in model_flags:
            setattr(args, k, opt[k])

    # Prepare dataloader (meta mode gets a factory; normal mode a plain iterator)
    if args.meta_mode:
        def valid_iter_fct():
            return data_loader.MetaDataloader(args,
                                              load_meta_dataset(args, 'valid', shuffle=True),
                                              args.batch_size, device,
                                              shuffle=True, is_test=False)
    else:
        valid_iter = data_loader.Dataloader(args,
                                            load_dataset(args, 'valid', shuffle=False),
                                            args.batch_size, device,
                                            shuffle=False, is_test=False)

    # Prepare model
    if args.meta_mode:
        model = MTLAbsSummarizer(args, device, checkpoint)
    else:
        model = AbsSummarizer(args, device, checkpoint)

    # Prepare optimizer for inner loop (meta mode only).
    # The optimizer for each task is separated.
    if args.meta_mode:
        optims_inner = []
        for _ in range(args.num_task):
            if args.sep_optim:
                optim_bert_inner = model_builder.build_optim_bert_inner(
                    args, model, checkpoint, 'maml')
                optim_dec_inner = model_builder.build_optim_dec_inner(
                    args, model, checkpoint, 'maml')
                optims_inner.append([optim_bert_inner, optim_dec_inner])
            else:
                # BUG FIX: was `self.optims_inner.append(...)`, but there is no
                # `self` in this module-level function (NameError at runtime).
                optims_inner.append([
                    model_builder.build_optim_inner(args, model, checkpoint, 'maml')
                ])

    # Prepare optimizer (not actually used for updates, but carries step info)
    if args.sep_optim:
        optim_bert = model_builder.build_optim_bert(args, model, checkpoint)
        optim_dec = model_builder.build_optim_dec(args, model, checkpoint)
        optim = [optim_bert, optim_dec]
    else:
        optim = [model_builder.build_optim(args, model, checkpoint)]

    # Prepare tokenizer and the special token ids used by the decoder/loss
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                              do_lower_case=True,
                                              cache_dir=args.temp_dir)
    symbols = {'BOS': tokenizer.vocab['[unused0]'],
               'EOS': tokenizer.vocab['[unused1]'],
               'PAD': tokenizer.vocab['[PAD]'],
               'EOQ': tokenizer.vocab['[unused2]']}

    # Prepare loss computation (no label smoothing at validation time)
    valid_loss = abs_loss(model.generator, symbols, model.vocab_size, device,
                          train=False)

    # Prepare trainer and perform validation
    if args.meta_mode:
        trainer = build_MTLtrainer(args, device_id, model, optim,
                                   optims_inner, valid_loss)
        stats = trainer.validate(valid_iter_fct, step)
    else:
        trainer = build_trainer(args, device_id, model, None, valid_loss)
        stats = trainer.validate(valid_iter, step)
    return stats.xent()
def train_abs_single(args, device_id):
    """Run single-device training of the abstractive summarizer.

    This variant tokenizes with fastBPE and a PhoBERT dictionary, and also
    passes a validation dataloader factory to the trainer.

    Args:
        args: parsed command-line namespace carrying the training config.
        device_id (int): GPU ordinal to use, or -1 for CPU.
    """
    init_logger(args.log_file)
    logger.info(str(args))

    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    logger.info('Device ID %d' % device_id)
    logger.info('Device %s' % device)

    # Seed every RNG so runs are reproducible.
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True
    if device_id >= 0:
        torch.cuda.set_device(device_id)
        torch.cuda.manual_seed(args.seed)

    # Optionally resume from a checkpoint; only whitelisted flags from the
    # saved opts may override the current args.
    checkpoint = None
    if args.train_from != '':
        logger.info('Loading checkpoint from %s' % args.train_from)
        checkpoint = torch.load(args.train_from,
                                map_location=lambda storage, loc: storage)
        for key, value in vars(checkpoint['opt']).items():
            if key in model_flags:
                setattr(args, key, value)

    # Optionally initialise the BERT encoder from a trained extractive model.
    bert_from_extractive = None
    if args.load_from_extractive != '':
        logger.info('Loading bert from extractive model %s' % args.load_from_extractive)
        bert_from_extractive = torch.load(
            args.load_from_extractive,
            map_location=lambda storage, loc: storage)['model']

    # Re-seed (kept to mirror the original initialisation order just before
    # the model is built).
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    def train_iter_fct():
        return data_loader.Dataloader(args, load_dataset(args, 'train', shuffle=True),
                                      args.batch_size, device, shuffle=True,
                                      is_test=False)

    def valid_iter_fct():
        return data_loader.Dataloader(args, load_dataset(args, 'valid', shuffle=True),
                                      args.batch_size, device, shuffle=True,
                                      is_test=False)

    model = AbsSummarizer(args, device, checkpoint, bert_from_extractive)

    # Either separate optimizers for encoder (BERT) and decoder, or a single one.
    if args.sep_optim:
        optim = [model_builder.build_optim_bert(args, model, checkpoint),
                 model_builder.build_optim_dec(args, model, checkpoint)]
    else:
        optim = [model_builder.build_optim(args, model, checkpoint)]
    logger.info(model)
    print("model.vocab_size" + str(model.vocab_size))

    # Build the fastBPE tokenizer; fastBPE is configured through an argparse
    # namespace, so construct a throwaway parser for its --bpe-codes option.
    bpe_parser = argparse.ArgumentParser()
    bpe_parser.add_argument('--bpe-codes',
                            default="/content/PhoBERT_base_transformers/bpe.codes",
                            required=False, type=str, help='path to fastBPE BPE')
    bpe_args, _unknown = bpe_parser.parse_known_args()
    bpe = fastBPE(bpe_args)

    # Load the PhoBERT dictionary used for the special symbol ids below.
    vocab = Dictionary()
    vocab.add_from_file("/content/PhoBERT_base_transformers/dict.txt")
    tokenizer = bpe  # kept for parity with the original (unused afterwards)
    symbols = {'BOS': vocab.indices['[unused0]'],
               'EOS': vocab.indices['[unused1]'],
               'PAD': vocab.indices['[PAD]'],
               'EOQ': vocab.indices['[unused2]']}

    train_loss = abs_loss(model.generator, symbols, model.vocab_size, device,
                          train=True, label_smoothing=args.label_smoothing)
    trainer = build_trainer(args, device_id, model, optim, train_loss)
    trainer.train(train_iter_fct=train_iter_fct,
                  train_steps=args.train_steps,
                  valid_iter_fct=valid_iter_fct)