def validate(args, device_id, pt, step):
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    if (pt != ''):
        test_from = pt
    else:
        test_from = args.test_from
    logger.info('Loading checkpoint from %s' % test_from)
    checkpoint = torch.load(test_from, map_location=lambda storage, loc: storage)
    opt = vars(checkpoint['opt'])
    for k in opt.keys():
        if (k in model_flags):
            setattr(args, k, opt[k])
    print(args)

    model = AbsSummarizer(args, device, checkpoint)
    model.eval()

    valid_iter = data_loader.Dataloader(args, load_dataset(args, 'valid', shuffle=False),
                                        args.batch_size, device,
                                        shuffle=False, is_test=False)
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True,
                                              cache_dir=args.temp_dir)
    symbols = {'BOS': tokenizer.vocab['[unused0]'], 'EOS': tokenizer.vocab['[unused1]'],
               'PAD': tokenizer.vocab['[PAD]'], 'EOQ': tokenizer.vocab['[unused2]']}

    valid_loss = abs_loss(model.generator, symbols, model.vocab_size, train=False, device=device)

    trainer = build_trainer(args, device_id, model, None, valid_loss)
    stats = trainer.validate(valid_iter, step)
    return stats.xent()
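# --- Hedged example (not part of the original pipeline) ---------------------
# The `symbols` dict above maps role names (BOS/EOS/PAD/EOQ) to vocabulary ids
# of BERT's reserved "[unusedN]" tokens. A minimal sanity check, assuming only
# the public BertTokenizer API (`convert_ids_to_tokens`), that each id
# resolves back to the expected token before the loss relies on it:
def check_symbols(tokenizer, symbols):
    for role, idx in symbols.items():
        token = tokenizer.convert_ids_to_tokens([idx])[0]
        print('%s -> id %d (token %s)' % (role, idx, token))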
def build_trainer(args, device_id, model, optims, tokenizer):
    """
    Simplify `Trainer` creation based on user `opt`s.

    Args:
        args (:obj:`Namespace`): user options (usually from argument parsing)
        device_id (int): GPU id to use (negative for CPU)
        model (:obj:`onmt.models.NMTModel`): the model to train
        optims (list of :obj:`onmt.utils.Optimizer`): optimizers used during training
        tokenizer: tokenizer whose vocab defines the special symbols
    """
    device = "cpu" if args.visible_gpus == '-1' else "cuda"

    grad_accum_count = args.accum_count
    n_gpu = args.world_size
    if device_id >= 0:
        gpu_rank = int(args.gpu_ranks[device_id])
    else:
        gpu_rank = 0
        n_gpu = 0
    print('gpu_rank %d' % gpu_rank)

    tensorboard_log_dir = args.model_path
    writer = SummaryWriter(tensorboard_log_dir, comment="Unmt")
    report_manager = ReportMgr(args.report_every, start_time=-1, tensorboard_writer=writer)

    symbols = {'BOS': tokenizer.vocab['[unused1]'], 'EOS': tokenizer.vocab['[unused2]'],
               'PAD': tokenizer.vocab['[PAD]'], 'SEG': tokenizer.vocab['[unused3]'],
               'UNK': tokenizer.vocab['[UNK]']}

    gen_loss = abs_loss(args, model.generator, symbols, tokenizer.vocab, device, train=True)
    pn_loss = CrossEntropyLossCompute().to(device)

    trainer = Trainer(args, model, optims, tokenizer, gen_loss, pn_loss,
                      grad_accum_count, n_gpu, gpu_rank, report_manager)

    if (model):
        n_params = _tally_parameters(model)
        logger.info('* number of parameters: %d' % n_params)

    return trainer
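# `_tally_parameters` is referenced above but not defined in this excerpt.
# A minimal sketch of the usual implementation (an assumption, not
# necessarily the exact helper used in this repository):
def _tally_parameters(model):
    # Total number of scalar elements across all parameter tensors.
    return sum(p.nelement() for p in model.parameters())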
def validate(args, device_id, pt, step):
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    if not args.test_from:
        test_from = pt
    else:
        test_from = args.test_from
    logger.info('Loading checkpoint from %s' % test_from)
    checkpoint = torch.load(test_from, map_location=lambda storage, loc: storage)
    opt = vars(checkpoint['opt'])
    for k in opt.keys():
        if (k in model_flags):
            setattr(args, k, opt[k])
    print(args)

    model = AbsSummarizer(args, device, checkpoint)
    model.eval()

    valid_iter = data_loader.Dataloader(args, load_dataset(args, 'valid', shuffle=False),
                                        args.batch_size, device,
                                        shuffle=False, is_test=False)
    tokenizer = BertTokenizer.from_pretrained(
        '../ETRI_koBERT/003_bert_eojeol_pytorch/vocab.txt',
        do_lower_case=False, cache_dir=args.temp_dir)
    if not args.share_emb:
        tokenizer = add_tokens(tokenizer)
    symbols = {'BOS': tokenizer.vocab['<S>'], 'EOS': tokenizer.vocab['<T>'],
               'PAD': tokenizer.vocab['[PAD]']}

    valid_loss = abs_loss(model.generator, symbols, model.vocab_size, train=False, device=device)

    trainer = build_trainer(args, device_id, model, None, valid_loss)
    stats = trainer.validate(valid_iter, step)
    return stats.xent()
def validate(args, device_id, pt, step, tokenizer):
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    if (pt != ''):
        test_from = pt
    else:
        test_from = args.test_from
    logger.info('Loading checkpoint from %s' % test_from)
    checkpoint = torch.load(test_from, map_location=lambda storage, loc: storage)
    opt = vars(checkpoint['opt'])
    for k in opt.keys():
        if (k in model_flags):
            setattr(args, k, opt[k])
    print(args)

    model = AbsSummarizer(args, device, checkpoint)
    model.eval()

    valid_iter = data_loader.Dataloader(args, load_dataset(args, 'dev', shuffle=False),
                                        args.batch_size, device,
                                        shuffle=False, is_test=False)
    symbols = {'BOS': tokenizer.convert_tokens_to_ids('<s>'),
               'EOS': tokenizer.convert_tokens_to_ids('</s>'),
               'PAD': tokenizer.convert_tokens_to_ids('[PAD]')}

    valid_loss = abs_loss(model.generator, symbols, model.vocab_size, train=False, device=device)

    trainer = build_trainer(args, device_id, model, None, valid_loss)
    stats = trainer.validate(valid_iter, step)
    return stats.xent()
def train_abs_single(args, device_id):
    init_logger(args.log_file)
    logger.info(str(args))
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    logger.info('Device ID %d' % device_id)
    logger.info('Device %s' % device)

    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    if device_id >= 0:
        torch.cuda.set_device(device_id)
        torch.cuda.manual_seed(args.seed)

    if args.train_from != '':
        logger.info('Loading checkpoint from %s' % args.train_from)
        checkpoint = torch.load(args.train_from,
                                map_location=lambda storage, loc: storage)
        opt = vars(checkpoint['opt'])
        for k in opt.keys():
            if (k in model_flags):
                setattr(args, k, opt[k])
    else:
        checkpoint = None

    if (args.load_from_extractive != ''):
        logger.info('Loading bert from extractive model %s' % args.load_from_extractive)
        bert_from_extractive = torch.load(args.load_from_extractive,
                                          map_location=lambda storage, loc: storage)
        bert_from_extractive = bert_from_extractive['model']
    else:
        bert_from_extractive = None

    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    def train_iter_fct():
        return data_loader.Dataloader(args, load_dataset(args, 'train', shuffle=True),
                                      args.batch_size, device,
                                      shuffle=True, is_test=False)

    model = AbsSummarizer(args, device, checkpoint, bert_from_extractive)
    if (args.sep_optim):
        optim_bert = model_builder.build_optim_bert(args, model, checkpoint)
        optim_dec = model_builder.build_optim_dec(args, model, checkpoint)
        optim = [optim_bert, optim_dec]
    else:
        optim = [model_builder.build_optim(args, model, checkpoint)]

    logger.info(model)

    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True,
                                              cache_dir=args.temp_dir)
    symbols = {'BOS': tokenizer.vocab['[unused0]'], 'EOS': tokenizer.vocab['[unused1]'],
               'PAD': tokenizer.vocab['[PAD]'], 'EOQ': tokenizer.vocab['[unused2]']}

    train_loss = abs_loss(model.generator, symbols, model.vocab_size, device,
                          train=True, label_smoothing=args.label_smoothing)

    trainer = build_trainer(args, device_id, model, optim, train_loss)
    trainer.train(train_iter_fct, args.train_steps)
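# Note on the checkpoint loading above: `map_location=lambda storage, loc:
# storage` keeps every tensor on CPU regardless of the device it was saved
# from, so GPU-trained checkpoints can be restored on CPU-only hosts. A
# self-contained illustration (the path is hypothetical):
import torch

def load_checkpoint_on_cpu(path='model_step_10000.pt'):
    # Equivalent to map_location='cpu' in recent PyTorch versions.
    return torch.load(path, map_location=lambda storage, loc: storage)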
def validate(args, device_id, pt, step):
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    if (pt != ''):
        test_from = pt
    else:
        test_from = args.test_from
    logger.info('Loading checkpoint from %s' % test_from)
    checkpoint = torch.load(test_from, map_location=lambda storage, loc: storage)
    opt = vars(checkpoint['opt'])
    for k in opt.keys():
        if (k in model_flags):
            setattr(args, k, opt[k])
    print(args)

    model = AbsSummarizer(args, device, checkpoint)
    model.eval()

    valid_iter = data_loader.Dataloader(args, load_dataset(args, 'valid', shuffle=False),
                                        args.batch_size, device,
                                        shuffle=False, is_test=False)

    parser = argparse.ArgumentParser()
    parser.add_argument('--bpe-codes', default="/content/PhoBERT_base_transformers/bpe.codes",
                        required=False, type=str, help='path to fastBPE BPE')
    args1, unknown = parser.parse_known_args()
    bpe = fastBPE(args1)

    # Load the dictionary
    vocab = Dictionary()
    vocab.add_from_file("/content/PhoBERT_base_transformers/dict.txt")

    tokenizer = bpe
    symbols = {'BOS': vocab.indices['[unused0]'], 'EOS': vocab.indices['[unused1]'],
               'PAD': vocab.indices['[PAD]'], 'EOQ': vocab.indices['[unused2]']}

    valid_loss = abs_loss(model.generator, symbols, model.vocab_size, train=False, device=device)

    trainer = build_trainer(args, device_id, model, None, valid_loss)
    stats = trainer.validate(valid_iter, step)
    return stats.xent()
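# The PhoBERT variant above indexes `vocab.indices` directly, which raises a
# bare KeyError if '[unused0]'..'[unused2]' or '[PAD]' are absent from
# dict.txt. A small guard (a sketch, assuming only the `indices` mapping the
# code above already relies on):
def check_special_tokens(vocab, tokens=('[unused0]', '[unused1]', '[unused2]', '[PAD]')):
    missing = [t for t in tokens if t not in vocab.indices]
    if missing:
        raise KeyError('special tokens missing from dictionary: %s' % ', '.join(missing))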
def train_abs_single(args, device_id):
    init_logger(args.log_file)
    logger.info(str(args))
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    logger.info('Device ID %d' % device_id)
    logger.info('Device %s' % device)

    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    if device_id >= 0:
        torch.cuda.set_device(device_id)
        torch.cuda.manual_seed(args.seed)

    if args.train_from != '':
        logger.info('Loading checkpoint from %s' % args.train_from)
        checkpoint = torch.load(args.train_from,
                                map_location=lambda storage, loc: storage)
        opt = vars(checkpoint['opt'])
        for k in opt.keys():
            if (k in model_flags):
                setattr(args, k, opt[k])
    else:
        checkpoint = None

    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    def train_iter_fct():
        return data_pretrain.Dataloader(args, load_dataset(args, 'train', shuffle=True),
                                        args.batch_size, device,
                                        shuffle=True, is_test=False)

    model = PretrainModel(args, device, checkpoint)
    optim = [model_builder.build_optim(args, model, checkpoint)]
    logger.info(model)

    symbols = {'PAD': 0}
    train_loss = abs_loss(model.generator, symbols, model.dec_vocab_size, device,
                          train=True, label_smoothing=args.label_smoothing)

    trainer = build_trainer(args, device_id, model, optim, train_loss)
    trainer.train(train_iter_fct, args.train_steps)
def validate(args, device_id, pt, step):
    device = "cpu" if args.visible_gpus == "-1" else "cuda"
    if pt != "":
        test_from = pt
    else:
        test_from = args.test_from
    logger.info("Loading checkpoint from %s" % test_from)
    checkpoint = torch.load(test_from, map_location=lambda storage, loc: storage)
    opt = vars(checkpoint["opt"])
    for k in opt.keys():
        if k in model_flags:
            setattr(args, k, opt[k])
    print(args)

    model = AbsSummarizer(args, device, checkpoint)
    model.eval()

    valid_iter = data_loader.Dataloader(
        args,
        load_dataset(args, "valid", shuffle=False),
        args.batch_size,
        device,
        shuffle=False,
        is_test=False,
    )
    tokenizer = BertTokenizer.from_pretrained(
        "chinese_roberta_wwm_ext_pytorch", do_lower_case=True, cache_dir=args.temp_dir
    )
    symbols = {
        "BOS": tokenizer.vocab["[unused1]"],
        "EOS": tokenizer.vocab["[unused2]"],
        "PAD": tokenizer.vocab["[PAD]"],
        "EOQ": tokenizer.vocab["[unused3]"],
    }

    valid_loss = abs_loss(model.generator, symbols, model.vocab_size, train=False, device=device)

    trainer = build_trainer(args, device_id, model, None, valid_loss)
    stats = trainer.validate(valid_iter, step)
    return stats.xent()
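# Note: the Chinese BERT/RoBERTa vocabularies number their reserved slots
# from [unused1] upward (there is no [unused0]), which is why this variant
# and the matching training function below use [unused1]-[unused3] where the
# bert-base-uncased functions above use [unused0]-[unused2].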
def validate(args, device_id, pt, step):
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    if pt != '':
        test_from = pt
    else:
        test_from = args.test_from
    logger.info('Loading checkpoint from %s' % test_from)
    checkpoint = torch.load(test_from, map_location=lambda storage, loc: storage)
    opt = vars(checkpoint['opt'])
    for k in opt.keys():
        if k in model_flags:
            setattr(args, k, opt[k])
    print(args)

    symbols, tokenizer = get_symbol_and_tokenizer(args.encoder, args.temp_dir)
    model = AbsSummarizer(args, device, checkpoint, symbols=symbols)
    model.eval()

    valid_iter = data_loader.Dataloader(args, load_dataset(args, 'valid', shuffle=False),
                                        args.batch_size, device,
                                        shuffle=False, is_test=False, tokenizer=tokenizer)
    valid_loss = abs_loss(model.generator, symbols, model.vocab_size, train=False, device=device)

    trainer = build_trainer(args, device_id, model, None, valid_loss)
    stats = trainer.validate(valid_iter, step)
    return stats.xent()
def train_abs_single(args, device_id):
    init_logger(args.log_file)
    logger.info(str(args))
    device = "cpu" if args.visible_gpus == "-1" else "cuda"
    logger.info("Device ID %d" % device_id)
    logger.info("Device %s" % device)

    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    if device_id >= 0:
        torch.cuda.set_device(device_id)
        torch.cuda.manual_seed(args.seed)

    if args.train_from != "":
        logger.info("Loading checkpoint from %s" % args.train_from)
        checkpoint = torch.load(
            args.train_from, map_location=lambda storage, loc: storage
        )
        opt = vars(checkpoint["opt"])
        for k in opt.keys():
            if k in model_flags:
                setattr(args, k, opt[k])
    else:
        checkpoint = None

    if args.load_from_extractive != "":
        logger.info("Loading bert from extractive model %s" % args.load_from_extractive)
        bert_from_extractive = torch.load(
            args.load_from_extractive, map_location=lambda storage, loc: storage
        )
        bert_from_extractive = bert_from_extractive["model"]
    else:
        bert_from_extractive = None

    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    def train_iter_fct():
        return data_loader.Dataloader(
            args,
            load_dataset(args, "train", shuffle=True),
            args.batch_size,
            device,
            shuffle=True,
            is_test=False,
        )

    model = AbsSummarizer(args, device, checkpoint, bert_from_extractive)
    if args.sep_optim:
        optim_bert = model_builder.build_optim_bert(args, model, checkpoint)
        optim_dec = model_builder.build_optim_dec(args, model, checkpoint)
        optim = [optim_bert, optim_dec]
    else:
        optim = [model_builder.build_optim(args, model, checkpoint)]

    logger.info(model)

    tokenizer = BertTokenizer.from_pretrained(
        "chinese_roberta_wwm_ext_pytorch/", do_lower_case=True, cache_dir=args.temp_dir
    )
    symbols = {
        "BOS": tokenizer.vocab["[unused1]"],
        "EOS": tokenizer.vocab["[unused2]"],
        "PAD": tokenizer.vocab["[PAD]"],
        "EOQ": tokenizer.vocab["[unused3]"],
    }

    train_loss = abs_loss(
        model.generator,
        symbols,
        model.vocab_size,
        device,
        train=True,
        label_smoothing=args.label_smoothing,
    )

    trainer = build_trainer(args, device_id, model, optim, train_loss)
    trainer.train(train_iter_fct, args.train_steps)
def train(self, train_iter_fct, train_steps, valid_iter_fct=None, valid_steps=-1):
    """
    The main training loop: iterates over training data (i.e. `train_iter_fct`)
    and periodically runs validation (i.e. iterating over `valid_iter_fct`).

    Args:
        train_iter_fct(function): a function that returns the train
            iterator, e.g. something like
            train_iter_fct = lambda: generator(*args, **kwargs)
        valid_iter_fct(function): same as train_iter_fct, for valid data
        train_steps(int):
        valid_steps(int):
        save_checkpoint_steps(int):

    Return:
        None
    """
    logger.info('Start training...')

    # step = self.optim._step + 1
    step = self.optims[0]._step + 1
    true_batchs = []
    accum = 0
    normalization = 0
    train_iter = train_iter_fct()

    total_stats = Statistics()
    report_stats = Statistics()
    self._start_report_manager(start_time=total_stats.start_time)

    while step <= train_steps:
        reduce_counter = 0
        for i, batch in enumerate(train_iter):
            if self.n_gpu == 0 or (i % self.n_gpu == self.gpu_rank):
                true_batchs.append(batch)
                num_tokens = batch.tgt[:, 1:].ne(self.loss.padding_idx).sum()
                normalization += num_tokens.item()
                accum += 1
                if accum == self.grad_accum_count:
                    reduce_counter += 1
                    if self.n_gpu > 1:
                        normalization = sum(distributed.all_gather_list(normalization))

                    self._gradient_accumulation(true_batchs, normalization,
                                                total_stats, report_stats)

                    report_stats = self._maybe_report_training(
                        step, train_steps, self.optims[0].learning_rate, report_stats)

                    if step % self.args.report_every == 0:
                        self.model.eval()
                        logger.info('Model in set eval state')
                        valid_iter = data_loader.Dataloader(
                            self.args, load_dataset(self.args, 'test', shuffle=False),
                            self.args.batch_size, "cuda",
                            shuffle=False, is_test=True)
                        tokenizer = BertTokenizer.from_pretrained(
                            self.args.model_path, do_lower_case=True)
                        symbols = {'BOS': tokenizer.vocab['[unused1]'],
                                   'EOS': tokenizer.vocab['[unused2]'],
                                   'PAD': tokenizer.vocab['[PAD]'],
                                   'EOQ': tokenizer.vocab['[unused3]']}
                        valid_loss = abs_loss(self.model.generator, symbols,
                                              self.model.vocab_size,
                                              train=False, device="cuda")
                        trainer = build_trainer(self.args, 0, self.model, None, valid_loss)
                        stats = trainer.validate(valid_iter, step)
                        self.report_manager.report_step(
                            self.optims[0].learning_rate, step,
                            train_stats=None, valid_stats=stats)
                        self.model.train()

                    true_batchs = []
                    accum = 0
                    normalization = 0
                    if (step % self.save_checkpoint_steps == 0 and self.gpu_rank == 0):
                        self._save(step)
                    step += 1
                    if step > train_steps:
                        break
        train_iter = train_iter_fct()

    return total_stats
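# In the loop above, `normalization` counts non-PAD target tokens over the
# accumulated micro-batches (summed across GPUs via all_gather_list), so the
# loss is averaged per token rather than per batch. A stripped-down sketch of
# that bookkeeping, assuming `tgt` is a (batch, seq_len) LongTensor whose
# first position holds the BOS symbol:
def count_target_tokens(tgt, padding_idx):
    # Exclude position 0 (BOS) and count entries that are not padding.
    return tgt[:, 1:].ne(padding_idx).sum().item()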
def train_single_hybrid(args, device_id):
    init_logger(args.log_file)
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    logger.info('Device ID %d' % device_id)
    logger.info('Device %s' % device)

    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    if device_id >= 0:
        torch.cuda.set_device(device_id)
        torch.cuda.manual_seed(args.seed)

    # Reset the random seeds
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    if args.train_from != '':
        logger.info('Loading checkpoint from %s' % args.train_from)
        checkpoint = torch.load(args.train_from,
                                map_location=lambda storage, loc: storage)
        opt = vars(checkpoint['opt'])
        for k in opt.keys():
            if (k in model_flags):
                # Copy the checkpoint's model flags onto args
                setattr(args, k, opt[k])
    else:
        checkpoint = None

    if args.train_from_extractor != '':
        logger.info('Loading checkpoint from %s' % args.train_from_extractor)
        checkpoint_ext = torch.load(args.train_from_extractor,
                                    map_location=lambda storage, loc: storage)
        opt = vars(checkpoint_ext['opt'])
        for k in opt.keys():
            if (k in model_flags):
                setattr(args, k, opt[k])
    else:
        checkpoint_ext = None

    if args.train_from_abstractor != '':
        logger.info('Loading checkpoint from %s' % args.train_from_abstractor)
        checkpoint_abs = torch.load(args.train_from_abstractor,
                                    map_location=lambda storage, loc: storage)
        opt = vars(checkpoint_abs['opt'])
        for k in opt.keys():
            if (k in model_flags):
                setattr(args, k, opt[k])
    else:
        checkpoint_abs = None

    def train_iter_fct():
        # Build a fresh dataloader on each call; the third positional
        # argument is the batch size.
        if args.is_debugging:
            print("YES it is debugging")
            return data_loader.Dataloader(args, load_dataset(args, 'test', shuffle=False),
                                          args.batch_size, device,
                                          shuffle=False, is_test=False)
        else:
            return data_loader.Dataloader(args, load_dataset(args, 'train', shuffle=True),
                                          args.batch_size, device,
                                          shuffle=True, is_test=False)

    # Choose the model: HybridSummarizer
    model = HybridSummarizer(args, device, checkpoint,
                             checkpoint_ext=checkpoint_ext, checkpoint_abs=checkpoint_abs)

    # Build the optimizer(s)
    if (args.sep_optim):
        optim_bert = model_builder.build_optim_bert(args, model, checkpoint)
        optim_dec = model_builder.build_optim_dec(args, model, checkpoint)
        optim = [optim_bert, optim_dec]
    else:
        optim = [model_builder.build_optim(args, model, checkpoint)]

    logger.info(model)

    # Build the trainer
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True,
                                              cache_dir=args.temp_dir)
    symbols = {'BOS': tokenizer.vocab['[unused0]'], 'EOS': tokenizer.vocab['[unused1]'],
               'PAD': tokenizer.vocab['[PAD]'], 'EOQ': tokenizer.vocab['[unused2]']}

    train_loss = abs_loss(model.abstractor.generator, symbols, model.abstractor.vocab_size,
                          device, train=True, label_smoothing=args.label_smoothing)

    trainer = build_trainer(args, device_id, model, optim, train_loss)

    # Start training
    trainer.train(train_iter_fct, args.train_steps)
def train_abs_single(args, device_id):
    """Implements the training process (meta / non-meta).

    Args:
        device_id (int): the GPU id to be used
    """
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    logger.info('Device ID %d', device_id)
    logger.info('Device %s', device)

    # Fix random seeds to control the experiment
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    if device_id >= 0:  # if using GPU
        torch.cuda.set_device(device_id)
        torch.cuda.manual_seed(args.seed)

    # Load checkpoint and args
    if args.train_from != '':
        logger.info('Loading checkpoint from %s', args.train_from)
        checkpoint = torch.load(args.train_from,
                                map_location=lambda storage, loc: storage)
        opt = vars(checkpoint['opt'])  # which is self.args
        for k in opt.keys():
            if k in model_flags:
                setattr(args, k, opt[k])
    else:
        checkpoint = None

    # Load extractive model as initial parameters (proposed by PreSumm)
    if args.load_from_extractive != '':
        logger.info('Loading bert from extractive model %s', args.load_from_extractive)
        bert_from_extractive = torch.load(args.load_from_extractive,
                                          map_location=lambda storage, loc: storage)
        bert_from_extractive = bert_from_extractive['model']
    else:
        bert_from_extractive = None

    # Prepare dataloader
    if args.meta_mode:
        def meta_train_iter_fct():
            return data_loader.MetaDataloader(args, load_meta_dataset(args, 'train', shuffle=True),
                                              args.batch_size, device,
                                              shuffle=True, is_test=False)
    else:
        def train_iter_fct():
            return data_loader.Dataloader(args, load_dataset(args, 'train', shuffle=True),
                                          args.batch_size, device,
                                          shuffle=True, is_test=False)

    # Prepare model
    if args.meta_mode:
        model = MTLAbsSummarizer(args, device, checkpoint, bert_from_extractive)
    else:
        model = AbsSummarizer(args, device, checkpoint, bert_from_extractive)

    # Prepare optimizers for the inner loop; each task gets its own optimizer
    if args.meta_mode:
        optims_inner = []
        for _ in range(args.num_task):
            if args.sep_optim:
                optim_bert_inner = model_builder.build_optim_bert_inner(args, model, checkpoint, 'maml')
                optim_dec_inner = model_builder.build_optim_dec_inner(args, model, checkpoint, 'maml')
                optims_inner.append([optim_bert_inner, optim_dec_inner])
            else:
                optims_inner.append([model_builder.build_optim_inner(args, model, checkpoint, 'maml')])

    # Prepare optimizer for the outer loop
    if args.sep_optim:
        optim_bert = model_builder.build_optim_bert(args, model, checkpoint)
        optim_dec = model_builder.build_optim_dec(args, model, checkpoint)
        optims = [optim_bert, optim_dec]
    else:
        optims = [model_builder.build_optim(args, model, checkpoint)]

    # Prepare tokenizer
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True,
                                              cache_dir=args.temp_dir)
    symbols = {'BOS': tokenizer.vocab['[unused0]'],  # id = 1
               'EOS': tokenizer.vocab['[unused1]'],  # id = 2
               'EOQ': tokenizer.vocab['[unused2]'],  # id = 3
               'PAD': tokenizer.vocab['[PAD]']}      # id = 0

    # Self check: special word ids
    special_words = [w for w in tokenizer.vocab.keys() if "[" in w]
    special_word_ids = [tokenizer.convert_tokens_to_ids(w) for w in special_words]

    # Prepare loss computation
    train_loss = abs_loss(model.generator, symbols, model.vocab_size, device,
                          train=True, label_smoothing=args.label_smoothing)

    # Prepare trainer and perform training
    if args.meta_mode:
        trainer = build_MTLtrainer(args, device_id, model, optims, optims_inner, train_loss)
        trainer.train(meta_train_iter_fct)
    else:
        trainer = build_trainer(args, device_id, model, optims, train_loss)
        trainer.train(train_iter_fct, args.train_steps)
def validate(args, device_id, pt, step):
    """Implements the validation process (meta / non-meta).

    Arguments:
        device_id (int): the GPU id to be used
        pt: checkpoint path
        step (int): checkpoint step

    Process:
        - load checkpoint
        - prepare dataloader class
        - prepare model class
        - prepare loss func, which returns a loss class
        - prepare trainer
        - trainer.validate()

    Meta vs Normal:
        - MetaDataloader vs Dataloader
        - load_meta_dataset vs load_dataset
        - MTLAbsSummarizer vs AbsSummarizer
        - build_MTLtrainer vs build_trainer
    """
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    logger.info('Device ID %d' % device_id)
    logger.info('Device %s' % device)

    # Fix random seeds to control the experiment
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    if device_id >= 0:
        torch.cuda.set_device(device_id)
        torch.cuda.manual_seed(args.seed)

    # Load checkpoint and args
    if (pt != ''):
        test_from = pt
    else:
        test_from = args.test_from
    logger.info('Loading checkpoint from %s' % test_from)
    checkpoint = torch.load(test_from, map_location=lambda storage, loc: storage)
    opt = vars(checkpoint['opt'])  # which is self.args
    for k in opt.keys():
        if (k in model_flags):
            setattr(args, k, opt[k])

    # Prepare dataloader
    if (args.meta_mode):
        def valid_iter_fct():
            return data_loader.MetaDataloader(args, load_meta_dataset(args, 'valid', shuffle=True),
                                              args.batch_size, device,
                                              shuffle=True, is_test=False)
    else:
        valid_iter = data_loader.Dataloader(args, load_dataset(args, 'valid', shuffle=False),
                                            args.batch_size, device,
                                            shuffle=False, is_test=False)

    # Prepare model
    if (args.meta_mode):
        model = MTLAbsSummarizer(args, device, checkpoint)
    else:
        model = AbsSummarizer(args, device, checkpoint)
    # model.eval()

    # Prepare optimizers for the inner loop; each task gets its own optimizer
    if (args.meta_mode):
        optims_inner = []
        for i in range(args.num_task):
            if (args.sep_optim):
                optim_bert_inner = model_builder.build_optim_bert_inner(args, model, checkpoint, 'maml')
                optim_dec_inner = model_builder.build_optim_dec_inner(args, model, checkpoint, 'maml')
                optims_inner.append([optim_bert_inner, optim_dec_inner])
            else:
                optims_inner.append([model_builder.build_optim_inner(args, model, checkpoint, 'maml')])

    # Prepare optimizer (not actually used, but carries the step information)
    if (args.sep_optim):
        optim_bert = model_builder.build_optim_bert(args, model, checkpoint)
        optim_dec = model_builder.build_optim_dec(args, model, checkpoint)
        optim = [optim_bert, optim_dec]
    else:
        optim = [model_builder.build_optim(args, model, checkpoint)]

    # Prepare tokenizer and symbols
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True,
                                              cache_dir=args.temp_dir)
    symbols = {'BOS': tokenizer.vocab['[unused0]'], 'EOS': tokenizer.vocab['[unused1]'],
               'PAD': tokenizer.vocab['[PAD]'], 'EOQ': tokenizer.vocab['[unused2]']}

    # Prepare loss computation
    valid_loss = abs_loss(model.generator, symbols, model.vocab_size, device, train=False)

    # Prepare trainer and perform validation
    if (args.meta_mode):
        trainer = build_MTLtrainer(args, device_id, model, optim, optims_inner, valid_loss)
        stats = trainer.validate(valid_iter_fct, step)
    else:
        trainer = build_trainer(args, device_id, model, None, valid_loss)
        stats = trainer.validate(valid_iter, step)
    return stats.xent()
def train_abs_single(args, device_id):
    init_logger(args.log_file)
    logger.info(str(args))
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    logger.info('Device ID %d' % device_id)
    logger.info('Device %s' % device)

    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    if device_id >= 0:
        torch.cuda.set_device(device_id)
        torch.cuda.manual_seed(args.seed)

    if args.train_from != '':
        logger.info('Loading checkpoint from %s' % args.train_from)
        checkpoint = torch.load(args.train_from,
                                map_location=lambda storage, loc: storage)
        opt = vars(checkpoint['opt'])
        for k in opt.keys():
            if (k in model_flags):
                setattr(args, k, opt[k])
    else:
        checkpoint = None

    if (args.load_from_extractive != ''):
        logger.info('Loading bert from extractive model %s' % args.load_from_extractive)
        bert_from_extractive = torch.load(args.load_from_extractive,
                                          map_location=lambda storage, loc: storage)
        bert_from_extractive = bert_from_extractive['model']
    else:
        bert_from_extractive = None

    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    def train_iter_fct():
        return data_loader.Dataloader(args, load_dataset(args, 'train', shuffle=True),
                                      args.batch_size, device,
                                      shuffle=True, is_test=False)

    def valid_iter_fct():
        return data_loader.Dataloader(args, load_dataset(args, 'valid', shuffle=True),
                                      args.batch_size, device,
                                      shuffle=True, is_test=False)

    model = AbsSummarizer(args, device, checkpoint, bert_from_extractive)
    if (args.sep_optim):
        optim_bert = model_builder.build_optim_bert(args, model, checkpoint)
        optim_dec = model_builder.build_optim_dec(args, model, checkpoint)
        optim = [optim_bert, optim_dec]
    else:
        optim = [model_builder.build_optim(args, model, checkpoint)]

    logger.info(model)
    print("model.vocab_size: " + str(model.vocab_size))

    parser = argparse.ArgumentParser()
    parser.add_argument('--bpe-codes', default="/content/PhoBERT_base_transformers/bpe.codes",
                        required=False, type=str, help='path to fastBPE BPE')
    args1, unknown = parser.parse_known_args()
    bpe = fastBPE(args1)

    # Load the dictionary
    vocab = Dictionary()
    vocab.add_from_file("/content/PhoBERT_base_transformers/dict.txt")

    tokenizer = bpe
    symbols = {'BOS': vocab.indices['[unused0]'], 'EOS': vocab.indices['[unused1]'],
               'PAD': vocab.indices['[PAD]'], 'EOQ': vocab.indices['[unused2]']}

    train_loss = abs_loss(model.generator, symbols, model.vocab_size, device,
                          train=True, label_smoothing=args.label_smoothing)

    trainer = build_trainer(args, device_id, model, optim, train_loss)
    trainer.train(train_iter_fct=train_iter_fct, train_steps=args.train_steps,
                  valid_iter_fct=valid_iter_fct)
def train_abs(args, device_id):
    init_logger(args.log_file)
    logger.info(str(args))
    device = "cpu" if args.visible_gpus == '-1' else "cuda"
    logger.info('Device ID %d' % device_id)
    logger.info('Device %s' % device)

    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    if device_id >= 0:
        torch.cuda.set_device(device_id)
        torch.cuda.manual_seed(args.seed)

    if args.train_from != '':
        logger.info('Loading checkpoint from %s' % args.train_from)
        checkpoint = torch.load(args.train_from,
                                map_location=lambda storage, loc: storage)
        opt = vars(checkpoint['opt'])
        for k in opt.keys():
            if k in model_flags:
                setattr(args, k, opt[k])
    else:
        checkpoint = None

    if args.load_from_extractive != '':
        logger.info('Loading bert from extractive model %s' % args.load_from_extractive)
        bert_from_extractive = torch.load(args.load_from_extractive,
                                          map_location=lambda storage, loc: storage)
        bert_from_extractive = bert_from_extractive['model']
    else:
        bert_from_extractive = None

    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.backends.cudnn.deterministic = True

    symbols, tokenizer = get_symbol_and_tokenizer(args.encoder, args.temp_dir)
    model = AbsSummarizer(args, device, checkpoint, bert_from_extractive, symbols=symbols)
    if args.sep_optim:
        optim_enc = model_builder.build_optim_enc(args, model, checkpoint)
        optim_dec = model_builder.build_optim_dec(args, model, checkpoint)
        optim = [optim_enc, optim_dec]
    else:
        optim = [model_builder.build_optim(args, model, checkpoint)]

    logger.info(model)

    def train_iter_fct():
        return data_loader.Dataloader(args, load_dataset(args, 'train', shuffle=True),
                                      args.batch_size, device,
                                      shuffle=True, is_test=False, tokenizer=tokenizer)

    train_loss = abs_loss(model.generator, symbols, model.vocab_size, device,
                          train=True, label_smoothing=args.label_smoothing)

    trainer = build_trainer(args, device_id, model, optim, train_loss)
    trainer.train(train_iter_fct, args.train_steps)
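# `get_symbol_and_tokenizer` is used by `train_abs` and the matching
# `validate` above but is not defined in this excerpt. A hypothetical sketch
# of its BERT branch, mirroring the symbol maps used elsewhere in this file;
# the real helper presumably dispatches on `encoder` for other backbones:
def get_symbol_and_tokenizer(encoder, temp_dir):
    assert encoder == 'bert', 'this sketch covers only the BERT case'
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True,
                                              cache_dir=temp_dir)
    symbols = {'BOS': tokenizer.vocab['[unused0]'], 'EOS': tokenizer.vocab['[unused1]'],
               'PAD': tokenizer.vocab['[PAD]'], 'EOQ': tokenizer.vocab['[unused2]']}
    return symbols, tokenizer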