Example #1
from torch.utils.data import DataLoader
from transformers import get_linear_schedule_with_warmup

def train_dataloader(self) -> DataLoader:
    dataloader = self.get_dataloader("train", batch_size=self.hparams.train_batch_size)
    # Total optimizer steps: batches per epoch across all GPUs, divided by the
    # gradient-accumulation factor, times the number of epochs. Cast to int so
    # the scheduler receives a whole number of steps.
    t_total = int(
        (len(dataloader.dataset) // (self.hparams.train_batch_size * max(1, self.hparams.n_gpu)))
        // self.hparams.gradient_accumulation_steps
        * float(self.hparams.num_train_epochs)
    )
    # Linear warmup followed by linear decay, attached to the optimizer
    # created in configure_optimizers (self.opt).
    scheduler = get_linear_schedule_with_warmup(
        self.opt, num_warmup_steps=self.hparams.warmup_steps, num_training_steps=t_total
    )
    self.lr_scheduler = scheduler
    return dataloader
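
The step count fed to num_training_steps is the only non-obvious piece here. Evaluated with hypothetical values (10,000 training examples, per-device batch size 8, 2 GPUs, gradient accumulation 4, 3 epochs; none of these numbers come from the examples), the formula works out as follows:

# 10000 // (8 * 2)  -> 625 batches per epoch across both GPUs
# 625 // 4          -> 156 optimizer steps per epoch after accumulation
# 156 * 3           -> 468 total optimizer steps
t_total = int((10000 // (8 * 2)) // 4 * float(3))  # 468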
Example #2
 def train_dataloader(self):
     train_dataset = SummarizationDataset(
         self.tokenizer, data_dir=self.hparams.data_dir, type_path="train", block_size=self.hparams.max_seq_length
     )
     dataloader = DataLoader(train_dataset, batch_size=self.hparams.train_batch_size, num_workers=2, pin_memory=True, shuffle=True)
     t_total = (
         (len(dataloader.dataset) // (self.hparams.train_batch_size * max(1, self.hparams.n_gpu)))
         // self.hparams.gradient_accumulation_steps
         * float(self.hparams.num_train_epochs)
     )
     scheduler = get_linear_schedule_with_warmup(
         self.opt, num_warmup_steps=self.hparams.warmup_steps, num_training_steps=t_total
     )
     self.lr_scheduler = scheduler
     return dataloader
 def train_dataloader(self):
     train_dataset = MaskedPlotFactEmbeddingDatasetBart(
         self.tokenizer,
         args,
         self.hparams.data_dir + "train/",
         block_size=self.hparams.max_seq_length,
         is_train=False)
     dataloader = DataLoader(train_dataset,
                             batch_size=self.hparams.train_batch_size)
     t_total = (
         (len(dataloader.dataset) //
          (self.hparams.train_batch_size * max(1, self.hparams.n_gpu))) //
         self.hparams.gradient_accumulation_steps *
         float(self.hparams.num_train_epochs))
     scheduler = get_linear_schedule_with_warmup(
         self.opt,
         num_warmup_steps=self.hparams.warmup_steps,
         num_training_steps=t_total)
     self.lr_scheduler = scheduler
     return dataloader
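
All three variants assume the enclosing LightningModule has already built the optimizer as self.opt before the train dataloader is requested, and that self.lr_scheduler is stepped once per optimizer step. A minimal sketch of that wiring, assuming an older PyTorch Lightning flow where configure_optimizers runs before train_dataloader (the class name and learning_rate hyperparameter are hypothetical, not taken from the examples above):

import pytorch_lightning as pl
from torch.optim import AdamW

class WarmupModule(pl.LightningModule):  # hypothetical name
    def configure_optimizers(self):
        # Stored on self so train_dataloader() can hand it to the scheduler.
        self.opt = AdamW(self.parameters(), lr=self.hparams.learning_rate)  # learning_rate is assumed
        return self.opt

    def optimizer_step(self, *args, **kwargs):
        # Let Lightning perform the actual optimizer step, then advance the
        # warmup/decay schedule built in train_dataloader().
        super().optimizer_step(*args, **kwargs)
        self.lr_scheduler.step()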