def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
    inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)

    if return_labels:
        if model_class in MODEL_FOR_PRETRAINING_MAPPING.values():
            # Pretraining heads are trained with one label per token, so the
            # dummy labels have shape (batch_size, seq_length).
            inputs_dict["labels"] = torch.zeros(
                (self.model_tester.batch_size, self.model_tester.seq_length),
                dtype=torch.long,
                device=torch_device,
            )
    return inputs_dict
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
    inputs_dict = copy.deepcopy(inputs_dict)

    if return_labels:
        if model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values():
            # Question-answering heads get a single label per example.
            inputs_dict["labels"] = torch.zeros(
                self.model_tester.batch_size, dtype=torch.long, device=torch_device
            )
        elif model_class in MODEL_FOR_PRETRAINING_MAPPING.values():
            # special case for models like BERT that use multi-loss training for PreTraining
            inputs_dict["labels"] = torch.zeros(
                (self.model_tester.batch_size, self.model_tester.seq_length),
                dtype=torch.long,
                device=torch_device,
            )
    return inputs_dict
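# A minimal, self-contained sketch of the label shapes the two hooks above
# produce; the batch_size and seq_length values here are hypothetical, chosen
# only for illustration.
import torch

batch_size, seq_length = 2, 7

# Question-answering models get one dummy label per example in the batch.
qa_labels = torch.zeros(batch_size, dtype=torch.long)
assert qa_labels.shape == (2,)

# Pretraining models (e.g. BERT's multi-loss objective) get one label per token.
pretraining_labels = torch.zeros((batch_size, seq_length), dtype=torch.long)
assert pretraining_labels.shape == (2, 7)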
from transformers import (
    HfArgumentParser,
    LineByLineTextDataset,
    LineByLineTextDatasetAVSD,
    PreTrainedTokenizer,
    TextDataset,
    Trainer,
    TrainingArguments,
    set_seed,
)
from transformers import DataCollatorForLanguageModelingAVSD


logger = logging.getLogger(__name__)


# MODEL_CONFIG_CLASSES = list(MODEL_WITH_LM_HEAD_MAPPING.keys())
MODEL_CONFIG_CLASSES = list(MODEL_FOR_PRETRAINING_MAPPING.keys())
MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)


@dataclass
class ModelArguments:
    """
    Arguments pertaining to which model/config/tokenizer we are going to fine-tune, or train from scratch.
    """

    model_name_or_path: Optional[str] = field(
        default=None,
        metadata={
            "help": "The model checkpoint for weights initialization. Leave None if you want to train a model from scratch."
        },
    )
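# A minimal sketch of how ModelArguments is typically consumed via
# HfArgumentParser (parse_args_into_dataclasses is standard transformers API);
# the function name and the CLI invocation in the comment are hypothetical.
from transformers import HfArgumentParser, TrainingArguments


def parse_cli():
    parser = HfArgumentParser((ModelArguments, TrainingArguments))
    # e.g.: python run_script.py --model_name_or_path bert-base-uncased --output_dir /tmp/out
    model_args, training_args = parser.parse_args_into_dataclasses()
    return model_args, training_args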