def __init__(
    self,
    pretrained_model_name=None,
    config_filename=None,
    vocab_size=None,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    max_position_embeddings=512,
):
    super().__init__()

    # Check that exactly one of pretrained_model_name, config_filename, and
    # vocab_size was passed in.
    total = 0
    if pretrained_model_name is not None:
        total += 1
    if config_filename is not None:
        total += 1
    if vocab_size is not None:
        total += 1

    if total != 1:
        raise ValueError(
            "Only one of pretrained_model_name, vocab_size, "
            "or config_filename should be passed into the "
            "ROBERTA constructor."
        )

    # The check above guarantees that exactly one of the branches below runs.
    if vocab_size is not None:
        config = RobertaConfig(
            vocab_size_or_config_json_file=vocab_size,
            vocab_size=vocab_size,
            hidden_size=hidden_size,
            num_hidden_layers=num_hidden_layers,
            num_attention_heads=num_attention_heads,
            intermediate_size=intermediate_size,
            hidden_act=hidden_act,
            max_position_embeddings=max_position_embeddings,
        )
        model = RobertaModel(config)
    elif pretrained_model_name is not None:
        model = RobertaModel.from_pretrained(pretrained_model_name)
    else:
        config = RobertaConfig.from_json_file(config_filename)
        model = RobertaModel(config)

    model.to(self._device)

    self.add_module("roberta", model)
    self.config = model.config
    self._hidden_size = model.config.hidden_size
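# Hedged construction sketch for the wrapper above (not from the source; the
# class name RobertaWrapper is a placeholder). Exactly one of the three
# mutually exclusive arguments may be supplied:
encoder = RobertaWrapper(pretrained_model_name="roberta-base")      # from a HF checkpoint
# encoder = RobertaWrapper(vocab_size=50265)                        # from explicit hyperparameters
# encoder = RobertaWrapper(config_filename="roberta_config.json")   # from a JSON config file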
def __init__(self, config: XLMRobertaConfig, local_config: dict, data_processor):
    super().__init__(config)
    syns = sorted(local_config['syns'])
    self.num_clfs = len(syns) + 1 if local_config['train_pos'] else len(syns)
    self.clfs_weights = torch.nn.parameter.Parameter(
        torch.ones(self.num_clfs, dtype=torch.float32), requires_grad=True)
    self.roberta = RobertaModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.local_config = local_config
    self.tokenizer = XLMRobertaTokenizer.from_pretrained(local_config['model_name'])
    self.clf2ncls = [2 for clf in syns]
    assert local_config['target_embeddings'] in ['concat', 'none']
    if local_config['target_embeddings'] == 'concat':
        self.syns = nn.Linear(config.hidden_size * 2, len(syns) * 2)
        self.pos_clf = nn.Linear(config.hidden_size * 2, self.local_config['pos_ncls'])
    else:
        self.syns = nn.Linear(config.hidden_size, len(syns) * 2)
        self.pos_clf = nn.Linear(config.hidden_size, self.local_config['pos_ncls'])
    print(self.clfs_weights)
    self.data_processor = data_processor
    self.TARGET_START = '•'
    self.TARGET_END = '⁄'
    self.init_weights()
def __init__(self, config):
    super(RobertaForMultipleChoice, self).__init__(config)
    self.roberta = RobertaModel(config)
    self.classifier = RobertaClassificationHead(config)
    self.init_weights()
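# Hedged usage sketch (not from the source): assuming RobertaForMultipleChoice
# above inherits from a Hugging Face RobertaPreTrainedModel, it can be built
# with pretrained encoder weights while the new classification head is freshly
# initialized; 'roberta-base' is an illustrative checkpoint name.
model = RobertaForMultipleChoice.from_pretrained("roberta-base")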
def __init__(self, config, replace_mask_with_question_token=False,
             mask_id=103, question_token_id=104, sep_id=102,
             initialize_new_qass=True):
    super().__init__(config)
    self.encoder_name = config.model_type
    if "roberta" in self.encoder_name:
        self.roberta = RobertaModel(config)
    else:
        self.bert = BertModel(config)
    self.initialize_new_qass = initialize_new_qass
    self.cls = ClassificationHead(config) if not self.initialize_new_qass else None
    self.new_cls = ClassificationHead(config) if self.initialize_new_qass else None
    self.replace_mask_with_question_token = replace_mask_with_question_token
    self.mask_id = mask_id
    self.question_token_id = question_token_id
    self.sep_id = sep_id
    self.init_weights()
def main():
    args = build_parser().parse_args()
    print("Creating snapshot directory if it does not exist...")
    if not os.path.exists(args.snapshots_path):
        os.mkdir(args.snapshots_path)
    print("Loading Roberta components...")
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    config = RobertaConfig.from_pretrained("roberta-base", output_hidden_states=True)
    base_model = RobertaModel(config).cuda()
    model = LangInferModel(base_model, config, args.span_heads).cuda()
    optimizer = configure_adam_optimizer(model, args.lr, args.weight_decay, args.adam_epsilon)
    print("Preparing the data for training...")
    train_loader, test_loaders = build_data_loaders(args, tokenizer)
    criterion = nn.CrossEntropyLoss()
    print(f"Training started for {args.epoch_num} epochs. Might take a while...")
    train(args.epoch_num, model, optimizer, criterion, train_loader, test_loaders, args.snapshots_path)
    print("Training is now finished. You can check out the results now.")
def get_transformers_model(
    settings: Dict[str, Any],
    model_name: str,
    pretrained: bool = True,
    ckptdir: Optional[Path] = None,
) -> PreTrainedModel:
    model_path = model_name if pretrained else str(ckptdir)
    config = AutoConfig.from_pretrained(model_path)
    config.attention_probs_dropout_prob = settings.get('encoder_attn_dropout_rate', 0.1)
    config.hidden_dropout_prob = settings.get('encoder_ffn_dropout_rate', 0.1)
    config.layer_norm_eps = settings.get('layer_norm_eps', 1e-5)
    if pretrained:
        model = AutoModel.from_pretrained(model_name, config=config)
        return model
    # If only the model structure (without pretrained parameters) is needed,
    # the matching model class has to be instantiated explicitly.
    if 'xlm' in model_name:
        model = XLMModel(config=config)
    elif 'albert' in model_name:
        model = AlbertModel(config=config)
    elif 'roberta' in model_name:
        model = RobertaModel(config=config)
    elif 'deberta-v2' in model_name:
        model = DebertaV2Model(config=config)
    elif 'deberta' in model_name:
        model = DebertaModel(config=config)
    elif 'bert' in model_name:
        model = BertModel(config=config)
    elif 'electra' in model_name:
        model = ElectraModel(config=config)
    else:
        model = BertModel(config=config)
    return model
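# A minimal usage sketch for get_transformers_model above (illustrative, not
# from the source). The settings keys are the ones the function itself reads;
# 'roberta-base' is an assumed checkpoint name.
example_settings = {
    'encoder_attn_dropout_rate': 0.1,
    'encoder_ffn_dropout_rate': 0.1,
    'layer_norm_eps': 1e-5,
}
encoder = get_transformers_model(example_settings, 'roberta-base', pretrained=True)
# With pretrained=False and ckptdir pointing at a directory containing a
# config.json, only the architecture is instantiated and no weights are loaded.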
def __init__(self, config, num=0):
    super(roBerta, self).__init__()
    model_config = RobertaConfig()
    model_config.vocab_size = config.vocab_size
    model_config.hidden_size = config.hidden_size[0]
    model_config.num_attention_heads = 16
    # Method used to compute the loss.
    self.loss_method = config.loss_method
    self.multi_drop = config.multi_drop
    self.roberta = RobertaModel(model_config)
    if config.requires_grad:
        for param in self.roberta.parameters():
            param.requires_grad = True
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.hidden_size = config.hidden_size[num]
    if self.loss_method in ['binary', 'focal_loss', 'ghmc']:
        self.classifier = nn.Linear(self.hidden_size, 1)
    else:
        self.classifier = nn.Linear(self.hidden_size, self.num_labels)
    self.text_linear = nn.Linear(config.embeding_size, config.hidden_size[0])
    self.vocab_layer = nn.Linear(config.hidden_size[0], config.vocab_size)
    self.classifier.apply(self._init_weights)
    self.roberta.apply(self._init_weights)
    self.text_linear.apply(self._init_weights)
    self.vocab_layer.apply(self._init_weights)
def __init__(self, config, num_choices: int = 3, recursive_step: int = 2, erasure_k: int = 50):
    super().__init__(config)
    self.roberta = RobertaModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.num_choices = num_choices
    self.recursive = recursive_step
    self.erasure_k = erasure_k
    self.memory_in = nn.MultiheadAttention(embed_dim=config.hidden_size, num_heads=8, dropout=0.1)
    self.mem_linear = nn.Linear(config.hidden_size * self.recursive, config.hidden_size)
    self.merge_linear = nn.Linear(config.hidden_size * 2, config.hidden_size)
    self.single_linear = nn.Linear(config.hidden_size, config.hidden_size)
    self.classifier = nn.Linear(config.hidden_size, self.num_choices)
    self.classifier_2 = nn.Linear(config.hidden_size, 1)
def __init__(self, config):
    super(BertForQueryNER, self).__init__(config)
    self.roberta = RobertaModel(config)
    self.construct_entity_span = config.construct_entity_span
    self.dropout = nn.Dropout(config.hidden_dropout_prob)

    if self.construct_entity_span == "start_end_match":
        self.start_outputs = MultiLayerPerceptronClassifier(
            hidden_size=config.hidden_size, num_labels=config.num_labels,
            activate_func=config.activate_func)
        self.end_outputs = MultiLayerPerceptronClassifier(
            hidden_size=config.hidden_size, num_labels=config.num_labels,
            activate_func=config.activate_func)
        self.span_embedding = MultiLayerPerceptronClassifier(
            hidden_size=config.hidden_size * 2, num_labels=1,
            activate_func=config.activate_func)
    elif self.construct_entity_span == "match":
        self.span_nn = SpanClassifier(config.hidden_size, config.hidden_dropout_prob)
    elif self.construct_entity_span == "start_and_end":
        self.start_outputs = MultiLayerPerceptronClassifier(
            hidden_size=config.hidden_size, num_labels=config.num_labels,
            activate_func=config.activate_func)
        self.end_outputs = MultiLayerPerceptronClassifier(
            hidden_size=config.hidden_size, num_labels=config.num_labels,
            activate_func=config.activate_func)
    elif self.construct_entity_span == "start_end":
        self.start_end_outputs = MultiLayerPerceptronClassifier(
            hidden_size=config.hidden_size, num_labels=2,
            activate_func=config.activate_func)
    else:
        raise ValueError

    self.pred_answerable = config.pred_answerable
    if self.pred_answerable:
        self.answerable_cls_output = MultiLayerPerceptronClassifier(
            hidden_size=config.hidden_size, num_labels=1,
            activate_func=config.activate_func)

    self.init_weights()
def __init__(self, config):
    super().__init__(config)
    self.roberta = RobertaModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    super().init_weights()
def __init__(self, config):
    super(RobertaForMaskedLM, self).__init__(config)
    self.roberta = RobertaModel(config)
    self.lm_head = RobertaLMHead(config)
    self.init_weights()
def create_and_check_model_as_decoder(
    self,
    config,
    input_ids,
    token_type_ids,
    input_mask,
    sequence_labels,
    token_labels,
    choice_labels,
    encoder_hidden_states,
    encoder_attention_mask,
):
    config.add_cross_attention = True
    model = RobertaModel(config)
    model.to(torch_device)
    model.eval()
    result = model(
        input_ids,
        attention_mask=input_mask,
        token_type_ids=token_type_ids,
        encoder_hidden_states=encoder_hidden_states,
        encoder_attention_mask=encoder_attention_mask,
    )
    result = model(
        input_ids,
        attention_mask=input_mask,
        token_type_ids=token_type_ids,
        encoder_hidden_states=encoder_hidden_states,
    )
    result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
    self.parent.assertEqual(
        result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
    self.parent.assertEqual(result.pooler_output.shape, (self.batch_size, self.hidden_size))
def __init__(self, config, num_types, ent_emb):
    super().__init__(config)
    self.roberta = RobertaModel(config)
    self.cls_head = ClsHead(config, num_types)
    self.apply(self._init_weights)
    self.ent_embeddings_n = nn.Embedding.from_pretrained(ent_emb)
    self.num_types = num_types
def __init__(self, config: XLMRobertaConfig, args, data_processor):
    super().__init__(config)
    self.roberta = RobertaModel(config)
    self.args = args
    self.tokenizer = XLMRobertaTokenizer.from_pretrained(args.model_name)

    input_size = config.hidden_size
    if args.pool_type in {'mmm', 'mmf'}:
        input_size *= 3
    elif args.pool_type in {'mm', 'mf'}:
        input_size *= 2
    if args.target_embeddings == 'concat':
        input_size *= 2
    elif args.target_embeddings.startswith('comb_c'):
        input_size *= 3
    elif args.target_embeddings.startswith('comb_'):
        input_size *= 2
    elif args.target_embeddings.startswith('dist_'):
        input_size = len(args.target_embeddings.replace('dist_', '').replace('n', '')) // 2
    print('Classification head input size:', input_size)

    if self.args.loss == 'mse_loss':
        self.syn_mse_clf = RobertaClassificationHead(config, 1, input_size, self.args)
    elif self.args.loss == 'crossentropy_loss':
        self.syn_clf = RobertaClassificationHead(config, 2, input_size, self.args)

    self.data_processor = data_processor
    self.init_weights()
def __init__(self, config: RobertaConfig):
    super().__init__(config)
    self.config = config
    self.roberta = RobertaModel(config)
    self.embeddingHead = nn.Linear(config.hidden_size, 768)
    self.norm = nn.LayerNorm(768)
    self.init_weights()
def __init__(self, config):
    super().__init__(config)
    # print(config)
    self.num_labels = config.num_labels
    self.roberta = RobertaModel(config)
    self.classifier = RobertaClassificationHead(config)
def __init__(self, config, pos_weight=None):
    super(RobertaForMultiLabelSequenceClassification, self).__init__(config)
    self.num_labels = config.num_labels
    self.pos_weight = pos_weight
    self.roberta = RobertaModel(config)
    self.classifier = RobertaClassificationHead(config)
def __init__(self, config):
    super(Introspector, self).__init__(config)
    self.roberta = RobertaModel(config)
    self.dropout = torch.nn.Dropout(0.1)
    self.classifier = torch.nn.Linear(config.hidden_size, 1)
    self.init_weights()
def __init__(self, config):
    super(QAReasoner, self).__init__(config)
    self.roberta = RobertaModel(config)
    self.qa_outputs = torch.nn.Linear(config.hidden_size, 2)
    self.init_weights()
def __init__(self, config, device, pretrained_model, with_semi=True, with_sum=True):
    super().__init__()
    self.cls_x = ClassificationHead(config)
    self.cls_s = ClassificationHead(config)
    self.mlp_x = nn.Sequential(
        nn.Linear(config.hidden_size, config.hidden_size),
        nn.ReLU(),
        nn.Linear(config.hidden_size, 256))
    self.mlp_s = nn.Sequential(
        nn.Linear(config.hidden_size, config.hidden_size),
        nn.ReLU(),
        nn.Linear(config.hidden_size, 256))
    self.f = RobertaModel(config, add_pooling_layer=False)
    self.scl_criterion = SupConLoss(temperature=0.3, base_temperature=0.3)
    self.ce_criterion = nn.CrossEntropyLoss()
    # self.f = copy.deepcopy(pretrained_enc)
    # self.f = RobertaModel(config)
    self.device = device
    self.init_weights(pretrained_model)
    self.with_semi = with_semi
    self.with_sum = with_sum
def __init__(
    self,
    device,
    model_path,
    tokenizer,
    config,
    num_genre=24,
    num_rating=5,
    num_emotion=5,
):
    super(RobertaEncoder, self).__init__()
    self._device = device
    self._model_path = model_path
    self._tokenizer = tokenizer
    self._config = config
    self._num_genre = num_genre
    self._num_rating = num_rating
    self._num_emotion = num_emotion
    self._max_position_embeddings = config.max_position_embeddings
    self.roberta = RobertaModel(config)
    self.linear_genre = nn.Linear(config.hidden_size, num_genre)
    self.linear_rating = nn.Linear(config.hidden_size, num_rating)
    self.linear_emotion = nn.Linear(config.hidden_size, num_emotion)
    self._load_weight(device)
def __init__(self, config):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.roberta = RobertaModel(config)
    self.lang_discriminator = RobertaClassificationHead(config, 1)
    self.triple_validator = RobertaClassificationHead(config, 2)
    self.init_weights()
def __init__(self, config):
    super(RobertaForMCTACO, self).__init__(config)
    self.num_labels = config.num_labels
    self.roberta = RobertaModel(config)
    self.classifier = RobertaClassificationHead(config, out_size=2)
    self.classifier_bce = RobertaClassificationHead(config, out_size=1)
    self.sigmoid = nn.Sigmoid()
    self.init_weights()
def __init__(self, config):
    super(RobertaForXMC, self).__init__(config)
    self.num_labels = config.num_labels
    self.roberta = RobertaModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.init_weights()
def __init__(self, config):
    super(RobertaForListRank, self).__init__(config)
    self.roberta = RobertaModel(config)
    self.dropout = nn.Dropout(config.linear_dropout_prob)
    self.linear = nn.Linear(config.hidden_size, 1)
    self.init_weights()
def __init__(self, config):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.roberta = RobertaModel(config, add_pooling_layer=False)
    self.classifier = LingBertaClassificationHead(config)
    self.init_weights()
def __init__(self, config):
    super().__init__(config)
    self.roberta = RobertaModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, 1)
    self.init_weights()
def __init__(self, config):
    super().__init__(config)
    self.roberta = RobertaModel(config)
    self.cause_outputs = nn.Linear(config.hidden_size, config.num_labels)
    self.effect_outputs = nn.Linear(config.hidden_size, config.num_labels)
    assert config.num_labels == 2
    self.init_weights()
def __init__(self, config, num_classes, embd_method='cls'):
    super().__init__(config)
    self.num_labels = num_classes
    self.embd_method = embd_method
    config.num_labels = self.num_labels
    self.roberta = RobertaModel(config, add_pooling_layer=False)
    self.classifier = RobertaCLS(config)
    self.init_weights()
def __init__(self, config):
    super(RobertaForQuestRegression, self).__init__(config)
    self.config = config
    self.num_labels = config.num_labels
    self.roberta = RobertaModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)