def load(cls, pretrained_model_name_or_path, language=None, **kwargs):
    """
    Load a pretrained model by supplying

    * the name of a remote model on s3 ("bert-base-cased" ...)
    * OR a local path of a model trained via transformers ("some_dir/huggingface_model")
    * OR a local path of a model trained via FARM ("some_dir/farm_model")

    :param pretrained_model_name_or_path: The path of the saved pretrained model or its name.
    :type pretrained_model_name_or_path: str
    """
    bert = cls()
    if "farm_lm_name" in kwargs:
        bert.name = kwargs["farm_lm_name"]
    else:
        bert.name = pretrained_model_name_or_path
    # We need to differentiate between loading model using FARM format and Pytorch-Transformers format
    farm_lm_config = Path(pretrained_model_name_or_path) / "language_model_config.json"
    if os.path.exists(farm_lm_config):
        # FARM style
        bert_config = BertConfig.from_pretrained(farm_lm_config)
        farm_lm_model = Path(pretrained_model_name_or_path) / "language_model.bin"
        bert.model = BertModel.from_pretrained(farm_lm_model, config=bert_config, **kwargs)
        bert.language = bert.model.config.language
    else:
        # Pytorch-Transformers style
        bert.model = BertModel.from_pretrained(str(pretrained_model_name_or_path), **kwargs)
        bert.language = cls._get_or_infer_language_from_name(language, pretrained_model_name_or_path)
    return bert
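# A minimal usage sketch for the loader above (not part of the original snippet).
# `Bert` stands in for the wrapper class that owns this classmethod; the model
# name and directory paths are illustrative assumptions.
#
#   lm = Bert.load("bert-base-cased")                             # remote transformers model
#   lm = Bert.load("some_dir/huggingface_model")                  # local transformers checkpoint
#   lm = Bert.load("some_dir/farm_model", farm_lm_name="my_lm")   # FARM-format directory
#
# The FARM branch is chosen whenever language_model_config.json exists in the
# given directory; otherwise the path or name is forwarded to
# transformers.BertModel.from_pretrained.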
def __init__(self, config):
    super().__init__(config)
    # parallel, adapter-BERT
    self.parabert = BertModel(config.parabert_config)
    # freezing the pre-trained BERT
    self.freeze_original_params()
def from_torch(model: TorchBertModel, device: Optional[torch.device] = None):
    if device is not None and 'cuda' in device.type and torch.cuda.is_available():
        model.to(device)
    embeddings = BertEmbeddings.from_torch(model.embeddings)
    encoder = BertEncoder.from_torch(model.encoder)
    return BertModelNoPooler(embeddings, encoder)
def __init__(self, config: DPRConfig):
    super().__init__(config)
    self.bert_model = BertModel(config)
    assert self.bert_model.config.hidden_size > 0, "Encoder hidden_size can't be zero"
    self.projection_dim = config.projection_dim
    if self.projection_dim > 0:
        self.encode_proj = nn.Linear(self.bert_model.config.hidden_size, config.projection_dim)
    self.init_weights()
class DPREncoder(PreTrainedModel):
    base_model_prefix = "bert_model"

    def __init__(self, config: DPRConfig):
        super().__init__(config)
        self.bert_model = BertModel(config)
        assert self.bert_model.config.hidden_size > 0, "Encoder hidden_size can't be zero"
        self.projection_dim = config.projection_dim
        if self.projection_dim > 0:
            self.encode_proj = nn.Linear(self.bert_model.config.hidden_size, config.projection_dim)
        self.init_weights()

    def forward(
        self,
        input_ids: Tensor,
        attention_mask: Optional[Tensor] = None,
        token_type_ids: Optional[Tensor] = None,
        inputs_embeds: Optional[Tensor] = None,
        output_attentions: bool = False,
        output_hidden_states: bool = False,
        return_tuple: bool = True,
    ) -> Union[BaseModelOutputWithPooling, Tuple[Tensor, ...]]:
        outputs = self.bert_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
        )
        sequence_output, pooled_output = outputs[:2]
        pooled_output = sequence_output[:, 0, :]
        if self.projection_dim > 0:
            pooled_output = self.encode_proj(pooled_output)

        if return_tuple:
            return (sequence_output, pooled_output) + outputs[2:]

        return BaseModelOutputWithPooling(
            last_hidden_state=sequence_output,
            pooler_output=pooled_output,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

    @property
    def embeddings_size(self) -> int:
        if self.projection_dim > 0:
            return self.encode_proj.out_features
        return self.bert_model.config.hidden_size

    def init_weights(self):
        self.bert_model.init_weights()
        if self.projection_dim > 0:
            self.encode_proj.apply(self.bert_model._init_weights)
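# Hedged usage sketch for DPREncoder (the variable names below are illustrative,
# not from the original source). With projection_dim > 0 the [CLS] vector is
# mapped to the projection size, which is exactly what embeddings_size reports:
#
#   config = DPRConfig(projection_dim=128)
#   encoder = DPREncoder(config)
#   seq_out, pooled = encoder(input_ids, attention_mask=mask)[:2]
#   assert pooled.shape[-1] == encoder.embeddings_size   # 128 here, hidden_size otherwise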
def __init__(self, config):
    super(BertForSequenceClassificationNq, self).__init__(config)
    self.num_labels = config.num_labels
    # config.output_hidden_states = True
    bert_later_dropout = 0.3
    self.dropout = nn.Dropout(bert_later_dropout)
    self.later_model_type = config.later_model_type
    if self.later_model_type == 'linear':
        self.bert = BertModel(config)
        self.projection = nn.Linear(config.hidden_size * 3, config.hidden_size)
        self.projection_dropout = nn.Dropout(0.1)
        self.projection_activation = nn.Tanh()
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    elif self.later_model_type == '1bert_layer':
        config.num_hidden_layers = 1
        self.bert = BertModel(config)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    elif self.later_model_type == 'bilinear':
        self.bert = BertModel(config)
        lstm_layers = 2
        self.qemb_match = SeqAttnMatch(config.hidden_size)
        doc_input_size = 2 * config.hidden_size
        # RNN document encoder
        self.doc_rnn = StackedBRNN(
            input_size=doc_input_size,
            hidden_size=config.hidden_size,
            num_layers=lstm_layers,
            dropout_rate=bert_later_dropout,
            dropout_output=bert_later_dropout,
            concat_layers=True,
            rnn_type=nn.LSTM,
            padding=False,
        )
        self.bilinear_dropout = nn.Dropout(bert_later_dropout)
        self.bilinear_size = 128
        self.doc_proj = nn.Linear(lstm_layers * 2 * config.hidden_size, self.bilinear_size)
        self.qs_proj = nn.Linear(config.hidden_size, self.bilinear_size)
        self.bilinear = nn.Bilinear(self.bilinear_size, self.bilinear_size, self.bilinear_size)
        self.classifier = nn.Linear(self.bilinear_size, config.num_labels)
    elif self.later_model_type == 'transformer':
        self.copy_from_bert_layer_num = 11
        self.bert = BertModel(config)
        self.bert_position_emb = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        self.bert_type_id_emb = nn.Embedding(config.type_vocab_size, config.hidden_size)
        self.bert_layer = BertLayer(config)
        self.bert_pooler_qd = BertPoolerQD(config)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.init_weights()
def __init__(self):
    config = BertConfig.from_json_file(join(BERT_PATH, 'bert_config.json'))
    self.tokenizer = BertTokenizer(vocab_file=join(BERT_PATH, 'vocab.txt'))
    self.model = BertModel(config, add_pooling_layer=False)
    load_tf_weights_in_bert(self.model,
                            tf_checkpoint_path=join(BERT_PATH, 'bert_model.ckpt'),
                            strip_bert=True)
    self.model.to(PT_DEVICE)
    self.model.eval()
def _bert_encode_article(self, max_seq_length=128, sequence_a_segment_id=0,
                         sequence_b_segment_id=1, cls_token_segment_id=1,
                         pad_token_segment_id=0, mask_padding_with_zero=True):
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
    bert_config = BertConfig.from_pretrained('bert-base-uncased')
    model = BertModel(bert_config)

    all_input_ids, all_input_mask, all_segment_ids = [], [], []
    for header, article in zip(self.df_url['header'], self.df_url['article']):
        text = header + '. ' + article
        tokens = tokenizer.tokenize(text)
        special_tokens_count = 2
        if len(tokens) > max_seq_length - special_tokens_count:
            tokens = tokens[:(max_seq_length - special_tokens_count)]
        segment_ids = [sequence_a_segment_id] * len(tokens)
        tokens = [tokenizer.cls_token] + tokens + [tokenizer.sep_token]
        segment_ids = [cls_token_segment_id] + segment_ids + [sequence_a_segment_id]

        input_ids = tokenizer.convert_tokens_to_ids(tokens)
        input_mask = [1 if mask_padding_with_zero else 0] * len(input_ids)

        # Padding
        padding_length = max_seq_length - len(input_ids)
        pad_token = tokenizer.convert_tokens_to_ids([tokenizer.pad_token])[0]
        input_ids = input_ids + ([pad_token] * padding_length)
        input_mask = input_mask + [0] * padding_length
        segment_ids = segment_ids + ([pad_token_segment_id] * padding_length)

        assert len(input_ids) == max_seq_length
        assert len(input_mask) == max_seq_length
        assert len(segment_ids) == max_seq_length

        all_input_ids.append(input_ids)
        all_input_mask.append(input_mask)
        all_segment_ids.append(segment_ids)

    all_input_ids = torch.tensor(all_input_ids)
    all_input_mask = torch.tensor(all_input_mask)
    all_segment_ids = torch.tensor(all_segment_ids)

    model.eval()
    outputs = model(all_input_ids, attention_mask=all_input_mask,
                    token_type_ids=all_segment_ids)
    embedding = outputs[1].data.numpy()
    del model
    return embedding
def from_torch(model: TorchBertModel,  # implementation of from_torch
               device: Optional[torch.device] = None):
    if device is not None and 'cuda' in device.type and torch.cuda.is_available():
        model.to(device)
    bertmodel = turbo_transformers.BertModel.from_torch(model.bert)
    # The following line can be reused without changes.
    # Note: classifier is a class member of BertForSequenceClassification. If the user
    # defines other class members, they need to be handled here as well.
    return BertForSequenceClassification(bertmodel, model.classifier)
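# Hedged usage sketch (the checkpoint name is illustrative): convert a Hugging Face
# BertForSequenceClassification into the turbo_transformers-backed wrapper above.
# Note that the body accesses model.bert and model.classifier, so the argument is
# expected to be a sequence-classification model rather than a bare BertModel.
#
#   torch_model = transformers.BertForSequenceClassification.from_pretrained("bert-base-uncased")
#   torch_model.eval()
#   turbo_cls_model = BertForSequenceClassification.from_torch(torch_model, torch.device("cpu"))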
def __init__(self, bert_model_config: BertConfig):
    super(DocumentBertLinear, self).__init__(bert_model_config)
    self.bert = BertModel(bert_model_config)
    self.bert_batch_size = self.bert.config.bert_batch_size
    self.dropout = nn.Dropout(p=bert_model_config.hidden_dropout_prob)
    # self.transformer_encoder = TransformerEncoder(encoder_layer, num_layers=6,
    #                                               norm=nn.LayerNorm(bert_model_config.hidden_size))
    self.classifier = nn.Sequential(
        nn.Dropout(p=bert_model_config.hidden_dropout_prob),
        nn.Linear(bert_model_config.hidden_size * self.bert_batch_size,
                  bert_model_config.num_labels),
        nn.Tanh())
class BertMultiTask:
    def __init__(self, job_config, use_pretrain, tokenizer, cache_dir, device,
                 write_log, summary_writer):
        self.job_config = job_config

        if not use_pretrain:
            model_config = self.job_config.get_model_config()
            bert_config = BertConfig(**model_config)
            bert_config.vocab_size = len(tokenizer.vocab)
            self.bert_encoder = BertModel(bert_config)
        # Use pretrained bert weights
        else:
            self.bert_encoder = BertModel.from_pretrained(self.job_config.get_model_file_type())
            bert_config = self.bert_encoder.config

        self.bert_encoder.to(device)
        self.network = MTLRouting(self.bert_encoder, write_log=write_log,
                                  summary_writer=summary_writer)

        # config_data = self.config['data']
        loss_calculation = BertPretrainingLoss(self.bert_encoder, bert_config)
        loss_calculation.to(device)
        # Pretrain Dataset
        self.network.register_batch(BatchType.PRETRAIN_BATCH, "pretrain_dataset",
                                    loss_calculation=loss_calculation)
        self.device = device
        # self.network = self.network.float()
        # print(f"Bert ID: {id(self.bert_encoder)} from GPU: {dist.get_rank()}")

    def save(self, filename: str):
        network = self.network.module
        return torch.save(network.state_dict(), filename)

    def load(self, model_state_dict: str):
        return self.network.module.load_state_dict(
            torch.load(model_state_dict, map_location=lambda storage, loc: storage))

    def move_batch(self, batch, non_blocking=False):
        return batch.to(self.device, non_blocking)

    def eval(self):
        self.network.eval()

    def train(self):
        self.network.train()

    def save_bert(self, filename: str):
        return torch.save(self.bert_encoder.state_dict(), filename)

    def to(self, device):
        assert isinstance(device, torch.device)
        self.network.to(device)

    def half(self):
        self.network.half()
def __init__(self, bert_model_config: BertConfig):
    super(DocumentBertLSTM, self).__init__(bert_model_config)
    self.bert = BertModel(bert_model_config)
    self.bert_batch_size = self.bert.config.bert_batch_size
    self.dropout = nn.Dropout(p=bert_model_config.hidden_dropout_prob)
    self.lstm = LSTM(bert_model_config.hidden_size, bert_model_config.hidden_size)
    self.classifier = nn.Sequential(
        nn.Dropout(p=bert_model_config.hidden_dropout_prob),
        nn.Linear(bert_model_config.hidden_size, bert_model_config.num_labels),
        nn.Tanh())
class TestBertModel(unittest.TestCase):
    def init_data(self, use_cuda) -> None:
        torch.set_grad_enabled(False)
        torch.set_num_threads(4)
        turbo_transformers.set_num_threads(4)

        self.test_device = torch.device('cuda:0') if use_cuda else \
            torch.device('cpu:0')

        self.cfg = BertConfig()
        self.torch_model = BertModel(self.cfg)
        self.torch_model.eval()
        if torch.cuda.is_available():
            self.torch_model.to(self.test_device)

        self.turbo_model = turbo_transformers.BertModel.from_torch(
            self.torch_model, self.test_device)

    def check_torch_and_turbo(self, use_cuda):
        self.init_data(use_cuda)
        num_iter = 1
        device_name = "GPU" if use_cuda else "CPU"
        input_ids = torch.randint(low=0,
                                  high=self.cfg.vocab_size - 1,
                                  size=(1, 10),
                                  dtype=torch.long,
                                  device=self.test_device)

        torch_model = lambda: self.torch_model(input_ids)
        torch_result, torch_qps, torch_time = \
            test_helper.run_model(torch_model, use_cuda, num_iter)
        print(f'BertModel PyTorch({device_name}) QPS {torch_qps}')

        turbo_model = lambda: self.turbo_model(input_ids)
        with turbo_transformers.pref_guard("bert_perf") as perf:
            turbo_result, turbo_qps, turbo_time = \
                test_helper.run_model(turbo_model, use_cuda, num_iter)
        print(f'BertModel TurboTransformer({device_name}) QPS {turbo_qps}')

        self.assertTrue(
            numpy.allclose(torch_result[0][:, 0].cpu(),
                           turbo_result[0].cpu(),
                           atol=1e-3,
                           rtol=1e-3))

    def test_bert_model(self):
        if torch.cuda.is_available() and \
                turbo_transformers.config.is_compiled_with_cuda():
            self.check_torch_and_turbo(use_cuda=True)
        self.check_torch_and_turbo(use_cuda=False)
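# Running this test standalone follows the usual unittest pattern (assuming the
# test_helper module and a turbo_transformers build are importable); the GPU
# branch is exercised only when both CUDA and a CUDA-enabled build are available.
#
# if __name__ == "__main__":
#     unittest.main()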
def __init__(self, config):
    super(BertForMultiLable, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, config.num_labels)
    self.init_weights()
def __init__(
    self,
    vocab: Vocabulary,
    bert_model: Union[str, BertModel],
    embedding_dropout: float = 0.0,
    initializer: InitializerApplicator = InitializerApplicator(),
    label_smoothing: float = None,
    ignore_span_metric: bool = False,
    srl_eval_path: str = DEFAULT_SRL_EVAL_PATH,
    **kwargs,
) -> None:
    super().__init__(vocab, **kwargs)

    if isinstance(bert_model, str):
        self.bert_model = BertModel.from_pretrained(bert_model)
    else:
        self.bert_model = bert_model

    self.num_classes = self.vocab.get_vocab_size("labels")
    if srl_eval_path is not None:
        # For the span based evaluation, we don't want to consider labels
        # for verb, because the verb index is provided to the model.
        self.span_metric = SrlEvalScorer(srl_eval_path, ignore_classes=["V"])
    else:
        self.span_metric = None
    self.tag_projection_layer = Linear(self.bert_model.config.hidden_size, self.num_classes)

    self.embedding_dropout = Dropout(p=embedding_dropout)
    self._label_smoothing = label_smoothing
    self.ignore_span_metric = ignore_span_metric
    initializer(self)
def __init__(self, config):
    super(LFESM, self).__init__(config)
    self.bert = BertModel(config)
    # self.dropout = nn.Dropout(config.hidden_dropout_prob)
    # self.seq_relationship = nn.Linear(config.hidden_size, 2)
    self.init_weights()

    # dropout = 0.5
    # self._rnn_dropout = RNNDropout(p=dropout)

    feature_size = 28
    self._feature = nn.Linear(feature_size, config.hidden_size)

    self._attention = SoftmaxAttention()

    self._projection = nn.Sequential(
        nn.Linear(4 * config.hidden_size, config.hidden_size),
        nn.ReLU())

    self._composition = Seq2SeqEncoder(nn.LSTM,
                                       config.hidden_size,
                                       config.hidden_size,
                                       bidirectional=True)

    self._classification = nn.Sequential(
        nn.Dropout(p=config.hidden_dropout_prob),  # p=dropout
        nn.Linear(4 * 2 * config.hidden_size, config.hidden_size),
        nn.Tanh(),
        nn.Dropout(p=config.hidden_dropout_prob),  # p=dropout
        nn.Linear(config.hidden_size, 2))

    self.apply(self.init_esim_weights)
def init_data(self, use_cuda) -> None:
    torch.set_grad_enabled(False)
    torch.set_num_threads(4)
    turbo_transformers.set_num_threads(4)

    self.test_device = torch.device('cuda:0') if use_cuda else \
        torch.device('cpu:0')

    self.cfg = BertConfig()
    self.torch_model = BertModel(self.cfg)
    self.torch_model.eval()
    if torch.cuda.is_available():
        self.torch_model.to(self.test_device)

    self.turbo_model = turbo_transformers.BertModel.from_torch(
        self.torch_model, self.test_device, "turbo")
def __init__(self, config): """Initialize the model with config dict. Args: config: python dict must contains the attributes below: config.bert_model_path: pretrained model path or model type e.g. 'bert-base-chinese' config.hidden_size: The same as BERT model, usually 768 config.num_classes: int, e.g. 2 config.dropout: float between 0 and 1 """ super().__init__() self.bert = BertModel.from_pretrained(config.bert_model_path) for param in self.bert.parameters(): param.requires_grad = True hidden_size = config.fc_hidden target_class = config.num_classes # self.resnet = resnet18(num_classes=hidden_size) #self.resnet = ResNet(block=BasicBlock, layers=[1, 1, 1, 1], num_classes=hidden_size) # self.resnet = ResNet(config.in_channels, 18) self.fpn = FPN([256]* 4, 4) self.fpn_seq = FPN([128,128,128,70], 4) #cnn feature map has a total number of 228 dimensions. self.dropout = nn.Dropout(config.dropout) self.fc1 = nn.Linear(hidden_size, target_class) self.num_classes = config.num_classes
def __init__(self, config):
    super().__init__(config)
    self.num_labels = config.num_labels
    self.bert = BertModel(config)
    self.crf = CRF(config.num_labels, batch_first=True)
    self.classifier_bienc = nn.Linear(2 * config.hidden_size, config.num_labels)

    N = 4               # number of layers
    h = 4               # number of heads
    dropout_value = 0.1
    d_model = config.hidden_size
    d_ff = 2048
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model, dropout=dropout_value)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout=dropout_value)
    self.encoder = Encoder(
        EncoderLayer(d_model, c(attn), c(ff), dropout_value), N)
    self.decoder = Decoder(
        DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout=dropout_value), N)

    self.init_weights()
def __init__(self, config: BertConfig, **kwargs: Any):
    """The classification init is a superset of the LM init."""
    super().__init__(config, **kwargs)
    self.config = config
    self.bert = BertModel(config=self.config)
    self.lm_head = BertOnlyMLMHead(self.config)
    self.lm_head.apply(self._init_weights)
    self.qa_head = BertOnlyMLMHead(self.config)
    self.qa_head.apply(self._init_weights)
    self.dropout = nn.Dropout(self.config.hidden_dropout_prob)
    self.classifier = nn.Linear(self.config.hidden_size, self.config.num_labels)
    self.classifier.apply(self._init_weights)
def __init__(self, config):
    super(ImageBertForSequenceClassification, self).__init__(config)
    self.num_labels = config.num_labels
    self.config = config
    if config.img_feature_dim > 0:
        self.bert = BertImgModel(config)
    else:
        self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)

    if hasattr(config, 'classifier'):
        if not hasattr(config, 'cls_hidden_scale'):
            config.cls_hidden_scale = 2
        if config.classifier == 'linear':
            self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)
        elif config.classifier == 'mlp':
            self.classifier = nn.Sequential(
                nn.Linear(config.hidden_size, config.hidden_size * config.cls_hidden_scale),
                nn.ReLU(),
                nn.Linear(config.hidden_size * config.cls_hidden_scale, self.config.num_labels))
    else:
        self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)  # original
    self.apply(self._init_weights)
def __init__(self, config, args, tokenizer):
    super(DecoderWithLoss, self).__init__()
    # model components
    print("initializing decoder with params {}".format(args))
    self.bert = BertModel(config)
    self.lm_head = BertOnlyMLMHead(config)
    self.span_b_proj = nn.ModuleList(
        [HighwayLayer(config.hidden_size) for _ in range(args.num_highway)])
    self.span_e_proj = nn.ModuleList(
        [HighwayLayer(config.hidden_size) for _ in range(args.num_highway)])
    # predict text span beginning and end
    self.text_span_start_head = nn.Linear(config.hidden_size, config.hidden_size)
    self.text_span_end_head = nn.Linear(config.hidden_size, config.hidden_size)
    # loss functions
    if args.node_label_smoothing > 0:
        self.lm_ce_loss = LabelSmoothingLoss(args.node_label_smoothing,
                                             config.vocab_size,
                                             ignore_index=tokenizer.pad_token_id)
    else:
        self.lm_ce_loss = torch.nn.CrossEntropyLoss(
            ignore_index=tokenizer.pad_token_id, reduction="none")
    self.span_ce_loss = torch.nn.CrossEntropyLoss(ignore_index=-1, reduction="none")
    self.span_loss_lb = args.lambda_span_loss
    self.text_span_loss = torch.nn.CrossEntropyLoss(ignore_index=-1, reduction="none")
    self.tree_to_text = args.tree_to_text
def __init__(self, config):
    super(BertForSequentialSentenceSelector, self).__init__(config)

    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)

    # Initial state
    self.s = Parameter(torch.FloatTensor(config.hidden_size).uniform_(-0.1, 0.1))

    # Scaling factor for weight norm
    self.g = Parameter(torch.FloatTensor(1).fill_(1.0))

    # RNN weight
    self.rw = nn.Linear(2 * config.hidden_size, config.hidden_size)

    # EOE and output bias
    self.eos = Parameter(torch.FloatTensor(config.hidden_size).uniform_(-0.1, 0.1))
    self.bias = Parameter(torch.FloatTensor(1).zero_())

    # self.apply(self.init_bert_weights)
    self.init_weights()
    self.cpu = torch.device('cpu')
def from_scratch(cls, vocab_size, name="bert", language="en"):
    bert = cls()
    bert.name = name
    bert.language = language
    config = BertConfig(vocab_size=vocab_size)
    bert.model = BertModel(config)
    return bert
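# A minimal, runnable sketch of what from_scratch builds under the hood, using only
# the transformers API. The wrapper class owning the classmethod is not shown here,
# so the vocabulary size below is an illustrative assumption (BERT's default 30522).
from transformers import BertConfig, BertModel

config = BertConfig(vocab_size=30522)   # in practice, the size of the trained tokenizer
model = BertModel(config)               # randomly initialised, ready for pre-training from scratch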
def __init__(self, config, action_num, recur_type="gated", allow_yes_no=False):
    super(RCMBert, self).__init__(config)
    self.bert = BertModel(config)
    self.recur_type = recur_type
    self.allow_yes_no = allow_yes_no
    if recur_type == "gated":
        self.recur_network = recurGatedNetwork(config.hidden_size, config.hidden_size)
    elif recur_type == "lstm":
        self.recur_network = recurLSTMNetwork(config.hidden_size, config.hidden_size)
    else:
        print("Invalid recur_type: {}".format(recur_type))
        sys.exit(0)
    self.action_num = action_num
    self.stop_network = stopNetwork(config.hidden_size)
    self.move_stride_network = moveStrideNetwork(config.hidden_size, self.action_num)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    if self.allow_yes_no:
        self.yes_no_flag_outputs = nn.Linear(config.hidden_size, 2)
        self.yes_no_ans_outputs = nn.Linear(config.hidden_size, 2)
    self.qa_outputs = nn.Linear(config.hidden_size, 2)
    self.init_weights()
def __init__(self, config):
    super().__init__(config)
    self.bert = BertModel(config)
    self.cls = BertPreTrainingHeads(config)
    self.qa_outputs = torch.nn.Linear(config.hidden_size, 2)
    self.init_weights()
def __init__(self, config, num_classes, vocab) -> None:
    super(PairwiseClassifier, self).__init__(config)
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, num_classes)
    self.vocab = vocab
    self.init_weights()
def __init__(self, config):
    super(BertForDST, self).__init__(config)
    self.slot_list = config.dst_slot_list
    self.class_types = config.dst_class_types
    self.class_labels = config.dst_class_labels
    self.token_loss_for_nonpointable = config.dst_token_loss_for_nonpointable
    self.refer_loss_for_nonpointable = config.dst_refer_loss_for_nonpointable
    self.class_aux_feats_inform = config.dst_class_aux_feats_inform
    self.class_aux_feats_ds = config.dst_class_aux_feats_ds
    self.class_loss_ratio = config.dst_class_loss_ratio

    # Only use refer loss if refer class is present in dataset.
    if 'refer' in self.class_types:
        self.refer_index = self.class_types.index('refer')
    else:
        self.refer_index = -1

    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.dst_dropout_rate)
    self.dropout_heads = nn.Dropout(config.dst_heads_dropout_rate)

    if self.class_aux_feats_inform:
        self.add_module("inform_projection",
                        nn.Linear(len(self.slot_list), len(self.slot_list)))
    if self.class_aux_feats_ds:
        self.add_module("ds_projection",
                        nn.Linear(len(self.slot_list), len(self.slot_list)))

    aux_dims = len(self.slot_list) * (
        self.class_aux_feats_inform + self.class_aux_feats_ds)  # second term is 0, 1 or 2

    for slot in self.slot_list:
        self.add_module("class_" + slot,
                        nn.Linear(config.hidden_size + aux_dims, self.class_labels))
        self.add_module("token_" + slot, nn.Linear(config.hidden_size, 2))
        self.add_module("refer_" + slot,
                        nn.Linear(config.hidden_size + aux_dims, len(self.slot_list) + 1))

    self.init_weights()
def from_pretrained(model_id_or_path: str, device: Optional[torch.device] = None):
    torch_model = TorchBertModel.from_pretrained(model_id_or_path)
    model = BertModelNoPooler.from_torch(torch_model, device)
    model.config = torch_model.config
    model._torch_model = torch_model  # keep a reference so the torch model is not destroyed
    return model
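# Hedged usage sketch for the loader above (model name, device, and token ids are
# illustrative): build the pooler-free turbo model from a Hugging Face checkpoint
# and run it on a batch of token ids.
#
#   model = BertModelNoPooler.from_pretrained("bert-base-uncased", torch.device("cpu"))
#   input_ids = torch.tensor([[101, 7592, 2088, 102]])   # [CLS] hello world [SEP]
#   hidden_states = model(input_ids)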
def __init__(self, config, feature=None, use_lstm=False, device="cpu"):
    super(NerModel, self).__init__(config)
    self.num_labels = config.num_labels
    self.use_feature = False
    self.use_lstm = False
    self.hidden_size = config.hidden_size
    self.bert = BertModel(config)
    self.ferep = None
    if feature is not None:
        self.ferep = FeatureRep(feature, device)
        self.use_feature = True
        self.hidden_size += self.ferep.feature_dim
    if use_lstm:
        self.use_lstm = True
        self.lstm = nn.LSTM(self.hidden_size,
                            config.hidden_size,
                            batch_first=True,
                            num_layers=1)
        self.hidden_size = config.hidden_size
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(self.hidden_size, config.num_labels)
    self.init_weights()