Example #1
def extract_embeddings_for_other_clf():
    distil_bert = "distilbert-base-uncased"

    config = DistilBertConfig(dropout=0.2, attention_dropout=0.2)
    config.output_hidden_states = False
    transformer_model = TFDistilBertModel.from_pretrained(distil_bert,
                                                          config=config)

    input_ids_in = tf.keras.layers.Input(shape=(25, ),
                                         name="input_token",
                                         dtype="int32")
    input_masks_in = tf.keras.layers.Input(shape=(25, ),
                                           name="masked_token",
                                           dtype="int32")

    embedding_layer = transformer_model(input_ids_in,
                                        attention_mask=input_masks_in)[0]
    cls_token = embedding_layer[:, 0, :]
    X = tf.keras.layers.BatchNormalization()(cls_token)
    X = tf.keras.layers.Dense(192, activation="relu")(X)
    X = tf.keras.layers.Dropout(0.2)(X)
    X = tf.keras.layers.Dense(3, activation="softmax")(X)
    model = tf.keras.Model(inputs=[input_ids_in, input_masks_in], outputs=X)

    # Keep the inputs and DistilBERT frozen - transfer learning only
    for layer in model.layers[:3]:
        layer.trainable = False

    return model
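A minimal usage sketch for the function above (not part of the original snippet): the tokenizer checkpoint, the toy texts and the labels are assumptions, chosen to match the 25-token inputs and the 3-way softmax head.

import numpy as np
import tensorflow as tf
from transformers import DistilBertConfig, DistilBertTokenizerFast, TFDistilBertModel

tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
texts = ["first example sentence", "second example sentence"]   # placeholder data
labels = np.array([0, 2])                                       # placeholder labels
enc = tokenizer(texts, padding="max_length", truncation=True,
                max_length=25, return_tensors="np")

model = extract_embeddings_for_other_clf()
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
model.fit([enc["input_ids"].astype("int32"),
           enc["attention_mask"].astype("int32")],
          labels, epochs=1, batch_size=2)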
Example #2
def create_model(model_config: CommentClassifierConfig,
                 saved_weights_path: str = None,
                 max_seq_length: int = MAX_SEQ_LENGTH) -> tf.keras.Model:
    """
    :param model_config:       CommentClassifierConfig
    :param saved_weights_path: If defined, model weights will be loaded
                               from the provided checkpoint path
    :param max_seq_length:     Maximum length of the tokenized input to BERT
    :return:
        Model for text classification using DistilBert transformers
    """
    # Load pre-trained DistilBERT
    bert_config = DistilBertConfig(
        dropout=model_config.bert_dropout,
        attention_dropout=model_config.bert_attention_dropout,
        num_labels=NUM_CLASSES)
    bert_config.output_hidden_states = False
    transformer_model = TFDistilBertModel.from_pretrained(MODEL_NAME,
                                                          config=bert_config)

    input_ids_in = tf.keras.layers.Input(shape=(max_seq_length, ),
                                         name='input_token',
                                         dtype='int32')
    input_masks_in = tf.keras.layers.Input(shape=(max_seq_length, ),
                                           name='masked_token',
                                           dtype='int32')

    embedding_layer = transformer_model(input_ids_in,
                                        attention_mask=input_masks_in)[0]

    x = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(
            model_config.lstm_units,
            return_sequences=True,
            dropout=model_config.lstm_dropout,
            recurrent_dropout=model_config.lstm_recurrent_dropout))(
                embedding_layer)

    x = tf.keras.layers.GlobalMaxPool1D()(x)
    x = tf.keras.layers.Dense(
        model_config.hidden_layer_dim,
        activation=model_config.hidden_layer_activation)(x)

    x = tf.keras.layers.Dropout(model_config.final_layer_dropout)(x)
    x = tf.keras.layers.Dense(
        NUM_CLASSES, activation=model_config.final_layer_activation)(x)

    model = tf.keras.Model(inputs=[input_ids_in, input_masks_in], outputs=x)

    # Use transfer learning only - do not train BERT again
    for layer in model.layers[:3]:
        layer.trainable = False

    # Load weights from a checkpoint, but allow partial matching
    # (e.g. due to a change in the optimizer)
    if saved_weights_path is not None:
        model.load_weights(saved_weights_path).expect_partial()

    return model
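A hedged round-trip sketch for the saved_weights_path argument. The CommentClassifierConfig constructor call and its values are assumptions (only the field names appear above), and MODEL_NAME, NUM_CLASSES and MAX_SEQ_LENGTH are taken to be module-level constants as in the snippet.

cfg = CommentClassifierConfig(                   # hypothetical values
    bert_dropout=0.1, bert_attention_dropout=0.1,
    lstm_units=64, lstm_dropout=0.2, lstm_recurrent_dropout=0.0,
    hidden_layer_dim=128, hidden_layer_activation="relu",
    final_layer_dropout=0.3, final_layer_activation="softmax")

model = create_model(cfg)
model.save_weights("comment_clf_ckpt")           # TF checkpoint format
# Rebuild the graph and restore the weights (partial match allowed)
restored = create_model(cfg, saved_weights_path="comment_clf_ckpt")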
Example #3
    def __init__(self, config: Dict):
        super().__init__()

        self.config = config
        self.model_config = DistilBertConfig(**self.config["model"])
        self.model = DistilBertModel(self.model_config)
        self.criterion = nn.CosineEmbeddingLoss(margin=0.0, reduction='mean')
Example #4
    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length],
                               self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = ids_tensor([self.batch_size, self.seq_length],
                                    vocab_size=2)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size],
                                         self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length],
                                      self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = DistilBertConfig(
            vocab_size=self.vocab_size,
            dim=self.hidden_size,
            n_layers=self.num_hidden_layers,
            n_heads=self.num_attention_heads,
            hidden_dim=self.intermediate_size,
            hidden_act=self.hidden_act,
            dropout=self.hidden_dropout_prob,
            attention_dropout=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings,
            initializer_range=self.initializer_range,
        )

        return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
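ids_tensor comes from the transformers test utilities and is not defined in the snippet; a self-contained stand-in with the same observable behaviour for these calls (an assumption, not the original helper) could be:

import torch

def ids_tensor(shape, vocab_size):
    # Random integer tensor in [0, vocab_size) with the requested shape.
    return torch.randint(0, vocab_size, tuple(shape), dtype=torch.long)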
Example #5
 def __init__(self, config={}):
     super(DistilBert, self).__init__()
     self.masking = tf.keras.layers.Masking()
     self.fc1 = Dense(config['base_config']['dim'], activation='relu')
     self.model_config = DistilBertConfig.from_dict(config['base_config'])
     self.base = TFDistilBertModel(self.model_config)
     self.head = HEADS[config['head']['name']](config['head'])
Example #6
def classify(text):
    print('start')

    path = os.path.join(settings.MEDIA_ROOT, "distilbert.bin")
    MODEL_PATH = 'distilbert-base-uncased'
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

    encode = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=192,
        pad_to_max_length=True,
        truncation=True,
    )
    device = torch.device('cpu')
    tokens = encode['input_ids']
    tokens = torch.tensor(tokens, dtype=torch.long).unsqueeze(0)
    tokens = tokens.to(device)
    config = DistilBertConfig()
    model = Bert(DistilBertModel(config))

    model.load_state_dict(torch.load(path, map_location=device))
    model.to(device)

    output = model(tokens)
    output = output.cpu().detach().numpy()

    print(output)
    output = 0.0 if output < 0.5 else 1.0
    return output
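The Bert wrapper used above is not shown in the snippet; a plausible minimal version that matches how it is called here (a single probability compared against 0.5) could look like this, purely an assumption rather than the original class:

import torch
import torch.nn as nn

class Bert(nn.Module):
    def __init__(self, backbone):
        super().__init__()
        self.backbone = backbone                        # e.g. DistilBertModel
        self.head = nn.Linear(backbone.config.dim, 1)   # single logit

    def forward(self, tokens):
        hidden = self.backbone(tokens)[0]               # (batch, seq_len, dim)
        cls_vector = hidden[:, 0, :]                    # [CLS]-position vector
        return torch.sigmoid(self.head(cls_vector))     # probability in [0, 1]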
Example #7
    def __init__(self,
                 embedding_size,
                 projection_size,
                 n_layers,
                 emo_dict,
                 dropout=0.1):
        super(context_classifier_model, self).__init__()

        self.projection_size = projection_size
        self.projection = torch.nn.Linear(embedding_size, projection_size)
        self.position_embeds = torch.nn.Embedding(3, projection_size)
        self.norm = torch.nn.LayerNorm(projection_size)
        self.drop = torch.nn.Dropout(dropout)

        context_config = DistilBertConfig(dropout=dropout,
                                          dim=projection_size,
                                          hidden_dim=4 * projection_size,
                                          n_layers=n_layers,
                                          n_heads=1,
                                          num_labels=4)

        self.context_transformer = DistilBertForSequenceClassification(
            context_config)
        self.others_label = emo_dict['others']
        self.bin_loss_fct = torch.nn.BCEWithLogitsLoss()
Example #8
def test(informal):
    if torch.cuda.is_available():
        device = torch.device('cuda:3')
        print(f'Using GPU device: {device}')
    else:
        device = torch.device('cpu')
        print(f'GPU is not available, using CPU device {device}')

    test_config = {'batch_size': 5, 'epoch': 29, 'save_dir': './checkpoints/'}

    test_dataset = FormalDataset(informal)
    dataloader = DataLoader(test_dataset,
                            batch_size=test_config['batch_size'],
                            shuffle=False,
                            num_workers=4,
                            drop_last=False)
    config = DistilBertConfig()
    model = DistilBertForMaskedLM(config)
    load_model(test_config['epoch'], model, test_config['save_dir'])
    model.to(device)
    model.eval()
    with torch.no_grad():
        for i, batch in tqdm(enumerate(dataloader)):
            inp = batch['input_ids'].to(device)
            attn = batch['attention_mask'].to(device)
            logits = model(input_ids=inp, attention_mask=attn)[0]
            preds = decode_text(test_dataset.tokenizer, logits)
            with open('test_pred.txt', 'a') as res_file:
                for seq in preds:
                    res_file.write(seq + '\n')
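decode_text is not defined in the snippet; a plausible stand-in (an assumption) that turns the masked-LM logits into strings could be:

def decode_text(tokenizer, logits):
    # Greedy decoding: pick the most likely token at every position.
    token_ids = logits.argmax(dim=-1)                   # (batch, seq_len)
    return tokenizer.batch_decode(token_ids, skip_special_tokens=True)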
Example #9
    def __init__(self,
                 max_seq_len=MAX_LEN,
                 batch_size=BATCH_SIZE,
                 n_epochs=N_EPOCHS,
                 val_size=0.1,
                 learning_rate=LEARNING_RATE,
                 load_local_pretrained=False):

        self.max_seq_len = max_seq_len
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.val_size = val_size
        self.learning_rate = learning_rate

        # Load dataset, tokenizer, model from pretrained model/vocabulary
        self.tokenizer = (DistilBertTokenizerFast.from_pretrained(
            BERTMODEL, do_lower_case=False))

        if load_local_pretrained:
            self.model = (TFDistilBertForSequenceClassification.
                          from_pretrained(MODEL_PATH))

        else:
            config = DistilBertConfig.from_pretrained(BERTMODEL, num_labels=2)
            self.model = (
                TFDistilBertForSequenceClassification.from_pretrained(
                    BERTMODEL, config=config))
            # Freeze distilbert layer
            self.model.distilbert.trainable = False
Example #10
	def __load(self):
		dbertConf = DistilBertConfig.from_pretrained(self.path + '/config.json')
		self.model = TFDistilBertForSequenceClassification.from_pretrained(
			self.path + '/tf_model.h5',
			config=dbertConf,
		)
Example #11
def loadNet(modelURL,
            numClasses,
            unfreezePretrain=False,
            fromHuggingFace=False):

    if not fromHuggingFace:
        pretrainedNet = hub.KerasLayer(modelURL,
                                       input_shape=(IMG_SIZE, IMG_SIZE,
                                                    CHANNELS))
        pretrainedNet.trainable = unfreezePretrain  # frozen unless unfreezePretrain is True

    else:
        # config = AutoConfig.from_pretrained(modelURL, config.num_labels = numClasses, config.seq_classif_dropout = 0) #distil
        # tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')
        config = DistilBertConfig(
            num_labels=numClasses)  # , seq_classif_dropout = 0.99

        print(f'Number of Classes: {numClasses}')
        # config.num_labels = numClasses
        # config.seq_classif_dropout = 0
        print(config)
        pretrainedNet = TFDistilBertForSequenceClassification.from_pretrained(
            modelURL, config=config)  # TFBertForSequenceClassification
        pretrainedNet.layers[0].trainable = unfreezePretrain

    return pretrainedNet
Example #12
 def load_model(self, model_name: str = "bert_ner_test"):
     # TODO model loaded from mlflow
     # Load model and tokenizer.
     config = DistilBertConfig.from_pretrained(model_name)
     model = DistilBertForTokenClassification.from_pretrained(
         model_name, config=config)
     tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
     return model, config, tokenizer
Example #13
 def model_load(self, path: str):
     config = DistilBertConfig.from_pretrained(path + "/config.json")
     tokenizer = DistilBertTokenizer.from_pretrained(
         path, do_lower_case=self.do_lower_case)
     model = DistilBertForQuestionAnswering.from_pretrained(path,
                                                            from_tf=False,
                                                            config=config)
     return model, tokenizer
Example #14
def classify_with_pre_trained():
    # model = "neuralmind/bert-base-portuguese-cased"

    distil_bert = "distilbert-base-uncased"  # assumed checkpoint, as in Example #1
    config = DistilBertConfig(num_labels=3)
    config.output_hidden_states = False
    transformer_model = TFDistilBertForSequenceClassification.from_pretrained(
        distil_bert, config=config)

    input_ids = tf.keras.layers.Input(shape=(128, ),
                                      name="input_token",
                                      dtype="int32")
    input_masks_ids = tf.keras.layers.Input(shape=(128, ),
                                            name="masked_token",
                                            dtype="int32")
    X = transformer_model(input_ids, attention_mask=input_masks_ids)[0]
    model = tf.keras.Model(inputs=[input_ids, input_masks_ids], outputs=X)

    return model
Example #15
    def __init__(self, dropout):
        super(DISTILBertModel, self).__init__()

        self.distilbert = DistilBertModel.from_pretrained(
            config.PATHS['distilbert'],
            config=DistilBertConfig())

        self.fc = nn.Linear(768, 2)
        self.dropout = nn.Dropout(dropout)
Example #16
    def __init__(self,
                 args,
                 device,
                 d_model=256,
                 nhead=4,
                 d_ff=1024,
                 nlayers=2,
                 dropout=0.5):
        super(Autoencoder, self).__init__()
        self.model_type = 'Transformer'
        self.d_model = d_model

        self.src_mask = None
        self.pos_encoder = PositionalEncoding(d_model,
                                              dropout)  # encoder's position
        self.pos_decoder = PositionalEncoding(d_model,
                                              dropout)  # decoder's position

        decoder_layers = TransformerDecoderLayer(d_model, nhead, d_ff, dropout)
        decoder_norm = nn.LayerNorm(d_model)
        self.transformer_decoder = TransformerDecoder(decoder_layers, nlayers,
                                                      decoder_norm)

        # self.bert_encoder = BertModel.from_pretrained(args.PRETRAINED_MODEL_NAME, output_hidden_states=args.distill_2)
        if args.use_albert:
            self.bert_encoder = AlbertModel.from_pretrained(
                "clue/albert_chinese_tiny")
            self.bert_embed = self.bert_encoder.embeddings
            # self.tgt_embed = self.bert_embed
            d_vocab = self.bert_encoder.config.vocab_size + 1
            self.tgt_embed = nn.Sequential(
                Embeddings(d_model, d_vocab),
                PositionalEncoding(d_model, dropout))
        elif args.use_tiny_bert:
            self.bert_encoder = AutoModel.from_pretrained(
                "google/bert_uncased_L-2_H-256_A-4")
            self.bert_embed = self.bert_encoder.embeddings
            self.tgt_embed = self.bert_embed
        elif args.use_distil_bert:
            configuration = DistilBertConfig()
            self.bert_encoder = DistilBertModel(configuration)
            self.bert_embed = self.bert_encoder.embeddings
            self.tgt_embed = self.bert_embed
        # self.tgt_embed = self.bert.embeddings
        else:
            self.bert_encoder = BertModel.from_pretrained(
                args.PRETRAINED_MODEL_NAME,
                output_hidden_states=args.distill_2)
            self.bert_embed = self.bert_encoder.embeddings
            self.tgt_embed = self.bert_embed

        self.distill_2 = args.distill_2
        self.gru = nn.GRU(d_model, d_model, 1)
        self.lr = nn.Linear(d_model, self.bert_encoder.config.vocab_size + 1)
        self.sigmoid = nn.Sigmoid()
        self.device = device
        self.init_weights()
Example #17
    def model_load(self, path):

        s3_model_url = 'https://distilbert-finetuned-model.s3.eu-west-2.amazonaws.com/pytorch_model.bin'
        path_to_model = download_model(s3_model_url, model_name="pytorch_model.bin")

        config = DistilBertConfig.from_pretrained(path + "/config.json")
        tokenizer = DistilBertTokenizer.from_pretrained(path, do_lower_case=self.do_lower_case)
        model = DistilBertForQuestionAnswering.from_pretrained(path_to_model, from_tf=False, config=config)

        return model, tokenizer
Example #18
def model_fn(model_dir):
    config = DistilBertConfig.from_json_file('/opt/ml/model/code/config.json')
    
    model_path = '{}/{}'.format(model_dir, 'model.pth') 
    model = DistilBertForSequenceClassification.from_pretrained(model_path, config=config)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    
    return model
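In the SageMaker PyTorch serving convention, model_fn is usually paired with a predict_fn; a minimal hedged sketch (the tokenizer checkpoint is an assumption, not part of the snippet):

import torch
from transformers import DistilBertTokenizer

def predict_fn(input_data, model):
    tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    device = next(model.parameters()).device
    enc = tokenizer(input_data, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        logits = model(input_ids=enc['input_ids'].to(device),
                       attention_mask=enc['attention_mask'].to(device))[0]
    return logits.softmax(dim=-1).cpu().numpy()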
Example #19
def model_fn(model_dir):
    config = DistilBertConfig.from_json_file("/opt/ml/model/code/config.json")

    model_path = "{}/{}".format(model_dir, "model.pth")
    model = DistilBertForSequenceClassification.from_pretrained(model_path, config=config)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    return model
Example #20
def download_distilbert_base():
    file = '../input/distilbert-base-uncased'

    config = DistilBertConfig.from_pretrained('distilbert-base-uncased')
    config.save_pretrained(file)
    
    model = DistilBertModel.from_pretrained('distilbert-base-uncased')
    model.save_pretrained(file)

    tkn = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    tkn.save_pretrained(file)
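Reloading the copies saved above works directly from the local directory (a minimal sketch; it assumes the relative path from the snippet exists at run time):

from transformers import DistilBertConfig, DistilBertModel, DistilBertTokenizer

local_dir = '../input/distilbert-base-uncased'
config = DistilBertConfig.from_pretrained(local_dir)
model = DistilBertModel.from_pretrained(local_dir, config=config)
tokenizer = DistilBertTokenizer.from_pretrained(local_dir)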
Example #21
 def __init__(self, tokenizer, device):
     super(VQAModel, self).__init__()
     self.tokenizer = tokenizer
     self.device = device
     self.image_embedding = torch.nn.Sequential(
         *(list(models.resnet152(pretrained=True).children())[:-1]))
     configuration = DistilBertConfig()
     self.question_embedding = DistilBertModel(configuration)
     self.linear1 = torch.nn.Linear(2816, 1024)
     self.linear2 = torch.nn.Linear(1024, 512)
     self.linear3 = torch.nn.Linear(512, 2)
Example #22
def main():
    # Parse command-line arguments (generated automatically from the docstring at the top of this file)
    args = docopt(__doc__)
    pprint(args)

    # Read the hyperparameters
    lr = float(args['--lr'])
    seq_len = int(args['--seq_len'])
    max_epoch = int(args['--max_epoch'])
    batch_size = int(args['--batch_size'])
    num_train = int(args['--num_train'])
    num_valid = int(args['--num_valid'])

    # Select the model
    pretrained_weights = 'distilbert-base-uncased'
    tokenizer = DistilBertTokenizer.from_pretrained(pretrained_weights)
    config = DistilBertConfig(num_labels=4)
    model = DistilBertForSequenceClassification.from_pretrained(
        pretrained_weights, config=config)

    # Pick the device to use
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    # Load the data and build the datasets
    encoder = TwinPhraseEncoder(tokenizer, seq_len)

    train_dataset = WordnetDataset(mode='train',
                                   num_data=num_train,
                                   transform=encoder)
    valid_dataset = WordnetDataset(mode='valid',
                                   num_data=num_valid,
                                   transform=encoder)
    train_loader = data.DataLoader(train_dataset, batch_size, shuffle=True)
    valid_loader = data.DataLoader(valid_dataset, batch_size, shuffle=True)

    # Define the optimizer
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Training loop
    for epoch in range(1, max_epoch + 1):
        print('=' * 27 + f' Epoch {epoch:0>2} ' + '=' * 27)
        # Training
        loss, accu = train_model(model, optimizer, train_loader, device)
        print(
            f'|  Training    |  loss-avg : {loss:>8.6f}  |  accuracy : {accu:>8.3%}  |'
        )
        # Validation
        loss, accu = valid_model(model, optimizer, valid_loader, device)
        print(
            f'|  Validation  |  loss-avg : {loss:>8.6f}  |  accuracy : {accu:>8.3%}  |'
        )
        # Save the model weights
        torch.save(model.state_dict(), '../result/bert.pkl')
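Restoring the weights saved above for inference (a minimal sketch, reusing the same pretrained checkpoint and num_labels as in main()):

import torch
from transformers import DistilBertConfig, DistilBertForSequenceClassification

model = DistilBertForSequenceClassification.from_pretrained(
    'distilbert-base-uncased', config=DistilBertConfig(num_labels=4))
model.load_state_dict(torch.load('../result/bert.pkl', map_location='cpu'))
model.eval()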
Example #23
 def test_TFDistilBertForQuestionAnswering(self):
     from transformers import DistilBertConfig, TFDistilBertForQuestionAnswering
     keras.backend.clear_session()
     # pretrained_weights = 'distilbert-base-uncased'
     tokenizer_file = 'distilbert_distilbert-base-uncased.pickle'
     tokenizer = self._get_tokenzier(tokenizer_file)
     text, inputs, inputs_onnx = self._prepare_inputs(tokenizer)
     config = DistilBertConfig()
     model = TFDistilBertForQuestionAnswering(config)
     predictions = model.predict(inputs)
     onnx_model = keras2onnx.convert_keras(model, model.name)
     self.assertTrue(run_onnx_runtime(onnx_model.graph.name, onnx_model, inputs_onnx, predictions, self.model_files))
Example #24
def create_model(max_seq_len, classes):
    config = DistilBertConfig(dropout=0.2, attention_dropout=0.2)
    config.output_hidden_states = False
    tfm = TFDistilBertModel.from_pretrained('./MODEL/uncased/', config=config)
    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="input_ids")
    bert_output = tfm(input_ids)[0]

    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(bert_output)
    cls_out = keras.layers.Dropout(0.5)(cls_out)
    logits = keras.layers.Dense(units=768, activation="tanh")(cls_out)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(units=512, activation="tanh")(logits)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(units=256, activation="tanh")(logits)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(units=len(classes), activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    return model
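A usage sketch for create_model above. Assumptions: the './MODEL/uncased/' checkpoint directory from the snippet exists, the tokenizer is loaded by its public name, and the class names are placeholders.

from transformers import DistilBertTokenizerFast

tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
classes = ['negative', 'neutral', 'positive']        # placeholder class names
model = create_model(max_seq_len=128, classes=classes)

enc = tokenizer(['some text to classify'], padding='max_length',
                truncation=True, max_length=128, return_tensors='np')
probs = model.predict(enc['input_ids'].astype('int32'))   # shape (1, len(classes))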
Example #25
 def __init__(
         self,
         pretrainString='distilbert-base-uncased-finetuned-sst-2-english'):
     super().__init__()
     configuration = DistilBertConfig(dropout=0.25, num_labels=7)
     self.bert = DistilBertModel.from_pretrained(pretrainString,
                                                 config=configuration)
     self.pre_classifier = nn.Linear(configuration.dim, configuration.dim)
     self.classifier = nn.Linear(configuration.dim,
                                 configuration.num_labels)
     self.dropout = nn.Dropout(configuration.seq_classif_dropout)
     self.to(device)
Example #26
def build_model(args):
    if args.clf_model.lower() == "cnn":
        # easy for text tokenization
        tokenizer = DistilBertTokenizer.from_pretrained(
            args.model_name_or_path, do_lower_case=args.do_lower_case)
        model = CNN_Text(args)

    elif args.clf_model.lower() == "robert":
        print("name is {}".format(args.model_name_or_path))
        tokenizer = RobertaTokenizer.from_pretrained(
            args.model_name_or_path, do_lower_case=args.do_lower_case)

        config = RobertaConfig.from_pretrained(args.model_name_or_path,
                                               num_labels=args.num_labels,
                                               finetuning_task=args.task_name)

        model = RobertaForSequenceClassification.from_pretrained(
            args.model_name_or_path, config=config)
        # freeze the transformer weights
        if args.freeze:
            for n, p in model.named_parameters():
                if "bert" in n:
                    p.requires_grad = False
    elif args.clf_model.lower() == "bert":
        tokenizer = BertTokenizer.from_pretrained(
            args.model_name_or_path, do_lower_case=args.do_lower_case)

        config = BertConfig.from_pretrained(args.model_name_or_path,
                                            num_labels=args.num_labels,
                                            finetuning_task=args.task_name)

        model = BertForSequenceClassification.from_pretrained(
            args.model_name_or_path, config=config)
        # freeze the transformer weights
        # if args.freeze:
        #     for n, p in model.named_parameters():
        #         if "bert" in n:
        #             p.requires_grad = False

    else:
        tokenizer = DistilBertTokenizer.from_pretrained(
            args.model_name_or_path, do_lower_case=args.do_lower_case)
        config = DistilBertConfig.from_pretrained(
            args.model_name_or_path,
            num_labels=args.num_labels,
            finetuning_task=args.task_name)
        model = DistilBertForSequenceClassification.from_pretrained(
            args.model_name_or_path, config=config)

    model.expand_class_head(args.multi_head)
    model = model.to(args.device)
    return tokenizer, model
Example #27
    def __init__(self, bert_type='bertbase'):
        super(Bert, self).__init__()
        self.bert_type = bert_type

        if bert_type == 'bertbase':
            configuration = BertConfig()
            self.model = BertModel(configuration)
        elif bert_type == 'distilbert':
            configuration = DistilBertConfig()
            self.model = DistilBertModel(configuration)           
        elif bert_type == 'mobilebert':
            configuration = MobileBertConfig.from_pretrained('checkpoints/mobilebert')
            self.model = MobileBertModel(configuration)  
Example #28
    def __init__(self, pretrained=True, **kwargs):
        super().__init__()
        hidden_dimension = 32

        if pretrained:
            self.bert = DistilBertModel.from_pretrained(
                "distilbert-base-uncased")
        else:
            self.bert = DistilBertModel(DistilBertConfig())
        self.tokenizer = DistilBertTokenizer.from_pretrained(
            "distilbert-base-uncased")
        self.pre_classifier = nn.Linear(self.bert.config.dim, hidden_dimension)
        self.classifier = nn.Linear(hidden_dimension, 1)
Example #29
 def get_config(self):
     return DistilBertConfig(
         vocab_size=self.vocab_size,
         dim=self.hidden_size,
         n_layers=self.num_hidden_layers,
         n_heads=self.num_attention_heads,
         hidden_dim=self.intermediate_size,
         hidden_act=self.hidden_act,
         dropout=self.hidden_dropout_prob,
         attention_dropout=self.attention_probs_dropout_prob,
         max_position_embeddings=self.max_position_embeddings,
         initializer_range=self.initializer_range,
     )
Example #30
    def __init__(self, cfg: DictConfig):

        super().__init__(cfg)

        config = DistilBertConfig()
        config.sinusoidal_pos_embds = True

        self.module = DistilBertModel.from_pretrained(
            'distilbert-base-uncased', config=config)

        self.tokenizer = DistilBertTokenizer.from_pretrained(
            'distilbert-base-uncased')

        # Add in our own positional encodings
        embedding_layer = PositionalBertEmbeddings(self.module.config)
        self.module.embeddings = embedding_layer

        if self.is_frozen:
            for param in self.module.parameters():
                param.requires_grad = False

        layer = nn.TransformerEncoderLayer(self.hidden_size, self.num_heads)
        self.unit = nn.TransformerEncoder(layer, num_layers=self.num_layers)