Example No. 1
    def __init__(
            self,
            pretrained_model_name_or_path='google/electra-small-discriminator',
            reduce_output='sum',
            trainable=True,
            num_tokens=None,
            **kwargs
    ):
        super(ELECTRAEncoder, self).__init__()
        try:
            from transformers import TFElectraModel
        except ModuleNotFoundError:
            logger.error(
                ' transformers is not installed. '
                'In order to install all text feature dependencies run '
                'pip install ludwig[text]'
            )
            sys.exit(-1)

        self.transformer = TFElectraModel.from_pretrained(
            pretrained_model_name_or_path
        )
        self.reduce_output = reduce_output
        self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
        self.transformer.trainable = trainable
        self.transformer.resize_token_embeddings(num_tokens)
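
The snippet above only shows the encoder's constructor. As a rough, self-contained illustration of what it wraps, the following sketch loads the same default checkpoint and applies the sum reduction by hand; everything beyond the checkpoint name and the reduce mode is illustrative, not Ludwig code:

import tensorflow as tf
from transformers import ElectraTokenizer, TFElectraModel

# Same checkpoint as the default pretrained_model_name_or_path above.
tokenizer = ElectraTokenizer.from_pretrained('google/electra-small-discriminator')
model = TFElectraModel.from_pretrained('google/electra-small-discriminator')

inputs = tokenizer("Hello world", return_tensors="tf")
hidden = model(inputs)[0]               # last_hidden_state: [batch, seq_len, hidden]
summed = tf.reduce_sum(hidden, axis=1)  # what reduce_output='sum' amounts to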
Example No. 2
    def init_new_training(self):
        if self.params.use_hf_model_:
            if self.params.use_hf_electra_model_:
                self.pretrained_bert = TFElectraModel.from_pretrained(
                    self.params.pretrained_hf_model_, cache_dir=self.params.hf_cache_dir_, return_dict=True
                )
            else:
                self.pretrained_bert = TFBertModel.from_pretrained(
                    self.params.pretrained_hf_model_, cache_dir=self.params.hf_cache_dir_, return_dict=True
                )

        elif self.params.pretrained_bert:
            logger.info(f"Attempt to load pre-trained bert from saved model: {self.params.pretrained_bert}")
            if os.path.basename(self.params.pretrained_bert.strip(os.sep)) == "encoder_only":
                saved_model_dir = self.params.pretrained_bert
            elif os.path.isdir(os.path.join(self.params.pretrained_bert, "export", "additional", "encoder_only")):
                saved_model_dir = os.path.join(self.params.pretrained_bert, "export", "additional", "encoder_only")
            elif os.path.isdir(os.path.join(self.params.pretrained_bert, "additional", "encoder_only")):
                saved_model_dir = os.path.join(self.params.pretrained_bert, "additional", "encoder_only")
            elif os.path.basename(self.params.pretrained_bert.strip(os.sep)) == "best" and os.path.isdir(
                os.path.join(self.params.pretrained_bert, "encoder_only")
            ):
                saved_model_dir = os.path.join(self.params.pretrained_bert, "encoder_only")
            else:
                saved_model_dir = os.path.join(self.params.pretrained_bert, "best", "encoder_only")

            self.pretrained_bert = keras.models.load_model(saved_model_dir)
            logger.info(f"Saved model loaded from: {self.params.pretrained_bert}")
        # else:
        #     logger.info(f"Attempted to load pre-trained bert from saved model: {self._params.pretrained_bert}")
        #     self.pretrained_bert = keras.models.load_model(self._params.pretrained_bert)

        pass
Example No. 3
 def _test_Electra(self, size, large=False):
     from transformers import ElectraTokenizer, TFElectraModel
     tokenizer = ElectraTokenizer.from_pretrained(size)
     model = TFElectraModel.from_pretrained(size)
     input_dict = tokenizer("Hello, my dog is cute", return_tensors="tf")
     spec, input_dict = self.spec_and_pad(input_dict)
     outputs = ["last_hidden_state"]
     self.run_test(model, input_dict, input_signature=spec, outputs=outputs, large=large)
Example No. 4
 def build_transformer(self, training, transformer):
     if training:
         self.config.n_words = len(self.transform.form_vocab)
     self._init_config()
     if isinstance(transformer, str):
         if 'albert_chinese' in transformer:
             tokenizer = BertTokenizerFast.from_pretrained(
                 transformer, add_special_tokens=False)
             transformer: TFPreTrainedModel = TFAutoModel.from_pretrained(
                 transformer, name=transformer, from_pt=True)
         elif transformer.startswith('albert') and transformer.endswith(
                 'zh'):
             transformer, tokenizer, path = build_transformer(transformer)
             transformer.config = AlbertConfig.from_json_file(
                 os.path.join(path, "albert_config.json"))
             tokenizer = BertTokenizer.from_pretrained(
                 os.path.join(path, "vocab_chinese.txt"),
                 add_special_tokens=False)
         elif 'chinese-roberta' in transformer:
             tokenizer = BertTokenizer.from_pretrained(transformer)
             transformer = TFBertModel.from_pretrained(transformer,
                                                       name=transformer,
                                                       from_pt=True)
         elif 'electra' in transformer:
             from transformers import TFElectraModel
             tokenizer = BertTokenizer.from_pretrained(transformer)
             transformer = TFElectraModel.from_pretrained(transformer,
                                                          name=transformer,
                                                          from_pt=True)
         else:
             tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(
                 transformer)
             try:
                 transformer: TFPreTrainedModel = TFAutoModel.from_pretrained(
                     transformer, name=transformer)
             except (TypeError, OSError):
                 transformer: TFPreTrainedModel = TFAutoModel.from_pretrained(
                     transformer, name=transformer, from_pt=True)
     elif transformer[0] == 'AutoModelWithLMHead':
         tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(
             transformer[1])
         transformer: TFAutoModelWithLMHead = TFAutoModelWithLMHead.from_pretrained(
             transformer[1])
     else:
         raise ValueError(f'Unknown identifier {transformer}')
     self.transform.tokenizer = tokenizer
     if self.config.get('fp16', None) or self.config.get('use_amp', None):
         policy = tf.keras.mixed_precision.experimental.Policy(
             'mixed_float16')
         tf.keras.mixed_precision.experimental.set_policy(policy)
         # tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})
         transformer.set_weights(
             [w.astype('float16') for w in transformer.get_weights()])
     self.transform.transformer_config = transformer.config
     return transformer
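
The mixed-precision branch above relies on the old tf.keras.mixed_precision.experimental API. On TensorFlow 2.4 and newer the same effect is available through the stable API; a minimal sketch (wrapping the optimizer by hand is typically only needed in a custom training loop, since compile() does it automatically under a mixed_float16 policy):

import tensorflow as tf

# Stable replacement for the experimental policy calls used above (TF >= 2.4).
tf.keras.mixed_precision.set_global_policy('mixed_float16')

# Loss-scale the optimizer when writing a custom training loop.
optimizer = tf.keras.mixed_precision.LossScaleOptimizer(tf.keras.optimizers.Adam(1e-5))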
Example No. 5
def build_model(num_labels, use_dropout=True, dropout_rate=0.15):
    # model_name, cache_dir and model_meta are defined elsewhere in the original script
    model = TFElectraModel.from_pretrained(model_name, cache_dir=cache_dir)
    input_ids = tf.keras.layers.Input(shape=(model_meta.max_seq_length,), name='input_ids', dtype='int32')
    attention_mask = tf.keras.layers.Input(shape=(model_meta.max_seq_length,), name='attention_mask', dtype='int32')
    token_type_ids = tf.keras.layers.Input(shape=(model_meta.max_seq_length,), name='token_type_ids', dtype='int32')
    model_inputs = [input_ids, attention_mask, token_type_ids]
    outputs = model(model_inputs)
    logits = outputs[0]  # last_hidden_state: [batch, seq_len, hidden_size]
    if use_dropout and dropout_rate > 0:
        logits = tf.keras.layers.Dropout(dropout_rate)(logits)
    model_op = tf.keras.layers.Dense(num_labels, activation='softmax', kernel_initializer='glorot_uniform')(logits)
    keras_model = tf.keras.Model(inputs=model_inputs, outputs=model_op)
    return keras_model
Example No. 6
def get_electra():
    ids = keras.layers.Input(shape=(None, ), dtype=tf.int32, name='ids')
    att = keras.layers.Input(shape=(None, ), dtype=tf.int32, name='att')
    tok_type_ids = keras.layers.Input(shape=(None, ),
                                      dtype=tf.int32,
                                      name='tti')

    config = ElectraConfig.from_pretrained(Config.Electra.config)
    electra_model = TFElectraModel.from_pretrained(Config.Electra.model,
                                                   config=config)

    x = electra_model(ids, attention_mask=att, token_type_ids=tok_type_ids)

    x1 = keras.layers.Dropout(0.15)(x[0])
    x1 = keras.layers.Conv1D(768, 2, padding='same')(x1)
    x1 = keras.layers.LeakyReLU()(x1)
    x1 = keras.layers.LayerNormalization()(x1)
    x1 = keras.layers.Conv1D(64, 2, padding='same')(x1)
    x1 = keras.layers.LeakyReLU()(x1)
    x1 = keras.layers.LayerNormalization()(x1)
    x1 = keras.layers.Conv1D(32, 2, padding='same')(x1)
    x1 = keras.layers.Conv1D(1, 1)(x1)
    x1 = keras.layers.Flatten()(x1)
    x1 = keras.layers.Activation('softmax', dtype='float32', name='sts')(x1)

    x2 = keras.layers.Dropout(0.15)(x[0])
    x2 = keras.layers.Conv1D(768, 2, padding='same')(x2)
    x2 = keras.layers.LeakyReLU()(x2)
    x2 = keras.layers.LayerNormalization()(x2)
    x2 = keras.layers.Conv1D(64, 2, padding='same')(x2)
    x2 = keras.layers.LeakyReLU()(x2)
    x2 = keras.layers.LayerNormalization()(x2)
    x2 = keras.layers.Conv1D(32, 2, padding='same')(x2)
    x2 = keras.layers.Conv1D(1, 1)(x2)
    x2 = keras.layers.Flatten()(x2)
    x2 = keras.layers.Activation('softmax', dtype='float32', name='ets')(x2)

    model = keras.models.Model(inputs=[ids, att, tok_type_ids],
                               outputs=[x1, x2])

    optimizer = keras.optimizers.Adam(learning_rate=6e-5)
    if Config.Train.use_amp:
        optimizer = keras.mixed_precision.experimental.LossScaleOptimizer(
            optimizer, 'dynamic')
    loss = keras.losses.CategoricalCrossentropy(
        label_smoothing=Config.Train.label_smoothing)
    model.compile(loss=loss, optimizer=optimizer)

    return model
Example No. 7
def make_model():
    transformer = TFElectraModel.from_pretrained(r'models\electra')
    input_ids = L.Input(shape=(512, ), dtype=tf.int32)
    x = transformer(input_ids)[0]
    x = x[:, 0, :]
    x = L.Dense(1, activation='sigmoid', name='sigmoid')(x)

    model = Model(inputs=input_ids, outputs=x)
    model.compile(loss='binary_crossentropy',
                  metrics=['accuracy'],
                  optimizer=Adam(lr=1e-5))
    last_layer = pickle.load(open(r'models\sigmoid.pickle', 'rb'))  # raw string, like the model path above
    model.get_layer('sigmoid').set_weights(last_layer)

    return model
Example No. 8
    def __init__(self, extractor, config, *args, **kwargs):
        super(TFParade_Class, self).__init__(*args, **kwargs)
        self.extractor = extractor
        self.config = config

        if config["pretrained"] == "electra-base-msmarco":
            self.bert = TFElectraModel.from_pretrained(
                "Capreolus/electra-base-msmarco")
        elif config["pretrained"] == "bert-base-msmarco":
            self.bert = TFBertModel.from_pretrained(
                "Capreolus/bert-base-msmarco")
        elif config["pretrained"] == "bert-base-uncased":
            self.bert = TFBertModel.from_pretrained("bert-base-uncased")
        else:
            raise ValueError(
                f"unsupported model: {config['pretrained']}; need to ensure correct tokenizers will be used before arbitrary hgf models are supported"
            )

        self.transformer_layer_1 = TFBertLayer(self.bert.config)
        self.transformer_layer_2 = TFBertLayer(self.bert.config)
        self.num_passages = extractor.config["numpassages"]
        self.maxseqlen = extractor.config["maxseqlen"]
        self.linear = tf.keras.layers.Dense(
            1, input_shape=(self.bert.config.hidden_size, ), dtype=tf.float32)

        if config["aggregation"] == "maxp":
            self.aggregation = self.aggregate_using_maxp
        elif config["aggregation"] == "transformer":
            self.aggregation = self.aggregate_using_transformer
            input_embeddings = self.bert.get_input_embeddings()
            cls_token_id = tf.convert_to_tensor([101])
            cls_token_id = tf.reshape(cls_token_id, [1, 1])
            self.initial_cls_embedding = input_embeddings(
                input_ids=cls_token_id)
            self.initial_cls_embedding = tf.reshape(
                self.initial_cls_embedding, [1, self.bert.config.hidden_size])
            initializer = tf.random_normal_initializer(stddev=0.02)
            full_position_embeddings = tf.Variable(
                initial_value=initializer(shape=[
                    self.num_passages + 1, self.bert.config.hidden_size
                ]),
                name="passage_position_embedding",
            )
            self.full_position_embeddings = tf.expand_dims(
                full_position_embeddings, axis=0)
Example No. 9
def load_model(
    sigmoid_dir, transformer_dir="transformer", architecture="distilbert", max_len=256
):
    """
    Special function to load a keras model that uses a transformer layer
    """
    sigmoid_path = os.path.join(sigmoid_dir, "sigmoid.pickle")

    if architecture == "electra":
        transformer = TFElectraModel.from_pretrained(transformer_dir)
    else:
        transformer = TFAutoModel.from_pretrained(transformer_dir)
    model = build_model(transformer, max_len=max_len)

    sigmoid = pickle.load(open(sigmoid_path, "rb"))
    model.get_layer("sigmoid").set_weights(sigmoid)

    return model
Example No. 10
 def __init__(self, params, name="model", **kwargs):
     super(NERwithHFBERT, self).__init__(params, name=name, **kwargs)
     self._tag_string_mapper = get_sm(self._params.tags_fn_)
     self.tag_vocab_size = self._tag_string_mapper.size() + 2
     self._tracked_layers = dict()
     if self.pretrained_bert is None:
         if self._params.use_hf_electra_model_:
             self.pretrained_bert = TFElectraModel(ElectraConfig.from_pretrained(params.pretrained_hf_model_, cache_dir=params.hf_cache_dir_))
         else:
             self.pretrained_bert = TFBertModel(BertConfig.from_pretrained(params.pretrained_hf_model_, cache_dir=params.hf_cache_dir_))
     self._dropout = tf.keras.layers.Dropout(self._params.dropout_last)
     if self._params.bet_tagging_:
         # print(self.tag_vocab_size-1)
         # half of the classes is used plus O-Class, sos, eos
         self._layer_cls = tf.keras.layers.Dense(
             int(self._tag_string_mapper.size() // 2 + 3), activation=tf.keras.activations.softmax, name="layer_cls"
         )
         self._layer_start = tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid, name="layer_start")
         self._layer_end = tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid, name="layer_end")
     elif self._params.use_crf:
         self._last_layer = tf.keras.layers.Dense(self.tag_vocab_size, name="last_layer")
         self._trans_params = tf.keras.layers.Embedding(
             self.tag_vocab_size, self.tag_vocab_size, name="trans_params"
         )
         # ,embeddings_initializer=tf.keras.initializers.Constant(1))
         if self._params.crf_with_ner_rule:
             self._penalty_factor = tf.keras.layers.Embedding(1, 1, name="penalty_factor")
             # ,embeddings_initializer=tf.keras.initializers.Constant(1))
             self._penalty_absolute = tf.keras.layers.Embedding(1, 1, name="penalty_absolute")
             # ,embeddings_initializer=tf.keras.initializers.Constant(1))
         elif self._params.crf_with_ner_forb_trans:
             self._penalty_factor = tf.constant(0.0, name="penalty_factor", dtype=tf.float32)
             self._penalty_absolute = tf.constant(-100000.0, name="penalty_absolute", dtype=tf.float32)
         self.init_crf_with_ner_rule((self.tag_vocab_size - 3) // 2)
     else:
         self._last_layer = tf.keras.layers.Dense(
             self.tag_vocab_size, activation=tf.keras.activations.softmax, name="last_layer"
         )
Example No. 11
def test_load_tf_model():
    for model_name_or_path in ALL_MODEL_NAME_OR_PATH_LST:
        TFElectraModel.from_pretrained(model_name_or_path, from_pt=True)
Example No. 12
embedding_weights.append(np.zeros(vocab_size))

for char, i in tk.word_index.items():
    onehot = np.zeros(vocab_size)
    onehot[i - 1] = 1
    embedding_weights.append(onehot)

embedding_weights = np.array(embedding_weights)

### Ko-ELECTRA
from transformers import TFElectraModel
from transformers import ElectraTokenizer

tokenizer = ElectraTokenizer.from_pretrained(
    "monologg/koelectra-base-v3-discriminator")
model = TFElectraModel.from_pretrained(
    "monologg/koelectra-base-v3-discriminator", from_pt=True)

SEQ_LEN = 128
BATCH_SIZE = 16

token_inputs = tf.keras.layers.Input((SEQ_LEN, ),
                                     dtype=tf.int32,
                                     name='input_word_ids')
mask_inputs = tf.keras.layers.Input((SEQ_LEN, ),
                                    dtype=tf.int32,
                                    name='input_masks')
segment_inputs = tf.keras.layers.Input((SEQ_LEN, ),
                                       dtype=tf.int32,
                                       name='input_segment')

ELEC_outputs = model([token_inputs, mask_inputs, segment_inputs])
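
The notebook excerpt stops right after the encoder call. A hypothetical continuation that pools the [CLS] position and attaches a small binary classification head might look like the sketch below; the head, loss, and optimizer are assumptions, not part of the original notebook:

# Hypothetical head on top of the Ko-ELECTRA outputs built above.
cls_output = ELEC_outputs[0][:, 0, :]  # [CLS] vector: [batch, hidden_size]
pred = tf.keras.layers.Dense(1, activation='sigmoid', name='classifier')(cls_output)

classifier = tf.keras.Model(inputs=[token_inputs, mask_inputs, segment_inputs],
                            outputs=pred)
classifier.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
                   loss='binary_crossentropy',
                   metrics=['accuracy'])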