Example #1
def test_TFXLMForSequenceClassification(self):
    from transformers import XLMConfig, TFXLMForSequenceClassification
    keras.backend.clear_session()
    # pretrained_weights = 'xlm-mlm-enfr-1024'
    tokenizer_file = 'xlm_xlm-mlm-enfr-1024.pickle'
    tokenizer = self._get_tokenzier(tokenizer_file)
    text, inputs, inputs_onnx = self._prepare_inputs(tokenizer)
    config = XLMConfig()
    model = TFXLMForSequenceClassification(config)
    predictions = model.predict(inputs)
    onnx_model = keras2onnx.convert_keras(model, model.name)
    self.assertTrue(run_onnx_runtime(onnx_model.graph.name, onnx_model, inputs_onnx, predictions, self.model_files))
Example #2
def test_TFXLMForSequenceClassification(self):
    from transformers import XLMTokenizer, TFXLMForSequenceClassification
    pretrained_weights = 'xlm-mlm-enfr-1024'
    tokenizer = XLMTokenizer.from_pretrained(pretrained_weights)
    text, inputs, inputs_onnx = self._prepare_inputs(tokenizer)
    model = TFXLMForSequenceClassification.from_pretrained(
        pretrained_weights)
    predictions = model.predict(inputs)
    onnx_model = keras2onnx.convert_keras(model, model.name)
    self.assertTrue(
        run_onnx_runtime(onnx_model.graph.name, onnx_model, inputs_onnx,
                         predictions, self.model_files))
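Both tests above rely on private helpers from the keras2onnx test suite (`_get_tokenzier`, `_prepare_inputs`, `run_onnx_runtime`) whose bodies are not shown. The snippet below is only a minimal, assumption-laden sketch of the same convert-and-compare flow written without those helpers; it assumes the exported ONNX graph keeps the tokenizer's input names and that compatible keras2onnx/onnxruntime versions are installed.

# Hedged sketch (not the keras2onnx test helpers): convert the TF model to ONNX
# and compare ONNX Runtime logits against the Keras forward pass.
import numpy as np
import keras2onnx
import onnxruntime as ort
from transformers import XLMTokenizer, TFXLMForSequenceClassification

pretrained_weights = 'xlm-mlm-enfr-1024'
tokenizer = XLMTokenizer.from_pretrained(pretrained_weights)
model = TFXLMForSequenceClassification.from_pretrained(pretrained_weights)

inputs = dict(tokenizer("Hello, my dog is cute.", return_tensors="tf"))
tf_logits = model(inputs)[0].numpy()

onnx_model = keras2onnx.convert_keras(model, model.name)
sess = ort.InferenceSession(onnx_model.SerializeToString())

# Assumption: the ONNX graph inputs are named after the tokenizer keys
# (e.g. 'input_ids', 'attention_mask'); feed only the ones the graph declares.
graph_inputs = {i.name for i in sess.get_inputs()}
inputs_onnx = {k: v.numpy() for k, v in inputs.items() if k in graph_inputs}
onnx_logits = sess.run(None, inputs_onnx)[0]

np.testing.assert_allclose(tf_logits, onnx_logits, rtol=1e-3, atol=1e-3)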
Example #3
def create_and_check_xlm_sequence_classif(self, config, input_ids, token_type_ids, input_lengths, sequence_labels, token_labels, is_impossible_labels, input_mask):
    model = TFXLMForSequenceClassification(config)

    inputs = {'input_ids': input_ids,
              'lengths': input_lengths}

    (logits,) = model(inputs)

    result = {
        "logits": logits.numpy(),
    }

    self.parent.assertListEqual(
        list(result["logits"].shape),
        [self.batch_size, self.type_sequence_label_size])
Example #4
def create_and_check_xlm_sequence_classif(
    self,
    config,
    input_ids,
    token_type_ids,
    input_lengths,
    sequence_labels,
    token_labels,
    is_impossible_labels,
    choice_labels,
    input_mask,
):
    model = TFXLMForSequenceClassification(config)

    inputs = {"input_ids": input_ids, "lengths": input_lengths}

    result = model(inputs)

    self.parent.assertEqual(result.logits.shape, (self.batch_size, self.type_sequence_label_size))
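The two variants above show the transformers TF API before and after the switch to typed outputs: the first unpacks a plain tuple, `(logits,) = model(inputs)`, while the second reads `result.logits` from a ModelOutput. Below is a small standalone sketch of the newer style; the tiny config values and `num_labels` are illustrative assumptions, not taken from the test.

# Minimal sketch of the newer calling convention, assuming a recent transformers
# release where TF models return a ModelOutput with named fields.
import tensorflow as tf
from transformers import XLMConfig, TFXLMForSequenceClassification

# Tiny, arbitrary config so the randomly initialised model builds quickly.
config = XLMConfig(vocab_size=100, emb_dim=32, n_layers=2, n_heads=2, num_labels=3)
model = TFXLMForSequenceClassification(config)

input_ids = tf.constant([[5, 6, 7, 1], [8, 9, 1, 1]], dtype=tf.int32)
lengths = tf.constant([4, 2], dtype=tf.int32)   # true (unpadded) length per row

outputs = model({"input_ids": input_ids, "lengths": lengths})
print(outputs.logits.shape)                     # -> (2, 3): (batch_size, num_labels)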
Example #5
train_id = np.array(padded_ids_train)
train_mask = np.array(mask_ids_train)

test_id = np.array(padded_ids_test)
test_mask = np.array(mask_ids_test)

# *************** ARCHITECTURE ****************

input_1 = tf.keras.Input(shape=(128,), dtype=np.int32)
input_2 = tf.keras.Input(shape=(128,), dtype=np.int32)

#model = TFBertForSequenceClassification.from_pretrained("/home/murat/datasets/pytorch", from_pt=True)
#model = TFBertForSequenceClassification.from_pretrained('bert-base-multilingual-cased')
#model = TFBertForSequenceClassification.from_pretrained('bert-base-cased')
#model = TFBertForSequenceClassification.from_pretrained("emilyalsentzer/Bio_ClinicalBERT", from_pt=True)
model = TFXLMForSequenceClassification.from_pretrained('xlm-mlm-17-1280',
                                                       from_pt=True)

output = model([input_1, input_2], training=True)
answer = tf.keras.layers.Dense(7, activation=tf.nn.sigmoid)(output[0])
model = tf.keras.Model(inputs=[input_1, input_2], outputs=[answer])
model.summary()

#model.load_weights("./checkpoints_padchest/xlm17_en_semilla1.h5")

# ********* OPTIMIZER, CHECKPOINTS AND CLASS WEIGHTS *****************
d_frecuencias = json.load(
    open(
        "/scratch/codigofsoler/baseDeDatos/diccionarios/d_frecuencias_5zonas_sin_diagnosticos.json"
    ))

nsamples = len(data)
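The script is cut off at this point; what follows is only a hypothetical continuation showing how the loaded frequency dictionary could be turned into class weights and the model compiled. The inverse-frequency weighting and all hyperparameters are assumptions, not the original author's code.

# Hypothetical continuation (the original script is truncated above).
# Inverse-frequency class weights from the loaded dictionary; the scheme is an assumption.
class_weights = {
    i: nsamples / (len(d_frecuencias) * freq)
    for i, freq in enumerate(d_frecuencias.values())
}

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
    loss=tf.keras.losses.BinaryCrossentropy(),   # matches the sigmoid output head
    metrics=[tf.keras.metrics.AUC(multi_label=True)],
)
# The weights would then be passed to model.fit(..., class_weight=class_weights).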