def test_training_question_answering_head_weak_supervision(self):
    # Integration check for the weakly supervised training path: run one forward
    # pass with labels on a prepared batch and compare the loss, the token logits
    # of the first example and the aggregation logits of the second example with
    # hard-coded reference values.

    # note that google/tapas-base-finetuned-wtq should correspond to tapas_wtq_wikisql_sqa_inter_masklm_base_reset
    model = TFTapasForQuestionAnswering.from_pretrained("google/tapas-base-finetuned-wtq")
    tokenizer = self.default_tokenizer

    # let's test on a batch
    training_batch = prepare_tapas_batch_inputs_for_training()
    table, queries, answer_coordinates, answer_text, float_answer = training_batch
    encoding = tokenizer(
        table=table,
        queries=queries,
        answer_coordinates=answer_coordinates,
        answer_text=answer_text,
        padding="longest",
        return_tensors="tf",
    )

    # the answer should be prepared by the user
    float_answer = tf.constant(float_answer, dtype=tf.float32)

    # Only these entries of the encoding are forwarded to the model.
    forwarded_keys = (
        "input_ids",
        "attention_mask",
        "token_type_ids",
        "labels",
        "numeric_values",
        "numeric_values_scale",
    )
    outputs = model(
        **{key: encoding[key] for key in forwarded_keys},
        float_answer=float_answer,
    )

    # test the loss
    tf.debugging.assert_near(outputs.loss, tf.constant(3.3527612686157227e-08), atol=1e-6)

    # test the logits on the first example (last nine positions)
    token_logits = outputs.logits
    tf.debugging.assert_equal(token_logits.shape, tf.TensorShape([2, 29]))
    reference_tail = tf.constant(
        [
            -160.0156,
            -160.0156,
            -160.0156,
            -160.0156,
            -160.0156,
            -10072.2266,
            -10070.8896,
            -10092.6006,
            -10092.6006,
        ]
    )
    tf.debugging.assert_near(token_logits[0, -9:], reference_tail, atol=1e-6)

    # test the aggregation logits on the second example
    aggregation_logits = outputs.logits_aggregation
    tf.debugging.assert_equal(aggregation_logits.shape, tf.TensorShape([2, 4]))
    reference_aggregation = tf.constant([-4.0538, 40.0304, -5.3554, 23.3965])
    tf.debugging.assert_near(aggregation_logits[1, -4:], reference_aggregation, atol=1e-4)
def test_inference_question_answering_head_weak_supervision(self):
    # Integration check for inference with the weakly supervised checkpoint:
    # compare token logits, aggregation logits and the predictions decoded by the
    # tokenizer with hard-coded reference values for a two-query batch.

    # note that google/tapas-base-finetuned-wtq should correspond to tapas_wtq_wikisql_sqa_inter_masklm_base_reset
    model = TFTapasForQuestionAnswering.from_pretrained("google/tapas-base-finetuned-wtq")
    tokenizer = self.default_tokenizer

    # let's test on a batch
    table, queries = prepare_tapas_batch_inputs_for_inference()
    inputs = tokenizer(
        table=table,
        queries=queries,
        padding="longest",
        return_tensors="tf",
    )
    outputs = model(**inputs)

    # test the logits (last six positions of every example)
    token_logits = outputs.logits
    tf.debugging.assert_equal(token_logits.shape, tf.TensorShape([2, 28]))
    reference_tail = tf.constant(
        [
            [-160.375504, -160.375504, -160.375504, -10072.3965, -10070.9414, -10094.9736],
            [-9861.6123, -9861.6123, -9861.6123, -9861.6123, -9891.01172, 146.600677],
        ]
    )
    tf.debugging.assert_near(token_logits[:, -6:], reference_tail, atol=0.4)

    # test the aggregation logits
    aggregation_logits = outputs.logits_aggregation
    tf.debugging.assert_equal(aggregation_logits.shape, tf.TensorShape([2, 4]))
    reference_aggregation = tf.constant(
        [
            [18.8545208, -9.76614857, -6.3128891, -2.93525243],
            [-4.05782509, 40.0351, -5.35329962, 23.3978653],
        ]
    )
    tf.debugging.assert_near(aggregation_logits, reference_aggregation, atol=0.001)

    # test the predicted answer coordinates and aggregation indices
    reference_coordinates = [[(0, 0)], [(1, 2)]]
    reference_aggregation_indices = [0, 1]
    predictions = tokenizer.convert_logits_to_predictions(
        inputs,
        outputs.logits,
        outputs.logits_aggregation,
    )
    predicted_answer_coordinates, predicted_aggregation_indices = predictions

    tf.debugging.assert_equal(reference_coordinates, predicted_answer_coordinates)
    tf.debugging.assert_equal(reference_aggregation_indices, predicted_aggregation_indices)
def test_inference_question_answering_head_strong_supervision(self):
    # Integration check for inference with the strongly supervised checkpoint:
    # compare the full token logits and the aggregation logits of a single
    # query with hard-coded reference values.

    # note that google/tapas-base-finetuned-wikisql-supervised should correspond to tapas_wikisql_sqa_inter_masklm_base_reset
    model = TFTapasForQuestionAnswering.from_pretrained("google/tapas-base-finetuned-wikisql-supervised")
    tokenizer = self.default_tokenizer

    table, queries = prepare_tapas_single_inputs_for_inference()
    inputs = tokenizer(table=table, queries=queries, return_tensors="tf")
    outputs = model(**inputs)

    # test the logits
    token_logits = outputs.logits
    tf.debugging.assert_equal(token_logits.shape, tf.TensorShape([1, 21]))
    # 9 + 1 + 1 + 3 + 1 + 5 + 1 = 21 reference values, one per position.
    reference_row = (
        [-10011.1084] * 9
        + [-18.6185989, -10008.7969]
        + [17.6355762] * 3
        + [-10002.4404]
        + [-18.7111301] * 5
        + [-10007.0977]
    )
    tf.debugging.assert_near(token_logits, tf.constant([reference_row]), atol=0.02)

    # test the aggregation logits
    aggregation_logits = outputs.logits_aggregation
    tf.debugging.assert_equal(aggregation_logits.shape, tf.TensorShape([1, 4]))
    reference_aggregation = tf.constant([[16.5659733, -3.06624889, -2.34152961, -0.970244825]])
    tf.debugging.assert_near(aggregation_logits, reference_aggregation, atol=0.003)
def test_inference_question_answering_head_conversational_absolute_embeddings(self):
    # Integration check for inference with the small conversational checkpoint:
    # compare the full token logits of a single query with hard-coded reference
    # values.

    # note that google/tapas-small-finetuned-sqa should correspond to tapas_sqa_inter_masklm_small_reset
    # however here we test the version with absolute position embeddings
    # NOTE(review): the load call below passes nothing besides the checkpoint name,
    # so whichever variant is the default for that name is what gets tested -
    # confirm this matches the intent stated above.
    model = TFTapasForQuestionAnswering.from_pretrained("google/tapas-small-finetuned-sqa")
    tokenizer = self.default_tokenizer

    table, queries = prepare_tapas_single_inputs_for_inference()
    inputs = tokenizer(table=table, queries=queries, return_tensors="tf")
    outputs = model(**inputs)

    # test the logits
    token_logits = outputs.logits
    tf.debugging.assert_equal(token_logits.shape, tf.TensorShape([1, 21]))
    # 9 + 1 + 1 + 3 + 1 + 5 + 1 = 21 reference values, one per position.
    reference_row = (
        [-10000.041] * 9
        + [-18.369339, -10014.692]
        + [17.730324] * 3
        + [-9984.974]
        + [-18.322773] * 5
        + [-10007.267]
    )
    tf.debugging.assert_near(token_logits, tf.constant([reference_row]), atol=0.01)
def test_inference_question_answering_head_conversational(self):
    # Integration check for inference with the base conversational checkpoint:
    # compare the full token logits of a single query with hard-coded reference
    # values.

    # note that google/tapas-base-finetuned-sqa should correspond to tapas_sqa_inter_masklm_base_reset
    model = TFTapasForQuestionAnswering.from_pretrained("google/tapas-base-finetuned-sqa")
    tokenizer = self.default_tokenizer

    table, queries = prepare_tapas_single_inputs_for_inference()
    inputs = tokenizer(table=table, queries=queries, return_tensors="tf")
    outputs = model(**inputs)

    # test the logits
    token_logits = outputs.logits
    tf.debugging.assert_equal(token_logits.shape, tf.TensorShape([1, 21]))
    # 9 + 1 + 1 + 3 + 1 + 5 + 1 = 21 reference values, one per position.
    reference_row = (
        [-9997.274] * 9
        + [-16.262585, -10004.089]
        + [15.435196] * 3
        + [-9990.443]
        + [-16.327433] * 5
        + [-10004.84]
    )
    tf.debugging.assert_near(token_logits, tf.constant([reference_row]), atol=0.015)
def create_and_check_for_question_answering(
    self,
    config,
    input_ids,
    input_mask,
    token_type_ids,
    sequence_labels,
    token_labels,
    labels,
    numeric_values,
    numeric_values_scale,
    float_answer,
    aggregation_labels,
):
    """Build question-answering models from ``config`` and check their output shapes.

    Five scenarios are exercised in turn: inference without and with an
    aggregation head, followed by three training set-ups (conversational,
    weak supervision for aggregation, strong supervision for aggregation).
    Each scenario builds a fresh ``TFTapasForQuestionAnswering`` and asserts
    via ``self.parent`` that the outputs have the expected shapes.

    ``sequence_labels`` and ``token_labels`` are accepted but not used here.
    """
    # Entries shared by every scenario; each call receives its own dict.
    shared_inputs = {
        "input_ids": input_ids,
        "attention_mask": input_mask,
        "token_type_ids": token_type_ids,
    }
    token_logits_shape = (self.batch_size, self.seq_length)
    aggregation_logits_shape = (self.batch_size, self.num_aggregation_labels)
    check_equal = self.parent.assertEqual

    # inference: without aggregation head (SQA). Model only returns logits
    sqa_config = copy.copy(config)
    sqa_config.num_aggregation_labels = 0
    sqa_config.use_answer_as_supervision = False
    model = TFTapasForQuestionAnswering(config=sqa_config)
    result = model(dict(shared_inputs))
    check_equal(result.logits.shape, token_logits_shape)

    # inference: with aggregation head (WTQ, WikiSQL-supervised). Model returns logits and aggregation logits
    model = TFTapasForQuestionAnswering(config=config)
    result = model(dict(shared_inputs))
    check_equal(result.logits.shape, token_logits_shape)
    check_equal(result.logits_aggregation.shape, aggregation_logits_shape)

    # training: can happen in 3 main ways
    # case 1: conversational (SQA)
    model = TFTapasForQuestionAnswering(config=sqa_config)
    result = model({**shared_inputs, "labels": labels})
    check_equal(result.loss.shape, ())
    check_equal(result.logits.shape, token_logits_shape)

    # case 2: weak supervision for aggregation (WTQ)
    model = TFTapasForQuestionAnswering(config=config)
    weak_supervision_inputs = {
        **shared_inputs,
        "labels": labels,
        "numeric_values": numeric_values,
        "numeric_values_scale": numeric_values_scale,
        "float_answer": float_answer,
    }
    result = model(weak_supervision_inputs)
    check_equal(result.loss.shape, ())
    check_equal(result.logits.shape, token_logits_shape)
    check_equal(result.logits_aggregation.shape, aggregation_logits_shape)

    # case 3: strong supervision for aggregation (WikiSQL-supervised)
    wikisql_config = copy.copy(config)
    wikisql_config.use_answer_as_supervision = False
    model = TFTapasForQuestionAnswering(config=wikisql_config)
    strong_supervision_inputs = {
        **shared_inputs,
        "labels": labels,
        "aggregation_labels": aggregation_labels,
    }
    result = model(strong_supervision_inputs)
    check_equal(result.loss.shape, ())
    check_equal(result.logits.shape, token_logits_shape)
    check_equal(result.logits_aggregation.shape, aggregation_logits_shape)