def test_table_question_answering_model_from_pretrained(self): for model_name in TAPAS_PRETRAINED_MODEL_ARCHIVE_LIST[5:6]: config = AutoConfig.from_pretrained(model_name) self.assertIsNotNone(config) self.assertIsInstance(config, TapasConfig) model = AutoModelForTableQuestionAnswering.from_pretrained(model_name) model, loading_info = AutoModelForTableQuestionAnswering.from_pretrained( model_name, output_loading_info=True ) self.assertIsNotNone(model) self.assertIsInstance(model, TapasForQuestionAnswering)
def __init__(self): self.tokenizer = AutoTokenizer.from_pretrained( "google/tapas-base-finetuned-wtq") self.model = AutoModelForTableQuestionAnswering.from_pretrained( "google/tapas-base-finetuned-wtq") self.tableQA = TableQuestionAnsweringPipeline( model=self.model, tokenizer=self.tokenizer)
def test_slow_tokenizer_sqa_pt(self): model_id = "lysandre/tiny-tapas-random-sqa" model = AutoModelForTableQuestionAnswering.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer) inputs = { "table": { "actors": ["brad pitt", "leonardo di caprio", "george clooney"], "age": ["56", "45", "59"], "number of movies": ["87", "53", "69"], "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], }, "query": [ "how many movies has george clooney played in?", "how old is he?", "what's his date of birth?" ], } sequential_outputs = table_querier(**inputs, sequential=True) batch_outputs = table_querier(**inputs, sequential=False) self.assertEqual(len(sequential_outputs), 3) self.assertEqual(len(batch_outputs), 3) self.assertEqual(sequential_outputs[0], batch_outputs[0]) self.assertNotEqual(sequential_outputs[1], batch_outputs[1]) # self.assertNotEqual(sequential_outputs[2], batch_outputs[2]) table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer) outputs = table_querier( table={ "actors": ["brad pitt", "leonardo di caprio", "george clooney"], "age": ["56", "45", "59"], "number of movies": ["87", "53", "69"], "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], }, query="how many movies has george clooney played in?", ) self.assertEqual( outputs, { "answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"] }, ) outputs = table_querier( table={ "actors": ["brad pitt", "leonardo di caprio", "george clooney"], "age": ["56", "45", "59"], "number of movies": ["87", "53", "69"], "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], }, query=[ "how many movies has george clooney played in?", "how old is he?", "what's his date of birth?" ], ) self.assertEqual( outputs, [ { "answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"] }, { "answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"] }, { "answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"] }, ], ) outputs = table_querier( table={ "Repository": ["Transformers", "Datasets", "Tokenizers"], "Stars": ["36542", "4512", "3934"], "Contributors": ["651", "77", "34"], "Programming language": ["Python", "Python", "Rust, Python and NodeJS"], }, query=[ "What repository has the largest number of stars?", "Given that the numbers of stars defines if a repository is active, what repository is the most active?", "What is the number of repositories?", "What is the average number of stars?", "What is the total amount of stars?", ], ) self.assertEqual( outputs, [ { "answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"] }, { "answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"] }, { "answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"] }, { "answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"] }, { "answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"] }, ], ) with self.assertRaises(ValueError): table_querier(query="What does it do with empty context ?", table=None) with self.assertRaises(ValueError): table_querier(query="What does it do with empty context ?", table="") with self.assertRaises(ValueError): table_querier(query="What does it do with empty context ?", table={}) with self.assertRaises(ValueError): table_querier( table={ "Repository": ["Transformers", "Datasets", "Tokenizers"], "Stars": ["36542", "4512", "3934"], "Contributors": ["651", "77", "34"], "Programming language": ["Python", "Python", "Rust, Python and NodeJS"], }) with self.assertRaises(ValueError): table_querier( query="", table={ "Repository": ["Transformers", "Datasets", "Tokenizers"], "Stars": ["36542", "4512", "3934"], "Contributors": ["651", "77", "34"], "Programming language": ["Python", "Python", "Rust, Python and NodeJS"], }, ) with self.assertRaises(ValueError): table_querier( query=None, table={ "Repository": ["Transformers", "Datasets", "Tokenizers"], "Stars": ["36542", "4512", "3934"], "Contributors": ["651", "77", "34"], "Programming language": ["Python", "Python", "Rust, Python and NodeJS"], }, )
def test_small_model_pt(self): model_id = "lysandre/tiny-tapas-random-wtq" model = AutoModelForTableQuestionAnswering.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) self.assertIsInstance(model.config.aggregation_labels, dict) self.assertIsInstance(model.config.no_aggregation_label_index, int) table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer) outputs = table_querier( table={ "actors": ["brad pitt", "leonardo di caprio", "george clooney"], "age": ["56", "45", "59"], "number of movies": ["87", "53", "69"], "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], }, query="how many movies has george clooney played in?", ) self.assertEqual( outputs, { "answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE" }, ) outputs = table_querier( table={ "actors": ["brad pitt", "leonardo di caprio", "george clooney"], "age": ["56", "45", "59"], "number of movies": ["87", "53", "69"], "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], }, query=[ "how many movies has george clooney played in?", "how old is he?", "what's his date of birth?" ], ) self.assertEqual( outputs, [ { "answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE" }, { "answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE" }, { "answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE" }, ], ) outputs = table_querier( table={ "Repository": ["Transformers", "Datasets", "Tokenizers"], "Stars": ["36542", "4512", "3934"], "Contributors": ["651", "77", "34"], "Programming language": ["Python", "Python", "Rust, Python and NodeJS"], }, query=[ "What repository has the largest number of stars?", "Given that the numbers of stars defines if a repository is active, what repository is the most active?", "What is the number of repositories?", "What is the average number of stars?", "What is the total amount of stars?", ], ) self.assertEqual( outputs, [ { "answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE" }, { "answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE" }, { "answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE" }, { "answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE" }, { "answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE" }, ], ) with self.assertRaises(ValueError): table_querier(query="What does it do with empty context ?", table=None) with self.assertRaises(ValueError): table_querier(query="What does it do with empty context ?", table="") with self.assertRaises(ValueError): table_querier(query="What does it do with empty context ?", table={}) with self.assertRaises(ValueError): table_querier( table={ "Repository": ["Transformers", "Datasets", "Tokenizers"], "Stars": ["36542", "4512", "3934"], "Contributors": ["651", "77", "34"], "Programming language": ["Python", "Python", "Rust, Python and NodeJS"], }) with self.assertRaises(ValueError): table_querier( query="", table={ "Repository": ["Transformers", "Datasets", "Tokenizers"], "Stars": ["36542", "4512", "3934"], "Contributors": ["651", "77", "34"], "Programming language": ["Python", "Python", "Rust, Python and NodeJS"], }, ) with self.assertRaises(ValueError): table_querier( query=None, table={ "Repository": ["Transformers", "Datasets", "Tokenizers"], "Stars": ["36542", "4512", "3934"], "Contributors": ["651", "77", "34"], "Programming language": ["Python", "Python", "Rust, Python and NodeJS"], }, )