def test_table_question_answering_model_from_pretrained(self):
        # Checks that the Auto* factories resolve a TAPAS checkpoint to the
        # TAPAS-specific config and question-answering model classes.
        # The [5:6] slice yields at most one checkpoint name (index 5), so the
        # loop body runs once, or not at all if the list is shorter.
        for checkpoint in TAPAS_PRETRAINED_MODEL_ARCHIVE_LIST[5:6]:
            tapas_config = AutoConfig.from_pretrained(checkpoint)
            self.assertIsNotNone(tapas_config)
            self.assertIsInstance(tapas_config, TapasConfig)

            # Load once with default arguments, then again asking for the
            # loading report; only the second result is inspected below.
            model = AutoModelForTableQuestionAnswering.from_pretrained(checkpoint)
            loaded = AutoModelForTableQuestionAnswering.from_pretrained(
                checkpoint,
                output_loading_info=True,
            )
            model, _loading_info = loaded
            self.assertIsNotNone(model)
            self.assertIsInstance(model, TapasForQuestionAnswering)
 def __init__(self):
     """Load the TAPAS WTQ tokenizer and model and wrap them in a pipeline.

     Sets ``self.tokenizer``, ``self.model`` and ``self.tableQA``; the last
     is a ``TableQuestionAnsweringPipeline`` built from the first two.
     """
     # Tokenizer and model are loaded from the same checkpoint name.
     checkpoint = "google/tapas-base-finetuned-wtq"
     self.tokenizer = AutoTokenizer.from_pretrained(checkpoint)
     self.model = AutoModelForTableQuestionAnswering.from_pretrained(checkpoint)
     self.tableQA = TableQuestionAnsweringPipeline(
         tokenizer=self.tokenizer,
         model=self.model,
     )
Exemplo n.º 3
0
    def test_slow_tokenizer_sqa_pt(self):
        # Runs the table question-answering pipeline on the tiny random TAPAS
        # SQA checkpoint. It compares sequential and batched invocation on the
        # same inputs, pins the exact outputs for a single query and for query
        # lists on two small tables, and finally checks that missing or empty
        # `table` / `query` arguments raise ValueError.
        checkpoint = "lysandre/tiny-tapas-random-sqa"
        model = AutoModelForTableQuestionAnswering.from_pretrained(checkpoint)
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        qa_pipeline = TableQuestionAnsweringPipeline(tokenizer=tokenizer, model=model)

        # Every builder below returns a brand-new object, so separate pipeline
        # calls never share a table, a query list or an expected-value dict.
        def actors_table():
            return {
                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
                "age": ["56", "45", "59"],
                "number of movies": ["87", "53", "69"],
                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
            }

        def repos_table():
            return {
                "Repository": ["Transformers", "Datasets", "Tokenizers"],
                "Stars": ["36542", "4512", "3934"],
                "Contributors": ["651", "77", "34"],
                "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
            }

        def clooney_queries():
            return [
                "how many movies has george clooney played in?",
                "how old is he?",
                "what's his date of birth?",
            ]

        def birth_date_answer():
            return {
                "answer": "7 february 1967",
                "coordinates": [(0, 3)],
                "cells": ["7 february 1967"],
            }

        def python_answer():
            return {
                "answer": "Python, Python",
                "coordinates": [(0, 3), (1, 3)],
                "cells": ["Python", "Python"],
            }

        # One inputs dict is deliberately reused for both invocation modes.
        shared_inputs = {"table": actors_table(), "query": clooney_queries()}
        sequential_outputs = qa_pipeline(**shared_inputs, sequential=True)
        batch_outputs = qa_pipeline(**shared_inputs, sequential=False)

        for per_mode_outputs in (sequential_outputs, batch_outputs):
            self.assertEqual(len(per_mode_outputs), 3)
        # The first answers agree between modes while the second ones differ.
        self.assertEqual(sequential_outputs[0], batch_outputs[0])
        self.assertNotEqual(sequential_outputs[1], batch_outputs[1])
        # NOTE(review): the check on the third answer is disabled; the reason
        # is not visible from this test.
        # self.assertNotEqual(sequential_outputs[2], batch_outputs[2])

        # A fresh pipeline instance is used for the exact-output checks.
        qa_pipeline = TableQuestionAnsweringPipeline(tokenizer=tokenizer, model=model)

        # A single string query is expected to give a single dict.
        outputs = qa_pipeline(
            table=actors_table(),
            query="how many movies has george clooney played in?",
        )
        self.assertEqual(outputs, birth_date_answer())

        # A list of queries is expected to give one dict per query.
        outputs = qa_pipeline(table=actors_table(), query=clooney_queries())
        self.assertEqual(outputs, [birth_date_answer() for _ in range(3)])

        outputs = qa_pipeline(
            table=repos_table(),
            query=[
                "What repository has the largest number of stars?",
                "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
                "What is the number of repositories?",
                "What is the average number of stars?",
                "What is the total amount of stars?",
            ],
        )
        self.assertEqual(outputs, [python_answer() for _ in range(5)])

        # Invalid argument combinations, tried in this order: unusable tables
        # first, then a missing, empty and None query against a valid table.
        empty_context_query = "What does it do with empty context ?"
        rejected_calls = (
            {"query": empty_context_query, "table": None},
            {"query": empty_context_query, "table": ""},
            {"query": empty_context_query, "table": {}},
            {"table": repos_table()},
            {"query": "", "table": repos_table()},
            {"query": None, "table": repos_table()},
        )
        for bad_kwargs in rejected_calls:
            with self.assertRaises(ValueError):
                qa_pipeline(**bad_kwargs)
Exemplo n.º 4
0
    def test_small_model_pt(self):
        # Runs the table question-answering pipeline on the tiny random TAPAS
        # WTQ checkpoint. It checks the aggregation-related config attributes,
        # pins the exact outputs (which carry an "aggregator" entry) for a
        # single query and for query lists on two small tables, and finally
        # checks that missing or empty `table` / `query` arguments raise
        # ValueError.
        checkpoint = "lysandre/tiny-tapas-random-wtq"
        model = AutoModelForTableQuestionAnswering.from_pretrained(checkpoint)
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)

        tapas_config = model.config
        self.assertIsInstance(tapas_config.aggregation_labels, dict)
        self.assertIsInstance(tapas_config.no_aggregation_label_index, int)

        qa_pipeline = TableQuestionAnsweringPipeline(tokenizer=tokenizer, model=model)

        # Every builder below returns a brand-new object, so separate pipeline
        # calls never share a table or an expected-value dict.
        def actors_table():
            return {
                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
                "age": ["56", "45", "59"],
                "number of movies": ["87", "53", "69"],
                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
            }

        def repos_table():
            return {
                "Repository": ["Transformers", "Datasets", "Tokenizers"],
                "Stars": ["36542", "4512", "3934"],
                "Contributors": ["651", "77", "34"],
                "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
            }

        def average_answer():
            return {
                "answer": "AVERAGE > ",
                "coordinates": [],
                "cells": [],
                "aggregator": "AVERAGE",
            }

        # A single string query is expected to give a single dict.
        outputs = qa_pipeline(
            table=actors_table(),
            query="how many movies has george clooney played in?",
        )
        self.assertEqual(outputs, average_answer())

        # A list of queries is expected to give one dict per query.
        outputs = qa_pipeline(
            table=actors_table(),
            query=[
                "how many movies has george clooney played in?",
                "how old is he?",
                "what's his date of birth?",
            ],
        )
        self.assertEqual(outputs, [average_answer() for _ in range(3)])

        outputs = qa_pipeline(
            table=repos_table(),
            query=[
                "What repository has the largest number of stars?",
                "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
                "What is the number of repositories?",
                "What is the average number of stars?",
                "What is the total amount of stars?",
            ],
        )
        self.assertEqual(outputs, [average_answer() for _ in range(5)])

        # Invalid argument combinations, tried in this order: unusable tables
        # first, then a missing, empty and None query against a valid table.
        empty_context_query = "What does it do with empty context ?"
        rejected_calls = (
            {"query": empty_context_query, "table": None},
            {"query": empty_context_query, "table": ""},
            {"query": empty_context_query, "table": {}},
            {"table": repos_table()},
            {"query": "", "table": repos_table()},
            {"query": None, "table": repos_table()},
        )
        for bad_kwargs in rejected_calls:
            with self.assertRaises(ValueError):
                qa_pipeline(**bad_kwargs)