def setUp(self):
    """Build variable-free worlds for the plain sample table and the one with dates."""
    super().setUp()
    question_tokens = list(map(Token, ['what', 'was', 'the', 'last', 'year', '2013', '?']))
    wikitables_dir = self.FIXTURES_ROOT / 'data' / 'wikitables'

    # The plain table's path and knowledge graph are kept on ``self``.
    self.table_file = wikitables_dir / 'sample_table.tsv'
    self.table_kg = TableQuestionKnowledgeGraph.read_from_file(self.table_file, question_tokens)
    self.world = WikiTablesVariableFreeWorld(self.table_kg)

    # The date-bearing table only contributes its world; the same question tokens are reused.
    dated_table_file = wikitables_dir / 'sample_table_with_date.tsv'
    dated_table_kg = TableQuestionKnowledgeGraph.read_from_file(dated_table_file, question_tokens)
    self.world_with_date = WikiTablesVariableFreeWorld(dated_table_kg)
Exemplo n.º 2
0
 def test_world_parses_logical_forms_with_decimals(self):
     """A SEMPRE form containing the decimal ``0.200`` parses to the expected expression."""
     tokens = [Token(text) for text in ['0.2']]
     knowledge_graph = TableQuestionKnowledgeGraph.read_from_file(
         "tests/fixtures/data/wikitables/sample_table.tsv", tokens)
     parsed = WikiTablesWorld(knowledge_graph).parse_logical_form(
         "(fb:cell.cell.number (number 0.200))")
     # Compare on the string rendering of the parsed expression.
     assert str(parsed) == "I1(I(num:0_200))"
Exemplo n.º 3
0
 def test_world_parses_logical_forms_with_decimals(self):
     """A SEMPRE form containing the decimal ``0.200`` parses to the aliased expression."""
     tokens = [Token(text) for text in ['0.2']]
     table_path = self.FIXTURES_ROOT / "data" / "wikitables" / "sample_table.tsv"
     knowledge_graph = TableQuestionKnowledgeGraph.read_from_file(table_path, tokens)
     parsed = WikiTablesWorld(knowledge_graph).parse_logical_form(
         "(fb:cell.cell.number (number 0.200))")
     # Look the aliases up through the name mapper rather than hard-coding them.
     alias = types.name_mapper.get_alias
     expected = f"{alias('fb:cell.cell.number')}({alias('number')}(num:0_200))"
     assert str(parsed) == expected
Exemplo n.º 4
0
    def setUp(self):
        """Create the two fake worlds plus a WikiTables world over the sample table."""
        super().setUp()
        self.world_without_recursion = FakeWorldWithoutRecursion()
        self.world_with_recursion = FakeWorldWithRecursion()

        words = ['what', 'was', 'the', 'last', 'year', '2004', '?']
        tokens = [Token(word) for word in words]
        sample_table = self.FIXTURES_ROOT / 'data' / 'wikitables' / 'sample_table.tsv'
        self.wikitables_world = WikiTablesWorld(
            TableQuestionKnowledgeGraph.read_from_file(sample_table, tokens))
Exemplo n.º 5
0
 def test_world_parses_logical_forms_with_decimals(self):
     u"""A SEMPRE form containing the decimal ``0.200`` parses to the expected expression."""
     tokens = [Token(text) for text in [u'0.2']]
     table_path = self.FIXTURES_ROOT / u"data" / u"wikitables" / u"sample_table.tsv"
     knowledge_graph = TableQuestionKnowledgeGraph.read_from_file(table_path, tokens)
     parsed = WikiTablesWorld(knowledge_graph).parse_logical_form(
         u"(fb:cell.cell.number (number 0.200))")
     # Compare on the unicode rendering of the parsed expression.
     assert unicode(parsed) == u"I1(I(num:0_200))"
Exemplo n.º 6
0
 def setUp(self):
     u"""Load the sample table and build the WikiTables world shared by the tests."""
     super(TestWikiTablesWorld, self).setUp()
     words = [u'what', u'was', u'the', u'last', u'year', u'2000', u'?']
     tokens = [Token(word) for word in words]
     # The path and knowledge graph are stored on ``self`` alongside the world.
     self.table_file = self.FIXTURES_ROOT / u'data' / u'wikitables' / u'sample_table.tsv'
     self.table_kg = TableQuestionKnowledgeGraph.read_from_file(self.table_file, tokens)
     self.world = WikiTablesWorld(self.table_kg)
Exemplo n.º 7
0
 def setUp(self):
     """Load the sample table and build the WikiTables world shared by the tests."""
     super().setUp()
     words = ['what', 'was', 'the', 'last', 'year', '2000', '?']
     tokens = [Token(word) for word in words]
     # The path and knowledge graph are stored on ``self`` alongside the world.
     self.table_file = 'tests/fixtures/data/wikitables/sample_table.tsv'
     self.table_kg = TableQuestionKnowledgeGraph.read_from_file(self.table_file, tokens)
     self.world = WikiTablesWorld(self.table_kg)
Exemplo n.º 8
0
 def test_world_parses_logical_forms_with_decimals(self):
     """A SEMPRE form containing the decimal ``0.200`` parses to the aliased expression."""
     tokens = [Token(text) for text in ['0.2']]
     table_path = self.FIXTURES_ROOT / "data" / "wikitables" / "sample_table.tsv"
     world = WikiTablesWorld(TableQuestionKnowledgeGraph.read_from_file(table_path, tokens))
     parsed = world.parse_logical_form("(fb:cell.cell.number (number 0.200))")
     # Look the aliases up through the name mapper rather than hard-coding them.
     get_alias = types.name_mapper.get_alias
     expected = f"{get_alias('fb:cell.cell.number')}({get_alias('number')}(num:0_200))"
     assert str(parsed) == expected
Exemplo n.º 9
0
    def setUp(self):
        u"""Create the fake worlds, an NLVR world and a WikiTables world used by the tests."""
        super(TestWorld, self).setUp()
        self.world_without_recursion = FakeWorldWithoutRecursion()
        self.world_with_recursion = FakeWorldWithRecursion()

        test_filename = self.FIXTURES_ROOT / u"data" / u"nlvr" / u"sample_ungrouped_data.jsonl"
        # Use a context manager so the fixture file is closed as soon as it has been read,
        # instead of leaving the handle to be reclaimed by the garbage collector.
        with open(test_filename) as data_file:
            data = [json.loads(line)[u"structured_rep"] for line in data_file]
        # Only the first example's structured representation backs the NLVR world.
        self.nlvr_world = NlvrWorld(data[0])

        question_tokens = [Token(x) for x in [u'what', u'was', u'the', u'last', u'year', u'2004', u'?']]
        table_file = self.FIXTURES_ROOT / u'data' / u'wikitables' / u'sample_table.tsv'
        table_kg = TableQuestionKnowledgeGraph.read_from_file(table_file, question_tokens)
        self.wikitables_world = WikiTablesWorld(table_kg)
Exemplo n.º 10
0
    def setUp(self):
        """Create the fake worlds, an NLVR world and a WikiTables world used by the tests."""
        super().setUp()
        self.world_without_recursion = FakeWorldWithoutRecursion()
        self.world_with_recursion = FakeWorldWithRecursion()

        test_filename = self.FIXTURES_ROOT / "data" / "nlvr" / "sample_ungrouped_data.jsonl"
        # Use a context manager so the fixture file is closed as soon as it has been read,
        # instead of leaving the handle to be reclaimed by the garbage collector.
        with open(test_filename) as data_file:
            data = [json.loads(line)["structured_rep"] for line in data_file]
        # Only the first example's structured representation backs the NLVR world.
        self.nlvr_world = NlvrWorld(data[0])

        question_tokens = [Token(x) for x in ['what', 'was', 'the', 'last', 'year', '2004', '?']]
        table_file = self.FIXTURES_ROOT / 'data' / 'wikitables' / 'sample_table.tsv'
        table_kg = TableQuestionKnowledgeGraph.read_from_file(table_file, question_tokens)
        self.wikitables_world = WikiTablesWorld(table_kg)
Exemplo n.º 11
0
 def test_with_deeply_nested_logical_form(self):
     """Smoke test: a form with 21 nested ``or`` calls parses without raising."""
     tokens = [Token(word) for word in ['what', 'was', 'the', 'district', '?']]
     knowledge_graph = TableQuestionKnowledgeGraph.read_from_file(
         'tests/fixtures/data/wikitables/table/109.tsv', tokens)
     world = WikiTablesWorld(knowledge_graph)
     # Layout: the ``count`` wrapper, then 3 x 7 opening ``or`` calls, then the 22 cells
     # (each cell after the first closes one ``or``), then the wrapper's closing parens.
     nested_form = (
         "(count ((reverse fb:cell.cell.number) "
         "(or (or (or (or (or (or (or "
         "(or (or (or (or (or (or (or "
         "(or (or (or (or (or (or (or "
         "fb:cell.virginia_1 fb:cell.virginia_10) "
         "fb:cell.virginia_11) fb:cell.virginia_12) fb:cell.virginia_13) "
         "fb:cell.virginia_14) fb:cell.virginia_15) fb:cell.virginia_16) "
         "fb:cell.virginia_17) fb:cell.virginia_18) fb:cell.virginia_19) "
         "fb:cell.virginia_2) fb:cell.virginia_20) fb:cell.virginia_21) "
         "fb:cell.virginia_22) fb:cell.virginia_3) fb:cell.virginia_4) "
         "fb:cell.virginia_5) fb:cell.virginia_6) fb:cell.virginia_7) "
         "fb:cell.virginia_8) fb:cell.virginia_9)"
         "))"
     )
     print("Parsing...")
     world.parse_logical_form(nested_form)
Exemplo n.º 12
0
 def test_with_deeply_nested_logical_form(self):
     """Smoke test: a form with 21 nested ``or`` calls parses without raising."""
     tokens = [Token(word) for word in ['what', 'was', 'the', 'district', '?']]
     table_path = self.FIXTURES_ROOT / 'data' / 'wikitables' / 'table' / '109.tsv'
     world = WikiTablesWorld(TableQuestionKnowledgeGraph.read_from_file(table_path, tokens))
     # Cell suffixes 1..22 in string-sorted order: 1, 10, ..., 19, 2, 20, 21, 22, 3, ..., 9.
     suffixes = sorted(str(number) for number in range(1, 23))
     # The first cell is the innermost left operand; every later cell closes one ``or``.
     or_chain = ("(or " * (len(suffixes) - 1)
                 + "fb:cell.virginia_" + suffixes[0] + " "
                 + " ".join("fb:cell.virginia_" + suffix + ")" for suffix in suffixes[1:]))
     nested_form = "(count ((reverse fb:cell.cell.number) " + or_chain + "))"
     print("Parsing...")
     world.parse_logical_form(nested_form)
Exemplo n.º 13
0
    def test_world_adds_numbers_from_question(self):
        """Numbers mentioned in the question show up as ``n`` productions in the world."""
        words = ['what', '2007', '2,107', '0.2', '1800s', '1950s', '?']
        tokens = [Token(word) for word in words]
        knowledge_graph = TableQuestionKnowledgeGraph.read_from_file(
            "tests/fixtures/data/wikitables/sample_table.tsv", tokens)
        number_actions = WikiTablesWorld(knowledge_graph).get_valid_actions()['n']

        # Plain numbers, including one written with a thousands separator.
        for production in ('n -> 2007', 'n -> 2107'):
            assert production in number_actions

        # It appears that sempre normalizes floating point numbers.
        assert 'n -> 0.200' in number_actions

        # We want to add the end-points to things like "1800s": 1800 and 1900.
        for production in ('n -> 1800', 'n -> 1900', 'n -> 1950', 'n -> 1960'):
            assert production in number_actions
Exemplo n.º 14
0
    def test_world_adds_numbers_from_question(self):
        """Numbers mentioned in the question show up as ``n`` productions in the world."""
        words = ['what', '2007', '2,107', '0.2', '1800s', '1950s', '?']
        tokens = [Token(word) for word in words]
        table_path = self.FIXTURES_ROOT / "data" / "wikitables" / "sample_table.tsv"
        knowledge_graph = TableQuestionKnowledgeGraph.read_from_file(table_path, tokens)
        number_actions = WikiTablesWorld(knowledge_graph).get_valid_actions()['n']

        # Plain numbers, including one written with a thousands separator.
        for production in ('n -> 2007', 'n -> 2107'):
            assert production in number_actions

        # It appears that sempre normalizes floating point numbers.
        assert 'n -> 0.200' in number_actions

        # We want to add the end-points to things like "1800s": 1800 and 1900.
        for production in ('n -> 1800', 'n -> 1900', 'n -> 1950', 'n -> 1960'):
            assert production in number_actions
Exemplo n.º 15
0
    def setUp(self):
        """Create the fake worlds, an NLVR world and a WikiTables world used by the tests."""
        super().setUp()
        self.world_without_recursion = FakeWorldWithoutRecursion()
        self.world_with_recursion = FakeWorldWithRecursion()

        test_filename = "tests/fixtures/data/nlvr/sample_ungrouped_data.jsonl"
        # Use a context manager so the fixture file is closed as soon as it has been read,
        # instead of leaving the handle to be reclaimed by the garbage collector.
        with open(test_filename) as data_file:
            data = [
                json.loads(line)["structured_rep"]
                for line in data_file
            ]
        # Only the first example's structured representation backs the NLVR world.
        self.nlvr_world = NlvrWorld(data[0])

        question_tokens = [
            Token(x)
            for x in ['what', 'was', 'the', 'last', 'year', '2004', '?']
        ]
        table_file = 'tests/fixtures/data/wikitables/sample_table.tsv'
        table_kg = TableQuestionKnowledgeGraph.read_from_file(
            table_file, question_tokens)
        self.wikitables_world = WikiTablesWorld(table_kg)
Exemplo n.º 16
0
 def test_with_deeply_nested_logical_form(self):
     u"""Smoke test: a form with 21 nested ``or`` calls parses without raising."""
     words = [u'what', u'was', u'the', u'district', u'?']
     tokens = [Token(word) for word in words]
     table_path = self.FIXTURES_ROOT / u'data' / u'wikitables' / u'table' / u'109.tsv'
     world = WikiTablesWorld(TableQuestionKnowledgeGraph.read_from_file(table_path, tokens))
     # Layout: the ``count`` wrapper, 11 + 10 opening ``or`` calls, then the 22 cells, where
     # each cell after the first closes one ``or`` and the last line closes the wrapper.
     nested_form = (
         u"(count ((reverse fb:cell.cell.number) "
         u"(or (or (or (or (or (or (or (or (or (or (or "
         u"(or (or (or (or (or (or (or (or (or (or "
         u"fb:cell.virginia_1 fb:cell.virginia_10) "
         u"fb:cell.virginia_11) fb:cell.virginia_12) "
         u"fb:cell.virginia_13) fb:cell.virginia_14) "
         u"fb:cell.virginia_15) fb:cell.virginia_16) "
         u"fb:cell.virginia_17) fb:cell.virginia_18) "
         u"fb:cell.virginia_19) fb:cell.virginia_2) "
         u"fb:cell.virginia_20) fb:cell.virginia_21) "
         u"fb:cell.virginia_22) fb:cell.virginia_3) "
         u"fb:cell.virginia_4) fb:cell.virginia_5) "
         u"fb:cell.virginia_6) fb:cell.virginia_7) "
         u"fb:cell.virginia_8) fb:cell.virginia_9)))"
     )
     print(u"Parsing...")
     world.parse_logical_form(nested_form)
Exemplo n.º 17
0
    def text_to_instance(
            self,  # type: ignore
            question: str,
            table_info: Union[str, JsonDict],
            example_lisp_string: str = None,
            dpd_output: List[str] = None,
            tokenized_question: List[Token] = None) -> Instance:
        """
        Reads text inputs and makes an instance. WikitableQuestions dataset provides tables as TSV
        files, which we use for training. For running a demo, we may want to provide tables in a
        JSON format. To make this method compatible with both, we take ``table_info``, which can
        either be a filename, or a dict. We check the argument's type and call the appropriate
        method in ``TableQuestionKnowledgeGraph``.

        Parameters
        ----------
        question : ``str``
            Input question
        table_info : ``str`` or ``JsonDict``
            Table filename or the table content itself, as a dict. See
            ``TableQuestionKnowledgeGraph.read_from_json`` for the expected format.
        example_lisp_string : ``str``, optional
            The original (lisp-formatted) example string in the WikiTableQuestions dataset.  This
            comes directly from the ``.examples`` file provided with the dataset.  We pass this to
            SEMPRE for evaluating logical forms during training.  It isn't otherwise used for
            anything.
        dpd_output : List[str], optional
            List of logical forms, produced by dynamic programming on denotations. Not required
            during test.
        tokenized_question : ``List[Token]``, optional
            If you have already tokenized the question, you can pass that in here, so we don't
            duplicate that work.  You might, for example, do batch processing on the questions in
            the whole dataset, then pass the result in here.

        Returns
        -------
        ``Instance`` or ``None``
            An instance with the fields ``question``, ``table``, ``world`` and ``actions``, plus
            ``table_metadata``, ``example_lisp_string``, ``target_action_sequences`` and
            ``agenda`` when the corresponding options / inputs are present.  ``None`` is returned
            when ``dpd_output`` was given but none of its logical forms could be converted into an
            action sequence.
        """
        # pylint: disable=arguments-differ
        tokenized_question = tokenized_question or self._tokenizer.tokenize(
            question.lower())
        question_field = TextField(tokenized_question,
                                   self._question_token_indexers)
        if isinstance(table_info, str):
            table_knowledge_graph = TableQuestionKnowledgeGraph.read_from_file(
                table_info, tokenized_question)
            # Read the raw table lines inside a context manager so the file handle is closed
            # right away; this method may be called once per example in a large dataset.
            with open(table_info) as table_file:
                table_metadata = MetadataField(table_file.readlines())
        else:
            table_knowledge_graph = TableQuestionKnowledgeGraph.read_from_json(
                table_info)
            table_metadata = MetadataField(table_info)
        table_field = KnowledgeGraphField(
            table_knowledge_graph,
            tokenized_question,
            self._table_token_indexers,
            tokenizer=self._tokenizer,
            feature_extractors=self._linking_feature_extractors,
            include_in_vocab=self._use_table_for_vocab,
            max_table_tokens=self._max_table_tokens)
        world = WikiTablesWorld(table_knowledge_graph)
        world_field = MetadataField(world)

        production_rule_fields: List[Field] = []
        for production_rule in world.all_possible_actions():
            _, rule_right_side = production_rule.split(' -> ')
            # A rule is global unless its right-hand side is an entity of this table.
            is_global_rule = not world.is_table_entity(rule_right_side)
            field = ProductionRuleField(production_rule, is_global_rule)
            production_rule_fields.append(field)
        action_field = ListField(production_rule_fields)

        fields = {
            'question': question_field,
            'table': table_field,
            'world': world_field,
            'actions': action_field
        }
        if self._include_table_metadata:
            fields['table_metadata'] = table_metadata
        if example_lisp_string:
            fields['example_lisp_string'] = MetadataField(example_lisp_string)

        # We'll make each target action sequence a List[IndexField], where the index is into
        # the action list we made above.  We need to ignore the type here because mypy doesn't
        # like `action.rule` - it's hard to tell mypy that the ListField is made up of
        # ProductionRuleFields.
        action_map = {
            action.rule: i
            for i, action in enumerate(action_field.field_list)
        }  # type: ignore
        if dpd_output:
            action_sequence_fields: List[Field] = []
            for logical_form in dpd_output:
                if not self._should_keep_logical_form(logical_form):
                    logger.debug(f'Question was: {question}')
                    logger.debug(f'Table info was: {table_info}')
                    continue
                try:
                    expression = world.parse_logical_form(logical_form)
                except ParsingError as error:
                    logger.debug(
                        f'Parsing error: {error.message}, skipping logical form'
                    )
                    logger.debug(f'Question was: {question}')
                    logger.debug(f'Logical form was: {logical_form}')
                    logger.debug(f'Table info was: {table_info}')
                    continue
                except:
                    # Anything other than a ParsingError is unexpected: record which logical
                    # form triggered it, then let the error propagate unchanged.
                    logger.error(logical_form)
                    raise
                action_sequence = world.get_action_sequence(expression)
                try:
                    index_fields: List[Field] = []
                    for production_rule in action_sequence:
                        index_fields.append(
                            IndexField(action_map[production_rule],
                                       action_field))
                    action_sequence_fields.append(ListField(index_fields))
                except KeyError as error:
                    # The sequence uses a rule that is not among this world's possible
                    # actions, so it cannot be indexed; drop this logical form.
                    logger.debug(
                        f'Missing production rule: {error.args}, skipping logical form'
                    )
                    logger.debug(f'Question was: {question}')
                    logger.debug(f'Table info was: {table_info}')
                    logger.debug(f'Logical form was: {logical_form}')
                    continue
                # Stop as soon as the configured number of usable logical forms is reached.
                if len(action_sequence_fields) >= self._max_dpd_logical_forms:
                    break

            if not action_sequence_fields:
                # This is not great, but we're only doing it when we're passed logical form
                # supervision, so we're expecting labeled logical forms, but we can't actually
                # produce the logical forms.  We should skip this instance.  Note that this affects
                # _dev_ and _test_ instances, too, so your metrics could be over-estimates on the
                # full test data.
                return None
            fields['target_action_sequences'] = ListField(
                action_sequence_fields)
        if self._output_agendas:
            agenda_index_fields: List[Field] = []
            for agenda_string in world.get_agenda():
                agenda_index_fields.append(
                    IndexField(action_map[agenda_string], action_field))
            if not agenda_index_fields:
                # An empty agenda is represented by a single index of -1.
                agenda_index_fields = [IndexField(-1, action_field)]
            fields['agenda'] = ListField(agenda_index_fields)
        return Instance(fields)
Exemplo n.º 18
0
 def _get_world_with_question_tokens(self, tokens):
     """Return a ``WikiTablesWorld`` built from ``self.table_file`` and the given tokens."""
     knowledge_graph = TableQuestionKnowledgeGraph.read_from_file(self.table_file, tokens)
     return WikiTablesWorld(knowledge_graph)
Exemplo n.º 19
0
 def setUp(self):
     """Load the sample table and build the WikiTables world shared by the tests."""
     super().setUp()
     words = ['what', 'was', 'the', 'last', 'year', '2000', '?']
     tokens = [Token(word) for word in words]
     # The path and knowledge graph are stored on ``self`` alongside the world.
     self.table_file = self.FIXTURES_ROOT / 'data' / 'wikitables' / 'sample_table.tsv'
     self.table_kg = TableQuestionKnowledgeGraph.read_from_file(
         self.table_file, tokens)
     self.world = WikiTablesWorld(self.table_kg)
Exemplo n.º 20
0
 def _get_world_with_question_tokens(self, tokens: List[Token]) -> WikiTablesWorld:
     """Return a ``WikiTablesWorld`` built from ``self.table_file`` and the given tokens."""
     return WikiTablesWorld(
         TableQuestionKnowledgeGraph.read_from_file(self.table_file, tokens))
 def _get_world_with_question_tokens(self, tokens: List[Token]) -> WikiTablesVariableFreeWorld:
     """Return a variable-free world built from ``self.table_file`` and the given tokens."""
     knowledge_graph = TableQuestionKnowledgeGraph.read_from_file(self.table_file, tokens)
     return WikiTablesVariableFreeWorld(knowledge_graph)