Пример #1
0
    def test_read_from_json_handles_parentheses_correctly(self):
        """Check the cell entities linked to a column when cell text contains parentheses."""
        # Each case: (column headers, cell rows, column entity to inspect, expected neighbors).
        cases = [
            (['Urban settlements'],
             [['Dzhebariki-Khaya\\n(Джебарики-Хая)'],
              ['South Korea (KOR)'], ['Area (km²)']],
             'fb:row.row.urban_settlements',
             {'fb:cell.dzhebariki_khaya', 'fb:cell.south_korea_kor',
              'fb:cell.area_km'}),
            (['Margin\\nof victory'],
             [['−9 (67-67-68-69=271)']],
             'fb:row.row.margin_of_victory',
             {'fb:cell._9_67_67_68_69_271'}),
            (['Record'],
             [['4.08 m (13 ft 41⁄2 in)']],
             'fb:row.row.record',
             {'fb:cell.4_08_m_13_ft_41_2_in'}),
        ]
        for columns, cells, column_entity, expected_cells in cases:
            table = {'question': [], 'columns': columns, 'cells': cells}
            graph = TableQuestionKnowledgeGraph.read_from_json(table)
            linked_cells = set(graph.neighbors[column_entity])
            assert linked_cells == expected_cells
Пример #2
0
    def test_read_from_json_handles_simple_cases(self):
        """Check entities, neighbors and entity text for a small two-column table."""
        question_tokens = [Token(word) for word in ['where', 'is', 'mersin', '?']]
        table = {
            'question': question_tokens,
            'columns': ['Name in English', 'Location'],
            'cells': [['Paradeniz', 'Mersin'], ['Lake Gala', 'Edirne']]
        }
        graph = TableQuestionKnowledgeGraph.read_from_json(table)
        mersin_neighbors = set(graph.neighbors['fb:cell.mersin'])
        expected_entities = [
            '-1', '0', '1', 'fb:cell.edirne', 'fb:cell.lake_gala',
            'fb:cell.mersin', 'fb:cell.paradeniz', 'fb:row.row.location',
            'fb:row.row.name_in_english'
        ]
        assert graph.entities == expected_entities
        assert mersin_neighbors == {'fb:row.row.location'}
        english_name_cells = set(graph.neighbors['fb:row.row.name_in_english'])
        assert english_name_cells == {'fb:cell.paradeniz', 'fb:cell.lake_gala'}

        # Entity text should be the original, un-normalized table strings.
        expected_text = [
            ('fb:cell.edirne', 'Edirne'),
            ('fb:cell.lake_gala', 'Lake Gala'),
            ('fb:cell.mersin', 'Mersin'),
            ('fb:cell.paradeniz', 'Paradeniz'),
            ('fb:row.row.location', 'Location'),
            ('fb:row.row.name_in_english', 'Name in English'),
        ]
        for entity, text in expected_text:
            assert graph.entity_text[entity] == text

        # These are default numbers that should always be in the graph.
        default_numbers = ['-1', '0', '1']
        for number in default_numbers:
            assert graph.neighbors[number] == []
        for number in default_numbers:
            assert graph.entity_text[number] == number
Пример #3
0
    def test_read_from_json_handles_crazy_unicode(self):
        """Check entity names produced for non-ASCII letters, symbols and punctuation."""
        # Each case: (column headers, cell rows, [(column entity, expected neighbors), ...]).
        cases = [
            (['Town'],
             [['Viðareiði'], ['Funningsfjørður'], ['Froðba']],
             [('fb:row.row.town',
               {'fb:cell.funningsfj_r_ur',
                'fb:cell.vi_arei_i',
                'fb:cell.fro_ba'})]),
            (['Fate'],
             [['Sunk at 45°00′N 11°21′W / 45.000°N 11.350°W'],
              ['66°22′32″N 29°20′19″E / 66.37556°N 29.33861°E']],
             [('fb:row.row.fate',
               {'fb:cell.sunk_at_45_00_n_11_21_w_45_000_n_11_350_w',
                'fb:cell.66_22_32_n_29_20_19_e_66_37556_n_29_33861_e'})]),
            (['€0.01', 'Σ Points'],
             [['6,000', '9.5']],
             [('fb:row.row._0_01', {'fb:cell.6_000'}),
              ('fb:row.row._points', {'fb:cell.9_5'})]),
            (['Division'],
             [['1ª Aut. Pref.']],
             [('fb:row.row.division', {'fb:cell.1_aut_pref'})]),
        ]
        for columns, cells, checks in cases:
            table = {'question': [], 'columns': columns, 'cells': cells}
            graph = TableQuestionKnowledgeGraph.read_from_json(table)
            for column_entity, expected_cells in checks:
                linked_cells = set(graph.neighbors[column_entity])
                assert linked_cells == expected_cells
Пример #4
0
 def test_get_linked_agenda_items(self):
     """Check which entities are returned as agenda items for the question 'where is mersin ?'."""
     words = ['where', 'is', 'mersin', '?']
     table = {
         'question': [Token(word) for word in words],
         'columns': ['Name in English', 'Location'],
         'cells': [['Paradeniz', 'Mersin'], ['Lake Gala', 'Edirne']]
     }
     graph = TableQuestionKnowledgeGraph.read_from_json(table)
     linked_items = graph.get_linked_agenda_items()
     assert linked_items == ['fb:cell.mersin', 'fb:row.row.location']
Пример #5
0
 def test_get_longest_span_matching_entities(self):
     """Check that only the cell matching the longest question span is returned.

     The question contains "lake big gala"; the table has both a 'Lake Big' and a
     'Lake Big Gala' cell, and only the latter is expected back.
     """
     words = ['where', 'is', 'lake', 'big', 'gala', '?']
     table = {
         'question': [Token(word) for word in words],
         'columns': ['Name in English', 'Location'],
         'cells': [['Paradeniz', 'Lake Big'], ['Lake Big Gala', 'Edirne']]
     }
     graph = TableQuestionKnowledgeGraph.read_from_json(table)
     matched_entities = graph._get_longest_span_matching_entities()
     assert matched_entities == ['fb:cell.lake_big_gala']
Пример #6
0
 def test_read_from_json_handles_diacritics_and_newlines(self):
     """Check the entity name for a cell holding both a real newline and a diacritic."""
     note_text = '8 districts\nFormed from Orūzgān Province in 2004'
     table = {'question': [], 'columns': ['Notes'], 'cells': [[note_text]]}
     graph = TableQuestionKnowledgeGraph.read_from_json(table)
     note_cells = set(graph.neighbors['fb:row.row.notes'])
     expected_cells = {
         'fb:cell.8_districts_formed_from_oruzgan_province_in_2004'
     }
     assert note_cells == expected_cells
Пример #7
0
 def test_read_from_json_handles_numbers_in_question(self):
     """Check that numbers in the question become entities even when the table is empty.

     The question tokens are 'one' and '4'; the graph is expected to hold entities '1'
     and '4' with no neighbors, whose text is the question token they came from.
     """
     table = {
         'question': [Token(word) for word in ['one', '4']],
         'columns': [],
         'cells': []
     }
     graph = TableQuestionKnowledgeGraph.read_from_json(table)
     number_entities = ['1', '4']
     for number in number_entities:
         assert graph.neighbors[number] == []
     # The spelled-out token 'one' is kept as the text of entity '1'.
     for number, text in zip(number_entities, ['one', '4']):
         assert graph.entity_text[number] == text
Пример #8
0
    def test_read_from_json_handles_diacritics(self):
        """Check that letters with diacritics map to their plain ASCII letters in entity names."""
        # Each case: (column headers, cell rows, column entity to inspect, expected neighbors).
        cases = [
            (['Name in English', 'Name in Turkish', 'Location'],
             [['Lake Van', 'Van Gölü', 'Mersin'],
              ['Lake Gala', 'Gala Gölü', 'Edirne']],
             'fb:row.row.name_in_turkish',
             {'fb:cell.van_golu', 'fb:cell.gala_golu'}),
            (['Notes'],
             [['Ordained as a priest at\nReșița on March, 29th 1936']],
             'fb:row.row.notes',
             {'fb:cell.ordained_as_a_priest_at_resita_on_march_29th_1936'}),
            (['Player'],
             [['Mateja Kežman']],
             'fb:row.row.player',
             {'fb:cell.mateja_kezman'}),
            (['Venue'],
             [['Arena Națională, Bucharest, Romania']],
             'fb:row.row.venue',
             {'fb:cell.arena_nationala_bucharest_romania'}),
        ]
        for columns, cells, column_entity, expected_cells in cases:
            table = {'question': [], 'columns': columns, 'cells': cells}
            graph = TableQuestionKnowledgeGraph.read_from_json(table)
            linked_cells = set(graph.neighbors[column_entity])
            assert linked_cells == expected_cells
Пример #9
0
    def test_read_from_json_handles_cells_with_duplicate_normalizations(self):
        """Check that distinct cell strings with the same normalized name get numbered suffixes."""
        cell_texts = ['yes', 'yes*', 'yes', 'yes ', 'yes*']
        table = {
            'question': [],
            'columns': ['answer'],
            'cells': [[text] for text in cell_texts]
        }
        graph = TableQuestionKnowledgeGraph.read_from_json(table)

        # There are three unique text strings that all normalize to "yes", so there are three
        # fb:cell.yes entities.  Hopefully we produce them in the same order as SEMPRE does...
        expected_entities = [
            '-1', '0', '1', 'fb:cell.yes', 'fb:cell.yes_2', 'fb:cell.yes_3',
            'fb:row.row.answer'
        ]
        assert graph.entities == expected_entities
Пример #10
0
 def test_read_from_json_handles_columns_with_duplicate_normalizations(self):
     """Check that two headers with the same normalized name stay distinct column entities."""
     table = {
         'question': [],
         'columns': ['# of votes', '% of votes'],
         'cells': [['1', '2'], ['3', '4']]
     }
     graph = TableQuestionKnowledgeGraph.read_from_json(table)
     # (entity, expected neighbors): the second header gets a "_2" suffix, and cell '1'
     # links back to the first column only.
     checks = [
         ('fb:row.row._of_votes', {'fb:cell.1', 'fb:cell.3'}),
         ('fb:row.row._of_votes_2', {'fb:cell.2', 'fb:cell.4'}),
         ('fb:cell.1', {'fb:row.row._of_votes'}),
     ]
     for entity, expected_neighbors in checks:
         found_neighbors = set(graph.neighbors[entity])
         assert found_neighbors == expected_neighbors
Пример #11
0
    def test_read_from_json_handles_newlines_in_columns(self):
        # The TSV file we use stores a newline as the two characters backslash + "n" rather
        # than as a real newline character, so the inputs below use that escaped form and we
        # check that it is still handled when building entity names.
        # Each case: (column headers, cell rows, [(entity, expected neighbors), ...]).
        cases = [
            (['Peak\\nAUS', 'Peak\\nNZ'],
             [['1', '2'], ['3', '4']],
             [('fb:row.row.peak_aus', {'fb:cell.1', 'fb:cell.3'}),
              ('fb:row.row.peak_nz', {'fb:cell.2', 'fb:cell.4'}),
              ('fb:cell.1', {'fb:row.row.peak_aus'})]),
            (['Title'],
             [['Dance of the\\nSeven Veils']],
             [('fb:row.row.title', {'fb:cell.dance_of_the_seven_veils'})]),
        ]
        for columns, cells, checks in cases:
            table = {'question': [], 'columns': columns, 'cells': cells}
            graph = TableQuestionKnowledgeGraph.read_from_json(table)
            for entity, expected_neighbors in checks:
                found_neighbors = set(graph.neighbors[entity])
                assert found_neighbors == expected_neighbors
Пример #12
0
 def test_read_from_json_splits_columns_when_necessary(self):
     """Check that `fb:part.*` entities appear for the first column but not the second.

     The first column contains the cell 'Lake, Gala'; the expected entities include parts
     for that column's cells and none for 'Mersin with spaces' or 'Edirne'.
     """
     words = ['where', 'is', 'mersin', '?']
     table = {
         'question': [Token(word) for word in words],
         'columns': ['Name in English', 'Location'],
         'cells': [['Paradeniz', 'Mersin with spaces'],
                   ['Lake, Gala', 'Edirne']]
     }
     graph = TableQuestionKnowledgeGraph.read_from_json(table)
     expected_entities = [
         '-1', '0', '1', 'fb:cell.edirne', 'fb:cell.lake_gala',
         'fb:cell.mersin_with_spaces', 'fb:cell.paradeniz', 'fb:part.gala',
         'fb:part.lake', 'fb:part.paradeniz', 'fb:row.row.location',
         'fb:row.row.name_in_english'
     ]
     assert graph.entities == expected_entities
     # Part entities are expected to have no neighbors.
     for part_entity in ['fb:part.lake', 'fb:part.gala', 'fb:part.paradeniz']:
         assert graph.neighbors[part_entity] == []
Пример #13
0
 def test_read_from_json_replaces_newlines(self):
     # The csv -> tsv conversion renders '\n' as r'\n' (with a literal backslash character),
     # which gets read as two characters instead of one.  We need to make sure we convert it
     # back to one newline character, so our splitting and other processing works correctly.
     words = ['where', 'is', 'mersin', '?']
     table = {
         'question': [Token(word) for word in words],
         'columns': ['Name\\nin English', 'Location'],
         'cells': [['Paradeniz', 'Mersin'], ['Lake\\nGala', 'Edirne']]
     }
     graph = TableQuestionKnowledgeGraph.read_from_json(table)
     expected_entities = [
         '-1', '0', '1', 'fb:cell.edirne', 'fb:cell.lake_gala',
         'fb:cell.mersin', 'fb:cell.paradeniz', 'fb:part.gala',
         'fb:part.lake', 'fb:part.paradeniz', 'fb:row.row.location',
         'fb:row.row.name_in_english'
     ]
     assert graph.entities == expected_entities
     # The stored header text must contain a real newline, not the escaped two-character form.
     header_text = graph.entity_text['fb:row.row.name_in_english']
     assert header_text == 'Name\nin English'
Пример #14
0
    def setUp(self):
        """Create the tokenizer, table graph, vocabulary and field fixtures stored on `self`."""
        tokenizer = WordTokenizer(SpacyWordSplitter(pos_tags=True))
        self.tokenizer = tokenizer
        self.utterance = tokenizer.tokenize("where is mersin?")
        self.token_indexers = {"tokens": SingleIdTokenIndexer("tokens")}

        table = {
            'question': self.utterance,
            'columns': ['Name in English', 'Location in English'],
            'cells': [['Paradeniz', 'Mersin'], ['Lake Gala', 'Edirne']]
        }
        self.graph = TableQuestionKnowledgeGraph.read_from_json(table)
        self.vocab = Vocabulary()

        def add_token(token):
            # Register `token` in the 'tokens' namespace and pass back the vocabulary's result.
            return self.vocab.add_token_to_namespace(token, namespace='tokens')

        # Tokens are registered in this exact order; keep it stable.
        self.name_index = add_token("name")
        self.in_index = add_token("in")
        self.english_index = add_token("english")
        self.location_index = add_token("location")
        self.paradeniz_index = add_token("paradeniz")
        self.mersin_index = add_token("mersin")
        self.lake_index = add_token("lake")
        self.gala_index = add_token("gala")
        self.negative_one_index = add_token("-1")
        self.zero_index = add_token("0")
        self.one_index = add_token("1")

        # "edirne" is not added above; its index is the one looked up for an unseen string.
        self.oov_index = self.vocab.get_token_index('random OOV string',
                                                    namespace='tokens')
        self.edirne_index = self.oov_index
        self.field = KnowledgeGraphField(self.graph, self.utterance,
                                         self.token_indexers, self.tokenizer)

        super(KnowledgeGraphFieldTest, self).setUp()