Пример #1
0
    def test_normalize_parse_tree(self):
        """Check normalizeParseTree output for every region grammar.

        For each region production, scan ``self.tei_data1``, normalize every
        match, and verify that the normalized tree carries the expected
        ``'type'`` value and a non-None ``'id'``.  The number of matches per
        production is then compared against the expected count.
        """
        # (production, expected 'type' value, expected number of matches)
        cases = (
            (TEILiteParser.pRegion, 'p', 6),
            (TEILiteParser.headRegion, 'head', 5),
            (TEILiteParser.sssRegion, 'subsubsection', 2),
            (TEILiteParser.ssRegion, 'subsection', 1),
            (TEILiteParser.sRegion, 'section', 2),
        )

        for production, region_type, expected_count in cases:
            num_matches = 0
            # Each scan result unpacks into three values; only the first
            # (the parse tree) is needed here.
            for parse_tree, _start, _end in production.scanString(self.tei_data1):
                num_matches += 1
                nparse_tree = TEILiteParser.normalizeParseTree(parse_tree.asList())
                self.assertEqual(nparse_tree['type'], region_type)
                self.assertIsNotNone(nparse_tree['id'])

            # Use assertEqual, not assertTrue(count, n): assertTrue treats
            # its second argument as the failure message, so it would accept
            # any non-zero count.
            self.assertEqual(
                num_matches, expected_count,
                '%s: expected %d matches, got %d'
                % (region_type, expected_count, num_matches))
Пример #2
0
    def test_get_region_id(self):
        """Check the IDs getRegionID reports for section-level units.

        For each unit type (subsubsection, subsection, section) the grammar
        returned by ``getGrammarForUnit`` is scanned over ``self.tei_data1``
        and ``getRegionID`` is called on every match with its zero-based
        match index.  The resulting list of IDs must equal the expected list
        exactly, in both content and length.
        """
        parser = TEILiteParser()
        expected_ids = {'subsubsection': ['1', '2'],
                        'subsection': ['1'],
                        'section': ['1', '9']}

        for region_type, expected in expected_ids.items():
            production = parser.getGrammarForUnit(region_type)

            # Each scan result unpacks into three values; only the first
            # (the matched subtree) is passed on to getRegionID.
            ids = [
                parser.getRegionID(subtree, region_type, subtree_idx)
                for subtree_idx, (subtree, _start, _end)
                in enumerate(production.scanString(self.tei_data1))
            ]

            # Compare whole lists so that missing or extra matches fail the
            # test instead of passing vacuously or raising IndexError.
            self.assertEqual(ids, expected)