Пример #1
0
 def roman_test(self, id_token):
     """
     Decide whether ``id_token`` continues a roman-numeral sequence.

     The answer is True only when all of the following hold:
       * ``roman_to_int(id_token)`` yields a truthy value,
       * ``self.level()`` is 3 or 6
         (the roman levels of a-1-i-A-1-i),
       * that value is exactly one more than
         ``roman_to_int(self.current_token())``.

     Always returns a bool; every non-matching case yields False
     rather than an implicit None.
     """
     roman_int = roman_to_int(id_token)
     if not roman_int:
         return False
     if self.level() not in [3, 6]:
         return False
     # If the current token does not convert, the helper's result will not
     # equal an int, so the comparison is simply False.
     return roman_int - 1 == roman_to_int(self.current_token())
Пример #2
0
    def multiple_id_test(self, ids):
        """
        Check the leading IDs of a paragraph against the multi-ID patterns.

        Recognized openings (first ID, second ID):
          (letters, 1 or 2 chars)(1) - unless the letters pass roman_test
          (digit)(i)
          (roman)(A)

        When exactly three IDs are supplied and the third one continues
        the pattern ('i', 'A' or '1' respectively), all three are kept;
        for the (digit)(i)(A) case that also requires self.level() != 5.

        Returns the leading slice of ``ids`` (two or three items) that
        should be processed together, or None when fewer than two IDs are
        given or no pattern matches.
        """
        if len(ids) < 2:
            return None
        first, second = ids[0], ids[1]
        # A third ID only counts when the paragraph has exactly three IDs.
        third = ids[2] if len(ids) == 3 else None

        # levels 1 or 4: short alphabetic root that is not a roman increment
        alpha_root = (first.isalpha() and len(first) < 3
                      and not self.roman_test(first))
        if alpha_root and second == '1':
            return ids[:3] if third == 'i' else ids[:2]

        # levels 2 or 5: digit root followed by 'i'
        if first.isdigit() and second == 'i':
            if third == 'A' and self.level() != 5:
                return ids[:3]
            return ids[:2]

        # level 3: roman root followed by 'A'
        if roman_to_int(first) and second == 'A':
            return ids[:3] if third == '1' else ids[:2]

        return None
Пример #3
0
 def token_validity_test(self, token):
     """
     Report whether a lone token has the form of a valid ID.

     Accepted forms: all digits, anything roman_to_int converts to a
     truthy value, a single letter, or a doubled letter such as 'aa'.
     """
     if token.isdigit() or roman_to_int(token):
         return True
     if not token.isalpha():
         return False
     if len(token) == 1:
         return True
     # Two-letter tokens are valid only when both letters match.
     return len(token) == 2 and token[0] == token[1]
Пример #4
0
    def roman_surf_test(self, token, next_token):
        """
        Determine whether a Roman token is the next logical Roman token.

        This test is for Roman levels 3 or 6, and checks whether the next token
        is both a Roman numeral and the next bigger Roman numeral.

        For instance 'v' is a valid Roman numeral. But if the current
        Roman numeral doesn't evaluate to 4, the 'v' must be a level-1 marker.

        Some ambiguity can remain, when the next token is both the next valid
        Roman numeral and the next valid level-1 marker. This happens most
        often when the level-1 marker is "h." The parser defaults to diving
        in this case, which will be wrong sometimes.

        Returns True only when both tokens convert via roman_to_int and
        ``next_token`` is exactly one greater than ``token``; otherwise
        False (including when ``token`` is empty or None).
        """
        if not token:
            return False
        # Convert each token once and reuse the result for the comparison.
        current_value = roman_to_int(token)
        if not current_value:
            return False
        next_value = roman_to_int(next_token)
        if not next_value:
            return False
        return next_value == current_value + 1
 def test_int_to_roman_invalid_sequence(self):
     # 'ic' must be rejected: roman_to_int is expected to give None for it.
     result = roman_to_int('ic')
     self.assertIsNone(result)
 def test_int_to_roman_invalid_type(self):
     # An int argument (not a string) is expected to produce None.
     result = roman_to_int(1)
     self.assertIsNone(result)
 def test_roman_to_int(self):
     # self.tokens is iterated for its keys and indexed for the value
     # each key is expected to convert to.
     for numeral in self.tokens:
         converted = roman_to_int(numeral)
         self.assertEqual(converted, self.tokens[numeral])
Пример #8
0
 def next_id(self):
     """
     Choose how ``self.next_token`` relates to the current position.

     Follows the standard section indentation pattern a-1-i-A-1-i.
     Depending on ``self.level()`` (1 through 6) and the shape of the
     next token, the result of ``self.dive()``, ``self.surf()`` or
     ``self.rise(n)`` is returned (presumably one level deeper, same
     level, and n levels shallower - confirm against those methods).
     For any other level nothing is done and None is returned.
     """
     upcoming = self.next_token

     if self.level() == 1:  # lowercase-alpha level: a
         if not self.current_id:
             # No ID recorded yet: the upcoming token becomes the ID.
             self.current_id = upcoming
         return self.dive() if upcoming == '1' else self.surf()

     if self.level() == 2:  # digit level: a-1
         if upcoming.isdigit():
             return self.surf()
         if upcoming == 'i':
             return self.dive()
         return self.rise(1)

     if self.level() == 3:  # roman level: a-1-i
         if upcoming == 'A':
             return self.dive()
         if self.roman_surf_test(self.current_token(), upcoming):
             return self.surf()
         return self.rise(1 if upcoming.isdigit() else 2)

     if self.level() == 4:  # alpha-upper level: a-1-i-A
         if upcoming == '1':
             return self.dive()
         if upcoming.isupper():
             return self.surf()
         # Compare against the roman token one position above this level.
         enclosing_roman = self.current_id.split('-')[-2]
         if self.roman_surf_test(enclosing_roman, upcoming):
             return self.rise(1)
         return self.rise(2 if upcoming.isdigit() else 3)

     if self.level() == 5:  # 2nd digit level: a-1-i-A-1
         current_number = int(self.current_token())
         if upcoming == 'i':
             return self.dive()
         if upcoming.isdigit() and int(upcoming) == current_number + 1:
             return self.surf()
         if upcoming.isupper():
             return self.rise(1)
         if roman_to_int(upcoming):
             return self.rise(2)
         return self.rise(3 if upcoming.isdigit() else 4)

     if self.level() == 6:  # 2nd roman level: 'a-1-i-A-1-i'
         parent_token = self.current_id.split('-')[-2]
         parent_number = int(parent_token) if parent_token.isdigit() else None
         if self.roman_surf_test(self.current_token(), upcoming):
             return self.surf()
         if (parent_number and upcoming.isdigit()
                 and int(upcoming) == parent_number + 1):
             return self.rise(1)
         if upcoming.isupper():
             return self.rise(2)
         if roman_to_int(upcoming):
             return self.rise(3)
         return self.rise(4 if upcoming.isdigit() else 5)