def roman_test(self, id_token):
    """
    Decide whether ``id_token`` is the next roman numeral at a roman level.

    Applied to the root ID of a potential multi-ID paragraph: the token
    counts as a roman increment only while surfing level 3 or 6 (the
    roman levels of a-1-i-A-1-i).

    Returns True when ``id_token`` converts to a roman value, the current
    level is 3 or 6, and that value is exactly one more than the value of
    the current token. Returns False in every other case.
    """
    roman_int = roman_to_int(id_token)
    if not roman_int:
        return False
    if self.level() not in (3, 6):
        return False
    # Always hand back a real bool: if the current token does not convert,
    # the comparison is simply False instead of falling through to None.
    return roman_int - 1 == roman_to_int(self.current_token())
def multiple_id_test(self, ids):
    """
    Inspect a paragraph's leading IDs and decide whether multi-ID
    processing applies.

    Accepted multi-ID openings are:
        (lowercase)(1) - unless the lowercase token is a roman increment
        (digit)(i)
        (roman)(A)
        (uppercase)(1)

    Returns the leading slice of ``ids`` (two or three items) that forms
    an accepted pattern, or None when fewer than two IDs are given or no
    pattern matches. A third ID is only kept when ``ids`` has exactly
    three entries.
    """
    if len(ids) < 2:
        return None

    first, second = ids[0], ids[1]
    exactly_three = len(ids) == 3

    # levels 1 or 4
    if (first.isalpha()
            and len(first) < 3
            and not self.roman_test(first)
            and second == '1'):
        keep = 3 if exactly_three and ids[2] == 'i' else 2
        return ids[:keep]

    # levels 2 or 5
    if first.isdigit() and second == 'i':
        # A trailing 'A' is not accepted when already at level 5.
        keep = 3 if (exactly_three
                     and ids[2] == 'A'
                     and self.level() != 5) else 2
        return ids[:keep]

    # level 3
    if roman_to_int(first) and second == 'A':
        keep = 3 if exactly_three and ids[2] == '1' else 2
        return ids[:keep]

    return None
def token_validity_test(self, token):
    """
    Check that a singleton token is some kind of valid ID.

    A token is accepted when it is all digits, converts via
    ``roman_to_int``, is a single letter, or is a doubled letter
    (two identical alphabetic characters). Returns True or False.
    """
    if token.isdigit():
        return True
    if roman_to_int(token):
        return True
    if not token.isalpha():
        return False
    if len(token) == 1:
        return True
    # Two-letter tokens are only valid when both letters match.
    return len(token) == 2 and token[0] == token[1]
def roman_surf_test(self, token, next_token):
    """
    Check whether ``next_token`` is the roman numeral directly after
    ``token``.

    Intended for the roman levels (3 and 6): the next token must be a
    roman numeral AND exactly one greater than the current one. For
    example 'v' is a valid roman numeral, but unless the current numeral
    evaluates to 4 the 'v' has to be a level-1 marker instead.

    Some ambiguity remains when the next token is both the next valid
    roman numeral and the next valid level-1 marker (most often "h"
    followed by "i"). The parser defaults to diving in that case, which
    will sometimes be wrong.

    Returns False when ``token`` is empty or either token does not
    convert to a roman value.
    """
    if not token:
        return False

    current_value = roman_to_int(token)
    if not current_value:
        return False

    following_value = roman_to_int(next_token)
    if not following_value:
        return False

    return following_value == current_value + 1
def test_int_to_roman_invalid_sequence(self):
    # The sequence 'ic' must be rejected: roman_to_int is expected to
    # return None for it.
    self.assertIsNone(roman_to_int('ic'))
def test_int_to_roman_invalid_type(self):
    # Passing an int instead of a string must yield None.
    self.assertIsNone(roman_to_int(1))
def test_roman_to_int(self):
    # self.tokens (fixture defined outside this block) pairs each input
    # token with the value roman_to_int should produce for it.
    for numeral, expected in self.tokens.items():
        self.assertEqual(roman_to_int(numeral), expected)
def next_id(self):
    """
    Pick the next move through the standard indentation pattern
    a-1-i-A-1-i.

    Reads ``self.next_token`` and the current level, then returns the
    result of exactly one of ``self.dive()``, ``self.surf()`` or
    ``self.rise(n)``. At level 1 with no ``current_id`` yet, the next
    token is stored as the current ID first.

    Levels:
        1  lowercase alpha        a
        2  digit                  a-1
        3  roman                  a-1-i
        4  uppercase alpha        a-1-i-A
        5  second digit level     a-1-i-A-1
        6  second roman level     a-1-i-A-1-i

    Returns None when the level is outside 1-6.
    """
    _next = self.next_token
    level = self.level()

    if level == 1:  # lowercase-alpha level
        if not self.current_id:
            self.current_id = _next
        if _next == '1':
            return self.dive()
        return self.surf()

    if level == 2:  # digit level: a-1
        if _next.isdigit():
            return self.surf()
        if _next == 'i':
            return self.dive()
        return self.rise(1)

    if level == 3:  # roman level: a-1-i
        if _next == 'A':
            return self.dive()
        if self.roman_surf_test(self.current_token(), _next):
            return self.surf()
        if _next.isdigit():
            return self.rise(1)
        return self.rise(2)

    if level == 4:  # alpha-upper level: a-1-i-A
        if _next == '1':
            return self.dive()
        if _next.isupper():
            return self.surf()
        # Compare against the parent roman token (second-to-last part).
        if self.roman_surf_test(self.current_id.split('-')[-2], _next):
            return self.rise(1)
        if _next.isdigit():
            return self.rise(2)
        return self.rise(3)

    if level == 5:  # 2nd digit level: a-1-i-A-1
        token_int = int(self.current_token())
        if _next == 'i':
            return self.dive()
        if _next.isdigit() and int(_next) == token_int + 1:
            return self.surf()
        if _next.isupper():
            return self.rise(1)
        if roman_to_int(_next):
            return self.rise(2)
        if _next.isdigit():
            return self.rise(3)
        return self.rise(4)

    if level == 6:  # 2nd roman level: 'a-1-i-A-1-i'
        previous_token = self.current_id.split('-')[-2]
        previous_digit = (
            int(previous_token) if previous_token.isdigit() else None
        )
        if self.roman_surf_test(self.current_token(), _next):
            return self.surf()
        # Compare against None explicitly so a parent digit of 0 is not
        # mistaken for "no parent digit".
        if (previous_digit is not None
                and _next.isdigit()
                and int(_next) == previous_digit + 1):
            return self.rise(1)
        if _next.isupper():
            return self.rise(2)
        if roman_to_int(_next):
            return self.rise(3)
        if _next.isdigit():
            return self.rise(4)
        return self.rise(5)

    return None