def paragraph_with_marker(self, text, next_text=''):
    """Handle a paragraph that begins with a marker such as (a) or a.

    Builds an appendix Node labelled with the paragraph's marker, decides
    which entry of ``p_levels`` the marker belongs to (stored on the node as
    ``p_level``), updates ``self.depth`` accordingly and adds the node to
    ``self.m_stack`` at that depth.

    A marker can appear in more than one entry of ``p_levels`` (e.g. 'i' or
    'v'). In that case the marker of the following paragraph
    (``next_text``) is consulted first, then the nodes already on the
    stack; if neither settles it, the lowest matching index is used.
    """
    marker, _ = initial_marker(text)
    n = Node(text, node_type=Node.APPENDIX, label=[marker])

    # Marker of the following paragraph, or None when it has none.
    if not initial_marker(next_text):
        next_marker = None
    else:
        next_marker, _ = initial_marker(next_text)

    # Indexes into p_levels whose entry contains each marker.
    this_p_levels = {
        position for position, members in enumerate(p_levels)
        if marker in members
    }
    next_p_levels = {
        position for position, members in enumerate(p_levels)
        if next_marker in members
    }

    # Only non-empty stack levels are of interest below.
    occupied = [entries for entries in self.m_stack.m_stack if entries]
    # Every p_level already assigned to a node on the stack.
    seen_p_levels = {
        node.p_level
        for entries in occupied
        for _, node in entries
        if hasattr(node, 'p_level')
    }

    ambiguous = len(this_p_levels) > 1

    # Look forward: the next marker is unambiguous, so use it as a hint.
    if ambiguous and len(next_p_levels) == 1:
        next_p_level = next_p_levels.pop()
        if next_p_level in this_p_levels:
            # Both markers fit the same level, e.g. an 'i' followed by a
            # 'ii'. Heuristic: accept it when this marker comes first.
            shared = p_levels[next_p_level]
            if shared.index(marker) < shared.index(next_marker):
                n.p_level = next_p_level
        # e.g. (a)(1)(i) followed by an 'A': the next paragraph opens a
        # level not seen so far, so prefer a not-yet-seen level for this
        # marker too. This deliberately overrides the choice made above.
        unseen = this_p_levels - seen_p_levels
        if next_p_level not in seen_p_levels and unseen:
            n.p_level = unseen.pop()

    # Look backward: reuse the level of a node already on the stack.
    if ambiguous and not hasattr(n, 'p_level'):
        candidates = [
            node.p_level
            for entries in occupied
            for _, node in entries
            if getattr(node, 'p_level', None) in this_p_levels
        ]
        if candidates:
            # Later stack entries take precedence over earlier ones.
            n.p_level = candidates[-1]

    # Unambiguous marker, or nothing above decided: take the lowest index
    # (rule of thumb: favor lower case).
    if not getattr(n, 'p_level', None):
        n.p_level = min(this_p_levels)

    # Have we already seen this type of marker on the stack?
    matching = [
        entries for entries in occupied
        if entries and in_same_p_level(n, entries)
    ]
    if matching:
        # Reuse the first element of the last entry on the last matching
        # stack level (presumably that entry's depth, given that
        # m_stack.add takes (depth, node) -- confirm against m_stack).
        last_match = matching[-1]
        self.depth = last_match[-1][0]
    else:
        # New type of marker: go one level deeper.
        self.depth += 1

    self.m_stack.add(self.depth, n)