def build_reg_text_tree(text, part):
    """Build up the whole tree from the plain text of a single regulation.

    This only builds the regulation text part, and does not include
    appendices or the supplement.

    The title is split off ``text`` and the remaining body is searched for
    subparts. Body text that does not sit inside a located subpart is run
    through ``build_subparts_tree`` with ``build_empty_part``; the resulting
    node is kept only when it has children. The root node is labelled
    ``[str(part)]``.
    """
    title, body = utils.title_body(text)
    label = [str(part)]
    locations = subparts(body)

    if not locations:
        # No subparts were located: treat the entire body as one empty part.
        emptypart, children_text = build_subparts_tree(
            body, part, build_empty_part)
        if not emptypart.children:
            # Nothing was parsed out of the body, so the root keeps the
            # full original text and a single fresh empty part.
            return struct.Node(text, [build_empty_part(part)], label, title)
        return struct.Node(children_text, [emptypart], label, title)

    # Text that precedes the first located subpart.
    preamble = body[:locations[0][0]]
    leading_part, children_text = build_subparts_tree(
        preamble, part, build_empty_part)

    subpart_nodes = []
    if preamble.strip() and leading_part.children:
        subpart_nodes.append(leading_part)
    else:
        # The leading node is discarded; its raw text stays on the root.
        children_text = preamble

    for start, end in locations:
        subpart_body = body[start:end]
        subpart, _ = build_subparts_tree(
            subpart_body, part, lambda p: build_subpart(subpart_body, p))
        subpart_nodes.append(subpart)

    return struct.Node(children_text, subpart_nodes, label, title)
def build_reg_text_tree(text, part):
    """Build up the whole tree from the plain text of a single regulation.

    Only the regulation text is handled here; appendices and the supplement
    are not included.

    Returns a ``struct.Node`` labelled ``[str(part)]`` whose children are the
    nodes produced by ``build_subparts_tree`` for each located subpart (plus,
    when it has children, a node for any text before the first subpart).
    """
    title, body = utils.title_body(text)
    label = [str(part)]
    nodes = []
    spans = subparts(body)

    if spans:
        first_start = spans[0][0]
        leading_text = body[:first_start]
        leading_node, children_text = build_subparts_tree(
            leading_text, part, build_empty_part)

        keep_leading = leading_text.strip() and leading_node.children
        if not keep_leading:
            # Drop the leading node and keep its raw text on the root.
            children_text = leading_text
        else:
            nodes.append(leading_node)

        for begin, finish in spans:
            subpart_body = body[begin:finish]
            node, _ = build_subparts_tree(
                subpart_body, part, lambda p: build_subpart(subpart_body, p))
            nodes.append(node)

        return struct.Node(children_text, nodes, label, title)

    # No subparts located: the whole body is parsed as a single empty part.
    whole_node, children_text = build_subparts_tree(
        body, part, build_empty_part)
    if whole_node.children:
        nodes.append(whole_node)
        return struct.Node(children_text, nodes, label, title)

    # Nothing parsed: the root carries the full original text instead.
    return struct.Node(text, [build_empty_part(part)], label, title)
def segment_tree(text, part, parent_label):
    """Build a tree representing the interpretation of a section, paragraph,
    or appendix.

    The title is split off ``text``; every span matched by
    ``comment_citation`` in the body is passed to the interpretation parser
    as an exclusion. ``parent_label`` is accepted but not used here.
    """
    title, body = utils.title_body(text)

    # Collect (start, end) offsets of each citation match in the body.
    exclude = []
    for _, start, end in comment_citation.scanString(body):
        exclude.append((start, end))

    label = text_to_label(title, part)
    return interpParser.build_tree(body, 1, exclude, label, title)
def segment_tree(text, part, parent_label):
    """Build a tree representing the interpretation of a section, paragraph,
    or appendix.

    Internal citations found in the body (looked up relative to the part in
    ``parent_label[0]``) are passed to the interpretation parser as excluded
    spans. The node label is derived from the title.
    """
    title, body = utils.title_body(text)

    citations = internal_citations(body, Label(part=parent_label[0]))
    exclude = [(cit.full_start, cit.full_end) for cit in citations]

    comment_label = Label(part=part, comment=True)
    label = merge_labels(text_to_labels(title, comment_label))
    return interpParser.build_tree(body, 1, exclude, label, title)
def build_section_tree(text, part):
    """Construct the tree for a whole section.

    Assumes the section starts with an identifier: the title must contain
    ``<part>.<digits>``, otherwise the regex lookup yields ``None`` and an
    ``AttributeError`` is raised. ``part`` is interpolated with ``%d``, so it
    must be numeric.
    """
    title, body = utils.title_body(text)

    # Spans of internal citations are excluded from paragraph parsing.
    citations = internal_citations(body, Label(part=part))
    exclude = [(cit.full_start, cit.full_end) for cit in citations]

    match = re.search(r'%d\.(\d+)\b' % part, title)
    section = match.group(1)

    return regParser.build_tree(
        body, exclude=exclude, label=[str(part), section], title=title)
def build(text, part):
    """Create a tree representing the whole interpretation.

    The body is split into segments by header. Each segment becomes a child
    tree, and the text before the first segment is kept on the root. With no
    segments, the root holds the whole body and has no children.
    """
    part = str(part)
    title, body = utils.title_body(text)
    segments = segment_by_header(body, part)

    if segments:
        children = [segment_tree(body[start:end], part, [part])
                    for start, end in segments]
        intro_text = body[:segments[0][0]]
        nested = treeify(children)
        return Node(intro_text, nested, [part, Node.INTERP_MARK], title,
                    Node.INTERP)

    return Node(body, [], [part, Node.INTERP_MARK], title, Node.INTERP)
def build_section_tree(text, part):
    """Construct the tree for a whole section.

    Assumes the section starts with an identifier of the form
    ``<part>.<digits>`` in its title; if that is missing the regex lookup
    returns ``None`` and an ``AttributeError`` follows. ``part`` must be
    numeric because it is formatted with ``%d``.
    """
    title, section_body = utils.title_body(text)

    # Record the span of every internal citation so the parser skips them.
    exclude = []
    for citation in internal_citations(section_body, Label(part=part)):
        exclude.append((citation.full_start, citation.full_end))

    section = re.search(r'%d\.(\d+)\b' % part, title).group(1)
    label = [str(part), section]

    return regParser.build_tree(
        section_body, exclude=exclude, label=label, title=title)
def build_section_tree(text, part):
    """Construct the tree for a whole section.

    Assumes the section starts with an identifier: the title must contain
    ``<part>.<digits>`` or the regex lookup returns ``None`` and an
    ``AttributeError`` is raised. ``part`` must be numeric since it is
    formatted with ``%d``.
    """
    title, body = utils.title_body(text)

    # Regulation-text citation spans first, then appendix citation spans.
    exclude = []
    exclude.extend(
        (start, end) for _, start, end in regtext_citation.scanString(body))
    exclude.extend(
        (start, end) for _, start, end in appendix_citation.scanString(body))

    found = re.search(r'%d\.(\d+)\b' % part, title)
    label = [str(part), found.group(1)]

    return regParser.build_tree(
        body, exclude=exclude, label=label, title=title)
def build(text, part):
    """Create a tree representing the whole interpretation.

    ``part`` is converted to a string before use. When the body has no
    header-delimited segments, the root node holds the whole body and has no
    children. Otherwise each segment is parsed into a child tree and the root
    keeps only the text before the first segment.
    """
    part = str(part)
    title, body = utils.title_body(text)
    segments = segment_by_header(body, part)

    if not segments:
        return Node(body, [], [part, Node.INTERP_MARK], title, Node.INTERP)

    children = []
    for seg_start, seg_end in segments:
        children.append(segment_tree(body[seg_start:seg_end], part, [part]))

    first_segment_start = segments[0][0]
    return Node(
        body[:first_segment_start], treeify(children),
        [part, Node.INTERP_MARK], title, Node.INTERP)
def trees_from(text, part, parent_label):
    """Build a tree for the appendix section, with one child per appendix.

    ``text`` is the text of the entire regulation, while ``part`` is the
    regulation's part (e.g. 1520.) Each appendix is labelled
    ``parent_label + [appendix letter]``. Returns the list of appendix nodes.
    """
    appendix_nodes = []
    for begin, end in carving.appendices(text):
        title, appendix = utils.title_body(text[begin:end])
        letter = carving.get_appendix_letter(title, part)
        label = parent_label + [letter]
        sections = carving.appendix_sections(appendix, letter)

        # Appendices with no carved sections use the generic parser.
        if not sections:
            node = generic_tree(appendix, label, title)
        else:
            node = paragraph_tree(letter, sections, appendix, label, title)
        appendix_nodes.append(node)
    return appendix_nodes
def trees_from(text, part, parent_label):
    """Build a tree for the appendix section, with one child per appendix.

    ``text`` is the text of the entire regulation, while ``part`` is the
    regulation's part (e.g. 1520.) The returned list holds one node per
    appendix located by ``carving.appendices``.
    """
    def appendix_node(begin, end):
        # Parse a single appendix, given its offsets within ``text``.
        title, appendix = utils.title_body(text[begin:end])
        appendix_letter = carving.get_appendix_letter(title, part)
        label = parent_label + [appendix_letter]
        sections = carving.appendix_sections(appendix, appendix_letter)
        if sections:
            return paragraph_tree(
                appendix_letter, sections, appendix, label, title)
        return generic_tree(appendix, label, title)

    return [appendix_node(begin, end)
            for begin, end in carving.appendices(text)]
def generic_tree(text, label, title=None):
    """Use the "generic" parser to build a tree.

    The "generic" parser simply splits on Title Case and treats body text as
    the node content. Each segment becomes a child whose label is ``label``
    extended by ``letter_for(index)``. When there are no segments, a single
    childless node holding all of ``text`` is returned.
    """
    segments = generic.segments(text)
    if not segments:
        return Node(text, label=label, title=title, node_type=Node.APPENDIX)

    children = []
    for index, (start, end) in enumerate(segments):
        seg_title, body = utils.title_body(text[start:end])
        child_label = label + [letter_for(index)]
        child = Node(body, label=child_label, title=seg_title,
                     node_type=Node.APPENDIX)
        children.append(child)

    # Text before the first segment stays on the parent node.
    leading_text = text[:segments[0][0]]
    return Node(leading_text, children, label, title, Node.APPENDIX)
def paragraph_tree(appendix_letter, sections, text, label, title=None):
    """Use the paragraph parser to parse through each section in this
    appendix.

    ``sections`` holds (begin, end) offsets into ``text``. Each section is
    labelled ``label + [section number]`` and parsed with its internal
    citations excluded. When ``sections`` is empty, a single childless node
    holding all of ``text`` is returned.
    """
    if not sections:
        return Node(text, label=label, title=title, node_type=Node.APPENDIX)

    def section_node(begin, end):
        # Parse one appendix section into its paragraph tree.
        seg_title, section_text = utils.title_body(text[begin:end])
        sec_num = carving.get_appendix_section_number(
            seg_title, appendix_letter)
        citations = internal_citations(section_text, Label(part=label[0]))
        exclude = [(cit.full_start, cit.full_end) for cit in citations]
        return parParser.build_tree(
            section_text, exclude=exclude, label=label + [sec_num],
            title=seg_title)

    children = [section_node(begin, end) for begin, end in sections]
    return Node(text[:sections[0][0]], children, label, title, Node.APPENDIX)
def test_title_body_normal_case(self):
    """title_body splits at the first newline, which stays with the body."""
    title = "This is a title"
    body = "Here is text that follows\nnewlines\n\n\nabout in the body"
    expected = (title, "\n" + body)
    actual = utils.title_body(title + "\n" + body)
    self.assertEqual(expected, actual)
def test_title_body_title_only(self):
    """Text with no newline is all title; the body is an empty string."""
    text = "This is some long, long title with no body"
    expected = (text, "")
    actual = utils.title_body(text)
    self.assertEqual(expected, actual)