Пример #1
0
def build_reg_text_tree(text, part):
    """Turn the plain text of one regulation into its regulation-text tree.

    Appendices and the supplement are not handled here. The root node is
    labelled ``[str(part)]`` and titled with the title split off ``text``.
    Its children are the subpart nodes, optionally preceded by an "empty
    part" node for content that appears before the first subpart.
    """
    title, body = utils.title_body(text)
    label = [str(part)]
    locations = subparts(body)

    if not locations:
        # No explicit subparts: everything belongs to a single empty part.
        whole, leftover = build_subparts_tree(body, part, build_empty_part)
        if not whole.children:
            # Nothing was parsed; keep the raw text on the root node.
            return struct.Node(text, [build_empty_part(part)], label, title)
        return struct.Node(leftover, [whole], label, title)

    nodes = []
    preamble = body[:locations[0][0]]
    leading, leftover = build_subparts_tree(
        preamble, part, build_empty_part)
    if preamble.strip() and leading.children:
        nodes.append(leading)
    else:
        # The preamble produced no children; it becomes the root's text.
        leftover = preamble

    for begin, finish in locations:
        chunk = body[begin:finish]
        node, _ = build_subparts_tree(
            chunk, part, lambda p: build_subpart(chunk, p))
        nodes.append(node)
    return struct.Node(leftover, nodes, label, title)
Пример #2
0
def build_reg_text_tree(text, part):
    """Build the regulation-text tree for a single regulation.

    Only the regulation text is processed; appendices and the supplement
    are out of scope. The returned root node carries the label
    ``[str(part)]`` and the title separated from ``text``.
    """
    title, body = utils.title_body(text)
    label = [str(part)]

    tree_children = []

    spans = subparts(body)
    if spans:
        # Text ahead of the first subpart is parsed as an "empty part".
        intro = body[:spans[0][0]]
        intro_node, remainder = build_subparts_tree(
            intro, part, build_empty_part)
        if not intro.strip() or not intro_node.children:
            # Nothing usable was parsed; keep the intro as the root text.
            remainder = intro
        else:
            tree_children.append(intro_node)

        for span_start, span_end in spans:
            span_text = body[span_start:span_end]
            span_node, _ = build_subparts_tree(
                span_text, part, lambda p: build_subpart(span_text, p))
            tree_children.append(span_node)
    else:
        # Without subparts the whole body is a single empty part.
        sole_node, remainder = build_subparts_tree(
            body, part, build_empty_part)
        if not sole_node.children:
            return struct.Node(
                text, [build_empty_part(part)], label, title)
        tree_children.append(sole_node)
    return struct.Node(remainder, tree_children, label, title)
def segment_tree(text, part, parent_label):
    """Build the interpretation tree for one section, paragraph, or
    appendix.

    ``text`` is split into a title and a body. The title (with ``part``)
    supplies the label, and the spans matched by ``comment_citation`` in
    the body are passed to the parser as exclusions. ``parent_label`` is
    accepted but not used.
    """
    heading, content = utils.title_body(text)

    skip_ranges = []
    for _tokens, begin, finish in comment_citation.scanString(content):
        skip_ranges.append((begin, finish))

    node_label = text_to_label(heading, part)
    return interpParser.build_tree(
        content, 1, skip_ranges, node_label, heading)
def segment_tree(text, part, parent_label):
    """Build the interpretation tree for one section, paragraph, or
    appendix.

    Internal citations found in the body (resolved against the first
    element of ``parent_label``) are passed to the parser as exclusion
    ranges. The node label is derived from the title of ``text``.
    """
    heading, content = utils.title_body(text)

    skip_ranges = []
    for citation in internal_citations(content,
                                       Label(part=parent_label[0])):
        skip_ranges.append((citation.full_start, citation.full_end))

    title_labels = text_to_labels(heading, Label(part=part, comment=True))
    node_label = merge_labels(title_labels)
    return interpParser.build_tree(
        content, 1, skip_ranges, node_label, heading)
def segment_tree(text, part, parent_label):
    """Parse a single interpretation segment (section, paragraph, or
    appendix) into a tree.

    The segment's title yields the label; the full extent of every internal
    citation in its body is handed to the parser as an excluded range.
    """
    seg_title, seg_body = utils.title_body(text)

    citation_context = Label(part=parent_label[0])
    found = internal_citations(seg_body, citation_context)
    excluded = [(cite.full_start, cite.full_end) for cite in found]

    comment_context = Label(part=part, comment=True)
    seg_label = merge_labels(text_to_labels(seg_title, comment_context))
    return interpParser.build_tree(seg_body, 1, excluded, seg_label,
                                   seg_title)
Пример #6
0
def build_section_tree(text, part):
    """Parse one whole section into a paragraph tree.

    ``text`` is split into a title and a body. The title must contain an
    identifier of the form ``<part>.<section>``; the section number found
    there, together with ``part``, forms the label of the returned tree.
    ``part`` is interpolated with ``%d``.
    """
    title, body = utils.title_body(text)

    # The full extent of each internal citation is excluded from parsing.
    exclude = []
    for citation in internal_citations(body, Label(part=part)):
        exclude.append((citation.full_start, citation.full_end))

    section_match = re.search(r'%d\.(\d+)\b' % part, title)
    section = section_match.group(1)
    label = [str(part), section]
    return regParser.build_tree(
        body, exclude=exclude, label=label, title=title)
def build(text, part):
    """Build the root node for the whole interpretation.

    The body of ``text`` is split into segments by header. Each segment is
    parsed with ``segment_tree`` and the results are arranged by
    ``treeify``. If no segments are found, the root node has no children
    and holds the whole body as its text.
    """
    part_id = str(part)
    title, body = utils.title_body(text)
    segments = segment_by_header(body, part_id)

    if not segments:
        return Node(body, [], [part_id, Node.INTERP_MARK], title,
                    Node.INTERP)

    children = []
    for begin, finish in segments:
        children.append(segment_tree(body[begin:finish], part_id, [part_id]))

    # Text ahead of the first header stays on the root node itself.
    intro = body[:segments[0][0]]
    return Node(intro, treeify(children), [part_id, Node.INTERP_MARK],
                title, Node.INTERP)
Пример #8
0
def build_section_tree(text, part):
    """Build the tree for a complete section.

    The section is expected to begin with an identifier: the title split
    off ``text`` is searched for ``<part>.<digits>`` and those digits become
    the second element of the label.
    """
    section_title, section_body = utils.title_body(text)

    citations = internal_citations(section_body, Label(part=part))
    skip = [(cite.full_start, cite.full_end) for cite in citations]

    pattern = r'%d\.(\d+)\b' % part
    section_number = re.search(pattern, section_title).group(1)
    return regParser.build_tree(
        section_body,
        exclude=skip,
        label=[str(part), section_number],
        title=section_title)
Пример #9
0
def build_section_tree(text, part):
    """Build the paragraph tree for one section.

    The section title must contain an identifier of the form
    ``<part>.<section>``. Spans matched by the regtext and appendix
    citation grammars, in that order, are excluded from paragraph parsing.
    """
    title, body = utils.title_body(text)

    exclude = []
    for grammar in (regtext_citation, appendix_citation):
        exclude.extend(
            (start, end) for _, start, end in grammar.scanString(body))

    section_match = re.search(r'%d\.(\d+)\b' % part, title)
    section = section_match.group(1)
    return regParser.build_tree(
        body, exclude=exclude, label=[str(part), section], title=title)
def build(text, part):
    """Create the tree for the whole interpretation.

    The returned root node is labelled ``[str(part), Node.INTERP_MARK]``
    and has type ``Node.INTERP``. Its children come from the header-delimited
    segments of the body, if there are any.
    """
    part = str(part)
    title, body = utils.title_body(text)
    header_spans = segment_by_header(body, part)

    if header_spans:
        parsed = [segment_tree(body[lo:hi], part, [part])
                  for lo, hi in header_spans]
        first_start = header_spans[0][0]
        root_text, root_children = body[:first_start], treeify(parsed)
    else:
        root_text, root_children = body, []

    return Node(root_text, root_children, [part, Node.INTERP_MARK], title,
                Node.INTERP)
Пример #11
0
def trees_from(text, part, parent_label):
    """Return one tree per appendix found in ``text``.

    ``text`` is the text of the entire regulation and ``part`` is the
    regulation's part (e.g. 1520). Each appendix is labelled with
    ``parent_label`` plus its letter. An appendix with detected sections is
    parsed by ``paragraph_tree``; any other is parsed by ``generic_tree``.
    """

    def appendix_tree(begin, end):
        # Build the tree for the appendix occupying text[begin:end].
        title, appendix = utils.title_body(text[begin:end])
        letter = carving.get_appendix_letter(title, part)
        label = parent_label + [letter]
        sections = carving.appendix_sections(appendix, letter)
        if not sections:
            return generic_tree(appendix, label, title)
        return paragraph_tree(letter, sections, appendix, label, title)

    return [appendix_tree(begin, end)
            for begin, end in carving.appendices(text)]
Пример #12
0
def trees_from(text, part, parent_label):
    """Build the list of appendix trees for a regulation.

    ``text`` is the full regulation text; ``part`` is the regulation's part
    (e.g. 1520). The result holds one node per appendix, in the order in
    which ``carving.appendices`` reports them.
    """
    appendix_nodes = []
    for lo, hi in carving.appendices(text):
        heading, content = utils.title_body(text[lo:hi])
        letter = carving.get_appendix_letter(heading, part)
        node_label = parent_label + [letter]
        section_spans = carving.appendix_sections(content, letter)
        # Use the paragraph parser only when sections were detected.
        node = (
            paragraph_tree(letter, section_spans, content, node_label,
                           heading)
            if section_spans
            else generic_tree(content, node_label, heading)
        )
        appendix_nodes.append(node)
    return appendix_nodes
Пример #13
0
def generic_tree(text, label, title=None):
    """Build an appendix tree with the "generic" parser.

    The "generic" parser simply splits on Title Case and treats body text
    as the node content. Each segment becomes a child whose label is
    ``label`` plus the letter for its position. Text ahead of the first
    segment stays on the parent node. With no segments, a single node
    holding all of ``text`` is returned.
    """
    spans = generic.segments(text)
    if not spans:
        return Node(text, label=label, title=title, node_type=Node.APPENDIX)

    def segment_node(position, span):
        # One child node for the segment at this position.
        begin, finish = span
        heading, content = utils.title_body(text[begin:finish])
        suffix = letter_for(position)
        return Node(content, label=label + [suffix], title=heading,
                    node_type=Node.APPENDIX)

    children = [segment_node(position, span)
                for position, span in enumerate(spans)]
    intro = text[:spans[0][0]]
    return Node(intro, children, label, title, Node.APPENDIX)
Пример #14
0
def paragraph_tree(appendix_letter, sections, text, label, title=None):
    """Parse each section of an appendix with the paragraph parser.

    ``sections`` is a sequence of ``(begin, end)`` offsets into ``text``.
    Each section becomes a child labelled ``label`` plus its section
    number. Text ahead of the first section stays on the returned appendix
    node. With no sections, a single node holding all of ``text`` is
    returned.
    """
    if not sections:
        return Node(text, label=label, title=title, node_type=Node.APPENDIX)

    section_nodes = []
    for lo, hi in sections:
        heading, content = utils.title_body(text[lo:hi])
        number = carving.get_appendix_section_number(heading,
                                                     appendix_letter)

        # The full extent of each internal citation is excluded from parsing.
        skip = []
        for citation in internal_citations(content, Label(part=label[0])):
            skip.append((citation.full_start, citation.full_end))

        section_nodes.append(parParser.build_tree(
            content, exclude=skip, label=label + [number], title=heading))

    intro = text[:sections[0][0]]
    return Node(intro, section_nodes, label, title, Node.APPENDIX)
 def test_title_body_normal_case(self):
     # A title followed by a multi-line body: the title is split off and
     # the body keeps its leading newline.
     title = "This is a title"
     body = "Here is text that follows\nnewlines\n\n\nabout in the body"
     full_text = title + "\n" + body
     expected = (title, "\n" + body)
     self.assertEqual(expected, utils.title_body(full_text))
 def test_title_body_title_only(self):
     # Text without a newline is all title; the body is the empty string.
     title_only = "This is some long, long title with no body"
     expected = (title_only, "")
     self.assertEqual(expected, utils.title_body(title_only))
Пример #17
0
 def test_title_body_title_only(self):
     # A single line of text should come back as (title, "").
     source = "This is some long, long title with no body"
     result = utils.title_body(source)
     self.assertEqual((source, ""), result)
Пример #18
0
 def test_title_body_normal_case(self):
     # The first line is the title; the rest, including the newline that
     # follows the title, is the body.
     heading = "This is a title"
     remainder = "Here is text that follows\nnewlines\n\n\nabout in the body"
     result = utils.title_body(heading + "\n" + remainder)
     self.assertEqual((heading, "\n" + remainder), result)