Example No. 1
def context_to_paragraph(tokenized):
    """Generally, section numbers, subparts, etc. are good contextual clues,
    but sometimes they are the object of manipulation."""

    #   Don't modify anything if there are already paragraphs or no verbs
    for token in tokenized:
        if isinstance(token, tokens.Paragraph):
            return tokenized
        elif (isinstance(token, tokens.TokenList)
              and any(isinstance(t, tokens.Paragraph) for t in token.tokens)):
            return tokenized
    # Copy so we don't mutate the input list
    converted = list(tokenized)
    verb_seen = False
    for i in range(len(converted)):
        token = converted[i]
        if isinstance(token, tokens.Verb):
            verb_seen = True
        elif verb_seen and token.match(tokens.Context, certain=False):
            converted[i] = tokens.Paragraph(token.label)
    return converted
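
A minimal sketch of what this pass does, assuming the token constructors shown in the test examples further down and that a Context's certain flag defaults to False; the labels are invented for illustration. Once a Verb has been seen, each Context that follows is re-read as a Paragraph, while anything before the verb is left alone.

# Illustrative only, not taken from the original test suite.
before = [
    tokens.Context(['111', None, '22']),        # before the verb: stays a Context
    tokens.Verb(tokens.Verb.PUT, active=True),
    tokens.Context(['111', None, '23']),        # after the verb: becomes a Paragraph
]
after = context_to_paragraph(before)
# after == [Context(['111', None, '22']), Verb(PUT), Paragraph(['111', None, '23'])]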
Example No. 2
def deal_with_subpart_adds(tokenized):
    """If we have a designate verb, and a token list, we're going to
    change the context to a Paragraph. Because it's not a context, it's
    part of the manipulation."""

    # Ensure that we only have one of each: designate verb, a token list and
    # a context
    verb_exists = contains_one_designate_token(tokenized)
    list_exists = contains_one_tokenlist(tokenized)
    context_exists = contains_one_context(tokenized)

    if verb_exists and list_exists and context_exists:
        token_list = []
        for token in tokenized:
            if isinstance(token, tokens.Context):
                token_list.append(tokens.Paragraph(token.label))
            else:
                token_list.append(token)
        return token_list, True
    else:
        return tokenized, False
Example No. 3
def and_token_resolution(tokenized):
    """Troublesome case where a Context should be a Paragraph, but the only
    indicator is the presence of an "and" token afterwards. We'll likely
    want to expand this step in the future, but for now, we only catch a few
    cases"""
    # compress "and" tokens
    tokenized = zip(tokenized, tokenized[1:] + [None])
    tokenized = [
        l for l, r in tokenized if l != r or not l.match(tokens.AndToken)
    ]

    # we'll strip out all "and" tokens in just a moment, but as a first
    # pass, remove all those preceded by a verb (which makes the following
    # logic simpler).
    tokenized = list(reversed(tokenized))
    tokenized = zip(tokenized, tokenized[1:] + [None])
    tokenized = list(
        reversed([
            l for l, r in tokenized if not l.match(tokens.AndToken) or not r
            or not r.match(tokens.Verb)
        ]))

    # check for the pattern in question
    final_tokens = []
    idx = 0
    while idx < len(tokenized) - 3:
        t1, t2, t3, t4 = tokenized[idx:idx + 4]
        if (t1.match(tokens.Verb) and t2.match(tokens.Context)
                and t3.match(tokens.AndToken)
                and t4.match(tokens.Paragraph, tokens.TokenList)):
            final_tokens.append(t1)
            final_tokens.append(tokens.Paragraph(t2.label))
            final_tokens.append(t4)
            idx += 3  # not 4 as one will appear below
        elif not t1.match(tokens.AndToken):
            final_tokens.append(t1)
        idx += 1

    final_tokens.extend(tokenized[idx:])
    return final_tokens
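
The pattern this loop rewrites is roughly [Verb, Context, AndToken, Paragraph] becoming [Verb, Paragraph, Paragraph]. A hedged sketch, assuming tokens.AndToken() takes no arguments and with invented labels:

# Illustrative only: "revise (a) and (b)", where (a) was parsed as a Context.
before = [
    tokens.Verb(tokens.Verb.PUT, active=True),
    tokens.Context([None, None, None, 'a']),
    tokens.AndToken(),
    tokens.Paragraph([None, None, None, 'b']),
]
# and_token_resolution(before) should yield:
# [Verb(PUT), Paragraph([None, None, None, 'a']), Paragraph([None, None, None, 'b'])]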
Example No. 4
def deal_with_subpart_adds(tokenized):
    """If we have a designate verb, and a token list, we're going to
    change the context to a Paragraph. Because it's not a context, it's
    part of the manipulation."""

    # Ensure that we only have one of each: designate verb, a token list and
    # a context
    verb_exists = len(
        matching(tokenized, tokens.Verb, verb=tokens.Verb.DESIGNATE)) == 1
    list_exists = len(matching(tokenized, tokens.TokenList)) == 1
    context_exists = len(matching(tokenized, tokens.Context)) == 1

    if verb_exists and list_exists and context_exists:
        token_list = []
        for token in tokenized:
            if isinstance(token, tokens.Context):
                token_list.append(tokens.Paragraph(token.label))
            else:
                token_list.append(token)
        return token_list, True
    else:
        return tokenized, False
Example No. 5
def _through_paren(prev_lab, next_lab):
    """Expand "through" for labels with embedded paragraphs (e.g. 12(c))"""
    lhs, rhs = prev_lab[-1], next_lab[-1]
    lhs_idx, rhs_idx = lhs.rindex('('), rhs.rindex('(')
    # Check if the previous and next labels are "through"-able. For example,
    # we can't compute A-14(a)(2) through B-14(a)(4) nor can we compute
    # A-14(a)(1) through A-14(b)(3)
    if lhs[:lhs_idx] != rhs[:rhs_idx] or prev_lab[:-1] != next_lab[:-1]:
        logging.warning("Bad use of 'through': %s %s", prev_lab, next_lab)
        return []
    else:
        prefix = lhs[:lhs_idx + 1]
        lhs, rhs = lhs[lhs_idx + 1:-1], rhs[rhs_idx + 1:-1]
        for level in p_levels:
            if lhs in level and rhs in level:
                lidx, ridx = level.index(lhs), level.index(rhs)
                if lidx < ridx:
                    return [tokens.Paragraph(prev_lab[:-1]
                                             + [prefix + level[i] + ')'])
                            for i in range(lidx + 1, ridx)]
        logging.warning("Error with 'through': %s %s", prev_lab, next_lab)
        return []
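
For instance, assuming p_levels is the usual list of paragraph-marker sequences (so '2' and '5' sit in the same digit level), a call along these lines would fill in the skipped paragraphs; the labels are invented:

# Illustrative only.
_through_paren(['1005', '12(a)(2)'], ['1005', '12(a)(5)'])
# -> [Paragraph(['1005', '12(a)(3)']), Paragraph(['1005', '12(a)(4)'])]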
 def test_compress_context_in_tokenlists(self):
     tokenized = [
         tokens.Context(['123', 'Interpretations']),
         tokens.Paragraph(['123', None, '23', 'a']),
         tokens.Verb(tokens.Verb.PUT, True),
         tokens.TokenList([
             tokens.Verb(tokens.Verb.POST, True),
             tokens.Paragraph(['123', None, '23', 'a', '1']),
             tokens.Paragraph([None, None, None, None, None, 'i']),
             tokens.Paragraph([None, None, '23', 'b'])
         ])
     ]
     converted = compress_context_in_tokenlists(tokenized)
     self.assertEqual(converted, [
         tokens.Context(['123', 'Interpretations']),
         tokens.Paragraph(['123', None, '23', 'a']),
         tokens.Verb(tokens.Verb.PUT, True),
         tokens.TokenList([
             tokens.Verb(tokens.Verb.POST, True),
             tokens.Paragraph(['123', None, '23', 'a', '1']),
             tokens.Paragraph(['123', None, '23', 'a', '1', 'i']),
             tokens.Paragraph(['123', None, '23', 'b'])
         ])
     ])
 def test_make_amendments(self):
     tokenized = [
         tokens.Paragraph(['111']),
         tokens.Verb(tokens.Verb.PUT, active=True),
         tokens.Paragraph(['222']),
         tokens.Paragraph(['333']),
         tokens.Paragraph(['444']),
         tokens.Verb(tokens.Verb.DELETE, active=True),
         tokens.Paragraph(['555']),
         tokens.Verb(tokens.Verb.MOVE, active=True),
         tokens.Paragraph(['666']),
         tokens.Paragraph(['777'])
     ]
     amends = make_amendments(tokenized)
     self.assertEqual(amends, [
         Amendment(tokens.Verb.PUT, '222'),
         Amendment(tokens.Verb.PUT, '333'),
         Amendment(tokens.Verb.PUT, '444'),
         Amendment(tokens.Verb.DELETE, '555'),
         Amendment(tokens.Verb.MOVE, '666', '777')
     ])
 def test_example15(self):
     text = "paragraphs (a)(1)(iii), (a)(1)(iv)(B), (c)(2) introductory "
     text += 'text and (c)(2)(ii)(A)(<E T="03">2</E>) redesignating '
     text += "paragraph (c)(2)(iii) as paragraph (c)(2)(iv),"
     result = parse_text(text)
     expected = [
         tokens.TokenList([
             tokens.Paragraph([None, None, None, 'a', '1', 'iii']),
             tokens.Paragraph([None, None, None, 'a', '1', 'iv', 'B']),
             tokens.Paragraph([None, None, None, 'c', '2'],
                              field=tokens.Paragraph.TEXT_FIELD),
             tokens.Paragraph([None, None, None, 'c', '2', 'ii', 'A'])
         ]),
         tokens.Verb(tokens.Verb.MOVE, active=True),
         tokens.Paragraph([None, None, None, 'c', '2', 'iii']),
         tokens.Paragraph([None, None, None, 'c', '2', 'iv'])
     ]
     self.assertEqual(result, expected)
Example No. 9
 def test_make_instructions(self):
     tokenized = [
         tokens.Paragraph(part='111'),
         tokens.Verb(tokens.Verb.PUT, active=True),
         tokens.Paragraph(part='222'),
         tokens.Paragraph(part='333'),
         tokens.Paragraph(part='444'),
         tokens.Verb(tokens.Verb.DELETE, active=True),
         tokens.Paragraph(part='555'),
         tokens.Verb(tokens.Verb.MOVE, active=True),
         tokens.Paragraph(part='666'),
         tokens.Paragraph(part='777')
     ]
     with XMLBuilder("EREGS_INSTRUCTIONS") as ctx:
         ctx.PUT(label=222)
         ctx.PUT(label=333)
         ctx.PUT(label=444)
         ctx.DELETE(label=555)
         ctx.MOVE(label=666, destination=777)
     self.assertEqual(
         etree.tostring(amdparser.make_instructions(tokenized)),
         ctx.xml_str)
Example No. 10
def subpart_designation(tokenized):
    u"""If we have a designate verb, and a token list, we're going to
    change the context to a Paragraph. Because it's not a context, it's
    part of the manipulation.
    e.g. Designate §§ 1005.1 through 1005.20 as subpart A under the heading
    set forth above."""

    # Ensure that we only have one of each: designate verb, a token list and
    # a context
    verb_exists = len(
        matching(tokenized, tokens.Verb, verb=tokens.Verb.DESIGNATE)) == 1
    list_exists = len(matching(tokenized, tokens.TokenList)) == 1
    context_exists = len(matching(tokenized, tokens.Context)) == 1

    if verb_exists and list_exists and context_exists:
        token_list = []
        for token in tokenized:
            if isinstance(token, tokens.Context):
                token_list.append(tokens.Paragraph(token.label))
            else:
                token_list.append(token)
        return token_list, True
    else:
        return tokenized, False
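
A sketch of the docstring's designation case with made-up tokens; the ['1005', 'Subpart', 'A'] label form is an assumption borrowed from the get_destination tests elsewhere in these examples:

# Illustrative only: "Designate §§ 1005.1 through 1005.20 as subpart A".
before = [
    tokens.Verb(tokens.Verb.DESIGNATE, active=True),
    tokens.TokenList([
        tokens.Paragraph(['1005', None, '1']),
        tokens.Paragraph(['1005', None, '20']),
    ]),
    tokens.Context(['1005', 'Subpart', 'A']),
]
converted, handled = subpart_designation(before)
# handled is True; the trailing Context is now tokens.Paragraph(['1005', 'Subpart', 'A'])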
 def test_example9(self):
     text = u"3. Amend § 5397.31 to revise paragraphs (a)(3)(ii), "
     text += "(a)(3)(iii), and (b)(3); and add paragraphs (a)(3)(iv), "
     text += "(a)(5)(iv), and (b)(2)(vii) to read as follows:"
     result = parse_text(text)
     self.assertEqual(result, [
         tokens.Context(['5397', None, '31']),
         tokens.Verb(tokens.Verb.PUT, active=True),
         tokens.TokenList([
             tokens.Paragraph([None, None, None, 'a', '3', 'ii']),
             tokens.Paragraph([None, None, None, 'a', '3', 'iii']),
             tokens.Paragraph([None, None, None, 'b', '3'])
         ]),
         tokens.Verb(tokens.Verb.POST, active=True),
         tokens.TokenList([
             tokens.Paragraph([None, None, None, 'a', '3', 'iv']),
             tokens.Paragraph([None, None, None, 'a', '5', 'iv']),
             tokens.Paragraph([None, None, None, 'b', '2', 'vii'])
         ]),
     ])
 def paragraph_token_list(self):
     paragraph_tokens = [
         tokens.Paragraph(['200', '1', 'a']),
         tokens.Paragraph(['200', '1', 'b'])
     ]
     return tokens.TokenList(paragraph_tokens)
Example No. 13
 def paragraph_token_list(self):
     paragraph_tokens = [
         tokens.Paragraph(part='200', sub='1', section='a'),
         tokens.Paragraph(part='200', sub='1', section='b')
     ]
     return tokens.TokenList(paragraph_tokens)
Example No. 14
    def test_get_destination_normal(self):
        subpart_token = tokens.Paragraph(part='205', subpart='A')
        tokenized = [subpart_token]

        self.assertEqual(amdparser.get_destination(tokenized, '205'),
                         '205-Subpart:A')
Example No. 15
    def test_get_destination_no_reg_part(self):
        subpart_token = tokens.Paragraph(subpart='J')
        tokenized = [subpart_token]

        self.assertEqual(amdparser.get_destination(tokenized, '205'),
                         '205-Subpart:J')
Example No. 16
    unified.marker_appendix +
    Optional(Marker("to") + unified.marker_part)
).setParseAction(
    lambda m: tokens.Context([m.part, 'Appendix:' + m.appendix],
                             bool(m.certain)))
section = (
    context_certainty +
    atomic.section_marker +
    unified.part_section
).setParseAction(
    lambda m: tokens.Context([m.part, None, m.section], bool(m.certain)))


#   Paragraph components (used when not replacing the whole paragraph)
section_heading = Marker("heading").setParseAction(
    lambda _: tokens.Paragraph([], field=tokens.Paragraph.HEADING_FIELD))
intro_text = intro_text_marker.copy().setParseAction(
    lambda _: tokens.Paragraph([], field=tokens.Paragraph.TEXT_FIELD))


#   Paragraphs
comment_p = (
    Word(string.digits).setResultsName("level2") +
    Optional(
        Suppress(".") + Word("ivxlcdm").setResultsName('level3') +
        Optional(
            Suppress(".") +
            Word(string.ascii_uppercase).setResultsName("level4"))))

section_heading_of = (
    Marker("heading") + of_connective +
Example No. 17
def _through_sect(prev_lab, next_lab):
    """Expand "through" for labels ending in a section number."""
    return [
        tokens.Paragraph(prev_lab[:2] + [str(i)])
        for i in range(int(prev_lab[-1]) + 1, int(next_lab[-1]))
    ]
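
For example, assuming labels have the shape [part, subpart, section], "§§ 1005.1 through 1005.4" would expand to the sections in between:

# Illustrative only.
_through_sect(['1005', None, '1'], ['1005', None, '4'])
# -> [Paragraph(['1005', None, '2']), Paragraph(['1005', None, '3'])]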
Example No. 18
    context_certainty + atomic.paragraph_marker +
    unified.depth2_p).setParseAction(lambda m: tokens.Context([
        None, 'Interpretations', None,
        _paren_join([m.p2, m.p3, m.p4, m.plaintext_p5, m.plaintext_p6])
    ], bool(m.certain)))
appendix = (context_certainty + unified.marker_appendix +
            Optional(Marker("to") + unified.marker_part)
            ).setParseAction(lambda m: tokens.Context(
                [m.part, 'Appendix:' + m.appendix], bool(m.certain)))
section = (context_certainty + atomic.section_marker +
           unified.part_section).setParseAction(lambda m: tokens.Context(
               [m.part, None, m.section], bool(m.certain)))

#   Paragraph components (used when not replacing the whole paragraph)
section_heading = Marker("heading").setParseAction(
    lambda _: tokens.Paragraph([], field=tokens.Paragraph.HEADING_FIELD))
intro_text = intro_text_marker.copy().setParseAction(
    lambda _: tokens.Paragraph([], field=tokens.Paragraph.TEXT_FIELD))

#   Paragraphs
comment_p = (Word(string.digits).setResultsName("level2") + Optional(
    Suppress(".") + Word("ivxlcdm").setResultsName('level3') + Optional(
        Suppress(".") + Word(string.ascii_uppercase).setResultsName("level4")))
             )

section_heading_of = (
    Marker("heading") + of_connective +
    unified.marker_part_section).setParseAction(lambda m: tokens.Paragraph(
        part=m.part, section=m.section, field=tokens.Paragraph.HEADING_FIELD))

section_paragraph_heading_of = (
Example No. 19
    def test_multiple_moves(self):
        tokenized = [
            tokens.TokenList([
                tokens.Paragraph(part='444', sub='1'),
                tokens.Paragraph(part='444', sub='2')
            ]),
            tokens.Verb(tokens.Verb.MOVE, active=False),
            tokens.TokenList([
                tokens.Paragraph(part='444', sub='3'),
                tokens.Paragraph(part='444', sub='4')
            ])
        ]
        tokenized = amdparser.multiple_moves(tokenized)
        self.assertEqual(tokenized, [
            tokens.Verb(tokens.Verb.MOVE, active=True),
            tokens.Paragraph(part='444', sub='1'),
            tokens.Paragraph(part='444', sub='3'),
            tokens.Verb(tokens.Verb.MOVE, active=True),
            tokens.Paragraph(part='444', sub='2'),
            tokens.Paragraph(part='444', sub='4')
        ])

        # Not even number of elements on either side
        tokenized = [
            tokens.TokenList([
                tokens.Paragraph(part='444', sub='1'),
                tokens.Paragraph(part='444', sub='2')
            ]),
            tokens.Verb(tokens.Verb.MOVE, active=False),
            tokens.TokenList([tokens.Paragraph(part='444', sub='3')])
        ]
        self.assertEqual(tokenized, amdparser.multiple_moves(tokenized))

        # Paragraphs on either side of a move
        tokenized = [
            tokens.Paragraph(part='444', sub='1'),
            tokens.Verb(tokens.Verb.MOVE, active=False),
            tokens.Paragraph(part='444', sub='3')
        ]
        self.assertEqual(tokenized, amdparser.multiple_moves(tokenized))
    def test_multiple_moves(self):
        tokenized = [
            tokens.TokenList([
                tokens.Paragraph(['444', '1']),
                tokens.Paragraph(['444', '2'])
            ]),
            tokens.Verb(tokens.Verb.MOVE, active=False),
            tokens.TokenList([
                tokens.Paragraph(['444', '3']),
                tokens.Paragraph(['444', '4'])
            ])
        ]
        tokenized = multiple_moves(tokenized)
        self.assertEqual(tokenized, [
            tokens.Verb(tokens.Verb.MOVE, active=True),
            tokens.Paragraph(['444', '1']),
            tokens.Paragraph(['444', '3']),
            tokens.Verb(tokens.Verb.MOVE, active=True),
            tokens.Paragraph(['444', '2']),
            tokens.Paragraph(['444', '4'])
        ])

        # Not even number of elements on either side
        tokenized = [
            tokens.TokenList([
                tokens.Paragraph(['444', '1']),
                tokens.Paragraph(['444', '2'])
            ]),
            tokens.Verb(tokens.Verb.MOVE, active=False),
            tokens.TokenList([tokens.Paragraph(['444', '3'])])
        ]
        self.assertEqual(tokenized, multiple_moves(tokenized))

        # Paragraphs on either side of a move
        tokenized = [
            tokens.Paragraph(['444', '1']),
            tokens.Verb(tokens.Verb.MOVE, active=False),
            tokens.Paragraph(['444', '3'])
        ]
        self.assertEqual(tokenized, multiple_moves(tokenized))
    def test_get_destination_normal(self):
        subpart_token = tokens.Paragraph(['205', 'Subpart', 'A'])
        tokenized = [subpart_token]

        self.assertEqual(get_destination(tokenized, '205'), '205-Subpart-A')
    def test_get_destination_no_reg_part(self):
        subpart_token = tokens.Paragraph([None, 'Subpart', 'J'])
        tokenized = [subpart_token]

        self.assertEqual(get_destination(tokenized, '205'), '205-Subpart-J')
Example No. 23
    context_certainty + atomic.paragraph_marker +
    unified.depth2_p).setParseAction(lambda m: tokens.Context([
        None, 'Interpretations', None,
        _paren_join([m.p2, m.p3, m.p4, m.plaintext_p5, m.plaintext_p6])
    ], bool(m.certain)))
appendix = (context_certainty + unified.marker_appendix +
            Optional(Marker("to") + unified.marker_part)
            ).setParseAction(lambda m: tokens.Context(
                [m.part, 'Appendix:' + m.appendix], bool(m.certain)))
section = (context_certainty + atomic.section_marker +
           unified.part_section).setParseAction(lambda m: tokens.Context(
               [m.part, None, m.section], bool(m.certain)))

#   Paragraph components (used when not replacing the whole paragraph)
section_heading = Marker("heading").setParseAction(
    lambda _: tokens.Paragraph([], field=tokens.Paragraph.HEADING_FIELD))
intro_text = intro_text_marker.copy().setParseAction(
    lambda _: tokens.Paragraph([], field=tokens.Paragraph.TEXT_FIELD))

#   Paragraphs
comment_p = (Word(string.digits).setResultsName("level2") + Optional(
    Suppress(".") + Word("ivxlcdm").setResultsName('level3') + Optional(
        Suppress(".") + Word(string.ascii_uppercase).setResultsName("level4")))
             )

section_heading_of = (
    Marker("heading") + of_connective +
    unified.marker_part_section).setParseAction(lambda m: tokens.Paragraph(
        [m.part, None, m.section], field=tokens.Paragraph.HEADING_FIELD))

section_paragraph_heading_of = (
 def test_compress_context_initial_context(self):
     tokenized = [tokens.Paragraph([None, None, None, 'q'])]
     converted, _ = compress_context(tokenized, ['111', None, '12'])
     self.assertEqual(converted,
                      [tokens.Paragraph(['111', None, '12', 'q'])])