def test_compress_context_in_tokenlists():
    tokenized = [
        tokens.Context(['123', 'Interpretations']),
        tokens.Paragraph.make(part='123', section='23', paragraph='a'),
        tokens.Verb(tokens.Verb.PUT, True),
        tokens.TokenList([
            tokens.Verb(tokens.Verb.POST, True),
            tokens.Paragraph.make(part='123', section='23',
                                  paragraphs=['a', '1']),
            tokens.Paragraph.make(paragraphs=[None, None, 'i']),
            tokens.Paragraph.make(section='23', paragraph='b')
        ])
    ]
    assert amdparser.compress_context_in_tokenlists(tokenized) == [
        tokens.Context(['123', 'Interpretations']),
        tokens.Paragraph.make(part='123', section='23', paragraph='a'),
        tokens.Verb(tokens.Verb.PUT, True),
        tokens.TokenList([
            tokens.Verb(tokens.Verb.POST, True),
            tokens.Paragraph.make(part='123', section='23',
                                  paragraphs=['a', '1']),
            tokens.Paragraph.make(part='123', section='23',
                                  paragraphs=['a', '1', 'i']),
            tokens.Paragraph.make(part='123', section='23', paragraph='b')
        ])
    ]

def test_compress_context_in_tokenlists(self):
    tokenized = [
        tokens.Context(['123', 'Interpretations']),
        tokens.Paragraph(part='123', section='23', paragraph='a'),
        tokens.Verb(tokens.Verb.PUT, True),
        tokens.TokenList([
            tokens.Verb(tokens.Verb.POST, True),
            tokens.Paragraph(part='123', section='23',
                             paragraphs=['a', '1']),
            tokens.Paragraph(paragraphs=[None, None, 'i']),
            tokens.Paragraph(section='23', paragraph='b')
        ])
    ]
    converted = amdparser.compress_context_in_tokenlists(tokenized)
    self.assertEqual(converted, [
        tokens.Context(['123', 'Interpretations']),
        tokens.Paragraph(part='123', section='23', paragraph='a'),
        tokens.Verb(tokens.Verb.PUT, True),
        tokens.TokenList([
            tokens.Verb(tokens.Verb.POST, True),
            tokens.Paragraph(
                part='123', section='23', paragraphs=['a', '1']),
            tokens.Paragraph(
                part='123', section='23', paragraphs=['a', '1', 'i']),
            tokens.Paragraph(part='123', section='23', paragraph='b')
        ])
    ])

def test_multiple_moves_not_even_number_of_elements_on_either_side():
    tokenized = [
        tokens.TokenList([
            tokens.Paragraph.make(part='444', sub='1'),
            tokens.Paragraph.make(part='444', sub='2')
        ]),
        tokens.Verb(tokens.Verb.MOVE, active=False),
        tokens.TokenList([tokens.Paragraph.make(part='444', sub='3')])
    ]
    assert tokenized == amdparser.multiple_moves(tokenized)

def test_contains_one_tokenlist(self):
    token_list = self.list_of_tokens()
    designate_token_2 = tokens.Verb(tokens.Verb.DESIGNATE, True)
    tokenized = [tokens.TokenList(token_list), designate_token_2]
    self.assertTrue(contains_one_tokenlist(tokenized))

    tokenized = [
        tokens.TokenList(token_list),
        designate_token_2,
        tokens.TokenList(token_list)
    ]
    self.assertFalse(contains_one_tokenlist(tokenized))

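# A minimal sketch of the predicate the test above exercises, inferred only
# from its two assertions (exactly one TokenList -> True, two -> False);
# the library's real contains_one_tokenlist may differ. Assumes the same
# `tokens` module the tests import.
def contains_one_tokenlist_sketch(tokenized):
    return sum(isinstance(t, tokens.TokenList) for t in tokenized) == 1
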
def test_context_to_paragraph_exceptions2():
    tokenized = [
        tokens.Verb(tokens.Verb.PUT, active=True),
        tokens.Context(['2']),
        tokens.TokenList([tokens.Paragraph.make(part='3')])
    ]
    assert tokenized == amdparser.context_to_paragraph(tokenized)

def test_separate_tokenlist():
    tokenized = [
        tokens.Context(['1']),
        tokens.TokenList([
            tokens.Verb(tokens.Verb.MOVE, active=True),
            tokens.Context([None, '2'])
        ]),
        tokens.Paragraph.make(sub='3'),
        tokens.TokenList([tokens.Paragraph.make(section='b')])
    ]
    assert amdparser.separate_tokenlist(tokenized) == [
        tokens.Context(['1']),
        tokens.Verb(tokens.Verb.MOVE, active=True),
        tokens.Context([None, '2']),
        tokens.Paragraph.make(sub='3'),
        tokens.Paragraph.make(section='b')
    ]

def test_multiple_intro_texts(self):
    text = ('the introductory text of paragraphs (a)(5)(ii) and '
            '(d)(5)(ii)')
    self.assertEqual(
        parse_text(text),
        [tokens.TokenList([
            tokens.Paragraph(paragraphs=['a', '5', 'ii'], field='text'),
            tokens.Paragraph(paragraphs=['d', '5', 'ii'], field='text'),
        ])])

def test_separate_tokenlist(self):
    tokenized = [
        tokens.Context(['1']),
        tokens.TokenList([
            tokens.Verb(tokens.Verb.MOVE, active=True),
            tokens.Context([None, '2'])
        ]),
        tokens.Paragraph([None, '3']),
        tokens.TokenList([tokens.Paragraph([None, None, 'b'])])
    ]
    converted = separate_tokenlist(tokenized)
    self.assertEqual(converted, [
        tokens.Context(['1']),
        tokens.Verb(tokens.Verb.MOVE, active=True),
        tokens.Context([None, '2']),
        tokens.Paragraph([None, '3']),
        tokens.Paragraph([None, None, 'b'])
    ])

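# A minimal sketch of the flattening both test_separate_tokenlist variants
# above expect: every TokenList is spliced into the surrounding stream.
# Inferred from the assertions alone, not from the real implementation; it
# relies only on TokenList being iterable (see test_tokenlist_iterable).
def separate_tokenlist_sketch(tokenized):
    converted = []
    for token in tokenized:
        if isinstance(token, tokens.TokenList):
            converted.extend(token)  # splice members into the stream
        else:
            converted.append(token)
    return converted
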
def test_example9(self):
    text = u"3. Amend § 5397.31 to revise paragraphs (a)(3)(ii), "
    text += "(a)(3)(iii), and (b)(3); and add paragraphs (a)(3)(iv), "
    text += "(a)(5)(iv), and (b)(2)(vii) to read as follows:"
    result = parse_text(text)
    self.assertEqual(result, [
        tokens.Context(['5397', None, '31']),
        tokens.Verb(tokens.Verb.PUT, active=True),
        tokens.TokenList([
            tokens.Paragraph([None, None, None, 'a', '3', 'ii']),
            tokens.Paragraph([None, None, None, 'a', '3', 'iii']),
            tokens.Paragraph([None, None, None, 'b', '3'])
        ]),
        tokens.Verb(tokens.Verb.POST, active=True),
        tokens.TokenList([
            tokens.Paragraph([None, None, None, 'a', '3', 'iv']),
            tokens.Paragraph([None, None, None, 'a', '5', 'iv']),
            tokens.Paragraph([None, None, None, 'b', '2', 'vii'])
        ]),
    ])

def test_example16(self):
    text = " A-30(a), A-30(b), A-30(c), A-30(d) are added"
    result = parse_text(text)
    self.assertEqual(result, [
        tokens.TokenList([
            tokens.Paragraph([None, "Appendix:A", "30(a)"]),
            tokens.Paragraph([None, "Appendix:A", "30(b)"]),
            tokens.Paragraph([None, "Appendix:A", "30(c)"]),
            tokens.Paragraph([None, "Appendix:A", "30(d)"]),
        ]),
        tokens.Verb(tokens.Verb.POST, active=False),
    ])

def test_tokenlist_iterable(self):
    token_list = tokens.TokenList([
        tokens.Paragraph([1005, None, 1]),
        tokens.Paragraph([1005, None, 2]),
        tokens.Paragraph([1005, None, 3]),
    ])
    count = 1
    for t in token_list:
        self.assertEqual(t.label, [1005, None, count])
        count += 1
    self.assertEqual(count, 4)

def test_multiple_moves_success():
    tokenized = [
        tokens.TokenList([
            tokens.Paragraph.make(part='444', sub='1'),
            tokens.Paragraph.make(part='444', sub='2')
        ]),
        tokens.Verb(tokens.Verb.MOVE, active=False),
        tokens.TokenList([
            tokens.Paragraph.make(part='444', sub='3'),
            tokens.Paragraph.make(part='444', sub='4')
        ])
    ]
    tokenized = amdparser.multiple_moves(tokenized)
    assert tokenized == [
        tokens.Verb(tokens.Verb.MOVE, active=True),
        tokens.Paragraph.make(part='444', sub='1'),
        tokens.Paragraph.make(part='444', sub='3'),
        tokens.Verb(tokens.Verb.MOVE, active=True),
        tokens.Paragraph.make(part='444', sub='2'),
        tokens.Paragraph.make(part='444', sub='4')
    ]

def test_example16(self):
    text = " A-30(a), A-30(b), A-30(c), A-30(d) are added"
    result = parse_text(text)
    self.assertEqual(result, [
        tokens.TokenList([
            tokens.Paragraph(appendix='A', section='30(a)'),
            tokens.Paragraph(appendix='A', section='30(b)'),
            tokens.Paragraph(appendix='A', section='30(c)'),
            tokens.Paragraph(appendix='A', section='30(d)')
        ]),
        tokens.Verb(tokens.Verb.POST, active=False),
    ])

def test_tokenlist_iterable(self):
    token_list = tokens.TokenList([
        tokens.Paragraph(part=1005, section=1),
        tokens.Paragraph(part=1005, section=2),
        tokens.Paragraph(part=1005, section=3)
    ])
    count = 1
    for t in token_list:
        self.assertEqual(t.label, [1005, None, count])
        count += 1
    self.assertEqual(count, 4)

def test_example18(self):
    text = 'Section 106.52(b)(1)(ii)(A) and (B) is revised'
    text += ' to read as follows'
    result = parse_text(text)
    self.assertEqual(result, [
        tokens.TokenList([
            tokens.Paragraph(['106', None, '52', 'b', '1', 'ii', 'A']),
            tokens.Paragraph([None, None, None, None, None, None, 'B']),
        ]),
        tokens.Verb(tokens.Verb.PUT, active=False)
    ])

def test_multiple_moves(self):
    tokenized = [
        tokens.TokenList([
            tokens.Paragraph(['444', '1']),
            tokens.Paragraph(['444', '2'])
        ]),
        tokens.Verb(tokens.Verb.MOVE, active=False),
        tokens.TokenList([
            tokens.Paragraph(['444', '3']),
            tokens.Paragraph(['444', '4'])
        ])
    ]
    tokenized = multiple_moves(tokenized)
    self.assertEqual(tokenized, [
        tokens.Verb(tokens.Verb.MOVE, active=True),
        tokens.Paragraph(['444', '1']),
        tokens.Paragraph(['444', '3']),
        tokens.Verb(tokens.Verb.MOVE, active=True),
        tokens.Paragraph(['444', '2']),
        tokens.Paragraph(['444', '4'])
    ])

    # Not even number of elements on either side
    tokenized = [
        tokens.TokenList([
            tokens.Paragraph(['444', '1']),
            tokens.Paragraph(['444', '2'])
        ]),
        tokens.Verb(tokens.Verb.MOVE, active=False),
        tokens.TokenList([tokens.Paragraph(['444', '3'])])
    ]
    self.assertEqual(tokenized, multiple_moves(tokenized))

    # Paragraphs on either side of a move
    tokenized = [
        tokens.Paragraph(['444', '1']),
        tokens.Verb(tokens.Verb.MOVE, active=False),
        tokens.Paragraph(['444', '3'])
    ]
    self.assertEqual(tokenized, multiple_moves(tokenized))

def test_multiple_moves(self):
    tokenized = [
        tokens.TokenList([
            tokens.Paragraph(part='444', sub='1'),
            tokens.Paragraph(part='444', sub='2')
        ]),
        tokens.Verb(tokens.Verb.MOVE, active=False),
        tokens.TokenList([
            tokens.Paragraph(part='444', sub='3'),
            tokens.Paragraph(part='444', sub='4')
        ])
    ]
    tokenized = amdparser.multiple_moves(tokenized)
    self.assertEqual(tokenized, [
        tokens.Verb(tokens.Verb.MOVE, active=True),
        tokens.Paragraph(part='444', sub='1'),
        tokens.Paragraph(part='444', sub='3'),
        tokens.Verb(tokens.Verb.MOVE, active=True),
        tokens.Paragraph(part='444', sub='2'),
        tokens.Paragraph(part='444', sub='4')
    ])

    # Not even number of elements on either side
    tokenized = [
        tokens.TokenList([
            tokens.Paragraph(part='444', sub='1'),
            tokens.Paragraph(part='444', sub='2')
        ]),
        tokens.Verb(tokens.Verb.MOVE, active=False),
        tokens.TokenList([tokens.Paragraph(part='444', sub='3')])
    ]
    self.assertEqual(tokenized, amdparser.multiple_moves(tokenized))

    # Paragraphs on either side of a move
    tokenized = [
        tokens.Paragraph(part='444', sub='1'),
        tokens.Verb(tokens.Verb.MOVE, active=False),
        tokens.Paragraph(part='444', sub='3')
    ]
    self.assertEqual(tokenized, amdparser.multiple_moves(tokenized))

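# A minimal sketch of the pairing behavior the multiple_moves tests above
# describe: a passive MOVE between two equal-length TokenLists expands into
# one active MOVE per (source, destination) pair; any other shape is left
# untouched. Inferred from the tests; the `verb` and `active` attribute
# names on Verb are assumptions.
def multiple_moves_sketch(tokenized):
    converted = []
    i = 0
    while i < len(tokenized):
        chunk = tokenized[i:i + 3]
        if (len(chunk) == 3 and
                isinstance(chunk[0], tokens.TokenList) and
                isinstance(chunk[2], tokens.TokenList) and
                isinstance(chunk[1], tokens.Verb) and
                chunk[1].verb == tokens.Verb.MOVE and
                not chunk[1].active):
            sources, dests = list(chunk[0]), list(chunk[2])
            if len(sources) == len(dests):
                for src, dest in zip(sources, dests):
                    converted.append(
                        tokens.Verb(tokens.Verb.MOVE, active=True))
                    converted.extend([src, dest])
                i += 3
                continue
        converted.append(tokenized[i])
        i += 1
    return converted
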
def test_example13(self):
    text = "h. Under Section 6363.36, add comments 36(a), 36(b) and "
    text += "36(d)."
    result = parse_text(text)
    self.assertEqual(result, [
        tokens.Context(['6363', None, '36'], certain=True),
        tokens.Verb(tokens.Verb.POST, active=True),
        # We assume that lists of comments are not context
        tokens.TokenList([
            tokens.Paragraph(is_interp=True, section='36', paragraph=p)
            for p in ('(a)', '(b)', '(d)')
        ])
    ])

def test_example4(self):
    text = "b. Add Model Forms E-11 through E-15."
    result = parse_text(text)
    self.assertEqual(result, [
        tokens.Verb(tokens.Verb.POST, active=True),
        tokens.TokenList([
            tokens.Paragraph([None, 'Appendix:E', '11']),
            tokens.Paragraph([None, 'Appendix:E', '12']),
            tokens.Paragraph([None, 'Appendix:E', '13']),
            tokens.Paragraph([None, 'Appendix:E', '14']),
            tokens.Paragraph([None, 'Appendix:E', '15'])
        ])
    ])

def test_example4(self):
    text = "b. Add Model Forms E-11 through E-15."
    result = parse_text(text)
    self.assertEqual(result, [
        tokens.Verb(tokens.Verb.POST, active=True),
        tokens.TokenList([
            tokens.Paragraph(appendix='E', section='11'),
            tokens.Paragraph(appendix='E', section='12'),
            tokens.Paragraph(appendix='E', section='13'),
            tokens.Paragraph(appendix='E', section='14'),
            tokens.Paragraph(appendix='E', section='15')
        ])
    ])

def test_example12(self):
    text = "comment 31(b), amend paragraph 31(b)(2) by adding "
    text += "paragraphs 4 through 6;"
    result = parse_text(text)
    self.assertEqual(result, [
        tokens.Context([None, 'Interpretations', '31', '(b)']),
        tokens.Context([None, 'Interpretations', '31', '(b)(2)']),
        tokens.Verb(tokens.Verb.POST, active=True),
        tokens.TokenList([
            tokens.Paragraph([None, 'Interpretations', None, None, '4']),
            tokens.Paragraph([None, 'Interpretations', None, None, '5']),
            tokens.Paragraph([None, 'Interpretations', None, None, '6'])
        ])
    ])

def test_compress_context_in_tokenlists(self):
    tokenized = [
        tokens.Context(['123', 'Interpretations']),
        tokens.Paragraph(['123', None, '23', 'a']),
        tokens.Verb(tokens.Verb.PUT, True),
        tokens.TokenList([
            tokens.Verb(tokens.Verb.POST, True),
            tokens.Paragraph(['123', None, '23', 'a', '1']),
            tokens.Paragraph([None, None, None, None, None, 'i']),
            tokens.Paragraph([None, None, '23', 'b'])
        ])
    ]
    converted = compress_context_in_tokenlists(tokenized)
    self.assertEqual(converted, [
        tokens.Context(['123', 'Interpretations']),
        tokens.Paragraph(['123', None, '23', 'a']),
        tokens.Verb(tokens.Verb.PUT, True),
        tokens.TokenList([
            tokens.Verb(tokens.Verb.POST, True),
            tokens.Paragraph(['123', None, '23', 'a', '1']),
            tokens.Paragraph(['123', None, '23', 'a', '1', 'i']),
            tokens.Paragraph(['123', None, '23', 'b'])
        ])
    ])

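# A minimal sketch of the label-filling the three compress-context test
# variants above exercise: inside a TokenList, a paragraph's leading None
# label components are inherited from the most recently seen paragraph.
# Inferred from the expected outputs only; mutating `label` in place is a
# simplification, not necessarily what the real code does.
def compress_context_in_tokenlists_sketch(tokenized):
    prev_label = []
    converted = []
    for token in tokenized:
        if isinstance(token, tokens.Paragraph):
            prev_label = token.label
            converted.append(token)
        elif isinstance(token, tokens.TokenList):
            compressed = []
            for inner in token:
                if isinstance(inner, tokens.Paragraph):
                    # count the leading Nones, then fill that prefix from
                    # the previous label (padded in case it is too short)
                    missing = 0
                    while (missing < len(inner.label) and
                           inner.label[missing] is None):
                        missing += 1
                    prefix = (prev_label + [None] * missing)[:missing]
                    inner.label = prefix + inner.label[missing:]
                    prev_label = inner.label
                compressed.append(inner)
            converted.append(tokens.TokenList(compressed))
        else:
            converted.append(token)
    return converted
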
def test_example15(self):
    text = "paragraphs (a)(1)(iii), (a)(1)(iv)(B), (c)(2) introductory "
    text += 'text and (c)(2)(ii)(A)(<E T="03">2</E>) redesignating '
    text += "paragraph (c)(2)(iii) as paragraph (c)(2)(iv),"
    result = parse_text(text)
    expected = [
        tokens.TokenList([
            tokens.Paragraph(paragraphs=['a', '1', 'iii']),
            tokens.Paragraph(paragraphs=['a', '1', 'iv', 'B']),
            tokens.Paragraph(paragraphs=['c', '2'],
                             field=tokens.Paragraph.TEXT_FIELD),
            tokens.Paragraph(paragraphs=['c', '2', 'ii', 'A'])
        ]),
        tokens.Verb(tokens.Verb.MOVE, active=True),
        tokens.Paragraph(paragraphs=['c', '2', 'iii']),
        tokens.Paragraph(paragraphs=['c', '2', 'iv'])
    ]
    self.assertEqual(result, expected)

def test_example11(self):
    text = u"Amend § 1005.36 to revise the section heading and "
    text += "paragraphs (a) and (b), and to add paragraph (d) to read "
    text += "as follows:"
    result = parse_text(text)
    self.assertEqual(result, [
        tokens.Context(['1005', None, '36']),
        tokens.Verb(tokens.Verb.PUT, active=True),
        tokens.Paragraph([], field=tokens.Paragraph.HEADING_FIELD),
        tokens.AndToken(),
        tokens.TokenList([tokens.Paragraph(paragraph='a'),
                          tokens.Paragraph(paragraph='b')]),
        tokens.AndToken(),
        tokens.Verb(tokens.Verb.POST, active=True),
        tokens.Paragraph(paragraph='d'),
    ])

def test_context_to_paragraph_exceptions(self):
    tokenized = [
        tokens.Verb(tokens.Verb.PUT, active=True),
        tokens.Context(['2']),
        tokens.Paragraph(['3'])
    ]
    converted = context_to_paragraph(tokenized)
    self.assertEqual(tokenized, converted)

    tokenized = [
        tokens.Verb(tokens.Verb.PUT, active=True),
        tokens.Context(['2']),
        tokens.TokenList([tokens.Paragraph(['3'])])
    ]
    converted = context_to_paragraph(tokenized)
    self.assertEqual(tokenized, converted)

def test_example8(self):
    text = "2. On page 8765 through 8767, in Appendix A to Part 1234, "
    text += "Model Forms A-15 through A-19 are corrected to read as "
    text += "follows:"
    result = parse_text(text)
    self.assertEqual(result, [
        tokens.Context(['1234', 'Appendix:A'], certain=True),
        tokens.TokenList([
            tokens.Paragraph([None, 'Appendix:A', '15']),
            tokens.Paragraph([None, 'Appendix:A', '16']),
            tokens.Paragraph([None, 'Appendix:A', '17']),
            tokens.Paragraph([None, 'Appendix:A', '18']),
            tokens.Paragraph([None, 'Appendix:A', '19'])
        ]),
        tokens.Verb(tokens.Verb.PUT, active=False),
    ])

def test_example6(self):
    """Although this includes the term 'Commentary', we assume these
    are not interpretations and handle the problem of merging later"""
    text = u"a. Add new Commentary for §§ 6363.30, 6363.31, 6363.32,"
    text += " 6363.33, 6363.34, 6363.35, and 6363.36."
    result = parse_text(text)
    self.assertEqual(result, [
        tokens.Verb(tokens.Verb.POST, active=True),
        tokens.TokenList([
            tokens.Paragraph(['6363', None, '30']),
            tokens.Paragraph(['6363', None, '31']),
            tokens.Paragraph(['6363', None, '32']),
            tokens.Paragraph(['6363', None, '33']),
            tokens.Paragraph(['6363', None, '34']),
            tokens.Paragraph(['6363', None, '35']),
            tokens.Paragraph(['6363', None, '36']),
        ])
    ])

def curried(match=None):
    pars = []
    matches = [match.head] + list(match.tail)
    for match in matches:
        match_as_list = listify(match)
        next_par = tokens.Paragraph(match_as_list)
        next_lab = next_par.label
        if match[-1] == 'text' or force_text_field:
            next_par.field = tokens.Paragraph.TEXT_FIELD
        if match.through:
            # Iterate through, creating paragraph tokens: expand an
            # "X through Y" range, choosing a strategy by label shape
            prev_lab = pars[-1].label
            if '(' in prev_lab[-1] and '(' in next_lab[-1]:
                pars.extend(_through_paren(prev_lab, next_lab))
            elif len(prev_lab) == 3:
                pars.extend(_through_sect(prev_lab, next_lab))
            elif len(prev_lab) > 3:
                pars.extend(_through_paragraph(prev_lab, next_lab))
        pars.append(next_par)
    return tokens.TokenList(pars)

def paragraph_token_list(self):
    paragraph_tokens = [
        tokens.Paragraph(['200', '1', 'a']),
        tokens.Paragraph(['200', '1', 'b'])
    ]
    return tokens.TokenList(paragraph_tokens)

def _paragraph_token_list():
    return tokens.TokenList([
        tokens.Paragraph.make(part='200', sub='1', section='a'),
        tokens.Paragraph.make(part='200', sub='1', section='b')
    ])