def test_excluded_offsets(self):
    """excluded_offsets is expected to hand back the positions of the
    scoped terms stored under the requested label, and an empty list
    when no stored term carries that label."""
    terms = Terms(None)
    terms.scoped_terms['_'] = [
        Ref('term', 'lablab', (4, 6)),
        Ref('other', 'lablab', (8, 9)),
        Ref('more', 'nonnon', (1, 8)),
    ]
    # Each scenario: (label queried, text passed in, expected offsets)
    scenarios = (
        ('lablab', 'Some text', [(4, 6), (8, 9)]),
        ('nonnon', 'Other', [(1, 8)]),
        ('ababab', 'Ab ab ab', []),
    )
    for label, text, expected in scenarios:
        self.assertEqual(expected, terms.excluded_offsets(label, text))
def test_excluded_offsets_blacklist_per_reg(self):
    """per_regulation_ignores is expected to keep the exclusions it is
    given and append the span of an ignore phrase registered for the
    regulation part ('12') found in the text."""
    terms = Terms(None)
    terms.scoped_terms['_'] = [
        Ref('bourgeois', '12-Q-2', 'Def'),
        Ref('consumer', '12-Q-3', 'Def'),
    ]
    # NOTE(review): this mutates module-level settings; presumably the
    # surrounding test fixture restores them -- confirm.
    settings.IGNORE_DEFINITIONS_IN['ALL'] = ['bourgeois pig']
    settings.IGNORE_DEFINITIONS_IN['12'] = ['consumer price index']

    already_excluded = [(0, 4)]
    label_parts = ['12', '2']
    text = 'There is a consumer price index'
    result = terms.per_regulation_ignores(
        already_excluded, label_parts, text)

    # 'consumer price index' occupies characters 11..31 of the text
    self.assertEqual([(0, 4), (11, 31)], result)
def test_is_exclusion(self):
    """is_exclusion should only be true when the term is already in
    scope and the node's text uses the "does not include" phrasing."""
    terms = Terms(None)
    node = Node('ex ex ex', label=['1111', '2'])

    def scope_with(term):
        # Scoped-terms mapping holding a single Ref for `term`
        return {('1111',): [Ref(term, '1', (0, 0))]}

    # Nothing has been assigned to scoped_terms yet
    self.assertFalse(terms.is_exclusion('ex', node))

    # A different term is in scope
    terms.scoped_terms = scope_with('abc')
    self.assertFalse(terms.is_exclusion('ex', node))

    # The term is in scope, but the text has no exclusion phrasing
    terms.scoped_terms = scope_with('ex')
    self.assertFalse(terms.is_exclusion('ex', node))

    # Term in scope and the text now carries the exclusion phrasing
    node.text = u'Something something the term “ex” does not include potato'
    self.assertTrue(terms.is_exclusion('ex', node))

    # Same text, but the term is no longer the one in scope
    terms.scoped_terms = scope_with('abc')
    self.assertFalse(terms.is_exclusion('ex', node))
def test_node_definitions_exclusion(self):
    """node_definitions returns an (included, excluded) pair: the first
    paragraph should yield a definition of "bologna", while the child
    paragraph ("... does not include ...") should yield an exclusion."""
    defining = Node(u'“Bologna” is a type of deli meat',
                    label=['111', '1'])
    excluding = Node(
        u'Let us not forget that the term “bologna” does not '
        + 'include turtle meat',
        label=['111', '1', 'a'])
    root = Node(label=['111'], children=[defining, excluding])
    terms = Terms(root)
    terms.pre_process()

    parents = ParentStack()
    parents.add(1, Node('Definitions'))

    expected_definition = [Ref('bologna', '111-1', (1, 8))]
    expected_exclusion = [Ref('bologna', '111-1-a', (33, 40))]

    included, excluded = terms.node_definitions(defining, parents)
    self.assertEqual(expected_definition, included)
    self.assertEqual([], excluded)

    # Put the freshly found definition in scope before checking the child
    terms.scoped_terms[('111', '1')] = included

    included, excluded = terms.node_definitions(excluding, parents)
    self.assertEqual([], included)
    self.assertEqual(expected_exclusion, excluded)
def test_process_label_in_node(self):
    """A term must not be highlighted inside the paragraph that defines
    it; other paragraphs that use the term should still be marked."""
    root = Node(label=['AB'], children=[
        Node("Defining secret phrase.", label=['AB', 'a']),
        Node("Has secret phrase. Then some other content",
             label=['AB', 'b']),
    ])
    terms = Terms(root)
    terms.scoped_terms = {
        ('AB',): [Ref("secret phrase", "AB-a", (9, 22))],
    }

    defining, using = root.children
    # The Ref's label ("AB-a") is the first child, i.e. the term is
    # defined there, so nothing should be produced for that node.
    self.assertEqual([], terms.process(defining))
    self.assertEqual(1, len(terms.process(using)))
def test_process(self):
    """process() on a node labelled 101-22-b-2-ii is expected to emit
    four layer elements, one each for abc:ref1, aabbcc:ref2, aaa:ref4
    and abcabc:ref5."""
    root = Node(children=[
        Node("ABC5", children=[Node("child")], label=['ref1']),
        Node("AABBCC5", label=['ref2']),
        Node("ABC3", label=['ref3']),
        Node("AAA3", label=['ref4']),
        Node("ABCABC3", label=['ref5']),
        Node("ABCOTHER", label=['ref6']),
        Node("ZZZOTHER", label=['ref7']),
    ])
    terms = Terms(root)
    terms.scoped_terms = {
        ("101", "22", "b", "2", "ii"): [
            Ref("abc", "ref1", (1, 2)),
            Ref("aabbcc", "ref2", (2, 3)),
        ],
        ("101", "22", "b"): [
            Ref("abc", "ref3", (3, 4)),
            Ref("aaa", "ref4", (4, 5)),
            Ref("abcabc", "ref5", (5, 6)),
        ],
        ("101", "22", "b", "2", "iii"): [
            Ref("abc", "ref6", (6, 7)),
            Ref("zzz", "ref7", (7, 8)),
        ],
    }

    # Check that the return value is correct
    target = Node("This has abc, aabbcc, aaa, abcabc, and zzz",
                  label=["101", "22", "b", "2", "ii"])
    layer_el = terms.process(target)
    self.assertEqual(4, len(layer_el))

    wanted = ['abc:ref1', 'aabbcc:ref2', 'aaa:ref4', 'abcabc:ref5']
    produced = [element['ref'] for element in layer_el]
    found = [ref in produced for ref in wanted]
    self.assertEqual([True, True, True, True], found)
def test_node_definitions_multiple_xml(self):
    """A single paragraph whose tagged text emphasizes several terms
    (joined by "and" / "or" / commas) should yield one definition per
    emphasized term, in order of appearance."""
    terms = Terms(None)
    parents = ParentStack()
    parents.add(0, Node(label=['9999']))

    # Each case: (plain text, tagged text, expected definitions)
    cases = [
        ("(4) Cold and dreary mean winter.",
         '(4) <E T="03">Cold</E> and <E T="03">dreary</E> mean '
         + 'winter.',
         [Ref('cold', '9999-4', (4, 8)),
          Ref('dreary', '9999-4', (13, 19))]),
        ("(i) Hot, humid, or dry means summer.",
         '(i) <E T="03">Hot</E>, <E T="03">humid</E>, or '
         + '<E T="03">dry</E> means summer.',
         [Ref('hot', '9999-4', (4, 7)),
          Ref('humid', '9999-4', (9, 14)),
          Ref('dry', '9999-4', (19, 22))]),
        ("(i) Hot tamale or tamale means nom nom",
         '(i) <E T="03">Hot tamale</E> or <E T="03"> tamale</E> '
         + 'means nom nom ',
         [Ref('hot tamale', '9999-4', (4, 14)),
          Ref('tamale', '9999-4', (18, 24))]),
    ]

    for text, tagged, expected in cases:
        node = Node(text, label=['9999', '4'])
        node.tagged_text = tagged
        included, _ = terms.node_definitions(node, parents)
        self.assertEqual(len(included), len(expected))
        for actual, wanted in zip(included, expected):
            self.assertEqual(actual, wanted)
def test_node_definitions(self):
    """Exercise node_definitions over several kinds of paragraphs, first
    with only the root node on the parent stack and then again after a
    'Definitions' parent has been pushed.

    Smart-quoted terms are expected to count as definitions only in the
    second situation; the emphasis-tagged and scope-phrase cases are
    expected to be found regardless."""
    terms = Terms(None)

    # (text, definitions expected once under a 'Definitions' parent)
    smart_quotes = [
        (u'This has a “worD” and then more',
         [Ref('word', 'aaa', (12, 16))]),
        (u'I have “anotheR word” term and “moree”',
         [Ref('another word', 'bbb', (8, 20)),
          Ref('moree', 'bbb', (32, 37))]),
        (u'But the child “DoeS sEe”?',
         [Ref('does see', 'ccc', (15, 23))]),
        (u'Start with “this,”', [Ref('this', 'hhh', (12, 16))]),
        (u'Start with “this;”', [Ref('this', 'iii', (12, 16))]),
        (u'Start with “this.”', [Ref('this', 'jjj', (12, 16))]),
        (u'As do “subchildren”',
         [Ref('subchildren', 'ddd', (7, 18))]),
    ]

    # Plain paragraphs that should never produce a definition
    no_defs = [
        u'This has no defs',
        u'Also has no terms',
        u'Still no terms, but',
        u'the next one does',
    ]

    # (text, tagged text, the single definition expected)
    xml_defs = [
        (u'(4) Thing means a thing that is defined',
         u'(4) <E T="03">Thing</E> means a thing that is defined',
         Ref('thing', 'eee', (4, 9))),
        (u'(e) Well-meaning lawyers means people who do weird things',
         u'(e) <E T="03">Well-meaning lawyers</E> means people who do '
         + 'weird things',
         Ref('well-meaning lawyers', 'fff', (4, 24))),
        (u'(e) Words have the same meaning as in a dictionary',
         u'(e) <E T="03">Words</E> have the same meaning as in a '
         + 'dictionary',
         Ref('words', 'ffg', (4, 9))),
        (u'(e) Banana has the same meaning as bonono',
         u'(e) <E T="03">Banana</E> has the same meaning as bonono',
         Ref('banana', 'fgf', (4, 10))),
        (u'(f) Huge billowy clouds means I want to take a nap',
         u'(f) <E T="03">Huge billowy clouds</E> means I want to take a '
         + 'nap',
         Ref('huge billowy clouds', 'ggg', (4, 23))),
    ]

    # (text, tagged text) pairs that should produce no definition
    xml_no_defs = [
        (u'(d) Term1 or term2 means stuff',
         u'(d) <E T="03">Term1</E> or <E T="03">term2></E> means stuff'),
        (u'This term means should not match',
         u'<E T="03">This term</E> means should not match'),
    ]

    # (text, the single definition expected from the scope phrase)
    scope_term_defs = [
        ('For purposes of this section, the term blue means the color',
         Ref('blue', '11-11', (39, 43))),
        ('For purposes of paragraph (a)(1) of this section, the term '
         + 'cool bro means hip cat',
         Ref('cool bro', '11-22', (59, 67))),
        ('For purposes of this paragraph, po jo means "poor Joe"',
         Ref('po jo', '11-33', (32, 37))),
    ]

    parents = ParentStack()
    parents.add(0, Node(label=['999']))

    def check(node, expected_defs, tagged=None):
        # Optionally attach tagged text, then verify the definitions
        # found and that nothing was reported as an exclusion.
        if tagged is not None:
            node.tagged_text = tagged
        found, excluded = terms.node_definitions(node, parents)
        self.assertEqual(expected_defs, found)
        self.assertEqual([], excluded)

    # --- Only the root node is on the stack ---
    for text in no_defs:
        check(Node(text), [])
    for text, _ in smart_quotes:
        check(Node(text), [])
    for text, tagged in xml_no_defs:
        check(Node(text), [], tagged=tagged)
    for text, tagged, ref in xml_defs:
        check(Node(text, label=[ref.label]), [ref], tagged=tagged)
    for text, ref in scope_term_defs:
        check(Node(text, label=ref.label.split('-')), [ref])

    # smart quotes are affected by the parent
    parents.add(1, Node('Definitions', label=['999', '1']))
    for text in no_defs:
        check(Node(text), [])
    for text, refs in smart_quotes:
        check(Node(text, label=[refs[0].label]), refs)
    for text, tagged in xml_no_defs:
        check(Node(text), [], tagged=tagged)
    for text, tagged, ref in xml_defs:
        check(Node(text, label=[ref.label]), [ref], tagged=tagged)
def test_excluded_offsets_blacklist_word_boundaries(self):
    """The ignore phrase 'shed act' appears in the text only as the tail
    of 'watershed act', so no offsets are expected to be excluded."""
    terms = Terms(None)
    terms.scoped_terms['_'] = [Ref('act', '28-6-d', 'Def def def')]
    # NOTE(review): this mutates module-level settings; presumably the
    # surrounding test fixture restores them -- confirm.
    settings.IGNORE_DEFINITIONS_IN['ALL'] = ['shed act']

    text = "That's a watershed act"
    self.assertEqual([], terms.excluded_offsets('28-9', text))
def test_excluded_offsets_blacklist(self):
    """With 'bourgeois pig' registered under the 'ALL' ignore list,
    excluded_offsets is expected to report the span that phrase
    occupies in the text."""
    terms = Terms(None)
    terms.scoped_terms['_'] = [Ref('bourgeois', '12-Q-2', 'Def')]
    # NOTE(review): this mutates module-level settings; presumably the
    # surrounding test fixture restores them -- confirm.
    settings.IGNORE_DEFINITIONS_IN['ALL'] = ['bourgeois pig']

    text = 'You are a bourgeois pig!'
    # 'bourgeois pig' occupies characters 10..23 of the text
    self.assertEqual([(10, 23)], terms.excluded_offsets('12-3', text))
def test_pre_process(self):
    """After pre_process, each definition should be filed under the
    scope named in its paragraph ("this part", "this section", "this
    paragraph"), the subpart map should list the sections per subpart,
    and every definition should appear in layer['referenced']."""
    part_scoped = Node(
        u"Definition. For the purposes of this part, "
        + u"“abcd” is an alphabet",
        label=['88', '1'])
    noname_subpart = Node(
        '',
        label=['88', 'Subpart'],
        node_type=Node.EMPTYPART,
        children=[part_scoped])

    section_scoped = Node(
        label=['88', '2', 'a'],
        text="Definitions come later for the purposes of "
             + "this section ",
        children=[
            Node(u"“AXAX” means axe-cop", label=['88', '2', 'a', '1']),
        ])
    paragraph_scoped = Node(
        label=['88', '2', 'b', 'i', 'A'],
        text=u"Definition. “Awesome sauce” means "
             + "great for the purposes of this "
             + "paragraph",)
    xqxq_subpart = Node(
        '',
        title='Subpart XQXQ: The unreadable',
        label=['88', 'Subpart', 'XQXQ'],
        node_type=Node.SUBPART,
        children=[
            Node(label=['88', '2'], children=[
                section_scoped,
                Node(label=['88', '2', 'b'], children=[
                    Node(label=['88', '2', 'b', 'i'],
                         children=[paragraph_scoped]),
                ]),
            ]),
        ])

    root = Node(label=['88'], children=[noname_subpart, xqxq_subpart])
    terms = Terms(root)
    terms.pre_process()

    # (scope key, the single Ref expected under that scope)
    expected_scopes = [
        (('88',), Ref('abcd', '88-1', (44, 48))),
        (('88', '2'), Ref('axax', '88-2-a-1', (1, 5))),
        (('88', '2', 'b', 'i', 'A'),
         Ref('awesome sauce', '88-2-b-i-A', (13, 26))),
    ]
    for scope, ref in expected_scopes:
        self.assertTrue(scope in terms.scoped_terms)
        self.assertEqual([ref], terms.scoped_terms[scope])

    # Check subparts are correct
    self.assertEqual({None: ['1'], 'XQXQ': ['2']},
                     dict(terms.subpart_map))

    # Finally, make sure the references are added
    referenced = terms.layer['referenced']
    # (layer key, term, reference, position)
    expected_references = [
        ('abcd:88-1', 'abcd', '88-1', (44, 48)),
        ('axax:88-2-a-1', 'axax', '88-2-a-1', (1, 5)),
        ('awesome sauce:88-2-b-i-A', 'awesome sauce', '88-2-b-i-A',
         (13, 26)),
    ]
    for key, term, reference, position in expected_references:
        self.assertTrue(key in referenced)
        self.assertEqual(term, referenced[key]['term'])
        self.assertEqual(reference, referenced[key]['reference'])
        self.assertEqual(position, referenced[key]['position'])