def test_excluded_offsets(self):
     """`excluded_offsets` should hand back the positions of the Refs
     registered under the requested label, and nothing for a label that
     has no Refs at all."""
     terms = Terms(None)
     terms.scoped_terms['_'] = [
         Ref('term', 'lablab', (4, 6)),
         Ref('other', 'lablab', (8, 9)),
         Ref('more', 'nonnon', (1, 8)),
     ]

     # (label, text, expected offsets)
     cases = [
         ('lablab', 'Some text', [(4, 6), (8, 9)]),
         ('nonnon', 'Other', [(1, 8)]),
         ('ababab', 'Ab ab ab', []),
     ]
     for label, text, expected in cases:
         self.assertEqual(expected, terms.excluded_offsets(label, text))
    def test_excluded_offsets_blacklist_per_reg(self):
        """Ignore phrases registered under a regulation key ('12') should
        be appended to the exclusions already passed in."""
        terms = Terms(None)
        terms.scoped_terms['_'] = [
            Ref(term, label, 'Def')
            for term, label in (('bourgeois', '12-Q-2'),
                                ('consumer', '12-Q-3'))]

        ignores = settings.IGNORE_DEFINITIONS_IN
        ignores['ALL'] = ['bourgeois pig']
        ignores['12'] = ['consumer price index']

        already_excluded = [(0, 4)]
        text = 'There is a consumer price index'
        result = terms.per_regulation_ignores(
            already_excluded, ['12', '2'], text)

        # (11, 31) spans "consumer price index" within the text
        self.assertEqual([(0, 4), (11, 31)], result)
    def test_is_exclusion(self):
        """`is_exclusion` is only true when the term is known in scope AND
        the node text uses the 'does not include' phrasing."""
        terms = Terms(None)
        node = Node('ex ex ex', label=['1111', '2'])

        def scoped_to(term):
            # Fresh mapping each time, mirroring a wholesale replacement
            return {('1111',): [Ref(term, '1', (0, 0))]}

        # Nothing has been defined yet
        self.assertFalse(terms.is_exclusion('ex', node))

        # Only an unrelated term is defined
        terms.scoped_terms = scoped_to('abc')
        self.assertFalse(terms.is_exclusion('ex', node))

        # The term is defined, but the text carries no exclusion phrasing
        terms.scoped_terms = scoped_to('ex')
        self.assertFalse(terms.is_exclusion('ex', node))

        # Same definition, now with exclusion phrasing in the text
        node.text = u'Something something the term “ex” does not include potato'
        self.assertTrue(terms.is_exclusion('ex', node))

        # Exclusion phrasing alone is not enough once the term is unknown
        terms.scoped_terms = scoped_to('abc')
        self.assertFalse(terms.is_exclusion('ex', node))
    def test_node_definitions_exclusion(self):
        """A paragraph saying an already-defined term 'does not include'
        something is reported as an exclusion, not as a new definition."""
        defining = Node(u'“Bologna” is a type of deli meat',
                        label=['111', '1'])
        excluding = Node(
            u'Let us not forget that the term “bologna” does not '
            u'include turtle meat',
            label=['111', '1', 'a'])
        terms = Terms(Node(label=['111'], children=[defining, excluding]))
        terms.pre_process()

        parents = ParentStack()
        parents.add(1, Node('Definitions'))

        # First paragraph: a regular definition
        found, skipped = terms.node_definitions(defining, parents)
        self.assertEqual([Ref('bologna', '111-1', (1, 8))], found)
        self.assertEqual([], skipped)
        terms.scoped_terms[('111', '1')] = found

        # Second paragraph: the same term, this time as an exclusion
        found, skipped = terms.node_definitions(excluding, parents)
        self.assertEqual([], found)
        self.assertEqual([Ref('bologna', '111-1-a', (33, 40))], skipped)
 def test_process_label_in_node(self):
     """A term must not be highlighted inside the very paragraph that
     defines it, while other paragraphs still reference it."""
     root = Node(
         label=['AB'],
         children=[
             Node("Defining secret phrase.", label=['AB', 'a']),
             Node("Has secret phrase. Then some other content",
                  label=['AB', 'b']),
         ])
     terms = Terms(root)
     terms.scoped_terms = {
         ('AB',): [Ref("secret phrase", "AB-a", (9, 22))]}

     defining, using = root.children
     # The Ref above points at AB-a, i.e. the first child
     self.assertEqual([], terms.process(defining))
     # The second child yields a single layer element
     self.assertEqual(1, len(terms.process(using)))
 def test_process(self):
     """Check which definitions `process` reports for a nested paragraph
     when terms are registered under its own label, under a prefix of
     that label, and under a different label of the same depth."""
     simple_children = [
         Node(text, label=[label]) for text, label in (
             ("AABBCC5", 'ref2'),
             ("ABC3", 'ref3'),
             ("AAA3", 'ref4'),
             ("ABCABC3", 'ref5'),
             ("ABCOTHER", 'ref6'),
             ("ZZZOTHER", 'ref7'),
         )]
     first_child = Node("ABC5", children=[Node("child")], label=['ref1'])
     terms = Terms(Node(children=[first_child] + simple_children))

     own_scope = ("101", "22", "b", "2", "ii")
     prefix_scope = own_scope[:3]
     other_scope = own_scope[:4] + ("iii",)
     terms.scoped_terms = {
         own_scope: [
             Ref("abc", "ref1", (1, 2)),
             Ref("aabbcc", "ref2", (2, 3)),
         ],
         prefix_scope: [
             Ref("abc", "ref3", (3, 4)),
             Ref("aaa", "ref4", (4, 5)),
             Ref("abcabc", "ref5", (5, 6)),
         ],
         other_scope: [
             Ref("abc", "ref6", (6, 7)),
             Ref("zzz", "ref7", (7, 8)),
         ],
     }

     #   Check that the return value is correct
     layer = terms.process(
         Node("This has abc, aabbcc, aaa, abcabc, and zzz",
              label=list(own_scope)))
     self.assertEqual(4, len(layer))

     reported = [element['ref'] for element in layer]
     wanted = ('abc:ref1', 'aabbcc:ref2', 'aaa:ref4', 'abcabc:ref5')
     self.assertEqual([True, True, True, True],
                      [ref in reported for ref in wanted])
    def test_node_definitions_multiple_xml(self):
        """A single paragraph may tag several terms with <E T="03">; each
        one should come back as its own Ref, in order of appearance."""
        terms = Terms(None)
        parents = ParentStack()
        parents.add(0, Node(label=['9999']))

        # (plain text, tagged text, expected definitions)
        cases = [
            ("(4) Cold and dreary mean winter.",
             '(4) <E T="03">Cold</E> and <E T="03">dreary</E> mean '
             'winter.',
             [Ref('cold', '9999-4', (4, 8)),
              Ref('dreary', '9999-4', (13, 19))]),
            ("(i) Hot, humid, or dry means summer.",
             '(i) <E T="03">Hot</E>, <E T="03">humid</E>, or '
             '<E T="03">dry</E> means summer.',
             [Ref('hot', '9999-4', (4, 7)),
              Ref('humid', '9999-4', (9, 14)),
              Ref('dry', '9999-4', (19, 22))]),
            # Whitespace inside the second tag and at the end is deliberate
            ("(i) Hot tamale or tamale means nom nom",
             '(i) <E T="03">Hot tamale</E> or <E T="03"> tamale</E> '
             'means nom nom ',
             [Ref('hot tamale', '9999-4', (4, 14)),
              Ref('tamale', '9999-4', (18, 24))]),
        ]

        for text, tagged, expected in cases:
            node = Node(text, label=['9999', '4'])
            node.tagged_text = tagged
            included, _ = terms.node_definitions(node, parents)
            self.assertEqual(len(included), len(expected))
            for actual, wanted in zip(included, expected):
                self.assertEqual(actual, wanted)
    def test_node_definitions(self):
        """Run `node_definitions` over several definition styles, first
        beneath a plain parent and then beneath a 'Definitions' parent.

        Only the smart-quote cases are expected to change between the two
        passes; none of the cases should ever report an exclusion."""
        terms = Terms(None)

        # (text, expected refs) -- terms wrapped in smart quotes
        smart_quotes = [
            (u'This has a “worD” and then more',
             [Ref('word', 'aaa', (12, 16))]),
            (u'I have “anotheR word” term and “moree”',
             [Ref('another word', 'bbb', (8, 20)),
              Ref('moree', 'bbb', (32, 37))]),
            (u'But the child “DoeS sEe”?',
             [Ref('does see', 'ccc', (15, 23))]),
            (u'Start with “this,”', [Ref('this', 'hhh', (12, 16))]),
            (u'Start with “this;”', [Ref('this', 'iii', (12, 16))]),
            (u'Start with “this.”', [Ref('this', 'jjj', (12, 16))]),
            (u'As do “subchildren”',
             [Ref('subchildren', 'ddd', (7, 18))]),
        ]

        # Plain text which never defines anything
        no_defs = [
            u'This has no defs',
            u'Also has no terms',
            u'Still no terms, but',
            u'the next one does',
        ]

        # (text, tagged text, expected ref) -- a single emphasized term
        xml_defs = [
            (u'(4) Thing means a thing that is defined',
             u'(4) <E T="03">Thing</E> means a thing that is defined',
             Ref('thing', 'eee', (4, 9))),
            (u'(e) Well-meaning lawyers means people who do weird things',
             u'(e) <E T="03">Well-meaning lawyers</E> means people who do '
             u'weird things',
             Ref('well-meaning lawyers', 'fff', (4, 24))),
            (u'(e) Words have the same meaning as in a dictionary',
             u'(e) <E T="03">Words</E> have the same meaning as in a '
             u'dictionary',
             Ref('words', 'ffg', (4, 9))),
            (u'(e) Banana has the same meaning as bonono',
             u'(e) <E T="03">Banana</E> has the same meaning as bonono',
             Ref('banana', 'fgf', (4, 10))),
            (u'(f) Huge billowy clouds means I want to take a nap',
             u'(f) <E T="03">Huge billowy clouds</E> means I want to take a '
             u'nap',
             Ref('huge billowy clouds', 'ggg', (4, 23))),
        ]

        # (text, tagged text) -- emphasized text which is not a definition
        # NOTE(review): the '>' right after "term2" looks like a typo in
        # the fixture; kept as-is -- confirm before changing.
        xml_no_defs = [
            (u'(d) Term1 or term2 means stuff',
             u'(d) <E T="03">Term1</E> or <E T="03">term2></E> means stuff'),
            (u'This term means should not match',
             u'<E T="03">This term</E> means should not match'),
        ]

        # (text, expected ref) -- "for purposes of ..." phrasing
        scope_term_defs = [
            ('For purposes of this section, the term blue means the color',
             Ref('blue', '11-11', (39, 43))),
            ('For purposes of paragraph (a)(1) of this section, the term '
             'cool bro means hip cat',
             Ref('cool bro', '11-22', (59, 67))),
            ('For purposes of this paragraph, po jo means "poor Joe"',
             Ref('po jo', '11-33', (32, 37))),
        ]

        parents = ParentStack()
        parents.add(0, Node(label=['999']))

        def verify(node, expected):
            """Assert `node` yields `expected` definitions, no exclusions."""
            found, excluded = terms.node_definitions(node, parents)
            self.assertEqual(expected, found)
            self.assertEqual([], excluded)

        def tagged_node(text, xml, **node_kwargs):
            """Build a Node and attach its tagged text."""
            node = Node(text, **node_kwargs)
            node.tagged_text = xml
            return node

        # First pass: the parent is not a definitions heading
        for text in no_defs:
            verify(Node(text), [])
        for text, _ in smart_quotes:
            verify(Node(text), [])
        for text, xml in xml_no_defs:
            verify(tagged_node(text, xml), [])
        for text, xml, ref in xml_defs:
            verify(tagged_node(text, xml, label=[ref.label]), [ref])
        for text, ref in scope_term_defs:
            verify(Node(text, label=ref.label.split('-')), [ref])

        #   smart quotes are affected by the parent
        parents.add(1, Node('Definitions', label=['999', '1']))
        for text in no_defs:
            verify(Node(text), [])
        for text, refs in smart_quotes:
            verify(Node(text, label=[refs[0].label]), refs)
        for text, xml in xml_no_defs:
            verify(tagged_node(text, xml), [])
        for text, xml, ref in xml_defs:
            verify(tagged_node(text, xml, label=[ref.label]), [ref])
 def test_excluded_offsets_blacklist_word_boundaries(self):
     """The ignored phrase 'shed act' must not produce an exclusion when
     it only occurs inside a longer word ('watershed act')."""
     terms = Terms(None)
     terms.scoped_terms['_'] = [Ref('act', '28-6-d', 'Def def def')]
     settings.IGNORE_DEFINITIONS_IN['ALL'] = ['shed act']

     self.assertEqual(
         [], terms.excluded_offsets('28-9', "That's a watershed act"))
 def test_excluded_offsets_blacklist(self):
     """A phrase listed under the 'ALL' ignore key is excluded wherever
     it occurs in the text."""
     terms = Terms(None)
     terms.scoped_terms['_'] = [Ref('bourgeois', '12-Q-2', 'Def')]
     settings.IGNORE_DEFINITIONS_IN['ALL'] = ['bourgeois pig']

     # (10, 23) spans "bourgeois pig" within the text
     self.assertEqual(
         [(10, 23)],
         terms.excluded_offsets('12-3', 'You are a bourgeois pig!'))
    def test_pre_process(self):
        """`pre_process` should record each definition under the scope its
        wording names (part, section, paragraph), build the subpart map
        and fill in the layer's 'referenced' entries."""
        # Leaves which hold the actual definitions
        part_def = Node(
            u"Definition. For the purposes of this part, "
            u"“abcd” is an alphabet",
            label=['88', '1'])
        section_def = Node(u"“AXAX” means axe-cop",
                           label=['88', '2', 'a', '1'])
        paragraph_def = Node(
            label=['88', '2', 'b', 'i', 'A'],
            text=(u"Definition. “Awesome sauce” means "
                  u"great for the purposes of this "
                  u"paragraph"))

        # Intermediate structure, assembled bottom-up
        paragraph_a = Node(
            label=['88', '2', 'a'],
            text=("Definitions come later for the purposes of "
                  "this section "),
            children=[section_def])
        paragraph_b = Node(label=['88', '2', 'b'], children=[
            Node(label=['88', '2', 'b', 'i'], children=[paragraph_def])])
        section_2 = Node(label=['88', '2'],
                         children=[paragraph_a, paragraph_b])

        noname_subpart = Node(
            '',
            label=['88', 'Subpart'],
            node_type=Node.EMPTYPART,
            children=[part_def])
        xqxq_subpart = Node(
            '',
            title='Subpart XQXQ: The unreadable',
            label=['88', 'Subpart', 'XQXQ'],
            node_type=Node.SUBPART,
            children=[section_2])

        terms = Terms(
            Node(label=['88'], children=[noname_subpart, xqxq_subpart]))
        terms.pre_process()

        # (scope, term, label, position)
        expectations = [
            (('88',), 'abcd', '88-1', (44, 48)),
            (('88', '2'), 'axax', '88-2-a-1', (1, 5)),
            (('88', '2', 'b', 'i', 'A'),
             'awesome sauce', '88-2-b-i-A', (13, 26)),
        ]

        for scope, term, label, position in expectations:
            self.assertTrue(scope in terms.scoped_terms)
            self.assertEqual([Ref(term, label, position)],
                             terms.scoped_terms[scope])

        #   Check subparts are correct
        self.assertEqual({None: ['1'], 'XQXQ': ['2']},
                         dict(terms.subpart_map))

        # Finally, make sure the references are added
        referenced = terms.layer['referenced']
        for _, term, label, position in expectations:
            key = '%s:%s' % (term, label)
            self.assertTrue(key in referenced)
            entry = referenced[key]
            self.assertEqual(term, entry['term'])
            self.assertEqual(label, entry['reference'])
            self.assertEqual(position, entry['position'])