Пример #1
0
 def is_definition(node, keyterm):
     """Report whether ``keyterm`` is really a term defined by ``node``.

     A definition can masquerade as a keyterm, which must not be allowed.
     The lower-cased keyterm is compared against the ``term`` of every
     reference returned by ``Terms.node_definitions`` for the node."""
     found, ignored = Terms(None).node_definitions(node)
     wanted = keyterm.lower()
     for ref in found + ignored:
         if ref.term == wanted:
             return True
     return False
Пример #2
0
    def test_look_for_defs(self, node_definitions):
        """look_for_defs should walk the tree in search of terms. The stubbed
        node_definitions reports the label of every node it is handed as an
        exclusion, which documents the nodes that were visited; subtrees
        which are not associated with regtext are expected to be skipped"""
        def record_label(n, _):
            return ([], [n.label_id()])

        node_definitions.side_effect = record_label
        terms = Terms(None)

        section_one = Node(label=['111', '1'], children=[
            Node(label=['111', '1', 'a']),
            Node(label=['111', '1', 'b']),
            Node(label=['111', '1', 'c'])])
        extract = Node(label=['111', '2', 'p1'], node_type=Node.EXTRACT,
                       children=[Node(label=['111', '2', 'p1', 'p1'])])
        section_two = Node(label=['111', '2'], children=[extract])
        subpart = Node(label=['111', 'Subpart'], node_type=Node.EMPTYPART,
                       children=[section_one, section_two])
        appendix = Node(label=['111', 'A'], node_type=Node.APPENDIX, children=[
            Node(label=['111', 'A', '1'], node_type=Node.APPENDIX)])
        root = Node(label=['111'], children=[subpart, appendix])
        terms.look_for_defs(root)

        # no APPENDIX labels, and nothing at or beneath the EXTRACT
        visited = ['111', '111-Subpart', '111-1', '111-1-a', '111-1-b',
                   '111-1-c', '111-2']
        self.assertItemsEqual(terms.scoped_terms['EXCLUDED'], visited)
Пример #3
0
 def test_excluded_offsets_blacklist(self):
     """A phrase listed under the 'ALL' key of IGNORE_DEFINITIONS_IN is
     reported as an excluded span"""
     terms = Terms(None)
     terms.scoped_terms['_'] = [Ref('bourgeois', '12-Q-2', 0)]
     settings.IGNORE_DEFINITIONS_IN['ALL'] = ['bourgeois pig']
     node = Node('You are a bourgeois pig!', label=['12', '3'])
     self.assertEqual([(10, 23)], terms.excluded_offsets(node))
Пример #4
0
 def test_process(self):
     """process() should pick out the terms found in the requested node
     and hand them back ordered by term name"""
     tree = Node(children=[
         Node("ABC5", children=[Node("child")], label=['ref1']),
         Node("AABBCC5", label=['ref2']),
         Node("ABC3", label=['ref3']),
         Node("AAA3", label=['ref4']),
         Node("ABCABC3", label=['ref5']),
         Node("ABCOTHER", label=['ref6']),
         Node("ZZZOTHER", label=['ref7']),
     ])
     terms = Terms(tree)
     paragraph_ii = ("101", "22", "b", "2", "ii")
     paragraph_b = ("101", "22", "b")
     paragraph_iii = ("101", "22", "b", "2", "iii")
     terms.scoped_terms = {
         paragraph_ii: [
             Ref("abc", "ref1", 1),
             Ref("aabbcc", "ref2", 2)],
         paragraph_b: [
             Ref("abc", "ref3", 3),
             Ref("aaa", "ref4", 4),
             Ref("abcabc", "ref5", 5)],
         paragraph_iii: [
             Ref("abc", "ref6", 6),
             Ref("zzz", "ref7", 7)],
     }
     #   Process a node labelled with the "ii" paragraph and inspect refs
     target = Node("This has abc, aabbcc, aaa, abcabc, and zzz",
                   label=list(paragraph_ii))
     refs = [el['ref'] for el in terms.process(target)]
     self.assertEqual(
         refs, ['aaa:ref4', 'aabbcc:ref2', 'abc:ref1', 'abcabc:ref5'])
 def test_has_parent_definitions_indicator_p_marker(self):
     """A parent whose text starts with a paragraph marker followed by
     "Definitions." should count as a definitions indicator"""
     parent_text = ("(a) Definitions. For purposes of this " +
                    "section except blah")
     parents = ParentStack()
     parents.add(0, Node(parent_text))
     terms = Terms(None)
     self.assertTrue(terms.has_parent_definitions_indicator(parents))
Пример #6
0
 def test_calculate_offsets_word_part(self):
     """A defined term that only appears inside a longer word must not be
     counted as a match"""
     sentence = "I am about to act on this transaction."
     candidates = [('act', 'a')]
     found = Terms(None).calculate_offsets(sentence, candidates)
     self.assertEqual(1, len(found))
     # only the standalone "act" should contribute an offset pair
     first = found[0]
     self.assertEqual(1, len(first[2]))
Пример #7
0
 def test_calculate_offsets_overlap(self):
     """When two candidate terms overlap in the text, a single match for
     'mad cow disease' is expected"""
     sentence = 'There goes mad cow disease'
     candidates = [('mad cow disease', 'mc'), ('goes mad', 'gm')]
     found = Terms(None).calculate_offsets(sentence, candidates)
     self.assertEqual(1, len(found))
     _, ref, offsets = found[0]
     self.assertEqual('mc', ref)
     span = offsets[0]
     self.assertEqual('mad cow disease', sentence[span[0]:span[1]])
Пример #8
0
 def test_calculate_offsets_lexical_container(self):
     """When one term contains another ('access device' vs 'device'), only
     the containing term is expected to match"""
     sentence = "This access device is fantastic!"
     candidates = [('access device', 'a'), ('device', 'd')]
     found = Terms(None).calculate_offsets(sentence, candidates)
     self.assertEqual(1, len(found))
     _, ref, offsets = found[0]
     self.assertEqual('a', ref)
     self.assertEqual([(5, 18)], offsets)
Пример #9
0
 def test_calculate_offsets_exclusions(self):
     """A match which falls inside an excluded span is dropped, while an
     exclusion elsewhere in the text leaves the match in place"""
     sentence = "This text defines the 'fudge act'"
     candidates = [('act', 'a')]
     terms = Terms(None)

     covered = terms.calculate_offsets(sentence, candidates, [(23, 32)])
     self.assertEqual([], covered)

     untouched = terms.calculate_offsets(sentence, candidates, [(1, 5)])
     self.assertEqual([('act', 'a', [(29, 32)])], untouched)
Пример #10
0
    def test_node_defintions_act(self):
        """A quoted "Act" beneath a 'Definitions' parent should give exactly
        one included definition and no exclusions"""
        parents = ParentStack()
        parents.add(0, Node('Definitions', label=['9999']))
        terms = Terms(None)

        paragraph = Node(u'“Act” means something else entirely')
        result = terms.node_definitions(paragraph, parents)
        included, excluded = result
        self.assertEqual(1, len(included))
        self.assertEqual([], excluded)
Пример #11
0
 def test_node_definitions_needs_term(self):
     """Text which says "the term means" without naming a term should give
     no included and no excluded definitions"""
     parents = ParentStack()
     parents.add(0, Node('Definitions', label=['9999']))
     sentence = (u"However, for purposes of rescission under §§ 1111.15 " +
                 u"and 1111.13, and for purposes of §§ 1111.12(a)(1), " +
                 u"and 1111.46(d)(4), the term means all calendar " +
                 u"days...")
     terms = Terms(None)
     result = terms.node_definitions(Node(sentence), parents)
     self.assertEqual(([], []), result)
Пример #12
0
 def get_keyterm(node):
     """Return the keyterm which leads ``node``, or ``None``.

     The candidate is the content of the ``<E T="03">`` element which the
     pattern captures when matched from the start of ``node.tagged_text``.
     It is returned only when ``KeyTerms.keyterm_is_first`` accepts it and
     its lower-cased form is not the ``term`` of any reference reported by
     ``Terms.node_definitions`` for the node (a definition might otherwise
     masquerade as a keyterm). In every other case ``None`` is returned."""
     # The pattern holds no backslashes, so a plain unicode literal is
     # equivalent to a raw one; the ``ur`` prefix is a syntax error on
     # Python 3.
     pattern = re.compile(u'.*?<E T="03">([^<]*?)</E>.*?', re.UNICODE)
     matches = pattern.match(node.tagged_text)
     if not matches:
         return None
     keyterm = matches.group(1)
     if not KeyTerms.keyterm_is_first(node, keyterm):
         return None
     included, excluded = Terms(None).node_definitions(node)
     keyterm_as_term = keyterm.lower()
     if any(ref.term == keyterm_as_term for ref in included + excluded):
         return None
     return keyterm
Пример #13
0
    def test_pre_process_defined_twice(self):
        """When a quoted term shows up twice in the defining paragraph, the
        recorded position should be that of the first occurrence"""
        paragraph = Node(u"The term “lol” means laugh out loud. " +
                         u"How do you pronounce “lol”, though?",
                         label=['1212', '5'])
        terms = Terms(paragraph)
        terms.pre_process()

        recorded = terms.layer['referenced']['lol:1212-5']
        self.assertEqual(recorded['position'], (10, 13))
 def test_excluded_offsets(self):
     """excluded_offsets should return the positions of the scoped
     references whose label matches the one requested"""
     terms = Terms(None)
     terms.scoped_terms['_'] = [
         Ref('term', 'lablab', (4, 6)),
         Ref('other', 'lablab', (8, 9)),
         Ref('more', 'nonnon', (1, 8)),
     ]
     both = terms.excluded_offsets('lablab', 'Some text')
     self.assertEqual([(4, 6), (8, 9)], both)
     single = terms.excluded_offsets('nonnon', 'Other')
     self.assertEqual([(1, 8)], single)
     # a label without any scoped reference gives nothing
     nothing = terms.excluded_offsets('ababab', 'Ab ab ab')
     self.assertEqual([], nothing)
Пример #15
0
 def test_calculate_offsets(self):
     """calculate_offsets should report every matched term along with its
     reference and offsets; 'other thing' never appears in the text"""
     sentence = "I am in a rock band. That's a band with a drum, a rock drum."
     candidates = [('rock band', 'a'), ('band', 'b'), ('drum', 'c'),
                   ('other thing', 'd')]
     found = Terms(None).calculate_offsets(sentence, candidates)
     expected = [
         ('rock band', 'a', [(10, 19)]),
         ('band', 'b', [(30, 34)]),
         ('drum', 'c', [(42, 46), (55, 59)]),
     ]
     six.assertCountEqual(self, found, expected)
Пример #16
0
 def test_node_definitions_multiple_xml(self):
     """Find xml definitions which are joined by `and`"""
     parents = ParentStack().add(0, Node(label=['9999']))
     node = Node("(4) Cold and dreary mean winter.", label=['9999', '4'])
     node.tagged_text = ('(4) <E T="03">Cold</E> and '
                         '<E T="03">dreary</E> mean winter.')
     included, _ = Terms(None).node_definitions(node, parents)
     self.assertEqual(len(included), 2)
     first, second = included
     self.assertEqual(first, Ref('cold', '9999-4', 4))
     self.assertEqual(second, Ref('dreary', '9999-4', 13))
Пример #17
0
 def test_node_definitions_xml_or(self):
     """Find xml definitions which are joined by `or`"""
     parents = ParentStack().add(0, Node(label=['9999']))
     node = Node("(i) Hot tamale or tamale means nom nom",
                 label=['9999', '4'])
     node.tagged_text = ('(i) <E T="03">Hot tamale</E> or <E T="03"> '
                         'tamale</E> means nom nom ')
     included, _ = Terms(None).node_definitions(node, parents)
     self.assertEqual(len(included), 2)
     first, second = included
     self.assertEqual(first, Ref('hot tamale', '9999-4', 4))
     self.assertEqual(second, Ref('tamale', '9999-4', 18))
Пример #18
0
 def test_excluded_offsets_blacklist_word_boundaries(self):
     """An exclusion which begins/ends with word characters should only be
     matched on word boundaries; one wrapped in punctuation should still
     be found"""
     settings.IGNORE_DEFINITIONS_IN['ALL'] = ['shed act', '(phrase)']
     terms = Terms(None)
     terms.scoped_terms['_'] = [Ref('act', '28-6-d', 0)]

     # "shed act" sits inside "watershed act", so nothing is excluded
     inside_word = Node("That's a watershed act", label=['28', '9'])
     self.assertEqual([], terms.excluded_offsets(inside_word))

     punctuated = Node("This has a '(phrase)' in it", label=['28', '9'])
     self.assertNotEqual([], terms.excluded_offsets(punctuated))
Пример #19
0
    def test_excluded_offsets_blacklist_per_reg(self):
        """A phrase listed under a regulation's own key ('12') in
        IGNORE_DEFINITIONS_IN should be excluded for a node of that
        regulation"""
        terms = Terms(None)
        bourgeois = Ref('bourgeois', '12-Q-2', 0)
        consumer = Ref('consumer', '12-Q-3', 0)
        terms.scoped_terms['_'] = [bourgeois, consumer]

        settings.IGNORE_DEFINITIONS_IN['ALL'] = ['bourgeois pig']
        settings.IGNORE_DEFINITIONS_IN['12'] = ['consumer price index']

        node = Node('There is a consumer price index', label=['12', '2'])
        self.assertEqual([(11, 31)], terms.excluded_offsets(node))
Пример #20
0
 def test_calculate_offsets_pluralized1(self):
     """A plural form of a candidate term ('bands') should be reported as
     its own match carrying the singular term's reference"""
     candidates = [('rock band', 'a'), ('band', 'b'), ('drum', 'c'),
                   ('other thing', 'd')]
     sentence = ("I am in a rock band. That's a band with a drum, a rock drum."
                 + " Many bands. ")
     found = Terms(None).calculate_offsets(sentence, candidates)
     expected = [
         ('rock band', 'a', [(10, 19)]),
         ('band', 'b', [(30, 34)]),
         ('bands', 'b', [(66, 71)]),
         ('drum', 'c', [(42, 46), (55, 59)]),
     ]
     self.assertItemsEqual(found, expected)
Пример #21
0
 def test_node_definitions_xml_commas(self):
     """Find xml definitions which are separated by commas"""
     parents = ParentStack().add(0, Node(label=['9999']))
     node = Node("(i) Hot, humid, or dry means summer.",
                 label=['9999', '4'])
     node.tagged_text = ('(i) <E T="03">Hot</E>, <E T="03">humid</E>, '
                         'or <E T="03">dry</E> means summer.')
     included, _ = Terms(None).node_definitions(node, parents)
     self.assertEqual(len(included), 3)
     first, second, third = included
     self.assertEqual(first, Ref('hot', '9999-4', 4))
     self.assertEqual(second, Ref('humid', '9999-4', 9))
     self.assertEqual(third, Ref('dry', '9999-4', 19))
    def test_excluded_offsets_blacklist_per_reg(self):
        """per_regulation_ignores should keep the exclusions it is given and
        add the span of a phrase listed under the regulation's own key"""
        terms = Terms(None)
        bourgeois = Ref('bourgeois', '12-Q-2', 'Def')
        consumer = Ref('consumer', '12-Q-3', 'Def')
        terms.scoped_terms['_'] = [bourgeois, consumer]

        settings.IGNORE_DEFINITIONS_IN['ALL'] = ['bourgeois pig']
        settings.IGNORE_DEFINITIONS_IN['12'] = ['consumer price index']

        already_excluded = [(0, 4)]
        result = terms.per_regulation_ignores(
            already_excluded, ['12', '2'], 'There is a consumer price index')
        self.assertEqual([(0, 4), (11, 31)], result)
Пример #23
0
    def test_node_definitions_too_long(self):
        """Quoted text which is too long should not be picked up as a
        definition"""
        parents = ParentStack().add(0, Node('Definitions', label=['9999']))

        quoted = u"""“I declare under the penalties of perjury that this—(insert
        type of document, such as, statement, application, request,
        certificate), including the documents submitted in support thereof,
        has been examined by me and, to the best of my knowledge and belief,
        is true, correct, and complete.”"""
        paragraph = Node(u'```extract\n{}\n```'.format(quoted))
        result = Terms(None).node_definitions(paragraph, parents)
        included, excluded = result
        self.assertEqual([], included)
        self.assertEqual([], excluded)
 def test_subpart_scope(self):
     """subpart_scope should list every section which shares a subpart
     with the given label, prefixed by the label's own part; a section
     missing from the map yields an empty list"""
     terms = Terms(None)
     terms.subpart_map = {
         None: ['1', '2', '3'],
         'A': ['7', '5', '0'],
         'Q': ['99', 'abc', 'q']
     }
     no_subpart = terms.subpart_scope(['111', '3'])
     self.assertEqual([['111', '1'], ['111', '2'], ['111', '3']],
                      no_subpart)
     subpart_a = terms.subpart_scope(['115', '5'])
     self.assertEqual([['115', '7'], ['115', '5'], ['115', '0']],
                      subpart_a)
     subpart_q = terms.subpart_scope(['62', 'abc'])
     self.assertEqual([['62', '99'], ['62', 'abc'], ['62', 'q']],
                      subpart_q)
     unknown = terms.subpart_scope(['71', 'Z'])
     self.assertEqual([], unknown)
Пример #25
0
 def test_process_label_in_node(self):
     """A term must not be highlighted within the very paragraph which
     defines it."""
     defining = Node("Defining secret phrase.", label=['AB', 'a'])
     using = Node("Has secret phrase. Then some other content",
                  label=['AB', 'b'])
     tree = Node(children=[defining, using], label=['AB'])
     terms = Terms(tree)
     terms.scoped_terms = {('AB', ): [Ref("secret phrase", "AB-a", 9)]}
     #   The reference points at the first child (AB-a)
     in_definition = terms.process(tree.children[0])
     self.assertEqual([], in_definition)
     elsewhere = terms.process(tree.children[1])
     self.assertEqual(1, len(elsewhere))
Пример #26
0
 def test_excluded_offsets(self):
     """excluded_offsets should give one span per scoped reference whose
     label matches the node's label, and nothing for an unknown label"""
     terms = Terms(None)
     terms.scoped_terms['_'] = [
         Ref('term', 'lablab', 4),
         Ref('other', 'lablab', 8),
         Ref('more', 'nonnon', 1),
     ]
     two_refs = Node('Some text', label=['lablab'])
     self.assertEqual([(4, 8), (8, 13)], terms.excluded_offsets(two_refs))

     one_ref = Node('Other', label=['nonnon'])
     self.assertEqual([(1, 5)], terms.excluded_offsets(one_ref))

     no_refs = Node('Ab ab ab', label=['ababab'])
     self.assertEqual([], terms.excluded_offsets(no_refs))
    def test_is_exclusion(self):
        """In the cases exercised here, is_exclusion is only true when the
        term is scoped and the node's text says the term "does not
        include" something"""
        terms = Terms(None)
        node = Node('ex ex ex', label=['1111', '2'])
        scope = ('1111',)

        # nothing has been scoped so far
        self.assertFalse(terms.is_exclusion('ex', node))

        # an unrelated term is scoped
        terms.scoped_terms = {scope: [Ref('abc', '1', (0, 0))]}
        self.assertFalse(terms.is_exclusion('ex', node))

        # the term is scoped, yet the text does not exclude anything
        terms.scoped_terms = {scope: [Ref('ex', '1', (0, 0))]}
        self.assertFalse(terms.is_exclusion('ex', node))
        node.text = u'Something something the term “ex” does not include potato'
        self.assertTrue(terms.is_exclusion('ex', node))

        # excluding text, but only the unrelated term is scoped
        terms.scoped_terms = {scope: [Ref('abc', '1', (0, 0))]}
        self.assertFalse(terms.is_exclusion('ex', node))
Пример #28
0
    def test_node_definitions_no_def(self):
        """None of the matchers should fire for these plain sentences"""
        parents = ParentStack()
        parents.add(0, Node(label=['999']))
        parents.add(1, Node('Definitions', label=['999', '1']))
        terms = Terms(None)

        plain_sentences = [
            'This has no defs',
            'Also has no terms',
            'Still no terms, but',
            'the next one does',
        ]

        for sentence in plain_sentences:
            result = terms.node_definitions(Node(sentence), parents)
            included, excluded = result
            self.assertEqual([], included)
            self.assertEqual([], excluded)
 def test_calculate_offsets(self):
     """calculate_offsets should give three matches, and each expected
     reference/offsets pairing should be present among them"""
     sentence = "I am in a rock band. That's a band with a drum, a rock drum."
     candidates = [('rock band', 'a'), ('band', 'b'), ('drum', 'c'),
                   ('other thing', 'd')]
     matches = Terms(None).calculate_offsets(sentence, candidates)
     self.assertEqual(3, len(matches))
     expected = [
         ('a', [(10, 19)]),
         ('b', [(30, 34)]),
         ('c', [(42, 46), (55, 59)]),
     ]
     # one flag per expectation: is it satisfied by any of the matches?
     seen = [
         any(ref == want_ref and offsets == want_offsets
             for _, ref, offsets in matches)
         for want_ref, want_offsets in expected
     ]
     self.assertEqual([True, True, True], seen)
    def test_has_parent_definitions_indicator_the_term_means(self):
        """Exercise has_parent_definitions_indicator against a stack of
        parent nodes, covering the phrasings "The term ... means", a quoted
        term followed by "means", and "The term ... refers to"."""
        t = Terms(None)
        stack = ParentStack()
        # A lone parent without any definition phrasing is not an indicator
        stack.add(0, Node('Contains no terms or definitions'))
        self.assertFalse(t.has_parent_definitions_indicator(stack))
        # "The term ... means" on a parent turns the indicator on
        stack.add(1, Node("(a) The term Bob means awesome"))
        self.assertTrue(t.has_parent_definitions_indicator(stack))
        # A deeper node without definitions leaves the indicator on
        stack.add(2, Node("No defs either"))
        self.assertTrue(t.has_parent_definitions_indicator(stack))

        # Swap the parent added at depth 1 for the quoted-term phrasing
        stack.pop()
        stack.pop()
        stack.add(1, Node(u"(a) “Term” means some stuff"))
        self.assertTrue(t.has_parent_definitions_indicator(stack))

        # Swap it once more for the "refers to" phrasing
        stack.pop()
        stack.add(1, Node("(a) The term Bob refers to"))
        self.assertTrue(t.has_parent_definitions_indicator(stack))