Python Terms.Terms примеры, regparser.layer.terms.Terms.Terms Python примеры использования

Пример #1

0

Показать файл

 def is_definition(node, keyterm):
     """Report whether `keyterm` is actually one of the terms defined by
     `node`. A definition can look like a keyterm; this check lets such
     cases be detected. The keyterm is lower-cased before it is compared
     with each definition's `term`."""
     found, ignored = Terms(None).node_definitions(node)
     candidates = found + ignored
     wanted = keyterm.lower()
     for ref in candidates:
         if ref.term == wanted:
             return True
     return False

Пример #2

0

Показать файл

    def test_look_for_defs(self, node_definitions):
        """look_for_defs should walk the tree but skip certain subtrees.

        The node_definitions stand-in reports the label id of every node it
        is called with as an excluded term, so the 'EXCLUDED' scope ends up
        listing exactly the nodes that were visited."""
        def record_visit(visited, _):
            return [], [visited.label_id()]

        node_definitions.side_effect = record_visit
        layer = Terms(None)

        section_one = Node(label=['111', '1'], children=[
            Node(label=['111', '1', 'a']),
            Node(label=['111', '1', 'b']),
            Node(label=['111', '1', 'c'])])
        extract = Node(
            label=['111', '2', 'p1'], node_type=Node.EXTRACT,
            children=[Node(label=['111', '2', 'p1', 'p1'])])
        section_two = Node(label=['111', '2'], children=[extract])
        subpart = Node(label=['111', 'Subpart'], node_type=Node.EMPTYPART,
                       children=[section_one, section_two])
        appendix = Node(label=['111', 'A'], node_type=Node.APPENDIX, children=[
            Node(label=['111', 'A', '1'], node_type=Node.APPENDIX)])
        root = Node(label=['111'], children=[subpart, appendix])

        layer.look_for_defs(root)

        # Neither the APPENDIX nodes nor anything beneath the EXTRACT node
        # should have been visited
        visited_labels = ['111', '111-Subpart', '111-1', '111-1-a',
                          '111-1-b', '111-1-c', '111-2']
        self.assertItemsEqual(layer.scoped_terms['EXCLUDED'], visited_labels)

Пример #3

0

Показать файл

 def test_excluded_offsets_blacklist(self):
     """A phrase listed under IGNORE_DEFINITIONS_IN['ALL'] should be
     reported as an excluded offset range."""
     layer = Terms(None)
     layer.scoped_terms['_'] = [Ref('bourgeois', '12-Q-2', 0)]
     settings.IGNORE_DEFINITIONS_IN['ALL'] = ['bourgeois pig']
     paragraph = Node('You are a bourgeois pig!', label=['12', '3'])
     offsets = layer.excluded_offsets(paragraph)
     self.assertEqual([(10, 23)], offsets)

Пример #4

0

Показать файл

 def test_process(self):
     """process() should both find the terms in the requested node and
     return them ordered by term name."""
     tree = Node(children=[
         Node("ABC5", children=[Node("child")], label=['ref1']),
         Node("AABBCC5", label=['ref2']),
         Node("ABC3", label=['ref3']),
         Node("AAA3", label=['ref4']),
         Node("ABCABC3", label=['ref5']),
         Node("ABCOTHER", label=['ref6']),
         Node("ZZZOTHER", label=['ref7']),
     ])
     layer = Terms(tree)

     # Three scopes: the paragraph under test, one of its ancestors, and a
     # sibling paragraph
     own_scope = ("101", "22", "b", "2", "ii")
     ancestor_scope = ("101", "22", "b")
     sibling_scope = ("101", "22", "b", "2", "iii")
     layer.scoped_terms = {
         own_scope: [Ref("abc", "ref1", 1), Ref("aabbcc", "ref2", 2)],
         ancestor_scope: [
             Ref("abc", "ref3", 3),
             Ref("aaa", "ref4", 4),
             Ref("abcabc", "ref5", 5),
         ],
         sibling_scope: [Ref("abc", "ref6", 6), Ref("zzz", "ref7", 7)],
     }

     # Verify the references returned for the paragraph under test
     paragraph = Node("This has abc, aabbcc, aaa, abcabc, and zzz",
                      label=["101", "22", "b", "2", "ii"])
     layer_el = layer.process(paragraph)
     found_refs = [el['ref'] for el in layer_el]
     self.assertEqual(
         found_refs,
         ['aaa:ref4', 'aabbcc:ref2', 'abc:ref1', 'abcabc:ref5'])

Пример #5

0

Показать файл

Файл: layer_terms_tests.py Проект: sihaysistema/regulations-parser

 def test_has_parent_definitions_indicator_p_marker(self):
     """A parent whose text begins with a paragraph marker followed by
     'Definitions.' should count as a definitions indicator."""
     layer = Terms(None)
     parents = ParentStack()
     heading = Node("(a) Definitions. For purposes of this "
                    "section except blah")
     parents.add(0, heading)
     self.assertTrue(layer.has_parent_definitions_indicator(parents))

Пример #6

0

Показать файл

 def test_calculate_offsets_word_part(self):
     """A defined term that only appears inside a longer word must not be
     counted as an occurrence."""
     text = "I am about to act on this transaction."
     applicable_terms = [('act', 'a')]
     matches = Terms(None).calculate_offsets(text, applicable_terms)
     # One matched term, with a single offset pair
     self.assertEqual(1, len(matches))
     first_match = matches[0]
     self.assertEqual(1, len(first_match[2]))

Пример #7

0

Показать файл

 def test_calculate_offsets_overlap(self):
     """When two terms overlap in the text, only 'mad cow disease' should
     be reported."""
     text = 'There goes mad cow disease'
     applicable_terms = [('mad cow disease', 'mc'), ('goes mad', 'gm')]
     matches = Terms(None).calculate_offsets(text, applicable_terms)
     self.assertEqual(1, len(matches))
     _, ref, offsets = matches[0]
     self.assertEqual('mc', ref)
     start, end = offsets[0][0], offsets[0][1]
     self.assertEqual('mad cow disease', text[start:end])

Пример #8

0

Показать файл

 def test_calculate_offsets_lexical_container(self):
     """When one term textually contains another, only the containing
     term ('access device') should be matched."""
     text = "This access device is fantastic!"
     applicable_terms = [('access device', 'a'), ('device', 'd')]
     matches = Terms(None).calculate_offsets(text, applicable_terms)
     self.assertEqual(1, len(matches))
     _, ref, offsets = matches[0]
     self.assertEqual('a', ref)
     self.assertEqual([(5, 18)], offsets)

Пример #9

0

Показать файл

 def test_calculate_offsets_exclusions(self):
     """An exclusion range covering the term suppresses the match, while
     an exclusion elsewhere in the text leaves it alone."""
     applicable_terms = [('act', 'a')]
     text = "This text defines the 'fudge act'"
     layer = Terms(None)

     covering = layer.calculate_offsets(text, applicable_terms, [(23, 32)])
     self.assertEqual([], covering)

     elsewhere = layer.calculate_offsets(text, applicable_terms, [(1, 5)])
     self.assertEqual([('act', 'a', [(29, 32)])], elsewhere)

Пример #10

0

Показать файл

    def test_node_defintions_act(self):
        """A smart-quoted “Act” under a 'Definitions' parent should be
        found as a single included definition with nothing excluded."""
        layer = Terms(None)
        parents = ParentStack()
        parents.add(0, Node('Definitions', label=['9999']))

        definition = Node(u'“Act” means something else entirely')
        found, ignored = layer.node_definitions(definition, parents)
        self.assertEqual(1, len(found))
        self.assertEqual([], ignored)

Пример #11

0

Показать файл

 def test_node_definitions_needs_term(self):
     """Text that says 'the term means' without naming a term should
     yield neither included nor excluded definitions."""
     layer = Terms(None)
     parents = ParentStack()
     parents.add(0, Node('Definitions', label=['9999']))
     paragraph = Node(
         u"However, for purposes of rescission under §§ 1111.15 "
         u"and 1111.13, and for purposes of §§ 1111.12(a)(1), "
         u"and 1111.46(d)(4), the term means all calendar "
         u"days...")
     result = layer.node_definitions(paragraph, parents)
     self.assertEqual(([], []), result)

Пример #12

0

Показать файл

 def get_keyterm(node):
     """Return the keyterm of `node`, or None when it has none.

     The keyterm is the content of the first `<E T="03">...</E>` element in
     `node.tagged_text`. None is returned when no such element is found,
     when `KeyTerms.keyterm_is_first` rejects the candidate, or when the
     lower-cased candidate equals the `term` of a definition reported by
     `Terms.node_definitions` for this node."""
     # Plain u'' prefix instead of ur'': the `ur` prefix is a syntax error on
     # Python 3, and the pattern has no backslashes, so the string is
     # identical on both Python 2 and Python 3.
     pattern = re.compile(u'.*?<E T="03">([^<]*?)</E>.*?', re.UNICODE)
     matches = pattern.match(node.tagged_text)
     if not matches:
         return None

     keyterm = matches.group(1)
     if not KeyTerms.keyterm_is_first(node, keyterm):
         return None

     included, excluded = Terms(None).node_definitions(node)
     keyterm_as_term = keyterm.lower()
     if any(ref.term == keyterm_as_term for ref in included + excluded):
         # The emphasized text is really a definition, not a keyterm
         return None
     return keyterm

Пример #13

0

Показать файл

    def test_pre_process_defined_twice(self):
        """When the quoted term appears twice in a paragraph, the recorded
        position should be that of the first occurrence."""
        paragraph_text = (u"The term “lol” means laugh out loud. "
                          u"How do you pronounce “lol”, though?")
        tree = Node(paragraph_text, label=['1212', '5'])
        layer = Terms(tree)
        layer.pre_process()

        recorded = layer.layer['referenced']['lol:1212-5']['position']
        self.assertEqual(recorded, (10, 13))

Пример #14

0

Показать файл

Файл: layer_terms_tests.py Проект: pombreda/regulations-parser

 def test_excluded_offsets(self):
     """excluded_offsets should return the positions of the refs whose
     label matches the one requested, and nothing for an unknown label."""
     layer = Terms(None)
     layer.scoped_terms['_'] = [
         Ref('term', 'lablab', (4, 6)),
         Ref('other', 'lablab', (8, 9)),
         Ref('more', 'nonnon', (1, 8)),
     ]
     for_lablab = layer.excluded_offsets('lablab', 'Some text')
     self.assertEqual([(4, 6), (8, 9)], for_lablab)
     for_nonnon = layer.excluded_offsets('nonnon', 'Other')
     self.assertEqual([(1, 8)], for_nonnon)
     for_unknown = layer.excluded_offsets('ababab', 'Ab ab ab')
     self.assertEqual([], for_unknown)

Пример #15

0

Показать файл

 def test_calculate_offsets(self):
     """calculate_offsets should report each term found in the text with
     all of its offset pairs, in any order."""
     text = "I am in a rock band. That's a band with a drum, a rock drum."
     applicable_terms = [('rock band', 'a'), ('band', 'b'), ('drum', 'c'),
                         ('other thing', 'd')]
     expected = [
         ('rock band', 'a', [(10, 19)]),
         ('band', 'b', [(30, 34)]),
         ('drum', 'c', [(42, 46), (55, 59)]),
     ]
     layer = Terms(None)
     matches = layer.calculate_offsets(text, applicable_terms)
     six.assertCountEqual(self, matches, expected)

Пример #16

0

Показать файл

 def test_node_definitions_multiple_xml(self):
     """Two xml-tagged definitions joined by `and` should both be found"""
     parents = ParentStack().add(0, Node(label=['9999']))
     winter = Node("(4) Cold and dreary mean winter.", label=['9999', '4'])
     winter.tagged_text = ('(4) <E T="03">Cold</E> and '
                           '<E T="03">dreary</E> mean winter.')
     found, _ = Terms(None).node_definitions(winter, parents)
     self.assertEqual(len(found), 2)
     first_ref, second_ref = found
     self.assertEqual(first_ref, Ref('cold', '9999-4', 4))
     self.assertEqual(second_ref, Ref('dreary', '9999-4', 13))

Пример #17

0

Показать файл

 def test_node_definitions_xml_or(self):
     """Two xml-tagged definitions joined by `or` should both be found"""
     parents = ParentStack().add(0, Node(label=['9999']))
     tamale_node = Node("(i) Hot tamale or tamale means nom nom",
                        label=['9999', '4'])
     tamale_node.tagged_text = ('(i) <E T="03">Hot tamale</E> or <E T="03"> '
                                'tamale</E> means nom nom ')
     found, _ = Terms(None).node_definitions(tamale_node, parents)
     self.assertEqual(len(found), 2)
     hot_ref, tamale_ref = found
     self.assertEqual(hot_ref, Ref('hot tamale', '9999-4', 4))
     self.assertEqual(tamale_ref, Ref('tamale', '9999-4', 18))

Пример #18

0

Показать файл

 def test_excluded_offsets_blacklist_word_boundaries(self):
     """An ignore phrase that begins/ends with word characters should only
     match on word boundaries; one wrapped in punctuation should still be
     matched."""
     settings.IGNORE_DEFINITIONS_IN['ALL'] = ['shed act', '(phrase)']
     layer = Terms(None)
     layer.scoped_terms['_'] = [Ref('act', '28-6-d', 0)]

     # 'shed act' sits inside 'watershed act', so it must not match
     inside_word = Node("That's a watershed act", label=['28', '9'])
     self.assertEqual([], layer.excluded_offsets(inside_word))

     # '(phrase)' starts and ends with non-word characters
     with_parens = Node("This has a '(phrase)' in it", label=['28', '9'])
     self.assertNotEqual([], layer.excluded_offsets(with_parens))

Пример #19

0

Показать файл

    def test_excluded_offsets_blacklist_per_reg(self):
        """Ignore phrases registered under the regulation's own key ('12')
        should be excluded for nodes whose label starts with that key."""
        layer = Terms(None)
        layer.scoped_terms['_'] = [
            Ref('bourgeois', '12-Q-2', 0),
            Ref('consumer', '12-Q-3', 0),
        ]

        settings.IGNORE_DEFINITIONS_IN['ALL'] = ['bourgeois pig']
        settings.IGNORE_DEFINITIONS_IN['12'] = ['consumer price index']

        paragraph = Node('There is a consumer price index', label=['12', '2'])
        offsets = layer.excluded_offsets(paragraph)
        self.assertEqual([(11, 31)], offsets)

Пример #20

0

Показать файл

 def test_calculate_offsets_pluralized1(self):
     """The plural form 'bands' should be matched as its own entry that
     carries the same ref as 'band'."""
     applicable_terms = [('rock band', 'a'), ('band', 'b'), ('drum', 'c'),
                         ('other thing', 'd')]
     text = ("I am in a rock band. That's a band with a drum, a rock drum."
             " Many bands. ")
     expected = [
         ('rock band', 'a', [(10, 19)]),
         ('band', 'b', [(30, 34)]),
         ('bands', 'b', [(66, 71)]),
         ('drum', 'c', [(42, 46), (55, 59)]),
     ]
     layer = Terms(None)
     matches = layer.calculate_offsets(text, applicable_terms)
     self.assertItemsEqual(matches, expected)

Пример #21

0

Показать файл

 def test_node_definitions_xml_commas(self):
     """Xml-tagged definitions separated by commas should all be found"""
     parents = ParentStack().add(0, Node(label=['9999']))
     summer = Node("(i) Hot, humid, or dry means summer.",
                   label=['9999', '4'])
     summer.tagged_text = ('(i) <E T="03">Hot</E>, <E T="03">humid</E>, '
                           'or <E T="03">dry</E> means summer.')
     found, _ = Terms(None).node_definitions(summer, parents)
     self.assertEqual(len(found), 3)
     hot_ref, humid_ref, dry_ref = found
     self.assertEqual(hot_ref, Ref('hot', '9999-4', 4))
     self.assertEqual(humid_ref, Ref('humid', '9999-4', 9))
     self.assertEqual(dry_ref, Ref('dry', '9999-4', 19))

Пример #22

0

Показать файл

Файл: layer_terms_tests.py Проект: pombreda/regulations-parser

    def test_excluded_offsets_blacklist_per_reg(self):
        """per_regulation_ignores should return the given exclusions plus
        the range of the ignore phrase registered for regulation '12'."""
        layer = Terms(None)
        layer.scoped_terms['_'] = [
            Ref('bourgeois', '12-Q-2', 'Def'),
            Ref('consumer', '12-Q-3', 'Def'),
        ]

        settings.IGNORE_DEFINITIONS_IN['ALL'] = ['bourgeois pig']
        settings.IGNORE_DEFINITIONS_IN['12'] = ['consumer price index']

        prior_exclusions = [(0, 4)]
        label = ['12', '2']
        text = 'There is a consumer price index'
        combined = layer.per_regulation_ignores(prior_exclusions, label, text)
        self.assertEqual([(0, 4), (11, 31)], combined)

Пример #23

0

Показать файл

    def test_node_definitions_too_long(self):
        """A quoted passage that is too long must not be treated as a
        definition"""
        parents = ParentStack().add(0, Node('Definitions', label=['9999']))

        text = u"""“I declare under the penalties of perjury that this—(insert
        type of document, such as, statement, application, request,
        certificate), including the documents submitted in support thereof,
        has been examined by me and, to the best of my knowledge and belief,
        is true, correct, and complete.”"""
        extract_node = Node(u'```extract\n{}\n```'.format(text))
        found, ignored = Terms(None).node_definitions(extract_node, parents)
        self.assertEqual([], found)
        self.assertEqual([], ignored)

Пример #24

0

Показать файл

Файл: layer_terms_tests.py Проект: pombreda/regulations-parser

 def test_subpart_scope(self):
     """subpart_scope should return one label per section listed in the
     subpart_map entry containing the given section, and an empty list
     when the section is in no entry."""
     layer = Terms(None)
     layer.subpart_map = {
         None: ['1', '2', '3'],
         'A': ['7', '5', '0'],
         'Q': ['99', 'abc', 'q']
     }
     # (label passed in, scope expected back)
     cases = [
         (['111', '3'], [['111', '1'], ['111', '2'], ['111', '3']]),
         (['115', '5'], [['115', '7'], ['115', '5'], ['115', '0']]),
         (['62', 'abc'], [['62', '99'], ['62', 'abc'], ['62', 'q']]),
         (['71', 'Z'], []),
     ]
     for label, expected_scope in cases:
         self.assertEqual(expected_scope, layer.subpart_scope(label))

Пример #25

0

Показать файл

 def test_process_label_in_node(self):
     """A term must not be highlighted inside the paragraph that defines
     it, but should be in a sibling paragraph."""
     defining = Node("Defining secret phrase.", label=['AB', 'a'])
     using = Node("Has secret phrase. Then some other content",
                  label=['AB', 'b'])
     tree = Node(children=[defining, using], label=['AB'])
     layer = Terms(tree)
     layer.scoped_terms = {('AB', ): [Ref("secret phrase", "AB-a", 9)]}

     # The first child is where the term is defined
     in_defining = layer.process(tree.children[0])
     self.assertEqual([], in_defining)
     in_using = layer.process(tree.children[1])
     self.assertEqual(1, len(in_using))

Пример #26

0

Показать файл

 def test_excluded_offsets(self):
     """excluded_offsets should return a (start, end) pair for every ref
     whose label matches the node, and nothing for an unrelated node."""
     layer = Terms(None)
     layer.scoped_terms['_'] = [
         Ref('term', 'lablab', 4),
         Ref('other', 'lablab', 8),
         Ref('more', 'nonnon', 1),
     ]

     two_defs = Node('Some text', label=['lablab'])
     self.assertEqual([(4, 8), (8, 13)], layer.excluded_offsets(two_defs))

     one_def = Node('Other', label=['nonnon'])
     self.assertEqual([(1, 5)], layer.excluded_offsets(one_def))

     no_defs = Node('Ab ab ab', label=['ababab'])
     self.assertEqual([], layer.excluded_offsets(no_defs))

Пример #27

0

Показать файл

Файл: layer_terms_tests.py Проект: pombreda/regulations-parser

    def test_is_exclusion(self):
        """Step through several scoped_terms / node text combinations and
        check when is_exclusion reports 'ex' as excluded for the node."""
        t = Terms(None)
        n = Node('ex ex ex', label=['1111', '2'])
        # Before scoped_terms has been replaced
        self.assertFalse(t.is_exclusion('ex', n))

        # Only a different term ('abc') is in scope
        t.scoped_terms = {('1111',): [Ref('abc', '1', (0, 0))]}
        self.assertFalse(t.is_exclusion('ex', n))

        # 'ex' is in scope, but the node text is still just 'ex ex ex'
        t.scoped_terms = {('1111',): [Ref('ex', '1', (0, 0))]}
        self.assertFalse(t.is_exclusion('ex', n))
        # Same scope; the text now says the term “ex” "does not include"
        n.text = u'Something something the term “ex” does not include potato'
        self.assertTrue(t.is_exclusion('ex', n))

        # Same text, but 'ex' is no longer one of the scoped terms
        t.scoped_terms = {('1111',): [Ref('abc', '1', (0, 0))]}
        self.assertFalse(t.is_exclusion('ex', n))

Пример #28

0

Показать файл

    def test_node_definitions_no_def(self):
        """None of these strings should produce any included or excluded
        definitions"""
        layer = Terms(None)
        parents = ParentStack()
        parents.add(0, Node(label=['999']))
        parents.add(1, Node('Definitions', label=['999', '1']))

        plain_texts = [
            'This has no defs',
            'Also has no terms',
            'Still no terms, but',
            'the next one does',
        ]

        for plain in plain_texts:
            found, ignored = layer.node_definitions(Node(plain), parents)
            self.assertEqual([], found)
            self.assertEqual([], ignored)

Пример #29

0

Показать файл

Файл: layer_terms_tests.py Проект: pombreda/regulations-parser

 def test_calculate_offsets(self):
     """calculate_offsets should return exactly three matches, and each
     expected (ref, offsets) combination must be among them."""
     applicable_terms = [('rock band', 'a'), ('band', 'b'), ('drum', 'c'),
                         ('other thing', 'd')]
     text = "I am in a rock band. That's a band with a drum, a rock drum."
     layer = Terms(None)
     matches = layer.calculate_offsets(text, applicable_terms)
     self.assertEqual(3, len(matches))

     expected = [
         ('a', [(10, 19)]),
         ('b', [(30, 34)]),
         ('c', [(42, 46), (55, 59)]),
     ]
     seen = [(ref, offsets) for _, ref, offsets in matches]
     found = [pair in seen for pair in expected]
     self.assertEqual([True, True, True], found)

Пример #30

0

Показать файл

Файл: layer_terms_tests.py Проект: pombreda/regulations-parser

    def test_has_parent_definitions_indicator_the_term_means(self):
        """Push and pop parents on the stack and check which parent texts
        make has_parent_definitions_indicator return True."""
        t = Terms(None)
        stack = ParentStack()
        # A lone parent without any definition wording
        stack.add(0, Node('Contains no terms or definitions'))
        self.assertFalse(t.has_parent_definitions_indicator(stack))
        # "The term ... means" phrasing on a parent
        stack.add(1, Node("(a) The term Bob means awesome"))
        self.assertTrue(t.has_parent_definitions_indicator(stack))
        # Still True with a further, non-defining entry added after it
        stack.add(2, Node("No defs either"))
        self.assertTrue(t.has_parent_definitions_indicator(stack))

        # Pop the two entries just added, then try a smart-quoted term
        stack.pop()
        stack.pop()
        stack.add(1, Node(u"(a) “Term” means some stuff"))
        self.assertTrue(t.has_parent_definitions_indicator(stack))

        # Swap in "The term ... refers to" phrasing
        stack.pop()
        stack.add(1, Node("(a) The term Bob refers to"))
        self.assertTrue(t.has_parent_definitions_indicator(stack))

Python Terms.Terms примеры использования