Пример #1
0
 def test_node_definitions_multiple_xml(self):
     """Two emphasized terms joined by `and` should both be reported as
     definitions."""
     parents = ParentStack().add(0, Node(label=['9999']))
     node = Node("(4) Cold and dreary mean winter.", label=['9999', '4'])
     node.tagged_text = ('(4) <E T="03">Cold</E> and '
                         '<E T="03">dreary</E> mean winter.')
     included, _ = Terms(None).node_definitions(node, parents)
     self.assertEqual(len(included), 2)
     # 4 and 13 are where "Cold" and "dreary" start in the plain
     # (untagged) node text
     first, second = included
     self.assertEqual(first, Ref('cold', '9999-4', 4))
     self.assertEqual(second, Ref('dreary', '9999-4', 13))
Пример #2
0
 def test_node_definitions_xml_or(self):
     """Two emphasized terms joined by `or` should both be reported as
     definitions."""
     parents = ParentStack().add(0, Node(label=['9999']))
     node = Node("(i) Hot tamale or tamale means nom nom",
                 label=['9999', '4'])
     node.tagged_text = ('(i) <E T="03">Hot tamale</E> or <E T="03"> '
                         'tamale</E> means nom nom ')
     included, _ = Terms(None).node_definitions(node, parents)
     self.assertEqual(len(included), 2)
     # 4 and 18 are where "Hot tamale" and the second "tamale" start in
     # the plain (untagged) node text
     wanted = [Ref('hot tamale', '9999-4', 4), Ref('tamale', '9999-4', 18)]
     for actual, expected in zip(included, wanted):
         self.assertEqual(actual, expected)
Пример #3
0
    def test_excluded_offsets_blacklist_per_reg(self):
        """An ignore phrase registered under a node's first label component
        ('12') should be excluded in addition to the 'ALL' phrases."""
        terms = Terms(None)
        terms.scoped_terms['_'] = [Ref('bourgeois', '12-Q-2', 0),
                                   Ref('consumer', '12-Q-3', 0)]

        # NOTE(review): these writes to the shared settings dict are not
        # undone in this method -- confirm a fixture resets them.
        settings.IGNORE_DEFINITIONS_IN['ALL'] = ['bourgeois pig']
        settings.IGNORE_DEFINITIONS_IN['12'] = ['consumer price index']

        node = Node('There is a consumer price index', label=['12', '2'])
        # (11, 31) spans "consumer price index" within the node text
        self.assertEqual([(11, 31)], terms.excluded_offsets(node))
Пример #4
0
 def test_node_definitions_xml_commas(self):
     """Emphasized terms in a comma-separated list should each be
     reported as a definition."""
     parents = ParentStack().add(0, Node(label=['9999']))
     node = Node("(i) Hot, humid, or dry means summer.",
                 label=['9999', '4'])
     node.tagged_text = ('(i) <E T="03">Hot</E>, <E T="03">humid</E>, '
                         'or <E T="03">dry</E> means summer.')
     included, _ = Terms(None).node_definitions(node, parents)
     self.assertEqual(len(included), 3)
     # Offsets are the start of each word in the plain (untagged) text
     wanted = [
         Ref('hot', '9999-4', 4),
         Ref('humid', '9999-4', 9),
         Ref('dry', '9999-4', 19),
     ]
     for actual, expected in zip(included, wanted):
         self.assertEqual(actual, expected)
Пример #5
0
 def test_excluded_offsets(self):
     """excluded_offsets() is expected to return a (start, end) pair for
     every scoped Ref whose label equals the node's label, and nothing
     for a label with no Refs."""
     terms = Terms(None)
     terms.scoped_terms['_'] = [
         Ref('term', 'lablab', 4),
         Ref('other', 'lablab', 8),
         Ref('more', 'nonnon', 1),
     ]
     # (expected offsets, node text, node label)
     cases = (
         ([(4, 8), (8, 13)], 'Some text', 'lablab'),
         ([(1, 5)], 'Other', 'nonnon'),
         ([], 'Ab ab ab', 'ababab'),
     )
     for expected, text, label in cases:
         self.assertEqual(expected,
                          terms.excluded_offsets(Node(text, label=[label])))
Пример #6
0
 def test_excluded_offsets_blacklist(self):
     """A phrase listed under IGNORE_DEFINITIONS_IN['ALL'] should be
     excluded wherever it appears in the node text."""
     terms = Terms(None)
     terms.scoped_terms['_'] = [Ref('bourgeois', '12-Q-2', 0)]
     # NOTE(review): this write to the shared settings dict is not undone
     # in this method -- confirm a fixture resets it.
     settings.IGNORE_DEFINITIONS_IN['ALL'] = ['bourgeois pig']
     node = Node('You are a bourgeois pig!', label=['12', '3'])
     # (10, 23) spans "bourgeois pig" within the node text
     self.assertEqual([(10, 23)], terms.excluded_offsets(node))
Пример #7
0
 def test_process(self):
     """process() should find the terms used in the requested node and
     return them ordered by term name."""
     children = [
         Node("ABC5", children=[Node("child")], label=['ref1']),
         Node("AABBCC5", label=['ref2']),
         Node("ABC3", label=['ref3']),
         Node("AAA3", label=['ref4']),
         Node("ABCABC3", label=['ref5']),
         Node("ABCOTHER", label=['ref6']),
         Node("ZZZOTHER", label=['ref7']),
     ]
     terms = Terms(Node(children=children))

     outer_scope = ("101", "22", "b")
     own_scope = outer_scope + ("2", "ii")
     sibling_scope = outer_scope + ("2", "iii")
     terms.scoped_terms = {
         own_scope: [Ref("abc", "ref1", 1), Ref("aabbcc", "ref2", 2)],
         outer_scope: [
             Ref("abc", "ref3", 3),
             Ref("aaa", "ref4", 4),
             Ref("abcabc", "ref5", 5),
         ],
         sibling_scope: [Ref("abc", "ref6", 6), Ref("zzz", "ref7", 7)],
     }

     node = Node("This has abc, aabbcc, aaa, abcabc, and zzz",
                 label=list(own_scope))
     found = [element['ref'] for element in terms.process(node)]
     # Expected: "abc" resolves to ref1 (the node's own scope) and "zzz",
     # which is only defined in the sibling scope, is not reported
     self.assertEqual(
         found,
         ['aaa:ref4', 'aabbcc:ref2', 'abc:ref1', 'abcabc:ref5'])
Пример #8
0
    def test_node_definitions_exclusion(self):
        """Once a term is in scope, a later node saying the term "does not
        include" something should be reported as excluded rather than as a
        new definition."""
        defining = Node(u'“Bologna” is a type of deli meat',
                        label=['111', '1'])
        carve_out = Node(
            u'Let us not forget that the term “bologna” does not ' +
            'include turtle meat', label=['111', '1', 'a'])
        terms = Terms(Node(label=['111'], children=[defining, carve_out]))
        terms.pre_process()

        parents = ParentStack()
        parents.add(1, Node('Definitions'))

        # First node: a plain definition, nothing excluded
        included, excluded = terms.node_definitions(defining, parents)
        self.assertEqual([Ref('bologna', '111-1', 1)], included)
        self.assertEqual([], excluded)
        terms.scoped_terms[('111', '1')] = included

        # Second node: the same term, now only as an exclusion
        included, excluded = terms.node_definitions(carve_out, parents)
        self.assertEqual([], included)
        self.assertEqual([Ref('bologna', '111-1-a', 33)], excluded)
Пример #9
0
 def test_excluded_offsets_blacklist_word_boundaries(self):
     """Ignore phrases that begin/end with word characters should only
     match on word boundaries; phrases delimited by non-word characters
     should still match."""
     # NOTE(review): this write to the shared settings dict is not undone
     # in this method -- confirm a fixture resets it.
     settings.IGNORE_DEFINITIONS_IN['ALL'] = ['shed act', '(phrase)']
     terms = Terms(None)
     terms.scoped_terms['_'] = [Ref('act', '28-6-d', 0)]

     # "shed act" only occurs inside "watershed act", so nothing matches
     mid_word = Node("That's a watershed act", label=['28', '9'])
     self.assertEqual([], terms.excluded_offsets(mid_word))

     parenthesized = Node("This has a '(phrase)' in it", label=['28', '9'])
     self.assertNotEqual([], terms.excluded_offsets(parenthesized))
Пример #10
0
 def test_process_label_in_node(self):
     """A term must not be highlighted inside the paragraph that defines
     it, but should be in a sibling paragraph that merely uses it."""
     defining = Node("Defining secret phrase.", label=['AB', 'a'])
     using = Node("Has secret phrase. Then some other content",
                  label=['AB', 'b'])
     tree = Node(children=[defining, using], label=['AB'])
     terms = Terms(tree)
     # The Ref's label ("AB-a") points at the first child
     terms.scoped_terms = {('AB', ): [Ref("secret phrase", "AB-a", 9)]}
     self.assertEqual([], terms.process(tree.children[0]))
     self.assertEqual(1, len(terms.process(tree.children[1])))
Пример #11
0
    def test_is_exclusion(self):
        """Some text should _not_ be treated as the definition of a term.
        A carve-out from an existing definition ("does not include") must
        not replace the original, and phrases listed in
        settings.IGNORE_DEFINITIONS_IN are skipped explicitly."""
        terms = Terms(None)
        node = Node('ex ex ex', label=['1111', '2'])
        scope = ('1111', )

        # Nothing in scope yet
        self.assertFalse(terms.is_exclusion('ex', node))

        # Only an unrelated term in scope
        terms.scoped_terms = {scope: [Ref('abc', '1', 0)]}
        self.assertFalse(terms.is_exclusion('ex', node))

        # Term in scope, but the text carries no carve-out wording...
        terms.scoped_terms = {scope: [Ref('ex', '1', 0)]}
        self.assertFalse(terms.is_exclusion('ex', node))
        # ...until it says the term "does not include" something
        node.text = u'Something something the term “ex” does not include potato'
        self.assertTrue(terms.is_exclusion('ex', node))

        # Same carve-out wording, but the term is no longer in scope
        terms.scoped_terms = {scope: [Ref('abc', '1', 0)]}
        self.assertFalse(terms.is_exclusion('ex', node))

        # An ignored phrase only counts once the node text contains it.
        # NOTE(review): this write to the shared settings dict is not
        # undone in this method -- confirm a fixture resets it.
        settings.IGNORE_DEFINITIONS_IN['1111'] = ['phrase with abc in it']
        self.assertFalse(terms.is_exclusion('abc', node))
        node.text = "Now the node has a phrase with abc in it, doesn't it?"
        self.assertTrue(terms.is_exclusion('abc', node))
Пример #12
0
    def test_pre_process(self):
        """pre_process() should store each definition in scoped_terms under
        the expected scope, fill in the subpart map, and record every
        definition in layer['referenced']."""
        # Unnamed subpart holding section 88-1
        section_1 = Node(
            u"Definition. For the purposes of this part, " +
            u"“abcd” is an alphabet",
            label=['88', '1'])
        noname_subpart = Node('',
                              label=['88', 'Subpart'],
                              node_type=Node.EMPTYPART,
                              children=[section_1])

        # Subpart XQXQ holding section 88-2, built from the leaves upward
        axax_def = Node(u"“AXAX” means axe-cop",
                        label=['88', '2', 'a', '1'])
        para_a = Node(label=['88', '2', 'a'],
                      text="Definitions come later for the purposes of "
                      + "this section ",
                      children=[axax_def])
        awesome_def = Node(label=['88', '2', 'b', 'i', 'A'],
                           text=u"Definition. “Awesome sauce” means "
                           +
                           "great for the purposes of this " +
                           "paragraph")
        para_b_i = Node(label=['88', '2', 'b', 'i'], children=[awesome_def])
        para_b = Node(label=['88', '2', 'b'], children=[para_b_i])
        section_2 = Node(label=['88', '2'], children=[para_a, para_b])
        xqxq_subpart = Node('',
                            title='Subpart XQXQ: The unreadable',
                            label=['88', 'Subpart', 'XQXQ'],
                            node_type=Node.SUBPART,
                            children=[section_2])

        root = Node(label=['88'], children=[noname_subpart, xqxq_subpart])
        terms = Terms(root)
        terms.pre_process()

        # Each definition lands in the scope named by its wording:
        # "this part", "this section", "this paragraph"
        scoped = terms.scoped_terms
        expected_scopes = (
            (('88', ), Ref('abcd', '88-1', 44)),
            (('88', '2'), Ref('axax', '88-2-a-1', 1)),
            (('88', '2', 'b', 'i', 'A'),
             Ref('awesome sauce', '88-2-b-i-A', 13)),
        )
        for scope, ref in expected_scopes:
            self.assertTrue(scope in scoped)
            self.assertEqual([ref], scoped[scope])

        #   Check subparts are correct
        self.assertEqual({
            None: ['1'],
            'XQXQ': ['2']
        }, dict(terms.scope_finder.subpart_map))

        # Finally, make sure the references are added
        referenced = terms.layer['referenced']
        expected_refs = (
            ('abcd:88-1', 'abcd', '88-1', (44, 48)),
            ('axax:88-2-a-1', 'axax', '88-2-a-1', (1, 5)),
            ('awesome sauce:88-2-b-i-A', 'awesome sauce', '88-2-b-i-A',
             (13, 26)),
        )
        for key, term, reference, position in expected_refs:
            self.assertTrue(key in referenced)
            entry = referenced[key]
            self.assertEqual(term, entry['term'])
            self.assertEqual(reference, entry['reference'])
            self.assertEqual(position, entry['position'])