def test_node_definitions_multiple_xml(self):
    """Two <E>-tagged terms joined by `and` should both be reported as
    definitions.

    The expected positions (4 and 13) are where "Cold" and "dreary" sit
    in the node's plain text.
    """
    parents = ParentStack().add(0, Node(label=['9999']))
    winter = Node("(4) Cold and dreary mean winter.",
                  label=['9999', '4'])
    winter.tagged_text = ('(4) <E T="03">Cold</E> and '
                          '<E T="03">dreary</E> mean winter.')

    # Only the first element of the returned pair matters here.
    found, _ = Terms(None).node_definitions(winter, parents)

    self.assertEqual(len(found), 2)
    first_ref, second_ref = found
    self.assertEqual(first_ref, Ref('cold', '9999-4', 4))
    self.assertEqual(second_ref, Ref('dreary', '9999-4', 13))
def test_node_definitions_xml_or(self):
    """Two <E>-tagged terms joined by `or` should both be reported as
    definitions.

    The second tag carries a stray space after its opening marker; the
    expected term is still 'tamale' at position 18.
    """
    parents = ParentStack().add(0, Node(label=['9999']))
    node = Node("(i) Hot tamale or tamale means nom nom",
                label=['9999', '4'])
    node.tagged_text = ('(i) <E T="03">Hot tamale</E> or <E T="03"> '
                        'tamale</E> means nom nom ')

    # Only the first element of the returned pair matters here.
    found, _ = Terms(None).node_definitions(node, parents)

    self.assertEqual(len(found), 2)
    hot_tamale_ref, tamale_ref = found
    self.assertEqual(hot_tamale_ref, Ref('hot tamale', '9999-4', 4))
    self.assertEqual(tamale_ref, Ref('tamale', '9999-4', 18))
def test_excluded_offsets_blacklist_per_reg(self):
    """An ignore-phrase registered under the key '12' should be excluded
    for a node whose label starts with '12'.

    (11, 31) is the span of "consumer price index" in the node text.
    """
    # NOTE(review): this mutates the shared settings.IGNORE_DEFINITIONS_IN
    # mapping; presumably a fixture outside this view resets it - confirm.
    terms = Terms(None)
    terms.scoped_terms['_'] = [
        Ref('bourgeois', '12-Q-2', 0),
        Ref('consumer', '12-Q-3', 0),
    ]
    settings.IGNORE_DEFINITIONS_IN['ALL'] = ['bourgeois pig']
    settings.IGNORE_DEFINITIONS_IN['12'] = ['consumer price index']

    node = Node('There is a consumer price index', label=['12', '2'])
    offsets = terms.excluded_offsets(node)

    self.assertEqual([(11, 31)], offsets)
def test_node_definitions_xml_commas(self):
    """Three <E>-tagged terms in a comma-separated list should each be
    reported as a definition.

    The expected positions (4, 9, 19) are where "Hot", "humid" and "dry"
    sit in the node's plain text.
    """
    parents = ParentStack().add(0, Node(label=['9999']))
    summer = Node("(i) Hot, humid, or dry means summer.",
                  label=['9999', '4'])
    summer.tagged_text = ('(i) <E T="03">Hot</E>, <E T="03">humid</E>, '
                          'or <E T="03">dry</E> means summer.')

    # Only the first element of the returned pair matters here.
    found, _ = Terms(None).node_definitions(summer, parents)

    self.assertEqual(len(found), 3)
    hot_ref, humid_ref, dry_ref = found
    self.assertEqual(hot_ref, Ref('hot', '9999-4', 4))
    self.assertEqual(humid_ref, Ref('humid', '9999-4', 9))
    self.assertEqual(dry_ref, Ref('dry', '9999-4', 19))
def test_excluded_offsets(self):
    """excluded_offsets() should return one span per scoped Ref whose
    label equals the node's label, and nothing for an unknown label.

    In every expected span the start is the Ref's position and the
    width equals the length of the Ref's term.
    """
    terms = Terms(None)
    terms.scoped_terms['_'] = [
        Ref('term', 'lablab', 4),
        Ref('other', 'lablab', 8),
        Ref('more', 'nonnon', 1),
    ]

    # (expected offsets, node text, node label)
    cases = (
        ([(4, 8), (8, 13)], 'Some text', 'lablab'),
        ([(1, 5)], 'Other', 'nonnon'),
        ([], 'Ab ab ab', 'ababab'),
    )
    for expected, text, label in cases:
        node = Node(text, label=[label])
        self.assertEqual(expected, terms.excluded_offsets(node))
def test_excluded_offsets_blacklist(self):
    """A phrase listed under settings.IGNORE_DEFINITIONS_IN['ALL'] should
    be excluded wherever it appears.

    (10, 23) is the span of "bourgeois pig" in the node text.
    """
    # NOTE(review): this mutates the shared settings.IGNORE_DEFINITIONS_IN
    # mapping; presumably a fixture outside this view resets it - confirm.
    terms = Terms(None)
    terms.scoped_terms['_'] = [Ref('bourgeois', '12-Q-2', 0)]
    settings.IGNORE_DEFINITIONS_IN['ALL'] = ['bourgeois pig']

    node = Node('You are a bourgeois pig!', label=['12', '3'])
    offsets = terms.excluded_offsets(node)

    self.assertEqual([(10, 23)], offsets)
def test_process(self):
    """process() should report the terms found in the given node, and
    the resulting 'ref' values should come back sorted by term name."""
    tree = Node(children=[
        Node("ABC5", children=[Node("child")], label=['ref1']),
        Node("AABBCC5", label=['ref2']),
        Node("ABC3", label=['ref3']),
        Node("AAA3", label=['ref4']),
        Node("ABCABC3", label=['ref5']),
        Node("ABCOTHER", label=['ref6']),
        Node("ZZZOTHER", label=['ref7']),
    ])
    terms = Terms(tree)
    terms.scoped_terms = {
        ("101", "22", "b", "2", "ii"): [
            Ref("abc", "ref1", 1),
            Ref("aabbcc", "ref2", 2),
        ],
        ("101", "22", "b"): [
            Ref("abc", "ref3", 3),
            Ref("aaa", "ref4", 4),
            Ref("abcabc", "ref5", 5),
        ],
        ("101", "22", "b", "2", "iii"): [
            Ref("abc", "ref6", 6),
            Ref("zzz", "ref7", 7),
        ],
    }

    # Check that the return value is correct
    target = Node("This has abc, aabbcc, aaa, abcabc, and zzz",
                  label=["101", "22", "b", "2", "ii"])
    layer_elements = terms.process(target)
    found_refs = [element['ref'] for element in layer_elements]

    # "abc" is expected to resolve to ref1 (not ref3 or ref6), and "zzz"
    # is expected to be absent from the result.
    self.assertEqual(
        found_refs,
        ['aaa:ref4', 'aabbcc:ref2', 'abc:ref1', 'abcabc:ref5'])
def test_node_definitions_exclusion(self):
    """Once a term is scoped, a later "does not include" paragraph for
    the same term should be reported as excluded rather than included."""
    defining = Node(u'“Bologna” is a type of deli meat', label=['111', '1'])
    carve_out = Node(
        u'Let us not forget that the term “bologna” does not ' +
        'include turtle meat',
        label=['111', '1', 'a'])
    terms = Terms(Node(label=['111'], children=[defining, carve_out]))
    terms.pre_process()

    parents = ParentStack()
    parents.add(1, Node('Definitions'))

    # First paragraph: a plain definition.
    included, excluded = terms.node_definitions(defining, parents)
    self.assertEqual([Ref('bologna', '111-1', 1)], included)
    self.assertEqual([], excluded)

    # Register that definition, then look at the carve-out paragraph.
    terms.scoped_terms[('111', '1')] = included
    included, excluded = terms.node_definitions(carve_out, parents)
    self.assertEqual([], included)
    self.assertEqual([Ref('bologna', '111-1-a', 33)], excluded)
def test_excluded_offsets_blacklist_word_boundaries(self):
    """An ignore-phrase that starts or ends with word characters should
    only match on word boundaries: 'shed act' must not match inside
    'watershed act', while '(phrase)' must still be found."""
    # NOTE(review): this mutates the shared settings.IGNORE_DEFINITIONS_IN
    # mapping; presumably a fixture outside this view resets it - confirm.
    settings.IGNORE_DEFINITIONS_IN['ALL'] = ['shed act', '(phrase)']
    terms = Terms(None)
    terms.scoped_terms['_'] = [Ref('act', '28-6-d', 0)]

    embedded = Node("That's a watershed act", label=['28', '9'])
    offsets = terms.excluded_offsets(embedded)
    self.assertEqual([], offsets)

    parenthesised = Node("This has a '(phrase)' in it", label=['28', '9'])
    offsets = terms.excluded_offsets(parenthesised)
    self.assertNotEqual([], offsets)
def test_process_label_in_node(self):
    """A term must not be highlighted inside the paragraph that defines
    it, but should still be found in a sibling paragraph."""
    defining_par = Node("Defining secret phrase.", label=['AB', 'a'])
    using_par = Node("Has secret phrase. Then some other content",
                     label=['AB', 'b'])
    tree = Node(children=[defining_par, using_par], label=['AB'])

    terms = Terms(tree)
    terms.scoped_terms = {('AB', ): [Ref("secret phrase", "AB-a", 9)]}

    # Term is defined in the first child
    in_definer = terms.process(tree.children[0])
    self.assertEqual([], in_definer)

    in_sibling = terms.process(tree.children[1])
    self.assertEqual(1, len(in_sibling))
def test_is_exclusion(self):
    """is_exclusion() should flag text that must not count as a term's
    definition.

    Two triggers are exercised: a "does not include" carve-out for a term
    that is already scoped, and a phrase listed in
    settings.IGNORE_DEFINITIONS_IN. The steps below build on each other,
    so their order matters.
    """
    terms = Terms(None)
    node = Node('ex ex ex', label=['1111', '2'])

    # Before any scoped terms are assigned.
    self.assertFalse(terms.is_exclusion('ex', node))

    # A different term is scoped.
    terms.scoped_terms = {('1111', ): [Ref('abc', '1', 0)]}
    self.assertFalse(terms.is_exclusion('ex', node))

    # The term itself is scoped, but the text has no carve-out wording.
    terms.scoped_terms = {('1111', ): [Ref('ex', '1', 0)]}
    self.assertFalse(terms.is_exclusion('ex', node))

    # Same scope, text now says the term "does not include" something.
    node.text = u'Something something the term “ex” does not include potato'
    self.assertTrue(terms.is_exclusion('ex', node))

    # Carve-out wording alone is not enough once 'ex' is no longer scoped.
    terms.scoped_terms = {('1111', ): [Ref('abc', '1', 0)]}
    self.assertFalse(terms.is_exclusion('ex', node))

    # NOTE(review): this mutates the shared settings.IGNORE_DEFINITIONS_IN
    # mapping; presumably a fixture outside this view resets it - confirm.
    settings.IGNORE_DEFINITIONS_IN['1111'] = ['phrase with abc in it']
    # The ignore-phrase is configured but absent from the current text.
    self.assertFalse(terms.is_exclusion('abc', node))

    # The ignore-phrase now appears in the text.
    node.text = "Now the node has a phrase with abc in it, doesn't it?"
    self.assertTrue(terms.is_exclusion('abc', node))
def test_pre_process(self):
    """pre_process() should collect definitions under the expected scope
    keys, build the subpart map, and fill the layer's 'referenced' dict.

    The tree has an unnamed subpart holding section 88-1 and a subpart
    'XQXQ' holding section 88-2. Each definition is checked for the scope
    key it lands under in scoped_terms.
    """
    noname_subpart = Node(
        '',
        label=['88', 'Subpart'],
        node_type=Node.EMPTYPART,
        children=[
            Node(u"Definition. For the purposes of this part, " +
                 u"“abcd” is an alphabet",
                 label=['88', '1'])
        ])
    xqxq_subpart = Node(
        '',
        title='Subpart XQXQ: The unreadable',
        label=['88', 'Subpart', 'XQXQ'],
        node_type=Node.SUBPART,
        children=[
            Node(label=['88', '2'], children=[
                Node(label=['88', '2', 'a'],
                     text="Definitions come later for the purposes of " +
                          "this section ",
                     children=[
                         Node(u"“AXAX” means axe-cop",
                              label=['88', '2', 'a', '1'])
                     ]),
                Node(label=['88', '2', 'b'], children=[
                    Node(label=['88', '2', 'b', 'i'], children=[
                        Node(label=['88', '2', 'b', 'i', 'A'],
                             text=u"Definition. “Awesome sauce” means " +
                                  "great for the purposes of this " +
                                  "paragraph")
                    ])
                ])
            ])
        ])
    tree = Node(label=['88'], children=[noname_subpart, xqxq_subpart])
    t = Terms(tree)
    t.pre_process()

    # Each definition should be filed under the scope named in its text.
    self.assertIn(('88', ), t.scoped_terms)
    self.assertEqual([Ref('abcd', '88-1', 44)],
                     t.scoped_terms[('88', )])
    self.assertIn(('88', '2'), t.scoped_terms)
    self.assertEqual([Ref('axax', '88-2-a-1', 1)],
                     t.scoped_terms[('88', '2')])
    self.assertIn(('88', '2', 'b', 'i', 'A'), t.scoped_terms)
    self.assertEqual([Ref('awesome sauce', '88-2-b-i-A', 13)],
                     t.scoped_terms[('88', '2', 'b', 'i', 'A')])

    # Check subparts are correct
    self.assertEqual({None: ['1'], 'XQXQ': ['2']},
                     dict(t.scope_finder.subpart_map))

    # Finally, make sure the references are added
    referenced = t.layer['referenced']
    # (key in 'referenced', term, reference, position)
    expected_entries = (
        ('abcd:88-1', 'abcd', '88-1', (44, 48)),
        ('axax:88-2-a-1', 'axax', '88-2-a-1', (1, 5)),
        ('awesome sauce:88-2-b-i-A', 'awesome sauce', '88-2-b-i-A',
         (13, 26)),
    )
    for key, term, reference, position in expected_entries:
        self.assertIn(key, referenced)
        self.assertEqual(term, referenced[key]['term'])
        self.assertEqual(reference, referenced[key]['reference'])
        self.assertEqual(position, referenced[key]['position'])