def _add_code_ref(self, index, code_ref_element, page, load, s_code_references):
    """Create, save and collect the code references found in one element.

    The element's text is read through ``self.xcoderef``, passed through
    ``clean_breaks`` and stripped.  Text shorter than two characters, or made
    only of digits, is ignored.  Otherwise every reference yielded by
    ``parse_single_code_references`` is annotated, saved and appended to
    ``s_code_references``.

    :param index: value stored on each reference as ``index``.
    :param code_ref_element: element the reference text is read from.
    :param page: object whose ``file_path`` is copied onto each reference.
    :param load: object whose ``tree.getpath`` yields the element's path.
    :param s_code_references: collection receiving each saved reference
        through ``append``.
    """
    element = code_ref_element
    cleaned = clean_breaks(self.xcoderef.get_text(element)).strip()

    # Not significant
    if len(cleaned) < 2 or cleaned.isdigit():
        return

    paragraph = get_text_context(element)
    sentence = get_sentence(element, cleaned, paragraph)
    # The kind lookup may hand back an adjusted text alongside the hint.
    ref_text, kind_hint = self._get_code_ref_kind(element, cleaned)
    element_path = load.tree.getpath(element)

    references = parse_single_code_references(
        ref_text, kind_hint, self.kind_strategies, self.kinds)
    for reference in references:
        reference.xpath = element_path
        reference.file_path = page.file_path
        reference.source = DOCUMENT_SOURCE
        reference.index = index
        reference.sentence = sentence
        reference.paragraph = paragraph
        reference.project = self.document.project_release.project
        reference.project_release = self.document.project_release
        reference.save()
        s_code_references.append(reference)
def test_get_text_context(self):
    # Parse the fixture page with its detected encoding, comments removed,
    # then clean the tree before querying it.
    detected_encoding = cc.get_encoding(page_test2)
    html_parser = etree.HTMLParser(
        remove_comments=True,
        encoding=detected_encoding,
    )
    document = etree.fromstring(page_test2, html_parser).getroottree()
    eu.clean_tree(document)

    # The context is computed for the first element matched by //tt[1].
    matches = document.xpath("//tt[1]")
    context = eu.get_text_context(matches[0])

    self.assertEqual("Hello World foobar. This is nice. Yo.", context)
def test_get_text_context(self):
    # Build a comment-stripping HTML parser using the fixture's encoding.
    page_encoding = cc.get_encoding(page_test2)
    parser_options = {'remove_comments': True, 'encoding': page_encoding}
    html_parser = etree.HTMLParser(**parser_options)

    # Parse the fixture and clean the resulting tree before querying it.
    root = etree.fromstring(page_test2, html_parser)
    page_tree = root.getroottree()
    eu.clean_tree(page_tree)

    # Take the first node returned for //tt[1] and check its text context.
    tt_node = page_tree.xpath('//tt[1]')[0]
    expected = 'Hello World foobar. This is nice. Yo.'
    self.assertEqual(expected, eu.get_text_context(tt_node))
def test_get_sentence(self):
    # Parse the fixture page with its detected encoding, comments removed,
    # then clean the tree before querying it.
    detected_encoding = cc.get_encoding(page_test2)
    html_parser = etree.HTMLParser(
        remove_comments=True,
        encoding=detected_encoding,
    )
    document = etree.fromstring(page_test2, html_parser).getroottree()
    eu.clean_tree(document)

    # Each row: (xpath query, text searched for, expected sentence).
    scenarios = (
        # Lookup on the first element matched by //tt[1].
        ("//tt[1]", "foobar", "Hello World foobar."),
        # Test when there are more than one match!
        ("//code[2]", "foo", "This is foo."),
        # Test when there are more than one match, but wrong markup
        # (sorry...)
        ("//b[1]", "foo", "Hello World foo."),
    )

    for query, needle, expected_sentence in scenarios:
        element = document.xpath(query)[0]
        context = eu.get_text_context(element)
        found = eu.get_sentence(element, needle, context)
        self.assertEqual(expected_sentence, found)
def test_get_sentence(self):
    # Build a comment-stripping HTML parser using the fixture's encoding.
    page_encoding = cc.get_encoding(page_test2)
    html_parser = etree.HTMLParser(remove_comments=True,
                                   encoding=page_encoding)

    # Parse the fixture and clean the resulting tree before querying it.
    page_tree = etree.fromstring(page_test2, html_parser).getroottree()
    eu.clean_tree(page_tree)

    def sentence_for(query, needle):
        # Resolve the first node for the query, then extract the sentence
        # containing the needle from that node's text context.
        node = page_tree.xpath(query)[0]
        node_context = eu.get_text_context(node)
        return eu.get_sentence(node, needle, node_context)

    self.assertEqual('Hello World foobar.', sentence_for('//tt[1]', 'foobar'))

    # Test when there are more than one match!
    self.assertEqual('This is foo.', sentence_for('//code[2]', 'foo'))

    # Test when there are more than one match, but wrong markup (sorry...)
    self.assertEqual('Hello World foo.', sentence_for('//b[1]', 'foo'))