def setUp(self):
    """Activate the service stubs and seed the wiki with fixture pages.

    Each fixture page is written twice so that it ends up with more than
    one revision.
    """
    bed = testbed.Testbed()
    self.testbed = bed
    bed.activate()
    bed.init_datastore_v3_stub()
    bed.init_memcache_stub()
    bed.init_user_stub()
    # no OAuth login
    bed._test_stub_map.GetStub(testbed.USER_SERVICE_NAME).SetOAuthUser(
        email=None)
    bed.init_taskqueue_stub()

    self.parser = html5parser.HTMLParser(strict=True)
    self.browser = Browser()

    # [title, body] pairs for the pages created below.
    self.fixtures = [
        [u'Home', u'Goto [[한글 제목]]'],
        [u'A', u'Goto [[Home]]'],
        [u'한글 제목', u'Goto [[Home]]'],
        [u'Presentation', u'.pt\nHello'],
        [u'Blog post', u'.pub\nHello'],
    ]
    for page_title, content in self.fixtures:
        wiki_page = WikiPage.get_by_title(page_title)
        # Write the body, then write it again with a trailing '!' to create
        # revisions. The second argument goes 0 then 1 (presumably the
        # revision being edited -- confirm against update_content).
        for revision, text in enumerate((content, content + u'!')):
            wiki_page.update_content(text, revision, None)
def setUp(self):
    """Activate the service stubs and create the parser and browser."""
    bed = testbed.Testbed()
    self.testbed = bed
    bed.activate()

    # Stubs are initialised in the same order as before: datastore,
    # memcache, user, task queue.
    bed.init_datastore_v3_stub()
    bed.init_memcache_stub()
    bed.init_user_stub()
    bed.init_taskqueue_stub()

    # strict=True is passed through to html5parser.HTMLParser.
    self.parser = html5parser.HTMLParser(strict=True)
    self.browser = Browser()
def html2text(html):
    """Render html as text, convert line breaks to spaces.

    Input for which ``ishtml`` is false is returned with its whitespace
    collapsed and nothing else changed.  Otherwise the markup is parsed with
    the HTML5 parser, every element named in ``BLOCKTAGS`` is padded with a
    space on both sides so that text from adjacent blocks does not run
    together, and the tree is serialised as plain text.

    :param html: the text or HTML markup to render (a ``str``).
    :return: the rendered text with every whitespace run collapsed to a
        single space and leading/trailing whitespace removed.
    """
    if not ishtml(html):
        return re.sub(r'\s+', ' ', html.strip())

    parser = html5parser.HTMLParser(namespaceHTMLElements=False)
    with warnings.catch_warnings():
        # Silence warnings emitted while parsing.
        warnings.simplefilter('ignore')
        tree = html5parser.fromstring(html, parser=parser)

    for tag in BLOCKTAGS:
        for elem in tree.xpath(f'//{tag}'):
            # Always start the block with a space.  The previous expression
            # parsed as ``(' ' + text) if text else ''``, which dropped the
            # leading space whenever the element had no direct text (for
            # example when it starts with a child element), gluing its
            # content to whatever text preceded it.
            elem.text = ' ' + (elem.text or '')
            if len(elem) > 0:
                # Text after the last child is that child's tail, so the
                # trailing space has to be appended there.
                last_child = elem[-1]
                last_child.tail = (last_child.tail or '') + ' '
            else:
                elem.text += ' '

    text = tostring(tree, method='text', encoding='utf-8')
    # The padding above can produce runs of spaces; collapse them here.
    return re.sub(r'\s+', ' ', text.decode().strip())
def setUp(self):
    """Run the base setUp, log in, and seed the wiki with fixture pages.

    Each fixture page is written twice, as the current user, so that it
    ends up with more than one revision.
    """
    super(HTML5ValidationTest, self).setUp()

    # no OAuth login
    oauth_stub = self.testbed._test_stub_map.GetStub(
        testbed.USER_SERVICE_NAME)
    oauth_stub.SetOAuthUser(email=None)

    self.parser = html5parser.HTMLParser(strict=True)
    self.login('*****@*****.**', 'ak')
    self.browser = Browser()

    # [title, body] pairs for the pages created below.
    self.fixtures = [
        [u'Home', u'Goto [[한글 제목]]'],
        [u'A', u'Goto [[Home]]'],
        [u'한글 제목', u'Goto [[Home]]'],
        [u'Presentation', u'.pt\nHello'],
        [u'Blog post', u'.pub\nHello'],
    ]
    for page_title, content in self.fixtures:
        wiki_page = WikiPage.get_by_title(page_title)
        # Write the body, then write it again with a trailing '!' to create
        # revisions. The current user is looked up afresh for each write.
        for revision, text in enumerate((content, content + u'!')):
            wiki_page.update_content(
                text, revision, None, user=self.get_cur_user())
def repair_html(html_str):
    """Round-trip ``html_str`` through the HTML5 parser.

    The markup is parsed without namespaced element names and without
    charset guessing, then serialised back to a unicode string.

    :param html_str: the HTML markup to parse.
    :return: the re-serialised markup as a unicode string.
    """
    tree = html5parser.fromstring(
        html_str,
        guess_charset=False,
        parser=html5parser.HTMLParser(namespaceHTMLElements=False),
    )
    return html.tostring(tree, encoding='unicode')
def __init__(self):
    """Create a strict HTML5 parser and clear the ``res``/``tree`` slots."""
    self.parser = html5parser.HTMLParser(strict=True)
    # Both start out unset; nothing in this method fills them in.
    self.res = self.tree = None
def setUp(self):
    """Run the base setUp, then create the parser, log in and open a browser."""
    super(RevisionTest, self).setUp()

    strict_parser = html5parser.HTMLParser(strict=True)
    self.parser = strict_parser

    # Arguments handed to self.login (presumably an e-mail address and a
    # second credential -- confirm against the login helper).
    login_args = ('*****@*****.**', 'ak')
    self.login(*login_args)

    self.browser = Browser()