def __init__(self, url, content):
    """Initialise the parser and build two soups from ``content``.

    ``Parser.__init__`` is invoked first with the same arguments. Two
    BeautifulSoup trees are then created and stored on the instance:

    * ``self._massaged_soup`` -- parsed from a copy of ``content`` in
      which hexadecimal character references (``&#x41;``) have been
      rewritten in decimal notation (``&#65;``).
    * ``self._unmassaged_soup`` -- parsed from ``content`` exactly as
      it was given.

    Args:
        url: Forwarded unchanged to ``Parser.__init__``.
        content: HTML markup to parse; also forwarded to
            ``Parser.__init__``. Presumably a text string, since it is
            matched against a text regular expression -- confirm
            against callers.
    """
    Parser.__init__(self, url, content)

    # Rewrite hexadecimal character references in decimal notation.
    # Only runs of genuine hex digits are matched: a looser pattern such
    # as ``[^;]+`` would hand malformed input (e.g. ``&#xZZ;``) to
    # ``int(..., 16)`` and abort construction with ValueError. Malformed
    # references are left untouched for the HTML parser to deal with.
    massaged_html = re.sub(
        r'&#x([0-9A-Fa-f]+);',
        lambda m: '&#%d;' % int(m.group(1), 16),
        content,
    )

    # Soup built from the markup with hex references normalised to decimal.
    self._massaged_soup = BeautifulSoup(massaged_html, features='html')

    # Soup built from the original markup, with no preprocessing applied.
    self._unmassaged_soup = BeautifulSoup(content, features='html')
def __init__(self, url, content):
    """Set up the base parser state and parse ``content`` into a soup.

    Args:
        url: Forwarded unchanged to ``Parser.__init__``.
        content: Markup handed to ``Parser.__init__`` and then parsed
            with BeautifulSoup; the resulting tree is kept on
            ``self._soup``.
    """
    # Base-class initialisation runs before any parsing takes place.
    Parser.__init__(self, url, content)

    # NOTE(review): "html" looks like a markup type rather than a specific
    # parser name, so the concrete parser presumably depends on what is
    # installed -- confirm.
    parsed_tree = BeautifulSoup(content, features="html")
    self._soup = parsed_tree