def feed(self, markup): encoding = self.soup.original_encoding try: self.parser = self.parser_for(encoding) self.parser.feed(markup) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError) as e: raise ParserRejectedMarkup(str(e))
def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) elif isinstance(markup, unicode): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) try: self.parser = self.parser_for(self.soup.original_encoding) self.parser.feed(data) while len(data) != 0: # Now call feed() on the rest of the data, chunk by chunk. data = markup.read(self.CHUNK_SIZE) if len(data) != 0: self.parser.feed(data) self.parser.close() except (UnicodeDecodeError, LookupError, etree.ParserError) as e: raise ParserRejectedMarkup(str(e))