Exemplos de UnicodeDammit.encode em Python

Linguagem de programação: Python

Namespace / nome do pacote: bs4

Classe / Tipo: UnicodeDammit

Método / Função: encode

Exemplos em hotexamples.com: 2

UnicodeDammit.encode em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de bs4.UnicodeDammit.encode em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir / Ocultar

UnicodeDammit(30)

detwingle(21)

strip(10)

split(8)

replace(4)

lower(3)

splitlines(3)

encode(2)

startswith(2)

append(1)

decode(1)

endswith(1)

lstrip(1)

rstrip(1)

translate(1)

xpath(1)

Métodos Frequentes

UnicodeDammit (30)

detwingle (21)

strip (10)

split (8)

replace (4)

lower (3)

splitlines (3)

encode (2)

startswith (2)

append (1)

Métodos Frequentes

decode (1)

endswith (1)

lstrip (1)

rstrip (1)

translate (1)

xpath (1)

Exemplo n.º 1

0

Exibir arquivo

def page_changed(fp: str, new_content: str) -> Tuple[bool, str, str]:
    """Report whether ``new_content`` differs from the HTML file at ``fp``.

    The existing file is read as bytes so ``chardet`` can guess its
    encoding, then re-read as text with that encoding.  Both the existing
    text and ``new_content`` are passed through ``UnicodeDammit``,
    re-encoded with the detected encoding and compared by SHA-256 digest.
    Both digests are printed to stdout.

    Args:
        fp: Path of the existing HTML file.
        new_content: Candidate replacement content.

    Returns:
        A ``(changed, formatted_new_content, encoding)`` tuple:
        ``changed`` is True when the digests differ or the file does not
        exist; ``formatted_new_content`` is ``new_content`` as returned by
        ``UnicodeDammit(...).unicode_markup``; ``encoding`` is the encoding
        detected for the existing file, or ``"utf8"`` when the file is
        missing or no encoding could be detected.

    Raises:
        UnicodeEncodeError: If either text cannot be encoded with the
            detected encoding (``str.encode`` is called in strict mode).
    """
    formatted_new_content = UnicodeDammit(new_content).unicode_markup

    try:
        # Read the raw bytes of the existing file to guess its encoding.
        with open(fp, "rb") as file:
            file_bytes = file.read()
        # chardet reports ``None`` for empty or undetectable input; passing
        # that on to ``str.encode`` below would raise TypeError, so fall
        # back to the same default the missing-file branch reports.
        orig_encoding = chardet.detect(file_bytes)["encoding"] or "utf8"

        # Re-read the file as text using the detected encoding.
        with open(fp, encoding=orig_encoding) as file:
            file_content = UnicodeDammit(file.read()).unicode_markup
    except FileNotFoundError:
        # Nothing to compare against: treat as changed.
        return True, formatted_new_content, "utf8"

    old_hash = hashlib.sha256(
        file_content.encode(orig_encoding)).hexdigest()
    new_hash = hashlib.sha256(
        formatted_new_content.encode(orig_encoding)).hexdigest()
    print(old_hash, new_hash)
    return old_hash != new_hash, formatted_new_content, orig_encoding

Exemplo n.º 2

0

Exibir arquivo

def getURL(self, url, uid):
    """Fetch ``url`` (cached on disk as ``<uid>.html``) and parse it.

    When the cache file does not exist, the URL is opened through
    ``self.opener``, the body is gunzipped if possible, converted to text
    with ``UnicodeDammit`` (with ``windows-1252`` passed as the encoding
    hint and smart quotes converted to HTML entities), cleaned of a few
    known mojibake sequences and written to the cache file as UTF-8.
    The cache file is then fed to an ``etree.HTMLParser`` whose target is
    a ``Parser`` instance.

    The cache file is written only after the page was fetched and decoded
    successfully, so a failed request never leaves an empty file behind
    that later calls would mistake for a valid cached page.

    Args:
        url: Address of the page to download.
        uid: Identifier used to build the cache file name
            (``str(uid) + ".html"``).

    Returns:
        The value returned by ``parser.close()``, or ``None`` when the URL
        could not be opened.
    """
    cache_path = str(uid) + ".html"

    if not os.path.isfile(cache_path):
        try:
            response = self.opener.open(url)
        except Exception:
            # Best effort: any failure to open the URL means "no result".
            return None
        data = response.read()

        try:
            # The body may be gzip-compressed; try to inflate it first.
            decoded = UnicodeDammit(
                gzip.GzipFile(fileobj=io.BytesIO(data)).read(),
                ["windows-1252"],
                smart_quotes_to="html").unicode_markup
        except Exception:
            # Not gzip (or inflating failed): use the raw body as-is.
            decoded = UnicodeDammit(
                data,
                ["windows-1252"],
                smart_quotes_to="html").unicode_markup

        # Order matters: later patterns match the output of earlier ones.
        replacements = (
            (u"%20", u" "),
            (u"\u00c2", u" "),
            (u"\xe2€™", u"\'"),
            (u"\xe2€&oelig;", u"\""),
            (u"\xe2€", "\""),
            (u"\"“", "-"),
        )
        for old, new in replacements:
            decoded = decoded.replace(old, new)

        # Binary mode keeps the on-disk bytes explicit (UTF-8 plus the
        # trailing newline the original ``print`` emitted) and works on
        # both Python 2 and Python 3.
        with open(cache_path, "wb") as fptr:
            fptr.write(decoded.encode("utf8") + b"\n")

    parser = etree.HTMLParser(target=Parser())
    with open(cache_path, "rb") as fptr:
        data = fptr.read()
    parser.feed(data.decode("utf8"))
    return parser.close()