Python _unquote 예제들, urllib._unquote Python 예제들

예제 #1

0

파일 보기

파일: URLBase.py 프로젝트: swipswaps/SickGear

    def unquote(content, encoding='utf-8', errors='replace'):
        """
        Decode the %xx escapes of a quoted URI string.

        Compatibility wrapper around ``_unquote``: the call is first made
        with the ``encoding`` and ``errors`` keyword arguments (Python 3
        signature); if that raises a TypeError the call is repeated with
        the content alone (Python 2.7 signature).

        Args:
            content (str): The quoted URI string you wish to unquote
            encoding (:obj:`str`, optional): encoding handed to ``_unquote``
            errors (:obj:`str`, optional): error handling scheme handed to
                ``_unquote``; the default 'replace' substitutes a
                placeholder character for invalid sequences

        Returns:
            str: The unquoted URI string, or an empty string when content
                is empty or None
        """
        if content:
            try:
                # Python v3.x
                return _unquote(content, encoding=encoding, errors=errors)

            except TypeError:
                # Python v2.7
                return _unquote(content)

        # Nothing to decode
        return ''

예제 #2

0

파일 보기

파일: textextraction.py 프로젝트: ankurgupta8907/info_retrieval_project

def textExtraction(wikidocument, lang):
    """Extract the text and the link lists from a wiki document.

    The part of ``wikidocument`` captured by ``_body_re`` is scanned for
    links and then passed through an ordered series of module-level regex
    substitutions.

    Args:
        wikidocument: the raw document; presumably a UTF-8 byte string,
            since the cleaned body is decoded with "utf8" -- confirm
            against the caller.
        lang: language paired with every internal link.

    Returns:
        tuple: (UTF-8 encoded text, internal_links, interlanguage_links),
        where internal_links is a list of (lang, unquoted url) and
        interlanguage_links a list of (lang_ref, unquoted url).
    """
    def keep_first_group(match):
        return match.group(1)

    def keep_second_group(match):
        return match.group(2)

    def append_newline(match):
        return match.group(0)+"\n"

    # extract the body part
    body = _body_re.search(wikidocument).group(1)

    # list internal links
    internal_links = []
    for url, _document_name in _internal_link.findall(body):
        internal_links.append((lang, _unquote(url)))

    # list interlanguage links
    interlanguage_links = []
    for lang_ref, url in _interlanguage_link.findall(body):
        interlanguage_links.append((lang_ref, _unquote(url)))

    # Ordered clean-up passes; each one works on the previous one's output.
    cleanup_passes = (
        (_link_re, keep_second_group),   # replace links
        (_table_toc_re, "\n"),           # suppress table toc
        (_img_re, ""),                   # suppress images
        (_script_re, ""),                # suppress scripts
        (_cite_re, ""),                  # suppress citations
        (_sup_re, keep_first_group),     # suppress sups
        (_table_re, "\n"),               # suppress tables
    )
    for pattern, replacement in cleanup_passes:
        body = pattern.sub(replacement, body)

    # NOTE: a disabled step that cut the body at the "Voir aussi" heading
    # used to sit here.

    # only keep p and hx
    body = "\n".join(_p_and_hx_re.findall(body))

    # remove (formatting) tags
    body = _tags_re.sub("", body)

    # the following is encoding dependent
    body = body.decode("utf8")

    # split lines
    body = _end_line_re.sub(append_newline, body)

    # encoding normalization
    body = _entity_re.sub(_entity_callback, body)

    return (body.encode("utf8"), internal_links, interlanguage_links)

예제 #3

0

파일 보기

def textExtraction(wikidocument, lang):
    """Return the cleaned text and the links found in a wiki document.

    Works on the group captured by ``_body_re``: links are collected
    first, then the body goes through a fixed sequence of substitutions
    driven by module-level compiled patterns.

    Args:
        wikidocument: the raw document; presumably a UTF-8 byte string
            because the body is later decoded with "utf8" -- confirm
            against the caller.
        lang: language attached to each internal link.

    Returns:
        tuple: (UTF-8 encoded text, internal_links, interlanguage_links);
        both link lists hold (language, unquoted url) pairs.
    """
    def first_group(match):
        return match.group(1)

    def second_group(match):
        return match.group(2)

    def with_trailing_newline(match):
        return match.group(0) + "\n"

    # extract the body part
    body = _body_re.search(wikidocument).group(1)

    # list internal links
    internal_links = []
    for url, _document_name in _internal_link.findall(body):
        internal_links.append((lang, _unquote(url)))

    # list interlanguage links
    interlanguage_links = []
    for lang_ref, url in _interlanguage_link.findall(body):
        interlanguage_links.append((lang_ref, _unquote(url)))

    # Substitutions are applied strictly in this order.
    substitutions = (
        (_link_re, second_group),    # replace links
        (_table_toc_re, "\n"),       # suppress table toc
        (_img_re, ""),               # suppress images
        (_script_re, ""),            # suppress scripts
        (_cite_re, ""),              # suppress citations
        (_sup_re, first_group),      # suppress sups
        (_table_re, "\n"),           # suppress tables
    )
    for pattern, replacement in substitutions:
        body = pattern.sub(replacement, body)

    # NOTE: a disabled step that cut the body at the "Voir aussi" heading
    # used to sit here.

    # only keep p and hx
    body = "\n".join(_p_and_hx_re.findall(body))

    # remove (formatting) tags
    body = _tags_re.sub("", body)

    # the following is encoding dependent
    body = body.decode("utf8")

    # split lines
    body = _end_line_re.sub(with_trailing_newline, body)

    # encoding normalization
    body = _entity_re.sub(_entity_callback, body)

    return (body.encode("utf8"), internal_links, interlanguage_links)

예제 #4

0

파일 보기

파일: NotifyBase.py 프로젝트: drziskind/nzb-notify

    def unquote(content, encoding='utf-8', errors='replace'):
        """
        Common unquote function.

        Returns an empty string for empty or None content. Otherwise
        ``_unquote`` is called with the ``encoding`` and ``errors``
        keywords (Python 3 signature), falling back to a content-only
        call (Python 2.7 signature) when that raises TypeError.
        """
        if content:
            try:
                # Python v3.x
                return _unquote(content, encoding=encoding, errors=errors)

            except TypeError:
                # Python v2.7
                return _unquote(content)

        return ''

예제 #5

0

파일 보기

파일: common.py 프로젝트: floppym/oauthlib

def unquote(s):
    """Unquote ``s`` and make sure the result is text.

    The value returned by ``_unquote`` is decoded as UTF-8 when it is an
    instance of ``bytes_type``; any other value is returned untouched.
    """
    unquoted = _unquote(s)
    # PY3 always returns unicode, while PY2 seems to hand back the type it
    # was given (unlike quote), so decode defensively before returning.
    return unquoted.decode('utf-8') if isinstance(unquoted, bytes_type) else unquoted

예제 #6

0

파일 보기

파일: common.py 프로젝트: aqeelahamad/searching

def unquote(s):
    """Return the unquoted form of ``s`` as text.

    If ``_unquote`` yields a ``bytes_type`` instance it is decoded as
    UTF-8; otherwise its result is returned unchanged.
    """
    result = _unquote(s)
    if not isinstance(result, bytes_type):
        # PY3 always returns unicode, nothing left to do.
        return result
    # PY2 seems to return whatever type it was given, which differs from
    # quote's behavior, so convert to unicode here.
    return result.decode('utf-8')

예제 #7

0

파일 보기

파일: urls.py 프로젝트: ISIS2503/201810_02_architack

def unquote(s):
    """Unquote ``s`` with ``_unquote`` and pass the result to ``to_unicode``."""
    unquoted = _unquote(s)
    return to_unicode(unquoted)

예제 #8

0

파일 보기

파일: site_helper.py 프로젝트: npk/zarkpy

def unquote(string):
    """Unquote ``string``, encoding unicode input as UTF-8 first.

    Asserts that ``string`` is exactly of type ``unicode`` or ``str``.
    Unicode values are UTF-8 encoded before being handed to ``_unquote``;
    ``str`` values are passed through as they are.
    """
    assert (type(string) in [unicode, str])
    if isinstance(string, unicode):
        return _unquote(string.encode('utf-8'))
    return _unquote(string)

예제 #9

0

파일 보기

파일: site_helper.py 프로젝트: ajiexw/old-zarkpy

def getUrlParams(url=None):
    """Return the query-string parameters of ``url`` as a dict.

    Args:
        url: the URL to inspect. When None, the value of
            ``getEnv('REQUEST_URI')`` is used instead.

    Returns:
        dict: parameter name -> unquoted value. Names are kept as found
        in the URL. Pieces of the query that contain no '=' are skipped,
        and a later duplicate name overwrites an earlier one.
    """
    if url is None:
        url = getEnv('REQUEST_URI')

    # Index 4 of the urlparse() result is read as the query string
    # (assuming the standard library urlparse is the one in scope).
    query = urlparse(url)[4]

    params = {}
    for part in query.split('&'):
        # Split on the first '=' only, so a value that itself contains '='
        # (e.g. base64 padding) is kept instead of being silently dropped.
        name, separator, value = part.partition('=')
        if separator:
            params[name] = _unquote(value)
    return params

예제 #10

0

파일 보기

파일: site_helper.py 프로젝트: ajiexw/old-zarkpy

def unquote(string):
    """Unquote ``string``; values of exact type ``unicode`` are UTF-8 encoded first."""
    encoded = string.encode('utf-8') if type(string) is unicode else string
    return _unquote(encoded)

예제 #11

0

파일 보기

파일: chm_input.py 프로젝트: pkuhzx/calibre

 def unquote(x):
     """Unquote ``x`` and decode the result as UTF-8.

     ``unicode_type`` input is encoded to UTF-8 before unquoting.
     """
     raw = x.encode('utf-8') if isinstance(x, unicode_type) else x
     return _unquote(raw).decode('utf-8')

예제 #12

0

파일 보기

파일: compat.py 프로젝트: guix77/weboob

 def unquote(s):
     """Pass ``s`` through ``_reencode``, unquote it and decode the result as UTF-8."""
     reencoded = _reencode(s)
     unquoted = _unquote(reencoded)
     return unquoted.decode('utf-8')

예제 #13

0

파일 보기

def unquote(*l):
    """Unquote one or several values.

    Each argument goes through ``unicodeToStr`` and then ``_unquote``.
    With exactly one argument the unquoted value itself is returned;
    otherwise a tuple of unquoted values (empty when called with no
    arguments).
    """
    if len(l) == 1:
        return _unquote(unicodeToStr(l[0]))
    return tuple(_unquote(unicodeToStr(item)) for item in l)

예제 #14

0

파일 보기

파일: common.py 프로젝트: MwzkQmuUZkFLbXm/me

def unquote(s):
    """Encode ``s`` as UTF-8, unquote it and rebuild a ``unicode`` from the UTF-8 result."""
    utf8_bytes = s.encode("utf-8")
    return unicode(_unquote(utf8_bytes), "utf-8")

예제 #15

0

파일 보기

# coding=utf-8
# Copyright 2008-9, Sean B. Palmer, inamidst.com
# Copyright 2012, Elsie Powell, embolalia.com
# Licensed under the Eiffel Forum License 2.
from __future__ import unicode_literals, absolute_import, print_function, division

import re
from sopel import web
from sopel.module import commands, example
import requests
import xmltodict
import sys

# Pick the quoting helpers matching the running interpreter. On Python 2
# the imported unquote is wrapped so that it is fed UTF-8 bytes and its
# result is decoded back from UTF-8.
if sys.version_info.major >= 3:
    from urllib.parse import quote_plus, unquote
else:
    from urllib import quote_plus, unquote as _unquote

    def unquote(s):
        """Unquote ``s`` by round-tripping through UTF-8 bytes."""
        return _unquote(s.encode('utf-8')).decode('utf-8')


def formatnumber(n):
    """Format a number with commas as thousands separators.

    ``str(n)`` is split into an optional leading sign, the integer digits
    and everything from the decimal point onwards; commas are inserted
    every three digits of the integer part only, so the sign and the
    fraction are never counted as digits.

    Args:
        n: the value to format; anything ``str()`` accepts.

    Returns:
        str: the formatted text. When the integer part is not made of
        digits only (e.g. '1e+20' or 'nan') the text is returned without
        any separator added.
    """
    text = str(n)

    sign = ''
    if text[:1] in ('-', '+'):
        sign, text = text[0], text[1:]

    integer, point, fraction = text.partition('.')
    if not integer.isdigit():
        # Not a plain decimal number: leave it alone.
        return sign + text

    parts = list(integer)
    # Walk from the right so earlier insertions do not shift later ones.
    for i in range(len(parts) - 3, 0, -3):
        parts.insert(i, ',')
    return sign + ''.join(parts) + point + fraction


# Captures the href value of a link placed directly after an <h2> opening
# tag, with or without the ' b_topTitle' class attribute.
r_bing = re.compile(
    r'<h2(?: class=" b_topTitle")?><a href="([^"]+)"'
)

예제 #16

0

파일 보기

def unquote(url):
    """Unquote ``url`` as UTF-8 text.

    The value is first passed through ``u``. When ``PY3`` is true,
    ``_unquote`` is asked to decode with encoding "utf-8"; otherwise its
    result is decoded from UTF-8 afterwards.
    """
    if not PY3:
        return _unquote(u(url)).decode("utf-8")
    return _unquote(u(url), encoding="utf-8")

예제 #17

0

파일 보기

파일: search.py 프로젝트: sopel-irc/sopel

 def unquote(s):
     """Unquote ``s`` by encoding it to UTF-8 and decoding the unquoted result."""
     utf8_bytes = s.encode('utf-8')
     return _unquote(utf8_bytes).decode('utf-8')

예제 #18

0

파일 보기

파일: site_helper.py 프로젝트: alliadt/zarkpy

def unquote(string):
    """Unquote ``string``; unicode input is UTF-8 encoded beforehand.

    Asserts that ``string`` is exactly of type ``unicode`` or ``str``.
    """
    assert(type(string) in [unicode, str])
    if isinstance(string, unicode):
        return _unquote(string.encode('utf-8'))
    return _unquote(string)

예제 #19

0

파일 보기

def unquote(s):
    """Convert ``s`` with ``nativeString``, unquote it, and wrap the result with ``networkString``."""
    native = nativeString(s)
    unquoted = _unquote(native)
    return networkString(unquoted)

예제 #20

0

파일 보기

def unquote(s):
    """Unquote the UTF-8 encoding of ``s`` and return it as ``unicode`` decoded from UTF-8."""
    encoded = s.encode("utf-8")
    unquoted = _unquote(encoded)
    return unicode(unquoted, "utf-8")

예제 #21

0

파일 보기

파일: search.py 프로젝트: heroku-miraheze/Exambot-Source

 def unquote(s):
     """Encode ``s`` as UTF-8, unquote it, then decode the result from UTF-8."""
     encoded = s.encode('utf-8')
     unquoted = _unquote(encoded)
     return unquoted.decode('utf-8')

예제 #22

0

파일 보기

파일: compat.py 프로젝트: cleitner/WeasyPrint

 def unquote(data, encoding='utf-8', errors='replace'):
     """Unquote ``data`` and decode the result with ``encoding``/``errors``.

     The output of ``_unquote`` is encoded as latin1 to obtain bytes,
     which are then decoded using the requested encoding and error
     handling scheme.
     """
     unquoted = _unquote(data)
     raw = unquoted.encode('latin1')
     return raw.decode(encoding, errors)

예제 #23

0

파일 보기

def getUrlParams(url=None):
    """Build a dict from the query-string parameters of ``url``.

    Args:
        url: the URL to inspect; defaults to ``getEnv('REQUEST_URI')``
            when None.

    Returns:
        dict: parameter name -> unquoted value. Names are not unquoted.
        Query pieces without '=' are ignored; for repeated names the last
        occurrence wins.
    """
    if url is None:
        url = getEnv('REQUEST_URI')

    # Index 4 of the urlparse() result is read as the query string
    # (assuming the standard library urlparse is the one in scope).
    query = urlparse(url)[4]

    params = {}
    for part in query.split('&'):
        # Only the first '=' separates name from value; splitting on every
        # '=' used to discard parameters whose value contained one.
        name, separator, value = part.partition('=')
        if separator:
            params[name] = _unquote(value)
    return params

예제 #24

0

파일 보기

파일: search.py 프로젝트: neonobjclash/sopel

# coding=utf-8
# Copyright 2008-9, Sean B. Palmer, inamidst.com
# Copyright 2012, Elsie Powell, embolalia.com
# Licensed under the Eiffel Forum License 2.
from __future__ import unicode_literals, absolute_import, print_function, division

import re
import sys

# Select the unquote implementation for the running interpreter. On
# Python 2 the imported function is wrapped so that it receives UTF-8
# bytes and its result is decoded back from UTF-8.
if sys.version_info.major >= 3:
    from urllib.parse import unquote
else:
    from urllib import unquote as _unquote

    def unquote(s):
        """Unquote ``s`` by round-tripping through UTF-8 bytes."""
        return _unquote(s.encode('utf-8')).decode('utf-8')

import requests
import xmltodict

from sopel import web
from sopel.module import commands, example


def formatnumber(n):
    """Format a number with commas as thousands separators.

    Only the integer digits of ``str(n)`` are grouped: a leading sign and
    anything from the decimal point onwards are set aside first and put
    back unchanged, so they can no longer be mistaken for digits.

    Args:
        n: the value to format; anything ``str()`` accepts.

    Returns:
        str: the formatted text. If the integer part contains anything
        other than digits (e.g. '1e+20', 'inf') no separator is added.
    """
    text = str(n)

    sign = ''
    if text[:1] in ('-', '+'):
        sign, text = text[0], text[1:]

    integer, point, fraction = text.partition('.')
    if not integer.isdigit():
        # Not a plain decimal number: return it as it was.
        return sign + text

    digits = list(integer)
    # Insert from right to left so positions still to visit stay valid.
    for i in range(len(digits) - 3, 0, -3):
        digits.insert(i, ',')
    return sign + ''.join(digits) + point + fraction


# Matches an <h2> opening tag (optionally carrying the ' b_topTitle'
# class) immediately followed by a link; group 1 is the href value.
r_bing = re.compile(
    r'<h2(?: class=" b_topTitle")?><a href="([^"]+)"'
)

예제 #25

0

파일 보기

파일: site_helper.py 프로젝트: duoduo369/zarkpy

def unquote(*l):
    """Unquote every argument after converting it with ``unicodeToStr``.

    Returns the single unquoted value when exactly one argument is given,
    and a tuple of unquoted values for any other argument count.
    """
    if len(l) != 1:
        return tuple(_unquote(unicodeToStr(value)) for value in l)
    return _unquote(unicodeToStr(l[0]))

예제 #26

0

파일 보기

파일: compat.py 프로젝트: cleitner/WeasyPrint

 def unquote_to_bytes(data):
     """Unquote ``data``; ``unicode`` input is encoded as ASCII before unquoting."""
     raw = data.encode('ascii') if isinstance(data, unicode) else data
     return _unquote(raw)

예제 #27

0

파일 보기

파일: postfix.py 프로젝트: Arshdeep10/scrapy

def unquote(s):
    """Unquote ``s`` and return the result as ASCII-encoded bytes.

    Bytes input is decoded as ASCII first; other input is handed to
    ``_unquote`` as it is.
    """
    text = s.decode("ascii") if isinstance(s, bytes) else s
    return _unquote(text).encode("ascii")

예제 #28

0

파일 보기

파일: chm_input.py 프로젝트: artbycrunk/calibre

 def unquote(x):
     """Unquote ``x`` and decode the result from UTF-8.

     ``unicode`` input is encoded as UTF-8 before being unquoted.
     """
     encoded = x.encode('utf-8') if isinstance(x, unicode) else x
     unquoted = _unquote(encoded)
     return unquoted.decode('utf-8')

예제 #29

0

파일 보기

파일: postfix.py 프로젝트: JohnDoes95/project_parser

def unquote(s):
    """Unquote the ``nativeString`` form of ``s`` and return it through ``networkString``."""
    unquoted = _unquote(nativeString(s))
    return networkString(unquoted)

예제 #30

0

파일 보기

파일: compat.py 프로젝트: peopledoc/multipart-reader

 def unquote(value, encoding, errors):
     """Unquote ``value`` and decode the result with ``encoding`` and ``errors``."""
     unquoted = _unquote(value)
     return unquoted.decode(encoding, errors)