Python unicode_to_str примеры, w3lib.util.unicode_to_str Python примеры использования

Пример #1

0

Показать файл

Файл: url.py Проект: 668Jerry/rations

def urljoin_rfc(base, ref, encoding='utf-8'):
    r"""
    .. warning::

        This function is deprecated and will be removed in future.
        Please use ``urlparse.urljoin`` instead.

    Join ``ref`` onto ``base`` with ``urljoin`` after passing both arguments
    through ``unicode_to_str`` with the given ``encoding``.

    A ``DeprecationWarning`` is issued on every call.

    >>> import w3lib.url
    >>> w3lib.url.urljoin_rfc('http://www.example.com/path/index.html', u'/otherpath/index2.html')
    'http://www.example.com/otherpath/index2.html'
    >>>

    >>> w3lib.url.urljoin_rfc('http://www.example.com/path/index.html', u'fran\u00e7ais/d\u00e9part.htm')
    'http://www.example.com/path/fran\xc3\xa7ais/d\xc3\xa9part.htm'
    >>>


    """
    deprecation_message = (
        "w3lib.url.urljoin_rfc is deprecated, use urlparse.urljoin instead"
    )
    warnings.warn(deprecation_message, DeprecationWarning)

    # Convert base first, then ref, exactly as the join expects them.
    converted = [unicode_to_str(part, encoding) for part in (base, ref)]
    return moves.urllib.parse.urljoin(*converted)

Пример #2

0

Показать файл

Файл: url.py Проект: hbradlow/ggAPI

def urljoin_rfc(base, ref, encoding='utf-8'):
    """Join ``ref`` onto ``base`` using ``urlparse.urljoin``.

    Both arguments are first passed through ``unicode_to_str`` with the
    given ``encoding``; the result of the join is returned as is.
    """
    str_base = unicode_to_str(base, encoding)
    str_ref = unicode_to_str(ref, encoding)
    return urlparse.urljoin(str_base, str_ref)

Пример #3

0

Показать файл

Файл: url.py Проект: nasirsphi/w3lib

def urljoin_rfc(base, ref, encoding="utf-8"):
    """Join ``ref`` onto ``base`` using ``urlparse.urljoin``.

    Both arguments are first passed through ``unicode_to_str`` with the
    given ``encoding``. A ``DeprecationWarning`` is issued on every call.
    """
    deprecation_message = (
        "w3lib.url.urljoin_rfc is deprecated, use urlparse.urljoin instead"
    )
    warnings.warn(deprecation_message, DeprecationWarning)

    str_base = unicode_to_str(base, encoding)
    str_ref = unicode_to_str(ref, encoding)
    return urlparse.urljoin(str_base, str_ref)

Пример #4

0

Показать файл

def urljoin_rfc(base, ref, encoding='utf-8'):
    """Join ``ref`` onto ``base`` using ``urlparse.urljoin``.

    Both arguments are first passed through ``unicode_to_str`` with the
    given ``encoding``. A ``DeprecationWarning`` is issued on every call.
    """
    deprecation_message = (
        "w3lib.url.urljoin_rfc is deprecated, use urlparse.urljoin instead"
    )
    warnings.warn(deprecation_message, DeprecationWarning)

    # Convert base first, then ref, before handing both to urljoin.
    converted = [unicode_to_str(part, encoding) for part in (base, ref)]
    return urlparse.urljoin(*converted)

Пример #5

0

Показать файл

 def __repr__(self):
     """Render the indexes as text: a dated, underlined header and one line per index.

     ``self.date`` is filled in with ``time.ctime()`` when it is empty, so
     calling this method may set that attribute. Each index line may be
     followed by a gold-denominated value and by the previously recorded
     value in square brackets. The text is passed through
     ``unicode_to_str`` before being returned.
     """
     gold_priced = ["commodity", "stock"]

     def order_key(entry):
         return get_index_order(entry["name"])

     self.date = self.date or time.ctime()
     out = u"%s\n" % self.date
     underline = u"-" * len(self.date)
     out += underline + "\n"
     for current in sorted(self.indexes, key=order_key):
         out += u"%(name)s: %(value)s%(unit)s" % current
         if get_index_class(current["name"]) in gold_priced and self.gold_price:
             in_gold = current["value"] / self.gold_price
             out += u"(%.5fozAu)" % in_gold
         previous = self.indexes_old.get(current["name"])
         if previous:
             stamp = previous["timestamp"]
             when = "%s %s" % (_MONTHS[stamp.month - 1], stamp.day)
             if self.show_year:
                 when += " %s" % stamp.year
             out += u" [%s: %s%s" % (when, previous["value"], previous["unit"])
             if (get_index_class(previous["name"]) in gold_priced
                     and self.gold_price_old):
                 old_in_gold = previous["value"] / self.gold_price_old
                 out += u" (%.5fozAu)" % old_in_gold
             out += "]"
         out += u"\n"
     return unicode_to_str(out)

Пример #6

0

Показать файл

def get_meta_refresh(text, baseurl='', encoding='utf-8'):
    """Look for a meta refresh directive in the given HTML text.

    Entities are replaced and comments removed before ``_meta_refresh_re``
    is applied. On a match, return ``(interval, url)`` where ``interval`` is
    the matched delay converted with ``float`` and ``url`` is the matched
    target, made safe with ``safe_url_string`` and joined onto ``baseurl``.

    If no meta redirect is found, ``(None, None)`` is returned.

    If ``text`` cannot be decoded with ``encoding`` it is printed and the
    ``UnicodeDecodeError`` is re-raised.
    """
    if six.PY2:
        baseurl = unicode_to_str(baseurl, encoding)

    try:
        text = str_to_unicode(text, encoding)
    except UnicodeDecodeError:
        # Show the offending input, then let the error propagate.
        print(text)
        raise

    without_entities = replace_entities(text)
    match = _meta_refresh_re.search(remove_comments(without_entities))
    if not match:
        return None, None

    interval = float(match.group('int'))
    raw_target = match.group('url').strip(' "\'')
    safe_target = safe_url_string(raw_target, encoding)
    return interval, moves.urllib.parse.urljoin(baseurl, safe_target)

Пример #7

0

Показать файл

Файл: form.py Проект: JodeZer/moodle-scraper

def encode_multipart(data):
    """Encode the given data to be used in a multipart HTTP POST.

    ``data`` is a dictionary where keys are the field names, and values are
    either strings or ``(filename, content)`` tuples for file uploads. A
    list of such values may be given to send several parts under the same
    field name; each part gets its own ``Content-Disposition`` header.

    This code is based on distutils.command.upload.

    Return (body, boundary) tuple where ``body`` is binary body value,
    and ``boundary`` is the boundary used (as native string).
    """
    # Build up the MIME payload for the POST data
    boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
    sep_boundary = b'\r\n--' + boundary.encode('ascii')
    end_boundary = sep_boundary + b'--'
    body = BytesIO()
    for key, values in data.items():
        disposition = u'\r\nContent-Disposition: form-data; name="%s"' % key
        # handle multiple entries for the same name
        if not isinstance(values, list):
            values = [values]
        for value in values:
            # Start every part from the bare disposition header: appending to
            # a string shared across parts would leak one part's filename
            # into the headers of the parts that follow it.
            title = disposition
            if isinstance(value, tuple):
                title += u'; filename="%s"' % value[0]
                value = value[1]
            else:
                value = unicode_to_str(value)  # in distutils: str(value).encode('utf-8')
            body.write(sep_boundary)
            body.write(title.encode('utf-8'))
            body.write(b"\r\n\r\n")
            body.write(value)
    body.write(end_boundary)
    body.write(b"\r\n")
    return body.getvalue(), boundary

Пример #8

0

Показать файл

Файл: html.py Проект: fubuki/w3lib

def get_meta_refresh(text, baseurl='', encoding='utf-8'):
    """Look for a meta refresh directive in the given HTML text.

    Entities and comments are removed before ``_meta_refresh_re`` is
    applied. On a match, return ``(interval, url)`` where ``interval`` is
    the matched delay converted with ``float`` and ``url`` is the matched
    target, made safe with ``safe_url_string`` and joined onto ``baseurl``.

    If no meta redirect is found, ``(None, None)`` is returned.

    If ``text`` cannot be decoded with ``encoding`` it is printed and the
    ``UnicodeDecodeError`` is re-raised.
    """
    if six.PY2:
        baseurl = unicode_to_str(baseurl, encoding)

    try:
        text = str_to_unicode(text, encoding)
    except UnicodeDecodeError:
        # Show the offending input, then let the error propagate.
        print(text)
        raise

    without_entities = remove_entities(text)
    match = _meta_refresh_re.search(remove_comments(without_entities))
    if not match:
        return None, None

    interval = float(match.group('int'))
    raw_target = match.group('url').strip(' "\'')
    safe_target = safe_url_string(raw_target, encoding)
    return interval, moves.urllib.parse.urljoin(baseurl, safe_target)

Пример #9

0

Показать файл

Файл: form.py Проект: Quebec-Python/web-scraping-101

def encode_multipart(data):
    """Encode the given data to be used in a multipart HTTP POST.

    ``data`` is a dictionary where keys are the field names, and values are
    either strings or ``(filename, content)`` tuples for file uploads. A
    list of such values may be given to send several parts under the same
    field name; each part gets its own ``Content-Disposition`` header.

    This code is based on distutils.command.upload.

    Return (body, boundary) tuple where ``body`` is binary body value,
    and ``boundary`` is the boundary used (as native string).
    """
    # Build up the MIME payload for the POST data
    boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
    sep_boundary = b'\r\n--' + boundary.encode('ascii')
    end_boundary = sep_boundary + b'--'
    body = BytesIO()
    for key, values in data.items():
        disposition = u'\r\nContent-Disposition: form-data; name="%s"' % key
        # handle multiple entries for the same name
        if not isinstance(values, list):
            values = [values]
        for value in values:
            # Reset the header for each part so that a filename added for one
            # upload is not carried over into the next part's header.
            title = disposition
            if isinstance(value, tuple):
                title += u'; filename="%s"' % value[0]
                value = value[1]
            else:
                value = unicode_to_str(value)  # in distutils: str(value).encode('utf-8')
            body.write(sep_boundary)
            body.write(title.encode('utf-8'))
            body.write(b"\r\n\r\n")
            body.write(value)
    body.write(end_boundary)
    body.write(b"\r\n")
    return body.getvalue(), boundary

Пример #10

0

Показать файл

Файл: form.py Проект: 668Jerry/rations

def encode_multipart(data):
    r"""

    .. warning::

        This function is deprecated and will be removed in future.
        Please use ``urllib3.filepost.encode_multipart_formdata`` instead.

    Encode the given data to be used in a multipart HTTP POST.

    `data` is a dictionary where keys are the field name, and values are
    either strings or tuples as `(filename, content)` for file uploads.
    A list of such values may be given to send several parts under the same
    field name; each part gets its own ``Content-Disposition`` header.

    This code is based on :class:`distutils.command.upload`.

    Returns a `(body, boundary)` tuple where `body` is binary body value,
    and `boundary` is the boundary used (as native string).

    A ``DeprecationWarning`` is issued on every call.

    >>> import w3lib.form
    >>> w3lib.form.encode_multipart({'key': 'value'})
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key"\r\n\r\nvalue\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>> w3lib.form.encode_multipart({'key1': 'value1', 'key2': 'value2'})   # doctest: +SKIP
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key2"\r\n\r\nvalue2\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key1"\r\n\r\nvalue1\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>> w3lib.form.encode_multipart({'somekey': ('path/to/filename', b'\xa1\xa2\xa3\xa4\r\n\r')})
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="somekey"; filename="path/to/filename"\r\n\r\n\xa1\xa2\xa3\xa4\r\n\r\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>>

    """

    warnings.warn(
        "`w3lib.form.encode_multipart` function is deprecated and "
        "will be removed in future releases. Please use "
        "`urllib3.filepost.encode_multipart_formdata` instead.",
        DeprecationWarning
    )

    # Build up the MIME payload for the POST data
    boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
    sep_boundary = b'\r\n--' + boundary.encode('ascii')
    end_boundary = sep_boundary + b'--'
    body = BytesIO()
    for key, values in data.items():
        disposition = u'\r\nContent-Disposition: form-data; name="%s"' % key
        # handle multiple entries for the same name
        if not isinstance(values, list):
            values = [values]
        for value in values:
            # Start every part from the bare disposition header: appending to
            # a string shared across parts would leak one part's filename
            # into the headers of the parts that follow it.
            title = disposition
            if isinstance(value, tuple):
                title += u'; filename="%s"' % value[0]
                value = value[1]
            else:
                value = unicode_to_str(value)  # in distutils: str(value).encode('utf-8')
            body.write(sep_boundary)
            body.write(title.encode('utf-8'))
            body.write(b"\r\n\r\n")
            body.write(value)
    body.write(end_boundary)
    body.write(b"\r\n")
    return body.getvalue(), boundary

Пример #11

0

Показать файл

def encode_multipart(data):
    r"""

    .. warning::

        This function is deprecated and will be removed in future.
        Please use ``urllib3.filepost.encode_multipart_formdata`` instead.

    Encode the given data to be used in a multipart HTTP POST.

    `data` is a dictionary where keys are the field name, and values are
    either strings or tuples as `(filename, content)` for file uploads.
    A list of such values may be given to send several parts under the same
    field name; each part gets its own ``Content-Disposition`` header.

    This code is based on :class:`distutils.command.upload`.

    Returns a `(body, boundary)` tuple where `body` is binary body value,
    and `boundary` is the boundary used (as native string).

    A ``DeprecationWarning`` is issued on every call.

    >>> import w3lib.form
    >>> w3lib.form.encode_multipart({'key': 'value'})
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key"\r\n\r\nvalue\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>> w3lib.form.encode_multipart({'key1': 'value1', 'key2': 'value2'})   # doctest: +SKIP
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key2"\r\n\r\nvalue2\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="key1"\r\n\r\nvalue1\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>> w3lib.form.encode_multipart({'somekey': ('path/to/filename', b'\xa1\xa2\xa3\xa4\r\n\r')})
    ('\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254\r\nContent-Disposition: form-data; name="somekey"; filename="path/to/filename"\r\n\r\n\xa1\xa2\xa3\xa4\r\n\r\r\n----------------GHSKFJDLGDS7543FJKLFHRE75642756743254--\r\n', '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254')
    >>>

    """

    warnings.warn(
        "`w3lib.form.encode_multipart` function is deprecated and "
        "will be removed in future releases. Please use "
        "`urllib3.filepost.encode_multipart_formdata` instead.",
        DeprecationWarning)

    # Build up the MIME payload for the POST data
    boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
    sep_boundary = b'\r\n--' + boundary.encode('ascii')
    end_boundary = sep_boundary + b'--'
    body = BytesIO()
    for key, values in data.items():
        disposition = u'\r\nContent-Disposition: form-data; name="%s"' % key
        # handle multiple entries for the same name
        if not isinstance(values, list):
            values = [values]
        for value in values:
            # Reset the header for each part so that a filename added for one
            # upload is not carried over into the next part's header.
            title = disposition
            if isinstance(value, tuple):
                title += u'; filename="%s"' % value[0]
                value = value[1]
            else:
                value = unicode_to_str(
                    value)  # in distutils: str(value).encode('utf-8')
            body.write(sep_boundary)
            body.write(title.encode('utf-8'))
            body.write(b"\r\n\r\n")
            body.write(value)
    body.write(end_boundary)
    body.write(b"\r\n")
    return body.getvalue(), boundary

Пример #12

0

Показать файл

Файл: html.py Проект: xacprod/ve1

def get_base_url(text, baseurl='', encoding='utf-8'):
    """Resolve the base url declared in the given html text, if any.

    ``text`` is searched with ``_baseurl_re``; when it matches, the first
    captured group is encoded with ``encoding`` and joined onto ``baseurl``.
    Otherwise the given ``baseurl`` is used on its own. Either way the
    result is passed through ``safe_url_string`` before being returned.
    """
    decoded_text = str_to_unicode(text, encoding)
    resolved = unicode_to_str(baseurl, encoding)
    found = _baseurl_re.search(decoded_text)
    if found:
        declared = found.group(1).encode(encoding)
        resolved = urljoin(resolved, declared)
    return safe_url_string(resolved)

Пример #13

0

Показать файл

def safe_url_string(url, encoding='utf8'):
    """Quote the given url with ``urllib.quote``, leaving ``_safe_chars`` as is.

    The url is first passed through ``unicode_to_str`` with the given
    encoding (which defaults to 'utf8'). When passing an encoding, use the
    encoding of the page the url was extracted from.

    The result of ``urllib.quote`` is returned unchanged.
    """
    return urllib.quote(unicode_to_str(url, encoding), _safe_chars)

Пример #14

0

Показать файл

    def __repr__(self):
        """Render the items as text: a dated, underlined header and one line per item.

        ``self.date`` is filled in with ``time.ctime()`` when it is empty, so
        calling this method may set that attribute. Items are sorted by their
        ``"name"`` and formatted with ``self.lformat``. When ``self.olformat``
        is set and an older entry exists for an item, that entry is appended
        in square brackets with its date. The text is passed through
        ``unicode_to_str`` before being returned.
        """
        self.date = self.date or time.ctime()
        out = u"%s\n" % self.date
        underline = u"-" * len(self.date)
        out += underline + "\n"
        for entry in sorted(self.items, key=lambda row: row["name"]):
            out += self.lformat % entry
            # Only look up the older entry when there is a format to show it.
            previous = self.items_old.get(entry["name"]) if self.olformat else None
            if previous:
                stamp = previous["timestamp"]
                when = "%s %s" % (_MONTHS[stamp.month - 1], stamp.day)
                if self.show_year:
                    when += " %s" % stamp.year
                opening = u" [%s: " % when
                out += opening + self.olformat % previous + "]"
            out += "\n"

        return unicode_to_str(out)

Пример #15

0

Показать файл

Файл: printers.py Проект: biddyweb/finance

    def __repr__(self):
        """Build the text report: date header, dashed rule, then one line per item.

        An empty ``self.date`` is replaced by ``time.ctime()`` and stored
        back on the instance. Items are ordered by ``"name"`` and rendered
        with ``self.lformat``; if ``self.olformat`` is set and
        ``self.items_old`` has an entry of the same name, it is appended as
        ``[<month> <day>[ <year>]: <old entry>]``. The result goes through
        ``unicode_to_str``.
        """
        self.date = self.date or time.ctime()
        text = u"%s\n" % self.date
        text += u"-" * len(self.date) + "\n"

        def by_name(row):
            return row["name"]

        for row in sorted(self.items, key=by_name):
            text += self.lformat % row
            # Only look up the older entry when there is a format to show it.
            older = self.items_old.get(row["name"]) if self.olformat else None
            if older:
                recorded = older["timestamp"]
                label = "%s %s" % (_MONTHS[recorded.month - 1], recorded.day)
                if self.show_year:
                    label += " %s" % recorded.year
                prefix = u" [%s: " % label
                text += prefix + self.olformat % older + "]"
            text += "\n"

        return unicode_to_str(text)

Пример #16

0

Показать файл

Файл: printers.py Проект: biddyweb/finance

 def __repr__(self):
     """Build the text report: date header, dashed rule, then one line per index.

     An empty ``self.date`` is replaced by ``time.ctime()`` and stored back
     on the instance. Indexes are ordered with ``get_index_order``. Indexes
     whose class is "commodity" or "stock" also show their value divided by
     the gold price when one is set. An older entry of the same name, if
     present, is appended in square brackets. The result goes through
     ``unicode_to_str``.
     """
     gold_priced = ["commodity", "stock"]

     def order_key(entry):
         return get_index_order(entry["name"])

     self.date = self.date or time.ctime()
     text = u"%s\n" % self.date
     text += u"-" * len(self.date) + "\n"
     for entry in sorted(self.indexes, key=order_key):
         text += u"%(name)s: %(value)s%(unit)s" % entry
         if get_index_class(entry["name"]) in gold_priced and self.gold_price:
             text += u"(%.5fozAu)" % (entry["value"] / self.gold_price)
         older = self.indexes_old.get(entry["name"])
         if older:
             recorded = older["timestamp"]
             label = "%s %s" % (_MONTHS[recorded.month - 1], recorded.day)
             if self.show_year:
                 label += " %s" % recorded.year
             text += u" [%s: %s%s" % (label, older["value"], older["unit"])
             if (get_index_class(older["name"]) in gold_priced
                     and self.gold_price_old):
                 text += u" (%.5fozAu)" % (older["value"] / self.gold_price_old)
             text += "]"
         text += u"\n"
     return unicode_to_str(text)

Пример #17

0

Показать файл

def jsonrpc_client_call(url, method, *args, **kwargs):
    """Execute a JSON-RPC call on the given url.

    The parameters of the call are taken from either ``*args`` (positional)
    or ``**kwargs`` (named), never both. The request is sent with
    ``urlopen`` and the response body is decoded as UTF-8 JSON.

    Returns the ``result`` member of the response.

    Raises ``ValueError`` if both ``*args`` and ``**kwargs`` are given, or
    if the response has neither ``result`` nor ``error``. Raises
    ``JsonRpcError`` if the response carries an ``error`` member.
    """
    if args and kwargs:
        raise ValueError(
            "Pass *args or **kwargs but not both to jsonrpc_client_call")
    req = {
        'jsonrpc': '2.0',
        'method': method,
        'params': args or kwargs,
        'id': 1
    }
    data = unicode_to_str(json.dumps(req))
    response = urllib.request.urlopen(url, data)
    try:
        body = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()
    res = json.loads(body.decode('utf-8'))
    if 'result' in res:
        return res['result']
    elif 'error' in res:
        er = res['error']
        # 'data' is optional in a JSON-RPC 2.0 error object; a plain lookup
        # would replace the server's error with an unrelated KeyError.
        # NOTE(review): assumes JsonRpcError accepts None as data - confirm.
        raise JsonRpcError(er['code'], er['message'], er.get('data'))
    else:
        msg = "JSON-RPC response must contain 'result' or 'error': %s" % res
        raise ValueError(msg)

Пример #18

0

Показать файл

Файл: html.py Проект: AaronMT/spade

def get_meta_refresh(text, baseurl="", encoding="utf-8"):
    """Look for a meta refresh directive in the given HTML text.

    Entities and comments are removed before ``_meta_refresh_re`` is
    applied. On a match, return a tuple (interval, url) where interval is
    the matched delay converted with ``float`` and url is the matched
    target, made safe with ``safe_url_string`` and joined onto ``baseurl``.

    If no meta redirect is found, (None, None) is returned.

    If ``text`` cannot be decoded with ``encoding`` it is printed and the
    ``UnicodeDecodeError`` is re-raised.
    """
    baseurl = unicode_to_str(baseurl, encoding)
    try:
        text = str_to_unicode(text, encoding)
    except UnicodeDecodeError:
        # Parenthesised so the module also parses on Python 3; on Python 2
        # a single parenthesised argument prints exactly as before.
        print(text)
        raise
    text = remove_comments(remove_entities(text))
    m = _meta_refresh_re.search(text)
    if m:
        interval = float(m.group("int"))
        url = safe_url_string(m.group("url").strip(" \"'"))
        url = urljoin(baseurl, url)
        return interval, url
    else:
        return None, None

Пример #19

0

Показать файл

def parse_url(url, encoding=None):
    """Return the parsed form of ``url``.

    A ``urlparse.ParseResult`` is handed back untouched; anything else is
    passed through ``unicode_to_str`` with ``encoding`` and then parsed
    with ``urlparse.urlparse``.
    """
    if isinstance(url, urlparse.ParseResult):
        return url
    return urlparse.urlparse(unicode_to_str(url, encoding))

Пример #20

0

Показать файл

Файл: url.py Проект: StardustZhou/crawl-frontier

def parse_url(url, encoding=None):
    """Return ``url`` as a ``urlparse.ParseResult``.

    An argument that is already a ``ParseResult`` is returned as is;
    otherwise it is passed through ``unicode_to_str`` with ``encoding`` and
    parsed with ``urlparse.urlparse``.
    """
    already_parsed = isinstance(url, urlparse.ParseResult)
    if not already_parsed:
        url = urlparse.urlparse(unicode_to_str(url, encoding))
    return url

Пример #21

0

Показать файл

Файл: test_util.py Проект: scrapy/w3lib

 def test_deprecation(self):
     # Call unicode_to_str inside the deprecated_call() context manager.
     empty_text = ""
     with deprecated_call():
         unicode_to_str(empty_text)

Python unicode_to_str примеры использования