Python unquote_markup примеры использования

Язык программирования: Python

Пространство имен/Пакет: scrapy.utils.markup

Метод/Функция: unquote_markup

Примеров на hotexamples.com: 8

Python unquote_markup — найдено 8 примеров. Это лучшие примеры кода на Python для scrapy.utils.markup.unquote_markup, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

Файл: url.py Проект: zirconer/portia

 def adapt(self, text, htmlpage=None):
     """Resolve an extracted URL against the base URL of its page.

     ``text`` is returned untouched when no ``htmlpage`` is supplied, and
     ``None`` is returned when ``text`` is ``None``. Otherwise the text and
     the page's base URL are both encoded with the page encoding
     (``'utf-8'`` when the page has no ``encoding`` attribute), run through
     ``unquote_markup`` and ``strip_url``, joined with ``urljoin`` and
     finally passed to ``safe_download_url``.
     """
     if htmlpage is None:
         return text
     if text is None:
         return None
     page_encoding = getattr(htmlpage, 'encoding', 'utf-8')
     raw_link = unquote_markup(text.encode(page_encoding),
                               encoding=page_encoding)
     # Drop whatever ``disallowed`` matches before stripping the link.
     link = strip_url(disallowed.sub('', raw_link))
     raw_base = get_base_url(htmlpage).encode(page_encoding)
     base_link = strip_url(unquote_markup(raw_base, encoding=page_encoding))
     return safe_download_url(urljoin(base_link, link))

Пример #2

Показать файл

Файл: processors.py Проект: txrproject/scrapy_plus

 def __call__(self, values, loader_context=None):
     """Turn extracted values into absolute URLs.

     The values are first processed by the parent class. Every resulting
     ``dict`` or ``list`` is kept exactly as it is; every other value is
     unquoted with ``unquote_markup``, cleaned with ``_strip_url`` and
     joined with the ``'baseurl'`` entry of ``loader_context`` (an empty
     string when that entry, or the context itself, is missing).

     Returns the list of results in input order.
     """
     values = super(Url, self).__call__(values)
     # The base URL is the same for every value, so look it up once. A
     # missing context is treated like a context without a 'baseurl' entry
     # instead of failing on ``None.get``.
     base = ''
     if loader_context is not None:
         base = loader_context.get('baseurl', '')
     urls = []
     for value in values:
         if isinstance(value, (dict, list)):
             # Structured values are passed through untouched. Without the
             # ``continue`` they would also be handed to unquote_markup
             # below and processed a second time.
             urls.append(value)
             continue
         urls.append(urljoin(base, _strip_url(unquote_markup(value))))
     return urls

Пример #3

Показать файл

Файл: processors.py Проект: datnamer/portia2code

 def __call__(self, values, loader_context=None):
     """Convert the processed values into a list of absolute URLs.

     After the parent class has processed ``values``, each item is handled
     as follows: a ``dict`` or ``list`` is appended unchanged, anything
     else goes through ``unquote_markup`` and ``_strip_url`` and is then
     joined with the base URL. The base URL is read from the ``'baseurl'``
     key of ``loader_context`` and defaults to an empty string when the
     key is absent or no context was given.

     Returns the resulting list, in the same order as the input.
     """
     values = super(Url, self).__call__(values)
     # ``loader_context`` defaults to None, so guard the lookup; it is also
     # independent of the loop variable and therefore done only once.
     if loader_context is None:
         base = ''
     else:
         base = loader_context.get('baseurl', '')
     urls = []
     for value in values:
         if isinstance(value, (dict, list)):
             # Keep containers as they are and skip the string handling,
             # which would otherwise run on the container as well.
             urls.append(value)
             continue
         cleaned = _strip_url(unquote_markup(value))
         urls.append(urljoin(base, cleaned))
     return urls

Пример #4

Показать файл

    def test_unquote_markup(self):
        sample_txt1 = u"""<node1>hi, this is sample text with entities: &amp; &copy;
<![CDATA[although this is inside a cdata! &amp; &quot;]]></node1>"""
        sample_txt2 = u'<node2>blah&amp;blah<![CDATA[blahblahblah!&pound;]]>moreblah&lt;&gt;</node2>'
        sample_txt3 = u'something&pound;&amp;more<node3><![CDATA[things, stuff, and such]]>what&quot;ever</node3><node4'

        # make sure it always return unicode
        assert isinstance(unquote_markup(sample_txt1.encode('latin-1')),
                          unicode)
        assert isinstance(unquote_markup(sample_txt2), unicode)

        self.assertEqual(
            unquote_markup(sample_txt1),
            u"""<node1>hi, this is sample text with entities: & \xa9
although this is inside a cdata! &amp; &quot;</node1>""")

        self.assertEqual(
            unquote_markup(sample_txt2),
            u'<node2>blah&blahblahblahblah!&pound;moreblah<></node2>')

        self.assertEqual(
            unquote_markup(sample_txt1 + sample_txt2),
            u"""<node1>hi, this is sample text with entities: & \xa9
although this is inside a cdata! &amp; &quot;</node1><node2>blah&blahblahblahblah!&pound;moreblah<></node2>"""
        )

        self.assertEqual(
            unquote_markup(sample_txt3),
            u'something\xa3&more<node3>things, stuff, and suchwhat"ever</node3><node4'
        )

Пример #5

Показать файл

Файл: test_utils_markup.py Проект: chzealot/scrapy

    def test_unquote_markup(self):
        sample_txt1 = u"""<node1>hi, this is sample text with entities: &amp; &copy;
<![CDATA[although this is inside a cdata! &amp; &quot;]]></node1>"""
        sample_txt2 = u'<node2>blah&amp;blah<![CDATA[blahblahblah!&pound;]]>moreblah&lt;&gt;</node2>'
        sample_txt3 = u'something&pound;&amp;more<node3><![CDATA[things, stuff, and such]]>what&quot;ever</node3><node4'

        # make sure it always return unicode
        assert isinstance(unquote_markup(sample_txt1.encode('latin-1')), unicode)
        assert isinstance(unquote_markup(sample_txt2), unicode)

        self.assertEqual(unquote_markup(sample_txt1), u"""<node1>hi, this is sample text with entities: & \xa9
although this is inside a cdata! &amp; &quot;</node1>""")

        self.assertEqual(unquote_markup(sample_txt2), u'<node2>blah&blahblahblahblah!&pound;moreblah<></node2>')

        self.assertEqual(unquote_markup(sample_txt1 + sample_txt2), u"""<node1>hi, this is sample text with entities: & \xa9
although this is inside a cdata! &amp; &quot;</node1><node2>blah&blahblahblahblah!&pound;moreblah<></node2>""")

        self.assertEqual(unquote_markup(sample_txt3), u'something\xa3&more<node3>things, stuff, and suchwhat"ever</node3><node4')

Пример #6

Показать файл

Файл: images.py Проект: netconstructor/slybot

 def adapt(self, text, htmlpage):
     """Build a download URL for ``text`` relative to ``htmlpage``.

     The text is encoded with the page encoding, joined to the page's base
     URL with ``urljoin_rfc``, unquoted with ``unquote_markup`` and then
     passed to ``safe_download_url``, whose result is returned.
     """
     encoded_text = text.encode(htmlpage.encoding)
     page_base = get_base_url(htmlpage)
     absolute = urljoin_rfc(page_base, encoded_text)
     unquoted = unquote_markup(absolute)
     return safe_download_url(unquoted)

Пример #7

Показать файл

 def adapt(self, text, htmlpage):
     """Build a download URL for ``text`` relative to ``htmlpage``.

     Both the text and the page's base URL are encoded with the page
     encoding before being joined; the joined URL is unquoted with
     ``unquote_markup`` (using the same encoding) and the result of
     ``safe_download_url`` is returned.
     """
     page_encoding = htmlpage.encoding
     encoded_text = text.encode(page_encoding)
     encoded_base = get_base_url(htmlpage).encode(htmlpage.encoding)
     absolute = urljoin(encoded_base, encoded_text)
     unquoted = unquote_markup(absolute, encoding=htmlpage.encoding)
     return safe_download_url(unquoted)

Пример #8

Показать файл

Файл: url.py Проект: TimoC1982/portia

 def adapt(self, text, htmlpage=None):
     """Resolve ``text`` against the base URL of ``htmlpage``.

     Without a page the text is returned as given. Otherwise the text and
     the page's base URL are encoded with the page encoding and joined,
     the joined URL is unquoted with ``unquote_markup`` and the result of
     ``safe_download_url`` is returned.
     """
     if htmlpage is not None:
         encoded_text = text.encode(htmlpage.encoding)
         encoded_base = get_base_url(htmlpage).encode(htmlpage.encoding)
         absolute = urljoin(encoded_base, encoded_text)
         unquoted = unquote_markup(absolute, encoding=htmlpage.encoding)
         return safe_download_url(unquoted)
     return text