Python unquote_markup示例

编程语言: Python

命名空间/包名称: scrapy.utils.markup

方法/功能: unquote_markup

hotexamples.com的示例: 8

Python unquote_markup - 共找到 8 个示例。以下是从开源项目中提取的、评价最高的 scrapy.utils.markup.unquote_markup 真实 Python 用法示例。您可以为示例评分，帮助我们提高示例质量。

示例#1

显示文件

文件： url.py 项目： zirconer/portia

 def adapt(self, text, htmlpage=None):
     """Turn a URL extracted from ``htmlpage`` into an absolute, download-safe URL.

     Returns ``text`` unchanged when no ``htmlpage`` is given, and ``None``
     when ``text`` is ``None``. Otherwise ``text`` and the page's base URL
     are both encoded with the page encoding (``'utf-8'`` when the page has
     no ``encoding`` attribute), passed through ``unquote_markup`` and
     ``strip_url``, joined with ``urljoin`` and finally handed to
     ``safe_download_url``.
     """
     # Without a page there is nothing to resolve against: hand the input back.
     if htmlpage is None:
         return text
     if text is None:
         return None

     page_encoding = getattr(htmlpage, 'encoding', 'utf-8')

     # Unquote the extracted value, then drop whatever the ``disallowed``
     # pattern (defined outside this method) matches before stripping.
     unquoted_text = unquote_markup(text.encode(page_encoding),
                                    encoding=page_encoding)
     relative_url = strip_url(disallowed.sub('', unquoted_text))

     # The base URL goes through the same encode/unquote/strip steps.
     encoded_base = get_base_url(htmlpage).encode(page_encoding)
     absolute_base = strip_url(
         unquote_markup(encoded_base, encoding=page_encoding))

     return safe_download_url(urljoin(absolute_base, relative_url))

示例#2

显示文件

文件： processors.py 项目： txrproject/scrapy_plus

 def __call__(self, values, loader_context=None):
     """Convert the processed ``values`` into a list of absolute URLs.

     ``values`` is first run through the parent class's ``__call__``.
     Each resulting ``dict`` or ``list`` is passed through untouched;
     every other value is unquoted with ``unquote_markup``, cleaned with
     ``_strip_url`` and joined onto the ``'baseurl'`` entry of
     ``loader_context`` (an empty base when the entry, or the context
     itself, is missing).

     Returns the list of resulting values, in input order.
     """
     values = super(Url, self).__call__(values)
     # ``loader_context`` defaults to None, so guard the lookup instead of
     # failing with AttributeError; the base is loop-invariant, so read it once.
     base = ''
     if loader_context is not None:
         base = loader_context.get('baseurl', '')
     urls = []
     for value in values:
         if isinstance(value, (dict, list)):
             # Structured values are kept as-is. Without ``continue`` they
             # would also be fed to ``unquote_markup``/``urljoin`` below,
             # which operate on text.
             urls.append(value)
             continue
         value = _strip_url(unquote_markup(value))
         urls.append(urljoin(base, value))
     return urls

示例#3

显示文件

文件： processors.py 项目： datnamer/portia2code

 def __call__(self, values, loader_context=None):
     """Convert the processed ``values`` into a list of absolute URLs.

     ``values`` is first run through the parent class's ``__call__``.
     Each resulting ``dict`` or ``list`` is passed through untouched;
     every other value is unquoted with ``unquote_markup``, cleaned with
     ``_strip_url`` and joined onto the ``'baseurl'`` entry of
     ``loader_context`` (an empty base when the entry, or the context
     itself, is missing).

     Returns the list of resulting values, in input order.
     """
     values = super(Url, self).__call__(values)
     # ``loader_context`` defaults to None, so guard the lookup instead of
     # failing with AttributeError; the base is loop-invariant, so read it once.
     base = ''
     if loader_context is not None:
         base = loader_context.get('baseurl', '')
     urls = []
     for value in values:
         if isinstance(value, (dict, list)):
             # Structured values are kept as-is. Without ``continue`` they
             # would also be fed to ``unquote_markup``/``urljoin`` below,
             # which operate on text.
             urls.append(value)
             continue
         value = _strip_url(unquote_markup(value))
         urls.append(urljoin(base, value))
     return urls

示例#4

显示文件

    def test_unquote_markup(self):
        sample_txt1 = u"""<node1>hi, this is sample text with entities: &amp; &copy;
<![CDATA[although this is inside a cdata! &amp; &quot;]]></node1>"""
        sample_txt2 = u'<node2>blah&amp;blah<![CDATA[blahblahblah!&pound;]]>moreblah&lt;&gt;</node2>'
        sample_txt3 = u'something&pound;&amp;more<node3><![CDATA[things, stuff, and such]]>what&quot;ever</node3><node4'

        # make sure it always return unicode
        assert isinstance(unquote_markup(sample_txt1.encode('latin-1')),
                          unicode)
        assert isinstance(unquote_markup(sample_txt2), unicode)

        self.assertEqual(
            unquote_markup(sample_txt1),
            u"""<node1>hi, this is sample text with entities: & \xa9
although this is inside a cdata! &amp; &quot;</node1>""")

        self.assertEqual(
            unquote_markup(sample_txt2),
            u'<node2>blah&blahblahblahblah!&pound;moreblah<></node2>')

        self.assertEqual(
            unquote_markup(sample_txt1 + sample_txt2),
            u"""<node1>hi, this is sample text with entities: & \xa9
although this is inside a cdata! &amp; &quot;</node1><node2>blah&blahblahblahblah!&pound;moreblah<></node2>"""
        )

        self.assertEqual(
            unquote_markup(sample_txt3),
            u'something\xa3&more<node3>things, stuff, and suchwhat"ever</node3><node4'
        )

示例#5

显示文件

文件： test_utils_markup.py 项目： chzealot/scrapy

    def test_unquote_markup(self):
        sample_txt1 = u"""<node1>hi, this is sample text with entities: &amp; &copy;
<![CDATA[although this is inside a cdata! &amp; &quot;]]></node1>"""
        sample_txt2 = u'<node2>blah&amp;blah<![CDATA[blahblahblah!&pound;]]>moreblah&lt;&gt;</node2>'
        sample_txt3 = u'something&pound;&amp;more<node3><![CDATA[things, stuff, and such]]>what&quot;ever</node3><node4'

        # make sure it always return unicode
        assert isinstance(unquote_markup(sample_txt1.encode('latin-1')), unicode)
        assert isinstance(unquote_markup(sample_txt2), unicode)

        self.assertEqual(unquote_markup(sample_txt1), u"""<node1>hi, this is sample text with entities: & \xa9
although this is inside a cdata! &amp; &quot;</node1>""")

        self.assertEqual(unquote_markup(sample_txt2), u'<node2>blah&blahblahblahblah!&pound;moreblah<></node2>')

        self.assertEqual(unquote_markup(sample_txt1 + sample_txt2), u"""<node1>hi, this is sample text with entities: & \xa9
although this is inside a cdata! &amp; &quot;</node1><node2>blah&blahblahblahblah!&pound;moreblah<></node2>""")

        self.assertEqual(unquote_markup(sample_txt3), u'something\xa3&more<node3>things, stuff, and suchwhat"ever</node3><node4')

示例#6

显示文件

文件： images.py 项目： netconstructor/slybot

 def adapt(self, text, htmlpage):
     """Resolve ``text`` against the page's base URL and return a download-safe URL.

     ``text`` is encoded with ``htmlpage.encoding``, joined onto
     ``get_base_url(htmlpage)`` with ``urljoin_rfc``, passed through
     ``unquote_markup`` and finally through ``safe_download_url``.
     """
     encoded_text = text.encode(htmlpage.encoding)
     base_url = get_base_url(htmlpage)
     absolute_url = urljoin_rfc(base_url, encoded_text)
     unquoted_url = unquote_markup(absolute_url)
     return safe_download_url(unquoted_url)

示例#7

显示文件

 def adapt(self, text, htmlpage):
     """Resolve ``text`` against the page's base URL and return a download-safe URL.

     Both ``text`` and ``get_base_url(htmlpage)`` are encoded with
     ``htmlpage.encoding`` and joined with ``urljoin``. The result is
     unquoted with ``unquote_markup`` using the same encoding and then
     passed to ``safe_download_url``.
     """
     page_encoding = htmlpage.encoding
     encoded_text = text.encode(page_encoding)
     encoded_base = get_base_url(htmlpage).encode(page_encoding)
     absolute_url = urljoin(encoded_base, encoded_text)
     unquoted_url = unquote_markup(absolute_url, encoding=page_encoding)
     return safe_download_url(unquoted_url)

示例#8

显示文件

文件： url.py 项目： TimoC1982/portia

 def adapt(self, text, htmlpage=None):
     """Resolve ``text`` against the page's base URL and return a download-safe URL.

     Returns ``text`` unchanged when no ``htmlpage`` is given. Otherwise
     both ``text`` and ``get_base_url(htmlpage)`` are encoded with
     ``htmlpage.encoding`` and joined with ``urljoin``. The result is
     unquoted with ``unquote_markup`` using the same encoding and then
     passed to ``safe_download_url``.
     """
     # Without a page there is no base URL to resolve against.
     if htmlpage is None:
         return text

     page_encoding = htmlpage.encoding
     encoded_text = text.encode(page_encoding)
     encoded_base = get_base_url(htmlpage).encode(page_encoding)
     absolute_url = urljoin(encoded_base, encoded_text)
     unquoted_url = unquote_markup(absolute_url, encoding=page_encoding)
     return safe_download_url(unquoted_url)