Python correct_relative_path示例

编程语言: Python

命名空间/包名称: crawlmi.utils.url

方法/功能: correct_relative_path

hotexamples.com的示例: 4

Python correct_relative_path - 已找到 4 个示例。以下是从开源项目中提取的、评分最高的 crawlmi.utils.url.correct_relative_path 真实 Python 代码示例。您可以为示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： lxml_link_extractor.py 项目： Mimino666/crawlmi

 def _extract_links_from_html(self, html, response_encoding):
     """Build ``Link`` objects for the accepted links of a parsed document.

     Every ``(element, attribute, value, position)`` tuple yielded by
     ``html.iterlinks()`` is considered. A tuple is kept only when
     ``self.tag_func`` accepts the element tag and ``self.attr_func``
     accepts the attribute name. Any failure while processing a single
     link is logged as a warning and that link is skipped, so one bad link
     does not abort the whole extraction.

     :param html: parsed document exposing ``iterlinks()`` and ``base_url``
         (presumably an ``lxml.html`` element -- confirm against caller).
     :param response_encoding: encoding tried first when converting a
         unicode URL with ``to_str``.
     :return: list of ``Link`` objects in ``iterlinks()`` order.
     """
     links = []
     for el, attr, attr_val, _pos in html.iterlinks():
         # Guard clause; ``attr_func`` is only consulted for accepted tags.
         if not (self.tag_func(el.tag) and self.attr_func(attr)):
             continue
         try:
             url = attr_val
             if isinstance(url, unicode):
                 try:
                     url = to_str(url, response_encoding)
                 except UnicodeEncodeError:
                     # The URL holds characters the response encoding
                     # cannot represent; fall back to UTF-8.
                     url = to_str(url, 'utf-8')
             url = correct_relative_path(requote_url(url))
             text = to_unicode(el.text or u'', 'utf-8')
             # ``rel`` is a space-separated, case-insensitive token list,
             # so look for the ``nofollow`` token instead of comparing the
             # whole attribute value (e.g. rel="nofollow noopener").
             rel = el.attrib.get('rel') or ''
             nofollow = 'nofollow' in rel.lower().split()
         except Exception as e:
             # Deliberate best-effort: report the problem and move on.
             log.msg(
                 format='Error occurred while extracting links from %(url)s. Error (%(etype)s): %(error)s',
                 level=log.WARNING, url=html.base_url, etype=type(e),
                 error=e)
         else:
             links.append(Link(url=url, text=text, nofollow=nofollow))
     return links

示例#2

显示文件

 def test_correct_relative_path(self):
     """Check ``correct_relative_path`` on URLs with ``.`` and ``..`` parts."""
     # (input URL, expected result) pairs, verified in this order.
     cases = (
         ('http://digineff.cz/art/sout/fotky-s-p-b-hem.html',
          'http://digineff.cz/art/sout/fotky-s-p-b-hem.html'),
         ('http://www.test.com/.', 'http://www.test.com/'),
         ('http://www.test.com/./', 'http://www.test.com/'),
         ('http://www.test.com/..', 'http://www.test.com/'),
         ('http://www.test.com/../', 'http://www.test.com/'),
         ('http://www.test.com/./.././..', 'http://www.test.com/'),
         ('http://www.test.com/./a/./b/../c', 'http://www.test.com/a/c'),
     )
     for raw_url, expected in cases:
         self.assertEqual(correct_relative_path(raw_url), expected)

示例#3

显示文件

文件： test_utils_url.py 项目： Mimino666/crawlmi

 def test_correct_relative_path(self):
     """Verify that dot segments in URL paths are resolved as expected."""

     def check(given, wanted):
         # One assertion per URL, same as spelling each call out by hand.
         self.assertEqual(correct_relative_path(given), wanted)

     # A URL without dot segments must come back unchanged.
     check('http://digineff.cz/art/sout/fotky-s-p-b-hem.html',
           'http://digineff.cz/art/sout/fotky-s-p-b-hem.html')
     # Dot segments directly under the root collapse to the root.
     check('http://www.test.com/.', 'http://www.test.com/')
     check('http://www.test.com/./', 'http://www.test.com/')
     check('http://www.test.com/..', 'http://www.test.com/')
     check('http://www.test.com/../', 'http://www.test.com/')
     check('http://www.test.com/./.././..', 'http://www.test.com/')
     # Mixed ``.`` and ``..`` segments inside a longer path.
     check('http://www.test.com/./a/./b/../c', 'http://www.test.com/a/c')

示例#4

显示文件

 def _extract_links_from_html(self, html, response_encoding):
     """Build ``Link`` objects for the accepted links of a parsed document.

     Every ``(element, attribute, value, position)`` tuple yielded by
     ``html.iterlinks()`` is considered. A tuple is kept only when
     ``self.tag_func`` accepts the element tag and ``self.attr_func``
     accepts the attribute name. Any failure while processing a single
     link is logged as a warning and that link is skipped.

     :param html: parsed document exposing ``iterlinks()`` and ``base_url``
         (presumably an ``lxml.html`` element -- confirm against caller).
     :param response_encoding: encoding tried first when converting the
         URL with ``to_str``.
     :return: list of ``Link`` objects in ``iterlinks()`` order.
     """
     links = []
     for el, attr, attr_val, _pos in html.iterlinks():
         # Guard clause; ``attr_func`` is only consulted for accepted tags.
         if not (self.tag_func(el.tag) and self.attr_func(attr)):
             continue
         try:
             url = to_unicode(attr_val, 'utf-8')
             try:
                 url = to_str(url, response_encoding)
             except UnicodeEncodeError:
                 # The URL holds characters the response encoding cannot
                 # represent; fall back to UTF-8 rather than dropping the
                 # link altogether.
                 url = to_str(url, 'utf-8')
             url = correct_relative_path(requote_url(url))
             text = to_unicode(el.text or u'', 'utf-8')
             # ``rel`` is a space-separated, case-insensitive token list,
             # so look for the ``nofollow`` token instead of comparing the
             # whole attribute value (e.g. rel="nofollow noopener").
             rel = el.attrib.get('rel') or ''
             nofollow = 'nofollow' in rel.lower().split()
         except Exception as exc:
             # ``exc`` (not ``e``) so the exception never shadows a loop
             # variable. Deliberate best-effort: report and move on.
             log.msg(
                 format=
                 'Error occurred while extracting links from %(url)s. Error (%(etype)s): %(error)s',
                 level=log.WARNING,
                 url=html.base_url,
                 etype=type(exc),
                 error=exc)
         else:
             links.append(
                 Link(url=url, text=text, nofollow=nofollow))
     return links