def __get_anchors(self):
    """Return the page's anchors as absolute URLs.

    When ``self.context`` is falsy the raw anchors are returned as-is;
    otherwise each raw anchor is resolved against the context URL.  The
    result is cached on the instance so repeated calls do not re-resolve.
    """
    # BUG FIX: the original guard ``hasattr(self, '__anchors')`` could
    # never succeed inside a class — the attribute assigned below is
    # name-mangled to ``_ClassName__anchors`` while the literal string is
    # not — so the cache was dead and every call recomputed the list.
    # EAFP on the (consistently mangled) attribute fixes the caching.
    try:
        return self.__anchors
    except AttributeError:
        pass
    if not self.context:
        # No context to resolve against: expose the raw list directly.
        self.__anchors = self.raw_anchors
        return self.__anchors
    resolved = list()
    for anchor in self.raw_anchors:
        # Already absolute — keep unchanged.
        if anchor.startswith('http://') or anchor.startswith('https://'):
            resolved.append(anchor)
            continue
        if '../' in anchor:
            # TODO Process relative anchor and continue
            continue
        uri_scheme, authority, port, path = normalize_url(self.context)
        if anchor.startswith('/'):
            # Host-relative anchor: scheme + authority + anchor.
            resolved.append("{0}{1}{2}".format(uri_scheme, authority, anchor))
            continue
        # Document-relative anchor: resolve against the context URL's
        # directory (the context path minus its final segment).
        pieces = path[1:].split('/')
        pieces.pop()  # drop the document name, keep its directory
        if pieces:
            # BUG FIX: the original only appended '/' when more than one
            # segment remained, so a single-directory context produced
            # fused URLs like 'http://host/dirpage.html'.  A trailing
            # separator is needed whenever any directory path remains.
            path = '/'.join(pieces) + '/'
        else:
            path = ''
        resolved.append(
            "{0}{1}/{2}{3}".format(uri_scheme, authority, path, anchor))
    self.__anchors = resolved
    return self.__anchors
def __get_anchors(self):
    """Return the page's anchors as absolute URLs, resolving relative
    ones against ``self.context`` when a context is set.

    The computed list is cached on the instance.

    NOTE(review): this file appears to define ``__get_anchors`` twice
    with identical bodies; the later definition wins at class-creation
    time — consolidate to a single definition.
    """
    # BUG FIX: ``hasattr(self, '__anchors')`` never matched — the
    # attribute assigned below is name-mangled (``_ClassName__anchors``)
    # but the literal string is not — so the cache never took effect.
    # An EAFP read of the mangled attribute restores the caching.
    try:
        return self.__anchors
    except AttributeError:
        pass
    if not self.context:
        self.__anchors = self.raw_anchors
        return self.__anchors
    absolute = []
    for anchor in self.raw_anchors:
        if anchor.startswith('http://') or anchor.startswith('https://'):
            # Already absolute — keep unchanged.
            absolute.append(anchor)
            continue
        if '../' in anchor:
            # TODO Process relative anchor and continue
            continue
        uri_scheme, authority, port, path = normalize_url(self.context)
        if anchor.startswith('/'):
            # Host-relative: prepend scheme and authority only.
            absolute.append("{0}{1}{2}".format(uri_scheme, authority, anchor))
        else:
            # Document-relative: resolve against the directory of the
            # context path (context path minus its final segment).
            pieces = path[1:].split('/')
            pieces.pop()  # discard the document name
            if pieces:
                # BUG FIX: '/' must be appended for ANY non-empty
                # directory path; the original skipped it when exactly
                # one segment remained, fusing directory and anchor.
                path = '/'.join(pieces) + '/'
            else:
                path = ''
            absolute.append(
                "{0}{1}/{2}{3}".format(uri_scheme, authority, path, anchor))
    self.__anchors = absolute
    return self.__anchors
def can_access(url):
    """Return whether the robots policy of *url*'s host permits fetching
    its path, as reported by ``Robot.can_access``."""
    from crawler.connection import normalize_url

    _, authority, _, path = normalize_url(url)
    return Robot(authority).can_access(path)