示例#1
0
def rewrite_urls(origin_url, urls):
    """Yield rewritten versions of ``urls`` as found on the page ``origin_url``.

    For every url, in order:

    * tab, newline and carriage-return characters are removed;
    * the scheme is passed through ``rewrite_scheme``;
    * if ``origin_url`` carries a username and the url points at the same
      hostname, the netloc is rebuilt with ``assemble_netloc`` so that it
      includes the origin's credentials;
    * the fragment is dropped;
    * a url with neither scheme nor netloc is treated as an on-site path and
      joined onto ``origin_url``;
    * spaces are quoted as ``%20``.

    ``None`` entries and urls that end up empty are skipped.
    """
    origin_pack = urlparse.urlsplit(origin_url)
    for u in urls:
        # urlsplit cannot handle None, and there is nothing to rewrite anyway
        if u is None:
            continue

        # kill breaks, including the \r half of CRLF line endings
        u = re.sub(r"[\r\n\t]", "", u)

        pack = urlparse.urlsplit(u)
        netloc, path, query = pack.netloc, pack.path, pack.query

        # try to rewrite scheme
        scheme = rewrite_scheme(pack.scheme)

        # rewrite netloc to include credentials
        if origin_pack.username and pack.hostname == origin_pack.hostname:
            netloc = assemble_netloc(origin_pack.username,
                                     origin_pack.password, pack.hostname, pack.port)

        # reassemble into url, leaving the fragment out
        new_u = urlparse.urlunsplit((scheme, netloc, path, query, None))

        # no scheme or netloc, it's a path on-site
        if not scheme and not netloc and (path or query):
            path_query = urlparse.urlunsplit(('', '', path, query, ''))
            new_u = urlparse.urljoin(origin_url, path_query)

        # quote spaces
        new_u = new_u.replace(" ", "%20")
        if new_u:
            yield new_u
示例#2
0
    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Report an HTTP redirect by raising instead of following it.

        If the ``SILENT_REDIRECT`` environment variable is set to a non-empty
        value, the redirect is handed to ``urllib.FancyURLopener`` unchanged.
        Otherwise the target is read from the ``location`` header (falling
        back to ``uri``), resolved against ``self.fetcher.url`` and raised as
        a ``ChangedUrlWarning``.

        Returns:
            None when the response names no redirect target; otherwise
            whatever the base class returns in silent mode.

        Raises:
            ChangedUrlWarning: carrying the resolved redirect target.
        """
        if os.environ.get("SILENT_REDIRECT"):
            return urllib.FancyURLopener.redirect_internal(
                self, url, fp, errcode, errmsg, headers, data)

        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            # No redirect target in the response: nothing to report. Without
            # this branch ``newurl`` would be unbound below.
            return

        # Relative targets are resolved against the fetcher's url, not the
        # ``url`` argument passed in by the opener.
        newurl = urlparse.urljoin(self.fetcher.url, newurl)
        raise ChangedUrlWarning(newurl)