示例#1
0
 def replace(self, arg, charset='utf-8'):
     """Resolve *arg* against ``self.host`` and return a normalised URL.

     Side effect: ``self.host`` is overwritten with the joined URL (the
     value stored is the one *before* "www." removal and lower-casing).

     Processing applied to the returned value:
       * every "//" in *arg* is collapsed to "/" before joining;
       * every "www." substring is removed and the result is passed
         through the ``lower`` helper;
       * a ``unicode`` value is encoded with *charset*, dropping
         characters that cannot be encoded;
       * the path is run through ``urlnorm.norm_path`` and percent-quoted
         (keeping "/" and "%"), then passed through ``lower`` again;
       * the query string is quoted with ``quote_plus`` (keeping
         ":&?/=").

     :param arg: URL reference to join onto the current host.
     :param charset: encoding used when the joined URL is ``unicode``.
     :returns: the reassembled URL string.
     """
     # Collapse doubled slashes in the reference, then resolve it.
     self.host = urlparse.urljoin(self.host, arg.replace("//", "/"))

     cleaned = ''
     try:
         cleaned = lower(self.host.replace("www.", ""))
         if isinstance(cleaned, unicode):
             cleaned = cleaned.encode(charset, 'ignore')
     except UnicodeEncodeError:
         # Best effort: carry on with whatever ``cleaned`` holds so far.
         # NOTE(review): the original marks this spot for logging.
         pass

     parts = urlparse.urlsplit(cleaned)
     scheme, netloc, raw_path, query, fragment = parts

     normalised_path = urlnorm.norm_path("http", raw_path)
     quoted_path = urllib.quote(normalised_path.encode('utf-8'), '/%')
     quoted_query = urllib.quote_plus(query.encode('utf8'), ':&?/=')

     return urlparse.urlunsplit(
         (scheme, netloc, lower(quoted_path), quoted_query, fragment)
     )
示例#2
0
def norm_url(url):
    """Normalise *url* with ``urlnorm``, degrading gracefully on failure.

    The input is first coerced with ``uni`` and encoded as UTF-8. It is
    then passed to ``urlnorm.norm``; if that rejects it as an invalid URL,
    ``urlnorm.norm_path`` is tried instead.

    :param url: the URL (or relative path) to normalise.
    :returns: the normalised value; the UTF-8 encoded input unchanged when
        ``urlnorm`` raises ``UnicodeDecodeError``; ``None`` when
        ``urlnorm.norm`` fails with any other ordinary exception (the
        traceback is printed).
    :raises Exception: errors other than ``UnicodeDecodeError`` raised by
        the ``norm_path`` fallback propagate to the caller.
    """
    url = uni(url).encode('utf-8')
    try:
        return urlnorm.norm(url)
    except urlnorm.InvalidUrl:
        # Happens when the URL is relative. Call path normalization directly.
        try:
            return urlnorm.norm_path('', url)
        except UnicodeDecodeError:
            return url
    except UnicodeDecodeError:
        # Workaround for a bug in urlnorm on unicode URLs.
        return url
    except Exception:
        # Best effort: report the failure and signal it with None.
        # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt
        # and SystemExit are not swallowed.
        traceback.print_exc()
        return None
示例#3
0
def norm_url(url):
    """Normalise *url* with ``urlnorm``, degrading gracefully on failure.

    The input is first coerced with ``uni`` and encoded as UTF-8. It is
    then passed to ``urlnorm.norm``; if that rejects it as an invalid URL,
    ``urlnorm.norm_path`` is tried instead.

    :param url: the URL (or relative path) to normalise.
    :returns: the normalised value; the UTF-8 encoded input unchanged when
        ``urlnorm`` raises ``UnicodeDecodeError``; ``None`` when
        ``urlnorm.norm`` fails with any other ordinary exception (the
        traceback is printed).
    :raises Exception: errors other than ``UnicodeDecodeError`` raised by
        the ``norm_path`` fallback propagate to the caller.
    """
    url = uni(url).encode('utf-8')
    try:
        return urlnorm.norm(url)
    except urlnorm.InvalidUrl:
        # Happens when the URL is relative. Call path normalization directly.
        try:
            return urlnorm.norm_path('', url)
        except UnicodeDecodeError:
            return url
    except UnicodeDecodeError:
        # Workaround for a bug in urlnorm on unicode URLs.
        return url
    except Exception:
        # Best effort: report the failure and signal it with None.
        # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt
        # and SystemExit are not swallowed.
        traceback.print_exc()
        return None
示例#4
0
def test_norm_path(bad, good):
    """Check that ``urlnorm.norm_path`` turns *bad* into *good* for "http".

    *good* is converted with ``_unicode`` before the comparison.
    """
    normalised = urlnorm.norm_path("http", bad)
    expected = _unicode(good)
    assert normalised == expected
示例#5
0
def test_norm_path(bad, good):
    """Check that ``urlnorm.norm_path`` turns *bad* into *good* for "http".

    *good* is converted with ``_unicode`` before the comparison.
    """
    normalised = urlnorm.norm_path("http", bad)
    expected = _unicode(good)
    assert normalised == expected
示例#6
0
def normurl(url):
    """Return *url* normalised by ``urlnorm``.

    ``urlnorm.norm`` is tried first; when it raises
    ``urlnorm.InvalidUrl`` the value is normalised with
    ``urlnorm.norm_path`` using an empty scheme instead.
    """
    try:
        normalised = urlnorm.norm(url)
    except urlnorm.InvalidUrl:
        normalised = urlnorm.norm_path('', url)
    return normalised
示例#7
0
def normurl(url):
    """Return *url* normalised by ``urlnorm``.

    ``urlnorm.norm`` is tried first; when it raises
    ``urlnorm.InvalidUrl`` the value is normalised with
    ``urlnorm.norm_path`` using an empty scheme instead.
    """
    try:
        normalised = urlnorm.norm(url)
    except urlnorm.InvalidUrl:
        normalised = urlnorm.norm_path('', url)
    return normalised