def __init__(self, orig_url):
    """Build the instance from *orig_url* and normalise ``self.url``.

    Steps, in order:

    1. Run ``BaseWbUrl.__init__`` and remember the input in
       ``self._original_url`` (on Python 2, text input is first UTF-8
       encoded and percent-quoted).
    2. Parse the input with ``self._init_query``; if that returns a falsy
       value, try ``self._init_replay``.
    3. Replace ``self.url`` with ``WbUrl.to_uri(self.url)`` and set
       ``self._do_percent_encode`` to ``True``.
    4. Leave ``urn:``, ``screenshot:`` and ``thumbnail:`` urls untouched.
    5. Otherwise prepend ``self.DEFAULT_SCHEME`` when ``self.SCHEME_RX``
       finds no scheme, or insert a ``/`` two characters past the start of
       the regex's first group when the character there is not already a
       slash.

    :param orig_url: the raw url string to parse.
    :raises Exception: with args ``('Invalid WbUrl: ', orig_url)`` when
        neither ``_init_query`` nor ``_init_replay`` accepts the input.
    """
    # Function-scope imports. Not every name is used below; they are kept
    # so that importing behaves exactly as before.
    import re
    import six
    from six.moves.urllib.parse import (
        urlsplit, urlunsplit, quote_plus, quote, unquote_plus)
    from pywb.utils.loaders import to_native_str
    from pywb.rewrite.wburl import WbUrl

    # NOTE(review): ``pywb`` itself is expected to be bound in the
    # enclosing scope; it is not imported inside this function.
    pywb.rewrite.wburl.BaseWbUrl.__init__(self)

    # Python 2 only: turn unicode input into a percent-quoted byte string.
    if six.PY2 and isinstance(orig_url, six.text_type):
        orig_url = quote(orig_url.encode('utf-8'))

    self._original_url = orig_url

    # Query form is tried first; replay form is only attempted when the
    # query parse reports failure.
    if not (self._init_query(orig_url) or self._init_replay(orig_url)):
        raise Exception('Invalid WbUrl: ', orig_url)

    converted = WbUrl.to_uri(self.url)
    self._do_percent_encode = True
    self.url = converted

    # begin brozzler changes
    if self.url.startswith(('urn:', 'screenshot:', 'thumbnail:')):
        return
    # end brozzler changes

    # protocol agnostic url -> http://
    # no protocol -> http://
    scheme_match = self.SCHEME_RX.match(self.url)
    # start(1) is -1 when group 1 did not take part in the match, which is
    # handled the same way as no match at all.
    sep_pos = scheme_match.start(1) if scheme_match else -1

    if sep_pos < 0:
        self.url = self.DEFAULT_SCHEME + self.url
        return

    # presumably group 1 begins at the ':/' separator, making this the
    # index of the second slash -- confirm against SCHEME_RX.
    slash_pos = sep_pos + 2
    if slash_pos < len(self.url) and self.url[slash_pos] != '/':
        self.url = self.url[:slash_pos] + '/' + self.url[slash_pos:]