def to_url(self):
    """Serialize as a URL for a GET request."""
    parsed = urlparse.urlparse(self.url)
    try:
        raw_query = parsed.query
    except AttributeError:
        # must be python <2.5: urlparse returned a plain tuple
        raw_query = parsed[4]
    merged = parse_qs(raw_query)
    # fold this object's own items into the query, preserving existing values
    for name, value in self.items():
        merged.setdefault(name, []).append(value)
    try:
        pieces = (parsed.scheme, parsed.netloc, parsed.path,
                  parsed.params, parsed.fragment)
    except AttributeError:
        # must be python <2.5
        pieces = (parsed[0], parsed[1], parsed[2], parsed[3], parsed[5])
    scheme, netloc, path, params, fragment = pieces
    try:
        encoded = urllib.urlencode(merged, True)
        unparse = urllib.urlunparse
    except AttributeError:
        # urlencode/urlunparse moved modules between Python versions
        encoded = urlparse.urlencode(merged, True)
        unparse = urlparse.urlunparse
    return unparse((scheme, netloc, path, params, encoded, fragment))
def get_bookmarks_by_uri(self, uri):
    """Return a tuple of the bookmark nodes stored for *uri*.

    The lookup key keeps only the scheme/netloc/path of the parsed URI
    (params, query and fragment are discarded).  Returns () when no
    bookmarks are recorded for the URI.
    """
    # bug fix: urllib.urlunparse does not exist; it lives in urllib.parse
    from urllib.parse import urlunparse
    parsed = _parse_uri(uri)
    uri = urlunparse(parsed[:3] + ('', '', ''))
    try:
        return tuple(self.__node_map[uri])
    except KeyError:
        return ()
def url2key(self, url, mode, params):
    """Normalize a URL for use as a caching key.

    - change the hostname to all lowercase
    - remove the port if it is the scheme's default port
    - reformat the port using %d
    - get rid of the fragment identifier

    *mode* and *params* are part of the call interface but do not
    influence the key.
    """
    # bug fix: urllib.urlunparse does not exist; it lives in urllib.parse
    from urllib.parse import urlunparse
    scheme, netloc, path, params, query, fragment = urlparse(url)
    # strip a user:password@ prefix; credentials never belong in a cache key
    i = netloc.find('@')
    if i > 0:
        netloc = netloc[i + 1:]          # delete the '@' and what precedes it
    scheme = scheme.lower()
    netloc = netloc.lower()
    i = netloc.find(':')
    if i >= 0:
        try:
            port = int(netloc[i + 1:])
        except ValueError:
            port = None
    else:
        port = None
    if scheme == 'http' and port == 80:
        netloc = netloc[:i]              # default port: drop it entirely
    elif isinstance(port, int):
        netloc = netloc[:i] + ":%d" % port   # canonical %d formatting
    return urlunparse((scheme, netloc, path, params, query, ""))
def register_id(self, name):
    """Add page number of element start to internal database.

    The current document URL is re-assembled with *name* as the fragment,
    so each anchor ID gets its own distinct docinfo key.
    """
    # bug fix: urllib.urlunparse does not exist; it lives in urllib.parse
    from urllib.parse import urlunparse
    (scheme, netloc, path, params, query, fragment) = \
        urlparse(self.context.get_url())
    netloc = netloc.lower()   # hostnames are case-insensitive
    url = urlunparse(
        (scheme, netloc, path, params, query, name))
    pageno = self.formatter.writer.ps.get_pageno()
    self._set_docinfo(url, pageno, '')
def _make_url(row, band):
    """Build the public http URL for *band* of the scene described by *row*."""
    # parse a known-good URL once just to reuse its scheme and host
    template = urlparse(
        'http://storage.googleapis.com/gcp-public-data-landsat/LC08/01/037/029/'
        'LC08_L1TP_037029_20130101_20190131_01_T1/LC08_L1TP_037029_20130101_20190131_01_T1_B9.TIF'
    )
    bucket_path = row.BASE_URL.replace('gs://', '')
    object_path = '{}/{}_{}'.format(bucket_path, row.PRODUCT_ID, band)
    return urlunparse([template.scheme, template.netloc, object_path, '', '', ''])
def format_url(template, params=None, **kwparams):
    """Expand format placeholders in the URL *template*.

    The query component (index 4 of the parsed URL) is expanded with
    QuerystringFormatter; all other components use the plain formatter.
    *params* is merged via get_formatdata() and then overridden by
    **kwparams.
    """
    # bug fix: urllib.urlparse/urllib.urlunparse do not exist; use urllib.parse
    from urllib.parse import urlparse, urlunparse
    # bug fix: avoid the shared mutable-default-argument pitfall (params={})
    args = {**get_formatdata(params if params is not None else {}), **kwparams}
    parts = list(urlparse(template))
    for i in range(len(parts)):
        if i == 4:
            parts[i] = QuerystringFormatter().format(parts[i], **args)
        else:
            # NOTE(review): builtin format() takes no keyword arguments, so
            # `format` here must be a project-level helper shadowing it — confirm
            parts[i] = format(parts[i], **args)
    return urlunparse(parts)
def start_Bookmark(self, node):
    """Create a Bookmark node mirroring *node* and index it by normalized URI."""
    # bug fix: urllib.urlunparse does not exist; it lives in urllib.parse
    from urllib.parse import urlunparse
    new_node = nodes.Bookmark()
    self.add_node(new_node)
    self.add_describable(new_node, node)
    uri = node.uri()
    new_node.set_uri(uri)
    new_node.set_last_modified(node.last_modified())
    new_node.set_last_visited(node.last_visited())
    # key on scheme/netloc/path only so params/query/fragment variants share an entry
    key = urlunparse(_parse_uri(uri)[:3] + ('', '', ''))
    try:
        self.__node_map[key].append(new_node)
    except KeyError:
        self.__node_map[key] = [new_node]
def wrapper(*args, **kwargs):
    """Coerce UniversalPath-style arguments to plain strings before calling func.

    When no 'path' keyword is given, a UniversalPath first positional
    argument is stringified.  Otherwise the 'path' keyword is rebuilt
    from self._url with the requested path spliced in.
    """
    # bug fix: urllib.urlunparse does not exist; it lives in urllib.parse
    from urllib.parse import urlunparse
    if args:
        args = list(args)
        first_arg = args.pop(0)
        if not kwargs.get("path"):
            if isinstance(first_arg, UniversalPath):
                first_arg = str(first_arg)
            args.insert(0, first_arg)
            args = tuple(args)
        else:
            # NOTE(review): in this branch first_arg is never re-inserted, so
            # the first positional argument is dropped — confirm intentional
            # presumably self._url is a ParseResult; _replace-style .replace
            # swaps in the new path — confirm
            new_url = self._url.replace(path=kwargs["path"])
            unparsed = urlunparse(new_url)
            kwargs["path"] = unparsed
    return func(*args, **kwargs)
def norm_uri(uri):
    """Normalize *uri*: lowercase the host and drop an explicit :80 for http."""
    # bug fix: urllib.urlunparse does not exist; it lives in urllib.parse
    from urllib.parse import urlunparse
    scheme, netloc, path, params, query, fragment \
        = urlparse(uri)
    if scheme == "http" and ':' in netloc:
        loc = netloc.split(':')
        try:
            port = int(loc[-1], 10)
        # bug fix: narrowed from a bare except; int() raises ValueError here
        except ValueError:
            pass   # non-numeric port: leave netloc untouched
        else:
            if port == 80:
                del loc[-1]   # default http port is redundant
            # bug fix: str.join(loc, ':') is a TypeError; join the pieces with ':'
            netloc = ':'.join(loc)
    return urlunparse(
        (scheme, netloc.lower(), path, params, query, fragment))
def build_info(self, node):
    """Walk the tree rooted at *node* breadth-first, building lookup tables.

    Returns (node_map, id_map, ref_map): node_map keys Bookmark nodes by
    normalized URI (scheme/netloc/path only), id_map maps IDs to their
    nodes, and ref_map maps IDs to the Alias nodes that reference them.

    Raises NodeIDError on a duplicate ID or an unresolved Alias idref.
    """
    # bug fix: urllib.urlunparse does not exist; it lives in urllib.parse
    from urllib.parse import urlunparse
    node_map = {}
    id_map = {}
    ref_map = {}
    need_ids = []
    queue = [node]
    while queue:
        node = queue.pop(0)
        nodetype = node.get_nodetype()
        if nodetype == "Bookmark":
            node_id = node.id()
            # bug fix: dict.has_key() was removed in Python 3
            if node_id in id_map:
                raise NodeIDError("duplicate ID found: " + repr(node_id))
            if node_id:
                id_map[node_id] = node
                if node_id in need_ids:
                    need_ids.remove(node_id)
            uri = node.uri()
            key = urlunparse(_parse_uri(uri)[:3] + ('', '', ''))
            try:
                node_map[key].append(node)
            except KeyError:
                node_map[key] = [node]
        elif nodetype == "Folder":
            node_id = node.id()
            if node_id in id_map:
                raise NodeIDError("duplicate ID found: " + repr(node_id))
            if node_id:
                id_map[node_id] = node
                if node_id in need_ids:
                    need_ids.remove(node_id)
            # add child nodes to the end of the queue
            queue.extend(node.children())
        elif nodetype == "Alias":
            idref = node.idref()
            if idref not in id_map:
                need_ids.append(idref)   # may be defined later in the walk
            try:
                ref_map[idref].append(node)
            except KeyError:
                ref_map[idref] = [node]
    if need_ids:
        raise NodeIDError("Could not locate IDs", need_ids)
    return node_map, id_map, ref_map
def restart(self, url):
    """Restart the transfer with a new URL, e.g. after a 301/302 redirect."""
    # bug fix: urllib.urljoin/urllib.urlunparse do not exist; use urllib.parse
    from urllib.parse import urljoin, urlunparse
    self.maxrestarts = self.maxrestarts - 1
    self.viewer = self.last_context.viewer
    self.app = self.last_context.app
    self.parser = None
    parts = urlparse(url)   # renamed: the original shadowed builtin `tuple`
    # it's possible that the url sent in a 301 or 302 error is a
    # relative URL.  if there's no scheme or netloc in the
    # parsed tuple, try joining the URL with the previous URL
    # and retry parsing it.
    if not (parts[0] and parts[1]):
        url = urljoin(self.url, url)
        parts = urlparse(url)
    self.url = url
    self.fragment = parts[-1]
    parts = parts[:-1] + ("",)   # strip the fragment from the request URL
    if self.user_passwd:
        # re-insert the credentials ahead of the (possibly rewritten) host
        netloc = parts[1]
        i = netloc.find('@')
        if i >= 0:
            netloc = netloc[i + 1:]
        netloc = self.user_passwd + '@' + netloc
        parts = (parts[0], netloc) + parts[2:]
    realurl = urlunparse(parts)
    # Check first to see if the previous Context has any protocol handlers
    api = self.last_context.get_local_api(realurl, self.method, self.params)
    if not api:
        if self.app:
            api = self.app.open_url(realurl, self.method, self.params,
                                    self.reload, data=self.data)
        else:
            import protocols
            api = protocols.protocol_access(realurl, self.method,
                                            self.params, data=self.data)
    BaseReader.__init__(self, self.last_context, api)
def __call__(self, url, attrs):
    """Crawl filter: queue *url* if it lies beneath the configured root.

    Always returns *url* unchanged; inserting the normalized URL into the
    crawl set (self.__docs / self.insert) is the side effect.
    """
    # bug fix: urllib.urlunparse does not exist; it lives in urllib.parse
    from urllib.parse import urlunparse
    scheme, netloc, path, params, query, frag = urlparse(url)
    if params or query:
        # safety restraint
        return url
    netloc = netloc.lower()
    if scheme != self.__scheme or netloc != self.__netloc:
        return url
    # check the paths:
    stored_url = urlunparse((scheme, netloc, path, '', '', ''))
    # bug fix: dict.has_key() was removed in Python 3
    if stored_url in self.__docs:
        return url
    if len(path) < len(self.__path):
        return url
    if path[:len(self.__path)] != self.__path:
        return url
    if (not self.__max_levels) \
       or (self.__max_levels and self.__level < self.__max_levels):
        self.__docs[stored_url] = self.__level + 1
        self.insert(stored_url)
    return url
def __init__(self, master, context, class_="DocumentInfo"):
    """Build the Document Info dialog for the page shown in *context*."""
    # bug fix: urllib.urlunparse does not exist; it lives in urllib.parse
    from urllib.parse import urlunparse
    root = tktools.make_toplevel(master, class_=class_, title="Document Info")
    self.root = root
    self.app = context.app
    page_title = context.page.title()
    if page_title:
        root.title("Document Info: " + page_title)
    destroy = self.destroy
    for seq in ("<Alt-W>", "<Alt-w>", "<Return>"):
        # bug fix: the event sequence was dropped (root.bind(destroy));
        # each sequence must be bound to the destroy handler
        root.bind(seq, destroy)
    root.protocol("WM_DELETE_WINDOW", destroy)
    frame, self.__topfr, botfr = tktools.make_double_frame(root)
    #
    # Info display
    #
    url = context.page.url()
    scheme, netloc, path, params, query, fragment = urlparse(url)
    url = urlunparse((scheme, netloc, path, '', '', ''))
    self.add_label_field("Title", page_title or "(unknown)", "title")
    self.add_label_field("URI", url, "uri")
    if fragment:
        self.add_label_field("Fragment", fragment, "fragment")
    headers = context.get_headers()
    # bug fix: dict.has_key() was removed in Python 3
    # NOTE(review): `type(...) is type(self)` looks like a converted
    # InstanceType sentinel check for cached entries — confirm
    if "date" in headers and type(headers["date"]) is type(self):
        self.add_label_field("", "(Loaded from local cache.)", "cached")
    # bug fix: dict views have no .sort(); use sorted()
    items = sorted(headers.items())
    s = ""
    for k, v in items:
        if k == 'date' and type(v) is type(self):
            from . import ht_time
            v = ht_time.unparse(v.get_secs())
        s = "%s%s:\t%s\n" % (s, k, v)
    stretch = self.add_text_field("Response headers", s, "headers")
    if query:
        query = str.translate(query, FIELD_BREAKER)
        stretch = stretch or \
                  self.add_text_field("Query fields", query, "query")
    postdata = context.get_postdata()
    if postdata:
        postdata = str.translate(postdata, FIELD_BREAKER)
        stretch = stretch or \
                  self.add_text_field("POST fields", postdata, "postdata")
    #
    # Bottom button
    #
    fr = tkinter.Frame(botfr, borderwidth=1, relief=tkinter.SUNKEN)
    fr.pack()
    btn = tkinter.Button(fr, text="OK", command=destroy)
    # '2m' is the value from the standard Tk 'tk_dialog' command
    btn.pack(padx='2m', pady='2m')
    btn.focus_set()
    # del self.__topfr # loose the reference
    tktools.set_transient(root, master)
    root.update_idletasks()
    reqwidth = root.winfo_reqwidth()
    reqheight = root.winfo_reqheight()
    root.minsize(reqwidth, reqheight)
    if not stretch:
        root.maxsize(reqwidth, reqheight)
# NOTE(review): `response` comes from an earlier example in this script — confirm
print(response.read())

from urllib.parse import urlparse
from urllib.parse import urlunparse
from urllib.parse import urljoin

# URL splitting
# urllib.parse.urlparse(urlstring, scheme='', allow_fragments=True)
result = urlparse('http://www.baidu.com/index.html;user?id=5#comment')
# a default scheme is applied only when the URL itself carries none
result = urlparse('www.baidu.com/index.html;user?id=5#comment', scheme='https')
# ParseResult(scheme='http', netloc='www.baidu.com', path='/index.html', params='user', query='id=5', fragment='comment')
print(type(result), result)

# URL assembly from the six components
data = ['http', 'www.baidu.com', 'index.html', 'user', 'a=6', 'comment']
# bug fix: urllib.urlunparse does not exist; use urllib.parse.urlunparse
print(urlunparse(data))

# URL joining: components present in the second URL win; missing ones
# are filled in from the first — useful when only the host is missing
print(urljoin('http://www.baidu.com', 'FAQ.html'))
print(urljoin('http://www.baidu.com', 'https://cuiqingcai.com/FAQ.html'))
print(
    urljoin('http://www.baidu.com/about.html', 'https://cuiqingcai.com/FAQ.html'))
print(
    urljoin('http://www.baidu.com/about.html', 'https://cuiqingcai.com/FAQ.html?question=2'))
print(
    urljoin('http://www.baidu.com?wd=abc', 'https://cuiqingcai.com/index.php'))
print(urljoin('http://www.baidu.com', '?category=2#comment'))
print(urljoin('www.baidu.com', '?category=2#comment'))
def __make_node_key(self, node):
    """Return the node's URI reduced to scheme/netloc/path, for use as a map key."""
    # bug fix: urllib.urlunparse does not exist; it lives in urllib.parse
    from urllib.parse import urlunparse
    parsed = _parse_uri(node.uri())[:3] + ('', '', '')
    return urlunparse(parsed)
def __init__(self, master, address, data):
    """Build the mail-composition dialog for a mailto *address*.

    Query semantics may be used to identify header field values.
    """
    # bug fix: cgi.parse_qs was removed from the stdlib; use urllib.parse
    from urllib.parse import urlparse, urlunparse, parse_qs
    # bug fix: the address was being *called* (urllib(address)); parse it
    scheme, netloc, path, params, query, fragment = urlparse(address)
    address = urlunparse((scheme, netloc, path, '', '', ''))
    headers = parse_qs(query)
    # create widgets
    self.master = master
    self.root = tktools.make_toplevel(self.master, title="Mail Dialog")
    self.root.protocol("WM_DELETE_WINDOW", self.cancel_command)
    self.root.bind("<Alt-w>", self.cancel_command)
    self.root.bind("<Alt-W>", self.cancel_command)
    fr, top, botframe = tktools.make_double_frame(self.root)
    self.text, fr = tktools.make_text_box(top, 80, 24)
    self.text.tag_config('SUSPICIOUS_HEADER', foreground='red')
    self.send_button = Button(botframe, text="Send",
                              command=self.send_command)
    self.send_button.pack(side=LEFT)
    self.cancel_button = Button(botframe, text="Cancel",
                                command=self.cancel_command)
    self.cancel_button.pack(side=RIGHT)
    tktools.unify_button_widths(self.send_button, self.cancel_button)
    hinfo = _make_sequence_dict(COMMON_HEADERS)
    variables = {
        'to': address,
        'subject': data and 'Form posted from Grail' or '',
        'mime-version': '1.0',
        'x-mailer': GRAILVERSION,
        'x-url': LAST_CONTEXT and LAST_CONTEXT.get_baseurl() or ''
        }
    if data:
        variables["content-type"] = "application/x-www-form-urlencoded"
    else:
        variables["content-type"] = "text/plain; charset=us-ascii"
        variables["content-transfer-encoding"] = "7bit"
    # move default set of query'd headers into variables
    # bug fix: deleting while iterating a live dict view raises
    # RuntimeError in Python 3; iterate over a snapshot
    for header, vlist in list(headers.items()):
        header = header.lower()
        if header != 'body':
            if header not in DISALLOWED_HEADERS:
                variables[header] = vlist[0]   # toss duplicates
                # bug fix: dict.has_key() was removed in Python 3
                if header not in hinfo:
                    hinfo[header] = 15
            del headers[header]
    # insert user-specified extra headers
    variables = self.add_user_headers(variables)
    for header in variables.keys():
        if header not in hinfo:
            hinfo[header] = 19
    # write the headers into the buffer
    variables['date'] = time.ctime(time.time())
    hseq = _make_dict_sequence(hinfo)
    for x, header in hseq:
        if header in variables:
            # bug fix: str.title() takes no separator argument; .title()
            # capitalizes each hyphen-separated word (e.g. "Mime-Version")
            s = "%s: %s\n" \
                % (header.title(), variables[header])
            self.text.insert(END, s)
    # insert newline
    self.text.insert(END, '\n', ())
    # insert data
    if data:
        self.text.insert(END, data)
    elif 'body' in headers:
        self.text.insert(END, headers['body'][0] + '\n')
    else:
        self.add_user_signature()
    self.text.focus_set()
def set_url(self, url):
    """Record the cooked form of *url*, stripped of params/query/fragment."""
    # bug fix: urllib.urlunparse does not exist; it lives in urllib.parse
    from urllib.parse import urlunparse
    parsed = urlparse(url)[:3] + ('', '', '')
    self._url_cooked = cook(urlunparse(parsed))