def attributeProp(attributeNode):
    """Build an ordered description of one attribute definition node.

    Returns an OrderedDict with the attribute's name, boolean flags, type,
    description, and the consumer/producer link lists extracted from the
    given XML node.

    NOTE(review): the result key "producered-by" looks like a typo for
    "produced-by"; it is preserved because downstream consumers of this
    dict may rely on the exact spelling.
    """
    result = OrderedDict()
    result["name"] = attributeNode.content.strip()
    # NOTE(review): xpathEval2() results are compared against 0 exactly as in
    # the original code -- confirm this is the intended truth test for the
    # value xpathEval2() returns here.
    result["read-only"] = (attributeNode.xpathEval2(XPATH_H_ATTRIBUTE_IS_READONLY) != 0)
    result["write-once"] = (attributeNode.xpathEval2(XPATH_H_ATTRIBUTE_IS_WRITEONCE) != 0)
    result["required"] = (attributeNode.xpathEval2(XPATH_H_ATTRIBUTE_IS_REQUIRED) != 0)
    result["queryable"] = (attributeNode.xpathEval2(XPATH_H_ATTRIBUTE_IS_QUERYABLE) != 0)
    result["guid"] = (attributeNode.xpathEval2(XPATH_H_ATTRIBUTE_IS_GUID) != 0)

    commentNodes = attributeNode.xpathEval2(XPATH_H_ATTRIBUTE_COMMENT)
    comment = commentNodes[0].content.strip() if commentNodes else ""

    def _link_entries(xpath):
        # Each link must be in the format {uri}#{attribute}; when no
        # fragment is present, the attribute's own name is used.
        entries = []
        for node in attributeNode.xpathEval2(xpath):
            (url, attribute) = splittag(node.prop('href'))
            if attribute is None:
                attribute = result["name"]
            entries.append({"attribute": attribute,
                            "doc-url": url,
                            "api": node.content.strip()})
        return entries

    consumers = _link_entries(XPATH_H_ATTRIBUTE_CONSUMER)
    producers = _link_entries(XPATH_H_ATTRIBUTE_PRODUCER)

    typeNodes = attributeNode.xpathEval2(XPATH_H_ATTRIBUTE_TYPE)
    datatype = typeNodes[0].content.strip() if typeNodes else None

    result["type"] = datatype
    result["description"] = comment
    result["consumed-by"] = consumers
    result["producered-by"] = producers
    return result
def _links(self, filename_matches=None, external_links=False, follow_external_index_pages=False):
    """Iterate over useful links on files for mirroring.

    Yields (url, md5-hash) pairs for index links that carry an
    ``#md5=...`` fragment; optionally also yields (url, None) pairs for
    external links.
    """
    index_html = self._fetch_index()
    for link in self._fetch_links(index_html):
        # Handle "normal" packages in pypi: checksum travels in the fragment.
        url, fragment = urllib.splittag(link)
        if not fragment:
            continue
        try:
            hashname, hashvalue = fragment.split("=")
        except ValueError:
            continue
        if hashname != "md5":
            continue
        if filename_matches and not self.matches(url, filename_matches):
            continue
        yield (url, hashvalue)
    if external_links:
        for ext_link in self._links_external(index_html, filename_matches, follow_external_index_pages):
            yield (ext_link, None)
def get_or_add(self, url_or_doc):
    """Return the numeric record id registered for a URL or document.

    For a standard URL, returns the numeric record ID.
    For a URL which has a fragment-id:
      If the fragment is a paragraph of a text page, a pair
      (record-id, paragraph-id) is returned.
      Otherwise, just the record id is returned.
    If arg is a PluckerDocument, returns the id assigned for that document,
    registering it first if it is new.

    Raises ValueError for any other argument type.
    """
    if isinstance(url_or_doc, str):
        import urllib
        url, tag = urllib.splittag(url_or_doc)
        # Resolve any registered alias before looking up the id.
        finalurl = self._alias_list.get(url, url)
        if tag:
            record_id = self._get_id_for_url((finalurl, tag))
        else:
            record_id = self._get_id_for_url(finalurl)
        return record_id
    elif isinstance(url_or_doc, PluckerDocs.PluckerDocument):
        url = url_or_doc.get_url()
        if url not in self._url_to_doc_mapping:
            self._url_to_doc_mapping[url] = url_or_doc
        if url_or_doc not in self._doc_to_id_mapping and url in self._url_to_id_mapping:
            self._doc_to_id_mapping[url_or_doc] = self._url_to_id_mapping[url]
        if url_or_doc not in self._doc_to_id_mapping:
            message(2, "New document %s added", url_or_doc)
        return self._get_id_for_doc(url_or_doc)
    else:
        raise ValueError("not a URL or an instance of " + str(PluckerDocs.PluckerDocument))
def url(self, filename=None, splittag=True):
    """Return the package's base URL, optionally extended with *filename*.

    Any #fragment on *filename* is discarded and the remainder is
    URL-quoted before being appended.  The ``splittag`` parameter is
    accepted for interface compatibility but is not consulted.

    Raises PackageError when the filename cannot be quoted.
    """
    if filename:
        filename, _fragment = urllib.splittag(filename)
        try:
            filename = urllib.quote(filename)
        except KeyError:
            raise PackageError("%s is not a valid filename." % filename)
    base = "%s/%s" % (self._pypi_base_url, self.name)
    if filename:
        return "%s/%s" % (base, filename)
    return base
def url(self, filename=None, splittag=True):
    """Return the package's base URL (lower-cased, '_' -> '-'), plus *filename*.

    Any #fragment on *filename* is discarded and the remainder is
    URL-quoted before being appended.  The ``splittag`` parameter is
    accepted for interface compatibility but is not consulted.

    Raises PackageError when the filename cannot be quoted.
    """
    if filename:
        filename, _fragment = urllib.splittag(filename)
        try:
            filename = urllib.quote(filename)
        except KeyError:
            raise PackageError("%s is not a valid filename." % filename)
    base = "%s/%s/" % (self._pypi_base_url, self.name.lower().replace('_', '-'))
    if filename:
        return "%s/%s" % (base, filename)
    return base
def __init__(self, url, data = None, headers = {}, origin_req_host = None, unverifiable = False):
    """Initialize the request; the URL's #fragment is split off and discarded."""
    self.__original = unwrap(url)
    self.__original, fragment = splittag(self.__original)
    self.type = None
    self.host = None
    self.port = None
    self._tunnel_host = None
    self.data = data
    self.headers = {}
    for hdr_name, hdr_value in headers.items():
        self.add_header(hdr_name, hdr_value)
    self.unredirected_hdrs = {}
    # Derive the origin host from the request itself when not supplied.
    self.origin_req_host = request_host(self) if origin_req_host is None else origin_req_host
    self.unverifiable = unverifiable
def sanitize_path(path):
    """Sanitize a /-separated PATH.

    Query strings and hash tags are dropped, the path is unquoted and
    normalized, and components that mean special things (e.g. '..' and
    '.') are ignored.
    """
    # Abandon query parameters and hash tag before normalizing.
    path = urllib.splitquery(path)[0]
    path = urllib.splittag(path)[0]
    path = posixpath.normpath(urllib.unquote(path))
    sanitized = ''
    for component in path.split('/'):
        # Skip empty components and the special '.' / '..' entries.
        if not component or component in (posixpath.curdir, posixpath.pardir):
            continue
        sanitized = posixpath.join(sanitized, component)
    return sanitized
def __init__(self, url, data=None, headers={}, origin_req_host=None, unverifiable=False):
    """Initialize the request.

    The URL is unwrapped and its #fragment is split off and stored
    separately in ``self.__fragment``.
    """
    self.__original = unwrap(url)
    self.__original, self.__fragment = splittag(self.__original)
    self.type = None
    self.host = None
    self.port = None
    self._tunnel_host = None
    self.data = data
    self.headers = {}
    for key, value in headers.items():
        self.add_header(key, value)
    self.unredirected_hdrs = {}
    if origin_req_host is None:
        origin_req_host = request_host(self)
    self.origin_req_host = origin_req_host
    self.unverifiable = unverifiable
    # (a redundant bare ``return`` at the end of __init__ was removed)
def __init__(self, url, data=None, headers={}, origin_req_host=None, unverifiable=False):
    """Set up the request for the given URL.

    unwrap('<URL:type://host/path>') --> 'type://host/path'; the
    #fragment is split off and discarded.
    """
    self.__original = unwrap(url)
    self.__original, fragment = splittag(self.__original)
    # self.__r_type is what's left after doing the splittype
    self.type = None
    self.host = None
    self.port = None
    self._tunnel_host = None
    self.data = data
    self.headers = {}
    for name, value in headers.items():
        self.add_header(name, value)
    self.unredirected_hdrs = {}
    if origin_req_host is None:
        origin_req_host = request_host(self)
    self.origin_req_host = origin_req_host
    self.unverifiable = unverifiable
def __init__(self, url, data=None, headers={}, origin_req_host=None, unverifiable=False):
    """Set up the request for the given URL.

    unwrap('<URL:type://host/path>') --> 'type://host/path'; the
    #fragment is split off and kept in ``self.__fragment``.
    """
    self.__original = unwrap(url)
    self.__original, self.__fragment = splittag(self.__original)
    # self.__r_type is what's left after doing the splittype
    self.type = None
    self.host = None
    self.port = None
    self._tunnel_host = None
    self.data = data
    self.headers = {}
    for hdr, val in headers.items():
        self.add_header(hdr, val)
    self.unredirected_hdrs = {}
    self.origin_req_host = request_host(self) if origin_req_host is None else origin_req_host
    self.unverifiable = unverifiable
def location_bar (self):
    """Returns a string representing the active location bar.

    Uses the current URL path (query string and fragment stripped) and
    renders each path component as a link to the corresponding prefix
    of the path.

    @return: a HTML string
    @rtype: string
    """
    clean_path = urllib.splittag(
        urllib.splitquery(cherrypy.request.path_info)[0]
    )[0]
    parts = clean_path.split('/')[1:]
    # One cumulative URI prefix per path component.
    prefixes = ["/" + "/".join(parts[:i + 1]) for i in range(len(parts))]
    anchors = ["""<a href="%s">%s</a>""" % (uri, name)
               for (name, uri) in zip(parts, prefixes)]
    return """<a href="/">/</a>""" + "/".join(anchors)
def _get_youtube_url(self):
    """Build the youtube '/v/' URL for this short link, honoring a time tag."""
    long_url = self.surl.get_long_url()
    # split 'http://www.youtube.com/v=VIDEOID#tag' -> 'v=VIDEOID#tag'
    query = urllib.splitquery(os.path.split(long_url)[1])[1]
    # split 'v=VIDEOID#tag' -> 'VIDEOID#tag'
    value = urllib.splitvalue(query)[1]
    # split 'VIDEOID#tag' -> ('VIDEOID', 'tag')
    vid, tag = urllib.splittag(value)
    time_offset = 0
    if tag:
        # split 't=5m2s' -> '5m2s', then convert '5m2s' -> int(302)
        time_offset = Forwarder._friendly_to_seconds(urllib.splitvalue(tag)[1])
    if time_offset > 0:
        return 'http://www.youtube.com/v/%s?start=%s' % (vid, time_offset)
    return 'http://www.youtube.com/v/%s' % (vid)
def _parseURI(uri):
    """Parse a database URI into (user, password, host, port, path, args).

    The #fragment is split off but ignored; the query string is parsed
    into the ``args`` dict.  Windows drive-letter paths like '/C|/path'
    are normalized to 'C:/path'.
    """
    scheme, rest = urllib.splittype(uri)
    user, password, port = None, None, None
    host, path = urllib.splithost(rest)
    if host:
        # Python < 2.7 have a problem - splituser() calls unquote() too early
        if '@' in host:
            user, host = host.split('@', 1)
        if user:
            user, password = [part and urllib.unquote(part) or None
                              for part in urllib.splitpasswd(user)]
        host, port = urllib.splitport(host)
        if port:
            port = int(port)
    elif host == '':
        host = None
    # hash-tag is split off but ignored
    path, _tag = urllib.splittag(path)
    path, query = urllib.splitquery(path)
    path = urllib.unquote(path)
    if (os.name == 'nt') and (len(path) > 2):
        # Preserve backward compatibility with URIs like /C|/path;
        # replace '|' by ':'
        if path[2] == '|':
            path = "%s:%s" % (path[0:2], path[3:])
        # Remove leading slash
        if (path[0] == '/') and (path[2] == ':'):
            path = path[1:]
    args = dict(parse_qsl(query)) if query else {}
    return user, password, host, port, path, args
def parse_url(url, default_port=None): ''' Parse url in the following form: PROTO://[USER:[:PASSWD]@]HOST[:PORT][/PATH[;ATTR][?QUERY]] A tuple containing (proto, user, passwd, host, port, path, tag, attrs, query) is returned, where `attrs' is a tuple containing ('attr1=value1', 'attr2=value2', ...) ''' proto, user, passwd, host, port, path, tag, attrs, query = (None, ) * 9 try: proto, tmp_host = urllib.splittype(url) tmp_host, tmp_path = urllib.splithost(tmp_host) tmp_user, tmp_host = urllib.splituser(tmp_host) if tmp_user: user, passwd = urllib.splitpasswd(tmp_user) host, port = urllib.splitport(tmp_host) port = int(port) if port else default_port tmp_path, query = urllib.splitquery(tmp_path) tmp_path, attrs = urllib.splitattr(tmp_path) path, tag = urllib.splittag(tmp_path) except Exception, err: raise Exception('parse_db_url error - {0}'.format(str(err)))
def _parseURI(uri):
    """Split a database URI into (user, password, host, port, path, args).

    The #fragment is split off but ignored and the query string is
    turned into the ``args`` dict; '/C|/path'-style Windows paths are
    rewritten as 'C:/path'.
    """
    protocol, remainder = urllib.splittype(uri)
    user, password, port = None, None, None
    host, path = urllib.splithost(remainder)
    if host:
        # Python < 2.7 have a problem - splituser() calls unquote() too early
        if '@' in host:
            user, host = host.split('@', 1)
        if user:
            user, password = [piece and urllib.unquote(piece) or None
                              for piece in urllib.splitpasswd(user)]
        host, port = urllib.splitport(host)
        if port:
            port = int(port)
    elif host == '':
        host = None
    # hash-tag is split off but ignored
    path, _tag = urllib.splittag(path)
    path, query = urllib.splitquery(path)
    path = urllib.unquote(path)
    if (os.name == 'nt') and (len(path) > 2):
        # Preserve backward compatibility with URIs like /C|/path;
        # replace '|' by ':'
        if path[2] == '|':
            path = "%s:%s" % (path[0:2], path[3:])
        # Remove leading slash
        if (path[0] == '/') and (path[2] == ':'):
            path = path[1:]
    args = {}
    if query:
        args.update(parse_qsl(query))
    return user, password, host, port, path, args
def get_selector(self):
    """Return ``self.__r_host`` with any #fragment removed."""
    selector, _fragment = urllib.splittag(self.__r_host)
    return selector
for (header, value) in result.headers.items(): response.setHeader(header, value) response.writeHeader() response.forceFlush(self.wfile) self.copyfile(result, self.wfile, False) except Exception,x: if isInclude: import traceback err= "".join(traceback.format_exception(*sys.exc_info())) log.error("problem with including '%s': %s" % (url,err)) response.getOutput().write(u"<h1>Problem with including '%s':</h1><h3>%s</h3>" % (url,x)) else: response.sendError(404,"not found: "+str(x)) else: # call our custom version of send_head for the new url and reusing the request object if urllib.splittag(url)[1] is not None: raise ValueError("bad redirect URL requested, cannot contain #anchor") self.path=url self.command="GET" result=self.do_GETorHEAD(request,response) if result: if not response.used() or isInclude: # only flush when it is not yet used... (or when something is being included) if not response.used(): response.writeHeader() response.forceFlush(self.wfile) self.copyfile(result, self.wfile, False) # # The threading HTTP server. # We do all socket handling ourselves, to have maximum control.
response.writeHeader() response.forceFlush(self.wfile) self.copyfile(result, self.wfile, False) except Exception, x: if isInclude: import traceback err = "".join(traceback.format_exception(*sys.exc_info())) log.error("problem with including '%s': %s" % (url, err)) response.getOutput().write( u"<h1>Problem with including '%s':</h1><h3>%s</h3>" % (url, x)) else: response.sendError(404, "not found: " + str(x)) else: # call our custom version of send_head for the new url and reusing the request object if urllib.splittag(url)[1] is not None: raise ValueError( "bad redirect URL requested, cannot contain #anchor") self.path = url self.command = "GET" result = self.do_GETorHEAD(request, response) if result: if not response.used() or isInclude: # only flush when it is not yet used... (or when something is being included) if not response.used(): response.writeHeader() response.forceFlush(self.wfile) self.copyfile(result, self.wfile, False) #