Пример #1
0
def attributeProp(attributeNode):
    """Build an ordered description of an attribute documentation node.

    Returns an OrderedDict holding the attribute's name, its boolean
    flags, declared type, description text, and the lists of consumer
    and producer APIs linked from the node.
    """
    result = OrderedDict()

    result["name"] = attributeNode.content.strip()
    # NOTE(review): these compare the raw xpathEval2() result against 0.
    # If these XPath expressions return node lists, the comparison is
    # always True — confirm they yield numbers/booleans, not node sets.
    result["read-only"] = (attributeNode.xpathEval2(XPATH_H_ATTRIBUTE_IS_READONLY) != 0)
    result["write-once"] = (attributeNode.xpathEval2(XPATH_H_ATTRIBUTE_IS_WRITEONCE) != 0)
    result["required"] = (attributeNode.xpathEval2(XPATH_H_ATTRIBUTE_IS_REQUIRED) != 0)
    result["queryable"] = (attributeNode.xpathEval2(XPATH_H_ATTRIBUTE_IS_QUERYABLE) != 0)
    result["guid"] = (attributeNode.xpathEval2(XPATH_H_ATTRIBUTE_IS_GUID) != 0)

    commentNodes = attributeNode.xpathEval2(XPATH_H_ATTRIBUTE_COMMENT)
    if len(commentNodes) != 0:
        comment = commentNodes[0].content.strip()
    else:
        comment = ""

    def linkInfo(node):
        # A link must be in format {uri}#{attribute}; when the fragment
        # is missing, the attribute's own name is assumed.
        (url, attribute) = splittag(node.prop('href'))
        if attribute is None:
            attribute = result["name"]
        return {"attribute": attribute,
                "doc-url": url,
                "api": node.content.strip()}

    # consumers and producers share the same link format, so build both
    # lists with the single helper above (was two copy-pasted loops)
    consumers = [linkInfo(n) for n in attributeNode.xpathEval2(XPATH_H_ATTRIBUTE_CONSUMER)]
    producers = [linkInfo(n) for n in attributeNode.xpathEval2(XPATH_H_ATTRIBUTE_PRODUCER)]

    # type ----
    typeNodes = attributeNode.xpathEval2(XPATH_H_ATTRIBUTE_TYPE)
    if len(typeNodes) == 0:
        datatype = None
    else:
        datatype = typeNodes[0].content.strip()

    result["type"] = datatype
    result["description"] = comment
    result["consumed-by"] = consumers
    # key spelling kept as-is ("producered-by") for backward compatibility
    result["producered-by"] = producers
    return result
Пример #2
0
    def _links(self,
               filename_matches=None,
               external_links=False,
               follow_external_index_pages=False):
        """Iterate over useful (url, md5-digest) links for mirroring.

        Only links that carry a well-formed ``#md5=...`` fragment are
        yielded.  When *external_links* is set, external links are
        yielded afterwards with ``None`` in place of the digest.
        """
        index_html = self._fetch_index()
        for candidate in self._fetch_links(index_html):
            # "normal" pypi packages carry their checksum in the fragment
            url, fragment = urllib.splittag(candidate)
            if not fragment:
                continue
            pieces = fragment.split("=")
            if len(pieces) != 2:
                # malformed fragment (no '=' or more than one)
                continue
            digest_name, digest = pieces
            if digest_name != "md5":
                continue
            if filename_matches and not self.matches(url, filename_matches):
                continue
            yield (url, digest)

        if not external_links:
            return
        for link in self._links_external(index_html,
                                         filename_matches,
                                         follow_external_index_pages):
            yield (link, None)
Пример #3
0
 def get_or_add (self, url_or_doc):
    """Map a URL string or PluckerDocument to its numeric record id.

    For a standard URL, returns the numeric record ID.
    For a URL which has a fragment-id:  if the fragment is a paragraph
    of a text page, a pair (record-id, paragraph-id) is returned;
    otherwise, just the record id is returned.
    If the argument is a PluckerDocument, returns the id assigned for
    that document (registering it in the url/doc/id mappings first
    when it is new).
    If the argument is an integer, it is treated as a registered-
    document id.  Get-only.
    """
    if type(url_or_doc) == type(''):
        import urllib
        # split off any #fragment before resolving aliases
        url, tag = urllib.splittag(url_or_doc)
        finalurl = self._alias_list.get(url, url)
        if tag:
            # fragment present: key the lookup on the (url, tag) pair
            id = self._get_id_for_url((finalurl, tag))
        else:
            id = self._get_id_for_url(finalurl)
        return id
    elif isinstance(url_or_doc, PluckerDocs.PluckerDocument):
        url = url_or_doc.get_url()
        # register the document under its URL if not already known
        if not self._url_to_doc_mapping.has_key(url):
            self._url_to_doc_mapping[url] = url_or_doc
        # reuse an id already assigned to this URL, if any
        if not self._doc_to_id_mapping.has_key(url_or_doc) and self._url_to_id_mapping.has_key(url):
            self._doc_to_id_mapping[url_or_doc] = self._url_to_id_mapping[url]
        if not self._doc_to_id_mapping.has_key(url_or_doc):
            message(2, "New document %s added", url_or_doc)
        return self._get_id_for_doc(url_or_doc)
    else:
        raise ValueError("not a URL or an instance of " + str(PluckerDocs.PluckerDocument))
Пример #4
0
    def _links(self, filename_matches=None, external_links=False, follow_external_index_pages=False):
        """Yield (url, md5hash) pairs of links useful for mirroring.

        Links lacking an ``#md5=...`` fragment are skipped; when
        *external_links* is true, external links follow, paired with
        ``None`` instead of a hash.
        """
        remote_index_html = self._fetch_index()
        for link in self._fetch_links(remote_index_html):
            # "normal" pypi packages: checksum lives in the url fragment
            url, tag = urllib.splittag(link)
            if not tag:
                continue
            try:
                name, value = tag.split("=")
            except ValueError:
                continue
            if name != "md5":
                continue
            if filename_matches and not self.matches(url, filename_matches):
                continue
            yield (url, value)

        if not external_links:
            return
        for link in self._links_external(remote_index_html, filename_matches, follow_external_index_pages):
            yield (link, None)
Пример #5
0
 def url(self, filename=None, splittag=True):
     """Return this package's base URL, optionally extended by *filename*.

     Any #fragment on *filename* is discarded and the remainder is
     URL-quoted; a PackageError is raised when quoting fails.
     """
     if filename:
         filename, _tag = urllib.splittag(filename)
         try:
             filename = urllib.quote(filename)
         except KeyError:
             raise PackageError("%s is not a valid filename." % filename)
     base = "%s/%s" % (self._pypi_base_url, self.name)
     if filename:
         base = "%s/%s" % (base, filename)
     return base
Пример #6
0
 def url(self, filename=None, splittag=True):
     """Return the base URL for this package, optionally with *filename*.

     Any #fragment is split off *filename* before URL-quoting it;
     raises PackageError when quoting fails.
     """
     # NOTE(review): the `splittag` parameter is unused here —
     # presumably kept for API compatibility; confirm against callers.
     if filename:
         (filename, rest) = urllib.splittag(filename)
         try:
             filename = urllib.quote(filename)
         except KeyError:
             raise PackageError("%s is not a valid filename." % filename)
     url = "%s/%s" % (self._pypi_base_url, self.name)
     if filename:
         url = "%s/%s" % (url, filename)
     return url
Пример #7
0
 def url(self, filename=None, splittag=True):
     """Return the normalized PyPI URL for this package.

     The package name is lowercased with underscores replaced by
     dashes; an optional *filename* (fragment stripped, URL-quoted)
     is appended.
     """
     if filename:
         filename, _fragment = urllib.splittag(filename)
         try:
             filename = urllib.quote(filename)
         except KeyError:
             raise PackageError("%s is not a valid filename." % filename)
     normalized = self.name.lower().replace('_', '-')
     result = "%s/%s/" % (self._pypi_base_url, normalized)
     if filename:
         result = "%s/%s" % (result, filename)
     return result
Пример #8
0
    def __init__(self, url, data=None, headers=None, origin_req_host=None, unverifiable=False):
        """Initialize the request.

        url: request URL; the <URL:...> wrapper and any #fragment are
            stripped before storing.
        data: optional request body.
        headers: optional mapping of header name -> value; entries are
            copied in via add_header(), never shared with the caller.
        origin_req_host: request-host of the origin transaction;
            derived from the request itself when omitted.
        unverifiable: whether the transaction is unverifiable.
        """
        # was `headers = {}`: a mutable default argument is shared
        # between calls, so use None as the sentinel instead.
        if headers is None:
            headers = {}
        self.__original = unwrap(url)
        # the fragment is not sent to the server, so strip it up front
        self.__original, fragment = splittag(self.__original)
        self.type = None
        self.host = None
        self.port = None
        self._tunnel_host = None
        self.data = data
        self.headers = {}
        for key, value in headers.items():
            self.add_header(key, value)

        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
Пример #9
0
def sanitize_path(path):
  """Sanitize a /-separated PATH.

  Query parameters and the hash tag are dropped, the path is unquoted
  and normalized, and components that mean special things (e.g. '..'
  and '.') are ignored.
  """
  # abandon query parameters and hash tag
  path = urllib.splitquery(path)[0]
  path = urllib.splittag(path)[0]
  cleaned = posixpath.normpath(urllib.unquote(path))
  # keep only real components: non-empty and not '.' / '..'
  parts = [p for p in cleaned.split('/')
           if p and p not in (posixpath.curdir, posixpath.pardir)]
  return '/'.join(parts)
Пример #10
0
    def __init__(self, url, data=None, headers=None, origin_req_host=None, unverifiable=False):
        """Initialize the request from *url*.

        The <URL:...> wrapper is removed and the #fragment is split off
        and remembered in self.__fragment.  *headers* entries are
        copied in via add_header(), never shared with the caller.
        *origin_req_host* defaults to the request's own host;
        *unverifiable* marks an unverifiable transaction.
        """
        # was `headers={}`: a mutable default argument is shared across
        # calls; use None as the no-headers sentinel instead.
        if headers is None:
            headers = {}
        self.__original = unwrap(url)
        self.__original, self.__fragment = splittag(self.__original)
        self.type = None
        self.host = None
        self.port = None
        self._tunnel_host = None
        self.data = data
        self.headers = {}
        for key, value in headers.items():
            self.add_header(key, value)

        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        # (dropped the redundant trailing `return` — __init__ returns None)
Пример #11
0
 def __init__(self, url, data=None, headers={}, origin_req_host=None, unverifiable=False):
     """Initialize the request: store the fragment-free URL, copy the
     headers in via add_header(), and derive origin_req_host from the
     request itself when not given.
     """
     # NOTE(review): headers={} is a shared mutable default; it is only
     # iterated here, never mutated, so it is safe as written — but a
     # None sentinel would be the conventional fix.
     # unwrap('<URL:type://host/path>') --> 'type://host/path'
     self.__original = unwrap(url)
     # the #fragment is not part of the request target, so split it off
     self.__original, fragment = splittag(self.__original)
     self.type = None
     # self.__r_type is what's left after doing the splittype
     self.host = None
     self.port = None
     self._tunnel_host = None
     self.data = data
     self.headers = {}
     for key, value in headers.items():
         self.add_header(key, value)
     self.unredirected_hdrs = {}
     if origin_req_host is None:
         origin_req_host = request_host(self)
     self.origin_req_host = origin_req_host
     self.unverifiable = unverifiable
Пример #12
0
def sanitize_path(path):
    """Sanitize a /-separated PATH.

    Query parameters and the hash tag are dropped, the path is unquoted
    and normalized, and components that mean special things
    (e.g. '..' and '.') are ignored.
    """
    # abandon query parameters and hash tag.
    path = urllib.splitquery(path)[0]
    path = urllib.splittag(path)[0]
    path = urllib.unquote(path)
    path = posixpath.normpath(path)
    words = path.split('/')
    # drop empty components (leading '/', doubled slashes)
    words = filter(None, words)
    path = ''
    for word in words:
        if word in (posixpath.curdir, posixpath.pardir):
            continue
        path = posixpath.join(path, word)
    return path
Пример #13
0
 def __init__(self, url, data=None, headers={},
              origin_req_host=None, unverifiable=False):
     """Set up a request for *url*.

     The <URL:...> wrapper and any #fragment are stripped from the URL
     (the fragment is remembered in self.__fragment separately), and
     *headers* entries are copied in through add_header().
     """
     # unwrap('<URL:type://host/path>') --> 'type://host/path'
     stripped = unwrap(url)
     self.__original, self.__fragment = splittag(stripped)
     self.type = None
     # what remains after splittype is kept privately (self.__r_type)
     self.host = None
     self.port = None
     self._tunnel_host = None
     self.data = data
     self.headers = {}
     for name, value in headers.items():
         self.add_header(name, value)
     self.unredirected_hdrs = {}
     if origin_req_host is None:
         origin_req_host = request_host(self)
     self.origin_req_host = origin_req_host
     self.unverifiable = unverifiable
Пример #14
0
    def location_bar (self):
        """Returns a string representing the active location bar.

        This method will use the current URL path, and return an HTML string
        where the different components of the path are linked to their URL.

        @return: a HTML string
        @rtype: string
        """
        # strip the ?query and #fragment from the current request path
        s = urllib.splittag(
            urllib.splitquery(cherrypy.request.path_info)[0]
            )[0]
        # drop the empty leading component produced by the initial '/'
        path = s.split('/')[1:]
        # pair each component with the cumulative URI up to and
        # including it, then render each pair as an HTML link
        return """<a href="/">/</a>""" + "/".join(
            ["""<a href="%s">%s</a>"""
             % (uri, name)
             for (name, uri) in zip(path,
                                    ["/"+"/".join(path[:i+1])
                                     for i in range(len(path))])]
            )
Пример #15
0
    def _get_youtube_url(self):
        """Build the embeddable youtube URL for this short-url.

        Extracts the video id (and an optional time tag, converted to a
        start offset in seconds) from the long URL.
        """
        long_url = self.surl.get_long_url()
        # 'http://www.youtube.com/v=VIDEOID#tag' -> 'v=VIDEOID#tag'
        query = urllib.splitquery(os.path.split(long_url)[1])[1]
        # 'v=VIDEOID#tag' -> 'VIDEOID#tag' -> ('VIDEOID', 'tag')
        vid, tag = urllib.splittag(urllib.splitvalue(query)[1])

        time_offset = 0
        if tag:
            # 't=5m2s' -> '5m2s' -> int(302)
            time_offset = Forwarder._friendly_to_seconds(
                urllib.splitvalue(tag)[1])

        if time_offset > 0:
            return 'http://www.youtube.com/v/%s?start=%s' % (vid, time_offset)
        return 'http://www.youtube.com/v/%s' % (vid)
Пример #16
0
    def _parseURI(uri):
        """Split a database URI into (user, password, host, port, path, args).

        Credentials are unquoted; the port is converted to int; the
        #fragment is discarded; ?query parameters are parsed into the
        *args* dict.  On Windows, legacy '/C|/path' drive syntax is
        converted back to 'C:/path'.
        """
        protocol, request = urllib.splittype(uri)
        user, password, port = None, None, None
        host, path = urllib.splithost(request)

        if host:
            # Python < 2.7 have a problem - splituser() calls unquote() too early
            #user, host = urllib.splituser(host)
            if '@' in host:
                user, host = host.split('@', 1)
            if user:
                # unquote each credential; empty strings become None
                user, password = [
                    x and urllib.unquote(x) or None
                    for x in urllib.splitpasswd(user)
                ]
            host, port = urllib.splitport(host)
            if port: port = int(port)
        elif host == '':
            # URIs like 'file:///path' yield an empty host
            host = None

        # hash-tag is splitted but ignored
        path, tag = urllib.splittag(path)
        path, query = urllib.splitquery(path)

        path = urllib.unquote(path)
        if (os.name == 'nt') and (len(path) > 2):
            # Preserve backward compatibility with URIs like /C|/path;
            # replace '|' by ':'
            if path[2] == '|':
                path = "%s:%s" % (path[0:2], path[3:])
            # Remove leading slash
            if (path[0] == '/') and (path[2] == ':'):
                path = path[1:]

        args = {}
        if query:
            for name, value in parse_qsl(query):
                args[name] = value

        return user, password, host, port, path, args
Пример #17
0
def parse_url(url, default_port=None):
    '''
    Parse url in the following form:
      PROTO://[USER:[:PASSWD]@]HOST[:PORT][/PATH[;ATTR][?QUERY]]
    A tuple containing (proto, user, passwd, host, port, path, tag, attrs, query) is returned,
    where `attrs' is a tuple containing ('attr1=value1', 'attr2=value2', ...)
    '''
    proto, user, passwd, host, port, path, tag, attrs, query = (None, ) * 9

    try:
        proto, tmp_host = urllib.splittype(url)
        tmp_host, tmp_path = urllib.splithost(tmp_host)
        tmp_user, tmp_host = urllib.splituser(tmp_host)
        if tmp_user:
            user, passwd = urllib.splitpasswd(tmp_user)
        host, port = urllib.splitport(tmp_host)
        port = int(port) if port else default_port
        tmp_path, query = urllib.splitquery(tmp_path)
        tmp_path, attrs = urllib.splitattr(tmp_path)
        path, tag = urllib.splittag(tmp_path)
    except Exception, err:
        raise Exception('parse_db_url error - {0}'.format(str(err)))
Пример #18
0
def parse_url(url, default_port=None):
    '''
    Parse url in the following form:
      PROTO://[USER:[:PASSWD]@]HOST[:PORT][/PATH[;ATTR][?QUERY]]
    A tuple containing (proto, user, passwd, host, port, path, tag, attrs, query) is returned,
    where `attrs' is a tuple containing ('attr1=value1', 'attr2=value2', ...)
    '''
    proto, user, passwd, host, port, path, tag, attrs, query = (None, ) * 9

    try:
        proto, tmp_host = urllib.splittype(url)
        tmp_host, tmp_path = urllib.splithost(tmp_host)
        tmp_user, tmp_host = urllib.splituser(tmp_host)
        if tmp_user:
            user, passwd = urllib.splitpasswd(tmp_user)
        host, port = urllib.splitport(tmp_host)
        port = int(port) if port else default_port
        tmp_path, query = urllib.splitquery(tmp_path)
        tmp_path, attrs = urllib.splitattr(tmp_path)
        path, tag = urllib.splittag(tmp_path)
    except Exception, err:
        raise Exception('parse_db_url error - {0}'.format(str(err)))
Пример #19
0
    def _parseURI(uri):
        """Split a database URI into (user, password, host, port, path, args).

        Credentials are unquoted; the port is converted to int; the
        #fragment is discarded; ?query parameters are parsed into the
        *args* dict.  On Windows, legacy '/C|/path' drive syntax is
        converted back to 'C:/path'.
        """
        protocol, request = urllib.splittype(uri)
        user, password, port = None, None, None
        host, path = urllib.splithost(request)

        if host:
            # Python < 2.7 have a problem - splituser() calls unquote() too early
            #user, host = urllib.splituser(host)
            if '@' in host:
                user, host = host.split('@', 1)
            if user:
                # unquote each credential; empty strings become None
                user, password = [x and urllib.unquote(x) or None for x in urllib.splitpasswd(user)]
            host, port = urllib.splitport(host)
            if port: port = int(port)
        elif host == '':
            # URIs like 'file:///path' yield an empty host
            host = None

        # hash-tag is splitted but ignored
        path, tag = urllib.splittag(path)
        path, query = urllib.splitquery(path)

        path = urllib.unquote(path)
        if (os.name == 'nt') and (len(path) > 2):
            # Preserve backward compatibility with URIs like /C|/path;
            # replace '|' by ':'
            if path[2] == '|':
                path = "%s:%s" % (path[0:2], path[3:])
            # Remove leading slash
            if (path[0] == '/') and (path[2] == ':'):
                path = path[1:]

        args = {}
        if query:
            for name, value in parse_qsl(query):
                args[name] = value

        return user, password, host, port, path, args
Пример #20
0
 def get_selector(self):
     """Return the request selector: the target with the #fragment cut off."""
     selector, _fragment = urllib.splittag(self.__r_host)
     return selector
Пример #21
0
                        for (header, value) in result.headers.items():
                            response.setHeader(header, value)
                        response.writeHeader()
                    response.forceFlush(self.wfile)
                    self.copyfile(result, self.wfile, False)
            except Exception,x:
                if isInclude:
                    import traceback
                    err= "".join(traceback.format_exception(*sys.exc_info()))
                    log.error("problem with including '%s': %s" % (url,err))
                    response.getOutput().write(u"<h1>Problem with including '%s':</h1><h3>%s</h3>" % (url,x))
                else:
                    response.sendError(404,"not found: "+str(x))
        else:
            # call our custom version of send_head for the new url and reusing the request object
            if urllib.splittag(url)[1] is not None:
                raise ValueError("bad redirect URL requested, cannot contain #anchor")
            self.path=url
            self.command="GET"
            result=self.do_GETorHEAD(request,response)
            if result:
                if not response.used() or isInclude:
                    # only flush when it is not yet used... (or when something is being included)
                    if not response.used():
                        response.writeHeader()
                    response.forceFlush(self.wfile)  
                self.copyfile(result, self.wfile, False)

#
#   The threading HTTP server.
#   We do all socket handling ourselves, to have maximum control.
Пример #22
0
                        response.writeHeader()
                    response.forceFlush(self.wfile)
                    self.copyfile(result, self.wfile, False)
            except Exception, x:
                if isInclude:
                    import traceback
                    err = "".join(traceback.format_exception(*sys.exc_info()))
                    log.error("problem with including '%s': %s" % (url, err))
                    response.getOutput().write(
                        u"<h1>Problem with including '%s':</h1><h3>%s</h3>" %
                        (url, x))
                else:
                    response.sendError(404, "not found: " + str(x))
        else:
            # call our custom version of send_head for the new url and reusing the request object
            if urllib.splittag(url)[1] is not None:
                raise ValueError(
                    "bad redirect URL requested, cannot contain #anchor")
            self.path = url
            self.command = "GET"
            result = self.do_GETorHEAD(request, response)
            if result:
                if not response.used() or isInclude:
                    # only flush when it is not yet used... (or when something is being included)
                    if not response.used():
                        response.writeHeader()
                    response.forceFlush(self.wfile)
                self.copyfile(result, self.wfile, False)


#
Пример #23
0
 def get_selector(self):
     # Selector = the request target with any trailing #fragment removed.
     return urllib.splittag(self.__r_host)[0]