Python urlparse示例，lib.six.moves.urllib_parse.urlparse Python示例

示例#1

0

显示文件

文件： __init__.py 项目： Cliffnla/LazyLibrarian-1

    def from_request(cls, http_method, http_url, headers=None, parameters=None, query_string=None):
        """Build a request by merging every available parameter source.

        Sources are merged into ``parameters`` (a fresh dict when none is
        given, otherwise the caller's dict is updated in place) in this
        order, later sources overwriting earlier ones:

        1. an ``Authorization`` header whose value starts with ``'OAuth '``,
        2. ``query_string`` (GET or POST data),
        3. the query component of ``http_url``.

        Returns ``cls(http_method, http_url, parameters)``, or ``None`` when
        no parameters were collected at all.  Raises ``Error`` when the OAuth
        header cannot be split.
        """
        merged = {} if parameters is None else parameters

        # 1. OAuth values carried in the Authorization header.
        if headers and 'Authorization' in headers:
            credentials = headers['Authorization']
            # Anything other than the OAuth scheme is ignored.
            if credentials[:6] == 'OAuth ':
                credentials = credentials[6:]
                try:
                    merged.update(cls._split_header(credentials))
                except Exception:
                    raise Error('Unable to parse OAuth parameters from Authorization header.')

        # 2. GET or POST query string.
        if query_string:
            merged.update(cls._split_url_string(query_string))

        # 3. Parameters embedded in the URL itself.
        merged.update(cls._split_url_string(urlparse(http_url).query))

        return cls(http_method, http_url, merged) if merged else None

示例#2

0

显示文件

文件： __init__.py 项目： Cliffnla/LazyLibrarian-1

 def get_callback_url(self):
     """Return the callback URL, carrying ``oauth_verifier`` when one is set.

     When both ``self.callback`` and ``self.verifier`` are truthy the
     verifier is appended to the callback's query string (after any query
     already present).  Otherwise ``self.callback`` is returned untouched.
     """
     if not (self.callback and self.verifier):
         return self.callback

     # Split the callback so only its query component is rewritten.
     scheme, netloc, path, params, query, fragment = urlparse(self.callback)[:6]
     query = ('%s&oauth_verifier=%s' % (query, self.verifier) if query
              else 'oauth_verifier=%s' % self.verifier)
     return urlunparse((scheme, netloc, path, params, query, fragment))

示例#3

0

显示文件

文件： directparser.py 项目： jamiees2/LazyLibrarian

def redirect_url(genhost, url):
    """Redirect a libgen.io url to the user's configured genhost.

    libgen.io might have dns blocked, but the user can bypass that by setting
    genhost (for example 93.174.95.27) in config.  libgen might still send us
    a book url that contains http://libgen.io/, so rewrite its host to the
    user's genhost setting.

    Args:
        genhost: replacement host, either with a scheme
            ("http://93.174.95.27") or bare ("93.174.95.27").
        url: the url to inspect.

    Returns:
        ``url`` unchanged when its host is not libgen.io; otherwise the url
        with its host replaced by genhost (left pointing at libgen.io when
        genhost is empty or is itself libgen.io).
    """
    myurl = urlparse(url)
    if myurl.netloc.lower() != 'libgen.io':
        return url

    host = urlparse(genhost)
    # genhost http://93.174.95.27 -> scheme http, netloc 93.174.95.27, path ""
    # genhost 93.174.95.27 -> scheme "", netloc "", path 93.174.95.27
    # A bare host therefore has to be taken from the path component; using
    # netloc there would replace the host with an empty string.
    new_netloc = host.netloc or host.path.strip('/')
    if new_netloc and new_netloc.lower() != 'libgen.io':
        myurl = myurl._replace(netloc=new_netloc)
        logger.debug('Redirected libgen.io to [%s]' % new_netloc)
    return myurl.geturl()

示例#4

0

显示文件

文件： directparser.py 项目： DobyTang/LazyLibrarian

def redirect_url(genhost, url):
    """Rewrite a libgen.io url so that it points at the configured genhost.

    libgen.io might have dns blocked, but the user can bypass that by using a
    genhost such as 93.174.95.27 in config.  libgen might send us a book url
    that still contains http://libgen.io/, so it may need redirecting to the
    user's genhost setting.

    Args:
        genhost: replacement host; may include a scheme
            ("http://93.174.95.27") or be a bare host ("93.174.95.27").
        url: the url received from libgen.

    Returns:
        ``url`` as given when its host is not libgen.io; otherwise the url
        with the host swapped for genhost.  The host is left alone when
        genhost is empty or is libgen.io itself.
    """
    myurl = urlparse(url)
    if myurl.netloc.lower() != 'libgen.io':
        return url

    host = urlparse(genhost)
    # genhost http://93.174.95.27 -> scheme http, netloc 93.174.95.27, path ""
    # genhost 93.174.95.27 -> scheme "", netloc "", path 93.174.95.27
    # so a bare genhost is only available via path; netloc is empty then and
    # must not be used as the replacement.
    target = host.netloc or host.path.strip('/')
    if target and target.lower() != 'libgen.io':
        myurl = myurl._replace(netloc=target)
        logger.debug('Redirected libgen.io to [%s]' % target)
    return myurl.geturl()

示例#5

0

显示文件

文件： sanitizer.py 项目： kuuratsanik/LazyLibrarian-1

    def allowed_token(self, token):
        """Return ``token`` with disallowed attributes and URL values removed.

        Only tokens that carry a ``"data"`` attribute mapping are modified:

        * attributes not in ``self.allowed_attributes`` are dropped;
        * attributes named in ``self.attr_val_is_uri`` are dropped when the
          value cannot be parsed, uses a scheme outside
          ``self.allowed_protocols``, or is a ``data:`` URI whose content
          type is not in ``self.allowed_content_types``;
        * in ``self.svg_attr_val_allows_ref`` attributes, ``url(...)``
          references that do not start with ``#`` are replaced by a space;
        * for elements in ``self.svg_allow_local_href`` an ``xlink:href``
          that does not start with ``#`` is dropped;
        * a ``style`` attribute is passed through ``self.sanitize_css``.
        """
        if "data" in token:
            attrs = token["data"]
            attr_names = set(attrs.keys())

            # Remove forbidden attributes
            for to_remove in (attr_names - self.allowed_attributes):
                del token["data"][to_remove]
                attr_names.remove(to_remove)

            # Remove attributes with disallowed URL values
            for attr in (attr_names & self.attr_val_is_uri):
                assert attr in attrs
                # I don't have a clue where this regexp comes from or why it matches those
                # characters, nor why we call unescape. I just know it's always been here.
                # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
                # this will do is remove *more* than it otherwise would.
                val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                try:
                    uri = urlparse.urlparse(val_unescaped)
                except ValueError:
                    uri = None
                    del attrs[attr]
                if uri and uri.scheme:
                    if uri.scheme not in self.allowed_protocols:
                        del attrs[attr]
                    # elif, not if: an attribute already removed for its
                    # scheme must not be deleted a second time (KeyError).
                    elif uri.scheme == 'data':
                        m = data_content_type.match(uri.path)
                        if not m:
                            del attrs[attr]
                        elif m.group('content_type'
                                     ) not in self.allowed_content_types:
                            del attrs[attr]

            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', ' ',
                                         unescape(attrs[attr]))
            if (token["name"] in self.svg_allow_local_href
                    and (namespaces['xlink'], 'href') in attrs and re.search(
                        r'^\s*[^#\s].*', attrs[(namespaces['xlink'], 'href')])):
                del attrs[(namespaces['xlink'], 'href')]
            if (None, 'style') in attrs:
                attrs[(None, 'style')] = self.sanitize_css(attrs[(None,
                                                                  'style')])
            token["data"] = attrs
        return token

示例#6

0

显示文件

文件： sanitizer.py 项目： DobyTang/LazyLibrarian

    def allowed_token(self, token):
        """Strip disallowed attributes and unsafe URL values from ``token``.

        A token without a ``"data"`` mapping is returned untouched.
        Otherwise, in order:

        * attributes outside ``self.allowed_attributes`` are removed;
        * each attribute in ``self.attr_val_is_uri`` is removed if its value
          fails to parse as a URL, has a scheme not listed in
          ``self.allowed_protocols``, or is a ``data:`` URI whose content
          type is missing from ``self.allowed_content_types``;
        * ``url(...)`` references not starting with ``#`` are replaced by a
          space in ``self.svg_attr_val_allows_ref`` attributes;
        * ``xlink:href`` values not starting with ``#`` are removed for
          elements in ``self.svg_allow_local_href``;
        * the ``style`` attribute is rewritten by ``self.sanitize_css``.
        """
        if "data" in token:
            attrs = token["data"]
            attr_names = set(attrs.keys())

            # Remove forbidden attributes
            for to_remove in (attr_names - self.allowed_attributes):
                del token["data"][to_remove]
                attr_names.remove(to_remove)

            # Remove attributes with disallowed URL values
            for attr in (attr_names & self.attr_val_is_uri):
                assert attr in attrs
                # I don't have a clue where this regexp comes from or why it matches those
                # characters, nor why we call unescape. I just know it's always been here.
                # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
                # this will do is remove *more* than it otherwise would.
                val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                try:
                    uri = urlparse.urlparse(val_unescaped)
                except ValueError:
                    uri = None
                    del attrs[attr]
                if uri and uri.scheme:
                    if uri.scheme not in self.allowed_protocols:
                        del attrs[attr]
                    # Must be elif: the attribute may already have been
                    # removed above, and deleting it again raises KeyError.
                    elif uri.scheme == 'data':
                        m = data_content_type.match(uri.path)
                        if not m:
                            del attrs[attr]
                        elif m.group('content_type') not in self.allowed_content_types:
                            del attrs[attr]

            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))
            if (token["name"] in self.svg_allow_local_href and
                (namespaces['xlink'], 'href') in attrs and re.search(r'^\s*[^#\s].*',
                                                                     attrs[(namespaces['xlink'], 'href')])):
                del attrs[(namespaces['xlink'], 'href')]
            if (None, 'style') in attrs:
                attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')])
            token["data"] = attrs
        return token

示例#7

0

显示文件

文件： __init__.py 项目： Cliffnla/LazyLibrarian-1

    def url(self, value):
        """Normalise ``value`` to ``scheme://netloc/path`` and store it.

        Only the scheme, network location and path are kept; params, query
        and fragment are discarded.  A default port (``:80`` for http,
        ``:443`` for https) is removed from the network location.  The
        result is written to ``self.__dict__['url']``.

        Raises:
            ValueError: if the scheme is neither ``http`` nor ``https``.
        """
        scheme, netloc, path = urlparse(value)[:3]

        if scheme not in ('http', 'https'):
            raise ValueError("Unsupported URL %s (%s)." % (value, scheme))

        # Exclude default port numbers.
        default_port = ':80' if scheme == 'http' else ':443'
        if netloc.endswith(default_port):
            netloc = netloc[:-len(default_port)]

        self.__dict__['url'] = '%s://%s%s' % (scheme, netloc, path)

示例#8

0

显示文件

文件： __init__.py 项目： Cliffnla/LazyLibrarian-1

    def request(self, uri, method="GET", body=None, headers=None,
                redirections=httplib2.DEFAULT_MAX_REDIRECTS, connection_type=None,
                force_auth_header=False):
        """Sign the request and forward it to ``httplib2.Http.request``.

        Request parameters are taken from a POST ``body`` or from the query
        string of a GET ``uri``, and a signed ``Request`` is built from them
        with ``self.consumer``, ``self.token`` and ``self.method``.  The
        signed values are then attached as follows:

        * ``force_auth_header`` true: ``req.to_header()`` is always merged
          into ``headers``; a POST body carries only the non-OAuth
          parameters and a GET ``uri`` is left as given.
        * otherwise: POST uses ``req.to_postdata()`` as the body, GET uses
          ``req.to_url()`` as the uri, and any other method gets
          ``req.to_header()`` merged into ``headers``.

        POST requests are always sent with a form-urlencoded Content-Type.
        A ``headers`` argument that is not a dict is replaced by an empty
        dict.  Returns whatever ``httplib2.Http.request`` returns.
        """
        headers = headers if isinstance(headers, dict) else {}
        is_post = method == "POST"
        is_get = method == "GET"

        if is_post and body:
            parameters = dict(parse_qsl(body))
        elif is_get:
            parameters = parse_qs(urlparse(uri).query)
        else:
            parameters = None

        req = Request.from_consumer_and_token(
            self.consumer,
            token=self.token,
            http_method=method,
            http_url=uri,
            parameters=parameters,
        )
        req.sign_request(self.method, self.consumer, self.token)

        # The header is sent when forced, and is the only carrier for methods
        # other than GET/POST; one combined test keeps it to a single update.
        if force_auth_header or not (is_post or is_get):
            headers.update(req.to_header())

        if is_post:
            if force_auth_header:
                body = req.encode_postdata(req.get_nonoauth_parameters())
            else:
                body = req.to_postdata()
            headers['Content-Type'] = 'application/x-www-form-urlencoded'
        elif is_get and not force_auth_header:
            uri = req.to_url()

        return httplib2.Http.request(self, uri, method=method, body=body,
                                     headers=headers, redirections=redirections,
                                     connection_type=connection_type)

示例#9

0

显示文件

文件： transmission.py 项目： cdancette/LazyLibrarian

def torrentAction(method, arguments):
    """Send one RPC call to Transmission and return the decoded reply.

    The RPC endpoint url and the Transmission session id are kept in the
    module globals ``host_url`` and ``session_id`` and reused on later calls.
    The url is built from the TRANSMISSION_HOST / TRANSMISSION_PORT settings:
    a missing scheme defaults to http, the port is added when the host has
    none, and "/transmission/rpc" is appended unless the path already ends
    in "/rpc".

    Args:
        method: value sent as the 'method' field of the JSON payload.
        arguments: value sent as the 'arguments' field of the JSON payload.

    Returns:
        The parsed JSON reply on success; '' when the reply is not JSON;
        False when host or port is not configured; None for every other
        failure.  Each failure is logged.
    """
    global session_id, host_url

    def post_rpc():
        # Reads the module-level session_id / host_url at call time, so a
        # refreshed session id is picked up on the retry.
        return requests.post(host_url,
                             json=payload,
                             headers={'x-transmission-session-id': session_id},
                             proxies=proxies,
                             auth=auth,
                             timeout=timeout)

    username = lazylibrarian.CONFIG['TRANSMISSION_USER']
    password = lazylibrarian.CONFIG['TRANSMISSION_PASS']

    if not host_url:
        host = lazylibrarian.CONFIG['TRANSMISSION_HOST']
        port = check_int(lazylibrarian.CONFIG['TRANSMISSION_PORT'], 0)

        if not (host and port):
            logger.error(
                'Invalid transmission host or port, check your config')
            return False

        if not host.startswith(("http://", "https://")):
            host = 'http://' + host
        if host.endswith('/'):
            host = host[:-1]

        # Fix the URL. We assume that the user does not point to the RPC endpoint,
        # so add it if it is missing.
        scheme, netloc, path, params, query, fragment = urlparse(host)
        if scheme not in ("http", "https"):
            scheme = "http"
        if ':' not in netloc:
            netloc += ":%s" % port
        if not path.endswith("/rpc"):
            path += "/transmission/rpc"

        host_url = urlunparse((scheme, netloc, path, params, query, fragment))
    elif lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug("Using existing host %s" % host_url)

    auth = (username, password) if username and password else None
    proxies = proxyList()
    timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)

    # Retrieve session id
    if not session_id:
        reply = requests.get(host_url,
                             auth=auth,
                             proxies=proxies,
                             timeout=timeout)
        if reply is None:
            logger.error("Error getting Transmission session ID")
            return None

        # Parse response
        if reply.status_code == 401:
            if auth:
                logger.error(
                    "Username and/or password not accepted by Transmission")
            else:
                logger.error("Transmission authorization required")
            return None
        if reply.status_code == 409:
            session_id = reply.headers['x-transmission-session-id']

        if not session_id:
            logger.error("Expected a Session ID from Transmission, got %s" %
                         reply.status_code)
            return None
    elif lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug('Using existing session_id %s' % session_id)

    # Prepare next request
    payload = {'method': method, 'arguments': arguments}
    try:
        reply = post_rpc()
        if reply.status_code == 409:
            # The cached session id was rejected: adopt the one offered in
            # the reply and try once more.
            session_id = reply.headers['x-transmission-session-id']
            logger.debug("Retrying with new session_id %s" % session_id)
            reply = post_rpc()

        if not str(reply.status_code).startswith('2'):
            logger.error("Expected a response from Transmission, got %s" %
                         reply.status_code)
            return None

        try:
            return reply.json()
        except ValueError:
            logger.error("Expected json, Transmission returned %s" %
                         reply.text)
            return ''

    except Exception as err:
        logger.error('Transmission %s: %s' % (type(err).__name__, str(err)))
        return None

示例#10

0

显示文件

文件： transmission.py 项目： knobunc/LazyLibrarian

def torrentAction(method, arguments):
    """Send one RPC call to Transmission and report the outcome as a pair.

    The RPC endpoint url and the Transmission session id are kept in the
    module globals ``host_url`` and ``session_id`` and reused on later calls.
    The url is built from the TRANSMISSION_HOST / TRANSMISSION_PORT settings:
    a missing scheme defaults to http, the port is added when the host has
    none, and "/transmission/rpc" is appended unless the path already ends
    in "/rpc".

    Args:
        method: value sent as the 'method' field of the JSON payload.
        arguments: value sent as the 'arguments' field of the JSON payload.

    Returns:
        ``(parsed_json, '')`` on success, or ``(False, message)`` on any
        failure, where ``message`` is the text that was also logged.
    """
    global session_id, host_url

    def fail(message):
        # Log the problem and build the failure pair used on every error path.
        logger.error(message)
        return False, message

    def post_rpc():
        # Reads the module-level session_id / host_url at call time, so a
        # refreshed session id is picked up on the retry.
        return requests.post(host_url, json=payload,
                             headers={'x-transmission-session-id': session_id},
                             proxies=proxies, auth=auth, timeout=timeout)

    username = lazylibrarian.CONFIG['TRANSMISSION_USER']
    password = lazylibrarian.CONFIG['TRANSMISSION_PASS']

    if not host_url:
        host = lazylibrarian.CONFIG['TRANSMISSION_HOST']
        port = check_int(lazylibrarian.CONFIG['TRANSMISSION_PORT'], 0)

        if not (host and port):
            return fail('Invalid transmission host or port, check your config')

        if not host.startswith(("http://", "https://")):
            host = 'http://' + host
        if host.endswith('/'):
            host = host[:-1]

        # Fix the URL. We assume that the user does not point to the RPC endpoint,
        # so add it if it is missing.
        scheme, netloc, path, params, query, fragment = urlparse(host)
        if scheme not in ("http", "https"):
            scheme = "http"
        if ':' not in netloc:
            netloc += ":%s" % port
        if not path.endswith("/rpc"):
            path += "/transmission/rpc"

        host_url = urlunparse((scheme, netloc, path, params, query, fragment))
    elif lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug("Using existing host %s" % host_url)

    auth = (username, password) if username and password else None
    proxies = proxyList()
    timeout = check_int(lazylibrarian.CONFIG['HTTP_TIMEOUT'], 30)

    # Retrieve session id
    if not session_id:
        reply = requests.get(host_url, auth=auth, proxies=proxies, timeout=timeout)
        if reply is None:
            return fail("Error getting Transmission session ID")

        # Parse response
        if reply.status_code == 401:
            return fail("Username and/or password not accepted by Transmission" if auth
                        else "Transmission authorization required")
        if reply.status_code == 409:
            session_id = reply.headers['x-transmission-session-id']

        if not session_id:
            return fail("Expected a Session ID from Transmission, got %s" % reply.status_code)
    elif lazylibrarian.LOGLEVEL & lazylibrarian.log_dlcomms:
        logger.debug('Using existing session_id %s' % session_id)

    # Prepare next request
    payload = {'method': method, 'arguments': arguments}
    try:
        reply = post_rpc()
        if reply.status_code == 409:
            # The cached session id was rejected: adopt the one offered in
            # the reply and try once more.
            session_id = reply.headers['x-transmission-session-id']
            logger.debug("Retrying with new session_id %s" % session_id)
            reply = post_rpc()

        if not str(reply.status_code).startswith('2'):
            return fail("Expected a response from Transmission, got %s" % reply.status_code)

        try:
            decoded = reply.json()
        except ValueError:
            return fail("Expected json, Transmission returned %s" % reply.text)
        return decoded, ''

    except Exception as err:
        return fail('Transmission %s: %s' % (type(err).__name__, str(err)))